From fcb3d091bcae7585db84d968e6737f9725180bdb Mon Sep 17 00:00:00 2001 From: pzl Date: Thu, 22 Sep 2022 08:57:55 -0400 Subject: [PATCH 01/51] file upload WIP --- internal/pkg/api/handleUpload.go | 231 ++++++++++++++++++++++++++ internal/pkg/api/handleUpload_test.go | 8 + internal/pkg/api/metrics.go | 2 + internal/pkg/api/router.go | 32 +++- internal/pkg/api/router_test.go | 2 +- internal/pkg/api/schema.go | 7 + internal/pkg/dl/upload.go | 27 +++ internal/pkg/model/schema.go | 98 ++++++++++- internal/pkg/server/fleet.go | 3 +- internal/pkg/upload/upload.go | 131 +++++++++++++++ licenses/license_header.go | 19 ++- model/schema.json | 156 +++++++++++++++++ 12 files changed, 701 insertions(+), 15 deletions(-) create mode 100644 internal/pkg/api/handleUpload.go create mode 100644 internal/pkg/api/handleUpload_test.go create mode 100644 internal/pkg/dl/upload.go create mode 100644 internal/pkg/upload/upload.go diff --git a/internal/pkg/api/handleUpload.go b/internal/pkg/api/handleUpload.go new file mode 100644 index 000000000..9f5b5d881 --- /dev/null +++ b/internal/pkg/api/handleUpload.go @@ -0,0 +1,231 @@ +// Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one +// or more contributor license agreements. Licensed under the Elastic License; +// you may not use this file except in compliance with the Elastic License. + +package api + +import ( + "encoding/json" + "errors" + "fmt" + "io" + "net/http" + "strconv" + "strings" + "time" + + "github.com/elastic/fleet-server/v7/internal/pkg/bulk" + "github.com/elastic/fleet-server/v7/internal/pkg/cache" + "github.com/elastic/fleet-server/v7/internal/pkg/config" + "github.com/elastic/fleet-server/v7/internal/pkg/limit" + "github.com/elastic/fleet-server/v7/internal/pkg/logger" + "github.com/elastic/fleet-server/v7/internal/pkg/throttle" + "github.com/elastic/fleet-server/v7/internal/pkg/upload" + "github.com/julienschmidt/httprouter" + "github.com/rs/zerolog" + "github.com/rs/zerolog/log" +) + +const ( + // TODO: move to a config + maxParallelUploads = 5 + + // specification-designated maximum + maxChunkSize = 4194304 // 4 MiB +) + +func (rt Router) handleUploadStart(w http.ResponseWriter, r *http.Request, ps httprouter.Params) { + start := time.Now() + + reqID := r.Header.Get(logger.HeaderRequestID) + + zlog := log.With(). + Str(ECSHTTPRequestID, reqID). + Logger() + + err := rt.ut.handleUploadStart(&zlog, w, r) + + if err != nil { + cntUpload.IncError(err) + resp := NewHTTPErrResp(err) + + // Log this as warn for visibility that limit has been reached. + // This allows customers to tune the configuration on detection of threshold. + if errors.Is(err, limit.ErrMaxLimit) || errors.Is(err, upload.ErrMaxConcurrentUploads) { + resp.Level = zerolog.WarnLevel + } + + zlog.WithLevel(resp.Level). + Err(err). + Int(ECSHTTPResponseCode, resp.StatusCode). + Int64(ECSEventDuration, time.Since(start).Nanoseconds()). + Msg("fail checkin") + + if err := resp.Write(w); err != nil { + zlog.Error().Err(err).Msg("fail writing error response") + } + } +} + +func (rt Router) handleUploadChunk(w http.ResponseWriter, r *http.Request, ps httprouter.Params) { + start := time.Now() + + id := ps.ByName("id") + chunkID := ps.ByName("num") + + reqID := r.Header.Get(logger.HeaderRequestID) + + zlog := log.With(). + Str(LogAgentID, id). + Str(ECSHTTPRequestID, reqID). 
+ Logger() + + chunkNum, err := strconv.Atoi(chunkID) + if err != nil { + cntUpload.IncError(err) + resp := NewHTTPErrResp(err) + if err := resp.Write(w); err != nil { + zlog.Error().Err(err).Msg("fail writing error response") + } + return + } + err = rt.ut.handleUploadChunk(&zlog, w, r, id, chunkNum) + + if err != nil { + cntUpload.IncError(err) + resp := NewHTTPErrResp(err) + + // Log this as warn for visibility that limit has been reached. + // This allows customers to tune the configuration on detection of threshold. + if errors.Is(err, limit.ErrMaxLimit) { + resp.Level = zerolog.WarnLevel + } + + zlog.WithLevel(resp.Level). + Err(err). + Int(ECSHTTPResponseCode, resp.StatusCode). + Int64(ECSEventDuration, time.Since(start).Nanoseconds()). + Msg("fail checkin") + + if err := resp.Write(w); err != nil { + zlog.Error().Err(err).Msg("fail writing error response") + } + } +} + +func (rt Router) handleUploadComplete(w http.ResponseWriter, r *http.Request, ps httprouter.Params) { + start := time.Now() + + id := ps.ByName("id") + + reqID := r.Header.Get(logger.HeaderRequestID) + + zlog := log.With(). + Str(LogAgentID, id). + Str(ECSHTTPRequestID, reqID). + Logger() + + err := rt.ut.handleUploadComplete(&zlog, w, r, id) + + if err != nil { + cntUpload.IncError(err) + resp := NewHTTPErrResp(err) + + // Log this as warn for visibility that limit has been reached. + // This allows customers to tune the configuration on detection of threshold. + if errors.Is(err, limit.ErrMaxLimit) { + resp.Level = zerolog.WarnLevel + } + + zlog.WithLevel(resp.Level). + Err(err). + Int(ECSHTTPResponseCode, resp.StatusCode). + Int64(ECSEventDuration, time.Since(start).Nanoseconds()). + Msg("fail checkin") + + if err := resp.Write(w); err != nil { + zlog.Error().Err(err).Msg("fail writing error response") + } + } +} + +type UploadT struct { + bulker bulk.Bulk + cache cache.Cache + esThrottle *throttle.Throttle + upl *upload.Uploader +} + +func NewUploadT(cfg *config.Server, bulker bulk.Bulk, cache cache.Cache) *UploadT { + log.Info(). + Interface("limits", cfg.Limits.ArtifactLimit). + Int("maxParallel", defaultMaxParallel). 
+ Msg("Artifact install limits") + + return &UploadT{ + bulker: bulker, + cache: cache, + esThrottle: throttle.NewThrottle(defaultMaxParallel), + upl: upload.New(maxParallelUploads), + } +} + +func (ut *UploadT) handleUploadStart(zlog *zerolog.Logger, w http.ResponseWriter, r *http.Request) error { + var fi FileInfo + if err := json.NewDecoder(r.Body).Decode(&fi); err != nil { + r.Body.Close() + if errors.Is(err, io.EOF) { + return fmt.Errorf("file info body is required: %w", err) + } + return err + } + r.Body.Close() + + if strings.TrimSpace(fi.Name) == "" { + return errors.New("file name is required") + } + if fi.Size <= 0 { + return errors.New("invalid file size, size is required") + } + + uploadID, err := ut.upl.Begin() + if err != nil { + return err + } + + // TODO: write header doc + + _, err = w.Write([]byte(uploadID)) + if err != nil { + return err + } + return nil +} + +func (ut *UploadT) handleUploadChunk(zlog *zerolog.Logger, w http.ResponseWriter, r *http.Request, uplID string, chunkID int) error { + // prevent over-sized chunks + chunk := http.MaxBytesReader(w, r.Body, maxChunkSize) + data, err := ut.upl.Chunk(uplID, chunkID, chunk) + if err != nil { + return err + } + + _, err = w.Write([]byte(data)) + if err != nil { + return err + } + return nil +} + +func (ut *UploadT) handleUploadComplete(zlog *zerolog.Logger, w http.ResponseWriter, r *http.Request, uplID string) error { + data, err := ut.upl.Complete(uplID) + if err != nil { + return err + } + + _, err = w.Write([]byte(data)) + if err != nil { + return err + } + return nil +} diff --git a/internal/pkg/api/handleUpload_test.go b/internal/pkg/api/handleUpload_test.go new file mode 100644 index 000000000..bad0dab64 --- /dev/null +++ b/internal/pkg/api/handleUpload_test.go @@ -0,0 +1,8 @@ +// Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one +// or more contributor license agreements. Licensed under the Elastic License; +// you may not use this file except in compliance with the Elastic License. 
+ +//go:build !integration +// +build !integration + +package api diff --git a/internal/pkg/api/metrics.go b/internal/pkg/api/metrics.go index a72be2173..34e05f562 100644 --- a/internal/pkg/api/metrics.go +++ b/internal/pkg/api/metrics.go @@ -32,6 +32,7 @@ var ( cntEnroll routeStats cntAcks routeStats cntStatus routeStats + cntUpload routeStats cntArtifacts artifactStats ) @@ -103,6 +104,7 @@ func init() { cntArtifacts.Register(routesRegistry.NewRegistry("artifacts")) cntAcks.Register(routesRegistry.NewRegistry("acks")) cntStatus.Register(routesRegistry.NewRegistry("status")) + cntUpload.Register(routesRegistry.NewRegistry("upload")) } func (rt *routeStats) IncError(err error) { diff --git a/internal/pkg/api/router.go b/internal/pkg/api/router.go index 7d219cbdf..2e7fc9837 100644 --- a/internal/pkg/api/router.go +++ b/internal/pkg/api/router.go @@ -25,11 +25,14 @@ import ( ) const ( - RouteStatus = "/api/status" - RouteEnroll = "/api/fleet/agents/:id" - RouteCheckin = "/api/fleet/agents/:id/checkin" - RouteAcks = "/api/fleet/agents/:id/acks" - RouteArtifacts = "/api/fleet/artifacts/:id/:sha2" + RouteStatus = "/api/status" + RouteEnroll = "/api/fleet/agents/:id" + RouteCheckin = "/api/fleet/agents/:id/checkin" + RouteAcks = "/api/fleet/agents/:id/acks" + RouteArtifacts = "/api/fleet/artifacts/:id/:sha2" + RouteUploadBegin = "/api/fleet/uploads" + RouteUploadChunk = "/api/fleet/uploads/:id/:num" + RouteUploadComplete = "/api/fleet/uploads/:id" ) type Router struct { @@ -41,12 +44,13 @@ type Router struct { at *ArtifactT ack *AckT st *StatusT + ut *UploadT sm policy.SelfMonitor tracer *apm.Tracer bi build.Info } -func NewRouter(cfg *config.Server, bulker bulk.Bulk, ct *CheckinT, et *EnrollerT, at *ArtifactT, ack *AckT, st *StatusT, sm policy.SelfMonitor, tracer *apm.Tracer, bi build.Info) *Router { +func NewRouter(cfg *config.Server, bulker bulk.Bulk, ct *CheckinT, et *EnrollerT, at *ArtifactT, ack *AckT, st *StatusT, ut *UploadT, sm policy.SelfMonitor, tracer *apm.Tracer, bi build.Info) *Router { rt := &Router{ cfg: cfg, bulker: bulker, @@ -57,6 +61,7 @@ func NewRouter(cfg *config.Server, bulker bulk.Bulk, ct *CheckinT, et *EnrollerT ack: ack, st: st, tracer: tracer, + ut: ut, bi: bi, } @@ -98,6 +103,21 @@ func (rt *Router) newHTTPRouter(addr string) *httprouter.Router { RouteArtifacts, limiter.WrapArtifact(rt.handleArtifacts, &cntArtifacts), }, + { + http.MethodPost, + RouteUploadBegin, + rt.handleUploadStart, + }, + { + http.MethodPut, + RouteUploadChunk, + rt.handleUploadChunk, + }, + { + http.MethodPost, + RouteUploadComplete, + rt.handleUploadComplete, + }, } router := httprouter.New() diff --git a/internal/pkg/api/router_test.go b/internal/pkg/api/router_test.go index 9844c0ec9..c1d4ef7d1 100644 --- a/internal/pkg/api/router_test.go +++ b/internal/pkg/api/router_test.go @@ -48,7 +48,7 @@ func TestRun(t *testing.T) { et, err := NewEnrollerT(verCon, cfg, nil, c) require.NoError(t, err) - router := NewRouter(cfg, bulker, ct, et, nil, nil, nil, nil, nil, fbuild.Info{}) + router := NewRouter(cfg, bulker, ct, et, nil, nil, nil, nil, nil, nil, fbuild.Info{}) errCh := make(chan error) var wg sync.WaitGroup diff --git a/internal/pkg/api/schema.go b/internal/pkg/api/schema.go index fdca0648a..bdc3bf771 100644 --- a/internal/pkg/api/schema.go +++ b/internal/pkg/api/schema.go @@ -175,3 +175,10 @@ type StatusResponse struct { Status string `json:"status"` Version *StatusResponseVersion `json:"version,omitempty"` } + +type FileInfo struct { + Size int64 `json:"size"` + Name string `json:"name"` + 
Extension string `json:"ext"` + Mime string `json:"mime_type"` +} diff --git a/internal/pkg/dl/upload.go b/internal/pkg/dl/upload.go new file mode 100644 index 000000000..473ab13f1 --- /dev/null +++ b/internal/pkg/dl/upload.go @@ -0,0 +1,27 @@ +// Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one +// or more contributor license agreements. Licensed under the Elastic License; +// you may not use this file except in compliance with the Elastic License. + +package dl + +import ( + "context" + "encoding/json" + + "github.com/elastic/fleet-server/v7/internal/pkg/bulk" + "github.com/elastic/fleet-server/v7/internal/pkg/model" +) + +func CreateUploadInfo(ctx context.Context, bulker bulk.Bulk, fi model.FileInfo) (string, error) { + return createUploadInfo(ctx, bulker, "files", fi) // @todo: index destination is an input (and different per integration) +} + +func createUploadInfo(ctx context.Context, bulker bulk.Bulk, index string, fi model.FileInfo) (string, error) { + body, err := json.Marshal(fi) + if err != nil { + return "", err + } + + // @todo: proper doc ID + return bulker.Create(ctx, index, "", body, bulk.WithRefresh()) +} diff --git a/internal/pkg/model/schema.go b/internal/pkg/model/schema.go index 0dc0ac947..4bb5159da 100644 --- a/internal/pkg/model/schema.go +++ b/internal/pkg/model/schema.go @@ -1,7 +1,3 @@ -// Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one -// or more contributor license agreements. Licensed under the Elastic License; -// you may not use this file except in compliance with the Elastic License. - // Code generated by schema-generate. DO NOT EDIT. package model @@ -277,6 +273,100 @@ type EnrollmentAPIKey struct { UpdatedAt string `json:"updated_at,omitempty"` } +// FileInfo An uploaded File +type FileInfo struct { + ESDocument + + // Information about the file properties + File *FileMetadata `json:"file,omitempty"` +} + +// FileMetadata Information about the file properties +type FileMetadata struct { + + // Last time the file was accessed + Accessed string `json:"accessed,omitempty"` + + // Platform-dependent sequence of file attributes such as readonly, execute, hidden + Attributes string `json:"attributes,omitempty"` + + // Size, in bytes, of each data chunk + ChunkSize int64 `json:"ChunkSize,omitempty"` + + // The algorithm used to compress the file + Compression string `json:"Compression,omitempty"` + + // File creation time + Created string `json:"created,omitempty"` + + // Last time the file attributes or metadata changed + Ctime string `json:"ctime,omitempty"` + + // Device that is the source of the file + Device string `json:"device,omitempty"` + + // Directory where the file is located + Directory string `json:"directory,omitempty"` + + // Drive letter where the file is located + DriveLetter string `json:"drive_letter,omitempty"` + + // File extension, excluding the leading dot + Extension string `json:"extension,omitempty"` + + // Primary group ID (GID) of the file + Gid string `json:"gid,omitempty"` + + // Primary group name of the file + Group string `json:"group,omitempty"` + + // Checksums on the file contents + Hash *Hash `json:"hash,omitempty"` + + // inode representing the file in the filesystem + Inode string `json:"inode,omitempty"` + + // MIME type of the file + MimeType string `json:"mime_type,omitempty"` + + // Mode of the file in octal representation + Mode string `json:"mode,omitempty"` + + // Last time the file content was modified + Mtime string `json:"mtime,omitempty"` + + // Name of 
the file including the extension, without the directory + Name string `json:"name,omitempty"` + + // File owner's username + Owner string `json:"owner,omitempty"` + + // Full path to the file, including the file name. It should include the drive letter, when appropriate + Path string `json:"path,omitempty"` + + // Size of the file contents, in bytes + Size int64 `json:"size,omitempty"` + + // The current state of the file upload process + Status string `json:"Status,omitempty"` + + // Target path for symlinks + TargetPath string `json:"target_path,omitempty"` + + // File type (file, dir, or symlink) + Type string `json:"type,omitempty"` + + // The user ID (UID) or security identifier (SID) of the file owner + Uid string `json:"uid,omitempty"` +} + +// Hash Checksums on the file contents +type Hash struct { + + // SHA256 sum of the file contents + Sha256 string `json:"sha256,omitempty"` +} + // HostMetadata The host metadata for the Elastic Agent type HostMetadata struct { diff --git a/internal/pkg/server/fleet.go b/internal/pkg/server/fleet.go index fecc81b6d..d636a8776 100644 --- a/internal/pkg/server/fleet.go +++ b/internal/pkg/server/fleet.go @@ -493,8 +493,9 @@ func (f *Fleet) runSubsystems(ctx context.Context, cfg *config.Config, g *errgro at := api.NewArtifactT(&cfg.Inputs[0].Server, bulker, f.cache) ack := api.NewAckT(&cfg.Inputs[0].Server, bulker, f.cache) st := api.NewStatusT(&cfg.Inputs[0].Server, bulker, f.cache) + ut := api.NewUploadT(&cfg.Inputs[0].Server, bulker, f.cache) - router := api.NewRouter(&cfg.Inputs[0].Server, bulker, ct, et, at, ack, st, sm, tracer, f.bi) + router := api.NewRouter(&cfg.Inputs[0].Server, bulker, ct, et, at, ack, st, ut, sm, tracer, f.bi) g.Go(loggedRunFunc(ctx, "Http server", func(ctx context.Context) error { return router.Run(ctx) diff --git a/internal/pkg/upload/upload.go b/internal/pkg/upload/upload.go new file mode 100644 index 000000000..f4bd00351 --- /dev/null +++ b/internal/pkg/upload/upload.go @@ -0,0 +1,131 @@ +// Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one +// or more contributor license agreements. Licensed under the Elastic License; +// you may not use this file except in compliance with the Elastic License. 
+ +package upload + +import ( + "errors" + "fmt" + "io" + "time" + + "github.com/gofrs/uuid" + "github.com/rs/zerolog/log" +) + +const ( + //these should be configs probably + uploadRequestTimeout = time.Hour + chunkProgressTimeout = time.Hour / 4 +) + +var ( + ErrMaxConcurrentUploads = errors.New("the max number of concurrent uploads has been reached") + ErrInvalidUploadID = errors.New("active upload not found with this ID, it may be expired") +) + +type upload struct { + complete chan<- struct{} + chunkRecv chan<- struct{} +} + +type Uploader struct { + current map[string]upload + parallelLimit int +} + +func New(limit int) *Uploader { + return &Uploader{ + parallelLimit: limit, + current: make(map[string]upload, limit), + } +} + +// Start an upload operation, as long as the max concurrent has not been reached +// returns the upload ID +func (u *Uploader) Begin() (string, error) { + if len(u.current) >= u.parallelLimit { + return "", ErrMaxConcurrentUploads + } + + uid, err := uuid.NewV4() + if err != nil { + return "", fmt.Errorf("unable to generate upload operation ID: %w", err) + } + id := uid.String() + + total := time.NewTimer(uploadRequestTimeout) + chunkT := time.NewTimer(chunkProgressTimeout) + chunkRecv := make(chan struct{}) + complete := make(chan struct{}) + // total timer could also be achieved with context deadline and cancelling + + go func() { + for { + select { + case <-total.C: // entire upload operation timed out + log.Trace().Str("uploadID", id).Msg("upload operation timed out") + // stop and drain chunk timer + if !chunkT.Stop() { + <-chunkT.C + } + delete(u.current, id) + return + case <-chunkT.C: // no chunk progress within chunk timer, expire operation + log.Trace().Str("uploadID", id).Msg("upload operation chunk activity timed out") + // stop and drain total timer + if !total.Stop() { + <-total.C + } + delete(u.current, id) + return + case <-chunkRecv: // chunk activity, update chunk timer + if !chunkT.Stop() { + <-chunkT.C + } + chunkT.Reset(chunkProgressTimeout) + case <-complete: // upload operation complete, clean up + if !chunkT.Stop() { + <-chunkT.C + } + if !total.Stop() { + <-total.C + } + delete(u.current, id) + return + } + } + }() + u.current[id] = upload{ + complete: complete, + chunkRecv: chunkRecv, + } + return id, nil +} + +func (u *Uploader) Chunk(uplID string, chunknum int, body io.ReadCloser) (string, error) { + if body == nil { + return "", errors.New("body is required") + } + defer body.Close() + if _, valid := u.current[uplID]; !valid { + return "", ErrInvalidUploadID + } + u.current[uplID].chunkRecv <- struct{}{} + + _, err := io.ReadAll(body) + if err != nil { + return "", err + } + + return "", nil +} + +func (u *Uploader) Complete(id string) (string, error) { + if _, valid := u.current[id]; !valid { + return "", ErrInvalidUploadID + } + u.current[id].complete <- struct{}{} + return "", nil +} diff --git a/licenses/license_header.go b/licenses/license_header.go index 21a8501aa..fc5125bb2 100644 --- a/licenses/license_header.go +++ b/licenses/license_header.go @@ -1,6 +1,19 @@ -// Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one -// or more contributor license agreements. Licensed under the Elastic License; -// you may not use this file except in compliance with the Elastic License. +// Licensed to Elasticsearch B.V. under one or more contributor +// license agreements. See the NOTICE file distributed with +// this work for additional information regarding copyright +// ownership. Elasticsearch B.V. 
licenses this file to you under +// the Apache License, Version 2.0 (the "License"); you may +// not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. // Code generated by beats/dev-tools/cmd/license/license_generate.go - DO NOT EDIT. diff --git a/model/schema.json b/model/schema.json index 78debe5c9..074962eb8 100644 --- a/model/schema.json +++ b/model/schema.json @@ -607,6 +607,162 @@ "api_key_id", "api_key" ] + }, + "file-chunk": { + "title": "Chunk", + "description": "One section of the blob contents of a file", + "type": "object", + "properties": { + "_id": { + "description": "The identifier for the file chunk, describing the ID of the file, and the chunk position", + "type": "string" + }, + "data": { + "description": "Blob contents of the file", + "type": "binary" + }, + "bid": { + "description": "Base ID of the chunk, matching all chunks together as the base ID of the file", + "type": "string", + "format": "uuid" + }, + "last": { + "description": "Whether this chunk is the final chunk of a file", + "type": "boolean" + } + } + }, + "file-info": { + "title": "File Info", + "description": "An uploaded File", + "type": "object", + "properties": { + "_id": { + "description": "The unique identifier for the file metadata", + "type": "string", + "format": "uuid" + }, + "file": { + "title": "File Metadata", + "description": "Information about the file properties", + "type": "object", + "properties": { + "Status": { + "description": "The current state of the file upload process", + "type": "string" + }, + "ChunkSize": { + "description": "Size, in bytes, of each data chunk", + "type": "integer" + }, + "Compression": { + "description": "The algorithm used to compress the file", + "type": "string", + "enum": ["br","gzip","deflate","none"] + }, + "hash": { + "title": "Hash", + "description": "Checksums on the file contents", + "type": "object", + "properties": { + "sha256": { + "description": "SHA256 sum of the file contents", + "type": "string" + } + } + }, + "name": { + "description": "Name of the file including the extension, without the directory", + "type": "string" + }, + "mime_type": { + "description": "MIME type of the file", + "type": "string" + }, + "accessed": { + "description": "Last time the file was accessed", + "type": "string", + "format": "date-time" + }, + "attributes": { + "description": "Platform-dependent sequence of file attributes such as readonly, execute, hidden", + "type": "string" + }, + "created": { + "description": "File creation time", + "type": "string", + "format": "date-time" + }, + "ctime": { + "description": "Last time the file attributes or metadata changed", + "type": "string", + "format": "date-time" + }, + "device": { + "description": "Device that is the source of the file", + "type": "string" + }, + "directory": { + "description": "Directory where the file is located", + "type": "string" + }, + "drive_letter": { + "description": "Drive letter where the file is located", + "type": "string" + }, + "extension": { + "description": "File extension, excluding the leading dot", + "type": "string" + }, + "gid": { + "description": "Primary 
group ID (GID) of the file", + "type": "string" + }, + "group": { + "description": "Primary group name of the file", + "type": "string" + }, + "inode": { + "description": "inode representing the file in the filesystem", + "type": "string" + }, + "mode": { + "description": "Mode of the file in octal representation", + "type": "string" + }, + "mtime": { + "description": "Last time the file content was modified", + "type": "string", + "format": "date-time" + }, + "owner": { + "description": "File owner's username", + "type": "string" + }, + "path": { + "description": "Full path to the file, including the file name. It should include the drive letter, when appropriate", + "type": "string" + }, + "size": { + "description": "Size of the file contents, in bytes", + "type": "integer" + }, + "target_path": { + "description": "Target path for symlinks", + "type": "string" + }, + "type": { + "description": "File type (file, dir, or symlink)", + "type": "string", + "enum": ["file","dir","symlink"] + }, + "uid": { + "description":"The user ID (UID) or security identifier (SID) of the file owner", + "type":"string" + } + } + } + } } }, From 33e982113d70647d035cf150154a69e482f104de Mon Sep 17 00:00:00 2001 From: pzl Date: Mon, 26 Sep 2022 10:14:24 -0400 Subject: [PATCH 02/51] functional chunk uploads, basic functionality --- internal/pkg/api/handleUpload.go | 113 +++++++++++++++++++++--- internal/pkg/api/schema.go | 38 +++++++- internal/pkg/dl/upload.go | 130 +++++++++++++++++++++++++-- internal/pkg/model/schema.go | 13 ++- internal/pkg/upload/cbor.go | 146 +++++++++++++++++++++++++++++++ internal/pkg/upload/upload.go | 98 ++++++++++++++++----- model/schema.json | 18 +++- 7 files changed, 509 insertions(+), 47 deletions(-) create mode 100644 internal/pkg/upload/cbor.go diff --git a/internal/pkg/api/handleUpload.go b/internal/pkg/api/handleUpload.go index 9f5b5d881..68005e1da 100644 --- a/internal/pkg/api/handleUpload.go +++ b/internal/pkg/api/handleUpload.go @@ -5,6 +5,7 @@ package api import ( + "context" "encoding/json" "errors" "fmt" @@ -17,8 +18,10 @@ import ( "github.com/elastic/fleet-server/v7/internal/pkg/bulk" "github.com/elastic/fleet-server/v7/internal/pkg/cache" "github.com/elastic/fleet-server/v7/internal/pkg/config" + "github.com/elastic/fleet-server/v7/internal/pkg/dl" "github.com/elastic/fleet-server/v7/internal/pkg/limit" "github.com/elastic/fleet-server/v7/internal/pkg/logger" + "github.com/elastic/fleet-server/v7/internal/pkg/model" "github.com/elastic/fleet-server/v7/internal/pkg/throttle" "github.com/elastic/fleet-server/v7/internal/pkg/upload" "github.com/julienschmidt/httprouter" @@ -26,12 +29,20 @@ import ( "github.com/rs/zerolog/log" ) +// the only valid values of upload status according to storage spec +type UploadStatus string + +const ( + UploadAwaiting UploadStatus = "AWAITING_UPLOAD" + UploadProgress UploadStatus = "UPLOADING" + UploadDone UploadStatus = "READY" + UploadFail UploadStatus = "UPLOAD_ERROR" + UploadDel UploadStatus = "DELETED" +) + const ( // TODO: move to a config maxParallelUploads = 5 - - // specification-designated maximum - maxChunkSize = 4194304 // 4 MiB ) func (rt Router) handleUploadStart(w http.ResponseWriter, r *http.Request, ps httprouter.Params) { @@ -181,21 +192,38 @@ func (ut *UploadT) handleUploadStart(zlog *zerolog.Logger, w http.ResponseWriter } r.Body.Close() - if strings.TrimSpace(fi.Name) == "" { + if strings.TrimSpace(fi.File.Name) == "" { return errors.New("file name is required") } - if fi.Size <= 0 { + if fi.File.Size <= 0 { return 
errors.New("invalid file size, size is required") } + if strings.TrimSpace(fi.File.Mime) == "" { + return errors.New("mime_type is required") + } + + op, err := ut.upl.Begin(fi.File.Size) + if err != nil { + return err + } - uploadID, err := ut.upl.Begin() + doc := uploadRequestToFileInfo(fi, op.ChunkSize) + ret, err := dl.CreateUploadInfo(r.Context(), ut.bulker, doc, op.ID) // @todo: replace uploadID with correct file base ID if err != nil { return err } - // TODO: write header doc + zlog.Info().Str("return", ret).Msg("wrote doc") - _, err = w.Write([]byte(uploadID)) + out, err := json.Marshal(map[string]interface{}{ + "upload_id": op.ID, + "chunk_size": op.ChunkSize, + }) + if err != nil { + return err + } + w.Header().Set("Content-Type", "application/json") + _, err = w.Write(out) if err != nil { return err } @@ -203,15 +231,19 @@ func (ut *UploadT) handleUploadStart(zlog *zerolog.Logger, w http.ResponseWriter } func (ut *UploadT) handleUploadChunk(zlog *zerolog.Logger, w http.ResponseWriter, r *http.Request, uplID string, chunkID int) error { - // prevent over-sized chunks - chunk := http.MaxBytesReader(w, r.Body, maxChunkSize) - data, err := ut.upl.Chunk(uplID, chunkID, chunk) + info, err := ut.upl.Chunk(uplID, chunkID) if err != nil { return err } + if info.FirstReceived { + if err := updateUploadStatus(r.Context(), ut.bulker, uplID, UploadProgress); err != nil { + zlog.Warn().Err(err).Str("upload", uplID).Msg("unable to update upload status") + } + } - _, err = w.Write([]byte(data)) - if err != nil { + // prevent over-sized chunks + chunk := http.MaxBytesReader(w, r.Body, upload.MaxChunkSize) + if err := dl.UploadChunk(r.Context(), ut.bulker, chunk, info); err != nil { return err } return nil @@ -223,9 +255,64 @@ func (ut *UploadT) handleUploadComplete(zlog *zerolog.Logger, w http.ResponseWri return err } + if err := updateUploadStatus(r.Context(), ut.bulker, uplID, UploadDone); err != nil { + // should be 500 error probably? 
+ zlog.Warn().Err(err).Str("upload", uplID).Msg("unable to set upload status to complete") + return err + + } + _, err = w.Write([]byte(data)) if err != nil { return err } return nil } + +func uploadRequestToFileInfo(req FileInfo, chunkSize int64) model.FileInfo { + return model.FileInfo{ + File: &model.FileMetadata{ + Accessed: req.File.Accessed, + Attributes: req.File.Attributes, + ChunkSize: chunkSize, + Compression: req.File.Compression, + Created: req.File.Created, + Ctime: req.File.CTime, + Device: req.File.Device, + Directory: req.File.Directory, + DriveLetter: req.File.DriveLetter, + Extension: req.File.Extension, + Gid: req.File.GID, + Group: req.File.Group, + Hash: &model.Hash{ + Sha256: req.File.Hash.SHA256, + }, + Inode: req.File.INode, + MimeType: req.File.Mime, + Mode: req.File.Mode, + Mtime: req.File.MTime, + Name: req.File.Name, + Owner: req.File.Owner, + Path: req.File.Path, + Size: req.File.Size, + Status: string(UploadAwaiting), + TargetPath: req.File.TargetPath, + Type: req.File.Type, + Uid: req.File.UID, + }, + } +} + +func updateUploadStatus(ctx context.Context, bulker bulk.Bulk, fileID string, status UploadStatus) error { + data, err := json.Marshal(map[string]interface{}{ + "doc": map[string]interface{}{ + "file": map[string]string{ + "Status": string(status), + }, + }, + }) + if err != nil { + return err + } + return dl.UpdateUpload(ctx, bulker, fileID, data) +} diff --git a/internal/pkg/api/schema.go b/internal/pkg/api/schema.go index bdc3bf771..5d77c8568 100644 --- a/internal/pkg/api/schema.go +++ b/internal/pkg/api/schema.go @@ -177,8 +177,38 @@ type StatusResponse struct { } type FileInfo struct { - Size int64 `json:"size"` - Name string `json:"name"` - Extension string `json:"ext"` - Mime string `json:"mime_type"` + File struct { + Size int64 `json:"size"` + Name string `json:"name"` + Extension string `json:"ext"` + Mime string `json:"mime_type"` + Compression string `json:"Compression"` + Hash struct { + SHA256 string `json:"sha256"` + } + Accessed string `json:"accessed"` + Attributes []string `json:"attributes"` + Created string `json:"created"` + CTime string `json:"ctime"` + Device string `json:"device"` + Directory string `json:"directory"` + DriveLetter string `json:"drive_letter"` + Ext string `json:"extension"` + GID string `json:"gid"` + Group string `json:"group"` + INode string `json:"inode"` + Mode string `json:"mode"` + MTime string `json:"mtime"` + Owner string `json:"owner"` + Path string `json:"path"` + TargetPath string `json:"target_path"` + Type string `json:"type"` + UID string `json:"uid"` + } `json:"file"` + Event struct { + ID string `json:"id"` + } `json:"event"` + Host struct { + Hostname string `json:"hostname"` + } `json:"host"` } diff --git a/internal/pkg/dl/upload.go b/internal/pkg/dl/upload.go index 473ab13f1..146084eeb 100644 --- a/internal/pkg/dl/upload.go +++ b/internal/pkg/dl/upload.go @@ -7,21 +7,141 @@ package dl import ( "context" "encoding/json" + "fmt" + "io" + "net/http" + "os" "github.com/elastic/fleet-server/v7/internal/pkg/bulk" "github.com/elastic/fleet-server/v7/internal/pkg/model" + "github.com/elastic/fleet-server/v7/internal/pkg/upload" + "github.com/elastic/go-elasticsearch/v7" + "github.com/elastic/go-elasticsearch/v7/esapi" + "github.com/rs/zerolog/log" ) -func CreateUploadInfo(ctx context.Context, bulker bulk.Bulk, fi model.FileInfo) (string, error) { - return createUploadInfo(ctx, bulker, "files", fi) // @todo: index destination is an input (and different per integration) +func CreateUploadInfo(ctx 
context.Context, bulker bulk.Bulk, fi model.FileInfo, fileID string) (string, error) { + return createUploadInfo(ctx, bulker, ".fleet-files", fi, fileID) // @todo: index destination is an input (and different per integration) } -func createUploadInfo(ctx context.Context, bulker bulk.Bulk, index string, fi model.FileInfo) (string, error) { +func createUploadInfo(ctx context.Context, bulker bulk.Bulk, index string, fi model.FileInfo, fileID string) (string, error) { body, err := json.Marshal(fi) if err != nil { return "", err } + return bulker.Create(ctx, index, fileID, body, bulk.WithRefresh()) +} + +func UpdateUpload(ctx context.Context, bulker bulk.Bulk, fileID string, data []byte) error { + return updateUpload(ctx, bulker, ".fleet-files", fileID, data) +} + +func updateUpload(ctx context.Context, bulker bulk.Bulk, index string, fileID string, data []byte) error { + return bulker.Update(ctx, index, fileID, data) +} + +func UploadChunk(ctx context.Context, bulker bulk.Bulk, data io.ReadCloser, chunkInfo upload.ChunkInfo) error { + client := bulker.Client() + var chunkBody io.Reader + cbor := upload.NewCBORChunkWriter(data, chunkInfo.Final, chunkInfo.Upload.ID, chunkInfo.Upload.ChunkSize) + chunkBody = cbor + const DEBUG = false + + if DEBUG || chunkInfo.Final { + f, err := os.OpenFile("/home/dan/dev/elastic/file-store-poc/out2.data", os.O_CREATE|os.O_TRUNC|os.O_RDWR, 0644) + if err != nil { + return err + } + defer f.Close() + chunkBody = io.TeeReader(cbor, f) + } + + /* + buf := bytes.NewBuffer(nil) + out, err := io.ReadAll(data) + if err != nil { + return err + } + data.Close() + err = cbor.NewEncoder(buf).Encode(map[string]interface{}{ + "bid": fileID, + "last": false, + "data": out, + }) + if err != nil { + return err + } + buf2 := buf.Bytes() + */ + + req := esapi.IndexRequest{ + Index: ".fleet-file_data", + Body: chunkBody, + DocumentID: fmt.Sprintf("%s.%d", chunkInfo.Upload.ID, chunkInfo.ID), + } + overrider := contentTypeOverrider{client} + resp, err := req.Do(ctx, overrider) + /* + standard approach when content-type override no longer needed + + resp, err := client.Index(".fleet-file_data", data, func(req *esapi.IndexRequest) { + req.DocumentID = fmt.Sprintf("%s.%d", fileID, chunkID) + if req.Header == nil { + req.Header = make(http.Header) + } + // the below setting actually gets overridden in the ES client + // when it checks for the existence of r.Body, and then sets content-type to JSON + // this setting is then *added* so multiple content-types are sent. + // https://github.com/elastic/go-elasticsearch/blob/7.17/esapi/api.index.go#L183-L193 + // we have to temporarily override this with a custom esapi.Transport + req.Header.Set("Content-Type", "application/cbor") + req.Header.Set("Accept","application/json") // this one has no issues being set this way. We need to specify we want JSON response + })*/ + if err != nil { + return err + } + + //var buf bytes.Buffer + //spy := io.TeeReader(resp.Body, &buf) + + var response ChunkUploadResponse + if err := json.NewDecoder(resp.Body).Decode(&response); err != nil { + return err + } + log.Info().Int("statuscode", resp.StatusCode).Interface("chunk-response", response).Msg("uploaded chunk") + + if response.Error.Type != "" { + return fmt.Errorf("%s: %s. 
Caused by %s: %s", response.Error.Type, response.Error.Reason, response.Error.Cause.Type, response.Error.Cause.Reason) + } + return nil +} + +type contentTypeOverrider struct { + client *elasticsearch.Client +} + +func (c contentTypeOverrider) Perform(req *http.Request) (*http.Response, error) { + req.Header.Set("Content-Type", "application/cbor") // we will SEND cbor + req.Header.Set("Accept", "application/json") // but we want JSON back + return c.client.Perform(req) +} - // @todo: proper doc ID - return bulker.Create(ctx, index, "", body, bulk.WithRefresh()) +type ChunkUploadResponse struct { + Index string `json:"_index"` + ID string `json:"_id"` + Result string `json:"result"` + Version int `json:"_version"` + Shards struct { + Total int `json:"total"` + Success int `json:"successful"` + Failed int `json:"failed"` + } `json:"_shards"` + Error struct { + Type string `json:"type"` + Reason string `json:"reason"` + Cause struct { + Type string `json:"type"` + Reason string `json:"reason"` + } `json:"caused_by"` + } `json:"error"` } diff --git a/internal/pkg/model/schema.go b/internal/pkg/model/schema.go index 4bb5159da..8e2663488 100644 --- a/internal/pkg/model/schema.go +++ b/internal/pkg/model/schema.go @@ -273,12 +273,23 @@ type EnrollmentAPIKey struct { UpdatedAt string `json:"updated_at,omitempty"` } +// EventInformation Event identifiers to coordinate the source reason +type EventInformation struct { + + // unique event identifier + ID string `json:"id,omitempty"` +} + // FileInfo An uploaded File type FileInfo struct { ESDocument + // Event identifiers to coordinate the source reason + Event *EventInformation `json:"event,omitempty"` + // Information about the file properties File *FileMetadata `json:"file,omitempty"` + Host *HostMetadata `json:"host,omitempty"` } // FileMetadata Information about the file properties @@ -288,7 +299,7 @@ type FileMetadata struct { Accessed string `json:"accessed,omitempty"` // Platform-dependent sequence of file attributes such as readonly, execute, hidden - Attributes string `json:"attributes,omitempty"` + Attributes []string `json:"attributes,omitempty"` // Size, in bytes, of each data chunk ChunkSize int64 `json:"ChunkSize,omitempty"` diff --git a/internal/pkg/upload/cbor.go b/internal/pkg/upload/cbor.go new file mode 100644 index 000000000..0a9d636b5 --- /dev/null +++ b/internal/pkg/upload/cbor.go @@ -0,0 +1,146 @@ +// Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one +// or more contributor license agreements. Licensed under the Elastic License; +// you may not use this file except in compliance with the Elastic License. + +package upload + +import ( + "encoding/binary" + "errors" + "io" +) + +// This is a trimmed-down special purpose writer +// and cbor encoder used to streamline upload chunk writing +// without buffering large amounts of data +// in memory. +// It is not a general-purpose CBOR encoder. +// A suitable general purpose library, if the future needs one, is github.com/fxamacker/cbor/v2 +type cborEncoder struct { + chunk io.ReadCloser + final bool + preamble []byte + prbWritten bool + prbWritePos int + wroteTerm bool +} + +func NewCBORChunkWriter(chunkData io.ReadCloser, finalChunk bool, baseID string, chunkSize int64) *cborEncoder { + return &cborEncoder{ + chunk: chunkData, + final: finalChunk, + preamble: encodePreambleToCBOR(finalChunk, baseID, chunkSize), + prbWritten: false, + prbWritePos: 0, + wroteTerm: false, + } +} + +// Writes the start of a CBOR object (equiv. 
JSON object) +// { +// "bid": "baseID", +// "last": true/false, +// "data": +// } +// the slice ends where the chunk data bytes ("byte string") should begin. +// it is therefore an incomplete CBOR object on its own +// expecting the next section to be filled in by the caller. +// the CBOR spec may be found here: https://www.rfc-editor.org/rfc/rfc8949 +func encodePreambleToCBOR(final bool, baseID string, chunkSize int64) []byte { + bidLen := len(baseID) + + // if we know the size of the chunk stream, we will write the 4-byte uint32 + // descriptor of that length + // otherwise it will be a *single* byte saying it is an unknown length + // and we will write out lengths as the chunk is read + chunkLen := 5 // space for describing sequence length. 1 byte to SAY 32-bit int (4byte), then 4 bytes + if final { + chunkLen = 1 + } + + preamble := make([]byte, 13+bidLen+chunkLen+5) + preamble[0] = 0xA3 // Object with 3 keys + preamble[1] = 0x64 // string with 4 chars + preamble[2] = 'l' + preamble[3] = 'a' + preamble[4] = 's' + preamble[5] = 't' + if final { + preamble[6] = 0xF4 // bool true + } else { + preamble[6] = 0xF5 // bool false + } + preamble[7] = 0x63 // string with 3 chars + preamble[8] = 'b' + preamble[9] = 'i' + preamble[10] = 'd' + preamble[11] = 0x78 // UTF-8 string coming, next byte describes length + preamble[12] = uint8(bidLen) + i := 13 + for _, c := range baseID { // now write the document baseID + preamble[i] = byte(c) + i++ + } + preamble[i] = 0x64 // string with 4 chars + preamble[i+1] = 'd' + preamble[i+2] = 'a' + preamble[i+3] = 't' + preamble[i+4] = 'a' + i += 5 + if !final { + // byte data should be precisely chunkSize long, otherwise malformed + preamble[i] = 0x5A // say length descriptor will be 32-bit int + binary.BigEndian.PutUint32(preamble[i+1:], uint32(chunkSize)) + } else { + // final chunk may be less than full size, will need to determine length + preamble[i] = 0x5F // indeterminate-length byte sequence + } + return preamble +} + +// io.Reader interface for streaming out +func (c *cborEncoder) Read(buf []byte) (int, error) { + if c.wroteTerm { // already wrote a terminating instruction for undefined byte sequence length + return 0, io.EOF + } + + if !c.prbWritten { + n := copy(buf, c.preamble[c.prbWritePos:]) + if n == len(c.preamble[c.prbWritePos:]) { + c.prbWritten = true + } + c.prbWritePos += n + return n, nil + } + + if c.final { + // need to write length headers before the byte sequence + if len(buf) < 10 { + return 0, errors.New("buffer too small") + } + n, err := c.chunk.Read(buf[5:]) + buf[0] = 0x5A // 4-byte length descriptor to follow + binary.BigEndian.PutUint32(buf[1:], uint32(n)) + + if errors.Is(err, io.EOF) { + if n == 0 { // chunk data has been exhausted, write the terminating byte and get out + buf[0] = 0xFF + c.wroteTerm = true + return 1, io.EOF + } + // if we can tack-on the terminating byte from this read call, do it + if len(buf) > n+5+1 { + buf[n+5] = 0xFF + c.wroteTerm = true + n = n + 1 + } else { + //otherwise, wait for the next call to Read(), hide the EOF err + err = nil + } + } + return n + 5, err + } + + return c.chunk.Read(buf) + +} diff --git a/internal/pkg/upload/upload.go b/internal/pkg/upload/upload.go index f4bd00351..f32ac691a 100644 --- a/internal/pkg/upload/upload.go +++ b/internal/pkg/upload/upload.go @@ -7,7 +7,7 @@ package upload import ( "errors" "fmt" - "io" + "sync" "time" "github.com/gofrs/uuid" @@ -18,23 +18,45 @@ const ( //these should be configs probably uploadRequestTimeout = time.Hour chunkProgressTimeout = 
time.Hour / 4 + + // specification-designated maximum + MaxChunkSize = 4194304 // 4 MiB ) var ( ErrMaxConcurrentUploads = errors.New("the max number of concurrent uploads has been reached") ErrInvalidUploadID = errors.New("active upload not found with this ID, it may be expired") + + //@todo: explicit error for expired uploads ) type upload struct { complete chan<- struct{} chunkRecv chan<- struct{} + begun bool + Info Info } type Uploader struct { current map[string]upload + mu sync.Mutex parallelLimit int } +type Info struct { + ID string + ChunkSize int64 + Total int64 + Count int +} + +type ChunkInfo struct { + ID int + FirstReceived bool + Final bool + Upload Info +} + func New(limit int) *Uploader { return &Uploader{ parallelLimit: limit, @@ -44,14 +66,14 @@ func New(limit int) *Uploader { // Start an upload operation, as long as the max concurrent has not been reached // returns the upload ID -func (u *Uploader) Begin() (string, error) { +func (u *Uploader) Begin(size int64) (Info, error) { if len(u.current) >= u.parallelLimit { - return "", ErrMaxConcurrentUploads + return Info{}, ErrMaxConcurrentUploads } uid, err := uuid.NewV4() if err != nil { - return "", fmt.Errorf("unable to generate upload operation ID: %w", err) + return Info{}, fmt.Errorf("unable to generate upload operation ID: %w", err) } id := uid.String() @@ -70,7 +92,7 @@ func (u *Uploader) Begin() (string, error) { if !chunkT.Stop() { <-chunkT.C } - delete(u.current, id) + u.cancel(id) return case <-chunkT.C: // no chunk progress within chunk timer, expire operation log.Trace().Str("uploadID", id).Msg("upload operation chunk activity timed out") @@ -78,7 +100,7 @@ func (u *Uploader) Begin() (string, error) { if !total.Stop() { <-total.C } - delete(u.current, id) + u.cancel(id) return case <-chunkRecv: // chunk activity, update chunk timer if !chunkT.Stop() { @@ -92,34 +114,49 @@ func (u *Uploader) Begin() (string, error) { if !total.Stop() { <-total.C } - delete(u.current, id) + u.finalize(id) return } } }() + info := Info{ + ID: id, + ChunkSize: MaxChunkSize, + Total: size, + } + cnt := info.Total / info.ChunkSize + if info.Total%info.ChunkSize > 0 { + cnt += 1 + } + info.Count = int(cnt) u.current[id] = upload{ complete: complete, chunkRecv: chunkRecv, + Info: info, } - return id, nil + return info, nil } -func (u *Uploader) Chunk(uplID string, chunknum int, body io.ReadCloser) (string, error) { - if body == nil { - return "", errors.New("body is required") - } - defer body.Close() - if _, valid := u.current[uplID]; !valid { - return "", ErrInvalidUploadID +func (u *Uploader) Chunk(uplID string, chunkID int) (ChunkInfo, error) { + u.mu.Lock() + upl, valid := u.current[uplID] + if !valid { + u.mu.Unlock() + return ChunkInfo{}, ErrInvalidUploadID } - u.current[uplID].chunkRecv <- struct{}{} - - _, err := io.ReadAll(body) - if err != nil { - return "", err + upl.chunkRecv <- struct{}{} + if !upl.begun { + upl.begun = true } - - return "", nil + u.current[uplID] = upl + u.mu.Unlock() + + return ChunkInfo{ + ID: chunkID, + FirstReceived: upl.begun, + Final: chunkID == upl.Info.Count-1, + Upload: upl.Info, + }, nil } func (u *Uploader) Complete(id string) (string, error) { @@ -129,3 +166,20 @@ func (u *Uploader) Complete(id string) (string, error) { u.current[id].complete <- struct{}{} return "", nil } + +func (u *Uploader) cancel(uplID string) error { + u.mu.Lock() + defer u.mu.Unlock() + delete(u.current, uplID) + // @todo: delete any uploaded chunks from ES + // leave header doc and mark failed? 
+ return nil +} + +func (u *Uploader) finalize(uplID string) error { + u.mu.Lock() + defer u.mu.Unlock() + delete(u.current, uplID) + // @todo: write Status:READY here? + return nil +} diff --git a/model/schema.json b/model/schema.json index 074962eb8..9ff57033f 100644 --- a/model/schema.json +++ b/model/schema.json @@ -608,7 +608,7 @@ "api_key" ] }, - "file-chunk": { + "filechunk": { "title": "Chunk", "description": "One section of the blob contents of a file", "type": "object", @@ -686,7 +686,10 @@ }, "attributes": { "description": "Platform-dependent sequence of file attributes such as readonly, execute, hidden", - "type": "string" + "type": "array", + "items": { + "type": "string" + } }, "created": { "description": "File creation time", @@ -761,6 +764,17 @@ "type":"string" } } + }, + "host": { "$ref": "#/definitions/host-metadata" }, + "event": { + "title": "Event information", + "description": "Event identifiers to coordinate the source reason", + "properties": { + "id": { + "description": "unique event identifier", + "type":"string" + } + } } } } From 8da226e41438c7b2017929e271c25fa52fc8c400 Mon Sep 17 00:00:00 2001 From: pzl Date: Tue, 27 Sep 2022 16:40:29 -0400 Subject: [PATCH 03/51] cleanups --- internal/pkg/api/handleUpload.go | 6 +++--- internal/pkg/dl/upload.go | 22 ++++------------------ internal/pkg/upload/upload.go | 2 ++ 3 files changed, 9 insertions(+), 21 deletions(-) diff --git a/internal/pkg/api/handleUpload.go b/internal/pkg/api/handleUpload.go index 68005e1da..75a7cf77e 100644 --- a/internal/pkg/api/handleUpload.go +++ b/internal/pkg/api/handleUpload.go @@ -70,7 +70,7 @@ func (rt Router) handleUploadStart(w http.ResponseWriter, r *http.Request, ps ht Err(err). Int(ECSHTTPResponseCode, resp.StatusCode). Int64(ECSEventDuration, time.Since(start).Nanoseconds()). - Msg("fail checkin") + Msg("fail upload initiation") if err := resp.Write(w); err != nil { zlog.Error().Err(err).Msg("fail writing error response") @@ -116,7 +116,7 @@ func (rt Router) handleUploadChunk(w http.ResponseWriter, r *http.Request, ps ht Err(err). Int(ECSHTTPResponseCode, resp.StatusCode). Int64(ECSEventDuration, time.Since(start).Nanoseconds()). - Msg("fail checkin") + Msg("fail upload chunk") if err := resp.Write(w); err != nil { zlog.Error().Err(err).Msg("fail writing error response") @@ -152,7 +152,7 @@ func (rt Router) handleUploadComplete(w http.ResponseWriter, r *http.Request, ps Err(err). Int(ECSHTTPResponseCode, resp.StatusCode). Int64(ECSEventDuration, time.Since(start).Nanoseconds()). 
- Msg("fail checkin") + Msg("fail upload completion") if err := resp.Write(w); err != nil { zlog.Error().Err(err).Msg("fail writing error response") diff --git a/internal/pkg/dl/upload.go b/internal/pkg/dl/upload.go index 146084eeb..2b5cfbfbf 100644 --- a/internal/pkg/dl/upload.go +++ b/internal/pkg/dl/upload.go @@ -10,7 +10,6 @@ import ( "fmt" "io" "net/http" - "os" "github.com/elastic/fleet-server/v7/internal/pkg/bulk" "github.com/elastic/fleet-server/v7/internal/pkg/model" @@ -42,21 +41,10 @@ func updateUpload(ctx context.Context, bulker bulk.Bulk, index string, fileID st func UploadChunk(ctx context.Context, bulker bulk.Bulk, data io.ReadCloser, chunkInfo upload.ChunkInfo) error { client := bulker.Client() - var chunkBody io.Reader cbor := upload.NewCBORChunkWriter(data, chunkInfo.Final, chunkInfo.Upload.ID, chunkInfo.Upload.ChunkSize) - chunkBody = cbor - const DEBUG = false - - if DEBUG || chunkInfo.Final { - f, err := os.OpenFile("/home/dan/dev/elastic/file-store-poc/out2.data", os.O_CREATE|os.O_TRUNC|os.O_RDWR, 0644) - if err != nil { - return err - } - defer f.Close() - chunkBody = io.TeeReader(cbor, f) - } /* + // the non-streaming version buf := bytes.NewBuffer(nil) out, err := io.ReadAll(data) if err != nil { @@ -76,9 +64,10 @@ func UploadChunk(ctx context.Context, bulker bulk.Bulk, data io.ReadCloser, chun req := esapi.IndexRequest{ Index: ".fleet-file_data", - Body: chunkBody, + Body: cbor, DocumentID: fmt.Sprintf("%s.%d", chunkInfo.Upload.ID, chunkInfo.ID), } + // need to set the Content-Type of the request to CBOR, notes below overrider := contentTypeOverrider{client} resp, err := req.Do(ctx, overrider) /* @@ -101,14 +90,11 @@ func UploadChunk(ctx context.Context, bulker bulk.Bulk, data io.ReadCloser, chun return err } - //var buf bytes.Buffer - //spy := io.TeeReader(resp.Body, &buf) - var response ChunkUploadResponse if err := json.NewDecoder(resp.Body).Decode(&response); err != nil { return err } - log.Info().Int("statuscode", resp.StatusCode).Interface("chunk-response", response).Msg("uploaded chunk") + log.Trace().Int("statuscode", resp.StatusCode).Interface("chunk-response", response).Msg("uploaded chunk") if response.Error.Type != "" { return fmt.Errorf("%s: %s. Caused by %s: %s", response.Error.Type, response.Error.Reason, response.Error.Cause.Type, response.Error.Cause.Reason) diff --git a/internal/pkg/upload/upload.go b/internal/pkg/upload/upload.go index f32ac691a..df4a2a3bc 100644 --- a/internal/pkg/upload/upload.go +++ b/internal/pkg/upload/upload.go @@ -163,6 +163,8 @@ func (u *Uploader) Complete(id string) (string, error) { if _, valid := u.current[id]; !valid { return "", ErrInvalidUploadID } + // @todo: verify chunks + // verify hashes, etc u.current[id].complete <- struct{}{} return "", nil } From f0cd448f1eec326a63364f4e53c93fb3209b3166 Mon Sep 17 00:00:00 2001 From: pzl Date: Tue, 27 Sep 2022 19:08:55 -0400 Subject: [PATCH 04/51] use a non-retry ES client for bulk uploads when retry is disabled (bubbles up to be equivalent to enabling longpolling for us), the elasticsearch.Client does not internally buffer the request (which is used in retrying and replaying the request). 
Chunks should not be memory-buffered, and should be retried at the integration level, since a retry-and-resume-friendly API is presented to them --- internal/pkg/api/handleUpload.go | 23 +++++++++++++---------- internal/pkg/dl/upload.go | 3 +-- internal/pkg/server/fleet.go | 2 +- 3 files changed, 15 insertions(+), 13 deletions(-) diff --git a/internal/pkg/api/handleUpload.go b/internal/pkg/api/handleUpload.go index 75a7cf77e..922534477 100644 --- a/internal/pkg/api/handleUpload.go +++ b/internal/pkg/api/handleUpload.go @@ -24,6 +24,7 @@ import ( "github.com/elastic/fleet-server/v7/internal/pkg/model" "github.com/elastic/fleet-server/v7/internal/pkg/throttle" "github.com/elastic/fleet-server/v7/internal/pkg/upload" + "github.com/elastic/go-elasticsearch/v7" "github.com/julienschmidt/httprouter" "github.com/rs/zerolog" "github.com/rs/zerolog/log" @@ -161,23 +162,25 @@ func (rt Router) handleUploadComplete(w http.ResponseWriter, r *http.Request, ps } type UploadT struct { - bulker bulk.Bulk - cache cache.Cache - esThrottle *throttle.Throttle - upl *upload.Uploader + bulker bulk.Bulk + chunkClient *elasticsearch.Client + cache cache.Cache + esThrottle *throttle.Throttle + upl *upload.Uploader } -func NewUploadT(cfg *config.Server, bulker bulk.Bulk, cache cache.Cache) *UploadT { +func NewUploadT(cfg *config.Server, bulker bulk.Bulk, chunkClient *elasticsearch.Client, cache cache.Cache) *UploadT { log.Info(). Interface("limits", cfg.Limits.ArtifactLimit). Int("maxParallel", defaultMaxParallel). Msg("Artifact install limits") return &UploadT{ - bulker: bulker, - cache: cache, - esThrottle: throttle.NewThrottle(defaultMaxParallel), - upl: upload.New(maxParallelUploads), + chunkClient: chunkClient, + bulker: bulker, + cache: cache, + esThrottle: throttle.NewThrottle(defaultMaxParallel), + upl: upload.New(maxParallelUploads), } } @@ -243,7 +246,7 @@ func (ut *UploadT) handleUploadChunk(zlog *zerolog.Logger, w http.ResponseWriter // prevent over-sized chunks chunk := http.MaxBytesReader(w, r.Body, upload.MaxChunkSize) - if err := dl.UploadChunk(r.Context(), ut.bulker, chunk, info); err != nil { + if err := dl.UploadChunk(r.Context(), ut.chunkClient, chunk, info); err != nil { return err } return nil diff --git a/internal/pkg/dl/upload.go b/internal/pkg/dl/upload.go index 2b5cfbfbf..ce43f5578 100644 --- a/internal/pkg/dl/upload.go +++ b/internal/pkg/dl/upload.go @@ -39,8 +39,7 @@ func updateUpload(ctx context.Context, bulker bulk.Bulk, index string, fileID st return bulker.Update(ctx, index, fileID, data) } -func UploadChunk(ctx context.Context, bulker bulk.Bulk, data io.ReadCloser, chunkInfo upload.ChunkInfo) error { - client := bulker.Client() +func UploadChunk(ctx context.Context, client *elasticsearch.Client, data io.ReadCloser, chunkInfo upload.ChunkInfo) error { cbor := upload.NewCBORChunkWriter(data, chunkInfo.Final, chunkInfo.Upload.ID, chunkInfo.Upload.ChunkSize) /* diff --git a/internal/pkg/server/fleet.go b/internal/pkg/server/fleet.go index d636a8776..016659a22 100644 --- a/internal/pkg/server/fleet.go +++ b/internal/pkg/server/fleet.go @@ -493,7 +493,7 @@ func (f *Fleet) runSubsystems(ctx context.Context, cfg *config.Config, g *errgro at := api.NewArtifactT(&cfg.Inputs[0].Server, bulker, f.cache) ack := api.NewAckT(&cfg.Inputs[0].Server, bulker, f.cache) st := api.NewStatusT(&cfg.Inputs[0].Server, bulker, f.cache) - ut := api.NewUploadT(&cfg.Inputs[0].Server, bulker, f.cache) + ut := api.NewUploadT(&cfg.Inputs[0].Server, bulker, monCli, f.cache) // uses no-retry client for bufferless 
chunk upload router := api.NewRouter(&cfg.Inputs[0].Server, bulker, ct, et, at, ack, st, ut, sm, tracer, f.bi) From ebcee3045aa92a54df9dd9209503f3ed82961d86 Mon Sep 17 00:00:00 2001 From: pzl Date: Thu, 29 Sep 2022 14:05:23 -0400 Subject: [PATCH 05/51] index name temporary cleanup --- internal/pkg/dl/upload.go | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/internal/pkg/dl/upload.go b/internal/pkg/dl/upload.go index ce43f5578..bf166420d 100644 --- a/internal/pkg/dl/upload.go +++ b/internal/pkg/dl/upload.go @@ -19,8 +19,15 @@ import ( "github.com/rs/zerolog/log" ) +const ( + // @todo: neither of these should be static. But should be specific to an integration + // somewhat configurable, but need to follow a pattern so that Fleet Server has write access + FileHeaderIndex = ".fleet-files" + FileDataIndex = ".fleet-file_data" +) + func CreateUploadInfo(ctx context.Context, bulker bulk.Bulk, fi model.FileInfo, fileID string) (string, error) { - return createUploadInfo(ctx, bulker, ".fleet-files", fi, fileID) // @todo: index destination is an input (and different per integration) + return createUploadInfo(ctx, bulker, FileHeaderIndex, fi, fileID) // @todo: index destination is an input (and different per integration) } func createUploadInfo(ctx context.Context, bulker bulk.Bulk, index string, fi model.FileInfo, fileID string) (string, error) { @@ -32,7 +39,7 @@ func createUploadInfo(ctx context.Context, bulker bulk.Bulk, index string, fi mo } func UpdateUpload(ctx context.Context, bulker bulk.Bulk, fileID string, data []byte) error { - return updateUpload(ctx, bulker, ".fleet-files", fileID, data) + return updateUpload(ctx, bulker, FileHeaderIndex, fileID, data) } func updateUpload(ctx context.Context, bulker bulk.Bulk, index string, fileID string, data []byte) error { @@ -62,7 +69,7 @@ func UploadChunk(ctx context.Context, client *elasticsearch.Client, data io.Read */ req := esapi.IndexRequest{ - Index: ".fleet-file_data", + Index: FileDataIndex, Body: cbor, DocumentID: fmt.Sprintf("%s.%d", chunkInfo.Upload.ID, chunkInfo.ID), } From f9b55575d27271a6d40118bf6109f947a95adc1b Mon Sep 17 00:00:00 2001 From: pzl Date: Tue, 4 Oct 2022 08:38:12 -0400 Subject: [PATCH 06/51] cbor: boolean byte was flipped --- internal/pkg/upload/cbor.go | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/internal/pkg/upload/cbor.go b/internal/pkg/upload/cbor.go index 0a9d636b5..1abcba968 100644 --- a/internal/pkg/upload/cbor.go +++ b/internal/pkg/upload/cbor.go @@ -66,9 +66,9 @@ func encodePreambleToCBOR(final bool, baseID string, chunkSize int64) []byte { preamble[4] = 's' preamble[5] = 't' if final { - preamble[6] = 0xF4 // bool true + preamble[6] = 0xF5 // bool true } else { - preamble[6] = 0xF5 // bool false + preamble[6] = 0xF4 // bool false } preamble[7] = 0x63 // string with 3 chars preamble[8] = 'b' From 79c959cd04f88b763547481c2abc7db6ef8291ae Mon Sep 17 00:00:00 2001 From: pzl Date: Tue, 4 Oct 2022 10:56:53 -0400 Subject: [PATCH 07/51] use throttle to cap upload concurrency, add tests --- internal/pkg/api/handleUpload.go | 20 ++-- internal/pkg/upload/upload.go | 72 ++++++++++---- internal/pkg/upload/upload_test.go | 154 +++++++++++++++++++++++++++++ 3 files changed, 215 insertions(+), 31 deletions(-) create mode 100644 internal/pkg/upload/upload_test.go diff --git a/internal/pkg/api/handleUpload.go b/internal/pkg/api/handleUpload.go index 922534477..39befe15f 100644 --- a/internal/pkg/api/handleUpload.go +++ b/internal/pkg/api/handleUpload.go @@ -22,7 +22,6 @@ 
import ( "github.com/elastic/fleet-server/v7/internal/pkg/limit" "github.com/elastic/fleet-server/v7/internal/pkg/logger" "github.com/elastic/fleet-server/v7/internal/pkg/model" - "github.com/elastic/fleet-server/v7/internal/pkg/throttle" "github.com/elastic/fleet-server/v7/internal/pkg/upload" "github.com/elastic/go-elasticsearch/v7" "github.com/julienschmidt/httprouter" @@ -43,7 +42,8 @@ const ( const ( // TODO: move to a config - maxParallelUploads = 5 + maxParallelUploadOperations = 3 + maxParallelChunks = 4 ) func (rt Router) handleUploadStart(w http.ResponseWriter, r *http.Request, ps httprouter.Params) { @@ -165,22 +165,21 @@ type UploadT struct { bulker bulk.Bulk chunkClient *elasticsearch.Client cache cache.Cache - esThrottle *throttle.Throttle upl *upload.Uploader } func NewUploadT(cfg *config.Server, bulker bulk.Bulk, chunkClient *elasticsearch.Client, cache cache.Cache) *UploadT { log.Info(). Interface("limits", cfg.Limits.ArtifactLimit). - Int("maxParallel", defaultMaxParallel). + Int("maxParallelOps", maxParallelUploadOperations). + Int("maxParallelChunks", maxParallelChunks). Msg("Artifact install limits") return &UploadT{ chunkClient: chunkClient, bulker: bulker, cache: cache, - esThrottle: throttle.NewThrottle(defaultMaxParallel), - upl: upload.New(maxParallelUploads), + upl: upload.New(maxParallelChunks, maxParallelChunks), } } @@ -234,19 +233,20 @@ func (ut *UploadT) handleUploadStart(zlog *zerolog.Logger, w http.ResponseWriter } func (ut *UploadT) handleUploadChunk(zlog *zerolog.Logger, w http.ResponseWriter, r *http.Request, uplID string, chunkID int) error { - info, err := ut.upl.Chunk(uplID, chunkID) + chunkInfo, err := ut.upl.Chunk(uplID, chunkID) if err != nil { return err } - if info.FirstReceived { + defer chunkInfo.Token.Release() + if chunkInfo.FirstReceived { if err := updateUploadStatus(r.Context(), ut.bulker, uplID, UploadProgress); err != nil { zlog.Warn().Err(err).Str("upload", uplID).Msg("unable to update upload status") } } // prevent over-sized chunks - chunk := http.MaxBytesReader(w, r.Body, upload.MaxChunkSize) - if err := dl.UploadChunk(r.Context(), ut.chunkClient, chunk, info); err != nil { + data := http.MaxBytesReader(w, r.Body, upload.MaxChunkSize) + if err := dl.UploadChunk(r.Context(), ut.chunkClient, data, chunkInfo); err != nil { return err } return nil diff --git a/internal/pkg/upload/upload.go b/internal/pkg/upload/upload.go index df4a2a3bc..a5f197269 100644 --- a/internal/pkg/upload/upload.go +++ b/internal/pkg/upload/upload.go @@ -7,9 +7,11 @@ package upload import ( "errors" "fmt" + "strconv" "sync" "time" + "github.com/elastic/fleet-server/v7/internal/pkg/throttle" "github.com/gofrs/uuid" "github.com/rs/zerolog/log" ) @@ -31,16 +33,19 @@ var ( ) type upload struct { - complete chan<- struct{} - chunkRecv chan<- struct{} - begun bool - Info Info + opToken *throttle.Token + chunkThrottle *throttle.Throttle + complete chan<- struct{} + chunkRecv chan<- struct{} + begun bool + Info Info } type Uploader struct { - current map[string]upload - mu sync.Mutex - parallelLimit int + current map[string]upload + mu sync.Mutex + opThrottle *throttle.Throttle + parallelChunkLimit int } type Info struct { @@ -55,20 +60,22 @@ type ChunkInfo struct { FirstReceived bool Final bool Upload Info + Token *throttle.Token } -func New(limit int) *Uploader { +func New(opLimit int, chunkLimit int) *Uploader { return &Uploader{ - parallelLimit: limit, - current: make(map[string]upload, limit), + parallelChunkLimit: chunkLimit, + opThrottle: 
throttle.NewThrottle(opLimit), + current: make(map[string]upload, opLimit), } } // Start an upload operation, as long as the max concurrent has not been reached // returns the upload ID func (u *Uploader) Begin(size int64) (Info, error) { - if len(u.current) >= u.parallelLimit { - return Info{}, ErrMaxConcurrentUploads + if size <= 0 { + return Info{}, errors.New("invalid file size") } uid, err := uuid.NewV4() @@ -77,6 +84,11 @@ func (u *Uploader) Begin(size int64) (Info, error) { } id := uid.String() + token := u.opThrottle.Acquire(id, 300*time.Hour) + if token == nil { + return Info{}, ErrMaxConcurrentUploads + } + total := time.NewTimer(uploadRequestTimeout) chunkT := time.NewTimer(chunkProgressTimeout) chunkRecv := make(chan struct{}) @@ -130,32 +142,42 @@ func (u *Uploader) Begin(size int64) (Info, error) { } info.Count = int(cnt) u.current[id] = upload{ - complete: complete, - chunkRecv: chunkRecv, - Info: info, + opToken: token, + chunkThrottle: throttle.NewThrottle(u.parallelChunkLimit), + complete: complete, + chunkRecv: chunkRecv, + Info: info, } return info, nil } func (u *Uploader) Chunk(uplID string, chunkID int) (ChunkInfo, error) { u.mu.Lock() + defer u.mu.Unlock() upl, valid := u.current[uplID] if !valid { - u.mu.Unlock() return ChunkInfo{}, ErrInvalidUploadID } + if chunkID < 0 || chunkID >= upl.Info.Count { + return ChunkInfo{}, errors.New("invalid chunk number") + } + + token := upl.chunkThrottle.Acquire(strconv.Itoa(chunkID), time.Hour) + if token == nil { + return ChunkInfo{}, ErrMaxConcurrentUploads + } upl.chunkRecv <- struct{}{} if !upl.begun { upl.begun = true } u.current[uplID] = upl - u.mu.Unlock() return ChunkInfo{ ID: chunkID, FirstReceived: upl.begun, Final: chunkID == upl.Info.Count-1, Upload: upl.Info, + Token: token, }, nil } @@ -169,19 +191,27 @@ func (u *Uploader) Complete(id string) (string, error) { return "", nil } -func (u *Uploader) cancel(uplID string) error { +func (u *Uploader) cleanupOperation(uplID string) { u.mu.Lock() defer u.mu.Unlock() + if upload, ok := u.current[uplID]; ok { + if upload.opToken != nil { + upload.opToken.Release() + } + } delete(u.current, uplID) +} + +func (u *Uploader) cancel(uplID string) error { + u.cleanupOperation(uplID) + // @todo: delete any uploaded chunks from ES // leave header doc and mark failed? return nil } func (u *Uploader) finalize(uplID string) error { - u.mu.Lock() - defer u.mu.Unlock() - delete(u.current, uplID) + u.cleanupOperation(uplID) // @todo: write Status:READY here? return nil } diff --git a/internal/pkg/upload/upload_test.go b/internal/pkg/upload/upload_test.go new file mode 100644 index 000000000..c08e63eb0 --- /dev/null +++ b/internal/pkg/upload/upload_test.go @@ -0,0 +1,154 @@ +// Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one +// or more contributor license agreements. Licensed under the Elastic License; +// you may not use this file except in compliance with the Elastic License. 
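// Worked example of the chunk bookkeeping done in Uploader.Begin: Count is the
// ceiling of Total divided by the chunk size, and the final chunk index
// (Count-1) is the one marked Final. The 7.5 GB case below matches the
// expectations in the tests that follow (1797 chunks, final index 1796).
// Standalone sketch, assuming the 4 MiB specification-designated chunk size:

package main

import "fmt"

const maxChunkSize int64 = 4194304 // 4 MiB

func chunkCount(total int64) int {
	cnt := total / maxChunkSize
	if total%maxChunkSize > 0 {
		cnt++
	}
	return int(cnt)
}

func main() {
	fmt.Println(chunkCount(10))               // 1
	fmt.Println(chunkCount(maxChunkSize + 1)) // 2
	fmt.Println(chunkCount(7534559605))       // 1797, so the final chunk index is 1796
}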
+ +package upload + +import ( + "testing" + + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" +) + +func TestMaxParallelUploadOpsReached(t *testing.T) { + opLimit := 4 + + u := New(opLimit, 0) + + var err error + for i := 0; i < opLimit; i++ { + _, err = u.Begin(100) + assert.NoError(t, err) + } + + _, err = u.Begin(100) + assert.ErrorIs(t, err, ErrMaxConcurrentUploads) +} + +func TestMaxParallelUploadOpsReleased(t *testing.T) { + opLimit := 4 + u := New(opLimit, 0) + + // generate max operations + ops := make([]Info, 0, opLimit) + for i := 0; i < opLimit; i++ { + op, err := u.Begin(100) + require.NoError(t, err) + ops = append(ops, op) + } + // and verify max was reached + _, err := u.Begin(100) + assert.ErrorIs(t, err, ErrMaxConcurrentUploads) + + // finishing an op should release the hold and allow another to begin + _, err = u.Complete(ops[0].ID) + require.NoError(t, err) + + op, err := u.Begin(100) + assert.NoError(t, err) + assert.NotEmpty(t, op.ID) +} + +func TestMaxParallelChunks(t *testing.T) { + chunkLim := 3 + + u := New(1, chunkLim) + + // start an operation, that can have more than the test limit chunks + op, err := u.Begin(MaxChunkSize * int64(chunkLim+2)) + require.NoError(t, err) + + // upload up to the limit chunks, without releasing the request + for i := 0; i < chunkLim; i++ { + _, err := u.Chunk(op.ID, i) + require.NoError(t, err) + } + + _, err = u.Chunk(op.ID, chunkLim) + assert.ErrorIs(t, err, ErrMaxConcurrentUploads) +} + +func TestMaxParallelChunksReleased(t *testing.T) { + chunkLim := 3 + + u := New(1, chunkLim) + + // start an operation, that can have more than the test limit chunks + op, err := u.Begin(MaxChunkSize * int64(chunkLim+2)) + require.NoError(t, err) + + // upload up to the limit chunks, without releasing the request + chunks := make([]ChunkInfo, 0, chunkLim) + for i := 0; i < chunkLim; i++ { + info, err := u.Chunk(op.ID, i) + require.NoError(t, err) + chunks = append(chunks, info) + } + _, err = u.Chunk(op.ID, chunkLim) + assert.ErrorIs(t, err, ErrMaxConcurrentUploads) + + chunks[0].Token.Release() + + _, err = u.Chunk(op.ID, chunkLim) + assert.NoError(t, err) +} + +func TestUploadChunkCount(t *testing.T) { + tests := []struct { + FileSize int64 + ExpectedCount int + Name string + }{ + {10, 1, "Tiny files take 1 chunk"}, + {MaxChunkSize, 1, "Precisely 1 chunk size bytes will fit in 1 chunk"}, + {MaxChunkSize + 1, 2, "ChunkSize+1 bytes takes 2 chunks"}, + {MaxChunkSize * 2.5, 3, "2.5x chunk size fits in 3 chunks"}, + {7534559605, 1797, "7.5Gb file"}, + } + + u := New(len(tests), 1) + + for _, tc := range tests { + t.Run(tc.Name, func(t *testing.T) { + info, err := u.Begin(tc.FileSize) + assert.NoError(t, err) + assert.Equal(t, tc.ExpectedCount, info.Count) + }) + } +} + +func TestChunkMarksFinal(t *testing.T) { + tests := []struct { + FileSize int64 + FinalChunk int + Name string + }{ + {10, 0, "Small file only chunk is final"}, + {MaxChunkSize, 0, "1 chunksize only chunk is final"}, + {MaxChunkSize + 1, 1, "ChunkSize+1 bytes takes 2 chunks"}, + {MaxChunkSize * 2.5, 2, "2.5x chunk size"}, + {7534559605, 1796, "7.5Gb file"}, + } + + u := New(len(tests), 4) + + for _, tc := range tests { + t.Run(tc.Name, func(t *testing.T) { + info, err := u.Begin(tc.FileSize) + assert.NoError(t, err) + + if tc.FinalChunk > 0 { + prev, err := u.Chunk(info.ID, tc.FinalChunk-1) + assert.NoError(t, err) + assert.Falsef(t, prev.Final, "previous chunk ID before last should not be marked final") + prev.Token.Release() + } + + chunk, err := 
u.Chunk(info.ID, tc.FinalChunk) + assert.NoError(t, err) + assert.True(t, chunk.Final) + chunk.Token.Release() + }) + } +} From 76dacbf3940abee987352c61ea7d18935e26bddc Mon Sep 17 00:00:00 2001 From: pzl Date: Tue, 4 Oct 2022 17:15:27 -0400 Subject: [PATCH 08/51] start using real(er) document IDs and index names --- internal/pkg/api/handleUpload.go | 57 +++++++++++++++++++++++--------- internal/pkg/api/schema.go | 5 ++- internal/pkg/dl/upload.go | 18 +++++----- internal/pkg/upload/upload.go | 17 ++++++---- 4 files changed, 65 insertions(+), 32 deletions(-) diff --git a/internal/pkg/api/handleUpload.go b/internal/pkg/api/handleUpload.go index 39befe15f..e26b96dc6 100644 --- a/internal/pkg/api/handleUpload.go +++ b/internal/pkg/api/handleUpload.go @@ -194,23 +194,19 @@ func (ut *UploadT) handleUploadStart(zlog *zerolog.Logger, w http.ResponseWriter } r.Body.Close() - if strings.TrimSpace(fi.File.Name) == "" { - return errors.New("file name is required") - } - if fi.File.Size <= 0 { - return errors.New("invalid file size, size is required") - } - if strings.TrimSpace(fi.File.Mime) == "" { - return errors.New("mime_type is required") + if err := validateUploadPayload(fi); err != nil { + return err } - op, err := ut.upl.Begin(fi.File.Size) + docID := fmt.Sprintf("%s.%s", fi.ActionID, fi.AgentID) + + op, err := ut.upl.Begin(fi.File.Size, docID, fi.Source) if err != nil { return err } doc := uploadRequestToFileInfo(fi, op.ChunkSize) - ret, err := dl.CreateUploadInfo(r.Context(), ut.bulker, doc, op.ID) // @todo: replace uploadID with correct file base ID + ret, err := dl.CreateUploadInfo(r.Context(), ut.bulker, doc, fi.Source, docID) if err != nil { return err } @@ -239,7 +235,7 @@ func (ut *UploadT) handleUploadChunk(zlog *zerolog.Logger, w http.ResponseWriter } defer chunkInfo.Token.Release() if chunkInfo.FirstReceived { - if err := updateUploadStatus(r.Context(), ut.bulker, uplID, UploadProgress); err != nil { + if err := updateUploadStatus(r.Context(), ut.bulker, chunkInfo.Upload, UploadProgress); err != nil { zlog.Warn().Err(err).Str("upload", uplID).Msg("unable to update upload status") } } @@ -253,19 +249,19 @@ func (ut *UploadT) handleUploadChunk(zlog *zerolog.Logger, w http.ResponseWriter } func (ut *UploadT) handleUploadComplete(zlog *zerolog.Logger, w http.ResponseWriter, r *http.Request, uplID string) error { - data, err := ut.upl.Complete(uplID) + info, err := ut.upl.Complete(uplID) if err != nil { return err } - if err := updateUploadStatus(r.Context(), ut.bulker, uplID, UploadDone); err != nil { + if err := updateUploadStatus(r.Context(), ut.bulker, info, UploadDone); err != nil { // should be 500 error probably? 
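	// For reference, the ID and index naming scheme this patch switches to
	// (values for a hypothetical integration name "endpoint" shown purely as
	// an illustration):
	//
	//	docID   := fmt.Sprintf("%s.%s", fi.ActionID, fi.AgentID) // file metadata document ID
	//	chunkID := fmt.Sprintf("%s.%d", docID, chunkNum)          // one document per chunk
	//	fmt.Sprintf(".fleet-%s-files", fi.Source)                 // e.g. ".fleet-endpoint-files"
	//	fmt.Sprintf(".fleet-%s-file-data", fi.Source)             // e.g. ".fleet-endpoint-file-data"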
zlog.Warn().Err(err).Str("upload", uplID).Msg("unable to set upload status to complete") return err } - _, err = w.Write([]byte(data)) + _, err = w.Write([]byte(`{"status":"ok"}`)) if err != nil { return err } @@ -306,7 +302,7 @@ func uploadRequestToFileInfo(req FileInfo, chunkSize int64) model.FileInfo { } } -func updateUploadStatus(ctx context.Context, bulker bulk.Bulk, fileID string, status UploadStatus) error { +func updateUploadStatus(ctx context.Context, bulker bulk.Bulk, info upload.Info, status UploadStatus) error { data, err := json.Marshal(map[string]interface{}{ "doc": map[string]interface{}{ "file": map[string]string{ @@ -317,5 +313,34 @@ func updateUploadStatus(ctx context.Context, bulker bulk.Bulk, fileID string, st if err != nil { return err } - return dl.UpdateUpload(ctx, bulker, fileID, data) + return dl.UpdateUpload(ctx, bulker, info.Source, info.DocID, data) +} + +func validateUploadPayload(fi FileInfo) error { + + required := []struct { + Field string + Msg string + }{ + {fi.File.Name, "file name"}, + {fi.File.Mime, "mime_type"}, + {fi.ActionID, "action_id"}, + {fi.AgentID, "agent_id"}, + {fi.Source, "src"}, + } + + for _, req := range required { + if strings.TrimSpace(req.Field) == "" { + return fmt.Errorf("%s is required", req.Msg) + } + } + + //@todo: valid action? + //@todo: valid agent? + //@todo: valid src? will that make future expansion harder and require FS updates? maybe just validate the index exists + + if fi.File.Size <= 0 { + return errors.New("invalid file size, size is required") + } + return nil } diff --git a/internal/pkg/api/schema.go b/internal/pkg/api/schema.go index 5d77c8568..7280a20e2 100644 --- a/internal/pkg/api/schema.go +++ b/internal/pkg/api/schema.go @@ -177,7 +177,10 @@ type StatusResponse struct { } type FileInfo struct { - File struct { + ActionID string `json:"action_id"` + AgentID string `json:"agent_id"` + Source string `json:"src"` + File struct { Size int64 `json:"size"` Name string `json:"name"` Extension string `json:"ext"` diff --git a/internal/pkg/dl/upload.go b/internal/pkg/dl/upload.go index bf166420d..b087008d8 100644 --- a/internal/pkg/dl/upload.go +++ b/internal/pkg/dl/upload.go @@ -22,12 +22,12 @@ import ( const ( // @todo: neither of these should be static. 
But should be specific to an integration // somewhat configurable, but need to follow a pattern so that Fleet Server has write access - FileHeaderIndex = ".fleet-files" - FileDataIndex = ".fleet-file_data" + FileHeaderIndexPattern = ".fleet-%s-files" + FileDataIndexPattern = ".fleet-%s-file-data" ) -func CreateUploadInfo(ctx context.Context, bulker bulk.Bulk, fi model.FileInfo, fileID string) (string, error) { - return createUploadInfo(ctx, bulker, FileHeaderIndex, fi, fileID) // @todo: index destination is an input (and different per integration) +func CreateUploadInfo(ctx context.Context, bulker bulk.Bulk, fi model.FileInfo, source string, fileID string) (string, error) { + return createUploadInfo(ctx, bulker, fmt.Sprintf(FileHeaderIndexPattern, source), fi, fileID) } func createUploadInfo(ctx context.Context, bulker bulk.Bulk, index string, fi model.FileInfo, fileID string) (string, error) { @@ -38,8 +38,8 @@ func createUploadInfo(ctx context.Context, bulker bulk.Bulk, index string, fi mo return bulker.Create(ctx, index, fileID, body, bulk.WithRefresh()) } -func UpdateUpload(ctx context.Context, bulker bulk.Bulk, fileID string, data []byte) error { - return updateUpload(ctx, bulker, FileHeaderIndex, fileID, data) +func UpdateUpload(ctx context.Context, bulker bulk.Bulk, source string, fileID string, data []byte) error { + return updateUpload(ctx, bulker, fmt.Sprintf(FileHeaderIndexPattern, source), fileID, data) } func updateUpload(ctx context.Context, bulker bulk.Bulk, index string, fileID string, data []byte) error { @@ -47,7 +47,7 @@ func updateUpload(ctx context.Context, bulker bulk.Bulk, index string, fileID st } func UploadChunk(ctx context.Context, client *elasticsearch.Client, data io.ReadCloser, chunkInfo upload.ChunkInfo) error { - cbor := upload.NewCBORChunkWriter(data, chunkInfo.Final, chunkInfo.Upload.ID, chunkInfo.Upload.ChunkSize) + cbor := upload.NewCBORChunkWriter(data, chunkInfo.Final, chunkInfo.Upload.DocID, chunkInfo.Upload.ChunkSize) /* // the non-streaming version @@ -69,9 +69,9 @@ func UploadChunk(ctx context.Context, client *elasticsearch.Client, data io.Read */ req := esapi.IndexRequest{ - Index: FileDataIndex, + Index: fmt.Sprintf(FileDataIndexPattern, chunkInfo.Upload.Source), Body: cbor, - DocumentID: fmt.Sprintf("%s.%d", chunkInfo.Upload.ID, chunkInfo.ID), + DocumentID: fmt.Sprintf("%s.%d", chunkInfo.Upload.DocID, chunkInfo.ID), } // need to set the Content-Type of the request to CBOR, notes below overrider := contentTypeOverrider{client} diff --git a/internal/pkg/upload/upload.go b/internal/pkg/upload/upload.go index a5f197269..74047a6e4 100644 --- a/internal/pkg/upload/upload.go +++ b/internal/pkg/upload/upload.go @@ -49,7 +49,9 @@ type Uploader struct { } type Info struct { - ID string + ID string // upload operation identifier. 
Ephemeral, just used for the upload process + DocID string // document ID of the uploaded file and chunks + Source string // which integration is performing the upload ChunkSize int64 Total int64 Count int @@ -73,7 +75,7 @@ func New(opLimit int, chunkLimit int) *Uploader { // Start an upload operation, as long as the max concurrent has not been reached // returns the upload ID -func (u *Uploader) Begin(size int64) (Info, error) { +func (u *Uploader) Begin(size int64, docID string, source string) (Info, error) { if size <= 0 { return Info{}, errors.New("invalid file size") } @@ -133,7 +135,9 @@ func (u *Uploader) Begin(size int64) (Info, error) { }() info := Info{ ID: id, + DocID: docID, ChunkSize: MaxChunkSize, + Source: source, Total: size, } cnt := info.Total / info.ChunkSize @@ -181,14 +185,15 @@ func (u *Uploader) Chunk(uplID string, chunkID int) (ChunkInfo, error) { }, nil } -func (u *Uploader) Complete(id string) (string, error) { - if _, valid := u.current[id]; !valid { - return "", ErrInvalidUploadID +func (u *Uploader) Complete(id string) (Info, error) { + info, valid := u.current[id] + if !valid { + return Info{}, ErrInvalidUploadID } // @todo: verify chunks // verify hashes, etc u.current[id].complete <- struct{}{} - return "", nil + return info.Info, nil } func (u *Uploader) cleanupOperation(uplID string) { From 2cafe7fb5de3adb7af69481d25969e2fc7aa803e Mon Sep 17 00:00:00 2001 From: pzl Date: Tue, 4 Oct 2022 17:16:51 -0400 Subject: [PATCH 09/51] update tests --- internal/pkg/upload/upload_test.go | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/internal/pkg/upload/upload_test.go b/internal/pkg/upload/upload_test.go index c08e63eb0..0e3102630 100644 --- a/internal/pkg/upload/upload_test.go +++ b/internal/pkg/upload/upload_test.go @@ -18,11 +18,11 @@ func TestMaxParallelUploadOpsReached(t *testing.T) { var err error for i := 0; i < opLimit; i++ { - _, err = u.Begin(100) + _, err = u.Begin(100, "", "") assert.NoError(t, err) } - _, err = u.Begin(100) + _, err = u.Begin(100, "", "") assert.ErrorIs(t, err, ErrMaxConcurrentUploads) } @@ -33,19 +33,19 @@ func TestMaxParallelUploadOpsReleased(t *testing.T) { // generate max operations ops := make([]Info, 0, opLimit) for i := 0; i < opLimit; i++ { - op, err := u.Begin(100) + op, err := u.Begin(100, "", "") require.NoError(t, err) ops = append(ops, op) } // and verify max was reached - _, err := u.Begin(100) + _, err := u.Begin(100, "", "") assert.ErrorIs(t, err, ErrMaxConcurrentUploads) // finishing an op should release the hold and allow another to begin _, err = u.Complete(ops[0].ID) require.NoError(t, err) - op, err := u.Begin(100) + op, err := u.Begin(100, "", "") assert.NoError(t, err) assert.NotEmpty(t, op.ID) } @@ -56,7 +56,7 @@ func TestMaxParallelChunks(t *testing.T) { u := New(1, chunkLim) // start an operation, that can have more than the test limit chunks - op, err := u.Begin(MaxChunkSize * int64(chunkLim+2)) + op, err := u.Begin(MaxChunkSize*int64(chunkLim+2), "", "") require.NoError(t, err) // upload up to the limit chunks, without releasing the request @@ -75,7 +75,7 @@ func TestMaxParallelChunksReleased(t *testing.T) { u := New(1, chunkLim) // start an operation, that can have more than the test limit chunks - op, err := u.Begin(MaxChunkSize * int64(chunkLim+2)) + op, err := u.Begin(MaxChunkSize*int64(chunkLim+2), "", "") require.NoError(t, err) // upload up to the limit chunks, without releasing the request @@ -111,7 +111,7 @@ func TestUploadChunkCount(t *testing.T) { for _, tc := 
range tests { t.Run(tc.Name, func(t *testing.T) { - info, err := u.Begin(tc.FileSize) + info, err := u.Begin(tc.FileSize, "", "") assert.NoError(t, err) assert.Equal(t, tc.ExpectedCount, info.Count) }) @@ -135,7 +135,7 @@ func TestChunkMarksFinal(t *testing.T) { for _, tc := range tests { t.Run(tc.Name, func(t *testing.T) { - info, err := u.Begin(tc.FileSize) + info, err := u.Begin(tc.FileSize, "", "") assert.NoError(t, err) if tc.FinalChunk > 0 { From fc9e79c84aadc4693d11a6c3b183828ee777800c Mon Sep 17 00:00:00 2001 From: pzl Date: Tue, 4 Oct 2022 18:12:13 -0400 Subject: [PATCH 10/51] max file size --- internal/pkg/api/handleUpload.go | 5 ++- internal/pkg/upload/upload.go | 8 ++++- internal/pkg/upload/upload_test.go | 53 ++++++++++++++++++++++++------ 3 files changed, 54 insertions(+), 12 deletions(-) diff --git a/internal/pkg/api/handleUpload.go b/internal/pkg/api/handleUpload.go index e26b96dc6..aab4eb5e7 100644 --- a/internal/pkg/api/handleUpload.go +++ b/internal/pkg/api/handleUpload.go @@ -44,6 +44,8 @@ const ( // TODO: move to a config maxParallelUploadOperations = 3 maxParallelChunks = 4 + maxFileSize = 104857600 // 100 MiB + ) func (rt Router) handleUploadStart(w http.ResponseWriter, r *http.Request, ps httprouter.Params) { @@ -171,6 +173,7 @@ type UploadT struct { func NewUploadT(cfg *config.Server, bulker bulk.Bulk, chunkClient *elasticsearch.Client, cache cache.Cache) *UploadT { log.Info(). Interface("limits", cfg.Limits.ArtifactLimit). + Int64("maxFileSize", maxFileSize). Int("maxParallelOps", maxParallelUploadOperations). Int("maxParallelChunks", maxParallelChunks). Msg("Artifact install limits") @@ -179,7 +182,7 @@ func NewUploadT(cfg *config.Server, bulker bulk.Bulk, chunkClient *elasticsearch chunkClient: chunkClient, bulker: bulker, cache: cache, - upl: upload.New(maxParallelChunks, maxParallelChunks), + upl: upload.New(maxFileSize, maxParallelChunks, maxParallelChunks), } } diff --git a/internal/pkg/upload/upload.go b/internal/pkg/upload/upload.go index 74047a6e4..2430f3d12 100644 --- a/internal/pkg/upload/upload.go +++ b/internal/pkg/upload/upload.go @@ -28,6 +28,7 @@ const ( var ( ErrMaxConcurrentUploads = errors.New("the max number of concurrent uploads has been reached") ErrInvalidUploadID = errors.New("active upload not found with this ID, it may be expired") + ErrFileSizeTooLarge = errors.New("This file exceeds the maximum allowed file size") //@todo: explicit error for expired uploads ) @@ -46,6 +47,7 @@ type Uploader struct { mu sync.Mutex opThrottle *throttle.Throttle parallelChunkLimit int + sizeLimit int64 } type Info struct { @@ -65,9 +67,10 @@ type ChunkInfo struct { Token *throttle.Token } -func New(opLimit int, chunkLimit int) *Uploader { +func New(sizeLimit int64, opLimit int, chunkLimit int) *Uploader { return &Uploader{ parallelChunkLimit: chunkLimit, + sizeLimit: sizeLimit, opThrottle: throttle.NewThrottle(opLimit), current: make(map[string]upload, opLimit), } @@ -79,6 +82,9 @@ func (u *Uploader) Begin(size int64, docID string, source string) (Info, error) if size <= 0 { return Info{}, errors.New("invalid file size") } + if size > u.sizeLimit { + return Info{}, ErrFileSizeTooLarge + } uid, err := uuid.NewV4() if err != nil { diff --git a/internal/pkg/upload/upload_test.go b/internal/pkg/upload/upload_test.go index 0e3102630..ea9d24bee 100644 --- a/internal/pkg/upload/upload_test.go +++ b/internal/pkg/upload/upload_test.go @@ -5,16 +5,21 @@ package upload import ( + "strconv" "testing" + "time" "github.com/stretchr/testify/assert" 
"github.com/stretchr/testify/require" + + testlog "github.com/elastic/fleet-server/v7/internal/pkg/testing/log" ) func TestMaxParallelUploadOpsReached(t *testing.T) { + _ = testlog.SetLogger(t) opLimit := 4 - u := New(opLimit, 0) + u := New(500, opLimit, 0) var err error for i := 0; i < opLimit; i++ { @@ -28,32 +33,33 @@ func TestMaxParallelUploadOpsReached(t *testing.T) { func TestMaxParallelUploadOpsReleased(t *testing.T) { opLimit := 4 - u := New(opLimit, 0) + u := New(500, opLimit, 0) // generate max operations ops := make([]Info, 0, opLimit) for i := 0; i < opLimit; i++ { - op, err := u.Begin(100, "", "") + op, err := u.Begin(100, strconv.Itoa(i), "") require.NoError(t, err) ops = append(ops, op) } // and verify max was reached - _, err := u.Begin(100, "", "") + _, err := u.Begin(100, "X", "") assert.ErrorIs(t, err, ErrMaxConcurrentUploads) // finishing an op should release the hold and allow another to begin _, err = u.Complete(ops[0].ID) require.NoError(t, err) - op, err := u.Begin(100, "", "") + time.Sleep(5 * time.Millisecond) // occasionally, a little time was required for the change to propagate + + _, err = u.Begin(100, "Y", "") assert.NoError(t, err) - assert.NotEmpty(t, op.ID) } func TestMaxParallelChunks(t *testing.T) { chunkLim := 3 - u := New(1, chunkLim) + u := New(104857600, 1, chunkLim) // start an operation, that can have more than the test limit chunks op, err := u.Begin(MaxChunkSize*int64(chunkLim+2), "", "") @@ -72,7 +78,7 @@ func TestMaxParallelChunks(t *testing.T) { func TestMaxParallelChunksReleased(t *testing.T) { chunkLim := 3 - u := New(1, chunkLim) + u := New(104857600, 1, chunkLim) // start an operation, that can have more than the test limit chunks op, err := u.Begin(MaxChunkSize*int64(chunkLim+2), "", "") @@ -107,7 +113,7 @@ func TestUploadChunkCount(t *testing.T) { {7534559605, 1797, "7.5Gb file"}, } - u := New(len(tests), 1) + u := New(8388608000, len(tests), 1) for _, tc := range tests { t.Run(tc.Name, func(t *testing.T) { @@ -131,7 +137,7 @@ func TestChunkMarksFinal(t *testing.T) { {7534559605, 1796, "7.5Gb file"}, } - u := New(len(tests), 4) + u := New(8388608000, len(tests), 4) for _, tc := range tests { t.Run(tc.Name, func(t *testing.T) { @@ -152,3 +158,30 @@ func TestChunkMarksFinal(t *testing.T) { }) } } + +func TestMaxFileSize(t *testing.T) { + tests := []struct { + MaxSize int64 + TryFile int64 + ShouldError bool + Name string + }{ + {500, 800, true, "800 is too large"}, + {800, 500, false, "file within limits"}, + {1024, 1023, false, "1-less than limit"}, + {1024, 1024, false, "file is exactly limit"}, + {1024, 1025, true, "file is 1 over limit"}, + } + + for _, tc := range tests { + t.Run(tc.Name, func(t *testing.T) { + u := New(tc.MaxSize, 1, 1) + _, err := u.Begin(tc.TryFile, "", "") + if tc.ShouldError { + assert.ErrorIs(t, err, ErrFileSizeTooLarge) + } else { + assert.NoError(t, err) + } + }) + } +} From 59ee0c292d60067f5ea6a62d7ab157237986e390 Mon Sep 17 00:00:00 2001 From: pzl Date: Wed, 5 Oct 2022 15:15:09 -0400 Subject: [PATCH 11/51] write agent,action IDs to file meta doc --- internal/pkg/api/handleUpload.go | 3 +++ internal/pkg/model/schema.go | 9 +++++++++ model/schema.json | 14 ++++++++++++++ 3 files changed, 26 insertions(+) diff --git a/internal/pkg/api/handleUpload.go b/internal/pkg/api/handleUpload.go index aab4eb5e7..d9734f36d 100644 --- a/internal/pkg/api/handleUpload.go +++ b/internal/pkg/api/handleUpload.go @@ -302,6 +302,9 @@ func uploadRequestToFileInfo(req FileInfo, chunkSize int64) model.FileInfo { Type: req.File.Type, 
Uid: req.File.UID, }, + ActionID: req.ActionID, + AgentID: req.AgentID, + Source: req.Source, } } diff --git a/internal/pkg/model/schema.go b/internal/pkg/model/schema.go index 8e2663488..d3f2f8131 100644 --- a/internal/pkg/model/schema.go +++ b/internal/pkg/model/schema.go @@ -284,12 +284,21 @@ type EventInformation struct { type FileInfo struct { ESDocument + // The identifier for the action requesting this upload + ActionID string `json:"action_id,omitempty"` + + // The unique identifier of the uploading agent + AgentID string `json:"agent_id,omitempty"` + // Event identifiers to coordinate the source reason Event *EventInformation `json:"event,omitempty"` // Information about the file properties File *FileMetadata `json:"file,omitempty"` Host *HostMetadata `json:"host,omitempty"` + + // The integration initiating this file upload + Source string `json:"source,omitempty"` } // FileMetadata Information about the file properties diff --git a/model/schema.json b/model/schema.json index 9ff57033f..b9b50138a 100644 --- a/model/schema.json +++ b/model/schema.json @@ -642,6 +642,20 @@ "type": "string", "format": "uuid" }, + "agent_id": { + "description": "The unique identifier of the uploading agent", + "type": "string", + "format": "uuid" + }, + "action_id": { + "description": "The identifier for the action requesting this upload", + "type": "string", + "format": "uuid" + }, + "source": { + "description": "The integration initiating this file upload", + "type": "string" + }, "file": { "title": "File Metadata", "description": "Information about the file properties", From 3b0442dccfaab7b576ee1949a30e7fa132e447f3 Mon Sep 17 00:00:00 2001 From: pzl Date: Wed, 12 Oct 2022 15:36:25 -0400 Subject: [PATCH 12/51] file upload verification, incl hashes --- internal/pkg/api/handleUpload.go | 23 +++- internal/pkg/api/schema.go | 1 + internal/pkg/dl/upload.go | 59 +++++++-- internal/pkg/model/schema.go | 10 ++ .../pkg/upload/{cbor.go => cbor/chunk.go} | 10 +- internal/pkg/upload/doc.go | 14 +++ internal/pkg/upload/upload.go | 118 +++++++++++++++++- model/schema.json | 4 + 8 files changed, 221 insertions(+), 18 deletions(-) rename internal/pkg/upload/{cbor.go => cbor/chunk.go} (94%) create mode 100644 internal/pkg/upload/doc.go diff --git a/internal/pkg/api/handleUpload.go b/internal/pkg/api/handleUpload.go index d9734f36d..f188b3e1f 100644 --- a/internal/pkg/api/handleUpload.go +++ b/internal/pkg/api/handleUpload.go @@ -6,9 +6,12 @@ package api import ( "context" + "crypto/md5" + "crypto/sha256" "encoding/json" "errors" "fmt" + "hash" "io" "net/http" "strconv" @@ -23,6 +26,7 @@ import ( "github.com/elastic/fleet-server/v7/internal/pkg/logger" "github.com/elastic/fleet-server/v7/internal/pkg/model" "github.com/elastic/fleet-server/v7/internal/pkg/upload" + "github.com/elastic/fleet-server/v7/internal/pkg/upload/cbor" "github.com/elastic/go-elasticsearch/v7" "github.com/julienschmidt/httprouter" "github.com/rs/zerolog" @@ -203,7 +207,18 @@ func (ut *UploadT) handleUploadStart(zlog *zerolog.Logger, w http.ResponseWriter docID := fmt.Sprintf("%s.%s", fi.ActionID, fi.AgentID) - op, err := ut.upl.Begin(fi.File.Size, docID, fi.Source) + var hasher hash.Hash + var sum string + switch { + case fi.File.Hash.SHA256 != "": + hasher = sha256.New() + sum = fi.File.Hash.SHA256 + case fi.File.Hash.MD5 != "": + hasher = md5.New() + sum = fi.File.Hash.MD5 + } + + op, err := ut.upl.Begin(fi.File.Size, docID, fi.Source, sum, hasher) if err != nil { return err } @@ -245,14 +260,15 @@ func (ut *UploadT) handleUploadChunk(zlog 
*zerolog.Logger, w http.ResponseWriter // prevent over-sized chunks data := http.MaxBytesReader(w, r.Body, upload.MaxChunkSize) - if err := dl.UploadChunk(r.Context(), ut.chunkClient, data, chunkInfo); err != nil { + ce := cbor.NewChunkWriter(data, chunkInfo.Final, chunkInfo.Upload.DocID, chunkInfo.Upload.ChunkSize) + if err := dl.UploadChunk(r.Context(), ut.chunkClient, ce, chunkInfo.Upload.Source, chunkInfo.Upload.DocID, chunkInfo.ID); err != nil { return err } return nil } func (ut *UploadT) handleUploadComplete(zlog *zerolog.Logger, w http.ResponseWriter, r *http.Request, uplID string) error { - info, err := ut.upl.Complete(uplID) + info, err := ut.upl.Complete(uplID, ut.bulker) if err != nil { return err } @@ -288,6 +304,7 @@ func uploadRequestToFileInfo(req FileInfo, chunkSize int64) model.FileInfo { Group: req.File.Group, Hash: &model.Hash{ Sha256: req.File.Hash.SHA256, + Md5: req.File.Hash.MD5, }, Inode: req.File.INode, MimeType: req.File.Mime, diff --git a/internal/pkg/api/schema.go b/internal/pkg/api/schema.go index 7280a20e2..b90f24e21 100644 --- a/internal/pkg/api/schema.go +++ b/internal/pkg/api/schema.go @@ -188,6 +188,7 @@ type FileInfo struct { Compression string `json:"Compression"` Hash struct { SHA256 string `json:"sha256"` + MD5 string `json:"md5"` } Accessed string `json:"accessed"` Attributes []string `json:"attributes"` diff --git a/internal/pkg/dl/upload.go b/internal/pkg/dl/upload.go index b087008d8..9d9b3ecf4 100644 --- a/internal/pkg/dl/upload.go +++ b/internal/pkg/dl/upload.go @@ -8,12 +8,13 @@ import ( "context" "encoding/json" "fmt" - "io" "net/http" "github.com/elastic/fleet-server/v7/internal/pkg/bulk" + "github.com/elastic/fleet-server/v7/internal/pkg/dsl" + "github.com/elastic/fleet-server/v7/internal/pkg/es" "github.com/elastic/fleet-server/v7/internal/pkg/model" - "github.com/elastic/fleet-server/v7/internal/pkg/upload" + "github.com/elastic/fleet-server/v7/internal/pkg/upload/cbor" "github.com/elastic/go-elasticsearch/v7" "github.com/elastic/go-elasticsearch/v7/esapi" "github.com/rs/zerolog/log" @@ -24,8 +25,23 @@ const ( // somewhat configurable, but need to follow a pattern so that Fleet Server has write access FileHeaderIndexPattern = ".fleet-%s-files" FileDataIndexPattern = ".fleet-%s-file-data" + + FieldBaseID = "bid" +) + +var ( + QueryChunkIDs = prepareFindChunkIDs() ) +func prepareFindChunkIDs() *dsl.Tmpl { + tmpl := dsl.NewTmpl() + root := dsl.NewRoot() + root.Param(FieldSource, false) + root.Query().Term(FieldBaseID, tmpl.Bind(FieldBaseID), nil) + tmpl.MustResolve(root) + return tmpl +} + func CreateUploadInfo(ctx context.Context, bulker bulk.Bulk, fi model.FileInfo, source string, fileID string) (string, error) { return createUploadInfo(ctx, bulker, fmt.Sprintf(FileHeaderIndexPattern, source), fi, fileID) } @@ -46,8 +62,7 @@ func updateUpload(ctx context.Context, bulker bulk.Bulk, index string, fileID st return bulker.Update(ctx, index, fileID, data) } -func UploadChunk(ctx context.Context, client *elasticsearch.Client, data io.ReadCloser, chunkInfo upload.ChunkInfo) error { - cbor := upload.NewCBORChunkWriter(data, chunkInfo.Final, chunkInfo.Upload.DocID, chunkInfo.Upload.ChunkSize) +func UploadChunk(ctx context.Context, client *elasticsearch.Client, body *cbor.ChunkEncoder, source string, docID string, chunkID int) error { /* // the non-streaming version @@ -69,9 +84,10 @@ func UploadChunk(ctx context.Context, client *elasticsearch.Client, data io.Read */ req := esapi.IndexRequest{ - Index: fmt.Sprintf(FileDataIndexPattern, 
chunkInfo.Upload.Source), - Body: cbor, - DocumentID: fmt.Sprintf("%s.%d", chunkInfo.Upload.DocID, chunkInfo.ID), + Index: fmt.Sprintf(FileDataIndexPattern, source), + Body: body, + DocumentID: fmt.Sprintf("%s.%d", docID, chunkID), + Refresh: "true", } // need to set the Content-Type of the request to CBOR, notes below overrider := contentTypeOverrider{client} @@ -137,3 +153,32 @@ type ChunkUploadResponse struct { } `json:"caused_by"` } `json:"error"` } + +func ListChunkIDs(ctx context.Context, bulker bulk.Bulk, source string, fileID string) ([]es.HitT, error) { + return listChunkIDs(ctx, bulker, fmt.Sprintf(FileDataIndexPattern, source), fileID) +} + +func listChunkIDs(ctx context.Context, bulker bulk.Bulk, index string, fileID string) ([]es.HitT, error) { + query, err := QueryChunkIDs.Render(map[string]interface{}{ + FieldBaseID: fileID, + }) + if err != nil { + return nil, err + } + + res, err := bulker.Search(ctx, index, query) + if err != nil { + return nil, err + } + return res.HitsT.Hits, nil +} + +func GetChunk(ctx context.Context, bulker bulk.Bulk, source string, fileID string, chunkID int) (model.FileChunk, error) { + var chunk model.FileChunk + out, err := bulker.Read(ctx, fmt.Sprintf(FileDataIndexPattern, source), fmt.Sprintf("%s.%d", fileID, chunkID)) + if err != nil { + return chunk, err + } + err = json.Unmarshal(out, &chunk) + return chunk, err +} diff --git a/internal/pkg/model/schema.go b/internal/pkg/model/schema.go index d3f2f8131..a4e63d352 100644 --- a/internal/pkg/model/schema.go +++ b/internal/pkg/model/schema.go @@ -380,11 +380,21 @@ type FileMetadata struct { Uid string `json:"uid,omitempty"` } +type FileChunk struct { + ESDocument + + BID string `json:"bid"` + Data []byte `json:"data"` + Last bool `json:"last"` +} + // Hash Checksums on the file contents type Hash struct { // SHA256 sum of the file contents Sha256 string `json:"sha256,omitempty"` + // MD5 sum of the file contents + Md5 string `json:"md5,omitempty"` } // HostMetadata The host metadata for the Elastic Agent diff --git a/internal/pkg/upload/cbor.go b/internal/pkg/upload/cbor/chunk.go similarity index 94% rename from internal/pkg/upload/cbor.go rename to internal/pkg/upload/cbor/chunk.go index 1abcba968..bb3468f56 100644 --- a/internal/pkg/upload/cbor.go +++ b/internal/pkg/upload/cbor/chunk.go @@ -2,7 +2,7 @@ // or more contributor license agreements. Licensed under the Elastic License; // you may not use this file except in compliance with the Elastic License. -package upload +package cbor import ( "encoding/binary" @@ -16,7 +16,7 @@ import ( // in memory. // It is not a general-purpose CBOR encoder. 
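// For reference when reading the preamble bytes produced by encodePreambleToCBOR
// (used by NewChunkWriter below): CBOR encodes the simple values false and true
// as 0xF4 and 0xF5 respectively (RFC 8949), which is the byte the earlier
// "cbor: boolean byte was flipped" patch corrects, and 0x63 introduces a
// definite-length text string of three characters (the "bid" key). A tiny
// illustrative helper, not part of this package:
//
//	func cborBool(b bool) byte {
//		if b {
//			return 0xF5 // true
//		}
//		return 0xF4 // false
//	}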
// A suitable general purpose library, if the future needs one, is github.com/fxamacker/cbor/v2 -type cborEncoder struct { +type ChunkEncoder struct { chunk io.ReadCloser final bool preamble []byte @@ -25,8 +25,8 @@ type cborEncoder struct { wroteTerm bool } -func NewCBORChunkWriter(chunkData io.ReadCloser, finalChunk bool, baseID string, chunkSize int64) *cborEncoder { - return &cborEncoder{ +func NewChunkWriter(chunkData io.ReadCloser, finalChunk bool, baseID string, chunkSize int64) *ChunkEncoder { + return &ChunkEncoder{ chunk: chunkData, final: finalChunk, preamble: encodePreambleToCBOR(finalChunk, baseID, chunkSize), @@ -99,7 +99,7 @@ func encodePreambleToCBOR(final bool, baseID string, chunkSize int64) []byte { } // io.Reader interface for streaming out -func (c *cborEncoder) Read(buf []byte) (int, error) { +func (c *ChunkEncoder) Read(buf []byte) (int, error) { if c.wroteTerm { // already wrote a terminating instruction for undefined byte sequence length return 0, io.EOF } diff --git a/internal/pkg/upload/doc.go b/internal/pkg/upload/doc.go new file mode 100644 index 000000000..f7e3efafb --- /dev/null +++ b/internal/pkg/upload/doc.go @@ -0,0 +1,14 @@ +// Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one +// or more contributor license agreements. Licensed under the Elastic License; +// you may not use this file except in compliance with the Elastic License. + +/* + + This package concentrates the responsibility of following the + File Storage specification: + https://docs.google.com/document/d/1BlCQYxHwgGZMxysc0BiWxT9ZCLKwgryhkD9ryDNDIuI/edit?usp=sharing + so the resulting documents, using this module correctly and checking errors, + results in a valid File. + +*/ +package upload diff --git a/internal/pkg/upload/upload.go b/internal/pkg/upload/upload.go index 2430f3d12..df7260cee 100644 --- a/internal/pkg/upload/upload.go +++ b/internal/pkg/upload/upload.go @@ -5,12 +5,20 @@ package upload import ( + "bytes" + "context" + "encoding/hex" "errors" "fmt" + "hash" + "io" "strconv" + "strings" "sync" "time" + "github.com/elastic/fleet-server/v7/internal/pkg/bulk" + "github.com/elastic/fleet-server/v7/internal/pkg/dl" "github.com/elastic/fleet-server/v7/internal/pkg/throttle" "github.com/gofrs/uuid" "github.com/rs/zerolog/log" @@ -29,7 +37,8 @@ var ( ErrMaxConcurrentUploads = errors.New("the max number of concurrent uploads has been reached") ErrInvalidUploadID = errors.New("active upload not found with this ID, it may be expired") ErrFileSizeTooLarge = errors.New("This file exceeds the maximum allowed file size") - + ErrMissingChunks = errors.New("File data incomplete, not all chunks were uploaded") + ErrHashMismatch = errors.New("file integrity hash does not match") //@todo: explicit error for expired uploads ) @@ -57,6 +66,8 @@ type Info struct { ChunkSize int64 Total int64 Count int + HashSum string + Hasher hash.Hash } type ChunkInfo struct { @@ -78,7 +89,7 @@ func New(sizeLimit int64, opLimit int, chunkLimit int) *Uploader { // Start an upload operation, as long as the max concurrent has not been reached // returns the upload ID -func (u *Uploader) Begin(size int64, docID string, source string) (Info, error) { +func (u *Uploader) Begin(size int64, docID string, source string, hashsum string, hasher hash.Hash) (Info, error) { if size <= 0 { return Info{}, errors.New("invalid file size") } @@ -145,6 +156,8 @@ func (u *Uploader) Begin(size int64, docID string, source string) (Info, error) ChunkSize: MaxChunkSize, Source: source, Total: size, + Hasher: 
hasher, + HashSum: hashsum, } cnt := info.Total / info.ChunkSize if info.Total%info.ChunkSize > 0 { @@ -191,11 +204,28 @@ func (u *Uploader) Chunk(uplID string, chunkID int) (ChunkInfo, error) { }, nil } -func (u *Uploader) Complete(id string) (Info, error) { +func (u *Uploader) Complete(id string, bulker bulk.Bulk) (Info, error) { info, valid := u.current[id] if !valid { return Info{}, ErrInvalidUploadID } + + ok, err := u.allChunksPresent(info.Info, bulker) + if err != nil { + return Info{}, err + } + if !ok { + return Info{}, ErrMissingChunks + } + + ok, err = u.verifyChunkData(info.Info, bulker) + if err != nil { + return Info{}, err + } + if !ok { + return Info{}, errors.New("file contents did not pass validation") + } + // @todo: verify chunks // verify hashes, etc u.current[id].complete <- struct{}{} @@ -226,3 +256,85 @@ func (u *Uploader) finalize(uplID string) error { // @todo: write Status:READY here? return nil } + +func (u *Uploader) allChunksPresent(info Info, bulker bulk.Bulk) (bool, error) { + hits, err := dl.ListChunkIDs(context.TODO(), bulker, info.Source, info.DocID) + if err != nil { + log.Warn().Err(err).Msg("error listing chunks") + return false, err + } + if len(hits) != info.Count { + log.Warn().Int("expectedCount", info.Count).Int("received", len(hits)).Interface("hits", hits).Msg("mismatch number of chunks") + return false, nil + } + + ids := make(map[int]bool, len(hits)) + for _, h := range hits { + chunkID := strings.TrimPrefix(h.ID, info.DocID+".") + ival, err := strconv.Atoi(chunkID) + if err != nil { + log.Warn().Err(err).Str("chunkID", h.ID).Str("docID", info.DocID).Str("parsedChunkInt", chunkID).Interface("hits", hits).Msg("unable to convert to int value") + return false, err + } + ids[ival] = true + } + + for i := 0; i < info.Count; i++ { + if got, exists := ids[i]; !got || !exists { + log.Warn().Int("expected", i).Interface("hits", hits).Msg("mismatch chunk") + return false, nil + } + } + return true, nil +} + +func (u *Uploader) verifyChunkData(info Info, bulker bulk.Bulk) (bool, error) { + // verify all chunks except last are info.ChunkSize size + // verify last: false (or field excluded) for all except final chunk + // verify final chunk is last: true + // verify hash + + for i := 0; i < info.Count; i++ { + chunk, err := dl.GetChunk(context.TODO(), bulker, info.Source, info.DocID, i) + if err != nil { + return false, err + } + if err != nil { + return false, err + } + if i < info.Count-1 { + if chunk.Last { + log.Debug().Int("chunkID", i).Msg("non-final chunk was incorrectly marked last") + return false, nil + } + if len(chunk.Data) != int(info.ChunkSize) { + log.Debug().Int64("requiredSize", info.ChunkSize).Int("chunkID", i).Int("gotSize", len(chunk.Data)).Msg("chunk was undersized") + return false, nil + } + } else { + if !chunk.Last { + log.Debug().Int("chunkID", i).Msg("final chunk was not marked as final") + return false, nil + } + if len(chunk.Data) == 0 { + log.Debug().Int("chunkID", i).Msg("final chunk was 0 size") + return false, nil + } + } + + if info.Hasher != nil { // @todo: allow no-hash? 
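		// hash.Hash is incremental: feeding each chunk to the hasher in
		// ascending order, as this loop does, produces the same digest as
		// hashing the whole file in one pass. The hex-encoded sum is then
		// compared against the sha256 or md5 the client declared when the
		// upload began. Equivalent standalone sketch:
		//
		//	h := sha256.New()
		//	for _, data := range chunksInOrder { // hypothetical ordered [][]byte
		//		_, _ = h.Write(data)
		//	}
		//	sum := hex.EncodeToString(h.Sum(nil)) // must equal info.HashSum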
+ _, err = io.Copy(info.Hasher, bytes.NewReader(chunk.Data)) + if err != nil { + return false, err + } + } + } + + if info.Hasher != nil { + fullHash := hex.EncodeToString(info.Hasher.Sum(nil)) + if fullHash != info.HashSum { + return false, ErrHashMismatch + } + } + return true, nil +} diff --git a/model/schema.json b/model/schema.json index b9b50138a..daaa7ab86 100644 --- a/model/schema.json +++ b/model/schema.json @@ -682,6 +682,10 @@ "sha256": { "description": "SHA256 sum of the file contents", "type": "string" + }, + "md5": { + "description": "MD5 sum of the file contents", + "type": "string" } } }, From 2fea5df5bc608ef21d75fcb87a343aca65edef28 Mon Sep 17 00:00:00 2001 From: pzl Date: Tue, 18 Oct 2022 15:21:42 -0400 Subject: [PATCH 13/51] WIP add contents schema for inner-zip info --- internal/pkg/api/handleUpload.go | 72 +++++---- internal/pkg/api/schema.go | 69 +++++---- internal/pkg/model/schema.go | 60 +++---- model/schema.json | 258 ++++++++++++++++--------------- 4 files changed, 242 insertions(+), 217 deletions(-) diff --git a/internal/pkg/api/handleUpload.go b/internal/pkg/api/handleUpload.go index f188b3e1f..5e9d4da94 100644 --- a/internal/pkg/api/handleUpload.go +++ b/internal/pkg/api/handleUpload.go @@ -288,43 +288,55 @@ func (ut *UploadT) handleUploadComplete(zlog *zerolog.Logger, w http.ResponseWri } func uploadRequestToFileInfo(req FileInfo, chunkSize int64) model.FileInfo { + primaryFile := fileRequestToFileData(req.File) + primaryFile.ChunkSize = chunkSize + primaryFile.Status = string(UploadAwaiting) + + contents := make([]model.FileData, len(req.Contents)) + for i, f := range req.Contents { + contents[i] = fileRequestToFileData(f) + } + return model.FileInfo{ - File: &model.FileMetadata{ - Accessed: req.File.Accessed, - Attributes: req.File.Attributes, - ChunkSize: chunkSize, - Compression: req.File.Compression, - Created: req.File.Created, - Ctime: req.File.CTime, - Device: req.File.Device, - Directory: req.File.Directory, - DriveLetter: req.File.DriveLetter, - Extension: req.File.Extension, - Gid: req.File.GID, - Group: req.File.Group, - Hash: &model.Hash{ - Sha256: req.File.Hash.SHA256, - Md5: req.File.Hash.MD5, - }, - Inode: req.File.INode, - MimeType: req.File.Mime, - Mode: req.File.Mode, - Mtime: req.File.MTime, - Name: req.File.Name, - Owner: req.File.Owner, - Path: req.File.Path, - Size: req.File.Size, - Status: string(UploadAwaiting), - TargetPath: req.File.TargetPath, - Type: req.File.Type, - Uid: req.File.UID, - }, + File: &primaryFile, + Contents: contents, ActionID: req.ActionID, AgentID: req.AgentID, Source: req.Source, } } +func fileRequestToFileData(req FileData) model.FileData { + return model.FileData{ + Accessed: req.Accessed, + Attributes: req.Attributes, + Compression: req.Compression, + Created: req.Created, + Ctime: req.CTime, + Device: req.Device, + Directory: req.Directory, + DriveLetter: req.DriveLetter, + Extension: req.Extension, + Gid: req.GID, + Group: req.Group, + Hash: &model.Hash{ + Sha256: req.Hash.SHA256, + Md5: req.Hash.MD5, + }, + Inode: req.INode, + MimeType: req.Mime, + Mode: req.Mode, + Mtime: req.MTime, + Name: req.Name, + Owner: req.Owner, + Path: req.Path, + Size: req.Size, + TargetPath: req.TargetPath, + Type: req.Type, + Uid: req.UID, + } +} + func updateUploadStatus(ctx context.Context, bulker bulk.Bulk, info upload.Info, status UploadStatus) error { data, err := json.Marshal(map[string]interface{}{ "doc": map[string]interface{}{ diff --git a/internal/pkg/api/schema.go b/internal/pkg/api/schema.go index 
b90f24e21..e53d9abe3 100644 --- a/internal/pkg/api/schema.go +++ b/internal/pkg/api/schema.go @@ -177,42 +177,45 @@ type StatusResponse struct { } type FileInfo struct { - ActionID string `json:"action_id"` - AgentID string `json:"agent_id"` - Source string `json:"src"` - File struct { - Size int64 `json:"size"` - Name string `json:"name"` - Extension string `json:"ext"` - Mime string `json:"mime_type"` - Compression string `json:"Compression"` - Hash struct { - SHA256 string `json:"sha256"` - MD5 string `json:"md5"` - } - Accessed string `json:"accessed"` - Attributes []string `json:"attributes"` - Created string `json:"created"` - CTime string `json:"ctime"` - Device string `json:"device"` - Directory string `json:"directory"` - DriveLetter string `json:"drive_letter"` - Ext string `json:"extension"` - GID string `json:"gid"` - Group string `json:"group"` - INode string `json:"inode"` - Mode string `json:"mode"` - MTime string `json:"mtime"` - Owner string `json:"owner"` - Path string `json:"path"` - TargetPath string `json:"target_path"` - Type string `json:"type"` - UID string `json:"uid"` - } `json:"file"` - Event struct { + ActionID string `json:"action_id"` + AgentID string `json:"agent_id"` + Source string `json:"src"` + File FileData `json:"file"` + Contents []FileData `json:"contents"` + Event struct { ID string `json:"id"` } `json:"event"` Host struct { Hostname string `json:"hostname"` } `json:"host"` } + +type FileData struct { + Size int64 `json:"size"` + Name string `json:"name"` + Extension string `json:"ext"` + Mime string `json:"mime_type"` + Compression string `json:"Compression"` + Hash struct { + SHA256 string `json:"sha256"` + MD5 string `json:"md5"` + } + Accessed string `json:"accessed"` + Attributes []string `json:"attributes"` + Created string `json:"created"` + CTime string `json:"ctime"` + Device string `json:"device"` + Directory string `json:"directory"` + DriveLetter string `json:"drive_letter"` + Ext string `json:"extension"` + GID string `json:"gid"` + Group string `json:"group"` + INode string `json:"inode"` + Mode string `json:"mode"` + MTime string `json:"mtime"` + Owner string `json:"owner"` + Path string `json:"path"` + TargetPath string `json:"target_path"` + Type string `json:"type"` + UID string `json:"uid"` +} diff --git a/internal/pkg/model/schema.go b/internal/pkg/model/schema.go index a4e63d352..4c3026952 100644 --- a/internal/pkg/model/schema.go +++ b/internal/pkg/model/schema.go @@ -280,29 +280,8 @@ type EventInformation struct { ID string `json:"id,omitempty"` } -// FileInfo An uploaded File -type FileInfo struct { - ESDocument - - // The identifier for the action requesting this upload - ActionID string `json:"action_id,omitempty"` - - // The unique identifier of the uploading agent - AgentID string `json:"agent_id,omitempty"` - - // Event identifiers to coordinate the source reason - Event *EventInformation `json:"event,omitempty"` - - // Information about the file properties - File *FileMetadata `json:"file,omitempty"` - Host *HostMetadata `json:"host,omitempty"` - - // The integration initiating this file upload - Source string `json:"source,omitempty"` -} - -// FileMetadata Information about the file properties -type FileMetadata struct { +// FileData Information about the file properties +type FileData struct { // Last time the file was accessed Accessed string `json:"accessed,omitempty"` @@ -380,21 +359,44 @@ type FileMetadata struct { Uid string `json:"uid,omitempty"` } -type FileChunk struct { +// FileInfo An uploaded File +type 
FileInfo struct { ESDocument - BID string `json:"bid"` - Data []byte `json:"data"` - Last bool `json:"last"` + // The identifier for the action requesting this upload + ActionID string `json:"action_id,omitempty"` + + // The unique identifier of the uploading agent + AgentID string `json:"agent_id,omitempty"` + + // Description of the contents of an archive, when the file property describes an archive (e.g. zip) + Contents []FileData `json:"contents,omitempty"` + + // Event identifiers to coordinate the source reason + Event *EventInformation `json:"event,omitempty"` + File *FileData `json:"file,omitempty"` + Host *HostMetadata `json:"host,omitempty"` + + // The integration initiating this file upload + Source string `json:"source,omitempty"` } // Hash Checksums on the file contents type Hash struct { - // SHA256 sum of the file contents - Sha256 string `json:"sha256,omitempty"` // MD5 sum of the file contents Md5 string `json:"md5,omitempty"` + + // SHA256 sum of the file contents + Sha256 string `json:"sha256,omitempty"` +} + +type FileChunk struct { + ESDocument + + BID string `json:"bid"` + Data []byte `json:"data"` + Last bool `json:"last"` } // HostMetadata The host metadata for the Elastic Agent diff --git a/model/schema.json b/model/schema.json index daaa7ab86..4892df547 100644 --- a/model/schema.json +++ b/model/schema.json @@ -656,131 +656,12 @@ "description": "The integration initiating this file upload", "type": "string" }, - "file": { - "title": "File Metadata", - "description": "Information about the file properties", - "type": "object", - "properties": { - "Status": { - "description": "The current state of the file upload process", - "type": "string" - }, - "ChunkSize": { - "description": "Size, in bytes, of each data chunk", - "type": "integer" - }, - "Compression": { - "description": "The algorithm used to compress the file", - "type": "string", - "enum": ["br","gzip","deflate","none"] - }, - "hash": { - "title": "Hash", - "description": "Checksums on the file contents", - "type": "object", - "properties": { - "sha256": { - "description": "SHA256 sum of the file contents", - "type": "string" - }, - "md5": { - "description": "MD5 sum of the file contents", - "type": "string" - } - } - }, - "name": { - "description": "Name of the file including the extension, without the directory", - "type": "string" - }, - "mime_type": { - "description": "MIME type of the file", - "type": "string" - }, - "accessed": { - "description": "Last time the file was accessed", - "type": "string", - "format": "date-time" - }, - "attributes": { - "description": "Platform-dependent sequence of file attributes such as readonly, execute, hidden", - "type": "array", - "items": { - "type": "string" - } - }, - "created": { - "description": "File creation time", - "type": "string", - "format": "date-time" - }, - "ctime": { - "description": "Last time the file attributes or metadata changed", - "type": "string", - "format": "date-time" - }, - "device": { - "description": "Device that is the source of the file", - "type": "string" - }, - "directory": { - "description": "Directory where the file is located", - "type": "string" - }, - "drive_letter": { - "description": "Drive letter where the file is located", - "type": "string" - }, - "extension": { - "description": "File extension, excluding the leading dot", - "type": "string" - }, - "gid": { - "description": "Primary group ID (GID) of the file", - "type": "string" - }, - "group": { - "description": "Primary group name of the file", - "type": "string" - }, 
- "inode": { - "description": "inode representing the file in the filesystem", - "type": "string" - }, - "mode": { - "description": "Mode of the file in octal representation", - "type": "string" - }, - "mtime": { - "description": "Last time the file content was modified", - "type": "string", - "format": "date-time" - }, - "owner": { - "description": "File owner's username", - "type": "string" - }, - "path": { - "description": "Full path to the file, including the file name. It should include the drive letter, when appropriate", - "type": "string" - }, - "size": { - "description": "Size of the file contents, in bytes", - "type": "integer" - }, - "target_path": { - "description": "Target path for symlinks", - "type": "string" - }, - "type": { - "description": "File type (file, dir, or symlink)", - "type": "string", - "enum": ["file","dir","symlink"] - }, - "uid": { - "description":"The user ID (UID) or security identifier (SID) of the file owner", - "type":"string" - } + "file": { "$ref": "#/definitions/file-data" }, + "contents": { + "description": "Description of the contents of an archive, when the file property describes an archive (e.g. zip)", + "type": "array", + "items": { + "ref": "#/definitions/file-data" } }, "host": { "$ref": "#/definitions/host-metadata" }, @@ -795,6 +676,133 @@ } } } + }, + "file-data": { + "title": "Information about a file", + "description": "Information about the file properties", + "type": "object", + "properties": { + "Status": { + "description": "The current state of the file upload process", + "type": "string" + }, + "ChunkSize": { + "description": "Size, in bytes, of each data chunk", + "type": "integer" + }, + "Compression": { + "description": "The algorithm used to compress the file", + "type": "string", + "enum": ["br","gzip","deflate","none"] + }, + "hash": { + "title": "Hash", + "description": "Checksums on the file contents", + "type": "object", + "properties": { + "sha256": { + "description": "SHA256 sum of the file contents", + "type": "string" + }, + "md5": { + "description": "MD5 sum of the file contents", + "type": "string" + } + } + }, + "name": { + "description": "Name of the file including the extension, without the directory", + "type": "string" + }, + "mime_type": { + "description": "MIME type of the file", + "type": "string" + }, + "accessed": { + "description": "Last time the file was accessed", + "type": "string", + "format": "date-time" + }, + "attributes": { + "description": "Platform-dependent sequence of file attributes such as readonly, execute, hidden", + "type": "array", + "items": { + "type": "string" + } + }, + "created": { + "description": "File creation time", + "type": "string", + "format": "date-time" + }, + "ctime": { + "description": "Last time the file attributes or metadata changed", + "type": "string", + "format": "date-time" + }, + "device": { + "description": "Device that is the source of the file", + "type": "string" + }, + "directory": { + "description": "Directory where the file is located", + "type": "string" + }, + "drive_letter": { + "description": "Drive letter where the file is located", + "type": "string" + }, + "extension": { + "description": "File extension, excluding the leading dot", + "type": "string" + }, + "gid": { + "description": "Primary group ID (GID) of the file", + "type": "string" + }, + "group": { + "description": "Primary group name of the file", + "type": "string" + }, + "inode": { + "description": "inode representing the file in the filesystem", + "type": "string" + }, + "mode": { + 
"description": "Mode of the file in octal representation", + "type": "string" + }, + "mtime": { + "description": "Last time the file content was modified", + "type": "string", + "format": "date-time" + }, + "owner": { + "description": "File owner's username", + "type": "string" + }, + "path": { + "description": "Full path to the file, including the file name. It should include the drive letter, when appropriate", + "type": "string" + }, + "size": { + "description": "Size of the file contents, in bytes", + "type": "integer" + }, + "target_path": { + "description": "Target path for symlinks", + "type": "string" + }, + "type": { + "description": "File type (file, dir, or symlink)", + "type": "string", + "enum": ["file","dir","symlink"] + }, + "uid": { + "description":"The user ID (UID) or security identifier (SID) of the file owner", + "type":"string" + } + } } }, From 3e64ef7b993a50d49b6c893e72d19ccd3c4d8ff1 Mon Sep 17 00:00:00 2001 From: pzl Date: Mon, 7 Nov 2022 16:17:39 -0500 Subject: [PATCH 14/51] refactor out of dl, support arbitrary req payloads --- internal/pkg/api/handleUpload.go | 88 ++++------ internal/pkg/model/schema.go | 118 ------------- internal/pkg/{dl/upload.go => upload/es.go} | 26 +-- internal/pkg/upload/upload.go | 5 +- model/schema.json | 173 -------------------- 5 files changed, 43 insertions(+), 367 deletions(-) rename internal/pkg/{dl/upload.go => upload/es.go} (82%) diff --git a/internal/pkg/api/handleUpload.go b/internal/pkg/api/handleUpload.go index 5e9d4da94..4c22a31d6 100644 --- a/internal/pkg/api/handleUpload.go +++ b/internal/pkg/api/handleUpload.go @@ -13,6 +13,7 @@ import ( "fmt" "hash" "io" + "io/ioutil" "net/http" "strconv" "strings" @@ -21,10 +22,8 @@ import ( "github.com/elastic/fleet-server/v7/internal/pkg/bulk" "github.com/elastic/fleet-server/v7/internal/pkg/cache" "github.com/elastic/fleet-server/v7/internal/pkg/config" - "github.com/elastic/fleet-server/v7/internal/pkg/dl" "github.com/elastic/fleet-server/v7/internal/pkg/limit" "github.com/elastic/fleet-server/v7/internal/pkg/logger" - "github.com/elastic/fleet-server/v7/internal/pkg/model" "github.com/elastic/fleet-server/v7/internal/pkg/upload" "github.com/elastic/fleet-server/v7/internal/pkg/upload/cbor" "github.com/elastic/go-elasticsearch/v7" @@ -191,15 +190,23 @@ func NewUploadT(cfg *config.Server, bulker bulk.Bulk, chunkClient *elasticsearch } func (ut *UploadT) handleUploadStart(zlog *zerolog.Logger, w http.ResponseWriter, r *http.Request) error { + + // store raw body since we will json-decode twice + // 2MB is a reasonable json payload size. 
Any more might be an indication of garbage + body, err := ioutil.ReadAll(io.LimitReader(r.Body, 2*1024*1024)) + r.Body.Close() + if err != nil { + return fmt.Errorf("error reading request: %w", err) + } + + // decode once here to access known fields we need to parse and work with var fi FileInfo - if err := json.NewDecoder(r.Body).Decode(&fi); err != nil { - r.Body.Close() + if err := json.Unmarshal(body, &fi); err != nil { if errors.Is(err, io.EOF) { return fmt.Errorf("file info body is required: %w", err) } return err } - r.Body.Close() if err := validateUploadPayload(fi); err != nil { return err @@ -223,8 +230,17 @@ func (ut *UploadT) handleUploadStart(zlog *zerolog.Logger, w http.ResponseWriter return err } - doc := uploadRequestToFileInfo(fi, op.ChunkSize) - ret, err := dl.CreateUploadInfo(r.Context(), ut.bulker, doc, fi.Source, docID) + // second decode here to maintain the arbitrary shape and fields we will just pass through + var reqDoc map[string]interface{} + if err := json.Unmarshal(body, &reqDoc); err != nil { + return fmt.Errorf("error parsing request json: %w", err) + } + + doc, err := uploadRequestToFileDoc(reqDoc, op.ChunkSize) + if err != nil { + return fmt.Errorf("unable to convert request to file metadata doc: %w", err) + } + ret, err := upload.CreateFileDoc(r.Context(), ut.bulker, doc, fi.Source, docID) if err != nil { return err } @@ -261,7 +277,7 @@ func (ut *UploadT) handleUploadChunk(zlog *zerolog.Logger, w http.ResponseWriter // prevent over-sized chunks data := http.MaxBytesReader(w, r.Body, upload.MaxChunkSize) ce := cbor.NewChunkWriter(data, chunkInfo.Final, chunkInfo.Upload.DocID, chunkInfo.Upload.ChunkSize) - if err := dl.UploadChunk(r.Context(), ut.chunkClient, ce, chunkInfo.Upload.Source, chunkInfo.Upload.DocID, chunkInfo.ID); err != nil { + if err := upload.IndexChunk(r.Context(), ut.chunkClient, ce, chunkInfo.Upload.Source, chunkInfo.Upload.DocID, chunkInfo.ID); err != nil { return err } return nil @@ -287,54 +303,18 @@ func (ut *UploadT) handleUploadComplete(zlog *zerolog.Logger, w http.ResponseWri return nil } -func uploadRequestToFileInfo(req FileInfo, chunkSize int64) model.FileInfo { - primaryFile := fileRequestToFileData(req.File) - primaryFile.ChunkSize = chunkSize - primaryFile.Status = string(UploadAwaiting) - - contents := make([]model.FileData, len(req.Contents)) - for i, f := range req.Contents { - contents[i] = fileRequestToFileData(f) +// takes the arbitrary input document from an upload request and injects +// a few known fields as it passes through +func uploadRequestToFileDoc(req map[string]interface{}, chunkSize int64) ([]byte, error) { + fileObj, ok := req["file"].(map[string]interface{}) + if !ok { + return nil, errors.New("invalid upload request. 
File is not an object") } - return model.FileInfo{ - File: &primaryFile, - Contents: contents, - ActionID: req.ActionID, - AgentID: req.AgentID, - Source: req.Source, - } -} + fileObj["ChunkSize"] = chunkSize + fileObj["Status"] = string(UploadAwaiting) -func fileRequestToFileData(req FileData) model.FileData { - return model.FileData{ - Accessed: req.Accessed, - Attributes: req.Attributes, - Compression: req.Compression, - Created: req.Created, - Ctime: req.CTime, - Device: req.Device, - Directory: req.Directory, - DriveLetter: req.DriveLetter, - Extension: req.Extension, - Gid: req.GID, - Group: req.Group, - Hash: &model.Hash{ - Sha256: req.Hash.SHA256, - Md5: req.Hash.MD5, - }, - Inode: req.INode, - MimeType: req.Mime, - Mode: req.Mode, - Mtime: req.MTime, - Name: req.Name, - Owner: req.Owner, - Path: req.Path, - Size: req.Size, - TargetPath: req.TargetPath, - Type: req.Type, - Uid: req.UID, - } + return json.Marshal(req) } func updateUploadStatus(ctx context.Context, bulker bulk.Bulk, info upload.Info, status UploadStatus) error { @@ -348,7 +328,7 @@ func updateUploadStatus(ctx context.Context, bulker bulk.Bulk, info upload.Info, if err != nil { return err } - return dl.UpdateUpload(ctx, bulker, info.Source, info.DocID, data) + return upload.UpdateFileDoc(ctx, bulker, info.Source, info.DocID, data) } func validateUploadPayload(fi FileInfo) error { diff --git a/internal/pkg/model/schema.go b/internal/pkg/model/schema.go index 4c3026952..d17a8fbd8 100644 --- a/internal/pkg/model/schema.go +++ b/internal/pkg/model/schema.go @@ -273,124 +273,6 @@ type EnrollmentAPIKey struct { UpdatedAt string `json:"updated_at,omitempty"` } -// EventInformation Event identifiers to coordinate the source reason -type EventInformation struct { - - // unique event identifier - ID string `json:"id,omitempty"` -} - -// FileData Information about the file properties -type FileData struct { - - // Last time the file was accessed - Accessed string `json:"accessed,omitempty"` - - // Platform-dependent sequence of file attributes such as readonly, execute, hidden - Attributes []string `json:"attributes,omitempty"` - - // Size, in bytes, of each data chunk - ChunkSize int64 `json:"ChunkSize,omitempty"` - - // The algorithm used to compress the file - Compression string `json:"Compression,omitempty"` - - // File creation time - Created string `json:"created,omitempty"` - - // Last time the file attributes or metadata changed - Ctime string `json:"ctime,omitempty"` - - // Device that is the source of the file - Device string `json:"device,omitempty"` - - // Directory where the file is located - Directory string `json:"directory,omitempty"` - - // Drive letter where the file is located - DriveLetter string `json:"drive_letter,omitempty"` - - // File extension, excluding the leading dot - Extension string `json:"extension,omitempty"` - - // Primary group ID (GID) of the file - Gid string `json:"gid,omitempty"` - - // Primary group name of the file - Group string `json:"group,omitempty"` - - // Checksums on the file contents - Hash *Hash `json:"hash,omitempty"` - - // inode representing the file in the filesystem - Inode string `json:"inode,omitempty"` - - // MIME type of the file - MimeType string `json:"mime_type,omitempty"` - - // Mode of the file in octal representation - Mode string `json:"mode,omitempty"` - - // Last time the file content was modified - Mtime string `json:"mtime,omitempty"` - - // Name of the file including the extension, without the directory - Name string `json:"name,omitempty"` - - // File 
owner's username - Owner string `json:"owner,omitempty"` - - // Full path to the file, including the file name. It should include the drive letter, when appropriate - Path string `json:"path,omitempty"` - - // Size of the file contents, in bytes - Size int64 `json:"size,omitempty"` - - // The current state of the file upload process - Status string `json:"Status,omitempty"` - - // Target path for symlinks - TargetPath string `json:"target_path,omitempty"` - - // File type (file, dir, or symlink) - Type string `json:"type,omitempty"` - - // The user ID (UID) or security identifier (SID) of the file owner - Uid string `json:"uid,omitempty"` -} - -// FileInfo An uploaded File -type FileInfo struct { - ESDocument - - // The identifier for the action requesting this upload - ActionID string `json:"action_id,omitempty"` - - // The unique identifier of the uploading agent - AgentID string `json:"agent_id,omitempty"` - - // Description of the contents of an archive, when the file property describes an archive (e.g. zip) - Contents []FileData `json:"contents,omitempty"` - - // Event identifiers to coordinate the source reason - Event *EventInformation `json:"event,omitempty"` - File *FileData `json:"file,omitempty"` - Host *HostMetadata `json:"host,omitempty"` - - // The integration initiating this file upload - Source string `json:"source,omitempty"` -} - -// Hash Checksums on the file contents -type Hash struct { - - // MD5 sum of the file contents - Md5 string `json:"md5,omitempty"` - - // SHA256 sum of the file contents - Sha256 string `json:"sha256,omitempty"` -} - type FileChunk struct { ESDocument diff --git a/internal/pkg/dl/upload.go b/internal/pkg/upload/es.go similarity index 82% rename from internal/pkg/dl/upload.go rename to internal/pkg/upload/es.go index 9d9b3ecf4..210fe04c0 100644 --- a/internal/pkg/dl/upload.go +++ b/internal/pkg/upload/es.go @@ -2,7 +2,7 @@ // or more contributor license agreements. Licensed under the Elastic License; // you may not use this file except in compliance with the Elastic License. 
-package dl +package upload import ( "context" @@ -36,33 +36,21 @@ var ( func prepareFindChunkIDs() *dsl.Tmpl { tmpl := dsl.NewTmpl() root := dsl.NewRoot() - root.Param(FieldSource, false) + root.Param("_source", false) // do not return large data payload root.Query().Term(FieldBaseID, tmpl.Bind(FieldBaseID), nil) tmpl.MustResolve(root) return tmpl } -func CreateUploadInfo(ctx context.Context, bulker bulk.Bulk, fi model.FileInfo, source string, fileID string) (string, error) { - return createUploadInfo(ctx, bulker, fmt.Sprintf(FileHeaderIndexPattern, source), fi, fileID) +func CreateFileDoc(ctx context.Context, bulker bulk.Bulk, doc []byte, source string, fileID string) (string, error) { + return bulker.Create(ctx, fmt.Sprintf(FileHeaderIndexPattern, source), fileID, doc, bulk.WithRefresh()) } -func createUploadInfo(ctx context.Context, bulker bulk.Bulk, index string, fi model.FileInfo, fileID string) (string, error) { - body, err := json.Marshal(fi) - if err != nil { - return "", err - } - return bulker.Create(ctx, index, fileID, body, bulk.WithRefresh()) -} - -func UpdateUpload(ctx context.Context, bulker bulk.Bulk, source string, fileID string, data []byte) error { - return updateUpload(ctx, bulker, fmt.Sprintf(FileHeaderIndexPattern, source), fileID, data) -} - -func updateUpload(ctx context.Context, bulker bulk.Bulk, index string, fileID string, data []byte) error { - return bulker.Update(ctx, index, fileID, data) +func UpdateFileDoc(ctx context.Context, bulker bulk.Bulk, source string, fileID string, data []byte) error { + return bulker.Update(ctx, fmt.Sprintf(FileHeaderIndexPattern, source), fileID, data) } -func UploadChunk(ctx context.Context, client *elasticsearch.Client, body *cbor.ChunkEncoder, source string, docID string, chunkID int) error { +func IndexChunk(ctx context.Context, client *elasticsearch.Client, body *cbor.ChunkEncoder, source string, docID string, chunkID int) error { /* // the non-streaming version diff --git a/internal/pkg/upload/upload.go b/internal/pkg/upload/upload.go index df7260cee..b1c8da3dd 100644 --- a/internal/pkg/upload/upload.go +++ b/internal/pkg/upload/upload.go @@ -18,7 +18,6 @@ import ( "time" "github.com/elastic/fleet-server/v7/internal/pkg/bulk" - "github.com/elastic/fleet-server/v7/internal/pkg/dl" "github.com/elastic/fleet-server/v7/internal/pkg/throttle" "github.com/gofrs/uuid" "github.com/rs/zerolog/log" @@ -258,7 +257,7 @@ func (u *Uploader) finalize(uplID string) error { } func (u *Uploader) allChunksPresent(info Info, bulker bulk.Bulk) (bool, error) { - hits, err := dl.ListChunkIDs(context.TODO(), bulker, info.Source, info.DocID) + hits, err := ListChunkIDs(context.TODO(), bulker, info.Source, info.DocID) if err != nil { log.Warn().Err(err).Msg("error listing chunks") return false, err @@ -295,7 +294,7 @@ func (u *Uploader) verifyChunkData(info Info, bulker bulk.Bulk) (bool, error) { // verify hash for i := 0; i < info.Count; i++ { - chunk, err := dl.GetChunk(context.TODO(), bulker, info.Source, info.DocID, i) + chunk, err := GetChunk(context.TODO(), bulker, info.Source, info.DocID, i) if err != nil { return false, err } diff --git a/model/schema.json b/model/schema.json index 4892df547..4f518964f 100644 --- a/model/schema.json +++ b/model/schema.json @@ -631,181 +631,8 @@ "type": "boolean" } } - }, - "file-info": { - "title": "File Info", - "description": "An uploaded File", - "type": "object", - "properties": { - "_id": { - "description": "The unique identifier for the file metadata", - "type": "string", - "format": "uuid" - }, - 
"agent_id": { - "description": "The unique identifier of the uploading agent", - "type": "string", - "format": "uuid" - }, - "action_id": { - "description": "The identifier for the action requesting this upload", - "type": "string", - "format": "uuid" - }, - "source": { - "description": "The integration initiating this file upload", - "type": "string" - }, - "file": { "$ref": "#/definitions/file-data" }, - "contents": { - "description": "Description of the contents of an archive, when the file property describes an archive (e.g. zip)", - "type": "array", - "items": { - "ref": "#/definitions/file-data" - } - }, - "host": { "$ref": "#/definitions/host-metadata" }, - "event": { - "title": "Event information", - "description": "Event identifiers to coordinate the source reason", - "properties": { - "id": { - "description": "unique event identifier", - "type":"string" - } - } - } - } - }, - "file-data": { - "title": "Information about a file", - "description": "Information about the file properties", - "type": "object", - "properties": { - "Status": { - "description": "The current state of the file upload process", - "type": "string" - }, - "ChunkSize": { - "description": "Size, in bytes, of each data chunk", - "type": "integer" - }, - "Compression": { - "description": "The algorithm used to compress the file", - "type": "string", - "enum": ["br","gzip","deflate","none"] - }, - "hash": { - "title": "Hash", - "description": "Checksums on the file contents", - "type": "object", - "properties": { - "sha256": { - "description": "SHA256 sum of the file contents", - "type": "string" - }, - "md5": { - "description": "MD5 sum of the file contents", - "type": "string" - } - } - }, - "name": { - "description": "Name of the file including the extension, without the directory", - "type": "string" - }, - "mime_type": { - "description": "MIME type of the file", - "type": "string" - }, - "accessed": { - "description": "Last time the file was accessed", - "type": "string", - "format": "date-time" - }, - "attributes": { - "description": "Platform-dependent sequence of file attributes such as readonly, execute, hidden", - "type": "array", - "items": { - "type": "string" - } - }, - "created": { - "description": "File creation time", - "type": "string", - "format": "date-time" - }, - "ctime": { - "description": "Last time the file attributes or metadata changed", - "type": "string", - "format": "date-time" - }, - "device": { - "description": "Device that is the source of the file", - "type": "string" - }, - "directory": { - "description": "Directory where the file is located", - "type": "string" - }, - "drive_letter": { - "description": "Drive letter where the file is located", - "type": "string" - }, - "extension": { - "description": "File extension, excluding the leading dot", - "type": "string" - }, - "gid": { - "description": "Primary group ID (GID) of the file", - "type": "string" - }, - "group": { - "description": "Primary group name of the file", - "type": "string" - }, - "inode": { - "description": "inode representing the file in the filesystem", - "type": "string" - }, - "mode": { - "description": "Mode of the file in octal representation", - "type": "string" - }, - "mtime": { - "description": "Last time the file content was modified", - "type": "string", - "format": "date-time" - }, - "owner": { - "description": "File owner's username", - "type": "string" - }, - "path": { - "description": "Full path to the file, including the file name. 
It should include the drive letter, when appropriate", - "type": "string" - }, - "size": { - "description": "Size of the file contents, in bytes", - "type": "integer" - }, - "target_path": { - "description": "Target path for symlinks", - "type": "string" - }, - "type": { - "description": "File type (file, dir, or symlink)", - "type": "string", - "enum": ["file","dir","symlink"] - }, - "uid": { - "description":"The user ID (UID) or security identifier (SID) of the file owner", - "type":"string" - } - } } }, - "checkin": { "title": "Checkin", "description": "An Elastic Agent checkin to Fleet", From 8bc0a5ca14b34c3a35858a4d4646b9594886b7af Mon Sep 17 00:00:00 2001 From: pzl Date: Mon, 7 Nov 2022 19:11:34 -0500 Subject: [PATCH 15/51] add upload authorization --- internal/pkg/api/handleUpload.go | 29 +++++++++++++++++++++++++++++ 1 file changed, 29 insertions(+) diff --git a/internal/pkg/api/handleUpload.go b/internal/pkg/api/handleUpload.go index 4c22a31d6..6a1495ffc 100644 --- a/internal/pkg/api/handleUpload.go +++ b/internal/pkg/api/handleUpload.go @@ -60,6 +60,8 @@ func (rt Router) handleUploadStart(w http.ResponseWriter, r *http.Request, ps ht Str(ECSHTTPRequestID, reqID). Logger() + // authentication occurs inside here + // to check that key agent ID matches the ID in the body payload yet-to-be unmarshalled err := rt.ut.handleUploadStart(&zlog, w, r) if err != nil { @@ -97,6 +99,17 @@ func (rt Router) handleUploadChunk(w http.ResponseWriter, r *http.Request, ps ht Str(ECSHTTPRequestID, reqID). Logger() + // simpler authentication check, since chunk checksum must + // ultimately match the initial hash provided with the stricter key check + if _, err := authAPIKey(r, rt.bulker, rt.ut.cache); err != nil { + cntUpload.IncError(err) + resp := NewHTTPErrResp(err) + if err := resp.Write(w); err != nil { + zlog.Error().Err(err).Msg("failed writing error response") + } + return + } + chunkNum, err := strconv.Atoi(chunkID) if err != nil { cntUpload.IncError(err) @@ -142,6 +155,17 @@ func (rt Router) handleUploadComplete(w http.ResponseWriter, r *http.Request, ps Str(ECSHTTPRequestID, reqID). 
Logger() + // simpler authentication check, file integrity checksum + // will catch directed tampering, this route just says "im done" + if _, err := authAPIKey(r, rt.bulker, rt.ut.cache); err != nil { + cntUpload.IncError(err) + resp := NewHTTPErrResp(err) + if err := resp.Write(w); err != nil { + zlog.Error().Err(err).Msg("failed writing error response") + } + return + } + err := rt.ut.handleUploadComplete(&zlog, w, r, id) if err != nil { @@ -208,6 +232,11 @@ func (ut *UploadT) handleUploadStart(zlog *zerolog.Logger, w http.ResponseWriter return err } + // check API key matches payload agent ID + if _, err := authAgent(r, &fi.AgentID, ut.bulker, ut.cache); err != nil { + return err + } + if err := validateUploadPayload(fi); err != nil { return err } From 7a592e3839dba61eb31348f746a9b8b9ef182b82 Mon Sep 17 00:00:00 2001 From: pzl Date: Mon, 7 Nov 2022 19:22:36 -0500 Subject: [PATCH 16/51] strip unused fields --- internal/pkg/api/schema.go | 33 +++------------------------------ 1 file changed, 3 insertions(+), 30 deletions(-) diff --git a/internal/pkg/api/schema.go b/internal/pkg/api/schema.go index e53d9abe3..f5209056f 100644 --- a/internal/pkg/api/schema.go +++ b/internal/pkg/api/schema.go @@ -182,40 +182,13 @@ type FileInfo struct { Source string `json:"src"` File FileData `json:"file"` Contents []FileData `json:"contents"` - Event struct { - ID string `json:"id"` - } `json:"event"` - Host struct { - Hostname string `json:"hostname"` - } `json:"host"` } type FileData struct { - Size int64 `json:"size"` - Name string `json:"name"` - Extension string `json:"ext"` - Mime string `json:"mime_type"` - Compression string `json:"Compression"` - Hash struct { + Size int64 `json:"size"` + Name string `json:"name"` + Hash struct { SHA256 string `json:"sha256"` MD5 string `json:"md5"` } - Accessed string `json:"accessed"` - Attributes []string `json:"attributes"` - Created string `json:"created"` - CTime string `json:"ctime"` - Device string `json:"device"` - Directory string `json:"directory"` - DriveLetter string `json:"drive_letter"` - Ext string `json:"extension"` - GID string `json:"gid"` - Group string `json:"group"` - INode string `json:"inode"` - Mode string `json:"mode"` - MTime string `json:"mtime"` - Owner string `json:"owner"` - Path string `json:"path"` - TargetPath string `json:"target_path"` - Type string `json:"type"` - UID string `json:"uid"` } From 1249622f066c682fa0bbeedc38f29da8ed7d6d13 Mon Sep 17 00:00:00 2001 From: pzl Date: Mon, 7 Nov 2022 19:29:55 -0500 Subject: [PATCH 17/51] reset license header --- licenses/license_header.go | 19 +++---------------- 1 file changed, 3 insertions(+), 16 deletions(-) diff --git a/licenses/license_header.go b/licenses/license_header.go index fc5125bb2..21a8501aa 100644 --- a/licenses/license_header.go +++ b/licenses/license_header.go @@ -1,19 +1,6 @@ -// Licensed to Elasticsearch B.V. under one or more contributor -// license agreements. See the NOTICE file distributed with -// this work for additional information regarding copyright -// ownership. Elasticsearch B.V. licenses this file to you under -// the Apache License, Version 2.0 (the "License"); you may -// not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. 
See the License for the -// specific language governing permissions and limitations -// under the License. +// Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one +// or more contributor license agreements. Licensed under the Elastic License; +// you may not use this file except in compliance with the Elastic License. // Code generated by beats/dev-tools/cmd/license/license_generate.go - DO NOT EDIT. From d7ef820498fd9451a873d67103dd13140396804f Mon Sep 17 00:00:00 2001 From: pzl Date: Tue, 8 Nov 2022 07:18:10 -0500 Subject: [PATCH 18/51] small fixes --- internal/pkg/api/schema.go | 2 +- internal/pkg/model/schema.go | 4 ++++ 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/internal/pkg/api/schema.go b/internal/pkg/api/schema.go index f5209056f..c2a8b0a60 100644 --- a/internal/pkg/api/schema.go +++ b/internal/pkg/api/schema.go @@ -190,5 +190,5 @@ type FileData struct { Hash struct { SHA256 string `json:"sha256"` MD5 string `json:"md5"` - } + } `json:"hash"` } diff --git a/internal/pkg/model/schema.go b/internal/pkg/model/schema.go index d17a8fbd8..a9022ff86 100644 --- a/internal/pkg/model/schema.go +++ b/internal/pkg/model/schema.go @@ -1,3 +1,7 @@ +// Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one +// or more contributor license agreements. Licensed under the Elastic License; +// you may not use this file except in compliance with the Elastic License. + // Code generated by schema-generate. DO NOT EDIT. package model From f61b67b2e3859604dd950f7b9fa8c2c1c0af9cce Mon Sep 17 00:00:00 2001 From: pzl Date: Tue, 8 Nov 2022 09:50:14 -0500 Subject: [PATCH 19/51] restore mime to checked fields --- internal/pkg/api/schema.go | 1 + 1 file changed, 1 insertion(+) diff --git a/internal/pkg/api/schema.go b/internal/pkg/api/schema.go index c2a8b0a60..cc46dfe66 100644 --- a/internal/pkg/api/schema.go +++ b/internal/pkg/api/schema.go @@ -187,6 +187,7 @@ type FileInfo struct { type FileData struct { Size int64 `json:"size"` Name string `json:"name"` + Mime string `json:"mime_type"` Hash struct { SHA256 string `json:"sha256"` MD5 string `json:"md5"` From 30d80c3190ab4a4ab2f9b136d0f0c510dca9b04e Mon Sep 17 00:00:00 2001 From: pzl Date: Wed, 9 Nov 2022 13:58:59 -0500 Subject: [PATCH 20/51] update file index pattern names --- internal/pkg/upload/es.go | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/internal/pkg/upload/es.go b/internal/pkg/upload/es.go index 210fe04c0..407763e66 100644 --- a/internal/pkg/upload/es.go +++ b/internal/pkg/upload/es.go @@ -21,10 +21,9 @@ import ( ) const ( - // @todo: neither of these should be static. 
But should be specific to an integration - // somewhat configurable, but need to follow a pattern so that Fleet Server has write access - FileHeaderIndexPattern = ".fleet-%s-files" - FileDataIndexPattern = ".fleet-%s-file-data" + // integration name is substituted in + FileHeaderIndexPattern = ".fleet-files-%s" + FileDataIndexPattern = ".fleet-file-data-%s" FieldBaseID = "bid" ) From cd6bdb64b2401e336f803a7a75a50442942f411d Mon Sep 17 00:00:00 2001 From: pzl Date: Wed, 9 Nov 2022 16:35:55 -0500 Subject: [PATCH 21/51] wrap initial upload op write in mutex lock --- internal/pkg/upload/upload.go | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/internal/pkg/upload/upload.go b/internal/pkg/upload/upload.go index b1c8da3dd..f53c77aa2 100644 --- a/internal/pkg/upload/upload.go +++ b/internal/pkg/upload/upload.go @@ -163,6 +163,8 @@ func (u *Uploader) Begin(size int64, docID string, source string, hashsum string cnt += 1 } info.Count = int(cnt) + u.mu.Lock() + defer u.mu.Unlock() u.current[id] = upload{ opToken: token, chunkThrottle: throttle.NewThrottle(u.parallelChunkLimit), @@ -321,7 +323,7 @@ func (u *Uploader) verifyChunkData(info Info, bulker bulk.Bulk) (bool, error) { } } - if info.Hasher != nil { // @todo: allow no-hash? + if info.Hasher != nil { _, err = io.Copy(info.Hasher, bytes.NewReader(chunk.Data)) if err != nil { return false, err From af09cc5bf0f6bd1d72d3cd3af5f3f1d1ccaca383 Mon Sep 17 00:00:00 2001 From: pzl Date: Thu, 10 Nov 2022 09:47:52 -0500 Subject: [PATCH 22/51] return all chunk IDs for counting --- internal/pkg/api/handleUpload.go | 1 - internal/pkg/upload/es.go | 1 + internal/pkg/upload/upload.go | 2 -- 3 files changed, 1 insertion(+), 3 deletions(-) diff --git a/internal/pkg/api/handleUpload.go b/internal/pkg/api/handleUpload.go index 6a1495ffc..51e8a9bf2 100644 --- a/internal/pkg/api/handleUpload.go +++ b/internal/pkg/api/handleUpload.go @@ -380,7 +380,6 @@ func validateUploadPayload(fi FileInfo) error { } //@todo: valid action? - //@todo: valid agent? //@todo: valid src? will that make future expansion harder and require FS updates? maybe just validate the index exists if fi.File.Size <= 0 { diff --git a/internal/pkg/upload/es.go b/internal/pkg/upload/es.go index 407763e66..0cb3c15fb 100644 --- a/internal/pkg/upload/es.go +++ b/internal/pkg/upload/es.go @@ -37,6 +37,7 @@ func prepareFindChunkIDs() *dsl.Tmpl { root := dsl.NewRoot() root.Param("_source", false) // do not return large data payload root.Query().Term(FieldBaseID, tmpl.Bind(FieldBaseID), nil) + root.Size(10000) // 10k elasticsearch maximum. 
Result count breaks above 42gb files tmpl.MustResolve(root) return tmpl } diff --git a/internal/pkg/upload/upload.go b/internal/pkg/upload/upload.go index f53c77aa2..bc9e9b7de 100644 --- a/internal/pkg/upload/upload.go +++ b/internal/pkg/upload/upload.go @@ -227,8 +227,6 @@ func (u *Uploader) Complete(id string, bulker bulk.Bulk) (Info, error) { return Info{}, errors.New("file contents did not pass validation") } - // @todo: verify chunks - // verify hashes, etc u.current[id].complete <- struct{}{} return info.Info, nil } From cd5870c3a9c749e9cbfaf3548bf7c5e32ac0a986 Mon Sep 17 00:00:00 2001 From: pzl Date: Thu, 10 Nov 2022 09:54:29 -0500 Subject: [PATCH 23/51] fix capitalization --- internal/pkg/api/handleUpload.go | 4 ++-- internal/pkg/upload/es.go | 2 +- internal/pkg/upload/upload.go | 4 ++-- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/internal/pkg/api/handleUpload.go b/internal/pkg/api/handleUpload.go index 51e8a9bf2..4b172d34c 100644 --- a/internal/pkg/api/handleUpload.go +++ b/internal/pkg/api/handleUpload.go @@ -203,7 +203,7 @@ func NewUploadT(cfg *config.Server, bulker bulk.Bulk, chunkClient *elasticsearch Int64("maxFileSize", maxFileSize). Int("maxParallelOps", maxParallelUploadOperations). Int("maxParallelChunks", maxParallelChunks). - Msg("Artifact install limits") + Msg("upload limits") return &UploadT{ chunkClient: chunkClient, @@ -337,7 +337,7 @@ func (ut *UploadT) handleUploadComplete(zlog *zerolog.Logger, w http.ResponseWri func uploadRequestToFileDoc(req map[string]interface{}, chunkSize int64) ([]byte, error) { fileObj, ok := req["file"].(map[string]interface{}) if !ok { - return nil, errors.New("invalid upload request. File is not an object") + return nil, errors.New("invalid upload request, file is not an object") } fileObj["ChunkSize"] = chunkSize diff --git a/internal/pkg/upload/es.go b/internal/pkg/upload/es.go index 0cb3c15fb..f2528146a 100644 --- a/internal/pkg/upload/es.go +++ b/internal/pkg/upload/es.go @@ -107,7 +107,7 @@ func IndexChunk(ctx context.Context, client *elasticsearch.Client, body *cbor.Ch log.Trace().Int("statuscode", resp.StatusCode).Interface("chunk-response", response).Msg("uploaded chunk") if response.Error.Type != "" { - return fmt.Errorf("%s: %s. 
Caused by %s: %s", response.Error.Type, response.Error.Reason, response.Error.Cause.Type, response.Error.Cause.Reason) + return fmt.Errorf("%s: %s caused by %s: %s", response.Error.Type, response.Error.Reason, response.Error.Cause.Type, response.Error.Cause.Reason) } return nil } diff --git a/internal/pkg/upload/upload.go b/internal/pkg/upload/upload.go index bc9e9b7de..a33f57aec 100644 --- a/internal/pkg/upload/upload.go +++ b/internal/pkg/upload/upload.go @@ -35,8 +35,8 @@ const ( var ( ErrMaxConcurrentUploads = errors.New("the max number of concurrent uploads has been reached") ErrInvalidUploadID = errors.New("active upload not found with this ID, it may be expired") - ErrFileSizeTooLarge = errors.New("This file exceeds the maximum allowed file size") - ErrMissingChunks = errors.New("File data incomplete, not all chunks were uploaded") + ErrFileSizeTooLarge = errors.New("this file exceeds the maximum allowed file size") + ErrMissingChunks = errors.New("file data incomplete, not all chunks were uploaded") ErrHashMismatch = errors.New("file integrity hash does not match") //@todo: explicit error for expired uploads ) From b3823b45f2910716eb0545a5a5dabaf26c9a79e2 Mon Sep 17 00:00:00 2001 From: pzl Date: Thu, 10 Nov 2022 09:54:52 -0500 Subject: [PATCH 24/51] no need to close request body --- internal/pkg/api/handleUpload.go | 1 - 1 file changed, 1 deletion(-) diff --git a/internal/pkg/api/handleUpload.go b/internal/pkg/api/handleUpload.go index 4b172d34c..b0ce26547 100644 --- a/internal/pkg/api/handleUpload.go +++ b/internal/pkg/api/handleUpload.go @@ -218,7 +218,6 @@ func (ut *UploadT) handleUploadStart(zlog *zerolog.Logger, w http.ResponseWriter // store raw body since we will json-decode twice // 2MB is a reasonable json payload size. Any more might be an indication of garbage body, err := ioutil.ReadAll(io.LimitReader(r.Body, 2*1024*1024)) - r.Body.Close() if err != nil { return fmt.Errorf("error reading request: %w", err) } From ec0ee5dcf158f78d23e82b70eb8858ba87182bef Mon Sep 17 00:00:00 2001 From: pzl Date: Mon, 19 Dec 2022 08:15:21 -0500 Subject: [PATCH 25/51] WIP --- internal/pkg/api/handleUpload.go | 136 +++++++----- internal/pkg/model/schema.go | 1 + internal/pkg/upload/cbor/chunk.go | 92 ++++++-- internal/pkg/upload/cbor/chunk_test.go | 120 +++++++++++ internal/pkg/upload/es.go | 53 +++-- internal/pkg/upload/upload.go | 287 +++++++++++++------------ model/schema.json | 4 + 7 files changed, 457 insertions(+), 236 deletions(-) create mode 100644 internal/pkg/upload/cbor/chunk_test.go diff --git a/internal/pkg/api/handleUpload.go b/internal/pkg/api/handleUpload.go index b0ce26547..22d727b5f 100644 --- a/internal/pkg/api/handleUpload.go +++ b/internal/pkg/api/handleUpload.go @@ -8,6 +8,7 @@ import ( "context" "crypto/md5" "crypto/sha256" + "encoding/hex" "encoding/json" "errors" "fmt" @@ -32,30 +33,18 @@ import ( "github.com/rs/zerolog/log" ) -// the only valid values of upload status according to storage spec -type UploadStatus string - -const ( - UploadAwaiting UploadStatus = "AWAITING_UPLOAD" - UploadProgress UploadStatus = "UPLOADING" - UploadDone UploadStatus = "READY" - UploadFail UploadStatus = "UPLOAD_ERROR" - UploadDel UploadStatus = "DELETED" -) - const ( // TODO: move to a config - maxParallelUploadOperations = 3 - maxParallelChunks = 4 - maxFileSize = 104857600 // 100 MiB + maxFileSize = 100 * 104857600 // 100 MiB + maxUploadTimer = 24 * time.Hour + // temp for easy development + AUTH_ENABLED = false // @todo: remove ) func (rt Router) handleUploadStart(w 
http.ResponseWriter, r *http.Request, ps httprouter.Params) { start := time.Now() - reqID := r.Header.Get(logger.HeaderRequestID) - zlog := log.With(). Str(ECSHTTPRequestID, reqID). Logger() @@ -67,13 +56,6 @@ func (rt Router) handleUploadStart(w http.ResponseWriter, r *http.Request, ps ht if err != nil { cntUpload.IncError(err) resp := NewHTTPErrResp(err) - - // Log this as warn for visibility that limit has been reached. - // This allows customers to tune the configuration on detection of threshold. - if errors.Is(err, limit.ErrMaxLimit) || errors.Is(err, upload.ErrMaxConcurrentUploads) { - resp.Level = zerolog.WarnLevel - } - zlog.WithLevel(resp.Level). Err(err). Int(ECSHTTPResponseCode, resp.StatusCode). @@ -99,15 +81,18 @@ func (rt Router) handleUploadChunk(w http.ResponseWriter, r *http.Request, ps ht Str(ECSHTTPRequestID, reqID). Logger() - // simpler authentication check, since chunk checksum must - // ultimately match the initial hash provided with the stricter key check - if _, err := authAPIKey(r, rt.bulker, rt.ut.cache); err != nil { - cntUpload.IncError(err) - resp := NewHTTPErrResp(err) - if err := resp.Write(w); err != nil { - zlog.Error().Err(err).Msg("failed writing error response") + // simpler authentication check, for high chunk throughput + // since chunk checksums must match transit hash + // AND optionally the initial hash, both having stricter auth checks + if AUTH_ENABLED { + if _, err := authAPIKey(r, rt.bulker, rt.ut.cache); err != nil { + cntUpload.IncError(err) + resp := NewHTTPErrResp(err) + if err := resp.Write(w); err != nil { + zlog.Error().Err(err).Msg("failed writing error response") + } + return } - return } chunkNum, err := strconv.Atoi(chunkID) @@ -155,15 +140,20 @@ func (rt Router) handleUploadComplete(w http.ResponseWriter, r *http.Request, ps Str(ECSHTTPRequestID, reqID). Logger() - // simpler authentication check, file integrity checksum - // will catch directed tampering, this route just says "im done" - if _, err := authAPIKey(r, rt.bulker, rt.ut.cache); err != nil { - cntUpload.IncError(err) - resp := NewHTTPErrResp(err) - if err := resp.Write(w); err != nil { - zlog.Error().Err(err).Msg("failed writing error response") + //@todo: doc lookup, agent ID is in there + agentID := "ABC" + + // need to auth that it matches the ID in the initial + // doc, but that means we had to doc-lookup early + if AUTH_ENABLED { + if _, err := authAgent(r, &agentID, rt.bulker, rt.ut.cache); err != nil { + cntUpload.IncError(err) + resp := NewHTTPErrResp(err) + if err := resp.Write(w); err != nil { + zlog.Error().Err(err).Msg("failed writing error response") + } + return } - return } err := rt.ut.handleUploadComplete(&zlog, w, r, id) @@ -201,15 +191,13 @@ func NewUploadT(cfg *config.Server, bulker bulk.Bulk, chunkClient *elasticsearch log.Info(). Interface("limits", cfg.Limits.ArtifactLimit). Int64("maxFileSize", maxFileSize). - Int("maxParallelOps", maxParallelUploadOperations). - Int("maxParallelChunks", maxParallelChunks). 
Msg("upload limits") return &UploadT{ chunkClient: chunkClient, bulker: bulker, cache: cache, - upl: upload.New(maxFileSize, maxParallelChunks, maxParallelChunks), + upl: upload.New(chunkClient, bulker, maxFileSize, maxUploadTimer), } } @@ -232,8 +220,10 @@ func (ut *UploadT) handleUploadStart(zlog *zerolog.Logger, w http.ResponseWriter } // check API key matches payload agent ID - if _, err := authAgent(r, &fi.AgentID, ut.bulker, ut.cache); err != nil { - return err + if AUTH_ENABLED { + if _, err := authAgent(r, &fi.AgentID, ut.bulker, ut.cache); err != nil { + return err + } } if err := validateUploadPayload(fi); err != nil { @@ -264,7 +254,7 @@ func (ut *UploadT) handleUploadStart(zlog *zerolog.Logger, w http.ResponseWriter return fmt.Errorf("error parsing request json: %w", err) } - doc, err := uploadRequestToFileDoc(reqDoc, op.ChunkSize) + doc, err := uploadRequestToFileDoc(reqDoc, op.ChunkSize, op.ID) if err != nil { return fmt.Errorf("unable to convert request to file metadata doc: %w", err) } @@ -291,33 +281,55 @@ func (ut *UploadT) handleUploadStart(zlog *zerolog.Logger, w http.ResponseWriter } func (ut *UploadT) handleUploadChunk(zlog *zerolog.Logger, w http.ResponseWriter, r *http.Request, uplID string, chunkID int) error { - chunkInfo, err := ut.upl.Chunk(uplID, chunkID) + chunkHash := strings.TrimSpace(r.Header.Get("X-Chunk-Sha2")) + + if chunkHash == "" { + return errors.New("chunk hash header required") + } + + chunkInfo, err := ut.upl.Chunk(uplID, chunkID, chunkHash) if err != nil { return err } - defer chunkInfo.Token.Release() - if chunkInfo.FirstReceived { - if err := updateUploadStatus(r.Context(), ut.bulker, chunkInfo.Upload, UploadProgress); err != nil { - zlog.Warn().Err(err).Str("upload", uplID).Msg("unable to update upload status") - } - } // prevent over-sized chunks data := http.MaxBytesReader(w, r.Body, upload.MaxChunkSize) - ce := cbor.NewChunkWriter(data, chunkInfo.Final, chunkInfo.Upload.DocID, chunkInfo.Upload.ChunkSize) + + // compute hash as we stream it + hash := sha256.New() + copier := io.TeeReader(data, hash) + + ce := cbor.NewChunkWriter(copier, chunkInfo.Final, chunkInfo.Upload.DocID, chunkInfo.Hash, chunkInfo.Upload.ChunkSize) if err := upload.IndexChunk(r.Context(), ut.chunkClient, ce, chunkInfo.Upload.Source, chunkInfo.Upload.DocID, chunkInfo.ID); err != nil { return err } + + hashsum := hex.EncodeToString(hash.Sum(nil)) + + if strings.ToLower(chunkHash) != strings.ToLower(hashsum) { + // @todo: delete document, since we wrote it, but the hash was invalid + return upload.ErrHashMismatch + } + return nil } func (ut *UploadT) handleUploadComplete(zlog *zerolog.Logger, w http.ResponseWriter, r *http.Request, uplID string) error { - info, err := ut.upl.Complete(uplID, ut.bulker) + var req UploadCompleteRequest + if err := json.NewDecoder(r.Body).Decode(&req); err != nil { + return errors.New("unable to parse request body") + } + + if strings.TrimSpace(req.TransitHash.SHA256) == "" { + return errors.New("transit hash required") + } + + info, err := ut.upl.Complete(uplID, req.TransitHash.SHA256, ut.bulker) if err != nil { return err } - if err := updateUploadStatus(r.Context(), ut.bulker, info, UploadDone); err != nil { + if err := updateUploadStatus(r.Context(), ut.bulker, info, upload.StatusDone); err != nil { // should be 500 error probably? 
zlog.Warn().Err(err).Str("upload", uplID).Msg("unable to set upload status to complete") return err @@ -333,19 +345,21 @@ func (ut *UploadT) handleUploadComplete(zlog *zerolog.Logger, w http.ResponseWri // takes the arbitrary input document from an upload request and injects // a few known fields as it passes through -func uploadRequestToFileDoc(req map[string]interface{}, chunkSize int64) ([]byte, error) { +func uploadRequestToFileDoc(req map[string]interface{}, chunkSize int64, uploadID string) ([]byte, error) { fileObj, ok := req["file"].(map[string]interface{}) if !ok { return nil, errors.New("invalid upload request, file is not an object") } fileObj["ChunkSize"] = chunkSize - fileObj["Status"] = string(UploadAwaiting) + fileObj["Status"] = string(upload.StatusAwaiting) + req["upload_id"] = uploadID + req["upload_start"] = time.Now().UnixMilli() return json.Marshal(req) } -func updateUploadStatus(ctx context.Context, bulker bulk.Bulk, info upload.Info, status UploadStatus) error { +func updateUploadStatus(ctx context.Context, bulker bulk.Bulk, info upload.Info, status upload.Status) error { data, err := json.Marshal(map[string]interface{}{ "doc": map[string]interface{}{ "file": map[string]string{ @@ -386,3 +400,9 @@ func validateUploadPayload(fi FileInfo) error { } return nil } + +type UploadCompleteRequest struct { + TransitHash struct { + SHA256 string `json:"sha256"` + } `json:"transithash"` +} diff --git a/internal/pkg/model/schema.go b/internal/pkg/model/schema.go index a9022ff86..3b0380c13 100644 --- a/internal/pkg/model/schema.go +++ b/internal/pkg/model/schema.go @@ -283,6 +283,7 @@ type FileChunk struct { BID string `json:"bid"` Data []byte `json:"data"` Last bool `json:"last"` + SHA2 string `json:"sha2"` } // HostMetadata The host metadata for the Elastic Agent diff --git a/internal/pkg/upload/cbor/chunk.go b/internal/pkg/upload/cbor/chunk.go index bb3468f56..b9b624784 100644 --- a/internal/pkg/upload/cbor/chunk.go +++ b/internal/pkg/upload/cbor/chunk.go @@ -17,7 +17,7 @@ import ( // It is not a general-purpose CBOR encoder. // A suitable general purpose library, if the future needs one, is github.com/fxamacker/cbor/v2 type ChunkEncoder struct { - chunk io.ReadCloser + chunk io.Reader final bool preamble []byte prbWritten bool @@ -25,11 +25,11 @@ type ChunkEncoder struct { wroteTerm bool } -func NewChunkWriter(chunkData io.ReadCloser, finalChunk bool, baseID string, chunkSize int64) *ChunkEncoder { +func NewChunkWriter(chunkData io.Reader, finalChunk bool, baseID string, chunkHash string, chunkSize int64) *ChunkEncoder { return &ChunkEncoder{ chunk: chunkData, final: finalChunk, - preamble: encodePreambleToCBOR(finalChunk, baseID, chunkSize), + preamble: encodePreambleToCBOR(finalChunk, baseID, chunkHash, chunkSize), prbWritten: false, prbWritePos: 0, wroteTerm: false, @@ -38,16 +38,18 @@ func NewChunkWriter(chunkData io.ReadCloser, finalChunk bool, baseID string, chu // Writes the start of a CBOR object (equiv. JSON object) // { -// "bid": "baseID", // "last": true/false, +// "bid": "baseID", +// "sha2": "...", // "data": // } // the slice ends where the chunk data bytes ("byte string") should begin. // it is therefore an incomplete CBOR object on its own // expecting the next section to be filled in by the caller. 
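+// as an illustrative sketch (mirroring the expected bytes in chunk_test.go),
+// the preamble for a non-final chunk with baseID "foobar" and chunk hash "56ab" is:
+//   0xA4                                                -- map with 4 keys
+//   0x64 'l' 'a' 's' 't'  0xF4                          -- "last": false
+//   0x63 'b' 'i' 'd'  0x78 0x06 'f' 'o' 'o' 'b' 'a' 'r' -- "bid": "foobar"
+//   0x64 's' 'h' 'a' '2'  0x78 0x04 '5' '6' 'a' 'b'     -- "sha2": "56ab"
+//   0x64 'd' 'a' 't' 'a'  0x5A <4-byte big-endian len>  -- "data": chunk bytes follow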
// the CBOR spec may be found here: https://www.rfc-editor.org/rfc/rfc8949 -func encodePreambleToCBOR(final bool, baseID string, chunkSize int64) []byte { +func encodePreambleToCBOR(final bool, baseID string, chunkHash string, chunkSize int64) []byte { bidLen := len(baseID) + hashLen := len(chunkHash) // if we know the size of the chunk stream, we will write the 4-byte uint32 // descriptor of that length @@ -58,9 +60,9 @@ func encodePreambleToCBOR(final bool, baseID string, chunkSize int64) []byte { chunkLen = 1 } - preamble := make([]byte, 13+bidLen+chunkLen+5) - preamble[0] = 0xA3 // Object with 3 keys - preamble[1] = 0x64 // string with 4 chars + preamble := make([]byte, 11+bidLen+2+5+hashLen+2+chunkLen+5) + preamble[0] = 0xA4 // Object with 4 keys + preamble[1] = 0x64 // string with 4 chars (key: last) preamble[2] = 'l' preamble[3] = 'a' preamble[4] = 's' @@ -70,23 +72,31 @@ func encodePreambleToCBOR(final bool, baseID string, chunkSize int64) []byte { } else { preamble[6] = 0xF4 // bool false } - preamble[7] = 0x63 // string with 3 chars + preamble[7] = 0x63 // string with 3 chars (key: bid) preamble[8] = 'b' preamble[9] = 'i' preamble[10] = 'd' - preamble[11] = 0x78 // UTF-8 string coming, next byte describes length - preamble[12] = uint8(bidLen) - i := 13 - for _, c := range baseID { // now write the document baseID - preamble[i] = byte(c) - i++ - } - preamble[i] = 0x64 // string with 4 chars - preamble[i+1] = 'd' - preamble[i+2] = 'a' - preamble[i+3] = 't' - preamble[i+4] = 'a' - i += 5 + i := 11 + if n, err := writeString(preamble[i:], baseID); err != nil { + return nil + } else { + i = 11 + n + } + if n, err := writeKey(preamble[i:], "sha2"); err != nil { + return nil + } else { + i += n + } + if n, err := writeString(preamble[i:], chunkHash); err != nil { + return nil + } else { + i += n + } + if n, err := writeKey(preamble[i:], "data"); err != nil { + return nil + } else { + i += n + } if !final { // byte data should be precisely chunkSize long, otherwise malformed preamble[i] = 0x5A // say length descriptor will be 32-bit int @@ -144,3 +154,41 @@ func (c *ChunkEncoder) Read(buf []byte) (int, error) { return c.chunk.Read(buf) } + +// writes len(key)+1 bytes +func writeKey(buf []byte, key string) (int, error) { + keylen := len(key) + if keylen > 0x17 { // CBOR spec max size for single-byte string length descriptor + // another method would have to be used for writing the string length + return 0, errors.New("large key size, write manually") + } + if len(buf) < keylen+1 { + return 0, errors.New("cbor buffer size too small") + } + + buf[0] = byte(0x60 + keylen) + for i, c := range key { + buf[i+1] = byte(c) + } + + return keylen + 1, nil +} + +// writes len(string)+2 bytes +func writeString(buf []byte, val string) (int, error) { + strlen := len(val) + if strlen > 0xff { // max single-byte strlen + return 0, errors.New("oversize string") + } + if len(buf) < strlen+2 { + return 0, errors.New("cbor buffer size too small") + } + + buf[0] = 0x78 // Descriptor for: "UTF8 string. Next byte is a uint8 for n, and then n bytes follow" + buf[1] = uint8(strlen) + for i, c := range val { + buf[i+2] = byte(c) + } + + return strlen + 2, nil +} diff --git a/internal/pkg/upload/cbor/chunk_test.go b/internal/pkg/upload/cbor/chunk_test.go new file mode 100644 index 000000000..274d708e2 --- /dev/null +++ b/internal/pkg/upload/cbor/chunk_test.go @@ -0,0 +1,120 @@ +// Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one +// or more contributor license agreements. 
Licensed under the Elastic License; +// you may not use this file except in compliance with the Elastic License. + +package cbor + +import ( + "bytes" + "encoding/binary" + "io" + "math/rand" + "testing" + + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" +) + +func TestChunkWriter(t *testing.T) { + contents := []byte{0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09} + chunkLength := 9 + body := io.NopCloser(bytes.NewReader(contents)) + + w := NewChunkWriter(body, false, "foobar", "56ab", int64(chunkLength)) + + outbuf, err := io.ReadAll(w) + require.NoError(t, err) + + expected := []byte{ + 0xA4, // object with 4 keys + 0x64, 'l', 'a', 's', 't', 0xF4, // last: false + 0x63, 'b', 'i', 'd', 0x78, 0x06, 'f', 'o', 'o', 'b', 'a', 'r', // "bid": "foobar" + 0x64, 's', 'h', 'a', '2', 0x78, 0x04, '5', '6', 'a', 'b', // "sha2": "56ab" + 0x64, 'd', 'a', 't', 'a', // data: + 0x5A, 0x00, 0x00, 0x00, uint8(chunkLength), // 4-byte length instruction, then the actual 4-byte big-endian length + 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, // contents + } + + assert.Equal(t, expected, outbuf) +} + +func TestChunkWriterLastChunk(t *testing.T) { + contents := []byte{0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09} + chunkLength := 20 // describes what a "full" chunk length is, not THIS chunk + body := io.NopCloser(bytes.NewReader(contents)) + + w := NewChunkWriter(body, true, "foobar", "face", int64(chunkLength)) + + outbuf, err := io.ReadAll(w) + require.NoError(t, err) + + expected := []byte{ + 0xA4, // object with 4 keys + 0x64, 'l', 'a', 's', 't', 0xF5, // last: true + 0x63, 'b', 'i', 'd', 0x78, 0x06, 'f', 'o', 'o', 'b', 'a', 'r', // "bid": "foobar" + 0x64, 's', 'h', 'a', '2', 0x78, 0x04, 'f', 'a', 'c', 'e', // "sha2": "face" + 0x64, 'd', 'a', 't', 'a', // data: + 0x5F, // indeterminate length sequence + } + + // assert equality up to the constant set point + assert.Equal(t, expected, outbuf[:len(expected)]) + assert.Equal(t, uint8(0xFF), outbuf[len(outbuf)-1]) // final byte MUST be a 0xFF terminating byte when using indeterminate-length style + + // some light parsing, since this is variable depending on how Read() sizes its buffers internally + parsedContents := make([]byte, len(contents)) + pi := 0 // write pointer for above buffer + for i := len(expected); i < len(outbuf)-2; { + assert.Equal(t, uint8(0x5A), outbuf[i]) // expect a descriptor for 4-byte length sequence + buflen := binary.BigEndian.Uint32(outbuf[i+1:]) // read 4 byte length descriptor + n := copy(parsedContents[pi:], outbuf[i+5:i+5+int(buflen)]) // and copy those over + pi += n + i += n + 5 // 5 = 1 from (0x5A) and 4 from length descriptor + } + + assert.Equal(t, contents, parsedContents) +} + +func TestChunkWriterLargeLastChunk(t *testing.T) { + // generates a large enough chunk to test multiple read calls internally + + contents := make([]byte, 4096) + + n, err := rand.Read(contents) + require.NoError(t, err) + require.Equal(t, n, 4096) + + chunkLength := 8192 // describes what a "full" chunk length is, not THIS chunk + body := io.NopCloser(bytes.NewReader(contents)) + + w := NewChunkWriter(body, true, "foobar", "face", int64(chunkLength)) + + outbuf, err := io.ReadAll(w) + require.NoError(t, err) + + expected := []byte{ + 0xA4, // object with 4 keys + 0x64, 'l', 'a', 's', 't', 0xF5, // last: true + 0x63, 'b', 'i', 'd', 0x78, 0x06, 'f', 'o', 'o', 'b', 'a', 'r', // "bid": "foobar" + 0x64, 's', 'h', 'a', '2', 0x78, 0x04, 'f', 'a', 'c', 'e', // "sha2": "face" + 0x64, 'd', 'a', 't', 'a', // 
data: + 0x5F, // indeterminate length sequence + } + + // assert equality up to the constant set point + assert.Equal(t, expected, outbuf[:len(expected)]) + assert.Equal(t, uint8(0xFF), outbuf[len(outbuf)-1]) // final byte MUST be a 0xFF terminating byte when using indeterminate-length style + + // some light parsing, since this is variable depending on how Read() sizes its buffers internally + parsedContents := make([]byte, len(contents)) + pi := 0 // write pointer for above buffer + for i := len(expected); i < len(outbuf)-2; { + assert.Equal(t, uint8(0x5A), outbuf[i]) // expect a descriptor for 4-byte length sequence + buflen := binary.BigEndian.Uint32(outbuf[i+1:]) // read 4 byte length descriptor + n := copy(parsedContents[pi:], outbuf[i+5:i+5+int(buflen)]) // and copy those over + pi += n + i += n + 5 // 5 = 1 from (0x5A) and 4 from length descriptor + } + + assert.Equal(t, contents, parsedContents) +} diff --git a/internal/pkg/upload/es.go b/internal/pkg/upload/es.go index f2528146a..53dc2695a 100644 --- a/internal/pkg/upload/es.go +++ b/internal/pkg/upload/es.go @@ -25,11 +25,13 @@ const ( FileHeaderIndexPattern = ".fleet-files-%s" FileDataIndexPattern = ".fleet-file-data-%s" - FieldBaseID = "bid" + FieldBaseID = "bid" + FieldUploadID = "upload_id" ) var ( QueryChunkIDs = prepareFindChunkIDs() + QueryUploadID = prepareFindByUploadID() ) func prepareFindChunkIDs() *dsl.Tmpl { @@ -42,35 +44,42 @@ func prepareFindChunkIDs() *dsl.Tmpl { return tmpl } +func prepareFindByUploadID() *dsl.Tmpl { + tmpl := dsl.NewTmpl() + root := dsl.NewRoot() + //root.Param("_source", false) // do not return large data payload + root.Query().Term(FieldUploadID, tmpl.Bind(FieldUploadID), nil) + tmpl.MustResolve(root) + return tmpl +} + func CreateFileDoc(ctx context.Context, bulker bulk.Bulk, doc []byte, source string, fileID string) (string, error) { + //@todo: put_if_absent return bulker.Create(ctx, fmt.Sprintf(FileHeaderIndexPattern, source), fileID, doc, bulk.WithRefresh()) } +func GetFileDoc(ctx context.Context, bulker bulk.Bulk, uploadID string) ([]es.HitT, error) { + + query, err := QueryUploadID.Render(map[string]interface{}{ + FieldUploadID: uploadID, + }) + if err != nil { + return nil, err + } + + res, err := bulker.Search(ctx, fmt.Sprintf(FileHeaderIndexPattern, "*"), query) + if err != nil { + return nil, err + } + + return res.HitsT.Hits, nil +} + func UpdateFileDoc(ctx context.Context, bulker bulk.Bulk, source string, fileID string, data []byte) error { return bulker.Update(ctx, fmt.Sprintf(FileHeaderIndexPattern, source), fileID, data) } func IndexChunk(ctx context.Context, client *elasticsearch.Client, body *cbor.ChunkEncoder, source string, docID string, chunkID int) error { - - /* - // the non-streaming version - buf := bytes.NewBuffer(nil) - out, err := io.ReadAll(data) - if err != nil { - return err - } - data.Close() - err = cbor.NewEncoder(buf).Encode(map[string]interface{}{ - "bid": fileID, - "last": false, - "data": out, - }) - if err != nil { - return err - } - buf2 := buf.Bytes() - */ - req := esapi.IndexRequest{ Index: fmt.Sprintf(FileDataIndexPattern, source), Body: body, @@ -83,7 +92,7 @@ func IndexChunk(ctx context.Context, client *elasticsearch.Client, body *cbor.Ch /* standard approach when content-type override no longer needed - resp, err := client.Index(".fleet-file_data", data, func(req *esapi.IndexRequest) { + resp, err := client.Index(fmt.Sprintf(FileDataIndexPattern, source), data, func(req *esapi.IndexRequest) { req.DocumentID = fmt.Sprintf("%s.%d", fileID, chunkID) 
if req.Header == nil { req.Header = make(http.Header) diff --git a/internal/pkg/upload/upload.go b/internal/pkg/upload/upload.go index a33f57aec..d8f3d08fa 100644 --- a/internal/pkg/upload/upload.go +++ b/internal/pkg/upload/upload.go @@ -5,13 +5,11 @@ package upload import ( - "bytes" "context" - "encoding/hex" + "encoding/json" "errors" "fmt" "hash" - "io" "strconv" "strings" "sync" @@ -19,54 +17,63 @@ import ( "github.com/elastic/fleet-server/v7/internal/pkg/bulk" "github.com/elastic/fleet-server/v7/internal/pkg/throttle" + "github.com/elastic/go-elasticsearch/v7" "github.com/gofrs/uuid" "github.com/rs/zerolog/log" ) const ( - //these should be configs probably - uploadRequestTimeout = time.Hour - chunkProgressTimeout = time.Hour / 4 - // specification-designated maximum MaxChunkSize = 4194304 // 4 MiB ) var ( - ErrMaxConcurrentUploads = errors.New("the max number of concurrent uploads has been reached") - ErrInvalidUploadID = errors.New("active upload not found with this ID, it may be expired") - ErrFileSizeTooLarge = errors.New("this file exceeds the maximum allowed file size") - ErrMissingChunks = errors.New("file data incomplete, not all chunks were uploaded") - ErrHashMismatch = errors.New("file integrity hash does not match") - //@todo: explicit error for expired uploads + ErrInvalidUploadID = errors.New("active upload not found with this ID, it may be expired") + ErrFileSizeTooLarge = errors.New("this file exceeds the maximum allowed file size") + ErrMissingChunks = errors.New("file data incomplete, not all chunks were uploaded") + ErrHashMismatch = errors.New("hash does not match") + ErrUploadExpired = errors.New("upload has expired") + ErrUploadStopped = errors.New("upload has stopped") + ErrInvalidChunkNum = errors.New("invalid chunk number") ) -type upload struct { - opToken *throttle.Token - chunkThrottle *throttle.Throttle - complete chan<- struct{} - chunkRecv chan<- struct{} - begun bool - Info Info -} +// the only valid values of upload status according to storage spec +type Status string + +const ( + StatusAwaiting Status = "AWAITING_UPLOAD" + StatusProgress Status = "UPLOADING" + StatusDone Status = "READY" + StatusFail Status = "UPLOAD_ERROR" + StatusDel Status = "DELETED" +) type Uploader struct { - current map[string]upload - mu sync.Mutex - opThrottle *throttle.Throttle - parallelChunkLimit int - sizeLimit int64 + metaCache map[string]Info // simple read-cache of file metadata doc info + mu sync.RWMutex // lock for the above + sizeLimit int64 // @todo: what if configuration changes? is this recreated with another New()? + timeLimit time.Duration // @todo: same as above + + // @todo: some es credentials + chunkClient *elasticsearch.Client + bulker bulk.Bulk } type Info struct { - ID string // upload operation identifier. Ephemeral, just used for the upload process + ID string // upload operation identifier. Used to identify the upload process DocID string // document ID of the uploaded file and chunks Source string // which integration is performing the upload ChunkSize int64 Total int64 Count int - HashSum string - Hasher hash.Hash + Start time.Time + Status Status +} + +// convenience functions for computing current "Status" based on the fields +func (i Info) Expired(timeout time.Duration) bool { return i.Start.Add(timeout).After(time.Now()) } +func (i Info) StatusCanUpload() bool { // returns true if more chunks can be uploaded. 
False if the upload process has completed (with or without error) + return !(i.Status == StatusFail || i.Status == StatusDone || i.Status == StatusDel) } type ChunkInfo struct { @@ -74,15 +81,16 @@ type ChunkInfo struct { FirstReceived bool Final bool Upload Info + Hash string Token *throttle.Token } -func New(sizeLimit int64, opLimit int, chunkLimit int) *Uploader { +func New(chunkClient *elasticsearch.Client, bulker bulk.Bulk, sizeLimit int64, timeLimit time.Duration) *Uploader { return &Uploader{ - parallelChunkLimit: chunkLimit, - sizeLimit: sizeLimit, - opThrottle: throttle.NewThrottle(opLimit), - current: make(map[string]upload, opLimit), + chunkClient: chunkClient, + bulker: bulker, + sizeLimit: sizeLimit, + timeLimit: timeLimit, } } @@ -102,116 +110,71 @@ func (u *Uploader) Begin(size int64, docID string, source string, hashsum string } id := uid.String() - token := u.opThrottle.Acquire(id, 300*time.Hour) - if token == nil { - return Info{}, ErrMaxConcurrentUploads - } - - total := time.NewTimer(uploadRequestTimeout) - chunkT := time.NewTimer(chunkProgressTimeout) - chunkRecv := make(chan struct{}) - complete := make(chan struct{}) - // total timer could also be achieved with context deadline and cancelling - - go func() { - for { - select { - case <-total.C: // entire upload operation timed out - log.Trace().Str("uploadID", id).Msg("upload operation timed out") - // stop and drain chunk timer - if !chunkT.Stop() { - <-chunkT.C - } - u.cancel(id) - return - case <-chunkT.C: // no chunk progress within chunk timer, expire operation - log.Trace().Str("uploadID", id).Msg("upload operation chunk activity timed out") - // stop and drain total timer - if !total.Stop() { - <-total.C - } - u.cancel(id) - return - case <-chunkRecv: // chunk activity, update chunk timer - if !chunkT.Stop() { - <-chunkT.C - } - chunkT.Reset(chunkProgressTimeout) - case <-complete: // upload operation complete, clean up - if !chunkT.Stop() { - <-chunkT.C - } - if !total.Stop() { - <-total.C - } - u.finalize(id) - return - } - } - }() info := Info{ ID: id, DocID: docID, ChunkSize: MaxChunkSize, Source: source, Total: size, - Hasher: hasher, - HashSum: hashsum, + //Hasher: hasher, + //HashSum: hashsum, } cnt := info.Total / info.ChunkSize if info.Total%info.ChunkSize > 0 { cnt += 1 } info.Count = int(cnt) - u.mu.Lock() - defer u.mu.Unlock() - u.current[id] = upload{ - opToken: token, - chunkThrottle: throttle.NewThrottle(u.parallelChunkLimit), - complete: complete, - chunkRecv: chunkRecv, - Info: info, - } + return info, nil } -func (u *Uploader) Chunk(uplID string, chunkID int) (ChunkInfo, error) { - u.mu.Lock() - defer u.mu.Unlock() - upl, valid := u.current[uplID] - if !valid { - return ChunkInfo{}, ErrInvalidUploadID - } - if chunkID < 0 || chunkID >= upl.Info.Count { - return ChunkInfo{}, errors.New("invalid chunk number") +func (u *Uploader) Chunk(uplID string, chunkID int, chunkHash string) (ChunkInfo, error) { + + // Fetch metadata doc, if not cached + u.mu.RLock() + defer u.mu.RUnlock() + info, exist := u.metaCache[uplID] + if !exist { + u.mu.Lock() + defer u.mu.Unlock() + // fetch and write + + //resp, err := u.es.Get() + + found := false + if !found { + return ChunkInfo{}, ErrInvalidUploadID + } + } - token := upl.chunkThrottle.Acquire(strconv.Itoa(chunkID), time.Hour) - if token == nil { - return ChunkInfo{}, ErrMaxConcurrentUploads + if info.Expired(u.timeLimit) { + return ChunkInfo{}, ErrUploadExpired + } + if !info.StatusCanUpload() { + return ChunkInfo{}, ErrUploadStopped } - upl.chunkRecv <- 
struct{}{} - if !upl.begun { - upl.begun = true + if chunkID < 0 || chunkID >= info.Count { + return ChunkInfo{}, ErrInvalidChunkNum } - u.current[uplID] = upl return ChunkInfo{ ID: chunkID, - FirstReceived: upl.begun, - Final: chunkID == upl.Info.Count-1, - Upload: upl.Info, - Token: token, + FirstReceived: false, // @todo + Final: chunkID == info.Count-1, + Upload: info, + Hash: chunkHash, + //Token: token, }, nil } -func (u *Uploader) Complete(id string, bulker bulk.Bulk) (Info, error) { - info, valid := u.current[id] +func (u *Uploader) Complete(id string, transitHash string, bulker bulk.Bulk) (Info, error) { + info, valid := u.metaCache[id] if !valid { return Info{}, ErrInvalidUploadID } - ok, err := u.allChunksPresent(info.Info, bulker) + ok, err := u.allChunksPresent(info, bulker) if err != nil { return Info{}, err } @@ -219,7 +182,7 @@ func (u *Uploader) Complete(id string, bulker bulk.Bulk) (Info, error) { return Info{}, ErrMissingChunks } - ok, err = u.verifyChunkData(info.Info, bulker) + ok, err = u.verifyChunkData(info, transitHash, bulker) if err != nil { return Info{}, err } @@ -227,19 +190,13 @@ func (u *Uploader) Complete(id string, bulker bulk.Bulk) (Info, error) { return Info{}, errors.New("file contents did not pass validation") } - u.current[id].complete <- struct{}{} - return info.Info, nil + return info, nil } func (u *Uploader) cleanupOperation(uplID string) { u.mu.Lock() defer u.mu.Unlock() - if upload, ok := u.current[uplID]; ok { - if upload.opToken != nil { - upload.opToken.Release() - } - } - delete(u.current, uplID) + delete(u.metaCache, uplID) } func (u *Uploader) cancel(uplID string) error { @@ -287,7 +244,7 @@ func (u *Uploader) allChunksPresent(info Info, bulker bulk.Bulk) (bool, error) { return true, nil } -func (u *Uploader) verifyChunkData(info Info, bulker bulk.Bulk) (bool, error) { +func (u *Uploader) verifyChunkData(info Info, transitHash string, bulker bulk.Bulk) (bool, error) { // verify all chunks except last are info.ChunkSize size // verify last: false (or field excluded) for all except final chunk // verify final chunk is last: true @@ -321,19 +278,81 @@ func (u *Uploader) verifyChunkData(info Info, bulker bulk.Bulk) (bool, error) { } } + /* + if info.Hasher != nil { + _, err = io.Copy(info.Hasher, bytes.NewReader(chunk.Data)) + if err != nil { + return false, err + } + } + */ + } + + /* if info.Hasher != nil { - _, err = io.Copy(info.Hasher, bytes.NewReader(chunk.Data)) - if err != nil { - return false, err + fullHash := hex.EncodeToString(info.Hasher.Sum(nil)) + if fullHash != info.HashSum { + return false, ErrHashMismatch } } + */ + return true, nil +} + +// retrieves upload metadata info from elasticsearch +// which may be locally cached +func (u *Uploader) GetUploadInfo(uploadID string) (Info, error) { + results, err := GetFileDoc(context.TODO(), u.bulker, uploadID) + if err != nil { + return Info{}, err + } + if len(results) == 0 { + return Info{}, ErrInvalidUploadID + } + if len(results) > 1 { + return Info{}, fmt.Errorf("unable to locate upload record, got %d records, expected 1", len(results)) } - if info.Hasher != nil { - fullHash := hex.EncodeToString(info.Hasher.Sum(nil)) - if fullHash != info.HashSum { - return false, ErrHashMismatch - } + var fi FileMetaDoc + if err := json.Unmarshal(results[0].Source, &fi); err != nil { + return Info{}, fmt.Errorf("file meta doc parsing error: %v", err) } - return true, nil + + // calculate number of chunks required + cnt := fi.File.Size / fi.File.ChunkSize + if fi.File.Size%fi.File.ChunkSize > 0 
{ + cnt += 1 + } + + return Info{ + ID: fi.UploadID, + Source: fi.Source, + DocID: results[0].ID, + ChunkSize: fi.File.ChunkSize, + Total: fi.File.Size, + Count: int(cnt), + Start: fi.Start, + Status: Status(fi.File.Status), + }, nil +} + +type FileMetaDoc struct { + ActionID string `json:"action_id"` + AgentID string `json:"agent_id"` + Source string `json:"src"` + File FileData `json:"file"` + Contents []FileData `json:"contents"` + UploadID string `json:"upload_id"` + Start time.Time `json:"upload_start"` +} +type FileData struct { + Size int64 `json:"size"` + ChunkSize int64 `json:"ChunkSize"` + Status string `json:"Status"` + Name string `json:"name"` + Mime string `json:"mime_type"` + Hash struct { + SHA256 string `json:"sha256"` + MD5 string `json:"md5"` + } `json:"hash"` } diff --git a/model/schema.json b/model/schema.json index 4f518964f..259e53496 100644 --- a/model/schema.json +++ b/model/schema.json @@ -629,6 +629,10 @@ "last": { "description": "Whether this chunk is the final chunk of a file", "type": "boolean" + }, + "sha2": { + "description": "the sha256 hash of the chunk contents", + "type": "string" } } } From 5dc0b7b438874422b845f221c629275a49f8c862 Mon Sep 17 00:00:00 2001 From: pzl Date: Mon, 19 Dec 2022 09:08:53 -0500 Subject: [PATCH 26/51] cleanup upload writing --- internal/pkg/api/handleUpload.go | 96 +++++++++----------------------- 1 file changed, 27 insertions(+), 69 deletions(-) diff --git a/internal/pkg/api/handleUpload.go b/internal/pkg/api/handleUpload.go index 22d727b5f..4304b67a8 100644 --- a/internal/pkg/api/handleUpload.go +++ b/internal/pkg/api/handleUpload.go @@ -23,7 +23,6 @@ import ( "github.com/elastic/fleet-server/v7/internal/pkg/bulk" "github.com/elastic/fleet-server/v7/internal/pkg/cache" "github.com/elastic/fleet-server/v7/internal/pkg/config" - "github.com/elastic/fleet-server/v7/internal/pkg/limit" "github.com/elastic/fleet-server/v7/internal/pkg/logger" "github.com/elastic/fleet-server/v7/internal/pkg/upload" "github.com/elastic/fleet-server/v7/internal/pkg/upload/cbor" @@ -54,17 +53,8 @@ func (rt Router) handleUploadStart(w http.ResponseWriter, r *http.Request, ps ht err := rt.ut.handleUploadStart(&zlog, w, r) if err != nil { - cntUpload.IncError(err) - resp := NewHTTPErrResp(err) - zlog.WithLevel(resp.Level). - Err(err). - Int(ECSHTTPResponseCode, resp.StatusCode). - Int64(ECSEventDuration, time.Since(start).Nanoseconds()). 
- Msg("fail upload initiation") - - if err := resp.Write(w); err != nil { - zlog.Error().Err(err).Msg("fail writing error response") - } + writeUploadError(err, w, zlog, start, "error initiating upload process") + return } } @@ -86,45 +76,19 @@ func (rt Router) handleUploadChunk(w http.ResponseWriter, r *http.Request, ps ht // AND optionally the initial hash, both having stricter auth checks if AUTH_ENABLED { if _, err := authAPIKey(r, rt.bulker, rt.ut.cache); err != nil { - cntUpload.IncError(err) - resp := NewHTTPErrResp(err) - if err := resp.Write(w); err != nil { - zlog.Error().Err(err).Msg("failed writing error response") - } + writeUploadError(err, w, zlog, start, "authentication failure for chunk write") return } } chunkNum, err := strconv.Atoi(chunkID) if err != nil { - cntUpload.IncError(err) - resp := NewHTTPErrResp(err) - if err := resp.Write(w); err != nil { - zlog.Error().Err(err).Msg("fail writing error response") - } + writeUploadError(err, w, zlog, start, "error parsing chunk index") return } - err = rt.ut.handleUploadChunk(&zlog, w, r, id, chunkNum) - - if err != nil { - cntUpload.IncError(err) - resp := NewHTTPErrResp(err) - - // Log this as warn for visibility that limit has been reached. - // This allows customers to tune the configuration on detection of threshold. - if errors.Is(err, limit.ErrMaxLimit) { - resp.Level = zerolog.WarnLevel - } - - zlog.WithLevel(resp.Level). - Err(err). - Int(ECSHTTPResponseCode, resp.StatusCode). - Int64(ECSEventDuration, time.Since(start).Nanoseconds()). - Msg("fail upload chunk") - - if err := resp.Write(w); err != nil { - zlog.Error().Err(err).Msg("fail writing error response") - } + if err := rt.ut.handleUploadChunk(&zlog, w, r, id, chunkNum); err != nil { + writeUploadError(err, w, zlog, start, "error uploading chunk") + return } } @@ -147,36 +111,14 @@ func (rt Router) handleUploadComplete(w http.ResponseWriter, r *http.Request, ps // doc, but that means we had to doc-lookup early if AUTH_ENABLED { if _, err := authAgent(r, &agentID, rt.bulker, rt.ut.cache); err != nil { - cntUpload.IncError(err) - resp := NewHTTPErrResp(err) - if err := resp.Write(w); err != nil { - zlog.Error().Err(err).Msg("failed writing error response") - } + writeUploadError(err, w, zlog, start, "error authenticating for upload finalization") return } } - err := rt.ut.handleUploadComplete(&zlog, w, r, id) - - if err != nil { - cntUpload.IncError(err) - resp := NewHTTPErrResp(err) - - // Log this as warn for visibility that limit has been reached. - // This allows customers to tune the configuration on detection of threshold. - if errors.Is(err, limit.ErrMaxLimit) { - resp.Level = zerolog.WarnLevel - } - - zlog.WithLevel(resp.Level). - Err(err). - Int(ECSHTTPResponseCode, resp.StatusCode). - Int64(ECSEventDuration, time.Since(start).Nanoseconds()). - Msg("fail upload completion") - - if err := resp.Write(w); err != nil { - zlog.Error().Err(err).Msg("fail writing error response") - } + if err := rt.ut.handleUploadComplete(&zlog, w, r, id); err != nil { + writeUploadError(err, w, zlog, start, "error finalizing upload") + return } } @@ -401,6 +343,22 @@ func validateUploadPayload(fi FileInfo) error { return nil } +// helper function for doing all the error responsibilities +// at the HTTP edge +func writeUploadError(err error, w http.ResponseWriter, zlog zerolog.Logger, start time.Time, msg string) { + cntUpload.IncError(err) + resp := NewHTTPErrResp(err) + + zlog.WithLevel(resp.Level). + Err(err). + Int(ECSHTTPResponseCode, resp.StatusCode). 
+ Int64(ECSEventDuration, time.Since(start).Nanoseconds()). + Msg(msg) + if e := resp.Write(w); e != nil { + zlog.Error().Err(e).Msg("failure writing error response") + } +} + type UploadCompleteRequest struct { TransitHash struct { SHA256 string `json:"sha256"` From 26319ec5d91eaddbfa9165778379c3957ec1f04e Mon Sep 17 00:00:00 2001 From: pzl Date: Tue, 20 Dec 2022 09:22:53 -0500 Subject: [PATCH 27/51] refactor, parse JSON once --- internal/pkg/api/handleUpload.go | 119 +++++------------------------ internal/pkg/upload/jsdict.go | 101 ++++++++++++++++++++++++ internal/pkg/upload/jsdict_test.go | 68 +++++++++++++++++ internal/pkg/upload/upload.go | 113 +++++++++++++++++++++------ internal/pkg/upload/upload_test.go | 13 +--- 5 files changed, 278 insertions(+), 136 deletions(-) create mode 100644 internal/pkg/upload/jsdict.go create mode 100644 internal/pkg/upload/jsdict_test.go diff --git a/internal/pkg/api/handleUpload.go b/internal/pkg/api/handleUpload.go index 4304b67a8..cc414a32e 100644 --- a/internal/pkg/api/handleUpload.go +++ b/internal/pkg/api/handleUpload.go @@ -6,15 +6,12 @@ package api import ( "context" - "crypto/md5" "crypto/sha256" "encoding/hex" "encoding/json" "errors" "fmt" - "hash" "io" - "io/ioutil" "net/http" "strconv" "strings" @@ -50,9 +47,7 @@ func (rt Router) handleUploadStart(w http.ResponseWriter, r *http.Request, ps ht // authentication occurs inside here // to check that key agent ID matches the ID in the body payload yet-to-be unmarshalled - err := rt.ut.handleUploadStart(&zlog, w, r) - - if err != nil { + if err := rt.ut.handleUploadStart(&zlog, w, r); err != nil { writeUploadError(err, w, zlog, start, "error initiating upload process") return } @@ -126,7 +121,7 @@ type UploadT struct { bulker bulk.Bulk chunkClient *elasticsearch.Client cache cache.Cache - upl *upload.Uploader + uploader *upload.Uploader } func NewUploadT(cfg *config.Server, bulker bulk.Bulk, chunkClient *elasticsearch.Client, cache cache.Cache) *UploadT { @@ -139,22 +134,14 @@ func NewUploadT(cfg *config.Server, bulker bulk.Bulk, chunkClient *elasticsearch chunkClient: chunkClient, bulker: bulker, cache: cache, - upl: upload.New(chunkClient, bulker, maxFileSize, maxUploadTimer), + uploader: upload.New(chunkClient, bulker, maxFileSize, maxUploadTimer), } } func (ut *UploadT) handleUploadStart(zlog *zerolog.Logger, w http.ResponseWriter, r *http.Request) error { - - // store raw body since we will json-decode twice - // 2MB is a reasonable json payload size. 
Any more might be an indication of garbage - body, err := ioutil.ReadAll(io.LimitReader(r.Body, 2*1024*1024)) + // decode early to match agentID in the payload + payload, err := upload.ReadDict(r.Body) if err != nil { - return fmt.Errorf("error reading request: %w", err) - } - - // decode once here to access known fields we need to parse and work with - var fi FileInfo - if err := json.Unmarshal(body, &fi); err != nil { if errors.Is(err, io.EOF) { return fmt.Errorf("file info body is required: %w", err) } @@ -162,54 +149,26 @@ func (ut *UploadT) handleUploadStart(zlog *zerolog.Logger, w http.ResponseWriter } // check API key matches payload agent ID + agentID, ok := payload.Str("agent_id") + if !ok || agentID == "" { + return errors.New("required field agent_id is missing") + } if AUTH_ENABLED { - if _, err := authAgent(r, &fi.AgentID, ut.bulker, ut.cache); err != nil { + if _, err := authAgent(r, &agentID, ut.bulker, ut.cache); err != nil { return err } } - if err := validateUploadPayload(fi); err != nil { - return err - } - - docID := fmt.Sprintf("%s.%s", fi.ActionID, fi.AgentID) - - var hasher hash.Hash - var sum string - switch { - case fi.File.Hash.SHA256 != "": - hasher = sha256.New() - sum = fi.File.Hash.SHA256 - case fi.File.Hash.MD5 != "": - hasher = md5.New() - sum = fi.File.Hash.MD5 - } - - op, err := ut.upl.Begin(fi.File.Size, docID, fi.Source, sum, hasher) + // validate payload, enrich with additional fields, and write metadata doc to ES + info, err := ut.uploader.Begin(r.Context(), payload) if err != nil { return err } - // second decode here to maintain the arbitrary shape and fields we will just pass through - var reqDoc map[string]interface{} - if err := json.Unmarshal(body, &reqDoc); err != nil { - return fmt.Errorf("error parsing request json: %w", err) - } - - doc, err := uploadRequestToFileDoc(reqDoc, op.ChunkSize, op.ID) - if err != nil { - return fmt.Errorf("unable to convert request to file metadata doc: %w", err) - } - ret, err := upload.CreateFileDoc(r.Context(), ut.bulker, doc, fi.Source, docID) - if err != nil { - return err - } - - zlog.Info().Str("return", ret).Msg("wrote doc") - + // prepare and write response out, err := json.Marshal(map[string]interface{}{ - "upload_id": op.ID, - "chunk_size": op.ChunkSize, + "upload_id": info.ID, + "chunk_size": info.ChunkSize, }) if err != nil { return err @@ -229,7 +188,7 @@ func (ut *UploadT) handleUploadChunk(zlog *zerolog.Logger, w http.ResponseWriter return errors.New("chunk hash header required") } - chunkInfo, err := ut.upl.Chunk(uplID, chunkID, chunkHash) + chunkInfo, err := ut.uploader.Chunk(r.Context(), uplID, chunkID, chunkHash) if err != nil { return err } @@ -266,7 +225,7 @@ func (ut *UploadT) handleUploadComplete(zlog *zerolog.Logger, w http.ResponseWri return errors.New("transit hash required") } - info, err := ut.upl.Complete(uplID, req.TransitHash.SHA256, ut.bulker) + info, err := ut.uploader.Complete(uplID, req.TransitHash.SHA256, ut.bulker) if err != nil { return err } @@ -285,22 +244,6 @@ func (ut *UploadT) handleUploadComplete(zlog *zerolog.Logger, w http.ResponseWri return nil } -// takes the arbitrary input document from an upload request and injects -// a few known fields as it passes through -func uploadRequestToFileDoc(req map[string]interface{}, chunkSize int64, uploadID string) ([]byte, error) { - fileObj, ok := req["file"].(map[string]interface{}) - if !ok { - return nil, errors.New("invalid upload request, file is not an object") - } - - fileObj["ChunkSize"] = chunkSize - fileObj["Status"] = 
string(upload.StatusAwaiting) - req["upload_id"] = uploadID - req["upload_start"] = time.Now().UnixMilli() - - return json.Marshal(req) -} - func updateUploadStatus(ctx context.Context, bulker bulk.Bulk, info upload.Info, status upload.Status) error { data, err := json.Marshal(map[string]interface{}{ "doc": map[string]interface{}{ @@ -315,34 +258,6 @@ func updateUploadStatus(ctx context.Context, bulker bulk.Bulk, info upload.Info, return upload.UpdateFileDoc(ctx, bulker, info.Source, info.DocID, data) } -func validateUploadPayload(fi FileInfo) error { - - required := []struct { - Field string - Msg string - }{ - {fi.File.Name, "file name"}, - {fi.File.Mime, "mime_type"}, - {fi.ActionID, "action_id"}, - {fi.AgentID, "agent_id"}, - {fi.Source, "src"}, - } - - for _, req := range required { - if strings.TrimSpace(req.Field) == "" { - return fmt.Errorf("%s is required", req.Msg) - } - } - - //@todo: valid action? - //@todo: valid src? will that make future expansion harder and require FS updates? maybe just validate the index exists - - if fi.File.Size <= 0 { - return errors.New("invalid file size, size is required") - } - return nil -} - // helper function for doing all the error responsibilities // at the HTTP edge func writeUploadError(err error, w http.ResponseWriter, zlog zerolog.Logger, start time.Time, msg string) { diff --git a/internal/pkg/upload/jsdict.go b/internal/pkg/upload/jsdict.go new file mode 100644 index 000000000..7de11494c --- /dev/null +++ b/internal/pkg/upload/jsdict.go @@ -0,0 +1,101 @@ +// Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one +// or more contributor license agreements. Licensed under the Elastic License; +// you may not use this file except in compliance with the Elastic License. + +package upload + +import ( + "encoding/json" + "errors" + "fmt" + "io" + "strings" +) + +func ReadDict(r io.Reader) (JSDict, error) { + var dict JSDict + decoder := json.NewDecoder(r) + decoder.UseNumber() // can directly parse numbers from JSON -> int64 instead of float64 in-between + return dict, decoder.Decode(&dict) +} + +// helper for accessing nested properties without panics +// it allows for a safe way to do things like +// js["foo"].(map[string]interface{})["bar"].(map[string]interface{})["baz"].(string) +type JSDict map[string]interface{} + +// for a given path, retrieves the raw value in the json structure +// safely, such that if any key is missing, or if any non-leaf key +// is not an object, returns false instead of panicking +func (j JSDict) Val(keys ...string) (interface{}, bool) { + if len(keys) == 0 { + return nil, false + } + var m map[string]interface{} = j + for i, k := range keys { + value, ok := m[k] + if !ok { + return nil, false + } + if i == len(keys)-1 { + return value, true + } + m, ok = value.(map[string]interface{}) + if !ok { + return nil, false + } + } + return nil, false +} + +// convenience for safely requesting a nested string value +func (j JSDict) Str(keys ...string) (string, bool) { + if val, ok := j.Val(keys...); ok { + s, ok := val.(string) + return s, ok + } + return "", false +} + +// convenience for safely requesting a nested int64 value +func (j JSDict) Int64(keys ...string) (int64, bool) { + if val, ok := j.Val(keys...); ok { + switch v := val.(type) { + case float64: // standard json decode/unmarshal + return int64(v), true + case json.Number: // json UseNumber() to get int64 directly + n, err := v.Int64() + return n, err == nil + default: + return 0, false + } + } + return 0, false +} + +// write values 
to possibly nested locations +func (j JSDict) Put(value interface{}, keys ...string) error { + if len(keys) == 0 { + return errors.New("path not provided") + } + // simple case + if len(keys) == 1 { + j[keys[0]] = value + return nil + } + var m map[string]interface{} = j + for i, k := range keys { + if i == len(keys)-1 { + m[k] = value + return nil + } + // otherwise, we have more to nest. Make sure this level is an object + x, ok := m[k].(map[string]interface{}) + if !ok { + return fmt.Errorf("unable to write to %s, missing property at %s", strings.Join(keys, "."), k) + } + m = x + } + + return nil +} diff --git a/internal/pkg/upload/jsdict_test.go b/internal/pkg/upload/jsdict_test.go new file mode 100644 index 000000000..47a246d36 --- /dev/null +++ b/internal/pkg/upload/jsdict_test.go @@ -0,0 +1,68 @@ +// Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one +// or more contributor license agreements. Licensed under the Elastic License; +// you may not use this file except in compliance with the Elastic License. + +package upload + +import ( + "bytes" + "encoding/json" + "fmt" + "strings" + "testing" + + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" +) + +func TestJSDictDecodesValidJSON(t *testing.T) { + inputs := []string{ + `{}`, + `{"foo":"bar"}`, + `{"action_id":"32a4c404-1418-4bba-8f2b-e704faf897f8","agent_id":"f2eec0f8-468d-4950-9fd3-9c260584885c","file":{"ext":"gif","mime_type":"image/gif","mode":"0644","name":"wallet.gif","path":"/tmp/meme.gif","size":3417671,"type":"file"},"src":"endpoint"}`, + } + + for i, tc := range inputs { + t.Run(fmt.Sprintf("json-%d", i), func(t *testing.T) { + _, err := ReadDict(strings.NewReader(tc)) + assert.NoError(t, err) + }) + } + +} + +func TestJSDictFetchIntDefaultMethod(t *testing.T) { + // number chosen to be larger than any int32 or uint32 to hold + input := bytes.NewReader([]byte(`{"num":5000000000}`)) + + // manually decoding into a JSDict to ignore whatever default behavior + // in ReadDict(), to ensure Int retrieval works with either json + // decoding behavior + var d JSDict + err := json.NewDecoder(input).Decode(&d) + require.NoError(t, err) + + val, ok := d.Int64("num") + assert.True(t, ok, "num conversion should return ok=true status") + + assert.Equal(t, int64(5000000000), val) +} + +func TestJSDictFetchIntNumberMethod(t *testing.T) { + // number chosen to be larger than any int32 or uint32 to hold + input := bytes.NewReader([]byte(`{"num":5000000000}`)) + + // manually decoding into a JSDict to ignore whatever default behavior + // in ReadDict(), to ensure Int retrieval works with either json + // decoding behavior + var d JSDict + decoder := json.NewDecoder(input) + decoder.UseNumber() // This defines a specific number decoding strategy + err := decoder.Decode(&d) + require.NoError(t, err) + + val, ok := d.Int64("num") + assert.True(t, ok, "num conversion should return ok=true status") + + assert.Equal(t, int64(5000000000), val) +} diff --git a/internal/pkg/upload/upload.go b/internal/pkg/upload/upload.go index d8f3d08fa..65da9c4e4 100644 --- a/internal/pkg/upload/upload.go +++ b/internal/pkg/upload/upload.go @@ -9,7 +9,6 @@ import ( "encoding/json" "errors" "fmt" - "hash" "strconv" "strings" "sync" @@ -96,10 +95,21 @@ func New(chunkClient *elasticsearch.Client, bulker bulk.Bulk, sizeLimit int64, t // Start an upload operation, as long as the max concurrent has not been reached // returns the upload ID -func (u *Uploader) Begin(size int64, docID string, source string, hashsum 
string, hasher hash.Hash) (Info, error) { - if size <= 0 { - return Info{}, errors.New("invalid file size") +func (u *Uploader) Begin(ctx context.Context, data JSDict) (Info, error) { + if data == nil { + return Info{}, errors.New("upload start payload required") } + + /* + Validation and Input parsing + */ + + // make sure all required fields are present and non-empty + if err := validateUploadPayload(data); err != nil { + return Info{}, err + } + + size, _ := data.Int64("file", "size") if size > u.sizeLimit { return Info{}, ErrFileSizeTooLarge } @@ -110,42 +120,72 @@ func (u *Uploader) Begin(size int64, docID string, source string, hashsum string } id := uid.String() + // grab required fields that were checked already in validation step + agentID, _ := data.Str("agent_id") + actionID, _ := data.Str("action_id") + source, _ := data.Str("src") + docID := fmt.Sprintf("%s.%s", actionID, agentID) + info := Info{ ID: id, DocID: docID, ChunkSize: MaxChunkSize, Source: source, Total: size, - //Hasher: hasher, - //HashSum: hashsum, } - cnt := info.Total / info.ChunkSize + chunkCount := info.Total / info.ChunkSize if info.Total%info.ChunkSize > 0 { - cnt += 1 + chunkCount += 1 + } + info.Count = int(chunkCount) + + /* + Enrich document with additional server-side fields + */ + + if err := data.Put(info.ChunkSize, "file", "ChunkSize"); err != nil { + return Info{}, err + } + if err := data.Put(string(StatusAwaiting), "file", "Status"); err != nil { + return Info{}, err + } + if err := data.Put(id, "upload_id"); err != nil { + return Info{}, err + } + if err := data.Put(time.Now().UnixMilli(), "upload_start"); err != nil { + return Info{}, err + } + + doc, err := json.Marshal(data) + if err != nil { + return Info{}, err + } + + _, err = CreateFileDoc(ctx, u.bulker, doc, source, docID) + if err != nil { + return Info{}, err } - info.Count = int(cnt) return info, nil } -func (u *Uploader) Chunk(uplID string, chunkID int, chunkHash string) (ChunkInfo, error) { +func (u *Uploader) Chunk(ctx context.Context, uplID string, chunkID int, chunkHash string) (ChunkInfo, error) { // Fetch metadata doc, if not cached - u.mu.RLock() - defer u.mu.RUnlock() + //u.mu.RLock() + //defer u.mu.RUnlock() info, exist := u.metaCache[uplID] if !exist { - u.mu.Lock() - defer u.mu.Unlock() + //u.mu.Lock() + //defer u.mu.Unlock() // fetch and write - //resp, err := u.es.Get() - - found := false - if !found { - return ChunkInfo{}, ErrInvalidUploadID + var err error + info, err = u.GetUploadInfo(ctx, uplID) + if err != nil { + return ChunkInfo{}, fmt.Errorf("unable to retrieve upload info: %w", err) } - + u.metaCache[uplID] = info } if info.Expired(u.timeLimit) { @@ -299,10 +339,37 @@ func (u *Uploader) verifyChunkData(info Info, transitHash string, bulker bulk.Bu return true, nil } +func validateUploadPayload(info JSDict) error { + + required := [][]string{ + {"file", "name"}, + {"file", "mime_type"}, + {"action_id"}, + {"agent_id"}, + {"src"}, + } + + for _, fields := range required { + if value, ok := info.Str(fields...); !ok || strings.TrimSpace(value) == "" { + return fmt.Errorf("%s is required", strings.Join(fields, ".")) + } + } + + //@todo: valid action? + //@todo: valid src? will that make future expansion harder and require FS updates? 
maybe just validate the index exists + + if size, ok := info.Int64("file", "size"); !ok { + return errors.New("file.size is required") + } else if size <= 0 { + return fmt.Errorf("invalid file.size: %d", size) + } + return nil +} + // retrieves upload metadata info from elasticsearch // which may be locally cached -func (u *Uploader) GetUploadInfo(uploadID string) (Info, error) { - results, err := GetFileDoc(context.TODO(), u.bulker, uploadID) +func (u *Uploader) GetUploadInfo(ctx context.Context, uploadID string) (Info, error) { + results, err := GetFileDoc(ctx, u.bulker, uploadID) if err != nil { return Info{}, err } @@ -315,7 +382,7 @@ func (u *Uploader) GetUploadInfo(uploadID string) (Info, error) { var fi FileMetaDoc if err := json.Unmarshal(results[0].Source, &fi); err != nil { - return Info{}, fmt.Errorf("file meta doc parsing error: %v", err) + return Info{}, fmt.Errorf("file meta doc parsing error: %w", err) } // calculate number of chunks required diff --git a/internal/pkg/upload/upload_test.go b/internal/pkg/upload/upload_test.go index ea9d24bee..d4da69cfa 100644 --- a/internal/pkg/upload/upload_test.go +++ b/internal/pkg/upload/upload_test.go @@ -4,17 +4,7 @@ package upload -import ( - "strconv" - "testing" - "time" - - "github.com/stretchr/testify/assert" - "github.com/stretchr/testify/require" - - testlog "github.com/elastic/fleet-server/v7/internal/pkg/testing/log" -) - +/* func TestMaxParallelUploadOpsReached(t *testing.T) { _ = testlog.SetLogger(t) opLimit := 4 @@ -185,3 +175,4 @@ func TestMaxFileSize(t *testing.T) { }) } } +*/ From 08c211af202ab1ca462bba53dc0fa52cf4cea0ac Mon Sep 17 00:00:00 2001 From: pzl Date: Tue, 20 Dec 2022 15:05:13 -0500 Subject: [PATCH 28/51] check transit hash, request chunk fields manually use scripted field to get chunk size --- internal/pkg/api/handleUpload.go | 12 +- internal/pkg/api/schema.go | 18 - internal/pkg/bulk/schema_easyjson.go | 902 ++++++++++++++++----------- internal/pkg/es/result.go | 13 +- internal/pkg/upload/cbor/chunk.go | 1 + internal/pkg/upload/es.go | 141 ++++- internal/pkg/upload/upload.go | 202 +++--- 7 files changed, 777 insertions(+), 512 deletions(-) diff --git a/internal/pkg/api/handleUpload.go b/internal/pkg/api/handleUpload.go index cc414a32e..a25cf3079 100644 --- a/internal/pkg/api/handleUpload.go +++ b/internal/pkg/api/handleUpload.go @@ -31,7 +31,7 @@ import ( const ( // TODO: move to a config - maxFileSize = 100 * 104857600 // 100 MiB + maxFileSize = 104857600 // 100 MiB maxUploadTimer = 24 * time.Hour // temp for easy development @@ -188,7 +188,7 @@ func (ut *UploadT) handleUploadChunk(zlog *zerolog.Logger, w http.ResponseWriter return errors.New("chunk hash header required") } - chunkInfo, err := ut.uploader.Chunk(r.Context(), uplID, chunkID, chunkHash) + upinfo, chunkInfo, err := ut.uploader.Chunk(r.Context(), uplID, chunkID, chunkHash) if err != nil { return err } @@ -200,14 +200,14 @@ func (ut *UploadT) handleUploadChunk(zlog *zerolog.Logger, w http.ResponseWriter hash := sha256.New() copier := io.TeeReader(data, hash) - ce := cbor.NewChunkWriter(copier, chunkInfo.Final, chunkInfo.Upload.DocID, chunkInfo.Hash, chunkInfo.Upload.ChunkSize) - if err := upload.IndexChunk(r.Context(), ut.chunkClient, ce, chunkInfo.Upload.Source, chunkInfo.Upload.DocID, chunkInfo.ID); err != nil { + ce := cbor.NewChunkWriter(copier, chunkInfo.Last, chunkInfo.BID, chunkInfo.SHA2, upinfo.ChunkSize) + if err := upload.IndexChunk(r.Context(), ut.chunkClient, ce, upinfo.Source, chunkInfo.BID, chunkInfo.Pos); err != nil { return err } 
hashsum := hex.EncodeToString(hash.Sum(nil)) - if strings.ToLower(chunkHash) != strings.ToLower(hashsum) { + if !strings.EqualFold(chunkHash, hashsum) { // @todo: delete document, since we wrote it, but the hash was invalid return upload.ErrHashMismatch } @@ -225,7 +225,7 @@ func (ut *UploadT) handleUploadComplete(zlog *zerolog.Logger, w http.ResponseWri return errors.New("transit hash required") } - info, err := ut.uploader.Complete(uplID, req.TransitHash.SHA256, ut.bulker) + info, err := ut.uploader.Complete(r.Context(), uplID, req.TransitHash.SHA256) if err != nil { return err } diff --git a/internal/pkg/api/schema.go b/internal/pkg/api/schema.go index cc46dfe66..fdca0648a 100644 --- a/internal/pkg/api/schema.go +++ b/internal/pkg/api/schema.go @@ -175,21 +175,3 @@ type StatusResponse struct { Status string `json:"status"` Version *StatusResponseVersion `json:"version,omitempty"` } - -type FileInfo struct { - ActionID string `json:"action_id"` - AgentID string `json:"agent_id"` - Source string `json:"src"` - File FileData `json:"file"` - Contents []FileData `json:"contents"` -} - -type FileData struct { - Size int64 `json:"size"` - Name string `json:"name"` - Mime string `json:"mime_type"` - Hash struct { - SHA256 string `json:"sha256"` - MD5 string `json:"md5"` - } `json:"hash"` -} diff --git a/internal/pkg/bulk/schema_easyjson.go b/internal/pkg/bulk/schema_easyjson.go index ff9720747..6fb44064b 100644 --- a/internal/pkg/bulk/schema_easyjson.go +++ b/internal/pkg/bulk/schema_easyjson.go @@ -8,6 +8,7 @@ package bulk import ( json "encoding/json" + es "github.com/elastic/fleet-server/v7/internal/pkg/es" easyjson "github.com/mailru/easyjson" jlexer "github.com/mailru/easyjson/jlexer" @@ -22,117 +23,7 @@ var ( _ easyjson.Marshaler ) -func easyjsonCef4e921DecodeGithubComElasticFleetServerV7InternalPkgBulk(in *jlexer.Lexer, out *bulkIndexerResponse) { - isTopLevel := in.IsStart() - if in.IsNull() { - if isTopLevel { - in.Consumed() - } - in.Skip() - return - } - in.Delim('{') - for !in.IsDelim('}') { - key := in.UnsafeFieldName(false) - in.WantColon() - if in.IsNull() { - in.Skip() - in.WantComma() - continue - } - switch key { - case "took": - out.Took = int(in.Int()) - case "errors": - out.HasErrors = bool(in.Bool()) - case "items": - if in.IsNull() { - in.Skip() - out.Items = nil - } else { - in.Delim('[') - if out.Items == nil { - if !in.IsDelim(']') { - out.Items = make([]bulkStubItem, 0, 2) - } else { - out.Items = []bulkStubItem{} - } - } else { - out.Items = (out.Items)[:0] - } - for !in.IsDelim(']') { - var v1 bulkStubItem - easyjsonCef4e921DecodeGithubComElasticFleetServerV7InternalPkgBulk1(in, &v1) - out.Items = append(out.Items, v1) - in.WantComma() - } - in.Delim(']') - } - default: - in.SkipRecursive() - } - in.WantComma() - } - in.Delim('}') - if isTopLevel { - in.Consumed() - } -} -func easyjsonCef4e921EncodeGithubComElasticFleetServerV7InternalPkgBulk(out *jwriter.Writer, in bulkIndexerResponse) { - out.RawByte('{') - first := true - _ = first - { - const prefix string = ",\"took\":" - out.RawString(prefix[1:]) - out.Int(int(in.Took)) - } - { - const prefix string = ",\"errors\":" - out.RawString(prefix) - out.Bool(bool(in.HasErrors)) - } - if len(in.Items) != 0 { - const prefix string = ",\"items\":" - out.RawString(prefix) - { - out.RawByte('[') - for v2, v3 := range in.Items { - if v2 > 0 { - out.RawByte(',') - } - easyjsonCef4e921EncodeGithubComElasticFleetServerV7InternalPkgBulk1(out, v3) - } - out.RawByte(']') - } - } - out.RawByte('}') -} - -// MarshalJSON supports 
json.Marshaler interface -func (v bulkIndexerResponse) MarshalJSON() ([]byte, error) { - w := jwriter.Writer{} - easyjsonCef4e921EncodeGithubComElasticFleetServerV7InternalPkgBulk(&w, v) - return w.Buffer.BuildBytes(), w.Error -} - -// MarshalEasyJSON supports easyjson.Marshaler interface -func (v bulkIndexerResponse) MarshalEasyJSON(w *jwriter.Writer) { - easyjsonCef4e921EncodeGithubComElasticFleetServerV7InternalPkgBulk(w, v) -} - -// UnmarshalJSON supports json.Unmarshaler interface -func (v *bulkIndexerResponse) UnmarshalJSON(data []byte) error { - r := jlexer.Lexer{Data: data} - easyjsonCef4e921DecodeGithubComElasticFleetServerV7InternalPkgBulk(&r, v) - return r.Error() -} - -// UnmarshalEasyJSON supports easyjson.Unmarshaler interface -func (v *bulkIndexerResponse) UnmarshalEasyJSON(l *jlexer.Lexer) { - easyjsonCef4e921DecodeGithubComElasticFleetServerV7InternalPkgBulk(l, v) -} -func easyjsonCef4e921DecodeGithubComElasticFleetServerV7InternalPkgBulk1(in *jlexer.Lexer, out *bulkStubItem) { +func easyjsonCef4e921DecodeGithubComElasticFleetServerV7InternalPkgBulk(in *jlexer.Lexer, out *bulkStubItem) { isTopLevel := in.IsStart() if in.IsNull() { if isTopLevel { @@ -159,7 +50,7 @@ func easyjsonCef4e921DecodeGithubComElasticFleetServerV7InternalPkgBulk1(in *jle if out.Index == nil { out.Index = new(BulkIndexerResponseItem) } - easyjsonCef4e921DecodeGithubComElasticFleetServerV7InternalPkgBulk2(in, out.Index) + (*out.Index).UnmarshalEasyJSON(in) } case "delete": if in.IsNull() { @@ -169,7 +60,7 @@ func easyjsonCef4e921DecodeGithubComElasticFleetServerV7InternalPkgBulk1(in *jle if out.Delete == nil { out.Delete = new(BulkIndexerResponseItem) } - easyjsonCef4e921DecodeGithubComElasticFleetServerV7InternalPkgBulk2(in, out.Delete) + (*out.Delete).UnmarshalEasyJSON(in) } case "create": if in.IsNull() { @@ -179,7 +70,7 @@ func easyjsonCef4e921DecodeGithubComElasticFleetServerV7InternalPkgBulk1(in *jle if out.Create == nil { out.Create = new(BulkIndexerResponseItem) } - easyjsonCef4e921DecodeGithubComElasticFleetServerV7InternalPkgBulk2(in, out.Create) + (*out.Create).UnmarshalEasyJSON(in) } case "update": if in.IsNull() { @@ -189,7 +80,7 @@ func easyjsonCef4e921DecodeGithubComElasticFleetServerV7InternalPkgBulk1(in *jle if out.Update == nil { out.Update = new(BulkIndexerResponseItem) } - easyjsonCef4e921DecodeGithubComElasticFleetServerV7InternalPkgBulk2(in, out.Update) + (*out.Update).UnmarshalEasyJSON(in) } default: in.SkipRecursive() @@ -201,7 +92,7 @@ func easyjsonCef4e921DecodeGithubComElasticFleetServerV7InternalPkgBulk1(in *jle in.Consumed() } } -func easyjsonCef4e921EncodeGithubComElasticFleetServerV7InternalPkgBulk1(out *jwriter.Writer, in bulkStubItem) { +func easyjsonCef4e921EncodeGithubComElasticFleetServerV7InternalPkgBulk(out *jwriter.Writer, in bulkStubItem) { out.RawByte('{') first := true _ = first @@ -211,7 +102,7 @@ func easyjsonCef4e921EncodeGithubComElasticFleetServerV7InternalPkgBulk1(out *jw if in.Index == nil { out.RawString("null") } else { - easyjsonCef4e921EncodeGithubComElasticFleetServerV7InternalPkgBulk2(out, *in.Index) + (*in.Index).MarshalEasyJSON(out) } } { @@ -220,7 +111,7 @@ func easyjsonCef4e921EncodeGithubComElasticFleetServerV7InternalPkgBulk1(out *jw if in.Delete == nil { out.RawString("null") } else { - easyjsonCef4e921EncodeGithubComElasticFleetServerV7InternalPkgBulk2(out, *in.Delete) + (*in.Delete).MarshalEasyJSON(out) } } { @@ -229,7 +120,7 @@ func easyjsonCef4e921EncodeGithubComElasticFleetServerV7InternalPkgBulk1(out *jw if in.Create == nil { 
out.RawString("null") } else { - easyjsonCef4e921EncodeGithubComElasticFleetServerV7InternalPkgBulk2(out, *in.Create) + (*in.Create).MarshalEasyJSON(out) } } { @@ -238,187 +129,36 @@ func easyjsonCef4e921EncodeGithubComElasticFleetServerV7InternalPkgBulk1(out *jw if in.Update == nil { out.RawString("null") } else { - easyjsonCef4e921EncodeGithubComElasticFleetServerV7InternalPkgBulk2(out, *in.Update) - } - } - out.RawByte('}') -} -func easyjsonCef4e921DecodeGithubComElasticFleetServerV7InternalPkgBulk2(in *jlexer.Lexer, out *BulkIndexerResponseItem) { - isTopLevel := in.IsStart() - if in.IsNull() { - if isTopLevel { - in.Consumed() + (*in.Update).MarshalEasyJSON(out) } - in.Skip() - return - } - in.Delim('{') - for !in.IsDelim('}') { - key := in.UnsafeFieldName(false) - in.WantColon() - if in.IsNull() { - in.Skip() - in.WantComma() - continue - } - switch key { - case "_id": - out.DocumentID = string(in.String()) - case "status": - out.Status = int(in.Int()) - case "error": - if in.IsNull() { - in.Skip() - out.Error = nil - } else { - if out.Error == nil { - out.Error = new(es.ErrorT) - } - easyjsonCef4e921DecodeGithubComElasticFleetServerV7InternalPkgEs(in, out.Error) - } - default: - in.SkipRecursive() - } - in.WantComma() - } - in.Delim('}') - if isTopLevel { - in.Consumed() - } -} -func easyjsonCef4e921EncodeGithubComElasticFleetServerV7InternalPkgBulk2(out *jwriter.Writer, in BulkIndexerResponseItem) { - out.RawByte('{') - first := true - _ = first - { - const prefix string = ",\"_id\":" - out.RawString(prefix[1:]) - out.String(string(in.DocumentID)) - } - { - const prefix string = ",\"status\":" - out.RawString(prefix) - out.Int(int(in.Status)) - } - if in.Error != nil { - const prefix string = ",\"error\":" - out.RawString(prefix) - easyjsonCef4e921EncodeGithubComElasticFleetServerV7InternalPkgEs(out, *in.Error) } out.RawByte('}') } -func easyjsonCef4e921DecodeGithubComElasticFleetServerV7InternalPkgEs(in *jlexer.Lexer, out *es.ErrorT) { - isTopLevel := in.IsStart() - if in.IsNull() { - if isTopLevel { - in.Consumed() - } - in.Skip() - return - } - in.Delim('{') - for !in.IsDelim('}') { - key := in.UnsafeFieldName(false) - in.WantColon() - if in.IsNull() { - in.Skip() - in.WantComma() - continue - } - switch key { - case "type": - out.Type = string(in.String()) - case "reason": - out.Reason = string(in.String()) - case "caused_by": - easyjsonCef4e921Decode(in, &out.Cause) - default: - in.SkipRecursive() - } - in.WantComma() - } - in.Delim('}') - if isTopLevel { - in.Consumed() - } + +// MarshalJSON supports json.Marshaler interface +func (v bulkStubItem) MarshalJSON() ([]byte, error) { + w := jwriter.Writer{} + easyjsonCef4e921EncodeGithubComElasticFleetServerV7InternalPkgBulk(&w, v) + return w.Buffer.BuildBytes(), w.Error } -func easyjsonCef4e921EncodeGithubComElasticFleetServerV7InternalPkgEs(out *jwriter.Writer, in es.ErrorT) { - out.RawByte('{') - first := true - _ = first - { - const prefix string = ",\"type\":" - out.RawString(prefix[1:]) - out.String(string(in.Type)) - } - { - const prefix string = ",\"reason\":" - out.RawString(prefix) - out.String(string(in.Reason)) - } - { - const prefix string = ",\"caused_by\":" - out.RawString(prefix) - easyjsonCef4e921Encode(out, in.Cause) - } - out.RawByte('}') + +// MarshalEasyJSON supports easyjson.Marshaler interface +func (v bulkStubItem) MarshalEasyJSON(w *jwriter.Writer) { + easyjsonCef4e921EncodeGithubComElasticFleetServerV7InternalPkgBulk(w, v) } -func easyjsonCef4e921Decode(in *jlexer.Lexer, out *struct { - Type string 
`json:"type"` - Reason string `json:"reason"` -}) { - isTopLevel := in.IsStart() - if in.IsNull() { - if isTopLevel { - in.Consumed() - } - in.Skip() - return - } - in.Delim('{') - for !in.IsDelim('}') { - key := in.UnsafeFieldName(false) - in.WantColon() - if in.IsNull() { - in.Skip() - in.WantComma() - continue - } - switch key { - case "type": - out.Type = string(in.String()) - case "reason": - out.Reason = string(in.String()) - default: - in.SkipRecursive() - } - in.WantComma() - } - in.Delim('}') - if isTopLevel { - in.Consumed() - } + +// UnmarshalJSON supports json.Unmarshaler interface +func (v *bulkStubItem) UnmarshalJSON(data []byte) error { + r := jlexer.Lexer{Data: data} + easyjsonCef4e921DecodeGithubComElasticFleetServerV7InternalPkgBulk(&r, v) + return r.Error() } -func easyjsonCef4e921Encode(out *jwriter.Writer, in struct { - Type string `json:"type"` - Reason string `json:"reason"` -}) { - out.RawByte('{') - first := true - _ = first - { - const prefix string = ",\"type\":" - out.RawString(prefix[1:]) - out.String(string(in.Type)) - } - { - const prefix string = ",\"reason\":" - out.RawString(prefix) - out.String(string(in.Reason)) - } - out.RawByte('}') + +// UnmarshalEasyJSON supports easyjson.Unmarshaler interface +func (v *bulkStubItem) UnmarshalEasyJSON(l *jlexer.Lexer) { + easyjsonCef4e921DecodeGithubComElasticFleetServerV7InternalPkgBulk(l, v) } -func easyjsonCef4e921DecodeGithubComElasticFleetServerV7InternalPkgBulk3(in *jlexer.Lexer, out *MsearchResponse) { +func easyjsonCef4e921DecodeGithubComElasticFleetServerV7InternalPkgBulk1(in *jlexer.Lexer, out *bulkIndexerResponse) { isTopLevel := in.IsStart() if in.IsNull() { if isTopLevel { @@ -437,31 +177,33 @@ func easyjsonCef4e921DecodeGithubComElasticFleetServerV7InternalPkgBulk3(in *jle continue } switch key { - case "responses": + case "took": + out.Took = int(in.Int()) + case "errors": + out.HasErrors = bool(in.Bool()) + case "items": if in.IsNull() { in.Skip() - out.Responses = nil + out.Items = nil } else { in.Delim('[') - if out.Responses == nil { + if out.Items == nil { if !in.IsDelim(']') { - out.Responses = make([]MsearchResponseItem, 0, 0) + out.Items = make([]bulkStubItem, 0, 2) } else { - out.Responses = []MsearchResponseItem{} + out.Items = []bulkStubItem{} } } else { - out.Responses = (out.Responses)[:0] + out.Items = (out.Items)[:0] } for !in.IsDelim(']') { - var v4 MsearchResponseItem - easyjsonCef4e921DecodeGithubComElasticFleetServerV7InternalPkgBulk4(in, &v4) - out.Responses = append(out.Responses, v4) + var v1 bulkStubItem + (v1).UnmarshalEasyJSON(in) + out.Items = append(out.Items, v1) in.WantComma() } in.Delim(']') } - case "took": - out.Took = int(in.Int()) default: in.SkipRecursive() } @@ -472,58 +214,61 @@ func easyjsonCef4e921DecodeGithubComElasticFleetServerV7InternalPkgBulk3(in *jle in.Consumed() } } -func easyjsonCef4e921EncodeGithubComElasticFleetServerV7InternalPkgBulk3(out *jwriter.Writer, in MsearchResponse) { +func easyjsonCef4e921EncodeGithubComElasticFleetServerV7InternalPkgBulk1(out *jwriter.Writer, in bulkIndexerResponse) { out.RawByte('{') first := true _ = first { - const prefix string = ",\"responses\":" + const prefix string = ",\"took\":" out.RawString(prefix[1:]) - if in.Responses == nil && (out.Flags&jwriter.NilSliceAsEmpty) == 0 { - out.RawString("null") - } else { + out.Int(int(in.Took)) + } + { + const prefix string = ",\"errors\":" + out.RawString(prefix) + out.Bool(bool(in.HasErrors)) + } + if len(in.Items) != 0 { + const prefix string = ",\"items\":" + 
out.RawString(prefix) + { out.RawByte('[') - for v5, v6 := range in.Responses { - if v5 > 0 { + for v2, v3 := range in.Items { + if v2 > 0 { out.RawByte(',') } - easyjsonCef4e921EncodeGithubComElasticFleetServerV7InternalPkgBulk4(out, v6) + (v3).MarshalEasyJSON(out) } out.RawByte(']') } } - { - const prefix string = ",\"took\":" - out.RawString(prefix) - out.Int(int(in.Took)) - } out.RawByte('}') } // MarshalJSON supports json.Marshaler interface -func (v MsearchResponse) MarshalJSON() ([]byte, error) { +func (v bulkIndexerResponse) MarshalJSON() ([]byte, error) { w := jwriter.Writer{} - easyjsonCef4e921EncodeGithubComElasticFleetServerV7InternalPkgBulk3(&w, v) + easyjsonCef4e921EncodeGithubComElasticFleetServerV7InternalPkgBulk1(&w, v) return w.Buffer.BuildBytes(), w.Error } // MarshalEasyJSON supports easyjson.Marshaler interface -func (v MsearchResponse) MarshalEasyJSON(w *jwriter.Writer) { - easyjsonCef4e921EncodeGithubComElasticFleetServerV7InternalPkgBulk3(w, v) +func (v bulkIndexerResponse) MarshalEasyJSON(w *jwriter.Writer) { + easyjsonCef4e921EncodeGithubComElasticFleetServerV7InternalPkgBulk1(w, v) } // UnmarshalJSON supports json.Unmarshaler interface -func (v *MsearchResponse) UnmarshalJSON(data []byte) error { +func (v *bulkIndexerResponse) UnmarshalJSON(data []byte) error { r := jlexer.Lexer{Data: data} - easyjsonCef4e921DecodeGithubComElasticFleetServerV7InternalPkgBulk3(&r, v) + easyjsonCef4e921DecodeGithubComElasticFleetServerV7InternalPkgBulk1(&r, v) return r.Error() } // UnmarshalEasyJSON supports easyjson.Unmarshaler interface -func (v *MsearchResponse) UnmarshalEasyJSON(l *jlexer.Lexer) { - easyjsonCef4e921DecodeGithubComElasticFleetServerV7InternalPkgBulk3(l, v) +func (v *bulkIndexerResponse) UnmarshalEasyJSON(l *jlexer.Lexer) { + easyjsonCef4e921DecodeGithubComElasticFleetServerV7InternalPkgBulk1(l, v) } -func easyjsonCef4e921DecodeGithubComElasticFleetServerV7InternalPkgBulk4(in *jlexer.Lexer, out *MsearchResponseItem) { +func easyjsonCef4e921DecodeGithubComElasticFleetServerV7InternalPkgBulk2(in *jlexer.Lexer, out *MsearchResponseItem) { isTopLevel := in.IsStart() if in.IsNull() { if isTopLevel { @@ -549,9 +294,9 @@ func easyjsonCef4e921DecodeGithubComElasticFleetServerV7InternalPkgBulk4(in *jle case "timed_out": out.TimedOut = bool(in.Bool()) case "_shards": - easyjsonCef4e921Decode1(in, &out.Shards) + easyjsonCef4e921Decode(in, &out.Shards) case "hits": - easyjsonCef4e921DecodeGithubComElasticFleetServerV7InternalPkgEs1(in, &out.Hits) + easyjsonCef4e921DecodeGithubComElasticFleetServerV7InternalPkgEs(in, &out.Hits) case "aggregations": if in.IsNull() { in.Skip() @@ -565,9 +310,9 @@ func easyjsonCef4e921DecodeGithubComElasticFleetServerV7InternalPkgBulk4(in *jle for !in.IsDelim('}') { key := string(in.String()) in.WantColon() - var v7 es.Aggregation - easyjsonCef4e921DecodeGithubComElasticFleetServerV7InternalPkgEs2(in, &v7) - (out.Aggregations)[key] = v7 + var v4 es.Aggregation + easyjsonCef4e921DecodeGithubComElasticFleetServerV7InternalPkgEs1(in, &v4) + (out.Aggregations)[key] = v4 in.WantComma() } in.Delim('}') @@ -580,7 +325,7 @@ func easyjsonCef4e921DecodeGithubComElasticFleetServerV7InternalPkgBulk4(in *jle if out.Error == nil { out.Error = new(es.ErrorT) } - easyjsonCef4e921DecodeGithubComElasticFleetServerV7InternalPkgEs(in, out.Error) + easyjsonCef4e921DecodeGithubComElasticFleetServerV7InternalPkgEs2(in, out.Error) } default: in.SkipRecursive() @@ -592,7 +337,7 @@ func easyjsonCef4e921DecodeGithubComElasticFleetServerV7InternalPkgBulk4(in *jle 
in.Consumed() } } -func easyjsonCef4e921EncodeGithubComElasticFleetServerV7InternalPkgBulk4(out *jwriter.Writer, in MsearchResponseItem) { +func easyjsonCef4e921EncodeGithubComElasticFleetServerV7InternalPkgBulk2(out *jwriter.Writer, in MsearchResponseItem) { out.RawByte('{') first := true _ = first @@ -614,40 +359,175 @@ func easyjsonCef4e921EncodeGithubComElasticFleetServerV7InternalPkgBulk4(out *jw { const prefix string = ",\"_shards\":" out.RawString(prefix) - easyjsonCef4e921Encode1(out, in.Shards) + easyjsonCef4e921Encode(out, in.Shards) } { const prefix string = ",\"hits\":" out.RawString(prefix) - easyjsonCef4e921EncodeGithubComElasticFleetServerV7InternalPkgEs1(out, in.Hits) + easyjsonCef4e921EncodeGithubComElasticFleetServerV7InternalPkgEs(out, in.Hits) } if len(in.Aggregations) != 0 { const prefix string = ",\"aggregations\":" out.RawString(prefix) { out.RawByte('{') - v8First := true - for v8Name, v8Value := range in.Aggregations { - if v8First { - v8First = false + v5First := true + for v5Name, v5Value := range in.Aggregations { + if v5First { + v5First = false } else { out.RawByte(',') } - out.String(string(v8Name)) + out.String(string(v5Name)) out.RawByte(':') - easyjsonCef4e921EncodeGithubComElasticFleetServerV7InternalPkgEs2(out, v8Value) + easyjsonCef4e921EncodeGithubComElasticFleetServerV7InternalPkgEs1(out, v5Value) } out.RawByte('}') } } - if in.Error != nil { - const prefix string = ",\"error\":" + if in.Error != nil { + const prefix string = ",\"error\":" + out.RawString(prefix) + easyjsonCef4e921EncodeGithubComElasticFleetServerV7InternalPkgEs2(out, *in.Error) + } + out.RawByte('}') +} + +// MarshalJSON supports json.Marshaler interface +func (v MsearchResponseItem) MarshalJSON() ([]byte, error) { + w := jwriter.Writer{} + easyjsonCef4e921EncodeGithubComElasticFleetServerV7InternalPkgBulk2(&w, v) + return w.Buffer.BuildBytes(), w.Error +} + +// MarshalEasyJSON supports easyjson.Marshaler interface +func (v MsearchResponseItem) MarshalEasyJSON(w *jwriter.Writer) { + easyjsonCef4e921EncodeGithubComElasticFleetServerV7InternalPkgBulk2(w, v) +} + +// UnmarshalJSON supports json.Unmarshaler interface +func (v *MsearchResponseItem) UnmarshalJSON(data []byte) error { + r := jlexer.Lexer{Data: data} + easyjsonCef4e921DecodeGithubComElasticFleetServerV7InternalPkgBulk2(&r, v) + return r.Error() +} + +// UnmarshalEasyJSON supports easyjson.Unmarshaler interface +func (v *MsearchResponseItem) UnmarshalEasyJSON(l *jlexer.Lexer) { + easyjsonCef4e921DecodeGithubComElasticFleetServerV7InternalPkgBulk2(l, v) +} +func easyjsonCef4e921DecodeGithubComElasticFleetServerV7InternalPkgEs2(in *jlexer.Lexer, out *es.ErrorT) { + isTopLevel := in.IsStart() + if in.IsNull() { + if isTopLevel { + in.Consumed() + } + in.Skip() + return + } + in.Delim('{') + for !in.IsDelim('}') { + key := in.UnsafeFieldName(false) + in.WantColon() + if in.IsNull() { + in.Skip() + in.WantComma() + continue + } + switch key { + case "type": + out.Type = string(in.String()) + case "reason": + out.Reason = string(in.String()) + case "caused_by": + easyjsonCef4e921Decode1(in, &out.Cause) + default: + in.SkipRecursive() + } + in.WantComma() + } + in.Delim('}') + if isTopLevel { + in.Consumed() + } +} +func easyjsonCef4e921EncodeGithubComElasticFleetServerV7InternalPkgEs2(out *jwriter.Writer, in es.ErrorT) { + out.RawByte('{') + first := true + _ = first + { + const prefix string = ",\"type\":" + out.RawString(prefix[1:]) + out.String(string(in.Type)) + } + { + const prefix string = ",\"reason\":" + 
out.RawString(prefix) + out.String(string(in.Reason)) + } + { + const prefix string = ",\"caused_by\":" + out.RawString(prefix) + easyjsonCef4e921Encode1(out, in.Cause) + } + out.RawByte('}') +} +func easyjsonCef4e921Decode1(in *jlexer.Lexer, out *struct { + Type string `json:"type"` + Reason string `json:"reason"` +}) { + isTopLevel := in.IsStart() + if in.IsNull() { + if isTopLevel { + in.Consumed() + } + in.Skip() + return + } + in.Delim('{') + for !in.IsDelim('}') { + key := in.UnsafeFieldName(false) + in.WantColon() + if in.IsNull() { + in.Skip() + in.WantComma() + continue + } + switch key { + case "type": + out.Type = string(in.String()) + case "reason": + out.Reason = string(in.String()) + default: + in.SkipRecursive() + } + in.WantComma() + } + in.Delim('}') + if isTopLevel { + in.Consumed() + } +} +func easyjsonCef4e921Encode1(out *jwriter.Writer, in struct { + Type string `json:"type"` + Reason string `json:"reason"` +}) { + out.RawByte('{') + first := true + _ = first + { + const prefix string = ",\"type\":" + out.RawString(prefix[1:]) + out.String(string(in.Type)) + } + { + const prefix string = ",\"reason\":" out.RawString(prefix) - easyjsonCef4e921EncodeGithubComElasticFleetServerV7InternalPkgEs(out, *in.Error) + out.String(string(in.Reason)) } out.RawByte('}') } -func easyjsonCef4e921DecodeGithubComElasticFleetServerV7InternalPkgEs2(in *jlexer.Lexer, out *es.Aggregation) { +func easyjsonCef4e921DecodeGithubComElasticFleetServerV7InternalPkgEs1(in *jlexer.Lexer, out *es.Aggregation) { isTopLevel := in.IsStart() if in.IsNull() { if isTopLevel { @@ -688,11 +568,11 @@ func easyjsonCef4e921DecodeGithubComElasticFleetServerV7InternalPkgEs2(in *jlexe out.Buckets = (out.Buckets)[:0] } for !in.IsDelim(']') { - var v9 es.Bucket + var v6 es.Bucket if data := in.Raw(); in.Ok() { - in.AddError((v9).UnmarshalJSON(data)) + in.AddError((v6).UnmarshalJSON(data)) } - out.Buckets = append(out.Buckets, v9) + out.Buckets = append(out.Buckets, v6) in.WantComma() } in.Delim(']') @@ -707,7 +587,7 @@ func easyjsonCef4e921DecodeGithubComElasticFleetServerV7InternalPkgEs2(in *jlexe in.Consumed() } } -func easyjsonCef4e921EncodeGithubComElasticFleetServerV7InternalPkgEs2(out *jwriter.Writer, in es.Aggregation) { +func easyjsonCef4e921EncodeGithubComElasticFleetServerV7InternalPkgEs1(out *jwriter.Writer, in es.Aggregation) { out.RawByte('{') first := true _ = first @@ -731,11 +611,11 @@ func easyjsonCef4e921EncodeGithubComElasticFleetServerV7InternalPkgEs2(out *jwri out.RawString(prefix) { out.RawByte('[') - for v10, v11 := range in.Buckets { - if v10 > 0 { + for v7, v8 := range in.Buckets { + if v7 > 0 { out.RawByte(',') } - easyjsonCef4e921EncodeGithubComElasticFleetServerV7InternalPkgEs3(out, v11) + easyjsonCef4e921EncodeGithubComElasticFleetServerV7InternalPkgEs3(out, v8) } out.RawByte(']') } @@ -791,7 +671,7 @@ func easyjsonCef4e921EncodeGithubComElasticFleetServerV7InternalPkgEs3(out *jwri } out.RawByte('}') } -func easyjsonCef4e921DecodeGithubComElasticFleetServerV7InternalPkgEs1(in *jlexer.Lexer, out *es.HitsT) { +func easyjsonCef4e921DecodeGithubComElasticFleetServerV7InternalPkgEs(in *jlexer.Lexer, out *es.HitsT) { isTopLevel := in.IsStart() if in.IsNull() { if isTopLevel { @@ -826,9 +706,9 @@ func easyjsonCef4e921DecodeGithubComElasticFleetServerV7InternalPkgEs1(in *jlexe out.Hits = (out.Hits)[:0] } for !in.IsDelim(']') { - var v12 es.HitT - easyjsonCef4e921DecodeGithubComElasticFleetServerV7InternalPkgEs4(in, &v12) - out.Hits = append(out.Hits, v12) + var v9 es.HitT + 
easyjsonCef4e921DecodeGithubComElasticFleetServerV7InternalPkgEs4(in, &v9) + out.Hits = append(out.Hits, v9) in.WantComma() } in.Delim(']') @@ -855,7 +735,7 @@ func easyjsonCef4e921DecodeGithubComElasticFleetServerV7InternalPkgEs1(in *jlexe in.Consumed() } } -func easyjsonCef4e921EncodeGithubComElasticFleetServerV7InternalPkgEs1(out *jwriter.Writer, in es.HitsT) { +func easyjsonCef4e921EncodeGithubComElasticFleetServerV7InternalPkgEs(out *jwriter.Writer, in es.HitsT) { out.RawByte('{') first := true _ = first @@ -866,11 +746,11 @@ func easyjsonCef4e921EncodeGithubComElasticFleetServerV7InternalPkgEs1(out *jwri out.RawString("null") } else { out.RawByte('[') - for v13, v14 := range in.Hits { - if v13 > 0 { + for v10, v11 := range in.Hits { + if v10 > 0 { out.RawByte(',') } - easyjsonCef4e921EncodeGithubComElasticFleetServerV7InternalPkgEs4(out, v14) + easyjsonCef4e921EncodeGithubComElasticFleetServerV7InternalPkgEs4(out, v11) } out.RawByte(']') } @@ -987,6 +867,28 @@ func easyjsonCef4e921DecodeGithubComElasticFleetServerV7InternalPkgEs4(in *jlexe } *out.Score = float64(in.Float64()) } + case "fields": + if in.IsNull() { + in.Skip() + } else { + in.Delim('{') + out.Fields = make(map[string]interface{}) + for !in.IsDelim('}') { + key := string(in.String()) + in.WantColon() + var v12 interface{} + if m, ok := v12.(easyjson.Unmarshaler); ok { + m.UnmarshalEasyJSON(in) + } else if m, ok := v12.(json.Unmarshaler); ok { + _ = m.UnmarshalJSON(in.Raw()) + } else { + v12 = in.Interface() + } + (out.Fields)[key] = v12 + in.WantComma() + } + in.Delim('}') + } default: in.SkipRecursive() } @@ -1035,9 +937,36 @@ func easyjsonCef4e921EncodeGithubComElasticFleetServerV7InternalPkgEs4(out *jwri out.Float64(float64(*in.Score)) } } + { + const prefix string = ",\"fields\":" + out.RawString(prefix) + if in.Fields == nil && (out.Flags&jwriter.NilMapAsEmpty) == 0 { + out.RawString(`null`) + } else { + out.RawByte('{') + v13First := true + for v13Name, v13Value := range in.Fields { + if v13First { + v13First = false + } else { + out.RawByte(',') + } + out.String(string(v13Name)) + out.RawByte(':') + if m, ok := v13Value.(easyjson.Marshaler); ok { + m.MarshalEasyJSON(out) + } else if m, ok := v13Value.(json.Marshaler); ok { + out.Raw(m.MarshalJSON()) + } else { + out.Raw(json.Marshal(v13Value)) + } + } + out.RawByte('}') + } + } out.RawByte('}') } -func easyjsonCef4e921Decode1(in *jlexer.Lexer, out *struct { +func easyjsonCef4e921Decode(in *jlexer.Lexer, out *struct { Total uint64 `json:"total"` Successful uint64 `json:"successful"` Skipped uint64 `json:"skipped"` @@ -1079,7 +1008,7 @@ func easyjsonCef4e921Decode1(in *jlexer.Lexer, out *struct { in.Consumed() } } -func easyjsonCef4e921Encode1(out *jwriter.Writer, in struct { +func easyjsonCef4e921Encode(out *jwriter.Writer, in struct { Total uint64 `json:"total"` Successful uint64 `json:"successful"` Skipped uint64 `json:"skipped"` @@ -1110,6 +1039,186 @@ func easyjsonCef4e921Encode1(out *jwriter.Writer, in struct { } out.RawByte('}') } +func easyjsonCef4e921DecodeGithubComElasticFleetServerV7InternalPkgBulk3(in *jlexer.Lexer, out *MsearchResponse) { + isTopLevel := in.IsStart() + if in.IsNull() { + if isTopLevel { + in.Consumed() + } + in.Skip() + return + } + in.Delim('{') + for !in.IsDelim('}') { + key := in.UnsafeFieldName(false) + in.WantColon() + if in.IsNull() { + in.Skip() + in.WantComma() + continue + } + switch key { + case "responses": + if in.IsNull() { + in.Skip() + out.Responses = nil + } else { + in.Delim('[') + if out.Responses == nil { + if 
!in.IsDelim(']') { + out.Responses = make([]MsearchResponseItem, 0, 0) + } else { + out.Responses = []MsearchResponseItem{} + } + } else { + out.Responses = (out.Responses)[:0] + } + for !in.IsDelim(']') { + var v14 MsearchResponseItem + (v14).UnmarshalEasyJSON(in) + out.Responses = append(out.Responses, v14) + in.WantComma() + } + in.Delim(']') + } + case "took": + out.Took = int(in.Int()) + default: + in.SkipRecursive() + } + in.WantComma() + } + in.Delim('}') + if isTopLevel { + in.Consumed() + } +} +func easyjsonCef4e921EncodeGithubComElasticFleetServerV7InternalPkgBulk3(out *jwriter.Writer, in MsearchResponse) { + out.RawByte('{') + first := true + _ = first + { + const prefix string = ",\"responses\":" + out.RawString(prefix[1:]) + if in.Responses == nil && (out.Flags&jwriter.NilSliceAsEmpty) == 0 { + out.RawString("null") + } else { + out.RawByte('[') + for v15, v16 := range in.Responses { + if v15 > 0 { + out.RawByte(',') + } + (v16).MarshalEasyJSON(out) + } + out.RawByte(']') + } + } + { + const prefix string = ",\"took\":" + out.RawString(prefix) + out.Int(int(in.Took)) + } + out.RawByte('}') +} + +// MarshalJSON supports json.Marshaler interface +func (v MsearchResponse) MarshalJSON() ([]byte, error) { + w := jwriter.Writer{} + easyjsonCef4e921EncodeGithubComElasticFleetServerV7InternalPkgBulk3(&w, v) + return w.Buffer.BuildBytes(), w.Error +} + +// MarshalEasyJSON supports easyjson.Marshaler interface +func (v MsearchResponse) MarshalEasyJSON(w *jwriter.Writer) { + easyjsonCef4e921EncodeGithubComElasticFleetServerV7InternalPkgBulk3(w, v) +} + +// UnmarshalJSON supports json.Unmarshaler interface +func (v *MsearchResponse) UnmarshalJSON(data []byte) error { + r := jlexer.Lexer{Data: data} + easyjsonCef4e921DecodeGithubComElasticFleetServerV7InternalPkgBulk3(&r, v) + return r.Error() +} + +// UnmarshalEasyJSON supports easyjson.Unmarshaler interface +func (v *MsearchResponse) UnmarshalEasyJSON(l *jlexer.Lexer) { + easyjsonCef4e921DecodeGithubComElasticFleetServerV7InternalPkgBulk3(l, v) +} +func easyjsonCef4e921DecodeGithubComElasticFleetServerV7InternalPkgBulk4(in *jlexer.Lexer, out *MgetResponseItem) { + isTopLevel := in.IsStart() + if in.IsNull() { + if isTopLevel { + in.Consumed() + } + in.Skip() + return + } + in.Delim('{') + for !in.IsDelim('}') { + key := in.UnsafeFieldName(false) + in.WantColon() + if in.IsNull() { + in.Skip() + in.WantComma() + continue + } + switch key { + case "found": + out.Found = bool(in.Bool()) + case "_source": + if data := in.Raw(); in.Ok() { + in.AddError((out.Source).UnmarshalJSON(data)) + } + default: + in.SkipRecursive() + } + in.WantComma() + } + in.Delim('}') + if isTopLevel { + in.Consumed() + } +} +func easyjsonCef4e921EncodeGithubComElasticFleetServerV7InternalPkgBulk4(out *jwriter.Writer, in MgetResponseItem) { + out.RawByte('{') + first := true + _ = first + { + const prefix string = ",\"found\":" + out.RawString(prefix[1:]) + out.Bool(bool(in.Found)) + } + { + const prefix string = ",\"_source\":" + out.RawString(prefix) + out.Raw((in.Source).MarshalJSON()) + } + out.RawByte('}') +} + +// MarshalJSON supports json.Marshaler interface +func (v MgetResponseItem) MarshalJSON() ([]byte, error) { + w := jwriter.Writer{} + easyjsonCef4e921EncodeGithubComElasticFleetServerV7InternalPkgBulk4(&w, v) + return w.Buffer.BuildBytes(), w.Error +} + +// MarshalEasyJSON supports easyjson.Marshaler interface +func (v MgetResponseItem) MarshalEasyJSON(w *jwriter.Writer) { + easyjsonCef4e921EncodeGithubComElasticFleetServerV7InternalPkgBulk4(w, v) +} + 
+// UnmarshalJSON supports json.Unmarshaler interface +func (v *MgetResponseItem) UnmarshalJSON(data []byte) error { + r := jlexer.Lexer{Data: data} + easyjsonCef4e921DecodeGithubComElasticFleetServerV7InternalPkgBulk4(&r, v) + return r.Error() +} + +// UnmarshalEasyJSON supports easyjson.Unmarshaler interface +func (v *MgetResponseItem) UnmarshalEasyJSON(l *jlexer.Lexer) { + easyjsonCef4e921DecodeGithubComElasticFleetServerV7InternalPkgBulk4(l, v) +} func easyjsonCef4e921DecodeGithubComElasticFleetServerV7InternalPkgBulk5(in *jlexer.Lexer, out *MgetResponse) { isTopLevel := in.IsStart() if in.IsNull() { @@ -1145,9 +1254,9 @@ func easyjsonCef4e921DecodeGithubComElasticFleetServerV7InternalPkgBulk5(in *jle out.Items = (out.Items)[:0] } for !in.IsDelim(']') { - var v15 MgetResponseItem - easyjsonCef4e921DecodeGithubComElasticFleetServerV7InternalPkgBulk6(in, &v15) - out.Items = append(out.Items, v15) + var v17 MgetResponseItem + (v17).UnmarshalEasyJSON(in) + out.Items = append(out.Items, v17) in.WantComma() } in.Delim(']') @@ -1173,11 +1282,11 @@ func easyjsonCef4e921EncodeGithubComElasticFleetServerV7InternalPkgBulk5(out *jw out.RawString("null") } else { out.RawByte('[') - for v16, v17 := range in.Items { - if v16 > 0 { + for v18, v19 := range in.Items { + if v18 > 0 { out.RawByte(',') } - easyjsonCef4e921EncodeGithubComElasticFleetServerV7InternalPkgBulk6(out, v17) + (v19).MarshalEasyJSON(out) } out.RawByte(']') } @@ -1208,7 +1317,7 @@ func (v *MgetResponse) UnmarshalJSON(data []byte) error { func (v *MgetResponse) UnmarshalEasyJSON(l *jlexer.Lexer) { easyjsonCef4e921DecodeGithubComElasticFleetServerV7InternalPkgBulk5(l, v) } -func easyjsonCef4e921DecodeGithubComElasticFleetServerV7InternalPkgBulk6(in *jlexer.Lexer, out *MgetResponseItem) { +func easyjsonCef4e921DecodeGithubComElasticFleetServerV7InternalPkgBulk6(in *jlexer.Lexer, out *BulkIndexerResponseItem) { isTopLevel := in.IsStart() if in.IsNull() { if isTopLevel { @@ -1227,11 +1336,19 @@ func easyjsonCef4e921DecodeGithubComElasticFleetServerV7InternalPkgBulk6(in *jle continue } switch key { - case "found": - out.Found = bool(in.Bool()) - case "_source": - if data := in.Raw(); in.Ok() { - in.AddError((out.Source).UnmarshalJSON(data)) + case "_id": + out.DocumentID = string(in.String()) + case "status": + out.Status = int(in.Int()) + case "error": + if in.IsNull() { + in.Skip() + out.Error = nil + } else { + if out.Error == nil { + out.Error = new(es.ErrorT) + } + easyjsonCef4e921DecodeGithubComElasticFleetServerV7InternalPkgEs2(in, out.Error) } default: in.SkipRecursive() @@ -1243,19 +1360,48 @@ func easyjsonCef4e921DecodeGithubComElasticFleetServerV7InternalPkgBulk6(in *jle in.Consumed() } } -func easyjsonCef4e921EncodeGithubComElasticFleetServerV7InternalPkgBulk6(out *jwriter.Writer, in MgetResponseItem) { +func easyjsonCef4e921EncodeGithubComElasticFleetServerV7InternalPkgBulk6(out *jwriter.Writer, in BulkIndexerResponseItem) { out.RawByte('{') first := true _ = first { - const prefix string = ",\"found\":" + const prefix string = ",\"_id\":" out.RawString(prefix[1:]) - out.Bool(bool(in.Found)) + out.String(string(in.DocumentID)) } { - const prefix string = ",\"_source\":" + const prefix string = ",\"status\":" out.RawString(prefix) - out.Raw((in.Source).MarshalJSON()) + out.Int(int(in.Status)) + } + if in.Error != nil { + const prefix string = ",\"error\":" + out.RawString(prefix) + easyjsonCef4e921EncodeGithubComElasticFleetServerV7InternalPkgEs2(out, *in.Error) } out.RawByte('}') } + +// MarshalJSON supports json.Marshaler 
interface +func (v BulkIndexerResponseItem) MarshalJSON() ([]byte, error) { + w := jwriter.Writer{} + easyjsonCef4e921EncodeGithubComElasticFleetServerV7InternalPkgBulk6(&w, v) + return w.Buffer.BuildBytes(), w.Error +} + +// MarshalEasyJSON supports easyjson.Marshaler interface +func (v BulkIndexerResponseItem) MarshalEasyJSON(w *jwriter.Writer) { + easyjsonCef4e921EncodeGithubComElasticFleetServerV7InternalPkgBulk6(w, v) +} + +// UnmarshalJSON supports json.Unmarshaler interface +func (v *BulkIndexerResponseItem) UnmarshalJSON(data []byte) error { + r := jlexer.Lexer{Data: data} + easyjsonCef4e921DecodeGithubComElasticFleetServerV7InternalPkgBulk6(&r, v) + return r.Error() +} + +// UnmarshalEasyJSON supports easyjson.Unmarshaler interface +func (v *BulkIndexerResponseItem) UnmarshalEasyJSON(l *jlexer.Lexer) { + easyjsonCef4e921DecodeGithubComElasticFleetServerV7InternalPkgBulk6(l, v) +} diff --git a/internal/pkg/es/result.go b/internal/pkg/es/result.go index b6ee0d2ad..14e62e617 100644 --- a/internal/pkg/es/result.go +++ b/internal/pkg/es/result.go @@ -27,12 +27,13 @@ type AckResponse struct { } type HitT struct { - ID string `json:"_id"` - SeqNo int64 `json:"_seq_no"` - Version int64 `json:"version"` - Index string `json:"_index"` - Source json.RawMessage `json:"_source"` - Score *float64 `json:"_score"` + ID string `json:"_id"` + SeqNo int64 `json:"_seq_no"` + Version int64 `json:"version"` + Index string `json:"_index"` + Source json.RawMessage `json:"_source"` + Score *float64 `json:"_score"` + Fields map[string]interface{} `json:"fields"` } func (hit *HitT) Unmarshal(v interface{}) error { diff --git a/internal/pkg/upload/cbor/chunk.go b/internal/pkg/upload/cbor/chunk.go index b9b624784..a733da2a4 100644 --- a/internal/pkg/upload/cbor/chunk.go +++ b/internal/pkg/upload/cbor/chunk.go @@ -47,6 +47,7 @@ func NewChunkWriter(chunkData io.Reader, finalChunk bool, baseID string, chunkHa // it is therefore an incomplete CBOR object on its own // expecting the next section to be filled in by the caller. 
// the CBOR spec may be found here: https://www.rfc-editor.org/rfc/rfc8949 +// chunksize is ignored when writing the "final"=true chunk func encodePreambleToCBOR(final bool, baseID string, chunkHash string, chunkSize int64) []byte { bidLen := len(baseID) hashLen := len(chunkHash) diff --git a/internal/pkg/upload/es.go b/internal/pkg/upload/es.go index 53dc2695a..fc608c288 100644 --- a/internal/pkg/upload/es.go +++ b/internal/pkg/upload/es.go @@ -7,8 +7,11 @@ package upload import ( "context" "encoding/json" + "errors" "fmt" "net/http" + "strconv" + "strings" "github.com/elastic/fleet-server/v7/internal/pkg/bulk" "github.com/elastic/fleet-server/v7/internal/pkg/dsl" @@ -26,12 +29,15 @@ const ( FileDataIndexPattern = ".fleet-file-data-%s" FieldBaseID = "bid" + FieldLast = "last" + FieldSHA2 = "sha2" FieldUploadID = "upload_id" ) var ( - QueryChunkIDs = prepareFindChunkIDs() - QueryUploadID = prepareFindByUploadID() + QueryChunkIDs = prepareFindChunkIDs() + QueryUploadID = prepareFindByUploadID() + QueryChunkInfo = prepareChunkWithoutData() ) func prepareFindChunkIDs() *dsl.Tmpl { @@ -44,6 +50,26 @@ func prepareFindChunkIDs() *dsl.Tmpl { return tmpl } +// get fields other than the byte payload (data) +func prepareChunkWithoutData() *dsl.Tmpl { + tmpl := dsl.NewTmpl() + root := dsl.NewRoot() + root.Param("_source", false) + root.Query().Term(FieldBaseID, tmpl.Bind(FieldBaseID), nil) + root.Param("fields", []string{FieldSHA2, FieldLast, FieldBaseID}) + root.Param("script_fields", map[string]interface{}{ + "size": map[string]interface{}{ + "script": map[string]interface{}{ + "lang": "painless", + "source": "params._source.data.length", + }, + }, + }) + root.Size(10000) + tmpl.MustResolve(root) + return tmpl +} + func prepareFindByUploadID() *dsl.Tmpl { tmpl := dsl.NewTmpl() root := dsl.NewRoot() @@ -79,11 +105,11 @@ func UpdateFileDoc(ctx context.Context, bulker bulk.Bulk, source string, fileID return bulker.Update(ctx, fmt.Sprintf(FileHeaderIndexPattern, source), fileID, data) } -func IndexChunk(ctx context.Context, client *elasticsearch.Client, body *cbor.ChunkEncoder, source string, docID string, chunkID int) error { +func IndexChunk(ctx context.Context, client *elasticsearch.Client, body *cbor.ChunkEncoder, source string, docID string, chunkNum int) error { req := esapi.IndexRequest{ Index: fmt.Sprintf(FileDataIndexPattern, source), Body: body, - DocumentID: fmt.Sprintf("%s.%d", docID, chunkID), + DocumentID: fmt.Sprintf("%s.%d", docID, chunkNum), Refresh: "true", } // need to set the Content-Type of the request to CBOR, notes below @@ -93,7 +119,7 @@ func IndexChunk(ctx context.Context, client *elasticsearch.Client, body *cbor.Ch standard approach when content-type override no longer needed resp, err := client.Index(fmt.Sprintf(FileDataIndexPattern, source), data, func(req *esapi.IndexRequest) { - req.DocumentID = fmt.Sprintf("%s.%d", fileID, chunkID) + req.DocumentID = fmt.Sprintf("%s.%d", fileID, chunkNum) if req.Header == nil { req.Header = make(http.Header) } @@ -170,9 +196,110 @@ func listChunkIDs(ctx context.Context, bulker bulk.Bulk, index string, fileID st return res.HitsT.Hits, nil } -func GetChunk(ctx context.Context, bulker bulk.Bulk, source string, fileID string, chunkID int) (model.FileChunk, error) { +func GetChunkInfos(ctx context.Context, bulker bulk.Bulk, baseID string) ([]ChunkInfo, error) { + + query, err := QueryChunkInfo.Render(map[string]interface{}{ + FieldBaseID: baseID, + }) + if err != nil { + return nil, err + } + + res, err := bulker.Search(ctx, 
fmt.Sprintf(FileDataIndexPattern, "*"), query) + if err != nil { + return nil, err + } + + chunks := make([]ChunkInfo, len(res.HitsT.Hits)) + + var ( + bid string + last bool + sha2 string + size int + ok bool + ) + + for i, h := range res.HitsT.Hits { + if bid, ok = getResultsFieldString(h.Fields, FieldBaseID); !ok { + return nil, fmt.Errorf("unable to retrieve %s field from chunk document", FieldBaseID) + } + if last, ok = getResultsFieldBool(h.Fields, FieldLast); !ok { + return nil, fmt.Errorf("unable to retrieve %s field from chunk document", FieldLast) + } + if sha2, ok = getResultsFieldString(h.Fields, FieldSHA2); !ok { + return nil, fmt.Errorf("unable to retrieve %s field from chunk document", FieldSHA2) + } + if size, ok = getResultsFieldInt(h.Fields, "size"); !ok { + return nil, errors.New("unable to retrieve size from chunk document") + } + + chunkid := strings.TrimPrefix(h.ID, bid+".") + chunkNum, err := strconv.Atoi(chunkid) + if err != nil { + return nil, fmt.Errorf("unable to parse chunk number from id %s: %w", h.ID, err) + } + chunks[i] = ChunkInfo{ + Pos: chunkNum, + BID: bid, + Last: last, + SHA2: sha2, + Size: size, + } + } + + return chunks, nil +} + +// convenience function for translating the elasticsearch "field" response format +// of "field": { "a": [value], "b": [value] } +func getResultField(fields map[string]interface{}, key string) (interface{}, bool) { + array, ok := fields[key].([]interface{}) + if !ok { + return nil, false + } + if array == nil || len(array) < 1 { + return nil, false + } + return array[0], true +} + +func getResultsFieldString(fields map[string]interface{}, key string) (string, bool) { + val, ok := getResultField(fields, key) + if !ok { + return "", false + } + str, ok := val.(string) + return str, ok +} +func getResultsFieldBool(fields map[string]interface{}, key string) (bool, bool) { + val, ok := getResultField(fields, key) + if !ok { + return false, false + } + b, ok := val.(bool) + return b, ok +} +func getResultsFieldInt(fields map[string]interface{}, key string) (int, bool) { + val, ok := getResultField(fields, key) + if !ok { + return 0, false + } + switch n := val.(type) { + case int: + return n, true + case int64: + return int(n), true + case float64: + return int(n), true + default: + return 0, false + } +} + +func GetChunk(ctx context.Context, bulker bulk.Bulk, source string, fileID string, chunkNum int) (model.FileChunk, error) { var chunk model.FileChunk - out, err := bulker.Read(ctx, fmt.Sprintf(FileDataIndexPattern, source), fmt.Sprintf("%s.%d", fileID, chunkID)) + out, err := bulker.Read(ctx, fmt.Sprintf(FileDataIndexPattern, source), fmt.Sprintf("%s.%d", fileID, chunkNum)) if err != nil { return chunk, err } diff --git a/internal/pkg/upload/upload.go b/internal/pkg/upload/upload.go index 65da9c4e4..d675e3a0b 100644 --- a/internal/pkg/upload/upload.go +++ b/internal/pkg/upload/upload.go @@ -6,16 +6,17 @@ package upload import ( "context" + "crypto/sha256" + "encoding/hex" "encoding/json" "errors" "fmt" - "strconv" + "sort" "strings" "sync" "time" "github.com/elastic/fleet-server/v7/internal/pkg/bulk" - "github.com/elastic/fleet-server/v7/internal/pkg/throttle" "github.com/elastic/go-elasticsearch/v7" "github.com/gofrs/uuid" "github.com/rs/zerolog/log" @@ -70,18 +71,18 @@ type Info struct { } // convenience functions for computing current "Status" based on the fields -func (i Info) Expired(timeout time.Duration) bool { return i.Start.Add(timeout).After(time.Now()) } +func (i Info) Expired(timeout time.Duration) bool { return 
time.Now().After(i.Start.Add(timeout)) } func (i Info) StatusCanUpload() bool { // returns true if more chunks can be uploaded. False if the upload process has completed (with or without error) return !(i.Status == StatusFail || i.Status == StatusDone || i.Status == StatusDel) } type ChunkInfo struct { - ID int - FirstReceived bool - Final bool - Upload Info - Hash string - Token *throttle.Token + Pos int // Ordered chunk position in file + Last bool // Is this the final chunk in the file + SHA2 string + Size int + BID string // base id, matches metadata doc's _id + //FirstReceived bool } func New(chunkClient *elasticsearch.Client, bulker bulk.Bulk, sizeLimit int64, timeLimit time.Duration) *Uploader { @@ -90,6 +91,7 @@ func New(chunkClient *elasticsearch.Client, bulker bulk.Bulk, sizeLimit int64, t bulker: bulker, sizeLimit: sizeLimit, timeLimit: timeLimit, + metaCache: make(map[string]Info), } } @@ -169,7 +171,7 @@ func (u *Uploader) Begin(ctx context.Context, data JSDict) (Info, error) { return info, nil } -func (u *Uploader) Chunk(ctx context.Context, uplID string, chunkID int, chunkHash string) (ChunkInfo, error) { +func (u *Uploader) Chunk(ctx context.Context, uplID string, chunkNum int, chunkHash string) (Info, ChunkInfo, error) { // Fetch metadata doc, if not cached //u.mu.RLock() @@ -183,50 +185,45 @@ func (u *Uploader) Chunk(ctx context.Context, uplID string, chunkID int, chunkHa var err error info, err = u.GetUploadInfo(ctx, uplID) if err != nil { - return ChunkInfo{}, fmt.Errorf("unable to retrieve upload info: %w", err) + return Info{}, ChunkInfo{}, fmt.Errorf("unable to retrieve upload info: %w", err) } u.metaCache[uplID] = info } if info.Expired(u.timeLimit) { - return ChunkInfo{}, ErrUploadExpired + return Info{}, ChunkInfo{}, ErrUploadExpired } if !info.StatusCanUpload() { - return ChunkInfo{}, ErrUploadStopped + return Info{}, ChunkInfo{}, ErrUploadStopped } - if chunkID < 0 || chunkID >= info.Count { - return ChunkInfo{}, ErrInvalidChunkNum + if chunkNum < 0 || chunkNum >= info.Count { + return Info{}, ChunkInfo{}, ErrInvalidChunkNum } - return ChunkInfo{ - ID: chunkID, - FirstReceived: false, // @todo - Final: chunkID == info.Count-1, - Upload: info, - Hash: chunkHash, - //Token: token, + return info, ChunkInfo{ + Pos: chunkNum, + BID: info.DocID, + //FirstReceived: false, // @todo + Last: chunkNum == info.Count-1, + Size: int(info.ChunkSize), + SHA2: chunkHash, }, nil } -func (u *Uploader) Complete(id string, transitHash string, bulker bulk.Bulk) (Info, error) { +func (u *Uploader) Complete(ctx context.Context, id string, transitHash string) (Info, error) { info, valid := u.metaCache[id] if !valid { return Info{}, ErrInvalidUploadID } - ok, err := u.allChunksPresent(info, bulker) + chunks, err := GetChunkInfos(ctx, u.bulker, info.DocID) if err != nil { return Info{}, err } - if !ok { + if !u.allChunksPresent(info, chunks) { return Info{}, ErrMissingChunks } - - ok, err = u.verifyChunkData(info, transitHash, bulker) - if err != nil { - return Info{}, err - } - if !ok { + if !u.verifyChunkInfo(info, chunks, transitHash) { return Info{}, errors.New("file contents did not pass validation") } @@ -253,90 +250,83 @@ func (u *Uploader) finalize(uplID string) error { return nil } -func (u *Uploader) allChunksPresent(info Info, bulker bulk.Bulk) (bool, error) { - hits, err := ListChunkIDs(context.TODO(), bulker, info.Source, info.DocID) - if err != nil { - log.Warn().Err(err).Msg("error listing chunks") - return false, err - } - if len(hits) != info.Count { - 
log.Warn().Int("expectedCount", info.Count).Int("received", len(hits)).Interface("hits", hits).Msg("mismatch number of chunks") - return false, nil +func (u *Uploader) allChunksPresent(info Info, chunks []ChunkInfo) bool { + // check overall count + if len(chunks) != info.Count { + log.Warn().Int("expectedCount", info.Count).Int("received", len(chunks)).Interface("chunks", chunks).Msg("mismatch number of chunks") + return false } - ids := make(map[int]bool, len(hits)) - for _, h := range hits { - chunkID := strings.TrimPrefix(h.ID, info.DocID+".") - ival, err := strconv.Atoi(chunkID) - if err != nil { - log.Warn().Err(err).Str("chunkID", h.ID).Str("docID", info.DocID).Str("parsedChunkInt", chunkID).Interface("hits", hits).Msg("unable to convert to int value") - return false, err - } - ids[ival] = true - } + // now ensure all positions are accounted for, no gaps, etc + sort.Slice(chunks, func(i, j int) bool { + return chunks[i].Pos < chunks[j].Pos + }) - for i := 0; i < info.Count; i++ { - if got, exists := ids[i]; !got || !exists { - log.Warn().Int("expected", i).Interface("hits", hits).Msg("mismatch chunk") - return false, nil + for i, c := range chunks { + if c.Pos != i { + log.Warn().Int("expected", i).Interface("chunk", c).Msg("chunk position doesn't match. May be a gap in uploaded file") + return false } } - return true, nil + + return true } -func (u *Uploader) verifyChunkData(info Info, transitHash string, bulker bulk.Bulk) (bool, error) { +func (u *Uploader) verifyChunkInfo(info Info, chunks []ChunkInfo, transitHash string) bool { // verify all chunks except last are info.ChunkSize size // verify last: false (or field excluded) for all except final chunk // verify final chunk is last: true // verify hash - for i := 0; i < info.Count; i++ { - chunk, err := GetChunk(context.TODO(), bulker, info.Source, info.DocID, i) - if err != nil { - return false, err - } - if err != nil { - return false, err - } + hasher := sha256.New() + + for i, chunk := range chunks { if i < info.Count-1 { if chunk.Last { log.Debug().Int("chunkID", i).Msg("non-final chunk was incorrectly marked last") - return false, nil + return false } - if len(chunk.Data) != int(info.ChunkSize) { - log.Debug().Int64("requiredSize", info.ChunkSize).Int("chunkID", i).Int("gotSize", len(chunk.Data)).Msg("chunk was undersized") - return false, nil + if chunk.Size != int(info.ChunkSize) { + log.Debug().Int64("requiredSize", info.ChunkSize).Int("chunkID", i).Int("gotSize", chunk.Size).Msg("chunk was undersized") + return false } } else { if !chunk.Last { log.Debug().Int("chunkID", i).Msg("final chunk was not marked as final") - return false, nil + return false } - if len(chunk.Data) == 0 { + if chunk.Size == 0 { log.Debug().Int("chunkID", i).Msg("final chunk was 0 size") - return false, nil + return false + } + if chunk.Size > int(info.ChunkSize) { + log.Debug().Int("chunk-size", chunk.Size).Int("maxsize", int(info.ChunkSize)).Msg("final chunk was oversized") + return false } } - /* - if info.Hasher != nil { - _, err = io.Copy(info.Hasher, bytes.NewReader(chunk.Data)) - if err != nil { - return false, err - } - } - */ - } + rawHash, err := hex.DecodeString(chunk.SHA2) + if err != nil { + log.Warn().Err(err).Msg("error decoding chunk hash") + return false + } - /* - if info.Hasher != nil { - fullHash := hex.EncodeToString(info.Hasher.Sum(nil)) - if fullHash != info.HashSum { - return false, ErrHashMismatch - } + if n, err := hasher.Write(rawHash); err != nil { + log.Error().Err(err).Msg("error computing transitHash from component 
chunk hashes") + return false + } else if n != len(rawHash) { + log.Error().Int("wrote", n).Int("expected", len(rawHash)).Msg("transitHash calculation failure, could not write to hasher") + return false } - */ - return true, nil + } + + calcHash := hex.EncodeToString(hasher.Sum(nil)) + if !strings.EqualFold(transitHash, calcHash) { + log.Warn().Str("provided-hash", transitHash).Str("calc-hash", calcHash).Msg("file upload streaming hash does not match") + return false + } + + return true } func validateUploadPayload(info JSDict) error { @@ -403,15 +393,6 @@ func (u *Uploader) GetUploadInfo(ctx context.Context, uploadID string) (Info, er }, nil } -type FileMetaDoc struct { - ActionID string `json:"action_id"` - AgentID string `json:"agent_id"` - Source string `json:"src"` - File FileData `json:"file"` - Contents []FileData `json:"contents"` - UploadID string `json:"upload_id"` - Start time.Time `json:"upload_start"` -} type FileData struct { Size int64 `json:"size"` ChunkSize int64 `json:"ChunkSize"` @@ -423,3 +404,30 @@ type FileData struct { MD5 string `json:"md5"` } `json:"hash"` } + +type FileMetaDoc struct { + ActionID string `json:"action_id"` + AgentID string `json:"agent_id"` + Source string `json:"src"` + File FileData `json:"file"` + UploadID string `json:"upload_id"` + Start time.Time `json:"upload_start"` +} + +// custom unmarshaller to make unix-epoch values +// work +func (f *FileMetaDoc) UnmarshalJSON(b []byte) error { + type InnerFile FileMetaDoc // type alias to prevent recursion into this func + // override the field to parse as an int, then manually convert to time.time + var tmp struct { + InnerFile + Start int64 `json:"upload_start"` + } + if err := json.Unmarshal(b, &tmp); err != nil { + return err + } + + *f = FileMetaDoc(tmp.InnerFile) // copy over all fields + f.Start = time.UnixMilli(tmp.Start) + return nil +} From 50cd8fff7f107b6f2b2da9b7f07ff2b6ed139cfa Mon Sep 17 00:00:00 2001 From: pzl Date: Tue, 20 Dec 2022 15:26:56 -0500 Subject: [PATCH 29/51] es client 8.5.0 update and cleanups --- internal/pkg/api/handleUpload.go | 2 +- internal/pkg/upload/es.go | 48 ++++++++------------------------ internal/pkg/upload/upload.go | 2 +- 3 files changed, 13 insertions(+), 39 deletions(-) diff --git a/internal/pkg/api/handleUpload.go b/internal/pkg/api/handleUpload.go index a25cf3079..838460114 100644 --- a/internal/pkg/api/handleUpload.go +++ b/internal/pkg/api/handleUpload.go @@ -23,7 +23,7 @@ import ( "github.com/elastic/fleet-server/v7/internal/pkg/logger" "github.com/elastic/fleet-server/v7/internal/pkg/upload" "github.com/elastic/fleet-server/v7/internal/pkg/upload/cbor" - "github.com/elastic/go-elasticsearch/v7" + "github.com/elastic/go-elasticsearch/v8" "github.com/julienschmidt/httprouter" "github.com/rs/zerolog" "github.com/rs/zerolog/log" diff --git a/internal/pkg/upload/es.go b/internal/pkg/upload/es.go index fc608c288..3d787f918 100644 --- a/internal/pkg/upload/es.go +++ b/internal/pkg/upload/es.go @@ -18,8 +18,8 @@ import ( "github.com/elastic/fleet-server/v7/internal/pkg/es" "github.com/elastic/fleet-server/v7/internal/pkg/model" "github.com/elastic/fleet-server/v7/internal/pkg/upload/cbor" - "github.com/elastic/go-elasticsearch/v7" - "github.com/elastic/go-elasticsearch/v7/esapi" + "github.com/elastic/go-elasticsearch/v8" + "github.com/elastic/go-elasticsearch/v8/esapi" "github.com/rs/zerolog/log" ) @@ -106,31 +106,15 @@ func UpdateFileDoc(ctx context.Context, bulker bulk.Bulk, source string, fileID } func IndexChunk(ctx context.Context, client 
*elasticsearch.Client, body *cbor.ChunkEncoder, source string, docID string, chunkNum int) error { - req := esapi.IndexRequest{ - Index: fmt.Sprintf(FileDataIndexPattern, source), - Body: body, - DocumentID: fmt.Sprintf("%s.%d", docID, chunkNum), - Refresh: "true", - } - // need to set the Content-Type of the request to CBOR, notes below - overrider := contentTypeOverrider{client} - resp, err := req.Do(ctx, overrider) - /* - standard approach when content-type override no longer needed - - resp, err := client.Index(fmt.Sprintf(FileDataIndexPattern, source), data, func(req *esapi.IndexRequest) { - req.DocumentID = fmt.Sprintf("%s.%d", fileID, chunkNum) - if req.Header == nil { - req.Header = make(http.Header) - } - // the below setting actually gets overridden in the ES client - // when it checks for the existence of r.Body, and then sets content-type to JSON - // this setting is then *added* so multiple content-types are sent. - // https://github.com/elastic/go-elasticsearch/blob/7.17/esapi/api.index.go#L183-L193 - // we have to temporarily override this with a custom esapi.Transport - req.Header.Set("Content-Type", "application/cbor") - req.Header.Set("Accept","application/json") // this one has no issues being set this way. We need to specify we want JSON response - })*/ + resp, err := client.Index(fmt.Sprintf(FileDataIndexPattern, source), body, func(req *esapi.IndexRequest) { + req.DocumentID = fmt.Sprintf("%s.%d", docID, chunkNum) + if req.Header == nil { + req.Header = make(http.Header) + } + req.Header.Set("Content-Type", "application/cbor") + req.Header.Set("Accept", "application/json") + req.Refresh = "true" + }) if err != nil { return err } @@ -147,16 +131,6 @@ func IndexChunk(ctx context.Context, client *elasticsearch.Client, body *cbor.Ch return nil } -type contentTypeOverrider struct { - client *elasticsearch.Client -} - -func (c contentTypeOverrider) Perform(req *http.Request) (*http.Response, error) { - req.Header.Set("Content-Type", "application/cbor") // we will SEND cbor - req.Header.Set("Accept", "application/json") // but we want JSON back - return c.client.Perform(req) -} - type ChunkUploadResponse struct { Index string `json:"_index"` ID string `json:"_id"` diff --git a/internal/pkg/upload/upload.go b/internal/pkg/upload/upload.go index d675e3a0b..60f1ebded 100644 --- a/internal/pkg/upload/upload.go +++ b/internal/pkg/upload/upload.go @@ -17,7 +17,7 @@ import ( "time" "github.com/elastic/fleet-server/v7/internal/pkg/bulk" - "github.com/elastic/go-elasticsearch/v7" + "github.com/elastic/go-elasticsearch/v8" "github.com/gofrs/uuid" "github.com/rs/zerolog/log" ) From aff2c6d63ace98a1eb696f8b1572daf3a4a4016c Mon Sep 17 00:00:00 2001 From: pzl Date: Wed, 21 Dec 2022 11:36:08 -0500 Subject: [PATCH 30/51] cleanups and some tests --- internal/pkg/api/handleUpload.go | 29 ++++++------- internal/pkg/upload/es.go | 62 ++++++++++----------------- internal/pkg/upload/es_test.go | 69 ++++++++++++++++++++++++++++++ internal/pkg/upload/upload.go | 72 ++++++++++++++++++++------------ 4 files changed, 153 insertions(+), 79 deletions(-) create mode 100644 internal/pkg/upload/es_test.go diff --git a/internal/pkg/api/handleUpload.go b/internal/pkg/api/handleUpload.go index 838460114..5ce9bc23a 100644 --- a/internal/pkg/api/handleUpload.go +++ b/internal/pkg/api/handleUpload.go @@ -99,18 +99,8 @@ func (rt Router) handleUploadComplete(w http.ResponseWriter, r *http.Request, ps Str(ECSHTTPRequestID, reqID). 
Logger() - //@todo: doc lookup, agent ID is in there - agentID := "ABC" - - // need to auth that it matches the ID in the initial - // doc, but that means we had to doc-lookup early - if AUTH_ENABLED { - if _, err := authAgent(r, &agentID, rt.bulker, rt.ut.cache); err != nil { - writeUploadError(err, w, zlog, start, "error authenticating for upload finalization") - return - } - } - + // authentication occurs inside here, to ensure key agent ID + // matches the same agent ID the operation started with if err := rt.ut.handleUploadComplete(&zlog, w, r, id); err != nil { writeUploadError(err, w, zlog, start, "error finalizing upload") return @@ -183,7 +173,6 @@ func (ut *UploadT) handleUploadStart(zlog *zerolog.Logger, w http.ResponseWriter func (ut *UploadT) handleUploadChunk(zlog *zerolog.Logger, w http.ResponseWriter, r *http.Request, uplID string, chunkID int) error { chunkHash := strings.TrimSpace(r.Header.Get("X-Chunk-Sha2")) - if chunkHash == "" { return errors.New("chunk hash header required") } @@ -216,6 +205,18 @@ func (ut *UploadT) handleUploadChunk(zlog *zerolog.Logger, w http.ResponseWriter } func (ut *UploadT) handleUploadComplete(zlog *zerolog.Logger, w http.ResponseWriter, r *http.Request, uplID string) error { + info, err := ut.uploader.GetUploadInfo(r.Context(), uplID) + if err != nil { + return err + } + // need to auth that it matches the ID in the initial + // doc, but that means we had to doc-lookup early + if AUTH_ENABLED { + if _, err := authAgent(r, &info.AgentID, ut.bulker, ut.cache); err != nil { + return fmt.Errorf("Error authenticating for upload finalization: %w", err) + } + } + var req UploadCompleteRequest if err := json.NewDecoder(r.Body).Decode(&req); err != nil { return errors.New("unable to parse request body") @@ -225,7 +226,7 @@ func (ut *UploadT) handleUploadComplete(zlog *zerolog.Logger, w http.ResponseWri return errors.New("transit hash required") } - info, err := ut.uploader.Complete(r.Context(), uplID, req.TransitHash.SHA256) + info, err = ut.uploader.Complete(r.Context(), uplID, req.TransitHash.SHA256) if err != nil { return err } diff --git a/internal/pkg/upload/es.go b/internal/pkg/upload/es.go index 3d787f918..77a1357b5 100644 --- a/internal/pkg/upload/es.go +++ b/internal/pkg/upload/es.go @@ -79,8 +79,11 @@ func prepareFindByUploadID() *dsl.Tmpl { return tmpl } +/* + Metadata Doc Operations +*/ + func CreateFileDoc(ctx context.Context, bulker bulk.Bulk, doc []byte, source string, fileID string) (string, error) { - //@todo: put_if_absent return bulker.Create(ctx, fmt.Sprintf(FileHeaderIndexPattern, source), fileID, doc, bulk.WithRefresh()) } @@ -105,6 +108,10 @@ func UpdateFileDoc(ctx context.Context, bulker bulk.Bulk, source string, fileID return bulker.Update(ctx, fmt.Sprintf(FileHeaderIndexPattern, source), fileID, data) } +/* + Chunk Operations +*/ + func IndexChunk(ctx context.Context, client *elasticsearch.Client, body *cbor.ChunkEncoder, source string, docID string, chunkNum int) error { resp, err := client.Index(fmt.Sprintf(FileDataIndexPattern, source), body, func(req *esapi.IndexRequest) { req.DocumentID = fmt.Sprintf("%s.%d", docID, chunkNum) @@ -141,37 +148,13 @@ type ChunkUploadResponse struct { Success int `json:"successful"` Failed int `json:"failed"` } `json:"_shards"` - Error struct { - Type string `json:"type"` - Reason string `json:"reason"` - Cause struct { - Type string `json:"type"` - Reason string `json:"reason"` - } `json:"caused_by"` - } `json:"error"` -} - -func ListChunkIDs(ctx context.Context, bulker bulk.Bulk, source 
string, fileID string) ([]es.HitT, error) { - return listChunkIDs(ctx, bulker, fmt.Sprintf(FileDataIndexPattern, source), fileID) -} - -func listChunkIDs(ctx context.Context, bulker bulk.Bulk, index string, fileID string) ([]es.HitT, error) { - query, err := QueryChunkIDs.Render(map[string]interface{}{ - FieldBaseID: fileID, - }) - if err != nil { - return nil, err - } - - res, err := bulker.Search(ctx, index, query) - if err != nil { - return nil, err - } - return res.HitsT.Hits, nil + Error es.ErrorT `json:"error"` } +// Retrieves a subset of chunk document fields, specifically omitting the Data payload (bytes) +// but adding the calculated field "size", that is the length, in bytes, of the Data field +// the chunk's ordered index position (Pos) is also parsed from the document ID func GetChunkInfos(ctx context.Context, bulker bulk.Bulk, baseID string) ([]ChunkInfo, error) { - query, err := QueryChunkInfo.Render(map[string]interface{}{ FieldBaseID: baseID, }) @@ -225,6 +208,17 @@ func GetChunkInfos(ctx context.Context, bulker bulk.Bulk, baseID string) ([]Chun return chunks, nil } +// retrieves a full chunk document, Data included +func GetChunk(ctx context.Context, bulker bulk.Bulk, source string, fileID string, chunkNum int) (model.FileChunk, error) { + var chunk model.FileChunk + out, err := bulker.Read(ctx, fmt.Sprintf(FileDataIndexPattern, source), fmt.Sprintf("%s.%d", fileID, chunkNum)) + if err != nil { + return chunk, err + } + err = json.Unmarshal(out, &chunk) + return chunk, err +} + // convenience function for translating the elasticsearch "field" response format // of "field": { "a": [value], "b": [value] } func getResultField(fields map[string]interface{}, key string) (interface{}, bool) { @@ -270,13 +264,3 @@ func getResultsFieldInt(fields map[string]interface{}, key string) (int, bool) { return 0, false } } - -func GetChunk(ctx context.Context, bulker bulk.Bulk, source string, fileID string, chunkNum int) (model.FileChunk, error) { - var chunk model.FileChunk - out, err := bulker.Read(ctx, fmt.Sprintf(FileDataIndexPattern, source), fmt.Sprintf("%s.%d", fileID, chunkNum)) - if err != nil { - return chunk, err - } - err = json.Unmarshal(out, &chunk) - return chunk, err -} diff --git a/internal/pkg/upload/es_test.go b/internal/pkg/upload/es_test.go new file mode 100644 index 000000000..71919f40f --- /dev/null +++ b/internal/pkg/upload/es_test.go @@ -0,0 +1,69 @@ +// Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one +// or more contributor license agreements. Licensed under the Elastic License; +// you may not use this file except in compliance with the Elastic License. 
+ +package upload + +import ( + "context" + "fmt" + "testing" + + "github.com/elastic/fleet-server/v7/internal/pkg/es" + itesting "github.com/elastic/fleet-server/v7/internal/pkg/testing" + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/mock" +) + +func TestChunkInfoResultsParseCorrectly(t *testing.T) { + fakeBulk := itesting.NewMockBulk() + + baseID := "abc.xyz" + sha2 := "ffff" + size := 3417671 + + fakeBulk.On("Search", mock.Anything, mock.Anything, mock.Anything, mock.Anything).Return(&es.ResultT{ + HitsT: es.HitsT{ + Hits: []es.HitT{ + { + ID: baseID + ".0", + Index: fmt.Sprintf(FileDataIndexPattern, "mysrc"), + Source: []byte(""), + Fields: map[string]interface{}{ + "bid": []interface{}{baseID}, + "last": []interface{}{false}, + "sha2": []interface{}{sha2}, + "size": []interface{}{float64(size)}, + }, + }, + { + ID: baseID + ".1", + Index: fmt.Sprintf(FileDataIndexPattern, "mysrc"), + Source: []byte(""), + Fields: map[string]interface{}{ + "bid": []interface{}{baseID}, + "last": []interface{}{true}, + "sha2": []interface{}{sha2}, + "size": []interface{}{float64(size)}, + }, + }, + }, + }, + }, nil) + + chunks, err := GetChunkInfos(context.Background(), fakeBulk, baseID) + assert.NoError(t, err) + assert.Len(t, chunks, 2) + + assert.Equal(t, baseID, chunks[0].BID) + assert.False(t, chunks[0].Last) + assert.Equal(t, sha2, chunks[0].SHA2) + assert.Equal(t, 0, chunks[0].Pos) + assert.Equal(t, size, chunks[0].Size) + + assert.Equal(t, baseID, chunks[1].BID) + assert.True(t, chunks[1].Last) + assert.Equal(t, sha2, chunks[1].SHA2) + assert.Equal(t, 1, chunks[1].Pos) + assert.Equal(t, size, chunks[1].Size) +} diff --git a/internal/pkg/upload/upload.go b/internal/pkg/upload/upload.go index 60f1ebded..9f5b27161 100644 --- a/internal/pkg/upload/upload.go +++ b/internal/pkg/upload/upload.go @@ -49,12 +49,11 @@ const ( ) type Uploader struct { - metaCache map[string]Info // simple read-cache of file metadata doc info + metaCache map[string]Info // cache of file metadata doc info mu sync.RWMutex // lock for the above sizeLimit int64 // @todo: what if configuration changes? is this recreated with another New()? timeLimit time.Duration // @todo: same as above - // @todo: some es credentials chunkClient *elasticsearch.Client bulker bulk.Bulk } @@ -63,6 +62,8 @@ type Info struct { ID string // upload operation identifier. 
Used to identify the upload process DocID string // document ID of the uploaded file and chunks Source string // which integration is performing the upload + AgentID string + ActionID string ChunkSize int64 Total int64 Count int @@ -95,8 +96,7 @@ func New(chunkClient *elasticsearch.Client, bulker bulk.Bulk, sizeLimit int64, t } } -// Start an upload operation, as long as the max concurrent has not been reached -// returns the upload ID +// Start an upload operation func (u *Uploader) Begin(ctx context.Context, data JSDict) (Info, error) { if data == nil { return Info{}, errors.New("upload start payload required") @@ -131,6 +131,8 @@ func (u *Uploader) Begin(ctx context.Context, data JSDict) (Info, error) { info := Info{ ID: id, DocID: docID, + AgentID: agentID, + ActionID: actionID, ChunkSize: MaxChunkSize, Source: source, Total: size, @@ -158,11 +160,13 @@ func (u *Uploader) Begin(ctx context.Context, data JSDict) (Info, error) { return Info{}, err } + /* + Write to storage + */ doc, err := json.Marshal(data) if err != nil { return Info{}, err } - _, err = CreateFileDoc(ctx, u.bulker, doc, source, docID) if err != nil { return Info{}, err @@ -173,21 +177,9 @@ func (u *Uploader) Begin(ctx context.Context, data JSDict) (Info, error) { func (u *Uploader) Chunk(ctx context.Context, uplID string, chunkNum int, chunkHash string) (Info, ChunkInfo, error) { - // Fetch metadata doc, if not cached - //u.mu.RLock() - //defer u.mu.RUnlock() - info, exist := u.metaCache[uplID] - if !exist { - //u.mu.Lock() - //defer u.mu.Unlock() - // fetch and write - - var err error - info, err = u.GetUploadInfo(ctx, uplID) - if err != nil { - return Info{}, ChunkInfo{}, fmt.Errorf("unable to retrieve upload info: %w", err) - } - u.metaCache[uplID] = info + info, err := u.GetUploadInfo(ctx, uplID) + if err != nil { + return Info{}, ChunkInfo{}, err } if info.Expired(u.timeLimit) { @@ -211,9 +203,9 @@ func (u *Uploader) Chunk(ctx context.Context, uplID string, chunkNum int, chunkH } func (u *Uploader) Complete(ctx context.Context, id string, transitHash string) (Info, error) { - info, valid := u.metaCache[id] - if !valid { - return Info{}, ErrInvalidUploadID + info, err := u.GetUploadInfo(ctx, id) + if err != nil { + return Info{}, err } chunks, err := GetChunkInfos(ctx, u.bulker, info.DocID) @@ -282,6 +274,8 @@ func (u *Uploader) verifyChunkInfo(info Info, chunks []ChunkInfo, transitHash st for i, chunk := range chunks { if i < info.Count-1 { + // all chunks except last must have last:false + // and be PRECISELY info.ChunkSize bytes long if chunk.Last { log.Debug().Int("chunkID", i).Msg("non-final chunk was incorrectly marked last") return false @@ -291,6 +285,8 @@ func (u *Uploader) verifyChunkInfo(info Info, chunks []ChunkInfo, transitHash st return false } } else { + // last chunk must be marked last:true + // and can be any valid size (0,ChunkSize] if !chunk.Last { log.Debug().Int("chunkID", i).Msg("final chunk was not marked as final") return false @@ -305,12 +301,13 @@ func (u *Uploader) verifyChunkInfo(info Info, chunks []ChunkInfo, transitHash st } } + // write the byte-decoded hash for this chunk to the + // running hash for the entire file (transithash) rawHash, err := hex.DecodeString(chunk.SHA2) if err != nil { log.Warn().Err(err).Msg("error decoding chunk hash") return false } - if n, err := hasher.Write(rawHash); err != nil { log.Error().Err(err).Msg("error computing transitHash from component chunk hashes") return false @@ -356,9 +353,30 @@ func validateUploadPayload(info JSDict) error { return nil } 
-// retrieves upload metadata info from elasticsearch -// which may be locally cached +// Searches for Upload Metadata document in local memory cache if available +// otherwise, fetches from elasticsearch and caches for next use func (u *Uploader) GetUploadInfo(ctx context.Context, uploadID string) (Info, error) { + // Fetch metadata doc, if not cached + u.mu.RLock() + info, exist := u.metaCache[uploadID] + u.mu.RUnlock() // not deferred since this must be clear before we gain a write lock below + if exist { + return info, nil + } + + // not found in cache, try fetching + info, err := u.fetchUploadInfo(ctx, uploadID) + if err != nil { + return Info{}, fmt.Errorf("unable to retrieve upload info: %w", err) + } + u.mu.Lock() + defer u.mu.Unlock() + u.metaCache[uploadID] = info + return info, nil +} + +// retrieves upload metadata info from elasticsearch +func (u *Uploader) fetchUploadInfo(ctx context.Context, uploadID string) (Info, error) { results, err := GetFileDoc(ctx, u.bulker, uploadID) if err != nil { return Info{}, err @@ -384,6 +402,8 @@ func (u *Uploader) GetUploadInfo(ctx context.Context, uploadID string) (Info, er return Info{ ID: fi.UploadID, Source: fi.Source, + AgentID: fi.AgentID, + ActionID: fi.ActionID, DocID: results[0].ID, ChunkSize: fi.File.ChunkSize, Total: fi.File.Size, From 367fa00ba08242b4386c944bcb0de0abcdc08bcf Mon Sep 17 00:00:00 2001 From: pzl Date: Thu, 22 Dec 2022 13:00:11 -0500 Subject: [PATCH 31/51] cleanups, etc --- internal/pkg/api/handleUpload.go | 31 ++-- internal/pkg/upload/es.go | 33 +++- internal/pkg/upload/finalize.go | 145 +++++++++++++++++ internal/pkg/upload/info.go | 82 ++++++++++ internal/pkg/upload/upload.go | 247 ++++++----------------------- internal/pkg/upload/upload_test.go | 84 ---------- 6 files changed, 315 insertions(+), 307 deletions(-) create mode 100644 internal/pkg/upload/finalize.go create mode 100644 internal/pkg/upload/info.go diff --git a/internal/pkg/api/handleUpload.go b/internal/pkg/api/handleUpload.go index 5ce9bc23a..d48c5c1b9 100644 --- a/internal/pkg/api/handleUpload.go +++ b/internal/pkg/api/handleUpload.go @@ -197,7 +197,15 @@ func (ut *UploadT) handleUploadChunk(zlog *zerolog.Logger, w http.ResponseWriter hashsum := hex.EncodeToString(hash.Sum(nil)) if !strings.EqualFold(chunkHash, hashsum) { - // @todo: delete document, since we wrote it, but the hash was invalid + // delete document, since we wrote it, but the hash was invalid + // context scoped to allow this operation to finish even if client disconnects + if err := upload.DeleteChunk(context.Background(), ut.bulker, upinfo.Source, chunkInfo.BID, chunkInfo.Pos); err != nil { + zlog.Warn().Err(err). + Str("source", upinfo.Source). + Str("fileID", chunkInfo.BID). + Int("chunkNum", chunkInfo.Pos). + Msg("a chunk hash mismatch occurred, and fleet server was unable to remove the invalid chunk") + } return upload.ErrHashMismatch } @@ -231,13 +239,6 @@ func (ut *UploadT) handleUploadComplete(zlog *zerolog.Logger, w http.ResponseWri return err } - if err := updateUploadStatus(r.Context(), ut.bulker, info, upload.StatusDone); err != nil { - // should be 500 error probably? 
- zlog.Warn().Err(err).Str("upload", uplID).Msg("unable to set upload status to complete") - return err - - } - _, err = w.Write([]byte(`{"status":"ok"}`)) if err != nil { return err @@ -245,20 +246,6 @@ func (ut *UploadT) handleUploadComplete(zlog *zerolog.Logger, w http.ResponseWri return nil } -func updateUploadStatus(ctx context.Context, bulker bulk.Bulk, info upload.Info, status upload.Status) error { - data, err := json.Marshal(map[string]interface{}{ - "doc": map[string]interface{}{ - "file": map[string]string{ - "Status": string(status), - }, - }, - }) - if err != nil { - return err - } - return upload.UpdateFileDoc(ctx, bulker, info.Source, info.DocID, data) -} - // helper function for doing all the error responsibilities // at the HTTP edge func writeUploadError(err error, w http.ResponseWriter, zlog zerolog.Logger, start time.Time, msg string) { diff --git a/internal/pkg/upload/es.go b/internal/pkg/upload/es.go index 77a1357b5..f7d915e89 100644 --- a/internal/pkg/upload/es.go +++ b/internal/pkg/upload/es.go @@ -5,6 +5,7 @@ package upload import ( + "bytes" "context" "encoding/json" "errors" @@ -35,9 +36,10 @@ const ( ) var ( - QueryChunkIDs = prepareFindChunkIDs() - QueryUploadID = prepareFindByUploadID() - QueryChunkInfo = prepareChunkWithoutData() + QueryChunkIDs = prepareFindChunkIDs() + QueryUploadID = prepareFindMetaByUploadID() + QueryChunkInfo = prepareChunkWithoutData() + MatchChunkByBID = prepareQueryChunkByBID() ) func prepareFindChunkIDs() *dsl.Tmpl { @@ -70,7 +72,7 @@ func prepareChunkWithoutData() *dsl.Tmpl { return tmpl } -func prepareFindByUploadID() *dsl.Tmpl { +func prepareFindMetaByUploadID() *dsl.Tmpl { tmpl := dsl.NewTmpl() root := dsl.NewRoot() //root.Param("_source", false) // do not return large data payload @@ -79,6 +81,14 @@ func prepareFindByUploadID() *dsl.Tmpl { return tmpl } +func prepareQueryChunkByBID() *dsl.Tmpl { + tmpl := dsl.NewTmpl() + root := dsl.NewRoot() + root.Query().Term(FieldBaseID, tmpl.Bind(FieldBaseID), nil) + tmpl.MustResolve(root) + return tmpl +} + /* Metadata Doc Operations */ @@ -219,6 +229,21 @@ func GetChunk(ctx context.Context, bulker bulk.Bulk, source string, fileID strin return chunk, err } +func DeleteChunk(ctx context.Context, bulker bulk.Bulk, source string, fileID string, chunkNum int) error { + return bulker.Delete(ctx, fmt.Sprintf(FileDataIndexPattern, source), fmt.Sprintf("%s.%d", fileID, chunkNum)) +} + +func DeleteChunksByQuery(ctx context.Context, bulker bulk.Bulk, source string, baseID string) error { + q, err := MatchChunkByBID.Render(map[string]interface{}{ + FieldBaseID: baseID, + }) + if err != nil { + return err + } + _, err = bulker.Client().DeleteByQuery([]string{fmt.Sprintf(FileDataIndexPattern, source)}, bytes.NewReader(q)) + return err +} + // convenience function for translating the elasticsearch "field" response format // of "field": { "a": [value], "b": [value] } func getResultField(fields map[string]interface{}, key string) (interface{}, bool) { diff --git a/internal/pkg/upload/finalize.go b/internal/pkg/upload/finalize.go new file mode 100644 index 000000000..4093aa7b2 --- /dev/null +++ b/internal/pkg/upload/finalize.go @@ -0,0 +1,145 @@ +// Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one +// or more contributor license agreements. Licensed under the Elastic License; +// you may not use this file except in compliance with the Elastic License. 
+ +package upload + +import ( + "context" + "crypto/sha256" + "encoding/hex" + "errors" + "sort" + "strings" + + "github.com/rs/zerolog/log" +) + +func (u *Uploader) Complete(ctx context.Context, id string, transitHash string) (Info, error) { + // make sure document is freshly fetched, not cached + // so accurate status checking happens + info, err := FetchUploadInfo(ctx, u.bulker, id) + if err != nil { + return info, err + } + + /* + Verify Upload + */ + + // if already done, failed or deleted, exit + if !info.StatusCanUpload() { + return info, err + } + + chunks, err := GetChunkInfos(ctx, u.bulker, info.DocID) + if err != nil { + return info, err + } + if !u.allChunksPresent(info, chunks) { + return info, ErrMissingChunks + } + if !u.verifyChunkInfo(info, chunks, transitHash) { + if err := SetStatus(ctx, u.bulker, info, StatusFail); err != nil { + log.Warn().Err(err).Str("fileID", info.DocID).Str("uploadID", info.ID).Msg("file upload failed chunk validation, but encountered an error setting the upload status to failure") + } + if err := DeleteChunksByQuery(ctx, u.bulker, info.Source, info.DocID); err != nil { + log.Warn().Err(err).Str("fileID", info.DocID).Str("uploadID", info.ID).Msg("file upload failed chunk validation, but encountered an error deleting left-behind chunk data") + } + return info, errors.New("file contents did not pass validation") + } + + /* + Upload OK. Update status and save valid transithash + */ + if err := MarkComplete(ctx, u.bulker, info, transitHash); err != nil { + return info, err + + } + + return info, nil +} + +func (u *Uploader) allChunksPresent(info Info, chunks []ChunkInfo) bool { + // check overall count + if len(chunks) != info.Count { + log.Warn().Int("expectedCount", info.Count).Int("received", len(chunks)).Interface("chunks", chunks).Msg("mismatch number of chunks") + return false + } + + // now ensure all positions are accounted for, no gaps, etc + sort.Slice(chunks, func(i, j int) bool { + return chunks[i].Pos < chunks[j].Pos + }) + + for i, c := range chunks { + if c.Pos != i { + log.Warn().Int("expected", i).Interface("chunk", c).Msg("chunk position doesn't match. 
May be a gap in uploaded file") + return false + } + } + + return true +} + +func (u *Uploader) verifyChunkInfo(info Info, chunks []ChunkInfo, transitHash string) bool { + // verify all chunks except last are info.ChunkSize size + // verify last: false (or field excluded) for all except final chunk + // verify final chunk is last: true + // verify hash + + hasher := sha256.New() + + for i, chunk := range chunks { + if i < info.Count-1 { + // all chunks except last must have last:false + // and be PRECISELY info.ChunkSize bytes long + if chunk.Last { + log.Debug().Int("chunkID", i).Msg("non-final chunk was incorrectly marked last") + return false + } + if chunk.Size != int(info.ChunkSize) { + log.Debug().Int64("requiredSize", info.ChunkSize).Int("chunkID", i).Int("gotSize", chunk.Size).Msg("chunk was undersized") + return false + } + } else { + // last chunk must be marked last:true + // and can be any valid size (0,ChunkSize] + if !chunk.Last { + log.Debug().Int("chunkID", i).Msg("final chunk was not marked as final") + return false + } + if chunk.Size == 0 { + log.Debug().Int("chunkID", i).Msg("final chunk was 0 size") + return false + } + if chunk.Size > int(info.ChunkSize) { + log.Debug().Int("chunk-size", chunk.Size).Int("maxsize", int(info.ChunkSize)).Msg("final chunk was oversized") + return false + } + } + + // write the byte-decoded hash for this chunk to the + // running hash for the entire file (transithash) + rawHash, err := hex.DecodeString(chunk.SHA2) + if err != nil { + log.Warn().Err(err).Msg("error decoding chunk hash") + return false + } + if n, err := hasher.Write(rawHash); err != nil { + log.Error().Err(err).Msg("error computing transitHash from component chunk hashes") + return false + } else if n != len(rawHash) { + log.Error().Int("wrote", n).Int("expected", len(rawHash)).Msg("transitHash calculation failure, could not write to hasher") + return false + } + } + + calcHash := hex.EncodeToString(hasher.Sum(nil)) + if !strings.EqualFold(transitHash, calcHash) { + log.Warn().Str("provided-hash", transitHash).Str("calc-hash", calcHash).Msg("file upload streaming hash does not match") + return false + } + + return true +} diff --git a/internal/pkg/upload/info.go b/internal/pkg/upload/info.go new file mode 100644 index 000000000..4c78c439f --- /dev/null +++ b/internal/pkg/upload/info.go @@ -0,0 +1,82 @@ +// Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one +// or more contributor license agreements. Licensed under the Elastic License; +// you may not use this file except in compliance with the Elastic License. 
+ +package upload + +import ( + "context" + "encoding/json" + "fmt" + + "github.com/elastic/fleet-server/v7/internal/pkg/bulk" +) + +// retrieves upload metadata info from elasticsearch +func FetchUploadInfo(ctx context.Context, bulker bulk.Bulk, uploadID string) (Info, error) { + results, err := GetFileDoc(ctx, bulker, uploadID) + if err != nil { + return Info{}, err + } + if len(results) == 0 { + return Info{}, ErrInvalidUploadID + } + if len(results) > 1 { + return Info{}, fmt.Errorf("unable to locate upload record, got %d records, expected 1", len(results)) + } + + var fi FileMetaDoc + if err := json.Unmarshal(results[0].Source, &fi); err != nil { + return Info{}, fmt.Errorf("file meta doc parsing error: %w", err) + } + + // calculate number of chunks required + cnt := fi.File.Size / fi.File.ChunkSize + if fi.File.Size%fi.File.ChunkSize > 0 { + cnt += 1 + } + + return Info{ + ID: fi.UploadID, + Source: fi.Source, + AgentID: fi.AgentID, + ActionID: fi.ActionID, + DocID: results[0].ID, + ChunkSize: fi.File.ChunkSize, + Total: fi.File.Size, + Count: int(cnt), + Start: fi.Start, + Status: Status(fi.File.Status), + }, nil +} + +func SetStatus(ctx context.Context, bulker bulk.Bulk, info Info, status Status) error { + data, err := json.Marshal(map[string]interface{}{ + "doc": map[string]interface{}{ + "file": map[string]string{ + "Status": string(status), + }, + }, + }) + if err != nil { + return err + } + return UpdateFileDoc(ctx, bulker, info.Source, info.DocID, data) +} + +func MarkComplete(ctx context.Context, bulker bulk.Bulk, info Info, hash string) error { + data, err := json.Marshal(map[string]interface{}{ + "doc": map[string]interface{}{ + "file": map[string]string{ + "Status": string(StatusDone), + }, + "transithash": map[string]interface{}{ + "sha256": hash, + }, + }, + }) + if err != nil { + return err + } + return UpdateFileDoc(ctx, bulker, info.Source, info.DocID, data) +} diff --git a/internal/pkg/upload/upload.go b/internal/pkg/upload/upload.go index 9f5b27161..da50bbd2c 100644 --- a/internal/pkg/upload/upload.go +++ b/internal/pkg/upload/upload.go @@ -6,12 +6,9 @@ package upload import ( "context" - "crypto/sha256" - "encoding/hex" "encoding/json" "errors" "fmt" - "sort" "strings" "sync" "time" @@ -19,7 +16,6 @@ import ( "github.com/elastic/fleet-server/v7/internal/pkg/bulk" "github.com/elastic/go-elasticsearch/v8" "github.com/gofrs/uuid" - "github.com/rs/zerolog/log" ) const ( @@ -48,16 +44,6 @@ const ( StatusDel Status = "DELETED" ) -type Uploader struct { - metaCache map[string]Info // cache of file metadata doc info - mu sync.RWMutex // lock for the above - sizeLimit int64 // @todo: what if configuration changes? is this recreated with another New()? - timeLimit time.Duration // @todo: same as above - - chunkClient *elasticsearch.Client - bulker bulk.Bulk -} - type Info struct { ID string // upload operation identifier. 
Used to identify the upload process DocID string // document ID of the uploaded file and chunks @@ -77,6 +63,38 @@ func (i Info) StatusCanUpload() bool { // returns true if more chunks can be upl return !(i.Status == StatusFail || i.Status == StatusDone || i.Status == StatusDel) } +type FileData struct { + Size int64 `json:"size"` + ChunkSize int64 `json:"ChunkSize"` + Status string `json:"Status"` +} + +type FileMetaDoc struct { + ActionID string `json:"action_id"` + AgentID string `json:"agent_id"` + Source string `json:"src"` + File FileData `json:"file"` + UploadID string `json:"upload_id"` + Start time.Time `json:"upload_start"` +} + +// custom unmarshaller to make unix-epoch values work +func (f *FileMetaDoc) UnmarshalJSON(b []byte) error { + type InnerFile FileMetaDoc // type alias to prevent recursion into this func + // override the field to parse as an int, then manually convert to time.time + var tmp struct { + InnerFile + Start int64 `json:"upload_start"` + } + if err := json.Unmarshal(b, &tmp); err != nil { + return err + } + + *f = FileMetaDoc(tmp.InnerFile) // copy over all fields + f.Start = time.UnixMilli(tmp.Start) + return nil +} + type ChunkInfo struct { Pos int // Ordered chunk position in file Last bool // Is this the final chunk in the file @@ -86,6 +104,18 @@ type ChunkInfo struct { //FirstReceived bool } +type Uploader struct { + metaCache map[string]Info // cache of file metadata doc info + mu sync.RWMutex // lock for the above + // @todo: cache eviction so it's not unbounded growth + // @todo: cache refresh so status is accurate + sizeLimit int64 // @todo: what if configuration changes? is this recreated with another New()? + timeLimit time.Duration // @todo: same as above + + chunkClient *elasticsearch.Client + bulker bulk.Bulk +} + func New(chunkClient *elasticsearch.Client, bulker bulk.Bulk, sizeLimit int64, timeLimit time.Duration) *Uploader { return &Uploader{ chunkClient: chunkClient, @@ -176,12 +206,16 @@ func (u *Uploader) Begin(ctx context.Context, data JSDict) (Info, error) { } func (u *Uploader) Chunk(ctx context.Context, uplID string, chunkNum int, chunkHash string) (Info, ChunkInfo, error) { - + // find the upload, details, and status associated with the file upload info, err := u.GetUploadInfo(ctx, uplID) if err != nil { return Info{}, ChunkInfo{}, err } + /* + Verify Chunk upload can proceed + */ + if info.Expired(u.timeLimit) { return Info{}, ChunkInfo{}, ErrUploadExpired } @@ -202,26 +236,6 @@ func (u *Uploader) Chunk(ctx context.Context, uplID string, chunkNum int, chunkH }, nil } -func (u *Uploader) Complete(ctx context.Context, id string, transitHash string) (Info, error) { - info, err := u.GetUploadInfo(ctx, id) - if err != nil { - return Info{}, err - } - - chunks, err := GetChunkInfos(ctx, u.bulker, info.DocID) - if err != nil { - return Info{}, err - } - if !u.allChunksPresent(info, chunks) { - return Info{}, ErrMissingChunks - } - if !u.verifyChunkInfo(info, chunks, transitHash) { - return Info{}, errors.New("file contents did not pass validation") - } - - return info, nil -} - func (u *Uploader) cleanupOperation(uplID string) { u.mu.Lock() defer u.mu.Unlock() @@ -242,90 +256,6 @@ func (u *Uploader) finalize(uplID string) error { return nil } -func (u *Uploader) allChunksPresent(info Info, chunks []ChunkInfo) bool { - // check overall count - if len(chunks) != info.Count { - log.Warn().Int("expectedCount", info.Count).Int("received", len(chunks)).Interface("chunks", chunks).Msg("mismatch number of chunks") - return false - } - - // now 
ensure all positions are accounted for, no gaps, etc - sort.Slice(chunks, func(i, j int) bool { - return chunks[i].Pos < chunks[j].Pos - }) - - for i, c := range chunks { - if c.Pos != i { - log.Warn().Int("expected", i).Interface("chunk", c).Msg("chunk position doesn't match. May be a gap in uploaded file") - return false - } - } - - return true -} - -func (u *Uploader) verifyChunkInfo(info Info, chunks []ChunkInfo, transitHash string) bool { - // verify all chunks except last are info.ChunkSize size - // verify last: false (or field excluded) for all except final chunk - // verify final chunk is last: true - // verify hash - - hasher := sha256.New() - - for i, chunk := range chunks { - if i < info.Count-1 { - // all chunks except last must have last:false - // and be PRECISELY info.ChunkSize bytes long - if chunk.Last { - log.Debug().Int("chunkID", i).Msg("non-final chunk was incorrectly marked last") - return false - } - if chunk.Size != int(info.ChunkSize) { - log.Debug().Int64("requiredSize", info.ChunkSize).Int("chunkID", i).Int("gotSize", chunk.Size).Msg("chunk was undersized") - return false - } - } else { - // last chunk must be marked last:true - // and can be any valid size (0,ChunkSize] - if !chunk.Last { - log.Debug().Int("chunkID", i).Msg("final chunk was not marked as final") - return false - } - if chunk.Size == 0 { - log.Debug().Int("chunkID", i).Msg("final chunk was 0 size") - return false - } - if chunk.Size > int(info.ChunkSize) { - log.Debug().Int("chunk-size", chunk.Size).Int("maxsize", int(info.ChunkSize)).Msg("final chunk was oversized") - return false - } - } - - // write the byte-decoded hash for this chunk to the - // running hash for the entire file (transithash) - rawHash, err := hex.DecodeString(chunk.SHA2) - if err != nil { - log.Warn().Err(err).Msg("error decoding chunk hash") - return false - } - if n, err := hasher.Write(rawHash); err != nil { - log.Error().Err(err).Msg("error computing transitHash from component chunk hashes") - return false - } else if n != len(rawHash) { - log.Error().Int("wrote", n).Int("expected", len(rawHash)).Msg("transitHash calculation failure, could not write to hasher") - return false - } - } - - calcHash := hex.EncodeToString(hasher.Sum(nil)) - if !strings.EqualFold(transitHash, calcHash) { - log.Warn().Str("provided-hash", transitHash).Str("calc-hash", calcHash).Msg("file upload streaming hash does not match") - return false - } - - return true -} - func validateUploadPayload(info JSDict) error { required := [][]string{ @@ -365,7 +295,7 @@ func (u *Uploader) GetUploadInfo(ctx context.Context, uploadID string) (Info, er } // not found in cache, try fetching - info, err := u.fetchUploadInfo(ctx, uploadID) + info, err := FetchUploadInfo(ctx, u.bulker, uploadID) if err != nil { return Info{}, fmt.Errorf("unable to retrieve upload info: %w", err) } @@ -374,80 +304,3 @@ func (u *Uploader) GetUploadInfo(ctx context.Context, uploadID string) (Info, er u.metaCache[uploadID] = info return info, nil } - -// retrieves upload metadata info from elasticsearch -func (u *Uploader) fetchUploadInfo(ctx context.Context, uploadID string) (Info, error) { - results, err := GetFileDoc(ctx, u.bulker, uploadID) - if err != nil { - return Info{}, err - } - if len(results) == 0 { - return Info{}, ErrInvalidUploadID - } - if len(results) > 1 { - return Info{}, fmt.Errorf("unable to locate upload record, got %d records, expected 1", len(results)) - } - - var fi FileMetaDoc - if err := json.Unmarshal(results[0].Source, &fi); err != nil { - return Info{}, 
fmt.Errorf("file meta doc parsing error: %w", err) - } - - // calculate number of chunks required - cnt := fi.File.Size / fi.File.ChunkSize - if fi.File.Size%fi.File.ChunkSize > 0 { - cnt += 1 - } - - return Info{ - ID: fi.UploadID, - Source: fi.Source, - AgentID: fi.AgentID, - ActionID: fi.ActionID, - DocID: results[0].ID, - ChunkSize: fi.File.ChunkSize, - Total: fi.File.Size, - Count: int(cnt), - Start: fi.Start, - Status: Status(fi.File.Status), - }, nil -} - -type FileData struct { - Size int64 `json:"size"` - ChunkSize int64 `json:"ChunkSize"` - Status string `json:"Status"` - Name string `json:"name"` - Mime string `json:"mime_type"` - Hash struct { - SHA256 string `json:"sha256"` - MD5 string `json:"md5"` - } `json:"hash"` -} - -type FileMetaDoc struct { - ActionID string `json:"action_id"` - AgentID string `json:"agent_id"` - Source string `json:"src"` - File FileData `json:"file"` - UploadID string `json:"upload_id"` - Start time.Time `json:"upload_start"` -} - -// custom unmarshaller to make unix-epoch values -// work -func (f *FileMetaDoc) UnmarshalJSON(b []byte) error { - type InnerFile FileMetaDoc // type alias to prevent recursion into this func - // override the field to parse as an int, then manually convert to time.time - var tmp struct { - InnerFile - Start int64 `json:"upload_start"` - } - if err := json.Unmarshal(b, &tmp); err != nil { - return err - } - - *f = FileMetaDoc(tmp.InnerFile) // copy over all fields - f.Start = time.UnixMilli(tmp.Start) - return nil -} diff --git a/internal/pkg/upload/upload_test.go b/internal/pkg/upload/upload_test.go index d4da69cfa..3f81bc162 100644 --- a/internal/pkg/upload/upload_test.go +++ b/internal/pkg/upload/upload_test.go @@ -5,90 +5,6 @@ package upload /* -func TestMaxParallelUploadOpsReached(t *testing.T) { - _ = testlog.SetLogger(t) - opLimit := 4 - - u := New(500, opLimit, 0) - - var err error - for i := 0; i < opLimit; i++ { - _, err = u.Begin(100, "", "") - assert.NoError(t, err) - } - - _, err = u.Begin(100, "", "") - assert.ErrorIs(t, err, ErrMaxConcurrentUploads) -} - -func TestMaxParallelUploadOpsReleased(t *testing.T) { - opLimit := 4 - u := New(500, opLimit, 0) - - // generate max operations - ops := make([]Info, 0, opLimit) - for i := 0; i < opLimit; i++ { - op, err := u.Begin(100, strconv.Itoa(i), "") - require.NoError(t, err) - ops = append(ops, op) - } - // and verify max was reached - _, err := u.Begin(100, "X", "") - assert.ErrorIs(t, err, ErrMaxConcurrentUploads) - - // finishing an op should release the hold and allow another to begin - _, err = u.Complete(ops[0].ID) - require.NoError(t, err) - - time.Sleep(5 * time.Millisecond) // occasionally, a little time was required for the change to propagate - - _, err = u.Begin(100, "Y", "") - assert.NoError(t, err) -} - -func TestMaxParallelChunks(t *testing.T) { - chunkLim := 3 - - u := New(104857600, 1, chunkLim) - - // start an operation, that can have more than the test limit chunks - op, err := u.Begin(MaxChunkSize*int64(chunkLim+2), "", "") - require.NoError(t, err) - - // upload up to the limit chunks, without releasing the request - for i := 0; i < chunkLim; i++ { - _, err := u.Chunk(op.ID, i) - require.NoError(t, err) - } - - _, err = u.Chunk(op.ID, chunkLim) - assert.ErrorIs(t, err, ErrMaxConcurrentUploads) -} - -func TestMaxParallelChunksReleased(t *testing.T) { - chunkLim := 3 - - u := New(104857600, 1, chunkLim) - - // start an operation, that can have more than the test limit chunks - op, err := u.Begin(MaxChunkSize*int64(chunkLim+2), "", "") - 
require.NoError(t, err) - - // upload up to the limit chunks, without releasing the request - chunks := make([]ChunkInfo, 0, chunkLim) - for i := 0; i < chunkLim; i++ { - info, err := u.Chunk(op.ID, i) - require.NoError(t, err) - chunks = append(chunks, info) - } - _, err = u.Chunk(op.ID, chunkLim) - assert.ErrorIs(t, err, ErrMaxConcurrentUploads) - - chunks[0].Token.Release() - - _, err = u.Chunk(op.ID, chunkLim) - assert.NoError(t, err) -} func TestUploadChunkCount(t *testing.T) { tests := []struct { From 4b62be3320e45290671ef8d3f5bf07b7822b0636 Mon Sep 17 00:00:00 2001 From: pzl Date: Thu, 22 Dec 2022 13:08:00 -0500 Subject: [PATCH 32/51] remove vestigial functions --- internal/pkg/upload/upload.go | 20 -------------------- 1 file changed, 20 deletions(-) diff --git a/internal/pkg/upload/upload.go b/internal/pkg/upload/upload.go index da50bbd2c..d886a3ea6 100644 --- a/internal/pkg/upload/upload.go +++ b/internal/pkg/upload/upload.go @@ -236,26 +236,6 @@ func (u *Uploader) Chunk(ctx context.Context, uplID string, chunkNum int, chunkH }, nil } -func (u *Uploader) cleanupOperation(uplID string) { - u.mu.Lock() - defer u.mu.Unlock() - delete(u.metaCache, uplID) -} - -func (u *Uploader) cancel(uplID string) error { - u.cleanupOperation(uplID) - - // @todo: delete any uploaded chunks from ES - // leave header doc and mark failed? - return nil -} - -func (u *Uploader) finalize(uplID string) error { - u.cleanupOperation(uplID) - // @todo: write Status:READY here? - return nil -} - func validateUploadPayload(info JSDict) error { required := [][]string{ From 65b33b1568217cc207158498823badd197ebce0a Mon Sep 17 00:00:00 2001 From: pzl Date: Mon, 9 Jan 2023 16:44:37 -0500 Subject: [PATCH 33/51] reuse trimmed hash --- internal/pkg/api/handleUpload.go | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/internal/pkg/api/handleUpload.go b/internal/pkg/api/handleUpload.go index d48c5c1b9..506c67f29 100644 --- a/internal/pkg/api/handleUpload.go +++ b/internal/pkg/api/handleUpload.go @@ -230,11 +230,12 @@ func (ut *UploadT) handleUploadComplete(zlog *zerolog.Logger, w http.ResponseWri return errors.New("unable to parse request body") } - if strings.TrimSpace(req.TransitHash.SHA256) == "" { + hash := strings.TrimSpace(req.TransitHash.SHA256) + if hash == "" { return errors.New("transit hash required") } - info, err = ut.uploader.Complete(r.Context(), uplID, req.TransitHash.SHA256) + info, err = ut.uploader.Complete(r.Context(), uplID, hash) if err != nil { return err } From b0eee36645b41e49265f08916147100117259ba1 Mon Sep 17 00:00:00 2001 From: pzl Date: Mon, 9 Jan 2023 16:51:45 -0500 Subject: [PATCH 34/51] clarify some offsets in chunk writing --- internal/pkg/upload/cbor/chunk.go | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/internal/pkg/upload/cbor/chunk.go b/internal/pkg/upload/cbor/chunk.go index a733da2a4..8e5a2ebc5 100644 --- a/internal/pkg/upload/cbor/chunk.go +++ b/internal/pkg/upload/cbor/chunk.go @@ -109,6 +109,8 @@ func encodePreambleToCBOR(final bool, baseID string, chunkHash string, chunkSize return preamble } +const varLenHeaderSize = 5 + // io.Reader interface for streaming out func (c *ChunkEncoder) Read(buf []byte) (int, error) { if c.wroteTerm { // already wrote a terminating instruction for undefined byte sequence length @@ -129,7 +131,7 @@ func (c *ChunkEncoder) Read(buf []byte) (int, error) { if len(buf) < 10 { return 0, errors.New("buffer too small") } - n, err := c.chunk.Read(buf[5:]) + n, err := c.chunk.Read(buf[varLenHeaderSize:]) 
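	// Header layout for the bytes written just below: buf[0] carries 0x5A, the
	// CBOR byte-string major type whose length is given in the following 4 bytes;
	// buf[1:5] holds that big-endian uint32 chunk length, so payload bytes start
	// at buf[varLenHeaderSize:] (offset 5). The 0xFF appended later in this
	// function is the CBOR "break" code closing the indefinite-length sequence.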
buf[0] = 0x5A // 4-byte length descriptor to follow binary.BigEndian.PutUint32(buf[1:], uint32(n)) @@ -140,8 +142,8 @@ func (c *ChunkEncoder) Read(buf []byte) (int, error) { return 1, io.EOF } // if we can tack-on the terminating byte from this read call, do it - if len(buf) > n+5+1 { - buf[n+5] = 0xFF + if len(buf) > n+varLenHeaderSize+1 { + buf[n+varLenHeaderSize] = 0xFF c.wroteTerm = true n = n + 1 } else { @@ -149,7 +151,7 @@ func (c *ChunkEncoder) Read(buf []byte) (int, error) { err = nil } } - return n + 5, err + return n + varLenHeaderSize, err } return c.chunk.Read(buf) From 270902477a794cb702ec0a1f32e10d5795c36d7e Mon Sep 17 00:00:00 2001 From: pzl Date: Mon, 9 Jan 2023 16:56:41 -0500 Subject: [PATCH 35/51] cleanup some error objects --- internal/pkg/upload/finalize.go | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/internal/pkg/upload/finalize.go b/internal/pkg/upload/finalize.go index 4093aa7b2..d138cf91d 100644 --- a/internal/pkg/upload/finalize.go +++ b/internal/pkg/upload/finalize.go @@ -15,6 +15,11 @@ import ( "github.com/rs/zerolog/log" ) +var ( + ErrFailValidation = errors.New("file contents failed validation") + ErrStatusNoUploads = errors.New("file closed, not accepting uploads") +) + func (u *Uploader) Complete(ctx context.Context, id string, transitHash string) (Info, error) { // make sure document is freshly fetched, not cached // so accurate status checking happens @@ -29,7 +34,7 @@ func (u *Uploader) Complete(ctx context.Context, id string, transitHash string) // if already done, failed or deleted, exit if !info.StatusCanUpload() { - return info, err + return info, ErrStatusNoUploads } chunks, err := GetChunkInfos(ctx, u.bulker, info.DocID) @@ -46,7 +51,7 @@ func (u *Uploader) Complete(ctx context.Context, id string, transitHash string) if err := DeleteChunksByQuery(ctx, u.bulker, info.Source, info.DocID); err != nil { log.Warn().Err(err).Str("fileID", info.DocID).Str("uploadID", info.ID).Msg("file upload failed chunk validation, but encountered an error deleting left-behind chunk data") } - return info, errors.New("file contents did not pass validation") + return info, ErrFailValidation } /* From 3004ac5b5642565f2a6a0e556c26b488d890f5d9 Mon Sep 17 00:00:00 2001 From: pzl Date: Tue, 17 Jan 2023 07:28:37 -0500 Subject: [PATCH 36/51] cleanup temp auth flag --- internal/pkg/api/handleUpload.go | 29 ++++++++++------------------- 1 file changed, 10 insertions(+), 19 deletions(-) diff --git a/internal/pkg/api/handleUpload.go b/internal/pkg/api/handleUpload.go index 506c67f29..ff917af37 100644 --- a/internal/pkg/api/handleUpload.go +++ b/internal/pkg/api/handleUpload.go @@ -33,9 +33,6 @@ const ( // TODO: move to a config maxFileSize = 104857600 // 100 MiB maxUploadTimer = 24 * time.Hour - - // temp for easy development - AUTH_ENABLED = false // @todo: remove ) func (rt Router) handleUploadStart(w http.ResponseWriter, r *http.Request, ps httprouter.Params) { @@ -66,14 +63,12 @@ func (rt Router) handleUploadChunk(w http.ResponseWriter, r *http.Request, ps ht Str(ECSHTTPRequestID, reqID). 
Logger() - // simpler authentication check, for high chunk throughput - // since chunk checksums must match transit hash - // AND optionally the initial hash, both having stricter auth checks - if AUTH_ENABLED { - if _, err := authAPIKey(r, rt.bulker, rt.ut.cache); err != nil { - writeUploadError(err, w, zlog, start, "authentication failure for chunk write") - return - } + // simpler authentication check, for high chunk throughput + // since chunk checksums must match transit hash + // AND optionally the initial hash, both having stricter auth checks + if _, err := authAPIKey(r, rt.bulker, rt.ut.cache); err != nil { + writeUploadError(err, w, zlog, start, "authentication failure for chunk write") + return } chunkNum, err := strconv.Atoi(chunkID) @@ -143,10 +138,8 @@ func (ut *UploadT) handleUploadStart(zlog *zerolog.Logger, w http.ResponseWriter if !ok || agentID == "" { return errors.New("required field agent_id is missing") } - if AUTH_ENABLED { - if _, err := authAgent(r, &agentID, ut.bulker, ut.cache); err != nil { - return err - } + if _, err := authAgent(r, &agentID, ut.bulker, ut.cache); err != nil { + return err } // validate payload, enrich with additional fields, and write metadata doc to ES @@ -219,10 +212,8 @@ func (ut *UploadT) handleUploadComplete(zlog *zerolog.Logger, w http.ResponseWri } // need to auth that it matches the ID in the initial // doc, but that means we had to doc-lookup early - if AUTH_ENABLED { - if _, err := authAgent(r, &info.AgentID, ut.bulker, ut.cache); err != nil { - return fmt.Errorf("Error authenticating for upload finalization: %w", err) - } + if _, err := authAgent(r, &info.AgentID, ut.bulker, ut.cache); err != nil { + return fmt.Errorf("Error authenticating for upload finalization: %w", err) } var req UploadCompleteRequest From 1c18186059b56d5a5456234579d6011412dcfbda Mon Sep 17 00:00:00 2001 From: pzl Date: Tue, 17 Jan 2023 08:02:13 -0500 Subject: [PATCH 37/51] turn down some linters for intentional decisions --- internal/pkg/api/handleUpload.go | 4 ++-- internal/pkg/upload/cbor/chunk_test.go | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/internal/pkg/api/handleUpload.go b/internal/pkg/api/handleUpload.go index ff917af37..775014e5b 100644 --- a/internal/pkg/api/handleUpload.go +++ b/internal/pkg/api/handleUpload.go @@ -123,7 +123,7 @@ func NewUploadT(cfg *config.Server, bulker bulk.Bulk, chunkClient *elasticsearch } } -func (ut *UploadT) handleUploadStart(zlog *zerolog.Logger, w http.ResponseWriter, r *http.Request) error { +func (ut *UploadT) handleUploadStart(zlog *zerolog.Logger, w http.ResponseWriter, r *http.Request) error { //nolint:unparam // log is standard first arg for the handlers // decode early to match agentID in the payload payload, err := upload.ReadDict(r.Body) if err != nil { @@ -205,7 +205,7 @@ func (ut *UploadT) handleUploadChunk(zlog *zerolog.Logger, w http.ResponseWriter return nil } -func (ut *UploadT) handleUploadComplete(zlog *zerolog.Logger, w http.ResponseWriter, r *http.Request, uplID string) error { +func (ut *UploadT) handleUploadComplete(zlog *zerolog.Logger, w http.ResponseWriter, r *http.Request, uplID string) error { //nolint:unparam // log is standard first arg for the handlers info, err := ut.uploader.GetUploadInfo(r.Context(), uplID) if err != nil { return err diff --git a/internal/pkg/upload/cbor/chunk_test.go b/internal/pkg/upload/cbor/chunk_test.go index 274d708e2..69b6fc056 100644 --- a/internal/pkg/upload/cbor/chunk_test.go +++ b/internal/pkg/upload/cbor/chunk_test.go @@ -80,7 +80,7 
@@ func TestChunkWriterLargeLastChunk(t *testing.T) { contents := make([]byte, 4096) - n, err := rand.Read(contents) + n, err := rand.Read(contents) //nolint:gosec // weak RNG here is just for testing require.NoError(t, err) require.Equal(t, n, 4096) From b374287c44b9ba0163e8c85f536954cc465d9dea Mon Sep 17 00:00:00 2001 From: pzl Date: Tue, 17 Jan 2023 11:33:20 -0500 Subject: [PATCH 38/51] restore some of the upload tests --- internal/pkg/upload/jsdict.go | 4 + internal/pkg/upload/upload_test.go | 228 +++++++++++++++++++++++++---- 2 files changed, 201 insertions(+), 31 deletions(-) diff --git a/internal/pkg/upload/jsdict.go b/internal/pkg/upload/jsdict.go index 7de11494c..c0fc64927 100644 --- a/internal/pkg/upload/jsdict.go +++ b/internal/pkg/upload/jsdict.go @@ -66,6 +66,10 @@ func (j JSDict) Int64(keys ...string) (int64, bool) { case json.Number: // json UseNumber() to get int64 directly n, err := v.Int64() return n, err == nil + case int: + return int64(v), true + case int64: + return v, true default: return 0, false } diff --git a/internal/pkg/upload/upload_test.go b/internal/pkg/upload/upload_test.go index 3f81bc162..c849fad02 100644 --- a/internal/pkg/upload/upload_test.go +++ b/internal/pkg/upload/upload_test.go @@ -4,32 +4,223 @@ package upload -/* +import ( + "context" + "encoding/json" + "strings" + "testing" + "time" + + itesting "github.com/elastic/fleet-server/v7/internal/pkg/testing" + + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/mock" +) + +// convenience function for making a typical file request structure +// with defaults when specific values are not checked or required +func makeUploadRequestDict(input map[string]interface{}) JSDict { + // defaults + d := JSDict{ + "file": map[string]interface{}{ + "name": "foo.png", + "mime_type": "image/png", + "size": 1024, + }, + "action_id": "123", + "agent_id": "456", + "src": "agent", + } + + if input == nil { + return d + } + + // fill in any provided values, e.g. 
"file.name": "test.zip" + for k, v := range input { + dict := map[string]interface{}(d) + keys := strings.Split(k, ".") + for i, key := range keys { + if i < len(keys)-1 { + dict, _ = dict[key].(map[string]interface{}) + continue + } + dict[key] = v + } + } + return d +} + +// Happy-path case, where everything expected is provided +// tests to make sure the returned struct is correctly populated +func TestUploadBeginReturnsCorrectInfo(t *testing.T) { + size := 2048 + src := "mysource" + action := "abc" + agent := "XYZ" + data := makeUploadRequestDict(map[string]interface{}{ + "action_id": action, + "agent_id": agent, + "src": src, + "file.size": size, + }) + + fakeBulk := itesting.NewMockBulk() + + fakeBulk.On("Create", + mock.MatchedBy(func(_ context.Context) bool { return true }), // match context.Context + ".fleet-files-"+src, // index + action+"."+agent, // document ID + mock.Anything, // ES document + mock.Anything, // bulker options + ).Return("", nil) + + u := New(nil, fakeBulk, int64(size), time.Hour) + info, err := u.Begin(context.Background(), data) + assert.NoError(t, err) + + assert.Equal(t, int64(size), info.Total) + assert.Equal(t, action, info.ActionID) + assert.Equal(t, agent, info.AgentID) + assert.Equal(t, src, info.Source) + assert.Equal(t, StatusAwaiting, info.Status) + assert.Greaterf(t, info.ChunkSize, int64(0), "server chosen chunk size should be >0") + assert.Equal(t, action+"."+agent, info.DocID) + assert.WithinDuration(t, time.Now(), info.Start, time.Minute) +} + +// Happy-path case, where everything expected is provided +// tests the document sent to elasticsearch passes through +// the correct fields from input +func TestUploadBeginWritesDocumentFromInputs(t *testing.T) { + size := 3096 + src := "foo" + action := "abcd-ef" + agent := "xyz-123" + name := "test.zip" + + data := makeUploadRequestDict(map[string]interface{}{ + "action_id": action, + "agent_id": agent, + "src": src, + "file.name": name, + "file.size": size, + }) + + fakeBulk := itesting.NewMockBulk() + + fakeBulk.On("Create", + mock.MatchedBy(func(_ context.Context) bool { return true }), // match context.Context + ".fleet-files-"+src, // index + action+"."+agent, // document ID + mock.Anything, // ES document + mock.Anything, // bulker options + ).Return("", nil) + + u := New(nil, fakeBulk, int64(size), time.Hour) + _, err := u.Begin(context.Background(), data) + assert.NoError(t, err) + + payload, ok := fakeBulk.Calls[0].Arguments[3].([]byte) + assert.Truef(t, ok, "argument to es create should be byte slice") + + j := make(JSDict) + err = json.Unmarshal(payload, &j) + assert.NoError(t, err) + +} + +func TestUploadBeginCalculatesCorrectChunkCount(t *testing.T) { + fakeBulk := itesting.NewMockBulk() + + fakeBulk.On("Create", + mock.Anything, // match context.Context + mock.Anything, // index + mock.Anything, // document ID + mock.Anything, // ES document + mock.Anything, // bulker options + ).Return("", nil) -func TestUploadChunkCount(t *testing.T) { tests := []struct { FileSize int64 ExpectedCount int Name string }{ {10, 1, "Tiny files take 1 chunk"}, - {MaxChunkSize, 1, "Precisely 1 chunk size bytes will fit in 1 chunk"}, + {MaxChunkSize, 1, "Precisely 1 chunk size bytes should fit in 1 chunk"}, {MaxChunkSize + 1, 2, "ChunkSize+1 bytes takes 2 chunks"}, - {MaxChunkSize * 2.5, 3, "2.5x chunk size fits in 3 chunks"}, + {MaxChunkSize * 3.5, 4, "3.5x chunk size fits in 4 chunks due to remainder"}, {7534559605, 1797, "7.5Gb file"}, } - u := New(8388608000, len(tests), 1) + u := New(nil, fakeBulk, 
MaxChunkSize*3000, time.Hour) for _, tc := range tests { t.Run(tc.Name, func(t *testing.T) { - info, err := u.Begin(tc.FileSize, "", "") + data := makeUploadRequestDict(map[string]interface{}{ + "file.size": tc.FileSize, + }) + info, err := u.Begin(context.Background(), data) assert.NoError(t, err) assert.Equal(t, tc.ExpectedCount, info.Count) }) } } +func TestUploadBeginMaxFileSize(t *testing.T) { + tests := []struct { + UploadSizeLimit int64 + FileSize int64 + ShouldError bool + Name string + }{ + {500, 800, true, "800 is too large"}, + {800, 500, false, "file within limits"}, + {1024, 1023, false, "1-less than limit"}, + {1024, 1024, false, "file is exactly limit"}, + {1024, 1025, true, "file is 1 over limit"}, + } + + fakeBulk := itesting.NewMockBulk() + fakeBulk.On("Create", + mock.Anything, // context.Context + mock.Anything, // index + mock.Anything, // document ID + mock.Anything, // ES document + mock.Anything, // bulker options + ).Return("", nil) + + for _, tc := range tests { + t.Run(tc.Name, func(t *testing.T) { + u := New(nil, fakeBulk, tc.UploadSizeLimit, time.Hour) + data := makeUploadRequestDict(map[string]interface{}{ + "file.size": tc.FileSize, + }) + _, err := u.Begin(context.Background(), data) + if tc.ShouldError { + assert.ErrorIs(t, err, ErrFileSizeTooLarge) + } else { + assert.NoError(t, err) + } + }) + } +} + +/* +func TestUploadRejectsMissingRequiredFields(t *testing.T) { + data := makeUploadRequestDict() + + u := New(nil, nil, 1024, time.Hour) + info, err := u.Begin(context.Background(), data) + assert.Error(t, err) + +} + + +*/ + +/* + func TestChunkMarksFinal(t *testing.T) { tests := []struct { FileSize int64 @@ -65,30 +256,5 @@ func TestChunkMarksFinal(t *testing.T) { } } -func TestMaxFileSize(t *testing.T) { - tests := []struct { - MaxSize int64 - TryFile int64 - ShouldError bool - Name string - }{ - {500, 800, true, "800 is too large"}, - {800, 500, false, "file within limits"}, - {1024, 1023, false, "1-less than limit"}, - {1024, 1024, false, "file is exactly limit"}, - {1024, 1025, true, "file is 1 over limit"}, - } - for _, tc := range tests { - t.Run(tc.Name, func(t *testing.T) { - u := New(tc.MaxSize, 1, 1) - _, err := u.Begin(tc.TryFile, "", "") - if tc.ShouldError { - assert.ErrorIs(t, err, ErrFileSizeTooLarge) - } else { - assert.NoError(t, err) - } - }) - } -} */ From f7dc8a28bac49dfde34d23af98f35277c811af01 Mon Sep 17 00:00:00 2001 From: pzl Date: Tue, 17 Jan 2023 11:33:39 -0500 Subject: [PATCH 39/51] ensure Info fields are populated --- internal/pkg/upload/upload.go | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/internal/pkg/upload/upload.go b/internal/pkg/upload/upload.go index d886a3ea6..fccb33377 100644 --- a/internal/pkg/upload/upload.go +++ b/internal/pkg/upload/upload.go @@ -166,6 +166,8 @@ func (u *Uploader) Begin(ctx context.Context, data JSDict) (Info, error) { ChunkSize: MaxChunkSize, Source: source, Total: size, + Status: StatusAwaiting, + Start: time.Now(), } chunkCount := info.Total / info.ChunkSize if info.Total%info.ChunkSize > 0 { @@ -180,13 +182,13 @@ func (u *Uploader) Begin(ctx context.Context, data JSDict) (Info, error) { if err := data.Put(info.ChunkSize, "file", "ChunkSize"); err != nil { return Info{}, err } - if err := data.Put(string(StatusAwaiting), "file", "Status"); err != nil { + if err := data.Put(info.Status, "file", "Status"); err != nil { return Info{}, err } if err := data.Put(id, "upload_id"); err != nil { return Info{}, err } - if err := data.Put(time.Now().UnixMilli(), "upload_start"); 
err != nil { + if err := data.Put(info.Start.UnixMilli(), "upload_start"); err != nil { return Info{}, err } From 6cc2b7188d82538b4e94a2b64f0ce6a962a65a7d Mon Sep 17 00:00:00 2001 From: pzl Date: Tue, 17 Jan 2023 14:18:09 -0500 Subject: [PATCH 40/51] comment spacing --- internal/pkg/api/handleUpload.go | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/internal/pkg/api/handleUpload.go b/internal/pkg/api/handleUpload.go index 775014e5b..e731e1f5b 100644 --- a/internal/pkg/api/handleUpload.go +++ b/internal/pkg/api/handleUpload.go @@ -63,9 +63,9 @@ func (rt Router) handleUploadChunk(w http.ResponseWriter, r *http.Request, ps ht Str(ECSHTTPRequestID, reqID). Logger() - // simpler authentication check, for high chunk throughput - // since chunk checksums must match transit hash - // AND optionally the initial hash, both having stricter auth checks + // simpler authentication check, for high chunk throughput + // since chunk checksums must match transit hash + // AND optionally the initial hash, both having stricter auth checks if _, err := authAPIKey(r, rt.bulker, rt.ut.cache); err != nil { writeUploadError(err, w, zlog, start, "authentication failure for chunk write") return From 046b3107c4f54d52d8ed991f654da63b7e5f466c Mon Sep 17 00:00:00 2001 From: pzl Date: Tue, 17 Jan 2023 16:08:47 -0500 Subject: [PATCH 41/51] namespace the exportable structs to avoid circular imports coming --- internal/pkg/api/handleUpload.go | 18 ++--- .../pkg/{upload => uploader}/cbor/chunk.go | 0 .../{upload => uploader}/cbor/chunk_test.go | 0 internal/pkg/{upload => uploader}/doc.go | 2 +- internal/pkg/{upload => uploader}/es.go | 4 +- internal/pkg/{upload => uploader}/es_test.go | 2 +- internal/pkg/{upload => uploader}/finalize.go | 11 +-- internal/pkg/{upload => uploader}/info.go | 23 +++--- internal/pkg/{upload => uploader}/jsdict.go | 2 +- .../pkg/{upload => uploader}/jsdict_test.go | 2 +- internal/pkg/{upload => uploader}/upload.go | 79 ++++++------------- internal/pkg/uploader/upload/info.go | 37 +++++++++ .../pkg/{upload => uploader}/upload_test.go | 5 +- 13 files changed, 98 insertions(+), 87 deletions(-) rename internal/pkg/{upload => uploader}/cbor/chunk.go (100%) rename internal/pkg/{upload => uploader}/cbor/chunk_test.go (100%) rename internal/pkg/{upload => uploader}/doc.go (96%) rename internal/pkg/{upload => uploader}/es.go (99%) rename internal/pkg/{upload => uploader}/es_test.go (99%) rename internal/pkg/{upload => uploader}/finalize.go (91%) rename internal/pkg/{upload => uploader}/info.go (70%) rename internal/pkg/{upload => uploader}/jsdict.go (99%) rename internal/pkg/{upload => uploader}/jsdict_test.go (99%) rename internal/pkg/{upload => uploader}/upload.go (74%) create mode 100644 internal/pkg/uploader/upload/info.go rename internal/pkg/{upload => uploader}/upload_test.go (98%) diff --git a/internal/pkg/api/handleUpload.go b/internal/pkg/api/handleUpload.go index e731e1f5b..ae862a7f1 100644 --- a/internal/pkg/api/handleUpload.go +++ b/internal/pkg/api/handleUpload.go @@ -21,8 +21,8 @@ import ( "github.com/elastic/fleet-server/v7/internal/pkg/cache" "github.com/elastic/fleet-server/v7/internal/pkg/config" "github.com/elastic/fleet-server/v7/internal/pkg/logger" - "github.com/elastic/fleet-server/v7/internal/pkg/upload" - "github.com/elastic/fleet-server/v7/internal/pkg/upload/cbor" + "github.com/elastic/fleet-server/v7/internal/pkg/uploader" + "github.com/elastic/fleet-server/v7/internal/pkg/uploader/cbor" "github.com/elastic/go-elasticsearch/v8" 
"github.com/julienschmidt/httprouter" "github.com/rs/zerolog" @@ -106,7 +106,7 @@ type UploadT struct { bulker bulk.Bulk chunkClient *elasticsearch.Client cache cache.Cache - uploader *upload.Uploader + uploader *uploader.Uploader } func NewUploadT(cfg *config.Server, bulker bulk.Bulk, chunkClient *elasticsearch.Client, cache cache.Cache) *UploadT { @@ -119,13 +119,13 @@ func NewUploadT(cfg *config.Server, bulker bulk.Bulk, chunkClient *elasticsearch chunkClient: chunkClient, bulker: bulker, cache: cache, - uploader: upload.New(chunkClient, bulker, maxFileSize, maxUploadTimer), + uploader: uploader.New(chunkClient, bulker, maxFileSize, maxUploadTimer), } } func (ut *UploadT) handleUploadStart(zlog *zerolog.Logger, w http.ResponseWriter, r *http.Request) error { //nolint:unparam // log is standard first arg for the handlers // decode early to match agentID in the payload - payload, err := upload.ReadDict(r.Body) + payload, err := uploader.ReadDict(r.Body) if err != nil { if errors.Is(err, io.EOF) { return fmt.Errorf("file info body is required: %w", err) @@ -176,14 +176,14 @@ func (ut *UploadT) handleUploadChunk(zlog *zerolog.Logger, w http.ResponseWriter } // prevent over-sized chunks - data := http.MaxBytesReader(w, r.Body, upload.MaxChunkSize) + data := http.MaxBytesReader(w, r.Body, uploader.MaxChunkSize) // compute hash as we stream it hash := sha256.New() copier := io.TeeReader(data, hash) ce := cbor.NewChunkWriter(copier, chunkInfo.Last, chunkInfo.BID, chunkInfo.SHA2, upinfo.ChunkSize) - if err := upload.IndexChunk(r.Context(), ut.chunkClient, ce, upinfo.Source, chunkInfo.BID, chunkInfo.Pos); err != nil { + if err := uploader.IndexChunk(r.Context(), ut.chunkClient, ce, upinfo.Source, chunkInfo.BID, chunkInfo.Pos); err != nil { return err } @@ -192,14 +192,14 @@ func (ut *UploadT) handleUploadChunk(zlog *zerolog.Logger, w http.ResponseWriter if !strings.EqualFold(chunkHash, hashsum) { // delete document, since we wrote it, but the hash was invalid // context scoped to allow this operation to finish even if client disconnects - if err := upload.DeleteChunk(context.Background(), ut.bulker, upinfo.Source, chunkInfo.BID, chunkInfo.Pos); err != nil { + if err := uploader.DeleteChunk(context.Background(), ut.bulker, upinfo.Source, chunkInfo.BID, chunkInfo.Pos); err != nil { zlog.Warn().Err(err). Str("source", upinfo.Source). Str("fileID", chunkInfo.BID). Int("chunkNum", chunkInfo.Pos). Msg("a chunk hash mismatch occurred, and fleet server was unable to remove the invalid chunk") } - return upload.ErrHashMismatch + return uploader.ErrHashMismatch } return nil diff --git a/internal/pkg/upload/cbor/chunk.go b/internal/pkg/uploader/cbor/chunk.go similarity index 100% rename from internal/pkg/upload/cbor/chunk.go rename to internal/pkg/uploader/cbor/chunk.go diff --git a/internal/pkg/upload/cbor/chunk_test.go b/internal/pkg/uploader/cbor/chunk_test.go similarity index 100% rename from internal/pkg/upload/cbor/chunk_test.go rename to internal/pkg/uploader/cbor/chunk_test.go diff --git a/internal/pkg/upload/doc.go b/internal/pkg/uploader/doc.go similarity index 96% rename from internal/pkg/upload/doc.go rename to internal/pkg/uploader/doc.go index f7e3efafb..fd82b9338 100644 --- a/internal/pkg/upload/doc.go +++ b/internal/pkg/uploader/doc.go @@ -11,4 +11,4 @@ results in a valid File. 
*/ -package upload +package uploader diff --git a/internal/pkg/upload/es.go b/internal/pkg/uploader/es.go similarity index 99% rename from internal/pkg/upload/es.go rename to internal/pkg/uploader/es.go index f7d915e89..c626dda5a 100644 --- a/internal/pkg/upload/es.go +++ b/internal/pkg/uploader/es.go @@ -2,7 +2,7 @@ // or more contributor license agreements. Licensed under the Elastic License; // you may not use this file except in compliance with the Elastic License. -package upload +package uploader import ( "bytes" @@ -18,7 +18,7 @@ import ( "github.com/elastic/fleet-server/v7/internal/pkg/dsl" "github.com/elastic/fleet-server/v7/internal/pkg/es" "github.com/elastic/fleet-server/v7/internal/pkg/model" - "github.com/elastic/fleet-server/v7/internal/pkg/upload/cbor" + "github.com/elastic/fleet-server/v7/internal/pkg/uploader/cbor" "github.com/elastic/go-elasticsearch/v8" "github.com/elastic/go-elasticsearch/v8/esapi" "github.com/rs/zerolog/log" diff --git a/internal/pkg/upload/es_test.go b/internal/pkg/uploader/es_test.go similarity index 99% rename from internal/pkg/upload/es_test.go rename to internal/pkg/uploader/es_test.go index 71919f40f..2a77d980d 100644 --- a/internal/pkg/upload/es_test.go +++ b/internal/pkg/uploader/es_test.go @@ -2,7 +2,7 @@ // or more contributor license agreements. Licensed under the Elastic License; // you may not use this file except in compliance with the Elastic License. -package upload +package uploader import ( "context" diff --git a/internal/pkg/upload/finalize.go b/internal/pkg/uploader/finalize.go similarity index 91% rename from internal/pkg/upload/finalize.go rename to internal/pkg/uploader/finalize.go index d138cf91d..3438dca7e 100644 --- a/internal/pkg/upload/finalize.go +++ b/internal/pkg/uploader/finalize.go @@ -2,7 +2,7 @@ // or more contributor license agreements. Licensed under the Elastic License; // you may not use this file except in compliance with the Elastic License. 
-package upload +package uploader import ( "context" @@ -12,6 +12,7 @@ import ( "sort" "strings" + "github.com/elastic/fleet-server/v7/internal/pkg/uploader/upload" "github.com/rs/zerolog/log" ) @@ -20,7 +21,7 @@ var ( ErrStatusNoUploads = errors.New("file closed, not accepting uploads") ) -func (u *Uploader) Complete(ctx context.Context, id string, transitHash string) (Info, error) { +func (u *Uploader) Complete(ctx context.Context, id string, transitHash string) (upload.Info, error) { // make sure document is freshly fetched, not cached // so accurate status checking happens info, err := FetchUploadInfo(ctx, u.bulker, id) @@ -45,7 +46,7 @@ func (u *Uploader) Complete(ctx context.Context, id string, transitHash string) return info, ErrMissingChunks } if !u.verifyChunkInfo(info, chunks, transitHash) { - if err := SetStatus(ctx, u.bulker, info, StatusFail); err != nil { + if err := SetStatus(ctx, u.bulker, info, upload.StatusFail); err != nil { log.Warn().Err(err).Str("fileID", info.DocID).Str("uploadID", info.ID).Msg("file upload failed chunk validation, but encountered an error setting the upload status to failure") } if err := DeleteChunksByQuery(ctx, u.bulker, info.Source, info.DocID); err != nil { @@ -65,7 +66,7 @@ func (u *Uploader) Complete(ctx context.Context, id string, transitHash string) return info, nil } -func (u *Uploader) allChunksPresent(info Info, chunks []ChunkInfo) bool { +func (u *Uploader) allChunksPresent(info upload.Info, chunks []ChunkInfo) bool { // check overall count if len(chunks) != info.Count { log.Warn().Int("expectedCount", info.Count).Int("received", len(chunks)).Interface("chunks", chunks).Msg("mismatch number of chunks") @@ -87,7 +88,7 @@ func (u *Uploader) allChunksPresent(info Info, chunks []ChunkInfo) bool { return true } -func (u *Uploader) verifyChunkInfo(info Info, chunks []ChunkInfo, transitHash string) bool { +func (u *Uploader) verifyChunkInfo(info upload.Info, chunks []ChunkInfo, transitHash string) bool { // verify all chunks except last are info.ChunkSize size // verify last: false (or field excluded) for all except final chunk // verify final chunk is last: true diff --git a/internal/pkg/upload/info.go b/internal/pkg/uploader/info.go similarity index 70% rename from internal/pkg/upload/info.go rename to internal/pkg/uploader/info.go index 4c78c439f..f6bc156e9 100644 --- a/internal/pkg/upload/info.go +++ b/internal/pkg/uploader/info.go @@ -2,7 +2,7 @@ // or more contributor license agreements. Licensed under the Elastic License; // you may not use this file except in compliance with the Elastic License. 
-package upload +package uploader import ( "context" @@ -10,24 +10,25 @@ import ( "fmt" "github.com/elastic/fleet-server/v7/internal/pkg/bulk" + "github.com/elastic/fleet-server/v7/internal/pkg/uploader/upload" ) // retrieves upload metadata info from elasticsearch -func FetchUploadInfo(ctx context.Context, bulker bulk.Bulk, uploadID string) (Info, error) { +func FetchUploadInfo(ctx context.Context, bulker bulk.Bulk, uploadID string) (upload.Info, error) { results, err := GetFileDoc(ctx, bulker, uploadID) if err != nil { - return Info{}, err + return upload.Info{}, err } if len(results) == 0 { - return Info{}, ErrInvalidUploadID + return upload.Info{}, ErrInvalidUploadID } if len(results) > 1 { - return Info{}, fmt.Errorf("unable to locate upload record, got %d records, expected 1", len(results)) + return upload.Info{}, fmt.Errorf("unable to locate upload record, got %d records, expected 1", len(results)) } var fi FileMetaDoc if err := json.Unmarshal(results[0].Source, &fi); err != nil { - return Info{}, fmt.Errorf("file meta doc parsing error: %w", err) + return upload.Info{}, fmt.Errorf("file meta doc parsing error: %w", err) } // calculate number of chunks required @@ -36,7 +37,7 @@ func FetchUploadInfo(ctx context.Context, bulker bulk.Bulk, uploadID string) (In cnt += 1 } - return Info{ + return upload.Info{ ID: fi.UploadID, Source: fi.Source, AgentID: fi.AgentID, @@ -46,11 +47,11 @@ func FetchUploadInfo(ctx context.Context, bulker bulk.Bulk, uploadID string) (In Total: fi.File.Size, Count: int(cnt), Start: fi.Start, - Status: Status(fi.File.Status), + Status: upload.Status(fi.File.Status), }, nil } -func SetStatus(ctx context.Context, bulker bulk.Bulk, info Info, status Status) error { +func SetStatus(ctx context.Context, bulker bulk.Bulk, info upload.Info, status upload.Status) error { data, err := json.Marshal(map[string]interface{}{ "doc": map[string]interface{}{ "file": map[string]string{ @@ -64,11 +65,11 @@ func SetStatus(ctx context.Context, bulker bulk.Bulk, info Info, status Status) return UpdateFileDoc(ctx, bulker, info.Source, info.DocID, data) } -func MarkComplete(ctx context.Context, bulker bulk.Bulk, info Info, hash string) error { +func MarkComplete(ctx context.Context, bulker bulk.Bulk, info upload.Info, hash string) error { data, err := json.Marshal(map[string]interface{}{ "doc": map[string]interface{}{ "file": map[string]string{ - "Status": string(StatusDone), + "Status": string(upload.StatusDone), }, "transithash": map[string]interface{}{ "sha256": hash, diff --git a/internal/pkg/upload/jsdict.go b/internal/pkg/uploader/jsdict.go similarity index 99% rename from internal/pkg/upload/jsdict.go rename to internal/pkg/uploader/jsdict.go index c0fc64927..b563974dd 100644 --- a/internal/pkg/upload/jsdict.go +++ b/internal/pkg/uploader/jsdict.go @@ -2,7 +2,7 @@ // or more contributor license agreements. Licensed under the Elastic License; // you may not use this file except in compliance with the Elastic License. -package upload +package uploader import ( "encoding/json" diff --git a/internal/pkg/upload/jsdict_test.go b/internal/pkg/uploader/jsdict_test.go similarity index 99% rename from internal/pkg/upload/jsdict_test.go rename to internal/pkg/uploader/jsdict_test.go index 47a246d36..f7fb12235 100644 --- a/internal/pkg/upload/jsdict_test.go +++ b/internal/pkg/uploader/jsdict_test.go @@ -2,7 +2,7 @@ // or more contributor license agreements. Licensed under the Elastic License; // you may not use this file except in compliance with the Elastic License. 
-package upload +package uploader import ( "bytes" diff --git a/internal/pkg/upload/upload.go b/internal/pkg/uploader/upload.go similarity index 74% rename from internal/pkg/upload/upload.go rename to internal/pkg/uploader/upload.go index fccb33377..c29c086d7 100644 --- a/internal/pkg/upload/upload.go +++ b/internal/pkg/uploader/upload.go @@ -2,7 +2,7 @@ // or more contributor license agreements. Licensed under the Elastic License; // you may not use this file except in compliance with the Elastic License. -package upload +package uploader import ( "context" @@ -14,6 +14,7 @@ import ( "time" "github.com/elastic/fleet-server/v7/internal/pkg/bulk" + "github.com/elastic/fleet-server/v7/internal/pkg/uploader/upload" "github.com/elastic/go-elasticsearch/v8" "github.com/gofrs/uuid" ) @@ -33,36 +34,6 @@ var ( ErrInvalidChunkNum = errors.New("invalid chunk number") ) -// the only valid values of upload status according to storage spec -type Status string - -const ( - StatusAwaiting Status = "AWAITING_UPLOAD" - StatusProgress Status = "UPLOADING" - StatusDone Status = "READY" - StatusFail Status = "UPLOAD_ERROR" - StatusDel Status = "DELETED" -) - -type Info struct { - ID string // upload operation identifier. Used to identify the upload process - DocID string // document ID of the uploaded file and chunks - Source string // which integration is performing the upload - AgentID string - ActionID string - ChunkSize int64 - Total int64 - Count int - Start time.Time - Status Status -} - -// convenience functions for computing current "Status" based on the fields -func (i Info) Expired(timeout time.Duration) bool { return time.Now().After(i.Start.Add(timeout)) } -func (i Info) StatusCanUpload() bool { // returns true if more chunks can be uploaded. False if the upload process has completed (with or without error) - return !(i.Status == StatusFail || i.Status == StatusDone || i.Status == StatusDel) -} - type FileData struct { Size int64 `json:"size"` ChunkSize int64 `json:"ChunkSize"` @@ -105,8 +76,8 @@ type ChunkInfo struct { } type Uploader struct { - metaCache map[string]Info // cache of file metadata doc info - mu sync.RWMutex // lock for the above + metaCache map[string]upload.Info // cache of file metadata doc info + mu sync.RWMutex // lock for the above // @todo: cache eviction so it's not unbounded growth // @todo: cache refresh so status is accurate sizeLimit int64 // @todo: what if configuration changes? is this recreated with another New()? 
@@ -122,14 +93,14 @@ func New(chunkClient *elasticsearch.Client, bulker bulk.Bulk, sizeLimit int64, t bulker: bulker, sizeLimit: sizeLimit, timeLimit: timeLimit, - metaCache: make(map[string]Info), + metaCache: make(map[string]upload.Info), } } // Start an upload operation -func (u *Uploader) Begin(ctx context.Context, data JSDict) (Info, error) { +func (u *Uploader) Begin(ctx context.Context, data JSDict) (upload.Info, error) { if data == nil { - return Info{}, errors.New("upload start payload required") + return upload.Info{}, errors.New("upload start payload required") } /* @@ -138,17 +109,17 @@ func (u *Uploader) Begin(ctx context.Context, data JSDict) (Info, error) { // make sure all required fields are present and non-empty if err := validateUploadPayload(data); err != nil { - return Info{}, err + return upload.Info{}, err } size, _ := data.Int64("file", "size") if size > u.sizeLimit { - return Info{}, ErrFileSizeTooLarge + return upload.Info{}, ErrFileSizeTooLarge } uid, err := uuid.NewV4() if err != nil { - return Info{}, fmt.Errorf("unable to generate upload operation ID: %w", err) + return upload.Info{}, fmt.Errorf("unable to generate upload operation ID: %w", err) } id := uid.String() @@ -158,7 +129,7 @@ func (u *Uploader) Begin(ctx context.Context, data JSDict) (Info, error) { source, _ := data.Str("src") docID := fmt.Sprintf("%s.%s", actionID, agentID) - info := Info{ + info := upload.Info{ ID: id, DocID: docID, AgentID: agentID, @@ -166,7 +137,7 @@ func (u *Uploader) Begin(ctx context.Context, data JSDict) (Info, error) { ChunkSize: MaxChunkSize, Source: source, Total: size, - Status: StatusAwaiting, + Status: upload.StatusAwaiting, Start: time.Now(), } chunkCount := info.Total / info.ChunkSize @@ -180,16 +151,16 @@ func (u *Uploader) Begin(ctx context.Context, data JSDict) (Info, error) { */ if err := data.Put(info.ChunkSize, "file", "ChunkSize"); err != nil { - return Info{}, err + return upload.Info{}, err } if err := data.Put(info.Status, "file", "Status"); err != nil { - return Info{}, err + return upload.Info{}, err } if err := data.Put(id, "upload_id"); err != nil { - return Info{}, err + return upload.Info{}, err } if err := data.Put(info.Start.UnixMilli(), "upload_start"); err != nil { - return Info{}, err + return upload.Info{}, err } /* @@ -197,21 +168,21 @@ func (u *Uploader) Begin(ctx context.Context, data JSDict) (Info, error) { */ doc, err := json.Marshal(data) if err != nil { - return Info{}, err + return upload.Info{}, err } _, err = CreateFileDoc(ctx, u.bulker, doc, source, docID) if err != nil { - return Info{}, err + return upload.Info{}, err } return info, nil } -func (u *Uploader) Chunk(ctx context.Context, uplID string, chunkNum int, chunkHash string) (Info, ChunkInfo, error) { +func (u *Uploader) Chunk(ctx context.Context, uplID string, chunkNum int, chunkHash string) (upload.Info, ChunkInfo, error) { // find the upload, details, and status associated with the file upload info, err := u.GetUploadInfo(ctx, uplID) if err != nil { - return Info{}, ChunkInfo{}, err + return upload.Info{}, ChunkInfo{}, err } /* @@ -219,13 +190,13 @@ func (u *Uploader) Chunk(ctx context.Context, uplID string, chunkNum int, chunkH */ if info.Expired(u.timeLimit) { - return Info{}, ChunkInfo{}, ErrUploadExpired + return upload.Info{}, ChunkInfo{}, ErrUploadExpired } if !info.StatusCanUpload() { - return Info{}, ChunkInfo{}, ErrUploadStopped + return upload.Info{}, ChunkInfo{}, ErrUploadStopped } if chunkNum < 0 || chunkNum >= info.Count { - return Info{}, ChunkInfo{}, 
ErrInvalidChunkNum + return upload.Info{}, ChunkInfo{}, ErrInvalidChunkNum } return info, ChunkInfo{ @@ -267,7 +238,7 @@ func validateUploadPayload(info JSDict) error { // Searches for Upload Metadata document in local memory cache if available // otherwise, fetches from elasticsearch and caches for next use -func (u *Uploader) GetUploadInfo(ctx context.Context, uploadID string) (Info, error) { +func (u *Uploader) GetUploadInfo(ctx context.Context, uploadID string) (upload.Info, error) { // Fetch metadata doc, if not cached u.mu.RLock() info, exist := u.metaCache[uploadID] @@ -279,7 +250,7 @@ func (u *Uploader) GetUploadInfo(ctx context.Context, uploadID string) (Info, er // not found in cache, try fetching info, err := FetchUploadInfo(ctx, u.bulker, uploadID) if err != nil { - return Info{}, fmt.Errorf("unable to retrieve upload info: %w", err) + return upload.Info{}, fmt.Errorf("unable to retrieve upload info: %w", err) } u.mu.Lock() defer u.mu.Unlock() diff --git a/internal/pkg/uploader/upload/info.go b/internal/pkg/uploader/upload/info.go new file mode 100644 index 000000000..5e6b4f07f --- /dev/null +++ b/internal/pkg/uploader/upload/info.go @@ -0,0 +1,37 @@ +// Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one +// or more contributor license agreements. Licensed under the Elastic License; +// you may not use this file except in compliance with the Elastic License. + +package upload + +import "time" + +// the only valid values of upload status according to storage spec +type Status string + +const ( + StatusAwaiting Status = "AWAITING_UPLOAD" + StatusProgress Status = "UPLOADING" + StatusDone Status = "READY" + StatusFail Status = "UPLOAD_ERROR" + StatusDel Status = "DELETED" +) + +type Info struct { + ID string // upload operation identifier. Used to identify the upload process + DocID string // document ID of the uploaded file and chunks + Source string // which integration is performing the upload + AgentID string + ActionID string + ChunkSize int64 + Total int64 + Count int + Start time.Time + Status Status +} + +// convenience functions for computing current "Status" based on the fields +func (i Info) Expired(timeout time.Duration) bool { return time.Now().After(i.Start.Add(timeout)) } +func (i Info) StatusCanUpload() bool { // returns true if more chunks can be uploaded. False if the upload process has completed (with or without error) + return !(i.Status == StatusFail || i.Status == StatusDone || i.Status == StatusDel) +} diff --git a/internal/pkg/upload/upload_test.go b/internal/pkg/uploader/upload_test.go similarity index 98% rename from internal/pkg/upload/upload_test.go rename to internal/pkg/uploader/upload_test.go index c849fad02..8ab0382dc 100644 --- a/internal/pkg/upload/upload_test.go +++ b/internal/pkg/uploader/upload_test.go @@ -2,7 +2,7 @@ // or more contributor license agreements. Licensed under the Elastic License; // you may not use this file except in compliance with the Elastic License. 
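Taken together, Begin, Chunk, and Complete give the shape of one upload operation. A rough caller-side sketch is below; it is illustrative only, since the HTTP handlers above add authentication and actually stream each chunk body into Elasticsearch, which this sketch omits.

package example

import (
	"context"
	"crypto/sha256"
	"encoding/hex"

	"github.com/elastic/fleet-server/v7/internal/pkg/uploader"
	"github.com/elastic/fleet-server/v7/internal/pkg/uploader/upload"
)

// driveUpload walks an already-constructed Uploader through one file:
// Begin writes the metadata doc, Chunk validates each piece's position and
// status, and Complete verifies counts, sizes, and the transithash.
func driveUpload(ctx context.Context, up *uploader.Uploader, meta uploader.JSDict, chunks [][]byte) (upload.Info, error) {
	info, err := up.Begin(ctx, meta)
	if err != nil {
		return upload.Info{}, err
	}
	transit := sha256.New()
	for i, body := range chunks {
		sum := sha256.Sum256(body)
		transit.Write(sum[:]) // transithash accumulates the raw chunk digests
		if _, _, err := up.Chunk(ctx, info.ID, i, hex.EncodeToString(sum[:])); err != nil {
			return info, err
		}
		// the chunk body itself is indexed by the API layer, not here
	}
	return up.Complete(ctx, info.ID, hex.EncodeToString(transit.Sum(nil)))
}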
-package upload +package uploader import ( "context" @@ -12,6 +12,7 @@ import ( "time" itesting "github.com/elastic/fleet-server/v7/internal/pkg/testing" + "github.com/elastic/fleet-server/v7/internal/pkg/uploader/upload" "github.com/stretchr/testify/assert" "github.com/stretchr/testify/mock" @@ -83,7 +84,7 @@ func TestUploadBeginReturnsCorrectInfo(t *testing.T) { assert.Equal(t, action, info.ActionID) assert.Equal(t, agent, info.AgentID) assert.Equal(t, src, info.Source) - assert.Equal(t, StatusAwaiting, info.Status) + assert.Equal(t, upload.StatusAwaiting, info.Status) assert.Greaterf(t, info.ChunkSize, int64(0), "server chosen chunk size should be >0") assert.Equal(t, action+"."+agent, info.DocID) assert.WithinDuration(t, time.Now(), info.Start, time.Minute) From 610d19ad71f37666f65bc0f7edc6bd00ed9a5bfc Mon Sep 17 00:00:00 2001 From: pzl Date: Tue, 17 Jan 2023 16:32:17 -0500 Subject: [PATCH 42/51] use internal cache for upload infos --- internal/pkg/api/handleUpload.go | 2 +- internal/pkg/cache/cache.go | 38 ++++++++++++++++++++++++++++++++ internal/pkg/uploader/upload.go | 24 +++++++------------- 3 files changed, 47 insertions(+), 17 deletions(-) diff --git a/internal/pkg/api/handleUpload.go b/internal/pkg/api/handleUpload.go index ae862a7f1..05cf7de67 100644 --- a/internal/pkg/api/handleUpload.go +++ b/internal/pkg/api/handleUpload.go @@ -119,7 +119,7 @@ func NewUploadT(cfg *config.Server, bulker bulk.Bulk, chunkClient *elasticsearch chunkClient: chunkClient, bulker: bulker, cache: cache, - uploader: uploader.New(chunkClient, bulker, maxFileSize, maxUploadTimer), + uploader: uploader.New(chunkClient, bulker, cache, maxFileSize, maxUploadTimer), } } diff --git a/internal/pkg/cache/cache.go b/internal/pkg/cache/cache.go index 909988702..3d1655bb9 100644 --- a/internal/pkg/cache/cache.go +++ b/internal/pkg/cache/cache.go @@ -16,6 +16,7 @@ import ( "github.com/elastic/fleet-server/v7/internal/pkg/apikey" "github.com/elastic/fleet-server/v7/internal/pkg/config" "github.com/elastic/fleet-server/v7/internal/pkg/model" + "github.com/elastic/fleet-server/v7/internal/pkg/uploader/upload" ) type Cache interface { @@ -32,6 +33,9 @@ type Cache interface { SetArtifact(artifact model.Artifact) GetArtifact(ident, sha2 string) (model.Artifact, bool) + + SetUpload(id string, info upload.Info) + GetUpload(id string) (upload.Info, bool) } type APIKey = apikey.APIKey @@ -270,3 +274,37 @@ func (c *CacheT) SetArtifact(artifact model.Artifact) { Dur("ttl", ttl). Msg("Artifact cache SET") } + +func (c *CacheT) SetUpload(id string, info upload.Info) { + c.mut.RLock() + defer c.mut.RUnlock() + + scopedKey := "upload:" + id + ttl := time.Hour / 2 // @todo: add to configurable + cost := int64(len(info.ID) + len(info.DocID) + len(info.ActionID) + len(info.AgentID) + len(info.Source) + len(info.Status) + 8*4) + ok := c.cache.SetWithTTL(scopedKey, info, cost, ttl) + log.Trace(). + Bool("ok", ok). + Str("id", id). + Int64("cost", cost). + Dur("ttl", ttl). 
+ Msg("Upload info cache SET") +} +func (c *CacheT) GetUpload(id string) (upload.Info, bool) { + c.mut.RLock() + defer c.mut.RUnlock() + + scopedKey := "upload:" + id + if v, ok := c.cache.Get(scopedKey); ok { + log.Trace().Str("id", id).Msg("upload info cache HIT") + key, ok := v.(upload.Info) + if !ok { + log.Error().Str("id", id).Msg("upload info cache cast fail") + return upload.Info{}, false + } + return key, ok + } + + log.Trace().Str("id", id).Msg("upload info cache MISS") + return upload.Info{}, false +} diff --git a/internal/pkg/uploader/upload.go b/internal/pkg/uploader/upload.go index c29c086d7..f5d2bac16 100644 --- a/internal/pkg/uploader/upload.go +++ b/internal/pkg/uploader/upload.go @@ -10,10 +10,10 @@ import ( "errors" "fmt" "strings" - "sync" "time" "github.com/elastic/fleet-server/v7/internal/pkg/bulk" + "github.com/elastic/fleet-server/v7/internal/pkg/cache" "github.com/elastic/fleet-server/v7/internal/pkg/uploader/upload" "github.com/elastic/go-elasticsearch/v8" "github.com/gofrs/uuid" @@ -76,10 +76,7 @@ type ChunkInfo struct { } type Uploader struct { - metaCache map[string]upload.Info // cache of file metadata doc info - mu sync.RWMutex // lock for the above - // @todo: cache eviction so it's not unbounded growth - // @todo: cache refresh so status is accurate + cache cache.Cache // cache of file metadata doc info sizeLimit int64 // @todo: what if configuration changes? is this recreated with another New()? timeLimit time.Duration // @todo: same as above @@ -87,13 +84,13 @@ type Uploader struct { bulker bulk.Bulk } -func New(chunkClient *elasticsearch.Client, bulker bulk.Bulk, sizeLimit int64, timeLimit time.Duration) *Uploader { +func New(chunkClient *elasticsearch.Client, bulker bulk.Bulk, cache cache.Cache, sizeLimit int64, timeLimit time.Duration) *Uploader { return &Uploader{ chunkClient: chunkClient, bulker: bulker, sizeLimit: sizeLimit, timeLimit: timeLimit, - metaCache: make(map[string]upload.Info), + cache: cache, } } @@ -200,9 +197,8 @@ func (u *Uploader) Chunk(ctx context.Context, uplID string, chunkNum int, chunkH } return info, ChunkInfo{ - Pos: chunkNum, - BID: info.DocID, - //FirstReceived: false, // @todo + Pos: chunkNum, + BID: info.DocID, Last: chunkNum == info.Count-1, Size: int(info.ChunkSize), SHA2: chunkHash, @@ -240,9 +236,7 @@ func validateUploadPayload(info JSDict) error { // otherwise, fetches from elasticsearch and caches for next use func (u *Uploader) GetUploadInfo(ctx context.Context, uploadID string) (upload.Info, error) { // Fetch metadata doc, if not cached - u.mu.RLock() - info, exist := u.metaCache[uploadID] - u.mu.RUnlock() // not deferred since this must be clear before we gain a write lock below + info, exist := u.cache.GetUpload(uploadID) if exist { return info, nil } @@ -252,8 +246,6 @@ func (u *Uploader) GetUploadInfo(ctx context.Context, uploadID string) (upload.I if err != nil { return upload.Info{}, fmt.Errorf("unable to retrieve upload info: %w", err) } - u.mu.Lock() - defer u.mu.Unlock() - u.metaCache[uploadID] = info + u.cache.SetUpload(uploadID, info) return info, nil } From 882df208ad2c11487e94951521783a5d11ea079b Mon Sep 17 00:00:00 2001 From: pzl Date: Tue, 17 Jan 2023 16:37:12 -0500 Subject: [PATCH 43/51] fix lint --- internal/pkg/cache/cache.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/internal/pkg/cache/cache.go b/internal/pkg/cache/cache.go index 3d1655bb9..733de5682 100644 --- a/internal/pkg/cache/cache.go +++ b/internal/pkg/cache/cache.go @@ -290,7 +290,7 @@ func (c *CacheT) SetUpload(id 
string, info upload.Info) { Dur("ttl", ttl). Msg("Upload info cache SET") } -func (c *CacheT) GetUpload(id string) (upload.Info, bool) { +func (c *CacheT) GetUpload(id string) (upload.Info, bool) { //nolint:dupl // a little repetition to support strong typing c.mut.RLock() defer c.mut.RUnlock() From 8e38d8079c2ffdc06f6282c78f0e9dd2b4c26fa7 Mon Sep 17 00:00:00 2001 From: pzl Date: Wed, 18 Jan 2023 07:21:01 -0500 Subject: [PATCH 44/51] fixup tests --- internal/pkg/cache/cache.go | 2 +- internal/pkg/uploader/upload_test.go | 22 +++++++++++++++++----- 2 files changed, 18 insertions(+), 6 deletions(-) diff --git a/internal/pkg/cache/cache.go b/internal/pkg/cache/cache.go index 733de5682..8629a6630 100644 --- a/internal/pkg/cache/cache.go +++ b/internal/pkg/cache/cache.go @@ -196,7 +196,7 @@ func (c *CacheT) ValidAPIKey(key APIKey) bool { } // GetEnrollmentAPIKey returns the enrollment API key by ID. -func (c *CacheT) GetEnrollmentAPIKey(id string) (model.EnrollmentAPIKey, bool) { +func (c *CacheT) GetEnrollmentAPIKey(id string) (model.EnrollmentAPIKey, bool) { //nolint:dupl // similar getters to support strong typing c.mut.RLock() defer c.mut.RUnlock() diff --git a/internal/pkg/uploader/upload_test.go b/internal/pkg/uploader/upload_test.go index 8ab0382dc..168cc72f7 100644 --- a/internal/pkg/uploader/upload_test.go +++ b/internal/pkg/uploader/upload_test.go @@ -11,11 +11,14 @@ import ( "testing" "time" + "github.com/elastic/fleet-server/v7/internal/pkg/cache" + "github.com/elastic/fleet-server/v7/internal/pkg/config" itesting "github.com/elastic/fleet-server/v7/internal/pkg/testing" "github.com/elastic/fleet-server/v7/internal/pkg/uploader/upload" "github.com/stretchr/testify/assert" "github.com/stretchr/testify/mock" + "github.com/stretchr/testify/require" ) // convenience function for making a typical file request structure @@ -76,7 +79,9 @@ func TestUploadBeginReturnsCorrectInfo(t *testing.T) { mock.Anything, // bulker options ).Return("", nil) - u := New(nil, fakeBulk, int64(size), time.Hour) + c, err := cache.New(config.Cache{NumCounters: 100, MaxCost: 100000}) + require.NoError(t, err) + u := New(nil, fakeBulk, c, int64(size), time.Hour) info, err := u.Begin(context.Background(), data) assert.NoError(t, err) @@ -118,8 +123,10 @@ func TestUploadBeginWritesDocumentFromInputs(t *testing.T) { mock.Anything, // bulker options ).Return("", nil) - u := New(nil, fakeBulk, int64(size), time.Hour) - _, err := u.Begin(context.Background(), data) + c, err := cache.New(config.Cache{NumCounters: 100, MaxCost: 100000}) + require.NoError(t, err) + u := New(nil, fakeBulk, c, int64(size), time.Hour) + _, err = u.Begin(context.Background(), data) assert.NoError(t, err) payload, ok := fakeBulk.Calls[0].Arguments[3].([]byte) @@ -154,7 +161,9 @@ func TestUploadBeginCalculatesCorrectChunkCount(t *testing.T) { {7534559605, 1797, "7.5Gb file"}, } - u := New(nil, fakeBulk, MaxChunkSize*3000, time.Hour) + c, err := cache.New(config.Cache{NumCounters: 100, MaxCost: 100000}) + require.NoError(t, err) + u := New(nil, fakeBulk, c, MaxChunkSize*3000, time.Hour) for _, tc := range tests { t.Run(tc.Name, func(t *testing.T) { @@ -191,9 +200,12 @@ func TestUploadBeginMaxFileSize(t *testing.T) { mock.Anything, // bulker options ).Return("", nil) + c, err := cache.New(config.Cache{NumCounters: 100, MaxCost: 100000}) + require.NoError(t, err) + for _, tc := range tests { t.Run(tc.Name, func(t *testing.T) { - u := New(nil, fakeBulk, tc.UploadSizeLimit, time.Hour) + u := New(nil, fakeBulk, c, tc.UploadSizeLimit, time.Hour) data 
:= makeUploadRequestDict(map[string]interface{}{ "file.size": tc.FileSize, }) From dcf436fce42bcb093bd244bb777770b57ba56028 Mon Sep 17 00:00:00 2001 From: pzl Date: Wed, 18 Jan 2023 07:35:17 -0500 Subject: [PATCH 45/51] move chunk def into new package for exported defs --- internal/pkg/model/schema.go | 9 --------- internal/pkg/uploader/es.go | 6 +++--- internal/pkg/uploader/upload/info.go | 15 ++++++++++++++- model/schema.json | 28 ---------------------------- 4 files changed, 17 insertions(+), 41 deletions(-) diff --git a/internal/pkg/model/schema.go b/internal/pkg/model/schema.go index bbf4633b7..ae45c1c0a 100644 --- a/internal/pkg/model/schema.go +++ b/internal/pkg/model/schema.go @@ -284,15 +284,6 @@ type EnrollmentAPIKey struct { UpdatedAt string `json:"updated_at,omitempty"` } -type FileChunk struct { - ESDocument - - BID string `json:"bid"` - Data []byte `json:"data"` - Last bool `json:"last"` - SHA2 string `json:"sha2"` -} - // HostMetadata The host metadata for the Elastic Agent type HostMetadata struct { diff --git a/internal/pkg/uploader/es.go b/internal/pkg/uploader/es.go index c626dda5a..363664ee8 100644 --- a/internal/pkg/uploader/es.go +++ b/internal/pkg/uploader/es.go @@ -17,8 +17,8 @@ import ( "github.com/elastic/fleet-server/v7/internal/pkg/bulk" "github.com/elastic/fleet-server/v7/internal/pkg/dsl" "github.com/elastic/fleet-server/v7/internal/pkg/es" - "github.com/elastic/fleet-server/v7/internal/pkg/model" "github.com/elastic/fleet-server/v7/internal/pkg/uploader/cbor" + "github.com/elastic/fleet-server/v7/internal/pkg/uploader/upload" "github.com/elastic/go-elasticsearch/v8" "github.com/elastic/go-elasticsearch/v8/esapi" "github.com/rs/zerolog/log" @@ -219,8 +219,8 @@ func GetChunkInfos(ctx context.Context, bulker bulk.Bulk, baseID string) ([]Chun } // retrieves a full chunk document, Data included -func GetChunk(ctx context.Context, bulker bulk.Bulk, source string, fileID string, chunkNum int) (model.FileChunk, error) { - var chunk model.FileChunk +func GetChunk(ctx context.Context, bulker bulk.Bulk, source string, fileID string, chunkNum int) (upload.Chunk, error) { + var chunk upload.Chunk out, err := bulker.Read(ctx, fmt.Sprintf(FileDataIndexPattern, source), fmt.Sprintf("%s.%d", fileID, chunkNum)) if err != nil { return chunk, err diff --git a/internal/pkg/uploader/upload/info.go b/internal/pkg/uploader/upload/info.go index 5e6b4f07f..0379819e7 100644 --- a/internal/pkg/uploader/upload/info.go +++ b/internal/pkg/uploader/upload/info.go @@ -4,7 +4,11 @@ package upload -import "time" +import ( + "time" + + "github.com/elastic/fleet-server/v7/internal/pkg/model" +) // the only valid values of upload status according to storage spec type Status string @@ -35,3 +39,12 @@ func (i Info) Expired(timeout time.Duration) bool { return time.Now().After(i.St func (i Info) StatusCanUpload() bool { // returns true if more chunks can be uploaded. 
False if the upload process has completed (with or without error) return !(i.Status == StatusFail || i.Status == StatusDone || i.Status == StatusDel) } + +type Chunk struct { + model.ESDocument + + BID string `json:"bid"` + Data []byte `json:"data"` + Last bool `json:"last"` + SHA2 string `json:"sha2"` +} diff --git a/model/schema.json b/model/schema.json index 3812d9a37..fdf0a709f 100644 --- a/model/schema.json +++ b/model/schema.json @@ -616,34 +616,6 @@ "api_key_id", "api_key" ] - }, - "filechunk": { - "title": "Chunk", - "description": "One section of the blob contents of a file", - "type": "object", - "properties": { - "_id": { - "description": "The identifier for the file chunk, describing the ID of the file, and the chunk position", - "type": "string" - }, - "data": { - "description": "Blob contents of the file", - "type": "binary" - }, - "bid": { - "description": "Base ID of the chunk, matching all chunks together as the base ID of the file", - "type": "string", - "format": "uuid" - }, - "last": { - "description": "Whether this chunk is the final chunk of a file", - "type": "boolean" - }, - "sha2": { - "description": "the sha256 hash of the chunk contents", - "type": "string" - } - } } }, "checkin": { From e2ee184f443882b48a287089c399a5fbdc1282f6 Mon Sep 17 00:00:00 2001 From: pzl Date: Wed, 18 Jan 2023 08:54:32 -0500 Subject: [PATCH 46/51] small cleanups --- internal/pkg/cache/cache.go | 3 ++- internal/pkg/uploader/upload.go | 7 +++---- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/internal/pkg/cache/cache.go b/internal/pkg/cache/cache.go index 8629a6630..f8f4bad6d 100644 --- a/internal/pkg/cache/cache.go +++ b/internal/pkg/cache/cache.go @@ -280,7 +280,8 @@ func (c *CacheT) SetUpload(id string, info upload.Info) { defer c.mut.RUnlock() scopedKey := "upload:" + id - ttl := time.Hour / 2 // @todo: add to configurable + ttl := 30 * time.Minute // @todo: add to configurable + // cache cost for other entries use bytes as the unit. Add up the string lengths and the size of the int64s in the upload.Info struct, as a manual 'sizeof' cost := int64(len(info.ID) + len(info.DocID) + len(info.ActionID) + len(info.AgentID) + len(info.Source) + len(info.Status) + 8*4) ok := c.cache.SetWithTTL(scopedKey, info, cost, ttl) log.Trace(). diff --git a/internal/pkg/uploader/upload.go b/internal/pkg/uploader/upload.go index f5d2bac16..1f7b137af 100644 --- a/internal/pkg/uploader/upload.go +++ b/internal/pkg/uploader/upload.go @@ -72,13 +72,12 @@ type ChunkInfo struct { SHA2 string Size int BID string // base id, matches metadata doc's _id - //FirstReceived bool } type Uploader struct { - cache cache.Cache // cache of file metadata doc info - sizeLimit int64 // @todo: what if configuration changes? is this recreated with another New()? - timeLimit time.Duration // @todo: same as above + cache cache.Cache // cache of file metadata doc info + sizeLimit int64 + timeLimit time.Duration chunkClient *elasticsearch.Client bulker bulk.Bulk From ea72b10e8decd94f75e52a42dcf24db5389e9f4e Mon Sep 17 00:00:00 2001 From: pzl Date: Wed, 18 Jan 2023 10:54:39 -0500 Subject: [PATCH 47/51] comment cleanups --- internal/pkg/uploader/doc.go | 9 ++------- internal/pkg/uploader/es.go | 1 - 2 files changed, 2 insertions(+), 8 deletions(-) diff --git a/internal/pkg/uploader/doc.go b/internal/pkg/uploader/doc.go index fd82b9338..9c2088b94 100644 --- a/internal/pkg/uploader/doc.go +++ b/internal/pkg/uploader/doc.go @@ -3,12 +3,7 @@ // you may not use this file except in compliance with the Elastic License. 
/* - - This package concentrates the responsibility of following the - File Storage specification: - https://docs.google.com/document/d/1BlCQYxHwgGZMxysc0BiWxT9ZCLKwgryhkD9ryDNDIuI/edit?usp=sharing - so the resulting documents, using this module correctly and checking errors, - results in a valid File. - + The uploader package provides the ability to store files in Elasticsearch + in a chunked format that is compatible with Kibana's files plugin. */ package uploader diff --git a/internal/pkg/uploader/es.go b/internal/pkg/uploader/es.go index 363664ee8..680d74482 100644 --- a/internal/pkg/uploader/es.go +++ b/internal/pkg/uploader/es.go @@ -75,7 +75,6 @@ func prepareChunkWithoutData() *dsl.Tmpl { func prepareFindMetaByUploadID() *dsl.Tmpl { tmpl := dsl.NewTmpl() root := dsl.NewRoot() - //root.Param("_source", false) // do not return large data payload root.Query().Term(FieldUploadID, tmpl.Bind(FieldUploadID), nil) tmpl.MustResolve(root) return tmpl From bf44629d7bd013ae08c4589f70a5c95451f3b6e5 Mon Sep 17 00:00:00 2001 From: pzl Date: Wed, 18 Jan 2023 20:14:05 -0500 Subject: [PATCH 48/51] fixup uploader tests --- internal/pkg/uploader/upload.go | 3 - internal/pkg/uploader/upload_test.go | 129 +++++++++++++++++++++++---- 2 files changed, 110 insertions(+), 22 deletions(-) diff --git a/internal/pkg/uploader/upload.go b/internal/pkg/uploader/upload.go index 1f7b137af..8106e8dd2 100644 --- a/internal/pkg/uploader/upload.go +++ b/internal/pkg/uploader/upload.go @@ -220,9 +220,6 @@ func validateUploadPayload(info JSDict) error { } } - //@todo: valid action? - //@todo: valid src? will that make future expansion harder and require FS updates? maybe just validate the index exists - if size, ok := info.Int64("file", "size"); !ok { return errors.New("file.size is required") } else if size <= 0 { diff --git a/internal/pkg/uploader/upload_test.go b/internal/pkg/uploader/upload_test.go index 168cc72f7..a891237ab 100644 --- a/internal/pkg/uploader/upload_test.go +++ b/internal/pkg/uploader/upload_test.go @@ -13,6 +13,7 @@ import ( "github.com/elastic/fleet-server/v7/internal/pkg/cache" "github.com/elastic/fleet-server/v7/internal/pkg/config" + "github.com/elastic/fleet-server/v7/internal/pkg/es" itesting "github.com/elastic/fleet-server/v7/internal/pkg/testing" "github.com/elastic/fleet-server/v7/internal/pkg/uploader/upload" @@ -219,20 +220,90 @@ func TestUploadBeginMaxFileSize(t *testing.T) { } } -/* func TestUploadRejectsMissingRequiredFields(t *testing.T) { - data := makeUploadRequestDict() - u := New(nil, nil, 1024, time.Hour) - info, err := u.Begin(context.Background(), data) - assert.Error(t, err) + tests := []string{ + "file.name", + "file.mime_type", + "file.size", + "action_id", + "agent_id", + "src", + } + + fakeBulk := itesting.NewMockBulk() + fakeBulk.On("Create", + mock.Anything, // match context.Context + mock.Anything, // index + mock.Anything, // document ID + mock.Anything, // ES document + mock.Anything, // bulker options + ).Return("", nil) + + c, err := cache.New(config.Cache{NumCounters: 100, MaxCost: 100000}) + require.NoError(t, err) + + u := New(nil, fakeBulk, c, 2048, time.Hour) + + var ok bool + for _, field := range tests { + + t.Run("required field "+field, func(t *testing.T) { + // create input that already has all required fields + data := makeUploadRequestDict(nil) + + // now delete this field and expect failure below + d := map[string]interface{}(data) + parts := strings.Split(field, ".") + for i, part := range parts { + if i == len(parts)-1 { // leaf of an object tree + 
delete(d, part) + } else { + d, ok = d[part].(map[string]interface{}) + assert.Truef(t, ok, "incorrect key path '%s' when testing required fields", field) + } + } + + _, err = u.Begin(context.Background(), data) + assert.Errorf(t, err, "%s is a required field and should error if not provided", field) + }) + + } } +func mockUploadInfoResult(bulker *itesting.MockBulk, info upload.Info) { -*/ + // convert info into how it's stored/returned in ES + out, _ := json.Marshal(map[string]interface{}{ + "action_id": info.ActionID, + "agent_id": info.AgentID, + "src": info.Source, + "file": map[string]interface{}{ + "size": info.Total, + "ChunkSize": info.ChunkSize, + "Status": info.Status, + }, + "upload_id": info.ID, + "upload_start": info.Start.UnixMilli(), + }) -/* + bulker.On("Search", + mock.Anything, + mock.Anything, + mock.Anything, + mock.Anything, + ).Return(&es.ResultT{ + HitsT: es.HitsT{ + Hits: []es.HitT{ + { + ID: info.DocID, + Source: out, + }, + }, + }, + }, nil).Once() +} func TestChunkMarksFinal(t *testing.T) { tests := []struct { @@ -247,27 +318,47 @@ func TestChunkMarksFinal(t *testing.T) { {7534559605, 1796, "7.5Gb file"}, } - u := New(8388608000, len(tests), 4) - for _, tc := range tests { t.Run(tc.Name, func(t *testing.T) { - info, err := u.Begin(tc.FileSize, "", "") + + fakeBulk := itesting.NewMockBulk() + fakeBulk.On("Create", + mock.Anything, // match context.Context + mock.Anything, // index + mock.Anything, // document ID + mock.Anything, // ES document + mock.Anything, // bulker options + ).Return("", nil) + + // shared caches, mock bulker, and uploader between test runs had race conditions + // preventing return of the correct mock data for each call, so we will + // recreate them within each test run + c, err := cache.New(config.Cache{NumCounters: 100, MaxCost: 100000}) + require.NoError(t, err) + + u := New(nil, fakeBulk, c, 8388608000, time.Hour) + + data := makeUploadRequestDict(map[string]interface{}{ + "file.size": tc.FileSize, + }) + + info, err := u.Begin(context.Background(), data) assert.NoError(t, err) + // for anything larger than 1-chunk, check for off-by-ones if tc.FinalChunk > 0 { - prev, err := u.Chunk(info.ID, tc.FinalChunk-1) + mockUploadInfoResult(fakeBulk, info) + _, prev, err := u.Chunk(context.Background(), info.ID, tc.FinalChunk-1, "") assert.NoError(t, err) - assert.Falsef(t, prev.Final, "previous chunk ID before last should not be marked final") - prev.Token.Release() + assert.Falsef(t, prev.Last, "penultimate chunk number (%d) should not be marked final", tc.FinalChunk-1) } - chunk, err := u.Chunk(info.ID, tc.FinalChunk) + mockUploadInfoResult(fakeBulk, info) + + // make sure the final chunk is marked as such + _, chunk, err := u.Chunk(context.Background(), info.ID, tc.FinalChunk, "") assert.NoError(t, err) - assert.True(t, chunk.Final) - chunk.Token.Release() + assert.Truef(t, chunk.Last, "chunk number %d should be marked as Last", tc.FinalChunk) }) } } - - -*/ From db44cd1a92d21cc7254182a1d42e8e4d5baf5610 Mon Sep 17 00:00:00 2001 From: pzl Date: Wed, 18 Jan 2023 20:16:25 -0500 Subject: [PATCH 49/51] add line to changelog --- CHANGELOG.next.asciidoc | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGELOG.next.asciidoc b/CHANGELOG.next.asciidoc index 159b0def0..81e96126a 100644 --- a/CHANGELOG.next.asciidoc +++ b/CHANGELOG.next.asciidoc @@ -31,3 +31,4 @@ - Log redacted config when config updates. 
{issue}1626[1626] {pull}1668[1668] - Storing checkin message in last_checkin_message {pull}1932[1932] - Allow upgrade actions to signal that they will be retried. {pull}1887[1887] +- Fleet server now supports file uploads for a limited subset of integrations {pull}1902[1902] \ No newline at end of file From 144756daaf303445a8b56b46ca8a942e71be99ac Mon Sep 17 00:00:00 2001 From: pzl Date: Thu, 19 Jan 2023 13:16:32 -0500 Subject: [PATCH 50/51] comment conventions --- internal/pkg/uploader/upload.go | 2 +- internal/pkg/uploader/upload/info.go | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/internal/pkg/uploader/upload.go b/internal/pkg/uploader/upload.go index 8106e8dd2..95e83f1c6 100644 --- a/internal/pkg/uploader/upload.go +++ b/internal/pkg/uploader/upload.go @@ -228,7 +228,7 @@ func validateUploadPayload(info JSDict) error { return nil } -// Searches for Upload Metadata document in local memory cache if available +// GetUploadInfo searches for Upload Metadata document in local memory cache if available // otherwise, fetches from elasticsearch and caches for next use func (u *Uploader) GetUploadInfo(ctx context.Context, uploadID string) (upload.Info, error) { // Fetch metadata doc, if not cached diff --git a/internal/pkg/uploader/upload/info.go b/internal/pkg/uploader/upload/info.go index 0379819e7..9c7cca19e 100644 --- a/internal/pkg/uploader/upload/info.go +++ b/internal/pkg/uploader/upload/info.go @@ -10,7 +10,7 @@ import ( "github.com/elastic/fleet-server/v7/internal/pkg/model" ) -// the only valid values of upload status according to storage spec +// Status represents the only valid values of upload status according to storage spec type Status string const ( From b87792799acca4c6516df6d2a85c1c116b98b9bb Mon Sep 17 00:00:00 2001 From: pzl Date: Fri, 20 Jan 2023 18:04:44 -0500 Subject: [PATCH 51/51] a WHOLE LOT of handler tests --- internal/pkg/api/handleUpload.go | 19 +- internal/pkg/api/handleUpload_test.go | 1047 +++++++++++++++++++++++++ 2 files changed, 1060 insertions(+), 6 deletions(-) diff --git a/internal/pkg/api/handleUpload.go b/internal/pkg/api/handleUpload.go index 05cf7de67..dc0edb777 100644 --- a/internal/pkg/api/handleUpload.go +++ b/internal/pkg/api/handleUpload.go @@ -17,10 +17,12 @@ import ( "strings" "time" + "github.com/elastic/fleet-server/v7/internal/pkg/apikey" "github.com/elastic/fleet-server/v7/internal/pkg/bulk" "github.com/elastic/fleet-server/v7/internal/pkg/cache" "github.com/elastic/fleet-server/v7/internal/pkg/config" "github.com/elastic/fleet-server/v7/internal/pkg/logger" + "github.com/elastic/fleet-server/v7/internal/pkg/model" "github.com/elastic/fleet-server/v7/internal/pkg/uploader" "github.com/elastic/fleet-server/v7/internal/pkg/uploader/cbor" "github.com/elastic/go-elasticsearch/v8" @@ -66,14 +68,14 @@ func (rt Router) handleUploadChunk(w http.ResponseWriter, r *http.Request, ps ht // simpler authentication check, for high chunk throughput // since chunk checksums must match transit hash // AND optionally the initial hash, both having stricter auth checks - if _, err := authAPIKey(r, rt.bulker, rt.ut.cache); err != nil { + if _, err := rt.ut.authAPIKey(r, rt.bulker, rt.ut.cache); err != nil { writeUploadError(err, w, zlog, start, "authentication failure for chunk write") return } chunkNum, err := strconv.Atoi(chunkID) if err != nil { - writeUploadError(err, w, zlog, start, "error parsing chunk index") + writeUploadError(uploader.ErrInvalidChunkNum, w, zlog, start, "error parsing chunk index") return } if err := 
rt.ut.handleUploadChunk(&zlog, w, r, id, chunkNum); err != nil { @@ -107,6 +109,8 @@ type UploadT struct { chunkClient *elasticsearch.Client cache cache.Cache uploader *uploader.Uploader + authAgent func(*http.Request, *string, bulk.Bulk, cache.Cache) (*model.Agent, error) // injectable for testing purposes + authAPIKey func(*http.Request, bulk.Bulk, cache.Cache) (*apikey.APIKey, error) // as above } func NewUploadT(cfg *config.Server, bulker bulk.Bulk, chunkClient *elasticsearch.Client, cache cache.Cache) *UploadT { @@ -120,10 +124,12 @@ func NewUploadT(cfg *config.Server, bulker bulk.Bulk, chunkClient *elasticsearch bulker: bulker, cache: cache, uploader: uploader.New(chunkClient, bulker, cache, maxFileSize, maxUploadTimer), + authAgent: authAgent, + authAPIKey: authAPIKey, } } -func (ut *UploadT) handleUploadStart(zlog *zerolog.Logger, w http.ResponseWriter, r *http.Request) error { //nolint:unparam // log is standard first arg for the handlers +func (ut *UploadT) handleUploadStart(_ *zerolog.Logger, w http.ResponseWriter, r *http.Request) error { // decode early to match agentID in the payload payload, err := uploader.ReadDict(r.Body) if err != nil { @@ -138,7 +144,8 @@ func (ut *UploadT) handleUploadStart(zlog *zerolog.Logger, w http.ResponseWriter if !ok || agentID == "" { return errors.New("required field agent_id is missing") } - if _, err := authAgent(r, &agentID, ut.bulker, ut.cache); err != nil { + _, err = ut.authAgent(r, &agentID, ut.bulker, ut.cache) + if err != nil { return err } @@ -205,14 +212,14 @@ func (ut *UploadT) handleUploadChunk(zlog *zerolog.Logger, w http.ResponseWriter return nil } -func (ut *UploadT) handleUploadComplete(zlog *zerolog.Logger, w http.ResponseWriter, r *http.Request, uplID string) error { //nolint:unparam // log is standard first arg for the handlers +func (ut *UploadT) handleUploadComplete(_ *zerolog.Logger, w http.ResponseWriter, r *http.Request, uplID string) error { info, err := ut.uploader.GetUploadInfo(r.Context(), uplID) if err != nil { return err } // need to auth that it matches the ID in the initial // doc, but that means we had to doc-lookup early - if _, err := authAgent(r, &info.AgentID, ut.bulker, ut.cache); err != nil { + if _, err := ut.authAgent(r, &info.AgentID, ut.bulker, ut.cache); err != nil { return fmt.Errorf("Error authenticating for upload finalization: %w", err) } diff --git a/internal/pkg/api/handleUpload_test.go b/internal/pkg/api/handleUpload_test.go index bad0dab64..29b9b2da9 100644 --- a/internal/pkg/api/handleUpload_test.go +++ b/internal/pkg/api/handleUpload_test.go @@ -6,3 +6,1050 @@ // +build !integration package api + +import ( + "bytes" + "context" + "crypto/sha256" + "encoding/hex" + "encoding/json" + "io/ioutil" + "net/http" + "net/http/httptest" + "strconv" + "strings" + "testing" + "time" + + "github.com/elastic/fleet-server/v7/internal/pkg/apikey" + "github.com/elastic/fleet-server/v7/internal/pkg/bulk" + "github.com/elastic/fleet-server/v7/internal/pkg/cache" + "github.com/elastic/fleet-server/v7/internal/pkg/config" + "github.com/elastic/fleet-server/v7/internal/pkg/es" + "github.com/elastic/fleet-server/v7/internal/pkg/model" + itesting "github.com/elastic/fleet-server/v7/internal/pkg/testing" + "github.com/elastic/fleet-server/v7/internal/pkg/uploader" + "github.com/elastic/fleet-server/v7/internal/pkg/uploader/upload" + "github.com/elastic/go-elasticsearch/v8" + "github.com/julienschmidt/httprouter" + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/mock" + 
"github.com/stretchr/testify/require" +) + +/* + Upload Begin route testing +*/ + +func TestUploadStartValidation(t *testing.T) { + hr, _, _ := prepareUploaderMock(t) + + // test empty body + rec := httptest.NewRecorder() + hr.ServeHTTP(rec, httptest.NewRequest(http.MethodPost, RouteUploadBegin, nil)) + assert.Equal(t, http.StatusBadRequest, rec.Code) + assert.Contains(t, rec.Body.String(), "body is required") + + // now test various body contents + tests := []struct { + Name string + ExpectStatus int + ExpectContains string + Input string + }{ + {"Zero length body is rejected", http.StatusBadRequest, "", ""}, + {"Minimum required body", http.StatusOK, "upload_id", + `{ + "file": { + "size": 200, + "name": "foo.png", + "mime_type": "image/png" + }, + "agent_id": "foo", + "action_id": "123", + "src": "agent" + }`, + }, + {"Oversized file should be rejected", http.StatusBadRequest, "size", + `{ + "file": { + "size": ` + strconv.Itoa(maxFileSize+1024) + `, + "name": "foo.png", + "mime_type": "image/png" + }, + "agent_id": "foo", + "action_id": "123", + "src": "agent" + }`, + }, + {"zero size file should be rejected", http.StatusBadRequest, "size", + `{ + "file": { + "size": 0, + "name": "foo.png", + "mime_type": "image/png" + }, + "agent_id": "foo", + "action_id": "123", + "src": "agent" + }`, + }, + {"negative size file should be rejected", http.StatusBadRequest, "size", + `{ + "file": { + "size": -100, + "name": "foo.png", + "mime_type": "image/png" + }, + "agent_id": "foo", + "action_id": "123", + "src": "agent" + }`, + }, + {"file size is required", http.StatusBadRequest, "file.size is required", + `{ + "file": { + "name": "foo.png", + "mime_type": "image/png" + }, + "agent_id": "foo", + "action_id": "123", + "src": "agent" + }`, + }, + {"file name is required", http.StatusBadRequest, "file.name is required", + `{ + "file": { + "size": 100, + "mime_type": "image/png" + }, + "agent_id": "foo", + "action_id": "123", + "src": "agent" + }`, + }, + {"file name must not be empty", http.StatusBadRequest, "file.name", + `{ + "file": { + "size": 100, + "name": "", + "mime_type": "image/png" + }, + "agent_id": "foo", + "action_id": "123", + "src": "agent" + }`, + }, + {"file mime_type is required", http.StatusBadRequest, "mime_type", + `{ + "file": { + "size": 100, + "name": "foo.png" + }, + "agent_id": "foo", + "action_id": "123", + "src": "agent" + }`, + }, + {"agent_id is required", http.StatusBadRequest, "agent_id", + `{ + "file": { + "size": 100, + "name": "foo.png", + "mime_type": "image/png" + }, + "action_id": "123", + "src": "agent" + }`, + }, + {"action_id is required", http.StatusBadRequest, "action_id", + `{ + "file": { + "size": 100, + "name": "foo.png", + "mime_type": "image/png" + }, + "agent_id": "foo", + "src": "agent" + }`, + }, + {"action_id must not be empty", http.StatusBadRequest, "action_id", + `{ + "file": { + "size": 100, + "name": "foo.png", + "mime_type": "image/png" + }, + "agent_id": "foo", + "action_id": "", + "src": "agent" + }`, + }, + {"src is required", http.StatusBadRequest, "src", + `{ + "file": { + "size": 100, + "name": "foo.png", + "mime_type": "image/png" + }, + "agent_id": "foo", + "action_id": "123" + }`, + }, + {"src must not be empty", http.StatusBadRequest, "src", + `{ + "file": { + "size": 100, + "name": "foo.png", + "mime_type": "image/png" + }, + "agent_id": "foo", + "action_id": "123", + "src":"" + }`, + }, + } + + for _, tc := range tests { + t.Run(tc.Name, func(t *testing.T) { + rec = httptest.NewRecorder() + req := 
httptest.NewRequest(http.MethodPost, RouteUploadBegin, strings.NewReader(tc.Input)) + hr.ServeHTTP(rec, req) + assert.Equal(t, tc.ExpectStatus, rec.Code) + if tc.ExpectContains != "" { + assert.Contains(t, rec.Body.String(), tc.ExpectContains) + } + + }) + } + +} + +func TestUploadStartAuth(t *testing.T) { + + tests := []struct { + Name string + AuthSuccess bool + AgentFromAPIKey string + AgentInRequestBody string + ExpectStatus int + }{ + {"Agent ID matching API Key succeeds", true, "abc123", "abc123", http.StatusOK}, + {"Agent ID not matching API Key should reject", true, "oneID", "differentID", http.StatusBadRequest}, + {"Bad auth should reject request", false, "", "IDinDoc", http.StatusBadRequest}, + } + + for _, tc := range tests { + t.Run(tc.Name, func(t *testing.T) { + + hr, rt, _ := prepareUploaderMock(t) + if !tc.AuthSuccess { + rt.ut.authAPIKey = func(r *http.Request, b bulk.Bulk, c cache.Cache) (*apikey.APIKey, error) { + return nil, apikey.ErrInvalidToken + } + rt.ut.authAgent = func(r *http.Request, s *string, b bulk.Bulk, c cache.Cache) (*model.Agent, error) { + return nil, apikey.ErrInvalidToken + } + } else { + rt.ut.authAgent = func(r *http.Request, s *string, b bulk.Bulk, c cache.Cache) (*model.Agent, error) { + if *s != tc.AgentFromAPIKey { // real AuthAgent provides this facility + return nil, ErrAgentIdentity + } + return &model.Agent{ + ESDocument: model.ESDocument{ + Id: tc.AgentFromAPIKey, + }, + Agent: &model.AgentMetadata{ + ID: tc.AgentFromAPIKey, + }, + }, nil + } + } + + rec := httptest.NewRecorder() + req := httptest.NewRequest(http.MethodPost, RouteUploadBegin, strings.NewReader(mockStartBodyWithAgent(tc.AgentInRequestBody))) + hr.ServeHTTP(rec, req) + + assert.Equal(t, tc.ExpectStatus, rec.Code) + }) + } + +} + +func TestUploadStartResponse(t *testing.T) { + hr, _, _ := prepareUploaderMock(t) + rec := httptest.NewRecorder() + req := httptest.NewRequest(http.MethodPost, RouteUploadBegin, strings.NewReader(mockStartBodyWithAgent("foo"))) + hr.ServeHTTP(rec, req) + assert.Equal(t, http.StatusOK, rec.Code) + + type UploadStartResponse struct { + UploadID string `json:"upload_id"` + ChunkSize int `json:"chunk_size"` + } + + var response UploadStartResponse + err := json.Unmarshal(rec.Body.Bytes(), &response) + assert.NoErrorf(t, err, "upload start should provide valid JSON response") + + assert.NotEmptyf(t, response.UploadID, "upload start response should provide an ID") + assert.Greaterf(t, response.ChunkSize, 0, "upload start response should provide a chunk size > 0") + +} + +/* + Chunk data upload route +*/ + +func TestChunkUploadRouteParams(t *testing.T) { + + data := []byte("filedata") + hasher := sha256.New() + _, err := hasher.Write(data) + require.NoError(t, err) + hash := hex.EncodeToString(hasher.Sum(nil)) + + mockUploadID := "abc123" + + tests := []struct { + Name string + Path string + ExpectStatus int + ExpectErrContains string + }{ + {"Valid chunk number is OK", "/api/fleet/uploads/" + mockUploadID + "/0", http.StatusOK, ""}, + {"Non-numeric chunk number is rejected", "/api/fleet/uploads/" + mockUploadID + "/CHUNKNUM", http.StatusBadRequest, "invalid chunk number"}, + {"Negative chunk number is rejected", "/api/fleet/uploads/" + mockUploadID + "/-2", http.StatusBadRequest, "invalid chunk number"}, + {"Too large chunk number is rejected", "/api/fleet/uploads/" + mockUploadID + "/50", http.StatusBadRequest, "invalid chunk number"}, + } + + for _, tc := range tests { + t.Run(tc.Name, func(t *testing.T) { + + hr, _, fakebulk := prepareUploaderMock(t) + 
mockUploadInfoResult(fakebulk, upload.Info{ + DocID: "bar.foo", + ID: mockUploadID, + ChunkSize: maxFileSize, + Total: uploader.MaxChunkSize + 1, + Count: 2, // this is a 2-chunk "file" based on size above + Start: time.Now(), + Status: upload.StatusProgress, + Source: "agent", + AgentID: "foo", + ActionID: "bar", + }) + + rec := httptest.NewRecorder() + req := httptest.NewRequest(http.MethodPut, tc.Path, bytes.NewReader(data)) + req.Header.Set("X-Chunk-SHA2", hash) + + hr.ServeHTTP(rec, req) + + assert.Equal(t, tc.ExpectStatus, rec.Code) + if tc.ExpectErrContains != "" { + assert.Contains(t, rec.Body.String(), tc.ExpectErrContains) + } + }) + } + +} + +func TestChunkUploadRequiresChunkHashHeader(t *testing.T) { + data := []byte("filedata") + mockUploadID := "abc123" + + hr, _, fakebulk := prepareUploaderMock(t) + mockUploadInfoResult(fakebulk, upload.Info{ + DocID: "bar.foo", + ID: mockUploadID, + ChunkSize: maxFileSize, + Total: 10, + Count: 1, + Start: time.Now(), + Status: upload.StatusProgress, + Source: "agent", + AgentID: "foo", + ActionID: "bar", + }) + + rec := httptest.NewRecorder() + req := httptest.NewRequest(http.MethodPut, "/api/fleet/uploads/"+mockUploadID+"/0", bytes.NewReader(data)) + hr.ServeHTTP(rec, req) + + assert.Equal(t, http.StatusBadRequest, rec.Code) + assert.Contains(t, rec.Body.String(), "hash header") + +} + +func TestChunkUploadStatus(t *testing.T) { + data := []byte("filedata") + hasher := sha256.New() + _, err := hasher.Write(data) + require.NoError(t, err) + hash := hex.EncodeToString(hasher.Sum(nil)) + + mockUploadID := "abc123" + + tests := []struct { + Name string + Status upload.Status + ExpectStatus int + ExpectErrContains string + }{ + {"Can upload for Status Awaiting", upload.StatusAwaiting, http.StatusOK, ""}, + {"Can upload for in progress", upload.StatusProgress, http.StatusOK, ""}, + {"Status Delete Files cannot upload", upload.StatusDel, http.StatusBadRequest, "stopped"}, + {"Status Complete File cannot upload", upload.StatusDone, http.StatusBadRequest, "stopped"}, + {"Status Failure File cannot upload", upload.StatusFail, http.StatusBadRequest, "stopped"}, + } + + for _, tc := range tests { + t.Run(tc.Name, func(t *testing.T) { + + hr, _, fakebulk := prepareUploaderMock(t) + mockUploadInfoResult(fakebulk, upload.Info{ + DocID: "bar.foo", + ID: mockUploadID, + ChunkSize: maxFileSize, + Total: 10, + Count: 1, + Start: time.Now(), + Status: tc.Status, + Source: "agent", + AgentID: "foo", + ActionID: "bar", + }) + + rec := httptest.NewRecorder() + req := httptest.NewRequest(http.MethodPut, "/api/fleet/uploads/"+mockUploadID+"/0", bytes.NewReader(data)) + req.Header.Set("X-Chunk-SHA2", hash) + + hr.ServeHTTP(rec, req) + + assert.Equal(t, tc.ExpectStatus, rec.Code) + if tc.ExpectErrContains != "" { + assert.Contains(t, rec.Body.String(), tc.ExpectErrContains) + } + }) + } + +} + +func TestChunkUploadExpiry(t *testing.T) { + data := []byte("filedata") + hasher := sha256.New() + _, err := hasher.Write(data) + require.NoError(t, err) + hash := hex.EncodeToString(hasher.Sum(nil)) + + mockUploadID := "abc123" + + tests := []struct { + Name string + StartTime time.Time + ExpectStatus int + ExpectErrContains string + }{ + {"Unexpired upload succeeds", time.Now().Add(-time.Minute), http.StatusOK, ""}, + {"Expired Upload rejects", time.Now().Add(-maxUploadTimer * 2), http.StatusBadRequest, "expired"}, + } + + for _, tc := range tests { + t.Run(tc.Name, func(t *testing.T) { + + hr, _, fakebulk := prepareUploaderMock(t) + mockUploadInfoResult(fakebulk, 
upload.Info{ + DocID: "bar.foo", + ID: mockUploadID, + ChunkSize: maxFileSize, + Total: 10, + Count: 1, + Start: tc.StartTime, + Status: upload.StatusAwaiting, + Source: "agent", + AgentID: "foo", + ActionID: "bar", + }) + + rec := httptest.NewRecorder() + req := httptest.NewRequest(http.MethodPut, "/api/fleet/uploads/"+mockUploadID+"/0", bytes.NewReader(data)) + req.Header.Set("X-Chunk-SHA2", hash) + + hr.ServeHTTP(rec, req) + + assert.Equal(t, tc.ExpectStatus, rec.Code) + if tc.ExpectErrContains != "" { + assert.Contains(t, rec.Body.String(), tc.ExpectErrContains) + } + }) + } + +} + +/* + Upload finalization route testing +*/ + +func TestUploadFinalizeRequiresMatchingAuth(t *testing.T) { + tests := []struct { + Name string + AuthSuccess bool + AgentFromAPIKey string + AgentInFileRecord string + ExpectStatus int + }{ + {"Agent ID matching API Key succeeds", true, "abc123", "abc123", http.StatusOK}, + {"Agent ID in File not matching API Key should reject", true, "oneID", "differentID", http.StatusBadRequest}, + {"Bad auth should reject request", false, "", "IDinDoc", http.StatusBadRequest}, + } + mockUploadID := "abc123" + + for _, tc := range tests { + t.Run(tc.Name, func(t *testing.T) { + + hr, rt, fakebulk := prepareUploaderMock(t) + mockInfo := upload.Info{ + DocID: "bar." + tc.AgentInFileRecord, + ID: mockUploadID, + ChunkSize: maxFileSize, + Total: 10, + Count: 1, + Start: time.Now().Add(-time.Minute), + Status: upload.StatusAwaiting, + Source: "agent", + AgentID: tc.AgentInFileRecord, + ActionID: "bar", + } + + transit := mockUploadedFile(fakebulk, mockInfo, []uploader.ChunkInfo{{ + Last: true, + Pos: 0, + SHA2: "0c4a81b85a6b7ff00bde6c32e1e8be33b4b793b3b7b5cb03db93f77f7c9374d1", // sample value + BID: mockInfo.DocID, + Size: int(mockInfo.Total), + }}) + + if !tc.AuthSuccess { + rt.ut.authAPIKey = func(r *http.Request, b bulk.Bulk, c cache.Cache) (*apikey.APIKey, error) { + return nil, apikey.ErrInvalidToken + } + rt.ut.authAgent = func(r *http.Request, s *string, b bulk.Bulk, c cache.Cache) (*model.Agent, error) { + return nil, apikey.ErrInvalidToken + } + } else { + rt.ut.authAgent = func(r *http.Request, s *string, b bulk.Bulk, c cache.Cache) (*model.Agent, error) { + if *s != tc.AgentFromAPIKey { // real AuthAgent provides this facility + return nil, ErrAgentIdentity + } + return &model.Agent{ + Agent: &model.AgentMetadata{ + ID: tc.AgentFromAPIKey, + }, + }, nil + } + } + + rec := httptest.NewRecorder() + req := httptest.NewRequest(http.MethodPost, "/api/fleet/uploads/"+mockUploadID, strings.NewReader(`{"transithash":{"sha256":"`+transit+`"}}`)) + hr.ServeHTTP(rec, req) + + assert.Equal(t, tc.ExpectStatus, rec.Code) + }) + } +} + +func TestUploadFinalizeRequiresValidStatus(t *testing.T) { + mockUploadID := "abc123" + + tests := []struct { + Name string + Status upload.Status + ExpectStatus int + ExpectErrContains string + }{ + {"Can finalize Status Awaiting", upload.StatusAwaiting, http.StatusOK, ""}, + {"Can finalize Status in progress", upload.StatusProgress, http.StatusOK, ""}, + {"Cannot finalize Status Deleted", upload.StatusDel, http.StatusBadRequest, "closed"}, + {"Cannot finalize Status Complete", upload.StatusDone, http.StatusBadRequest, "closed"}, + {"Cannot finalize Status Failure", upload.StatusFail, http.StatusBadRequest, "closed"}, + } + + for _, tc := range tests { + t.Run(tc.Name, func(t *testing.T) { + + hr, _, fakebulk := prepareUploaderMock(t) + mockInfo := upload.Info{ + DocID: "bar.foo", + ID: mockUploadID, + ChunkSize: uploader.MaxChunkSize, + Total: 10, + 
Count: 1, + Start: time.Now().Add(-time.Minute), + Status: tc.Status, + Source: "agent", + AgentID: "foo", + ActionID: "bar", + } + + transit := mockUploadedFile(fakebulk, mockInfo, []uploader.ChunkInfo{{ + Last: true, + BID: mockInfo.DocID, + Size: int(mockInfo.Total), + Pos: 0, + SHA2: "0c4a81b85a6b7ff00bde6c32e1e8be33b4b793b3b7b5cb03db93f77f7c9374d1", // sample value + }}) + + rec := httptest.NewRecorder() + req := httptest.NewRequest(http.MethodPost, "/api/fleet/uploads/"+mockUploadID, strings.NewReader(`{"transithash": {"sha256": "`+transit+`"}}`)) + + hr.ServeHTTP(rec, req) + + assert.Equal(t, tc.ExpectStatus, rec.Code) + if tc.ExpectErrContains != "" { + assert.Contains(t, rec.Body.String(), tc.ExpectErrContains) + } + }) + } +} + +func TestUploadFinalizeRejectsMissingChunks(t *testing.T) { + mockUploadID := "abc123" + + hr, _, fakebulk := prepareUploaderMock(t) + mockInfo := upload.Info{ + DocID: "bar.foo", + ID: mockUploadID, + ChunkSize: uploader.MaxChunkSize, + Total: uploader.MaxChunkSize * 3, + Count: 3, + Start: time.Now().Add(-time.Minute), + Status: upload.StatusProgress, + Source: "agent", + AgentID: "foo", + ActionID: "bar", + } + + transit := mockUploadedFile(fakebulk, mockInfo, []uploader.ChunkInfo{ + { + Last: false, + BID: mockInfo.DocID, + Size: int(uploader.MaxChunkSize), + Pos: 0, + SHA2: "0c4a81b85a6b7ff00bde6c32e1e8be33b4b793b3b7b5cb03db93f77f7c9374d1", // sample value + }, + // chunk position 1 omitted + { + Last: true, + BID: mockInfo.DocID, + Size: int(uploader.MaxChunkSize), + Pos: 2, + SHA2: "0c4a81b85a6b7ff00bde6c32e1e8be33b4b793b3b7b5cb03db93f77f7c9374d1", // sample value + }, + }) + + rec := httptest.NewRecorder() + req := httptest.NewRequest(http.MethodPost, "/api/fleet/uploads/"+mockUploadID, strings.NewReader(`{"transithash": {"sha256": "`+transit+`"}}`)) + + hr.ServeHTTP(rec, req) + + assert.Equal(t, http.StatusBadRequest, rec.Code) + assert.Contains(t, rec.Body.String(), "incomplete") +} + +func TestUploadFinalizeRejectsFinalChunkNotMarkedFinal(t *testing.T) { + mockUploadID := "abc123" + + hr, _, fakebulk := prepareUploaderMock(t) + mockInfo := upload.Info{ + DocID: "bar.foo", + ID: mockUploadID, + ChunkSize: uploader.MaxChunkSize, + Total: uploader.MaxChunkSize * 3, + Count: 3, + Start: time.Now().Add(-time.Minute), + Status: upload.StatusProgress, + Source: "agent", + AgentID: "foo", + ActionID: "bar", + } + + transit := mockUploadedFile(fakebulk, mockInfo, []uploader.ChunkInfo{ + { + Last: false, + BID: mockInfo.DocID, + Size: int(uploader.MaxChunkSize), + Pos: 0, + SHA2: "0c4a81b85a6b7ff00bde6c32e1e8be33b4b793b3b7b5cb03db93f77f7c9374d1", // sample value + }, + { + Last: false, + BID: mockInfo.DocID, + Size: int(uploader.MaxChunkSize), + Pos: 1, + SHA2: "0c4a81b85a6b7ff00bde6c32e1e8be33b4b793b3b7b5cb03db93f77f7c9374d1", // sample value + }, + { + Last: false, + BID: mockInfo.DocID, + Size: int(uploader.MaxChunkSize), + Pos: 2, + SHA2: "0c4a81b85a6b7ff00bde6c32e1e8be33b4b793b3b7b5cb03db93f77f7c9374d1", // sample value + }, + }) + + rec := httptest.NewRecorder() + req := httptest.NewRequest(http.MethodPost, "/api/fleet/uploads/"+mockUploadID, strings.NewReader(`{"transithash": {"sha256": "`+transit+`"}}`)) + + hr.ServeHTTP(rec, req) + + assert.Equal(t, http.StatusBadRequest, rec.Code) + assert.Contains(t, rec.Body.String(), "failed validation") +} + +func TestUploadFinalizeNonFinalChunkMarkedFinal(t *testing.T) { + mockUploadID := "abc123" + + hr, _, fakebulk := prepareUploaderMock(t) + mockInfo := upload.Info{ + DocID: "bar.foo", + ID: 
mockUploadID, + ChunkSize: uploader.MaxChunkSize, + Total: uploader.MaxChunkSize * 3, + Count: 3, + Start: time.Now().Add(-time.Minute), + Status: upload.StatusProgress, + Source: "agent", + AgentID: "foo", + ActionID: "bar", + } + + transit := mockUploadedFile(fakebulk, mockInfo, []uploader.ChunkInfo{ + { + Last: false, + BID: mockInfo.DocID, + Size: int(uploader.MaxChunkSize), + Pos: 0, + SHA2: "0c4a81b85a6b7ff00bde6c32e1e8be33b4b793b3b7b5cb03db93f77f7c9374d1", // sample value + }, + { + Last: true, + BID: mockInfo.DocID, + Size: int(uploader.MaxChunkSize), + Pos: 1, + SHA2: "0c4a81b85a6b7ff00bde6c32e1e8be33b4b793b3b7b5cb03db93f77f7c9374d1", // sample value + }, + { + Last: true, + BID: mockInfo.DocID, + Size: int(uploader.MaxChunkSize), + Pos: 2, + SHA2: "0c4a81b85a6b7ff00bde6c32e1e8be33b4b793b3b7b5cb03db93f77f7c9374d1", // sample value + }, + }) + + rec := httptest.NewRecorder() + req := httptest.NewRequest(http.MethodPost, "/api/fleet/uploads/"+mockUploadID, strings.NewReader(`{"transithash": {"sha256": "`+transit+`"}}`)) + + hr.ServeHTTP(rec, req) + + assert.Equal(t, http.StatusBadRequest, rec.Code) + assert.Contains(t, rec.Body.String(), "failed validation") +} + +func TestUploadFinalizeUndersizedChunk(t *testing.T) { + mockUploadID := "abc123" + + hr, _, fakebulk := prepareUploaderMock(t) + mockInfo := upload.Info{ + DocID: "bar.foo", + ID: mockUploadID, + ChunkSize: uploader.MaxChunkSize, + Total: uploader.MaxChunkSize * 3, + Count: 3, + Start: time.Now().Add(-time.Minute), + Status: upload.StatusProgress, + Source: "agent", + AgentID: "foo", + ActionID: "bar", + } + + transit := mockUploadedFile(fakebulk, mockInfo, []uploader.ChunkInfo{ + { + Last: false, + BID: mockInfo.DocID, + Size: int(uploader.MaxChunkSize), + Pos: 0, + SHA2: "0c4a81b85a6b7ff00bde6c32e1e8be33b4b793b3b7b5cb03db93f77f7c9374d1", // sample value + }, + { + Last: false, + BID: mockInfo.DocID, + Size: int(uploader.MaxChunkSize) - 5, + Pos: 1, + SHA2: "0c4a81b85a6b7ff00bde6c32e1e8be33b4b793b3b7b5cb03db93f77f7c9374d1", // sample value + }, + { + Last: true, + BID: mockInfo.DocID, + Size: int(uploader.MaxChunkSize), + Pos: 2, + SHA2: "0c4a81b85a6b7ff00bde6c32e1e8be33b4b793b3b7b5cb03db93f77f7c9374d1", // sample value + }, + }) + + rec := httptest.NewRecorder() + req := httptest.NewRequest(http.MethodPost, "/api/fleet/uploads/"+mockUploadID, strings.NewReader(`{"transithash": {"sha256": "`+transit+`"}}`)) + + hr.ServeHTTP(rec, req) + + assert.Equal(t, http.StatusBadRequest, rec.Code) + assert.Contains(t, rec.Body.String(), "failed validation") +} + +func TestUploadFinalizeIncorrectTransitHash(t *testing.T) { + mockUploadID := "abc123" + + hr, _, fakebulk := prepareUploaderMock(t) + mockInfo := upload.Info{ + DocID: "bar.foo", + ID: mockUploadID, + ChunkSize: uploader.MaxChunkSize, + Total: uploader.MaxChunkSize * 3, + Count: 3, + Start: time.Now().Add(-time.Minute), + Status: upload.StatusProgress, + Source: "agent", + AgentID: "foo", + ActionID: "bar", + } + + mockUploadedFile(fakebulk, mockInfo, []uploader.ChunkInfo{ + { + Last: false, + BID: mockInfo.DocID, + Size: int(uploader.MaxChunkSize), + Pos: 0, + SHA2: "0c4a81b85a6b7ff00bde6c32e1e8be33b4b793b3b7b5cb03db93f77f7c9374d1", // sample value + }, + { + Last: false, + BID: mockInfo.DocID, + Size: int(uploader.MaxChunkSize) - 5, + Pos: 1, + SHA2: "0c4a81b85a6b7ff00bde6c32e1e8be33b4b793b3b7b5cb03db93f77f7c9374d1", // sample value + }, + { + Last: true, + BID: mockInfo.DocID, + Size: int(uploader.MaxChunkSize), + Pos: 2, + SHA2: 
"0c4a81b85a6b7ff00bde6c32e1e8be33b4b793b3b7b5cb03db93f77f7c9374d1", // sample value + }, + }) + + rec := httptest.NewRecorder() + req := httptest.NewRequest(http.MethodPost, "/api/fleet/uploads/"+mockUploadID, strings.NewReader(`{"transithash": {"sha256": "wrongHash"}}`)) + + hr.ServeHTTP(rec, req) + + assert.Equal(t, http.StatusBadRequest, rec.Code) + assert.Contains(t, rec.Body.String(), "failed validation") +} + +/* + Helpers and mocks +*/ + +// prepareUploaderMock sets up common dependencies and registers upload routes to a returned router +func prepareUploaderMock(t *testing.T) (*httprouter.Router, Router, *itesting.MockBulk) { + // chunk index operations skip the bulker in order to send binary docs directly + // so a mock *elasticsearch.Client needs to be be prepared + es := mockESClient(t) + + fakebulk := itesting.NewMockBulk() + fakebulk.On("Create", + mock.Anything, + mock.Anything, + mock.Anything, + mock.Anything, + mock.Anything, + ).Return("", nil) + fakebulk.On("Update", + mock.Anything, + mock.Anything, + mock.Anything, + mock.Anything, + mock.Anything, + ).Return(nil) + fakebulk.On("Client", + mock.Anything, + mock.Anything, + mock.Anything, + mock.Anything, + mock.Anything, + ).Return(es, nil) + + c, err := cache.New(config.Cache{NumCounters: 100, MaxCost: 100000}) + require.NoError(t, err) + + // create a router instance with an UploadT that will handle the incoming requests + rt := Router{ + ctx: context.Background(), + ut: &UploadT{ + bulker: fakebulk, + chunkClient: es, + cache: c, + uploader: uploader.New(es, fakebulk, c, maxFileSize, maxUploadTimer), + authAgent: func(r *http.Request, id *string, bulker bulk.Bulk, c cache.Cache) (*model.Agent, error) { + return &model.Agent{ + ESDocument: model.ESDocument{ + Id: "foo", + }, + Agent: &model.AgentMetadata{ + ID: "foo", + }, + }, nil + }, + authAPIKey: func(r *http.Request, b bulk.Bulk, c cache.Cache) (*apikey.APIKey, error) { + return nil, nil + }, + }, + } + + hr := httprouter.New() + hr.Handle(http.MethodPost, RouteUploadBegin, rt.handleUploadStart) + hr.Handle(http.MethodPut, RouteUploadChunk, rt.handleUploadChunk) + hr.Handle(http.MethodPost, RouteUploadComplete, rt.handleUploadComplete) + return hr, rt, fakebulk +} + +// mockStartBodyWithAgent returns the minimum required JSON payload for beginning an upload, with agent set as input +func mockStartBodyWithAgent(agent string) string { + return `{ + "file": { + "size": 200, + "name": "foo.png", + "mime_type": "image/png" + }, + "agent_id": "` + agent + `", + "action_id": "123", + "src": "agent" + }` +} + +// mockUploadInfoResult sets up the MockBulk to return file metadata in the proper format +func mockUploadInfoResult(bulker *itesting.MockBulk, info upload.Info) { + + // convert info into how it's stored/returned in ES + out, _ := json.Marshal(map[string]interface{}{ + "action_id": info.ActionID, + "agent_id": info.AgentID, + "src": info.Source, + "file": map[string]interface{}{ + "size": info.Total, + "ChunkSize": info.ChunkSize, + "Status": info.Status, + }, + "upload_id": info.ID, + "upload_start": info.Start.UnixMilli(), + }) + + bulker.On("Search", + mock.Anything, + mock.MatchedBy(func(idx string) bool { return strings.HasPrefix(idx, ".fleet-files-") }), + mock.Anything, + mock.Anything, + ).Return(&es.ResultT{ + HitsT: es.HitsT{ + Hits: []es.HitT{ + { + ID: info.DocID, + Source: out, + }, + }, + }, + }, nil).Once() +} + +// mockChunkResult sets up the MockBulk to return Chunk Data in the expected format from Elasticsearch +// it returns the transithash for 
the provided chunks +func mockChunkResult(bulker *itesting.MockBulk, chunks []uploader.ChunkInfo) string { + + results := make([]es.HitT, len(chunks)) + for i, chunk := range chunks { + results[i] = es.HitT{ + ID: chunk.BID + "." + strconv.Itoa(chunk.Pos), + Fields: map[string]interface{}{ + uploader.FieldBaseID: []interface{}{chunk.BID}, + uploader.FieldSHA2: []interface{}{chunk.SHA2}, + uploader.FieldLast: []interface{}{chunk.Last}, + "size": []interface{}{chunk.Size}, + }, + } + } + + bulker.On("Search", + mock.Anything, + mock.MatchedBy(func(idx string) bool { return strings.HasPrefix(idx, ".fleet-file-data-") }), + mock.Anything, + mock.Anything, + ).Return(&es.ResultT{ + HitsT: es.HitsT{ + Hits: results, + }, + }, nil) + return calcTransitHash(chunks) +} + +// mockUploadedFile places the expected data (file metadata and chunks) into the bulker +// to emulate an uploaded file +// it returns the transithash for the provided chunks +func mockUploadedFile(bulker *itesting.MockBulk, info upload.Info, chunks []uploader.ChunkInfo) string { + mockUploadInfoResult(bulker, info) // one result from the cache for agent ID check + mockUploadInfoResult(bulker, info) // second for a cache-busting fetch for up-to-date status + return mockChunkResult(bulker, chunks) +} + +func calcTransitHash(chunks []uploader.ChunkInfo) string { + hasher := sha256.New() + for _, c := range chunks { + out, err := hex.DecodeString(c.SHA2) + if err != nil { + panic(err) + } + _, _ = hasher.Write(out) + } + return hex.EncodeToString(hasher.Sum(nil)) +} + +/* + Setup to turn a *elasticsearch.Client into a harmless mock + by replacing its Transport with one that goes nowhere +*/ + +type MockTransport struct { + Response *http.Response + RoundTripFn func(req *http.Request) (*http.Response, error) +} + +func (t *MockTransport) RoundTrip(req *http.Request) (*http.Response, error) { + return t.RoundTripFn(req) +} + +func mockESClient(t *testing.T) *elasticsearch.Client { + mocktrans := MockTransport{ + Response: &http.Response{ + StatusCode: http.StatusOK, + Body: ioutil.NopCloser(strings.NewReader(`{}`)), + Header: http.Header{"X-Elastic-Product": []string{"Elasticsearch"}}, + }, + } + + mocktrans.RoundTripFn = func(req *http.Request) (*http.Response, error) { return mocktrans.Response, nil } + client, err := elasticsearch.NewClient(elasticsearch.Config{ + Transport: &mocktrans, + }) + require.NoError(t, err) + return client +}
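
Taken together, the handler tests above exercise the whole client-facing flow: POST the file metadata to begin an upload, PUT each chunk with its sha256 in the X-Chunk-SHA2 header, then POST a transithash (a sha256 over the raw chunk digests in order, as calcTransitHash computes) to finalize. The following is a minimal client-side sketch of that flow, for illustration only; the base URL, the ApiKey authorization header, and the literal begin path are assumptions not taken from this patch series (only the chunk and finalize paths appear verbatim in the tests above), and field names mirror the request bodies used in the tests.

package main

import (
	"bytes"
	"crypto/sha256"
	"encoding/hex"
	"encoding/json"
	"fmt"
	"net/http"
	"strings"
)

func main() {
	const (
		baseURL = "http://localhost:8220" // assumption: local fleet-server address
		apiKey  = "example-key"           // assumption: agent API key and ApiKey auth scheme
	)
	file := []byte("example file contents")

	// 1. Begin: declare the file metadata and receive an upload_id and server-chosen chunk_size.
	beginBody := fmt.Sprintf(`{"file":{"size":%d,"name":"example.txt","mime_type":"text/plain"},"agent_id":"agent1","action_id":"action1","src":"agent"}`, len(file))
	req, _ := http.NewRequest(http.MethodPost, baseURL+"/api/fleet/uploads", strings.NewReader(beginBody)) // assumed begin path
	req.Header.Set("Authorization", "ApiKey "+apiKey)
	req.Header.Set("Content-Type", "application/json")
	resp, err := http.DefaultClient.Do(req)
	if err != nil {
		panic(err)
	}
	var begin struct {
		UploadID  string `json:"upload_id"`
		ChunkSize int    `json:"chunk_size"`
	}
	if err := json.NewDecoder(resp.Body).Decode(&begin); err != nil {
		panic(err)
	}
	resp.Body.Close()
	if begin.ChunkSize <= 0 {
		panic("server did not return a usable chunk_size")
	}

	// 2. Chunks: PUT each chunk to /api/fleet/uploads/{upload_id}/{num} with its sha256 in X-Chunk-SHA2.
	transit := sha256.New()
	for num, off := 0, 0; off < len(file); num, off = num+1, off+begin.ChunkSize {
		end := off + begin.ChunkSize
		if end > len(file) {
			end = len(file)
		}
		chunk := file[off:end]
		sum := sha256.Sum256(chunk)
		transit.Write(sum[:]) // transithash covers the raw chunk digests, in order

		creq, _ := http.NewRequest(http.MethodPut,
			fmt.Sprintf("%s/api/fleet/uploads/%s/%d", baseURL, begin.UploadID, num), bytes.NewReader(chunk))
		creq.Header.Set("Authorization", "ApiKey "+apiKey)
		creq.Header.Set("X-Chunk-SHA2", hex.EncodeToString(sum[:]))
		cresp, err := http.DefaultClient.Do(creq)
		if err != nil {
			panic(err)
		}
		cresp.Body.Close()
	}

	// 3. Finalize: POST the transithash so the server can validate the assembled chunks.
	finalBody := fmt.Sprintf(`{"transithash":{"sha256":"%s"}}`, hex.EncodeToString(transit.Sum(nil)))
	freq, _ := http.NewRequest(http.MethodPost, baseURL+"/api/fleet/uploads/"+begin.UploadID, strings.NewReader(finalBody))
	freq.Header.Set("Authorization", "ApiKey "+apiKey)
	freq.Header.Set("Content-Type", "application/json")
	fresp, err := http.DefaultClient.Do(freq)
	if err != nil {
		panic(err)
	}
	fresp.Body.Close()
	fmt.Println("upload finalized:", fresp.Status)
}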