From 7fa126f1ba80520351b1dd8bf823cd6766d855ee Mon Sep 17 00:00:00 2001 From: Josh Black Date: Thu, 10 Nov 2022 12:11:23 -0800 Subject: [PATCH 01/26] backport of commit 840abfbe10427b571c5afde38d79a5f24435c668 --- changelog/17856.txt | 3 +++ vault/activity_log.go | 13 ++++++++++++- 2 files changed, 15 insertions(+), 1 deletion(-) create mode 100644 changelog/17856.txt diff --git a/changelog/17856.txt b/changelog/17856.txt new file mode 100644 index 000000000000..f039794857ab --- /dev/null +++ b/changelog/17856.txt @@ -0,0 +1,3 @@ +```release-note:bug +core/activity: fix the end_date returned from the activity log endpoint when partial counts are computed +``` diff --git a/vault/activity_log.go b/vault/activity_log.go index 865fd24bed24..d9ef3fcc5518 100644 --- a/vault/activity_log.go +++ b/vault/activity_log.go @@ -1619,7 +1619,18 @@ func (a *ActivityLog) handleQuery(ctx context.Context, startTime, endTime time.T // Now populate the response based on breakdowns. responseData := make(map[string]interface{}) responseData["start_time"] = pq.StartTime.Format(time.RFC3339) - responseData["end_time"] = pq.EndTime.Format(time.RFC3339) + + // If we computed partial counts, we should return the actual end time we computed counts for, not the pre-computed + // query end time. If we don't do this, the end_time in the response doesn't match the actual data in the response, + // which is confusing. Note that regardless of what end time is given, if it falls within the current month, it will + // be set to the end of the current month. This is definitely suboptimal, and possibly confusing, but still an + // improvement over using the pre-computed query end time. 
+ if computePartial { + responseData["end_time"] = endTime.Format(time.RFC3339) + } else { + responseData["end_time"] = pq.EndTime.Format(time.RFC3339) + } + responseData["by_namespace"] = byNamespaceResponse responseData["total"] = &ResponseCounts{ DistinctEntities: distinctEntitiesResponse, From ce41a91221bf9124773dd3ee2ad3d6b170524e0a Mon Sep 17 00:00:00 2001 From: Josh Black Date: Tue, 15 Nov 2022 12:15:51 -0800 Subject: [PATCH 02/26] backport of commit 87aa644ca82c75449cbaaaf7e24f14907841447e --- changelog/17935.txt | 3 +++ vault/activity_log.go | 14 +++++++++++++- 2 files changed, 16 insertions(+), 1 deletion(-) create mode 100644 changelog/17935.txt diff --git a/changelog/17935.txt b/changelog/17935.txt new file mode 100644 index 000000000000..c16ffdfe332e --- /dev/null +++ b/changelog/17935.txt @@ -0,0 +1,3 @@ +```release-note:bug +core/activity: return partial month counts when querying a historical date range and no historical data exists. +``` diff --git a/vault/activity_log.go b/vault/activity_log.go index d9ef3fcc5518..970cef996a9c 100644 --- a/vault/activity_log.go +++ b/vault/activity_log.go @@ -1546,10 +1546,19 @@ func (a *ActivityLog) handleQuery(ctx context.Context, startTime, endTime time.T return nil, err } if storedQuery == nil { - return nil, nil + // If the storedQuery is nil, that means there's no historical data to process. But, it's possible there's + // still current month data to process, so rather than returning a 204, let's proceed along like we're + // just querying the current month. + storedQuery = &activity.PrecomputedQuery{ + StartTime: startTime, + EndTime: endTime, + Namespaces: make([]*activity.NamespaceRecord, 0), + Months: make([]*activity.MonthRecord, 0), + } } pq = storedQuery } + // Calculate the namespace response breakdowns and totals for entities and tokens from the initial // namespace data. 
totalEntities, totalTokens, byNamespaceResponse, err := a.calculateByNamespaceResponseForQuery(ctx, pq.Namespaces) @@ -1584,6 +1593,7 @@ func (a *ActivityLog) handleQuery(ctx context.Context, startTime, endTime time.T // Add the current month's namespace data the precomputed query namespaces byNamespaceResponse = append(byNamespaceResponse, byNamespaceResponseCurrent...) } + // Sort clients within each namespace a.sortALResponseNamespaces(byNamespaceResponse) @@ -1597,11 +1607,13 @@ func (a *ActivityLog) handleQuery(ctx context.Context, startTime, endTime time.T if err != nil { return nil, err } + // Add the namespace attribution for the current month to the newly computed current month value. Note // that transformMonthBreakdowns calculates a superstruct of the required namespace struct due to its // primary use-case being for precomputedQueryWorker, but we will reuse this code for brevity and extract // the namespaces from it. currentMonthNamespaceAttribution := a.transformMonthBreakdowns(partialByMonth) + // Ensure that there is only one element in this list -- if not, warn. 
if len(currentMonthNamespaceAttribution) > 1 { a.logger.Warn("more than one month worth of namespace and mount attribution calculated for "+ From 309cf80bec9d0570059329f14c6d95781754bd99 Mon Sep 17 00:00:00 2001 From: Josh Black Date: Fri, 16 Dec 2022 16:02:42 -0800 Subject: [PATCH 03/26] backport of commit b8de2c2b4e574c5fabde01c9be8c93c4e390c897 --- changelog/18452.txt | 3 +++ vault/activity_log.go | 23 +++++++++++++++++++++-- vault/logical_system_activity.go | 1 + 3 files changed, 25 insertions(+), 2 deletions(-) create mode 100644 changelog/18452.txt diff --git a/changelog/18452.txt b/changelog/18452.txt new file mode 100644 index 000000000000..6d4566667ee4 --- /dev/null +++ b/changelog/18452.txt @@ -0,0 +1,3 @@ +```release-note:bug +core/activity: de-duplicate namespaces when historical and current month data are mixed +``` diff --git a/vault/activity_log.go b/vault/activity_log.go index 970cef996a9c..24df0213e2fc 100644 --- a/vault/activity_log.go +++ b/vault/activity_log.go @@ -1590,8 +1590,27 @@ func (a *ActivityLog) handleQuery(ctx context.Context, startTime, endTime time.T return nil, err } - // Add the current month's namespace data the precomputed query namespaces - byNamespaceResponse = append(byNamespaceResponse, byNamespaceResponseCurrent...) + // Create a mapping of namespace id to slice index, so that we can efficiently update our results without + // having to traverse the entire namespace response slice every time. + nsrMap := make(map[string]int) + for i, nr := range byNamespaceResponse { + nsrMap[nr.NamespaceID] = i + } + + // Rather than blindly appending, which will create duplicates, check our existing counts against the current + // month counts, and append or update as necessary. 
+ for _, nrc := range byNamespaceResponseCurrent { + if ndx, ok := nsrMap[nrc.NamespaceID]; ok { + existingRecord := byNamespaceResponse[ndx] + existingRecord.Counts.EntityClients += nrc.Counts.EntityClients + existingRecord.Counts.Clients += nrc.Counts.Clients + existingRecord.Counts.DistinctEntities += nrc.Counts.DistinctEntities + existingRecord.Counts.NonEntityClients += nrc.Counts.NonEntityClients + existingRecord.Counts.NonEntityTokens += nrc.Counts.NonEntityTokens + } else { + byNamespaceResponse = append(byNamespaceResponse, nrc) + } + } } // Sort clients within each namespace diff --git a/vault/logical_system_activity.go b/vault/logical_system_activity.go index 1d34a8e22bfc..8fda8ae891b3 100644 --- a/vault/logical_system_activity.go +++ b/vault/logical_system_activity.go @@ -165,6 +165,7 @@ func parseStartEndTimes(a *ActivityLog, d *framework.FieldData) (time.Time, time return startTime, endTime, nil } +// This endpoint is not used by the UI. The UI's "export" feature is entirely client-side. 
func (b *SystemBackend) handleClientExport(ctx context.Context, req *logical.Request, d *framework.FieldData) (*logical.Response, error) { a := b.Core.activityLog if a == nil { From bc0833367533ff69392c1e12a0accd9c9aa4dc5d Mon Sep 17 00:00:00 2001 From: Josh Black Date: Thu, 5 Jan 2023 09:34:05 -0800 Subject: [PATCH 04/26] backport of commit cb61488b9a96dd79e09371add1a4c06394dacd95 --- changelog/18598.txt | 3 +++ vault/activity_log.go | 24 +++++++++++++++++++++++- 2 files changed, 26 insertions(+), 1 deletion(-) create mode 100644 changelog/18598.txt diff --git a/changelog/18598.txt b/changelog/18598.txt new file mode 100644 index 000000000000..62d13d0e705f --- /dev/null +++ b/changelog/18598.txt @@ -0,0 +1,3 @@ +```release-note:bug +core/activity: include mount counts when de-duplicating current and historical month data +``` diff --git a/vault/activity_log.go b/vault/activity_log.go index 24df0213e2fc..3c4bc613da26 100644 --- a/vault/activity_log.go +++ b/vault/activity_log.go @@ -1598,15 +1598,37 @@ func (a *ActivityLog) handleQuery(ctx context.Context, startTime, endTime time.T } // Rather than blindly appending, which will create duplicates, check our existing counts against the current - // month counts, and append or update as necessary. + // month counts, and append or update as necessary. We also want to account for mounts and their counts. 
for _, nrc := range byNamespaceResponseCurrent { if ndx, ok := nsrMap[nrc.NamespaceID]; ok { existingRecord := byNamespaceResponse[ndx] + + // Create a map of the existing mounts, so we don't duplicate them + mountMap := make(map[string]*ResponseCounts) + for _, erm := range existingRecord.Mounts { + mountMap[erm.MountPath] = erm.Counts + } + existingRecord.Counts.EntityClients += nrc.Counts.EntityClients existingRecord.Counts.Clients += nrc.Counts.Clients existingRecord.Counts.DistinctEntities += nrc.Counts.DistinctEntities existingRecord.Counts.NonEntityClients += nrc.Counts.NonEntityClients existingRecord.Counts.NonEntityTokens += nrc.Counts.NonEntityTokens + + // Check the current month mounts against the existing mounts and if there are matches, update counts + // accordingly. If there is no match, append the new mount to the existing mounts, so it will be counted + // later. + for _, nrcMount := range nrc.Mounts { + if existingRecordMountCounts, ook := mountMap[nrcMount.MountPath]; ook { + existingRecordMountCounts.EntityClients += nrcMount.Counts.EntityClients + existingRecordMountCounts.Clients += nrcMount.Counts.Clients + existingRecordMountCounts.DistinctEntities += nrcMount.Counts.DistinctEntities + existingRecordMountCounts.NonEntityClients += nrcMount.Counts.NonEntityClients + existingRecordMountCounts.NonEntityTokens += nrcMount.Counts.NonEntityTokens + } else { + existingRecord.Mounts = append(existingRecord.Mounts, nrcMount) + } + } } else { byNamespaceResponse = append(byNamespaceResponse, nrc) } From 7309865f2f87102eb5e57fa156e56faaf6a69065 Mon Sep 17 00:00:00 2001 From: Josh Black Date: Mon, 9 Jan 2023 15:26:11 -0800 Subject: [PATCH 05/26] backport of commit d641bbc28e5e8cc12b81d409e5d5fc1f2cb7f66c --- changelog/18629.txt | 3 +++ vault/activity_log.go | 1 + 2 files changed, 4 insertions(+) create mode 100644 changelog/18629.txt diff --git a/changelog/18629.txt b/changelog/18629.txt new file mode 100644 index 000000000000..50743b3916f1 --- 
/dev/null +++ b/changelog/18629.txt @@ -0,0 +1,3 @@ +```release-note:bug +core/activity: add namespace breakdown for new clients when date range spans multiple months, including the current month. +``` diff --git a/vault/activity_log.go b/vault/activity_log.go index 3c4bc613da26..d89f7599baf9 100644 --- a/vault/activity_log.go +++ b/vault/activity_log.go @@ -1664,6 +1664,7 @@ func (a *ActivityLog) handleQuery(ctx context.Context, startTime, endTime time.T a.logger.Warn("no month data found, returning query with no namespace attribution for current month") } else { currentMonth.Namespaces = currentMonthNamespaceAttribution[0].Namespaces + currentMonth.NewClients = currentMonthNamespaceAttribution[0].NewClients } pq.Months = append(pq.Months, currentMonth) distinctEntitiesResponse += pq.Months[len(pq.Months)-1].NewClients.Counts.EntityClients From 1e8f1b65e287f6a8919dcb3d93503c2cfe3fa762 Mon Sep 17 00:00:00 2001 From: Peter Wilson Date: Mon, 16 Jan 2023 15:51:19 +0000 Subject: [PATCH 06/26] backport of commit 2a5a07e390bca84e5dca73a441d99528d26fad99 --- changelog/18629.txt | 3 --- vault/activity_log.go | 1 - 2 files changed, 4 deletions(-) delete mode 100644 changelog/18629.txt diff --git a/changelog/18629.txt b/changelog/18629.txt deleted file mode 100644 index 50743b3916f1..000000000000 --- a/changelog/18629.txt +++ /dev/null @@ -1,3 +0,0 @@ -```release-note:bug -core/activity: add namespace breakdown for new clients when date range spans multiple months, including the current month. 
-``` diff --git a/vault/activity_log.go b/vault/activity_log.go index d89f7599baf9..3c4bc613da26 100644 --- a/vault/activity_log.go +++ b/vault/activity_log.go @@ -1664,7 +1664,6 @@ func (a *ActivityLog) handleQuery(ctx context.Context, startTime, endTime time.T a.logger.Warn("no month data found, returning query with no namespace attribution for current month") } else { currentMonth.Namespaces = currentMonthNamespaceAttribution[0].Namespaces - currentMonth.NewClients = currentMonthNamespaceAttribution[0].NewClients } pq.Months = append(pq.Months, currentMonth) distinctEntitiesResponse += pq.Months[len(pq.Months)-1].NewClients.Counts.EntityClients From 761878dbebb1836a294ce544a41397866a41bb8d Mon Sep 17 00:00:00 2001 From: Josh Black Date: Thu, 19 Jan 2023 09:12:17 -0800 Subject: [PATCH 07/26] backport of commit 771bd8ba6d73bfb4d1fd870c8ccb53315378aa18 --- changelog/18766.txt | 3 +++ vault/activity_log.go | 1 + 2 files changed, 4 insertions(+) create mode 100644 changelog/18766.txt diff --git a/changelog/18766.txt b/changelog/18766.txt new file mode 100644 index 000000000000..50743b3916f1 --- /dev/null +++ b/changelog/18766.txt @@ -0,0 +1,3 @@ +```release-note:bug +core/activity: add namespace breakdown for new clients when date range spans multiple months, including the current month. 
+``` diff --git a/vault/activity_log.go b/vault/activity_log.go index 3c4bc613da26..de42223df4e2 100644 --- a/vault/activity_log.go +++ b/vault/activity_log.go @@ -1664,6 +1664,7 @@ func (a *ActivityLog) handleQuery(ctx context.Context, startTime, endTime time.T a.logger.Warn("no month data found, returning query with no namespace attribution for current month") } else { currentMonth.Namespaces = currentMonthNamespaceAttribution[0].Namespaces + currentMonth.NewClients.Namespaces = currentMonthNamespaceAttribution[0].NewClients.Namespaces } pq.Months = append(pq.Months, currentMonth) distinctEntitiesResponse += pq.Months[len(pq.Months)-1].NewClients.Counts.EntityClients From 45ffa7129bb7def176492f722e4ce88e71958400 Mon Sep 17 00:00:00 2001 From: miagilepner Date: Mon, 6 Feb 2023 10:26:32 +0100 Subject: [PATCH 08/26] backport of commit b5d7d47ca2b22e08eda08bf4da7ee1418f753581 --- changelog/18916.txt | 3 + vault/activity_log.go | 21 +----- vault/activity_log_test.go | 121 ++++++++++++++++++++++++++++++ vault/activity_log_util_common.go | 26 ++++++- 4 files changed, 150 insertions(+), 21 deletions(-) create mode 100644 changelog/18916.txt diff --git a/changelog/18916.txt b/changelog/18916.txt new file mode 100644 index 000000000000..eb2792b31e40 --- /dev/null +++ b/changelog/18916.txt @@ -0,0 +1,3 @@ +```release-note:bug +core/activity: report mount paths (rather than mount accessors) in current month activity log counts and include deleted mount paths in precomputed queries. 
+``` diff --git a/vault/activity_log.go b/vault/activity_log.go index de42223df4e2..965d34a662f3 100644 --- a/vault/activity_log.go +++ b/vault/activity_log.go @@ -2166,13 +2166,8 @@ func (a *ActivityLog) precomputedQueryWorker(ctx context.Context) error { for nsID, entry := range byNamespace { mountRecord := make([]*activity.MountRecord, 0, len(entry.Mounts)) for mountAccessor, mountData := range entry.Mounts { - valResp := a.core.router.ValidateMountByAccessor(mountAccessor) - if valResp == nil { - // Only persist valid mounts - continue - } mountRecord = append(mountRecord, &activity.MountRecord{ - MountPath: valResp.MountPath, + MountPath: a.mountAccessorToMountPath(mountAccessor), Counts: &activity.CountsRecord{ EntityClients: len(mountData.Counts.Entities), NonEntityClients: int(mountData.Counts.Tokens) + len(mountData.Counts.NonEntities), @@ -2339,20 +2334,8 @@ func (a *ActivityLog) transformMonthBreakdowns(byMonth map[int64]*processMonth) // Process mount specific data within a namespace within a given month mountRecord := make([]*activity.MountRecord, 0, len(nsMap[nsID].Mounts)) for mountAccessor, mountData := range nsMap[nsID].Mounts { - var displayPath string - if mountAccessor == "" { - displayPath = "no mount accessor (pre-1.10 upgrade?)" - } else { - valResp := a.core.router.ValidateMountByAccessor(mountAccessor) - if valResp == nil { - displayPath = fmt.Sprintf("deleted mount; accessor %q", mountAccessor) - } else { - displayPath = valResp.MountPath - } - } - mountRecord = append(mountRecord, &activity.MountRecord{ - MountPath: displayPath, + MountPath: a.mountAccessorToMountPath(mountAccessor), Counts: &activity.CountsRecord{ EntityClients: len(mountData.Counts.Entities), NonEntityClients: int(mountData.Counts.Tokens) + len(mountData.Counts.NonEntities), diff --git a/vault/activity_log_test.go b/vault/activity_log_test.go index 40373a1313ab..69887a39d497 100644 --- a/vault/activity_log_test.go +++ b/vault/activity_log_test.go @@ -17,6 +17,8 @@ 
import ( "testing" "time" + "github.com/hashicorp/go-uuid" + "github.com/axiomhq/hyperloglog" "github.com/go-test/deep" "github.com/golang/protobuf/proto" @@ -3984,3 +3986,122 @@ func TestActivityLog_partialMonthClientCountUsingHandleQuery(t *testing.T) { } } } + +// TestActivityLog_partialMonthClientCountWithMultipleMountPaths verifies that logic in refreshFromStoredLog includes all mount paths +// in its mount data. In this test we create 3 entity records with different mount accessors: one is empty, one is +// valid, one can't be found (so it's assumed the mount is deleted). These records are written to storage, then this data is +// refreshed in refreshFromStoredLog, and finally we verify the results returned with partialMonthClientCount. +func TestActivityLog_partialMonthClientCountWithMultipleMountPaths(t *testing.T) { + timeutil.SkipAtEndOfMonth(t) + + core, _, _ := TestCoreUnsealed(t) + _, barrier, _ := mockBarrier(t) + view := NewBarrierView(barrier, "auth/") + + ctx := namespace.RootContext(nil) + now := time.Now().UTC() + meUUID, err := uuid.GenerateUUID() + if err != nil { + t.Fatal(err) + } + + a := core.activityLog + path := "auth/foo/bar" + accessor := "authfooaccessor" + + // we mount a path using the accessor 'authfooaccessor' which has mount path "auth/foo/bar" + // when an entity record references this accessor, activity log will be able to find it on its mounts and translate the mount accessor + // into a mount path + err = core.router.Mount(&NoopBackend{}, "auth/foo/", &MountEntry{UUID: meUUID, Accessor: accessor, NamespaceID: namespace.RootNamespaceID, namespace: namespace.RootNamespace, Path: path}, view) + if err != nil { + t.Fatalf("err: %v", err) + } + + entityRecords := []*activity.EntityRecord{ + { + // this record has no mount accessor, so it'll get recorded as a pre-1.10 upgrade + ClientID: "11111111-1111-1111-1111-111111111111", + NamespaceID: namespace.RootNamespaceID, + Timestamp: time.Now().Unix(), + }, + { + // this record's mount 
path won't be able to be found, because there's no mount with the accessor 'deleted' + // the code in mountAccessorToMountPath assumes that if the mount accessor isn't empty but the mount path + // can't be found, then the mount must have been deleted + ClientID: "22222222-2222-2222-2222-222222222222", + NamespaceID: namespace.RootNamespaceID, + Timestamp: time.Now().Unix(), + MountAccessor: "deleted", + }, + { + // this record will have mount path 'auth/foo/bar', because we set up the mount above + ClientID: "33333333-2222-2222-2222-222222222222", + NamespaceID: namespace.RootNamespaceID, + Timestamp: time.Now().Unix(), + MountAccessor: "authfooaccessor", + }, + } + for i, entityRecord := range entityRecords { + entityData, err := proto.Marshal(&activity.EntityActivityLog{ + Clients: []*activity.EntityRecord{entityRecord}, + }) + if err != nil { + t.Fatalf(err.Error()) + } + storagePath := fmt.Sprintf("%sentity/%d/%d", ActivityLogPrefix, timeutil.StartOfMonth(now).Unix(), i) + WriteToStorage(t, core, storagePath, entityData) + } + + a.SetEnable(true) + var wg sync.WaitGroup + err = a.refreshFromStoredLog(ctx, &wg, now) + if err != nil { + t.Fatalf("error loading clients: %v", err) + } + wg.Wait() + + results, err := a.partialMonthClientCount(ctx) + if err != nil { + t.Fatal(err) + } + if results == nil { + t.Fatal("no results to test") + } + + byNamespace, ok := results["by_namespace"] + if !ok { + t.Fatalf("malformed results. 
got %v", results) + } + + clientCountResponse := make([]*ResponseNamespace, 0) + err = mapstructure.Decode(byNamespace, &clientCountResponse) + if err != nil { + t.Fatal(err) + } + if len(clientCountResponse) != 1 { + t.Fatalf("incorrect client count responses, expected 1 but got %d", len(clientCountResponse)) + } + if len(clientCountResponse[0].Mounts) != len(entityRecords) { + t.Fatalf("incorrect client mounts, expected %d but got %d", len(entityRecords), len(clientCountResponse[0].Mounts)) + } + byPath := make(map[string]int, len(clientCountResponse[0].Mounts)) + for _, mount := range clientCountResponse[0].Mounts { + byPath[mount.MountPath] = byPath[mount.MountPath] + mount.Counts.Clients + } + + // these are the paths that are expected and correspond with the entity records created above + expectedPaths := []string{ + noMountAccessor, + fmt.Sprintf(deletedMountFmt, "deleted"), + path, + } + for _, expectedPath := range expectedPaths { + count, ok := byPath[expectedPath] + if !ok { + t.Fatalf("path %s not found", expectedPath) + } + if count != 1 { + t.Fatalf("incorrect count value %d for path %s", count, expectedPath) + } + } +} diff --git a/vault/activity_log_util_common.go b/vault/activity_log_util_common.go index a83736fcd6e5..11c322511823 100644 --- a/vault/activity_log_util_common.go +++ b/vault/activity_log_util_common.go @@ -209,9 +209,9 @@ func (a *ActivityLog) limitNamespacesInALResponse(byNamespaceResponse []*Respons // For more details, please see the function comment for transformMonthlyNamespaceBreakdowns func (a *ActivityLog) transformActivityLogMounts(mts map[string]*processMount) []*activity.MountRecord { mounts := make([]*activity.MountRecord, 0) - for mountpath, mountCounts := range mts { + for mountAccessor, mountCounts := range mts { mount := activity.MountRecord{ - MountPath: mountpath, + MountPath: a.mountAccessorToMountPath(mountAccessor), Counts: &activity.CountsRecord{ EntityClients: len(mountCounts.Counts.Entities), NonEntityClients: 
len(mountCounts.Counts.NonEntities) + int(mountCounts.Counts.Tokens), @@ -262,6 +262,28 @@ func (a *ActivityLog) sortActivityLogMonthsResponse(months []*ResponseMonth) { } } +const ( + noMountAccessor = "no mount accessor (pre-1.10 upgrade?)" + deletedMountFmt = "deleted mount; accessor %q" +) + +// mountAccessorToMountPath transforms the mount accessor to the mount path +// returns a placeholder string if the mount accessor is empty or deleted +func (a *ActivityLog) mountAccessorToMountPath(mountAccessor string) string { + var displayPath string + if mountAccessor == "" { + displayPath = noMountAccessor + } else { + valResp := a.core.router.ValidateMountByAccessor(mountAccessor) + if valResp == nil { + displayPath = fmt.Sprintf(deletedMountFmt, mountAccessor) + } else { + displayPath = valResp.MountPath + } + } + return displayPath +} + type singleTypeSegmentReader struct { basePath string startTime time.Time From baddce882d5d5fec94968f93a44e9986e478580d Mon Sep 17 00:00:00 2001 From: miagilepner Date: Fri, 24 Feb 2023 16:57:41 +0100 Subject: [PATCH 09/26] backport of commit a9e17c20119ecf090ba419974bb22f4e822b8a90 --- vault/activity_log_test.go | 53 ++++++++++++++++++++++++++++++- vault/activity_log_util_common.go | 4 +++ 2 files changed, 56 insertions(+), 1 deletion(-) diff --git a/vault/activity_log_test.go b/vault/activity_log_test.go index 69887a39d497..6248e9667e7a 100644 --- a/vault/activity_log_test.go +++ b/vault/activity_log_test.go @@ -17,6 +17,8 @@ import ( "testing" "time" + "github.com/stretchr/testify/require" + "github.com/hashicorp/go-uuid" "github.com/axiomhq/hyperloglog" @@ -3987,6 +3989,55 @@ func TestActivityLog_partialMonthClientCountUsingHandleQuery(t *testing.T) { } } +// TestActivityLog_handleQuery_normalizedMountPaths ensures that the mount paths returned by the activity log always have a trailing slash and client accounting is done correctly when there's no trailing slash. 
+// Two clients that have the same mount path, but one has a trailing slash, should be considered part of the same mount path +func TestActivityLog_handleQuery_normalizedMountPaths(t *testing.T) { + timeutil.SkipAtEndOfMonth(t) + + core, _, _ := TestCoreUnsealed(t) + _, barrier, _ := mockBarrier(t) + view := NewBarrierView(barrier, "auth/") + ctx := namespace.RootContext(nil) + now := time.Now().UTC() + a := core.activityLog + a.SetEnable(true) + + uuid1, err := uuid.GenerateUUID() + require.NoError(t, err) + uuid2, err := uuid.GenerateUUID() + require.NoError(t, err) + accessor1 := "accessor1" + accessor2 := "accessor2" + pathWithSlash := "auth/foo/" + pathWithoutSlash := "auth/foo" + + // create two mounts of the same name. One has a trailing slash, the other doesn't + err = core.router.Mount(&NoopBackend{}, "auth/foo", &MountEntry{UUID: uuid1, Accessor: accessor1, NamespaceID: namespace.RootNamespaceID, namespace: namespace.RootNamespace, Path: pathWithSlash}, view) + require.NoError(t, err) + err = core.router.Mount(&NoopBackend{}, "auth/bar", &MountEntry{UUID: uuid2, Accessor: accessor2, NamespaceID: namespace.RootNamespaceID, namespace: namespace.RootNamespace, Path: pathWithoutSlash}, view) + require.NoError(t, err) + + // handle token usage for each of the mount paths + a.HandleTokenUsage(ctx, &logical.TokenEntry{Path: pathWithSlash, NamespaceID: namespace.RootNamespaceID}, "id1", false) + a.HandleTokenUsage(ctx, &logical.TokenEntry{Path: pathWithoutSlash, NamespaceID: namespace.RootNamespaceID}, "id2", false) + // and have client 2 use both mount paths + a.HandleTokenUsage(ctx, &logical.TokenEntry{Path: pathWithSlash, NamespaceID: namespace.RootNamespaceID}, "id2", false) + + // query the data for the month + results, err := a.handleQuery(ctx, timeutil.StartOfMonth(now), timeutil.EndOfMonth(now), 0) + require.NoError(t, err) + + byNamespace := results["by_namespace"].([]*ResponseNamespace) + require.Len(t, byNamespace, 1) + byMount := byNamespace[0].Mounts 
+ require.Len(t, byMount, 1) + mountPath := byMount[0].MountPath + + // verify that both clients are recorded for the mount path with the slash + require.Equal(t, mountPath, pathWithSlash) + require.Equal(t, byMount[0].Counts.Clients, 2) +} + // TestActivityLog_partialMonthClientCountWithMultipleMountPaths verifies that logic in refreshFromStoredLog includes all mount paths // in its mount data. In this test we create 3 entity records with different mount accessors: one is empty, one is // valid, one can't be found (so it's assumed the mount is deleted). These records are written to storage, then this data is @@ -4006,7 +4057,7 @@ func TestActivityLog_partialMonthClientCountWithMultipleMountPaths(t *testing.T) } a := core.activityLog - path := "auth/foo/bar" + path := "auth/foo/bar/" accessor := "authfooaccessor" // we mount a path using the accessor 'authfooaccessor' which has mount path "auth/foo/bar" diff --git a/vault/activity_log_util_common.go b/vault/activity_log_util_common.go index 11c322511823..785a3f279506 100644 --- a/vault/activity_log_util_common.go +++ b/vault/activity_log_util_common.go @@ -6,6 +6,7 @@ import ( "fmt" "io" "sort" + "strings" "time" "github.com/axiomhq/hyperloglog" @@ -279,6 +280,9 @@ func (a *ActivityLog) mountAccessorToMountPath(mountAccessor string) string { displayPath = fmt.Sprintf(deletedMountFmt, mountAccessor) } else { displayPath = valResp.MountPath + if !strings.HasSuffix(displayPath, "/") { + displayPath += "/" + } } } return displayPath From 914431b54136cf4ec5d18f78458b6adb35ed8682 Mon Sep 17 00:00:00 2001 From: miagilepner Date: Mon, 6 Mar 2023 13:08:22 +0100 Subject: [PATCH 10/26] backport of commit 9f7f8d5bfad0aa3f06a4fcd86484f3e2f01a40a4 --- vault/activity_log_test.go | 78 +++++++++++++++++++++++++- vault/activity_log_util_common_test.go | 5 ++ 2 files changed, 81 insertions(+), 2 deletions(-) diff --git a/vault/activity_log_test.go b/vault/activity_log_test.go index 6248e9667e7a..1e3ee97d5aad 100644 --- 
a/vault/activity_log_test.go +++ b/vault/activity_log_test.go @@ -32,6 +32,7 @@ import ( "github.com/mitchellh/mapstructure" ) +// TestActivityLog_Creation calls AddEntityToFragment and verifies that it appears correctly in a.fragment. func TestActivityLog_Creation(t *testing.T) { core, _, _ := TestCoreUnsealed(t) @@ -102,6 +103,8 @@ func TestActivityLog_Creation(t *testing.T) { } } +// TestActivityLog_Creation_WrappingTokens calls HandleTokenUsage for two wrapping tokens, and verifies that this +// doesn't create a fragment. func TestActivityLog_Creation_WrappingTokens(t *testing.T) { core, _, _ := TestCoreUnsealed(t) @@ -170,6 +173,8 @@ func checkExpectedEntitiesInMap(t *testing.T, a *ActivityLog, entityIDs []string } } +// TestActivityLog_UniqueEntities calls AddEntityToFragment 4 times with 2 different clients, then verifies that there +// are only 2 clients in the fragment and that they have the earlier timestamps. func TestActivityLog_UniqueEntities(t *testing.T) { core, _, _ := TestCoreUnsealed(t) a := core.activityLog @@ -274,6 +279,9 @@ func expectedEntityIDs(t *testing.T, out *activity.EntityActivityLog, ids []stri } } +// TestActivityLog_SaveTokensToStorage calls AddTokenToFragment with duplicate namespaces and then saves the segment to +// storage. The test then reads and unmarshals the segment, and verifies that the results have the correct counts by +// namespace. func TestActivityLog_SaveTokensToStorage(t *testing.T) { core, _, _ := TestCoreUnsealed(t) ctx := context.Background() @@ -426,6 +434,8 @@ func TestActivityLog_SaveTokensToStorageDoesNotUpdateTokenCount(t *testing.T) { } } +// TestActivityLog_SaveEntitiesToStorage calls AddEntityToFragment with clients with different namespaces and then +// writes the segment to storage. Read back from storage, and verify that client IDs exist in storage. 
func TestActivityLog_SaveEntitiesToStorage(t *testing.T) { core, _, _ := TestCoreUnsealed(t) ctx := context.Background() @@ -477,7 +487,8 @@ func TestActivityLog_SaveEntitiesToStorage(t *testing.T) { expectedEntityIDs(t, out, ids) } -// Test to check store hyperloglog and fetch hyperloglog from storage +// TestActivityLog_StoreAndReadHyperloglog inserts into a hyperloglog, stores it and then reads it back. The test +// verifies the estimate count is correct. func TestActivityLog_StoreAndReadHyperloglog(t *testing.T) { core, _, _ := TestCoreUnsealed(t) ctx := context.Background() @@ -505,6 +516,8 @@ func TestActivityLog_StoreAndReadHyperloglog(t *testing.T) { } } +// TestModifyResponseMonthsNilAppend calls modifyResponseMonths for a range of 5 months ago to now. It verifies that the +// 5 months in the range are correct. func TestModifyResponseMonthsNilAppend(t *testing.T) { end := time.Now().UTC() start := timeutil.StartOfMonth(end).AddDate(0, -5, 0) @@ -535,6 +548,9 @@ func TestModifyResponseMonthsNilAppend(t *testing.T) { } } +// TestActivityLog_ReceivedFragment calls receivedFragment with a fragment and verifies it gets added to +// standbyFragmentsReceived. Send the same fragment again and then verify that it doesn't change the entity map but does +// get added to standbyFragmentsReceived. func TestActivityLog_ReceivedFragment(t *testing.T) { core, _, _ := TestCoreUnsealed(t) a := core.activityLog @@ -586,6 +602,8 @@ func TestActivityLog_ReceivedFragment(t *testing.T) { } } +// TestActivityLog_availableLogsEmptyDirectory verifies that availableLogs returns an empty slice when the log directory +// is empty. 
func TestActivityLog_availableLogsEmptyDirectory(t *testing.T) { // verify that directory is empty, and nothing goes wrong core, _, _ := TestCoreUnsealed(t) @@ -599,6 +617,8 @@ func TestActivityLog_availableLogsEmptyDirectory(t *testing.T) { } } +// TestActivityLog_availableLogs writes to the direct token paths and entity paths and verifies that the correct start +// times are returned. func TestActivityLog_availableLogs(t *testing.T) { // set up a few files in storage core, _, _ := TestCoreUnsealed(t) @@ -626,6 +646,9 @@ func TestActivityLog_availableLogs(t *testing.T) { } } +// TestActivityLog_MultipleFragmentsAndSegments adds 4000 clients to a fragment and saves it and reads it. The test then +// adds 4000 more clients and calls receivedFragment with 200 more entities. The current segment is saved to storage and +// read back. The test verifies that there are 5000 clients in the first segment index, then the rest in the second index. func TestActivityLog_MultipleFragmentsAndSegments(t *testing.T) { core, _, _ := TestCoreUnsealed(t) a := core.activityLog @@ -794,6 +817,7 @@ func TestActivityLog_MultipleFragmentsAndSegments(t *testing.T) { } } +// TestActivityLog_API_ConfigCRUD performs various CRUD operations on internal/counters/config. func TestActivityLog_API_ConfigCRUD(t *testing.T) { core, b, _ := testCoreSystemBackend(t) view := core.systemBarrierView @@ -936,6 +960,7 @@ func TestActivityLog_API_ConfigCRUD(t *testing.T) { } } +// TestActivityLog_parseSegmentNumberFromPath verifies that the segment number is extracted correctly from a path. func TestActivityLog_parseSegmentNumberFromPath(t *testing.T) { testCases := []struct { input string @@ -985,6 +1010,7 @@ func TestActivityLog_parseSegmentNumberFromPath(t *testing.T) { } } +// TestActivityLog_getLastEntitySegmentNumber verifies that the last segment number is correctly returned. 
func TestActivityLog_getLastEntitySegmentNumber(t *testing.T) { core, _, _ := TestCoreUnsealed(t) a := core.activityLog @@ -1040,6 +1066,8 @@ func TestActivityLog_getLastEntitySegmentNumber(t *testing.T) { } } +// TestActivityLog_tokenCountExists writes to the direct tokens segment path and verifies that segment count exists +// returns true for the segments at these paths. func TestActivityLog_tokenCountExists(t *testing.T) { core, _, _ := TestCoreUnsealed(t) a := core.activityLog @@ -1164,6 +1192,8 @@ func (a *ActivityLog) resetEntitiesInMemory(t *testing.T) { a.partialMonthClientTracker = make(map[string]*activity.EntityRecord) } +// TestActivityLog_loadCurrentClientSegment writes entity segments and calls loadCurrentClientSegment, then verifies +// that the correct values are returned when querying the current segment. func TestActivityLog_loadCurrentClientSegment(t *testing.T) { core, _, _ := TestCoreUnsealed(t) a := core.activityLog @@ -1280,6 +1310,8 @@ func TestActivityLog_loadCurrentClientSegment(t *testing.T) { } } +// TestActivityLog_loadPriorEntitySegment writes entities to two months and calls loadPriorEntitySegment for each month, +// verifying that the active clients are correct. func TestActivityLog_loadPriorEntitySegment(t *testing.T) { core, _, _ := TestCoreUnsealed(t) a := core.activityLog @@ -1424,6 +1456,9 @@ func TestActivityLog_loadTokenCount(t *testing.T) { } } +// TestActivityLog_StopAndRestart disables the activity log, waits for deletes to complete, and then enables the +// activity log. The activity log is then stopped and started again, to simulate a seal and unseal. The test then +// verifies that there's no error adding an entity, direct token, and when writing a segment to storage. 
func TestActivityLog_StopAndRestart(t *testing.T) { core, b, _ := testCoreSystemBackend(t) sysView := core.systemBarrierView @@ -1555,6 +1590,8 @@ func setupActivityRecordsInStorage(t *testing.T, base time.Time, includeEntities return a, entityRecords, tokenRecords } +// TestActivityLog_refreshFromStoredLog writes records for 3 months ago and this month, then calls refreshFromStoredLog. +// The test verifies that current entities and current tokens are correct. func TestActivityLog_refreshFromStoredLog(t *testing.T) { a, expectedClientRecords, expectedTokenCounts := setupActivityRecordsInStorage(t, time.Now().UTC(), true, true) a.SetEnable(true) @@ -1592,6 +1629,9 @@ func TestActivityLog_refreshFromStoredLog(t *testing.T) { } } +// TestActivityLog_refreshFromStoredLogWithBackgroundLoadingCancelled writes data from 3 months ago to this month. The +// test closes a.doneCh and calls refreshFromStoredLog, which will not do any processing because the doneCh is closed. +// The test verifies that the current data is not loaded. func TestActivityLog_refreshFromStoredLogWithBackgroundLoadingCancelled(t *testing.T) { a, expectedClientRecords, expectedTokenCounts := setupActivityRecordsInStorage(t, time.Now().UTC(), true, true) a.SetEnable(true) @@ -1633,6 +1673,8 @@ func TestActivityLog_refreshFromStoredLogWithBackgroundLoadingCancelled(t *testi } } +// TestActivityLog_refreshFromStoredLogContextCancelled writes data from 3 months ago to this month and calls +// refreshFromStoredLog with a canceled context, verifying that the function errors because of the canceled context. func TestActivityLog_refreshFromStoredLogContextCancelled(t *testing.T) { a, _, _ := setupActivityRecordsInStorage(t, time.Now().UTC(), true, true) @@ -1646,6 +1688,8 @@ func TestActivityLog_refreshFromStoredLogContextCancelled(t *testing.T) { } } +// TestActivityLog_refreshFromStoredLogNoTokens writes only entities from 3 months ago to today, then calls +// refreshFromStoredLog. 
It verifies that there are no tokens loaded. func TestActivityLog_refreshFromStoredLogNoTokens(t *testing.T) { a, expectedClientRecords, _ := setupActivityRecordsInStorage(t, time.Now().UTC(), true, false) a.SetEnable(true) @@ -1681,6 +1725,8 @@ func TestActivityLog_refreshFromStoredLogNoTokens(t *testing.T) { } } +// TestActivityLog_refreshFromStoredLogNoEntities writes only direct tokens from 3 months ago to today, and runs +// refreshFromStoredLog. It verifies that there are no entities or clients loaded. func TestActivityLog_refreshFromStoredLogNoEntities(t *testing.T) { a, _, expectedTokenCounts := setupActivityRecordsInStorage(t, time.Now().UTC(), false, true) a.SetEnable(true) @@ -1708,6 +1754,8 @@ func TestActivityLog_refreshFromStoredLogNoEntities(t *testing.T) { } } +// TestActivityLog_refreshFromStoredLogNoData writes nothing and calls refreshFromStoredLog, and verifies that the +// current segment counts are zero. func TestActivityLog_refreshFromStoredLogNoData(t *testing.T) { now := time.Now().UTC() a, _, _ := setupActivityRecordsInStorage(t, now, false, false) @@ -1723,6 +1771,8 @@ func TestActivityLog_refreshFromStoredLogNoData(t *testing.T) { a.ExpectCurrentSegmentRefreshed(t, now.Unix(), false) } +// TestActivityLog_refreshFromStoredLogTwoMonthsPrevious creates segment data from 5 months ago to 2 months ago and +// calls refreshFromStoredLog, then verifies that the current segment counts are zero. func TestActivityLog_refreshFromStoredLogTwoMonthsPrevious(t *testing.T) { // test what happens when the most recent data is from month M-2 (or earlier - same effect) now := time.Now().UTC() @@ -1740,6 +1790,8 @@ func TestActivityLog_refreshFromStoredLogTwoMonthsPrevious(t *testing.T) { a.ExpectCurrentSegmentRefreshed(t, now.Unix(), false) } +// TestActivityLog_refreshFromStoredLogPreviousMonth creates segment data from 4 months ago to 1 month ago, then calls +// refreshFromStoredLog, then verifies that these clients are included in the current segment. 
func TestActivityLog_refreshFromStoredLogPreviousMonth(t *testing.T) { // test what happens when most recent data is from month M-1 // we expect to load the data from the previous month so that the activeFragmentWorker @@ -1782,6 +1834,8 @@ func TestActivityLog_refreshFromStoredLogPreviousMonth(t *testing.T) { } } +// TestActivityLog_Export writes overlapping client for 5 months with various mounts and namespaces. It performs an +// export for various month ranges in the range, and verifies that the outputs are correct. func TestActivityLog_Export(t *testing.T) { timeutil.SkipAtEndOfMonth(t) @@ -1973,6 +2027,8 @@ func (f *fakeResponseWriter) WriteHeader(statusCode int) { panic("unimplmeneted") } +// TestActivityLog_IncludeNamespace verifies that includeInResponse returns true for namespaces that are children of +// their parents. func TestActivityLog_IncludeNamespace(t *testing.T) { root := namespace.RootNamespace a := &ActivityLog{} @@ -2020,6 +2076,8 @@ func TestActivityLog_IncludeNamespace(t *testing.T) { } } +// TestActivityLog_DeleteWorker writes segments for entities and direct tokens for 2 different timestamps, then runs the +// deleteLogWorker for one of the timestamps. The test verifies that the correct segment is deleted, and the other remains. func TestActivityLog_DeleteWorker(t *testing.T) { core, _, _ := TestCoreUnsealed(t) a := core.activityLog @@ -2075,6 +2133,9 @@ func checkAPIWarnings(t *testing.T, originalEnabled, newEnabled bool, resp *logi } } +// TestActivityLog_EnableDisable writes a segment, adds an entity to the in-memory fragment, then disables the activity +// log. The test verifies that the segment doesn't exist. The activity log is enabled, then verified that an empty +// segment is written and new clients can be added and written to segments. 
func TestActivityLog_EnableDisable(t *testing.T) { timeutil.SkipAtEndOfMonth(t) @@ -2712,6 +2773,9 @@ func TestActivityLog_SaveAfterDisable(t *testing.T) { expectMissingSegment(t, core, path) } +// TestActivityLog_Precompute creates segments over a range of 11 months, with overlapping clients and namespaces. +// Create intent logs and run precomputedQueryWorker for various month ranges. Verify that the precomputed queries have +// the correct counts, including per namespace. func TestActivityLog_Precompute(t *testing.T) { timeutil.SkipAtEndOfMonth(t) @@ -3599,6 +3663,8 @@ func (b *BlockingInmemStorage) Delete(ctx context.Context, key string) error { return errors.New("fake implementation") } +// TestActivityLog_PrecomputeCancel stops the activity log before running the precomputedQueryWorker, and verifies that +// the context used to query storage has been canceled. func TestActivityLog_PrecomputeCancel(t *testing.T) { core, _, _ := TestCoreUnsealed(t) a := core.activityLog @@ -3627,6 +3693,8 @@ func TestActivityLog_PrecomputeCancel(t *testing.T) { } } +// TestActivityLog_NextMonthStart sets the activity log start timestamp, then verifies that StartOfNextMonth returns the +// correct value. func TestActivityLog_NextMonthStart(t *testing.T) { timeutil.SkipAtEndOfMonth(t) @@ -3679,6 +3747,8 @@ func waitForRetentionWorkerToFinish(t *testing.T, a *ActivityLog) { } } +// TestActivityLog_Deletion writes entity, direct tokens, and queries for dates ranging over 20 months. Then the test +// calls the retentionWorker with decreasing retention values, and verifies that the correct paths are being deleted. func TestActivityLog_Deletion(t *testing.T) { timeutil.SkipAtEndOfMonth(t) @@ -3794,6 +3864,8 @@ func TestActivityLog_Deletion(t *testing.T) { checkPresent(21) } +// TestActivityLog_partialMonthClientCount writes segment data for the current month and runs refreshFromStoredLog and +// then partialMonthClientCount.
The test verifies that the values returned by partialMonthClientCount are correct. func TestActivityLog_partialMonthClientCount(t *testing.T) { timeutil.SkipAtEndOfMonth(t) @@ -3863,6 +3935,8 @@ func TestActivityLog_partialMonthClientCount(t *testing.T) { } } +// TestActivityLog_partialMonthClientCountUsingHandleQuery writes segments for the current month and calls +// refreshFromStoredLog, then handleQuery. The test verifies that the results from handleQuery are correct. func TestActivityLog_partialMonthClientCountUsingHandleQuery(t *testing.T) { timeutil.SkipAtEndOfMonth(t) @@ -3990,7 +4064,7 @@ func TestActivityLog_partialMonthClientCountUsingHandleQuery(t *testing.T) { } // TestActivityLog_handleQuery_normalizedMountPaths ensures that the mount paths returned by the activity log always have a trailing slash and client accounting is done correctly when there's no trailing slash. -// Two clients that have the same mount path, but one has a trailing slash, should be considered part of the same mount path +// Two clients that have the same mount path, but one has a trailing slash, should be considered part of the same mount path. func TestActivityLog_handleQuery_normalizedMountPaths(t *testing.T) { timeutil.SkipAtEndOfMonth(t) diff --git a/vault/activity_log_util_common_test.go b/vault/activity_log_util_common_test.go index bd2dcd2a3e7e..e4d1ba4e3946 100644 --- a/vault/activity_log_util_common_test.go +++ b/vault/activity_log_util_common_test.go @@ -15,6 +15,11 @@ import ( "google.golang.org/protobuf/proto" ) +// Test_ActivityLog_ComputeCurrentMonthForBillingPeriodInternal creates 3 months of hyperloglogs and fills them with +// overlapping clients. The test calls computeCurrentMonthForBillingPeriodInternal with the current month map having +// some overlap with the previous months. The test then verifies that the results have the correct number of entity and +// non-entity clients. 
The test also calls computeCurrentMonthForBillingPeriodInternal with an empty current month map, +// and verifies that the results are all 0. func Test_ActivityLog_ComputeCurrentMonthForBillingPeriodInternal(t *testing.T) { // populate the first month with clients 1-10 monthOneHLL := hyperloglog.New() From 1d8a6d54bf4b67681db7b01ffe978a3a547f4a5a Mon Sep 17 00:00:00 2001 From: Mike Palmiotto Date: Mon, 20 Mar 2023 10:51:35 -0400 Subject: [PATCH 11/26] backport of commit e3c59773e969336bb4e85ddbf3a3700a6250f4c8 --- changelog/19625.txt | 4 ++++ vault/activity_log.go | 23 +++++++++++++++++++++++ vault/activity_log_util.go | 3 +++ vault/census.go | 6 ++++++ vault/core.go | 6 ++++++ vault/testing.go | 1 + 6 files changed, 43 insertions(+) create mode 100644 changelog/19625.txt create mode 100644 vault/census.go diff --git a/changelog/19625.txt b/changelog/19625.txt new file mode 100644 index 000000000000..b0cb558e3cd1 --- /dev/null +++ b/changelog/19625.txt @@ -0,0 +1,4 @@ +```release-note:feature +core (enterprise): Add background worker for automatic reporting of billing +information. +``` diff --git a/vault/activity_log.go b/vault/activity_log.go index 965d34a662f3..d45525f2f21a 100644 --- a/vault/activity_log.go +++ b/vault/activity_log.go @@ -171,6 +171,14 @@ type ActivityLog struct { partialMonthClientTracker map[string]*activity.EntityRecord inprocessExport *atomic.Bool + + // CensusReportDone is a channel used to signal tests upon successful calls + // to (CensusReporter).Write() in CensusReport. + CensusReportDone chan bool + + // CensusReportInterval is the testing configuration for time between + // Write() calls initiated in CensusReport. + CensusReportInterval time.Duration } // These non-persistent configuration options allow us to disable @@ -182,6 +190,9 @@ type ActivityLogCoreConfig struct { // Do not start timers to send or persist fragments. 
DisableTimers bool + + // CensusReportInterval is the testing configuration for time + CensusReportInterval time.Duration } // NewActivityLog creates an activity log. @@ -203,6 +214,7 @@ func NewActivityLog(core *Core, logger log.Logger, view *BarrierView, metrics me writeCh: make(chan struct{}, 1), // same for full segment doneCh: make(chan struct{}, 1), partialMonthClientTracker: make(map[string]*activity.EntityRecord), + CensusReportInterval: time.Hour * 1, currentSegment: segmentInfo{ startTimestamp: 0, @@ -940,6 +952,10 @@ func (a *ActivityLog) SetConfigInit(config activityConfig) { a.defaultReportMonths = config.DefaultReportMonths a.retentionMonths = config.RetentionMonths + + if a.configOverrides.CensusReportInterval > 0 { + a.CensusReportInterval = a.configOverrides.CensusReportInterval + } } // This version reacts to user changes @@ -1076,6 +1092,9 @@ func (c *Core) setupActivityLog(ctx context.Context, wg *sync.WaitGroup) error { manager.retentionWorker(ctx, time.Now(), months) close(manager.retentionDone) }(manager.retentionMonths) + + manager.CensusReportDone = make(chan bool) + go c.activityLog.CensusReport(ctx, c.censusAgent) } return nil @@ -1576,7 +1595,9 @@ func (a *ActivityLog) handleQuery(ctx context.Context, startTime, endTime time.T if computePartial { // Traverse through current month's activitylog data and group clients // into months and namespaces + a.fragmentLock.RLock() partialByMonth, partialByNamespace = a.populateNamespaceAndMonthlyBreakdowns() + a.fragmentLock.RUnlock() // Convert the byNamespace breakdowns into structs that are // consumable by the /activity endpoint, so as to reuse code between these two @@ -1760,6 +1781,8 @@ type activityConfig struct { // Enabled is one of enable, disable, default. 
Enabled string `json:"enabled"` + + CensusReportInterval time.Duration `json:"census_report_interval"` } func defaultActivityConfig() activityConfig { diff --git a/vault/activity_log_util.go b/vault/activity_log_util.go index 8cfa76a0f093..57ba7215a513 100644 --- a/vault/activity_log_util.go +++ b/vault/activity_log_util.go @@ -8,3 +8,6 @@ import "context" func (a *ActivityLog) sendCurrentFragment(ctx context.Context) error { return nil } + +// CensusReport is a no-op on OSS +func (a *ActivityLog) CensusReport(_ctx context.Context, _ca *CensusAgent) {} diff --git a/vault/census.go b/vault/census.go new file mode 100644 index 000000000000..2312b3b54ef1 --- /dev/null +++ b/vault/census.go @@ -0,0 +1,6 @@ +//go:build !enterprise + +package vault + +// CensusAgent is a stub for OSS +type CensusAgent struct{} diff --git a/vault/core.go b/vault/core.go index a9e9ac95f6c4..183e4a8dea82 100644 --- a/vault/core.go +++ b/vault/core.go @@ -605,6 +605,9 @@ type Core struct { activityLogConfig ActivityLogCoreConfig + // censusAgent is the mechanism used for reporting Vault's billing data. + censusAgent *CensusAgent + // activeTime is set on active nodes indicating the time at which this node // became active. 
activeTime time.Time @@ -744,6 +747,9 @@ type CoreConfig struct { LicensePath string LicensingConfig *LicensingConfig + // Configured Census Agent + censusAgent *CensusAgent + DisablePerformanceStandby bool DisableIndexing bool DisableKeyEncodingChecks bool diff --git a/vault/testing.go b/vault/testing.go index 5e343bf4caa9..5a0e2d89e691 100644 --- a/vault/testing.go +++ b/vault/testing.go @@ -209,6 +209,7 @@ func TestCoreWithSealAndUINoCleanup(t testing.T, opts *CoreConfig) *Core { conf.EnableResponseHeaderHostname = opts.EnableResponseHeaderHostname conf.DisableSSCTokens = opts.DisableSSCTokens conf.PluginDirectory = opts.PluginDirectory + conf.censusAgent = opts.censusAgent if opts.Logger != nil { conf.Logger = opts.Logger From 40bc4608ca56a53083c69b9e3bf692d44213819c Mon Sep 17 00:00:00 2001 From: miagilepner Date: Fri, 31 Mar 2023 17:05:16 +0200 Subject: [PATCH 12/26] backport of commit b4fab6ac2ae830f3bec8c287f07d5193dcfcdc22 --- changelog/19891.txt | 3 +++ command/server/config_test_helpers.go | 1 + command/server/config_test_helpers_util.go | 1 + vault/activity_log.go | 10 ++++++++++ vault/core.go | 3 +++ vault/logical_system_activity.go | 5 +++++ vault/testing.go | 1 + 7 files changed, 24 insertions(+) create mode 100644 changelog/19891.txt diff --git a/changelog/19891.txt b/changelog/19891.txt new file mode 100644 index 000000000000..b030151e858b --- /dev/null +++ b/changelog/19891.txt @@ -0,0 +1,3 @@ +```release-note:improvement +core (enterprise): add configuration for license reporting +``` \ No newline at end of file diff --git a/command/server/config_test_helpers.go b/command/server/config_test_helpers.go index bb6e273a6a46..f487d80fb088 100644 --- a/command/server/config_test_helpers.go +++ b/command/server/config_test_helpers.go @@ -1029,6 +1029,7 @@ func testParseSeals(t *testing.T) { }, }, } + addExpectedDefaultEntConfig(expected) config.Prune() require.Equal(t, config, expected) } diff --git a/command/server/config_test_helpers_util.go 
b/command/server/config_test_helpers_util.go index 63fa3cfe6a80..54718050db72 100644 --- a/command/server/config_test_helpers_util.go +++ b/command/server/config_test_helpers_util.go @@ -3,4 +3,5 @@ package server func addExpectedEntConfig(c *Config, sentinelModules []string) {} +func addExpectedDefaultEntConfig(c *Config) {} func addExpectedEntSanitizedConfig(c map[string]interface{}, sentinelModules []string) {} diff --git a/vault/activity_log.go b/vault/activity_log.go index d45525f2f21a..ef71a9e0ae93 100644 --- a/vault/activity_log.go +++ b/vault/activity_log.go @@ -193,6 +193,9 @@ type ActivityLogCoreConfig struct { // CensusReportInterval is the testing configuration for time CensusReportInterval time.Duration + + // MinimumRetentionMonths defines the minimum value for retention + MinimumRetentionMonths int } // NewActivityLog creates an activity log. @@ -953,6 +956,10 @@ func (a *ActivityLog) SetConfigInit(config activityConfig) { a.defaultReportMonths = config.DefaultReportMonths a.retentionMonths = config.RetentionMonths + if a.retentionMonths < a.configOverrides.MinimumRetentionMonths { + a.retentionMonths = a.configOverrides.MinimumRetentionMonths + } + if a.configOverrides.CensusReportInterval > 0 { a.CensusReportInterval = a.configOverrides.CensusReportInterval } @@ -1010,6 +1017,9 @@ func (a *ActivityLog) SetConfig(ctx context.Context, config activityConfig) { a.defaultReportMonths = config.DefaultReportMonths a.retentionMonths = config.RetentionMonths + if a.retentionMonths < a.configOverrides.MinimumRetentionMonths { + a.retentionMonths = a.configOverrides.MinimumRetentionMonths + } // check for segments out of retention period, if it has changed go a.retentionWorker(ctx, time.Now(), a.retentionMonths) diff --git a/vault/core.go b/vault/core.go index 183e4a8dea82..21e1eb9a7c76 100644 --- a/vault/core.go +++ b/vault/core.go @@ -608,6 +608,9 @@ type Core struct { // censusAgent is the mechanism used for reporting Vault's billing data. 
censusAgent *CensusAgent + // censusLicensingEnabled records whether Vault is exporting census metrics + censusLicensingEnabled bool + // activeTime is set on active nodes indicating the time at which this node // became active. activeTime time.Time diff --git a/vault/logical_system_activity.go b/vault/logical_system_activity.go index 8fda8ae891b3..27dec67ee894 100644 --- a/vault/logical_system_activity.go +++ b/vault/logical_system_activity.go @@ -326,6 +326,11 @@ func (b *SystemBackend) handleActivityConfigUpdate(ctx context.Context, req *log if config.Enabled == "enable" && enabledStr == "disable" || !activityLogEnabledDefault && config.Enabled == "enable" && enabledStr == "default" || activityLogEnabledDefault && config.Enabled == "default" && enabledStr == "disable" { + + // if census is enabled, the activity log cannot be disabled + if a.core.censusLicensingEnabled { + return logical.ErrorResponse("cannot disable the activity log while Reporting is enabled"), logical.ErrInvalidRequest + } warnings = append(warnings, "the current monthly segment will be deleted because the activity log was disabled") } diff --git a/vault/testing.go b/vault/testing.go index 5a0e2d89e691..4e89efd7c523 100644 --- a/vault/testing.go +++ b/vault/testing.go @@ -231,6 +231,7 @@ func TestCoreWithSealAndUINoCleanup(t testing.T, opts *CoreConfig) *Core { } conf.ActivityLogConfig = opts.ActivityLogConfig + testApplyEntBaseConfig(conf, opts) c, err := NewCore(conf) if err != nil { From 7c70383709d0e9d3fd6e8721ce1ce8367d552f5a Mon Sep 17 00:00:00 2001 From: miagilepner Date: Tue, 4 Apr 2023 14:50:19 +0200 Subject: [PATCH 13/26] backport of commit 54904e4cd6d6cb37e876d8b93c37d292b3419dd3 --- vault/activity_log.go | 135 ++++++++++++++++++++----------------- vault/activity_log_test.go | 59 ++++++++++++++++ 2 files changed, 131 insertions(+), 63 deletions(-) diff --git a/vault/activity_log.go b/vault/activity_log.go index ef71a9e0ae93..ea84ee32fa30 100644 --- a/vault/activity_log.go +++ 
b/vault/activity_log.go @@ -1869,6 +1869,12 @@ func (a *ActivityLog) namespaceToLabel(ctx context.Context, nsID string) string return ns.Path } +type ( + summaryByNamespace map[string]*processByNamespace + summaryByMount map[string]*processMount + summaryByMonth map[int64]*processMonth +) + type processCounts struct { // entityID -> present Entities map[string]struct{} @@ -1886,6 +1892,23 @@ func newProcessCounts() *processCounts { } } +func (p *processCounts) add(client *activity.EntityRecord) { + if client.NonEntity { + p.NonEntities[client.ClientID] = struct{}{} + } else { + p.Entities[client.ClientID] = struct{}{} + } +} + +func (p *processCounts) contains(client *activity.EntityRecord) bool { + if client.NonEntity { + _, ok := p.NonEntities[client.ClientID] + return ok + } + _, ok := p.Entities[client.ClientID] + return ok +} + type processMount struct { Counts *processCounts } @@ -1896,105 +1919,91 @@ func newProcessMount() *processMount { } } +func (p *processMount) add(client *activity.EntityRecord) { + p.Counts.add(client) +} + +func (s summaryByMount) add(client *activity.EntityRecord) { + if _, present := s[client.MountAccessor]; !present { + s[client.MountAccessor] = newProcessMount() + } + s[client.MountAccessor].add(client) +} + type processByNamespace struct { Counts *processCounts - Mounts map[string]*processMount + Mounts summaryByMount } func newByNamespace() *processByNamespace { return &processByNamespace{ Counts: newProcessCounts(), - Mounts: make(map[string]*processMount), + Mounts: make(summaryByMount), } } +func (p *processByNamespace) add(client *activity.EntityRecord) { + p.Counts.add(client) + p.Mounts.add(client) +} + +func (s summaryByNamespace) add(client *activity.EntityRecord) { + if _, present := s[client.NamespaceID]; !present { + s[client.NamespaceID] = newByNamespace() + } + s[client.NamespaceID].add(client) +} + type processNewClients struct { Counts *processCounts - Namespaces map[string]*processByNamespace + Namespaces 
summaryByNamespace } func newProcessNewClients() *processNewClients { return &processNewClients{ Counts: newProcessCounts(), - Namespaces: make(map[string]*processByNamespace), + Namespaces: make(summaryByNamespace), } } +func (p *processNewClients) add(client *activity.EntityRecord) { + p.Counts.add(client) + p.Namespaces.add(client) +} + type processMonth struct { Counts *processCounts - Namespaces map[string]*processByNamespace + Namespaces summaryByNamespace NewClients *processNewClients } func newProcessMonth() *processMonth { return &processMonth{ Counts: newProcessCounts(), - Namespaces: make(map[string]*processByNamespace), + Namespaces: make(summaryByNamespace), NewClients: newProcessNewClients(), } } -// processClientRecord parses the client record e and stores the breakdowns in -// the maps provided. -func processClientRecord(e *activity.EntityRecord, byNamespace map[string]*processByNamespace, byMonth map[int64]*processMonth, startTime time.Time) { - if _, present := byNamespace[e.NamespaceID]; !present { - byNamespace[e.NamespaceID] = newByNamespace() - } - - if _, present := byNamespace[e.NamespaceID].Mounts[e.MountAccessor]; !present { - byNamespace[e.NamespaceID].Mounts[e.MountAccessor] = newProcessMount() - } - - if e.NonEntity { - byNamespace[e.NamespaceID].Counts.NonEntities[e.ClientID] = struct{}{} - byNamespace[e.NamespaceID].Mounts[e.MountAccessor].Counts.NonEntities[e.ClientID] = struct{}{} - } else { - byNamespace[e.NamespaceID].Counts.Entities[e.ClientID] = struct{}{} - byNamespace[e.NamespaceID].Mounts[e.MountAccessor].Counts.Entities[e.ClientID] = struct{}{} - } +func (p *processMonth) add(client *activity.EntityRecord) { + p.Counts.add(client) + p.NewClients.add(client) + p.Namespaces.add(client) +} +func (s summaryByMonth) add(client *activity.EntityRecord, startTime time.Time) { monthTimestamp := timeutil.StartOfMonth(startTime).UTC().Unix() - if _, present := byMonth[monthTimestamp]; !present { - byMonth[monthTimestamp] = 
newProcessMonth() - } - - if _, present := byMonth[monthTimestamp].Namespaces[e.NamespaceID]; !present { - byMonth[monthTimestamp].Namespaces[e.NamespaceID] = newByNamespace() - } - - if _, present := byMonth[monthTimestamp].Namespaces[e.NamespaceID].Mounts[e.MountAccessor]; !present { - byMonth[monthTimestamp].Namespaces[e.NamespaceID].Mounts[e.MountAccessor] = newProcessMount() + if _, present := s[monthTimestamp]; !present { + s[monthTimestamp] = newProcessMonth() } + s[monthTimestamp].add(client) +} - if _, present := byMonth[monthTimestamp].NewClients.Namespaces[e.NamespaceID]; !present { - byMonth[monthTimestamp].NewClients.Namespaces[e.NamespaceID] = newByNamespace() - } - - if _, present := byMonth[monthTimestamp].NewClients.Namespaces[e.NamespaceID].Mounts[e.MountAccessor]; !present { - byMonth[monthTimestamp].NewClients.Namespaces[e.NamespaceID].Mounts[e.MountAccessor] = newProcessMount() - } - - // At first assume all the clients in the given month, as new. - // Before persisting this information to disk, clients that have - // activity in the previous months of a given billing cycle will be - // deleted. 
- if e.NonEntity == true { - byMonth[monthTimestamp].Counts.NonEntities[e.ClientID] = struct{}{} - byMonth[monthTimestamp].Namespaces[e.NamespaceID].Counts.NonEntities[e.ClientID] = struct{}{} - byMonth[monthTimestamp].Namespaces[e.NamespaceID].Mounts[e.MountAccessor].Counts.NonEntities[e.ClientID] = struct{}{} - - byMonth[monthTimestamp].NewClients.Counts.NonEntities[e.ClientID] = struct{}{} - byMonth[monthTimestamp].NewClients.Namespaces[e.NamespaceID].Counts.NonEntities[e.ClientID] = struct{}{} - byMonth[monthTimestamp].NewClients.Namespaces[e.NamespaceID].Mounts[e.MountAccessor].Counts.NonEntities[e.ClientID] = struct{}{} - } else { - byMonth[monthTimestamp].Counts.Entities[e.ClientID] = struct{}{} - byMonth[monthTimestamp].Namespaces[e.NamespaceID].Counts.Entities[e.ClientID] = struct{}{} - byMonth[monthTimestamp].Namespaces[e.NamespaceID].Mounts[e.MountAccessor].Counts.Entities[e.ClientID] = struct{}{} - - byMonth[monthTimestamp].NewClients.Counts.Entities[e.ClientID] = struct{}{} - byMonth[monthTimestamp].NewClients.Namespaces[e.NamespaceID].Counts.Entities[e.ClientID] = struct{}{} - byMonth[monthTimestamp].NewClients.Namespaces[e.NamespaceID].Mounts[e.MountAccessor].Counts.Entities[e.ClientID] = struct{}{} - } +// processClientRecord parses the client record e and stores the breakdowns in +// the maps provided. +func processClientRecord(e *activity.EntityRecord, byNamespace summaryByNamespace, byMonth summaryByMonth, startTime time.Time) { + byNamespace.add(e) + byMonth.add(e, startTime) } // goroutine to process the request in the intent log, creating precomputed queries. 
diff --git a/vault/activity_log_test.go b/vault/activity_log_test.go index 1e3ee97d5aad..5bda55a19440 100644 --- a/vault/activity_log_test.go +++ b/vault/activity_log_test.go @@ -4230,3 +4230,62 @@ func TestActivityLog_partialMonthClientCountWithMultipleMountPaths(t *testing.T) } } } + +// TestActivityLog_processClientRecord calls processClientRecord for an entity and a non-entity record and verifies that +// the record is present in the namespace and month maps +func TestActivityLog_processClientRecord(t *testing.T) { + startTime := time.Now() + mount := "mount" + namespace := "namespace" + clientID := "client-id" + run := func(t *testing.T, isNonEntity bool) { + t.Helper() + record := &activity.EntityRecord{ + MountAccessor: mount, + NamespaceID: namespace, + ClientID: clientID, + NonEntity: isNonEntity, + } + byNS := make(summaryByNamespace) + byMonth := make(summaryByMonth) + processClientRecord(record, byNS, byMonth, startTime) + require.Contains(t, byNS, namespace) + require.Contains(t, byNS[namespace].Mounts, mount) + monthIndex := timeutil.StartOfMonth(startTime).UTC().Unix() + require.Contains(t, byMonth, monthIndex) + require.Equal(t, byMonth[monthIndex].Namespaces, byNS) + require.Equal(t, byMonth[monthIndex].NewClients.Namespaces, byNS) + + if isNonEntity { + require.Contains(t, byMonth[monthIndex].Counts.NonEntities, clientID) + require.NotContains(t, byMonth[monthIndex].Counts.Entities, clientID) + + require.Contains(t, byMonth[monthIndex].NewClients.Counts.NonEntities, clientID) + require.NotContains(t, byMonth[monthIndex].NewClients.Counts.Entities, clientID) + + require.Contains(t, byNS[namespace].Mounts[mount].Counts.NonEntities, clientID) + require.Contains(t, byNS[namespace].Counts.NonEntities, clientID) + + require.NotContains(t, byNS[namespace].Mounts[mount].Counts.Entities, clientID) + require.NotContains(t, byNS[namespace].Counts.Entities, clientID) + } else { + require.Contains(t, byMonth[monthIndex].Counts.Entities, clientID) + 
require.NotContains(t, byMonth[monthIndex].Counts.NonEntities, clientID) + + require.Contains(t, byMonth[monthIndex].NewClients.Counts.Entities, clientID) + require.NotContains(t, byMonth[monthIndex].NewClients.Counts.NonEntities, clientID) + + require.Contains(t, byNS[namespace].Mounts[mount].Counts.Entities, clientID) + require.Contains(t, byNS[namespace].Counts.Entities, clientID) + + require.NotContains(t, byNS[namespace].Mounts[mount].Counts.NonEntities, clientID) + require.NotContains(t, byNS[namespace].Counts.NonEntities, clientID) + } + } + t.Run("non entity", func(t *testing.T) { + run(t, true) + }) + t.Run("entity", func(t *testing.T) { + run(t, false) + }) +} From 9576b8938019835623a56e3994a0170418d01227 Mon Sep 17 00:00:00 2001 From: miagilepner Date: Tue, 11 Apr 2023 17:09:01 +0200 Subject: [PATCH 14/26] backport of commit 4b6ec4079d1bdccde4cab416417a296c8c233c1b --- changelog/20078.txt | 3 +++ vault/logical_system_activity.go | 4 ++++ 2 files changed, 7 insertions(+) create mode 100644 changelog/20078.txt diff --git a/changelog/20078.txt b/changelog/20078.txt new file mode 100644 index 000000000000..8749354b315d --- /dev/null +++ b/changelog/20078.txt @@ -0,0 +1,3 @@ +```release-note:improvement +core/activity: error when attempting to update retention configuration below the minimum +``` \ No newline at end of file diff --git a/vault/logical_system_activity.go b/vault/logical_system_activity.go index 27dec67ee894..294d9a11302b 100644 --- a/vault/logical_system_activity.go +++ b/vault/logical_system_activity.go @@ -352,6 +352,10 @@ func (b *SystemBackend) handleActivityConfigUpdate(ctx context.Context, req *log return logical.ErrorResponse("retention_months cannot be 0 while enabled"), logical.ErrInvalidRequest } + if a.core.censusLicensingEnabled && config.RetentionMonths < a.configOverrides.MinimumRetentionMonths { + return logical.ErrorResponse("retention_months must be at least %d while Reporting is enabled", 
a.configOverrides.MinimumRetentionMonths), logical.ErrInvalidRequest + } + // Store the config entry, err := logical.StorageEntryJSON(path.Join(activitySubPath, activityConfigKey), config) if err != nil { From f31148d11b50b4e99949c7c2dcd4efb7c1109ea0 Mon Sep 17 00:00:00 2001 From: Mike Palmiotto Date: Wed, 12 Apr 2023 12:02:28 -0400 Subject: [PATCH 15/26] backport of commit 05ba6bbddded428d2fa010f9359d0543f46af52b --- changelog/20086.txt | 4 +++ vault/activity_log_test.go | 30 +++++++++++-------- vault/census.go | 6 +++- vault/core.go | 11 +++++++ vault/logical_system_activity.go | 10 ++++--- .../api-docs/system/internal-counters.mdx | 4 ++- 6 files changed, 47 insertions(+), 18 deletions(-) create mode 100644 changelog/20086.txt diff --git a/changelog/20086.txt b/changelog/20086.txt new file mode 100644 index 000000000000..9511c97b66e3 --- /dev/null +++ b/changelog/20086.txt @@ -0,0 +1,4 @@ +```release-note:improvement +api: `/sys/internal/counters/config` endpoint now contains read-only +`reporting_enabled` and `billing_start_timestamp` fields. 
+``` diff --git a/vault/activity_log_test.go b/vault/activity_log_test.go index 5bda55a19440..4304032f9384 100644 --- a/vault/activity_log_test.go +++ b/vault/activity_log_test.go @@ -831,10 +831,12 @@ func TestActivityLog_API_ConfigCRUD(t *testing.T) { t.Fatalf("err: %v", err) } defaults := map[string]interface{}{ - "default_report_months": 12, - "retention_months": 24, - "enabled": activityLogEnabledDefaultValue, - "queries_available": false, + "default_report_months": 12, + "retention_months": 24, + "enabled": activityLogEnabledDefaultValue, + "queries_available": false, + "reporting_enabled": core.censusLicensingEnabled, + "billing_start_timestamp": core.GetBillingStart(), } if diff := deep.Equal(resp.Data, defaults); len(diff) > 0 { @@ -912,10 +914,12 @@ func TestActivityLog_API_ConfigCRUD(t *testing.T) { t.Fatalf("err: %v", err) } expected := map[string]interface{}{ - "default_report_months": 1, - "retention_months": 2, - "enabled": "enable", - "queries_available": false, + "default_report_months": 1, + "retention_months": 2, + "enabled": "enable", + "queries_available": false, + "reporting_enabled": false, + "billing_start_timestamp": core.GetBillingStart(), } if diff := deep.Equal(resp.Data, expected); len(diff) > 0 { @@ -948,10 +952,12 @@ func TestActivityLog_API_ConfigCRUD(t *testing.T) { } defaults := map[string]interface{}{ - "default_report_months": 12, - "retention_months": 24, - "enabled": activityLogEnabledDefaultValue, - "queries_available": false, + "default_report_months": 12, + "retention_months": 24, + "enabled": activityLogEnabledDefaultValue, + "queries_available": false, + "reporting_enabled": false, + "billing_start_timestamp": core.GetBillingStart(), } if diff := deep.Equal(resp.Data, defaults); len(diff) > 0 { diff --git a/vault/census.go b/vault/census.go index 2312b3b54ef1..603fbf48175d 100644 --- a/vault/census.go +++ b/vault/census.go @@ -2,5 +2,9 @@ package vault +import "time" + // CensusAgent is a stub for OSS -type CensusAgent 
struct{} +type CensusAgent struct { + billingStart time.Time +} diff --git a/vault/core.go b/vault/core.go index 21e1eb9a7c76..010abd459d32 100644 --- a/vault/core.go +++ b/vault/core.go @@ -3600,3 +3600,14 @@ func (c *Core) GetHCPLinkStatus() (string, string) { return status, resourceID } + +// GetBillingStart gets the billing start timestamp from the configured Census +// Agent, handling a nil agent. +func (c *Core) GetBillingStart() time.Time { + var billingStart time.Time + if c.censusAgent != nil { + billingStart = c.censusAgent.billingStart + } + + return billingStart +} diff --git a/vault/logical_system_activity.go b/vault/logical_system_activity.go index 294d9a11302b..2b7f934509ff 100644 --- a/vault/logical_system_activity.go +++ b/vault/logical_system_activity.go @@ -268,10 +268,12 @@ func (b *SystemBackend) handleActivityConfigRead(ctx context.Context, req *logic return &logical.Response{ Data: map[string]interface{}{ - "default_report_months": config.DefaultReportMonths, - "retention_months": config.RetentionMonths, - "enabled": config.Enabled, - "queries_available": qa, + "default_report_months": config.DefaultReportMonths, + "retention_months": config.RetentionMonths, + "enabled": config.Enabled, + "queries_available": qa, + "reporting_enabled": b.Core.censusLicensingEnabled, + "billing_start_timestamp": b.Core.GetBillingStart(), }, }, nil } diff --git a/website/content/api-docs/system/internal-counters.mdx b/website/content/api-docs/system/internal-counters.mdx index 7147eefc9ffc..e87943a8158e 100644 --- a/website/content/api-docs/system/internal-counters.mdx +++ b/website/content/api-docs/system/internal-counters.mdx @@ -862,7 +862,9 @@ $ curl \ "default_report_months": 12, "enabled": "default-enabled", "queries_available": true, - "retention_months": 24 + "retention_months": 24, + "reporting_enabled": false, + "billing_start_timestamp": "2022-03-01T00:00:00Z", }, "warnings": null } From 49eef416eb1612df352e994bd7fc129c69e2c55d Mon Sep 17 00:00:00 
2001 From: Mike Palmiotto Date: Thu, 13 Apr 2023 14:33:23 -0400 Subject: [PATCH 16/26] backport of commit 002a59a370a80c846191ece427bef92f25bf81eb --- changelog/20150.txt | 4 ++++ vault/activity_log_test.go | 39 +++++++++++++++++--------------- vault/logical_system_activity.go | 13 ++++++----- 3 files changed, 32 insertions(+), 24 deletions(-) create mode 100644 changelog/20150.txt diff --git a/changelog/20150.txt b/changelog/20150.txt new file mode 100644 index 000000000000..0ea8259f9e66 --- /dev/null +++ b/changelog/20150.txt @@ -0,0 +1,4 @@ +```release-note:improvement +api: `/sys/internal/counters/config` endpoint now contains read-only +`minimum_retention_months`. +``` diff --git a/vault/activity_log_test.go b/vault/activity_log_test.go index 4304032f9384..39a1de01909a 100644 --- a/vault/activity_log_test.go +++ b/vault/activity_log_test.go @@ -831,12 +831,13 @@ func TestActivityLog_API_ConfigCRUD(t *testing.T) { t.Fatalf("err: %v", err) } defaults := map[string]interface{}{ - "default_report_months": 12, - "retention_months": 24, - "enabled": activityLogEnabledDefaultValue, - "queries_available": false, - "reporting_enabled": core.censusLicensingEnabled, - "billing_start_timestamp": core.GetBillingStart(), + "default_report_months": 12, + "retention_months": 24, + "enabled": activityLogEnabledDefaultValue, + "queries_available": false, + "reporting_enabled": core.censusLicensingEnabled, + "billing_start_timestamp": core.GetBillingStart(), + "minimum_retention_months": core.activityLog.configOverrides.MinimumRetentionMonths, } if diff := deep.Equal(resp.Data, defaults); len(diff) > 0 { @@ -914,12 +915,13 @@ func TestActivityLog_API_ConfigCRUD(t *testing.T) { t.Fatalf("err: %v", err) } expected := map[string]interface{}{ - "default_report_months": 1, - "retention_months": 2, - "enabled": "enable", - "queries_available": false, - "reporting_enabled": false, - "billing_start_timestamp": core.GetBillingStart(), + "default_report_months": 1, + "retention_months": 
2, + "enabled": "enable", + "queries_available": false, + "reporting_enabled": core.censusLicensingEnabled, + "billing_start_timestamp": core.GetBillingStart(), + "minimum_retention_months": core.activityLog.configOverrides.MinimumRetentionMonths, } if diff := deep.Equal(resp.Data, expected); len(diff) > 0 { @@ -952,12 +954,13 @@ func TestActivityLog_API_ConfigCRUD(t *testing.T) { } defaults := map[string]interface{}{ - "default_report_months": 12, - "retention_months": 24, - "enabled": activityLogEnabledDefaultValue, - "queries_available": false, - "reporting_enabled": false, - "billing_start_timestamp": core.GetBillingStart(), + "default_report_months": 12, + "retention_months": 24, + "enabled": activityLogEnabledDefaultValue, + "queries_available": false, + "reporting_enabled": core.censusLicensingEnabled, + "billing_start_timestamp": core.GetBillingStart(), + "minimum_retention_months": core.activityLog.configOverrides.MinimumRetentionMonths, } if diff := deep.Equal(resp.Data, defaults); len(diff) > 0 { diff --git a/vault/logical_system_activity.go b/vault/logical_system_activity.go index 2b7f934509ff..927ffd34ed85 100644 --- a/vault/logical_system_activity.go +++ b/vault/logical_system_activity.go @@ -268,12 +268,13 @@ func (b *SystemBackend) handleActivityConfigRead(ctx context.Context, req *logic return &logical.Response{ Data: map[string]interface{}{ - "default_report_months": config.DefaultReportMonths, - "retention_months": config.RetentionMonths, - "enabled": config.Enabled, - "queries_available": qa, - "reporting_enabled": b.Core.censusLicensingEnabled, - "billing_start_timestamp": b.Core.GetBillingStart(), + "default_report_months": config.DefaultReportMonths, + "retention_months": config.RetentionMonths, + "enabled": config.Enabled, + "queries_available": qa, + "reporting_enabled": b.Core.censusLicensingEnabled, + "billing_start_timestamp": b.Core.GetBillingStart(), + "minimum_retention_months": a.configOverrides.MinimumRetentionMonths, }, }, nil } 
From d040b69790a44cab80ea7961ee83b09fea96b924 Mon Sep 17 00:00:00 2001 From: Mike Palmiotto Date: Fri, 21 Apr 2023 15:29:37 -0400 Subject: [PATCH 17/26] backport of commit 77f83d9fe8b85c126347794a460410c2025675fd --- vault/activity_log.go | 2 +- vault/activity_log_test.go | 6 +++--- vault/activity_log_util.go | 7 +++++-- vault/census.go | 9 ++++----- vault/core.go | 23 ++++++++++------------- vault/logical_system_activity.go | 2 +- vault/testing.go | 2 +- 7 files changed, 25 insertions(+), 26 deletions(-) diff --git a/vault/activity_log.go b/vault/activity_log.go index ea84ee32fa30..356383d52a7e 100644 --- a/vault/activity_log.go +++ b/vault/activity_log.go @@ -1104,7 +1104,7 @@ func (c *Core) setupActivityLog(ctx context.Context, wg *sync.WaitGroup) error { }(manager.retentionMonths) manager.CensusReportDone = make(chan bool) - go c.activityLog.CensusReport(ctx, c.censusAgent) + go c.activityLog.CensusReport(ctx, c.censusAgent, c.billingStart) } return nil diff --git a/vault/activity_log_test.go b/vault/activity_log_test.go index 39a1de01909a..20a77aa95cb6 100644 --- a/vault/activity_log_test.go +++ b/vault/activity_log_test.go @@ -836,7 +836,7 @@ func TestActivityLog_API_ConfigCRUD(t *testing.T) { "enabled": activityLogEnabledDefaultValue, "queries_available": false, "reporting_enabled": core.censusLicensingEnabled, - "billing_start_timestamp": core.GetBillingStart(), + "billing_start_timestamp": core.billingStart, "minimum_retention_months": core.activityLog.configOverrides.MinimumRetentionMonths, } @@ -920,7 +920,7 @@ func TestActivityLog_API_ConfigCRUD(t *testing.T) { "enabled": "enable", "queries_available": false, "reporting_enabled": core.censusLicensingEnabled, - "billing_start_timestamp": core.GetBillingStart(), + "billing_start_timestamp": core.billingStart, "minimum_retention_months": core.activityLog.configOverrides.MinimumRetentionMonths, } @@ -959,7 +959,7 @@ func TestActivityLog_API_ConfigCRUD(t *testing.T) { "enabled": 
activityLogEnabledDefaultValue, "queries_available": false, "reporting_enabled": core.censusLicensingEnabled, - "billing_start_timestamp": core.GetBillingStart(), + "billing_start_timestamp": core.billingStart, "minimum_retention_months": core.activityLog.configOverrides.MinimumRetentionMonths, } diff --git a/vault/activity_log_util.go b/vault/activity_log_util.go index 57ba7215a513..35625ac5b02c 100644 --- a/vault/activity_log_util.go +++ b/vault/activity_log_util.go @@ -2,7 +2,10 @@ package vault -import "context" +import ( + "context" + "time" +) // sendCurrentFragment is a no-op on OSS func (a *ActivityLog) sendCurrentFragment(ctx context.Context) error { @@ -10,4 +13,4 @@ func (a *ActivityLog) sendCurrentFragment(ctx context.Context) error { } // CensusReport is a no-op on OSS -func (a *ActivityLog) CensusReport(_ctx context.Context, _ca *CensusAgent) {} +func (a *ActivityLog) CensusReport(context.Context, CensusReporter, time.Time) {} diff --git a/vault/census.go b/vault/census.go index 603fbf48175d..9d916dc14896 100644 --- a/vault/census.go +++ b/vault/census.go @@ -2,9 +2,8 @@ package vault -import "time" - // CensusAgent is a stub for OSS -type CensusAgent struct { - billingStart time.Time -} +type CensusReporter struct{} + +// setupCensusAgent is a stub for OSS. +func (c *Core) setupCensusAgent() error { return nil } diff --git a/vault/core.go b/vault/core.go index 010abd459d32..53ecd4e10022 100644 --- a/vault/core.go +++ b/vault/core.go @@ -606,11 +606,14 @@ type Core struct { activityLogConfig ActivityLogCoreConfig // censusAgent is the mechanism used for reporting Vault's billing data. - censusAgent *CensusAgent + censusAgent CensusReporter // censusLicensingEnabled records whether Vault is exporting census metrics censusLicensingEnabled bool + // billingStart keeps track of the billing start time for exporting census metrics + billingStart time.Time + // activeTime is set on active nodes indicating the time at which this node // became active. 
activeTime time.Time @@ -751,7 +754,7 @@ type CoreConfig struct { LicensingConfig *LicensingConfig // Configured Census Agent - censusAgent *CensusAgent + CensusAgent CensusReporter DisablePerformanceStandby bool DisableIndexing bool @@ -2259,6 +2262,11 @@ func (s standardUnsealStrategy) unseal(ctx context.Context, logger log.Logger, c if err := c.setupAuditedHeadersConfig(ctx); err != nil { return err } + + if err := c.setupCensusAgent(); err != nil { + c.logger.Error("skipping reporting for nil agent", "error", err) + } + // not waiting on wg to avoid changing existing behavior var wg sync.WaitGroup if err := c.setupActivityLog(ctx, &wg); err != nil { @@ -3600,14 +3608,3 @@ func (c *Core) GetHCPLinkStatus() (string, string) { return status, resourceID } - -// GetBillingStart gets the billing start timestamp from the configured Census -// Agent, handling a nil agent. -func (c *Core) GetBillingStart() time.Time { - var billingStart time.Time - if c.censusAgent != nil { - billingStart = c.censusAgent.billingStart - } - - return billingStart -} diff --git a/vault/logical_system_activity.go b/vault/logical_system_activity.go index 927ffd34ed85..28a4a5ff111a 100644 --- a/vault/logical_system_activity.go +++ b/vault/logical_system_activity.go @@ -273,7 +273,7 @@ func (b *SystemBackend) handleActivityConfigRead(ctx context.Context, req *logic "enabled": config.Enabled, "queries_available": qa, "reporting_enabled": b.Core.censusLicensingEnabled, - "billing_start_timestamp": b.Core.GetBillingStart(), + "billing_start_timestamp": b.Core.billingStart, "minimum_retention_months": a.configOverrides.MinimumRetentionMonths, }, }, nil diff --git a/vault/testing.go b/vault/testing.go index 4e89efd7c523..1efa0b1eee5b 100644 --- a/vault/testing.go +++ b/vault/testing.go @@ -209,7 +209,7 @@ func TestCoreWithSealAndUINoCleanup(t testing.T, opts *CoreConfig) *Core { conf.EnableResponseHeaderHostname = opts.EnableResponseHeaderHostname conf.DisableSSCTokens = opts.DisableSSCTokens 
conf.PluginDirectory = opts.PluginDirectory - conf.censusAgent = opts.censusAgent + conf.CensusAgent = opts.CensusAgent if opts.Logger != nil { conf.Logger = opts.Logger From e9872e6f91334a70fda7918209ee55e66d082999 Mon Sep 17 00:00:00 2001 From: miagilepner Date: Tue, 16 May 2023 16:29:18 +0200 Subject: [PATCH 18/26] backport of commit 730d0e2821dbc3bb1fe91ade183aa8c2908eaae5 --- changelog/20073.txt | 3 + vault/activity_log.go | 389 ++++++++++++++++++++++--------------- vault/activity_log_test.go | 378 +++++++++++++++++++++++++++++++++++ 3 files changed, 610 insertions(+), 160 deletions(-) create mode 100644 changelog/20073.txt diff --git a/changelog/20073.txt b/changelog/20073.txt new file mode 100644 index 000000000000..10c21a58ba52 --- /dev/null +++ b/changelog/20073.txt @@ -0,0 +1,3 @@ +```release-note:improvement +core/activity: refactor the activity log's generation of precomputed queries +``` \ No newline at end of file diff --git a/vault/activity_log.go b/vault/activity_log.go index 356383d52a7e..08faaba15501 100644 --- a/vault/activity_log.go +++ b/vault/activity_log.go @@ -1892,6 +1892,17 @@ func newProcessCounts() *processCounts { } } +func (p *processCounts) delete(client *activity.EntityRecord) { + if !p.contains(client) { + return + } + if client.NonEntity { + delete(p.NonEntities, client.ClientID) + } else { + delete(p.Entities, client.ClientID) + } +} + func (p *processCounts) add(client *activity.EntityRecord) { if client.NonEntity { p.NonEntities[client.ClientID] = struct{}{} @@ -1923,6 +1934,10 @@ func (p *processMount) add(client *activity.EntityRecord) { p.Counts.add(client) } +func (p *processMount) delete(client *activity.EntityRecord) { + p.Counts.delete(client) +} + func (s summaryByMount) add(client *activity.EntityRecord) { if _, present := s[client.MountAccessor]; !present { s[client.MountAccessor] = newProcessMount() @@ -1930,6 +1945,12 @@ func (s summaryByMount) add(client *activity.EntityRecord) { s[client.MountAccessor].add(client) 
} +func (s summaryByMount) delete(client *activity.EntityRecord) { + if m, present := s[client.MountAccessor]; present { + m.delete(client) + } +} + type processByNamespace struct { Counts *processCounts Mounts summaryByMount @@ -1947,6 +1968,11 @@ func (p *processByNamespace) add(client *activity.EntityRecord) { p.Mounts.add(client) } +func (p *processByNamespace) delete(client *activity.EntityRecord) { + p.Counts.delete(client) + p.Mounts.delete(client) +} + func (s summaryByNamespace) add(client *activity.EntityRecord) { if _, present := s[client.NamespaceID]; !present { s[client.NamespaceID] = newByNamespace() @@ -1954,6 +1980,12 @@ func (s summaryByNamespace) add(client *activity.EntityRecord) { s[client.NamespaceID].add(client) } +func (s summaryByNamespace) delete(client *activity.EntityRecord) { + if n, present := s[client.NamespaceID]; present { + n.delete(client) + } +} + type processNewClients struct { Counts *processCounts Namespaces summaryByNamespace @@ -1971,6 +2003,11 @@ func (p *processNewClients) add(client *activity.EntityRecord) { p.Namespaces.add(client) } +func (p *processNewClients) delete(client *activity.EntityRecord) { + p.Counts.delete(client) + p.Namespaces.delete(client) +} + type processMonth struct { Counts *processCounts Namespaces summaryByNamespace @@ -2006,6 +2043,185 @@ func processClientRecord(e *activity.EntityRecord, byNamespace summaryByNamespac byMonth.add(e, startTime) } +// handleEntitySegment processes the record and adds it to the correct month/ +// namespace breakdown maps, as well as to the hyperloglog for the month. New +// clients are deduplicated in opts.byMonth so that clients will only appear in +// the first month in which they are seen. 
+// This method must be called in reverse chronological order of the months (with +// the most recent month being called before previous months) +func (a *ActivityLog) handleEntitySegment(l *activity.EntityActivityLog, segmentTime time.Time, hll *hyperloglog.Sketch, opts pqOptions) error { + for _, e := range l.Clients { + + processClientRecord(e, opts.byNamespace, opts.byMonth, segmentTime) + hll.Insert([]byte(e.ClientID)) + + // step forward in time through the months to check if the client is + // present. If it is, delete it. This is because the client should only + // be reported as new in the earliest month that it was seen + finalMonth := timeutil.StartOfMonth(opts.activePeriodEnd).UTC() + for currMonth := timeutil.StartOfMonth(segmentTime).UTC(); currMonth.Before(finalMonth); currMonth = timeutil.StartOfNextMonth(currMonth).UTC() { + // Invalidate the client from being a new client in the next month + next := timeutil.StartOfNextMonth(currMonth).UTC().Unix() + if _, present := opts.byMonth[next]; present { + // delete from the new clients map for the next month + // this will handle deleting from the per-namespace and per-mount maps of NewClients + opts.byMonth[next].NewClients.delete(e) + } + } + } + + return nil +} + +// breakdownTokenSegment handles a TokenCount record, adding it to the namespace breakdown +func (a *ActivityLog) breakdownTokenSegment(l *activity.TokenCount, byNamespace map[string]*processByNamespace) { + for nsID, v := range l.CountByNamespaceID { + if _, present := byNamespace[nsID]; !present { + byNamespace[nsID] = newByNamespace() + } + byNamespace[nsID].Counts.Tokens += v + } +} + +func (a *ActivityLog) writePrecomputedQuery(ctx context.Context, segmentTime time.Time, opts pqOptions) error { + pq := &activity.PrecomputedQuery{ + StartTime: segmentTime, + EndTime: opts.endTime, + Namespaces: make([]*activity.NamespaceRecord, 0, len(opts.byNamespace)), + Months: make([]*activity.MonthRecord, 0, len(opts.byMonth)), + } + // this will 
transform the byMonth map into the correctly formatted protobuf + pq.Months = a.transformMonthBreakdowns(opts.byMonth) + + // the byNamespace map also needs to be transformed into a protobuf + for nsID, entry := range opts.byNamespace { + mountRecord := make([]*activity.MountRecord, 0, len(entry.Mounts)) + for mountAccessor, mountData := range entry.Mounts { + mountRecord = append(mountRecord, &activity.MountRecord{ + MountPath: a.mountAccessorToMountPath(mountAccessor), + Counts: &activity.CountsRecord{ + EntityClients: len(mountData.Counts.Entities), + NonEntityClients: int(mountData.Counts.Tokens) + len(mountData.Counts.NonEntities), + }, + }) + } + + pq.Namespaces = append(pq.Namespaces, &activity.NamespaceRecord{ + NamespaceID: nsID, + Entities: uint64(len(entry.Counts.Entities)), + NonEntityTokens: entry.Counts.Tokens + uint64(len(entry.Counts.NonEntities)), + Mounts: mountRecord, + }) + } + err := a.queryStore.Put(ctx, pq) + if err != nil { + a.logger.Warn("failed to store precomputed query", "error", err) + } + return nil +} + +// pqOptions holds fields that will be used when creating precomputed queries +// These fields will remain the same for every segment that a precomputed query worker is handling +type pqOptions struct { + byNamespace map[string]*processByNamespace + byMonth map[int64]*processMonth + // endTime sets the end time of the precomputed query. + // When invoked on schedule by the precomputedQueryWorker, this is the end of the month that just finished. + endTime time.Time + // activePeriodStart is the earliest date in our retention window + activePeriodStart time.Time + // activePeriodEnd is the latest date in our retention window. 
+ // When invoked on schedule by the precomputedQueryWorker, this will be the timestamp of the most recent segment + // that's present in storage + activePeriodEnd time.Time +} + +// segmentToPrecomputedQuery processes a single segment +func (a *ActivityLog) segmentToPrecomputedQuery(ctx context.Context, segmentTime time.Time, reader SegmentReader, opts pqOptions) error { + hyperloglog, err := a.CreateOrFetchHyperlogLog(ctx, segmentTime) + if err != nil { + // We were unable to create or fetch the hll, but we should still + // continue with our precomputation + a.logger.Warn("unable to create or fetch hyperloglog", "start time", segmentTime, "error", err) + } + + // Iterate through entities, adding them to the hyperloglog and the summary maps in opts + for { + entity, err := reader.ReadEntity(ctx) + if errors.Is(err, io.EOF) { + break + } + if err != nil { + a.logger.Warn("failed to read segment", "error", err) + return err + } + err = a.handleEntitySegment(entity, segmentTime, hyperloglog, opts) + if err != nil { + a.logger.Warn("failed to handle entity segment", "error", err) + return err + } + } + + // Store the hyperloglog + err = a.StoreHyperlogLog(ctx, segmentTime, hyperloglog) + if err != nil { + a.logger.Warn("failed to store hyperloglog for month", "start time", segmentTime, "error", err) + } + + // Iterate through any tokens and add them to per namespace map + for { + token, err := reader.ReadToken(ctx) + if errors.Is(err, io.EOF) { + break + } + if err != nil { + a.logger.Warn("failed to load token counts", "error", err) + return err + } + a.breakdownTokenSegment(token, opts.byNamespace) + } + + // write metrics + for nsID, entry := range opts.byNamespace { + // If this is the most recent month, or the start of the reporting period, output + // a metric for each namespace. 
+ if segmentTime == opts.activePeriodEnd { + a.metrics.SetGaugeWithLabels( + []string{"identity", "entity", "active", "monthly"}, + float32(len(entry.Counts.Entities)), + []metricsutil.Label{ + {Name: "namespace", Value: a.namespaceToLabel(ctx, nsID)}, + }, + ) + a.metrics.SetGaugeWithLabels( + []string{"identity", "nonentity", "active", "monthly"}, + float32(len(entry.Counts.NonEntities))+float32(entry.Counts.Tokens), + []metricsutil.Label{ + {Name: "namespace", Value: a.namespaceToLabel(ctx, nsID)}, + }, + ) + } else if segmentTime == opts.activePeriodStart { + a.metrics.SetGaugeWithLabels( + []string{"identity", "entity", "active", "reporting_period"}, + float32(len(entry.Counts.Entities)), + []metricsutil.Label{ + {Name: "namespace", Value: a.namespaceToLabel(ctx, nsID)}, + }, + ) + a.metrics.SetGaugeWithLabels( + []string{"identity", "nonentity", "active", "reporting_period"}, + float32(len(entry.Counts.NonEntities))+float32(entry.Counts.Tokens), + []metricsutil.Label{ + {Name: "namespace", Value: a.namespaceToLabel(ctx, nsID)}, + }, + ) + } + } + + // convert the maps to the proper format and write them as precomputed queries + return a.writePrecomputedQuery(ctx, segmentTime, opts) +} + // goroutine to process the request in the intent log, creating precomputed queries. // We expect the return value won't be checked, so log errors as they occur // (but for unit testing having the error return should help.) 
@@ -2082,187 +2298,38 @@ func (a *ActivityLog) precomputedQueryWorker(ctx context.Context) error { return errors.New("previous month not found") } - // "times" is already in reverse order, start building the per-namespace maps - // from the last month backward - byNamespace := make(map[string]*processByNamespace) byMonth := make(map[int64]*processMonth) - walkEntities := func(l *activity.EntityActivityLog, startTime time.Time, hll *hyperloglog.Sketch) error { - for _, e := range l.Clients { - - processClientRecord(e, byNamespace, byMonth, startTime) - - // We maintain an hyperloglog for each month - // hyperloglog is a sketch (hyperloglog data-structure) containing client ID's in a given month - // hyperloglog is used in activity log to get the approximate number new clients in the current billing month - // by counting the number of distinct clients in all the months including current month - // (this can be done by merging the hyperloglog all months with current month hyperloglog) - // and subtracting the number of distinct clients in the current month - // NOTE: current month here is not the month of startTime but the time period from the start of the current month, - // up until the time that this request was made. - hll.Insert([]byte(e.ClientID)) - - // The byMonth map will be filled in the reverse order of time. For - // example, if the billing period is from Jan to June, the byMonth - // will be filled for June first, May next and so on till Jan. When - // processing a client for the current month, it has been added as a - // new client above. Now, we check if that client is also used in - // the subsequent months (on any given month, byMonth map has - // already been processed for all the subsequent months due to the - // reverse ordering). If yes, we remove those references. This way a - // client is considered new only in the earliest month of its use in - // the billing period. 
- for currMonth := timeutil.StartOfMonth(startTime).UTC(); currMonth != timeutil.StartOfMonth(times[0]).UTC(); currMonth = timeutil.StartOfNextMonth(currMonth).UTC() { - // Invalidate the client from being a new client in the next month - next := timeutil.StartOfNextMonth(currMonth).UTC().Unix() - if _, present := byMonth[next]; !present { - continue - } - - newClients := byMonth[next].NewClients - - // Remove the client from the top level counts within the month. - if e.NonEntity { - delete(newClients.Counts.NonEntities, e.ClientID) - } else { - delete(newClients.Counts.Entities, e.ClientID) - } - - if _, present := newClients.Namespaces[e.NamespaceID]; present { - // Remove the client from the namespace within the month. - if e.NonEntity { - delete(newClients.Namespaces[e.NamespaceID].Counts.NonEntities, e.ClientID) - } else { - delete(newClients.Namespaces[e.NamespaceID].Counts.Entities, e.ClientID) - } - if _, present := newClients.Namespaces[e.NamespaceID].Mounts[e.MountAccessor]; present { - // Remove the client from the mount within the namespace within the month. 
- if e.NonEntity { - delete(newClients.Namespaces[e.NamespaceID].Mounts[e.MountAccessor].Counts.NonEntities, e.ClientID) - } else { - delete(newClients.Namespaces[e.NamespaceID].Mounts[e.MountAccessor].Counts.Entities, e.ClientID) - } - } - } - } - } - - return nil - } - - walkTokens := func(l *activity.TokenCount) { - for nsID, v := range l.CountByNamespaceID { - if _, present := byNamespace[nsID]; !present { - byNamespace[nsID] = newByNamespace() - } - byNamespace[nsID].Counts.Tokens += v - } - } - endTime := timeutil.EndOfMonth(time.Unix(lastMonth, 0).UTC()) activePeriodStart := timeutil.MonthsPreviousTo(a.defaultReportMonths, endTime) // If not enough data, report as much as we have in the window if activePeriodStart.Before(times[len(times)-1]) { activePeriodStart = times[len(times)-1] } - + opts := pqOptions{ + byNamespace: byNamespace, + byMonth: byMonth, + endTime: endTime, + activePeriodStart: activePeriodStart, + activePeriodEnd: times[0], + } + // "times" is already in reverse order, start building the per-namespace maps + // from the last month backward for _, startTime := range times { // Do not work back further than the current retention window, // which will just get deleted anyway. 
if startTime.Before(retentionWindow) { break } - - hyperloglog, err := a.CreateOrFetchHyperlogLog(ctx, startTime) + reader, err := a.NewSegmentFileReader(ctx, startTime) if err != nil { - // We were unable to create or fetch the hll, but we should still - // continue with our precomputation - a.logger.Warn("unable to create or fetch hyperloglog", "start time", startTime, "error", err) - } - err = a.WalkEntitySegments(ctx, startTime, hyperloglog, walkEntities) - if err != nil { - a.logger.Warn("failed to load previous segments", "error", err) return err } - // Store the hyperloglog - err = a.StoreHyperlogLog(ctx, startTime, hyperloglog) - if err != nil { - a.logger.Warn("failed to store hyperloglog for month", "start time", startTime, "error", err) - } - err = a.WalkTokenSegments(ctx, startTime, walkTokens) + err = a.segmentToPrecomputedQuery(ctx, startTime, reader, opts) if err != nil { - a.logger.Warn("failed to load previous token counts", "error", err) return err } - - // Save the work to date in a record - pq := &activity.PrecomputedQuery{ - StartTime: startTime, - EndTime: endTime, - Namespaces: make([]*activity.NamespaceRecord, 0, len(byNamespace)), - Months: make([]*activity.MonthRecord, 0, len(byMonth)), - } - pq.Months = a.transformMonthBreakdowns(byMonth) - - for nsID, entry := range byNamespace { - mountRecord := make([]*activity.MountRecord, 0, len(entry.Mounts)) - for mountAccessor, mountData := range entry.Mounts { - mountRecord = append(mountRecord, &activity.MountRecord{ - MountPath: a.mountAccessorToMountPath(mountAccessor), - Counts: &activity.CountsRecord{ - EntityClients: len(mountData.Counts.Entities), - NonEntityClients: int(mountData.Counts.Tokens) + len(mountData.Counts.NonEntities), - }, - }) - } - - pq.Namespaces = append(pq.Namespaces, &activity.NamespaceRecord{ - NamespaceID: nsID, - Entities: uint64(len(entry.Counts.Entities)), - NonEntityTokens: entry.Counts.Tokens + uint64(len(entry.Counts.NonEntities)), - Mounts: mountRecord, - }) - 
- // If this is the most recent month, or the start of the reporting period, output - // a metric for each namespace. - if startTime == times[0] { - a.metrics.SetGaugeWithLabels( - []string{"identity", "entity", "active", "monthly"}, - float32(len(entry.Counts.Entities)), - []metricsutil.Label{ - {Name: "namespace", Value: a.namespaceToLabel(ctx, nsID)}, - }, - ) - a.metrics.SetGaugeWithLabels( - []string{"identity", "nonentity", "active", "monthly"}, - float32(len(entry.Counts.NonEntities))+float32(entry.Counts.Tokens), - []metricsutil.Label{ - {Name: "namespace", Value: a.namespaceToLabel(ctx, nsID)}, - }, - ) - } else if startTime == activePeriodStart { - a.metrics.SetGaugeWithLabels( - []string{"identity", "entity", "active", "reporting_period"}, - float32(len(entry.Counts.Entities)), - []metricsutil.Label{ - {Name: "namespace", Value: a.namespaceToLabel(ctx, nsID)}, - }, - ) - a.metrics.SetGaugeWithLabels( - []string{"identity", "nonentity", "active", "reporting_period"}, - float32(len(entry.Counts.NonEntities))+float32(entry.Counts.Tokens), - []metricsutil.Label{ - {Name: "namespace", Value: a.namespaceToLabel(ctx, nsID)}, - }, - ) - } - } - - err = a.queryStore.Put(ctx, pq) - if err != nil { - a.logger.Warn("failed to store precomputed query", "error", err) - } } // delete the intent log @@ -2368,6 +2435,8 @@ func (a *ActivityLog) populateNamespaceAndMonthlyBreakdowns() (map[int64]*proces return byMonth, byNamespace } +// transformMonthBreakdowns converts a map of unix timestamp -> processMonth to +// a slice of MonthRecord func (a *ActivityLog) transformMonthBreakdowns(byMonth map[int64]*processMonth) []*activity.MonthRecord { monthly := make([]*activity.MonthRecord, 0) processByNamespaces := func(nsMap map[string]*processByNamespace) []*activity.MonthlyNamespaceRecord { diff --git a/vault/activity_log_test.go b/vault/activity_log_test.go index 20a77aa95cb6..75f6e90152bd 100644 --- a/vault/activity_log_test.go +++ b/vault/activity_log_test.go @@ -4240,6 
+4240,50 @@ func TestActivityLog_partialMonthClientCountWithMultipleMountPaths(t *testing.T) } } +// TestActivityLog_processNewClients_delete ensures that the correct clients are deleted from a processNewClients struct +func TestActivityLog_processNewClients_delete(t *testing.T) { + mount := "mount" + namespace := "namespace" + clientID := "client-id" + run := func(t *testing.T, isNonEntity bool) { + t.Helper() + record := &activity.EntityRecord{ + MountAccessor: mount, + NamespaceID: namespace, + ClientID: clientID, + NonEntity: isNonEntity, + } + newClients := newProcessNewClients() + newClients.add(record) + + require.True(t, newClients.Counts.contains(record)) + require.True(t, newClients.Namespaces[namespace].Counts.contains(record)) + require.True(t, newClients.Namespaces[namespace].Mounts[mount].Counts.contains(record)) + + newClients.delete(record) + + byNS := newClients.Namespaces + counts := newClients.Counts + require.NotContains(t, counts.NonEntities, clientID) + require.NotContains(t, counts.Entities, clientID) + + require.NotContains(t, counts.NonEntities, clientID) + require.NotContains(t, counts.Entities, clientID) + + require.NotContains(t, byNS[namespace].Mounts[mount].Counts.NonEntities, clientID) + require.NotContains(t, byNS[namespace].Counts.NonEntities, clientID) + + require.NotContains(t, byNS[namespace].Mounts[mount].Counts.Entities, clientID) + require.NotContains(t, byNS[namespace].Counts.Entities, clientID) + } + t.Run("entity", func(t *testing.T) { + run(t, false) + }) + t.Run("non-entity", func(t *testing.T) { + run(t, true) + }) +} + // TestActivityLog_processClientRecord calls processClientRecord for an entity and a non-entity record and verifies that // the record is present in the namespace and month maps func TestActivityLog_processClientRecord(t *testing.T) { @@ -4298,3 +4342,337 @@ func TestActivityLog_processClientRecord(t *testing.T) { run(t, false) }) } + +func verifyByNamespaceContains(t *testing.T, s summaryByNamespace, 
clients ...*activity.EntityRecord) { + t.Helper() + for _, c := range clients { + require.Contains(t, s, c.NamespaceID) + counts := s[c.NamespaceID].Counts + require.True(t, counts.contains(c)) + mounts := s[c.NamespaceID].Mounts + require.Contains(t, mounts, c.MountAccessor) + require.True(t, mounts[c.MountAccessor].Counts.contains(c)) + } +} + +func (s summaryByMonth) firstSeen(t *testing.T, client *activity.EntityRecord) time.Time { + t.Helper() + var seen int64 + for month, data := range s { + present := data.NewClients.Counts.contains(client) + if present { + if seen != 0 { + require.Fail(t, "client seen more than once", client.ClientID, s) + } + seen = month + } + } + return time.Unix(seen, 0).UTC() +} + +// TestActivityLog_handleEntitySegment verifies that the by namespace and by month summaries are correctly filled in a +// variety of scenarios +func TestActivityLog_handleEntitySegment(t *testing.T) { + finalTime := timeutil.StartOfMonth(time.Date(2022, 12, 1, 0, 0, 0, 0, time.UTC)) + addMonths := func(i int) time.Time { + return timeutil.StartOfMonth(finalTime.AddDate(0, i, 0)) + } + currentSegmentClients := make([]*activity.EntityRecord, 0, 3) + for i := 0; i < 3; i++ { + currentSegmentClients = append(currentSegmentClients, &activity.EntityRecord{ + ClientID: fmt.Sprintf("id-%d", i), + NamespaceID: fmt.Sprintf("ns-%d", i), + MountAccessor: fmt.Sprintf("mnt-%d", i), + NonEntity: i == 0, + }) + } + a := &ActivityLog{} + t.Run("older segment empty", func(t *testing.T) { + hll := hyperloglog.New() + byNS := make(summaryByNamespace) + byMonth := make(summaryByMonth) + segmentTime := addMonths(-3) + // our 3 clients were seen 3 months ago, with no other clients having been seen + err := a.handleEntitySegment(&activity.EntityActivityLog{Clients: currentSegmentClients}, segmentTime, hll, pqOptions{ + byNamespace: byNS, + byMonth: byMonth, + endTime: timeutil.EndOfMonth(segmentTime), + activePeriodStart: addMonths(-12), + activePeriodEnd: addMonths(12), + }) + 
require.NoError(t, err) + require.Len(t, byNS, 3) + verifyByNamespaceContains(t, byNS, currentSegmentClients...) + require.Len(t, byMonth, 1) + // they should all be registered as having first been seen 3 months ago + require.Equal(t, byMonth.firstSeen(t, currentSegmentClients[0]), segmentTime) + require.Equal(t, byMonth.firstSeen(t, currentSegmentClients[1]), segmentTime) + require.Equal(t, byMonth.firstSeen(t, currentSegmentClients[2]), segmentTime) + // and all 3 should be in the hyperloglog + require.Equal(t, hll.Estimate(), uint64(3)) + }) + t.Run("older segment clients seen earlier", func(t *testing.T) { + hll := hyperloglog.New() + byNS := make(summaryByNamespace) + byNS.add(currentSegmentClients[0]) + byNS.add(currentSegmentClients[1]) + byMonth := make(summaryByMonth) + segmentTime := addMonths(-3) + seenBefore2Months := addMonths(-2) + seenBefore1Month := addMonths(-1) + + // client 0 was seen 2 months ago + byMonth.add(currentSegmentClients[0], seenBefore2Months) + // client 1 was seen 1 month ago + byMonth.add(currentSegmentClients[1], seenBefore1Month) + + // handle clients 0, 1, and 2 as having been seen 3 months ago + err := a.handleEntitySegment(&activity.EntityActivityLog{Clients: currentSegmentClients}, segmentTime, hll, pqOptions{ + byNamespace: byNS, + byMonth: byMonth, + endTime: timeutil.EndOfMonth(segmentTime), + activePeriodStart: addMonths(-12), + activePeriodEnd: addMonths(12), + }) + require.NoError(t, err) + require.Len(t, byNS, 3) + verifyByNamespaceContains(t, byNS, currentSegmentClients...) 
+ // we expect that they will only be registered as new 3 months ago, because that's when they were first seen + require.Equal(t, byMonth.firstSeen(t, currentSegmentClients[0]), segmentTime) + require.Equal(t, byMonth.firstSeen(t, currentSegmentClients[1]), segmentTime) + require.Equal(t, byMonth.firstSeen(t, currentSegmentClients[2]), segmentTime) + + require.Equal(t, hll.Estimate(), uint64(3)) + }) + t.Run("disjoint set of clients", func(t *testing.T) { + hll := hyperloglog.New() + byNS := make(summaryByNamespace) + byNS.add(currentSegmentClients[0]) + byNS.add(currentSegmentClients[1]) + byMonth := make(summaryByMonth) + segmentTime := addMonths(-3) + seenBefore2Months := addMonths(-2) + seenBefore1Month := addMonths(-1) + + // client 0 was seen 2 months ago + byMonth.add(currentSegmentClients[0], seenBefore2Months) + // client 1 was seen 1 month ago + byMonth.add(currentSegmentClients[1], seenBefore1Month) + + // handle client 2 as having been seen 3 months ago + err := a.handleEntitySegment(&activity.EntityActivityLog{Clients: currentSegmentClients[2:]}, segmentTime, hll, pqOptions{ + byNamespace: byNS, + byMonth: byMonth, + endTime: timeutil.EndOfMonth(segmentTime), + activePeriodStart: addMonths(-12), + activePeriodEnd: addMonths(12), + }) + require.NoError(t, err) + require.Len(t, byNS, 3) + verifyByNamespaceContains(t, byNS, currentSegmentClients...) 
+ // client 2 should be added to the map, and the other clients should stay where they were + require.Equal(t, byMonth.firstSeen(t, currentSegmentClients[0]), seenBefore2Months) + require.Equal(t, byMonth.firstSeen(t, currentSegmentClients[1]), seenBefore1Month) + require.Equal(t, byMonth.firstSeen(t, currentSegmentClients[2]), segmentTime) + // the hyperloglog will have 1 element, because there was only 1 client in the segment + require.Equal(t, hll.Estimate(), uint64(1)) + }) + t.Run("new clients same namespaces", func(t *testing.T) { + hll := hyperloglog.New() + byNS := make(summaryByNamespace) + byNS.add(currentSegmentClients[0]) + byNS.add(currentSegmentClients[1]) + byNS.add(currentSegmentClients[2]) + byMonth := make(summaryByMonth) + segmentTime := addMonths(-3) + seenBefore2Months := addMonths(-2) + seenBefore1Month := addMonths(-1) + + // client 0 and 2 were seen 2 months ago + byMonth.add(currentSegmentClients[0], seenBefore2Months) + byMonth.add(currentSegmentClients[2], seenBefore2Months) + // client 1 was seen 1 month ago + byMonth.add(currentSegmentClients[1], seenBefore1Month) + + // create 3 additional clients + // these reuse ns-0, ns-1, ns-2 as both the namespace ID and the mount accessor + moreSegmentClients := make([]*activity.EntityRecord, 0, 3) + for i := 0; i < 3; i++ { + moreSegmentClients = append(moreSegmentClients, &activity.EntityRecord{ + ClientID: fmt.Sprintf("id-%d", i+3), + NamespaceID: fmt.Sprintf("ns-%d", i), + MountAccessor: fmt.Sprintf("ns-%d", i), + NonEntity: i == 1, + }) + } + // 3 new clients have been seen 3 months ago + err := a.handleEntitySegment(&activity.EntityActivityLog{Clients: moreSegmentClients}, segmentTime, hll, pqOptions{ + byNamespace: byNS, + byMonth: byMonth, + endTime: timeutil.EndOfMonth(segmentTime), + activePeriodStart: addMonths(-12), + activePeriodEnd: addMonths(12), + }) + require.NoError(t, err) + // there are only 3 namespaces, since both currentSegmentClients and moreSegmentClients use the same namespaces + require.Len(t,
byNS, 3) + verifyByNamespaceContains(t, byNS, currentSegmentClients...) + verifyByNamespaceContains(t, byNS, moreSegmentClients...) + // The segment clients that have already been seen have their same first seen dates + require.Equal(t, byMonth.firstSeen(t, currentSegmentClients[0]), seenBefore2Months) + require.Equal(t, byMonth.firstSeen(t, currentSegmentClients[1]), seenBefore1Month) + require.Equal(t, byMonth.firstSeen(t, currentSegmentClients[2]), seenBefore2Months) + // and the new clients should be first seen at segmentTime + require.Equal(t, byMonth.firstSeen(t, moreSegmentClients[0]), segmentTime) + require.Equal(t, byMonth.firstSeen(t, moreSegmentClients[1]), segmentTime) + require.Equal(t, byMonth.firstSeen(t, moreSegmentClients[2]), segmentTime) + // the hyperloglog will have 3 elements, because there were the 3 new elements in moreSegmentClients seen + require.Equal(t, hll.Estimate(), uint64(3)) + }) +} + +// TestActivityLog_breakdownTokenSegment verifies that tokens are correctly added to a map that tracks counts per namespace +func TestActivityLog_breakdownTokenSegment(t *testing.T) { + toAdd := map[string]uint64{ + "a": 1, + "b": 2, + "c": 3, + } + a := &ActivityLog{} + testCases := []struct { + name string + existingNamespaceCounts map[string]uint64 + wantCounts map[string]uint64 + }{ + { + name: "empty", + wantCounts: toAdd, + }, + { + name: "some overlap", + existingNamespaceCounts: map[string]uint64{ + "a": 2, + "z": 1, + }, + wantCounts: map[string]uint64{ + "a": 3, + "b": 2, + "c": 3, + "z": 1, + }, + }, + { + name: "disjoint sets", + existingNamespaceCounts: map[string]uint64{ + "z": 5, + "y": 3, + "x": 2, + }, + wantCounts: map[string]uint64{ + "a": 1, + "b": 2, + "c": 3, + "z": 5, + "y": 3, + "x": 2, + }, + }, + } + for _, tc := range testCases { + t.Run(tc.name, func(t *testing.T) { + byNamespace := make(map[string]*processByNamespace) + for k, v := range tc.existingNamespaceCounts { + byNamespace[k] = newByNamespace() + 
byNamespace[k].Counts.Tokens = v + } + a.breakdownTokenSegment(&activity.TokenCount{CountByNamespaceID: toAdd}, byNamespace) + got := make(map[string]uint64) + for k, v := range byNamespace { + got[k] = v.Counts.Tokens + } + require.Equal(t, tc.wantCounts, got) + }) + } +} + +// TestActivityLog_writePrecomputedQuery calls writePrecomputedQuery for a segment with 1 non entity and 1 entity client, +// which have different namespaces and mounts. The precomputed query is then retrieved from storage and we verify that +// the data structure is filled correctly +func TestActivityLog_writePrecomputedQuery(t *testing.T) { + core, _, _ := TestCoreUnsealed(t) + + a := core.activityLog + a.SetEnable(true) + + byMonth := make(summaryByMonth) + byNS := make(summaryByNamespace) + clientEntity := &activity.EntityRecord{ + ClientID: "id-1", + NamespaceID: "ns-1", + MountAccessor: "mnt-1", + } + clientNonEntity := &activity.EntityRecord{ + ClientID: "id-2", + NamespaceID: "ns-2", + MountAccessor: "mnt-2", + NonEntity: true, + } + now := time.Now() + + // add the 2 clients to the namespace and month summaries + processClientRecord(clientEntity, byNS, byMonth, now) + processClientRecord(clientNonEntity, byNS, byMonth, now) + + endTime := timeutil.EndOfMonth(now) + opts := pqOptions{ + byNamespace: byNS, + byMonth: byMonth, + endTime: endTime, + } + + err := a.writePrecomputedQuery(context.Background(), now, opts) + require.NoError(t, err) + + // read the query back from storage + val, err := a.queryStore.Get(context.Background(), now, endTime) + require.NoError(t, err) + require.Equal(t, now.UTC().Unix(), val.StartTime.UTC().Unix()) + require.Equal(t, endTime.UTC().Unix(), val.EndTime.UTC().Unix()) + + // ns-1 and ns-2 should both be present in the results + require.Len(t, val.Namespaces, 2) + require.Len(t, val.Months, 1) + resultByNS := make(map[string]*activity.NamespaceRecord) + for _, ns := range val.Namespaces { + resultByNS[ns.NamespaceID] = ns + } + ns1 := resultByNS["ns-1"] 
+ ns2 := resultByNS["ns-2"] + + require.Equal(t, ns1.Entities, uint64(1)) + require.Equal(t, ns1.NonEntityTokens, uint64(0)) + require.Equal(t, ns2.Entities, uint64(0)) + require.Equal(t, ns2.NonEntityTokens, uint64(1)) + + require.Len(t, ns1.Mounts, 1) + require.Len(t, ns2.Mounts, 1) + // ns-1 needs to have mnt-1 + require.Contains(t, ns1.Mounts[0].MountPath, "mnt-1") + // ns-2 needs to have mnt-2 + require.Contains(t, ns2.Mounts[0].MountPath, "mnt-2") + + require.Equal(t, 1, ns1.Mounts[0].Counts.EntityClients) + require.Equal(t, 0, ns1.Mounts[0].Counts.NonEntityClients) + require.Equal(t, 0, ns2.Mounts[0].Counts.EntityClients) + require.Equal(t, 1, ns2.Mounts[0].Counts.NonEntityClients) + + monthRecord := val.Months[0] + // there should only be one month present, since the clients were added with the same timestamp + require.Equal(t, monthRecord.Timestamp, timeutil.StartOfMonth(now).UTC().Unix()) + require.Equal(t, 1, monthRecord.Counts.NonEntityClients) + require.Equal(t, 1, monthRecord.Counts.EntityClients) + require.Len(t, monthRecord.Namespaces, 2) + require.Len(t, monthRecord.NewClients.Namespaces, 2) + require.Equal(t, 1, monthRecord.NewClients.Counts.EntityClients) + require.Equal(t, 1, monthRecord.NewClients.Counts.NonEntityClients) +} From aa7f940a140b3d42af1f32daa7e25d542d8c36e6 Mon Sep 17 00:00:00 2001 From: miagilepner Date: Fri, 19 May 2023 16:42:50 +0200 Subject: [PATCH 19/26] backport of commit 35e2c1665f009183088387532e17d02ded312e18 --- changelog/20680.txt | 6 ++++++ command/server.go | 3 +++ vault/activity_log.go | 32 +++++++++++++++++++++++++++----- vault/activity_log_test.go | 12 ++++++------ vault/census.go | 11 +++++++++-- vault/core.go | 17 +++++++++-------- vault/logical_system_activity.go | 23 ++++++++++++++++++----- vault/request_handling.go | 7 +++++-- 8 files changed, 83 insertions(+), 28 deletions(-) create mode 100644 changelog/20680.txt diff --git a/changelog/20680.txt b/changelog/20680.txt new file mode 100644 index 
000000000000..ff80ac466092 --- /dev/null +++ b/changelog/20680.txt @@ -0,0 +1,6 @@ +```release-note:improvement +core (enterprise): support reloading configuration for automated reporting via SIGHUP +``` +```release-note:improvement +core (enterprise): license updates trigger a reload of reporting and the activity log +``` \ No newline at end of file diff --git a/command/server.go b/command/server.go index 69ffe9efc0f4..15e4e9d60ca1 100644 --- a/command/server.go +++ b/command/server.go @@ -1758,6 +1758,9 @@ func (c *ServerCommand) Run(args []string) int { c.UI.Error(err.Error()) } + if err := core.ReloadCensus(); err != nil { + c.UI.Error(err.Error()) + } select { case c.licenseReloadedCh <- err: default: diff --git a/vault/activity_log.go b/vault/activity_log.go index 08faaba15501..28453d3ab2b3 100644 --- a/vault/activity_log.go +++ b/vault/activity_log.go @@ -1058,12 +1058,25 @@ func (a *ActivityLog) queriesAvailable(ctx context.Context) (bool, error) { // setupActivityLog hooks up the singleton ActivityLog into Core. func (c *Core) setupActivityLog(ctx context.Context, wg *sync.WaitGroup) error { + c.activityLogLock.Lock() + defer c.activityLogLock.Unlock() + return c.setupActivityLogLocked(ctx, wg) +} + +// setupActivityLogLocked hooks up the singleton ActivityLog into Core. +// this function should be called with activityLogLock. +func (c *Core) setupActivityLogLocked(ctx context.Context, wg *sync.WaitGroup) error { logger := c.baseLogger.Named("activity") c.AddLogger(logger) if os.Getenv("VAULT_DISABLE_ACTIVITY_LOG") != "" { - logger.Info("activity log disabled via environment variable") - return nil + if c.CensusLicensingEnabled() { + logger.Warn("activity log disabled via environment variable while reporting is enabled. 
" + + "Reporting will override, and the activity log will be enabled") + } else { + logger.Info("activity log disabled via environment variable") + return nil + } } view := c.systemBarrierView.SubView(activitySubPath) @@ -1104,15 +1117,16 @@ func (c *Core) setupActivityLog(ctx context.Context, wg *sync.WaitGroup) error { }(manager.retentionMonths) manager.CensusReportDone = make(chan bool) - go c.activityLog.CensusReport(ctx, c.censusAgent, c.billingStart) + go c.activityLog.CensusReport(ctx, c.CensusAgent(), c.BillingStart()) } return nil } -// stopActivityLog removes the ActivityLog from Core +// stopActivityLogLocked removes the ActivityLog from Core // and frees any resources. -func (c *Core) stopActivityLog() { +// this function should be called with activityLogLock +func (c *Core) stopActivityLogLocked() { // preSeal may run before startActivityLog got a chance to complete. if c.activityLog != nil { // Shut down background worker @@ -1122,6 +1136,14 @@ func (c *Core) stopActivityLog() { c.activityLog = nil } +// stopActivityLog removes the ActivityLog from Core +// and frees any resources. 
+func (c *Core) stopActivityLog() { + c.activityLogLock.Lock() + defer c.activityLogLock.Unlock() + c.stopActivityLogLocked() +} + func (a *ActivityLog) StartOfNextMonth() time.Time { a.l.RLock() defer a.l.RUnlock() diff --git a/vault/activity_log_test.go b/vault/activity_log_test.go index 75f6e90152bd..d046c5ecaf75 100644 --- a/vault/activity_log_test.go +++ b/vault/activity_log_test.go @@ -835,8 +835,8 @@ func TestActivityLog_API_ConfigCRUD(t *testing.T) { "retention_months": 24, "enabled": activityLogEnabledDefaultValue, "queries_available": false, - "reporting_enabled": core.censusLicensingEnabled, - "billing_start_timestamp": core.billingStart, + "reporting_enabled": core.CensusLicensingEnabled(), + "billing_start_timestamp": core.BillingStart(), "minimum_retention_months": core.activityLog.configOverrides.MinimumRetentionMonths, } @@ -919,8 +919,8 @@ func TestActivityLog_API_ConfigCRUD(t *testing.T) { "retention_months": 2, "enabled": "enable", "queries_available": false, - "reporting_enabled": core.censusLicensingEnabled, - "billing_start_timestamp": core.billingStart, + "reporting_enabled": core.CensusLicensingEnabled(), + "billing_start_timestamp": core.BillingStart(), "minimum_retention_months": core.activityLog.configOverrides.MinimumRetentionMonths, } @@ -958,8 +958,8 @@ func TestActivityLog_API_ConfigCRUD(t *testing.T) { "retention_months": 24, "enabled": activityLogEnabledDefaultValue, "queries_available": false, - "reporting_enabled": core.censusLicensingEnabled, - "billing_start_timestamp": core.billingStart, + "reporting_enabled": core.CensusLicensingEnabled(), + "billing_start_timestamp": core.BillingStart(), "minimum_retention_months": core.activityLog.configOverrides.MinimumRetentionMonths, } diff --git a/vault/census.go b/vault/census.go index 9d916dc14896..bb1f4bc61f3d 100644 --- a/vault/census.go +++ b/vault/census.go @@ -2,8 +2,15 @@ package vault +import "time" + // CensusAgent is a stub for OSS -type CensusReporter struct{} +type 
CensusReporter interface{} // setupCensusAgent is a stub for OSS. -func (c *Core) setupCensusAgent() error { return nil } +func (c *Core) setupCensusAgent() error { return nil } +func (c *Core) BillingStart() time.Time { return time.Time{} } +func (c *Core) CensusLicensingEnabled() bool { return false } +func (c *Core) CensusAgent() CensusReporter { return nil } +func (c *Core) ReloadCensus() error { return nil } +func (c *Core) teardownCensusAgent() error { return nil } diff --git a/vault/core.go b/vault/core.go index 53ecd4e10022..fe14ce30ab66 100644 --- a/vault/core.go +++ b/vault/core.go @@ -395,6 +395,8 @@ type Core struct { // activityLog is used to track active client count activityLog *ActivityLog + // activityLogLock protects the activityLog and activityLogConfig + activityLogLock sync.RWMutex // metricsCh is used to stop the metrics streaming metricsCh chan struct{} @@ -603,16 +605,11 @@ type Core struct { clusterHeartbeatInterval time.Duration + // activityLogConfig contains override values for the activity log + // it is protected by activityLogLock activityLogConfig ActivityLogCoreConfig - // censusAgent is the mechanism used for reporting Vault's billing data. - censusAgent CensusReporter - - // censusLicensingEnabled records whether Vault is exporting census metrics - censusLicensingEnabled bool - - // billingStart keeps track of the billing start time for exporting census metrics - billingStart time.Time + censusConfig atomic.Value // activeTime is set on active nodes indicating the time at which this node // became active. 
@@ -2467,6 +2464,10 @@ func (c *Core) preSeal() error { result = multierror.Append(result, fmt.Errorf("error stopping expiration: %w", err)) } c.stopActivityLog() + // Clean up the censusAgent on seal + if err := c.teardownCensusAgent(); err != nil { + result = multierror.Append(result, fmt.Errorf("error tearing down reporting agent: %w", err)) + } if err := c.teardownCredentials(context.Background()); err != nil { result = multierror.Append(result, fmt.Errorf("error tearing down credentials: %w", err)) diff --git a/vault/logical_system_activity.go b/vault/logical_system_activity.go index 28a4a5ff111a..15560410dfcb 100644 --- a/vault/logical_system_activity.go +++ b/vault/logical_system_activity.go @@ -167,7 +167,9 @@ func parseStartEndTimes(a *ActivityLog, d *framework.FieldData) (time.Time, time // This endpoint is not used by the UI. The UI's "export" feature is entirely client-side. func (b *SystemBackend) handleClientExport(ctx context.Context, req *logical.Request, d *framework.FieldData) (*logical.Response, error) { + b.Core.activityLogLock.RLock() a := b.Core.activityLog + b.Core.activityLogLock.RUnlock() if a == nil { return logical.ErrorResponse("no activity log present"), nil } @@ -198,7 +200,9 @@ func (b *SystemBackend) handleClientExport(ctx context.Context, req *logical.Req } func (b *SystemBackend) handleClientMetricQuery(ctx context.Context, req *logical.Request, d *framework.FieldData) (*logical.Response, error) { + b.Core.activityLogLock.RLock() a := b.Core.activityLog + b.Core.activityLogLock.RUnlock() if a == nil { return logical.ErrorResponse("no activity log present"), nil } @@ -228,7 +232,9 @@ func (b *SystemBackend) handleClientMetricQuery(ctx context.Context, req *logica } func (b *SystemBackend) handleMonthlyActivityCount(ctx context.Context, req *logical.Request, d *framework.FieldData) (*logical.Response, error) { + b.Core.activityLogLock.RLock() a := b.Core.activityLog + b.Core.activityLogLock.RUnlock() if a == nil { return 
logical.ErrorResponse("no activity log present"), nil } @@ -247,7 +253,9 @@ func (b *SystemBackend) handleMonthlyActivityCount(ctx context.Context, req *log } func (b *SystemBackend) handleActivityConfigRead(ctx context.Context, req *logical.Request, d *framework.FieldData) (*logical.Response, error) { + b.Core.activityLogLock.RLock() a := b.Core.activityLog + b.Core.activityLogLock.RUnlock() if a == nil { return logical.ErrorResponse("no activity log present"), nil } @@ -272,15 +280,17 @@ func (b *SystemBackend) handleActivityConfigRead(ctx context.Context, req *logic "retention_months": config.RetentionMonths, "enabled": config.Enabled, "queries_available": qa, - "reporting_enabled": b.Core.censusLicensingEnabled, - "billing_start_timestamp": b.Core.billingStart, + "reporting_enabled": b.Core.CensusLicensingEnabled(), + "billing_start_timestamp": b.Core.BillingStart(), "minimum_retention_months": a.configOverrides.MinimumRetentionMonths, }, }, nil } func (b *SystemBackend) handleActivityConfigUpdate(ctx context.Context, req *logical.Request, d *framework.FieldData) (*logical.Response, error) { + b.Core.activityLogLock.RLock() a := b.Core.activityLog + b.Core.activityLogLock.RUnlock() if a == nil { return logical.ErrorResponse("no activity log present"), nil } @@ -331,7 +341,7 @@ func (b *SystemBackend) handleActivityConfigUpdate(ctx context.Context, req *log activityLogEnabledDefault && config.Enabled == "default" && enabledStr == "disable" { // if census is enabled, the activity log cannot be disabled - if a.core.censusLicensingEnabled { + if a.core.CensusLicensingEnabled() { return logical.ErrorResponse("cannot disable the activity log while Reporting is enabled"), logical.ErrInvalidRequest } warnings = append(warnings, "the current monthly segment will be deleted because the activity log was disabled") @@ -346,6 +356,9 @@ func (b *SystemBackend) handleActivityConfigUpdate(ctx context.Context, req *log } } + a.core.activityLogLock.RLock() + 
minimumRetentionMonths := a.configOverrides.MinimumRetentionMonths + a.core.activityLogLock.RUnlock() enabled := config.Enabled == "enable" if !enabled && config.Enabled == "default" { enabled = activityLogEnabledDefault @@ -355,8 +368,8 @@ func (b *SystemBackend) handleActivityConfigUpdate(ctx context.Context, req *log return logical.ErrorResponse("retention_months cannot be 0 while enabled"), logical.ErrInvalidRequest } - if a.core.censusLicensingEnabled && config.RetentionMonths < a.configOverrides.MinimumRetentionMonths { - return logical.ErrorResponse("retention_months must be at least %d while Reporting is enabled", a.configOverrides.MinimumRetentionMonths), logical.ErrInvalidRequest + if a.core.CensusLicensingEnabled() && config.RetentionMonths < minimumRetentionMonths { + return logical.ErrorResponse("retention_months must be at least %d while Reporting is enabled", minimumRetentionMonths), logical.ErrInvalidRequest } // Store the config diff --git a/vault/request_handling.go b/vault/request_handling.go index 151792610f4e..fa0c0968fbd7 100644 --- a/vault/request_handling.go +++ b/vault/request_handling.go @@ -421,9 +421,12 @@ func (c *Core) checkToken(ctx context.Context, req *logical.Request, unauth bool auth.PolicyResults.GrantingPolicies = append(auth.PolicyResults.GrantingPolicies, authResults.SentinelResults.GrantingPolicies...) 
} + c.activityLogLock.RLock() + activityLog := c.activityLog + c.activityLogLock.RUnlock() // If it is an authenticated ( i.e with vault token ) request, increment client count - if !unauth && c.activityLog != nil { - c.activityLog.HandleTokenUsage(ctx, te, clientID, isTWE) + if !unauth && activityLog != nil { + activityLog.HandleTokenUsage(ctx, te, clientID, isTWE) } return auth, te, nil } From 9c07a1ca84472d2b77064d4669319dc5430bc845 Mon Sep 17 00:00:00 2001 From: Mike Palmiotto Date: Mon, 22 May 2023 09:22:45 -0400 Subject: [PATCH 20/26] backport of commit 810d504e4f676e857632230bf565eaa214927bcd --- changelog/20694.txt | 4 ++++ vault/logical_system_activity.go | 17 ++++++++++++++--- .../api-docs/system/internal-counters.mdx | 4 ++++ 3 files changed, 22 insertions(+), 3 deletions(-) create mode 100644 changelog/20694.txt diff --git a/changelog/20694.txt b/changelog/20694.txt new file mode 100644 index 000000000000..07f790a666dd --- /dev/null +++ b/changelog/20694.txt @@ -0,0 +1,4 @@ +```release-note:improvement +api: GET ... 
/sys/internal/counters/activity?current_billing_period=true now +results in a response which contains the full billing period +``` diff --git a/vault/logical_system_activity.go b/vault/logical_system_activity.go index 15560410dfcb..3fc9487bf112 100644 --- a/vault/logical_system_activity.go +++ b/vault/logical_system_activity.go @@ -19,6 +19,10 @@ func (b *SystemBackend) activityQueryPath() *framework.Path { return &framework.Path{ Pattern: "internal/counters/activity$", Fields: map[string]*framework.FieldSchema{ + "current_billing_period": { + Type: framework.TypeBool, + Description: "Query utilization for configured billing period", + }, "start_time": { Type: framework.TypeTime, Description: "Start of query interval", @@ -200,6 +204,7 @@ func (b *SystemBackend) handleClientExport(ctx context.Context, req *logical.Req } func (b *SystemBackend) handleClientMetricQuery(ctx context.Context, req *logical.Request, d *framework.FieldData) (*logical.Response, error) { + var startTime, endTime time.Time b.Core.activityLogLock.RLock() a := b.Core.activityLog b.Core.activityLogLock.RUnlock() @@ -207,9 +212,15 @@ func (b *SystemBackend) handleClientMetricQuery(ctx context.Context, req *logica return logical.ErrorResponse("no activity log present"), nil } - startTime, endTime, err := parseStartEndTimes(a, d) - if err != nil { - return logical.ErrorResponse(err.Error()), nil + if d.Get("current_billing_period").(bool) { + startTime = b.Core.BillingStart() + endTime = time.Now().UTC() + } else { + var err error + startTime, endTime, err = parseStartEndTimes(a, d) + if err != nil { + return logical.ErrorResponse(err.Error()), nil + } } var limitNamespaces int diff --git a/website/content/api-docs/system/internal-counters.mdx b/website/content/api-docs/system/internal-counters.mdx index e87943a8158e..0fc9963e9e09 100644 --- a/website/content/api-docs/system/internal-counters.mdx +++ b/website/content/api-docs/system/internal-counters.mdx @@ -284,6 +284,10 @@ This endpoint was 
added in Vault 1.6. - `limit_namespaces` `(int, optional)` - Controls the total number of by_namespace data returned. This can be used to return the client counts for the specified number of namespaces having highest activity. If no `limit_namespaces` parameter is specified, client counts for all namespaces in specified usage period is returned. +- `current_billing_period` `(bool, optional)` - Uses the builtin billing start + timestamp as `start_time` and the current time as the `end_time`, returning a + response with the current billing period information without having to + explicitly provide a start and end time. ### Sample Request From 0bb3bad811ebf5db5b165c7d8ff36d460858d8c1 Mon Sep 17 00:00:00 2001 From: miagilepner Date: Tue, 23 May 2023 11:58:51 +0200 Subject: [PATCH 21/26] backport of commit 5b23dd506fb3d2c79f4a18b995a72548560cc799 --- vault/activity/generation/generate_data.pb.go | 72 ++++---- vault/activity/generation/generate_data.proto | 11 +- .../logical_system_activity_write_testonly.go | 120 +++++++++++++ ...cal_system_activity_write_testonly_test.go | 163 ++++++++++++++++++ 4 files changed, 319 insertions(+), 47 deletions(-) diff --git a/vault/activity/generation/generate_data.pb.go b/vault/activity/generation/generate_data.pb.go index 404ed373a9e7..522a5e3609e7 100644 --- a/vault/activity/generation/generate_data.pb.go +++ b/vault/activity/generation/generate_data.pb.go @@ -439,12 +439,11 @@ type Client struct { Id string `protobuf:"bytes,1,opt,name=id,proto3" json:"id,omitempty"` Count int32 `protobuf:"varint,2,opt,name=count,proto3" json:"count,omitempty"` - TimesSeen int32 `protobuf:"varint,3,opt,name=times_seen,json=timesSeen,proto3" json:"times_seen,omitempty"` - Repeated bool `protobuf:"varint,4,opt,name=repeated,proto3" json:"repeated,omitempty"` - RepeatedFromMonth int32 `protobuf:"varint,5,opt,name=repeated_from_month,json=repeatedFromMonth,proto3" json:"repeated_from_month,omitempty"` - Namespace string 
`protobuf:"bytes,6,opt,name=namespace,proto3" json:"namespace,omitempty"` - Mount string `protobuf:"bytes,7,opt,name=mount,proto3" json:"mount,omitempty"` - NonEntity bool `protobuf:"varint,8,opt,name=non_entity,json=nonEntity,proto3" json:"non_entity,omitempty"` + Repeated bool `protobuf:"varint,3,opt,name=repeated,proto3" json:"repeated,omitempty"` + RepeatedFromMonth int32 `protobuf:"varint,4,opt,name=repeated_from_month,json=repeatedFromMonth,proto3" json:"repeated_from_month,omitempty"` + Namespace string `protobuf:"bytes,5,opt,name=namespace,proto3" json:"namespace,omitempty"` + Mount string `protobuf:"bytes,6,opt,name=mount,proto3" json:"mount,omitempty"` + NonEntity bool `protobuf:"varint,7,opt,name=non_entity,json=nonEntity,proto3" json:"non_entity,omitempty"` } func (x *Client) Reset() { @@ -493,13 +492,6 @@ func (x *Client) GetCount() int32 { return 0 } -func (x *Client) GetTimesSeen() int32 { - if x != nil { - return x.TimesSeen - } - return 0 -} - func (x *Client) GetRepeated() bool { if x != nil { return x.Repeated @@ -584,36 +576,34 @@ var file_vault_activity_generation_generate_data_proto_rawDesc = []byte{ 0x74, 0x73, 0x12, 0x2c, 0x0a, 0x07, 0x63, 0x6c, 0x69, 0x65, 0x6e, 0x74, 0x73, 0x18, 0x01, 0x20, 0x03, 0x28, 0x0b, 0x32, 0x12, 0x2e, 0x67, 0x65, 0x6e, 0x65, 0x72, 0x61, 0x74, 0x69, 0x6f, 0x6e, 0x2e, 0x43, 0x6c, 0x69, 0x65, 0x6e, 0x74, 0x52, 0x07, 0x63, 0x6c, 0x69, 0x65, 0x6e, 0x74, 0x73, - 0x22, 0xec, 0x01, 0x0a, 0x06, 0x43, 0x6c, 0x69, 0x65, 0x6e, 0x74, 0x12, 0x0e, 0x0a, 0x02, 0x69, + 0x22, 0xcd, 0x01, 0x0a, 0x06, 0x43, 0x6c, 0x69, 0x65, 0x6e, 0x74, 0x12, 0x0e, 0x0a, 0x02, 0x69, 0x64, 0x18, 0x01, 0x20, 0x01, 0x28, 0x09, 0x52, 0x02, 0x69, 0x64, 0x12, 0x14, 0x0a, 0x05, 0x63, 0x6f, 0x75, 0x6e, 0x74, 0x18, 0x02, 0x20, 0x01, 0x28, 0x05, 0x52, 0x05, 0x63, 0x6f, 0x75, 0x6e, - 0x74, 0x12, 0x1d, 0x0a, 0x0a, 0x74, 0x69, 0x6d, 0x65, 0x73, 0x5f, 0x73, 0x65, 0x65, 0x6e, 0x18, - 0x03, 0x20, 0x01, 0x28, 0x05, 0x52, 0x09, 0x74, 0x69, 0x6d, 0x65, 0x73, 0x53, 0x65, 
0x65, 0x6e, - 0x12, 0x1a, 0x0a, 0x08, 0x72, 0x65, 0x70, 0x65, 0x61, 0x74, 0x65, 0x64, 0x18, 0x04, 0x20, 0x01, - 0x28, 0x08, 0x52, 0x08, 0x72, 0x65, 0x70, 0x65, 0x61, 0x74, 0x65, 0x64, 0x12, 0x2e, 0x0a, 0x13, - 0x72, 0x65, 0x70, 0x65, 0x61, 0x74, 0x65, 0x64, 0x5f, 0x66, 0x72, 0x6f, 0x6d, 0x5f, 0x6d, 0x6f, - 0x6e, 0x74, 0x68, 0x18, 0x05, 0x20, 0x01, 0x28, 0x05, 0x52, 0x11, 0x72, 0x65, 0x70, 0x65, 0x61, - 0x74, 0x65, 0x64, 0x46, 0x72, 0x6f, 0x6d, 0x4d, 0x6f, 0x6e, 0x74, 0x68, 0x12, 0x1c, 0x0a, 0x09, - 0x6e, 0x61, 0x6d, 0x65, 0x73, 0x70, 0x61, 0x63, 0x65, 0x18, 0x06, 0x20, 0x01, 0x28, 0x09, 0x52, - 0x09, 0x6e, 0x61, 0x6d, 0x65, 0x73, 0x70, 0x61, 0x63, 0x65, 0x12, 0x14, 0x0a, 0x05, 0x6d, 0x6f, - 0x75, 0x6e, 0x74, 0x18, 0x07, 0x20, 0x01, 0x28, 0x09, 0x52, 0x05, 0x6d, 0x6f, 0x75, 0x6e, 0x74, - 0x12, 0x1d, 0x0a, 0x0a, 0x6e, 0x6f, 0x6e, 0x5f, 0x65, 0x6e, 0x74, 0x69, 0x74, 0x79, 0x18, 0x08, - 0x20, 0x01, 0x28, 0x08, 0x52, 0x09, 0x6e, 0x6f, 0x6e, 0x45, 0x6e, 0x74, 0x69, 0x74, 0x79, 0x2a, - 0xa0, 0x01, 0x0a, 0x0c, 0x57, 0x72, 0x69, 0x74, 0x65, 0x4f, 0x70, 0x74, 0x69, 0x6f, 0x6e, 0x73, - 0x12, 0x11, 0x0a, 0x0d, 0x57, 0x52, 0x49, 0x54, 0x45, 0x5f, 0x55, 0x4e, 0x4b, 0x4e, 0x4f, 0x57, - 0x4e, 0x10, 0x00, 0x12, 0x1d, 0x0a, 0x19, 0x57, 0x52, 0x49, 0x54, 0x45, 0x5f, 0x50, 0x52, 0x45, - 0x43, 0x4f, 0x4d, 0x50, 0x55, 0x54, 0x45, 0x44, 0x5f, 0x51, 0x55, 0x45, 0x52, 0x49, 0x45, 0x53, - 0x10, 0x01, 0x12, 0x1a, 0x0a, 0x16, 0x57, 0x52, 0x49, 0x54, 0x45, 0x5f, 0x44, 0x49, 0x53, 0x54, - 0x49, 0x4e, 0x43, 0x54, 0x5f, 0x43, 0x4c, 0x49, 0x45, 0x4e, 0x54, 0x53, 0x10, 0x02, 0x12, 0x12, - 0x0a, 0x0e, 0x57, 0x52, 0x49, 0x54, 0x45, 0x5f, 0x45, 0x4e, 0x54, 0x49, 0x54, 0x49, 0x45, 0x53, - 0x10, 0x03, 0x12, 0x17, 0x0a, 0x13, 0x57, 0x52, 0x49, 0x54, 0x45, 0x5f, 0x44, 0x49, 0x52, 0x45, - 0x43, 0x54, 0x5f, 0x54, 0x4f, 0x4b, 0x45, 0x4e, 0x53, 0x10, 0x04, 0x12, 0x15, 0x0a, 0x11, 0x57, - 0x52, 0x49, 0x54, 0x45, 0x5f, 0x49, 0x4e, 0x54, 0x45, 0x4e, 0x54, 0x5f, 0x4c, 0x4f, 0x47, 0x53, - 0x10, 0x05, 0x42, 0x36, 
0x5a, 0x34, 0x67, 0x69, 0x74, 0x68, 0x75, 0x62, 0x2e, 0x63, 0x6f, 0x6d, - 0x2f, 0x68, 0x61, 0x73, 0x68, 0x69, 0x63, 0x6f, 0x72, 0x70, 0x2f, 0x76, 0x61, 0x75, 0x6c, 0x74, - 0x2f, 0x76, 0x61, 0x75, 0x6c, 0x74, 0x2f, 0x61, 0x63, 0x74, 0x69, 0x76, 0x69, 0x74, 0x79, 0x2f, - 0x67, 0x65, 0x6e, 0x65, 0x72, 0x61, 0x74, 0x69, 0x6f, 0x6e, 0x62, 0x06, 0x70, 0x72, 0x6f, 0x74, - 0x6f, 0x33, + 0x74, 0x12, 0x1a, 0x0a, 0x08, 0x72, 0x65, 0x70, 0x65, 0x61, 0x74, 0x65, 0x64, 0x18, 0x03, 0x20, + 0x01, 0x28, 0x08, 0x52, 0x08, 0x72, 0x65, 0x70, 0x65, 0x61, 0x74, 0x65, 0x64, 0x12, 0x2e, 0x0a, + 0x13, 0x72, 0x65, 0x70, 0x65, 0x61, 0x74, 0x65, 0x64, 0x5f, 0x66, 0x72, 0x6f, 0x6d, 0x5f, 0x6d, + 0x6f, 0x6e, 0x74, 0x68, 0x18, 0x04, 0x20, 0x01, 0x28, 0x05, 0x52, 0x11, 0x72, 0x65, 0x70, 0x65, + 0x61, 0x74, 0x65, 0x64, 0x46, 0x72, 0x6f, 0x6d, 0x4d, 0x6f, 0x6e, 0x74, 0x68, 0x12, 0x1c, 0x0a, + 0x09, 0x6e, 0x61, 0x6d, 0x65, 0x73, 0x70, 0x61, 0x63, 0x65, 0x18, 0x05, 0x20, 0x01, 0x28, 0x09, + 0x52, 0x09, 0x6e, 0x61, 0x6d, 0x65, 0x73, 0x70, 0x61, 0x63, 0x65, 0x12, 0x14, 0x0a, 0x05, 0x6d, + 0x6f, 0x75, 0x6e, 0x74, 0x18, 0x06, 0x20, 0x01, 0x28, 0x09, 0x52, 0x05, 0x6d, 0x6f, 0x75, 0x6e, + 0x74, 0x12, 0x1d, 0x0a, 0x0a, 0x6e, 0x6f, 0x6e, 0x5f, 0x65, 0x6e, 0x74, 0x69, 0x74, 0x79, 0x18, + 0x07, 0x20, 0x01, 0x28, 0x08, 0x52, 0x09, 0x6e, 0x6f, 0x6e, 0x45, 0x6e, 0x74, 0x69, 0x74, 0x79, + 0x2a, 0xa0, 0x01, 0x0a, 0x0c, 0x57, 0x72, 0x69, 0x74, 0x65, 0x4f, 0x70, 0x74, 0x69, 0x6f, 0x6e, + 0x73, 0x12, 0x11, 0x0a, 0x0d, 0x57, 0x52, 0x49, 0x54, 0x45, 0x5f, 0x55, 0x4e, 0x4b, 0x4e, 0x4f, + 0x57, 0x4e, 0x10, 0x00, 0x12, 0x1d, 0x0a, 0x19, 0x57, 0x52, 0x49, 0x54, 0x45, 0x5f, 0x50, 0x52, + 0x45, 0x43, 0x4f, 0x4d, 0x50, 0x55, 0x54, 0x45, 0x44, 0x5f, 0x51, 0x55, 0x45, 0x52, 0x49, 0x45, + 0x53, 0x10, 0x01, 0x12, 0x1a, 0x0a, 0x16, 0x57, 0x52, 0x49, 0x54, 0x45, 0x5f, 0x44, 0x49, 0x53, + 0x54, 0x49, 0x4e, 0x43, 0x54, 0x5f, 0x43, 0x4c, 0x49, 0x45, 0x4e, 0x54, 0x53, 0x10, 0x02, 0x12, + 0x12, 0x0a, 0x0e, 0x57, 0x52, 0x49, 0x54, 0x45, 
0x5f, 0x45, 0x4e, 0x54, 0x49, 0x54, 0x49, 0x45, + 0x53, 0x10, 0x03, 0x12, 0x17, 0x0a, 0x13, 0x57, 0x52, 0x49, 0x54, 0x45, 0x5f, 0x44, 0x49, 0x52, + 0x45, 0x43, 0x54, 0x5f, 0x54, 0x4f, 0x4b, 0x45, 0x4e, 0x53, 0x10, 0x04, 0x12, 0x15, 0x0a, 0x11, + 0x57, 0x52, 0x49, 0x54, 0x45, 0x5f, 0x49, 0x4e, 0x54, 0x45, 0x4e, 0x54, 0x5f, 0x4c, 0x4f, 0x47, + 0x53, 0x10, 0x05, 0x42, 0x36, 0x5a, 0x34, 0x67, 0x69, 0x74, 0x68, 0x75, 0x62, 0x2e, 0x63, 0x6f, + 0x6d, 0x2f, 0x68, 0x61, 0x73, 0x68, 0x69, 0x63, 0x6f, 0x72, 0x70, 0x2f, 0x76, 0x61, 0x75, 0x6c, + 0x74, 0x2f, 0x76, 0x61, 0x75, 0x6c, 0x74, 0x2f, 0x61, 0x63, 0x74, 0x69, 0x76, 0x69, 0x74, 0x79, + 0x2f, 0x67, 0x65, 0x6e, 0x65, 0x72, 0x61, 0x74, 0x69, 0x6f, 0x6e, 0x62, 0x06, 0x70, 0x72, 0x6f, + 0x74, 0x6f, 0x33, } var ( diff --git a/vault/activity/generation/generate_data.proto b/vault/activity/generation/generate_data.proto index b62209a21bac..c23a4141d054 100644 --- a/vault/activity/generation/generate_data.proto +++ b/vault/activity/generation/generate_data.proto @@ -48,10 +48,9 @@ message Clients { message Client { string id = 1; int32 count = 2; - int32 times_seen = 3; - bool repeated = 4; - int32 repeated_from_month = 5; - string namespace = 6; - string mount = 7; - bool non_entity = 8; + bool repeated = 3; + int32 repeated_from_month = 4; + string namespace = 5; + string mount = 6; + bool non_entity = 7; } diff --git a/vault/logical_system_activity_write_testonly.go b/vault/logical_system_activity_write_testonly.go index 5d289793df56..dc8b5485c2e3 100644 --- a/vault/logical_system_activity_write_testonly.go +++ b/vault/logical_system_activity_write_testonly.go @@ -7,9 +7,14 @@ package vault import ( "context" + "errors" + "fmt" + "github.com/hashicorp/go-uuid" + "github.com/hashicorp/vault/helper/namespace" "github.com/hashicorp/vault/sdk/framework" "github.com/hashicorp/vault/sdk/logical" + "github.com/hashicorp/vault/vault/activity" "github.com/hashicorp/vault/vault/activity/generation" 
"google.golang.org/protobuf/encoding/protojson" ) @@ -51,3 +56,118 @@ func (b *SystemBackend) handleActivityWriteData(ctx context.Context, request *lo } return nil, nil } + +// singleMonthActivityClients holds a single month's client IDs, in the order they were seen +type singleMonthActivityClients struct { + // clients are indexed by ID + clients []*activity.EntityRecord +} + +// multipleMonthsActivityClients holds multiple month's data +type multipleMonthsActivityClients struct { + // months are in order, with month 0 being the current month and index 1 being 1 month ago + months []*singleMonthActivityClients +} + +// addNewClients generates clients according to the given parameters, and adds them to the month +// the client will always have the mountAccessor as its mount accessor +func (s *singleMonthActivityClients) addNewClients(c *generation.Client, mountAccessor string) error { + count := 1 + if c.Count > 1 { + count = int(c.Count) + } + for i := 0; i < count; i++ { + record := &activity.EntityRecord{ + ClientID: c.Id, + NamespaceID: c.Namespace, + NonEntity: c.NonEntity, + MountAccessor: mountAccessor, + } + if record.ClientID == "" { + var err error + record.ClientID, err = uuid.GenerateUUID() + if err != nil { + return err + } + } + s.clients = append(s.clients, record) + } + return nil +} + +// processMonth populates a month of client data +func (m *multipleMonthsActivityClients) processMonth(ctx context.Context, core *Core, month *generation.Data) error { + if month.GetAll() == nil { + return errors.New("segmented monthly data is not yet supported") + } + + // default to using the root namespace and the first mount on the root namespace + mounts, err := core.ListMounts() + if err != nil { + return err + } + defaultMountAccessorRootNS := "" + for _, mount := range mounts { + if mount.NamespaceID == namespace.RootNamespaceID { + defaultMountAccessorRootNS = mount.Accessor + break + } + } + addingTo := m.months[month.GetMonthsAgo()] + + for _, clients := 
range month.GetAll().Clients { + if clients.Repeated || clients.RepeatedFromMonth > 0 { + return errors.New("repeated clients are not yet supported") + } + + if clients.Namespace == "" { + clients.Namespace = namespace.RootNamespaceID + } + + // verify that the namespace exists + ns, err := core.NamespaceByID(ctx, clients.Namespace) + if err != nil { + return err + } + + // verify that the mount exists + if clients.Mount != "" { + nctx := namespace.ContextWithNamespace(ctx, ns) + mountEntry := core.router.MatchingMountEntry(nctx, clients.Mount) + if mountEntry == nil { + return fmt.Errorf("unable to find matching mount in namespace %s", clients.Namespace) + } + } + + mountAccessor := defaultMountAccessorRootNS + if clients.Namespace != namespace.RootNamespaceID && clients.Mount == "" { + // if we're not using the root namespace, find a mount on the namespace that we are using + found := false + for _, mount := range mounts { + if mount.NamespaceID == clients.Namespace { + mountAccessor = mount.Accessor + found = true + break + } + } + if !found { + return fmt.Errorf("unable to find matching mount in namespace %s", clients.Namespace) + } + } + err = addingTo.addNewClients(clients, mountAccessor) + if err != nil { + return err + } + } + return nil +} + +func newMultipleMonthsActivityClients(numberOfMonths int) *multipleMonthsActivityClients { + m := &multipleMonthsActivityClients{ + months: make([]*singleMonthActivityClients, numberOfMonths), + } + for i := 0; i < numberOfMonths; i++ { + m.months[i] = new(singleMonthActivityClients) + } + return m +} diff --git a/vault/logical_system_activity_write_testonly_test.go b/vault/logical_system_activity_write_testonly_test.go index 57a4b8d410db..2a0f44c5bad2 100644 --- a/vault/logical_system_activity_write_testonly_test.go +++ b/vault/logical_system_activity_write_testonly_test.go @@ -6,10 +6,12 @@ package vault import ( + "context" "testing" "github.com/hashicorp/vault/helper/namespace" 
"github.com/hashicorp/vault/sdk/logical" + "github.com/hashicorp/vault/vault/activity/generation" "github.com/stretchr/testify/require" ) @@ -82,3 +84,164 @@ func TestSystemBackend_handleActivityWriteData(t *testing.T) { }) } } + +// Test_singleMonthActivityClients_addNewClients verifies that new clients are +// created correctly, adhering to the requested parameters. The clients should +// use the inputted mount and a generated ID if one is not supplied. The new +// client should be added to the month's `clients` slice +func Test_singleMonthActivityClients_addNewClients(t *testing.T) { + tests := []struct { + name string + mount string + clients *generation.Client + wantNamespace string + wantMount string + wantID string + }{ + { + name: "default mount is used", + mount: "default_mount", + wantMount: "default_mount", + clients: &generation.Client{}, + }, + { + name: "record namespace is used, default mount is used", + mount: "default_mount", + wantNamespace: "ns", + wantMount: "default_mount", + clients: &generation.Client{ + Namespace: "ns", + Mount: "mount", + }, + }, + { + name: "predefined ID is used", + clients: &generation.Client{ + Id: "client_id", + }, + wantID: "client_id", + }, + { + name: "non zero count", + clients: &generation.Client{ + Count: 5, + }, + }, + { + name: "non entity client", + clients: &generation.Client{ + NonEntity: true, + }, + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + m := &singleMonthActivityClients{} + err := m.addNewClients(tt.clients, tt.mount) + require.NoError(t, err) + numNew := tt.clients.Count + if numNew == 0 { + numNew = 1 + } + require.Len(t, m.clients, int(numNew)) + for _, rec := range m.clients { + require.NotNil(t, rec) + require.Equal(t, tt.wantNamespace, rec.NamespaceID) + require.Equal(t, tt.wantMount, rec.MountAccessor) + require.Equal(t, tt.clients.NonEntity, rec.NonEntity) + if tt.wantID != "" { + require.Equal(t, tt.wantID, rec.ClientID) + } else { + require.NotEqual(t, "", 
rec.ClientID) + } + } + }) + } +} + +// Test_multipleMonthsActivityClients_processMonth verifies that a month of data +// is added correctly. The test checks that default values are handled correctly +// for mounts and namespaces. +func Test_multipleMonthsActivityClients_processMonth(t *testing.T) { + core, _, _ := TestCoreUnsealed(t) + tests := []struct { + name string + clients *generation.Data + wantError bool + numMonths int + }{ + { + name: "specified namespace and mount exist", + clients: &generation.Data{ + Clients: &generation.Data_All{All: &generation.Clients{Clients: []*generation.Client{{ + Namespace: namespace.RootNamespaceID, + Mount: "identity/", + }}}}, + }, + numMonths: 1, + }, + { + name: "specified namespace exists, mount empty", + clients: &generation.Data{ + Clients: &generation.Data_All{All: &generation.Clients{Clients: []*generation.Client{{ + Namespace: namespace.RootNamespaceID, + }}}}, + }, + numMonths: 1, + }, + { + name: "empty namespace and mount", + clients: &generation.Data{ + Clients: &generation.Data_All{All: &generation.Clients{Clients: []*generation.Client{{}}}}, + }, + numMonths: 1, + }, + { + name: "namespace doesn't exist", + clients: &generation.Data{ + Clients: &generation.Data_All{All: &generation.Clients{Clients: []*generation.Client{{ + Namespace: "abcd", + }}}}, + }, + wantError: true, + numMonths: 1, + }, + { + name: "namespace exists, mount doesn't exist", + clients: &generation.Data{ + Clients: &generation.Data_All{All: &generation.Clients{Clients: []*generation.Client{{ + Namespace: namespace.RootNamespaceID, + Mount: "mount", + }}}}, + }, + wantError: true, + numMonths: 1, + }, + { + name: "older month", + clients: &generation.Data{ + Month: &generation.Data_MonthsAgo{MonthsAgo: 4}, + Clients: &generation.Data_All{All: &generation.Clients{Clients: []*generation.Client{{}}}}, + }, + numMonths: 5, + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + m := 
newMultipleMonthsActivityClients(tt.numMonths) + err := m.processMonth(context.Background(), core, tt.clients) + if tt.wantError { + require.Error(t, err) + } else { + require.NoError(t, err) + require.Len(t, m.months[tt.clients.GetMonthsAgo()].clients, len(tt.clients.GetAll().Clients)) + for _, month := range m.months { + for _, c := range month.clients { + require.NotEmpty(t, c.NamespaceID) + require.NotEmpty(t, c.MountAccessor) + } + } + } + }) + } +} From d0cde52048c837a6651b733351d5107e249d6a32 Mon Sep 17 00:00:00 2001 From: miagilepner Date: Tue, 23 May 2023 18:25:23 +0200 Subject: [PATCH 22/26] backport of commit 018ea84997b49137ae3884e00e4dc9fc389f8b50 --- helper/metricsutil/gauge_process.go | 25 ++------- helper/metricsutil/gauge_process_test.go | 6 +- helper/timeutil/timeutil.go | 23 ++++++++ vault/activity_log.go | 67 ++++++++++++++-------- vault/activity_log_test.go | 71 +++++++++++++++++++++++- vault/activity_log_testing_util.go | 6 +- vault/activity_log_util_common.go | 2 +- 7 files changed, 148 insertions(+), 52 deletions(-) diff --git a/helper/metricsutil/gauge_process.go b/helper/metricsutil/gauge_process.go index 0ad0e9d876cf..538d8eb49bab 100644 --- a/helper/metricsutil/gauge_process.go +++ b/helper/metricsutil/gauge_process.go @@ -8,24 +8,9 @@ import ( "github.com/armon/go-metrics" log "github.com/hashicorp/go-hclog" + "github.com/hashicorp/vault/helper/timeutil" ) -// This interface allows unit tests to substitute in a simulated clock. -type clock interface { - Now() time.Time - NewTicker(time.Duration) *time.Ticker -} - -type defaultClock struct{} - -func (_ defaultClock) Now() time.Time { - return time.Now() -} - -func (_ defaultClock) NewTicker(d time.Duration) *time.Ticker { - return time.NewTicker(d) -} - // GaugeLabelValues is one gauge in a set sharing a single key, that // are measured in a batch. 
type GaugeLabelValues struct { @@ -73,7 +58,7 @@ type GaugeCollectionProcess struct { maxGaugeCardinality int // time source - clock clock + clock timeutil.Clock } // NewGaugeCollectionProcess creates a new collection process for the callback @@ -98,7 +83,7 @@ func NewGaugeCollectionProcess( gaugeInterval, maxGaugeCardinality, logger, - defaultClock{}, + timeutil.DefaultClock{}, ) } @@ -121,7 +106,7 @@ func (m *ClusterMetricSink) NewGaugeCollectionProcess( m.GaugeInterval, m.MaxGaugeCardinality, logger, - defaultClock{}, + timeutil.DefaultClock{}, ) } @@ -134,7 +119,7 @@ func newGaugeCollectionProcessWithClock( gaugeInterval time.Duration, maxGaugeCardinality int, logger log.Logger, - clock clock, + clock timeutil.Clock, ) (*GaugeCollectionProcess, error) { process := &GaugeCollectionProcess{ stop: make(chan struct{}, 1), diff --git a/helper/metricsutil/gauge_process_test.go b/helper/metricsutil/gauge_process_test.go index 9971714e04e3..b6c6005c8727 100644 --- a/helper/metricsutil/gauge_process_test.go +++ b/helper/metricsutil/gauge_process_test.go @@ -12,6 +12,7 @@ import ( "github.com/armon/go-metrics" log "github.com/hashicorp/go-hclog" + "github.com/hashicorp/vault/helper/timeutil" ) // SimulatedTime maintains a virtual clock so the test isn't @@ -21,9 +22,10 @@ import ( type SimulatedTime struct { now time.Time tickerBarrier chan *SimulatedTicker + timeutil.DefaultClock } -var _ clock = &SimulatedTime{} +var _ timeutil.Clock = &SimulatedTime{} type SimulatedTicker struct { ticker *time.Ticker @@ -118,7 +120,7 @@ func TestGauge_Creation(t *testing.T) { t.Fatalf("Error creating collection process: %v", err) } - if _, ok := p.clock.(defaultClock); !ok { + if _, ok := p.clock.(timeutil.DefaultClock); !ok { t.Error("Default clock not installed.") } diff --git a/helper/timeutil/timeutil.go b/helper/timeutil/timeutil.go index a65d3cf908bc..0c1b1ec1553f 100644 --- a/helper/timeutil/timeutil.go +++ b/helper/timeutil/timeutil.go @@ -139,3 +139,26 @@ func 
SkipAtEndOfMonth(t *testing.T) { t.Skip("too close to end of month") } } + +// This interface allows unit tests to substitute in a simulated Clock. +type Clock interface { + Now() time.Time + NewTicker(time.Duration) *time.Ticker + NewTimer(time.Duration) *time.Timer +} + +type DefaultClock struct{} + +var _ Clock = (*DefaultClock)(nil) + +func (_ DefaultClock) Now() time.Time { + return time.Now() +} + +func (_ DefaultClock) NewTicker(d time.Duration) *time.Ticker { + return time.NewTicker(d) +} + +func (_ DefaultClock) NewTimer(d time.Duration) *time.Timer { + return time.NewTimer(d) +} diff --git a/vault/activity_log.go b/vault/activity_log.go index 28453d3ab2b3..ef6e46d76a2c 100644 --- a/vault/activity_log.go +++ b/vault/activity_log.go @@ -179,6 +179,12 @@ type ActivityLog struct { // CensusReportInterval is the testing configuration for time between // Write() calls initiated in CensusReport. CensusReportInterval time.Duration + + // clock is used to support manipulating time in unit and integration tests + clock timeutil.Clock + // precomputedQueryWritten receives an element whenever a precomputed query + // is written. It's used for unit testing + precomputedQueryWritten chan struct{} } // These non-persistent configuration options allow us to disable @@ -196,6 +202,10 @@ type ActivityLogCoreConfig struct { // MinimumRetentionMonths defines the minimum value for retention MinimumRetentionMonths int + + // Clock holds a custom clock to modify time.Now, time.Ticker, time.Timer. + // If nil, the default functions from the time package are used + Clock timeutil.Clock } // NewActivityLog creates an activity log. 
@@ -205,6 +215,10 @@ func NewActivityLog(core *Core, logger log.Logger, view *BarrierView, metrics me return nil, err } + clock := core.activityLogConfig.Clock + if clock == nil { + clock = timeutil.DefaultClock{} + } a := &ActivityLog{ core: core, configOverrides: &core.activityLogConfig, @@ -218,7 +232,7 @@ func NewActivityLog(core *Core, logger log.Logger, view *BarrierView, metrics me doneCh: make(chan struct{}, 1), partialMonthClientTracker: make(map[string]*activity.EntityRecord), CensusReportInterval: time.Hour * 1, - + clock: clock, currentSegment: segmentInfo{ startTimestamp: 0, currentClients: &activity.EntityActivityLog{ @@ -234,6 +248,7 @@ func NewActivityLog(core *Core, logger log.Logger, view *BarrierView, metrics me }, standbyFragmentsReceived: make([]*activity.LogFragment, 0), inprocessExport: atomic.NewBool(false), + precomputedQueryWritten: make(chan struct{}), } config, err := a.loadConfigOrDefault(core.activeContext) @@ -265,7 +280,7 @@ func (a *ActivityLog) saveCurrentSegmentToStorage(ctx context.Context, force boo // :force: forces a save of tokens/entities even if the in-memory log is empty func (a *ActivityLog) saveCurrentSegmentToStorageLocked(ctx context.Context, force bool) error { defer a.metrics.MeasureSinceWithLabels([]string{"core", "activity", "segment_write"}, - time.Now(), []metricsutil.Label{}) + a.clock.Now(), []metricsutil.Label{}) // Swap out the pending fragments a.fragmentLock.Lock() @@ -424,7 +439,7 @@ func (a *ActivityLog) saveCurrentSegmentInternal(ctx context.Context, force bool case err != nil: a.logger.Error(fmt.Sprintf("unable to retrieve oldest version timestamp: %s", err.Error())) case len(a.currentSegment.tokenCount.CountByNamespaceID) > 0 && - (oldestUpgradeTime.Add(time.Duration(trackedTWESegmentPeriod * time.Hour)).Before(time.Now())): + (oldestUpgradeTime.Add(time.Duration(trackedTWESegmentPeriod * time.Hour)).Before(a.clock.Now())): a.logger.Error(fmt.Sprintf("storing nonzero token count over a month after 
vault was upgraded to %s", oldestVersion)) default: if len(a.currentSegment.tokenCount.CountByNamespaceID) > 0 { @@ -996,7 +1011,7 @@ func (a *ActivityLog) SetConfig(ctx context.Context, config activityConfig) { forceSave := false if a.enabled && a.currentSegment.startTimestamp == 0 { - a.startNewCurrentLogLocked(time.Now().UTC()) + a.startNewCurrentLogLocked(a.clock.Now().UTC()) // Force a save so we can distinguish between // // Month N-1: present @@ -1022,7 +1037,7 @@ func (a *ActivityLog) SetConfig(ctx context.Context, config activityConfig) { } // check for segments out of retention period, if it has changed - go a.retentionWorker(ctx, time.Now(), a.retentionMonths) + go a.retentionWorker(ctx, a.clock.Now(), a.retentionMonths) } // update the enable flag and reset the current log @@ -1088,7 +1103,7 @@ func (c *Core) setupActivityLogLocked(ctx context.Context, wg *sync.WaitGroup) e c.activityLog = manager // load activity log for "this month" into memory - err = manager.refreshFromStoredLog(manager.core.activeContext, wg, time.Now().UTC()) + err = manager.refreshFromStoredLog(manager.core.activeContext, wg, manager.clock.Now().UTC()) if err != nil { return err } @@ -1112,7 +1127,7 @@ func (c *Core) setupActivityLogLocked(ctx context.Context, wg *sync.WaitGroup) e // Signal when this is done so that unit tests can proceed. 
manager.retentionDone = make(chan struct{}) go func(months int) { - manager.retentionWorker(ctx, time.Now(), months) + manager.retentionWorker(ctx, manager.clock.Now(), months) close(manager.retentionDone) }(manager.retentionMonths) @@ -1149,7 +1164,7 @@ func (a *ActivityLog) StartOfNextMonth() time.Time { defer a.l.RUnlock() var segmentStart time.Time if a.currentSegment.startTimestamp == 0 { - segmentStart = time.Now().UTC() + segmentStart = a.clock.Now().UTC() } else { segmentStart = time.Unix(a.currentSegment.startTimestamp, 0).UTC() } @@ -1161,12 +1176,12 @@ func (a *ActivityLog) StartOfNextMonth() time.Time { // perfStandbyFragmentWorker handles scheduling fragments // to send via RPC; it runs on perf standby nodes only. func (a *ActivityLog) perfStandbyFragmentWorker(ctx context.Context) { - timer := time.NewTimer(time.Duration(0)) + timer := a.clock.NewTimer(time.Duration(0)) fragmentWaiting := false // Eat first event, so timer is stopped <-timer.C - endOfMonth := time.NewTimer(a.StartOfNextMonth().Sub(time.Now())) + endOfMonth := a.clock.NewTimer(a.StartOfNextMonth().Sub(a.clock.Now())) if a.configOverrides.DisableTimers { endOfMonth.Stop() } @@ -1238,8 +1253,8 @@ func (a *ActivityLog) perfStandbyFragmentWorker(ctx context.Context) { // Set timer for next month. // The current segment *probably* hasn't been set yet (via invalidation), // so don't rely on it. - target := timeutil.StartOfNextMonth(time.Now().UTC()) - endOfMonth.Reset(target.Sub(time.Now())) + target := timeutil.StartOfNextMonth(a.clock.Now().UTC()) + endOfMonth.Reset(target.Sub(a.clock.Now())) } } } @@ -1247,9 +1262,9 @@ func (a *ActivityLog) perfStandbyFragmentWorker(ctx context.Context) { // activeFragmentWorker handles scheduling the write of the next // segment. It runs on active nodes only. 
func (a *ActivityLog) activeFragmentWorker(ctx context.Context) { - ticker := time.NewTicker(activitySegmentInterval) + ticker := a.clock.NewTicker(activitySegmentInterval) - endOfMonth := time.NewTimer(a.StartOfNextMonth().Sub(time.Now())) + endOfMonth := a.clock.NewTimer(a.StartOfNextMonth().Sub(a.clock.Now())) if a.configOverrides.DisableTimers { endOfMonth.Stop() } @@ -1299,7 +1314,7 @@ func (a *ActivityLog) activeFragmentWorker(ctx context.Context) { // Reset the schedule to wait 10 minutes from this forced write. ticker.Stop() - ticker = time.NewTicker(activitySegmentInterval) + ticker = a.clock.NewTicker(activitySegmentInterval) // Simpler, but ticker.Reset was introduced in go 1.15: // ticker.Reset(activitySegmentInterval) @@ -1315,7 +1330,7 @@ func (a *ActivityLog) activeFragmentWorker(ctx context.Context) { go a.retentionWorker(ctx, currentTime.UTC(), a.retentionMonths) a.l.RUnlock() - delta := a.StartOfNextMonth().Sub(time.Now()) + delta := a.StartOfNextMonth().Sub(a.clock.Now()) if delta < 20*time.Minute { delta = 20 * time.Minute } @@ -1476,7 +1491,7 @@ func (a *ActivityLog) createCurrentFragment() { Clients: make([]*activity.EntityRecord, 0, 120), NonEntityTokens: make(map[string]uint64), } - a.fragmentCreation = time.Now().UTC() + a.fragmentCreation = a.clock.Now().UTC() // Signal that a new segment is available, start // the timer to send it. @@ -1576,13 +1591,13 @@ func (a *ActivityLog) handleQuery(ctx context.Context, startTime, endTime time.T // with the endTime equal to the end of the last month, and add in the current month // data. 
precomputedQueryEndTime := endTime - if timeutil.IsCurrentMonth(endTime, time.Now().UTC()) { + if timeutil.IsCurrentMonth(endTime, a.clock.Now().UTC()) { precomputedQueryEndTime = timeutil.EndOfMonth(timeutil.MonthsPreviousTo(1, timeutil.StartOfMonth(endTime))) computePartial = true } pq := &activity.PrecomputedQuery{} - if startTime.After(precomputedQueryEndTime) && timeutil.IsCurrentMonth(startTime, time.Now().UTC()) { + if startTime.After(precomputedQueryEndTime) && timeutil.IsCurrentMonth(startTime, a.clock.Now().UTC()) { // We're only calculating the partial month client count. Skip the precomputation // get call. pq = &activity.PrecomputedQuery{ @@ -1757,7 +1772,7 @@ func (a *ActivityLog) handleQuery(ctx context.Context, startTime, endTime time.T a.sortActivityLogMonthsResponse(months) // Modify the final month output to make response more consumable based on API request - months = modifyResponseMonths(months, startTime, endTime) + months = a.modifyResponseMonths(months, startTime, endTime) responseData["months"] = months return responseData, nil @@ -1765,13 +1780,13 @@ func (a *ActivityLog) handleQuery(ctx context.Context, startTime, endTime time.T // modifyResponseMonths fills out various parts of the query structure to help // activity log clients parse the returned query. 
-func modifyResponseMonths(months []*ResponseMonth, start time.Time, end time.Time) []*ResponseMonth { +func (a *ActivityLog) modifyResponseMonths(months []*ResponseMonth, start time.Time, end time.Time) []*ResponseMonth { if len(months) == 0 { return months } start = timeutil.StartOfMonth(start) end = timeutil.EndOfMonth(end) - if timeutil.IsCurrentMonth(end, time.Now().UTC()) { + if timeutil.IsCurrentMonth(end, a.clock.Now().UTC()) { end = timeutil.EndOfMonth(timeutil.StartOfMonth(end).AddDate(0, -1, 0)) } modifiedResponseMonths := make([]*ResponseMonth, 0) @@ -2291,7 +2306,7 @@ func (a *ActivityLog) precomputedQueryWorker(ctx context.Context) error { // If there's an intent log, finish it even if the feature is currently disabled. a.l.RLock() currentMonth := a.currentSegment.startTimestamp - // Base retention period on the month we are generating (even in the past)--- time.Now() + // Base retention period on the month we are generating (even in the past)--- a.clock.Now() // would work but this will be easier to control in tests. 
retentionWindow := timeutil.MonthsPreviousTo(a.retentionMonths, time.Unix(intent.NextMonth, 0).UTC()) a.l.RUnlock() @@ -2359,6 +2374,10 @@ func (a *ActivityLog) precomputedQueryWorker(ctx context.Context) error { a.logger.Info("finished computing queries", "month", endTime) + select { + case a.precomputedQueryWritten <- struct{}{}: + default: + } return nil } @@ -2452,7 +2471,7 @@ func (a *ActivityLog) populateNamespaceAndMonthlyBreakdowns() (map[int64]*proces byNamespace := make(map[string]*processByNamespace) byMonth := make(map[int64]*processMonth) for _, e := range a.partialMonthClientTracker { - processClientRecord(e, byNamespace, byMonth, time.Now()) + processClientRecord(e, byNamespace, byMonth, a.clock.Now()) } return byMonth, byNamespace } diff --git a/vault/activity_log_test.go b/vault/activity_log_test.go index d046c5ecaf75..219a55efd542 100644 --- a/vault/activity_log_test.go +++ b/vault/activity_log_test.go @@ -519,11 +519,13 @@ func TestActivityLog_StoreAndReadHyperloglog(t *testing.T) { // TestModifyResponseMonthsNilAppend calls modifyResponseMonths for a range of 5 months ago to now. It verifies that the // 5 months in the range are correct. 
func TestModifyResponseMonthsNilAppend(t *testing.T) { + core, _, _ := TestCoreUnsealed(t) + a := core.activityLog end := time.Now().UTC() start := timeutil.StartOfMonth(end).AddDate(0, -5, 0) responseMonthTimestamp := timeutil.StartOfMonth(end).AddDate(0, -3, 0).Format(time.RFC3339) responseMonths := []*ResponseMonth{{Timestamp: responseMonthTimestamp}} - months := modifyResponseMonths(responseMonths, start, end) + months := a.modifyResponseMonths(responseMonths, start, end) if len(months) != 5 { t.Fatal("wrong number of months padded") } @@ -4676,3 +4678,70 @@ func TestActivityLog_writePrecomputedQuery(t *testing.T) { require.Equal(t, 1, monthRecord.NewClients.Counts.EntityClients) require.Equal(t, 1, monthRecord.NewClients.Counts.NonEntityClients) } + +type mockTimeNowClock struct { + timeutil.DefaultClock + start time.Time + created time.Time +} + +func newMockTimeNowClock(startAt time.Time) timeutil.Clock { + return &mockTimeNowClock{start: startAt, created: time.Now()} +} + +// NewTimer returns a timer with a channel that will return the correct time, +// relative to the starting time. This is used when testing the +// activeFragmentWorker, as that function uses the returned value from timer.C +// to perform additional functionality +func (m mockTimeNowClock) NewTimer(d time.Duration) *time.Timer { + timerStarted := m.Now() + t := time.NewTimer(d) + readCh := t.C + writeCh := make(chan time.Time, 1) + go func() { + <-readCh + writeCh <- timerStarted.Add(d) + }() + t.C = writeCh + return t +} + +func (m mockTimeNowClock) Now() time.Time { + return m.start.Add(time.Since(m.created)) +} + +// TestActivityLog_HandleEndOfMonth runs the activity log with a mock clock. +// The current time is set to be 3 seconds before the end of a month. 
The test +// verifies that the precomputedQueryWorker runs and writes precomputed queries +// with the proper start and end times when the end of the month is triggered +func TestActivityLog_HandleEndOfMonth(t *testing.T) { + // 3 seconds until a new month + now := time.Date(2021, 1, 31, 23, 59, 57, 0, time.UTC) + core, _, _ := TestCoreUnsealedWithConfig(t, &CoreConfig{ActivityLogConfig: ActivityLogCoreConfig{Clock: newMockTimeNowClock(now)}}) + done := make(chan struct{}) + go func() { + defer close(done) + <-core.activityLog.precomputedQueryWritten + }() + core.activityLog.SetEnable(true) + core.activityLog.SetStartTimestamp(now.Unix()) + core.activityLog.AddClientToFragment("id", "ns", now.Unix(), false, "mount") + + // wait for the end of month to be triggered + select { + case <-done: + case <-time.After(10 * time.Second): + t.Fatal("timeout waiting for precomputed query") + } + + // verify that a precomputed query was written + exists, err := core.activityLog.queryStore.QueriesAvailable(context.Background()) + require.NoError(t, err) + require.True(t, exists) + + // verify that the timestamp is correct + pq, err := core.activityLog.queryStore.Get(context.Background(), now, now.Add(24*time.Hour)) + require.NoError(t, err) + require.Equal(t, now, pq.StartTime) + require.Equal(t, timeutil.EndOfMonth(now), pq.EndTime) +} diff --git a/vault/activity_log_testing_util.go b/vault/activity_log_testing_util.go index 14fe56c88cf9..a935fdbf2158 100644 --- a/vault/activity_log_testing_util.go +++ b/vault/activity_log_testing_util.go @@ -5,10 +5,8 @@ import ( "fmt" "math/rand" "testing" - "time" "github.com/hashicorp/vault/helper/constants" - "github.com/hashicorp/vault/sdk/logical" "github.com/hashicorp/vault/vault/activity" ) @@ -29,7 +27,7 @@ func (c *Core) InjectActivityLogDataThisMonth(t *testing.T) map[string]*activity ClientID: fmt.Sprintf("testclientid-%d", i), NamespaceID: "root", MountAccessor: fmt.Sprintf("testmountaccessor-%d", i), - Timestamp: 
time.Now().Unix(), + Timestamp: c.activityLog.clock.Now().Unix(), NonEntity: i%2 == 0, } c.activityLog.partialMonthClientTracker[er.ClientID] = er @@ -42,7 +40,7 @@ func (c *Core) InjectActivityLogDataThisMonth(t *testing.T) map[string]*activity ClientID: fmt.Sprintf("ns-%d-testclientid-%d", j, i), NamespaceID: fmt.Sprintf("ns-%d", j), MountAccessor: fmt.Sprintf("ns-%d-testmountaccessor-%d", j, i), - Timestamp: time.Now().Unix(), + Timestamp: c.activityLog.clock.Now().Unix(), NonEntity: i%2 == 0, } c.activityLog.partialMonthClientTracker[er.ClientID] = er diff --git a/vault/activity_log_util_common.go b/vault/activity_log_util_common.go index 785a3f279506..ec5272c15a61 100644 --- a/vault/activity_log_util_common.go +++ b/vault/activity_log_util_common.go @@ -72,7 +72,7 @@ func (a *ActivityLog) StoreHyperlogLog(ctx context.Context, startTime time.Time, } func (a *ActivityLog) computeCurrentMonthForBillingPeriodInternal(ctx context.Context, byMonth map[int64]*processMonth, hllGetFunc HLLGetter, startTime time.Time, endTime time.Time) (*activity.MonthRecord, error) { - if timeutil.IsCurrentMonth(startTime, time.Now().UTC()) { + if timeutil.IsCurrentMonth(startTime, a.clock.Now().UTC()) { monthlyComputation := a.transformMonthBreakdowns(byMonth) if len(monthlyComputation) > 1 { a.logger.Warn("monthly in-memory activitylog computation returned multiple months of data", "months returned", len(byMonth)) From 2148f667aca579e242305ff989291a0fdbf2c25e Mon Sep 17 00:00:00 2001 From: miagilepner Date: Wed, 24 May 2023 10:42:00 +0200 Subject: [PATCH 23/26] backport of commit 541f18eeb782cd0c8ee28b961e99c3adf952bd22 --- .../logical_system_activity_write_testonly.go | 204 ++++++++++++++---- ...cal_system_activity_write_testonly_test.go | 191 +++++++++++++++- 2 files changed, 350 insertions(+), 45 deletions(-) diff --git a/vault/logical_system_activity_write_testonly.go b/vault/logical_system_activity_write_testonly.go index dc8b5485c2e3..ed39fa15542b 100644 --- 
a/vault/logical_system_activity_write_testonly.go +++ b/vault/logical_system_activity_write_testonly.go @@ -7,7 +7,6 @@ package vault import ( "context" - "errors" "fmt" "github.com/hashicorp/go-uuid" @@ -61,6 +60,11 @@ func (b *SystemBackend) handleActivityWriteData(ctx context.Context, request *lo type singleMonthActivityClients struct { // clients are indexed by ID clients []*activity.EntityRecord + // predefinedSegments map from the segment number to the client's index in + // the clients slice + predefinedSegments map[int][]int + // generationParameters holds the generation request + generationParameters *generation.Data } // multipleMonthsActivityClients holds multiple month's data @@ -69,9 +73,80 @@ type multipleMonthsActivityClients struct { months []*singleMonthActivityClients } +func (s *singleMonthActivityClients) addEntityRecord(record *activity.EntityRecord, segmentIndex *int) { + s.clients = append(s.clients, record) + if segmentIndex != nil { + index := len(s.clients) - 1 + s.predefinedSegments[*segmentIndex] = append(s.predefinedSegments[*segmentIndex], index) + } +} + +// populateSegments converts a month of clients into a segmented map. The map's +// keys are the segment index, and the value are the clients that were seen in +// that index. If the value is an empty slice, then it's an empty index. 
If the +// value is nil, then it's a skipped index +func (s *singleMonthActivityClients) populateSegments() (map[int][]*activity.EntityRecord, error) { + segments := make(map[int][]*activity.EntityRecord) + ignoreIndexes := make(map[int]struct{}) + skipIndexes := s.generationParameters.SkipSegmentIndexes + emptyIndexes := s.generationParameters.EmptySegmentIndexes + + for _, i := range skipIndexes { + segments[int(i)] = nil + ignoreIndexes[int(i)] = struct{}{} + } + for _, i := range emptyIndexes { + segments[int(i)] = make([]*activity.EntityRecord, 0, 0) + ignoreIndexes[int(i)] = struct{}{} + } + + // if we have predefined segments, then we can construct the map using those + if len(s.predefinedSegments) > 0 { + for segment, clientIndexes := range s.predefinedSegments { + clientsInSegment := make([]*activity.EntityRecord, 0, len(clientIndexes)) + for _, idx := range clientIndexes { + clientsInSegment = append(clientsInSegment, s.clients[idx]) + } + segments[segment] = clientsInSegment + } + return segments, nil + } + + totalSegmentCount := 1 + if s.generationParameters.GetNumSegments() > 0 { + totalSegmentCount = int(s.generationParameters.GetNumSegments()) + } + numNonUsable := len(skipIndexes) + len(emptyIndexes) + usableSegmentCount := totalSegmentCount - numNonUsable + if usableSegmentCount <= 0 { + return nil, fmt.Errorf("num segments %d is too low, it must be greater than %d (%d skipped indexes + %d empty indexes)", totalSegmentCount, numNonUsable, len(skipIndexes), len(emptyIndexes)) + } + + // determine how many clients should be in each segment + segmentSizes := len(s.clients) / usableSegmentCount + if len(s.clients)%usableSegmentCount != 0 { + segmentSizes++ + } + + clientIndex := 0 + for i := 0; i < totalSegmentCount; i++ { + if clientIndex >= len(s.clients) { + break + } + if _, ok := ignoreIndexes[i]; ok { + continue + } + for len(segments[i]) < segmentSizes && clientIndex < len(s.clients) { + segments[i] = append(segments[i], s.clients[clientIndex]) 
+ clientIndex++ + } + } + return segments, nil +} + // addNewClients generates clients according to the given parameters, and adds them to the month // the client will always have the mountAccessor as its mount accessor -func (s *singleMonthActivityClients) addNewClients(c *generation.Client, mountAccessor string) error { +func (s *singleMonthActivityClients) addNewClients(c *generation.Client, mountAccessor string, segmentIndex *int) error { count := 1 if c.Count > 1 { count = int(c.Count) @@ -90,17 +165,13 @@ func (s *singleMonthActivityClients) addNewClients(c *generation.Client, mountAc return err } } - s.clients = append(s.clients, record) + s.addEntityRecord(record, segmentIndex) } return nil } // processMonth populates a month of client data func (m *multipleMonthsActivityClients) processMonth(ctx context.Context, core *Core, month *generation.Data) error { - if month.GetAll() == nil { - return errors.New("segmented monthly data is not yet supported") - } - // default to using the root namespace and the first mount on the root namespace mounts, err := core.ListMounts() if err != nil { @@ -113,52 +184,101 @@ func (m *multipleMonthsActivityClients) processMonth(ctx context.Context, core * break } } - addingTo := m.months[month.GetMonthsAgo()] - - for _, clients := range month.GetAll().Clients { - if clients.Repeated || clients.RepeatedFromMonth > 0 { - return errors.New("repeated clients are not yet supported") - } + m.months[month.GetMonthsAgo()].generationParameters = month + add := func(c []*generation.Client, segmentIndex *int) error { + for _, clients := range c { - if clients.Namespace == "" { - clients.Namespace = namespace.RootNamespaceID - } + if clients.Namespace == "" { + clients.Namespace = namespace.RootNamespaceID + } - // verify that the namespace exists - ns, err := core.NamespaceByID(ctx, clients.Namespace) - if err != nil { - return err - } + // verify that the namespace exists + ns, err := core.NamespaceByID(ctx, clients.Namespace) + if err 
!= nil { + return err + } - // verify that the mount exists - if clients.Mount != "" { - nctx := namespace.ContextWithNamespace(ctx, ns) - mountEntry := core.router.MatchingMountEntry(nctx, clients.Mount) - if mountEntry == nil { - return fmt.Errorf("unable to find matching mount in namespace %s", clients.Namespace) + // verify that the mount exists + if clients.Mount != "" { + nctx := namespace.ContextWithNamespace(ctx, ns) + mountEntry := core.router.MatchingMountEntry(nctx, clients.Mount) + if mountEntry == nil { + return fmt.Errorf("unable to find matching mount in namespace %s", clients.Namespace) + } } - } - mountAccessor := defaultMountAccessorRootNS - if clients.Namespace != namespace.RootNamespaceID && clients.Mount == "" { - // if we're not using the root namespace, find a mount on the namespace that we are using - found := false - for _, mount := range mounts { - if mount.NamespaceID == clients.Namespace { - mountAccessor = mount.Accessor - found = true - break + mountAccessor := defaultMountAccessorRootNS + if clients.Namespace != namespace.RootNamespaceID && clients.Mount == "" { + // if we're not using the root namespace, find a mount on the namespace that we are using + found := false + for _, mount := range mounts { + if mount.NamespaceID == clients.Namespace { + mountAccessor = mount.Accessor + found = true + break + } + } + if !found { + return fmt.Errorf("unable to find matching mount in namespace %s", clients.Namespace) } } - if !found { - return fmt.Errorf("unable to find matching mount in namespace %s", clients.Namespace) + + err = m.addClientToMonth(month.GetMonthsAgo(), clients, mountAccessor, segmentIndex) + if err != nil { + return err } } - err = addingTo.addNewClients(clients, mountAccessor) + return nil + } + + if month.GetAll() != nil { + return add(month.GetAll().GetClients(), nil) + } + predefinedSegments := month.GetSegments() + for i, segment := range predefinedSegments.GetSegments() { + index := i + if segment.SegmentIndex != nil 
{ + index = int(*segment.SegmentIndex) + } + err = add(segment.GetClients().GetClients(), &index) if err != nil { return err } } + + return nil +} + +func (m *multipleMonthsActivityClients) addClientToMonth(monthsAgo int32, c *generation.Client, mountAccessor string, segmentIndex *int) error { + if c.Repeated || c.RepeatedFromMonth > 0 { + return m.addRepeatedClients(monthsAgo, c, mountAccessor, segmentIndex) + } + return m.months[monthsAgo].addNewClients(c, mountAccessor, segmentIndex) +} + +func (m *multipleMonthsActivityClients) addRepeatedClients(monthsAgo int32, c *generation.Client, mountAccessor string, segmentIndex *int) error { + addingTo := m.months[monthsAgo] + repeatedFromMonth := monthsAgo + 1 + if c.RepeatedFromMonth > 0 { + repeatedFromMonth = c.RepeatedFromMonth + } + repeatedFrom := m.months[repeatedFromMonth] + numClients := 1 + if c.Count > 0 { + numClients = int(c.Count) + } + for _, client := range repeatedFrom.clients { + if c.NonEntity == client.NonEntity && mountAccessor == client.MountAccessor && c.Namespace == client.NamespaceID { + addingTo.addEntityRecord(client, segmentIndex) + numClients-- + if numClients == 0 { + break + } + } + } + if numClients > 0 { + return fmt.Errorf("missing repeated %d clients matching given parameters", numClients) + } return nil } @@ -167,7 +287,9 @@ func newMultipleMonthsActivityClients(numberOfMonths int) *multipleMonthsActivit months: make([]*singleMonthActivityClients, numberOfMonths), } for i := 0; i < numberOfMonths; i++ { - m.months[i] = new(singleMonthActivityClients) + m.months[i] = &singleMonthActivityClients{ + predefinedSegments: make(map[int][]int), + } } return m } diff --git a/vault/logical_system_activity_write_testonly_test.go b/vault/logical_system_activity_write_testonly_test.go index 2a0f44c5bad2..f72a0e167868 100644 --- a/vault/logical_system_activity_write_testonly_test.go +++ b/vault/logical_system_activity_write_testonly_test.go @@ -11,6 +11,7 @@ import ( 
"github.com/hashicorp/vault/helper/namespace" "github.com/hashicorp/vault/sdk/logical" + "github.com/hashicorp/vault/vault/activity" "github.com/hashicorp/vault/vault/activity/generation" "github.com/stretchr/testify/require" ) @@ -88,8 +89,10 @@ func TestSystemBackend_handleActivityWriteData(t *testing.T) { // Test_singleMonthActivityClients_addNewClients verifies that new clients are // created correctly, adhering to the requested parameters. The clients should // use the inputted mount and a generated ID if one is not supplied. The new -// client should be added to the month's `clients` slice +// client should be added to the month's `clients` slice and segment map, if +// a segment index is supplied func Test_singleMonthActivityClients_addNewClients(t *testing.T) { + segmentIndex := 0 tests := []struct { name string mount string @@ -97,6 +100,7 @@ func Test_singleMonthActivityClients_addNewClients(t *testing.T) { wantNamespace string wantMount string wantID string + segmentIndex *int }{ { name: "default mount is used", @@ -133,18 +137,25 @@ func Test_singleMonthActivityClients_addNewClients(t *testing.T) { NonEntity: true, }, }, + { + name: "added to segment", + clients: &generation.Client{}, + segmentIndex: &segmentIndex, + }, } for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { - m := &singleMonthActivityClients{} - err := m.addNewClients(tt.clients, tt.mount) + m := &singleMonthActivityClients{ + predefinedSegments: make(map[int][]int), + } + err := m.addNewClients(tt.clients, tt.mount, tt.segmentIndex) require.NoError(t, err) numNew := tt.clients.Count if numNew == 0 { numNew = 1 } require.Len(t, m.clients, int(numNew)) - for _, rec := range m.clients { + for i, rec := range m.clients { require.NotNil(t, rec) require.Equal(t, tt.wantNamespace, rec.NamespaceID) require.Equal(t, tt.wantMount, rec.MountAccessor) @@ -154,6 +165,9 @@ func Test_singleMonthActivityClients_addNewClients(t *testing.T) { } else { require.NotEqual(t, "", rec.ClientID) } + 
if tt.segmentIndex != nil { + require.Contains(t, m.predefinedSegments[*tt.segmentIndex], i) + } } }) } @@ -245,3 +259,172 @@ func Test_multipleMonthsActivityClients_processMonth(t *testing.T) { }) } } + +// Test_multipleMonthsActivityClients_processMonth_segmented verifies that segments +// are filled correctly when a month is processed with segmented data. The clients +// should be in the clients array, and should also be in the predefinedSegments map +// at the correct segment index +func Test_multipleMonthsActivityClients_processMonth_segmented(t *testing.T) { + index7 := int32(7) + data := &generation.Data{ + Clients: &generation.Data_Segments{ + Segments: &generation.Segments{ + Segments: []*generation.Segment{ + { + Clients: &generation.Clients{Clients: []*generation.Client{ + {}, + }}, + }, + { + Clients: &generation.Clients{Clients: []*generation.Client{{}}}, + }, + { + SegmentIndex: &index7, + Clients: &generation.Clients{Clients: []*generation.Client{{}}}, + }, + }, + }, + }, + } + m := newMultipleMonthsActivityClients(1) + core, _, _ := TestCoreUnsealed(t) + require.NoError(t, m.processMonth(context.Background(), core, data)) + require.Len(t, m.months[0].predefinedSegments, 3) + require.Len(t, m.months[0].clients, 3) + + // segment indexes are correct + require.Contains(t, m.months[0].predefinedSegments, 0) + require.Contains(t, m.months[0].predefinedSegments, 1) + require.Contains(t, m.months[0].predefinedSegments, 7) + + // the data in each segment is correct + require.Contains(t, m.months[0].predefinedSegments[0], 0) + require.Contains(t, m.months[0].predefinedSegments[1], 1) + require.Contains(t, m.months[0].predefinedSegments[7], 2) +} + +// Test_multipleMonthsActivityClients_addRepeatedClients adds repeated clients +// from 1 month ago and 2 months ago, and verifies that the correct clients are +// added based on namespace, mount, and non-entity attributes +func Test_multipleMonthsActivityClients_addRepeatedClients(t *testing.T) { + m := 
newMultipleMonthsActivityClients(3) + defaultMount := "default" + + require.NoError(t, m.addClientToMonth(2, &generation.Client{Count: 2}, "identity", nil)) + require.NoError(t, m.addClientToMonth(2, &generation.Client{Count: 2, Namespace: "other_ns"}, defaultMount, nil)) + require.NoError(t, m.addClientToMonth(1, &generation.Client{Count: 2}, defaultMount, nil)) + require.NoError(t, m.addClientToMonth(1, &generation.Client{Count: 2, NonEntity: true}, defaultMount, nil)) + + month2Clients := m.months[2].clients + month1Clients := m.months[1].clients + + thisMonth := m.months[0] + // this will match the first client in month 1 + require.NoError(t, m.addRepeatedClients(0, &generation.Client{Count: 1, Repeated: true}, defaultMount, nil)) + require.Contains(t, month1Clients, thisMonth.clients[0]) + + // this will match the 3rd client in month 1 + require.NoError(t, m.addRepeatedClients(0, &generation.Client{Count: 1, Repeated: true, NonEntity: true}, defaultMount, nil)) + require.Equal(t, month1Clients[2], thisMonth.clients[1]) + + // this will match the first two clients in month 1 + require.NoError(t, m.addRepeatedClients(0, &generation.Client{Count: 2, Repeated: true}, defaultMount, nil)) + require.Equal(t, month1Clients[0:2], thisMonth.clients[2:4]) + + // this will match the first client in month 2 + require.NoError(t, m.addRepeatedClients(0, &generation.Client{Count: 1, RepeatedFromMonth: 2}, "identity", nil)) + require.Equal(t, month2Clients[0], thisMonth.clients[4]) + + // this will match the 3rd client in month 2 + require.NoError(t, m.addRepeatedClients(0, &generation.Client{Count: 1, RepeatedFromMonth: 2, Namespace: "other_ns"}, defaultMount, nil)) + require.Equal(t, month2Clients[2], thisMonth.clients[5]) + + require.Error(t, m.addRepeatedClients(0, &generation.Client{Count: 1, RepeatedFromMonth: 2, Namespace: "other_ns"}, "other_mount", nil)) +} + +// Test_singleMonthActivityClients_populateSegments calls populateSegments for a +// collection of 5 clients, 
segmented in various ways. The test ensures that the +// resulting map has the correct clients for each segment index +func Test_singleMonthActivityClients_populateSegments(t *testing.T) { + clients := []*activity.EntityRecord{ + {ClientID: "a"}, + {ClientID: "b"}, + {ClientID: "c"}, + {ClientID: "d"}, + {ClientID: "e"}, + } + cases := []struct { + name string + segments map[int][]int + numSegments int + emptyIndexes []int32 + skipIndexes []int32 + wantSegments map[int][]*activity.EntityRecord + }{ + { + name: "segmented", + segments: map[int][]int{ + 0: {0, 1}, + 1: {2, 3}, + 2: {4}, + }, + wantSegments: map[int][]*activity.EntityRecord{ + 0: {{ClientID: "a"}, {ClientID: "b"}}, + 1: {{ClientID: "c"}, {ClientID: "d"}}, + 2: {{ClientID: "e"}}, + }, + }, + { + name: "segmented with skip and empty", + segments: map[int][]int{ + 0: {0, 1}, + 2: {0, 1}, + }, + emptyIndexes: []int32{1, 4}, + skipIndexes: []int32{3}, + wantSegments: map[int][]*activity.EntityRecord{ + 0: {{ClientID: "a"}, {ClientID: "b"}}, + 1: {}, + 2: {{ClientID: "a"}, {ClientID: "b"}}, + 3: nil, + 4: {}, + }, + }, + { + name: "all clients", + numSegments: 0, + wantSegments: map[int][]*activity.EntityRecord{ + 0: {{ClientID: "a"}, {ClientID: "b"}, {ClientID: "c"}, {ClientID: "d"}, {ClientID: "e"}}, + }, + }, + { + name: "all clients split", + numSegments: 2, + wantSegments: map[int][]*activity.EntityRecord{ + 0: {{ClientID: "a"}, {ClientID: "b"}, {ClientID: "c"}}, + 1: {{ClientID: "d"}, {ClientID: "e"}}, + }, + }, + { + name: "all clients with skip and empty", + numSegments: 5, + skipIndexes: []int32{0, 3}, + emptyIndexes: []int32{2}, + wantSegments: map[int][]*activity.EntityRecord{ + 0: nil, + 1: {{ClientID: "a"}, {ClientID: "b"}, {ClientID: "c"}}, + 2: {}, + 3: nil, + 4: {{ClientID: "d"}, {ClientID: "e"}}, + }, + }, + } + for _, tc := range cases { + t.Run(tc.name, func(t *testing.T) { + s := singleMonthActivityClients{predefinedSegments: tc.segments, clients: clients, generationParameters: 
&generation.Data{EmptySegmentIndexes: tc.emptyIndexes, SkipSegmentIndexes: tc.skipIndexes, NumSegments: int32(tc.numSegments)}} + gotSegments, err := s.populateSegments() + require.NoError(t, err) + require.Equal(t, tc.wantSegments, gotSegments) + }) + } +} From 592a288c8a41c444595ffed92972993bad119fa3 Mon Sep 17 00:00:00 2001 From: miagilepner Date: Thu, 25 May 2023 18:55:55 +0200 Subject: [PATCH 24/26] backport of commit b4e2751a09d411abef62a3769b08e9f1ce647e25 --- vault/activity_log.go | 113 ++++++++------ .../logical_system_activity_write_testonly.go | 73 ++++++++- ...cal_system_activity_write_testonly_test.go | 146 ++++++++++++++++++ 3 files changed, 282 insertions(+), 50 deletions(-) diff --git a/vault/activity_log.go b/vault/activity_log.go index ef6e46d76a2c..f6534cba5d0b 100644 --- a/vault/activity_log.go +++ b/vault/activity_log.go @@ -400,67 +400,82 @@ func (a *ActivityLog) saveCurrentSegmentToStorageLocked(ctx context.Context, for // :force: forces a save of tokens/entities even if the in-memory log is empty func (a *ActivityLog) saveCurrentSegmentInternal(ctx context.Context, force bool) error { - entityPath := fmt.Sprintf("%s%d/%d", activityEntityBasePath, a.currentSegment.startTimestamp, a.currentSegment.clientSequenceNumber) + _, err := a.saveSegmentEntitiesInternal(ctx, a.currentSegment, force) + if err != nil { + return err + } + _, err = a.saveSegmentTokensInternal(ctx, a.currentSegment, force) + return err +} + +func (a *ActivityLog) saveSegmentTokensInternal(ctx context.Context, currentSegment segmentInfo, force bool) (string, error) { + if len(currentSegment.tokenCount.CountByNamespaceID) == 0 && !force { + return "", nil + } // RFC (VLT-120) defines this as 1-indexed, but it should be 0-indexed - tokenPath := fmt.Sprintf("%s%d/0", activityTokenBasePath, a.currentSegment.startTimestamp) + tokenPath := fmt.Sprintf("%s%d/0", activityTokenBasePath, currentSegment.startTimestamp) + // We must still allow for the tokenCount of the current segment 
to + // be written to storage, since if we remove this code we will incur + // data loss for one segment's worth of TWEs. + // We can get away with simply using the oldest version stored because + // the storing of versions was introduced at the same time as this code. + oldestVersion, oldestUpgradeTime, err := a.core.FindOldestVersionTimestamp() + switch { + case err != nil: + a.logger.Error(fmt.Sprintf("unable to retrieve oldest version timestamp: %s", err.Error())) + case len(a.currentSegment.tokenCount.CountByNamespaceID) > 0 && + (oldestUpgradeTime.Add(time.Duration(trackedTWESegmentPeriod * time.Hour)).Before(time.Now())): + a.logger.Error(fmt.Sprintf("storing nonzero token count over a month after vault was upgraded to %s", oldestVersion)) + default: + if len(a.currentSegment.tokenCount.CountByNamespaceID) > 0 { + a.logger.Info("storing nonzero token count") + } + } + tokenCount, err := proto.Marshal(a.currentSegment.tokenCount) + if err != nil { + return "", err + } + + a.logger.Trace("writing segment", "path", tokenPath) + err = a.view.Put(ctx, &logical.StorageEntry{ + Key: tokenPath, + Value: tokenCount, + }) + if err != nil { + return "", err + } + + return tokenPath, nil +} + +func (a *ActivityLog) saveSegmentEntitiesInternal(ctx context.Context, currentSegment segmentInfo, force bool) (string, error) { + entityPath := fmt.Sprintf("%s%d/%d", activityEntityBasePath, currentSegment.startTimestamp, currentSegment.clientSequenceNumber) for _, client := range a.currentSegment.currentClients.Clients { // Explicitly catch and throw clear error message if client ID creation and storage // results in a []byte that doesn't assert into a valid string. 
if !utf8.ValidString(client.ClientID) { - return fmt.Errorf("client ID %q is not a valid string:", client.ClientID) + return "", fmt.Errorf("client ID %q is not a valid string:", client.ClientID) } } - if len(a.currentSegment.currentClients.Clients) > 0 || force { - clients, err := proto.Marshal(a.currentSegment.currentClients) - if err != nil { - return err - } - - a.logger.Trace("writing segment", "path", entityPath) - err = a.view.Put(ctx, &logical.StorageEntry{ - Key: entityPath, - Value: clients, - }) - if err != nil { - return err - } + if len(currentSegment.currentClients.Clients) == 0 && !force { + return "", nil + } + clients, err := proto.Marshal(currentSegment.currentClients) + if err != nil { + return entityPath, err } - // We must still allow for the tokenCount of the current segment to - // be written to storage, since if we remove this code we will incur - // data loss for one segment's worth of TWEs. - if len(a.currentSegment.tokenCount.CountByNamespaceID) > 0 || force { - // We can get away with simply using the oldest version stored because - // the storing of versions was introduced at the same time as this code. 
- oldestVersion, oldestUpgradeTime, err := a.core.FindOldestVersionTimestamp() - switch { - case err != nil: - a.logger.Error(fmt.Sprintf("unable to retrieve oldest version timestamp: %s", err.Error())) - case len(a.currentSegment.tokenCount.CountByNamespaceID) > 0 && - (oldestUpgradeTime.Add(time.Duration(trackedTWESegmentPeriod * time.Hour)).Before(a.clock.Now())): - a.logger.Error(fmt.Sprintf("storing nonzero token count over a month after vault was upgraded to %s", oldestVersion)) - default: - if len(a.currentSegment.tokenCount.CountByNamespaceID) > 0 { - a.logger.Info("storing nonzero token count") - } - } - tokenCount, err := proto.Marshal(a.currentSegment.tokenCount) - if err != nil { - return err - } - - a.logger.Trace("writing segment", "path", tokenPath) - err = a.view.Put(ctx, &logical.StorageEntry{ - Key: tokenPath, - Value: tokenCount, - }) - if err != nil { - return err - } + a.logger.Trace("writing segment", "path", entityPath) + err = a.view.Put(ctx, &logical.StorageEntry{ + Key: entityPath, + Value: clients, + }) + if err != nil { + return "", err } - return nil + return entityPath, err } // parseSegmentNumberFromPath returns the segment number from a path diff --git a/vault/logical_system_activity_write_testonly.go b/vault/logical_system_activity_write_testonly.go index ed39fa15542b..4186ca14d7c3 100644 --- a/vault/logical_system_activity_write_testonly.go +++ b/vault/logical_system_activity_write_testonly.go @@ -8,9 +8,12 @@ package vault import ( "context" "fmt" + "sync" + "time" "github.com/hashicorp/go-uuid" "github.com/hashicorp/vault/helper/namespace" + "github.com/hashicorp/vault/helper/timeutil" "github.com/hashicorp/vault/sdk/framework" "github.com/hashicorp/vault/sdk/logical" "github.com/hashicorp/vault/vault/activity" @@ -53,7 +56,34 @@ func (b *SystemBackend) handleActivityWriteData(ctx context.Context, request *lo if len(input.Data) == 0 { return logical.ErrorResponse("Missing required \"data\" values"), logical.ErrInvalidRequest } - 
return nil, nil + + numMonths := 0 + for _, month := range input.Data { + if int(month.GetMonthsAgo()) > numMonths { + numMonths = int(month.GetMonthsAgo()) + } + } + generated := newMultipleMonthsActivityClients(numMonths + 1) + for _, month := range input.Data { + err := generated.processMonth(ctx, b.Core, month) + if err != nil { + return logical.ErrorResponse("failed to process data for month %d", month.GetMonthsAgo()), err + } + } + + opts := make(map[generation.WriteOptions]struct{}, len(input.Write)) + for _, opt := range input.Write { + opts[opt] = struct{}{} + } + paths, err := generated.write(ctx, opts, b.Core.activityLog) + if err != nil { + return logical.ErrorResponse("failed to write data"), err + } + return &logical.Response{ + Data: map[string]interface{}{ + "paths": paths, + }, + }, nil } // singleMonthActivityClients holds a single month's client IDs, in the order they were seen @@ -282,6 +312,47 @@ func (m *multipleMonthsActivityClients) addRepeatedClients(monthsAgo int32, c *g return nil } +func (m *multipleMonthsActivityClients) write(ctx context.Context, opts map[generation.WriteOptions]struct{}, activityLog *ActivityLog) ([]string, error) { + now := timeutil.StartOfMonth(time.Now().UTC()) + paths := []string{} + for i, month := range m.months { + var timestamp time.Time + if i > 0 { + timestamp = timeutil.StartOfMonth(timeutil.MonthsPreviousTo(i, now)) + } else { + timestamp = now + } + segments, err := month.populateSegments() + if err != nil { + return nil, err + } + for segmentIndex, segment := range segments { + if _, ok := opts[generation.WriteOptions_WRITE_ENTITIES]; ok { + if segment == nil { + // skip the index + continue + } + entityPath, err := activityLog.saveSegmentEntitiesInternal(ctx, segmentInfo{ + startTimestamp: timestamp.Unix(), + currentClients: &activity.EntityActivityLog{Clients: segment}, + clientSequenceNumber: uint64(segmentIndex), + tokenCount: &activity.TokenCount{}, + }, true) + if err != nil { + return nil, err + } 
+ paths = append(paths, entityPath) + } + } + } + wg := sync.WaitGroup{} + err := activityLog.refreshFromStoredLog(ctx, &wg, now) + if err != nil { + return nil, err + } + return paths, nil +} + func newMultipleMonthsActivityClients(numberOfMonths int) *multipleMonthsActivityClients { m := &multipleMonthsActivityClients{ months: make([]*singleMonthActivityClients, numberOfMonths), diff --git a/vault/logical_system_activity_write_testonly_test.go b/vault/logical_system_activity_write_testonly_test.go index f72a0e167868..b9b1a939a8c5 100644 --- a/vault/logical_system_activity_write_testonly_test.go +++ b/vault/logical_system_activity_write_testonly_test.go @@ -7,6 +7,7 @@ package vault import ( "context" + "sort" "testing" "github.com/hashicorp/vault/helper/namespace" @@ -14,6 +15,8 @@ import ( "github.com/hashicorp/vault/vault/activity" "github.com/hashicorp/vault/vault/activity/generation" "github.com/stretchr/testify/require" + "google.golang.org/protobuf/encoding/protojson" + "google.golang.org/protobuf/proto" ) // TestSystemBackend_handleActivityWriteData calls the activity log write endpoint and confirms that the inputs are @@ -24,6 +27,7 @@ func TestSystemBackend_handleActivityWriteData(t *testing.T) { operation logical.Operation input map[string]interface{} wantError error + wantPaths int }{ { name: "read fails", @@ -70,6 +74,12 @@ func TestSystemBackend_handleActivityWriteData(t *testing.T) { operation: logical.CreateOperation, input: map[string]interface{}{"input": `{"write":["WRITE_PRECOMPUTED_QUERIES"],"data":[{"current_month":true,"all":{"clients":[{"count":5}]}}]}`}, }, + { + name: "entities with multiple segments", + operation: logical.CreateOperation, + input: map[string]interface{}{"input": `{"write":["WRITE_ENTITIES"],"data":[{"current_month":true,"num_segments":3,"all":{"clients":[{"count":5}]}}]}`}, + wantPaths: 3, + }, } for _, tc := range testCases { t.Run(tc.name, func(t *testing.T) { @@ -81,6 +91,8 @@ func 
TestSystemBackend_handleActivityWriteData(t *testing.T) { require.Equal(t, tc.wantError, err, resp.Error()) } else { require.NoError(t, err) + paths := resp.Data["paths"].([]string) + require.Len(t, paths, tc.wantPaths) } }) } @@ -428,3 +440,137 @@ func Test_singleMonthActivityClients_populateSegments(t *testing.T) { }) } } + +// Test_multipleMonthsActivityClients_write_entities writes 4 months of data +// splitting some months across segments and using empty segments and skipped +// segments. Entities are written and then storage is queried. The test verifies +// that the correct timestamps are present in the activity log and that the correct +// segment numbers for each month contain the correct number of clients +func Test_multipleMonthsActivityClients_write_entities(t *testing.T) { + index5 := int32(5) + index4 := int32(4) + data := &generation.ActivityLogMockInput{ + Write: []generation.WriteOptions{ + generation.WriteOptions_WRITE_ENTITIES, + }, + Data: []*generation.Data{ + { + // segments: 0:[x,y], 1:[z] + Month: &generation.Data_MonthsAgo{MonthsAgo: 3}, + Clients: &generation.Data_All{All: &generation.Clients{Clients: []*generation.Client{{Count: 3}}}}, + NumSegments: 2, + }, + { + // segments: 1:[a,b,c], 2:[d,e] + Month: &generation.Data_MonthsAgo{MonthsAgo: 2}, + Clients: &generation.Data_All{All: &generation.Clients{Clients: []*generation.Client{{Count: 5}}}}, + NumSegments: 3, + SkipSegmentIndexes: []int32{0}, + }, + { + // segments: 5:[f,g] + Month: &generation.Data_MonthsAgo{MonthsAgo: 1}, + Clients: &generation.Data_Segments{ + Segments: &generation.Segments{Segments: []*generation.Segment{{ + SegmentIndex: &index5, + Clients: &generation.Clients{Clients: []*generation.Client{{Count: 2}}}, + }}}, + }, + }, + { + // segments: 1:[], 2:[], 4:[n], 5:[o] + Month: &generation.Data_CurrentMonth{}, + EmptySegmentIndexes: []int32{1, 2}, + Clients: &generation.Data_Segments{ + Segments: &generation.Segments{Segments: []*generation.Segment{ + { + SegmentIndex: 
&index5, + Clients: &generation.Clients{Clients: []*generation.Client{{Count: 1}}}, + }, + { + SegmentIndex: &index4, + Clients: &generation.Clients{Clients: []*generation.Client{{Count: 1}}}, + }, + }}, + }, + }, + }, + } + + core, _, _ := TestCoreUnsealed(t) + marshaled, err := protojson.Marshal(data) + require.NoError(t, err) + req := logical.TestRequest(t, logical.CreateOperation, "internal/counters/activity/write") + req.Data = map[string]interface{}{"input": string(marshaled)} + resp, err := core.systemBackend.HandleRequest(namespace.RootContext(nil), req) + require.NoError(t, err) + paths := resp.Data["paths"].([]string) + require.Len(t, paths, 9) + + times, err := core.activityLog.availableLogs(context.Background()) + require.NoError(t, err) + require.Len(t, times, 4) + + sortPaths := func(monthPaths []string) { + sort.Slice(monthPaths, func(i, j int) bool { + iVal, _ := parseSegmentNumberFromPath(monthPaths[i]) + jVal, _ := parseSegmentNumberFromPath(monthPaths[j]) + return iVal < jVal + }) + } + + month0Paths := paths[0:4] + month1Paths := paths[4:5] + month2Paths := paths[5:7] + month3Paths := paths[7:9] + sortPaths(month0Paths) + sortPaths(month1Paths) + sortPaths(month2Paths) + sortPaths(month3Paths) + entities := func(paths []string) map[int][]*activity.EntityRecord { + segments := make(map[int][]*activity.EntityRecord) + for _, path := range paths { + segmentNum, _ := parseSegmentNumberFromPath(path) + entry, err := core.activityLog.view.Get(context.Background(), path) + require.NoError(t, err) + if entry == nil { + segments[segmentNum] = []*activity.EntityRecord{} + continue + } + activities := &activity.EntityActivityLog{} + err = proto.Unmarshal(entry.Value, activities) + require.NoError(t, err) + segments[segmentNum] = activities.Clients + } + return segments + } + month0Entities := entities(month0Paths) + require.Len(t, month0Entities, 4) + require.Contains(t, month0Entities, 1) + require.Contains(t, month0Entities, 2) + require.Contains(t, 
month0Entities, 4) + require.Contains(t, month0Entities, 5) + require.Len(t, month0Entities[1], 0) + require.Len(t, month0Entities[2], 0) + require.Len(t, month0Entities[4], 1) + require.Len(t, month0Entities[5], 1) + + month1Entities := entities(month1Paths) + require.Len(t, month1Entities, 1) + require.Contains(t, month1Entities, 5) + require.Len(t, month1Entities[5], 2) + + month2Entities := entities(month2Paths) + require.Len(t, month2Entities, 2) + require.Contains(t, month2Entities, 1) + require.Contains(t, month2Entities, 2) + require.Len(t, month2Entities[1], 3) + require.Len(t, month2Entities[2], 2) + + month3Entities := entities(month3Paths) + require.Len(t, month3Entities, 2) + require.Contains(t, month3Entities, 0) + require.Contains(t, month3Entities, 1) + require.Len(t, month3Entities[0], 2) + require.Len(t, month3Entities[1], 1) +} From 92fa1f4d515412754558a85340d5005ff7451007 Mon Sep 17 00:00:00 2001 From: Nick Cabatoff Date: Fri, 2 Jun 2023 07:15:27 -0400 Subject: [PATCH 25/26] backport of commit dc5dd71c72a981e703379484bcac57e32af01fec --- vault/activity_log.go | 24 ++++++++---------------- vault/activity_log_test.go | 23 +++++++++++------------ vault/logical_system_test.go | 6 +++++- vault/testing.go | 2 +- 4 files changed, 25 insertions(+), 30 deletions(-) diff --git a/vault/activity_log.go b/vault/activity_log.go index f6534cba5d0b..30430accb37e 100644 --- a/vault/activity_log.go +++ b/vault/activity_log.go @@ -139,9 +139,6 @@ type ActivityLog struct { // Channel for sending fragment immediately sendCh chan struct{} - // Channel for writing fragment immediately - writeCh chan struct{} - // Channel to stop background processing doneCh chan struct{} @@ -194,6 +191,8 @@ type ActivityLogCoreConfig struct { // Enable activity log even if the feature flag not set ForceEnable bool + DisableFragmentWorker bool + // Do not start timers to send or persist fragments. 
DisableTimers bool @@ -228,7 +227,6 @@ func NewActivityLog(core *Core, logger log.Logger, view *BarrierView, metrics me nodeID: hostname, newFragmentCh: make(chan struct{}, 1), sendCh: make(chan struct{}, 1), // buffered so it can be triggered by fragment size - writeCh: make(chan struct{}, 1), // same for full segment doneCh: make(chan struct{}, 1), partialMonthClientTracker: make(map[string]*activity.EntityRecord), CensusReportInterval: time.Hour * 1, @@ -1127,9 +1125,13 @@ func (c *Core) setupActivityLogLocked(ctx context.Context, wg *sync.WaitGroup) e // Lock already held here, can't use .PerfStandby() // The workers need to know the current segment time. if c.perfStandby { - go manager.perfStandbyFragmentWorker(ctx) + if !c.activityLogConfig.DisableFragmentWorker { + go manager.perfStandbyFragmentWorker(ctx) + } } else { - go manager.activeFragmentWorker(ctx) + if !c.activityLogConfig.DisableFragmentWorker { + go manager.activeFragmentWorker(ctx) + } // Check for any intent log, in the background manager.computationWorkerDone = make(chan struct{}) @@ -1323,16 +1325,6 @@ func (a *ActivityLog) activeFragmentWorker(ctx context.Context) { } a.logger.Trace("writing segment on timer expiration") writeFunc() - case <-a.writeCh: - a.logger.Trace("writing segment on request") - writeFunc() - - // Reset the schedule to wait 10 minutes from this forced write. 
- ticker.Stop() - ticker = a.clock.NewTicker(activitySegmentInterval) - - // Simpler, but ticker.Reset was introduced in go 1.15: - // ticker.Reset(activitySegmentInterval) case currentTime := <-endOfMonthChannel: err := a.HandleEndOfMonth(ctx, currentTime.UTC()) if err != nil { diff --git a/vault/activity_log_test.go b/vault/activity_log_test.go index 219a55efd542..47e7307e6dc5 100644 --- a/vault/activity_log_test.go +++ b/vault/activity_log_test.go @@ -648,25 +648,24 @@ func TestActivityLog_availableLogs(t *testing.T) { } } -// TestActivityLog_MultipleFragmentsAndSegments adds 4000 clients to a fragment and saves it and reads it. The test then -// adds 4000 more clients and calls receivedFragment with 200 more entities. The current segment is saved to storage and -// read back. The test verifies that there are 5000 clients in the first segment index, then the rest in the second index. +// TestActivityLog_MultipleFragmentsAndSegments adds 4000 clients to a fragment +// and saves it and reads it. The test then adds 4000 more clients and calls +// receivedFragment with 200 more entities. The current segment is saved to +// storage and read back. The test verifies that there are 5000 clients in the +// first segment index, then the rest in the second index. 
func TestActivityLog_MultipleFragmentsAndSegments(t *testing.T) { - core, _, _ := TestCoreUnsealed(t) + core, _, _ := TestCoreUnsealedWithConfig(t, &CoreConfig{ + ActivityLogConfig: ActivityLogCoreConfig{ + DisableFragmentWorker: true, + DisableTimers: true, + }, + }) a := core.activityLog // enabled check is now inside AddClientToFragment a.SetEnable(true) a.SetStartTimestamp(time.Now().Unix()) // set a nonzero segment - // Stop timers for test purposes - close(a.doneCh) - defer func() { - a.l.Lock() - a.doneCh = make(chan struct{}, 1) - a.l.Unlock() - }() - startTimestamp := a.GetStartTimestamp() path0 := fmt.Sprintf("sys/counters/activity/log/entity/%d/0", startTimestamp) path1 := fmt.Sprintf("sys/counters/activity/log/entity/%d/1", startTimestamp) diff --git a/vault/logical_system_test.go b/vault/logical_system_test.go index e7c3768aa3a8..85e4ef7fd5b8 100644 --- a/vault/logical_system_test.go +++ b/vault/logical_system_test.go @@ -30,6 +30,7 @@ import ( "github.com/hashicorp/vault/sdk/helper/compressutil" "github.com/hashicorp/vault/sdk/helper/consts" "github.com/hashicorp/vault/sdk/helper/jsonutil" + "github.com/hashicorp/vault/sdk/helper/logging" "github.com/hashicorp/vault/sdk/helper/pluginutil" "github.com/hashicorp/vault/sdk/helper/salt" "github.com/hashicorp/vault/sdk/logical" @@ -5054,7 +5055,10 @@ func TestSystemBackend_LoggersByName(t *testing.T) { t.Run(fmt.Sprintf("loggers-by-name-%s", tc.logger), func(t *testing.T) { t.Parallel() - core, b, _ := testCoreSystemBackend(t) + core, _, _ := TestCoreUnsealedWithConfig(t, &CoreConfig{ + Logger: logging.NewVaultLogger(hclog.Trace), + }) + b := core.systemBackend // Test core overrides logging level outside of config, // an initial delete will ensure that we an initial read diff --git a/vault/testing.go b/vault/testing.go index 1efa0b1eee5b..5f98b7bd584c 100644 --- a/vault/testing.go +++ b/vault/testing.go @@ -185,7 +185,7 @@ func TestCoreWithSealAndUI(t testing.T, opts *CoreConfig) *Core { } func 
TestCoreWithSealAndUINoCleanup(t testing.T, opts *CoreConfig) *Core { - logger := logging.NewVaultLogger(log.Trace) + logger := logging.NewVaultLogger(log.Trace).Named(t.Name()) physicalBackend, err := physInmem.NewInmem(nil, logger) if err != nil { t.Fatal(err) From 89247a22ca0f4967bb820315cbade394142c92f8 Mon Sep 17 00:00:00 2001 From: miagilepner Date: Thu, 8 Jun 2023 15:54:36 +0200 Subject: [PATCH 26/26] backport of commit 5002489d279ea2a98342ae31f342e0cd9c888e85 --- .../logical_system_activity_write_testonly.go | 75 +++++ ...cal_system_activity_write_testonly_test.go | 261 ++++++++++-------- 2 files changed, 221 insertions(+), 115 deletions(-) diff --git a/vault/logical_system_activity_write_testonly.go b/vault/logical_system_activity_write_testonly.go index 4186ca14d7c3..1ddca629b7eb 100644 --- a/vault/logical_system_activity_write_testonly.go +++ b/vault/logical_system_activity_write_testonly.go @@ -8,6 +8,7 @@ package vault import ( "context" "fmt" + "io" "sync" "time" @@ -315,7 +316,23 @@ func (m *multipleMonthsActivityClients) addRepeatedClients(monthsAgo int32, c *g func (m *multipleMonthsActivityClients) write(ctx context.Context, opts map[generation.WriteOptions]struct{}, activityLog *ActivityLog) ([]string, error) { now := timeutil.StartOfMonth(time.Now().UTC()) paths := []string{} + + _, writePQ := opts[generation.WriteOptions_WRITE_PRECOMPUTED_QUERIES] + _, writeDistinctClients := opts[generation.WriteOptions_WRITE_DISTINCT_CLIENTS] + + pqOpts := pqOptions{} + if writePQ || writeDistinctClients { + pqOpts.byNamespace = make(map[string]*processByNamespace) + pqOpts.byMonth = make(map[int64]*processMonth) + pqOpts.activePeriodEnd = m.latestTimestamp(now) + pqOpts.endTime = timeutil.EndOfMonth(pqOpts.activePeriodEnd) + pqOpts.activePeriodStart = m.earliestTimestamp(now) + } + for i, month := range m.months { + if month.generationParameters == nil { + continue + } var timestamp time.Time if i > 0 { timestamp = 
timeutil.StartOfMonth(timeutil.MonthsPreviousTo(i, now)) @@ -344,6 +361,14 @@ func (m *multipleMonthsActivityClients) write(ctx context.Context, opts map[gene paths = append(paths, entityPath) } } + + if writePQ || writeDistinctClients { + reader := newProtoSegmentReader(segments) + err = activityLog.segmentToPrecomputedQuery(ctx, timestamp, reader, pqOpts) + if err != nil { + return nil, err + } + } } wg := sync.WaitGroup{} err := activityLog.refreshFromStoredLog(ctx, &wg, now) @@ -353,6 +378,25 @@ func (m *multipleMonthsActivityClients) write(ctx context.Context, opts map[gene return paths, nil } +func (m *multipleMonthsActivityClients) latestTimestamp(now time.Time) time.Time { + for i, month := range m.months { + if month.generationParameters != nil { + return timeutil.StartOfMonth(timeutil.MonthsPreviousTo(i, now)) + } + } + return time.Time{} +} + +func (m *multipleMonthsActivityClients) earliestTimestamp(now time.Time) time.Time { + for i := len(m.months) - 1; i >= 0; i-- { + month := m.months[i] + if month.generationParameters != nil { + return timeutil.StartOfMonth(timeutil.MonthsPreviousTo(i, now)) + } + } + return time.Time{} +} + func newMultipleMonthsActivityClients(numberOfMonths int) *multipleMonthsActivityClients { m := &multipleMonthsActivityClients{ months: make([]*singleMonthActivityClients, numberOfMonths), @@ -364,3 +408,34 @@ func newMultipleMonthsActivityClients(numberOfMonths int) *multipleMonthsActivit } return m } + +func newProtoSegmentReader(segments map[int][]*activity.EntityRecord) SegmentReader { + allRecords := make([][]*activity.EntityRecord, 0, len(segments)) + for _, records := range segments { + if records == nil { + continue + } + allRecords = append(allRecords, records) + } + return &sliceSegmentReader{ + records: allRecords, + } +} + +type sliceSegmentReader struct { + records [][]*activity.EntityRecord + i int +} + +func (p *sliceSegmentReader) ReadToken(ctx context.Context) (*activity.TokenCount, error) { + return nil, 
io.EOF +} + +func (p *sliceSegmentReader) ReadEntity(ctx context.Context) (*activity.EntityActivityLog, error) { + if p.i == len(p.records) { + return nil, io.EOF + } + record := p.records[p.i] + p.i++ + return &activity.EntityActivityLog{Clients: record}, nil +} diff --git a/vault/logical_system_activity_write_testonly_test.go b/vault/logical_system_activity_write_testonly_test.go index b9b1a939a8c5..f104d82ad700 100644 --- a/vault/logical_system_activity_write_testonly_test.go +++ b/vault/logical_system_activity_write_testonly_test.go @@ -9,8 +9,10 @@ import ( "context" "sort" "testing" + "time" "github.com/hashicorp/vault/helper/namespace" + "github.com/hashicorp/vault/helper/timeutil" "github.com/hashicorp/vault/sdk/logical" "github.com/hashicorp/vault/vault/activity" "github.com/hashicorp/vault/vault/activity/generation" @@ -441,136 +443,165 @@ func Test_singleMonthActivityClients_populateSegments(t *testing.T) { } } -// Test_multipleMonthsActivityClients_write_entities writes 4 months of data -// splitting some months across segments and using empty segments and skipped -// segments. Entities are written and then storage is queried. The test verifies -// that the correct timestamps are present in the activity log and that the correct -// segment numbers for each month contain the correct number of clients -func Test_multipleMonthsActivityClients_write_entities(t *testing.T) { +// Test_handleActivityWriteData writes 4 months of data splitting some months +// across segments and using empty segments and skipped segments. Entities and +// precomputed queries are written and then storage is queried. 
The +// test verifies that the correct timestamps are present in the activity log and +// that the correct segment numbers for each month contain the correct number of +// clients +func Test_handleActivityWriteData(t *testing.T) { index5 := int32(5) index4 := int32(4) - data := &generation.ActivityLogMockInput{ - Write: []generation.WriteOptions{ - generation.WriteOptions_WRITE_ENTITIES, + data := []*generation.Data{ + { + // segments: 0:[x,y], 1:[z] + Month: &generation.Data_MonthsAgo{MonthsAgo: 3}, + Clients: &generation.Data_All{All: &generation.Clients{Clients: []*generation.Client{{Count: 3}}}}, + NumSegments: 2, }, - Data: []*generation.Data{ - { - // segments: 0:[x,y], 1:[z] - Month: &generation.Data_MonthsAgo{MonthsAgo: 3}, - Clients: &generation.Data_All{All: &generation.Clients{Clients: []*generation.Client{{Count: 3}}}}, - NumSegments: 2, - }, - { - // segments: 1:[a,b,c], 2:[d,e] - Month: &generation.Data_MonthsAgo{MonthsAgo: 2}, - Clients: &generation.Data_All{All: &generation.Clients{Clients: []*generation.Client{{Count: 5}}}}, - NumSegments: 3, - SkipSegmentIndexes: []int32{0}, + { + // segments: 1:[a,b,c], 2:[d,e] + Month: &generation.Data_MonthsAgo{MonthsAgo: 2}, + Clients: &generation.Data_All{All: &generation.Clients{Clients: []*generation.Client{{Count: 5}}}}, + NumSegments: 3, + SkipSegmentIndexes: []int32{0}, + }, + { + // segments: 5:[f,g] + Month: &generation.Data_MonthsAgo{MonthsAgo: 1}, + Clients: &generation.Data_Segments{ + Segments: &generation.Segments{Segments: []*generation.Segment{{ + SegmentIndex: &index5, + Clients: &generation.Clients{Clients: []*generation.Client{{Count: 2}}}, + }}}, }, - { - // segments: 5:[f,g] - Month: &generation.Data_MonthsAgo{MonthsAgo: 1}, - Clients: &generation.Data_Segments{ - Segments: &generation.Segments{Segments: []*generation.Segment{{ + }, + { + // segments: 1:[], 2:[], 4:[n], 5:[o] + Month: &generation.Data_CurrentMonth{}, + EmptySegmentIndexes: []int32{1, 2}, + Clients: 
&generation.Data_Segments{ + Segments: &generation.Segments{Segments: []*generation.Segment{ + { SegmentIndex: &index5, - Clients: &generation.Clients{Clients: []*generation.Client{{Count: 2}}}, - }}}, - }, - }, - { - // segments: 1:[], 2:[], 4:[n], 5:[o] - Month: &generation.Data_CurrentMonth{}, - EmptySegmentIndexes: []int32{1, 2}, - Clients: &generation.Data_Segments{ - Segments: &generation.Segments{Segments: []*generation.Segment{ - { - SegmentIndex: &index5, - Clients: &generation.Clients{Clients: []*generation.Client{{Count: 1}}}, - }, - { - SegmentIndex: &index4, - Clients: &generation.Clients{Clients: []*generation.Client{{Count: 1}}}, - }, - }}, - }, + Clients: &generation.Clients{Clients: []*generation.Client{{Count: 1}}}, + }, + { + SegmentIndex: &index4, + Clients: &generation.Clients{Clients: []*generation.Client{{Count: 1}}}, + }, + }}, }, }, } - core, _, _ := TestCoreUnsealed(t) - marshaled, err := protojson.Marshal(data) - require.NoError(t, err) - req := logical.TestRequest(t, logical.CreateOperation, "internal/counters/activity/write") - req.Data = map[string]interface{}{"input": string(marshaled)} - resp, err := core.systemBackend.HandleRequest(namespace.RootContext(nil), req) - require.NoError(t, err) - paths := resp.Data["paths"].([]string) - require.Len(t, paths, 9) + t.Run("write entitites", func(t *testing.T) { + core, _, _ := TestCoreUnsealed(t) + marshaled, err := protojson.Marshal(&generation.ActivityLogMockInput{ + Data: data, + Write: []generation.WriteOptions{generation.WriteOptions_WRITE_ENTITIES}, + }) + require.NoError(t, err) + req := logical.TestRequest(t, logical.CreateOperation, "internal/counters/activity/write") + req.Data = map[string]interface{}{"input": string(marshaled)} + resp, err := core.systemBackend.HandleRequest(namespace.RootContext(nil), req) + require.NoError(t, err) + paths := resp.Data["paths"].([]string) + require.Len(t, paths, 9) - times, err := core.activityLog.availableLogs(context.Background()) - 
require.NoError(t, err) - require.Len(t, times, 4) + times, err := core.activityLog.availableLogs(context.Background()) + require.NoError(t, err) + require.Len(t, times, 4) - sortPaths := func(monthPaths []string) { - sort.Slice(monthPaths, func(i, j int) bool { - iVal, _ := parseSegmentNumberFromPath(monthPaths[i]) - jVal, _ := parseSegmentNumberFromPath(monthPaths[j]) - return iVal < jVal - }) - } + sortPaths := func(monthPaths []string) { + sort.Slice(monthPaths, func(i, j int) bool { + iVal, _ := parseSegmentNumberFromPath(monthPaths[i]) + jVal, _ := parseSegmentNumberFromPath(monthPaths[j]) + return iVal < jVal + }) + } - month0Paths := paths[0:4] - month1Paths := paths[4:5] - month2Paths := paths[5:7] - month3Paths := paths[7:9] - sortPaths(month0Paths) - sortPaths(month1Paths) - sortPaths(month2Paths) - sortPaths(month3Paths) - entities := func(paths []string) map[int][]*activity.EntityRecord { - segments := make(map[int][]*activity.EntityRecord) - for _, path := range paths { - segmentNum, _ := parseSegmentNumberFromPath(path) - entry, err := core.activityLog.view.Get(context.Background(), path) - require.NoError(t, err) - if entry == nil { - segments[segmentNum] = []*activity.EntityRecord{} - continue + month0Paths := paths[0:4] + month1Paths := paths[4:5] + month2Paths := paths[5:7] + month3Paths := paths[7:9] + sortPaths(month0Paths) + sortPaths(month1Paths) + sortPaths(month2Paths) + sortPaths(month3Paths) + entities := func(paths []string) map[int][]*activity.EntityRecord { + segments := make(map[int][]*activity.EntityRecord) + for _, path := range paths { + segmentNum, _ := parseSegmentNumberFromPath(path) + entry, err := core.activityLog.view.Get(context.Background(), path) + require.NoError(t, err) + if entry == nil { + segments[segmentNum] = []*activity.EntityRecord{} + continue + } + activities := &activity.EntityActivityLog{} + err = proto.Unmarshal(entry.Value, activities) + require.NoError(t, err) + segments[segmentNum] = activities.Clients } - 
activities := &activity.EntityActivityLog{} - err = proto.Unmarshal(entry.Value, activities) - require.NoError(t, err) - segments[segmentNum] = activities.Clients + return segments } - return segments - } - month0Entities := entities(month0Paths) - require.Len(t, month0Entities, 4) - require.Contains(t, month0Entities, 1) - require.Contains(t, month0Entities, 2) - require.Contains(t, month0Entities, 4) - require.Contains(t, month0Entities, 5) - require.Len(t, month0Entities[1], 0) - require.Len(t, month0Entities[2], 0) - require.Len(t, month0Entities[4], 1) - require.Len(t, month0Entities[5], 1) + month0Entities := entities(month0Paths) + require.Len(t, month0Entities, 4) + require.Contains(t, month0Entities, 1) + require.Contains(t, month0Entities, 2) + require.Contains(t, month0Entities, 4) + require.Contains(t, month0Entities, 5) + require.Len(t, month0Entities[1], 0) + require.Len(t, month0Entities[2], 0) + require.Len(t, month0Entities[4], 1) + require.Len(t, month0Entities[5], 1) - month1Entities := entities(month1Paths) - require.Len(t, month1Entities, 1) - require.Contains(t, month1Entities, 5) - require.Len(t, month1Entities[5], 2) + month1Entities := entities(month1Paths) + require.Len(t, month1Entities, 1) + require.Contains(t, month1Entities, 5) + require.Len(t, month1Entities[5], 2) + + month2Entities := entities(month2Paths) + require.Len(t, month2Entities, 2) + require.Contains(t, month2Entities, 1) + require.Contains(t, month2Entities, 2) + require.Len(t, month2Entities[1], 3) + require.Len(t, month2Entities[2], 2) + + month3Entities := entities(month3Paths) + require.Len(t, month3Entities, 2) + require.Contains(t, month3Entities, 0) + require.Contains(t, month3Entities, 1) + require.Len(t, month3Entities[0], 2) + require.Len(t, month3Entities[1], 1) + }) + t.Run("write precomputed queries", func(t *testing.T) { + core, _, _ := TestCoreUnsealed(t) + marshaled, err := protojson.Marshal(&generation.ActivityLogMockInput{ + Data: data, + Write: 
[]generation.WriteOptions{generation.WriteOptions_WRITE_PRECOMPUTED_QUERIES}, + }) + require.NoError(t, err) + req := logical.TestRequest(t, logical.CreateOperation, "internal/counters/activity/write") + req.Data = map[string]interface{}{"input": string(marshaled)} + _, err = core.systemBackend.HandleRequest(namespace.RootContext(nil), req) + require.NoError(t, err) - month2Entities := entities(month2Paths) - require.Len(t, month2Entities, 2) - require.Contains(t, month2Entities, 1) - require.Contains(t, month2Entities, 2) - require.Len(t, month2Entities[1], 3) - require.Len(t, month2Entities[2], 2) + queries, err := core.activityLog.queryStore.QueriesAvailable(context.Background()) + require.NoError(t, err) + require.True(t, queries) - month3Entities := entities(month3Paths) - require.Len(t, month3Entities, 2) - require.Contains(t, month3Entities, 0) - require.Contains(t, month3Entities, 1) - require.Len(t, month3Entities[0], 2) - require.Len(t, month3Entities[1], 1) + now := time.Now().UTC() + start := timeutil.StartOfMonth(timeutil.MonthsPreviousTo(3, now)) + end := timeutil.EndOfMonth(now) + pq, err := core.activityLog.queryStore.Get(context.Background(), start, end) + require.NoError(t, err) + require.NotNil(t, pq) + require.Equal(t, end, pq.EndTime) + require.Equal(t, start, pq.StartTime) + require.Len(t, pq.Namespaces, 1) + require.Equal(t, uint64(12), pq.Namespaces[0].Entities) + require.Len(t, pq.Months, 4) + }) }