Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Implement scaling latency metrics through revisions #983

Merged
merged 62 commits into from
Jul 22, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
62 commits
Select commit Hold shift + click to select a range
b5ba2a9
go.mod: fix dependency version
Omrigan Jul 9, 2024
3b19cc5
lint: bump golangci-lint to v1.59.1 and fix new warnings
Omrigan Jul 9, 2024
805292d
Implement scaling latency metrics through logical clock
Omrigan Jun 20, 2024
628552a
generate CRD
Omrigan Jul 4, 2024
d5f6798
add kind
Omrigan Jul 4, 2024
3af9d62
fix lint
Omrigan Jul 5, 2024
70fef0b
replace kind with flags
Omrigan Jul 8, 2024
20be2d9
finish the implementation
Omrigan Jul 8, 2024
fc2e3d6
fix earliest
Omrigan Jul 8, 2024
e6b7144
fix tests
Omrigan Jul 8, 2024
0e1ab3b
self-review changes
Omrigan Jul 8, 2024
ca9e6c4
couple of renames
Omrigan Jul 8, 2024
5561734
add comments
Omrigan Jul 9, 2024
63605e1
don't exclude Action from exhaustruct
Omrigan Jul 9, 2024
b43dd40
move DesiredLogicalTime out of Guest
Omrigan Jul 9, 2024
eb1e6b1
small changes
Omrigan Jul 9, 2024
2c13d94
mErge branch 'main' into oleg/latency-metrics
Omrigan Jul 11, 2024
3118e51
move labels calculation into logiclock pkg
Omrigan Jul 11, 2024
5a6b537
cleanup extract vm info
Omrigan Jul 11, 2024
e26c155
add tests for logic clock
Omrigan Jul 11, 2024
fa758c0
get rid of separate UpdateLogicalTime
Omrigan Jul 11, 2024
5f9c9d1
rewind clock everywhere
Omrigan Jul 11, 2024
96890c8
fix lint
Omrigan Jul 11, 2024
896d0df
tmp: no dedicated .UpdateLogicalTime for Plugin
Omrigan Jul 11, 2024
ce1dbb3
revert unused param
Omrigan Jul 11, 2024
358f6c7
rename logic clock to revisions
Omrigan Jul 12, 2024
73de584
revert some of the changes and fix tests
Omrigan Jul 12, 2024
365af63
fix tests
Omrigan Jul 14, 2024
7b6b08d
more changes of wording
Omrigan Jul 14, 2024
af5b683
codestyle fixes
Omrigan Jul 14, 2024
fc436aa
add aux metrics
Omrigan Jul 15, 2024
df45965
test minor latency
Omrigan Jul 15, 2024
1c80201
couple more tests
Omrigan Jul 15, 2024
62bb952
fix comment
Omrigan Jul 15, 2024
a6e5781
add revisions e2e test
Omrigan Jul 16, 2024
6389105
rollback accidental changes
Omrigan Jul 16, 2024
76df85b
tmp: use expectedRevision in tests
Omrigan Jul 16, 2024
0908e1b
incremental cleanup
Omrigan Jul 16, 2024
5b0f93d
iterative changes
Omrigan Jul 16, 2024
0e0b2ca
prevent unbounded growth
Omrigan Jul 17, 2024
9facc7d
fix format
Omrigan Jul 18, 2024
88675b9
one more test
Omrigan Jul 18, 2024
49813c7
simplify tests
Omrigan Jul 18, 2024
7f39806
fix tiny thing
Omrigan Jul 19, 2024
be700ff
replace boolean values in metrics with direction label
Omrigan Jul 19, 2024
280163e
Merge branch 'main' into oleg/latency-metrics
Omrigan Jul 19, 2024
9b07933
misc renames
Omrigan Jul 19, 2024
84fa062
rollback extra diff
Omrigan Jul 21, 2024
0df27ac
add initial revision
Omrigan Jul 21, 2024
d9dee98
fix revision updating when it is the same
Omrigan Jul 21, 2024
6d6c25f
Merge branch 'main' into oleg/latency-metrics
Omrigan Jul 21, 2024
2099577
don't propagate if we are already current
Omrigan Jul 21, 2024
3e3c765
fix the test
Omrigan Jul 21, 2024
7e5ad35
fix test
Omrigan Jul 21, 2024
f269424
fix test
Omrigan Jul 21, 2024
5228335
fix test
Omrigan Jul 21, 2024
8dcd87a
add new test
Omrigan Jul 22, 2024
49bd61a
change behaviour to count both upscalings
Omrigan Jul 22, 2024
3f59c87
final thing
Omrigan Jul 22, 2024
eb491f2
Merge branch 'main' into oleg/latency-metrics
Omrigan Jul 22, 2024
5a2355e
but but
Omrigan Jul 22, 2024
fcd957f
Merge branch 'main' into oleg/latency-metrics
Omrigan Jul 22, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
76 changes: 76 additions & 0 deletions neonvm/apis/neonvm/v1/virtualmachine_types.go
Original file line number Diff line number Diff line change
Expand Up @@ -21,8 +21,10 @@ import (
"errors"
"fmt"
"slices"
"time"

"github.com/samber/lo"
"go.uber.org/zap/zapcore"

corev1 "k8s.io/api/core/v1"
"k8s.io/apimachinery/pkg/api/resource"
Expand Down Expand Up @@ -139,6 +141,15 @@ type VirtualMachineSpec struct {
// +kubebuilder:default:=true
// +optional
EnableSSH *bool `json:"enableSSH,omitempty"`

// TargetRevision is the identifier set by external party to track when changes to the spec
// propagate to the VM.
//
// If a certain value is written into Spec.TargetRevision together with the changes, and
// the same value is observed in Status.CurrentRevision, it means that the changes were
// propagated to the VM.
// +optional
TargetRevision *RevisionWithTime `json:"targetRevision,omitempty"`
}

func (spec *VirtualMachineSpec) Resources() VirtualMachineResources {
Expand Down Expand Up @@ -215,6 +226,66 @@ func (g Guest) ValidateForMemoryProvider(p MemoryProvider) error {
return nil
}

// Flag is a bitmask of flags. The meaning is up to the user.
//
// Used in Revision below.
type Flag uint64

// Set turns on every bit of flag in f, leaving all other bits untouched.
func (f *Flag) Set(flag Flag) {
	*f = *f | flag
}

// Clear turns off every bit of flag in f, leaving all other bits untouched.
func (f *Flag) Clear(flag Flag) {
	*f &^= flag
}

// Has reports whether f and flag share at least one set bit.
//
// For a multi-bit flag this is an "any bit" check, not an "all bits" check.
func (f *Flag) Has(flag Flag) bool {
	common := *f & flag
	return common != 0
}

// Revision is an identifier, which can be assigned to a specific configuration of a VM.
// Later it can be used to track the application of the configuration.
type Revision struct {
	Value int64 `json:"value"`
	Flags Flag  `json:"flags"`
}

// ZeroRevision is the default value when revision updates are disabled.
var ZeroRevision = Revision{Value: 0, Flags: 0}

// Min returns whichever of r and other has the smaller Value.
//
// Only Value is compared; Flags play no part in the ordering. When both
// Values are equal, other is returned.
func (r Revision) Min(other Revision) Revision {
	if other.Value <= r.Value {
		return other
	}
	return r
}

// WithTime pairs r with the timestamp t and returns the result as a
// RevisionWithTime whose UpdatedAt is t wrapped via metav1.NewTime.
func (r Revision) WithTime(t time.Time) RevisionWithTime {
	stamped := RevisionWithTime{
		Revision:  r,
		UpdatedAt: metav1.NewTime(t),
	}
	return stamped
}

// MarshalLogObject implements zapcore.ObjectMarshaler, so that Revision can be used with zap.Object.
//
// It emits two fields, in this order: "value" (int64) and "flags" (the raw
// bitmask as uint64). It always returns nil.
func (r *Revision) MarshalLogObject(enc zapcore.ObjectEncoder) error {
	value, flags := r.Value, uint64(r.Flags)
	enc.AddInt64("value", value)
	enc.AddUint64("flags", flags)
	return nil
}

// RevisionWithTime contains a Revision and the time it was last updated.
type RevisionWithTime struct {
	Revision  `json:"revision"`
	UpdatedAt metav1.Time `json:"updatedAt"`
}

// MarshalLogObject implements zapcore.ObjectMarshaler, so that RevisionWithTime can be used with zap.Object.
//
// It writes "updatedAt" first and then delegates to the embedded Revision,
// whose fields are added to the same encoder (i.e. they are not nested).
func (r *RevisionWithTime) MarshalLogObject(enc zapcore.ObjectEncoder) error {
	enc.AddTime("updatedAt", r.UpdatedAt.Time)
	rev := &r.Revision
	return rev.MarshalLogObject(enc)
}

type GuestSettings struct {
// Individual lines to add to a sysctl.conf file. See sysctl.conf(5) for more
// +optional
Expand Down Expand Up @@ -534,6 +605,11 @@ type VirtualMachineStatus struct {
MemoryProvider *MemoryProvider `json:"memoryProvider,omitempty"`
// +optional
SSHSecretName string `json:"sshSecretName,omitempty"`

// CurrentRevision is updated with Spec.TargetRevision's value once
// the changes are propagated to the VM.
// +optional
CurrentRevision *RevisionWithTime `json:"currentRevision,omitempty"`
}

type VmPhase string
Expand Down
42 changes: 42 additions & 0 deletions neonvm/apis/neonvm/v1/zz_generated.deepcopy.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

59 changes: 59 additions & 0 deletions neonvm/config/crd/bases/vm.neon.tech_virtualmachines.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -2662,6 +2662,37 @@ spec:
type: boolean
serviceAccountName:
type: string
targetRevision:
description: "TargetRevision is the identifier set by external party
to track when changes to the spec propagate to the VM. \n If a certain
value is written into Spec.TargetRevision together with the changes,
and the same value is observed in Status.CurrentRevision, it means
that the changes were propagated to the VM."
properties:
revision:
description: Revision is an identifier, which can be assigned
to a specific configuration of a VM. Later it can be used to
track the application of the configuration.
properties:
flags:
description: "Flag is a bitmask of flags. The meaning is up
to the user. \n Used in Revision below."
format: int64
type: integer
value:
format: int64
type: integer
required:
- flags
- value
type: object
updatedAt:
format: date-time
type: string
required:
- revision
- updatedAt
type: object
terminationGracePeriodSeconds:
default: 5
format: int64
Expand Down Expand Up @@ -2786,6 +2817,34 @@ spec:
pattern: ^[0-9]+((\.[0-9]*)?|m)
type: integer
x-kubernetes-int-or-string: true
currentRevision:
description: CurrentRevision is updated with Spec.TargetRevision's
value once the changes are propagated to the VM.
properties:
revision:
description: Revision is an identifier, which can be assigned
to a specific configuration of a VM. Later it can be used to
track the application of the configuration.
properties:
flags:
description: "Flag is a bitmask of flags. The meaning is up
to the user. \n Used in Revision below."
format: int64
type: integer
value:
format: int64
type: integer
required:
- flags
- value
type: object
updatedAt:
format: date-time
type: string
required:
- revision
- updatedAt
type: object
extraNetIP:
type: string
extraNetMask:
Expand Down
18 changes: 18 additions & 0 deletions neonvm/controllers/vm_controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -807,9 +807,27 @@ func (r *VMReconciler) doReconcile(ctx context.Context, vm *vmv1.VirtualMachine)
// do nothing
}

// Propagate TargetRevision to CurrentRevision. This is done only if the VM is fully
// reconciled and running.
if vm.Status.Phase == vmv1.VmRunning {
propagateRevision(vm)
}

return nil
}

// propagateRevision copies vm.Spec.TargetRevision into vm.Status.CurrentRevision,
// stamping it with the current time.
//
// It does nothing when no target revision is set, or when the current
// revision already carries the same Revision as the target. Only the Revision
// is compared, not the timestamp, so an unchanged revision keeps its existing
// UpdatedAt.
func propagateRevision(vm *vmv1.VirtualMachine) {
	target := vm.Spec.TargetRevision
	if target == nil {
		return
	}
	if current := vm.Status.CurrentRevision; current != nil && current.Revision == target.Revision {
		return
	}
	updated := target.Revision.WithTime(time.Now())
	vm.Status.CurrentRevision = &updated
}

func pickMemoryProvider(config *ReconcilerConfig, vm *vmv1.VirtualMachine) vmv1.MemoryProvider {
if p := vm.Spec.Guest.MemoryProvider; p != nil {
return *p
Expand Down
23 changes: 14 additions & 9 deletions pkg/agent/core/action.go
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ import (

"go.uber.org/zap/zapcore"

vmv1 "github.com/neondatabase/autoscaling/neonvm/apis/neonvm/v1"
"github.com/neondatabase/autoscaling/pkg/api"
)

Expand All @@ -21,24 +22,28 @@ type ActionWait struct {
}

type ActionPluginRequest struct {
LastPermit *api.Resources `json:"current"`
Target api.Resources `json:"target"`
Metrics *api.Metrics `json:"metrics"`
LastPermit *api.Resources `json:"current"`
Target api.Resources `json:"target"`
Metrics *api.Metrics `json:"metrics"`
TargetRevision vmv1.RevisionWithTime `json:"targetRevision"`
}

type ActionNeonVMRequest struct {
Current api.Resources `json:"current"`
Target api.Resources `json:"target"`
Current api.Resources `json:"current"`
Target api.Resources `json:"target"`
TargetRevision vmv1.RevisionWithTime `json:"targetRevision"`
}

type ActionMonitorDownscale struct {
Current api.Resources `json:"current"`
Target api.Resources `json:"target"`
Current api.Resources `json:"current"`
Target api.Resources `json:"target"`
TargetRevision vmv1.RevisionWithTime `json:"targetRevision"`
}

type ActionMonitorUpscale struct {
Current api.Resources `json:"current"`
Target api.Resources `json:"target"`
Current api.Resources `json:"current"`
Target api.Resources `json:"target"`
TargetRevision vmv1.RevisionWithTime `json:"targetRevision"`
}

func addObjectPtr[T zapcore.ObjectMarshaler](enc zapcore.ObjectEncoder, key string, value *T) error {
Expand Down
28 changes: 17 additions & 11 deletions pkg/agent/core/dumpstate.go
Original file line number Diff line number Diff line change
Expand Up @@ -33,23 +33,26 @@ func (d StateDump) MarshalJSON() ([]byte, error) {
func (s *State) Dump() StateDump {
return StateDump{
internal: state{
Debug: s.internal.Debug,
Config: s.internal.Config,
VM: s.internal.VM,
Plugin: s.internal.Plugin.deepCopy(),
Monitor: s.internal.Monitor.deepCopy(),
NeonVM: s.internal.NeonVM.deepCopy(),
Metrics: shallowCopy[SystemMetrics](s.internal.Metrics),
Debug: s.internal.Debug,
Config: s.internal.Config,
VM: s.internal.VM,
Plugin: s.internal.Plugin.deepCopy(),
Monitor: s.internal.Monitor.deepCopy(),
NeonVM: s.internal.NeonVM.deepCopy(),
Metrics: shallowCopy[SystemMetrics](s.internal.Metrics),
TargetRevision: s.internal.TargetRevision,
LastDesiredResources: s.internal.LastDesiredResources,
},
}
}

func (s *pluginState) deepCopy() pluginState {
return pluginState{
OngoingRequest: s.OngoingRequest,
LastRequest: shallowCopy[pluginRequested](s.LastRequest),
LastFailureAt: shallowCopy[time.Time](s.LastFailureAt),
Permit: shallowCopy[api.Resources](s.Permit),
OngoingRequest: s.OngoingRequest,
LastRequest: shallowCopy[pluginRequested](s.LastRequest),
LastFailureAt: shallowCopy[time.Time](s.LastFailureAt),
Permit: shallowCopy[api.Resources](s.Permit),
CurrentRevision: s.CurrentRevision,
}
}

Expand All @@ -61,6 +64,7 @@ func (s *monitorState) deepCopy() monitorState {
Approved: shallowCopy[api.Resources](s.Approved),
DownscaleFailureAt: shallowCopy[time.Time](s.DownscaleFailureAt),
UpscaleFailureAt: shallowCopy[time.Time](s.UpscaleFailureAt),
CurrentRevision: s.CurrentRevision,
}
}

Expand All @@ -69,5 +73,7 @@ func (s *neonvmState) deepCopy() neonvmState {
LastSuccess: shallowCopy[api.Resources](s.LastSuccess),
OngoingRequested: shallowCopy[api.Resources](s.OngoingRequested),
RequestFailedAt: shallowCopy[time.Time](s.RequestFailedAt),
TargetRevision: s.TargetRevision,
CurrentRevision: s.CurrentRevision,
}
}
Loading
Loading