diff --git a/internal/hcs/system.go b/internal/hcs/system.go index de0d6ebdd1..87b36adacf 100644 --- a/internal/hcs/system.go +++ b/internal/hcs/system.go @@ -17,6 +17,7 @@ import ( hcsschema "github.com/Microsoft/hcsshim/internal/hcs/schema2" "github.com/Microsoft/hcsshim/internal/jobobject" "github.com/Microsoft/hcsshim/internal/log" + "github.com/Microsoft/hcsshim/internal/logfields" "github.com/Microsoft/hcsshim/internal/oc" "github.com/Microsoft/hcsshim/internal/timeout" "github.com/Microsoft/hcsshim/internal/vmcompute" @@ -33,7 +34,7 @@ type System struct { waitBlock chan struct{} waitError error exitError error - os, typ string + os, typ, owner string startTime time.Time } @@ -138,6 +139,7 @@ func (computeSystem *System) getCachedProperties(ctx context.Context) error { } computeSystem.typ = strings.ToLower(props.SystemType) computeSystem.os = strings.ToLower(props.RuntimeOSType) + computeSystem.owner = strings.ToLower(props.Owner) if computeSystem.os == "" && computeSystem.typ == "container" { // Pre-RS5 HCS did not return the OS, but it only supported containers // that ran Windows. @@ -337,9 +339,10 @@ func (computeSystem *System) Properties(ctx context.Context, types ...schema1.Pr // statisticsInProc emulates what HCS does to grab statistics for a given container with a small // change to make grabbing the private working set total much more efficient. -func (computeSystem *System) statisticsInProc(ctx context.Context) (*hcsschema.Properties, error) { +func (computeSystem *System) statisticsInProc(ctx context.Context) (*hcsschema.Statistics, error) { // Start timestamp for these stats before we grab them to match HCS timestamp := time.Now() + // In the future we can make use of some new functionality in the HCS that allows you // to pass a job object for HCS to use for the container. Currently, the only way we'll // be able to open the job/silo is if we're running as SYSTEM. @@ -368,72 +371,86 @@ func (computeSystem *System) statisticsInProc(ctx context.Context) (*hcsschema.P return nil, err } + // This calculates the private working set more efficiently than HCS does. HCS calls NtQuerySystemInformation + // with the class SystemProcessInformation which returns an array containing system information for *every* + // process running on the machine. They then grab the pids that are running in the container and filter down + // the entries in the array to only what's running in that silo and start tallying up the total. This doesn't + // work well as performance should get worse if more processess are running on the machine in general and not + // just in the container. All of the additional information besides the WorkingSetPrivateSize field is ignored + // as well which isn't great and is wasted work to fetch. + // + // HCS only let's you grab statistics in an all or nothing fashion, so we can't just grab the private + // working set ourselves and ask for everything else seperately. The optimization we can make here is + // to open the silo ourselves and do the same queries for the rest of the info, as well as calculating + // the private working set in a more efficient manner by: + // + // 1. Find the pids running in the silo + // 2. Get a process handle for every process (only need PROCESS_QUERY_LIMITED_INFORMATION access) + // 3. Call NtQueryInformationProcess on each process with the class ProcessVmCounters + // 4. Tally up the total using the field PrivateWorkingSetSize in VM_COUNTERS_EX2. privateWorkingSet, err := job.PrivateWorkingSet() if err != nil { return nil, err } - return &hcsschema.Properties{ - Statistics: &hcsschema.Statistics{ - Timestamp: timestamp, - ContainerStartTime: computeSystem.startTime, - Uptime100ns: uint64(time.Since(computeSystem.startTime).Nanoseconds()) / 100, - Memory: &hcsschema.MemoryStats{ - MemoryUsageCommitBytes: memInfo.JobMemory, - MemoryUsageCommitPeakBytes: memInfo.PeakJobMemoryUsed, - MemoryUsagePrivateWorkingSetBytes: privateWorkingSet, - }, - Processor: &hcsschema.ProcessorStats{ - RuntimeKernel100ns: uint64(processorInfo.TotalKernelTime), - RuntimeUser100ns: uint64(processorInfo.TotalUserTime), - TotalRuntime100ns: uint64(processorInfo.TotalKernelTime + processorInfo.TotalUserTime), - }, - Storage: &hcsschema.StorageStats{ - ReadCountNormalized: uint64(storageInfo.ReadStats.IoCount), - ReadSizeBytes: storageInfo.ReadStats.TotalSize, - WriteCountNormalized: uint64(storageInfo.WriteStats.IoCount), - WriteSizeBytes: storageInfo.WriteStats.TotalSize, - }, + return &hcsschema.Statistics{ + Timestamp: timestamp, + ContainerStartTime: computeSystem.startTime, + Uptime100ns: uint64(time.Since(computeSystem.startTime).Nanoseconds()) / 100, + Memory: &hcsschema.MemoryStats{ + MemoryUsageCommitBytes: memInfo.JobMemory, + MemoryUsageCommitPeakBytes: memInfo.PeakJobMemoryUsed, + MemoryUsagePrivateWorkingSetBytes: privateWorkingSet, + }, + Processor: &hcsschema.ProcessorStats{ + RuntimeKernel100ns: uint64(processorInfo.TotalKernelTime), + RuntimeUser100ns: uint64(processorInfo.TotalUserTime), + TotalRuntime100ns: uint64(processorInfo.TotalKernelTime + processorInfo.TotalUserTime), + }, + Storage: &hcsschema.StorageStats{ + ReadCountNormalized: uint64(storageInfo.ReadStats.IoCount), + ReadSizeBytes: storageInfo.ReadStats.TotalSize, + WriteCountNormalized: uint64(storageInfo.WriteStats.IoCount), + WriteSizeBytes: storageInfo.WriteStats.TotalSize, }, }, nil } // PropertiesV2 returns the requested container properties targeting a V2 schema container. -func (computeSystem *System) PropertiesV2(ctx context.Context, types ...hcsschema.PropertyType) (*hcsschema.Properties, error) { +func (computeSystem *System) PropertiesV2(ctx context.Context, types ...hcsschema.PropertyType) (_ *hcsschema.Properties, err error) { computeSystem.handleLock.RLock() defer computeSystem.handleLock.RUnlock() operation := "hcs::System::PropertiesV2" - // There's an optimization we can make here if the user is asking for Statistics only, and this is due to - // an inefficient way that HCS calculates the private working set total for a given container. HCS - // calls NtQuerySystemInformation with the class SystemProcessInformation which returns an array containing - // system information for *every* process running on the machine. They then grab the pids that are running in - // the container and filter down the entries in the array to only what's running in that silo and start tallying - // up the total. This doesn't work well as performance should get worse if more processess are running on the - // machine in general and not just in the container. All of the additional information besides the - // WorkingSetPrivateSize field is ignored as well which isn't great and is wasted work to fetch. - // - // HCS only let's you grab statistics in an all or nothing fashion, so we can't just grab the private - // working set ourselves and ask for everything else seperately. The optimization we can make here is - // to open the silo ourselves and do the same queries for the rest of the info, as well as calculating - // the private working set in a more efficient manner by: - // - // 1. Find the pids running in the silo - // 2. Get a process handle for every process (only need PROCESS_QUERY_LIMITED_INFORMATION access) - // 3. Call NtQueryInformationProcess on each process with the class ProcessVmCounters - // 4. Tally up the total using the field PrivateWorkingSetSize in VM_COUNTERS_EX2. - // - // The only caveat is the process invoking this code must be running as SYSTEM to open the silo, - // otherwise we'll fallback to asking HCS for the information. - - // First check if we're only asking for Stats to see if we can apply the above optimization. We should - // only do this for containers and not UVMs. - if (len(types) == 1 && types[0] == hcsschema.PTStatistics) && computeSystem.typ == "container" { - properties, err := computeSystem.statisticsInProc(ctx) - // If no errors just return the info as is, if not we'll fallback to the HCS route. + // Check if any of the queries are for stats. We can grab these in process and skip the hop to + // HCS. + var statsPresent, onlyStats bool + for i, prop := range types { + if prop == hcsschema.PTStatistics { + statsPresent = true + onlyStats = len(types) == 1 + // Remove stats from the query types. + types = append(types[:i], types[i+1:]...) + } + } + + properties := &hcsschema.Properties{} + if statsPresent && computeSystem.typ == "container" { + properties.Statistics, err = computeSystem.statisticsInProc(ctx) if err == nil { - return properties, nil + // Early return if this was the only thing we were querying for. + if onlyStats { + properties.Id = computeSystem.id + properties.SystemType = computeSystem.typ + properties.RuntimeOsType = computeSystem.os + properties.Owner = computeSystem.owner + return properties, nil + } + } else { + logTxt := "failed to grab statistics in process - falling back to HCS" + log.G(ctx).WithError(err).WithField(logfields.ContainerID, computeSystem.id).Warn(logTxt) + types = append(types, hcsschema.PTStatistics) } } @@ -451,12 +468,17 @@ func (computeSystem *System) PropertiesV2(ctx context.Context, types ...hcsschem if propertiesJSON == "" { return nil, ErrUnexpectedValue } - properties := &hcsschema.Properties{} - if err := json.Unmarshal([]byte(propertiesJSON), properties); err != nil { + hcsProps := &hcsschema.Properties{} + if err := json.Unmarshal([]byte(propertiesJSON), hcsProps); err != nil { return nil, makeSystemError(computeSystem, operation, err, nil) } + // Copy over stats if we might've failed the inProc query. + if properties.Statistics != nil { + hcsProps.Statistics = properties.Statistics + hcsProps.Owner = computeSystem.owner + } - return properties, nil + return hcsProps, nil } // Pause pauses the execution of the computeSystem. This feature is not enabled in TP5. diff --git a/test/vendor/github.com/Microsoft/hcsshim/internal/hcs/system.go b/test/vendor/github.com/Microsoft/hcsshim/internal/hcs/system.go index de0d6ebdd1..87b36adacf 100644 --- a/test/vendor/github.com/Microsoft/hcsshim/internal/hcs/system.go +++ b/test/vendor/github.com/Microsoft/hcsshim/internal/hcs/system.go @@ -17,6 +17,7 @@ import ( hcsschema "github.com/Microsoft/hcsshim/internal/hcs/schema2" "github.com/Microsoft/hcsshim/internal/jobobject" "github.com/Microsoft/hcsshim/internal/log" + "github.com/Microsoft/hcsshim/internal/logfields" "github.com/Microsoft/hcsshim/internal/oc" "github.com/Microsoft/hcsshim/internal/timeout" "github.com/Microsoft/hcsshim/internal/vmcompute" @@ -33,7 +34,7 @@ type System struct { waitBlock chan struct{} waitError error exitError error - os, typ string + os, typ, owner string startTime time.Time } @@ -138,6 +139,7 @@ func (computeSystem *System) getCachedProperties(ctx context.Context) error { } computeSystem.typ = strings.ToLower(props.SystemType) computeSystem.os = strings.ToLower(props.RuntimeOSType) + computeSystem.owner = strings.ToLower(props.Owner) if computeSystem.os == "" && computeSystem.typ == "container" { // Pre-RS5 HCS did not return the OS, but it only supported containers // that ran Windows. @@ -337,9 +339,10 @@ func (computeSystem *System) Properties(ctx context.Context, types ...schema1.Pr // statisticsInProc emulates what HCS does to grab statistics for a given container with a small // change to make grabbing the private working set total much more efficient. -func (computeSystem *System) statisticsInProc(ctx context.Context) (*hcsschema.Properties, error) { +func (computeSystem *System) statisticsInProc(ctx context.Context) (*hcsschema.Statistics, error) { // Start timestamp for these stats before we grab them to match HCS timestamp := time.Now() + // In the future we can make use of some new functionality in the HCS that allows you // to pass a job object for HCS to use for the container. Currently, the only way we'll // be able to open the job/silo is if we're running as SYSTEM. @@ -368,72 +371,86 @@ func (computeSystem *System) statisticsInProc(ctx context.Context) (*hcsschema.P return nil, err } + // This calculates the private working set more efficiently than HCS does. HCS calls NtQuerySystemInformation + // with the class SystemProcessInformation which returns an array containing system information for *every* + // process running on the machine. They then grab the pids that are running in the container and filter down + // the entries in the array to only what's running in that silo and start tallying up the total. This doesn't + // work well as performance should get worse if more processess are running on the machine in general and not + // just in the container. All of the additional information besides the WorkingSetPrivateSize field is ignored + // as well which isn't great and is wasted work to fetch. + // + // HCS only let's you grab statistics in an all or nothing fashion, so we can't just grab the private + // working set ourselves and ask for everything else seperately. The optimization we can make here is + // to open the silo ourselves and do the same queries for the rest of the info, as well as calculating + // the private working set in a more efficient manner by: + // + // 1. Find the pids running in the silo + // 2. Get a process handle for every process (only need PROCESS_QUERY_LIMITED_INFORMATION access) + // 3. Call NtQueryInformationProcess on each process with the class ProcessVmCounters + // 4. Tally up the total using the field PrivateWorkingSetSize in VM_COUNTERS_EX2. privateWorkingSet, err := job.PrivateWorkingSet() if err != nil { return nil, err } - return &hcsschema.Properties{ - Statistics: &hcsschema.Statistics{ - Timestamp: timestamp, - ContainerStartTime: computeSystem.startTime, - Uptime100ns: uint64(time.Since(computeSystem.startTime).Nanoseconds()) / 100, - Memory: &hcsschema.MemoryStats{ - MemoryUsageCommitBytes: memInfo.JobMemory, - MemoryUsageCommitPeakBytes: memInfo.PeakJobMemoryUsed, - MemoryUsagePrivateWorkingSetBytes: privateWorkingSet, - }, - Processor: &hcsschema.ProcessorStats{ - RuntimeKernel100ns: uint64(processorInfo.TotalKernelTime), - RuntimeUser100ns: uint64(processorInfo.TotalUserTime), - TotalRuntime100ns: uint64(processorInfo.TotalKernelTime + processorInfo.TotalUserTime), - }, - Storage: &hcsschema.StorageStats{ - ReadCountNormalized: uint64(storageInfo.ReadStats.IoCount), - ReadSizeBytes: storageInfo.ReadStats.TotalSize, - WriteCountNormalized: uint64(storageInfo.WriteStats.IoCount), - WriteSizeBytes: storageInfo.WriteStats.TotalSize, - }, + return &hcsschema.Statistics{ + Timestamp: timestamp, + ContainerStartTime: computeSystem.startTime, + Uptime100ns: uint64(time.Since(computeSystem.startTime).Nanoseconds()) / 100, + Memory: &hcsschema.MemoryStats{ + MemoryUsageCommitBytes: memInfo.JobMemory, + MemoryUsageCommitPeakBytes: memInfo.PeakJobMemoryUsed, + MemoryUsagePrivateWorkingSetBytes: privateWorkingSet, + }, + Processor: &hcsschema.ProcessorStats{ + RuntimeKernel100ns: uint64(processorInfo.TotalKernelTime), + RuntimeUser100ns: uint64(processorInfo.TotalUserTime), + TotalRuntime100ns: uint64(processorInfo.TotalKernelTime + processorInfo.TotalUserTime), + }, + Storage: &hcsschema.StorageStats{ + ReadCountNormalized: uint64(storageInfo.ReadStats.IoCount), + ReadSizeBytes: storageInfo.ReadStats.TotalSize, + WriteCountNormalized: uint64(storageInfo.WriteStats.IoCount), + WriteSizeBytes: storageInfo.WriteStats.TotalSize, }, }, nil } // PropertiesV2 returns the requested container properties targeting a V2 schema container. -func (computeSystem *System) PropertiesV2(ctx context.Context, types ...hcsschema.PropertyType) (*hcsschema.Properties, error) { +func (computeSystem *System) PropertiesV2(ctx context.Context, types ...hcsschema.PropertyType) (_ *hcsschema.Properties, err error) { computeSystem.handleLock.RLock() defer computeSystem.handleLock.RUnlock() operation := "hcs::System::PropertiesV2" - // There's an optimization we can make here if the user is asking for Statistics only, and this is due to - // an inefficient way that HCS calculates the private working set total for a given container. HCS - // calls NtQuerySystemInformation with the class SystemProcessInformation which returns an array containing - // system information for *every* process running on the machine. They then grab the pids that are running in - // the container and filter down the entries in the array to only what's running in that silo and start tallying - // up the total. This doesn't work well as performance should get worse if more processess are running on the - // machine in general and not just in the container. All of the additional information besides the - // WorkingSetPrivateSize field is ignored as well which isn't great and is wasted work to fetch. - // - // HCS only let's you grab statistics in an all or nothing fashion, so we can't just grab the private - // working set ourselves and ask for everything else seperately. The optimization we can make here is - // to open the silo ourselves and do the same queries for the rest of the info, as well as calculating - // the private working set in a more efficient manner by: - // - // 1. Find the pids running in the silo - // 2. Get a process handle for every process (only need PROCESS_QUERY_LIMITED_INFORMATION access) - // 3. Call NtQueryInformationProcess on each process with the class ProcessVmCounters - // 4. Tally up the total using the field PrivateWorkingSetSize in VM_COUNTERS_EX2. - // - // The only caveat is the process invoking this code must be running as SYSTEM to open the silo, - // otherwise we'll fallback to asking HCS for the information. - - // First check if we're only asking for Stats to see if we can apply the above optimization. We should - // only do this for containers and not UVMs. - if (len(types) == 1 && types[0] == hcsschema.PTStatistics) && computeSystem.typ == "container" { - properties, err := computeSystem.statisticsInProc(ctx) - // If no errors just return the info as is, if not we'll fallback to the HCS route. + // Check if any of the queries are for stats. We can grab these in process and skip the hop to + // HCS. + var statsPresent, onlyStats bool + for i, prop := range types { + if prop == hcsschema.PTStatistics { + statsPresent = true + onlyStats = len(types) == 1 + // Remove stats from the query types. + types = append(types[:i], types[i+1:]...) + } + } + + properties := &hcsschema.Properties{} + if statsPresent && computeSystem.typ == "container" { + properties.Statistics, err = computeSystem.statisticsInProc(ctx) if err == nil { - return properties, nil + // Early return if this was the only thing we were querying for. + if onlyStats { + properties.Id = computeSystem.id + properties.SystemType = computeSystem.typ + properties.RuntimeOsType = computeSystem.os + properties.Owner = computeSystem.owner + return properties, nil + } + } else { + logTxt := "failed to grab statistics in process - falling back to HCS" + log.G(ctx).WithError(err).WithField(logfields.ContainerID, computeSystem.id).Warn(logTxt) + types = append(types, hcsschema.PTStatistics) } } @@ -451,12 +468,17 @@ func (computeSystem *System) PropertiesV2(ctx context.Context, types ...hcsschem if propertiesJSON == "" { return nil, ErrUnexpectedValue } - properties := &hcsschema.Properties{} - if err := json.Unmarshal([]byte(propertiesJSON), properties); err != nil { + hcsProps := &hcsschema.Properties{} + if err := json.Unmarshal([]byte(propertiesJSON), hcsProps); err != nil { return nil, makeSystemError(computeSystem, operation, err, nil) } + // Copy over stats if we might've failed the inProc query. + if properties.Statistics != nil { + hcsProps.Statistics = properties.Statistics + hcsProps.Owner = computeSystem.owner + } - return properties, nil + return hcsProps, nil } // Pause pauses the execution of the computeSystem. This feature is not enabled in TP5.