Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[corechecks/snmp] Device Scan POC (in SNMP Integration) #21038

Closed
wants to merge 24 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion comp/netflow/flowaggregator/aggregator.go
Original file line number Diff line number Diff line change
Expand Up @@ -191,7 +191,7 @@ func (agg *FlowAggregator) sendExporterMetadata(flows []*common.Flow, flushTime
for _, exporterID := range ids {
netflowExporters = append(netflowExporters, exporterMap[namespace][exporterID])
}
metadataPayloads := metadata.BatchPayloads(namespace, "", flushTime, metadata.PayloadMetadataBatchSize, nil, nil, nil, nil, netflowExporters, nil)
metadataPayloads := metadata.BatchPayloads(namespace, "", flushTime, metadata.PayloadMetadataBatchSize, nil, nil, nil, nil, netflowExporters, nil, nil)
for _, payload := range metadataPayloads {
payloadBytes, err := json.Marshal(payload)
if err != nil {
Expand Down
29 changes: 29 additions & 0 deletions pkg/collector/corechecks/snmp/internal/checkconfig/config.go
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,7 @@ const defaultDiscoveryWorkers = 5
const defaultDiscoveryAllowedFailures = 3
const defaultDiscoveryInterval = 3600
const defaultDetectMetricsRefreshInterval = 3600
const defaultDeviceScanMaxOidsPerRun = 1000

// subnetTagKey is the prefix used for subnet tag
const subnetTagKey = "autodiscovery_subnet"
Expand Down Expand Up @@ -78,6 +79,8 @@ type InitConfig struct {
Namespace string `yaml:"namespace"`
DetectMetricsEnabled Boolean `yaml:"experimental_detect_metrics_enabled"`
DetectMetricsRefreshInterval int `yaml:"experimental_detect_metrics_refresh_interval"`
DeviceScanEnabled Boolean `yaml:"device_scan_enabled"`
DeviceScanMaxOidsPerRun int `yaml:"device_scan_max_oids_per_run"`
}

// InstanceConfig is used to deserialize integration instance config
Expand Down Expand Up @@ -137,6 +140,9 @@ type InstanceConfig struct {
DetectMetricsEnabled *Boolean `yaml:"experimental_detect_metrics_enabled"`
DetectMetricsRefreshInterval int `yaml:"experimental_detect_metrics_refresh_interval"`

DeviceScanEnabled *Boolean `yaml:"device_scan_enabled"`
DeviceScanMaxOidsPerRun int `yaml:"device_scan_max_oids_per_run"`

// `interface_configs` option is not supported by SNMP corecheck autodiscovery (`network_address`)
// it's only supported for single device instance (`ip_address`)
InterfaceConfigs InterfaceConfigs `yaml:"interface_configs"`
Expand Down Expand Up @@ -188,6 +194,12 @@ type CheckConfig struct {
DetectMetricsEnabled bool
DetectMetricsRefreshInterval int

DeviceScanEnabled bool
DeviceScanMaxOidsPerRun int
DeviceScanLastOid string
DeviceScanCurScanStart time.Time
DeviceScanCurScanOidsCount int

Network string
DiscoveryWorkers int
Workers int
Expand Down Expand Up @@ -377,6 +389,21 @@ func NewCheckConfig(rawInstance integration.Data, rawInitConfig integration.Data
c.DetectMetricsEnabled = bool(initConfig.DetectMetricsEnabled)
}

if instance.DeviceScanEnabled != nil {
c.DeviceScanEnabled = bool(*instance.DeviceScanEnabled)
} else {
c.DeviceScanEnabled = bool(initConfig.DeviceScanEnabled)
}

// TODO: TEST ME
if instance.DeviceScanMaxOidsPerRun != 0 {
c.DeviceScanMaxOidsPerRun = int(instance.DeviceScanMaxOidsPerRun)
} else if initConfig.DeviceScanMaxOidsPerRun != 0 {
c.DeviceScanMaxOidsPerRun = int(initConfig.DeviceScanMaxOidsPerRun)
} else {
c.DeviceScanMaxOidsPerRun = defaultDeviceScanMaxOidsPerRun
}

if instance.DetectMetricsRefreshInterval != 0 {
c.DetectMetricsRefreshInterval = int(instance.DetectMetricsRefreshInterval)
} else if initConfig.DetectMetricsRefreshInterval != 0 {
Expand Down Expand Up @@ -645,6 +672,8 @@ func (c *CheckConfig) Copy() *CheckConfig {
newConfig.AutodetectProfile = c.AutodetectProfile
newConfig.DetectMetricsEnabled = c.DetectMetricsEnabled
newConfig.DetectMetricsRefreshInterval = c.DetectMetricsRefreshInterval
newConfig.DeviceScanEnabled = c.DeviceScanEnabled
newConfig.DeviceScanMaxOidsPerRun = c.DeviceScanMaxOidsPerRun
newConfig.MinCollectionInterval = c.MinCollectionInterval
newConfig.InterfaceConfigs = c.InterfaceConfigs

Expand Down
22 changes: 17 additions & 5 deletions pkg/collector/corechecks/snmp/internal/common/oidtrie.go
Original file line number Diff line number Diff line change
Expand Up @@ -22,17 +22,17 @@
Children map[int]*OIDTrie
}

func newOidTrie() *OIDTrie {
func NewOidTrie() *OIDTrie {

Check failure on line 25 in pkg/collector/corechecks/snmp/internal/common/oidtrie.go

View workflow job for this annotation

GitHub Actions / windows-lint

exported: exported function NewOidTrie should have comment or be unexported (revive)

Check failure on line 25 in pkg/collector/corechecks/snmp/internal/common/oidtrie.go

View workflow job for this annotation

GitHub Actions / windows-lint

exported: exported function NewOidTrie should have comment or be unexported (revive)
return &OIDTrie{}
}

// BuildOidTrie builds the OIDTrie from a list of OIDs
func BuildOidTrie(allOIDs []string) *OIDTrie {
root := newOidTrie()
root := NewOidTrie()
for _, oid := range allOIDs {
current := root

numbers, err := oidToNumbers(oid)
numbers, err := OidToNumbers(oid)
if err != nil {
log.Debugf("error processing oid `%s`: %s", oid, err)
continue
Expand All @@ -43,15 +43,15 @@
current.Children = make(map[int]*OIDTrie)
}
if _, ok := current.Children[num]; !ok {
current.Children[num] = newOidTrie()
current.Children[num] = NewOidTrie()
}
current = current.Children[num]
}
}
return root
}

func oidToNumbers(oid string) ([]int, error) {
func OidToNumbers(oid string) ([]int, error) {

Check failure on line 54 in pkg/collector/corechecks/snmp/internal/common/oidtrie.go

View workflow job for this annotation

GitHub Actions / windows-lint

exported: exported function OidToNumbers should have comment or be unexported (revive)

Check failure on line 54 in pkg/collector/corechecks/snmp/internal/common/oidtrie.go

View workflow job for this annotation

GitHub Actions / windows-lint

exported: exported function OidToNumbers should have comment or be unexported (revive)
oid = strings.TrimLeft(oid, ".")
strNumbers := strings.Split(oid, ".")
var numbers []int
Expand Down Expand Up @@ -86,6 +86,18 @@
return current, nil
}

func (o *OIDTrie) GetNodeFromDigits(oidDigits []int) (*OIDTrie, error) {

Check failure on line 89 in pkg/collector/corechecks/snmp/internal/common/oidtrie.go

View workflow job for this annotation

GitHub Actions / windows-lint

exported: exported method OIDTrie.GetNodeFromDigits should have comment or be unexported (revive)

Check failure on line 89 in pkg/collector/corechecks/snmp/internal/common/oidtrie.go

View workflow job for this annotation

GitHub Actions / windows-lint

exported: exported method OIDTrie.GetNodeFromDigits should have comment or be unexported (revive)
current := o
for _, digit := range oidDigits {
child, ok := current.Children[digit]
if !ok {
return nil, fmt.Errorf("node `%v` not found in OIDTrie", oidDigits)
}
current = child
}
return current, nil
}

// NonLeafNodeExist checks if the oid is a known node (a node have at least one child)
func (o *OIDTrie) NonLeafNodeExist(oid string) bool {
node, err := o.getNode(oid)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -230,6 +230,7 @@ func (d *DeviceCheck) getValuesAndTags() (bool, []string, *valuestore.ResultValu
}

func (d *DeviceCheck) detectMetricsToMonitor(sess session.Session) error {

if d.config.DetectMetricsEnabled {
if d.nextAutodetectMetrics.After(timeNow()) {
return nil
Expand Down Expand Up @@ -262,7 +263,7 @@ func (d *DeviceCheck) detectMetricsToMonitor(sess session.Session) error {
}

func (d *DeviceCheck) detectAvailableMetrics() ([]profiledefinition.MetricsConfig, []profiledefinition.MetricTagConfig) {
fetchedOIDs := session.FetchAllOIDsUsingGetNext(d.session)
fetchedOIDs := session.FetchAllFirstRowOIDs(d.session)
log.Debugf("fetched OIDs: %v", fetchedOIDs)

root := common.BuildOidTrie(fetchedOIDs)
Expand Down
112 changes: 111 additions & 1 deletion pkg/collector/corechecks/snmp/internal/fetch/fetch.go
Original file line number Diff line number Diff line change
Expand Up @@ -6,8 +6,13 @@
package fetch

import (
_ "embed"
"encoding/json"
"fmt"
"github.com/DataDog/datadog-agent/pkg/collector/corechecks/snmp/internal/common"
"github.com/gosnmp/gosnmp"
"strconv"
"time"

"github.com/DataDog/datadog-agent/pkg/util/log"

Expand All @@ -34,6 +39,8 @@
}
}

const defaultDeviceScanRootOid = "1.0"

Check failure on line 43 in pkg/collector/corechecks/snmp/internal/fetch/fetch.go

View workflow job for this annotation

GitHub Actions / windows-lint

const `defaultDeviceScanRootOid` is unused (unused)
// Fetch oid values from device
// TODO: pass only specific configs instead of the whole CheckConfig
func Fetch(sess session.Session, config *checkconfig.CheckConfig) (*valuestore.ResultValueStore, error) {
Expand All @@ -59,5 +66,108 @@
}
}

return &valuestore.ResultValueStore{ScalarValues: scalarResults, ColumnValues: columnResults}, nil
results := getDeviceScanValues(sess, config)

return &valuestore.ResultValueStore{ScalarValues: scalarResults, ColumnValues: columnResults, DeviceScanValues: results}, nil
}

// BuildOidTrie2 builds the OIDTrie from a list of OIDs
func BuildOidTrie2(rawOIDTrie map[string]rawOIDTrieNode) *common.OIDTrie {
newTrieRoot := common.NewOidTrie()
BuildOidTrie2Recursive(rawOIDTrie, newTrieRoot)
return newTrieRoot
}

// BuildOidTrie2Recursive builds the OIDTrie from a list of OIDs
func BuildOidTrie2Recursive(rawOIDTrie map[string]rawOIDTrieNode, newTrieNode *common.OIDTrie) {
for key, node := range rawOIDTrie {
num, err := strconv.Atoi(key)
if err != nil {
log.Warnf("[BuildOidTrie2Recursive] Error %s", err)
return
}

if newTrieNode.Children == nil {
newTrieNode.Children = make(map[int]*common.OIDTrie)
}
if _, ok := newTrieNode.Children[num]; !ok {
newTrieNode.Children[num] = common.NewOidTrie()
}
BuildOidTrie2Recursive(node, newTrieNode.Children[num])
}
}

//go:embed oid_trie_full.json
var oidTrie []byte

type rawOIDTrieNode map[string]rawOIDTrieNode

func getDeviceScanValues(sess session.Session, config *checkconfig.CheckConfig) []gosnmp.SnmpPDU {
// TODO: avoid unmarshalling every check run
rawTrie := rawOIDTrieNode{}
err := json.Unmarshal(oidTrie, &rawTrie)
if err != nil {
log.Warnf("[FetchAllFirstRowOIDsVariables] json.Unmarshal Error %s", err)
return nil
}

//log.Warnf("[FetchAllFirstRowOIDsVariables] RAW oidTrie json %s", string(oidTrie))
//log.Warnf("[FetchAllFirstRowOIDsVariables] RAW oidTrie struct %s", rawTrie)
//log.Warnf("[FetchAllFirstRowOIDsVariables] rawTrie %+v", rawTrie)
newTrie := BuildOidTrie2(rawTrie)

//newTrieAsJson, _ := json.Marshal(newTrie)
//log.Warnf("[FetchAllFirstRowOIDsVariables] NEW Trie %+v", string(newTrieAsJson))
//for _, child := range newTrie.Children {
// log.Warnf("[FetchAllFirstRowOIDsVariables] NEW Trie child %+v", child)
//}

// TODO: Use a internal type instead of gosnmp.SnmpPDU to avoid leaking gosnmp types ?
var results []gosnmp.SnmpPDU
if config.DeviceScanEnabled {
// TODO: ONLY RUN once every X time

//rootOid := config.DeviceScanLastOid // default root Oid
//if rootOid == "" {
// // NEW DEVICE SCAN
// rootOid = defaultDeviceScanRootOid
// config.DeviceScanCurScanStart = time.Now()
// config.DeviceScanCurScanOidsCount = 0
//}

fetchStart := time.Now()
fetchedResults := session.FetchAllFirstRowOIDsVariables(sess, newTrie)

fetchDuration := time.Since(fetchStart)
log.Warnf("[FetchAllFirstRowOIDsVariables] PRINT PDUs (len: %d)", len(results))
for _, resultPdu := range fetchedResults {
log.Warnf("[FetchAllFirstRowOIDsVariables] PDU: %+v", resultPdu)
}
//config.DeviceScanCurScanOidsCount += len(fetchedResults)

log.Warnf("[FetchAllFirstRowOIDsVariables] Device Scan (Total Count: %d, Duration: %.2f Sec)",
len(fetchedResults),
fetchDuration.Seconds(),
)

//// TODO: ADD TELEMETRY for each check run
//if len(fetchedResults) == config.DeviceScanMaxOidsPerRun {
// log.Warnf("[FetchAllOIDsUsingGetNext] Partial Device Scan (Total Count: %d, Fetch Duration Ms: %d)",
// config.DeviceScanCurScanOidsCount,
// fetchDuration.Milliseconds(),
// )
// // Partial Device Scan
// //config.DeviceScanLastOid = lastOid
//} else {
// log.Warnf("[FetchAllOIDsUsingGetNext] Full Device Scan (Total Count: %d, Duration: %.2f Sec)",
// config.DeviceScanCurScanOidsCount,
// time.Since(config.DeviceScanCurScanStart).Seconds(),
// )
// // TODO: ADD TELEMETRY for complete device scan
// // Full Device Scan completed
// //config.DeviceScanLastOid = ""
//}
results = fetchedResults
}
return results
}

Large diffs are not rendered by default.

Large diffs are not rendered by default.

Original file line number Diff line number Diff line change
Expand Up @@ -45,8 +45,20 @@
interfaces := buildNetworkInterfacesMetadata(config.DeviceID, metadataStore)
ipAddresses := buildNetworkIPAddressesMetadata(config.DeviceID, metadataStore)
topologyLinks := buildNetworkTopologyMetadata(config.DeviceID, metadataStore, interfaces)

metadataPayloads := devicemetadata.BatchPayloads(config.Namespace, config.ResolvedSubnetName, collectTime, devicemetadata.PayloadMetadataBatchSize, devices, interfaces, ipAddresses, topologyLinks, nil, diagnoses)
deviceOids := buildDeviceScanMetadata(config.DeviceID, store)

metadataPayloads := devicemetadata.BatchPayloads(
config.Namespace, config.ResolvedSubnetName,
collectTime,
devicemetadata.PayloadMetadataBatchSize,
devices,
interfaces,
ipAddresses,
topologyLinks,
nil,
diagnoses,
deviceOids,
)

for _, payload := range metadataPayloads {
payloadBytes, err := json.Marshal(payload)
Expand Down Expand Up @@ -86,6 +98,35 @@
}
}

func buildDeviceScanMetadata(deviceId string, oidsValues *valuestore.ResultValueStore) []devicemetadata.DeviceOid {

Check failure on line 101 in pkg/collector/corechecks/snmp/internal/report/report_device_metadata.go

View workflow job for this annotation

GitHub Actions / windows-lint

var-naming: func parameter deviceId should be deviceID (revive)

Check failure on line 101 in pkg/collector/corechecks/snmp/internal/report/report_device_metadata.go

View workflow job for this annotation

GitHub Actions / windows-lint

var-naming: func parameter deviceId should be deviceID (revive)
var deviceOids []devicemetadata.DeviceOid
if oidsValues == nil {
return deviceOids
}
for _, variablePdu := range oidsValues.DeviceScanValues {
_, value, err := valuestore.GetResultValueFromPDU(variablePdu)
if err != nil {
log.Debugf("GetValueFromPDU error: %s", err)
continue
}

// TODO: How to store different types? Use Base64?
strValue, err := value.ToString()
if err != nil {
log.Debugf("ToString error: %s", err)
continue
}

deviceOids = append(deviceOids, devicemetadata.DeviceOid{
DeviceID: deviceId,
Oid: strings.TrimLeft(variablePdu.Name, "."),
Type: variablePdu.Type.String(), // TODO: Map to internal types?
ValueString: strValue,
})
}
return deviceOids
}

func computeInterfaceStatus(adminStatus devicemetadata.IfAdminStatus, operStatus devicemetadata.IfOperStatus) devicemetadata.InterfaceStatus {
if adminStatus == devicemetadata.AdminStatusUp {
switch {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -219,6 +219,11 @@
}, nil
}

// Walk retrieves a subtree of values using GETNEXT
func (s *FakeSession) Walk(rootOid string, walkFn gosnmp.WalkFunc) error {

Check failure on line 223 in pkg/collector/corechecks/snmp/internal/session/fake_session.go

View workflow job for this annotation

GitHub Actions / windows-lint

receiver-naming: receiver name s should be consistent with previous receiver name fs for FakeSession (revive)
return fmt.Errorf("NOT IMPLEMENTED")
}

// SetByte adds an OctetString PDU with the given OID and value
func (fs *FakeSession) SetByte(oid string, value []byte) *FakeSession {
return fs.Set(oid, gosnmp.OctetString, value)
Expand Down
Loading
Loading