Skip to content

Commit

Permalink
Refresh port status every minute unless there are events to process
Browse files Browse the repository at this point in the history
Signed-off-by: Anthony Floeder <[email protected]>
  • Loading branch information
ajfloeder committed Aug 20, 2024
1 parent 9aeb430 commit a5e0f1f
Show file tree
Hide file tree
Showing 7 changed files with 30 additions and 21 deletions.
2 changes: 1 addition & 1 deletion internal/switchtec/cmd/fabric/perf.go
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,7 @@ func (cmd *BandwidthCmd) Run() error {
return err
}

// Resetting the bandwidth counter takes about 1s
// Resetting the bandwidth counter takes about 1s
time.Sleep(time.Second)

// Record the bandwidth counters
Expand Down
2 changes: 1 addition & 1 deletion internal/switchtec/pkg/switchtec/events.go
Original file line number Diff line number Diff line change
Expand Up @@ -728,7 +728,7 @@ func (dev *Device) GetGfmsEvents() ([]GfmsEvent, error) {
type response struct {
ResponseNumber uint16 // Number of GFMS Event Entries in this response
RemainingNumberFlags uint16 // [0:14] Number of GFMS Event Entries remaining in GFMS Event Queue
// [15] Flag to indicate whether the event entry buffer queue has
// [15] Flag to indicate whether the event entry buffer queue has
// been overwritten as a result of not being read in time.
Data [maxDataLength - 4]byte
}
Expand Down
16 changes: 8 additions & 8 deletions pkg/manager-event/manager.go
Original file line number Diff line number Diff line change
Expand Up @@ -61,8 +61,8 @@ import (
// "EventType": "Alert",
// "EventId": "ABCD123",
// "Severity": "Warning",
// "Message": "Fabric switch port is down",
// "MessageId": "Alert.1.0.FabricSwitchPortDown",
// "Message": "Fabric switch port is down",
// "MessageId": "Alert.1.0.FabricSwitchPortDown",
// "MessageArgs": [
// "Rabbit", "0", "Port 0"
// ],
Expand All @@ -73,16 +73,16 @@ import (
// ]
// }
//
// There should always be a Message Registry that is paired with a Message ID. In this case
// There should always be a Message Registry that is paired with a Message ID. In this case
// there would be a Message Registry with the following
//
// {
// "Id": "Alert.1.0.0",
// "RegistryPrefix": "Alert",
// "RegistryVersion": "1.0.0",
// "Messages": {
// "FabricSwitchPortDown": {
// "Description": "A Port is down on Fabric %1 Switch %2 Port %3",
// "FabricSwitchPortDown": {
// "Description": "A Port is down on Fabric %1 Switch %2 Port %3",
// "Message": "Port connectivity was lost on Fabric %1 Switch %2 Port %3",
// "Severity": "Critical",
// "NumberOfArgs": 3,
Expand All @@ -97,14 +97,14 @@ import (
//
// When a client receives an event (by creating a subscription with the event service), they
// first use the MessageId to find the corresponding registry, version, and message.
// In the above example, the MessageId is "Alert.1.0.FabricSwitchPortDown". This is broken
// In the above example, the MessageId is "Alert.1.0.FabricSwitchPortDown". This is broken
// down by the client into three parts
// - Registry Prefix: "Alert"
// - Major / Minor Version: "1.0"
// - Message Identifier: "FabricSwitchPortDown"
// - Message Identifier: "FabricSwitchPortDown"
//
// The client must then use that information to scan the available registries and locate the
// correct Message Registry. The Event Service contains a link of the registries in use, so
// correct Message Registry. The Event Service contains a link of the registries in use, so
// the client can scan those registries and do a lookup.
//
// There are a bunch of registries already created by DMTF: https://redfish.dmtf.org/registries/
Expand Down
1 change: 0 additions & 1 deletion pkg/manager-fabric/config_default.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,6 @@ switches:
- name: Rabbit
type: ManagementPort
port: 24
slot: -1
width: 16
- name: Compute 0
type: UpstreamPort
Expand Down
10 changes: 6 additions & 4 deletions pkg/manager-fabric/manager.go
Original file line number Diff line number Diff line change
Expand Up @@ -386,17 +386,17 @@ func (s *Switch) identify() error {
return err
}

paxID, err := dev.Identify()
paxId, err := dev.Identify()
if err != nil {
log.Error(err, "Identify error")
return err
}

log.V(2).Info("Identified switch device", "pax", paxID)
if id := strconv.Itoa(int(paxID)); id == s.id {
log.V(2).Info("Identified switch device", "pax", paxId)
if id := strconv.Itoa(int(paxId)); id == s.id {
s.dev = dev
s.path = path
s.paxId = paxID
s.paxId = paxId

s.model = s.getModel()
s.manufacturer = s.getManufacturer()
Expand Down Expand Up @@ -920,6 +920,8 @@ func Initialize(log ec.Logger, ctrl SwitchtecControllerInterface) error {
log.Error(err, "Failed to identify switch")
}

log.Info("Switch identified", "pax", s.paxId)

for portIdx, portConf := range switchConf.Ports {
portType := portConf.getPortType()
id := strconv.Itoa(portIdx)
Expand Down
16 changes: 12 additions & 4 deletions pkg/manager-fabric/monitor.go
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ import (
"github.com/NearNodeFlash/nnf-ec/internal/switchtec/pkg/switchtec"
)

// The Fabric Monitor is responsible for ensuring that the fabric and related sub-resource
// The Fabric Monitor is responsible for ensuring that the fabric and related sub-resource
// are updated with the latest information from the switch. This runs as a background
// thread, and periodically queries the fabric.
func NewFabricMonitor(f *Fabric) *monitor {
Expand All @@ -38,7 +38,7 @@ type monitor struct {
fabric *Fabric
}

// Run will run the Fabric Monitor forever
// Run Fabric Monitor forever
func (m *monitor) Run() {

for {
Expand All @@ -49,10 +49,18 @@ func (m *monitor) Run() {
s := &m.fabric.switches[idx]

// The normal path is when the switch is operating without issue and we can
// poll the switch for any events, and process those events
// poll the switch for any events then process those events
if s.isReady() {

if events, err := s.dev.GetEvents(); err == nil {

// In the steady state there will be no events.
// Refresh the port status to ensure we're up to date.
if len(events) == 0 {
s.refreshPortStatus()
continue
}

for _, event := range events {
physPortId, isDown := m.getEventInfo(event)

Expand Down Expand Up @@ -96,7 +104,7 @@ func (*monitor) checkSwitchStatus(s *Switch) {

const invalidPhysicalPortId = math.MaxUint8

func (m *monitor) getEventInfo(e switchtec.GfmsEvent) (uint8, bool) {
func (m *monitor) getEventInfo(e switchtec.GfmsEvent) (uint8, bool /* is down event? */) {

switch e.Id {
case switchtec.FabricLinkUp_GfmsEvent, switchtec.FabricLinkDown_GfmsEvent:
Expand Down
4 changes: 2 additions & 2 deletions pkg/manager-nnf/manager.go
Original file line number Diff line number Diff line change
Expand Up @@ -396,7 +396,7 @@ func (s *StorageService) cleanupVolumes() {
}

// Initialize is responsible for initializing the NNF Storage Service; the
// Storage Service must complete initialization without error prior to any
// Storage Service must complete initialization without error prior to any
// access to the Storage Service. Failure to initialize will cause the
// storage service to misbehave.
func (s *StorageService) Initialize(log ec.Logger, ctrl NnfControllerInterface) error {
Expand Down Expand Up @@ -529,7 +529,7 @@ func (s *StorageService) EventHandler(e event.Event) error {
}

// Check if the fabric is ready; that is all devices are enumerated and discovery
// is complete.
// is complete.
if e.Is(msgreg.FabricReadyNnf("")) {
log.V(1).Info("Fabric ready")

Expand Down

0 comments on commit a5e0f1f

Please sign in to comment.