Skip to content

Commit

Permalink
Merge branch 'master' into release-v0.1.1
Browse files Browse the repository at this point in the history
Signed-off-by: Matt Richerson <[email protected]>
  • Loading branch information
matthew-richerson committed Aug 30, 2024
2 parents 5fbf581 + cb407b1 commit 69291d8
Show file tree
Hide file tree
Showing 16 changed files with 444 additions and 48 deletions.
2 changes: 1 addition & 1 deletion internal/switchtec/pkg/switchtec/events.go
Original file line number Diff line number Diff line change
Expand Up @@ -728,7 +728,7 @@ func (dev *Device) GetGfmsEvents() ([]GfmsEvent, error) {
type response struct {
ResponseNumber uint16 // Number of GFMS Event Entries in this response
RemainingNumberFlags uint16 // [0:14] Number of GFMS Event Entries remaining in GFMS Event Queue
// [15] Flag to indicatre whether the event entry buffer queue has
// [15] Flag to indicate whether the event entry buffer queue has
// been overwritten as a result of not being read in time.
Data [maxDataLength - 4]byte
}
Expand Down
8 changes: 7 additions & 1 deletion pkg/manager-fabric/manager.go
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright 2020, 2021, 2022 Hewlett Packard Enterprise Development LP
* Copyright 2020-2024 Hewlett Packard Enterprise Development LP
* Other additional copyright holders may be indicated within.
*
* The entirety of this work is licensed under the Apache License,
Expand Down Expand Up @@ -381,6 +381,7 @@ func (s *Switch) identify() error {
if os.IsNotExist(err) {
continue
} else if err != nil {
s.path = "" // Test this; it's easier than testing s.dev.
log.Error(err, "Error opening path")
return err
}
Expand Down Expand Up @@ -787,6 +788,11 @@ func (p *Port) bind() error {
break
}

if s.path == "" {
// See s.identify()
panic(fmt.Sprintf("Unable to identify switch for port: Initiator Port %d, Logical Port %d, PDFID: %#04x", initiatorPort.config.Port, logicalPortId, endpoint.pdfid))
}

log.Info("Binding Port")
if err := s.dev.Bind(uint8(initiatorPort.config.Port), uint8(logicalPortId), endpoint.pdfid); err != nil {
log.Error(err, "Bind Failed")
Expand Down
16 changes: 12 additions & 4 deletions pkg/manager-fabric/monitor.go
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,7 @@ type monitor struct {
fabric *Fabric
}

// Run will run the Fabirc Monitor forever
// Run Fabric Monitor forever
func (m *monitor) Run() {

for {
Expand All @@ -49,10 +49,18 @@ func (m *monitor) Run() {
s := &m.fabric.switches[idx]

// The normal path is when the switch is operating without issue and we can
// poll the switch for any events, and process those events
// poll the switch for any events then process those events
if s.isReady() {

if events, err := s.dev.GetEvents(); err == nil {

// In the steady state there will be no events.
// Refresh the port status to ensure we're up to date.
if len(events) == 0 {
s.refreshPortStatus()
continue
}

for _, event := range events {
physPortId, isDown := m.getEventInfo(event)

Expand All @@ -77,7 +85,7 @@ func (m *monitor) Run() {

func (*monitor) checkSwitchStatus(s *Switch) {

// Check if the switch path changed by trying to re-identifying the switch.
// Check if the switch path changed by trying to re-identify the switch.
// If the switch is found, it's likely the switch path has changed and we
// need to re-open the switch.
if err := s.identify(); err != nil {
Expand All @@ -96,7 +104,7 @@ func (*monitor) checkSwitchStatus(s *Switch) {

const invalidPhysicalPortId = math.MaxUint8

func (m *monitor) getEventInfo(e switchtec.GfmsEvent) (uint8, bool) {
func (m *monitor) getEventInfo(e switchtec.GfmsEvent) (uint8, bool /* is down event? */) {

switch e.Id {
case switchtec.FabricLinkUp_GfmsEvent, switchtec.FabricLinkDown_GfmsEvent:
Expand Down
4 changes: 2 additions & 2 deletions pkg/manager-nnf/aer.go
Original file line number Diff line number Diff line change
Expand Up @@ -109,8 +109,8 @@ func (aer *AerService) StorageServiceIdStoragePoolIdCapacitySourceIdGet(id0 stri
// StorageServiceIdStoragePoolIdCapacitySourceIdProvidingVolumesGet forwards the
// request to the wrapped service (aer.s) and hands the resulting error to aer.c,
// whose return value becomes this method's result.
func (aer *AerService) StorageServiceIdStoragePoolIdCapacitySourceIdProvidingVolumesGet(id0 string, id1 string, id2 string, model *sf.VolumeCollectionVolumeCollection) error {
	err := aer.s.StorageServiceIdStoragePoolIdCapacitySourceIdProvidingVolumesGet(id0, id1, id2, model)
	return aer.c(err)
}
func (aer *AerService) StorageServiceIdStoragePoolIdAlloctedVolumesGet(id0 string, id1 string, model *sf.VolumeCollectionVolumeCollection) error {
return aer.c(aer.s.StorageServiceIdStoragePoolIdAlloctedVolumesGet(id0, id1, model))
func (aer *AerService) StorageServiceIdStoragePoolIdAllocatedVolumesGet(id0 string, id1 string, model *sf.VolumeCollectionVolumeCollection) error {
return aer.c(aer.s.StorageServiceIdStoragePoolIdAllocatedVolumesGet(id0, id1, model))
}
func (aer *AerService) StorageServiceIdStoragePoolIdAllocatedVolumeIdGet(id0 string, id1 string, id2 string, model *sf.VolumeV161Volume) error {
return aer.c(aer.s.StorageServiceIdStoragePoolIdAllocatedVolumeIdGet(id0, id1, id2, model))
Expand Down
12 changes: 6 additions & 6 deletions pkg/manager-nnf/allocation_policy.go
Original file line number Diff line number Diff line change
Expand Up @@ -57,8 +57,8 @@ const (

// Default AllocationPolicy and AllocationCompliance
const (
DefaultAlloctionPolicy = SpareAllocationPolicyType
DefaultAlloctionCompliance = StrictAllocationComplianceType
DefaultAllocationPolicy = SpareAllocationPolicyType
DefaultAllocationCompliance = StrictAllocationComplianceType
)

// AllocationPolicyOem -
Expand All @@ -78,13 +78,13 @@ type AllocationPolicyOem struct {
// is not as desired.
func NewAllocationPolicy(config AllocationConfig, oem map[string]interface{}) AllocationPolicy {

policy := DefaultAlloctionPolicy
compliance := DefaultAlloctionCompliance
policy := DefaultAllocationPolicy
compliance := DefaultAllocationCompliance

if oem != nil {
overrides := AllocationPolicyOem{
Policy: DefaultAlloctionPolicy,
Compliance: DefaultAlloctionCompliance,
Policy: DefaultAllocationPolicy,
Compliance: DefaultAllocationCompliance,
}

if err := openapi.UnmarshalOem(oem, &overrides); err == nil {
Expand Down
2 changes: 1 addition & 1 deletion pkg/manager-nnf/api.go
Original file line number Diff line number Diff line change
Expand Up @@ -95,7 +95,7 @@ type StorageServiceApi interface {
StorageServiceIdStoragePoolIdCapacitySourcesGet(string, string, *sf.CapacitySourceCollectionCapacitySourceCollection) error
StorageServiceIdStoragePoolIdCapacitySourceIdGet(string, string, string, *sf.CapacityCapacitySource) error
StorageServiceIdStoragePoolIdCapacitySourceIdProvidingVolumesGet(string, string, string, *sf.VolumeCollectionVolumeCollection) error
StorageServiceIdStoragePoolIdAlloctedVolumesGet(string, string, *sf.VolumeCollectionVolumeCollection) error
StorageServiceIdStoragePoolIdAllocatedVolumesGet(string, string, *sf.VolumeCollectionVolumeCollection) error
StorageServiceIdStoragePoolIdAllocatedVolumeIdGet(string, string, string, *sf.VolumeV161Volume) error

StorageServiceIdStorageGroupsGet(string, *sf.StorageGroupCollectionStorageGroupCollection) error
Expand Down
10 changes: 5 additions & 5 deletions pkg/manager-nnf/manager.go
Original file line number Diff line number Diff line change
Expand Up @@ -396,7 +396,7 @@ func (s *StorageService) cleanupVolumes() {
}

// Initialize is responsible for initializing the NNF Storage Service; the
// Storage Service must complete initialization without error prior any
// Storage Service must complete initialization without error prior to any
// access to the Storage Service. Failure to initialize will cause the
// storage service to misbehave.
func (s *StorageService) Initialize(log ec.Logger, ctrl NnfControllerInterface) error {
Expand Down Expand Up @@ -529,7 +529,7 @@ func (s *StorageService) EventHandler(e event.Event) error {
}

// Check if the fabric is ready; that is all devices are enumerated and discovery
// is complete. We
// is complete.
if e.Is(msgreg.FabricReadyNnf("")) {
log.V(1).Info("Fabric ready")

Expand Down Expand Up @@ -752,7 +752,7 @@ func (*StorageService) StorageServiceIdStoragePoolIdGet(storageServiceId, storag

model.Id = p.id
model.OdataId = p.OdataId()
model.AllocatedVolumes = p.OdataIdRef("/AlloctedVolumes")
model.AllocatedVolumes = p.OdataIdRef("/AllocatedVolumes")

model.BlockSizeBytes = 4096 // TODO
model.Capacity = sf.CapacityV100Capacity{
Expand Down Expand Up @@ -901,8 +901,8 @@ func (*StorageService) StorageServiceIdStoragePoolIdCapacitySourceIdProvidingVol
return nil
}

// StorageServiceIdStoragePoolIdAlloctedVolumesGet -
func (*StorageService) StorageServiceIdStoragePoolIdAlloctedVolumesGet(storageServiceId, storagePoolId string, model *sf.VolumeCollectionVolumeCollection) error {
// StorageServiceIdStoragePoolIdAllocatedVolumesGet -
func (*StorageService) StorageServiceIdStoragePoolIdAllocatedVolumesGet(storageServiceId, storagePoolId string, model *sf.VolumeCollectionVolumeCollection) error {
_, p := findStoragePool(storageServiceId, storagePoolId)
if p == nil {
return ec.NewErrNotFound().WithEvent(msgreg.ResourceNotFoundBase(StoragePoolOdataType, storagePoolId))
Expand Down
2 changes: 1 addition & 1 deletion pkg/manager-nnf/servicer.go
Original file line number Diff line number Diff line change
Expand Up @@ -263,7 +263,7 @@ func (s *DefaultApiService) RedfishV1StorageServicesStorageServiceIdStoragePools
Name: "Allocated Volume Collection",
}

err := s.ss.StorageServiceIdStoragePoolIdAlloctedVolumesGet(storageServiceId, storagePoolId, &model)
err := s.ss.StorageServiceIdStoragePoolIdAllocatedVolumesGet(storageServiceId, storagePoolId, &model)

EncodeResponse(model, err, w)
}
Expand Down
9 changes: 7 additions & 2 deletions pkg/manager-nvme/manager.go
Original file line number Diff line number Diff line change
Expand Up @@ -110,7 +110,7 @@ type Storage struct {
// the life of the object.
capacityBytes uint64

// Unallocted capacity in bytes. This value is updated for any namespaces create or
// Unallocated capacity in bytes. This value is updated for any namespaces create or
// delete operation that might shrink or grow the byte count as expected.
unallocatedBytes uint64

Expand Down Expand Up @@ -1137,7 +1137,12 @@ func (mgr *Manager) StorageIdStoragePoolsStoragePoolIdGet(storageId, storagePool
},
}

model.RemainingCapacityPercent = int64(float64(s.unallocatedBytes/s.capacityBytes) * 100.0)
if s.capacityBytes == 0 {
// If a drive could not be found, don't divide by zero.
model.RemainingCapacityPercent = 0
} else {
model.RemainingCapacityPercent = int64(float64(s.unallocatedBytes/s.capacityBytes) * 100.0)
}

return nil
}
Expand Down
2 changes: 1 addition & 1 deletion pkg/manager-remote/manager.go
Original file line number Diff line number Diff line change
Expand Up @@ -337,7 +337,7 @@ func (*ServerStorageService) StorageServiceIdStoragePoolIdCapacitySourceIdProvid
return ec.NewErrNotAcceptable()
}

func (*ServerStorageService) StorageServiceIdStoragePoolIdAlloctedVolumesGet(storageServiceId, storagePoolId string, model *sf.VolumeCollectionVolumeCollection) error {
func (*ServerStorageService) StorageServiceIdStoragePoolIdAllocatedVolumesGet(storageServiceId, storagePoolId string, model *sf.VolumeCollectionVolumeCollection) error {
return nil
}

Expand Down
151 changes: 151 additions & 0 deletions scripts/admin-node-scripts/updateDriveFirmwareMfg.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,151 @@
#!/bin/bash

# Copyright 2023-2024 Hewlett Packard Enterprise Development LP
# Other additional copyright holders may be indicated within.
#
# The entirety of this work is licensed under the Apache License,
# Version 2.0 (the "License"); you may not use this file except
# in compliance with the License.
#
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#=================================================================
# The following assumptions are made/expected coming into this script:
# * The Rabbits are installed in the Cabinet with geo-location "x1002c?j4b0", "x1002c?j7b0", and "x1002c?j7b0n0"
# * The Rabbits are in the HPCM database & have had their SSH keys set
# * The Rabbits nC's are fully booted
#
# ============ This is Ver1.6 of the script, 8/15/2024 ===========
# This script expects and/or programs the following FW versions:
# E3.s Kioxia Drive FW = ver 1TCRS104
#===================================================================
set -e
# set -o xtrace

# usage prints the help text to stdout. It is called for -h and whenever a
# bad or missing parameter is found.
#
# All three positional arguments are mandatory (the script exits when fewer
# than three are supplied), so they are shown without optional brackets.
usage() {
    # The here-document delimiter is unquoted, so $0 below expands to the path
    # this script was invoked with; everything up to the closing EOF is printed.
    cat <<EOF
Query drive firmware version for all drives in a Rabbit. Update drives that are out-of-date.
Assumes that the following are installed on the Rabbit:
- /root/nnf-ec
- /root/tools/nvme.sh
- <firmware-file-path>
Usage: $0 [-h] RABBIT-XNAME EXPECTED-FIRMWARE FIRMWARE-FILE-PATH
Arguments:
-h display this help
Examples:
$0 -h # Display help message
$0 x1002c3j7b0n0 1TCRS104 /root/KIOXIA/1TCRS104.std # Rabbit: x1002c3j7b0n0, Expected Firmware: "1TCRS104", Firmware File Path: "x1002c3rbt7b0n0:/root/KIOXIA/1TCRS104.std"
EOF
}

# Parse command-line options.
#   -t  define the TIME alias as `time` and set TIMEFORMAT
#   -h  print the help text and exit successfully
#
# NOTE(review): bash only expands aliases in non-interactive shells when
# `shopt -s expand_aliases` is set, and nothing in this script invokes TIME,
# so -t currently has no visible effect. Kept for backward compatibility.
alias TIME=""
while getopts "th" OPTION; do
    case "${OPTION}" in
        t)
            alias TIME=time
            export TIMEFORMAT='%3lR'
            ;;
        h)
            usage
            exit 0
            ;;
        *)
            # getopts has already reported the offending option on stderr;
            # refuse to continue instead of silently ignoring it.
            usage >&2
            exit 1
            ;;
    esac
done
shift $((OPTIND - 1))

# All three positional arguments (rabbit, expected firmware, firmware file)
# are required.
if [ $# -lt 3 ]; then
    usage >&2
    exit 1
fi

# Positional arguments: Rabbit host name, firmware version every drive is
# expected to report, and the path of the firmware image on the Rabbit.
rabbit=$1
expectedFirmware=$2
firmwareFile=$3

# Per-Rabbit log files live under ./logs relative to the current directory.
LOGFILE="$(pwd)/logs/$rabbit.log"
LOGFILE_FAILURE="$(pwd)/logs/${rabbit}_Failure.log"

# TEE_LOGFILE is run through `eval`, so the path is %q-quoted to survive
# word splitting when the working directory contains spaces or shell
# metacharacters.
printf -v TEE_LOGFILE 'tee -a %q' "$LOGFILE"

# Create the log directory up front; otherwise the first redirect below
# fails and `set -e` aborts the script before anything is logged.
mkdir -p "$(pwd)/logs"

echo -e " Validating firmware $expectedFirmware is on Rabbit's drives . . ." > "$LOGFILE"

# Run nnf-ec to initialize PAX chips and drives
echo -e " Initialize PCIe Switch connections to drives first:" | eval "$TEE_LOGFILE"

# Capture only stderr of the remote command (stdout is discarded).
# The run is treated as failed when either
#   * ssh / nnf-ec returns a non-zero exit status, or
#   * nothing at all was written to stderr (the original heuristic).
# Previously only the second condition was tested, so an ssh connection
# error, which prints to stderr, was reported as a successful run.
if ! nnfEcStderr=$(ssh "$rabbit" ./nnf-ec -initializeAndExit 2>&1 >/dev/null) || [ -z "$nnfEcStderr" ]
then
    DATE_TIME=$(date '+%Y-%m-%d %H:%M:%S') # Date/time stamp for the log file
    echo -e "\nBye-bye with an NNF-EC Failure at $DATE_TIME!\n" | eval "$TEE_LOGFILE"
    cp "$LOGFILE" "$LOGFILE_FAILURE"
    exit 1
fi

echo -e " nnf-ec ran successfully!" | eval "$TEE_LOGFILE"

# Collect the distinct firmware revisions reported by the drives: take the
# "fr" lines of id-ctrl, keep the third column, and de-duplicate.
firmware_list=$(ssh "$rabbit" "tools/nvme.sh cmd id-ctrl | grep '^fr '")
mapfile -t firmware_versions < <(printf '%s\n' "$firmware_list" | awk '{print $3}' | sort -u)

# Debug aid:
# printf "Firmware versions detected: %s\n" "${firmware_versions[*]}"
# printf "Number of versions: %d\n" "${#firmware_versions[@]}"

# If exactly one firmware version is present and it matches the expected
# version, we're done. Anything else triggers a download/activate cycle.
if [[ ${#firmware_versions[@]} -ne 1 || ${firmware_versions[0]} != "$expectedFirmware" ]]; then
    echo -e "Firmware mismatch, downloading $firmwareFile" | eval "$TEE_LOGFILE"

    for slot in 1 2 3; do
        # shellcheck disable=SC2029
        ssh "$rabbit" "tools/nvme.sh cmd fw-download --fw=$firmwareFile --xfer=256"

        # Action values
        #   1: Activate immediately, no reset.
        #   2: Activate after the next controller reset.
        #   3: Activate immediately and reset the controller.
        # Only the 3rd (last) slot resets the drive controller to activate
        # the firmware.
        case "$slot" in
            3) action=3 ;;
            *) action=1 ;;
        esac

        # shellcheck disable=SC2029
        ssh "$rabbit" "tools/nvme.sh cmd fw-activate --slot=$slot --action=$action"
    done
else
    echo -e " Drive FW is already up-to-date!" | eval "$TEE_LOGFILE"
fi


# Final verification: count the id-ctrl output lines that mention the
# expected firmware version and require one per drive.
expectedDriveCount=16 # Should find 16 Drives

#NumGudFWs=$(ssh "$rabbit" "tools/nvme.sh cmd id-ctrl | grep -e "$expectedFirmware")
declare -i NumGudFWs
# grep -c prints "0" but exits with status 1 when nothing matches. Under
# `set -e` that would abort the script right here, before the failure is
# logged and the _Failure.log copy is made, hence the `|| true`.
NumGudFWs=$(ssh "$rabbit" tools/nvme.sh cmd id-ctrl | grep -c "$expectedFirmware" || true)

# echo -e "Number of drives found with latest FW is $NumGudFWs " | tee -a $(pwd)/logs/$rabbit.log
if (( NumGudFWs != expectedDriveCount )); then
    DATE_TIME=$(date '+%Y-%m-%d %H:%M:%S') # Date/time stamp for the log file
    echo -e "\nOnly $NumGudFWs were successfully flashed to $expectedFirmware at $DATE_TIME!\n" | eval "$TEE_LOGFILE"
    cp "$LOGFILE" "$LOGFILE_FAILURE"
    exit 1
fi

echo -e "All $expectedDriveCount drives have the latest FW!\n" | eval "$TEE_LOGFILE"
exit 0
Loading

0 comments on commit 69291d8

Please sign in to comment.