Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[SmartSwitch] Enhance PCIe device check to skip the warning log, if device is in detaching mode #546

Open
wants to merge 10 commits into
base: master
Choose a base branch
from
39 changes: 39 additions & 0 deletions sonic-pcied/scripts/pcied
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,10 @@ SYSLOG_IDENTIFIER = "pcied"
# Text associated with the overall PCIe check result.
# NOTE(review): its consumer is not visible in this hunk — confirm usage.
PCIE_RESULT_REGEX = "PCIe Device Checking All Test"
# STATE_DB table names; DaemonPcied opens one swsscommon.Table per name.
PCIE_DEVICE_TABLE_NAME = "PCIE_DEVICE"
PCIE_STATUS_TABLE_NAME = "PCIE_DEVICES"
# STATE_DB table that is_dpu_in_detaching_mode() scans for detaching DPUs.
PCIE_DETACH_INFO_TABLE = "PCIE_DETACH_INFO"

# Field names read from each PCIE_DETACH_INFO entry: the entry's PCIe bus
# address (compared against the device under test) and its DPU state
# (compared against the literal "detaching").
PCIE_DETACH_BUS_INFO_FIELD = "bus_info"
PCIE_DETACH_DPU_STATE_FIELD = "dpu_state"

# Presumably the main-loop polling interval in seconds — usage is outside this
# hunk, TODO confirm.
PCIED_MAIN_THREAD_SLEEP_SECS = 60

Expand Down Expand Up @@ -92,6 +96,7 @@ class DaemonPcied(daemon_base.DaemonBase):
self.state_db = daemon_base.db_connect("STATE_DB")
self.device_table = swsscommon.Table(self.state_db, PCIE_DEVICE_TABLE_NAME)
self.status_table = swsscommon.Table(self.state_db, PCIE_STATUS_TABLE_NAME)
self.detach_info = swsscommon.Table(self.state_db, PCIE_DETACH_INFO_TABLE)
vvolam marked this conversation as resolved.
Show resolved Hide resolved

def __del__(self):
if self.device_table:
Expand All @@ -102,6 +107,10 @@ class DaemonPcied(daemon_base.DaemonBase):
stable_keys = self.status_table.getKeys()
for stk in stable_keys:
self.status_table._del(stk)
if self.detach_info:
detach_info_keys = self.detach_info.getKeys()
for dk in detach_info_keys:
self.detach_info._del(dk)

# load aer-fields into statedb
def update_aer_to_statedb(self):
Expand Down Expand Up @@ -151,6 +160,28 @@ class DaemonPcied(daemon_base.DaemonBase):

self.status_table.set("status", fvs)

# Check whether the given PCIe device belongs to a DPU that is in detaching mode, by querying the state_db
vvolam marked this conversation as resolved.
Show resolved Hide resolved
def is_dpu_in_detaching_mode(self, pcie_dev):
    """Return True if ``pcie_dev`` belongs to a DPU that is currently detaching.

    Walks every key of ``self.detach_info`` and looks for an entry whose
    ``bus_info`` field equals ``pcie_dev`` and whose ``dpu_state`` field is
    the literal ``"detaching"``.

    Args:
        pcie_dev: PCIe address string to look up, e.g. "0000:03:00.1".

    Returns:
        bool: True when a matching entry in the "detaching" state exists;
        False otherwise, including when ``self.detach_info`` is None or the
        table has no keys.
    """
    if self.detach_info is None:
        self.log_debug("detach_info is None")
        return False

    for key in list(self.detach_info.getKeys()):
        entry = self.detach_info.get(key)

        # swsscommon.Table.get() hands back a (found, field_value_pairs)
        # tuple: it has no .get() method and is truthy even when the key is
        # absent. Unpack it into a dict; mapping-shaped results (as used by
        # the unit-test doubles) fall through unchanged.
        if isinstance(entry, (tuple, list)):
            if len(entry) != 2 or not entry[0]:
                continue
            entry = dict(entry[1])

        if not entry:
            continue

        if (entry.get(PCIE_DETACH_BUS_INFO_FIELD) == pcie_dev
                and entry.get(PCIE_DETACH_DPU_STATE_FIELD) == "detaching"):
            return True

    return False

# Check the PCIe devices
def check_pcie_devices(self):
self.resultInfo = platform_pcieutil.get_pcie_check()
Expand All @@ -160,6 +191,14 @@ class DaemonPcied(daemon_base.DaemonBase):

for result in self.resultInfo:
if result["result"] == "Failed":
# Convert bus, device, and function to a bus_info format like "0000:03:00.0"
pcie_dev = "0000:{int(result['bus'], 16):02x}:{int(result['dev'], 16):02x}.{int(result['fn'], 16)}"

# Check if the device is in detaching mode
if device_info.is_smartswitch() and self.is_dpu_in_detaching_mode(pcie_dev):
self.log_debug("PCIe Device: {} is in detaching mode, skipping warning.".format(pcie_dev))
continue

self.log_warning("PCIe Device: " + result["name"] + " Not Found")
err += 1
else:
Expand Down
73 changes: 71 additions & 2 deletions sonic-pcied/tests/test_DaemonPcied.py
Original file line number Diff line number Diff line change
Expand Up @@ -143,17 +143,86 @@ def test_run(self):
daemon_pcied.run()
assert daemon_pcied.check_pcie_devices.call_count == 1

@mock.patch('pcied.load_platform_pcieutil', mock.MagicMock())
def test_is_dpu_in_detaching_mode(self):
    """Exercise is_dpu_in_detaching_mode() against a stubbed detach table.

    Covers a detaching device, an attached device, an unknown device, a
    missing (None) table handle and a table without keys.
    """
    dpu_entries = {
        'DPU_0': {'bus_info': '0000:03:00.1', 'dpu_state': 'detaching'},
        'DPU_1': {'bus_info': '0000:03:00.2', 'dpu_state': 'attached'},
    }

    daemon = pcied.DaemonPcied(SYSLOG_IDENTIFIER)
    daemon.detach_info = mock.MagicMock()
    daemon.detach_info.getKeys = mock.MagicMock(return_value=['DPU_0', 'DPU_1'])
    # Unknown keys resolve to None, exactly like a missing table entry would.
    daemon.detach_info.get = mock.MagicMock(side_effect=dpu_entries.get)

    # Detaching, attached, and absent devices, in that order.
    expectations = (
        ('0000:03:00.1', True),
        ('0000:03:00.2', False),
        ('0000:03:00.3', False),
    )
    for bus_info, expected in expectations:
        assert daemon.is_dpu_in_detaching_mode(bus_info) == expected

    # No table handle at all.
    daemon.detach_info = None
    assert daemon.is_dpu_in_detaching_mode('0000:03:00.1') == False

    # Table handle present but empty.
    empty_table = mock.MagicMock()
    empty_table.getKeys.return_value = []
    daemon.detach_info = empty_table
    assert daemon.is_dpu_in_detaching_mode('0000:03:00.1') == False

@mock.patch('pcied.device_info.is_smartswitch', mock.MagicMock(return_value=False))
@mock.patch('pcied.DaemonPcied.is_dpu_in_detaching_mode', mock.MagicMock(return_value=False))
@mock.patch('pcied.load_platform_pcieutil', mock.MagicMock())
def test_check_pcie_devices(self):
    """A failed device on a non-SmartSwitch publishes status once and skips AER stats."""
    daemon_pcied = pcied.DaemonPcied(SYSLOG_IDENTIFIER)
    daemon_pcied.update_pcie_devices_status_db = mock.MagicMock()
    daemon_pcied.check_n_update_pcie_aer_stats = mock.MagicMock()
    # Stub the platform check once; the earlier bare MagicMock() assignment
    # was overwritten immediately and has been removed. The stub is still
    # installed after the daemon is constructed, as in the original test.
    pcied.platform_pcieutil.get_pcie_check = mock.MagicMock(
        return_value=[
            {"result": "Failed", "bus": "03", "dev": "00", "fn": "1", "name": "PCIe Device 1"},
        ]
    )

    daemon_pcied.check_pcie_devices()
    assert daemon_pcied.update_pcie_devices_status_db.call_count == 1
    assert daemon_pcied.check_n_update_pcie_aer_stats.call_count == 0

@mock.patch('pcied.device_info.is_smartswitch', mock.MagicMock(return_value=False))
@mock.patch('pcied.DaemonPcied.is_dpu_in_detaching_mode', mock.MagicMock(return_value=False))
@mock.patch('pcied.load_platform_pcieutil', mock.MagicMock())
def test_check_pcie_devices_update_aer(self):
    """A passing device publishes status once and refreshes AER stats once."""
    passed_device = {"result": "Passed", "bus": "03", "dev": "00", "fn": "1", "name": "PCIe Device 1"}

    daemon = pcied.DaemonPcied(SYSLOG_IDENTIFIER)
    status_db_stub = mock.MagicMock()
    aer_stats_stub = mock.MagicMock()
    daemon.update_pcie_devices_status_db = status_db_stub
    daemon.check_n_update_pcie_aer_stats = aer_stats_stub
    # Installed after the daemon is built, matching the original ordering.
    pcied.platform_pcieutil.get_pcie_check = mock.MagicMock(return_value=[passed_device])

    daemon.check_pcie_devices()

    assert status_db_stub.call_count == 1
    assert aer_stats_stub.call_count == 1

@mock.patch('pcied.device_info.is_smartswitch', mock.MagicMock(return_value=True))
@mock.patch('pcied.DaemonPcied.is_dpu_in_detaching_mode', mock.MagicMock(return_value=True))
@mock.patch('pcied.load_platform_pcieutil', mock.MagicMock())
def test_check_pcie_devices_detaching(self):
    """A failed device whose DPU is detaching on a SmartSwitch still publishes
    status once and does not refresh AER stats."""
    failed_device = {"result": "Failed", "bus": "03", "dev": "00", "fn": "1", "name": "PCIe Device 1"}

    daemon = pcied.DaemonPcied(SYSLOG_IDENTIFIER)
    status_db_stub = mock.MagicMock()
    aer_stats_stub = mock.MagicMock()
    daemon.update_pcie_devices_status_db = status_db_stub
    daemon.check_n_update_pcie_aer_stats = aer_stats_stub
    # Installed after the daemon is built, matching the original ordering.
    pcied.platform_pcieutil.get_pcie_check = mock.MagicMock(return_value=[failed_device])

    daemon.check_pcie_devices()

    assert status_db_stub.call_count == 1
    assert aer_stats_stub.call_count == 0

@mock.patch('pcied.load_platform_pcieutil', mock.MagicMock())
def test_update_pcie_devices_status_db(self):
Expand Down Expand Up @@ -210,5 +279,5 @@ def test_update_aer_to_statedb(self):
])
"""

daemon_pcied.update_aer_to_statedb()
daemon_pcied.update_aer_to_statedb()
assert daemon_pcied.log_debug.call_count == 0
Loading