[Mellanox] Fix inconsistency in the shared headroom pool initialization #3057

Merged 2 commits on Mar 15, 2024
19 changes: 15 additions & 4 deletions cfgmgr/buffer_pool_mellanox.lua
@@ -133,7 +133,7 @@ local function iterate_profile_list(all_items)
    return 0
end

-local function fetch_buffer_pool_size_from_appldb()
+local function fetch_buffer_pool_size_from_appldb(shp_enabled)
    local buffer_pools = {}
    redis.call('SELECT', config_db)
    local buffer_pool_keys = redis.call('KEYS', 'BUFFER_POOL|*')
@@ -158,7 +158,18 @@ local function fetch_buffer_pool_size_from_appldb()
        end
        xoff = redis.call('HGET', 'BUFFER_POOL_TABLE:' .. buffer_pools[i], 'xoff')
        if not xoff then
-            table.insert(result, buffer_pools[i] .. ':' .. size)
+            if shp_enabled and size == "0" and buffer_pools[i] == "ingress_lossless_pool" then
+                -- During initialization, if SHP is enabled:
+                -- 1. the buffer pool sizes and xoff have been initialized to 0, which means the shared headroom pool is disabled
+                -- 2. but the buffer profiles already indicate that the shared headroom pool is enabled
+                -- 3. later on, the buffer pool sizes are updated with a non-zero xoff
+                -- If the orchagent starts handling the buffer configuration between steps 2 and 3,
+                -- the buffer pools and profiles are inconsistent, which fails the Mellanox SAI sanity check.
+                -- To avoid this, indicate that the shared headroom pool is enabled by returning
+                -- small but non-zero buffer pool and shared headroom pool sizes.
+                table.insert(result, buffer_pools[i] .. ':2048:1024')
+            else
+                table.insert(result, buffer_pools[i] .. ':' .. size)
+            end
        else
            table.insert(result, buffer_pools[i] .. ':' .. size .. ':' .. xoff)
        end
@@ -295,7 +306,7 @@ local fail_count = 0
fail_count = fail_count + iterate_all_items(all_pgs, true)
fail_count = fail_count + iterate_all_items(all_tcs, false)
if fail_count > 0 then
-    fetch_buffer_pool_size_from_appldb()
+    fetch_buffer_pool_size_from_appldb(shp_enabled)
    return result
end

@@ -305,7 +316,7 @@ local all_egress_profile_lists = redis.call('KEYS', 'BUFFER_PORT_EGRESS_PROFILE_
fail_count = fail_count + iterate_profile_list(all_ingress_profile_lists)
fail_count = fail_count + iterate_profile_list(all_egress_profile_lists)
if fail_count > 0 then
-    fetch_buffer_pool_size_from_appldb()
+    fetch_buffer_pool_size_from_appldb(shp_enabled)
    return result
end
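Each entry the plugin appends to result has the form pool_name:size or pool_name:size:xoff; the consumer later splits these fields when programming the pools. A minimal sketch of that decomposition (Python; parse_pool_entry and the sample entry are hypothetical, for illustration only):

def parse_pool_entry(entry):
    # 'pool:size' or 'pool:size:xoff' -> (name, size, xoff or None)
    fields = entry.split(':')
    name, size = fields[0], int(fields[1])
    xoff = int(fields[2]) if len(fields) > 2 else None
    return name, size, xoff

# The fallback path above emits a small but non-zero pool/SHP pair, so the pool
# state agrees with profiles that already reference the shared headroom pool.
assert parse_pool_entry('ingress_lossless_pool:2048:1024') == ('ingress_lossless_pool', 2048, 1024)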

38 changes: 38 additions & 0 deletions tests/test_buffer_dynamic.py
@@ -865,3 +865,41 @@ def test_bufferPortMaxParameter(self, dvs, testlog):
        dvs.port_admin_set('Ethernet0', 'down')

        self.cleanup_db(dvs)


    def test_bufferPoolInitWithSHP(self, dvs, testlog):
        self.setup_db(dvs)

        try:
            # 1. Enable the shared headroom pool
            default_lossless_buffer_parameter = self.config_db.get_entry('DEFAULT_LOSSLESS_BUFFER_PARAMETER', 'AZURE')
            default_lossless_buffer_parameter['over_subscribe_ratio'] = '2'
            self.config_db.update_entry('DEFAULT_LOSSLESS_BUFFER_PARAMETER', 'AZURE', default_lossless_buffer_parameter)

            # 2. Stop the orchagent
            _, oa_pid = dvs.runcmd("pgrep orchagent")
            dvs.runcmd("kill -s SIGSTOP {}".format(oa_pid))

            # 3. Remove the size from CONFIG_DB|BUFFER_POOL.ingress_lossless_pool
            original_ingress_lossless_pool = self.config_db.get_entry('BUFFER_POOL', 'ingress_lossless_pool')
            try:
                self.config_db.delete_field('BUFFER_POOL', 'ingress_lossless_pool', 'size')
                self.config_db.delete_field('BUFFER_POOL', 'ingress_lossless_pool', 'xoff')
            except Exception:
                pass

            # 4. Remove the ingress_lossless_pool from the APPL_DB
            self.app_db.delete_entry('BUFFER_POOL_TABLE', 'ingress_lossless_pool')

            # 5. Mock it by adding a "TABLE_SET" entry to trigger the fallback logic
            self.app_db.update_entry("BUFFER_PG_TABLE_SET", "", {"NULL": "NULL"})

            # 6. Invoke the lua plugin
            _, output = dvs.runcmd("redis-cli --eval /usr/share/swss/buffer_pool_vs.lua")
            assert "ingress_lossless_pool:2048:1024" in output

        finally:
            self.config_db.update_entry('BUFFER_POOL', 'ingress_lossless_pool', original_ingress_lossless_pool)
            self.config_db.delete_entry('DEFAULT_LOSSLESS_BUFFER_PARAMETER', 'AZURE')
            self.app_db.delete_entry("BUFFER_PG_TABLE_SET", "")
            dvs.runcmd("kill -s SIGCONT {}".format(oa_pid))