Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[pfc_detect] fix RedisReply errors #2040

Merged
merged 2 commits into from
Dec 6, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
101 changes: 53 additions & 48 deletions orchagent/pfc_detect_barefoot.lua
Original file line number Diff line number Diff line change
Expand Up @@ -36,63 +36,68 @@ for i = n, 1, -1 do

local queue_index = redis.call('HGET', 'COUNTERS_QUEUE_INDEX_MAP', KEYS[i])
local port_id = redis.call('HGET', 'COUNTERS_QUEUE_PORT_MAP', KEYS[i])
local pfc_rx_pkt_key = 'SAI_PORT_STAT_PFC_' .. queue_index .. '_RX_PKTS'
local pfc_duration_key = 'SAI_PORT_STAT_PFC_' .. queue_index .. '_RX_PAUSE_DURATION'
-- If there is no entry in COUNTERS_QUEUE_INDEX_MAP or COUNTERS_QUEUE_PORT_MAP then
-- it means KEYS[i] queue is inserted into FLEX COUNTER DB but the corresponding
-- maps haven't been updated yet.
if queue_index and port_id then
local pfc_rx_pkt_key = 'SAI_PORT_STAT_PFC_' .. queue_index .. '_RX_PKTS'
local pfc_duration_key = 'SAI_PORT_STAT_PFC_' .. queue_index .. '_RX_PAUSE_DURATION'

-- Get all counters
local occupancy_bytes = redis.call('HGET', counters_table_name .. ':' .. KEYS[i], 'SAI_QUEUE_STAT_CURR_OCCUPANCY_BYTES')
local packets = redis.call('HGET', counters_table_name .. ':' .. KEYS[i], 'SAI_QUEUE_STAT_PACKETS')
local pfc_rx_packets = redis.call('HGET', counters_table_name .. ':' .. port_id, pfc_rx_pkt_key)
local pfc_duration = redis.call('HGET', counters_table_name .. ':' .. port_id, pfc_duration_key)
-- Get all counters
local occupancy_bytes = redis.call('HGET', counters_table_name .. ':' .. KEYS[i], 'SAI_QUEUE_STAT_CURR_OCCUPANCY_BYTES')
local packets = redis.call('HGET', counters_table_name .. ':' .. KEYS[i], 'SAI_QUEUE_STAT_PACKETS')
local pfc_rx_packets = redis.call('HGET', counters_table_name .. ':' .. port_id, pfc_rx_pkt_key)
local pfc_duration = redis.call('HGET', counters_table_name .. ':' .. port_id, pfc_duration_key)

if occupancy_bytes and packets and pfc_rx_packets and pfc_duration then
occupancy_bytes = tonumber(occupancy_bytes)
packets = tonumber(packets)
pfc_rx_packets = tonumber(pfc_rx_packets)
pfc_duration = tonumber(pfc_duration)
if occupancy_bytes and packets and pfc_rx_packets and pfc_duration then
occupancy_bytes = tonumber(occupancy_bytes)
packets = tonumber(packets)
pfc_rx_packets = tonumber(pfc_rx_packets)
pfc_duration = tonumber(pfc_duration)

local packets_last = redis.call('HGET', counters_table_name .. ':' .. KEYS[i], 'SAI_QUEUE_STAT_PACKETS_last')
local pfc_rx_packets_last = redis.call('HGET', counters_table_name .. ':' .. port_id, pfc_rx_pkt_key .. '_last')
local pfc_duration_last = redis.call('HGET', counters_table_name .. ':' .. port_id, pfc_duration_key .. '_last')
-- DEBUG CODE START. Uncomment to enable
local debug_storm = redis.call('HGET', counters_table_name .. ':' .. KEYS[i], 'DEBUG_STORM')
-- DEBUG CODE END.
local packets_last = redis.call('HGET', counters_table_name .. ':' .. KEYS[i], 'SAI_QUEUE_STAT_PACKETS_last')
local pfc_rx_packets_last = redis.call('HGET', counters_table_name .. ':' .. port_id, pfc_rx_pkt_key .. '_last')
local pfc_duration_last = redis.call('HGET', counters_table_name .. ':' .. port_id, pfc_duration_key .. '_last')
-- DEBUG CODE START. Uncomment to enable
local debug_storm = redis.call('HGET', counters_table_name .. ':' .. KEYS[i], 'DEBUG_STORM')
-- DEBUG CODE END.

-- If this is not a first run, then we have last values available
if packets_last and pfc_rx_packets_last and pfc_duration_last then
packets_last = tonumber(packets_last)
pfc_rx_packets_last = tonumber(pfc_rx_packets_last)
pfc_duration_last = tonumber(pfc_duration_last)
-- If this is not a first run, then we have last values available
if packets_last and pfc_rx_packets_last and pfc_duration_last then
packets_last = tonumber(packets_last)
pfc_rx_packets_last = tonumber(pfc_rx_packets_last)
pfc_duration_last = tonumber(pfc_duration_last)

-- Check actual condition of queue being in PFC storm
if (occupancy_bytes > 0 and packets - packets_last == 0 and pfc_rx_packets - pfc_rx_packets_last > 0) or
-- DEBUG CODE START. Uncomment to enable
(debug_storm == "enabled") or
-- DEBUG CODE END.
(occupancy_bytes == 0 and packets - packets_last == 0 and (pfc_duration - pfc_duration_last) > poll_time * 0.8) then
if time_left <= poll_time then
redis.call('HDEL', counters_table_name .. ':' .. port_id, pfc_rx_pkt_key .. '_last')
redis.call('HDEL', counters_table_name .. ':' .. port_id, pfc_duration_key .. '_last')
redis.call('PUBLISH', 'PFC_WD_ACTION', '["' .. KEYS[i] .. '","storm"]')
is_deadlock = true
time_left = detection_time
-- Check actual condition of queue being in PFC storm
if (occupancy_bytes > 0 and packets - packets_last == 0 and pfc_rx_packets - pfc_rx_packets_last > 0) or
-- DEBUG CODE START. Uncomment to enable
(debug_storm == "enabled") or
-- DEBUG CODE END.
(occupancy_bytes == 0 and packets - packets_last == 0 and (pfc_duration - pfc_duration_last) > poll_time * 0.8) then
if time_left <= poll_time then
redis.call('HDEL', counters_table_name .. ':' .. port_id, pfc_rx_pkt_key .. '_last')
redis.call('HDEL', counters_table_name .. ':' .. port_id, pfc_duration_key .. '_last')
redis.call('PUBLISH', 'PFC_WD_ACTION', '["' .. KEYS[i] .. '","storm"]')
is_deadlock = true
time_left = detection_time
else
time_left = time_left - poll_time
end
else
time_left = time_left - poll_time
end
else
if pfc_wd_action == 'alert' and pfc_wd_status ~= 'operational' then
redis.call('PUBLISH', 'PFC_WD_ACTION', '["' .. KEYS[i] .. '","restore"]')
if pfc_wd_action == 'alert' and pfc_wd_status ~= 'operational' then
redis.call('PUBLISH', 'PFC_WD_ACTION', '["' .. KEYS[i] .. '","restore"]')
end
time_left = detection_time
end
time_left = detection_time
end
end

-- Save values for next run
redis.call('HSET', counters_table_name .. ':' .. KEYS[i], 'SAI_QUEUE_STAT_PACKETS_last', packets)
redis.call('HSET', counters_table_name .. ':' .. KEYS[i], 'PFC_WD_DETECTION_TIME_LEFT', time_left)
if is_deadlock == false then
redis.call('HSET', counters_table_name .. ':' .. port_id, pfc_rx_pkt_key .. '_last', pfc_rx_packets)
redis.call('HSET', counters_table_name .. ':' .. port_id, pfc_duration_key .. '_last', pfc_duration)
-- Save values for next run
redis.call('HSET', counters_table_name .. ':' .. KEYS[i], 'SAI_QUEUE_STAT_PACKETS_last', packets)
redis.call('HSET', counters_table_name .. ':' .. KEYS[i], 'PFC_WD_DETECTION_TIME_LEFT', time_left)
if is_deadlock == false then
redis.call('HSET', counters_table_name .. ':' .. port_id, pfc_rx_pkt_key .. '_last', pfc_rx_packets)
redis.call('HSET', counters_table_name .. ':' .. port_id, pfc_duration_key .. '_last', pfc_duration)
end
end
end
end
Expand Down
95 changes: 50 additions & 45 deletions orchagent/pfc_detect_broadcom.lua
Original file line number Diff line number Diff line change
Expand Up @@ -35,61 +35,66 @@ for i = n, 1, -1 do

local queue_index = redis.call('HGET', 'COUNTERS_QUEUE_INDEX_MAP', KEYS[i])
local port_id = redis.call('HGET', 'COUNTERS_QUEUE_PORT_MAP', KEYS[i])
local pfc_rx_pkt_key = 'SAI_PORT_STAT_PFC_' .. queue_index .. '_RX_PKTS'
local pfc_on2off_key = 'SAI_PORT_STAT_PFC_' .. queue_index .. '_ON2OFF_RX_PKTS'
-- If there is no entry in COUNTERS_QUEUE_INDEX_MAP or COUNTERS_QUEUE_PORT_MAP then
-- it means KEYS[i] queue is inserted into FLEX COUNTER DB but the corresponding
-- maps haven't been updated yet.
if queue_index and port_id then
local pfc_rx_pkt_key = 'SAI_PORT_STAT_PFC_' .. queue_index .. '_RX_PKTS'
local pfc_on2off_key = 'SAI_PORT_STAT_PFC_' .. queue_index .. '_ON2OFF_RX_PKTS'

-- Get all counters
local occupancy_bytes = redis.call('HGET', counters_table_name .. ':' .. KEYS[i], 'SAI_QUEUE_STAT_CURR_OCCUPANCY_BYTES')
local packets = redis.call('HGET', counters_table_name .. ':' .. KEYS[i], 'SAI_QUEUE_STAT_PACKETS')
local pfc_rx_packets = redis.call('HGET', counters_table_name .. ':' .. port_id, pfc_rx_pkt_key)
local pfc_on2off = redis.call('HGET', counters_table_name .. ':' .. port_id, pfc_on2off_key)
local queue_pause_status = redis.call('HGET', counters_table_name .. ':' .. KEYS[i], 'SAI_QUEUE_ATTR_PAUSE_STATUS')
-- Get all counters
local occupancy_bytes = redis.call('HGET', counters_table_name .. ':' .. KEYS[i], 'SAI_QUEUE_STAT_CURR_OCCUPANCY_BYTES')
local packets = redis.call('HGET', counters_table_name .. ':' .. KEYS[i], 'SAI_QUEUE_STAT_PACKETS')
local pfc_rx_packets = redis.call('HGET', counters_table_name .. ':' .. port_id, pfc_rx_pkt_key)
local pfc_on2off = redis.call('HGET', counters_table_name .. ':' .. port_id, pfc_on2off_key)
local queue_pause_status = redis.call('HGET', counters_table_name .. ':' .. KEYS[i], 'SAI_QUEUE_ATTR_PAUSE_STATUS')

if occupancy_bytes and packets and pfc_rx_packets and pfc_on2off and queue_pause_status then
occupancy_bytes = tonumber(occupancy_bytes)
packets = tonumber(packets)
pfc_rx_packets = tonumber(pfc_rx_packets)
pfc_on2off = tonumber(pfc_on2off)
if occupancy_bytes and packets and pfc_rx_packets and pfc_on2off and queue_pause_status then
occupancy_bytes = tonumber(occupancy_bytes)
packets = tonumber(packets)
pfc_rx_packets = tonumber(pfc_rx_packets)
pfc_on2off = tonumber(pfc_on2off)

local packets_last = redis.call('HGET', counters_table_name .. ':' .. KEYS[i], 'SAI_QUEUE_STAT_PACKETS_last')
local pfc_rx_packets_last = redis.call('HGET', counters_table_name .. ':' .. port_id, pfc_rx_pkt_key .. '_last')
local pfc_on2off_last = redis.call('HGET', counters_table_name .. ':' .. port_id, pfc_on2off_key .. '_last')
local queue_pause_status_last = redis.call('HGET', counters_table_name .. ':' .. KEYS[i], 'SAI_QUEUE_ATTR_PAUSE_STATUS_last')
local packets_last = redis.call('HGET', counters_table_name .. ':' .. KEYS[i], 'SAI_QUEUE_STAT_PACKETS_last')
local pfc_rx_packets_last = redis.call('HGET', counters_table_name .. ':' .. port_id, pfc_rx_pkt_key .. '_last')
local pfc_on2off_last = redis.call('HGET', counters_table_name .. ':' .. port_id, pfc_on2off_key .. '_last')
local queue_pause_status_last = redis.call('HGET', counters_table_name .. ':' .. KEYS[i], 'SAI_QUEUE_ATTR_PAUSE_STATUS_last')

-- DEBUG CODE START. Uncomment to enable
local debug_storm = redis.call('HGET', counters_table_name .. ':' .. KEYS[i], 'DEBUG_STORM')
-- DEBUG CODE END.
-- DEBUG CODE START. Uncomment to enable
local debug_storm = redis.call('HGET', counters_table_name .. ':' .. KEYS[i], 'DEBUG_STORM')
-- DEBUG CODE END.

-- If this is not a first run, then we have last values available
if packets_last and pfc_rx_packets_last and pfc_on2off_last and queue_pause_status_last then
packets_last = tonumber(packets_last)
pfc_rx_packets_last = tonumber(pfc_rx_packets_last)
pfc_on2off_last = tonumber(pfc_on2off_last)
-- If this is not a first run, then we have last values available
if packets_last and pfc_rx_packets_last and pfc_on2off_last and queue_pause_status_last then
packets_last = tonumber(packets_last)
pfc_rx_packets_last = tonumber(pfc_rx_packets_last)
pfc_on2off_last = tonumber(pfc_on2off_last)

-- Check actual condition of queue being in PFC storm
if (pfc_rx_packets - pfc_rx_packets_last > 0 and pfc_on2off - pfc_on2off_last == 0 and queue_pause_status_last == 'true' and queue_pause_status == 'true') or
(debug_storm == "enabled") then
if time_left <= poll_time then
redis.call('PUBLISH', 'PFC_WD_ACTION', '["' .. KEYS[i] .. '","storm"]')
is_deadlock = true
time_left = detection_time
-- Check actual condition of queue being in PFC storm
if (pfc_rx_packets - pfc_rx_packets_last > 0 and pfc_on2off - pfc_on2off_last == 0 and queue_pause_status_last == 'true' and queue_pause_status == 'true') or
(debug_storm == "enabled") then
if time_left <= poll_time then
redis.call('PUBLISH', 'PFC_WD_ACTION', '["' .. KEYS[i] .. '","storm"]')
is_deadlock = true
time_left = detection_time
else
time_left = time_left - poll_time
end
else
time_left = time_left - poll_time
end
else
if pfc_wd_action == 'alert' and pfc_wd_status ~= 'operational' then
redis.call('PUBLISH', 'PFC_WD_ACTION', '["' .. KEYS[i] .. '","restore"]')
if pfc_wd_action == 'alert' and pfc_wd_status ~= 'operational' then
redis.call('PUBLISH', 'PFC_WD_ACTION', '["' .. KEYS[i] .. '","restore"]')
end
time_left = detection_time
end
time_left = detection_time
end
end

-- Save values for next run
redis.call('HSET', counters_table_name .. ':' .. KEYS[i], 'SAI_QUEUE_ATTR_PAUSE_STATUS_last', queue_pause_status)
redis.call('HSET', counters_table_name .. ':' .. KEYS[i], 'SAI_QUEUE_STAT_PACKETS_last', packets)
redis.call('HSET', counters_table_name .. ':' .. KEYS[i], 'PFC_WD_DETECTION_TIME_LEFT', time_left)
redis.call('HSET', counters_table_name .. ':' .. port_id, pfc_rx_pkt_key .. '_last', pfc_rx_packets)
redis.call('HSET', counters_table_name .. ':' .. port_id, pfc_on2off_key .. '_last', pfc_on2off)
-- Save values for next run
redis.call('HSET', counters_table_name .. ':' .. KEYS[i], 'SAI_QUEUE_ATTR_PAUSE_STATUS_last', queue_pause_status)
redis.call('HSET', counters_table_name .. ':' .. KEYS[i], 'SAI_QUEUE_STAT_PACKETS_last', packets)
redis.call('HSET', counters_table_name .. ':' .. KEYS[i], 'PFC_WD_DETECTION_TIME_LEFT', time_left)
redis.call('HSET', counters_table_name .. ':' .. port_id, pfc_rx_pkt_key .. '_last', pfc_rx_packets)
redis.call('HSET', counters_table_name .. ':' .. port_id, pfc_on2off_key .. '_last', pfc_on2off)
end
end
end
end
Expand Down
Loading