From 2c4fc2a44ab9def913634e0f8a9ffe34bb47baf9 Mon Sep 17 00:00:00 2001 From: Stepan Blyshchak <38952541+stepanblyschak@users.noreply.github.com> Date: Tue, 28 Jan 2020 06:56:05 +0200 Subject: [PATCH] [portsorch] use ingress/egress disable for LAG member enable/disable (#1166) Originally portsorch was designed to remove LAG member from LAG when member gets disabled by teamd. This could lead to potential issues including flood to that port and loops, since after removal member becomes a switch port. Now, portsorch will make use of SAI_LAG_MEMBER_ATTR_INGRESS_DISABLE and SAI_LAG_MEMBER_ATTR_EGRESS_DISABLE to control collection/distribution through that LAG member port. With this new flow, teammgrd and teamsyncd are candidates to be refactored, especially teamsyncd warm reboot logic, since now we don't need to compare old, new lags and lag members. Teamsyncd's job is simply to signal "status" field update without waiting for reconciliation timer to expire. e.g. one port channel went down on peer: ``` admin@arc-switch1025:~$ show interfaces portchannel Flags: A - active, I - inactive, Up - up, Dw - Down, N/A - not available, S - selected, D - deselected, * - not synced No. Team Dev Protocol Ports ----- --------------- ----------- -------------- 0001 PortChannel0001 LACP(A)(Up) Ethernet112(S) 0002 PortChannel0002 LACP(A)(Up) Ethernet116(S) 0003 PortChannel0003 LACP(A)(Up) Ethernet120(S) 0004 PortChannel0004 LACP(A)(Dw) Ethernet124(D) admin@arc-switch1025:~$ docker exec -it syncd sx_api_lag_dump.py LAG Members Table =============================================================================================================== | SWID| LAG Logical Port| LAG Oper State| PVID| Member Port ID| Port Label| Collector| Distributor| Oper State| =============================================================================================================== | 0 0x10000000 UP 1| 0x11900| 29| Enable| Enable| UP| =============================================================================================================== | 0 0x10000100 UP 1| 0x11b00| 30| Enable| Enable| UP| =============================================================================================================== | 0 0x10000200 UP 1| 0x11d00| 31| Enable| Enable| UP| =============================================================================================================== | 0 0x10000300 DOWN 1| 0x11f00| 32| Disable| Disable| UP| =============================================================================================================== ``` Signed-off-by: Stepan Blyschak --- orchagent/portsorch.cpp | 94 ++++++++++++++++++++++++++++++++------- orchagent/portsorch.h | 2 + tests/test_portchannel.py | 42 ++++++++++++++--- 3 files changed, 115 insertions(+), 23 deletions(-) diff --git a/orchagent/portsorch.cpp b/orchagent/portsorch.cpp index ab9c37b371b3..3666fac68489 100644 --- a/orchagent/portsorch.cpp +++ b/orchagent/portsorch.cpp @@ -2494,39 +2494,51 @@ void PortsOrch::doLagMemberTask(Consumer &consumer) status = fvValue(i); } - /* Sync an enabled member */ - if (status == "enabled") + if (lag.m_members.find(port_alias) == lag.m_members.end()) { - /* Duplicate entry */ - if (lag.m_members.find(port_alias) != lag.m_members.end()) + /* Assert the port doesn't belong to any LAG already */ + assert(!port.m_lag_id && !port.m_lag_member_id); + + if (!addLagMember(lag, port)) { - it = consumer.m_toSync.erase(it); + it++; continue; } + } - /* Assert the port doesn't belong to any LAG */ - assert(!port.m_lag_id && !port.m_lag_member_id); - - if (addLagMember(lag, port)) + /* Sync an enabled member */ + if (status == "enabled") + { + /* enable collection first, distribution-only mode + * is not supported on Mellanox platform + */ + if (setCollectionOnLagMember(port, true) && + setDistributionOnLagMember(port, true)) + { it = consumer.m_toSync.erase(it); + } else + { it++; + continue; + } } /* Sync an disabled member */ else /* status == "disabled" */ { - /* "status" is "disabled" at start when m_lag_id and - * m_lag_member_id are absent */ - if (!port.m_lag_id || !port.m_lag_member_id) + /* disable distribution first, distribution-only mode + * is not supported on Mellanox platform + */ + if (setDistributionOnLagMember(port, false) && + setCollectionOnLagMember(port, false)) { it = consumer.m_toSync.erase(it); - continue; } - - if (removeLagMember(lag, port)) - it = consumer.m_toSync.erase(it); else + { it++; + continue; + } } } /* Remove a LAG member */ @@ -2544,9 +2556,13 @@ void PortsOrch::doLagMemberTask(Consumer &consumer) } if (removeLagMember(lag, port)) + { it = consumer.m_toSync.erase(it); + } else + { it++; + } } else { @@ -3340,6 +3356,52 @@ bool PortsOrch::removeLagMember(Port &lag, Port &port) return true; } +bool PortsOrch::setCollectionOnLagMember(Port &lagMember, bool enableCollection) +{ + /* Port must be LAG member */ + assert(port.m_lag_member_id); + + sai_status_t status = SAI_STATUS_FAILURE; + sai_attribute_t attr {}; + + attr.id = SAI_LAG_MEMBER_ATTR_INGRESS_DISABLE; + attr.value.booldata = !enableCollection; + + status = sai_lag_api->set_lag_member_attribute(lagMember.m_lag_member_id, &attr); + if (status != SAI_STATUS_SUCCESS) + { + SWSS_LOG_ERROR("Failed to %s collection on LAG member %s", + enableCollection ? "enable" : "disable", + lagMember.m_alias.c_str()); + return false; + } + + return true; +} + +bool PortsOrch::setDistributionOnLagMember(Port &lagMember, bool enableDistribution) +{ + /* Port must be LAG member */ + assert(port.m_lag_member_id); + + sai_status_t status = SAI_STATUS_FAILURE; + sai_attribute_t attr {}; + + attr.id = SAI_LAG_MEMBER_ATTR_EGRESS_DISABLE; + attr.value.booldata = !enableDistribution; + + status = sai_lag_api->set_lag_member_attribute(lagMember.m_lag_member_id, &attr); + if (status != SAI_STATUS_SUCCESS) + { + SWSS_LOG_ERROR("Failed to %s distribution on LAG member %s", + enableDistribution ? "enable" : "disable", + lagMember.m_alias.c_str()); + return false; + } + + return true; +} + void PortsOrch::generateQueueMap() { if (m_isQueueMapGenerated) diff --git a/orchagent/portsorch.h b/orchagent/portsorch.h index a1a99a94062e..9ed9625f30c7 100644 --- a/orchagent/portsorch.h +++ b/orchagent/portsorch.h @@ -170,6 +170,8 @@ class PortsOrch : public Orch, public Subject bool removeLag(Port lag); bool addLagMember(Port &lag, Port &port); bool removeLagMember(Port &lag, Port &port); + bool setCollectionOnLagMember(Port &lagMember, bool enableCollection); + bool setDistributionOnLagMember(Port &lagMember, bool enableDistribution); void getLagMember(Port &lag, vector &portv); bool addPort(const set &lane_set, uint32_t speed, int an=0, string fec=""); diff --git a/tests/test_portchannel.py b/tests/test_portchannel.py index 0bc6db1e2ce3..d95a2e5eed8b 100644 --- a/tests/test_portchannel.py +++ b/tests/test_portchannel.py @@ -33,13 +33,41 @@ def test_Portchannel(self, dvs, testlog): assert len(lagms) == 1 (status, fvs) = lagmtbl.get(lagms[0]) - for fv in fvs: - if fv[0] == "SAI_LAG_MEMBER_ATTR_LAG_ID": - assert fv[1] == lags[0] - elif fv[0] == "SAI_LAG_MEMBER_ATTR_PORT_ID": - assert dvs.asicdb.portoidmap[fv[1]] == "Ethernet0" - else: - assert False + fvs = dict(fvs) + assert status + assert "SAI_LAG_MEMBER_ATTR_LAG_ID" in fvs + assert fvs.pop("SAI_LAG_MEMBER_ATTR_LAG_ID") == lags[0] + assert "SAI_LAG_MEMBER_ATTR_PORT_ID" in fvs + assert dvs.asicdb.portoidmap[fvs.pop("SAI_LAG_MEMBER_ATTR_PORT_ID")] == "Ethernet0" + assert "SAI_LAG_MEMBER_ATTR_INGRESS_DISABLE" in fvs + assert fvs.pop("SAI_LAG_MEMBER_ATTR_INGRESS_DISABLE") == "false" + assert "SAI_LAG_MEMBER_ATTR_EGRESS_DISABLE" in fvs + assert fvs.pop("SAI_LAG_MEMBER_ATTR_EGRESS_DISABLE") == "false" + assert not fvs + + ps = swsscommon.ProducerStateTable(db, "LAG_MEMBER_TABLE") + fvs = swsscommon.FieldValuePairs([("status", "disabled")]) + + ps.set("PortChannel0001:Ethernet0", fvs) + + time.sleep(1) + + lagmtbl = swsscommon.Table(asicdb, "ASIC_STATE:SAI_OBJECT_TYPE_LAG_MEMBER") + lagms = lagmtbl.getKeys() + assert len(lagms) == 1 + + (status, fvs) = lagmtbl.get(lagms[0]) + fvs = dict(fvs) + assert status + assert "SAI_LAG_MEMBER_ATTR_LAG_ID" in fvs + assert fvs.pop("SAI_LAG_MEMBER_ATTR_LAG_ID") == lags[0] + assert "SAI_LAG_MEMBER_ATTR_PORT_ID" in fvs + assert dvs.asicdb.portoidmap[fvs.pop("SAI_LAG_MEMBER_ATTR_PORT_ID")] == "Ethernet0" + assert "SAI_LAG_MEMBER_ATTR_INGRESS_DISABLE" in fvs + assert fvs.pop("SAI_LAG_MEMBER_ATTR_INGRESS_DISABLE") == "true" + assert "SAI_LAG_MEMBER_ATTR_EGRESS_DISABLE" in fvs + assert fvs.pop("SAI_LAG_MEMBER_ATTR_EGRESS_DISABLE") == "true" + assert not fvs # remove port channel member ps = swsscommon.ProducerStateTable(db, "LAG_MEMBER_TABLE")