From 41e61bd28351fdd5672b1f5db1713ea94ddc4712 Mon Sep 17 00:00:00 2001 From: Jipan Yang Date: Fri, 14 Sep 2018 16:11:41 -0700 Subject: [PATCH] Warm reboot: port state sync up (#557) * Warm start: port state sync up * Use Table::hget() to simplify the oper_status retrieval processing. * Add more comment for port state sync up * Use m_oper_status field of Port class instead of reading port oper status rom appDB. * Add common function for port oper status update * Throw execption upon port oper status get error * Add VS test for port state sync up --- orchagent/main.cpp | 9 +++- orchagent/orchdaemon.cpp | 7 ++- orchagent/portsorch.cpp | 58 +++++++++++++++++++++++- orchagent/portsorch.h | 4 +- tests/test_warm_reboot.py | 94 +++++++++++++++++++++++++++++++++++++++ 5 files changed, 167 insertions(+), 5 deletions(-) diff --git a/orchagent/main.cpp b/orchagent/main.cpp index 556352f94786..5ab1fcd7060c 100644 --- a/orchagent/main.cpp +++ b/orchagent/main.cpp @@ -281,7 +281,14 @@ int main(int argc, char **argv) exit(EXIT_FAILURE); } - syncd_apply_view(); + /* + * In syncd view comparison solution, apply view has been sent + * immediately after restore is done + */ + if (!WarmStart::isWarmStart()) + { + syncd_apply_view(); + } orchDaemon->start(); } diff --git a/orchagent/orchdaemon.cpp b/orchagent/orchdaemon.cpp index 007b6d8d039a..16bed5242247 100644 --- a/orchagent/orchdaemon.cpp +++ b/orchagent/orchdaemon.cpp @@ -17,6 +17,8 @@ using namespace swss; extern sai_switch_api_t* sai_switch_api; extern sai_object_id_t gSwitchId; + +extern void syncd_apply_view(); /* * Global orch daemon variables */ @@ -387,7 +389,10 @@ bool OrchDaemon::warmRestoreAndSyncUp() SWSS_LOG_NOTICE("Orchagent state restore done"); - /* TODO: perform port and fdb state sync up*/ + syncd_apply_view(); + + /* Start dynamic state sync up */ + gPortsOrch->refreshPortStatus(); /* * Note. Arp sync up is handled in neighsyncd. diff --git a/orchagent/portsorch.cpp b/orchagent/portsorch.cpp index 9e3ffb62756a..9b0dad45ebda 100644 --- a/orchagent/portsorch.cpp +++ b/orchagent/portsorch.cpp @@ -2866,10 +2866,64 @@ void PortsOrch::doTask(NotificationConsumer &consumer) SWSS_LOG_NOTICE("Get port state change notification id:%lx status:%d", id, status); - this->updateDbPortOperStatus(id, status); - this->setHostIntfsOperStatus(id, status == SAI_PORT_OPER_STATUS_UP); + Port p; + if (!getPort(id, p)) + { + SWSS_LOG_ERROR("Failed to get port object for port id 0x%lx", id); + continue; + } + updatePortOperStatus(p, status); } sai_deserialize_free_port_oper_status_ntf(count, portoperstatus); } } + +void PortsOrch::updatePortOperStatus(Port &port, sai_port_oper_status_t status) +{ + if (status != port.m_oper_status) + { + SWSS_LOG_NOTICE("Port state changed for %s from %s to %s", port.m_alias.c_str(), + oper_status_strings.at(port.m_oper_status).c_str(), oper_status_strings.at(status).c_str()); + this->updateDbPortOperStatus(port.m_port_id, status); + if(status == SAI_PORT_OPER_STATUS_UP || port.m_oper_status == SAI_PORT_OPER_STATUS_UP) + { + this->setHostIntfsOperStatus(port.m_port_id, status == SAI_PORT_OPER_STATUS_UP); + } + } +} +/* + * sync up orchagent with libsai/ASIC for port state. + * + * Currently NotificationProducer is used by syncd to inform port state change, + * which means orchagent will miss the signal if it happens between orchagent shutdown and startup. + * Syncd doesn't know whether the signal has been lost or not. + * Also the source of notification event is from libsai/SDK. + * + * Latest oper status for each port is retrieved via SAI_PORT_ATTR_OPER_STATUS sai API, + * the hostif and db are updated accordingly. + */ +void PortsOrch::refreshPortStatus() +{ + SWSS_LOG_ENTER(); + + for (auto &it: m_portList) + { + auto &p = it.second; + if (p.m_type == Port::PHY) + { + sai_attribute_t attr; + attr.id = SAI_PORT_ATTR_OPER_STATUS; + + sai_status_t ret = sai_port_api->get_port_attribute(p.m_port_id, 1, &attr); + if (ret != SAI_STATUS_SUCCESS) + { + SWSS_LOG_ERROR("Failed to get oper status for %s", p.m_alias.c_str()); + throw "PortsOrch get port oper status failure"; + } + sai_port_oper_status_t status = (sai_port_oper_status_t)attr.value.u32; + SWSS_LOG_INFO("%s oper status is %s", p.m_alias.c_str(), oper_status_strings.at(status).c_str()); + updatePortOperStatus(p, status); + } + } +} diff --git a/orchagent/portsorch.h b/orchagent/portsorch.h index e39a9c9f780b..1a012f8cbae8 100644 --- a/orchagent/portsorch.h +++ b/orchagent/portsorch.h @@ -75,7 +75,7 @@ class PortsOrch : public Orch, public Subject bool setPortPfc(sai_object_id_t portId, uint8_t pfc_bitmask); void generateQueueMap(); - + void refreshPortStatus(); private: unique_ptr m_counterTable; unique_ptr
m_portTable; @@ -167,6 +167,8 @@ class PortsOrch : public Orch, public Subject bool setPortAutoNeg(sai_object_id_t id, int an); bool setPortFecMode(sai_object_id_t id, int fec); + + void updatePortOperStatus(Port &port, sai_port_oper_status_t status); }; #endif /* SWSS_PORTSORCH_H */ diff --git a/tests/test_warm_reboot.py b/tests/test_warm_reboot.py index a7062231b38d..b6f2a75c928c 100644 --- a/tests/test_warm_reboot.py +++ b/tests/test_warm_reboot.py @@ -4,6 +4,19 @@ import time import json +# start processes in SWSS +def start_swss(dvs): + dvs.runcmd(['sh', '-c', 'supervisorctl start orchagent; supervisorctl start portsyncd; supervisorctl start intfsyncd; \ + supervisorctl start neighsyncd; supervisorctl start intfmgrd; supervisorctl start vlanmgrd; \ + supervisorctl start buffermgrd; supervisorctl start arp_update']) + +# stop processes in SWSS +def stop_swss(dvs): + dvs.runcmd(['sh', '-c', 'supervisorctl stop orchagent; supervisorctl stop portsyncd; supervisorctl stop intfsyncd; \ + supervisorctl stop neighsyncd; supervisorctl stop intfmgrd; supervisorctl stop vlanmgrd; \ + supervisorctl stop buffermgrd; supervisorctl stop arp_update']) + + # Get restart count of all processes supporting warm restart def swss_get_RestartCount(state_db): restart_count = {} @@ -683,3 +696,84 @@ def test_swss_neighbor_syncup(dvs): check_sairedis_for_neighbor_entry(dvs, 4, 4, 4) # check restart Count swss_app_check_RestartCount_single(state_db, restart_count, "neighsyncd") + +def test_swss_port_state_syncup(dvs): + + appl_db = swsscommon.DBConnector(swsscommon.APPL_DB, dvs.redis_sock, 0) + conf_db = swsscommon.DBConnector(swsscommon.CONFIG_DB, dvs.redis_sock, 0) + state_db = swsscommon.DBConnector(swsscommon.STATE_DB, dvs.redis_sock, 0) + + # enable warm restart + # TODO: use cfg command to config it + create_entry_tbl( + conf_db, + swsscommon.CFG_WARM_RESTART_TABLE_NAME, "swss", + [ + ("enable", "true"), + ] + ) + + tbl = swsscommon.Table(appl_db, swsscommon.APP_PORT_TABLE_NAME) + + restart_count = swss_get_RestartCount(state_db) + + # update port admin state + dvs.runcmd("ifconfig Ethernet0 10.0.0.0/31 up") + dvs.runcmd("ifconfig Ethernet4 10.0.0.2/31 up") + dvs.runcmd("ifconfig Ethernet8 10.0.0.4/31 up") + + dvs.runcmd("arp -s 10.0.0.1 00:00:00:00:00:01") + dvs.runcmd("arp -s 10.0.0.3 00:00:00:00:00:02") + dvs.runcmd("arp -s 10.0.0.5 00:00:00:00:00:03") + + dvs.servers[0].runcmd("ip link set down dev eth0") == 0 + dvs.servers[1].runcmd("ip link set down dev eth0") == 0 + dvs.servers[2].runcmd("ip link set down dev eth0") == 0 + + dvs.servers[2].runcmd("ip link set up dev eth0") == 0 + + time.sleep(3) + + for i in [0, 1, 2]: + (status, fvs) = tbl.get("Ethernet%d" % (i * 4)) + assert status == True + oper_status = "unknown" + for v in fvs: + if v[0] == "oper_status": + oper_status = v[1] + break + if i == 2: + assert oper_status == "up" + else: + assert oper_status == "down" + + stop_swss(dvs) + time.sleep(3) + + # flap the port oper status for Ethernet0, Ethernet4 and Ethernet8 + dvs.servers[0].runcmd("ip link set down dev eth0") == 0 + dvs.servers[1].runcmd("ip link set down dev eth0") == 0 + dvs.servers[2].runcmd("ip link set down dev eth0") == 0 + + dvs.servers[0].runcmd("ip link set up dev eth0") == 0 + dvs.servers[1].runcmd("ip link set up dev eth0") == 0 + + time.sleep(5) + start_swss(dvs) + time.sleep(10) + + swss_check_RestartCount(state_db, restart_count) + + for i in [0, 1, 2]: + (status, fvs) = tbl.get("Ethernet%d" % (i * 4)) + assert status == True + oper_status = "unknown" + for v in fvs: + if v[0] == "oper_status": + oper_status = v[1] + break + if i == 2: + assert oper_status == "down" + else: + assert oper_status == "up" +