Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Handle dual ToR neighbor miss scenario #2137

Merged
Merged
36 changes: 31 additions & 5 deletions neighsyncd/neighsync.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -17,9 +17,10 @@
using namespace std;
using namespace swss;

NeighSync::NeighSync(RedisPipeline *pipelineAppDB, DBConnector *stateDb) :
NeighSync::NeighSync(RedisPipeline *pipelineAppDB, DBConnector *stateDb, DBConnector *cfgDb) :
m_neighTable(pipelineAppDB, APP_NEIGH_TABLE_NAME),
m_stateNeighRestoreTable(stateDb, STATE_NEIGH_RESTORE_TABLE_NAME)
m_stateNeighRestoreTable(stateDb, STATE_NEIGH_RESTORE_TABLE_NAME),
m_cfgPeerSwitchTable(cfgDb, CFG_PEER_SWITCH_TABLE_NAME)
{
m_AppRestartAssist = new AppRestartAssist(pipelineAppDB, "neighsyncd", "swss", DEFAULT_NEIGHSYNC_WARMSTART_TIMER);
if (m_AppRestartAssist)
Expand Down Expand Up @@ -87,14 +88,39 @@ void NeighSync::onMsg(int nlmsg_type, struct nl_object *obj)
return;
}

std::vector<std::string> peerSwitchKeys;
bool delete_key = false;
if ((nlmsg_type == RTM_DELNEIGH) || (state == NUD_INCOMPLETE) ||
(state == NUD_FAILED))
bool use_zero_mac = false;
m_cfgPeerSwitchTable.getKeys(peerSwitchKeys);
bool is_dualtor = peerSwitchKeys.size() > 0;
if (is_dualtor && (state == NUD_INCOMPLETE || state == NUD_FAILED))
{
SWSS_LOG_INFO("Unable to resolve %s, setting zero MAC", key.c_str());
use_zero_mac = true;

// Unresolved neighbor deletion on dual ToR devices must be handled
// separately, otherwise delete_key is never set to true
// and neighorch is never able to remove the neighbor
if (nlmsg_type == RTM_DELNEIGH)
{
delete_key = true;
}
}
else if ((nlmsg_type == RTM_DELNEIGH) ||
(state == NUD_INCOMPLETE) || (state == NUD_FAILED))
{
delete_key = true;
}

nl_addr2str(rtnl_neigh_get_lladdr(neigh), macStr, MAX_ADDR_SIZE);
if (use_zero_mac)
{
std::string zero_mac = "00:00:00:00:00:00";
strncpy(macStr, zero_mac.c_str(), zero_mac.length());
}
else
{
nl_addr2str(rtnl_neigh_get_lladdr(neigh), macStr, MAX_ADDR_SIZE);
}

/* Ignore neighbor entries with Broadcast Mac - Trigger for directed broadcast */
if (!delete_key && (MacAddress(macStr) == MacAddress("ff:ff:ff:ff:ff:ff")))
Expand Down
4 changes: 2 additions & 2 deletions neighsyncd/neighsync.h
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ class NeighSync : public NetMsg
public:
enum { MAX_ADDR_SIZE = 64 };

NeighSync(RedisPipeline *pipelineAppDB, DBConnector *stateDb);
NeighSync(RedisPipeline *pipelineAppDB, DBConnector *stateDb, DBConnector *cfgDb);
~NeighSync();

virtual void onMsg(int nlmsg_type, struct nl_object *obj);
Expand All @@ -36,7 +36,7 @@ class NeighSync : public NetMsg
}

private:
Table m_stateNeighRestoreTable;
Table m_stateNeighRestoreTable, m_cfgPeerSwitchTable;
ProducerStateTable m_neighTable;
AppRestartAssist *m_AppRestartAssist;
};
Expand Down
3 changes: 2 additions & 1 deletion neighsyncd/neighsyncd.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -18,8 +18,9 @@ int main(int argc, char **argv)
DBConnector appDb("APPL_DB", 0);
RedisPipeline pipelineAppDB(&appDb);
DBConnector stateDb("STATE_DB", 0);
DBConnector cfgDb("CONFIG_DB", 0);

NeighSync sync(&pipelineAppDB, &stateDb);
NeighSync sync(&pipelineAppDB, &stateDb, &cfgDb);

NetDispatcher::getInstance().registerMessageHandler(RTM_NEWNEIGH, &sync);
NetDispatcher::getInstance().registerMessageHandler(RTM_DELNEIGH, &sync);
Expand Down
53 changes: 53 additions & 0 deletions orchagent/muxorch.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -329,6 +329,7 @@ MuxCable::MuxCable(string name, IpPrefix& srv_ip4, IpPrefix& srv_ip6, IpAddress

/* Set initial state to "standby" */
stateStandby();
state_ = MuxState::MUX_STATE_STANDBY;
}

bool MuxCable::stateInitActive()
Expand Down Expand Up @@ -1025,6 +1026,37 @@ void MuxOrch::updateNeighbor(const NeighborUpdate& update)
return;
}

auto standalone_tunnel_neigh_it = standalone_tunnel_neighbors_.find(update.entry.ip_address);
// Handling zero MAC neighbor updates
if (!update.mac)
{
/* For neighbors that were previously resolvable but are now unresolvable,
* we expect such neighbor entries to be deleted prior to a zero MAC update
* arriving for that same neighbor.
*/

if (update.add)
{
if (standalone_tunnel_neigh_it == standalone_tunnel_neighbors_.end())
{
createStandaloneTunnelRoute(update.entry.ip_address);
}
/* If the MAC address in the neighbor entry is zero but the neighbor IP
* is already present in standalone_tunnel_neighbors_, assume we have already
* added a tunnel route for it and exit early
*/
return;
}
}
/* If the update operation for a neighbor contains a non-zero MAC, we must
* make sure to remove any existing tunnel routes to prevent conflicts.
* This block also covers the case of neighbor deletion.
*/
if (standalone_tunnel_neigh_it != standalone_tunnel_neighbors_.end())
{
removeStandaloneTunnelRoute(update.entry.ip_address);
}

for (auto it = mux_cable_tb_.begin(); it != mux_cable_tb_.end(); it++)
{
MuxCable* ptr = it->second.get();
Expand Down Expand Up @@ -1292,6 +1324,27 @@ bool MuxOrch::delOperation(const Request& request)
return true;
}

void MuxOrch::createStandaloneTunnelRoute(IpAddress neighborIp)
{
SWSS_LOG_INFO("Creating standalone tunnel route for neighbor %s", neighborIp.to_string().c_str());
sai_object_id_t tunnel_nexthop = getNextHopTunnelId(MUX_TUNNEL, mux_peer_switch_);
if (tunnel_nexthop == SAI_NULL_OBJECT_ID) {
SWSS_LOG_NOTICE("%s nexthop not created yet, ignoring tunnel route creation for %s", MUX_TUNNEL, neighborIp.to_string().c_str());
return;
}
IpPrefix pfx = neighborIp.to_string();
create_route(pfx, tunnel_nexthop);
standalone_tunnel_neighbors_.insert(neighborIp);
}

void MuxOrch::removeStandaloneTunnelRoute(IpAddress neighborIp)
{
SWSS_LOG_INFO("Removing standalone tunnel route for neighbor %s", neighborIp.to_string().c_str());
IpPrefix pfx = neighborIp.to_string();
remove_route(pfx);
standalone_tunnel_neighbors_.erase(neighborIp);
}

MuxCableOrch::MuxCableOrch(DBConnector *db, DBConnector *sdb, const std::string& tableName):
Orch2(db, tableName, request_),
app_tunnel_route_table_(db, APP_TUNNEL_ROUTE_TABLE_NAME),
Expand Down
8 changes: 8 additions & 0 deletions orchagent/muxorch.h
Original file line number Diff line number Diff line change
Expand Up @@ -196,6 +196,13 @@ class MuxOrch : public Orch2, public Observer, public Subject

bool getMuxPort(const MacAddress&, const string&, string&);

/***
* Methods for managing tunnel routes for neighbor IPs not associated
* with a specific mux cable
***/
void createStandaloneTunnelRoute(IpAddress neighborIp);
void removeStandaloneTunnelRoute(IpAddress neighborIp);

IpAddress mux_peer_switch_ = 0x0;
sai_object_id_t mux_tunnel_id_ = SAI_NULL_OBJECT_ID;

Expand All @@ -210,6 +217,7 @@ class MuxOrch : public Orch2, public Observer, public Subject
FdbOrch *fdb_orch_;

MuxCfgRequest request_;
std::set<IpAddress> standalone_tunnel_neighbors_;
};

const request_description_t mux_cable_request_description = {
Expand Down
23 changes: 22 additions & 1 deletion orchagent/neighorch.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
#include "routeorch.h"
#include "directory.h"
#include "muxorch.h"
#include "observer.h"
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Please remove this. Its already in .h file


extern sai_neighbor_api_t* sai_neighbor_api;
extern sai_next_hop_api_t* sai_next_hop_api;
Expand All @@ -17,6 +18,8 @@ extern RouteOrch *gRouteOrch;
extern FgNhgOrch *gFgNhgOrch;
extern Directory<Orch*> gDirectory;

#define MUX_TUNNEL "MuxTunnel0"
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Please remove. Its unused and also not relevant to neighorch


const int neighorch_pri = 30;

NeighOrch::NeighOrch(DBConnector *appDb, string tableName, IntfsOrch *intfsOrch, FdbOrch *fdbOrch, PortsOrch *portsOrch) :
Expand Down Expand Up @@ -547,7 +550,16 @@ void NeighOrch::doTask(Consumer &consumer)
if (m_syncdNeighbors.find(neighbor_entry) == m_syncdNeighbors.end()
|| m_syncdNeighbors[neighbor_entry].mac != mac_address)
{
if (addNeighbor(neighbor_entry, mac_address))
// only for unresolvable neighbors that are new
if (!mac_address)
{
if (m_syncdNeighbors.find(neighbor_entry) == m_syncdNeighbors.end())
{
addZeroMacTunnelRoute(neighbor_entry, mac_address);
}
it = consumer.m_toSync.erase(it);
}
else if (addNeighbor(neighbor_entry, mac_address))
{
it = consumer.m_toSync.erase(it);
}
Expand Down Expand Up @@ -954,3 +966,12 @@ bool NeighOrch::removeTunnelNextHop(const NextHopKey& nh)
return true;
}

void NeighOrch::addZeroMacTunnelRoute(const NeighborEntry& entry, const MacAddress& mac)
{
SWSS_LOG_INFO("Creating tunnel route for neighbor %s", entry.ip_address.to_string().c_str());
MuxOrch* mux_orch = gDirectory.get<MuxOrch*>();
NeighborUpdate update = {entry, mac, true};
mux_orch->update(SUBJECT_TYPE_NEIGH_CHANGE, static_cast<void *>(&update));
m_syncdNeighbors[entry] = { mac, false };
}

2 changes: 2 additions & 0 deletions orchagent/neighorch.h
Original file line number Diff line number Diff line change
Expand Up @@ -99,6 +99,8 @@ class NeighOrch : public Orch, public Subject, public Observer

bool resolveNeighborEntry(const NeighborEntry &, const MacAddress &);
void clearResolvedNeighborEntry(const NeighborEntry &);

void addZeroMacTunnelRoute(const NeighborEntry &, const MacAddress &);
};

#endif /* SWSS_NEIGHORCH_H */
16 changes: 8 additions & 8 deletions tests/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -1529,7 +1529,7 @@ def verify_vct(self):
return ret1 and ret2


@pytest.yield_fixture(scope="module")
@pytest.fixture(scope="module")
def dvs(request) -> DockerVirtualSwitch:
if sys.version_info[0] < 3:
raise NameError("Python 2 is not supported, please install python 3")
Expand Down Expand Up @@ -1559,7 +1559,7 @@ def dvs(request) -> DockerVirtualSwitch:
dvs.ctn_restart()


@pytest.yield_fixture(scope="module")
@pytest.fixture(scope="module")
def vct(request):
vctns = request.config.getoption("--vctns")
topo = request.config.getoption("--topo")
Expand All @@ -1579,7 +1579,7 @@ def vct(request):
vct.destroy()


@pytest.yield_fixture
@pytest.fixture
def testlog(request, dvs):
dvs.runcmd(f"logger === start test {request.node.name} ===")
yield testlog
Expand All @@ -1602,13 +1602,13 @@ def dvs_route(request, dvs) -> DVSRoute:

# FIXME: The rest of these also need to be reverted back to normal fixtures to
# appease the linter.
@pytest.yield_fixture(scope="class")
@pytest.fixture(scope="class")
def dvs_lag_manager(request, dvs):
request.cls.dvs_lag = dvs_lag.DVSLag(dvs.get_asic_db(),
dvs.get_config_db())


@pytest.yield_fixture(scope="class")
@pytest.fixture(scope="class")
def dvs_vlan_manager(request, dvs):
request.cls.dvs_vlan = dvs_vlan.DVSVlan(dvs.get_asic_db(),
dvs.get_config_db(),
Expand All @@ -1617,7 +1617,7 @@ def dvs_vlan_manager(request, dvs):
dvs.get_app_db())


@pytest.yield_fixture(scope="class")
@pytest.fixture(scope="class")
def dvs_mirror_manager(request, dvs):
request.cls.dvs_mirror = dvs_mirror.DVSMirror(dvs.get_asic_db(),
dvs.get_config_db(),
Expand All @@ -1626,7 +1626,7 @@ def dvs_mirror_manager(request, dvs):
dvs.get_app_db())


@pytest.yield_fixture(scope="class")
@pytest.fixture(scope="class")
def dvs_policer_manager(request, dvs):
request.cls.dvs_policer = dvs_policer.DVSPolicer(dvs.get_asic_db(),
dvs.get_config_db())
Expand All @@ -1647,7 +1647,7 @@ def remove_dpb_config_file(dvs):
dvs.runcmd(cmd)


@pytest.yield_fixture(scope="module")
@pytest.fixture(scope="module")
def dpb_setup_fixture(dvs):
create_dpb_config_file(dvs)
if dvs.vct is None:
Expand Down
Loading