Skip to content

Commit

Permalink
Handle duplicate routes in a graceful manner (sonic-net#2688)
Browse files Browse the repository at this point in the history
* With VNET routes, the same route can be learned via multiple sources, like via
  BGP. Handle this special situation gracefully without orchagent crash during
  both create and remove operations.
* Add a test case to simulate duplicate route creation/removal
* Do cleanups like deleting vnets post certain test cases that were lacking them

Signed-off-by: Prabhat Aravind <[email protected]>
  • Loading branch information
prabhataravind authored Apr 29, 2023
1 parent 68cef32 commit 6e25014
Show file tree
Hide file tree
Showing 2 changed files with 169 additions and 8 deletions.
56 changes: 48 additions & 8 deletions orchagent/saihelper.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -519,6 +519,24 @@ task_process_status handleSaiCreateStatus(sai_api_t api, sai_status_t status, vo
break;
}
break;
case SAI_API_ROUTE:
switch (status)
{
case SAI_STATUS_SUCCESS:
SWSS_LOG_WARN("SAI_STATUS_SUCCESS is not expected in handleSaiCreateStatus");
return task_success;
case SAI_STATUS_ITEM_ALREADY_EXISTS:
case SAI_STATUS_NOT_EXECUTED:
/* With VNET routes, the same route can be learned via multiple
sources, like via BGP. Handle this gracefully */
return task_success;
default:
SWSS_LOG_ERROR("Encountered failure in create operation, exiting orchagent, SAI API: %s, status: %s",
sai_serialize_api(api).c_str(), sai_serialize_status(status).c_str());
handleSaiFailure(true);
break;
}
break;
default:
switch (status)
{
Expand Down Expand Up @@ -612,16 +630,38 @@ task_process_status handleSaiRemoveStatus(sai_api_t api, sai_status_t status, vo
* in each orch.
* 3. Take the type of sai api into consideration.
*/
switch (status)
{
case SAI_STATUS_SUCCESS:
SWSS_LOG_WARN("SAI_STATUS_SUCCESS is not expected in handleSaiRemoveStatus");
return task_success;
switch (api)
{
case SAI_API_ROUTE:
switch (status)
{
case SAI_STATUS_SUCCESS:
SWSS_LOG_WARN("SAI_STATUS_SUCCESS is not expected in handleSaiRemoveStatus");
return task_success;
case SAI_STATUS_ITEM_NOT_FOUND:
case SAI_STATUS_NOT_EXECUTED:
/* When the same route is learned via multiple sources,
there can be a duplicate remove operation. Handle this gracefully */
return task_success;
default:
SWSS_LOG_ERROR("Encountered failure in remove operation, exiting orchagent, SAI API: %s, status: %s",
sai_serialize_api(api).c_str(), sai_serialize_status(status).c_str());
handleSaiFailure(true);
break;
}
break;
default:
SWSS_LOG_ERROR("Encountered failure in remove operation, exiting orchagent, SAI API: %s, status: %s",
switch (status)
{
case SAI_STATUS_SUCCESS:
SWSS_LOG_WARN("SAI_STATUS_SUCCESS is not expected in handleSaiRemoveStatus");
return task_success;
default:
SWSS_LOG_ERROR("Encountered failure in remove operation, exiting orchagent, SAI API: %s, status: %s",
sai_serialize_api(api).c_str(), sai_serialize_status(status).c_str());
handleSaiFailure(true);
break;
handleSaiFailure(true);
break;
}
}
return task_need_retry;
}
Expand Down
121 changes: 121 additions & 0 deletions tests/test_vnet.py
Original file line number Diff line number Diff line change
Expand Up @@ -536,6 +536,11 @@ def check_remove_routes_advertisement(dvs, prefix):
assert prefix not in keys


def check_syslog(dvs, marker, err_log):
(exitcode, num) = dvs.runcmd(['sh', '-c', "awk \'/%s/,ENDFILE {print;}\' /var/log/syslog | grep \"%s\" | wc -l" % (marker, err_log)])
assert num.strip() == "0"


loopback_id = 0
def_vr_id = 0
switch_mac = None
Expand Down Expand Up @@ -1079,6 +1084,52 @@ class TestVnetOrch(object):
def get_vnet_obj(self):
return VnetVxlanVrfTunnel()

def setup_db(self, dvs):
self.pdb = dvs.get_app_db()
self.adb = dvs.get_asic_db()
self.cdb = dvs.get_config_db()
self.sdb = dvs.get_state_db()

def clear_srv_config(self, dvs):
dvs.servers[0].runcmd("ip address flush dev eth0")
dvs.servers[1].runcmd("ip address flush dev eth0")
dvs.servers[2].runcmd("ip address flush dev eth0")
dvs.servers[3].runcmd("ip address flush dev eth0")

def set_admin_status(self, interface, status):
self.cdb.update_entry("PORT", interface, {"admin_status": status})

def create_l3_intf(self, interface, vrf_name):
if len(vrf_name) == 0:
self.cdb.create_entry("INTERFACE", interface, {"NULL": "NULL"})
else:
self.cdb.create_entry("INTERFACE", interface, {"vrf_name": vrf_name})

def add_ip_address(self, interface, ip):
self.cdb.create_entry("INTERFACE", interface + "|" + ip, {"NULL": "NULL"})

def remove_ip_address(self, interface, ip):
self.cdb.delete_entry("INTERFACE", interface + "|" + ip)

def create_route_entry(self, key, pairs):
tbl = swsscommon.ProducerStateTable(self.pdb.db_connection, "ROUTE_TABLE")
fvs = swsscommon.FieldValuePairs(list(pairs.items()))
tbl.set(key, fvs)

def remove_route_entry(self, key):
tbl = swsscommon.ProducerStateTable(self.pdb.db_connection, "ROUTE_TABLE")
tbl._del(key)

def check_route_entries(self, destinations):
def _access_function():
route_entries = self.adb.get_keys("ASIC_STATE:SAI_OBJECT_TYPE_ROUTE_ENTRY")
route_destinations = [json.loads(route_entry)["dest"]
for route_entry in route_entries]
return (all(destination in route_destinations for destination in destinations), None)

wait_for_result(_access_function)


@pytest.fixture(params=["true", "false"])
def ordered_ecmp(self, dvs, request):

Expand Down Expand Up @@ -3307,6 +3358,76 @@ def test_vnet_orch_23(self, dvs, testlog):
vnet_obj.check_del_vnet_entry(dvs, vnet_name)
delete_vxlan_tunnel(dvs, tunnel_name)

'''
Test 24 - Test duplicate route addition and removal.
'''
def test_vnet_orch_24(self, dvs, testlog):
self.setup_db(dvs)
self.clear_srv_config(dvs)

vnet_obj = self.get_vnet_obj()
vnet_obj.fetch_exist_entries(dvs)

# create vxlan tunnel and vnet in default vrf
tunnel_name = 'tunnel_24'
create_vxlan_tunnel(dvs, tunnel_name, '10.10.10.10')
create_vnet_entry(dvs, 'Vnet_2000', tunnel_name, '2000', "", 'default')

vnet_obj.check_default_vnet_entry(dvs, 'Vnet_2000')
vnet_obj.check_vxlan_tunnel_entry(dvs, tunnel_name, 'Vnet_2000', '2000')
vnet_obj.check_vxlan_tunnel(dvs, tunnel_name, '10.10.10.10')
vnet_obj.fetch_exist_entries(dvs)

# create vnet route
create_vnet_routes(dvs, "100.100.1.0/24", 'Vnet_2000', '10.10.10.3')
vnet_obj.check_vnet_routes(dvs, 'Vnet_2000', '10.10.10.3', tunnel_name)
check_state_db_routes(dvs, 'Vnet_2000', "100.100.1.0/24", ['10.10.10.3'])
time.sleep(2)

# create l3 interface
self.create_l3_intf("Ethernet0", "")

# set ip address
self.add_ip_address("Ethernet0", "10.10.10.1/24")

# bring up interface
self.set_admin_status("Ethernet0", "up")

# set ip address and default route
dvs.servers[0].runcmd("ip address add 10.10.10.3/24 dev eth0")
dvs.servers[0].runcmd("ip route add default via 10.10.10.1")

marker = dvs.add_log_marker("/var/log/syslog")
time.sleep(2)

# add another route for same prefix as vnet route
dvs.runcmd("vtysh -c \"configure terminal\" -c \"ip route 100.100.1.0/24 10.10.10.3\"")

# check application database
self.pdb.wait_for_entry("ROUTE_TABLE", "100.100.1.0/24")

# check ASIC route database
self.check_route_entries(["100.100.1.0/24"])

log_string = "Encountered failure in create operation, exiting orchagent, SAI API: SAI_API_ROUTE, status: SAI_STATUS_NOT_EXECUTED"
# check for absence of log_string in syslog
check_syslog(dvs, marker, log_string)

# remove route entry
dvs.runcmd("vtysh -c \"configure terminal\" -c \"no ip route 100.100.1.0/24 10.10.10.3\"")

# delete vnet route
delete_vnet_routes(dvs, "100.100.1.0/24", 'Vnet_2000')
vnet_obj.check_del_vnet_routes(dvs, 'Vnet_2000')
check_remove_state_db_routes(dvs, 'Vnet_2000', "100.100.1.0/24")

# delete vnet
delete_vnet_entry(dvs, 'Vnet_2000')
vnet_obj.check_del_vnet_entry(dvs, 'Vnet_2000')

# delete vxlan tunnel
delete_vxlan_tunnel(dvs, tunnel_name)

# Add Dummy always-pass test at end as workaroud
# for issue when Flaky fail on final test it invokes module tear-down before retrying
def test_nonflaky_dummy():
Expand Down

0 comments on commit 6e25014

Please sign in to comment.