Skip to content

Commit

Permalink
bgpd: delay and reduce VPN processing calls from ZAPI notifications
Browse files Browse the repository at this point in the history
At startup, when the configuration of hundreds of VRF interfaces
and an L3VPN peering are being set up at the same time, a CPU surge
happens in BGP. The following message can be seen:

> STARVATION: task zclient_read (7fa1886cc98d) ran for 5069ms (cpu time 2417ms)
> May 03 11:27:40 xxx bgpd[5351]: [PHJDC-499N2][EC 100663314] STARVATION: task zclient_read (7fa1886cc98d) ran for 5155ms (cpu time 2419ms)
> May 03 11:28:42 xxx bgpd[5351]: [PHJDC-499N2][EC 100663314] STARVATION: task zclient_read (7fa1886cc98d) ran for 5112ms (cpu time 2416ms)

The interface UP notification of each of those interfaces forces the
zclient task to perform VPN processing. The extra traces below show
that the time elapsed between consecutive interface up events
corresponds to the time spent performing VPN processing.

> 2024/05/06 11:56:42 BGP: [ZXFVW-H54SV] Rx Intf up VRF 29 IF c38119
> 2024/05/06 11:56:43 BGP: [ZXFVW-H54SV] Rx Intf up VRF 30 IF c3812
> 2024/05/06 11:56:46 BGP: [ZXFVW-H54SV] Rx Intf up VRF 31 IF c38120
> 2024/05/06 11:56:49 BGP: [ZXFVW-H54SV] Rx Intf up VRF 32 IF c38121
> 2024/05/06 11:56:52 BGP: [ZXFVW-H54SV] Rx Intf up VRF 33 IF c38122
> 2024/05/06 11:56:54 BGP: [ZXFVW-H54SV] Rx Intf up VRF 34 IF c38123

The more VRF interfaces and VPN prefixes there are, the longer the
processing takes and the more likely starvation issues become.

To resolve this issue, a separate thread for VPN processing will be
scheduled by the ZAPI interface. This gives the ZAPI time to receive
more similar events to process. If the VPN job is already scheduled,
the event_add_timer() call will not schedule it again.

Signed-off-by: Philippe Guibert <[email protected]>
  • Loading branch information
pguibert6WIND committed May 30, 2024
1 parent fd8a2c4 commit c4e2fe6
Show file tree
Hide file tree
Showing 6 changed files with 31 additions and 10 deletions.
17 changes: 16 additions & 1 deletion bgpd/bgp_mplsvpn.c
Original file line number Diff line number Diff line change
Expand Up @@ -3753,14 +3753,17 @@ vrf_id_t get_first_vrf_for_redirect_with_rt(struct ecommunity *eckey)
* This function gets called when the default instance ("router bgp NNN")
* is created.
*/
void vpn_leak_postchange_all(void)
static void vpn_leak_postchange_all_internal(struct event *t
__attribute__((__unused__)))
{
struct listnode *next;
struct bgp *bgp;
struct bgp *bgp_default = bgp_get_default();

assert(bgp_default);

EVENT_OFF(bm->t_vpn_leak_postchange);

/* First, do any exporting from VRFs to the single VPN RIB */
for (ALL_LIST_ELEMENTS_RO(bm->bgp, next, bgp)) {

Expand Down Expand Up @@ -3800,6 +3803,18 @@ void vpn_leak_postchange_all(void)
}
}

/* Entry point for the VPN leak post-change processing.
 *
 * event == false: run vpn_leak_postchange_all_internal() immediately,
 *                 in the caller's context.
 * event == true:  defer the processing by 1 second by arming the
 *                 bm->t_vpn_leak_postchange timer on bm->master, so
 *                 that the work is done in a separate thread.
 */
void vpn_leak_postchange_all(bool event)
{
	/* Synchronous request: do the work right now. */
	if (!event) {
		vpn_leak_postchange_all_internal(NULL);
		return;
	}

	/* Deferred request: the handler receives no argument (NULL). */
	event_add_timer(bm->master, vpn_leak_postchange_all_internal, NULL, 1,
			&bm->t_vpn_leak_postchange);
}

/* When a bgp vrf instance is unconfigured, remove its routes
* from the VPN table and this vrf could be importing routes from other
* bgp vrf instances, unimport them.
Expand Down
2 changes: 1 addition & 1 deletion bgpd/bgp_mplsvpn.h
Original file line number Diff line number Diff line change
Expand Up @@ -347,7 +347,7 @@ extern void vpn_policy_routemap_event(const char *rmap_name);

extern vrf_id_t get_first_vrf_for_redirect_with_rt(struct ecommunity *eckey);

extern void vpn_leak_postchange_all(void);
extern void vpn_leak_postchange_all(bool event);
extern void vpn_handle_router_id_update(struct bgp *bgp, bool withdraw,
bool is_config);
extern void bgp_vpn_leak_unimport(struct bgp *from_bgp);
Expand Down
4 changes: 2 additions & 2 deletions bgpd/bgp_vty.c
Original file line number Diff line number Diff line change
Expand Up @@ -344,7 +344,7 @@ static int bgp_srv6_locator_unset(struct bgp *bgp)
}

/* update vpn bgp processes */
vpn_leak_postchange_all();
vpn_leak_postchange_all(false);

/* refresh tovpn_sid_locator */
for (ALL_LIST_ELEMENTS_RO(bm->bgp, node, bgp_vrf)) {
Expand Down Expand Up @@ -1598,7 +1598,7 @@ DEFUN_NOSH (router_bgp,
* earlier "router bgp X vrf FOO" blocks.
*/
if (is_new_bgp && inst_type == BGP_INSTANCE_TYPE_DEFAULT)
vpn_leak_postchange_all();
vpn_leak_postchange_all(false);

if (inst_type == BGP_INSTANCE_TYPE_VRF)
bgp_vpn_leak_export(bgp);
Expand Down
13 changes: 7 additions & 6 deletions bgpd/bgp_zebra.c
Original file line number Diff line number Diff line change
Expand Up @@ -240,7 +240,7 @@ static int bgp_ifp_up(struct interface *ifp)
vpn_leak_zebra_vrf_label_update(bgp, AFI_IP6);
vpn_leak_zebra_vrf_sid_update(bgp, AFI_IP);
vpn_leak_zebra_vrf_sid_update(bgp, AFI_IP6);
vpn_leak_postchange_all();
vpn_leak_postchange_all(true);
}

return 0;
Expand Down Expand Up @@ -295,7 +295,7 @@ static int bgp_ifp_down(struct interface *ifp)
vpn_leak_zebra_vrf_label_withdraw(bgp, AFI_IP6);
vpn_leak_zebra_vrf_sid_withdraw(bgp, AFI_IP);
vpn_leak_zebra_vrf_sid_withdraw(bgp, AFI_IP6);
vpn_leak_postchange_all();
vpn_leak_postchange_all(true);
}

return 0;
Expand Down Expand Up @@ -3364,7 +3364,7 @@ static int bgp_ifp_create(struct interface *ifp)
vpn_leak_zebra_vrf_label_update(bgp, AFI_IP6);
vpn_leak_zebra_vrf_sid_update(bgp, AFI_IP);
vpn_leak_zebra_vrf_sid_update(bgp, AFI_IP6);
vpn_leak_postchange_all();
vpn_leak_postchange_all(true);
}

return 0;
Expand Down Expand Up @@ -3396,7 +3396,7 @@ static int bgp_zebra_process_srv6_locator_chunk(ZAPI_CALLBACK_ARGS)
}

listnode_add(bgp->srv6_locator_chunks, chunk);
vpn_leak_postchange_all();
vpn_leak_postchange_all(true);
return 0;
}

Expand Down Expand Up @@ -3496,7 +3496,7 @@ static int bgp_zebra_process_srv6_locator_delete(ZAPI_CALLBACK_ARGS)
}
}

vpn_leak_postchange_all();
vpn_leak_postchange_all(false);

/* refresh tovpn_sid_locator */
for (ALL_LIST_ELEMENTS_RO(bm->bgp, node, bgp_vrf)) {
Expand Down Expand Up @@ -3649,7 +3649,8 @@ static bool bgp_zebra_label_manager_connect(void)

/* tell BGP L3VPN that label manager is available */
if (bgp_get_default())
vpn_leak_postchange_all();
vpn_leak_postchange_all(false);

return true;
}

Expand Down
2 changes: 2 additions & 0 deletions bgpd/bgpd.c
Original file line number Diff line number Diff line change
Expand Up @@ -8381,6 +8381,7 @@ void bgp_master_init(struct event_loop *master, const int buffer_size,
bm->t_bgp_sync_label_manager = NULL;
bm->t_bgp_start_label_manager = NULL;
bm->t_bgp_zebra_route = NULL;
bm->t_vpn_leak_postchange = NULL;

bgp_mac_init();
/* init the rd id space.
Expand Down Expand Up @@ -8632,6 +8633,7 @@ void bgp_terminate(void)
EVENT_OFF(bm->t_bgp_sync_label_manager);
EVENT_OFF(bm->t_bgp_start_label_manager);
EVENT_OFF(bm->t_bgp_zebra_route);
EVENT_OFF(bm->t_vpn_leak_postchange);

bgp_mac_finish();
}
Expand Down
3 changes: 3 additions & 0 deletions bgpd/bgpd.h
Original file line number Diff line number Diff line change
Expand Up @@ -169,6 +169,9 @@ struct bgp_master {
/* DSCP value for TCP sessions */
uint8_t tcp_dscp;

/* L3VPN processing thread */
struct event *t_vpn_leak_postchange;

#define BM_DEFAULT_Q_LIMIT 10000
uint32_t inq_limit;
uint32_t outq_limit;
Expand Down

0 comments on commit c4e2fe6

Please sign in to comment.