Skip to content

Commit

Permalink
northd: ECMP prefer local routes if possible.
Browse files Browse the repository at this point in the history
Assume the following setup:
1. there is an LR connected via LRP to some internal networks
2. the LR is connected via two separate LRPs (LRP-ext-1, LRP-ext-2)
   to a external network
3. there are two default routes, one for each of the external LRPs
4. the external LRPs have ha_chassis_groups with different priorities

In this case for internal traffic arriving to the LR it would first
determine one of the ecmp routes to use and then forward the traffic
appropriately.

This can mean that if we are on the same chassis as LRP-ext-1 then we
could still choose a route that outputs via the chassis of LRP-ext-2.
In this case we would send traffic to another chassis for no real
reason.

To avoid this case we add for each ecmp route additional non-ecmp
routes. These use is_chassis_resident to filter for the above case and
then choose local routes. If there are no local routes available we use
the normal ecmp route selection.

This feature is especially needed in the case of active-active routing.
There there will be a lot of per "project" LRs connected to one LS which
then connects to the external LR as described above. As the LRPs of the
"project" LRs mostly already need ha_chassis_groups for NAT handling the
chance of the traffic to be already on an appropriate chassis is quite
high.

Signed-off-by: Felix Huettner <[email protected]>
Signed-off-by: 0-day Robot <[email protected]>
  • Loading branch information
felixhuettner authored and ovsrobot committed Dec 18, 2024
1 parent 0f1f559 commit 86dd4d9
Show file tree
Hide file tree
Showing 4 changed files with 191 additions and 55 deletions.
4 changes: 4 additions & 0 deletions NEWS
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,10 @@ Post v24.09.0
combination with the dynamic routing features this allows operators to
integrate OVN into the network fabric in a highly available way without
significant (or any) changes to the CMS.
- Prioritize routes on the same chassis as an active-active-lrp.
This will prevent such setups from forwarding traffic between ovn chassis
just based on ecmp routes and the datapath hash if the route is locally
available.

OVN v24.09.0 - 13 Sep 2024
--------------------------
Expand Down
128 changes: 118 additions & 10 deletions northd/northd.c
Original file line number Diff line number Diff line change
Expand Up @@ -306,10 +306,15 @@ BUILD_ASSERT_DECL(ACL_OBS_STAGE_MAX < (1 << 2));
* Route offsets implement logic to prioritize traffic for routes with
* same ip_prefix values:
* - connected route overrides static one;
* - static route overrides src-ip route. */
#define ROUTE_PRIO_OFFSET_MULTIPLIER 5
* - static route overrides src-ip route.
*
* When having ecmp routes with multiple different output ports on different
* chassis we prioritize being on the same chassis.
* However longer prefix matches are more important than being local. */
#define ROUTE_PRIO_OFFSET_MULTIPLIER 12
#define ROUTE_PRIO_OFFSET_STATIC 2
#define ROUTE_PRIO_OFFSET_CONNECTED 4
#define ROUTE_PRIO_OFFSET_ADD_SPECIFIC_CHASSIS 6

/* Returns the type of the datapath to which a flow with the given 'stage' may
* be added. */
Expand Down Expand Up @@ -11738,6 +11743,10 @@ struct ecmp_groups_node {
uint16_t route_count;
struct ovs_list route_list; /* Contains ecmp_route_list_node */
struct sset selection_fields;
/* If this is set the route should only apply to chassis where the port
* is resident. It will also receive a higher priority*/
struct sset ports_resident;
bool has_different_chassis;
};

static void
Expand All @@ -11762,6 +11771,22 @@ ecmp_groups_add_route(struct ecmp_groups_node *group,
}

ovs_list_insert(&group->route_list, &er->list_node);

if (!group->has_different_chassis) {
struct ecmp_route_list_node *ern;
struct sset chassis_names = SSET_INITIALIZER(&chassis_names);
LIST_FOR_EACH (ern, list_node, &group->route_list) {
if (ern->route->is_discard_route ||
!ern->route->out_port->is_active_active) {
continue;
}
sset_add(&chassis_names, ern->route->out_port->aa_chassis_name);
}
if (sset_count(&chassis_names) > 1) {
group->has_different_chassis = true;
}
sset_destroy(&chassis_names);
}
}

static struct ecmp_groups_node *
Expand All @@ -11784,7 +11809,9 @@ ecmp_groups_add(struct hmap *ecmp_groups,
eg->source = route->source;
eg->route_table_id = route->route_table_id;
sset_init(&eg->selection_fields);
eg->has_different_chassis = false;
ovs_list_init(&eg->route_list);
sset_init(&eg->ports_resident);
ecmp_groups_add_route(eg, route);

return eg;
Expand Down Expand Up @@ -11815,6 +11842,7 @@ ecmp_groups_destroy(struct hmap *ecmp_groups)
ovs_list_remove(&er->list_node);
free(er);
}
sset_destroy(&eg->ports_resident);
hmap_remove(ecmp_groups, &eg->hmap_node);
sset_destroy(&eg->selection_fields);
free(eg);
Expand All @@ -11825,15 +11853,19 @@ ecmp_groups_destroy(struct hmap *ecmp_groups)
struct unique_routes_node {
struct hmap_node hmap_node;
const struct parsed_route *route;
/* If this is set the route should only apply to chassis where the port
* is resident. It will also receive a higher priority*/
const char *port_resident;
};

static void
static struct unique_routes_node *
unique_routes_add(struct hmap *unique_routes,
const struct parsed_route *route)
{
struct unique_routes_node *ur = xmalloc(sizeof *ur);
struct unique_routes_node *ur = xzalloc(sizeof *ur);
ur->route = route;
hmap_insert(unique_routes, &ur->hmap_node, route->hash);
return ur;
}

/* Remove the unique_routes_node from the hmap, and return the parsed_route
Expand Down Expand Up @@ -12111,7 +12143,21 @@ build_ecmp_route_flow(struct lflow_table *lflows, struct ovn_datapath *od,
eg->is_src_route, is_ipv4_prefix, &route_match,
&priority, ofs,
protocol != NULL);
free(prefix_s);

if (sset_count(&eg->ports_resident) > 0) {
priority += ROUTE_PRIO_OFFSET_ADD_SPECIFIC_CHASSIS;
ds_put_format(&route_match, " && (");
bool first = true;
const char *port;
SSET_FOR_EACH (port, &eg->ports_resident) {
if (!first) {
ds_put_format(&route_match, "||");
}
first = false;
ds_put_format(&route_match, " is_chassis_resident(\"%s\") ", port);
}
ds_put_format(&route_match, ")");
}

struct ds actions = DS_EMPTY_INITIALIZER;
ds_put_format(&actions, "ip.ttl--; flags.loopback = 1; %s = %"PRIu16
Expand Down Expand Up @@ -12209,6 +12255,7 @@ build_ecmp_route_flow(struct lflow_table *lflows, struct ovn_datapath *od,
ds_cstr(&match), ds_cstr(&actions),
&route->header_, lflow_ref);
}
free(prefix_s);
sset_destroy(&visited_ports);
ds_destroy(&match);
ds_destroy(&route_match);
Expand All @@ -12223,7 +12270,8 @@ add_route(struct lflow_table *lflows, struct ovn_datapath *od,
const struct sset *bfd_ports,
const struct ovsdb_idl_row *stage_hint, bool is_discard_route,
enum route_source source, struct lflow_ref *lflow_ref,
bool is_ipv4_prefix, bool is_ipv4_nexthop)
bool is_ipv4_prefix, bool is_ipv4_nexthop,
const char *port_resident)
{
struct ds match = DS_EMPTY_INITIALIZER;
uint16_t priority;
Expand All @@ -12242,6 +12290,12 @@ add_route(struct lflow_table *lflows, struct ovn_datapath *od,
build_route_match(op_inport, rtb_id, network_s, plen, is_src_route,
is_ipv4_prefix, &match, &priority, ofs, false);

if (port_resident) {
priority += ROUTE_PRIO_OFFSET_ADD_SPECIFIC_CHASSIS;
ds_put_format(&match, " && is_chassis_resident(\"%s\")",
port_resident);
}

struct ds common_actions = DS_EMPTY_INITIALIZER;
struct ds actions = DS_EMPTY_INITIALIZER;
if (is_discard_route) {
Expand Down Expand Up @@ -12291,7 +12345,8 @@ static void
build_route_flow(struct lflow_table *lflows, struct ovn_datapath *od,
const struct parsed_route *route_,
const struct sset *bfd_ports,
struct lflow_ref *lflow_ref)
struct lflow_ref *lflow_ref,
const char *port_resident)
{
const struct nbrec_logical_router_static_route *route = route_->route;
bool is_ipv4_prefix = IN6_IS_ADDR_V4MAPPED(&route_->prefix);
Expand All @@ -12306,7 +12361,7 @@ build_route_flow(struct lflow_table *lflows, struct ovn_datapath *od,
route_->route_table_id, bfd_ports,
route ? &route->header_ : &route_->out_port->nbrp->header_,
route_->is_discard_route, route_->source, lflow_ref,
is_ipv4_prefix, is_ipv4_nexthop);
is_ipv4_prefix, is_ipv4_nexthop, port_resident);

free(prefix_s);
}
Expand Down Expand Up @@ -14176,6 +14231,58 @@ build_route_flows_for_lrouter(
}
}
}

/* We now duplicate some routes based on ecmp groups. The goal here is to
* prioritize taking some route of a ecmp route if we are already on the
* respective chassis. This saves us potentially forwarding traffic between
* chassis for no reason. */
HMAP_FOR_EACH_SAFE (group, hmap_node, &ecmp_groups) {
if (!group->has_different_chassis) {
continue;
}
struct simap chassis_count = SIMAP_INITIALIZER(&chassis_count);
struct ecmp_route_list_node *er;
LIST_FOR_EACH (er, list_node, &group->route_list) {
if (er->route->is_discard_route ||
!er->route->out_port->is_active_active) {
continue;
}
simap_increase(&chassis_count,
er->route->out_port->aa_chassis_name, 1);
}


struct simap_node *chassis_node;
SIMAP_FOR_EACH (chassis_node, &chassis_count) {
ovs_assert(chassis_node->data != 0);
struct ecmp_groups_node *found_group = NULL;
LIST_FOR_EACH (er, list_node, &group->route_list) {
if (er->route->is_discard_route ||
!er->route->out_port->is_active_active ||
strcmp(chassis_node->name,
er->route->out_port->aa_chassis_name)) {
continue;
}
const char *port_name = er->route->out_port->cr_port->key;
if (chassis_node->data == 1) {
struct unique_routes_node *ur =
unique_routes_add(&unique_routes, er->route);
ur->port_resident = port_name;
} else {
if (!found_group) {
found_group = ecmp_groups_add(&ecmp_groups, er->route);
} else {
ecmp_groups_add_route(found_group, er->route);
}
sset_add(&found_group->ports_resident, port_name);
}
}
}

simap_destroy(&chassis_count);
}

/* And now really add the routing flows */
HMAP_FOR_EACH (group, hmap_node, &ecmp_groups) {
/* add a flow in IP_ROUTING, and one flow for each member in
* IP_ROUTING_ECMP. */
Expand All @@ -14192,7 +14299,8 @@ build_route_flows_for_lrouter(
}
const struct unique_routes_node *ur;
HMAP_FOR_EACH (ur, hmap_node, &unique_routes) {
build_route_flow(lflows, od, ur->route, bfd_ports, lflow_ref);
build_route_flow(lflows, od, ur->route,
bfd_ports, lflow_ref, ur->port_resident);
}
ecmp_groups_destroy(&ecmp_groups);
unique_routes_destroy(&unique_routes);
Expand Down Expand Up @@ -17451,7 +17559,7 @@ build_routable_flows_for_router_port(
bfd_ports, &router_port->nbrp->header_,
false, ROUTE_SOURCE_CONNECTED,
lrp->stateful_lflow_ref,
true, true);
true, true, NULL);
}
}
}
Expand Down
Loading

0 comments on commit 86dd4d9

Please sign in to comment.