From 86dd4d985c16028175f0c0104454777f9a8102fa Mon Sep 17 00:00:00 2001 From: Felix Huettner Date: Wed, 18 Dec 2024 16:54:41 +0100 Subject: [PATCH] northd: ECMP prefer local routes if possible. Assume the following setup: 1. there is an LR connected via LRP to some internal networks 2. the LR is connected via two separate LRPs (LRP-ext-1, LRP-ext-2) to an external network 3. there are two default routes, one for each of the external LRPs 4. the external LRPs have ha_chassis_groups with different priorities In this case, for internal traffic arriving at the LR, the LR would first select one of the ecmp routes to use and then forward the traffic appropriately. This can mean that if we are on the same chassis as LRP-ext-1 then we could still choose a route that outputs via the chassis of LRP-ext-2. In this case we would send traffic to another chassis for no real reason. To avoid this case we add, for each ecmp route, additional non-ecmp routes. These use is_chassis_resident to filter for the above case and then choose local routes. If there are no local routes available we use the normal ecmp route selection. This feature is especially needed in the case of active-active routing. In that case there will be a lot of per-"project" LRs connected to one LS which then connects to the external LR as described above. As the LRPs of the "project" LRs mostly already need ha_chassis_groups for NAT handling, the chance of the traffic already being on an appropriate chassis is quite high. 
Signed-off-by: Felix Huettner Signed-off-by: 0-day Robot --- NEWS | 4 ++ northd/northd.c | 128 ++++++++++++++++++++++++++++++++++++++++---- tests/ovn-northd.at | 65 +++++++++++----------- tests/ovn.at | 49 ++++++++++++----- 4 files changed, 191 insertions(+), 55 deletions(-) diff --git a/NEWS b/NEWS index 124ce60a27..003e0a14dc 100644 --- a/NEWS +++ b/NEWS @@ -18,6 +18,10 @@ Post v24.09.0 combination with the dynamic routing features this allows operators to integrate OVN into the network fabric in a highly available way without significant (or any) changes to the CMS. + - Prioritize routes on the same chassis as an active-active-lrp. + This will prevent such setups from forwarding traffic between ovn chassis + just based on ecmp routes and the datapath hash if the route is locally + available. OVN v24.09.0 - 13 Sep 2024 -------------------------- diff --git a/northd/northd.c b/northd/northd.c index 9538ec2d67..644f1225e0 100644 --- a/northd/northd.c +++ b/northd/northd.c @@ -306,10 +306,15 @@ BUILD_ASSERT_DECL(ACL_OBS_STAGE_MAX < (1 << 2)); * Route offsets implement logic to prioritize traffic for routes with * same ip_prefix values: * - connected route overrides static one; - * - static route overrides src-ip route. */ -#define ROUTE_PRIO_OFFSET_MULTIPLIER 5 + * - static route overrides src-ip route. + * + * When having ecmp routes with multiple different output ports on different + * chassis we prioritize being on the same chassis. + * However longer prefix matches are more important than being local. */ +#define ROUTE_PRIO_OFFSET_MULTIPLIER 12 #define ROUTE_PRIO_OFFSET_STATIC 2 #define ROUTE_PRIO_OFFSET_CONNECTED 4 +#define ROUTE_PRIO_OFFSET_ADD_SPECIFIC_CHASSIS 6 /* Returns the type of the datapath to which a flow with the given 'stage' may * be added. 
*/ @@ -11738,6 +11743,10 @@ struct ecmp_groups_node { uint16_t route_count; struct ovs_list route_list; /* Contains ecmp_route_list_node */ struct sset selection_fields; + /* If this is set the route should only apply to chassis where the port + * is resident. It will also receive a higher priority*/ + struct sset ports_resident; + bool has_different_chassis; }; static void @@ -11762,6 +11771,22 @@ ecmp_groups_add_route(struct ecmp_groups_node *group, } ovs_list_insert(&group->route_list, &er->list_node); + + if (!group->has_different_chassis) { + struct ecmp_route_list_node *ern; + struct sset chassis_names = SSET_INITIALIZER(&chassis_names); + LIST_FOR_EACH (ern, list_node, &group->route_list) { + if (ern->route->is_discard_route || + !ern->route->out_port->is_active_active) { + continue; + } + sset_add(&chassis_names, ern->route->out_port->aa_chassis_name); + } + if (sset_count(&chassis_names) > 1) { + group->has_different_chassis = true; + } + sset_destroy(&chassis_names); + } } static struct ecmp_groups_node * @@ -11784,7 +11809,9 @@ ecmp_groups_add(struct hmap *ecmp_groups, eg->source = route->source; eg->route_table_id = route->route_table_id; sset_init(&eg->selection_fields); + eg->has_different_chassis = false; ovs_list_init(&eg->route_list); + sset_init(&eg->ports_resident); ecmp_groups_add_route(eg, route); return eg; @@ -11815,6 +11842,7 @@ ecmp_groups_destroy(struct hmap *ecmp_groups) ovs_list_remove(&er->list_node); free(er); } + sset_destroy(&eg->ports_resident); hmap_remove(ecmp_groups, &eg->hmap_node); sset_destroy(&eg->selection_fields); free(eg); @@ -11825,15 +11853,19 @@ ecmp_groups_destroy(struct hmap *ecmp_groups) struct unique_routes_node { struct hmap_node hmap_node; const struct parsed_route *route; + /* If this is set the route should only apply to chassis where the port + * is resident. 
It will also receive a higher priority*/ + const char *port_resident; }; -static void +static struct unique_routes_node * unique_routes_add(struct hmap *unique_routes, const struct parsed_route *route) { - struct unique_routes_node *ur = xmalloc(sizeof *ur); + struct unique_routes_node *ur = xzalloc(sizeof *ur); ur->route = route; hmap_insert(unique_routes, &ur->hmap_node, route->hash); + return ur; } /* Remove the unique_routes_node from the hmap, and return the parsed_route @@ -12111,7 +12143,21 @@ build_ecmp_route_flow(struct lflow_table *lflows, struct ovn_datapath *od, eg->is_src_route, is_ipv4_prefix, &route_match, &priority, ofs, protocol != NULL); - free(prefix_s); + + if (sset_count(&eg->ports_resident) > 0) { + priority += ROUTE_PRIO_OFFSET_ADD_SPECIFIC_CHASSIS; + ds_put_format(&route_match, " && ("); + bool first = true; + const char *port; + SSET_FOR_EACH (port, &eg->ports_resident) { + if (!first) { + ds_put_format(&route_match, "||"); + } + first = false; + ds_put_format(&route_match, " is_chassis_resident(\"%s\") ", port); + } + ds_put_format(&route_match, ")"); + } struct ds actions = DS_EMPTY_INITIALIZER; ds_put_format(&actions, "ip.ttl--; flags.loopback = 1; %s = %"PRIu16 @@ -12209,6 +12255,7 @@ build_ecmp_route_flow(struct lflow_table *lflows, struct ovn_datapath *od, ds_cstr(&match), ds_cstr(&actions), &route->header_, lflow_ref); } + free(prefix_s); sset_destroy(&visited_ports); ds_destroy(&match); ds_destroy(&route_match); @@ -12223,7 +12270,8 @@ add_route(struct lflow_table *lflows, struct ovn_datapath *od, const struct sset *bfd_ports, const struct ovsdb_idl_row *stage_hint, bool is_discard_route, enum route_source source, struct lflow_ref *lflow_ref, - bool is_ipv4_prefix, bool is_ipv4_nexthop) + bool is_ipv4_prefix, bool is_ipv4_nexthop, + const char *port_resident) { struct ds match = DS_EMPTY_INITIALIZER; uint16_t priority; @@ -12242,6 +12290,12 @@ add_route(struct lflow_table *lflows, struct ovn_datapath *od, 
build_route_match(op_inport, rtb_id, network_s, plen, is_src_route, is_ipv4_prefix, &match, &priority, ofs, false); + if (port_resident) { + priority += ROUTE_PRIO_OFFSET_ADD_SPECIFIC_CHASSIS; + ds_put_format(&match, " && is_chassis_resident(\"%s\")", + port_resident); + } + struct ds common_actions = DS_EMPTY_INITIALIZER; struct ds actions = DS_EMPTY_INITIALIZER; if (is_discard_route) { @@ -12291,7 +12345,8 @@ static void build_route_flow(struct lflow_table *lflows, struct ovn_datapath *od, const struct parsed_route *route_, const struct sset *bfd_ports, - struct lflow_ref *lflow_ref) + struct lflow_ref *lflow_ref, + const char *port_resident) { const struct nbrec_logical_router_static_route *route = route_->route; bool is_ipv4_prefix = IN6_IS_ADDR_V4MAPPED(&route_->prefix); @@ -12306,7 +12361,7 @@ build_route_flow(struct lflow_table *lflows, struct ovn_datapath *od, route_->route_table_id, bfd_ports, route ? &route->header_ : &route_->out_port->nbrp->header_, route_->is_discard_route, route_->source, lflow_ref, - is_ipv4_prefix, is_ipv4_nexthop); + is_ipv4_prefix, is_ipv4_nexthop, port_resident); free(prefix_s); } @@ -14176,6 +14231,58 @@ build_route_flows_for_lrouter( } } } + + /* We now duplicate some routes based on ecmp groups. The goal here is to + * prioritize taking some route of a ecmp route if we are already on the + * respective chassis. This saves us potentially forwarding traffic between + * chassis for no reason. 
*/ + HMAP_FOR_EACH_SAFE (group, hmap_node, &ecmp_groups) { + if (!group->has_different_chassis) { + continue; + } + struct simap chassis_count = SIMAP_INITIALIZER(&chassis_count); + struct ecmp_route_list_node *er; + LIST_FOR_EACH (er, list_node, &group->route_list) { + if (er->route->is_discard_route || + !er->route->out_port->is_active_active) { + continue; + } + simap_increase(&chassis_count, + er->route->out_port->aa_chassis_name, 1); + } + + + struct simap_node *chassis_node; + SIMAP_FOR_EACH (chassis_node, &chassis_count) { + ovs_assert(chassis_node->data != 0); + struct ecmp_groups_node *found_group = NULL; + LIST_FOR_EACH (er, list_node, &group->route_list) { + if (er->route->is_discard_route || + !er->route->out_port->is_active_active || + strcmp(chassis_node->name, + er->route->out_port->aa_chassis_name)) { + continue; + } + const char *port_name = er->route->out_port->cr_port->key; + if (chassis_node->data == 1) { + struct unique_routes_node *ur = + unique_routes_add(&unique_routes, er->route); + ur->port_resident = port_name; + } else { + if (!found_group) { + found_group = ecmp_groups_add(&ecmp_groups, er->route); + } else { + ecmp_groups_add_route(found_group, er->route); + } + sset_add(&found_group->ports_resident, port_name); + } + } + } + + simap_destroy(&chassis_count); + } + + /* And now really add the routing flows */ HMAP_FOR_EACH (group, hmap_node, &ecmp_groups) { /* add a flow in IP_ROUTING, and one flow for each member in * IP_ROUTING_ECMP. 
*/ @@ -14192,7 +14299,8 @@ build_route_flows_for_lrouter( } const struct unique_routes_node *ur; HMAP_FOR_EACH (ur, hmap_node, &unique_routes) { - build_route_flow(lflows, od, ur->route, bfd_ports, lflow_ref); + build_route_flow(lflows, od, ur->route, + bfd_ports, lflow_ref, ur->port_resident); } ecmp_groups_destroy(&ecmp_groups); unique_routes_destroy(&unique_routes); @@ -17451,7 +17559,7 @@ build_routable_flows_for_router_port( bfd_ports, &router_port->nbrp->header_, false, ROUTE_SOURCE_CONNECTED, lrp->stateful_lflow_ref, - true, true); + true, true, NULL); } } } diff --git a/tests/ovn-northd.at b/tests/ovn-northd.at index 21d9d63ab8..97c764519b 100644 --- a/tests/ovn-northd.at +++ b/tests/ovn-northd.at @@ -6823,9 +6823,9 @@ AT_CHECK([grep -w "lr_in_ip_routing" lr0flows | ovn_strip_lflows], [0], [dnl table=??(lr_in_ip_routing ), priority=0 , match=(1), action=(drop;) table=??(lr_in_ip_routing ), priority=10300, match=(ct_mark.ecmp_reply_port == 1 && reg7 == 0 && ip4.dst == 1.0.0.1/32), action=(ip.ttl--; flags.loopback = 1; eth.src = 00:00:20:20:12:13; reg5 = 192.168.0.1; outport = "lr0-public"; next;) table=??(lr_in_ip_routing ), priority=10550, match=(nd_rs || nd_ra), action=(drop;) - table=??(lr_in_ip_routing ), priority=124 , match=(ip4.dst == 192.168.0.0/24), action=(ip.ttl--; reg8[[0..15]] = 0; reg0 = ip4.dst; reg5 = 192.168.0.1; eth.src = 00:00:20:20:12:13; outport = "lr0-public"; flags.loopback = 1; reg9[[9]] = 1; next;) - table=??(lr_in_ip_routing ), priority=162 , match=(reg7 == 0 && ip4.dst == 1.0.0.1/32), action=(ip.ttl--; flags.loopback = 1; reg8[[0..15]] = 1; reg8[[16..31]] = 1; next;) - table=??(lr_in_ip_routing ), priority=324 , match=(inport == "lr0-public" && ip6.dst == fe80::/64), action=(ip.ttl--; reg8[[0..15]] = 0; xxreg0 = ip6.dst; xxreg1 = fe80::200:20ff:fe20:1213; eth.src = 00:00:20:20:12:13; outport = "lr0-public"; flags.loopback = 1; reg9[[9]] = 0; next;) + table=??(lr_in_ip_routing ), priority=292 , match=(ip4.dst == 192.168.0.0/24), 
action=(ip.ttl--; reg8[[0..15]] = 0; reg0 = ip4.dst; reg5 = 192.168.0.1; eth.src = 00:00:20:20:12:13; outport = "lr0-public"; flags.loopback = 1; reg9[[9]] = 1; next;) + table=??(lr_in_ip_routing ), priority=386 , match=(reg7 == 0 && ip4.dst == 1.0.0.1/32), action=(ip.ttl--; flags.loopback = 1; reg8[[0..15]] = 1; reg8[[16..31]] = 1; next;) + table=??(lr_in_ip_routing ), priority=772 , match=(inport == "lr0-public" && ip6.dst == fe80::/64), action=(ip.ttl--; reg8[[0..15]] = 0; xxreg0 = ip6.dst; xxreg1 = fe80::200:20ff:fe20:1213; eth.src = 00:00:20:20:12:13; outport = "lr0-public"; flags.loopback = 1; reg9[[9]] = 0; next;) ]) AT_CHECK([grep -e "lr_in_ip_routing_ecmp" lr0flows | ovn_strip_lflows], [0], [dnl @@ -6841,9 +6841,9 @@ AT_CHECK([grep -w "lr_in_ip_routing" lr0flows | ovn_strip_lflows], [0], [dnl table=??(lr_in_ip_routing ), priority=0 , match=(1), action=(drop;) table=??(lr_in_ip_routing ), priority=10300, match=(ct_mark.ecmp_reply_port == 1 && reg7 == 0 && ip4.dst == 1.0.0.1/32), action=(ip.ttl--; flags.loopback = 1; eth.src = 00:00:20:20:12:13; reg5 = 192.168.0.1; outport = "lr0-public"; next;) table=??(lr_in_ip_routing ), priority=10550, match=(nd_rs || nd_ra), action=(drop;) - table=??(lr_in_ip_routing ), priority=124 , match=(ip4.dst == 192.168.0.0/24), action=(ip.ttl--; reg8[[0..15]] = 0; reg0 = ip4.dst; reg5 = 192.168.0.1; eth.src = 00:00:20:20:12:13; outport = "lr0-public"; flags.loopback = 1; reg9[[9]] = 1; next;) - table=??(lr_in_ip_routing ), priority=162 , match=(reg7 == 0 && ip4.dst == 1.0.0.1/32), action=(ip.ttl--; flags.loopback = 1; reg8[[0..15]] = 1; reg8[[16..31]] = select(1, 2);) - table=??(lr_in_ip_routing ), priority=324 , match=(inport == "lr0-public" && ip6.dst == fe80::/64), action=(ip.ttl--; reg8[[0..15]] = 0; xxreg0 = ip6.dst; xxreg1 = fe80::200:20ff:fe20:1213; eth.src = 00:00:20:20:12:13; outport = "lr0-public"; flags.loopback = 1; reg9[[9]] = 0; next;) + table=??(lr_in_ip_routing ), priority=292 , match=(ip4.dst == 192.168.0.0/24), 
action=(ip.ttl--; reg8[[0..15]] = 0; reg0 = ip4.dst; reg5 = 192.168.0.1; eth.src = 00:00:20:20:12:13; outport = "lr0-public"; flags.loopback = 1; reg9[[9]] = 1; next;) + table=??(lr_in_ip_routing ), priority=386 , match=(reg7 == 0 && ip4.dst == 1.0.0.1/32), action=(ip.ttl--; flags.loopback = 1; reg8[[0..15]] = 1; reg8[[16..31]] = select(1, 2);) + table=??(lr_in_ip_routing ), priority=772 , match=(inport == "lr0-public" && ip6.dst == fe80::/64), action=(ip.ttl--; reg8[[0..15]] = 0; xxreg0 = ip6.dst; xxreg1 = fe80::200:20ff:fe20:1213; eth.src = 00:00:20:20:12:13; outport = "lr0-public"; flags.loopback = 1; reg9[[9]] = 0; next;) ]) AT_CHECK([grep -e "lr_in_ip_routing_ecmp" lr0flows | sed 's/192\.168\.0\..0/192.168.0.??/' | ovn_strip_lflows], [0], [dnl table=??(lr_in_ip_routing_ecmp), priority=0 , match=(1), action=(drop;) @@ -6870,9 +6870,9 @@ AT_CHECK([grep -w "lr_in_ip_routing" lr0flows | ovn_strip_lflows], [0], [dnl table=??(lr_in_ip_routing ), priority=0 , match=(1), action=(drop;) table=??(lr_in_ip_routing ), priority=10300, match=(ct_mark.ecmp_reply_port == 1 && reg7 == 0 && ip4.dst == 1.0.0.1/32), action=(ip.ttl--; flags.loopback = 1; eth.src = 00:00:20:20:12:13; reg5 = 192.168.0.1; outport = "lr0-public"; next;) table=??(lr_in_ip_routing ), priority=10550, match=(nd_rs || nd_ra), action=(drop;) - table=??(lr_in_ip_routing ), priority=124 , match=(ip4.dst == 192.168.0.0/24), action=(ip.ttl--; reg8[[0..15]] = 0; reg0 = ip4.dst; reg5 = 192.168.0.1; eth.src = 00:00:20:20:12:13; outport = "lr0-public"; flags.loopback = 1; reg9[[9]] = 1; next;) - table=??(lr_in_ip_routing ), priority=162 , match=(reg7 == 0 && ip4.dst == 1.0.0.1/32), action=(ip.ttl--; flags.loopback = 1; reg8[[0..15]] = 1; reg8[[16..31]] = select(1, 2);) - table=??(lr_in_ip_routing ), priority=324 , match=(inport == "lr0-public" && ip6.dst == fe80::/64), action=(ip.ttl--; reg8[[0..15]] = 0; xxreg0 = ip6.dst; xxreg1 = fe80::200:20ff:fe20:1213; eth.src = 00:00:20:20:12:13; outport = "lr0-public"; 
flags.loopback = 1; reg9[[9]] = 0; next;) + table=??(lr_in_ip_routing ), priority=292 , match=(ip4.dst == 192.168.0.0/24), action=(ip.ttl--; reg8[[0..15]] = 0; reg0 = ip4.dst; reg5 = 192.168.0.1; eth.src = 00:00:20:20:12:13; outport = "lr0-public"; flags.loopback = 1; reg9[[9]] = 1; next;) + table=??(lr_in_ip_routing ), priority=386 , match=(reg7 == 0 && ip4.dst == 1.0.0.1/32), action=(ip.ttl--; flags.loopback = 1; reg8[[0..15]] = 1; reg8[[16..31]] = select(1, 2);) + table=??(lr_in_ip_routing ), priority=772 , match=(inport == "lr0-public" && ip6.dst == fe80::/64), action=(ip.ttl--; reg8[[0..15]] = 0; xxreg0 = ip6.dst; xxreg1 = fe80::200:20ff:fe20:1213; eth.src = 00:00:20:20:12:13; outport = "lr0-public"; flags.loopback = 1; reg9[[9]] = 0; next;) ]) AT_CHECK([grep -e "lr_in_ip_routing_ecmp" lr0flows | sed 's/192\.168\.0\..0/192.168.0.??/' | ovn_strip_lflows], [0], [dnl table=??(lr_in_ip_routing_ecmp), priority=0 , match=(1), action=(drop;) @@ -6888,14 +6888,14 @@ check ovn-nbctl --wait=sb lr-route-add lr0 1.0.0.0/24 192.168.0.10 ovn-sbctl dump-flows lr0 > lr0flows AT_CHECK([grep -e "lr_in_ip_routing.*192.168.0.10" lr0flows | ovn_strip_lflows], [0], [dnl - table=??(lr_in_ip_routing ), priority=122 , match=(reg7 == 0 && ip4.dst == 1.0.0.0/24), action=(ip.ttl--; reg8[[0..15]] = 0; reg0 = 192.168.0.10; reg5 = 192.168.0.1; eth.src = 00:00:20:20:12:13; outport = "lr0-public"; flags.loopback = 1; reg9[[9]] = 1; next;) + table=??(lr_in_ip_routing ), priority=290 , match=(reg7 == 0 && ip4.dst == 1.0.0.0/24), action=(ip.ttl--; reg8[[0..15]] = 0; reg0 = 192.168.0.10; reg5 = 192.168.0.1; eth.src = 00:00:20:20:12:13; outport = "lr0-public"; flags.loopback = 1; reg9[[9]] = 1; next;) ]) check ovn-nbctl --wait=sb lr-route-add lr0 2.0.0.0/24 lr0-public ovn-sbctl dump-flows lr0 > lr0flows AT_CHECK([grep -e "lr_in_ip_routing.*2.0.0.0" lr0flows | ovn_strip_lflows], [0], [dnl - table=??(lr_in_ip_routing ), priority=122 , match=(reg7 == 0 && ip4.dst == 2.0.0.0/24), action=(ip.ttl--; 
reg8[[0..15]] = 0; reg0 = ip4.dst; reg5 = 192.168.0.1; eth.src = 00:00:20:20:12:13; outport = "lr0-public"; flags.loopback = 1; reg9[[9]] = 1; next;) + table=??(lr_in_ip_routing ), priority=290 , match=(reg7 == 0 && ip4.dst == 2.0.0.0/24), action=(ip.ttl--; reg8[[0..15]] = 0; reg0 = ip4.dst; reg5 = 192.168.0.1; eth.src = 00:00:20:20:12:13; outport = "lr0-public"; flags.loopback = 1; reg9[[9]] = 1; next;) ]) check ovn-nbctl lr-route-add lr0 3.3.0.0/16 192.168.0.11 @@ -6910,7 +6910,7 @@ check ovn-nbctl set logical_router_static_route $route2_uuid selection_fields="i check ovn-nbctl --wait=sb sync ovn-sbctl dump-flows lr0 > lr0flows AT_CHECK([grep -e "(lr_in_ip_routing ).*3.3.0.0" lr0flows | sed 's/table=../table=??/' | sort], [0], [dnl - table=??(lr_in_ip_routing ), priority=82 , match=(reg7 == 0 && ip4.dst == 3.3.0.0/16), action=(ip.ttl--; flags.loopback = 1; reg8[[0..15]] = 1; reg8[[16..31]] = select(values=(1, 2); hash_fields="ip_dst,ip_proto,ip_src");) + table=??(lr_in_ip_routing ), priority=194 , match=(reg7 == 0 && ip4.dst == 3.3.0.0/16), action=(ip.ttl--; flags.loopback = 1; reg8[[0..15]] = 1; reg8[[16..31]] = select(values=(1, 2); hash_fields="ip_dst,ip_proto,ip_src");) ]) check ovn-nbctl set logical_router_static_route $route1_uuid selection_fields="ip_src,ip_dst,tp_src,tp_dst" @@ -6919,10 +6919,10 @@ check ovn-nbctl set logical_router_static_route $route2_uuid selection_fields="i check ovn-nbctl --wait=sb sync ovn-sbctl dump-flows lr0 > lr0flows AT_CHECK([grep -e "(lr_in_ip_routing ).*3.3.0.0" lr0flows | sed 's/table=../table=??/' | sort], [0], [dnl - table=??(lr_in_ip_routing ), priority=82 , match=(reg7 == 0 && ip4.dst == 3.3.0.0/16), action=(ip.ttl--; flags.loopback = 1; reg8[[0..15]] = 1; reg8[[16..31]] = select(values=(1, 2); hash_fields="ip_dst,ip_proto,ip_src");) - table=??(lr_in_ip_routing ), priority=83 , match=(reg7 == 0 && ip4.dst == 3.3.0.0/16 && sctp), action=(ip.ttl--; flags.loopback = 1; reg8[[0..15]] = 1; reg8[[16..31]] = select(values=(1, 
2); hash_fields="ip_dst,ip_proto,ip_src,sctp_dst,sctp_src");) - table=??(lr_in_ip_routing ), priority=83 , match=(reg7 == 0 && ip4.dst == 3.3.0.0/16 && tcp), action=(ip.ttl--; flags.loopback = 1; reg8[[0..15]] = 1; reg8[[16..31]] = select(values=(1, 2); hash_fields="ip_dst,ip_proto,ip_src,tcp_dst,tcp_src");) - table=??(lr_in_ip_routing ), priority=83 , match=(reg7 == 0 && ip4.dst == 3.3.0.0/16 && udp), action=(ip.ttl--; flags.loopback = 1; reg8[[0..15]] = 1; reg8[[16..31]] = select(values=(1, 2); hash_fields="ip_dst,ip_proto,ip_src,udp_dst,udp_src");) + table=??(lr_in_ip_routing ), priority=194 , match=(reg7 == 0 && ip4.dst == 3.3.0.0/16), action=(ip.ttl--; flags.loopback = 1; reg8[[0..15]] = 1; reg8[[16..31]] = select(values=(1, 2); hash_fields="ip_dst,ip_proto,ip_src");) + table=??(lr_in_ip_routing ), priority=195 , match=(reg7 == 0 && ip4.dst == 3.3.0.0/16 && sctp), action=(ip.ttl--; flags.loopback = 1; reg8[[0..15]] = 1; reg8[[16..31]] = select(values=(1, 2); hash_fields="ip_dst,ip_proto,ip_src,sctp_dst,sctp_src");) + table=??(lr_in_ip_routing ), priority=195 , match=(reg7 == 0 && ip4.dst == 3.3.0.0/16 && tcp), action=(ip.ttl--; flags.loopback = 1; reg8[[0..15]] = 1; reg8[[16..31]] = select(values=(1, 2); hash_fields="ip_dst,ip_proto,ip_src,tcp_dst,tcp_src");) + table=??(lr_in_ip_routing ), priority=195 , match=(reg7 == 0 && ip4.dst == 3.3.0.0/16 && udp), action=(ip.ttl--; flags.loopback = 1; reg8[[0..15]] = 1; reg8[[16..31]] = select(values=(1, 2); hash_fields="ip_dst,ip_proto,ip_src,udp_dst,udp_src");) ]) AT_CLEANUP @@ -6960,14 +6960,14 @@ ovn-sbctl dump-flows lr0 > lr0flows AT_CHECK([grep -e "lr_in_ip_routing " lr0flows | ovn_strip_lflows], [0], [dnl table=??(lr_in_ip_routing ), priority=0 , match=(1), action=(drop;) table=??(lr_in_ip_routing ), priority=10550, match=(nd_rs || nd_ra), action=(drop;) - table=??(lr_in_ip_routing ), priority=122 , match=(reg7 == 0 && ip4.dst == 10.0.0.0/24), action=(ip.ttl--; reg8[[0..15]] = 0; reg0 = 192.168.0.10; reg5 = 
192.168.0.1; eth.src = 00:00:20:20:12:13; outport = "lr0-public"; flags.loopback = 1; reg9[[9]] = 1; next;) - table=??(lr_in_ip_routing ), priority=122 , match=(reg7 == 0 && ip4.dst == 11.0.0.0/24), action=(ip.ttl--; reg8[[0..15]] = 0; xxreg0 = 2001:db8::10; xxreg1 = 2001:db8::1; eth.src = 00:00:20:20:12:14; outport = "lr0-private"; flags.loopback = 1; reg9[[9]] = 0; next;) - table=??(lr_in_ip_routing ), priority=124 , match=(ip4.dst == 192.168.0.0/24), action=(ip.ttl--; reg8[[0..15]] = 0; reg0 = ip4.dst; reg5 = 192.168.0.1; eth.src = 00:00:20:20:12:13; outport = "lr0-public"; flags.loopback = 1; reg9[[9]] = 1; next;) - table=??(lr_in_ip_routing ), priority=322 , match=(reg7 == 0 && ip6.dst == 2001:db8:1::/64), action=(ip.ttl--; reg8[[0..15]] = 0; reg0 = 192.168.0.20; reg5 = 192.168.0.1; eth.src = 00:00:20:20:12:13; outport = "lr0-public"; flags.loopback = 1; reg9[[9]] = 1; next;) - table=??(lr_in_ip_routing ), priority=322 , match=(reg7 == 0 && ip6.dst == 2001:db8:2::/64), action=(ip.ttl--; reg8[[0..15]] = 0; xxreg0 = 2001:db8::20; xxreg1 = 2001:db8::1; eth.src = 00:00:20:20:12:14; outport = "lr0-private"; flags.loopback = 1; reg9[[9]] = 0; next;) - table=??(lr_in_ip_routing ), priority=324 , match=(inport == "lr0-private" && ip6.dst == fe80::/64), action=(ip.ttl--; reg8[[0..15]] = 0; xxreg0 = ip6.dst; xxreg1 = fe80::200:20ff:fe20:1214; eth.src = 00:00:20:20:12:14; outport = "lr0-private"; flags.loopback = 1; reg9[[9]] = 0; next;) - table=??(lr_in_ip_routing ), priority=324 , match=(inport == "lr0-public" && ip6.dst == fe80::/64), action=(ip.ttl--; reg8[[0..15]] = 0; xxreg0 = ip6.dst; xxreg1 = fe80::200:20ff:fe20:1213; eth.src = 00:00:20:20:12:13; outport = "lr0-public"; flags.loopback = 1; reg9[[9]] = 0; next;) - table=??(lr_in_ip_routing ), priority=324 , match=(ip6.dst == 2001:db8::/64), action=(ip.ttl--; reg8[[0..15]] = 0; xxreg0 = ip6.dst; xxreg1 = 2001:db8::1; eth.src = 00:00:20:20:12:14; outport = "lr0-private"; flags.loopback = 1; reg9[[9]] = 0; next;) + 
table=??(lr_in_ip_routing ), priority=290 , match=(reg7 == 0 && ip4.dst == 10.0.0.0/24), action=(ip.ttl--; reg8[[0..15]] = 0; reg0 = 192.168.0.10; reg5 = 192.168.0.1; eth.src = 00:00:20:20:12:13; outport = "lr0-public"; flags.loopback = 1; reg9[[9]] = 1; next;) + table=??(lr_in_ip_routing ), priority=290 , match=(reg7 == 0 && ip4.dst == 11.0.0.0/24), action=(ip.ttl--; reg8[[0..15]] = 0; xxreg0 = 2001:db8::10; xxreg1 = 2001:db8::1; eth.src = 00:00:20:20:12:14; outport = "lr0-private"; flags.loopback = 1; reg9[[9]] = 0; next;) + table=??(lr_in_ip_routing ), priority=292 , match=(ip4.dst == 192.168.0.0/24), action=(ip.ttl--; reg8[[0..15]] = 0; reg0 = ip4.dst; reg5 = 192.168.0.1; eth.src = 00:00:20:20:12:13; outport = "lr0-public"; flags.loopback = 1; reg9[[9]] = 1; next;) + table=??(lr_in_ip_routing ), priority=770 , match=(reg7 == 0 && ip6.dst == 2001:db8:1::/64), action=(ip.ttl--; reg8[[0..15]] = 0; reg0 = 192.168.0.20; reg5 = 192.168.0.1; eth.src = 00:00:20:20:12:13; outport = "lr0-public"; flags.loopback = 1; reg9[[9]] = 1; next;) + table=??(lr_in_ip_routing ), priority=770 , match=(reg7 == 0 && ip6.dst == 2001:db8:2::/64), action=(ip.ttl--; reg8[[0..15]] = 0; xxreg0 = 2001:db8::20; xxreg1 = 2001:db8::1; eth.src = 00:00:20:20:12:14; outport = "lr0-private"; flags.loopback = 1; reg9[[9]] = 0; next;) + table=??(lr_in_ip_routing ), priority=772 , match=(inport == "lr0-private" && ip6.dst == fe80::/64), action=(ip.ttl--; reg8[[0..15]] = 0; xxreg0 = ip6.dst; xxreg1 = fe80::200:20ff:fe20:1214; eth.src = 00:00:20:20:12:14; outport = "lr0-private"; flags.loopback = 1; reg9[[9]] = 0; next;) + table=??(lr_in_ip_routing ), priority=772 , match=(inport == "lr0-public" && ip6.dst == fe80::/64), action=(ip.ttl--; reg8[[0..15]] = 0; xxreg0 = ip6.dst; xxreg1 = fe80::200:20ff:fe20:1213; eth.src = 00:00:20:20:12:13; outport = "lr0-public"; flags.loopback = 1; reg9[[9]] = 0; next;) + table=??(lr_in_ip_routing ), priority=772 , match=(ip6.dst == 2001:db8::/64), action=(ip.ttl--; 
reg8[[0..15]] = 0; xxreg0 = ip6.dst; xxreg1 = 2001:db8::1; eth.src = 00:00:20:20:12:14; outport = "lr0-private"; flags.loopback = 1; reg9[[9]] = 0; next;) ]) AT_CHECK([grep -e "lr_in_arp_resolve" lr0flows | ovn_strip_lflows], [0], [dnl @@ -7406,16 +7406,16 @@ AT_CHECK([grep "lr_in_ip_routing_pre" lr0flows | ovn_strip_lflows], [0], [dnl grep -e "(lr_in_ip_routing ).*outport" lr0flows AT_CHECK([grep -e "(lr_in_ip_routing ).*outport" lr0flows | ovn_strip_lflows], [0], [dnl - table=??(lr_in_ip_routing ), priority=122 , match=(reg7 == 1 && ip4.dst == 192.168.0.0/24), action=(ip.ttl--; reg8[[0..15]] = 0; reg0 = 192.168.1.10; reg5 = 192.168.1.1; eth.src = 00:00:00:00:01:01; outport = "lrp1"; flags.loopback = 1; reg9[[9]] = 1; next;) - table=??(lr_in_ip_routing ), priority=124 , match=(ip4.dst == 192.168.0.0/24), action=(ip.ttl--; reg8[[0..15]] = 0; reg0 = ip4.dst; reg5 = 192.168.0.1; eth.src = 00:00:00:00:00:01; outport = "lrp0"; flags.loopback = 1; reg9[[9]] = 1; next;) - table=??(lr_in_ip_routing ), priority=124 , match=(ip4.dst == 192.168.1.0/24), action=(ip.ttl--; reg8[[0..15]] = 0; reg0 = ip4.dst; reg5 = 192.168.1.1; eth.src = 00:00:00:00:01:01; outport = "lrp1"; flags.loopback = 1; reg9[[9]] = 1; next;) - table=??(lr_in_ip_routing ), priority=124 , match=(ip4.dst == 192.168.2.0/24), action=(ip.ttl--; reg8[[0..15]] = 0; reg0 = ip4.dst; reg5 = 192.168.2.1; eth.src = 00:00:00:00:02:01; outport = "lrp2"; flags.loopback = 1; reg9[[9]] = 1; next;) - table=??(lr_in_ip_routing ), priority=162 , match=(reg7 == 2 && ip4.dst == 1.1.1.1/32), action=(ip.ttl--; reg8[[0..15]] = 0; reg0 = 192.168.0.20; reg5 = 192.168.0.1; eth.src = 00:00:00:00:00:01; outport = "lrp0"; flags.loopback = 1; reg9[[9]] = 1; next;) table=??(lr_in_ip_routing ), priority=2 , match=(reg7 == 0 && ip4.dst == 0.0.0.0/0), action=(ip.ttl--; reg8[[0..15]] = 0; reg0 = 192.168.0.10; reg5 = 192.168.0.1; eth.src = 00:00:00:00:00:01; outport = "lrp0"; flags.loopback = 1; reg9[[9]] = 1; next;) table=??(lr_in_ip_routing 
), priority=2 , match=(reg7 == 2 && ip4.dst == 0.0.0.0/0), action=(ip.ttl--; reg8[[0..15]] = 0; reg0 = 192.168.0.10; reg5 = 192.168.0.1; eth.src = 00:00:00:00:00:01; outport = "lrp0"; flags.loopback = 1; reg9[[9]] = 1; next;) - table=??(lr_in_ip_routing ), priority=324 , match=(inport == "lrp0" && ip6.dst == fe80::/64), action=(ip.ttl--; reg8[[0..15]] = 0; xxreg0 = ip6.dst; xxreg1 = fe80::200:ff:fe00:1; eth.src = 00:00:00:00:00:01; outport = "lrp0"; flags.loopback = 1; reg9[[9]] = 0; next;) - table=??(lr_in_ip_routing ), priority=324 , match=(inport == "lrp1" && ip6.dst == fe80::/64), action=(ip.ttl--; reg8[[0..15]] = 0; xxreg0 = ip6.dst; xxreg1 = fe80::200:ff:fe00:101; eth.src = 00:00:00:00:01:01; outport = "lrp1"; flags.loopback = 1; reg9[[9]] = 0; next;) - table=??(lr_in_ip_routing ), priority=324 , match=(inport == "lrp2" && ip6.dst == fe80::/64), action=(ip.ttl--; reg8[[0..15]] = 0; xxreg0 = ip6.dst; xxreg1 = fe80::200:ff:fe00:201; eth.src = 00:00:00:00:02:01; outport = "lrp2"; flags.loopback = 1; reg9[[9]] = 0; next;) + table=??(lr_in_ip_routing ), priority=290 , match=(reg7 == 1 && ip4.dst == 192.168.0.0/24), action=(ip.ttl--; reg8[[0..15]] = 0; reg0 = 192.168.1.10; reg5 = 192.168.1.1; eth.src = 00:00:00:00:01:01; outport = "lrp1"; flags.loopback = 1; reg9[[9]] = 1; next;) + table=??(lr_in_ip_routing ), priority=292 , match=(ip4.dst == 192.168.0.0/24), action=(ip.ttl--; reg8[[0..15]] = 0; reg0 = ip4.dst; reg5 = 192.168.0.1; eth.src = 00:00:00:00:00:01; outport = "lrp0"; flags.loopback = 1; reg9[[9]] = 1; next;) + table=??(lr_in_ip_routing ), priority=292 , match=(ip4.dst == 192.168.1.0/24), action=(ip.ttl--; reg8[[0..15]] = 0; reg0 = ip4.dst; reg5 = 192.168.1.1; eth.src = 00:00:00:00:01:01; outport = "lrp1"; flags.loopback = 1; reg9[[9]] = 1; next;) + table=??(lr_in_ip_routing ), priority=292 , match=(ip4.dst == 192.168.2.0/24), action=(ip.ttl--; reg8[[0..15]] = 0; reg0 = ip4.dst; reg5 = 192.168.2.1; eth.src = 00:00:00:00:02:01; outport = "lrp2"; 
flags.loopback = 1; reg9[[9]] = 1; next;) + table=??(lr_in_ip_routing ), priority=386 , match=(reg7 == 2 && ip4.dst == 1.1.1.1/32), action=(ip.ttl--; reg8[[0..15]] = 0; reg0 = 192.168.0.20; reg5 = 192.168.0.1; eth.src = 00:00:00:00:00:01; outport = "lrp0"; flags.loopback = 1; reg9[[9]] = 1; next;) + table=??(lr_in_ip_routing ), priority=772 , match=(inport == "lrp0" && ip6.dst == fe80::/64), action=(ip.ttl--; reg8[[0..15]] = 0; xxreg0 = ip6.dst; xxreg1 = fe80::200:ff:fe00:1; eth.src = 00:00:00:00:00:01; outport = "lrp0"; flags.loopback = 1; reg9[[9]] = 0; next;) + table=??(lr_in_ip_routing ), priority=772 , match=(inport == "lrp1" && ip6.dst == fe80::/64), action=(ip.ttl--; reg8[[0..15]] = 0; xxreg0 = ip6.dst; xxreg1 = fe80::200:ff:fe00:101; eth.src = 00:00:00:00:01:01; outport = "lrp1"; flags.loopback = 1; reg9[[9]] = 0; next;) + table=??(lr_in_ip_routing ), priority=772 , match=(inport == "lrp2" && ip6.dst == fe80::/64), action=(ip.ttl--; reg8[[0..15]] = 0; xxreg0 = ip6.dst; xxreg1 = fe80::200:ff:fe00:201; eth.src = 00:00:00:00:02:01; outport = "lrp2"; flags.loopback = 1; reg9[[9]] = 0; next;) ]) AT_CLEANUP @@ -14384,3 +14384,4 @@ AT_CHECK([ovn-sbctl lflow-list S1 | grep ls_out_acl_action | grep priority=500 | AT_CLEANUP ]) + diff --git a/tests/ovn.at b/tests/ovn.at index d9a8c320d5..dcdbaf12c8 100644 --- a/tests/ovn.at +++ b/tests/ovn.at @@ -40025,6 +40025,17 @@ outside_to_vif() { OVN_CHECK_PACKETS_REMOVE_BROADCAST([hv$hv/vif$vif-tx.pcap], [$filename]) } +send_vif_to_outside() { + for i in `seq 1 100`; do + packet=$(fmt_pkt "Ether(dst='00:00:03:00:00:01', src='00:00:ff:ff:ff:01')/ \ + IP(dst='1.1.1.1', src='10.0.0.2')/ TCP(dport=5$i)") + as hv1 ovs-appctl netdev-dummy/receive vif1 $packet + packet=$(fmt_pkt "Ether(dst='00:00:04:00:00:01', src='00:00:ff:ff:ff:02')/ \ + IP(dst='1.1.1.1', src='198.51.100.10')/ TCP(dport=5$i)") + as hv2 ovs-appctl netdev-dummy/receive vif2 $packet + done +} + # injecting packets on any of the active-active interfaces will allow them 
# to reach vif1 and vif2 outside_to_vif 1 1 "IP(dst='192.0.2.10', src='1.1.1.1')/ TCP()" \ @@ -40037,14 +40048,9 @@ outside_to_vif 2 2 "IP(dst='198.51.100.10', src='1.1.1.1')/ TCP()" \ # packets from vif1 and vif2 to the internet will use any of the active-active # interfaces. As the dp_hash will decide which they use we only check if # each of the egress interfaces has at least one expected packet going out. -for i in `seq 1 100`; do - packet=$(fmt_pkt "Ether(dst='00:00:03:00:00:01', src='00:00:ff:ff:ff:01')/ \ - IP(dst='1.1.1.1', src='10.0.0.2')/ TCP(dport=5$i)") - as hv1 ovs-appctl netdev-dummy/receive vif1 $packet - packet=$(fmt_pkt "Ether(dst='00:00:04:00:00:01', src='00:00:ff:ff:ff:02')/ \ - IP(dst='1.1.1.1', src='198.51.100.10')/ TCP(dport=5$i)") - as hv2 ovs-appctl netdev-dummy/receive vif2 $packet -done +# Since vif1 is bound to hv1 and vif2 to hv2 we expect them to egress using +# only these interfaces +send_vif_to_outside # for vif1 packet=$(fmt_pkt "Ether(dst='00:cc:cc:cc:cc:10', src='00:01:01:00:00:01')/ \ @@ -40055,12 +40061,32 @@ packet=$(fmt_pkt "Ether(dst='00:cc:cc:cc:cc:11', src='00:01:02:00:00:01')/ \ IP(dst='1.1.1.1', src='192.0.2.10', ttl=62)/ TCP()" | \ cut -c 1-68) OVN_CHECK_PACKETS_CONTAIN_PARTS([hv1/br-phys2_hv1-2-tx.pcap], ["$packet"]) + +# for vif2 +packet=$(fmt_pkt "Ether(dst='00:cc:cc:cc:cc:20', src='00:02:01:00:00:01')/ \ + IP(dst='1.1.1.1', src='198.51.100.10', ttl=62)/ TCP()" | \ + cut -c 1-68) +OVN_CHECK_PACKETS_CONTAIN_PARTS([hv2/br-phys_hv2-1-tx.pcap], ["$packet"]) + +# now pr1 gets bound to hv2 and pr2 to hv1. We expect that vif1 will now exit +# on hv2 and vif2 on hv1. 
+as hv1 reset_pcap_file br-phys_hv1-1 hv1/br-phys_hv1-1 +as hv1 reset_pcap_file br-phys2_hv1-2 hv1/br-phys2_hv1-2 +as hv2 reset_pcap_file br-phys_hv2-1 hv2/br-phys_hv2-1 +check ovn-nbctl lrp-del-gateway-chassis pr1-public hv1 +check ovn-nbctl lrp-set-gateway-chassis pr1-public hv2 +check ovn-nbctl lrp-del-gateway-chassis pr2-public hv2 +check ovn-nbctl lrp-set-gateway-chassis pr2-public hv1 +check ovn-nbctl --wait=hv sync +send_vif_to_outside + +# vif1 packet=$(fmt_pkt "Ether(dst='00:cc:cc:cc:cc:20', src='00:02:01:00:00:01')/ \ IP(dst='1.1.1.1', src='192.0.2.10', ttl=62)/ TCP()" | \ cut -c 1-68) OVN_CHECK_PACKETS_CONTAIN_PARTS([hv2/br-phys_hv2-1-tx.pcap], ["$packet"]) -# for vif2 +# vif2 packet=$(fmt_pkt "Ether(dst='00:cc:cc:cc:cc:10', src='00:01:01:00:00:01')/ \ IP(dst='1.1.1.1', src='198.51.100.10', ttl=62)/ TCP()" | \ cut -c 1-68) @@ -40069,10 +40095,7 @@ packet=$(fmt_pkt "Ether(dst='00:cc:cc:cc:cc:11', src='00:01:02:00:00:01')/ \ IP(dst='1.1.1.1', src='198.51.100.10', ttl=62)/ TCP()" | \ cut -c 1-68) OVN_CHECK_PACKETS_CONTAIN_PARTS([hv1/br-phys2_hv1-2-tx.pcap], ["$packet"]) -packet=$(fmt_pkt "Ether(dst='00:cc:cc:cc:cc:20', src='00:02:01:00:00:01')/ \ - IP(dst='1.1.1.1', src='198.51.100.10', ttl=62)/ TCP()" | \ - cut -c 1-68) -OVN_CHECK_PACKETS_CONTAIN_PARTS([hv2/br-phys_hv2-1-tx.pcap], ["$packet"]) + # bgp packet to the active-active router ports are forwarded to the respective # bgp redirect LSP