diff --git a/controller/ovn-controller.c b/controller/ovn-controller.c index af1407ba8e..2d9772c37c 100644 --- a/controller/ovn-controller.c +++ b/controller/ovn-controller.c @@ -4820,6 +4820,13 @@ struct ed_type_route { /* Contains struct tracked_datapath entries for local datapaths subject to * route exchange. */ struct hmap tracked_route_datapaths; + + /* Contains the tracked_ports that in the last run where bound locally */ + struct sset tracked_ports_local; + + /* Contains the tracked_ports that in the last run where bound not local */ + struct sset tracked_ports_remote; + /* Contains struct advertise_datapath_entry */ struct hmap announce_routes; }; @@ -4868,6 +4875,8 @@ en_route_run(struct engine_node *node, void *data) struct route_ctx_out r_ctx_out = { .tracked_re_datapaths = &re_data->tracked_route_datapaths, + .tracked_ports_local = &re_data->tracked_ports_local, + .tracked_ports_remote = &re_data->tracked_ports_remote, .announce_routes = &re_data->announce_routes, }; @@ -4884,6 +4893,8 @@ en_route_init(struct engine_node *node OVS_UNUSED, struct ed_type_route *data = xzalloc(sizeof *data); hmap_init(&data->tracked_route_datapaths); + sset_init(&data->tracked_ports_local); + sset_init(&data->tracked_ports_remote); hmap_init(&data->announce_routes); return data; @@ -4895,6 +4906,8 @@ en_route_cleanup(void *data) struct ed_type_route *re_data = data; tracked_datapaths_destroy(&re_data->tracked_route_datapaths); + sset_destroy(&re_data->tracked_ports_local); + sset_destroy(&re_data->tracked_ports_remote); route_cleanup(&re_data->announce_routes); hmap_destroy(&re_data->announce_routes); } @@ -4942,6 +4955,26 @@ route_sb_port_binding_data_handler(struct engine_node *node, void *data) const struct sbrec_port_binding_table *pb_table = EN_OVSDB_GET(engine_get_input("SB_port_binding", node)); + const struct ovsrec_open_vswitch_table *ovs_table = + EN_OVSDB_GET(engine_get_input("OVS_open_vswitch", node)); + const char *chassis_id = get_ovs_chassis_id(ovs_table); + ovs_assert(chassis_id); + + struct ovsdb_idl_index *sbrec_chassis_by_name = + engine_ovsdb_node_get_index( + engine_get_input("SB_chassis", node), + "name"); + const struct sbrec_chassis *chassis + = chassis_lookup_by_name(sbrec_chassis_by_name, chassis_id); + ovs_assert(chassis); + + struct ovsdb_idl_index *sbrec_port_binding_by_name = + engine_ovsdb_node_get_index( + engine_get_input("SB_port_binding", node), + "name"); + struct ed_type_runtime_data *rt_data = + engine_get_input_data("runtime_data", node); + const struct sbrec_port_binding *sbrec_pb; SBREC_PORT_BINDING_TABLE_FOR_EACH_TRACKED (sbrec_pb, pb_table) { struct tracked_datapath *re_t_dp = @@ -4959,6 +4992,24 @@ route_sb_port_binding_data_handler(struct engine_node *node, void *data) return false; } + if (sset_contains(&re_data->tracked_ports_local, + sbrec_pb->logical_port)) { + if (!find_route_exchange_pb(sbrec_port_binding_by_name, chassis, + &rt_data->active_tunnels, sbrec_pb)) { + /* The port was previously local but now it no longer is. */ + return false; + } + } + + if (sset_contains(&re_data->tracked_ports_remote, + sbrec_pb->logical_port)) { + if (find_route_exchange_pb(sbrec_port_binding_by_name, chassis, + &rt_data->active_tunnels, sbrec_pb)) { + /* The port was previously remote but now we bound it. */ + return false; + } + } + } return true; } diff --git a/controller/route-exchange-netlink.c b/controller/route-exchange-netlink.c index 9fea0d9551..f4428009e9 100644 --- a/controller/route-exchange-netlink.c +++ b/controller/route-exchange-netlink.c @@ -102,7 +102,8 @@ re_nl_delete_vrf(const char *ifname) static int modify_route(uint32_t type, uint32_t flags_arg, uint32_t table_id, - const struct in6_addr *dst, unsigned int plen) + const struct in6_addr *dst, unsigned int plen, + unsigned int priority) { uint32_t flags = NLM_F_REQUEST | NLM_F_ACK; bool is_ipv4 = IN6_IS_ADDR_V4MAPPED(dst); @@ -128,6 +129,7 @@ modify_route(uint32_t type, uint32_t flags_arg, uint32_t table_id, rt->rtm_dst_len = plen; nl_msg_put_u32(&request, RTA_TABLE, table_id); + nl_msg_put_u32(&request, RTA_PRIORITY, priority); if (is_ipv4) { nl_msg_put_be32(&request, RTA_DST, in6_addr_get_mapped_ipv4(dst)); @@ -143,7 +145,7 @@ modify_route(uint32_t type, uint32_t flags_arg, uint32_t table_id, int re_nl_add_route(uint32_t table_id, const struct in6_addr *dst, - unsigned int plen) + unsigned int plen, unsigned int priority) { uint32_t flags = NLM_F_CREATE | NLM_F_EXCL; uint32_t type = RTM_NEWROUTE; @@ -155,12 +157,12 @@ re_nl_add_route(uint32_t table_id, const struct in6_addr *dst, return EINVAL; } - return modify_route(type, flags, table_id, dst, plen); + return modify_route(type, flags, table_id, dst, plen, priority); } int re_nl_delete_route(uint32_t table_id, const struct in6_addr *dst, - unsigned int plen) + unsigned int plen, unsigned int priority) { if (!TABLE_ID_VALID(table_id)) { VLOG_WARN_RL(&rl, @@ -169,7 +171,7 @@ re_nl_delete_route(uint32_t table_id, const struct in6_addr *dst, return EINVAL; } - return modify_route(RTM_DELROUTE, 0, table_id, dst, plen); + return modify_route(RTM_DELROUTE, 0, table_id, dst, plen, priority); } static uint32_t @@ -230,13 +232,15 @@ handle_route_msg_delete_routes(const struct route_table_msg *msg, void *data) uint32_t arhash = advertise_route_hash(&rd->rta_dst, rd->rtm_dst_len); HMAP_FOR_EACH_WITH_HASH (ar, node, arhash, routes) { if (ipv6_addr_equals(&ar->addr, &rd->rta_dst) - && ar->plen == rd->rtm_dst_len) { + && ar->plen == rd->rtm_dst_len + && ar->priority == rd->rta_priority) { ar->installed = true; return; } } + err = re_nl_delete_route(rd->rta_table_id, &rd->rta_dst, - rd->rtm_dst_len); + rd->rtm_dst_len, rd->rta_priority); if (err) { char addr_s[INET6_ADDRSTRLEN + 1]; VLOG_WARN_RL(&rl, "Delete route table_id=%"PRIu32" dst=%s plen=%d: %s", @@ -273,7 +277,8 @@ re_nl_sync_routes(uint32_t table_id, if (ar->installed) { continue; } - int err = re_nl_add_route(table_id, &ar->addr, ar->plen); + int err = re_nl_add_route(table_id, &ar->addr, ar->plen, + ar->priority); if (err) { char addr_s[INET6_ADDRSTRLEN + 1]; VLOG_WARN_RL(&rl, "Add route table_id=%"PRIu32" dst=%s " diff --git a/controller/route-exchange-netlink.h b/controller/route-exchange-netlink.h index fca2429e67..13346e9448 100644 --- a/controller/route-exchange-netlink.h +++ b/controller/route-exchange-netlink.h @@ -39,9 +39,9 @@ int re_nl_create_vrf(const char *ifname, uint32_t table_id); int re_nl_delete_vrf(const char *ifname); int re_nl_add_route(uint32_t table_id, const struct in6_addr *dst, - unsigned int plen); + unsigned int plen, unsigned int priority); int re_nl_delete_route(uint32_t table_id, const struct in6_addr *dst, - unsigned int plen); + unsigned int plen, unsigned int priority); void re_nl_dump(uint32_t table_id); diff --git a/controller/route.c b/controller/route.c index 036a574d8f..53349821fc 100644 --- a/controller/route.c +++ b/controller/route.c @@ -33,6 +33,9 @@ static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 20); * in the corresponding VRF interface name. */ #define MAX_TABLE_ID 1000000000 +#define PRIORITY_DEFAULT 1000 +#define PRIORITY_LOCAL_BOUND 100 + bool route_exchange_relevant_port(const struct sbrec_port_binding *pb) { @@ -46,7 +49,7 @@ advertise_route_hash(const struct in6_addr *dst, unsigned int plen) return hash_int(plen, hash); } -static const struct sbrec_port_binding* +const struct sbrec_port_binding* find_route_exchange_pb(struct ovsdb_idl_index *sbrec_port_binding_by_name, const struct sbrec_chassis *chassis, const struct sset *active_tunnels, @@ -74,6 +77,29 @@ find_route_exchange_pb(struct ovsdb_idl_index *sbrec_port_binding_by_name, return NULL; } +static bool +pb_is_local(struct ovsdb_idl_index *sbrec_port_binding_by_name, + const struct sbrec_chassis *chassis, + const struct sset *active_tunnels, + const char *port_name) +{ + if (lport_is_chassis_resident(sbrec_port_binding_by_name, chassis, + active_tunnels, port_name)) { + return true; + } + + const struct sbrec_port_binding *pb = lport_lookup_by_name( + sbrec_port_binding_by_name, port_name); + + const char *crp = smap_get(&pb->options, "chassis-redirect-port"); + if (!crp) { + return NULL; + } + + return lport_is_chassis_resident(sbrec_port_binding_by_name, chassis, + active_tunnels, crp); +} + static void advertise_datapath_cleanup(struct advertise_datapath_entry *ad) { @@ -92,6 +118,8 @@ route_run(struct route_ctx_in *r_ctx_in, struct route_ctx_out *r_ctx_out) { tracked_datapaths_destroy(r_ctx_out->tracked_re_datapaths); + sset_clear(r_ctx_out->tracked_ports_local); + sset_clear(r_ctx_out->tracked_ports_remote); const struct local_datapath *ld; HMAP_FOR_EACH (ld, hmap_node, r_ctx_in->local_datapaths) { @@ -176,11 +204,29 @@ route_run(struct route_ctx_in *r_ctx_in, continue; } + unsigned int priority = PRIORITY_DEFAULT; + + if (route->tracked_port) { + if (pb_is_local( + r_ctx_in->sbrec_port_binding_by_name, + r_ctx_in->chassis, + r_ctx_in->active_tunnels, + route->tracked_port->logical_port)) { + priority = PRIORITY_LOCAL_BOUND; + sset_add(r_ctx_out->tracked_ports_local, + route->tracked_port->logical_port); + } else { + sset_add(r_ctx_out->tracked_ports_remote, + route->tracked_port->logical_port); + } + } + struct advertise_route_entry *ar = xzalloc(sizeof(*ar)); hmap_insert(&ad->routes, &ar->node, advertise_route_hash(&prefix, plen)); ar->addr = prefix; ar->plen = plen; + ar->priority = priority; } sbrec_advertised_route_index_destroy_row(route_filter); diff --git a/controller/route.h b/controller/route.h index 4b71fa88a7..ef08312cba 100644 --- a/controller/route.h +++ b/controller/route.h @@ -39,6 +39,13 @@ struct route_ctx_in { struct route_ctx_out { struct hmap *tracked_re_datapaths; + + /* Contains the tracked_ports that in the last run where bound locally */ + struct sset *tracked_ports_local; + + /* Contains the tracked_ports that in the last run where bound not local */ + struct sset *tracked_ports_remote; + /* Contains struct advertise_datapath_entry */ struct hmap *announce_routes; }; @@ -61,11 +68,17 @@ struct advertise_route_entry { struct hmap_node node; struct in6_addr addr; unsigned int plen; + unsigned int priority; /* used by the route-exchange module to determine if the route is * already installed */ bool installed; }; +const struct sbrec_port_binding *find_route_exchange_pb( + struct ovsdb_idl_index *sbrec_port_binding_by_name, + const struct sbrec_chassis *chassis, + const struct sset *active_tunnels, + const struct sbrec_port_binding *pb); bool route_exchange_relevant_port(const struct sbrec_port_binding *pb); uint32_t advertise_route_hash(const struct in6_addr *dst, unsigned int plen); void route_run(struct route_ctx_in *, diff --git a/tests/system-ovn.at b/tests/system-ovn.at index 7ad666f206..08a9dc4182 100644 --- a/tests/system-ovn.at +++ b/tests/system-ovn.at @@ -14887,8 +14887,8 @@ ovnvrf1337 1337 # ip route list output has a trailing space on each line # the awk magic removes all trailing spaces. OVS_WAIT_UNTIL_EQUAL([ip route list vrf ovnvrf1337 | awk '{$1=$1};1'], [dnl -blackhole 192.0.2.0/24 proto 84 -blackhole 198.51.100.0/24 proto 84]) +blackhole 192.0.2.0/24 proto 84 metric 1000 +blackhole 198.51.100.0/24 proto 84 metric 1000]) # we now switch to announcing host routes and expect 192.0.2.0/24 to be gone # and the following to be added: @@ -14896,15 +14896,39 @@ blackhole 198.51.100.0/24 proto 84]) # * 192.0.2.2/32 # * 192.0.2.3/32 # * 192.0.2.10/32 +# the last 3 of them are local to the current chassis so we expect a better +# prio. check ovn-nbctl --wait=hv set Logical_Router_Port internet-public \ options:dynamic-routing-connected-as-host-routes=true OVS_WAIT_UNTIL_EQUAL([ip route list vrf ovnvrf1337 | awk '{$1=$1};1'], [dnl -blackhole 192.0.2.1 proto 84 -blackhole 192.0.2.2 proto 84 -blackhole 192.0.2.3 proto 84 -blackhole 192.0.2.10 proto 84 -blackhole 198.51.100.0/24 proto 84]) +blackhole 192.0.2.1 proto 84 metric 1000 +blackhole 192.0.2.2 proto 84 metric 100 +blackhole 192.0.2.3 proto 84 metric 100 +blackhole 192.0.2.10 proto 84 metric 100 +blackhole 198.51.100.0/24 proto 84 metric 1000]) + +# if the pr1-public lrp is now removed from this hypervisor the route metric +# will go back to the default. +# For this we just schedule it on a non existing chassis +check ovn-nbctl lrp-del-gateway-chassis pr1-public hv1 +check ovn-nbctl --wait=hv lrp-set-gateway-chassis pr1-public hv123 +OVS_WAIT_UNTIL_EQUAL([ip route list vrf ovnvrf1337 | awk '{$1=$1};1'], [dnl +blackhole 192.0.2.1 proto 84 metric 1000 +blackhole 192.0.2.2 proto 84 metric 1000 +blackhole 192.0.2.3 proto 84 metric 100 +blackhole 192.0.2.10 proto 84 metric 1000 +blackhole 198.51.100.0/24 proto 84 metric 1000]) + +# moving pr1-public back will also change the route metrics again +check ovn-nbctl lrp-del-gateway-chassis pr1-public hv123 +check ovn-nbctl --wait=hv lrp-set-gateway-chassis pr1-public hv1 +OVS_WAIT_UNTIL_EQUAL([ip route list vrf ovnvrf1337 | awk '{$1=$1};1'], [dnl +blackhole 192.0.2.1 proto 84 metric 1000 +blackhole 192.0.2.2 proto 84 metric 100 +blackhole 192.0.2.3 proto 84 metric 100 +blackhole 192.0.2.10 proto 84 metric 100 +blackhole 198.51.100.0/24 proto 84 metric 1000]) # now we test route learning check_row_count Learned_Route 0 @@ -15114,8 +15138,8 @@ ovnvrf1337 1337 # ip route list output has a trailing space on each line # the awk magic removes all trailing spaces. OVS_WAIT_UNTIL_EQUAL([ip route list vrf ovnvrf1337 | awk '{$1=$1};1'], [dnl -blackhole 192.0.2.0/24 proto 84 -blackhole 198.51.100.0/24 proto 84]) +blackhole 192.0.2.0/24 proto 84 metric 1000 +blackhole 198.51.100.0/24 proto 84 metric 1000]) # we now switch to announcing host routes and expect 192.0.2.0/24 to be gone # and the following to be added: @@ -15129,11 +15153,33 @@ check ovn-nbctl --wait=hv set Logical_Router_Port internet-public \ options:dynamic-routing-connected-as-host-routes=true OVS_WAIT_UNTIL_EQUAL([ip route list vrf ovnvrf1337 | awk '{$1=$1};1'], [dnl -blackhole 192.0.2.1 proto 84 -blackhole 192.0.2.2 proto 84 -blackhole 192.0.2.3 proto 84 -blackhole 192.0.2.10 proto 84 -blackhole 198.51.100.0/24 proto 84]) +blackhole 192.0.2.1 proto 84 metric 100 +blackhole 192.0.2.2 proto 84 metric 100 +blackhole 192.0.2.3 proto 84 metric 100 +blackhole 192.0.2.10 proto 84 metric 100 +blackhole 198.51.100.0/24 proto 84 metric 1000]) + +# if the pr1-public lrp is now removed from this hypervisor the route metric +# will go back to the default. +# For this we just schedule it on a non existing chassis +check ovn-nbctl lrp-del-gateway-chassis pr1-public hv1 +check ovn-nbctl --wait=hv lrp-set-gateway-chassis pr1-public hv123 +OVS_WAIT_UNTIL_EQUAL([ip route list vrf ovnvrf1337 | awk '{$1=$1};1'], [dnl +blackhole 192.0.2.1 proto 84 metric 100 +blackhole 192.0.2.2 proto 84 metric 1000 +blackhole 192.0.2.3 proto 84 metric 100 +blackhole 192.0.2.10 proto 84 metric 1000 +blackhole 198.51.100.0/24 proto 84 metric 1000]) + +# moving pr1-public back will also change the route metrics again +check ovn-nbctl lrp-del-gateway-chassis pr1-public hv123 +check ovn-nbctl --wait=hv lrp-set-gateway-chassis pr1-public hv1 +OVS_WAIT_UNTIL_EQUAL([ip route list vrf ovnvrf1337 | awk '{$1=$1};1'], [dnl +blackhole 192.0.2.1 proto 84 metric 100 +blackhole 192.0.2.2 proto 84 metric 100 +blackhole 192.0.2.3 proto 84 metric 100 +blackhole 192.0.2.10 proto 84 metric 100 +blackhole 198.51.100.0/24 proto 84 metric 1000]) # now we test route learning check_row_count Learned_Route 0