diff --git a/NEWS b/NEWS index d2b3ee4600..124ce60a27 100644 --- a/NEWS +++ b/NEWS @@ -12,6 +12,12 @@ Post v24.09.0 and "routing-protocols" are now also usable on distributed gateway ports. - ovn-nb: Changed schema of ovn-nb to make networks optional within Logical Router Ports. + - Add the option "active-active-lrp" to LRPs. If set, northd will clone this + LRP based on its HA_Chassis_Group and the + other_config:ovn-active-active-mappings of the defined chassis. In + combination with the dynamic routing features this allows operators to + integrate OVN into the network fabric in a highly available way without + significant (or any) changes to the CMS. OVN v24.09.0 - 13 Sep 2024 -------------------------- diff --git a/controller/ovn-controller.8.xml b/controller/ovn-controller.8.xml index 6a7d676afc..10407351b8 100644 --- a/controller/ovn-controller.8.xml +++ b/controller/ovn-controller.8.xml @@ -404,6 +404,36 @@ If the value is zero, it disables the inactivity probe.

+
external_ids:ovn-active-active-mappings
+
+

+ This setting provides the chassis-specific values for the + options:active-active-lrp in the northbound database. + + The following is an example of such an option: + ovn-active-active-mappings="phys;00:fe:fe:fe:fe:01,172.16.0.10/25;00:fe:fe:fe:fe:33,172.17.0.10/25|phys2;00:aa:bb:cc:dd:ee,192.168.0.10/24" + + This configures active-active mappings for two external networks: + two ports on phys and one port on phys2. +

+

+

diff --git a/lib/automake.mk b/lib/automake.mk
index 25e5164065..b43f8a7f3f 100644
--- a/lib/automake.mk
+++ b/lib/automake.mk
@@ -26,6 +26,8 @@ lib_libovn_la_SOURCES = \
 	lib/ovn-parallel-hmap.c \
 	lib/ip-mcast-index.c \
 	lib/ip-mcast-index.h \
+	lib/lrp-index.c \
+	lib/lrp-index.h \
 	lib/mac-binding-index.c \
 	lib/mac-binding-index.h \
 	lib/mcast-group-index.c \
diff --git a/lib/lrp-index.c b/lib/lrp-index.c
new file mode 100644
index 0000000000..ac64c4b45d
--- /dev/null
+++ b/lib/lrp-index.c
@@ -0,0 +1,43 @@
+/*
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <config.h>
+#include "lib/lrp-index.h"
+#include "lib/ovn-nb-idl.h"
+
+/* Creates and returns an index over the northbound Logical_Router_Port
+ * table, keyed by its "name" column. */
+struct ovsdb_idl_index *
+lrp_index_create(struct ovsdb_idl *idl)
+{
+    return ovsdb_idl_index_create1(idl, &nbrec_logical_router_port_col_name);
+}
+
+/* Finds and returns the lrp with the given 'name', or NULL if no such
+ * lrp exists. */
+const struct nbrec_logical_router_port *
+lrp_lookup_by_name(struct ovsdb_idl_index *nbrec_lrp_by_name,
+                   const char *name)
+{
+    struct nbrec_logical_router_port *target =
+        nbrec_logical_router_port_index_init_row(nbrec_lrp_by_name);
+    nbrec_logical_router_port_index_set_name(target, name);
+
+    struct nbrec_logical_router_port *retval =
+        nbrec_logical_router_port_index_find(nbrec_lrp_by_name, target);
+
+    nbrec_logical_router_port_index_destroy_row(target);
+
+    return retval;
+}
diff --git a/lib/lrp-index.h b/lib/lrp-index.h
new file mode 100644
index 0000000000..2c56933fcf
--- /dev/null
+++ b/lib/lrp-index.h
@@ -0,0 +1,25 @@
+/*
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef OVN_LRP_INDEX_H
+#define OVN_LRP_INDEX_H 1
+
+struct ovsdb_idl;
+
+struct ovsdb_idl_index *lrp_index_create(struct ovsdb_idl *);
+
+const struct nbrec_logical_router_port *lrp_lookup_by_name(
+    struct ovsdb_idl_index *nbrec_lrp_by_name, const char *name);
+
+#endif /* lib/lrp-index.h */
diff --git a/lib/ovn-util.c b/lib/ovn-util.c
index b78bdbfa11..04f3e5f88c 100644
--- a/lib/ovn-util.c
+++ b/lib/ovn-util.c
@@ -1084,6 +1084,129 @@ get_chassis_external_id_value_bool(const struct smap *external_ids,
     return ret;
 }
 
+/* Looks up 'network_name' in the "ovn-active-active-mappings" option of
+ * 'chassis'->other_config and stores the MAC/IP pairs configured for it in
+ * 'chassis_aa_network'.
+ *
+ * Returns true if the option has an entry for 'network_name', false
+ * otherwise (including when the option is not set at all).  On a true
+ * return the caller owns 'chassis_aa_network'->network_name and
+ * 'chassis_aa_network'->addresses.  Malformed port entries are logged and
+ * skipped, so 'n_addresses' can be 0 even on a true return. */
+bool
+chassis_find_active_active_networks(const struct sbrec_chassis *chassis,
+                                    const char *network_name,
+                                    struct chassis_aa_network
+                                        *chassis_aa_network) {
+    memset(chassis_aa_network, 0, sizeof *chassis_aa_network);
+
+    const char *aa_ports = smap_get(&chassis->other_config,
+                                    "ovn-active-active-mappings");
+    if (!aa_ports) {
+        /* Nothing configured on this chassis; xstrdup() below must not be
+         * handed NULL. */
+        return false;
+    }
+
+    bool found = false;
+    char *curnet, *nextnet, *curport, *nextport, *start;
+
+    /* Structure
+     * ovn-active-active-mappings="<network>|<network>"
+     * network="<network_name>;<port>;<port>"
+     * port="<mac>,<ip>" */
+    nextnet = start = xstrdup(aa_ports);
+    while ((curnet = strsep(&nextnet, "|")) && *curnet) {
+        nextport = curnet;
+        char *network = strsep(&nextport, ";");
+        if (strcmp(network, network_name)) {
+            continue;
+        }
+        if (found) {
+            /* The network is listed more than once and the later entry
+             * wins; release what the earlier entry allocated. */
+            for (size_t i = 0; i < chassis_aa_network->n_addresses; i++) {
+                destroy_lport_addresses(&chassis_aa_network->addresses[i]);
+            }
+            free(chassis_aa_network->network_name);
+        }
+        found = true;
+        chassis_aa_network->network_name = xstrdup(network);
+        chassis_aa_network->n_addresses = 0;
+        while ((curport = strsep(&nextport, ";")) && *curport) {
+            char *mac, *ip;
+
+            mac = strsep(&curport, ",");
+            ip = curport;
+
+            if (!mac || !ip || !*mac || !*ip) {
+                VLOG_ERR("Invalid format for "
+                         "ovn-active-active-mappings '%s'",
+                         aa_ports);
+                continue;
+            }
+
+            chassis_aa_network->addresses = xrealloc(
+                chassis_aa_network->addresses,
+                (chassis_aa_network->n_addresses + 1
+                 ) * sizeof *chassis_aa_network->addresses);
+            struct lport_addresses *address =
+                &chassis_aa_network->addresses[
+                    chassis_aa_network->n_addresses];
+            init_lport_addresses(address);
+
+            if (!eth_addr_from_string(mac, &address->ea)) {
+                VLOG_ERR("Invalid mac address in "
+                         "ovn-active-active-mappings '%s'",
+                         aa_ports);
+                /* 'address' points into the 'addresses' array, so it must
+                 * not be passed to free(); the slot is reused for the next
+                 * port because 'n_addresses' is not incremented. */
+                destroy_lport_addresses(address);
+                continue;
+            }
+            snprintf(address->ea_s, sizeof address->ea_s, ETH_ADDR_FMT,
+                     ETH_ADDR_ARGS(address->ea));
+
+            ovs_be32 ip4;
+            struct in6_addr ip6;
+            unsigned int plen;
+            char *error;
+
+            error = ip_parse_cidr(ip, &ip4, &plen);
+            if (!error) {
+                if (!ip4) {
+                    VLOG_ERR("Invalid ip address in "
+                             "ovn-active-active-mappings '%s'",
+                             aa_ports);
+                    destroy_lport_addresses(address);
+                    continue;
+                }
+
+                add_ipv4_netaddr(address, ip4, plen);
+            } else {
+                free(error);
+
+                error = ipv6_parse_cidr(ip, &ip6, &plen);
+                if (!error) {
+                    add_ipv6_netaddr(address, ip6, plen);
+                } else {
+                    VLOG_ERR("Invalid ip address in "
+                             "ovn-active-active-mappings '%s'",
+                             aa_ports);
+                    destroy_lport_addresses(address);
+                    free(error);
+                    continue;
+                }
+            }
+            chassis_aa_network->n_addresses++;
+        }
+    }
+
+    free(start);
+    return found;
+}
+
 void flow_collector_ids_init(struct flow_collector_ids *ids)
 {
     ovs_list_init(&ids->list);
diff --git a/lib/ovn-util.h b/lib/ovn-util.h
index 899bd9d12c..a91e3f5406 100644
--- a/lib/ovn-util.h
+++ b/lib/ovn-util.h
@@ -44,6 +44,7 @@ struct ovsrec_flow_sample_collector_set_table;
 struct sbrec_datapath_binding;
 struct sbrec_logical_flow;
 struct sbrec_port_binding;
+struct sbrec_chassis;
 struct smap;
 struct svec;
 struct uuid;
@@ -353,6 +354,20 @@ int64_t daemon_startup_ts(void);
 char *lr_lb_address_set_name(uint32_t lr_tunnel_key, int addr_family);
 char *lr_lb_address_set_ref(uint32_t lr_tunnel_key, int addr_family);
 
+/* Addresses configured for one external network in a chassis'
+ * "ovn-active-active-mappings" option, as filled in by
+ * chassis_find_active_active_networks().  'addresses' holds 'n_addresses'
+ * elements. */
+struct chassis_aa_network {
+    char *network_name;
+    struct lport_addresses *addresses;
+    size_t n_addresses;
+};
+
+bool chassis_find_active_active_networks(const struct sbrec_chassis *,
+                                         const char *,
+                                         struct chassis_aa_network *);
+
 const char *
 get_chassis_external_id_value(const struct smap *,
                               const char *chassis_id,
diff --git a/northd/en-northd.c b/northd/en-northd.c
index c7d1ebcb35..57158c0296 100644
--- a/northd/en-northd.c
+++ b/northd/en-northd.c
@@ -41,6 +41,10 @@ static void
northd_get_input_data(struct engine_node *node, struct northd_input *input_data) { + input_data->nbrec_lrp_by_name = + engine_ovsdb_node_get_index( + engine_get_input("NB_logical_router", node), + "nbrec_lrp_by_name"); input_data->sbrec_chassis_by_name = engine_ovsdb_node_get_index( engine_get_input("SB_chassis", node), diff --git a/northd/inc-proc-northd.c b/northd/inc-proc-northd.c index 6e0aa04c46..d72566f7ac 100644 --- a/northd/inc-proc-northd.c +++ b/northd/inc-proc-northd.c @@ -19,6 +19,7 @@ #include #include "chassis-index.h" +#include "lrp-index.h" #include "ip-mcast-index.h" #include "lib/inc-proc-eng.h" #include "lib/mac-binding-index.h" @@ -350,6 +351,8 @@ void inc_proc_northd_init(struct ovsdb_idl_loop *nb, .sb_idl = sb->idl, }; + struct ovsdb_idl_index *nbrec_lrp_by_name = + lrp_index_create(nb->idl); struct ovsdb_idl_index *sbrec_chassis_by_name = chassis_index_create(sb->idl); struct ovsdb_idl_index *sbrec_ha_chassis_grp_by_name = @@ -367,6 +370,9 @@ void inc_proc_northd_init(struct ovsdb_idl_loop *nb, engine_init(&en_northd_output, &engine_arg); + engine_ovsdb_node_add_index(&en_nb_logical_router, + "nbrec_lrp_by_name", + nbrec_lrp_by_name); engine_ovsdb_node_add_index(&en_sb_chassis, "sbrec_chassis_by_name", sbrec_chassis_by_name); diff --git a/northd/northd.c b/northd/northd.c index b01e40ecda..8cd1586171 100644 --- a/northd/northd.c +++ b/northd/northd.c @@ -14,6 +14,7 @@ #include +#include #include #include @@ -30,9 +31,11 @@ #include "hmapx.h" #include "openvswitch/hmap.h" #include "openvswitch/json.h" +#include "openvswitch/shash.h" #include "ovn/lex.h" #include "lb.h" #include "lib/chassis-index.h" +#include "lib/lrp-index.h" #include "lib/ip-mcast-index.h" #include "lib/copp.h" #include "lib/mcast-group-index.h" @@ -1255,6 +1258,11 @@ ovn_port_cleanup(struct ovn_port *port) free(port->ps_addrs); port->ps_addrs = NULL; port->n_ps_addrs = 0; + if (port->is_active_active) { + ovs_assert(port->aa_chassis_name); + free(port->aa_mac); + 
free(port->aa_chassis_name); + } destroy_lport_addresses(&port->lrp_networks); destroy_lport_addresses(&port->proxy_arp_addrs); @@ -1436,6 +1444,32 @@ lrport_is_enabled(const struct nbrec_logical_router_port *lrport) return !lrport->enabled || *lrport->enabled; } +static bool +lrport_is_active_active(const struct nbrec_logical_router_port *lrport) +{ + if (!lrport) { + return false; + } + return smap_get_bool(&lrport->options, "active-active-lrp", false); +} + +static const struct nbrec_logical_router_port * +lsp_get_peer(struct ovsdb_idl_index *nbrec_lrp_by_name, + const struct nbrec_logical_switch_port *nbsp) +{ + if (!lsp_is_router(nbsp)) { + return NULL; + } + + const char *peer_name = smap_get(&nbsp->options, "router-port"); + if (!peer_name) { + return NULL; + } + + return lrp_lookup_by_name(nbrec_lrp_by_name, peer_name); +} + + static bool lsp_force_fdb_lookup(const struct ovn_port *op) { @@ -1465,6 +1499,18 @@ ovn_port_get_peer(const struct hmap *lr_ports, struct ovn_port *op) return ovn_port_find(lr_ports, peer_name); } +static const char * +ovn_port_get_mac(struct ovn_port *op) +{ + if (op->is_active_active) { + return op->aa_mac; + } else if (op->primary_port && op->primary_port->is_active_active) { + return op->primary_port->aa_mac; + } else { + return op->nbrp->mac; + } +} + static void ipam_insert_ip_for_datapath(struct ovn_datapath *od, uint32_t ip, bool dynamic) { @@ -2301,13 +2347,19 @@ join_logical_ports_lrp(struct hmap *ports, return op; } +struct active_active_port { + const struct nbrec_logical_switch_port *nbsp; + const struct nbrec_logical_router_port *nbrp; + struct ovn_datapath *switch_dp; + struct ovn_datapath *router_dp; +}; + static struct ovn_port * create_cr_port(struct ovn_port *op, struct hmap *ports, struct ovs_list *both_dbs, struct ovs_list *nb_only) { - char *redirect_name = ovn_chassis_redirect_name( - op->nbsp ? 
op->nbsp->name : op->nbrp->name); + char *redirect_name = ovn_chassis_redirect_name(op->key); struct ovn_port *crp = ovn_port_find(ports, redirect_name); if (crp && crp->sb && crp->sb->datapath == op->od->sb) { @@ -2352,6 +2404,8 @@ peer_needs_cr_port_creation(struct ovn_port *op) static void join_logical_ports(const struct sbrec_port_binding_table *sbrec_pb_table, + struct ovsdb_idl_index *nbrec_lrp_by_name, + struct ovsdb_idl_index *sbrec_chassis_by_name, struct hmap *ls_datapaths, struct hmap *lr_datapaths, struct hmap *ports, unsigned long *queue_id_bitmap, struct hmap *tag_alloc_table, struct ovs_list *sb_only, @@ -2361,6 +2415,8 @@ join_logical_ports(const struct sbrec_port_binding_table *sbrec_pb_table, ovs_list_init(nb_only); ovs_list_init(both); + struct shash active_active_ports = SHASH_INITIALIZER(&active_active_ports); + const struct sbrec_port_binding *sb; SBREC_PORT_BINDING_TABLE_FOR_EACH (sb, sbrec_pb_table) { struct ovn_port *op = ovn_port_create(ports, sb->logical_port, @@ -2377,6 +2433,20 @@ join_logical_ports(const struct sbrec_port_binding_table *sbrec_pb_table, = od->nbr->ports[i]; struct lport_addresses lrp_networks; + + if (lrport_is_active_active(nbrp)) { + struct ovn_port *op = ovn_port_find_bound(ports, nbrp->name); + if (op) { + ovs_list_remove(&op->list); + } + struct active_active_port *aap = xzalloc( + sizeof(struct active_active_port)); + aap->nbrp = nbrp; + aap->router_dp = od; + shash_add(&active_active_ports, nbrp->name, aap); + continue; + } + if (!extract_lrp_networks(nbrp, &lrp_networks)) { static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 1); @@ -2394,6 +2464,16 @@ join_logical_ports(const struct sbrec_port_binding_table *sbrec_pb_table, for (size_t i = 0; i < od->nbs->n_ports; i++) { const struct nbrec_logical_switch_port *nbsp = od->nbs->ports[i]; + const struct nbrec_logical_router_port *nbrp + = lsp_get_peer(nbrec_lrp_by_name, nbsp); + if (lrport_is_active_active(nbrp)) { + struct active_active_port *aap = + 
shash_find_data(&active_active_ports, nbrp->name); + ovs_assert(aap); + aap->nbsp = nbsp; + aap->switch_dp = od; + continue; + } join_logical_ports_lsp(ports, nb_only, both, od, nbsp, nbsp->name, queue_id_bitmap, tag_alloc_table); @@ -2472,6 +2552,109 @@ join_logical_ports(const struct sbrec_port_binding_table *sbrec_pb_table, } } + /* Now we setup the active-active lrp/lsps */ + struct shash_node *aa_snode; + SHASH_FOR_EACH (aa_snode, &active_active_ports) { + const struct active_active_port *aap = aa_snode->data; + const struct nbrec_logical_switch_port *nbsp = aap->nbsp; + const struct nbrec_logical_router_port *nbrp = aap->nbrp; + ovs_assert(nbrp); + ovs_assert(aap->router_dp); + if (!aap->switch_dp) { + static struct vlog_rate_limit rl + = VLOG_RATE_LIMIT_INIT(5, 1); + VLOG_WARN_RL(&rl, "active-active lrp '%s' is connected to a LSP " + "which is not in northbound.", nbrp->name); + continue; + } + + if (aap->switch_dp->n_localnet_ports != 1) { + static struct vlog_rate_limit rl + = VLOG_RATE_LIMIT_INIT(5, 1); + VLOG_WARN_RL(&rl, "active-active lrp '%s' is not connect to a " + "ls with exactly one localnet port", nbrp->name); + continue; + } + + const struct ovn_port *localnet_port = + aap->switch_dp->localnet_ports[0]; + + const char *network_name = + smap_get(&localnet_port->nbsp->options, "network_name"); + if (!network_name) { + static struct vlog_rate_limit rl + = VLOG_RATE_LIMIT_INIT(5, 1); + VLOG_WARN_RL(&rl, "active-active lrp '%s' has a localnet port " + "connected with no network_name", nbrp->name); + continue; + } + + if (!nbrp->ha_chassis_group) { + static struct vlog_rate_limit rl + = VLOG_RATE_LIMIT_INIT(5, 1); + VLOG_WARN_RL(&rl, "missing 'ha_chassis_group' for" + " active-active-port %s", nbrp->name); + continue; + } + + for (size_t i = 0; i < nbrp->ha_chassis_group->n_ha_chassis; i++) { + const struct nbrec_ha_chassis *hc + = nbrp->ha_chassis_group->ha_chassis[i]; + + const struct sbrec_chassis *chassis = chassis_lookup_by_name( + 
sbrec_chassis_by_name, hc->chassis_name); + if (!chassis) { + static struct vlog_rate_limit rl + = VLOG_RATE_LIMIT_INIT(5, 1); + VLOG_WARN_RL(&rl, "'ha_chassis_group' contains not found" + " chassis %s", hc->chassis_name); + continue; + } + + struct chassis_aa_network networks; + if (!chassis_find_active_active_networks(chassis, network_name, + &networks)) { + static struct vlog_rate_limit rl + = VLOG_RATE_LIMIT_INIT(5, 1); + VLOG_WARN_RL(&rl, "chassis %s does not contain network" + " but it is in ha_chassis_group", chassis->name); + continue; + } + + for (size_t j = 0; j < networks.n_addresses; j++) { + char *lrp_name = xasprintf("%s-%s-%"PRIuSIZE, + nbrp->name, chassis->name, j); + char *lsp_name = xasprintf("%s-%s-%"PRIuSIZE, + nbsp->name, chassis->name, j); + struct ovn_port *lrp = + join_logical_ports_lrp(ports, nb_only, both, &dgps, + aap->router_dp, nbrp, + lrp_name, &networks.addresses[j]); + struct ovn_port *lsp = + join_logical_ports_lsp(ports, nb_only, both, + aap->switch_dp, nbsp, + lsp_name, queue_id_bitmap, + tag_alloc_table); + free(lrp_name); + free(lsp_name); + if (!lrp || !lsp) { + continue; + } + lrp->peer = lsp; + lsp->peer = lrp; + lrp->is_active_active = true; + lsp->is_active_active = true; + lrp->aa_mac = xstrdup(networks.addresses[j].ea_s); + lrp->aa_chassis_name = xstrdup(chassis->name); + lsp->aa_chassis_name = xstrdup(chassis->name); + lrp->aa_chassis_index = j; + lsp->aa_chassis_index = j; + } + free(networks.network_name); + free(networks.addresses); + } + } + struct hmapx_node *hmapx_node; HMAPX_FOR_EACH (hmapx_node, &dgps) { op = hmapx_node->data; @@ -2528,6 +2711,8 @@ join_logical_ports(const struct sbrec_port_binding_table *sbrec_pb_table, HMAP_FOR_EACH (op, key_node, ports) { ipam_add_port_addresses(op->od, op); } + + shash_destroy_free_data(&active_active_ports); } /* Returns an array of strings, each consisting of a MAC address followed @@ -2881,6 +3066,51 @@ sync_ha_chassis_group_for_sbpb( 
sbrec_port_binding_set_ha_chassis_group(pb, sb_ha_grp); } +static char * +generate_ha_chassis_group_active_active( + struct ovsdb_idl_txn *ovnsb_txn, + struct ovsdb_idl_index *sbrec_chassis_by_name, + struct ovsdb_idl_index *sbrec_ha_chassis_grp_by_name, + const char *chassis_name, + const struct sbrec_port_binding *pb) +{ + bool new_sb_chassis_group = false; + char *chassis_group_name = xasprintf( + "active-active-fixed-%s", chassis_name); + const struct sbrec_ha_chassis_group *sb_ha_grp = + ha_chassis_group_lookup_by_name( + sbrec_ha_chassis_grp_by_name, chassis_group_name); + + if (!sb_ha_grp) { + sb_ha_grp = sbrec_ha_chassis_group_insert(ovnsb_txn); + sbrec_ha_chassis_group_set_name(sb_ha_grp, chassis_group_name); + new_sb_chassis_group = true; + } + + if (new_sb_chassis_group) { + struct sbrec_ha_chassis **sb_ha_chassis = NULL; + sb_ha_chassis = xcalloc(1, sizeof *sb_ha_chassis); + const struct sbrec_chassis *chassis = + chassis_lookup_by_name(sbrec_chassis_by_name, chassis_name); + sb_ha_chassis[0] = sbrec_ha_chassis_insert(ovnsb_txn); + /* A NULL chassis is acceptable here: ovn-controller removes + * its row from the chassis table in the OVN SB DB when it + * exits, so the lookup above may find nothing. */ + sbrec_ha_chassis_set_chassis(sb_ha_chassis[0], chassis); + sbrec_ha_chassis_set_priority(sb_ha_chassis[0], 1); + const struct smap external_ids = + SMAP_CONST1(&external_ids, "chassis-name", + chassis_name); + sbrec_ha_chassis_set_external_ids(sb_ha_chassis[0], &external_ids); + sbrec_ha_chassis_group_set_ha_chassis(sb_ha_grp, sb_ha_chassis, + 1); + free(sb_ha_chassis); + } + + sbrec_port_binding_set_ha_chassis_group(pb, sb_ha_grp); + return chassis_group_name; +} + /* This functions translates the gw chassis on the nb database * to HA chassis group in the sb database entries. */ @@ -3135,14 +3365,29 @@ ovn_port_update_sbrec(struct ovsdb_idl_txn *ovnsb_txn, "ignoring the latter.", op->nbrp->name); } - /* HA Chassis group is set. Ignore 'gateway_chassis'. 
*/ - sync_ha_chassis_group_for_sbpb(ovnsb_txn, - sbrec_chassis_by_name, - sbrec_ha_chassis_grp_by_name, - op->nbrp->ha_chassis_group, - op->sb); - sset_add(active_ha_chassis_grps, - op->nbrp->ha_chassis_group->name); + if (op->primary_port && op->primary_port->is_active_active) { + + /* Generate new HA Chassis group just bound to one node. */ + char *ha_chassis_group = + generate_ha_chassis_group_active_active(ovnsb_txn, + sbrec_chassis_by_name, + sbrec_ha_chassis_grp_by_name, + op->primary_port->aa_chassis_name, + op->sb); + sset_add(active_ha_chassis_grps, + ha_chassis_group); + free(ha_chassis_group); + } else { + + /* HA Chassis group is set. Ignore 'gateway_chassis'. */ + sync_ha_chassis_group_for_sbpb(ovnsb_txn, + sbrec_chassis_by_name, + sbrec_ha_chassis_grp_by_name, + op->nbrp->ha_chassis_group, + op->sb); + sset_add(active_ha_chassis_grps, + op->nbrp->ha_chassis_group->name); + } } else if (op->nbrp->n_gateway_chassis) { /* Legacy gateway_chassis support. * Create ha_chassis_group for the Northbound gateway_chassis @@ -4210,6 +4455,7 @@ build_ports(struct ovsdb_idl_txn *ovnsb_txn, const struct sbrec_mirror_table *sbrec_mirror_table, const struct sbrec_mac_binding_table *sbrec_mac_binding_table, const struct sbrec_ha_chassis_group_table *sbrec_ha_chassis_group_table, + struct ovsdb_idl_index *nbrec_lrp_by_name, struct ovsdb_idl_index *sbrec_chassis_by_name, struct ovsdb_idl_index *sbrec_chassis_by_hostname, struct ovsdb_idl_index *sbrec_ha_chassis_grp_by_name, @@ -4230,7 +4476,10 @@ build_ports(struct ovsdb_idl_txn *ovnsb_txn, /* Borrow ls_ports for joining NB and SB for both LSPs and LRPs. * We will split them later. 
*/ struct hmap *ports = ls_ports; - join_logical_ports(sbrec_port_binding_table, ls_datapaths, lr_datapaths, + join_logical_ports(sbrec_port_binding_table, + nbrec_lrp_by_name, + sbrec_chassis_by_name, + ls_datapaths, lr_datapaths, ports, queue_id_bitmap, &tag_alloc_table, &sb_only, &nb_only, &both); @@ -13125,7 +13374,7 @@ build_lrouter_icmp_packet_toobig_admin_flows( " (ip6 && icmp6.type == 2 && icmp6.code == 0)) &&" " eth.dst == %s && !is_chassis_resident(%s) &&" " flags.tunnel_rx == 1", - op->nbrp->mac, op->cr_port->json_key); + ovn_port_get_mac(op), op->cr_port->json_key); ds_clear(actions); ds_put_format(actions, "outport <-> inport; inport = %s; next;", op->json_key); @@ -13168,7 +13417,7 @@ build_lswitch_icmp_packet_toobig_admin_flows( "((ip4 && icmp4.type == 3 && icmp4.code == 4) ||" " (ip6 && icmp6.type == 2 && icmp6.code == 0)) && " "eth.src == %s && outport == %s && flags.tunnel_rx == 1", - peer->nbrp->mac, op->json_key); + ovn_port_get_mac(peer), op->json_key); ovn_lflow_add(lflows, op->od, S_SWITCH_IN_CHECK_PORT_SEC, 120, ds_cstr(match), "outport <-> inport; next;", op->lflow_ref); @@ -13177,7 +13426,7 @@ build_lswitch_icmp_packet_toobig_admin_flows( "((ip4 && icmp4.type == 3 && icmp4.code == 4) ||" " (ip6 && icmp6.type == 2 && icmp6.code == 0)) && " "eth.dst == %s && flags.tunnel_rx == 1", - peer->nbrp->mac); + ovn_port_get_mac(peer)); ds_clear(actions); ds_put_format(actions, "outport <-> inport; next(pipeline=ingress,table=%d);", @@ -19183,6 +19432,7 @@ ovnnb_db_run(struct northd_input *input_data, input_data->sbrec_mirror_table, input_data->sbrec_mac_binding_table, input_data->sbrec_ha_chassis_group_table, + input_data->nbrec_lrp_by_name, input_data->sbrec_chassis_by_name, input_data->sbrec_chassis_by_hostname, input_data->sbrec_ha_chassis_grp_by_name, diff --git a/northd/northd.h b/northd/northd.h index 9457a7be6a..1d837f151d 100644 --- a/northd/northd.h +++ b/northd/northd.h @@ -64,6 +64,7 @@ struct northd_input { const struct chassis_features 
*features; /* Indexes */ + struct ovsdb_idl_index *nbrec_lrp_by_name; struct ovsdb_idl_index *sbrec_chassis_by_name; struct ovsdb_idl_index *sbrec_chassis_by_hostname; struct ovsdb_idl_index *sbrec_ha_chassis_grp_by_name; @@ -662,6 +663,15 @@ struct ovn_port { /* Only used for the router type LSP whose peer is l3dgw_port */ bool enable_router_port_acl; + /* Used for active-active port bindings to store the data they were + * generated from */ + bool is_active_active; + char *aa_chassis_name; + size_t aa_chassis_index; + /* The following value is only set on the lrp side of an + * active-active port binding */ + char *aa_mac; + /* Reference of lflows generated for this ovn_port. * * This data is initialized and destroyed by the en_northd node, but diff --git a/ovn-nb.xml b/ovn-nb.xml index 8373ddb998..9896d93106 100644 --- a/ovn-nb.xml +++ b/ovn-nb.xml @@ -3672,6 +3672,33 @@ or learned by the ovn-ic daemon.

+ + +

+ If set to true, this turns this LRP and its associated LSP into a template + that will be used by ovn-northd to generate multiple LRP/LSP + combinations. + One or more LRP/LSP combinations will be built for each chassis + specified in the ha_chassis_group of this LRP, + independent of their priority. + The number of LRP/LSP combinations per chassis as well as their IP + and MAC addresses are determined by the value of + ovn-active-active-mappings on that chassis. + + The MAC and IP configuration of this LRP in the northbound database + is ignored. +

+

+ The LRP must be connected via its LSP to an external network. + + Having multiple such LRPs on a single router is not supported. + + Having multiple such LRPs on different routers connected to the same + external network is also not supported. +

+
diff --git a/tests/ovn-macros.at b/tests/ovn-macros.at index efb333a47c..3958aec363 100644 --- a/tests/ovn-macros.at +++ b/tests/ovn-macros.at @@ -82,6 +82,18 @@ m4_divert_text([PREPARE_TESTS], [dump_diff__ "$rcv_pcap" "$exp_text"]) } + ovn_wait_packets_parts__ () { + echo "$3: waiting for packets matching $2 at $1:" + rcv_pcap=$1 + rcv_text=`echo "$rcv_pcap.packets" | sed 's/\.pcap//'` + exp_text=$2 + OVS_WAIT_UNTIL( + [$PYTHON "$ovs_srcdir/utilities/ovs-pcap.in" $rcv_pcap > $rcv_text + grep -q "$exp_text" "$rcv_text"], + [echo "Received:" + cat "$rcv_text"]) + } + ovn_wait_patch_port_flows () { for localnet in $1; do patch_port="patch-br-int-to-$localnet" @@ -168,6 +180,11 @@ m4_define([OVN_CHECK_PACKETS_CONTAIN], m4_define([OVN_CHECK_PACKETS_UNIQ], [ovn_wait_packets_uniq__ "$1" "$2" "__file__:__line__" $3]) +# OVN_CHECK_PACKETS_CONTAIN_PARTS succeeds if the expected part is found +# in some packets. Other packets are ignored. +m4_define([OVN_CHECK_PACKETS_CONTAIN_PARTS], + [ovn_wait_packets_parts__ "$1" "$2" "__file__:__line__"]) + m4_define([OVN_WAIT_PATCH_PORT_FLOWS], [ovn_wait_patch_port_flows "$1" "$2" "__file__:__line__"]) diff --git a/tests/ovn.at b/tests/ovn.at index b127198952..fbf83d2fcc 100644 --- a/tests/ovn.at +++ b/tests/ovn.at @@ -39767,7 +39767,297 @@ check ovn-nbctl --wait=hv sync check_column $remote_chassis Port_Binding chassis logical_port=lsp OVN_CLEANUP([hv1]) +AT_CLEANUP +]) + +OVN_FOR_EACH_NORTHD([ +AT_SETUP([active-active LRPs]) + +# This test uses active-active LRPs on a simulated multi-tenant internet +# connection. +# Tenant 1 (pr1, p1, vif1) is connected to the internet via NAT on pr1. +# Tenant 2 (pr2, p2, vif2) is connected to the internet via routing. +# The connections of pr1 and pr2 to public are using DGPs. +# pr1 is thereby bound to hv1, while pr2 is bound to hv2. +# The connection from internet to phys is also using a DGP. +# This DGP is built active-active over hv1 and hv2. +# It has two interfaces on hv1 and one on hv2. 
+# The LS phys is assumed to be used for peering with a router outside OVN. +# +# +# +----+ +----+ +# |vif1| |vif2| +# +--+-+ +--+-+ +# | | +# +--+--+ +--+--+ +# |LS p1| |LS p2| +# +--+--+ +--+--+ +# | | +# +--+---+ +--+---+ +# |LR pr1| |LR pr2| +# +-----++ ++-----+ +# | | +# ++-------++ +# |LS public| +# +-----+---+ +# | +# +-----+-----+ +# |LR internet| +# +-----+-----+ +# | +# +---+---+ +# |LS phys| +# +-------+ + +# hv setup + +ovn_start + +# n1 is the overlay network +net_add n1 + +# these are the respective chassis-to-switch networks +net_add hv1-1 +net_add hv1-2 +net_add hv2-1 + +for i in 1 2; do + sim_add hv${i} + as hv${i} + ovs-vsctl add-br br-phys + ovs-vsctl add-br br-tun + ovn_attach n1 br-tun 172.16.0.${i} + ovs-vsctl set open . external_ids:ovn-bridge-mappings=phys:br-phys +done + +as hv1 net_attach hv1-1 br-phys +as hv2 net_attach hv2-1 br-phys + +# having two ports on hv1 br-phys is more complex as net_attach does not like +# it. We just attach it to another brige and then connect both. +# Also we need to add fdb entries so packets can later be counted correctly +# and we don't flood +as hv1 ovs-vsctl add-br br-phys2 \ + -- add-port br-phys patch-br-phys-to-br-phys2 \ + -- add-port br-phys2 patch-br-phys2-to-br-phys \ + -- set Interface patch-br-phys-to-br-phys2 type=patch \ + options:peer=patch-br-phys2-to-br-phys \ + -- set Interface patch-br-phys2-to-br-phys type=patch \ + options:peer=patch-br-phys-to-br-phys2 +as hv1 net_attach hv1-2 br-phys2 +as hv1 ovs-appctl fdb/add br-phys br-phys_hv1-1 0 00:cc:cc:cc:cc:10 +as hv1 ovs-appctl fdb/add br-phys patch-br-phys-to-br-phys2 0 00:cc:cc:cc:cc:11 + +# active-active mappings + +as hv1 ovs-vsctl set open . external_ids:ovn-active-active-mappings="phys;00:01:01:00:00:01,192.168.10.10/24;00:01:02:00:00:01,192.168.11.10/24" +as hv2 ovs-vsctl set open . 
external_ids:ovn-active-active-mappings="phys;00:02:01:00:00:01,192.168.20.10/24" +# LS setup + +check ovn-nbctl ls-add public +check ovn-nbctl ls-add phys +check ovn-nbctl ls-add p1 +check ovn-nbctl ls-add p2 + +# LR internet setup + +check ovn-nbctl lr-add internet + +check ovn-nbctl lrp-add internet internet-public \ + 00:00:02:01:02:03 192.0.2.1/24 +check ovn-nbctl lsp-add public public-internet \ + -- set Logical_Switch_Port public-internet type=router \ + options:router-port=internet-public \ + -- lsp-set-addresses public-internet router + +check ovn-nbctl ha-chassis-group-add to-phys +check ovn-nbctl ha-chassis-group-add-chassis to-phys hv1 1 +check ovn-nbctl ha-chassis-group-add-chassis to-phys hv2 2 +ha_chassis_uuid=$(ovn-nbctl --column _uuid --bare list HA_Chassis_group) + +# actual mac address is irrelevant here, it is ignored if active-active-lrp +# is set. +check ovn-nbctl lrp-add internet internet-phys \ + "00:00:00:00:00:01" "1.1.1.1/32" \ + -- set Logical_Router_Port internet-phys \ + options:active-active-lrp=true \ + ha_chassis_group=${ha_chassis_uuid} +check ovn-nbctl lsp-add phys phys-internet \ + -- set Logical_Switch_Port phys-internet type=router \ + options:router-port=internet-phys \ + -- lsp-set-addresses phys-internet router + +check ovn-nbctl --ecmp lr-route-add internet 0.0.0.0/0 192.168.10.1 +check ovn-nbctl --ecmp lr-route-add internet 0.0.0.0/0 192.168.11.1 +check ovn-nbctl --ecmp lr-route-add internet 0.0.0.0/0 192.168.20.1 +# since there are no real systems behind these nexthops we need static mac +# entries. These must be built with the future logical port names after northd +# dervies them. We cant use static-mac-binding-add here as the port does not +# exist on the northbound side. 
+ovn-nbctl create Static_MAC_Binding logical_port=internet-phys-hv1-0 \ + ip=192.168.10.1 mac='"00:cc:cc:cc:cc:10"' +ovn-nbctl create Static_MAC_Binding logical_port=internet-phys-hv1-1 \ + ip=192.168.11.1 mac='"00:cc:cc:cc:cc:11"' +ovn-nbctl create Static_MAC_Binding logical_port=internet-phys-hv2-0 \ + ip=192.168.20.1 mac='"00:cc:cc:cc:cc:20"' + +# LR pr1 setup + +check ovn-nbctl lr-add pr1 \ + -- set Logical_Router pr1 options:requested-tnl-key=1338 + +check ovn-nbctl lrp-add pr1 pr1-public \ + 00:00:02:01:02:04 192.0.2.2/24 +check ovn-nbctl lrp-set-gateway-chassis pr1-public hv1 +check ovn-nbctl lsp-add public public-pr1 \ + -- set Logical_Switch_Port public-pr1 type=router \ + options:router-port=pr1-public \ + -- lsp-set-addresses public-pr1 router + +check ovn-nbctl lrp-add pr1 pr1-p1 \ + 00:00:03:00:00:01 10.0.0.1/24 +check ovn-nbctl lsp-add p1 p1-pr1 \ + -- set Logical_Switch_Port p1-pr1 type=router \ + options:router-port=pr1-p1 \ + -- lsp-set-addresses p1-pr1 router + +check ovn-nbctl lr-route-add pr1 0.0.0.0/0 192.0.2.1 + +# LR pr2 setup + +check ovn-nbctl lr-add pr2 \ + -- set Logical_Router pr2 options:requested-tnl-key=1339 + +check ovn-nbctl lrp-add pr2 pr2-public \ + 00:00:02:01:02:05 192.0.2.3/24 +check ovn-nbctl lrp-set-gateway-chassis pr2-public hv2 +check ovn-nbctl lsp-add public public-pr2 \ + -- set Logical_Switch_Port public-pr2 type=router \ + options:router-port=pr2-public \ + -- lsp-set-addresses public-pr2 router + +check ovn-nbctl lrp-add pr2 pr2-p2 \ + 00:00:04:00:00:01 198.51.100.1/24 +check ovn-nbctl lsp-add p2 p2-pr2 \ + -- set Logical_Switch_Port p2-pr2 type=router \ + options:router-port=pr2-p2 \ + -- lsp-set-addresses p2-pr2 router + +check ovn-nbctl lr-route-add pr2 0.0.0.0/0 192.0.2.1 + +# Setup lsp "vif1" with NAT +check ovn-nbctl lsp-add p1 vif1 \ + -- lsp-set-addresses vif1 "00:00:ff:ff:ff:01 10.0.0.2" +as hv1 ovs-vsctl add-port br-int vif1 -- \ + set Interface vif1 external-ids:iface-id=vif1 \ + 
options:tx_pcap=hv1/vif1-tx.pcap \ + options:rxq_pcap=hv1/vif1-rx.pcap +check ovn-nbctl lr-nat-add pr1 dnat_and_snat 192.0.2.10 10.0.0.2 + +# Setup lsp "vif2" with a static route on LR internet +check ovn-nbctl lsp-add p2 vif2 \ + -- lsp-set-addresses vif2 "00:00:ff:ff:ff:02 198.51.100.10" +as hv2 ovs-vsctl add-port br-int vif2 -- \ + set Interface vif2 external-ids:iface-id=vif2 \ + options:tx_pcap=hv2/vif2-tx.pcap \ + options:rxq_pcap=hv2/vif2-rx.pcap +check ovn-nbctl lr-route-add internet 198.51.100.0/24 192.0.2.3 + +# Configure external connectivity +check ovn-nbctl lsp-add phys phys1 \ + -- lsp-set-addresses phys1 unknown \ + -- lsp-set-type phys1 localnet \ + -- lsp-set-options phys1 network_name=phys + +check ovn-nbctl --wait=hv sync +wait_for_ports_up +OVN_POPULATE_ARP + +# for each active-active-mapping there should now be one +# LRP/LSP combination on the southbound side +check_row_count Port_Binding 1 logical_port=internet-phys-hv1-0 +check_row_count Port_Binding 1 logical_port=internet-phys-hv1-1 +check_row_count Port_Binding 1 logical_port=internet-phys-hv2-0 +check_row_count Port_Binding 1 logical_port=phys-internet-hv1-0 +check_row_count Port_Binding 1 logical_port=phys-internet-hv1-1 +check_row_count Port_Binding 1 logical_port=phys-internet-hv2-0 + +outside_to_vif() { + local hv=$1 vif=$2 send=$3 receive=$4 + local packet=$(fmt_pkt \ + "Ether(dst='00:01:01:00:00:01', src='00:aa:bb:cc:dd:ee')/ $send") + as hv1 ovs-appctl netdev-dummy/receive br-phys_hv1-1 $packet + local packet=$(fmt_pkt \ + "Ether(dst='00:01:02:00:00:01', src='00:aa:bb:cc:dd:ee')/ $send") + as hv1 ovs-appctl netdev-dummy/receive br-phys2_hv1-2 $packet + local packet=$(fmt_pkt \ + "Ether(dst='00:02:01:00:00:01', src='00:aa:bb:cc:dd:ee')/ $send") + as hv2 ovs-appctl netdev-dummy/receive br-phys_hv2-1 $packet + local out_packet=$(fmt_pkt "$receive") + local filename="hv$hv-vif$vif-tx.expected" + rm -f $filename + for i in `seq 1 3`; do + echo $out_packet >> 
$filename + done; + + OVN_CHECK_PACKETS_REMOVE_BROADCAST([hv$hv/vif$vif-tx.pcap], [$filename]) +} + +# injecting packets on any of the active-active interfaces will allow them +# to reach vif1 and vif2 +outside_to_vif 1 1 "IP(dst='192.0.2.10', src='1.1.1.1')/ TCP()" \ + "Ether(dst='00:00:ff:ff:ff:01', src='00:00:03:00:00:01')/ \ + IP(dst='10.0.0.2', src='1.1.1.1', ttl=62)/ TCP()" +outside_to_vif 2 2 "IP(dst='198.51.100.10', src='1.1.1.1')/ TCP()" \ + "Ether(dst='00:00:ff:ff:ff:02', src='00:00:04:00:00:01')/ \ + IP(dst='198.51.100.10', src='1.1.1.1', ttl=62)/ TCP()" + +# packets from vif1 and vif2 to the internet will use any of the active-active +# interfaces. As the dp_hash will decide which they use, we only check if +# each of the egress interfaces has at least one expected packet going out. +for i in `seq 1 100`; do + packet=$(fmt_pkt "Ether(dst='00:00:03:00:00:01', src='00:00:ff:ff:ff:01')/ \ + IP(dst='1.1.1.1', src='10.0.0.2')/ TCP(dport=5$i)") + as hv1 ovs-appctl netdev-dummy/receive vif1 $packet + packet=$(fmt_pkt "Ether(dst='00:00:04:00:00:01', src='00:00:ff:ff:ff:02')/ \ + IP(dst='1.1.1.1', src='198.51.100.10')/ TCP(dport=5$i)") + as hv2 ovs-appctl netdev-dummy/receive vif2 $packet +done + +# for vif1 (SNATed to 192.0.2.10); only the first 68 chars are matched +packet=$(fmt_pkt "Ether(dst='00:cc:cc:cc:cc:10', src='00:01:01:00:00:01')/ \ + IP(dst='1.1.1.1', src='192.0.2.10', ttl=62)/ TCP()" | \ + cut -c 1-68) +OVN_CHECK_PACKETS_CONTAIN_PARTS([hv1/br-phys_hv1-1-tx.pcap], ["$packet"]) +packet=$(fmt_pkt "Ether(dst='00:cc:cc:cc:cc:11', src='00:01:02:00:00:01')/ \ + IP(dst='1.1.1.1', src='192.0.2.10', ttl=62)/ TCP()" | \ + cut -c 1-68) +OVN_CHECK_PACKETS_CONTAIN_PARTS([hv1/br-phys2_hv1-2-tx.pcap], ["$packet"]) +packet=$(fmt_pkt "Ether(dst='00:cc:cc:cc:cc:20', src='00:02:01:00:00:01')/ \ + IP(dst='1.1.1.1', src='192.0.2.10', ttl=62)/ TCP()" | \ + cut -c 1-68) +OVN_CHECK_PACKETS_CONTAIN_PARTS([hv2/br-phys_hv2-1-tx.pcap], ["$packet"]) + +# for vif2 (source not translated); only the first 68 chars are matched +packet=$(fmt_pkt "Ether(dst='00:cc:cc:cc:cc:10', src='00:01:01:00:00:01')/ 
\ + IP(dst='1.1.1.1', src='198.51.100.10', ttl=62)/ TCP()" | \ + cut -c 1-68) +OVN_CHECK_PACKETS_CONTAIN_PARTS([hv1/br-phys_hv1-1-tx.pcap], ["$packet"]) +packet=$(fmt_pkt "Ether(dst='00:cc:cc:cc:cc:11', src='00:01:02:00:00:01')/ \ + IP(dst='1.1.1.1', src='198.51.100.10', ttl=62)/ TCP()" | \ + cut -c 1-68) +OVN_CHECK_PACKETS_CONTAIN_PARTS([hv1/br-phys2_hv1-2-tx.pcap], ["$packet"]) +packet=$(fmt_pkt "Ether(dst='00:cc:cc:cc:cc:20', src='00:02:01:00:00:01')/ \ + IP(dst='1.1.1.1', src='198.51.100.10', ttl=62)/ TCP()" | \ + cut -c 1-68) +OVN_CHECK_PACKETS_CONTAIN_PARTS([hv2/br-phys_hv2-1-tx.pcap], ["$packet"]) + +OVN_CLEANUP([hv1 +/Couldn't parse MAC binding/d +/left allocated when ofproto/d],[hv2 +/Couldn't parse MAC binding/d +/left allocated when ofproto/d]) AT_CLEANUP ])