diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 32a4cdf37dd4..84c36a7f873f 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -3016,6 +3016,9 @@ extern rwlock_t dev_base_lock; /* Device list lock */ if (netdev_master_upper_dev_get_rcu(slave) == (bond)) #define net_device_entry(lh) list_entry(lh, struct net_device, dev_list) +#define for_each_netdev_dump(net, d, ifindex) \ + xa_for_each_start(&(net)->dev_by_index, (ifindex), (d), (ifindex)) + static inline struct net_device *next_net_device(struct net_device *dev) { struct list_head *lh; diff --git a/include/net/net_namespace.h b/include/net/net_namespace.h index 78beaa765c73..9f6add96de2d 100644 --- a/include/net/net_namespace.h +++ b/include/net/net_namespace.h @@ -42,6 +42,7 @@ #include #include #include +#include struct user_namespace; struct proc_dir_entry; @@ -69,7 +70,7 @@ struct net { atomic_t dev_unreg_count; unsigned int dev_base_seq; /* protected by rtnl_mutex */ - int ifindex; + u32 ifindex; spinlock_t nsid_lock; atomic_t fnhe_genid; @@ -110,6 +111,7 @@ struct net { struct hlist_head *dev_name_head; struct hlist_head *dev_index_head; + struct xarray dev_by_index; struct raw_notifier_head netdev_chain; /* Note that @hash_mix can be read millions times per second, diff --git a/net/core/dev.c b/net/core/dev.c index e7ffcfa037f7..b58674774a57 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -388,6 +388,8 @@ static void list_netdevice(struct net_device *dev) hlist_add_head_rcu(&dev->index_hlist, dev_index_hash(net, dev->ifindex)); write_unlock(&dev_base_lock); + /* We reserved the ifindex, this can't fail */ + WARN_ON(xa_store(&net->dev_by_index, dev->ifindex, dev, GFP_KERNEL)); dev_base_seq_inc(net); } @@ -397,8 +399,12 @@ static void list_netdevice(struct net_device *dev) */ static void unlist_netdevice(struct net_device *dev, bool lock) { + struct net *net = dev_net(dev); + ASSERT_RTNL(); + xa_erase(&net->dev_by_index, dev->ifindex); + /* Unlink dev from the device chain */ if (lock) write_lock(&dev_base_lock); @@ -9565,23 +9571,35 @@ int dev_change_xdp_fd(struct net_device *dev, struct netlink_ext_ack *extack, } /** - * dev_new_index - allocate an ifindex - * @net: the applicable net namespace + * dev_index_reserve() - allocate an ifindex in a namespace + * @net: the applicable net namespace + * @ifindex: requested ifindex, pass %0 to get one allocated + * + * Allocate a ifindex for a new device. Caller must either use the ifindex + * to store the device (via list_netdevice()) or call dev_index_release() + * to give the index up. * - * Returns a suitable unique value for a new device interface - * number. The caller must hold the rtnl semaphore or the - * dev_base_lock to be sure it remains unique. + * Return: a suitable unique value for a new device interface number or -errno. */ -static int dev_new_index(struct net *net) +static int dev_index_reserve(struct net *net, u32 ifindex) { - int ifindex = net->ifindex; + int err; - for (;;) { - if (++ifindex <= 0) - ifindex = 1; - if (!__dev_get_by_index(net, ifindex)) - return net->ifindex = ifindex; - } + if (!ifindex) + err = xa_alloc_cyclic(&net->dev_by_index, &ifindex, NULL, + xa_limit_31b, &net->ifindex, GFP_KERNEL); + else + err = xa_insert(&net->dev_by_index, ifindex, NULL, GFP_KERNEL); + if (err < 0) + return err; + + return ifindex; +} + +static void dev_index_release(struct net *net, int ifindex) +{ + /* Expect only unused indexes, unlist_netdevice() removes the used */ + WARN_ON(xa_erase(&net->dev_by_index, ifindex)); } /* Delayed registration/unregisteration */ @@ -10051,11 +10069,10 @@ int register_netdevice(struct net_device *dev) goto err_uninit; } - ret = -EBUSY; - if (!dev->ifindex) - dev->ifindex = dev_new_index(net); - else if (__dev_get_by_index(net, dev->ifindex)) + ret = dev_index_reserve(net, dev->ifindex); + if (ret < 0) goto err_uninit; + dev->ifindex = ret; /* Transfer changeable features to wanted_features and enable * software offloads (GSO and GRO). @@ -10102,7 +10119,7 @@ int register_netdevice(struct net_device *dev) ret = call_netdevice_notifiers(NETDEV_POST_INIT, dev); ret = notifier_to_errno(ret); if (ret) - goto err_uninit; + goto err_ifindex_release; ret = netdev_register_kobject(dev); write_lock(&dev_base_lock); @@ -10158,6 +10175,8 @@ int register_netdevice(struct net_device *dev) err_uninit_notify: call_netdevice_notifiers(NETDEV_PRE_UNINIT, dev); +err_ifindex_release: + dev_index_release(net, dev->ifindex); err_uninit: if (dev->netdev_ops->ndo_uninit) dev->netdev_ops->ndo_uninit(dev); @@ -11035,9 +11054,19 @@ int __dev_change_net_namespace(struct net_device *dev, struct net *net, } /* Check that new_ifindex isn't used yet. */ - err = -EBUSY; - if (new_ifindex && __dev_get_by_index(net, new_ifindex)) - goto out; + if (new_ifindex) { + err = dev_index_reserve(net, new_ifindex); + if (err < 0) + goto out; + } else { + /* If there is an ifindex conflict assign a new one */ + err = dev_index_reserve(net, dev->ifindex); + if (err == -EBUSY) + err = dev_index_reserve(net, 0); + if (err < 0) + goto out; + new_ifindex = err; + } /* * And now a mini version of register_netdevice unregister_netdevice. @@ -11065,13 +11094,6 @@ int __dev_change_net_namespace(struct net_device *dev, struct net *net, rcu_barrier(); new_nsid = peernet2id_alloc(dev_net(dev), net, GFP_KERNEL); - /* If there is an ifindex conflict assign a new one */ - if (!new_ifindex) { - if (__dev_get_by_index(net, dev->ifindex)) - new_ifindex = dev_new_index(net); - else - new_ifindex = dev->ifindex; - } rtmsg_ifinfo_newnet(RTM_DELLINK, dev, ~0U, GFP_KERNEL, &new_nsid, new_ifindex); @@ -11249,6 +11271,9 @@ static int __net_init netdev_init(struct net *net) if (net->dev_index_head == NULL) goto err_idx; + net->ifindex = 1; + xa_init_flags(&net->dev_by_index, XA_FLAGS_ALLOC); + RAW_INIT_NOTIFIER_HEAD(&net->netdev_chain); return 0; @@ -11346,6 +11371,7 @@ static void __net_exit netdev_exit(struct net *net) { kfree(net->dev_name_head); kfree(net->dev_index_head); + xa_destroy(&net->dev_by_index); if (net != &init_net) WARN_ON_ONCE(!list_empty(&net->dev_base_head)); } diff --git a/net/core/netdev-genl.c b/net/core/netdev-genl.c index 65ef4867fc49..797c813c7c77 100644 --- a/net/core/netdev-genl.c +++ b/net/core/netdev-genl.c @@ -101,43 +101,22 @@ int netdev_nl_dev_get_dumpit(struct sk_buff *skb, struct netlink_callback *cb) { struct net *net = sock_net(skb->sk); struct net_device *netdev; - int idx = 0, s_idx; - int h, s_h; - int err; - - s_h = cb->args[0]; - s_idx = cb->args[1]; + int err = 0; rtnl_lock(); - - for (h = s_h; h < NETDEV_HASHENTRIES; h++, s_idx = 0) { - struct hlist_head *head; - - idx = 0; - head = &net->dev_index_head[h]; - hlist_for_each_entry(netdev, head, index_hlist) { - if (idx < s_idx) - goto cont; - err = netdev_nl_dev_fill(netdev, skb, - NETLINK_CB(cb->skb).portid, - cb->nlh->nlmsg_seq, 0, - NETDEV_CMD_DEV_GET); - if (err < 0) - break; -cont: - idx++; - } + for_each_netdev_dump(net, netdev, cb->args[0]) { + err = netdev_nl_dev_fill(netdev, skb, + NETLINK_CB(cb->skb).portid, + cb->nlh->nlmsg_seq, 0, + NETDEV_CMD_DEV_GET); + if (err < 0) + break; } - rtnl_unlock(); if (err != -EMSGSIZE) return err; - cb->args[1] = idx; - cb->args[0] = h; - cb->seq = net->dev_base_seq; - return skb->len; } diff --git a/net/ethtool/netlink.c b/net/ethtool/netlink.c index 39a459b0111b..ae344f1b0bbd 100644 --- a/net/ethtool/netlink.c +++ b/net/ethtool/netlink.c @@ -252,8 +252,7 @@ int ethnl_multicast(struct sk_buff *skb, struct net_device *dev) * @ops: request ops of currently processed message type * @req_info: parsed request header of processed request * @reply_data: data needed to compose the reply - * @pos_hash: saved iteration position - hashbucket - * @pos_idx: saved iteration position - index + * @pos_ifindex: saved iteration position - ifindex * * These parameters are kept in struct netlink_callback as context preserved * between iterations. They are initialized by ethnl_default_start() and used @@ -263,8 +262,7 @@ struct ethnl_dump_ctx { const struct ethnl_request_ops *ops; struct ethnl_req_info *req_info; struct ethnl_reply_data *reply_data; - int pos_hash; - int pos_idx; + unsigned long pos_ifindex; }; static const struct ethnl_request_ops * @@ -490,55 +488,27 @@ static int ethnl_default_dumpit(struct sk_buff *skb, { struct ethnl_dump_ctx *ctx = ethnl_dump_context(cb); struct net *net = sock_net(skb->sk); - int s_idx = ctx->pos_idx; - int h, idx = 0; + struct net_device *dev; int ret = 0; rtnl_lock(); - for (h = ctx->pos_hash; h < NETDEV_HASHENTRIES; h++, s_idx = 0) { - struct hlist_head *head; - struct net_device *dev; - unsigned int seq; - - head = &net->dev_index_head[h]; - -restart_chain: - seq = net->dev_base_seq; - cb->seq = seq; - idx = 0; - hlist_for_each_entry(dev, head, index_hlist) { - if (idx < s_idx) - goto cont; - dev_hold(dev); - rtnl_unlock(); - - ret = ethnl_default_dump_one(skb, dev, ctx, cb); - dev_put(dev); - if (ret < 0) { - if (ret == -EOPNOTSUPP) - goto lock_and_cont; - if (likely(skb->len)) - ret = skb->len; - goto out; - } -lock_and_cont: - rtnl_lock(); - if (net->dev_base_seq != seq) { - s_idx = idx + 1; - goto restart_chain; - } -cont: - idx++; - } + for_each_netdev_dump(net, dev, ctx->pos_ifindex) { + dev_hold(dev); + rtnl_unlock(); + + ret = ethnl_default_dump_one(skb, dev, ctx, cb); + + rtnl_lock(); + dev_put(dev); + if (ret < 0 && ret != -EOPNOTSUPP) { + if (likely(skb->len)) + ret = skb->len; + break; + } } rtnl_unlock(); -out: - ctx->pos_hash = h; - ctx->pos_idx = idx; - nl_dump_check_consistent(cb, nlmsg_hdr(skb)); - return ret; } @@ -584,8 +554,7 @@ static int ethnl_default_start(struct netlink_callback *cb) ctx->ops = ops; ctx->req_info = req_info; ctx->reply_data = reply_data; - ctx->pos_hash = 0; - ctx->pos_idx = 0; + ctx->pos_ifindex = 0; return 0; diff --git a/net/ethtool/tunnels.c b/net/ethtool/tunnels.c index 67fb414ca859..05f752557b5e 100644 --- a/net/ethtool/tunnels.c +++ b/net/ethtool/tunnels.c @@ -212,8 +212,7 @@ int ethnl_tunnel_info_doit(struct sk_buff *skb, struct genl_info *info) struct ethnl_tunnel_info_dump_ctx { struct ethnl_req_info req_info; - int pos_hash; - int pos_idx; + unsigned long ifindex; }; int ethnl_tunnel_info_start(struct netlink_callback *cb) @@ -243,57 +242,39 @@ int ethnl_tunnel_info_dumpit(struct sk_buff *skb, struct netlink_callback *cb) { struct ethnl_tunnel_info_dump_ctx *ctx = (void *)cb->ctx; struct net *net = sock_net(skb->sk); - int s_idx = ctx->pos_idx; - int h, idx = 0; + struct net_device *dev; int ret = 0; void *ehdr; rtnl_lock(); - cb->seq = net->dev_base_seq; - for (h = ctx->pos_hash; h < NETDEV_HASHENTRIES; h++, s_idx = 0) { - struct hlist_head *head; - struct net_device *dev; - - head = &net->dev_index_head[h]; - idx = 0; - hlist_for_each_entry(dev, head, index_hlist) { - if (idx < s_idx) - goto cont; - - ehdr = ethnl_dump_put(skb, cb, - ETHTOOL_MSG_TUNNEL_INFO_GET_REPLY); - if (!ehdr) { - ret = -EMSGSIZE; - goto out; - } - - ret = ethnl_fill_reply_header(skb, dev, ETHTOOL_A_TUNNEL_INFO_HEADER); - if (ret < 0) { - genlmsg_cancel(skb, ehdr); - goto out; - } - - ctx->req_info.dev = dev; - ret = ethnl_tunnel_info_fill_reply(&ctx->req_info, skb); - ctx->req_info.dev = NULL; - if (ret < 0) { - genlmsg_cancel(skb, ehdr); - if (ret == -EOPNOTSUPP) - goto cont; - goto out; - } - genlmsg_end(skb, ehdr); -cont: - idx++; + for_each_netdev_dump(net, dev, ctx->ifindex) { + ehdr = ethnl_dump_put(skb, cb, + ETHTOOL_MSG_TUNNEL_INFO_GET_REPLY); + if (!ehdr) { + ret = -EMSGSIZE; + break; } + + ret = ethnl_fill_reply_header(skb, dev, + ETHTOOL_A_TUNNEL_INFO_HEADER); + if (ret < 0) { + genlmsg_cancel(skb, ehdr); + break; + } + + ctx->req_info.dev = dev; + ret = ethnl_tunnel_info_fill_reply(&ctx->req_info, skb); + ctx->req_info.dev = NULL; + if (ret < 0) { + genlmsg_cancel(skb, ehdr); + if (ret == -EOPNOTSUPP) + continue; + break; + } + genlmsg_end(skb, ehdr); } -out: rtnl_unlock(); - ctx->pos_hash = h; - ctx->pos_idx = idx; - nl_dump_check_consistent(cb, nlmsg_hdr(skb)); - if (ret == -EMSGSIZE && skb->len) return skb->len; return ret;