Skip to content

Commit

Permalink
ipv6: give an IPv6 dev to blackhole_netdev
Browse files Browse the repository at this point in the history
IPv6 addrconf notifiers wants the loopback device to
be the last device being dismantled at netns deletion.

This caused many limitations and work arounds.

Back in linux-5.3, Mahesh added a per host blackhole_netdev
that can be used whenever we need to make sure objects no longer
refer to a disappearing device.

If we attach to blackhole_netdev an ip6_ptr (allocate an idev),
then we can use this special device (which is never freed)
in place of the loopback_dev (which can be freed).

This will permit improvements in netdev_run_todo() and other parts
of the stack where had steps to make sure loopback_dev was
the last device to disappear.

Signed-off-by: Eric Dumazet <[email protected]>
Cc: Mahesh Bandewar <[email protected]>
Signed-off-by: David S. Miller <[email protected]>
  • Loading branch information
Eric Dumazet authored and davem330 committed Feb 11, 2022
1 parent 2d4feb2 commit e5f80fc
Show file tree
Hide file tree
Showing 2 changed files with 40 additions and 59 deletions.
78 changes: 32 additions & 46 deletions net/ipv6/addrconf.c
Original file line number Diff line number Diff line change
Expand Up @@ -372,7 +372,7 @@ static struct inet6_dev *ipv6_add_dev(struct net_device *dev)

ASSERT_RTNL();

if (dev->mtu < IPV6_MIN_MTU)
if (dev->mtu < IPV6_MIN_MTU && dev != blackhole_netdev)
return ERR_PTR(-EINVAL);

ndev = kzalloc(sizeof(struct inet6_dev), GFP_KERNEL);
Expand Down Expand Up @@ -400,21 +400,22 @@ static struct inet6_dev *ipv6_add_dev(struct net_device *dev)
/* We refer to the device */
dev_hold_track(dev, &ndev->dev_tracker, GFP_KERNEL);

if (snmp6_alloc_dev(ndev) < 0) {
netdev_dbg(dev, "%s: cannot allocate memory for statistics\n",
__func__);
neigh_parms_release(&nd_tbl, ndev->nd_parms);
dev_put_track(dev, &ndev->dev_tracker);
kfree(ndev);
return ERR_PTR(err);
}
if (dev != blackhole_netdev) {
if (snmp6_alloc_dev(ndev) < 0) {
netdev_dbg(dev, "%s: cannot allocate memory for statistics\n",
__func__);
neigh_parms_release(&nd_tbl, ndev->nd_parms);
dev_put_track(dev, &ndev->dev_tracker);
kfree(ndev);
return ERR_PTR(err);
}

if (snmp6_register_dev(ndev) < 0) {
netdev_dbg(dev, "%s: cannot create /proc/net/dev_snmp6/%s\n",
__func__, dev->name);
goto err_release;
if (snmp6_register_dev(ndev) < 0) {
netdev_dbg(dev, "%s: cannot create /proc/net/dev_snmp6/%s\n",
__func__, dev->name);
goto err_release;
}
}

/* One reference from device. */
refcount_set(&ndev->refcnt, 1);

Expand Down Expand Up @@ -445,25 +446,28 @@ static struct inet6_dev *ipv6_add_dev(struct net_device *dev)

ipv6_mc_init_dev(ndev);
ndev->tstamp = jiffies;
err = addrconf_sysctl_register(ndev);
if (err) {
ipv6_mc_destroy_dev(ndev);
snmp6_unregister_dev(ndev);
goto err_release;
if (dev != blackhole_netdev) {
err = addrconf_sysctl_register(ndev);
if (err) {
ipv6_mc_destroy_dev(ndev);
snmp6_unregister_dev(ndev);
goto err_release;
}
}
/* protected by rtnl_lock */
rcu_assign_pointer(dev->ip6_ptr, ndev);

/* Join interface-local all-node multicast group */
ipv6_dev_mc_inc(dev, &in6addr_interfacelocal_allnodes);
if (dev != blackhole_netdev) {
/* Join interface-local all-node multicast group */
ipv6_dev_mc_inc(dev, &in6addr_interfacelocal_allnodes);

/* Join all-node multicast group */
ipv6_dev_mc_inc(dev, &in6addr_linklocal_allnodes);

/* Join all-router multicast group if forwarding is set */
if (ndev->cnf.forwarding && (dev->flags & IFF_MULTICAST))
ipv6_dev_mc_inc(dev, &in6addr_linklocal_allrouters);
/* Join all-node multicast group */
ipv6_dev_mc_inc(dev, &in6addr_linklocal_allnodes);

/* Join all-router multicast group if forwarding is set */
if (ndev->cnf.forwarding && (dev->flags & IFF_MULTICAST))
ipv6_dev_mc_inc(dev, &in6addr_linklocal_allrouters);
}
return ndev;

err_release:
Expand Down Expand Up @@ -7233,26 +7237,8 @@ int __init addrconf_init(void)
goto out_nowq;
}

/* The addrconf netdev notifier requires that loopback_dev
* has it's ipv6 private information allocated and setup
* before it can bring up and give link-local addresses
* to other devices which are up.
*
* Unfortunately, loopback_dev is not necessarily the first
* entry in the global dev_base list of net devices. In fact,
* it is likely to be the very last entry on that list.
* So this causes the notifier registry below to try and
* give link-local addresses to all devices besides loopback_dev
* first, then loopback_dev, which cases all the non-loopback_dev
* devices to fail to get a link-local address.
*
* So, as a temporary fix, allocate the ipv6 structure for
* loopback_dev first by hand.
* Longer term, all of the dependencies ipv6 has upon the loopback
* device and it being up should be removed.
*/
rtnl_lock();
idev = ipv6_add_dev(init_net.loopback_dev);
idev = ipv6_add_dev(blackhole_netdev);
rtnl_unlock();
if (IS_ERR(idev)) {
err = PTR_ERR(idev);
Expand Down
21 changes: 8 additions & 13 deletions net/ipv6/route.c
Original file line number Diff line number Diff line change
Expand Up @@ -156,14 +156,10 @@ void rt6_uncached_list_del(struct rt6_info *rt)
}
}

static void rt6_uncached_list_flush_dev(struct net *net, struct net_device *dev)
static void rt6_uncached_list_flush_dev(struct net_device *dev)
{
struct net_device *loopback_dev = net->loopback_dev;
int cpu;

if (dev == loopback_dev)
return;

for_each_possible_cpu(cpu) {
struct uncached_list *ul = per_cpu_ptr(&rt6_uncached_list, cpu);
struct rt6_info *rt;
Expand All @@ -174,7 +170,7 @@ static void rt6_uncached_list_flush_dev(struct net *net, struct net_device *dev)
struct net_device *rt_dev = rt->dst.dev;

if (rt_idev->dev == dev) {
rt->rt6i_idev = in6_dev_get(loopback_dev);
rt->rt6i_idev = in6_dev_get(blackhole_netdev);
in6_dev_put(rt_idev);
}

Expand Down Expand Up @@ -371,13 +367,12 @@ static void ip6_dst_ifdown(struct dst_entry *dst, struct net_device *dev,
{
struct rt6_info *rt = (struct rt6_info *)dst;
struct inet6_dev *idev = rt->rt6i_idev;
struct net_device *loopback_dev =
dev_net(dev)->loopback_dev;

if (idev && idev->dev != loopback_dev) {
struct inet6_dev *loopback_idev = in6_dev_get(loopback_dev);
if (loopback_idev) {
rt->rt6i_idev = loopback_idev;
if (idev && idev->dev != blackhole_netdev) {
struct inet6_dev *blackhole_idev = in6_dev_get(blackhole_netdev);

if (blackhole_idev) {
rt->rt6i_idev = blackhole_idev;
in6_dev_put(idev);
}
}
Expand Down Expand Up @@ -4892,7 +4887,7 @@ void rt6_sync_down_dev(struct net_device *dev, unsigned long event)
void rt6_disable_ip(struct net_device *dev, unsigned long event)
{
rt6_sync_down_dev(dev, event);
rt6_uncached_list_flush_dev(dev_net(dev), dev);
rt6_uncached_list_flush_dev(dev);
neigh_ifdown(&nd_tbl, dev);
}

Expand Down

0 comments on commit e5f80fc

Please sign in to comment.