From 17ef8efc00b34918b966388b2af0993811895a8c Mon Sep 17 00:00:00 2001
From: Eric Dumazet
Date: Fri, 9 Feb 2024 15:30:56 +0000
Subject: [PATCH 1/6] ipv6: mcast: remove one synchronize_net() barrier in ipv6_mc_down()

As discussed in the past (commit 2d3916f31891 ("ipv6: fix skb drops in
igmp6_event_query() and igmp6_event_report()")) I think the
synchronize_net() call in ipv6_mc_down() is not needed.

Under load, synchronize_net() can last between 200 usec and 5 ms.

KASAN seems to agree as well.

Fixes: f185de28d9ae ("mld: add new workqueues for process mld events")
Signed-off-by: Eric Dumazet
Cc: Taehee Yoo
Cc: Cong Wang
Cc: David Ahern
Signed-off-by: David S. Miller
---
 net/ipv6/mcast.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/net/ipv6/mcast.c b/net/ipv6/mcast.c
index bc6e0a0bad3c12..76ee1615ff2a07 100644
--- a/net/ipv6/mcast.c
+++ b/net/ipv6/mcast.c
@@ -2719,7 +2719,6 @@ void ipv6_mc_down(struct inet6_dev *idev)
 	/* Should stop work after group drop. or we will
 	 * start work again in mld_ifc_event()
 	 */
-	synchronize_net();
 	mld_query_stop_work(idev);
 	mld_report_stop_work(idev);

From 4cd582ffa5a9a5d58e5bac9c5e55ca8eeabffddc Mon Sep 17 00:00:00 2001
From: Eric Dumazet
Date: Fri, 9 Feb 2024 15:30:57 +0000
Subject: [PATCH 2/6] net: use synchronize_net() in dev_change_name()

dev_change_name() holds RTNL, so we should use synchronize_net()
instead of plain synchronize_rcu().

Signed-off-by: Eric Dumazet
Signed-off-by: David S. Miller
---
 net/core/dev.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/net/core/dev.c b/net/core/dev.c
index 31f2c97d19903a..7cf15d2bf78def 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -1239,7 +1239,7 @@ int dev_change_name(struct net_device *dev, const char *newname)
 	netdev_name_node_del(dev->name_node);
 	write_unlock(&dev_base_lock);
-	synchronize_rcu();
+	synchronize_net();
 	write_lock(&dev_base_lock);
 	netdev_name_node_add(net, dev->name_node);

From 48ebf6ebbc91a6e6b2c0ac1217573e81b37ef2ab Mon Sep 17 00:00:00 2001
From: Eric Dumazet
Date: Fri, 9 Feb 2024 15:30:58 +0000
Subject: [PATCH 3/6] bridge: vlan: use synchronize_net() when holding RTNL

br_vlan_flush() and nbp_vlan_flush() should use synchronize_net()
instead of synchronize_rcu() to release RTNL sooner.

Signed-off-by: Eric Dumazet
Acked-by: Nikolay Aleksandrov
Signed-off-by: David S. Miller
---
 net/bridge/br_vlan.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/net/bridge/br_vlan.c b/net/bridge/br_vlan.c
index 15f44d026e75a8..9c2fffb827ab19 100644
--- a/net/bridge/br_vlan.c
+++ b/net/bridge/br_vlan.c
@@ -841,7 +841,7 @@ void br_vlan_flush(struct net_bridge *br)
 	vg = br_vlan_group(br);
 	__vlan_flush(br, NULL, vg);
 	RCU_INIT_POINTER(br->vlgrp, NULL);
-	synchronize_rcu();
+	synchronize_net();
 	__vlan_group_free(vg);
 }
@@ -1372,7 +1372,7 @@ void nbp_vlan_flush(struct net_bridge_port *port)
 	vg = nbp_vlan_group(port);
 	__vlan_flush(port->br, port, vg);
 	RCU_INIT_POINTER(port->vlgrp, NULL);
-	synchronize_rcu();
+	synchronize_net();
 	__vlan_group_free(vg);
 }
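
For reference, the reason the RTNL-holding call sites above prefer
synchronize_net() is that it selects an expedited RCU grace period when
RTNL is held, so the lock is released sooner. A rough sketch of that
helper (it lives in net/core/dev.c; this is a paraphrase, not an exact
quote of the source):

	/* Rough sketch of synchronize_net(): when the caller holds RTNL,
	 * the expedited grace period is used, so RTNL is dropped sooner.
	 */
	void synchronize_net(void)
	{
		might_sleep();
		if (rtnl_is_locked())
			synchronize_rcu_expedited();
		else
			synchronize_rcu();
	}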
From 2cd0c51e3baf7aa49e802c06cb1b2ffa9c82fbe1 Mon Sep 17 00:00:00 2001
From: Eric Dumazet
Date: Fri, 9 Feb 2024 15:30:59 +0000
Subject: [PATCH 4/6] ipv4/fib: use synchronize_net() when holding RTNL

tnode_free() should use synchronize_net() instead of synchronize_rcu()
to release RTNL sooner.

Signed-off-by: Eric Dumazet
Reviewed-by: David Ahern
Signed-off-by: David S. Miller
---
 net/ipv4/fib_trie.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c
index 3ff35f81176559..0fc7ab5832d1ae 100644
--- a/net/ipv4/fib_trie.c
+++ b/net/ipv4/fib_trie.c
@@ -501,7 +501,7 @@ static void tnode_free(struct key_vector *tn)
 	if (tnode_free_size >= READ_ONCE(sysctl_fib_sync_mem)) {
 		tnode_free_size = 0;
-		synchronize_rcu();
+		synchronize_net();
 	}
 }

From 78c3253f27e579f7f3a1f5c0cb8266693a7b4f41 Mon Sep 17 00:00:00 2001
From: Eric Dumazet
Date: Fri, 9 Feb 2024 15:31:00 +0000
Subject: [PATCH 5/6] net: use synchronize_rcu_expedited in cleanup_net()

cleanup_net() is calling synchronize_rcu() right before acquiring RTNL.

synchronize_rcu() is much slower than synchronize_rcu_expedited(), and
cleanup_net() is currently single threaded.

In many workloads we want cleanup_net() to be fast, in order to free
memory and various sysfs and procfs entries as fast as possible.

Signed-off-by: Eric Dumazet
Signed-off-by: David S. Miller
---
 net/core/net_namespace.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/net/core/net_namespace.c b/net/core/net_namespace.c
index 233ec0cdd0111d..f0540c55751571 100644
--- a/net/core/net_namespace.c
+++ b/net/core/net_namespace.c
@@ -622,7 +622,7 @@ static void cleanup_net(struct work_struct *work)
 	 * the rcu_barrier() below isn't sufficient alone.
	 * Also the pre_exit() and exit() methods need this barrier.
	 */
-	synchronize_rcu();
+	synchronize_rcu_expedited();
 	rtnl_lock();
 	list_for_each_entry_reverse(ops, &pernet_list, list) {

From 1ebb85f9c03de0b66c334de219f224159e24e549 Mon Sep 17 00:00:00 2001
From: Eric Dumazet
Date: Fri, 9 Feb 2024 15:31:01 +0000
Subject: [PATCH 6/6] netfilter: conntrack: expedite rcu in nf_conntrack_cleanup_net_list

nf_conntrack_cleanup_net_list() is calling synchronize_net() while RTNL
is not held. This effectively calls synchronize_rcu().

synchronize_rcu() is much slower than synchronize_rcu_expedited(), and
cleanup_net() is currently single threaded.

In many workloads we want cleanup_net() to be faster, in order to free
memory and various sysfs and procfs entries as fast as possible.

Signed-off-by: Eric Dumazet
Cc: Pablo Neira Ayuso
Cc: Jozsef Kadlecsik
Cc: Florian Westphal
Signed-off-by: David S. Miller
---
 net/netfilter/nf_conntrack_core.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c
index 2e5f3864d353a3..90e6bd2c30002c 100644
--- a/net/netfilter/nf_conntrack_core.c
+++ b/net/netfilter/nf_conntrack_core.c
@@ -2530,7 +2530,7 @@ void nf_conntrack_cleanup_net_list(struct list_head *net_exit_list)
 	 * netfilter framework. Roll on, two-stage module
 	 * delete...
 	 */
-	synchronize_net();
+	synchronize_rcu_expedited();
 i_see_dead_people:
 	busy = 0;
 	list_for_each_entry(net, net_exit_list, exit_list) {
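
All of the converted call sites follow the same unpublish / wait / free
pattern; swapping the grace-period primitive only changes how long the
writer blocks, not the correctness argument. A minimal sketch of that
pattern with purely hypothetical names (struct foo, global_foo and
foo_release() are illustrations, not code from this series):

	#include <linux/rcupdate.h>
	#include <linux/rtnetlink.h>
	#include <linux/slab.h>

	/* Hypothetical RCU-protected object, for illustration only. */
	struct foo {
		int data;
	};

	static struct foo __rcu *global_foo;

	/* Writer side, assumed to run under RTNL. */
	static void foo_release(void)
	{
		struct foo *old = rtnl_dereference(global_foo);

		RCU_INIT_POINTER(global_foo, NULL);	/* 1. unpublish */
		synchronize_net();			/* 2. wait for RCU readers (expedited under RTNL) */
		kfree(old);				/* 3. no reader can reach the object now */
	}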