From 37734aa4f5d75d3bcfac9bf96604e5578f4f2c97 Mon Sep 17 00:00:00 2001 From: lguohan Date: Thu, 10 Jan 2019 19:51:36 -0800 Subject: [PATCH] L3mdev cgroup (#73) * add driver-l3mdev-cgroup patch Signed-off-by: Guohan Lu * update abiname to 8-1 Signed-off-by: Guohan Lu --- Makefile | 5 +- patch/config-l3mdev-cgroup.patch | 21 ++ patch/driver-l3mdev-cgroup.patch | 338 ++++++++++++++++++ .../packaging-update-abiname-to-8-1.patch | 20 ++ patch/preconfig/series | 1 + patch/series | 2 + 6 files changed, 386 insertions(+), 1 deletion(-) create mode 100644 patch/config-l3mdev-cgroup.patch create mode 100644 patch/driver-l3mdev-cgroup.patch create mode 100644 patch/preconfig/packaging-update-abiname-to-8-1.patch diff --git a/Makefile b/Makefile index 35bd43c08862..b5051bf7c5af 100644 --- a/Makefile +++ b/Makefile @@ -2,7 +2,8 @@ SHELL = /bin/bash .SHELLFLAGS += -e -KVERSION_SHORT ?= 4.9.0-8 +KERNEL_ABI_MINOR_VERSION = 1 +KVERSION_SHORT ?= 4.9.0-8-$(KERNEL_ABI_MINOR_VERSION) KVERSION ?= $(KVERSION_SHORT)-amd64 KERNEL_VERSION ?= 4.9.110 KERNEL_SUBVERSION ?= 3+deb9u6 @@ -79,6 +80,8 @@ $(addprefix $(DEST)/, $(MAIN_TARGET)): $(DEST)/% : git add debian/build/build_amd64_none_amd64/.config -f git add debian/config.defines.dump -f git add debian/control -f + git add debian/rules.gen -f + git add debian/tests/control -f git commit -m "unmodified debian source" # Learning new git repo head (above commit) by calling stg repair. diff --git a/patch/config-l3mdev-cgroup.patch b/patch/config-l3mdev-cgroup.patch new file mode 100644 index 000000000000..0d372a101f56 --- /dev/null +++ b/patch/config-l3mdev-cgroup.patch @@ -0,0 +1,21 @@ +enable CONFIG_CGROUP_L3MDEV=y + +From: Guohan Lu + + +--- + debian/build/build_amd64_none_amd64/.config | 1 + + 1 file changed, 1 insertion(+) + +diff --git a/debian/build/build_amd64_none_amd64/.config b/debian/build/build_amd64_none_amd64/.config +index b7bcf15..a8d64a7 100644 +--- a/debian/build/build_amd64_none_amd64/.config ++++ b/debian/build/build_amd64_none_amd64/.config +@@ -1493,6 +1492,7 @@ CONFIG_MPLS_IPTUNNEL=m + # CONFIG_HSR is not set + # CONFIG_NET_SWITCHDEV is not set + CONFIG_NET_L3_MASTER_DEV=y ++CONFIG_CGROUP_L3MDEV=y + # CONFIG_NET_NCSI is not set + CONFIG_RPS=y + CONFIG_RFS_ACCEL=y diff --git a/patch/driver-l3mdev-cgroup.patch b/patch/driver-l3mdev-cgroup.patch new file mode 100644 index 000000000000..026e9f83ce40 --- /dev/null +++ b/patch/driver-l3mdev-cgroup.patch @@ -0,0 +1,338 @@ +Add cgroup to assoicate tasks with L3 networking domains. AF_INET{6} +sockets opened by tasks associated with an l3mdev cgroup are bound to +the associated master device when the socket is created. This allows a +user to run a command (and its children) within an L3 networking context. + +The master-device for an l3mdev cgroup must be an L3 master device +(e.g., VRF), and it must be set before attaching tasks to the cgroup. Once +set the master-device can not change. Nested l3mdev cgroups are not +supported. The root (aka default) l3mdev cgroup can not be bound to a +master device. + +Example: + ip link add vrf-red type vrf table vrf-red + ip link set dev vrf-red up + ip link set dev eth1 master vrf-red + + cgcreate -g l3mdev:vrf-red + cgset -r l3mdev.master-device=vrf-red vrf-red + cgexec -g l3mdev:vrf-red bash + +At this point the current shell and its child processes are attached to +the vrf-red L3 domain. Any AF_INET and AF_INET6 sockets opened by the +tasks are bound to the vrf-red device. + +TO-DO: +- how to auto-create the cgroup when a VRF device is created and auto-deleted + when a VRF device is destroyed + +Signed-off-by: David Ahern +--- + include/linux/cgroup_subsys.h | 4 + + include/net/l3mdev_cgroup.h | 27 ++++++ + net/core/sock.c | 2 + net/l3mdev/Kconfig | 12 +++ + net/l3mdev/Makefile | 1 + net/l3mdev/l3mdev_cgroup.c | 195 +++++++++++++++++++++++++++++++++++++++++ + 6 files changed, 241 insertions(+) + create mode 100644 include/net/l3mdev_cgroup.h + create mode 100644 net/l3mdev/l3mdev_cgroup.c + +diff --git a/include/linux/cgroup_subsys.h b/include/linux/cgroup_subsys.h +index 0df0336..72c220a 100644 +--- a/include/linux/cgroup_subsys.h ++++ b/include/linux/cgroup_subsys.h +@@ -48,6 +48,10 @@ SUBSYS(perf_event) + SUBSYS(net_prio) + #endif + ++#if IS_ENABLED(CONFIG_NET_L3_MASTER_DEV) ++SUBSYS(l3mdev) ++#endif ++ + #if IS_ENABLED(CONFIG_CGROUP_HUGETLB) + SUBSYS(hugetlb) + #endif +diff --git a/include/net/l3mdev_cgroup.h b/include/net/l3mdev_cgroup.h +new file mode 100644 +index 0000000..7d2bef1 +--- /dev/null ++++ b/include/net/l3mdev_cgroup.h +@@ -0,0 +1,27 @@ ++/* ++ * l3mdev_cgroup.h Control Group for L3 Master Device ++ * ++ * Copyright (c) 2015 Cumulus Networks. All rights reserved. ++ * Copyright (c) 2015 David Ahern ++ * ++ * This program is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published by ++ * the Free Software Foundation; either version 2 of the License, or ++ * (at your option) any later version. ++ */ ++ ++#ifndef _L3MDEV_CGROUP_H ++#define _L3MDEV_CGROUP_H ++ ++#if IS_ENABLED(CONFIG_CGROUP_L3MDEV) ++ ++void sock_update_l3mdev(struct sock *sk); ++ ++#else /* !CONFIG_CGROUP_L3MDEV */ ++ ++static inline void sock_update_l3mdev(struct sock *sk) ++{ ++} ++ ++#endif /* CONFIG_CGROUP_L3MDEV */ ++#endif /* _L3MDEV_CGROUP_H */ +diff --git a/net/core/sock.c b/net/core/sock.c +index 1c4c434..ebb25ca 100644 +--- a/net/core/sock.c ++++ b/net/core/sock.c +@@ -131,6 +131,7 @@ + #include + #include + #include ++#include + #include + + #include +@@ -1402,6 +1403,7 @@ struct sock *sk_alloc(struct net *net, int family, gfp_t priority, + cgroup_sk_alloc(&sk->sk_cgrp_data); + sock_update_classid(&sk->sk_cgrp_data); + sock_update_netprioidx(&sk->sk_cgrp_data); ++ sock_update_l3mdev(sk); + } + + return sk; +diff --git a/net/l3mdev/Kconfig b/net/l3mdev/Kconfig +index 5d47325..3142d81 100644 +--- a/net/l3mdev/Kconfig ++++ b/net/l3mdev/Kconfig +@@ -8,3 +8,15 @@ config NET_L3_MASTER_DEV + ---help--- + This module provides glue between core networking code and device + drivers to support L3 master devices like VRF. ++ ++config CGROUP_L3MDEV ++ bool "L3 Master Device cgroup" ++ depends on CGROUPS ++ depends on NET_L3_MASTER_DEV ++ ---help--- ++ Cgroup subsystem for assigning processes to an L3 domain. ++ When a process is assigned to an l3mdev domain all AF_INET and ++ AF_INET6 sockets opened by the process are bound to the L3 master ++ device. ++ ++ +diff --git a/net/l3mdev/Makefile b/net/l3mdev/Makefile +index 84a53a6..ae74eba 100644 +--- a/net/l3mdev/Makefile ++++ b/net/l3mdev/Makefile +@@ -3,3 +3,4 @@ + # + + obj-$(CONFIG_NET_L3_MASTER_DEV) += l3mdev.o ++obj-$(CONFIG_CGROUP_L3MDEV) += l3mdev_cgroup.o +diff --git a/net/l3mdev/l3mdev_cgroup.c b/net/l3mdev/l3mdev_cgroup.c +new file mode 100644 +index 0000000..b5fea03 +--- /dev/null ++++ b/net/l3mdev/l3mdev_cgroup.c +@@ -0,0 +1,195 @@ ++/* ++ * net/l3mdev/l3mdev_cgroup.c Control Group for L3 Master Devices ++ * ++ * Copyright (c) 2015 Cumulus Networks. All rights reserved. ++ * Copyright (c) 2015 David Ahern ++ * ++ * This program is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published by ++ * the Free Software Foundation; either version 2 of the License, or ++ * (at your option) any later version. ++ */ ++ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++struct l3mdev_cgroup { ++ struct cgroup_subsys_state css; ++ struct net *net; ++ int dev_idx; ++}; ++ ++static inline struct l3mdev_cgroup *css_l3mdev(struct cgroup_subsys_state *css) ++{ ++ return css ? container_of(css, struct l3mdev_cgroup, css) : NULL; ++} ++ ++static void l3mdev_set_bound_dev(struct sock *sk) ++{ ++ struct task_struct *tsk = current; ++ struct l3mdev_cgroup *l3mdev_cgrp; ++ ++ rcu_read_lock(); ++ ++ l3mdev_cgrp = css_l3mdev(task_css(tsk, l3mdev_cgrp_id)); ++ if (l3mdev_cgrp && l3mdev_cgrp->dev_idx) ++ sk->sk_bound_dev_if = l3mdev_cgrp->dev_idx; ++ ++ rcu_read_unlock(); ++} ++ ++void sock_update_l3mdev(struct sock *sk) ++{ ++ switch (sk->sk_family) { ++ case AF_INET: ++ case AF_INET6: ++ l3mdev_set_bound_dev(sk); ++ break; ++ } ++} ++ ++static bool is_root_cgroup(struct cgroup_subsys_state *css) ++{ ++ return !css || !css->parent; ++} ++ ++static struct cgroup_subsys_state * ++l3mdev_css_alloc(struct cgroup_subsys_state *parent_css) ++{ ++ struct l3mdev_cgroup *l3mdev_cgrp; ++ ++ /* nested l3mdev domains are not supportd */ ++ if (!is_root_cgroup(parent_css)) ++ return ERR_PTR(-EINVAL); ++ ++ l3mdev_cgrp = kzalloc(sizeof(*l3mdev_cgrp), GFP_KERNEL); ++ if (!l3mdev_cgrp) ++ return ERR_PTR(-ENOMEM); ++ ++ return &l3mdev_cgrp->css; ++} ++ ++static int l3mdev_css_online(struct cgroup_subsys_state *css) ++{ ++ return 0; ++} ++ ++static void l3mdev_css_free(struct cgroup_subsys_state *css) ++{ ++ kfree(css_l3mdev(css)); ++} ++ ++static int l3mdev_read(struct seq_file *sf, void *v) ++{ ++ struct cgroup_subsys_state *css = seq_css(sf); ++ struct l3mdev_cgroup *l3mdev_cgrp = css_l3mdev(css); ++ ++ if (!l3mdev_cgrp) ++ return -EINVAL; ++ ++ if (l3mdev_cgrp->net) { ++ struct net_device *dev; ++ ++ dev = dev_get_by_index(l3mdev_cgrp->net, l3mdev_cgrp->dev_idx); ++ ++ seq_printf(sf, "net[%u]: device index %d ==> %s\n", ++ l3mdev_cgrp->net->ns.inum, l3mdev_cgrp->dev_idx, ++ dev ? dev->name : ""); ++ ++ if (dev) ++ dev_put(dev); ++ } ++ return 0; ++} ++ ++static ssize_t l3mdev_write(struct kernfs_open_file *of, ++ char *buf, size_t nbytes, loff_t off) ++{ ++ struct cgroup_subsys_state *css = of_css(of); ++ struct l3mdev_cgroup *l3mdev_cgrp = css_l3mdev(css); ++ struct net *net = current->nsproxy->net_ns; ++ struct net_device *dev; ++ char name[IFNAMSIZ]; ++ int rc = -EINVAL; ++ ++ /* once master device is set can not undo. Must delete ++ * cgroup and reset ++ */ ++ if (l3mdev_cgrp->dev_idx) ++ goto out; ++ ++ /* root cgroup does not bind to an L3 domain */ ++ if (is_root_cgroup(css)) ++ goto out; ++ ++ if (sscanf(buf, "%" __stringify(IFNAMSIZ) "s", name) != 1) ++ goto out; ++ ++ dev = dev_get_by_name(net, name); ++ if (!dev) { ++ rc = -ENODEV; ++ goto out; ++ } ++ ++ if (netif_is_l3_master(dev)) { ++ l3mdev_cgrp->net = net; ++ l3mdev_cgrp->dev_idx = dev->ifindex; ++ rc = 0; ++ } ++ ++ dev_put(dev); ++out: ++ return rc ? : nbytes; ++} ++ ++/* make master device is set for non-root cgroups before tasks can be added */ ++static int l3mdev_can_attach(struct cgroup_taskset *tset) ++{ ++ struct cgroup_subsys_state *dst_css; ++ struct task_struct *tsk; ++ int rc = 0; ++ ++ cgroup_taskset_for_each(tsk, dst_css, tset) { ++ struct l3mdev_cgroup *l3mdev_cgrp; ++ ++ l3mdev_cgrp = css_l3mdev(dst_css); ++ if (!is_root_cgroup(dst_css) && !l3mdev_cgrp->dev_idx) { ++ rc = -ENODEV; ++ break; ++ } ++ } ++ ++ return rc; ++} ++ ++static struct cftype ss_files[] = { ++ { ++ .name = "master-device", ++ .seq_show = l3mdev_read, ++ .write = l3mdev_write, ++ }, ++ { } /* terminate */ ++}; ++ ++struct cgroup_subsys l3mdev_cgrp_subsys = { ++ .css_alloc = l3mdev_css_alloc, ++ .css_online = l3mdev_css_online, ++ .css_free = l3mdev_css_free, ++ .can_attach = l3mdev_can_attach, ++ .legacy_cftypes = ss_files, ++}; ++ ++static int __init init_cgroup_l3mdev(void) ++{ ++ return 0; ++} ++ ++subsys_initcall(init_cgroup_l3mdev); ++MODULE_AUTHOR("David Ahern"); ++MODULE_DESCRIPTION("Control Group for L3 Networking Domains"); ++MODULE_LICENSE("GPL"); diff --git a/patch/preconfig/packaging-update-abiname-to-8-1.patch b/patch/preconfig/packaging-update-abiname-to-8-1.patch new file mode 100644 index 000000000000..e995789f59a0 --- /dev/null +++ b/patch/preconfig/packaging-update-abiname-to-8-1.patch @@ -0,0 +1,20 @@ +Update debian abiname to 8-1 + +From: Guohan Lu + + +--- + debian/config/defines | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/debian/config/defines b/debian/config/defines +index cfc4409..fae58f7 100644 +--- a/debian/config/defines ++++ b/debian/config/defines +@@ -1,5 +1,5 @@ + [abi] +-abiname: 8 ++abiname: 8-1 + ignore-changes: + __cpuhp_* + bpf_analyzer diff --git a/patch/preconfig/series b/patch/preconfig/series index b3d41d369bec..f8627322758d 100644 --- a/patch/preconfig/series +++ b/patch/preconfig/series @@ -1 +1,2 @@ changelog.patch +packaging-update-abiname-to-8-1.patch diff --git a/patch/series b/patch/series index f12ed7bd2c25..65e97c34adae 100644 --- a/patch/series +++ b/patch/series @@ -24,6 +24,7 @@ driver-i2c-bus-intel-ismt-add-delay-param.patch driver-uart-fix-race-between-uart_put_char-and-uart_shutdow.patch driver-uart-fix-another-race-between-uart_put_char-and-uart_shutdow.patch driver-pca954x-i2c-mux-force-deselect-on-exit-flag.patch +driver-l3mdev-cgroup.patch kernel-add-kexec-reboot-string.patch driver-hwmon-max6620.patch driver-hwmon-max6620-fix-rpm-calc.patch @@ -44,6 +45,7 @@ config-arista-dps1900.patch config-arista-7060-cx32s.patch config-mitac-ly1200.patch config-optoe.patch +config-l3mdev-cgroup.patch config-inventec-d7032.patch 0014-mlxsw-qsfp_sysfs-Support-CPLD-version-reading-based-.patch 0015-platform-x86-mlx-platform-Add-support-for-new-msn201.patch