Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- From 0988b2101bb26c0ea174d01e8e1dba7a6c809f22 Mon Sep 17 00:00:00 2001
- From: Matteo Croce <mcroce@redhat.com>
- Date: Wed, 26 Jul 2017 19:03:33 +0200
- Subject: [PATCH] net: make net.core.{r,w}mem_{default,max} namespaced
- The following sysctls are global and can't be read or set from a netns:
- net.core.rmem_default
- net.core.rmem_max
- net.core.wmem_default
- net.core.wmem_max
- Make the sysctl parameters listed above available from within a network
- namespace, allowing unique values to be set per network namespace.
- My concern is about the initial values of these sysctls in a newly
- created netns: I'm not sure whether it is better to copy them from the
- init namespace or to set them to the default values.
- Setting them to the default values has the advantage that a new namespace
- behaves like a freshly booted system, while copying them from the init
- netns has the advantage of keeping the current behaviour, as the values
- from the init netns are used.
- Signed-off-by: Matteo Croce <mcroce@redhat.com>
- ---
- include/net/netns/core.h | 5 +++
- include/net/sock.h | 3 --
- include/net/tcp.h | 3 +-
- net/core/net_namespace.c | 22 +++++++++++++
- net/core/sock.c | 21 +++++--------
- net/core/sysctl_net_core.c | 70 ++++++++++++++++++++++-------------------
- net/ipv4/ip_output.c | 2 +-
- net/ipv4/syncookies.c | 3 +-
- net/ipv4/tcp_minisocks.c | 3 +-
- net/ipv4/tcp_output.c | 12 ++++---
- net/ipv6/syncookies.c | 3 +-
- net/netfilter/ipvs/ip_vs_sync.c | 4 +--
- 12 files changed, 89 insertions(+), 62 deletions(-)
- diff --git a/include/net/netns/core.h b/include/net/netns/core.h
- index 36c2d998a43c..71a57abf8d70 100644
- --- a/include/net/netns/core.h
- +++ b/include/net/netns/core.h
- @@ -10,6 +10,11 @@ struct netns_core {
- struct ctl_table_header *sysctl_hdr;
- int sysctl_somaxconn;
- + u32 sysctl_wmem_max;
- + u32 sysctl_rmem_max;
- +
- + u32 sysctl_wmem_default;
- + u32 sysctl_rmem_default;
- #ifdef CONFIG_PROC_FS
- int __percpu *sock_inuse;
- diff --git a/include/net/sock.h b/include/net/sock.h
- index 74d725fdbe0f..b7899813cc87 100644
- --- a/include/net/sock.h
- +++ b/include/net/sock.h
- @@ -2363,9 +2363,6 @@ void sk_get_meminfo(const struct sock *sk, u32 *meminfo);
- #define SK_WMEM_MAX (_SK_MEM_OVERHEAD * _SK_MEM_PACKETS)
- #define SK_RMEM_MAX (_SK_MEM_OVERHEAD * _SK_MEM_PACKETS)
- -extern __u32 sysctl_wmem_max;
- -extern __u32 sysctl_rmem_max;
- -
- extern int sysctl_tstamp_allow_data;
- extern int sysctl_optmem_max;
- diff --git a/include/net/tcp.h b/include/net/tcp.h
- index 9c9b3768b350..ecae987a0f80 100644
- --- a/include/net/tcp.h
- +++ b/include/net/tcp.h
- @@ -1279,7 +1279,8 @@ static inline void tcp_slow_start_after_idle_check(struct sock *sk)
- void tcp_select_initial_window(const struct sock *sk, int __space,
- __u32 mss, __u32 *rcv_wnd,
- __u32 *window_clamp, int wscale_ok,
- - __u8 *rcv_wscale, __u32 init_rcv_wnd);
- + __u8 *rcv_wscale, __u32 init_rcv_wnd,
- + __u32 rmem_max);
- static inline int tcp_win_from_space(const struct sock *sk, int space)
- {
- diff --git a/net/core/net_namespace.c b/net/core/net_namespace.c
- index a11e03f920d3..265f0c1de55f 100644
- --- a/net/core/net_namespace.c
- +++ b/net/core/net_namespace.c
- @@ -23,6 +23,16 @@
- #include <net/net_namespace.h>
- #include <net/netns/generic.h>
- +/* Take into consideration the size of the struct sk_buff overhead in the
- + * determination of these values, since that is non-constant across
- + * platforms. This makes socket queueing behavior and performance
- + * not depend upon such differences.
- + */
- +#define _SK_MEM_PACKETS 256
- +#define _SK_MEM_OVERHEAD SKB_TRUESIZE(256)
- +#define SK_WMEM_MAX (_SK_MEM_OVERHEAD * _SK_MEM_PACKETS)
- +#define SK_RMEM_MAX (_SK_MEM_OVERHEAD * _SK_MEM_PACKETS)
- +
- /*
- * Our network namespace constructor/destructor lists
- */
- @@ -340,6 +350,18 @@ static __net_init int setup_net(struct net *net, struct user_namespace *user_ns)
- static int __net_init net_defaults_init_net(struct net *net)
- {
- net->core.sysctl_somaxconn = SOMAXCONN;
- + if (net_eq(net, &init_net)) {
- + init_net.core.sysctl_wmem_max = SK_WMEM_MAX;
- + init_net.core.sysctl_rmem_max = SK_RMEM_MAX;
- + init_net.core.sysctl_wmem_default = SK_WMEM_MAX;
- + init_net.core.sysctl_rmem_default = SK_RMEM_MAX;
- + } else {
- + net->core.sysctl_wmem_max = init_net.core.sysctl_wmem_max;
- + net->core.sysctl_rmem_max = init_net.core.sysctl_rmem_max;
- + net->core.sysctl_wmem_default = init_net.core.sysctl_wmem_default;
- + net->core.sysctl_rmem_default = init_net.core.sysctl_rmem_default;
- + }
- +
- return 0;
- }
- diff --git a/net/core/sock.c b/net/core/sock.c
- index 6444525f610c..a6eaf5abe997 100644
- --- a/net/core/sock.c
- +++ b/net/core/sock.c
- @@ -309,14 +309,6 @@ static struct lock_class_key af_wlock_keys[AF_MAX];
- static struct lock_class_key af_elock_keys[AF_MAX];
- static struct lock_class_key af_kern_callback_keys[AF_MAX];
- -/* Run time adjustable parameters. */
- -__u32 sysctl_wmem_max __read_mostly = SK_WMEM_MAX;
- -EXPORT_SYMBOL(sysctl_wmem_max);
- -__u32 sysctl_rmem_max __read_mostly = SK_RMEM_MAX;
- -EXPORT_SYMBOL(sysctl_rmem_max);
- -__u32 sysctl_wmem_default __read_mostly = SK_WMEM_MAX;
- -__u32 sysctl_rmem_default __read_mostly = SK_RMEM_MAX;
- -
- /* Maximal space eaten by iovec or ancillary data plus some space */
- int sysctl_optmem_max __read_mostly = sizeof(unsigned long)*(2*UIO_MAXIOV+512);
- EXPORT_SYMBOL(sysctl_optmem_max);
- @@ -694,6 +686,7 @@ int sock_setsockopt(struct socket *sock, int level, int optname,
- char __user *optval, unsigned int optlen)
- {
- struct sock *sk = sock->sk;
- + struct net *net = sock_net(sk);
- int val;
- int valbool;
- struct linger ling;
- @@ -747,7 +740,7 @@ int sock_setsockopt(struct socket *sock, int level, int optname,
- * play 'guess the biggest size' games. RCVBUF/SNDBUF
- * are treated in BSD as hints
- */
- - val = min_t(u32, val, sysctl_wmem_max);
- + val = min_t(u32, val, net->core.sysctl_wmem_max);
- set_sndbuf:
- sk->sk_userlocks |= SOCK_SNDBUF_LOCK;
- sk->sk_sndbuf = max_t(int, val * 2, SOCK_MIN_SNDBUF);
- @@ -768,7 +761,7 @@ int sock_setsockopt(struct socket *sock, int level, int optname,
- * play 'guess the biggest size' games. RCVBUF/SNDBUF
- * are treated in BSD as hints
- */
- - val = min_t(u32, val, sysctl_rmem_max);
- + val = min_t(u32, val, net->core.sysctl_rmem_max);
- set_rcvbuf:
- sk->sk_userlocks |= SOCK_RCVBUF_LOCK;
- /*
- @@ -812,7 +805,7 @@ int sock_setsockopt(struct socket *sock, int level, int optname,
- case SO_PRIORITY:
- if ((val >= 0 && val <= 6) ||
- - ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN))
- + ns_capable(net->user_ns, CAP_NET_ADMIN))
- sk->sk_priority = val;
- else
- ret = -EPERM;
- @@ -986,7 +979,7 @@ int sock_setsockopt(struct socket *sock, int level, int optname,
- clear_bit(SOCK_PASSSEC, &sock->flags);
- break;
- case SO_MARK:
- - if (!ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN))
- + if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
- ret = -EPERM;
- else
- sk->sk_mark = val;
- @@ -2759,8 +2752,8 @@ void sock_init_data(struct socket *sock, struct sock *sk)
- timer_setup(&sk->sk_timer, NULL, 0);
- sk->sk_allocation = GFP_KERNEL;
- - sk->sk_rcvbuf = sysctl_rmem_default;
- - sk->sk_sndbuf = sysctl_wmem_default;
- + sk->sk_rcvbuf = sock_net(sk)->core.sysctl_rmem_default;
- + sk->sk_sndbuf = sock_net(sk)->core.sysctl_wmem_default;
- sk->sk_state = TCP_CLOSE;
- sk_set_socket(sk, sock);
- diff --git a/net/core/sysctl_net_core.c b/net/core/sysctl_net_core.c
- index b1a2c5e38530..39b49545279f 100644
- --- a/net/core/sysctl_net_core.c
- +++ b/net/core/sysctl_net_core.c
- @@ -295,38 +295,6 @@ proc_dointvec_minmax_bpf_restricted(struct ctl_table *table, int write,
- static struct ctl_table net_core_table[] = {
- #ifdef CONFIG_NET
- - {
- - .procname = "wmem_max",
- - .data = &sysctl_wmem_max,
- - .maxlen = sizeof(int),
- - .mode = 0644,
- - .proc_handler = proc_dointvec_minmax,
- - .extra1 = &min_sndbuf,
- - },
- - {
- - .procname = "rmem_max",
- - .data = &sysctl_rmem_max,
- - .maxlen = sizeof(int),
- - .mode = 0644,
- - .proc_handler = proc_dointvec_minmax,
- - .extra1 = &min_rcvbuf,
- - },
- - {
- - .procname = "wmem_default",
- - .data = &sysctl_wmem_default,
- - .maxlen = sizeof(int),
- - .mode = 0644,
- - .proc_handler = proc_dointvec_minmax,
- - .extra1 = &min_sndbuf,
- - },
- - {
- - .procname = "rmem_default",
- - .data = &sysctl_rmem_default,
- - .maxlen = sizeof(int),
- - .mode = 0644,
- - .proc_handler = proc_dointvec_minmax,
- - .extra1 = &min_rcvbuf,
- - },
- {
- .procname = "dev_weight",
- .data = &weight_p,
- @@ -536,6 +504,38 @@ static struct ctl_table netns_core_table[] = {
- .extra1 = &zero,
- .proc_handler = proc_dointvec_minmax
- },
- + {
- + .procname = "wmem_max",
- + .data = &init_net.core.sysctl_wmem_max,
- + .maxlen = sizeof(int),
- + .mode = 0644,
- + .proc_handler = proc_dointvec_minmax,
- + .extra1 = &min_sndbuf,
- + },
- + {
- + .procname = "rmem_max",
- + .data = &init_net.core.sysctl_rmem_max,
- + .maxlen = sizeof(int),
- + .mode = 0644,
- + .proc_handler = proc_dointvec_minmax,
- + .extra1 = &min_rcvbuf,
- + },
- + {
- + .procname = "wmem_default",
- + .data = &init_net.core.sysctl_wmem_default,
- + .maxlen = sizeof(int),
- + .mode = 0644,
- + .proc_handler = proc_dointvec_minmax,
- + .extra1 = &min_sndbuf,
- + },
- + {
- + .procname = "rmem_default",
- + .data = &init_net.core.sysctl_rmem_default,
- + .maxlen = sizeof(int),
- + .mode = 0644,
- + .proc_handler = proc_dointvec_minmax,
- + .extra1 = &min_rcvbuf,
- + },
- { }
- };
- @@ -545,11 +545,15 @@ static __net_init int sysctl_core_net_init(struct net *net)
- tbl = netns_core_table;
- if (!net_eq(net, &init_net)) {
- + int i;
- +
- tbl = kmemdup(tbl, sizeof(netns_core_table), GFP_KERNEL);
- if (tbl == NULL)
- goto err_dup;
- - tbl[0].data = &net->core.sysctl_somaxconn;
- + /* Update the variables to point into the current struct net */
- + for (i = 0; i < ARRAY_SIZE(netns_core_table) - 1; i++)
- + tbl[i].data += (void *)net - (void *)&init_net;
- /* Don't export any sysctls to unprivileged users */
- if (net->user_ns != &init_user_ns) {
- diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c
- index 83c73bab2c3d..d61f03beb015 100644
- --- a/net/ipv4/ip_output.c
- +++ b/net/ipv4/ip_output.c
- @@ -1569,7 +1569,7 @@ void ip_send_unicast_reply(struct sock *sk, struct sk_buff *skb,
- sk->sk_priority = skb->priority;
- sk->sk_protocol = ip_hdr(skb)->protocol;
- sk->sk_bound_dev_if = arg->bound_dev_if;
- - sk->sk_sndbuf = sysctl_wmem_default;
- + sk->sk_sndbuf = net->core.sysctl_wmem_default;
- sk->sk_mark = fl4.flowi4_mark;
- err = ip_append_data(sk, &fl4, ip_reply_glue_bits, arg->iov->iov_base,
- len, 0, &ipc, &rt, MSG_DONTWAIT);
- diff --git a/net/ipv4/syncookies.c b/net/ipv4/syncookies.c
- index c3387dfd725b..0f1c85b9b9fc 100644
- --- a/net/ipv4/syncookies.c
- +++ b/net/ipv4/syncookies.c
- @@ -390,7 +390,8 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb)
- tcp_select_initial_window(sk, tcp_full_space(sk), req->mss,
- &req->rsk_rcv_wnd, &req->rsk_window_clamp,
- ireq->wscale_ok, &rcv_wscale,
- - dst_metric(&rt->dst, RTAX_INITRWND));
- + dst_metric(&rt->dst, RTAX_INITRWND),
- + sock_net(sk)->core.sysctl_rmem_max);
- ireq->rcv_wscale = rcv_wscale;
- ireq->ecn_ok = cookie_ecn_ok(&tcp_opt, sock_net(sk), &rt->dst);
- diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c
- index 57b5468b5139..1960eadb15b5 100644
- --- a/net/ipv4/tcp_minisocks.c
- +++ b/net/ipv4/tcp_minisocks.c
- @@ -383,7 +383,8 @@ void tcp_openreq_init_rwin(struct request_sock *req,
- &req->rsk_window_clamp,
- ireq->wscale_ok,
- &rcv_wscale,
- - rcv_wnd);
- + rcv_wnd,
- + sock_net(sk_listener)->core.sysctl_rmem_max);
- ireq->rcv_wscale = rcv_wscale;
- }
- EXPORT_SYMBOL(tcp_openreq_init_rwin);
- diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
- index 383cac0ff0ec..d1730a188a75 100644
- --- a/net/ipv4/tcp_output.c
- +++ b/net/ipv4/tcp_output.c
- @@ -191,7 +191,7 @@ u32 tcp_default_init_rwnd(u32 mss)
- void tcp_select_initial_window(const struct sock *sk, int __space, __u32 mss,
- __u32 *rcv_wnd, __u32 *window_clamp,
- int wscale_ok, __u8 *rcv_wscale,
- - __u32 init_rcv_wnd)
- + __u32 init_rcv_wnd, __u32 rmem_max)
- {
- unsigned int space = (__space < 0 ? 0 : __space);
- @@ -221,7 +221,7 @@ void tcp_select_initial_window(const struct sock *sk, int __space, __u32 mss,
- if (wscale_ok) {
- /* Set window scaling on max possible window */
- space = max_t(u32, space, sock_net(sk)->ipv4.sysctl_tcp_rmem[2]);
- - space = max_t(u32, space, sysctl_rmem_max);
- + space = max_t(u32, space, rmem_max);
- space = min_t(u32, space, *window_clamp);
- while (space > U16_MAX && (*rcv_wscale) < TCP_MAX_WSCALE) {
- space >>= 1;
- @@ -3288,6 +3288,7 @@ static void tcp_connect_init(struct sock *sk)
- {
- const struct dst_entry *dst = __sk_dst_get(sk);
- struct tcp_sock *tp = tcp_sk(sk);
- + struct net *net = sock_net(sk);
- __u8 rcv_wscale;
- u32 rcv_wnd;
- @@ -3295,7 +3296,7 @@ static void tcp_connect_init(struct sock *sk)
- * See tcp_input.c:tcp_rcv_state_process case TCP_SYN_SENT.
- */
- tp->tcp_header_len = sizeof(struct tcphdr);
- - if (sock_net(sk)->ipv4.sysctl_tcp_timestamps)
- + if (net->ipv4.sysctl_tcp_timestamps)
- tp->tcp_header_len += TCPOLEN_TSTAMP_ALIGNED;
- #ifdef CONFIG_TCP_MD5SIG
- @@ -3331,9 +3332,10 @@ static void tcp_connect_init(struct sock *sk)
- tp->advmss - (tp->rx_opt.ts_recent_stamp ? tp->tcp_header_len - sizeof(struct tcphdr) : 0),
- &tp->rcv_wnd,
- &tp->window_clamp,
- - sock_net(sk)->ipv4.sysctl_tcp_window_scaling,
- + net->ipv4.sysctl_tcp_window_scaling,
- &rcv_wscale,
- - rcv_wnd);
- + rcv_wnd,
- + net->core.sysctl_rmem_max);
- tp->rx_opt.rcv_wscale = rcv_wscale;
- tp->rcv_ssthresh = tp->rcv_wnd;
- diff --git a/net/ipv6/syncookies.c b/net/ipv6/syncookies.c
- index e997141aed8c..511f06533821 100644
- --- a/net/ipv6/syncookies.c
- +++ b/net/ipv6/syncookies.c
- @@ -249,7 +249,8 @@ struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb)
- tcp_select_initial_window(sk, tcp_full_space(sk), req->mss,
- &req->rsk_rcv_wnd, &req->rsk_window_clamp,
- ireq->wscale_ok, &rcv_wscale,
- - dst_metric(dst, RTAX_INITRWND));
- + dst_metric(dst, RTAX_INITRWND),
- + sock_net(sk)->core.sysctl_rmem_max);
- ireq->rcv_wscale = rcv_wscale;
- ireq->ecn_ok = cookie_ecn_ok(&tcp_opt, sock_net(sk), dst);
- diff --git a/net/netfilter/ipvs/ip_vs_sync.c b/net/netfilter/ipvs/ip_vs_sync.c
- index 001501e25625..7dc89ed15d3e 100644
- --- a/net/netfilter/ipvs/ip_vs_sync.c
- +++ b/net/netfilter/ipvs/ip_vs_sync.c
- @@ -1285,12 +1285,12 @@ static void set_sock_size(struct sock *sk, int mode, int val)
- lock_sock(sk);
- if (mode) {
- val = clamp_t(int, val, (SOCK_MIN_SNDBUF + 1) / 2,
- - sysctl_wmem_max);
- + sock_net(sk)->core.sysctl_wmem_max);
- sk->sk_sndbuf = val * 2;
- sk->sk_userlocks |= SOCK_SNDBUF_LOCK;
- } else {
- val = clamp_t(int, val, (SOCK_MIN_RCVBUF + 1) / 2,
- - sysctl_rmem_max);
- + sock_net(sk)->core.sysctl_rmem_max);
- sk->sk_rcvbuf = val * 2;
- sk->sk_userlocks |= SOCK_RCVBUF_LOCK;
- }
- --
- 2.14.3
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement