net/ipv4/tcp_memcontrol.c
#include <net/tcp.h>
#include <net/tcp_memcontrol.h>
#include <net/sock.h>
#include <net/ip.h>
#include <linux/nsproxy.h>
#include <linux/memcontrol.h>
#include <linux/module.h>

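/*
 * Map a cg_proto back to its enclosing tcp_memcontrol. cg_proto is
 * embedded in struct tcp_memcontrol, so this is a plain container_of().
 */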
static inline struct tcp_memcontrol *tcp_from_cgproto(struct cg_proto *cg_proto)
{
        return container_of(cg_proto, struct tcp_memcontrol, cg_proto);
}

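/*
 * Per-memcg counterpart of tcp_enter_memory_pressure(): raise the
 * memory_pressure flag of the cgroup this socket is accounted to.
 */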
static void memcg_tcp_enter_memory_pressure(struct sock *sk)
{
        if (sk->sk_cgrp->memory_pressure)
                *sk->sk_cgrp->memory_pressure = 1;
}
EXPORT_SYMBOL(memcg_tcp_enter_memory_pressure);

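/*
 * Set up the TCP side of a memory cgroup: seed the per-memcg tcp_mem[]
 * thresholds from the current net namespace's sysctl values, initialise
 * the res_counter (chained to the parent memcg, if any) and the percpu
 * socket counter, and wire the callbacks and pointers into cg_proto so
 * the generic socket accounting code can use them.
 */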
int tcp_init_cgroup(struct mem_cgroup *memcg, struct cgroup_subsys *ss)
{
        /*
         * The root cgroup does not use res_counters, but rather relies
         * on the data already collected by the network subsystem.
         */
        struct res_counter *res_parent = NULL;
        struct cg_proto *cg_proto, *parent_cg;
        struct tcp_memcontrol *tcp;
        struct mem_cgroup *parent = parent_mem_cgroup(memcg);
        struct net *net = current->nsproxy->net_ns;

        cg_proto = tcp_prot.proto_cgroup(memcg);
        if (!cg_proto)
                return 0;

        tcp = tcp_from_cgproto(cg_proto);

        tcp->tcp_prot_mem[0] = net->ipv4.sysctl_tcp_mem[0];
        tcp->tcp_prot_mem[1] = net->ipv4.sysctl_tcp_mem[1];
        tcp->tcp_prot_mem[2] = net->ipv4.sysctl_tcp_mem[2];
        tcp->tcp_memory_pressure = 0;

        parent_cg = tcp_prot.proto_cgroup(parent);
        if (parent_cg)
                res_parent = parent_cg->memory_allocated;

        res_counter_init(&tcp->tcp_memory_allocated, res_parent);
        percpu_counter_init(&tcp->tcp_sockets_allocated, 0);

        cg_proto->enter_memory_pressure = memcg_tcp_enter_memory_pressure;
        cg_proto->memory_pressure = &tcp->tcp_memory_pressure;
        cg_proto->sysctl_mem = tcp->tcp_prot_mem;
        cg_proto->memory_allocated = &tcp->tcp_memory_allocated;
        cg_proto->sockets_allocated = &tcp->tcp_sockets_allocated;
        cg_proto->memcg = memcg;

        return 0;
}
EXPORT_SYMBOL(tcp_init_cgroup);

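/*
 * Tear down the TCP state of a memory cgroup. Only the percpu socket
 * counter needs explicit destruction; the res_counter has no teardown.
 */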
void tcp_destroy_cgroup(struct mem_cgroup *memcg)
{
        struct cg_proto *cg_proto;
        struct tcp_memcontrol *tcp;

        cg_proto = tcp_prot.proto_cgroup(memcg);
        if (!cg_proto)
                return;

        tcp = tcp_from_cgproto(cg_proto);
        percpu_counter_destroy(&tcp->tcp_sockets_allocated);
}
EXPORT_SYMBOL(tcp_destroy_cgroup);

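/*
 * Apply a new kmem.tcp limit (in bytes) to a memcg: clamp it to
 * RES_COUNTER_MAX, update the res_counter limit, derive the per-memcg
 * tcp_mem[] thresholds (in pages, never above the namespace sysctls),
 * and flip the static key and cg_proto flags that switch per-memcg
 * socket accounting on or off.
 */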
static int tcp_update_limit(struct mem_cgroup *memcg, u64 val)
{
        struct net *net = current->nsproxy->net_ns;
        struct tcp_memcontrol *tcp;
        struct cg_proto *cg_proto;
        u64 old_lim;
        int i;
        int ret;

        cg_proto = tcp_prot.proto_cgroup(memcg);
        if (!cg_proto)
                return -EINVAL;

        if (val > RES_COUNTER_MAX)
                val = RES_COUNTER_MAX;

        tcp = tcp_from_cgproto(cg_proto);

        old_lim = res_counter_read_u64(&tcp->tcp_memory_allocated, RES_LIMIT);
        ret = res_counter_set_limit(&tcp->tcp_memory_allocated, val);
        if (ret)
                return ret;

        for (i = 0; i < 3; i++)
                tcp->tcp_prot_mem[i] = min_t(long, val >> PAGE_SHIFT,
                                             net->ipv4.sysctl_tcp_mem[i]);

        if (val == RES_COUNTER_MAX) {
                clear_bit(MEMCG_SOCK_ACTIVE, &cg_proto->flags);
        } else {
                /*
                 * The active bit needs to be written after the static_key
                 * update. This is what guarantees that the socket activation
                 * function is the last one to run. See sock_update_memcg() for
                 * details, and note that we don't mark any socket as belonging
                 * to this memcg until that flag is up.
                 *
                 * We need to do this, because static_keys will span multiple
                 * sites, but we can't control their order. If we mark a socket
                 * as accounted, but the accounting functions are not patched in
                 * yet, we'll lose accounting.
                 *
                 * We never race with the readers in sock_update_memcg(),
                 * because when this value changes, the code to process it is
                 * not patched in yet.
                 *
                 * The activated bit is used to guarantee that no two writers
                 * will do the update in the same memcg. Without that, we can't
                 * properly shutdown the static key.
                 */
                if (!test_and_set_bit(MEMCG_SOCK_ACTIVATED, &cg_proto->flags))
                        static_key_slow_inc(&memcg_socket_limit_enabled);
                set_bit(MEMCG_SOCK_ACTIVE, &cg_proto->flags);
        }

        return 0;
}

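/*
 * Write handler for the kmem.tcp.* control files. Only the limit is
 * writable; the value is parsed with the same memparse-based strategy
 * used in memcontrol.c and handed to tcp_update_limit().
 */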
static int tcp_cgroup_write(struct cgroup_subsys_state *css, struct cftype *cft,
                            const char *buffer)
{
        struct mem_cgroup *memcg = mem_cgroup_from_css(css);
        unsigned long long val;
        int ret = 0;

        switch (cft->private) {
        case RES_LIMIT:
                /* see memcontrol.c */
                ret = res_counter_memparse_write_strategy(buffer, &val);
                if (ret)
                        break;
                ret = tcp_update_limit(memcg, val);
                break;
        default:
                ret = -EINVAL;
                break;
        }
        return ret;
}

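/*
 * Read one res_counter member (RES_LIMIT, RES_USAGE, RES_FAILCNT or
 * RES_MAX_USAGE) for this memcg, falling back to default_val when the
 * cgroup has no TCP accounting state of its own (e.g. the root cgroup).
 */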
static u64 tcp_read_stat(struct mem_cgroup *memcg, int type, u64 default_val)
{
        struct tcp_memcontrol *tcp;
        struct cg_proto *cg_proto;

        cg_proto = tcp_prot.proto_cgroup(memcg);
        if (!cg_proto)
                return default_val;

        tcp = tcp_from_cgproto(cg_proto);
        return res_counter_read_u64(&tcp->tcp_memory_allocated, type);
}

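/*
 * Current TCP memory usage in bytes. Cgroups without their own counter
 * report the global tcp_memory_allocated page count instead.
 */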
static u64 tcp_read_usage(struct mem_cgroup *memcg)
{
        struct tcp_memcontrol *tcp;
        struct cg_proto *cg_proto;

        cg_proto = tcp_prot.proto_cgroup(memcg);
        if (!cg_proto)
                return atomic_long_read(&tcp_memory_allocated) << PAGE_SHIFT;

        tcp = tcp_from_cgproto(cg_proto);
        return res_counter_read_u64(&tcp->tcp_memory_allocated, RES_USAGE);
}

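/*
 * Read handler for the kmem.tcp.* control files; dispatches on
 * cft->private to the matching res_counter member.
 */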
static u64 tcp_cgroup_read(struct cgroup_subsys_state *css, struct cftype *cft)
{
        struct mem_cgroup *memcg = mem_cgroup_from_css(css);
        u64 val;

        switch (cft->private) {
        case RES_LIMIT:
                val = tcp_read_stat(memcg, RES_LIMIT, RES_COUNTER_MAX);
                break;
        case RES_USAGE:
                val = tcp_read_usage(memcg);
                break;
        case RES_FAILCNT:
        case RES_MAX_USAGE:
                val = tcp_read_stat(memcg, cft->private, 0);
                break;
        default:
                BUG();
        }
        return val;
}

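/*
 * Trigger handler: writing to kmem.tcp.max_usage_in_bytes or
 * kmem.tcp.failcnt resets the corresponding res_counter statistic.
 */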
static int tcp_cgroup_reset(struct cgroup_subsys_state *css, unsigned int event)
{
        struct mem_cgroup *memcg;
        struct tcp_memcontrol *tcp;
        struct cg_proto *cg_proto;

        memcg = mem_cgroup_from_css(css);
        cg_proto = tcp_prot.proto_cgroup(memcg);
        if (!cg_proto)
                return 0;
        tcp = tcp_from_cgproto(cg_proto);

        switch (event) {
        case RES_MAX_USAGE:
                res_counter_reset_max(&tcp->tcp_memory_allocated);
                break;
        case RES_FAILCNT:
                res_counter_reset_failcnt(&tcp->tcp_memory_allocated);
                break;
        }

        return 0;
}

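/*
 * Keep the per-memcg tcp_mem[] thresholds in sync when the global
 * net.ipv4.tcp_mem values change; used by the sysctl update path.
 */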
void tcp_prot_mem(struct mem_cgroup *memcg, long val, int idx)
{
        struct tcp_memcontrol *tcp;
        struct cg_proto *cg_proto;

        cg_proto = tcp_prot.proto_cgroup(memcg);
        if (!cg_proto)
                return;

        tcp = tcp_from_cgproto(cg_proto);

        tcp->tcp_prot_mem[idx] = val;
}

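/*
 * Control files attached to the memory controller. The memcg core
 * prefixes them with "memory.", so with the memory controller mounted
 * at, for example, /sys/fs/cgroup/memory (the mount point is only an
 * illustration), a group exposes:
 *
 *   memory.kmem.tcp.limit_in_bytes      (read/write limit)
 *   memory.kmem.tcp.usage_in_bytes      (read-only usage)
 *   memory.kmem.tcp.failcnt             (read, write-to-reset)
 *   memory.kmem.tcp.max_usage_in_bytes  (read, write-to-reset)
 */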
static struct cftype tcp_files[] = {
        {
                .name = "kmem.tcp.limit_in_bytes",
                .write_string = tcp_cgroup_write,
                .read_u64 = tcp_cgroup_read,
                .private = RES_LIMIT,
        },
        {
                .name = "kmem.tcp.usage_in_bytes",
                .read_u64 = tcp_cgroup_read,
                .private = RES_USAGE,
        },
        {
                .name = "kmem.tcp.failcnt",
                .private = RES_FAILCNT,
                .trigger = tcp_cgroup_reset,
                .read_u64 = tcp_cgroup_read,
        },
        {
                .name = "kmem.tcp.max_usage_in_bytes",
                .private = RES_MAX_USAGE,
                .trigger = tcp_cgroup_reset,
                .read_u64 = tcp_cgroup_read,
        },
        { }     /* terminate */
};

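/*
 * Register the kmem.tcp.* files with the memory cgroup subsystem at
 * boot; failure is only warned about, not treated as fatal.
 */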
static int __init tcp_memcontrol_init(void)
{
        WARN_ON(cgroup_add_cftypes(&mem_cgroup_subsys, tcp_files));
        return 0;
}
__initcall(tcp_memcontrol_init);