/* net/ipv4/tcp_memcontrol.c */
#include <net/tcp.h>
#include <net/tcp_memcontrol.h>
#include <net/sock.h>
#include <net/ip.h>
#include <linux/nsproxy.h>
#include <linux/memcontrol.h>
#include <linux/module.h>

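/*
 * The cg_proto handed around by the networking code is embedded in a
 * struct tcp_memcontrol; recover the enclosing structure with
 * container_of().
 */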
static inline struct tcp_memcontrol *tcp_from_cgproto(struct cg_proto *cg_proto)
{
        return container_of(cg_proto, struct tcp_memcontrol, cg_proto);
}

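/*
 * Per-memcg variant of tcp_enter_memory_pressure(): raise the pressure
 * flag of the cgroup the socket is accounted to, rather than the global
 * tcp_memory_pressure flag.
 */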
static void memcg_tcp_enter_memory_pressure(struct sock *sk)
{
        if (sk->sk_cgrp->memory_pressure)
                *sk->sk_cgrp->memory_pressure = 1;
}
EXPORT_SYMBOL(memcg_tcp_enter_memory_pressure);

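/*
 * Called while a memory cgroup is being set up: seed the per-memcg TCP
 * pressure thresholds from the global sysctl_tcp_mem values, initialize
 * the counters (parented to the parent memcg's counter, so charges
 * propagate up the hierarchy), and wire up the cg_proto callbacks and
 * pointers that the socket accounting code consumes.
 */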
int tcp_init_cgroup(struct mem_cgroup *memcg, struct cgroup_subsys *ss)
{
        /*
         * The root cgroup does not use res_counters, but rather
         * relies on the data already collected by the network
         * subsystem.
         */
        struct res_counter *res_parent = NULL;
        struct cg_proto *cg_proto, *parent_cg;
        struct tcp_memcontrol *tcp;
        struct mem_cgroup *parent = parent_mem_cgroup(memcg);

        cg_proto = tcp_prot.proto_cgroup(memcg);
        if (!cg_proto)
                return 0;

        tcp = tcp_from_cgproto(cg_proto);

        tcp->tcp_prot_mem[0] = sysctl_tcp_mem[0];
        tcp->tcp_prot_mem[1] = sysctl_tcp_mem[1];
        tcp->tcp_prot_mem[2] = sysctl_tcp_mem[2];
        tcp->tcp_memory_pressure = 0;

        parent_cg = tcp_prot.proto_cgroup(parent);
        if (parent_cg)
                res_parent = parent_cg->memory_allocated;

        res_counter_init(&tcp->tcp_memory_allocated, res_parent);
        percpu_counter_init(&tcp->tcp_sockets_allocated, 0);

        cg_proto->enter_memory_pressure = memcg_tcp_enter_memory_pressure;
        cg_proto->memory_pressure = &tcp->tcp_memory_pressure;
        cg_proto->sysctl_mem = tcp->tcp_prot_mem;
        cg_proto->memory_allocated = &tcp->tcp_memory_allocated;
        cg_proto->sockets_allocated = &tcp->tcp_sockets_allocated;
        cg_proto->memcg = memcg;

        return 0;
}
EXPORT_SYMBOL(tcp_init_cgroup);

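/*
 * Teardown counterpart of tcp_init_cgroup(): the only resource needing
 * explicit destruction is the percpu counter; the res_counter itself
 * holds no separate allocation.
 */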
void tcp_destroy_cgroup(struct mem_cgroup *memcg)
{
        struct cg_proto *cg_proto;
        struct tcp_memcontrol *tcp;

        cg_proto = tcp_prot.proto_cgroup(memcg);
        if (!cg_proto)
                return;

        tcp = tcp_from_cgproto(cg_proto);
        percpu_counter_destroy(&tcp->tcp_sockets_allocated);
}
EXPORT_SYMBOL(tcp_destroy_cgroup);

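/*
 * Apply a new kmem.tcp.limit_in_bytes value: update the res_counter
 * limit, clamp the three per-memcg pressure thresholds (in pages, hence
 * the PAGE_SHIFT conversion) to the global sysctl_tcp_mem values, and
 * flip the static key / flag machinery that enables per-socket
 * accounting.
 */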
static int tcp_update_limit(struct mem_cgroup *memcg, u64 val)
{
        struct tcp_memcontrol *tcp;
        struct cg_proto *cg_proto;
        int i;
        int ret;

        cg_proto = tcp_prot.proto_cgroup(memcg);
        if (!cg_proto)
                return -EINVAL;

        if (val > RES_COUNTER_MAX)
                val = RES_COUNTER_MAX;

        tcp = tcp_from_cgproto(cg_proto);

        ret = res_counter_set_limit(&tcp->tcp_memory_allocated, val);
        if (ret)
                return ret;

        for (i = 0; i < 3; i++)
                tcp->tcp_prot_mem[i] = min_t(long, val >> PAGE_SHIFT,
                                             sysctl_tcp_mem[i]);

        if (val == RES_COUNTER_MAX)
                clear_bit(MEMCG_SOCK_ACTIVE, &cg_proto->flags);
        else {
                /*
                 * The active bit needs to be written after the static_key
                 * update. This is what guarantees that the socket activation
                 * function is the last one to run. See sock_update_memcg() for
                 * details, and note that we don't mark any socket as belonging
                 * to this memcg until that flag is up.
                 *
                 * We need to do this, because static_keys will span multiple
                 * sites, but we can't control their order. If we mark a socket
                 * as accounted, but the accounting functions are not patched in
                 * yet, we'll lose accounting.
                 *
                 * We never race with the readers in sock_update_memcg(),
                 * because when this value changes, the code to process it is
                 * not patched in yet.
                 *
                 * The activated bit is used to guarantee that no two writers
                 * will do the update in the same memcg. Without that, we can't
                 * properly shut down the static key.
                 */
                if (!test_and_set_bit(MEMCG_SOCK_ACTIVATED, &cg_proto->flags))
                        static_key_slow_inc(&memcg_socket_limit_enabled);
                set_bit(MEMCG_SOCK_ACTIVE, &cg_proto->flags);
        }

        return 0;
}

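/*
 * cgroupfs write handler for the kmem.tcp.* files. Only RES_LIMIT is
 * writable; the value is parsed with the same memparse strategy used by
 * memcontrol.c (accepting suffixes such as "k", "m", "g", and "-1" for
 * unlimited).
 */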
static int tcp_cgroup_write(struct cgroup_subsys_state *css, struct cftype *cft,
                            const char *buffer)
{
        struct mem_cgroup *memcg = mem_cgroup_from_css(css);
        unsigned long long val;
        int ret = 0;

        switch (cft->private) {
        case RES_LIMIT:
                /* see memcontrol.c */
                ret = res_counter_memparse_write_strategy(buffer, &val);
                if (ret)
                        break;
                ret = tcp_update_limit(memcg, val);
                break;
        default:
                ret = -EINVAL;
                break;
        }
        return ret;
}

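/*
 * Read one field of the per-memcg res_counter, falling back to
 * default_val for the root cgroup, which has no counter of its own.
 */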
static u64 tcp_read_stat(struct mem_cgroup *memcg, int type, u64 default_val)
{
        struct tcp_memcontrol *tcp;
        struct cg_proto *cg_proto;

        cg_proto = tcp_prot.proto_cgroup(memcg);
        if (!cg_proto)
                return default_val;

        tcp = tcp_from_cgproto(cg_proto);
        return res_counter_read_u64(&tcp->tcp_memory_allocated, type);
}

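/*
 * Usage needs special casing: the root cgroup has no res_counter, so
 * report the global tcp_memory_allocated page count converted to bytes
 * instead.
 */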
static u64 tcp_read_usage(struct mem_cgroup *memcg)
{
        struct tcp_memcontrol *tcp;
        struct cg_proto *cg_proto;

        cg_proto = tcp_prot.proto_cgroup(memcg);
        if (!cg_proto)
                return atomic_long_read(&tcp_memory_allocated) << PAGE_SHIFT;

        tcp = tcp_from_cgproto(cg_proto);
        return res_counter_read_u64(&tcp->tcp_memory_allocated, RES_USAGE);
}

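/*
 * cgroupfs read handler: dispatch on the cftype's private tag to the
 * matching counter field.
 */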
static u64 tcp_cgroup_read(struct cgroup_subsys_state *css, struct cftype *cft)
{
        struct mem_cgroup *memcg = mem_cgroup_from_css(css);
        u64 val;

        switch (cft->private) {
        case RES_LIMIT:
                val = tcp_read_stat(memcg, RES_LIMIT, RES_COUNTER_MAX);
                break;
        case RES_USAGE:
                val = tcp_read_usage(memcg);
                break;
        case RES_FAILCNT:
        case RES_MAX_USAGE:
                val = tcp_read_stat(memcg, cft->private, 0);
                break;
        default:
                BUG();
        }
        return val;
}

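/*
 * Trigger handler for the write-to-reset files: clear the high-water
 * mark (max_usage) or the failure count on the per-memcg res_counter.
 */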
static int tcp_cgroup_reset(struct cgroup_subsys_state *css, unsigned int event)
{
        struct mem_cgroup *memcg;
        struct tcp_memcontrol *tcp;
        struct cg_proto *cg_proto;

        memcg = mem_cgroup_from_css(css);
        cg_proto = tcp_prot.proto_cgroup(memcg);
        if (!cg_proto)
                return 0;
        tcp = tcp_from_cgproto(cg_proto);

        switch (event) {
        case RES_MAX_USAGE:
                res_counter_reset_max(&tcp->tcp_memory_allocated);
                break;
        case RES_FAILCNT:
                res_counter_reset_failcnt(&tcp->tcp_memory_allocated);
                break;
        }

        return 0;
}

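/*
 * Control files exposed under the memory controller's directory:
 * limit_in_bytes is read/write, usage_in_bytes is read-only, and
 * failcnt/max_usage_in_bytes are read-only but reset on write via the
 * trigger callback.
 */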
static struct cftype tcp_files[] = {
        {
                .name = "kmem.tcp.limit_in_bytes",
                .write_string = tcp_cgroup_write,
                .read_u64 = tcp_cgroup_read,
                .private = RES_LIMIT,
        },
        {
                .name = "kmem.tcp.usage_in_bytes",
                .read_u64 = tcp_cgroup_read,
                .private = RES_USAGE,
        },
        {
                .name = "kmem.tcp.failcnt",
                .private = RES_FAILCNT,
                .trigger = tcp_cgroup_reset,
                .read_u64 = tcp_cgroup_read,
        },
        {
                .name = "kmem.tcp.max_usage_in_bytes",
                .private = RES_MAX_USAGE,
                .trigger = tcp_cgroup_reset,
                .read_u64 = tcp_cgroup_read,
        },
        { }     /* terminate */
};

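/*
 * Register the kmem.tcp.* files with the memory cgroup subsystem at
 * boot. Registration failure is not expected in practice, so it is only
 * warned about rather than propagated.
 */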
static int __init tcp_memcontrol_init(void)
{
        WARN_ON(cgroup_add_cftypes(&mem_cgroup_subsys, tcp_files));
        return 0;
}
__initcall(tcp_memcontrol_init);