]> Pileus Git - ~andy/linux/blob - net/dccp/proto.c
[DCCP]: Initialize dccp_sock before calling the ccid constructors
[~andy/linux] / net / dccp / proto.c
1 /*
2  *  net/dccp/proto.c
3  *
4  *  An implementation of the DCCP protocol
5  *  Arnaldo Carvalho de Melo <acme@conectiva.com.br>
6  *
7  *      This program is free software; you can redistribute it and/or modify it
8  *      under the terms of the GNU General Public License version 2 as
9  *      published by the Free Software Foundation.
10  */
11
12 #include <linux/dccp.h>
13 #include <linux/module.h>
14 #include <linux/types.h>
15 #include <linux/sched.h>
16 #include <linux/kernel.h>
17 #include <linux/skbuff.h>
18 #include <linux/netdevice.h>
19 #include <linux/in.h>
20 #include <linux/if_arp.h>
21 #include <linux/init.h>
22 #include <linux/random.h>
23 #include <net/checksum.h>
24
25 #include <net/inet_sock.h>
26 #include <net/sock.h>
27 #include <net/xfrm.h>
28
29 #include <asm/ioctls.h>
30 #include <asm/semaphore.h>
31 #include <linux/spinlock.h>
32 #include <linux/timer.h>
33 #include <linux/delay.h>
34 #include <linux/poll.h>
35
36 #include "ccid.h"
37 #include "dccp.h"
38 #include "feat.h"
39
40 DEFINE_SNMP_STAT(struct dccp_mib, dccp_statistics) __read_mostly;
41
42 EXPORT_SYMBOL_GPL(dccp_statistics);
43
44 atomic_t dccp_orphan_count = ATOMIC_INIT(0);
45
46 EXPORT_SYMBOL_GPL(dccp_orphan_count);
47
48 struct inet_hashinfo __cacheline_aligned dccp_hashinfo = {
49         .lhash_lock     = RW_LOCK_UNLOCKED,
50         .lhash_users    = ATOMIC_INIT(0),
51         .lhash_wait = __WAIT_QUEUE_HEAD_INITIALIZER(dccp_hashinfo.lhash_wait),
52 };
53
54 EXPORT_SYMBOL_GPL(dccp_hashinfo);
55
56 /* the maximum queue length for tx in packets. 0 is no limit */
57 int sysctl_dccp_tx_qlen __read_mostly = 5;
58
59 void dccp_set_state(struct sock *sk, const int state)
60 {
61         const int oldstate = sk->sk_state;
62
63         dccp_pr_debug("%s(%p) %-10.10s -> %s\n",
64                       dccp_role(sk), sk,
65                       dccp_state_name(oldstate), dccp_state_name(state));
66         WARN_ON(state == oldstate);
67
68         switch (state) {
69         case DCCP_OPEN:
70                 if (oldstate != DCCP_OPEN)
71                         DCCP_INC_STATS(DCCP_MIB_CURRESTAB);
72                 break;
73
74         case DCCP_CLOSED:
75                 if (oldstate == DCCP_CLOSING || oldstate == DCCP_OPEN)
76                         DCCP_INC_STATS(DCCP_MIB_ESTABRESETS);
77
78                 sk->sk_prot->unhash(sk);
79                 if (inet_csk(sk)->icsk_bind_hash != NULL &&
80                     !(sk->sk_userlocks & SOCK_BINDPORT_LOCK))
81                         inet_put_port(&dccp_hashinfo, sk);
82                 /* fall through */
83         default:
84                 if (oldstate == DCCP_OPEN)
85                         DCCP_DEC_STATS(DCCP_MIB_CURRESTAB);
86         }
87
88         /* Change state AFTER socket is unhashed to avoid closed
89          * socket sitting in hash tables.
90          */
91         sk->sk_state = state;
92 }
93
94 EXPORT_SYMBOL_GPL(dccp_set_state);
95
96 void dccp_done(struct sock *sk)
97 {
98         dccp_set_state(sk, DCCP_CLOSED);
99         dccp_clear_xmit_timers(sk);
100
101         sk->sk_shutdown = SHUTDOWN_MASK;
102
103         if (!sock_flag(sk, SOCK_DEAD))
104                 sk->sk_state_change(sk);
105         else
106                 inet_csk_destroy_sock(sk);
107 }
108
109 EXPORT_SYMBOL_GPL(dccp_done);
110
111 const char *dccp_packet_name(const int type)
112 {
113         static const char *dccp_packet_names[] = {
114                 [DCCP_PKT_REQUEST]  = "REQUEST",
115                 [DCCP_PKT_RESPONSE] = "RESPONSE",
116                 [DCCP_PKT_DATA]     = "DATA",
117                 [DCCP_PKT_ACK]      = "ACK",
118                 [DCCP_PKT_DATAACK]  = "DATAACK",
119                 [DCCP_PKT_CLOSEREQ] = "CLOSEREQ",
120                 [DCCP_PKT_CLOSE]    = "CLOSE",
121                 [DCCP_PKT_RESET]    = "RESET",
122                 [DCCP_PKT_SYNC]     = "SYNC",
123                 [DCCP_PKT_SYNCACK]  = "SYNCACK",
124         };
125
126         if (type >= DCCP_NR_PKT_TYPES)
127                 return "INVALID";
128         else
129                 return dccp_packet_names[type];
130 }
131
132 EXPORT_SYMBOL_GPL(dccp_packet_name);
133
134 const char *dccp_state_name(const int state)
135 {
136         static char *dccp_state_names[] = {
137         [DCCP_OPEN]       = "OPEN",
138         [DCCP_REQUESTING] = "REQUESTING",
139         [DCCP_PARTOPEN]   = "PARTOPEN",
140         [DCCP_LISTEN]     = "LISTEN",
141         [DCCP_RESPOND]    = "RESPOND",
142         [DCCP_CLOSING]    = "CLOSING",
143         [DCCP_TIME_WAIT]  = "TIME_WAIT",
144         [DCCP_CLOSED]     = "CLOSED",
145         };
146
147         if (state >= DCCP_MAX_STATES)
148                 return "INVALID STATE!";
149         else
150                 return dccp_state_names[state];
151 }
152
153 EXPORT_SYMBOL_GPL(dccp_state_name);
154
155 void dccp_hash(struct sock *sk)
156 {
157         inet_hash(&dccp_hashinfo, sk);
158 }
159
160 EXPORT_SYMBOL_GPL(dccp_hash);
161
162 void dccp_unhash(struct sock *sk)
163 {
164         inet_unhash(&dccp_hashinfo, sk);
165 }
166
167 EXPORT_SYMBOL_GPL(dccp_unhash);
168
169 int dccp_init_sock(struct sock *sk, const __u8 ctl_sock_initialized)
170 {
171         struct dccp_sock *dp = dccp_sk(sk);
172         struct dccp_minisock *dmsk = dccp_msk(sk);
173         struct inet_connection_sock *icsk = inet_csk(sk);
174
175         dccp_minisock_init(&dp->dccps_minisock);
176
177         icsk->icsk_rto          = DCCP_TIMEOUT_INIT;
178         icsk->icsk_syn_retries  = sysctl_dccp_request_retries;
179         sk->sk_state            = DCCP_CLOSED;
180         sk->sk_write_space      = dccp_write_space;
181         icsk->icsk_sync_mss     = dccp_sync_mss;
182         dp->dccps_mss_cache     = 536;
183         dp->dccps_rate_last     = jiffies;
184         dp->dccps_role          = DCCP_ROLE_UNDEFINED;
185         dp->dccps_service       = DCCP_SERVICE_CODE_IS_ABSENT;
186         dp->dccps_l_ack_ratio   = dp->dccps_r_ack_ratio = 1;
187
188         dccp_init_xmit_timers(sk);
189
190         /*
191          * FIXME: We're hardcoding the CCID, and doing this at this point makes
192          * the listening (master) sock get CCID control blocks, which is not
193          * necessary, but for now, to not mess with the test userspace apps,
194          * lets leave it here, later the real solution is to do this in a
195          * setsockopt(CCIDs-I-want/accept). -acme
196          */
197         if (likely(ctl_sock_initialized)) {
198                 int rc = dccp_feat_init(dmsk);
199
200                 if (rc)
201                         return rc;
202
203                 if (dmsk->dccpms_send_ack_vector) {
204                         dp->dccps_hc_rx_ackvec = dccp_ackvec_alloc(GFP_KERNEL);
205                         if (dp->dccps_hc_rx_ackvec == NULL)
206                                 return -ENOMEM;
207                 }
208                 dp->dccps_hc_rx_ccid = ccid_hc_rx_new(dmsk->dccpms_rx_ccid,
209                                                       sk, GFP_KERNEL);
210                 dp->dccps_hc_tx_ccid = ccid_hc_tx_new(dmsk->dccpms_tx_ccid,
211                                                       sk, GFP_KERNEL);
212                 if (unlikely(dp->dccps_hc_rx_ccid == NULL ||
213                              dp->dccps_hc_tx_ccid == NULL)) {
214                         ccid_hc_rx_delete(dp->dccps_hc_rx_ccid, sk);
215                         ccid_hc_tx_delete(dp->dccps_hc_tx_ccid, sk);
216                         if (dmsk->dccpms_send_ack_vector) {
217                                 dccp_ackvec_free(dp->dccps_hc_rx_ackvec);
218                                 dp->dccps_hc_rx_ackvec = NULL;
219                         }
220                         dp->dccps_hc_rx_ccid = dp->dccps_hc_tx_ccid = NULL;
221                         return -ENOMEM;
222                 }
223         } else {
224                 /* control socket doesn't need feat nego */
225                 INIT_LIST_HEAD(&dmsk->dccpms_pending);
226                 INIT_LIST_HEAD(&dmsk->dccpms_conf);
227         }
228
229         return 0;
230 }
231
232 EXPORT_SYMBOL_GPL(dccp_init_sock);
233
234 int dccp_destroy_sock(struct sock *sk)
235 {
236         struct dccp_sock *dp = dccp_sk(sk);
237         struct dccp_minisock *dmsk = dccp_msk(sk);
238
239         /*
240          * DCCP doesn't use sk_write_queue, just sk_send_head
241          * for retransmissions
242          */
243         if (sk->sk_send_head != NULL) {
244                 kfree_skb(sk->sk_send_head);
245                 sk->sk_send_head = NULL;
246         }
247
248         /* Clean up a referenced DCCP bind bucket. */
249         if (inet_csk(sk)->icsk_bind_hash != NULL)
250                 inet_put_port(&dccp_hashinfo, sk);
251
252         kfree(dp->dccps_service_list);
253         dp->dccps_service_list = NULL;
254
255         if (dmsk->dccpms_send_ack_vector) {
256                 dccp_ackvec_free(dp->dccps_hc_rx_ackvec);
257                 dp->dccps_hc_rx_ackvec = NULL;
258         }
259         ccid_hc_rx_delete(dp->dccps_hc_rx_ccid, sk);
260         ccid_hc_tx_delete(dp->dccps_hc_tx_ccid, sk);
261         dp->dccps_hc_rx_ccid = dp->dccps_hc_tx_ccid = NULL;
262
263         /* clean up feature negotiation state */
264         dccp_feat_clean(dmsk);
265
266         return 0;
267 }
268
269 EXPORT_SYMBOL_GPL(dccp_destroy_sock);
270
271 static inline int dccp_listen_start(struct sock *sk, int backlog)
272 {
273         struct dccp_sock *dp = dccp_sk(sk);
274
275         dp->dccps_role = DCCP_ROLE_LISTEN;
276         return inet_csk_listen_start(sk, backlog);
277 }
278
279 int dccp_disconnect(struct sock *sk, int flags)
280 {
281         struct inet_connection_sock *icsk = inet_csk(sk);
282         struct inet_sock *inet = inet_sk(sk);
283         int err = 0;
284         const int old_state = sk->sk_state;
285
286         if (old_state != DCCP_CLOSED)
287                 dccp_set_state(sk, DCCP_CLOSED);
288
289         /* ABORT function of RFC793 */
290         if (old_state == DCCP_LISTEN) {
291                 inet_csk_listen_stop(sk);
292         /* FIXME: do the active reset thing */
293         } else if (old_state == DCCP_REQUESTING)
294                 sk->sk_err = ECONNRESET;
295
296         dccp_clear_xmit_timers(sk);
297         __skb_queue_purge(&sk->sk_receive_queue);
298         if (sk->sk_send_head != NULL) {
299                 __kfree_skb(sk->sk_send_head);
300                 sk->sk_send_head = NULL;
301         }
302
303         inet->dport = 0;
304
305         if (!(sk->sk_userlocks & SOCK_BINDADDR_LOCK))
306                 inet_reset_saddr(sk);
307
308         sk->sk_shutdown = 0;
309         sock_reset_flag(sk, SOCK_DONE);
310
311         icsk->icsk_backoff = 0;
312         inet_csk_delack_init(sk);
313         __sk_dst_reset(sk);
314
315         BUG_TRAP(!inet->num || icsk->icsk_bind_hash);
316
317         sk->sk_error_report(sk);
318         return err;
319 }
320
321 EXPORT_SYMBOL_GPL(dccp_disconnect);
322
323 /*
324  *      Wait for a DCCP event.
325  *
326  *      Note that we don't need to lock the socket, as the upper poll layers
327  *      take care of normal races (between the test and the event) and we don't
328  *      go look at any of the socket buffers directly.
329  */
330 unsigned int dccp_poll(struct file *file, struct socket *sock,
331                        poll_table *wait)
332 {
333         unsigned int mask;
334         struct sock *sk = sock->sk;
335
336         poll_wait(file, sk->sk_sleep, wait);
337         if (sk->sk_state == DCCP_LISTEN)
338                 return inet_csk_listen_poll(sk);
339
340         /* Socket is not locked. We are protected from async events
341            by poll logic and correct handling of state changes
342            made by another threads is impossible in any case.
343          */
344
345         mask = 0;
346         if (sk->sk_err)
347                 mask = POLLERR;
348
349         if (sk->sk_shutdown == SHUTDOWN_MASK || sk->sk_state == DCCP_CLOSED)
350                 mask |= POLLHUP;
351         if (sk->sk_shutdown & RCV_SHUTDOWN)
352                 mask |= POLLIN | POLLRDNORM | POLLRDHUP;
353
354         /* Connected? */
355         if ((1 << sk->sk_state) & ~(DCCPF_REQUESTING | DCCPF_RESPOND)) {
356                 if (atomic_read(&sk->sk_rmem_alloc) > 0)
357                         mask |= POLLIN | POLLRDNORM;
358
359                 if (!(sk->sk_shutdown & SEND_SHUTDOWN)) {
360                         if (sk_stream_wspace(sk) >= sk_stream_min_wspace(sk)) {
361                                 mask |= POLLOUT | POLLWRNORM;
362                         } else {  /* send SIGIO later */
363                                 set_bit(SOCK_ASYNC_NOSPACE,
364                                         &sk->sk_socket->flags);
365                                 set_bit(SOCK_NOSPACE, &sk->sk_socket->flags);
366
367                                 /* Race breaker. If space is freed after
368                                  * wspace test but before the flags are set,
369                                  * IO signal will be lost.
370                                  */
371                                 if (sk_stream_wspace(sk) >= sk_stream_min_wspace(sk))
372                                         mask |= POLLOUT | POLLWRNORM;
373                         }
374                 }
375         }
376         return mask;
377 }
378
379 EXPORT_SYMBOL_GPL(dccp_poll);
380
381 int dccp_ioctl(struct sock *sk, int cmd, unsigned long arg)
382 {
383         int rc = -ENOTCONN;
384
385         lock_sock(sk);
386
387         if (sk->sk_state == DCCP_LISTEN)
388                 goto out;
389
390         switch (cmd) {
391         case SIOCINQ: {
392                 struct sk_buff *skb;
393                 unsigned long amount = 0;
394
395                 skb = skb_peek(&sk->sk_receive_queue);
396                 if (skb != NULL) {
397                         /*
398                          * We will only return the amount of this packet since
399                          * that is all that will be read.
400                          */
401                         amount = skb->len;
402                 }
403                 rc = put_user(amount, (int __user *)arg);
404         }
405                 break;
406         default:
407                 rc = -ENOIOCTLCMD;
408                 break;
409         }
410 out:
411         release_sock(sk);
412         return rc;
413 }
414
415 EXPORT_SYMBOL_GPL(dccp_ioctl);
416
417 static int dccp_setsockopt_service(struct sock *sk, const __be32 service,
418                                    char __user *optval, int optlen)
419 {
420         struct dccp_sock *dp = dccp_sk(sk);
421         struct dccp_service_list *sl = NULL;
422
423         if (service == DCCP_SERVICE_INVALID_VALUE ||
424             optlen > DCCP_SERVICE_LIST_MAX_LEN * sizeof(u32))
425                 return -EINVAL;
426
427         if (optlen > sizeof(service)) {
428                 sl = kmalloc(optlen, GFP_KERNEL);
429                 if (sl == NULL)
430                         return -ENOMEM;
431
432                 sl->dccpsl_nr = optlen / sizeof(u32) - 1;
433                 if (copy_from_user(sl->dccpsl_list,
434                                    optval + sizeof(service),
435                                    optlen - sizeof(service)) ||
436                     dccp_list_has_service(sl, DCCP_SERVICE_INVALID_VALUE)) {
437                         kfree(sl);
438                         return -EFAULT;
439                 }
440         }
441
442         lock_sock(sk);
443         dp->dccps_service = service;
444
445         kfree(dp->dccps_service_list);
446
447         dp->dccps_service_list = sl;
448         release_sock(sk);
449         return 0;
450 }
451
452 /* byte 1 is feature.  the rest is the preference list */
453 static int dccp_setsockopt_change(struct sock *sk, int type,
454                                   struct dccp_so_feat __user *optval)
455 {
456         struct dccp_so_feat opt;
457         u8 *val;
458         int rc;
459
460         if (copy_from_user(&opt, optval, sizeof(opt)))
461                 return -EFAULT;
462
463         val = kmalloc(opt.dccpsf_len, GFP_KERNEL);
464         if (!val)
465                 return -ENOMEM;
466
467         if (copy_from_user(val, opt.dccpsf_val, opt.dccpsf_len)) {
468                 rc = -EFAULT;
469                 goto out_free_val;
470         }
471
472         rc = dccp_feat_change(dccp_msk(sk), type, opt.dccpsf_feat,
473                               val, opt.dccpsf_len, GFP_KERNEL);
474         if (rc)
475                 goto out_free_val;
476
477 out:
478         return rc;
479
480 out_free_val:
481         kfree(val);
482         goto out;
483 }
484
485 static int do_dccp_setsockopt(struct sock *sk, int level, int optname,
486                 char __user *optval, int optlen)
487 {
488         struct dccp_sock *dp = dccp_sk(sk);
489         int val, err = 0;
490
491         if (optlen < sizeof(int))
492                 return -EINVAL;
493
494         if (get_user(val, (int __user *)optval))
495                 return -EFAULT;
496
497         if (optname == DCCP_SOCKOPT_SERVICE)
498                 return dccp_setsockopt_service(sk, val, optval, optlen);
499
500         lock_sock(sk);
501         switch (optname) {
502         case DCCP_SOCKOPT_PACKET_SIZE:
503                 DCCP_WARN("sockopt(PACKET_SIZE) is deprecated: fix your app\n");
504                 err = 0;
505                 break;
506         case DCCP_SOCKOPT_CHANGE_L:
507                 if (optlen != sizeof(struct dccp_so_feat))
508                         err = -EINVAL;
509                 else
510                         err = dccp_setsockopt_change(sk, DCCPO_CHANGE_L,
511                                                      (struct dccp_so_feat __user *)
512                                                      optval);
513                 break;
514         case DCCP_SOCKOPT_CHANGE_R:
515                 if (optlen != sizeof(struct dccp_so_feat))
516                         err = -EINVAL;
517                 else
518                         err = dccp_setsockopt_change(sk, DCCPO_CHANGE_R,
519                                                      (struct dccp_so_feat __user *)
520                                                      optval);
521                 break;
522         case DCCP_SOCKOPT_SEND_CSCOV:   /* sender side, RFC 4340, sec. 9.2 */
523                 if (val < 0 || val > 15)
524                         err = -EINVAL;
525                 else
526                         dp->dccps_pcslen = val;
527                 break;
528         case DCCP_SOCKOPT_RECV_CSCOV:   /* receiver side, RFC 4340 sec. 9.2.1 */
529                 if (val < 0 || val > 15)
530                         err = -EINVAL;
531                 else {
532                         dp->dccps_pcrlen = val;
533                         /* FIXME: add feature negotiation,
534                          * ChangeL(MinimumChecksumCoverage, val) */
535                 }
536                 break;
537         default:
538                 err = -ENOPROTOOPT;
539                 break;
540         }
541
542         release_sock(sk);
543         return err;
544 }
545
546 int dccp_setsockopt(struct sock *sk, int level, int optname,
547                     char __user *optval, int optlen)
548 {
549         if (level != SOL_DCCP)
550                 return inet_csk(sk)->icsk_af_ops->setsockopt(sk, level,
551                                                              optname, optval,
552                                                              optlen);
553         return do_dccp_setsockopt(sk, level, optname, optval, optlen);
554 }
555
556 EXPORT_SYMBOL_GPL(dccp_setsockopt);
557
#ifdef CONFIG_COMPAT
/*
 * Compat setsockopt() entry point: SOL_DCCP options share
 * do_dccp_setsockopt() with the native path, every other level goes to
 * inet_csk_compat_setsockopt().
 */
int compat_dccp_setsockopt(struct sock *sk, int level, int optname,
			   char __user *optval, int optlen)
{
	if (level == SOL_DCCP)
		return do_dccp_setsockopt(sk, level, optname, optval, optlen);

	return inet_csk_compat_setsockopt(sk, level, optname, optval, optlen);
}

EXPORT_SYMBOL_GPL(compat_dccp_setsockopt);
#endif
570
571 static int dccp_getsockopt_service(struct sock *sk, int len,
572                                    __be32 __user *optval,
573                                    int __user *optlen)
574 {
575         const struct dccp_sock *dp = dccp_sk(sk);
576         const struct dccp_service_list *sl;
577         int err = -ENOENT, slen = 0, total_len = sizeof(u32);
578
579         lock_sock(sk);
580         if ((sl = dp->dccps_service_list) != NULL) {
581                 slen = sl->dccpsl_nr * sizeof(u32);
582                 total_len += slen;
583         }
584
585         err = -EINVAL;
586         if (total_len > len)
587                 goto out;
588
589         err = 0;
590         if (put_user(total_len, optlen) ||
591             put_user(dp->dccps_service, optval) ||
592             (sl != NULL && copy_to_user(optval + 1, sl->dccpsl_list, slen)))
593                 err = -EFAULT;
594 out:
595         release_sock(sk);
596         return err;
597 }
598
599 static int do_dccp_getsockopt(struct sock *sk, int level, int optname,
600                     char __user *optval, int __user *optlen)
601 {
602         struct dccp_sock *dp;
603         int val, len;
604
605         if (get_user(len, optlen))
606                 return -EFAULT;
607
608         if (len < (int)sizeof(int))
609                 return -EINVAL;
610
611         dp = dccp_sk(sk);
612
613         switch (optname) {
614         case DCCP_SOCKOPT_PACKET_SIZE:
615                 DCCP_WARN("sockopt(PACKET_SIZE) is deprecated: fix your app\n");
616                 return 0;
617         case DCCP_SOCKOPT_SERVICE:
618                 return dccp_getsockopt_service(sk, len,
619                                                (__be32 __user *)optval, optlen);
620         case DCCP_SOCKOPT_GET_CUR_MPS:
621                 val = dp->dccps_mss_cache;
622                 len = sizeof(val);
623                 break;
624         case DCCP_SOCKOPT_SEND_CSCOV:
625                 val = dp->dccps_pcslen;
626                 len = sizeof(val);
627                 break;
628         case DCCP_SOCKOPT_RECV_CSCOV:
629                 val = dp->dccps_pcrlen;
630                 len = sizeof(val);
631                 break;
632         case 128 ... 191:
633                 return ccid_hc_rx_getsockopt(dp->dccps_hc_rx_ccid, sk, optname,
634                                              len, (u32 __user *)optval, optlen);
635         case 192 ... 255:
636                 return ccid_hc_tx_getsockopt(dp->dccps_hc_tx_ccid, sk, optname,
637                                              len, (u32 __user *)optval, optlen);
638         default:
639                 return -ENOPROTOOPT;
640         }
641
642         if (put_user(len, optlen) || copy_to_user(optval, &val, len))
643                 return -EFAULT;
644
645         return 0;
646 }
647
648 int dccp_getsockopt(struct sock *sk, int level, int optname,
649                     char __user *optval, int __user *optlen)
650 {
651         if (level != SOL_DCCP)
652                 return inet_csk(sk)->icsk_af_ops->getsockopt(sk, level,
653                                                              optname, optval,
654                                                              optlen);
655         return do_dccp_getsockopt(sk, level, optname, optval, optlen);
656 }
657
658 EXPORT_SYMBOL_GPL(dccp_getsockopt);
659
#ifdef CONFIG_COMPAT
/*
 * Compat getsockopt() entry point: SOL_DCCP options share
 * do_dccp_getsockopt() with the native path, every other level goes to
 * inet_csk_compat_getsockopt().
 */
int compat_dccp_getsockopt(struct sock *sk, int level, int optname,
			   char __user *optval, int __user *optlen)
{
	if (level == SOL_DCCP)
		return do_dccp_getsockopt(sk, level, optname, optval, optlen);

	return inet_csk_compat_getsockopt(sk, level, optname, optval, optlen);
}

EXPORT_SYMBOL_GPL(compat_dccp_getsockopt);
#endif
672
673 int dccp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
674                  size_t len)
675 {
676         const struct dccp_sock *dp = dccp_sk(sk);
677         const int flags = msg->msg_flags;
678         const int noblock = flags & MSG_DONTWAIT;
679         struct sk_buff *skb;
680         int rc, size;
681         long timeo;
682
683         if (len > dp->dccps_mss_cache)
684                 return -EMSGSIZE;
685
686         lock_sock(sk);
687
688         if (sysctl_dccp_tx_qlen &&
689             (sk->sk_write_queue.qlen >= sysctl_dccp_tx_qlen)) {
690                 rc = -EAGAIN;
691                 goto out_release;
692         }
693
694         timeo = sock_sndtimeo(sk, noblock);
695
696         /*
697          * We have to use sk_stream_wait_connect here to set sk_write_pending,
698          * so that the trick in dccp_rcv_request_sent_state_process.
699          */
700         /* Wait for a connection to finish. */
701         if ((1 << sk->sk_state) & ~(DCCPF_OPEN | DCCPF_PARTOPEN))
702                 if ((rc = sk_stream_wait_connect(sk, &timeo)) != 0)
703                         goto out_release;
704
705         size = sk->sk_prot->max_header + len;
706         release_sock(sk);
707         skb = sock_alloc_send_skb(sk, size, noblock, &rc);
708         lock_sock(sk);
709         if (skb == NULL)
710                 goto out_release;
711
712         skb_reserve(skb, sk->sk_prot->max_header);
713         rc = memcpy_fromiovec(skb_put(skb, len), msg->msg_iov, len);
714         if (rc != 0)
715                 goto out_discard;
716
717         skb_queue_tail(&sk->sk_write_queue, skb);
718         dccp_write_xmit(sk,0);
719 out_release:
720         release_sock(sk);
721         return rc ? : len;
722 out_discard:
723         kfree_skb(skb);
724         goto out_release;
725 }
726
727 EXPORT_SYMBOL_GPL(dccp_sendmsg);
728
729 int dccp_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
730                  size_t len, int nonblock, int flags, int *addr_len)
731 {
732         const struct dccp_hdr *dh;
733         long timeo;
734
735         lock_sock(sk);
736
737         if (sk->sk_state == DCCP_LISTEN) {
738                 len = -ENOTCONN;
739                 goto out;
740         }
741
742         timeo = sock_rcvtimeo(sk, nonblock);
743
744         do {
745                 struct sk_buff *skb = skb_peek(&sk->sk_receive_queue);
746
747                 if (skb == NULL)
748                         goto verify_sock_status;
749
750                 dh = dccp_hdr(skb);
751
752                 if (dh->dccph_type == DCCP_PKT_DATA ||
753                     dh->dccph_type == DCCP_PKT_DATAACK)
754                         goto found_ok_skb;
755
756                 if (dh->dccph_type == DCCP_PKT_RESET ||
757                     dh->dccph_type == DCCP_PKT_CLOSE) {
758                         dccp_pr_debug("found fin ok!\n");
759                         len = 0;
760                         goto found_fin_ok;
761                 }
762                 dccp_pr_debug("packet_type=%s\n",
763                               dccp_packet_name(dh->dccph_type));
764                 sk_eat_skb(sk, skb, 0);
765 verify_sock_status:
766                 if (sock_flag(sk, SOCK_DONE)) {
767                         len = 0;
768                         break;
769                 }
770
771                 if (sk->sk_err) {
772                         len = sock_error(sk);
773                         break;
774                 }
775
776                 if (sk->sk_shutdown & RCV_SHUTDOWN) {
777                         len = 0;
778                         break;
779                 }
780
781                 if (sk->sk_state == DCCP_CLOSED) {
782                         if (!sock_flag(sk, SOCK_DONE)) {
783                                 /* This occurs when user tries to read
784                                  * from never connected socket.
785                                  */
786                                 len = -ENOTCONN;
787                                 break;
788                         }
789                         len = 0;
790                         break;
791                 }
792
793                 if (!timeo) {
794                         len = -EAGAIN;
795                         break;
796                 }
797
798                 if (signal_pending(current)) {
799                         len = sock_intr_errno(timeo);
800                         break;
801                 }
802
803                 sk_wait_data(sk, &timeo);
804                 continue;
805         found_ok_skb:
806                 if (len > skb->len)
807                         len = skb->len;
808                 else if (len < skb->len)
809                         msg->msg_flags |= MSG_TRUNC;
810
811                 if (skb_copy_datagram_iovec(skb, 0, msg->msg_iov, len)) {
812                         /* Exception. Bailout! */
813                         len = -EFAULT;
814                         break;
815                 }
816         found_fin_ok:
817                 if (!(flags & MSG_PEEK))
818                         sk_eat_skb(sk, skb, 0);
819                 break;
820         } while (1);
821 out:
822         release_sock(sk);
823         return len;
824 }
825
826 EXPORT_SYMBOL_GPL(dccp_recvmsg);
827
828 int inet_dccp_listen(struct socket *sock, int backlog)
829 {
830         struct sock *sk = sock->sk;
831         unsigned char old_state;
832         int err;
833
834         lock_sock(sk);
835
836         err = -EINVAL;
837         if (sock->state != SS_UNCONNECTED || sock->type != SOCK_DCCP)
838                 goto out;
839
840         old_state = sk->sk_state;
841         if (!((1 << old_state) & (DCCPF_CLOSED | DCCPF_LISTEN)))
842                 goto out;
843
844         /* Really, if the socket is already in listen state
845          * we can only allow the backlog to be adjusted.
846          */
847         if (old_state != DCCP_LISTEN) {
848                 /*
849                  * FIXME: here it probably should be sk->sk_prot->listen_start
850                  * see tcp_listen_start
851                  */
852                 err = dccp_listen_start(sk, backlog);
853                 if (err)
854                         goto out;
855         }
856         sk->sk_max_ack_backlog = backlog;
857         err = 0;
858
859 out:
860         release_sock(sk);
861         return err;
862 }
863
864 EXPORT_SYMBOL_GPL(inet_dccp_listen);
865
866 static const unsigned char dccp_new_state[] = {
867         /* current state:   new state:      action:     */
868         [0]               = DCCP_CLOSED,
869         [DCCP_OPEN]       = DCCP_CLOSING | DCCP_ACTION_FIN,
870         [DCCP_REQUESTING] = DCCP_CLOSED,
871         [DCCP_PARTOPEN]   = DCCP_CLOSING | DCCP_ACTION_FIN,
872         [DCCP_LISTEN]     = DCCP_CLOSED,
873         [DCCP_RESPOND]    = DCCP_CLOSED,
874         [DCCP_CLOSING]    = DCCP_CLOSED,
875         [DCCP_TIME_WAIT]  = DCCP_CLOSED,
876         [DCCP_CLOSED]     = DCCP_CLOSED,
877 };
878
879 static int dccp_close_state(struct sock *sk)
880 {
881         const int next = dccp_new_state[sk->sk_state];
882         const int ns = next & DCCP_STATE_MASK;
883
884         if (ns != sk->sk_state)
885                 dccp_set_state(sk, ns);
886
887         return next & DCCP_ACTION_FIN;
888 }
889
890 void dccp_close(struct sock *sk, long timeout)
891 {
892         struct dccp_sock *dp = dccp_sk(sk);
893         struct sk_buff *skb;
894         int state;
895
896         lock_sock(sk);
897
898         sk->sk_shutdown = SHUTDOWN_MASK;
899
900         if (sk->sk_state == DCCP_LISTEN) {
901                 dccp_set_state(sk, DCCP_CLOSED);
902
903                 /* Special case. */
904                 inet_csk_listen_stop(sk);
905
906                 goto adjudge_to_death;
907         }
908
909         sk_stop_timer(sk, &dp->dccps_xmit_timer);
910
911         /*
912          * We need to flush the recv. buffs.  We do this only on the
913          * descriptor close, not protocol-sourced closes, because the
914           *reader process may not have drained the data yet!
915          */
916         /* FIXME: check for unread data */
917         while ((skb = __skb_dequeue(&sk->sk_receive_queue)) != NULL) {
918                 __kfree_skb(skb);
919         }
920
921         if (sock_flag(sk, SOCK_LINGER) && !sk->sk_lingertime) {
922                 /* Check zero linger _after_ checking for unread data. */
923                 sk->sk_prot->disconnect(sk, 0);
924         } else if (dccp_close_state(sk)) {
925                 dccp_send_close(sk, 1);
926         }
927
928         sk_stream_wait_close(sk, timeout);
929
930 adjudge_to_death:
931         state = sk->sk_state;
932         sock_hold(sk);
933         sock_orphan(sk);
934         atomic_inc(sk->sk_prot->orphan_count);
935
936         /*
937          * It is the last release_sock in its life. It will remove backlog.
938          */
939         release_sock(sk);
940         /*
941          * Now socket is owned by kernel and we acquire BH lock
942          * to finish close. No need to check for user refs.
943          */
944         local_bh_disable();
945         bh_lock_sock(sk);
946         BUG_TRAP(!sock_owned_by_user(sk));
947
948         /* Have we already been destroyed by a softirq or backlog? */
949         if (state != DCCP_CLOSED && sk->sk_state == DCCP_CLOSED)
950                 goto out;
951
952         /*
953          * The last release_sock may have processed the CLOSE or RESET
954          * packet moving sock to CLOSED state, if not we have to fire
955          * the CLOSE/CLOSEREQ retransmission timer, see "8.3. Termination"
956          * in draft-ietf-dccp-spec-11. -acme
957          */
958         if (sk->sk_state == DCCP_CLOSING) {
959                 /* FIXME: should start at 2 * RTT */
960                 /* Timer for repeating the CLOSE/CLOSEREQ until an answer. */
961                 inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS,
962                                           inet_csk(sk)->icsk_rto,
963                                           DCCP_RTO_MAX);
964 #if 0
965                 /* Yeah, we should use sk->sk_prot->orphan_count, etc */
966                 dccp_set_state(sk, DCCP_CLOSED);
967 #endif
968         }
969
970         if (sk->sk_state == DCCP_CLOSED)
971                 inet_csk_destroy_sock(sk);
972
973         /* Otherwise, socket is reprieved until protocol close. */
974
975 out:
976         bh_unlock_sock(sk);
977         local_bh_enable();
978         sock_put(sk);
979 }
980
981 EXPORT_SYMBOL_GPL(dccp_close);
982
/*
 * dccp_shutdown - placeholder shutdown handler.
 *
 * Only emits a debug message carrying @how (printed in hex); @sk is not
 * used and no socket state is changed here.
 */
983 void dccp_shutdown(struct sock *sk, int how)
984 {
985         dccp_pr_debug("called shutdown(%x)\n", how);
986 }
987
988 EXPORT_SYMBOL_GPL(dccp_shutdown);
989
990 static int __init dccp_mib_init(void)
991 {
992         int rc = -ENOMEM;
993
994         dccp_statistics[0] = alloc_percpu(struct dccp_mib);
995         if (dccp_statistics[0] == NULL)
996                 goto out;
997
998         dccp_statistics[1] = alloc_percpu(struct dccp_mib);
999         if (dccp_statistics[1] == NULL)
1000                 goto out_free_one;
1001
1002         rc = 0;
1003 out:
1004         return rc;
1005 out_free_one:
1006         free_percpu(dccp_statistics[0]);
1007         dccp_statistics[0] = NULL;
1008         goto out;
1009
1010 }
1011
1012 static void dccp_mib_exit(void)
1013 {
1014         free_percpu(dccp_statistics[0]);
1015         free_percpu(dccp_statistics[1]);
1016         dccp_statistics[0] = dccp_statistics[1] = NULL;
1017 }
1018
/*
 * Module parameter (mode 0444).  When non-zero, dccp_init() derives the
 * size of the established hash table from this bucket count instead of
 * from num_physpages.
 */
1019 static int thash_entries;
1020 module_param(thash_entries, int, 0444);
1021 MODULE_PARM_DESC(thash_entries, "Number of ehash buckets");
1022
/*
 * Debug-message switch; only built when CONFIG_IP_DCCP_DEBUG is set.
 * Exported so that other GPL modules can reference it.
 */
1023 #ifdef CONFIG_IP_DCCP_DEBUG
1024 int dccp_debug;
1025 module_param(dccp_debug, bool, 0444);
1026 MODULE_PARM_DESC(dccp_debug, "Enable debug messages");
1027
1028 EXPORT_SYMBOL_GPL(dccp_debug);
1029 #endif
1030
1031 static int __init dccp_init(void)
1032 {
1033         unsigned long goal;
1034         int ehash_order, bhash_order, i;
1035         int rc = -ENOBUFS;
1036
1037         dccp_hashinfo.bind_bucket_cachep =
1038                 kmem_cache_create("dccp_bind_bucket",
1039                                   sizeof(struct inet_bind_bucket), 0,
1040                                   SLAB_HWCACHE_ALIGN, NULL);
1041         if (!dccp_hashinfo.bind_bucket_cachep)
1042                 goto out;
1043
1044         /*
1045          * Size and allocate the main established and bind bucket
1046          * hash tables.
1047          *
1048          * The methodology is similar to that of the buffer cache.
1049          */
1050         if (num_physpages >= (128 * 1024))
1051                 goal = num_physpages >> (21 - PAGE_SHIFT);
1052         else
1053                 goal = num_physpages >> (23 - PAGE_SHIFT);
1054
1055         if (thash_entries)
1056                 goal = (thash_entries *
1057                         sizeof(struct inet_ehash_bucket)) >> PAGE_SHIFT;
1058         for (ehash_order = 0; (1UL << ehash_order) < goal; ehash_order++)
1059                 ;
1060         do {
1061                 dccp_hashinfo.ehash_size = (1UL << ehash_order) * PAGE_SIZE /
1062                                         sizeof(struct inet_ehash_bucket);
1063                 while (dccp_hashinfo.ehash_size &
1064                        (dccp_hashinfo.ehash_size - 1))
1065                         dccp_hashinfo.ehash_size--;
1066                 dccp_hashinfo.ehash = (struct inet_ehash_bucket *)
1067                         __get_free_pages(GFP_ATOMIC, ehash_order);
1068         } while (!dccp_hashinfo.ehash && --ehash_order > 0);
1069
1070         if (!dccp_hashinfo.ehash) {
1071                 DCCP_CRIT("Failed to allocate DCCP established hash table");
1072                 goto out_free_bind_bucket_cachep;
1073         }
1074
1075         for (i = 0; i < dccp_hashinfo.ehash_size; i++) {
1076                 INIT_HLIST_HEAD(&dccp_hashinfo.ehash[i].chain);
1077                 INIT_HLIST_HEAD(&dccp_hashinfo.ehash[i].twchain);
1078         }
1079
1080         if (inet_ehash_locks_alloc(&dccp_hashinfo))
1081                         goto out_free_dccp_ehash;
1082
1083         bhash_order = ehash_order;
1084
1085         do {
1086                 dccp_hashinfo.bhash_size = (1UL << bhash_order) * PAGE_SIZE /
1087                                         sizeof(struct inet_bind_hashbucket);
1088                 if ((dccp_hashinfo.bhash_size > (64 * 1024)) &&
1089                     bhash_order > 0)
1090                         continue;
1091                 dccp_hashinfo.bhash = (struct inet_bind_hashbucket *)
1092                         __get_free_pages(GFP_ATOMIC, bhash_order);
1093         } while (!dccp_hashinfo.bhash && --bhash_order >= 0);
1094
1095         if (!dccp_hashinfo.bhash) {
1096                 DCCP_CRIT("Failed to allocate DCCP bind hash table");
1097                 goto out_free_dccp_locks;
1098         }
1099
1100         for (i = 0; i < dccp_hashinfo.bhash_size; i++) {
1101                 spin_lock_init(&dccp_hashinfo.bhash[i].lock);
1102                 INIT_HLIST_HEAD(&dccp_hashinfo.bhash[i].chain);
1103         }
1104
1105         rc = dccp_mib_init();
1106         if (rc)
1107                 goto out_free_dccp_bhash;
1108
1109         rc = dccp_ackvec_init();
1110         if (rc)
1111                 goto out_free_dccp_mib;
1112
1113         rc = dccp_sysctl_init();
1114         if (rc)
1115                 goto out_ackvec_exit;
1116
1117         dccp_timestamping_init();
1118 out:
1119         return rc;
1120 out_ackvec_exit:
1121         dccp_ackvec_exit();
1122 out_free_dccp_mib:
1123         dccp_mib_exit();
1124 out_free_dccp_bhash:
1125         free_pages((unsigned long)dccp_hashinfo.bhash, bhash_order);
1126         dccp_hashinfo.bhash = NULL;
1127 out_free_dccp_locks:
1128         inet_ehash_locks_free(&dccp_hashinfo);
1129 out_free_dccp_ehash:
1130         free_pages((unsigned long)dccp_hashinfo.ehash, ehash_order);
1131         dccp_hashinfo.ehash = NULL;
1132 out_free_bind_bucket_cachep:
1133         kmem_cache_destroy(dccp_hashinfo.bind_bucket_cachep);
1134         dccp_hashinfo.bind_bucket_cachep = NULL;
1135         goto out;
1136 }
1137
1138 static void __exit dccp_fini(void)
1139 {
1140         dccp_mib_exit();
1141         free_pages((unsigned long)dccp_hashinfo.bhash,
1142                    get_order(dccp_hashinfo.bhash_size *
1143                              sizeof(struct inet_bind_hashbucket)));
1144         free_pages((unsigned long)dccp_hashinfo.ehash,
1145                    get_order(dccp_hashinfo.ehash_size *
1146                              sizeof(struct inet_ehash_bucket)));
1147         inet_ehash_locks_free(&dccp_hashinfo);
1148         kmem_cache_destroy(dccp_hashinfo.bind_bucket_cachep);
1149         dccp_ackvec_exit();
1150         dccp_sysctl_exit();
1151 }
1152
/* Register dccp_init()/dccp_fini() as the module's init and exit hooks. */
1153 module_init(dccp_init);
1154 module_exit(dccp_fini);
1155
/* Module metadata: license, author and one-line description. */
1156 MODULE_LICENSE("GPL");
1157 MODULE_AUTHOR("Arnaldo Carvalho de Melo <acme@conectiva.com.br>");
1158 MODULE_DESCRIPTION("DCCP - Datagram Congestion Controlled Protocol");