/*
 * [DCCP]: Check for unread data on close
 */
1 /*
2  *  net/dccp/proto.c
3  *
4  *  An implementation of the DCCP protocol
5  *  Arnaldo Carvalho de Melo <acme@conectiva.com.br>
6  *
7  *      This program is free software; you can redistribute it and/or modify it
8  *      under the terms of the GNU General Public License version 2 as
9  *      published by the Free Software Foundation.
10  */
11
12 #include <linux/dccp.h>
13 #include <linux/module.h>
14 #include <linux/types.h>
15 #include <linux/sched.h>
16 #include <linux/kernel.h>
17 #include <linux/skbuff.h>
18 #include <linux/netdevice.h>
19 #include <linux/in.h>
20 #include <linux/if_arp.h>
21 #include <linux/init.h>
22 #include <linux/random.h>
23 #include <net/checksum.h>
24
25 #include <net/inet_sock.h>
26 #include <net/sock.h>
27 #include <net/xfrm.h>
28
29 #include <asm/ioctls.h>
30 #include <asm/semaphore.h>
31 #include <linux/spinlock.h>
32 #include <linux/timer.h>
33 #include <linux/delay.h>
34 #include <linux/poll.h>
35
36 #include "ccid.h"
37 #include "dccp.h"
38 #include "feat.h"
39
/* Per-CPU SNMP (MIB) counters for DCCP. */
DEFINE_SNMP_STAT(struct dccp_mib, dccp_statistics) __read_mostly;

EXPORT_SYMBOL_GPL(dccp_statistics);

/* Count of sockets closed by their owner but still completing shutdown. */
atomic_t dccp_orphan_count = ATOMIC_INIT(0);

EXPORT_SYMBOL_GPL(dccp_orphan_count);

/* Shared bind/established/listen hash tables for all DCCP sockets;
 * only the listen-hash members need explicit initialization here. */
struct inet_hashinfo __cacheline_aligned dccp_hashinfo = {
	.lhash_lock	= RW_LOCK_UNLOCKED,
	.lhash_users	= ATOMIC_INIT(0),
	.lhash_wait = __WAIT_QUEUE_HEAD_INITIALIZER(dccp_hashinfo.lhash_wait),
};

EXPORT_SYMBOL_GPL(dccp_hashinfo);

/* the maximum queue length for tx in packets. 0 is no limit */
int sysctl_dccp_tx_qlen __read_mostly = 5;
/*
 * dccp_set_state  -  transition socket @sk to @state, updating MIB counters
 *
 * Unhashes the socket and releases its bound port (unless the user pinned
 * the port with SOCK_BINDPORT_LOCK) before entering DCCP_CLOSED, and keeps
 * the CURRESTAB / ESTABRESETS statistics in sync with the transition.
 * NOTE(review): presumably called with the socket locked — confirm callers.
 */
void dccp_set_state(struct sock *sk, const int state)
{
	const int oldstate = sk->sk_state;

	dccp_pr_debug("%s(%p) %-10.10s -> %s\n",
		      dccp_role(sk), sk,
		      dccp_state_name(oldstate), dccp_state_name(state));
	WARN_ON(state == oldstate);	/* a self-transition indicates a bug */

	switch (state) {
	case DCCP_OPEN:
		if (oldstate != DCCP_OPEN)
			DCCP_INC_STATS(DCCP_MIB_CURRESTAB);
		break;

	case DCCP_CLOSED:
		if (oldstate == DCCP_CLOSING || oldstate == DCCP_OPEN)
			DCCP_INC_STATS(DCCP_MIB_ESTABRESETS);

		sk->sk_prot->unhash(sk);
		if (inet_csk(sk)->icsk_bind_hash != NULL &&
		    !(sk->sk_userlocks & SOCK_BINDPORT_LOCK))
			inet_put_port(&dccp_hashinfo, sk);
		/* fall through */
	default:
		if (oldstate == DCCP_OPEN)
			DCCP_DEC_STATS(DCCP_MIB_CURRESTAB);
	}

	/* Change state AFTER socket is unhashed to avoid closed
	 * socket sitting in hash tables.
	 */
	sk->sk_state = state;
}

EXPORT_SYMBOL_GPL(dccp_set_state);
95
96 void dccp_done(struct sock *sk)
97 {
98         dccp_set_state(sk, DCCP_CLOSED);
99         dccp_clear_xmit_timers(sk);
100
101         sk->sk_shutdown = SHUTDOWN_MASK;
102
103         if (!sock_flag(sk, SOCK_DEAD))
104                 sk->sk_state_change(sk);
105         else
106                 inet_csk_destroy_sock(sk);
107 }
108
109 EXPORT_SYMBOL_GPL(dccp_done);
110
111 const char *dccp_packet_name(const int type)
112 {
113         static const char *dccp_packet_names[] = {
114                 [DCCP_PKT_REQUEST]  = "REQUEST",
115                 [DCCP_PKT_RESPONSE] = "RESPONSE",
116                 [DCCP_PKT_DATA]     = "DATA",
117                 [DCCP_PKT_ACK]      = "ACK",
118                 [DCCP_PKT_DATAACK]  = "DATAACK",
119                 [DCCP_PKT_CLOSEREQ] = "CLOSEREQ",
120                 [DCCP_PKT_CLOSE]    = "CLOSE",
121                 [DCCP_PKT_RESET]    = "RESET",
122                 [DCCP_PKT_SYNC]     = "SYNC",
123                 [DCCP_PKT_SYNCACK]  = "SYNCACK",
124         };
125
126         if (type >= DCCP_NR_PKT_TYPES)
127                 return "INVALID";
128         else
129                 return dccp_packet_names[type];
130 }
131
132 EXPORT_SYMBOL_GPL(dccp_packet_name);
133
134 const char *dccp_state_name(const int state)
135 {
136         static char *dccp_state_names[] = {
137         [DCCP_OPEN]       = "OPEN",
138         [DCCP_REQUESTING] = "REQUESTING",
139         [DCCP_PARTOPEN]   = "PARTOPEN",
140         [DCCP_LISTEN]     = "LISTEN",
141         [DCCP_RESPOND]    = "RESPOND",
142         [DCCP_CLOSING]    = "CLOSING",
143         [DCCP_TIME_WAIT]  = "TIME_WAIT",
144         [DCCP_CLOSED]     = "CLOSED",
145         };
146
147         if (state >= DCCP_MAX_STATES)
148                 return "INVALID STATE!";
149         else
150                 return dccp_state_names[state];
151 }
152
153 EXPORT_SYMBOL_GPL(dccp_state_name);
154
/* Insert @sk into the shared DCCP hash tables. */
void dccp_hash(struct sock *sk)
{
	inet_hash(&dccp_hashinfo, sk);
}

EXPORT_SYMBOL_GPL(dccp_hash);

/* Remove @sk from the shared DCCP hash tables. */
void dccp_unhash(struct sock *sk)
{
	inet_unhash(&dccp_hashinfo, sk);
}

EXPORT_SYMBOL_GPL(dccp_unhash);
168
/*
 * dccp_init_sock  -  initialise the DCCP-specific parts of a new socket
 * @sk: socket being set up
 * @ctl_sock_initialized: zero only while creating the module's control
 *	socket, which skips feature negotiation and CCID allocation
 *
 * Returns 0 on success, or a negative errno if feature-negotiation state,
 * the ack vector or the CCID control blocks could not be allocated.
 */
int dccp_init_sock(struct sock *sk, const __u8 ctl_sock_initialized)
{
	struct dccp_sock *dp = dccp_sk(sk);
	struct dccp_minisock *dmsk = dccp_msk(sk);
	struct inet_connection_sock *icsk = inet_csk(sk);

	dccp_minisock_init(&dp->dccps_minisock);

	icsk->icsk_rto		= DCCP_TIMEOUT_INIT;
	icsk->icsk_syn_retries	= sysctl_dccp_request_retries;
	sk->sk_state		= DCCP_CLOSED;
	sk->sk_write_space	= dccp_write_space;
	icsk->icsk_sync_mss	= dccp_sync_mss;
	dp->dccps_mss_cache	= 536;	/* conservative initial MSS — TODO confirm rationale */
	dp->dccps_rate_last	= jiffies;
	dp->dccps_role		= DCCP_ROLE_UNDEFINED;
	dp->dccps_service	= DCCP_SERVICE_CODE_IS_ABSENT;
	dp->dccps_l_ack_ratio	= dp->dccps_r_ack_ratio = 1;

	dccp_init_xmit_timers(sk);

	/*
	 * FIXME: We're hardcoding the CCID, and doing this at this point makes
	 * the listening (master) sock get CCID control blocks, which is not
	 * necessary, but for now, to not mess with the test userspace apps,
	 * lets leave it here, later the real solution is to do this in a
	 * setsockopt(CCIDs-I-want/accept). -acme
	 */
	if (likely(ctl_sock_initialized)) {
		int rc = dccp_feat_init(dmsk);

		if (rc)
			return rc;

		if (dmsk->dccpms_send_ack_vector) {
			dp->dccps_hc_rx_ackvec = dccp_ackvec_alloc(GFP_KERNEL);
			if (dp->dccps_hc_rx_ackvec == NULL)
				return -ENOMEM;
		}
		dp->dccps_hc_rx_ccid = ccid_hc_rx_new(dmsk->dccpms_rx_ccid,
						      sk, GFP_KERNEL);
		dp->dccps_hc_tx_ccid = ccid_hc_tx_new(dmsk->dccpms_tx_ccid,
						      sk, GFP_KERNEL);
		if (unlikely(dp->dccps_hc_rx_ccid == NULL ||
			     dp->dccps_hc_tx_ccid == NULL)) {
			/* Roll back partial allocations; presumably the
			 * delete/free helpers tolerate NULL — verify. */
			ccid_hc_rx_delete(dp->dccps_hc_rx_ccid, sk);
			ccid_hc_tx_delete(dp->dccps_hc_tx_ccid, sk);
			if (dmsk->dccpms_send_ack_vector) {
				dccp_ackvec_free(dp->dccps_hc_rx_ackvec);
				dp->dccps_hc_rx_ackvec = NULL;
			}
			dp->dccps_hc_rx_ccid = dp->dccps_hc_tx_ccid = NULL;
			return -ENOMEM;
		}
	} else {
		/* control socket doesn't need feat nego */
		INIT_LIST_HEAD(&dmsk->dccpms_pending);
		INIT_LIST_HEAD(&dmsk->dccpms_conf);
	}

	return 0;
}

EXPORT_SYMBOL_GPL(dccp_init_sock);
233
/*
 * dccp_destroy_sock  -  free all protocol-private state of a dying socket
 *
 * Releases the pending retransmit skb, the bound port, the service list,
 * the ack vector and both CCID control blocks, then clears the feature
 * negotiation lists.  Always returns 0.
 */
int dccp_destroy_sock(struct sock *sk)
{
	struct dccp_sock *dp = dccp_sk(sk);
	struct dccp_minisock *dmsk = dccp_msk(sk);

	/*
	 * DCCP doesn't use sk_write_queue, just sk_send_head
	 * for retransmissions
	 */
	if (sk->sk_send_head != NULL) {
		kfree_skb(sk->sk_send_head);
		sk->sk_send_head = NULL;
	}

	/* Clean up a referenced DCCP bind bucket. */
	if (inet_csk(sk)->icsk_bind_hash != NULL)
		inet_put_port(&dccp_hashinfo, sk);

	kfree(dp->dccps_service_list);	/* kfree(NULL) is a no-op */
	dp->dccps_service_list = NULL;

	if (dmsk->dccpms_send_ack_vector) {
		dccp_ackvec_free(dp->dccps_hc_rx_ackvec);
		dp->dccps_hc_rx_ackvec = NULL;
	}
	ccid_hc_rx_delete(dp->dccps_hc_rx_ccid, sk);
	ccid_hc_tx_delete(dp->dccps_hc_tx_ccid, sk);
	dp->dccps_hc_rx_ccid = dp->dccps_hc_tx_ccid = NULL;

	/* clean up feature negotiation state */
	dccp_feat_clean(dmsk);

	return 0;
}

EXPORT_SYMBOL_GPL(dccp_destroy_sock);
270
/* Mark @sk as a server socket and start accepting connection requests. */
static inline int dccp_listen_start(struct sock *sk, int backlog)
{
	struct dccp_sock *dp = dccp_sk(sk);

	dp->dccps_role = DCCP_ROLE_LISTEN;
	return inet_csk_listen_start(sk, backlog);
}
278
/*
 * dccp_disconnect  -  abort the connection and return @sk to a clean
 * DCCP_CLOSED state (the ABORT function of RFC 793, adapted for DCCP).
 *
 * Purges queued data, resets addressing and timer state, and reports the
 * error to the owner.  @flags is currently unused.  NOTE(review):
 * presumably called with the socket locked — confirm callers.
 */
int dccp_disconnect(struct sock *sk, int flags)
{
	struct inet_connection_sock *icsk = inet_csk(sk);
	struct inet_sock *inet = inet_sk(sk);
	int err = 0;
	const int old_state = sk->sk_state;

	if (old_state != DCCP_CLOSED)
		dccp_set_state(sk, DCCP_CLOSED);

	/* ABORT function of RFC793 */
	if (old_state == DCCP_LISTEN) {
		inet_csk_listen_stop(sk);
	/* FIXME: do the active reset thing */
	} else if (old_state == DCCP_REQUESTING)
		sk->sk_err = ECONNRESET;

	dccp_clear_xmit_timers(sk);
	__skb_queue_purge(&sk->sk_receive_queue);
	if (sk->sk_send_head != NULL) {
		__kfree_skb(sk->sk_send_head);
		sk->sk_send_head = NULL;
	}

	inet->dport = 0;	/* no longer connected to a peer port */

	if (!(sk->sk_userlocks & SOCK_BINDADDR_LOCK))
		inet_reset_saddr(sk);

	sk->sk_shutdown = 0;
	sock_reset_flag(sk, SOCK_DONE);

	icsk->icsk_backoff = 0;
	inet_csk_delack_init(sk);
	__sk_dst_reset(sk);

	/* a bound socket must still own its bind bucket at this point */
	BUG_TRAP(!inet->num || icsk->icsk_bind_hash);

	sk->sk_error_report(sk);
	return err;
}

EXPORT_SYMBOL_GPL(dccp_disconnect);
322
/*
 *	Wait for a DCCP event.
 *
 *	Note that we don't need to lock the socket, as the upper poll layers
 *	take care of normal races (between the test and the event) and we don't
 *	go look at any of the socket buffers directly.
 */
unsigned int dccp_poll(struct file *file, struct socket *sock,
		       poll_table *wait)
{
	unsigned int mask;
	struct sock *sk = sock->sk;

	poll_wait(file, sk->sk_sleep, wait);
	if (sk->sk_state == DCCP_LISTEN)
		return inet_csk_listen_poll(sk);

	/* Socket is not locked. We are protected from async events
	   by poll logic and correct handling of state changes
	   made by another threads is impossible in any case.
	 */

	mask = 0;
	if (sk->sk_err)
		mask = POLLERR;	/* plain '=' is fine: mask is still 0 here */

	if (sk->sk_shutdown == SHUTDOWN_MASK || sk->sk_state == DCCP_CLOSED)
		mask |= POLLHUP;
	if (sk->sk_shutdown & RCV_SHUTDOWN)
		mask |= POLLIN | POLLRDNORM | POLLRDHUP;

	/* Connected? (i.e. past the REQUESTING/RESPOND handshake states) */
	if ((1 << sk->sk_state) & ~(DCCPF_REQUESTING | DCCPF_RESPOND)) {
		if (atomic_read(&sk->sk_rmem_alloc) > 0)
			mask |= POLLIN | POLLRDNORM;

		if (!(sk->sk_shutdown & SEND_SHUTDOWN)) {
			if (sk_stream_wspace(sk) >= sk_stream_min_wspace(sk)) {
				mask |= POLLOUT | POLLWRNORM;
			} else {  /* send SIGIO later */
				set_bit(SOCK_ASYNC_NOSPACE,
					&sk->sk_socket->flags);
				set_bit(SOCK_NOSPACE, &sk->sk_socket->flags);

				/* Race breaker. If space is freed after
				 * wspace test but before the flags are set,
				 * IO signal will be lost.
				 */
				if (sk_stream_wspace(sk) >= sk_stream_min_wspace(sk))
					mask |= POLLOUT | POLLWRNORM;
			}
		}
	}
	return mask;
}

EXPORT_SYMBOL_GPL(dccp_poll);
380
/*
 * dccp_ioctl  -  handle DCCP-level ioctls
 *
 * Only SIOCINQ is implemented: it reports the length of the packet at the
 * head of the receive queue, since DCCP preserves datagram boundaries and
 * only that packet can be consumed by the next read.  Returns -ENOTCONN
 * on listening sockets and -ENOIOCTLCMD for unknown commands.
 */
int dccp_ioctl(struct sock *sk, int cmd, unsigned long arg)
{
	int rc = -ENOTCONN;

	lock_sock(sk);

	if (sk->sk_state == DCCP_LISTEN)
		goto out;

	switch (cmd) {
	case SIOCINQ: {
		struct sk_buff *skb;
		unsigned long amount = 0;

		skb = skb_peek(&sk->sk_receive_queue);
		if (skb != NULL) {
			/*
			 * We will only return the amount of this packet since
			 * that is all that will be read.
			 */
			amount = skb->len;
		}
		rc = put_user(amount, (int __user *)arg);
	}
		break;
	default:
		rc = -ENOIOCTLCMD;
		break;
	}
out:
	release_sock(sk);
	return rc;
}

EXPORT_SYMBOL_GPL(dccp_ioctl);
416
/*
 * dccp_setsockopt_service  -  install the service code (and optional list)
 * @service: first service code, already copied in by the caller
 * @optval/@optlen: raw option buffer; any bytes beyond the first code are
 *	treated as an array of additional __be32 service codes
 *
 * The new list is built outside the socket lock, then swapped in under it.
 * Returns 0, -EINVAL on bad length or invalid service value, -ENOMEM, or
 * -EFAULT (note: -EFAULT is also returned when the copied list contains
 * DCCP_SERVICE_INVALID_VALUE).
 */
static int dccp_setsockopt_service(struct sock *sk, const __be32 service,
				   char __user *optval, int optlen)
{
	struct dccp_sock *dp = dccp_sk(sk);
	struct dccp_service_list *sl = NULL;

	if (service == DCCP_SERVICE_INVALID_VALUE ||
	    optlen > DCCP_SERVICE_LIST_MAX_LEN * sizeof(u32))
		return -EINVAL;

	if (optlen > sizeof(service)) {
		sl = kmalloc(optlen, GFP_KERNEL);
		if (sl == NULL)
			return -ENOMEM;

		/* entries after the first service code */
		sl->dccpsl_nr = optlen / sizeof(u32) - 1;
		if (copy_from_user(sl->dccpsl_list,
				   optval + sizeof(service),
				   optlen - sizeof(service)) ||
		    dccp_list_has_service(sl, DCCP_SERVICE_INVALID_VALUE)) {
			kfree(sl);
			return -EFAULT;
		}
	}

	lock_sock(sk);
	dp->dccps_service = service;

	kfree(dp->dccps_service_list);	/* drop any previous list */

	dp->dccps_service_list = sl;
	release_sock(sk);
	return 0;
}
451
452 /* byte 1 is feature.  the rest is the preference list */
453 static int dccp_setsockopt_change(struct sock *sk, int type,
454                                   struct dccp_so_feat __user *optval)
455 {
456         struct dccp_so_feat opt;
457         u8 *val;
458         int rc;
459
460         if (copy_from_user(&opt, optval, sizeof(opt)))
461                 return -EFAULT;
462
463         val = kmalloc(opt.dccpsf_len, GFP_KERNEL);
464         if (!val)
465                 return -ENOMEM;
466
467         if (copy_from_user(val, opt.dccpsf_val, opt.dccpsf_len)) {
468                 rc = -EFAULT;
469                 goto out_free_val;
470         }
471
472         rc = dccp_feat_change(dccp_msk(sk), type, opt.dccpsf_feat,
473                               val, opt.dccpsf_len, GFP_KERNEL);
474         if (rc)
475                 goto out_free_val;
476
477 out:
478         return rc;
479
480 out_free_val:
481         kfree(val);
482         goto out;
483 }
484
/*
 * do_dccp_setsockopt  -  handle SOL_DCCP-level socket options
 *
 * An int is always read from @optval up front (every current option either
 * uses it or re-reads the buffer itself); options needing more data
 * validate @optlen individually.  DCCP_SOCKOPT_SERVICE is dispatched before
 * taking the socket lock because its handler locks the socket itself.
 */
static int do_dccp_setsockopt(struct sock *sk, int level, int optname,
		char __user *optval, int optlen)
{
	struct dccp_sock *dp = dccp_sk(sk);
	int val, err = 0;

	if (optlen < sizeof(int))
		return -EINVAL;

	if (get_user(val, (int __user *)optval))
		return -EFAULT;

	if (optname == DCCP_SOCKOPT_SERVICE)
		return dccp_setsockopt_service(sk, val, optval, optlen);

	lock_sock(sk);
	switch (optname) {
	case DCCP_SOCKOPT_PACKET_SIZE:
		DCCP_WARN("sockopt(PACKET_SIZE) is deprecated: fix your app\n");
		err = 0;	/* deliberately accepted as a no-op */
		break;
	case DCCP_SOCKOPT_CHANGE_L:
		if (optlen != sizeof(struct dccp_so_feat))
			err = -EINVAL;
		else
			err = dccp_setsockopt_change(sk, DCCPO_CHANGE_L,
						     (struct dccp_so_feat __user *)
						     optval);
		break;
	case DCCP_SOCKOPT_CHANGE_R:
		if (optlen != sizeof(struct dccp_so_feat))
			err = -EINVAL;
		else
			err = dccp_setsockopt_change(sk, DCCPO_CHANGE_R,
						     (struct dccp_so_feat __user *)
						     optval);
		break;
	case DCCP_SOCKOPT_SEND_CSCOV:	/* sender side, RFC 4340, sec. 9.2 */
		if (val < 0 || val > 15)
			err = -EINVAL;
		else
			dp->dccps_pcslen = val;
		break;
	case DCCP_SOCKOPT_RECV_CSCOV:	/* receiver side, RFC 4340 sec. 9.2.1 */
		if (val < 0 || val > 15)
			err = -EINVAL;
		else {
			dp->dccps_pcrlen = val;
			/* FIXME: add feature negotiation,
			 * ChangeL(MinimumChecksumCoverage, val) */
		}
		break;
	default:
		err = -ENOPROTOOPT;
		break;
	}

	release_sock(sk);
	return err;
}
545
546 int dccp_setsockopt(struct sock *sk, int level, int optname,
547                     char __user *optval, int optlen)
548 {
549         if (level != SOL_DCCP)
550                 return inet_csk(sk)->icsk_af_ops->setsockopt(sk, level,
551                                                              optname, optval,
552                                                              optlen);
553         return do_dccp_setsockopt(sk, level, optname, optval, optlen);
554 }
555
556 EXPORT_SYMBOL_GPL(dccp_setsockopt);
557
#ifdef CONFIG_COMPAT
/* 32-bit compat variant: identical to dccp_setsockopt except that
 * non-DCCP levels go through the compat inet_csk helper. */
int compat_dccp_setsockopt(struct sock *sk, int level, int optname,
			   char __user *optval, int optlen)
{
	if (level != SOL_DCCP)
		return inet_csk_compat_setsockopt(sk, level, optname,
						  optval, optlen);
	return do_dccp_setsockopt(sk, level, optname, optval, optlen);
}

EXPORT_SYMBOL_GPL(compat_dccp_setsockopt);
#endif
570
571 static int dccp_getsockopt_service(struct sock *sk, int len,
572                                    __be32 __user *optval,
573                                    int __user *optlen)
574 {
575         const struct dccp_sock *dp = dccp_sk(sk);
576         const struct dccp_service_list *sl;
577         int err = -ENOENT, slen = 0, total_len = sizeof(u32);
578
579         lock_sock(sk);
580         if ((sl = dp->dccps_service_list) != NULL) {
581                 slen = sl->dccpsl_nr * sizeof(u32);
582                 total_len += slen;
583         }
584
585         err = -EINVAL;
586         if (total_len > len)
587                 goto out;
588
589         err = 0;
590         if (put_user(total_len, optlen) ||
591             put_user(dp->dccps_service, optval) ||
592             (sl != NULL && copy_to_user(optval + 1, sl->dccpsl_list, slen)))
593                 err = -EFAULT;
594 out:
595         release_sock(sk);
596         return err;
597 }
598
/*
 * do_dccp_getsockopt  -  handle SOL_DCCP-level getsockopt
 *
 * Option ranges 128-191 and 192-255 are forwarded to the RX and TX CCID
 * respectively.  Note that the deprecated PACKET_SIZE option returns 0
 * without writing anything to @optval.
 */
static int do_dccp_getsockopt(struct sock *sk, int level, int optname,
		    char __user *optval, int __user *optlen)
{
	struct dccp_sock *dp;
	int val, len;

	if (get_user(len, optlen))
		return -EFAULT;

	if (len < (int)sizeof(int))
		return -EINVAL;

	dp = dccp_sk(sk);

	switch (optname) {
	case DCCP_SOCKOPT_PACKET_SIZE:
		DCCP_WARN("sockopt(PACKET_SIZE) is deprecated: fix your app\n");
		return 0;
	case DCCP_SOCKOPT_SERVICE:
		return dccp_getsockopt_service(sk, len,
					       (__be32 __user *)optval, optlen);
	case DCCP_SOCKOPT_GET_CUR_MPS:
		val = dp->dccps_mss_cache;	/* current maximum packet size */
		len = sizeof(val);
		break;
	case DCCP_SOCKOPT_SEND_CSCOV:
		val = dp->dccps_pcslen;
		len = sizeof(val);
		break;
	case DCCP_SOCKOPT_RECV_CSCOV:
		val = dp->dccps_pcrlen;
		len = sizeof(val);
		break;
	case 128 ... 191:	/* RX-CCID private options */
		return ccid_hc_rx_getsockopt(dp->dccps_hc_rx_ccid, sk, optname,
					     len, (u32 __user *)optval, optlen);
	case 192 ... 255:	/* TX-CCID private options */
		return ccid_hc_tx_getsockopt(dp->dccps_hc_tx_ccid, sk, optname,
					     len, (u32 __user *)optval, optlen);
	default:
		return -ENOPROTOOPT;
	}

	if (put_user(len, optlen) || copy_to_user(optval, &val, len))
		return -EFAULT;

	return 0;
}
647
648 int dccp_getsockopt(struct sock *sk, int level, int optname,
649                     char __user *optval, int __user *optlen)
650 {
651         if (level != SOL_DCCP)
652                 return inet_csk(sk)->icsk_af_ops->getsockopt(sk, level,
653                                                              optname, optval,
654                                                              optlen);
655         return do_dccp_getsockopt(sk, level, optname, optval, optlen);
656 }
657
658 EXPORT_SYMBOL_GPL(dccp_getsockopt);
659
#ifdef CONFIG_COMPAT
/* 32-bit compat variant: identical to dccp_getsockopt except that
 * non-DCCP levels go through the compat inet_csk helper. */
int compat_dccp_getsockopt(struct sock *sk, int level, int optname,
			   char __user *optval, int __user *optlen)
{
	if (level != SOL_DCCP)
		return inet_csk_compat_getsockopt(sk, level, optname,
						  optval, optlen);
	return do_dccp_getsockopt(sk, level, optname, optval, optlen);
}

EXPORT_SYMBOL_GPL(compat_dccp_getsockopt);
#endif
672
/*
 * dccp_sendmsg  -  queue one datagram for transmission
 *
 * DCCP preserves message boundaries: a message larger than the cached MSS
 * is rejected with -EMSGSIZE rather than fragmented.  On success returns
 * @len; otherwise a negative errno.
 */
int dccp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
		 size_t len)
{
	const struct dccp_sock *dp = dccp_sk(sk);
	const int flags = msg->msg_flags;
	const int noblock = flags & MSG_DONTWAIT;
	struct sk_buff *skb;
	int rc, size;
	long timeo;

	if (len > dp->dccps_mss_cache)
		return -EMSGSIZE;

	lock_sock(sk);

	/* enforce the tx queue length limit (sysctl value 0 = unlimited) */
	if (sysctl_dccp_tx_qlen &&
	    (sk->sk_write_queue.qlen >= sysctl_dccp_tx_qlen)) {
		rc = -EAGAIN;
		goto out_release;
	}

	timeo = sock_sndtimeo(sk, noblock);

	/*
	 * We have to use sk_stream_wait_connect here to set sk_write_pending,
	 * so that the trick in dccp_rcv_request_sent_state_process (which
	 * depends on sk_write_pending) keeps working.
	 */
	/* Wait for a connection to finish. */
	if ((1 << sk->sk_state) & ~(DCCPF_OPEN | DCCPF_PARTOPEN))
		if ((rc = sk_stream_wait_connect(sk, &timeo)) != 0)
			goto out_release;

	size = sk->sk_prot->max_header + len;
	/* NOTE(review): the socket lock is dropped around the allocation, so
	 * the connection state may change underneath us — confirm safe. */
	release_sock(sk);
	skb = sock_alloc_send_skb(sk, size, noblock, &rc);
	lock_sock(sk);
	if (skb == NULL)
		goto out_release;

	skb_reserve(skb, sk->sk_prot->max_header);
	rc = memcpy_fromiovec(skb_put(skb, len), msg->msg_iov, len);
	if (rc != 0)
		goto out_discard;

	skb_queue_tail(&sk->sk_write_queue, skb);
	dccp_write_xmit(sk,0);
out_release:
	release_sock(sk);
	return rc ? : len;	/* rc == 0 means the whole message was queued */
out_discard:
	kfree_skb(skb);
	goto out_release;
}

EXPORT_SYMBOL_GPL(dccp_sendmsg);
728
/*
 * dccp_recvmsg  -  receive one datagram
 *
 * Returns the number of bytes copied (capped at @len, with MSG_TRUNC set
 * when the packet was larger), 0 on CLOSE/RESET ("fin") or shutdown, or a
 * negative errno.  Note that the @len parameter doubles as the return
 * value throughout the loop.
 */
int dccp_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
		 size_t len, int nonblock, int flags, int *addr_len)
{
	const struct dccp_hdr *dh;
	long timeo;

	lock_sock(sk);

	if (sk->sk_state == DCCP_LISTEN) {
		len = -ENOTCONN;
		goto out;
	}

	timeo = sock_rcvtimeo(sk, nonblock);

	do {
		struct sk_buff *skb = skb_peek(&sk->sk_receive_queue);

		if (skb == NULL)
			goto verify_sock_status;

		dh = dccp_hdr(skb);

		if (dh->dccph_type == DCCP_PKT_DATA ||
		    dh->dccph_type == DCCP_PKT_DATAACK)
			goto found_ok_skb;

		if (dh->dccph_type == DCCP_PKT_RESET ||
		    dh->dccph_type == DCCP_PKT_CLOSE) {
			dccp_pr_debug("found fin ok!\n");
			len = 0;
			goto found_fin_ok;
		}
		dccp_pr_debug("packet_type=%s\n",
			      dccp_packet_name(dh->dccph_type));
		/* NOTE(review): non-data packets are consumed here even when
		 * MSG_PEEK is set — confirm this is intended. */
		sk_eat_skb(sk, skb, 0);
verify_sock_status:
		if (sock_flag(sk, SOCK_DONE)) {
			len = 0;
			break;
		}

		if (sk->sk_err) {
			len = sock_error(sk);
			break;
		}

		if (sk->sk_shutdown & RCV_SHUTDOWN) {
			len = 0;
			break;
		}

		if (sk->sk_state == DCCP_CLOSED) {
			if (!sock_flag(sk, SOCK_DONE)) {
				/* This occurs when user tries to read
				 * from never connected socket.
				 */
				len = -ENOTCONN;
				break;
			}
			len = 0;
			break;
		}

		if (!timeo) {
			len = -EAGAIN;
			break;
		}

		if (signal_pending(current)) {
			len = sock_intr_errno(timeo);
			break;
		}

		sk_wait_data(sk, &timeo);	/* sleep for data or timeout */
		continue;
	found_ok_skb:
		if (len > skb->len)
			len = skb->len;
		else if (len < skb->len)
			msg->msg_flags |= MSG_TRUNC;

		if (skb_copy_datagram_iovec(skb, 0, msg->msg_iov, len)) {
			/* Exception. Bailout! */
			len = -EFAULT;
			break;
		}
	found_fin_ok:
		if (!(flags & MSG_PEEK))
			sk_eat_skb(sk, skb, 0);
		break;
	} while (1);
out:
	release_sock(sk);
	return len;
}

EXPORT_SYMBOL_GPL(dccp_recvmsg);
827
/*
 * inet_dccp_listen  -  listen() entry point for DCCP sockets
 *
 * Validates the socket state (must be an unconnected SOCK_DCCP socket in
 * CLOSED or LISTEN state), starts listening if not already, and updates
 * the accept backlog.  An already-listening socket may only have its
 * backlog adjusted.
 */
int inet_dccp_listen(struct socket *sock, int backlog)
{
	struct sock *sk = sock->sk;
	unsigned char old_state;
	int err;

	lock_sock(sk);

	err = -EINVAL;
	if (sock->state != SS_UNCONNECTED || sock->type != SOCK_DCCP)
		goto out;

	old_state = sk->sk_state;
	if (!((1 << old_state) & (DCCPF_CLOSED | DCCPF_LISTEN)))
		goto out;

	/* Really, if the socket is already in listen state
	 * we can only allow the backlog to be adjusted.
	 */
	if (old_state != DCCP_LISTEN) {
		/*
		 * FIXME: here it probably should be sk->sk_prot->listen_start
		 * see tcp_listen_start
		 */
		err = dccp_listen_start(sk, backlog);
		if (err)
			goto out;
	}
	sk->sk_max_ack_backlog = backlog;
	err = 0;

out:
	release_sock(sk);
	return err;
}

EXPORT_SYMBOL_GPL(inet_dccp_listen);
865
/*
 * Transition table used by dccp_close_state(): maps the current socket
 * state to the state entered on close.  The DCCP_ACTION_FIN bit is or'ed
 * in where closing requires actively sending a Close packet.
 */
static const unsigned char dccp_new_state[] = {
	/* current state:   new state:      action:     */
	[0]		  = DCCP_CLOSED,
	[DCCP_OPEN]	  = DCCP_CLOSING | DCCP_ACTION_FIN,
	[DCCP_REQUESTING] = DCCP_CLOSED,
	[DCCP_PARTOPEN]	  = DCCP_CLOSING | DCCP_ACTION_FIN,
	[DCCP_LISTEN]	  = DCCP_CLOSED,
	[DCCP_RESPOND]	  = DCCP_CLOSED,
	[DCCP_CLOSING]	  = DCCP_CLOSED,
	[DCCP_TIME_WAIT]  = DCCP_CLOSED,
	[DCCP_CLOSED]	  = DCCP_CLOSED,
};
878
879 static int dccp_close_state(struct sock *sk)
880 {
881         const int next = dccp_new_state[sk->sk_state];
882         const int ns = next & DCCP_STATE_MASK;
883
884         if (ns != sk->sk_state)
885                 dccp_set_state(sk, ns);
886
887         return next & DCCP_ACTION_FIN;
888 }
889
/*
 * dccp_close  -  close handler for DCCP sockets (struct proto ->close)
 * @sk:      socket being closed; caller holds a reference
 * @timeout: time (jiffies) allowed for pending output to drain
 *
 * Flushes unread receive data (aborting the connection with a Reset if any
 * was found), performs the close-state transition, orphans the socket and
 * either destroys it immediately or leaves it to be reaped once the
 * protocol termination exchange ("8.3. Termination") finishes.
 */
void dccp_close(struct sock *sk, long timeout)
{
        struct dccp_sock *dp = dccp_sk(sk);
        struct sk_buff *skb;
        u32 data_was_unread = 0;
        int state;

        lock_sock(sk);

        /* No further receives or sends on this socket. */
        sk->sk_shutdown = SHUTDOWN_MASK;

        if (sk->sk_state == DCCP_LISTEN) {
                dccp_set_state(sk, DCCP_CLOSED);

                /* Special case: a listener only needs its accept queue
                 * reaped; there is no peer to signal. */
                inet_csk_listen_stop(sk);

                goto adjudge_to_death;
        }

        sk_stop_timer(sk, &dp->dccps_xmit_timer);

        /*
         * We need to flush the recv. buffs.  We do this only on the
         * descriptor close, not protocol-sourced closes, because the
         * reader process may not have drained the data yet!
         */
        while ((skb = __skb_dequeue(&sk->sk_receive_queue)) != NULL) {
                data_was_unread += skb->len;
                __kfree_skb(skb);
        }

        if (data_was_unread) {
                /* Unread data was tossed, send an appropriate Reset Code */
                DCCP_WARN("DCCP: ABORT -- %u bytes unread\n", data_was_unread);
                dccp_send_reset(sk, DCCP_RESET_CODE_ABORTED);
                dccp_set_state(sk, DCCP_CLOSED);
        } else if (sock_flag(sk, SOCK_LINGER) && !sk->sk_lingertime) {
                /* Check zero linger _after_ checking for unread data. */
                sk->sk_prot->disconnect(sk, 0);
        } else if (dccp_close_state(sk)) {
                /* Table transition requested a FIN-equivalent: send Close. */
                dccp_send_close(sk, 1);
        }

        sk_stream_wait_close(sk, timeout);

adjudge_to_death:
        /* Snapshot state before release_sock() can process backlog and
         * change it underneath us; checked below. */
        state = sk->sk_state;
        sock_hold(sk);
        sock_orphan(sk);
        atomic_inc(sk->sk_prot->orphan_count);

        /*
         * It is the last release_sock in its life. It will remove backlog.
         */
        release_sock(sk);
        /*
         * Now socket is owned by kernel and we acquire BH lock
         * to finish close. No need to check for user refs.
         */
        local_bh_disable();
        bh_lock_sock(sk);
        BUG_TRAP(!sock_owned_by_user(sk));

        /* Have we already been destroyed by a softirq or backlog? */
        if (state != DCCP_CLOSED && sk->sk_state == DCCP_CLOSED)
                goto out;

        /*
         * The last release_sock may have processed the CLOSE or RESET
         * packet moving sock to CLOSED state, if not we have to fire
         * the CLOSE/CLOSEREQ retransmission timer, see "8.3. Termination"
         * in draft-ietf-dccp-spec-11. -acme
         */
        if (sk->sk_state == DCCP_CLOSING) {
                /* FIXME: should start at 2 * RTT */
                /* Timer for repeating the CLOSE/CLOSEREQ until an answer. */
                inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS,
                                          inet_csk(sk)->icsk_rto,
                                          DCCP_RTO_MAX);
#if 0
                /* Yeah, we should use sk->sk_prot->orphan_count, etc */
                dccp_set_state(sk, DCCP_CLOSED);
#endif
        }

        if (sk->sk_state == DCCP_CLOSED)
                inet_csk_destroy_sock(sk);

        /* Otherwise, socket is reprieved until protocol close. */

out:
        bh_unlock_sock(sk);
        local_bh_enable();
        sock_put(sk);
}

EXPORT_SYMBOL_GPL(dccp_close);
988
/*
 * dccp_shutdown  -  shutdown() handler (struct proto ->shutdown)
 * @sk:  socket being shut down
 * @how: SHUT_RD/SHUT_WR/SHUT_RDWR selector from userspace
 *
 * NOTE(review): currently a stub — it only logs the request and takes no
 * protocol action; confirm whether half-close support is intended here.
 */
void dccp_shutdown(struct sock *sk, int how)
{
        dccp_pr_debug("called shutdown(%x)\n", how);
}

EXPORT_SYMBOL_GPL(dccp_shutdown);
995
996 static int __init dccp_mib_init(void)
997 {
998         int rc = -ENOMEM;
999
1000         dccp_statistics[0] = alloc_percpu(struct dccp_mib);
1001         if (dccp_statistics[0] == NULL)
1002                 goto out;
1003
1004         dccp_statistics[1] = alloc_percpu(struct dccp_mib);
1005         if (dccp_statistics[1] == NULL)
1006                 goto out_free_one;
1007
1008         rc = 0;
1009 out:
1010         return rc;
1011 out_free_one:
1012         free_percpu(dccp_statistics[0]);
1013         dccp_statistics[0] = NULL;
1014         goto out;
1015
1016 }
1017
1018 static void dccp_mib_exit(void)
1019 {
1020         free_percpu(dccp_statistics[0]);
1021         free_percpu(dccp_statistics[1]);
1022         dccp_statistics[0] = dccp_statistics[1] = NULL;
1023 }
1024
/* Load-time override for the number of ehash buckets; 0 (the default)
 * means dccp_init() sizes the table from available memory instead. */
static int thash_entries;
module_param(thash_entries, int, 0444);
MODULE_PARM_DESC(thash_entries, "Number of ehash buckets");
1028
#ifdef CONFIG_IP_DCCP_DEBUG
/* Debug-message toggle, settable read-only at module load time.
 * NOTE(review): the variable is declared int but registered with
 * module_param(..., bool, ...) — confirm the types agree (and match the
 * extern declaration in dccp.h) for this kernel's moduleparam rules. */
int dccp_debug;
module_param(dccp_debug, bool, 0444);
MODULE_PARM_DESC(dccp_debug, "Enable debug messages");

EXPORT_SYMBOL_GPL(dccp_debug);
#endif
1036
/*
 * dccp_init  -  module initialisation
 *
 * Sizes and allocates the established ("ehash") and bind ("bhash") hash
 * tables from free pages, then brings up the MIB counters, ack-vector
 * support and sysctls.  On any failure, everything already set up is torn
 * down in reverse order via the goto chain at the bottom.
 */
static int __init dccp_init(void)
{
        unsigned long goal;
        int ehash_order, bhash_order, i;
        int rc = -ENOBUFS;

        dccp_hashinfo.bind_bucket_cachep =
                kmem_cache_create("dccp_bind_bucket",
                                  sizeof(struct inet_bind_bucket), 0,
                                  SLAB_HWCACHE_ALIGN, NULL);
        if (!dccp_hashinfo.bind_bucket_cachep)
                goto out;

        /*
         * Size and allocate the main established and bind bucket
         * hash tables.
         *
         * The methodology is similar to that of the buffer cache.
         */
        if (num_physpages >= (128 * 1024))
                goal = num_physpages >> (21 - PAGE_SHIFT);
        else
                goal = num_physpages >> (23 - PAGE_SHIFT);

        /* A non-zero thash_entries module parameter overrides the
         * memory-based goal computed above. */
        if (thash_entries)
                goal = (thash_entries *
                        sizeof(struct inet_ehash_bucket)) >> PAGE_SHIFT;
        for (ehash_order = 0; (1UL << ehash_order) < goal; ehash_order++)
                ;
        /* Retry at progressively smaller orders until the page allocation
         * succeeds; ehash_size is rounded down to a power of two. */
        do {
                dccp_hashinfo.ehash_size = (1UL << ehash_order) * PAGE_SIZE /
                                        sizeof(struct inet_ehash_bucket);
                while (dccp_hashinfo.ehash_size &
                       (dccp_hashinfo.ehash_size - 1))
                        dccp_hashinfo.ehash_size--;
                dccp_hashinfo.ehash = (struct inet_ehash_bucket *)
                        __get_free_pages(GFP_ATOMIC, ehash_order);
        } while (!dccp_hashinfo.ehash && --ehash_order > 0);

        if (!dccp_hashinfo.ehash) {
                DCCP_CRIT("Failed to allocate DCCP established hash table");
                goto out_free_bind_bucket_cachep;
        }

        for (i = 0; i < dccp_hashinfo.ehash_size; i++) {
                INIT_HLIST_HEAD(&dccp_hashinfo.ehash[i].chain);
                INIT_HLIST_HEAD(&dccp_hashinfo.ehash[i].twchain);
        }

        if (inet_ehash_locks_alloc(&dccp_hashinfo))
                        goto out_free_dccp_ehash;

        bhash_order = ehash_order;

        /* Same shrink-until-it-fits strategy for the bind hash, but the
         * bucket count is additionally capped at 64K. */
        do {
                dccp_hashinfo.bhash_size = (1UL << bhash_order) * PAGE_SIZE /
                                        sizeof(struct inet_bind_hashbucket);
                if ((dccp_hashinfo.bhash_size > (64 * 1024)) &&
                    bhash_order > 0)
                        continue; /* jumps to the while condition, which
                                   * decrements bhash_order and retries */
                dccp_hashinfo.bhash = (struct inet_bind_hashbucket *)
                        __get_free_pages(GFP_ATOMIC, bhash_order);
        } while (!dccp_hashinfo.bhash && --bhash_order >= 0);

        if (!dccp_hashinfo.bhash) {
                DCCP_CRIT("Failed to allocate DCCP bind hash table");
                goto out_free_dccp_locks;
        }

        for (i = 0; i < dccp_hashinfo.bhash_size; i++) {
                spin_lock_init(&dccp_hashinfo.bhash[i].lock);
                INIT_HLIST_HEAD(&dccp_hashinfo.bhash[i].chain);
        }

        rc = dccp_mib_init();
        if (rc)
                goto out_free_dccp_bhash;

        rc = dccp_ackvec_init();
        if (rc)
                goto out_free_dccp_mib;

        rc = dccp_sysctl_init();
        if (rc)
                goto out_ackvec_exit;

        dccp_timestamping_init();
out:
        return rc;
/* Error unwinding: each label undoes the step above it, in reverse order. */
out_ackvec_exit:
        dccp_ackvec_exit();
out_free_dccp_mib:
        dccp_mib_exit();
out_free_dccp_bhash:
        free_pages((unsigned long)dccp_hashinfo.bhash, bhash_order);
        dccp_hashinfo.bhash = NULL;
out_free_dccp_locks:
        inet_ehash_locks_free(&dccp_hashinfo);
out_free_dccp_ehash:
        free_pages((unsigned long)dccp_hashinfo.ehash, ehash_order);
        dccp_hashinfo.ehash = NULL;
out_free_bind_bucket_cachep:
        kmem_cache_destroy(dccp_hashinfo.bind_bucket_cachep);
        dccp_hashinfo.bind_bucket_cachep = NULL;
        goto out;
}
1143
/*
 * dccp_fini  -  module teardown; releases everything dccp_init() set up.
 *
 * NOTE(review): the page order freed here is recomputed from the table
 * sizes via get_order(), whereas dccp_init() allocated by a saved order —
 * these agree only while the bucket structs are power-of-two sized;
 * verify before changing either struct layout.
 */
static void __exit dccp_fini(void)
{
        dccp_mib_exit();
        free_pages((unsigned long)dccp_hashinfo.bhash,
                   get_order(dccp_hashinfo.bhash_size *
                             sizeof(struct inet_bind_hashbucket)));
        free_pages((unsigned long)dccp_hashinfo.ehash,
                   get_order(dccp_hashinfo.ehash_size *
                             sizeof(struct inet_ehash_bucket)));
        inet_ehash_locks_free(&dccp_hashinfo);
        kmem_cache_destroy(dccp_hashinfo.bind_bucket_cachep);
        dccp_ackvec_exit();
        dccp_sysctl_exit();
}
1158
/* Module entry/exit registration and metadata. */
module_init(dccp_init);
module_exit(dccp_fini);

MODULE_LICENSE("GPL");
MODULE_AUTHOR("Arnaldo Carvalho de Melo <acme@conectiva.com.br>");
MODULE_DESCRIPTION("DCCP - Datagram Congestion Controlled Protocol");