/*
 * Copyright (c) 2009-2010 Chelsio, Inc. All rights reserved.
 *
 * This software is available to you under a choice of one of two
 * licenses.  You may choose to be licensed under the terms of the GNU
 * General Public License (GPL) Version 2, available from the file
 * COPYING in the main directory of this source tree, or the
 * OpenIB.org BSD license below:
 *
 *     Redistribution and use in source and binary forms, with or
 *     without modification, are permitted provided that the following
 *     conditions are met:
 *
 *      - Redistributions of source code must retain the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer.
 *
 *      - Redistributions in binary form must reproduce the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer in the documentation and/or other materials
 *        provided with the distribution.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */
#include <linux/module.h>
#include <linux/list.h>
#include <linux/workqueue.h>
#include <linux/skbuff.h>
#include <linux/timer.h>
#include <linux/notifier.h>
#include <linux/inetdevice.h>
#include <linux/ip.h>
#include <linux/tcp.h>

#include <net/neighbour.h>
#include <net/netevent.h>
#include <net/route.h>

#include "iw_cxgb4.h"
/* Mirrors enum c4iw_ep_state; used only for debug logging. */
static char *states[] = {
	"idle",
	"listen",
	"connecting",
	"mpa_wait_req",
	"mpa_req_sent",
	"mpa_req_rcvd",
	"mpa_rep_sent",
	"fpdu_mode",
	"aborting",
	"closing",
	"moribund",
	"dead",
	NULL,
};

static int dack_mode = 1;
module_param(dack_mode, int, 0644);
MODULE_PARM_DESC(dack_mode, "Delayed ack mode (default=1)");

int c4iw_max_read_depth = 8;
module_param(c4iw_max_read_depth, int, 0644);
MODULE_PARM_DESC(c4iw_max_read_depth, "Per-connection max ORD/IRD (default=8)");

static int enable_tcp_timestamps;
module_param(enable_tcp_timestamps, int, 0644);
MODULE_PARM_DESC(enable_tcp_timestamps, "Enable TCP timestamps (default=0)");

static int enable_tcp_sack;
module_param(enable_tcp_sack, int, 0644);
MODULE_PARM_DESC(enable_tcp_sack, "Enable TCP SACK (default=0)");

static int enable_tcp_window_scaling = 1;
module_param(enable_tcp_window_scaling, int, 0644);
MODULE_PARM_DESC(enable_tcp_window_scaling,
		 "Enable TCP window scaling (default=1)");

static int c4iw_debug;
module_param(c4iw_debug, int, 0644);
MODULE_PARM_DESC(c4iw_debug, "Enable debug logging (default=0)");

static int peer2peer;
module_param(peer2peer, int, 0644);
MODULE_PARM_DESC(peer2peer, "Support peer2peer ULPs (default=0)");

static int p2p_type = FW_RI_INIT_P2PTYPE_READ_REQ;
module_param(p2p_type, int, 0644);
MODULE_PARM_DESC(p2p_type, "RDMAP opcode to use for the RTR message: "
			   "1=RDMA_READ 0=RDMA_WRITE (default 1)");

static int ep_timeout_secs = 60;
module_param(ep_timeout_secs, int, 0644);
MODULE_PARM_DESC(ep_timeout_secs, "CM Endpoint operation timeout "
				  "in seconds (default=60)");

static int mpa_rev = 1;
module_param(mpa_rev, int, 0644);
MODULE_PARM_DESC(mpa_rev, "MPA Revision, 0 supports amso1100, "
		 "1 is RFC 5044 spec compliant, 2 is IETF MPA Peer Connect Draft"
		 " compliant (default=1)");
static int markers_enabled;
module_param(markers_enabled, int, 0644);
MODULE_PARM_DESC(markers_enabled, "Enable MPA MARKERS (default(0)=disabled)");

static int crc_enabled = 1;
module_param(crc_enabled, int, 0644);
MODULE_PARM_DESC(crc_enabled, "Enable MPA CRC (default(1)=enabled)");

static int rcv_win = 256 * 1024;
module_param(rcv_win, int, 0644);
MODULE_PARM_DESC(rcv_win, "TCP receive window in bytes (default=256KB)");

static int snd_win = 128 * 1024;
module_param(snd_win, int, 0644);
MODULE_PARM_DESC(snd_win, "TCP send window in bytes (default=128KB)");
static struct workqueue_struct *workq;

static struct sk_buff_head rxq;

static struct sk_buff *get_skb(struct sk_buff *skb, int len, gfp_t gfp);
static void ep_timeout(unsigned long arg);
static void connect_reply_upcall(struct c4iw_ep *ep, int status);

static LIST_HEAD(timeout_list);
static spinlock_t timeout_lock;
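
/*
 * Endpoint timer helpers. Note the reference counting: start_ep_timer()
 * takes a kref on the ep (unless a timer was already pending) so the ep
 * cannot be freed while its timer is armed, and stop_ep_timer() drops
 * that kref once the timer is reliably dead. ep_timeout() fires after
 * ep_timeout_secs and tears down connections stuck in CM negotiation.
 */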
static void start_ep_timer(struct c4iw_ep *ep)
{
	PDBG("%s ep %p\n", __func__, ep);
	if (timer_pending(&ep->timer)) {
		PDBG("%s stopped / restarted timer ep %p\n", __func__, ep);
		del_timer_sync(&ep->timer);
	} else
		c4iw_get_ep(&ep->com);
	ep->timer.expires = jiffies + ep_timeout_secs * HZ;
	ep->timer.data = (unsigned long)ep;
	ep->timer.function = ep_timeout;
	add_timer(&ep->timer);
}

static void stop_ep_timer(struct c4iw_ep *ep)
{
	PDBG("%s ep %p\n", __func__, ep);
	if (!timer_pending(&ep->timer)) {
		printk(KERN_ERR "%s timer stopped when it's not running! "
		       "ep %p state %u\n", __func__, ep, ep->com.state);
		WARN_ON(1);
		return;
	}
	del_timer_sync(&ep->timer);
	c4iw_put_ep(&ep->com);
}

static int c4iw_l2t_send(struct c4iw_rdev *rdev, struct sk_buff *skb,
			 struct l2t_entry *l2e)
{
	int error = 0;

	if (c4iw_fatal_error(rdev)) {
		kfree_skb(skb);
		PDBG("%s - device in error state - dropping\n", __func__);
		return -EIO;
	}
	error = cxgb4_l2t_send(rdev->lldi.ports[0], skb, l2e);
	if (error < 0)
		kfree_skb(skb);
	return error < 0 ? error : 0;
}

int c4iw_ofld_send(struct c4iw_rdev *rdev, struct sk_buff *skb)
{
	int error = 0;

	if (c4iw_fatal_error(rdev)) {
		kfree_skb(skb);
		PDBG("%s - device in error state - dropping\n", __func__);
		return -EIO;
	}
	error = cxgb4_ofld_send(rdev->lldi.ports[0], skb);
	if (error < 0)
		kfree_skb(skb);
	return error < 0 ? error : 0;
}
static void release_tid(struct c4iw_rdev *rdev, u32 hwtid, struct sk_buff *skb)
{
	struct cpl_tid_release *req;

	skb = get_skb(skb, sizeof *req, GFP_KERNEL);
	if (!skb)
		return;
	req = (struct cpl_tid_release *) skb_put(skb, sizeof(*req));
	INIT_TP_WR(req, hwtid);
	OPCODE_TID(req) = cpu_to_be32(MK_OPCODE_TID(CPL_TID_RELEASE, hwtid));
	set_wr_txq(skb, CPL_PRIORITY_SETUP, 0);
	c4iw_ofld_send(rdev, skb);
	return;
}

static void set_emss(struct c4iw_ep *ep, u16 opt)
{
	ep->emss = ep->com.dev->rdev.lldi.mtus[GET_TCPOPT_MSS(opt)] - 40;
	ep->mss = ep->emss;
	if (GET_TCPOPT_TSTAMP(opt))
		ep->emss -= 12;
	if (ep->emss < 128)
		ep->emss = 128;
	PDBG("%s mss_idx %u mss %u emss=%u\n", __func__, GET_TCPOPT_MSS(opt),
	     ep->mss, ep->emss);
}

static enum c4iw_ep_state state_read(struct c4iw_ep_common *epc)
{
	enum c4iw_ep_state state;

	mutex_lock(&epc->mutex);
	state = epc->state;
	mutex_unlock(&epc->mutex);
	return state;
}

static void __state_set(struct c4iw_ep_common *epc, enum c4iw_ep_state new)
{
	epc->state = new;
}

static void state_set(struct c4iw_ep_common *epc, enum c4iw_ep_state new)
{
	mutex_lock(&epc->mutex);
	PDBG("%s - %s -> %s\n", __func__, states[epc->state], states[new]);
	__state_set(epc, new);
	mutex_unlock(&epc->mutex);
	return;
}
static void *alloc_ep(int size, gfp_t gfp)
{
	struct c4iw_ep_common *epc;

	epc = kzalloc(size, gfp);
	if (epc) {
		kref_init(&epc->kref);
		mutex_init(&epc->mutex);
		c4iw_init_wr_wait(&epc->wr_wait);
	}
	PDBG("%s alloc ep %p\n", __func__, epc);
	return epc;
}

void _c4iw_free_ep(struct kref *kref)
{
	struct c4iw_ep *ep;

	ep = container_of(kref, struct c4iw_ep, com.kref);
	PDBG("%s ep %p state %s\n", __func__, ep, states[state_read(&ep->com)]);
	if (test_bit(RELEASE_RESOURCES, &ep->com.flags)) {
		cxgb4_remove_tid(ep->com.dev->rdev.lldi.tids, 0, ep->hwtid);
		dst_release(ep->dst);
		cxgb4_l2t_release(ep->l2t);
	}
	kfree(ep);
}

static void release_ep_resources(struct c4iw_ep *ep)
{
	set_bit(RELEASE_RESOURCES, &ep->com.flags);
	c4iw_put_ep(&ep->com);
}

static int status2errno(int status)
{
	switch (status) {
	case CPL_ERR_NONE:
		return 0;
	case CPL_ERR_CONN_RESET:
		return -ECONNRESET;
	case CPL_ERR_ARP_MISS:
		return -EHOSTUNREACH;
	case CPL_ERR_CONN_TIMEDOUT:
		return -ETIMEDOUT;
	case CPL_ERR_TCAM_FULL:
		return -ENOMEM;
	case CPL_ERR_CONN_EXIST:
		return -EADDRINUSE;
	default:
		return -EIO;
	}
}
/*
 * Try to reuse skbs already allocated...
 */
static struct sk_buff *get_skb(struct sk_buff *skb, int len, gfp_t gfp)
{
	if (skb && !skb_is_nonlinear(skb) && !skb_cloned(skb)) {
		skb_trim(skb, 0);
		skb_get(skb);
		skb_reset_transport_header(skb);
	} else {
		skb = alloc_skb(len, gfp);
	}
	return skb;
}

static struct rtable *find_route(struct c4iw_dev *dev, __be32 local_ip,
				 __be32 peer_ip, __be16 local_port,
				 __be16 peer_port, u8 tos)
{
	struct rtable *rt;
	struct flowi4 fl4;

	rt = ip_route_output_ports(&init_net, &fl4, NULL, peer_ip, local_ip,
				   peer_port, local_port, IPPROTO_TCP,
				   tos, 0);
	if (IS_ERR(rt))
		return NULL;
	return rt;
}

static void arp_failure_discard(void *handle, struct sk_buff *skb)
{
	PDBG("%s c4iw_dev %p\n", __func__, handle);
	kfree_skb(skb);
}

/*
 * Handle an ARP failure for an active open.
 */
static void act_open_req_arp_failure(void *handle, struct sk_buff *skb)
{
	printk(KERN_ERR MOD "ARP failure during connect\n");
	kfree_skb(skb);
}

/*
 * Handle an ARP failure for a CPL_ABORT_REQ.  Change it into a no RST variant
 * and send it along.
 */
static void abort_arp_failure(void *handle, struct sk_buff *skb)
{
	struct c4iw_rdev *rdev = handle;
	struct cpl_abort_req *req = cplhdr(skb);

	PDBG("%s rdev %p\n", __func__, rdev);
	req->cmd = CPL_ABORT_NO_RST;
	c4iw_ofld_send(rdev, skb);
}
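
/*
 * Build and send an FW_FLOWC_WR for a new connection. The firmware needs
 * eight per-flow parameters (PF/VF, tx channel, port, ingress queue,
 * initial send/recv sequence numbers, send buffer size and MSS) before
 * any offloaded TX can flow on this tid. A ninth, zeroed mnemval entry
 * pads the WR out to a 16-byte multiple, which is why flowclen is 80
 * and the clearing loop below runs to 9.
 */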
static void send_flowc(struct c4iw_ep *ep, struct sk_buff *skb)
{
	unsigned int flowclen = 80;
	struct fw_flowc_wr *flowc;
	int i;

	skb = get_skb(skb, flowclen, GFP_KERNEL);
	flowc = (struct fw_flowc_wr *)__skb_put(skb, flowclen);

	flowc->op_to_nparams = cpu_to_be32(FW_WR_OP(FW_FLOWC_WR) |
					   FW_FLOWC_WR_NPARAMS(8));
	flowc->flowid_len16 = cpu_to_be32(FW_WR_LEN16(DIV_ROUND_UP(flowclen,
					  16)) | FW_WR_FLOWID(ep->hwtid));

	flowc->mnemval[0].mnemonic = FW_FLOWC_MNEM_PFNVFN;
	flowc->mnemval[0].val = cpu_to_be32(PCI_FUNC(ep->com.dev->rdev.lldi.pdev->devfn) << 8);
	flowc->mnemval[1].mnemonic = FW_FLOWC_MNEM_CH;
	flowc->mnemval[1].val = cpu_to_be32(ep->tx_chan);
	flowc->mnemval[2].mnemonic = FW_FLOWC_MNEM_PORT;
	flowc->mnemval[2].val = cpu_to_be32(ep->tx_chan);
	flowc->mnemval[3].mnemonic = FW_FLOWC_MNEM_IQID;
	flowc->mnemval[3].val = cpu_to_be32(ep->rss_qid);
	flowc->mnemval[4].mnemonic = FW_FLOWC_MNEM_SNDNXT;
	flowc->mnemval[4].val = cpu_to_be32(ep->snd_seq);
	flowc->mnemval[5].mnemonic = FW_FLOWC_MNEM_RCVNXT;
	flowc->mnemval[5].val = cpu_to_be32(ep->rcv_seq);
	flowc->mnemval[6].mnemonic = FW_FLOWC_MNEM_SNDBUF;
	flowc->mnemval[6].val = cpu_to_be32(snd_win);
	flowc->mnemval[7].mnemonic = FW_FLOWC_MNEM_MSS;
	flowc->mnemval[7].val = cpu_to_be32(ep->emss);
	/* Pad WR to 16 byte boundary */
	flowc->mnemval[8].mnemonic = 0;
	flowc->mnemval[8].val = 0;
	for (i = 0; i < 9; i++) {
		flowc->mnemval[i].r4[0] = 0;
		flowc->mnemval[i].r4[1] = 0;
		flowc->mnemval[i].r4[2] = 0;
	}

	set_wr_txq(skb, CPL_PRIORITY_DATA, ep->txq_idx);
	c4iw_ofld_send(&ep->com.dev->rdev, skb);
}
static int send_halfclose(struct c4iw_ep *ep, gfp_t gfp)
{
	struct cpl_close_con_req *req;
	struct sk_buff *skb;
	int wrlen = roundup(sizeof *req, 16);

	PDBG("%s ep %p tid %u\n", __func__, ep, ep->hwtid);
	skb = get_skb(NULL, wrlen, gfp);
	if (!skb) {
		printk(KERN_ERR MOD "%s - failed to alloc skb\n", __func__);
		return -ENOMEM;
	}
	set_wr_txq(skb, CPL_PRIORITY_DATA, ep->txq_idx);
	t4_set_arp_err_handler(skb, NULL, arp_failure_discard);
	req = (struct cpl_close_con_req *) skb_put(skb, wrlen);
	memset(req, 0, wrlen);
	INIT_TP_WR(req, ep->hwtid);
	OPCODE_TID(req) = cpu_to_be32(MK_OPCODE_TID(CPL_CLOSE_CON_REQ,
						    ep->hwtid));
	return c4iw_l2t_send(&ep->com.dev->rdev, skb, ep->l2t);
}

static int send_abort(struct c4iw_ep *ep, struct sk_buff *skb, gfp_t gfp)
{
	struct cpl_abort_req *req;
	int wrlen = roundup(sizeof *req, 16);

	PDBG("%s ep %p tid %u\n", __func__, ep, ep->hwtid);
	skb = get_skb(skb, wrlen, gfp);
	if (!skb) {
		printk(KERN_ERR MOD "%s - failed to alloc skb.\n",
		       __func__);
		return -ENOMEM;
	}
	set_wr_txq(skb, CPL_PRIORITY_DATA, ep->txq_idx);
	t4_set_arp_err_handler(skb, &ep->com.dev->rdev, abort_arp_failure);
	req = (struct cpl_abort_req *) skb_put(skb, wrlen);
	memset(req, 0, wrlen);
	INIT_TP_WR(req, ep->hwtid);
	OPCODE_TID(req) = cpu_to_be32(MK_OPCODE_TID(CPL_ABORT_REQ, ep->hwtid));
	req->cmd = CPL_ABORT_SEND_RST;
	return c4iw_l2t_send(&ep->com.dev->rdev, skb, ep->l2t);
}
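
/*
 * Active open: build a CPL_ACT_OPEN_REQ. opt0/opt2 pack the TCP options
 * the hardware will use for this connection: the window-scale shift is
 * derived from the rcv_win module parameter (for the default 256KB
 * window that works out to a shift of 3, assuming compute_wscale()
 * picks the smallest shift such that 65535 << shift covers the window),
 * the MSS index comes from cxgb4_best_mtu(), and RCV_BUFSIZ is in 1KB
 * units, hence the rcv_win>>10.
 */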
static int send_connect(struct c4iw_ep *ep)
{
	struct cpl_act_open_req *req;
	struct sk_buff *skb;
	u64 opt0;
	u32 opt2;
	unsigned int mtu_idx;
	int wscale;
	int wrlen = roundup(sizeof *req, 16);

	PDBG("%s ep %p atid %u\n", __func__, ep, ep->atid);

	skb = get_skb(NULL, wrlen, GFP_KERNEL);
	if (!skb) {
		printk(KERN_ERR MOD "%s - failed to alloc skb.\n",
		       __func__);
		return -ENOMEM;
	}
	set_wr_txq(skb, CPL_PRIORITY_SETUP, ep->ctrlq_idx);

	cxgb4_best_mtu(ep->com.dev->rdev.lldi.mtus, ep->mtu, &mtu_idx);
	wscale = compute_wscale(rcv_win);
	opt0 = KEEP_ALIVE(1) |
	       DELACK(1) |
	       WND_SCALE(wscale) |
	       MSS_IDX(mtu_idx) |
	       L2T_IDX(ep->l2t->idx) |
	       TX_CHAN(ep->tx_chan) |
	       SMAC_SEL(ep->smac_idx) |
	       DSCP(ep->tos) |
	       ULP_MODE(ULP_MODE_TCPDDP) |
	       RCV_BUFSIZ(rcv_win>>10);
	opt2 = RX_CHANNEL(0) |
	       RSS_QUEUE_VALID | RSS_QUEUE(ep->rss_qid);
	if (enable_tcp_timestamps)
		opt2 |= TSTAMPS_EN(1);
	if (enable_tcp_sack)
		opt2 |= SACK_EN(1);
	if (wscale && enable_tcp_window_scaling)
		opt2 |= WND_SCALE_EN(1);
	t4_set_arp_err_handler(skb, NULL, act_open_req_arp_failure);

	req = (struct cpl_act_open_req *) skb_put(skb, wrlen);
	INIT_TP_WR(req, 0);
	OPCODE_TID(req) = cpu_to_be32(
		MK_OPCODE_TID(CPL_ACT_OPEN_REQ, ((ep->rss_qid<<14)|ep->atid)));
	req->local_port = ep->com.local_addr.sin_port;
	req->peer_port = ep->com.remote_addr.sin_port;
	req->local_ip = ep->com.local_addr.sin_addr.s_addr;
	req->peer_ip = ep->com.remote_addr.sin_addr.s_addr;
	req->opt0 = cpu_to_be64(opt0);
	req->params = 0;
	req->opt2 = cpu_to_be32(opt2);
	return c4iw_l2t_send(&ep->com.dev->rdev, skb, ep->l2t);
}
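
/*
 * Kick off MPA negotiation as the initiator: the MPA start request is
 * sent as immediate data in a FW_OFLD_TX_DATA_WR on the already
 * established TCP stream. For draft MPA v2 (mpa_rev_to_use == 2) a
 * struct mpa_v2_conn_params carrying our IRD/ORD and the requested RTR
 * type is prepended to the ULP's private data, and private_data_size is
 * grown to match so the peer can find the real private data behind it.
 */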
static void send_mpa_req(struct c4iw_ep *ep, struct sk_buff *skb,
			 u8 mpa_rev_to_use)
{
	int mpalen, wrlen;
	struct fw_ofld_tx_data_wr *req;
	struct mpa_message *mpa;
	struct mpa_v2_conn_params mpa_v2_params;

	PDBG("%s ep %p tid %u pd_len %d\n", __func__, ep, ep->hwtid, ep->plen);

	BUG_ON(skb_cloned(skb));

	mpalen = sizeof(*mpa) + ep->plen;
	if (mpa_rev_to_use == 2)
		mpalen += sizeof(struct mpa_v2_conn_params);
	wrlen = roundup(mpalen + sizeof *req, 16);
	skb = get_skb(skb, wrlen, GFP_KERNEL);
	if (!skb) {
		connect_reply_upcall(ep, -ENOMEM);
		return;
	}
	set_wr_txq(skb, CPL_PRIORITY_DATA, ep->txq_idx);

	req = (struct fw_ofld_tx_data_wr *)skb_put(skb, wrlen);
	memset(req, 0, wrlen);
	req->op_to_immdlen = cpu_to_be32(
		FW_WR_OP(FW_OFLD_TX_DATA_WR) |
		FW_WR_COMPL(1) |
		FW_WR_IMMDLEN(mpalen));
	req->flowid_len16 = cpu_to_be32(
		FW_WR_FLOWID(ep->hwtid) |
		FW_WR_LEN16(wrlen >> 4));
	req->plen = cpu_to_be32(mpalen);
	req->tunnel_to_proxy = cpu_to_be32(
		FW_OFLD_TX_DATA_WR_FLUSH(1) |
		FW_OFLD_TX_DATA_WR_SHOVE(1));

	mpa = (struct mpa_message *)(req + 1);
	memcpy(mpa->key, MPA_KEY_REQ, sizeof(mpa->key));
	mpa->flags = (crc_enabled ? MPA_CRC : 0) |
		     (markers_enabled ? MPA_MARKERS : 0) |
		     (mpa_rev_to_use == 2 ? MPA_ENHANCED_RDMA_CONN : 0);
	mpa->private_data_size = htons(ep->plen);
	mpa->revision = mpa_rev_to_use;
	if (mpa_rev_to_use == 1)
		ep->tried_with_mpa_v1 = 1;

	if (mpa_rev_to_use == 2) {
		mpa->private_data_size +=
			htons(sizeof(struct mpa_v2_conn_params));
		mpa_v2_params.ird = htons((u16)ep->ird);
		mpa_v2_params.ord = htons((u16)ep->ord);

		if (peer2peer) {
			mpa_v2_params.ird |= htons(MPA_V2_PEER2PEER_MODEL);
			if (p2p_type == FW_RI_INIT_P2PTYPE_RDMA_WRITE)
				mpa_v2_params.ord |=
					htons(MPA_V2_RDMA_WRITE_RTR);
			else if (p2p_type == FW_RI_INIT_P2PTYPE_READ_REQ)
				mpa_v2_params.ord |=
					htons(MPA_V2_RDMA_READ_RTR);
		}
		memcpy(mpa->private_data, &mpa_v2_params,
		       sizeof(struct mpa_v2_conn_params));

		if (ep->plen)
			memcpy(mpa->private_data +
			       sizeof(struct mpa_v2_conn_params),
			       ep->mpa_pkt + sizeof(*mpa), ep->plen);
	} else
		if (ep->plen)
			memcpy(mpa->private_data,
			       ep->mpa_pkt + sizeof(*mpa), ep->plen);

	/*
	 * Reference the mpa skb. This ensures the data area
	 * will remain in memory until the hw acks the tx.
	 * Function fw4_ack() will deref it.
	 */
	skb_get(skb);
	t4_set_arp_err_handler(skb, NULL, arp_failure_discard);
	BUG_ON(ep->mpa_skb);
	ep->mpa_skb = skb;
	c4iw_l2t_send(&ep->com.dev->rdev, skb, ep->l2t);
	start_ep_timer(ep);
	state_set(&ep->com, MPA_REQ_SENT);
	ep->mpa_attr.initiator = 1;
	return;
}
static int send_mpa_reject(struct c4iw_ep *ep, const void *pdata, u8 plen)
{
	int mpalen, wrlen;
	struct fw_ofld_tx_data_wr *req;
	struct mpa_message *mpa;
	struct sk_buff *skb;
	struct mpa_v2_conn_params mpa_v2_params;

	PDBG("%s ep %p tid %u pd_len %d\n", __func__, ep, ep->hwtid, ep->plen);

	mpalen = sizeof(*mpa) + plen;
	if (ep->mpa_attr.version == 2 && ep->mpa_attr.enhanced_rdma_conn)
		mpalen += sizeof(struct mpa_v2_conn_params);
	wrlen = roundup(mpalen + sizeof *req, 16);

	skb = get_skb(NULL, wrlen, GFP_KERNEL);
	if (!skb) {
		printk(KERN_ERR MOD "%s - cannot alloc skb!\n", __func__);
		return -ENOMEM;
	}
	set_wr_txq(skb, CPL_PRIORITY_DATA, ep->txq_idx);

	req = (struct fw_ofld_tx_data_wr *)skb_put(skb, wrlen);
	memset(req, 0, wrlen);
	req->op_to_immdlen = cpu_to_be32(
		FW_WR_OP(FW_OFLD_TX_DATA_WR) |
		FW_WR_COMPL(1) |
		FW_WR_IMMDLEN(mpalen));
	req->flowid_len16 = cpu_to_be32(
		FW_WR_FLOWID(ep->hwtid) |
		FW_WR_LEN16(wrlen >> 4));
	req->plen = cpu_to_be32(mpalen);
	req->tunnel_to_proxy = cpu_to_be32(
		FW_OFLD_TX_DATA_WR_FLUSH(1) |
		FW_OFLD_TX_DATA_WR_SHOVE(1));

	mpa = (struct mpa_message *)(req + 1);
	memset(mpa, 0, sizeof(*mpa));
	memcpy(mpa->key, MPA_KEY_REP, sizeof(mpa->key));
	mpa->flags = MPA_REJECT;
	mpa->revision = mpa_rev;
	mpa->private_data_size = htons(plen);

	if (ep->mpa_attr.version == 2 && ep->mpa_attr.enhanced_rdma_conn) {
		mpa->flags |= MPA_ENHANCED_RDMA_CONN;
		mpa->private_data_size +=
			htons(sizeof(struct mpa_v2_conn_params));
		mpa_v2_params.ird = htons(((u16)ep->ird) |
					  (peer2peer ? MPA_V2_PEER2PEER_MODEL :
					   0));
		mpa_v2_params.ord = htons(((u16)ep->ord) | (peer2peer ?
					  (p2p_type ==
					   FW_RI_INIT_P2PTYPE_RDMA_WRITE ?
					   MPA_V2_RDMA_WRITE_RTR : p2p_type ==
					   FW_RI_INIT_P2PTYPE_READ_REQ ?
					   MPA_V2_RDMA_READ_RTR : 0) : 0));
		memcpy(mpa->private_data, &mpa_v2_params,
		       sizeof(struct mpa_v2_conn_params));

		if (ep->plen)
			memcpy(mpa->private_data +
			       sizeof(struct mpa_v2_conn_params), pdata, plen);
	} else
		if (plen)
			memcpy(mpa->private_data, pdata, plen);

	/*
	 * Reference the mpa skb again. This ensures the data area
	 * will remain in memory until the hw acks the tx.
	 * Function fw4_ack() will deref it.
	 */
	skb_get(skb);
	set_wr_txq(skb, CPL_PRIORITY_DATA, ep->txq_idx);
	t4_set_arp_err_handler(skb, NULL, arp_failure_discard);
	BUG_ON(ep->mpa_skb);
	ep->mpa_skb = skb;
	return c4iw_l2t_send(&ep->com.dev->rdev, skb, ep->l2t);
}
static int send_mpa_reply(struct c4iw_ep *ep, const void *pdata, u8 plen)
{
	int mpalen, wrlen;
	struct fw_ofld_tx_data_wr *req;
	struct mpa_message *mpa;
	struct sk_buff *skb;
	struct mpa_v2_conn_params mpa_v2_params;

	PDBG("%s ep %p tid %u pd_len %d\n", __func__, ep, ep->hwtid, ep->plen);

	mpalen = sizeof(*mpa) + plen;
	if (ep->mpa_attr.version == 2 && ep->mpa_attr.enhanced_rdma_conn)
		mpalen += sizeof(struct mpa_v2_conn_params);
	wrlen = roundup(mpalen + sizeof *req, 16);

	skb = get_skb(NULL, wrlen, GFP_KERNEL);
	if (!skb) {
		printk(KERN_ERR MOD "%s - cannot alloc skb!\n", __func__);
		return -ENOMEM;
	}
	set_wr_txq(skb, CPL_PRIORITY_DATA, ep->txq_idx);

	req = (struct fw_ofld_tx_data_wr *) skb_put(skb, wrlen);
	memset(req, 0, wrlen);
	req->op_to_immdlen = cpu_to_be32(
		FW_WR_OP(FW_OFLD_TX_DATA_WR) |
		FW_WR_COMPL(1) |
		FW_WR_IMMDLEN(mpalen));
	req->flowid_len16 = cpu_to_be32(
		FW_WR_FLOWID(ep->hwtid) |
		FW_WR_LEN16(wrlen >> 4));
	req->plen = cpu_to_be32(mpalen);
	req->tunnel_to_proxy = cpu_to_be32(
		FW_OFLD_TX_DATA_WR_FLUSH(1) |
		FW_OFLD_TX_DATA_WR_SHOVE(1));

	mpa = (struct mpa_message *)(req + 1);
	memset(mpa, 0, sizeof(*mpa));
	memcpy(mpa->key, MPA_KEY_REP, sizeof(mpa->key));
	mpa->flags = (ep->mpa_attr.crc_enabled ? MPA_CRC : 0) |
		     (markers_enabled ? MPA_MARKERS : 0);
	mpa->revision = ep->mpa_attr.version;
	mpa->private_data_size = htons(plen);

	if (ep->mpa_attr.version == 2 && ep->mpa_attr.enhanced_rdma_conn) {
		mpa->flags |= MPA_ENHANCED_RDMA_CONN;
		mpa->private_data_size +=
			htons(sizeof(struct mpa_v2_conn_params));
		mpa_v2_params.ird = htons((u16)ep->ird);
		mpa_v2_params.ord = htons((u16)ep->ord);
		if (peer2peer && (ep->mpa_attr.p2p_type !=
				  FW_RI_INIT_P2PTYPE_DISABLED)) {
			mpa_v2_params.ird |= htons(MPA_V2_PEER2PEER_MODEL);

			if (p2p_type == FW_RI_INIT_P2PTYPE_RDMA_WRITE)
				mpa_v2_params.ord |=
					htons(MPA_V2_RDMA_WRITE_RTR);
			else if (p2p_type == FW_RI_INIT_P2PTYPE_READ_REQ)
				mpa_v2_params.ord |=
					htons(MPA_V2_RDMA_READ_RTR);
		}

		memcpy(mpa->private_data, &mpa_v2_params,
		       sizeof(struct mpa_v2_conn_params));

		if (ep->plen)
			memcpy(mpa->private_data +
			       sizeof(struct mpa_v2_conn_params), pdata, plen);
	} else
		if (plen)
			memcpy(mpa->private_data, pdata, plen);

	/*
	 * Reference the mpa skb. This ensures the data area
	 * will remain in memory until the hw acks the tx.
	 * Function fw4_ack() will deref it.
	 */
	skb_get(skb);
	t4_set_arp_err_handler(skb, NULL, arp_failure_discard);
	ep->mpa_skb = skb;
	state_set(&ep->com, MPA_REP_SENT);
	return c4iw_l2t_send(&ep->com.dev->rdev, skb, ep->l2t);
}
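
/*
 * CPL_ACT_ESTABLISH completes an active open: the connection now has a
 * hardware tid, so move the ep from the atid table to the tid table,
 * record the initial send/receive sequence numbers from the CPL, derive
 * the effective MSS from the negotiated TCP options, and start MPA
 * negotiation (FLOWC first, then the MPA start request).
 */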
static int act_establish(struct c4iw_dev *dev, struct sk_buff *skb)
{
	struct c4iw_ep *ep;
	struct cpl_act_establish *req = cplhdr(skb);
	unsigned int tid = GET_TID(req);
	unsigned int atid = GET_TID_TID(ntohl(req->tos_atid));
	struct tid_info *t = dev->rdev.lldi.tids;

	ep = lookup_atid(t, atid);

	PDBG("%s ep %p tid %u snd_isn %u rcv_isn %u\n", __func__, ep, tid,
	     be32_to_cpu(req->snd_isn), be32_to_cpu(req->rcv_isn));

	dst_confirm(ep->dst);

	/* setup the hwtid for this connection */
	ep->hwtid = tid;
	cxgb4_insert_tid(t, ep, tid);

	ep->snd_seq = be32_to_cpu(req->snd_isn);
	ep->rcv_seq = be32_to_cpu(req->rcv_isn);

	set_emss(ep, ntohs(req->tcp_opt));

	/* dealloc the atid */
	cxgb4_free_atid(t, atid);

	/* start MPA negotiation */
	send_flowc(ep, NULL);
	if (ep->retry_with_mpa_v1)
		send_mpa_req(ep, skb, 1);
	else
		send_mpa_req(ep, skb, mpa_rev);

	return 0;
}
static void close_complete_upcall(struct c4iw_ep *ep)
{
	struct iw_cm_event event;

	PDBG("%s ep %p tid %u\n", __func__, ep, ep->hwtid);
	memset(&event, 0, sizeof(event));
	event.event = IW_CM_EVENT_CLOSE;
	if (ep->com.cm_id) {
		PDBG("close complete delivered ep %p cm_id %p tid %u\n",
		     ep, ep->com.cm_id, ep->hwtid);
		ep->com.cm_id->event_handler(ep->com.cm_id, &event);
		ep->com.cm_id->rem_ref(ep->com.cm_id);
		ep->com.cm_id = NULL;
		ep->com.qp = NULL;
	}
}

static int abort_connection(struct c4iw_ep *ep, struct sk_buff *skb, gfp_t gfp)
{
	PDBG("%s ep %p tid %u\n", __func__, ep, ep->hwtid);
	close_complete_upcall(ep);
	state_set(&ep->com, ABORTING);
	return send_abort(ep, skb, gfp);
}

static void peer_close_upcall(struct c4iw_ep *ep)
{
	struct iw_cm_event event;

	PDBG("%s ep %p tid %u\n", __func__, ep, ep->hwtid);
	memset(&event, 0, sizeof(event));
	event.event = IW_CM_EVENT_DISCONNECT;
	if (ep->com.cm_id) {
		PDBG("peer close delivered ep %p cm_id %p tid %u\n",
		     ep, ep->com.cm_id, ep->hwtid);
		ep->com.cm_id->event_handler(ep->com.cm_id, &event);
	}
}

static void peer_abort_upcall(struct c4iw_ep *ep)
{
	struct iw_cm_event event;

	PDBG("%s ep %p tid %u\n", __func__, ep, ep->hwtid);
	memset(&event, 0, sizeof(event));
	event.event = IW_CM_EVENT_CLOSE;
	event.status = -ECONNRESET;
	if (ep->com.cm_id) {
		PDBG("abort delivered ep %p cm_id %p tid %u\n", ep,
		     ep->com.cm_id, ep->hwtid);
		ep->com.cm_id->event_handler(ep->com.cm_id, &event);
		ep->com.cm_id->rem_ref(ep->com.cm_id);
		ep->com.cm_id = NULL;
		ep->com.qp = NULL;
	}
}
static void connect_reply_upcall(struct c4iw_ep *ep, int status)
{
	struct iw_cm_event event;

	PDBG("%s ep %p tid %u status %d\n", __func__, ep, ep->hwtid, status);
	memset(&event, 0, sizeof(event));
	event.event = IW_CM_EVENT_CONNECT_REPLY;
	event.status = status;
	event.local_addr = ep->com.local_addr;
	event.remote_addr = ep->com.remote_addr;

	if ((status == 0) || (status == -ECONNREFUSED)) {
		if (!ep->tried_with_mpa_v1) {
			/* this means MPA_v2 is used */
			event.private_data_len = ep->plen -
				sizeof(struct mpa_v2_conn_params);
			event.private_data = ep->mpa_pkt +
				sizeof(struct mpa_message) +
				sizeof(struct mpa_v2_conn_params);
		} else {
			/* this means MPA_v1 is used */
			event.private_data_len = ep->plen;
			event.private_data = ep->mpa_pkt +
				sizeof(struct mpa_message);
		}
	}

	PDBG("%s ep %p tid %u status %d\n", __func__, ep,
	     ep->hwtid, status);
	ep->com.cm_id->event_handler(ep->com.cm_id, &event);

	if (status < 0) {
		ep->com.cm_id->rem_ref(ep->com.cm_id);
		ep->com.cm_id = NULL;
		ep->com.qp = NULL;
	}
}
static void connect_request_upcall(struct c4iw_ep *ep)
{
	struct iw_cm_event event;

	PDBG("%s ep %p tid %u\n", __func__, ep, ep->hwtid);
	memset(&event, 0, sizeof(event));
	event.event = IW_CM_EVENT_CONNECT_REQUEST;
	event.local_addr = ep->com.local_addr;
	event.remote_addr = ep->com.remote_addr;
	event.provider_data = ep;
	if (!ep->tried_with_mpa_v1) {
		/* this means MPA_v2 is used */
		event.ord = ep->ord;
		event.ird = ep->ird;
		event.private_data_len = ep->plen -
			sizeof(struct mpa_v2_conn_params);
		event.private_data = ep->mpa_pkt + sizeof(struct mpa_message) +
			sizeof(struct mpa_v2_conn_params);
	} else {
		/* this means MPA_v1 is used. Send max supported */
		event.ord = c4iw_max_read_depth;
		event.ird = c4iw_max_read_depth;
		event.private_data_len = ep->plen;
		event.private_data = ep->mpa_pkt + sizeof(struct mpa_message);
	}
	if (state_read(&ep->parent_ep->com) != DEAD) {
		c4iw_get_ep(&ep->com);
		ep->parent_ep->com.cm_id->event_handler(
						ep->parent_ep->com.cm_id,
						&event);
	}
	c4iw_put_ep(&ep->parent_ep->com);
	ep->parent_ep = NULL;
}

static void established_upcall(struct c4iw_ep *ep)
{
	struct iw_cm_event event;

	PDBG("%s ep %p tid %u\n", __func__, ep, ep->hwtid);
	memset(&event, 0, sizeof(event));
	event.event = IW_CM_EVENT_ESTABLISHED;
	if (ep->com.cm_id) {
		PDBG("%s ep %p tid %u\n", __func__, ep, ep->hwtid);
		ep->com.cm_id->event_handler(ep->com.cm_id, &event);
	}
}
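
/*
 * Return RX credits to the hardware. Streaming-mode data (the MPA
 * exchange) is flow-controlled by the TP: once the driver has consumed
 * a chunk it acks those bytes back with a CPL_RX_DATA_ACK so the peer's
 * window reopens. dack_mode selects the delayed-ack behaviour the
 * hardware applies on this connection.
 */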
static int update_rx_credits(struct c4iw_ep *ep, u32 credits)
{
	struct cpl_rx_data_ack *req;
	struct sk_buff *skb;
	int wrlen = roundup(sizeof *req, 16);

	PDBG("%s ep %p tid %u credits %u\n", __func__, ep, ep->hwtid, credits);
	skb = get_skb(NULL, wrlen, GFP_KERNEL);
	if (!skb) {
		printk(KERN_ERR MOD "update_rx_credits - cannot alloc skb!\n");
		return 0;
	}

	req = (struct cpl_rx_data_ack *) skb_put(skb, wrlen);
	memset(req, 0, wrlen);
	INIT_TP_WR(req, ep->hwtid);
	OPCODE_TID(req) = cpu_to_be32(MK_OPCODE_TID(CPL_RX_DATA_ACK,
						    ep->hwtid));
	req->credit_dack = cpu_to_be32(credits | RX_FORCE_ACK(1) |
				       F_RX_DACK_CHANGE |
				       V_RX_DACK_MODE(dack_mode));
	set_wr_txq(skb, CPL_PRIORITY_ACK, ep->ctrlq_idx);
	c4iw_ofld_send(&ep->com.dev->rdev, skb);
	return credits;
}
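
/*
 * MPA start messages can arrive split across several CPL_RX_DATA
 * segments, so both process_mpa_reply() and process_mpa_request() follow
 * the same pattern: append the new bytes to ep->mpa_pkt, bail out until
 * the fixed MPA header and then the advertised private data are fully
 * buffered, and only then validate the header and act on the message.
 */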
static void process_mpa_reply(struct c4iw_ep *ep, struct sk_buff *skb)
{
	struct mpa_message *mpa;
	struct mpa_v2_conn_params *mpa_v2_params;
	u16 plen;
	u16 resp_ird, resp_ord;
	u8 rtr_mismatch = 0, insuff_ird = 0;
	struct c4iw_qp_attributes attrs;
	enum c4iw_qp_attr_mask mask;
	int err;

	PDBG("%s ep %p tid %u\n", __func__, ep, ep->hwtid);

	/*
	 * Stop mpa timer. If it expired, then the state has
	 * changed and we bail since ep_timeout already aborted
	 * the connection.
	 */
	stop_ep_timer(ep);
	if (state_read(&ep->com) != MPA_REQ_SENT)
		return;

	/*
	 * If we get more than the supported amount of private data
	 * then we must fail this connection.
	 */
	if (ep->mpa_pkt_len + skb->len > sizeof(ep->mpa_pkt)) {
		err = -EINVAL;
		goto err;
	}

	/*
	 * copy the new data into our accumulation buffer.
	 */
	skb_copy_from_linear_data(skb, &(ep->mpa_pkt[ep->mpa_pkt_len]),
				  skb->len);
	ep->mpa_pkt_len += skb->len;

	/*
	 * if we don't even have the mpa message, then bail.
	 */
	if (ep->mpa_pkt_len < sizeof(*mpa))
		return;
	mpa = (struct mpa_message *) ep->mpa_pkt;

	/* Validate MPA header. */
	if (mpa->revision > mpa_rev) {
		printk(KERN_ERR MOD "%s MPA version mismatch. Local = %d,"
		       " Received = %d\n", __func__, mpa_rev, mpa->revision);
		err = -EPROTO;
		goto err;
	}
	if (memcmp(mpa->key, MPA_KEY_REP, sizeof(mpa->key))) {
		err = -EPROTO;
		goto err;
	}

	plen = ntohs(mpa->private_data_size);

	/*
	 * Fail if there's too much private data.
	 */
	if (plen > MPA_MAX_PRIVATE_DATA) {
		err = -EPROTO;
		goto err;
	}

	/*
	 * If plen does not account for pkt size
	 */
	if (ep->mpa_pkt_len > (sizeof(*mpa) + plen)) {
		err = -EPROTO;
		goto err;
	}

	ep->plen = (u8) plen;

	/*
	 * If we don't have all the pdata yet, then bail.
	 * We'll continue processing when more data arrives.
	 */
	if (ep->mpa_pkt_len < (sizeof(*mpa) + plen))
		return;

	if (mpa->flags & MPA_REJECT) {
		err = -ECONNREFUSED;
		goto err;
	}

	/*
	 * If we get here we have accumulated the entire mpa
	 * start reply message including private data. And
	 * the MPA header is valid.
	 */
	state_set(&ep->com, FPDU_MODE);
	ep->mpa_attr.crc_enabled = (mpa->flags & MPA_CRC) | crc_enabled ? 1 : 0;
	ep->mpa_attr.recv_marker_enabled = markers_enabled;
	ep->mpa_attr.xmit_marker_enabled = mpa->flags & MPA_MARKERS ? 1 : 0;
	ep->mpa_attr.version = mpa->revision;
	ep->mpa_attr.p2p_type = FW_RI_INIT_P2PTYPE_DISABLED;

	if (mpa->revision == 2) {
		ep->mpa_attr.enhanced_rdma_conn =
			mpa->flags & MPA_ENHANCED_RDMA_CONN ? 1 : 0;
		if (ep->mpa_attr.enhanced_rdma_conn) {
			mpa_v2_params = (struct mpa_v2_conn_params *)
				(ep->mpa_pkt + sizeof(*mpa));
			resp_ird = ntohs(mpa_v2_params->ird) &
				MPA_V2_IRD_ORD_MASK;
			resp_ord = ntohs(mpa_v2_params->ord) &
				MPA_V2_IRD_ORD_MASK;

			/*
			 * This is a double-check. Ideally, below checks are
			 * not required since ird/ord stuff has been taken
			 * care of in c4iw_accept_cr
			 */
			if ((ep->ird < resp_ord) || (ep->ord > resp_ird)) {
				err = -ENOMEM;
				ep->ird = resp_ord;
				ep->ord = resp_ird;
				insuff_ird = 1;
			}

			if (ntohs(mpa_v2_params->ird) &
			    MPA_V2_PEER2PEER_MODEL) {
				if (ntohs(mpa_v2_params->ord) &
				    MPA_V2_RDMA_WRITE_RTR)
					ep->mpa_attr.p2p_type =
						FW_RI_INIT_P2PTYPE_RDMA_WRITE;
				else if (ntohs(mpa_v2_params->ord) &
					 MPA_V2_RDMA_READ_RTR)
					ep->mpa_attr.p2p_type =
						FW_RI_INIT_P2PTYPE_READ_REQ;
			}
		}
	} else if (mpa->revision == 1)
		if (peer2peer)
			ep->mpa_attr.p2p_type = p2p_type;

	PDBG("%s - crc_enabled=%d, recv_marker_enabled=%d, "
	     "xmit_marker_enabled=%d, version=%d p2p_type=%d local-p2p_type = "
	     "%d\n", __func__, ep->mpa_attr.crc_enabled,
	     ep->mpa_attr.recv_marker_enabled,
	     ep->mpa_attr.xmit_marker_enabled, ep->mpa_attr.version,
	     ep->mpa_attr.p2p_type, p2p_type);

	/*
	 * If responder's RTR does not match with that of initiator, assign
	 * FW_RI_INIT_P2PTYPE_DISABLED in mpa attributes so that RTR is not
	 * generated when moving QP to RTS state.
	 * A TERM message will be sent after QP has moved to RTS state
	 */
	if ((ep->mpa_attr.version == 2) &&
	    (ep->mpa_attr.p2p_type != p2p_type)) {
		ep->mpa_attr.p2p_type = FW_RI_INIT_P2PTYPE_DISABLED;
		rtr_mismatch = 1;
	}

	attrs.mpa_attr = ep->mpa_attr;
	attrs.max_ird = ep->ird;
	attrs.max_ord = ep->ord;
	attrs.llp_stream_handle = ep;
	attrs.next_state = C4IW_QP_STATE_RTS;

	mask = C4IW_QP_ATTR_NEXT_STATE |
	       C4IW_QP_ATTR_LLP_STREAM_HANDLE | C4IW_QP_ATTR_MPA_ATTR |
	       C4IW_QP_ATTR_MAX_IRD | C4IW_QP_ATTR_MAX_ORD;

	/* bind QP and TID with INIT_WR */
	err = c4iw_modify_qp(ep->com.qp->rhp,
			     ep->com.qp, mask, &attrs, 1);
	if (err)
		goto err;

	/*
	 * If responder's RTR requirement did not match with what initiator
	 * supports, generate TERM message
	 */
	if (rtr_mismatch) {
		printk(KERN_ERR "%s: RTR mismatch, sending TERM\n", __func__);
		attrs.layer_etype = LAYER_MPA | DDP_LLP;
		attrs.ecode = MPA_NOMATCH_RTR;
		attrs.next_state = C4IW_QP_STATE_TERMINATE;
		err = c4iw_modify_qp(ep->com.qp->rhp, ep->com.qp,
				     C4IW_QP_ATTR_NEXT_STATE, &attrs, 0);
		err = -ENOMEM;
		goto out;
	}

	/*
	 * Generate TERM if initiator IRD is not sufficient for responder
	 * provided ORD. Currently, we do the same behaviour even when
	 * responder provided IRD is also not sufficient as regards to
	 * initiator ORD.
	 */
	if (insuff_ird) {
		printk(KERN_ERR "%s: Insufficient IRD, sending TERM\n",
		       __func__);
		attrs.layer_etype = LAYER_MPA | DDP_LLP;
		attrs.ecode = MPA_INSUFF_IRD;
		attrs.next_state = C4IW_QP_STATE_TERMINATE;
		err = c4iw_modify_qp(ep->com.qp->rhp, ep->com.qp,
				     C4IW_QP_ATTR_NEXT_STATE, &attrs, 0);
		err = -ENOMEM;
		goto out;
	}
	goto out;
err:
	state_set(&ep->com, ABORTING);
	send_abort(ep, skb, GFP_KERNEL);
out:
	connect_reply_upcall(ep, err);
	return;
}
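
/*
 * Passive-side counterpart: accumulate and validate the peer's MPA start
 * request. For an enhanced (v2) request the peer's IRD/ORD arrive in the
 * low bits of the mpa_v2_conn_params words (masked with
 * MPA_V2_IRD_ORD_MASK), while the high bits carry the peer2peer model
 * and the requested RTR type; those negotiated values are parked in the
 * ep until the ULP accepts or rejects the connection request.
 */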
static void process_mpa_request(struct c4iw_ep *ep, struct sk_buff *skb)
{
	struct mpa_message *mpa;
	struct mpa_v2_conn_params *mpa_v2_params;
	u16 plen;

	PDBG("%s ep %p tid %u\n", __func__, ep, ep->hwtid);

	if (state_read(&ep->com) != MPA_REQ_WAIT)
		return;

	/*
	 * If we get more than the supported amount of private data
	 * then we must fail this connection.
	 */
	if (ep->mpa_pkt_len + skb->len > sizeof(ep->mpa_pkt)) {
		stop_ep_timer(ep);
		abort_connection(ep, skb, GFP_KERNEL);
		return;
	}

	PDBG("%s enter (%s line %u)\n", __func__, __FILE__, __LINE__);

	/*
	 * Copy the new data into our accumulation buffer.
	 */
	skb_copy_from_linear_data(skb, &(ep->mpa_pkt[ep->mpa_pkt_len]),
				  skb->len);
	ep->mpa_pkt_len += skb->len;

	/*
	 * If we don't even have the mpa message, then bail.
	 * We'll continue processing when more data arrives.
	 */
	if (ep->mpa_pkt_len < sizeof(*mpa))
		return;

	PDBG("%s enter (%s line %u)\n", __func__, __FILE__, __LINE__);
	stop_ep_timer(ep);
	mpa = (struct mpa_message *) ep->mpa_pkt;

	/*
	 * Validate MPA Header.
	 */
	if (mpa->revision > mpa_rev) {
		printk(KERN_ERR MOD "%s MPA version mismatch. Local = %d,"
		       " Received = %d\n", __func__, mpa_rev, mpa->revision);
		abort_connection(ep, skb, GFP_KERNEL);
		return;
	}

	if (memcmp(mpa->key, MPA_KEY_REQ, sizeof(mpa->key))) {
		abort_connection(ep, skb, GFP_KERNEL);
		return;
	}

	plen = ntohs(mpa->private_data_size);

	/*
	 * Fail if there's too much private data.
	 */
	if (plen > MPA_MAX_PRIVATE_DATA) {
		abort_connection(ep, skb, GFP_KERNEL);
		return;
	}

	/*
	 * If plen does not account for pkt size
	 */
	if (ep->mpa_pkt_len > (sizeof(*mpa) + plen)) {
		abort_connection(ep, skb, GFP_KERNEL);
		return;
	}
	ep->plen = (u8) plen;

	/*
	 * If we don't have all the pdata yet, then bail.
	 */
	if (ep->mpa_pkt_len < (sizeof(*mpa) + plen))
		return;

	/*
	 * If we get here we have accumulated the entire mpa
	 * start reply message including private data.
	 */
	ep->mpa_attr.initiator = 0;
	ep->mpa_attr.crc_enabled = (mpa->flags & MPA_CRC) | crc_enabled ? 1 : 0;
	ep->mpa_attr.recv_marker_enabled = markers_enabled;
	ep->mpa_attr.xmit_marker_enabled = mpa->flags & MPA_MARKERS ? 1 : 0;
	ep->mpa_attr.version = mpa->revision;
	if (mpa->revision == 1)
		ep->tried_with_mpa_v1 = 1;
	ep->mpa_attr.p2p_type = FW_RI_INIT_P2PTYPE_DISABLED;

	if (mpa->revision == 2) {
		ep->mpa_attr.enhanced_rdma_conn =
			mpa->flags & MPA_ENHANCED_RDMA_CONN ? 1 : 0;
		if (ep->mpa_attr.enhanced_rdma_conn) {
			mpa_v2_params = (struct mpa_v2_conn_params *)
				(ep->mpa_pkt + sizeof(*mpa));
			ep->ird = ntohs(mpa_v2_params->ird) &
				MPA_V2_IRD_ORD_MASK;
			ep->ord = ntohs(mpa_v2_params->ord) &
				MPA_V2_IRD_ORD_MASK;
			if (ntohs(mpa_v2_params->ird) & MPA_V2_PEER2PEER_MODEL)
				if (peer2peer) {
					if (ntohs(mpa_v2_params->ord) &
					    MPA_V2_RDMA_WRITE_RTR)
						ep->mpa_attr.p2p_type =
						FW_RI_INIT_P2PTYPE_RDMA_WRITE;
					else if (ntohs(mpa_v2_params->ord) &
						 MPA_V2_RDMA_READ_RTR)
						ep->mpa_attr.p2p_type =
						FW_RI_INIT_P2PTYPE_READ_REQ;
				}
		}
	} else if (mpa->revision == 1)
		if (peer2peer)
			ep->mpa_attr.p2p_type = p2p_type;

	PDBG("%s - crc_enabled=%d, recv_marker_enabled=%d, "
	     "xmit_marker_enabled=%d, version=%d p2p_type=%d\n", __func__,
	     ep->mpa_attr.crc_enabled, ep->mpa_attr.recv_marker_enabled,
	     ep->mpa_attr.xmit_marker_enabled, ep->mpa_attr.version,
	     ep->mpa_attr.p2p_type);

	state_set(&ep->com, MPA_REQ_RCVD);

	/* drive upcall */
	connect_request_upcall(ep);
	return;
}
static int rx_data(struct c4iw_dev *dev, struct sk_buff *skb)
{
	struct c4iw_ep *ep;
	struct cpl_rx_data *hdr = cplhdr(skb);
	unsigned int dlen = ntohs(hdr->len);
	unsigned int tid = GET_TID(hdr);
	struct tid_info *t = dev->rdev.lldi.tids;

	ep = lookup_tid(t, tid);
	PDBG("%s ep %p tid %u dlen %u\n", __func__, ep, ep->hwtid, dlen);
	skb_pull(skb, sizeof(*hdr));
	skb_trim(skb, dlen);

	ep->rcv_seq += dlen;
	BUG_ON(ep->rcv_seq != (ntohl(hdr->seq) + dlen));

	/* update RX credits */
	update_rx_credits(ep, dlen);

	switch (state_read(&ep->com)) {
	case MPA_REQ_SENT:
		process_mpa_reply(ep, skb);
		break;
	case MPA_REQ_WAIT:
		process_mpa_request(ep, skb);
		break;
	case MPA_REP_SENT:
		break;
	default:
		printk(KERN_ERR MOD "%s Unexpected streaming data."
		       " ep %p state %d tid %u\n",
		       __func__, ep, state_read(&ep->com), ep->hwtid);

		/*
		 * The ep will timeout and inform the ULP of the failure.
		 * See ep_timeout().
		 */
		break;
	}
	return 0;
}

static int abort_rpl(struct c4iw_dev *dev, struct sk_buff *skb)
{
	struct c4iw_ep *ep;
	struct cpl_abort_rpl_rss *rpl = cplhdr(skb);
	int release = 0;
	unsigned int tid = GET_TID(rpl);
	struct tid_info *t = dev->rdev.lldi.tids;

	ep = lookup_tid(t, tid);
	PDBG("%s ep %p tid %u\n", __func__, ep, ep->hwtid);
	BUG_ON(!ep);
	mutex_lock(&ep->com.mutex);
	switch (ep->com.state) {
	case ABORTING:
		__state_set(&ep->com, DEAD);
		release = 1;
		break;
	default:
		printk(KERN_ERR "%s ep %p state %d\n",
		       __func__, ep, ep->com.state);
		break;
	}
	mutex_unlock(&ep->com.mutex);

	if (release)
		release_ep_resources(ep);
	return 0;
}

/*
 * Return whether a failed active open has allocated a TID
 */
static inline int act_open_has_tid(int status)
{
	return status != CPL_ERR_TCAM_FULL && status != CPL_ERR_CONN_EXIST &&
	       status != CPL_ERR_ARP_MISS;
}
static int act_open_rpl(struct c4iw_dev *dev, struct sk_buff *skb)
{
	struct c4iw_ep *ep;
	struct cpl_act_open_rpl *rpl = cplhdr(skb);
	unsigned int atid = GET_TID_TID(GET_AOPEN_ATID(
					ntohl(rpl->atid_status)));
	struct tid_info *t = dev->rdev.lldi.tids;
	int status = GET_AOPEN_STATUS(ntohl(rpl->atid_status));

	ep = lookup_atid(t, atid);

	PDBG("%s ep %p atid %u status %u errno %d\n", __func__, ep, atid,
	     status, status2errno(status));

	if (status == CPL_ERR_RTX_NEG_ADVICE) {
		printk(KERN_WARNING MOD "Connection problems for atid %u\n",
		       atid);
		return 0;
	}

	connect_reply_upcall(ep, status2errno(status));
	state_set(&ep->com, DEAD);

	if (status && act_open_has_tid(status))
		cxgb4_remove_tid(ep->com.dev->rdev.lldi.tids, 0, GET_TID(rpl));

	cxgb4_free_atid(t, atid);
	dst_release(ep->dst);
	cxgb4_l2t_release(ep->l2t);
	c4iw_put_ep(&ep->com);

	return 0;
}

static int pass_open_rpl(struct c4iw_dev *dev, struct sk_buff *skb)
{
	struct cpl_pass_open_rpl *rpl = cplhdr(skb);
	struct tid_info *t = dev->rdev.lldi.tids;
	unsigned int stid = GET_TID(rpl);
	struct c4iw_listen_ep *ep = lookup_stid(t, stid);

	if (!ep) {
		printk(KERN_ERR MOD "stid %d lookup failure!\n", stid);
		return 0;
	}
	PDBG("%s ep %p status %d error %d\n", __func__, ep,
	     rpl->status, status2errno(rpl->status));
	c4iw_wake_up(&ep->com.wr_wait, status2errno(rpl->status));

	return 0;
}

static int listen_stop(struct c4iw_listen_ep *ep)
{
	struct sk_buff *skb;
	struct cpl_close_listsvr_req *req;

	PDBG("%s ep %p\n", __func__, ep);
	skb = get_skb(NULL, sizeof(*req), GFP_KERNEL);
	if (!skb) {
		printk(KERN_ERR MOD "%s - failed to alloc skb\n", __func__);
		return -ENOMEM;
	}
	req = (struct cpl_close_listsvr_req *) skb_put(skb, sizeof(*req));
	INIT_TP_WR(req, 0);
	OPCODE_TID(req) = cpu_to_be32(MK_OPCODE_TID(CPL_CLOSE_LISTSRV_REQ,
						    ep->stid));
	req->reply_ctrl = cpu_to_be16(
			  QUEUENO(ep->com.dev->rdev.lldi.rxq_ids[0]));
	set_wr_txq(skb, CPL_PRIORITY_SETUP, 0);
	return c4iw_ofld_send(&ep->com.dev->rdev, skb);
}

static int close_listsrv_rpl(struct c4iw_dev *dev, struct sk_buff *skb)
{
	struct cpl_close_listsvr_rpl *rpl = cplhdr(skb);
	struct tid_info *t = dev->rdev.lldi.tids;
	unsigned int stid = GET_TID(rpl);
	struct c4iw_listen_ep *ep = lookup_stid(t, stid);

	PDBG("%s ep %p\n", __func__, ep);
	c4iw_wake_up(&ep->com.wr_wait, status2errno(rpl->status));
	return 0;
}
static void accept_cr(struct c4iw_ep *ep, __be32 peer_ip, struct sk_buff *skb,
		      struct cpl_pass_accept_req *req)
{
	struct cpl_pass_accept_rpl *rpl;
	unsigned int mtu_idx;
	u64 opt0;
	u32 opt2;
	int wscale;

	PDBG("%s ep %p tid %u\n", __func__, ep, ep->hwtid);
	BUG_ON(skb_cloned(skb));
	skb_trim(skb, sizeof(*rpl));
	skb_get(skb);
	cxgb4_best_mtu(ep->com.dev->rdev.lldi.mtus, ep->mtu, &mtu_idx);
	wscale = compute_wscale(rcv_win);
	opt0 = KEEP_ALIVE(1) |
	       DELACK(1) |
	       WND_SCALE(wscale) |
	       MSS_IDX(mtu_idx) |
	       L2T_IDX(ep->l2t->idx) |
	       TX_CHAN(ep->tx_chan) |
	       SMAC_SEL(ep->smac_idx) |
	       DSCP(ep->tos) |
	       ULP_MODE(ULP_MODE_TCPDDP) |
	       RCV_BUFSIZ(rcv_win>>10);
	opt2 = RX_CHANNEL(0) |
	       RSS_QUEUE_VALID | RSS_QUEUE(ep->rss_qid);

	if (enable_tcp_timestamps && req->tcpopt.tstamp)
		opt2 |= TSTAMPS_EN(1);
	if (enable_tcp_sack && req->tcpopt.sack)
		opt2 |= SACK_EN(1);
	if (wscale && enable_tcp_window_scaling)
		opt2 |= WND_SCALE_EN(1);

	rpl = cplhdr(skb);
	INIT_TP_WR(rpl, ep->hwtid);
	OPCODE_TID(rpl) = cpu_to_be32(MK_OPCODE_TID(CPL_PASS_ACCEPT_RPL,
						    ep->hwtid));
	rpl->opt0 = cpu_to_be64(opt0);
	rpl->opt2 = cpu_to_be32(opt2);
	set_wr_txq(skb, CPL_PRIORITY_SETUP, ep->ctrlq_idx);
	c4iw_l2t_send(&ep->com.dev->rdev, skb, ep->l2t);

	return;
}

static void reject_cr(struct c4iw_dev *dev, u32 hwtid, __be32 peer_ip,
		      struct sk_buff *skb)
{
	PDBG("%s c4iw_dev %p tid %u peer_ip %x\n", __func__, dev, hwtid,
	     peer_ip);
	BUG_ON(skb_cloned(skb));
	skb_trim(skb, sizeof(struct cpl_tid_release));
	skb_get(skb);
	release_tid(&dev->rdev, hwtid, skb);
	return;
}
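
/*
 * The CPL_PASS_ACCEPT_REQ carries the SYN's actual packet headers right
 * after the CPL header, with the Ethernet and IP header lengths encoded
 * in req->hdr_len. get_4tuple() walks those embedded headers to recover
 * the connection 4-tuple (addresses and ports) for the new child ep.
 */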
static void get_4tuple(struct cpl_pass_accept_req *req,
		       __be32 *local_ip, __be32 *peer_ip,
		       __be16 *local_port, __be16 *peer_port)
{
	int eth_len = G_ETH_HDR_LEN(be32_to_cpu(req->hdr_len));
	int ip_len = G_IP_HDR_LEN(be32_to_cpu(req->hdr_len));
	struct iphdr *ip = (struct iphdr *)((u8 *)(req + 1) + eth_len);
	struct tcphdr *tcp = (struct tcphdr *)
			     ((u8 *)(req + 1) + eth_len + ip_len);

	PDBG("%s saddr 0x%x daddr 0x%x sport %u dport %u\n", __func__,
	     ntohl(ip->saddr), ntohl(ip->daddr), ntohs(tcp->source),
	     ntohs(tcp->dest));

	*peer_ip = ip->saddr;
	*local_ip = ip->daddr;
	*peer_port = tcp->source;
	*local_port = tcp->dest;

	return;
}
static int pass_accept_req(struct c4iw_dev *dev, struct sk_buff *skb)
{
	struct c4iw_ep *child_ep, *parent_ep;
	struct cpl_pass_accept_req *req = cplhdr(skb);
	unsigned int stid = GET_POPEN_TID(ntohl(req->tos_stid));
	struct tid_info *t = dev->rdev.lldi.tids;
	unsigned int hwtid = GET_TID(req);
	struct neighbour *neigh;
	struct dst_entry *dst;
	struct l2t_entry *l2t;
	struct rtable *rt;
	__be32 local_ip, peer_ip;
	__be16 local_port, peer_port;
	struct net_device *pdev;
	u32 tx_chan, smac_idx;
	u16 rss_qid;
	u32 mtu;
	int step;
	int txq_idx, ctrlq_idx;

	parent_ep = lookup_stid(t, stid);
	PDBG("%s parent ep %p tid %u\n", __func__, parent_ep, hwtid);

	get_4tuple(req, &local_ip, &peer_ip, &local_port, &peer_port);

	if (state_read(&parent_ep->com) != LISTEN) {
		printk(KERN_ERR "%s - listening ep not in LISTEN\n",
		       __func__);
		goto reject;
	}

	/* Find output route */
	rt = find_route(dev, local_ip, peer_ip, local_port, peer_port,
			GET_POPEN_TOS(ntohl(req->tos_stid)));
	if (!rt) {
		printk(KERN_ERR MOD "%s - failed to find dst entry!\n",
		       __func__);
		goto reject;
	}
	dst = &rt->dst;
	neigh = dst_get_neighbour(dst);
	if (neigh->dev->flags & IFF_LOOPBACK) {
		pdev = ip_dev_find(&init_net, peer_ip);
		BUG_ON(!pdev);
		l2t = cxgb4_l2t_get(dev->rdev.lldi.l2t, neigh, pdev, 0);
		mtu = pdev->mtu;
		tx_chan = cxgb4_port_chan(pdev);
		smac_idx = (cxgb4_port_viid(pdev) & 0x7F) << 1;
		step = dev->rdev.lldi.ntxq / dev->rdev.lldi.nchan;
		txq_idx = cxgb4_port_idx(pdev) * step;
		ctrlq_idx = cxgb4_port_idx(pdev);
		step = dev->rdev.lldi.nrxq / dev->rdev.lldi.nchan;
		rss_qid = dev->rdev.lldi.rxq_ids[cxgb4_port_idx(pdev) * step];
		dev_put(pdev);
	} else {
		l2t = cxgb4_l2t_get(dev->rdev.lldi.l2t, neigh, neigh->dev, 0);
		mtu = dst_mtu(dst);
		tx_chan = cxgb4_port_chan(neigh->dev);
		smac_idx = (cxgb4_port_viid(neigh->dev) & 0x7F) << 1;
		step = dev->rdev.lldi.ntxq / dev->rdev.lldi.nchan;
		txq_idx = cxgb4_port_idx(neigh->dev) * step;
		ctrlq_idx = cxgb4_port_idx(neigh->dev);
		step = dev->rdev.lldi.nrxq / dev->rdev.lldi.nchan;
		rss_qid = dev->rdev.lldi.rxq_ids[
			  cxgb4_port_idx(neigh->dev) * step];
	}
	if (!l2t) {
		printk(KERN_ERR MOD "%s - failed to allocate l2t entry!\n",
		       __func__);
		dst_release(dst);
		goto reject;
	}

	child_ep = alloc_ep(sizeof(*child_ep), GFP_KERNEL);
	if (!child_ep) {
		printk(KERN_ERR MOD "%s - failed to allocate ep entry!\n",
		       __func__);
		cxgb4_l2t_release(l2t);
		dst_release(dst);
		goto reject;
	}
	state_set(&child_ep->com, CONNECTING);
	child_ep->com.dev = dev;
	child_ep->com.cm_id = NULL;
	child_ep->com.local_addr.sin_family = PF_INET;
	child_ep->com.local_addr.sin_port = local_port;
	child_ep->com.local_addr.sin_addr.s_addr = local_ip;
	child_ep->com.remote_addr.sin_family = PF_INET;
	child_ep->com.remote_addr.sin_port = peer_port;
	child_ep->com.remote_addr.sin_addr.s_addr = peer_ip;
	c4iw_get_ep(&parent_ep->com);
	child_ep->parent_ep = parent_ep;
	child_ep->tos = GET_POPEN_TOS(ntohl(req->tos_stid));
	child_ep->l2t = l2t;
	child_ep->dst = dst;
	child_ep->hwtid = hwtid;
	child_ep->tx_chan = tx_chan;
	child_ep->smac_idx = smac_idx;
	child_ep->rss_qid = rss_qid;
	child_ep->mtu = mtu;
	child_ep->txq_idx = txq_idx;
	child_ep->ctrlq_idx = ctrlq_idx;

	PDBG("%s tx_chan %u smac_idx %u rss_qid %u\n", __func__,
	     tx_chan, smac_idx, rss_qid);

	init_timer(&child_ep->timer);
	cxgb4_insert_tid(t, child_ep, hwtid);
	accept_cr(child_ep, peer_ip, skb, req);
	goto out;
reject:
	reject_cr(dev, hwtid, peer_ip, skb);
out:
	return 0;
}

static int pass_establish(struct c4iw_dev *dev, struct sk_buff *skb)
{
	struct c4iw_ep *ep;
	struct cpl_pass_establish *req = cplhdr(skb);
	struct tid_info *t = dev->rdev.lldi.tids;
	unsigned int tid = GET_TID(req);

	ep = lookup_tid(t, tid);
	PDBG("%s ep %p tid %u\n", __func__, ep, ep->hwtid);
	ep->snd_seq = be32_to_cpu(req->snd_isn);
	ep->rcv_seq = be32_to_cpu(req->rcv_isn);

	set_emss(ep, ntohs(req->tcp_opt));

	dst_confirm(ep->dst);
	state_set(&ep->com, MPA_REQ_WAIT);
	start_ep_timer(ep);
	send_flowc(ep, skb);

	return 0;
}
static int peer_close(struct c4iw_dev *dev, struct sk_buff *skb)
{
	struct cpl_peer_close *hdr = cplhdr(skb);
	struct c4iw_ep *ep;
	struct c4iw_qp_attributes attrs;
	int disconnect = 1;
	int release = 0;
	struct tid_info *t = dev->rdev.lldi.tids;
	unsigned int tid = GET_TID(hdr);
	int ret;

	ep = lookup_tid(t, tid);
	PDBG("%s ep %p tid %u\n", __func__, ep, ep->hwtid);
	dst_confirm(ep->dst);

	mutex_lock(&ep->com.mutex);
	switch (ep->com.state) {
	case MPA_REQ_WAIT:
		__state_set(&ep->com, CLOSING);
		break;
	case MPA_REQ_SENT:
		__state_set(&ep->com, CLOSING);
		connect_reply_upcall(ep, -ECONNRESET);
		break;
	case MPA_REQ_RCVD:

		/*
		 * We're gonna mark this puppy DEAD, but keep
		 * the reference on it until the ULP accepts or
		 * rejects the CR. Also wake up anyone waiting
		 * in rdma connection migration (see c4iw_accept_cr()).
		 */
		__state_set(&ep->com, CLOSING);
		PDBG("waking up ep %p tid %u\n", ep, ep->hwtid);
		c4iw_wake_up(&ep->com.wr_wait, -ECONNRESET);
		break;
	case MPA_REP_SENT:
		__state_set(&ep->com, CLOSING);
		PDBG("waking up ep %p tid %u\n", ep, ep->hwtid);
		c4iw_wake_up(&ep->com.wr_wait, -ECONNRESET);
		break;
	case FPDU_MODE:
		start_ep_timer(ep);
		__state_set(&ep->com, CLOSING);
		attrs.next_state = C4IW_QP_STATE_CLOSING;
		ret = c4iw_modify_qp(ep->com.qp->rhp, ep->com.qp,
				     C4IW_QP_ATTR_NEXT_STATE, &attrs, 1);
		if (ret != -ECONNRESET) {
			peer_close_upcall(ep);
			disconnect = 1;
		}
		break;
	case ABORTING:
		disconnect = 0;
		break;
	case CLOSING:
		__state_set(&ep->com, MORIBUND);
		disconnect = 0;
		break;
	case MORIBUND:
		stop_ep_timer(ep);
		if (ep->com.cm_id && ep->com.qp) {
			attrs.next_state = C4IW_QP_STATE_IDLE;
			c4iw_modify_qp(ep->com.qp->rhp, ep->com.qp,
				       C4IW_QP_ATTR_NEXT_STATE, &attrs, 1);
		}
		close_complete_upcall(ep);
		__state_set(&ep->com, DEAD);
		release = 1;
		disconnect = 0;
		break;
	case DEAD:
		disconnect = 0;
		break;
	default:
		BUG_ON(1);
	}
	mutex_unlock(&ep->com.mutex);
	if (disconnect)
		c4iw_ep_disconnect(ep, 0, GFP_KERNEL);
	if (release)
		release_ep_resources(ep);
	return 0;
}

/*
 * Returns whether an ABORT_REQ_RSS message is a negative advice.
 */
static int is_neg_adv_abort(unsigned int status)
{
	return status == CPL_ERR_RTX_NEG_ADVICE ||
	       status == CPL_ERR_PERSIST_NEG_ADVICE;
}
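
/*
 * c4iw_reconnect() drives the MPA-v1 fallback: when a draft-v2 attempt
 * is aborted by the peer, peer_abort() sets retry_with_mpa_v1 and the
 * connection is rebuilt from scratch (new atid, fresh route and l2t
 * entry) before send_connect() reissues the active open. The ULP never
 * sees the retry; it is still waiting on its original connect request.
 */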
static int c4iw_reconnect(struct c4iw_ep *ep)
{
	int err = 0;
	struct rtable *rt;
	struct net_device *pdev;
	struct neighbour *neigh;
	int step;

	PDBG("%s qp %p cm_id %p\n", __func__, ep->com.qp, ep->com.cm_id);
	init_timer(&ep->timer);

	/*
	 * Allocate an active TID to initiate a TCP connection.
	 */
	ep->atid = cxgb4_alloc_atid(ep->com.dev->rdev.lldi.tids, ep);
	if (ep->atid == -1) {
		printk(KERN_ERR MOD "%s - cannot alloc atid.\n", __func__);
		err = -ENOMEM;
		goto fail2;
	}

	/* find a route */
	rt = find_route(ep->com.dev,
			ep->com.cm_id->local_addr.sin_addr.s_addr,
			ep->com.cm_id->remote_addr.sin_addr.s_addr,
			ep->com.cm_id->local_addr.sin_port,
			ep->com.cm_id->remote_addr.sin_port, 0);
	if (!rt) {
		printk(KERN_ERR MOD "%s - cannot find route.\n", __func__);
		err = -EHOSTUNREACH;
		goto fail3;
	}
	ep->dst = &rt->dst;

	neigh = dst_get_neighbour(ep->dst);

	/* get a l2t entry */
	if (neigh->dev->flags & IFF_LOOPBACK) {
		PDBG("%s LOOPBACK\n", __func__);
		pdev = ip_dev_find(&init_net,
				   ep->com.cm_id->remote_addr.sin_addr.s_addr);
		ep->l2t = cxgb4_l2t_get(ep->com.dev->rdev.lldi.l2t,
					neigh, pdev, 0);
		ep->mtu = pdev->mtu;
		ep->tx_chan = cxgb4_port_chan(pdev);
		ep->smac_idx = (cxgb4_port_viid(pdev) & 0x7F) << 1;
		step = ep->com.dev->rdev.lldi.ntxq /
		       ep->com.dev->rdev.lldi.nchan;
		ep->txq_idx = cxgb4_port_idx(pdev) * step;
		step = ep->com.dev->rdev.lldi.nrxq /
		       ep->com.dev->rdev.lldi.nchan;
		ep->ctrlq_idx = cxgb4_port_idx(pdev);
		ep->rss_qid = ep->com.dev->rdev.lldi.rxq_ids[
			      cxgb4_port_idx(pdev) * step];
		dev_put(pdev);
	} else {
		ep->l2t = cxgb4_l2t_get(ep->com.dev->rdev.lldi.l2t,
					neigh, neigh->dev, 0);
		ep->mtu = dst_mtu(ep->dst);
		ep->tx_chan = cxgb4_port_chan(neigh->dev);
		ep->smac_idx = (cxgb4_port_viid(neigh->dev) & 0x7F) << 1;
		step = ep->com.dev->rdev.lldi.ntxq /
		       ep->com.dev->rdev.lldi.nchan;
		ep->txq_idx = cxgb4_port_idx(neigh->dev) * step;
		ep->ctrlq_idx = cxgb4_port_idx(neigh->dev);
		step = ep->com.dev->rdev.lldi.nrxq /
		       ep->com.dev->rdev.lldi.nchan;
		ep->rss_qid = ep->com.dev->rdev.lldi.rxq_ids[
			      cxgb4_port_idx(neigh->dev) * step];
	}
	if (!ep->l2t) {
		printk(KERN_ERR MOD "%s - cannot alloc l2e.\n", __func__);
		err = -ENOMEM;
		goto fail4;
	}

	PDBG("%s txq_idx %u tx_chan %u smac_idx %u rss_qid %u l2t_idx %u\n",
	     __func__, ep->txq_idx, ep->tx_chan, ep->smac_idx, ep->rss_qid,
	     ep->l2t->idx);

	state_set(&ep->com, CONNECTING);
	ep->tos = 0;

	/* send connect request to rnic */
	err = send_connect(ep);
	if (!err)
		goto out;

	cxgb4_l2t_release(ep->l2t);
fail4:
	dst_release(ep->dst);
fail3:
	cxgb4_free_atid(ep->com.dev->rdev.lldi.tids, ep->atid);
fail2:
	/*
	 * Remember to send a notification to the upper layer. Since we are
	 * here, the upper layer is not aware that this is a reconnect
	 * attempt and is still waiting for the response to its first
	 * connect request.
	 */
	connect_reply_upcall(ep, -ECONNRESET);
	c4iw_put_ep(&ep->com);
out:
	return err;
}
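
/*
 * peer_abort() sorts incoming CPL_ABORT_REQ_RSS messages: "negative
 * advice" aborts are transient congestion hints and are ignored, an
 * abort of an MPA-v2 attempt triggers the silent retry-with-v1 path
 * above, and a real abort moves the QP to ERROR, informs the ULP, and
 * answers the hardware with an ABORT_RPL that suppresses any further RST.
 */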
static int peer_abort(struct c4iw_dev *dev, struct sk_buff *skb)
{
	struct cpl_abort_req_rss *req = cplhdr(skb);
	struct c4iw_ep *ep;
	struct cpl_abort_rpl *rpl;
	struct sk_buff *rpl_skb;
	struct c4iw_qp_attributes attrs;
	int ret;
	int release = 0;
	struct tid_info *t = dev->rdev.lldi.tids;
	unsigned int tid = GET_TID(req);

	ep = lookup_tid(t, tid);
	if (is_neg_adv_abort(req->status)) {
		PDBG("%s neg_adv_abort ep %p tid %u\n", __func__, ep,
		     ep->hwtid);
		return 0;
	}
	PDBG("%s ep %p tid %u state %u\n", __func__, ep, ep->hwtid,
	     ep->com.state);

	/*
	 * Wake up any threads in rdma_init() or rdma_fini().
	 * However, this is not needed if com state is just
	 * MPA_REQ_SENT
	 */
	if (ep->com.state != MPA_REQ_SENT)
		c4iw_wake_up(&ep->com.wr_wait, -ECONNRESET);

	mutex_lock(&ep->com.mutex);
	switch (ep->com.state) {
	case CONNECTING:
		break;
	case MPA_REQ_WAIT:
		stop_ep_timer(ep);
		break;
	case MPA_REQ_SENT:
		stop_ep_timer(ep);
		if (mpa_rev == 2 && ep->tried_with_mpa_v1)
			connect_reply_upcall(ep, -ECONNRESET);
		else {
			/*
			 * we just don't send notification upwards because we
			 * want to retry with mpa_v1 without upper layers even
			 * knowing it.
			 *
			 * do some housekeeping so as to re-initiate the
			 * connection
			 */
			PDBG("%s: mpa_rev=%d. Retrying with mpav1\n", __func__,
			     mpa_rev);
			ep->retry_with_mpa_v1 = 1;
		}
		break;
	case MPA_REP_SENT:
		break;
	case MPA_REQ_RCVD:
		break;
	case MORIBUND:
	case CLOSING:
		stop_ep_timer(ep);
		/*FALLTHROUGH*/
	case FPDU_MODE:
		if (ep->com.cm_id && ep->com.qp) {
			attrs.next_state = C4IW_QP_STATE_ERROR;
			ret = c4iw_modify_qp(ep->com.qp->rhp,
				     ep->com.qp, C4IW_QP_ATTR_NEXT_STATE,
				     &attrs, 1);
			if (ret)
				printk(KERN_ERR MOD
				       "%s - qp <- error failed!\n",
				       __func__);
		}
		peer_abort_upcall(ep);
		break;
	case ABORTING:
		break;
	case DEAD:
		PDBG("%s PEER_ABORT IN DEAD STATE!!!!\n", __func__);
		mutex_unlock(&ep->com.mutex);
		return 0;
	default:
		BUG_ON(1);
		break;
	}
	dst_confirm(ep->dst);
	if (ep->com.state != ABORTING) {
		__state_set(&ep->com, DEAD);
		/* we don't release if we want to retry with mpa_v1 */
		if (!ep->retry_with_mpa_v1)
			release = 1;
	}
	mutex_unlock(&ep->com.mutex);

	rpl_skb = get_skb(skb, sizeof(*rpl), GFP_KERNEL);
	if (!rpl_skb) {
		printk(KERN_ERR MOD "%s - cannot allocate skb!\n",
		       __func__);
		release = 1;
		goto out;
	}
	set_wr_txq(skb, CPL_PRIORITY_DATA, ep->txq_idx);
	rpl = (struct cpl_abort_rpl *) skb_put(rpl_skb, sizeof(*rpl));
	INIT_TP_WR(rpl, ep->hwtid);
	OPCODE_TID(rpl) = cpu_to_be32(MK_OPCODE_TID(CPL_ABORT_RPL, ep->hwtid));
	rpl->cmd = CPL_ABORT_NO_RST;
	c4iw_ofld_send(&ep->com.dev->rdev, rpl_skb);
out:
	if (release)
		release_ep_resources(ep);

	/* retry with mpa-v1 */
	if (ep && ep->retry_with_mpa_v1) {
		cxgb4_remove_tid(ep->com.dev->rdev.lldi.tids, 0, ep->hwtid);
		dst_release(ep->dst);
		cxgb4_l2t_release(ep->l2t);
		c4iw_reconnect(ep);
	}

	return 0;
}
static int close_con_rpl(struct c4iw_dev *dev, struct sk_buff *skb)
{
	struct c4iw_ep *ep;
	struct c4iw_qp_attributes attrs;
	struct cpl_close_con_rpl *rpl = cplhdr(skb);
	int release = 0;
	struct tid_info *t = dev->rdev.lldi.tids;
	unsigned int tid = GET_TID(rpl);

	ep = lookup_tid(t, tid);

	PDBG("%s ep %p tid %u\n", __func__, ep, ep->hwtid);
	BUG_ON(!ep);

	/* The cm_id may be null if we failed to connect */
	mutex_lock(&ep->com.mutex);
	switch (ep->com.state) {
	case CLOSING:
		__state_set(&ep->com, MORIBUND);
		break;
	case MORIBUND:
		stop_ep_timer(ep);
		if ((ep->com.cm_id) && (ep->com.qp)) {
			attrs.next_state = C4IW_QP_STATE_IDLE;
			c4iw_modify_qp(ep->com.qp->rhp,
				       ep->com.qp,
				       C4IW_QP_ATTR_NEXT_STATE,
				       &attrs, 1);
		}
		close_complete_upcall(ep);
		__state_set(&ep->com, DEAD);
		release = 1;
		break;
	case ABORTING:
	case DEAD:
		break;
	default:
		BUG_ON(1);
		break;
	}
	mutex_unlock(&ep->com.mutex);
	if (release)
		release_ep_resources(ep);
	return 0;
}

static int terminate(struct c4iw_dev *dev, struct sk_buff *skb)
{
	struct cpl_rdma_terminate *rpl = cplhdr(skb);
	struct tid_info *t = dev->rdev.lldi.tids;
	unsigned int tid = GET_TID(rpl);
	struct c4iw_ep *ep;
	struct c4iw_qp_attributes attrs;

	ep = lookup_tid(t, tid);
	BUG_ON(!ep);

	if (ep && ep->com.qp) {
		printk(KERN_WARNING MOD "TERM received tid %u qpid %u\n", tid,
		       ep->com.qp->wq.sq.qid);
		attrs.next_state = C4IW_QP_STATE_TERMINATE;
		c4iw_modify_qp(ep->com.qp->rhp, ep->com.qp,
			       C4IW_QP_ATTR_NEXT_STATE, &attrs, 1);
	} else
		printk(KERN_WARNING MOD "TERM received tid %u no ep/qp\n", tid);

	return 0;
}
/*
 * Upcall from the adapter indicating data has been transmitted.
 * For us it's just the single MPA request or reply. We can now free
 * the skb holding the mpa message.
 */
static int fw4_ack(struct c4iw_dev *dev, struct sk_buff *skb)
{
	struct c4iw_ep *ep;
	struct cpl_fw4_ack *hdr = cplhdr(skb);
	u8 credits = hdr->credits;
	unsigned int tid = GET_TID(hdr);
	struct tid_info *t = dev->rdev.lldi.tids;

	ep = lookup_tid(t, tid);
	PDBG("%s ep %p tid %u credits %u\n", __func__, ep, ep->hwtid, credits);
	if (credits == 0) {
		PDBG("%s 0 credit ack ep %p tid %u state %u\n",
		     __func__, ep, ep->hwtid, state_read(&ep->com));
		return 0;
	}

	dst_confirm(ep->dst);
	if (ep->mpa_skb) {
		PDBG("%s last streaming msg ack ep %p tid %u state %u "
		     "initiator %u freeing skb\n", __func__, ep, ep->hwtid,
		     state_read(&ep->com), ep->mpa_attr.initiator ? 1 : 0);
		kfree_skb(ep->mpa_skb);
		ep->mpa_skb = NULL;
	}
	return 0;
}

int c4iw_reject_cr(struct iw_cm_id *cm_id, const void *pdata, u8 pdata_len)
{
	int err;
	struct c4iw_ep *ep = to_ep(cm_id);
	PDBG("%s ep %p tid %u\n", __func__, ep, ep->hwtid);

	if (state_read(&ep->com) == DEAD) {
		c4iw_put_ep(&ep->com);
		return -ECONNRESET;
	}
	BUG_ON(state_read(&ep->com) != MPA_REQ_RCVD);
	if (mpa_rev == 0)
		abort_connection(ep, NULL, GFP_KERNEL);
	else {
		err = send_mpa_reject(ep, pdata, pdata_len);
		err = c4iw_ep_disconnect(ep, 0, GFP_KERNEL);
	}
	c4iw_put_ep(&ep->com);
	return 0;
}
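
/*
 * c4iw_accept_cr() completes the passive connection: after validating
 * the ULP's requested ORD/IRD against both c4iw_max_read_depth and the
 * limits the peer advertised in its MPA v2 request, it binds the QP to
 * the ep, moves the QP to RTS, and sends the MPA reply; the ep is only
 * upgraded to FPDU_MODE once that reply is on the wire.
 */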
int c4iw_accept_cr(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param)
{
	int err;
	struct c4iw_qp_attributes attrs;
	enum c4iw_qp_attr_mask mask;
	struct c4iw_ep *ep = to_ep(cm_id);
	struct c4iw_dev *h = to_c4iw_dev(cm_id->device);
	struct c4iw_qp *qp = get_qhp(h, conn_param->qpn);

	PDBG("%s ep %p tid %u\n", __func__, ep, ep->hwtid);
	if (state_read(&ep->com) == DEAD) {
		err = -ECONNRESET;
		goto err;
	}

	BUG_ON(state_read(&ep->com) != MPA_REQ_RCVD);
	BUG_ON(!qp);

	if ((conn_param->ord > c4iw_max_read_depth) ||
	    (conn_param->ird > c4iw_max_read_depth)) {
		abort_connection(ep, NULL, GFP_KERNEL);
		err = -EINVAL;
		goto err;
	}

	if (ep->mpa_attr.version == 2 && ep->mpa_attr.enhanced_rdma_conn) {
		if (conn_param->ord > ep->ird) {
			ep->ird = conn_param->ird;
			ep->ord = conn_param->ord;
			send_mpa_reject(ep, conn_param->private_data,
					conn_param->private_data_len);
			abort_connection(ep, NULL, GFP_KERNEL);
			err = -ENOMEM;
			goto err;
		}
		if (conn_param->ird > ep->ord) {
			if (!ep->ord)
				conn_param->ird = 1;
			else {
				abort_connection(ep, NULL, GFP_KERNEL);
				err = -ENOMEM;
				goto err;
			}
		}
	}
	ep->ird = conn_param->ird;
	ep->ord = conn_param->ord;

	if (ep->mpa_attr.version != 2)
		if (peer2peer && ep->ird == 0)
			ep->ird = 1;

	PDBG("%s %d ird %d ord %d\n", __func__, __LINE__, ep->ird, ep->ord);

	cm_id->add_ref(cm_id);
	ep->com.cm_id = cm_id;
	ep->com.qp = qp;

	/* bind QP to EP and move to RTS */
	attrs.mpa_attr = ep->mpa_attr;
	attrs.max_ird = ep->ird;
	attrs.max_ord = ep->ord;
	attrs.llp_stream_handle = ep;
	attrs.next_state = C4IW_QP_STATE_RTS;

	/* bind QP and TID with INIT_WR */
	mask = C4IW_QP_ATTR_NEXT_STATE |
	       C4IW_QP_ATTR_LLP_STREAM_HANDLE |
	       C4IW_QP_ATTR_MPA_ATTR |
	       C4IW_QP_ATTR_MAX_IRD |
	       C4IW_QP_ATTR_MAX_ORD;

	err = c4iw_modify_qp(ep->com.qp->rhp,
			     ep->com.qp, mask, &attrs, 1);
	if (err)
		goto err1;
	err = send_mpa_reply(ep, conn_param->private_data,
			     conn_param->private_data_len);
	if (err)
		goto err1;

	state_set(&ep->com, FPDU_MODE);
	established_upcall(ep);
	c4iw_put_ep(&ep->com);
	return 0;
err1:
	ep->com.cm_id = NULL;
	ep->com.qp = NULL;
	cm_id->rem_ref(cm_id);
err:
	c4iw_put_ep(&ep->com);
	return err;
}
int c4iw_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param)
{
	int err = 0;
	struct c4iw_dev *dev = to_c4iw_dev(cm_id->device);
	struct c4iw_ep *ep;
	struct rtable *rt;
	struct net_device *pdev;
	struct neighbour *neigh;
	int step;

	if ((conn_param->ord > c4iw_max_read_depth) ||
	    (conn_param->ird > c4iw_max_read_depth)) {
		err = -EINVAL;
		goto out;
	}
	ep = alloc_ep(sizeof(*ep), GFP_KERNEL);
	if (!ep) {
		printk(KERN_ERR MOD "%s - cannot alloc ep.\n", __func__);
		err = -ENOMEM;
		goto out;
	}
	init_timer(&ep->timer);
	ep->plen = conn_param->private_data_len;
	if (ep->plen)
		memcpy(ep->mpa_pkt + sizeof(struct mpa_message),
		       conn_param->private_data, ep->plen);
	ep->ird = conn_param->ird;
	ep->ord = conn_param->ord;

	if (peer2peer && ep->ord == 0)
		ep->ord = 1;

	cm_id->add_ref(cm_id);
	ep->com.dev = dev;
	ep->com.cm_id = cm_id;
	ep->com.qp = get_qhp(dev, conn_param->qpn);
	BUG_ON(!ep->com.qp);
	PDBG("%s qpn 0x%x qp %p cm_id %p\n", __func__, conn_param->qpn,
	     ep->com.qp, cm_id);

	/*
	 * Allocate an active TID to initiate a TCP connection.
	 */
	ep->atid = cxgb4_alloc_atid(dev->rdev.lldi.tids, ep);
	if (ep->atid == -1) {
		printk(KERN_ERR MOD "%s - cannot alloc atid.\n", __func__);
		err = -ENOMEM;
		goto fail2;
	}

	PDBG("%s saddr 0x%x sport 0x%x raddr 0x%x rport 0x%x\n", __func__,
	     ntohl(cm_id->local_addr.sin_addr.s_addr),
	     ntohs(cm_id->local_addr.sin_port),
	     ntohl(cm_id->remote_addr.sin_addr.s_addr),
	     ntohs(cm_id->remote_addr.sin_port));

	/* find a route */
	rt = find_route(dev,
			cm_id->local_addr.sin_addr.s_addr,
			cm_id->remote_addr.sin_addr.s_addr,
			cm_id->local_addr.sin_port,
			cm_id->remote_addr.sin_port, 0);
	if (!rt) {
		printk(KERN_ERR MOD "%s - cannot find route.\n", __func__);
		err = -EHOSTUNREACH;
		goto fail3;
	}
	ep->dst = &rt->dst;

	neigh = dst_get_neighbour(ep->dst);

	/* get a l2t entry */
	if (neigh->dev->flags & IFF_LOOPBACK) {
		PDBG("%s LOOPBACK\n", __func__);
		pdev = ip_dev_find(&init_net,
				   cm_id->remote_addr.sin_addr.s_addr);
		ep->l2t = cxgb4_l2t_get(ep->com.dev->rdev.lldi.l2t,
					neigh, pdev, 0);
		ep->mtu = pdev->mtu;
		ep->tx_chan = cxgb4_port_chan(pdev);
		ep->smac_idx = (cxgb4_port_viid(pdev) & 0x7F) << 1;
		step = ep->com.dev->rdev.lldi.ntxq /
		       ep->com.dev->rdev.lldi.nchan;
		ep->txq_idx = cxgb4_port_idx(pdev) * step;
		step = ep->com.dev->rdev.lldi.nrxq /
		       ep->com.dev->rdev.lldi.nchan;
		ep->ctrlq_idx = cxgb4_port_idx(pdev);
		ep->rss_qid = ep->com.dev->rdev.lldi.rxq_ids[
			      cxgb4_port_idx(pdev) * step];
		dev_put(pdev);
	} else {
		ep->l2t = cxgb4_l2t_get(ep->com.dev->rdev.lldi.l2t,
					neigh, neigh->dev, 0);
		ep->mtu = dst_mtu(ep->dst);
		ep->tx_chan = cxgb4_port_chan(neigh->dev);
		ep->smac_idx = (cxgb4_port_viid(neigh->dev) & 0x7F) << 1;
		step = ep->com.dev->rdev.lldi.ntxq /
		       ep->com.dev->rdev.lldi.nchan;
		ep->txq_idx = cxgb4_port_idx(neigh->dev) * step;
		ep->ctrlq_idx = cxgb4_port_idx(neigh->dev);
		step = ep->com.dev->rdev.lldi.nrxq /
		       ep->com.dev->rdev.lldi.nchan;
		ep->rss_qid = ep->com.dev->rdev.lldi.rxq_ids[
			      cxgb4_port_idx(neigh->dev) * step];
		ep->retry_with_mpa_v1 = 0;
		ep->tried_with_mpa_v1 = 0;
	}
	if (!ep->l2t) {
		printk(KERN_ERR MOD "%s - cannot alloc l2e.\n", __func__);
		err = -ENOMEM;
		goto fail4;
	}

	PDBG("%s txq_idx %u tx_chan %u smac_idx %u rss_qid %u l2t_idx %u\n",
	     __func__, ep->txq_idx, ep->tx_chan, ep->smac_idx, ep->rss_qid,
	     ep->l2t->idx);

	state_set(&ep->com, CONNECTING);
	ep->tos = 0;
	ep->com.local_addr = cm_id->local_addr;
	ep->com.remote_addr = cm_id->remote_addr;

	/* send connect request to rnic */
	err = send_connect(ep);
	if (!err)
		goto out;

	cxgb4_l2t_release(ep->l2t);
fail4:
	dst_release(ep->dst);
fail3:
	cxgb4_free_atid(ep->com.dev->rdev.lldi.tids, ep->atid);
fail2:
	cm_id->rem_ref(cm_id);
	c4iw_put_ep(&ep->com);
out:
	return err;
}
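/*
 * Passive open.  Allocates a listening endpoint and a server TID, then
 * issues a firmware create-server request and blocks until the
 * CPL_PASS_OPEN_RPL arrives (via the wr_wait completion).
 */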
int c4iw_create_listen(struct iw_cm_id *cm_id, int backlog)
{
	int err = 0;
	struct c4iw_dev *dev = to_c4iw_dev(cm_id->device);
	struct c4iw_listen_ep *ep;

	might_sleep();

	ep = alloc_ep(sizeof(*ep), GFP_KERNEL);
	if (!ep) {
		printk(KERN_ERR MOD "%s - cannot alloc ep.\n", __func__);
		err = -ENOMEM;
		goto fail1;
	}
	PDBG("%s ep %p\n", __func__, ep);
	cm_id->add_ref(cm_id);
	ep->com.cm_id = cm_id;
	ep->com.dev = dev;
	ep->backlog = backlog;
	ep->com.local_addr = cm_id->local_addr;

	/*
	 * Allocate a server TID.
	 */
	ep->stid = cxgb4_alloc_stid(dev->rdev.lldi.tids, PF_INET, ep);
	if (ep->stid == -1) {
		printk(KERN_ERR MOD "%s - cannot alloc stid.\n", __func__);
		err = -ENOMEM;
		goto fail2;
	}

	state_set(&ep->com, LISTEN);
	c4iw_init_wr_wait(&ep->com.wr_wait);
	err = cxgb4_create_server(ep->com.dev->rdev.lldi.ports[0], ep->stid,
				  ep->com.local_addr.sin_addr.s_addr,
				  ep->com.local_addr.sin_port,
				  ep->com.dev->rdev.lldi.rxq_ids[0]);
	if (err)
		goto fail3;

	/* wait for pass_open_rpl */
	err = c4iw_wait_for_reply(&ep->com.dev->rdev, &ep->com.wr_wait, 0, 0,
				  __func__);
	if (!err) {
		cm_id->provider_data = ep;
		goto out;
	}
fail3:
	cxgb4_free_stid(ep->com.dev->rdev.lldi.tids, ep->stid, PF_INET);
fail2:
	cm_id->rem_ref(cm_id);
	c4iw_put_ep(&ep->com);
fail1:
out:
	return err;
}
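/*
 * Tear down a listening endpoint: mark it DEAD, stop the firmware server,
 * wait for the close-server reply, and release the server TID.
 */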
int c4iw_destroy_listen(struct iw_cm_id *cm_id)
{
	int err;
	struct c4iw_listen_ep *ep = to_listen_ep(cm_id);

	PDBG("%s ep %p\n", __func__, ep);

	might_sleep();
	state_set(&ep->com, DEAD);
	c4iw_init_wr_wait(&ep->com.wr_wait);
	err = listen_stop(ep);
	if (err)
		goto done;
	err = c4iw_wait_for_reply(&ep->com.dev->rdev, &ep->com.wr_wait, 0, 0,
				  __func__);
	cxgb4_free_stid(ep->com.dev->rdev.lldi.tids, ep->stid, PF_INET);
done:
	cm_id->rem_ref(cm_id);
	c4iw_put_ep(&ep->com);
	return err;
}
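/*
 * Initiate a close of the connection.  abrupt selects an RST (send_abort)
 * over a graceful FIN half-close (send_halfclose); the endpoint state
 * machine decides whether anything needs to go on the wire at all.  On a
 * fatal rdev error the endpoint resources are reaped immediately.
 */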
int c4iw_ep_disconnect(struct c4iw_ep *ep, int abrupt, gfp_t gfp)
{
	int ret = 0;
	int close = 0;
	int fatal = 0;
	struct c4iw_rdev *rdev;

	mutex_lock(&ep->com.mutex);

	PDBG("%s ep %p state %s, abrupt %d\n", __func__, ep,
	     states[ep->com.state], abrupt);

	rdev = &ep->com.dev->rdev;
	if (c4iw_fatal_error(rdev)) {
		fatal = 1;
		close_complete_upcall(ep);
		ep->com.state = DEAD;
	}
	switch (ep->com.state) {
	case MPA_REQ_WAIT:
	case MPA_REQ_SENT:
	case MPA_REQ_RCVD:
	case MPA_REP_SENT:
	case FPDU_MODE:
		close = 1;
		if (abrupt)
			ep->com.state = ABORTING;
		else {
			ep->com.state = CLOSING;
			start_ep_timer(ep);
		}
		set_bit(CLOSE_SENT, &ep->com.flags);
		break;
	case CLOSING:
		if (!test_and_set_bit(CLOSE_SENT, &ep->com.flags)) {
			close = 1;
			if (abrupt) {
				stop_ep_timer(ep);
				ep->com.state = ABORTING;
			} else
				ep->com.state = MORIBUND;
		}
		break;
	case MORIBUND:
	case ABORTING:
	case DEAD:
		PDBG("%s ignoring disconnect ep %p state %u\n",
		     __func__, ep, ep->com.state);
		break;
	default:
		BUG();
	}

	if (close) {
		if (abrupt) {
			close_complete_upcall(ep);
			ret = send_abort(ep, NULL, gfp);
		} else
			ret = send_halfclose(ep, gfp);
		if (ret)
			fatal = 1;
	}
	mutex_unlock(&ep->com.mutex);
	if (fatal)
		release_ep_resources(ep);
	return ret;
}
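/* Dispatch an asynchronous firmware event to the CQ/QP event handler. */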
static int async_event(struct c4iw_dev *dev, struct sk_buff *skb)
{
	struct cpl_fw6_msg *rpl = cplhdr(skb);

	c4iw_ev_dispatch(dev, (struct t4_cqe *)&rpl->data[0]);
	return 0;
}
/*
 * These are the real handlers that are called from a
 * work queue.
 */
static c4iw_handler_func work_handlers[NUM_CPL_CMDS] = {
	[CPL_ACT_ESTABLISH] = act_establish,
	[CPL_ACT_OPEN_RPL] = act_open_rpl,
	[CPL_RX_DATA] = rx_data,
	[CPL_ABORT_RPL_RSS] = abort_rpl,
	[CPL_ABORT_RPL] = abort_rpl,
	[CPL_PASS_OPEN_RPL] = pass_open_rpl,
	[CPL_CLOSE_LISTSRV_RPL] = close_listsrv_rpl,
	[CPL_PASS_ACCEPT_REQ] = pass_accept_req,
	[CPL_PASS_ESTABLISH] = pass_establish,
	[CPL_PEER_CLOSE] = peer_close,
	[CPL_ABORT_REQ_RSS] = peer_abort,
	[CPL_CLOSE_CON_RPL] = close_con_rpl,
	[CPL_RDMA_TERMINATE] = terminate,
	[CPL_FW4_ACK] = fw4_ack,
	[CPL_FW6_MSG] = async_event
};
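/*
 * Handle an endpoint whose ep_timeout_secs timer has expired: abort the
 * connection and, for a timed-out active open, complete the connect
 * upcall with -ETIMEDOUT.
 */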
static void process_timeout(struct c4iw_ep *ep)
{
	struct c4iw_qp_attributes attrs;
	int abort = 1;

	mutex_lock(&ep->com.mutex);
	PDBG("%s ep %p tid %u state %d\n", __func__, ep, ep->hwtid,
	     ep->com.state);
	switch (ep->com.state) {
	case MPA_REQ_SENT:
		__state_set(&ep->com, ABORTING);
		connect_reply_upcall(ep, -ETIMEDOUT);
		break;
	case MPA_REQ_WAIT:
		__state_set(&ep->com, ABORTING);
		break;
	case CLOSING:
	case MORIBUND:
		if (ep->com.cm_id && ep->com.qp) {
			attrs.next_state = C4IW_QP_STATE_ERROR;
			c4iw_modify_qp(ep->com.qp->rhp,
				       ep->com.qp, C4IW_QP_ATTR_NEXT_STATE,
				       &attrs, 1);
		}
		__state_set(&ep->com, ABORTING);
		break;
	default:
		printk(KERN_ERR "%s unexpected state ep %p tid %u state %u\n",
		       __func__, ep, ep->hwtid, ep->com.state);
		WARN_ON(1);
		abort = 0;
	}
	mutex_unlock(&ep->com.mutex);
	if (abort)
		abort_connection(ep, NULL, GFP_KERNEL);
	c4iw_put_ep(&ep->com);
}
static void process_timedout_eps(void)
{
	struct c4iw_ep *ep;

	spin_lock_irq(&timeout_lock);
	while (!list_empty(&timeout_list)) {
		struct list_head *tmp;

		tmp = timeout_list.next;
		list_del(tmp);
		spin_unlock_irq(&timeout_lock);
		ep = list_entry(tmp, struct c4iw_ep, entry);
		process_timeout(ep);
		spin_lock_irq(&timeout_lock);
	}
	spin_unlock_irq(&timeout_lock);
}
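/*
 * Work-queue handler: drain the rxq of deferred CPL messages, dispatch
 * each to its work_handlers entry, then reap any timed-out endpoints.
 */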
static void process_work(struct work_struct *work)
{
	struct sk_buff *skb = NULL;
	struct c4iw_dev *dev;
	struct cpl_act_establish *rpl;
	unsigned int opcode;
	int ret;

	while ((skb = skb_dequeue(&rxq))) {
		rpl = cplhdr(skb);
		dev = *((struct c4iw_dev **) (skb->cb + sizeof(void *)));
		opcode = rpl->ot.opcode;

		BUG_ON(!work_handlers[opcode]);
		ret = work_handlers[opcode](dev, skb);
		if (!ret)
			kfree_skb(skb);
	}
	process_timedout_eps();
}
static DECLARE_WORK(skb_work, process_work);
static void ep_timeout(unsigned long arg)
{
	struct c4iw_ep *ep = (struct c4iw_ep *)arg;

	spin_lock(&timeout_lock);
	list_add_tail(&ep->entry, &timeout_list);
	spin_unlock(&timeout_lock);
	queue_work(workq, &skb_work);
}
/*
 * All the CM events are handled on a work queue to have a safe context.
 */
static int sched(struct c4iw_dev *dev, struct sk_buff *skb)
{
	/* Save dev in the skb->cb area. */
	*((struct c4iw_dev **) (skb->cb + sizeof(void *))) = dev;

	/* Queue the skb and schedule the worker thread. */
	skb_queue_tail(&rxq, skb);
	queue_work(workq, &skb_work);
	return 0;
}
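/* SET_TCB replies only need a status check; nothing sleeps waiting on them. */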
static int set_tcb_rpl(struct c4iw_dev *dev, struct sk_buff *skb)
{
	struct cpl_set_tcb_rpl *rpl = cplhdr(skb);

	if (rpl->status != CPL_ERR_NONE) {
		printk(KERN_ERR MOD "Unexpected SET_TCB_RPL status %u "
		       "for tid %u\n", rpl->status, GET_TID(rpl));
	}
	kfree_skb(skb);
	return 0;
}
static int fw6_msg(struct c4iw_dev *dev, struct sk_buff *skb)
{
	struct cpl_fw6_msg *rpl = cplhdr(skb);
	struct c4iw_wr_wait *wr_waitp;
	int ret;

	PDBG("%s type %u\n", __func__, rpl->type);

	switch (rpl->type) {
	case 1:
		ret = (int)((be64_to_cpu(rpl->data[0]) >> 8) & 0xff);
		wr_waitp = (struct c4iw_wr_wait *)(__force unsigned long) rpl->data[1];
		PDBG("%s wr_waitp %p ret %u\n", __func__, wr_waitp, ret);
		if (wr_waitp)
			c4iw_wake_up(wr_waitp, ret ? -ret : 0);
		kfree_skb(skb);
		break;
	case 2:
		sched(dev, skb);
		break;
	default:
		printk(KERN_ERR MOD "%s unexpected fw6 msg type %u\n", __func__,
		       rpl->type);
		kfree_skb(skb);
		break;
	}
	return 0;
}
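/*
 * Called directly from the T4 interrupt path rather than via sched(), so
 * that a thread blocked in rdma_init()/rdma_fini() is woken before the
 * deferred peer_abort() handler runs.
 */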
static int peer_abort_intr(struct c4iw_dev *dev, struct sk_buff *skb)
{
	struct cpl_abort_req_rss *req = cplhdr(skb);
	struct c4iw_ep *ep;
	struct tid_info *t = dev->rdev.lldi.tids;
	unsigned int tid = GET_TID(req);

	ep = lookup_tid(t, tid);
	if (is_neg_adv_abort(req->status)) {
		PDBG("%s neg_adv_abort ep %p tid %u\n", __func__, ep,
		     ep->hwtid);
		kfree_skb(skb);
		return 0;
	}
	PDBG("%s ep %p tid %u state %u\n", __func__, ep, ep->hwtid,
	     ep->com.state);

	/*
	 * Wake up any threads in rdma_init() or rdma_fini().
	 * However, this is not needed if com state is just
	 * MPA_REQ_SENT
	 */
	if (ep->com.state != MPA_REQ_SENT)
		c4iw_wake_up(&ep->com.wr_wait, -ECONNRESET);
	sched(dev, skb);
	return 0;
}
/*
 * Most upcalls from the T4 Core go to sched() to
 * schedule the processing on a work queue.
 */
c4iw_handler_func c4iw_handlers[NUM_CPL_CMDS] = {
	[CPL_ACT_ESTABLISH] = sched,
	[CPL_ACT_OPEN_RPL] = sched,
	[CPL_RX_DATA] = sched,
	[CPL_ABORT_RPL_RSS] = sched,
	[CPL_ABORT_RPL] = sched,
	[CPL_PASS_OPEN_RPL] = sched,
	[CPL_CLOSE_LISTSRV_RPL] = sched,
	[CPL_PASS_ACCEPT_REQ] = sched,
	[CPL_PASS_ESTABLISH] = sched,
	[CPL_PEER_CLOSE] = sched,
	[CPL_CLOSE_CON_RPL] = sched,
	[CPL_ABORT_REQ_RSS] = peer_abort_intr,
	[CPL_RDMA_TERMINATE] = sched,
	[CPL_FW4_ACK] = sched,
	[CPL_SET_TCB_RPL] = set_tcb_rpl,
	[CPL_FW6_MSG] = fw6_msg
};
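/*
 * Module init/teardown for the CM: set up the deferred-work plumbing
 * (timeout list lock, rx queue, and the single-threaded "iw_cxgb4"
 * work queue) and tear it down again on exit.
 */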
int __init c4iw_cm_init(void)
{
	spin_lock_init(&timeout_lock);
	skb_queue_head_init(&rxq);

	workq = create_singlethread_workqueue("iw_cxgb4");
	if (!workq)
		return -ENOMEM;

	return 0;
}
void __exit c4iw_cm_term(void)
{
	WARN_ON(!list_empty(&timeout_list));
	flush_workqueue(workq);
	destroy_workqueue(workq);
}