From 294a39e7829dfd663e6c5c94cede0c6a0c13e37f Mon Sep 17 00:00:00 2001 From: David Vrabel Date: Mon, 24 Aug 2009 15:02:27 +0100 Subject: [PATCH] USB: whci-hcd: support urbs with scatter-gather lists Support urbs with scatter-gather lists by trying to fit sg list elements into page lists in one or more qTDs. qTDs must end on a wMaxPacketSize boundary so if this isn't possible the urb's sg list must be copied into bounce buffers. Signed-off-by: David Vrabel Signed-off-by: Greg Kroah-Hartman --- drivers/usb/host/whci/hcd.c | 1 + drivers/usb/host/whci/qset.c | 348 +++++++++++++++++++++++++++++--- drivers/usb/host/whci/whcd.h | 9 + drivers/usb/host/whci/whci-hc.h | 5 +- 4 files changed, 332 insertions(+), 31 deletions(-) diff --git a/drivers/usb/host/whci/hcd.c b/drivers/usb/host/whci/hcd.c index 687b622a161..e0d3401285c 100644 --- a/drivers/usb/host/whci/hcd.c +++ b/drivers/usb/host/whci/hcd.c @@ -250,6 +250,7 @@ static int whc_probe(struct umc_dev *umc) } usb_hcd->wireless = 1; + usb_hcd->self.sg_tablesize = 2048; /* somewhat arbitrary */ wusbhc = usb_hcd_to_wusbhc(usb_hcd); whc = wusbhc_to_whc(wusbhc); diff --git a/drivers/usb/host/whci/qset.c b/drivers/usb/host/whci/qset.c index 1b9dc157157..88e51ea8620 100644 --- a/drivers/usb/host/whci/qset.c +++ b/drivers/usb/host/whci/qset.c @@ -57,8 +57,9 @@ static void qset_fill_qh(struct whc_qset *qset, struct urb *urb) is_out = usb_pipeout(urb->pipe); - epcd = (struct usb_wireless_ep_comp_descriptor *)qset->ep->extra; + qset->max_packet = le16_to_cpu(urb->ep->desc.wMaxPacketSize); + epcd = (struct usb_wireless_ep_comp_descriptor *)qset->ep->extra; if (epcd) { qset->max_seq = epcd->bMaxSequence; qset->max_burst = epcd->bMaxBurst; @@ -72,7 +73,7 @@ static void qset_fill_qh(struct whc_qset *qset, struct urb *urb) | (is_out ? 
QH_INFO1_DIR_OUT : QH_INFO1_DIR_IN) | usb_pipe_to_qh_type(urb->pipe) | QH_INFO1_DEV_INFO_IDX(wusb_port_no_to_idx(usb_dev->portnum)) - | QH_INFO1_MAX_PKT_LEN(usb_maxpacket(urb->dev, urb->pipe, is_out)) + | QH_INFO1_MAX_PKT_LEN(qset->max_packet) ); qset->qh.info2 = cpu_to_le32( QH_INFO2_BURST(qset->max_burst) @@ -241,6 +242,36 @@ static void qset_remove_qtd(struct whc *whc, struct whc_qset *qset) qset->ntds--; } +static void qset_copy_bounce_to_sg(struct whc *whc, struct whc_std *std) +{ + struct scatterlist *sg; + void *bounce; + size_t remaining, offset; + + bounce = std->bounce_buf; + remaining = std->len; + + sg = std->bounce_sg; + offset = std->bounce_offset; + + while (remaining) { + size_t len; + + len = min(sg->length - offset, remaining); + memcpy(sg_virt(sg) + offset, bounce, len); + + bounce += len; + remaining -= len; + + offset += len; + if (offset >= sg->length) { + sg = sg_next(sg); + offset = 0; + } + } + +} + /** * qset_free_std - remove an sTD and free it. * @whc: the WHCI host controller @@ -249,13 +280,29 @@ static void qset_remove_qtd(struct whc *whc, struct whc_qset *qset) void qset_free_std(struct whc *whc, struct whc_std *std) { list_del(&std->list_node); - if (std->num_pointers) { - dma_unmap_single(whc->wusbhc.dev, std->dma_addr, - std->num_pointers * sizeof(struct whc_page_list_entry), - DMA_TO_DEVICE); + if (std->bounce_buf) { + bool is_out = usb_pipeout(std->urb->pipe); + dma_addr_t dma_addr; + + if (std->num_pointers) + dma_addr = le64_to_cpu(std->pl_virt[0].buf_ptr); + else + dma_addr = std->dma_addr; + + dma_unmap_single(whc->wusbhc.dev, dma_addr, + std->len, is_out ? 
DMA_TO_DEVICE : DMA_FROM_DEVICE); + if (!is_out) + qset_copy_bounce_to_sg(whc, std); + kfree(std->bounce_buf); + } + if (std->pl_virt) { + if (std->dma_addr) + dma_unmap_single(whc->wusbhc.dev, std->dma_addr, + std->num_pointers * sizeof(struct whc_page_list_entry), + DMA_TO_DEVICE); kfree(std->pl_virt); + std->pl_virt = NULL; } - kfree(std); } @@ -293,12 +340,17 @@ static int qset_fill_page_list(struct whc *whc, struct whc_std *std, gfp_t mem_f { dma_addr_t dma_addr = std->dma_addr; dma_addr_t sp, ep; - size_t std_len = std->len; size_t pl_len; int p; - sp = ALIGN(dma_addr, WHCI_PAGE_SIZE); - ep = dma_addr + std_len; + /* Short buffers don't need a page list. */ + if (std->len <= WHCI_PAGE_SIZE) { + std->num_pointers = 0; + return 0; + } + + sp = dma_addr & ~(WHCI_PAGE_SIZE-1); + ep = dma_addr + std->len; std->num_pointers = DIV_ROUND_UP(ep - sp, WHCI_PAGE_SIZE); pl_len = std->num_pointers * sizeof(struct whc_page_list_entry); @@ -309,7 +361,7 @@ static int qset_fill_page_list(struct whc *whc, struct whc_std *std, gfp_t mem_f for (p = 0; p < std->num_pointers; p++) { std->pl_virt[p].buf_ptr = cpu_to_le64(dma_addr); - dma_addr = ALIGN(dma_addr + WHCI_PAGE_SIZE, WHCI_PAGE_SIZE); + dma_addr = (dma_addr + WHCI_PAGE_SIZE) & ~(WHCI_PAGE_SIZE-1); } return 0; @@ -339,6 +391,238 @@ static void urb_dequeue_work(struct work_struct *work) spin_unlock_irqrestore(&whc->lock, flags); } +static struct whc_std *qset_new_std(struct whc *whc, struct whc_qset *qset, + struct urb *urb, gfp_t mem_flags) +{ + struct whc_std *std; + + std = kzalloc(sizeof(struct whc_std), mem_flags); + if (std == NULL) + return NULL; + + std->urb = urb; + std->qtd = NULL; + + INIT_LIST_HEAD(&std->list_node); + list_add_tail(&std->list_node, &qset->stds); + + return std; +} + +static int qset_add_urb_sg(struct whc *whc, struct whc_qset *qset, struct urb *urb, + gfp_t mem_flags) +{ + size_t remaining; + struct scatterlist *sg; + int i; + int ntds = 0; + struct whc_std *std = NULL; + struct 
whc_page_list_entry *entry; + dma_addr_t prev_end = 0; + size_t pl_len; + int p = 0; + + dev_dbg(&whc->umc->dev, "adding urb w/ sg of length %d\n", urb->transfer_buffer_length); + + remaining = urb->transfer_buffer_length; + + for_each_sg(urb->sg->sg, sg, urb->num_sgs, i) { + dma_addr_t dma_addr; + size_t dma_remaining; + dma_addr_t sp, ep; + int num_pointers; + + if (remaining == 0) { + break; + } + + dma_addr = sg_dma_address(sg); + dma_remaining = min(sg_dma_len(sg), remaining); + + dev_dbg(&whc->umc->dev, "adding sg[%d] %08x %d\n", i, (unsigned)dma_addr, + dma_remaining); + + while (dma_remaining) { + size_t dma_len; + + /* + * We can use the previous std (if it exists) provided that: + * - the previous one ended on a page boundary. + * - the current one begins on a page boundary. + * - the previous one isn't full. + * + * If a new std is needed but the previous one + * did not end on a wMaxPacketSize boundary + * then this sg list cannot be mapped onto + * multiple qTDs. Return an error and let the + * caller sort it out. + */ + if (!std + || (prev_end & (WHCI_PAGE_SIZE-1)) + || (dma_addr & (WHCI_PAGE_SIZE-1)) + || std->len + WHCI_PAGE_SIZE > QTD_MAX_XFER_SIZE) { + if (prev_end % qset->max_packet != 0) + return -EINVAL; + dev_dbg(&whc->umc->dev, "need new std\n"); + std = qset_new_std(whc, qset, urb, mem_flags); + if (std == NULL) { + return -ENOMEM; + } + ntds++; + p = 0; + } + + dma_len = dma_remaining; + + /* + * If the remainder in this element doesn't + * fit in a single qTD, end the qTD on a + * wMaxPacketSize boundary. 
+ */ + if (std->len + dma_len > QTD_MAX_XFER_SIZE) { + dma_len = QTD_MAX_XFER_SIZE - std->len; + ep = ((dma_addr + dma_len) / qset->max_packet) * qset->max_packet; + dma_len = ep - dma_addr; + } + + dev_dbg(&whc->umc->dev, "adding %d\n", dma_len); + + std->len += dma_len; + std->ntds_remaining = -1; /* filled in later */ + + sp = dma_addr & ~(WHCI_PAGE_SIZE-1); + ep = dma_addr + dma_len; + num_pointers = DIV_ROUND_UP(ep - sp, WHCI_PAGE_SIZE); + std->num_pointers += num_pointers; + + dev_dbg(&whc->umc->dev, "need %d more (%d total) page pointers\n", + num_pointers, std->num_pointers); + + pl_len = std->num_pointers * sizeof(struct whc_page_list_entry); + + std->pl_virt = krealloc(std->pl_virt, pl_len, mem_flags); + if (std->pl_virt == NULL) { + return -ENOMEM; + } + + for (;p < std->num_pointers; p++, entry++) { + dev_dbg(&whc->umc->dev, "e[%d] %08x\n", p, dma_addr); + std->pl_virt[p].buf_ptr = cpu_to_le64(dma_addr); + dma_addr = (dma_addr + WHCI_PAGE_SIZE) & ~(WHCI_PAGE_SIZE-1); + } + + prev_end = dma_addr = ep; + dma_remaining -= dma_len; + remaining -= dma_len; + } + } + + dev_dbg(&whc->umc->dev, "used %d tds\n", ntds); + + /* Now the number of stds is known, go back and fill in + std->ntds_remaining. */ + list_for_each_entry(std, &qset->stds, list_node) { + if (std->ntds_remaining == -1) { + pl_len = std->num_pointers * sizeof(struct whc_page_list_entry); + std->ntds_remaining = ntds--; + std->dma_addr = dma_map_single(whc->wusbhc.dev, std->pl_virt, + pl_len, DMA_TO_DEVICE); + } + } + return 0; +} + +/** + * qset_add_urb_sg_linearize - add an urb with sg list, copying the data + * + * If the URB contains an sg list whose elements cannot be directly + * mapped to qTDs then the data must be transferred via bounce + * buffers. 
+ */ +static int qset_add_urb_sg_linearize(struct whc *whc, struct whc_qset *qset, + struct urb *urb, gfp_t mem_flags) +{ + bool is_out = usb_pipeout(urb->pipe); + size_t max_std_len; + size_t remaining; + int ntds = 0; + struct whc_std *std = NULL; + void *bounce = NULL; + struct scatterlist *sg; + int i; + + /* limit maximum bounce buffer to 16 * 3.5 KiB ~= 28 k */ + max_std_len = qset->max_burst * qset->max_packet; + + remaining = urb->transfer_buffer_length; + + for_each_sg(urb->sg->sg, sg, urb->sg->nents, i) { + size_t len; + size_t sg_remaining; + void *orig; + + if (remaining == 0) { + break; + } + + sg_remaining = min(remaining, sg->length); + orig = sg_virt(sg); + + dev_dbg(&whc->umc->dev, "adding sg[%d] %d\n", i, sg_remaining); + + while (sg_remaining) { + if (!std || std->len == max_std_len) { + dev_dbg(&whc->umc->dev, "need new std\n"); + std = qset_new_std(whc, qset, urb, mem_flags); + if (std == NULL) + return -ENOMEM; + std->bounce_buf = kmalloc(max_std_len, mem_flags); + if (std->bounce_buf == NULL) + return -ENOMEM; + std->bounce_sg = sg; + std->bounce_offset = orig - sg_virt(sg); + bounce = std->bounce_buf; + ntds++; + } + + len = min(sg_remaining, max_std_len - std->len); + + dev_dbg(&whc->umc->dev, "added %d from sg[%d] @ offset %d\n", + len, i, orig - sg_virt(sg)); + + if (is_out) + memcpy(bounce, orig, len); + + std->len += len; + std->ntds_remaining = -1; /* filled in later */ + + bounce += len; + orig += len; + sg_remaining -= len; + remaining -= len; + } + } + + /* + * For each of the new sTDs, map the bounce buffers, create + * page lists (if necessary), and fill in std->ntds_remaining. + */ + list_for_each_entry(std, &qset->stds, list_node) { + if (std->ntds_remaining != -1) + continue; + + std->dma_addr = dma_map_single(&whc->umc->dev, std->bounce_buf, std->len, + is_out ? 
DMA_TO_DEVICE : DMA_FROM_DEVICE); + + if (qset_fill_page_list(whc, std, mem_flags) < 0) + return -ENOMEM; + + std->ntds_remaining = ntds--; + } + + return 0; +} + /** * qset_add_urb - add an urb to the qset's queue. * @@ -353,10 +637,7 @@ int qset_add_urb(struct whc *whc, struct whc_qset *qset, struct urb *urb, int remaining = urb->transfer_buffer_length; u64 transfer_dma = urb->transfer_dma; int ntds_remaining; - - ntds_remaining = DIV_ROUND_UP(remaining, QTD_MAX_XFER_SIZE); - if (ntds_remaining == 0) - ntds_remaining = 1; + int ret; wurb = kzalloc(sizeof(struct whc_urb), mem_flags); if (wurb == NULL) @@ -366,32 +647,41 @@ int qset_add_urb(struct whc *whc, struct whc_qset *qset, struct urb *urb, wurb->urb = urb; INIT_WORK(&wurb->dequeue_work, urb_dequeue_work); + if (urb->sg) { + ret = qset_add_urb_sg(whc, qset, urb, mem_flags); + if (ret == -EINVAL) { + dev_dbg(&whc->umc->dev, "linearizing %d octet urb\n", + urb->transfer_buffer_length); + qset_free_stds(qset, urb); + ret = qset_add_urb_sg_linearize(whc, qset, urb, mem_flags); + } + if (ret < 0) + goto err_no_mem; + return 0; + } + + ntds_remaining = DIV_ROUND_UP(remaining, QTD_MAX_XFER_SIZE); + if (ntds_remaining == 0) + ntds_remaining = 1; + while (ntds_remaining) { struct whc_std *std; size_t std_len; - std = kmalloc(sizeof(struct whc_std), mem_flags); - if (std == NULL) - goto err_no_mem; - std_len = remaining; if (std_len > QTD_MAX_XFER_SIZE) std_len = QTD_MAX_XFER_SIZE; - std->urb = urb; + std = qset_new_std(whc, qset, urb, mem_flags); + if (std == NULL) + goto err_no_mem; + std->dma_addr = transfer_dma; std->len = std_len; std->ntds_remaining = ntds_remaining; - std->qtd = NULL; - INIT_LIST_HEAD(&std->list_node); - list_add_tail(&std->list_node, &qset->stds); - - if (std_len > WHCI_PAGE_SIZE) { - if (qset_fill_page_list(whc, std, mem_flags) < 0) - goto err_no_mem; - } else - std->num_pointers = 0; + if (qset_fill_page_list(whc, std, mem_flags) < 0) + goto err_no_mem; ntds_remaining--; remaining -= std_len; 
diff --git a/drivers/usb/host/whci/whcd.h b/drivers/usb/host/whci/whcd.h index 24e94d983c5..c80c7d93bc4 100644 --- a/drivers/usb/host/whci/whcd.h +++ b/drivers/usb/host/whci/whcd.h @@ -84,6 +84,11 @@ struct whc { * @len: the length of data in the associated TD. * @ntds_remaining: number of TDs (starting from this one) in this transfer. * + * @bounce_buf: a bounce buffer if the std was from an urb with a sg + * list that could not be mapped to qTDs directly. + * @bounce_sg: the first scatterlist element bounce_buf is for. + * @bounce_offset: the offset into bounce_sg for the start of bounce_buf. + * * Queued URBs may require more TDs than are available in a qset so we * use a list of these "software TDs" (sTDs) to hold per-TD data. */ @@ -97,6 +102,10 @@ struct whc_std { int num_pointers; dma_addr_t dma_addr; struct whc_page_list_entry *pl_virt; + + void *bounce_buf; + struct scatterlist *bounce_sg; + unsigned bounce_offset; }; /** diff --git a/drivers/usb/host/whci/whci-hc.h b/drivers/usb/host/whci/whci-hc.h index e8d0001605b..d5e5c3aacce 100644 --- a/drivers/usb/host/whci/whci-hc.h +++ b/drivers/usb/host/whci/whci-hc.h @@ -267,8 +267,9 @@ struct whc_qset { unsigned reset:1; struct urb *pause_after_urb; struct completion remove_complete; - int max_burst; - int max_seq; + uint16_t max_packet; + uint8_t max_burst; + uint8_t max_seq; }; static inline void whc_qset_set_link_ptr(u64 *ptr, u64 target) -- 2.43.2