]> Pileus Git - ~andy/linux/blobdiff - drivers/block/nvme-core.c
Merge git://git.infradead.org/users/willy/linux-nvme
[~andy/linux] / drivers / block / nvme-core.c
index 89021d710ec21995f8f66a070f72fe0c5af495ec..51824d1f23ea53df5d49cd6d4e10e07b6b45b1fb 100644 (file)
@@ -76,6 +76,7 @@ struct async_cmd_info {
 struct nvme_queue {
        struct device *q_dmadev;
        struct nvme_dev *dev;
+       char irqname[24];       /* nvme4294967295-65535\0 */
        spinlock_t q_lock;
        struct nvme_command *sq_cmds;
        volatile struct nvme_completion *cqes;
@@ -470,104 +471,19 @@ int nvme_setup_prps(struct nvme_dev *dev, struct nvme_common_command *cmd,
        return total_len;
 }
 
-struct nvme_bio_pair {
-       struct bio b1, b2, *parent;
-       struct bio_vec *bv1, *bv2;
-       int err;
-       atomic_t cnt;
-};
-
-static void nvme_bio_pair_endio(struct bio *bio, int err)
-{
-       struct nvme_bio_pair *bp = bio->bi_private;
-
-       if (err)
-               bp->err = err;
-
-       if (atomic_dec_and_test(&bp->cnt)) {
-               bio_endio(bp->parent, bp->err);
-               kfree(bp->bv1);
-               kfree(bp->bv2);
-               kfree(bp);
-       }
-}
-
-static struct nvme_bio_pair *nvme_bio_split(struct bio *bio, int idx,
-                                                       int len, int offset)
-{
-       struct nvme_bio_pair *bp;
-
-       BUG_ON(len > bio->bi_size);
-       BUG_ON(idx > bio->bi_vcnt);
-
-       bp = kmalloc(sizeof(*bp), GFP_ATOMIC);
-       if (!bp)
-               return NULL;
-       bp->err = 0;
-
-       bp->b1 = *bio;
-       bp->b2 = *bio;
-
-       bp->b1.bi_size = len;
-       bp->b2.bi_size -= len;
-       bp->b1.bi_vcnt = idx;
-       bp->b2.bi_idx = idx;
-       bp->b2.bi_sector += len >> 9;
-
-       if (offset) {
-               bp->bv1 = kmalloc(bio->bi_max_vecs * sizeof(struct bio_vec),
-                                                               GFP_ATOMIC);
-               if (!bp->bv1)
-                       goto split_fail_1;
-
-               bp->bv2 = kmalloc(bio->bi_max_vecs * sizeof(struct bio_vec),
-                                                               GFP_ATOMIC);
-               if (!bp->bv2)
-                       goto split_fail_2;
-
-               memcpy(bp->bv1, bio->bi_io_vec,
-                       bio->bi_max_vecs * sizeof(struct bio_vec));
-               memcpy(bp->bv2, bio->bi_io_vec,
-                       bio->bi_max_vecs * sizeof(struct bio_vec));
-
-               bp->b1.bi_io_vec = bp->bv1;
-               bp->b2.bi_io_vec = bp->bv2;
-               bp->b2.bi_io_vec[idx].bv_offset += offset;
-               bp->b2.bi_io_vec[idx].bv_len -= offset;
-               bp->b1.bi_io_vec[idx].bv_len = offset;
-               bp->b1.bi_vcnt++;
-       } else
-               bp->bv1 = bp->bv2 = NULL;
-
-       bp->b1.bi_private = bp;
-       bp->b2.bi_private = bp;
-
-       bp->b1.bi_end_io = nvme_bio_pair_endio;
-       bp->b2.bi_end_io = nvme_bio_pair_endio;
-
-       bp->parent = bio;
-       atomic_set(&bp->cnt, 2);
-
-       return bp;
-
- split_fail_2:
-       kfree(bp->bv1);
- split_fail_1:
-       kfree(bp);
-       return NULL;
-}
-
 static int nvme_split_and_submit(struct bio *bio, struct nvme_queue *nvmeq,
-                                               int idx, int len, int offset)
+                                int len)
 {
-       struct nvme_bio_pair *bp = nvme_bio_split(bio, idx, len, offset);
-       if (!bp)
+       struct bio *split = bio_split(bio, len >> 9, GFP_ATOMIC, NULL);
+       if (!split)
                return -ENOMEM;
 
+       bio_chain(split, bio);
+
        if (bio_list_empty(&nvmeq->sq_cong))
                add_wait_queue(&nvmeq->sq_full, &nvmeq->sq_cong_wait);
-       bio_list_add(&nvmeq->sq_cong, &bp->b1);
-       bio_list_add(&nvmeq->sq_cong, &bp->b2);
+       bio_list_add(&nvmeq->sq_cong, split);
+       bio_list_add(&nvmeq->sq_cong, bio);
 
        return 0;
 }
@@ -579,41 +495,44 @@ static int nvme_split_and_submit(struct bio *bio, struct nvme_queue *nvmeq,
 static int nvme_map_bio(struct nvme_queue *nvmeq, struct nvme_iod *iod,
                struct bio *bio, enum dma_data_direction dma_dir, int psegs)
 {
-       struct bio_vec *bvec, *bvprv = NULL;
+       struct bio_vec bvec, bvprv;
+       struct bvec_iter iter;
        struct scatterlist *sg = NULL;
-       int i, length = 0, nsegs = 0, split_len = bio->bi_size;
+       int length = 0, nsegs = 0, split_len = bio->bi_iter.bi_size;
+       int first = 1;
 
        if (nvmeq->dev->stripe_size)
                split_len = nvmeq->dev->stripe_size -
-                       ((bio->bi_sector << 9) & (nvmeq->dev->stripe_size - 1));
+                       ((bio->bi_iter.bi_sector << 9) &
+                        (nvmeq->dev->stripe_size - 1));
 
        sg_init_table(iod->sg, psegs);
-       bio_for_each_segment(bvec, bio, i) {
-               if (bvprv && BIOVEC_PHYS_MERGEABLE(bvprv, bvec)) {
-                       sg->length += bvec->bv_len;
+       bio_for_each_segment(bvec, bio, iter) {
+               if (!first && BIOVEC_PHYS_MERGEABLE(&bvprv, &bvec)) {
+                       sg->length += bvec.bv_len;
                } else {
-                       if (bvprv && BIOVEC_NOT_VIRT_MERGEABLE(bvprv, bvec))
-                               return nvme_split_and_submit(bio, nvmeq, i,
-                                                               length, 0);
+                       if (!first && BIOVEC_NOT_VIRT_MERGEABLE(&bvprv, &bvec))
+                               return nvme_split_and_submit(bio, nvmeq,
+                                                            length);
 
                        sg = sg ? sg + 1 : iod->sg;
-                       sg_set_page(sg, bvec->bv_page, bvec->bv_len,
-                                                       bvec->bv_offset);
+                       sg_set_page(sg, bvec.bv_page,
+                                   bvec.bv_len, bvec.bv_offset);
                        nsegs++;
                }
 
-               if (split_len - length < bvec->bv_len)
-                       return nvme_split_and_submit(bio, nvmeq, i, split_len,
-                                                       split_len - length);
-               length += bvec->bv_len;
+               if (split_len - length < bvec.bv_len)
+                       return nvme_split_and_submit(bio, nvmeq, split_len);
+               length += bvec.bv_len;
                bvprv = bvec;
+               first = 0;
        }
        iod->nents = nsegs;
        sg_mark_end(sg);
        if (dma_map_sg(nvmeq->q_dmadev, iod->sg, iod->nents, dma_dir) == 0)
                return -ENOMEM;
 
-       BUG_ON(length != bio->bi_size);
+       BUG_ON(length != bio->bi_iter.bi_size);
        return length;
 }
 
@@ -637,8 +556,8 @@ static int nvme_submit_discard(struct nvme_queue *nvmeq, struct nvme_ns *ns,
        iod->npages = 0;
 
        range->cattr = cpu_to_le32(0);
-       range->nlb = cpu_to_le32(bio->bi_size >> ns->lba_shift);
-       range->slba = cpu_to_le64(nvme_block_nr(ns, bio->bi_sector));
+       range->nlb = cpu_to_le32(bio->bi_iter.bi_size >> ns->lba_shift);
+       range->slba = cpu_to_le64(nvme_block_nr(ns, bio->bi_iter.bi_sector));
 
        memset(cmnd, 0, sizeof(*cmnd));
        cmnd->dsm.opcode = nvme_cmd_dsm;
@@ -703,7 +622,7 @@ static int nvme_submit_bio_queue(struct nvme_queue *nvmeq, struct nvme_ns *ns,
        }
 
        result = -ENOMEM;
-       iod = nvme_alloc_iod(psegs, bio->bi_size, GFP_ATOMIC);
+       iod = nvme_alloc_iod(psegs, bio->bi_iter.bi_size, GFP_ATOMIC);
        if (!iod)
                goto nomem;
        iod->private = bio;
@@ -752,7 +671,7 @@ static int nvme_submit_bio_queue(struct nvme_queue *nvmeq, struct nvme_ns *ns,
        cmnd->rw.nsid = cpu_to_le32(ns->ns_id);
        length = nvme_setup_prps(nvmeq->dev, &cmnd->common, iod, length,
                                                                GFP_ATOMIC);
-       cmnd->rw.slba = cpu_to_le64(nvme_block_nr(ns, bio->bi_sector));
+       cmnd->rw.slba = cpu_to_le64(nvme_block_nr(ns, bio->bi_iter.bi_sector));
        cmnd->rw.length = cpu_to_le16((length >> ns->lba_shift) - 1);
        cmnd->rw.control = cpu_to_le16(control);
        cmnd->rw.dsmgmt = cpu_to_le32(dsmgmt);
@@ -1074,7 +993,7 @@ static void nvme_abort_cmd(int cmdid, struct nvme_queue *nvmeq)
                dev_warn(&dev->pci_dev->dev,
                        "I/O %d QID %d timeout, reset controller\n", cmdid,
                                                                nvmeq->qid);
-               INIT_WORK(&dev->reset_work, nvme_reset_failed_dev);
+               PREPARE_WORK(&dev->reset_work, nvme_reset_failed_dev);
                queue_work(nvme_workq, &dev->reset_work);
                return;
        }
@@ -1235,6 +1154,8 @@ static struct nvme_queue *nvme_alloc_queue(struct nvme_dev *dev, int qid,
 
        nvmeq->q_dmadev = dmadev;
        nvmeq->dev = dev;
+       snprintf(nvmeq->irqname, sizeof(nvmeq->irqname), "nvme%dq%d",
+                       dev->instance, qid);
        spin_lock_init(&nvmeq->q_lock);
        nvmeq->cq_head = 0;
        nvmeq->cq_phase = 1;
@@ -1297,7 +1218,7 @@ static int nvme_create_queue(struct nvme_queue *nvmeq, int qid)
        if (result < 0)
                goto release_cq;
 
-       result = queue_request_irq(dev, nvmeq, "nvme");
+       result = queue_request_irq(dev, nvmeq, nvmeq->irqname);
        if (result < 0)
                goto release_sq;
 
@@ -1415,7 +1336,7 @@ static int nvme_configure_admin_queue(struct nvme_dev *dev)
        if (result)
                return result;
 
-       result = queue_request_irq(dev, nvmeq, "nvme admin");
+       result = queue_request_irq(dev, nvmeq, nvmeq->irqname);
        if (result)
                return result;
 
@@ -1713,10 +1634,31 @@ static int nvme_compat_ioctl(struct block_device *bdev, fmode_t mode,
 #define nvme_compat_ioctl      NULL
 #endif
 
+static int nvme_open(struct block_device *bdev, fmode_t mode)
+{
+       struct nvme_ns *ns = bdev->bd_disk->private_data;
+       struct nvme_dev *dev = ns->dev;
+
+       kref_get(&dev->kref);
+       return 0;
+}
+
+static void nvme_free_dev(struct kref *kref);
+
+static void nvme_release(struct gendisk *disk, fmode_t mode)
+{
+       struct nvme_ns *ns = disk->private_data;
+       struct nvme_dev *dev = ns->dev;
+
+       kref_put(&dev->kref, nvme_free_dev);
+}
+
 static const struct block_device_operations nvme_fops = {
        .owner          = THIS_MODULE,
        .ioctl          = nvme_ioctl,
        .compat_ioctl   = nvme_compat_ioctl,
+       .open           = nvme_open,
+       .release        = nvme_release,
 };
 
 static void nvme_resubmit_bios(struct nvme_queue *nvmeq)
@@ -1754,7 +1696,7 @@ static int nvme_kthread(void *data)
                                list_del_init(&dev->node);
                                dev_warn(&dev->pci_dev->dev,
                                        "Failed status, reset controller\n");
-                               INIT_WORK(&dev->reset_work,
+                               PREPARE_WORK(&dev->reset_work,
                                                        nvme_reset_failed_dev);
                                queue_work(nvme_workq, &dev->reset_work);
                                continue;
@@ -1846,13 +1788,6 @@ static struct nvme_ns *nvme_alloc_ns(struct nvme_dev *dev, unsigned nsid,
        return NULL;
 }
 
-static void nvme_ns_free(struct nvme_ns *ns)
-{
-       put_disk(ns->disk);
-       blk_cleanup_queue(ns->queue);
-       kfree(ns);
-}
-
 static int set_queue_count(struct nvme_dev *dev, int count)
 {
        int status;
@@ -1873,6 +1808,7 @@ static size_t db_bar_size(struct nvme_dev *dev, unsigned nr_io_queues)
 
 static int nvme_setup_io_queues(struct nvme_dev *dev)
 {
+       struct nvme_queue *adminq = dev->queues[0];
        struct pci_dev *pdev = dev->pci_dev;
        int result, cpu, i, vecs, nr_io_queues, size, q_depth;
 
@@ -1899,7 +1835,7 @@ static int nvme_setup_io_queues(struct nvme_dev *dev)
        }
 
        /* Deregister the admin queue's interrupt */
-       free_irq(dev->entry[0].vector, dev->queues[0]);
+       free_irq(dev->entry[0].vector, adminq);
 
        vecs = nr_io_queues;
        for (i = 0; i < vecs; i++)
@@ -1937,9 +1873,9 @@ static int nvme_setup_io_queues(struct nvme_dev *dev)
         */
        nr_io_queues = vecs;
 
-       result = queue_request_irq(dev, dev->queues[0], "nvme admin");
+       result = queue_request_irq(dev, adminq, adminq->irqname);
        if (result) {
-               dev->queues[0]->q_suspended = 1;
+               adminq->q_suspended = 1;
                goto free_queues;
        }
 
@@ -2283,12 +2219,13 @@ static void nvme_dev_shutdown(struct nvme_dev *dev)
 
 static void nvme_dev_remove(struct nvme_dev *dev)
 {
-       struct nvme_ns *ns, *next;
+       struct nvme_ns *ns;
 
-       list_for_each_entry_safe(ns, next, &dev->namespaces, list) {
-               list_del(&ns->list);
-               del_gendisk(ns->disk);
-               nvme_ns_free(ns);
+       list_for_each_entry(ns, &dev->namespaces, list) {
+               if (ns->disk->flags & GENHD_FL_UP)
+                       del_gendisk(ns->disk);
+               if (!blk_queue_dying(ns->queue))
+                       blk_cleanup_queue(ns->queue);
        }
 }
 
@@ -2345,9 +2282,22 @@ static void nvme_release_instance(struct nvme_dev *dev)
        spin_unlock(&dev_list_lock);
 }
 
+static void nvme_free_namespaces(struct nvme_dev *dev)
+{
+       struct nvme_ns *ns, *next;
+
+       list_for_each_entry_safe(ns, next, &dev->namespaces, list) {
+               list_del(&ns->list);
+               put_disk(ns->disk);
+               kfree(ns);
+       }
+}
+
 static void nvme_free_dev(struct kref *kref)
 {
        struct nvme_dev *dev = container_of(kref, struct nvme_dev, kref);
+
+       nvme_free_namespaces(dev);
        kfree(dev->queues);
        kfree(dev->entry);
        kfree(dev);
@@ -2456,7 +2406,7 @@ static int nvme_dev_resume(struct nvme_dev *dev)
                return ret;
        if (ret == -EBUSY) {
                spin_lock(&dev_list_lock);
-               INIT_WORK(&dev->reset_work, nvme_remove_disks);
+               PREPARE_WORK(&dev->reset_work, nvme_remove_disks);
                queue_work(nvme_workq, &dev->reset_work);
                spin_unlock(&dev_list_lock);
        }
@@ -2503,6 +2453,7 @@ static int nvme_probe(struct pci_dev *pdev, const struct pci_device_id *id)
                goto free;
 
        INIT_LIST_HEAD(&dev->namespaces);
+       INIT_WORK(&dev->reset_work, nvme_reset_failed_dev);
        dev->pci_dev = pdev;
        pci_set_drvdata(pdev, dev);
        result = nvme_set_instance(dev);
@@ -2520,6 +2471,7 @@ static int nvme_probe(struct pci_dev *pdev, const struct pci_device_id *id)
                goto release_pools;
        }
 
+       kref_init(&dev->kref);
        result = nvme_dev_add(dev);
        if (result)
                goto shutdown;
@@ -2535,11 +2487,11 @@ static int nvme_probe(struct pci_dev *pdev, const struct pci_device_id *id)
                goto remove;
 
        dev->initialized = 1;
-       kref_init(&dev->kref);
        return 0;
 
  remove:
        nvme_dev_remove(dev);
+       nvme_free_namespaces(dev);
  shutdown:
        nvme_dev_shutdown(dev);
  release_pools:
@@ -2554,6 +2506,12 @@ static int nvme_probe(struct pci_dev *pdev, const struct pci_device_id *id)
        return result;
 }
 
+static void nvme_shutdown(struct pci_dev *pdev)
+{
+       struct nvme_dev *dev = pci_get_drvdata(pdev);
+       nvme_dev_shutdown(dev);
+}
+
 static void nvme_remove(struct pci_dev *pdev)
 {
        struct nvme_dev *dev = pci_get_drvdata(pdev);
@@ -2595,7 +2553,7 @@ static int nvme_resume(struct device *dev)
        struct nvme_dev *ndev = pci_get_drvdata(pdev);
 
        if (nvme_dev_resume(ndev) && !work_busy(&ndev->reset_work)) {
-               INIT_WORK(&ndev->reset_work, nvme_reset_failed_dev);
+               PREPARE_WORK(&ndev->reset_work, nvme_reset_failed_dev);
                queue_work(nvme_workq, &ndev->reset_work);
        }
        return 0;
@@ -2625,6 +2583,7 @@ static struct pci_driver nvme_driver = {
        .id_table       = nvme_id_table,
        .probe          = nvme_probe,
        .remove         = nvme_remove,
+       .shutdown       = nvme_shutdown,
        .driver         = {
                .pm     = &nvme_dev_pm_ops,
        },