NVMe: Free admin queue memory on initialisation failure

diff --git a/drivers/block/nvme.c b/drivers/block/nvme.c
index 3f8cae9dc96023e475c0a7cb69a5cf892efb37ed..214037055e2a188a696b1e347ecca6be8a6afa3a 100644
--- a/drivers/block/nvme.c
+++ b/drivers/block/nvme.c
@@ -78,6 +78,7 @@ struct nvme_dev {
        char serial[20];
        char model[40];
        char firmware_rev[8];
+       u32 max_hw_sectors;
 };
 
 /*
@@ -840,12 +841,26 @@ static int nvme_identify(struct nvme_dev *dev, unsigned nsid, unsigned cns,
 }
 
 static int nvme_get_features(struct nvme_dev *dev, unsigned fid,
-                       unsigned dword11, dma_addr_t dma_addr, u32 *result)
+                               unsigned nsid, dma_addr_t dma_addr)
 {
        struct nvme_command c;
 
        memset(&c, 0, sizeof(c));
        c.features.opcode = nvme_admin_get_features;
+       c.features.nsid = cpu_to_le32(nsid);
+       c.features.prp1 = cpu_to_le64(dma_addr);
+       c.features.fid = cpu_to_le32(fid);
+
+       return nvme_submit_admin_cmd(dev, &c, NULL);
+}
+
+static int nvme_set_features(struct nvme_dev *dev, unsigned fid,
+                       unsigned dword11, dma_addr_t dma_addr, u32 *result)
+{
+       struct nvme_command c;
+
+       memset(&c, 0, sizeof(c));
+       c.features.opcode = nvme_admin_set_features;
        c.features.prp1 = cpu_to_le64(dma_addr);
        c.features.fid = cpu_to_le32(fid);
        c.features.dword11 = cpu_to_le32(dword11);
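
After the split, nvme_get_features() carries the namespace ID that per-namespace features such as LBA Range Type need, while nvme_set_features() keeps the dword11 payload and the completion result. Abridged from the call sites updated later in this patch:

	res = nvme_get_features(dev, NVME_FEAT_LBA_RANGE, i, dma_addr + 4096);
	status = nvme_set_features(dev, NVME_FEAT_NUM_QUEUES, q_count, 0, &result);
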
@@ -853,6 +868,15 @@ static int nvme_get_features(struct nvme_dev *dev, unsigned fid,
        return nvme_submit_admin_cmd(dev, &c, result);
 }
 
+static void nvme_free_queue_mem(struct nvme_queue *nvmeq)
+{
+       dma_free_coherent(nvmeq->q_dmadev, CQ_SIZE(nvmeq->q_depth),
+                               (void *)nvmeq->cqes, nvmeq->cq_dma_addr);
+       dma_free_coherent(nvmeq->q_dmadev, SQ_SIZE(nvmeq->q_depth),
+                                       nvmeq->sq_cmds, nvmeq->sq_dma_addr);
+       kfree(nvmeq);
+}
+
 static void nvme_free_queue(struct nvme_dev *dev, int qid)
 {
        struct nvme_queue *nvmeq = dev->queues[qid];
@@ -867,18 +891,15 @@ static void nvme_free_queue(struct nvme_dev *dev, int qid)
                adapter_delete_cq(dev, qid);
        }
 
-       dma_free_coherent(nvmeq->q_dmadev, CQ_SIZE(nvmeq->q_depth),
-                               (void *)nvmeq->cqes, nvmeq->cq_dma_addr);
-       dma_free_coherent(nvmeq->q_dmadev, SQ_SIZE(nvmeq->q_depth),
-                                       nvmeq->sq_cmds, nvmeq->sq_dma_addr);
-       kfree(nvmeq);
+       nvme_free_queue_mem(nvmeq);
 }
 
 static struct nvme_queue *nvme_alloc_queue(struct nvme_dev *dev, int qid,
                                                        int depth, int vector)
 {
        struct device *dmadev = &dev->pci_dev->dev;
-       unsigned extra = (depth / 8) + (depth * sizeof(struct nvme_cmd_info));
+       unsigned extra = DIV_ROUND_UP(depth, 8) + (depth *
+                                               sizeof(struct nvme_cmd_info));
        struct nvme_queue *nvmeq = kzalloc(sizeof(*nvmeq) + extra, GFP_KERNEL);
        if (!nvmeq)
                return NULL;
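
The DIV_ROUND_UP change matters because the extra space appended to the queue holds one bit per command slot (the cmdid bitmap) ahead of the per-command info array; truncating division under-allocates whenever depth is not a multiple of 8. A standalone illustration:

#include <stdio.h>

#define DIV_ROUND_UP(n, d) (((n) + (d) - 1) / (d))

int main(void)
{
	unsigned depth = 62;	/* hypothetical queue depth, not a multiple of 8 */

	printf("depth / 8          = %u bytes\n", depth / 8);	/* 7: room for only 56 bits */
	printf("DIV_ROUND_UP(d, 8) = %u bytes\n", DIV_ROUND_UP(depth, 8));	/* 8: covers all 62 bits */
	return 0;
}
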
@@ -966,7 +987,7 @@ static __devinit struct nvme_queue *nvme_create_queue(struct nvme_dev *dev,
 
 static int __devinit nvme_configure_admin_queue(struct nvme_dev *dev)
 {
-       int result;
+       int result = 0;
        u32 aqa;
        u64 cap;
        unsigned long timeout;
@@ -996,17 +1017,22 @@ static int __devinit nvme_configure_admin_queue(struct nvme_dev *dev)
        timeout = ((NVME_CAP_TIMEOUT(cap) + 1) * HZ / 2) + jiffies;
        dev->db_stride = NVME_CAP_STRIDE(cap);
 
-       while (!(readl(&dev->bar->csts) & NVME_CSTS_RDY)) {
+       while (!result && !(readl(&dev->bar->csts) & NVME_CSTS_RDY)) {
                msleep(100);
                if (fatal_signal_pending(current))
-                       return -EINTR;
+                       result = -EINTR;
                if (time_after(jiffies, timeout)) {
                        dev_err(&dev->pci_dev->dev,
                                "Device not ready; aborting initialisation\n");
-                       return -ENODEV;
+                       result = -ENODEV;
                }
        }
 
+       if (result) {
+               nvme_free_queue_mem(nvmeq);
+               return result;
+       }
+
        result = queue_request_irq(dev, nvmeq, "nvme admin");
        dev->queues[0] = nvmeq;
        return result;
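
This hunk is the titular fix: previously a fatal signal or a ready-timeout returned straight out of nvme_configure_admin_queue() and leaked the nvme_queue allocated earlier in the function, since dev->queues[0] is only assigned on success. Folding the early returns into result leaves a single exit that can call nvme_free_queue_mem(). The shape of the fix, reduced to a minimal sketch with stand-in names:

#include <stdlib.h>

static int wait_until_ready(void)
{
	return -1;	/* stand-in for the CSTS.RDY poll timing out */
}

static int configure(void **out)
{
	void *q = malloc(64);	/* stand-in for nvme_alloc_queue() */
	int result;

	if (!q)
		return -1;
	result = wait_until_ready();
	if (result) {	/* one error exit: release before returning */
		free(q);
		return result;
	}
	*out = q;	/* ownership transfers only on success */
	return 0;
}

int main(void)
{
	void *q;

	return configure(&q) ? 1 : 0;
}
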
@@ -1028,6 +1054,8 @@ static struct nvme_iod *nvme_map_user_pages(struct nvme_dev *dev, int write,
        offset = offset_in_page(addr);
        count = DIV_ROUND_UP(offset + length, PAGE_SIZE);
        pages = kcalloc(count, sizeof(*pages), GFP_KERNEL);
+       if (!pages)
+               return ERR_PTR(-ENOMEM);
 
        err = get_user_pages_fast(addr, count, 1, pages);
        if (err < count) {
@@ -1045,6 +1073,8 @@ static struct nvme_iod *nvme_map_user_pages(struct nvme_dev *dev, int write,
                length -= (PAGE_SIZE - offset);
                offset = 0;
        }
+       sg_mark_end(&sg[i - 1]);
+       iod->nents = count;
 
        err = -ENOMEM;
        nents = dma_map_sg(&dev->pci_dev->dev, sg, count,
@@ -1065,16 +1095,15 @@ static struct nvme_iod *nvme_map_user_pages(struct nvme_dev *dev, int write,
 }
 
 static void nvme_unmap_user_pages(struct nvme_dev *dev, int write,
-                       unsigned long addr, int length, struct nvme_iod *iod)
+                       struct nvme_iod *iod)
 {
-       struct scatterlist *sg = iod->sg;
-       int i, count;
+       int i;
 
-       count = DIV_ROUND_UP(offset_in_page(addr) + length, PAGE_SIZE);
-       dma_unmap_sg(&dev->pci_dev->dev, sg, count, DMA_FROM_DEVICE);
+       dma_unmap_sg(&dev->pci_dev->dev, iod->sg, iod->nents,
+                               write ? DMA_TO_DEVICE : DMA_FROM_DEVICE);
 
-       for (i = 0; i < count; i++)
-               put_page(sg_page(&sg[i]));
+       for (i = 0; i < iod->nents; i++)
+               put_page(sg_page(&iod->sg[i]));
 }
 
 static int nvme_submit_io(struct nvme_ns *ns, struct nvme_user_io __user *uio)
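
Two fixes ride together here: the unmap now honours the transfer direction instead of hard-coding DMA_FROM_DEVICE, and the entry count comes from iod->nents (recorded at map time above) rather than being recomputed from an addr/length pair the caller no longer has to thread through. Mismatched directions are a DMA-API violation that CONFIG_DMA_API_DEBUG reports; the contract, as a comment sketch:

/*
 * dma_map_sg(dev, sg, nents, dir) must be paired with
 * dma_unmap_sg(dev, sg, nents, dir) using the same nents and dir:
 *
 *   write (host -> device): DMA_TO_DEVICE   on map and on unmap
 *   read  (device -> host): DMA_FROM_DEVICE on map and on unmap
 */
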
@@ -1131,19 +1160,18 @@ static int nvme_submit_io(struct nvme_ns *ns, struct nvme_user_io __user *uio)
        else
                status = nvme_submit_sync_cmd(nvmeq, &c, NULL, NVME_IO_TIMEOUT);
 
-       nvme_unmap_user_pages(dev, io.opcode & 1, io.addr, length, iod);
+       nvme_unmap_user_pages(dev, io.opcode & 1, iod);
        nvme_free_iod(dev, iod);
        return status;
 }
 
-static int nvme_user_admin_cmd(struct nvme_ns *ns,
+static int nvme_user_admin_cmd(struct nvme_dev *dev,
                                        struct nvme_admin_cmd __user *ucmd)
 {
-       struct nvme_dev *dev = ns->dev;
        struct nvme_admin_cmd cmd;
        struct nvme_command c;
        int status, length;
-       struct nvme_iod *iod;
+       struct nvme_iod *uninitialized_var(iod);
 
        if (!capable(CAP_SYS_ADMIN))
                return -EACCES;
@@ -1179,8 +1207,7 @@ static int nvme_user_admin_cmd(struct nvme_ns *ns,
                status = nvme_submit_admin_cmd(dev, &c, NULL);
 
        if (cmd.data_len) {
-               nvme_unmap_user_pages(dev, cmd.opcode & 1, cmd.addr,
-                                                       cmd.data_len, iod);
+               nvme_unmap_user_pages(dev, cmd.opcode & 1, iod);
                nvme_free_iod(dev, iod);
        }
        return status;
@@ -1195,7 +1222,7 @@ static int nvme_ioctl(struct block_device *bdev, fmode_t mode, unsigned int cmd,
        case NVME_IOCTL_ID:
                return ns->ns_id;
        case NVME_IOCTL_ADMIN_CMD:
-               return nvme_user_admin_cmd(ns, (void __user *)arg);
+               return nvme_user_admin_cmd(ns->dev, (void __user *)arg);
        case NVME_IOCTL_SUBMIT_IO:
                return nvme_submit_io(ns, (void __user *)arg);
        default:
@@ -1315,8 +1342,10 @@ static struct nvme_ns *nvme_alloc_ns(struct nvme_dev *dev, int nsid,
        ns->queue = blk_alloc_queue(GFP_KERNEL);
        if (!ns->queue)
                goto out_free_ns;
-       ns->queue->queue_flags = QUEUE_FLAG_DEFAULT | QUEUE_FLAG_NOMERGES |
-                               QUEUE_FLAG_NONROT | QUEUE_FLAG_DISCARD;
+       ns->queue->queue_flags = QUEUE_FLAG_DEFAULT;
+       queue_flag_set_unlocked(QUEUE_FLAG_NOMERGES, ns->queue);
+       queue_flag_set_unlocked(QUEUE_FLAG_NONROT, ns->queue);
+/*     queue_flag_set_unlocked(QUEUE_FLAG_DISCARD, ns->queue); */
        blk_queue_make_request(ns->queue, nvme_make_request);
        ns->dev = dev;
        ns->queue->queuedata = ns;
@@ -1328,6 +1357,9 @@ static struct nvme_ns *nvme_alloc_ns(struct nvme_dev *dev, int nsid,
        ns->disk = disk;
        lbaf = id->flbas & 0xf;
        ns->lba_shift = id->lbaf[lbaf].ds;
+       blk_queue_logical_block_size(ns->queue, 1 << ns->lba_shift);
+       if (dev->max_hw_sectors)
+               blk_queue_max_hw_sectors(ns->queue, dev->max_hw_sectors);
 
        disk->major = nvme_major;
        disk->minors = NVME_MINORS;
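
The ds field of an LBA format is a log2 data size, so exporting it to the block layer is a shift. For instance, assuming a namespace formatted with ds == 12:

#include <stdio.h>

int main(void)
{
	int lba_shift = 12;	/* hypothetical: id->lbaf[lbaf].ds */

	printf("logical block size = %d bytes\n", 1 << lba_shift);	/* 4096 */
	return 0;
}
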
@@ -1363,7 +1395,7 @@ static int set_queue_count(struct nvme_dev *dev, int count)
        u32 result;
        u32 q_count = (count - 1) | ((count - 1) << 16);
 
-       status = nvme_get_features(dev, NVME_FEAT_NUM_QUEUES, q_count, 0,
+       status = nvme_set_features(dev, NVME_FEAT_NUM_QUEUES, q_count, 0,
                                                                &result);
        if (status)
                return -EIO;
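
The q_count value packs two zero-based counts into the Number of Queues payload: the requested I/O submission queue count in bits 15:0 and the completion queue count in bits 31:16. A worked example:

#include <stdio.h>

int main(void)
{
	int count = 4;	/* request four I/O submission and completion queues */
	unsigned q_count = (count - 1) | ((count - 1) << 16);

	printf("dword11 = 0x%08x\n", q_count);	/* 0x00030003 */
	return 0;
}
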
@@ -1372,7 +1404,7 @@ static int set_queue_count(struct nvme_dev *dev, int count)
 
 static int __devinit nvme_setup_io_queues(struct nvme_dev *dev)
 {
-       int result, cpu, i, nr_io_queues, db_bar_size;
+       int result, cpu, i, nr_io_queues, db_bar_size, q_depth;
 
        nr_io_queues = num_online_cpus();
        result = set_queue_count(dev, nr_io_queues);
@@ -1418,9 +1450,10 @@ static int __devinit nvme_setup_io_queues(struct nvme_dev *dev)
                cpu = cpumask_next(cpu, cpu_online_mask);
        }
 
+       q_depth = min_t(int, NVME_CAP_MQES(readq(&dev->bar->cap)) + 1,
+                                                               NVME_Q_DEPTH);
        for (i = 0; i < nr_io_queues; i++) {
-               dev->queues[i + 1] = nvme_create_queue(dev, i + 1,
-                                                       NVME_Q_DEPTH, i);
+               dev->queues[i + 1] = nvme_create_queue(dev, i + 1, q_depth, i);
                if (IS_ERR(dev->queues[i + 1]))
                        return PTR_ERR(dev->queues[i + 1]);
                dev->queue_count++;
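
CAP.MQES is likewise zero-based, so a controller advertising MQES == 63 supports 64-entry queues; min_t() keeps the driver's own NVME_Q_DEPTH ceiling for controllers that allow more. With hypothetical values:

#include <stdio.h>

#define NVME_Q_DEPTH 1024	/* the driver's ceiling */
#define MIN(a, b) ((a) < (b) ? (a) : (b))

int main(void)
{
	int mqes = 63;	/* hypothetical CAP.MQES read from the controller */

	printf("q_depth = %d\n", MIN(mqes + 1, NVME_Q_DEPTH));	/* 64 */
	return 0;
}
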
@@ -1469,6 +1502,10 @@ static int __devinit nvme_dev_add(struct nvme_dev *dev)
        memcpy(dev->serial, ctrl->sn, sizeof(ctrl->sn));
        memcpy(dev->model, ctrl->mn, sizeof(ctrl->mn));
        memcpy(dev->firmware_rev, ctrl->fr, sizeof(ctrl->fr));
+       if (ctrl->mdts) {
+               int shift = NVME_CAP_MPSMIN(readq(&dev->bar->cap)) + 12;
+               dev->max_hw_sectors = 1 << (ctrl->mdts + shift - 9);
+       }
 
        id_ns = mem;
        for (i = 1; i <= nn; i++) {
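
MDTS is a power of two in units of the controller's minimum page size, so CAP.MPSMIN == 0 means 4 KiB pages (hence the +12), and the -9 converts bytes to the 512-byte sectors the block layer counts in. With a hypothetical mdts of 5, the cap is 2^5 * 4 KiB = 128 KiB:

#include <stdio.h>

int main(void)
{
	int mpsmin = 0;	/* hypothetical CAP.MPSMIN: 4 KiB minimum page size */
	int mdts = 5;	/* hypothetical Identify Controller MDTS */
	int shift = mpsmin + 12;

	printf("max_hw_sectors = %u\n", 1u << (mdts + shift - 9));	/* 256 */
	return 0;
}
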
@@ -1480,7 +1517,7 @@ static int __devinit nvme_dev_add(struct nvme_dev *dev)
                        continue;
 
                res = nvme_get_features(dev, NVME_FEAT_LBA_RANGE, i,
-                                                       dma_addr + 4096, NULL);
+                                                       dma_addr + 4096);
                if (res)
                        continue;
 
@@ -1549,15 +1586,33 @@ static void nvme_release_prp_pools(struct nvme_dev *dev)
        dma_pool_destroy(dev->prp_small_pool);
 }
 
-/* XXX: Use an ida or something to let remove / add work correctly */
-static void nvme_set_instance(struct nvme_dev *dev)
+static DEFINE_IDA(nvme_instance_ida);
+
+static int nvme_set_instance(struct nvme_dev *dev)
 {
-       static int instance;
-       dev->instance = instance++;
+       int instance, error;
+
+       do {
+               if (!ida_pre_get(&nvme_instance_ida, GFP_KERNEL))
+                       return -ENODEV;
+
+               spin_lock(&dev_list_lock);
+               error = ida_get_new(&nvme_instance_ida, &instance);
+               spin_unlock(&dev_list_lock);
+       } while (error == -EAGAIN);
+
+       if (error)
+               return -ENODEV;
+
+       dev->instance = instance;
+       return 0;
 }
 
 static void nvme_release_instance(struct nvme_dev *dev)
 {
+       spin_lock(&dev_list_lock);
+       ida_remove(&nvme_instance_ida, dev->instance);
+       spin_unlock(&dev_list_lock);
 }
 
 static int __devinit nvme_probe(struct pci_dev *pdev,
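
This replaces the monotonic static counter flagged by the old XXX comment: an ida hands back the lowest free instance, so device numbers are recycled across remove/re-add instead of growing without bound. The observable behaviour, as a sketch:

/*
 * nvme_set_instance(devA);	-> devA->instance == 0
 * nvme_set_instance(devB);	-> devB->instance == 1
 * nvme_release_instance(devA);	-> slot 0 returns to the ida
 * nvme_set_instance(devC);	-> devC->instance == 0, reusing the slot
 */
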
@@ -1590,7 +1645,10 @@ static int __devinit nvme_probe(struct pci_dev *pdev,
        pci_set_drvdata(pdev, dev);
        dma_set_mask(&pdev->dev, DMA_BIT_MASK(64));
        dma_set_coherent_mask(&pdev->dev, DMA_BIT_MASK(64));
-       nvme_set_instance(dev);
+       result = nvme_set_instance(dev);
+       if (result)
+               goto disable;
+
        dev->entry[0].vector = pdev->irq;
 
        result = nvme_setup_prp_pools(dev);
@@ -1693,15 +1751,17 @@ static struct pci_driver nvme_driver = {
 
 static int __init nvme_init(void)
 {
-       int result = -EBUSY;
+       int result;
 
        nvme_thread = kthread_run(nvme_kthread, NULL, "nvme");
        if (IS_ERR(nvme_thread))
                return PTR_ERR(nvme_thread);
 
-       nvme_major = register_blkdev(nvme_major, "nvme");
-       if (nvme_major <= 0)
+       result = register_blkdev(nvme_major, "nvme");
+       if (result < 0)
                goto kill_kthread;
+       else if (result > 0)
+               nvme_major = result;
 
        result = pci_register_driver(&nvme_driver);
        if (result)
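
The old test misread register_blkdev()'s contract: with a fixed major it returns 0 on success, so assigning the return value to nvme_major and testing <= 0 treated that success as failure and clobbered the major. The contract the new code follows:

/*
 * register_blkdev(major, name) returns:
 *   > 0  a dynamically allocated major, when called with major == 0
 *   == 0 success, when called with a nonzero, fixed major
 *   <  0 a negative errno on failure
 */
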
@@ -1724,6 +1784,6 @@ static void __exit nvme_exit(void)
 
 MODULE_AUTHOR("Matthew Wilcox <willy@linux.intel.com>");
 MODULE_LICENSE("GPL");
-MODULE_VERSION("0.7");
+MODULE_VERSION("0.8");
 module_init(nvme_init);
 module_exit(nvme_exit);