blk-mq: new multi-queue block IO queueing mechanism
diff --git a/block/blk-flush.c b/block/blk-flush.c
index cc2b827a853cdea378f1c54edeadd65546b33651..3e4cc9c7890a61d853d5ef5a79e90e1b2b981703 100644
--- a/block/blk-flush.c
+++ b/block/blk-flush.c
 #include <linux/bio.h>
 #include <linux/blkdev.h>
 #include <linux/gfp.h>
+#include <linux/blk-mq.h>
 
 #include "blk.h"
+#include "blk-mq.h"
 
 /* FLUSH/FUA sequences */
 enum {
@@ -124,6 +126,24 @@ static void blk_flush_restore_request(struct request *rq)
        /* make @rq a normal request */
        rq->cmd_flags &= ~REQ_FLUSH_SEQ;
        rq->end_io = rq->flush.saved_end_io;
+
+       blk_clear_rq_complete(rq);
+}
+
+static void mq_flush_data_run(struct work_struct *work)
+{
+       struct request *rq;
+
+       rq = container_of(work, struct request, mq_flush_data);
+
+       memset(&rq->csd, 0, sizeof(rq->csd));
+       blk_mq_run_request(rq, true, false);
+}
+
+static void blk_mq_flush_data_insert(struct request *rq)
+{
+       INIT_WORK(&rq->mq_flush_data, mq_flush_data_run);
+       kblockd_schedule_work(rq->q, &rq->mq_flush_data);
 }
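
blk_mq_flush_data_insert() above defers re-issuing the data part of a flush sequence to kblockd; the work callback then recovers the owning request from the embedded work_struct with container_of() and re-runs it. A stand-alone user-space sketch of that embed-and-recover idiom (simplified stand-in types and a hand-rolled container_of; not the kernel definitions):

	#include <stddef.h>
	#include <stdio.h>

	/* Simplified stand-ins for the kernel types; illustrative only. */
	struct work_struct {
		void (*func)(struct work_struct *);
	};

	struct request {
		int tag;
		struct work_struct mq_flush_data;	/* embedded, like rq->mq_flush_data */
	};

	/* container_of(): map a pointer to an embedded member back to its parent. */
	#define container_of(ptr, type, member) \
		((type *)((char *)(ptr) - offsetof(type, member)))

	static void data_run(struct work_struct *work)
	{
		struct request *rq = container_of(work, struct request, mq_flush_data);

		printf("re-issuing data request, tag %d\n", rq->tag);
	}

	int main(void)
	{
		struct request rq = { .tag = 7, .mq_flush_data = { .func = data_run } };

		/* A real workqueue would invoke the callback later from a worker thread. */
		rq.mq_flush_data.func(&rq.mq_flush_data);
		return 0;
	}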
 
 /**
@@ -136,7 +156,7 @@ static void blk_flush_restore_request(struct request *rq)
  * completion and trigger the next step.
  *
  * CONTEXT:
- * spin_lock_irq(q->queue_lock)
+ * spin_lock_irq(q->queue_lock or q->mq_flush_lock)
  *
  * RETURNS:
  * %true if requests were added to the dispatch queue, %false otherwise.
@@ -146,7 +166,7 @@ static bool blk_flush_complete_seq(struct request *rq, unsigned int seq,
 {
        struct request_queue *q = rq->q;
        struct list_head *pending = &q->flush_queue[q->flush_pending_idx];
-       bool queued = false;
+       bool queued = false, kicked;
 
        BUG_ON(rq->flush.seq & seq);
        rq->flush.seq |= seq;
@@ -167,8 +187,12 @@ static bool blk_flush_complete_seq(struct request *rq, unsigned int seq,
 
        case REQ_FSEQ_DATA:
                list_move_tail(&rq->flush.list, &q->flush_data_in_flight);
-               list_add(&rq->queuelist, &q->queue_head);
-               queued = true;
+               if (q->mq_ops) {
+                       blk_mq_flush_data_insert(rq);
+               } else {
+                       list_add(&rq->queuelist, &q->queue_head);
+                       queued = true;
+               }
                break;
 
        case REQ_FSEQ_DONE:
@@ -181,28 +205,43 @@ static bool blk_flush_complete_seq(struct request *rq, unsigned int seq,
                BUG_ON(!list_empty(&rq->queuelist));
                list_del_init(&rq->flush.list);
                blk_flush_restore_request(rq);
-               __blk_end_request_all(rq, error);
+               if (q->mq_ops)
+                       blk_mq_end_io(rq, error);
+               else
+                       __blk_end_request_all(rq, error);
                break;
 
        default:
                BUG();
        }
 
-       return blk_kick_flush(q) | queued;
+       kicked = blk_kick_flush(q);
+       /* mq_run_flush() will run the queue */
+       if (q->mq_ops)
+               return queued;
+       return kicked | queued;
 }
 
 static void flush_end_io(struct request *flush_rq, int error)
 {
        struct request_queue *q = flush_rq->q;
-       struct list_head *running = &q->flush_queue[q->flush_running_idx];
+       struct list_head *running;
        bool queued = false;
        struct request *rq, *n;
+       unsigned long flags = 0;
 
+       if (q->mq_ops) {
+               blk_mq_free_request(flush_rq);
+               spin_lock_irqsave(&q->mq_flush_lock, flags);
+       }
+       running = &q->flush_queue[q->flush_running_idx];
        BUG_ON(q->flush_pending_idx == q->flush_running_idx);
 
        /* account completion of the flush request */
        q->flush_running_idx ^= 1;
-       elv_completed_request(q, flush_rq);
+
+       if (!q->mq_ops)
+               elv_completed_request(q, flush_rq);
 
        /* and push the waiting requests to the next stage */
        list_for_each_entry_safe(rq, n, running, flush.list) {
@@ -223,9 +262,48 @@ static void flush_end_io(struct request *flush_rq, int error)
         * directly into request_fn may confuse the driver.  Always use
         * kblockd.
         */
-       if (queued || q->flush_queue_delayed)
-               blk_run_queue_async(q);
+       if (queued || q->flush_queue_delayed) {
+               if (!q->mq_ops)
+                       blk_run_queue_async(q);
+               else
+                       /*
+                        * This can be optimized to only run queues with
+                        * requests queued if necessary.
+                        */
+                       blk_mq_run_queues(q, true);
+       }
        q->flush_queue_delayed = 0;
+       if (q->mq_ops)
+               spin_unlock_irqrestore(&q->mq_flush_lock, flags);
+}
+
+static void mq_flush_work(struct work_struct *work)
+{
+       struct request_queue *q;
+       struct request *rq;
+
+       q = container_of(work, struct request_queue, mq_flush_work);
+
+       /* We don't need to set REQ_FLUSH_SEQ; it's only for consistency */
+       rq = blk_mq_alloc_request(q, WRITE_FLUSH|REQ_FLUSH_SEQ,
+               __GFP_WAIT|GFP_ATOMIC);
+       rq->cmd_type = REQ_TYPE_FS;
+       rq->end_io = flush_end_io;
+
+       blk_mq_run_request(rq, true, false);
+}
+
+/*
+ * We can't use q->flush_rq directly, because it doesn't have a tag and is not
+ * in hctx->rqs[], so we must allocate a new request.  Since we can't sleep
+ * here, offload the allocation to a workqueue.
+ *
+ * Note: we assume that a flush request completed on any hardware queue
+ * flushes the whole disk cache.
+ */
+static void mq_run_flush(struct request_queue *q)
+{
+       kblockd_schedule_work(q, &q->mq_flush_work);
 }
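
mq_run_flush() runs with the flush lock held (see the CONTEXT notes in this file) and therefore cannot sleep; the allocation and submission happen later in mq_flush_work(), which kblockd executes in process context. A generic, stand-alone sketch of this "punt the sleeping part to a work item" idiom, using the system workqueue via schedule_work() instead of kblockd (hypothetical module, not blk-mq code):

	#include <linux/module.h>
	#include <linux/workqueue.h>
	#include <linux/slab.h>

	static void *deferred_buf;

	/* Runs in process context, so a sleeping GFP_KERNEL allocation is fine here. */
	static void alloc_work_fn(struct work_struct *work)
	{
		deferred_buf = kmalloc(128, GFP_KERNEL);
		pr_info("deferred allocation %s\n", deferred_buf ? "done" : "failed");
	}

	static DECLARE_WORK(alloc_work, alloc_work_fn);

	static int __init punt_init(void)
	{
		/*
		 * Safe to call from atomic context: it only queues the work item.
		 * blk-flush.c does the same, but through kblockd_schedule_work().
		 */
		schedule_work(&alloc_work);
		return 0;
	}

	static void __exit punt_exit(void)
	{
		flush_work(&alloc_work);	/* make sure the worker has run */
		kfree(deferred_buf);
	}

	module_init(punt_init);
	module_exit(punt_exit);
	MODULE_LICENSE("GPL");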
 
 /**
@@ -236,7 +314,7 @@ static void flush_end_io(struct request *flush_rq, int error)
  * Please read the comment at the top of this file for more info.
  *
  * CONTEXT:
- * spin_lock_irq(q->queue_lock)
+ * spin_lock_irq(q->queue_lock or q->mq_flush_lock)
  *
  * RETURNS:
  * %true if flush was issued, %false otherwise.
@@ -261,13 +339,18 @@ static bool blk_kick_flush(struct request_queue *q)
         * Issue flush and toggle pending_idx.  This makes pending_idx
         * different from running_idx, which means flush is in flight.
         */
+       q->flush_pending_idx ^= 1;
+       if (q->mq_ops) {
+               mq_run_flush(q);
+               return true;
+       }
+
        blk_rq_init(q, &q->flush_rq);
        q->flush_rq.cmd_type = REQ_TYPE_FS;
        q->flush_rq.cmd_flags = WRITE_FLUSH | REQ_FLUSH_SEQ;
        q->flush_rq.rq_disk = first_rq->rq_disk;
        q->flush_rq.end_io = flush_end_io;
 
-       q->flush_pending_idx ^= 1;
        list_add_tail(&q->flush_rq.queuelist, &q->queue_head);
        return true;
 }
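
The two flush queues form a small double buffer: flush_pending_idx selects the list new requests wait on, flush_running_idx the list served by the flush currently in flight, and, as the comment above notes, the indices being different means a flush is in flight. A stand-alone toy model of that index toggling (hypothetical names, no locking):

	#include <stdbool.h>
	#include <stdio.h>

	/* Two queues of waiting requests, double-buffered by index. */
	static int pending_idx, running_idx;
	static int count[2];			/* requests parked on each list */

	static bool flush_in_flight(void)
	{
		return pending_idx != running_idx;
	}

	static void kick_flush(void)
	{
		if (flush_in_flight() || !count[pending_idx])
			return;			/* already flushing, or nothing to do */
		pending_idx ^= 1;		/* new arrivals go to the other list */
		printf("issue flush for %d request(s)\n", count[running_idx]);
	}

	static void flush_done(void)
	{
		count[running_idx] = 0;		/* requests move to their next step */
		running_idx ^= 1;		/* catch up with pending_idx */
		kick_flush();			/* more may have queued up meanwhile */
	}

	int main(void)
	{
		count[pending_idx] = 2;		/* two requests wait for a preflush */
		kick_flush();			/* flush now in flight */
		count[pending_idx] = 1;		/* a third arrives while flushing */
		flush_done();			/* first flush done, second one issued */
		flush_done();
		return 0;
	}
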
@@ -284,16 +367,37 @@ static void flush_data_end_io(struct request *rq, int error)
                blk_run_queue_async(q);
 }
 
+static void mq_flush_data_end_io(struct request *rq, int error)
+{
+       struct request_queue *q = rq->q;
+       struct blk_mq_hw_ctx *hctx;
+       struct blk_mq_ctx *ctx;
+       unsigned long flags;
+
+       ctx = rq->mq_ctx;
+       hctx = q->mq_ops->map_queue(q, ctx->cpu);
+
+       /*
+        * After populating an empty queue, kick it to avoid stall.  Read
+        * the comment in flush_end_io().
+        */
+       spin_lock_irqsave(&q->mq_flush_lock, flags);
+       if (blk_flush_complete_seq(rq, REQ_FSEQ_DATA, error))
+               blk_mq_run_hw_queue(hctx, true);
+       spin_unlock_irqrestore(&q->mq_flush_lock, flags);
+}
+
 /**
  * blk_insert_flush - insert a new FLUSH/FUA request
  * @rq: request to insert
  *
  * To be called from __elv_add_request() for %ELEVATOR_INSERT_FLUSH insertions,
+ * or from __blk_mq_run_hw_queue() to dispatch the request.
  * @rq is being submitted.  Analyze what needs to be done and put it on the
  * right queue.
  *
  * CONTEXT:
- * spin_lock_irq(q->queue_lock)
+ * spin_lock_irq(q->queue_lock) in !mq case
  */
 void blk_insert_flush(struct request *rq)
 {
@@ -316,7 +420,10 @@ void blk_insert_flush(struct request *rq)
         * complete the request.
         */
        if (!policy) {
-               __blk_end_bidi_request(rq, 0, 0, 0);
+               if (q->mq_ops)
+                       blk_mq_end_io(rq, 0);
+               else
+                       __blk_end_bidi_request(rq, 0, 0, 0);
                return;
        }
 
@@ -329,7 +436,10 @@ void blk_insert_flush(struct request *rq)
         */
        if ((policy & REQ_FSEQ_DATA) &&
            !(policy & (REQ_FSEQ_PREFLUSH | REQ_FSEQ_POSTFLUSH))) {
-               list_add_tail(&rq->queuelist, &q->queue_head);
+               if (q->mq_ops)
+                       blk_mq_run_request(rq, false, true);
+               else
+                       list_add_tail(&rq->queuelist, &q->queue_head);
                return;
        }
 
@@ -341,6 +451,14 @@ void blk_insert_flush(struct request *rq)
        INIT_LIST_HEAD(&rq->flush.list);
        rq->cmd_flags |= REQ_FLUSH_SEQ;
        rq->flush.saved_end_io = rq->end_io; /* Usually NULL */
+       if (q->mq_ops) {
+               rq->end_io = mq_flush_data_end_io;
+
+               spin_lock_irq(&q->mq_flush_lock);
+               blk_flush_complete_seq(rq, REQ_FSEQ_ACTIONS & ~policy, 0);
+               spin_unlock_irq(&q->mq_flush_lock);
+               return;
+       }
        rq->end_io = flush_data_end_io;
 
        blk_flush_complete_seq(rq, REQ_FSEQ_ACTIONS & ~policy, 0);
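
Passing REQ_FSEQ_ACTIONS & ~policy as the completed mask marks every step the request does not need as already done, so blk_flush_complete_seq() advances straight to the first step that is actually required. A stand-alone illustration of the bit trick (the enum mirrors the REQ_FSEQ_* bits defined at the top of blk-flush.c, which this diff does not show):

	#include <stdio.h>

	/* Flush sequence steps, one bit each (as in blk-flush.c's enum). */
	enum {
		REQ_FSEQ_PREFLUSH	= (1 << 0),
		REQ_FSEQ_DATA		= (1 << 1),
		REQ_FSEQ_POSTFLUSH	= (1 << 2),
		REQ_FSEQ_ACTIONS	= REQ_FSEQ_PREFLUSH | REQ_FSEQ_DATA |
					  REQ_FSEQ_POSTFLUSH,
	};

	static void start_sequence(const char *what, unsigned int policy)
	{
		/* Steps the request does not need are marked complete up front. */
		unsigned int seq = REQ_FSEQ_ACTIONS & ~policy;

		printf("%s: policy %#x, pre-completed seq %#x\n", what, policy, seq);
	}

	int main(void)
	{
		start_sequence("flush+FUA write, device has FUA",
			       REQ_FSEQ_PREFLUSH | REQ_FSEQ_DATA);
		start_sequence("flush+FUA write, no FUA support",
			       REQ_FSEQ_PREFLUSH | REQ_FSEQ_DATA | REQ_FSEQ_POSTFLUSH);
		start_sequence("empty flush", REQ_FSEQ_PREFLUSH);
		return 0;
	}
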
@@ -453,3 +571,9 @@ int blkdev_issue_flush(struct block_device *bdev, gfp_t gfp_mask,
        return ret;
 }
 EXPORT_SYMBOL(blkdev_issue_flush);
+
+void blk_mq_init_flush(struct request_queue *q)
+{
+       spin_lock_init(&q->mq_flush_lock);
+       INIT_WORK(&q->mq_flush_work, mq_flush_work);
+}
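
blk_mq_init_flush() only prepares the lock and the work item; its caller lives outside this file. Presumably it is invoked once while the multiqueue request_queue is being set up in blk-mq.c, roughly as in this hypothetical sketch:

	/* Hypothetical excerpt of a blk-mq queue setup path (not this commit's code). */
	static void example_mq_queue_setup(struct request_queue *q)
	{
		/*
		 * Must happen before any REQ_FLUSH/REQ_FUA request can reach
		 * blk_insert_flush(), which takes q->mq_flush_lock.
		 */
		blk_mq_init_flush(q);
	}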