Merge commit 'v2.6.38-rc6' into for-2.6.39/core

diff --git a/block/blk-core.c b/block/blk-core.c
index 4ce953f1b3909f38f213adcc0a39251a34875330..3cc17e6064d68e5a3315d012a395c84f660bcb8a 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -33,7 +33,7 @@
 
 #include "blk.h"
 
-EXPORT_TRACEPOINT_SYMBOL_GPL(block_remap);
+EXPORT_TRACEPOINT_SYMBOL_GPL(block_bio_remap);
 EXPORT_TRACEPOINT_SYMBOL_GPL(block_rq_remap);
 EXPORT_TRACEPOINT_SYMBOL_GPL(block_bio_complete);
 
@@ -64,13 +64,27 @@ static void drive_stat_acct(struct request *rq, int new_io)
                return;
 
        cpu = part_stat_lock();
-       part = disk_map_sector_rcu(rq->rq_disk, blk_rq_pos(rq));
 
-       if (!new_io)
+       if (!new_io) {
+               part = rq->part;
                part_stat_inc(cpu, part, merges[rw]);
-       else {
+       } else {
+               part = disk_map_sector_rcu(rq->rq_disk, blk_rq_pos(rq));
+               if (!hd_struct_try_get(part)) {
+                       /*
+                        * The partition is already being removed;
+                        * the request will be accounted on the disk only.
+                        *
+                        * We take a reference on disk->part0 even though it
+                        * will never be deleted; holding the reference lets
+                        * us treat part0 like any other partition.
+                        */
+                       part = &rq->rq_disk->part0;
+                       hd_struct_get(part);
+               }
                part_round_stats(cpu, part);
                part_inc_in_flight(part, rw);
+               rq->part = part;
        }
 
        part_stat_unlock();
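
The hunk above makes I/O accounting reference-counted: merges reuse the
partition cached in rq->part, while new I/O try-gets the looked-up partition
and falls back to disk->part0 when it is mid-removal. A minimal,
self-contained sketch of that try-get-or-fall-back pattern, using C11 atomics
and hypothetical toy_* names rather than the kernel's hd_struct API:

#include <stdatomic.h>
#include <stdbool.h>

struct toy_part {
	atomic_int ref;			/* 0 means mid-removal */
};

/* Take a reference only if the object is still live. */
static bool toy_part_try_get(struct toy_part *p)
{
	int old = atomic_load(&p->ref);

	while (old > 0) {
		if (atomic_compare_exchange_weak(&p->ref, &old, old + 1))
			return true;	/* got a reference */
	}
	return false;			/* already being torn down */
}

/*
 * part0 stands in for the whole disk and is never deleted, so an
 * unconditional get is always safe on it.
 */
static struct toy_part *toy_part_for_acct(struct toy_part *target,
					  struct toy_part *part0)
{
	if (toy_part_try_get(target))
		return target;
	atomic_fetch_add(&part0->ref, 1);
	return part0;
}
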
@@ -128,45 +142,36 @@ void blk_rq_init(struct request_queue *q, struct request *rq)
        rq->ref_count = 1;
        rq->start_time = jiffies;
        set_start_time_ns(rq);
+       rq->part = NULL;
 }
 EXPORT_SYMBOL(blk_rq_init);
 
 static void req_bio_endio(struct request *rq, struct bio *bio,
                          unsigned int nbytes, int error)
 {
-       struct request_queue *q = rq->q;
-
-       if (&q->flush_rq != rq) {
-               if (error)
-                       clear_bit(BIO_UPTODATE, &bio->bi_flags);
-               else if (!test_bit(BIO_UPTODATE, &bio->bi_flags))
-                       error = -EIO;
+       if (error)
+               clear_bit(BIO_UPTODATE, &bio->bi_flags);
+       else if (!test_bit(BIO_UPTODATE, &bio->bi_flags))
+               error = -EIO;
 
-               if (unlikely(nbytes > bio->bi_size)) {
-                       printk(KERN_ERR "%s: want %u bytes done, %u left\n",
-                              __func__, nbytes, bio->bi_size);
-                       nbytes = bio->bi_size;
-               }
+       if (unlikely(nbytes > bio->bi_size)) {
+               printk(KERN_ERR "%s: want %u bytes done, %u left\n",
+                      __func__, nbytes, bio->bi_size);
+               nbytes = bio->bi_size;
+       }
 
-               if (unlikely(rq->cmd_flags & REQ_QUIET))
-                       set_bit(BIO_QUIET, &bio->bi_flags);
+       if (unlikely(rq->cmd_flags & REQ_QUIET))
+               set_bit(BIO_QUIET, &bio->bi_flags);
 
-               bio->bi_size -= nbytes;
-               bio->bi_sector += (nbytes >> 9);
+       bio->bi_size -= nbytes;
+       bio->bi_sector += (nbytes >> 9);
 
-               if (bio_integrity(bio))
-                       bio_integrity_advance(bio, nbytes);
+       if (bio_integrity(bio))
+               bio_integrity_advance(bio, nbytes);
 
-               if (bio->bi_size == 0)
-                       bio_endio(bio, error);
-       } else {
-               /*
-                * Okay, this is the sequenced flush request in
-                * progress, just record the error;
-                */
-               if (error && !q->flush_err)
-                       q->flush_err = error;
-       }
+       /* don't actually finish the bio if it's part of a flush sequence */
+       if (bio->bi_size == 0 && !(rq->cmd_flags & REQ_FLUSH_SEQ))
+               bio_endio(bio, error);
 }
 
 void blk_dump_rq_flags(struct request *rq, char *msg)
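
For intuition: the rewritten req_bio_endio() clamps the completed byte count,
advances the bio by nbytes >> 9 sectors (512 bytes each), and ends the bio
only once it is fully drained and not owned by a flush sequence. A toy,
self-contained illustration of that bookkeeping (hypothetical toy_* names,
not kernel code):

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#define TOY_FLUSH_SEQ	(1u << 0)	/* stand-in for REQ_FLUSH_SEQ */

struct toy_bio {
	uint32_t size;		/* bytes still outstanding */
	uint64_t sector;	/* next 512-byte sector */
};

/* Returns true when the caller should complete (end) the bio. */
static bool toy_bio_advance(struct toy_bio *bio, uint32_t nbytes,
			    unsigned int rq_flags)
{
	if (nbytes > bio->size)		/* clamp, as the kernel code does */
		nbytes = bio->size;

	bio->size -= nbytes;
	bio->sector += nbytes >> 9;	/* bytes -> 512-byte sectors */

	return bio->size == 0 && !(rq_flags & TOY_FLUSH_SEQ);
}

int main(void)
{
	struct toy_bio bio = { .size = 8192, .sector = 100 };

	toy_bio_advance(&bio, 4096, 0);		/* half done: not ended yet */
	printf("left=%u sector=%llu\n", bio.size,
	       (unsigned long long)bio.sector);	/* left=4096 sector=108 */
	return 0;
}
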
@@ -525,7 +530,9 @@ struct request_queue *blk_alloc_queue_node(gfp_t gfp_mask, int node_id)
        init_timer(&q->unplug_timer);
        setup_timer(&q->timeout, blk_rq_timed_out_timer, (unsigned long) q);
        INIT_LIST_HEAD(&q->timeout_list);
-       INIT_LIST_HEAD(&q->pending_flushes);
+       INIT_LIST_HEAD(&q->flush_queue[0]);
+       INIT_LIST_HEAD(&q->flush_queue[1]);
+       INIT_LIST_HEAD(&q->flush_data_in_flight);
        INIT_WORK(&q->unplug_work, blk_unplug_work);
 
        kobject_init(&q->kobj, &blk_queue_ktype);
@@ -745,6 +752,25 @@ static void freed_request(struct request_queue *q, int sync, int priv)
                __freed_request(q, sync ^ 1);
 }
 
+/*
+ * Determine if elevator data should be initialized when allocating the
+ * request associated with @bio.
+ */
+static bool blk_rq_should_init_elevator(struct bio *bio)
+{
+       if (!bio)
+               return true;
+
+       /*
+        * Flush requests do not use the elevator, so skip initialization.
+        * This lets the request share storage for flush and elevator data.
+        */
+       if (bio->bi_rw & (REQ_FLUSH | REQ_FUA))
+               return false;
+
+       return true;
+}
+
 /*
  * Get a free request, queue_lock must be held.
  * Returns NULL on failure, with queue_lock held.
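
Informally, the new predicate behaves as follows (values read off the code
above; REQ_WRITE is shown only as an example flag):

	bio == NULL                      -> true   (no bio to inspect)
	bi_rw = REQ_WRITE                -> true   (normal I/O, init elevator)
	bi_rw = REQ_WRITE | REQ_FLUSH    -> false  (flush path, skip elevator)
	bi_rw = REQ_WRITE | REQ_FUA      -> false  (flush path, skip elevator)

The caller in get_request() below only computes priv and bumps rl->elvpriv
when this returns true.
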
@@ -757,7 +783,7 @@ static struct request *get_request(struct request_queue *q, int rw_flags,
        struct request_list *rl = &q->rq;
        struct io_context *ioc = NULL;
        const bool is_sync = rw_is_sync(rw_flags) != 0;
-       int may_queue, priv;
+       int may_queue, priv = 0;
 
        may_queue = elv_may_queue(q, rw_flags);
        if (may_queue == ELV_MQUEUE_NO)
@@ -801,9 +827,11 @@ static struct request *get_request(struct request_queue *q, int rw_flags,
        rl->count[is_sync]++;
        rl->starved[is_sync] = 0;
 
-       priv = !test_bit(QUEUE_FLAG_ELVSWITCH, &q->queue_flags);
-       if (priv)
-               rl->elvpriv++;
+       if (blk_rq_should_init_elevator(bio)) {
+               priv = !test_bit(QUEUE_FLAG_ELVSWITCH, &q->queue_flags);
+               if (priv)
+                       rl->elvpriv++;
+       }
 
        if (blk_queue_io_stat(q))
                rw_flags |= REQ_IO_STAT;
@@ -1204,7 +1232,7 @@ static int __make_request(struct request_queue *q, struct bio *bio)
        spin_lock_irq(q->queue_lock);
 
        if (bio->bi_rw & (REQ_FLUSH | REQ_FUA)) {
-               where = ELEVATOR_INSERT_FRONT;
+               where = ELEVATOR_INSERT_FLUSH;
                goto get_rq;
        }
 
@@ -1329,9 +1357,9 @@ static inline void blk_partition_remap(struct bio *bio)
                bio->bi_sector += p->start_sect;
                bio->bi_bdev = bdev->bd_contains;
 
-               trace_block_remap(bdev_get_queue(bio->bi_bdev), bio,
-                                   bdev->bd_dev,
-                                   bio->bi_sector - p->start_sect);
+               trace_block_bio_remap(bdev_get_queue(bio->bi_bdev), bio,
+                                     bdev->bd_dev,
+                                     bio->bi_sector - p->start_sect);
        }
 }
 
@@ -1500,7 +1528,7 @@ static inline void __generic_make_request(struct bio *bio)
                        goto end_io;
 
                if (old_sector != -1)
-                       trace_block_remap(q, bio, old_dev, old_sector);
+                       trace_block_bio_remap(q, bio, old_dev, old_sector);
 
                old_sector = bio->bi_sector;
                old_dev = bio->bi_bdev->bd_dev;
@@ -1776,7 +1804,7 @@ static void blk_account_io_completion(struct request *req, unsigned int bytes)
                int cpu;
 
                cpu = part_stat_lock();
-               part = disk_map_sector_rcu(req->rq_disk, blk_rq_pos(req));
+               part = req->part;
                part_stat_add(cpu, part, sectors[rw], bytes >> 9);
                part_stat_unlock();
        }
@@ -1789,20 +1817,21 @@ static void blk_account_io_done(struct request *req)
         * normal IO on queueing nor completion.  Accounting the
         * containing request is enough.
         */
-       if (blk_do_io_stat(req) && req != &req->q->flush_rq) {
+       if (blk_do_io_stat(req) && !(req->cmd_flags & REQ_FLUSH_SEQ)) {
                unsigned long duration = jiffies - req->start_time;
                const int rw = rq_data_dir(req);
                struct hd_struct *part;
                int cpu;
 
                cpu = part_stat_lock();
-               part = disk_map_sector_rcu(req->rq_disk, blk_rq_pos(req));
+               part = req->part;
 
                part_stat_inc(cpu, part, ios[rw]);
                part_stat_add(cpu, part, ticks[rw], duration);
                part_round_stats(cpu, part);
                part_dec_in_flight(part, rw);
 
+               hd_struct_put(part);
                part_stat_unlock();
        }
 }
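
The hd_struct_put() added here balances the reference taken in
drive_stat_acct() when the request was first accounted, so the partition
cannot vanish while rq->part still points at it. A tiny sketch of that
acquire-at-submission/release-at-completion discipline, reusing the
hypothetical toy_part helpers from the sketch after the drive_stat_acct()
hunk:

struct toy_rq {
	struct toy_part *part;		/* pinned at submission */
	struct toy_part *part0;		/* whole-disk fallback */
};

/* at submission: pin the partition the request is accounted against */
static void toy_account_start(struct toy_rq *rq, struct toy_part *looked_up)
{
	rq->part = toy_part_for_acct(looked_up, rq->part0);	/* takes a ref */
}

/* at completion: update final stats, then drop the submission-time ref */
static void toy_account_done(struct toy_rq *rq)
{
	atomic_fetch_sub(&rq->part->ref, 1);	/* toy "put" */
	rq->part = NULL;
}
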
@@ -2606,7 +2635,9 @@ int __init blk_dev_init(void)
        BUILD_BUG_ON(__REQ_NR_BITS > 8 *
                        sizeof(((struct request *)0)->cmd_flags));
 
-       kblockd_workqueue = create_workqueue("kblockd");
+       /* used for unplugging and affects IO latency/throughput - HIGHPRI */
+       kblockd_workqueue = alloc_workqueue("kblockd",
+                                           WQ_MEM_RECLAIM | WQ_HIGHPRI, 0);
        if (!kblockd_workqueue)
                panic("Failed to create kblockd\n");