Merge branch 'for-3.3/core' of git://git.kernel.dk/linux-block

author Linus Torvalds <torvalds@linux-foundation.org>
Sun, 15 Jan 2012 20:24:45 +0000 (12:24 -0800)
committer Linus Torvalds <torvalds@linux-foundation.org>
Sun, 15 Jan 2012 20:24:45 +0000 (12:24 -0800)
diff --combined block/blk-cgroup.c

index b8c143d68ee02664758df3e6d702c79fd594d8d4,27886935804966b8246c3495898cdaec1088b9f6..fa8f26309444d2cdda41cae813cf6f5a70f1de06
--- 1/block/blk-cgroup.c
--- 2/block/blk-cgroup.c
+++ b/block/blk-cgroup.c
@@@ -30,10 -30,8 +30,10 @@@ EXPORT_SYMBOL_GPL(blkio_root_cgroup)
   
   static struct cgroup_subsys_state *blkiocg_create(struct cgroup_subsys *,
                                                   struct cgroup *);
- -static int blkiocg_can_attach_task(struct cgroup *, struct task_struct *);
- -static void blkiocg_attach_task(struct cgroup *, struct task_struct *);
+ +static int blkiocg_can_attach(struct cgroup_subsys *, struct cgroup *,
+ +                            struct cgroup_taskset *);
+ +static void blkiocg_attach(struct cgroup_subsys *, struct cgroup *,
+ +                         struct cgroup_taskset *);
   static void blkiocg_destroy(struct cgroup_subsys *, struct cgroup *);
   static int blkiocg_populate(struct cgroup_subsys *, struct cgroup *);
   
@@@ -46,8 -44,8 +46,8 @@@
   struct cgroup_subsys blkio_subsys = {
         .name = "blkio",
         .create = blkiocg_create,
- -      .can_attach_task = blkiocg_can_attach_task,
- -      .attach_task = blkiocg_attach_task,
+ +      .can_attach = blkiocg_can_attach,
+ +      .attach = blkiocg_attach,
         .destroy = blkiocg_destroy,
         .populate = blkiocg_populate,
   #ifdef CONFIG_BLK_CGROUP
@@@ -1628,38 -1626,30 +1628,39 @@@ done
    * of the main cic data structures.  For now we allow a task to change
    * its cgroup only if it's the only owner of its ioc.
    */
- -static int blkiocg_can_attach_task(struct cgroup *cgrp, struct task_struct *tsk)
+ +static int blkiocg_can_attach(struct cgroup_subsys *ss, struct cgroup *cgrp,
+ +                            struct cgroup_taskset *tset)
   {
+ +      struct task_struct *task;
         struct io_context *ioc;
         int ret = 0;
   
         /* task_lock() is needed to avoid races with exit_io_context() */
- -      task_lock(tsk);
- -      ioc = tsk->io_context;
- -      if (ioc && atomic_read(&ioc->nr_tasks) > 1)
- -              ret = -EINVAL;
- -      task_unlock(tsk);
- -
+ +      cgroup_taskset_for_each(task, cgrp, tset) {
+ +              task_lock(task);
+ +              ioc = task->io_context;
+ +              if (ioc && atomic_read(&ioc->nr_tasks) > 1)
+ +                      ret = -EINVAL;
+ +              task_unlock(task);
+ +              if (ret)
+ +                      break;
+ +      }
         return ret;
   }
   
- -static void blkiocg_attach_task(struct cgroup *cgrp, struct task_struct *tsk)
+ +static void blkiocg_attach(struct cgroup_subsys *ss, struct cgroup *cgrp,
+ +                         struct cgroup_taskset *tset)
   {
+ +      struct task_struct *task;
         struct io_context *ioc;
   
- -      /* we don't lose anything even if ioc allocation fails */
- -      ioc = get_task_io_context(tsk, GFP_ATOMIC, NUMA_NO_NODE);
- -      if (ioc) {
- -              ioc_cgroup_changed(ioc);
- -              put_io_context(ioc, NULL);
+ +      cgroup_taskset_for_each(task, cgrp, tset) {
-               task_lock(task);
-               ioc = task->io_context;
-               if (ioc)
-                       ioc->cgroup_changed = 1;
-               task_unlock(task);
++              /* we don't lose anything even if ioc allocation fails */
++              ioc = get_task_io_context(task, GFP_ATOMIC, NUMA_NO_NODE);
++              if (ioc) {
++                      ioc_cgroup_changed(ioc);
++                      put_io_context(ioc, NULL);
++              }
         }
   }
   
diff --combined block/blk-core.c

index 15de223c7f9371a9da852825ea8857789d94ae70,8fbdac7010bb425fb025f156a1fe4210e58428b9..e6c05a97ee2ba94538222d76273d0d2fbae644cc
--- 1/block/blk-core.c
--- 2/block/blk-core.c
+++ b/block/blk-core.c
@@@ -39,6 -39,8 +39,8 @@@ EXPORT_TRACEPOINT_SYMBOL_GPL(block_bio_
   EXPORT_TRACEPOINT_SYMBOL_GPL(block_rq_remap);
   EXPORT_TRACEPOINT_SYMBOL_GPL(block_bio_complete);
   
+ DEFINE_IDA(blk_queue_ida);
+ 
   /*
    * For the allocated request tables
    */
@@@ -358,7 -360,8 +360,8 @@@ EXPORT_SYMBOL(blk_put_queue)
   void blk_drain_queue(struct request_queue *q, bool drain_all)
   {
         while (true) {
-               int nr_rqs;
+               bool drain = false;
+               int i;
   
                 spin_lock_irq(q->queue_lock);
   
@@@ -366,23 -369,27 +369,34 @@@
                 if (drain_all)
                         blk_throtl_drain(q);
   
- -              __blk_run_queue(q);
+ +              /*
+ +               * This function might be called on a queue which failed
+ +               * driver init after queue creation.  Some drivers
+ +               * (e.g. fd) get unhappy in such cases.  Kick queue iff
+ +               * dispatch queue has something on it.
+ +               */
+ +              if (!list_empty(&q->queue_head))
+ +                      __blk_run_queue(q);
   
-               if (drain_all)
-                       nr_rqs = q->rq.count[0] + q->rq.count[1];
-               else
-                       nr_rqs = q->rq.elvpriv;
+               drain |= q->rq.elvpriv;
+ 
+               /*
+                * Unfortunately, requests are queued at and tracked from
+                * multiple places and there's no single counter which can
+                * be drained.  Check all the queues and counters.
+                */
+               if (drain_all) {
+                       drain |= !list_empty(&q->queue_head);
+                       for (i = 0; i < 2; i++) {
+                               drain |= q->rq.count[i];
+                               drain |= q->in_flight[i];
+                               drain |= !list_empty(&q->flush_queue[i]);
+                       }
+               }
   
                 spin_unlock_irq(q->queue_lock);
   
-               if (!nr_rqs)
+               if (!drain)
                         break;
                 msleep(10);
         }
@@@ -469,28 -476,28 +483,29 @@@ struct request_queue *blk_alloc_queue_n
         if (!q)
                 return NULL;
   
+       q->id = ida_simple_get(&blk_queue_ida, 0, 0, GFP_KERNEL);
+       if (q->id < 0)
+               goto fail_q;
+ 
         q->backing_dev_info.ra_pages =
                         (VM_MAX_READAHEAD * 1024) / PAGE_CACHE_SIZE;
         q->backing_dev_info.state = 0;
         q->backing_dev_info.capabilities = BDI_CAP_MAP_COPY;
         q->backing_dev_info.name = "block";
+ +      q->node = node_id;
   
         err = bdi_init(&q->backing_dev_info);
-       if (err) {
-               kmem_cache_free(blk_requestq_cachep, q);
-               return NULL;
-       }
+       if (err)
+               goto fail_id;
   
-       if (blk_throtl_init(q)) {
-               kmem_cache_free(blk_requestq_cachep, q);
-               return NULL;
-       }
+       if (blk_throtl_init(q))
+               goto fail_id;
   
         setup_timer(&q->backing_dev_info.laptop_mode_wb_timer,
                     laptop_mode_timer_fn, (unsigned long) q);
         setup_timer(&q->timeout, blk_rq_timed_out_timer, (unsigned long) q);
         INIT_LIST_HEAD(&q->timeout_list);
+       INIT_LIST_HEAD(&q->icq_list);
         INIT_LIST_HEAD(&q->flush_queue[0]);
         INIT_LIST_HEAD(&q->flush_queue[1]);
         INIT_LIST_HEAD(&q->flush_data_in_flight);
@@@ -508,6 -515,12 +523,12 @@@
         q->queue_lock = &q->__queue_lock;
   
         return q;
+ 
+ fail_id:
+       ida_simple_remove(&blk_queue_ida, q->id);
+ fail_q:
+       kmem_cache_free(blk_requestq_cachep, q);
+       return NULL;
   }
   EXPORT_SYMBOL(blk_alloc_queue_node);
   
@@@ -559,7 -572,7 +580,7 @@@ blk_init_queue_node(request_fn_proc *rf
         if (!uninit_q)
                 return NULL;
   
- -      q = blk_init_allocated_queue_node(uninit_q, rfn, lock, node_id);
+ +      q = blk_init_allocated_queue(uninit_q, rfn, lock);
         if (!q)
                 blk_cleanup_queue(uninit_q);
   
@@@ -570,10 -583,19 +591,10 @@@ EXPORT_SYMBOL(blk_init_queue_node)
   struct request_queue *
   blk_init_allocated_queue(struct request_queue *q, request_fn_proc *rfn,
                          spinlock_t *lock)
- -{
- -      return blk_init_allocated_queue_node(q, rfn, lock, -1);
- -}
- -EXPORT_SYMBOL(blk_init_allocated_queue);
- -
- -struct request_queue *
- -blk_init_allocated_queue_node(struct request_queue *q, request_fn_proc *rfn,
- -                            spinlock_t *lock, int node_id)
   {
         if (!q)
                 return NULL;
   
- -      q->node = node_id;
         if (blk_init_free_list(q))
                 return NULL;
   
@@@ -603,28 -625,33 +624,33 @@@
   
         return NULL;
   }
- -EXPORT_SYMBOL(blk_init_allocated_queue_node);
+ +EXPORT_SYMBOL(blk_init_allocated_queue);
   
- int blk_get_queue(struct request_queue *q)
+ bool blk_get_queue(struct request_queue *q)
   {
-       if (likely(!test_bit(QUEUE_FLAG_DEAD, &q->queue_flags))) {
-               kobject_get(&q->kobj);
-               return 0;
+       if (likely(!blk_queue_dead(q))) {
+               __blk_get_queue(q);
+               return true;
         }
   
-       return 1;
+       return false;
   }
   EXPORT_SYMBOL(blk_get_queue);
   
   static inline void blk_free_request(struct request_queue *q, struct request *rq)
   {
-       if (rq->cmd_flags & REQ_ELVPRIV)
+       if (rq->cmd_flags & REQ_ELVPRIV) {
                 elv_put_request(q, rq);
+               if (rq->elv.icq)
+                       put_io_context(rq->elv.icq->ioc, q);
+       }
+ 
         mempool_free(rq, q->rq.rq_pool);
   }
   
   static struct request *
- blk_alloc_request(struct request_queue *q, unsigned int flags, gfp_t gfp_mask)
+ blk_alloc_request(struct request_queue *q, struct io_cq *icq,
+                 unsigned int flags, gfp_t gfp_mask)
   {
         struct request *rq = mempool_alloc(q->rq.rq_pool, gfp_mask);
   
@@@ -635,10 -662,15 +661,15 @@@
   
         rq->cmd_flags = flags | REQ_ALLOCED;
   
-       if ((flags & REQ_ELVPRIV) &&
-           unlikely(elv_set_request(q, rq, gfp_mask))) {
-               mempool_free(rq, q->rq.rq_pool);
-               return NULL;
+       if (flags & REQ_ELVPRIV) {
+               rq->elv.icq = icq;
+               if (unlikely(elv_set_request(q, rq, gfp_mask))) {
+                       mempool_free(rq, q->rq.rq_pool);
+                       return NULL;
+               }
+               /* @rq->elv.icq holds on to io_context until @rq is freed */
+               if (icq)
+                       get_io_context(icq->ioc);
         }
   
         return rq;
@@@ -750,11 -782,17 +781,17 @@@ static struct request *get_request(stru
   {
         struct request *rq = NULL;
         struct request_list *rl = &q->rq;
-       struct io_context *ioc = NULL;
+       struct elevator_type *et;
+       struct io_context *ioc;
+       struct io_cq *icq = NULL;
         const bool is_sync = rw_is_sync(rw_flags) != 0;
+       bool retried = false;
         int may_queue;
+ retry:
+       et = q->elevator->type;
+       ioc = current->io_context;
   
-       if (unlikely(test_bit(QUEUE_FLAG_DEAD, &q->queue_flags)))
+       if (unlikely(blk_queue_dead(q)))
                 return NULL;
   
         may_queue = elv_may_queue(q, rw_flags);
@@@ -763,7 -801,20 +800,20 @@@
   
         if (rl->count[is_sync]+1 >= queue_congestion_on_threshold(q)) {
                 if (rl->count[is_sync]+1 >= q->nr_requests) {
-                       ioc = current_io_context(GFP_ATOMIC, q->node);
+                       /*
+                        * We want ioc to record batching state.  If it's
+                        * not already there, creating a new one requires
+                        * dropping queue_lock, which in turn requires
+                        * retesting conditions to avoid queue hang.
+                        */
+                       if (!ioc && !retried) {
+                               spin_unlock_irq(q->queue_lock);
+                               create_io_context(current, gfp_mask, q->node);
+                               spin_lock_irq(q->queue_lock);
+                               retried = true;
+                               goto retry;
+                       }
+ 
                         /*
                          * The queue will fill after this allocation, so set
                          * it as full, and mark this process as "batching".
@@@ -799,17 -850,36 +849,36 @@@
         rl->count[is_sync]++;
         rl->starved[is_sync] = 0;
   
+       /*
+        * Decide whether the new request will be managed by elevator.  If
+        * so, mark @rw_flags and increment elvpriv.  Non-zero elvpriv will
+        * prevent the current elevator from being destroyed until the new
+        * request is freed.  This guarantees icq's won't be destroyed and
+        * makes creating new ones safe.
+        *
+        * Also, lookup icq while holding queue_lock.  If it doesn't exist,
+        * it will be created after releasing queue_lock.
+        */
         if (blk_rq_should_init_elevator(bio) &&
             !test_bit(QUEUE_FLAG_ELVSWITCH, &q->queue_flags)) {
                 rw_flags |= REQ_ELVPRIV;
                 rl->elvpriv++;
+               if (et->icq_cache && ioc)
+                       icq = ioc_lookup_icq(ioc, q);
         }
   
         if (blk_queue_io_stat(q))
                 rw_flags |= REQ_IO_STAT;
         spin_unlock_irq(q->queue_lock);
   
-       rq = blk_alloc_request(q, rw_flags, gfp_mask);
+       /* create icq if missing */
+       if (unlikely(et->icq_cache && !icq))
+               icq = ioc_create_icq(q, gfp_mask);
+ 
+       /* rqs are guaranteed to have icq on elv_set_request() if requested */
+       if (likely(!et->icq_cache || icq))
+               rq = blk_alloc_request(q, icq, rw_flags, gfp_mask);
+ 
         if (unlikely(!rq)) {
                 /*
                  * Allocation failed presumably due to memory. Undo anything
@@@ -871,10 -941,9 +940,9 @@@ static struct request *get_request_wait
         rq = get_request(q, rw_flags, bio, GFP_NOIO);
         while (!rq) {
                 DEFINE_WAIT(wait);
-               struct io_context *ioc;
                 struct request_list *rl = &q->rq;
   
-               if (unlikely(test_bit(QUEUE_FLAG_DEAD, &q->queue_flags)))
+               if (unlikely(blk_queue_dead(q)))
                         return NULL;
   
                 prepare_to_wait_exclusive(&rl->wait[is_sync], &wait,
@@@ -891,8 -960,8 +959,8 @@@
                  * up to a big batch of them for a small period time.
                  * See ioc_batching, ioc_set_batching
                  */
-               ioc = current_io_context(GFP_NOIO, q->node);
-               ioc_set_batching(q, ioc);
+               create_io_context(current, GFP_NOIO, q->node);
+               ioc_set_batching(q, current->io_context);
   
                 spin_lock_irq(q->queue_lock);
                 finish_wait(&rl->wait[is_sync], &wait);
@@@ -1009,54 -1078,6 +1077,6 @@@ static void add_acct_request(struct req
         __elv_add_request(q, rq, where);
   }
   
- /**
-  * blk_insert_request - insert a special request into a request queue
-  * @q:                request queue where request should be inserted
-  * @rq:               request to be inserted
-  * @at_head:  insert request at head or tail of queue
-  * @data:     private data
-  *
-  * Description:
-  *    Many block devices need to execute commands asynchronously, so they don't
-  *    block the whole kernel from preemption during request execution.  This is
-  *    accomplished normally by inserting aritficial requests tagged as
-  *    REQ_TYPE_SPECIAL in to the corresponding request queue, and letting them
-  *    be scheduled for actual execution by the request queue.
-  *
-  *    We have the option of inserting the head or the tail of the queue.
-  *    Typically we use the tail for new ioctls and so forth.  We use the head
-  *    of the queue for things like a QUEUE_FULL message from a device, or a
-  *    host that is unable to accept a particular command.
-  */
- void blk_insert_request(struct request_queue *q, struct request *rq,
-                       int at_head, void *data)
- {
-       int where = at_head ? ELEVATOR_INSERT_FRONT : ELEVATOR_INSERT_BACK;
-       unsigned long flags;
- 
-       /*
-        * tell I/O scheduler that this isn't a regular read/write (ie it
-        * must not attempt merges on this) and that it acts as a soft
-        * barrier
-        */
-       rq->cmd_type = REQ_TYPE_SPECIAL;
- 
-       rq->special = data;
- 
-       spin_lock_irqsave(q->queue_lock, flags);
- 
-       /*
-        * If command is tagged, release the tag
-        */
-       if (blk_rq_tagged(rq))
-               blk_queue_end_tag(q, rq);
- 
-       add_acct_request(q, rq, where);
-       __blk_run_queue(q);
-       spin_unlock_irqrestore(q->queue_lock, flags);
- }
- EXPORT_SYMBOL(blk_insert_request);
- 
   static void part_round_stats_single(int cpu, struct hd_struct *part,
                                     unsigned long now)
   {
@@@ -1766,6 -1787,10 +1786,10 @@@ int blk_insert_cloned_request(struct re
                 return -EIO;
   
         spin_lock_irqsave(q->queue_lock, flags);
+       if (unlikely(blk_queue_dead(q))) {
+               spin_unlock_irqrestore(q->queue_lock, flags);
+               return -ENODEV;
+       }
   
         /*
          * Submitting request must be dequeued before calling this function
@@@ -2739,6 -2764,14 +2763,14 @@@ static void queue_unplugged(struct requ
   {
         trace_block_unplug(q, depth, !from_schedule);
   
+       /*
+        * Don't mess with dead queue.
+        */
+       if (unlikely(blk_queue_dead(q))) {
+               spin_unlock(q->queue_lock);
+               return;
+       }
+ 
         /*
          * If we are punting this to kblockd, then we can safely drop
          * the queue_lock before waking kblockd (which needs to take
@@@ -2815,6 -2848,15 +2847,15 @@@ void blk_flush_plug_list(struct blk_plu
                         depth = 0;
                         spin_lock(q->queue_lock);
                 }
+ 
+               /*
+                * Short-circuit if @q is dead
+                */
+               if (unlikely(blk_queue_dead(q))) {
+                       __blk_end_request_all(rq, -ENODEV);
+                       continue;
+               }
+ 
                 /*
                  * rq is already accounted, so use raw insert
                  */
diff --combined block/bsg.c

index 9651ec7b87c22f1a14b9f5cbdad073006de222d1,167d586cece6e110ae7a0f39bc475c95643f7dd4..4cf703fd98bb8916fe74ae3b42d4856958d57950
--- 1/block/bsg.c
--- 2/block/bsg.c
+++ b/block/bsg.c
@@@ -769,12 -769,10 +769,10 @@@ static struct bsg_device *bsg_add_devic
                                          struct file *file)
   {
         struct bsg_device *bd;
-       int ret;
   #ifdef BSG_DEBUG
         unsigned char buf[32];
   #endif
-       ret = blk_get_queue(rq);
-       if (ret)
+       if (!blk_get_queue(rq))
                 return ERR_PTR(-ENXIO);
   
         bd = bsg_alloc_device();
@@@ -1070,7 -1068,7 +1068,7 @@@ EXPORT_SYMBOL_GPL(bsg_register_queue)
   
   static struct cdev bsg_cdev;
   
- -static char *bsg_devnode(struct device *dev, mode_t *mode)
+ +static char *bsg_devnode(struct device *dev, umode_t *mode)
   {
         return kasprintf(GFP_KERNEL, "bsg/%s", dev_name(dev));
   }
diff --combined block/genhd.c

index 83e7c04015e1f920c813917bff937b4735902480,c958169d24f04c338624e842622955453f62b5ae..23b4f7063322c303dd5a1ab15c66a3a62daca7ca
--- 1/block/genhd.c
--- 2/block/genhd.c
+++ b/block/genhd.c
@@@ -15,6 -15,7 +15,6 @@@
   #include <linux/slab.h>
   #include <linux/kmod.h>
   #include <linux/kobj_map.h>
- -#include <linux/buffer_head.h>
   #include <linux/mutex.h>
   #include <linux/idr.h>
   #include <linux/log2.h>
@@@ -506,7 -507,7 +506,7 @@@ static int exact_lock(dev_t devt, void 
         return 0;
   }
   
- -void register_disk(struct gendisk *disk)
+ +static void register_disk(struct gendisk *disk)
   {
         struct device *ddev = disk_to_dev(disk);
         struct block_device *bdev;
@@@ -614,7 -615,7 +614,7 @@@ void add_disk(struct gendisk *disk
          * Take an extra ref on queue which will be put on disk_release()
          * so that it sticks around as long as @disk is there.
          */
-       WARN_ON_ONCE(blk_get_queue(disk->queue));
+       WARN_ON_ONCE(!blk_get_queue(disk->queue));
   
         retval = sysfs_create_link(&disk_to_dev(disk)->kobj, &bdi->dev->kobj,
                                    "bdi");
@@@ -1108,7 -1109,7 +1108,7 @@@ struct class block_class = 
         .name           = "block",
   };
   
- -static char *block_devnode(struct device *dev, mode_t *mode)
+ +static char *block_devnode(struct device *dev, umode_t *mode)
   {
         struct gendisk *disk = dev_to_disk(dev);
   
diff --combined block/ioctl.c

index 4828fa34981314f34555a2e85ed381d660d9fa3b,337d207ab14dd1df31111d4d13cfa31e7f55b07c..ba15b2dbfb98ea55911109543f35889ea7b615da
--- 1/block/ioctl.c
--- 2/block/ioctl.c
+++ b/block/ioctl.c
@@@ -5,7 -5,7 +5,7 @@@
   #include <linux/blkpg.h>
   #include <linux/hdreg.h>
   #include <linux/backing-dev.h>
- -#include <linux/buffer_head.h>
+ +#include <linux/fs.h>
   #include <linux/blktrace_api.h>
   #include <asm/uaccess.h>
   
@@@ -179,26 -179,6 +179,26 @@@ int __blkdev_driver_ioctl(struct block_
    */
   EXPORT_SYMBOL_GPL(__blkdev_driver_ioctl);
   
+ +/*
+ + * Is it an unrecognized ioctl? The correct returns are either
+ + * ENOTTY (final) or ENOIOCTLCMD ("I don't know this one, try a
+ + * fallback"). ENOIOCTLCMD gets turned into ENOTTY by the ioctl
+ + * code before returning.
+ + *
+ + * Confused drivers sometimes return EINVAL, which is wrong. It
+ + * means "I understood the ioctl command, but the parameters to
+ + * it were wrong".
+ + *
+ + * We should aim to just fix the broken drivers, the EINVAL case
+ + * should go away.
+ + */
+ +static inline int is_unrecognized_ioctl(int ret)
+ +{
+ +      return  ret == -EINVAL ||
+ +              ret == -ENOTTY ||
+ +              ret == -ENOIOCTLCMD;
+ +}
+ +
   /*
    * always keep this in sync with compat_blkdev_ioctl()
    */
@@@ -216,7 -196,8 +216,7 @@@ int blkdev_ioctl(struct block_device *b
                         return -EACCES;
   
                 ret = __blkdev_driver_ioctl(bdev, mode, cmd, arg);
- -              /* -EINVAL to handle old uncorrected drivers */
- -              if (ret != -EINVAL && ret != -ENOTTY)
+ +              if (!is_unrecognized_ioctl(ret))
                         return ret;
   
                 fsync_bdev(bdev);
@@@ -225,7 -206,8 +225,7 @@@
   
         case BLKROSET:
                 ret = __blkdev_driver_ioctl(bdev, mode, cmd, arg);
- -              /* -EINVAL to handle old uncorrected drivers */
- -              if (ret != -EINVAL && ret != -ENOTTY)
+ +              if (!is_unrecognized_ioctl(ret))
                         return ret;
                 if (!capable(CAP_SYS_ADMIN))
                         return -EACCES;
@@@ -296,6 -278,8 +296,8 @@@
                 return put_uint(arg, bdev_discard_zeroes_data(bdev));
         case BLKSECTGET:
                 return put_ushort(arg, queue_max_sectors(bdev_get_queue(bdev)));
+       case BLKROTATIONAL:
+               return put_ushort(arg, !blk_queue_nonrot(bdev_get_queue(bdev)));
         case BLKRASET:
         case BLKFRASET:
                 if(!capable(CAP_SYS_ADMIN))
diff --combined drivers/md/md.c

index da52acb60f520088fbb169de53769d5f81abc887,114ba155af87dc69e7576c4e8f803968bfbd5726..9417ae2fa0bbc68b061d6b50ba23f5127dae5b2c
--- 1/drivers/md/md.c
--- 2/drivers/md/md.c
+++ b/drivers/md/md.c
@@@ -36,7 -36,8 +36,7 @@@
   #include <linux/blkdev.h>
   #include <linux/sysctl.h>
   #include <linux/seq_file.h>
- -#include <linux/mutex.h>
- -#include <linux/buffer_head.h> /* for invalidate_bdev */
+ +#include <linux/fs.h>
   #include <linux/poll.h>
   #include <linux/ctype.h>
   #include <linux/string.h>
@@@ -1713,8 -1714,6 +1713,8 @@@ static int super_1_validate(struct mdde
                 }
                 if (sb->devflags & WriteMostly1)
                         set_bit(WriteMostly, &rdev->flags);
+ +              if (le32_to_cpu(sb->feature_map) & MD_FEATURE_REPLACEMENT)
+ +                      set_bit(Replacement, &rdev->flags);
         } else /* MULTIPATH are always insync */
                 set_bit(In_sync, &rdev->flags);
   
@@@ -1768,9 -1767,6 +1768,9 @@@ static void super_1_sync(struct mddev *
                 sb->recovery_offset =
                         cpu_to_le64(rdev->recovery_offset);
         }
+ +      if (test_bit(Replacement, &rdev->flags))
+ +              sb->feature_map |=
+ +                      cpu_to_le32(MD_FEATURE_REPLACEMENT);
   
         if (mddev->reshape_position != MaxSector) {
                 sb->feature_map |= cpu_to_le32(MD_FEATURE_RESHAPE_ACTIVE);
@@@ -2564,15 -2560,6 +2564,15 @@@ state_show(struct md_rdev *rdev, char *
                 len += sprintf(page+len, "%swrite_error", sep);
                 sep = ",";
         }
+ +      if (test_bit(WantReplacement, &rdev->flags)) {
+ +              len += sprintf(page+len, "%swant_replacement", sep);
+ +              sep = ",";
+ +      }
+ +      if (test_bit(Replacement, &rdev->flags)) {
+ +              len += sprintf(page+len, "%sreplacement", sep);
+ +              sep = ",";
+ +      }
+ +
         return len+sprintf(page+len, "\n");
   }
   
@@@ -2641,42 -2628,6 +2641,42 @@@ state_store(struct md_rdev *rdev, cons
         } else if (cmd_match(buf, "-write_error")) {
                 clear_bit(WriteErrorSeen, &rdev->flags);
                 err = 0;
+ +      } else if (cmd_match(buf, "want_replacement")) {
+ +              /* Any non-spare device that is not a replacement can
+ +               * become want_replacement at any time, but we then need to
+ +               * check if recovery is needed.
+ +               */
+ +              if (rdev->raid_disk >= 0 &&
+ +                  !test_bit(Replacement, &rdev->flags))
+ +                      set_bit(WantReplacement, &rdev->flags);
+ +              set_bit(MD_RECOVERY_NEEDED, &rdev->mddev->recovery);
+ +              md_wakeup_thread(rdev->mddev->thread);
+ +              err = 0;
+ +      } else if (cmd_match(buf, "-want_replacement")) {
+ +              /* Clearing 'want_replacement' is always allowed.
+ +               * Once replacements starts it is too late though.
+ +               */
+ +              err = 0;
+ +              clear_bit(WantReplacement, &rdev->flags);
+ +      } else if (cmd_match(buf, "replacement")) {
+ +              /* Can only set a device as a replacement when array has not
+ +               * yet been started.  Once running, replacement is automatic
+ +               * from spares, or by assigning 'slot'.
+ +               */
+ +              if (rdev->mddev->pers)
+ +                      err = -EBUSY;
+ +              else {
+ +                      set_bit(Replacement, &rdev->flags);
+ +                      err = 0;
+ +              }
+ +      } else if (cmd_match(buf, "-replacement")) {
+ +              /* Similarly, can only clear Replacement before start */
+ +              if (rdev->mddev->pers)
+ +                      err = -EBUSY;
+ +              else {
+ +                      clear_bit(Replacement, &rdev->flags);
+ +                      err = 0;
+ +              }
         }
         if (!err)
                 sysfs_notify_dirent_safe(rdev->sysfs_state);
@@@ -2738,7 -2689,7 +2738,7 @@@ slot_store(struct md_rdev *rdev, const 
                 if (rdev->mddev->pers->hot_remove_disk == NULL)
                         return -EINVAL;
                 err = rdev->mddev->pers->
- -                      hot_remove_disk(rdev->mddev, rdev->raid_disk);
+ +                      hot_remove_disk(rdev->mddev, rdev);
                 if (err)
                         return err;
                 sysfs_unlink_rdev(rdev->mddev, rdev);
@@@ -2746,6 -2697,7 +2746,6 @@@
                 set_bit(MD_RECOVERY_NEEDED, &rdev->mddev->recovery);
                 md_wakeup_thread(rdev->mddev->thread);
         } else if (rdev->mddev->pers) {
- -              struct md_rdev *rdev2;
                 /* Activating a spare .. or possibly reactivating
                  * if we ever get bitmaps working here.
                  */
@@@ -2759,6 -2711,10 +2759,6 @@@
                 if (rdev->mddev->pers->hot_add_disk == NULL)
                         return -EINVAL;
   
- -              list_for_each_entry(rdev2, &rdev->mddev->disks, same_set)
- -                      if (rdev2->raid_disk == slot)
- -                              return -EEXIST;
- -
                 if (slot >= rdev->mddev->raid_disks &&
                     slot >= rdev->mddev->raid_disks + rdev->mddev->delta_disks)
                         return -ENOSPC;
@@@ -4666,6 -4622,7 +4666,7 @@@ static int md_alloc(dev_t dev, char *na
         mddev->queue->queuedata = mddev;
   
         blk_queue_make_request(mddev->queue, md_make_request);
+       blk_set_stacking_limits(&mddev->queue->limits);
   
         disk = alloc_disk(1 << shift);
         if (!disk) {
@@@ -6098,15 -6055,8 +6099,15 @@@ static int md_ioctl(struct block_devic
         struct mddev *mddev = NULL;
         int ro;
   
- -      if (!capable(CAP_SYS_ADMIN))
- -              return -EACCES;
+ +      switch (cmd) {
+ +      case RAID_VERSION:
+ +      case GET_ARRAY_INFO:
+ +      case GET_DISK_INFO:
+ +              break;
+ +      default:
+ +              if (!capable(CAP_SYS_ADMIN))
+ +                      return -EACCES;
+ +      }
   
         /*
          * Commands dealing with the RAID driver but not any
@@@ -6766,11 -6716,8 +6767,11 @@@ static int md_seq_show(struct seq_file 
                         if (test_bit(Faulty, &rdev->flags)) {
                                 seq_printf(seq, "(F)");
                                 continue;
- -                      } else if (rdev->raid_disk < 0)
+ +                      }
+ +                      if (rdev->raid_disk < 0)
                                 seq_printf(seq, "(S)"); /* spare */
+ +                      if (test_bit(Replacement, &rdev->flags))
+ +                              seq_printf(seq, "(R)");
                         sectors += rdev->sectors;
                 }
   
@@@ -7382,7 -7329,6 +7383,7 @@@ static int remove_and_add_spares(struc
   {
         struct md_rdev *rdev;
         int spares = 0;
+ +      int removed = 0;
   
         mddev->curr_resync_completed = 0;
   
@@@ -7393,32 -7339,30 +7394,32 @@@
                      ! test_bit(In_sync, &rdev->flags)) &&
                     atomic_read(&rdev->nr_pending)==0) {
                         if (mddev->pers->hot_remove_disk(
- -                                  mddev, rdev->raid_disk)==0) {
+ +                                  mddev, rdev) == 0) {
                                 sysfs_unlink_rdev(mddev, rdev);
                                 rdev->raid_disk = -1;
+ +                              removed++;
                         }
                 }
+ +      if (removed)
+ +              sysfs_notify(&mddev->kobj, NULL,
+ +                           "degraded");
   
- -      if (mddev->degraded) {
- -              list_for_each_entry(rdev, &mddev->disks, same_set) {
- -                      if (rdev->raid_disk >= 0 &&
- -                          !test_bit(In_sync, &rdev->flags) &&
- -                          !test_bit(Faulty, &rdev->flags))
+ +
+ +      list_for_each_entry(rdev, &mddev->disks, same_set) {
+ +              if (rdev->raid_disk >= 0 &&
+ +                  !test_bit(In_sync, &rdev->flags) &&
+ +                  !test_bit(Faulty, &rdev->flags))
+ +                      spares++;
+ +              if (rdev->raid_disk < 0
+ +                  && !test_bit(Faulty, &rdev->flags)) {
+ +                      rdev->recovery_offset = 0;
+ +                      if (mddev->pers->
+ +                          hot_add_disk(mddev, rdev) == 0) {
+ +                              if (sysfs_link_rdev(mddev, rdev))
+ +                                      /* failure here is OK */;
                                 spares++;
- -                      if (rdev->raid_disk < 0
- -                          && !test_bit(Faulty, &rdev->flags)) {
- -                              rdev->recovery_offset = 0;
- -                              if (mddev->pers->
- -                                  hot_add_disk(mddev, rdev) == 0) {
- -                                      if (sysfs_link_rdev(mddev, rdev))
- -                                              /* failure here is OK */;
- -                                      spares++;
- -                                      md_new_event(mddev);
- -                                      set_bit(MD_CHANGE_DEVS, &mddev->flags);
- -                              } else
- -                                      break;
+ +                              md_new_event(mddev);
+ +                              set_bit(MD_CHANGE_DEVS, &mddev->flags);
                         }
                 }
         }
@@@ -7533,7 -7477,7 +7534,7 @@@ void md_check_recovery(struct mddev *md
                                     test_bit(Faulty, &rdev->flags) &&
                                     atomic_read(&rdev->nr_pending)==0) {
                                         if (mddev->pers->hot_remove_disk(
- -                                                  mddev, rdev->raid_disk)==0) {
+ +                                                  mddev, rdev) == 0) {
                                                 sysfs_unlink_rdev(mddev, rdev);
                                                 rdev->raid_disk = -1;
                                         }
diff --combined include/linux/blkdev.h

index 0ed1eb062313fdf237124d94a8efe24a8a7680fa,5cfb9b22627f1ed61328ed058887b3bebb34d86e..6c6a1f008065984821435a50f12fd4c08d17fe42
--- 1/include/linux/blkdev.h
--- 2/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@@ -111,10 -111,14 +111,14 @@@ struct request 
          * Three pointers are available for the IO schedulers, if they need
          * more they have to dynamically allocate it.  Flush requests are
          * never put on the IO scheduler. So let the flush fields share
-        * space with the three elevator_private pointers.
+        * space with the elevator data.
          */
         union {
-               void *elevator_private[3];
+               struct {
+                       struct io_cq            *icq;
+                       void                    *priv[2];
+               } elv;
+ 
                 struct {
                         unsigned int            seq;
                         struct list_head        list;
@@@ -310,6 -314,12 +314,12 @@@ struct request_queue 
          */
         unsigned long           queue_flags;
   
+       /*
+        * ida allocated id for this queue.  Used to index queues from
+        * ioctx.
+        */
+       int                     id;
+ 
         /*
          * queue needs bounce pages for pages above this limit
          */
@@@ -351,6 -361,8 +361,8 @@@
         struct timer_list       timeout;
         struct list_head        timeout_list;
   
+       struct list_head        icq_list;
+ 
         struct queue_limits     limits;
   
         /*
@@@ -387,6 -399,9 +399,9 @@@
         /* Throttle data */
         struct throtl_data *td;
   #endif
+ #ifdef CONFIG_LOCKDEP
+       int                     ioc_release_depth;
+ #endif
   };
   
   #define QUEUE_FLAG_QUEUED     1       /* uses generic tag queueing */
@@@ -481,6 -496,7 +496,7 @@@ static inline void queue_flag_clear(uns
   
   #define blk_queue_tagged(q)   test_bit(QUEUE_FLAG_QUEUED, &(q)->queue_flags)
   #define blk_queue_stopped(q)  test_bit(QUEUE_FLAG_STOPPED, &(q)->queue_flags)
+ #define blk_queue_dead(q)     test_bit(QUEUE_FLAG_DEAD, &(q)->queue_flags)
   #define blk_queue_nomerges(q) test_bit(QUEUE_FLAG_NOMERGES, &(q)->queue_flags)
   #define blk_queue_noxmerges(q)        \
         test_bit(QUEUE_FLAG_NOXMERGES, &(q)->queue_flags)
@@@ -660,7 -676,6 +676,6 @@@ extern void __blk_put_request(struct re
   extern struct request *blk_get_request(struct request_queue *, int, gfp_t);
   extern struct request *blk_make_request(struct request_queue *, struct bio *,
                                         gfp_t);
- extern void blk_insert_request(struct request_queue *, struct request *, int, void *);
   extern void blk_requeue_request(struct request_queue *, struct request *);
   extern void blk_add_request_payload(struct request *rq, struct page *page,
                 unsigned int len);
@@@ -675,9 -690,6 +690,9 @@@ extern int blk_insert_cloned_request(st
                                      struct request *rq);
   extern void blk_delay_queue(struct request_queue *, unsigned long);
   extern void blk_recount_segments(struct request_queue *, struct bio *);
+ +extern int scsi_verify_blk_ioctl(struct block_device *, unsigned int);
+ +extern int scsi_cmd_blk_ioctl(struct block_device *, fmode_t,
+ +                            unsigned int, void __user *);
   extern int scsi_cmd_ioctl(struct request_queue *, struct gendisk *, fmode_t,
                           unsigned int, void __user *);
   extern int sg_scsi_ioctl(struct request_queue *, struct gendisk *, fmode_t,
@@@ -808,6 -820,9 +823,6 @@@ extern void blk_unprep_request(struct r
    */
   extern struct request_queue *blk_init_queue_node(request_fn_proc *rfn,
                                         spinlock_t *lock, int node_id);
- -extern struct request_queue *blk_init_allocated_queue_node(struct request_queue *,
- -                                                         request_fn_proc *,
- -                                                         spinlock_t *, int node_id);
   extern struct request_queue *blk_init_queue(request_fn_proc *, spinlock_t *);
   extern struct request_queue *blk_init_allocated_queue(struct request_queue *,
                                                       request_fn_proc *, spinlock_t *);
@@@ -829,6 -844,7 +844,7 @@@ extern void blk_queue_io_min(struct req
   extern void blk_limits_io_opt(struct queue_limits *limits, unsigned int opt);
   extern void blk_queue_io_opt(struct request_queue *q, unsigned int opt);
   extern void blk_set_default_limits(struct queue_limits *lim);
+ extern void blk_set_stacking_limits(struct queue_limits *lim);
   extern int blk_stack_limits(struct queue_limits *t, struct queue_limits *b,
                             sector_t offset);
   extern int bdev_stack_limits(struct queue_limits *t, struct block_device *bdev,
@@@ -859,7 -875,7 +875,7 @@@ extern int blk_rq_map_sg(struct request
   extern void blk_dump_rq_flags(struct request *, char *);
   extern long nr_blockdev_pages(void);
   
- int blk_get_queue(struct request_queue *);
+ bool __must_check blk_get_queue(struct request_queue *);
   struct request_queue *blk_alloc_queue(gfp_t);
   struct request_queue *blk_alloc_queue_node(gfp_t, int);
   extern void blk_put_queue(struct request_queue *);
@@@ -1282,19 -1298,70 +1298,70 @@@ queue_max_integrity_segments(struct req
   
   #else /* CONFIG_BLK_DEV_INTEGRITY */
   
- #define blk_integrity_rq(rq)                  (0)
- #define blk_rq_count_integrity_sg(a, b)               (0)
- #define blk_rq_map_integrity_sg(a, b, c)      (0)
- #define bdev_get_integrity(a)                 (0)
- #define blk_get_integrity(a)                  (0)
- #define blk_integrity_compare(a, b)           (0)
- #define blk_integrity_register(a, b)          (0)
- #define blk_integrity_unregister(a)           do { } while (0)
- #define blk_queue_max_integrity_segments(a, b)        do { } while (0)
- #define queue_max_integrity_segments(a)               (0)
- #define blk_integrity_merge_rq(a, b, c)               (0)
- #define blk_integrity_merge_bio(a, b, c)      (0)
- #define blk_integrity_is_initialized(a)               (0)
+ struct bio;
+ struct block_device;
+ struct gendisk;
+ struct blk_integrity;
+ 
+ static inline int blk_integrity_rq(struct request *rq)
+ {
+       return 0;
+ }
+ static inline int blk_rq_count_integrity_sg(struct request_queue *q,
+                                           struct bio *b)
+ {
+       return 0;
+ }
+ static inline int blk_rq_map_integrity_sg(struct request_queue *q,
+                                         struct bio *b,
+                                         struct scatterlist *s)
+ {
+       return 0;
+ }
+ static inline struct blk_integrity *bdev_get_integrity(struct block_device *b)
+ {
+       return 0;
+ }
+ static inline struct blk_integrity *blk_get_integrity(struct gendisk *disk)
+ {
+       return NULL;
+ }
+ static inline int blk_integrity_compare(struct gendisk *a, struct gendisk *b)
+ {
+       return 0;
+ }
+ static inline int blk_integrity_register(struct gendisk *d,
+                                        struct blk_integrity *b)
+ {
+       return 0;
+ }
+ static inline void blk_integrity_unregister(struct gendisk *d)
+ {
+ }
+ static inline void blk_queue_max_integrity_segments(struct request_queue *q,
+                                                   unsigned int segs)
+ {
+ }
+ static inline unsigned short queue_max_integrity_segments(struct request_queue *q)
+ {
+       return 0;
+ }
+ static inline int blk_integrity_merge_rq(struct request_queue *rq,
+                                        struct request *r1,
+                                        struct request *r2)
+ {
+       return 0;
+ }
+ static inline int blk_integrity_merge_bio(struct request_queue *rq,
+                                         struct request *r,
+                                         struct bio *b)
+ {
+       return 0;
+ }
+ static inline bool blk_integrity_is_initialized(struct gendisk *g)
+ {
+       return 0;
+ }
   
   #endif /* CONFIG_BLK_DEV_INTEGRITY */
   
diff --combined include/linux/fs.h

index 4bc8169fb5a1edd97b5cc0a6703126d15596add6,95dd911506f159df838c8c3e1a85d8e1b135e80f..0244082d42c5794ba7c2b7b5a3d6a0181f73c6b9
--- 1/include/linux/fs.h
--- 2/include/linux/fs.h
+++ b/include/linux/fs.h
@@@ -319,6 -319,7 +319,7 @@@ struct inodes_stat_t 
   #define BLKPBSZGET _IO(0x12,123)
   #define BLKDISCARDZEROES _IO(0x12,124)
   #define BLKSECDISCARD _IO(0x12,125)
+ #define BLKROTATIONAL _IO(0x12,126)
   
   #define BMAP_IOCTL 1          /* obsolete - kept for compatibility */
   #define FIBMAP           _IO(0x00,1)  /* bmap access */
@@@ -525,7 -526,6 +526,7 @@@ enum positive_aop_returns 
   struct page;
   struct address_space;
   struct writeback_control;
+ +enum migrate_mode;
   
   struct iov_iter {
         const struct iovec *iov;
@@@ -610,12 -610,9 +611,12 @@@ struct address_space_operations 
                         loff_t offset, unsigned long nr_segs);
         int (*get_xip_mem)(struct address_space *, pgoff_t, int,
                                                 void **, unsigned long *);
- -      /* migrate the contents of a page to the specified target */
+ +      /*
+ +       * migrate the contents of a page to the specified target. If sync
+ +       * is false, it must not block.
+ +       */
         int (*migratepage) (struct address_space *,
- -                      struct page *, struct page *);
+ +                      struct page *, struct page *, enum migrate_mode);
         int (*launder_page) (struct page *);
         int (*is_partially_uptodate) (struct page *, read_descriptor_t *,
                                         unsigned long);
@@@ -660,7 -657,6 +661,7 @@@ struct address_space 
          * must be enforced here for CRIS, to let the least significant bit
          * of struct page's "mapping" pointer be used for PAGE_MAPPING_ANON.
          */
+ +struct request_queue;
   
   struct block_device {
         dev_t                   bd_dev;  /* not a kdev_t - it's a search key */
@@@ -683,7 -679,6 +684,7 @@@
         unsigned                bd_part_count;
         int                     bd_invalidated;
         struct gendisk *        bd_disk;
+ +      struct request_queue *  bd_queue;
         struct list_head        bd_list;
         /*
          * Private data.  You must have bd_claim'ed the block_device
@@@ -1007,7 -1002,6 +1008,7 @@@ struct file 
   #ifdef CONFIG_EPOLL
         /* Used by fs/eventpoll.c to link all the hooks to this file */
         struct list_head        f_ep_links;
+ +      struct list_head        f_tfile_llink;
   #endif /* #ifdef CONFIG_EPOLL */
         struct address_space    *f_mapping;
   #ifdef CONFIG_DEBUG_WRITECOUNT
@@@ -1435,7 -1429,6 +1436,7 @@@ struct super_block 
   #else
         struct list_head        s_files;
   #endif
+ +      struct list_head        s_mounts;       /* list of mounts; _not_ for fs use */
         /* s_dentry_lru, s_nr_dentry_unused protected by dcache.c lru locks */
         struct list_head        s_dentry_lru;   /* unused dentry lru */
         int                     s_nr_dentry_unused;     /* # of dentry on lru */
@@@ -1448,7 -1441,7 +1449,7 @@@
         struct block_device     *s_bdev;
         struct backing_dev_info *s_bdi;
         struct mtd_info         *s_mtd;
- -      struct list_head        s_instances;
+ +      struct hlist_node       s_instances;
         struct quota_info       s_dquot;        /* Diskquota specific options */
   
         int                     s_frozen;
@@@ -1489,12 -1482,6 +1490,12 @@@
         int cleancache_poolid;
   
         struct shrinker s_shrink;       /* per-sb shrinker handle */
+ +
+ +      /* Number of inodes with nlink == 0 but still referenced */
+ +      atomic_long_t s_remove_count;
+ +
+ +      /* Being remounted read-only */
+ +      int s_readonly_remount;
   };
   
   /* superblock cache pruning functions */
@@@ -1530,9 -1517,9 +1531,9 @@@ extern void unlock_super(struct super_b
   /*
    * VFS helper functions..
    */
- -extern int vfs_create(struct inode *, struct dentry *, int, struct nameidata *);
- -extern int vfs_mkdir(struct inode *, struct dentry *, int);
- -extern int vfs_mknod(struct inode *, struct dentry *, int, dev_t);
+ +extern int vfs_create(struct inode *, struct dentry *, umode_t, struct nameidata *);
+ +extern int vfs_mkdir(struct inode *, struct dentry *, umode_t);
+ +extern int vfs_mknod(struct inode *, struct dentry *, umode_t, dev_t);
   extern int vfs_symlink(struct inode *, struct dentry *, const char *);
   extern int vfs_link(struct dentry *, struct inode *, struct dentry *);
   extern int vfs_rmdir(struct inode *, struct dentry *);
@@@ -1548,7 -1535,7 +1549,7 @@@ extern void dentry_unhash(struct dentr
    * VFS file helper functions.
    */
   extern void inode_init_owner(struct inode *inode, const struct inode *dir,
- -                      mode_t mode);
+ +                      umode_t mode);
   /*
    * VFS FS_IOC_FIEMAP helper definitions.
    */
@@@ -1633,13 -1620,13 +1634,13 @@@ struct inode_operations 
         int (*readlink) (struct dentry *, char __user *,int);
         void (*put_link) (struct dentry *, struct nameidata *, void *);
   
- -      int (*create) (struct inode *,struct dentry *,int, struct nameidata *);
+ +      int (*create) (struct inode *,struct dentry *,umode_t,struct nameidata *);
         int (*link) (struct dentry *,struct inode *,struct dentry *);
         int (*unlink) (struct inode *,struct dentry *);
         int (*symlink) (struct inode *,struct dentry *,const char *);
- -      int (*mkdir) (struct inode *,struct dentry *,int);
+ +      int (*mkdir) (struct inode *,struct dentry *,umode_t);
         int (*rmdir) (struct inode *,struct dentry *);
- -      int (*mknod) (struct inode *,struct dentry *,int,dev_t);
+ +      int (*mknod) (struct inode *,struct dentry *,umode_t,dev_t);
         int (*rename) (struct inode *, struct dentry *,
                         struct inode *, struct dentry *);
         void (*truncate) (struct inode *);
@@@ -1686,10 -1673,10 +1687,10 @@@ struct super_operations 
         int (*remount_fs) (struct super_block *, int *, char *);
         void (*umount_begin) (struct super_block *);
   
- -      int (*show_options)(struct seq_file *, struct vfsmount *);
- -      int (*show_devname)(struct seq_file *, struct vfsmount *);
- -      int (*show_path)(struct seq_file *, struct vfsmount *);
- -      int (*show_stats)(struct seq_file *, struct vfsmount *);
+ +      int (*show_options)(struct seq_file *, struct dentry *);
+ +      int (*show_devname)(struct seq_file *, struct dentry *);
+ +      int (*show_path)(struct seq_file *, struct dentry *);
+ +      int (*show_stats)(struct seq_file *, struct dentry *);
   #ifdef CONFIG_QUOTA
         ssize_t (*quota_read)(struct super_block *, int, char *, size_t, loff_t);
         ssize_t (*quota_write)(struct super_block *, int, const char *, size_t, loff_t);
@@@ -1778,10 -1765,31 +1779,10 @@@ static inline void mark_inode_dirty_syn
         __mark_inode_dirty(inode, I_DIRTY_SYNC);
   }
   
- -/**
- - * set_nlink - directly set an inode's link count
- - * @inode: inode
- - * @nlink: new nlink (should be non-zero)
- - *
- - * This is a low-level filesystem helper to replace any
- - * direct filesystem manipulation of i_nlink.
- - */
- -static inline void set_nlink(struct inode *inode, unsigned int nlink)
- -{
- -      inode->__i_nlink = nlink;
- -}
- -
- -/**
- - * inc_nlink - directly increment an inode's link count
- - * @inode: inode
- - *
- - * This is a low-level filesystem helper to replace any
- - * direct filesystem manipulation of i_nlink.  Currently,
- - * it is only here for parity with dec_nlink().
- - */
- -static inline void inc_nlink(struct inode *inode)
- -{
- -      inode->__i_nlink++;
- -}
+ +extern void inc_nlink(struct inode *inode);
+ +extern void drop_nlink(struct inode *inode);
+ +extern void clear_nlink(struct inode *inode);
+ +extern void set_nlink(struct inode *inode, unsigned int nlink);
   
   static inline void inode_inc_link_count(struct inode *inode)
   {
@@@ -1789,6 -1797,35 +1790,6 @@@
         mark_inode_dirty(inode);
   }
   
- -/**
- - * drop_nlink - directly drop an inode's link count
- - * @inode: inode
- - *
- - * This is a low-level filesystem helper to replace any
- - * direct filesystem manipulation of i_nlink.  In cases
- - * where we are attempting to track writes to the
- - * filesystem, a decrement to zero means an imminent
- - * write when the file is truncated and actually unlinked
- - * on the filesystem.
- - */
- -static inline void drop_nlink(struct inode *inode)
- -{
- -      inode->__i_nlink--;
- -}
- -
- -/**
- - * clear_nlink - directly zero an inode's link count
- - * @inode: inode
- - *
- - * This is a low-level filesystem helper to replace any
- - * direct filesystem manipulation of i_nlink.  See
- - * drop_nlink() for why we care about i_nlink hitting zero.
- - */
- -static inline void clear_nlink(struct inode *inode)
- -{
- -      inode->__i_nlink = 0;
- -}
- -
   static inline void inode_dec_link_count(struct inode *inode)
   {
         drop_nlink(inode);
@@@ -1828,7 -1865,7 +1829,7 @@@ struct file_system_type 
         void (*kill_sb) (struct super_block *);
         struct module *owner;
         struct file_system_type * next;
- -      struct list_head fs_supers;
+ +      struct hlist_head fs_supers;
   
         struct lock_class_key s_lock_key;
         struct lock_class_key s_umount_key;
@@@ -1903,7 -1940,7 +1904,7 @@@ extern int iterate_mounts(int (*)(struc
   extern int vfs_statfs(struct path *, struct kstatfs *);
   extern int user_statfs(const char __user *, struct kstatfs *);
   extern int fd_statfs(int, struct kstatfs *);
- -extern int statfs_by_dentry(struct dentry *, struct kstatfs *);
+ +extern int vfs_ustat(dev_t, struct kstatfs *);
   extern int freeze_super(struct super_block *super);
   extern int thaw_super(struct super_block *super);
   extern bool our_mnt(struct vfsmount *mnt);
@@@ -2018,8 -2055,8 +2019,8 @@@ extern int do_truncate(struct dentry *
   extern int do_fallocate(struct file *file, int mode, loff_t offset,
                         loff_t len);
   extern long do_sys_open(int dfd, const char __user *filename, int flags,
- -                      int mode);
- -extern struct file *filp_open(const char *, int, int);
+ +                      umode_t mode);
+ +extern struct file *filp_open(const char *, int, umode_t);
   extern struct file *file_open_root(struct dentry *, struct vfsmount *,
                                    const char *, int);
   extern struct file * dentry_open(struct dentry *, struct vfsmount *, int,
@@@ -2056,7 -2093,6 +2057,7 @@@ extern void bd_forget(struct inode *ino
   extern void bdput(struct block_device *);
   extern void invalidate_bdev(struct block_device *);
   extern int sync_blockdev(struct block_device *bdev);
+ +extern void kill_bdev(struct block_device *);
   extern struct super_block *freeze_bdev(struct block_device *);
   extern void emergency_thaw_all(void);
   extern int thaw_bdev(struct block_device *bdev, struct super_block *sb);
@@@ -2064,7 -2100,6 +2065,7 @@@ extern int fsync_bdev(struct block_devi
   #else
   static inline void bd_forget(struct inode *inode) {}
   static inline int sync_blockdev(struct block_device *bdev) { return 0; }
+ +static inline void kill_bdev(struct block_device *bdev) {}
   static inline void invalidate_bdev(struct block_device *bdev) {}
   
   static inline struct super_block *freeze_bdev(struct block_device *sb)
@@@ -2157,6 -2192,8 +2158,6 @@@ extern const struct file_operations rea
   extern const struct file_operations write_pipefifo_fops;
   extern const struct file_operations rdwr_pipefifo_fops;
   
- -extern int fs_may_remount_ro(struct super_block *);
- -
   #ifdef CONFIG_BLOCK
   /*
    * return READ, READA, or WRITE
@@@ -2379,7 -2416,6 +2380,7 @@@ extern ssize_t blkdev_aio_write(struct 
                                 unsigned long nr_segs, loff_t pos);
   extern int blkdev_fsync(struct file *filp, loff_t start, loff_t end,
                         int datasync);
+ +extern void block_sync_page(struct page *page);
   
   /* fs/splice.c */
   extern ssize_t generic_file_splice_read(struct file *, loff_t *,
@@@ -2496,6 -2532,7 +2497,6 @@@ extern void put_filesystem(struct file_
   extern struct file_system_type *get_fs_type(const char *name);
   extern struct super_block *get_super(struct block_device *);
   extern struct super_block *get_active_super(struct block_device *bdev);
- -extern struct super_block *user_get_super(dev_t);
   extern void drop_super(struct super_block *sb);
   extern void iterate_supers(void (*)(struct super_block *, void *), void *);
   extern void iterate_supers_type(struct file_system_type *,
@@@ -2543,8 -2580,7 +2544,8 @@@ extern int generic_check_addressable(un
   
   #ifdef CONFIG_MIGRATION
   extern int buffer_migrate_page(struct address_space *,
- -                              struct page *, struct page *);
+ +                              struct page *, struct page *,
+ +                              enum migrate_mode);
   #else
   #define buffer_migrate_page NULL
   #endif
@@@ -2555,7 -2591,7 +2556,7 @@@ extern void setattr_copy(struct inode *
   
   extern void file_update_time(struct file *file);
   
- -extern int generic_show_options(struct seq_file *m, struct vfsmount *mnt);
+ +extern int generic_show_options(struct seq_file *m, struct dentry *root);
   extern void save_mount_options(struct super_block *sb, char *options);
   extern void replace_mount_options(struct super_block *sb, char *options);
   
@@@ -2656,7 -2692,7 +2657,7 @@@ int __init get_filesystem_list(char *bu
   #define OPEN_FMODE(flag) ((__force fmode_t)(((flag + 1) & O_ACCMODE) | \
                                             (flag & __FMODE_NONOTIFY)))
   
- -static inline int is_sxid(mode_t mode)
+ +static inline int is_sxid(umode_t mode)
   {
         return (mode & S_ISUID) || ((mode & S_ISGID) && (mode & S_IXGRP));
   }
diff --combined kernel/fork.c

index 443f5125f11e39435929072e3abbd19d7a408ea9,2753449f2038af61f6ac1cb1e3f305adbd3063f0..f3fa18887cc9b8d7fbde14f0e6fe36f57f791b96
--- 1/kernel/fork.c
--- 2/kernel/fork.c
+++ b/kernel/fork.c
@@@ -76,9 -76,6 +76,9 @@@
   
   #include <trace/events/sched.h>
   
+ +#define CREATE_TRACE_POINTS
+ +#include <trace/events/task.h>
+ +
   /*
    * Protected counters by write_lock_irq(&tasklist_lock)
    */
@@@ -873,6 -870,7 +873,7 @@@ static int copy_io(unsigned long clone_
   {
   #ifdef CONFIG_BLOCK
         struct io_context *ioc = current->io_context;
+       struct io_context *new_ioc;
   
         if (!ioc)
                 return 0;
@@@ -884,11 -882,12 +885,12 @@@
                 if (unlikely(!tsk->io_context))
                         return -ENOMEM;
         } else if (ioprio_valid(ioc->ioprio)) {
-               tsk->io_context = alloc_io_context(GFP_KERNEL, -1);
-               if (unlikely(!tsk->io_context))
+               new_ioc = get_task_io_context(tsk, GFP_KERNEL, NUMA_NO_NODE);
+               if (unlikely(!new_ioc))
                         return -ENOMEM;
   
-               tsk->io_context->ioprio = ioc->ioprio;
+               new_ioc->ioprio = ioc->ioprio;
+               put_io_context(new_ioc, NULL);
         }
   #endif
         return 0;
@@@ -975,7 -974,7 +977,7 @@@ static int copy_signal(unsigned long cl
         sched_autogroup_fork(sig);
   
   #ifdef CONFIG_CGROUPS
- -      init_rwsem(&sig->threadgroup_fork_lock);
+ +      init_rwsem(&sig->group_rwsem);
   #endif
   
         sig->oom_adj = current->signal->oom_adj;
@@@ -995,6 -994,7 +997,6 @@@ static void copy_flags(unsigned long cl
         new_flags |= PF_FORKNOEXEC;
         new_flags |= PF_STARTING;
         p->flags = new_flags;
- -      clear_freeze_flag(p);
   }
   
   SYSCALL_DEFINE1(set_tid_address, int __user *, tidptr)
@@@ -1025,8 -1025,8 +1027,8 @@@ void mm_init_owner(struct mm_struct *mm
    */
   static void posix_cpu_timers_init(struct task_struct *tsk)
   {
- -      tsk->cputime_expires.prof_exp = cputime_zero;
- -      tsk->cputime_expires.virt_exp = cputime_zero;
+ +      tsk->cputime_expires.prof_exp = 0;
+ +      tsk->cputime_expires.virt_exp = 0;
         tsk->cputime_expires.sched_exp = 0;
         INIT_LIST_HEAD(&tsk->cpu_timers[0]);
         INIT_LIST_HEAD(&tsk->cpu_timers[1]);
@@@ -1134,10 -1134,14 +1136,10 @@@ static struct task_struct *copy_process
   
         init_sigpending(&p->pending);
   
- -      p->utime = cputime_zero;
- -      p->stime = cputime_zero;
- -      p->gtime = cputime_zero;
- -      p->utimescaled = cputime_zero;
- -      p->stimescaled = cputime_zero;
+ +      p->utime = p->stime = p->gtime = 0;
+ +      p->utimescaled = p->stimescaled = 0;
   #ifndef CONFIG_VIRT_CPU_ACCOUNTING
- -      p->prev_utime = cputime_zero;
- -      p->prev_stime = cputime_zero;
+ +      p->prev_utime = p->prev_stime = 0;
   #endif
   #if defined(SPLIT_RSS_COUNTING)
         memset(&p->rss_stat, 0, sizeof(p->rss_stat));
@@@ -1156,7 -1160,7 +1158,7 @@@
         p->io_context = NULL;
         p->audit_context = NULL;
         if (clone_flags & CLONE_THREAD)
- -              threadgroup_fork_read_lock(current);
+ +              threadgroup_change_begin(current);
         cgroup_fork(p);
   #ifdef CONFIG_NUMA
         p->mempolicy = mpol_dup(p->mempolicy);
@@@ -1294,7 -1298,6 +1296,7 @@@
   
         p->nr_dirtied = 0;
         p->nr_dirtied_pause = 128 >> (PAGE_SHIFT - 10);
+ +      p->dirty_paused_when = 0;
   
         /*
          * Ok, make it visible to the rest of the system.
@@@ -1372,11 -1375,8 +1374,11 @@@
         proc_fork_connector(p);
         cgroup_post_fork(p);
         if (clone_flags & CLONE_THREAD)
- -              threadgroup_fork_read_unlock(current);
+ +              threadgroup_change_end(current);
         perf_event_fork(p);
+ +
+ +      trace_task_newtask(p, clone_flags);
+ +
         return p;
   
   bad_fork_free_pid:
@@@ -1410,7 -1410,7 +1412,7 @@@ bad_fork_cleanup_policy
   bad_fork_cleanup_cgroup:
   #endif
         if (clone_flags & CLONE_THREAD)
- -              threadgroup_fork_read_unlock(current);
+ +              threadgroup_change_end(current);
         cgroup_exit(p, cgroup_callbacks_done);
         delayacct_tsk_free(p);
         module_put(task_thread_info(p)->exec_domain->module);
author	Linus Torvalds <torvalds@linux-foundation.org>
	Sun, 15 Jan 2012 20:24:45 +0000 (12:24 -0800)
committer	Linus Torvalds <torvalds@linux-foundation.org>
	Sun, 15 Jan 2012 20:24:45 +0000 (12:24 -0800)
		1	2
block/blk-cgroup.c	patch \|	diff1 \|	diff2 \|	blob \| history
block/blk-core.c	patch \|	diff1 \|	diff2 \|	blob \| history
block/bsg.c	patch \|	diff1 \|	diff2 \|	blob \| history
block/genhd.c	patch \|	diff1 \|	diff2 \|	blob \| history
block/ioctl.c	patch \|	diff1 \|	diff2 \|	blob \| history
drivers/md/md.c	patch \|	diff1 \|	diff2 \|	blob \| history
include/linux/blkdev.h	patch \|	diff1 \|	diff2 \|	blob \| history
include/linux/fs.h	patch \|	diff1 \|	diff2 \|	blob \| history
kernel/fork.c	patch \|	diff1 \|	diff2 \|	blob \| history