Pileus Git - ~andy/linux/blobdiff - drivers/block/rbd.c
rbd: fix image request leak on parent read
[~andy/linux] / drivers / block / rbd.c
index 0ca959f5c9341cf877720d16080f29d354ab7170..c2ca1818f33583679ac3d96f8bbaa4f33949640a 100644 (file)
 #include <linux/ceph/mon_client.h>
 #include <linux/ceph/decode.h>
 #include <linux/parser.h>
+#include <linux/bsearch.h>
 
 #include <linux/kernel.h>
 #include <linux/device.h>
 #include <linux/module.h>
 #include <linux/fs.h>
 #include <linux/blkdev.h>
+#include <linux/slab.h>
 
 #include "rbd_types.h"
 
@@ -343,6 +345,12 @@ static DEFINE_SPINLOCK(rbd_dev_list_lock);
 static LIST_HEAD(rbd_client_list);             /* clients */
 static DEFINE_SPINLOCK(rbd_client_list_lock);
 
+/* Slab caches for frequently-allocated structures */
+
+static struct kmem_cache       *rbd_img_request_cache;
+static struct kmem_cache       *rbd_obj_request_cache;
+static struct kmem_cache       *rbd_segment_name_cache;
+
 static int rbd_img_request_submit(struct rbd_img_request *img_request);
 
 static void rbd_dev_device_release(struct device *dev);
@@ -819,16 +827,39 @@ static const char *_rbd_dev_v1_snap_name(struct rbd_device *rbd_dev, u32 which)
        return kstrdup(snap_name, GFP_KERNEL);
 }
 
+/*
+ * Snapshot id comparison function for use with qsort()/bsearch().
+ * Note that the result is for snapshots in *descending* order.
+ */
+static int snapid_compare_reverse(const void *s1, const void *s2)
+{
+       u64 snap_id1 = *(u64 *)s1;
+       u64 snap_id2 = *(u64 *)s2;
+
+       if (snap_id1 < snap_id2)
+               return 1;
+       return snap_id1 == snap_id2 ? 0 : -1;
+}
+
+/*
+ * Search a snapshot context to see if the given snapshot id is
+ * present.
+ *
+ * Returns the position of the snapshot id in the array if it's found,
+ * or BAD_SNAP_INDEX otherwise.
+ *
+ * Note: The snapshot array is kept sorted (by the osd) in
+ * reverse order, highest snapshot id first.
+ */
 static u32 rbd_dev_snap_index(struct rbd_device *rbd_dev, u64 snap_id)
 {
        struct ceph_snap_context *snapc = rbd_dev->header.snapc;
-       u32 which;
+       u64 *found;
 
-       for (which = 0; which < snapc->num_snaps; which++)
-               if (snapc->snaps[which] == snap_id)
-                       return which;
+       found = bsearch(&snap_id, &snapc->snaps, snapc->num_snaps,
+                               sizeof (snap_id), snapid_compare_reverse);
 
-       return BAD_SNAP_INDEX;
+       return found ? (u32)(found - &snapc->snaps[0]) : BAD_SNAP_INDEX;
 }
 
 static const char *rbd_dev_v1_snap_name(struct rbd_device *rbd_dev,
@@ -957,7 +988,7 @@ static const char *rbd_segment_name(struct rbd_device *rbd_dev, u64 offset)
        u64 segment;
        int ret;
 
-       name = kmalloc(MAX_OBJ_NAME_SIZE + 1, GFP_NOIO);
+       name = kmem_cache_alloc(rbd_segment_name_cache, GFP_NOIO);
        if (!name)
                return NULL;
        segment = offset >> rbd_dev->header.obj_order;
@@ -973,6 +1004,13 @@ static const char *rbd_segment_name(struct rbd_device *rbd_dev, u64 offset)
        return name;
 }
 
+static void rbd_segment_name_free(const char *name)
+{
+       /* The explicit cast here is needed to drop the const qualifier */
+
+       kmem_cache_free(rbd_segment_name_cache, (void *)name);
+}
+
 static u64 rbd_segment_offset(struct rbd_device *rbd_dev, u64 offset)
 {
        u64 segment_size = (u64) 1 << rbd_dev->header.obj_order;
@@ -1731,11 +1769,16 @@ static struct rbd_obj_request *rbd_obj_request_create(const char *object_name,
        rbd_assert(obj_request_type_valid(type));
 
        size = strlen(object_name) + 1;
-       obj_request = kzalloc(sizeof (*obj_request) + size, GFP_KERNEL);
-       if (!obj_request)
+       name = kmalloc(size, GFP_KERNEL);
+       if (!name)
+               return NULL;
+
+       obj_request = kmem_cache_zalloc(rbd_obj_request_cache, GFP_KERNEL);
+       if (!obj_request) {
+               kfree(name);
                return NULL;
+       }
 
-       name = (char *)(obj_request + 1);
        obj_request->object_name = memcpy(name, object_name, size);
        obj_request->offset = offset;
        obj_request->length = length;
@@ -1781,7 +1824,9 @@ static void rbd_obj_request_destroy(struct kref *kref)
                break;
        }
 
-       kfree(obj_request);
+       kfree(obj_request->object_name);
+       obj_request->object_name = NULL;
+       kmem_cache_free(rbd_obj_request_cache, obj_request);
 }
 
 /*
@@ -1797,7 +1842,7 @@ static struct rbd_img_request *rbd_img_request_create(
 {
        struct rbd_img_request *img_request;
 
-       img_request = kmalloc(sizeof (*img_request), GFP_ATOMIC);
+       img_request = kmem_cache_alloc(rbd_img_request_cache, GFP_ATOMIC);
        if (!img_request)
                return NULL;
 
@@ -1860,7 +1905,7 @@ static void rbd_img_request_destroy(struct kref *kref)
        if (img_request_child_test(img_request))
                rbd_obj_request_put(img_request->obj_request);
 
-       kfree(img_request);
+       kmem_cache_free(rbd_img_request_cache, img_request);
 }
 
 static bool rbd_img_obj_end_request(struct rbd_obj_request *obj_request)
@@ -1998,7 +2043,8 @@ static int rbd_img_request_fill(struct rbd_img_request *img_request,
                length = rbd_segment_length(rbd_dev, img_offset, resid);
                obj_request = rbd_obj_request_create(object_name,
                                                offset, length, type);
-               kfree(object_name);     /* object request has its own copy */
+               /* object request has its own copy of the object name */
+               rbd_segment_name_free(object_name);
                if (!obj_request)
                        goto out_unwind;
 
@@ -2501,6 +2547,7 @@ static void rbd_img_parent_read_callback(struct rbd_img_request *img_request)
                obj_request->xferred = img_request->xferred;
        }
 out:
+       rbd_img_request_put(img_request);
        rbd_img_obj_request_read_callback(obj_request);
        rbd_obj_request_complete(obj_request);
 }
@@ -3114,6 +3161,25 @@ static int rbd_dev_v1_refresh(struct rbd_device *rbd_dev)
        return ret;
 }
 
+/*
+ * Clear the rbd device's EXISTS flag if the snapshot it's mapped to
+ * has disappeared from the (just updated) snapshot context.
+ */
+static void rbd_exists_validate(struct rbd_device *rbd_dev)
+{
+       u64 snap_id;
+
+       if (!test_bit(RBD_DEV_FLAG_EXISTS, &rbd_dev->flags))
+               return;
+
+       snap_id = rbd_dev->spec->snap_id;
+       if (snap_id == CEPH_NOSNAP)
+               return;
+
+       if (rbd_dev_snap_index(rbd_dev, snap_id) == BAD_SNAP_INDEX)
+               clear_bit(RBD_DEV_FLAG_EXISTS, &rbd_dev->flags);
+}
+
 static int rbd_dev_refresh(struct rbd_device *rbd_dev)
 {
        u64 image_size;
@@ -3126,6 +3192,10 @@ static int rbd_dev_refresh(struct rbd_device *rbd_dev)
                ret = rbd_dev_v1_refresh(rbd_dev);
        else
                ret = rbd_dev_v2_refresh(rbd_dev);
+
+       /* If it's a mapped snapshot, validate its EXISTS flag */
+
+       rbd_exists_validate(rbd_dev);
        mutex_unlock(&ctl_mutex);
        if (ret)
                rbd_warn(rbd_dev, "got notification but failed to "
@@ -4945,6 +5015,56 @@ static void rbd_sysfs_cleanup(void)
        device_unregister(&rbd_root_dev);
 }
 
+static int rbd_slab_init(void)
+{
+       rbd_assert(!rbd_img_request_cache);
+       rbd_img_request_cache = kmem_cache_create("rbd_img_request",
+                                       sizeof (struct rbd_img_request),
+                                       __alignof__(struct rbd_img_request),
+                                       0, NULL);
+       if (!rbd_img_request_cache)
+               return -ENOMEM;
+
+       rbd_assert(!rbd_obj_request_cache);
+       rbd_obj_request_cache = kmem_cache_create("rbd_obj_request",
+                                       sizeof (struct rbd_obj_request),
+                                       __alignof__(struct rbd_obj_request),
+                                       0, NULL);
+       if (!rbd_obj_request_cache)
+               goto out_err;
+
+       rbd_assert(!rbd_segment_name_cache);
+       rbd_segment_name_cache = kmem_cache_create("rbd_segment_name",
+                                       MAX_OBJ_NAME_SIZE + 1, 1, 0, NULL);
+       if (rbd_segment_name_cache)
+               return 0;
+out_err:
+       if (rbd_obj_request_cache) {
+               kmem_cache_destroy(rbd_obj_request_cache);
+               rbd_obj_request_cache = NULL;
+       }
+
+       kmem_cache_destroy(rbd_img_request_cache);
+       rbd_img_request_cache = NULL;
+
+       return -ENOMEM;
+}
+
+static void rbd_slab_exit(void)
+{
+       rbd_assert(rbd_segment_name_cache);
+       kmem_cache_destroy(rbd_segment_name_cache);
+       rbd_segment_name_cache = NULL;
+
+       rbd_assert(rbd_obj_request_cache);
+       kmem_cache_destroy(rbd_obj_request_cache);
+       rbd_obj_request_cache = NULL;
+
+       rbd_assert(rbd_img_request_cache);
+       kmem_cache_destroy(rbd_img_request_cache);
+       rbd_img_request_cache = NULL;
+}
+
 static int __init rbd_init(void)
 {
        int rc;
@@ -4954,16 +5074,22 @@ static int __init rbd_init(void)
 
                return -EINVAL;
        }
-       rc = rbd_sysfs_init();
+       rc = rbd_slab_init();
        if (rc)
                return rc;
-       pr_info("loaded " RBD_DRV_NAME_LONG "\n");
-       return 0;
+       rc = rbd_sysfs_init();
+       if (rc)
+               rbd_slab_exit();
+       else
+               pr_info("loaded " RBD_DRV_NAME_LONG "\n");
+
+       return rc;
 }
 
 static void __exit rbd_exit(void)
 {
        rbd_sysfs_cleanup();
+       rbd_slab_exit();
 }
 
 module_init(rbd_init);