};
};
struct page **copyup_pages;
+ u32 copyup_page_count;
struct ceph_osd_request *osd_req;
struct rbd_obj_request *obj_request; /* obj req initiator */
};
struct page **copyup_pages;
+ u32 copyup_page_count;
spinlock_t completion_lock;/* protects next_completion */
u32 next_completion;
rbd_img_callback_t callback;
size_t count);
static ssize_t rbd_remove(struct bus_type *bus, const char *buf,
size_t count);
-static int rbd_dev_image_probe(struct rbd_device *rbd_dev, bool read_only);
+static int rbd_dev_image_probe(struct rbd_device *rbd_dev, bool mapping);
static struct bus_attribute rbd_bus_attrs[] = {
__ATTR(add, S_IWUSR, NULL, rbd_add),
static void rbd_dev_remove_parent(struct rbd_device *rbd_dev);
static int rbd_dev_refresh(struct rbd_device *rbd_dev);
-static int rbd_dev_v2_refresh(struct rbd_device *rbd_dev);
+static int rbd_dev_v2_header_onetime(struct rbd_device *rbd_dev);
+static int rbd_dev_v2_header_info(struct rbd_device *rbd_dev);
static const char *rbd_dev_v2_snap_name(struct rbd_device *rbd_dev,
u64 snap_id);
static int _rbd_dev_v2_snap_size(struct rbd_device *rbd_dev, u64 snap_id,
* Copy the names, and fill in each snapshot's id
* and size.
*
- * Note that rbd_dev_v1_header_read() guarantees the
+ * Note that rbd_dev_v1_header_info() guarantees the
* ondisk buffer we're working with has
* snap_names_len bytes beyond the end of the
* snapshot id array, this memcpy() is safe.
kref_put(&obj_request->kref, rbd_obj_request_destroy);
}
-static void rbd_img_request_get(struct rbd_img_request *img_request)
-{
- dout("%s: img %p (was %d)\n", __func__, img_request,
- atomic_read(&img_request->kref.refcount));
- kref_get(&img_request->kref);
-}
-
static void rbd_img_request_destroy(struct kref *kref);
static void rbd_img_request_put(struct rbd_img_request *img_request)
{
INIT_LIST_HEAD(&img_request->obj_requests);
kref_init(&img_request->kref);
- rbd_img_request_get(img_request); /* Avoid a warning */
- rbd_img_request_put(img_request); /* TEMPORARY */
-
dout("%s: rbd_dev %p %s %llu/%llu -> img %p\n", __func__, rbd_dev,
write_request ? "write" : "read", offset, length,
img_request);
{
struct rbd_img_request *img_request;
struct rbd_device *rbd_dev;
- u64 length;
+ struct page **pages;
u32 page_count;
rbd_assert(obj_request->type == OBJ_REQUEST_BIO);
rbd_dev = img_request->rbd_dev;
rbd_assert(rbd_dev);
- length = (u64)1 << rbd_dev->header.obj_order;
- page_count = (u32)calc_pages_for(0, length);
- rbd_assert(obj_request->copyup_pages);
- ceph_release_page_vector(obj_request->copyup_pages, page_count);
+ pages = obj_request->copyup_pages;
+ rbd_assert(pages != NULL);
obj_request->copyup_pages = NULL;
+ page_count = obj_request->copyup_page_count;
+ rbd_assert(page_count);
+ obj_request->copyup_page_count = 0;
+ ceph_release_page_vector(pages, page_count);
/*
* We want the transfer count to reflect the size of the
struct ceph_osd_client *osdc;
struct rbd_device *rbd_dev;
struct page **pages;
+ u32 page_count;
int result;
- u64 obj_size;
- u64 xferred;
+ u64 parent_length;
rbd_assert(img_request_child_test(img_request));
pages = img_request->copyup_pages;
rbd_assert(pages != NULL);
img_request->copyup_pages = NULL;
+ page_count = img_request->copyup_page_count;
+ rbd_assert(page_count);
+ img_request->copyup_page_count = 0;
orig_request = img_request->obj_request;
rbd_assert(orig_request != NULL);
rbd_assert(orig_request->type == OBJ_REQUEST_BIO);
result = img_request->result;
- obj_size = img_request->length;
- xferred = img_request->xferred;
+ parent_length = img_request->length;
+ rbd_assert(parent_length == img_request->xferred);
+ rbd_img_request_put(img_request);
- rbd_dev = img_request->rbd_dev;
+ rbd_assert(orig_request->img_request);
+ rbd_dev = orig_request->img_request->rbd_dev;
rbd_assert(rbd_dev);
- rbd_assert(obj_size == (u64)1 << rbd_dev->header.obj_order);
-
- rbd_img_request_put(img_request);
if (result)
goto out_err;
goto out_err;
orig_request->osd_req = osd_req;
orig_request->copyup_pages = pages;
+ orig_request->copyup_page_count = page_count;
/* Initialize the copyup op */
osd_req_op_cls_init(osd_req, 0, CEPH_OSD_OP_CALL, "rbd", "copyup");
- osd_req_op_cls_request_data_pages(osd_req, 0, pages, obj_size, 0,
+ osd_req_op_cls_request_data_pages(osd_req, 0, pages, parent_length, 0,
false, false);
/* Then the original write request op */
if (result)
goto out_err;
parent_request->copyup_pages = pages;
+ parent_request->copyup_page_count = page_count;
parent_request->callback = rbd_img_obj_parent_read_full_callback;
result = rbd_img_request_submit(parent_request);
return 0;
parent_request->copyup_pages = NULL;
+ parent_request->copyup_page_count = 0;
parent_request->obj_request = NULL;
rbd_obj_request_put(obj_request);
out_err:
rbd_assert(obj_request_img_data_test(obj_request));
rbd_assert(obj_request->img_request != NULL);
rbd_assert(obj_request->result == (s32) -ENOENT);
- rbd_assert(obj_request->type == OBJ_REQUEST_BIO);
+ rbd_assert(obj_request_type_valid(obj_request->type));
rbd_dev = obj_request->img_request->rbd_dev;
rbd_assert(rbd_dev->parent != NULL);
rbd_obj_request_get(obj_request);
img_request->obj_request = obj_request;
- result = rbd_img_request_fill(img_request, OBJ_REQUEST_BIO,
- obj_request->bio_list);
+ if (obj_request->type == OBJ_REQUEST_BIO)
+ result = rbd_img_request_fill(img_request, OBJ_REQUEST_BIO,
+ obj_request->bio_list);
+ else
+ result = rbd_img_request_fill(img_request, OBJ_REQUEST_PAGES,
+ obj_request->pages);
if (result)
goto out_err;
* Request sync osd watch/unwatch. The value of "start" determines
* whether a watch request is being initiated or torn down.
*/
-static int rbd_dev_header_watch_sync(struct rbd_device *rbd_dev, int start)
+static int rbd_dev_header_watch_sync(struct rbd_device *rbd_dev, bool start)
{
struct ceph_osd_client *osdc = &rbd_dev->rbd_client->client->osdc;
struct rbd_obj_request *obj_request;
rbd_dev->watch_request->osd_req);
osd_req_op_watch_init(obj_request->osd_req, 0, CEPH_OSD_OP_WATCH,
- rbd_dev->watch_event->cookie, 0, start);
+ rbd_dev->watch_event->cookie, 0, start ? 1 : 0);
rbd_osd_req_format_write(obj_request);
ret = rbd_obj_request_submit(osdc, obj_request);
* return, the rbd_dev->header field will contain up-to-date
* information about the image.
*/
-static int rbd_dev_v1_header_read(struct rbd_device *rbd_dev)
+static int rbd_dev_v1_header_info(struct rbd_device *rbd_dev)
{
struct rbd_image_header_ondisk *ondisk = NULL;
u32 snap_count = 0;
return ret;
}
-/*
- * only read the first part of the ondisk header, without the snaps info
- */
-static int rbd_dev_v1_refresh(struct rbd_device *rbd_dev)
-{
- return rbd_dev_v1_header_read(rbd_dev);
-}
-
/*
* Clear the rbd device's EXISTS flag if the snapshot it's mapped to
* has disappeared from the (just updated) snapshot context.
mapping_size = rbd_dev->mapping.size;
mutex_lock_nested(&ctl_mutex, SINGLE_DEPTH_NESTING);
if (rbd_dev->image_format == 1)
- ret = rbd_dev_v1_refresh(rbd_dev);
+ ret = rbd_dev_v1_header_info(rbd_dev);
else
- ret = rbd_dev_v2_refresh(rbd_dev);
+ ret = rbd_dev_v2_header_info(rbd_dev);
/* If it's a mapped snapshot, validate its EXISTS flag */
return snap_name;
}
-static int rbd_dev_v2_refresh(struct rbd_device *rbd_dev)
+static int rbd_dev_v2_header_info(struct rbd_device *rbd_dev)
{
+ bool first_time = rbd_dev->header.object_prefix == NULL;
int ret;
down_write(&rbd_dev->header_rwsem);
+ if (first_time) {
+ ret = rbd_dev_v2_header_onetime(rbd_dev);
+ if (ret)
+ goto out;
+ }
+
ret = rbd_dev_v2_image_size(rbd_dev);
if (ret)
goto out;
memset(header, 0, sizeof (*header));
}
-static int rbd_dev_v1_probe(struct rbd_device *rbd_dev)
-{
- int ret;
-
- /* Populate rbd image metadata */
-
- ret = rbd_dev_v1_header_read(rbd_dev);
- if (ret < 0)
- goto out_err;
-
- /* Version 1 images have no parent (no layering) */
-
- rbd_dev->parent_spec = NULL;
- rbd_dev->parent_overlap = 0;
-
- dout("discovered version 1 image, header name is %s\n",
- rbd_dev->header_name);
-
- return 0;
-
-out_err:
- kfree(rbd_dev->header_name);
- rbd_dev->header_name = NULL;
- kfree(rbd_dev->spec->image_id);
- rbd_dev->spec->image_id = NULL;
-
- return ret;
-}
-
-static int rbd_dev_v2_probe(struct rbd_device *rbd_dev)
+static int rbd_dev_v2_header_onetime(struct rbd_device *rbd_dev)
{
int ret;
- ret = rbd_dev_v2_image_size(rbd_dev);
- if (ret)
- goto out_err;
-
- /* Get the object prefix (a.k.a. block_name) for the image */
-
ret = rbd_dev_v2_object_prefix(rbd_dev);
if (ret)
goto out_err;
- /* Get the and check features for the image */
-
+ /*
+ * Get the and check features for the image. Currently the
+ * features are assumed to never change.
+ */
ret = rbd_dev_v2_features(rbd_dev);
if (ret)
goto out_err;
if (ret < 0)
goto out_err;
}
-
- /* crypto and compression type aren't (yet) supported for v2 images */
-
- rbd_dev->header.crypt_type = 0;
- rbd_dev->header.comp_type = 0;
-
- /* Get the snapshot context, plus the header version */
-
- ret = rbd_dev_v2_snap_context(rbd_dev);
- if (ret)
- goto out_err;
-
- dout("discovered version 2 image, header name is %s\n",
- rbd_dev->header_name);
+ /* No support for crypto and compression type format 2 images */
return 0;
out_err:
if (!parent)
goto out_err;
- ret = rbd_dev_image_probe(parent, true);
+ ret = rbd_dev_image_probe(parent, false);
if (ret < 0)
goto out_err;
rbd_dev->parent = parent;
static void rbd_dev_image_release(struct rbd_device *rbd_dev)
{
- int ret;
-
rbd_dev_unprobe(rbd_dev);
- ret = rbd_dev_header_watch_sync(rbd_dev, 0);
- if (ret)
- rbd_warn(rbd_dev, "failed to cancel watch event (%d)\n", ret);
kfree(rbd_dev->header_name);
rbd_dev->header_name = NULL;
rbd_dev->image_format = 0;
/*
* Probe for the existence of the header object for the given rbd
- * device. For format 2 images this includes determining the image
- * id.
+ * device. If this image is the one being mapped (i.e., not a
+ * parent), initiate a watch on its header object before using that
+ * object to get detailed information about the rbd image.
*/
-static int rbd_dev_image_probe(struct rbd_device *rbd_dev, bool read_only)
+static int rbd_dev_image_probe(struct rbd_device *rbd_dev, bool mapping)
{
int ret;
int tmp;
if (ret)
goto err_out_format;
- ret = rbd_dev_header_watch_sync(rbd_dev, 1);
- if (ret)
- goto out_header_name;
+ if (mapping) {
+ ret = rbd_dev_header_watch_sync(rbd_dev, true);
+ if (ret)
+ goto out_header_name;
+ }
if (rbd_dev->image_format == 1)
- ret = rbd_dev_v1_probe(rbd_dev);
+ ret = rbd_dev_v1_header_info(rbd_dev);
else
- ret = rbd_dev_v2_probe(rbd_dev);
+ ret = rbd_dev_v2_header_info(rbd_dev);
if (ret)
goto err_out_watch;
if (ret)
goto err_out_probe;
- /* If we are mapping a snapshot it must be marked read-only */
-
- if (rbd_dev->spec->snap_id != CEPH_NOSNAP)
- read_only = true;
- rbd_dev->mapping.read_only = read_only;
-
ret = rbd_dev_probe_parent(rbd_dev);
- if (!ret)
- return 0;
+ if (ret)
+ goto err_out_probe;
+
+ dout("discovered format %u image, header name is %s\n",
+ rbd_dev->image_format, rbd_dev->header_name);
+ return 0;
err_out_probe:
rbd_dev_unprobe(rbd_dev);
err_out_watch:
- tmp = rbd_dev_header_watch_sync(rbd_dev, 0);
- if (tmp)
- rbd_warn(rbd_dev, "unable to tear down watch request\n");
+ if (mapping) {
+ tmp = rbd_dev_header_watch_sync(rbd_dev, false);
+ if (tmp)
+ rbd_warn(rbd_dev, "unable to tear down "
+ "watch request (%d)\n", tmp);
+ }
out_header_name:
kfree(rbd_dev->header_name);
rbd_dev->header_name = NULL;
rbdc = NULL; /* rbd_dev now owns this */
spec = NULL; /* rbd_dev now owns this */
- rc = rbd_dev_image_probe(rbd_dev, read_only);
+ rc = rbd_dev_image_probe(rbd_dev, true);
if (rc < 0)
goto err_out_rbd_dev;
+ /* If we are mapping a snapshot it must be marked read-only */
+
+ if (rbd_dev->spec->snap_id != CEPH_NOSNAP)
+ read_only = true;
+ rbd_dev->mapping.read_only = read_only;
+
rc = rbd_dev_device_setup(rbd_dev);
if (!rc)
return count;
spin_unlock_irq(&rbd_dev->lock);
if (ret < 0)
goto done;
- ret = count;
rbd_bus_del_dev(rbd_dev);
+ ret = rbd_dev_header_watch_sync(rbd_dev, false);
+ if (ret)
+ rbd_warn(rbd_dev, "failed to cancel watch event (%d)\n", ret);
rbd_dev_image_release(rbd_dev);
module_put(THIS_MODULE);
+ ret = count;
done:
mutex_unlock(&ctl_mutex);