*/
#include <linux/nfs_fs.h>
+#include <linux/nfs_page.h>
#include "internal.h"
#include "pnfs.h"
#include "iostat.h"
atomic_inc(&lo->plh_refcount);
}
+/*
+ * Allocate a layout header for @ino.
+ *
+ * The current layout driver's alloc_layout_hdr hook is used when it is
+ * set; otherwise a zeroed generic struct pnfs_layout_hdr is returned.
+ */
+static struct pnfs_layout_hdr *
+pnfs_alloc_layout_hdr(struct inode *ino, gfp_t gfp_flags)
+{
+	struct pnfs_layoutdriver_type *ld = NFS_SERVER(ino)->pnfs_curr_ld;
+
+	if (ld->alloc_layout_hdr)
+		return ld->alloc_layout_hdr(ino, gfp_flags);
+	return kzalloc(sizeof(struct pnfs_layout_hdr), gfp_flags);
+}
+
+/*
+ * Release a layout header obtained from pnfs_alloc_layout_hdr().
+ *
+ * The destructor is chosen by looking at the driver's alloc_layout_hdr
+ * hook, mirroring the decision made at allocation time: a header that
+ * came from the driver goes back through free_layout_hdr, a generic
+ * kzalloc'ed header is kfree'd.
+ * NOTE(review): this presumes a driver that sets alloc_layout_hdr also
+ * sets free_layout_hdr - confirm against the layout drivers.
+ */
+static void
+pnfs_free_layout_hdr(struct pnfs_layout_hdr *lo)
+{
+	struct pnfs_layoutdriver_type *ld = NFS_SERVER(lo->plh_inode)->pnfs_curr_ld;
+
+	/* Plain statements: "return <void expr>;" is not valid ISO C here. */
+	if (ld->alloc_layout_hdr)
+		ld->free_layout_hdr(lo);
+	else
+		kfree(lo);
+}
+
+/*
+ * Tear down a layout header: clear the owning inode's layout pointer and
+ * free the header.  lo->plh_layouts must already be an empty (unlinked)
+ * list node; anything else trips the BUG_ON.
+ */
static void
destroy_layout_hdr(struct pnfs_layout_hdr *lo)
{
dprintk("%s: freeing layout cache %p\n", __func__, lo);
BUG_ON(!list_empty(&lo->plh_layouts));
NFS_I(lo->plh_inode)->layout = NULL;
- kfree(lo);
+ /* Free through the helper so a driver-provided destructor can run. */
+ pnfs_free_layout_hdr(lo);
}
static void
{
struct inode *inode = lseg->pls_layout->plh_inode;
- BUG_ON(test_bit(NFS_LSEG_VALID, &lseg->pls_flags));
+ WARN_ON(test_bit(NFS_LSEG_VALID, &lseg->pls_flags));
list_del_init(&lseg->pls_list);
if (list_empty(&lseg->pls_layout->plh_segs)) {
set_bit(NFS_LAYOUT_DESTROYED, &lseg->pls_layout->plh_flags);
}
+/*
+ * Decide whether a layout segment is covered by a recall.
+ *
+ * True when the iomodes are compatible (the recall is IOMODE_ANY, or both
+ * iomodes are equal) and lo_seg_intersecting() reports that the two
+ * ranges intersect.
+ */
static bool
-should_free_lseg(u32 lseg_iomode, u32 recall_iomode)
+should_free_lseg(struct pnfs_layout_range *lseg_range,
+ struct pnfs_layout_range *recall_range)
{
- return (recall_iomode == IOMODE_ANY ||
- lseg_iomode == recall_iomode);
+ return (recall_range->iomode == IOMODE_ANY ||
+ lseg_range->iomode == recall_range->iomode) &&
+ lo_seg_intersecting(lseg_range, recall_range);
}
/* Returns 1 if lseg is removed from list, 0 otherwise */
int
mark_matching_lsegs_invalid(struct pnfs_layout_hdr *lo,
struct list_head *tmp_list,
- u32 iomode)
+ struct pnfs_layout_range *recall_range)
{
struct pnfs_layout_segment *lseg, *next;
int invalid = 0, removed = 0;
return 0;
}
list_for_each_entry_safe(lseg, next, &lo->plh_segs, pls_list)
- if (should_free_lseg(lseg->pls_range.iomode, iomode)) {
+ if (!recall_range ||
+ should_free_lseg(&lseg->pls_range, recall_range)) {
dprintk("%s: freeing lseg %p iomode %d "
"offset %llu length %llu\n", __func__,
lseg, lseg->pls_range.iomode, lseg->pls_range.offset,
lo = nfsi->layout;
if (lo) {
lo->plh_block_lgets++; /* permanently block new LAYOUTGETs */
- mark_matching_lsegs_invalid(lo, &tmp_list, IOMODE_ANY);
+ mark_matching_lsegs_invalid(lo, &tmp_list, NULL);
}
spin_unlock(&nfsi->vfs_inode.i_lock);
pnfs_free_lseg_list(&tmp_list);
void
pnfs_destroy_all_layouts(struct nfs_client *clp)
{
+ struct nfs_server *server;
struct pnfs_layout_hdr *lo;
LIST_HEAD(tmp_list);
+ nfs4_deviceid_mark_client_invalid(clp);
+ nfs4_deviceid_purge_client(clp);
+
spin_lock(&clp->cl_lock);
- list_splice_init(&clp->cl_layouts, &tmp_list);
+ rcu_read_lock();
+ list_for_each_entry_rcu(server, &clp->cl_superblocks, client_link) {
+ if (!list_empty(&server->layouts))
+ list_splice_init(&server->layouts, &tmp_list);
+ }
+ rcu_read_unlock();
spin_unlock(&clp->cl_lock);
while (!list_empty(&tmp_list)) {
return NULL;
}
+/*
+ * Initiate a LAYOUTRETURN(FILE) for the layout held on @ino.
+ *
+ * Every cached segment is invalidated and new LAYOUTGETs are blocked
+ * before the request is built.  Returns 0 when the inode has no layout,
+ * -ENOMEM when the request cannot be allocated, and otherwise whatever
+ * nfs4_proc_layoutreturn() reports.
+ */
+int
+_pnfs_return_layout(struct inode *ino)
+{
+	struct nfs_inode *nfsi = NFS_I(ino);
+	struct pnfs_layout_hdr *lo;
+	struct nfs4_layoutreturn *lrp;
+	nfs4_stateid stateid;
+	LIST_HEAD(tmp_list);
+	int status = 0;
+
+	dprintk("--> %s\n", __func__);
+
+	spin_lock(&ino->i_lock);
+	lo = nfsi->layout;
+	if (lo == NULL) {
+		spin_unlock(&ino->i_lock);
+		dprintk("%s: no layout to return\n", __func__);
+		return status;
+	}
+	/* Snapshot the stateid while i_lock still protects the header. */
+	stateid = lo->plh_stateid;
+	/* Reference matched in nfs4_layoutreturn_release */
+	get_layout_hdr(lo);
+	mark_matching_lsegs_invalid(lo, &tmp_list, NULL);
+	lo->plh_block_lgets++;
+	spin_unlock(&ino->i_lock);
+	pnfs_free_lseg_list(&tmp_list);
+
+	WARN_ON(test_bit(NFS_INO_LAYOUTCOMMIT, &nfsi->flags));
+
+	lrp = kzalloc(sizeof(*lrp), GFP_KERNEL);
+	if (unlikely(lrp == NULL)) {
+		/* No request to send: flag both iomodes failed, drop our ref. */
+		status = -ENOMEM;
+		set_bit(NFS_LAYOUT_RW_FAILED, &lo->plh_flags);
+		set_bit(NFS_LAYOUT_RO_FAILED, &lo->plh_flags);
+		put_layout_hdr(lo);
+	} else {
+		lrp->args.stateid = stateid;
+		lrp->args.layout_type = NFS_SERVER(ino)->pnfs_curr_ld->id;
+		lrp->args.inode = ino;
+		lrp->args.layout = lo;
+		lrp->clp = NFS_SERVER(ino)->nfs_client;
+
+		status = nfs4_proc_layoutreturn(lrp);
+	}
+
+	dprintk("<-- %s status: %d\n", __func__, status);
+	return status;
+}
+
bool pnfs_roc(struct inode *ino)
{
struct pnfs_layout_hdr *lo;
{
struct pnfs_layout_hdr *lo;
- lo = kzalloc(sizeof(struct pnfs_layout_hdr), gfp_flags);
+ lo = pnfs_alloc_layout_hdr(ino, gfp_flags);
if (!lo)
return NULL;
atomic_set(&lo->plh_refcount, 1);
if (likely(nfsi->layout == NULL)) /* Won the race? */
nfsi->layout = new;
else
- kfree(new);
+ pnfs_free_layout_hdr(new);
return nfsi->layout;
}
ret = get_lseg(lseg);
break;
}
- if (cmp_layout(range, &lseg->pls_range) > 0)
+ if (lseg->pls_range.offset > range->offset)
break;
}
.offset = pos,
.length = count,
};
+ unsigned pg_offset;
struct nfs_inode *nfsi = NFS_I(ino);
- struct nfs_client *clp = NFS_SERVER(ino)->nfs_client;
+ struct nfs_server *server = NFS_SERVER(ino);
+ struct nfs_client *clp = server->nfs_client;
struct pnfs_layout_hdr *lo;
struct pnfs_layout_segment *lseg = NULL;
bool first = false;
*/
spin_lock(&clp->cl_lock);
BUG_ON(!list_empty(&lo->plh_layouts));
- list_add_tail(&lo->plh_layouts, &clp->cl_layouts);
+ list_add_tail(&lo->plh_layouts, &server->layouts);
spin_unlock(&clp->cl_lock);
}
+ pg_offset = arg.offset & ~PAGE_CACHE_MASK;
+ if (pg_offset) {
+ arg.offset -= pg_offset;
+ arg.length += pg_offset;
+ }
+ if (arg.length != NFS4_MAX_UINT64)
+ arg.length = PAGE_CACHE_ALIGN(arg.length);
+
lseg = send_layoutget(lo, ctx, &arg, gfp_flags);
if (!lseg && first) {
spin_lock(&clp->cl_lock);
spin_unlock(&ino->i_lock);
goto out;
}
+EXPORT_SYMBOL_GPL(pnfs_update_layout);
int
pnfs_layout_process(struct nfs4_layoutget *lgp)
goto out;
}
-static int pnfs_read_pg_test(struct nfs_pageio_descriptor *pgio,
- struct nfs_page *prev,
- struct nfs_page *req)
+/*
+ * Prepare @pgio for a pNFS read that starts with @req.
+ *
+ * Asks pnfs_update_layout() for an IOMODE_READ layout segment covering
+ * the request (offset req_offset(req), length req->wb_bytes) and stores
+ * the result in pgio->pg_lseg.  pgio->pg_lseg must be NULL on entry.
+ * When no segment is obtained the descriptor is re-initialised for reads
+ * through the MDS.
+ */
+void
+pnfs_generic_pg_init_read(struct nfs_pageio_descriptor *pgio, struct nfs_page *req)
{
- if (pgio->pg_count == prev->wb_bytes) {
- /* This is first coelesce call for a series of nfs_pages */
- pgio->pg_lseg = pnfs_update_layout(pgio->pg_inode,
- prev->wb_context,
- req_offset(req),
- pgio->pg_count,
- IOMODE_READ,
- GFP_KERNEL);
- } else if (pgio->pg_lseg &&
- req_offset(req) > end_offset(pgio->pg_lseg->pls_range.offset,
- pgio->pg_lseg->pls_range.length))
- return 0;
- return NFS_SERVER(pgio->pg_inode)->pnfs_curr_ld->pg_test(pgio, prev, req);
+ BUG_ON(pgio->pg_lseg != NULL);
+
+ pgio->pg_lseg = pnfs_update_layout(pgio->pg_inode,
+ req->wb_context,
+ req_offset(req),
+ req->wb_bytes,
+ IOMODE_READ,
+ GFP_KERNEL);
+ /* If no lseg, fall back to read through mds */
+ if (pgio->pg_lseg == NULL)
+ nfs_pageio_init_read_mds(pgio, pgio->pg_inode);
+
}
+EXPORT_SYMBOL_GPL(pnfs_generic_pg_init_read);
+/*
+ * Prepare @pgio for a pNFS write that starts with @req.
+ *
+ * Asks pnfs_update_layout() for an IOMODE_RW layout segment covering the
+ * request (allocating with GFP_NOFS) and stores the result in
+ * pgio->pg_lseg.  pgio->pg_lseg must be NULL on entry.  When no segment
+ * is obtained the descriptor is re-initialised for writes through the
+ * MDS, keeping pgio->pg_ioflags.
+ */
void
+pnfs_generic_pg_init_write(struct nfs_pageio_descriptor *pgio, struct nfs_page *req)
+{
+ BUG_ON(pgio->pg_lseg != NULL);
+
+ pgio->pg_lseg = pnfs_update_layout(pgio->pg_inode,
+ req->wb_context,
+ req_offset(req),
+ req->wb_bytes,
+ IOMODE_RW,
+ GFP_NOFS);
+ /* If no lseg, fall back to write through mds */
+ if (pgio->pg_lseg == NULL)
+ nfs_pageio_init_write_mds(pgio, pgio->pg_inode, pgio->pg_ioflags);
+}
+EXPORT_SYMBOL_GPL(pnfs_generic_pg_init_write);
+
+/*
+ * Initialise @pgio for pNFS reads on @inode.
+ *
+ * Returns false, leaving @pgio untouched, when the server has no current
+ * layout driver.  Otherwise calls nfs_pageio_init() with the driver's
+ * pg_read_ops and the server's rsize, and returns true.
+ */
+bool
pnfs_pageio_init_read(struct nfs_pageio_descriptor *pgio, struct inode *inode)
{
- struct pnfs_layoutdriver_type *ld;
+ struct nfs_server *server = NFS_SERVER(inode);
+ struct pnfs_layoutdriver_type *ld = server->pnfs_curr_ld;
- ld = NFS_SERVER(inode)->pnfs_curr_ld;
- pgio->pg_test = (ld && ld->pg_test) ? pnfs_read_pg_test : NULL;
+ if (ld == NULL)
+ return false;
+ nfs_pageio_init(pgio, inode, ld->pg_read_ops, server->rsize, 0);
+ return true;
}
-static int pnfs_write_pg_test(struct nfs_pageio_descriptor *pgio,
- struct nfs_page *prev,
- struct nfs_page *req)
+/*
+ * Initialise @pgio for pNFS writes on @inode.
+ *
+ * Returns false, leaving @pgio untouched, when the server has no current
+ * layout driver.  Otherwise calls nfs_pageio_init() with the driver's
+ * pg_write_ops, the server's wsize and @ioflags, and returns true.
+ */
+bool
+pnfs_pageio_init_write(struct nfs_pageio_descriptor *pgio, struct inode *inode, int ioflags)
{
- if (pgio->pg_count == prev->wb_bytes) {
- /* This is first coelesce call for a series of nfs_pages */
- pgio->pg_lseg = pnfs_update_layout(pgio->pg_inode,
- prev->wb_context,
- req_offset(req),
- pgio->pg_count,
- IOMODE_RW,
- GFP_NOFS);
- } else if (pgio->pg_lseg &&
- req_offset(req) > end_offset(pgio->pg_lseg->pls_range.offset,
- pgio->pg_lseg->pls_range.length))
- return 0;
- return NFS_SERVER(pgio->pg_inode)->pnfs_curr_ld->pg_test(pgio, prev, req);
+ struct nfs_server *server = NFS_SERVER(inode);
+ struct pnfs_layoutdriver_type *ld = server->pnfs_curr_ld;
+
+ if (ld == NULL)
+ return false;
+ nfs_pageio_init(pgio, inode, ld->pg_write_ops, server->wsize, ioflags);
+ return true;
}
-void
-pnfs_pageio_init_write(struct nfs_pageio_descriptor *pgio, struct inode *inode)
+/*
+ * Coalescing test used when a layout segment may be attached to @pgio.
+ *
+ * Without a segment the decision is delegated to nfs_generic_pg_test().
+ * With one, @req is accepted only if its start offset lies before the
+ * end of the segment's range.
+ */
+bool
+pnfs_generic_pg_test(struct nfs_pageio_descriptor *pgio, struct nfs_page *prev,
+		     struct nfs_page *req)
+{
+	struct pnfs_layout_segment *lseg = pgio->pg_lseg;
+
+	if (lseg != NULL) {
+		/*
+		 * Only the start offset of the request is compared.  That
+		 * relies on several assumptions:
+		 * - the previous nfs_page in the descriptor is already known
+		 *   to lie within the range;
+		 * - the nfs_page being tested is contiguous with the
+		 *   previous one;
+		 * - layout ranges are page aligned.
+		 *
+		 * 'end_offset' is the offset of the first byte that lies
+		 * outside the pnfs_layout_range, hence the strict '<'. FIXME?
+		 */
+		return req_offset(req) < end_offset(lseg->pls_range.offset,
+						    lseg->pls_range.length);
+	}
+
+	return nfs_generic_pg_test(pgio, prev, req);
+}
+EXPORT_SYMBOL_GPL(pnfs_generic_pg_test);
+
+/*
+ * Called by non rpc-based layout drivers when a write has finished.
+ *
+ * With no pnfs_error recorded in @data, the layoutcommit state is updated
+ * via pnfs_set_layoutcommit() and the MDS rpc_call_done and rpc_release
+ * callbacks are invoked; the function then returns 0.
+ *
+ * With pnfs_error set, the write is reissued through nfs_initiate_write()
+ * using the MDS call ops and NFS_FILE_SYNC.  The return value is that
+ * call's status when it is non-zero, and -EAGAIN when it is zero.
+ */
+int
+pnfs_ld_write_done(struct nfs_write_data *data)
{
- struct pnfs_layoutdriver_type *ld;
+ int status;
- ld = NFS_SERVER(inode)->pnfs_curr_ld;
- pgio->pg_test = (ld && ld->pg_test) ? pnfs_write_pg_test : NULL;
+ if (!data->pnfs_error) {
+ pnfs_set_layoutcommit(data);
+ data->mds_ops->rpc_call_done(&data->task, data);
+ data->mds_ops->rpc_release(data);
+ return 0;
+ }
+
+ dprintk("%s: pnfs_error=%d, retry via MDS\n", __func__,
+ data->pnfs_error);
+ status = nfs_initiate_write(data, NFS_CLIENT(data->inode),
+ data->mds_ops, NFS_FILE_SYNC);
+ /* GNU "?:" shorthand: status if non-zero, else -EAGAIN. */
+ return status ? : -EAGAIN;
+}
+EXPORT_SYMBOL_GPL(pnfs_ld_write_done);
+
+/*
+ * Give the pages of @data back to @desc so they are written via the MDS.
+ *
+ * The pages on data->pages are moved to the tail of desc->pg_list, the
+ * descriptor is reset with nfs_pageio_reset_write_mds(), pg_recoalesce is
+ * set and @data is released.
+ */
+static void
+pnfs_write_through_mds(struct nfs_pageio_descriptor *desc,
+ struct nfs_write_data *data)
+{
+ list_splice_tail_init(&data->pages, &desc->pg_list);
+ /* Also queue data->req if it is set and not on any wb_list. */
+ if (data->req && list_empty(&data->req->wb_list))
+ nfs_list_add_request(data->req, &desc->pg_list);
+ nfs_pageio_reset_write_mds(desc);
+ desc->pg_recoalesce = 1;
+ nfs_writedata_release(data);
}
-enum pnfs_try_status
+static enum pnfs_try_status
pnfs_try_to_write_data(struct nfs_write_data *wdata,
- const struct rpc_call_ops *call_ops, int how)
+ const struct rpc_call_ops *call_ops,
+ struct pnfs_layout_segment *lseg,
+ int how)
{
struct inode *inode = wdata->inode;
enum pnfs_try_status trypnfs;
struct nfs_server *nfss = NFS_SERVER(inode);
wdata->mds_ops = call_ops;
+ wdata->lseg = get_lseg(lseg);
dprintk("%s: Writing ino:%lu %u@%llu (how %d)\n", __func__,
inode->i_ino, wdata->args.count, wdata->args.offset, how);
return trypnfs;
}
+/*
+ * Submit every nfs_write_data queued on @head through
+ * pnfs_try_to_write_data().  Entries that come back as PNFS_NOT_ATTEMPTED
+ * are handed to pnfs_write_through_mds().  The descriptor's pg_lseg is
+ * cleared up front and put_lseg() is called on it once the list is
+ * drained.
+ */
+static void
+pnfs_do_multiple_writes(struct nfs_pageio_descriptor *desc, struct list_head *head, int how)
+{
+	const struct rpc_call_ops *ops = desc->pg_rpc_callops;
+	struct pnfs_layout_segment *seg = desc->pg_lseg;
+
+	desc->pg_lseg = NULL;
+	while (!list_empty(head)) {
+		struct nfs_write_data *wdata;
+
+		wdata = list_entry(head->next, struct nfs_write_data, list);
+		list_del_init(&wdata->list);
+
+		if (pnfs_try_to_write_data(wdata, ops, seg, how) == PNFS_NOT_ATTEMPTED)
+			pnfs_write_through_mds(desc, wdata);
+	}
+	put_lseg(seg);
+}
+
+/*
+ * Build the write requests for @desc with nfs_generic_flush() and submit
+ * them via pnfs_do_multiple_writes().  If building fails, put_lseg() is
+ * called on the descriptor's layout segment, pg_lseg is cleared, and the
+ * error is returned.  Returns 0 otherwise.
+ */
+int
+pnfs_generic_pg_writepages(struct nfs_pageio_descriptor *desc)
+{
+	LIST_HEAD(head);
+	int ret = nfs_generic_flush(desc, &head);
+
+	if (ret == 0) {
+		pnfs_do_multiple_writes(desc, &head, desc->pg_ioflags);
+		return 0;
+	}
+
+	put_lseg(desc->pg_lseg);
+	desc->pg_lseg = NULL;
+	return ret;
+}
+EXPORT_SYMBOL_GPL(pnfs_generic_pg_writepages);
+
+/*
+ * Completion hook for layout drivers that are not rpc-based.
+ *
+ * If the driver recorded a pnfs_error the read is resent through the MDS;
+ * the result is the resend's status when non-zero, else -EAGAIN.
+ * Otherwise the read is finished by calling __nfs4_read_done_cb() and the
+ * MDS rpc_call_done / rpc_release callbacks, and 0 is returned.
+ */
+int
+pnfs_ld_read_done(struct nfs_read_data *data)
+{
+	int status;
+
+	if (data->pnfs_error) {
+		dprintk("%s: pnfs_error=%d, retry via MDS\n", __func__,
+			data->pnfs_error);
+		status = nfs_initiate_read(data, NFS_CLIENT(data->inode),
+					   data->mds_ops);
+		if (status)
+			return status;
+		return -EAGAIN;
+	}
+
+	__nfs4_read_done_cb(data);
+	data->mds_ops->rpc_call_done(&data->task, data);
+	data->mds_ops->rpc_release(data);
+	return 0;
+}
+EXPORT_SYMBOL_GPL(pnfs_ld_read_done);
+
+/*
+ * Give the pages of @data back to @desc so they are read via the MDS.
+ *
+ * The pages on data->pages are moved to the tail of desc->pg_list, the
+ * descriptor is reset with nfs_pageio_reset_read_mds(), pg_recoalesce is
+ * set and @data is released.
+ */
+static void
+pnfs_read_through_mds(struct nfs_pageio_descriptor *desc,
+ struct nfs_read_data *data)
+{
+ list_splice_tail_init(&data->pages, &desc->pg_list);
+ /* Also queue data->req if it is set and not on any wb_list. */
+ if (data->req && list_empty(&data->req->wb_list))
+ nfs_list_add_request(data->req, &desc->pg_list);
+ nfs_pageio_reset_read_mds(desc);
+ desc->pg_recoalesce = 1;
+ nfs_readdata_release(data);
+}
+
/*
* Call the appropriate parallel I/O subsystem read function.
*/
-enum pnfs_try_status
+static enum pnfs_try_status
pnfs_try_to_read_data(struct nfs_read_data *rdata,
- const struct rpc_call_ops *call_ops)
+ const struct rpc_call_ops *call_ops,
+ struct pnfs_layout_segment *lseg)
{
struct inode *inode = rdata->inode;
struct nfs_server *nfss = NFS_SERVER(inode);
enum pnfs_try_status trypnfs;
rdata->mds_ops = call_ops;
+ rdata->lseg = get_lseg(lseg);
dprintk("%s: Reading ino:%lu %u@%llu\n",
__func__, inode->i_ino, rdata->args.count, rdata->args.offset);
return trypnfs;
}
+/*
+ * Submit every nfs_read_data queued on @head through
+ * pnfs_try_to_read_data().  Entries that come back as PNFS_NOT_ATTEMPTED
+ * are handed to pnfs_read_through_mds().  The descriptor's pg_lseg is
+ * cleared up front and put_lseg() is called on it once the list is
+ * drained.
+ */
+static void
+pnfs_do_multiple_reads(struct nfs_pageio_descriptor *desc, struct list_head *head)
+{
+	const struct rpc_call_ops *ops = desc->pg_rpc_callops;
+	struct pnfs_layout_segment *seg = desc->pg_lseg;
+
+	desc->pg_lseg = NULL;
+	while (!list_empty(head)) {
+		struct nfs_read_data *rdata;
+
+		rdata = list_entry(head->next, struct nfs_read_data, list);
+		list_del_init(&rdata->list);
+
+		if (pnfs_try_to_read_data(rdata, ops, seg) == PNFS_NOT_ATTEMPTED)
+			pnfs_read_through_mds(desc, rdata);
+	}
+	put_lseg(seg);
+}
+
+/*
+ * Build the read requests for @desc with nfs_generic_pagein() and submit
+ * them via pnfs_do_multiple_reads().  If building fails, put_lseg() is
+ * called on the descriptor's layout segment, pg_lseg is cleared, and the
+ * error is returned.  Returns 0 otherwise.
+ */
+int
+pnfs_generic_pg_readpages(struct nfs_pageio_descriptor *desc)
+{
+	LIST_HEAD(head);
+	int ret = nfs_generic_pagein(desc, &head);
+
+	if (ret == 0) {
+		pnfs_do_multiple_reads(desc, &head);
+		return 0;
+	}
+
+	put_lseg(desc->pg_lseg);
+	desc->pg_lseg = NULL;
+	return ret;
+}
+EXPORT_SYMBOL_GPL(pnfs_generic_pg_readpages);
+
/*
* Currently there is only one (whole file) write lseg.
*/
pnfs_set_layoutcommit(struct nfs_write_data *wdata)
{
struct nfs_inode *nfsi = NFS_I(wdata->inode);
- loff_t end_pos = wdata->args.offset + wdata->res.count;
+ loff_t end_pos = wdata->mds_offset + wdata->res.count;
bool mark_as_dirty = false;
spin_lock(&nfsi->vfs_inode.i_lock);