6 * Partial copy of Linus' read cache modifications to fs/nfs/file.c
7 * modified for async RPC by okir@monad.swb.de
10 #include <linux/time.h>
11 #include <linux/kernel.h>
12 #include <linux/errno.h>
13 #include <linux/fcntl.h>
14 #include <linux/stat.h>
16 #include <linux/slab.h>
17 #include <linux/pagemap.h>
18 #include <linux/sunrpc/clnt.h>
19 #include <linux/nfs_fs.h>
20 #include <linux/nfs_page.h>
21 #include <linux/module.h>
23 #include <asm/system.h>
/* dprintk() output from this file is tagged with the pagecache facility. */
31 #define NFSDBG_FACILITY NFSDBG_PAGECACHE
/* Forward declarations for the ops tables defined later in this file. */
33 static const struct nfs_pageio_ops nfs_pageio_read_ops;
34 static const struct rpc_call_ops nfs_read_partial_ops;
35 static const struct rpc_call_ops nfs_read_full_ops;
/* Slab cache and mempool backing struct nfs_read_data allocations. */
37 static struct kmem_cache *nfs_rdata_cachep;
38 static mempool_t *nfs_rdata_mempool;
/* Minimum number of read descriptors kept preallocated in the mempool. */
40 #define MIN_POOL_READ (32)
/*
 * Allocate a read descriptor able to track @pagecount pages.
 * The descriptor comes from nfs_rdata_mempool; the page vector uses the
 * embedded page_array when it fits, otherwise a separate kcalloc()'d
 * array.  NOTE(review): listing is elided here — presumably the
 * kcalloc-failure branch below frees the descriptor and returns NULL;
 * confirm against the full source.
 */
42 struct nfs_read_data *nfs_readdata_alloc(unsigned int pagecount)
44 struct nfs_read_data *p = mempool_alloc(nfs_rdata_mempool, GFP_KERNEL);
47 memset(p, 0, sizeof(*p));
48 INIT_LIST_HEAD(&p->pages);
49 p->npages = pagecount;
/* Small requests reuse the inline array and avoid a second allocation. */
50 if (pagecount <= ARRAY_SIZE(p->page_array))
51 p->pagevec = p->page_array;
53 p->pagevec = kcalloc(pagecount, sizeof(struct page *), GFP_KERNEL);
/* Page-vector allocation failed: return the descriptor to the pool. */
55 mempool_free(p, nfs_rdata_mempool);
/*
 * Free a read descriptor: when pagevec points at a separately allocated
 * array (not the embedded page_array) it is freed first (kfree elided
 * from this listing), then the descriptor returns to the mempool.
 */
63 void nfs_readdata_free(struct nfs_read_data *p)
65 if (p && (p->pagevec != &p->page_array[0]))
67 mempool_free(p, nfs_rdata_mempool);
/*
 * Drop everything a completed read holds: the pNFS layout segment, the
 * open context taken in nfs_read_rpcsetup(), and the descriptor itself.
 */
70 static void nfs_readdata_release(struct nfs_read_data *rdata)
72 put_lseg(rdata->lseg);
73 put_nfs_open_context(rdata->args.context);
74 nfs_readdata_free(rdata);
/*
 * Zero-fill @page and mark it up to date; used when there is no file
 * data to read for the page (request length of zero).
 */
78 int nfs_return_empty_page(struct page *page)
80 zero_user(page, 0, PAGE_CACHE_SIZE);
81 SetPageUptodate(page);
/*
 * After a short read that reached EOF, zero the tail of the request
 * that the server did not fill, so stale page contents are never
 * exposed to userspace.
 */
86 static void nfs_readpage_truncate_uninitialised_page(struct nfs_read_data *data)
88 unsigned int remainder = data->args.count - data->res.count;
89 unsigned int base = data->args.pgbase + data->res.count;
/* Nothing to do unless this was a short read that hit EOF. */
93 if (data->res.eof == 0 || remainder == 0)
96 * Note: "remainder" can never be negative, since we check for
97 * this in the XDR code.
99 pages = &data->args.pages[base >> PAGE_CACHE_SHIFT];
100 base &= ~PAGE_CACHE_MASK;
101 pglen = PAGE_CACHE_SIZE - base;
/* Zero either the partial tail of the first page, or whole pages. */
103 if (remainder <= pglen) {
104 zero_user(*pages, base, remainder);
107 zero_user(*pages, base, pglen);
/* Subsequent pages are zeroed from offset 0 for a full page at a time. */
110 pglen = PAGE_CACHE_SIZE;
/*
 * Initialize a pageio descriptor for reads sent to the metadata server,
 * coalescing up to the server's negotiated rsize per RPC.
 */
115 void nfs_pageio_init_read_mds(struct nfs_pageio_descriptor *pgio,
118 nfs_pageio_init(pgio, inode, &nfs_pageio_read_ops,
119 NFS_SERVER(inode)->rsize, 0);
121 EXPORT_SYMBOL_GPL(nfs_pageio_init_read_mds);
/* Try the pNFS read setup first; fall back to the MDS path if it declines. */
123 static void nfs_pageio_init_read(struct nfs_pageio_descriptor *pgio,
126 if (!pnfs_pageio_init_read(pgio, inode))
127 nfs_pageio_init_read_mds(pgio, inode);
/*
 * Start an asynchronous read of one page on behalf of @ctx.  A
 * zero-length request short-circuits to an empty zeroed page; otherwise
 * a new nfs_page request is built, the tail of the page beyond the
 * valid file data is pre-zeroed, and the request is pushed through a
 * freshly initialized pageio descriptor.
 */
130 int nfs_readpage_async(struct nfs_open_context *ctx, struct inode *inode,
133 struct nfs_page *new;
135 struct nfs_pageio_descriptor pgio;
137 len = nfs_page_length(page);
139 return nfs_return_empty_page(page);
140 new = nfs_create_request(ctx, inode, page, 0, len);
/* Zero the part of the page past the end of the file data. */
145 if (len < PAGE_CACHE_SIZE)
146 zero_user_segment(page, len, PAGE_CACHE_SIZE);
148 nfs_pageio_init_read(&pgio, inode);
149 nfs_pageio_add_request(&pgio, new);
150 nfs_pageio_complete(&pgio);
/*
 * Per-request completion: hand fully up-to-date pages to fscache,
 * unlock the page, and drop the request reference.
 */
154 static void nfs_readpage_release(struct nfs_page *req)
156 struct inode *d_inode = req->wb_context->path.dentry->d_inode;
/* Only cache pages the server filled completely. */
158 if (PageUptodate(req->wb_page))
159 nfs_readpage_to_fscache(d_inode, req->wb_page, 0);
161 unlock_page(req->wb_page);
163 dprintk("NFS: read done (%s/%Ld %d@%Ld)\n",
164 req->wb_context->path.dentry->d_inode->i_sb->s_id,
165 (long long)NFS_FILEID(req->wb_context->path.dentry->d_inode),
167 (long long)req_offset(req));
168 nfs_release_request(req);
/*
 * Fill in the RPC message for @data and launch it as an async task via
 * @clnt.  Swap-file I/O adds NFS_RPC_SWAPFLAGS so reads keep making
 * progress under memory pressure.  Returns the PTR_ERR() of a failed
 * rpc_run_task(); success path is elided from this listing.
 */
171 int nfs_initiate_read(struct nfs_read_data *data, struct rpc_clnt *clnt,
172 const struct rpc_call_ops *call_ops)
174 struct inode *inode = data->inode;
175 int swap_flags = IS_SWAPFILE(inode) ? NFS_RPC_SWAPFLAGS : 0;
176 struct rpc_task *task;
177 struct rpc_message msg = {
178 .rpc_argp = &data->args,
179 .rpc_resp = &data->res,
180 .rpc_cred = data->cred,
182 struct rpc_task_setup task_setup_data = {
186 .callback_ops = call_ops,
187 .callback_data = data,
188 .workqueue = nfsiod_workqueue,
189 .flags = RPC_TASK_ASYNC | swap_flags,
192 /* Set up the initial task struct. */
193 NFS_PROTO(inode)->read_setup(data, &msg);
195 dprintk("NFS: %5u initiated read call (req %s/%lld, %u bytes @ "
199 (long long)NFS_FILEID(inode),
201 (unsigned long long)data->args.offset);
203 task = rpc_run_task(&task_setup_data);
205 return PTR_ERR(task);
212 * Set up the NFS read request struct
/*
 * Populate @data's RPC arguments/results for reading @count bytes of
 * @req starting @offset bytes into the request.  Takes a reference on
 * the open context (released in nfs_readdata_release()).
 */
214 static void nfs_read_rpcsetup(struct nfs_page *req, struct nfs_read_data *data,
215 unsigned int count, unsigned int offset)
217 struct inode *inode = req->wb_context->path.dentry->d_inode;
221 data->cred = req->wb_context->cred;
223 data->args.fh = NFS_FH(inode);
224 data->args.offset = req_offset(req) + offset;
225 data->args.pgbase = req->wb_pgbase + offset;
226 data->args.pages = data->pagevec;
227 data->args.count = count;
228 data->args.context = get_nfs_open_context(req->wb_context);
229 data->args.lock_context = req->wb_lock_context;
231 data->res.fattr = &data->fattr;
/* res.count starts at the full request; the server may return less. */
232 data->res.count = count;
234 nfs_fattr_init(&data->fattr);
/*
 * Dispatch a single prepared read.  Tries the pNFS data path first;
 * when pNFS does not attempt the I/O, the lseg reference is dropped and
 * the read is issued through the metadata server instead.
 */
237 static int nfs_do_read(struct nfs_read_data *data,
238 const struct rpc_call_ops *call_ops,
239 struct pnfs_layout_segment *lseg)
241 struct inode *inode = data->args.context->path.dentry->d_inode;
244 data->lseg = get_lseg(lseg);
245 if (pnfs_try_to_read_data(data, call_ops) == PNFS_ATTEMPTED)
/* pNFS declined: release its reference and go via the MDS. */
247 put_lseg(data->lseg);
251 return nfs_initiate_read(data, NFS_CLIENT(inode), call_ops);
/*
 * Drain @head, dispatching each queued nfs_read_data via nfs_do_read().
 * NOTE(review): return-value aggregation (ret/ret2 handling) is elided
 * from this listing — presumably the first error is preserved; confirm
 * against the full source.
 */
255 nfs_do_multiple_reads(struct list_head *head,
256 const struct rpc_call_ops *call_ops,
257 struct pnfs_layout_segment *lseg)
259 struct nfs_read_data *data;
262 while (!list_empty(head)) {
265 data = list_entry(head->next, struct nfs_read_data, list);
266 list_del_init(&data->list);
268 ret2 = nfs_do_read(data, call_ops, lseg);
/*
 * Fail every request still queued on @head: flag the page in error and
 * release the request (which unlocks the page).
 */
276 nfs_async_read_error(struct list_head *head)
278 struct nfs_page *req;
280 while (!list_empty(head)) {
281 req = nfs_list_entry(head->next);
282 nfs_list_remove_request(req);
283 SetPageError(req->wb_page);
284 nfs_readpage_release(req);
289 * Generate multiple requests to fill a single page.
291 * We optimize to reduce the number of read operations on the wire. If we
292 * detect that we're reading a page, or an area of a page, that is past the
293 * end of file, we do not generate NFS read operations but just clear the
294 * parts of the page that would have come back zero from the server anyway.
296 * We rely on the cached value of i_size to make this determination; another
297 * client can fill pages on the server past our cached end-of-file, but we
298 * won't see the new data until our attribute cache is updated. This is more
299 * or less conventional NFS client behavior.
/*
 * Split a single-page read into several RPCs when the server's rsize is
 * smaller than a page: one nfs_read_data per rsize-sized slice, all
 * queued on @res for later dispatch.
 */
301 static int nfs_pagein_multi(struct nfs_pageio_descriptor *desc, struct list_head *res)
303 struct nfs_page *req = nfs_list_entry(desc->pg_list.next);
304 struct page *page = req->wb_page;
305 struct nfs_read_data *data;
306 size_t rsize = NFS_SERVER(desc->pg_inode)->rsize, nbytes;
311 nfs_list_remove_request(req);
314 nbytes = desc->pg_count;
/* Carve off at most rsize bytes per RPC until the request is covered. */
316 size_t len = min(nbytes,rsize);
318 data = nfs_readdata_alloc(1);
321 data->pagevec[0] = page;
322 nfs_read_rpcsetup(req, data, len, offset);
323 list_add(&data->list, res);
327 } while(nbytes != 0);
/* wb_complete counts outstanding slices; release fires on the last one. */
328 atomic_set(&req->wb_complete, requests);
329 ClearPageError(page);
/*
 * NOTE(review): lines elided — the loop below appears to be the
 * allocation-failure path, freeing descriptors already queued before
 * releasing the request; confirm against the full source.
 */
332 while (!list_empty(res)) {
333 data = list_entry(res->next, struct nfs_read_data, list);
334 list_del(&data->list);
335 nfs_readdata_free(data);
338 nfs_readpage_release(req);
/*
 * Coalesce the descriptor's whole request list into a single RPC: move
 * each request onto the data's page list, collect its page into the
 * page vector, and queue one nfs_read_data on @res.
 */
342 static int nfs_pagein_one(struct nfs_pageio_descriptor *desc, struct list_head *res)
344 struct nfs_page *req;
346 struct nfs_read_data *data;
347 struct list_head *head = &desc->pg_list;
350 data = nfs_readdata_alloc(nfs_page_array_len(desc->pg_base,
/* Allocation failed: fail every queued request. */
353 nfs_async_read_error(head);
358 pages = data->pagevec;
359 while (!list_empty(head)) {
360 req = nfs_list_entry(head->next);
361 nfs_list_remove_request(req);
362 nfs_list_add_request(req, &data->pages);
363 ClearPageError(req->wb_page);
364 *pages++ = req->wb_page;
/* The first request anchors the RPC's offset/pgbase/context. */
366 req = nfs_list_entry(data->pages.next);
368 nfs_read_rpcsetup(req, data, desc->pg_count, 0);
369 list_add(&data->list, res);
/*
 * pg_doio callback: build either many sub-page RPCs (block size below
 * page size) or one RPC for the whole list, dispatch them with the
 * matching partial/full completion ops, then drop the descriptor's
 * layout-segment reference.
 */
374 int nfs_generic_pg_readpages(struct nfs_pageio_descriptor *desc)
379 if (desc->pg_bsize < PAGE_CACHE_SIZE) {
380 ret = nfs_pagein_multi(desc, &head);
382 ret = nfs_do_multiple_reads(&head,
383 &nfs_read_partial_ops,
386 ret = nfs_pagein_one(desc, &head);
388 ret = nfs_do_multiple_reads(&head,
392 put_lseg(desc->pg_lseg);
393 desc->pg_lseg = NULL;
/* Generic (non-pNFS) pageio operations for the read path. */
399 static const struct nfs_pageio_ops nfs_pageio_read_ops = {
400 .pg_test = nfs_generic_pg_test,
401 .pg_doio = nfs_generic_pg_readpages,
405 * This is the callback from RPC telling us whether a reply was
406 * received or some error occurred (timeout or socket shutdown).
/*
 * Common RPC-completion handling: let the protocol layer post-process
 * the reply, account the bytes read, and on -ESTALE mark the inode
 * stale and schedule attribute revalidation.
 */
408 int nfs_readpage_result(struct rpc_task *task, struct nfs_read_data *data)
412 dprintk("NFS: %s: %5u, (status %d)\n", __func__, task->tk_pid,
415 status = NFS_PROTO(data->inode)->read_done(task, data);
419 nfs_add_stats(data->inode, NFSIOS_SERVERREADBYTES, data->res.count);
421 if (task->tk_status == -ESTALE) {
422 set_bit(NFS_INO_STALE, &NFS_I(data->inode)->flags);
423 nfs_mark_for_revalidate(data->inode);
/*
 * Handle a short read.  If the reply covered everything or hit EOF,
 * nothing to do.  If the server made progress, advance the offsets by
 * the bytes received, shrink the remaining count, and restart the RPC
 * to fetch the rest.
 */
428 static void nfs_readpage_retry(struct rpc_task *task, struct nfs_read_data *data)
430 struct nfs_readargs *argp = &data->args;
431 struct nfs_readres *resp = &data->res;
433 if (resp->eof || resp->count == argp->count)
436 /* This is a short read! */
437 nfs_inc_stats(data->inode, NFSIOS_SHORTREAD);
438 /* Has the server at least made some progress? */
439 if (resp->count == 0)
442 /* Yes, so retry the read at the end of the data */
443 data->mds_offset += resp->count;
444 argp->offset += resp->count;
445 argp->pgbase += resp->count;
446 argp->count -= resp->count;
447 nfs_restart_rpc(task, NFS_SERVER(data->inode)->nfs_client);
451 * Handle a read reply that fills part of a page.
/*
 * rpc_call_done for the sub-page (partial) case: run common result
 * handling, then on success zero any unfilled tail and retry short
 * reads.
 */
453 static void nfs_readpage_result_partial(struct rpc_task *task, void *calldata)
455 struct nfs_read_data *data = calldata;
457 if (nfs_readpage_result(task, data) != 0)
459 if (task->tk_status < 0)
462 nfs_readpage_truncate_uninitialised_page(data);
463 nfs_readpage_retry(task, data);
/*
 * rpc_release for the sub-page case: when the last outstanding slice of
 * the page completes (wb_complete reaches zero), mark the page up to
 * date unless an error was flagged, and release the owning request.
 * NOTE(review): the use of 'status' is elided from this listing —
 * presumably it sets PageError on failure; confirm against full source.
 */
466 static void nfs_readpage_release_partial(void *calldata)
468 struct nfs_read_data *data = calldata;
469 struct nfs_page *req = data->req;
470 struct page *page = req->wb_page;
471 int status = data->task.tk_status;
476 if (atomic_dec_and_test(&req->wb_complete)) {
477 if (!PageError(page))
478 SetPageUptodate(page);
479 nfs_readpage_release(req);
481 nfs_readdata_release(calldata);
484 #if defined(CONFIG_NFS_V4_1)
/*
 * rpc_call_prepare for NFSv4.1: reserve a session slot before the call
 * proceeds; rpc_call_start() is only reached when a slot is available.
 */
485 void nfs_read_prepare(struct rpc_task *task, void *calldata)
487 struct nfs_read_data *data = calldata;
489 if (nfs4_setup_sequence(NFS_SERVER(data->inode),
490 &data->args.seq_args, &data->res.seq_res,
493 rpc_call_start(task);
495 #endif /* CONFIG_NFS_V4_1 */
/* Callbacks for reads that cover only part of a page (multi-RPC case). */
497 static const struct rpc_call_ops nfs_read_partial_ops = {
498 #if defined(CONFIG_NFS_V4_1)
499 .rpc_call_prepare = nfs_read_prepare,
500 #endif /* CONFIG_NFS_V4_1 */
501 .rpc_call_done = nfs_readpage_result_partial,
502 .rpc_release = nfs_readpage_release_partial,
/*
 * Mark every page that was completely filled by the reply as up to
 * date.  A trailing partially-filled page only counts when the read
 * reached EOF or the full requested count arrived.
 */
505 static void nfs_readpage_set_pages_uptodate(struct nfs_read_data *data)
507 unsigned int count = data->res.count;
508 unsigned int base = data->args.pgbase;
512 count = data->args.count;
513 if (unlikely(count == 0))
515 pages = &data->args.pages[base >> PAGE_CACHE_SHIFT];
516 base &= ~PAGE_CACHE_MASK;
518 for (;count >= PAGE_CACHE_SIZE; count -= PAGE_CACHE_SIZE, pages++)
519 SetPageUptodate(*pages);
522 /* Was this a short read? */
523 if (data->res.eof || data->res.count == data->args.count)
524 SetPageUptodate(*pages);
528 * This is the callback from RPC telling us whether a reply was
529 * received or some error occurred (timeout or socket shutdown).
/*
 * rpc_call_done for the whole-page (coalesced) case: run common result
 * handling, then zero unfilled tails, mark completed pages up to date,
 * and retry short reads.
 */
531 static void nfs_readpage_result_full(struct rpc_task *task, void *calldata)
533 struct nfs_read_data *data = calldata;
535 if (nfs_readpage_result(task, data) != 0)
537 if (task->tk_status < 0)
540 * Note: nfs_readpage_retry may change the values of
541 * data->args. In the multi-page case, we therefore need
542 * to ensure that we call nfs_readpage_set_pages_uptodate()
545 nfs_readpage_truncate_uninitialised_page(data);
546 nfs_readpage_set_pages_uptodate(data);
547 nfs_readpage_retry(task, data);
/*
 * rpc_release for the coalesced case: release every request attached to
 * this RPC, then free the descriptor and its references.
 */
550 static void nfs_readpage_release_full(void *calldata)
552 struct nfs_read_data *data = calldata;
554 while (!list_empty(&data->pages)) {
555 struct nfs_page *req = nfs_list_entry(data->pages.next);
557 nfs_list_remove_request(req);
558 nfs_readpage_release(req);
560 nfs_readdata_release(calldata);
/* Callbacks for reads that cover one or more whole pages in a single RPC. */
563 static const struct rpc_call_ops nfs_read_full_ops = {
564 #if defined(CONFIG_NFS_V4_1)
565 .rpc_call_prepare = nfs_read_prepare,
566 #endif /* CONFIG_NFS_V4_1 */
567 .rpc_call_done = nfs_readpage_result_full,
568 .rpc_release = nfs_readpage_release_full,
572 * Read a page over NFS.
573 * We read the page synchronously in the following case:
574 * - The error flag is set for this page. This happens only when a
575 * previous async read operation failed.
/*
 * ->readpage implementation.  Flushes pending writes for the page,
 * short-circuits if that already made it up to date or the inode is
 * stale, finds/uses an open context, tries fscache (unless the inode is
 * O_SYNC), and finally issues an async NFS read.
 */
577 int nfs_readpage(struct file *file, struct page *page)
579 struct nfs_open_context *ctx;
580 struct inode *inode = page->mapping->host;
583 dprintk("NFS: nfs_readpage (%p %ld@%lu)\n",
584 page, PAGE_CACHE_SIZE, page->index);
585 nfs_inc_stats(inode, NFSIOS_VFSREADPAGE);
586 nfs_add_stats(inode, NFSIOS_READPAGES, 1);
589 * Try to flush any pending writes to the file..
591 * NOTE! Because we own the page lock, there cannot
592 * be any new pending writes generated at this point
593 * for this page (other pages can be written to).
595 error = nfs_wb_page(inode, page);
/* The flush may have brought the page up to date — nothing left to do. */
598 if (PageUptodate(page))
602 if (NFS_STALE(inode))
/* No struct file (e.g. swap-in): locate any readable open context. */
607 ctx = nfs_find_open_context(inode, NULL, FMODE_READ);
611 ctx = get_nfs_open_context(nfs_file_open_context(file));
613 if (!IS_SYNC(inode)) {
614 error = nfs_readpage_from_fscache(ctx, inode, page);
619 error = nfs_readpage_async(ctx, inode, page);
622 put_nfs_open_context(ctx);
/* Per-call state threaded through read_cache_pages() into the filler. */
629 struct nfs_readdesc {
630 struct nfs_pageio_descriptor *pgio;
631 struct nfs_open_context *ctx;
/*
 * read_cache_pages() callback: build an nfs_page request for @page,
 * pre-zero the tail past the valid data, and queue it on the shared
 * pageio descriptor carried in @data (a struct nfs_readdesc).
 */
635 readpage_async_filler(void *data, struct page *page)
637 struct nfs_readdesc *desc = (struct nfs_readdesc *)data;
638 struct inode *inode = page->mapping->host;
639 struct nfs_page *new;
643 len = nfs_page_length(page);
645 return nfs_return_empty_page(page);
647 new = nfs_create_request(desc->ctx, inode, page, 0, len);
651 if (len < PAGE_CACHE_SIZE)
652 zero_user_segment(page, len, PAGE_CACHE_SIZE);
/* Queueing can fail; propagate the descriptor's error to the caller. */
653 if (!nfs_pageio_add_request(desc->pgio, new)) {
654 error = desc->pgio->pg_error;
659 error = PTR_ERR(new);
/*
 * ->readpages implementation.  Bails out on a stale inode, resolves an
 * open context (from @filp or by lookup when called without one), lets
 * fscache satisfy what it can, then streams the remaining pages through
 * readpage_async_filler() and a single pageio descriptor.
 */
666 int nfs_readpages(struct file *filp, struct address_space *mapping,
667 struct list_head *pages, unsigned nr_pages)
669 struct nfs_pageio_descriptor pgio;
670 struct nfs_readdesc desc = {
673 struct inode *inode = mapping->host;
674 unsigned long npages;
677 dprintk("NFS: nfs_readpages (%s/%Ld %d)\n",
679 (long long)NFS_FILEID(inode),
681 nfs_inc_stats(inode, NFSIOS_VFSREADPAGES);
683 if (NFS_STALE(inode))
687 desc.ctx = nfs_find_open_context(inode, NULL, FMODE_READ);
688 if (desc.ctx == NULL)
691 desc.ctx = get_nfs_open_context(nfs_file_open_context(filp));
693 /* attempt to read as many of the pages as possible from the cache
694 * - this returns -ENOBUFS immediately if the cookie is negative
696 ret = nfs_readpages_from_fscache(desc.ctx, inode, mapping,
699 goto read_complete; /* all pages were read */
701 nfs_pageio_init_read(&pgio, inode);
703 ret = read_cache_pages(mapping, pages, readpage_async_filler, &desc);
705 nfs_pageio_complete(&pgio);
/* Account whole pages actually written by the pageio engine. */
706 npages = (pgio.pg_bytes_written + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
707 nfs_add_stats(inode, NFSIOS_READPAGES, npages);
709 put_nfs_open_context(desc.ctx);
/*
 * Module init: create the nfs_read_data slab cache and a mempool that
 * guarantees MIN_POOL_READ preallocated descriptors, so reads can make
 * progress even under memory pressure.
 */
714 int __init nfs_init_readpagecache(void)
716 nfs_rdata_cachep = kmem_cache_create("nfs_read_data",
717 sizeof(struct nfs_read_data),
718 0, SLAB_HWCACHE_ALIGN,
720 if (nfs_rdata_cachep == NULL)
723 nfs_rdata_mempool = mempool_create_slab_pool(MIN_POOL_READ,
725 if (nfs_rdata_mempool == NULL)
/* Module teardown: destroy the mempool before its backing slab cache. */
731 void nfs_destroy_readpagecache(void)
733 mempool_destroy(nfs_rdata_mempool);
734 kmem_cache_destroy(nfs_rdata_cachep);