]> Pileus Git - ~andy/linux/blob - fs/cifs/file.c
Merge tag 'pinctrl-fixes-v3.10-1' of git://git.kernel.org/pub/scm/linux/kernel/git...
[~andy/linux] / fs / cifs / file.c
1 /*
2  *   fs/cifs/file.c
3  *
4  *   vfs operations that deal with files
5  *
6  *   Copyright (C) International Business Machines  Corp., 2002,2010
7  *   Author(s): Steve French (sfrench@us.ibm.com)
8  *              Jeremy Allison (jra@samba.org)
9  *
10  *   This library is free software; you can redistribute it and/or modify
11  *   it under the terms of the GNU Lesser General Public License as published
12  *   by the Free Software Foundation; either version 2.1 of the License, or
13  *   (at your option) any later version.
14  *
15  *   This library is distributed in the hope that it will be useful,
16  *   but WITHOUT ANY WARRANTY; without even the implied warranty of
17  *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See
18  *   the GNU Lesser General Public License for more details.
19  *
20  *   You should have received a copy of the GNU Lesser General Public License
21  *   along with this library; if not, write to the Free Software
22  *   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
23  */
24 #include <linux/fs.h>
25 #include <linux/backing-dev.h>
26 #include <linux/stat.h>
27 #include <linux/fcntl.h>
28 #include <linux/pagemap.h>
29 #include <linux/pagevec.h>
30 #include <linux/writeback.h>
31 #include <linux/task_io_accounting_ops.h>
32 #include <linux/delay.h>
33 #include <linux/mount.h>
34 #include <linux/slab.h>
35 #include <linux/swap.h>
36 #include <asm/div64.h>
37 #include "cifsfs.h"
38 #include "cifspdu.h"
39 #include "cifsglob.h"
40 #include "cifsproto.h"
41 #include "cifs_unicode.h"
42 #include "cifs_debug.h"
43 #include "cifs_fs_sb.h"
44 #include "fscache.h"
45
46
47 static inline int cifs_convert_flags(unsigned int flags)
48 {
49         if ((flags & O_ACCMODE) == O_RDONLY)
50                 return GENERIC_READ;
51         else if ((flags & O_ACCMODE) == O_WRONLY)
52                 return GENERIC_WRITE;
53         else if ((flags & O_ACCMODE) == O_RDWR) {
54                 /* GENERIC_ALL is too much permission to request
55                    can cause unnecessary access denied on create */
56                 /* return GENERIC_ALL; */
57                 return (GENERIC_READ | GENERIC_WRITE);
58         }
59
60         return (READ_CONTROL | FILE_WRITE_ATTRIBUTES | FILE_READ_ATTRIBUTES |
61                 FILE_WRITE_EA | FILE_APPEND_DATA | FILE_WRITE_DATA |
62                 FILE_READ_DATA);
63 }
64
65 static u32 cifs_posix_convert_flags(unsigned int flags)
66 {
67         u32 posix_flags = 0;
68
69         if ((flags & O_ACCMODE) == O_RDONLY)
70                 posix_flags = SMB_O_RDONLY;
71         else if ((flags & O_ACCMODE) == O_WRONLY)
72                 posix_flags = SMB_O_WRONLY;
73         else if ((flags & O_ACCMODE) == O_RDWR)
74                 posix_flags = SMB_O_RDWR;
75
76         if (flags & O_CREAT) {
77                 posix_flags |= SMB_O_CREAT;
78                 if (flags & O_EXCL)
79                         posix_flags |= SMB_O_EXCL;
80         } else if (flags & O_EXCL)
81                 cFYI(1, "Application %s pid %d has incorrectly set O_EXCL flag"
82                         "but not O_CREAT on file open. Ignoring O_EXCL",
83                         current->comm, current->tgid);
84
85         if (flags & O_TRUNC)
86                 posix_flags |= SMB_O_TRUNC;
87         /* be safe and imply O_SYNC for O_DSYNC */
88         if (flags & O_DSYNC)
89                 posix_flags |= SMB_O_SYNC;
90         if (flags & O_DIRECTORY)
91                 posix_flags |= SMB_O_DIRECTORY;
92         if (flags & O_NOFOLLOW)
93                 posix_flags |= SMB_O_NOFOLLOW;
94         if (flags & O_DIRECT)
95                 posix_flags |= SMB_O_DIRECT;
96
97         return posix_flags;
98 }
99
100 static inline int cifs_get_disposition(unsigned int flags)
101 {
102         if ((flags & (O_CREAT | O_EXCL)) == (O_CREAT | O_EXCL))
103                 return FILE_CREATE;
104         else if ((flags & (O_CREAT | O_TRUNC)) == (O_CREAT | O_TRUNC))
105                 return FILE_OVERWRITE_IF;
106         else if ((flags & O_CREAT) == O_CREAT)
107                 return FILE_OPEN_IF;
108         else if ((flags & O_TRUNC) == O_TRUNC)
109                 return FILE_OVERWRITE;
110         else
111                 return FILE_OPEN;
112 }
113
114 int cifs_posix_open(char *full_path, struct inode **pinode,
115                         struct super_block *sb, int mode, unsigned int f_flags,
116                         __u32 *poplock, __u16 *pnetfid, unsigned int xid)
117 {
118         int rc;
119         FILE_UNIX_BASIC_INFO *presp_data;
120         __u32 posix_flags = 0;
121         struct cifs_sb_info *cifs_sb = CIFS_SB(sb);
122         struct cifs_fattr fattr;
123         struct tcon_link *tlink;
124         struct cifs_tcon *tcon;
125
126         cFYI(1, "posix open %s", full_path);
127
128         presp_data = kzalloc(sizeof(FILE_UNIX_BASIC_INFO), GFP_KERNEL);
129         if (presp_data == NULL)
130                 return -ENOMEM;
131
132         tlink = cifs_sb_tlink(cifs_sb);
133         if (IS_ERR(tlink)) {
134                 rc = PTR_ERR(tlink);
135                 goto posix_open_ret;
136         }
137
138         tcon = tlink_tcon(tlink);
139         mode &= ~current_umask();
140
141         posix_flags = cifs_posix_convert_flags(f_flags);
142         rc = CIFSPOSIXCreate(xid, tcon, posix_flags, mode, pnetfid, presp_data,
143                              poplock, full_path, cifs_sb->local_nls,
144                              cifs_sb->mnt_cifs_flags &
145                                         CIFS_MOUNT_MAP_SPECIAL_CHR);
146         cifs_put_tlink(tlink);
147
148         if (rc)
149                 goto posix_open_ret;
150
151         if (presp_data->Type == cpu_to_le32(-1))
152                 goto posix_open_ret; /* open ok, caller does qpathinfo */
153
154         if (!pinode)
155                 goto posix_open_ret; /* caller does not need info */
156
157         cifs_unix_basic_to_fattr(&fattr, presp_data, cifs_sb);
158
159         /* get new inode and set it up */
160         if (*pinode == NULL) {
161                 cifs_fill_uniqueid(sb, &fattr);
162                 *pinode = cifs_iget(sb, &fattr);
163                 if (!*pinode) {
164                         rc = -ENOMEM;
165                         goto posix_open_ret;
166                 }
167         } else {
168                 cifs_fattr_to_inode(*pinode, &fattr);
169         }
170
171 posix_open_ret:
172         kfree(presp_data);
173         return rc;
174 }
175
176 static int
177 cifs_nt_open(char *full_path, struct inode *inode, struct cifs_sb_info *cifs_sb,
178              struct cifs_tcon *tcon, unsigned int f_flags, __u32 *oplock,
179              struct cifs_fid *fid, unsigned int xid)
180 {
181         int rc;
182         int desired_access;
183         int disposition;
184         int create_options = CREATE_NOT_DIR;
185         FILE_ALL_INFO *buf;
186         struct TCP_Server_Info *server = tcon->ses->server;
187
188         if (!server->ops->open)
189                 return -ENOSYS;
190
191         desired_access = cifs_convert_flags(f_flags);
192
193 /*********************************************************************
194  *  open flag mapping table:
195  *
196  *      POSIX Flag            CIFS Disposition
197  *      ----------            ----------------
198  *      O_CREAT               FILE_OPEN_IF
199  *      O_CREAT | O_EXCL      FILE_CREATE
200  *      O_CREAT | O_TRUNC     FILE_OVERWRITE_IF
201  *      O_TRUNC               FILE_OVERWRITE
202  *      none of the above     FILE_OPEN
203  *
204  *      Note that there is not a direct match between disposition
205  *      FILE_SUPERSEDE (ie create whether or not file exists although
206  *      O_CREAT | O_TRUNC is similar but truncates the existing
207  *      file rather than creating a new file as FILE_SUPERSEDE does
208  *      (which uses the attributes / metadata passed in on open call)
209  *?
210  *?  O_SYNC is a reasonable match to CIFS writethrough flag
211  *?  and the read write flags match reasonably.  O_LARGEFILE
212  *?  is irrelevant because largefile support is always used
213  *?  by this client. Flags O_APPEND, O_DIRECT, O_DIRECTORY,
214  *       O_FASYNC, O_NOFOLLOW, O_NONBLOCK need further investigation
215  *********************************************************************/
216
217         disposition = cifs_get_disposition(f_flags);
218
219         /* BB pass O_SYNC flag through on file attributes .. BB */
220
221         buf = kmalloc(sizeof(FILE_ALL_INFO), GFP_KERNEL);
222         if (!buf)
223                 return -ENOMEM;
224
225         if (backup_cred(cifs_sb))
226                 create_options |= CREATE_OPEN_BACKUP_INTENT;
227
228         rc = server->ops->open(xid, tcon, full_path, disposition,
229                                desired_access, create_options, fid, oplock, buf,
230                                cifs_sb);
231
232         if (rc)
233                 goto out;
234
235         if (tcon->unix_ext)
236                 rc = cifs_get_inode_info_unix(&inode, full_path, inode->i_sb,
237                                               xid);
238         else
239                 rc = cifs_get_inode_info(&inode, full_path, buf, inode->i_sb,
240                                          xid, &fid->netfid);
241
242 out:
243         kfree(buf);
244         return rc;
245 }
246
247 static bool
248 cifs_has_mand_locks(struct cifsInodeInfo *cinode)
249 {
250         struct cifs_fid_locks *cur;
251         bool has_locks = false;
252
253         down_read(&cinode->lock_sem);
254         list_for_each_entry(cur, &cinode->llist, llist) {
255                 if (!list_empty(&cur->locks)) {
256                         has_locks = true;
257                         break;
258                 }
259         }
260         up_read(&cinode->lock_sem);
261         return has_locks;
262 }
263
264 struct cifsFileInfo *
265 cifs_new_fileinfo(struct cifs_fid *fid, struct file *file,
266                   struct tcon_link *tlink, __u32 oplock)
267 {
268         struct dentry *dentry = file->f_path.dentry;
269         struct inode *inode = dentry->d_inode;
270         struct cifsInodeInfo *cinode = CIFS_I(inode);
271         struct cifsFileInfo *cfile;
272         struct cifs_fid_locks *fdlocks;
273         struct cifs_tcon *tcon = tlink_tcon(tlink);
274         struct TCP_Server_Info *server = tcon->ses->server;
275
276         cfile = kzalloc(sizeof(struct cifsFileInfo), GFP_KERNEL);
277         if (cfile == NULL)
278                 return cfile;
279
280         fdlocks = kzalloc(sizeof(struct cifs_fid_locks), GFP_KERNEL);
281         if (!fdlocks) {
282                 kfree(cfile);
283                 return NULL;
284         }
285
286         INIT_LIST_HEAD(&fdlocks->locks);
287         fdlocks->cfile = cfile;
288         cfile->llist = fdlocks;
289         down_write(&cinode->lock_sem);
290         list_add(&fdlocks->llist, &cinode->llist);
291         up_write(&cinode->lock_sem);
292
293         cfile->count = 1;
294         cfile->pid = current->tgid;
295         cfile->uid = current_fsuid();
296         cfile->dentry = dget(dentry);
297         cfile->f_flags = file->f_flags;
298         cfile->invalidHandle = false;
299         cfile->tlink = cifs_get_tlink(tlink);
300         INIT_WORK(&cfile->oplock_break, cifs_oplock_break);
301         mutex_init(&cfile->fh_mutex);
302
303         cifs_sb_active(inode->i_sb);
304
305         /*
306          * If the server returned a read oplock and we have mandatory brlocks,
307          * set oplock level to None.
308          */
309         if (oplock == server->vals->oplock_read &&
310                                                 cifs_has_mand_locks(cinode)) {
311                 cFYI(1, "Reset oplock val from read to None due to mand locks");
312                 oplock = 0;
313         }
314
315         spin_lock(&cifs_file_list_lock);
316         if (fid->pending_open->oplock != CIFS_OPLOCK_NO_CHANGE && oplock)
317                 oplock = fid->pending_open->oplock;
318         list_del(&fid->pending_open->olist);
319
320         server->ops->set_fid(cfile, fid, oplock);
321
322         list_add(&cfile->tlist, &tcon->openFileList);
323         /* if readable file instance put first in list*/
324         if (file->f_mode & FMODE_READ)
325                 list_add(&cfile->flist, &cinode->openFileList);
326         else
327                 list_add_tail(&cfile->flist, &cinode->openFileList);
328         spin_unlock(&cifs_file_list_lock);
329
330         file->private_data = cfile;
331         return cfile;
332 }
333
334 struct cifsFileInfo *
335 cifsFileInfo_get(struct cifsFileInfo *cifs_file)
336 {
337         spin_lock(&cifs_file_list_lock);
338         cifsFileInfo_get_locked(cifs_file);
339         spin_unlock(&cifs_file_list_lock);
340         return cifs_file;
341 }
342
343 /*
344  * Release a reference on the file private data. This may involve closing
345  * the filehandle out on the server. Must be called without holding
346  * cifs_file_list_lock.
347  */
348 void cifsFileInfo_put(struct cifsFileInfo *cifs_file)
349 {
350         struct inode *inode = cifs_file->dentry->d_inode;
351         struct cifs_tcon *tcon = tlink_tcon(cifs_file->tlink);
352         struct TCP_Server_Info *server = tcon->ses->server;
353         struct cifsInodeInfo *cifsi = CIFS_I(inode);
354         struct super_block *sb = inode->i_sb;
355         struct cifs_sb_info *cifs_sb = CIFS_SB(sb);
356         struct cifsLockInfo *li, *tmp;
357         struct cifs_fid fid;
358         struct cifs_pending_open open;
359
360         spin_lock(&cifs_file_list_lock);
361         if (--cifs_file->count > 0) {
362                 spin_unlock(&cifs_file_list_lock);
363                 return;
364         }
365
366         if (server->ops->get_lease_key)
367                 server->ops->get_lease_key(inode, &fid);
368
369         /* store open in pending opens to make sure we don't miss lease break */
370         cifs_add_pending_open_locked(&fid, cifs_file->tlink, &open);
371
372         /* remove it from the lists */
373         list_del(&cifs_file->flist);
374         list_del(&cifs_file->tlist);
375
376         if (list_empty(&cifsi->openFileList)) {
377                 cFYI(1, "closing last open instance for inode %p",
378                         cifs_file->dentry->d_inode);
379                 /*
380                  * In strict cache mode we need invalidate mapping on the last
381                  * close  because it may cause a error when we open this file
382                  * again and get at least level II oplock.
383                  */
384                 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_STRICT_IO)
385                         CIFS_I(inode)->invalid_mapping = true;
386                 cifs_set_oplock_level(cifsi, 0);
387         }
388         spin_unlock(&cifs_file_list_lock);
389
390         cancel_work_sync(&cifs_file->oplock_break);
391
392         if (!tcon->need_reconnect && !cifs_file->invalidHandle) {
393                 struct TCP_Server_Info *server = tcon->ses->server;
394                 unsigned int xid;
395
396                 xid = get_xid();
397                 if (server->ops->close)
398                         server->ops->close(xid, tcon, &cifs_file->fid);
399                 _free_xid(xid);
400         }
401
402         cifs_del_pending_open(&open);
403
404         /*
405          * Delete any outstanding lock records. We'll lose them when the file
406          * is closed anyway.
407          */
408         down_write(&cifsi->lock_sem);
409         list_for_each_entry_safe(li, tmp, &cifs_file->llist->locks, llist) {
410                 list_del(&li->llist);
411                 cifs_del_lock_waiters(li);
412                 kfree(li);
413         }
414         list_del(&cifs_file->llist->llist);
415         kfree(cifs_file->llist);
416         up_write(&cifsi->lock_sem);
417
418         cifs_put_tlink(cifs_file->tlink);
419         dput(cifs_file->dentry);
420         cifs_sb_deactive(sb);
421         kfree(cifs_file);
422 }
423
424 int cifs_open(struct inode *inode, struct file *file)
425
426 {
427         int rc = -EACCES;
428         unsigned int xid;
429         __u32 oplock;
430         struct cifs_sb_info *cifs_sb;
431         struct TCP_Server_Info *server;
432         struct cifs_tcon *tcon;
433         struct tcon_link *tlink;
434         struct cifsFileInfo *cfile = NULL;
435         char *full_path = NULL;
436         bool posix_open_ok = false;
437         struct cifs_fid fid;
438         struct cifs_pending_open open;
439
440         xid = get_xid();
441
442         cifs_sb = CIFS_SB(inode->i_sb);
443         tlink = cifs_sb_tlink(cifs_sb);
444         if (IS_ERR(tlink)) {
445                 free_xid(xid);
446                 return PTR_ERR(tlink);
447         }
448         tcon = tlink_tcon(tlink);
449         server = tcon->ses->server;
450
451         full_path = build_path_from_dentry(file->f_path.dentry);
452         if (full_path == NULL) {
453                 rc = -ENOMEM;
454                 goto out;
455         }
456
457         cFYI(1, "inode = 0x%p file flags are 0x%x for %s",
458                  inode, file->f_flags, full_path);
459
460         if (server->oplocks)
461                 oplock = REQ_OPLOCK;
462         else
463                 oplock = 0;
464
465         if (!tcon->broken_posix_open && tcon->unix_ext &&
466             cap_unix(tcon->ses) && (CIFS_UNIX_POSIX_PATH_OPS_CAP &
467                                 le64_to_cpu(tcon->fsUnixInfo.Capability))) {
468                 /* can not refresh inode info since size could be stale */
469                 rc = cifs_posix_open(full_path, &inode, inode->i_sb,
470                                 cifs_sb->mnt_file_mode /* ignored */,
471                                 file->f_flags, &oplock, &fid.netfid, xid);
472                 if (rc == 0) {
473                         cFYI(1, "posix open succeeded");
474                         posix_open_ok = true;
475                 } else if ((rc == -EINVAL) || (rc == -EOPNOTSUPP)) {
476                         if (tcon->ses->serverNOS)
477                                 cERROR(1, "server %s of type %s returned"
478                                            " unexpected error on SMB posix open"
479                                            ", disabling posix open support."
480                                            " Check if server update available.",
481                                            tcon->ses->serverName,
482                                            tcon->ses->serverNOS);
483                         tcon->broken_posix_open = true;
484                 } else if ((rc != -EIO) && (rc != -EREMOTE) &&
485                          (rc != -EOPNOTSUPP)) /* path not found or net err */
486                         goto out;
487                 /*
488                  * Else fallthrough to retry open the old way on network i/o
489                  * or DFS errors.
490                  */
491         }
492
493         if (server->ops->get_lease_key)
494                 server->ops->get_lease_key(inode, &fid);
495
496         cifs_add_pending_open(&fid, tlink, &open);
497
498         if (!posix_open_ok) {
499                 if (server->ops->get_lease_key)
500                         server->ops->get_lease_key(inode, &fid);
501
502                 rc = cifs_nt_open(full_path, inode, cifs_sb, tcon,
503                                   file->f_flags, &oplock, &fid, xid);
504                 if (rc) {
505                         cifs_del_pending_open(&open);
506                         goto out;
507                 }
508         }
509
510         cfile = cifs_new_fileinfo(&fid, file, tlink, oplock);
511         if (cfile == NULL) {
512                 if (server->ops->close)
513                         server->ops->close(xid, tcon, &fid);
514                 cifs_del_pending_open(&open);
515                 rc = -ENOMEM;
516                 goto out;
517         }
518
519         cifs_fscache_set_inode_cookie(inode, file);
520
521         if ((oplock & CIFS_CREATE_ACTION) && !posix_open_ok && tcon->unix_ext) {
522                 /*
523                  * Time to set mode which we can not set earlier due to
524                  * problems creating new read-only files.
525                  */
526                 struct cifs_unix_set_info_args args = {
527                         .mode   = inode->i_mode,
528                         .uid    = INVALID_UID, /* no change */
529                         .gid    = INVALID_GID, /* no change */
530                         .ctime  = NO_CHANGE_64,
531                         .atime  = NO_CHANGE_64,
532                         .mtime  = NO_CHANGE_64,
533                         .device = 0,
534                 };
535                 CIFSSMBUnixSetFileInfo(xid, tcon, &args, fid.netfid,
536                                        cfile->pid);
537         }
538
539 out:
540         kfree(full_path);
541         free_xid(xid);
542         cifs_put_tlink(tlink);
543         return rc;
544 }
545
546 static int cifs_push_posix_locks(struct cifsFileInfo *cfile);
547
548 /*
549  * Try to reacquire byte range locks that were released when session
550  * to server was lost.
551  */
552 static int
553 cifs_relock_file(struct cifsFileInfo *cfile)
554 {
555         struct cifs_sb_info *cifs_sb = CIFS_SB(cfile->dentry->d_sb);
556         struct cifsInodeInfo *cinode = CIFS_I(cfile->dentry->d_inode);
557         struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
558         int rc = 0;
559
560         /* we are going to update can_cache_brlcks here - need a write access */
561         down_write(&cinode->lock_sem);
562         if (cinode->can_cache_brlcks) {
563                 /* can cache locks - no need to push them */
564                 up_write(&cinode->lock_sem);
565                 return rc;
566         }
567
568         if (cap_unix(tcon->ses) &&
569             (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) &&
570             ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0))
571                 rc = cifs_push_posix_locks(cfile);
572         else
573                 rc = tcon->ses->server->ops->push_mand_locks(cfile);
574
575         up_write(&cinode->lock_sem);
576         return rc;
577 }
578
579 static int
580 cifs_reopen_file(struct cifsFileInfo *cfile, bool can_flush)
581 {
582         int rc = -EACCES;
583         unsigned int xid;
584         __u32 oplock;
585         struct cifs_sb_info *cifs_sb;
586         struct cifs_tcon *tcon;
587         struct TCP_Server_Info *server;
588         struct cifsInodeInfo *cinode;
589         struct inode *inode;
590         char *full_path = NULL;
591         int desired_access;
592         int disposition = FILE_OPEN;
593         int create_options = CREATE_NOT_DIR;
594         struct cifs_fid fid;
595
596         xid = get_xid();
597         mutex_lock(&cfile->fh_mutex);
598         if (!cfile->invalidHandle) {
599                 mutex_unlock(&cfile->fh_mutex);
600                 rc = 0;
601                 free_xid(xid);
602                 return rc;
603         }
604
605         inode = cfile->dentry->d_inode;
606         cifs_sb = CIFS_SB(inode->i_sb);
607         tcon = tlink_tcon(cfile->tlink);
608         server = tcon->ses->server;
609
610         /*
611          * Can not grab rename sem here because various ops, including those
612          * that already have the rename sem can end up causing writepage to get
613          * called and if the server was down that means we end up here, and we
614          * can never tell if the caller already has the rename_sem.
615          */
616         full_path = build_path_from_dentry(cfile->dentry);
617         if (full_path == NULL) {
618                 rc = -ENOMEM;
619                 mutex_unlock(&cfile->fh_mutex);
620                 free_xid(xid);
621                 return rc;
622         }
623
624         cFYI(1, "inode = 0x%p file flags 0x%x for %s", inode, cfile->f_flags,
625              full_path);
626
627         if (tcon->ses->server->oplocks)
628                 oplock = REQ_OPLOCK;
629         else
630                 oplock = 0;
631
632         if (tcon->unix_ext && cap_unix(tcon->ses) &&
633             (CIFS_UNIX_POSIX_PATH_OPS_CAP &
634                                 le64_to_cpu(tcon->fsUnixInfo.Capability))) {
635                 /*
636                  * O_CREAT, O_EXCL and O_TRUNC already had their effect on the
637                  * original open. Must mask them off for a reopen.
638                  */
639                 unsigned int oflags = cfile->f_flags &
640                                                 ~(O_CREAT | O_EXCL | O_TRUNC);
641
642                 rc = cifs_posix_open(full_path, NULL, inode->i_sb,
643                                      cifs_sb->mnt_file_mode /* ignored */,
644                                      oflags, &oplock, &fid.netfid, xid);
645                 if (rc == 0) {
646                         cFYI(1, "posix reopen succeeded");
647                         goto reopen_success;
648                 }
649                 /*
650                  * fallthrough to retry open the old way on errors, especially
651                  * in the reconnect path it is important to retry hard
652                  */
653         }
654
655         desired_access = cifs_convert_flags(cfile->f_flags);
656
657         if (backup_cred(cifs_sb))
658                 create_options |= CREATE_OPEN_BACKUP_INTENT;
659
660         if (server->ops->get_lease_key)
661                 server->ops->get_lease_key(inode, &fid);
662
663         /*
664          * Can not refresh inode by passing in file_info buf to be returned by
665          * CIFSSMBOpen and then calling get_inode_info with returned buf since
666          * file might have write behind data that needs to be flushed and server
667          * version of file size can be stale. If we knew for sure that inode was
668          * not dirty locally we could do this.
669          */
670         rc = server->ops->open(xid, tcon, full_path, disposition,
671                                desired_access, create_options, &fid, &oplock,
672                                NULL, cifs_sb);
673         if (rc) {
674                 mutex_unlock(&cfile->fh_mutex);
675                 cFYI(1, "cifs_reopen returned 0x%x", rc);
676                 cFYI(1, "oplock: %d", oplock);
677                 goto reopen_error_exit;
678         }
679
680 reopen_success:
681         cfile->invalidHandle = false;
682         mutex_unlock(&cfile->fh_mutex);
683         cinode = CIFS_I(inode);
684
685         if (can_flush) {
686                 rc = filemap_write_and_wait(inode->i_mapping);
687                 mapping_set_error(inode->i_mapping, rc);
688
689                 if (tcon->unix_ext)
690                         rc = cifs_get_inode_info_unix(&inode, full_path,
691                                                       inode->i_sb, xid);
692                 else
693                         rc = cifs_get_inode_info(&inode, full_path, NULL,
694                                                  inode->i_sb, xid, NULL);
695         }
696         /*
697          * Else we are writing out data to server already and could deadlock if
698          * we tried to flush data, and since we do not know if we have data that
699          * would invalidate the current end of file on the server we can not go
700          * to the server to get the new inode info.
701          */
702
703         server->ops->set_fid(cfile, &fid, oplock);
704         cifs_relock_file(cfile);
705
706 reopen_error_exit:
707         kfree(full_path);
708         free_xid(xid);
709         return rc;
710 }
711
712 int cifs_close(struct inode *inode, struct file *file)
713 {
714         if (file->private_data != NULL) {
715                 cifsFileInfo_put(file->private_data);
716                 file->private_data = NULL;
717         }
718
719         /* return code from the ->release op is always ignored */
720         return 0;
721 }
722
723 int cifs_closedir(struct inode *inode, struct file *file)
724 {
725         int rc = 0;
726         unsigned int xid;
727         struct cifsFileInfo *cfile = file->private_data;
728         struct cifs_tcon *tcon;
729         struct TCP_Server_Info *server;
730         char *buf;
731
732         cFYI(1, "Closedir inode = 0x%p", inode);
733
734         if (cfile == NULL)
735                 return rc;
736
737         xid = get_xid();
738         tcon = tlink_tcon(cfile->tlink);
739         server = tcon->ses->server;
740
741         cFYI(1, "Freeing private data in close dir");
742         spin_lock(&cifs_file_list_lock);
743         if (!cfile->srch_inf.endOfSearch && !cfile->invalidHandle) {
744                 cfile->invalidHandle = true;
745                 spin_unlock(&cifs_file_list_lock);
746                 if (server->ops->close_dir)
747                         rc = server->ops->close_dir(xid, tcon, &cfile->fid);
748                 else
749                         rc = -ENOSYS;
750                 cFYI(1, "Closing uncompleted readdir with rc %d", rc);
751                 /* not much we can do if it fails anyway, ignore rc */
752                 rc = 0;
753         } else
754                 spin_unlock(&cifs_file_list_lock);
755
756         buf = cfile->srch_inf.ntwrk_buf_start;
757         if (buf) {
758                 cFYI(1, "closedir free smb buf in srch struct");
759                 cfile->srch_inf.ntwrk_buf_start = NULL;
760                 if (cfile->srch_inf.smallBuf)
761                         cifs_small_buf_release(buf);
762                 else
763                         cifs_buf_release(buf);
764         }
765
766         cifs_put_tlink(cfile->tlink);
767         kfree(file->private_data);
768         file->private_data = NULL;
769         /* BB can we lock the filestruct while this is going on? */
770         free_xid(xid);
771         return rc;
772 }
773
774 static struct cifsLockInfo *
775 cifs_lock_init(__u64 offset, __u64 length, __u8 type)
776 {
777         struct cifsLockInfo *lock =
778                 kmalloc(sizeof(struct cifsLockInfo), GFP_KERNEL);
779         if (!lock)
780                 return lock;
781         lock->offset = offset;
782         lock->length = length;
783         lock->type = type;
784         lock->pid = current->tgid;
785         INIT_LIST_HEAD(&lock->blist);
786         init_waitqueue_head(&lock->block_q);
787         return lock;
788 }
789
790 void
791 cifs_del_lock_waiters(struct cifsLockInfo *lock)
792 {
793         struct cifsLockInfo *li, *tmp;
794         list_for_each_entry_safe(li, tmp, &lock->blist, blist) {
795                 list_del_init(&li->blist);
796                 wake_up(&li->block_q);
797         }
798 }
799
800 #define CIFS_LOCK_OP    0
801 #define CIFS_READ_OP    1
802 #define CIFS_WRITE_OP   2
803
804 /* @rw_check : 0 - no op, 1 - read, 2 - write */
805 static bool
806 cifs_find_fid_lock_conflict(struct cifs_fid_locks *fdlocks, __u64 offset,
807                             __u64 length, __u8 type, struct cifsFileInfo *cfile,
808                             struct cifsLockInfo **conf_lock, int rw_check)
809 {
810         struct cifsLockInfo *li;
811         struct cifsFileInfo *cur_cfile = fdlocks->cfile;
812         struct TCP_Server_Info *server = tlink_tcon(cfile->tlink)->ses->server;
813
814         list_for_each_entry(li, &fdlocks->locks, llist) {
815                 if (offset + length <= li->offset ||
816                     offset >= li->offset + li->length)
817                         continue;
818                 if (rw_check != CIFS_LOCK_OP && current->tgid == li->pid &&
819                     server->ops->compare_fids(cfile, cur_cfile)) {
820                         /* shared lock prevents write op through the same fid */
821                         if (!(li->type & server->vals->shared_lock_type) ||
822                             rw_check != CIFS_WRITE_OP)
823                                 continue;
824                 }
825                 if ((type & server->vals->shared_lock_type) &&
826                     ((server->ops->compare_fids(cfile, cur_cfile) &&
827                      current->tgid == li->pid) || type == li->type))
828                         continue;
829                 if (conf_lock)
830                         *conf_lock = li;
831                 return true;
832         }
833         return false;
834 }
835
836 bool
837 cifs_find_lock_conflict(struct cifsFileInfo *cfile, __u64 offset, __u64 length,
838                         __u8 type, struct cifsLockInfo **conf_lock,
839                         int rw_check)
840 {
841         bool rc = false;
842         struct cifs_fid_locks *cur;
843         struct cifsInodeInfo *cinode = CIFS_I(cfile->dentry->d_inode);
844
845         list_for_each_entry(cur, &cinode->llist, llist) {
846                 rc = cifs_find_fid_lock_conflict(cur, offset, length, type,
847                                                  cfile, conf_lock, rw_check);
848                 if (rc)
849                         break;
850         }
851
852         return rc;
853 }
854
855 /*
856  * Check if there is another lock that prevents us to set the lock (mandatory
857  * style). If such a lock exists, update the flock structure with its
858  * properties. Otherwise, set the flock type to F_UNLCK if we can cache brlocks
859  * or leave it the same if we can't. Returns 0 if we don't need to request to
860  * the server or 1 otherwise.
861  */
862 static int
863 cifs_lock_test(struct cifsFileInfo *cfile, __u64 offset, __u64 length,
864                __u8 type, struct file_lock *flock)
865 {
866         int rc = 0;
867         struct cifsLockInfo *conf_lock;
868         struct cifsInodeInfo *cinode = CIFS_I(cfile->dentry->d_inode);
869         struct TCP_Server_Info *server = tlink_tcon(cfile->tlink)->ses->server;
870         bool exist;
871
872         down_read(&cinode->lock_sem);
873
874         exist = cifs_find_lock_conflict(cfile, offset, length, type,
875                                         &conf_lock, CIFS_LOCK_OP);
876         if (exist) {
877                 flock->fl_start = conf_lock->offset;
878                 flock->fl_end = conf_lock->offset + conf_lock->length - 1;
879                 flock->fl_pid = conf_lock->pid;
880                 if (conf_lock->type & server->vals->shared_lock_type)
881                         flock->fl_type = F_RDLCK;
882                 else
883                         flock->fl_type = F_WRLCK;
884         } else if (!cinode->can_cache_brlcks)
885                 rc = 1;
886         else
887                 flock->fl_type = F_UNLCK;
888
889         up_read(&cinode->lock_sem);
890         return rc;
891 }
892
893 static void
894 cifs_lock_add(struct cifsFileInfo *cfile, struct cifsLockInfo *lock)
895 {
896         struct cifsInodeInfo *cinode = CIFS_I(cfile->dentry->d_inode);
897         down_write(&cinode->lock_sem);
898         list_add_tail(&lock->llist, &cfile->llist->locks);
899         up_write(&cinode->lock_sem);
900 }
901
902 /*
903  * Set the byte-range lock (mandatory style). Returns:
904  * 1) 0, if we set the lock and don't need to request to the server;
905  * 2) 1, if no locks prevent us but we need to request to the server;
906  * 3) -EACCESS, if there is a lock that prevents us and wait is false.
907  */
908 static int
909 cifs_lock_add_if(struct cifsFileInfo *cfile, struct cifsLockInfo *lock,
910                  bool wait)
911 {
912         struct cifsLockInfo *conf_lock;
913         struct cifsInodeInfo *cinode = CIFS_I(cfile->dentry->d_inode);
914         bool exist;
915         int rc = 0;
916
917 try_again:
918         exist = false;
919         down_write(&cinode->lock_sem);
920
921         exist = cifs_find_lock_conflict(cfile, lock->offset, lock->length,
922                                         lock->type, &conf_lock, CIFS_LOCK_OP);
923         if (!exist && cinode->can_cache_brlcks) {
924                 list_add_tail(&lock->llist, &cfile->llist->locks);
925                 up_write(&cinode->lock_sem);
926                 return rc;
927         }
928
929         if (!exist)
930                 rc = 1;
931         else if (!wait)
932                 rc = -EACCES;
933         else {
934                 list_add_tail(&lock->blist, &conf_lock->blist);
935                 up_write(&cinode->lock_sem);
936                 rc = wait_event_interruptible(lock->block_q,
937                                         (lock->blist.prev == &lock->blist) &&
938                                         (lock->blist.next == &lock->blist));
939                 if (!rc)
940                         goto try_again;
941                 down_write(&cinode->lock_sem);
942                 list_del_init(&lock->blist);
943         }
944
945         up_write(&cinode->lock_sem);
946         return rc;
947 }
948
949 /*
950  * Check if there is another lock that prevents us to set the lock (posix
951  * style). If such a lock exists, update the flock structure with its
952  * properties. Otherwise, set the flock type to F_UNLCK if we can cache brlocks
953  * or leave it the same if we can't. Returns 0 if we don't need to request to
954  * the server or 1 otherwise.
955  */
956 static int
957 cifs_posix_lock_test(struct file *file, struct file_lock *flock)
958 {
959         int rc = 0;
960         struct cifsInodeInfo *cinode = CIFS_I(file_inode(file));
961         unsigned char saved_type = flock->fl_type;
962
963         if ((flock->fl_flags & FL_POSIX) == 0)
964                 return 1;
965
966         down_read(&cinode->lock_sem);
967         posix_test_lock(file, flock);
968
969         if (flock->fl_type == F_UNLCK && !cinode->can_cache_brlcks) {
970                 flock->fl_type = saved_type;
971                 rc = 1;
972         }
973
974         up_read(&cinode->lock_sem);
975         return rc;
976 }
977
978 /*
979  * Set the byte-range lock (posix style). Returns:
980  * 1) 0, if we set the lock and don't need to request to the server;
981  * 2) 1, if we need to request to the server;
982  * 3) <0, if the error occurs while setting the lock.
983  */
984 static int
985 cifs_posix_lock_set(struct file *file, struct file_lock *flock)
986 {
987         struct cifsInodeInfo *cinode = CIFS_I(file_inode(file));
988         int rc = 1;
989
990         if ((flock->fl_flags & FL_POSIX) == 0)
991                 return rc;
992
993 try_again:
994         down_write(&cinode->lock_sem);
995         if (!cinode->can_cache_brlcks) {
996                 up_write(&cinode->lock_sem);
997                 return rc;
998         }
999
1000         rc = posix_lock_file(file, flock, NULL);
1001         up_write(&cinode->lock_sem);
1002         if (rc == FILE_LOCK_DEFERRED) {
1003                 rc = wait_event_interruptible(flock->fl_wait, !flock->fl_next);
1004                 if (!rc)
1005                         goto try_again;
1006                 locks_delete_block(flock);
1007         }
1008         return rc;
1009 }
1010
1011 int
1012 cifs_push_mandatory_locks(struct cifsFileInfo *cfile)
1013 {
1014         unsigned int xid;
1015         int rc = 0, stored_rc;
1016         struct cifsLockInfo *li, *tmp;
1017         struct cifs_tcon *tcon;
1018         unsigned int num, max_num, max_buf;
1019         LOCKING_ANDX_RANGE *buf, *cur;
1020         int types[] = {LOCKING_ANDX_LARGE_FILES,
1021                        LOCKING_ANDX_SHARED_LOCK | LOCKING_ANDX_LARGE_FILES};
1022         int i;
1023
1024         xid = get_xid();
1025         tcon = tlink_tcon(cfile->tlink);
1026
1027         /*
1028          * Accessing maxBuf is racy with cifs_reconnect - need to store value
1029          * and check it for zero before using.
1030          */
1031         max_buf = tcon->ses->server->maxBuf;
1032         if (!max_buf) {
1033                 free_xid(xid);
1034                 return -EINVAL;
1035         }
1036
1037         max_num = (max_buf - sizeof(struct smb_hdr)) /
1038                                                 sizeof(LOCKING_ANDX_RANGE);
1039         buf = kzalloc(max_num * sizeof(LOCKING_ANDX_RANGE), GFP_KERNEL);
1040         if (!buf) {
1041                 free_xid(xid);
1042                 return -ENOMEM;
1043         }
1044
1045         for (i = 0; i < 2; i++) {
1046                 cur = buf;
1047                 num = 0;
1048                 list_for_each_entry_safe(li, tmp, &cfile->llist->locks, llist) {
1049                         if (li->type != types[i])
1050                                 continue;
1051                         cur->Pid = cpu_to_le16(li->pid);
1052                         cur->LengthLow = cpu_to_le32((u32)li->length);
1053                         cur->LengthHigh = cpu_to_le32((u32)(li->length>>32));
1054                         cur->OffsetLow = cpu_to_le32((u32)li->offset);
1055                         cur->OffsetHigh = cpu_to_le32((u32)(li->offset>>32));
1056                         if (++num == max_num) {
1057                                 stored_rc = cifs_lockv(xid, tcon,
1058                                                        cfile->fid.netfid,
1059                                                        (__u8)li->type, 0, num,
1060                                                        buf);
1061                                 if (stored_rc)
1062                                         rc = stored_rc;
1063                                 cur = buf;
1064                                 num = 0;
1065                         } else
1066                                 cur++;
1067                 }
1068
1069                 if (num) {
1070                         stored_rc = cifs_lockv(xid, tcon, cfile->fid.netfid,
1071                                                (__u8)types[i], 0, num, buf);
1072                         if (stored_rc)
1073                                 rc = stored_rc;
1074                 }
1075         }
1076
1077         kfree(buf);
1078         free_xid(xid);
1079         return rc;
1080 }
1081
1082 /* copied from fs/locks.c with a name change */
1083 #define cifs_for_each_lock(inode, lockp) \
1084         for (lockp = &inode->i_flock; *lockp != NULL; \
1085              lockp = &(*lockp)->fl_next)
1086
1087 struct lock_to_push {
1088         struct list_head llist;
1089         __u64 offset;
1090         __u64 length;
1091         __u32 pid;
1092         __u16 netfid;
1093         __u8 type;
1094 };
1095
1096 static int
1097 cifs_push_posix_locks(struct cifsFileInfo *cfile)
1098 {
1099         struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
1100         struct file_lock *flock, **before;
1101         unsigned int count = 0, i = 0;
1102         int rc = 0, xid, type;
1103         struct list_head locks_to_send, *el;
1104         struct lock_to_push *lck, *tmp;
1105         __u64 length;
1106
1107         xid = get_xid();
1108
1109         lock_flocks();
1110         cifs_for_each_lock(cfile->dentry->d_inode, before) {
1111                 if ((*before)->fl_flags & FL_POSIX)
1112                         count++;
1113         }
1114         unlock_flocks();
1115
1116         INIT_LIST_HEAD(&locks_to_send);
1117
1118         /*
1119          * Allocating count locks is enough because no FL_POSIX locks can be
1120          * added to the list while we are holding cinode->lock_sem that
1121          * protects locking operations of this inode.
1122          */
1123         for (; i < count; i++) {
1124                 lck = kmalloc(sizeof(struct lock_to_push), GFP_KERNEL);
1125                 if (!lck) {
1126                         rc = -ENOMEM;
1127                         goto err_out;
1128                 }
1129                 list_add_tail(&lck->llist, &locks_to_send);
1130         }
1131
1132         el = locks_to_send.next;
1133         lock_flocks();
1134         cifs_for_each_lock(cfile->dentry->d_inode, before) {
1135                 flock = *before;
1136                 if ((flock->fl_flags & FL_POSIX) == 0)
1137                         continue;
1138                 if (el == &locks_to_send) {
1139                         /*
1140                          * The list ended. We don't have enough allocated
1141                          * structures - something is really wrong.
1142                          */
1143                         cERROR(1, "Can't push all brlocks!");
1144                         break;
1145                 }
1146                 length = 1 + flock->fl_end - flock->fl_start;
1147                 if (flock->fl_type == F_RDLCK || flock->fl_type == F_SHLCK)
1148                         type = CIFS_RDLCK;
1149                 else
1150                         type = CIFS_WRLCK;
1151                 lck = list_entry(el, struct lock_to_push, llist);
1152                 lck->pid = flock->fl_pid;
1153                 lck->netfid = cfile->fid.netfid;
1154                 lck->length = length;
1155                 lck->type = type;
1156                 lck->offset = flock->fl_start;
1157                 el = el->next;
1158         }
1159         unlock_flocks();
1160
1161         list_for_each_entry_safe(lck, tmp, &locks_to_send, llist) {
1162                 int stored_rc;
1163
1164                 stored_rc = CIFSSMBPosixLock(xid, tcon, lck->netfid, lck->pid,
1165                                              lck->offset, lck->length, NULL,
1166                                              lck->type, 0);
1167                 if (stored_rc)
1168                         rc = stored_rc;
1169                 list_del(&lck->llist);
1170                 kfree(lck);
1171         }
1172
1173 out:
1174         free_xid(xid);
1175         return rc;
1176 err_out:
1177         list_for_each_entry_safe(lck, tmp, &locks_to_send, llist) {
1178                 list_del(&lck->llist);
1179                 kfree(lck);
1180         }
1181         goto out;
1182 }
1183
1184 static int
1185 cifs_push_locks(struct cifsFileInfo *cfile)
1186 {
1187         struct cifs_sb_info *cifs_sb = CIFS_SB(cfile->dentry->d_sb);
1188         struct cifsInodeInfo *cinode = CIFS_I(cfile->dentry->d_inode);
1189         struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
1190         int rc = 0;
1191
1192         /* we are going to update can_cache_brlcks here - need a write access */
1193         down_write(&cinode->lock_sem);
1194         if (!cinode->can_cache_brlcks) {
1195                 up_write(&cinode->lock_sem);
1196                 return rc;
1197         }
1198
1199         if (cap_unix(tcon->ses) &&
1200             (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) &&
1201             ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0))
1202                 rc = cifs_push_posix_locks(cfile);
1203         else
1204                 rc = tcon->ses->server->ops->push_mand_locks(cfile);
1205
1206         cinode->can_cache_brlcks = false;
1207         up_write(&cinode->lock_sem);
1208         return rc;
1209 }
1210
1211 static void
1212 cifs_read_flock(struct file_lock *flock, __u32 *type, int *lock, int *unlock,
1213                 bool *wait_flag, struct TCP_Server_Info *server)
1214 {
1215         if (flock->fl_flags & FL_POSIX)
1216                 cFYI(1, "Posix");
1217         if (flock->fl_flags & FL_FLOCK)
1218                 cFYI(1, "Flock");
1219         if (flock->fl_flags & FL_SLEEP) {
1220                 cFYI(1, "Blocking lock");
1221                 *wait_flag = true;
1222         }
1223         if (flock->fl_flags & FL_ACCESS)
1224                 cFYI(1, "Process suspended by mandatory locking - "
1225                         "not implemented yet");
1226         if (flock->fl_flags & FL_LEASE)
1227                 cFYI(1, "Lease on file - not implemented yet");
1228         if (flock->fl_flags &
1229             (~(FL_POSIX | FL_FLOCK | FL_SLEEP |
1230                FL_ACCESS | FL_LEASE | FL_CLOSE)))
1231                 cFYI(1, "Unknown lock flags 0x%x", flock->fl_flags);
1232
1233         *type = server->vals->large_lock_type;
1234         if (flock->fl_type == F_WRLCK) {
1235                 cFYI(1, "F_WRLCK ");
1236                 *type |= server->vals->exclusive_lock_type;
1237                 *lock = 1;
1238         } else if (flock->fl_type == F_UNLCK) {
1239                 cFYI(1, "F_UNLCK");
1240                 *type |= server->vals->unlock_lock_type;
1241                 *unlock = 1;
1242                 /* Check if unlock includes more than one lock range */
1243         } else if (flock->fl_type == F_RDLCK) {
1244                 cFYI(1, "F_RDLCK");
1245                 *type |= server->vals->shared_lock_type;
1246                 *lock = 1;
1247         } else if (flock->fl_type == F_EXLCK) {
1248                 cFYI(1, "F_EXLCK");
1249                 *type |= server->vals->exclusive_lock_type;
1250                 *lock = 1;
1251         } else if (flock->fl_type == F_SHLCK) {
1252                 cFYI(1, "F_SHLCK");
1253                 *type |= server->vals->shared_lock_type;
1254                 *lock = 1;
1255         } else
1256                 cFYI(1, "Unknown type of lock");
1257 }
1258
1259 static int
1260 cifs_getlk(struct file *file, struct file_lock *flock, __u32 type,
1261            bool wait_flag, bool posix_lck, unsigned int xid)
1262 {
1263         int rc = 0;
1264         __u64 length = 1 + flock->fl_end - flock->fl_start;
1265         struct cifsFileInfo *cfile = (struct cifsFileInfo *)file->private_data;
1266         struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
1267         struct TCP_Server_Info *server = tcon->ses->server;
1268         __u16 netfid = cfile->fid.netfid;
1269
1270         if (posix_lck) {
1271                 int posix_lock_type;
1272
1273                 rc = cifs_posix_lock_test(file, flock);
1274                 if (!rc)
1275                         return rc;
1276
1277                 if (type & server->vals->shared_lock_type)
1278                         posix_lock_type = CIFS_RDLCK;
1279                 else
1280                         posix_lock_type = CIFS_WRLCK;
1281                 rc = CIFSSMBPosixLock(xid, tcon, netfid, current->tgid,
1282                                       flock->fl_start, length, flock,
1283                                       posix_lock_type, wait_flag);
1284                 return rc;
1285         }
1286
1287         rc = cifs_lock_test(cfile, flock->fl_start, length, type, flock);
1288         if (!rc)
1289                 return rc;
1290
1291         /* BB we could chain these into one lock request BB */
1292         rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length, type,
1293                                     1, 0, false);
1294         if (rc == 0) {
1295                 rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length,
1296                                             type, 0, 1, false);
1297                 flock->fl_type = F_UNLCK;
1298                 if (rc != 0)
1299                         cERROR(1, "Error unlocking previously locked "
1300                                   "range %d during test of lock", rc);
1301                 return 0;
1302         }
1303
1304         if (type & server->vals->shared_lock_type) {
1305                 flock->fl_type = F_WRLCK;
1306                 return 0;
1307         }
1308
1309         type &= ~server->vals->exclusive_lock_type;
1310
1311         rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length,
1312                                     type | server->vals->shared_lock_type,
1313                                     1, 0, false);
1314         if (rc == 0) {
1315                 rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length,
1316                         type | server->vals->shared_lock_type, 0, 1, false);
1317                 flock->fl_type = F_RDLCK;
1318                 if (rc != 0)
1319                         cERROR(1, "Error unlocking previously locked "
1320                                   "range %d during test of lock", rc);
1321         } else
1322                 flock->fl_type = F_WRLCK;
1323
1324         return 0;
1325 }
1326
1327 void
1328 cifs_move_llist(struct list_head *source, struct list_head *dest)
1329 {
1330         struct list_head *li, *tmp;
1331         list_for_each_safe(li, tmp, source)
1332                 list_move(li, dest);
1333 }
1334
1335 void
1336 cifs_free_llist(struct list_head *llist)
1337 {
1338         struct cifsLockInfo *li, *tmp;
1339         list_for_each_entry_safe(li, tmp, llist, llist) {
1340                 cifs_del_lock_waiters(li);
1341                 list_del(&li->llist);
1342                 kfree(li);
1343         }
1344 }
1345
1346 int
1347 cifs_unlock_range(struct cifsFileInfo *cfile, struct file_lock *flock,
1348                   unsigned int xid)
1349 {
1350         int rc = 0, stored_rc;
1351         int types[] = {LOCKING_ANDX_LARGE_FILES,
1352                        LOCKING_ANDX_SHARED_LOCK | LOCKING_ANDX_LARGE_FILES};
1353         unsigned int i;
1354         unsigned int max_num, num, max_buf;
1355         LOCKING_ANDX_RANGE *buf, *cur;
1356         struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
1357         struct cifsInodeInfo *cinode = CIFS_I(cfile->dentry->d_inode);
1358         struct cifsLockInfo *li, *tmp;
1359         __u64 length = 1 + flock->fl_end - flock->fl_start;
1360         struct list_head tmp_llist;
1361
1362         INIT_LIST_HEAD(&tmp_llist);
1363
1364         /*
1365          * Accessing maxBuf is racy with cifs_reconnect - need to store value
1366          * and check it for zero before using.
1367          */
1368         max_buf = tcon->ses->server->maxBuf;
1369         if (!max_buf)
1370                 return -EINVAL;
1371
1372         max_num = (max_buf - sizeof(struct smb_hdr)) /
1373                                                 sizeof(LOCKING_ANDX_RANGE);
1374         buf = kzalloc(max_num * sizeof(LOCKING_ANDX_RANGE), GFP_KERNEL);
1375         if (!buf)
1376                 return -ENOMEM;
1377
1378         down_write(&cinode->lock_sem);
1379         for (i = 0; i < 2; i++) {
1380                 cur = buf;
1381                 num = 0;
1382                 list_for_each_entry_safe(li, tmp, &cfile->llist->locks, llist) {
1383                         if (flock->fl_start > li->offset ||
1384                             (flock->fl_start + length) <
1385                             (li->offset + li->length))
1386                                 continue;
1387                         if (current->tgid != li->pid)
1388                                 continue;
1389                         if (types[i] != li->type)
1390                                 continue;
1391                         if (cinode->can_cache_brlcks) {
1392                                 /*
1393                                  * We can cache brlock requests - simply remove
1394                                  * a lock from the file's list.
1395                                  */
1396                                 list_del(&li->llist);
1397                                 cifs_del_lock_waiters(li);
1398                                 kfree(li);
1399                                 continue;
1400                         }
1401                         cur->Pid = cpu_to_le16(li->pid);
1402                         cur->LengthLow = cpu_to_le32((u32)li->length);
1403                         cur->LengthHigh = cpu_to_le32((u32)(li->length>>32));
1404                         cur->OffsetLow = cpu_to_le32((u32)li->offset);
1405                         cur->OffsetHigh = cpu_to_le32((u32)(li->offset>>32));
1406                         /*
1407                          * We need to save a lock here to let us add it again to
1408                          * the file's list if the unlock range request fails on
1409                          * the server.
1410                          */
1411                         list_move(&li->llist, &tmp_llist);
1412                         if (++num == max_num) {
1413                                 stored_rc = cifs_lockv(xid, tcon,
1414                                                        cfile->fid.netfid,
1415                                                        li->type, num, 0, buf);
1416                                 if (stored_rc) {
1417                                         /*
1418                                          * We failed on the unlock range
1419                                          * request - add all locks from the tmp
1420                                          * list to the head of the file's list.
1421                                          */
1422                                         cifs_move_llist(&tmp_llist,
1423                                                         &cfile->llist->locks);
1424                                         rc = stored_rc;
1425                                 } else
1426                                         /*
1427                                          * The unlock range request succeed -
1428                                          * free the tmp list.
1429                                          */
1430                                         cifs_free_llist(&tmp_llist);
1431                                 cur = buf;
1432                                 num = 0;
1433                         } else
1434                                 cur++;
1435                 }
1436                 if (num) {
1437                         stored_rc = cifs_lockv(xid, tcon, cfile->fid.netfid,
1438                                                types[i], num, 0, buf);
1439                         if (stored_rc) {
1440                                 cifs_move_llist(&tmp_llist,
1441                                                 &cfile->llist->locks);
1442                                 rc = stored_rc;
1443                         } else
1444                                 cifs_free_llist(&tmp_llist);
1445                 }
1446         }
1447
1448         up_write(&cinode->lock_sem);
1449         kfree(buf);
1450         return rc;
1451 }
1452
1453 static int
1454 cifs_setlk(struct file *file, struct file_lock *flock, __u32 type,
1455            bool wait_flag, bool posix_lck, int lock, int unlock,
1456            unsigned int xid)
1457 {
1458         int rc = 0;
1459         __u64 length = 1 + flock->fl_end - flock->fl_start;
1460         struct cifsFileInfo *cfile = (struct cifsFileInfo *)file->private_data;
1461         struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
1462         struct TCP_Server_Info *server = tcon->ses->server;
1463         struct inode *inode = cfile->dentry->d_inode;
1464
1465         if (posix_lck) {
1466                 int posix_lock_type;
1467
1468                 rc = cifs_posix_lock_set(file, flock);
1469                 if (!rc || rc < 0)
1470                         return rc;
1471
1472                 if (type & server->vals->shared_lock_type)
1473                         posix_lock_type = CIFS_RDLCK;
1474                 else
1475                         posix_lock_type = CIFS_WRLCK;
1476
1477                 if (unlock == 1)
1478                         posix_lock_type = CIFS_UNLCK;
1479
1480                 rc = CIFSSMBPosixLock(xid, tcon, cfile->fid.netfid,
1481                                       current->tgid, flock->fl_start, length,
1482                                       NULL, posix_lock_type, wait_flag);
1483                 goto out;
1484         }
1485
1486         if (lock) {
1487                 struct cifsLockInfo *lock;
1488
1489                 lock = cifs_lock_init(flock->fl_start, length, type);
1490                 if (!lock)
1491                         return -ENOMEM;
1492
1493                 rc = cifs_lock_add_if(cfile, lock, wait_flag);
1494                 if (rc < 0) {
1495                         kfree(lock);
1496                         return rc;
1497                 }
1498                 if (!rc)
1499                         goto out;
1500
1501                 /*
1502                  * Windows 7 server can delay breaking lease from read to None
1503                  * if we set a byte-range lock on a file - break it explicitly
1504                  * before sending the lock to the server to be sure the next
1505                  * read won't conflict with non-overlapted locks due to
1506                  * pagereading.
1507                  */
1508                 if (!CIFS_I(inode)->clientCanCacheAll &&
1509                                         CIFS_I(inode)->clientCanCacheRead) {
1510                         cifs_invalidate_mapping(inode);
1511                         cFYI(1, "Set no oplock for inode=%p due to mand locks",
1512                              inode);
1513                         CIFS_I(inode)->clientCanCacheRead = false;
1514                 }
1515
1516                 rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length,
1517                                             type, 1, 0, wait_flag);
1518                 if (rc) {
1519                         kfree(lock);
1520                         return rc;
1521                 }
1522
1523                 cifs_lock_add(cfile, lock);
1524         } else if (unlock)
1525                 rc = server->ops->mand_unlock_range(cfile, flock, xid);
1526
1527 out:
1528         if (flock->fl_flags & FL_POSIX)
1529                 posix_lock_file_wait(file, flock);
1530         return rc;
1531 }
1532
1533 int cifs_lock(struct file *file, int cmd, struct file_lock *flock)
1534 {
1535         int rc, xid;
1536         int lock = 0, unlock = 0;
1537         bool wait_flag = false;
1538         bool posix_lck = false;
1539         struct cifs_sb_info *cifs_sb;
1540         struct cifs_tcon *tcon;
1541         struct cifsInodeInfo *cinode;
1542         struct cifsFileInfo *cfile;
1543         __u16 netfid;
1544         __u32 type;
1545
1546         rc = -EACCES;
1547         xid = get_xid();
1548
1549         cFYI(1, "Lock parm: 0x%x flockflags: 0x%x flocktype: 0x%x start: %lld "
1550                 "end: %lld", cmd, flock->fl_flags, flock->fl_type,
1551                 flock->fl_start, flock->fl_end);
1552
1553         cfile = (struct cifsFileInfo *)file->private_data;
1554         tcon = tlink_tcon(cfile->tlink);
1555
1556         cifs_read_flock(flock, &type, &lock, &unlock, &wait_flag,
1557                         tcon->ses->server);
1558
1559         cifs_sb = CIFS_SB(file->f_path.dentry->d_sb);
1560         netfid = cfile->fid.netfid;
1561         cinode = CIFS_I(file_inode(file));
1562
1563         if (cap_unix(tcon->ses) &&
1564             (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) &&
1565             ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0))
1566                 posix_lck = true;
1567         /*
1568          * BB add code here to normalize offset and length to account for
1569          * negative length which we can not accept over the wire.
1570          */
1571         if (IS_GETLK(cmd)) {
1572                 rc = cifs_getlk(file, flock, type, wait_flag, posix_lck, xid);
1573                 free_xid(xid);
1574                 return rc;
1575         }
1576
1577         if (!lock && !unlock) {
1578                 /*
1579                  * if no lock or unlock then nothing to do since we do not
1580                  * know what it is
1581                  */
1582                 free_xid(xid);
1583                 return -EOPNOTSUPP;
1584         }
1585
1586         rc = cifs_setlk(file, flock, type, wait_flag, posix_lck, lock, unlock,
1587                         xid);
1588         free_xid(xid);
1589         return rc;
1590 }
1591
1592 /*
1593  * update the file size (if needed) after a write. Should be called with
1594  * the inode->i_lock held
1595  */
1596 void
1597 cifs_update_eof(struct cifsInodeInfo *cifsi, loff_t offset,
1598                       unsigned int bytes_written)
1599 {
1600         loff_t end_of_write = offset + bytes_written;
1601
1602         if (end_of_write > cifsi->server_eof)
1603                 cifsi->server_eof = end_of_write;
1604 }
1605
1606 static ssize_t
1607 cifs_write(struct cifsFileInfo *open_file, __u32 pid, const char *write_data,
1608            size_t write_size, loff_t *offset)
1609 {
1610         int rc = 0;
1611         unsigned int bytes_written = 0;
1612         unsigned int total_written;
1613         struct cifs_sb_info *cifs_sb;
1614         struct cifs_tcon *tcon;
1615         struct TCP_Server_Info *server;
1616         unsigned int xid;
1617         struct dentry *dentry = open_file->dentry;
1618         struct cifsInodeInfo *cifsi = CIFS_I(dentry->d_inode);
1619         struct cifs_io_parms io_parms;
1620
1621         cifs_sb = CIFS_SB(dentry->d_sb);
1622
1623         cFYI(1, "write %zd bytes to offset %lld of %s", write_size,
1624              *offset, dentry->d_name.name);
1625
1626         tcon = tlink_tcon(open_file->tlink);
1627         server = tcon->ses->server;
1628
1629         if (!server->ops->sync_write)
1630                 return -ENOSYS;
1631
1632         xid = get_xid();
1633
1634         for (total_written = 0; write_size > total_written;
1635              total_written += bytes_written) {
1636                 rc = -EAGAIN;
1637                 while (rc == -EAGAIN) {
1638                         struct kvec iov[2];
1639                         unsigned int len;
1640
1641                         if (open_file->invalidHandle) {
1642                                 /* we could deadlock if we called
1643                                    filemap_fdatawait from here so tell
1644                                    reopen_file not to flush data to
1645                                    server now */
1646                                 rc = cifs_reopen_file(open_file, false);
1647                                 if (rc != 0)
1648                                         break;
1649                         }
1650
1651                         len = min((size_t)cifs_sb->wsize,
1652                                   write_size - total_written);
1653                         /* iov[0] is reserved for smb header */
1654                         iov[1].iov_base = (char *)write_data + total_written;
1655                         iov[1].iov_len = len;
1656                         io_parms.pid = pid;
1657                         io_parms.tcon = tcon;
1658                         io_parms.offset = *offset;
1659                         io_parms.length = len;
1660                         rc = server->ops->sync_write(xid, open_file, &io_parms,
1661                                                      &bytes_written, iov, 1);
1662                 }
1663                 if (rc || (bytes_written == 0)) {
1664                         if (total_written)
1665                                 break;
1666                         else {
1667                                 free_xid(xid);
1668                                 return rc;
1669                         }
1670                 } else {
1671                         spin_lock(&dentry->d_inode->i_lock);
1672                         cifs_update_eof(cifsi, *offset, bytes_written);
1673                         spin_unlock(&dentry->d_inode->i_lock);
1674                         *offset += bytes_written;
1675                 }
1676         }
1677
1678         cifs_stats_bytes_written(tcon, total_written);
1679
1680         if (total_written > 0) {
1681                 spin_lock(&dentry->d_inode->i_lock);
1682                 if (*offset > dentry->d_inode->i_size)
1683                         i_size_write(dentry->d_inode, *offset);
1684                 spin_unlock(&dentry->d_inode->i_lock);
1685         }
1686         mark_inode_dirty_sync(dentry->d_inode);
1687         free_xid(xid);
1688         return total_written;
1689 }
1690
1691 struct cifsFileInfo *find_readable_file(struct cifsInodeInfo *cifs_inode,
1692                                         bool fsuid_only)
1693 {
1694         struct cifsFileInfo *open_file = NULL;
1695         struct cifs_sb_info *cifs_sb = CIFS_SB(cifs_inode->vfs_inode.i_sb);
1696
1697         /* only filter by fsuid on multiuser mounts */
1698         if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MULTIUSER))
1699                 fsuid_only = false;
1700
1701         spin_lock(&cifs_file_list_lock);
1702         /* we could simply get the first_list_entry since write-only entries
1703            are always at the end of the list but since the first entry might
1704            have a close pending, we go through the whole list */
1705         list_for_each_entry(open_file, &cifs_inode->openFileList, flist) {
1706                 if (fsuid_only && !uid_eq(open_file->uid, current_fsuid()))
1707                         continue;
1708                 if (OPEN_FMODE(open_file->f_flags) & FMODE_READ) {
1709                         if (!open_file->invalidHandle) {
1710                                 /* found a good file */
1711                                 /* lock it so it will not be closed on us */
1712                                 cifsFileInfo_get_locked(open_file);
1713                                 spin_unlock(&cifs_file_list_lock);
1714                                 return open_file;
1715                         } /* else might as well continue, and look for
1716                              another, or simply have the caller reopen it
1717                              again rather than trying to fix this handle */
1718                 } else /* write only file */
1719                         break; /* write only files are last so must be done */
1720         }
1721         spin_unlock(&cifs_file_list_lock);
1722         return NULL;
1723 }
1724
1725 struct cifsFileInfo *find_writable_file(struct cifsInodeInfo *cifs_inode,
1726                                         bool fsuid_only)
1727 {
1728         struct cifsFileInfo *open_file, *inv_file = NULL;
1729         struct cifs_sb_info *cifs_sb;
1730         bool any_available = false;
1731         int rc;
1732         unsigned int refind = 0;
1733
1734         /* Having a null inode here (because mapping->host was set to zero by
1735         the VFS or MM) should not happen but we had reports of on oops (due to
1736         it being zero) during stress testcases so we need to check for it */
1737
1738         if (cifs_inode == NULL) {
1739                 cERROR(1, "Null inode passed to cifs_writeable_file");
1740                 dump_stack();
1741                 return NULL;
1742         }
1743
1744         cifs_sb = CIFS_SB(cifs_inode->vfs_inode.i_sb);
1745
1746         /* only filter by fsuid on multiuser mounts */
1747         if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MULTIUSER))
1748                 fsuid_only = false;
1749
1750         spin_lock(&cifs_file_list_lock);
1751 refind_writable:
1752         if (refind > MAX_REOPEN_ATT) {
1753                 spin_unlock(&cifs_file_list_lock);
1754                 return NULL;
1755         }
1756         list_for_each_entry(open_file, &cifs_inode->openFileList, flist) {
1757                 if (!any_available && open_file->pid != current->tgid)
1758                         continue;
1759                 if (fsuid_only && !uid_eq(open_file->uid, current_fsuid()))
1760                         continue;
1761                 if (OPEN_FMODE(open_file->f_flags) & FMODE_WRITE) {
1762                         if (!open_file->invalidHandle) {
1763                                 /* found a good writable file */
1764                                 cifsFileInfo_get_locked(open_file);
1765                                 spin_unlock(&cifs_file_list_lock);
1766                                 return open_file;
1767                         } else {
1768                                 if (!inv_file)
1769                                         inv_file = open_file;
1770                         }
1771                 }
1772         }
1773         /* couldn't find useable FH with same pid, try any available */
1774         if (!any_available) {
1775                 any_available = true;
1776                 goto refind_writable;
1777         }
1778
1779         if (inv_file) {
1780                 any_available = false;
1781                 cifsFileInfo_get_locked(inv_file);
1782         }
1783
1784         spin_unlock(&cifs_file_list_lock);
1785
1786         if (inv_file) {
1787                 rc = cifs_reopen_file(inv_file, false);
1788                 if (!rc)
1789                         return inv_file;
1790                 else {
1791                         spin_lock(&cifs_file_list_lock);
1792                         list_move_tail(&inv_file->flist,
1793                                         &cifs_inode->openFileList);
1794                         spin_unlock(&cifs_file_list_lock);
1795                         cifsFileInfo_put(inv_file);
1796                         spin_lock(&cifs_file_list_lock);
1797                         ++refind;
1798                         goto refind_writable;
1799                 }
1800         }
1801
1802         return NULL;
1803 }
1804
1805 static int cifs_partialpagewrite(struct page *page, unsigned from, unsigned to)
1806 {
1807         struct address_space *mapping = page->mapping;
1808         loff_t offset = (loff_t)page->index << PAGE_CACHE_SHIFT;
1809         char *write_data;
1810         int rc = -EFAULT;
1811         int bytes_written = 0;
1812         struct inode *inode;
1813         struct cifsFileInfo *open_file;
1814
1815         if (!mapping || !mapping->host)
1816                 return -EFAULT;
1817
1818         inode = page->mapping->host;
1819
1820         offset += (loff_t)from;
1821         write_data = kmap(page);
1822         write_data += from;
1823
1824         if ((to > PAGE_CACHE_SIZE) || (from > to)) {
1825                 kunmap(page);
1826                 return -EIO;
1827         }
1828
1829         /* racing with truncate? */
1830         if (offset > mapping->host->i_size) {
1831                 kunmap(page);
1832                 return 0; /* don't care */
1833         }
1834
1835         /* check to make sure that we are not extending the file */
1836         if (mapping->host->i_size - offset < (loff_t)to)
1837                 to = (unsigned)(mapping->host->i_size - offset);
1838
1839         open_file = find_writable_file(CIFS_I(mapping->host), false);
1840         if (open_file) {
1841                 bytes_written = cifs_write(open_file, open_file->pid,
1842                                            write_data, to - from, &offset);
1843                 cifsFileInfo_put(open_file);
1844                 /* Does mm or vfs already set times? */
1845                 inode->i_atime = inode->i_mtime = current_fs_time(inode->i_sb);
1846                 if ((bytes_written > 0) && (offset))
1847                         rc = 0;
1848                 else if (bytes_written < 0)
1849                         rc = bytes_written;
1850         } else {
1851                 cFYI(1, "No writeable filehandles for inode");
1852                 rc = -EIO;
1853         }
1854
1855         kunmap(page);
1856         return rc;
1857 }
1858
1859 static int cifs_writepages(struct address_space *mapping,
1860                            struct writeback_control *wbc)
1861 {
1862         struct cifs_sb_info *cifs_sb = CIFS_SB(mapping->host->i_sb);
1863         bool done = false, scanned = false, range_whole = false;
1864         pgoff_t end, index;
1865         struct cifs_writedata *wdata;
1866         struct TCP_Server_Info *server;
1867         struct page *page;
1868         int rc = 0;
1869
1870         /*
1871          * If wsize is smaller than the page cache size, default to writing
1872          * one page at a time via cifs_writepage
1873          */
1874         if (cifs_sb->wsize < PAGE_CACHE_SIZE)
1875                 return generic_writepages(mapping, wbc);
1876
1877         if (wbc->range_cyclic) {
1878                 index = mapping->writeback_index; /* Start from prev offset */
1879                 end = -1;
1880         } else {
1881                 index = wbc->range_start >> PAGE_CACHE_SHIFT;
1882                 end = wbc->range_end >> PAGE_CACHE_SHIFT;
1883                 if (wbc->range_start == 0 && wbc->range_end == LLONG_MAX)
1884                         range_whole = true;
1885                 scanned = true;
1886         }
1887 retry:
1888         while (!done && index <= end) {
1889                 unsigned int i, nr_pages, found_pages;
1890                 pgoff_t next = 0, tofind;
1891                 struct page **pages;
1892
1893                 tofind = min((cifs_sb->wsize / PAGE_CACHE_SIZE) - 1,
1894                                 end - index) + 1;
1895
1896                 wdata = cifs_writedata_alloc((unsigned int)tofind,
1897                                              cifs_writev_complete);
1898                 if (!wdata) {
1899                         rc = -ENOMEM;
1900                         break;
1901                 }
1902
1903                 /*
1904                  * find_get_pages_tag seems to return a max of 256 on each
1905                  * iteration, so we must call it several times in order to
1906                  * fill the array or the wsize is effectively limited to
1907                  * 256 * PAGE_CACHE_SIZE.
1908                  */
1909                 found_pages = 0;
1910                 pages = wdata->pages;
1911                 do {
1912                         nr_pages = find_get_pages_tag(mapping, &index,
1913                                                         PAGECACHE_TAG_DIRTY,
1914                                                         tofind, pages);
1915                         found_pages += nr_pages;
1916                         tofind -= nr_pages;
1917                         pages += nr_pages;
1918                 } while (nr_pages && tofind && index <= end);
1919
1920                 if (found_pages == 0) {
1921                         kref_put(&wdata->refcount, cifs_writedata_release);
1922                         break;
1923                 }
1924
1925                 nr_pages = 0;
1926                 for (i = 0; i < found_pages; i++) {
1927                         page = wdata->pages[i];
1928                         /*
1929                          * At this point we hold neither mapping->tree_lock nor
1930                          * lock on the page itself: the page may be truncated or
1931                          * invalidated (changing page->mapping to NULL), or even
1932                          * swizzled back from swapper_space to tmpfs file
1933                          * mapping
1934                          */
1935
1936                         if (nr_pages == 0)
1937                                 lock_page(page);
1938                         else if (!trylock_page(page))
1939                                 break;
1940
1941                         if (unlikely(page->mapping != mapping)) {
1942                                 unlock_page(page);
1943                                 break;
1944                         }
1945
1946                         if (!wbc->range_cyclic && page->index > end) {
1947                                 done = true;
1948                                 unlock_page(page);
1949                                 break;
1950                         }
1951
1952                         if (next && (page->index != next)) {
1953                                 /* Not next consecutive page */
1954                                 unlock_page(page);
1955                                 break;
1956                         }
1957
1958                         if (wbc->sync_mode != WB_SYNC_NONE)
1959                                 wait_on_page_writeback(page);
1960
1961                         if (PageWriteback(page) ||
1962                                         !clear_page_dirty_for_io(page)) {
1963                                 unlock_page(page);
1964                                 break;
1965                         }
1966
1967                         /*
1968                          * This actually clears the dirty bit in the radix tree.
1969                          * See cifs_writepage() for more commentary.
1970                          */
1971                         set_page_writeback(page);
1972
1973                         if (page_offset(page) >= i_size_read(mapping->host)) {
1974                                 done = true;
1975                                 unlock_page(page);
1976                                 end_page_writeback(page);
1977                                 break;
1978                         }
1979
1980                         wdata->pages[i] = page;
1981                         next = page->index + 1;
1982                         ++nr_pages;
1983                 }
1984
1985                 /* reset index to refind any pages skipped */
1986                 if (nr_pages == 0)
1987                         index = wdata->pages[0]->index + 1;
1988
1989                 /* put any pages we aren't going to use */
1990                 for (i = nr_pages; i < found_pages; i++) {
1991                         page_cache_release(wdata->pages[i]);
1992                         wdata->pages[i] = NULL;
1993                 }
1994
1995                 /* nothing to write? */
1996                 if (nr_pages == 0) {
1997                         kref_put(&wdata->refcount, cifs_writedata_release);
1998                         continue;
1999                 }
2000
2001                 wdata->sync_mode = wbc->sync_mode;
2002                 wdata->nr_pages = nr_pages;
2003                 wdata->offset = page_offset(wdata->pages[0]);
2004                 wdata->pagesz = PAGE_CACHE_SIZE;
2005                 wdata->tailsz =
2006                         min(i_size_read(mapping->host) -
2007                             page_offset(wdata->pages[nr_pages - 1]),
2008                             (loff_t)PAGE_CACHE_SIZE);
2009                 wdata->bytes = ((nr_pages - 1) * PAGE_CACHE_SIZE) +
2010                                         wdata->tailsz;
2011
2012                 do {
2013                         if (wdata->cfile != NULL)
2014                                 cifsFileInfo_put(wdata->cfile);
2015                         wdata->cfile = find_writable_file(CIFS_I(mapping->host),
2016                                                           false);
2017                         if (!wdata->cfile) {
2018                                 cERROR(1, "No writable handles for inode");
2019                                 rc = -EBADF;
2020                                 break;
2021                         }
2022                         wdata->pid = wdata->cfile->pid;
2023                         server = tlink_tcon(wdata->cfile->tlink)->ses->server;
2024                         rc = server->ops->async_writev(wdata);
2025                 } while (wbc->sync_mode == WB_SYNC_ALL && rc == -EAGAIN);
2026
2027                 for (i = 0; i < nr_pages; ++i)
2028                         unlock_page(wdata->pages[i]);
2029
2030                 /* send failure -- clean up the mess */
2031                 if (rc != 0) {
2032                         for (i = 0; i < nr_pages; ++i) {
2033                                 if (rc == -EAGAIN)
2034                                         redirty_page_for_writepage(wbc,
2035                                                            wdata->pages[i]);
2036                                 else
2037                                         SetPageError(wdata->pages[i]);
2038                                 end_page_writeback(wdata->pages[i]);
2039                                 page_cache_release(wdata->pages[i]);
2040                         }
2041                         if (rc != -EAGAIN)
2042                                 mapping_set_error(mapping, rc);
2043                 }
2044                 kref_put(&wdata->refcount, cifs_writedata_release);
2045
2046                 wbc->nr_to_write -= nr_pages;
2047                 if (wbc->nr_to_write <= 0)
2048                         done = true;
2049
2050                 index = next;
2051         }
2052
2053         if (!scanned && !done) {
2054                 /*
2055                  * We hit the last page and there is more work to be done: wrap
2056                  * back to the start of the file
2057                  */
2058                 scanned = true;
2059                 index = 0;
2060                 goto retry;
2061         }
2062
2063         if (wbc->range_cyclic || (range_whole && wbc->nr_to_write > 0))
2064                 mapping->writeback_index = index;
2065
2066         return rc;
2067 }
2068
2069 static int
2070 cifs_writepage_locked(struct page *page, struct writeback_control *wbc)
2071 {
2072         int rc;
2073         unsigned int xid;
2074
2075         xid = get_xid();
2076 /* BB add check for wbc flags */
2077         page_cache_get(page);
2078         if (!PageUptodate(page))
2079                 cFYI(1, "ppw - page not up to date");
2080
2081         /*
2082          * Set the "writeback" flag, and clear "dirty" in the radix tree.
2083          *
2084          * A writepage() implementation always needs to do either this,
2085          * or re-dirty the page with "redirty_page_for_writepage()" in
2086          * the case of a failure.
2087          *
2088          * Just unlocking the page will cause the radix tree tag-bits
2089          * to fail to update with the state of the page correctly.
2090          */
2091         set_page_writeback(page);
2092 retry_write:
2093         rc = cifs_partialpagewrite(page, 0, PAGE_CACHE_SIZE);
2094         if (rc == -EAGAIN && wbc->sync_mode == WB_SYNC_ALL)
2095                 goto retry_write;
2096         else if (rc == -EAGAIN)
2097                 redirty_page_for_writepage(wbc, page);
2098         else if (rc != 0)
2099                 SetPageError(page);
2100         else
2101                 SetPageUptodate(page);
2102         end_page_writeback(page);
2103         page_cache_release(page);
2104         free_xid(xid);
2105         return rc;
2106 }
2107
2108 static int cifs_writepage(struct page *page, struct writeback_control *wbc)
2109 {
2110         int rc = cifs_writepage_locked(page, wbc);
2111         unlock_page(page);
2112         return rc;
2113 }
2114
2115 static int cifs_write_end(struct file *file, struct address_space *mapping,
2116                         loff_t pos, unsigned len, unsigned copied,
2117                         struct page *page, void *fsdata)
2118 {
2119         int rc;
2120         struct inode *inode = mapping->host;
2121         struct cifsFileInfo *cfile = file->private_data;
2122         struct cifs_sb_info *cifs_sb = CIFS_SB(cfile->dentry->d_sb);
2123         __u32 pid;
2124
2125         if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
2126                 pid = cfile->pid;
2127         else
2128                 pid = current->tgid;
2129
2130         cFYI(1, "write_end for page %p from pos %lld with %d bytes",
2131                  page, pos, copied);
2132
2133         if (PageChecked(page)) {
2134                 if (copied == len)
2135                         SetPageUptodate(page);
2136                 ClearPageChecked(page);
2137         } else if (!PageUptodate(page) && copied == PAGE_CACHE_SIZE)
2138                 SetPageUptodate(page);
2139
2140         if (!PageUptodate(page)) {
2141                 char *page_data;
2142                 unsigned offset = pos & (PAGE_CACHE_SIZE - 1);
2143                 unsigned int xid;
2144
2145                 xid = get_xid();
2146                 /* this is probably better than directly calling
2147                    partialpage_write since in this function the file handle is
2148                    known which we might as well leverage */
2149                 /* BB check if anything else missing out of ppw
2150                    such as updating last write time */
2151                 page_data = kmap(page);
2152                 rc = cifs_write(cfile, pid, page_data + offset, copied, &pos);
2153                 /* if (rc < 0) should we set writebehind rc? */
2154                 kunmap(page);
2155
2156                 free_xid(xid);
2157         } else {
2158                 rc = copied;
2159                 pos += copied;
2160                 set_page_dirty(page);
2161         }
2162
2163         if (rc > 0) {
2164                 spin_lock(&inode->i_lock);
2165                 if (pos > inode->i_size)
2166                         i_size_write(inode, pos);
2167                 spin_unlock(&inode->i_lock);
2168         }
2169
2170         unlock_page(page);
2171         page_cache_release(page);
2172
2173         return rc;
2174 }
2175
2176 int cifs_strict_fsync(struct file *file, loff_t start, loff_t end,
2177                       int datasync)
2178 {
2179         unsigned int xid;
2180         int rc = 0;
2181         struct cifs_tcon *tcon;
2182         struct TCP_Server_Info *server;
2183         struct cifsFileInfo *smbfile = file->private_data;
2184         struct inode *inode = file_inode(file);
2185         struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
2186
2187         rc = filemap_write_and_wait_range(inode->i_mapping, start, end);
2188         if (rc)
2189                 return rc;
2190         mutex_lock(&inode->i_mutex);
2191
2192         xid = get_xid();
2193
2194         cFYI(1, "Sync file - name: %s datasync: 0x%x",
2195                 file->f_path.dentry->d_name.name, datasync);
2196
2197         if (!CIFS_I(inode)->clientCanCacheRead) {
2198                 rc = cifs_invalidate_mapping(inode);
2199                 if (rc) {
2200                         cFYI(1, "rc: %d during invalidate phase", rc);
2201                         rc = 0; /* don't care about it in fsync */
2202                 }
2203         }
2204
2205         tcon = tlink_tcon(smbfile->tlink);
2206         if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOSSYNC)) {
2207                 server = tcon->ses->server;
2208                 if (server->ops->flush)
2209                         rc = server->ops->flush(xid, tcon, &smbfile->fid);
2210                 else
2211                         rc = -ENOSYS;
2212         }
2213
2214         free_xid(xid);
2215         mutex_unlock(&inode->i_mutex);
2216         return rc;
2217 }
2218
2219 int cifs_fsync(struct file *file, loff_t start, loff_t end, int datasync)
2220 {
2221         unsigned int xid;
2222         int rc = 0;
2223         struct cifs_tcon *tcon;
2224         struct TCP_Server_Info *server;
2225         struct cifsFileInfo *smbfile = file->private_data;
2226         struct cifs_sb_info *cifs_sb = CIFS_SB(file->f_path.dentry->d_sb);
2227         struct inode *inode = file->f_mapping->host;
2228
2229         rc = filemap_write_and_wait_range(inode->i_mapping, start, end);
2230         if (rc)
2231                 return rc;
2232         mutex_lock(&inode->i_mutex);
2233
2234         xid = get_xid();
2235
2236         cFYI(1, "Sync file - name: %s datasync: 0x%x",
2237                 file->f_path.dentry->d_name.name, datasync);
2238
2239         tcon = tlink_tcon(smbfile->tlink);
2240         if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOSSYNC)) {
2241                 server = tcon->ses->server;
2242                 if (server->ops->flush)
2243                         rc = server->ops->flush(xid, tcon, &smbfile->fid);
2244                 else
2245                         rc = -ENOSYS;
2246         }
2247
2248         free_xid(xid);
2249         mutex_unlock(&inode->i_mutex);
2250         return rc;
2251 }
2252
2253 /*
2254  * As file closes, flush all cached write data for this inode checking
2255  * for write behind errors.
2256  */
2257 int cifs_flush(struct file *file, fl_owner_t id)
2258 {
2259         struct inode *inode = file_inode(file);
2260         int rc = 0;
2261
2262         if (file->f_mode & FMODE_WRITE)
2263                 rc = filemap_write_and_wait(inode->i_mapping);
2264
2265         cFYI(1, "Flush inode %p file %p rc %d", inode, file, rc);
2266
2267         return rc;
2268 }
2269
2270 static int
2271 cifs_write_allocate_pages(struct page **pages, unsigned long num_pages)
2272 {
2273         int rc = 0;
2274         unsigned long i;
2275
2276         for (i = 0; i < num_pages; i++) {
2277                 pages[i] = alloc_page(GFP_KERNEL|__GFP_HIGHMEM);
2278                 if (!pages[i]) {
2279                         /*
2280                          * save number of pages we have already allocated and
2281                          * return with ENOMEM error
2282                          */
2283                         num_pages = i;
2284                         rc = -ENOMEM;
2285                         break;
2286                 }
2287         }
2288
2289         if (rc) {
2290                 for (i = 0; i < num_pages; i++)
2291                         put_page(pages[i]);
2292         }
2293         return rc;
2294 }
2295
2296 static inline
2297 size_t get_numpages(const size_t wsize, const size_t len, size_t *cur_len)
2298 {
2299         size_t num_pages;
2300         size_t clen;
2301
2302         clen = min_t(const size_t, len, wsize);
2303         num_pages = DIV_ROUND_UP(clen, PAGE_SIZE);
2304
2305         if (cur_len)
2306                 *cur_len = clen;
2307
2308         return num_pages;
2309 }
2310
2311 static void
2312 cifs_uncached_writev_complete(struct work_struct *work)
2313 {
2314         int i;
2315         struct cifs_writedata *wdata = container_of(work,
2316                                         struct cifs_writedata, work);
2317         struct inode *inode = wdata->cfile->dentry->d_inode;
2318         struct cifsInodeInfo *cifsi = CIFS_I(inode);
2319
2320         spin_lock(&inode->i_lock);
2321         cifs_update_eof(cifsi, wdata->offset, wdata->bytes);
2322         if (cifsi->server_eof > inode->i_size)
2323                 i_size_write(inode, cifsi->server_eof);
2324         spin_unlock(&inode->i_lock);
2325
2326         complete(&wdata->done);
2327
2328         if (wdata->result != -EAGAIN) {
2329                 for (i = 0; i < wdata->nr_pages; i++)
2330                         put_page(wdata->pages[i]);
2331         }
2332
2333         kref_put(&wdata->refcount, cifs_writedata_release);
2334 }
2335
2336 /* attempt to send write to server, retry on any -EAGAIN errors */
2337 static int
2338 cifs_uncached_retry_writev(struct cifs_writedata *wdata)
2339 {
2340         int rc;
2341         struct TCP_Server_Info *server;
2342
2343         server = tlink_tcon(wdata->cfile->tlink)->ses->server;
2344
2345         do {
2346                 if (wdata->cfile->invalidHandle) {
2347                         rc = cifs_reopen_file(wdata->cfile, false);
2348                         if (rc != 0)
2349                                 continue;
2350                 }
2351                 rc = server->ops->async_writev(wdata);
2352         } while (rc == -EAGAIN);
2353
2354         return rc;
2355 }
2356
2357 static ssize_t
2358 cifs_iovec_write(struct file *file, const struct iovec *iov,
2359                  unsigned long nr_segs, loff_t *poffset)
2360 {
2361         unsigned long nr_pages, i;
2362         size_t copied, len, cur_len;
2363         ssize_t total_written = 0;
2364         loff_t offset;
2365         struct iov_iter it;
2366         struct cifsFileInfo *open_file;
2367         struct cifs_tcon *tcon;
2368         struct cifs_sb_info *cifs_sb;
2369         struct cifs_writedata *wdata, *tmp;
2370         struct list_head wdata_list;
2371         int rc;
2372         pid_t pid;
2373
2374         len = iov_length(iov, nr_segs);
2375         if (!len)
2376                 return 0;
2377
2378         rc = generic_write_checks(file, poffset, &len, 0);
2379         if (rc)
2380                 return rc;
2381
2382         INIT_LIST_HEAD(&wdata_list);
2383         cifs_sb = CIFS_SB(file->f_path.dentry->d_sb);
2384         open_file = file->private_data;
2385         tcon = tlink_tcon(open_file->tlink);
2386
2387         if (!tcon->ses->server->ops->async_writev)
2388                 return -ENOSYS;
2389
2390         offset = *poffset;
2391
2392         if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
2393                 pid = open_file->pid;
2394         else
2395                 pid = current->tgid;
2396
2397         iov_iter_init(&it, iov, nr_segs, len, 0);
2398         do {
2399                 size_t save_len;
2400
2401                 nr_pages = get_numpages(cifs_sb->wsize, len, &cur_len);
2402                 wdata = cifs_writedata_alloc(nr_pages,
2403                                              cifs_uncached_writev_complete);
2404                 if (!wdata) {
2405                         rc = -ENOMEM;
2406                         break;
2407                 }
2408
2409                 rc = cifs_write_allocate_pages(wdata->pages, nr_pages);
2410                 if (rc) {
2411                         kfree(wdata);
2412                         break;
2413                 }
2414
2415                 save_len = cur_len;
2416                 for (i = 0; i < nr_pages; i++) {
2417                         copied = min_t(const size_t, cur_len, PAGE_SIZE);
2418                         copied = iov_iter_copy_from_user(wdata->pages[i], &it,
2419                                                          0, copied);
2420                         cur_len -= copied;
2421                         iov_iter_advance(&it, copied);
2422                 }
2423                 cur_len = save_len - cur_len;
2424
2425                 wdata->sync_mode = WB_SYNC_ALL;
2426                 wdata->nr_pages = nr_pages;
2427                 wdata->offset = (__u64)offset;
2428                 wdata->cfile = cifsFileInfo_get(open_file);
2429                 wdata->pid = pid;
2430                 wdata->bytes = cur_len;
2431                 wdata->pagesz = PAGE_SIZE;
2432                 wdata->tailsz = cur_len - ((nr_pages - 1) * PAGE_SIZE);
2433                 rc = cifs_uncached_retry_writev(wdata);
2434                 if (rc) {
2435                         kref_put(&wdata->refcount, cifs_writedata_release);
2436                         break;
2437                 }
2438
2439                 list_add_tail(&wdata->list, &wdata_list);
2440                 offset += cur_len;
2441                 len -= cur_len;
2442         } while (len > 0);
2443
2444         /*
2445          * If at least one write was successfully sent, then discard any rc
2446          * value from the later writes. If the other write succeeds, then
2447          * we'll end up returning whatever was written. If it fails, then
2448          * we'll get a new rc value from that.
2449          */
2450         if (!list_empty(&wdata_list))
2451                 rc = 0;
2452
2453         /*
2454          * Wait for and collect replies for any successful sends in order of
2455          * increasing offset. Once an error is hit or we get a fatal signal
2456          * while waiting, then return without waiting for any more replies.
2457          */
2458 restart_loop:
2459         list_for_each_entry_safe(wdata, tmp, &wdata_list, list) {
2460                 if (!rc) {
2461                         /* FIXME: freezable too? */
2462                         rc = wait_for_completion_killable(&wdata->done);
2463                         if (rc)
2464                                 rc = -EINTR;
2465                         else if (wdata->result)
2466                                 rc = wdata->result;
2467                         else
2468                                 total_written += wdata->bytes;
2469
2470                         /* resend call if it's a retryable error */
2471                         if (rc == -EAGAIN) {
2472                                 rc = cifs_uncached_retry_writev(wdata);
2473                                 goto restart_loop;
2474                         }
2475                 }
2476                 list_del_init(&wdata->list);
2477                 kref_put(&wdata->refcount, cifs_writedata_release);
2478         }
2479
2480         if (total_written > 0)
2481                 *poffset += total_written;
2482
2483         cifs_stats_bytes_written(tcon, total_written);
2484         return total_written ? total_written : (ssize_t)rc;
2485 }
2486
2487 ssize_t cifs_user_writev(struct kiocb *iocb, const struct iovec *iov,
2488                                 unsigned long nr_segs, loff_t pos)
2489 {
2490         ssize_t written;
2491         struct inode *inode;
2492
2493         inode = file_inode(iocb->ki_filp);
2494
2495         /*
2496          * BB - optimize the way when signing is disabled. We can drop this
2497          * extra memory-to-memory copying and use iovec buffers for constructing
2498          * write request.
2499          */
2500
2501         written = cifs_iovec_write(iocb->ki_filp, iov, nr_segs, &pos);
2502         if (written > 0) {
2503                 CIFS_I(inode)->invalid_mapping = true;
2504                 iocb->ki_pos = pos;
2505         }
2506
2507         return written;
2508 }
2509
2510 static ssize_t
2511 cifs_writev(struct kiocb *iocb, const struct iovec *iov,
2512             unsigned long nr_segs, loff_t pos)
2513 {
2514         struct file *file = iocb->ki_filp;
2515         struct cifsFileInfo *cfile = (struct cifsFileInfo *)file->private_data;
2516         struct inode *inode = file->f_mapping->host;
2517         struct cifsInodeInfo *cinode = CIFS_I(inode);
2518         struct TCP_Server_Info *server = tlink_tcon(cfile->tlink)->ses->server;
2519         ssize_t rc = -EACCES;
2520
2521         BUG_ON(iocb->ki_pos != pos);
2522
2523         /*
2524          * We need to hold the sem to be sure nobody modifies lock list
2525          * with a brlock that prevents writing.
2526          */
2527         down_read(&cinode->lock_sem);
2528         if (!cifs_find_lock_conflict(cfile, pos, iov_length(iov, nr_segs),
2529                                      server->vals->exclusive_lock_type, NULL,
2530                                      CIFS_WRITE_OP)) {
2531                 mutex_lock(&inode->i_mutex);
2532                 rc = __generic_file_aio_write(iocb, iov, nr_segs,
2533                                                &iocb->ki_pos);
2534                 mutex_unlock(&inode->i_mutex);
2535         }
2536
2537         if (rc > 0 || rc == -EIOCBQUEUED) {
2538                 ssize_t err;
2539
2540                 err = generic_write_sync(file, pos, rc);
2541                 if (err < 0 && rc > 0)
2542                         rc = err;
2543         }
2544
2545         up_read(&cinode->lock_sem);
2546         return rc;
2547 }
2548
2549 ssize_t
2550 cifs_strict_writev(struct kiocb *iocb, const struct iovec *iov,
2551                    unsigned long nr_segs, loff_t pos)
2552 {
2553         struct inode *inode = file_inode(iocb->ki_filp);
2554         struct cifsInodeInfo *cinode = CIFS_I(inode);
2555         struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
2556         struct cifsFileInfo *cfile = (struct cifsFileInfo *)
2557                                                 iocb->ki_filp->private_data;
2558         struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
2559         ssize_t written;
2560
2561         if (cinode->clientCanCacheAll) {
2562                 if (cap_unix(tcon->ses) &&
2563                 (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability))
2564                     && ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0))
2565                         return generic_file_aio_write(iocb, iov, nr_segs, pos);
2566                 return cifs_writev(iocb, iov, nr_segs, pos);
2567         }
2568         /*
2569          * For non-oplocked files in strict cache mode we need to write the data
2570          * to the server exactly from the pos to pos+len-1 rather than flush all
2571          * affected pages because it may cause a error with mandatory locks on
2572          * these pages but not on the region from pos to ppos+len-1.
2573          */
2574         written = cifs_user_writev(iocb, iov, nr_segs, pos);
2575         if (written > 0 && cinode->clientCanCacheRead) {
2576                 /*
2577                  * Windows 7 server can delay breaking level2 oplock if a write
2578                  * request comes - break it on the client to prevent reading
2579                  * an old data.
2580                  */
2581                 cifs_invalidate_mapping(inode);
2582                 cFYI(1, "Set no oplock for inode=%p after a write operation",
2583                      inode);
2584                 cinode->clientCanCacheRead = false;
2585         }
2586         return written;
2587 }
2588
2589 static struct cifs_readdata *
2590 cifs_readdata_alloc(unsigned int nr_pages, work_func_t complete)
2591 {
2592         struct cifs_readdata *rdata;
2593
2594         rdata = kzalloc(sizeof(*rdata) + (sizeof(struct page *) * nr_pages),
2595                         GFP_KERNEL);
2596         if (rdata != NULL) {
2597                 kref_init(&rdata->refcount);
2598                 INIT_LIST_HEAD(&rdata->list);
2599                 init_completion(&rdata->done);
2600                 INIT_WORK(&rdata->work, complete);
2601         }
2602
2603         return rdata;
2604 }
2605
2606 void
2607 cifs_readdata_release(struct kref *refcount)
2608 {
2609         struct cifs_readdata *rdata = container_of(refcount,
2610                                         struct cifs_readdata, refcount);
2611
2612         if (rdata->cfile)
2613                 cifsFileInfo_put(rdata->cfile);
2614
2615         kfree(rdata);
2616 }
2617
2618 static int
2619 cifs_read_allocate_pages(struct cifs_readdata *rdata, unsigned int nr_pages)
2620 {
2621         int rc = 0;
2622         struct page *page;
2623         unsigned int i;
2624
2625         for (i = 0; i < nr_pages; i++) {
2626                 page = alloc_page(GFP_KERNEL|__GFP_HIGHMEM);
2627                 if (!page) {
2628                         rc = -ENOMEM;
2629                         break;
2630                 }
2631                 rdata->pages[i] = page;
2632         }
2633
2634         if (rc) {
2635                 for (i = 0; i < nr_pages; i++) {
2636                         put_page(rdata->pages[i]);
2637                         rdata->pages[i] = NULL;
2638                 }
2639         }
2640         return rc;
2641 }
2642
2643 static void
2644 cifs_uncached_readdata_release(struct kref *refcount)
2645 {
2646         struct cifs_readdata *rdata = container_of(refcount,
2647                                         struct cifs_readdata, refcount);
2648         unsigned int i;
2649
2650         for (i = 0; i < rdata->nr_pages; i++) {
2651                 put_page(rdata->pages[i]);
2652                 rdata->pages[i] = NULL;
2653         }
2654         cifs_readdata_release(refcount);
2655 }
2656
2657 static int
2658 cifs_retry_async_readv(struct cifs_readdata *rdata)
2659 {
2660         int rc;
2661         struct TCP_Server_Info *server;
2662
2663         server = tlink_tcon(rdata->cfile->tlink)->ses->server;
2664
2665         do {
2666                 if (rdata->cfile->invalidHandle) {
2667                         rc = cifs_reopen_file(rdata->cfile, true);
2668                         if (rc != 0)
2669                                 continue;
2670                 }
2671                 rc = server->ops->async_readv(rdata);
2672         } while (rc == -EAGAIN);
2673
2674         return rc;
2675 }
2676
2677 /**
2678  * cifs_readdata_to_iov - copy data from pages in response to an iovec
2679  * @rdata:      the readdata response with list of pages holding data
2680  * @iov:        vector in which we should copy the data
2681  * @nr_segs:    number of segments in vector
2682  * @offset:     offset into file of the first iovec
2683  * @copied:     used to return the amount of data copied to the iov
2684  *
2685  * This function copies data from a list of pages in a readdata response into
2686  * an array of iovecs. It will first calculate where the data should go
2687  * based on the info in the readdata and then copy the data into that spot.
2688  */
2689 static ssize_t
2690 cifs_readdata_to_iov(struct cifs_readdata *rdata, const struct iovec *iov,
2691                         unsigned long nr_segs, loff_t offset, ssize_t *copied)
2692 {
2693         int rc = 0;
2694         struct iov_iter ii;
2695         size_t pos = rdata->offset - offset;
2696         ssize_t remaining = rdata->bytes;
2697         unsigned char *pdata;
2698         unsigned int i;
2699
2700         /* set up iov_iter and advance to the correct offset */
2701         iov_iter_init(&ii, iov, nr_segs, iov_length(iov, nr_segs), 0);
2702         iov_iter_advance(&ii, pos);
2703
2704         *copied = 0;
2705         for (i = 0; i < rdata->nr_pages; i++) {
2706                 ssize_t copy;
2707                 struct page *page = rdata->pages[i];
2708
2709                 /* copy a whole page or whatever's left */
2710                 copy = min_t(ssize_t, remaining, PAGE_SIZE);
2711
2712                 /* ...but limit it to whatever space is left in the iov */
2713                 copy = min_t(ssize_t, copy, iov_iter_count(&ii));
2714
2715                 /* go while there's data to be copied and no errors */
2716                 if (copy && !rc) {
2717                         pdata = kmap(page);
2718                         rc = memcpy_toiovecend(ii.iov, pdata, ii.iov_offset,
2719                                                 (int)copy);
2720                         kunmap(page);
2721                         if (!rc) {
2722                                 *copied += copy;
2723                                 remaining -= copy;
2724                                 iov_iter_advance(&ii, copy);
2725                         }
2726                 }
2727         }
2728
2729         return rc;
2730 }
2731
2732 static void
2733 cifs_uncached_readv_complete(struct work_struct *work)
2734 {
2735         struct cifs_readdata *rdata = container_of(work,
2736                                                 struct cifs_readdata, work);
2737
2738         complete(&rdata->done);
2739         kref_put(&rdata->refcount, cifs_uncached_readdata_release);
2740 }
2741
2742 static int
2743 cifs_uncached_read_into_pages(struct TCP_Server_Info *server,
2744                         struct cifs_readdata *rdata, unsigned int len)
2745 {
2746         int total_read = 0, result = 0;
2747         unsigned int i;
2748         unsigned int nr_pages = rdata->nr_pages;
2749         struct kvec iov;
2750
2751         rdata->tailsz = PAGE_SIZE;
2752         for (i = 0; i < nr_pages; i++) {
2753                 struct page *page = rdata->pages[i];
2754
2755                 if (len >= PAGE_SIZE) {
2756                         /* enough data to fill the page */
2757                         iov.iov_base = kmap(page);
2758                         iov.iov_len = PAGE_SIZE;
2759                         cFYI(1, "%u: iov_base=%p iov_len=%zu",
2760                                 i, iov.iov_base, iov.iov_len);
2761                         len -= PAGE_SIZE;
2762                 } else if (len > 0) {
2763                         /* enough for partial page, fill and zero the rest */
2764                         iov.iov_base = kmap(page);
2765                         iov.iov_len = len;
2766                         cFYI(1, "%u: iov_base=%p iov_len=%zu",
2767                                 i, iov.iov_base, iov.iov_len);
2768                         memset(iov.iov_base + len, '\0', PAGE_SIZE - len);
2769                         rdata->tailsz = len;
2770                         len = 0;
2771                 } else {
2772                         /* no need to hold page hostage */
2773                         rdata->pages[i] = NULL;
2774                         rdata->nr_pages--;
2775                         put_page(page);
2776                         continue;
2777                 }
2778
2779                 result = cifs_readv_from_socket(server, &iov, 1, iov.iov_len);
2780                 kunmap(page);
2781                 if (result < 0)
2782                         break;
2783
2784                 total_read += result;
2785         }
2786
2787         return total_read > 0 ? total_read : result;
2788 }
2789
2790 static ssize_t
2791 cifs_iovec_read(struct file *file, const struct iovec *iov,
2792                  unsigned long nr_segs, loff_t *poffset)
2793 {
2794         ssize_t rc;
2795         size_t len, cur_len;
2796         ssize_t total_read = 0;
2797         loff_t offset = *poffset;
2798         unsigned int npages;
2799         struct cifs_sb_info *cifs_sb;
2800         struct cifs_tcon *tcon;
2801         struct cifsFileInfo *open_file;
2802         struct cifs_readdata *rdata, *tmp;
2803         struct list_head rdata_list;
2804         pid_t pid;
2805
2806         if (!nr_segs)
2807                 return 0;
2808
2809         len = iov_length(iov, nr_segs);
2810         if (!len)
2811                 return 0;
2812
2813         INIT_LIST_HEAD(&rdata_list);
2814         cifs_sb = CIFS_SB(file->f_path.dentry->d_sb);
2815         open_file = file->private_data;
2816         tcon = tlink_tcon(open_file->tlink);
2817
2818         if (!tcon->ses->server->ops->async_readv)
2819                 return -ENOSYS;
2820
2821         if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
2822                 pid = open_file->pid;
2823         else
2824                 pid = current->tgid;
2825
2826         if ((file->f_flags & O_ACCMODE) == O_WRONLY)
2827                 cFYI(1, "attempting read on write only file instance");
2828
2829         do {
2830                 cur_len = min_t(const size_t, len - total_read, cifs_sb->rsize);
2831                 npages = DIV_ROUND_UP(cur_len, PAGE_SIZE);
2832
2833                 /* allocate a readdata struct */
2834                 rdata = cifs_readdata_alloc(npages,
2835                                             cifs_uncached_readv_complete);
2836                 if (!rdata) {
2837                         rc = -ENOMEM;
2838                         goto error;
2839                 }
2840
2841                 rc = cifs_read_allocate_pages(rdata, npages);
2842                 if (rc)
2843                         goto error;
2844
2845                 rdata->cfile = cifsFileInfo_get(open_file);
2846                 rdata->nr_pages = npages;
2847                 rdata->offset = offset;
2848                 rdata->bytes = cur_len;
2849                 rdata->pid = pid;
2850                 rdata->pagesz = PAGE_SIZE;
2851                 rdata->read_into_pages = cifs_uncached_read_into_pages;
2852
2853                 rc = cifs_retry_async_readv(rdata);
2854 error:
2855                 if (rc) {
2856                         kref_put(&rdata->refcount,
2857                                  cifs_uncached_readdata_release);
2858                         break;
2859                 }
2860
2861                 list_add_tail(&rdata->list, &rdata_list);
2862                 offset += cur_len;
2863                 len -= cur_len;
2864         } while (len > 0);
2865
2866         /* if at least one read request send succeeded, then reset rc */
2867         if (!list_empty(&rdata_list))
2868                 rc = 0;
2869
2870         /* the loop below should proceed in the order of increasing offsets */
2871 restart_loop:
2872         list_for_each_entry_safe(rdata, tmp, &rdata_list, list) {
2873                 if (!rc) {
2874                         ssize_t copied;
2875
2876                         /* FIXME: freezable sleep too? */
2877                         rc = wait_for_completion_killable(&rdata->done);
2878                         if (rc)
2879                                 rc = -EINTR;
2880                         else if (rdata->result)
2881                                 rc = rdata->result;
2882                         else {
2883                                 rc = cifs_readdata_to_iov(rdata, iov,
2884                                                         nr_segs, *poffset,
2885                                                         &copied);
2886                                 total_read += copied;
2887                         }
2888
2889                         /* resend call if it's a retryable error */
2890                         if (rc == -EAGAIN) {
2891                                 rc = cifs_retry_async_readv(rdata);
2892                                 goto restart_loop;
2893                         }
2894                 }
2895                 list_del_init(&rdata->list);
2896                 kref_put(&rdata->refcount, cifs_uncached_readdata_release);
2897         }
2898
2899         cifs_stats_bytes_read(tcon, total_read);
2900         *poffset += total_read;
2901
2902         /* mask nodata case */
2903         if (rc == -ENODATA)
2904                 rc = 0;
2905
2906         return total_read ? total_read : rc;
2907 }
2908
2909 ssize_t cifs_user_readv(struct kiocb *iocb, const struct iovec *iov,
2910                                unsigned long nr_segs, loff_t pos)
2911 {
2912         ssize_t read;
2913
2914         read = cifs_iovec_read(iocb->ki_filp, iov, nr_segs, &pos);
2915         if (read > 0)
2916                 iocb->ki_pos = pos;
2917
2918         return read;
2919 }
2920
2921 ssize_t
2922 cifs_strict_readv(struct kiocb *iocb, const struct iovec *iov,
2923                   unsigned long nr_segs, loff_t pos)
2924 {
2925         struct inode *inode = file_inode(iocb->ki_filp);
2926         struct cifsInodeInfo *cinode = CIFS_I(inode);
2927         struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
2928         struct cifsFileInfo *cfile = (struct cifsFileInfo *)
2929                                                 iocb->ki_filp->private_data;
2930         struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
2931         int rc = -EACCES;
2932
2933         /*
2934          * In strict cache mode we need to read from the server all the time
2935          * if we don't have level II oplock because the server can delay mtime
2936          * change - so we can't make a decision about inode invalidating.
2937          * And we can also fail with pagereading if there are mandatory locks
2938          * on pages affected by this read but not on the region from pos to
2939          * pos+len-1.
2940          */
2941         if (!cinode->clientCanCacheRead)
2942                 return cifs_user_readv(iocb, iov, nr_segs, pos);
2943
2944         if (cap_unix(tcon->ses) &&
2945             (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) &&
2946             ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0))
2947                 return generic_file_aio_read(iocb, iov, nr_segs, pos);
2948
2949         /*
2950          * We need to hold the sem to be sure nobody modifies lock list
2951          * with a brlock that prevents reading.
2952          */
2953         down_read(&cinode->lock_sem);
2954         if (!cifs_find_lock_conflict(cfile, pos, iov_length(iov, nr_segs),
2955                                      tcon->ses->server->vals->shared_lock_type,
2956                                      NULL, CIFS_READ_OP))
2957                 rc = generic_file_aio_read(iocb, iov, nr_segs, pos);
2958         up_read(&cinode->lock_sem);
2959         return rc;
2960 }
2961
2962 static ssize_t
2963 cifs_read(struct file *file, char *read_data, size_t read_size, loff_t *offset)
2964 {
2965         int rc = -EACCES;
2966         unsigned int bytes_read = 0;
2967         unsigned int total_read;
2968         unsigned int current_read_size;
2969         unsigned int rsize;
2970         struct cifs_sb_info *cifs_sb;
2971         struct cifs_tcon *tcon;
2972         struct TCP_Server_Info *server;
2973         unsigned int xid;
2974         char *cur_offset;
2975         struct cifsFileInfo *open_file;
2976         struct cifs_io_parms io_parms;
2977         int buf_type = CIFS_NO_BUFFER;
2978         __u32 pid;
2979
2980         xid = get_xid();
2981         cifs_sb = CIFS_SB(file->f_path.dentry->d_sb);
2982
2983         /* FIXME: set up handlers for larger reads and/or convert to async */
2984         rsize = min_t(unsigned int, cifs_sb->rsize, CIFSMaxBufSize);
2985
2986         if (file->private_data == NULL) {
2987                 rc = -EBADF;
2988                 free_xid(xid);
2989                 return rc;
2990         }
2991         open_file = file->private_data;
2992         tcon = tlink_tcon(open_file->tlink);
2993         server = tcon->ses->server;
2994
2995         if (!server->ops->sync_read) {
2996                 free_xid(xid);
2997                 return -ENOSYS;
2998         }
2999
3000         if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
3001                 pid = open_file->pid;
3002         else
3003                 pid = current->tgid;
3004
3005         if ((file->f_flags & O_ACCMODE) == O_WRONLY)
3006                 cFYI(1, "attempting read on write only file instance");
3007
3008         for (total_read = 0, cur_offset = read_data; read_size > total_read;
3009              total_read += bytes_read, cur_offset += bytes_read) {
3010                 current_read_size = min_t(uint, read_size - total_read, rsize);
3011                 /*
3012                  * For windows me and 9x we do not want to request more than it
3013                  * negotiated since it will refuse the read then.
3014                  */
3015                 if ((tcon->ses) && !(tcon->ses->capabilities &
3016                                 tcon->ses->server->vals->cap_large_files)) {
3017                         current_read_size = min_t(uint, current_read_size,
3018                                         CIFSMaxBufSize);
3019                 }
3020                 rc = -EAGAIN;
3021                 while (rc == -EAGAIN) {
3022                         if (open_file->invalidHandle) {
3023                                 rc = cifs_reopen_file(open_file, true);
3024                                 if (rc != 0)
3025                                         break;
3026                         }
3027                         io_parms.pid = pid;
3028                         io_parms.tcon = tcon;
3029                         io_parms.offset = *offset;
3030                         io_parms.length = current_read_size;
3031                         rc = server->ops->sync_read(xid, open_file, &io_parms,
3032                                                     &bytes_read, &cur_offset,
3033                                                     &buf_type);
3034                 }
3035                 if (rc || (bytes_read == 0)) {
3036                         if (total_read) {
3037                                 break;
3038                         } else {
3039                                 free_xid(xid);
3040                                 return rc;
3041                         }
3042                 } else {
3043                         cifs_stats_bytes_read(tcon, total_read);
3044                         *offset += bytes_read;
3045                 }
3046         }
3047         free_xid(xid);
3048         return total_read;
3049 }
3050
3051 /*
3052  * If the page is mmap'ed into a process' page tables, then we need to make
3053  * sure that it doesn't change while being written back.
3054  */
3055 static int
3056 cifs_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
3057 {
3058         struct page *page = vmf->page;
3059
3060         lock_page(page);
3061         return VM_FAULT_LOCKED;
3062 }
3063
3064 static struct vm_operations_struct cifs_file_vm_ops = {
3065         .fault = filemap_fault,
3066         .page_mkwrite = cifs_page_mkwrite,
3067         .remap_pages = generic_file_remap_pages,
3068 };
3069
3070 int cifs_file_strict_mmap(struct file *file, struct vm_area_struct *vma)
3071 {
3072         int rc, xid;
3073         struct inode *inode = file_inode(file);
3074
3075         xid = get_xid();
3076
3077         if (!CIFS_I(inode)->clientCanCacheRead) {
3078                 rc = cifs_invalidate_mapping(inode);
3079                 if (rc)
3080                         return rc;
3081         }
3082
3083         rc = generic_file_mmap(file, vma);
3084         if (rc == 0)
3085                 vma->vm_ops = &cifs_file_vm_ops;
3086         free_xid(xid);
3087         return rc;
3088 }
3089
3090 int cifs_file_mmap(struct file *file, struct vm_area_struct *vma)
3091 {
3092         int rc, xid;
3093
3094         xid = get_xid();
3095         rc = cifs_revalidate_file(file);
3096         if (rc) {
3097                 cFYI(1, "Validation prior to mmap failed, error=%d", rc);
3098                 free_xid(xid);
3099                 return rc;
3100         }
3101         rc = generic_file_mmap(file, vma);
3102         if (rc == 0)
3103                 vma->vm_ops = &cifs_file_vm_ops;
3104         free_xid(xid);
3105         return rc;
3106 }
3107
3108 static void
3109 cifs_readv_complete(struct work_struct *work)
3110 {
3111         unsigned int i;
3112         struct cifs_readdata *rdata = container_of(work,
3113                                                 struct cifs_readdata, work);
3114
3115         for (i = 0; i < rdata->nr_pages; i++) {
3116                 struct page *page = rdata->pages[i];
3117
3118                 lru_cache_add_file(page);
3119
3120                 if (rdata->result == 0) {
3121                         flush_dcache_page(page);
3122                         SetPageUptodate(page);
3123                 }
3124
3125                 unlock_page(page);
3126
3127                 if (rdata->result == 0)
3128                         cifs_readpage_to_fscache(rdata->mapping->host, page);
3129
3130                 page_cache_release(page);
3131                 rdata->pages[i] = NULL;
3132         }
3133         kref_put(&rdata->refcount, cifs_readdata_release);
3134 }
3135
3136 static int
3137 cifs_readpages_read_into_pages(struct TCP_Server_Info *server,
3138                         struct cifs_readdata *rdata, unsigned int len)
3139 {
3140         int total_read = 0, result = 0;
3141         unsigned int i;
3142         u64 eof;
3143         pgoff_t eof_index;
3144         unsigned int nr_pages = rdata->nr_pages;
3145         struct kvec iov;
3146
3147         /* determine the eof that the server (probably) has */
3148         eof = CIFS_I(rdata->mapping->host)->server_eof;
3149         eof_index = eof ? (eof - 1) >> PAGE_CACHE_SHIFT : 0;
3150         cFYI(1, "eof=%llu eof_index=%lu", eof, eof_index);
3151
3152         rdata->tailsz = PAGE_CACHE_SIZE;
3153         for (i = 0; i < nr_pages; i++) {
3154                 struct page *page = rdata->pages[i];
3155
3156                 if (len >= PAGE_CACHE_SIZE) {
3157                         /* enough data to fill the page */
3158                         iov.iov_base = kmap(page);
3159                         iov.iov_len = PAGE_CACHE_SIZE;
3160                         cFYI(1, "%u: idx=%lu iov_base=%p iov_len=%zu",
3161                                 i, page->index, iov.iov_base, iov.iov_len);
3162                         len -= PAGE_CACHE_SIZE;
3163                 } else if (len > 0) {
3164                         /* enough for partial page, fill and zero the rest */
3165                         iov.iov_base = kmap(page);
3166                         iov.iov_len = len;
3167                         cFYI(1, "%u: idx=%lu iov_base=%p iov_len=%zu",
3168                                 i, page->index, iov.iov_base, iov.iov_len);
3169                         memset(iov.iov_base + len,
3170                                 '\0', PAGE_CACHE_SIZE - len);
3171                         rdata->tailsz = len;
3172                         len = 0;
3173                 } else if (page->index > eof_index) {
3174                         /*
3175                          * The VFS will not try to do readahead past the
3176                          * i_size, but it's possible that we have outstanding
3177                          * writes with gaps in the middle and the i_size hasn't
3178                          * caught up yet. Populate those with zeroed out pages
3179                          * to prevent the VFS from repeatedly attempting to
3180                          * fill them until the writes are flushed.
3181                          */
3182                         zero_user(page, 0, PAGE_CACHE_SIZE);
3183                         lru_cache_add_file(page);
3184                         flush_dcache_page(page);
3185                         SetPageUptodate(page);
3186                         unlock_page(page);
3187                         page_cache_release(page);
3188                         rdata->pages[i] = NULL;
3189                         rdata->nr_pages--;
3190                         continue;
3191                 } else {
3192                         /* no need to hold page hostage */
3193                         lru_cache_add_file(page);
3194                         unlock_page(page);
3195                         page_cache_release(page);
3196                         rdata->pages[i] = NULL;
3197                         rdata->nr_pages--;
3198                         continue;
3199                 }
3200
3201                 result = cifs_readv_from_socket(server, &iov, 1, iov.iov_len);
3202                 kunmap(page);
3203                 if (result < 0)
3204                         break;
3205
3206                 total_read += result;
3207         }
3208
3209         return total_read > 0 ? total_read : result;
3210 }
3211
3212 static int cifs_readpages(struct file *file, struct address_space *mapping,
3213         struct list_head *page_list, unsigned num_pages)
3214 {
3215         int rc;
3216         struct list_head tmplist;
3217         struct cifsFileInfo *open_file = file->private_data;
3218         struct cifs_sb_info *cifs_sb = CIFS_SB(file->f_path.dentry->d_sb);
3219         unsigned int rsize = cifs_sb->rsize;
3220         pid_t pid;
3221
3222         /*
3223          * Give up immediately if rsize is too small to read an entire page.
3224          * The VFS will fall back to readpage. We should never reach this
3225          * point however since we set ra_pages to 0 when the rsize is smaller
3226          * than a cache page.
3227          */
3228         if (unlikely(rsize < PAGE_CACHE_SIZE))
3229                 return 0;
3230
3231         /*
3232          * Reads as many pages as possible from fscache. Returns -ENOBUFS
3233          * immediately if the cookie is negative
3234          */
3235         rc = cifs_readpages_from_fscache(mapping->host, mapping, page_list,
3236                                          &num_pages);
3237         if (rc == 0)
3238                 return rc;
3239
3240         if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
3241                 pid = open_file->pid;
3242         else
3243                 pid = current->tgid;
3244
3245         rc = 0;
3246         INIT_LIST_HEAD(&tmplist);
3247
3248         cFYI(1, "%s: file=%p mapping=%p num_pages=%u", __func__, file,
3249                 mapping, num_pages);
3250
3251         /*
3252          * Start with the page at end of list and move it to private
3253          * list. Do the same with any following pages until we hit
3254          * the rsize limit, hit an index discontinuity, or run out of
3255          * pages. Issue the async read and then start the loop again
3256          * until the list is empty.
3257          *
3258          * Note that list order is important. The page_list is in
3259          * the order of declining indexes. When we put the pages in
3260          * the rdata->pages, then we want them in increasing order.
3261          */
3262         while (!list_empty(page_list)) {
3263                 unsigned int i;
3264                 unsigned int bytes = PAGE_CACHE_SIZE;
3265                 unsigned int expected_index;
3266                 unsigned int nr_pages = 1;
3267                 loff_t offset;
3268                 struct page *page, *tpage;
3269                 struct cifs_readdata *rdata;
3270
3271                 page = list_entry(page_list->prev, struct page, lru);
3272
3273                 /*
3274                  * Lock the page and put it in the cache. Since no one else
3275                  * should have access to this page, we're safe to simply set
3276                  * PG_locked without checking it first.
3277                  */
3278                 __set_page_locked(page);
3279                 rc = add_to_page_cache_locked(page, mapping,
3280                                               page->index, GFP_KERNEL);
3281
3282                 /* give up if we can't stick it in the cache */
3283                 if (rc) {
3284                         __clear_page_locked(page);
3285                         break;
3286                 }
3287
3288                 /* move first page to the tmplist */
3289                 offset = (loff_t)page->index << PAGE_CACHE_SHIFT;
3290                 list_move_tail(&page->lru, &tmplist);
3291
3292                 /* now try and add more pages onto the request */
3293                 expected_index = page->index + 1;
3294                 list_for_each_entry_safe_reverse(page, tpage, page_list, lru) {
3295                         /* discontinuity ? */
3296                         if (page->index != expected_index)
3297                                 break;
3298
3299                         /* would this page push the read over the rsize? */
3300                         if (bytes + PAGE_CACHE_SIZE > rsize)
3301                                 break;
3302
3303                         __set_page_locked(page);
3304                         if (add_to_page_cache_locked(page, mapping,
3305                                                 page->index, GFP_KERNEL)) {
3306                                 __clear_page_locked(page);
3307                                 break;
3308                         }
3309                         list_move_tail(&page->lru, &tmplist);
3310                         bytes += PAGE_CACHE_SIZE;
3311                         expected_index++;
3312                         nr_pages++;
3313                 }
3314
3315                 rdata = cifs_readdata_alloc(nr_pages, cifs_readv_complete);
3316                 if (!rdata) {
3317                         /* best to give up if we're out of mem */
3318                         list_for_each_entry_safe(page, tpage, &tmplist, lru) {
3319                                 list_del(&page->lru);
3320                                 lru_cache_add_file(page);
3321                                 unlock_page(page);
3322                                 page_cache_release(page);
3323                         }
3324                         rc = -ENOMEM;
3325                         break;
3326                 }
3327
3328                 rdata->cfile = cifsFileInfo_get(open_file);
3329                 rdata->mapping = mapping;
3330                 rdata->offset = offset;
3331                 rdata->bytes = bytes;
3332                 rdata->pid = pid;
3333                 rdata->pagesz = PAGE_CACHE_SIZE;
3334                 rdata->read_into_pages = cifs_readpages_read_into_pages;
3335
3336                 list_for_each_entry_safe(page, tpage, &tmplist, lru) {
3337                         list_del(&page->lru);
3338                         rdata->pages[rdata->nr_pages++] = page;
3339                 }
3340
3341                 rc = cifs_retry_async_readv(rdata);
3342                 if (rc != 0) {
3343                         for (i = 0; i < rdata->nr_pages; i++) {
3344                                 page = rdata->pages[i];
3345                                 lru_cache_add_file(page);
3346                                 unlock_page(page);
3347                                 page_cache_release(page);
3348                         }
3349                         kref_put(&rdata->refcount, cifs_readdata_release);
3350                         break;
3351                 }
3352
3353                 kref_put(&rdata->refcount, cifs_readdata_release);
3354         }
3355
3356         return rc;
3357 }
3358
3359 static int cifs_readpage_worker(struct file *file, struct page *page,
3360         loff_t *poffset)
3361 {
3362         char *read_data;
3363         int rc;
3364
3365         /* Is the page cached? */
3366         rc = cifs_readpage_from_fscache(file_inode(file), page);
3367         if (rc == 0)
3368                 goto read_complete;
3369
3370         page_cache_get(page);
3371         read_data = kmap(page);
3372         /* for reads over a certain size could initiate async read ahead */
3373
3374         rc = cifs_read(file, read_data, PAGE_CACHE_SIZE, poffset);
3375
3376         if (rc < 0)
3377                 goto io_error;
3378         else
3379                 cFYI(1, "Bytes read %d", rc);
3380
3381         file_inode(file)->i_atime =
3382                 current_fs_time(file_inode(file)->i_sb);
3383
3384         if (PAGE_CACHE_SIZE > rc)
3385                 memset(read_data + rc, 0, PAGE_CACHE_SIZE - rc);
3386
3387         flush_dcache_page(page);
3388         SetPageUptodate(page);
3389
3390         /* send this page to the cache */
3391         cifs_readpage_to_fscache(file_inode(file), page);
3392
3393         rc = 0;
3394
3395 io_error:
3396         kunmap(page);
3397         page_cache_release(page);
3398
3399 read_complete:
3400         return rc;
3401 }
3402
3403 static int cifs_readpage(struct file *file, struct page *page)
3404 {
3405         loff_t offset = (loff_t)page->index << PAGE_CACHE_SHIFT;
3406         int rc = -EACCES;
3407         unsigned int xid;
3408
3409         xid = get_xid();
3410
3411         if (file->private_data == NULL) {
3412                 rc = -EBADF;
3413                 free_xid(xid);
3414                 return rc;
3415         }
3416
3417         cFYI(1, "readpage %p at offset %d 0x%x",
3418                  page, (int)offset, (int)offset);
3419
3420         rc = cifs_readpage_worker(file, page, &offset);
3421
3422         unlock_page(page);
3423
3424         free_xid(xid);
3425         return rc;
3426 }
3427
3428 static int is_inode_writable(struct cifsInodeInfo *cifs_inode)
3429 {
3430         struct cifsFileInfo *open_file;
3431
3432         spin_lock(&cifs_file_list_lock);
3433         list_for_each_entry(open_file, &cifs_inode->openFileList, flist) {
3434                 if (OPEN_FMODE(open_file->f_flags) & FMODE_WRITE) {
3435                         spin_unlock(&cifs_file_list_lock);
3436                         return 1;
3437                 }
3438         }
3439         spin_unlock(&cifs_file_list_lock);
3440         return 0;
3441 }
3442
3443 /* We do not want to update the file size from server for inodes
3444    open for write - to avoid races with writepage extending
3445    the file - in the future we could consider allowing
3446    refreshing the inode only on increases in the file size
3447    but this is tricky to do without racing with writebehind
3448    page caching in the current Linux kernel design */
3449 bool is_size_safe_to_change(struct cifsInodeInfo *cifsInode, __u64 end_of_file)
3450 {
3451         if (!cifsInode)
3452                 return true;
3453
3454         if (is_inode_writable(cifsInode)) {
3455                 /* This inode is open for write at least once */
3456                 struct cifs_sb_info *cifs_sb;
3457
3458                 cifs_sb = CIFS_SB(cifsInode->vfs_inode.i_sb);
3459                 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_DIRECT_IO) {
3460                         /* since no page cache to corrupt on directio
3461                         we can change size safely */
3462                         return true;
3463                 }
3464
3465                 if (i_size_read(&cifsInode->vfs_inode) < end_of_file)
3466                         return true;
3467
3468                 return false;
3469         } else
3470                 return true;
3471 }
3472
3473 static int cifs_write_begin(struct file *file, struct address_space *mapping,
3474                         loff_t pos, unsigned len, unsigned flags,
3475                         struct page **pagep, void **fsdata)
3476 {
3477         pgoff_t index = pos >> PAGE_CACHE_SHIFT;
3478         loff_t offset = pos & (PAGE_CACHE_SIZE - 1);
3479         loff_t page_start = pos & PAGE_MASK;
3480         loff_t i_size;
3481         struct page *page;
3482         int rc = 0;
3483
3484         cFYI(1, "write_begin from %lld len %d", (long long)pos, len);
3485
3486         page = grab_cache_page_write_begin(mapping, index, flags);
3487         if (!page) {
3488                 rc = -ENOMEM;
3489                 goto out;
3490         }
3491
3492         if (PageUptodate(page))
3493                 goto out;
3494
3495         /*
3496          * If we write a full page it will be up to date, no need to read from
3497          * the server. If the write is short, we'll end up doing a sync write
3498          * instead.
3499          */
3500         if (len == PAGE_CACHE_SIZE)
3501                 goto out;
3502
3503         /*
3504          * optimize away the read when we have an oplock, and we're not
3505          * expecting to use any of the data we'd be reading in. That
3506          * is, when the page lies beyond the EOF, or straddles the EOF
3507          * and the write will cover all of the existing data.
3508          */
3509         if (CIFS_I(mapping->host)->clientCanCacheRead) {
3510                 i_size = i_size_read(mapping->host);
3511                 if (page_start >= i_size ||
3512                     (offset == 0 && (pos + len) >= i_size)) {
3513                         zero_user_segments(page, 0, offset,
3514                                            offset + len,
3515                                            PAGE_CACHE_SIZE);
3516                         /*
3517                          * PageChecked means that the parts of the page
3518                          * to which we're not writing are considered up
3519                          * to date. Once the data is copied to the
3520                          * page, it can be set uptodate.
3521                          */
3522                         SetPageChecked(page);
3523                         goto out;
3524                 }
3525         }
3526
3527         if ((file->f_flags & O_ACCMODE) != O_WRONLY) {
3528                 /*
3529                  * might as well read a page, it is fast enough. If we get
3530                  * an error, we don't need to return it. cifs_write_end will
3531                  * do a sync write instead since PG_uptodate isn't set.
3532                  */
3533                 cifs_readpage_worker(file, page, &page_start);
3534         } else {
3535                 /* we could try using another file handle if there is one -
3536                    but how would we lock it to prevent close of that handle
3537                    racing with this read? In any case
3538                    this will be written out by write_end so is fine */
3539         }
3540 out:
3541         *pagep = page;
3542         return rc;
3543 }
3544
3545 static int cifs_release_page(struct page *page, gfp_t gfp)
3546 {
3547         if (PagePrivate(page))
3548                 return 0;
3549
3550         return cifs_fscache_release_page(page, gfp);
3551 }
3552
3553 static void cifs_invalidate_page(struct page *page, unsigned long offset)
3554 {
3555         struct cifsInodeInfo *cifsi = CIFS_I(page->mapping->host);
3556
3557         if (offset == 0)
3558                 cifs_fscache_invalidate_page(page, &cifsi->vfs_inode);
3559 }
3560
3561 static int cifs_launder_page(struct page *page)
3562 {
3563         int rc = 0;
3564         loff_t range_start = page_offset(page);
3565         loff_t range_end = range_start + (loff_t)(PAGE_CACHE_SIZE - 1);
3566         struct writeback_control wbc = {
3567                 .sync_mode = WB_SYNC_ALL,
3568                 .nr_to_write = 0,
3569                 .range_start = range_start,
3570                 .range_end = range_end,
3571         };
3572
3573         cFYI(1, "Launder page: %p", page);
3574
3575         if (clear_page_dirty_for_io(page))
3576                 rc = cifs_writepage_locked(page, &wbc);
3577
3578         cifs_fscache_invalidate_page(page, page->mapping->host);
3579         return rc;
3580 }
3581
3582 void cifs_oplock_break(struct work_struct *work)
3583 {
3584         struct cifsFileInfo *cfile = container_of(work, struct cifsFileInfo,
3585                                                   oplock_break);
3586         struct inode *inode = cfile->dentry->d_inode;
3587         struct cifsInodeInfo *cinode = CIFS_I(inode);
3588         struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
3589         int rc = 0;
3590
3591         if (!cinode->clientCanCacheAll && cinode->clientCanCacheRead &&
3592                                                 cifs_has_mand_locks(cinode)) {
3593                 cFYI(1, "Reset oplock to None for inode=%p due to mand locks",
3594                      inode);
3595                 cinode->clientCanCacheRead = false;
3596         }
3597
3598         if (inode && S_ISREG(inode->i_mode)) {
3599                 if (cinode->clientCanCacheRead)
3600                         break_lease(inode, O_RDONLY);
3601                 else
3602                         break_lease(inode, O_WRONLY);
3603                 rc = filemap_fdatawrite(inode->i_mapping);
3604                 if (cinode->clientCanCacheRead == 0) {
3605                         rc = filemap_fdatawait(inode->i_mapping);
3606                         mapping_set_error(inode->i_mapping, rc);
3607                         cifs_invalidate_mapping(inode);
3608                 }
3609                 cFYI(1, "Oplock flush inode %p rc %d", inode, rc);
3610         }
3611
3612         rc = cifs_push_locks(cfile);
3613         if (rc)
3614                 cERROR(1, "Push locks rc = %d", rc);
3615
3616         /*
3617          * releasing stale oplock after recent reconnect of smb session using
3618          * a now incorrect file handle is not a data integrity issue but do
3619          * not bother sending an oplock release if session to server still is
3620          * disconnected since oplock already released by the server
3621          */
3622         if (!cfile->oplock_break_cancelled) {
3623                 rc = tcon->ses->server->ops->oplock_response(tcon, &cfile->fid,
3624                                                              cinode);
3625                 cFYI(1, "Oplock release rc = %d", rc);
3626         }
3627 }
3628
3629 const struct address_space_operations cifs_addr_ops = {
3630         .readpage = cifs_readpage,
3631         .readpages = cifs_readpages,
3632         .writepage = cifs_writepage,
3633         .writepages = cifs_writepages,
3634         .write_begin = cifs_write_begin,
3635         .write_end = cifs_write_end,
3636         .set_page_dirty = __set_page_dirty_nobuffers,
3637         .releasepage = cifs_release_page,
3638         .invalidatepage = cifs_invalidate_page,
3639         .launder_page = cifs_launder_page,
3640 };
3641
3642 /*
3643  * cifs_readpages requires the server to support a buffer large enough to
3644  * contain the header plus one complete page of data.  Otherwise, we need
3645  * to leave cifs_readpages out of the address space operations.
3646  */
3647 const struct address_space_operations cifs_addr_ops_smallbuf = {
3648         .readpage = cifs_readpage,
3649         .writepage = cifs_writepage,
3650         .writepages = cifs_writepages,
3651         .write_begin = cifs_write_begin,
3652         .write_end = cifs_write_end,
3653         .set_page_dirty = __set_page_dirty_nobuffers,
3654         .releasepage = cifs_release_page,
3655         .invalidatepage = cifs_invalidate_page,
3656         .launder_page = cifs_launder_page,
3657 };