1 /*
2  *   fs/cifs/file.c
3  *
4  *   vfs operations that deal with files
5  *
6  *   Copyright (C) International Business Machines  Corp., 2002,2010
7  *   Author(s): Steve French (sfrench@us.ibm.com)
8  *              Jeremy Allison (jra@samba.org)
9  *
10  *   This library is free software; you can redistribute it and/or modify
11  *   it under the terms of the GNU Lesser General Public License as published
12  *   by the Free Software Foundation; either version 2.1 of the License, or
13  *   (at your option) any later version.
14  *
15  *   This library is distributed in the hope that it will be useful,
16  *   but WITHOUT ANY WARRANTY; without even the implied warranty of
17  *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See
18  *   the GNU Lesser General Public License for more details.
19  *
20  *   You should have received a copy of the GNU Lesser General Public License
21  *   along with this library; if not, write to the Free Software
22  *   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
23  */
24 #include <linux/fs.h>
25 #include <linux/backing-dev.h>
26 #include <linux/stat.h>
27 #include <linux/fcntl.h>
28 #include <linux/pagemap.h>
29 #include <linux/pagevec.h>
30 #include <linux/writeback.h>
31 #include <linux/task_io_accounting_ops.h>
32 #include <linux/delay.h>
33 #include <linux/mount.h>
34 #include <linux/slab.h>
35 #include <linux/swap.h>
36 #include <asm/div64.h>
37 #include "cifsfs.h"
38 #include "cifspdu.h"
39 #include "cifsglob.h"
40 #include "cifsproto.h"
41 #include "cifs_unicode.h"
42 #include "cifs_debug.h"
43 #include "cifs_fs_sb.h"
44 #include "fscache.h"
45
46
47 static inline int cifs_convert_flags(unsigned int flags)
48 {
49         if ((flags & O_ACCMODE) == O_RDONLY)
50                 return GENERIC_READ;
51         else if ((flags & O_ACCMODE) == O_WRONLY)
52                 return GENERIC_WRITE;
53         else if ((flags & O_ACCMODE) == O_RDWR) {
54         /* GENERIC_ALL is too much permission to request; it can
55            cause unnecessary access-denied errors on create */
56                 /* return GENERIC_ALL; */
57                 return (GENERIC_READ | GENERIC_WRITE);
58         }
59
60         return (READ_CONTROL | FILE_WRITE_ATTRIBUTES | FILE_READ_ATTRIBUTES |
61                 FILE_WRITE_EA | FILE_APPEND_DATA | FILE_WRITE_DATA |
62                 FILE_READ_DATA);
63 }
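
/*
 * Illustrative mapping (editorial sketch; the values follow directly from
 * the branches above):
 *
 *	cifs_convert_flags(O_RDONLY);           -> GENERIC_READ
 *	cifs_convert_flags(O_WRONLY | O_CREAT); -> GENERIC_WRITE
 *	cifs_convert_flags(O_RDWR);             -> GENERIC_READ | GENERIC_WRITE
 *
 * Only the O_ACCMODE bits matter here; creation/truncation flags are
 * translated separately by cifs_get_disposition() below.
 */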
64
65 static u32 cifs_posix_convert_flags(unsigned int flags)
66 {
67         u32 posix_flags = 0;
68
69         if ((flags & O_ACCMODE) == O_RDONLY)
70                 posix_flags = SMB_O_RDONLY;
71         else if ((flags & O_ACCMODE) == O_WRONLY)
72                 posix_flags = SMB_O_WRONLY;
73         else if ((flags & O_ACCMODE) == O_RDWR)
74                 posix_flags = SMB_O_RDWR;
75
76         if (flags & O_CREAT) {
77                 posix_flags |= SMB_O_CREAT;
78                 if (flags & O_EXCL)
79                         posix_flags |= SMB_O_EXCL;
80         } else if (flags & O_EXCL)
81                 cFYI(1, "Application %s pid %d has incorrectly set O_EXCL flag "
82                         "but not O_CREAT on file open. Ignoring O_EXCL",
83                         current->comm, current->tgid);
84
85         if (flags & O_TRUNC)
86                 posix_flags |= SMB_O_TRUNC;
87         /* be safe and imply O_SYNC for O_DSYNC */
88         if (flags & O_DSYNC)
89                 posix_flags |= SMB_O_SYNC;
90         if (flags & O_DIRECTORY)
91                 posix_flags |= SMB_O_DIRECTORY;
92         if (flags & O_NOFOLLOW)
93                 posix_flags |= SMB_O_NOFOLLOW;
94         if (flags & O_DIRECT)
95                 posix_flags |= SMB_O_DIRECT;
96
97         return posix_flags;
98 }
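
/*
 * Example (sketch): an open(2) call such as
 *
 *	open(path, O_RDWR | O_CREAT | O_EXCL | O_TRUNC, 0644);
 *
 * maps through cifs_posix_convert_flags() to
 * SMB_O_RDWR | SMB_O_CREAT | SMB_O_EXCL | SMB_O_TRUNC; the mode itself is
 * passed separately by the caller (see cifs_posix_open() below).
 */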
99
100 static inline int cifs_get_disposition(unsigned int flags)
101 {
102         if ((flags & (O_CREAT | O_EXCL)) == (O_CREAT | O_EXCL))
103                 return FILE_CREATE;
104         else if ((flags & (O_CREAT | O_TRUNC)) == (O_CREAT | O_TRUNC))
105                 return FILE_OVERWRITE_IF;
106         else if ((flags & O_CREAT) == O_CREAT)
107                 return FILE_OPEN_IF;
108         else if ((flags & O_TRUNC) == O_TRUNC)
109                 return FILE_OVERWRITE;
110         else
111                 return FILE_OPEN;
112 }
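
/*
 * Example dispositions (sketch, per the mapping table documented in
 * cifs_nt_open() below):
 *
 *	cifs_get_disposition(O_CREAT | O_TRUNC);  -> FILE_OVERWRITE_IF
 *	cifs_get_disposition(O_TRUNC);            -> FILE_OVERWRITE
 *	cifs_get_disposition(0);                  -> FILE_OPEN
 */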
113
114 int cifs_posix_open(char *full_path, struct inode **pinode,
115                         struct super_block *sb, int mode, unsigned int f_flags,
116                         __u32 *poplock, __u16 *pnetfid, unsigned int xid)
117 {
118         int rc;
119         FILE_UNIX_BASIC_INFO *presp_data;
120         __u32 posix_flags = 0;
121         struct cifs_sb_info *cifs_sb = CIFS_SB(sb);
122         struct cifs_fattr fattr;
123         struct tcon_link *tlink;
124         struct cifs_tcon *tcon;
125
126         cFYI(1, "posix open %s", full_path);
127
128         presp_data = kzalloc(sizeof(FILE_UNIX_BASIC_INFO), GFP_KERNEL);
129         if (presp_data == NULL)
130                 return -ENOMEM;
131
132         tlink = cifs_sb_tlink(cifs_sb);
133         if (IS_ERR(tlink)) {
134                 rc = PTR_ERR(tlink);
135                 goto posix_open_ret;
136         }
137
138         tcon = tlink_tcon(tlink);
139         mode &= ~current_umask();
140
141         posix_flags = cifs_posix_convert_flags(f_flags);
142         rc = CIFSPOSIXCreate(xid, tcon, posix_flags, mode, pnetfid, presp_data,
143                              poplock, full_path, cifs_sb->local_nls,
144                              cifs_sb->mnt_cifs_flags &
145                                         CIFS_MOUNT_MAP_SPECIAL_CHR);
146         cifs_put_tlink(tlink);
147
148         if (rc)
149                 goto posix_open_ret;
150
151         if (presp_data->Type == cpu_to_le32(-1))
152                 goto posix_open_ret; /* open ok, caller does qpathinfo */
153
154         if (!pinode)
155                 goto posix_open_ret; /* caller does not need info */
156
157         cifs_unix_basic_to_fattr(&fattr, presp_data, cifs_sb);
158
159         /* get new inode and set it up */
160         if (*pinode == NULL) {
161                 cifs_fill_uniqueid(sb, &fattr);
162                 *pinode = cifs_iget(sb, &fattr);
163                 if (!*pinode) {
164                         rc = -ENOMEM;
165                         goto posix_open_ret;
166                 }
167         } else {
168                 cifs_fattr_to_inode(*pinode, &fattr);
169         }
170
171 posix_open_ret:
172         kfree(presp_data);
173         return rc;
174 }
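
/*
 * Typical call pattern (sketch, based on the callers in this file): pass
 * NULL for pinode when only the handle is needed (e.g. on reopen), or the
 * address of the inode pointer to have it created or refreshed from the
 * FILE_UNIX_BASIC_INFO the server returns:
 *
 *	__u32 oplock = 0;
 *	rc = cifs_posix_open(full_path, &inode, inode->i_sb,
 *			     cifs_sb->mnt_file_mode, file->f_flags,
 *			     &oplock, &fid.netfid, xid);
 */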
175
176 static int
177 cifs_nt_open(char *full_path, struct inode *inode, struct cifs_sb_info *cifs_sb,
178              struct cifs_tcon *tcon, unsigned int f_flags, __u32 *oplock,
179              struct cifs_fid *fid, unsigned int xid)
180 {
181         int rc;
182         int desired_access;
183         int disposition;
184         int create_options = CREATE_NOT_DIR;
185         FILE_ALL_INFO *buf;
186         struct TCP_Server_Info *server = tcon->ses->server;
187
188         if (!server->ops->open)
189                 return -ENOSYS;
190
191         desired_access = cifs_convert_flags(f_flags);
192
193 /*********************************************************************
194  *  open flag mapping table:
195  *
196  *      POSIX Flag            CIFS Disposition
197  *      ----------            ----------------
198  *      O_CREAT               FILE_OPEN_IF
199  *      O_CREAT | O_EXCL      FILE_CREATE
200  *      O_CREAT | O_TRUNC     FILE_OVERWRITE_IF
201  *      O_TRUNC               FILE_OVERWRITE
202  *      none of the above     FILE_OPEN
203  *
204  *      Note that there is no direct POSIX match for the disposition
205  *      FILE_SUPERSEDE (ie create whether or not the file exists).
206  *      O_CREAT | O_TRUNC is similar, but it truncates an existing
207  *      file rather than creating a new one as FILE_SUPERSEDE does
208  *      (which uses the attributes / metadata passed in on the open call).
209  *
210  *      O_SYNC is a reasonable match to the CIFS writethrough flag,
211  *      and the read/write flags match reasonably.  O_LARGEFILE is
212  *      irrelevant because largefile support is always used by this
213  *      client. The flags O_APPEND, O_DIRECT, O_DIRECTORY, O_FASYNC,
214  *      O_NOFOLLOW and O_NONBLOCK need further investigation.
215  *********************************************************************/
216
217         disposition = cifs_get_disposition(f_flags);
218
219         /* BB pass O_SYNC flag through on file attributes .. BB */
220
221         buf = kmalloc(sizeof(FILE_ALL_INFO), GFP_KERNEL);
222         if (!buf)
223                 return -ENOMEM;
224
225         if (backup_cred(cifs_sb))
226                 create_options |= CREATE_OPEN_BACKUP_INTENT;
227
228         rc = server->ops->open(xid, tcon, full_path, disposition,
229                                desired_access, create_options, fid, oplock, buf,
230                                cifs_sb);
231
232         if (rc)
233                 goto out;
234
235         if (tcon->unix_ext)
236                 rc = cifs_get_inode_info_unix(&inode, full_path, inode->i_sb,
237                                               xid);
238         else
239                 rc = cifs_get_inode_info(&inode, full_path, buf, inode->i_sb,
240                                          xid, &fid->netfid);
241
242 out:
243         kfree(buf);
244         return rc;
245 }
246
247 static bool
248 cifs_has_mand_locks(struct cifsInodeInfo *cinode)
249 {
250         struct cifs_fid_locks *cur;
251         bool has_locks = false;
252
253         down_read(&cinode->lock_sem);
254         list_for_each_entry(cur, &cinode->llist, llist) {
255                 if (!list_empty(&cur->locks)) {
256                         has_locks = true;
257                         break;
258                 }
259         }
260         up_read(&cinode->lock_sem);
261         return has_locks;
262 }
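
/*
 * Note: cifs_has_mand_locks() only takes lock_sem for reading, so several
 * readers may scan the per-fid lock lists concurrently; writers such as
 * cifs_lock_add() below serialize against them with
 * down_write(&cinode->lock_sem).
 */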
263
264 struct cifsFileInfo *
265 cifs_new_fileinfo(struct cifs_fid *fid, struct file *file,
266                   struct tcon_link *tlink, __u32 oplock)
267 {
268         struct dentry *dentry = file->f_path.dentry;
269         struct inode *inode = dentry->d_inode;
270         struct cifsInodeInfo *cinode = CIFS_I(inode);
271         struct cifsFileInfo *cfile;
272         struct cifs_fid_locks *fdlocks;
273         struct cifs_tcon *tcon = tlink_tcon(tlink);
274         struct TCP_Server_Info *server = tcon->ses->server;
275
276         cfile = kzalloc(sizeof(struct cifsFileInfo), GFP_KERNEL);
277         if (cfile == NULL)
278                 return cfile;
279
280         fdlocks = kzalloc(sizeof(struct cifs_fid_locks), GFP_KERNEL);
281         if (!fdlocks) {
282                 kfree(cfile);
283                 return NULL;
284         }
285
286         INIT_LIST_HEAD(&fdlocks->locks);
287         fdlocks->cfile = cfile;
288         cfile->llist = fdlocks;
289         down_write(&cinode->lock_sem);
290         list_add(&fdlocks->llist, &cinode->llist);
291         up_write(&cinode->lock_sem);
292
293         cfile->count = 1;
294         cfile->pid = current->tgid;
295         cfile->uid = current_fsuid();
296         cfile->dentry = dget(dentry);
297         cfile->f_flags = file->f_flags;
298         cfile->invalidHandle = false;
299         cfile->tlink = cifs_get_tlink(tlink);
300         INIT_WORK(&cfile->oplock_break, cifs_oplock_break);
301         mutex_init(&cfile->fh_mutex);
302
303         /*
304          * If the server returned a read oplock and we have mandatory brlocks,
305          * set oplock level to None.
306          */
307         if (oplock == server->vals->oplock_read &&
308                                                 cifs_has_mand_locks(cinode)) {
309                 cFYI(1, "Reset oplock val from read to None due to mand locks");
310                 oplock = 0;
311         }
312
313         spin_lock(&cifs_file_list_lock);
314         if (fid->pending_open->oplock != CIFS_OPLOCK_NO_CHANGE && oplock)
315                 oplock = fid->pending_open->oplock;
316         list_del(&fid->pending_open->olist);
317
318         server->ops->set_fid(cfile, fid, oplock);
319
320         list_add(&cfile->tlist, &tcon->openFileList);
321         /* if readable file instance, put it first in the list */
322         if (file->f_mode & FMODE_READ)
323                 list_add(&cfile->flist, &cinode->openFileList);
324         else
325                 list_add_tail(&cfile->flist, &cinode->openFileList);
326         spin_unlock(&cifs_file_list_lock);
327
328         file->private_data = cfile;
329         return cfile;
330 }
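
/*
 * The returned cifsFileInfo starts with count == 1; that reference is
 * owned by file->private_data and dropped by cifsFileInfo_put() from
 * cifs_close(). Any additional, temporary reference taken with
 * cifsFileInfo_get() below must be balanced by its own put.
 */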
331
332 struct cifsFileInfo *
333 cifsFileInfo_get(struct cifsFileInfo *cifs_file)
334 {
335         spin_lock(&cifs_file_list_lock);
336         cifsFileInfo_get_locked(cifs_file);
337         spin_unlock(&cifs_file_list_lock);
338         return cifs_file;
339 }
340
341 /*
342  * Release a reference on the file private data. This may involve closing
343  * the filehandle out on the server. Must be called without holding
344  * cifs_file_list_lock.
345  */
346 void cifsFileInfo_put(struct cifsFileInfo *cifs_file)
347 {
348         struct inode *inode = cifs_file->dentry->d_inode;
349         struct cifs_tcon *tcon = tlink_tcon(cifs_file->tlink);
350         struct TCP_Server_Info *server = tcon->ses->server;
351         struct cifsInodeInfo *cifsi = CIFS_I(inode);
352         struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
353         struct cifsLockInfo *li, *tmp;
354         struct cifs_fid fid;
355         struct cifs_pending_open open;
356
357         spin_lock(&cifs_file_list_lock);
358         if (--cifs_file->count > 0) {
359                 spin_unlock(&cifs_file_list_lock);
360                 return;
361         }
362
363         if (server->ops->get_lease_key)
364                 server->ops->get_lease_key(inode, &fid);
365
366         /* store open in pending opens to make sure we don't miss lease break */
367         cifs_add_pending_open_locked(&fid, cifs_file->tlink, &open);
368
369         /* remove it from the lists */
370         list_del(&cifs_file->flist);
371         list_del(&cifs_file->tlist);
372
373         if (list_empty(&cifsi->openFileList)) {
374                 cFYI(1, "closing last open instance for inode %p",
375                         cifs_file->dentry->d_inode);
376                 /*
377                  * In strict cache mode we need to invalidate the mapping on
378                  * the last close because it may cause an error when we open
379                  * this file again and get at least a level II oplock.
380                  */
381                 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_STRICT_IO)
382                         CIFS_I(inode)->invalid_mapping = true;
383                 cifs_set_oplock_level(cifsi, 0);
384         }
385         spin_unlock(&cifs_file_list_lock);
386
387         cancel_work_sync(&cifs_file->oplock_break);
388
389         if (!tcon->need_reconnect && !cifs_file->invalidHandle) {
390                 struct TCP_Server_Info *server = tcon->ses->server;
391                 unsigned int xid;
392
393                 xid = get_xid();
394                 if (server->ops->close)
395                         server->ops->close(xid, tcon, &cifs_file->fid);
396                 _free_xid(xid);
397         }
398
399         cifs_del_pending_open(&open);
400
401         /*
402          * Delete any outstanding lock records. We'll lose them when the file
403          * is closed anyway.
404          */
405         down_write(&cifsi->lock_sem);
406         list_for_each_entry_safe(li, tmp, &cifs_file->llist->locks, llist) {
407                 list_del(&li->llist);
408                 cifs_del_lock_waiters(li);
409                 kfree(li);
410         }
411         list_del(&cifs_file->llist->llist);
412         kfree(cifs_file->llist);
413         up_write(&cifsi->lock_sem);
414
415         cifs_put_tlink(cifs_file->tlink);
416         dput(cifs_file->dentry);
417         kfree(cifs_file);
418 }
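
/*
 * Sketch of the expected pairing (illustrative):
 *
 *	struct cifsFileInfo *cfile = cifsFileInfo_get(open_file);
 *	... use cfile without holding cifs_file_list_lock ...
 *	cifsFileInfo_put(cfile);	// may close the handle on the server
 */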
419
420 int cifs_open(struct inode *inode, struct file *file)
421
422 {
423         int rc = -EACCES;
424         unsigned int xid;
425         __u32 oplock;
426         struct cifs_sb_info *cifs_sb;
427         struct TCP_Server_Info *server;
428         struct cifs_tcon *tcon;
429         struct tcon_link *tlink;
430         struct cifsFileInfo *cfile = NULL;
431         char *full_path = NULL;
432         bool posix_open_ok = false;
433         struct cifs_fid fid;
434         struct cifs_pending_open open;
435
436         xid = get_xid();
437
438         cifs_sb = CIFS_SB(inode->i_sb);
439         tlink = cifs_sb_tlink(cifs_sb);
440         if (IS_ERR(tlink)) {
441                 free_xid(xid);
442                 return PTR_ERR(tlink);
443         }
444         tcon = tlink_tcon(tlink);
445         server = tcon->ses->server;
446
447         full_path = build_path_from_dentry(file->f_path.dentry);
448         if (full_path == NULL) {
449                 rc = -ENOMEM;
450                 goto out;
451         }
452
453         cFYI(1, "inode = 0x%p file flags are 0x%x for %s",
454                  inode, file->f_flags, full_path);
455
456         if (server->oplocks)
457                 oplock = REQ_OPLOCK;
458         else
459                 oplock = 0;
460
461         if (!tcon->broken_posix_open && tcon->unix_ext &&
462             cap_unix(tcon->ses) && (CIFS_UNIX_POSIX_PATH_OPS_CAP &
463                                 le64_to_cpu(tcon->fsUnixInfo.Capability))) {
464                 /* can not refresh inode info since size could be stale */
465                 rc = cifs_posix_open(full_path, &inode, inode->i_sb,
466                                 cifs_sb->mnt_file_mode /* ignored */,
467                                 file->f_flags, &oplock, &fid.netfid, xid);
468                 if (rc == 0) {
469                         cFYI(1, "posix open succeeded");
470                         posix_open_ok = true;
471                 } else if ((rc == -EINVAL) || (rc == -EOPNOTSUPP)) {
472                         if (tcon->ses->serverNOS)
473                                 cERROR(1, "server %s of type %s returned"
474                                            " unexpected error on SMB posix open"
475                                            ", disabling posix open support."
476                                            " Check if server update available.",
477                                            tcon->ses->serverName,
478                                            tcon->ses->serverNOS);
479                         tcon->broken_posix_open = true;
480                 } else if ((rc != -EIO) && (rc != -EREMOTE) &&
481                          (rc != -EOPNOTSUPP)) /* path not found or net err */
482                         goto out;
483                 /*
484                  * Else fallthrough to retry open the old way on network i/o
485                  * or DFS errors.
486                  */
487         }
488
489         if (server->ops->get_lease_key)
490                 server->ops->get_lease_key(inode, &fid);
491
492         cifs_add_pending_open(&fid, tlink, &open);
493
494         if (!posix_open_ok) {
495                 if (server->ops->get_lease_key)
496                         server->ops->get_lease_key(inode, &fid);
497
498                 rc = cifs_nt_open(full_path, inode, cifs_sb, tcon,
499                                   file->f_flags, &oplock, &fid, xid);
500                 if (rc) {
501                         cifs_del_pending_open(&open);
502                         goto out;
503                 }
504         }
505
506         cfile = cifs_new_fileinfo(&fid, file, tlink, oplock);
507         if (cfile == NULL) {
508                 if (server->ops->close)
509                         server->ops->close(xid, tcon, &fid);
510                 cifs_del_pending_open(&open);
511                 rc = -ENOMEM;
512                 goto out;
513         }
514
515         cifs_fscache_set_inode_cookie(inode, file);
516
517         if ((oplock & CIFS_CREATE_ACTION) && !posix_open_ok && tcon->unix_ext) {
518                 /*
519                  * Time to set mode which we can not set earlier due to
520                  * problems creating new read-only files.
521                  */
522                 struct cifs_unix_set_info_args args = {
523                         .mode   = inode->i_mode,
524                         .uid    = INVALID_UID, /* no change */
525                         .gid    = INVALID_GID, /* no change */
526                         .ctime  = NO_CHANGE_64,
527                         .atime  = NO_CHANGE_64,
528                         .mtime  = NO_CHANGE_64,
529                         .device = 0,
530                 };
531                 CIFSSMBUnixSetFileInfo(xid, tcon, &args, fid.netfid,
532                                        cfile->pid);
533         }
534
535 out:
536         kfree(full_path);
537         free_xid(xid);
538         cifs_put_tlink(tlink);
539         return rc;
540 }
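
/*
 * Summary of the open path above (editorial): try the POSIX open where
 * the server advertises CIFS_UNIX_POSIX_PATH_OPS_CAP, fall back to
 * cifs_nt_open() otherwise, then attach the resulting fid to a fresh
 * cifsFileInfo. The pending_open entry exists only so that a lease break
 * arriving between the server open and cifs_new_fileinfo() is not lost.
 */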
541
542 static int cifs_push_posix_locks(struct cifsFileInfo *cfile);
543
544 /*
545  * Try to reacquire byte range locks that were released when session
546  * to server was lost.
547  */
548 static int
549 cifs_relock_file(struct cifsFileInfo *cfile)
550 {
551         struct cifs_sb_info *cifs_sb = CIFS_SB(cfile->dentry->d_sb);
552         struct cifsInodeInfo *cinode = CIFS_I(cfile->dentry->d_inode);
553         struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
554         int rc = 0;
555
556         /* we are going to update can_cache_brlcks here - need write access */
557         down_write(&cinode->lock_sem);
558         if (cinode->can_cache_brlcks) {
559                 /* can cache locks - no need to push them */
560                 up_write(&cinode->lock_sem);
561                 return rc;
562         }
563
564         if (cap_unix(tcon->ses) &&
565             (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) &&
566             ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0))
567                 rc = cifs_push_posix_locks(cfile);
568         else
569                 rc = tcon->ses->server->ops->push_mand_locks(cfile);
570
571         up_write(&cinode->lock_sem);
572         return rc;
573 }
574
575 static int
576 cifs_reopen_file(struct cifsFileInfo *cfile, bool can_flush)
577 {
578         int rc = -EACCES;
579         unsigned int xid;
580         __u32 oplock;
581         struct cifs_sb_info *cifs_sb;
582         struct cifs_tcon *tcon;
583         struct TCP_Server_Info *server;
584         struct cifsInodeInfo *cinode;
585         struct inode *inode;
586         char *full_path = NULL;
587         int desired_access;
588         int disposition = FILE_OPEN;
589         int create_options = CREATE_NOT_DIR;
590         struct cifs_fid fid;
591
592         xid = get_xid();
593         mutex_lock(&cfile->fh_mutex);
594         if (!cfile->invalidHandle) {
595                 mutex_unlock(&cfile->fh_mutex);
596                 rc = 0;
597                 free_xid(xid);
598                 return rc;
599         }
600
601         inode = cfile->dentry->d_inode;
602         cifs_sb = CIFS_SB(inode->i_sb);
603         tcon = tlink_tcon(cfile->tlink);
604         server = tcon->ses->server;
605
606         /*
607          * Cannot grab the rename sem here: various ops, including some that
608          * already hold it, can end up causing writepage to be called, and if
609          * the server was down that means we end up here. We can never tell
610          * whether the caller already holds the rename_sem.
611          */
612         full_path = build_path_from_dentry(cfile->dentry);
613         if (full_path == NULL) {
614                 rc = -ENOMEM;
615                 mutex_unlock(&cfile->fh_mutex);
616                 free_xid(xid);
617                 return rc;
618         }
619
620         cFYI(1, "inode = 0x%p file flags 0x%x for %s", inode, cfile->f_flags,
621              full_path);
622
623         if (tcon->ses->server->oplocks)
624                 oplock = REQ_OPLOCK;
625         else
626                 oplock = 0;
627
628         if (tcon->unix_ext && cap_unix(tcon->ses) &&
629             (CIFS_UNIX_POSIX_PATH_OPS_CAP &
630                                 le64_to_cpu(tcon->fsUnixInfo.Capability))) {
631                 /*
632                  * O_CREAT, O_EXCL and O_TRUNC already had their effect on the
633                  * original open. Must mask them off for a reopen.
634                  */
635                 unsigned int oflags = cfile->f_flags &
636                                                 ~(O_CREAT | O_EXCL | O_TRUNC);
637
638                 rc = cifs_posix_open(full_path, NULL, inode->i_sb,
639                                      cifs_sb->mnt_file_mode /* ignored */,
640                                      oflags, &oplock, &fid.netfid, xid);
641                 if (rc == 0) {
642                         cFYI(1, "posix reopen succeeded");
643                         goto reopen_success;
644                 }
645                 /*
646                  * fallthrough to retry open the old way on errors; in the
647                  * reconnect path especially, it is important to retry hard
648                  */
649         }
650
651         desired_access = cifs_convert_flags(cfile->f_flags);
652
653         if (backup_cred(cifs_sb))
654                 create_options |= CREATE_OPEN_BACKUP_INTENT;
655
656         if (server->ops->get_lease_key)
657                 server->ops->get_lease_key(inode, &fid);
658
659         /*
660          * Cannot refresh the inode by passing in a file_info buf returned by
661          * CIFSSMBOpen and then calling get_inode_info with it, since the file
662          * might have write-behind data that needs to be flushed and the server
663          * version of the file size can be stale. If we knew for sure that the
664          * inode was not dirty locally, we could do this.
665          */
666         rc = server->ops->open(xid, tcon, full_path, disposition,
667                                desired_access, create_options, &fid, &oplock,
668                                NULL, cifs_sb);
669         if (rc) {
670                 mutex_unlock(&cfile->fh_mutex);
671                 cFYI(1, "cifs_reopen returned 0x%x", rc);
672                 cFYI(1, "oplock: %d", oplock);
673                 goto reopen_error_exit;
674         }
675
676 reopen_success:
677         cfile->invalidHandle = false;
678         mutex_unlock(&cfile->fh_mutex);
679         cinode = CIFS_I(inode);
680
681         if (can_flush) {
682                 rc = filemap_write_and_wait(inode->i_mapping);
683                 mapping_set_error(inode->i_mapping, rc);
684
685                 if (tcon->unix_ext)
686                         rc = cifs_get_inode_info_unix(&inode, full_path,
687                                                       inode->i_sb, xid);
688                 else
689                         rc = cifs_get_inode_info(&inode, full_path, NULL,
690                                                  inode->i_sb, xid, NULL);
691         }
692         /*
693          * Else we are already writing out data to the server and could
694          * deadlock if we tried to flush it; since we do not know whether we
695          * have data that would invalidate the current end of file on the
696          * server, we cannot go to the server for the new inode info.
697          */
698
699         server->ops->set_fid(cfile, &fid, oplock);
700         cifs_relock_file(cfile);
701
702 reopen_error_exit:
703         kfree(full_path);
704         free_xid(xid);
705         return rc;
706 }
707
708 int cifs_close(struct inode *inode, struct file *file)
709 {
710         if (file->private_data != NULL) {
711                 cifsFileInfo_put(file->private_data);
712                 file->private_data = NULL;
713         }
714
715         /* return code from the ->release op is always ignored */
716         return 0;
717 }
718
719 int cifs_closedir(struct inode *inode, struct file *file)
720 {
721         int rc = 0;
722         unsigned int xid;
723         struct cifsFileInfo *cfile = file->private_data;
724         struct cifs_tcon *tcon;
725         struct TCP_Server_Info *server;
726         char *buf;
727
728         cFYI(1, "Closedir inode = 0x%p", inode);
729
730         if (cfile == NULL)
731                 return rc;
732
733         xid = get_xid();
734         tcon = tlink_tcon(cfile->tlink);
735         server = tcon->ses->server;
736
737         cFYI(1, "Freeing private data in close dir");
738         spin_lock(&cifs_file_list_lock);
739         if (!cfile->srch_inf.endOfSearch && !cfile->invalidHandle) {
740                 cfile->invalidHandle = true;
741                 spin_unlock(&cifs_file_list_lock);
742                 if (server->ops->close_dir)
743                         rc = server->ops->close_dir(xid, tcon, &cfile->fid);
744                 else
745                         rc = -ENOSYS;
746                 cFYI(1, "Closing uncompleted readdir with rc %d", rc);
747                 /* not much we can do if it fails anyway, ignore rc */
748                 rc = 0;
749         } else
750                 spin_unlock(&cifs_file_list_lock);
751
752         buf = cfile->srch_inf.ntwrk_buf_start;
753         if (buf) {
754                 cFYI(1, "closedir free smb buf in srch struct");
755                 cfile->srch_inf.ntwrk_buf_start = NULL;
756                 if (cfile->srch_inf.smallBuf)
757                         cifs_small_buf_release(buf);
758                 else
759                         cifs_buf_release(buf);
760         }
761
762         cifs_put_tlink(cfile->tlink);
763         kfree(file->private_data);
764         file->private_data = NULL;
765         /* BB can we lock the filestruct while this is going on? */
766         free_xid(xid);
767         return rc;
768 }
769
770 static struct cifsLockInfo *
771 cifs_lock_init(__u64 offset, __u64 length, __u8 type)
772 {
773         struct cifsLockInfo *lock =
774                 kmalloc(sizeof(struct cifsLockInfo), GFP_KERNEL);
775         if (!lock)
776                 return lock;
777         lock->offset = offset;
778         lock->length = length;
779         lock->type = type;
780         lock->pid = current->tgid;
781         INIT_LIST_HEAD(&lock->blist);
782         init_waitqueue_head(&lock->block_q);
783         return lock;
784 }
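
/*
 * Example (sketch): callers allocate a lock record and then try to attach
 * it, freeing it themselves on failure, as cifs_setlk() does below:
 *
 *	lock = cifs_lock_init(flock->fl_start, length, type);
 *	if (!lock)
 *		return -ENOMEM;
 *	rc = cifs_lock_add_if(cfile, lock, wait_flag);
 *	if (rc < 0)
 *		kfree(lock);
 */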
785
786 void
787 cifs_del_lock_waiters(struct cifsLockInfo *lock)
788 {
789         struct cifsLockInfo *li, *tmp;
790         list_for_each_entry_safe(li, tmp, &lock->blist, blist) {
791                 list_del_init(&li->blist);
792                 wake_up(&li->block_q);
793         }
794 }
795
796 #define CIFS_LOCK_OP    0
797 #define CIFS_READ_OP    1
798 #define CIFS_WRITE_OP   2
799
800 /* @rw_check : CIFS_LOCK_OP - no op, CIFS_READ_OP - read, CIFS_WRITE_OP - write */
801 static bool
802 cifs_find_fid_lock_conflict(struct cifs_fid_locks *fdlocks, __u64 offset,
803                             __u64 length, __u8 type, struct cifsFileInfo *cfile,
804                             struct cifsLockInfo **conf_lock, int rw_check)
805 {
806         struct cifsLockInfo *li;
807         struct cifsFileInfo *cur_cfile = fdlocks->cfile;
808         struct TCP_Server_Info *server = tlink_tcon(cfile->tlink)->ses->server;
809
810         list_for_each_entry(li, &fdlocks->locks, llist) {
811                 if (offset + length <= li->offset ||
812                     offset >= li->offset + li->length)
813                         continue;
814                 if (rw_check != CIFS_LOCK_OP && current->tgid == li->pid &&
815                     server->ops->compare_fids(cfile, cur_cfile)) {
816                         /* shared lock prevents write op through the same fid */
817                         if (!(li->type & server->vals->shared_lock_type) ||
818                             rw_check != CIFS_WRITE_OP)
819                                 continue;
820                 }
821                 if ((type & server->vals->shared_lock_type) &&
822                     ((server->ops->compare_fids(cfile, cur_cfile) &&
823                      current->tgid == li->pid) || type == li->type))
824                         continue;
825                 if (conf_lock)
826                         *conf_lock = li;
827                 return true;
828         }
829         return false;
830 }
831
832 bool
833 cifs_find_lock_conflict(struct cifsFileInfo *cfile, __u64 offset, __u64 length,
834                         __u8 type, struct cifsLockInfo **conf_lock,
835                         int rw_check)
836 {
837         bool rc = false;
838         struct cifs_fid_locks *cur;
839         struct cifsInodeInfo *cinode = CIFS_I(cfile->dentry->d_inode);
840
841         list_for_each_entry(cur, &cinode->llist, llist) {
842                 rc = cifs_find_fid_lock_conflict(cur, offset, length, type,
843                                                  cfile, conf_lock, rw_check);
844                 if (rc)
845                         break;
846         }
847
848         return rc;
849 }
850
851 /*
852  * Check if there is another lock that prevents us from setting the lock
853  * (mandatory style). If such a lock exists, update the flock structure
854  * with its properties. Otherwise, set the flock type to F_UNLCK if we can
855  * cache brlocks, or leave it unchanged if we can't. Returns 0 if no
856  * request to the server is needed, or 1 otherwise.
857  */
858 static int
859 cifs_lock_test(struct cifsFileInfo *cfile, __u64 offset, __u64 length,
860                __u8 type, struct file_lock *flock)
861 {
862         int rc = 0;
863         struct cifsLockInfo *conf_lock;
864         struct cifsInodeInfo *cinode = CIFS_I(cfile->dentry->d_inode);
865         struct TCP_Server_Info *server = tlink_tcon(cfile->tlink)->ses->server;
866         bool exist;
867
868         down_read(&cinode->lock_sem);
869
870         exist = cifs_find_lock_conflict(cfile, offset, length, type,
871                                         &conf_lock, CIFS_LOCK_OP);
872         if (exist) {
873                 flock->fl_start = conf_lock->offset;
874                 flock->fl_end = conf_lock->offset + conf_lock->length - 1;
875                 flock->fl_pid = conf_lock->pid;
876                 if (conf_lock->type & server->vals->shared_lock_type)
877                         flock->fl_type = F_RDLCK;
878                 else
879                         flock->fl_type = F_WRLCK;
880         } else if (!cinode->can_cache_brlcks)
881                 rc = 1;
882         else
883                 flock->fl_type = F_UNLCK;
884
885         up_read(&cinode->lock_sem);
886         return rc;
887 }
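
/*
 * This is the local half of an F_GETLK-style query: if a cached
 * conflicting lock is found, flock is rewritten to describe it; if
 * nothing conflicts locally but brlocks cannot be cached, the caller
 * (cifs_getlk()) must still ask the server, hence the return of 1.
 */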
888
889 static void
890 cifs_lock_add(struct cifsFileInfo *cfile, struct cifsLockInfo *lock)
891 {
892         struct cifsInodeInfo *cinode = CIFS_I(cfile->dentry->d_inode);
893         down_write(&cinode->lock_sem);
894         list_add_tail(&lock->llist, &cfile->llist->locks);
895         up_write(&cinode->lock_sem);
896 }
897
898 /*
899  * Set the byte-range lock (mandatory style). Returns:
900  * 1) 0, if we set the lock and don't need to request it from the server;
901  * 2) 1, if no locks prevent us but we need to request it from the server;
902  * 3) -EACCES, if there is a lock that prevents us and wait is false.
903  */
904 static int
905 cifs_lock_add_if(struct cifsFileInfo *cfile, struct cifsLockInfo *lock,
906                  bool wait)
907 {
908         struct cifsLockInfo *conf_lock;
909         struct cifsInodeInfo *cinode = CIFS_I(cfile->dentry->d_inode);
910         bool exist;
911         int rc = 0;
912
913 try_again:
914         exist = false;
915         down_write(&cinode->lock_sem);
916
917         exist = cifs_find_lock_conflict(cfile, lock->offset, lock->length,
918                                         lock->type, &conf_lock, CIFS_LOCK_OP);
919         if (!exist && cinode->can_cache_brlcks) {
920                 list_add_tail(&lock->llist, &cfile->llist->locks);
921                 up_write(&cinode->lock_sem);
922                 return rc;
923         }
924
925         if (!exist)
926                 rc = 1;
927         else if (!wait)
928                 rc = -EACCES;
929         else {
930                 list_add_tail(&lock->blist, &conf_lock->blist);
931                 up_write(&cinode->lock_sem);
932                 rc = wait_event_interruptible(lock->block_q,
933                                         (lock->blist.prev == &lock->blist) &&
934                                         (lock->blist.next == &lock->blist));
935                 if (!rc)
936                         goto try_again;
937                 down_write(&cinode->lock_sem);
938                 list_del_init(&lock->blist);
939         }
940
941         up_write(&cinode->lock_sem);
942         return rc;
943 }
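
/*
 * Note on the wait path above: the task blocks on lock->block_q until
 * the conflicting lock is released. cifs_del_lock_waiters() empties the
 * blist and wakes each waiter, and the "blist.prev/next point to itself"
 * condition is simply list_empty() spelled out on the waiter's own node.
 */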
944
945 /*
946  * Check if there is another lock that prevents us from setting the lock
947  * (posix style). If such a lock exists, update the flock structure with
948  * its properties. Otherwise, set the flock type to F_UNLCK if we can
949  * cache brlocks, or leave it unchanged if we can't. Returns 0 if no
950  * request to the server is needed, or 1 otherwise.
951  */
952 static int
953 cifs_posix_lock_test(struct file *file, struct file_lock *flock)
954 {
955         int rc = 0;
956         struct cifsInodeInfo *cinode = CIFS_I(file_inode(file));
957         unsigned char saved_type = flock->fl_type;
958
959         if ((flock->fl_flags & FL_POSIX) == 0)
960                 return 1;
961
962         down_read(&cinode->lock_sem);
963         posix_test_lock(file, flock);
964
965         if (flock->fl_type == F_UNLCK && !cinode->can_cache_brlcks) {
966                 flock->fl_type = saved_type;
967                 rc = 1;
968         }
969
970         up_read(&cinode->lock_sem);
971         return rc;
972 }
973
974 /*
975  * Set the byte-range lock (posix style). Returns:
976  * 1) 0, if we set the lock and don't need to request it from the server;
977  * 2) 1, if we need to request it from the server;
978  * 3) <0, if an error occurs while setting the lock.
979  */
980 static int
981 cifs_posix_lock_set(struct file *file, struct file_lock *flock)
982 {
983         struct cifsInodeInfo *cinode = CIFS_I(file_inode(file));
984         int rc = 1;
985
986         if ((flock->fl_flags & FL_POSIX) == 0)
987                 return rc;
988
989 try_again:
990         down_write(&cinode->lock_sem);
991         if (!cinode->can_cache_brlcks) {
992                 up_write(&cinode->lock_sem);
993                 return rc;
994         }
995
996         rc = posix_lock_file(file, flock, NULL);
997         up_write(&cinode->lock_sem);
998         if (rc == FILE_LOCK_DEFERRED) {
999                 rc = wait_event_interruptible(flock->fl_wait, !flock->fl_next);
1000                 if (!rc)
1001                         goto try_again;
1002                 locks_delete_block(flock);
1003         }
1004         return rc;
1005 }
1006
1007 int
1008 cifs_push_mandatory_locks(struct cifsFileInfo *cfile)
1009 {
1010         unsigned int xid;
1011         int rc = 0, stored_rc;
1012         struct cifsLockInfo *li, *tmp;
1013         struct cifs_tcon *tcon;
1014         unsigned int num, max_num, max_buf;
1015         LOCKING_ANDX_RANGE *buf, *cur;
1016         int types[] = {LOCKING_ANDX_LARGE_FILES,
1017                        LOCKING_ANDX_SHARED_LOCK | LOCKING_ANDX_LARGE_FILES};
1018         int i;
1019
1020         xid = get_xid();
1021         tcon = tlink_tcon(cfile->tlink);
1022
1023         /*
1024          * Accessing maxBuf is racy with cifs_reconnect - need to store value
1025          * and check it for zero before using.
1026          */
1027         max_buf = tcon->ses->server->maxBuf;
1028         if (!max_buf) {
1029                 free_xid(xid);
1030                 return -EINVAL;
1031         }
1032
1033         max_num = (max_buf - sizeof(struct smb_hdr)) /
1034                                                 sizeof(LOCKING_ANDX_RANGE);
1035         buf = kzalloc(max_num * sizeof(LOCKING_ANDX_RANGE), GFP_KERNEL);
1036         if (!buf) {
1037                 free_xid(xid);
1038                 return -ENOMEM;
1039         }
1040
1041         for (i = 0; i < 2; i++) {
1042                 cur = buf;
1043                 num = 0;
1044                 list_for_each_entry_safe(li, tmp, &cfile->llist->locks, llist) {
1045                         if (li->type != types[i])
1046                                 continue;
1047                         cur->Pid = cpu_to_le16(li->pid);
1048                         cur->LengthLow = cpu_to_le32((u32)li->length);
1049                         cur->LengthHigh = cpu_to_le32((u32)(li->length>>32));
1050                         cur->OffsetLow = cpu_to_le32((u32)li->offset);
1051                         cur->OffsetHigh = cpu_to_le32((u32)(li->offset>>32));
1052                         if (++num == max_num) {
1053                                 stored_rc = cifs_lockv(xid, tcon,
1054                                                        cfile->fid.netfid,
1055                                                        (__u8)li->type, 0, num,
1056                                                        buf);
1057                                 if (stored_rc)
1058                                         rc = stored_rc;
1059                                 cur = buf;
1060                                 num = 0;
1061                         } else
1062                                 cur++;
1063                 }
1064
1065                 if (num) {
1066                         stored_rc = cifs_lockv(xid, tcon, cfile->fid.netfid,
1067                                                (__u8)types[i], 0, num, buf);
1068                         if (stored_rc)
1069                                 rc = stored_rc;
1070                 }
1071         }
1072
1073         kfree(buf);
1074         free_xid(xid);
1075         return rc;
1076 }
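
/*
 * Sizing note (sketch): max_num above is just how many LOCKING_ANDX_RANGE
 * entries fit in one SMB after the header, so the cached locks of each
 * type are pushed in batches of up to max_num ranges per LOCKING_ANDX
 * request rather than one request per lock.
 */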
1077
1078 /* copied from fs/locks.c with a name change */
1079 #define cifs_for_each_lock(inode, lockp) \
1080         for (lockp = &inode->i_flock; *lockp != NULL; \
1081              lockp = &(*lockp)->fl_next)
1082
1083 struct lock_to_push {
1084         struct list_head llist;
1085         __u64 offset;
1086         __u64 length;
1087         __u32 pid;
1088         __u16 netfid;
1089         __u8 type;
1090 };
1091
1092 static int
1093 cifs_push_posix_locks(struct cifsFileInfo *cfile)
1094 {
1095         struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
1096         struct file_lock *flock, **before;
1097         unsigned int count = 0, i = 0;
1098         int rc = 0, xid, type;
1099         struct list_head locks_to_send, *el;
1100         struct lock_to_push *lck, *tmp;
1101         __u64 length;
1102
1103         xid = get_xid();
1104
1105         lock_flocks();
1106         cifs_for_each_lock(cfile->dentry->d_inode, before) {
1107                 if ((*before)->fl_flags & FL_POSIX)
1108                         count++;
1109         }
1110         unlock_flocks();
1111
1112         INIT_LIST_HEAD(&locks_to_send);
1113
1114         /*
1115          * Allocating count locks is enough because no FL_POSIX locks can be
1116  * added to the list while we are holding cinode->lock_sem, which
1117  * protects the locking operations on this inode.
1118          */
1119         for (; i < count; i++) {
1120                 lck = kmalloc(sizeof(struct lock_to_push), GFP_KERNEL);
1121                 if (!lck) {
1122                         rc = -ENOMEM;
1123                         goto err_out;
1124                 }
1125                 list_add_tail(&lck->llist, &locks_to_send);
1126         }
1127
1128         el = locks_to_send.next;
1129         lock_flocks();
1130         cifs_for_each_lock(cfile->dentry->d_inode, before) {
1131                 flock = *before;
1132                 if ((flock->fl_flags & FL_POSIX) == 0)
1133                         continue;
1134                 if (el == &locks_to_send) {
1135                         /*
1136                          * The list ended. We don't have enough allocated
1137                          * structures - something is really wrong.
1138                          */
1139                         cERROR(1, "Can't push all brlocks!");
1140                         break;
1141                 }
1142                 length = 1 + flock->fl_end - flock->fl_start;
1143                 if (flock->fl_type == F_RDLCK || flock->fl_type == F_SHLCK)
1144                         type = CIFS_RDLCK;
1145                 else
1146                         type = CIFS_WRLCK;
1147                 lck = list_entry(el, struct lock_to_push, llist);
1148                 lck->pid = flock->fl_pid;
1149                 lck->netfid = cfile->fid.netfid;
1150                 lck->length = length;
1151                 lck->type = type;
1152                 lck->offset = flock->fl_start;
1153                 el = el->next;
1154         }
1155         unlock_flocks();
1156
1157         list_for_each_entry_safe(lck, tmp, &locks_to_send, llist) {
1158                 int stored_rc;
1159
1160                 stored_rc = CIFSSMBPosixLock(xid, tcon, lck->netfid, lck->pid,
1161                                              lck->offset, lck->length, NULL,
1162                                              lck->type, 0);
1163                 if (stored_rc)
1164                         rc = stored_rc;
1165                 list_del(&lck->llist);
1166                 kfree(lck);
1167         }
1168
1169 out:
1170         free_xid(xid);
1171         return rc;
1172 err_out:
1173         list_for_each_entry_safe(lck, tmp, &locks_to_send, llist) {
1174                 list_del(&lck->llist);
1175                 kfree(lck);
1176         }
1177         goto out;
1178 }
1179
1180 static int
1181 cifs_push_locks(struct cifsFileInfo *cfile)
1182 {
1183         struct cifs_sb_info *cifs_sb = CIFS_SB(cfile->dentry->d_sb);
1184         struct cifsInodeInfo *cinode = CIFS_I(cfile->dentry->d_inode);
1185         struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
1186         int rc = 0;
1187
1188         /* we are going to update can_cache_brlcks here - need write access */
1189         down_write(&cinode->lock_sem);
1190         if (!cinode->can_cache_brlcks) {
1191                 up_write(&cinode->lock_sem);
1192                 return rc;
1193         }
1194
1195         if (cap_unix(tcon->ses) &&
1196             (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) &&
1197             ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0))
1198                 rc = cifs_push_posix_locks(cfile);
1199         else
1200                 rc = tcon->ses->server->ops->push_mand_locks(cfile);
1201
1202         cinode->can_cache_brlcks = false;
1203         up_write(&cinode->lock_sem);
1204         return rc;
1205 }
1206
1207 static void
1208 cifs_read_flock(struct file_lock *flock, __u32 *type, int *lock, int *unlock,
1209                 bool *wait_flag, struct TCP_Server_Info *server)
1210 {
1211         if (flock->fl_flags & FL_POSIX)
1212                 cFYI(1, "Posix");
1213         if (flock->fl_flags & FL_FLOCK)
1214                 cFYI(1, "Flock");
1215         if (flock->fl_flags & FL_SLEEP) {
1216                 cFYI(1, "Blocking lock");
1217                 *wait_flag = true;
1218         }
1219         if (flock->fl_flags & FL_ACCESS)
1220                 cFYI(1, "Process suspended by mandatory locking - "
1221                         "not implemented yet");
1222         if (flock->fl_flags & FL_LEASE)
1223                 cFYI(1, "Lease on file - not implemented yet");
1224         if (flock->fl_flags &
1225             (~(FL_POSIX | FL_FLOCK | FL_SLEEP |
1226                FL_ACCESS | FL_LEASE | FL_CLOSE)))
1227                 cFYI(1, "Unknown lock flags 0x%x", flock->fl_flags);
1228
1229         *type = server->vals->large_lock_type;
1230         if (flock->fl_type == F_WRLCK) {
1231                 cFYI(1, "F_WRLCK ");
1232                 *type |= server->vals->exclusive_lock_type;
1233                 *lock = 1;
1234         } else if (flock->fl_type == F_UNLCK) {
1235                 cFYI(1, "F_UNLCK");
1236                 *type |= server->vals->unlock_lock_type;
1237                 *unlock = 1;
1238                 /* Check if unlock includes more than one lock range */
1239         } else if (flock->fl_type == F_RDLCK) {
1240                 cFYI(1, "F_RDLCK");
1241                 *type |= server->vals->shared_lock_type;
1242                 *lock = 1;
1243         } else if (flock->fl_type == F_EXLCK) {
1244                 cFYI(1, "F_EXLCK");
1245                 *type |= server->vals->exclusive_lock_type;
1246                 *lock = 1;
1247         } else if (flock->fl_type == F_SHLCK) {
1248                 cFYI(1, "F_SHLCK");
1249                 *type |= server->vals->shared_lock_type;
1250                 *lock = 1;
1251         } else
1252                 cFYI(1, "Unknown type of lock");
1253 }
1254
1255 static int
1256 cifs_getlk(struct file *file, struct file_lock *flock, __u32 type,
1257            bool wait_flag, bool posix_lck, unsigned int xid)
1258 {
1259         int rc = 0;
1260         __u64 length = 1 + flock->fl_end - flock->fl_start;
1261         struct cifsFileInfo *cfile = (struct cifsFileInfo *)file->private_data;
1262         struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
1263         struct TCP_Server_Info *server = tcon->ses->server;
1264         __u16 netfid = cfile->fid.netfid;
1265
1266         if (posix_lck) {
1267                 int posix_lock_type;
1268
1269                 rc = cifs_posix_lock_test(file, flock);
1270                 if (!rc)
1271                         return rc;
1272
1273                 if (type & server->vals->shared_lock_type)
1274                         posix_lock_type = CIFS_RDLCK;
1275                 else
1276                         posix_lock_type = CIFS_WRLCK;
1277                 rc = CIFSSMBPosixLock(xid, tcon, netfid, current->tgid,
1278                                       flock->fl_start, length, flock,
1279                                       posix_lock_type, wait_flag);
1280                 return rc;
1281         }
1282
1283         rc = cifs_lock_test(cfile, flock->fl_start, length, type, flock);
1284         if (!rc)
1285                 return rc;
1286
1287         /* BB we could chain these into one lock request BB */
1288         rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length, type,
1289                                     1, 0, false);
1290         if (rc == 0) {
1291                 rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length,
1292                                             type, 0, 1, false);
1293                 flock->fl_type = F_UNLCK;
1294                 if (rc != 0)
1295                         cERROR(1, "Error %d unlocking previously locked "
1296                                   "range during test of lock", rc);
1297                 return 0;
1298         }
1299
1300         if (type & server->vals->shared_lock_type) {
1301                 flock->fl_type = F_WRLCK;
1302                 return 0;
1303         }
1304
1305         type &= ~server->vals->exclusive_lock_type;
1306
1307         rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length,
1308                                     type | server->vals->shared_lock_type,
1309                                     1, 0, false);
1310         if (rc == 0) {
1311                 rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length,
1312                         type | server->vals->shared_lock_type, 0, 1, false);
1313                 flock->fl_type = F_RDLCK;
1314                 if (rc != 0)
1315                         cERROR(1, "Error %d unlocking previously locked "
1316                                   "range during test of lock", rc);
1317         } else
1318                 flock->fl_type = F_WRLCK;
1319
1320         return 0;
1321 }
1322
1323 void
1324 cifs_move_llist(struct list_head *source, struct list_head *dest)
1325 {
1326         struct list_head *li, *tmp;
1327         list_for_each_safe(li, tmp, source)
1328                 list_move(li, dest);
1329 }
1330
1331 void
1332 cifs_free_llist(struct list_head *llist)
1333 {
1334         struct cifsLockInfo *li, *tmp;
1335         list_for_each_entry_safe(li, tmp, llist, llist) {
1336                 cifs_del_lock_waiters(li);
1337                 list_del(&li->llist);
1338                 kfree(li);
1339         }
1340 }
1341
1342 int
1343 cifs_unlock_range(struct cifsFileInfo *cfile, struct file_lock *flock,
1344                   unsigned int xid)
1345 {
1346         int rc = 0, stored_rc;
1347         int types[] = {LOCKING_ANDX_LARGE_FILES,
1348                        LOCKING_ANDX_SHARED_LOCK | LOCKING_ANDX_LARGE_FILES};
1349         unsigned int i;
1350         unsigned int max_num, num, max_buf;
1351         LOCKING_ANDX_RANGE *buf, *cur;
1352         struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
1353         struct cifsInodeInfo *cinode = CIFS_I(cfile->dentry->d_inode);
1354         struct cifsLockInfo *li, *tmp;
1355         __u64 length = 1 + flock->fl_end - flock->fl_start;
1356         struct list_head tmp_llist;
1357
1358         INIT_LIST_HEAD(&tmp_llist);
1359
1360         /*
1361          * Accessing maxBuf is racy with cifs_reconnect - need to store value
1362          * and check it for zero before using.
1363          */
1364         max_buf = tcon->ses->server->maxBuf;
1365         if (!max_buf)
1366                 return -EINVAL;
1367
1368         max_num = (max_buf - sizeof(struct smb_hdr)) /
1369                                                 sizeof(LOCKING_ANDX_RANGE);
1370         buf = kzalloc(max_num * sizeof(LOCKING_ANDX_RANGE), GFP_KERNEL);
1371         if (!buf)
1372                 return -ENOMEM;
1373
1374         down_write(&cinode->lock_sem);
1375         for (i = 0; i < 2; i++) {
1376                 cur = buf;
1377                 num = 0;
1378                 list_for_each_entry_safe(li, tmp, &cfile->llist->locks, llist) {
1379                         if (flock->fl_start > li->offset ||
1380                             (flock->fl_start + length) <
1381                             (li->offset + li->length))
1382                                 continue;
1383                         if (current->tgid != li->pid)
1384                                 continue;
1385                         if (types[i] != li->type)
1386                                 continue;
1387                         if (cinode->can_cache_brlcks) {
1388                                 /*
1389                                  * We can cache brlock requests - simply remove
1390                                  * the lock from the file's list.
1391                                  */
1392                                 list_del(&li->llist);
1393                                 cifs_del_lock_waiters(li);
1394                                 kfree(li);
1395                                 continue;
1396                         }
1397                         cur->Pid = cpu_to_le16(li->pid);
1398                         cur->LengthLow = cpu_to_le32((u32)li->length);
1399                         cur->LengthHigh = cpu_to_le32((u32)(li->length>>32));
1400                         cur->OffsetLow = cpu_to_le32((u32)li->offset);
1401                         cur->OffsetHigh = cpu_to_le32((u32)(li->offset>>32));
1402                         /*
1403                          * We need to save a lock here to let us add it again to
1404                          * the file's list if the unlock range request fails on
1405                          * the server.
1406                          */
1407                         list_move(&li->llist, &tmp_llist);
1408                         if (++num == max_num) {
1409                                 stored_rc = cifs_lockv(xid, tcon,
1410                                                        cfile->fid.netfid,
1411                                                        li->type, num, 0, buf);
1412                                 if (stored_rc) {
1413                                         /*
1414                                          * We failed on the unlock range
1415                                          * request - add all locks from the tmp
1416                                          * list to the head of the file's list.
1417                                          */
1418                                         cifs_move_llist(&tmp_llist,
1419                                                         &cfile->llist->locks);
1420                                         rc = stored_rc;
1421                                 } else
1422                                         /*
1423                                          * The unlock range request succeeded -
1424                                          * free the tmp list.
1425                                          */
1426                                         cifs_free_llist(&tmp_llist);
1427                                 cur = buf;
1428                                 num = 0;
1429                         } else
1430                                 cur++;
1431                 }
1432                 if (num) {
1433                         stored_rc = cifs_lockv(xid, tcon, cfile->fid.netfid,
1434                                                types[i], num, 0, buf);
1435                         if (stored_rc) {
1436                                 cifs_move_llist(&tmp_llist,
1437                                                 &cfile->llist->locks);
1438                                 rc = stored_rc;
1439                         } else
1440                                 cifs_free_llist(&tmp_llist);
1441                 }
1442         }
1443
1444         up_write(&cinode->lock_sem);
1445         kfree(buf);
1446         return rc;
1447 }
1448
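/*
 * Set or clear a byte-range lock. POSIX locks are sent to the server
 * via CIFSSMBPosixLock; mandatory locks are checked against the local
 * lock list first and, when they cannot be handled locally, sent with
 * the mand_lock or mand_unlock_range server operations.
 */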
1449 static int
1450 cifs_setlk(struct file *file, struct file_lock *flock, __u32 type,
1451            bool wait_flag, bool posix_lck, int lock, int unlock,
1452            unsigned int xid)
1453 {
1454         int rc = 0;
1455         __u64 length = 1 + flock->fl_end - flock->fl_start;
1456         struct cifsFileInfo *cfile = (struct cifsFileInfo *)file->private_data;
1457         struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
1458         struct TCP_Server_Info *server = tcon->ses->server;
1459         struct inode *inode = cfile->dentry->d_inode;
1460
1461         if (posix_lck) {
1462                 int posix_lock_type;
1463
1464                 rc = cifs_posix_lock_set(file, flock);
1465                 if (rc <= 0)
1466                         return rc;
1467
1468                 if (type & server->vals->shared_lock_type)
1469                         posix_lock_type = CIFS_RDLCK;
1470                 else
1471                         posix_lock_type = CIFS_WRLCK;
1472
1473                 if (unlock == 1)
1474                         posix_lock_type = CIFS_UNLCK;
1475
1476                 rc = CIFSSMBPosixLock(xid, tcon, cfile->fid.netfid,
1477                                       current->tgid, flock->fl_start, length,
1478                                       NULL, posix_lock_type, wait_flag);
1479                 goto out;
1480         }
1481
1482         if (lock) {
1483                 struct cifsLockInfo *lock;
1484
1485                 lock = cifs_lock_init(flock->fl_start, length, type);
1486                 if (!lock)
1487                         return -ENOMEM;
1488
1489                 rc = cifs_lock_add_if(cfile, lock, wait_flag);
1490                 if (rc < 0) {
1491                         kfree(lock);
1492                         return rc;
1493                 }
1494                 if (!rc)
1495                         goto out;
1496
1497                 /*
1498                  * A Windows 7 server can delay breaking a lease from read to
1499                  * None if we set a byte-range lock on a file - break it
1500                  * explicitly before sending the lock to the server to be sure
1501                  * the next read won't conflict with non-overlapping locks due
1502                  * to page reading.
1503                  */
1504                 if (!CIFS_I(inode)->clientCanCacheAll &&
1505                                         CIFS_I(inode)->clientCanCacheRead) {
1506                         cifs_invalidate_mapping(inode);
1507                         cFYI(1, "Set no oplock for inode=%p due to mand locks",
1508                              inode);
1509                         CIFS_I(inode)->clientCanCacheRead = false;
1510                 }
1511
1512                 rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length,
1513                                             type, 1, 0, wait_flag);
1514                 if (rc) {
1515                         kfree(lock);
1516                         return rc;
1517                 }
1518
1519                 cifs_lock_add(cfile, lock);
1520         } else if (unlock)
1521                 rc = server->ops->mand_unlock_range(cfile, flock, xid);
1522
1523 out:
1524         if (flock->fl_flags & FL_POSIX)
1525                 posix_lock_file_wait(file, flock);
1526         return rc;
1527 }
1528
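/*
 * Entry point for the VFS ->lock operation: decode the file_lock into
 * lock/unlock/type flags, decide whether POSIX (unix extensions) or
 * mandatory semantics apply, and dispatch to cifs_getlk or cifs_setlk.
 */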
1529 int cifs_lock(struct file *file, int cmd, struct file_lock *flock)
1530 {
1531         int rc, xid;
1532         int lock = 0, unlock = 0;
1533         bool wait_flag = false;
1534         bool posix_lck = false;
1535         struct cifs_sb_info *cifs_sb;
1536         struct cifs_tcon *tcon;
1537         struct cifsInodeInfo *cinode;
1538         struct cifsFileInfo *cfile;
1539         __u16 netfid;
1540         __u32 type;
1541
1542         rc = -EACCES;
1543         xid = get_xid();
1544
1545         cFYI(1, "Lock parm: 0x%x flockflags: 0x%x flocktype: 0x%x start: %lld "
1546                 "end: %lld", cmd, flock->fl_flags, flock->fl_type,
1547                 flock->fl_start, flock->fl_end);
1548
1549         cfile = (struct cifsFileInfo *)file->private_data;
1550         tcon = tlink_tcon(cfile->tlink);
1551
1552         cifs_read_flock(flock, &type, &lock, &unlock, &wait_flag,
1553                         tcon->ses->server);
1554
1555         cifs_sb = CIFS_SB(file->f_path.dentry->d_sb);
1556         netfid = cfile->fid.netfid;
1557         cinode = CIFS_I(file_inode(file));
1558
1559         if (cap_unix(tcon->ses) &&
1560             (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) &&
1561             ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0))
1562                 posix_lck = true;
1563         /*
1564          * BB add code here to normalize offset and length to account for
1565          * negative length, which we cannot accept over the wire.
1566          */
1567         if (IS_GETLK(cmd)) {
1568                 rc = cifs_getlk(file, flock, type, wait_flag, posix_lck, xid);
1569                 free_xid(xid);
1570                 return rc;
1571         }
1572
1573         if (!lock && !unlock) {
1574                 /*
1575                  * If the request is neither a lock nor an unlock, there is
1576                  * nothing to do since we do not know how to handle it.
1577                  */
1578                 free_xid(xid);
1579                 return -EOPNOTSUPP;
1580         }
1581
1582         rc = cifs_setlk(file, flock, type, wait_flag, posix_lck, lock, unlock,
1583                         xid);
1584         free_xid(xid);
1585         return rc;
1586 }
1587
1588 /*
1589  * Update the file size (if needed) after a write. Must be called with
1590  * inode->i_lock held.
1591  */
1592 void
1593 cifs_update_eof(struct cifsInodeInfo *cifsi, loff_t offset,
1594                       unsigned int bytes_written)
1595 {
1596         loff_t end_of_write = offset + bytes_written;
1597
1598         if (end_of_write > cifsi->server_eof)
1599                 cifsi->server_eof = end_of_write;
1600 }
1601
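/*
 * Write write_size bytes from write_data to the server at *offset,
 * looping in chunks of at most wsize bytes. Invalid handles are
 * reopened and -EAGAIN is retried; on success *offset is advanced and
 * the cached file size updated.
 */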
1602 static ssize_t
1603 cifs_write(struct cifsFileInfo *open_file, __u32 pid, const char *write_data,
1604            size_t write_size, loff_t *offset)
1605 {
1606         int rc = 0;
1607         unsigned int bytes_written = 0;
1608         unsigned int total_written;
1609         struct cifs_sb_info *cifs_sb;
1610         struct cifs_tcon *tcon;
1611         struct TCP_Server_Info *server;
1612         unsigned int xid;
1613         struct dentry *dentry = open_file->dentry;
1614         struct cifsInodeInfo *cifsi = CIFS_I(dentry->d_inode);
1615         struct cifs_io_parms io_parms;
1616
1617         cifs_sb = CIFS_SB(dentry->d_sb);
1618
1619         cFYI(1, "write %zd bytes to offset %lld of %s", write_size,
1620              *offset, dentry->d_name.name);
1621
1622         tcon = tlink_tcon(open_file->tlink);
1623         server = tcon->ses->server;
1624
1625         if (!server->ops->sync_write)
1626                 return -ENOSYS;
1627
1628         xid = get_xid();
1629
1630         for (total_written = 0; write_size > total_written;
1631              total_written += bytes_written) {
1632                 rc = -EAGAIN;
1633                 while (rc == -EAGAIN) {
1634                         struct kvec iov[2];
1635                         unsigned int len;
1636
1637                         if (open_file->invalidHandle) {
1638                                 /* we could deadlock if we called
1639                                    filemap_fdatawait from here, so tell
1640                                    reopen_file not to flush its data to
1641                                    the server now */
1642                                 rc = cifs_reopen_file(open_file, false);
1643                                 if (rc != 0)
1644                                         break;
1645                         }
1646
1647                         len = min((size_t)cifs_sb->wsize,
1648                                   write_size - total_written);
1649                         /* iov[0] is reserved for smb header */
1650                         iov[1].iov_base = (char *)write_data + total_written;
1651                         iov[1].iov_len = len;
1652                         io_parms.pid = pid;
1653                         io_parms.tcon = tcon;
1654                         io_parms.offset = *offset;
1655                         io_parms.length = len;
1656                         rc = server->ops->sync_write(xid, open_file, &io_parms,
1657                                                      &bytes_written, iov, 1);
1658                 }
1659                 if (rc || (bytes_written == 0)) {
1660                         if (total_written)
1661                                 break;
1662                         else {
1663                                 free_xid(xid);
1664                                 return rc;
1665                         }
1666                 } else {
1667                         spin_lock(&dentry->d_inode->i_lock);
1668                         cifs_update_eof(cifsi, *offset, bytes_written);
1669                         spin_unlock(&dentry->d_inode->i_lock);
1670                         *offset += bytes_written;
1671                 }
1672         }
1673
1674         cifs_stats_bytes_written(tcon, total_written);
1675
1676         if (total_written > 0) {
1677                 spin_lock(&dentry->d_inode->i_lock);
1678                 if (*offset > dentry->d_inode->i_size)
1679                         i_size_write(dentry->d_inode, *offset);
1680                 spin_unlock(&dentry->d_inode->i_lock);
1681         }
1682         mark_inode_dirty_sync(dentry->d_inode);
1683         free_xid(xid);
1684         return total_written;
1685 }
1686
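/*
 * Find an open handle on this inode that is usable for reading. On
 * multiuser mounts the search can be restricted to handles owned by
 * the current fsuid. Returns a referenced handle, or NULL.
 */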
1687 struct cifsFileInfo *find_readable_file(struct cifsInodeInfo *cifs_inode,
1688                                         bool fsuid_only)
1689 {
1690         struct cifsFileInfo *open_file = NULL;
1691         struct cifs_sb_info *cifs_sb = CIFS_SB(cifs_inode->vfs_inode.i_sb);
1692
1693         /* only filter by fsuid on multiuser mounts */
1694         if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MULTIUSER))
1695                 fsuid_only = false;
1696
1697         spin_lock(&cifs_file_list_lock);
1698         /* we could simply take the first list entry, since write-only
1699            entries are always at the end of the list, but the first entry
1700            might have a close pending, so we walk the whole list */
1701         list_for_each_entry(open_file, &cifs_inode->openFileList, flist) {
1702                 if (fsuid_only && !uid_eq(open_file->uid, current_fsuid()))
1703                         continue;
1704                 if (OPEN_FMODE(open_file->f_flags) & FMODE_READ) {
1705                         if (!open_file->invalidHandle) {
1706                                 /* found a good file */
1707                                 /* lock it so it will not be closed on us */
1708                                 cifsFileInfo_get_locked(open_file);
1709                                 spin_unlock(&cifs_file_list_lock);
1710                                 return open_file;
1711                         } /* else might as well continue, and look for
1712                              another, or simply have the caller reopen it
1713                              again rather than trying to fix this handle */
1714                 } else /* write only file */
1715                         break; /* write only files are last so must be done */
1716         }
1717         spin_unlock(&cifs_file_list_lock);
1718         return NULL;
1719 }
1720
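/*
 * Find an open handle on this inode that is usable for writing,
 * preferring handles opened by the current task. If only an invalid
 * (disconnected) handle is found, try to reopen it, giving up after
 * MAX_REOPEN_ATT attempts.
 */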
1721 struct cifsFileInfo *find_writable_file(struct cifsInodeInfo *cifs_inode,
1722                                         bool fsuid_only)
1723 {
1724         struct cifsFileInfo *open_file, *inv_file = NULL;
1725         struct cifs_sb_info *cifs_sb;
1726         bool any_available = false;
1727         int rc;
1728         unsigned int refind = 0;
1729
1730         /* Having a null inode here (because mapping->host was set to zero by
1731            the VFS or MM) should not happen, but we had reports of an oops
1732            (due to it being zero) during stress test cases, so check for it */
1733
1734         if (cifs_inode == NULL) {
1735                 cERROR(1, "Null inode passed to find_writable_file");
1736                 dump_stack();
1737                 return NULL;
1738         }
1739
1740         cifs_sb = CIFS_SB(cifs_inode->vfs_inode.i_sb);
1741
1742         /* only filter by fsuid on multiuser mounts */
1743         if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MULTIUSER))
1744                 fsuid_only = false;
1745
1746         spin_lock(&cifs_file_list_lock);
1747 refind_writable:
1748         if (refind > MAX_REOPEN_ATT) {
1749                 spin_unlock(&cifs_file_list_lock);
1750                 return NULL;
1751         }
1752         list_for_each_entry(open_file, &cifs_inode->openFileList, flist) {
1753                 if (!any_available && open_file->pid != current->tgid)
1754                         continue;
1755                 if (fsuid_only && !uid_eq(open_file->uid, current_fsuid()))
1756                         continue;
1757                 if (OPEN_FMODE(open_file->f_flags) & FMODE_WRITE) {
1758                         if (!open_file->invalidHandle) {
1759                                 /* found a good writable file */
1760                                 cifsFileInfo_get_locked(open_file);
1761                                 spin_unlock(&cifs_file_list_lock);
1762                                 return open_file;
1763                         } else {
1764                                 if (!inv_file)
1765                                         inv_file = open_file;
1766                         }
1767                 }
1768         }
1769         /* couldn't find a usable FH with the same pid, try any available */
1770         if (!any_available) {
1771                 any_available = true;
1772                 goto refind_writable;
1773         }
1774
1775         if (inv_file) {
1776                 any_available = false;
1777                 cifsFileInfo_get_locked(inv_file);
1778         }
1779
1780         spin_unlock(&cifs_file_list_lock);
1781
1782         if (inv_file) {
1783                 rc = cifs_reopen_file(inv_file, false);
1784                 if (!rc)
1785                         return inv_file;
1786                 else {
1787                         spin_lock(&cifs_file_list_lock);
1788                         list_move_tail(&inv_file->flist,
1789                                         &cifs_inode->openFileList);
1790                         spin_unlock(&cifs_file_list_lock);
1791                         cifsFileInfo_put(inv_file);
1792                         spin_lock(&cifs_file_list_lock);
1793                         ++refind;
1794                         goto refind_writable;
1795                 }
1796         }
1797
1798         return NULL;
1799 }
1800
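/*
 * Write the byte range [from, to) of a page back to the server through
 * a writable handle, clamping the range so that the write never
 * extends the file.
 */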
1801 static int cifs_partialpagewrite(struct page *page, unsigned from, unsigned to)
1802 {
1803         struct address_space *mapping = page->mapping;
1804         loff_t offset = (loff_t)page->index << PAGE_CACHE_SHIFT;
1805         char *write_data;
1806         int rc = -EFAULT;
1807         int bytes_written = 0;
1808         struct inode *inode;
1809         struct cifsFileInfo *open_file;
1810
1811         if (!mapping || !mapping->host)
1812                 return -EFAULT;
1813
1814         inode = page->mapping->host;
1815
1816         offset += (loff_t)from;
1817         write_data = kmap(page);
1818         write_data += from;
1819
1820         if ((to > PAGE_CACHE_SIZE) || (from > to)) {
1821                 kunmap(page);
1822                 return -EIO;
1823         }
1824
1825         /* racing with truncate? */
1826         if (offset > mapping->host->i_size) {
1827                 kunmap(page);
1828                 return 0; /* don't care */
1829         }
1830
1831         /* check to make sure that we are not extending the file */
1832         if (mapping->host->i_size - offset < (loff_t)to)
1833                 to = (unsigned)(mapping->host->i_size - offset);
1834
1835         open_file = find_writable_file(CIFS_I(mapping->host), false);
1836         if (open_file) {
1837                 bytes_written = cifs_write(open_file, open_file->pid,
1838                                            write_data, to - from, &offset);
1839                 cifsFileInfo_put(open_file);
1840                 /* Does mm or vfs already set times? */
1841                 inode->i_atime = inode->i_mtime = current_fs_time(inode->i_sb);
1842                 if ((bytes_written > 0) && (offset))
1843                         rc = 0;
1844                 else if (bytes_written < 0)
1845                         rc = bytes_written;
1846         } else {
1847                 cFYI(1, "No writeable filehandles for inode");
1848                 rc = -EIO;
1849         }
1850
1851         kunmap(page);
1852         return rc;
1853 }
1854
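/*
 * Writeback for the whole mapping: gather runs of contiguous dirty
 * pages (up to wsize bytes) into a cifs_writedata and send each run to
 * the server with the async_writev operation, falling back to
 * generic_writepages when wsize is smaller than a page.
 */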
1855 static int cifs_writepages(struct address_space *mapping,
1856                            struct writeback_control *wbc)
1857 {
1858         struct cifs_sb_info *cifs_sb = CIFS_SB(mapping->host->i_sb);
1859         bool done = false, scanned = false, range_whole = false;
1860         pgoff_t end, index;
1861         struct cifs_writedata *wdata;
1862         struct TCP_Server_Info *server;
1863         struct page *page;
1864         int rc = 0;
1865
1866         /*
1867          * If wsize is smaller than the page cache size, default to writing
1868          * one page at a time via cifs_writepage
1869          */
1870         if (cifs_sb->wsize < PAGE_CACHE_SIZE)
1871                 return generic_writepages(mapping, wbc);
1872
1873         if (wbc->range_cyclic) {
1874                 index = mapping->writeback_index; /* Start from prev offset */
1875                 end = -1;
1876         } else {
1877                 index = wbc->range_start >> PAGE_CACHE_SHIFT;
1878                 end = wbc->range_end >> PAGE_CACHE_SHIFT;
1879                 if (wbc->range_start == 0 && wbc->range_end == LLONG_MAX)
1880                         range_whole = true;
1881                 scanned = true;
1882         }
1883 retry:
1884         while (!done && index <= end) {
1885                 unsigned int i, nr_pages, found_pages;
1886                 pgoff_t next = 0, tofind;
1887                 struct page **pages;
1888
1889                 tofind = min((cifs_sb->wsize / PAGE_CACHE_SIZE) - 1,
1890                                 end - index) + 1;
1891
1892                 wdata = cifs_writedata_alloc((unsigned int)tofind,
1893                                              cifs_writev_complete);
1894                 if (!wdata) {
1895                         rc = -ENOMEM;
1896                         break;
1897                 }
1898
1899                 /*
1900                  * find_get_pages_tag seems to return a max of 256 on each
1901                  * iteration, so we must call it several times in order to
1902                  * fill the array or the wsize is effectively limited to
1903                  * 256 * PAGE_CACHE_SIZE.
1904                  */
1905                 found_pages = 0;
1906                 pages = wdata->pages;
1907                 do {
1908                         nr_pages = find_get_pages_tag(mapping, &index,
1909                                                         PAGECACHE_TAG_DIRTY,
1910                                                         tofind, pages);
1911                         found_pages += nr_pages;
1912                         tofind -= nr_pages;
1913                         pages += nr_pages;
1914                 } while (nr_pages && tofind && index <= end);
1915
1916                 if (found_pages == 0) {
1917                         kref_put(&wdata->refcount, cifs_writedata_release);
1918                         break;
1919                 }
1920
1921                 nr_pages = 0;
1922                 for (i = 0; i < found_pages; i++) {
1923                         page = wdata->pages[i];
1924                         /*
1925                          * At this point we hold neither mapping->tree_lock nor
1926                          * lock on the page itself: the page may be truncated or
1927                          * invalidated (changing page->mapping to NULL), or even
1928                          * swizzled back from swapper_space to tmpfs file
1929                          * mapping
1930                          */
1931
1932                         if (nr_pages == 0)
1933                                 lock_page(page);
1934                         else if (!trylock_page(page))
1935                                 break;
1936
1937                         if (unlikely(page->mapping != mapping)) {
1938                                 unlock_page(page);
1939                                 break;
1940                         }
1941
1942                         if (!wbc->range_cyclic && page->index > end) {
1943                                 done = true;
1944                                 unlock_page(page);
1945                                 break;
1946                         }
1947
1948                         if (next && (page->index != next)) {
1949                                 /* Not next consecutive page */
1950                                 unlock_page(page);
1951                                 break;
1952                         }
1953
1954                         if (wbc->sync_mode != WB_SYNC_NONE)
1955                                 wait_on_page_writeback(page);
1956
1957                         if (PageWriteback(page) ||
1958                                         !clear_page_dirty_for_io(page)) {
1959                                 unlock_page(page);
1960                                 break;
1961                         }
1962
1963                         /*
1964                          * This actually clears the dirty bit in the radix tree.
1965                          * See cifs_writepage() for more commentary.
1966                          */
1967                         set_page_writeback(page);
1968
1969                         if (page_offset(page) >= i_size_read(mapping->host)) {
1970                                 done = true;
1971                                 unlock_page(page);
1972                                 end_page_writeback(page);
1973                                 break;
1974                         }
1975
1976                         wdata->pages[i] = page;
1977                         next = page->index + 1;
1978                         ++nr_pages;
1979                 }
1980
1981                 /* reset index to refind any pages skipped */
1982                 if (nr_pages == 0)
1983                         index = wdata->pages[0]->index + 1;
1984
1985                 /* put any pages we aren't going to use */
1986                 for (i = nr_pages; i < found_pages; i++) {
1987                         page_cache_release(wdata->pages[i]);
1988                         wdata->pages[i] = NULL;
1989                 }
1990
1991                 /* nothing to write? */
1992                 if (nr_pages == 0) {
1993                         kref_put(&wdata->refcount, cifs_writedata_release);
1994                         continue;
1995                 }
1996
1997                 wdata->sync_mode = wbc->sync_mode;
1998                 wdata->nr_pages = nr_pages;
1999                 wdata->offset = page_offset(wdata->pages[0]);
2000                 wdata->pagesz = PAGE_CACHE_SIZE;
2001                 wdata->tailsz =
2002                         min(i_size_read(mapping->host) -
2003                             page_offset(wdata->pages[nr_pages - 1]),
2004                             (loff_t)PAGE_CACHE_SIZE);
2005                 wdata->bytes = ((nr_pages - 1) * PAGE_CACHE_SIZE) +
2006                                         wdata->tailsz;
2007
2008                 do {
2009                         if (wdata->cfile != NULL)
2010                                 cifsFileInfo_put(wdata->cfile);
2011                         wdata->cfile = find_writable_file(CIFS_I(mapping->host),
2012                                                           false);
2013                         if (!wdata->cfile) {
2014                                 cERROR(1, "No writable handles for inode");
2015                                 rc = -EBADF;
2016                                 break;
2017                         }
2018                         wdata->pid = wdata->cfile->pid;
2019                         server = tlink_tcon(wdata->cfile->tlink)->ses->server;
2020                         rc = server->ops->async_writev(wdata);
2021                 } while (wbc->sync_mode == WB_SYNC_ALL && rc == -EAGAIN);
2022
2023                 for (i = 0; i < nr_pages; ++i)
2024                         unlock_page(wdata->pages[i]);
2025
2026                 /* send failure -- clean up the mess */
2027                 if (rc != 0) {
2028                         for (i = 0; i < nr_pages; ++i) {
2029                                 if (rc == -EAGAIN)
2030                                         redirty_page_for_writepage(wbc,
2031                                                            wdata->pages[i]);
2032                                 else
2033                                         SetPageError(wdata->pages[i]);
2034                                 end_page_writeback(wdata->pages[i]);
2035                                 page_cache_release(wdata->pages[i]);
2036                         }
2037                         if (rc != -EAGAIN)
2038                                 mapping_set_error(mapping, rc);
2039                 }
2040                 kref_put(&wdata->refcount, cifs_writedata_release);
2041
2042                 wbc->nr_to_write -= nr_pages;
2043                 if (wbc->nr_to_write <= 0)
2044                         done = true;
2045
2046                 index = next;
2047         }
2048
2049         if (!scanned && !done) {
2050                 /*
2051                  * We hit the last page and there is more work to be done: wrap
2052                  * back to the start of the file
2053                  */
2054                 scanned = true;
2055                 index = 0;
2056                 goto retry;
2057         }
2058
2059         if (wbc->range_cyclic || (range_whole && wbc->nr_to_write > 0))
2060                 mapping->writeback_index = index;
2061
2062         return rc;
2063 }
2064
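/*
 * Write a single locked page synchronously: -EAGAIN is retried forever
 * for data-integrity writeback (WB_SYNC_ALL), otherwise the page is
 * redirtied for a later pass.
 */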
2065 static int
2066 cifs_writepage_locked(struct page *page, struct writeback_control *wbc)
2067 {
2068         int rc;
2069         unsigned int xid;
2070
2071         xid = get_xid();
2072 /* BB add check for wbc flags */
2073         page_cache_get(page);
2074         if (!PageUptodate(page))
2075                 cFYI(1, "ppw - page not up to date");
2076
2077         /*
2078          * Set the "writeback" flag, and clear "dirty" in the radix tree.
2079          *
2080          * A writepage() implementation always needs to do either this,
2081          * or re-dirty the page with "redirty_page_for_writepage()" in
2082          * the case of a failure.
2083          *
2084          * Just unlocking the page will cause the radix tree tag-bits
2085          * to fail to update with the state of the page correctly.
2086          */
2087         set_page_writeback(page);
2088 retry_write:
2089         rc = cifs_partialpagewrite(page, 0, PAGE_CACHE_SIZE);
2090         if (rc == -EAGAIN && wbc->sync_mode == WB_SYNC_ALL)
2091                 goto retry_write;
2092         else if (rc == -EAGAIN)
2093                 redirty_page_for_writepage(wbc, page);
2094         else if (rc != 0)
2095                 SetPageError(page);
2096         else
2097                 SetPageUptodate(page);
2098         end_page_writeback(page);
2099         page_cache_release(page);
2100         free_xid(xid);
2101         return rc;
2102 }
2103
2104 static int cifs_writepage(struct page *page, struct writeback_control *wbc)
2105 {
2106         int rc = cifs_writepage_locked(page, wbc);
2107         unlock_page(page);
2108         return rc;
2109 }
2110
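/*
 * ->write_end for the CIFS address space: pages that did not end up
 * fully uptodate are written through synchronously via cifs_write,
 * while uptodate pages are simply marked dirty for later writeback.
 */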
2111 static int cifs_write_end(struct file *file, struct address_space *mapping,
2112                         loff_t pos, unsigned len, unsigned copied,
2113                         struct page *page, void *fsdata)
2114 {
2115         int rc;
2116         struct inode *inode = mapping->host;
2117         struct cifsFileInfo *cfile = file->private_data;
2118         struct cifs_sb_info *cifs_sb = CIFS_SB(cfile->dentry->d_sb);
2119         __u32 pid;
2120
2121         if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
2122                 pid = cfile->pid;
2123         else
2124                 pid = current->tgid;
2125
2126         cFYI(1, "write_end for page %p from pos %lld with %d bytes",
2127                  page, pos, copied);
2128
2129         if (PageChecked(page)) {
2130                 if (copied == len)
2131                         SetPageUptodate(page);
2132                 ClearPageChecked(page);
2133         } else if (!PageUptodate(page) && copied == PAGE_CACHE_SIZE)
2134                 SetPageUptodate(page);
2135
2136         if (!PageUptodate(page)) {
2137                 char *page_data;
2138                 unsigned offset = pos & (PAGE_CACHE_SIZE - 1);
2139                 unsigned int xid;
2140
2141                 xid = get_xid();
2142                 /* this is probably better than calling cifs_partialpagewrite
2143                    directly, since in this function the file handle is known
2144                    and we might as well use it */
2145                 /* BB check if anything else is missing from ppw,
2146                    such as updating the last write time */
2147                 page_data = kmap(page);
2148                 rc = cifs_write(cfile, pid, page_data + offset, copied, &pos);
2149                 /* if (rc < 0) should we set writebehind rc? */
2150                 kunmap(page);
2151
2152                 free_xid(xid);
2153         } else {
2154                 rc = copied;
2155                 pos += copied;
2156                 set_page_dirty(page);
2157         }
2158
2159         if (rc > 0) {
2160                 spin_lock(&inode->i_lock);
2161                 if (pos > inode->i_size)
2162                         i_size_write(inode, pos);
2163                 spin_unlock(&inode->i_lock);
2164         }
2165
2166         unlock_page(page);
2167         page_cache_release(page);
2168
2169         return rc;
2170 }
2171
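/*
 * fsync for strict cache mode: flush dirty pages and, if we no longer
 * hold a read oplock, invalidate the page cache before asking the
 * server to flush the file.
 */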
2172 int cifs_strict_fsync(struct file *file, loff_t start, loff_t end,
2173                       int datasync)
2174 {
2175         unsigned int xid;
2176         int rc = 0;
2177         struct cifs_tcon *tcon;
2178         struct TCP_Server_Info *server;
2179         struct cifsFileInfo *smbfile = file->private_data;
2180         struct inode *inode = file_inode(file);
2181         struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
2182
2183         rc = filemap_write_and_wait_range(inode->i_mapping, start, end);
2184         if (rc)
2185                 return rc;
2186         mutex_lock(&inode->i_mutex);
2187
2188         xid = get_xid();
2189
2190         cFYI(1, "Sync file - name: %s datasync: 0x%x",
2191                 file->f_path.dentry->d_name.name, datasync);
2192
2193         if (!CIFS_I(inode)->clientCanCacheRead) {
2194                 rc = cifs_invalidate_mapping(inode);
2195                 if (rc) {
2196                         cFYI(1, "rc: %d during invalidate phase", rc);
2197                         rc = 0; /* don't care about it in fsync */
2198                 }
2199         }
2200
2201         tcon = tlink_tcon(smbfile->tlink);
2202         if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOSSYNC)) {
2203                 server = tcon->ses->server;
2204                 if (server->ops->flush)
2205                         rc = server->ops->flush(xid, tcon, &smbfile->fid);
2206                 else
2207                         rc = -ENOSYS;
2208         }
2209
2210         free_xid(xid);
2211         mutex_unlock(&inode->i_mutex);
2212         return rc;
2213 }
2214
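/*
 * Plain fsync: identical to the strict version above except that the
 * page cache is left alone.
 */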
2215 int cifs_fsync(struct file *file, loff_t start, loff_t end, int datasync)
2216 {
2217         unsigned int xid;
2218         int rc = 0;
2219         struct cifs_tcon *tcon;
2220         struct TCP_Server_Info *server;
2221         struct cifsFileInfo *smbfile = file->private_data;
2222         struct cifs_sb_info *cifs_sb = CIFS_SB(file->f_path.dentry->d_sb);
2223         struct inode *inode = file->f_mapping->host;
2224
2225         rc = filemap_write_and_wait_range(inode->i_mapping, start, end);
2226         if (rc)
2227                 return rc;
2228         mutex_lock(&inode->i_mutex);
2229
2230         xid = get_xid();
2231
2232         cFYI(1, "Sync file - name: %s datasync: 0x%x",
2233                 file->f_path.dentry->d_name.name, datasync);
2234
2235         tcon = tlink_tcon(smbfile->tlink);
2236         if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOSSYNC)) {
2237                 server = tcon->ses->server;
2238                 if (server->ops->flush)
2239                         rc = server->ops->flush(xid, tcon, &smbfile->fid);
2240                 else
2241                         rc = -ENOSYS;
2242         }
2243
2244         free_xid(xid);
2245         mutex_unlock(&inode->i_mutex);
2246         return rc;
2247 }
2248
2249 /*
2250  * As the file closes, flush all cached write data for this inode,
2251  * checking for write-behind errors.
2252  */
2253 int cifs_flush(struct file *file, fl_owner_t id)
2254 {
2255         struct inode *inode = file_inode(file);
2256         int rc = 0;
2257
2258         if (file->f_mode & FMODE_WRITE)
2259                 rc = filemap_write_and_wait(inode->i_mapping);
2260
2261         cFYI(1, "Flush inode %p file %p rc %d", inode, file, rc);
2262
2263         return rc;
2264 }
2265
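/* Allocate num_pages pages, undoing every allocation on failure. */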
2266 static int
2267 cifs_write_allocate_pages(struct page **pages, unsigned long num_pages)
2268 {
2269         int rc = 0;
2270         unsigned long i;
2271
2272         for (i = 0; i < num_pages; i++) {
2273                 pages[i] = alloc_page(GFP_KERNEL|__GFP_HIGHMEM);
2274                 if (!pages[i]) {
2275                         /*
2276                          * save the number of pages we have already allocated
2277                          * and return with an ENOMEM error
2278                          */
2279                         num_pages = i;
2280                         rc = -ENOMEM;
2281                         break;
2282                 }
2283         }
2284
2285         if (rc) {
2286                 for (i = 0; i < num_pages; i++)
2287                         put_page(pages[i]);
2288         }
2289         return rc;
2290 }
2291
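/*
 * Number of pages needed to hold min(len, wsize) bytes, optionally
 * returning the clamped length in *cur_len. For example, assuming a
 * 4096-byte PAGE_SIZE, wsize = 57344 and len = 100000 give
 * clen = 57344 and 14 pages.
 */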
2292 static inline
2293 size_t get_numpages(const size_t wsize, const size_t len, size_t *cur_len)
2294 {
2295         size_t num_pages;
2296         size_t clen;
2297
2298         clen = min_t(const size_t, len, wsize);
2299         num_pages = DIV_ROUND_UP(clen, PAGE_SIZE);
2300
2301         if (cur_len)
2302                 *cur_len = clen;
2303
2304         return num_pages;
2305 }
2306
2307 static void
2308 cifs_uncached_writev_complete(struct work_struct *work)
2309 {
2310         int i;
2311         struct cifs_writedata *wdata = container_of(work,
2312                                         struct cifs_writedata, work);
2313         struct inode *inode = wdata->cfile->dentry->d_inode;
2314         struct cifsInodeInfo *cifsi = CIFS_I(inode);
2315
2316         spin_lock(&inode->i_lock);
2317         cifs_update_eof(cifsi, wdata->offset, wdata->bytes);
2318         if (cifsi->server_eof > inode->i_size)
2319                 i_size_write(inode, cifsi->server_eof);
2320         spin_unlock(&inode->i_lock);
2321
2322         complete(&wdata->done);
2323
2324         if (wdata->result != -EAGAIN) {
2325                 for (i = 0; i < wdata->nr_pages; i++)
2326                         put_page(wdata->pages[i]);
2327         }
2328
2329         kref_put(&wdata->refcount, cifs_writedata_release);
2330 }
2331
2332 /* attempt to send write to server, retry on any -EAGAIN errors */
2333 static int
2334 cifs_uncached_retry_writev(struct cifs_writedata *wdata)
2335 {
2336         int rc;
2337         struct TCP_Server_Info *server;
2338
2339         server = tlink_tcon(wdata->cfile->tlink)->ses->server;
2340
2341         do {
2342                 if (wdata->cfile->invalidHandle) {
2343                         rc = cifs_reopen_file(wdata->cfile, false);
2344                         if (rc != 0)
2345                                 continue;
2346                 }
2347                 rc = server->ops->async_writev(wdata);
2348         } while (rc == -EAGAIN);
2349
2350         return rc;
2351 }
2352
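/*
 * Uncached write: copy the user's iovec into freshly allocated pages,
 * fire off asynchronous writes in wsize-sized chunks, then wait for
 * the replies in order of increasing offset, resending any chunk that
 * fails with -EAGAIN.
 */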
2353 static ssize_t
2354 cifs_iovec_write(struct file *file, const struct iovec *iov,
2355                  unsigned long nr_segs, loff_t *poffset)
2356 {
2357         unsigned long nr_pages, i;
2358         size_t copied, len, cur_len;
2359         ssize_t total_written = 0;
2360         loff_t offset;
2361         struct iov_iter it;
2362         struct cifsFileInfo *open_file;
2363         struct cifs_tcon *tcon;
2364         struct cifs_sb_info *cifs_sb;
2365         struct cifs_writedata *wdata, *tmp;
2366         struct list_head wdata_list;
2367         int rc;
2368         pid_t pid;
2369
2370         len = iov_length(iov, nr_segs);
2371         if (!len)
2372                 return 0;
2373
2374         rc = generic_write_checks(file, poffset, &len, 0);
2375         if (rc)
2376                 return rc;
2377
2378         INIT_LIST_HEAD(&wdata_list);
2379         cifs_sb = CIFS_SB(file->f_path.dentry->d_sb);
2380         open_file = file->private_data;
2381         tcon = tlink_tcon(open_file->tlink);
2382
2383         if (!tcon->ses->server->ops->async_writev)
2384                 return -ENOSYS;
2385
2386         offset = *poffset;
2387
2388         if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
2389                 pid = open_file->pid;
2390         else
2391                 pid = current->tgid;
2392
2393         iov_iter_init(&it, iov, nr_segs, len, 0);
2394         do {
2395                 size_t save_len;
2396
2397                 nr_pages = get_numpages(cifs_sb->wsize, len, &cur_len);
2398                 wdata = cifs_writedata_alloc(nr_pages,
2399                                              cifs_uncached_writev_complete);
2400                 if (!wdata) {
2401                         rc = -ENOMEM;
2402                         break;
2403                 }
2404
2405                 rc = cifs_write_allocate_pages(wdata->pages, nr_pages);
2406                 if (rc) {
2407                         kfree(wdata);
2408                         break;
2409                 }
2410
2411                 save_len = cur_len;
2412                 for (i = 0; i < nr_pages; i++) {
2413                         copied = min_t(const size_t, cur_len, PAGE_SIZE);
2414                         copied = iov_iter_copy_from_user(wdata->pages[i], &it,
2415                                                          0, copied);
2416                         cur_len -= copied;
2417                         iov_iter_advance(&it, copied);
2418                 }
2419                 cur_len = save_len - cur_len;
2420
2421                 wdata->sync_mode = WB_SYNC_ALL;
2422                 wdata->nr_pages = nr_pages;
2423                 wdata->offset = (__u64)offset;
2424                 wdata->cfile = cifsFileInfo_get(open_file);
2425                 wdata->pid = pid;
2426                 wdata->bytes = cur_len;
2427                 wdata->pagesz = PAGE_SIZE;
2428                 wdata->tailsz = cur_len - ((nr_pages - 1) * PAGE_SIZE);
2429                 rc = cifs_uncached_retry_writev(wdata);
2430                 if (rc) {
2431                         kref_put(&wdata->refcount, cifs_writedata_release);
2432                         break;
2433                 }
2434
2435                 list_add_tail(&wdata->list, &wdata_list);
2436                 offset += cur_len;
2437                 len -= cur_len;
2438         } while (len > 0);
2439
2440         /*
2441          * If at least one write was successfully sent, then discard any rc
2442          * value from the later failed sends. If the outstanding writes
2443          * succeed, we'll end up returning whatever was written. If one
2444          * fails, we'll get a new rc value from that.
2445          */
2446         if (!list_empty(&wdata_list))
2447                 rc = 0;
2448
2449         /*
2450          * Wait for and collect replies for any successful sends in order of
2451          * increasing offset. Once an error is hit or we get a fatal signal
2452          * while waiting, then return without waiting for any more replies.
2453          */
2454 restart_loop:
2455         list_for_each_entry_safe(wdata, tmp, &wdata_list, list) {
2456                 if (!rc) {
2457                         /* FIXME: freezable too? */
2458                         rc = wait_for_completion_killable(&wdata->done);
2459                         if (rc)
2460                                 rc = -EINTR;
2461                         else if (wdata->result)
2462                                 rc = wdata->result;
2463                         else
2464                                 total_written += wdata->bytes;
2465
2466                         /* resend call if it's a retryable error */
2467                         if (rc == -EAGAIN) {
2468                                 rc = cifs_uncached_retry_writev(wdata);
2469                                 goto restart_loop;
2470                         }
2471                 }
2472                 list_del_init(&wdata->list);
2473                 kref_put(&wdata->refcount, cifs_writedata_release);
2474         }
2475
2476         if (total_written > 0)
2477                 *poffset += total_written;
2478
2479         cifs_stats_bytes_written(tcon, total_written);
2480         return total_written ? total_written : (ssize_t)rc;
2481 }
2482
2483 ssize_t cifs_user_writev(struct kiocb *iocb, const struct iovec *iov,
2484                                 unsigned long nr_segs, loff_t pos)
2485 {
2486         ssize_t written;
2487         struct inode *inode;
2488
2489         inode = file_inode(iocb->ki_filp);
2490
2491         /*
2492          * BB - optimize the case when signing is disabled: we could drop
2493          * this extra memory-to-memory copy and construct the write request
2494          * directly from the iovec buffers.
2495          */
2496
2497         written = cifs_iovec_write(iocb->ki_filp, iov, nr_segs, &pos);
2498         if (written > 0) {
2499                 CIFS_I(inode)->invalid_mapping = true;
2500                 iocb->ki_pos = pos;
2501         }
2502
2503         return written;
2504 }
2505
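/*
 * Write through the page cache while holding lock_sem shared, so that
 * nobody can add a conflicting brlock underneath us; the write is
 * refused with -EACCES if a mandatory lock conflicts with the range.
 */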
2506 static ssize_t
2507 cifs_writev(struct kiocb *iocb, const struct iovec *iov,
2508             unsigned long nr_segs, loff_t pos)
2509 {
2510         struct file *file = iocb->ki_filp;
2511         struct cifsFileInfo *cfile = (struct cifsFileInfo *)file->private_data;
2512         struct inode *inode = file->f_mapping->host;
2513         struct cifsInodeInfo *cinode = CIFS_I(inode);
2514         struct TCP_Server_Info *server = tlink_tcon(cfile->tlink)->ses->server;
2515         ssize_t rc = -EACCES;
2516
2517         BUG_ON(iocb->ki_pos != pos);
2518
2519         sb_start_write(inode->i_sb);
2520
2521         /*
2522          * We need to hold the sem to be sure nobody modifies lock list
2523          * with a brlock that prevents writing.
2524          */
2525         down_read(&cinode->lock_sem);
2526         if (!cifs_find_lock_conflict(cfile, pos, iov_length(iov, nr_segs),
2527                                      server->vals->exclusive_lock_type, NULL,
2528                                      CIFS_WRITE_OP)) {
2529                 mutex_lock(&inode->i_mutex);
2530                 rc = __generic_file_aio_write(iocb, iov, nr_segs,
2531                                                &iocb->ki_pos);
2532                 mutex_unlock(&inode->i_mutex);
2533         }
2534
2535         if (rc > 0 || rc == -EIOCBQUEUED) {
2536                 ssize_t err;
2537
2538                 err = generic_write_sync(file, pos, rc);
2539                 if (err < 0 && rc > 0)
2540                         rc = err;
2541         }
2542
2543         up_read(&cinode->lock_sem);
2544         sb_end_write(inode->i_sb);
2545         return rc;
2546 }
2547
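/*
 * Strict cache mode dispatch: with an exclusive oplock the write can
 * go through the page cache; without one, fall back to the uncached
 * path and drop any read oplock so that cached pages cannot go stale.
 */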
2548 ssize_t
2549 cifs_strict_writev(struct kiocb *iocb, const struct iovec *iov,
2550                    unsigned long nr_segs, loff_t pos)
2551 {
2552         struct inode *inode = file_inode(iocb->ki_filp);
2553         struct cifsInodeInfo *cinode = CIFS_I(inode);
2554         struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
2555         struct cifsFileInfo *cfile = (struct cifsFileInfo *)
2556                                                 iocb->ki_filp->private_data;
2557         struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
2558         ssize_t written;
2559
2560         if (cinode->clientCanCacheAll) {
2561                 if (cap_unix(tcon->ses) &&
2562                 (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability))
2563                     && ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0))
2564                         return generic_file_aio_write(iocb, iov, nr_segs, pos);
2565                 return cifs_writev(iocb, iov, nr_segs, pos);
2566         }
2567         /*
2568          * For non-oplocked files in strict cache mode we need to write the data
2569          * to the server exactly from pos to pos+len-1 rather than flush all
2570          * affected pages, because flushing may cause an error with mandatory
2571          * locks on these pages but not on the region from pos to pos+len-1.
2572          */
2573         written = cifs_user_writev(iocb, iov, nr_segs, pos);
2574         if (written > 0 && cinode->clientCanCacheRead) {
2575                 /*
2576                  * A Windows 7 server can delay breaking a level2 oplock when a
2577                  * write request comes in - break it on the client to prevent
2578                  * reading stale data.
2579                  */
2580                 cifs_invalidate_mapping(inode);
2581                 cFYI(1, "Set no oplock for inode=%p after a write operation",
2582                      inode);
2583                 cinode->clientCanCacheRead = false;
2584         }
2585         return written;
2586 }
2587
2588 static struct cifs_readdata *
2589 cifs_readdata_alloc(unsigned int nr_pages, work_func_t complete)
2590 {
2591         struct cifs_readdata *rdata;
2592
2593         rdata = kzalloc(sizeof(*rdata) + (sizeof(struct page *) * nr_pages),
2594                         GFP_KERNEL);
2595         if (rdata != NULL) {
2596                 kref_init(&rdata->refcount);
2597                 INIT_LIST_HEAD(&rdata->list);
2598                 init_completion(&rdata->done);
2599                 INIT_WORK(&rdata->work, complete);
2600         }
2601
2602         return rdata;
2603 }
2604
2605 void
2606 cifs_readdata_release(struct kref *refcount)
2607 {
2608         struct cifs_readdata *rdata = container_of(refcount,
2609                                         struct cifs_readdata, refcount);
2610
2611         if (rdata->cfile)
2612                 cifsFileInfo_put(rdata->cfile);
2613
2614         kfree(rdata);
2615 }
2616
2617 static int
2618 cifs_read_allocate_pages(struct cifs_readdata *rdata, unsigned int nr_pages)
2619 {
2620         int rc = 0;
2621         struct page *page;
2622         unsigned int i;
2623
2624         for (i = 0; i < nr_pages; i++) {
2625                 page = alloc_page(GFP_KERNEL|__GFP_HIGHMEM);
2626                 if (!page) {
2627                         rc = -ENOMEM;
2628                         break;
2629                 }
2630                 rdata->pages[i] = page;
2631         }
2632
2633         if (rc) {
2634                 for (i = 0; i < nr_pages; i++) {
2635                         put_page(rdata->pages[i]);
2636                         rdata->pages[i] = NULL;
2637                 }
2638         }
2639         return rc;
2640 }
2641
2642 static void
2643 cifs_uncached_readdata_release(struct kref *refcount)
2644 {
2645         struct cifs_readdata *rdata = container_of(refcount,
2646                                         struct cifs_readdata, refcount);
2647         unsigned int i;
2648
2649         for (i = 0; i < rdata->nr_pages; i++) {
2650                 put_page(rdata->pages[i]);
2651                 rdata->pages[i] = NULL;
2652         }
2653         cifs_readdata_release(refcount);
2654 }
2655
2656 static int
2657 cifs_retry_async_readv(struct cifs_readdata *rdata)
2658 {
2659         int rc;
2660         struct TCP_Server_Info *server;
2661
2662         server = tlink_tcon(rdata->cfile->tlink)->ses->server;
2663
2664         do {
2665                 if (rdata->cfile->invalidHandle) {
2666                         rc = cifs_reopen_file(rdata->cfile, true);
2667                         if (rc != 0)
2668                                 continue;
2669                 }
2670                 rc = server->ops->async_readv(rdata);
2671         } while (rc == -EAGAIN);
2672
2673         return rc;
2674 }
2675
2676 /**
2677  * cifs_readdata_to_iov - copy data from pages in response to an iovec
2678  * @rdata:      the readdata response with list of pages holding data
2679  * @iov:        vector in which we should copy the data
2680  * @nr_segs:    number of segments in vector
2681  * @offset:     offset into file of the first iovec
2682  * @copied:     used to return the amount of data copied to the iov
2683  *
2684  * This function copies data from a list of pages in a readdata response into
2685  * an array of iovecs. It will first calculate where the data should go
2686  * based on the info in the readdata and then copy the data into that spot.
2687  */
2688 static ssize_t
2689 cifs_readdata_to_iov(struct cifs_readdata *rdata, const struct iovec *iov,
2690                         unsigned long nr_segs, loff_t offset, ssize_t *copied)
2691 {
2692         int rc = 0;
2693         struct iov_iter ii;
2694         size_t pos = rdata->offset - offset;
2695         ssize_t remaining = rdata->bytes;
2696         unsigned char *pdata;
2697         unsigned int i;
2698
2699         /* set up iov_iter and advance to the correct offset */
2700         iov_iter_init(&ii, iov, nr_segs, iov_length(iov, nr_segs), 0);
2701         iov_iter_advance(&ii, pos);
2702
2703         *copied = 0;
2704         for (i = 0; i < rdata->nr_pages; i++) {
2705                 ssize_t copy;
2706                 struct page *page = rdata->pages[i];
2707
2708                 /* copy a whole page or whatever's left */
2709                 copy = min_t(ssize_t, remaining, PAGE_SIZE);
2710
2711                 /* ...but limit it to whatever space is left in the iov */
2712                 copy = min_t(ssize_t, copy, iov_iter_count(&ii));
2713
2714                 /* go while there's data to be copied and no errors */
2715                 if (copy && !rc) {
2716                         pdata = kmap(page);
2717                         rc = memcpy_toiovecend(ii.iov, pdata, ii.iov_offset,
2718                                                 (int)copy);
2719                         kunmap(page);
2720                         if (!rc) {
2721                                 *copied += copy;
2722                                 remaining -= copy;
2723                                 iov_iter_advance(&ii, copy);
2724                         }
2725                 }
2726         }
2727
2728         return rc;
2729 }
2730
2731 static void
2732 cifs_uncached_readv_complete(struct work_struct *work)
2733 {
2734         struct cifs_readdata *rdata = container_of(work,
2735                                                 struct cifs_readdata, work);
2736
2737         complete(&rdata->done);
2738         kref_put(&rdata->refcount, cifs_uncached_readdata_release);
2739 }
2740
2741 static int
2742 cifs_uncached_read_into_pages(struct TCP_Server_Info *server,
2743                         struct cifs_readdata *rdata, unsigned int len)
2744 {
2745         int total_read = 0, result = 0;
2746         unsigned int i;
2747         unsigned int nr_pages = rdata->nr_pages;
2748         struct kvec iov;
2749
2750         rdata->tailsz = PAGE_SIZE;
2751         for (i = 0; i < nr_pages; i++) {
2752                 struct page *page = rdata->pages[i];
2753
2754                 if (len >= PAGE_SIZE) {
2755                         /* enough data to fill the page */
2756                         iov.iov_base = kmap(page);
2757                         iov.iov_len = PAGE_SIZE;
2758                         cFYI(1, "%u: iov_base=%p iov_len=%zu",
2759                                 i, iov.iov_base, iov.iov_len);
2760                         len -= PAGE_SIZE;
2761                 } else if (len > 0) {
2762                         /* enough for partial page, fill and zero the rest */
2763                         iov.iov_base = kmap(page);
2764                         iov.iov_len = len;
2765                         cFYI(1, "%u: iov_base=%p iov_len=%zu",
2766                                 i, iov.iov_base, iov.iov_len);
2767                         memset(iov.iov_base + len, '\0', PAGE_SIZE - len);
2768                         rdata->tailsz = len;
2769                         len = 0;
2770                 } else {
2771                         /* no need to hold page hostage */
2772                         rdata->pages[i] = NULL;
2773                         rdata->nr_pages--;
2774                         put_page(page);
2775                         continue;
2776                 }
2777
2778                 result = cifs_readv_from_socket(server, &iov, 1, iov.iov_len);
2779                 kunmap(page);
2780                 if (result < 0)
2781                         break;
2782
2783                 total_read += result;
2784         }
2785
2786         return total_read > 0 ? total_read : result;
2787 }
2788
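/*
 * Uncached read: issue asynchronous reads in rsize-sized chunks into
 * freshly allocated pages, then collect the replies in order of
 * increasing offset and copy the data out to the user's iovec.
 */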
2789 static ssize_t
2790 cifs_iovec_read(struct file *file, const struct iovec *iov,
2791                  unsigned long nr_segs, loff_t *poffset)
2792 {
2793         ssize_t rc;
2794         size_t len, cur_len;
2795         ssize_t total_read = 0;
2796         loff_t offset = *poffset;
2797         unsigned int npages;
2798         struct cifs_sb_info *cifs_sb;
2799         struct cifs_tcon *tcon;
2800         struct cifsFileInfo *open_file;
2801         struct cifs_readdata *rdata, *tmp;
2802         struct list_head rdata_list;
2803         pid_t pid;
2804
2805         if (!nr_segs)
2806                 return 0;
2807
2808         len = iov_length(iov, nr_segs);
2809         if (!len)
2810                 return 0;
2811
2812         INIT_LIST_HEAD(&rdata_list);
2813         cifs_sb = CIFS_SB(file->f_path.dentry->d_sb);
2814         open_file = file->private_data;
2815         tcon = tlink_tcon(open_file->tlink);
2816
2817         if (!tcon->ses->server->ops->async_readv)
2818                 return -ENOSYS;
2819
2820         if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
2821                 pid = open_file->pid;
2822         else
2823                 pid = current->tgid;
2824
2825         if ((file->f_flags & O_ACCMODE) == O_WRONLY)
2826                 cFYI(1, "attempting read on write only file instance");
2827
2828         do {
2829                 cur_len = min_t(const size_t, len - total_read, cifs_sb->rsize);
2830                 npages = DIV_ROUND_UP(cur_len, PAGE_SIZE);
2831
2832                 /* allocate a readdata struct */
2833                 rdata = cifs_readdata_alloc(npages,
2834                                             cifs_uncached_readv_complete);
2835                 if (!rdata) {
2836                         rc = -ENOMEM;
2837                         break;
2838                 }
2839
2840                 rc = cifs_read_allocate_pages(rdata, npages);
2841                 if (rc)
2842                         goto error;
2843
2844                 rdata->cfile = cifsFileInfo_get(open_file);
2845                 rdata->nr_pages = npages;
2846                 rdata->offset = offset;
2847                 rdata->bytes = cur_len;
2848                 rdata->pid = pid;
2849                 rdata->pagesz = PAGE_SIZE;
2850                 rdata->read_into_pages = cifs_uncached_read_into_pages;
2851
2852                 rc = cifs_retry_async_readv(rdata);
2853 error:
2854                 if (rc) {
2855                         kref_put(&rdata->refcount,
2856                                  cifs_uncached_readdata_release);
2857                         break;
2858                 }
2859
2860                 list_add_tail(&rdata->list, &rdata_list);
2861                 offset += cur_len;
2862                 len -= cur_len;
2863         } while (len > 0);
2864
2865         /* if at least one read request was sent successfully, reset rc */
2866         if (!list_empty(&rdata_list))
2867                 rc = 0;
2868
2869         /* the loop below should proceed in the order of increasing offsets */
2870 restart_loop:
2871         list_for_each_entry_safe(rdata, tmp, &rdata_list, list) {
2872                 if (!rc) {
2873                         ssize_t copied;
2874
2875                         /* FIXME: freezable sleep too? */
2876                         rc = wait_for_completion_killable(&rdata->done);
2877                         if (rc)
2878                                 rc = -EINTR;
2879                         else if (rdata->result)
2880                                 rc = rdata->result;
2881                         else {
2882                                 rc = cifs_readdata_to_iov(rdata, iov,
2883                                                         nr_segs, *poffset,
2884                                                         &copied);
2885                                 total_read += copied;
2886                         }
2887
2888                         /* resend call if it's a retryable error */
2889                         if (rc == -EAGAIN) {
2890                                 rc = cifs_retry_async_readv(rdata);
2891                                 goto restart_loop;
2892                         }
2893                 }
2894                 list_del_init(&rdata->list);
2895                 kref_put(&rdata->refcount, cifs_uncached_readdata_release);
2896         }
2897
2898         cifs_stats_bytes_read(tcon, total_read);
2899         *poffset += total_read;
2900
2901         /* mask nodata case */
2902         if (rc == -ENODATA)
2903                 rc = 0;
2904
2905         return total_read ? total_read : rc;
2906 }
2907
2908 ssize_t cifs_user_readv(struct kiocb *iocb, const struct iovec *iov,
2909                                unsigned long nr_segs, loff_t pos)
2910 {
2911         ssize_t read;
2912
2913         read = cifs_iovec_read(iocb->ki_filp, iov, nr_segs, &pos);
2914         if (read > 0)
2915                 iocb->ki_pos = pos;
2916
2917         return read;
2918 }
2919
2920 ssize_t
2921 cifs_strict_readv(struct kiocb *iocb, const struct iovec *iov,
2922                   unsigned long nr_segs, loff_t pos)
2923 {
2924         struct inode *inode = file_inode(iocb->ki_filp);
2925         struct cifsInodeInfo *cinode = CIFS_I(inode);
2926         struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
2927         struct cifsFileInfo *cfile = (struct cifsFileInfo *)
2928                                                 iocb->ki_filp->private_data;
2929         struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
2930         int rc = -EACCES;
2931
2932         /*
2933          * In strict cache mode we need to read from the server all the time
2934          * if we don't have a level II oplock, because the server can delay
2935          * mtime changes and we therefore can't decide whether to invalidate
2936          * the inode's pages. Reading through the page cache can also fail
2937          * if there are mandatory locks on pages affected by this read but
2938          * not on the region from pos to pos+len-1.
2939          */
2940         if (!cinode->clientCanCacheRead)
2941                 return cifs_user_readv(iocb, iov, nr_segs, pos);
2942
2943         if (cap_unix(tcon->ses) &&
2944             (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) &&
2945             ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0))
2946                 return generic_file_aio_read(iocb, iov, nr_segs, pos);
2947
2948         /*
2949          * We need to hold the sem to be sure nobody modifies lock list
2950          * with a brlock that prevents reading.
2951          */
2952         down_read(&cinode->lock_sem);
2953         if (!cifs_find_lock_conflict(cfile, pos, iov_length(iov, nr_segs),
2954                                      tcon->ses->server->vals->shared_lock_type,
2955                                      NULL, CIFS_READ_OP))
2956                 rc = generic_file_aio_read(iocb, iov, nr_segs, pos);
2957         up_read(&cinode->lock_sem);
2958         return rc;
2959 }
2960
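     /*
      * Synchronous read path: loop issuing ->sync_read() calls of at most
      * rsize bytes, reopening the file handle first if it was invalidated
      * by a reconnect, until the request is satisfied or the server returns
      * an error or zero bytes.
      */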
2961 static ssize_t
2962 cifs_read(struct file *file, char *read_data, size_t read_size, loff_t *offset)
2963 {
2964         int rc = -EACCES;
2965         unsigned int bytes_read = 0;
2966         unsigned int total_read;
2967         unsigned int current_read_size;
2968         unsigned int rsize;
2969         struct cifs_sb_info *cifs_sb;
2970         struct cifs_tcon *tcon;
2971         struct TCP_Server_Info *server;
2972         unsigned int xid;
2973         char *cur_offset;
2974         struct cifsFileInfo *open_file;
2975         struct cifs_io_parms io_parms;
2976         int buf_type = CIFS_NO_BUFFER;
2977         __u32 pid;
2978
2979         xid = get_xid();
2980         cifs_sb = CIFS_SB(file->f_path.dentry->d_sb);
2981
2982         /* FIXME: set up handlers for larger reads and/or convert to async */
2983         rsize = min_t(unsigned int, cifs_sb->rsize, CIFSMaxBufSize);
2984
2985         if (file->private_data == NULL) {
2986                 rc = -EBADF;
2987                 free_xid(xid);
2988                 return rc;
2989         }
2990         open_file = file->private_data;
2991         tcon = tlink_tcon(open_file->tlink);
2992         server = tcon->ses->server;
2993
2994         if (!server->ops->sync_read) {
2995                 free_xid(xid);
2996                 return -ENOSYS;
2997         }
2998
2999         if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
3000                 pid = open_file->pid;
3001         else
3002                 pid = current->tgid;
3003
3004         if ((file->f_flags & O_ACCMODE) == O_WRONLY)
3005                 cFYI(1, "attempting read on write only file instance");
3006
3007         for (total_read = 0, cur_offset = read_data; read_size > total_read;
3008              total_read += bytes_read, cur_offset += bytes_read) {
3009                 current_read_size = min_t(uint, read_size - total_read, rsize);
3010                 /*
3011                  * For Windows ME and 9x we do not want to request more than
3012                  * the server negotiated, since it would then refuse the read.
3013                  */
3014                 if ((tcon->ses) && !(tcon->ses->capabilities &
3015                                 tcon->ses->server->vals->cap_large_files)) {
3016                         current_read_size = min_t(uint, current_read_size,
3017                                         CIFSMaxBufSize);
3018                 }
3019                 rc = -EAGAIN;
3020                 while (rc == -EAGAIN) {
3021                         if (open_file->invalidHandle) {
3022                                 rc = cifs_reopen_file(open_file, true);
3023                                 if (rc != 0)
3024                                         break;
3025                         }
3026                         io_parms.pid = pid;
3027                         io_parms.tcon = tcon;
3028                         io_parms.offset = *offset;
3029                         io_parms.length = current_read_size;
3030                         rc = server->ops->sync_read(xid, open_file, &io_parms,
3031                                                     &bytes_read, &cur_offset,
3032                                                     &buf_type);
3033                 }
3034                 if (rc || (bytes_read == 0)) {
3035                         if (total_read) {
3036                                 break;
3037                         } else {
3038                                 free_xid(xid);
3039                                 return rc;
3040                         }
3041                 } else {
3042                         cifs_stats_bytes_read(tcon, total_read);
3043                         *offset += bytes_read;
3044                 }
3045         }
3046         free_xid(xid);
3047         return total_read;
3048 }
3049
3050 /*
3051  * If the page is mmap'ed into a process' page tables, then we need to make
3052  * sure that it doesn't change while being written back.
3053  */
3054 static int
3055 cifs_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
3056 {
3057         struct page *page = vmf->page;
3058
3059         lock_page(page);
3060         return VM_FAULT_LOCKED;
3061 }
3062
3063 static struct vm_operations_struct cifs_file_vm_ops = {
3064         .fault = filemap_fault,
3065         .page_mkwrite = cifs_page_mkwrite,
3066         .remap_pages = generic_file_remap_pages,
3067 };
3068
3069 int cifs_file_strict_mmap(struct file *file, struct vm_area_struct *vma)
3070 {
3071         int rc, xid;
3072         struct inode *inode = file_inode(file);
3073
3074         xid = get_xid();
3075
3076         if (!CIFS_I(inode)->clientCanCacheRead) {
3077                 rc = cifs_invalidate_mapping(inode);
3078                 if (rc) {
3079                         free_xid(xid);
                              return rc;
                      }
3080         }
3081
3082         rc = generic_file_mmap(file, vma);
3083         if (rc == 0)
3084                 vma->vm_ops = &cifs_file_vm_ops;
3085         free_xid(xid);
3086         return rc;
3087 }
3088
3089 int cifs_file_mmap(struct file *file, struct vm_area_struct *vma)
3090 {
3091         int rc, xid;
3092
3093         xid = get_xid();
3094         rc = cifs_revalidate_file(file);
3095         if (rc) {
3096                 cFYI(1, "Validation prior to mmap failed, error=%d", rc);
3097                 free_xid(xid);
3098                 return rc;
3099         }
3100         rc = generic_file_mmap(file, vma);
3101         if (rc == 0)
3102                 vma->vm_ops = &cifs_file_vm_ops;
3103         free_xid(xid);
3104         return rc;
3105 }
3106
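     /*
      * Completion work for readahead reads: return each page to the LRU,
      * mark it uptodate and copy it to fscache only if the read succeeded,
      * then drop the page and readdata references taken when the request
      * was assembled.
      */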
3107 static void
3108 cifs_readv_complete(struct work_struct *work)
3109 {
3110         unsigned int i;
3111         struct cifs_readdata *rdata = container_of(work,
3112                                                 struct cifs_readdata, work);
3113
3114         for (i = 0; i < rdata->nr_pages; i++) {
3115                 struct page *page = rdata->pages[i];
3116
3117                 lru_cache_add_file(page);
3118
3119                 if (rdata->result == 0) {
3120                         flush_dcache_page(page);
3121                         SetPageUptodate(page);
3122                 }
3123
3124                 unlock_page(page);
3125
3126                 if (rdata->result == 0)
3127                         cifs_readpage_to_fscache(rdata->mapping->host, page);
3128
3129                 page_cache_release(page);
3130                 rdata->pages[i] = NULL;
3131         }
3132         kref_put(&rdata->refcount, cifs_readdata_release);
3133 }
3134
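     /*
      * Like cifs_uncached_read_into_pages() above, but readahead pages
      * lying beyond the server's apparent EOF are zero-filled and marked
      * uptodate rather than read, and any pages past the returned data are
      * unlocked and released instead of being filled.
      */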
3135 static int
3136 cifs_readpages_read_into_pages(struct TCP_Server_Info *server,
3137                         struct cifs_readdata *rdata, unsigned int len)
3138 {
3139         int total_read = 0, result = 0;
3140         unsigned int i;
3141         u64 eof;
3142         pgoff_t eof_index;
3143         unsigned int nr_pages = rdata->nr_pages;
3144         struct kvec iov;
3145
3146         /* determine the eof that the server (probably) has */
3147         eof = CIFS_I(rdata->mapping->host)->server_eof;
3148         eof_index = eof ? (eof - 1) >> PAGE_CACHE_SHIFT : 0;
3149         cFYI(1, "eof=%llu eof_index=%lu", eof, eof_index);
3150
3151         rdata->tailsz = PAGE_CACHE_SIZE;
3152         for (i = 0; i < nr_pages; i++) {
3153                 struct page *page = rdata->pages[i];
3154
3155                 if (len >= PAGE_CACHE_SIZE) {
3156                         /* enough data to fill the page */
3157                         iov.iov_base = kmap(page);
3158                         iov.iov_len = PAGE_CACHE_SIZE;
3159                         cFYI(1, "%u: idx=%lu iov_base=%p iov_len=%zu",
3160                                 i, page->index, iov.iov_base, iov.iov_len);
3161                         len -= PAGE_CACHE_SIZE;
3162                 } else if (len > 0) {
3163                         /* enough for partial page, fill and zero the rest */
3164                         iov.iov_base = kmap(page);
3165                         iov.iov_len = len;
3166                         cFYI(1, "%u: idx=%lu iov_base=%p iov_len=%zu",
3167                                 i, page->index, iov.iov_base, iov.iov_len);
3168                         memset(iov.iov_base + len,
3169                                 '\0', PAGE_CACHE_SIZE - len);
3170                         rdata->tailsz = len;
3171                         len = 0;
3172                 } else if (page->index > eof_index) {
3173                         /*
3174                          * The VFS will not try to do readahead past the
3175                          * i_size, but it's possible that we have outstanding
3176                          * writes with gaps in the middle and the i_size hasn't
3177                          * caught up yet. Populate those with zeroed out pages
3178                          * to prevent the VFS from repeatedly attempting to
3179                          * fill them until the writes are flushed.
3180                          */
3181                         zero_user(page, 0, PAGE_CACHE_SIZE);
3182                         lru_cache_add_file(page);
3183                         flush_dcache_page(page);
3184                         SetPageUptodate(page);
3185                         unlock_page(page);
3186                         page_cache_release(page);
3187                         rdata->pages[i] = NULL;
3188                         rdata->nr_pages--;
3189                         continue;
3190                 } else {
3191                         /* no need to hold page hostage */
3192                         lru_cache_add_file(page);
3193                         unlock_page(page);
3194                         page_cache_release(page);
3195                         rdata->pages[i] = NULL;
3196                         rdata->nr_pages--;
3197                         continue;
3198                 }
3199
3200                 result = cifs_readv_from_socket(server, &iov, 1, iov.iov_len);
3201                 kunmap(page);
3202                 if (result < 0)
3203                         break;
3204
3205                 total_read += result;
3206         }
3207
3208         return total_read > 0 ? total_read : result;
3209 }
3210
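     /*
      * ->readpages(): try fscache first, then batch runs of contiguous
      * pages from the VFS list into async read requests of at most rsize
      * bytes each.
      */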
3211 static int cifs_readpages(struct file *file, struct address_space *mapping,
3212         struct list_head *page_list, unsigned num_pages)
3213 {
3214         int rc;
3215         struct list_head tmplist;
3216         struct cifsFileInfo *open_file = file->private_data;
3217         struct cifs_sb_info *cifs_sb = CIFS_SB(file->f_path.dentry->d_sb);
3218         unsigned int rsize = cifs_sb->rsize;
3219         pid_t pid;
3220
3221         /*
3222          * Give up immediately if rsize is too small to read an entire page.
3223          * The VFS will fall back to readpage. We should never reach this
3224          * point however since we set ra_pages to 0 when the rsize is smaller
3225          * than a cache page.
3226          */
3227         if (unlikely(rsize < PAGE_CACHE_SIZE))
3228                 return 0;
3229
3230         /*
3231          * Read as many pages as possible from fscache. Returns -ENOBUFS
3232          * immediately if the cookie is negative.
3233          */
3234         rc = cifs_readpages_from_fscache(mapping->host, mapping, page_list,
3235                                          &num_pages);
3236         if (rc == 0)
3237                 return rc;
3238
3239         if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
3240                 pid = open_file->pid;
3241         else
3242                 pid = current->tgid;
3243
3244         rc = 0;
3245         INIT_LIST_HEAD(&tmplist);
3246
3247         cFYI(1, "%s: file=%p mapping=%p num_pages=%u", __func__, file,
3248                 mapping, num_pages);
3249
3250         /*
3251          * Start with the page at the end of the list and move it to the
3252          * private list. Do the same with any following pages until we hit
3253          * the rsize limit, hit an index discontinuity, or run out of
3254          * pages. Issue the async read and then start the loop again
3255          * until the list is empty.
3256          *
3257          * Note that list order is important. The page_list is in
3258          * the order of declining indexes. When we put the pages in
3259          * rdata->pages, we want them in increasing order.
3260          */
3261         while (!list_empty(page_list)) {
3262                 unsigned int i;
3263                 unsigned int bytes = PAGE_CACHE_SIZE;
3264                 unsigned int expected_index;
3265                 unsigned int nr_pages = 1;
3266                 loff_t offset;
3267                 struct page *page, *tpage;
3268                 struct cifs_readdata *rdata;
3269
3270                 page = list_entry(page_list->prev, struct page, lru);
3271
3272                 /*
3273                  * Lock the page and put it in the cache. Since no one else
3274                  * should have access to this page, we're safe to simply set
3275                  * PG_locked without checking it first.
3276                  */
3277                 __set_page_locked(page);
3278                 rc = add_to_page_cache_locked(page, mapping,
3279                                               page->index, GFP_KERNEL);
3280
3281                 /* give up if we can't stick it in the cache */
3282                 if (rc) {
3283                         __clear_page_locked(page);
3284                         break;
3285                 }
3286
3287                 /* move first page to the tmplist */
3288                 offset = (loff_t)page->index << PAGE_CACHE_SHIFT;
3289                 list_move_tail(&page->lru, &tmplist);
3290
3291                 /* now try and add more pages onto the request */
3292                 expected_index = page->index + 1;
3293                 list_for_each_entry_safe_reverse(page, tpage, page_list, lru) {
3294                         /* discontinuity? */
3295                         if (page->index != expected_index)
3296                                 break;
3297
3298                         /* would this page push the read over the rsize? */
3299                         if (bytes + PAGE_CACHE_SIZE > rsize)
3300                                 break;
3301
3302                         __set_page_locked(page);
3303                         if (add_to_page_cache_locked(page, mapping,
3304                                                 page->index, GFP_KERNEL)) {
3305                                 __clear_page_locked(page);
3306                                 break;
3307                         }
3308                         list_move_tail(&page->lru, &tmplist);
3309                         bytes += PAGE_CACHE_SIZE;
3310                         expected_index++;
3311                         nr_pages++;
3312                 }
3313
3314                 rdata = cifs_readdata_alloc(nr_pages, cifs_readv_complete);
3315                 if (!rdata) {
3316                         /* best to give up if we're out of mem */
3317                         list_for_each_entry_safe(page, tpage, &tmplist, lru) {
3318                                 list_del(&page->lru);
3319                                 lru_cache_add_file(page);
3320                                 unlock_page(page);
3321                                 page_cache_release(page);
3322                         }
3323                         rc = -ENOMEM;
3324                         break;
3325                 }
3326
3327                 rdata->cfile = cifsFileInfo_get(open_file);
3328                 rdata->mapping = mapping;
3329                 rdata->offset = offset;
3330                 rdata->bytes = bytes;
3331                 rdata->pid = pid;
3332                 rdata->pagesz = PAGE_CACHE_SIZE;
3333                 rdata->read_into_pages = cifs_readpages_read_into_pages;
3334
3335                 list_for_each_entry_safe(page, tpage, &tmplist, lru) {
3336                         list_del(&page->lru);
3337                         rdata->pages[rdata->nr_pages++] = page;
3338                 }
3339
3340                 rc = cifs_retry_async_readv(rdata);
3341                 if (rc != 0) {
3342                         for (i = 0; i < rdata->nr_pages; i++) {
3343                                 page = rdata->pages[i];
3344                                 lru_cache_add_file(page);
3345                                 unlock_page(page);
3346                                 page_cache_release(page);
3347                         }
3348                         kref_put(&rdata->refcount, cifs_readdata_release);
3349                         break;
3350                 }
3351
3352                 kref_put(&rdata->refcount, cifs_readdata_release);
3353         }
3354
3355         return rc;
3356 }
3357
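     /*
      * Fill a single page, preferring fscache and falling back to a
      * synchronous cifs_read(); a short read is zero-filled before the page
      * is marked uptodate. The page is left locked for the caller to
      * unlock.
      */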
3358 static int cifs_readpage_worker(struct file *file, struct page *page,
3359         loff_t *poffset)
3360 {
3361         char *read_data;
3362         int rc;
3363
3364         /* Is the page cached? */
3365         rc = cifs_readpage_from_fscache(file_inode(file), page);
3366         if (rc == 0)
3367                 goto read_complete;
3368
3369         page_cache_get(page);
3370         read_data = kmap(page);
3371         /* for reads over a certain size we could initiate async read-ahead */
3372
3373         rc = cifs_read(file, read_data, PAGE_CACHE_SIZE, poffset);
3374
3375         if (rc < 0)
3376                 goto io_error;
3377         else
3378                 cFYI(1, "Bytes read %d", rc);
3379
3380         file_inode(file)->i_atime =
3381                 current_fs_time(file_inode(file)->i_sb);
3382
3383         if (PAGE_CACHE_SIZE > rc)
3384                 memset(read_data + rc, 0, PAGE_CACHE_SIZE - rc);
3385
3386         flush_dcache_page(page);
3387         SetPageUptodate(page);
3388
3389         /* send this page to the cache */
3390         cifs_readpage_to_fscache(file_inode(file), page);
3391
3392         rc = 0;
3393
3394 io_error:
3395         kunmap(page);
3396         page_cache_release(page);
3397
3398 read_complete:
3399         return rc;
3400 }
3401
3402 static int cifs_readpage(struct file *file, struct page *page)
3403 {
3404         loff_t offset = (loff_t)page->index << PAGE_CACHE_SHIFT;
3405         int rc = -EACCES;
3406         unsigned int xid;
3407
3408         xid = get_xid();
3409
3410         if (file->private_data == NULL) {
3411                 rc = -EBADF;
3412                 free_xid(xid);
3413                 return rc;
3414         }
3415
3416         cFYI(1, "readpage %p at offset %d 0x%x",
3417                  page, (int)offset, (int)offset);
3418
3419         rc = cifs_readpage_worker(file, page, &offset);
3420
3421         unlock_page(page);
3422
3423         free_xid(xid);
3424         return rc;
3425 }
3426
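     /* Is any handle on this inode open with write access? */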
3427 static int is_inode_writable(struct cifsInodeInfo *cifs_inode)
3428 {
3429         struct cifsFileInfo *open_file;
3430
3431         spin_lock(&cifs_file_list_lock);
3432         list_for_each_entry(open_file, &cifs_inode->openFileList, flist) {
3433                 if (OPEN_FMODE(open_file->f_flags) & FMODE_WRITE) {
3434                         spin_unlock(&cifs_file_list_lock);
3435                         return 1;
3436                 }
3437         }
3438         spin_unlock(&cifs_file_list_lock);
3439         return 0;
3440 }
3441
3442 /* We do not want to update the file size from the server for
3443    inodes open for write, to avoid races with writepage extending
3444    the file. In the future we could refresh the inode only on
3445    increases in file size, but that is hard to do without racing
3446    with writebehind page caching in the current Linux kernel
3447    design */
3448 bool is_size_safe_to_change(struct cifsInodeInfo *cifsInode, __u64 end_of_file)
3449 {
3450         if (!cifsInode)
3451                 return true;
3452
3453         if (is_inode_writable(cifsInode)) {
3454                 /* This inode is open for write at least once */
3455                 struct cifs_sb_info *cifs_sb;
3456
3457                 cifs_sb = CIFS_SB(cifsInode->vfs_inode.i_sb);
3458                 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_DIRECT_IO) {
3459                         /* no page cache to corrupt on direct I/O,
3460                            so we can change the size safely */
3461                         return true;
3462                 }
3463
3464                 if (i_size_read(&cifsInode->vfs_inode) < end_of_file)
3465                         return true;
3466
3467                 return false;
3468         } else
3469                 return true;
3470 }
3471
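     /*
      * ->write_begin(): grab the page the copy will land in and decide
      * whether it must be read in first. The read is skipped for a
      * full-page write and, when we hold a read oplock, for pages beyond
      * EOF or writes covering all of the page's existing data; in those
      * cases the uncovered parts are zeroed and PG_checked is set instead.
      */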
3472 static int cifs_write_begin(struct file *file, struct address_space *mapping,
3473                         loff_t pos, unsigned len, unsigned flags,
3474                         struct page **pagep, void **fsdata)
3475 {
3476         pgoff_t index = pos >> PAGE_CACHE_SHIFT;
3477         loff_t offset = pos & (PAGE_CACHE_SIZE - 1);
3478         loff_t page_start = pos & PAGE_MASK;
3479         loff_t i_size;
3480         struct page *page;
3481         int rc = 0;
3482
3483         cFYI(1, "write_begin from %lld len %d", (long long)pos, len);
3484
3485         page = grab_cache_page_write_begin(mapping, index, flags);
3486         if (!page) {
3487                 rc = -ENOMEM;
3488                 goto out;
3489         }
3490
3491         if (PageUptodate(page))
3492                 goto out;
3493
3494         /*
3495          * If we write a full page it will be up to date, no need to read from
3496          * the server. If the write is short, we'll end up doing a sync write
3497          * instead.
3498          */
3499         if (len == PAGE_CACHE_SIZE)
3500                 goto out;
3501
3502         /*
3503          * optimize away the read when we have an oplock, and we're not
3504          * expecting to use any of the data we'd be reading in. That
3505          * is, when the page lies beyond the EOF, or straddles the EOF
3506          * and the write will cover all of the existing data.
3507          */
3508         if (CIFS_I(mapping->host)->clientCanCacheRead) {
3509                 i_size = i_size_read(mapping->host);
3510                 if (page_start >= i_size ||
3511                     (offset == 0 && (pos + len) >= i_size)) {
3512                         zero_user_segments(page, 0, offset,
3513                                            offset + len,
3514                                            PAGE_CACHE_SIZE);
3515                         /*
3516                          * PageChecked means that the parts of the page
3517                          * to which we're not writing are considered up
3518                          * to date. Once the data is copied to the
3519                          * page, it can be set uptodate.
3520                          */
3521                         SetPageChecked(page);
3522                         goto out;
3523                 }
3524         }
3525
3526         if ((file->f_flags & O_ACCMODE) != O_WRONLY) {
3527                 /*
3528                  * might as well read a page, it is fast enough. If we get
3529                  * an error, we don't need to return it. cifs_write_end will
3530                  * do a sync write instead since PG_uptodate isn't set.
3531                  */
3532                 cifs_readpage_worker(file, page, &page_start);
3533         } else {
3534                 /* we could try using another file handle if there is one,
3535                    but how would we lock it to prevent a close of that
3536                    handle racing with this read? In any case the data will
3537                    be written out by write_end, so this is fine */
3538         }
3539 out:
3540         *pagep = page;
3541         return rc;
3542 }
3543
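     /* Refuse to release a page that still has private data attached;
        otherwise let fscache decide. */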
3544 static int cifs_release_page(struct page *page, gfp_t gfp)
3545 {
3546         if (PagePrivate(page))
3547                 return 0;
3548
3549         return cifs_fscache_release_page(page, gfp);
3550 }
3551
3552 static void cifs_invalidate_page(struct page *page, unsigned long offset)
3553 {
3554         struct cifsInodeInfo *cifsi = CIFS_I(page->mapping->host);
3555
3556         if (offset == 0)
3557                 cifs_fscache_invalidate_page(page, &cifsi->vfs_inode);
3558 }
3559
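     /*
      * Write the page back synchronously before it is invalidated, and
      * drop any copy that fscache holds.
      */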
3560 static int cifs_launder_page(struct page *page)
3561 {
3562         int rc = 0;
3563         loff_t range_start = page_offset(page);
3564         loff_t range_end = range_start + (loff_t)(PAGE_CACHE_SIZE - 1);
3565         struct writeback_control wbc = {
3566                 .sync_mode = WB_SYNC_ALL,
3567                 .nr_to_write = 0,
3568                 .range_start = range_start,
3569                 .range_end = range_end,
3570         };
3571
3572         cFYI(1, "Launder page: %p", page);
3573
3574         if (clear_page_dirty_for_io(page))
3575                 rc = cifs_writepage_locked(page, &wbc);
3576
3577         cifs_fscache_invalidate_page(page, page->mapping->host);
3578         return rc;
3579 }
3580
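     /*
      * Worker run when the server breaks our oplock: drop cached read
      * permission if mandatory locks forbid client-side caching, flush
      * dirty pages (invalidating the mapping if read caching was lost),
      * push cached byte-range locks back to the server, and acknowledge
      * the break unless it was cancelled (e.g. by a reconnect).
      */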
3581 void cifs_oplock_break(struct work_struct *work)
3582 {
3583         struct cifsFileInfo *cfile = container_of(work, struct cifsFileInfo,
3584                                                   oplock_break);
3585         struct inode *inode = cfile->dentry->d_inode;
3586         struct cifsInodeInfo *cinode = CIFS_I(inode);
3587         struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
3588         int rc = 0;
3589
3590         if (!cinode->clientCanCacheAll && cinode->clientCanCacheRead &&
3591                                                 cifs_has_mand_locks(cinode)) {
3592                 cFYI(1, "Reset oplock to None for inode=%p due to mand locks",
3593                      inode);
3594                 cinode->clientCanCacheRead = false;
3595         }
3596
3597         if (inode && S_ISREG(inode->i_mode)) {
3598                 if (cinode->clientCanCacheRead)
3599                         break_lease(inode, O_RDONLY);
3600                 else
3601                         break_lease(inode, O_WRONLY);
3602                 rc = filemap_fdatawrite(inode->i_mapping);
3603                 if (cinode->clientCanCacheRead == 0) {
3604                         rc = filemap_fdatawait(inode->i_mapping);
3605                         mapping_set_error(inode->i_mapping, rc);
3606                         cifs_invalidate_mapping(inode);
3607                 }
3608                 cFYI(1, "Oplock flush inode %p rc %d", inode, rc);
3609         }
3610
3611         rc = cifs_push_locks(cfile);
3612         if (rc)
3613                 cERROR(1, "Push locks rc = %d", rc);
3614
3615         /*
3616          * Releasing a stale oplock after a recent reconnect of the SMB
3617          * session (via a now-invalid file handle) is not a data integrity
3618          * issue, but don't bother sending an oplock release if the session
3619          * is still disconnected - the server has already released it.
3620          */
3621         if (!cfile->oplock_break_cancelled) {
3622                 rc = tcon->ses->server->ops->oplock_response(tcon, &cfile->fid,
3623                                                              cinode);
3624                 cFYI(1, "Oplock release rc = %d", rc);
3625         }
3626 }
3627
3628 const struct address_space_operations cifs_addr_ops = {
3629         .readpage = cifs_readpage,
3630         .readpages = cifs_readpages,
3631         .writepage = cifs_writepage,
3632         .writepages = cifs_writepages,
3633         .write_begin = cifs_write_begin,
3634         .write_end = cifs_write_end,
3635         .set_page_dirty = __set_page_dirty_nobuffers,
3636         .releasepage = cifs_release_page,
3637         .invalidatepage = cifs_invalidate_page,
3638         .launder_page = cifs_launder_page,
3639 };
3640
3641 /*
3642  * cifs_readpages requires the server to support a buffer large enough to
3643  * contain the header plus one complete page of data.  Otherwise, we need
3644  * to leave cifs_readpages out of the address space operations.
3645  */
3646 const struct address_space_operations cifs_addr_ops_smallbuf = {
3647         .readpage = cifs_readpage,
3648         .writepage = cifs_writepage,
3649         .writepages = cifs_writepages,
3650         .write_begin = cifs_write_begin,
3651         .write_end = cifs_write_end,
3652         .set_page_dirty = __set_page_dirty_nobuffers,
3653         .releasepage = cifs_release_page,
3654         .invalidatepage = cifs_invalidate_page,
3655         .launder_page = cifs_launder_page,
3656 };