The Design and Implementation of the FreeBSD Operating System, Second Edition
Now available: The Design and Implementation of the FreeBSD Operating System (Second Edition)


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]

FreeBSD/Linux Kernel Cross Reference
sys/fs/nfs/direct.c

Version: -  FREEBSD  -  FREEBSD-13-STABLE  -  FREEBSD-13-0  -  FREEBSD-12-STABLE  -  FREEBSD-12-0  -  FREEBSD-11-STABLE  -  FREEBSD-11-0  -  FREEBSD-10-STABLE  -  FREEBSD-10-0  -  FREEBSD-9-STABLE  -  FREEBSD-9-0  -  FREEBSD-8-STABLE  -  FREEBSD-8-0  -  FREEBSD-7-STABLE  -  FREEBSD-7-0  -  FREEBSD-6-STABLE  -  FREEBSD-6-0  -  FREEBSD-5-STABLE  -  FREEBSD-5-0  -  FREEBSD-4-STABLE  -  FREEBSD-3-STABLE  -  FREEBSD22  -  l41  -  OPENBSD  -  linux-2.6  -  MK84  -  PLAN9  -  xnu-8792 
SearchContext: -  none  -  3  -  10 

    1 /*
    2  * linux/fs/nfs/direct.c
    3  *
    4  * High-performance direct I/O for the NFS client
    5  *
    6  * When an application requests uncached I/O, all read and write requests
    7  * are made directly to the server; data stored or fetched via these
    8  * requests is not cached in the Linux page cache.  The client does not
    9  * correct unaligned requests from applications.  All requested bytes are
   10  * held on permanent storage before a direct write system call returns to
   11  * an application.  Applications that manage their own data caching, such
   12  * as databases, make very good use of direct I/O on local file systems.
   13  *
   14  * Solaris implements an uncached I/O facility called directio() that
   15  * is used for backups and sequential I/O to very large files.  Solaris
   16  * also supports uncaching whole NFS partitions with "-o forcedirectio,"
   17  * an undocumented mount option.
   18  *
   19  * Note that I/O to read in executables (e.g. kernel_read) cannot use
   20  * direct (kiobuf) reads because there is no vma backing the passed-in
   21  * data buffer.
   22  *
   23  * Designed by Jeff Kimmel, Chuck Lever, and Trond Myklebust.
   24  *
   25  * Initial implementation:      12/2001 by Chuck Lever <cel@netapp.com>
   26  *
   27  * TODO:
   28  *
   29  * 1.  Use concurrent asynchronous network requests rather than
   30  *     serialized synchronous network requests for normal (non-sync)
   31  *     direct I/O.
   32  */
   33 
   34 #include <linux/config.h>
   35 #include <linux/sched.h>
   36 #include <linux/kernel.h>
   37 #include <linux/file.h>
   38 #include <linux/errno.h>
   39 #include <linux/nfs_fs.h>
   40 #include <linux/smp_lock.h>
   41 #include <linux/sunrpc/clnt.h>
   42 #include <linux/iobuf.h>
   43 
   44 #include <asm/system.h>
   45 #include <asm/uaccess.h>
   46 
/*
 * Debug facility mask for this file.  Presumably consumed by the
 * dfprintk() calls further down -- TODO confirm against the sunrpc
 * debug headers, which are not visible from here.
 */
#define NFSDBG_FACILITY         (NFSDBG_PAGECACHE | NFSDBG_VFS)
/* Byte count used when copying and comparing write verifiers below. */
#define VERF_SIZE               (2 * sizeof(__u32))
   49 
   50 static inline int
   51 nfs_direct_read_rpc(struct file *file, struct nfs_readargs *arg)
   52 {
   53         int result;
   54         struct inode * inode = file->f_dentry->d_inode;
   55         struct nfs_fattr fattr;
   56         struct rpc_message msg;
   57         struct nfs_readres res = { &fattr, arg->count, 0 };
   58 
   59 #ifdef CONFIG_NFS_V3
   60         msg.rpc_proc = (NFS_PROTO(inode)->version == 3) ?
   61                                                 NFS3PROC_READ : NFSPROC_READ;
   62 #else
   63         msg.rpc_proc = NFSPROC_READ;
   64 #endif
   65         msg.rpc_argp = arg;
   66         msg.rpc_resp = &res;
   67 
   68         lock_kernel();
   69         msg.rpc_cred = nfs_file_cred(file);
   70         fattr.valid = 0;
   71         result = rpc_call_sync(NFS_CLIENT(inode), &msg, 0);
   72         nfs_refresh_inode(inode, &fattr);
   73         unlock_kernel();
   74 
   75         return result;
   76 }
   77 
   78 static inline int
   79 nfs_direct_write_rpc(struct file *file, struct nfs_writeargs *arg,
   80         struct nfs_writeverf *verf)
   81 {
   82         int result;
   83         struct inode *inode = file->f_dentry->d_inode;
   84         struct nfs_fattr fattr;
   85         struct rpc_message msg;
   86         struct nfs_writeres res = { &fattr, verf, 0 };
   87 
   88 #ifdef CONFIG_NFS_V3
   89         msg.rpc_proc = (NFS_PROTO(inode)->version == 3) ?
   90                                                 NFS3PROC_WRITE : NFSPROC_WRITE;
   91 #else
   92         msg.rpc_proc = NFSPROC_WRITE;
   93 #endif
   94         msg.rpc_argp = arg;
   95         msg.rpc_resp = &res;
   96 
   97         lock_kernel();
   98         msg.rpc_cred = get_rpccred(nfs_file_cred(file));
   99         fattr.valid = 0;
  100         result = rpc_call_sync(NFS_CLIENT(inode), &msg, 0);
  101         nfs_write_attributes(inode, &fattr);
  102         put_rpccred(msg.rpc_cred);
  103         unlock_kernel();
  104 
  105 #ifdef CONFIG_NFS_V3
  106         if (NFS_PROTO(inode)->version == 3) {
  107                 if (result > 0) {
  108                         if ((arg->stable == NFS_FILE_SYNC) &&
  109                             (verf->committed != NFS_FILE_SYNC)) {
  110                                 printk(KERN_ERR
  111                                 "%s: server didn't sync stable write request\n",
  112                                 __FUNCTION__);
  113                                 return -EIO;
  114                         }
  115 
  116                         if (result != arg->count) {
  117                                 printk(KERN_INFO
  118                                         "%s: short write, count=%u, result=%d\n",
  119                                         __FUNCTION__, arg->count, result);
  120                         }
  121                 }
  122                 return result;
  123         } else {
  124 #endif
  125                 verf->committed = NFS_FILE_SYNC; /* NFSv2 always syncs data */
  126                 if (result == 0)
  127                         return arg->count;
  128                 return result;
  129 #ifdef CONFIG_NFS_V3
  130         }
  131 #endif
  132 }
  133 
  134 #ifdef CONFIG_NFS_V3
  135 static inline int
  136 nfs_direct_commit_rpc(struct inode *inode, loff_t offset, size_t count,
  137         struct nfs_writeverf *verf)
  138 {
  139         int result;
  140         struct nfs_fattr fattr;
  141         struct nfs_writeargs    arg = { NFS_FH(inode), offset, count, 0, 0,
  142                                         NULL };
  143         struct nfs_writeres     res = { &fattr, verf, 0 };
  144         struct rpc_message      msg = { NFS3PROC_COMMIT, &arg, &res, NULL };
  145 
  146         fattr.valid = 0;
  147 
  148         lock_kernel();
  149         result = rpc_call_sync(NFS_CLIENT(inode), &msg, 0);
  150         nfs_write_attributes(inode, &fattr);
  151         unlock_kernel();
  152 
  153         return result;
  154 }
  155 #else
  156 static inline int
  157 nfs_direct_commit_rpc(struct inode *inode, loff_t offset, size_t count,
  158         struct nfs_writeverf *verf)
  159 {
  160         return 0;
  161 }
  162 #endif
  163 
  164 /*
  165  * Walk through the iobuf and create an iovec for each "rsize" bytes.
  166  */
  167 static int
  168 nfs_direct_read(struct file *file, struct kiobuf *iobuf, loff_t offset,
  169         size_t count)
  170 {
  171         int curpage, total;
  172         int result = 0;
  173         struct inode *inode = file->f_dentry->d_inode;
  174         int rsize = NFS_SERVER(inode)->rsize;
  175         struct page *pages[NFS_READ_MAXIOV];
  176         struct nfs_readargs args = { NFS_FH(inode), offset, 0, iobuf->offset,
  177                                      pages };
  178 
  179         total = 0;
  180         curpage = 0;
  181         while (count) {
  182                 int len, request;
  183                 struct page **dest = pages;
  184 
  185                 request = count;
  186                 if (count > rsize)
  187                         request = rsize;
  188                 args.count = request;
  189                 args.offset = offset;
  190                 args.pgbase = (iobuf->offset + total) & ~PAGE_MASK;
  191                 len = PAGE_SIZE - args.pgbase;
  192 
  193                 do {
  194                         struct page *page = iobuf->maplist[curpage];
  195 
  196                         if (curpage >= iobuf->nr_pages || !page) {
  197                                 result = -EFAULT;
  198                                 goto out_err;
  199                         }
  200 
  201                         *dest++ = page;
  202                         /* zero after the first iov */
  203                         if (request < len)
  204                                 break;
  205                         request -= len;
  206                         len = PAGE_SIZE;
  207                         curpage++;
  208                 } while (request != 0);
  209 
  210                 result = nfs_direct_read_rpc(file, &args);
  211 
  212                 if (result < 0)
  213                         break;
  214 
  215                 total += result;
  216                 if (result < args.count)   /* NFSv2ism */
  217                         break;
  218                 count -= result;
  219                 offset += result;
  220         };
  221 out_err:
  222         if (!total)
  223                 return result;
  224         return total;
  225 }
  226 
  227 /*
  228  * Walk through the iobuf and create an iovec for each "wsize" bytes.
  229  * If only one network write is necessary, or if the O_SYNC flag or
  230  * 'sync' mount option are present, or if this is a V2 inode, use
  231  * FILE_SYNC.  Otherwise, use UNSTABLE and finish with a COMMIT.
  232  *
  233  * The mechanics of this function are much the same as nfs_direct_read,
  234  * with the added complexity of committing unstable writes.
  235  */
  236 static int
  237 nfs_direct_write(struct file *file, struct kiobuf *iobuf,
  238         loff_t offset, size_t count)
  239 {
  240         int curpage, total;
  241         int need_commit = 0;
  242         int result = 0;
  243         loff_t save_offset = offset;
  244         struct inode *inode = file->f_dentry->d_inode;
  245         int wsize = NFS_SERVER(inode)->wsize;
  246         struct nfs_writeverf first_verf, ret_verf;
  247         struct page *pages[NFS_WRITE_MAXIOV];
  248         struct nfs_writeargs args = { NFS_FH(inode), 0, 0, NFS_FILE_SYNC, 0,
  249                                 pages };
  250 
  251 #ifdef CONFIG_NFS_V3
  252         if ((NFS_PROTO(inode)->version == 3) && (count > wsize) &&
  253                                                         (!IS_SYNC(inode)))
  254                 args.stable = NFS_UNSTABLE;
  255 #endif
  256 
  257 retry:
  258         total = 0;
  259         curpage = 0;
  260         while (count) {
  261                 int len, request;
  262                 struct page **dest = pages;
  263 
  264                 request = count;
  265                 if (count > wsize)
  266                         request = wsize;
  267                 args.count = request;
  268                 args.offset = offset;
  269                 args.pgbase = (iobuf->offset + total) & ~PAGE_MASK;
  270                 len = PAGE_SIZE - args.pgbase;
  271 
  272                 do {
  273                         struct page *page = iobuf->maplist[curpage];
  274 
  275                         if (curpage >= iobuf->nr_pages || !page) {
  276                                 result = -EFAULT;
  277                                 goto out_err;
  278                         }
  279 
  280                         *dest++ = page;
  281                         /* zero after the first iov */
  282                         if (request < len)
  283                                 break;
  284                         request -= len;
  285                         len = PAGE_SIZE;
  286                         curpage++;
  287                 } while (request != 0);
  288 
  289                 result = nfs_direct_write_rpc(file, &args, &ret_verf);
  290 
  291                 if (result < 0)
  292                         break;
  293 
  294                 if (!total)
  295                         memcpy(&first_verf.verifier, &ret_verf.verifier,
  296                                                                 VERF_SIZE);
  297                 if (ret_verf.committed != NFS_FILE_SYNC) {
  298                         need_commit = 1;
  299                         if (memcmp(&first_verf.verifier, &ret_verf.verifier,
  300                                                                 VERF_SIZE))
  301                                 goto print_retry;
  302                 }
  303 
  304                 total += result;
  305                 count -= result;
  306                 offset += result;
  307         };
  308 
  309 out_err:
  310         /*
  311          * Commit data written so far, even in the event of an error
  312          */
  313         if (need_commit) {
  314                 if (nfs_direct_commit_rpc(inode, save_offset,
  315                                         iobuf->length - count, &ret_verf))
  316                         goto print_retry;
  317                 if (memcmp(&first_verf.verifier, &ret_verf.verifier,
  318                                                                 VERF_SIZE))
  319                         goto print_retry;
  320         }
  321 
  322         if (!total)
  323                 return result;
  324         return total;
  325 
  326 print_retry:
  327         printk(KERN_INFO "%s: detected server restart; retrying with FILE_SYNC\n",
  328                         __FUNCTION__);
  329         args.stable = NFS_FILE_SYNC;
  330         offset = save_offset;
  331         count = iobuf->length;
  332         goto retry;
  333 }
  334 
  335 /*
  336  * Read or write data, moving the data directly to/from the
  337  * application's buffer without caching in the page cache.
  338  *
  339  * Rules for direct I/O
  340  *
  341  * 1.  block size = 512 bytes or more
  342  * 2.  file byte offset is block aligned
  343  * 3.  byte count is a multiple of block size
  344  * 4.  user buffer is not aligned
  345  * 5.  user buffer is faulted in and pinned
  346  *
  347  * These are verified before we get here.
  348  */
  349 int
  350 nfs_direct_IO(int rw, struct file *file, struct kiobuf *iobuf,
  351         unsigned long blocknr, int blocksize)
  352 {
  353         int result = -EINVAL;
  354         size_t count = iobuf->length;
  355         struct dentry *dentry = file->f_dentry;
  356         struct inode *inode = dentry->d_inode;
  357         loff_t offset = blocknr << inode->i_blkbits;
  358 
  359         switch (rw) {
  360         case READ:
  361                 dfprintk(VFS,
  362                         "NFS: direct_IO(READ) (%s/%s) off/cnt(%Lu/%d)\n",
  363                                 dentry->d_parent->d_name.name,
  364                                         dentry->d_name.name, offset, count);
  365 
  366                 result = nfs_direct_read(file, iobuf, offset, count);
  367                 break;
  368         case WRITE:
  369                 dfprintk(VFS,
  370                         "NFS: direct_IO(WRITE) (%s/%s) off/cnt(%Lu/%d)\n",
  371                                 dentry->d_parent->d_name.name,
  372                                         dentry->d_name.name, offset, count);
  373 
  374                 result = nfs_direct_write(file, iobuf, offset, count);
  375                 break;
  376         default:
  377                 break;
  378         }
  379 
  380         dfprintk(VFS, "NFS: direct_IO result = %d\n", result);
  381         return result;
  382 }

Cache object: 0f34b9777d9841b03f40c5bac95f63ae


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]


This page is part of the FreeBSD/Linux Linux Kernel Cross-Reference, and was automatically generated using a modified version of the LXR engine.