nfs_bio.c

Version: - FREEBSD - FREEBSD-13-STABLE - FREEBSD-13-0 - FREEBSD-12-STABLE - FREEBSD-12-0 - FREEBSD-11-STABLE - FREEBSD-11-0 - FREEBSD-10-STABLE - FREEBSD-10-0 - FREEBSD-9-STABLE - FREEBSD-9-0 - FREEBSD-8-STABLE - FREEBSD-8-0 - FREEBSD-7-STABLE - FREEBSD-7-0 - FREEBSD-6-STABLE - FREEBSD-6-0 - FREEBSD-5-STABLE - FREEBSD-5-0 - FREEBSD-4-STABLE - FREEBSD-3-STABLE - FREEBSD22 - l41 - OPENBSD - linux-2.6 - MK84 - PLAN9 - xnu-8792
SearchContext: - none - 3 - 10
    1 /*      $NetBSD: nfs_bio.c,v 1.116.2.3 2005/12/01 16:20:34 riz Exp $    */
    2 
    3 /*
    4  * Copyright (c) 1989, 1993
    5  *      The Regents of the University of California.  All rights reserved.
    6  *
    7  * This code is derived from software contributed to Berkeley by
    8  * Rick Macklem at The University of Guelph.
    9  *
   10  * Redistribution and use in source and binary forms, with or without
   11  * modification, are permitted provided that the following conditions
   12  * are met:
   13  * 1. Redistributions of source code must retain the above copyright
   14  *    notice, this list of conditions and the following disclaimer.
   15  * 2. Redistributions in binary form must reproduce the above copyright
   16  *    notice, this list of conditions and the following disclaimer in the
   17  *    documentation and/or other materials provided with the distribution.
   18  * 3. Neither the name of the University nor the names of its contributors
   19  *    may be used to endorse or promote products derived from this software
   20  *    without specific prior written permission.
   21  *
   22  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
   23  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
   24  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
   25  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
   26  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
   27  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
   28  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
   29  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
   30  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
   31  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
   32  * SUCH DAMAGE.
   33  *
   34  *      @(#)nfs_bio.c   8.9 (Berkeley) 3/30/95
   35  */
   36 
   37 #include <sys/cdefs.h>
   38 __KERNEL_RCSID(0, "$NetBSD: nfs_bio.c,v 1.116.2.3 2005/12/01 16:20:34 riz Exp $");
   39 
   40 #include "opt_nfs.h"
   41 #include "opt_ddb.h"
   42 
   43 #include <sys/param.h>
   44 #include <sys/systm.h>
   45 #include <sys/resourcevar.h>
   46 #include <sys/signalvar.h>
   47 #include <sys/proc.h>
   48 #include <sys/buf.h>
   49 #include <sys/vnode.h>
   50 #include <sys/mount.h>
   51 #include <sys/kernel.h>
   52 #include <sys/namei.h>
   53 #include <sys/dirent.h>
   54 #include <sys/malloc.h>
   55 
   56 #include <uvm/uvm_extern.h>
   57 #include <uvm/uvm.h>
   58 
   59 #include <nfs/rpcv2.h>
   60 #include <nfs/nfsproto.h>
   61 #include <nfs/nfs.h>
   62 #include <nfs/nfsmount.h>
   63 #include <nfs/nqnfs.h>
   64 #include <nfs/nfsnode.h>
   65 #include <nfs/nfs_var.h>
   66 
      /*
       * Globals defined outside this file.  In the code visible here,
       * nfs_numasync is tested before nfs_bioread() starts a directory
       * read-ahead and the nfsstats biocache counters are bumped on each
       * cached read/write path.
       */
   67 extern int nfs_numasync;
   68 extern int nfs_commitsize;
   69 extern struct nfsstats nfsstats;
   70 
      /*
       * File-local (static) helpers; being static they have to be defined
       * in this file, outside the part shown here.
       */
   71 static int nfs_doio_read __P((struct buf *, struct uio *));
   72 static int nfs_doio_write __P((struct buf *, struct uio *));
   73 static int nfs_doio_phys __P((struct buf *, struct uio *));
   74 
   75 /*
   76  * Vnode op for read using bio
   77  * Any similarity to readip() is purely coincidental
       *
       * Reads from vp into uio.  What happens depends on the vnode type:
       *   VREG - data is copied through ubc_alloc()/uiomove() windows and
       *          the copy stops at np->n_size.
       *   VLNK - the link text comes from one NFS_MAXPATHLEN cache block.
       *   VDIR - entries come from NFS_DIRBLKSIZ cache blocks found through
       *          the directory cookie cache; uio_offset is looked up as a
       *          cookie on entry and is set to a cookie on exit.
       *
       * ioflag is not referenced in this function.  cflag is tested for
       * NFSBIO_CACHECOOKIES, which makes every directory entry that is
       * copied out get its own cookie cache entry.
       *
       * Returns 0 or an errno value: EINVAL for a negative offset on a
       * non-directory or for a directory cookie that cannot be used, EFBIG
       * when the request reaches past nm_maxfilesize, EINTR when
       * nfs_getcacheblk() returns NULL, otherwise the error reported by
       * the routine that failed.
   78  */
   79 int
   80 nfs_bioread(vp, uio, ioflag, cred, cflag)
   81         struct vnode *vp;
   82         struct uio *uio;
   83         int ioflag, cflag;
   84         struct ucred *cred;
   85 {
   86         struct nfsnode *np = VTONFS(vp);
   87         struct buf *bp = NULL, *rabp;
   88         struct vattr vattr;
   89         struct proc *p;
   90         struct nfsmount *nmp = VFSTONFS(vp->v_mount);
   91         struct nfsdircache *ndp = NULL, *nndp = NULL;
   92         caddr_t baddr, ep, edp;
   93         int got_buf = 0, error = 0, n = 0, on = 0, en, enn;
   94         int enough = 0;
   95         struct dirent *dp, *pdp;
   96         off_t curoff = 0;
   97 
   98 #ifdef DIAGNOSTIC
   99         if (uio->uio_rw != UIO_READ)
  100                 panic("nfs_read mode");
  101 #endif
              /* Zero-length request: nothing to transfer. */
  102         if (uio->uio_resid == 0)
  103                 return (0);
              /* Directories are exempt: their offset is used as a cookie below. */
  104         if (vp->v_type != VDIR && uio->uio_offset < 0)
  105                 return (EINVAL);
  106         p = uio->uio_procp;
  107 #ifndef NFS_V2_ONLY
              /*
               * NFSv3 mount that has not fetched fsinfo yet: do it now.  The
               * result is explicitly discarded.
               * NOTE(review): presumably this is what makes nm_maxfilesize
               * meaningful for the check that follows - confirm.
               */
  108         if ((nmp->nm_flag & NFSMNT_NFSV3) &&
  109             !(nmp->nm_iflag & NFSMNT_GOTFSINFO))
  110                 (void)nfs_fsinfo(nmp, vp, cred, p);
  111 #endif
  112         if (vp->v_type != VDIR &&
  113             (uio->uio_offset + uio->uio_resid) > nmp->nm_maxfilesize)
  114                 return (EFBIG);
  115 
  116         /*
  117          * For nfs, cache consistency can only be maintained approximately.
  118          * Although RFC1094 does not specify the criteria, the following is
  119          * believed to be compatible with the reference port.
  120          * For nqnfs, full cache consistency is maintained within the loop.
  121          * For nfs:
  122          * If the file's modify time on the server has changed since the
  123          * last read rpc or you have written to the file,
  124          * you may have lost data cache consistency with the
  125          * server, so flush all of the file's data out of the cache.
  126          * Then force a getattr rpc to ensure that you have up to date
  127          * attributes.
  128          * NB: This implies that cache data can be read when up to
  129          * NFS_ATTRTIMEO seconds out of date. If you find that you need current
  130          * attributes this could be forced by setting n_attrstamp to 0 before
  131          * the VOP_GETATTR() call.
  132          */
  133 
  134         if ((nmp->nm_flag & NFSMNT_NQNFS) == 0 && vp->v_type != VLNK) {
  135                 if (np->n_flag & NMODIFIED) {
                              /*
                               * Locally modified.  Only a directory gets its
                               * cookie cache and buffers flushed here; a regular
                               * file just has its attributes refetched.
                               */
  136                         if (vp->v_type != VREG) {
  137                                 if (vp->v_type != VDIR)
  138                                         panic("nfs: bioread, not dir");
  139                                 nfs_invaldircache(vp, 0);
  140                                 np->n_direofoffset = 0;
  141                                 error = nfs_vinvalbuf(vp, V_SAVE, cred, p, 1);
  142                                 if (error)
  143                                         return (error);
  144                         }
  145                         NFS_INVALIDATE_ATTRCACHE(np);
  146                         error = VOP_GETATTR(vp, &vattr, cred, p);
  147                         if (error)
  148                                 return (error);
  149                         np->n_mtime = vattr.va_mtime;
  150                 } else {
                              /*
                               * Not modified locally: flush only if the mtime
                               * from VOP_GETATTR() differs from the remembered one.
                               */
  151                         error = VOP_GETATTR(vp, &vattr, cred, p);
  152                         if (error)
  153                                 return (error);
  154                         if (timespeccmp(&np->n_mtime, &vattr.va_mtime, !=)) {
  155                                 if (vp->v_type == VDIR) {
  156                                         nfs_invaldircache(vp, 0);
  157                                         np->n_direofoffset = 0;
  158                                 }
  159                                 error = nfs_vinvalbuf(vp, V_SAVE, cred, p, 1);
  160                                 if (error)
  161                                         return (error);
  162                                 np->n_mtime = vattr.va_mtime;
  163                         }
  164                 }
  165         }
  166 
              /*
               * Main loop.  Only the directory case can leave n > 0 at the
               * bottom, so regular files and symlinks make exactly one pass.
               */
  167         do {
  168 #ifndef NFS_V2_ONLY
  169             /*
  170              * Get a valid lease. If cached data is stale, flush it.
  171              */
  172             if (nmp->nm_flag & NFSMNT_NQNFS) {
  173                 if (NQNFS_CKINVALID(vp, np, ND_READ)) {
  174                     do {
  175                         error = nqnfs_getlease(vp, ND_READ, cred, p);
  176                     } while (error == NQNFS_EXPIRED);
  177                     if (error)
  178                         return (error);
  179                     if (np->n_lrev != np->n_brev ||
  180                         (np->n_flag & NQNFSNONCACHE) ||
  181                         ((np->n_flag & NMODIFIED) && vp->v_type == VDIR)) {
  182                         if (vp->v_type == VDIR) {
  183                                 nfs_invaldircache(vp, 0);
  184                                 np->n_direofoffset = 0;
  185                         }
  186                         error = nfs_vinvalbuf(vp, V_SAVE, cred, p, 1);
  187                         if (error)
  188                             return (error);
  189                         np->n_brev = np->n_lrev;
  190                     }
  191                 } else if (vp->v_type == VDIR && (np->n_flag & NMODIFIED)) {
  192                     nfs_invaldircache(vp, 0);
  193                     error = nfs_vinvalbuf(vp, V_SAVE, cred, p, 1);
  194                     np->n_direofoffset = 0;
  195                     if (error)
  196                         return (error);
  197                 }
  198             }
  199 #endif
  200             /*
  201              * Don't cache symlinks.
                   * More precisely: a node flagged NQNFSNONCACHE, or a symlink
                   * that is a VROOT vnode, bypasses the cache.  Regular files
                   * and links return straight from the rpc.  Directories still
                   * go through the cached path below; their buffer is marked
                   * B_INVAL after the copy at the bottom of the loop.
  202              */
  203             if (np->n_flag & NQNFSNONCACHE
  204                 || ((vp->v_flag & VROOT) && vp->v_type == VLNK)) {
  205                 switch (vp->v_type) {
  206                 case VREG:
  207                         return (nfs_readrpc(vp, uio));
  208                 case VLNK:
  209                         return (nfs_readlinkrpc(vp, uio, cred));
  210                 case VDIR:
  211                         break;
  212                 default:
  213                         printf(" NQNFSNONCACHE: type %x unexpected\n",  
  214                             vp->v_type);
  215                 };
  216             }
                  /*
                   * baddr is assigned nowhere else before the copy-out below,
                   * so that copy always starts from bp->b_data.
                   */
  217             baddr = (caddr_t)0;
  218             switch (vp->v_type) {
  219             case VREG:
  220                 nfsstats.biocache_reads++;
  221 
  222                 error = 0;
                      /*
                       * Copy window by window.  Each window is limited to the
                       * smaller of the bytes left in the file (by n_size) and
                       * the bytes still wanted.  ubc_alloc() is handed
                       * &bytelen, so it may shrink the length actually mapped
                       * (NOTE(review): confirm against ubc_alloc()).
                       */
  223                 while (uio->uio_resid > 0) {
  224                         void *win;
  225                         vsize_t bytelen;
  226 
  227                         nfs_delayedtruncate(vp);
  228                         if (np->n_size <= uio->uio_offset) {
  229                                 break;
  230                         }
  231                         bytelen =
  232                             MIN(np->n_size - uio->uio_offset, uio->uio_resid);
  233                         win = ubc_alloc(&vp->v_uobj, uio->uio_offset,
  234                                         &bytelen, UBC_READ);
  235                         error = uiomove(win, bytelen, uio);
  236                         ubc_release(win, 0);
  237                         if (error) {
  238                                 /*
  239                                  * XXXkludge
  240                                  * the file has been truncated on the server.
  241                                  * there isn't much we can do.
  242                                  */
  243                                 if (uio->uio_offset >= np->n_size) {
  244                                         /* end of file */
  245                                         error = 0;
  246                                 } else {
  247                                         break;
  248                                 }
  249                         }
  250                 }
  251                 break;
  252 
  253             case VLNK:
  254                 nfsstats.biocache_readlinks++;
  255                 bp = nfs_getcacheblk(vp, (daddr_t)0, NFS_MAXPATHLEN, p);
  256                 if (!bp)
  257                         return (EINTR);
                      /* Buffer not valid yet (B_DONE clear): read it synchronously. */
  258                 if ((bp->b_flags & B_DONE) == 0) {
  259                         bp->b_flags |= B_READ;
  260                         error = nfs_doio(bp, p);
  261                         if (error) {
  262                                 brelse(bp);
  263                                 return (error);
  264                         }
  265                 }
                      /*
                       * Copy at most the filled part of the buffer, taken here
                       * as NFS_MAXPATHLEN minus b_resid.
                       */
  266                 n = MIN(uio->uio_resid, NFS_MAXPATHLEN - bp->b_resid);
  267                 got_buf = 1;
  268                 on = 0;
  269                 break;
  270             case VDIR:
  271 diragain:
  272                 nfsstats.biocache_readdirs++;
  273                 ndp = nfs_searchdircache(vp, uio->uio_offset,
  274                         (nmp->nm_flag & NFSMNT_XLATECOOKIE), 0);
  275                 if (!ndp) {
  276                         /*
  277                          * We've been handed a cookie that is not
  278                          * in the cache. If we're not translating
  279                          * 32 <-> 64, it may be a value that was
  280                          * flushed out of the cache because it grew
  281                          * too big. Let the server judge if it's
  282                          * valid or not. In the translation case,
  283                          * we have no way of validating this value,
  284                          * so punt.
  285                          */
  286                         if (nmp->nm_flag & NFSMNT_XLATECOOKIE)
  287                                 return (EINVAL);
  288                         ndp = nfs_enterdircache(vp, uio->uio_offset, 
  289                                 uio->uio_offset, 0, 0);
  290                 }
  291 
                      /* The cookie is the remembered end-of-directory: done. */
  292                 if (uio->uio_offset != 0 &&
  293                     ndp->dc_cookie == np->n_direofoffset) {
  294                         nfs_putdircache(np, ndp);
  295                         nfsstats.direofcache_hits++;
  296                         return (0);
  297                 }
  298 
  299                 bp = nfs_getcacheblk(vp, ndp->dc_blkno, NFS_DIRBLKSIZ, p);
  300                 if (!bp)
  301                     return (EINTR);
  302                 if ((bp->b_flags & B_DONE) == 0) {
  303                     bp->b_flags |= B_READ;
  304                     bp->b_dcookie = ndp->dc_blkcookie;
  305                     error = nfs_doio(bp, p);
  306                     if (error) {
  307                         /*
  308                          * Yuck! The directory has been modified on the
  309                          * server. Punt and let the userland code
  310                          * deal with it.
  311                          */
  312                         nfs_putdircache(np, ndp);
  313                         brelse(bp);
  314                         if (error == NFSERR_BAD_COOKIE) {
  315                             nfs_invaldircache(vp, 0);
  316                             nfs_vinvalbuf(vp, 0, cred, p, 1);
  317                             error = EINVAL;
  318                         }
  319                         return (error);
  320                     }
  321                 }
  322 
  323                 /*
  324                  * Just return if we hit EOF right away with this
  325                  * block. Always check here, because direofoffset
  326                  * may have been set by an nfsiod since the last
  327                  * check.
  328                  */
  329                 if (np->n_direofoffset != 0 && 
  330                         ndp->dc_blkcookie == np->n_direofoffset) {
  331                         nfs_putdircache(np, ndp);
  332                         brelse(bp);
  333                         return (0);
  334                 }
  335 
  336                 /*
  337                  * Find the entry we were looking for in the block.
                       * en is the wanted entry's index within the block.  Walk
                       * the dirents up to it, with pdp trailing one entry
                       * behind dp; edp marks the end of the valid data.
  338                  */
  339 
  340                 en = ndp->dc_entry;
  341 
  342                 pdp = dp = (struct dirent *)bp->b_data;
  343                 edp = bp->b_data + bp->b_bcount - bp->b_resid;
  344                 enn = 0;
  345                 while (enn < en && (caddr_t)dp < edp) {
  346                         pdp = dp;
  347                         dp = (struct dirent *)((caddr_t)dp + dp->d_reclen);
  348                         enn++;
  349                 }
  350 
  351                 /*
  352                  * If the entry number was bigger than the number of
  353                  * entries in the block, or the cookie of the previous
  354                  * entry doesn't match, the directory cache is
  355                  * stale. Flush it and try again (i.e. go to
  356                  * the server).
                       * The return value of nfs_vinvalbuf() is ignored on this
                       * path, and it is called with intrflg 0.
  357                  */
  358                 if ((caddr_t)dp >= edp || (caddr_t)dp + dp->d_reclen > edp ||
  359                     (en > 0 && NFS_GETCOOKIE(pdp) != ndp->dc_cookie)) {
  360 #ifdef DEBUG
  361                         printf("invalid cache: %p %p %p off %lx %lx\n",
  362                                 pdp, dp, edp,
  363                                 (unsigned long)uio->uio_offset,
  364                                 (unsigned long)NFS_GETCOOKIE(pdp));
  365 #endif
  366                         nfs_putdircache(np, ndp);
  367                         brelse(bp);
  368                         nfs_invaldircache(vp, 0);
  369                         nfs_vinvalbuf(vp, 0, cred, p, 0);
  370                         goto diragain;
  371                 }
  372 
                      /* Byte offset of the first entry to copy within the buffer. */
  373                 on = (caddr_t)dp - bp->b_data;
  374 
  375                 /*
  376                  * Cache all entries that may be exported to the
  377                  * user, as they may be thrown back at us. The
  378                  * NFSBIO_CACHECOOKIES flag indicates that all
  379                  * entries are being 'exported', so cache them all.
  380                  */
  381 
                      /*
                       * Starting at the first entry of the block (pdp == dp):
                       * step dp forward once so that pdp/dp are again
                       * previous/current for the loop below.
                       */
  382                 if (en == 0 && pdp == dp) {
  383                         dp = (struct dirent *)
  384                             ((caddr_t)dp + dp->d_reclen);
  385                         enn++;
  386                 }
  387 
                      /*
                       * n is the smaller of the request and what is left in the
                       * block.  'enough' records that the request was the
                       * limit, which ends the outer loop after this pass.
                       */
  388                 if (uio->uio_resid < (bp->b_bcount - bp->b_resid - on)) {
  389                         n = uio->uio_resid;
  390                         enough = 1;
  391                 } else
  392                         n = bp->b_bcount - bp->b_resid - on;
  393 
  394                 ep = bp->b_data + on + n;
  395 
  396                 /*
  397                  * Find last complete entry to copy, caching entries
  398                  * (if requested) as we go.
  399                  */
  400 
  401                 while ((caddr_t)dp < ep && (caddr_t)dp + dp->d_reclen <= ep) {  
  402                         if (cflag & NFSBIO_CACHECOOKIES) {
  403                                 nndp = nfs_enterdircache(vp, NFS_GETCOOKIE(pdp),
  404                                     ndp->dc_blkcookie, enn, bp->b_lblkno);
  405                                 if (nmp->nm_flag & NFSMNT_XLATECOOKIE) {
  406                                         NFS_STASHCOOKIE32(pdp,
  407                                             nndp->dc_cookie32);
  408                                 }
  409                                 nfs_putdircache(np, nndp);
  410                         }
  411                         pdp = dp;
  412                         dp = (struct dirent *)((caddr_t)dp + dp->d_reclen);
  413                         enn++;
  414                 }
                      /*
                       * NOTE(review): ndp->dc_blkcookie is still read a few
                       * lines below, after this put - confirm the entry cannot
                       * go away once the reference is dropped here.
                       */
  415                 nfs_putdircache(np, ndp);
  416 
  417                 /*
  418                  * If the last requested entry was not the last in the
  419                  * buffer (happens if NFS_DIRFRAGSIZ < NFS_DIRBLKSIZ),  
  420                  * cache the cookie of the last requested one, and
  421                  * set the offset to it.
  422                  */
  423 
  424                 if ((on + n) < bp->b_bcount - bp->b_resid) {
  425                         curoff = NFS_GETCOOKIE(pdp);
  426                         nndp = nfs_enterdircache(vp, curoff, ndp->dc_blkcookie,
  427                             enn, bp->b_lblkno);
  428                         if (nmp->nm_flag & NFSMNT_XLATECOOKIE) {
  429                                 NFS_STASHCOOKIE32(pdp, nndp->dc_cookie32);
  430                                 curoff = nndp->dc_cookie32;
  431                         }
  432                         nfs_putdircache(np, nndp);
  433                 } else
  434                         curoff = bp->b_dcookie;
  435 
  436                 /*
  437                  * Always cache the entry for the next block,
  438                  * so that readaheads can use it.
  439                  */
  440                 nndp = nfs_enterdircache(vp, bp->b_dcookie, bp->b_dcookie, 0,0);
  441                 if (nmp->nm_flag & NFSMNT_XLATECOOKIE) {
  442                         if (curoff == bp->b_dcookie) {
  443                                 NFS_STASHCOOKIE32(pdp, nndp->dc_cookie32);
  444                                 curoff = nndp->dc_cookie32;
  445                         }
  446                 }
  447 
                      /*
                       * Trim n so that the copy ends exactly at the end of the
                       * last complete entry (pdp).
                       */
  448                 n = ((caddr_t)pdp + pdp->d_reclen) - (bp->b_data + on);
  449 
  450                 /*
  451                  * If not eof and read aheads are enabled, start one.
  452                  * (You need the current block first, so that you have the
  453                  *  directory offset cookie of the next block.)
  454                  */
  455                 if (nfs_numasync > 0 && nmp->nm_readahead > 0 &&
  456                     np->n_direofoffset == 0 && !(np->n_flag & NQNFSNONCACHE)) {
  457                         rabp = nfs_getcacheblk(vp, nndp->dc_blkno,
  458                                                 NFS_DIRBLKSIZ, p);
  459                         if (rabp) {
                                  /*
                                   * Start the read only when the block is neither
                                   * valid nor dirty.  If the async request is
                                   * refused, the buffer is invalidated and released.
                                   */
  460                             if ((rabp->b_flags & (B_DONE | B_DELWRI)) == 0) {
  461                                 rabp->b_dcookie = nndp->dc_cookie;
  462                                 rabp->b_flags |= (B_READ | B_ASYNC);
  463                                 if (nfs_asyncio(rabp)) {
  464                                     rabp->b_flags |= B_INVAL;
  465                                     brelse(rabp);
  466                                 }
  467                             } else
  468                                 brelse(rabp);
  469                         }
  470                 }
  471                 nfs_putdircache(np, nndp);
  472                 got_buf = 1;
  473                 break;
  474             default:
  475                 printf(" nfsbioread: type %x unexpected\n",vp->v_type);
  476                 break;
  477             }
  478 
                  /*
                   * Copy-out shared by the VLNK and VDIR cases.  VREG has
                   * already copied its data and never sets n, so it skips this.
                   */
  479             if (n > 0) {
  480                 if (!baddr)
  481                         baddr = bp->b_data;
  482                 error = uiomove(baddr + on, (int)n, uio);
  483             }
  484             switch (vp->v_type) {
  485             case VREG:
  486                 break;
  487             case VLNK:
                      /* A symlink is a single block: force the loop to end. */
  488                 n = 0;
  489                 break;
  490             case VDIR:
  491                 if (np->n_flag & NQNFSNONCACHE)
  492                         bp->b_flags |= B_INVAL;
                      /*
                       * The offset handed back is the cookie to resume from; it
                       * overrides whatever uiomove() left in uio_offset.
                       */
  493                 uio->uio_offset = curoff;
  494                 if (enough)
  495                         n = 0;
  496                 break;
  497             default:
  498                 printf(" nfsbioread: type %x unexpected\n",vp->v_type);
  499             }
  500             if (got_buf)
  501                 brelse(bp);
  502         } while (error == 0 && uio->uio_resid > 0 && n > 0);
  503         return (error);
  504 }
  505 
  506 /*
  507  * Vnode op for write using bio
       *
       * v points at a struct vop_write_args (a_vp, a_uio, a_ioflag, a_cred).
       * Only regular files are accepted.  Unless the node is flagged
       * NQNFSNONCACHE and the request is a single iovec (that case goes
       * directly to nfs_writerpc()), the data is copied into the vnode's
       * pages through ubc_alloc()/uiomove() windows, and the pages are
       * handed to VOP_PUTPAGES() whenever the write crosses an nm_wsize
       * boundary.  A final synchronous VOP_PUTPAGES() is issued for IO_SYNC
       * or NQNFSNONCACHE.
       *
       * Returns 0 or an errno value: EIO for a non-regular vnode, the value
       * saved in np->n_error when NWRITEERR is set, EINVAL for a negative
       * offset, EFBIG past nm_maxfilesize or past the RLIMIT_FSIZE limit
       * (SIGXFSZ is posted too), otherwise the error of the failing call.
  508  */
  509 int
  510 nfs_write(v)
  511         void *v;
  512 {
  513         struct vop_write_args /* {
  514                 struct vnode *a_vp;
  515                 struct uio *a_uio;
  516                 int  a_ioflag;
  517                 struct ucred *a_cred;
  518         } */ *ap = v;
  519         struct uio *uio = ap->a_uio;
  520         struct proc *p = uio->uio_procp;
  521         struct vnode *vp = ap->a_vp;
  522         struct nfsnode *np = VTONFS(vp);
  523         struct ucred *cred = ap->a_cred;
  524         int ioflag = ap->a_ioflag;
  525         struct vattr vattr;
  526         struct nfsmount *nmp = VFSTONFS(vp->v_mount);
  527         void *win;
  528         voff_t oldoff, origoff;
  529         vsize_t bytelen;
  530         int error = 0;
  531         int extended = 0, wrotedta = 0;
  532 
  533 #ifdef DIAGNOSTIC
  534         if (uio->uio_rw != UIO_WRITE)
  535                 panic("nfs_write mode");
  536         if (uio->uio_segflg == UIO_USERSPACE && uio->uio_procp != curproc)
  537                 panic("nfs_write proc");
  538 #endif
  539         if (vp->v_type != VREG)
  540                 return (EIO);
              /*
               * An error was recorded on the node earlier: report it once.  The
               * flag is cleared, so the next call proceeds normally.
               */
  541         if (np->n_flag & NWRITEERR) {
  542                 np->n_flag &= ~NWRITEERR;
  543                 return (np->n_error);
  544         }
  545 #ifndef NFS_V2_ONLY
              /* NFSv3 mount without fsinfo yet: fetch it; the result is discarded. */
  546         if ((nmp->nm_flag & NFSMNT_NFSV3) &&
  547             !(nmp->nm_iflag & NFSMNT_GOTFSINFO))
  548                 (void)nfs_fsinfo(nmp, vp, cred, p);
  549 #endif
              /*
               * Append or synchronous write: flush cached data first if the
               * node is modified.  For append, additionally refetch the
               * attributes and start the write at the node's current size.
               */
  550         if (ioflag & (IO_APPEND | IO_SYNC)) {
  551                 if (np->n_flag & NMODIFIED) {
  552                         NFS_INVALIDATE_ATTRCACHE(np);
  553                         error = nfs_vinvalbuf(vp, V_SAVE, cred, p, 1);
  554                         if (error)
  555                                 return (error);
  556                 }
  557                 if (ioflag & IO_APPEND) {
  558                         NFS_INVALIDATE_ATTRCACHE(np);
  559                         error = VOP_GETATTR(vp, &vattr, cred, p);
  560                         if (error)
  561                                 return (error);
  562                         uio->uio_offset = np->n_size;
  563                 }
  564         }
  565         if (uio->uio_offset < 0)
  566                 return (EINVAL);
  567         if ((uio->uio_offset + uio->uio_resid) > nmp->nm_maxfilesize)
  568                 return (EFBIG);
              /* Zero-length write: nothing to do (checked after the limits above). */
  569         if (uio->uio_resid == 0)
  570                 return (0);
  571         /*
  572          * Maybe this should be above the vnode op call, but so long as
  573          * file servers have no limits, i don't think it matters
  574          */
  575         if (p && uio->uio_offset + uio->uio_resid >
  576               p->p_rlimit[RLIMIT_FSIZE].rlim_cur) {
  577                 psignal(p, SIGXFSZ);
  578                 return (EFBIG);
  579         }
  580 
              /*
               * Uncached node and a single iovec: send the data directly with a
               * FILESYNC write rpc, holding nm_writeverflock shared around it.
               * If the rpc flags a stale write verifier, nfs_clearcommit() is
               * run for the whole mount.
               */
  581         if ((np->n_flag & NQNFSNONCACHE) && uio->uio_iovcnt == 1) {
  582                 int iomode = NFSV3WRITE_FILESYNC;
  583                 boolean_t stalewriteverf = FALSE;
  584 
  585                 lockmgr(&nmp->nm_writeverflock, LK_SHARED, NULL);
  586                 error = nfs_writerpc(vp, uio, &iomode, FALSE, &stalewriteverf);
  587                 lockmgr(&nmp->nm_writeverflock, LK_RELEASE, NULL);
  588                 if (stalewriteverf)
  589                         nfs_clearcommit(vp->v_mount);
  590                 return (error);
  591         }
  592 
              /* Remember where the write started, for the final sync flush. */
  593         origoff = uio->uio_offset;
  594         do {
  595                 boolean_t extending; /* if we are extending whole pages */
  596                 u_quad_t oldsize;
  597                 oldoff = uio->uio_offset;
  598                 bytelen = uio->uio_resid;
  599 
  600 #ifndef NFS_V2_ONLY
  601                 /*
  602                  * Check for a valid write lease.
  603                  */
  604                 if ((nmp->nm_flag & NFSMNT_NQNFS) &&
  605                     NQNFS_CKINVALID(vp, np, ND_WRITE)) {
  606                         do {
  607                                 error = nqnfs_getlease(vp, ND_WRITE, cred, p);
  608                         } while (error == NQNFS_EXPIRED);
  609                         if (error)
  610                                 return (error);
  611                         if (np->n_lrev != np->n_brev ||
  612                             (np->n_flag & NQNFSNONCACHE)) {
  613                                 error = nfs_vinvalbuf(vp, V_SAVE, cred, p, 1);
  614                                 if (error)
  615                                         return (error);
  616                                 np->n_brev = np->n_lrev;
  617                         }
  618                 }
  619 #endif
  620                 nfsstats.biocache_writes++;
  621 
                      /*
                       * Grow the nfsnode's size before the copy; oldsize is kept
                       * so that the change can be backed out if the copy fails.
                       */
  622                 oldsize = np->n_size;
  623                 np->n_flag |= NMODIFIED;
  624                 if (np->n_size < uio->uio_offset + bytelen) {
  625                         np->n_size = uio->uio_offset + bytelen;
  626                 }
                      /*
                       * Page-aligned offset and length, starting at or beyond
                       * the current vnode size: pass UBC_FAULTBUSY as well.
                       */
  627                 extending = ((uio->uio_offset & PAGE_MASK) == 0 &&
  628                     (bytelen & PAGE_MASK) == 0 &&
  629                     uio->uio_offset >= vp->v_size);
  630                 win = ubc_alloc(&vp->v_uobj, uio->uio_offset, &bytelen,
  631                             UBC_WRITE | (extending ? UBC_FAULTBUSY : 0));
  632                 error = uiomove(win, bytelen, uio);
  633                 ubc_release(win, 0);
  634                 if (error) {
  635                         if (extending) {
  636                                 /*
  637                                  * backout size and free pages past eof.
                                       * NOTE(review): the interlock is taken here
                                       * and never released in this function -
                                       * presumably VOP_PUTPAGES() drops it; confirm.
  638                                  */
  639                                 np->n_size = oldsize;
  640                                 simple_lock(&vp->v_interlock);
  641                                 (void)VOP_PUTPAGES(vp, round_page(vp->v_size),
  642                                     0, PGO_SYNCIO | PGO_FREE);
  643                         }
  644                         break;
  645                 }
  646                 wrotedta = 1;
  647 
  648                 /*
  649                  * update UVM's notion of the size now that we've
  650                  * copied the data into the vnode's pages.
  651                  */
  652 
  653                 if (vp->v_size < uio->uio_offset) {
  654                         uvm_vnp_setsize(vp, uio->uio_offset);
  655                         extended = 1;
  656                 }
  657 
                      /*
                       * This pass moved the offset into a different
                       * nm_wsize-sized chunk: start cleaning the pages from
                       * the chunk the pass began in up to the rounded-up new
                       * offset.
                       * NOTE(review): the error stored here is not checked
                       * before the next pass and can be overwritten by it.
                       */
  658                 if ((oldoff & ~(nmp->nm_wsize - 1)) !=
  659                     (uio->uio_offset & ~(nmp->nm_wsize - 1))) {
  660                         simple_lock(&vp->v_interlock);
  661                         error = VOP_PUTPAGES(vp,
  662                             trunc_page(oldoff & ~(nmp->nm_wsize - 1)),
  663                             round_page((uio->uio_offset + nmp->nm_wsize - 1) &
  664                                        ~(nmp->nm_wsize - 1)), PGO_CLEANIT);
  665                 }
  666         } while (uio->uio_resid > 0);
              /* Post a knote if any pass copied data; add NOTE_EXTEND if v_size grew. */
  667         if (wrotedta)
  668                 VN_KNOTE(vp, NOTE_WRITE | (extended ? NOTE_EXTEND : 0));
              /*
               * Uncached node or IO_SYNC: write the whole range touched by this
               * call synchronously.
               * NOTE(review): error is assigned unconditionally here, so an
               * error that ended the loop above (e.g. from uiomove()) is
               * replaced by the result of this flush - confirm that is intended.
               */
  669         if ((np->n_flag & NQNFSNONCACHE) || (ioflag & IO_SYNC)) {
  670                 simple_lock(&vp->v_interlock);
  671                 error = VOP_PUTPAGES(vp,
  672                     trunc_page(origoff & ~(nmp->nm_wsize - 1)),
  673                     round_page((uio->uio_offset + nmp->nm_wsize - 1) &
  674                                ~(nmp->nm_wsize - 1)),
  675                     PGO_CLEANIT | PGO_SYNCIO);
  676         }
  677         return error;
  678 }
  679 
  680 /*
  681  * Hand back the cache block `bn' (`size' bytes) of vp, marked busy;
  682  * a block that is not in the cache yet is allocated.  On a mount
  683  * with NFSMNT_INT set the wait can be abandoned: NULL is returned
  684  * as soon as nfs_sigintr() reports that the calling process has
  685  * been interrupted.
  686  */
  687 struct buf *
  688 nfs_getcacheblk(vp, bn, size, p)
  689         struct vnode *vp;
  690         daddr_t bn;
  691         int size;
  692         struct proc *p;
  693 {
  694         struct nfsmount *mnt = VFSTONFS(vp->v_mount);
  695         struct buf *blk;
  696 
  697         if ((mnt->nm_flag & NFSMNT_INT) == 0)
  698                 return (getblk(vp, bn, size, 0, 0));
  699 
  700         /* First try passes PCATCH; each retry passes a 2 * hz timeout. */
  701         for (blk = getblk(vp, bn, size, PCATCH, 0); blk == NULL;
  702             blk = getblk(vp, bn, size, 0, 2 * hz)) {
  703                 if (nfs_sigintr(mnt, NULL, p))
  704                         return (NULL);
  705         }
  706         return (blk);
  707 }
  708 
  709 /*
  710  * Flush and invalidate all dirty buffers. If another process is already
  711  * doing the flush, just wait for completion.
  712  */
  713 int
  714 nfs_vinvalbuf(vp, flags, cred, p, intrflg)
  715         struct vnode *vp;
  716         int flags;
  717         struct ucred *cred;
  718         struct proc *p;
  719         int intrflg;
  720 {
  721         struct nfsnode *np = VTONFS(vp);
  722         struct nfsmount *nmp = VFSTONFS(vp->v_mount);
  723         int error = 0, slpflag, slptimeo;
  724 
  725         if ((nmp->nm_flag & NFSMNT_INT) == 0)
  726                 intrflg = 0;
  727         if (intrflg) {
  728                 slpflag = PCATCH;
  729                 slptimeo = 2 * hz;
  730         } else {
  731                 slpflag = 0;
  732                 slptimeo = 0;
  733         }
  734         /*
  735          * First wait for any other process doing a flush to complete.
  736          */
  737         simple_lock(&vp->v_interlock);
  738         while (np->n_flag & NFLUSHINPROG) {
  739                 np->n_flag |= NFLUSHWANT;
  740                 error = ltsleep(&np->n_flag, PRIBIO + 2, "nfsvinval",
  741                         slptimeo, &vp->v_interlock);
  742                 if (error && intrflg && nfs_sigintr(nmp, NULL, p)) {
  743                         simple_unlock(&vp->v_interlock);
  744                         return EINTR;
  745                 }
  746         }
  747 
  748         /*
  749          * Now, flush as required.
  750          */
  751         np->n_flag |= NFLUSHINPROG;
  752         simple_unlock(&vp->v_interlock);
  753         error = vinvalbuf(vp, flags, cred, p, slpflag, 0);
  754         while (error) {
  755                 if (intrflg && nfs_sigintr(nmp, NULL, p)) {
  756                         error = EINTR;
  757                         break;
  758                 }
  759                 error = vinvalbuf(vp, flags, cred, p, 0, slptimeo);
  760         }
  761         simple_lock(&vp->v_interlock);
  762         if (error == 0)
  763                 np->n_flag &= ~NMODIFIED;
  764         np->n_flag &= ~NFLUSHINPROG;
  765         if (np->n_flag & NFLUSHWANT) {
  766                 np->n_flag &= ~NFLUSHWANT;
  767                 wakeup(&np->n_flag);
  768         }
  769         simple_unlock(&vp->v_interlock);
  770         return error;
  771 }
  772 
  773 /*
  774  * Initiate asynchronous I/O. Return an error if no nfsiods are available.
  775  * This is mainly to avoid queueing async I/O requests when the nfsiods
  776  * are all hung on a dead server.
       *
       * bp is the buffer to be handed to an nfsiod; the mount it belongs to
       * is found through bp->b_vp->v_mount.
       *
       * Returns 0 once bp has been appended to the mount's nm_bufq, EINTR
       * when nfs_sigintr() reports an interruption while waiting for room
       * on that queue, and EIO when nfs_numasync is zero or no iod could be
       * found for this mount (the caller then has to do the i/o itself).
       *
       * Locking: each iod is examined under its nid_slock; the mount's
       * queue state (nm_bufqiods, nm_bufqlen, nm_bufq, nm_bufqwant) is
       * changed under nm_slock.
  777  */
  778 
  779 int
  780 nfs_asyncio(bp)
  781         struct buf *bp;
  782 {
  783         int i;
  784         struct nfsmount *nmp;
  785         int gotiod, slpflag = 0, slptimeo = 0, error;
  786 
              /* Nothing to hand the request to (presumably no nfsiods running). */
  787         if (nfs_numasync == 0)
  788                 return (EIO);
  789 
  790         nmp = VFSTONFS(bp->b_vp->v_mount);
  791 again:
              /*
               * We also get back here from the sleep loop below when
               * nm_bufqiods turned out to be zero after waking up.
               */
  792         if (nmp->nm_flag & NFSMNT_INT)
  793                 slpflag = PCATCH;
  794         gotiod = FALSE;
  795  
  796         /*
  797          * Find a free iod to process this request.
  798          */
  799 
  800         for (i = 0; i < NFS_MAXASYNCDAEMON; i++) {
  801                 struct nfs_iod *iod = &nfs_asyncdaemon[i];
  802 
  803                 simple_lock(&iod->nid_slock);
  804                 if (iod->nid_want) {
  805                         /*
  806                          * Found one, so wake it up and tell it which
  807                          * mount to process.
                               *
                               * nm_slock is taken before nid_slock is
                               * released and is still held when we break
                               * out of the loop.
  808                          */
  809                         iod->nid_want = NULL;
  810                         iod->nid_mount = nmp;
  811                         wakeup(&iod->nid_want);
  812                         simple_lock(&nmp->nm_slock);
  813                         simple_unlock(&iod->nid_slock);
  814                         nmp->nm_bufqiods++;
  815                         gotiod = TRUE;
  816                         break;
  817                 }
  818                 simple_unlock(&iod->nid_slock);
  819         }
  820 
  821         /*
  822          * If none are free, we may already have an iod working on this mount
  823          * point.  If so, it will process our request.
  824          */
  825 
  826         if (!gotiod) {
  827                 simple_lock(&nmp->nm_slock);
  828                 if (nmp->nm_bufqiods > 0)
  829                         gotiod = TRUE;
  830         }
  831 
              /* Either path above leaves nm_slock held. */
  832         LOCK_ASSERT(simple_lock_held(&nmp->nm_slock));
  833 
  834         /*
  835          * If we have an iod which can process the request, then queue
  836          * the buffer.  However, even if we have an iod, do not initiate
  837          * queue cleaning if curproc is the pageout daemon. if the NFS mount
  838          * is via local loopback, we may put curproc (pagedaemon) to sleep
  839          * waiting for the writes to complete. But the server (ourself)
  840          * may block the write, waiting for its (ie., our) pagedaemon
  841          * to produce clean pages to handle the write: deadlock.
  842          * XXX: start non-loopback mounts straight away?  If "lots free",
  843          * let pagedaemon start loopback writes anyway?
  844          */
  845         if (gotiod) {
  846           
  847                 /*
  848                  * Ensure that the queue never grows too large.
                       * Callers other than the pagedaemon sleep while the
                       * queue already holds 2 * nfs_numasync buffers.
  849                  */
  850                 if (curproc == uvm.pagedaemon_proc) {
  851                         /* Enqueue for later, to avoid free-page deadlock */
  852                           (void) 0;
  853                 } else while (nmp->nm_bufqlen >= 2*nfs_numasync) {
  854                         nmp->nm_bufqwant = TRUE;
                              /*
                               * The sleep is requested with PNORELOCK; the
                               * code that follows treats nm_slock as not
                               * held after ltsleep() returns (it returns
                               * or jumps to `again' without unlocking, and
                               * re-locks at the bottom of the loop).
                               */
  855                         error = ltsleep(&nmp->nm_bufq,
  856                             slpflag | PRIBIO | PNORELOCK,
  857                             "nfsaio", slptimeo, &nmp->nm_slock);
  858                         if (error) {
  859                                 if (nfs_sigintr(nmp, NULL, curproc))
  860                                         return (EINTR);
                                      /*
                                       * Not interrupted according to
                                       * nfs_sigintr(): from now on sleep
                                       * with a 2 * hz timeout instead of
                                       * PCATCH.
                                       */
  861                                 if (slpflag == PCATCH) {
  862                                         slpflag = 0;
  863                                         slptimeo = 2 * hz;
  864                                 }
  865                         }
  866 
  867                         /*
  868                          * We might have lost our iod while sleeping,
  869                          * so check and loop if necessary.
  870                          */
  871 
  872                         if (nmp->nm_bufqiods == 0)
  873                                 goto again;
  874 
                              /* Re-take nm_slock before re-testing the queue length. */
  875                         simple_lock(&nmp->nm_slock);
  876                 }
                      /* Queue the buffer for the iod(s) serving this mount. */
  877                 TAILQ_INSERT_TAIL(&nmp->nm_bufq, bp, b_freelist);
  878                 nmp->nm_bufqlen++;
  879                 simple_unlock(&nmp->nm_slock);
  880                 return (0);
  881         }
  882         simple_unlock(&nmp->nm_slock);
  883 
  884         /*
  885          * All the iods are busy on other mounts, so return EIO to
  886          * force the caller to process the i/o synchronously.
  887          */
  888 
  889         return (EIO);
  890 }
  891 
  892 /*
  893  * nfs_doio for read.
  894  */
  895 static int
  896 nfs_doio_read(bp, uiop)
  897         struct buf *bp;
  898         struct uio *uiop;
  899 {
  900         struct vnode *vp = bp->b_vp;
  901         struct nfsnode *np = VTONFS(vp);
  902         struct nfsmount *nmp = VFSTONFS(vp->v_mount);
  903         int error = 0;
  904 
  905         uiop->uio_rw = UIO_READ;
  906         switch (vp->v_type) {
  907         case VREG:
  908                 nfsstats.read_bios++;
  909                 error = nfs_readrpc(vp, uiop);
  910                 if (!error && uiop->uio_resid) {
  911                         int diff, len;
  912 
  913                         /*
  914                          * If len > 0, there is a hole in the file and
  915                          * no writes after the hole have been pushed to
  916                          * the server yet.
  917                          * Just zero fill the rest of the valid area.
  918                          */
  919 
  920                         diff = bp->b_bcount - uiop->uio_resid;
  921                         len = np->n_size - ((((off_t)bp->b_blkno) << DEV_BSHIFT)
  922                                 + diff);
  923                         if (len > 0) {
  924                                 len = MIN(len, uiop->uio_resid);
  925                                 memset((char *)bp->b_data + diff, 0, len);
  926                         }
  927                 }
  928                 if (uiop->uio_procp && (vp->v_flag & VTEXT) &&
  929                     (((nmp->nm_flag & NFSMNT_NQNFS) &&
  930                       NQNFS_CKINVALID(vp, np, ND_READ) &&
  931                       np->n_lrev != np->n_brev) ||
  932                      (!(nmp->nm_flag & NFSMNT_NQNFS) &&
  933                       timespeccmp(&np->n_mtime, &np->n_vattr->va_mtime, !=)))) {
  934                         uprintf("Process killed due to "
  935                                 "text file modification\n");
  936                         psignal(uiop->uio_procp, SIGKILL);
  937 #if 0 /* XXX NJWLWP */
  938                         uiop->uio_procp->p_holdcnt++;
  939 #endif
  940                 }
  941                 break;
  942         case VLNK:
  943                 KASSERT(uiop->uio_offset == (off_t)0);
  944                 nfsstats.readlink_bios++;
  945                 error = nfs_readlinkrpc(vp, uiop, curproc->p_ucred);
  946                 break;
  947         case VDIR:
  948                 nfsstats.readdir_bios++;
  949                 uiop->uio_offset = bp->b_dcookie;
  950                 if (nmp->nm_flag & NFSMNT_RDIRPLUS) {
  951                         error = nfs_readdirplusrpc(vp, uiop, np->n_rcred);
  952                         if (error == NFSERR_NOTSUPP)
  953                                 nmp->nm_flag &= ~NFSMNT_RDIRPLUS;
  954                 }
  955                 if ((nmp->nm_flag & NFSMNT_RDIRPLUS) == 0)
  956                         error = nfs_readdirrpc(vp, uiop, np->n_rcred);
  957                 if (!error) {
  958                         bp->b_dcookie = uiop->uio_offset;
  959                 }
  960                 break;
  961         default:
  962                 printf("nfs_doio:  type %x unexpected\n", vp->v_type);
  963                 break;
  964         }
  965         if (error) {
  966                 bp->b_flags |= B_ERROR;
  967                 bp->b_error = error;
  968         }
  969         return error;
  970 }
  971 
  972 /*
  973  * nfs_doio for write.
       *
       * Pushes the data of buffer `bp' (described by `uiop') to the server.
       * Two ways exist:
       *
       *  - if every page behind the buffer belongs to the vnode's object
       *    and already carries PG_NEEDCOMMIT, only a COMMIT RPC is issued
       *    (unless the range is already recorded as committed);
       *  - otherwise the data is sent with nfs_writerpc(), UNSTABLE for
       *    asynchronous NFSv3 buffers and FILESYNC in all other cases.
       *    After an UNSTABLE write the range is recorded as "to be
       *    committed".
       *
       * nm_writeverflock is held shared while the RPCs are done, and
       * n_commitlock exclusively while the commit ranges are consulted or
       * changed.  Page flags are changed under the object's vmobjlock.
       *
       * Returns 0 or an error.  On the error paths marked below B_ERROR
       * and b_error are set on the buffer, the error is recorded in
       * np->n_error and NWRITEERR is set in np->n_flag.
  974  */
  975 static int
  976 nfs_doio_write(bp, uiop)
  977         struct buf *bp;
  978         struct uio *uiop;
  979 {
  980         struct vnode *vp = bp->b_vp;
  981         struct nfsnode *np = VTONFS(vp);
  982         struct nfsmount *nmp = VFSTONFS(vp->v_mount);
  983         int iomode;
  984         boolean_t stalewriteverf = FALSE;
              /* number of pages needed to cover b_bcount bytes, rounded up */
  985         int i, npages = (bp->b_bcount + PAGE_SIZE - 1) >> PAGE_SHIFT;
  986         struct vm_page *pgs[npages];
  987         boolean_t needcommit = TRUE; /* need only COMMIT RPC */
  988         boolean_t pageprotected;
  989         struct uvm_object *uobj = &vp->v_uobj;
  990         int error;
  991         off_t off, cnt;
  992 
              /* Only asynchronous NFSv3 writes may start out as UNSTABLE. */
  993         if ((bp->b_flags & B_ASYNC) != 0 && NFS_ISV3(vp)) {
  994                 iomode = NFSV3WRITE_UNSTABLE;
  995         } else {
  996                 iomode = NFSV3WRITE_FILESYNC;
  997         }
  998 
  999 again:
              /*
               * We come back here from the commit-only path below after
               * NFSERR_STALEWRITEVERF, once nfs_clearcommit() has been
               * called.
               */
 1000         lockmgr(&nmp->nm_writeverflock, LK_SHARED, NULL);
 1001 
              /*
               * Look at every page behind the buffer to decide between
               * "COMMIT only" and "WRITE", and between UNSTABLE and
               * FILESYNC.
               */
 1002         for (i = 0; i < npages; i++) {
 1003                 pgs[i] = uvm_pageratop((vaddr_t)bp->b_data + (i << PAGE_SHIFT));
 1004                 if (pgs[i]->uobject == uobj &&
 1005                     pgs[i]->offset == uiop->uio_offset + (i << PAGE_SHIFT)) {
 1006                         KASSERT(pgs[i]->flags & PG_BUSY);
 1007                         /*
 1008                          * this page belongs to our object.
 1009                          */
 1010                         simple_lock(&uobj->vmobjlock);
 1011                         /*
 1012                          * write out the page stably if it's about to
 1013                          * be released because we can't resend it
 1014                          * on the server crash.
 1015                          *
 1016                          * XXX assuming PG_RELEASE|PG_PAGEOUT won't be
 1017                          * changed until unbusy the page.
 1018                          */
 1019                         if (pgs[i]->flags & (PG_RELEASED|PG_PAGEOUT))
 1020                                 iomode = NFSV3WRITE_FILESYNC;
 1021                         /*
 1022                          * if we met a page which hasn't been sent yet,
 1023                          * we need do WRITE RPC.
 1024                          */
 1025                         if ((pgs[i]->flags & PG_NEEDCOMMIT) == 0)
 1026                                 needcommit = FALSE;
 1027                         simple_unlock(&uobj->vmobjlock);
 1028                 } else {
                              /*
                               * The page is not the object's page at the
                               * expected offset: write synchronously and
                               * never treat it as "commit only".
                               */
 1029                         iomode = NFSV3WRITE_FILESYNC;
 1030                         needcommit = FALSE;
 1031                 }
 1032         }
              /*
               * About to do an UNSTABLE write: mark all pages as needing a
               * commit and make them read-only.
               */
 1033         if (!needcommit && iomode == NFSV3WRITE_UNSTABLE) {
 1034                 simple_lock(&uobj->vmobjlock);
 1035                 for (i = 0; i < npages; i++) {
 1036                         pgs[i]->flags |= PG_NEEDCOMMIT | PG_RDONLY;
 1037                         pmap_page_protect(pgs[i], VM_PROT_READ);
 1038                 }
 1039                 simple_unlock(&uobj->vmobjlock);
 1040                 pageprotected = TRUE; /* pages can't be modified during i/o. */
 1041         } else
 1042                 pageprotected = FALSE;
 1043 
 1044         /*
 1045          * Send the data to the server if necessary,
 1046          * otherwise just send a commit rpc.
 1047          */
 1048 
 1049         if (needcommit) {
 1050 
 1051                 /*
 1052                  * If the buffer is in the range that we already committed,
 1053                  * there's nothing to do.
 1054                  *
 1055                  * If it's in the range that we need to commit, push the
 1056                  * whole range at once, otherwise only push the buffer.
 1057                  * In both these cases, acquire the commit lock to avoid
 1058                  * other processes modifying the range.
 1059                  */
 1060 
 1061                 off = uiop->uio_offset;
 1062                 cnt = bp->b_bcount;
 1063                 lockmgr(&np->n_commitlock, LK_EXCLUSIVE, NULL);
 1064                 if (!nfs_in_committed_range(vp, off, bp->b_bcount)) {
 1065                         boolean_t pushedrange;
 1066                         if (nfs_in_tobecommitted_range(vp, off, bp->b_bcount)) {
                                      /* commit all of [n_pushlo, n_pushhi) */
 1067                                 pushedrange = TRUE;
 1068                                 off = np->n_pushlo;
 1069                                 cnt = np->n_pushhi - np->n_pushlo;
 1070                         } else {
 1071                                 pushedrange = FALSE;
 1072                         }
 1073                         error = nfs_commit(vp, off, cnt, curproc);
 1074                         if (error == 0) {
 1075                                 if (pushedrange) {
 1076                                         nfs_merge_commit_ranges(vp);
 1077                                 } else {
 1078                                         nfs_add_committed_range(vp, off, cnt);
 1079                                 }
 1080                         }
 1081                 } else {
 1082                         error = 0;
 1083                 }
                      /* Both locks are dropped before the result is examined. */
 1084                 lockmgr(&np->n_commitlock, LK_RELEASE, NULL);
 1085                 lockmgr(&nmp->nm_writeverflock, LK_RELEASE, NULL);
 1086                 if (!error) {
 1087                         /*
 1088                          * pages are now on stable storage.
 1089                          */
 1090                         uiop->uio_resid = 0;
 1091                         simple_lock(&uobj->vmobjlock);
 1092                         for (i = 0; i < npages; i++) {
 1093                                 pgs[i]->flags &= ~(PG_NEEDCOMMIT | PG_RDONLY);
 1094                         }
 1095                         simple_unlock(&uobj->vmobjlock);
 1096                         return 0;
 1097                 } else if (error == NFSERR_STALEWRITEVERF) {
                              /*
                               * Stale write verifier: call nfs_clearcommit()
                               * for the mount and start over.
                               */
 1098                         nfs_clearcommit(vp->v_mount);
 1099                         goto again;
 1100                 }
                      /* Any other commit error is reported on the buffer and node. */
 1101                 if (error) {
 1102                         bp->b_flags |= B_ERROR;
 1103                         bp->b_error = np->n_error = error;
 1104                         np->n_flag |= NWRITEERR;
 1105                 }
 1106                 return error;
 1107         }
              /*
               * WRITE path.  off/cnt remember the range of the buffer;
               * iomode is passed by reference and is examined again after
               * the call.
               */
 1108         off = uiop->uio_offset;
 1109         cnt = bp->b_bcount;
 1110         uiop->uio_rw = UIO_WRITE;
 1111         nfsstats.write_bios++;
 1112         error = nfs_writerpc(vp, uiop, &iomode, pageprotected, &stalewriteverf);
 1113         if (!error && iomode == NFSV3WRITE_UNSTABLE) {
 1114                 /*
 1115                  * we need to commit pages later.
 1116                  */
 1117                 lockmgr(&np->n_commitlock, LK_EXCLUSIVE, NULL);
 1118                 nfs_add_tobecommitted_range(vp, off, cnt);
 1119                 /*
 1120                  * if there can be too many uncommitted pages, commit them now.
 1121                  */
 1122                 if (np->n_pushhi - np->n_pushlo > nfs_commitsize) {
                              /* commit the first nfs_commitsize / 2 bytes of the range */
 1123                         off = np->n_pushlo;
 1124                         cnt = nfs_commitsize >> 1;
 1125                         error = nfs_commit(vp, off, cnt, curproc);
 1126                         if (!error) {
 1127                                 nfs_add_committed_range(vp, off, cnt);
 1128                                 nfs_del_tobecommitted_range(vp, off, cnt);
 1129                         }
                              /*
                               * NOTE(review): an nfs_commit() error other
                               * than NFSERR_STALEWRITEVERF is returned from
                               * here without B_ERROR/b_error being set on
                               * bp -- confirm that this is intended.
                               */
 1130                         if (error == NFSERR_STALEWRITEVERF) {
 1131                                 stalewriteverf = TRUE;
 1132                                 error = 0; /* it isn't a real error */
 1133                         }
 1134                 } else {
 1135                         /*
 1136                          * re-dirty pages so that they will be passed
 1137                          * to us later again.
 1138                          */
 1139                         simple_lock(&uobj->vmobjlock);
 1140                         for (i = 0; i < npages; i++) {
 1141                                 pgs[i]->flags &= ~PG_CLEAN;
 1142                         }
 1143                         simple_unlock(&uobj->vmobjlock);
 1144                 }
 1145                 lockmgr(&np->n_commitlock, LK_RELEASE, NULL);
 1146         } else if (!error) {
 1147                 /*
 1148                  * pages are now on stable storage.
                       * Drop the range from the committed range and clear
                       * the commit/read-only marks on the pages.
 1149                  */
 1150                 lockmgr(&np->n_commitlock, LK_EXCLUSIVE, NULL);
 1151                 nfs_del_committed_range(vp, off, cnt);
 1152                 lockmgr(&np->n_commitlock, LK_RELEASE, NULL);
 1153                 simple_lock(&uobj->vmobjlock);
 1154                 for (i = 0; i < npages; i++) {
 1155                         pgs[i]->flags &= ~(PG_NEEDCOMMIT | PG_RDONLY);
 1156                 }
 1157                 simple_unlock(&uobj->vmobjlock);
 1158         } else {
 1159                 /*
 1160                  * we got an error.
 1161                  */
 1162                 bp->b_flags |= B_ERROR;
 1163                 bp->b_error = np->n_error = error;
 1164                 np->n_flag |= NWRITEERR;
 1165         }
 1166 
 1167         lockmgr(&nmp->nm_writeverflock, LK_RELEASE, NULL);
 1168 
              /*
               * A stale write verifier was seen, either by nfs_writerpc()
               * or by the commit above; nfs_clearcommit() is called only
               * after nm_writeverflock has been released.
               */
 1169         if (stalewriteverf) {
 1170                 nfs_clearcommit(vp->v_mount);
 1171         }
 1172         return error;
 1173 }
 1174 
 1175 /*
 1176  * nfs_doio for B_PHYS.
 1177  */
 1178 static int
 1179 nfs_doio_phys(bp, uiop)
 1180         struct buf *bp;
 1181         struct uio *uiop;
 1182 {
 1183         struct vnode *vp = bp->b_vp;
 1184         int error;
 1185 
 1186         uiop->uio_offset = ((off_t)bp->b_blkno) << DEV_BSHIFT;
 1187         if (bp->b_flags & B_READ) {
 1188                 uiop->uio_rw = UIO_READ;
 1189                 nfsstats.read_physios++;
 1190                 error = nfs_readrpc(vp, uiop);
 1191         } else {
 1192                 int iomode = NFSV3WRITE_DATASYNC;
 1193                 boolean_t stalewriteverf;
 1194                 struct nfsmount *nmp = VFSTONFS(vp->v_mount);
 1195 
 1196                 uiop->uio_rw = UIO_WRITE;
 1197                 nfsstats.write_physios++;
 1198                 lockmgr(&nmp->nm_writeverflock, LK_SHARED, NULL);
 1199                 error = nfs_writerpc(vp, uiop, &iomode, FALSE, &stalewriteverf);
 1200                 lockmgr(&nmp->nm_writeverflock, LK_RELEASE, NULL);
 1201                 if (stalewriteverf) {
 1202                         nfs_clearcommit(bp->b_vp->v_mount);
 1203                 }
 1204         }
 1205         if (error) {
 1206                 bp->b_flags |= B_ERROR;
 1207                 bp->b_error = error;
 1208         }
 1209         return error;
 1210 }
 1211 
 1212 /*
 1213  * Do an I/O operation to/from a cache block. This may be called
 1214  * synchronously or from an nfsiod.
 1215  */
 1216 int
 1217 nfs_doio(bp, p)
 1218         struct buf *bp;
 1219         struct proc *p;
 1220 {
 1221         int error;
 1222         struct uio uio;
 1223         struct uio *uiop = &uio;
 1224         struct iovec io;
 1225         UVMHIST_FUNC("nfs_doio"); UVMHIST_CALLED(ubchist);
 1226 
 1227         uiop->uio_iov = &io;
 1228         uiop->uio_iovcnt = 1;
 1229         uiop->uio_segflg = UIO_SYSSPACE;
 1230         uiop->uio_procp = p;
 1231         uiop->uio_offset = (((off_t)bp->b_blkno) << DEV_BSHIFT);
 1232         io.iov_base = bp->b_data;
 1233         io.iov_len = uiop->uio_resid = bp->b_bcount;
 1234 
 1235         /*
 1236          * Historically, paging was done with physio, but no more...
 1237          */
 1238         if (bp->b_flags & B_PHYS) {
 1239                 /*
 1240                  * ...though reading /dev/drum still gets us here.
 1241                  */
 1242                 error = nfs_doio_phys(bp, uiop);
 1243         } else if (bp->b_flags & B_READ) {
 1244                 error = nfs_doio_read(bp, uiop);
 1245         } else {
 1246                 error = nfs_doio_write(bp, uiop);
 1247         }
 1248         bp->b_resid = uiop->uio_resid;
 1249         biodone(bp);
 1250         return (error);
 1251 }
 1252 
 1253 /*
 1254  * Vnode op for VM getpages.
 1255  */
 1256 
 1257 int
 1258 nfs_getpages(v)
 1259         void *v;
 1260 {
 1261         struct vop_getpages_args /* {
 1262                 struct vnode *a_vp;
 1263                 voff_t a_offset;
 1264                 struct vm_page **a_m;
 1265                 int *a_count;
 1266                 int a_centeridx;
 1267                 vm_prot_t a_access_type;
 1268                 int a_advice;
 1269                 int a_flags;
 1270         } */ *ap = v;
 1271 
 1272         struct vnode *vp = ap->a_vp;
 1273         struct uvm_object *uobj = &vp->v_uobj;
 1274         struct nfsnode *np = VTONFS(vp);
 1275         const int npages = *ap->a_count;
 1276         struct vm_page *pg, **pgs, *opgs[npages];
 1277         off_t origoffset, len;
 1278         int i, error;
 1279         boolean_t v3 = NFS_ISV3(vp);
 1280         boolean_t write = (ap->a_access_type & VM_PROT_WRITE) != 0;
 1281         boolean_t locked = (ap->a_flags & PGO_LOCKED) != 0;
 1282 
 1283         /*
 1284          * call the genfs code to get the pages.  `pgs' may be NULL
 1285          * when doing read-ahead.
 1286          */
 1287 
 1288         pgs = ap->a_m;
 1289         if (write && locked && v3) {
 1290                 KASSERT(pgs != NULL);
 1291 #ifdef DEBUG
 1292 
 1293                 /*
 1294                  * If PGO_LOCKED is set, real pages shouldn't exists
 1295                  * in the array.
 1296                  */
 1297 
 1298                 for (i = 0; i < npages; i++)
 1299                         KDASSERT(pgs[i] == NULL || pgs[i] == PGO_DONTCARE);
 1300 #endif
 1301                 memcpy(opgs, pgs, npages * sizeof(struct vm_pages *));
 1302         }
 1303         error = genfs_getpages(v);
 1304         if (error) {
 1305                 return (error);
 1306         }
 1307 
 1308         /*
 1309          * for read faults where the nfs node is not yet marked NMODIFIED,
 1310          * set PG_RDONLY on the pages so that we come back here if someone
 1311          * tries to modify later via the mapping that will be entered for
 1312          * this fault.
 1313          */
 1314 
 1315         if (!write && (np->n_flag & NMODIFIED) == 0 && pgs != NULL) {
 1316                 if (!locked) {
 1317                         simple_lock(&uobj->vmobjlock);
 1318                 }
 1319                 for (i = 0; i < npages; i++) {
 1320                         pg = pgs[i];
 1321                         if (pg == NULL || pg == PGO_DONTCARE) {
 1322                                 continue;
 1323                         }
 1324                         pg->flags |= PG_RDONLY;
 1325                 }
 1326                 if (!locked) {
 1327                         simple_unlock(&uobj->vmobjlock);
 1328                 }
 1329         }
 1330         if (!write) {
 1331                 return (0);
 1332         }
 1333 
 1334         /*
 1335          * this is a write fault, update the commit info.
 1336          */
 1337 
 1338         origoffset = ap->a_offset;
 1339         len = npages << PAGE_SHIFT;
 1340 
 1341         if (v3) {
 1342                 error = lockmgr(&np->n_commitlock,
 1343                     LK_EXCLUSIVE | (locked ? LK_NOWAIT : 0), NULL);
 1344                 if (error) {
 1345                         KASSERT(locked != 0);
 1346 
 1347                         /*
 1348                          * Since PGO_LOCKED is set, we need to unbusy
 1349                          * all pages fetched by genfs_getpages() above,
 1350                          * tell the caller that there are no pages
 1351                          * available and put back original pgs array.
 1352                          */
 1353 
 1354                         uvm_lock_pageq();
 1355                         uvm_page_unbusy(pgs, npages);
 1356                         uvm_unlock_pageq();
 1357                         *ap->a_count = 0;
 1358                         memcpy(pgs, opgs,
 1359                             npages * sizeof(struct vm_pages *));
 1360                         return (error);
 1361                 }
 1362                 nfs_del_committed_range(vp, origoffset, len);
 1363                 nfs_del_tobecommitted_range(vp, origoffset, len);
 1364         }
 1365         np->n_flag |= NMODIFIED;
 1366         if (!locked) {
 1367                 simple_lock(&uobj->vmobjlock);
 1368         }
 1369         for (i = 0; i < npages; i++) {
 1370                 pg = pgs[i];
 1371                 if (pg == NULL || pg == PGO_DONTCARE) {
 1372                         continue;
 1373                 }
 1374                 pg->flags &= ~(PG_NEEDCOMMIT | PG_RDONLY);
 1375         }
 1376         if (!locked) {
 1377                 simple_unlock(&uobj->vmobjlock);
 1378         }
 1379         if (v3) {
 1380                 lockmgr(&np->n_commitlock, LK_RELEASE, NULL);
 1381         }
 1382         return (0);
 1383 }
Cache object: ab702534b867dbb5bf26e50d2de3e06d
[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]
This page is part of the FreeBSD/Linux Linux Kernel Cross-Reference, and was automatically generated using a modified version of the LXR engine.
FreeBSD/Linux Kernel Cross Reference sys/nfs/nfs_bio.c

FreeBSD/Linux Kernel Cross Reference
sys/nfs/nfs_bio.c