The Design and Implementation of the FreeBSD Operating System, Second Edition
Now available: The Design and Implementation of the FreeBSD Operating System (Second Edition)


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]

FreeBSD/Linux Kernel Cross Reference
sys/nfs/nfs_bio.c

Version: -  FREEBSD  -  FREEBSD-13-STABLE  -  FREEBSD-13-0  -  FREEBSD-12-STABLE  -  FREEBSD-12-0  -  FREEBSD-11-STABLE  -  FREEBSD-11-0  -  FREEBSD-10-STABLE  -  FREEBSD-10-0  -  FREEBSD-9-STABLE  -  FREEBSD-9-0  -  FREEBSD-8-STABLE  -  FREEBSD-8-0  -  FREEBSD-7-STABLE  -  FREEBSD-7-0  -  FREEBSD-6-STABLE  -  FREEBSD-6-0  -  FREEBSD-5-STABLE  -  FREEBSD-5-0  -  FREEBSD-4-STABLE  -  FREEBSD-3-STABLE  -  FREEBSD22  -  l41  -  OPENBSD  -  linux-2.6  -  MK84  -  PLAN9  -  xnu-8792 
SearchContext: -  none  -  3  -  10 

    1 /*
    2  * Copyright (c) 1989, 1993
    3  *      The Regents of the University of California.  All rights reserved.
    4  *
    5  * This code is derived from software contributed to Berkeley by
    6  * Rick Macklem at The University of Guelph.
    7  *
    8  * Redistribution and use in source and binary forms, with or without
    9  * modification, are permitted provided that the following conditions
   10  * are met:
   11  * 1. Redistributions of source code must retain the above copyright
   12  *    notice, this list of conditions and the following disclaimer.
   13  * 2. Redistributions in binary form must reproduce the above copyright
   14  *    notice, this list of conditions and the following disclaimer in the
   15  *    documentation and/or other materials provided with the distribution.
   16  * 3. All advertising materials mentioning features or use of this software
   17  *    must display the following acknowledgement:
   18  *      This product includes software developed by the University of
   19  *      California, Berkeley and its contributors.
   20  * 4. Neither the name of the University nor the names of its contributors
   21  *    may be used to endorse or promote products derived from this software
   22  *    without specific prior written permission.
   23  *
   24  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
   25  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
   26  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
   27  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
   28  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
   29  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
   30  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
   31  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
   32  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
   33  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
   34  * SUCH DAMAGE.
   35  *
   36  *      @(#)nfs_bio.c   8.9 (Berkeley) 3/30/95
   37  * $FreeBSD$
   38  */
   39 
   40 
   41 #include <sys/param.h>
   42 #include <sys/systm.h>
   43 #include <sys/resourcevar.h>
   44 #include <sys/signalvar.h>
   45 #include <sys/proc.h>
   46 #include <sys/buf.h>
   47 #include <sys/vnode.h>
   48 #include <sys/mount.h>
   49 #include <sys/kernel.h>
   50 
   51 #include <vm/vm.h>
   52 #include <vm/vm_extern.h>
   53 #include <vm/vm_prot.h>
   54 #include <vm/vm_page.h>
   55 #include <vm/vm_object.h>
   56 #include <vm/vm_pager.h>
   57 #include <vm/vnode_pager.h>
   58 
   59 #include <nfs/rpcv2.h>
   60 #include <nfs/nfsproto.h>
   61 #include <nfs/nfs.h>
   62 #include <nfs/nfsmount.h>
   63 #include <nfs/nqnfs.h>
   64 #include <nfs/nfsnode.h>
   65 
   66 static struct buf *nfs_getcacheblk __P((struct vnode *vp, daddr_t bn, int size,
   67                                         struct proc *p));
   68 static void nfs_prot_buf __P((struct buf *bp, int off, int n));
   69 
   70 extern int nfs_numasync;
   71 extern struct nfsstats nfsstats;
   72 
   73 /*
   74  * Vnode op for VM getpages.
   75  */
   76 int
   77 nfs_getpages(ap)
   78         struct vop_getpages_args /* {
   79                 struct vnode *a_vp;
   80                 vm_page_t *a_m;
   81                 int a_count;
   82                 int a_reqpage;
   83                 vm_ooffset_t a_offset;
   84         } */ *ap;
   85 {
   86         int i, error, nextoff, size, toff, npages, count;
   87         struct uio uio;
   88         struct iovec iov;
   89         vm_offset_t kva;
   90         struct buf *bp;
   91         struct vnode *vp;
   92         struct proc *p;
   93         struct ucred *cred;
   94         struct nfsmount *nmp;
   95         vm_page_t *pages;
   96 
   97         vp = ap->a_vp;
   98         p = curproc;                            /* XXX */
   99         cred = curproc->p_ucred;                /* XXX */
  100         nmp = VFSTONFS(vp->v_mount);
  101         pages = ap->a_m;
  102         count = ap->a_count;
  103 
  104         if (vp->v_object == NULL) {
  105                 printf("nfs_getpages: called with non-merged cache vnode??\n");
  106                 return VM_PAGER_ERROR;
  107         }
  108 
  109         if ((nmp->nm_flag & NFSMNT_NFSV3) != 0 &&
  110             (nmp->nm_state & NFSSTA_GOTFSINFO) == 0)
  111                 (void)nfs_fsinfo(nmp, vp, cred, p);
  112         /*
  113          * We use only the kva address for the buffer, but this is extremely
  114          * convienient and fast.
  115          */
  116         bp = getpbuf();
  117 
  118         npages = btoc(count);
  119         kva = (vm_offset_t) bp->b_data;
  120         pmap_qenter(kva, pages, npages);
  121 
  122         iov.iov_base = (caddr_t) kva;
  123         iov.iov_len = count;
  124         uio.uio_iov = &iov;
  125         uio.uio_iovcnt = 1;
  126         uio.uio_offset = IDX_TO_OFF(pages[0]->pindex);
  127         uio.uio_resid = count;
  128         uio.uio_segflg = UIO_SYSSPACE;
  129         uio.uio_rw = UIO_READ;
  130         uio.uio_procp = p;
  131 
  132         error = nfs_readrpc(vp, &uio, cred);
  133         pmap_qremove(kva, npages);
  134 
  135         relpbuf(bp);
  136 
  137         if (error && (uio.uio_resid == count))
  138                 return VM_PAGER_ERROR;
  139 
  140         size = count - uio.uio_resid;
  141 
  142         for (i = 0, toff = 0; i < npages; i++, toff = nextoff) {
  143                 vm_page_t m;
  144                 nextoff = toff + PAGE_SIZE;
  145                 m = pages[i];
  146 
  147                 m->flags &= ~PG_ZERO;
  148 
  149                 if (nextoff <= size) {
  150                         m->valid = VM_PAGE_BITS_ALL;
  151                         m->dirty = 0;
  152                 } else {
  153                         int nvalid = ((size + DEV_BSIZE - 1) - toff) & ~(DEV_BSIZE - 1);
  154                         vm_page_set_validclean(m, 0, nvalid);
  155                 }
  156                 
  157                 if (i != ap->a_reqpage) {
  158                         /*
  159                          * Whether or not to leave the page activated is up in
  160                          * the air, but we should put the page on a page queue
  161                          * somewhere (it already is in the object).  Result:
  162                          * It appears that emperical results show that
  163                          * deactivating pages is best.
  164                          */
  165 
  166                         /*
  167                          * Just in case someone was asking for this page we
  168                          * now tell them that it is ok to use.
  169                          */
  170                         if (!error) {
  171                                 if (m->flags & PG_WANTED)
  172                                         vm_page_activate(m);
  173                                 else
  174                                         vm_page_deactivate(m);
  175                                 vm_page_wakeup(m);
  176                         } else {
  177                                 vnode_pager_freepage(m);
  178                         }
  179                 }
  180         }
  181         return 0;
  182 }
  183 
  184 /*
  185  * Vnode op for VM putpages.
  186  */
  187 int
  188 nfs_putpages(ap)
  189         struct vop_putpages_args /* {
  190                 struct vnode *a_vp;
  191                 vm_page_t *a_m;
  192                 int a_count;
  193                 int a_sync;
  194                 int *a_rtvals;
  195                 vm_ooffset_t a_offset;
  196         } */ *ap;
  197 {
  198         struct uio uio;
  199         struct iovec iov;
  200         vm_offset_t kva;
  201         struct buf *bp;
  202         int iomode, must_commit, i, error, npages, count;
  203         int *rtvals;
  204         struct vnode *vp;
  205         struct proc *p;
  206         struct ucred *cred;
  207         struct nfsmount *nmp;
  208         vm_page_t *pages;
  209 
  210         vp = ap->a_vp;
  211         p = curproc;                            /* XXX */
  212         cred = curproc->p_ucred;                /* XXX */
  213         nmp = VFSTONFS(vp->v_mount);
  214         pages = ap->a_m;
  215         count = ap->a_count;
  216         rtvals = ap->a_rtvals;
  217         npages = btoc(count);
  218 
  219         if ((nmp->nm_flag & NFSMNT_NFSV3) != 0 &&
  220             (nmp->nm_state & NFSSTA_GOTFSINFO) == 0)
  221                 (void)nfs_fsinfo(nmp, vp, cred, p);
  222 
  223         for (i = 0; i < npages; i++) {
  224                 rtvals[i] = VM_PAGER_AGAIN;
  225         }
  226 
  227         /*
  228          * We use only the kva address for the buffer, but this is extremely
  229          * convienient and fast.
  230          */
  231         bp = getpbuf();
  232 
  233         kva = (vm_offset_t) bp->b_data;
  234         pmap_qenter(kva, pages, npages);
  235 
  236         iov.iov_base = (caddr_t) kva;
  237         iov.iov_len = count;
  238         uio.uio_iov = &iov;
  239         uio.uio_iovcnt = 1;
  240         uio.uio_offset = IDX_TO_OFF(pages[0]->pindex);
  241         uio.uio_resid = count;
  242         uio.uio_segflg = UIO_SYSSPACE;
  243         uio.uio_rw = UIO_WRITE;
  244         uio.uio_procp = p;
  245 
  246         if ((ap->a_sync & VM_PAGER_PUT_SYNC) == 0)
  247             iomode = NFSV3WRITE_UNSTABLE;
  248         else
  249             iomode = NFSV3WRITE_FILESYNC;
  250 
  251         error = nfs_writerpc(vp, &uio, cred, &iomode, &must_commit);
  252 
  253         pmap_qremove(kva, npages);
  254         relpbuf(bp);
  255 
  256         if (!error) {
  257                 int nwritten = round_page(count - uio.uio_resid) / PAGE_SIZE;
  258                 for (i = 0; i < nwritten; i++) {
  259                         rtvals[i] = VM_PAGER_OK;
  260                         pages[i]->dirty = 0;
  261                 }
  262                 if (must_commit)
  263                         nfs_clearcommit(vp->v_mount);
  264         }
  265         return rtvals[0];
  266 }
  267 
  268 /*
  269  * Vnode op for read using bio
  270  */
  271 int
  272 nfs_bioread(vp, uio, ioflag, cred, getpages)
  273         register struct vnode *vp;
  274         register struct uio *uio;
  275         int ioflag;
  276         struct ucred *cred;
  277         int getpages;
  278 {
  279         register struct nfsnode *np = VTONFS(vp);
  280         register int biosize, i;
  281         off_t diff;
  282         struct buf *bp = 0, *rabp;
  283         struct vattr vattr;
  284         struct proc *p;
  285         struct nfsmount *nmp = VFSTONFS(vp->v_mount);
  286         daddr_t lbn, rabn;
  287         int bufsize;
  288         int nra, error = 0, n = 0, on = 0, not_readin;
  289 
  290 #ifdef DIAGNOSTIC
  291         if (uio->uio_rw != UIO_READ)
  292                 panic("nfs_read mode");
  293 #endif
  294         if (uio->uio_resid == 0)
  295                 return (0);
  296         if (uio->uio_offset < 0)        /* XXX VDIR cookies can be negative */
  297                 return (EINVAL);
  298         p = uio->uio_procp;
  299         if ((nmp->nm_flag & NFSMNT_NFSV3) != 0 &&
  300             (nmp->nm_state & NFSSTA_GOTFSINFO) == 0)
  301                 (void)nfs_fsinfo(nmp, vp, cred, p);
  302         if (vp->v_type != VDIR &&
  303             (uio->uio_offset + uio->uio_resid) > nmp->nm_maxfilesize)
  304                 return (EFBIG);
  305         biosize = vp->v_mount->mnt_stat.f_iosize;
  306         /*
  307          * For nfs, cache consistency can only be maintained approximately.
  308          * Although RFC1094 does not specify the criteria, the following is
  309          * believed to be compatible with the reference port.
  310          * For nqnfs, full cache consistency is maintained within the loop.
  311          * For nfs:
  312          * If the file's modify time on the server has changed since the
  313          * last read rpc or you have written to the file,
  314          * you may have lost data cache consistency with the
  315          * server, so flush all of the file's data out of the cache.
  316          * Then force a getattr rpc to ensure that you have up to date
  317          * attributes.
  318          * NB: This implies that cache data can be read when up to
  319          * NFS_ATTRTIMEO seconds out of date. If you find that you need current
  320          * attributes this could be forced by setting n_attrstamp to 0 before
  321          * the VOP_GETATTR() call.
  322          */
  323         if ((nmp->nm_flag & NFSMNT_NQNFS) == 0) {
  324                 if (np->n_flag & NMODIFIED) {
  325                         if (vp->v_type != VREG) {
  326                                 if (vp->v_type != VDIR)
  327                                         panic("nfs: bioread, not dir");
  328                                 nfs_invaldir(vp);
  329                                 error = nfs_vinvalbuf(vp, V_SAVE, cred, p, 1);
  330                                 if (error)
  331                                         return (error);
  332                         }
  333                         np->n_attrstamp = 0;
  334                         error = VOP_GETATTR(vp, &vattr, cred, p);
  335                         if (error)
  336                                 return (error);
  337                         np->n_mtime = vattr.va_mtime.tv_sec;
  338                 } else {
  339                         error = VOP_GETATTR(vp, &vattr, cred, p);
  340                         if (error)
  341                                 return (error);
  342                         if (np->n_mtime != vattr.va_mtime.tv_sec) {
  343                                 if (vp->v_type == VDIR)
  344                                         nfs_invaldir(vp);
  345                                 error = nfs_vinvalbuf(vp, V_SAVE, cred, p, 1);
  346                                 if (error)
  347                                         return (error);
  348                                 np->n_mtime = vattr.va_mtime.tv_sec;
  349                         }
  350                 }
  351         }
  352         do {
  353 
  354             /*
  355              * Get a valid lease. If cached data is stale, flush it.
  356              */
  357             if (nmp->nm_flag & NFSMNT_NQNFS) {
  358                 if (NQNFS_CKINVALID(vp, np, ND_READ)) {
  359                     do {
  360                         error = nqnfs_getlease(vp, ND_READ, cred, p);
  361                     } while (error == NQNFS_EXPIRED);
  362                     if (error)
  363                         return (error);
  364                     if (np->n_lrev != np->n_brev ||
  365                         (np->n_flag & NQNFSNONCACHE) ||
  366                         ((np->n_flag & NMODIFIED) && vp->v_type == VDIR)) {
  367                         if (vp->v_type == VDIR)
  368                             nfs_invaldir(vp);
  369                         error = nfs_vinvalbuf(vp, V_SAVE, cred, p, 1);
  370                         if (error)
  371                             return (error);
  372                         np->n_brev = np->n_lrev;
  373                     }
  374                 } else if (vp->v_type == VDIR && (np->n_flag & NMODIFIED)) {
  375                     nfs_invaldir(vp);
  376                     error = nfs_vinvalbuf(vp, V_SAVE, cred, p, 1);
  377                     if (error)
  378                         return (error);
  379                 }
  380             }
  381             if (np->n_flag & NQNFSNONCACHE) {
  382                 switch (vp->v_type) {
  383                 case VREG:
  384                         return (nfs_readrpc(vp, uio, cred));
  385                 case VLNK:
  386                         return (nfs_readlinkrpc(vp, uio, cred));
  387                 case VDIR:
  388                         break;
  389                 default:
  390                         printf(" NQNFSNONCACHE: type %x unexpected\n",  
  391                                 vp->v_type);
  392                 };
  393             }
  394             switch (vp->v_type) {
  395             case VREG:
  396                 nfsstats.biocache_reads++;
  397                 lbn = uio->uio_offset / biosize;
  398                 on = uio->uio_offset & (biosize - 1);
  399                 not_readin = 1;
  400 
  401                 /*
  402                  * Start the read ahead(s), as required.
  403                  */
  404                 if (nfs_numasync > 0 && nmp->nm_readahead > 0) {
  405                     for (nra = 0; nra < nmp->nm_readahead &&
  406                         (off_t)(lbn + 1 + nra) * biosize < np->n_size; nra++) {
  407                         rabn = lbn + 1 + nra;
  408                         if (!incore(vp, rabn)) {
  409                             rabp = nfs_getcacheblk(vp, rabn, biosize, p);
  410                             if (!rabp)
  411                                 return (EINTR);
  412                             if ((rabp->b_flags & (B_CACHE|B_DELWRI)) == 0) {
  413                                 rabp->b_flags |= (B_READ | B_ASYNC);
  414                                 vfs_busy_pages(rabp, 0);
  415                                 if (nfs_asyncio(rabp, cred)) {
  416                                     rabp->b_flags |= B_INVAL|B_ERROR;
  417                                     vfs_unbusy_pages(rabp);
  418                                     brelse(rabp);
  419                                 }
  420                             } else
  421                                 brelse(rabp);
  422                         }
  423                     }
  424                 }
  425 
  426                 /*
  427                  * If the block is in the cache and has the required data
  428                  * in a valid region, just copy it out.
  429                  * Otherwise, get the block and write back/read in,
  430                  * as required.
  431                  */
  432 again:
  433                 bufsize = biosize;
  434                 if ((off_t)(lbn + 1) * biosize > np->n_size && 
  435                     (off_t)(lbn + 1) * biosize - np->n_size < biosize) {
  436                         bufsize = np->n_size - (off_t)lbn * biosize;
  437                         bufsize = (bufsize + DEV_BSIZE - 1) & ~(DEV_BSIZE - 1);
  438                 }
  439                 bp = nfs_getcacheblk(vp, lbn, bufsize, p);
  440                 if (!bp)
  441                         return (EINTR);
  442                 /*
  443                  * If we are being called from nfs_getpages, we must
  444                  * make sure the buffer is a vmio buffer.  The vp will
  445                  * already be setup for vmio but there may be some old
  446                  * non-vmio buffers attached to it.
  447                  */
  448                 if (getpages && !(bp->b_flags & B_VMIO)) {
  449 #ifdef DIAGNOSTIC
  450                         printf("nfs_bioread: non vmio buf found, discarding\n");
  451 #endif
  452                         bp->b_flags |= B_NOCACHE;
  453                         bp->b_flags |= B_INVAFTERWRITE;
  454                         if (bp->b_dirtyend > 0) {
  455                                 if ((bp->b_flags & B_DELWRI) == 0)
  456                                         panic("nfsbioread");
  457                                 if (VOP_BWRITE(bp) == EINTR)
  458                                         return (EINTR);
  459                         } else
  460                                 brelse(bp);
  461                         goto again;
  462                 }
  463                 if ((bp->b_flags & B_CACHE) == 0) {
  464                     bp->b_flags |= B_READ;
  465                     bp->b_flags &= ~(B_DONE | B_ERROR | B_INVAL);
  466                     not_readin = 0;
  467                     vfs_busy_pages(bp, 0);
  468                     error = nfs_doio(bp, cred, p);
  469                     if (error) {
  470                         brelse(bp);
  471                         return (error);
  472                     }
  473                 }
  474                 if (bufsize > on) {
  475                         n = min((unsigned)(bufsize - on), uio->uio_resid);
  476                 } else {
  477                         n = 0;
  478                 }
  479                 diff = np->n_size - uio->uio_offset;
  480                 if (diff < n)
  481                         n = diff;
  482                 if (not_readin && n > 0) {
  483                         if (on < bp->b_validoff || (on + n) > bp->b_validend) {
  484                                 bp->b_flags |= B_NOCACHE;
  485                                 bp->b_flags |= B_INVAFTERWRITE;
  486                                 if (bp->b_dirtyend > 0) {
  487                                     if ((bp->b_flags & B_DELWRI) == 0)
  488                                         panic("nfsbioread");
  489                                     if (VOP_BWRITE(bp) == EINTR)
  490                                         return (EINTR);
  491                                 } else
  492                                     brelse(bp);
  493                                 goto again;
  494                         }
  495                 }
  496                 vp->v_lastr = lbn;
  497                 diff = (on >= bp->b_validend) ? 0 : (bp->b_validend - on);
  498                 if (diff < n)
  499                         n = diff;
  500                 break;
  501             case VLNK:
  502                 nfsstats.biocache_readlinks++;
  503                 bp = nfs_getcacheblk(vp, (daddr_t)0, NFS_MAXPATHLEN, p);
  504                 if (!bp)
  505                         return (EINTR);
  506                 if ((bp->b_flags & B_CACHE) == 0) {
  507                     bp->b_flags |= B_READ;
  508                     vfs_busy_pages(bp, 0);
  509                     error = nfs_doio(bp, cred, p);
  510                     if (error) {
  511                         bp->b_flags |= B_ERROR;
  512                         brelse(bp);
  513                         return (error);
  514                     }
  515                 }
  516                 n = min(uio->uio_resid, NFS_MAXPATHLEN - bp->b_resid);
  517                 on = 0;
  518                 break;
  519             case VDIR:
  520                 nfsstats.biocache_readdirs++;
  521                 if (np->n_direofoffset
  522                     && uio->uio_offset >= np->n_direofoffset) {
  523                     return (0);
  524                 }
  525                 lbn = (uoff_t)uio->uio_offset / NFS_DIRBLKSIZ;
  526                 on = uio->uio_offset & (NFS_DIRBLKSIZ - 1);
  527                 bp = nfs_getcacheblk(vp, lbn, NFS_DIRBLKSIZ, p);
  528                 if (!bp)
  529                     return (EINTR);
  530                 if ((bp->b_flags & B_CACHE) == 0) {
  531                     bp->b_flags |= B_READ;
  532                     vfs_busy_pages(bp, 0);
  533                     error = nfs_doio(bp, cred, p);
  534                     if (error) {
  535                             brelse(bp);
  536                     }
  537                     while (error == NFSERR_BAD_COOKIE) {
  538                         nfs_invaldir(vp);
  539                         error = nfs_vinvalbuf(vp, 0, cred, p, 1);
  540                         /*
  541                          * Yuck! The directory has been modified on the
  542                          * server. The only way to get the block is by
  543                          * reading from the beginning to get all the
  544                          * offset cookies.
  545                          */
  546                         for (i = 0; i <= lbn && !error; i++) {
  547                             if (np->n_direofoffset
  548                                 && (i * NFS_DIRBLKSIZ) >= np->n_direofoffset)
  549                                     return (0);
  550                             bp = nfs_getcacheblk(vp, i, NFS_DIRBLKSIZ, p);
  551                             if (!bp)
  552                                 return (EINTR);
  553                             if ((bp->b_flags & B_DONE) == 0) {
  554                                 bp->b_flags |= B_READ;
  555                                 vfs_busy_pages(bp, 0);
  556                                 error = nfs_doio(bp, cred, p);
  557                                 if (error == 0 && (bp->b_flags & B_INVAL))
  558                                         break;
  559                                 if (error) {
  560                                     brelse(bp);
  561                                 } else if (i < lbn) {
  562                                     brelse(bp);
  563                                 }
  564                             }
  565                         }
  566                     }
  567                     if (error)
  568                             return (error);
  569                 }
  570 
  571                 /*
  572                  * If not eof and read aheads are enabled, start one.
  573                  * (You need the current block first, so that you have the
  574                  *  directory offset cookie of the next block.)
  575                  */
  576                 if (nfs_numasync > 0 && nmp->nm_readahead > 0 &&
  577                     (bp->b_flags & B_INVAL) == 0 &&
  578                     (np->n_direofoffset == 0 ||
  579                     (lbn + 1) * NFS_DIRBLKSIZ < np->n_direofoffset) &&
  580                     !(np->n_flag & NQNFSNONCACHE) &&
  581                     !incore(vp, lbn + 1)) {
  582                         rabp = nfs_getcacheblk(vp, lbn + 1, NFS_DIRBLKSIZ, p);
  583                         if (rabp) {
  584                             if ((rabp->b_flags & (B_CACHE|B_DELWRI)) == 0) {
  585                                 rabp->b_flags |= (B_READ | B_ASYNC);
  586                                 vfs_busy_pages(rabp, 0);
  587                                 if (nfs_asyncio(rabp, cred)) {
  588                                     rabp->b_flags |= B_INVAL|B_ERROR;
  589                                     vfs_unbusy_pages(rabp);
  590                                     brelse(rabp);
  591                                 }
  592                             } else {
  593                                 brelse(rabp);
  594                             }
  595                         }
  596                 }
  597                 /*
  598                  * Make sure we use a signed variant of min() since
  599                  * the second term may be negative.
  600                  */
  601                 n = lmin(uio->uio_resid, NFS_DIRBLKSIZ - bp->b_resid - on);
  602                 break;
  603             default:
  604                 printf(" nfs_bioread: type %x unexpected\n",vp->v_type);
  605                 break;
  606             };
  607 
  608             if (n > 0) {
  609                     error = uiomove(bp->b_data + on, (int)n, uio);
  610             }
  611             switch (vp->v_type) {
  612             case VREG:
  613                 break;
  614             case VLNK:
  615                 n = 0;
  616                 break;
  617             case VDIR:
  618                 if (np->n_flag & NQNFSNONCACHE)
  619                         bp->b_flags |= B_INVAL;
  620                 break;
  621             default:
  622                 printf(" nfs_bioread: type %x unexpected\n",vp->v_type);
  623             }
  624             brelse(bp);
  625         } while (error == 0 && uio->uio_resid > 0 && n > 0);
  626         return (error);
  627 }
  628 
  629 static void
  630 nfs_prot_buf(bp, off, n)
  631         struct buf *bp;
  632         int off;
  633         int n;
  634 {
  635         int pindex, boff, end;
  636 
  637         if ((bp->b_flags & B_VMIO) == 0)
  638                 return;
  639 
  640         end = round_page(off + n);
  641         for (boff = trunc_page(off); boff < end; boff += PAGE_SIZE) {
  642                 pindex = boff >> PAGE_SHIFT;
  643                 vm_page_protect(bp->b_pages[pindex], VM_PROT_NONE);
  644         }
  645 }
  646 
  647 /*
  648  * Vnode op for write using bio
  649  */
  650 int
  651 nfs_write(ap)
  652         struct vop_write_args /* {
  653                 struct vnode *a_vp;
  654                 struct uio *a_uio;
  655                 int  a_ioflag;
  656                 struct ucred *a_cred;
  657         } */ *ap;
  658 {
  659         register int biosize;
  660         register struct uio *uio = ap->a_uio;
  661         struct proc *p = uio->uio_procp;
  662         register struct vnode *vp = ap->a_vp;
  663         struct nfsnode *np = VTONFS(vp);
  664         register struct ucred *cred = ap->a_cred;
  665         int ioflag = ap->a_ioflag;
  666         struct buf *bp;
  667         struct vattr vattr;
  668         struct nfsmount *nmp = VFSTONFS(vp->v_mount);
  669         daddr_t lbn;
  670         int bufsize;
  671         int n, on, error = 0, iomode, must_commit;
  672 
  673 #ifdef DIAGNOSTIC
  674         if (uio->uio_rw != UIO_WRITE)
  675                 panic("nfs_write mode");
  676         if (uio->uio_segflg == UIO_USERSPACE && uio->uio_procp != curproc)
  677                 panic("nfs_write proc");
  678 #endif
  679         if (vp->v_type != VREG)
  680                 return (EIO);
  681         if (np->n_flag & NWRITEERR) {
  682                 np->n_flag &= ~NWRITEERR;
  683                 return (np->n_error);
  684         }
  685         if ((nmp->nm_flag & NFSMNT_NFSV3) != 0 &&
  686             (nmp->nm_state & NFSSTA_GOTFSINFO) == 0)
  687                 (void)nfs_fsinfo(nmp, vp, cred, p);
  688         if (ioflag & (IO_APPEND | IO_SYNC)) {
  689                 if (np->n_flag & NMODIFIED) {
  690                         np->n_attrstamp = 0;
  691                         error = nfs_vinvalbuf(vp, V_SAVE, cred, p, 1);
  692                         if (error)
  693                                 return (error);
  694                 }
  695                 if (ioflag & IO_APPEND) {
  696                         np->n_attrstamp = 0;
  697                         error = VOP_GETATTR(vp, &vattr, cred, p);
  698                         if (error)
  699                                 return (error);
  700                         uio->uio_offset = np->n_size;
  701                 }
  702         }
  703         if (uio->uio_offset < 0)
  704                 return (EINVAL);
  705         if ((uio->uio_offset + uio->uio_resid) > nmp->nm_maxfilesize)
  706                 return (EFBIG);
  707         if (uio->uio_resid == 0)
  708                 return (0);
  709         /*
  710          * Maybe this should be above the vnode op call, but so long as
  711          * file servers have no limits, i don't think it matters
  712          */
  713         if (p && uio->uio_offset + uio->uio_resid >
  714               p->p_rlimit[RLIMIT_FSIZE].rlim_cur) {
  715                 psignal(p, SIGXFSZ);
  716                 return (EFBIG);
  717         }
  718         /*
  719          * I use nm_rsize, not nm_wsize so that all buffer cache blocks
  720          * will be the same size within a filesystem. nfs_writerpc will
  721          * still use nm_wsize when sizing the rpc's.
  722          */
  723         biosize = vp->v_mount->mnt_stat.f_iosize;
  724         do {
  725                 /*
  726                  * Check for a valid write lease.
  727                  */
  728                 if ((nmp->nm_flag & NFSMNT_NQNFS) &&
  729                     NQNFS_CKINVALID(vp, np, ND_WRITE)) {
  730                         do {
  731                                 error = nqnfs_getlease(vp, ND_WRITE, cred, p);
  732                         } while (error == NQNFS_EXPIRED);
  733                         if (error)
  734                                 return (error);
  735                         if (np->n_lrev != np->n_brev ||
  736                             (np->n_flag & NQNFSNONCACHE)) {
  737                                 error = nfs_vinvalbuf(vp, V_SAVE, cred, p, 1);
  738                                 if (error)
  739                                         return (error);
  740                                 np->n_brev = np->n_lrev;
  741                         }
  742                 }
  743                 if ((np->n_flag & NQNFSNONCACHE) && uio->uio_iovcnt == 1) {
  744                     iomode = NFSV3WRITE_FILESYNC;
  745                     error = nfs_writerpc(vp, uio, cred, &iomode, &must_commit);
  746                     if (must_commit)
  747                         nfs_clearcommit(vp->v_mount);
  748                     return (error);
  749                 }
  750                 nfsstats.biocache_writes++;
  751                 lbn = uio->uio_offset / biosize;
  752                 on = uio->uio_offset & (biosize-1);
  753                 n = min((unsigned)(biosize - on), uio->uio_resid);
  754 again:
  755                 if (uio->uio_offset + n > np->n_size) {
  756                         np->n_size = uio->uio_offset + n;
  757                         np->n_flag |= NMODIFIED;
  758                         vnode_pager_setsize(vp, np->n_size);
  759                 }
  760                 bufsize = biosize;
  761 #if 0
  762                 /*
  763                  * This optimization causes problems if the file grows while
  764                  * blocked in nfs_getcacheblk().  Not only can data be lost,
  765                  * but b_dirtyoff/end/b_validoff/end can wind up greater then
  766                  * b_bufsize, resulting in general memory corruption.
  767                  */
  768                 if ((off_t)(lbn + 1) * biosize > np->n_size) {
  769                         bufsize = np->n_size - (off_t)lbn * biosize;
  770                         bufsize = (bufsize + DEV_BSIZE - 1) & ~(DEV_BSIZE - 1);
  771                 }
  772 #endif
  773                 bp = nfs_getcacheblk(vp, lbn, bufsize, p);
  774                 if (!bp)
  775                         return (EINTR);
  776                 if (bp->b_wcred == NOCRED) {
  777                         crhold(cred);
  778                         bp->b_wcred = cred;
  779                 }
  780                 np->n_flag |= NMODIFIED;
  781 
  782                 if ((off_t)bp->b_blkno * DEV_BSIZE + bp->b_dirtyend > np->n_size)
  783                         bp->b_dirtyend = np->n_size - (off_t)bp->b_blkno * DEV_BSIZE;
  784                 if (bp->b_dirtyend <= bp->b_dirtyoff)
  785                         bp->b_dirtyend = bp->b_dirtyoff = 0;
  786 
  787                 /*
  788                  * If the new write will leave a contiguous dirty
  789                  * area, just update the b_dirtyoff and b_dirtyend,
  790                  * otherwise force a write rpc of the old dirty area.
  791                  */
  792                 if (bp->b_dirtyend > 0 &&
  793                     (on > bp->b_dirtyend || (on + n) < bp->b_dirtyoff)) {
  794                         bp->b_proc = p;
  795                         if (VOP_BWRITE(bp) == EINTR)
  796                                 return (EINTR);
  797                         goto again;
  798                 }
  799 
  800                 /*
  801                  * Check for valid write lease and get one as required.
  802                  * In case getblk() and/or bwrite() delayed us.
  803                  */
  804                 if ((nmp->nm_flag & NFSMNT_NQNFS) &&
  805                     NQNFS_CKINVALID(vp, np, ND_WRITE)) {
  806                         do {
  807                                 error = nqnfs_getlease(vp, ND_WRITE, cred, p);
  808                         } while (error == NQNFS_EXPIRED);
  809                         if (error) {
  810                                 brelse(bp);
  811                                 return (error);
  812                         }
  813                         if (np->n_lrev != np->n_brev ||
  814                             (np->n_flag & NQNFSNONCACHE)) {
  815                                 brelse(bp);
  816                                 error = nfs_vinvalbuf(vp, V_SAVE, cred, p, 1);
  817                                 if (error)
  818                                         return (error);
  819                                 np->n_brev = np->n_lrev;
  820                                 goto again;
  821                         }
  822                 }
  823 
  824                 error = uiomove((char *)bp->b_data + on, n, uio);
  825                 bp->b_flags &= ~(B_NEEDCOMMIT | B_CLUSTEROK);
  826                 if (error) {
  827                         bp->b_flags |= B_ERROR;
  828                         brelse(bp);
  829                         return (error);
  830                 }
  831 
  832                 /*
  833                  * This will keep the buffer and mmaped regions more coherent.
  834                  */
  835                 nfs_prot_buf(bp, on, n);
  836 
  837                 if (bp->b_dirtyend > 0) {
  838                         bp->b_dirtyoff = min(on, bp->b_dirtyoff);
  839                         bp->b_dirtyend = max((on + n), bp->b_dirtyend);
  840                 } else {
  841                         bp->b_dirtyoff = on;
  842                         bp->b_dirtyend = on + n;
  843                 }
  844                 if (bp->b_validend == 0 || bp->b_validend < bp->b_dirtyoff ||
  845                     bp->b_validoff > bp->b_dirtyend) {
  846                         bp->b_validoff = bp->b_dirtyoff;
  847                         bp->b_validend = bp->b_dirtyend;
  848                 } else {
  849                         bp->b_validoff = min(bp->b_validoff, bp->b_dirtyoff);
  850                         bp->b_validend = max(bp->b_validend, bp->b_dirtyend);
  851                 }
  852 
  853                 /*
  854                  * Since this block is being modified, it must be written
  855                  * again and not just committed.
  856                  */
  857                 bp->b_flags &= ~(B_NEEDCOMMIT | B_CLUSTEROK);
  858 
  859                 /*
  860                  * If the lease is non-cachable or IO_SYNC do bwrite().
  861                  */
  862                 if ((np->n_flag & NQNFSNONCACHE) || (ioflag & IO_SYNC)) {
  863                         bp->b_proc = p;
  864                         if (ioflag & IO_INVAL)
  865                                 bp->b_flags |= B_INVAL;
  866                         error = VOP_BWRITE(bp);
  867                         if (error)
  868                                 return (error);
  869                         if (np->n_flag & NQNFSNONCACHE) {
  870                                 error = nfs_vinvalbuf(vp, V_SAVE, cred, p, 1);
  871                                 if (error)
  872                                         return (error);
  873                         }
  874                 } else if ((n + on) == biosize &&
  875                         (nmp->nm_flag & NFSMNT_NQNFS) == 0) {
  876                         bp->b_proc = (struct proc *)0;
  877                         bp->b_flags |= B_ASYNC;
  878                         (void)nfs_writebp(bp, 0);
  879                 } else
  880                         bdwrite(bp);
  881         } while (uio->uio_resid > 0 && n > 0);
  882         return (0);
  883 }
  884 
  885 /*
  886  * Get an nfs cache block.
  887  * Allocate a new one if the block isn't currently in the cache
  888  * and return the block marked busy. If the calling process is
  889  * interrupted by a signal for an interruptible mount point, return
  890  * NULL.
  891  */
  892 static struct buf *
  893 nfs_getcacheblk(vp, bn, size, p)
  894         struct vnode *vp;
  895         daddr_t bn;
  896         int size;
  897         struct proc *p;
  898 {
  899         register struct buf *bp;
  900         struct mount *mp;
  901         struct nfsmount *nmp;
  902 
  903         mp = vp->v_mount;
  904         nmp = VFSTONFS(mp);
  905 
  906         if (nmp->nm_flag & NFSMNT_INT) {
  907                 bp = getblk(vp, bn, size, PCATCH, 0);
  908                 while (bp == (struct buf *)0) {
  909                         if (nfs_sigintr(nmp, (struct nfsreq *)0, p))
  910                                 return ((struct buf *)0);
  911                         bp = getblk(vp, bn, size, 0, 2 * hz);
  912                 }
  913         } else
  914                 bp = getblk(vp, bn, size, 0, 0);
  915 
  916         if (vp->v_type == VREG) {
  917                 int biosize;
  918                 biosize = mp->mnt_stat.f_iosize;
  919                 bp->b_blkno = bn * (biosize / DEV_BSIZE);
  920         }
  921 
  922         return (bp);
  923 }
  924 
  925 /*
  926  * Flush and invalidate all dirty buffers. If another process is already
  927  * doing the flush, just wait for completion.
  928  */
  929 int
  930 nfs_vinvalbuf(vp, flags, cred, p, intrflg)
  931         struct vnode *vp;
  932         int flags;
  933         struct ucred *cred;
  934         struct proc *p;
  935         int intrflg;
  936 {
  937         register struct nfsnode *np = VTONFS(vp);
  938         struct nfsmount *nmp = VFSTONFS(vp->v_mount);
  939         int error = 0, slpflag, slptimeo;
  940 
  941         if (vp->v_flag & VXLOCK) {
  942                 return (0);
  943         }
  944 
  945         if ((nmp->nm_flag & NFSMNT_INT) == 0)
  946                 intrflg = 0;
  947         if (intrflg) {
  948                 slpflag = PCATCH;
  949                 slptimeo = 2 * hz;
  950         } else {
  951                 slpflag = 0;
  952                 slptimeo = 0;
  953         }
  954         /*
  955          * First wait for any other process doing a flush to complete.
  956          */
  957         while (np->n_flag & NFLUSHINPROG) {
  958                 np->n_flag |= NFLUSHWANT;
  959                 error = tsleep((caddr_t)&np->n_flag, PRIBIO + 2, "nfsvinval",
  960                         slptimeo);
  961                 if (error && intrflg && nfs_sigintr(nmp, (struct nfsreq *)0, p))
  962                         return (EINTR);
  963         }
  964 
  965         /*
  966          * Now, flush as required.
  967          */
  968         np->n_flag |= NFLUSHINPROG;
  969         error = vinvalbuf(vp, flags, cred, p, slpflag, 0);
  970         while (error) {
  971                 if (intrflg && nfs_sigintr(nmp, (struct nfsreq *)0, p)) {
  972                         np->n_flag &= ~NFLUSHINPROG;
  973                         if (np->n_flag & NFLUSHWANT) {
  974                                 np->n_flag &= ~NFLUSHWANT;
  975                                 wakeup((caddr_t)&np->n_flag);
  976                         }
  977                         return (EINTR);
  978                 }
  979                 error = vinvalbuf(vp, flags, cred, p, 0, slptimeo);
  980         }
  981         np->n_flag &= ~(NMODIFIED | NFLUSHINPROG);
  982         if (np->n_flag & NFLUSHWANT) {
  983                 np->n_flag &= ~NFLUSHWANT;
  984                 wakeup((caddr_t)&np->n_flag);
  985         }
  986         return (0);
  987 }
  988 
  989 /*
  990  * Initiate asynchronous I/O. Return an error if no nfsiods are available.
  991  * This is mainly to avoid queueing async I/O requests when the nfsiods
  992  * are all hung on a dead server.
  993  */
  994 int
  995 nfs_asyncio(bp, cred)
  996         register struct buf *bp;
  997         struct ucred *cred;
  998 {
  999         struct nfsmount *nmp;
 1000         int i;
 1001         int gotiod;
 1002         int slpflag = 0;
 1003         int slptimeo = 0;
 1004         int error;
 1005 
 1006         if (nfs_numasync == 0)
 1007                 return (EIO);
 1008         
 1009         nmp = VFSTONFS(bp->b_vp->v_mount);
 1010 again:
 1011         if (nmp->nm_flag & NFSMNT_INT)
 1012                 slpflag = PCATCH;
 1013         gotiod = FALSE;
 1014 
 1015         /*
 1016          * Find a free iod to process this request.
 1017          */
 1018         for (i = 0; i < NFS_MAXASYNCDAEMON; i++)
 1019                 if (nfs_iodwant[i]) {
 1020                         /*
 1021                          * Found one, so wake it up and tell it which
 1022                          * mount to process.
 1023                          */
 1024                         NFS_DPF(ASYNCIO,
 1025                                 ("nfs_asyncio: waking iod %d for mount %p\n",
 1026                                  i, nmp));
 1027                         nfs_iodwant[i] = (struct proc *)0;
 1028                         nfs_iodmount[i] = nmp;
 1029                         nmp->nm_bufqiods++;
 1030                         wakeup((caddr_t)&nfs_iodwant[i]);
 1031                         gotiod = TRUE;
 1032                         break;
 1033                 }
 1034 
 1035         /*
 1036          * If none are free, we may already have an iod working on this mount
 1037          * point.  If so, it will process our request.
 1038          */
 1039         if (!gotiod) {
 1040                 if (nmp->nm_bufqiods > 0) {
 1041                         NFS_DPF(ASYNCIO,
 1042                                 ("nfs_asyncio: %d iods are already processing mount %p\n",
 1043                                  nmp->nm_bufqiods, nmp));
 1044                         gotiod = TRUE;
 1045                 }
 1046         }
 1047 
 1048         /*
 1049          * If we have an iod which can process the request, then queue
 1050          * the buffer.
 1051          */
 1052         if (gotiod) {
 1053                 /*
 1054                  * Ensure that the queue never grows too large.
 1055                  */
 1056                 while (nmp->nm_bufqlen >= 2*nfs_numasync) {
 1057                         NFS_DPF(ASYNCIO,
 1058                                 ("nfs_asyncio: waiting for mount %p queue to drain\n", nmp));
 1059                         nmp->nm_bufqwant = TRUE;
 1060                         error = tsleep(&nmp->nm_bufq, slpflag | PRIBIO,
 1061                                        "nfsaio", slptimeo);
 1062                         if (error) {
 1063                                 if (nfs_sigintr(nmp, NULL, bp->b_proc))
 1064                                         return (EINTR);
 1065                                 if (slpflag == PCATCH) {
 1066                                         slpflag = 0;
 1067                                         slptimeo = 2 * hz;
 1068                                 }
 1069                         }
 1070                         /*
 1071                          * We might have lost our iod while sleeping,
 1072                          * so check and loop if nescessary.
 1073                          */
 1074                         if (nmp->nm_bufqiods == 0) {
 1075                                 NFS_DPF(ASYNCIO,
 1076                                         ("nfs_asyncio: no iods after mount %p queue was drained, looping\n", nmp));
 1077                                 goto again;
 1078                         }
 1079                 }
 1080 
 1081                 if (bp->b_flags & B_READ) {
 1082                         if (bp->b_rcred == NOCRED && cred != NOCRED) {
 1083                                 crhold(cred);
 1084                                 bp->b_rcred = cred;
 1085                         }
 1086                 } else {
 1087                         bp->b_flags |= B_WRITEINPROG;
 1088                         if (bp->b_wcred == NOCRED && cred != NOCRED) {
 1089                                 crhold(cred);
 1090                                 bp->b_wcred = cred;
 1091                         }
 1092                 }
 1093 
 1094                 TAILQ_INSERT_TAIL(&nmp->nm_bufq, bp, b_freelist);
 1095                 nmp->nm_bufqlen++;
 1096                 return (0);
 1097         }
 1098 
 1099         /*
 1100          * All the iods are busy on other mounts, so return EIO to
 1101          * force the caller to process the i/o synchronously.
 1102          */
 1103         NFS_DPF(ASYNCIO, ("nfs_asyncio: no iods available, i/o is synchronous\n"));
 1104         return (EIO);
 1105 }
 1106 
 1107 /*
 1108  * Do an I/O operation to/from a cache block. This may be called
 1109  * synchronously or from an nfsiod.
 1110  */
 1111 int
 1112 nfs_doio(bp, cr, p)
 1113         register struct buf *bp;
 1114         struct ucred *cr;
 1115         struct proc *p;
 1116 {
 1117         register struct uio *uiop;
 1118         register struct vnode *vp;
 1119         struct nfsnode *np;
 1120         struct nfsmount *nmp;
 1121         int error = 0, diff, len, iomode, must_commit = 0;
 1122         struct uio uio;
 1123         struct iovec io;
 1124 
 1125         vp = bp->b_vp;
 1126         np = VTONFS(vp);
 1127         nmp = VFSTONFS(vp->v_mount);
 1128         uiop = &uio;
 1129         uiop->uio_iov = &io;
 1130         uiop->uio_iovcnt = 1;
 1131         uiop->uio_segflg = UIO_SYSSPACE;
 1132         uiop->uio_procp = p;
 1133 
 1134         /*
 1135          * Historically, paging was done with physio, but no more.
 1136          */
 1137         if (bp->b_flags & B_PHYS) {
 1138             /*
 1139              * ...though reading /dev/drum still gets us here.
 1140              */
 1141             io.iov_len = uiop->uio_resid = bp->b_bcount;
 1142             /* mapping was done by vmapbuf() */
 1143             io.iov_base = bp->b_data;
 1144             uiop->uio_offset = ((off_t)bp->b_blkno) * DEV_BSIZE;
 1145             if (bp->b_flags & B_READ) {
 1146                 uiop->uio_rw = UIO_READ;
 1147                 nfsstats.read_physios++;
 1148                 error = nfs_readrpc(vp, uiop, cr);
 1149             } else {
 1150                 int com;
 1151 
 1152                 iomode = NFSV3WRITE_DATASYNC;
 1153                 uiop->uio_rw = UIO_WRITE;
 1154                 nfsstats.write_physios++;
 1155                 error = nfs_writerpc(vp, uiop, cr, &iomode, &com);
 1156             }
 1157             if (error) {
 1158                 bp->b_flags |= B_ERROR;
 1159                 bp->b_error = error;
 1160             }
 1161         } else if (bp->b_flags & B_READ) {
 1162             io.iov_len = uiop->uio_resid = bp->b_bcount;
 1163             io.iov_base = bp->b_data;
 1164             uiop->uio_rw = UIO_READ;
 1165             switch (vp->v_type) {
 1166             case VREG:
 1167                 uiop->uio_offset = ((off_t)bp->b_blkno) * DEV_BSIZE;
 1168                 nfsstats.read_bios++;
 1169                 error = nfs_readrpc(vp, uiop, cr);
 1170                 if (!error) {
 1171                     bp->b_validoff = 0;
 1172                     if (uiop->uio_resid) {
 1173                         /*
 1174                          * If len > 0, there is a hole in the file and
 1175                          * no writes after the hole have been pushed to
 1176                          * the server yet.
 1177                          * Just zero fill the rest of the valid area.
 1178                          */
 1179                         diff = bp->b_bcount - uiop->uio_resid;
 1180                         len = np->n_size - (((u_quad_t)bp->b_blkno) * DEV_BSIZE
 1181                                 + diff);
 1182                         if (len > 0) {
 1183                             len = min(len, uiop->uio_resid);
 1184                             bzero((char *)bp->b_data + diff, len);
 1185                             bp->b_validend = diff + len;
 1186                         } else
 1187                             bp->b_validend = diff;
 1188                     } else
 1189                         bp->b_validend = bp->b_bcount;
 1190                 }
 1191                 if (p && (vp->v_flag & VTEXT) &&
 1192                         (((nmp->nm_flag & NFSMNT_NQNFS) &&
 1193                           NQNFS_CKINVALID(vp, np, ND_READ) &&
 1194                           np->n_lrev != np->n_brev) ||
 1195                          (!(nmp->nm_flag & NFSMNT_NQNFS) &&
 1196                           np->n_mtime != np->n_vattr.va_mtime.tv_sec))) {
 1197                         uprintf("Process killed due to text file modification\n");
 1198                         psignal(p, SIGKILL);
 1199                         p->p_flag |= P_NOSWAP;
 1200                 }
 1201                 break;
 1202             case VLNK:
 1203                 uiop->uio_offset = (off_t)0;
 1204                 nfsstats.readlink_bios++;
 1205                 error = nfs_readlinkrpc(vp, uiop, cr);
 1206                 break;
 1207             case VDIR:
 1208                 nfsstats.readdir_bios++;
 1209                 uiop->uio_offset = ((u_quad_t)bp->b_lblkno) * NFS_DIRBLKSIZ;
 1210                 if (nmp->nm_flag & NFSMNT_RDIRPLUS) {
 1211                         error = nfs_readdirplusrpc(vp, uiop, cr);
 1212                         if (error == NFSERR_NOTSUPP)
 1213                                 nmp->nm_flag &= ~NFSMNT_RDIRPLUS;
 1214                 }
 1215                 if ((nmp->nm_flag & NFSMNT_RDIRPLUS) == 0)
 1216                         error = nfs_readdirrpc(vp, uiop, cr);
 1217                 if (error == 0 && uiop->uio_resid == bp->b_bcount)
 1218                         bp->b_flags |= B_INVAL;
 1219                 break;
 1220             default:
 1221                 printf("nfs_doio:  type %x unexpected\n",vp->v_type);
 1222                 break;
 1223             };
 1224             if (error) {
 1225                 bp->b_flags |= B_ERROR;
 1226                 bp->b_error = error;
 1227             }
 1228         } else {
 1229             if ((off_t)bp->b_blkno * DEV_BSIZE + bp->b_dirtyend > np->n_size)
 1230                 bp->b_dirtyend = np->n_size - (off_t)bp->b_blkno * DEV_BSIZE;
 1231 
 1232             if (bp->b_dirtyend > bp->b_dirtyoff) {
 1233                 io.iov_len = uiop->uio_resid = bp->b_dirtyend
 1234                     - bp->b_dirtyoff;
 1235                 uiop->uio_offset = (off_t)bp->b_blkno * DEV_BSIZE
 1236                     + bp->b_dirtyoff;
 1237                 io.iov_base = (char *)bp->b_data + bp->b_dirtyoff;
 1238                 uiop->uio_rw = UIO_WRITE;
 1239                 nfsstats.write_bios++;
 1240                 if ((bp->b_flags & (B_ASYNC | B_NEEDCOMMIT | B_NOCACHE | B_CLUSTER)) == B_ASYNC)
 1241                     iomode = NFSV3WRITE_UNSTABLE;
 1242                 else
 1243                     iomode = NFSV3WRITE_FILESYNC;
 1244                 bp->b_flags |= B_WRITEINPROG;
 1245                 error = nfs_writerpc(vp, uiop, cr, &iomode, &must_commit);
 1246                 if (!error && iomode == NFSV3WRITE_UNSTABLE) {
 1247                     bp->b_flags |= B_NEEDCOMMIT;
 1248                     if (bp->b_dirtyoff == 0
 1249                         && bp->b_dirtyend == bp->b_bufsize)
 1250                         bp->b_flags |= B_CLUSTEROK;
 1251                 } else
 1252                     bp->b_flags &= ~(B_NEEDCOMMIT | B_CLUSTEROK);
 1253                 bp->b_flags &= ~B_WRITEINPROG;
 1254 
 1255                 /*
 1256                  * For an interrupted write, the buffer is still valid
 1257                  * and the write hasn't been pushed to the server yet,
 1258                  * so we can't set B_ERROR and report the interruption
 1259                  * by setting B_EINTR. For the B_ASYNC case, B_EINTR
 1260                  * is not relevant, so the rpc attempt is essentially
 1261                  * a noop.  For the case of a V3 write rpc not being
 1262                  * committed to stable storage, the block is still
 1263                  * dirty and requires either a commit rpc or another
 1264                  * write rpc with iomode == NFSV3WRITE_FILESYNC before
 1265                  * the block is reused. This is indicated by setting
 1266                  * the B_DELWRI and B_NEEDCOMMIT flags.
 1267                  */
 1268                 if (error == EINTR
 1269                     || (!error && (bp->b_flags & B_NEEDCOMMIT))) {
 1270                         int s;
 1271 
 1272                         bp->b_flags &= ~(B_INVAL|B_NOCACHE);
 1273                         ++numdirtybuffers;
 1274                         bp->b_flags |= B_DELWRI;
 1275                         s = splbio();
 1276                         reassignbuf(bp, vp);
 1277                         splx(s);
 1278                         if (error && (bp->b_flags & B_ASYNC) == 0)
 1279                             bp->b_flags |= B_EINTR;
 1280                 } else {
 1281                         if (error) {
 1282                                 bp->b_flags |= B_ERROR;
 1283                                 bp->b_error = np->n_error = error;
 1284                                 np->n_flag |= NWRITEERR;
 1285                         }
 1286                         bp->b_dirtyoff = bp->b_dirtyend = 0;
 1287                 }
 1288             } else {
 1289                 bp->b_resid = 0;
 1290                 bp->b_dirtyend = bp->b_dirtyoff = 0;
 1291                 biodone(bp);
 1292                 return (0);
 1293             }
 1294         }
 1295         bp->b_resid = uiop->uio_resid;
 1296         if (must_commit)
 1297                 nfs_clearcommit(vp->v_mount);
 1298         biodone(bp);
 1299         return (error);
 1300 }

Cache object: 6673062ae96a59aaec21c7f7dadc65f5


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]


This page is part of the FreeBSD/Linux Kernel Cross-Reference, and was automatically generated using a modified version of the LXR engine.