FreeBSD/Linux Kernel Cross Reference
sys/nfs/nfs_bio.c
/*
 * Copyright (c) 1989, 1993
 *	The Regents of the University of California.  All rights reserved.
 *
 * This code is derived from software contributed to Berkeley by
 * Rick Macklem at The University of Guelph.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by the University of
 *	California, Berkeley and its contributors.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)nfs_bio.c	8.5 (Berkeley) 1/4/94
 * $FreeBSD: src/sys/nfs/nfs_bio.c,v 1.28.2.11 1999/12/12 07:28:50 dillon Exp $
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/resourcevar.h>
#include <sys/signalvar.h>
#include <sys/proc.h>
#include <sys/buf.h>
#include <sys/vnode.h>
#include <sys/mount.h>
#include <sys/kernel.h>
#include <sys/sysctl.h>

#include <vm/vm.h>
#include <vm/vm_param.h>
#include <vm/vm_extern.h>
#include <vm/vm_prot.h>
#include <vm/vm_page.h>
#include <vm/vm_object.h>
#include <vm/vm_pager.h>
#include <vm/vnode_pager.h>

#include <nfs/rpcv2.h>
#include <nfs/nfsproto.h>
#include <nfs/nfs.h>
#include <nfs/nfsmount.h>
#include <nfs/nqnfs.h>
#include <nfs/nfsnode.h>

static struct buf *nfs_getcacheblk __P((struct vnode *vp, daddr_t bn, int size,
					struct proc *p));

extern int nfs_numasync;
extern struct nfsstats nfsstats;

/*
 * Ifdefs for FreeBSD-current's merged VM/buffer cache.  It is unfortunate
 * that this isn't done inside getblk() and brelse() so these calls
 * wouldn't need to be here.
 */
#ifdef B_VMIO
#define vnode_pager_uncache(vp)
#else
#define vfs_busy_pages(bp, f)
#define vfs_unbusy_pages(bp)
#define vfs_dirty_pages(bp)
#endif

/*
 * Vnode op for VM getpages.
 */
int
nfs_getpages(ap)
	struct vop_getpages_args *ap;
{
	int i, bsize;
	vm_object_t obj;
	int pcount;
	struct uio auio;
	struct iovec aiov;
	int error;
	vm_page_t m;

	if (!(ap->a_vp->v_flag & VVMIO)) {
		printf("nfs_getpages: called with non-VMIO vnode??\n");
		return EOPNOTSUPP;
	}

	pcount = round_page(ap->a_count) / PAGE_SIZE;

	obj = ap->a_m[ap->a_reqpage]->object;
	bsize = ap->a_vp->v_mount->mnt_stat.f_iosize;

	for (i = 0; i < pcount; i++) {
		if (i != ap->a_reqpage) {
			vnode_pager_freepage(ap->a_m[i]);
		}
	}
	m = ap->a_m[ap->a_reqpage];

	m->busy++;
	m->flags &= ~PG_BUSY;
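	/*
	 * Illustrative note (not in the original source): the two lines
	 * above trade the hard PG_BUSY flag for a hold on the soft busy
	 * count, so the page cannot be reclaimed while nfs_bioread()
	 * below may sleep; the flag is restored once the read returns.
	 */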

	auio.uio_iov = &aiov;
	auio.uio_iovcnt = 1;
	aiov.iov_base = 0;
	aiov.iov_len = MAXBSIZE;
	auio.uio_resid = MAXBSIZE;
	auio.uio_offset = IDX_TO_OFF(m->pindex);
	auio.uio_segflg = UIO_NOCOPY;
	auio.uio_rw = UIO_READ;
	auio.uio_procp = curproc;
	error = nfs_bioread(ap->a_vp, &auio, IO_NODELOCKED, curproc->p_ucred, 1);

	m->flags |= PG_BUSY;
	m->busy--;
	if (m->busy == 0 && (m->flags & PG_WANTED)) {
		m->flags &= ~PG_WANTED;
		wakeup(m);
	}

	if (error && (auio.uio_resid == MAXBSIZE))
		return VM_PAGER_ERROR;
	return 0;
}

/*
 * Vnode op for read using bio
 * Any similarity to readip() is purely coincidental
 */
int
nfs_bioread(vp, uio, ioflag, cred, getpages)
	register struct vnode *vp;
	register struct uio *uio;
	int ioflag;
	struct ucred *cred;
	int getpages;
{
	register struct nfsnode *np = VTONFS(vp);
	register int biosize, diff, i;
	struct buf *bp = 0, *rabp;
	struct vattr vattr;
	struct proc *p;
	struct nfsmount *nmp = VFSTONFS(vp->v_mount);
	daddr_t lbn, rabn;
	int bufsize;
	int nra, error = 0, n = 0, on = 0, not_readin;

#ifdef DIAGNOSTIC
	if (uio->uio_rw != UIO_READ)
		panic("nfs_read mode");
#endif
	if (uio->uio_resid == 0)
		return (0);
	if (uio->uio_offset < 0)
		return (EINVAL);
	p = uio->uio_procp;
	if ((nmp->nm_flag & (NFSMNT_NFSV3 | NFSMNT_GOTFSINFO)) == NFSMNT_NFSV3)
		(void)nfs_fsinfo(nmp, vp, cred, p);
	biosize = vp->v_mount->mnt_stat.f_iosize;
	/*
	 * For nfs, cache consistency can only be maintained approximately.
	 * Although RFC1094 does not specify the criteria, the following is
	 * believed to be compatible with the reference port.
	 * For nqnfs, full cache consistency is maintained within the loop.
	 * For nfs:
	 * If the file's modify time on the server has changed since the
	 * last read rpc or you have written to the file,
	 * you may have lost data cache consistency with the
	 * server, so flush all of the file's data out of the cache.
	 * Then force a getattr rpc to ensure that you have up to date
	 * attributes.
	 * NB: This implies that cache data can be read when up to
	 * NFS_ATTRTIMEO seconds out of date. If you find that you need current
	 * attributes this could be forced by setting n_attrstamp to 0 before
	 * the VOP_GETATTR() call.
	 */
	if ((nmp->nm_flag & NFSMNT_NQNFS) == 0) {
		if (np->n_flag & NMODIFIED) {
			if (vp->v_type != VREG) {
				if (vp->v_type != VDIR)
					panic("nfs: bioread, not dir");
				nfs_invaldir(vp);
				error = nfs_vinvalbuf(vp, V_SAVE, cred, p, 1);
				if (error)
					return (error);
			}
			np->n_attrstamp = 0;
			error = VOP_GETATTR(vp, &vattr, cred, p);
			if (error)
				return (error);
			np->n_mtime = vattr.va_mtime.tv_sec;
		} else {
			error = VOP_GETATTR(vp, &vattr, cred, p);
			if (error)
				return (error);
			if (np->n_mtime != vattr.va_mtime.tv_sec) {
				if (vp->v_type == VDIR)
					nfs_invaldir(vp);
				error = nfs_vinvalbuf(vp, V_SAVE, cred, p, 1);
				if (error)
					return (error);
				np->n_mtime = vattr.va_mtime.tv_sec;
			}
		}
	}
	do {

	    /*
	     * Get a valid lease. If cached data is stale, flush it.
	     */
	    if (nmp->nm_flag & NFSMNT_NQNFS) {
		if (NQNFS_CKINVALID(vp, np, ND_READ)) {
		    do {
			error = nqnfs_getlease(vp, ND_READ, cred, p);
		    } while (error == NQNFS_EXPIRED);
		    if (error)
			return (error);
		    if (np->n_lrev != np->n_brev ||
			(np->n_flag & NQNFSNONCACHE) ||
			((np->n_flag & NMODIFIED) && vp->v_type == VDIR)) {
			if (vp->v_type == VDIR)
			    nfs_invaldir(vp);
			error = nfs_vinvalbuf(vp, V_SAVE, cred, p, 1);
			if (error)
			    return (error);
			np->n_brev = np->n_lrev;
		    }
		} else if (vp->v_type == VDIR && (np->n_flag & NMODIFIED)) {
		    nfs_invaldir(vp);
		    error = nfs_vinvalbuf(vp, V_SAVE, cred, p, 1);
		    if (error)
			return (error);
		}
	    }
	    if (np->n_flag & NQNFSNONCACHE) {
		switch (vp->v_type) {
		case VREG:
			return (nfs_readrpc(vp, uio, cred));
		case VLNK:
			return (nfs_readlinkrpc(vp, uio, cred));
		case VDIR:
			break;
		default:
			printf(" NQNFSNONCACHE: type %x unexpected\n",
				vp->v_type);
		}
	    }
	    switch (vp->v_type) {
	    case VREG:
		nfsstats.biocache_reads++;
		lbn = uio->uio_offset / biosize;
		on = uio->uio_offset & (biosize - 1);
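		/*
		 * Worked example (illustrative, not from the original
		 * source): with biosize = 8192 and uio_offset = 20000,
		 * lbn = 20000 / 8192 = 2 and on = 20000 & 8191 = 3616,
		 * so the transfer begins 3616 bytes into logical block 2.
		 */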
		not_readin = 1;

		/*
		 * Start the read ahead(s), as required.
		 */
		if (nfs_numasync > 0 && nmp->nm_readahead > 0) {
		    for (nra = 0; nra < nmp->nm_readahead &&
			(off_t)(lbn + 1 + nra) * biosize < np->n_size; nra++) {
			rabn = lbn + 1 + nra;
			if (!incore(vp, rabn)) {
			    rabp = nfs_getcacheblk(vp, rabn, biosize, p);
			    if (!rabp)
				return (EINTR);
			    if ((rabp->b_flags & (B_CACHE|B_DELWRI)) == 0) {
				rabp->b_flags |= (B_READ | B_ASYNC);
				vfs_busy_pages(rabp, 0);
				if (nfs_asyncio(rabp, cred)) {
				    rabp->b_flags |= B_INVAL|B_ERROR;
				    vfs_unbusy_pages(rabp);
				    brelse(rabp);
				}
			    } else {
				brelse(rabp);
			    }
			}
		    }
		}

		/*
		 * If the block is in the cache and has the required data
		 * in a valid region, just copy it out.
		 * Otherwise, get the block and write back/read in,
		 * as required.
		 */
again:
		bufsize = biosize;
		if ((off_t)(lbn + 1) * biosize > np->n_size &&
		    (off_t)(lbn + 1) * biosize - np->n_size < biosize) {
			bufsize = np->n_size - lbn * biosize;
			bufsize = (bufsize + DEV_BSIZE - 1) & ~(DEV_BSIZE - 1);
		}
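		/*
		 * Worked example (illustrative, not from the original
		 * source): with np->n_size = 20000, biosize = 8192 and
		 * lbn = 2, the block extends past end of file, so
		 * bufsize = 20000 - 16384 = 3616, rounded up to the next
		 * DEV_BSIZE (512) boundary: 4096.
		 */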
		bp = nfs_getcacheblk(vp, lbn, bufsize, p);
		if (!bp)
			return (EINTR);
		/*
		 * If we are being called from nfs_getpages, we must
		 * make sure the buffer is a vmio buffer.  The vp will
		 * already be setup for vmio but there may be some old
		 * non-vmio buffers attached to it.
		 */
		if (getpages && !(bp->b_flags & B_VMIO)) {
#ifdef DIAGNOSTIC
			printf("nfs_bioread: non vmio buf found, discarding\n");
#endif
			bp->b_flags |= B_NOCACHE;
			bp->b_flags |= B_INVAFTERWRITE;
			if (bp->b_dirtyend > 0) {
				if ((bp->b_flags & B_DELWRI) == 0)
					panic("nfsbioread");
				if (VOP_BWRITE(bp) == EINTR)
					return (EINTR);
			} else
				brelse(bp);
			goto again;
		}
		if ((bp->b_flags & B_CACHE) == 0) {
			bp->b_flags |= B_READ;
			bp->b_flags &= ~(B_DONE | B_ERROR | B_INVAL);
			not_readin = 0;
			vfs_busy_pages(bp, 0);
			error = nfs_doio(bp, cred, p);
			if (error) {
				brelse(bp);
				return (error);
			}
		}
		if (bufsize > on) {
			n = min((unsigned)(bufsize - on), uio->uio_resid);
		} else {
			n = 0;
		}
		diff = np->n_size - uio->uio_offset;
		if (diff < n)
			n = diff;
		if (not_readin && n > 0) {
			if (on < bp->b_validoff || (on + n) > bp->b_validend) {
				bp->b_flags |= B_NOCACHE;
				if (bp->b_dirtyend > 0) {
					if ((bp->b_flags & B_DELWRI) == 0)
						panic("nfsbioread");
					if (VOP_BWRITE(bp) == EINTR)
						return (EINTR);
				} else
					brelse(bp);
				goto again;
			}
		}
		vp->v_lastr = lbn;
		diff = (on >= bp->b_validend) ? 0 : (bp->b_validend - on);
		if (diff < n)
			n = diff;
		break;
	    case VLNK:
		nfsstats.biocache_readlinks++;
		bp = nfs_getcacheblk(vp, (daddr_t)0, NFS_MAXPATHLEN, p);
		if (!bp)
			return (EINTR);
		if ((bp->b_flags & B_CACHE) == 0) {
			bp->b_flags |= B_READ;
			vfs_busy_pages(bp, 0);
			error = nfs_doio(bp, cred, p);
			if (error) {
				bp->b_flags |= B_ERROR;
				brelse(bp);
				return (error);
			}
		}
		n = min(uio->uio_resid, NFS_MAXPATHLEN - bp->b_resid);
		on = 0;
		break;
	    case VDIR:
		nfsstats.biocache_readdirs++;
		if (np->n_direofoffset
		    && uio->uio_offset >= np->n_direofoffset) {
			return (0);
		}
		lbn = uio->uio_offset / NFS_DIRBLKSIZ;
		on = uio->uio_offset & (NFS_DIRBLKSIZ - 1);
		bp = nfs_getcacheblk(vp, lbn, NFS_DIRBLKSIZ, p);
		if (!bp)
		    return (EINTR);
		if ((bp->b_flags & B_CACHE) == 0) {
		    bp->b_flags |= B_READ;
		    vfs_busy_pages(bp, 0);
		    error = nfs_doio(bp, cred, p);
		    if (error) {
			vfs_unbusy_pages(bp);
			brelse(bp);
			while (error == NFSERR_BAD_COOKIE) {
			    nfs_invaldir(vp);
			    error = nfs_vinvalbuf(vp, 0, cred, p, 1);
			    /*
			     * Yuck! The directory has been modified on the
			     * server. The only way to get the block is by
			     * reading from the beginning to get all the
			     * offset cookies.
			     */
			    for (i = 0; i <= lbn && !error; i++) {
				if (np->n_direofoffset
				    && (i * NFS_DIRBLKSIZ) >= np->n_direofoffset)
					return (0);
				bp = nfs_getcacheblk(vp, i, NFS_DIRBLKSIZ, p);
				if (!bp)
					return (EINTR);
				if ((bp->b_flags & B_DONE) == 0) {
				    bp->b_flags |= B_READ;
				    vfs_busy_pages(bp, 0);
				    error = nfs_doio(bp, cred, p);
				    if (error) {
					vfs_unbusy_pages(bp);
					brelse(bp);
				    } else if (i < lbn)
					brelse(bp);
				}
			    }
			}
			if (error)
			    return (error);
		    }
		}

		/*
		 * If not eof and read aheads are enabled, start one.
		 * (You need the current block first, so that you have the
		 *  directory offset cookie of the next block.)
		 */
		if (nfs_numasync > 0 && nmp->nm_readahead > 0 &&
		    (np->n_direofoffset == 0 ||
		    (lbn + 1) * NFS_DIRBLKSIZ < np->n_direofoffset) &&
		    !(np->n_flag & NQNFSNONCACHE) &&
		    !incore(vp, lbn + 1)) {
			rabp = nfs_getcacheblk(vp, lbn + 1, NFS_DIRBLKSIZ, p);
			if (rabp) {
			    if ((rabp->b_flags & (B_CACHE|B_DELWRI)) == 0) {
				rabp->b_flags |= (B_READ | B_ASYNC);
				vfs_busy_pages(rabp, 0);
				if (nfs_asyncio(rabp, cred)) {
				    rabp->b_flags |= B_INVAL|B_ERROR;
				    vfs_unbusy_pages(rabp);
				    brelse(rabp);
				}
			    } else {
				brelse(rabp);
			    }
			}
		}
		/*
		 * Make sure we use a signed variant of min() since
		 * the second term may be negative.
		 */
		n = lmin(uio->uio_resid, NFS_DIRBLKSIZ - bp->b_resid - on);
		break;
	    default:
		printf(" nfs_bioread: type %x unexpected\n", vp->v_type);
		break;
	    }

	    if (n > 0) {
		error = uiomove(bp->b_data + on, (int)n, uio);
	    }
	    switch (vp->v_type) {
	    case VREG:
		break;
	    case VLNK:
		n = 0;
		break;
	    case VDIR:
		if (np->n_flag & NQNFSNONCACHE)
			bp->b_flags |= B_INVAL;
		break;
	    default:
		printf(" nfs_bioread: type %x unexpected\n", vp->v_type);
	    }
	    brelse(bp);
	} while (error == 0 && uio->uio_resid > 0 && n > 0);
	return (error);
}

/*
 * Vnode op for write using bio
 */
int
nfs_write(ap)
	struct vop_write_args /* {
		struct vnode *a_vp;
		struct uio *a_uio;
		int  a_ioflag;
		struct ucred *a_cred;
	} */ *ap;
{
	register int biosize;
	register struct uio *uio = ap->a_uio;
	struct proc *p = uio->uio_procp;
	register struct vnode *vp = ap->a_vp;
	struct nfsnode *np = VTONFS(vp);
	register struct ucred *cred = ap->a_cred;
	int ioflag = ap->a_ioflag;
	struct buf *bp;
	struct vattr vattr;
	struct nfsmount *nmp = VFSTONFS(vp->v_mount);
	daddr_t lbn;
	int bufsize;
	int n, on, error = 0, iomode, must_commit;

#ifdef DIAGNOSTIC
	if (uio->uio_rw != UIO_WRITE)
		panic("nfs_write mode");
	if (uio->uio_segflg == UIO_USERSPACE && uio->uio_procp != curproc)
		panic("nfs_write proc");
#endif
	if (vp->v_type != VREG)
		return (EIO);
	if (np->n_flag & NWRITEERR) {
		np->n_flag &= ~NWRITEERR;
		return (np->n_error);
	}
	if ((nmp->nm_flag & (NFSMNT_NFSV3 | NFSMNT_GOTFSINFO)) == NFSMNT_NFSV3)
		(void)nfs_fsinfo(nmp, vp, cred, p);
	if (ioflag & (IO_APPEND | IO_SYNC)) {
		if (np->n_flag & NMODIFIED) {
			np->n_attrstamp = 0;
			error = nfs_vinvalbuf(vp, V_SAVE, cred, p, 1);
			if (error)
				return (error);
		}
		if (ioflag & IO_APPEND) {
			np->n_attrstamp = 0;
			error = VOP_GETATTR(vp, &vattr, cred, p);
			if (error)
				return (error);
			uio->uio_offset = np->n_size;
		}
	}
	if (uio->uio_offset < 0)
		return (EINVAL);
	if (uio->uio_resid == 0)
		return (0);
	/*
	 * Maybe this should be above the vnode op call, but so long as
	 * file servers have no limits, i don't think it matters
	 */
	if (p && uio->uio_offset + uio->uio_resid >
	      p->p_rlimit[RLIMIT_FSIZE].rlim_cur) {
		psignal(p, SIGXFSZ);
		return (EFBIG);
	}
	/*
	 * I use nm_rsize, not nm_wsize so that all buffer cache blocks
	 * will be the same size within a filesystem. nfs_writerpc will
	 * still use nm_wsize when sizing the rpc's.
	 */
	biosize = vp->v_mount->mnt_stat.f_iosize;
	do {

		/*
		 * XXX make sure we aren't cached in the VM page cache
		 */
		/*
		 * Check for a valid write lease.
		 */
		if ((nmp->nm_flag & NFSMNT_NQNFS) &&
		    NQNFS_CKINVALID(vp, np, ND_WRITE)) {
			do {
				error = nqnfs_getlease(vp, ND_WRITE, cred, p);
			} while (error == NQNFS_EXPIRED);
			if (error)
				return (error);
			if (np->n_lrev != np->n_brev ||
			    (np->n_flag & NQNFSNONCACHE)) {
				error = nfs_vinvalbuf(vp, V_SAVE, cred, p, 1);
				if (error)
					return (error);
				np->n_brev = np->n_lrev;
			}
		}
		if ((np->n_flag & NQNFSNONCACHE) && uio->uio_iovcnt == 1) {
			iomode = NFSV3WRITE_FILESYNC;
			error = nfs_writerpc(vp, uio, cred, &iomode, &must_commit);
			if (must_commit)
				nfs_clearcommit(vp->v_mount);
			return (error);
		}
		nfsstats.biocache_writes++;
		lbn = uio->uio_offset / biosize;
		on = uio->uio_offset & (biosize-1);
		n = min((unsigned)(biosize - on), uio->uio_resid);
again:
		if (uio->uio_offset + n > np->n_size) {
			np->n_size = uio->uio_offset + n;
			np->n_flag |= NMODIFIED;
			vnode_pager_setsize(vp, (u_long)np->n_size);
		}
		bufsize = biosize;
		if ((lbn + 1) * biosize > np->n_size) {
			bufsize = np->n_size - lbn * biosize;
			bufsize = (bufsize + DEV_BSIZE - 1) & ~(DEV_BSIZE - 1);
		}
		bp = nfs_getcacheblk(vp, lbn, bufsize, p);
		if (!bp)
			return (EINTR);
		if (bp->b_wcred == NOCRED) {
			crhold(cred);
			bp->b_wcred = cred;
		}
		np->n_flag |= NMODIFIED;

		if ((bp->b_blkno * DEV_BSIZE) + bp->b_dirtyend > np->n_size) {
			bp->b_dirtyend = np->n_size - (bp->b_blkno * DEV_BSIZE);
		}

		/*
		 * If the new write will leave a contiguous dirty
		 * area, just update the b_dirtyoff and b_dirtyend,
		 * otherwise force a write rpc of the old dirty area.
		 */
		if (bp->b_dirtyend > 0 &&
		    (on > bp->b_dirtyend || (on + n) < bp->b_dirtyoff)) {
			bp->b_proc = p;
			if (VOP_BWRITE(bp) == EINTR)
				return (EINTR);
			goto again;
		}
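		/*
		 * Illustrative example (not in the original source): if the
		 * buffer already carries dirty bytes [512, 1024) and the new
		 * write covers [4096, 4608), merging them would also mark
		 * the untouched gap [1024, 4096) dirty, so the old region is
		 * pushed to the server first and the block is re-fetched.
		 */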

		/*
		 * Check for valid write lease and get one as required.
		 * In case getblk() and/or bwrite() delayed us.
		 */
		if ((nmp->nm_flag & NFSMNT_NQNFS) &&
		    NQNFS_CKINVALID(vp, np, ND_WRITE)) {
			do {
				error = nqnfs_getlease(vp, ND_WRITE, cred, p);
			} while (error == NQNFS_EXPIRED);
			if (error) {
				brelse(bp);
				return (error);
			}
			if (np->n_lrev != np->n_brev ||
			    (np->n_flag & NQNFSNONCACHE)) {
				brelse(bp);
				error = nfs_vinvalbuf(vp, V_SAVE, cred, p, 1);
				if (error)
					return (error);
				np->n_brev = np->n_lrev;
				goto again;
			}
		}
		error = uiomove((char *)bp->b_data + on, n, uio);
		if (error) {
			bp->b_flags |= B_ERROR;
			brelse(bp);
			return (error);
		}
		if (bp->b_dirtyend > 0) {
			bp->b_dirtyoff = min(on, bp->b_dirtyoff);
			bp->b_dirtyend = max((on + n), bp->b_dirtyend);
		} else {
			bp->b_dirtyoff = on;
			bp->b_dirtyend = on + n;
		}
		if (bp->b_validend == 0 || bp->b_validend < bp->b_dirtyoff ||
		    bp->b_validoff > bp->b_dirtyend) {
			bp->b_validoff = bp->b_dirtyoff;
			bp->b_validend = bp->b_dirtyend;
		} else {
			bp->b_validoff = min(bp->b_validoff, bp->b_dirtyoff);
			bp->b_validend = max(bp->b_validend, bp->b_dirtyend);
		}
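		/*
		 * Worked example (illustrative, not from the original
		 * source): a write of [0, 512) into a buffer whose valid
		 * region is [0, 4096) leaves b_dirtyoff/b_dirtyend = 0/512
		 * and the valid region unchanged, since the merge only
		 * widens [b_validoff, b_validend) to cover the dirty span.
		 */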

		/*
		 * Since this block is being modified, it must be written
		 * again and not just committed.
		 */
		bp->b_flags &= ~(B_NEEDCOMMIT | B_CLUSTEROK);

		/*
		 * If the lease is non-cachable or IO_SYNC do bwrite().
		 */
		if ((np->n_flag & NQNFSNONCACHE) || (ioflag & IO_SYNC)) {
			bp->b_proc = p;
			error = VOP_BWRITE(bp);
			if (error)
				return (error);
			if (np->n_flag & NQNFSNONCACHE) {
				error = nfs_vinvalbuf(vp, V_SAVE, cred, p, 1);
				if (error)
					return (error);
			}
		} else if ((n + on) == biosize &&
			(nmp->nm_flag & NFSMNT_NQNFS) == 0) {
			bp->b_proc = (struct proc *)0;
			bp->b_flags |= B_ASYNC;
			(void)nfs_writebp(bp, 0);
		} else
			bdwrite(bp);
	} while (uio->uio_resid > 0 && n > 0);
	return (0);
}

/*
 * Get an nfs cache block.
 * Allocate a new one if the block isn't currently in the cache
 * and return the block marked busy. If the calling process is
 * interrupted by a signal for an interruptible mount point, return
 * NULL.
 */
static struct buf *
nfs_getcacheblk(vp, bn, size, p)
	struct vnode *vp;
	daddr_t bn;
	int size;
	struct proc *p;
{
	register struct buf *bp;
	struct nfsmount *nmp = VFSTONFS(vp->v_mount);
	int biosize = vp->v_mount->mnt_stat.f_iosize;

	if (nmp->nm_flag & NFSMNT_INT) {
		bp = getblk(vp, bn, size, PCATCH, 0);
		while (bp == (struct buf *)0) {
			if (nfs_sigintr(nmp, (struct nfsreq *)0, p))
				return ((struct buf *)0);
			bp = getblk(vp, bn, size, 0, 2 * hz);
		}
	} else
		bp = getblk(vp, bn, size, 0, 0);

	if (vp->v_type == VREG)
		bp->b_blkno = (bn * biosize) / DEV_BSIZE;

	return (bp);
}
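/*
 * A minimal caller sketch (illustrative only, not part of the original
 * source), showing the contract described above: a NULL return means the
 * sleep was interrupted on an interruptible mount, and for VREG vnodes
 * b_blkno has already been mapped to DEV_BSIZE units.
 */
#if 0
	daddr_t lbn = uio->uio_offset / biosize;	/* logical block */
	struct buf *ebp = nfs_getcacheblk(vp, lbn, biosize, p);

	if (ebp == (struct buf *)0)
		return (EINTR);		/* signal on an intr mount */
	/* ... fill or read ebp, then brelse()/bwrite() it ... */
#endif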

/*
 * Flush and invalidate all dirty buffers. If another process is already
 * doing the flush, just wait for completion.
 */
int
nfs_vinvalbuf(vp, flags, cred, p, intrflg)
	struct vnode *vp;
	int flags;
	struct ucred *cred;
	struct proc *p;
	int intrflg;
{
	register struct nfsnode *np = VTONFS(vp);
	struct nfsmount *nmp = VFSTONFS(vp->v_mount);
	int error = 0, slpflag, slptimeo;

	if ((nmp->nm_flag & NFSMNT_INT) == 0)
		intrflg = 0;
	if (intrflg) {
		slpflag = PCATCH;
		slptimeo = 2 * hz;
	} else {
		slpflag = 0;
		slptimeo = 0;
	}
	/*
	 * First wait for any other process doing a flush to complete.
	 */
	while (np->n_flag & NFLUSHINPROG) {
		np->n_flag |= NFLUSHWANT;
		error = tsleep((caddr_t)&np->n_flag, PRIBIO + 2, "nfsvinval",
			slptimeo);
		if (error && intrflg && nfs_sigintr(nmp, (struct nfsreq *)0, p))
			return (EINTR);
	}

	/*
	 * Now, flush as required.
	 */
	np->n_flag |= NFLUSHINPROG;
	error = vinvalbuf(vp, flags, cred, p, slpflag, 0);
	while (error) {
		if (intrflg && nfs_sigintr(nmp, (struct nfsreq *)0, p)) {
			np->n_flag &= ~NFLUSHINPROG;
			if (np->n_flag & NFLUSHWANT) {
				np->n_flag &= ~NFLUSHWANT;
				wakeup((caddr_t)&np->n_flag);
			}
			return (EINTR);
		}
		error = vinvalbuf(vp, flags, cred, p, 0, slptimeo);
	}
	np->n_flag &= ~(NMODIFIED | NFLUSHINPROG);
	if (np->n_flag & NFLUSHWANT) {
		np->n_flag &= ~NFLUSHWANT;
		wakeup((caddr_t)&np->n_flag);
	}
	return (0);
}

/*
 * Initiate asynchronous I/O. Return an error if no nfsiods are available.
 * This is mainly to avoid queueing async I/O requests when the nfsiods
 * are all hung on a dead server.
 */
int
nfs_asyncio(bp, cred)
	register struct buf *bp;
	struct ucred *cred;
{
	struct nfsmount *nmp;
	int i;
	int gotiod;
	int slpflag = 0;
	int slptimeo = 0;
	int error;

	if (nfs_numasync == 0)
		return (EIO);

	nmp = VFSTONFS(bp->b_vp->v_mount);
again:
	if (nmp->nm_flag & NFSMNT_INT)
		slpflag = PCATCH;
	gotiod = FALSE;

	/*
	 * Find a free iod to process this request.
	 */
	for (i = 0; i < NFS_MAXASYNCDAEMON; i++)
		if (nfs_iodwant[i]) {
			/*
			 * Found one, so wake it up and tell it which
			 * mount to process.
			 */
			NFS_DPF(ASYNCIO,
				("nfs_asyncio: waking iod %d for mount %p\n",
				 i, nmp));
			nfs_iodwant[i] = (struct proc *)0;
			nfs_iodmount[i] = nmp;
			nmp->nm_bufqiods++;
			wakeup((caddr_t)&nfs_iodwant[i]);
			gotiod = TRUE;
			break;
		}

	/*
	 * If none are free, we may already have an iod working on this mount
	 * point.  If so, it will process our request.
	 */
	if (!gotiod) {
		if (nmp->nm_bufqiods > 0) {
			NFS_DPF(ASYNCIO,
				("nfs_asyncio: %d iods are already processing mount %p\n",
				 nmp->nm_bufqiods, nmp));
			gotiod = TRUE;
		}
	}

	/*
	 * If we have an iod which can process the request, then queue
	 * the buffer.
	 */
	if (gotiod) {
		/*
		 * Ensure that the queue never grows too large.
		 */
		while (nmp->nm_bufqlen >= 2*nfs_numasync) {
			NFS_DPF(ASYNCIO,
				("nfs_asyncio: waiting for mount %p queue to drain\n", nmp));
			nmp->nm_bufqwant = TRUE;
			error = tsleep(&nmp->nm_bufq, slpflag | PRIBIO,
				       "nfsaio", slptimeo);
			if (error) {
				if (nfs_sigintr(nmp, NULL, bp->b_proc))
					return (EINTR);
				if (slpflag == PCATCH) {
					slpflag = 0;
					slptimeo = 2 * hz;
				}
			}
			/*
			 * We might have lost our iod while sleeping,
			 * so check and loop if necessary.
			 */
			if (nmp->nm_bufqiods == 0) {
				NFS_DPF(ASYNCIO,
					("nfs_asyncio: no iods after mount %p queue was drained, looping\n", nmp));
				goto again;
			}
		}
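		/*
		 * Illustrative note (not in the original source): the bound
		 * above caps each mount's queue at twice the number of
		 * nfsiods, e.g. with 4 nfsiods at most 8 buffers may sit on
		 * nm_bufq before further callers sleep in "nfsaio".
		 */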

		if (bp->b_flags & B_READ) {
			if (bp->b_rcred == NOCRED && cred != NOCRED) {
				crhold(cred);
				bp->b_rcred = cred;
			}
		} else {
			bp->b_flags |= B_WRITEINPROG;
			if (bp->b_wcred == NOCRED && cred != NOCRED) {
				crhold(cred);
				bp->b_wcred = cred;
			}
		}

		TAILQ_INSERT_TAIL(&nmp->nm_bufq, bp, b_freelist);
		nmp->nm_bufqlen++;
		return (0);
	}

	/*
	 * All the iods are busy on other mounts, so return EIO to
	 * force the caller to process the i/o synchronously.
	 */
	NFS_DPF(ASYNCIO, ("nfs_asyncio: no iods available, i/o is synchronous\n"));
	return (EIO);
}

/*
 * Do an I/O operation to/from a cache block. This may be called
 * synchronously or from an nfsiod.
 */
int
nfs_doio(bp, cr, p)
	register struct buf *bp;
	struct ucred *cr;
	struct proc *p;
{
	register struct uio *uiop;
	register struct vnode *vp;
	struct nfsnode *np;
	struct nfsmount *nmp;
	int error = 0, diff, len, iomode, must_commit = 0;
	struct uio uio;
	struct iovec io;

	vp = bp->b_vp;
	np = VTONFS(vp);
	nmp = VFSTONFS(vp->v_mount);
	uiop = &uio;
	uiop->uio_iov = &io;
	uiop->uio_iovcnt = 1;
	uiop->uio_segflg = UIO_SYSSPACE;
	uiop->uio_procp = p;

	/*
	 * Historically, paging was done with physio, but no more.
	 */
	if (bp->b_flags & B_PHYS) {
		/*
		 * ...though reading /dev/drum still gets us here.
		 */
		io.iov_len = uiop->uio_resid = bp->b_bcount;
		/* mapping was done by vmapbuf() */
		io.iov_base = bp->b_data;
		uiop->uio_offset = ((off_t)bp->b_blkno) * DEV_BSIZE;
		if (bp->b_flags & B_READ) {
			uiop->uio_rw = UIO_READ;
			nfsstats.read_physios++;
			error = nfs_readrpc(vp, uiop, cr);
		} else {
			int com;

			iomode = NFSV3WRITE_DATASYNC;
			uiop->uio_rw = UIO_WRITE;
			nfsstats.write_physios++;
			error = nfs_writerpc(vp, uiop, cr, &iomode, &com);
		}
		if (error) {
			bp->b_flags |= B_ERROR;
			bp->b_error = error;
		}
	} else if (bp->b_flags & B_READ) {
		io.iov_len = uiop->uio_resid = bp->b_bcount;
		io.iov_base = bp->b_data;
		uiop->uio_rw = UIO_READ;
		switch (vp->v_type) {
		case VREG:
			uiop->uio_offset = ((off_t)bp->b_blkno) * DEV_BSIZE;
			nfsstats.read_bios++;
			error = nfs_readrpc(vp, uiop, cr);
			if (!error) {
				bp->b_validoff = 0;
				if (uiop->uio_resid) {
					/*
					 * If len > 0, there is a hole in the file and
					 * no writes after the hole have been pushed to
					 * the server yet.
					 * Just zero fill the rest of the valid area.
					 */
					diff = bp->b_bcount - uiop->uio_resid;
					len = np->n_size - (((u_quad_t)bp->b_blkno) * DEV_BSIZE
						+ diff);
					if (len > 0) {
						len = min(len, uiop->uio_resid);
						bzero((char *)bp->b_data + diff, len);
						bp->b_validend = diff + len;
					} else
						bp->b_validend = diff;
				} else
					bp->b_validend = bp->b_bcount;
			}
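			/*
			 * Worked example (illustrative, not from the
			 * original source): b_bcount = 8192 with the rpc
			 * returning 4096 bytes leaves diff = 4096; if the
			 * file extends 1000 bytes past blkno*DEV_BSIZE +
			 * diff, then len = 1000 bytes of hole are zeroed
			 * and b_validend becomes 5096.
			 */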
			if (p && (vp->v_flag & VTEXT) &&
				(((nmp->nm_flag & NFSMNT_NQNFS) &&
				  NQNFS_CKINVALID(vp, np, ND_READ) &&
				  np->n_lrev != np->n_brev) ||
				 (!(nmp->nm_flag & NFSMNT_NQNFS) &&
				  np->n_mtime != np->n_vattr.va_mtime.tv_sec))) {
				uprintf("Process killed due to text file modification\n");
				psignal(p, SIGKILL);
#ifdef __NetBSD__
				p->p_holdcnt++;
#else
				p->p_flag |= P_NOSWAP;
#endif
			}
			break;
		case VLNK:
			uiop->uio_offset = (off_t)0;
			nfsstats.readlink_bios++;
			error = nfs_readlinkrpc(vp, uiop, cr);
			break;
		case VDIR:
			nfsstats.readdir_bios++;
			uiop->uio_offset = ((u_quad_t)bp->b_lblkno) * NFS_DIRBLKSIZ;
			if (nmp->nm_flag & NFSMNT_RDIRPLUS) {
				error = nfs_readdirplusrpc(vp, uiop, cr);
				if (error == NFSERR_NOTSUPP)
					nmp->nm_flag &= ~NFSMNT_RDIRPLUS;
			}
			if ((nmp->nm_flag & NFSMNT_RDIRPLUS) == 0)
				error = nfs_readdirrpc(vp, uiop, cr);
			break;
		default:
			printf("nfs_doio: type %x unexpected\n", vp->v_type);
			break;
		}
		if (error) {
			bp->b_flags |= B_ERROR;
			bp->b_error = error;
		}
	} else {
		if (((bp->b_blkno * DEV_BSIZE) + bp->b_dirtyend) > np->n_size)
			bp->b_dirtyend = np->n_size - (bp->b_blkno * DEV_BSIZE);

		if (bp->b_dirtyend > bp->b_dirtyoff) {
			io.iov_len = uiop->uio_resid = bp->b_dirtyend
				- bp->b_dirtyoff;
			uiop->uio_offset = ((off_t)bp->b_blkno) * DEV_BSIZE
				+ bp->b_dirtyoff;
			io.iov_base = (char *)bp->b_data + bp->b_dirtyoff;
			uiop->uio_rw = UIO_WRITE;
			nfsstats.write_bios++;
			if ((bp->b_flags & (B_ASYNC | B_NEEDCOMMIT | B_NOCACHE | B_CLUSTER)) == B_ASYNC)
				iomode = NFSV3WRITE_UNSTABLE;
			else
				iomode = NFSV3WRITE_FILESYNC;
			bp->b_flags |= B_WRITEINPROG;
			error = nfs_writerpc(vp, uiop, cr, &iomode, &must_commit);
			if (!error && iomode == NFSV3WRITE_UNSTABLE) {
				bp->b_flags |= B_NEEDCOMMIT;
				if (bp->b_dirtyoff == 0
				    && bp->b_dirtyend == bp->b_bufsize)
					bp->b_flags |= B_CLUSTEROK;
			} else
				bp->b_flags &= ~(B_NEEDCOMMIT | B_CLUSTEROK);
			bp->b_flags &= ~B_WRITEINPROG;

			/*
			 * For an interrupted write, the buffer is still valid
			 * and the write hasn't been pushed to the server yet,
			 * so we can't set B_ERROR and report the interruption
			 * by setting B_EINTR. For the B_ASYNC case, B_EINTR
			 * is not relevant, so the rpc attempt is essentially
			 * a noop.  For the case of a V3 write rpc not being
			 * committed to stable storage, the block is still
			 * dirty and requires either a commit rpc or another
			 * write rpc with iomode == NFSV3WRITE_FILESYNC before
			 * the block is reused. This is indicated by setting
			 * the B_DELWRI and B_NEEDCOMMIT flags.
			 */
			if (error == EINTR
			    || (!error && (bp->b_flags & B_NEEDCOMMIT))) {
				bp->b_flags &= ~(B_INVAL|B_NOCACHE);
				bp->b_flags |= B_DELWRI;

				/*
				 * Since for the B_ASYNC case, nfs_bwrite() has reassigned the
				 * buffer to the clean list, we have to reassign it back to the
				 * dirty one. Ugh.
				 */
				if (bp->b_flags & B_ASYNC)
					reassignbuf(bp, vp);
				else if (error)
					bp->b_flags |= B_EINTR;
			} else {
				if (error) {
					bp->b_flags |= B_ERROR;
					bp->b_error = np->n_error = error;
					np->n_flag |= NWRITEERR;
				}
				bp->b_dirtyoff = bp->b_dirtyend = 0;
			}
		} else {
			bp->b_resid = 0;
			biodone(bp);
			return (0);
		}
	}
	bp->b_resid = uiop->uio_resid;
	if (must_commit)
		nfs_clearcommit(vp->v_mount);
	biodone(bp);
	return (error);
}