[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]

FreeBSD/Linux Kernel Cross Reference
sys/ufs/ufs/ufs_readwrite.c

Version: -  FREEBSD  -  FREEBSD8  -  FREEBSD7  -  FREEBSD72  -  FREEBSD71  -  FREEBSD70  -  FREEBSD6  -  FREEBSD64  -  FREEBSD63  -  FREEBSD62  -  FREEBSD61  -  FREEBSD60  -  FREEBSD5  -  FREEBSD55  -  FREEBSD54  -  FREEBSD53  -  FREEBSD52  -  FREEBSD51  -  FREEBSD50  -  FREEBSD4  -  FREEBSD3  -  FREEBSD22  -  linux-2.6  -  linux-2.4.22  -  MK83  -  MK84  -  PLAN9  -  DFBSD  -  NETBSD  -  NETBSD5  -  NETBSD4  -  NETBSD3  -  NETBSD20  -  OPENBSD  -  xnu-517  -  xnu-792  -  xnu-792.6.70  -  xnu-1228  -  xnu-1456.1.26  -  OPENSOLARIS  -  minix-3-1-1  -  FREEBSD-LIBC  -  FREEBSD7-LIBC  -  FREEBSD6-LIBC  -  GLIBC27 
SearchContext: -  none  -  3  -  10 

    1 /*-
    2  * Copyright (c) 1993
    3  *      The Regents of the University of California.  All rights reserved.
    4  *
    5  * Redistribution and use in source and binary forms, with or without
    6  * modification, are permitted provided that the following conditions
    7  * are met:
    8  * 1. Redistributions of source code must retain the above copyright
    9  *    notice, this list of conditions and the following disclaimer.
   10  * 2. Redistributions in binary form must reproduce the above copyright
   11  *    notice, this list of conditions and the following disclaimer in the
   12  *    documentation and/or other materials provided with the distribution.
   13  * 3. All advertising materials mentioning features or use of this software
   14  *    must display the following acknowledgement:
   15  *      This product includes software developed by the University of
   16  *      California, Berkeley and its contributors.
   17  * 4. Neither the name of the University nor the names of its contributors
   18  *    may be used to endorse or promote products derived from this software
   19  *    without specific prior written permission.
   20  *
   21  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
   22  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
   23  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
   24  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
   25  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
   26  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
   27  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
   28  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
   29  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
   30  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
   31  * SUCH DAMAGE.
   32  *
   33  *      @(#)ufs_readwrite.c     8.11 (Berkeley) 5/8/95
   34  * $FreeBSD: src/sys/ufs/ufs/ufs_readwrite.c,v 1.65.2.16 2004/05/14 23:36:20 kensmith Exp $
   35  */
   36 /* Generic names used by the code below, bound here to the concrete identifiers (READ_S/WRITE_S are the names printed in panic messages). */
   37 #define BLKSIZE(a, b, c)        blksize(a, b, c)
   38 #define FS                      struct fs
   39 #define I_FS                    i_fs
   40 #define READ                    ffs_read
   41 #define READ_S                  "ffs_read"
   42 #define WRITE                   ffs_write
   43 #define WRITE_S                 "ffs_write"
   44 
   45 #include <vm/vm.h>
   46 #include <vm/vm_object.h>
   47 #include <vm/vm_pager.h>
   48 #include <vm/vm_map.h>
   49 #include <vm/vnode_pager.h>
   50 #include <sys/event.h>
   51 #include <sys/vmmeter.h>
   52 #include "opt_directio.h"
   53 /* Hand event bits `b` to KNOTE() for the si_note list inside the vnode's v_pollinfo selinfo; `vp` is parenthesized so any pointer expression may be passed. */
   54 #define VN_KNOTE(vp, b) \
   55         KNOTE((struct klist *)&(vp)->v_pollinfo.vpi_selinfo.si_note, (b))
   56 
   57 #ifdef DIRECTIO
   58 extern int ffs_rawread(struct vnode *vp, struct uio *uio, int *workdone); /* called by READ for IO_DIRECT requests */
   59 #endif
   60 
   61 /*
   62  * Vnode op for reading (READ expands to ffs_read).
       *
       * Copies data from the file behind ap->a_vp into ap->a_uio, at most one
       * filesystem block per loop iteration, until uio_resid reaches zero or
       * the offset reaches ip->i_size.  The bits of ap->a_ioflag above bit 16
       * are used as the sequential-access count; the IO_* flags tested here
       * are IO_DIRECT and IO_VMIO.
       *
       * Returns 0 when there is nothing to read (uio_resid <= 0, or the offset
       * is at or beyond EOF), EFBIG when the offset exceeds fs_maxfilesize,
       * and otherwise the error, if any, from the buffer read or the copy to
       * the uio.
       *
       * Access-time bookkeeping: unless the mount has MNT_NOATIME, IN_ACCESS
       * is set on the inode for a non-empty buffered read that started at or
       * after EOF, completed without error, or transferred at least some data.
   63  */
   64 /* ARGSUSED */
   65 int
   66 READ(ap)
   67         struct vop_read_args /* {
   68                 struct vnode *a_vp;
   69                 struct uio *a_uio;
   70                 int a_ioflag;
   71                 struct ucred *a_cred;
   72         } */ *ap;
   73 {
   74         register struct vnode *vp;
   75         register struct inode *ip;
   76         register struct uio *uio;
   77         register FS *fs;
   78         struct buf *bp;
   79         ufs_daddr_t lbn, nextlbn;
   80         off_t bytesinfile;
   81         long size, xfersize, blkoffset;
   82         int error, orig_resid;
   83         u_short mode;
   84         int seqcount;
   85         int ioflag;
   86         vm_object_t object;
   87 
   88         vp = ap->a_vp;
   89         seqcount = ap->a_ioflag >> 16; /* upper bits carry the sequential-access count */
   90         ip = VTOI(vp);
   91         mode = ip->i_mode; /* NOTE(review): 'mode' is not read again in this function */
   92         uio = ap->a_uio;
   93         ioflag = ap->a_ioflag;
   94 #ifdef DIRECTIO
              /*
               * Direct I/O: let ffs_rawread() try first.  Return its result if
               * it failed or reported that it did any work; otherwise fall
               * through to the buffered path below.
               */
   95         if ((ioflag & IO_DIRECT) != 0) {
   96                 int workdone;
   97 
   98                 error = ffs_rawread(vp, uio, &workdone);
   99                 if (error || workdone)
  100                         return error;
  101         }
  102 #endif
  103 
  104 #ifdef DIAGNOSTIC
  105         if (uio->uio_rw != UIO_READ)
  106                 panic("%s: mode", READ_S);
  107 
  108         if (vp->v_type == VLNK) {
  109                 if ((int)ip->i_size < vp->v_mount->mnt_maxsymlinklen)
  110                         panic("%s: short symlink", READ_S);
  111         } else if (vp->v_type != VREG && vp->v_type != VDIR)
  112                 panic("%s: type %d", READ_S, vp->v_type);
  113 #endif
  114         fs = ip->I_FS;
  115         if ((u_int64_t)uio->uio_offset > fs->fs_maxfilesize)
  116                 return (EFBIG);
  117 
              /* Remember the requested length so partial progress can be detected on exit. */
  118         orig_resid = uio->uio_resid;
  119         if (orig_resid <= 0)
  120                 return (0);
  121 
  122         object = vp->v_object;
  123 
              /* Starting at or past EOF: nothing to copy, only the access flag is updated. */
  124         bytesinfile = ip->i_size - uio->uio_offset;
  125         if (bytesinfile <= 0) {
  126                 if ((vp->v_mount->mnt_flag & MNT_NOATIME) == 0)
  127                         ip->i_flag |= IN_ACCESS;
  128                 return 0;
  129         }
  130 
              /*
               * Take a reference on the VM object (if any) for the duration of
               * the read; every return below this point drops it again with
               * vm_object_vndeallocate().
               */
  131         if (object)
  132                 vm_object_reference(object);
  133 
  134 #ifdef ENABLE_VFS_IOOPT
  135         /*
  136          * If IO optimisation is turned on,
  137          * and we are NOT a VM based IO request, 
  138          * (i.e. not headed for the buffer cache)
  139          * but there IS a vm object associated with it.
  140          */
  141         if ((ioflag & IO_VMIO) == 0 && (vfs_ioopt > 1) && object) {
  142                 int nread, toread;
  143 
  144                 toread = uio->uio_resid;
  145                 if (toread > bytesinfile)
  146                         toread = bytesinfile;
  147                 if (toread >= PAGE_SIZE) {
  148                         /*
  149                          * Then if it's at least a page in size, try to
  150                          * get the data from the object using vm tricks
  151                          */
  152                         error = uioread(toread, uio, object, &nread);
  153                         if ((uio->uio_resid == 0) || (error != 0)) {
  154                                 /*
  155                                  * If we finished or there was an error
  156                                  * then finish up (the reference previously
  157                                  * obtained on object must be released).
  158                                  */
  159                                 if ((error == 0 ||
  160                                     uio->uio_resid != orig_resid) &&
  161                                     (vp->v_mount->mnt_flag & MNT_NOATIME) == 0)
  162                                         ip->i_flag |= IN_ACCESS;
  163 
  164                                 if (object)
  165                                         vm_object_vndeallocate(object);
  166                                 return error;
  167                         }
  168                 }
  169         }
  170 #endif
  171 
  172         /*
  173          * Ok so we couldn't do it all in one vm trick...
  174          * so cycle around trying smaller bites..
  175          */
  176         for (error = 0, bp = NULL; uio->uio_resid > 0; bp = NULL) {
  177                 if ((bytesinfile = ip->i_size - uio->uio_offset) <= 0)
  178                         break;
  179 #ifdef ENABLE_VFS_IOOPT
  180                 if ((ioflag & IO_VMIO) == 0 && (vfs_ioopt > 1) && object) {
  181                         /*
  182                          * Obviously we didn't finish above, but we
  183                          * didn't get an error either. Try the same trick again.
  184                          * but this time we are looping.
  185                          */
  186                         int nread, toread;
  187                         toread = uio->uio_resid;
  188                         if (toread > bytesinfile)
  189                                 toread = bytesinfile;
  190 
  191                         /*
  192                          * Once again, if there isn't enough for a
  193                          * whole page, don't try optimising.
  194                          */
  195                         if (toread >= PAGE_SIZE) {
  196                                 error = uioread(toread, uio, object, &nread);
  197                                 if ((uio->uio_resid == 0) || (error != 0)) {
  198                                         /*
  199                                          * If we finished or there was an 
  200                                          * error then finish up (the reference
  201                                          * previously obtained on object must 
  202                                          * be released).
  203                                          */
  204                                         if ((error == 0 ||
  205                                             uio->uio_resid != orig_resid) &&
  206                                             (vp->v_mount->mnt_flag &
  207                                             MNT_NOATIME) == 0)
  208                                                 ip->i_flag |= IN_ACCESS;
  209                                         if (object)
  210                                                 vm_object_vndeallocate(object);
  211                                         return error;
  212                                 }
  213                                 /*
  214                                  * To get here we didn't finish or err.
  215                                  * If we did get some data,
  216                                  * loop to try another bite.
  217                                  */
  218                                 if (nread > 0) {
  219                                         continue;
  220                                 }
  221                         }
  222                 }
  223 #endif
  224 
  225                 lbn = lblkno(fs, uio->uio_offset);
  226                 nextlbn = lbn + 1;
  227 
  228                 /*
  229                  * size of buffer.  The buffer representing the
  230                  * end of the file is rounded up to the size of
  231                  * the block type ( fragment or full block, 
  232                  * depending ).
  233                  */
  234                 size = BLKSIZE(fs, ip, lbn);
  235                 blkoffset = blkoff(fs, uio->uio_offset);
  236                 
  237                 /*
  238                  * The amount we want to transfer in this iteration is
  239                  * one FS block less the amount of the data before
  240                  * our startpoint (duh!)
  241                  */
  242                 xfersize = fs->fs_bsize - blkoffset;
  243 
  244                 /*
  245                  * But if we actually want less than the block,
  246                  * or the file doesn't have a whole block more of data,
  247                  * then use the lesser number.
  248                  */
  249                 if (uio->uio_resid < xfersize)
  250                         xfersize = uio->uio_resid;
  251                 if (bytesinfile < xfersize)
  252                         xfersize = bytesinfile;
  253 
  254                 if (lblktosize(fs, nextlbn) >= ip->i_size) {
  255                         /*
  256                          * Don't do readahead if this is the end of the file.
  257                          */
  258                         error = bread(vp, lbn, size, NOCRED, &bp);
  259                 } else if ((vp->v_mount->mnt_flag & MNT_NOCLUSTERR) == 0) {
  260                         /* 
  261                          * Otherwise if we are allowed to cluster,
  262                          * grab as much as we can.
  263                          *
  264                          * XXX  This may not be a win if we are not
  265                          * doing sequential access.
  266                          */
  267                         error = cluster_read(vp, ip->i_size, lbn,
  268                                 size, NOCRED, uio->uio_resid, seqcount, &bp);
  269                 } else if (seqcount > 1) {
  270                         /*
  271                          * If we are NOT allowed to cluster, then
  272                          * if we appear to be acting sequentially,
  273                          * fire off a request for a readahead
  274                          * as well as a read. Note that the 4th and 5th
  275                          * arguments point to arrays of the size specified in
  276                          * the 6th argument.
  277                          */
  278                         int nextsize = BLKSIZE(fs, ip, nextlbn);
  279                         error = breadn(vp, lbn,
  280                             size, &nextlbn, &nextsize, 1, NOCRED, &bp);
  281                 } else {
  282                         /*
  283                          * Failing all of the above, just read what the 
  284                          * user asked for. Interestingly, the same as
  285                          * the first option above.
  286                          */
  287                         error = bread(vp, lbn, size, NOCRED, &bp);
  288                 }
                      /*
                       * On a read error release the buffer here and clear bp, so
                       * the cleanup after the loop does not release it again.
                       */
  289                 if (error) {
  290                         brelse(bp);
  291                         bp = NULL;
  292                         break;
  293                 }
  294 
  295                 /*
  296                  * If IO_DIRECT then set B_DIRECT for the buffer.  This
  297                  * will cause us to attempt to release the buffer later on
  298                  * and will cause the buffer cache to attempt to free the
  299                  * underlying pages.
  300                  */
  301                 if (ioflag & IO_DIRECT)
  302                         bp->b_flags |= B_DIRECT;
  303 
  304                 /*
  305                  * We should only get non-zero b_resid when an I/O error
  306                  * has occurred, which should cause us to break above.
  307                  * However, if the short read did not cause an error,
  308                  * then we want to ensure that we do not uiomove bad
  309                  * or uninitialized data.
  310                  *
  311                  * XXX b_resid is only valid when an actual I/O has occurred
  312                  * and may be incorrect if the buffer is B_CACHE or if the
  313                  * last op on the buffer was a failed write.  This KASSERT
  314                  * is a precursor to removing it from the UFS code.
  315                  */
  316                 KASSERT(bp->b_resid == 0, ("bp->b_resid != 0"));
  317                 size -= bp->b_resid;
  318                 if (size < xfersize) {
  319                         if (size == 0)
  320                                 break;
  321                         xfersize = size;
  322                 }
  323 
  324 #ifdef ENABLE_VFS_IOOPT
  325                 if (vfs_ioopt && object &&
  326                     (bp->b_flags & B_VMIO) &&
  327                     ((blkoffset & PAGE_MASK) == 0) &&
  328                     ((xfersize & PAGE_MASK) == 0)) {
  329                         /*
  330                          * If VFS IO  optimisation is turned on,
  331                          * and it's an exact page multiple
  332                          * And a normal VM based op,
  333                          * then use uiomoveco()
  334                          */
  335                         error =
  336                                 uiomoveco((char *)bp->b_data + blkoffset,
  337                                         (int)xfersize, uio, object);
  338                 } else 
  339 #endif
  340                 {
  341                         /*
  342                          * otherwise use the general form
  343                          */
  344                         error =
  345                                 uiomove((char *)bp->b_data + blkoffset,
  346                                         (int)xfersize, uio);
  347                 }
  348 
                      /* Copy failed: leave with bp still set; it is released after the loop. */
  349                 if (error)
  350                         break;
  351 
  352                 if ((ioflag & (IO_VMIO|IO_DIRECT)) && 
  353                     (LIST_FIRST(&bp->b_dep) == NULL)) {
  354                         /*
  355                          * If there are no dependencies, and it's VMIO,
  356                          * then we don't need the buf, mark it available
  357                          * for freeing. The VM has the data.
  358                          */
  359                         bp->b_flags |= B_RELBUF;
  360                         brelse(bp);
  361                 } else {
  362                         /*
  363                          * Otherwise let whoever
  364                          * made the request take care of
  365                          * freeing it. We just queue
  366                          * it onto another list.
  367                          */
  368                         bqrelse(bp);
  369                 }
  370         }
  371 
  372         /* 
  373          * This can only happen in the case of an error
  374          * because the loop above resets bp to NULL on each iteration
  375          * and on normal completion has not set a new value into it.
  376          * so it must have come from a 'break' statement
  377          */
  378         if (bp != NULL) {
  379                 if ((ioflag & (IO_VMIO|IO_DIRECT)) && 
  380                     (LIST_FIRST(&bp->b_dep) == NULL)) {
  381                         bp->b_flags |= B_RELBUF;
  382                         brelse(bp);
  383                 } else {
  384                         bqrelse(bp);
  385                 }
  386         }
  387 
              /* Drop the object reference taken before the loop, then record the access. */
  388         if (object)
  389                 vm_object_vndeallocate(object);
  390         if ((error == 0 || uio->uio_resid != orig_resid) &&
  391             (vp->v_mount->mnt_flag & MNT_NOATIME) == 0)
  392                 ip->i_flag |= IN_ACCESS;
  393         return (error);
  394 }
  395 
  396 /*
  397  * Vnode op for writing (WRITE expands to ffs_write).
       *
       * Copies ap->a_uio into the file behind ap->a_vp one filesystem block
       * at a time: each block is obtained with VOP_BALLOC(), filled with
       * uiomove(), and then handed to bwrite(), bawrite(), cluster_write()
       * or bdwrite() depending on a_ioflag, the mount flags and the
       * vm_page_count_severe()/buf_dirty_count_severe() checks.
       *
       * Only VREG and VLNK vnodes are accepted; any other type panics.  For
       * VREG, IO_APPEND moves the offset to i_size first, and an inode with
       * the APPEND flag rejects a write that does not start at i_size with
       * EPERM.
       *
       * Returns EFBIG if the offset is negative or offset + resid exceeds
       * fs_maxfilesize, or (for VREG, after posting SIGXFSZ) exceeds the
       * process's RLIMIT_FSIZE; otherwise the error from VOP_BALLOC() or
       * uiomove(), or, after a successful IO_SYNC write that moved data, the
       * result of UFS_UPDATE().  If an error occurred and IO_UNIT is set,
       * UFS_TRUNCATE() is called with the original size and the uio offset
       * and resid are restored to their values on entry.
  398  */
  399 int
  400 WRITE(ap)
  401         struct vop_write_args /* {
  402                 struct vnode *a_vp;
  403                 struct uio *a_uio;
  404                 int a_ioflag;
  405                 struct ucred *a_cred;
  406         } */ *ap;
  407 {
  408         register struct vnode *vp;
  409         register struct uio *uio;
  410         register struct inode *ip;
  411         register FS *fs;
  412         struct buf *bp;
  413         struct proc *p;
  414         ufs_daddr_t lbn;
  415         off_t osize;
  416         int seqcount;
  417         int blkoffset, error, extended, flags, ioflag, resid, size, xfersize;
  418         vm_object_t object;
  419 
  420         extended = 0;
  421         seqcount = ap->a_ioflag >> 16; /* upper bits carry the sequential-access count */
  422         ioflag = ap->a_ioflag;
  423         uio = ap->a_uio;
  424         vp = ap->a_vp;
  425         ip = VTOI(vp);
  426 
              /*
               * Take a reference on the VM object (if any); every return below
               * drops it again with vm_object_vndeallocate().
               */
  427         object = vp->v_object;
  428         if (object)
  429                 vm_object_reference(object);
  430 
  431 #ifdef DIAGNOSTIC
  432         if (uio->uio_rw != UIO_WRITE)
  433                 panic("%s: mode", WRITE_S);
  434 #endif
  435 
  436         switch (vp->v_type) {
  437         case VREG:
  438                 if (ioflag & IO_APPEND)
  439                         uio->uio_offset = ip->i_size;
  440                 if ((ip->i_flags & APPEND) && uio->uio_offset != ip->i_size) {
  441                         if (object)
  442                                 vm_object_vndeallocate(object);
  443                         return (EPERM);
  444                 }
  445                 /* FALLTHROUGH */
  446         case VLNK:
  447                 break;
  448         case VDIR:
  449                 panic("%s: dir write", WRITE_S);
  450                 break;
  451         default:
  452                 panic("%s: type %p %d (%d,%d)", WRITE_S, vp, (int)vp->v_type,
  453                         (int)uio->uio_offset,
  454                         (int)uio->uio_resid
  455                 );
  456         }
  457 
  458         fs = ip->I_FS;
  459         if (uio->uio_offset < 0 ||
  460             (u_int64_t)uio->uio_offset + uio->uio_resid > fs->fs_maxfilesize) {
  461                 if (object)
  462                         vm_object_vndeallocate(object);
  463                 return (EFBIG);
  464         }
  465         /*
  466          * Maybe this should be above the vnode op call, but so long as
  467          * file servers have no limits, I don't think it matters.
  468          */
  469         p = uio->uio_procp;
  470         if (vp->v_type == VREG && p &&
  471             uio->uio_offset + uio->uio_resid >
  472             p->p_rlimit[RLIMIT_FSIZE].rlim_cur) {
  473                 psignal(p, SIGXFSZ);
  474                 if (object)
  475                         vm_object_vndeallocate(object);
  476                 return (EFBIG);
  477         }
  478 
              /* Snapshot the request length and file size for the error/rollback handling at the end. */
  479         resid = uio->uio_resid;
  480         osize = ip->i_size;
  481 
  482         /*
  483          * NOTE! These B_ flags are actually balloc-only flags, not buffer
  484          * flags.  They are similar to the BA_ flags in -current.
  485          */
  486         if (seqcount > B_SEQMAX)
  487                 flags = B_SEQMAX << B_SEQSHIFT;
  488         else
  489                 flags = seqcount << B_SEQSHIFT;
  490         if ((ioflag & IO_SYNC) && !DOINGASYNC(vp))
  491                 flags |= B_SYNC;
  492 
  493         if (object && (object->flags & OBJ_OPT)) {
  494                 vm_freeze_copyopts(object,
  495                         OFF_TO_IDX(uio->uio_offset),
  496                         OFF_TO_IDX(uio->uio_offset + uio->uio_resid + PAGE_MASK));
  497         }
  498 
  499         for (error = 0; uio->uio_resid > 0;) {
  500                 lbn = lblkno(fs, uio->uio_offset);
  501                 blkoffset = blkoff(fs, uio->uio_offset);
                      /* Transfer up to the end of the current block, or less if the request is shorter. */
  502                 xfersize = fs->fs_bsize - blkoffset;
  503                 if (uio->uio_resid < xfersize)
  504                         xfersize = uio->uio_resid;
  505 
                      /* Tell the pager about the new size before the block is allocated. */
  506                 if (uio->uio_offset + xfersize > ip->i_size)
  507                         vnode_pager_setsize(vp, uio->uio_offset + xfersize);
  508 
  509                 /*      
  510                  * We must perform a read-before-write if the transfer
  511                  * size does not cover the entire buffer.
  512                  */
  513                 if (fs->fs_bsize > xfersize)
  514                         flags |= B_CLRBUF;
  515                 else
  516                         flags &= ~B_CLRBUF;
  517 /* XXX is uio->uio_offset the right thing here? */
  518                 error = VOP_BALLOC(vp, uio->uio_offset, xfersize,
  519                     ap->a_cred, flags, &bp);
  520                 if (error != 0)
  521                         break;
  522                 /*
  523                  * If the buffer is not valid and we did not clear garbage
  524                  * out above, we have to do so here even though the write
  525                  * covers the entire buffer in order to avoid a mmap()/write
  526                  * race where another process may see the garbage prior to
  527                  * the uiomove() for a write replacing it.
  528                  */
  529                 if ((bp->b_flags & B_CACHE) == 0 && fs->fs_bsize <= xfersize)
  530                         vfs_bio_clrbuf(bp);
  531                 if (ioflag & IO_DIRECT)
  532                         bp->b_flags |= B_DIRECT;
  533                 if (ioflag & IO_NOWDRAIN)
  534                         bp->b_flags |= B_NOWDRAIN;
  535                 if ((ioflag & (IO_SYNC|IO_INVAL)) == (IO_SYNC|IO_INVAL))
  536                         bp->b_flags |= B_NOCACHE;
  537 
                      /* Extend the in-core file size and remember that we did, for NOTE_EXTEND below. */
  538                 if (uio->uio_offset + xfersize > ip->i_size) {
  539                         ip->i_size = uio->uio_offset + xfersize;
  540                         extended = 1;
  541                 }
  542 
  543                 size = BLKSIZE(fs, ip, lbn) - bp->b_resid;
  544                 if (size < xfersize)
  545                         xfersize = size;
  546 
  547                 error =
  548                     uiomove((char *)bp->b_data + blkoffset, (int)xfersize, uio);
  549                 if ((ioflag & (IO_VMIO|IO_DIRECT)) && 
  550                     (LIST_FIRST(&bp->b_dep) == NULL)) {
  551                         bp->b_flags |= B_RELBUF;
  552                 }
  553 
  554                 /*
  555                  * If IO_SYNC each buffer is written synchronously.  Otherwise
  556                  * if we have a severe page deficiency write the buffer 
  557                  * asynchronously.  Otherwise try to cluster, and if that
  558                  * doesn't do it then either do an async write (if O_DIRECT),
  559                  * or a delayed write (if not).
  560                  */
  561 
  562                 if (ioflag & IO_SYNC) {
  563                         (void)bwrite(bp); /* NOTE(review): the synchronous write's result is discarded */
  564                 } else if (vm_page_count_severe() || 
  565                             buf_dirty_count_severe() ||
  566                             (ioflag & IO_ASYNC)) {
  567                         bp->b_flags |= B_CLUSTEROK;
  568                         bawrite(bp);
  569                 } else if (xfersize + blkoffset == fs->fs_bsize) {
  570                         if ((vp->v_mount->mnt_flag & MNT_NOCLUSTERW) == 0) {
  571                                 bp->b_flags |= B_CLUSTEROK;
  572                                 cluster_write(bp, ip->i_size, seqcount);
  573                         } else {
  574                                 bawrite(bp);
  575                         }
  576                 } else if (ioflag & IO_DIRECT) {
  577                         bp->b_flags |= B_CLUSTEROK;
  578                         bawrite(bp);
  579                 } else {
  580                         bp->b_flags |= B_CLUSTEROK;
  581                         bdwrite(bp);
  582                 }
                      /*
                       * The uiomove() status is only examined here, after the
                       * buffer has already been passed to one of the write
                       * routines above, so no buffer is held when we leave.
                       */
  583                 if (error || xfersize == 0)
  584                         break;
  585                 ip->i_flag |= IN_CHANGE | IN_UPDATE;
  586         }
  587         /*
  588          * If we successfully wrote any data, and we are not the superuser
  589          * we clear the setuid and setgid bits as a precaution against
  590          * tampering.
  591          */
  592         if (resid > uio->uio_resid && ap->a_cred && ap->a_cred->cr_uid != 0)
  593                 ip->i_mode &= ~(ISUID | ISGID);
              /* Post NOTE_WRITE (plus NOTE_EXTEND if the file grew) when any data was moved. */
  594         if (resid > uio->uio_resid)
  595                 VN_KNOTE(vp, NOTE_WRITE | (extended ? NOTE_EXTEND : 0));
  596         if (error) {
                      /*
                       * IO_UNIT: undo the partial write by truncating to the size
                       * on entry and rewinding the uio to its state on entry.
                       */
  597                 if (ioflag & IO_UNIT) {
  598                         (void)UFS_TRUNCATE(vp, osize,
  599                             ioflag & IO_SYNC, ap->a_cred, uio->uio_procp);
  600                         uio->uio_offset -= resid - uio->uio_resid;
  601                         uio->uio_resid = resid;
  602                 }
  603         } else if (resid > uio->uio_resid && (ioflag & IO_SYNC))
  604                 error = UFS_UPDATE(vp, 1);
  605 
  606         if (object)
  607                 vm_object_vndeallocate(object);
  608 
  609         return (error);
  610 }
  611 
  612 
  613 /*
  614  * get page routine
       *
       * Brings in the requested page, ap->a_m[ap->a_reqpage], together with
       * as many of the other pages in ap->a_m[] as are contiguous with it on
       * disk; pages that cannot be part of the transfer are freed.
       *
       *  - If the requested page already has valid bits, its invalid parts
       *    are zeroed, the other pages are freed and VM_PAGER_OK is returned.
       *  - If the mount's f_iosize is smaller than PAGE_SIZE the request is
       *    passed to vnode_pager_generic_getpages().
       *  - If VOP_BMAP() fails, the other pages are freed and VM_PAGER_ERROR
       *    is returned.  If it succeeds but reports block -1, the requested
       *    page is zero-filled, marked fully valid and VM_PAGER_OK returned.
       *  - Otherwise the page run is trimmed using the backward/forward run
       *    lengths from VOP_BMAP(), the size is clamped to the object's
       *    vnp_size, and the result of VOP_GETPAGES() on the vnode returned
       *    by VOP_BMAP() is returned.
  615  */
  616 int
  617 ffs_getpages(ap)
  618         struct vop_getpages_args *ap;
  619 {
  620         off_t foff, physoffset;
  621         int i, size, bsize;
  622         struct vnode *dp, *vp;
  623         vm_object_t obj;
  624         vm_pindex_t pindex, firstindex;
  625         vm_page_t mreq;
  626         int bbackwards, bforwards;
  627         int pbackwards, pforwards;
  628         int firstpage;
  629         int reqlblkno;
  630         daddr_t reqblkno;
  631         int poff;
  632         int pcount;
  633         int rtval;
  634         int pagesperblock;
  635 
  636 
  637         pcount = round_page(ap->a_count) / PAGE_SIZE;
  638         mreq = ap->a_m[ap->a_reqpage];
  639         firstindex = ap->a_m[0]->pindex;
  640 
  641         /*
  642          * if ANY DEV_BSIZE blocks are valid on a large filesystem block,
  643          * then the entire page is valid.  Since the page may be mapped,
  644          * user programs might reference data beyond the actual end of file
  645          * occurring within the page.  We have to zero that data.
  646          */
  647         if (mreq->valid) {
  648                 if (mreq->valid != VM_PAGE_BITS_ALL)
  649                         vm_page_zero_invalid(mreq, TRUE);
  650                 for (i = 0; i < pcount; i++) {
  651                         if (i != ap->a_reqpage) {
  652                                 vm_page_free(ap->a_m[i]);
  653                         }
  654                 }
  655                 return VM_PAGER_OK;
  656         }
  657 
  658         vp = ap->a_vp;
  659         obj = vp->v_object;
  660         bsize = vp->v_mount->mnt_stat.f_iosize;
  661         pindex = mreq->pindex;
  662         foff = IDX_TO_OFF(pindex) /* + ap->a_offset should be zero */;
  663 
              /* Blocks smaller than a page: leave it to the generic vnode pager. */
  664         if (bsize < PAGE_SIZE)
  665                 return vnode_pager_generic_getpages(ap->a_vp, ap->a_m,
  666                                                     ap->a_count,
  667                                                     ap->a_reqpage);
  668 
  669         /*
  670          * foff is the file offset of the required page
  671          * reqlblkno is the logical block that contains the page
  672          * poff is the index of the page into the logical block
  673          */
  674         reqlblkno = foff / bsize;
  675         poff = (foff % bsize) / PAGE_SIZE;
  676 
              /*
               * Translate the logical block to a device block.  The VOP_BMAP()
               * status is kept in rtval so that the failure handling below is
               * decided by that status alone: reqblkno is an uninitialized
               * local and is only trusted when the call succeeded.
               */
  677         if ((rtval = VOP_BMAP(vp, reqlblkno, &dp, &reqblkno,
  678                 &bforwards, &bbackwards)) != 0 || (reqblkno == -1)) {
  679                 for(i = 0; i < pcount; i++) {
  680                         if (i != ap->a_reqpage)
  681                                 vm_page_free(ap->a_m[i]);
  682                 }
                      /* rtval == 0 here means the mapping succeeded with block -1. */
  683                 if (rtval == 0) {
  684                         if ((mreq->flags & PG_ZERO) == 0)
  685                                 vm_page_zero_fill(mreq);
  686                         vm_page_undirty(mreq);
  687                         mreq->valid = VM_PAGE_BITS_ALL;
  688                         return VM_PAGER_OK;
  689                 } else {
  690                         return VM_PAGER_ERROR;
  691                 }
  692         }
  693 
  694         physoffset = (off_t)reqblkno * DEV_BSIZE + poff * PAGE_SIZE;
  695         pagesperblock = bsize / PAGE_SIZE;
  696         /*
  697          * find the first page that is contiguous...
  698          * note that pbackwards is the number of pages that are contiguous
  699          * backwards.
  700          */
  701         firstpage = 0;
  702         if (ap->a_count) {
  703                 pbackwards = poff + bbackwards * pagesperblock;
  704                 if (ap->a_reqpage > pbackwards) {
  705                         firstpage = ap->a_reqpage - pbackwards;
  706                         for(i=0;i<firstpage;i++)
  707                                 vm_page_free(ap->a_m[i]);
  708                 }
  709 
  710         /*
  711          * pforwards is the number of pages that are contiguous
  712          * after the current page.
  713          */
  714                 pforwards = (pagesperblock - (poff + 1)) +
  715                         bforwards * pagesperblock;
  716                 if (pforwards < (pcount - (ap->a_reqpage + 1))) {
  717                         for( i = ap->a_reqpage + pforwards + 1; i < pcount; i++)
  718                                 vm_page_free(ap->a_m[i]);
  719                         pcount = ap->a_reqpage + pforwards + 1;
  720                 }
  721 
  722         /*
  723          * number of pages for I/O corrected for the non-contig pages at
  724          * the beginning of the array.
  725          */
  726                 pcount -= firstpage;
  727         }
  728 
  729         /*
  730          * calculate the size of the transfer
  731          */
  732 
  733         size = pcount * PAGE_SIZE;
  734 
              /* Do not transfer past the size recorded in the object's vnode pager data. */
  735         if ((IDX_TO_OFF(ap->a_m[firstpage]->pindex) + size) >
  736                 obj->un_pager.vnp.vnp_size)
  737                 size = obj->un_pager.vnp.vnp_size -
  738                         IDX_TO_OFF(ap->a_m[firstpage]->pindex);
  739 
              /*
               * Pass the difference between the device offset and the file
               * offset of the requested page as the offset argument, and
               * re-base a_reqpage onto the trimmed array.
               */
  740         physoffset -= foff;
  741         rtval = VOP_GETPAGES(dp, &ap->a_m[firstpage], size,
  742                 (ap->a_reqpage - firstpage), physoffset);
  743 
  744         return (rtval);
  745 }
  746 
  747 /*
  748  * Page-out vnode op.
  749  *
  750  * XXX Nothing filesystem-specific is done here: the request is passed
  751  * XXX straight to vnode_pager_generic_putpages().  a_offset is not
  752  * XXX consulted (and never has been).
  753  */
  754 int
  755 ffs_putpages(ap)
  756         struct vop_putpages_args *ap;
  757 {
  758         int rtval;
  759 
  760         rtval = vnode_pager_generic_putpages(ap->a_vp, ap->a_m,
  761             ap->a_count, ap->a_sync, ap->a_rtvals);
  762         return (rtval);
  763 }

Cache object: 58a92304aa7bbb838fb16ceacf1f6aef


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]


This page is part of the FreeBSD/Linux Kernel Cross-Reference, and was automatically generated using a modified version of the LXR engine.