The Design and Implementation of the FreeBSD Operating System, Second Edition
Now available: The Design and Implementation of the FreeBSD Operating System (Second Edition)


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]

FreeBSD/Linux Kernel Cross Reference
sys/fs/ext2fs/ext2_readwrite.c

Version: -  FREEBSD  -  FREEBSD-13-STABLE  -  FREEBSD-13-0  -  FREEBSD-12-STABLE  -  FREEBSD-12-0  -  FREEBSD-11-STABLE  -  FREEBSD-11-0  -  FREEBSD-10-STABLE  -  FREEBSD-10-0  -  FREEBSD-9-STABLE  -  FREEBSD-9-0  -  FREEBSD-8-STABLE  -  FREEBSD-8-0  -  FREEBSD-7-STABLE  -  FREEBSD-7-0  -  FREEBSD-6-STABLE  -  FREEBSD-6-0  -  FREEBSD-5-STABLE  -  FREEBSD-5-0  -  FREEBSD-4-STABLE  -  FREEBSD-3-STABLE  -  FREEBSD22  -  l41  -  OPENBSD  -  linux-2.6  -  MK84  -  PLAN9  -  xnu-8792 
SearchContext: -  none  -  3  -  10 

    1 /*-
    2  *  modified for Lites 1.1
    3  *
    4  *  Aug 1995, Godmar Back (gback@cs.utah.edu)
    5  *  University of Utah, Department of Computer Science
    6  */
    7 /*-
    8  * Copyright (c) 1993
    9  *      The Regents of the University of California.  All rights reserved.
   10  *
   11  * Redistribution and use in source and binary forms, with or without
   12  * modification, are permitted provided that the following conditions
   13  * are met:
   14  * 1. Redistributions of source code must retain the above copyright
   15  *    notice, this list of conditions and the following disclaimer.
   16  * 2. Redistributions in binary form must reproduce the above copyright
   17  *    notice, this list of conditions and the following disclaimer in the
   18  *    documentation and/or other materials provided with the distribution.
   19  * 4. Neither the name of the University nor the names of its contributors
   20  *    may be used to endorse or promote products derived from this software
   21  *    without specific prior written permission.
   22  *
   23  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
   24  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
   25  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
   26  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
   27  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
   28  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
   29  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
   30  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
   31  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
   32  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
   33  * SUCH DAMAGE.
   34  *
   35  *      @(#)ufs_readwrite.c     8.7 (Berkeley) 1/21/94
   36  * $FreeBSD: releng/9.0/sys/fs/ext2fs/ext2_readwrite.c 221261 2011-04-30 13:49:03Z kib $
   37  */
   38 
   39 /* XXX TODO: remove these obfuscations (as in ffs_vnops.c). */
   40 #define BLKSIZE(a, b, c)        blksize(a, b, c)
   41 #define FS                      struct m_ext2fs
   42 #define I_FS                    i_e2fs
   43 #define READ                    ext2_read
   44 #define READ_S                  "ext2_read"
   45 #define WRITE                   ext2_write
   46 #define WRITE_S                 "ext2_write"
   47 
   48 #include <vm/vm.h>
   49 #include <vm/vm_extern.h>
   50 #include <vm/vm_object.h>
   51 #include <vm/vm_page.h>
   52 #include <vm/vm_pager.h>
   53 #include <vm/vnode_pager.h>
   54 
   55 #include "opt_directio.h"
   56 
   57 /*
   58  * Vnode op for reading.
   59  */
    60 static int
    61 READ(ap)
    62         struct vop_read_args /* {
    63                 struct vnode *a_vp;
    64                 struct uio *a_uio;
    65                 int a_ioflag;
    66                 struct ucred *a_cred;
    67         } */ *ap;
    68 {
               /*
                * ext2_read: copy data from the vnode's backing file into the
                * caller-supplied uio, one logical file system block at a time,
                * going through the buffer cache.  Returns 0 or an errno.
                */
    69         struct vnode *vp;
    70         struct inode *ip;
    71         struct uio *uio;
    72         FS *fs;
    73         struct buf *bp;
    74         daddr_t lbn, nextlbn;
    75         off_t bytesinfile;
    76         long size, xfersize, blkoffset;
    77         int error, orig_resid, seqcount;
    78         int ioflag;
    79 
    80         vp = ap->a_vp;
    81         uio = ap->a_uio;
    82         ioflag = ap->a_ioflag;
    83 
               /* The high bits of a_ioflag carry the sequential-access hint. */
    84         seqcount = ap->a_ioflag >> IO_SEQSHIFT;
    85         ip = VTOI(vp);
    86 
    87 #ifdef INVARIANTS
    88         if (uio->uio_rw != UIO_READ)
    89                 panic("%s: mode", READ_S);
    90 
    91         if (vp->v_type == VLNK) {
                       /* A symlink shorter than mnt_maxsymlinklen must never
                        * reach this path. */
    92                 if ((int)ip->i_size < vp->v_mount->mnt_maxsymlinklen)
    93                         panic("%s: short symlink", READ_S);
    94         } else if (vp->v_type != VREG && vp->v_type != VDIR)
    95                 panic("%s: type %d", READ_S, vp->v_type);
    96 #endif
    97         orig_resid = uio->uio_resid;
    98         KASSERT(orig_resid >= 0, ("ext2_read: uio->uio_resid < 0"));
               /* A zero-length read trivially succeeds. */
    99         if (orig_resid == 0)
   100                 return (0);
   101         KASSERT(uio->uio_offset >= 0, ("ext2_read: uio->uio_offset < 0"));
   102         fs = ip->I_FS;
               /*
                * An offset that is inside the file but beyond what this file
                * system can represent cannot be read; report EOVERFLOW.
                */
   103         if (uio->uio_offset < ip->i_size &&
   104             uio->uio_offset >= fs->e2fs_maxfilesize)
   105                 return (EOVERFLOW);
   106 
               /*
                * Main transfer loop.  bp is reset to NULL at the top of every
                * iteration, so a non-NULL bp after the loop means we left it
                * via 'break' while still holding a buffer.
                */
   107         for (error = 0, bp = NULL; uio->uio_resid > 0; bp = NULL) {
   108                 if ((bytesinfile = ip->i_size - uio->uio_offset) <= 0)
   109                         break;
   110                 lbn = lblkno(fs, uio->uio_offset);
   111                 nextlbn = lbn + 1;
   112                 size = BLKSIZE(fs, ip, lbn);
   113                 blkoffset = blkoff(fs, uio->uio_offset);
   114 
                       /*
                        * NOTE(review): e2fs_fsize (fragment size) is used where
                        * a block size seems intended; in ext2 the fragment size
                        * presumably always equals e2fs_bsize, making this
                        * equivalent -- confirm against the superblock fields.
                        */
   115                 xfersize = fs->e2fs_fsize - blkoffset;
   116                 if (uio->uio_resid < xfersize)
   117                         xfersize = uio->uio_resid;
                       /* Never transfer past EOF. */
   118                 if (bytesinfile < xfersize)
   119                         xfersize = bytesinfile;
   120 
                       /*
                        * Pick a read strategy: plain bread() for the file's last
                        * block, cluster_read() when read-clustering is enabled,
                        * breadn() one-block read-ahead for sequential access,
                        * otherwise plain bread().
                        */
   121                 if (lblktosize(fs, nextlbn) >= ip->i_size)
   122                         error = bread(vp, lbn, size, NOCRED, &bp);
   123                 else if ((vp->v_mount->mnt_flag & MNT_NOCLUSTERR) == 0)
   124                         error = cluster_read(vp, ip->i_size, lbn, size,
   125                             NOCRED, blkoffset + uio->uio_resid, seqcount, &bp);
   126                 else if (seqcount > 1) {
   127                         int nextsize = BLKSIZE(fs, ip, nextlbn);
   128                         error = breadn(vp, lbn,
   129                             size, &nextlbn, &nextsize, 1, NOCRED, &bp);
   130                 } else
   131                         error = bread(vp, lbn, size, NOCRED, &bp);
   132                 if (error) {
                               /* Release the failed buffer before bailing out. */
   133                         brelse(bp);
   134                         bp = NULL;
   135                         break;
   136                 }
   137 
   138                 /*
   139                  * If IO_DIRECT then set B_DIRECT for the buffer.  This
   140                  * will cause us to attempt to release the buffer later on
   141                  * and will cause the buffer cache to attempt to free the
   142                  * underlying pages.
   143                  */
   144                 if (ioflag & IO_DIRECT)
   145                         bp->b_flags |= B_DIRECT;
   146 
   147                 /*
   148                  * We should only get non-zero b_resid when an I/O error
   149                  * has occurred, which should cause us to break above.
   150                  * However, if the short read did not cause an error,
   151                  * then we want to ensure that we do not uiomove bad
   152                  * or uninitialized data.
   153                  */
   154                 size -= bp->b_resid;
   155                 if (size < xfersize) {
   156                         if (size == 0)
   157                                 break;
   158                         xfersize = size;
   159                 }
                       /* Copy this block's worth of data out to the caller. */
   160                 error = uiomove((char *)bp->b_data + blkoffset,
   161                         (int)xfersize, uio);
   162                 if (error)
   163                         break;
   164 
   165                 if (ioflag & (IO_VMIO|IO_DIRECT)) {
   166                         /*
   167                          * If it's VMIO or direct I/O, then we don't
   168                          * need the buf, mark it available for
   169                          * freeing. If it's non-direct VMIO, the VM has
   170                          * the data.
   171                          */
   172                         bp->b_flags |= B_RELBUF;
   173                         brelse(bp);
   174                 } else {
   175                         /*
   176                          * Otherwise let whoever
   177                          * made the request take care of
   178                          * freeing it. We just queue
   179                          * it onto another list.
   180                          */
   181                         bqrelse(bp);
   182                 }
   183         }
   184 
   185         /*
   186          * bp can be non-NULL here only if the loop exited via 'break':
   187          * each iteration resets bp to NULL and normal completion never
   188          * stores a new value into it.  Release the leftover buffer the
   189          * same way the loop body would have.
   190          */
   191         if (bp != NULL) {
   192                 if (ioflag & (IO_VMIO|IO_DIRECT)) {
   193                         bp->b_flags |= B_RELBUF;
   194                         brelse(bp);
   195                 } else {
   196                         bqrelse(bp);
   197                 }
   198         }
   199 
               /* Note an access-time update unless the mount forbids atime. */
   200         if ((error == 0 || uio->uio_resid != orig_resid) &&
   201             (vp->v_mount->mnt_flag & MNT_NOATIME) == 0)
   202                 ip->i_flag |= IN_ACCESS;
   203         return (error);
   204 }
  205 
  206 /*
  207  * Vnode op for writing.
  208  */
   209 static int
   210 WRITE(ap)
   211         struct vop_write_args /* {
   212                 struct vnode *a_vp;
   213                 struct uio *a_uio;
   214                 int a_ioflag;
   215                 struct ucred *a_cred;
   216         } */ *ap;
   217 {
               /*
                * ext2_write: copy data from the caller-supplied uio into the
                * file, one logical block per iteration, allocating backing
                * blocks with ext2_balloc() as needed.  Returns 0 or an errno;
                * with IO_UNIT a failed write is rolled back entirely.
                */
   218         struct vnode *vp;
   219         struct uio *uio;
   220         struct inode *ip;
   221         FS *fs;
   222         struct buf *bp;
   223         daddr_t lbn;
   224         off_t osize;
   225         int blkoffset, error, flags, ioflag, resid, size, seqcount, xfersize;
   226 
   227         ioflag = ap->a_ioflag;
   228         uio = ap->a_uio;
   229         vp = ap->a_vp;
   230 
               /* The high bits of a_ioflag carry the sequential-access hint. */
   231         seqcount = ioflag >> IO_SEQSHIFT;
   232         ip = VTOI(vp);
   233 
   234 #ifdef INVARIANTS
   235         if (uio->uio_rw != UIO_WRITE)
   236                 panic("%s: mode", WRITE_S);
   237 #endif
   238 
               /* Per-vnode-type policy checks before any data is moved. */
   239         switch (vp->v_type) {
   240         case VREG:
   241                 if (ioflag & IO_APPEND)
   242                         uio->uio_offset = ip->i_size;
                       /* An append-only file may only be written at EOF. */
   243                 if ((ip->i_flags & APPEND) && uio->uio_offset != ip->i_size)
   244                         return (EPERM);
   245                 /* FALLTHROUGH */
   246         case VLNK:
   247                 break;
   248         case VDIR:
   249                 /* XXX differs from ffs -- this is called from ext2_mkdir(). */
   250                 if ((ioflag & IO_SYNC) == 0)
   251                 panic("ext2_write: nonsync dir write");
   252                 break;
   253         default:
   254                 panic("ext2_write: type %p %d (%jd,%jd)", (void *)vp,
   255                     vp->v_type, (intmax_t)uio->uio_offset,
   256                     (intmax_t)uio->uio_resid);
   257         }
   258 
   259         KASSERT(uio->uio_resid >= 0, ("ext2_write: uio->uio_resid < 0"));
   260         KASSERT(uio->uio_offset >= 0, ("ext2_write: uio->uio_offset < 0"));
   261         fs = ip->I_FS;
               /* Refuse writes that would exceed the fs's maximum file size. */
   262         if ((uoff_t)uio->uio_offset + uio->uio_resid > fs->e2fs_maxfilesize)
   263                 return (EFBIG);
   264         /*
   265          * Maybe this should be above the vnode op call, but so long as
   266          * file servers have no limits, I don't think it matters.
   267          */
   268         if (vn_rlimit_fsize(vp, uio, uio->uio_td))
   269                 return (EFBIG);
   270 
               /* Remember starting resid/size so a failed write can be undone. */
   271         resid = uio->uio_resid;
   272         osize = ip->i_size;
               /* Encode the (clamped) sequential hint into the balloc flags. */
   273         if (seqcount > BA_SEQMAX)
   274                 flags = BA_SEQMAX << BA_SEQSHIFT;
   275         else
   276                 flags = seqcount << BA_SEQSHIFT;
   277         if ((ioflag & IO_SYNC) && !DOINGASYNC(vp))
   278                 flags |= IO_SYNC;
   279 
   280         for (error = 0; uio->uio_resid > 0;) {
   281                 lbn = lblkno(fs, uio->uio_offset);
   282                 blkoffset = blkoff(fs, uio->uio_offset);
                       /*
                        * NOTE(review): e2fs_fsize (fragment size) is used where
                        * a block size seems intended; in ext2 the fragment size
                        * presumably always equals e2fs_bsize, making this
                        * equivalent -- confirm against the superblock fields.
                        */
   283                 xfersize = fs->e2fs_fsize - blkoffset;
   284                 if (uio->uio_resid < xfersize)
   285                         xfersize = uio->uio_resid;
                       /* Tell the VM about the new size before growing the file. */
   286                 if (uio->uio_offset + xfersize > ip->i_size)
   287                         vnode_pager_setsize(vp, uio->uio_offset + xfersize);
   288 
   289                 /*
   290                  * We must perform a read-before-write if the transfer size
   291                  * does not cover the entire buffer.
   292                  */
   293                 if (fs->e2fs_bsize > xfersize)
   294                         flags |= BA_CLRBUF;
   295                 else
   296                         flags &= ~BA_CLRBUF;
   297                 error = ext2_balloc(ip, lbn, blkoffset + xfersize,
   298                     ap->a_cred, &bp, flags);
   299                 if (error != 0)
   300                         break;
   301 
   302                 /*
   303                  * If the buffer is not valid and we did not clear garbage
   304                  * out above, we have to do so here even though the write
   305                  * covers the entire buffer in order to avoid a mmap()/write
   306                  * race where another process may see the garbage prior to
   307                  * the uiomove() for a write replacing it.
   308                  */
   309                 if ((bp->b_flags & B_CACHE) == 0 && fs->e2fs_bsize <= xfersize)
   310                         vfs_bio_clrbuf(bp);
   311                 if ((ioflag & (IO_SYNC|IO_INVAL)) == (IO_SYNC|IO_INVAL))
   312                         bp->b_flags |= B_NOCACHE;
                       /* Extend the recorded file size as data lands past EOF. */
   313                 if (uio->uio_offset + xfersize > ip->i_size)
   314                         ip->i_size = uio->uio_offset + xfersize;
                       /* Trim the transfer if the buffer came up short (b_resid). */
   315                 size = BLKSIZE(fs, ip, lbn) - bp->b_resid;
   316                 if (size < xfersize)
   317                         xfersize = size;
   318 
   319                 error =
   320                     uiomove((char *)bp->b_data + blkoffset, (int)xfersize, uio);
   321                 if (ioflag & (IO_VMIO|IO_DIRECT)) {
                               /* VMIO/direct I/O: the buffer itself is expendable. */
   322                         bp->b_flags |= B_RELBUF;
   323                 }
   324 
   325                 /*
   326                  * If IO_SYNC each buffer is written synchronously.  Otherwise
   327                  * if we have a severe page deficiency write the buffer
   328                  * asynchronously.  Otherwise try to cluster, and if that
   329                  * doesn't do it then either do an async write (if O_DIRECT),
   330                  * or a delayed write (if not).
   331                  */
   332                 if (ioflag & IO_SYNC) {
   333                         (void)bwrite(bp);
   334                 } else if (vm_page_count_severe() ||
   335                     buf_dirty_count_severe() ||
   336                     (ioflag & IO_ASYNC)) {
   337                         bp->b_flags |= B_CLUSTEROK;
   338                         bawrite(bp);
   339                 } else if (xfersize + blkoffset == fs->e2fs_fsize) {
   340                         if ((vp->v_mount->mnt_flag & MNT_NOCLUSTERW) == 0) {
   341                                 bp->b_flags |= B_CLUSTEROK;
   342                                 cluster_write(vp, bp, ip->i_size, seqcount);
   343                         } else {
   344                                 bawrite(bp);
   345                         }
   346                 } else if (ioflag & IO_DIRECT) {
   347                         bp->b_flags |= B_CLUSTEROK;
   348                         bawrite(bp);
   349                 } else {
   350                         bp->b_flags |= B_CLUSTEROK;
   351                         bdwrite(bp);
   352                 }
   353                 if (error || xfersize == 0)
   354                         break;
   355         }
   356         /*
   357          * If we successfully wrote any data, and we are not the superuser
   358          * we clear the setuid and setgid bits as a precaution against
   359          * tampering.
   360          */
   361         if ((ip->i_mode & (ISUID | ISGID)) && resid > uio->uio_resid &&
   362             ap->a_cred) {
   363                 if (priv_check_cred(ap->a_cred, PRIV_VFS_RETAINSUGID, 0))
   364                         ip->i_mode &= ~(ISUID | ISGID);
   365         }
               /*
                * On error with IO_UNIT, undo the partial write: truncate back
                * to the original size and restore the uio to its initial state.
                */
   366         if (error) {
   367                 if (ioflag & IO_UNIT) {
   368                         (void)ext2_truncate(vp, osize,
   369                             ioflag & IO_SYNC, ap->a_cred, uio->uio_td);
   370                         uio->uio_offset -= resid - uio->uio_resid;
   371                         uio->uio_resid = resid;
   372                 }
   373         }
               /*
                * If anything was written, mark the inode changed/updated; with
                * IO_SYNC push the inode update to disk now as well.
                */
   374         if (uio->uio_resid != resid) {
   375                ip->i_flag |= IN_CHANGE | IN_UPDATE;
   376                if (ioflag & IO_SYNC)
   377                        error = ext2_update(vp, 1);
   378        }
   379         return (error);
   380 }

Cache object: a672dcc18308393af804fb5dda2bf877


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]


This page is part of the FreeBSD/Linux Linux Kernel Cross-Reference, and was automatically generated using a modified version of the LXR engine.