FreeBSD/Linux Kernel Cross Reference
sys/ufs/ufs/ufs_readwrite.c


    1 /*      $NetBSD: ufs_readwrite.c,v 1.55 2003/08/07 16:34:46 agc Exp $   */
    2 
    3 /*-
    4  * Copyright (c) 1993
    5  *      The Regents of the University of California.  All rights reserved.
    6  *
    7  * Redistribution and use in source and binary forms, with or without
    8  * modification, are permitted provided that the following conditions
    9  * are met:
   10  * 1. Redistributions of source code must retain the above copyright
   11  *    notice, this list of conditions and the following disclaimer.
   12  * 2. Redistributions in binary form must reproduce the above copyright
   13  *    notice, this list of conditions and the following disclaimer in the
   14  *    documentation and/or other materials provided with the distribution.
   15  * 3. Neither the name of the University nor the names of its contributors
   16  *    may be used to endorse or promote products derived from this software
   17  *    without specific prior written permission.
   18  *
   19  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
   20  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
   21  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
   22  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
   23  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
   24  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
   25  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
   26  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
   27  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
   28  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
   29  * SUCH DAMAGE.
   30  *
   31  *      @(#)ufs_readwrite.c     8.11 (Berkeley) 5/8/95
   32  */
   33 
   34 #include <sys/cdefs.h>
    35 __KERNEL_RCSID(0, "$NetBSD: ufs_readwrite.c,v 1.55 2003/08/07 16:34:46 agc Exp $");
   36 
   37 #ifdef LFS_READWRITE
   38 #define BLKSIZE(a, b, c)        blksize(a, b, c)
   39 #define FS                      struct lfs
   40 #define I_FS                    i_lfs
   41 #define READ                    lfs_read
   42 #define READ_S                  "lfs_read"
   43 #define WRITE                   lfs_write
   44 #define WRITE_S                 "lfs_write"
   45 #define fs_bsize                lfs_bsize
   46 #define fs_maxfilesize          lfs_maxfilesize
   47 #else
   48 #define BLKSIZE(a, b, c)        blksize(a, b, c)
   49 #define FS                      struct fs
   50 #define I_FS                    i_fs
   51 #define READ                    ffs_read
   52 #define READ_S                  "ffs_read"
   53 #define WRITE                   ffs_write
   54 #define WRITE_S                 "ffs_write"
   55 #endif
   56 
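The macros above let the same two function bodies below serve either the fast filesystem or the log-structured filesystem: with LFS_READWRITE defined, READ and WRITE become lfs_read and lfs_write and the superblock fields fs_bsize and fs_maxfilesize are renamed to their LFS counterparts; without it they become ffs_read and ffs_write. A minimal stand-alone sketch of the same renaming technique follows (every identifier in it is invented and is not part of the kernel):

/* sketch.c -- build plain for the "ffs" variant, or with -DLFS_VARIANT
 * for the "lfs" variant; every identifier here is hypothetical. */
#include <stdio.h>

#ifdef LFS_VARIANT
#define FS_SKETCH       struct lfs_sketch
#define READ_SKETCH     lfs_read_sketch
#define sk_bsize        sk_lfs_bsize
#else
#define FS_SKETCH       struct ffs_sketch
#define READ_SKETCH     ffs_read_sketch
#endif

struct ffs_sketch { long sk_bsize; };           /* stand-in "FFS superblock" */
struct lfs_sketch { long sk_lfs_bsize; };       /* stand-in "LFS superblock" */

/* One body; the preprocessor decides which name and argument type it gets. */
long
READ_SKETCH(FS_SKETCH *fs)
{
        return fs->sk_bsize;    /* becomes sk_lfs_bsize under -DLFS_VARIANT */
}

int
main(void)
{
        FS_SKETCH fs = { 16384 };
        printf("block size: %ld\n", READ_SKETCH(&fs));
        return 0;
}

Compiling the sketch with and without -DLFS_VARIANT yields lfs_read_sketch() and ffs_read_sketch() from the same body, which is the effect the macros above achieve for the real read and write routines.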
   57 /*
   58  * Vnode op for reading.
   59  */
   60 /* ARGSUSED */
   61 int
   62 READ(void *v)
   63 {
   64         struct vop_read_args /* {
   65                 struct vnode *a_vp;
   66                 struct uio *a_uio;
   67                 int a_ioflag;
   68                 struct ucred *a_cred;
   69         } */ *ap = v;
   70         struct vnode *vp;
   71         struct inode *ip;
   72         struct uio *uio;
   73         FS *fs;
   74         void *win;
   75         vsize_t bytelen;
   76         struct buf *bp;
   77         daddr_t lbn, nextlbn;
   78         off_t bytesinfile;
   79         long size, xfersize, blkoffset;
   80         int error;
   81         boolean_t usepc = FALSE;
   82 
   83         vp = ap->a_vp;
   84         ip = VTOI(vp);
   85         uio = ap->a_uio;
   86         error = 0;
   87 
   88 #ifdef DIAGNOSTIC
   89         if (uio->uio_rw != UIO_READ)
   90                 panic("%s: mode", READ_S);
   91 
   92         if (vp->v_type == VLNK) {
   93                 if ((int)ip->i_size < vp->v_mount->mnt_maxsymlinklen ||
   94                     (vp->v_mount->mnt_maxsymlinklen == 0 &&
   95                      DIP(ip, blocks) == 0))
   96                         panic("%s: short symlink", READ_S);
   97         } else if (vp->v_type != VREG && vp->v_type != VDIR)
   98                 panic("%s: type %d", READ_S, vp->v_type);
   99 #endif
  100         fs = ip->I_FS;
  101         if ((u_int64_t)uio->uio_offset > fs->fs_maxfilesize)
  102                 return (EFBIG);
  103         if (uio->uio_resid == 0)
  104                 return (0);
  105         if (uio->uio_offset >= ip->i_size) {
  106                 goto out;
  107         }
  108 
  109 #ifdef LFS_READWRITE
  110         usepc = (vp->v_type == VREG && ip->i_number != LFS_IFILE_INUM);
  111 #else /* !LFS_READWRITE */
  112         usepc = vp->v_type == VREG;
  113 #endif /* !LFS_READWRITE */
  114         if (usepc) {
  115                 while (uio->uio_resid > 0) {
  116                         bytelen = MIN(ip->i_size - uio->uio_offset,
  117                             uio->uio_resid);
  118                         if (bytelen == 0)
  119                                 break;
  120 
  121                         win = ubc_alloc(&vp->v_uobj, uio->uio_offset,
  122                                         &bytelen, UBC_READ);
  123                         error = uiomove(win, bytelen, uio);
  124                         ubc_release(win, 0);
  125                         if (error)
  126                                 break;
  127                 }
  128                 goto out;
  129         }
  130 
  131         for (error = 0, bp = NULL; uio->uio_resid > 0; bp = NULL) {
  132                 bytesinfile = ip->i_size - uio->uio_offset;
  133                 if (bytesinfile <= 0)
  134                         break;
  135                 lbn = lblkno(fs, uio->uio_offset);
  136                 nextlbn = lbn + 1;
  137                 size = BLKSIZE(fs, ip, lbn);
  138                 blkoffset = blkoff(fs, uio->uio_offset);
  139                 xfersize = MIN(MIN(fs->fs_bsize - blkoffset, uio->uio_resid),
  140                     bytesinfile);
  141 
  142                 if (lblktosize(fs, nextlbn) >= ip->i_size)
  143                         error = bread(vp, lbn, size, NOCRED, &bp);
  144                 else {
  145                         int nextsize = BLKSIZE(fs, ip, nextlbn);
  146                         error = breadn(vp, lbn,
  147                             size, &nextlbn, &nextsize, 1, NOCRED, &bp);
  148                 }
  149                 if (error)
  150                         break;
  151 
  152                 /*
  153                  * We should only get non-zero b_resid when an I/O error
  154                  * has occurred, which should cause us to break above.
  155                  * However, if the short read did not cause an error,
  156                  * then we want to ensure that we do not uiomove bad
  157                  * or uninitialized data.
  158                  */
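                /*
                 * For example (hypothetical numbers): with a 16384-byte
                 * block and bp->b_resid == 4096 after bread(), only 12288
                 * bytes of the buffer are valid, so xfersize is clamped to
                 * 12288; a read that returned no data at all (size == 0)
                 * ends the loop instead.
                 */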
  159                 size -= bp->b_resid;
  160                 if (size < xfersize) {
  161                         if (size == 0)
  162                                 break;
  163                         xfersize = size;
  164                 }
  165                 error = uiomove((char *)bp->b_data + blkoffset, xfersize, uio);
  166                 if (error)
  167                         break;
  168                 brelse(bp);
  169         }
  170         if (bp != NULL)
  171                 brelse(bp);
  172 
  173  out:
  174         if (!(vp->v_mount->mnt_flag & MNT_NOATIME)) {
  175                 ip->i_flag |= IN_ACCESS;
  176                 if ((ap->a_ioflag & IO_SYNC) == IO_SYNC)
  177                         error = VOP_UPDATE(vp, NULL, NULL, UPDATE_WAIT);
  178         }
  179         return (error);
  180 }
  181 
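The buffer-cache loop at the end of READ above (taken when the vnode is not a regular file served through the page cache) walks the request one filesystem block at a time: each pass computes the logical block number, the offset within that block, and a transfer size clamped to the smallest of the bytes left in the block, the bytes the caller still wants, and the bytes left in the file. A minimal user-space sketch of that arithmetic (block size, file size, offset, and request length are made-up values):

/* xfersize_sketch.c -- user-space illustration of the per-block
 * arithmetic in the read loop above; all values are hypothetical. */
#include <stdio.h>

#define MIN(a, b)       ((a) < (b) ? (a) : (b))

int
main(void)
{
        long bsize = 16384;             /* fs->fs_bsize */
        long filesize = 45000;          /* ip->i_size */
        long offset = 30000;            /* uio->uio_offset */
        long resid = 20000;             /* uio->uio_resid */

        while (resid > 0) {
                long bytesinfile = filesize - offset;
                if (bytesinfile <= 0)
                        break;
                long lbn = offset / bsize;              /* lblkno() */
                long blkoffset = offset % bsize;        /* blkoff() */
                long xfersize = MIN(MIN(bsize - blkoffset, resid),
                    bytesinfile);

                printf("lbn %ld  blkoffset %ld  xfersize %ld\n",
                    lbn, blkoffset, xfersize);
                offset += xfersize;
                resid -= xfersize;
        }
        return 0;
}

With these values two blocks are transferred and the loop then stops because the 45000-byte file is exhausted even though 5000 requested bytes remain, which is how the bytesinfile check above behaves.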
  182 /*
  183  * Vnode op for writing.
  184  */
  185 int
  186 WRITE(void *v)
  187 {
  188         struct vop_write_args /* {
  189                 struct vnode *a_vp;
  190                 struct uio *a_uio;
  191                 int a_ioflag;
  192                 struct ucred *a_cred;
  193         } */ *ap = v;
  194         struct vnode *vp;
  195         struct uio *uio;
  196         struct inode *ip;
  197         struct genfs_node *gp;
  198         FS *fs;
  199         struct buf *bp;
  200         struct proc *p;
  201         struct ucred *cred;
  202         daddr_t lbn;
  203         off_t osize, origoff, oldoff, preallocoff, endallocoff, nsize;
  204         int blkoffset, error, flags, ioflag, resid, size, xfersize;
  205         int bsize, aflag;
  206         int ubc_alloc_flags;
  207         int extended=0;
  208         void *win;
  209         vsize_t bytelen;
  210         boolean_t async;
  211         boolean_t usepc = FALSE;
  212 #ifdef LFS_READWRITE
  213         boolean_t need_unreserve = FALSE;
  214 #endif
  215 
  216         cred = ap->a_cred;
  217         ioflag = ap->a_ioflag;
  218         uio = ap->a_uio;
  219         vp = ap->a_vp;
  220         ip = VTOI(vp);
  221         gp = VTOG(vp);
  222 
  223         KASSERT(vp->v_size == ip->i_size);
  224 #ifdef DIAGNOSTIC
  225         if (uio->uio_rw != UIO_WRITE)
  226                 panic("%s: mode", WRITE_S);
  227 #endif
  228 
  229         switch (vp->v_type) {
  230         case VREG:
  231                 if (ioflag & IO_APPEND)
  232                         uio->uio_offset = ip->i_size;
  233                 if ((ip->i_flags & APPEND) && uio->uio_offset != ip->i_size)
  234                         return (EPERM);
  235                 /* FALLTHROUGH */
  236         case VLNK:
  237                 break;
  238         case VDIR:
  239                 if ((ioflag & IO_SYNC) == 0)
  240                         panic("%s: nonsync dir write", WRITE_S);
  241                 break;
  242         default:
  243                 panic("%s: type", WRITE_S);
  244         }
  245 
  246         fs = ip->I_FS;
  247         if (uio->uio_offset < 0 ||
  248             (u_int64_t)uio->uio_offset + uio->uio_resid > fs->fs_maxfilesize)
  249                 return (EFBIG);
  250 #ifdef LFS_READWRITE
  251         /* Disallow writes to the Ifile, even if noschg flag is removed */
  252         /* XXX can this go away when the Ifile is no longer in the namespace? */
  253         if (vp == fs->lfs_ivnode)
  254                 return (EPERM);
  255 #endif
  256 
  257         /*
  258          * Maybe this should be above the vnode op call, but so long as
  259          * file servers have no limits, I don't think it matters.
  260          */
  261         p = uio->uio_procp;
  262         if (vp->v_type == VREG && p &&
  263             uio->uio_offset + uio->uio_resid >
  264             p->p_rlimit[RLIMIT_FSIZE].rlim_cur) {
  265                 psignal(p, SIGXFSZ);
  266                 return (EFBIG);
  267         }
  268         if (uio->uio_resid == 0)
  269                 return (0);
  270 
  271         flags = ioflag & IO_SYNC ? B_SYNC : 0;
  272         async = vp->v_mount->mnt_flag & MNT_ASYNC;
  273         origoff = uio->uio_offset;
  274         resid = uio->uio_resid;
  275         osize = ip->i_size;
  276         bsize = fs->fs_bsize;
  277         error = 0;
  278 
  279         usepc = vp->v_type == VREG;
  280 #ifdef LFS_READWRITE
  281         async = TRUE;
  282 
  283         /* Account writes.  This overcounts if pages are already dirty. */
  284         if (usepc) {
  285                 simple_lock(&lfs_subsys_lock);
  286                 lfs_subsys_pages += round_page(uio->uio_resid) >> PAGE_SHIFT;
  287                 simple_unlock(&lfs_subsys_lock);
  288         }
  289         lfs_check(vp, LFS_UNUSED_LBN, 0);
   290 #endif /* LFS_READWRITE */
  291         if (!usepc) {
  292                 goto bcache;
  293         }
  294 
  295         preallocoff = round_page(blkroundup(fs, MAX(osize, uio->uio_offset)));
  296         aflag = ioflag & IO_SYNC ? B_SYNC : 0;
  297         nsize = MAX(osize, uio->uio_offset + uio->uio_resid);
  298         endallocoff = nsize - blkoff(fs, nsize);
  299 
  300         /*
  301          * if we're increasing the file size, deal with expanding
  302          * the fragment if there is one.
  303          */
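        /*
         * For example (hypothetical numbers): with 8 KB blocks, a file
         * whose old size is 5 KB ends in a fragment.  If the write extends
         * the file into a later block, blkroundup(fs, osize) is 8 KB, so
         * ufs_balloc_range() below allocates the missing 3 KB and the
         * fragment grows to a full block before any new data is copied in.
         */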
  304 
  305         if (nsize > osize && lblkno(fs, osize) < NDADDR &&
  306             lblkno(fs, osize) != lblkno(fs, nsize) &&
  307             blkroundup(fs, osize) != osize) {
  308                 error = ufs_balloc_range(vp, osize, blkroundup(fs, osize) -
  309                     osize, cred, aflag);
  310                 if (error) {
  311                         goto out;
  312                 }
  313                 if (flags & B_SYNC) {
  314                         vp->v_size = blkroundup(fs, osize);
  315                         simple_lock(&vp->v_interlock);
  316                         VOP_PUTPAGES(vp, trunc_page(osize & ~(bsize - 1)),
  317                             round_page(vp->v_size), PGO_CLEANIT | PGO_SYNCIO);
  318                 }
  319         }
  320 
  321         ubc_alloc_flags = UBC_WRITE;
  322         while (uio->uio_resid > 0) {
  323                 boolean_t extending; /* if we're extending a whole block */
  324                 off_t newoff;
  325 
  326                 oldoff = uio->uio_offset;
  327                 blkoffset = blkoff(fs, uio->uio_offset);
  328                 bytelen = MIN(fs->fs_bsize - blkoffset, uio->uio_resid);
  329 
  330                 /*
  331                  * if we're filling in a hole, allocate the blocks now and
  332                  * initialize the pages first.  if we're extending the file,
  333                  * we can safely allocate blocks without initializing pages
  334                  * since the new blocks will be inaccessible until the write
  335                  * is complete.
  336                  */
  337                 extending = uio->uio_offset >= preallocoff &&
  338                     uio->uio_offset < endallocoff;
  339 
  340                 if (!extending) {
  341                         error = ufs_balloc_range(vp, uio->uio_offset, bytelen,
  342                             cred, aflag);
  343                         if (error) {
  344                                 break;
  345                         }
  346                         ubc_alloc_flags &= ~UBC_FAULTBUSY;
  347                 } else {
  348                         lockmgr(&gp->g_glock, LK_EXCLUSIVE, NULL);
  349                         error = GOP_ALLOC(vp, uio->uio_offset, bytelen,
  350                             aflag, cred);
  351                         lockmgr(&gp->g_glock, LK_RELEASE, NULL);
  352                         if (error) {
  353                                 break;
  354                         }
  355                         ubc_alloc_flags |= UBC_FAULTBUSY;
  356                 }
  357 
  358                 /*
  359                  * copy the data.
  360                  */
  361 
  362                 win = ubc_alloc(&vp->v_uobj, uio->uio_offset, &bytelen,
  363                     ubc_alloc_flags);
  364                 error = uiomove(win, bytelen, uio);
  365                 if (error && extending) {
  366                         /*
  367                          * if we haven't initialized the pages yet,
  368                          * do it now.  it's safe to use memset here
  369                          * because we just mapped the pages above.
  370                          */
  371                         memset(win, 0, bytelen);
  372                 }
  373                 ubc_release(win, 0);
  374 
  375                 /*
  376                  * update UVM's notion of the size now that we've
  377                  * copied the data into the vnode's pages.
  378                  *
  379                  * we should update the size even when uiomove failed.
  380                  * otherwise ffs_truncate can't flush soft update states.
  381                  */
  382 
  383                 newoff = oldoff + bytelen;
  384                 if (vp->v_size < newoff) {
  385                         uvm_vnp_setsize(vp, newoff);
  386                         extended = 1;
  387                 }
  388 
  389                 if (error) {
  390                         break;
  391                 }
  392 
  393                 /*
  394                  * flush what we just wrote if necessary.
  395                  * XXXUBC simplistic async flushing.
  396                  */
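                /*
                 * The test below is true whenever the copy above crossed a
                 * 64 KB boundary (oldoff >> 16 changes once per 1 << 16
                 * bytes).  For writes that are not asynchronous, the
                 * completed region between the two boundaries is pushed out
                 * with VOP_PUTPAGES() so that dirty pages do not pile up
                 * without bound during a large write.
                 */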
  397 
  398                 if (!async && oldoff >> 16 != uio->uio_offset >> 16) {
  399                         simple_lock(&vp->v_interlock);
  400                         error = VOP_PUTPAGES(vp, (oldoff >> 16) << 16,
  401                             (uio->uio_offset >> 16) << 16, PGO_CLEANIT);
  402                         if (error) {
  403                                 break;
  404                         }
  405                 }
  406         }
  407         if (error == 0 && ioflag & IO_SYNC) {
  408                 simple_lock(&vp->v_interlock);
  409                 error = VOP_PUTPAGES(vp, trunc_page(origoff & ~(bsize - 1)),
  410                     round_page(blkroundup(fs, uio->uio_offset)),
  411                     PGO_CLEANIT | PGO_SYNCIO);
  412         }
  413         goto out;
  414 
  415  bcache:
  416         simple_lock(&vp->v_interlock);
  417         VOP_PUTPAGES(vp, trunc_page(origoff), round_page(origoff + resid),
  418             PGO_CLEANIT | PGO_FREE | PGO_SYNCIO);
  419         while (uio->uio_resid > 0) {
  420                 lbn = lblkno(fs, uio->uio_offset);
  421                 blkoffset = blkoff(fs, uio->uio_offset);
  422                 xfersize = MIN(fs->fs_bsize - blkoffset, uio->uio_resid);
  423                 if (fs->fs_bsize > xfersize)
  424                         flags |= B_CLRBUF;
  425                 else
  426                         flags &= ~B_CLRBUF;
  427 
  428 #ifdef LFS_READWRITE
  429                 error = lfs_reserve(fs, vp, NULL,
  430                     btofsb(fs, (NIADDR + 1) << fs->lfs_bshift));
  431                 if (error)
  432                         break;
  433                 need_unreserve = TRUE;
  434 #endif
  435                 error = VOP_BALLOC(vp, uio->uio_offset, xfersize,
  436                     ap->a_cred, flags, &bp);
  437 
  438                 if (error)
  439                         break;
  440                 if (uio->uio_offset + xfersize > ip->i_size) {
  441                         ip->i_size = uio->uio_offset + xfersize;
  442                         DIP_ASSIGN(ip, size, ip->i_size);
  443                         uvm_vnp_setsize(vp, ip->i_size);
  444                         extended = 1;
  445                 }
  446                 size = BLKSIZE(fs, ip, lbn) - bp->b_resid;
  447                 if (xfersize > size)
  448                         xfersize = size;
  449 
  450                 error = uiomove((char *)bp->b_data + blkoffset, xfersize, uio);
  451 
  452                 /*
  453                  * if we didn't clear the block and the uiomove failed,
  454                  * the buf will now contain part of some other file,
  455                  * so we need to invalidate it.
  456                  */
  457                 if (error && (flags & B_CLRBUF) == 0) {
  458                         bp->b_flags |= B_INVAL;
  459                         brelse(bp);
  460                         break;
  461                 }
  462 #ifdef LFS_READWRITE
  463                 (void)VOP_BWRITE(bp);
  464                 lfs_reserve(fs, vp, NULL,
  465                     -btofsb(fs, (NIADDR + 1) << fs->lfs_bshift));
  466                 need_unreserve = FALSE;
  467 #else
  468                 if (ioflag & IO_SYNC)
  469                         (void)bwrite(bp);
  470                 else if (xfersize + blkoffset == fs->fs_bsize)
  471                         bawrite(bp);
  472                 else
  473                         bdwrite(bp);
  474 #endif
  475                 if (error || xfersize == 0)
  476                         break;
  477         }
  478 #ifdef LFS_READWRITE
  479         if (need_unreserve) {
  480                 lfs_reserve(fs, vp, NULL,
  481                     -btofsb(fs, (NIADDR + 1) << fs->lfs_bshift));
  482         }
  483 #endif
  484 
  485         /*
  486          * If we successfully wrote any data, and we are not the superuser
  487          * we clear the setuid and setgid bits as a precaution against
  488          * tampering.
  489          */
  490 out:
  491         ip->i_flag |= IN_CHANGE | IN_UPDATE;
  492         if (resid > uio->uio_resid && ap->a_cred && ap->a_cred->cr_uid != 0) {
  493                 ip->i_mode &= ~(ISUID | ISGID);
  494                 DIP_ASSIGN(ip, mode, ip->i_mode);
  495         }
  496         if (resid > uio->uio_resid)
  497                 VN_KNOTE(vp, NOTE_WRITE | (extended ? NOTE_EXTEND : 0));
  498         if (error) {
  499                 (void) VOP_TRUNCATE(vp, osize, ioflag & IO_SYNC, ap->a_cred,
  500                     uio->uio_procp);
  501                 uio->uio_offset -= resid - uio->uio_resid;
  502                 uio->uio_resid = resid;
  503         } else if (resid > uio->uio_resid && (ioflag & IO_SYNC) == IO_SYNC)
  504                 error = VOP_UPDATE(vp, NULL, NULL, UPDATE_WAIT);
  505         KASSERT(vp->v_size == ip->i_size);
  506         return (error);
  507 }
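For regular files the write path above never copies through struct buf: ubc_alloc() maps a window of the file's pages, uiomove() copies the user's data into that window, and ubc_release() drops the mapping, leaving dirty pages for the pager to write back. As a rough user-space analogy (mmap(2) rather than the kernel's UBC interface; the file name and data below are made up), the following sketch writes into a file through a mapped window and then forces the dirty pages out with msync(2):

/* ubc_analogy.c -- user-space analogy for the map, copy, release write
 * sequence used above; not the kernel API. */
#include <sys/mman.h>
#include <err.h>
#include <fcntl.h>
#include <string.h>
#include <unistd.h>

int
main(void)
{
        const char msg[] = "hello through a mapped window\n";
        size_t len = sizeof(msg) - 1;
        int fd;
        void *win;

        fd = open("ubc_sketch.tmp", O_RDWR | O_CREAT | O_TRUNC, 0644);
        if (fd == -1)
                err(1, "open");

        /* Grow the file first; the kernel likewise allocates blocks
         * (ufs_balloc_range()/GOP_ALLOC()) before mapping the window. */
        if (ftruncate(fd, (off_t)len) == -1)
                err(1, "ftruncate");

        /* Roughly "ubc_alloc": map a window over the range being written. */
        win = mmap(NULL, len, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
        if (win == MAP_FAILED)
                err(1, "mmap");

        /* Roughly "uiomove": copy the data into the mapped pages. */
        memcpy(win, msg, len);

        /* Roughly what an IO_SYNC write's VOP_PUTPAGES() achieves: push
         * the dirty pages to the file synchronously. */
        if (msync(win, len, MS_SYNC) == -1)
                err(1, "msync");

        /* Roughly "ubc_release": drop the mapping. */
        if (munmap(win, len) == -1)
                err(1, "munmap");
        close(fd);
        return 0;
}

The analogy is only meant to show the map, copy, release sequence; in the kernel the window lives in kernel address space and the dirty pages are pushed out immediately only for IO_SYNC writes or when a write crosses a 64 KB boundary on a non-async mount.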

This page is part of the FreeBSD/Linux Kernel Cross-Reference, and was automatically generated using a modified version of the LXR engine.