The Design and Implementation of the FreeBSD Operating System, Second Edition
Now available: The Design and Implementation of the FreeBSD Operating System (Second Edition)


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]

FreeBSD/Linux Kernel Cross Reference
sys/fs/ext2fs/ext2_alloc.c

Version: -  FREEBSD  -  FREEBSD-13-STABLE  -  FREEBSD-13-0  -  FREEBSD-12-STABLE  -  FREEBSD-12-0  -  FREEBSD-11-STABLE  -  FREEBSD-11-0  -  FREEBSD-10-STABLE  -  FREEBSD-10-0  -  FREEBSD-9-STABLE  -  FREEBSD-9-0  -  FREEBSD-8-STABLE  -  FREEBSD-8-0  -  FREEBSD-7-STABLE  -  FREEBSD-7-0  -  FREEBSD-6-STABLE  -  FREEBSD-6-0  -  FREEBSD-5-STABLE  -  FREEBSD-5-0  -  FREEBSD-4-STABLE  -  FREEBSD-3-STABLE  -  FREEBSD22  -  l41  -  OPENBSD  -  linux-2.6  -  MK84  -  PLAN9  -  xnu-8792 
SearchContext: -  none  -  3  -  10 

    1 /*-
    2  *  modified for Lites 1.1
    3  *
    4  *  Aug 1995, Godmar Back (gback@cs.utah.edu)
    5  *  University of Utah, Department of Computer Science
    6  */
    7 /*-
    8  * Copyright (c) 1982, 1986, 1989, 1993
    9  *      The Regents of the University of California.  All rights reserved.
   10  *
   11  * Redistribution and use in source and binary forms, with or without
   12  * modification, are permitted provided that the following conditions
   13  * are met:
   14  * 1. Redistributions of source code must retain the above copyright
   15  *    notice, this list of conditions and the following disclaimer.
   16  * 2. Redistributions in binary form must reproduce the above copyright
   17  *    notice, this list of conditions and the following disclaimer in the
   18  *    documentation and/or other materials provided with the distribution.
   19  * 4. Neither the name of the University nor the names of its contributors
   20  *    may be used to endorse or promote products derived from this software
   21  *    without specific prior written permission.
   22  *
   23  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
   24  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
   25  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
   26  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
   27  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
   28  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
   29  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
   30  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
   31  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
   32  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
   33  * SUCH DAMAGE.
   34  *
   35  *      @(#)ffs_alloc.c 8.8 (Berkeley) 2/21/94
   36  * $FreeBSD: releng/11.1/sys/fs/ext2fs/ext2_alloc.c 311231 2017-01-04 02:42:17Z pfg $
   37  */
   38 
   39 #include <sys/param.h>
   40 #include <sys/systm.h>
   41 #include <sys/conf.h>
   42 #include <sys/vnode.h>
   43 #include <sys/stat.h>
   44 #include <sys/mount.h>
   45 #include <sys/sysctl.h>
   46 #include <sys/syslog.h>
   47 #include <sys/buf.h>
   48 
   49 #include <fs/ext2fs/fs.h>
   50 #include <fs/ext2fs/inode.h>
   51 #include <fs/ext2fs/ext2_mount.h>
   52 #include <fs/ext2fs/ext2fs.h>
   53 #include <fs/ext2fs/ext2_extern.h>
   54 
   55 static daddr_t  ext2_alloccg(struct inode *, int, daddr_t, int);
   56 static daddr_t  ext2_clusteralloc(struct inode *, int, daddr_t, int);
   57 static u_long   ext2_dirpref(struct inode *);
   58 static void     ext2_fserr(struct m_ext2fs *, uid_t, char *);
   59 static u_long   ext2_hashalloc(struct inode *, int, long, int,
   60                                 daddr_t (*)(struct inode *, int, daddr_t, 
   61                                                 int));
   62 static daddr_t  ext2_nodealloccg(struct inode *, int, daddr_t, int);
   63 static daddr_t  ext2_mapsearch(struct m_ext2fs *, char *, daddr_t);
   64 
   65 /*
   66  * Allocate a block in the filesystem.
   67  *
   68  * A preference may be optionally specified. If a preference is given
   69  * the following hierarchy is used to allocate a block:
   70  *   1) allocate the requested block.
   71  *   2) allocate a rotationally optimal block in the same cylinder.
   72  *   3) allocate a block in the same cylinder group.
   73  *   4) quadradically rehash into other cylinder groups, until an
   74  *        available block is located.
   75  * If no block preference is given the following hierarchy is used
   76  * to allocate a block:
   77  *   1) allocate a block in the cylinder group that contains the
   78  *        inode for the file.
   79  *   2) quadradically rehash into other cylinder groups, until an
   80  *        available block is located.
   81  */
   82 int
   83 ext2_alloc(struct inode *ip, daddr_t lbn, e4fs_daddr_t bpref, int size,
   84     struct ucred *cred, e4fs_daddr_t *bnp)
   85 {
   86         struct m_ext2fs *fs;
   87         struct ext2mount *ump;
   88         int32_t bno;
   89         int cg;
   90 
   91         *bnp = 0;
   92         fs = ip->i_e2fs;
   93         ump = ip->i_ump;
   94         mtx_assert(EXT2_MTX(ump), MA_OWNED);
   95 #ifdef INVARIANTS
   96         if ((u_int)size > fs->e2fs_bsize || blkoff(fs, size) != 0) {
   97                 vn_printf(ip->i_devvp, "bsize = %lu, size = %d, fs = %s\n",
   98                     (long unsigned int)fs->e2fs_bsize, size, fs->e2fs_fsmnt);
   99                 panic("ext2_alloc: bad size");
  100         }
  101         if (cred == NOCRED)
  102                 panic("ext2_alloc: missing credential");
  103 #endif          /* INVARIANTS */
  104         if (size == fs->e2fs_bsize && fs->e2fs->e2fs_fbcount == 0)
  105                 goto nospace;
  106         if (cred->cr_uid != 0 &&
  107             fs->e2fs->e2fs_fbcount < fs->e2fs->e2fs_rbcount)
  108                 goto nospace;
  109         if (bpref >= fs->e2fs->e2fs_bcount)
  110                 bpref = 0;
  111         if (bpref == 0)
  112                 cg = ino_to_cg(fs, ip->i_number);
  113         else
  114                 cg = dtog(fs, bpref);
  115         bno = (daddr_t)ext2_hashalloc(ip, cg, bpref, fs->e2fs_bsize,
  116             ext2_alloccg);
  117         if (bno > 0) {
  118                 /* set next_alloc fields as done in block_getblk */
  119                 ip->i_next_alloc_block = lbn;
  120                 ip->i_next_alloc_goal = bno;
  121 
  122                 ip->i_blocks += btodb(fs->e2fs_bsize);
  123                 ip->i_flag |= IN_CHANGE | IN_UPDATE;
  124                 *bnp = bno;
  125                 return (0);
  126         }
  127 nospace:
  128         EXT2_UNLOCK(ump);
  129         ext2_fserr(fs, cred->cr_uid, "filesystem full");
  130         uprintf("\n%s: write failed, filesystem is full\n", fs->e2fs_fsmnt);
  131         return (ENOSPC);
  132 }
  133 
  134 /*
  135  * Reallocate a sequence of blocks into a contiguous sequence of blocks.
  136  *
  137  * The vnode and an array of buffer pointers for a range of sequential
  138  * logical blocks to be made contiguous is given. The allocator attempts
  139  * to find a range of sequential blocks starting as close as possible to
  140  * an fs_rotdelay offset from the end of the allocation for the logical
  141  * block immediately preceding the current range. If successful, the
  142  * physical block numbers in the buffer pointers and in the inode are
  143  * changed to reflect the new allocation. If unsuccessful, the allocation
  144  * is left unchanged. The success in doing the reallocation is returned.
  145  * Note that the error return is not reflected back to the user. Rather
  146  * the previous block allocation will be used.
  147  */
  148 
  149 static SYSCTL_NODE(_vfs, OID_AUTO, ext2fs, CTLFLAG_RW, 0, "EXT2FS filesystem");
  150 
  151 static int doasyncfree = 1;
  152 
  153 SYSCTL_INT(_vfs_ext2fs, OID_AUTO, doasyncfree, CTLFLAG_RW, &doasyncfree, 0,
  154     "Use asychronous writes to update block pointers when freeing blocks");
  155 
  156 static int doreallocblks = 1;
  157 
  158 SYSCTL_INT(_vfs_ext2fs, OID_AUTO, doreallocblks, CTLFLAG_RW, &doreallocblks, 0, "");
  159 
  160 int
  161 ext2_reallocblks(struct vop_reallocblks_args *ap)
  162 {
  163         struct m_ext2fs *fs;
  164         struct inode *ip;
  165         struct vnode *vp;
  166         struct buf *sbp, *ebp;
  167         uint32_t *bap, *sbap, *ebap;
  168         struct ext2mount *ump;
  169         struct cluster_save *buflist;
  170         struct indir start_ap[NIADDR + 1], end_ap[NIADDR + 1], *idp;
  171         e2fs_lbn_t start_lbn, end_lbn;
  172         int soff;
  173         e2fs_daddr_t newblk, blkno;
  174         int i, len, start_lvl, end_lvl, pref, ssize;
  175 
  176         if (doreallocblks == 0)
  177                 return (ENOSPC);
  178 
  179         vp = ap->a_vp;
  180         ip = VTOI(vp);
  181         fs = ip->i_e2fs;
  182         ump = ip->i_ump;
  183 
  184         if (fs->e2fs_contigsumsize <= 0)
  185                 return (ENOSPC);
  186 
  187         buflist = ap->a_buflist;
  188         len = buflist->bs_nchildren;
  189         start_lbn = buflist->bs_children[0]->b_lblkno;
  190         end_lbn = start_lbn + len - 1;
  191 #ifdef INVARIANTS
  192         for (i = 1; i < len; i++)
  193                 if (buflist->bs_children[i]->b_lblkno != start_lbn + i)
  194                         panic("ext2_reallocblks: non-cluster");
  195 #endif
  196         /*
  197          * If the cluster crosses the boundary for the first indirect
  198          * block, leave space for the indirect block. Indirect blocks
  199          * are initially laid out in a position after the last direct
  200          * block. Block reallocation would usually destroy locality by
  201          * moving the indirect block out of the way to make room for
  202          * data blocks if we didn't compensate here. We should also do
  203          * this for other indirect block boundaries, but it is only
  204          * important for the first one.
  205          */
  206         if (start_lbn < NDADDR && end_lbn >= NDADDR)
  207                 return (ENOSPC);
  208         /*
  209          * If the latest allocation is in a new cylinder group, assume that
  210          * the filesystem has decided to move and do not force it back to
  211          * the previous cylinder group.
  212          */
  213         if (dtog(fs, dbtofsb(fs, buflist->bs_children[0]->b_blkno)) !=
  214             dtog(fs, dbtofsb(fs, buflist->bs_children[len - 1]->b_blkno)))
  215                 return (ENOSPC);
  216         if (ext2_getlbns(vp, start_lbn, start_ap, &start_lvl) ||
  217             ext2_getlbns(vp, end_lbn, end_ap, &end_lvl))
  218                 return (ENOSPC);
  219         /*
  220          * Get the starting offset and block map for the first block.
  221          */
  222         if (start_lvl == 0) {
  223                 sbap = &ip->i_db[0];
  224                 soff = start_lbn;
  225         } else {
  226                 idp = &start_ap[start_lvl - 1];
  227                 if (bread(vp, idp->in_lbn, (int)fs->e2fs_bsize, NOCRED, &sbp)) {
  228                         brelse(sbp);
  229                         return (ENOSPC);
  230                 }
  231                 sbap = (u_int *)sbp->b_data;
  232                 soff = idp->in_off;
  233         }
  234         /*
  235          * If the block range spans two block maps, get the second map.
  236          */
  237         ebap = NULL;
  238         if (end_lvl == 0 || (idp = &end_ap[end_lvl - 1])->in_off + 1 >= len) {
  239                 ssize = len;
  240         } else {
  241 #ifdef INVARIANTS
  242                 if (start_ap[start_lvl - 1].in_lbn == idp->in_lbn)
  243                         panic("ext2_reallocblks: start == end");
  244 #endif
  245                 ssize = len - (idp->in_off + 1);
  246                 if (bread(vp, idp->in_lbn, (int)fs->e2fs_bsize, NOCRED, &ebp))
  247                         goto fail;
  248                 ebap = (u_int *)ebp->b_data;
  249         }
  250         /*
  251          * Find the preferred location for the cluster.
  252          */
  253         EXT2_LOCK(ump);
  254         pref = ext2_blkpref(ip, start_lbn, soff, sbap, 0);
  255         /*
  256          * Search the block map looking for an allocation of the desired size.
  257          */
  258         if ((newblk = (e2fs_daddr_t)ext2_hashalloc(ip, dtog(fs, pref), pref,
  259             len, ext2_clusteralloc)) == 0) {
  260                 EXT2_UNLOCK(ump);
  261                 goto fail;
  262         }
  263         /*
  264          * We have found a new contiguous block.
  265          *
  266          * First we have to replace the old block pointers with the new
  267          * block pointers in the inode and indirect blocks associated
  268          * with the file.
  269          */
  270 #ifdef DEBUG
  271         printf("realloc: ino %ju, lbns %jd-%jd\n\told:",
  272             (uintmax_t)ip->i_number, (intmax_t)start_lbn, (intmax_t)end_lbn);
  273 #endif  /* DEBUG */
  274         blkno = newblk;
  275         for (bap = &sbap[soff], i = 0; i < len; i++, blkno += fs->e2fs_fpb) {
  276                 if (i == ssize) {
  277                         bap = ebap;
  278                         soff = -i;
  279                 }
  280 #ifdef INVARIANTS
  281                 if (buflist->bs_children[i]->b_blkno != fsbtodb(fs, *bap))
  282                         panic("ext2_reallocblks: alloc mismatch");
  283 #endif
  284 #ifdef DEBUG
  285                 printf(" %d,", *bap);
  286 #endif  /* DEBUG */
  287                 *bap++ = blkno;
  288         }
  289         /*
  290          * Next we must write out the modified inode and indirect blocks.
  291          * For strict correctness, the writes should be synchronous since
  292          * the old block values may have been written to disk. In practise
  293          * they are almost never written, but if we are concerned about
  294          * strict correctness, the `doasyncfree' flag should be set to zero.
  295          *
  296          * The test on `doasyncfree' should be changed to test a flag
  297          * that shows whether the associated buffers and inodes have
  298          * been written. The flag should be set when the cluster is
  299          * started and cleared whenever the buffer or inode is flushed.
  300          * We can then check below to see if it is set, and do the
  301          * synchronous write only when it has been cleared.
  302          */
  303         if (sbap != &ip->i_db[0]) {
  304                 if (doasyncfree)
  305                         bdwrite(sbp);
  306                 else
  307                         bwrite(sbp);
  308         } else {
  309                 ip->i_flag |= IN_CHANGE | IN_UPDATE;
  310                 if (!doasyncfree)
  311                         ext2_update(vp, 1);
  312         }
  313         if (ssize < len) {
  314                 if (doasyncfree)
  315                         bdwrite(ebp);
  316                 else
  317                         bwrite(ebp);
  318         }
  319         /*
  320          * Last, free the old blocks and assign the new blocks to the buffers.
  321          */
  322 #ifdef DEBUG
  323         printf("\n\tnew:");
  324 #endif  /* DEBUG */
  325         for (blkno = newblk, i = 0; i < len; i++, blkno += fs->e2fs_fpb) {
  326                 ext2_blkfree(ip, dbtofsb(fs, buflist->bs_children[i]->b_blkno),
  327                     fs->e2fs_bsize);
  328                 buflist->bs_children[i]->b_blkno = fsbtodb(fs, blkno);
  329 #ifdef DEBUG
  330                 printf(" %d,", blkno);
  331 #endif  /* DEBUG */
  332         }
  333 #ifdef DEBUG
  334         printf("\n");
  335 #endif  /* DEBUG */
  336         return (0);
  337 
  338 fail:
  339         if (ssize < len)
  340                 brelse(ebp);
  341         if (sbap != &ip->i_db[0])
  342                 brelse(sbp);
  343         return (ENOSPC);
  344 }
  345 
  346 /*
  347  * Allocate an inode in the filesystem.
  348  *
  349  */
  350 int
  351 ext2_valloc(struct vnode *pvp, int mode, struct ucred *cred, struct vnode **vpp)
  352 {
  353         struct timespec ts;
  354         struct inode *pip;
  355         struct m_ext2fs *fs;
  356         struct inode *ip;
  357         struct ext2mount *ump;
  358         ino_t ino, ipref;
  359         int i, error, cg;
  360 
  361         *vpp = NULL;
  362         pip = VTOI(pvp);
  363         fs = pip->i_e2fs;
  364         ump = pip->i_ump;
  365 
  366         EXT2_LOCK(ump);
  367         if (fs->e2fs->e2fs_ficount == 0)
  368                 goto noinodes;
  369         /*
  370          * If it is a directory then obtain a cylinder group based on
  371          * ext2_dirpref else obtain it using ino_to_cg. The preferred inode is
  372          * always the next inode.
  373          */
  374         if ((mode & IFMT) == IFDIR) {
  375                 cg = ext2_dirpref(pip);
  376                 if (fs->e2fs_contigdirs[cg] < 255)
  377                         fs->e2fs_contigdirs[cg]++;
  378         } else {
  379                 cg = ino_to_cg(fs, pip->i_number);
  380                 if (fs->e2fs_contigdirs[cg] > 0)
  381                         fs->e2fs_contigdirs[cg]--;
  382         }
  383         ipref = cg * fs->e2fs->e2fs_ipg + 1;
  384         ino = (ino_t)ext2_hashalloc(pip, cg, (long)ipref, mode, ext2_nodealloccg);
  385 
  386         if (ino == 0)
  387                 goto noinodes;
  388         error = VFS_VGET(pvp->v_mount, ino, LK_EXCLUSIVE, vpp);
  389         if (error) {
  390                 ext2_vfree(pvp, ino, mode);
  391                 return (error);
  392         }
  393         ip = VTOI(*vpp);
  394 
  395         /*
  396          * The question is whether using VGET was such good idea at all:
  397          * Linux doesn't read the old inode in when it is allocating a
  398          * new one. I will set at least i_size and i_blocks to zero.
  399          */
  400         ip->i_flag = 0;
  401         ip->i_size = 0;
  402         ip->i_blocks = 0;
  403         ip->i_mode = 0;
  404         ip->i_flags = 0;
  405         /* now we want to make sure that the block pointers are zeroed out */
  406         for (i = 0; i < NDADDR; i++)
  407                 ip->i_db[i] = 0;
  408         for (i = 0; i < NIADDR; i++)
  409                 ip->i_ib[i] = 0;
  410 
  411         /*
  412          * Set up a new generation number for this inode.
  413          * Avoid zero values.
  414          */
  415         do {
  416                 ip->i_gen = arc4random();
  417         } while (ip->i_gen == 0);
  418 
  419         vfs_timestamp(&ts);
  420         ip->i_birthtime = ts.tv_sec;
  421         ip->i_birthnsec = ts.tv_nsec;
  422 
  423 /*
  424 printf("ext2_valloc: allocated inode %d\n", ino);
  425 */
  426         return (0);
  427 noinodes:
  428         EXT2_UNLOCK(ump);
  429         ext2_fserr(fs, cred->cr_uid, "out of inodes");
  430         uprintf("\n%s: create/symlink failed, no inodes free\n", fs->e2fs_fsmnt);
  431         return (ENOSPC);
  432 }
  433 
  434 /*
  435  * Find a cylinder to place a directory.
  436  *
  437  * The policy implemented by this algorithm is to allocate a
  438  * directory inode in the same cylinder group as its parent
  439  * directory, but also to reserve space for its files inodes
  440  * and data. Restrict the number of directories which may be
  441  * allocated one after another in the same cylinder group
  442  * without intervening allocation of files.
  443  *
  444  * If we allocate a first level directory then force allocation
  445  * in another cylinder group.
  446  *
  447  */
  448 static u_long
  449 ext2_dirpref(struct inode *pip)
  450 {
  451         struct m_ext2fs *fs;
  452         int cg, prefcg, cgsize;
  453         u_int avgifree, avgbfree, avgndir, curdirsize;
  454         u_int minifree, minbfree, maxndir;
  455         u_int mincg, minndir;
  456         u_int dirsize, maxcontigdirs;
  457 
  458         mtx_assert(EXT2_MTX(pip->i_ump), MA_OWNED);
  459         fs = pip->i_e2fs;
  460 
  461         avgifree = fs->e2fs->e2fs_ficount / fs->e2fs_gcount;
  462         avgbfree = fs->e2fs->e2fs_fbcount / fs->e2fs_gcount;
  463         avgndir = fs->e2fs_total_dir / fs->e2fs_gcount;
  464 
  465         /*
  466          * Force allocation in another cg if creating a first level dir.
  467          */
  468         ASSERT_VOP_LOCKED(ITOV(pip), "ext2fs_dirpref");
  469         if (ITOV(pip)->v_vflag & VV_ROOT) {
  470                 prefcg = arc4random() % fs->e2fs_gcount;
  471                 mincg = prefcg;
  472                 minndir = fs->e2fs_ipg;
  473                 for (cg = prefcg; cg < fs->e2fs_gcount; cg++)
  474                         if (fs->e2fs_gd[cg].ext2bgd_ndirs < minndir &&
  475                             fs->e2fs_gd[cg].ext2bgd_nifree >= avgifree &&
  476                             fs->e2fs_gd[cg].ext2bgd_nbfree >= avgbfree) {
  477                                 mincg = cg;
  478                                 minndir = fs->e2fs_gd[cg].ext2bgd_ndirs;
  479                         }
  480                 for (cg = 0; cg < prefcg; cg++)
  481                         if (fs->e2fs_gd[cg].ext2bgd_ndirs < minndir &&
  482                             fs->e2fs_gd[cg].ext2bgd_nifree >= avgifree &&
  483                             fs->e2fs_gd[cg].ext2bgd_nbfree >= avgbfree) {
  484                                 mincg = cg;
  485                                 minndir = fs->e2fs_gd[cg].ext2bgd_ndirs;
  486                         }
  487                 return (mincg);
  488         }
  489         /*
  490          * Count various limits which used for
  491          * optimal allocation of a directory inode.
  492          */
  493         maxndir = min(avgndir + fs->e2fs_ipg / 16, fs->e2fs_ipg);
  494         minifree = avgifree - avgifree / 4;
  495         if (minifree < 1)
  496                 minifree = 1;
  497         minbfree = avgbfree - avgbfree / 4;
  498         if (minbfree < 1)
  499                 minbfree = 1;
  500         cgsize = fs->e2fs_fsize * fs->e2fs_fpg;
  501         dirsize = AVGDIRSIZE;
  502         curdirsize = avgndir ? (cgsize - avgbfree * fs->e2fs_bsize) / avgndir : 0;
  503         if (dirsize < curdirsize)
  504                 dirsize = curdirsize;
  505         maxcontigdirs = min((avgbfree * fs->e2fs_bsize) / dirsize, 255);
  506         maxcontigdirs = min(maxcontigdirs, fs->e2fs_ipg / AFPDIR);
  507         if (maxcontigdirs == 0)
  508                 maxcontigdirs = 1;
  509 
  510         /*
  511          * Limit number of dirs in one cg and reserve space for
  512          * regular files, but only if we have no deficit in
  513          * inodes or space.
  514          */
  515         prefcg = ino_to_cg(fs, pip->i_number);
  516         for (cg = prefcg; cg < fs->e2fs_gcount; cg++)
  517                 if (fs->e2fs_gd[cg].ext2bgd_ndirs < maxndir &&
  518                     fs->e2fs_gd[cg].ext2bgd_nifree >= minifree &&
  519                     fs->e2fs_gd[cg].ext2bgd_nbfree >= minbfree) {
  520                         if (fs->e2fs_contigdirs[cg] < maxcontigdirs)
  521                                 return (cg);
  522                 }
  523         for (cg = 0; cg < prefcg; cg++)
  524                 if (fs->e2fs_gd[cg].ext2bgd_ndirs < maxndir &&
  525                     fs->e2fs_gd[cg].ext2bgd_nifree >= minifree &&
  526                     fs->e2fs_gd[cg].ext2bgd_nbfree >= minbfree) {
  527                         if (fs->e2fs_contigdirs[cg] < maxcontigdirs)
  528                                 return (cg);
  529                 }
  530         /*
  531          * This is a backstop when we have deficit in space.
  532          */
  533         for (cg = prefcg; cg < fs->e2fs_gcount; cg++)
  534                 if (fs->e2fs_gd[cg].ext2bgd_nifree >= avgifree)
  535                         return (cg);
  536         for (cg = 0; cg < prefcg; cg++)
  537                 if (fs->e2fs_gd[cg].ext2bgd_nifree >= avgifree)
  538                         break;
  539         return (cg);
  540 }
  541 
  542 /*
  543  * Select the desired position for the next block in a file.
  544  *
  545  * we try to mimic what Remy does in inode_getblk/block_getblk
  546  *
  547  * we note: blocknr == 0 means that we're about to allocate either
  548  * a direct block or a pointer block at the first level of indirection
  549  * (In other words, stuff that will go in i_db[] or i_ib[])
  550  *
  551  * blocknr != 0 means that we're allocating a block that is none
  552  * of the above. Then, blocknr tells us the number of the block
  553  * that will hold the pointer
  554  */
  555 e4fs_daddr_t
  556 ext2_blkpref(struct inode *ip, e2fs_lbn_t lbn, int indx, e2fs_daddr_t *bap,
  557     e2fs_daddr_t blocknr)
  558 {
  559         int tmp;
  560 
  561         mtx_assert(EXT2_MTX(ip->i_ump), MA_OWNED);
  562 
  563         /*
  564          * If the next block is actually what we thought it is, then set the
  565          * goal to what we thought it should be.
  566          */
  567         if (ip->i_next_alloc_block == lbn && ip->i_next_alloc_goal != 0)
  568                 return ip->i_next_alloc_goal;
  569 
  570         /*
  571          * Now check whether we were provided with an array that basically
  572          * tells us previous blocks to which we want to stay close.
  573          */
  574         if (bap)
  575                 for (tmp = indx - 1; tmp >= 0; tmp--)
  576                         if (bap[tmp])
  577                                 return bap[tmp];
  578 
  579         /*
  580          * Else lets fall back to the blocknr or, if there is none, follow
  581          * the rule that a block should be allocated near its inode.
  582          */
  583         return blocknr ? blocknr :
  584             (e2fs_daddr_t)(ip->i_block_group *
  585             EXT2_BLOCKS_PER_GROUP(ip->i_e2fs)) +
  586             ip->i_e2fs->e2fs->e2fs_first_dblock;
  587 }
  588 
  589 /*
  590  * Implement the cylinder overflow algorithm.
  591  *
  592  * The policy implemented by this algorithm is:
  593  *   1) allocate the block in its requested cylinder group.
  594  *   2) quadradically rehash on the cylinder group number.
  595  *   3) brute force search for a free block.
  596  */
  597 static u_long
  598 ext2_hashalloc(struct inode *ip, int cg, long pref, int size,
  599     daddr_t (*allocator) (struct inode *, int, daddr_t, int))
  600 {
  601         struct m_ext2fs *fs;
  602         ino_t result;
  603         int i, icg = cg;
  604 
  605         mtx_assert(EXT2_MTX(ip->i_ump), MA_OWNED);
  606         fs = ip->i_e2fs;
  607         /*
  608          * 1: preferred cylinder group
  609          */
  610         result = (*allocator)(ip, cg, pref, size);
  611         if (result)
  612                 return (result);
  613         /*
  614          * 2: quadratic rehash
  615          */
  616         for (i = 1; i < fs->e2fs_gcount; i *= 2) {
  617                 cg += i;
  618                 if (cg >= fs->e2fs_gcount)
  619                         cg -= fs->e2fs_gcount;
  620                 result = (*allocator)(ip, cg, 0, size);
  621                 if (result)
  622                         return (result);
  623         }
  624         /*
  625          * 3: brute force search
  626          * Note that we start at i == 2, since 0 was checked initially,
  627          * and 1 is always checked in the quadratic rehash.
  628          */
  629         cg = (icg + 2) % fs->e2fs_gcount;
  630         for (i = 2; i < fs->e2fs_gcount; i++) {
  631                 result = (*allocator)(ip, cg, 0, size);
  632                 if (result)
  633                         return (result);
  634                 cg++;
  635                 if (cg == fs->e2fs_gcount)
  636                         cg = 0;
  637         }
  638         return (0);
  639 }
  640 
  641 /*
  642  * Determine whether a block can be allocated.
  643  *
  644  * Check to see if a block of the appropriate size is available,
  645  * and if it is, allocate it.
  646  */
  647 static daddr_t
  648 ext2_alloccg(struct inode *ip, int cg, daddr_t bpref, int size)
  649 {
  650         struct m_ext2fs *fs;
  651         struct buf *bp;
  652         struct ext2mount *ump;
  653         daddr_t bno, runstart, runlen;
  654         int bit, loc, end, error, start;
  655         char *bbp;
  656         /* XXX ondisk32 */
  657         fs = ip->i_e2fs;
  658         ump = ip->i_ump;
  659         if (fs->e2fs_gd[cg].ext2bgd_nbfree == 0)
  660                 return (0);
  661         EXT2_UNLOCK(ump);
  662         error = bread(ip->i_devvp, fsbtodb(fs,
  663             fs->e2fs_gd[cg].ext2bgd_b_bitmap),
  664             (int)fs->e2fs_bsize, NOCRED, &bp);
  665         if (error) {
  666                 brelse(bp);
  667                 EXT2_LOCK(ump);
  668                 return (0);
  669         }
  670         if (fs->e2fs_gd[cg].ext2bgd_nbfree == 0) {
  671                 /*
  672                  * Another thread allocated the last block in this
  673                  * group while we were waiting for the buffer.
  674                  */
  675                 brelse(bp);
  676                 EXT2_LOCK(ump);
  677                 return (0);
  678         }
  679         bbp = (char *)bp->b_data;
  680 
  681         if (dtog(fs, bpref) != cg)
  682                 bpref = 0;
  683         if (bpref != 0) {
  684                 bpref = dtogd(fs, bpref);
  685                 /*
  686                  * if the requested block is available, use it
  687                  */
  688                 if (isclr(bbp, bpref)) {
  689                         bno = bpref;
  690                         goto gotit;
  691                 }
  692         }
  693         /*
  694          * no blocks in the requested cylinder, so take next
  695          * available one in this cylinder group.
  696          * first try to get 8 contigous blocks, then fall back to a single
  697          * block.
  698          */
  699         if (bpref)
  700                 start = dtogd(fs, bpref) / NBBY;
  701         else
  702                 start = 0;
  703         end = howmany(fs->e2fs->e2fs_fpg, NBBY) - start;
  704 retry:
  705         runlen = 0;
  706         runstart = 0;
  707         for (loc = start; loc < end; loc++) {
  708                 if (bbp[loc] == (char)0xff) {
  709                         runlen = 0;
  710                         continue;
  711                 }
  712 
  713                 /* Start of a run, find the number of high clear bits. */
  714                 if (runlen == 0) {
  715                         bit = fls(bbp[loc]);
  716                         runlen = NBBY - bit;
  717                         runstart = loc * NBBY + bit;
  718                 } else if (bbp[loc] == 0) {
  719                         /* Continue a run. */
  720                         runlen += NBBY;
  721                 } else {
  722                         /*
  723                          * Finish the current run.  If it isn't long
  724                          * enough, start a new one.
  725                          */
  726                         bit = ffs(bbp[loc]) - 1;
  727                         runlen += bit;
  728                         if (runlen >= 8) {
  729                                 bno = runstart;
  730                                 goto gotit;
  731                         }
  732 
  733                         /* Run was too short, start a new one. */
  734                         bit = fls(bbp[loc]);
  735                         runlen = NBBY - bit;
  736                         runstart = loc * NBBY + bit;
  737                 }
  738 
  739                 /* If the current run is long enough, use it. */
  740                 if (runlen >= 8) {
  741                         bno = runstart;
  742                         goto gotit;
  743                 }
  744         }
  745         if (start != 0) {
  746                 end = start;
  747                 start = 0;
  748                 goto retry;
  749         }
  750         bno = ext2_mapsearch(fs, bbp, bpref);
  751         if (bno < 0) {
  752                 brelse(bp);
  753                 EXT2_LOCK(ump);
  754                 return (0);
  755         }
  756 gotit:
  757 #ifdef INVARIANTS
  758         if (isset(bbp, bno)) {
  759                 printf("ext2fs_alloccgblk: cg=%d bno=%jd fs=%s\n",
  760                     cg, (intmax_t)bno, fs->e2fs_fsmnt);
  761                 panic("ext2fs_alloccg: dup alloc");
  762         }
  763 #endif
  764         setbit(bbp, bno);
  765         EXT2_LOCK(ump);
  766         ext2_clusteracct(fs, bbp, cg, bno, -1);
  767         fs->e2fs->e2fs_fbcount--;
  768         fs->e2fs_gd[cg].ext2bgd_nbfree--;
  769         fs->e2fs_fmod = 1;
  770         EXT2_UNLOCK(ump);
  771         bdwrite(bp);
  772         return (cg * fs->e2fs->e2fs_fpg + fs->e2fs->e2fs_first_dblock + bno);
  773 }
  774 
  775 /*
  776  * Determine whether a cluster can be allocated.
  777  */
  778 static daddr_t
  779 ext2_clusteralloc(struct inode *ip, int cg, daddr_t bpref, int len)
  780 {
  781         struct m_ext2fs *fs;
  782         struct ext2mount *ump;
  783         struct buf *bp;
  784         char *bbp;
  785         int bit, error, got, i, loc, run;
  786         int32_t *lp;
  787         daddr_t bno;
  788 
  789         fs = ip->i_e2fs;
  790         ump = ip->i_ump;
  791 
  792         if (fs->e2fs_maxcluster[cg] < len)
  793                 return (0);
  794 
  795         EXT2_UNLOCK(ump);
  796         error = bread(ip->i_devvp,
  797             fsbtodb(fs, fs->e2fs_gd[cg].ext2bgd_b_bitmap),
  798             (int)fs->e2fs_bsize, NOCRED, &bp);
  799         if (error)
  800                 goto fail_lock;
  801 
  802         bbp = (char *)bp->b_data;
  803         EXT2_LOCK(ump);
  804         /*
  805          * Check to see if a cluster of the needed size (or bigger) is
  806          * available in this cylinder group.
  807          */
  808         lp = &fs->e2fs_clustersum[cg].cs_sum[len];
  809         for (i = len; i <= fs->e2fs_contigsumsize; i++)
  810                 if (*lp++ > 0)
  811                         break;
  812         if (i > fs->e2fs_contigsumsize) {
  813                 /*
  814                  * Update the cluster summary information to reflect
  815                  * the true maximum-sized cluster so that future cluster
  816                  * allocation requests can avoid reading the bitmap only
  817                  * to find no cluster.
  818                  */
  819                 lp = &fs->e2fs_clustersum[cg].cs_sum[len - 1];
  820                 for (i = len - 1; i > 0; i--)
  821                         if (*lp-- > 0)
  822                                 break;
  823                 fs->e2fs_maxcluster[cg] = i;
  824                 goto fail;
  825         }
  826         EXT2_UNLOCK(ump);
  827 
  828         /* Search the bitmap to find a big enough cluster like in FFS. */
  829         if (dtog(fs, bpref) != cg)
  830                 bpref = 0;
  831         if (bpref != 0)
  832                 bpref = dtogd(fs, bpref);
  833         loc = bpref / NBBY;
  834         bit = 1 << (bpref % NBBY);
  835         for (run = 0, got = bpref; got < fs->e2fs->e2fs_fpg; got++) {
  836                 if ((bbp[loc] & bit) != 0)
  837                         run = 0;
  838                 else {
  839                         run++;
  840                         if (run == len)
  841                                 break;
  842                 }
  843                 if ((got & (NBBY - 1)) != (NBBY - 1))
  844                         bit <<= 1;
  845                 else {
  846                         loc++;
  847                         bit = 1;
  848                 }
  849         }
  850 
  851         if (got >= fs->e2fs->e2fs_fpg)
  852                 goto fail_lock;
  853 
  854         /* Allocate the cluster that we found. */
  855         for (i = 1; i < len; i++)
  856                 if (!isclr(bbp, got - run + i))
  857                         panic("ext2_clusteralloc: map mismatch");
  858 
  859         bno = got - run + 1;
  860         if (bno >= fs->e2fs->e2fs_fpg)
  861                 panic("ext2_clusteralloc: allocated out of group");
  862 
  863         EXT2_LOCK(ump);
  864         for (i = 0; i < len; i += fs->e2fs_fpb) {
  865                 setbit(bbp, bno + i);
  866                 ext2_clusteracct(fs, bbp, cg, bno + i, -1);
  867                 fs->e2fs->e2fs_fbcount--;
  868                 fs->e2fs_gd[cg].ext2bgd_nbfree--;
  869         }
  870         fs->e2fs_fmod = 1;
  871         EXT2_UNLOCK(ump);
  872 
  873         bdwrite(bp);
  874         return (cg * fs->e2fs->e2fs_fpg + fs->e2fs->e2fs_first_dblock + bno);
  875 
  876 fail_lock:
  877         EXT2_LOCK(ump);
  878 fail:
  879         brelse(bp);
  880         return (0);
  881 }
  882 
  883 /*
  884  * Determine whether an inode can be allocated.
  885  *
  886  * Check to see if an inode is available, and if it is,
  887  * allocate it using tode in the specified cylinder group.
  888  */
  889 static daddr_t
  890 ext2_nodealloccg(struct inode *ip, int cg, daddr_t ipref, int mode)
  891 {
  892         struct m_ext2fs *fs;
  893         struct buf *bp;
  894         struct ext2mount *ump;
  895         int error, start, len;
  896         char *ibp, *loc;
  897 
  898         ipref--;        /* to avoid a lot of (ipref -1) */
  899         if (ipref == -1)
  900                 ipref = 0;
  901         fs = ip->i_e2fs;
  902         ump = ip->i_ump;
  903         if (fs->e2fs_gd[cg].ext2bgd_nifree == 0)
  904                 return (0);
  905         EXT2_UNLOCK(ump);
  906         error = bread(ip->i_devvp, fsbtodb(fs,
  907             fs->e2fs_gd[cg].ext2bgd_i_bitmap),
  908             (int)fs->e2fs_bsize, NOCRED, &bp);
  909         if (error) {
  910                 brelse(bp);
  911                 EXT2_LOCK(ump);
  912                 return (0);
  913         }
  914         if (fs->e2fs_gd[cg].ext2bgd_nifree == 0) {
  915                 /*
  916                  * Another thread allocated the last i-node in this
  917                  * group while we were waiting for the buffer.
  918                  */
  919                 brelse(bp);
  920                 EXT2_LOCK(ump);
  921                 return (0);
  922         }
  923         ibp = (char *)bp->b_data;
  924         if (ipref) {
  925                 ipref %= fs->e2fs->e2fs_ipg;
  926                 if (isclr(ibp, ipref))
  927                         goto gotit;
  928         }
  929         start = ipref / NBBY;
  930         len = howmany(fs->e2fs->e2fs_ipg - ipref, NBBY);
  931         loc = memcchr(&ibp[start], 0xff, len);
  932         if (loc == NULL) {
  933                 len = start + 1;
  934                 start = 0;
  935                 loc = memcchr(&ibp[start], 0xff, len);
  936                 if (loc == NULL) {
  937                         printf("cg = %d, ipref = %lld, fs = %s\n",
  938                             cg, (long long)ipref, fs->e2fs_fsmnt);
  939                         panic("ext2fs_nodealloccg: map corrupted");
  940                         /* NOTREACHED */
  941                 }
  942         }
  943         ipref = (loc - ibp) * NBBY + ffs(~*loc) - 1;
  944 gotit:
  945         setbit(ibp, ipref);
  946         EXT2_LOCK(ump);
  947         fs->e2fs_gd[cg].ext2bgd_nifree--;
  948         fs->e2fs->e2fs_ficount--;
  949         fs->e2fs_fmod = 1;
  950         if ((mode & IFMT) == IFDIR) {
  951                 fs->e2fs_gd[cg].ext2bgd_ndirs++;
  952                 fs->e2fs_total_dir++;
  953         }
  954         EXT2_UNLOCK(ump);
  955         bdwrite(bp);
  956         return (cg * fs->e2fs->e2fs_ipg + ipref + 1);
  957 }
  958 
  959 /*
  960  * Free a block or fragment.
  961  *
  962  */
  963 void
  964 ext2_blkfree(struct inode *ip, e4fs_daddr_t bno, long size)
  965 {
  966         struct m_ext2fs *fs;
  967         struct buf *bp;
  968         struct ext2mount *ump;
  969         int cg, error;
  970         char *bbp;
  971 
  972         fs = ip->i_e2fs;
  973         ump = ip->i_ump;
  974         cg = dtog(fs, bno);
  975         if ((u_int)bno >= fs->e2fs->e2fs_bcount) {
  976                 printf("bad block %lld, ino %ju\n", (long long)bno,
  977                     (uintmax_t)ip->i_number);
  978                 ext2_fserr(fs, ip->i_uid, "bad block");
  979                 return;
  980         }
  981         error = bread(ip->i_devvp,
  982             fsbtodb(fs, fs->e2fs_gd[cg].ext2bgd_b_bitmap),
  983             (int)fs->e2fs_bsize, NOCRED, &bp);
  984         if (error) {
  985                 brelse(bp);
  986                 return;
  987         }
  988         bbp = (char *)bp->b_data;
  989         bno = dtogd(fs, bno);
  990         if (isclr(bbp, bno)) {
  991                 printf("block = %lld, fs = %s\n",
  992                     (long long)bno, fs->e2fs_fsmnt);
  993                 panic("ext2_blkfree: freeing free block");
  994         }
  995         clrbit(bbp, bno);
  996         EXT2_LOCK(ump);
  997         ext2_clusteracct(fs, bbp, cg, bno, 1);
  998         fs->e2fs->e2fs_fbcount++;
  999         fs->e2fs_gd[cg].ext2bgd_nbfree++;
 1000         fs->e2fs_fmod = 1;
 1001         EXT2_UNLOCK(ump);
 1002         bdwrite(bp);
 1003 }
 1004 
 1005 /*
 1006  * Free an inode.
 1007  *
 1008  */
 1009 int
 1010 ext2_vfree(struct vnode *pvp, ino_t ino, int mode)
 1011 {
 1012         struct m_ext2fs *fs;
 1013         struct inode *pip;
 1014         struct buf *bp;
 1015         struct ext2mount *ump;
 1016         int error, cg;
 1017         char *ibp;
 1018 
 1019         pip = VTOI(pvp);
 1020         fs = pip->i_e2fs;
 1021         ump = pip->i_ump;
 1022         if ((u_int)ino > fs->e2fs_ipg * fs->e2fs_gcount)
 1023                 panic("ext2_vfree: range: devvp = %p, ino = %ju, fs = %s",
 1024                     pip->i_devvp, (uintmax_t)ino, fs->e2fs_fsmnt);
 1025 
 1026         cg = ino_to_cg(fs, ino);
 1027         error = bread(pip->i_devvp,
 1028             fsbtodb(fs, fs->e2fs_gd[cg].ext2bgd_i_bitmap),
 1029             (int)fs->e2fs_bsize, NOCRED, &bp);
 1030         if (error) {
 1031                 brelse(bp);
 1032                 return (0);
 1033         }
 1034         ibp = (char *)bp->b_data;
 1035         ino = (ino - 1) % fs->e2fs->e2fs_ipg;
 1036         if (isclr(ibp, ino)) {
 1037                 printf("ino = %llu, fs = %s\n",
 1038                     (unsigned long long)ino, fs->e2fs_fsmnt);
 1039                 if (fs->e2fs_ronly == 0)
 1040                         panic("ext2_vfree: freeing free inode");
 1041         }
 1042         clrbit(ibp, ino);
 1043         EXT2_LOCK(ump);
 1044         fs->e2fs->e2fs_ficount++;
 1045         fs->e2fs_gd[cg].ext2bgd_nifree++;
 1046         if ((mode & IFMT) == IFDIR) {
 1047                 fs->e2fs_gd[cg].ext2bgd_ndirs--;
 1048                 fs->e2fs_total_dir--;
 1049         }
 1050         fs->e2fs_fmod = 1;
 1051         EXT2_UNLOCK(ump);
 1052         bdwrite(bp);
 1053         return (0);
 1054 }
 1055 
 1056 /*
 1057  * Find a block in the specified cylinder group.
 1058  *
 1059  * It is a panic if a request is made to find a block if none are
 1060  * available.
 1061  */
 1062 static daddr_t
 1063 ext2_mapsearch(struct m_ext2fs *fs, char *bbp, daddr_t bpref)
 1064 {
 1065         char *loc;
 1066         int start, len;
 1067 
 1068         /*
 1069          * find the fragment by searching through the free block
 1070          * map for an appropriate bit pattern
 1071          */
 1072         if (bpref)
 1073                 start = dtogd(fs, bpref) / NBBY;
 1074         else
 1075                 start = 0;
 1076         len = howmany(fs->e2fs->e2fs_fpg, NBBY) - start;
 1077         loc = memcchr(&bbp[start], 0xff, len);
 1078         if (loc == NULL) {
 1079                 len = start + 1;
 1080                 start = 0;
 1081                 loc = memcchr(&bbp[start], 0xff, len);
 1082                 if (loc == NULL) {
 1083                         printf("start = %d, len = %d, fs = %s\n",
 1084                             start, len, fs->e2fs_fsmnt);
 1085                         panic("ext2_mapsearch: map corrupted");
 1086                         /* NOTREACHED */
 1087                 }
 1088         }
 1089         return ((loc - bbp) * NBBY + ffs(~*loc) - 1);
 1090 }
 1091 
 1092 /*
 1093  * Fserr prints the name of a filesystem with an error diagnostic.
 1094  *
 1095  * The form of the error message is:
 1096  *      fs: error message
 1097  */
 1098 static void
 1099 ext2_fserr(struct m_ext2fs *fs, uid_t uid, char *cp)
 1100 {
 1101 
 1102         log(LOG_ERR, "uid %u on %s: %s\n", uid, fs->e2fs_fsmnt, cp);
 1103 }
 1104 
 1105 int
 1106 cg_has_sb(int i)
 1107 {
 1108         int a3, a5, a7;
 1109 
 1110         if (i == 0 || i == 1)
 1111                 return 1;
 1112         for (a3 = 3, a5 = 5, a7 = 7;
 1113             a3 <= i || a5 <= i || a7 <= i;
 1114             a3 *= 3, a5 *= 5, a7 *= 7)
 1115                 if (i == a3 || i == a5 || i == a7)
 1116                         return 1;
 1117         return 0;
 1118 }

Cache object: da5acd395953d1259850a7481cf8a442


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]


This page is part of the FreeBSD/Linux Linux Kernel Cross-Reference, and was automatically generated using a modified version of the LXR engine.