The Design and Implementation of the FreeBSD Operating System, Second Edition
Now available: The Design and Implementation of the FreeBSD Operating System (Second Edition)


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]

FreeBSD/Linux Kernel Cross Reference
sys/fs/ext2fs/ext2_alloc.c

Version: -  FREEBSD  -  FREEBSD-13-STABLE  -  FREEBSD-13-0  -  FREEBSD-12-STABLE  -  FREEBSD-12-0  -  FREEBSD-11-STABLE  -  FREEBSD-11-0  -  FREEBSD-10-STABLE  -  FREEBSD-10-0  -  FREEBSD-9-STABLE  -  FREEBSD-9-0  -  FREEBSD-8-STABLE  -  FREEBSD-8-0  -  FREEBSD-7-STABLE  -  FREEBSD-7-0  -  FREEBSD-6-STABLE  -  FREEBSD-6-0  -  FREEBSD-5-STABLE  -  FREEBSD-5-0  -  FREEBSD-4-STABLE  -  FREEBSD-3-STABLE  -  FREEBSD22  -  l41  -  OPENBSD  -  linux-2.6  -  MK84  -  PLAN9  -  xnu-8792 
SearchContext: -  none  -  3  -  10 

    1 /*-
    2  *  modified for Lites 1.1
    3  *
    4  *  Aug 1995, Godmar Back (gback@cs.utah.edu)
    5  *  University of Utah, Department of Computer Science
    6  */
    7 /*-
    8  * Copyright (c) 1982, 1986, 1989, 1993
    9  *      The Regents of the University of California.  All rights reserved.
   10  *
   11  * Redistribution and use in source and binary forms, with or without
   12  * modification, are permitted provided that the following conditions
   13  * are met:
   14  * 1. Redistributions of source code must retain the above copyright
   15  *    notice, this list of conditions and the following disclaimer.
   16  * 2. Redistributions in binary form must reproduce the above copyright
   17  *    notice, this list of conditions and the following disclaimer in the
   18  *    documentation and/or other materials provided with the distribution.
   19  * 4. Neither the name of the University nor the names of its contributors
   20  *    may be used to endorse or promote products derived from this software
   21  *    without specific prior written permission.
   22  *
   23  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
   24  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
   25  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
   26  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
   27  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
   28  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
   29  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
   30  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
   31  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
   32  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
   33  * SUCH DAMAGE.
   34  *
   35  *      @(#)ffs_alloc.c 8.8 (Berkeley) 2/21/94
   36  * $FreeBSD$
   37  */
   38 
   39 #include <sys/param.h>
   40 #include <sys/systm.h>
   41 #include <sys/conf.h>
   42 #include <sys/vnode.h>
   43 #include <sys/stat.h>
   44 #include <sys/mount.h>
   45 #include <sys/sysctl.h>
   46 #include <sys/syslog.h>
   47 #include <sys/buf.h>
   48 
   49 #include <fs/ext2fs/fs.h>
   50 #include <fs/ext2fs/inode.h>
   51 #include <fs/ext2fs/ext2_mount.h>
   52 #include <fs/ext2fs/ext2fs.h>
   53 #include <fs/ext2fs/ext2_extern.h>
   54 
   55 static daddr_t  ext2_alloccg(struct inode *, int, daddr_t, int);
   56 static daddr_t  ext2_clusteralloc(struct inode *, int, daddr_t, int);
   57 static u_long   ext2_dirpref(struct inode *);
   58 static void     ext2_fserr(struct m_ext2fs *, uid_t, char *);
   59 static u_long   ext2_hashalloc(struct inode *, int, long, int,
   60                                 daddr_t (*)(struct inode *, int, daddr_t, 
   61                                                 int));
   62 static daddr_t  ext2_nodealloccg(struct inode *, int, daddr_t, int);
   63 static daddr_t  ext2_mapsearch(struct m_ext2fs *, char *, daddr_t);
   64 
   65 /*
   66  * Allocate a block in the filesystem.
   67  *
   68  * A preference may be optionally specified. If a preference is given
   69  * the following hierarchy is used to allocate a block:
   70  *   1) allocate the requested block.
   71  *   2) allocate a rotationally optimal block in the same cylinder.
   72  *   3) allocate a block in the same cylinder group.
   73  *   4) quadradically rehash into other cylinder groups, until an
   74  *        available block is located.
   75  * If no block preference is given the following hierarchy is used
   76  * to allocate a block:
   77  *   1) allocate a block in the cylinder group that contains the
   78  *        inode for the file.
   79  *   2) quadradically rehash into other cylinder groups, until an
   80  *        available block is located.
   81  */
   82 int
   83 ext2_alloc(struct inode *ip, daddr_t lbn, e4fs_daddr_t bpref, int size,
   84     struct ucred *cred, e4fs_daddr_t *bnp)
   85 {
   86         struct m_ext2fs *fs;
   87         struct ext2mount *ump;
   88         int32_t bno;
   89         int cg;
   90 
   91         *bnp = 0;
   92         fs = ip->i_e2fs;
   93         ump = ip->i_ump;
   94         mtx_assert(EXT2_MTX(ump), MA_OWNED);
   95 #ifdef INVARIANTS
   96         if ((u_int)size > fs->e2fs_bsize || blkoff(fs, size) != 0) {
   97                 vn_printf(ip->i_devvp, "bsize = %lu, size = %d, fs = %s\n",
   98                     (long unsigned int)fs->e2fs_bsize, size, fs->e2fs_fsmnt);
   99                 panic("ext2_alloc: bad size");
  100         }
  101         if (cred == NOCRED)
  102                 panic("ext2_alloc: missing credential");
  103 #endif          /* INVARIANTS */
  104         if (size == fs->e2fs_bsize && fs->e2fs->e2fs_fbcount == 0)
  105                 goto nospace;
  106         if (cred->cr_uid != 0 &&
  107             fs->e2fs->e2fs_fbcount < fs->e2fs->e2fs_rbcount)
  108                 goto nospace;
  109         if (bpref >= fs->e2fs->e2fs_bcount)
  110                 bpref = 0;
  111         if (bpref == 0)
  112                 cg = ino_to_cg(fs, ip->i_number);
  113         else
  114                 cg = dtog(fs, bpref);
  115         bno = (daddr_t)ext2_hashalloc(ip, cg, bpref, fs->e2fs_bsize,
  116             ext2_alloccg);
  117         if (bno > 0) {
  118                 /* set next_alloc fields as done in block_getblk */
  119                 ip->i_next_alloc_block = lbn;
  120                 ip->i_next_alloc_goal = bno;
  121 
  122                 ip->i_blocks += btodb(fs->e2fs_bsize);
  123                 ip->i_flag |= IN_CHANGE | IN_UPDATE;
  124                 *bnp = bno;
  125                 return (0);
  126         }
  127 nospace:
  128         EXT2_UNLOCK(ump);
  129         ext2_fserr(fs, cred->cr_uid, "filesystem full");
  130         uprintf("\n%s: write failed, filesystem is full\n", fs->e2fs_fsmnt);
  131         return (ENOSPC);
  132 }
  133 
  134 /*
  135  * Reallocate a sequence of blocks into a contiguous sequence of blocks.
  136  *
  137  * The vnode and an array of buffer pointers for a range of sequential
  138  * logical blocks to be made contiguous is given. The allocator attempts
  139  * to find a range of sequential blocks starting as close as possible to
  140  * an fs_rotdelay offset from the end of the allocation for the logical
  141  * block immediately preceding the current range. If successful, the
  142  * physical block numbers in the buffer pointers and in the inode are
  143  * changed to reflect the new allocation. If unsuccessful, the allocation
  144  * is left unchanged. The success in doing the reallocation is returned.
  145  * Note that the error return is not reflected back to the user. Rather
  146  * the previous block allocation will be used.
  147  */
  148 
  149 static SYSCTL_NODE(_vfs, OID_AUTO, ext2fs, CTLFLAG_RW, 0, "EXT2FS filesystem");
  150 
  151 static int doasyncfree = 1;
  152 
  153 SYSCTL_INT(_vfs_ext2fs, OID_AUTO, doasyncfree, CTLFLAG_RW, &doasyncfree, 0,
  154     "Use asychronous writes to update block pointers when freeing blocks");
  155 
  156 static int doreallocblks = 0;
  157 
  158 SYSCTL_INT(_vfs_ext2fs, OID_AUTO, doreallocblks, CTLFLAG_RW, &doreallocblks, 0, "");
  159 
  160 int
  161 ext2_reallocblks(struct vop_reallocblks_args *ap)
  162 {
  163         struct m_ext2fs *fs;
  164         struct inode *ip;
  165         struct vnode *vp;
  166         struct buf *sbp, *ebp;
  167         uint32_t *bap, *sbap, *ebap;
  168         struct ext2mount *ump;
  169         struct cluster_save *buflist;
  170         struct indir start_ap[NIADDR + 1], end_ap[NIADDR + 1], *idp;
  171         e2fs_lbn_t start_lbn, end_lbn;
  172         int soff;
  173         e2fs_daddr_t newblk, blkno;
  174         int i, len, start_lvl, end_lvl, pref, ssize;
  175 
  176         if (doreallocblks == 0)
  177                 return (ENOSPC);
  178 
  179         vp = ap->a_vp;
  180         ip = VTOI(vp);
  181         fs = ip->i_e2fs;
  182         ump = ip->i_ump;
  183 
  184         if (fs->e2fs_contigsumsize <= 0)
  185                 return (ENOSPC);
  186 
  187         buflist = ap->a_buflist;
  188         len = buflist->bs_nchildren;
  189         start_lbn = buflist->bs_children[0]->b_lblkno;
  190         end_lbn = start_lbn + len - 1;
  191 #ifdef INVARIANTS
  192         for (i = 1; i < len; i++)
  193                 if (buflist->bs_children[i]->b_lblkno != start_lbn + i)
  194                         panic("ext2_reallocblks: non-cluster");
  195 #endif
  196         /*
  197          * If the cluster crosses the boundary for the first indirect
  198          * block, leave space for the indirect block. Indirect blocks
  199          * are initially laid out in a position after the last direct
  200          * block. Block reallocation would usually destroy locality by
  201          * moving the indirect block out of the way to make room for
  202          * data blocks if we didn't compensate here. We should also do
  203          * this for other indirect block boundaries, but it is only
  204          * important for the first one.
  205          */
  206         if (start_lbn < NDADDR && end_lbn >= NDADDR)
  207                 return (ENOSPC);
  208         /*
  209          * If the latest allocation is in a new cylinder group, assume that
  210          * the filesystem has decided to move and do not force it back to
  211          * the previous cylinder group.
  212          */
  213         if (dtog(fs, dbtofsb(fs, buflist->bs_children[0]->b_blkno)) !=
  214             dtog(fs, dbtofsb(fs, buflist->bs_children[len - 1]->b_blkno)))
  215                 return (ENOSPC);
  216         if (ext2_getlbns(vp, start_lbn, start_ap, &start_lvl) ||
  217             ext2_getlbns(vp, end_lbn, end_ap, &end_lvl))
  218                 return (ENOSPC);
  219         /*
  220          * Get the starting offset and block map for the first block.
  221          */
  222         if (start_lvl == 0) {
  223                 sbap = &ip->i_db[0];
  224                 soff = start_lbn;
  225         } else {
  226                 idp = &start_ap[start_lvl - 1];
  227                 if (bread(vp, idp->in_lbn, (int)fs->e2fs_bsize, NOCRED, &sbp)) {
  228                         brelse(sbp);
  229                         return (ENOSPC);
  230                 }
  231                 sbap = (u_int *)sbp->b_data;
  232                 soff = idp->in_off;
  233         }
  234         /*
  235          * If the block range spans two block maps, get the second map.
  236          */
  237         ebap = NULL;
  238         if (end_lvl == 0 || (idp = &end_ap[end_lvl - 1])->in_off + 1 >= len) {
  239                 ssize = len;
  240         } else {
  241 #ifdef INVARIANTS
  242                 if (start_ap[start_lvl - 1].in_lbn == idp->in_lbn)
  243                         panic("ext2_reallocblks: start == end");
  244 #endif
  245                 ssize = len - (idp->in_off + 1);
  246                 if (bread(vp, idp->in_lbn, (int)fs->e2fs_bsize, NOCRED, &ebp))
  247                         goto fail;
  248                 ebap = (u_int *)ebp->b_data;
  249         }
  250         /*
  251          * Find the preferred location for the cluster.
  252          */
  253         EXT2_LOCK(ump);
  254         pref = ext2_blkpref(ip, start_lbn, soff, sbap, 0);
  255         /*
  256          * Search the block map looking for an allocation of the desired size.
  257          */
  258         if ((newblk = (e2fs_daddr_t)ext2_hashalloc(ip, dtog(fs, pref), pref,
  259             len, ext2_clusteralloc)) == 0) {
  260                 EXT2_UNLOCK(ump);
  261                 goto fail;
  262         }
  263         /*
  264          * We have found a new contiguous block.
  265          *
  266          * First we have to replace the old block pointers with the new
  267          * block pointers in the inode and indirect blocks associated
  268          * with the file.
  269          */
  270 #ifdef DEBUG
  271         printf("realloc: ino %d, lbns %jd-%jd\n\told:", ip->i_number,
  272             (intmax_t)start_lbn, (intmax_t)end_lbn);
  273 #endif  /* DEBUG */
  274         blkno = newblk;
  275         for (bap = &sbap[soff], i = 0; i < len; i++, blkno += fs->e2fs_fpb) {
  276                 if (i == ssize) {
  277                         bap = ebap;
  278                         soff = -i;
  279                 }
  280 #ifdef INVARIANTS
  281                 if (buflist->bs_children[i]->b_blkno != fsbtodb(fs, *bap))
  282                         panic("ext2_reallocblks: alloc mismatch");
  283 #endif
  284 #ifdef DEBUG
  285                 printf(" %d,", *bap);
  286 #endif  /* DEBUG */
  287                 *bap++ = blkno;
  288         }
  289         /*
  290          * Next we must write out the modified inode and indirect blocks.
  291          * For strict correctness, the writes should be synchronous since
  292          * the old block values may have been written to disk. In practise
  293          * they are almost never written, but if we are concerned about
  294          * strict correctness, the `doasyncfree' flag should be set to zero.
  295          *
  296          * The test on `doasyncfree' should be changed to test a flag
  297          * that shows whether the associated buffers and inodes have
  298          * been written. The flag should be set when the cluster is
  299          * started and cleared whenever the buffer or inode is flushed.
  300          * We can then check below to see if it is set, and do the
  301          * synchronous write only when it has been cleared.
  302          */
  303         if (sbap != &ip->i_db[0]) {
  304                 if (doasyncfree)
  305                         bdwrite(sbp);
  306                 else
  307                         bwrite(sbp);
  308         } else {
  309                 ip->i_flag |= IN_CHANGE | IN_UPDATE;
  310                 if (!doasyncfree)
  311                         ext2_update(vp, 1);
  312         }
  313         if (ssize < len) {
  314                 if (doasyncfree)
  315                         bdwrite(ebp);
  316                 else
  317                         bwrite(ebp);
  318         }
  319         /*
  320          * Last, free the old blocks and assign the new blocks to the buffers.
  321          */
  322 #ifdef DEBUG
  323         printf("\n\tnew:");
  324 #endif  /* DEBUG */
  325         for (blkno = newblk, i = 0; i < len; i++, blkno += fs->e2fs_fpb) {
  326                 ext2_blkfree(ip, dbtofsb(fs, buflist->bs_children[i]->b_blkno),
  327                     fs->e2fs_bsize);
  328                 buflist->bs_children[i]->b_blkno = fsbtodb(fs, blkno);
  329 #ifdef DEBUG
  330                 printf(" %d,", blkno);
  331 #endif  /* DEBUG */
  332         }
  333 #ifdef DEBUG
  334         printf("\n");
  335 #endif  /* DEBUG */
  336         return (0);
  337 
  338 fail:
  339         if (ssize < len)
  340                 brelse(ebp);
  341         if (sbap != &ip->i_db[0])
  342                 brelse(sbp);
  343         return (ENOSPC);
  344 }
  345 
  346 /*
  347  * Allocate an inode in the filesystem.
  348  *
  349  */
  350 int
  351 ext2_valloc(struct vnode *pvp, int mode, struct ucred *cred, struct vnode **vpp)
  352 {
  353         struct timespec ts;
  354         struct inode *pip;
  355         struct m_ext2fs *fs;
  356         struct inode *ip;
  357         struct ext2mount *ump;
  358         ino_t ino, ipref;
  359         int i, error, cg;
  360 
  361         *vpp = NULL;
  362         pip = VTOI(pvp);
  363         fs = pip->i_e2fs;
  364         ump = pip->i_ump;
  365 
  366         EXT2_LOCK(ump);
  367         if (fs->e2fs->e2fs_ficount == 0)
  368                 goto noinodes;
  369         /*
  370          * If it is a directory then obtain a cylinder group based on
  371          * ext2_dirpref else obtain it using ino_to_cg. The preferred inode is
  372          * always the next inode.
  373          */
  374         if ((mode & IFMT) == IFDIR) {
  375                 cg = ext2_dirpref(pip);
  376                 if (fs->e2fs_contigdirs[cg] < 255)
  377                         fs->e2fs_contigdirs[cg]++;
  378         } else {
  379                 cg = ino_to_cg(fs, pip->i_number);
  380                 if (fs->e2fs_contigdirs[cg] > 0)
  381                         fs->e2fs_contigdirs[cg]--;
  382         }
  383         ipref = cg * fs->e2fs->e2fs_ipg + 1;
  384         ino = (ino_t)ext2_hashalloc(pip, cg, (long)ipref, mode, ext2_nodealloccg);
  385 
  386         if (ino == 0)
  387                 goto noinodes;
  388         error = VFS_VGET(pvp->v_mount, ino, LK_EXCLUSIVE, vpp);
  389         if (error) {
  390                 ext2_vfree(pvp, ino, mode);
  391                 return (error);
  392         }
  393         ip = VTOI(*vpp);
  394 
  395         /*
  396          * The question is whether using VGET was such good idea at all:
  397          * Linux doesn't read the old inode in when it is allocating a
  398          * new one. I will set at least i_size and i_blocks to zero.
  399          */
  400         ip->i_flag = 0;
  401         ip->i_size = 0;
  402         ip->i_blocks = 0;
  403         ip->i_mode = 0;
  404         ip->i_flags = 0;
  405         /* now we want to make sure that the block pointers are zeroed out */
  406         for (i = 0; i < NDADDR; i++)
  407                 ip->i_db[i] = 0;
  408         for (i = 0; i < NIADDR; i++)
  409                 ip->i_ib[i] = 0;
  410 
  411         /*
  412          * Set up a new generation number for this inode.
  413          */
  414         ip->i_gen = arc4random();
  415 
  416         vfs_timestamp(&ts);
  417         ip->i_birthtime = ts.tv_sec;
  418         ip->i_birthnsec = ts.tv_nsec;
  419 
  420 /*
  421 printf("ext2_valloc: allocated inode %d\n", ino);
  422 */
  423         return (0);
  424 noinodes:
  425         EXT2_UNLOCK(ump);
  426         ext2_fserr(fs, cred->cr_uid, "out of inodes");
  427         uprintf("\n%s: create/symlink failed, no inodes free\n", fs->e2fs_fsmnt);
  428         return (ENOSPC);
  429 }
  430 
  431 /*
  432  * Find a cylinder to place a directory.
  433  *
  434  * The policy implemented by this algorithm is to allocate a
  435  * directory inode in the same cylinder group as its parent
  436  * directory, but also to reserve space for its files inodes
  437  * and data. Restrict the number of directories which may be
  438  * allocated one after another in the same cylinder group
  439  * without intervening allocation of files.
  440  *
  441  * If we allocate a first level directory then force allocation
  442  * in another cylinder group.
  443  *
  444  */
  445 static u_long
  446 ext2_dirpref(struct inode *pip)
  447 {
  448         struct m_ext2fs *fs;
  449         int cg, prefcg, cgsize;
  450         u_int avgifree, avgbfree, avgndir, curdirsize;
  451         u_int minifree, minbfree, maxndir;
  452         u_int mincg, minndir;
  453         u_int dirsize, maxcontigdirs;
  454 
  455         mtx_assert(EXT2_MTX(pip->i_ump), MA_OWNED);
  456         fs = pip->i_e2fs;
  457 
  458         avgifree = fs->e2fs->e2fs_ficount / fs->e2fs_gcount;
  459         avgbfree = fs->e2fs->e2fs_fbcount / fs->e2fs_gcount;
  460         avgndir = fs->e2fs_total_dir / fs->e2fs_gcount;
  461 
  462         /*
  463          * Force allocation in another cg if creating a first level dir.
  464          */
  465         ASSERT_VOP_LOCKED(ITOV(pip), "ext2fs_dirpref");
  466         if (ITOV(pip)->v_vflag & VV_ROOT) {
  467                 prefcg = arc4random() % fs->e2fs_gcount;
  468                 mincg = prefcg;
  469                 minndir = fs->e2fs_ipg;
  470                 for (cg = prefcg; cg < fs->e2fs_gcount; cg++)
  471                         if (fs->e2fs_gd[cg].ext2bgd_ndirs < minndir &&
  472                             fs->e2fs_gd[cg].ext2bgd_nifree >= avgifree &&
  473                             fs->e2fs_gd[cg].ext2bgd_nbfree >= avgbfree) {
  474                                 mincg = cg;
  475                                 minndir = fs->e2fs_gd[cg].ext2bgd_ndirs;
  476                         }
  477                 for (cg = 0; cg < prefcg; cg++)
  478                         if (fs->e2fs_gd[cg].ext2bgd_ndirs < minndir &&
  479                             fs->e2fs_gd[cg].ext2bgd_nifree >= avgifree &&
  480                             fs->e2fs_gd[cg].ext2bgd_nbfree >= avgbfree) {
  481                                 mincg = cg;
  482                                 minndir = fs->e2fs_gd[cg].ext2bgd_ndirs;
  483                         }
  484                 return (mincg);
  485         }
  486         /*
  487          * Count various limits which used for
  488          * optimal allocation of a directory inode.
  489          */
  490         maxndir = min(avgndir + fs->e2fs_ipg / 16, fs->e2fs_ipg);
  491         minifree = avgifree - avgifree / 4;
  492         if (minifree < 1)
  493                 minifree = 1;
  494         minbfree = avgbfree - avgbfree / 4;
  495         if (minbfree < 1)
  496                 minbfree = 1;
  497         cgsize = fs->e2fs_fsize * fs->e2fs_fpg;
  498         dirsize = AVGDIRSIZE;
  499         curdirsize = avgndir ? (cgsize - avgbfree * fs->e2fs_bsize) / avgndir : 0;
  500         if (dirsize < curdirsize)
  501                 dirsize = curdirsize;
  502         maxcontigdirs = min((avgbfree * fs->e2fs_bsize) / dirsize, 255);
  503         maxcontigdirs = min(maxcontigdirs, fs->e2fs_ipg / AFPDIR);
  504         if (maxcontigdirs == 0)
  505                 maxcontigdirs = 1;
  506 
  507         /*
  508          * Limit number of dirs in one cg and reserve space for
  509          * regular files, but only if we have no deficit in
  510          * inodes or space.
  511          */
  512         prefcg = ino_to_cg(fs, pip->i_number);
  513         for (cg = prefcg; cg < fs->e2fs_gcount; cg++)
  514                 if (fs->e2fs_gd[cg].ext2bgd_ndirs < maxndir &&
  515                     fs->e2fs_gd[cg].ext2bgd_nifree >= minifree &&
  516                     fs->e2fs_gd[cg].ext2bgd_nbfree >= minbfree) {
  517                         if (fs->e2fs_contigdirs[cg] < maxcontigdirs)
  518                                 return (cg);
  519                 }
  520         for (cg = 0; cg < prefcg; cg++)
  521                 if (fs->e2fs_gd[cg].ext2bgd_ndirs < maxndir &&
  522                     fs->e2fs_gd[cg].ext2bgd_nifree >= minifree &&
  523                     fs->e2fs_gd[cg].ext2bgd_nbfree >= minbfree) {
  524                         if (fs->e2fs_contigdirs[cg] < maxcontigdirs)
  525                                 return (cg);
  526                 }
  527         /*
  528          * This is a backstop when we have deficit in space.
  529          */
  530         for (cg = prefcg; cg < fs->e2fs_gcount; cg++)
  531                 if (fs->e2fs_gd[cg].ext2bgd_nifree >= avgifree)
  532                         return (cg);
  533         for (cg = 0; cg < prefcg; cg++)
  534                 if (fs->e2fs_gd[cg].ext2bgd_nifree >= avgifree)
  535                         break;
  536         return (cg);
  537 }
  538 
  539 /*
  540  * Select the desired position for the next block in a file.
  541  *
  542  * we try to mimic what Remy does in inode_getblk/block_getblk
  543  *
  544  * we note: blocknr == 0 means that we're about to allocate either
  545  * a direct block or a pointer block at the first level of indirection
  546  * (In other words, stuff that will go in i_db[] or i_ib[])
  547  *
  548  * blocknr != 0 means that we're allocating a block that is none
  549  * of the above. Then, blocknr tells us the number of the block
  550  * that will hold the pointer
  551  */
  552 e4fs_daddr_t
  553 ext2_blkpref(struct inode *ip, e2fs_lbn_t lbn, int indx, e2fs_daddr_t *bap,
  554     e2fs_daddr_t blocknr)
  555 {
  556         int tmp;
  557 
  558         mtx_assert(EXT2_MTX(ip->i_ump), MA_OWNED);
  559 
  560         /*
  561          * If the next block is actually what we thought it is, then set the
  562          * goal to what we thought it should be.
  563          */
  564         if (ip->i_next_alloc_block == lbn && ip->i_next_alloc_goal != 0)
  565                 return ip->i_next_alloc_goal;
  566 
  567         /*
  568          * Now check whether we were provided with an array that basically
  569          * tells us previous blocks to which we want to stay close.
  570          */
  571         if (bap)
  572                 for (tmp = indx - 1; tmp >= 0; tmp--)
  573                         if (bap[tmp])
  574                                 return bap[tmp];
  575 
  576         /*
  577          * Else lets fall back to the blocknr or, if there is none, follow
  578          * the rule that a block should be allocated near its inode.
  579          */
  580         return blocknr ? blocknr :
  581             (e2fs_daddr_t)(ip->i_block_group *
  582             EXT2_BLOCKS_PER_GROUP(ip->i_e2fs)) +
  583             ip->i_e2fs->e2fs->e2fs_first_dblock;
  584 }
  585 
  586 /*
  587  * Implement the cylinder overflow algorithm.
  588  *
  589  * The policy implemented by this algorithm is:
  590  *   1) allocate the block in its requested cylinder group.
  591  *   2) quadradically rehash on the cylinder group number.
  592  *   3) brute force search for a free block.
  593  */
  594 static u_long
  595 ext2_hashalloc(struct inode *ip, int cg, long pref, int size,
  596     daddr_t (*allocator) (struct inode *, int, daddr_t, int))
  597 {
  598         struct m_ext2fs *fs;
  599         ino_t result;
  600         int i, icg = cg;
  601 
  602         mtx_assert(EXT2_MTX(ip->i_ump), MA_OWNED);
  603         fs = ip->i_e2fs;
  604         /*
  605          * 1: preferred cylinder group
  606          */
  607         result = (*allocator)(ip, cg, pref, size);
  608         if (result)
  609                 return (result);
  610         /*
  611          * 2: quadratic rehash
  612          */
  613         for (i = 1; i < fs->e2fs_gcount; i *= 2) {
  614                 cg += i;
  615                 if (cg >= fs->e2fs_gcount)
  616                         cg -= fs->e2fs_gcount;
  617                 result = (*allocator)(ip, cg, 0, size);
  618                 if (result)
  619                         return (result);
  620         }
  621         /*
  622          * 3: brute force search
  623          * Note that we start at i == 2, since 0 was checked initially,
  624          * and 1 is always checked in the quadratic rehash.
  625          */
  626         cg = (icg + 2) % fs->e2fs_gcount;
  627         for (i = 2; i < fs->e2fs_gcount; i++) {
  628                 result = (*allocator)(ip, cg, 0, size);
  629                 if (result)
  630                         return (result);
  631                 cg++;
  632                 if (cg == fs->e2fs_gcount)
  633                         cg = 0;
  634         }
  635         return (0);
  636 }
  637 
  638 /*
  639  * Determine whether a block can be allocated.
  640  *
  641  * Check to see if a block of the appropriate size is available,
  642  * and if it is, allocate it.
  643  */
  644 static daddr_t
  645 ext2_alloccg(struct inode *ip, int cg, daddr_t bpref, int size)
  646 {
  647         struct m_ext2fs *fs;
  648         struct buf *bp;
  649         struct ext2mount *ump;
  650         daddr_t bno, runstart, runlen;
  651         int bit, loc, end, error, start;
  652         char *bbp;
  653         /* XXX ondisk32 */
  654         fs = ip->i_e2fs;
  655         ump = ip->i_ump;
  656         if (fs->e2fs_gd[cg].ext2bgd_nbfree == 0)
  657                 return (0);
  658         EXT2_UNLOCK(ump);
  659         error = bread(ip->i_devvp, fsbtodb(fs,
  660             fs->e2fs_gd[cg].ext2bgd_b_bitmap),
  661             (int)fs->e2fs_bsize, NOCRED, &bp);
  662         if (error) {
  663                 brelse(bp);
  664                 EXT2_LOCK(ump);
  665                 return (0);
  666         }
  667         if (fs->e2fs_gd[cg].ext2bgd_nbfree == 0) {
  668                 /*
  669                  * Another thread allocated the last block in this
  670                  * group while we were waiting for the buffer.
  671                  */
  672                 brelse(bp);
  673                 EXT2_LOCK(ump);
  674                 return (0);
  675         }
  676         bbp = (char *)bp->b_data;
  677 
  678         if (dtog(fs, bpref) != cg)
  679                 bpref = 0;
  680         if (bpref != 0) {
  681                 bpref = dtogd(fs, bpref);
  682                 /*
  683                  * if the requested block is available, use it
  684                  */
  685                 if (isclr(bbp, bpref)) {
  686                         bno = bpref;
  687                         goto gotit;
  688                 }
  689         }
  690         /*
  691          * no blocks in the requested cylinder, so take next
  692          * available one in this cylinder group.
  693          * first try to get 8 contigous blocks, then fall back to a single
  694          * block.
  695          */
  696         if (bpref)
  697                 start = dtogd(fs, bpref) / NBBY;
  698         else
  699                 start = 0;
  700         end = howmany(fs->e2fs->e2fs_fpg, NBBY) - start;
  701 retry:
  702         runlen = 0;
  703         runstart = 0;
  704         for (loc = start; loc < end; loc++) {
  705                 if (bbp[loc] == (char)0xff) {
  706                         runlen = 0;
  707                         continue;
  708                 }
  709 
  710                 /* Start of a run, find the number of high clear bits. */
  711                 if (runlen == 0) {
  712                         bit = fls(bbp[loc]);
  713                         runlen = NBBY - bit;
  714                         runstart = loc * NBBY + bit;
  715                 } else if (bbp[loc] == 0) {
  716                         /* Continue a run. */
  717                         runlen += NBBY;
  718                 } else {
  719                         /*
  720                          * Finish the current run.  If it isn't long
  721                          * enough, start a new one.
  722                          */
  723                         bit = ffs(bbp[loc]) - 1;
  724                         runlen += bit;
  725                         if (runlen >= 8) {
  726                                 bno = runstart;
  727                                 goto gotit;
  728                         }
  729 
  730                         /* Run was too short, start a new one. */
  731                         bit = fls(bbp[loc]);
  732                         runlen = NBBY - bit;
  733                         runstart = loc * NBBY + bit;
  734                 }
  735 
  736                 /* If the current run is long enough, use it. */
  737                 if (runlen >= 8) {
  738                         bno = runstart;
  739                         goto gotit;
  740                 }
  741         }
  742         if (start != 0) {
  743                 end = start;
  744                 start = 0;
  745                 goto retry;
  746         }
  747         bno = ext2_mapsearch(fs, bbp, bpref);
  748         if (bno < 0) {
  749                 brelse(bp);
  750                 EXT2_LOCK(ump);
  751                 return (0);
  752         }
  753 gotit:
  754 #ifdef INVARIANTS
  755         if (isset(bbp, bno)) {
  756                 printf("ext2fs_alloccgblk: cg=%d bno=%jd fs=%s\n",
  757                     cg, (intmax_t)bno, fs->e2fs_fsmnt);
  758                 panic("ext2fs_alloccg: dup alloc");
  759         }
  760 #endif
  761         setbit(bbp, bno);
  762         EXT2_LOCK(ump);
  763         ext2_clusteracct(fs, bbp, cg, bno, -1);
  764         fs->e2fs->e2fs_fbcount--;
  765         fs->e2fs_gd[cg].ext2bgd_nbfree--;
  766         fs->e2fs_fmod = 1;
  767         EXT2_UNLOCK(ump);
  768         bdwrite(bp);
  769         return (cg * fs->e2fs->e2fs_fpg + fs->e2fs->e2fs_first_dblock + bno);
  770 }
  771 
  772 /*
  773  * Determine whether a cluster can be allocated.
  774  */
  775 static daddr_t
  776 ext2_clusteralloc(struct inode *ip, int cg, daddr_t bpref, int len)
  777 {
  778         struct m_ext2fs *fs;
  779         struct ext2mount *ump;
  780         struct buf *bp;
  781         char *bbp;
  782         int bit, error, got, i, loc, run;
  783         int32_t *lp;
  784         daddr_t bno;
  785 
  786         fs = ip->i_e2fs;
  787         ump = ip->i_ump;
  788 
  789         if (fs->e2fs_maxcluster[cg] < len)
  790                 return (0);
  791 
  792         EXT2_UNLOCK(ump);
  793         error = bread(ip->i_devvp,
  794             fsbtodb(fs, fs->e2fs_gd[cg].ext2bgd_b_bitmap),
  795             (int)fs->e2fs_bsize, NOCRED, &bp);
  796         if (error)
  797                 goto fail_lock;
  798 
  799         bbp = (char *)bp->b_data;
  800         EXT2_LOCK(ump);
  801         /*
  802          * Check to see if a cluster of the needed size (or bigger) is
  803          * available in this cylinder group.
  804          */
  805         lp = &fs->e2fs_clustersum[cg].cs_sum[len];
  806         for (i = len; i <= fs->e2fs_contigsumsize; i++)
  807                 if (*lp++ > 0)
  808                         break;
  809         if (i > fs->e2fs_contigsumsize) {
  810                 /*
  811                  * Update the cluster summary information to reflect
  812                  * the true maximum-sized cluster so that future cluster
  813                  * allocation requests can avoid reading the bitmap only
  814                  * to find no cluster.
  815                  */
  816                 lp = &fs->e2fs_clustersum[cg].cs_sum[len - 1];
  817                 for (i = len - 1; i > 0; i--)
  818                         if (*lp-- > 0)
  819                                 break;
  820                 fs->e2fs_maxcluster[cg] = i;
  821                 goto fail;
  822         }
  823         EXT2_UNLOCK(ump);
  824 
  825         /* Search the bitmap to find a big enough cluster like in FFS. */
  826         if (dtog(fs, bpref) != cg)
  827                 bpref = 0;
  828         if (bpref != 0)
  829                 bpref = dtogd(fs, bpref);
  830         loc = bpref / NBBY;
  831         bit = 1 << (bpref % NBBY);
  832         for (run = 0, got = bpref; got < fs->e2fs->e2fs_fpg; got++) {
  833                 if ((bbp[loc] & bit) != 0)
  834                         run = 0;
  835                 else {
  836                         run++;
  837                         if (run == len)
  838                                 break;
  839                 }
  840                 if ((got & (NBBY - 1)) != (NBBY - 1))
  841                         bit <<= 1;
  842                 else {
  843                         loc++;
  844                         bit = 1;
  845                 }
  846         }
  847 
  848         if (got >= fs->e2fs->e2fs_fpg)
  849                 goto fail_lock;
  850 
  851         /* Allocate the cluster that we found. */
  852         for (i = 1; i < len; i++)
  853                 if (!isclr(bbp, got - run + i))
  854                         panic("ext2_clusteralloc: map mismatch");
  855 
  856         bno = got - run + 1;
  857         if (bno >= fs->e2fs->e2fs_fpg)
  858                 panic("ext2_clusteralloc: allocated out of group");
  859 
  860         EXT2_LOCK(ump);
  861         for (i = 0; i < len; i += fs->e2fs_fpb) {
  862                 setbit(bbp, bno + i);
  863                 ext2_clusteracct(fs, bbp, cg, bno + i, -1);
  864                 fs->e2fs->e2fs_fbcount--;
  865                 fs->e2fs_gd[cg].ext2bgd_nbfree--;
  866         }
  867         fs->e2fs_fmod = 1;
  868         EXT2_UNLOCK(ump);
  869 
  870         bdwrite(bp);
  871         return (cg * fs->e2fs->e2fs_fpg + fs->e2fs->e2fs_first_dblock + bno);
  872 
  873 fail_lock:
  874         EXT2_LOCK(ump);
  875 fail:
  876         brelse(bp);
  877         return (0);
  878 }
  879 
  880 /*
  881  * Determine whether an inode can be allocated.
  882  *
  883  * Check to see if an inode is available, and if it is,
  884  * allocate it using tode in the specified cylinder group.
  885  */
  886 static daddr_t
  887 ext2_nodealloccg(struct inode *ip, int cg, daddr_t ipref, int mode)
  888 {
  889         struct m_ext2fs *fs;
  890         struct buf *bp;
  891         struct ext2mount *ump;
  892         int error, start, len;
  893         char *ibp, *loc;
  894 
  895         ipref--;        /* to avoid a lot of (ipref -1) */
  896         if (ipref == -1)
  897                 ipref = 0;
  898         fs = ip->i_e2fs;
  899         ump = ip->i_ump;
  900         if (fs->e2fs_gd[cg].ext2bgd_nifree == 0)
  901                 return (0);
  902         EXT2_UNLOCK(ump);
  903         error = bread(ip->i_devvp, fsbtodb(fs,
  904             fs->e2fs_gd[cg].ext2bgd_i_bitmap),
  905             (int)fs->e2fs_bsize, NOCRED, &bp);
  906         if (error) {
  907                 brelse(bp);
  908                 EXT2_LOCK(ump);
  909                 return (0);
  910         }
  911         if (fs->e2fs_gd[cg].ext2bgd_nifree == 0) {
  912                 /*
  913                  * Another thread allocated the last i-node in this
  914                  * group while we were waiting for the buffer.
  915                  */
  916                 brelse(bp);
  917                 EXT2_LOCK(ump);
  918                 return (0);
  919         }
  920         ibp = (char *)bp->b_data;
  921         if (ipref) {
  922                 ipref %= fs->e2fs->e2fs_ipg;
  923                 if (isclr(ibp, ipref))
  924                         goto gotit;
  925         }
  926         start = ipref / NBBY;
  927         len = howmany(fs->e2fs->e2fs_ipg - ipref, NBBY);
  928         loc = memcchr(&ibp[start], 0xff, len);
  929         if (loc == NULL) {
  930                 len = start + 1;
  931                 start = 0;
  932                 loc = memcchr(&ibp[start], 0xff, len);
  933                 if (loc == NULL) {
  934                         printf("cg = %d, ipref = %lld, fs = %s\n",
  935                             cg, (long long)ipref, fs->e2fs_fsmnt);
  936                         panic("ext2fs_nodealloccg: map corrupted");
  937                         /* NOTREACHED */
  938                 }
  939         }
  940         ipref = (loc - ibp) * NBBY + ffs(~*loc) - 1;
  941 gotit:
  942         setbit(ibp, ipref);
  943         EXT2_LOCK(ump);
  944         fs->e2fs_gd[cg].ext2bgd_nifree--;
  945         fs->e2fs->e2fs_ficount--;
  946         fs->e2fs_fmod = 1;
  947         if ((mode & IFMT) == IFDIR) {
  948                 fs->e2fs_gd[cg].ext2bgd_ndirs++;
  949                 fs->e2fs_total_dir++;
  950         }
  951         EXT2_UNLOCK(ump);
  952         bdwrite(bp);
  953         return (cg * fs->e2fs->e2fs_ipg + ipref + 1);
  954 }
  955 
  956 /*
  957  * Free a block or fragment.
  958  *
  959  */
  960 void
  961 ext2_blkfree(struct inode *ip, e4fs_daddr_t bno, long size)
  962 {
  963         struct m_ext2fs *fs;
  964         struct buf *bp;
  965         struct ext2mount *ump;
  966         int cg, error;
  967         char *bbp;
  968 
  969         fs = ip->i_e2fs;
  970         ump = ip->i_ump;
  971         cg = dtog(fs, bno);
  972         if ((u_int)bno >= fs->e2fs->e2fs_bcount) {
  973                 printf("bad block %lld, ino %llu\n", (long long)bno,
  974                     (unsigned long long)ip->i_number);
  975                 ext2_fserr(fs, ip->i_uid, "bad block");
  976                 return;
  977         }
  978         error = bread(ip->i_devvp,
  979             fsbtodb(fs, fs->e2fs_gd[cg].ext2bgd_b_bitmap),
  980             (int)fs->e2fs_bsize, NOCRED, &bp);
  981         if (error) {
  982                 brelse(bp);
  983                 return;
  984         }
  985         bbp = (char *)bp->b_data;
  986         bno = dtogd(fs, bno);
  987         if (isclr(bbp, bno)) {
  988                 printf("block = %lld, fs = %s\n",
  989                     (long long)bno, fs->e2fs_fsmnt);
  990                 panic("ext2_blkfree: freeing free block");
  991         }
  992         clrbit(bbp, bno);
  993         EXT2_LOCK(ump);
  994         ext2_clusteracct(fs, bbp, cg, bno, 1);
  995         fs->e2fs->e2fs_fbcount++;
  996         fs->e2fs_gd[cg].ext2bgd_nbfree++;
  997         fs->e2fs_fmod = 1;
  998         EXT2_UNLOCK(ump);
  999         bdwrite(bp);
 1000 }
 1001 
 1002 /*
 1003  * Free an inode.
 1004  *
 1005  */
 1006 int
 1007 ext2_vfree(struct vnode *pvp, ino_t ino, int mode)
 1008 {
 1009         struct m_ext2fs *fs;
 1010         struct inode *pip;
 1011         struct buf *bp;
 1012         struct ext2mount *ump;
 1013         int error, cg;
 1014         char *ibp;
 1015 
 1016         pip = VTOI(pvp);
 1017         fs = pip->i_e2fs;
 1018         ump = pip->i_ump;
 1019         if ((u_int)ino > fs->e2fs_ipg * fs->e2fs_gcount)
 1020                 panic("ext2_vfree: range: devvp = %p, ino = %ju, fs = %s",
 1021                     pip->i_devvp, (uintmax_t)ino, fs->e2fs_fsmnt);
 1022 
 1023         cg = ino_to_cg(fs, ino);
 1024         error = bread(pip->i_devvp,
 1025             fsbtodb(fs, fs->e2fs_gd[cg].ext2bgd_i_bitmap),
 1026             (int)fs->e2fs_bsize, NOCRED, &bp);
 1027         if (error) {
 1028                 brelse(bp);
 1029                 return (0);
 1030         }
 1031         ibp = (char *)bp->b_data;
 1032         ino = (ino - 1) % fs->e2fs->e2fs_ipg;
 1033         if (isclr(ibp, ino)) {
 1034                 printf("ino = %llu, fs = %s\n",
 1035                     (unsigned long long)ino, fs->e2fs_fsmnt);
 1036                 if (fs->e2fs_ronly == 0)
 1037                         panic("ext2_vfree: freeing free inode");
 1038         }
 1039         clrbit(ibp, ino);
 1040         EXT2_LOCK(ump);
 1041         fs->e2fs->e2fs_ficount++;
 1042         fs->e2fs_gd[cg].ext2bgd_nifree++;
 1043         if ((mode & IFMT) == IFDIR) {
 1044                 fs->e2fs_gd[cg].ext2bgd_ndirs--;
 1045                 fs->e2fs_total_dir--;
 1046         }
 1047         fs->e2fs_fmod = 1;
 1048         EXT2_UNLOCK(ump);
 1049         bdwrite(bp);
 1050         return (0);
 1051 }
 1052 
 1053 /*
 1054  * Find a block in the specified cylinder group.
 1055  *
 1056  * It is a panic if a request is made to find a block if none are
 1057  * available.
 1058  */
 1059 static daddr_t
 1060 ext2_mapsearch(struct m_ext2fs *fs, char *bbp, daddr_t bpref)
 1061 {
 1062         char *loc;
 1063         int start, len;
 1064 
 1065         /*
 1066          * find the fragment by searching through the free block
 1067          * map for an appropriate bit pattern
 1068          */
 1069         if (bpref)
 1070                 start = dtogd(fs, bpref) / NBBY;
 1071         else
 1072                 start = 0;
 1073         len = howmany(fs->e2fs->e2fs_fpg, NBBY) - start;
 1074         loc = memcchr(&bbp[start], 0xff, len);
 1075         if (loc == NULL) {
 1076                 len = start + 1;
 1077                 start = 0;
 1078                 loc = memcchr(&bbp[start], 0xff, len);
 1079                 if (loc == NULL) {
 1080                         printf("start = %d, len = %d, fs = %s\n",
 1081                             start, len, fs->e2fs_fsmnt);
 1082                         panic("ext2_mapsearch: map corrupted");
 1083                         /* NOTREACHED */
 1084                 }
 1085         }
 1086         return ((loc - bbp) * NBBY + ffs(~*loc) - 1);
 1087 }
 1088 
 1089 /*
 1090  * Fserr prints the name of a filesystem with an error diagnostic.
 1091  *
 1092  * The form of the error message is:
 1093  *      fs: error message
 1094  */
 1095 static void
 1096 ext2_fserr(struct m_ext2fs *fs, uid_t uid, char *cp)
 1097 {
 1098 
 1099         log(LOG_ERR, "uid %u on %s: %s\n", uid, fs->e2fs_fsmnt, cp);
 1100 }
 1101 
 1102 int
 1103 cg_has_sb(int i)
 1104 {
 1105         int a3, a5, a7;
 1106 
 1107         if (i == 0 || i == 1)
 1108                 return 1;
 1109         for (a3 = 3, a5 = 5, a7 = 7;
 1110             a3 <= i || a5 <= i || a7 <= i;
 1111             a3 *= 3, a5 *= 5, a7 *= 7)
 1112                 if (i == a3 || i == a5 || i == a7)
 1113                         return 1;
 1114         return 0;
 1115 }

Cache object: 7c8b07975169943c96d0e3677882a69c


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]


This page is part of the FreeBSD/Linux Linux Kernel Cross-Reference, and was automatically generated using a modified version of the LXR engine.