FreeBSD/Linux Kernel Cross Reference
sys/vfs/hammer/hammer_prune.c


    1 /*
    2  * Copyright (c) 2008 The DragonFly Project.  All rights reserved.
    3  * 
    4  * This code is derived from software contributed to The DragonFly Project
    5  * by Matthew Dillon <dillon@backplane.com>
    6  * 
    7  * Redistribution and use in source and binary forms, with or without
    8  * modification, are permitted provided that the following conditions
    9  * are met:
   10  * 
   11  * 1. Redistributions of source code must retain the above copyright
   12  *    notice, this list of conditions and the following disclaimer.
   13  * 2. Redistributions in binary form must reproduce the above copyright
   14  *    notice, this list of conditions and the following disclaimer in
   15  *    the documentation and/or other materials provided with the
   16  *    distribution.
   17  * 3. Neither the name of The DragonFly Project nor the names of its
   18  *    contributors may be used to endorse or promote products derived
   19  *    from this software without specific, prior written permission.
   20  * 
   21  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
   22  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
   23  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
   24  * FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE
   25  * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
   26  * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
   27  * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
   28  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
   29  * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
   30  * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
   31  * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
   32  * SUCH DAMAGE.
   33  * 
   34  * $DragonFly: src/sys/vfs/hammer/hammer_prune.c,v 1.19 2008/09/23 21:03:52 dillon Exp $
   35  */
   36 
   37 #include "hammer.h"
   38 
   39 /*
   40  * Iterate through the specified range of object ids and remove any
   41  * deleted records that fall entirely within a prune modulo.
   42  *
   43  * A reverse iteration is used to prevent overlapping records from being
   44  * created during the iteration due to alignments.  This also allows us
   45  * to adjust alignments without blowing up the B-Tree.
   46  */
   47 static int prune_should_delete(struct hammer_ioc_prune *prune,
   48                                hammer_btree_leaf_elm_t elm);
   49 static void prune_check_nlinks(hammer_cursor_t cursor,
   50                                hammer_btree_leaf_elm_t elm);
   51 
   52 int
   53 hammer_ioc_prune(hammer_transaction_t trans, hammer_inode_t ip,
   54                  struct hammer_ioc_prune *prune)
   55 {
   56         struct hammer_cursor cursor;
   57         hammer_btree_leaf_elm_t elm;
   58         struct hammer_ioc_prune_elm *copy_elms;
   59         struct hammer_ioc_prune_elm *user_elms;
   60         int error;
   61         int isdir;
   62         int elm_array_size;
   63         int seq;
   64 
   65         if (prune->nelms < 0 || prune->nelms > HAMMER_MAX_PRUNE_ELMS)
   66                 return(EINVAL);
   67         if ((prune->key_beg.localization | prune->key_end.localization) &
   68             HAMMER_LOCALIZE_PSEUDOFS_MASK) {
   69                 return(EINVAL);
   70         }
   71         if (prune->key_beg.localization > prune->key_end.localization)
   72                 return(EINVAL);
   73         if (prune->key_beg.localization == prune->key_end.localization) {
   74                 if (prune->key_beg.obj_id > prune->key_end.obj_id)
   75                         return(EINVAL);
   76                 /* key-space limitations - no check needed */
   77         }
   78         if ((prune->head.flags & HAMMER_IOC_PRUNE_ALL) && prune->nelms)
   79                 return(EINVAL);
   80 
   81         prune->key_cur.localization = (prune->key_end.localization &
   82                                         HAMMER_LOCALIZE_MASK) +
   83                                       ip->obj_localization;
   84         prune->key_cur.obj_id = prune->key_end.obj_id;
   85         prune->key_cur.key = HAMMER_MAX_KEY;
   86 
   87         /*
   88          * Copy element array from userland
   89          */
   90         elm_array_size = sizeof(*copy_elms) * prune->nelms;
   91         user_elms = prune->elms;
   92         copy_elms = kmalloc(elm_array_size, M_TEMP, M_WAITOK);
   93         if ((error = copyin(user_elms, copy_elms, elm_array_size)) != 0)
   94                 goto failed;
   95         prune->elms = copy_elms;
   96 
   97         seq = trans->hmp->flusher.done;
   98 
   99         /*
  100          * Scan backwards.  Retries typically occur if a deadlock is detected.
  101          */
  102 retry:
  103         error = hammer_init_cursor(trans, &cursor, NULL, NULL);
  104         if (error) {
  105                 hammer_done_cursor(&cursor);
  106                 goto failed;
  107         }
  108         cursor.key_beg.localization = (prune->key_beg.localization &
  109                                         HAMMER_LOCALIZE_MASK) +
  110                                       ip->obj_localization;
  111         cursor.key_beg.obj_id = prune->key_beg.obj_id;
  112         cursor.key_beg.key = HAMMER_MIN_KEY;
  113         cursor.key_beg.create_tid = 1;
  114         cursor.key_beg.delete_tid = 0;
  115         cursor.key_beg.rec_type = HAMMER_MIN_RECTYPE;
  116         cursor.key_beg.obj_type = 0;
  117 
  118         cursor.key_end.localization = prune->key_cur.localization;
  119         cursor.key_end.obj_id = prune->key_cur.obj_id;
  120         cursor.key_end.key = prune->key_cur.key;
  121         cursor.key_end.create_tid = HAMMER_MAX_TID - 1;
  122         cursor.key_end.delete_tid = 0;
  123         cursor.key_end.rec_type = HAMMER_MAX_RECTYPE;
  124         cursor.key_end.obj_type = 0;
  125 
  126         cursor.flags |= HAMMER_CURSOR_END_INCLUSIVE;
  127         cursor.flags |= HAMMER_CURSOR_BACKEND;
  128 
  129         /*
  130          * This flag allows the B-Tree code to clean up loose ends.  At
  131          * the moment (XXX) it also means we have to hold the sync lock
  132          * through the iteration.
  133          */
  134         cursor.flags |= HAMMER_CURSOR_PRUNING;
  135 
  136         hammer_sync_lock_sh(trans);
  137         error = hammer_btree_last(&cursor);
  138         hammer_sync_unlock(trans);
  139 
  140         while (error == 0) {
  141                 /*
  142                  * Check for work
  143                  */
  144                 elm = &cursor.node->ondisk->elms[cursor.index].leaf;
  145                 prune->key_cur = elm->base;
  146 
  147                 /*
  148                  * Yield to more important tasks
  149                  */
  150                 if ((error = hammer_signal_check(trans->hmp)) != 0)
  151                         break;
  152 
  153                 if (prune->stat_oldest_tid > elm->base.create_tid)
  154                         prune->stat_oldest_tid = elm->base.create_tid;
  155 
  156                 if (hammer_debug_general & 0x0200) {
  157                         kprintf("check %016llx %016llx cre=%016llx del=%016llx\n",
  158                                         (long long)elm->base.obj_id,
  159                                         (long long)elm->base.key,
  160                                         (long long)elm->base.create_tid,
  161                                         (long long)elm->base.delete_tid);
  162                 }
  163                                 
  164                 if (prune_should_delete(prune, elm)) {
  165                         if (hammer_debug_general & 0x0200) {
  166                                 kprintf("check %016llx %016llx: DELETE\n",
  167                                         (long long)elm->base.obj_id,
  168                                         (long long)elm->base.key);
  169                         }
  170 
  171                         /*
  172                          * NOTE: This can return EDEADLK
  173                          *
  174                          * Acquiring the sync lock guarantees that the
  175                          * operation will not cross a synchronization
  176                          * boundary (see the flusher).
  177                          *
   178                          * We don't need to track inodes or next_tid when
  179                          * we are destroying deleted records.
  180                          */
  181                         isdir = (elm->base.rec_type == HAMMER_RECTYPE_DIRENTRY);
  182 
  183                         hammer_sync_lock_sh(trans);
  184                         error = hammer_delete_at_cursor(&cursor,
  185                                                         HAMMER_DELETE_DESTROY,
  186                                                         cursor.trans->tid,
  187                                                         cursor.trans->time32,
  188                                                         0, &prune->stat_bytes);
  189                         hammer_sync_unlock(trans);
  190                         if (error)
  191                                 break;
  192 
  193                         if (isdir)
  194                                 ++prune->stat_dirrecords;
  195                         else
  196                                 ++prune->stat_rawrecords;
  197 
  198                         /*
  199                          * The current record might now be the one after
   200                          * the one we deleted; set ATEDISK to force us
  201                          * to skip it (since we are iterating backwards).
  202                          */
  203                         cursor.flags |= HAMMER_CURSOR_ATEDISK;
  204                 } else {
  205                         /*
  206                          * Nothing to delete, but we may have to check other
  207                          * things.
  208                          */
  209                         prune_check_nlinks(&cursor, elm);
  210                         cursor.flags |= HAMMER_CURSOR_ATEDISK;
  211                         if (hammer_debug_general & 0x0100) {
  212                                 kprintf("check %016llx %016llx: SKIP\n",
  213                                         (long long)elm->base.obj_id,
  214                                         (long long)elm->base.key);
  215                         }
  216                 }
  217                 ++prune->stat_scanrecords;
  218 
  219                 /*
  220                  * WARNING: See warnings in hammer_unlock_cursor() function.
  221                  */
  222                 while (hammer_flusher_meta_halflimit(trans->hmp) ||
  223                        hammer_flusher_undo_exhausted(trans, 2)) {
  224                         hammer_unlock_cursor(&cursor);
  225                         hammer_flusher_wait(trans->hmp, seq);
  226                         hammer_lock_cursor(&cursor);
  227                         seq = hammer_flusher_async_one(trans->hmp);
  228                 }
  229                 hammer_sync_lock_sh(trans);
  230                 error = hammer_btree_iterate_reverse(&cursor);
  231                 hammer_sync_unlock(trans);
  232         }
  233         if (error == ENOENT)
  234                 error = 0;
  235         hammer_done_cursor(&cursor);
  236         if (error == EDEADLK)
  237                 goto retry;
  238         if (error == EINTR) {
  239                 prune->head.flags |= HAMMER_IOC_HEAD_INTR;
  240                 error = 0;
  241         }
  242 failed:
  243         prune->key_cur.localization &= HAMMER_LOCALIZE_MASK;
  244         prune->elms = user_elms;
  245         kfree(copy_elms, M_TEMP);
  246         return(error);
  247 }
  248 
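Editor's note (not part of the original file): the handler above is driven from userland through the HAMMER ioctl interface. Below is a minimal, hedged sketch of a caller using the prune-everything path that hammer_ioc_prune() accepts (nelms == 0 with HAMMER_IOC_PRUNE_ALL set). It assumes the HAMMERIOC_PRUNE command and the struct hammer_ioc_prune layout from hammer_ioctl.h, plus the HAMMER_MIN_OBJID/HAMMER_MAX_OBJID constants; the mount path is made up for the example.

/*
 * Hypothetical userland sketch: prune all deleted records in a PFS.
 * This corresponds to the HAMMER_IOC_PRUNE_ALL + nelms == 0 case that
 * the validation code at the top of hammer_ioc_prune() permits.
 */
#include <sys/types.h>
#include <sys/ioctl.h>
#include <fcntl.h>
#include <stdint.h>
#include <stdio.h>
#include <string.h>
#include <vfs/hammer/hammer_ioctl.h>    /* assumed header location */

int
main(void)
{
        struct hammer_ioc_prune prune;
        int fd;

        /* Any file or directory inside the target PFS should do. */
        fd = open("/mnt/hammer", O_RDONLY);
        if (fd < 0) {
                perror("open");
                return (1);
        }

        memset(&prune, 0, sizeof(prune));
        prune.head.flags |= HAMMER_IOC_PRUNE_ALL;   /* no element list */
        prune.nelms = 0;
        prune.key_beg.obj_id = HAMMER_MIN_OBJID;    /* full obj_id range */
        prune.key_end.obj_id = HAMMER_MAX_OBJID;

        if (ioctl(fd, HAMMERIOC_PRUNE, &prune) < 0) {
                perror("HAMMERIOC_PRUNE");
                return (1);
        }
        printf("scanned %jd records, freed %jd bytes\n",
               (intmax_t)prune.stat_scanrecords,
               (intmax_t)prune.stat_bytes);
        return (0);
}
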
  249 /*
  250  * Check pruning list.  The list must be sorted in descending order.
  251  *
  252  * Return non-zero if the record should be deleted.
  253  */
  254 static int
  255 prune_should_delete(struct hammer_ioc_prune *prune, hammer_btree_leaf_elm_t elm)
  256 {
  257         struct hammer_ioc_prune_elm *scan;
  258         int i;
  259 
  260         /*
  261          * If pruning everything remove all records with a non-zero
  262          * delete_tid.
  263          */
  264         if (prune->head.flags & HAMMER_IOC_PRUNE_ALL) {
  265                 if (elm->base.delete_tid != 0)
  266                         return(1);
  267                 return(0);
  268         }
  269 
  270         for (i = 0; i < prune->nelms; ++i) {
  271                 scan = &prune->elms[i];
  272 
  273                 /*
  274                  * Check for loop termination.
  275                  */
  276                 if (elm->base.create_tid >= scan->end_tid ||
  277                     elm->base.delete_tid > scan->end_tid) {
  278                         break;
  279                 }
  280 
  281                 /*
  282                  * Determine if we can delete the record.
  283                  */
  284                 if (elm->base.delete_tid &&
  285                     elm->base.create_tid >= scan->beg_tid &&
  286                     elm->base.delete_tid <= scan->end_tid &&
  287                     (elm->base.create_tid - scan->beg_tid) / scan->mod_tid ==
  288                     (elm->base.delete_tid - scan->beg_tid) / scan->mod_tid) {
  289                         return(1);
  290                 }
  291         }
  292         return(0);
  293 }
  294 
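Editor's note (not part of the original file): a small, self-contained illustration of the per-element modulo test above. The tid values and helper names are made up; only the arithmetic mirrors prune_should_delete(): a deleted record is prunable when its create_tid and delete_tid land in the same mod_tid-sized slot of the element's [beg_tid, end_tid] range, i.e. the record's entire lifetime fits inside one pruning interval.

/*
 * Worked example (illustrative only): with beg_tid=1000, end_tid=2000
 * and mod_tid=100, a record created at 1110 and deleted at 1190 lives
 * entirely inside the slot [1100,1200) and may be destroyed, while a
 * record spanning 1190..1210 straddles the boundary at 1200 and must
 * be retained so snapshots on that boundary stay consistent.
 */
#include <stdint.h>
#include <stdio.h>

typedef uint64_t ex_tid_t;              /* stand-in for hammer_tid_t */

struct ex_prune_elm {                   /* mirrors beg_tid/end_tid/mod_tid */
        ex_tid_t beg_tid;
        ex_tid_t end_tid;
        ex_tid_t mod_tid;
};

static int
ex_same_slot(const struct ex_prune_elm *scan, ex_tid_t cre, ex_tid_t del)
{
        /* Same per-element test as prune_should_delete() above. */
        return (del != 0 && cre >= scan->beg_tid && del <= scan->end_tid &&
                (cre - scan->beg_tid) / scan->mod_tid ==
                (del - scan->beg_tid) / scan->mod_tid);
}

int
main(void)
{
        struct ex_prune_elm e = { 1000, 2000, 100 };

        printf("%d\n", ex_same_slot(&e, 1110, 1190));   /* 1: prunable  */
        printf("%d\n", ex_same_slot(&e, 1190, 1210));   /* 0: straddles */
        return (0);
}
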
  295 /*
  296  * Dangling inodes can occur if processes are holding open descriptors on
   297  * deleted files at the time a machine crashes.  When we find one, simply
  298  * acquire the inode and release it.  The inode handling code will then
  299  * do the right thing.
  300  */
  301 static
  302 void
  303 prune_check_nlinks(hammer_cursor_t cursor, hammer_btree_leaf_elm_t elm)
  304 {
  305         hammer_inode_t ip;
  306         int error;
  307 
  308         if (elm->base.rec_type != HAMMER_RECTYPE_INODE)
  309                 return;
  310         if (elm->base.delete_tid != 0)
  311                 return;
  312         if (hammer_btree_extract(cursor, HAMMER_CURSOR_GET_DATA))
  313                 return;
  314         if (cursor->data->inode.nlinks)
  315                 return;
  316         hammer_cursor_downgrade(cursor);
  317         ip = hammer_get_inode(cursor->trans, NULL, elm->base.obj_id,
  318                       HAMMER_MAX_TID,
  319                       elm->base.localization & HAMMER_LOCALIZE_PSEUDOFS_MASK,
  320                       0, &error);
  321         if (ip) {
  322                 if (hammer_debug_general & 0x0001) {
  323                         kprintf("pruning disconnected inode %016llx\n",
  324                                 (long long)elm->base.obj_id);
  325                 }
  326                 hammer_rel_inode(ip, 0);
  327                 hammer_inode_waitreclaims(cursor->trans);
  328         } else {
  329                 kprintf("unable to prune disconnected inode %016llx\n",
  330                         (long long)elm->base.obj_id);
  331         }
  332 }
  333 
  334 #if 0
  335 
  336 /*
  337  * NOTE: THIS CODE HAS BEEN REMOVED!  Pruning no longer attempts to realign
  338  *       adjacent records because it seriously interferes with every 
  339  *       mirroring algorithm I could come up with.
  340  *
  341  *       This means that historical accesses beyond the first snapshot
  342  *       softlink should be on snapshot boundaries only.  Historical
  343  *       accesses from "now" to the first snapshot softlink continue to
  344  *       be fine-grained.
  345  *
  346  * NOTE: It also looks like there's a bug in the removed code.  It is believed
  347  *       that create_tid can sometimes get set to 0xffffffffffffffff.  Just as
  348  *       well we no longer try to do this fancy shit.  Probably the attempt to
  349  *       correct the rhb is blowing up the cursor's indexing or addressing mapping.
  350  *
  351  * Align the record to cover any gaps created through the deletion of
  352  * records within the pruning space.  If we were to just delete the records
  353  * there would be gaps which in turn would cause a snapshot that is NOT on
  354  * a pruning boundary to appear corrupt to the user.  Forcing alignment
  355  * of the create_tid and delete_tid for retained records 'reconnects'
  356  * the previously contiguous space, making it contiguous again after the
  357  * deletions.
  358  *
  359  * The use of a reverse iteration allows us to safely align the records and
  360  * related elements without creating temporary overlaps.  XXX we should
   361  * add ordering dependencies for record buffers to guarantee consistency
  362  * during recovery.
  363  */
  364 static int
  365 realign_prune(struct hammer_ioc_prune *prune,
  366               hammer_cursor_t cursor, int realign_cre, int realign_del)
  367 {
  368         struct hammer_ioc_prune_elm *scan;
  369         hammer_btree_elm_t elm;
  370         hammer_tid_t delta;
  371         hammer_tid_t tid;
  372         int error;
  373 
  374         hammer_cursor_downgrade(cursor);
  375 
  376         elm = &cursor->node->ondisk->elms[cursor->index];
  377         ++prune->stat_realignments;
  378 
  379         /*
  380          * Align the create_tid.  By doing a reverse iteration we guarantee
  381          * that all records after our current record have already been
  382          * aligned, allowing us to safely correct the right-hand-boundary
   383          * (because no record to our right that otherwise exactly matches
  384          * will have a create_tid to the left of our aligned create_tid).
  385          */
  386         error = 0;
  387         if (realign_cre >= 0) {
  388                 scan = &prune->elms[realign_cre];
  389 
  390                 delta = (elm->leaf.base.create_tid - scan->beg_tid) % 
  391                         scan->mod_tid;
  392                 if (delta) {
  393                         tid = elm->leaf.base.create_tid - delta + scan->mod_tid;
  394 
  395                         /* can EDEADLK */
  396                         error = hammer_btree_correct_rhb(cursor, tid + 1);
  397                         if (error == 0) {
  398                                 error = hammer_btree_extract(cursor,
  399                                                      HAMMER_CURSOR_GET_LEAF);
  400                         }
  401                         if (error == 0) {
  402                                 /* can EDEADLK */
  403                                 error = hammer_cursor_upgrade(cursor);
  404                         }
  405                         if (error == 0) {
  406                                 hammer_modify_node(cursor->trans, cursor->node,
  407                                             &elm->leaf.base.create_tid,
  408                                             sizeof(elm->leaf.base.create_tid));
  409                                 elm->leaf.base.create_tid = tid;
  410                                 hammer_modify_node_done(cursor->node);
  411                         }
  412                 }
  413         }
  414 
  415         /*
  416          * Align the delete_tid.  This only occurs if the record is historical
   417          * and was deleted at some point.  Realigning the delete_tid does not
  418          * move the record within the B-Tree but may cause it to temporarily
  419          * overlap a record that has not yet been pruned.
  420          */
  421         if (error == 0 && realign_del >= 0) {
  422                 scan = &prune->elms[realign_del];
  423 
  424                 delta = (elm->leaf.base.delete_tid - scan->beg_tid) % 
  425                         scan->mod_tid;
  426                 if (delta) {
  427                         error = hammer_btree_extract(cursor,
  428                                                      HAMMER_CURSOR_GET_LEAF);
  429                         if (error == 0) {
  430                                 hammer_modify_node(cursor->trans, cursor->node,
  431                                             &elm->leaf.base.delete_tid,
  432                                             sizeof(elm->leaf.base.delete_tid));
  433                                 elm->leaf.base.delete_tid =
  434                                             elm->leaf.base.delete_tid -
  435                                             delta + scan->mod_tid;
  436                                 hammer_modify_node_done(cursor->node);
  437                         }
  438                 }
  439         }
  440         return (error);
  441 }
  442 
  443 #endif
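
Editor's note (not part of the original file): even though the realignment pass above was removed, the rounding it performed is worth spelling out, since the same delta arithmetic appears in both the create_tid and delete_tid branches. A hedged standalone sketch with made-up tid values:

#include <stdint.h>
#include <stdio.h>

/*
 * Round tid up to the next mod_tid boundary measured from beg_tid,
 * as the removed realign_prune() did when delta was non-zero
 * (tid - delta + mod_tid).
 */
static uint64_t
ex_align_up(uint64_t tid, uint64_t beg_tid, uint64_t mod_tid)
{
        uint64_t delta = (tid - beg_tid) % mod_tid;

        return (delta ? tid - delta + mod_tid : tid);
}

int
main(void)
{
        /* beg_tid=1000, mod_tid=100: 1130 rounds up to 1200, while
         * 1200 is already on a boundary and is left alone. */
        printf("%llu\n", (unsigned long long)ex_align_up(1130, 1000, 100));
        printf("%llu\n", (unsigned long long)ex_align_up(1200, 1000, 100));
        return (0);
}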

This page is part of the FreeBSD/Linux Kernel Cross-Reference and was automatically generated using a modified version of the LXR engine.