The Design and Implementation of the FreeBSD Operating System, Second Edition
Now available: The Design and Implementation of the FreeBSD Operating System (Second Edition)


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]

FreeBSD/Linux Kernel Cross Reference
sys/vfs/hammer/hammer_pfs.c

Version: -  FREEBSD  -  FREEBSD-12-STABLE  -  FREEBSD-12-0  -  FREEBSD-11-STABLE  -  FREEBSD-11-2  -  FREEBSD-11-1  -  FREEBSD-11-0  -  FREEBSD-10-STABLE  -  FREEBSD-10-4  -  FREEBSD-10-3  -  FREEBSD-10-2  -  FREEBSD-10-1  -  FREEBSD-10-0  -  FREEBSD-9-STABLE  -  FREEBSD-9-3  -  FREEBSD-9-2  -  FREEBSD-9-1  -  FREEBSD-9-0  -  FREEBSD-8-STABLE  -  FREEBSD-8-4  -  FREEBSD-8-3  -  FREEBSD-8-2  -  FREEBSD-8-1  -  FREEBSD-8-0  -  FREEBSD-7-STABLE  -  FREEBSD-7-4  -  FREEBSD-7-3  -  FREEBSD-7-2  -  FREEBSD-7-1  -  FREEBSD-7-0  -  FREEBSD-6-STABLE  -  FREEBSD-6-4  -  FREEBSD-6-3  -  FREEBSD-6-2  -  FREEBSD-6-1  -  FREEBSD-6-0  -  FREEBSD-5-STABLE  -  FREEBSD-5-5  -  FREEBSD-5-4  -  FREEBSD-5-3  -  FREEBSD-5-2  -  FREEBSD-5-1  -  FREEBSD-5-0  -  FREEBSD-4-STABLE  -  FREEBSD-3-STABLE  -  FREEBSD22  -  linux-2.6  -  linux-2.4.22  -  MK83  -  MK84  -  PLAN9  -  DFBSD  -  NETBSD  -  NETBSD5  -  NETBSD4  -  NETBSD3  -  NETBSD20  -  OPENBSD  -  xnu-517  -  xnu-792  -  xnu-792.6.70  -  xnu-1228  -  xnu-1456.1.26  -  xnu-1699.24.8  -  xnu-2050.18.24  -  OPENSOLARIS  -  minix-3-1-1 
SearchContext: -  none  -  3  -  10 

    1 /*
    2  * Copyright (c) 2008 The DragonFly Project.  All rights reserved.
    3  * 
    4  * This code is derived from software contributed to The DragonFly Project
    5  * by Matthew Dillon <dillon@backplane.com>
    6  * 
    7  * Redistribution and use in source and binary forms, with or without
    8  * modification, are permitted provided that the following conditions
    9  * are met:
   10  * 
   11  * 1. Redistributions of source code must retain the above copyright
   12  *    notice, this list of conditions and the following disclaimer.
   13  * 2. Redistributions in binary form must reproduce the above copyright
   14  *    notice, this list of conditions and the following disclaimer in
   15  *    the documentation and/or other materials provided with the
   16  *    distribution.
   17  * 3. Neither the name of The DragonFly Project nor the names of its
   18  *    contributors may be used to endorse or promote products derived
   19  *    from this software without specific, prior written permission.
   20  * 
   21  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
   22  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
   23  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
   24  * FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE
   25  * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
   26  * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
   27  * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
   28  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
   29  * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
   30  * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
   31  * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
   32  * SUCH DAMAGE.
   33  */
   34 /*
   35  * HAMMER PFS ioctls - Manage pseudo-fs configurations
   36  */
   37 
   38 #include "hammer.h"
   39 
   40 static int hammer_pfs_autodetect(struct hammer_ioc_pseudofs_rw *pfs,
   41                                 hammer_inode_t ip);
   42 static int hammer_pfs_rollback(hammer_transaction_t trans,
   43                                 hammer_pseudofs_inmem_t pfsm,
   44                                 hammer_tid_t trunc_tid);
   45 static int hammer_pfs_delete_at_cursor(hammer_cursor_t cursor,
   46                                 hammer_tid_t trunc_tid);
   47 
   48 /*
   49  * Get mirroring/pseudo-fs information
   50  *
   51  * NOTE: The ip used for ioctl is not necessarily related to the PFS
   52  */
   53 int
   54 hammer_ioc_get_pseudofs(hammer_transaction_t trans, hammer_inode_t ip,
   55                         struct hammer_ioc_pseudofs_rw *pfs)
   56 {
   57         hammer_pseudofs_inmem_t pfsm;
   58         u_int32_t localization;
   59         int error;
   60 
   61         if ((error = hammer_pfs_autodetect(pfs, ip)) != 0)
   62                 return(error);
   63         localization = (u_int32_t)pfs->pfs_id << 16;
   64         pfs->bytes = sizeof(struct hammer_pseudofs_data);
   65         pfs->version = HAMMER_IOC_PSEUDOFS_VERSION;
   66 
   67         pfsm = hammer_load_pseudofs(trans, localization, &error);
   68         if (error) {
   69                 hammer_rel_pseudofs(trans->hmp, pfsm);
   70                 return(error);
   71         }
   72 
   73         /*
   74          * If the PFS is a master the sync tid is set by normal operation
   75          * rather than the mirroring code, and will always track the
   76          * real HAMMER filesystem.
   77          *
   78          * We use flush_tid1, which is the highest fully committed TID.
   79          * flush_tid2 is the TID most recently flushed, but the UNDO hasn't
   80          * caught up to it yet so a crash will roll us back to flush_tid1.
   81          */
   82         if ((pfsm->pfsd.mirror_flags & HAMMER_PFSD_SLAVE) == 0)
   83                 pfsm->pfsd.sync_end_tid = trans->hmp->flush_tid1;
   84 
   85         /*
   86          * Copy out to userland.
   87          */
   88         error = 0;
   89         if (pfs->ondisk && error == 0)
   90                 error = copyout(&pfsm->pfsd, pfs->ondisk, sizeof(pfsm->pfsd));
   91         hammer_rel_pseudofs(trans->hmp, pfsm);
   92         return(error);
   93 }
   94 
   95 /*
   96  * Set mirroring/pseudo-fs information
   97  *
   98  * NOTE: The ip used for ioctl is not necessarily related to the PFS
   99  */
  100 int
  101 hammer_ioc_set_pseudofs(hammer_transaction_t trans, hammer_inode_t ip,
  102                         struct ucred *cred, struct hammer_ioc_pseudofs_rw *pfs)
  103 {
  104         hammer_pseudofs_inmem_t pfsm;
  105         u_int32_t localization;
  106         int error;
  107 
  108         if ((error = hammer_pfs_autodetect(pfs, ip)) != 0)
  109                 return(error);
  110         localization = (u_int32_t)pfs->pfs_id << 16;
  111         if (pfs->version != HAMMER_IOC_PSEUDOFS_VERSION)
  112                 error = EINVAL;
  113         localization = (u_int32_t)pfs->pfs_id << 16;
  114 
  115         if (error == 0 && pfs->ondisk) {
  116                 /*
  117                  * Load the PFS so we can modify our in-core copy.  Ignore
  118                  * ENOENT errors.
  119                  */
  120                 pfsm = hammer_load_pseudofs(trans, localization, &error);
  121                 error = copyin(pfs->ondisk, &pfsm->pfsd, sizeof(pfsm->pfsd));
  122 
  123                 /*
  124                  * Save it back, create a root inode if we are in master
  125                  * mode and no root exists.
  126                  *
  127                  * We do not create root inodes for slaves, the root inode
  128                  * must be mirrored from the master.
  129                  */
  130                 if (error == 0 &&
  131                     (pfsm->pfsd.mirror_flags & HAMMER_PFSD_SLAVE) == 0) {
  132                         error = hammer_mkroot_pseudofs(trans, cred, pfsm);
  133                 }
  134                 if (error == 0)
  135                         error = hammer_save_pseudofs(trans, pfsm);
  136 
  137                 /*
  138                  * Wakeup anyone waiting for a TID update for this PFS
  139                  */
  140                 wakeup(&pfsm->pfsd.sync_end_tid);
  141                 hammer_rel_pseudofs(trans->hmp, pfsm);
  142         }
  143         return(error);
  144 }
  145 
  146 /*
  147  * Upgrade a slave to a master
  148  *
  149  * This is fairly easy to do, but we must physically undo any partial syncs
  150  * for transaction ids > sync_end_tid.  Effective, we must do a partial
  151  * rollback.
  152  *
  153  * NOTE: The ip used for ioctl is not necessarily related to the PFS
  154  */
  155 int
  156 hammer_ioc_upgrade_pseudofs(hammer_transaction_t trans, hammer_inode_t ip,
  157                         struct hammer_ioc_pseudofs_rw *pfs)
  158 {
  159         hammer_pseudofs_inmem_t pfsm;
  160         u_int32_t localization;
  161         int error;
  162 
  163         if ((error = hammer_pfs_autodetect(pfs, ip)) != 0)
  164                 return(error);
  165         localization = (u_int32_t)pfs->pfs_id << 16;
  166         if ((error = hammer_unload_pseudofs(trans, localization)) != 0)
  167                 return(error);
  168 
  169         /*
  170          * A master id must be set when upgrading
  171          */
  172         pfsm = hammer_load_pseudofs(trans, localization, &error);
  173         if (error == 0) {
  174                 if ((pfsm->pfsd.mirror_flags & HAMMER_PFSD_SLAVE) != 0) {
  175                         error = hammer_pfs_rollback(trans, pfsm,
  176                                             pfsm->pfsd.sync_end_tid + 1);
  177                         if (error == 0) {
  178                                 pfsm->pfsd.mirror_flags &= ~HAMMER_PFSD_SLAVE;
  179                                 error = hammer_save_pseudofs(trans, pfsm);
  180                         }
  181                 }
  182         }
  183         hammer_rel_pseudofs(trans->hmp, pfsm);
  184         if (error == EINTR) {
  185                 pfs->head.flags |= HAMMER_IOC_HEAD_INTR;
  186                 error = 0;
  187         }
  188         return (error);
  189 }
  190 
  191 /*
  192  * Downgrade a master to a slave
  193  *
  194  * This is really easy to do, just set the SLAVE flag and update sync_end_tid.
  195  *
  196  * We previously did not update sync_end_tid in consideration for a slave
  197  * upgraded to a master and then downgraded again, but this completely breaks
  198  * the case where one starts with a master and then downgrades to a slave,
  199  * then upgrades again.
  200  *
  201  * NOTE: The ip used for ioctl is not necessarily related to the PFS
  202  */
  203 int
  204 hammer_ioc_downgrade_pseudofs(hammer_transaction_t trans, hammer_inode_t ip,
  205                         struct hammer_ioc_pseudofs_rw *pfs)
  206 {
  207         hammer_mount_t hmp = trans->hmp;
  208         hammer_pseudofs_inmem_t pfsm;
  209         u_int32_t localization;
  210         int error;
  211 
  212         if ((error = hammer_pfs_autodetect(pfs, ip)) != 0)
  213                 return(error);
  214         localization = (u_int32_t)pfs->pfs_id << 16;
  215         if ((error = hammer_unload_pseudofs(trans, localization)) != 0)
  216                 return(error);
  217 
  218         pfsm = hammer_load_pseudofs(trans, localization, &error);
  219         if (error == 0) {
  220                 if ((pfsm->pfsd.mirror_flags & HAMMER_PFSD_SLAVE) == 0) {
  221                         pfsm->pfsd.mirror_flags |= HAMMER_PFSD_SLAVE;
  222                         if (pfsm->pfsd.sync_end_tid < hmp->flush_tid1)
  223                                 pfsm->pfsd.sync_end_tid = hmp->flush_tid1;
  224                         error = hammer_save_pseudofs(trans, pfsm);
  225                 }
  226         }
  227         hammer_rel_pseudofs(trans->hmp, pfsm);
  228         return (error);
  229 }
  230 
  231 /*
  232  * Destroy a PFS
  233  *
  234  * We can destroy a PFS by scanning and deleting all of its records in the
  235  * B-Tree.  The hammer utility will delete the softlink in the primary
  236  * filesystem.
  237  *
  238  * NOTE: The ip used for ioctl is not necessarily related to the PFS
  239  */
  240 int
  241 hammer_ioc_destroy_pseudofs(hammer_transaction_t trans, hammer_inode_t ip,
  242                         struct hammer_ioc_pseudofs_rw *pfs)
  243 {
  244         hammer_pseudofs_inmem_t pfsm;
  245         u_int32_t localization;
  246         int error;
  247 
  248         if ((error = hammer_pfs_autodetect(pfs, ip)) != 0)
  249                 return(error);
  250         localization = (u_int32_t)pfs->pfs_id << 16;
  251 
  252         if ((error = hammer_unload_pseudofs(trans, localization)) != 0)
  253                 return(error);
  254 
  255         pfsm = hammer_load_pseudofs(trans, localization, &error);
  256         if (error == 0) {
  257                 error = hammer_pfs_rollback(trans, pfsm, 0);
  258                 if (error == 0) {
  259                         pfsm->pfsd.mirror_flags |= HAMMER_PFSD_DELETED;
  260                         error = hammer_save_pseudofs(trans, pfsm);
  261                 }
  262         }
  263         hammer_rel_pseudofs(trans->hmp, pfsm);
  264         if (error == EINTR) {
  265                 pfs->head.flags |= HAMMER_IOC_HEAD_INTR;
  266                 error = 0;
  267         }
  268         return(error);
  269 }
  270 
  271 /*
  272  * Wait for the PFS to sync past the specified TID
  273  */
  274 int
  275 hammer_ioc_wait_pseudofs(hammer_transaction_t trans, hammer_inode_t ip,
  276                          struct hammer_ioc_pseudofs_rw *pfs)
  277 {
  278         hammer_pseudofs_inmem_t pfsm;
  279         struct hammer_pseudofs_data pfsd;
  280         u_int32_t localization;
  281         hammer_tid_t tid;
  282         void *waitp;
  283         int error;
  284 
  285         if ((error = hammer_pfs_autodetect(pfs, ip)) != 0)
  286                 return(error);
  287         localization = (u_int32_t)pfs->pfs_id << 16;
  288 
  289         if ((error = copyin(pfs->ondisk, &pfsd, sizeof(pfsd))) != 0)
  290                 return(error);
  291 
  292         pfsm = hammer_load_pseudofs(trans, localization, &error);
  293         if (error == 0) {
  294                 if (pfsm->pfsd.mirror_flags & HAMMER_PFSD_SLAVE) {
  295                         tid = pfsm->pfsd.sync_end_tid;
  296                         waitp = &pfsm->pfsd.sync_end_tid;
  297                 } else {
  298                         tid = trans->hmp->flush_tid1;
  299                         waitp = &trans->hmp->flush_tid1;
  300                 }
  301                 if (tid <= pfsd.sync_end_tid)
  302                         tsleep(waitp, PCATCH, "hmrmwt", 0);
  303         }
  304         hammer_rel_pseudofs(trans->hmp, pfsm);
  305         if (error == EINTR) {
  306                 pfs->head.flags |= HAMMER_IOC_HEAD_INTR;
  307                 error = 0;
  308         }
  309         return(error);
  310 }
  311 
  312 
  313 /*
  314  * Auto-detect the pseudofs and do basic bounds checking.
  315  */
  316 static
  317 int
  318 hammer_pfs_autodetect(struct hammer_ioc_pseudofs_rw *pfs, hammer_inode_t ip)
  319 {
  320         int error = 0;
  321 
  322         if (pfs->pfs_id == -1)
  323                 pfs->pfs_id = (int)(ip->obj_localization >> 16);
  324         if (pfs->pfs_id < 0 || pfs->pfs_id >= HAMMER_MAX_PFS)
  325                 error = EINVAL;
  326         if (pfs->bytes < sizeof(struct hammer_pseudofs_data))
  327                 error = EINVAL;
  328         return(error);
  329 }
  330 
  331 /*
  332  * Rollback the specified PFS to (trunc_tid - 1), removing everything
  333  * greater or equal to trunc_tid.  The PFS must not have been in no-mirror
  334  * mode or the MIRROR_FILTERED scan will not work properly.
  335  *
  336  * This is typically used to remove any partial syncs when upgrading a
  337  * slave to a master.  It can theoretically also be used to rollback
  338  * any PFS, including PFS#0, BUT ONLY TO POINTS THAT HAVE NOT YET BEEN
  339  * PRUNED, and to points that are older only if they are on a retained
  340  * (pruning softlink) boundary.
  341  *
  342  * Rollbacks destroy information.  If you don't mind inode numbers changing
  343  * a better way would be to cpdup a snapshot back onto the master.
  344  */
  345 static
  346 int
  347 hammer_pfs_rollback(hammer_transaction_t trans,
  348                     hammer_pseudofs_inmem_t pfsm,
  349                     hammer_tid_t trunc_tid)
  350 {
  351         struct hammer_cmirror cmirror;
  352         struct hammer_cursor cursor;
  353         struct hammer_base_elm key_cur;
  354         int error;
  355         int seq;
  356 
  357         bzero(&cmirror, sizeof(cmirror));
  358         bzero(&key_cur, sizeof(key_cur));
  359         key_cur.localization = HAMMER_MIN_LOCALIZATION + pfsm->localization;
  360         key_cur.obj_id = HAMMER_MIN_OBJID;
  361         key_cur.key = HAMMER_MIN_KEY;
  362         key_cur.create_tid = 1;
  363         key_cur.rec_type = HAMMER_MIN_RECTYPE;
  364 
  365         seq = trans->hmp->flusher.done;
  366 
  367 retry:
  368         error = hammer_init_cursor(trans, &cursor, NULL, NULL);
  369         if (error) {
  370                 hammer_done_cursor(&cursor);
  371                 goto failed;
  372         }
  373         cursor.key_beg = key_cur;
  374         cursor.key_end.localization = HAMMER_MAX_LOCALIZATION +
  375                                       pfsm->localization;
  376         cursor.key_end.obj_id = HAMMER_MAX_OBJID;
  377         cursor.key_end.key = HAMMER_MAX_KEY;
  378         cursor.key_end.create_tid = HAMMER_MAX_TID;
  379         cursor.key_end.rec_type = HAMMER_MAX_RECTYPE;
  380 
  381         cursor.flags |= HAMMER_CURSOR_END_INCLUSIVE;
  382         cursor.flags |= HAMMER_CURSOR_BACKEND;
  383 
  384         /*
  385          * Do an optimized scan of only records created or modified
  386          * >= trunc_tid, so we can fix up those records.  We must
  387          * still check the TIDs but this greatly reduces the size of
  388          * the scan.
  389          */
  390         cursor.flags |= HAMMER_CURSOR_MIRROR_FILTERED;
  391         cursor.cmirror = &cmirror;
  392         cmirror.mirror_tid = trunc_tid;
  393 
  394         error = hammer_btree_first(&cursor);
  395         while (error == 0) {
  396                 /*
  397                  * Abort the rollback.
  398                  */
  399                 if (error == 0) {
  400                         error = hammer_signal_check(trans->hmp);
  401                         if (error)
  402                                 break;
  403                 }
  404 
  405                 /*
  406                  * We only care about leafs.  Internal nodes can be returned
  407                  * in mirror-filtered mode (they are used to generate SKIP
  408                  * mrecords), but we don't need them for this code.
  409                  *
  410                  * WARNING: See warnings in hammer_unlock_cursor() function.
  411                  */
  412                 cursor.flags |= HAMMER_CURSOR_ATEDISK;
  413                 if (cursor.node->ondisk->type == HAMMER_BTREE_TYPE_LEAF) {
  414                         key_cur = cursor.node->ondisk->elms[cursor.index].base;
  415                         error = hammer_pfs_delete_at_cursor(&cursor, trunc_tid);
  416                 }
  417 
  418                 while (hammer_flusher_meta_halflimit(trans->hmp) ||
  419                        hammer_flusher_undo_exhausted(trans, 2)) {
  420                         hammer_unlock_cursor(&cursor);
  421                         hammer_flusher_wait(trans->hmp, seq);
  422                         hammer_lock_cursor(&cursor);
  423                         seq = hammer_flusher_async_one(trans->hmp);
  424                 }
  425 
  426                 if (error == 0)
  427                         error = hammer_btree_iterate(&cursor);
  428         }
  429         if (error == ENOENT)
  430                 error = 0;
  431         hammer_done_cursor(&cursor);
  432         if (error == EDEADLK)
  433                 goto retry;
  434 failed:
  435         return(error);
  436 }
  437 
  438 /*
  439  * Helper function - perform rollback on a B-Tree element given trunc_tid.
  440  *
  441  * If create_tid >= trunc_tid the record is physically destroyed.
  442  * If delete_tid >= trunc_tid it will be set to 0, undeleting the record.
  443  */
  444 static
  445 int
  446 hammer_pfs_delete_at_cursor(hammer_cursor_t cursor, hammer_tid_t trunc_tid)
  447 {
  448         hammer_btree_leaf_elm_t elm;
  449         int error;
  450 
  451         elm = &cursor->node->ondisk->elms[cursor->index].leaf;
  452         if (elm->base.create_tid < trunc_tid &&
  453             elm->base.delete_tid < trunc_tid) {
  454                 return(0);
  455         }
  456 
  457         if (elm->base.create_tid >= trunc_tid) {
  458                 error = hammer_delete_at_cursor(
  459                                 cursor, HAMMER_DELETE_DESTROY,
  460                                 cursor->trans->tid, cursor->trans->time32,
  461                                 1, NULL);
  462         } else if (elm->base.delete_tid >= trunc_tid) {
  463                 error = hammer_delete_at_cursor(
  464                                 cursor, HAMMER_DELETE_ADJUST,
  465                                 0, 0,
  466                                 1, NULL);
  467         } else {
  468                 error = 0;
  469         }
  470         return(error);
  471 }
  472 

Cache object: 99b8c32c5a45864ffeb88bc2342c3263


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]


This page is part of the FreeBSD/Linux Kernel Cross-Reference, and was automatically generated using a modified version of the LXR engine.