The Design and Implementation of the FreeBSD Operating System, Second Edition
Now available: The Design and Implementation of the FreeBSD Operating System (Second Edition)


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]

FreeBSD/Linux Kernel Cross Reference
sys/kern/vfs_jops.c

Version: -  FREEBSD  -  FREEBSD-13-STABLE  -  FREEBSD-13-0  -  FREEBSD-12-STABLE  -  FREEBSD-12-0  -  FREEBSD-11-STABLE  -  FREEBSD-11-0  -  FREEBSD-10-STABLE  -  FREEBSD-10-0  -  FREEBSD-9-STABLE  -  FREEBSD-9-0  -  FREEBSD-8-STABLE  -  FREEBSD-8-0  -  FREEBSD-7-STABLE  -  FREEBSD-7-0  -  FREEBSD-6-STABLE  -  FREEBSD-6-0  -  FREEBSD-5-STABLE  -  FREEBSD-5-0  -  FREEBSD-4-STABLE  -  FREEBSD-3-STABLE  -  FREEBSD22  -  l41  -  OPENBSD  -  linux-2.6  -  MK84  -  PLAN9  -  xnu-8792 
SearchContext: -  none  -  3  -  10 

    1 /*
    2  * Copyright (c) 2004-2006 The DragonFly Project.  All rights reserved.
    3  * 
    4  * This code is derived from software contributed to The DragonFly Project
    5  * by Matthew Dillon <dillon@backplane.com>
    6  * 
    7  * Redistribution and use in source and binary forms, with or without
    8  * modification, are permitted provided that the following conditions
    9  * are met:
   10  * 
   11  * 1. Redistributions of source code must retain the above copyright
   12  *    notice, this list of conditions and the following disclaimer.
   13  * 2. Redistributions in binary form must reproduce the above copyright
   14  *    notice, this list of conditions and the following disclaimer in
   15  *    the documentation and/or other materials provided with the
   16  *    distribution.
   17  * 3. Neither the name of The DragonFly Project nor the names of its
   18  *    contributors may be used to endorse or promote products derived
   19  *    from this software without specific, prior written permission.
   20  * 
   21  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
   22  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
   23  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
   24  * FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE
   25  * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
   26  * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
   27  * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
   28  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
   29  * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
   30  * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
   31  * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
   32  * SUCH DAMAGE.
   33  *
   34  * $DragonFly: src/sys/kern/vfs_jops.c,v 1.36 2007/08/21 17:43:52 dillon Exp $
   35  */
   36 /*
   37  * Each mount point may have zero or more independently configured journals
   38  * attached to it.  Each journal is represented by a memory FIFO and worker
   39  * thread.  Journal events are streamed through the FIFO to the thread,
   40  * batched up (typically on one-second intervals), and written out by the
   41  * thread. 
   42  *
   43  * Journal vnode ops are executed instead of mnt_vn_norm_ops when one or
   44  * more journals have been installed on a mount point.  It becomes the
   45  * responsibility of the journal op to call the underlying normal op as
   46  * appropriate.
   47  */
   48 #include <sys/param.h>
   49 #include <sys/systm.h>
   50 #include <sys/buf.h>
   51 #include <sys/conf.h>
   52 #include <sys/kernel.h>
   53 #include <sys/queue.h>
   54 #include <sys/lock.h>
   55 #include <sys/malloc.h>
   56 #include <sys/mount.h>
   57 #include <sys/unistd.h>
   58 #include <sys/vnode.h>
   59 #include <sys/poll.h>
   60 #include <sys/mountctl.h>
   61 #include <sys/journal.h>
   62 #include <sys/file.h>
   63 #include <sys/proc.h>
   64 #include <sys/socket.h>
   65 #include <sys/socketvar.h>
   66 
   67 #include <machine/limits.h>
   68 
   69 #include <vm/vm.h>
   70 #include <vm/vm_object.h>
   71 #include <vm/vm_page.h>
   72 #include <vm/vm_pager.h>
   73 #include <vm/vnode_pager.h>
   74 
   75 #include <sys/file2.h>
   76 #include <sys/thread2.h>
   77 
/*
 * Forward declarations: mount point attach/detach and the handlers for the
 * individual MOUNTCTL_*_VFS_JOURNAL operations dispatched by
 * journal_mountctl().
 */
static int journal_attach(struct mount *mp);
static void journal_detach(struct mount *mp);
static int journal_install_vfs_journal(struct mount *mp, struct file *fp,
                            const struct mountctl_install_journal *info);
static int journal_restart_vfs_journal(struct mount *mp, struct file *fp,
                            const struct mountctl_restart_journal *info);
static int journal_remove_vfs_journal(struct mount *mp,
                            const struct mountctl_remove_journal *info);
static int journal_restart(struct mount *mp, struct file *fp,
                            struct journal *jo, int flags);
static int journal_destroy(struct mount *mp, struct journal *jo, int flags);
static int journal_resync_vfs_journal(struct mount *mp, const void *ctl);
static int journal_status_vfs_journal(struct mount *mp,
                       const struct mountctl_status_journal *info,
                       struct mountctl_journal_ret_status *rstat,
                       int buflen, int *res);

/*
 * Low level UNDO record generator for a single jrecord.
 */
static void jrecord_undo_file(struct jrecord *jrec, struct vnode *vp,
                             int jrflags, off_t off, off_t bytes);

/*
 * Journaling shims for the vnode operations listed in journal_vnode_vops.
 */
static int journal_setattr(struct vop_setattr_args *ap);
static int journal_write(struct vop_write_args *ap);
static int journal_fsync(struct vop_fsync_args *ap);
static int journal_putpages(struct vop_putpages_args *ap);
static int journal_setacl(struct vop_setacl_args *ap);
static int journal_setextattr(struct vop_setextattr_args *ap);
static int journal_ncreate(struct vop_ncreate_args *ap);
static int journal_nmknod(struct vop_nmknod_args *ap);
static int journal_nlink(struct vop_nlink_args *ap);
static int journal_nsymlink(struct vop_nsymlink_args *ap);
static int journal_nwhiteout(struct vop_nwhiteout_args *ap);
static int journal_nremove(struct vop_nremove_args *ap);
static int journal_nmkdir(struct vop_nmkdir_args *ap);
static int journal_nrmdir(struct vop_nrmdir_args *ap);
static int journal_nrename(struct vop_nrename_args *ap);
  113 
/*
 * JRUNDO_* flags (the jrflags argument of the undo routines).
 *
 * The low bits each select one vattr field to be saved in the UNDO record
 * by jrecord_undo_file().  JRUNDO_FILEDATA additionally requests that the
 * file data (or symlink target) be saved.  JRUNDO_GETVP tells
 * jreclist_undo_file() to vget()/vput() the vnode around the operation.
 */
#define JRUNDO_SIZE     0x00000001
#define JRUNDO_UID      0x00000002
#define JRUNDO_GID      0x00000004
#define JRUNDO_FSID     0x00000008
#define JRUNDO_MODES    0x00000010
#define JRUNDO_INUM     0x00000020
#define JRUNDO_ATIME    0x00000040
#define JRUNDO_MTIME    0x00000080
#define JRUNDO_CTIME    0x00000100
#define JRUNDO_GEN      0x00000200
#define JRUNDO_FLAGS    0x00000400
#define JRUNDO_UDEV     0x00000800
#define JRUNDO_NLINK    0x00001000
#define JRUNDO_FILEDATA 0x00010000
#define JRUNDO_GETVP    0x00020000
#define JRUNDO_CONDLINK 0x00040000      /* write file data if link count 1 */
/* All vattr field selectors combined. */
#define JRUNDO_VATTR    (JRUNDO_SIZE|JRUNDO_UID|JRUNDO_GID|JRUNDO_FSID|\
                         JRUNDO_MODES|JRUNDO_INUM|JRUNDO_ATIME|JRUNDO_MTIME|\
                         JRUNDO_CTIME|JRUNDO_GEN|JRUNDO_FLAGS|JRUNDO_UDEV|\
                         JRUNDO_NLINK)
/* Every vattr field plus the file data. */
#define JRUNDO_ALL      (JRUNDO_VATTR|JRUNDO_FILEDATA)
  135 
/*
 * Vnode operations vector registered on a mount point by journal_attach()
 * and unregistered by journal_detach().  Each listed operation is routed
 * to its journal_*() shim; vop_default is set to vop_journal_operate_ap,
 * the same routine the shims call to run the underlying operation
 * (presumably it also handles any operation not listed here -- confirm
 * against the vop_ops dispatch code).
 */
static struct vop_ops journal_vnode_vops = {
    .vop_default =      vop_journal_operate_ap,
    .vop_mountctl =     journal_mountctl,
    .vop_setattr =      journal_setattr,
    .vop_write =        journal_write,
    .vop_fsync =        journal_fsync,
    .vop_putpages =     journal_putpages,
    .vop_setacl =       journal_setacl,
    .vop_setextattr =   journal_setextattr,
    .vop_ncreate =      journal_ncreate,
    .vop_nmknod =       journal_nmknod,
    .vop_nlink =        journal_nlink,
    .vop_nsymlink =     journal_nsymlink,
    .vop_nwhiteout =    journal_nwhiteout,
    .vop_nremove =      journal_nremove,
    .vop_nmkdir =       journal_nmkdir,
    .vop_nrmdir =       journal_nrmdir,
    .vop_nrename =      journal_nrename
};
  155 
  156 int
  157 journal_mountctl(struct vop_mountctl_args *ap)
  158 {
  159     struct mount *mp;
  160     int error = 0;
  161 
  162     mp = ap->a_head.a_ops->head.vv_mount;
  163     KKASSERT(mp);
  164 
  165     if (mp->mnt_vn_journal_ops == NULL) {
  166         switch(ap->a_op) {
  167         case MOUNTCTL_INSTALL_VFS_JOURNAL:
  168             error = journal_attach(mp);
  169             if (error == 0 && ap->a_ctllen != sizeof(struct mountctl_install_journal))
  170                 error = EINVAL;
  171             if (error == 0 && ap->a_fp == NULL)
  172                 error = EBADF;
  173             if (error == 0)
  174                 error = journal_install_vfs_journal(mp, ap->a_fp, ap->a_ctl);
  175             if (TAILQ_EMPTY(&mp->mnt_jlist))
  176                 journal_detach(mp);
  177             break;
  178         case MOUNTCTL_RESTART_VFS_JOURNAL:
  179         case MOUNTCTL_REMOVE_VFS_JOURNAL:
  180         case MOUNTCTL_RESYNC_VFS_JOURNAL:
  181         case MOUNTCTL_STATUS_VFS_JOURNAL:
  182             error = ENOENT;
  183             break;
  184         default:
  185             error = EOPNOTSUPP;
  186             break;
  187         }
  188     } else {
  189         switch(ap->a_op) {
  190         case MOUNTCTL_INSTALL_VFS_JOURNAL:
  191             if (ap->a_ctllen != sizeof(struct mountctl_install_journal))
  192                 error = EINVAL;
  193             if (error == 0 && ap->a_fp == NULL)
  194                 error = EBADF;
  195             if (error == 0)
  196                 error = journal_install_vfs_journal(mp, ap->a_fp, ap->a_ctl);
  197             break;
  198         case MOUNTCTL_RESTART_VFS_JOURNAL:
  199             if (ap->a_ctllen != sizeof(struct mountctl_restart_journal))
  200                 error = EINVAL;
  201             if (error == 0 && ap->a_fp == NULL)
  202                 error = EBADF;
  203             if (error == 0)
  204                 error = journal_restart_vfs_journal(mp, ap->a_fp, ap->a_ctl);
  205             break;
  206         case MOUNTCTL_REMOVE_VFS_JOURNAL:
  207             if (ap->a_ctllen != sizeof(struct mountctl_remove_journal))
  208                 error = EINVAL;
  209             if (error == 0)
  210                 error = journal_remove_vfs_journal(mp, ap->a_ctl);
  211             if (TAILQ_EMPTY(&mp->mnt_jlist))
  212                 journal_detach(mp);
  213             break;
  214         case MOUNTCTL_RESYNC_VFS_JOURNAL:
  215             if (ap->a_ctllen != 0)
  216                 error = EINVAL;
  217             error = journal_resync_vfs_journal(mp, ap->a_ctl);
  218             break;
  219         case MOUNTCTL_STATUS_VFS_JOURNAL:
  220             if (ap->a_ctllen != sizeof(struct mountctl_status_journal))
  221                 error = EINVAL;
  222             if (error == 0) {
  223                 error = journal_status_vfs_journal(mp, ap->a_ctl, 
  224                                         ap->a_buf, ap->a_buflen, ap->a_res);
  225             }
  226             break;
  227         default:
  228             error = EOPNOTSUPP;
  229             break;
  230         }
  231     }
  232     return (error);
  233 }
  234 
  235 /*
  236  * High level mount point setup.  When a 
  237  */
  238 static int
  239 journal_attach(struct mount *mp)
  240 {
  241     KKASSERT(mp->mnt_jbitmap == NULL);
  242     vfs_add_vnodeops(mp, &journal_vnode_vops, &mp->mnt_vn_journal_ops);
  243     mp->mnt_jbitmap = kmalloc(JREC_STREAMID_JMAX/8, M_JOURNAL, M_WAITOK|M_ZERO);
  244     mp->mnt_streamid = JREC_STREAMID_JMIN;
  245     return(0);
  246 }
  247 
  248 static void
  249 journal_detach(struct mount *mp)
  250 {
  251     KKASSERT(mp->mnt_jbitmap != NULL);
  252     if (mp->mnt_vn_journal_ops)
  253         vfs_rm_vnodeops(mp, &journal_vnode_vops, &mp->mnt_vn_journal_ops);
  254     kfree(mp->mnt_jbitmap, M_JOURNAL);
  255     mp->mnt_jbitmap = NULL;
  256 }
  257 
  258 /*
  259  * Install a journal on a mount point.  Each journal has an associated worker
  260  * thread which is responsible for buffering and spooling the data to the
  261  * target.  A mount point may have multiple journals attached to it.  An
  262  * initial start record is generated when the journal is associated.
  263  */
  264 static int
  265 journal_install_vfs_journal(struct mount *mp, struct file *fp, 
  266                             const struct mountctl_install_journal *info)
  267 {
  268     struct journal *jo;
  269     struct jrecord jrec;
  270     int error = 0;
  271     int size;
  272 
  273     jo = kmalloc(sizeof(struct journal), M_JOURNAL, M_WAITOK|M_ZERO);
  274     bcopy(info->id, jo->id, sizeof(jo->id));
  275     jo->flags = info->flags & ~(MC_JOURNAL_WACTIVE | MC_JOURNAL_RACTIVE |
  276                                 MC_JOURNAL_STOP_REQ);
  277 
  278     /*
  279      * Memory FIFO size, round to nearest power of 2
  280      */
  281     if (info->membufsize) {
  282         if (info->membufsize < 65536)
  283             size = 65536;
  284         else if (info->membufsize > 128 * 1024 * 1024)
  285             size = 128 * 1024 * 1024;
  286         else
  287             size = (int)info->membufsize;
  288     } else {
  289         size = 1024 * 1024;
  290     }
  291     jo->fifo.size = 1;
  292     while (jo->fifo.size < size)
  293         jo->fifo.size <<= 1;
  294 
  295     /*
  296      * Other parameters.  If not specified the starting transaction id
  297      * will be the current date.
  298      */
  299     if (info->transid) {
  300         jo->transid = info->transid;
  301     } else {
  302         struct timespec ts;
  303         getnanotime(&ts);
  304         jo->transid = ((int64_t)ts.tv_sec << 30) | ts.tv_nsec;
  305     }
  306 
  307     jo->fp = fp;
  308 
  309     /*
  310      * Allocate the memory FIFO
  311      */
  312     jo->fifo.mask = jo->fifo.size - 1;
  313     jo->fifo.membase = kmalloc(jo->fifo.size, M_JFIFO, M_WAITOK|M_ZERO|M_NULLOK);
  314     if (jo->fifo.membase == NULL)
  315         error = ENOMEM;
  316 
  317     /*
  318      * Create the worker threads and generate the association record.
  319      */
  320     if (error) {
  321         kfree(jo, M_JOURNAL);
  322     } else {
  323         fhold(fp);
  324         journal_create_threads(jo);
  325         jrecord_init(jo, &jrec, JREC_STREAMID_DISCONT);
  326         jrecord_write(&jrec, JTYPE_ASSOCIATE, 0);
  327         jrecord_done(&jrec, 0);
  328         TAILQ_INSERT_TAIL(&mp->mnt_jlist, jo, jentry);
  329     }
  330     return(error);
  331 }
  332 
  333 /*
  334  * Restart a journal with a new descriptor.   The existing reader and writer
  335  * threads are terminated and a new descriptor is associated with the
  336  * journal.  The FIFO rindex is reset to xindex and the threads are then
  337  * restarted.
  338  */
  339 static int
  340 journal_restart_vfs_journal(struct mount *mp, struct file *fp,
  341                            const struct mountctl_restart_journal *info)
  342 {
  343     struct journal *jo;
  344     int error;
  345 
  346     TAILQ_FOREACH(jo, &mp->mnt_jlist, jentry) {
  347         if (bcmp(jo->id, info->id, sizeof(jo->id)) == 0)
  348             break;
  349     }
  350     if (jo)
  351         error = journal_restart(mp, fp, jo, info->flags);
  352     else
  353         error = EINVAL;
  354     return (error);
  355 }
  356 
/*
 * Restart journal jo on a new file pointer.
 *
 * The worker threads are destroyed, the reference on the old descriptor
 * (if any) is dropped, a reference is taken on fp and stored in the
 * journal, fifo.rindex is rewound to fifo.xindex, and the worker threads
 * are recreated.  mp is not used here.  Always returns 0.
 */
static int
journal_restart(struct mount *mp, struct file *fp, 
                struct journal *jo, int flags)
{
    /*
     * XXX lock the jo
     */

#if 0
    /*
     * Record the fact that we are doing a restart in the journal.
     * XXX it isn't safe to do this if the journal is being restarted
     * because it was locked up and the writer thread has already exited.
     */
    jrecord_init(jo, &jrec, JREC_STREAMID_RESTART);
    jrecord_write(&jrec, JTYPE_DISASSOCIATE, 0);
    jrecord_done(&jrec, 0);
#endif

    /*
     * Stop the reader and writer threads and clean up the current 
     * descriptor.
     */
    kprintf("RESTART WITH FP %p KILLING %p\n", fp, jo->fp);
    journal_destroy_threads(jo, flags);

    if (jo->fp)
        fdrop(jo->fp);

    /*
     * Associate the new descriptor, reset the FIFO index, and recreate
     * the threads.
     */
    fhold(fp);
    jo->fp = fp;
    jo->fifo.rindex = jo->fifo.xindex;
    journal_create_threads(jo);

    return(0);
}
  397 
  398 /*
  399  * Disassociate a journal from a mount point and terminate its worker thread.
  400  * A final termination record is written out before the file pointer is
  401  * dropped.
  402  */
  403 static int
  404 journal_remove_vfs_journal(struct mount *mp, 
  405                            const struct mountctl_remove_journal *info)
  406 {
  407     struct journal *jo;
  408     int error;
  409 
  410     TAILQ_FOREACH(jo, &mp->mnt_jlist, jentry) {
  411         if (bcmp(jo->id, info->id, sizeof(jo->id)) == 0)
  412             break;
  413     }
  414     if (jo)
  415         error = journal_destroy(mp, jo, info->flags);
  416     else
  417         error = EINVAL;
  418     return (error);
  419 }
  420 
  421 /*
  422  * Remove all journals associated with a mount point.  Usually called
  423  * by the umount code.
  424  */
  425 void
  426 journal_remove_all_journals(struct mount *mp, int flags)
  427 {
  428     struct journal *jo;
  429 
  430     while ((jo = TAILQ_FIRST(&mp->mnt_jlist)) != NULL) {
  431         journal_destroy(mp, jo, flags);
  432     }
  433 }
  434 
/*
 * Tear down a single journal.
 *
 * The journal is unlinked from the mount point's list, a final
 * JTYPE_DISASSOCIATE record is written, the worker threads are destroyed,
 * and then the file pointer reference, the FIFO memory and the journal
 * structure itself are released.  jo must not be used after this call.
 * Always returns 0.
 */
static int
journal_destroy(struct mount *mp, struct journal *jo, int flags)
{
    struct jrecord jrec;

    TAILQ_REMOVE(&mp->mnt_jlist, jo, jentry);

    /* Final record, written while the worker threads still exist. */
    jrecord_init(jo, &jrec, JREC_STREAMID_DISCONT);
    jrecord_write(&jrec, JTYPE_DISASSOCIATE, 0);
    jrecord_done(&jrec, 0);

    journal_destroy_threads(jo, flags);

    if (jo->fp)
        fdrop(jo->fp);
    if (jo->fifo.membase)
        kfree(jo->fifo.membase, M_JFIFO);
    kfree(jo, M_JOURNAL);

    return(0);
}
  456 
  457 static int
  458 journal_resync_vfs_journal(struct mount *mp, const void *ctl)
  459 {
  460     return(EINVAL);
  461 }
  462 
  463 static int
  464 journal_status_vfs_journal(struct mount *mp, 
  465                        const struct mountctl_status_journal *info,
  466                        struct mountctl_journal_ret_status *rstat,
  467                        int buflen, int *res)
  468 {
  469     struct journal *jo;
  470     int error = 0;
  471     int index;
  472 
  473     index = 0;
  474     *res = 0;
  475     TAILQ_FOREACH(jo, &mp->mnt_jlist, jentry) {
  476         if (info->index == MC_JOURNAL_INDEX_ID) {
  477             if (bcmp(jo->id, info->id, sizeof(jo->id)) != 0)
  478                 continue;
  479         } else if (info->index >= 0) {
  480             if (info->index < index)
  481                 continue;
  482         } else if (info->index != MC_JOURNAL_INDEX_ALL) {
  483             continue;
  484         }
  485         if (buflen < sizeof(*rstat)) {
  486             if (*res)
  487                 rstat[-1].flags |= MC_JOURNAL_STATUS_MORETOCOME;
  488             else
  489                 error = EINVAL;
  490             break;
  491         }
  492         bzero(rstat, sizeof(*rstat));
  493         rstat->recsize = sizeof(*rstat);
  494         bcopy(jo->id, rstat->id, sizeof(jo->id));
  495         rstat->index = index;
  496         rstat->membufsize = jo->fifo.size;
  497         rstat->membufused = jo->fifo.windex - jo->fifo.xindex;
  498         rstat->membufunacked = jo->fifo.rindex - jo->fifo.xindex;
  499         rstat->bytessent = jo->total_acked;
  500         rstat->fifostalls = jo->fifostalls;
  501         ++rstat;
  502         ++index;
  503         *res += sizeof(*rstat);
  504         buflen -= sizeof(*rstat);
  505     }
  506     return(error);
  507 }
  508 
  509 /************************************************************************
  510  *                      PARALLEL TRANSACTION SUPPORT ROUTINES           *
  511  ************************************************************************
  512  *
  513  * JRECLIST_*() - routines which create and iterate over jrecord structures,
  514  *                because a mount point may have multiple attached journals.
  515  */
  516 
  517 /*
  518  * Initialize the passed jrecord_list and create a jrecord for each 
  519  * journal we need to write to.  Unnecessary mallocs are avoided by
  520  * using the passed jrecord structure as the first jrecord in the list.
  521  * A starting transaction is pushed for each jrecord.
  522  *
  523  * Returns non-zero if any of the journals require undo records.
  524  */
  525 static
  526 int
  527 jreclist_init(struct mount *mp, struct jrecord_list *jreclist, 
  528               struct jrecord *jreccache, int16_t rectype)
  529 {
  530     struct journal *jo;
  531     struct jrecord *jrec;
  532     int wantrev;
  533     int count;
  534     int16_t streamid;
  535 
  536     TAILQ_INIT(&jreclist->list);
  537 
  538     /*
  539      * Select the stream ID to use for the transaction.  We must select
  540      * a stream ID that is not currently in use by some other parallel
  541      * transaction.  
  542      *
  543      * Don't bother calculating the next streamid when reassigning
  544      * mnt_streamid, since parallel transactions are fairly rare.  This
  545      * also allows someone observing the raw records to clearly see
  546      * when parallel transactions occur.
  547      */
  548     streamid = mp->mnt_streamid;
  549     count = 0;
  550     while (mp->mnt_jbitmap[streamid >> 3] & (1 << (streamid & 7))) {
  551         if (++streamid == JREC_STREAMID_JMAX)
  552                 streamid = JREC_STREAMID_JMIN;
  553         if (++count == JREC_STREAMID_JMAX - JREC_STREAMID_JMIN) {
  554                 kprintf("jreclist_init: all streamid's in use! sleeping\n");
  555                 tsleep(jreclist, 0, "jsidfl", hz * 10);
  556                 count = 0;
  557         }
  558     }
  559     mp->mnt_jbitmap[streamid >> 3] |= 1 << (streamid & 7);
  560     mp->mnt_streamid = streamid;
  561     jreclist->streamid = streamid;
  562 
  563     /*
  564      * Now initialize a stream on each journal.
  565      */
  566     count = 0;
  567     wantrev = 0;
  568     TAILQ_FOREACH(jo, &mp->mnt_jlist, jentry) {
  569         if (count == 0)
  570             jrec = jreccache;
  571         else
  572             jrec = kmalloc(sizeof(*jrec), M_JOURNAL, M_WAITOK);
  573         jrecord_init(jo, jrec, streamid);
  574         jrec->user_save = jrecord_push(jrec, rectype);
  575         TAILQ_INSERT_TAIL(&jreclist->list, jrec, user_entry);
  576         if (jo->flags & MC_JOURNAL_WANT_REVERSABLE)
  577             wantrev = 1;
  578         ++count;
  579     }
  580     return(wantrev);
  581 }
  582 
  583 /*
  584  * Terminate the journaled transactions started by jreclist_init().  If
  585  * an error occured, the transaction records will be aborted.
  586  */
  587 static
  588 void
  589 jreclist_done(struct mount *mp, struct jrecord_list *jreclist, int error)
  590 {
  591     struct jrecord *jrec;
  592     int count;
  593 
  594     /*
  595      * Cleanup the jrecord state on each journal.
  596      */
  597     TAILQ_FOREACH(jrec, &jreclist->list, user_entry) {
  598         jrecord_pop(jrec, jrec->user_save);
  599         jrecord_done(jrec, error);
  600     }
  601 
  602     /*
  603      * Free allocated jrec's (the first is always supplied)
  604      */
  605     count = 0;
  606     while ((jrec = TAILQ_FIRST(&jreclist->list)) != NULL) {
  607         TAILQ_REMOVE(&jreclist->list, jrec, user_entry);
  608         if (count)
  609             kfree(jrec, M_JOURNAL);
  610         ++count;
  611     }
  612 
  613     /*
  614      * Clear the streamid so it can be reused.
  615      */
  616     mp->mnt_jbitmap[jreclist->streamid >> 3] &= ~(1 << (jreclist->streamid & 7));
  617 }
  618 
  619 /*
  620  * This procedure writes out UNDO records for available reversable
  621  * journals.
  622  *
  623  * XXX could use improvement.  There is no need to re-read the file
  624  * for each journal.
  625  */
  626 static
  627 void
  628 jreclist_undo_file(struct jrecord_list *jreclist, struct vnode *vp, 
  629                    int jrflags, off_t off, off_t bytes)
  630 {
  631     struct jrecord *jrec;
  632     int error;
  633 
  634     error = 0;
  635     if (jrflags & JRUNDO_GETVP)
  636         error = vget(vp, LK_SHARED);
  637     if (error == 0) {
  638         TAILQ_FOREACH(jrec, &jreclist->list, user_entry) {
  639             if (jrec->jo->flags & MC_JOURNAL_WANT_REVERSABLE) {
  640                 jrecord_undo_file(jrec, vp, jrflags, off, bytes);
  641             }
  642         }
  643     }
  644     if (error == 0 && jrflags & JRUNDO_GETVP)
  645         vput(vp);
  646 }
  647 
/************************************************************************
 *                      LOW LEVEL UNDO SUPPORT ROUTINE                  *
 ************************************************************************
 *
 * This function is used to support UNDO records.  It will generate an
 * appropriate record with the requested portion of the file data.  Note
 * that file data is only recorded if JRUNDO_FILEDATA is passed.  If bytes
 * is -1, it will be set to the size of the file.
 *
 * jrec     jrecord the JTYPE_UNDO record is written to
 * vp       vnode whose attributes / data are being saved
 * jrflags  JRUNDO_* selection flags
 * off      starting file offset of the data to save
 * bytes    number of bytes to save (-1: through EOF); the range is
 *          clipped to the current file size
 *
 * Nothing is returned.  A failure is recorded in the journal itself as a
 * JLEAF_ERROR leaf inside the UNDO record.
 */
static void
jrecord_undo_file(struct jrecord *jrec, struct vnode *vp, int jrflags, 
                  off_t off, off_t bytes)
{
    struct vattr attr;
    void *save1; /* warning, save pointers do not always remain valid */
    void *save2;
    int error;

    /*
     * Setup.  Start the UNDO record and retrieve attribute info.  No
     * vnode lock is taken here; when JRUNDO_GETVP is used the caller
     * (jreclist_undo_file) has already done a shared vget().
     */
    save1 = jrecord_push(jrec, JTYPE_UNDO);
    error = VOP_GETATTR(vp, &attr);
    if (error)
        goto done;

    /*
     * Generate UNDO records as requested.  The vnode type is always
     * written; every other attribute is written only if its JRUNDO_ flag
     * is set and the attribute does not hold VNOVAL.
     */
    if (jrflags & JRUNDO_VATTR) {
        save2 = jrecord_push(jrec, JTYPE_VATTR);
        jrecord_leaf(jrec, JLEAF_VTYPE, &attr.va_type, sizeof(attr.va_type));
        if ((jrflags & JRUNDO_NLINK) && attr.va_nlink != VNOVAL)
            jrecord_leaf(jrec, JLEAF_NLINK, &attr.va_nlink, sizeof(attr.va_nlink));
        if ((jrflags & JRUNDO_SIZE) && attr.va_size != VNOVAL)
            jrecord_leaf(jrec, JLEAF_SIZE, &attr.va_size, sizeof(attr.va_size));
        if ((jrflags & JRUNDO_UID) && attr.va_uid != VNOVAL)
            jrecord_leaf(jrec, JLEAF_UID, &attr.va_uid, sizeof(attr.va_uid));
        if ((jrflags & JRUNDO_GID) && attr.va_gid != VNOVAL)
            jrecord_leaf(jrec, JLEAF_GID, &attr.va_gid, sizeof(attr.va_gid));
        if ((jrflags & JRUNDO_FSID) && attr.va_fsid != VNOVAL)
            jrecord_leaf(jrec, JLEAF_FSID, &attr.va_fsid, sizeof(attr.va_fsid));
        if ((jrflags & JRUNDO_MODES) && attr.va_mode != (mode_t)VNOVAL)
            jrecord_leaf(jrec, JLEAF_MODES, &attr.va_mode, sizeof(attr.va_mode));
        if ((jrflags & JRUNDO_INUM) && attr.va_fileid != VNOVAL)
            jrecord_leaf(jrec, JLEAF_INUM, &attr.va_fileid, sizeof(attr.va_fileid));
        if ((jrflags & JRUNDO_ATIME) && attr.va_atime.tv_sec != VNOVAL)
            jrecord_leaf(jrec, JLEAF_ATIME, &attr.va_atime, sizeof(attr.va_atime));
        if ((jrflags & JRUNDO_MTIME) && attr.va_mtime.tv_sec != VNOVAL)
            jrecord_leaf(jrec, JLEAF_MTIME, &attr.va_mtime, sizeof(attr.va_mtime));
        if ((jrflags & JRUNDO_CTIME) && attr.va_ctime.tv_sec != VNOVAL)
            jrecord_leaf(jrec, JLEAF_CTIME, &attr.va_ctime, sizeof(attr.va_ctime));
        if ((jrflags & JRUNDO_GEN) && attr.va_gen != VNOVAL)
            jrecord_leaf(jrec, JLEAF_GEN, &attr.va_gen, sizeof(attr.va_gen));
        if ((jrflags & JRUNDO_FLAGS) && attr.va_flags != VNOVAL)
            jrecord_leaf(jrec, JLEAF_FLAGS, &attr.va_flags, sizeof(attr.va_flags));
        if ((jrflags & JRUNDO_UDEV) && attr.va_rmajor != VNOVAL) {
            /* The device is written combined and as major/minor. */
            udev_t rdev = makeudev(attr.va_rmajor, attr.va_rminor);
            jrecord_leaf(jrec, JLEAF_UDEV, &rdev, sizeof(rdev));
            jrecord_leaf(jrec, JLEAF_UMAJOR, &attr.va_rmajor, sizeof(attr.va_rmajor));
            jrecord_leaf(jrec, JLEAF_UMINOR, &attr.va_rminor, sizeof(attr.va_rminor));
        }
        jrecord_pop(jrec, save2);
    }

    /*
     * Output the file data being overwritten by reading the file and
     * writing it out to the journal prior to the write operation.  We
     * do not need to write out data past the current file EOF.
     *
     * XXX support JRUNDO_CONDLINK - do not write out file data for files
     * with a link count > 1.  The undo code needs to locate the inode and
     * regenerate the hardlink.
     */
    if ((jrflags & JRUNDO_FILEDATA) && attr.va_type == VREG) {
        if (attr.va_size != VNOVAL) {
            if (bytes == -1)
                bytes = attr.va_size - off;
            /* clip the range at EOF */
            if (off + bytes > attr.va_size)
                bytes = attr.va_size - off;
            /* nothing to save if the range lies at or beyond EOF */
            if (bytes > 0)
                jrecord_file_data(jrec, vp, off, bytes);
        } else {
            error = EINVAL;
        }
    }
    /*
     * For a symlink the "file data" is the link target, read through
     * VOP_READLINK into a temporary PATH_MAX sized buffer.
     */
    if ((jrflags & JRUNDO_FILEDATA) && attr.va_type == VLNK) {
        struct iovec aiov;
        struct uio auio;
        char *buf;

        buf = kmalloc(PATH_MAX, M_JOURNAL, M_WAITOK);
        aiov.iov_base = buf;
        aiov.iov_len = PATH_MAX;
        auio.uio_iov = &aiov;
        auio.uio_iovcnt = 1;
        auio.uio_offset = 0;
        auio.uio_rw = UIO_READ;
        auio.uio_segflg = UIO_SYSSPACE;
        auio.uio_td = curthread;
        auio.uio_resid = PATH_MAX;
        error = VOP_READLINK(vp, &auio, proc0.p_ucred);
        if (error == 0) {
                /* bytes actually read = buffer size minus the residual */
                jrecord_leaf(jrec, JLEAF_SYMLINKDATA, buf, 
                                PATH_MAX - auio.uio_resid);
        }
        kfree(buf, M_JOURNAL);
    }
done:
    /* Any failure above is recorded inside the UNDO record itself. */
    if (error)
        jrecord_leaf(jrec, JLEAF_ERROR, &error, sizeof(error));
    jrecord_pop(jrec, save1);
}
  762 
  763 /************************************************************************
  764  *                      JOURNAL VNOPS                                   *
  765  ************************************************************************
  766  *
  767  * These are function shims replacing the normal filesystem ops.  We become
  768  * responsible for calling the underlying filesystem ops.  We have the choice
  769  * of executing the underlying op first and then generating the journal entry,
  770  * or starting the journal entry, executing the underlying op, and then
  771  * either completing or aborting it.  
  772  *
  773  * The journal is supposed to be a high-level entity, which generally means
  774  * identifying files by name rather than by inode.  Supplying both allows
  775  * the journal to be used both for inode-number-compatible 'mirrors' and
  776  * for simple filesystem replication.
  777  *
  778  * Writes are particularly difficult to deal with because a single write may
  779  * represent a hundred megabyte buffer or more, and both writes and truncations
  780  * require the 'old' data to be written out as well as the new data if the
  781  * log is reversible.  Other issues:
  782  *
  783  * - How to deal with operations on unlinked files (no path available),
  784  *   but which may still be filesystem visible due to hard links.
  785  *
  786  * - How to deal with modifications made via a memory map.
  787  *
  788  * - Future cache coherency support will require cache coherency API calls
  789  *   both prior to and after the call to the underlying VFS.
  790  *
  791  * ALSO NOTE: We do not have to shim compatibility VOPs like MKDIR which have
  792  * new VFS equivalents (NMKDIR).
  793  */
  794 
  795 /*
  796  * Journal vop_setattr { a_vp, a_vap, a_cred }
  797  */
  798 static
  799 int
  800 journal_setattr(struct vop_setattr_args *ap)
  801 {
  802     struct jrecord_list jreclist;
  803     struct jrecord jreccache;
  804     struct jrecord *jrec;
  805     struct mount *mp;
  806     void *save;
  807     int error;
  808 
  809     mp = ap->a_head.a_ops->head.vv_mount;
  810     if (jreclist_init(mp, &jreclist, &jreccache, JTYPE_SETATTR)) {
  811         jreclist_undo_file(&jreclist, ap->a_vp, JRUNDO_VATTR, 0, 0);
  812     }
  813     error = vop_journal_operate_ap(&ap->a_head);
  814     if (error == 0) {
  815         TAILQ_FOREACH(jrec, &jreclist.list, user_entry) {
  816             jrecord_write_cred(jrec, curthread, ap->a_cred);
  817             jrecord_write_vnode_ref(jrec, ap->a_vp);
  818             save = jrecord_push(jrec, JTYPE_REDO);
  819             jrecord_write_vattr(jrec, ap->a_vap);
  820             jrecord_pop(jrec, save);
  821         }
  822     }
  823     jreclist_done(mp, &jreclist, error);
  824     return (error);
  825 }
  826 
  827 /*
  828  * Journal vop_write { a_vp, a_uio, a_ioflag, a_cred }
  829  */
  830 static
  831 int
  832 journal_write(struct vop_write_args *ap)
  833 {
  834     struct jrecord_list jreclist;
  835     struct jrecord jreccache;
  836     struct jrecord *jrec;
  837     struct mount *mp;
  838     struct uio uio_copy;
  839     struct iovec uio_one_iovec;
  840     void *save;
  841     int error;
  842 
  843     /*
  844      * Special synchronizing writes for VM backing store do not supply any
  845      * real data
  846      */
  847     if (ap->a_uio->uio_segflg == UIO_NOCOPY) {
  848             error = vop_journal_operate_ap(&ap->a_head);
  849             return (error);
  850     }
  851 
  852     /*
  853      * This is really nasty.  UIO's don't retain sufficient information to
  854      * be reusable once they've gone through the VOP chain.  The iovecs get
  855      * cleared, so we have to copy the UIO.
  856      *
  857      * XXX fix the UIO code to not destroy iov's during a scan so we can
  858      *     reuse the uio over and over again.
  859      *
  860      * XXX UNDO code needs to journal the old data prior to the write.
  861      */
  862     uio_copy = *ap->a_uio;
  863     if (uio_copy.uio_iovcnt == 1) {
  864         uio_one_iovec = ap->a_uio->uio_iov[0];
  865         uio_copy.uio_iov = &uio_one_iovec;
  866     } else {
  867         uio_copy.uio_iov = kmalloc(uio_copy.uio_iovcnt * sizeof(struct iovec),
  868                                     M_JOURNAL, M_WAITOK);
  869         bcopy(ap->a_uio->uio_iov, uio_copy.uio_iov, 
  870                 uio_copy.uio_iovcnt * sizeof(struct iovec));
  871     }
  872 
  873     /*
  874      * Write out undo data.  Note that uio_offset is incorrect if
  875      * IO_APPEND is set, but fortunately we have no undo file data to
  876      * write out in that case.
  877      */
  878     mp = ap->a_head.a_ops->head.vv_mount;
  879     if (jreclist_init(mp, &jreclist, &jreccache, JTYPE_WRITE)) {
  880         if (ap->a_ioflag & IO_APPEND) {
  881             jreclist_undo_file(&jreclist, ap->a_vp, JRUNDO_SIZE|JRUNDO_MTIME, 0, 0);
  882         } else {
  883             jreclist_undo_file(&jreclist, ap->a_vp, 
  884                                JRUNDO_FILEDATA|JRUNDO_SIZE|JRUNDO_MTIME, 
  885                                uio_copy.uio_offset, uio_copy.uio_resid);
  886         }
  887     }
  888     error = vop_journal_operate_ap(&ap->a_head);
  889 
  890     /*
  891      * XXX bad hack to figure out the offset for O_APPEND writes (note: 
  892      * uio field state after the VFS operation).
  893      */
  894     uio_copy.uio_offset = ap->a_uio->uio_offset - 
  895                           (uio_copy.uio_resid - ap->a_uio->uio_resid);
  896 
  897     /*
  898      * Output the write data to the journal.
  899      */
  900     if (error == 0) {
  901         TAILQ_FOREACH(jrec, &jreclist.list, user_entry) {
  902             jrecord_write_cred(jrec, NULL, ap->a_cred);
  903             jrecord_write_vnode_ref(jrec, ap->a_vp);
  904             save = jrecord_push(jrec, JTYPE_REDO);
  905             jrecord_write_uio(jrec, JLEAF_FILEDATA, &uio_copy);
  906             jrecord_pop(jrec, save);
  907         }
  908     }
  909     jreclist_done(mp, &jreclist, error);
  910 
  911     if (uio_copy.uio_iov != &uio_one_iovec)
  912         kfree(uio_copy.uio_iov, M_JOURNAL);
  913     return (error);
  914 }
  915 
  916 /*
  917  * Journal vop_fsync { a_vp, a_waitfor }
  918  */
  919 static
  920 int
  921 journal_fsync(struct vop_fsync_args *ap)
  922 {
  923 #if 0
  924     struct mount *mp;
  925     struct journal *jo;
  926 #endif
  927     int error;
  928 
  929     error = vop_journal_operate_ap(&ap->a_head);
  930 #if 0
  931     mp = ap->a_head.a_ops->head.vv_mount;
  932     if (error == 0) {
  933         TAILQ_FOREACH(jo, &mp->mnt_jlist, jentry) {
  934             /* XXX synchronize pending journal records */
  935         }
  936     }
  937 #endif
  938     return (error);
  939 }
  940 
  941 /*
  942  * Journal vop_putpages { a_vp, a_m, a_count, a_sync, a_rtvals, a_offset }
  943  *
  944  * note: a_count is in bytes.
  945  */
  946 static
  947 int
  948 journal_putpages(struct vop_putpages_args *ap)
  949 {
  950     struct jrecord_list jreclist;
  951     struct jrecord jreccache;
  952     struct jrecord *jrec;
  953     struct mount *mp;
  954     void *save;
  955     int error;
  956 
  957     mp = ap->a_head.a_ops->head.vv_mount;
  958     if (jreclist_init(mp, &jreclist, &jreccache, JTYPE_PUTPAGES) && 
  959         ap->a_count > 0
  960     ) {
  961         jreclist_undo_file(&jreclist, ap->a_vp, 
  962                            JRUNDO_FILEDATA|JRUNDO_SIZE|JRUNDO_MTIME, 
  963                            ap->a_offset, btoc(ap->a_count));
  964     }
  965     error = vop_journal_operate_ap(&ap->a_head);
  966     if (error == 0 && ap->a_count > 0) {
  967         TAILQ_FOREACH(jrec, &jreclist.list, user_entry) {
  968             jrecord_write_vnode_ref(jrec, ap->a_vp);
  969             save = jrecord_push(jrec, JTYPE_REDO);
  970             jrecord_write_pagelist(jrec, JLEAF_FILEDATA, ap->a_m, ap->a_rtvals, 
  971                                    btoc(ap->a_count), ap->a_offset);
  972             jrecord_pop(jrec, save);
  973         }
  974     }
  975     jreclist_done(mp, &jreclist, error);
  976     return (error);
  977 }
  978 
  979 /*
  980  * Journal vop_setacl { a_vp, a_type, a_aclp, a_cred }
  981  */
  982 static
  983 int
  984 journal_setacl(struct vop_setacl_args *ap)
  985 {
  986     struct jrecord_list jreclist;
  987     struct jrecord jreccache;
  988     struct jrecord *jrec;
  989     struct mount *mp;
  990     int error;
  991 
  992     mp = ap->a_head.a_ops->head.vv_mount;
  993     jreclist_init(mp, &jreclist, &jreccache, JTYPE_SETACL);
  994     error = vop_journal_operate_ap(&ap->a_head);
  995     if (error == 0) {
  996         TAILQ_FOREACH(jrec, &jreclist.list, user_entry) {
  997 #if 0
  998             if ((jo->flags & MC_JOURNAL_WANT_REVERSABLE))
  999                 jrecord_undo_file(jrec, ap->a_vp, JRUNDO_XXX, 0, 0);
 1000 #endif
 1001             jrecord_write_cred(jrec, curthread, ap->a_cred);
 1002             jrecord_write_vnode_ref(jrec, ap->a_vp);
 1003 #if 0
 1004             save = jrecord_push(jrec, JTYPE_REDO);
 1005             /* XXX type, aclp */
 1006             jrecord_pop(jrec, save);
 1007 #endif
 1008         }
 1009     }
 1010     jreclist_done(mp, &jreclist, error);
 1011     return (error);
 1012 }
 1013 
 1014 /*
 1015  * Journal vop_setextattr { a_vp, a_name, a_uio, a_cred }
 1016  */
 1017 static
 1018 int
 1019 journal_setextattr(struct vop_setextattr_args *ap)
 1020 {
 1021     struct jrecord_list jreclist;
 1022     struct jrecord jreccache;
 1023     struct jrecord *jrec;
 1024     struct mount *mp;
 1025     void *save;
 1026     int error;
 1027 
 1028     mp = ap->a_head.a_ops->head.vv_mount;
 1029     jreclist_init(mp, &jreclist, &jreccache, JTYPE_SETEXTATTR);
 1030     error = vop_journal_operate_ap(&ap->a_head);
 1031     if (error == 0) {
 1032         TAILQ_FOREACH(jrec, &jreclist.list, user_entry) {
 1033 #if 0
 1034             if ((jo->flags & MC_JOURNAL_WANT_REVERSABLE))
 1035                 jrecord_undo_file(jrec, ap->a_vp, JRUNDO_XXX, 0, 0);
 1036 #endif
 1037             jrecord_write_cred(jrec, curthread, ap->a_cred);
 1038             jrecord_write_vnode_ref(jrec, ap->a_vp);
 1039             jrecord_leaf(jrec, JLEAF_ATTRNAME, ap->a_attrname,
 1040                         strlen(ap->a_attrname));
 1041             save = jrecord_push(jrec, JTYPE_REDO);
 1042             jrecord_write_uio(jrec, JLEAF_FILEDATA, ap->a_uio);
 1043             jrecord_pop(jrec, save);
 1044         }
 1045     }
 1046     jreclist_done(mp, &jreclist, error);
 1047     return (error);
 1048 }
 1049 
 1050 /*
 1051  * Journal vop_ncreate { a_nch, a_vpp, a_cred, a_vap }
 1052  */
 1053 static
 1054 int
 1055 journal_ncreate(struct vop_ncreate_args *ap)
 1056 {
 1057     struct jrecord_list jreclist;
 1058     struct jrecord jreccache;
 1059     struct jrecord *jrec;
 1060     struct mount *mp;
 1061     void *save;
 1062     int error;
 1063 
 1064     mp = ap->a_head.a_ops->head.vv_mount;
 1065     jreclist_init(mp, &jreclist, &jreccache, JTYPE_CREATE);
 1066     error = vop_journal_operate_ap(&ap->a_head);
 1067     if (error == 0) {
 1068         TAILQ_FOREACH(jrec, &jreclist.list, user_entry) {
 1069             jrecord_write_cred(jrec, NULL, ap->a_cred);
 1070             jrecord_write_path(jrec, JLEAF_PATH1, ap->a_nch->ncp);
 1071             if (*ap->a_vpp)
 1072                 jrecord_write_vnode_ref(jrec, *ap->a_vpp);
 1073             save = jrecord_push(jrec, JTYPE_REDO);
 1074             jrecord_write_vattr(jrec, ap->a_vap);
 1075             jrecord_pop(jrec, save);
 1076         }
 1077     }
 1078     jreclist_done(mp, &jreclist, error);
 1079     return (error);
 1080 }
 1081 
 1082 /*
 1083  * Journal vop_nmknod { a_nch, a_vpp, a_cred, a_vap }
 1084  */
 1085 static
 1086 int
 1087 journal_nmknod(struct vop_nmknod_args *ap)
 1088 {
 1089     struct jrecord_list jreclist;
 1090     struct jrecord jreccache;
 1091     struct jrecord *jrec;
 1092     struct mount *mp;
 1093     void *save;
 1094     int error;
 1095 
 1096     mp = ap->a_head.a_ops->head.vv_mount;
 1097     jreclist_init(mp, &jreclist, &jreccache, JTYPE_MKNOD);
 1098     error = vop_journal_operate_ap(&ap->a_head);
 1099     if (error == 0) {
 1100         TAILQ_FOREACH(jrec, &jreclist.list, user_entry) {
 1101             jrecord_write_cred(jrec, NULL, ap->a_cred);
 1102             jrecord_write_path(jrec, JLEAF_PATH1, ap->a_nch->ncp);
 1103             save = jrecord_push(jrec, JTYPE_REDO);
 1104             jrecord_write_vattr(jrec, ap->a_vap);
 1105             jrecord_pop(jrec, save);
 1106             if (*ap->a_vpp)
 1107                 jrecord_write_vnode_ref(jrec, *ap->a_vpp);
 1108         }
 1109     }
 1110     jreclist_done(mp, &jreclist, error);
 1111     return (error);
 1112 }
 1113 
 1114 /*
 1115  * Journal vop_nlink { a_nch, a_vp, a_cred }
 1116  */
 1117 static
 1118 int
 1119 journal_nlink(struct vop_nlink_args *ap)
 1120 {
 1121     struct jrecord_list jreclist;
 1122     struct jrecord jreccache;
 1123     struct jrecord *jrec;
 1124     struct mount *mp;
 1125     void *save;
 1126     int error;
 1127 
 1128     mp = ap->a_head.a_ops->head.vv_mount;
 1129     jreclist_init(mp, &jreclist, &jreccache, JTYPE_LINK);
 1130     error = vop_journal_operate_ap(&ap->a_head);
 1131     if (error == 0) {
 1132         TAILQ_FOREACH(jrec, &jreclist.list, user_entry) {
 1133             jrecord_write_cred(jrec, NULL, ap->a_cred);
 1134             jrecord_write_path(jrec, JLEAF_PATH1, ap->a_nch->ncp);
 1135             /* XXX PATH to VP and inode number */
 1136             /* XXX this call may not record the correct path when
 1137              * multiple paths are available */
 1138             save = jrecord_push(jrec, JTYPE_REDO);
 1139             jrecord_write_vnode_link(jrec, ap->a_vp, ap->a_nch->ncp);
 1140             jrecord_pop(jrec, save);
 1141         }
 1142     }
 1143     jreclist_done(mp, &jreclist, error);
 1144     return (error);
 1145 }
 1146 
 1147 /*
 1148  * Journal vop_symlink { a_nch, a_vpp, a_cred, a_vap, a_target }
 1149  */
 1150 static
 1151 int
 1152 journal_nsymlink(struct vop_nsymlink_args *ap)
 1153 {
 1154     struct jrecord_list jreclist;
 1155     struct jrecord jreccache;
 1156     struct jrecord *jrec;
 1157     struct mount *mp;
 1158     void *save;
 1159     int error;
 1160 
 1161     mp = ap->a_head.a_ops->head.vv_mount;
 1162     jreclist_init(mp, &jreclist, &jreccache, JTYPE_SYMLINK);
 1163     error = vop_journal_operate_ap(&ap->a_head);
 1164     if (error == 0) {
 1165         TAILQ_FOREACH(jrec, &jreclist.list, user_entry) {
 1166             jrecord_write_cred(jrec, NULL, ap->a_cred);
 1167             jrecord_write_path(jrec, JLEAF_PATH1, ap->a_nch->ncp);
 1168             save = jrecord_push(jrec, JTYPE_REDO);
 1169             jrecord_leaf(jrec, JLEAF_SYMLINKDATA,
 1170                         ap->a_target, strlen(ap->a_target));
 1171             jrecord_pop(jrec, save);
 1172             if (*ap->a_vpp)
 1173                 jrecord_write_vnode_ref(jrec, *ap->a_vpp);
 1174         }
 1175     }
 1176     jreclist_done(mp, &jreclist, error);
 1177     return (error);
 1178 }
 1179 
 1180 /*
 1181  * Journal vop_nwhiteout { a_nch, a_cred, a_flags }
 1182  */
 1183 static
 1184 int
 1185 journal_nwhiteout(struct vop_nwhiteout_args *ap)
 1186 {
 1187     struct jrecord_list jreclist;
 1188     struct jrecord jreccache;
 1189     struct jrecord *jrec;
 1190     struct mount *mp;
 1191     int error;
 1192 
 1193     mp = ap->a_head.a_ops->head.vv_mount;
 1194     jreclist_init(mp, &jreclist, &jreccache, JTYPE_WHITEOUT);
 1195     error = vop_journal_operate_ap(&ap->a_head);
 1196     if (error == 0) {
 1197         TAILQ_FOREACH(jrec, &jreclist.list, user_entry) {
 1198             jrecord_write_cred(jrec, NULL, ap->a_cred);
 1199             jrecord_write_path(jrec, JLEAF_PATH1, ap->a_nch->ncp);
 1200         }
 1201     }
 1202     jreclist_done(mp, &jreclist, error);
 1203     return (error);
 1204 }
 1205 
 1206 /*
 1207  * Journal vop_nremove { a_nch, a_cred }
 1208  */
 1209 static
 1210 int
 1211 journal_nremove(struct vop_nremove_args *ap)
 1212 {
 1213     struct jrecord_list jreclist;
 1214     struct jrecord jreccache;
 1215     struct jrecord *jrec;
 1216     struct mount *mp;
 1217     int error;
 1218 
 1219     mp = ap->a_head.a_ops->head.vv_mount;
 1220     if (jreclist_init(mp, &jreclist, &jreccache, JTYPE_REMOVE) &&
 1221         ap->a_nch->ncp->nc_vp
 1222     ) {
 1223         jreclist_undo_file(&jreclist, ap->a_nch->ncp->nc_vp, 
 1224                            JRUNDO_ALL|JRUNDO_GETVP|JRUNDO_CONDLINK, 0, -1);
 1225     }
 1226     error = vop_journal_operate_ap(&ap->a_head);
 1227     if (error == 0) {
 1228         TAILQ_FOREACH(jrec, &jreclist.list, user_entry) {
 1229             jrecord_write_cred(jrec, NULL, ap->a_cred);
 1230             jrecord_write_path(jrec, JLEAF_PATH1, ap->a_nch->ncp);
 1231         }
 1232     }
 1233     jreclist_done(mp, &jreclist, error);
 1234     return (error);
 1235 }
 1236 
 1237 /*
 1238  * Journal vop_nmkdir { a_nch, a_vpp, a_cred, a_vap }
 1239  */
 1240 static
 1241 int
 1242 journal_nmkdir(struct vop_nmkdir_args *ap)
 1243 {
 1244     struct jrecord_list jreclist;
 1245     struct jrecord jreccache;
 1246     struct jrecord *jrec;
 1247     struct mount *mp;
 1248     int error;
 1249 
 1250     mp = ap->a_head.a_ops->head.vv_mount;
 1251     jreclist_init(mp, &jreclist, &jreccache, JTYPE_MKDIR);
 1252     error = vop_journal_operate_ap(&ap->a_head);
 1253     if (error == 0) {
 1254         TAILQ_FOREACH(jrec, &jreclist.list, user_entry) {
 1255 #if 0
 1256             if (jo->flags & MC_JOURNAL_WANT_AUDIT) {
 1257                 jrecord_write_audit(jrec);
 1258             }
 1259 #endif
 1260             jrecord_write_path(jrec, JLEAF_PATH1, ap->a_nch->ncp);
 1261             jrecord_write_cred(jrec, NULL, ap->a_cred);
 1262             jrecord_write_vattr(jrec, ap->a_vap);
 1263             jrecord_write_path(jrec, JLEAF_PATH1, ap->a_nch->ncp);
 1264             if (*ap->a_vpp)
 1265                 jrecord_write_vnode_ref(jrec, *ap->a_vpp);
 1266         }
 1267     }
 1268     jreclist_done(mp, &jreclist, error);
 1269     return (error);
 1270 }
 1271 
 1272 /*
 1273  * Journal vop_nrmdir { a_nch, a_cred }
 1274  */
 1275 static
 1276 int
 1277 journal_nrmdir(struct vop_nrmdir_args *ap)
 1278 {
 1279     struct jrecord_list jreclist;
 1280     struct jrecord jreccache;
 1281     struct jrecord *jrec;
 1282     struct mount *mp;
 1283     int error;
 1284 
 1285     mp = ap->a_head.a_ops->head.vv_mount;
 1286     if (jreclist_init(mp, &jreclist, &jreccache, JTYPE_RMDIR)) {
 1287         jreclist_undo_file(&jreclist, ap->a_nch->ncp->nc_vp,
 1288                            JRUNDO_VATTR|JRUNDO_GETVP, 0, 0);
 1289     }
 1290     error = vop_journal_operate_ap(&ap->a_head);
 1291     if (error == 0) {
 1292         TAILQ_FOREACH(jrec, &jreclist.list, user_entry) {
 1293             jrecord_write_cred(jrec, NULL, ap->a_cred);
 1294             jrecord_write_path(jrec, JLEAF_PATH1, ap->a_nch->ncp);
 1295         }
 1296     }
 1297     jreclist_done(mp, &jreclist, error);
 1298     return (error);
 1299 }
 1300 
 1301 /*
 1302  * Journal vop_nrename { a_fnch, a_tnch, a_cred }
 1303  */
 1304 static
 1305 int
 1306 journal_nrename(struct vop_nrename_args *ap)
 1307 {
 1308     struct jrecord_list jreclist;
 1309     struct jrecord jreccache;
 1310     struct jrecord *jrec;
 1311     struct mount *mp;
 1312     int error;
 1313 
 1314     mp = ap->a_head.a_ops->head.vv_mount;
 1315     if (jreclist_init(mp, &jreclist, &jreccache, JTYPE_RENAME) &&
 1316         ap->a_tnch->ncp->nc_vp
 1317     ) {
 1318         jreclist_undo_file(&jreclist, ap->a_tnch->ncp->nc_vp, 
 1319                            JRUNDO_ALL|JRUNDO_GETVP|JRUNDO_CONDLINK, 0, -1);
 1320     }
 1321     error = vop_journal_operate_ap(&ap->a_head);
 1322     if (error == 0) {
 1323         TAILQ_FOREACH(jrec, &jreclist.list, user_entry) {
 1324             jrecord_write_cred(jrec, NULL, ap->a_cred);
 1325             jrecord_write_path(jrec, JLEAF_PATH1, ap->a_fnch->ncp);
 1326             jrecord_write_path(jrec, JLEAF_PATH2, ap->a_tnch->ncp);
 1327         }
 1328     }
 1329     jreclist_done(mp, &jreclist, error);
 1330     return (error);
 1331 }
 1332 

Cache object: 2cbdedb4252a6f10a1d19d1192a9e870


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]


This page is part of the FreeBSD/Linux Kernel Cross-Reference, and was automatically generated using a modified version of the LXR engine.