The Design and Implementation of the FreeBSD Operating System, Second Edition
Now available: The Design and Implementation of the FreeBSD Operating System (Second Edition)


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]

FreeBSD/Linux Kernel Cross Reference
sys/kern/vfs_sync.c

Version: -  FREEBSD  -  FREEBSD-13-STABLE  -  FREEBSD-13-0  -  FREEBSD-12-STABLE  -  FREEBSD-12-0  -  FREEBSD-11-STABLE  -  FREEBSD-11-0  -  FREEBSD-10-STABLE  -  FREEBSD-10-0  -  FREEBSD-9-STABLE  -  FREEBSD-9-0  -  FREEBSD-8-STABLE  -  FREEBSD-8-0  -  FREEBSD-7-STABLE  -  FREEBSD-7-0  -  FREEBSD-6-STABLE  -  FREEBSD-6-0  -  FREEBSD-5-STABLE  -  FREEBSD-5-0  -  FREEBSD-4-STABLE  -  FREEBSD-3-STABLE  -  FREEBSD22  -  l41  -  OPENBSD  -  linux-2.6  -  MK84  -  PLAN9  -  xnu-8792 
SearchContext: -  none  -  3  -  10 

    1 /*       $OpenBSD: vfs_sync.c,v 1.68 2022/08/14 01:58:28 jsg Exp $  */
    2 
    3 /*
    4  *  Portions of this code are:
    5  *
    6  * Copyright (c) 1989, 1993
    7  *      The Regents of the University of California.  All rights reserved.
    8  * (c) UNIX System Laboratories, Inc.
    9  * All or some portions of this file are derived from material licensed
   10  * to the University of California by American Telephone and Telegraph
   11  * Co. or Unix System Laboratories, Inc. and are reproduced herein with
   12  * the permission of UNIX System Laboratories, Inc.
   13  *
   14  * Redistribution and use in source and binary forms, with or without
   15  * modification, are permitted provided that the following conditions
   16  * are met:
   17  * 1. Redistributions of source code must retain the above copyright
   18  *    notice, this list of conditions and the following disclaimer.
   19  * 2. Redistributions in binary form must reproduce the above copyright
   20  *    notice, this list of conditions and the following disclaimer in the
   21  *    documentation and/or other materials provided with the distribution.
   22  * 3. Neither the name of the University nor the names of its contributors
   23  *    may be used to endorse or promote products derived from this software
   24  *    without specific prior written permission.
   25  *
   26  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
   27  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
   28  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
   29  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
   30  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
   31  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
   32  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
   33  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
   34  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
   35  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
   36  * SUCH DAMAGE.
   37  */
   38 
   39 /*
   40  * Syncer daemon
   41  */
   42 
   43 #include <sys/queue.h>
   44 #include <sys/param.h>
   45 #include <sys/systm.h>
   46 #include <sys/proc.h>
   47 #include <sys/mount.h>
   48 #include <sys/vnode.h>
   49 #include <sys/lock.h>
   50 #include <sys/malloc.h>
   51 #include <sys/time.h>
   52 
   53 #ifdef FFS_SOFTUPDATES
   54 int   softdep_process_worklist(struct mount *);
   55 #endif
   56 
   57 /*
   58  * The workitem queue.
   59  */
   60 #define SYNCER_MAXDELAY 32              /* maximum sync delay time */
   61 #define SYNCER_DEFAULT 30               /* default sync delay time */
   62 int syncer_maxdelay = SYNCER_MAXDELAY;  /* maximum delay time */
   63 int syncdelay = SYNCER_DEFAULT;         /* time to delay syncing vnodes */
   64 
   65 int rushjob = 0;                        /* number of slots to run ASAP */
   66 int stat_rush_requests = 0;             /* number of rush requests */
   67 
   68 int syncer_delayno = 0;
   69 long syncer_mask;
   70 LIST_HEAD(synclist, vnode);
   71 static struct synclist *syncer_workitem_pending;
   72 
   73 struct proc *syncerproc;
   74 int syncer_chan;
   75 
   76 /*
   77  * The workitem queue.
   78  *
   79  * It is useful to delay writes of file data and filesystem metadata
   80  * for tens of seconds so that quickly created and deleted files need
   81  * not waste disk bandwidth being created and removed. To realize this,
   82  * we append vnodes to a "workitem" queue. When running with a soft
   83  * updates implementation, most pending metadata dependencies should
   84  * not wait for more than a few seconds. Thus, mounted block devices
   85  * are delayed only about half the time that file data is delayed.
   86  * Similarly, directory updates are more critical, so are only delayed
   87  * about a third the time that file data is delayed. Thus, there are
   88  * SYNCER_MAXDELAY queues that are processed round-robin at a rate of
   89  * one each second (driven off the filesystem syncer process). The
   90  * syncer_delayno variable indicates the next queue that is to be processed.
   91  * Items that need to be processed soon are placed in this queue:
   92  *
   93  *      syncer_workitem_pending[syncer_delayno]
   94  *
   95  * A delay of fifteen seconds is done by placing the request fifteen
   96  * entries later in the queue:
   97  *
   98  *      syncer_workitem_pending[(syncer_delayno + 15) & syncer_mask]
   99  *
  100  */
  101 
  102 void
  103 vn_initialize_syncerd(void)
  104 {
  105         syncer_workitem_pending = hashinit(syncer_maxdelay, M_VNODE, M_WAITOK,
  106             &syncer_mask);
  107         syncer_maxdelay = syncer_mask + 1;
  108 }
  109 
  110 /*
  111  * Add an item to the syncer work queue.
  112  */
  113 void
  114 vn_syncer_add_to_worklist(struct vnode *vp, int delay)
  115 {
  116         int s, slot;
  117 
  118         if (delay > syncer_maxdelay - 2)
  119                 delay = syncer_maxdelay - 2;
  120         slot = (syncer_delayno + delay) & syncer_mask;
  121 
  122         s = splbio();
  123         if (vp->v_bioflag & VBIOONSYNCLIST)
  124                 LIST_REMOVE(vp, v_synclist);
  125 
  126         vp->v_bioflag |= VBIOONSYNCLIST;
  127         LIST_INSERT_HEAD(&syncer_workitem_pending[slot], vp, v_synclist);
  128         splx(s);
  129 }
  130 
  131 /*
  132  * System filesystem synchronizer daemon.
  133  */
  134 void
  135 syncer_thread(void *arg)
  136 {
  137         uint64_t elapsed, start;
  138         struct proc *p = curproc;
  139         struct synclist *slp;
  140         struct vnode *vp;
  141         int s;
  142 
  143         for (;;) {
  144                 start = getnsecuptime();
  145 
  146                 /*
  147                  * Push files whose dirty time has expired.
  148                  */
  149                 s = splbio();
  150                 slp = &syncer_workitem_pending[syncer_delayno];
  151 
  152                 syncer_delayno += 1;
  153                 if (syncer_delayno == syncer_maxdelay)
  154                         syncer_delayno = 0;
  155 
  156                 while ((vp = LIST_FIRST(slp)) != NULL) {
  157                         if (vget(vp, LK_EXCLUSIVE | LK_NOWAIT)) {
  158                                 /*
  159                                  * If we fail to get the lock, we move this
  160                                  * vnode one second ahead in time.
  161                                  * XXX - no good, but the best we can do.
  162                                  */
  163                                 vn_syncer_add_to_worklist(vp, 1);
  164                                 continue;
  165                         }
  166                         splx(s);
  167                         (void) VOP_FSYNC(vp, p->p_ucred, MNT_LAZY, p);
  168                         vput(vp);
  169                         s = splbio();
  170                         if (LIST_FIRST(slp) == vp) {
  171                                 /*
  172                                  * Note: disk vps can remain on the
  173                                  * worklist too with no dirty blocks, but
  174                                  * since sync_fsync() moves it to a different
  175                                  * slot we are safe.
  176                                  */
  177 #ifdef DIAGNOSTIC
  178                                 if (LIST_FIRST(&vp->v_dirtyblkhd) == NULL &&
  179                                     vp->v_type != VBLK) {
  180                                         vprint("fsync failed", vp);
  181                                         if (vp->v_mount != NULL)
  182                                                 printf("mounted on: %s\n",
  183                                                     vp->v_mount->mnt_stat.f_mntonname);
  184                                         panic("%s: fsync failed", __func__);
  185                                 }
  186 #endif /* DIAGNOSTIC */
  187                                 /*
  188                                  * Put us back on the worklist.  The worklist
  189                                  * routine will remove us from our current
  190                                  * position and then add us back in at a later
  191                                  * position.
  192                                  */
  193                                 vn_syncer_add_to_worklist(vp, syncdelay);
  194                         }
  195 
  196                         sched_pause(yield);
  197                 }
  198 
  199                 splx(s);
  200 
  201 #ifdef FFS_SOFTUPDATES
  202                 /*
  203                  * Do soft update processing.
  204                  */
  205                 softdep_process_worklist(NULL);
  206 #endif
  207 
  208                 /*
  209                  * The variable rushjob allows the kernel to speed up the
  210                  * processing of the filesystem syncer process. A rushjob
  211                  * value of N tells the filesystem syncer to process the next
  212                  * N seconds worth of work on its queue ASAP. Currently rushjob
  213                  * is used by the soft update code to speed up the filesystem
  214                  * syncer process when the incore state is getting so far
  215                  * ahead of the disk that the kernel memory pool is being
  216                  * threatened with exhaustion.
  217                  */
  218                 if (rushjob > 0) {
  219                         rushjob -= 1;
  220                         continue;
  221                 }
  222 
  223                 /*
  224                  * If it has taken us less than a second to process the
  225                  * current work, then wait. Otherwise start right over
  226                  * again. We can still lose time if any single round
  227                  * takes more than two seconds, but it does not really
  228                  * matter as we are just trying to generally pace the
  229                  * filesystem activity.
  230                  */
  231                 elapsed = getnsecuptime() - start;
  232                 if (elapsed < SEC_TO_NSEC(1)) {
  233                         tsleep_nsec(&syncer_chan, PPAUSE, "syncer",
  234                             SEC_TO_NSEC(1) - elapsed);
  235                 }
  236         }
  237 }
  238 
  239 /*
  240  * Request the syncer daemon to speed up its work.
  241  * We never push it to speed up more than half of its
  242  * normal turn time, otherwise it could take over the cpu.
  243  */
  244 int
  245 speedup_syncer(void)
  246 {
  247         if (syncerproc)
  248                 wakeup_proc(syncerproc, &syncer_chan);
  249         if (rushjob < syncdelay / 2) {
  250                 rushjob += 1;
  251                 stat_rush_requests += 1;
  252                 return 1;
  253         }
  254         return 0;
  255 }
  256 
  257 /* Routine to create and manage a filesystem syncer vnode. */
  258 int   sync_fsync(void *);
  259 int   sync_inactive(void *);
  260 int   sync_print(void *);
  261 
  262 const struct vops sync_vops = {
  263         .vop_close      = nullop,
  264         .vop_fsync      = sync_fsync,
  265         .vop_inactive   = sync_inactive,
  266         .vop_reclaim    = nullop,
  267         .vop_lock       = nullop,
  268         .vop_unlock     = nullop,
  269         .vop_islocked   = nullop,
  270         .vop_print      = sync_print
  271 };
  272 
  273 /*
  274  * Create a new filesystem syncer vnode for the specified mount point.
  275  */
  276 int
  277 vfs_allocate_syncvnode(struct mount *mp)
  278 {
  279         struct vnode *vp;
  280         static long start, incr, next;
  281         int error;
  282 
  283         /* Allocate a new vnode */
  284         if ((error = getnewvnode(VT_VFS, mp, &sync_vops, &vp)) != 0) {
  285                 mp->mnt_syncer = NULL;
  286                 return (error);
  287         }
  288         vp->v_writecount = 1;
  289         vp->v_type = VNON;
  290         /*
  291          * Place the vnode onto the syncer worklist. We attempt to
  292          * scatter them about on the list so that they will go off
  293          * at evenly distributed times even if all the filesystems
  294          * are mounted at once.
  295          */
  296         next += incr;
  297         if (next == 0 || next > syncer_maxdelay) {
  298                 start /= 2;
  299                 incr /= 2;
  300                 if (start == 0) {
  301                         start = syncer_maxdelay / 2;
  302                         incr = syncer_maxdelay;
  303                 }
  304                 next = start;
  305         }
  306         vn_syncer_add_to_worklist(vp, next);
  307         mp->mnt_syncer = vp;
  308         return (0);
  309 }
  310 
  311 /*
  312  * Do a lazy sync of the filesystem.
  313  */
  314 int
  315 sync_fsync(void *v)
  316 {
  317         struct vop_fsync_args *ap = v;
  318         struct vnode *syncvp = ap->a_vp;
  319         struct mount *mp = syncvp->v_mount;
  320         int asyncflag;
  321 
  322         /*
  323          * We only need to do something if this is a lazy evaluation.
  324          */
  325         if (ap->a_waitfor != MNT_LAZY)
  326                 return (0);
  327 
  328         /*
  329          * Move ourselves to the back of the sync list.
  330          */
  331         vn_syncer_add_to_worklist(syncvp, syncdelay);
  332 
  333         /*
  334          * Walk the list of vnodes pushing all that are dirty and
  335          * not already on the sync list.
  336          */
  337         if (vfs_busy(mp, VB_READ|VB_NOWAIT) == 0) {
  338                 asyncflag = mp->mnt_flag & MNT_ASYNC;
  339                 mp->mnt_flag &= ~MNT_ASYNC;
  340                 VFS_SYNC(mp, MNT_LAZY, 0, ap->a_cred, ap->a_p);
  341                 if (asyncflag)
  342                         mp->mnt_flag |= MNT_ASYNC;
  343                 vfs_unbusy(mp);
  344         }
  345 
  346         return (0);
  347 }
  348 
  349 /*
  350  * The syncer vnode is no longer needed and is being decommissioned.
  351  */
  352 int
  353 sync_inactive(void *v)
  354 {
  355         struct vop_inactive_args *ap = v;
  356 
  357         struct vnode *vp = ap->a_vp;
  358         int s;
  359 
  360         if (vp->v_usecount == 0) {
  361                 VOP_UNLOCK(vp);
  362                 return (0);
  363         }
  364 
  365         vp->v_mount->mnt_syncer = NULL;
  366 
  367         s = splbio();
  368 
  369         LIST_REMOVE(vp, v_synclist);
  370         vp->v_bioflag &= ~VBIOONSYNCLIST;
  371 
  372         splx(s);
  373 
  374         vp->v_writecount = 0;
  375         vput(vp);
  376 
  377         return (0);
  378 }
  379 
  380 /*
  381  * Print out a syncer vnode.
  382  */
  383 int
  384 sync_print(void *v)
  385 {
  386         printf("syncer vnode\n");
  387 
  388         return (0);
  389 }

Cache object: 956a7f32c8b4fead9a96878a0ddc9317


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]


This page is part of the FreeBSD/Linux Kernel Cross-Reference, and was automatically generated using a modified version of the LXR engine.