FreeBSD/Linux Kernel Cross Reference
sys/kern/kern_dsched.c


/*
 * Copyright (c) 2009, 2010 The DragonFly Project.  All rights reserved.
 *
 * This code is derived from software contributed to The DragonFly Project
 * by Alex Hornung <ahornung@gmail.com>
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in
 *    the documentation and/or other materials provided with the
 *    distribution.
 * 3. Neither the name of The DragonFly Project nor the names of its
 *    contributors may be used to endorse or promote products derived
 *    from this software without specific, prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE
 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/proc.h>
#include <sys/sysctl.h>
#include <sys/buf.h>
#include <sys/conf.h>
#include <sys/diskslice.h>
#include <sys/disk.h>
#include <sys/malloc.h>
#include <machine/md_var.h>
#include <sys/ctype.h>
#include <sys/syslog.h>
#include <sys/device.h>
#include <sys/msgport.h>
#include <sys/msgport2.h>
#include <sys/buf2.h>
#include <sys/dsched.h>
#include <sys/fcntl.h>
#include <machine/varargs.h>

TAILQ_HEAD(tdio_list_head, dsched_thread_io);

MALLOC_DEFINE(M_DSCHED, "dsched", "dsched allocs");

static dsched_prepare_t         noop_prepare;
static dsched_teardown_t        noop_teardown;
static dsched_cancel_t          noop_cancel;
static dsched_queue_t           noop_queue;

static void dsched_thread_io_unref_destroy(struct dsched_thread_io *tdio);
static void dsched_sysctl_add_disk(struct dsched_disk_ctx *diskctx, char *name);
static void dsched_disk_ctx_destroy(struct dsched_disk_ctx *diskctx);
static void dsched_thread_io_destroy(struct dsched_thread_io *tdio);
static void dsched_thread_ctx_destroy(struct dsched_thread_ctx *tdctx);

static struct dsched_thread_io *dsched_thread_io_alloc(
                struct disk *dp, struct dsched_thread_ctx *tdctx,
                struct dsched_policy *pol, int tdctx_locked);

static int      dsched_inited = 0;
static int      default_set = 0;

struct lock     dsched_lock;
static int      dsched_debug_enable = 0;

struct dsched_stats     dsched_stats;

struct objcache_malloc_args dsched_disk_ctx_malloc_args = {
        DSCHED_DISK_CTX_MAX_SZ, M_DSCHED };
struct objcache_malloc_args dsched_thread_io_malloc_args = {
        DSCHED_THREAD_IO_MAX_SZ, M_DSCHED };
struct objcache_malloc_args dsched_thread_ctx_malloc_args = {
        DSCHED_THREAD_CTX_MAX_SZ, M_DSCHED };

static struct objcache  *dsched_diskctx_cache;
static struct objcache  *dsched_tdctx_cache;
static struct objcache  *dsched_tdio_cache;

struct lock     dsched_tdctx_lock;

static struct dsched_policy_head dsched_policy_list =
                TAILQ_HEAD_INITIALIZER(dsched_policy_list);

static struct dsched_policy dsched_noop_policy = {
        .name = "noop",

        .prepare = noop_prepare,
        .teardown = noop_teardown,
        .cancel_all = noop_cancel,
        .bio_queue = noop_queue
};

static struct dsched_policy *default_policy = &dsched_noop_policy;

/*
 * dsched_debug() is a debug output function, gated by the dsched.debug
 * sysctl and tunable and implemented with kvprintf.
 */
int
dsched_debug(int level, char *fmt, ...)
{
        __va_list ap;

        __va_start(ap, fmt);
        if (level <= dsched_debug_enable)
                kvprintf(fmt, ap);
        __va_end(ap);

        return 0;
}

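/*
 * Illustrative usage (not part of the original file): a message is printed
 * only when its level is <= the dsched.debug sysctl/tunable, so with
 * dsched.debug=6 the LOG_INFO (level 6) message below would be emitted:
 *
 *      dsched_debug(LOG_INFO, "queueing bio %p\n", bio);
 */
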
/*
 * Called on disk_create().  Tries to read which policy to use from
 * loader.conf; if none is specified, the default policy is used.
 */
void
dsched_disk_create_callback(struct disk *dp, const char *head_name, int unit)
{
        char tunable_key[SPECNAMELEN + 48];
        char sched_policy[DSCHED_POLICY_NAME_LENGTH];
        char *ptr;
        struct dsched_policy *policy = NULL;

        /* Also look for serno stuff? */
        lockmgr(&dsched_lock, LK_EXCLUSIVE);

        ksnprintf(tunable_key, sizeof(tunable_key),
                  "dsched.policy.%s%d", head_name, unit);
        if (TUNABLE_STR_FETCH(tunable_key, sched_policy,
            sizeof(sched_policy)) != 0) {
                policy = dsched_find_policy(sched_policy);
        }

        ksnprintf(tunable_key, sizeof(tunable_key),
                  "dsched.policy.%s", head_name);

        for (ptr = tunable_key; *ptr; ptr++) {
                if (*ptr == '/')
                        *ptr = '-';
        }
        if (!policy && (TUNABLE_STR_FETCH(tunable_key, sched_policy,
            sizeof(sched_policy)) != 0)) {
                policy = dsched_find_policy(sched_policy);
        }

        ksnprintf(tunable_key, sizeof(tunable_key), "dsched.policy.default");
        if (!policy && !default_set &&
            (TUNABLE_STR_FETCH(tunable_key, sched_policy,
                               sizeof(sched_policy)) != 0)) {
                policy = dsched_find_policy(sched_policy);
        }

        if (!policy) {
                if (!default_set && bootverbose) {
                        dsched_debug(0,
                                     "No policy for %s%d specified, "
                                     "or policy not found\n",
                                     head_name, unit);
                }
                dsched_set_policy(dp, default_policy);
        } else {
                dsched_set_policy(dp, policy);
        }

        if (strncmp(head_name, "mapper/", strlen("mapper/")) == 0)
                ksnprintf(tunable_key, sizeof(tunable_key), "%s", head_name);
        else
                ksnprintf(tunable_key, sizeof(tunable_key), "%s%d", head_name, unit);
        for (ptr = tunable_key; *ptr; ptr++) {
                if (*ptr == '/')
                        *ptr = '-';
        }
        dsched_sysctl_add_disk(
            (struct dsched_disk_ctx *)dsched_get_disk_priv(dp),
            tunable_key);

        lockmgr(&dsched_lock, LK_RELEASE);
}

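/*
 * Illustrative loader.conf entries matched by the lookups above (policy
 * names depend on which policy modules are present; "fq" is just an
 * example name):
 *
 *      dsched.policy.da0="fq"          # a specific unit
 *      dsched.policy.da="fq"           # all units of this head
 *      dsched.policy.default="fq"      # global default
 */
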
/*
 * Called from disk_setdiskinfo (or rather _setdiskinfo). This will check if
 * there's any policy associated with the serial number of the device.
 */
void
dsched_disk_update_callback(struct disk *dp, struct disk_info *info)
{
        char tunable_key[SPECNAMELEN + 48];
        char sched_policy[DSCHED_POLICY_NAME_LENGTH];
        struct dsched_policy *policy = NULL;

        if (info->d_serialno == NULL)
                return;

        lockmgr(&dsched_lock, LK_EXCLUSIVE);

        ksnprintf(tunable_key, sizeof(tunable_key), "dsched.policy.%s",
            info->d_serialno);

        if ((TUNABLE_STR_FETCH(tunable_key, sched_policy,
            sizeof(sched_policy)) != 0)) {
                policy = dsched_find_policy(sched_policy);
        }

        if (policy) {
                dsched_switch(dp, policy);
        }

        dsched_sysctl_add_disk(
            (struct dsched_disk_ctx *)dsched_get_disk_priv(dp),
            info->d_serialno);

        lockmgr(&dsched_lock, LK_RELEASE);
}

/*
 * Called on disk_destroy().  Shuts down the scheduler core and cancels all
 * remaining bios.
 */
void
dsched_disk_destroy_callback(struct disk *dp)
{
        struct dsched_policy *old_policy;
        struct dsched_disk_ctx *diskctx;

        lockmgr(&dsched_lock, LK_EXCLUSIVE);

        diskctx = dsched_get_disk_priv(dp);

        old_policy = dp->d_sched_policy;
        dp->d_sched_policy = &dsched_noop_policy;
        old_policy->cancel_all(dsched_get_disk_priv(dp));
        old_policy->teardown(dsched_get_disk_priv(dp));

        if (diskctx->flags & DSCHED_SYSCTL_CTX_INITED)
                sysctl_ctx_free(&diskctx->sysctl_ctx);

        policy_destroy(dp);
        atomic_subtract_int(&old_policy->ref_count, 1);
        KKASSERT(old_policy->ref_count >= 0);

        lockmgr(&dsched_lock, LK_RELEASE);
}

/*
 * Caller must have dp->diskctx locked
 */
void
dsched_queue(struct disk *dp, struct bio *bio)
{
        struct dsched_thread_ctx        *tdctx;
        struct dsched_thread_io         *tdio;
        struct dsched_disk_ctx          *diskctx;
        int     error;

        if (dp->d_sched_policy == &dsched_noop_policy) {
                dsched_clr_buf_priv(bio->bio_buf);
                atomic_add_int(&dsched_stats.no_tdctx, 1);
                dsched_strategy_raw(dp, bio);
                return;
        }

        error = 0;
        tdctx = dsched_get_buf_priv(bio->bio_buf);
        if (tdctx == NULL) {
                /* We don't handle this case, let dsched dispatch */
                atomic_add_int(&dsched_stats.no_tdctx, 1);
                dsched_strategy_raw(dp, bio);
                return;
        }

        DSCHED_THREAD_CTX_LOCK(tdctx);

        /*
         * XXX:
         * iterate in reverse to make sure we find the most up-to-date
         * tdio for a given disk. After a switch it may take some time
         * for everything to clean up.
         */
        TAILQ_FOREACH_REVERSE(tdio, &tdctx->tdio_list, tdio_list_head, link) {
                if (tdio->dp == dp) {
                        dsched_thread_io_ref(tdio);
                        break;
                }
        }
        if (tdio == NULL) {
                tdio = dsched_thread_io_alloc(dp, tdctx, dp->d_sched_policy, 1);
                dsched_thread_io_ref(tdio);
        }

        DSCHED_THREAD_CTX_UNLOCK(tdctx);
        dsched_clr_buf_priv(bio->bio_buf);
        dsched_thread_ctx_unref(tdctx); /* acquired on new_buf */

        KKASSERT(tdio != NULL);
        diskctx = dsched_get_disk_priv(dp);
        dsched_disk_ctx_ref(diskctx);

        if (dp->d_sched_policy != &dsched_noop_policy)
                KKASSERT(tdio->debug_policy == dp->d_sched_policy);

        KKASSERT(tdio->debug_inited == 0xF00F1234);

        error = dp->d_sched_policy->bio_queue(diskctx, tdio, bio);

        if (error) {
                dsched_strategy_raw(dp, bio);
        }
        dsched_disk_ctx_unref(diskctx);
        dsched_thread_io_unref(tdio);
}

/*
 * Called from the module_init or module_attach of each policy; registers
 * the policy in the local policy list.
 */
int
dsched_register(struct dsched_policy *d_policy)
{
        struct dsched_policy *policy;
        int error = 0;

        lockmgr(&dsched_lock, LK_EXCLUSIVE);

        policy = dsched_find_policy(d_policy->name);

        if (!policy) {
                TAILQ_INSERT_TAIL(&dsched_policy_list, d_policy, link);
                atomic_add_int(&d_policy->ref_count, 1);
        } else {
                dsched_debug(LOG_ERR, "Policy with name %s already registered!\n",
                    d_policy->name);
                error = EEXIST;
        }

        lockmgr(&dsched_lock, LK_RELEASE);
        return error;
}

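/*
 * Sketch (illustrative, not from this file) of how a hypothetical policy
 * module "mypol" would register itself, following the pattern of the noop
 * policy defined above:
 *
 *      static struct dsched_policy dsched_mypol_policy = {
 *              .name = "mypol",
 *              .prepare = mypol_prepare,
 *              .teardown = mypol_teardown,
 *              .cancel_all = mypol_cancel,
 *              .bio_queue = mypol_queue,
 *      };
 *
 *      error = dsched_register(&dsched_mypol_policy);  // on MOD_LOAD
 *      ...
 *      error = dsched_unregister(&dsched_mypol_policy); // on MOD_UNLOAD
 */
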
/*
 * Called from the module_detach of each policy; unregisters the policy.
 */
int
dsched_unregister(struct dsched_policy *d_policy)
{
        struct dsched_policy *policy;

        lockmgr(&dsched_lock, LK_EXCLUSIVE);
        policy = dsched_find_policy(d_policy->name);

        if (policy) {
                if (policy->ref_count > 1) {
                        lockmgr(&dsched_lock, LK_RELEASE);
                        return EBUSY;
                }
                TAILQ_REMOVE(&dsched_policy_list, policy, link);
                atomic_subtract_int(&policy->ref_count, 1);
                KKASSERT(policy->ref_count == 0);
        }
        lockmgr(&dsched_lock, LK_RELEASE);

        return 0;
}

/*
 * Switches the policy by first removing the old one and then
 * enabling the new one.
 */
int
dsched_switch(struct disk *dp, struct dsched_policy *new_policy)
{
        struct dsched_policy *old_policy;

        /* If we are asked to set the same policy, do nothing */
        if (dp->d_sched_policy == new_policy)
                return 0;

        /* lock everything down, diskwise */
        lockmgr(&dsched_lock, LK_EXCLUSIVE);
        old_policy = dp->d_sched_policy;

        atomic_subtract_int(&old_policy->ref_count, 1);
        KKASSERT(old_policy->ref_count >= 0);

        dp->d_sched_policy = &dsched_noop_policy;
        old_policy->teardown(dsched_get_disk_priv(dp));
        policy_destroy(dp);

        /* Bring everything back to life */
        dsched_set_policy(dp, new_policy);
        lockmgr(&dsched_lock, LK_RELEASE);

        return 0;
}

/*
 * Loads a given policy and attaches it to the specified disk.
 * Also initializes the core for the policy.
 */
void
dsched_set_policy(struct disk *dp, struct dsched_policy *new_policy)
{
        int locked = 0;

        /* Check if it is locked already. if not, we acquire the dsched lock */
        if ((lockstatus(&dsched_lock, curthread)) != LK_EXCLUSIVE) {
                lockmgr(&dsched_lock, LK_EXCLUSIVE);
                locked = 1;
        }

        DSCHED_GLOBAL_THREAD_CTX_LOCK();

        policy_new(dp, new_policy);
        new_policy->prepare(dsched_get_disk_priv(dp));
        dp->d_sched_policy = new_policy;
        atomic_add_int(&new_policy->ref_count, 1);

        DSCHED_GLOBAL_THREAD_CTX_UNLOCK();

        kprintf("disk scheduler: set policy of %s to %s\n", dp->d_cdev->si_name,
            new_policy->name);

        /* If we acquired the lock, we also get rid of it */
        if (locked)
                lockmgr(&dsched_lock, LK_RELEASE);
}

struct dsched_policy *
dsched_find_policy(char *search)
{
        struct dsched_policy *policy;
        struct dsched_policy *policy_found = NULL;
        int locked = 0;

        /* Check if it is locked already. if not, we acquire the dsched lock */
        if ((lockstatus(&dsched_lock, curthread)) != LK_EXCLUSIVE) {
                lockmgr(&dsched_lock, LK_EXCLUSIVE);
                locked = 1;
        }

        TAILQ_FOREACH(policy, &dsched_policy_list, link) {
                if (!strcmp(policy->name, search)) {
                        policy_found = policy;
                        break;
                }
        }

        /* If we acquired the lock, we also get rid of it */
        if (locked)
                lockmgr(&dsched_lock, LK_RELEASE);

        return policy_found;
}

/*
 * Returns ref'd disk
 */
struct disk *
dsched_find_disk(char *search)
{
        struct disk marker;
        struct disk *dp = NULL;

        while ((dp = disk_enumerate(&marker, dp)) != NULL) {
                if (strcmp(dp->d_cdev->si_name, search) == 0) {
                        disk_enumerate_stop(&marker, NULL);
                        /* leave ref on dp */
                        break;
                }
        }
        return dp;
}

struct disk *
dsched_disk_enumerate(struct disk *marker, struct disk *dp,
                      struct dsched_policy *policy)
{
        while ((dp = disk_enumerate(marker, dp)) != NULL) {
                if (dp->d_sched_policy == policy)
                        break;
        }
        return dp;
}

struct dsched_policy *
dsched_policy_enumerate(struct dsched_policy *pol)
{
        if (!pol)
                return (TAILQ_FIRST(&dsched_policy_list));
        else
                return (TAILQ_NEXT(pol, link));
}

void
dsched_cancel_bio(struct bio *bp)
{
        bp->bio_buf->b_error = ENXIO;
        bp->bio_buf->b_flags |= B_ERROR;
        bp->bio_buf->b_resid = bp->bio_buf->b_bcount;

        biodone(bp);
}

void
dsched_strategy_raw(struct disk *dp, struct bio *bp)
{
        /*
         * Ideally, this stuff shouldn't be needed... but just in case, we
         * leave it in to avoid panics.
         */
        KASSERT(dp->d_rawdev != NULL, ("dsched_strategy_raw sees NULL d_rawdev!!"));
        if (bp->bio_track != NULL) {
                dsched_debug(LOG_INFO,
                    "dsched_strategy_raw sees non-NULL bio_track!! "
                    "bio: %p\n", bp);
                bp->bio_track = NULL;
        }
        dev_dstrategy(dp->d_rawdev, bp);
}

void
dsched_strategy_sync(struct disk *dp, struct bio *bio)
{
        struct buf *bp, *nbp;
        struct bio *nbio;

        bp = bio->bio_buf;

        nbp = getpbuf(NULL);
        nbio = &nbp->b_bio1;

        nbp->b_cmd = bp->b_cmd;
        nbp->b_bufsize = bp->b_bufsize;
        nbp->b_runningbufspace = bp->b_runningbufspace;
        nbp->b_bcount = bp->b_bcount;
        nbp->b_resid = bp->b_resid;
        nbp->b_data = bp->b_data;
#if 0
        /*
         * Buffers undergoing device I/O do not need a kvabase/size.
         */
        nbp->b_kvabase = bp->b_kvabase;
        nbp->b_kvasize = bp->b_kvasize;
#endif
        nbp->b_dirtyend = bp->b_dirtyend;

        nbio->bio_done = biodone_sync;
        nbio->bio_flags |= BIO_SYNC;
        nbio->bio_track = NULL;

        nbio->bio_caller_info1.ptr = dp;
        nbio->bio_offset = bio->bio_offset;

        dev_dstrategy(dp->d_rawdev, nbio);
        biowait(nbio, "dschedsync");
        bp->b_resid = nbp->b_resid;
        bp->b_error = nbp->b_error;
        biodone(bio);
#if 0
        nbp->b_kvabase = NULL;
        nbp->b_kvasize = 0;
#endif
        relpbuf(nbp, NULL);
}

void
dsched_strategy_async(struct disk *dp, struct bio *bio, biodone_t *done, void *priv)
{
        struct bio *nbio;

        nbio = push_bio(bio);
        nbio->bio_done = done;
        nbio->bio_offset = bio->bio_offset;

        dsched_set_bio_dp(nbio, dp);
        dsched_set_bio_priv(nbio, priv);

        getmicrotime(&nbio->bio_caller_info3.tv);
        dev_dstrategy(dp->d_rawdev, nbio);
}

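/*
 * Sketch (illustrative, hypothetical "mypol" policy) of the biodone
 * callback a policy would pass to dsched_strategy_async(): pop the pushed
 * bio and complete the original request, mirroring what
 * request_polling_biodone() below does.
 *
 *      static void
 *      mypol_completed(struct bio *bp)
 *      {
 *              struct bio *obio;
 *
 *              obio = pop_bio(bp);
 *              biodone(obio);
 *      }
 */
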
/*
 * A special biodone callback function, used by policies that implement
 * request polling.
 */
static void
request_polling_biodone(struct bio *bp)
{
        struct dsched_disk_ctx *diskctx = NULL;
        struct disk *dp = NULL;
        struct bio *obio;
        struct dsched_policy *policy;

        dp = dsched_get_bio_dp(bp);
        policy = dp->d_sched_policy;
        diskctx = dsched_get_disk_priv(dp);
        KKASSERT(diskctx && policy);
        dsched_disk_ctx_ref(diskctx);

        /*
         * XXX:
         * the bio_done function must not block!
         */
        if (diskctx->dp->d_sched_policy->bio_done)
                diskctx->dp->d_sched_policy->bio_done(bp);

        obio = pop_bio(bp);
        biodone(obio);

        atomic_subtract_int(&diskctx->current_tag_queue_depth, 1);

        /*
         * Call the polling function.
         * XXX:
         * the polling function must not block!
         */
        if (policy->polling_func)
                policy->polling_func(diskctx);
        else
                dsched_debug(0, "dsched: the policy uses request polling without a polling function!\n");
        dsched_disk_ctx_unref(diskctx);
}

/*
 * A special dsched strategy, used by policies that implement request
 * polling (a polling function).
 *
 * The strategy is just like dsched_strategy_async(), but the biodone
 * callback is set to a preset one.
 *
 * If the policy needs its own biodone callback, it should register it
 * in the policy structure (bio_done field).
 *
 * The current_tag_queue_depth is maintained by this function and the
 * request_polling_biodone() function.
 */
void
dsched_strategy_request_polling(struct disk *dp, struct bio *bio, struct dsched_disk_ctx *diskctx)
{
        atomic_add_int(&diskctx->current_tag_queue_depth, 1);
        dsched_strategy_async(dp, bio, request_polling_biodone, dsched_get_bio_priv(bio));
}

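/*
 * Sketch (illustrative, hypothetical "mypol" policy) of a polling function
 * driven by request_polling_biodone(): keep dispatching queued bios while
 * the device's tag queue has room.  mypol_dequeue() stands in for whatever
 * internal queue the policy maintains.
 *
 *      static void
 *      mypol_polling_func(struct dsched_disk_ctx *diskctx)
 *      {
 *              struct bio *bio;
 *
 *              while (diskctx->current_tag_queue_depth <
 *                     diskctx->max_tag_queue_depth &&
 *                     (bio = mypol_dequeue(diskctx)) != NULL) {
 *                      dsched_strategy_request_polling(diskctx->dp,
 *                                                      bio, diskctx);
 *              }
 *      }
 */
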
/*
 * Ref and deref various structures.  The 1->0 transition of the reference
 * count actually transitions 1->0x80000000 and causes the object to be
 * destroyed.  It is possible for transitory references to occur on the
 * object while it is being destroyed.  We use bit 31 to indicate that
 * destruction is in progress and to prevent nested destructions.
 */
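/*
 * Illustrative transitions (not from the original file; bit 31 marks
 * destruction in progress):
 *
 *      ref:   2          -> 3
 *      unref: 2          -> 1
 *      unref: 1          -> 0x80000000   (object destroyed)
 *      ref:   0x80000000 -> 0x80000001   (transitory ref during destroy)
 *      unref: 0x80000001 -> 0x80000000   (no nested destruction)
 */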
void
dsched_disk_ctx_ref(struct dsched_disk_ctx *diskctx)
{
        atomic_add_int(&diskctx->refcount, 1);
}

void
dsched_thread_io_ref(struct dsched_thread_io *tdio)
{
        atomic_add_int(&tdio->refcount, 1);
}

void
dsched_thread_ctx_ref(struct dsched_thread_ctx *tdctx)
{
        atomic_add_int(&tdctx->refcount, 1);
}

void
dsched_disk_ctx_unref(struct dsched_disk_ctx *diskctx)
{
        int refs;
        int nrefs;

        /*
         * Handle 1->0 transitions for diskctx and nested destruction
         * recursions.  If the refs are already in destruction mode (bit 31
         * set) on the 1->0 transition we don't try to destruct it again.
         *
         * 0x80000001->0x80000000 transitions are handled normally and
         * thus avoid nested destruction.
         */
        for (;;) {
                refs = diskctx->refcount;
                cpu_ccfence();
                nrefs = refs - 1;

                KKASSERT(((refs ^ nrefs) & 0x80000000) == 0);
                if (nrefs) {
                        if (atomic_cmpset_int(&diskctx->refcount, refs, nrefs))
                                break;
                        continue;
                }
                nrefs = 0x80000000;
                if (atomic_cmpset_int(&diskctx->refcount, refs, nrefs)) {
                        dsched_disk_ctx_destroy(diskctx);
                        break;
                }
        }
}

static
void
dsched_disk_ctx_destroy(struct dsched_disk_ctx *diskctx)
{
        struct dsched_thread_io *tdio;
        int refs;
        int nrefs;

#if 0
        kprintf("diskctx (%p) destruction started, trace:\n", diskctx);
        print_backtrace(4);
#endif
        lockmgr(&diskctx->lock, LK_EXCLUSIVE);
        while ((tdio = TAILQ_FIRST(&diskctx->tdio_list)) != NULL) {
                KKASSERT(tdio->flags & DSCHED_LINKED_DISK_CTX);
                TAILQ_REMOVE(&diskctx->tdio_list, tdio, dlink);
                atomic_clear_int(&tdio->flags, DSCHED_LINKED_DISK_CTX);
                tdio->diskctx = NULL;
                /* XXX tdio->diskctx->dp->d_sched_policy->destroy_tdio(tdio);*/
                lockmgr(&diskctx->lock, LK_RELEASE);
                dsched_thread_io_unref_destroy(tdio);
                lockmgr(&diskctx->lock, LK_EXCLUSIVE);
        }
        lockmgr(&diskctx->lock, LK_RELEASE);

        /*
         * Expect diskctx->refcount to be 0x80000000.  If it isn't someone
         * else still has a temporary ref on the diskctx and we have to
         * transition it back to an undestroyed-state (albeit without any
         * associations), so the other user destroys it properly when the
         * ref is released.
         */
        while ((refs = diskctx->refcount) != 0x80000000) {
                kprintf("dsched_disk_ctx: destroy race diskctx=%p\n", diskctx);
                cpu_ccfence();
                KKASSERT(refs & 0x80000000);
                nrefs = refs & 0x7FFFFFFF;
                if (atomic_cmpset_int(&diskctx->refcount, refs, nrefs))
                        return;
        }

        /*
         * Really for sure now.
         */
        if (diskctx->dp->d_sched_policy->destroy_diskctx)
                diskctx->dp->d_sched_policy->destroy_diskctx(diskctx);
        objcache_put(dsched_diskctx_cache, diskctx);
        atomic_subtract_int(&dsched_stats.diskctx_allocations, 1);
}

void
dsched_thread_io_unref(struct dsched_thread_io *tdio)
{
        int refs;
        int nrefs;

        /*
         * Handle 1->0 transitions for tdio and nested destruction
         * recursions.  If the refs are already in destruction mode (bit 31
         * set) on the 1->0 transition we don't try to destruct it again.
         *
         * 0x80000001->0x80000000 transitions are handled normally and
         * thus avoid nested destruction.
         */
        for (;;) {
                refs = tdio->refcount;
                cpu_ccfence();
                nrefs = refs - 1;

                KKASSERT(((refs ^ nrefs) & 0x80000000) == 0);
                if (nrefs) {
                        if (atomic_cmpset_int(&tdio->refcount, refs, nrefs))
                                break;
                        continue;
                }
                nrefs = 0x80000000;
                if (atomic_cmpset_int(&tdio->refcount, refs, nrefs)) {
                        dsched_thread_io_destroy(tdio);
                        break;
                }
        }
}

/*
 * Unref and destroy the tdio even if additional refs are present.
 */
static
void
dsched_thread_io_unref_destroy(struct dsched_thread_io *tdio)
{
        int refs;
        int nrefs;

        /*
         * If not already transitioned to destroy-in-progress we transition
         * to destroy-in-progress, cleanup our ref, and destroy the tdio.
         */
        for (;;) {
                refs = tdio->refcount;
                cpu_ccfence();
                nrefs = refs - 1;

                KKASSERT(((refs ^ nrefs) & 0x80000000) == 0);
                if (nrefs & 0x80000000) {
                        if (atomic_cmpset_int(&tdio->refcount, refs, nrefs))
                                break;
                        continue;
                }
                nrefs |= 0x80000000;
                if (atomic_cmpset_int(&tdio->refcount, refs, nrefs)) {
                        dsched_thread_io_destroy(tdio);
                        break;
                }
        }
}

static void
dsched_thread_io_destroy(struct dsched_thread_io *tdio)
{
        struct dsched_thread_ctx *tdctx;
        struct dsched_disk_ctx  *diskctx;
        int refs;
        int nrefs;

#if 0
        kprintf("tdio (%p) destruction started, trace:\n", tdio);
        print_backtrace(8);
#endif
        KKASSERT(tdio->qlength == 0);

        while ((diskctx = tdio->diskctx) != NULL) {
                dsched_disk_ctx_ref(diskctx);
                lockmgr(&diskctx->lock, LK_EXCLUSIVE);
                if (diskctx != tdio->diskctx) {
                        lockmgr(&diskctx->lock, LK_RELEASE);
                        dsched_disk_ctx_unref(diskctx);
                        continue;
                }
                KKASSERT(tdio->flags & DSCHED_LINKED_DISK_CTX);
                if (diskctx->dp->d_sched_policy->destroy_tdio)
                        diskctx->dp->d_sched_policy->destroy_tdio(tdio);
                TAILQ_REMOVE(&diskctx->tdio_list, tdio, dlink);
                atomic_clear_int(&tdio->flags, DSCHED_LINKED_DISK_CTX);
                tdio->diskctx = NULL;
                dsched_thread_io_unref(tdio);
                lockmgr(&diskctx->lock, LK_RELEASE);
                dsched_disk_ctx_unref(diskctx);
        }
        while ((tdctx = tdio->tdctx) != NULL) {
                dsched_thread_ctx_ref(tdctx);
                lockmgr(&tdctx->lock, LK_EXCLUSIVE);
                if (tdctx != tdio->tdctx) {
                        lockmgr(&tdctx->lock, LK_RELEASE);
                        dsched_thread_ctx_unref(tdctx);
                        continue;
                }
                KKASSERT(tdio->flags & DSCHED_LINKED_THREAD_CTX);
                TAILQ_REMOVE(&tdctx->tdio_list, tdio, link);
                atomic_clear_int(&tdio->flags, DSCHED_LINKED_THREAD_CTX);
                tdio->tdctx = NULL;
                dsched_thread_io_unref(tdio);
                lockmgr(&tdctx->lock, LK_RELEASE);
                dsched_thread_ctx_unref(tdctx);
        }

        /*
         * Expect tdio->refcount to be 0x80000000.  If it isn't someone else
         * still has a temporary ref on the tdio and we have to transition
         * it back to an undestroyed-state (albeit without any associations)
         * so the other user destroys it properly when the ref is released.
         */
        while ((refs = tdio->refcount) != 0x80000000) {
                kprintf("dsched_thread_io: destroy race tdio=%p\n", tdio);
                cpu_ccfence();
                KKASSERT(refs & 0x80000000);
                nrefs = refs & 0x7FFFFFFF;
                if (atomic_cmpset_int(&tdio->refcount, refs, nrefs))
                        return;
        }

        /*
         * Really for sure now.
         */
        objcache_put(dsched_tdio_cache, tdio);
        atomic_subtract_int(&dsched_stats.tdio_allocations, 1);
}

void
dsched_thread_ctx_unref(struct dsched_thread_ctx *tdctx)
{
        int refs;
        int nrefs;

        /*
         * Handle 1->0 transitions for tdctx and nested destruction
         * recursions.  If the refs are already in destruction mode (bit 31
         * set) on the 1->0 transition we don't try to destruct it again.
         *
         * 0x80000001->0x80000000 transitions are handled normally and
         * thus avoid nested destruction.
         */
        for (;;) {
                refs = tdctx->refcount;
                cpu_ccfence();
                nrefs = refs - 1;

                KKASSERT(((refs ^ nrefs) & 0x80000000) == 0);
                if (nrefs) {
                        if (atomic_cmpset_int(&tdctx->refcount, refs, nrefs))
                                break;
                        continue;
                }
                nrefs = 0x80000000;
                if (atomic_cmpset_int(&tdctx->refcount, refs, nrefs)) {
                        dsched_thread_ctx_destroy(tdctx);
                        break;
                }
        }
}

static void
dsched_thread_ctx_destroy(struct dsched_thread_ctx *tdctx)
{
        struct dsched_thread_io *tdio;

        lockmgr(&tdctx->lock, LK_EXCLUSIVE);

        while ((tdio = TAILQ_FIRST(&tdctx->tdio_list)) != NULL) {
                KKASSERT(tdio->flags & DSCHED_LINKED_THREAD_CTX);
                TAILQ_REMOVE(&tdctx->tdio_list, tdio, link);
                atomic_clear_int(&tdio->flags, DSCHED_LINKED_THREAD_CTX);
                tdio->tdctx = NULL;
                lockmgr(&tdctx->lock, LK_RELEASE);      /* avoid deadlock */
                dsched_thread_io_unref_destroy(tdio);
                lockmgr(&tdctx->lock, LK_EXCLUSIVE);
        }
        KKASSERT(tdctx->refcount == 0x80000000);

        lockmgr(&tdctx->lock, LK_RELEASE);

        objcache_put(dsched_tdctx_cache, tdctx);
        atomic_subtract_int(&dsched_stats.tdctx_allocations, 1);
}

/*
 * Ensures that a tdio is assigned to tdctx and disk.
 */
static
struct dsched_thread_io *
dsched_thread_io_alloc(struct disk *dp, struct dsched_thread_ctx *tdctx,
                       struct dsched_policy *pol, int tdctx_locked)
{
        struct dsched_thread_io *tdio;
#if 0
        dsched_disk_ctx_ref(dsched_get_disk_priv(dp));
#endif
        tdio = objcache_get(dsched_tdio_cache, M_INTWAIT);
        bzero(tdio, DSCHED_THREAD_IO_MAX_SZ);

        dsched_thread_io_ref(tdio);     /* prevent ripout */
        dsched_thread_io_ref(tdio);     /* for diskctx ref */

        DSCHED_THREAD_IO_LOCKINIT(tdio);
        tdio->dp = dp;

        tdio->diskctx = dsched_get_disk_priv(dp);
        TAILQ_INIT(&tdio->queue);

        if (pol->new_tdio)
                pol->new_tdio(tdio);

        lockmgr(&tdio->diskctx->lock, LK_EXCLUSIVE);
        TAILQ_INSERT_TAIL(&tdio->diskctx->tdio_list, tdio, dlink);
        atomic_set_int(&tdio->flags, DSCHED_LINKED_DISK_CTX);
        lockmgr(&tdio->diskctx->lock, LK_RELEASE);

        if (tdctx) {
                /*
                 * Put the tdio in the tdctx list.  Inherit the temporary
                 * ref (one ref for each list).
                 */
                if (tdctx_locked == 0)
                        DSCHED_THREAD_CTX_LOCK(tdctx);
                tdio->tdctx = tdctx;
                tdio->p = tdctx->p;
                TAILQ_INSERT_TAIL(&tdctx->tdio_list, tdio, link);
                atomic_set_int(&tdio->flags, DSCHED_LINKED_THREAD_CTX);
                if (tdctx_locked == 0)
                        DSCHED_THREAD_CTX_UNLOCK(tdctx);
        } else {
                dsched_thread_io_unref(tdio);
        }

        tdio->debug_policy = pol;
        tdio->debug_inited = 0xF00F1234;

        atomic_add_int(&dsched_stats.tdio_allocations, 1);

        return(tdio);
}

struct dsched_disk_ctx *
dsched_disk_ctx_alloc(struct disk *dp, struct dsched_policy *pol)
{
        struct dsched_disk_ctx *diskctx;

        diskctx = objcache_get(dsched_diskctx_cache, M_WAITOK);
        bzero(diskctx, DSCHED_DISK_CTX_MAX_SZ);
        dsched_disk_ctx_ref(diskctx);
        diskctx->dp = dp;
        DSCHED_DISK_CTX_LOCKINIT(diskctx);
        TAILQ_INIT(&diskctx->tdio_list);
        /*
         * XXX: magic number 32: most devices have a tag queue depth of 32.
         * Better to retrieve a more precise value from the driver.
         */
        diskctx->max_tag_queue_depth = 32;
        diskctx->current_tag_queue_depth = 0;

        atomic_add_int(&dsched_stats.diskctx_allocations, 1);
        if (pol->new_diskctx)
                pol->new_diskctx(diskctx);
        return diskctx;
}

struct dsched_thread_ctx *
dsched_thread_ctx_alloc(struct proc *p)
{
        struct dsched_thread_ctx        *tdctx;

        tdctx = objcache_get(dsched_tdctx_cache, M_WAITOK);
        bzero(tdctx, DSCHED_THREAD_CTX_MAX_SZ);
        dsched_thread_ctx_ref(tdctx);
#if 0
        kprintf("dsched_thread_ctx_alloc, new tdctx = %p\n", tdctx);
#endif
        DSCHED_THREAD_CTX_LOCKINIT(tdctx);
        TAILQ_INIT(&tdctx->tdio_list);
        tdctx->p = p;

        atomic_add_int(&dsched_stats.tdctx_allocations, 1);
        /* XXX: no callback here */

        return tdctx;
}

void
policy_new(struct disk *dp, struct dsched_policy *pol)
{
        struct dsched_disk_ctx *diskctx;

        diskctx = dsched_disk_ctx_alloc(dp, pol);
        dsched_disk_ctx_ref(diskctx);
        dsched_set_disk_priv(dp, diskctx);
}

void
policy_destroy(struct disk *dp)
{
        struct dsched_disk_ctx *diskctx;

        diskctx = dsched_get_disk_priv(dp);
        KKASSERT(diskctx != NULL);

        dsched_disk_ctx_unref(diskctx); /* from prepare */
        dsched_disk_ctx_unref(diskctx); /* from alloc */

        dsched_set_disk_priv(dp, NULL);
}

void
dsched_new_buf(struct buf *bp)
{
        struct dsched_thread_ctx        *tdctx = NULL;

        if (dsched_inited == 0)
                return;

        if (curproc != NULL) {
                tdctx = dsched_get_proc_priv(curproc);
        } else {
                /* This is a kernel thread, so no proc info is available */
                tdctx = dsched_get_thread_priv(curthread);
        }

#if 0
        /*
         * XXX: hack. we don't want this assert because we aren't catching all
         *      threads. mi_startup() is still getting away without a tdctx.
         */

        /* by now we should have a tdctx. if not, something bad is going on */
        KKASSERT(tdctx != NULL);
#endif

        if (tdctx) {
                dsched_thread_ctx_ref(tdctx);
        }
        dsched_set_buf_priv(bp, tdctx);
}

void
dsched_exit_buf(struct buf *bp)
{
        struct dsched_thread_ctx        *tdctx;

        tdctx = dsched_get_buf_priv(bp);
        if (tdctx != NULL) {
                dsched_clr_buf_priv(bp);
                dsched_thread_ctx_unref(tdctx);
        }
}

void
dsched_new_proc(struct proc *p)
{
        struct dsched_thread_ctx        *tdctx;

        if (dsched_inited == 0)
                return;

        KKASSERT(p != NULL);

        tdctx = dsched_thread_ctx_alloc(p);
        tdctx->p = p;
        dsched_thread_ctx_ref(tdctx);

        dsched_set_proc_priv(p, tdctx);
        atomic_add_int(&dsched_stats.nprocs, 1);
}

void
dsched_new_thread(struct thread *td)
{
        struct dsched_thread_ctx        *tdctx;

        if (dsched_inited == 0)
                return;

        KKASSERT(td != NULL);

        tdctx = dsched_thread_ctx_alloc(NULL);
        tdctx->td = td;
        dsched_thread_ctx_ref(tdctx);

        dsched_set_thread_priv(td, tdctx);
        atomic_add_int(&dsched_stats.nthreads, 1);
}

void
dsched_exit_proc(struct proc *p)
{
        struct dsched_thread_ctx        *tdctx;

        if (dsched_inited == 0)
                return;

        KKASSERT(p != NULL);

        tdctx = dsched_get_proc_priv(p);
        KKASSERT(tdctx != NULL);

        tdctx->dead = 0xDEAD;
        dsched_set_proc_priv(p, NULL);

        dsched_thread_ctx_unref(tdctx); /* one for alloc, */
        dsched_thread_ctx_unref(tdctx); /* one for ref */
        atomic_subtract_int(&dsched_stats.nprocs, 1);
}

void
dsched_exit_thread(struct thread *td)
{
        struct dsched_thread_ctx        *tdctx;

        if (dsched_inited == 0)
                return;

        KKASSERT(td != NULL);

        tdctx = dsched_get_thread_priv(td);
        KKASSERT(tdctx != NULL);

        tdctx->dead = 0xDEAD;
        dsched_set_thread_priv(td, NULL);

        dsched_thread_ctx_unref(tdctx); /* one for alloc, */
        dsched_thread_ctx_unref(tdctx); /* one for ref */
        atomic_subtract_int(&dsched_stats.nthreads, 1);
}

/*
 * Returns ref'd tdio.
 *
 * tdio may have additional refs for the diskctx and tdctx it resides on.
 */
struct dsched_thread_io *
dsched_new_policy_thread_tdio(struct dsched_disk_ctx *diskctx,
                              struct dsched_policy *pol)
{
        struct dsched_thread_ctx *tdctx;
        struct dsched_thread_io *tdio;

        tdctx = dsched_get_thread_priv(curthread);
        KKASSERT(tdctx != NULL);
        tdio = dsched_thread_io_alloc(diskctx->dp, tdctx, pol, 0);
        return(tdio);
}

/* DEFAULT NOOP POLICY */

static int
noop_prepare(struct dsched_disk_ctx *diskctx)
{
        return 0;
}

static void
noop_teardown(struct dsched_disk_ctx *diskctx)
{
}

static void
noop_cancel(struct dsched_disk_ctx *diskctx)
{
}

static int
noop_queue(struct dsched_disk_ctx *diskctx, struct dsched_thread_io *tdio,
           struct bio *bio)
{
        dsched_strategy_raw(diskctx->dp, bio);
#if 0
        dsched_strategy_async(diskctx->dp, bio, noop_completed, NULL);
#endif
        return 0;
}

/*
 * SYSINIT stuff
 */
static void
dsched_init(void)
{
        dsched_tdio_cache = objcache_create("dsched-tdio-cache", 0, 0,
                                           NULL, NULL, NULL,
                                           objcache_malloc_alloc,
                                           objcache_malloc_free,
                                           &dsched_thread_io_malloc_args);

        dsched_tdctx_cache = objcache_create("dsched-tdctx-cache", 0, 0,
                                           NULL, NULL, NULL,
                                           objcache_malloc_alloc,
                                           objcache_malloc_free,
                                           &dsched_thread_ctx_malloc_args);

        dsched_diskctx_cache = objcache_create("dsched-diskctx-cache", 0, 0,
                                           NULL, NULL, NULL,
                                           objcache_malloc_alloc,
                                           objcache_malloc_free,
                                           &dsched_disk_ctx_malloc_args);

        bzero(&dsched_stats, sizeof(struct dsched_stats));

        lockinit(&dsched_lock, "dsched lock", 0, LK_CANRECURSE);
        DSCHED_GLOBAL_THREAD_CTX_LOCKINIT();

        dsched_register(&dsched_noop_policy);

        dsched_inited = 1;
}

static void
dsched_uninit(void)
{
}

SYSINIT(subr_dsched_register, SI_SUB_CREATE_INIT-1, SI_ORDER_FIRST, dsched_init, NULL);
SYSUNINIT(subr_dsched_register, SI_SUB_CREATE_INIT-1, SI_ORDER_ANY, dsched_uninit, NULL);

/*
 * SYSCTL stuff
 */
static int
sysctl_dsched_stats(SYSCTL_HANDLER_ARGS)
{
        return (sysctl_handle_opaque(oidp, &dsched_stats, sizeof(struct dsched_stats), req));
}

static int
sysctl_dsched_list_policies(SYSCTL_HANDLER_ARGS)
{
        struct dsched_policy *pol = NULL;
        int error, first = 1;

        lockmgr(&dsched_lock, LK_EXCLUSIVE);

        while ((pol = dsched_policy_enumerate(pol))) {
                if (!first) {
                        error = SYSCTL_OUT(req, " ", 1);
                        if (error)
                                break;
                } else {
                        first = 0;
                }
                error = SYSCTL_OUT(req, pol->name, strlen(pol->name));
                if (error)
                        break;
        }

        lockmgr(&dsched_lock, LK_RELEASE);

        error = SYSCTL_OUT(req, "", 1);

        return error;
}

static int
sysctl_dsched_policy(SYSCTL_HANDLER_ARGS)
{
        char buf[DSCHED_POLICY_NAME_LENGTH];
        struct dsched_disk_ctx *diskctx = arg1;
        struct dsched_policy *pol = NULL;
        int error;

        if (diskctx == NULL) {
                return 0;
        }

        lockmgr(&dsched_lock, LK_EXCLUSIVE);

        pol = diskctx->dp->d_sched_policy;
        memcpy(buf, pol->name, DSCHED_POLICY_NAME_LENGTH);

        error = sysctl_handle_string(oidp, buf, DSCHED_POLICY_NAME_LENGTH, req);
        if (error || req->newptr == NULL) {
                lockmgr(&dsched_lock, LK_RELEASE);
                return (error);
        }

        pol = dsched_find_policy(buf);
        if (pol == NULL) {
                lockmgr(&dsched_lock, LK_RELEASE);
                return 0;
        }

        dsched_switch(diskctx->dp, pol);

        lockmgr(&dsched_lock, LK_RELEASE);

        return error;
}

static int
sysctl_dsched_default_policy(SYSCTL_HANDLER_ARGS)
{
        char buf[DSCHED_POLICY_NAME_LENGTH];
        struct dsched_policy *pol = NULL;
        int error;

        lockmgr(&dsched_lock, LK_EXCLUSIVE);

        pol = default_policy;
        memcpy(buf, pol->name, DSCHED_POLICY_NAME_LENGTH);

        error = sysctl_handle_string(oidp, buf, DSCHED_POLICY_NAME_LENGTH, req);
        if (error || req->newptr == NULL) {
                lockmgr(&dsched_lock, LK_RELEASE);
                return (error);
        }

        pol = dsched_find_policy(buf);
        if (pol == NULL) {
                lockmgr(&dsched_lock, LK_RELEASE);
                return 0;
        }

        default_set = 1;
        default_policy = pol;

        lockmgr(&dsched_lock, LK_RELEASE);

        return error;
}

SYSCTL_NODE(, OID_AUTO, dsched, CTLFLAG_RD, NULL,
    "Disk Scheduler Framework (dsched) magic");
SYSCTL_NODE(_dsched, OID_AUTO, policy, CTLFLAG_RW, NULL,
    "List of disks and their policies");
SYSCTL_INT(_dsched, OID_AUTO, debug, CTLFLAG_RW, &dsched_debug_enable,
    0, "Enable dsched debugging");
SYSCTL_PROC(_dsched, OID_AUTO, stats, CTLTYPE_OPAQUE|CTLFLAG_RD,
    0, sizeof(struct dsched_stats), sysctl_dsched_stats, "dsched_stats",
    "dsched statistics");
SYSCTL_PROC(_dsched, OID_AUTO, policies, CTLTYPE_STRING|CTLFLAG_RD,
    NULL, 0, sysctl_dsched_list_policies, "A", "names of available policies");
SYSCTL_PROC(_dsched_policy, OID_AUTO, default, CTLTYPE_STRING|CTLFLAG_RW,
    NULL, 0, sysctl_dsched_default_policy, "A", "default dsched policy");

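/*
 * Illustrative userland usage of the sysctl tree declared above (the
 * policy name "fq" is an example; any registered policy name works):
 *
 *      sysctl dsched.policies                  # list available policies
 *      sysctl dsched.policy.default=fq         # set the default policy
 *      sysctl dsched.policy.da0=fq             # switch the policy of da0
 */
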
static void
dsched_sysctl_add_disk(struct dsched_disk_ctx *diskctx, char *name)
{
        if (!(diskctx->flags & DSCHED_SYSCTL_CTX_INITED)) {
                diskctx->flags |= DSCHED_SYSCTL_CTX_INITED;
                sysctl_ctx_init(&diskctx->sysctl_ctx);
        }

        SYSCTL_ADD_PROC(&diskctx->sysctl_ctx, SYSCTL_STATIC_CHILDREN(_dsched_policy),
            OID_AUTO, name, CTLTYPE_STRING|CTLFLAG_RW,
            diskctx, 0, sysctl_dsched_policy, "A", "policy");
}
