The Design and Implementation of the FreeBSD Operating System, Second Edition
Now available: The Design and Implementation of the FreeBSD Operating System (Second Edition)


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]

FreeBSD/Linux Kernel Cross Reference
sys/kern/kern_sysctl.c

Version: -  FREEBSD  -  FREEBSD-13-STABLE  -  FREEBSD-13-0  -  FREEBSD-12-STABLE  -  FREEBSD-12-0  -  FREEBSD-11-STABLE  -  FREEBSD-11-0  -  FREEBSD-10-STABLE  -  FREEBSD-10-0  -  FREEBSD-9-STABLE  -  FREEBSD-9-0  -  FREEBSD-8-STABLE  -  FREEBSD-8-0  -  FREEBSD-7-STABLE  -  FREEBSD-7-0  -  FREEBSD-6-STABLE  -  FREEBSD-6-0  -  FREEBSD-5-STABLE  -  FREEBSD-5-0  -  FREEBSD-4-STABLE  -  FREEBSD-3-STABLE  -  FREEBSD22  -  l41  -  OPENBSD  -  linux-2.6  -  MK84  -  PLAN9  -  xnu-8792 
SearchContext: -  none  -  3  -  10 

    1 /*-
    2  * SPDX-License-Identifier: BSD-3-Clause
    3  *
    4  * Copyright (c) 1982, 1986, 1989, 1993
    5  *      The Regents of the University of California.  All rights reserved.
    6  *
    7  * This code is derived from software contributed to Berkeley by
    8  * Mike Karels at Berkeley Software Design, Inc.
    9  *
   10  * Quite extensively rewritten by Poul-Henning Kamp of the FreeBSD
   11  * project, to make these variables more userfriendly.
   12  *
   13  * Redistribution and use in source and binary forms, with or without
   14  * modification, are permitted provided that the following conditions
   15  * are met:
   16  * 1. Redistributions of source code must retain the above copyright
   17  *    notice, this list of conditions and the following disclaimer.
   18  * 2. Redistributions in binary form must reproduce the above copyright
   19  *    notice, this list of conditions and the following disclaimer in the
   20  *    documentation and/or other materials provided with the distribution.
   21  * 3. Neither the name of the University nor the names of its contributors
   22  *    may be used to endorse or promote products derived from this software
   23  *    without specific prior written permission.
   24  *
   25  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
   26  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
   27  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
   28  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
   29  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
   30  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
   31  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
   32  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
   33  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
   34  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
   35  * SUCH DAMAGE.
   36  *
   37  *      @(#)kern_sysctl.c       8.4 (Berkeley) 4/14/94
   38  */
   39 
   40 #include <sys/cdefs.h>
   41 __FBSDID("$FreeBSD$");
   42 
   43 #include "opt_capsicum.h"
   44 #include "opt_ddb.h"
   45 #include "opt_ktrace.h"
   46 #include "opt_sysctl.h"
   47 
   48 #include <sys/param.h>
   49 #include <sys/fail.h>
   50 #include <sys/systm.h>
   51 #include <sys/capsicum.h>
   52 #include <sys/kernel.h>
   53 #include <sys/limits.h>
   54 #include <sys/sysctl.h>
   55 #include <sys/malloc.h>
   56 #include <sys/priv.h>
   57 #include <sys/proc.h>
   58 #include <sys/jail.h>
   59 #include <sys/kdb.h>
   60 #include <sys/lock.h>
   61 #include <sys/mutex.h>
   62 #include <sys/rmlock.h>
   63 #include <sys/sbuf.h>
   64 #include <sys/sx.h>
   65 #include <sys/sysproto.h>
   66 #include <sys/uio.h>
   67 #ifdef KTRACE
   68 #include <sys/ktrace.h>
   69 #endif
   70 
   71 #ifdef DDB
   72 #include <ddb/ddb.h>
   73 #include <ddb/db_lex.h>
   74 #endif
   75 
   76 #include <net/vnet.h>
   77 
   78 #include <security/mac/mac_framework.h>
   79 
   80 #include <vm/vm.h>
   81 #include <vm/vm_extern.h>
   82 
   83 static MALLOC_DEFINE(M_SYSCTL, "sysctl", "sysctl internal magic");
   84 static MALLOC_DEFINE(M_SYSCTLOID, "sysctloid", "sysctl dynamic oids");
   85 static MALLOC_DEFINE(M_SYSCTLTMP, "sysctltmp", "sysctl temp output buffer");
   86 
   87 /*
   88  * The sysctllock protects the MIB tree.  It also protects sysctl
   89  * contexts used with dynamic sysctls.  The sysctl_register_oid() and
   90  * sysctl_unregister_oid() routines require the sysctllock to already
   91  * be held, so the sysctl_wlock() and sysctl_wunlock() routines are
   92  * provided for the few places in the kernel which need to use that
   93  * API rather than using the dynamic API.  Use of the dynamic API is
   94  * strongly encouraged for most code.
   95  *
   96  * The sysctlmemlock is used to limit the amount of user memory wired for
   97  * sysctl requests.  This is implemented by serializing any userland
   98  * sysctl requests larger than a single page via an exclusive lock.
   99  *
  100  * The sysctlstringlock is used to protect concurrent access to writable
  101  * string nodes in sysctl_handle_string().
  102  */
  103 static struct rmlock sysctllock;
  104 static struct sx __exclusive_cache_line sysctlmemlock;
  105 static struct sx sysctlstringlock;
  106 
  107 #define SYSCTL_WLOCK()          rm_wlock(&sysctllock)
  108 #define SYSCTL_WUNLOCK()        rm_wunlock(&sysctllock)
  109 #define SYSCTL_RLOCK(tracker)   rm_rlock(&sysctllock, (tracker))
  110 #define SYSCTL_RUNLOCK(tracker) rm_runlock(&sysctllock, (tracker))
  111 #define SYSCTL_WLOCKED()        rm_wowned(&sysctllock)
  112 #define SYSCTL_ASSERT_LOCKED()  rm_assert(&sysctllock, RA_LOCKED)
  113 #define SYSCTL_ASSERT_WLOCKED() rm_assert(&sysctllock, RA_WLOCKED)
  114 #define SYSCTL_ASSERT_RLOCKED() rm_assert(&sysctllock, RA_RLOCKED)
  115 #define SYSCTL_INIT()           rm_init_flags(&sysctllock, "sysctl lock", \
  116                                     RM_SLEEPABLE)
  117 #define SYSCTL_SLEEP(ch, wmesg, timo)                                   \
  118                                 rm_sleep(ch, &sysctllock, 0, wmesg, timo)
  119 
  120 static int sysctl_root(SYSCTL_HANDLER_ARGS);
  121 
  122 /* Root list */
  123 struct sysctl_oid_list sysctl__children = SLIST_HEAD_INITIALIZER(&sysctl__children);
  124 
  125 static char*    sysctl_escape_name(const char*);
  126 static int      sysctl_remove_oid_locked(struct sysctl_oid *oidp, int del,
  127                     int recurse);
  128 static int      sysctl_old_kernel(struct sysctl_req *, const void *, size_t);
  129 static int      sysctl_new_kernel(struct sysctl_req *, void *, size_t);
  130 
  131 static struct sysctl_oid *
  132 sysctl_find_oidname(const char *name, struct sysctl_oid_list *list)
  133 {
  134         struct sysctl_oid *oidp;
  135 
  136         SYSCTL_ASSERT_LOCKED();
  137         SLIST_FOREACH(oidp, list, oid_link) {
  138                 if (strcmp(oidp->oid_name, name) == 0) {
  139                         return (oidp);
  140                 }
  141         }
  142         return (NULL);
  143 }
  144 
  145 /*
  146  * Initialization of the MIB tree.
  147  *
  148  * Order by number in each list.
  149  */
  150 void
  151 sysctl_wlock(void)
  152 {
  153 
  154         SYSCTL_WLOCK();
  155 }
  156 
  157 void
  158 sysctl_wunlock(void)
  159 {
  160 
  161         SYSCTL_WUNLOCK();
  162 }
  163 
  164 static int
  165 sysctl_root_handler_locked(struct sysctl_oid *oid, void *arg1, intmax_t arg2,
  166     struct sysctl_req *req, struct rm_priotracker *tracker)
  167 {
  168         int error;
  169 
  170         if (oid->oid_kind & CTLFLAG_DYN)
  171                 atomic_add_int(&oid->oid_running, 1);
  172 
  173         if (tracker != NULL)
  174                 SYSCTL_RUNLOCK(tracker);
  175         else
  176                 SYSCTL_WUNLOCK();
  177 
  178         /*
  179          * Treat set CTLFLAG_NEEDGIANT and unset CTLFLAG_MPSAFE flags the same,
  180          * untill we're ready to remove all traces of Giant from sysctl(9).
  181          */
  182         if ((oid->oid_kind & CTLFLAG_NEEDGIANT) ||
  183             (!(oid->oid_kind & CTLFLAG_MPSAFE)))
  184                 mtx_lock(&Giant);
  185         error = oid->oid_handler(oid, arg1, arg2, req);
  186         if ((oid->oid_kind & CTLFLAG_NEEDGIANT) ||
  187             (!(oid->oid_kind & CTLFLAG_MPSAFE)))
  188                 mtx_unlock(&Giant);
  189 
  190         KFAIL_POINT_ERROR(_debug_fail_point, sysctl_running, error);
  191 
  192         if (tracker != NULL)
  193                 SYSCTL_RLOCK(tracker);
  194         else
  195                 SYSCTL_WLOCK();
  196 
  197         if (oid->oid_kind & CTLFLAG_DYN) {
  198                 if (atomic_fetchadd_int(&oid->oid_running, -1) == 1 &&
  199                     (oid->oid_kind & CTLFLAG_DYING) != 0)
  200                         wakeup(&oid->oid_running);
  201         }
  202 
  203         return (error);
  204 }
  205 
  206 static void
  207 sysctl_load_tunable_by_oid_locked(struct sysctl_oid *oidp)
  208 {
  209         struct sysctl_req req;
  210         struct sysctl_oid *curr;
  211         char *penv = NULL;
  212         char path[96];
  213         ssize_t rem = sizeof(path);
  214         ssize_t len;
  215         uint8_t data[512] __aligned(sizeof(uint64_t));
  216         int size;
  217         int error;
  218 
  219         path[--rem] = 0;
  220 
  221         for (curr = oidp; curr != NULL; curr = SYSCTL_PARENT(curr)) {
  222                 len = strlen(curr->oid_name);
  223                 rem -= len;
  224                 if (curr != oidp)
  225                         rem -= 1;
  226                 if (rem < 0) {
  227                         printf("OID path exceeds %d bytes\n", (int)sizeof(path));
  228                         return;
  229                 }
  230                 memcpy(path + rem, curr->oid_name, len);
  231                 if (curr != oidp)
  232                         path[rem + len] = '.';
  233         }
  234 
  235         memset(&req, 0, sizeof(req));
  236 
  237         req.td = curthread;
  238         req.oldfunc = sysctl_old_kernel;
  239         req.newfunc = sysctl_new_kernel;
  240         req.lock = REQ_UNWIRED;
  241 
  242         switch (oidp->oid_kind & CTLTYPE) {
  243         case CTLTYPE_INT:
  244                 if (getenv_array(path + rem, data, sizeof(data), &size,
  245                     sizeof(int), GETENV_SIGNED) == 0)
  246                         return;
  247                 req.newlen = size;
  248                 req.newptr = data;
  249                 break;
  250         case CTLTYPE_UINT:
  251                 if (getenv_array(path + rem, data, sizeof(data), &size,
  252                     sizeof(int), GETENV_UNSIGNED) == 0)
  253                         return;
  254                 req.newlen = size;
  255                 req.newptr = data;
  256                 break;
  257         case CTLTYPE_LONG:
  258                 if (getenv_array(path + rem, data, sizeof(data), &size,
  259                     sizeof(long), GETENV_SIGNED) == 0)
  260                         return;
  261                 req.newlen = size;
  262                 req.newptr = data;
  263                 break;
  264         case CTLTYPE_ULONG:
  265                 if (getenv_array(path + rem, data, sizeof(data), &size,
  266                     sizeof(long), GETENV_UNSIGNED) == 0)
  267                         return;
  268                 req.newlen = size;
  269                 req.newptr = data;
  270                 break;
  271         case CTLTYPE_S8:
  272                 if (getenv_array(path + rem, data, sizeof(data), &size,
  273                     sizeof(int8_t), GETENV_SIGNED) == 0)
  274                         return;
  275                 req.newlen = size;
  276                 req.newptr = data;
  277                 break;
  278         case CTLTYPE_S16:
  279                 if (getenv_array(path + rem, data, sizeof(data), &size,
  280                     sizeof(int16_t), GETENV_SIGNED) == 0)
  281                         return;
  282                 req.newlen = size;
  283                 req.newptr = data;
  284                 break;
  285         case CTLTYPE_S32:
  286                 if (getenv_array(path + rem, data, sizeof(data), &size,
  287                     sizeof(int32_t), GETENV_SIGNED) == 0)
  288                         return;
  289                 req.newlen = size;
  290                 req.newptr = data;
  291                 break;
  292         case CTLTYPE_S64:
  293                 if (getenv_array(path + rem, data, sizeof(data), &size,
  294                     sizeof(int64_t), GETENV_SIGNED) == 0)
  295                         return;
  296                 req.newlen = size;
  297                 req.newptr = data;
  298                 break;
  299         case CTLTYPE_U8:
  300                 if (getenv_array(path + rem, data, sizeof(data), &size,
  301                     sizeof(uint8_t), GETENV_UNSIGNED) == 0)
  302                         return;
  303                 req.newlen = size;
  304                 req.newptr = data;
  305                 break;
  306         case CTLTYPE_U16:
  307                 if (getenv_array(path + rem, data, sizeof(data), &size,
  308                     sizeof(uint16_t), GETENV_UNSIGNED) == 0)
  309                         return;
  310                 req.newlen = size;
  311                 req.newptr = data;
  312                 break;
  313         case CTLTYPE_U32:
  314                 if (getenv_array(path + rem, data, sizeof(data), &size,
  315                     sizeof(uint32_t), GETENV_UNSIGNED) == 0)
  316                         return;
  317                 req.newlen = size;
  318                 req.newptr = data;
  319                 break;
  320         case CTLTYPE_U64:
  321                 if (getenv_array(path + rem, data, sizeof(data), &size,
  322                     sizeof(uint64_t), GETENV_UNSIGNED) == 0)
  323                         return;
  324                 req.newlen = size;
  325                 req.newptr = data;
  326                 break;
  327         case CTLTYPE_STRING:
  328                 penv = kern_getenv(path + rem);
  329                 if (penv == NULL)
  330                         return;
  331                 req.newlen = strlen(penv);
  332                 req.newptr = penv;
  333                 break;
  334         default:
  335                 return;
  336         }
  337         error = sysctl_root_handler_locked(oidp, oidp->oid_arg1,
  338             oidp->oid_arg2, &req, NULL);
  339         if (error != 0)
  340                 printf("Setting sysctl %s failed: %d\n", path + rem, error);
  341         if (penv != NULL)
  342                 freeenv(penv);
  343 }
  344 
  345 /*
  346  * Locate the path to a given oid.  Returns the length of the resulting path,
  347  * or -1 if the oid was not found.  nodes must have room for CTL_MAXNAME
  348  * elements and be NULL initialized.
  349  */
  350 static int
  351 sysctl_search_oid(struct sysctl_oid **nodes, struct sysctl_oid *needle)
  352 {
  353         int indx;
  354 
  355         SYSCTL_ASSERT_LOCKED();
  356         indx = 0;
  357         while (indx < CTL_MAXNAME && indx >= 0) {
  358                 if (nodes[indx] == NULL && indx == 0)
  359                         nodes[indx] = SLIST_FIRST(&sysctl__children);
  360                 else if (nodes[indx] == NULL)
  361                         nodes[indx] = SLIST_FIRST(&nodes[indx - 1]->oid_children);
  362                 else
  363                         nodes[indx] = SLIST_NEXT(nodes[indx], oid_link);
  364 
  365                 if (nodes[indx] == needle)
  366                         return (indx + 1);
  367 
  368                 if (nodes[indx] == NULL) {
  369                         indx--;
  370                         continue;
  371                 }
  372 
  373                 if ((nodes[indx]->oid_kind & CTLTYPE) == CTLTYPE_NODE) {
  374                         indx++;
  375                         continue;
  376                 }
  377         }
  378         return (-1);
  379 }
  380 
  381 static void
  382 sysctl_warn_reuse(const char *func, struct sysctl_oid *leaf)
  383 {
  384         struct sysctl_oid *nodes[CTL_MAXNAME];
  385         char buf[128];
  386         struct sbuf sb;
  387         int rc, i;
  388 
  389         (void)sbuf_new(&sb, buf, sizeof(buf), SBUF_FIXEDLEN | SBUF_INCLUDENUL);
  390         sbuf_set_drain(&sb, sbuf_printf_drain, NULL);
  391 
  392         sbuf_printf(&sb, "%s: can't re-use a leaf (", __func__);
  393 
  394         memset(nodes, 0, sizeof(nodes));
  395         rc = sysctl_search_oid(nodes, leaf);
  396         if (rc > 0) {
  397                 for (i = 0; i < rc; i++)
  398                         sbuf_printf(&sb, "%s%.*s", nodes[i]->oid_name,
  399                             i != (rc - 1), ".");
  400         } else {
  401                 sbuf_printf(&sb, "%s", leaf->oid_name);
  402         }
  403         sbuf_printf(&sb, ")!\n");
  404 
  405         (void)sbuf_finish(&sb);
  406 }
  407 
  408 #ifdef SYSCTL_DEBUG
  409 static int
  410 sysctl_reuse_test(SYSCTL_HANDLER_ARGS)
  411 {
  412         struct rm_priotracker tracker;
  413 
  414         SYSCTL_RLOCK(&tracker);
  415         sysctl_warn_reuse(__func__, oidp);
  416         SYSCTL_RUNLOCK(&tracker);
  417         return (0);
  418 }
  419 SYSCTL_PROC(_sysctl, OID_AUTO, reuse_test,
  420     CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_MPSAFE, 0, 0, sysctl_reuse_test, "-",
  421     "");
  422 #endif
  423 
  424 void
  425 sysctl_register_oid(struct sysctl_oid *oidp)
  426 {
  427         struct sysctl_oid_list *parent = oidp->oid_parent;
  428         struct sysctl_oid *p;
  429         struct sysctl_oid *q;
  430         int oid_number;
  431         int timeout = 2;
  432 
  433         /*
  434          * First check if another oid with the same name already
  435          * exists in the parent's list.
  436          */
  437         SYSCTL_ASSERT_WLOCKED();
  438         p = sysctl_find_oidname(oidp->oid_name, parent);
  439         if (p != NULL) {
  440                 if ((p->oid_kind & CTLTYPE) == CTLTYPE_NODE) {
  441                         p->oid_refcnt++;
  442                         return;
  443                 } else {
  444                         sysctl_warn_reuse(__func__, p);
  445                         return;
  446                 }
  447         }
  448         /* get current OID number */
  449         oid_number = oidp->oid_number;
  450 
  451 #if (OID_AUTO >= 0)
  452 #error "OID_AUTO is expected to be a negative value"
  453 #endif  
  454         /*
  455          * Any negative OID number qualifies as OID_AUTO. Valid OID
  456          * numbers should always be positive.
  457          *
  458          * NOTE: DO NOT change the starting value here, change it in
  459          * <sys/sysctl.h>, and make sure it is at least 256 to
  460          * accommodate e.g. net.inet.raw as a static sysctl node.
  461          */
  462         if (oid_number < 0) {
  463                 static int newoid;
  464 
  465                 /*
  466                  * By decrementing the next OID number we spend less
  467                  * time inserting the OIDs into a sorted list.
  468                  */
  469                 if (--newoid < CTL_AUTO_START)
  470                         newoid = 0x7fffffff;
  471 
  472                 oid_number = newoid;
  473         }
  474 
  475         /*
  476          * Insert the OID into the parent's list sorted by OID number.
  477          */
  478 retry:
  479         q = NULL;
  480         SLIST_FOREACH(p, parent, oid_link) {
  481                 /* check if the current OID number is in use */
  482                 if (oid_number == p->oid_number) {
  483                         /* get the next valid OID number */
  484                         if (oid_number < CTL_AUTO_START ||
  485                             oid_number == 0x7fffffff) {
  486                                 /* wraparound - restart */
  487                                 oid_number = CTL_AUTO_START;
  488                                 /* don't loop forever */
  489                                 if (!timeout--)
  490                                         panic("sysctl: Out of OID numbers\n");
  491                                 goto retry;
  492                         } else {
  493                                 oid_number++;
  494                         }
  495                 } else if (oid_number < p->oid_number)
  496                         break;
  497                 q = p;
  498         }
  499         /* check for non-auto OID number collision */
  500         if (oidp->oid_number >= 0 && oidp->oid_number < CTL_AUTO_START &&
  501             oid_number >= CTL_AUTO_START) {
  502                 printf("sysctl: OID number(%d) is already in use for '%s'\n",
  503                     oidp->oid_number, oidp->oid_name);
  504         }
  505         /* update the OID number, if any */
  506         oidp->oid_number = oid_number;
  507         if (q != NULL)
  508                 SLIST_INSERT_AFTER(q, oidp, oid_link);
  509         else
  510                 SLIST_INSERT_HEAD(parent, oidp, oid_link);
  511 
  512         if ((oidp->oid_kind & CTLTYPE) != CTLTYPE_NODE &&
  513 #ifdef VIMAGE
  514             (oidp->oid_kind & CTLFLAG_VNET) == 0 &&
  515 #endif
  516             (oidp->oid_kind & CTLFLAG_TUN) != 0 &&
  517             (oidp->oid_kind & CTLFLAG_NOFETCH) == 0) {
  518                 /* only fetch value once */
  519                 oidp->oid_kind |= CTLFLAG_NOFETCH;
  520                 /* try to fetch value from kernel environment */
  521                 sysctl_load_tunable_by_oid_locked(oidp);
  522         }
  523 }
  524 
  525 void
  526 sysctl_register_disabled_oid(struct sysctl_oid *oidp)
  527 {
  528 
  529         /*
  530          * Mark the leaf as dormant if it's not to be immediately enabled.
  531          * We do not disable nodes as they can be shared between modules
  532          * and it is always safe to access a node.
  533          */
  534         KASSERT((oidp->oid_kind & CTLFLAG_DORMANT) == 0,
  535             ("internal flag is set in oid_kind"));
  536         if ((oidp->oid_kind & CTLTYPE) != CTLTYPE_NODE)
  537                 oidp->oid_kind |= CTLFLAG_DORMANT;
  538         sysctl_register_oid(oidp);
  539 }
  540 
  541 void
  542 sysctl_enable_oid(struct sysctl_oid *oidp)
  543 {
  544 
  545         SYSCTL_ASSERT_WLOCKED();
  546         if ((oidp->oid_kind & CTLTYPE) == CTLTYPE_NODE) {
  547                 KASSERT((oidp->oid_kind & CTLFLAG_DORMANT) == 0,
  548                     ("sysctl node is marked as dormant"));
  549                 return;
  550         }
  551         KASSERT((oidp->oid_kind & CTLFLAG_DORMANT) != 0,
  552             ("enabling already enabled sysctl oid"));
  553         oidp->oid_kind &= ~CTLFLAG_DORMANT;
  554 }
  555 
  556 void
  557 sysctl_unregister_oid(struct sysctl_oid *oidp)
  558 {
  559         struct sysctl_oid *p;
  560         int error;
  561 
  562         SYSCTL_ASSERT_WLOCKED();
  563         if (oidp->oid_number == OID_AUTO) {
  564                 error = EINVAL;
  565         } else {
  566                 error = ENOENT;
  567                 SLIST_FOREACH(p, oidp->oid_parent, oid_link) {
  568                         if (p == oidp) {
  569                                 SLIST_REMOVE(oidp->oid_parent, oidp,
  570                                     sysctl_oid, oid_link);
  571                                 error = 0;
  572                                 break;
  573                         }
  574                 }
  575         }
  576 
  577         /* 
  578          * This can happen when a module fails to register and is
  579          * being unloaded afterwards.  It should not be a panic()
  580          * for normal use.
  581          */
  582         if (error) {
  583                 printf("%s: failed(%d) to unregister sysctl(%s)\n",
  584                     __func__, error, oidp->oid_name);
  585         }
  586 }
  587 
  588 /* Initialize a new context to keep track of dynamically added sysctls. */
  589 int
  590 sysctl_ctx_init(struct sysctl_ctx_list *c)
  591 {
  592 
  593         if (c == NULL) {
  594                 return (EINVAL);
  595         }
  596 
  597         /*
  598          * No locking here, the caller is responsible for not adding
  599          * new nodes to a context until after this function has
  600          * returned.
  601          */
  602         TAILQ_INIT(c);
  603         return (0);
  604 }
  605 
  606 /* Free the context, and destroy all dynamic oids registered in this context */
  607 int
  608 sysctl_ctx_free(struct sysctl_ctx_list *clist)
  609 {
  610         struct sysctl_ctx_entry *e, *e1;
  611         int error;
  612 
  613         error = 0;
  614         /*
  615          * First perform a "dry run" to check if it's ok to remove oids.
  616          * XXX FIXME
  617          * XXX This algorithm is a hack. But I don't know any
  618          * XXX better solution for now...
  619          */
  620         SYSCTL_WLOCK();
  621         TAILQ_FOREACH(e, clist, link) {
  622                 error = sysctl_remove_oid_locked(e->entry, 0, 0);
  623                 if (error)
  624                         break;
  625         }
  626         /*
  627          * Restore deregistered entries, either from the end,
  628          * or from the place where error occurred.
  629          * e contains the entry that was not unregistered
  630          */
  631         if (error)
  632                 e1 = TAILQ_PREV(e, sysctl_ctx_list, link);
  633         else
  634                 e1 = TAILQ_LAST(clist, sysctl_ctx_list);
  635         while (e1 != NULL) {
  636                 sysctl_register_oid(e1->entry);
  637                 e1 = TAILQ_PREV(e1, sysctl_ctx_list, link);
  638         }
  639         if (error) {
  640                 SYSCTL_WUNLOCK();
  641                 return(EBUSY);
  642         }
  643         /* Now really delete the entries */
  644         e = TAILQ_FIRST(clist);
  645         while (e != NULL) {
  646                 e1 = TAILQ_NEXT(e, link);
  647                 error = sysctl_remove_oid_locked(e->entry, 1, 0);
  648                 if (error)
  649                         panic("sysctl_remove_oid: corrupt tree, entry: %s",
  650                             e->entry->oid_name);
  651                 free(e, M_SYSCTLOID);
  652                 e = e1;
  653         }
  654         SYSCTL_WUNLOCK();
  655         return (error);
  656 }
  657 
  658 /* Add an entry to the context */
  659 struct sysctl_ctx_entry *
  660 sysctl_ctx_entry_add(struct sysctl_ctx_list *clist, struct sysctl_oid *oidp)
  661 {
  662         struct sysctl_ctx_entry *e;
  663 
  664         SYSCTL_ASSERT_WLOCKED();
  665         if (clist == NULL || oidp == NULL)
  666                 return(NULL);
  667         e = malloc(sizeof(struct sysctl_ctx_entry), M_SYSCTLOID, M_WAITOK);
  668         e->entry = oidp;
  669         TAILQ_INSERT_HEAD(clist, e, link);
  670         return (e);
  671 }
  672 
  673 /* Find an entry in the context */
  674 struct sysctl_ctx_entry *
  675 sysctl_ctx_entry_find(struct sysctl_ctx_list *clist, struct sysctl_oid *oidp)
  676 {
  677         struct sysctl_ctx_entry *e;
  678 
  679         SYSCTL_ASSERT_WLOCKED();
  680         if (clist == NULL || oidp == NULL)
  681                 return(NULL);
  682         TAILQ_FOREACH(e, clist, link) {
  683                 if(e->entry == oidp)
  684                         return(e);
  685         }
  686         return (e);
  687 }
  688 
  689 /*
  690  * Delete an entry from the context.
  691  * NOTE: this function doesn't free oidp! You have to remove it
  692  * with sysctl_remove_oid().
  693  */
  694 int
  695 sysctl_ctx_entry_del(struct sysctl_ctx_list *clist, struct sysctl_oid *oidp)
  696 {
  697         struct sysctl_ctx_entry *e;
  698 
  699         if (clist == NULL || oidp == NULL)
  700                 return (EINVAL);
  701         SYSCTL_WLOCK();
  702         e = sysctl_ctx_entry_find(clist, oidp);
  703         if (e != NULL) {
  704                 TAILQ_REMOVE(clist, e, link);
  705                 SYSCTL_WUNLOCK();
  706                 free(e, M_SYSCTLOID);
  707                 return (0);
  708         } else {
  709                 SYSCTL_WUNLOCK();
  710                 return (ENOENT);
  711         }
  712 }
  713 
  714 /*
  715  * Remove dynamically created sysctl trees.
  716  * oidp - top of the tree to be removed
  717  * del - if 0 - just deregister, otherwise free up entries as well
  718  * recurse - if != 0 traverse the subtree to be deleted
  719  */
  720 int
  721 sysctl_remove_oid(struct sysctl_oid *oidp, int del, int recurse)
  722 {
  723         int error;
  724 
  725         SYSCTL_WLOCK();
  726         error = sysctl_remove_oid_locked(oidp, del, recurse);
  727         SYSCTL_WUNLOCK();
  728         return (error);
  729 }
  730 
  731 int
  732 sysctl_remove_name(struct sysctl_oid *parent, const char *name,
  733     int del, int recurse)
  734 {
  735         struct sysctl_oid *p, *tmp;
  736         int error;
  737 
  738         error = ENOENT;
  739         SYSCTL_WLOCK();
  740         SLIST_FOREACH_SAFE(p, SYSCTL_CHILDREN(parent), oid_link, tmp) {
  741                 if (strcmp(p->oid_name, name) == 0) {
  742                         error = sysctl_remove_oid_locked(p, del, recurse);
  743                         break;
  744                 }
  745         }
  746         SYSCTL_WUNLOCK();
  747 
  748         return (error);
  749 }
  750 
  751 /*
  752  * Duplicate the provided string, escaping any illegal characters.  The result
  753  * must be freed when no longer in use.
  754  *
  755  * The list of illegal characters is ".".
  756  */
  757 static char*
  758 sysctl_escape_name(const char* orig)
  759 {
  760         int i, s = 0, d = 0, nillegals = 0;
  761         char *new;
  762 
  763         /* First count the number of illegal characters */
  764         for (i = 0; orig[i] != '\0'; i++) {
  765                 if (orig[i] == '.')
  766                         nillegals++;
  767         }
  768 
  769         /* Allocate storage for new string */
  770         new = malloc(i + 2 * nillegals + 1, M_SYSCTLOID, M_WAITOK);
  771 
  772         /* Copy the name, escaping characters as we go */
  773         while (orig[s] != '\0') {
  774                 if (orig[s] == '.') {
  775                         /* %25 is the hexadecimal representation of '.' */
  776                         new[d++] = '%';
  777                         new[d++] = '2';
  778                         new[d++] = '5';
  779                         s++;
  780                 } else {
  781                         new[d++] = orig[s++];
  782                 }
  783         }
  784 
  785         /* Finally, nul-terminate */
  786         new[d] = '\0';
  787 
  788         return (new);
  789 }
  790 
  791 static int
  792 sysctl_remove_oid_locked(struct sysctl_oid *oidp, int del, int recurse)
  793 {
  794         struct sysctl_oid *p, *tmp;
  795         int error;
  796 
  797         SYSCTL_ASSERT_WLOCKED();
  798         if (oidp == NULL)
  799                 return(EINVAL);
  800         if ((oidp->oid_kind & CTLFLAG_DYN) == 0) {
  801                 printf("Warning: can't remove non-dynamic nodes (%s)!\n",
  802                     oidp->oid_name);
  803                 return (EINVAL);
  804         }
  805         /*
  806          * WARNING: normal method to do this should be through
  807          * sysctl_ctx_free(). Use recursing as the last resort
  808          * method to purge your sysctl tree of leftovers...
  809          * However, if some other code still references these nodes,
  810          * it will panic.
  811          */
  812         if ((oidp->oid_kind & CTLTYPE) == CTLTYPE_NODE) {
  813                 if (oidp->oid_refcnt == 1) {
  814                         SLIST_FOREACH_SAFE(p,
  815                             SYSCTL_CHILDREN(oidp), oid_link, tmp) {
  816                                 if (!recurse) {
  817                                         printf("Warning: failed attempt to "
  818                                             "remove oid %s with child %s\n",
  819                                             oidp->oid_name, p->oid_name);
  820                                         return (ENOTEMPTY);
  821                                 }
  822                                 error = sysctl_remove_oid_locked(p, del,
  823                                     recurse);
  824                                 if (error)
  825                                         return (error);
  826                         }
  827                 }
  828         }
  829         if (oidp->oid_refcnt > 1 ) {
  830                 oidp->oid_refcnt--;
  831         } else {
  832                 if (oidp->oid_refcnt == 0) {
  833                         printf("Warning: bad oid_refcnt=%u (%s)!\n",
  834                                 oidp->oid_refcnt, oidp->oid_name);
  835                         return (EINVAL);
  836                 }
  837                 sysctl_unregister_oid(oidp);
  838                 if (del) {
  839                         /*
  840                          * Wait for all threads running the handler to drain.
  841                          * This preserves the previous behavior when the
  842                          * sysctl lock was held across a handler invocation,
  843                          * and is necessary for module unload correctness.
  844                          */
  845                         while (oidp->oid_running > 0) {
  846                                 oidp->oid_kind |= CTLFLAG_DYING;
  847                                 SYSCTL_SLEEP(&oidp->oid_running, "oidrm", 0);
  848                         }
  849                         if (oidp->oid_descr)
  850                                 free(__DECONST(char *, oidp->oid_descr),
  851                                     M_SYSCTLOID);
  852                         if (oidp->oid_label)
  853                                 free(__DECONST(char *, oidp->oid_label),
  854                                     M_SYSCTLOID);
  855                         free(__DECONST(char *, oidp->oid_name), M_SYSCTLOID);
  856                         free(oidp, M_SYSCTLOID);
  857                 }
  858         }
  859         return (0);
  860 }
  861 /*
  862  * Create new sysctls at run time.
  863  * clist may point to a valid context initialized with sysctl_ctx_init().
  864  */
  865 struct sysctl_oid *
  866 sysctl_add_oid(struct sysctl_ctx_list *clist, struct sysctl_oid_list *parent,
  867         int number, const char *name, int kind, void *arg1, intmax_t arg2,
  868         int (*handler)(SYSCTL_HANDLER_ARGS), const char *fmt, const char *descr,
  869         const char *label)
  870 {
  871         struct sysctl_oid *oidp;
  872         char *escaped;
  873 
  874         /* You have to hook up somewhere.. */
  875         if (parent == NULL)
  876                 return(NULL);
  877         escaped = sysctl_escape_name(name);
  878         /* Check if the node already exists, otherwise create it */
  879         SYSCTL_WLOCK();
  880         oidp = sysctl_find_oidname(escaped, parent);
  881         if (oidp != NULL) {
  882                 free(escaped, M_SYSCTLOID);
  883                 if ((oidp->oid_kind & CTLTYPE) == CTLTYPE_NODE) {
  884                         oidp->oid_refcnt++;
  885                         /* Update the context */
  886                         if (clist != NULL)
  887                                 sysctl_ctx_entry_add(clist, oidp);
  888                         SYSCTL_WUNLOCK();
  889                         return (oidp);
  890                 } else {
  891                         sysctl_warn_reuse(__func__, oidp);
  892                         SYSCTL_WUNLOCK();
  893                         return (NULL);
  894                 }
  895         }
  896         oidp = malloc(sizeof(struct sysctl_oid), M_SYSCTLOID, M_WAITOK|M_ZERO);
  897         oidp->oid_parent = parent;
  898         SLIST_INIT(&oidp->oid_children);
  899         oidp->oid_number = number;
  900         oidp->oid_refcnt = 1;
  901         oidp->oid_name = escaped;
  902         oidp->oid_handler = handler;
  903         oidp->oid_kind = CTLFLAG_DYN | kind;
  904         oidp->oid_arg1 = arg1;
  905         oidp->oid_arg2 = arg2;
  906         oidp->oid_fmt = fmt;
  907         if (descr != NULL)
  908                 oidp->oid_descr = strdup(descr, M_SYSCTLOID);
  909         if (label != NULL)
  910                 oidp->oid_label = strdup(label, M_SYSCTLOID);
  911         /* Update the context, if used */
  912         if (clist != NULL)
  913                 sysctl_ctx_entry_add(clist, oidp);
  914         /* Register this oid */
  915         sysctl_register_oid(oidp);
  916         SYSCTL_WUNLOCK();
  917         return (oidp);
  918 }
  919 
  920 /*
  921  * Rename an existing oid.
  922  */
  923 void
  924 sysctl_rename_oid(struct sysctl_oid *oidp, const char *name)
  925 {
  926         char *newname;
  927         char *oldname;
  928 
  929         newname = strdup(name, M_SYSCTLOID);
  930         SYSCTL_WLOCK();
  931         oldname = __DECONST(char *, oidp->oid_name);
  932         oidp->oid_name = newname;
  933         SYSCTL_WUNLOCK();
  934         free(oldname, M_SYSCTLOID);
  935 }
  936 
  937 /*
  938  * Reparent an existing oid.
  939  */
  940 int
  941 sysctl_move_oid(struct sysctl_oid *oid, struct sysctl_oid_list *parent)
  942 {
  943         struct sysctl_oid *oidp;
  944 
  945         SYSCTL_WLOCK();
  946         if (oid->oid_parent == parent) {
  947                 SYSCTL_WUNLOCK();
  948                 return (0);
  949         }
  950         oidp = sysctl_find_oidname(oid->oid_name, parent);
  951         if (oidp != NULL) {
  952                 SYSCTL_WUNLOCK();
  953                 return (EEXIST);
  954         }
  955         sysctl_unregister_oid(oid);
  956         oid->oid_parent = parent;
  957         oid->oid_number = OID_AUTO;
  958         sysctl_register_oid(oid);
  959         SYSCTL_WUNLOCK();
  960         return (0);
  961 }
  962 
  963 /*
  964  * Register the kernel's oids on startup.
  965  */
  966 SET_DECLARE(sysctl_set, struct sysctl_oid);
  967 
  968 static void
  969 sysctl_register_all(void *arg)
  970 {
  971         struct sysctl_oid **oidp;
  972 
  973         sx_init(&sysctlmemlock, "sysctl mem");
  974         sx_init(&sysctlstringlock, "sysctl string handler");
  975         SYSCTL_INIT();
  976         SYSCTL_WLOCK();
  977         SET_FOREACH(oidp, sysctl_set)
  978                 sysctl_register_oid(*oidp);
  979         SYSCTL_WUNLOCK();
  980 }
  981 SYSINIT(sysctl, SI_SUB_KMEM, SI_ORDER_FIRST, sysctl_register_all, NULL);
  982 
  983 /*
  984  * "Staff-functions"
  985  *
  986  * These functions implement a presently undocumented interface 
  987  * used by the sysctl program to walk the tree, and get the type
  988  * so it can print the value.
  989  * This interface is under work and consideration, and should probably
  990  * be killed with a big axe by the first person who can find the time.
  991  * (Be aware, though, that the proper interface isn't as obvious as it
  992  * may seem; there are various conflicting requirements.)
  993  *
  994  * {CTL_SYSCTL, CTL_SYSCTL_DEBUG}               printf the entire MIB-tree.
  995  * {CTL_SYSCTL, CTL_SYSCTL_NAME, ...}           return the name of the "..."
  996  *                                              OID.
  997  * {CTL_SYSCTL, CTL_SYSCTL_NEXT, ...}           return the next OID, honoring
  998  *                                              CTLFLAG_SKIP.
  999  * {CTL_SYSCTL, CTL_SYSCTL_NAME2OID}            return the OID of the name in
 1000  *                                              "new"
 1001  * {CTL_SYSCTL, CTL_SYSCTL_OIDFMT, ...}         return the kind & format info
 1002  *                                              for the "..." OID.
 1003  * {CTL_SYSCTL, CTL_SYSCTL_OIDDESCR, ...}       return the description of the
 1004  *                                              "..." OID.
 1005  * {CTL_SYSCTL, CTL_SYSCTL_OIDLABEL, ...}       return the aggregation label of
 1006  *                                              the "..." OID.
 1007  * {CTL_SYSCTL, CTL_SYSCTL_NEXTNOSKIP, ...}     return the next OID, ignoring
 1008  *                                              CTLFLAG_SKIP.
 1009  */
 1010 
 1011 #ifdef SYSCTL_DEBUG
 1012 static void
 1013 sysctl_sysctl_debug_dump_node(struct sysctl_oid_list *l, int i)
 1014 {
 1015         int k;
 1016         struct sysctl_oid *oidp;
 1017 
 1018         SYSCTL_ASSERT_LOCKED();
 1019         SLIST_FOREACH(oidp, l, oid_link) {
 1020                 for (k=0; k<i; k++)
 1021                         printf(" ");
 1022 
 1023                 printf("%d %s ", oidp->oid_number, oidp->oid_name);
 1024 
 1025                 printf("%c%c",
 1026                         oidp->oid_kind & CTLFLAG_RD ? 'R':' ',
 1027                         oidp->oid_kind & CTLFLAG_WR ? 'W':' ');
 1028 
 1029                 if (oidp->oid_handler)
 1030                         printf(" *Handler");
 1031 
 1032                 switch (oidp->oid_kind & CTLTYPE) {
 1033                         case CTLTYPE_NODE:
 1034                                 printf(" Node\n");
 1035                                 if (!oidp->oid_handler) {
 1036                                         sysctl_sysctl_debug_dump_node(
 1037                                             SYSCTL_CHILDREN(oidp), i + 2);
 1038                                 }
 1039                                 break;
 1040                         case CTLTYPE_INT:    printf(" Int\n"); break;
 1041                         case CTLTYPE_UINT:   printf(" u_int\n"); break;
 1042                         case CTLTYPE_LONG:   printf(" Long\n"); break;
 1043                         case CTLTYPE_ULONG:  printf(" u_long\n"); break;
 1044                         case CTLTYPE_STRING: printf(" String\n"); break;
 1045                         case CTLTYPE_S8:     printf(" int8_t\n"); break;
 1046                         case CTLTYPE_S16:    printf(" int16_t\n"); break;
 1047                         case CTLTYPE_S32:    printf(" int32_t\n"); break;
 1048                         case CTLTYPE_S64:    printf(" int64_t\n"); break;
 1049                         case CTLTYPE_U8:     printf(" uint8_t\n"); break;
 1050                         case CTLTYPE_U16:    printf(" uint16_t\n"); break;
 1051                         case CTLTYPE_U32:    printf(" uint32_t\n"); break;
 1052                         case CTLTYPE_U64:    printf(" uint64_t\n"); break;
 1053                         case CTLTYPE_OPAQUE: printf(" Opaque/struct\n"); break;
 1054                         default:             printf("\n");
 1055                 }
 1056         }
 1057 }
 1058 
 1059 static int
 1060 sysctl_sysctl_debug(SYSCTL_HANDLER_ARGS)
 1061 {
 1062         struct rm_priotracker tracker;
 1063         int error;
 1064 
 1065         error = priv_check(req->td, PRIV_SYSCTL_DEBUG);
 1066         if (error)
 1067                 return (error);
 1068         SYSCTL_RLOCK(&tracker);
 1069         sysctl_sysctl_debug_dump_node(&sysctl__children, 0);
 1070         SYSCTL_RUNLOCK(&tracker);
 1071         return (ENOENT);
 1072 }
 1073 
 1074 SYSCTL_PROC(_sysctl, CTL_SYSCTL_DEBUG, debug, CTLTYPE_STRING | CTLFLAG_RD |
 1075     CTLFLAG_MPSAFE, 0, 0, sysctl_sysctl_debug, "-", "");
 1076 #endif
 1077 
 1078 static int
 1079 sysctl_sysctl_name(SYSCTL_HANDLER_ARGS)
 1080 {
 1081         int *name = (int *) arg1;
 1082         u_int namelen = arg2;
 1083         int error;
 1084         struct sysctl_oid *oid;
 1085         struct sysctl_oid_list *lsp = &sysctl__children, *lsp2;
 1086         struct rm_priotracker tracker;
 1087         char buf[10];
 1088 
 1089         error = sysctl_wire_old_buffer(req, 0);
 1090         if (error)
 1091                 return (error);
 1092 
 1093         SYSCTL_RLOCK(&tracker);
 1094         while (namelen) {
 1095                 if (!lsp) {
 1096                         snprintf(buf,sizeof(buf),"%d",*name);
 1097                         if (req->oldidx)
 1098                                 error = SYSCTL_OUT(req, ".", 1);
 1099                         if (!error)
 1100                                 error = SYSCTL_OUT(req, buf, strlen(buf));
 1101                         if (error)
 1102                                 goto out;
 1103                         namelen--;
 1104                         name++;
 1105                         continue;
 1106                 }
 1107                 lsp2 = NULL;
 1108                 SLIST_FOREACH(oid, lsp, oid_link) {
 1109                         if (oid->oid_number != *name)
 1110                                 continue;
 1111 
 1112                         if (req->oldidx)
 1113                                 error = SYSCTL_OUT(req, ".", 1);
 1114                         if (!error)
 1115                                 error = SYSCTL_OUT(req, oid->oid_name,
 1116                                         strlen(oid->oid_name));
 1117                         if (error)
 1118                                 goto out;
 1119 
 1120                         namelen--;
 1121                         name++;
 1122 
 1123                         if ((oid->oid_kind & CTLTYPE) != CTLTYPE_NODE) 
 1124                                 break;
 1125 
 1126                         if (oid->oid_handler)
 1127                                 break;
 1128 
 1129                         lsp2 = SYSCTL_CHILDREN(oid);
 1130                         break;
 1131                 }
 1132                 lsp = lsp2;
 1133         }
 1134         error = SYSCTL_OUT(req, "", 1);
 1135  out:
 1136         SYSCTL_RUNLOCK(&tracker);
 1137         return (error);
 1138 }
 1139 
 1140 /*
 1141  * XXXRW/JA: Shouldn't return name data for nodes that we don't permit in
 1142  * capability mode.
 1143  */
 1144 static SYSCTL_NODE(_sysctl, CTL_SYSCTL_NAME, name, CTLFLAG_RD |
 1145     CTLFLAG_MPSAFE | CTLFLAG_CAPRD, sysctl_sysctl_name, "");
 1146 
 1147 enum sysctl_iter_action {
 1148         ITER_SIBLINGS,  /* Not matched, continue iterating siblings */
 1149         ITER_CHILDREN,  /* Node has children we need to iterate over them */
 1150         ITER_FOUND,     /* Matching node was found */
 1151 };
 1152 
 1153 /*
 1154  * Tries to find the next node for @name and @namelen.
 1155  *
 1156  * Returns next action to take. 
 1157  */
 1158 static enum sysctl_iter_action
 1159 sysctl_sysctl_next_node(struct sysctl_oid *oidp, int *name, unsigned int namelen,
 1160     bool honor_skip)
 1161 {
 1162 
 1163         if ((oidp->oid_kind & CTLFLAG_DORMANT) != 0)
 1164                 return (ITER_SIBLINGS);
 1165 
 1166         if (honor_skip && (oidp->oid_kind & CTLFLAG_SKIP) != 0)
 1167                 return (ITER_SIBLINGS);
 1168 
 1169         if (namelen == 0) {
 1170                 /*
 1171                  * We have reached a node with a full name match and are
 1172                  * looking for the next oid in its children.
 1173                  *
 1174                  * For CTL_SYSCTL_NEXTNOSKIP we are done.
 1175                  *
 1176                  * For CTL_SYSCTL_NEXT we skip CTLTYPE_NODE (unless it
 1177                  * has a handler) and move on to the children.
 1178                  */
 1179                 if (!honor_skip)
 1180                         return (ITER_FOUND);
 1181                 if ((oidp->oid_kind & CTLTYPE) != CTLTYPE_NODE) 
 1182                         return (ITER_FOUND);
 1183                 /* If node does not have an iterator, treat it as leaf */
 1184                 if (oidp->oid_handler) 
 1185                         return (ITER_FOUND);
 1186 
 1187                 /* Report oid as a node to iterate */
 1188                 return (ITER_CHILDREN);
 1189         }
 1190 
 1191         /*
 1192          * No match yet. Continue seeking the given name.
 1193          *
 1194          * We are iterating in order by oid_number, so skip oids lower
 1195          * than the one we are looking for.
 1196          *
 1197          * When the current oid_number is higher than the one we seek,
 1198          * that means we have reached the next oid in the sequence and
 1199          * should return it.
 1200          *
 1201          * If the oid_number matches the name at this level then we
 1202          * have to find a node to continue searching at the next level.
 1203          */
 1204         if (oidp->oid_number < *name)
 1205                 return (ITER_SIBLINGS);
 1206         if (oidp->oid_number > *name) {
 1207                 /*
 1208                  * We have reached the next oid.
 1209                  *
 1210                  * For CTL_SYSCTL_NEXTNOSKIP we are done.
 1211                  *
 1212                  * For CTL_SYSCTL_NEXT we skip CTLTYPE_NODE (unless it
 1213                  * has a handler) and move on to the children.
 1214                  */
 1215                 if (!honor_skip)
 1216                         return (ITER_FOUND);
 1217                 if ((oidp->oid_kind & CTLTYPE) != CTLTYPE_NODE)
 1218                         return (ITER_FOUND);
 1219                 /* If node does not have an iterator, treat it as leaf */
 1220                 if (oidp->oid_handler)
 1221                         return (ITER_FOUND);
 1222                 return (ITER_CHILDREN);
 1223         }
 1224 
 1225         /* match at a current level */
 1226         if ((oidp->oid_kind & CTLTYPE) != CTLTYPE_NODE)
 1227                 return (ITER_SIBLINGS);
 1228         if (oidp->oid_handler)
 1229                 return (ITER_SIBLINGS);
 1230 
 1231         return (ITER_CHILDREN);
 1232 }
 1233 
 1234 /*
 1235  * Recursively walk the sysctl subtree at lsp until we find the given name.
 1236  * Returns true and fills in next oid data in @next and @len if oid is found.
 1237  */
 1238 static bool
 1239 sysctl_sysctl_next_action(struct sysctl_oid_list *lsp, int *name, u_int namelen, 
 1240     int *next, int *len, int level, bool honor_skip)
 1241 {
 1242         struct sysctl_oid *oidp;
 1243         bool success = false;
 1244         enum sysctl_iter_action action;
 1245 
 1246         SYSCTL_ASSERT_LOCKED();
 1247         SLIST_FOREACH(oidp, lsp, oid_link) {
 1248                 action = sysctl_sysctl_next_node(oidp, name, namelen, honor_skip);
 1249                 if (action == ITER_SIBLINGS)
 1250                         continue;
 1251                 if (action == ITER_FOUND) {
 1252                         success = true;
 1253                         break;
 1254                 }
 1255                 KASSERT((action== ITER_CHILDREN), ("ret(%d)!=ITER_CHILDREN", action));
 1256 
 1257                 lsp = SYSCTL_CHILDREN(oidp);
 1258                 if (namelen == 0) {
 1259                         success = sysctl_sysctl_next_action(lsp, NULL, 0,
 1260                             next + 1, len, level + 1, honor_skip);
 1261                 } else {
 1262                         success = sysctl_sysctl_next_action(lsp, name + 1, namelen - 1,
 1263                             next + 1, len, level + 1, honor_skip);
 1264                         if (!success) {
 1265 
 1266                                 /*
 1267                                  * We maintain the invariant that current node oid
 1268                                  * is >= the oid provided in @name.
 1269                                  * As there are no usable children at this node,
 1270                                  *  current node oid is strictly > than the requested
 1271                                  *  oid.
 1272                                  * Hence, reduce namelen to 0 to allow for picking first
 1273                                  *  nodes/leafs in the next node in list.
 1274                                  */
 1275                                 namelen = 0;
 1276                         }
 1277                 }
 1278                 if (success)
 1279                         break;
 1280         }
 1281 
 1282         if (success) {
 1283                 *next = oidp->oid_number;
 1284                 if (level > *len)
 1285                         *len = level;
 1286         }
 1287 
 1288         return (success);
 1289 }
 1290 
 1291 static int
 1292 sysctl_sysctl_next(SYSCTL_HANDLER_ARGS)
 1293 {
 1294         int *name = (int *) arg1;
 1295         u_int namelen = arg2;
 1296         int len, error;
 1297         bool success;
 1298         struct sysctl_oid_list *lsp = &sysctl__children;
 1299         struct rm_priotracker tracker;
 1300         int next[CTL_MAXNAME];
 1301 
 1302         len = 0;
 1303         SYSCTL_RLOCK(&tracker);
 1304         success = sysctl_sysctl_next_action(lsp, name, namelen, next, &len, 1,
 1305             oidp->oid_number == CTL_SYSCTL_NEXT);
 1306         SYSCTL_RUNLOCK(&tracker);
 1307         if (!success)
 1308                 return (ENOENT);
 1309         error = SYSCTL_OUT(req, next, len * sizeof (int));
 1310         return (error);
 1311 }
 1312 
 1313 /*
 1314  * XXXRW/JA: Shouldn't return next data for nodes that we don't permit in
 1315  * capability mode.
 1316  */
 1317 static SYSCTL_NODE(_sysctl, CTL_SYSCTL_NEXT, next, CTLFLAG_RD |
 1318     CTLFLAG_MPSAFE | CTLFLAG_CAPRD, sysctl_sysctl_next, "");
 1319 
 1320 static SYSCTL_NODE(_sysctl, CTL_SYSCTL_NEXTNOSKIP, nextnoskip, CTLFLAG_RD |
 1321     CTLFLAG_MPSAFE | CTLFLAG_CAPRD, sysctl_sysctl_next, "");
 1322 
 1323 static int
 1324 name2oid(char *name, int *oid, int *len, struct sysctl_oid **oidpp)
 1325 {
 1326         struct sysctl_oid *oidp;
 1327         struct sysctl_oid_list *lsp = &sysctl__children;
 1328         char *p;
 1329 
 1330         SYSCTL_ASSERT_LOCKED();
 1331 
 1332         for (*len = 0; *len < CTL_MAXNAME;) {
 1333                 p = strsep(&name, ".");
 1334 
 1335                 oidp = SLIST_FIRST(lsp);
 1336                 for (;; oidp = SLIST_NEXT(oidp, oid_link)) {
 1337                         if (oidp == NULL)
 1338                                 return (ENOENT);
 1339                         if (strcmp(p, oidp->oid_name) == 0)
 1340                                 break;
 1341                 }
 1342                 *oid++ = oidp->oid_number;
 1343                 (*len)++;
 1344 
 1345                 if (name == NULL || *name == '\0') {
 1346                         if (oidpp)
 1347                                 *oidpp = oidp;
 1348                         return (0);
 1349                 }
 1350 
 1351                 if ((oidp->oid_kind & CTLTYPE) != CTLTYPE_NODE)
 1352                         break;
 1353 
 1354                 if (oidp->oid_handler)
 1355                         break;
 1356 
 1357                 lsp = SYSCTL_CHILDREN(oidp);
 1358         }
 1359         return (ENOENT);
 1360 }
 1361 
 1362 static int
 1363 sysctl_sysctl_name2oid(SYSCTL_HANDLER_ARGS)
 1364 {
 1365         char *p;
 1366         int error, oid[CTL_MAXNAME], len = 0;
 1367         struct sysctl_oid *op = NULL;
 1368         struct rm_priotracker tracker;
 1369         char buf[32];
 1370 
 1371         if (!req->newlen) 
 1372                 return (ENOENT);
 1373         if (req->newlen >= MAXPATHLEN)  /* XXX arbitrary, undocumented */
 1374                 return (ENAMETOOLONG);
 1375 
 1376         p = buf;
 1377         if (req->newlen >= sizeof(buf))
 1378                 p = malloc(req->newlen+1, M_SYSCTL, M_WAITOK);
 1379 
 1380         error = SYSCTL_IN(req, p, req->newlen);
 1381         if (error) {
 1382                 if (p != buf)
 1383                         free(p, M_SYSCTL);
 1384                 return (error);
 1385         }
 1386 
 1387         p [req->newlen] = '\0';
 1388 
 1389         SYSCTL_RLOCK(&tracker);
 1390         error = name2oid(p, oid, &len, &op);
 1391         SYSCTL_RUNLOCK(&tracker);
 1392 
 1393         if (p != buf)
 1394                 free(p, M_SYSCTL);
 1395 
 1396         if (error)
 1397                 return (error);
 1398 
 1399         error = SYSCTL_OUT(req, oid, len * sizeof *oid);
 1400         return (error);
 1401 }
 1402 
 1403 /*
 1404  * XXXRW/JA: Shouldn't return name2oid data for nodes that we don't permit in
 1405  * capability mode.
 1406  */
 1407 SYSCTL_PROC(_sysctl, CTL_SYSCTL_NAME2OID, name2oid, CTLTYPE_INT | CTLFLAG_RW |
 1408     CTLFLAG_ANYBODY | CTLFLAG_MPSAFE | CTLFLAG_CAPRW, 0, 0,
 1409     sysctl_sysctl_name2oid, "I", "");
 1410 
 1411 static int
 1412 sysctl_sysctl_oidfmt(SYSCTL_HANDLER_ARGS)
 1413 {
 1414         struct sysctl_oid *oid;
 1415         struct rm_priotracker tracker;
 1416         int error;
 1417 
 1418         error = sysctl_wire_old_buffer(req, 0);
 1419         if (error)
 1420                 return (error);
 1421 
 1422         SYSCTL_RLOCK(&tracker);
 1423         error = sysctl_find_oid(arg1, arg2, &oid, NULL, req);
 1424         if (error)
 1425                 goto out;
 1426 
 1427         if (oid->oid_fmt == NULL) {
 1428                 error = ENOENT;
 1429                 goto out;
 1430         }
 1431         error = SYSCTL_OUT(req, &oid->oid_kind, sizeof(oid->oid_kind));
 1432         if (error)
 1433                 goto out;
 1434         error = SYSCTL_OUT(req, oid->oid_fmt, strlen(oid->oid_fmt) + 1);
 1435  out:
 1436         SYSCTL_RUNLOCK(&tracker);
 1437         return (error);
 1438 }
 1439 
 1440 static SYSCTL_NODE(_sysctl, CTL_SYSCTL_OIDFMT, oidfmt, CTLFLAG_RD |
 1441     CTLFLAG_MPSAFE | CTLFLAG_CAPRD, sysctl_sysctl_oidfmt, "");
 1442 
 1443 static int
 1444 sysctl_sysctl_oiddescr(SYSCTL_HANDLER_ARGS)
 1445 {
 1446         struct sysctl_oid *oid;
 1447         struct rm_priotracker tracker;
 1448         int error;
 1449 
 1450         error = sysctl_wire_old_buffer(req, 0);
 1451         if (error)
 1452                 return (error);
 1453 
 1454         SYSCTL_RLOCK(&tracker);
 1455         error = sysctl_find_oid(arg1, arg2, &oid, NULL, req);
 1456         if (error)
 1457                 goto out;
 1458 
 1459         if (oid->oid_descr == NULL) {
 1460                 error = ENOENT;
 1461                 goto out;
 1462         }
 1463         error = SYSCTL_OUT(req, oid->oid_descr, strlen(oid->oid_descr) + 1);
 1464  out:
 1465         SYSCTL_RUNLOCK(&tracker);
 1466         return (error);
 1467 }
 1468 
 1469 static SYSCTL_NODE(_sysctl, CTL_SYSCTL_OIDDESCR, oiddescr, CTLFLAG_RD |
 1470     CTLFLAG_MPSAFE|CTLFLAG_CAPRD, sysctl_sysctl_oiddescr, "");
 1471 
 1472 static int
 1473 sysctl_sysctl_oidlabel(SYSCTL_HANDLER_ARGS)
 1474 {
 1475         struct sysctl_oid *oid;
 1476         struct rm_priotracker tracker;
 1477         int error;
 1478 
 1479         error = sysctl_wire_old_buffer(req, 0);
 1480         if (error)
 1481                 return (error);
 1482 
 1483         SYSCTL_RLOCK(&tracker);
 1484         error = sysctl_find_oid(arg1, arg2, &oid, NULL, req);
 1485         if (error)
 1486                 goto out;
 1487 
 1488         if (oid->oid_label == NULL) {
 1489                 error = ENOENT;
 1490                 goto out;
 1491         }
 1492         error = SYSCTL_OUT(req, oid->oid_label, strlen(oid->oid_label) + 1);
 1493  out:
 1494         SYSCTL_RUNLOCK(&tracker);
 1495         return (error);
 1496 }
 1497 
 1498 static SYSCTL_NODE(_sysctl, CTL_SYSCTL_OIDLABEL, oidlabel, CTLFLAG_RD |
 1499     CTLFLAG_MPSAFE | CTLFLAG_CAPRD, sysctl_sysctl_oidlabel, "");
 1500 
 1501 /*
 1502  * Default "handler" functions.
 1503  */
 1504 
 1505 /*
 1506  * Handle a bool.
 1507  * Two cases:
 1508  *     a variable:  point arg1 at it.
 1509  *     a constant:  pass it in arg2.
 1510  */
 1511 
 1512 int
 1513 sysctl_handle_bool(SYSCTL_HANDLER_ARGS)
 1514 {
 1515         uint8_t temp;
 1516         int error;
 1517 
 1518         /*
 1519          * Attempt to get a coherent snapshot by making a copy of the data.
 1520          */
 1521         if (arg1)
 1522                 temp = *(bool *)arg1 ? 1 : 0;
 1523         else
 1524                 temp = arg2 ? 1 : 0;
 1525 
 1526         error = SYSCTL_OUT(req, &temp, sizeof(temp));
 1527         if (error || !req->newptr)
 1528                 return (error);
 1529 
 1530         if (!arg1)
 1531                 error = EPERM;
 1532         else {
 1533                 error = SYSCTL_IN(req, &temp, sizeof(temp));
 1534                 if (!error)
 1535                         *(bool *)arg1 = temp ? 1 : 0;
 1536         }
 1537         return (error);
 1538 }
 1539 
 1540 /*
 1541  * Handle an int8_t, signed or unsigned.
 1542  * Two cases:
 1543  *     a variable:  point arg1 at it.
 1544  *     a constant:  pass it in arg2.
 1545  */
 1546 
 1547 int
 1548 sysctl_handle_8(SYSCTL_HANDLER_ARGS)
 1549 {
 1550         int8_t tmpout;
 1551         int error = 0;
 1552 
 1553         /*
 1554          * Attempt to get a coherent snapshot by making a copy of the data.
 1555          */
 1556         if (arg1)
 1557                 tmpout = *(int8_t *)arg1;
 1558         else
 1559                 tmpout = arg2;
 1560         error = SYSCTL_OUT(req, &tmpout, sizeof(tmpout));
 1561 
 1562         if (error || !req->newptr)
 1563                 return (error);
 1564 
 1565         if (!arg1)
 1566                 error = EPERM;
 1567         else
 1568                 error = SYSCTL_IN(req, arg1, sizeof(tmpout));
 1569         return (error);
 1570 }
 1571 
 1572 /*
 1573  * Handle an int16_t, signed or unsigned.
 1574  * Two cases:
 1575  *     a variable:  point arg1 at it.
 1576  *     a constant:  pass it in arg2.
 1577  */
 1578 
 1579 int
 1580 sysctl_handle_16(SYSCTL_HANDLER_ARGS)
 1581 {
 1582         int16_t tmpout;
 1583         int error = 0;
 1584 
 1585         /*
 1586          * Attempt to get a coherent snapshot by making a copy of the data.
 1587          */
 1588         if (arg1)
 1589                 tmpout = *(int16_t *)arg1;
 1590         else
 1591                 tmpout = arg2;
 1592         error = SYSCTL_OUT(req, &tmpout, sizeof(tmpout));
 1593 
 1594         if (error || !req->newptr)
 1595                 return (error);
 1596 
 1597         if (!arg1)
 1598                 error = EPERM;
 1599         else
 1600                 error = SYSCTL_IN(req, arg1, sizeof(tmpout));
 1601         return (error);
 1602 }
 1603 
 1604 /*
 1605  * Handle an int32_t, signed or unsigned.
 1606  * Two cases:
 1607  *     a variable:  point arg1 at it.
 1608  *     a constant:  pass it in arg2.
 1609  */
 1610 
 1611 int
 1612 sysctl_handle_32(SYSCTL_HANDLER_ARGS)
 1613 {
 1614         int32_t tmpout;
 1615         int error = 0;
 1616 
 1617         /*
 1618          * Attempt to get a coherent snapshot by making a copy of the data.
 1619          */
 1620         if (arg1)
 1621                 tmpout = *(int32_t *)arg1;
 1622         else
 1623                 tmpout = arg2;
 1624         error = SYSCTL_OUT(req, &tmpout, sizeof(tmpout));
 1625 
 1626         if (error || !req->newptr)
 1627                 return (error);
 1628 
 1629         if (!arg1)
 1630                 error = EPERM;
 1631         else
 1632                 error = SYSCTL_IN(req, arg1, sizeof(tmpout));
 1633         return (error);
 1634 }
 1635 
 1636 /*
 1637  * Handle an int, signed or unsigned.
 1638  * Two cases:
 1639  *     a variable:  point arg1 at it.
 1640  *     a constant:  pass it in arg2.
 1641  */
 1642 
 1643 int
 1644 sysctl_handle_int(SYSCTL_HANDLER_ARGS)
 1645 {
 1646         int tmpout, error = 0;
 1647 
 1648         /*
 1649          * Attempt to get a coherent snapshot by making a copy of the data.
 1650          */
 1651         if (arg1)
 1652                 tmpout = *(int *)arg1;
 1653         else
 1654                 tmpout = arg2;
 1655         error = SYSCTL_OUT(req, &tmpout, sizeof(int));
 1656 
 1657         if (error || !req->newptr)
 1658                 return (error);
 1659 
 1660         if (!arg1)
 1661                 error = EPERM;
 1662         else
 1663                 error = SYSCTL_IN(req, arg1, sizeof(int));
 1664         return (error);
 1665 }
 1666 
 1667 /*
 1668  * Based on on sysctl_handle_int() convert milliseconds into ticks.
 1669  * Note: this is used by TCP.
 1670  */
 1671 
 1672 int
 1673 sysctl_msec_to_ticks(SYSCTL_HANDLER_ARGS)
 1674 {
 1675         int error, s, tt;
 1676 
 1677         tt = *(int *)arg1;
 1678         s = (int)((int64_t)tt * 1000 / hz);
 1679 
 1680         error = sysctl_handle_int(oidp, &s, 0, req);
 1681         if (error || !req->newptr)
 1682                 return (error);
 1683 
 1684         tt = (int)((int64_t)s * hz / 1000);
 1685         if (tt < 1)
 1686                 return (EINVAL);
 1687 
 1688         *(int *)arg1 = tt;
 1689         return (0);
 1690 }
 1691 
 1692 /*
 1693  * Handle a long, signed or unsigned.
 1694  * Two cases:
 1695  *     a variable:  point arg1 at it.
 1696  *     a constant:  pass it in arg2.
 1697  */
 1698 
 1699 int
 1700 sysctl_handle_long(SYSCTL_HANDLER_ARGS)
 1701 {
 1702         int error = 0;
 1703         long tmplong;
 1704 #ifdef SCTL_MASK32
 1705         int tmpint;
 1706 #endif
 1707 
 1708         /*
 1709          * Attempt to get a coherent snapshot by making a copy of the data.
 1710          */
 1711         if (arg1)
 1712                 tmplong = *(long *)arg1;
 1713         else
 1714                 tmplong = arg2;
 1715 #ifdef SCTL_MASK32
 1716         if (req->flags & SCTL_MASK32) {
 1717                 tmpint = tmplong;
 1718                 error = SYSCTL_OUT(req, &tmpint, sizeof(int));
 1719         } else
 1720 #endif
 1721                 error = SYSCTL_OUT(req, &tmplong, sizeof(long));
 1722 
 1723         if (error || !req->newptr)
 1724                 return (error);
 1725 
 1726         if (!arg1)
 1727                 error = EPERM;
 1728 #ifdef SCTL_MASK32
 1729         else if (req->flags & SCTL_MASK32) {
 1730                 error = SYSCTL_IN(req, &tmpint, sizeof(int));
 1731                 *(long *)arg1 = (long)tmpint;
 1732         }
 1733 #endif
 1734         else
 1735                 error = SYSCTL_IN(req, arg1, sizeof(long));
 1736         return (error);
 1737 }
 1738 
 1739 /*
 1740  * Handle a 64 bit int, signed or unsigned.
 1741  * Two cases:
 1742  *     a variable:  point arg1 at it.
 1743  *     a constant:  pass it in arg2.
 1744  */
 1745 int
 1746 sysctl_handle_64(SYSCTL_HANDLER_ARGS)
 1747 {
 1748         int error = 0;
 1749         uint64_t tmpout;
 1750 
 1751         /*
 1752          * Attempt to get a coherent snapshot by making a copy of the data.
 1753          */
 1754         if (arg1)
 1755                 tmpout = *(uint64_t *)arg1;
 1756         else
 1757                 tmpout = arg2;
 1758         error = SYSCTL_OUT(req, &tmpout, sizeof(uint64_t));
 1759 
 1760         if (error || !req->newptr)
 1761                 return (error);
 1762 
 1763         if (!arg1)
 1764                 error = EPERM;
 1765         else
 1766                 error = SYSCTL_IN(req, arg1, sizeof(uint64_t));
 1767         return (error);
 1768 }
 1769 
 1770 /*
 1771  * Handle our generic '\0' terminated 'C' string.
 1772  * Two cases:
 1773  *      a variable string:  point arg1 at it, arg2 is max length.
 1774  *      a constant string:  point arg1 at it, arg2 is zero.
 1775  */
 1776 
 1777 int
 1778 sysctl_handle_string(SYSCTL_HANDLER_ARGS)
 1779 {
 1780         char *tmparg;
 1781         size_t outlen;
 1782         int error = 0, ro_string = 0;
 1783 
 1784         /*
 1785          * If the sysctl isn't writable and isn't a preallocated tunable that
 1786          * can be modified by kenv(2), microoptimise and treat it as a
 1787          * read-only string.
 1788          * A zero-length buffer indicates a fixed size read-only
 1789          * string.  In ddb, don't worry about trying to make a malloced
 1790          * snapshot.
 1791          */
 1792         if ((oidp->oid_kind & (CTLFLAG_WR | CTLFLAG_TUN)) == 0 ||
 1793             arg2 == 0 || kdb_active) {
 1794                 arg2 = strlen((char *)arg1) + 1;
 1795                 ro_string = 1;
 1796         }
 1797 
 1798         if (req->oldptr != NULL) {
 1799                 if (ro_string) {
 1800                         tmparg = arg1;
 1801                         outlen = strlen(tmparg) + 1;
 1802                 } else {
 1803                         tmparg = malloc(arg2, M_SYSCTLTMP, M_WAITOK);
 1804                         sx_slock(&sysctlstringlock);
 1805                         memcpy(tmparg, arg1, arg2);
 1806                         sx_sunlock(&sysctlstringlock);
 1807                         outlen = strlen(tmparg) + 1;
 1808                 }
 1809 
 1810                 error = SYSCTL_OUT(req, tmparg, outlen);
 1811 
 1812                 if (!ro_string)
 1813                         free(tmparg, M_SYSCTLTMP);
 1814         } else {
 1815                 if (!ro_string)
 1816                         sx_slock(&sysctlstringlock);
 1817                 outlen = strlen((char *)arg1) + 1;
 1818                 if (!ro_string)
 1819                         sx_sunlock(&sysctlstringlock);
 1820                 error = SYSCTL_OUT(req, NULL, outlen);
 1821         }
 1822         if (error || !req->newptr)
 1823                 return (error);
 1824 
 1825         if (req->newlen - req->newidx >= arg2 ||
 1826             req->newlen - req->newidx < 0) {
 1827                 error = EINVAL;
 1828         } else if (req->newlen - req->newidx == 0) {
 1829                 sx_xlock(&sysctlstringlock);
 1830                 ((char *)arg1)[0] = '\0';
 1831                 sx_xunlock(&sysctlstringlock);
 1832         } else if (req->newfunc == sysctl_new_kernel) {
 1833                 arg2 = req->newlen - req->newidx;
 1834                 sx_xlock(&sysctlstringlock);
 1835                 error = SYSCTL_IN(req, arg1, arg2);
 1836                 if (error == 0) {
 1837                         ((char *)arg1)[arg2] = '\0';
 1838                         req->newidx += arg2;
 1839                 }
 1840                 sx_xunlock(&sysctlstringlock);
 1841         } else {
 1842                 arg2 = req->newlen - req->newidx;
 1843                 tmparg = malloc(arg2, M_SYSCTLTMP, M_WAITOK);
 1844 
 1845                 error = SYSCTL_IN(req, tmparg, arg2);
 1846                 if (error) {
 1847                         free(tmparg, M_SYSCTLTMP);
 1848                         return (error);
 1849                 }
 1850 
 1851                 sx_xlock(&sysctlstringlock);
 1852                 memcpy(arg1, tmparg, arg2);
 1853                 ((char *)arg1)[arg2] = '\0';
 1854                 sx_xunlock(&sysctlstringlock);
 1855                 free(tmparg, M_SYSCTLTMP);
 1856                 req->newidx += arg2;
 1857         }
 1858         return (error);
 1859 }
 1860 
 1861 /*
 1862  * Handle any kind of opaque data.
 1863  * arg1 points to it, arg2 is the size.
 1864  */
 1865 
 1866 int
 1867 sysctl_handle_opaque(SYSCTL_HANDLER_ARGS)
 1868 {
 1869         int error, tries;
 1870         u_int generation;
 1871         struct sysctl_req req2;
 1872 
 1873         /*
 1874          * Attempt to get a coherent snapshot, by using the thread
 1875          * pre-emption counter updated from within mi_switch() to
 1876          * determine if we were pre-empted during a bcopy() or
 1877          * copyout(). Make 3 attempts at doing this before giving up.
 1878          * If we encounter an error, stop immediately.
 1879          */
 1880         tries = 0;
 1881         req2 = *req;
 1882 retry:
 1883         generation = curthread->td_generation;
 1884         error = SYSCTL_OUT(req, arg1, arg2);
 1885         if (error)
 1886                 return (error);
 1887         tries++;
 1888         if (generation != curthread->td_generation && tries < 3) {
 1889                 *req = req2;
 1890                 goto retry;
 1891         }
 1892 
 1893         error = SYSCTL_IN(req, arg1, arg2);
 1894 
 1895         return (error);
 1896 }
 1897 
 1898 /*
 1899  * Based on on sysctl_handle_int() convert microseconds to a sbintime.
 1900  */
 1901 int
 1902 sysctl_usec_to_sbintime(SYSCTL_HANDLER_ARGS)
 1903 {
 1904         int error;
 1905         int64_t tt;
 1906         sbintime_t sb;
 1907 
 1908         tt = *(int64_t *)arg1;
 1909         sb = sbttous(tt);
 1910 
 1911         error = sysctl_handle_64(oidp, &sb, 0, req);
 1912         if (error || !req->newptr)
 1913                 return (error);
 1914 
 1915         tt = ustosbt(sb);
 1916         *(int64_t *)arg1 = tt;
 1917 
 1918         return (0);
 1919 }
 1920 
 1921 /*
 1922  * Based on on sysctl_handle_int() convert milliseconds to a sbintime.
 1923  */
 1924 int
 1925 sysctl_msec_to_sbintime(SYSCTL_HANDLER_ARGS)
 1926 {
 1927         int error;
 1928         int64_t tt;
 1929         sbintime_t sb;
 1930 
 1931         tt = *(int64_t *)arg1;
 1932         sb = sbttoms(tt);
 1933 
 1934         error = sysctl_handle_64(oidp, &sb, 0, req);
 1935         if (error || !req->newptr)
 1936                 return (error);
 1937 
 1938         tt = mstosbt(sb);
 1939         *(int64_t *)arg1 = tt;
 1940 
 1941         return (0);
 1942 }
 1943 
 1944 /*
 1945  * Convert seconds to a struct timeval.  Intended for use with
 1946  * intervals and thus does not permit negative seconds.
 1947  */
 1948 int
 1949 sysctl_sec_to_timeval(SYSCTL_HANDLER_ARGS)
 1950 {
 1951         struct timeval *tv;
 1952         int error, secs;
 1953 
 1954         tv = arg1;
 1955         secs = tv->tv_sec;
 1956 
 1957         error = sysctl_handle_int(oidp, &secs, 0, req);
 1958         if (error || req->newptr == NULL)
 1959                 return (error);
 1960 
 1961         if (secs < 0)
 1962                 return (EINVAL);
 1963         tv->tv_sec = secs;
 1964 
 1965         return (0);
 1966 }
 1967 
 1968 /*
 1969  * Transfer functions to/from kernel space.
 1970  * XXX: rather untested at this point
 1971  */
 1972 static int
 1973 sysctl_old_kernel(struct sysctl_req *req, const void *p, size_t l)
 1974 {
 1975         size_t i = 0;
 1976 
 1977         if (req->oldptr) {
 1978                 i = l;
 1979                 if (req->oldlen <= req->oldidx)
 1980                         i = 0;
 1981                 else
 1982                         if (i > req->oldlen - req->oldidx)
 1983                                 i = req->oldlen - req->oldidx;
 1984                 if (i > 0)
 1985                         bcopy(p, (char *)req->oldptr + req->oldidx, i);
 1986         }
 1987         req->oldidx += l;
 1988         if (req->oldptr && i != l)
 1989                 return (ENOMEM);
 1990         return (0);
 1991 }
 1992 
 1993 static int
 1994 sysctl_new_kernel(struct sysctl_req *req, void *p, size_t l)
 1995 {
 1996         if (!req->newptr)
 1997                 return (0);
 1998         if (req->newlen - req->newidx < l)
 1999                 return (EINVAL);
 2000         bcopy((const char *)req->newptr + req->newidx, p, l);
 2001         req->newidx += l;
 2002         return (0);
 2003 }
 2004 
 2005 int
 2006 kernel_sysctl(struct thread *td, int *name, u_int namelen, void *old,
 2007     size_t *oldlenp, void *new, size_t newlen, size_t *retval, int flags)
 2008 {
 2009         int error = 0;
 2010         struct sysctl_req req;
 2011 
 2012         bzero(&req, sizeof req);
 2013 
 2014         req.td = td;
 2015         req.flags = flags;
 2016 
 2017         if (oldlenp) {
 2018                 req.oldlen = *oldlenp;
 2019         }
 2020         req.validlen = req.oldlen;
 2021 
 2022         if (old) {
 2023                 req.oldptr= old;
 2024         }
 2025 
 2026         if (new != NULL) {
 2027                 req.newlen = newlen;
 2028                 req.newptr = new;
 2029         }
 2030 
 2031         req.oldfunc = sysctl_old_kernel;
 2032         req.newfunc = sysctl_new_kernel;
 2033         req.lock = REQ_UNWIRED;
 2034 
 2035         error = sysctl_root(0, name, namelen, &req);
 2036 
 2037         if (req.lock == REQ_WIRED && req.validlen > 0)
 2038                 vsunlock(req.oldptr, req.validlen);
 2039 
 2040         if (error && error != ENOMEM)
 2041                 return (error);
 2042 
 2043         if (retval) {
 2044                 if (req.oldptr && req.oldidx > req.validlen)
 2045                         *retval = req.validlen;
 2046                 else
 2047                         *retval = req.oldidx;
 2048         }
 2049         return (error);
 2050 }
 2051 
 2052 int
 2053 kernel_sysctlbyname(struct thread *td, char *name, void *old, size_t *oldlenp,
 2054     void *new, size_t newlen, size_t *retval, int flags)
 2055 {
 2056         int oid[CTL_MAXNAME];
 2057         size_t oidlen, plen;
 2058         int error;
 2059 
 2060         oid[0] = CTL_SYSCTL;
 2061         oid[1] = CTL_SYSCTL_NAME2OID;
 2062         oidlen = sizeof(oid);
 2063 
 2064         error = kernel_sysctl(td, oid, 2, oid, &oidlen,
 2065             (void *)name, strlen(name), &plen, flags);
 2066         if (error)
 2067                 return (error);
 2068 
 2069         error = kernel_sysctl(td, oid, plen / sizeof(int), old, oldlenp,
 2070             new, newlen, retval, flags);
 2071         return (error);
 2072 }
 2073 
 2074 /*
 2075  * Transfer function to/from user space.
 2076  */
 2077 static int
 2078 sysctl_old_user(struct sysctl_req *req, const void *p, size_t l)
 2079 {
 2080         size_t i, len, origidx;
 2081         int error;
 2082 
 2083         origidx = req->oldidx;
 2084         req->oldidx += l;
 2085         if (req->oldptr == NULL)
 2086                 return (0);
 2087         /*
 2088          * If we have not wired the user supplied buffer and we are currently
 2089          * holding locks, drop a witness warning, as it's possible that
 2090          * write operations to the user page can sleep.
 2091          */
 2092         if (req->lock != REQ_WIRED)
 2093                 WITNESS_WARN(WARN_GIANTOK | WARN_SLEEPOK, NULL,
 2094                     "sysctl_old_user()");
 2095         i = l;
 2096         len = req->validlen;
 2097         if (len <= origidx)
 2098                 i = 0;
 2099         else {
 2100                 if (i > len - origidx)
 2101                         i = len - origidx;
 2102                 if (req->lock == REQ_WIRED) {
 2103                         error = copyout_nofault(p, (char *)req->oldptr +
 2104                             origidx, i);
 2105                 } else
 2106                         error = copyout(p, (char *)req->oldptr + origidx, i);
 2107                 if (error != 0)
 2108                         return (error);
 2109         }
 2110         if (i < l)
 2111                 return (ENOMEM);
 2112         return (0);
 2113 }
 2114 
 2115 static int
 2116 sysctl_new_user(struct sysctl_req *req, void *p, size_t l)
 2117 {
 2118         int error;
 2119 
 2120         if (!req->newptr)
 2121                 return (0);
 2122         if (req->newlen - req->newidx < l)
 2123                 return (EINVAL);
 2124         WITNESS_WARN(WARN_GIANTOK | WARN_SLEEPOK, NULL,
 2125             "sysctl_new_user()");
 2126         error = copyin((const char *)req->newptr + req->newidx, p, l);
 2127         req->newidx += l;
 2128         return (error);
 2129 }
 2130 
 2131 /*
 2132  * Wire the user space destination buffer.  If set to a value greater than
 2133  * zero, the len parameter limits the maximum amount of wired memory.
 2134  */
 2135 int
 2136 sysctl_wire_old_buffer(struct sysctl_req *req, size_t len)
 2137 {
 2138         int ret;
 2139         size_t wiredlen;
 2140 
 2141         wiredlen = (len > 0 && len < req->oldlen) ? len : req->oldlen;
 2142         ret = 0;
 2143         if (req->lock != REQ_WIRED && req->oldptr &&
 2144             req->oldfunc == sysctl_old_user) {
 2145                 if (wiredlen != 0) {
 2146                         ret = vslock(req->oldptr, wiredlen);
 2147                         if (ret != 0) {
 2148                                 if (ret != ENOMEM)
 2149                                         return (ret);
 2150                                 wiredlen = 0;
 2151                         }
 2152                 }
 2153                 req->lock = REQ_WIRED;
 2154                 req->validlen = wiredlen;
 2155         }
 2156         return (0);
 2157 }
 2158 
 2159 int
 2160 sysctl_find_oid(int *name, u_int namelen, struct sysctl_oid **noid,
 2161     int *nindx, struct sysctl_req *req)
 2162 {
 2163         struct sysctl_oid_list *lsp;
 2164         struct sysctl_oid *oid;
 2165         int indx;
 2166 
 2167         SYSCTL_ASSERT_LOCKED();
 2168         lsp = &sysctl__children;
 2169         indx = 0;
 2170         while (indx < CTL_MAXNAME) {
 2171                 SLIST_FOREACH(oid, lsp, oid_link) {
 2172                         if (oid->oid_number == name[indx])
 2173                                 break;
 2174                 }
 2175                 if (oid == NULL)
 2176                         return (ENOENT);
 2177 
 2178                 indx++;
 2179                 if ((oid->oid_kind & CTLTYPE) == CTLTYPE_NODE) {
 2180                         if (oid->oid_handler != NULL || indx == namelen) {
 2181                                 *noid = oid;
 2182                                 if (nindx != NULL)
 2183                                         *nindx = indx;
 2184                                 KASSERT((oid->oid_kind & CTLFLAG_DYING) == 0,
 2185                                     ("%s found DYING node %p", __func__, oid));
 2186                                 return (0);
 2187                         }
 2188                         lsp = SYSCTL_CHILDREN(oid);
 2189                 } else if (indx == namelen) {
 2190                         if ((oid->oid_kind & CTLFLAG_DORMANT) != 0)
 2191                                 return (ENOENT);
 2192                         *noid = oid;
 2193                         if (nindx != NULL)
 2194                                 *nindx = indx;
 2195                         KASSERT((oid->oid_kind & CTLFLAG_DYING) == 0,
 2196                             ("%s found DYING node %p", __func__, oid));
 2197                         return (0);
 2198                 } else {
 2199                         return (ENOTDIR);
 2200                 }
 2201         }
 2202         return (ENOENT);
 2203 }
 2204 
 2205 /*
 2206  * Traverse our tree, and find the right node, execute whatever it points
 2207  * to, and return the resulting error code.
 2208  */
 2209 
 2210 static int
 2211 sysctl_root(SYSCTL_HANDLER_ARGS)
 2212 {
 2213         struct sysctl_oid *oid;
 2214         struct rm_priotracker tracker;
 2215         int error, indx, lvl;
 2216 
 2217         SYSCTL_RLOCK(&tracker);
 2218 
 2219         error = sysctl_find_oid(arg1, arg2, &oid, &indx, req);
 2220         if (error)
 2221                 goto out;
 2222 
 2223         if ((oid->oid_kind & CTLTYPE) == CTLTYPE_NODE) {
 2224                 /*
 2225                  * You can't call a sysctl when it's a node, but has
 2226                  * no handler.  Inform the user that it's a node.
 2227                  * The indx may or may not be the same as namelen.
 2228                  */
 2229                 if (oid->oid_handler == NULL) {
 2230                         error = EISDIR;
 2231                         goto out;
 2232                 }
 2233         }
 2234 
 2235         /* Is this sysctl writable? */
 2236         if (req->newptr && !(oid->oid_kind & CTLFLAG_WR)) {
 2237                 error = EPERM;
 2238                 goto out;
 2239         }
 2240 
 2241         KASSERT(req->td != NULL, ("sysctl_root(): req->td == NULL"));
 2242 
 2243 #ifdef CAPABILITY_MODE
 2244         /*
 2245          * If the process is in capability mode, then don't permit reading or
 2246          * writing unless specifically granted for the node.
 2247          */
 2248         if (IN_CAPABILITY_MODE(req->td)) {
 2249                 if ((req->oldptr && !(oid->oid_kind & CTLFLAG_CAPRD)) ||
 2250                     (req->newptr && !(oid->oid_kind & CTLFLAG_CAPWR))) {
 2251                         error = EPERM;
 2252                         goto out;
 2253                 }
 2254         }
 2255 #endif
 2256 
 2257         /* Is this sysctl sensitive to securelevels? */
 2258         if (req->newptr && (oid->oid_kind & CTLFLAG_SECURE)) {
 2259                 lvl = (oid->oid_kind & CTLMASK_SECURE) >> CTLSHIFT_SECURE;
 2260                 error = securelevel_gt(req->td->td_ucred, lvl);
 2261                 if (error)
 2262                         goto out;
 2263         }
 2264 
 2265         /* Is this sysctl writable by only privileged users? */
 2266         if (req->newptr && !(oid->oid_kind & CTLFLAG_ANYBODY)) {
 2267                 int priv;
 2268 
 2269                 if (oid->oid_kind & CTLFLAG_PRISON)
 2270                         priv = PRIV_SYSCTL_WRITEJAIL;
 2271 #ifdef VIMAGE
 2272                 else if ((oid->oid_kind & CTLFLAG_VNET) &&
 2273                      prison_owns_vnet(req->td->td_ucred))
 2274                         priv = PRIV_SYSCTL_WRITEJAIL;
 2275 #endif
 2276                 else
 2277                         priv = PRIV_SYSCTL_WRITE;
 2278                 error = priv_check(req->td, priv);
 2279                 if (error)
 2280                         goto out;
 2281         }
 2282 
 2283         if (!oid->oid_handler) {
 2284                 error = EINVAL;
 2285                 goto out;
 2286         }
 2287 
 2288         if ((oid->oid_kind & CTLTYPE) == CTLTYPE_NODE) {
 2289                 arg1 = (int *)arg1 + indx;
 2290                 arg2 -= indx;
 2291         } else {
 2292                 arg1 = oid->oid_arg1;
 2293                 arg2 = oid->oid_arg2;
 2294         }
 2295 #ifdef MAC
 2296         error = mac_system_check_sysctl(req->td->td_ucred, oid, arg1, arg2,
 2297             req);
 2298         if (error != 0)
 2299                 goto out;
 2300 #endif
 2301 #ifdef VIMAGE
 2302         if ((oid->oid_kind & CTLFLAG_VNET) && arg1 != NULL)
 2303                 arg1 = (void *)(curvnet->vnet_data_base + (uintptr_t)arg1);
 2304 #endif
 2305         error = sysctl_root_handler_locked(oid, arg1, arg2, req, &tracker);
 2306 
 2307 out:
 2308         SYSCTL_RUNLOCK(&tracker);
 2309         return (error);
 2310 }
 2311 
 2312 #ifndef _SYS_SYSPROTO_H_
 2313 struct sysctl_args {
 2314         int     *name;
 2315         u_int   namelen;
 2316         void    *old;
 2317         size_t  *oldlenp;
 2318         void    *new;
 2319         size_t  newlen;
 2320 };
 2321 #endif
 2322 int
 2323 sys___sysctl(struct thread *td, struct sysctl_args *uap)
 2324 {
 2325         int error, i, name[CTL_MAXNAME];
 2326         size_t j;
 2327 
 2328         if (uap->namelen > CTL_MAXNAME || uap->namelen < 2)
 2329                 return (EINVAL);
 2330 
 2331         error = copyin(uap->name, &name, uap->namelen * sizeof(int));
 2332         if (error)
 2333                 return (error);
 2334 
 2335         error = userland_sysctl(td, name, uap->namelen,
 2336                 uap->old, uap->oldlenp, 0,
 2337                 uap->new, uap->newlen, &j, 0);
 2338         if (error && error != ENOMEM)
 2339                 return (error);
 2340         if (uap->oldlenp) {
 2341                 i = copyout(&j, uap->oldlenp, sizeof(j));
 2342                 if (i)
 2343                         return (i);
 2344         }
 2345         return (error);
 2346 }
 2347 
 2348 int
 2349 kern___sysctlbyname(struct thread *td, const char *oname, size_t namelen,
 2350     void *old, size_t *oldlenp, void *new, size_t newlen, size_t *retval,
 2351     int flags, bool inkernel)
 2352 {
 2353         int oid[CTL_MAXNAME];
 2354         char namebuf[16];
 2355         char *name;
 2356         size_t oidlen;
 2357         int error;
 2358 
 2359         if (namelen > MAXPATHLEN || namelen == 0)
 2360                 return (EINVAL);
 2361         name = namebuf;
 2362         if (namelen > sizeof(namebuf))
 2363                 name = malloc(namelen, M_SYSCTL, M_WAITOK);
 2364         error = copyin(oname, name, namelen);
 2365         if (error != 0)
 2366                 goto out;
 2367 
 2368         oid[0] = CTL_SYSCTL;
 2369         oid[1] = CTL_SYSCTL_NAME2OID;
 2370         oidlen = sizeof(oid);
 2371         error = kernel_sysctl(td, oid, 2, oid, &oidlen, (void *)name, namelen,
 2372             retval, flags);
 2373         if (error != 0)
 2374                 goto out;
 2375         error = userland_sysctl(td, oid, *retval / sizeof(int), old, oldlenp,
 2376             inkernel, new, newlen, retval, flags);
 2377 
 2378 out:
 2379         if (namelen > sizeof(namebuf))
 2380                 free(name, M_SYSCTL);
 2381         return (error);
 2382 }
 2383 
 2384 #ifndef _SYS_SYSPROTO_H_
 2385 struct __sysctlbyname_args {
 2386         const char      *name;
 2387         size_t  namelen;
 2388         void    *old;
 2389         size_t  *oldlenp;
 2390         void    *new;
 2391         size_t  newlen;
 2392 };
 2393 #endif
 2394 int
 2395 sys___sysctlbyname(struct thread *td, struct __sysctlbyname_args *uap)
 2396 {
 2397         size_t rv;
 2398         int error;
 2399 
 2400         error = kern___sysctlbyname(td, uap->name, uap->namelen, uap->old,
 2401             uap->oldlenp, uap->new, uap->newlen, &rv, 0, 0);
 2402         if (error != 0)
 2403                 return (error);
 2404         if (uap->oldlenp != NULL)
 2405                 error = copyout(&rv, uap->oldlenp, sizeof(rv));
 2406 
 2407         return (error);
 2408 }
 2409 
 2410 /*
 2411  * This is used from various compatibility syscalls too.  That's why name
 2412  * must be in kernel space.
 2413  */
 2414 int
 2415 userland_sysctl(struct thread *td, int *name, u_int namelen, void *old,
 2416     size_t *oldlenp, int inkernel, const void *new, size_t newlen,
 2417     size_t *retval, int flags)
 2418 {
 2419         int error = 0, memlocked;
 2420         struct sysctl_req req;
 2421 
 2422         bzero(&req, sizeof req);
 2423 
 2424         req.td = td;
 2425         req.flags = flags;
 2426 
 2427         if (oldlenp) {
 2428                 if (inkernel) {
 2429                         req.oldlen = *oldlenp;
 2430                 } else {
 2431                         error = copyin(oldlenp, &req.oldlen, sizeof(*oldlenp));
 2432                         if (error)
 2433                                 return (error);
 2434                 }
 2435         }
 2436         req.validlen = req.oldlen;
 2437         req.oldptr = old;
 2438 
 2439         if (new != NULL) {
 2440                 req.newlen = newlen;
 2441                 req.newptr = new;
 2442         }
 2443 
 2444         req.oldfunc = sysctl_old_user;
 2445         req.newfunc = sysctl_new_user;
 2446         req.lock = REQ_UNWIRED;
 2447 
 2448 #ifdef KTRACE
 2449         if (KTRPOINT(curthread, KTR_SYSCTL))
 2450                 ktrsysctl(name, namelen);
 2451 #endif
 2452         memlocked = 0;
 2453         if (req.oldptr && req.oldlen > 4 * PAGE_SIZE) {
 2454                 memlocked = 1;
 2455                 sx_xlock(&sysctlmemlock);
 2456         }
 2457         CURVNET_SET(TD_TO_VNET(td));
 2458 
 2459         for (;;) {
 2460                 req.oldidx = 0;
 2461                 req.newidx = 0;
 2462                 error = sysctl_root(0, name, namelen, &req);
 2463                 if (error != EAGAIN)
 2464                         break;
 2465                 kern_yield(PRI_USER);
 2466         }
 2467 
 2468         CURVNET_RESTORE();
 2469 
 2470         if (req.lock == REQ_WIRED && req.validlen > 0)
 2471                 vsunlock(req.oldptr, req.validlen);
 2472         if (memlocked)
 2473                 sx_xunlock(&sysctlmemlock);
 2474 
 2475         if (error && error != ENOMEM)
 2476                 return (error);
 2477 
 2478         if (retval) {
 2479                 if (req.oldptr && req.oldidx > req.validlen)
 2480                         *retval = req.validlen;
 2481                 else
 2482                         *retval = req.oldidx;
 2483         }
 2484         return (error);
 2485 }
 2486 
 2487 /*
 2488  * Drain into a sysctl struct.  The user buffer should be wired if a page
 2489  * fault would cause issue.
 2490  */
 2491 static int
 2492 sbuf_sysctl_drain(void *arg, const char *data, int len)
 2493 {
 2494         struct sysctl_req *req = arg;
 2495         int error;
 2496 
 2497         error = SYSCTL_OUT(req, data, len);
 2498         KASSERT(error >= 0, ("Got unexpected negative value %d", error));
 2499         return (error == 0 ? len : -error);
 2500 }
 2501 
 2502 struct sbuf *
 2503 sbuf_new_for_sysctl(struct sbuf *s, char *buf, int length,
 2504     struct sysctl_req *req)
 2505 {
 2506 
 2507         /* Supply a default buffer size if none given. */
 2508         if (buf == NULL && length == 0)
 2509                 length = 64;
 2510         s = sbuf_new(s, buf, length, SBUF_FIXEDLEN | SBUF_INCLUDENUL);
 2511         sbuf_set_drain(s, sbuf_sysctl_drain, req);
 2512         return (s);
 2513 }
 2514 
 2515 #ifdef DDB
 2516 
 2517 /* The current OID the debugger is working with */
 2518 static struct sysctl_oid *g_ddb_oid;
 2519 
 2520 /* The current flags specified by the user */
 2521 static int g_ddb_sysctl_flags;
 2522 
 2523 /* Check to see if the last sysctl printed */
 2524 static int g_ddb_sysctl_printed;
 2525 
 2526 static const int ctl_sign[CTLTYPE+1] = {
 2527         [CTLTYPE_INT] = 1,
 2528         [CTLTYPE_LONG] = 1,
 2529         [CTLTYPE_S8] = 1,
 2530         [CTLTYPE_S16] = 1,
 2531         [CTLTYPE_S32] = 1,
 2532         [CTLTYPE_S64] = 1,
 2533 };
 2534 
 2535 static const int ctl_size[CTLTYPE+1] = {
 2536         [CTLTYPE_INT] = sizeof(int),
 2537         [CTLTYPE_UINT] = sizeof(u_int),
 2538         [CTLTYPE_LONG] = sizeof(long),
 2539         [CTLTYPE_ULONG] = sizeof(u_long),
 2540         [CTLTYPE_S8] = sizeof(int8_t),
 2541         [CTLTYPE_S16] = sizeof(int16_t),
 2542         [CTLTYPE_S32] = sizeof(int32_t),
 2543         [CTLTYPE_S64] = sizeof(int64_t),
 2544         [CTLTYPE_U8] = sizeof(uint8_t),
 2545         [CTLTYPE_U16] = sizeof(uint16_t),
 2546         [CTLTYPE_U32] = sizeof(uint32_t),
 2547         [CTLTYPE_U64] = sizeof(uint64_t),
 2548 };
 2549 
 2550 #define DB_SYSCTL_NAME_ONLY     0x001   /* Compare with -N */
 2551 #define DB_SYSCTL_VALUE_ONLY    0x002   /* Compare with -n */
 2552 #define DB_SYSCTL_OPAQUE        0x004   /* Compare with -o */
 2553 #define DB_SYSCTL_HEX           0x008   /* Compare with -x */
 2554 
 2555 #define DB_SYSCTL_SAFE_ONLY     0x100   /* Only simple types */
 2556 
 2557 static const char db_sysctl_modifs[] = {
 2558         'N', 'n', 'o', 'x',
 2559 };
 2560 
 2561 static const int db_sysctl_modif_values[] = {
 2562         DB_SYSCTL_NAME_ONLY, DB_SYSCTL_VALUE_ONLY,
 2563         DB_SYSCTL_OPAQUE, DB_SYSCTL_HEX,
 2564 };
 2565 
 2566 /* Handlers considered safe to print while recursing */
 2567 static int (* const db_safe_handlers[])(SYSCTL_HANDLER_ARGS) = {
 2568         sysctl_handle_bool,
 2569         sysctl_handle_8,
 2570         sysctl_handle_16,
 2571         sysctl_handle_32,
 2572         sysctl_handle_64,
 2573         sysctl_handle_int,
 2574         sysctl_handle_long,
 2575         sysctl_handle_string,
 2576         sysctl_handle_opaque,
 2577 };
 2578 
 2579 /*
 2580  * Use in place of sysctl_old_kernel to print sysctl values.
 2581  *
 2582  * Compare to the output handling in show_var from sbin/sysctl/sysctl.c
 2583  */
 2584 static int
 2585 sysctl_old_ddb(struct sysctl_req *req, const void *ptr, size_t len)
 2586 {
 2587         const u_char *val, *p;
 2588         const char *sep1;
 2589         size_t intlen, slen;
 2590         uintmax_t umv;
 2591         intmax_t mv;
 2592         int sign, ctltype, hexlen, xflag, error;
 2593 
 2594         /* Suppress false-positive GCC uninitialized variable warnings */
 2595         mv = 0;
 2596         umv = 0;
 2597 
 2598         slen = len;
 2599         val = p = ptr;
 2600 
 2601         if (ptr == NULL) {
 2602                 error = 0;
 2603                 goto out;
 2604         }
 2605 
 2606         /* We are going to print */
 2607         g_ddb_sysctl_printed = 1;
 2608 
 2609         xflag = g_ddb_sysctl_flags & DB_SYSCTL_HEX;
 2610 
 2611         ctltype = (g_ddb_oid->oid_kind & CTLTYPE);
 2612         sign = ctl_sign[ctltype];
 2613         intlen = ctl_size[ctltype];
 2614 
 2615         switch (ctltype) {
 2616         case CTLTYPE_NODE:
 2617         case CTLTYPE_STRING:
 2618                 db_printf("%.*s", (int) len, (const char *) p);
 2619                 error = 0;
 2620                 goto out;
 2621 
 2622         case CTLTYPE_INT:
 2623         case CTLTYPE_UINT:
 2624         case CTLTYPE_LONG:
 2625         case CTLTYPE_ULONG:
 2626         case CTLTYPE_S8:
 2627         case CTLTYPE_S16:
 2628         case CTLTYPE_S32:
 2629         case CTLTYPE_S64:
 2630         case CTLTYPE_U8:
 2631         case CTLTYPE_U16:
 2632         case CTLTYPE_U32:
 2633         case CTLTYPE_U64:
 2634                 hexlen = 2 + (intlen * CHAR_BIT + 3) / 4;
 2635                 sep1 = "";
 2636                 while (len >= intlen) {
 2637                         switch (ctltype) {
 2638                         case CTLTYPE_INT:
 2639                         case CTLTYPE_UINT:
 2640                                 umv = *(const u_int *)p;
 2641                                 mv = *(const int *)p;
 2642                                 break;
 2643                         case CTLTYPE_LONG:
 2644                         case CTLTYPE_ULONG:
 2645                                 umv = *(const u_long *)p;
 2646                                 mv = *(const long *)p;
 2647                                 break;
 2648                         case CTLTYPE_S8:
 2649                         case CTLTYPE_U8:
 2650                                 umv = *(const uint8_t *)p;
 2651                                 mv = *(const int8_t *)p;
 2652                                 break;
 2653                         case CTLTYPE_S16:
 2654                         case CTLTYPE_U16:
 2655                                 umv = *(const uint16_t *)p;
 2656                                 mv = *(const int16_t *)p;
 2657                                 break;
 2658                         case CTLTYPE_S32:
 2659                         case CTLTYPE_U32:
 2660                                 umv = *(const uint32_t *)p;
 2661                                 mv = *(const int32_t *)p;
 2662                                 break;
 2663                         case CTLTYPE_S64:
 2664                         case CTLTYPE_U64:
 2665                                 umv = *(const uint64_t *)p;
 2666                                 mv = *(const int64_t *)p;
 2667                                 break;
 2668                         }
 2669 
 2670                         db_printf("%s", sep1);
 2671                         if (xflag)
 2672                                 db_printf("%#0*jx", hexlen, umv);
 2673                         else if (!sign)
 2674                                 db_printf("%ju", umv);
 2675                         else if (g_ddb_oid->oid_fmt[1] == 'K') {
 2676                                 /* Kelvins are currently unsupported. */
 2677                                 error = EOPNOTSUPP;
 2678                                 goto out;
 2679                         } else
 2680                                 db_printf("%jd", mv);
 2681 
 2682                         sep1 = " ";
 2683                         len -= intlen;
 2684                         p += intlen;
 2685                 }
 2686                 error = 0;
 2687                 goto out;
 2688 
 2689         case CTLTYPE_OPAQUE:
 2690                 /* TODO: Support struct functions. */
 2691 
 2692                 /* FALLTHROUGH */
 2693         default:
 2694                 db_printf("Format:%s Length:%zu Dump:0x",
 2695                     g_ddb_oid->oid_fmt, len);
 2696                 while (len-- && (xflag || p < val + 16))
 2697                         db_printf("%02x", *p++);
 2698                 if (!xflag && len > 16)
 2699                         db_printf("...");
 2700                 error = 0;
 2701                 goto out;
 2702         }
 2703 
 2704 out:
 2705         req->oldidx += slen;
 2706         return (error);
 2707 }
 2708 
 2709 /*
 2710  * Avoid setting new sysctl values from the debugger
 2711  */
 2712 static int
 2713 sysctl_new_ddb(struct sysctl_req *req, void *p, size_t l)
 2714 {
 2715 
 2716         if (!req->newptr)
 2717                 return (0);
 2718 
 2719         /* Changing sysctls from the debugger is currently unsupported */
 2720         return (EPERM);
 2721 }
 2722 
 2723 /*
 2724  * Run a sysctl handler with the DDB oldfunc and newfunc attached.
 2725  * Instead of copying any output to a buffer we'll dump it right to
 2726  * the console.
 2727  */
 2728 static int
 2729 db_sysctl(struct sysctl_oid *oidp, int *name, u_int namelen,
 2730     void *old, size_t *oldlenp, size_t *retval, int flags)
 2731 {
 2732         struct sysctl_req req;
 2733         int error;
 2734 
 2735         /* Setup the request */
 2736         bzero(&req, sizeof req);
 2737         req.td = kdb_thread;
 2738         req.oldfunc = sysctl_old_ddb;
 2739         req.newfunc = sysctl_new_ddb;
 2740         req.lock = REQ_UNWIRED;
 2741         if (oldlenp) {
 2742                 req.oldlen = *oldlenp;
 2743         }
 2744         req.validlen = req.oldlen;
 2745         if (old) {
 2746                 req.oldptr = old;
 2747         }
 2748 
 2749         /* Setup our globals for sysctl_old_ddb */
 2750         g_ddb_oid = oidp;
 2751         g_ddb_sysctl_flags = flags;
 2752         g_ddb_sysctl_printed = 0;
 2753 
 2754         error = sysctl_root(0, name, namelen, &req);
 2755 
 2756         /* Reset globals */
 2757         g_ddb_oid = NULL;
 2758         g_ddb_sysctl_flags = 0;
 2759 
 2760         if (retval) {
 2761                 if (req.oldptr && req.oldidx > req.validlen)
 2762                         *retval = req.validlen;
 2763                 else
 2764                         *retval = req.oldidx;
 2765         }
 2766         return (error);
 2767 }
 2768 
 2769 /*
 2770  * Show a sysctl's name
 2771  */
 2772 static void
 2773 db_show_oid_name(int *oid, size_t nlen)
 2774 {
 2775         struct sysctl_oid *oidp;
 2776         int qoid[CTL_MAXNAME+2];
 2777         int error;
 2778 
 2779         qoid[0] = 0;
 2780         memcpy(qoid + 2, oid, nlen * sizeof(int));
 2781         qoid[1] = 1;
 2782 
 2783         error = sysctl_find_oid(qoid, nlen + 2, &oidp, NULL, NULL);
 2784         if (error)
 2785                 db_error("sysctl name oid");
 2786 
 2787         error = db_sysctl(oidp, qoid, nlen + 2, NULL, NULL, NULL, 0);
 2788         if (error)
 2789                 db_error("sysctl name");
 2790 }
 2791 
 2792 /*
 2793  * Check to see if an OID is safe to print from ddb.
 2794  */
 2795 static bool
 2796 db_oid_safe(const struct sysctl_oid *oidp)
 2797 {
 2798         for (unsigned int i = 0; i < nitems(db_safe_handlers); ++i) {
 2799                 if (oidp->oid_handler == db_safe_handlers[i])
 2800                         return (true);
 2801         }
 2802 
 2803         return (false);
 2804 }
 2805 
 2806 /*
 2807  * Show a sysctl at a specific OID
 2808  * Compare to the input handling in show_var from sbin/sysctl/sysctl.c
 2809  */
 2810 static int
 2811 db_show_oid(struct sysctl_oid *oidp, int *oid, size_t nlen, int flags)
 2812 {
 2813         int error, xflag, oflag, Nflag, nflag;
 2814         size_t len;
 2815 
 2816         xflag = flags & DB_SYSCTL_HEX;
 2817         oflag = flags & DB_SYSCTL_OPAQUE;
 2818         nflag = flags & DB_SYSCTL_VALUE_ONLY;
 2819         Nflag = flags & DB_SYSCTL_NAME_ONLY;
 2820 
 2821         if ((oidp->oid_kind & CTLTYPE) == CTLTYPE_OPAQUE &&
 2822             (!xflag && !oflag))
 2823                 return (0);
 2824 
 2825         if (Nflag) {
 2826                 db_show_oid_name(oid, nlen);
 2827                 error = 0;
 2828                 goto out;
 2829         }
 2830 
 2831         if (!nflag) {
 2832                 db_show_oid_name(oid, nlen);
 2833                 db_printf(": ");
 2834         }
 2835 
 2836         if ((flags & DB_SYSCTL_SAFE_ONLY) && !db_oid_safe(oidp)) {
 2837                 db_printf("Skipping, unsafe to print while recursing.");
 2838                 error = 0;
 2839                 goto out;
 2840         }
 2841 
 2842         /* Try once, and ask about the size */
 2843         len = 0;
 2844         error = db_sysctl(oidp, oid, nlen,
 2845             NULL, NULL, &len, flags);
 2846         if (error)
 2847                 goto out;
 2848 
 2849         if (!g_ddb_sysctl_printed)
 2850                 /* Lie about the size */
 2851                 error = db_sysctl(oidp, oid, nlen,
 2852                     (void *) 1, &len, NULL, flags);
 2853 
 2854 out:
 2855         db_printf("\n");
 2856         return (error);
 2857 }
 2858 
 2859 /*
 2860  * Show all sysctls under a specific OID
 2861  * Compare to sysctl_all from sbin/sysctl/sysctl.c
 2862  */
 2863 static int
 2864 db_show_sysctl_all(int *oid, size_t len, int flags)
 2865 {
 2866         struct sysctl_oid *oidp;
 2867         int name1[CTL_MAXNAME + 2], name2[CTL_MAXNAME + 2];
 2868         size_t l1, l2;
 2869 
 2870         name1[0] = CTL_SYSCTL;
 2871         name1[1] = CTL_SYSCTL_NEXT;
 2872         l1 = 2;
 2873         if (len) {
 2874                 memcpy(name1 + 2, oid, len * sizeof(int));
 2875                 l1 += len;
 2876         } else {
 2877                 name1[2] = CTL_KERN;
 2878                 l1++;
 2879         }
 2880         for (;;) {
 2881                 int i, error;
 2882 
 2883                 l2 = sizeof(name2);
 2884                 error = kernel_sysctl(kdb_thread, name1, l1,
 2885                     name2, &l2, NULL, 0, &l2, 0);
 2886                 if (error != 0) {
 2887                         if (error == ENOENT)
 2888                                 return (0);
 2889                         else
 2890                                 db_error("sysctl(next)");
 2891                 }
 2892 
 2893                 l2 /= sizeof(int);
 2894 
 2895                 if (l2 < (unsigned int)len)
 2896                         return (0);
 2897 
 2898                 for (i = 0; i < len; i++)
 2899                         if (name2[i] != oid[i])
 2900                                 return (0);
 2901 
 2902                 /* Find the OID in question */
 2903                 error = sysctl_find_oid(name2, l2, &oidp, NULL, NULL);
 2904                 if (error)
 2905                         return (error);
 2906 
 2907                 i = db_show_oid(oidp, name2, l2, flags | DB_SYSCTL_SAFE_ONLY);
 2908 
 2909                 if (db_pager_quit)
 2910                         return (0);
 2911 
 2912                 memcpy(name1+2, name2, l2 * sizeof(int));
 2913                 l1 = 2 + l2;
 2914         }
 2915 }
 2916 
 2917 /*
 2918  * Show a sysctl by its user facing string
 2919  */
 2920 static int
 2921 db_sysctlbyname(char *name, int flags)
 2922 {
 2923         struct sysctl_oid *oidp;
 2924         int oid[CTL_MAXNAME];
 2925         int error, nlen;
 2926 
 2927         error = name2oid(name, oid, &nlen, &oidp);
 2928         if (error) {
 2929                 return (error);
 2930         }
 2931 
 2932         if ((oidp->oid_kind & CTLTYPE) == CTLTYPE_NODE) {
 2933                 db_show_sysctl_all(oid, nlen, flags);
 2934         } else {
 2935                 error = db_show_oid(oidp, oid, nlen, flags);
 2936         }
 2937 
 2938         return (error);
 2939 }
 2940 
 2941 static void
 2942 db_sysctl_cmd_usage(void)
 2943 {
 2944         db_printf(
 2945             " sysctl [/Nnox] <sysctl>                                       \n"
 2946             "                                                               \n"
 2947             " <sysctl> The name of the sysctl to show.                      \n"
 2948             "                                                               \n"
 2949             " Show a sysctl by hooking into SYSCTL_IN and SYSCTL_OUT.       \n"
 2950             " This will work for most sysctls, but should not be used       \n"
 2951             " with sysctls that are known to malloc.                        \n"
 2952             "                                                               \n"
 2953             " While recursing any \"unsafe\" sysctls will be skipped.       \n"
 2954             " Call sysctl directly on the sysctl to try printing the        \n"
 2955             " skipped sysctl. This is unsafe and may make the ddb           \n"
 2956             " session unusable.                                             \n"
 2957             "                                                               \n"
 2958             " Arguments:                                                    \n"
 2959             "   /N      Display only the name of the sysctl.                \n"
 2960             "   /n      Display only the value of the sysctl.               \n"
 2961             "   /o      Display opaque values.                              \n"
 2962             "   /x      Display the sysctl in hex.                          \n"
 2963             "                                                               \n"
 2964             "For example:                                                   \n"
 2965             "sysctl vm.v_free_min                                           \n"
 2966             "vn.v_free_min: 12669                                           \n"
 2967             );
 2968 }
 2969 
 2970 /*
 2971  * Show a specific sysctl similar to sysctl (8).
 2972  */
 2973 DB_FUNC(sysctl, db_sysctl_cmd, db_cmd_table, CS_OWN, NULL)
 2974 {
 2975         char name[TOK_STRING_SIZE];
 2976         int error, i, t, flags;
 2977 
 2978         /* Parse the modifiers */
 2979         t = db_read_token();
 2980         if (t == tSLASH || t == tMINUS) {
 2981                 t = db_read_token();
 2982                 if (t != tIDENT) {
 2983                         db_printf("Bad modifier\n");
 2984                         error = EINVAL;
 2985                         goto out;
 2986                 }
 2987                 db_strcpy(modif, db_tok_string);
 2988         }
 2989         else {
 2990                 db_unread_token(t);
 2991                 modif[0] = '\0';
 2992         }
 2993 
 2994         flags = 0;
 2995         for (i = 0; i < nitems(db_sysctl_modifs); i++) {
 2996                 if (strchr(modif, db_sysctl_modifs[i])) {
 2997                         flags |= db_sysctl_modif_values[i];
 2998                 }
 2999         }
 3000 
 3001         /* Parse the sysctl names */
 3002         t = db_read_token();
 3003         if (t != tIDENT) {
 3004                 db_printf("Need sysctl name\n");
 3005                 error = EINVAL;
 3006                 goto out;
 3007         }
 3008 
 3009         /* Copy the name into a temporary buffer */
 3010         db_strcpy(name, db_tok_string);
 3011 
 3012         /* Ensure there is no trailing cruft */
 3013         t = db_read_token();
 3014         if (t != tEOL) {
 3015                 db_printf("Unexpected sysctl argument\n");
 3016                 error = EINVAL;
 3017                 goto out;
 3018         }
 3019 
 3020         error = db_sysctlbyname(name, flags);
 3021         if (error == ENOENT) {
 3022                 db_printf("unknown oid: '%s'\n", db_tok_string);
 3023                 goto out;
 3024         } else if (error) {
 3025                 db_printf("%s: error: %d\n", db_tok_string, error);
 3026                 goto out;
 3027         }
 3028 
 3029 out:
 3030         /* Ensure we eat all of our text */
 3031         db_flush_lex();
 3032 
 3033         if (error == EINVAL) {
 3034                 db_sysctl_cmd_usage();
 3035         }
 3036 }
 3037 
 3038 #endif /* DDB */

Cache object: fc575f2e621ef84c34f4f71456da2619


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]


This page is part of the FreeBSD/Linux Kernel Cross-Reference, and was automatically generated using a modified version of the LXR engine.