The Design and Implementation of the FreeBSD Operating System, Second Edition
Now available: The Design and Implementation of the FreeBSD Operating System (Second Edition)


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]

FreeBSD/Linux Kernel Cross Reference
sys/kern/kern_sysctl.c

Version: -  FREEBSD  -  FREEBSD-13-STABLE  -  FREEBSD-13-0  -  FREEBSD-12-STABLE  -  FREEBSD-12-0  -  FREEBSD-11-STABLE  -  FREEBSD-11-0  -  FREEBSD-10-STABLE  -  FREEBSD-10-0  -  FREEBSD-9-STABLE  -  FREEBSD-9-0  -  FREEBSD-8-STABLE  -  FREEBSD-8-0  -  FREEBSD-7-STABLE  -  FREEBSD-7-0  -  FREEBSD-6-STABLE  -  FREEBSD-6-0  -  FREEBSD-5-STABLE  -  FREEBSD-5-0  -  FREEBSD-4-STABLE  -  FREEBSD-3-STABLE  -  FREEBSD22  -  l41  -  OPENBSD  -  linux-2.6  -  MK84  -  PLAN9  -  xnu-8792 
SearchContext: -  none  -  3  -  10 

    1 /*-
    2  * SPDX-License-Identifier: BSD-3-Clause
    3  *
    4  * Copyright (c) 1982, 1986, 1989, 1993
    5  *      The Regents of the University of California.  All rights reserved.
    6  *
    7  * This code is derived from software contributed to Berkeley by
    8  * Mike Karels at Berkeley Software Design, Inc.
    9  *
   10  * Quite extensively rewritten by Poul-Henning Kamp of the FreeBSD
   11  * project, to make these variables more userfriendly.
   12  *
   13  * Redistribution and use in source and binary forms, with or without
   14  * modification, are permitted provided that the following conditions
   15  * are met:
   16  * 1. Redistributions of source code must retain the above copyright
   17  *    notice, this list of conditions and the following disclaimer.
   18  * 2. Redistributions in binary form must reproduce the above copyright
   19  *    notice, this list of conditions and the following disclaimer in the
   20  *    documentation and/or other materials provided with the distribution.
   21  * 3. Neither the name of the University nor the names of its contributors
   22  *    may be used to endorse or promote products derived from this software
   23  *    without specific prior written permission.
   24  *
   25  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
   26  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
   27  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
   28  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
   29  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
   30  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
   31  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
   32  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
   33  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
   34  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
   35  * SUCH DAMAGE.
   36  *
   37  *      @(#)kern_sysctl.c       8.4 (Berkeley) 4/14/94
   38  */
   39 
   40 #include <sys/cdefs.h>
   41 __FBSDID("$FreeBSD$");
   42 
   43 #include "opt_capsicum.h"
   44 #include "opt_ddb.h"
   45 #include "opt_ktrace.h"
   46 #include "opt_sysctl.h"
   47 
   48 #include <sys/param.h>
   49 #include <sys/fail.h>
   50 #include <sys/systm.h>
   51 #include <sys/capsicum.h>
   52 #include <sys/kernel.h>
   53 #include <sys/limits.h>
   54 #include <sys/sysctl.h>
   55 #include <sys/malloc.h>
   56 #include <sys/priv.h>
   57 #include <sys/proc.h>
   58 #include <sys/jail.h>
   59 #include <sys/kdb.h>
   60 #include <sys/lock.h>
   61 #include <sys/mutex.h>
   62 #include <sys/rmlock.h>
   63 #include <sys/sbuf.h>
   64 #include <sys/sx.h>
   65 #include <sys/sysproto.h>
   66 #include <sys/uio.h>
   67 #ifdef KTRACE
   68 #include <sys/ktrace.h>
   69 #endif
   70 
   71 #ifdef DDB
   72 #include <ddb/ddb.h>
   73 #include <ddb/db_lex.h>
   74 #endif
   75 
   76 #include <net/vnet.h>
   77 
   78 #include <security/mac/mac_framework.h>
   79 
   80 #include <vm/vm.h>
   81 #include <vm/vm_extern.h>
   82 
      /*
       * malloc(9) types for this file.  In the code shown here M_SYSCTLOID
       * backs the context entries allocated by sysctl_ctx_entry_add() and the
       * strings returned by sysctl_escape_name().
       */
   83 static MALLOC_DEFINE(M_SYSCTL, "sysctl", "sysctl internal magic");
   84 static MALLOC_DEFINE(M_SYSCTLOID, "sysctloid", "sysctl dynamic oids");
   85 static MALLOC_DEFINE(M_SYSCTLTMP, "sysctltmp", "sysctl temp output buffer");
   86 
      /*
       * Generate the tree routines for sysctl_oid_list: entries are
       * struct sysctl_oid linked through oid_link and compared with
       * cmp_sysctl_oid (defined outside this excerpt).
       */
   87 RB_GENERATE(sysctl_oid_list, sysctl_oid, oid_link, cmp_sysctl_oid);
   88 
   89 /*
   90  * The sysctllock protects the MIB tree.  It also protects sysctl
   91  * contexts used with dynamic sysctls.  The sysctl_register_oid() and
   92  * sysctl_unregister_oid() routines require the sysctllock to already
   93  * be held, so the sysctl_wlock() and sysctl_wunlock() routines are
   94  * provided for the few places in the kernel which need to use that
   95  * API rather than using the dynamic API.  Use of the dynamic API is
   96  * strongly encouraged for most code.
   97  *
   98  * The sysctlmemlock is used to limit the amount of user memory wired for
   99  * sysctl requests.  This is implemented by serializing any userland
  100  * sysctl requests larger than a single page via an exclusive lock.
  101  *
  102  * The sysctlstringlock is used to protect concurrent access to writable
  103  * string nodes in sysctl_handle_string().
  104  */
  105 static struct rmlock sysctllock;
  106 static struct sx __exclusive_cache_line sysctlmemlock;
  107 static struct sx sysctlstringlock;
  108 
      /*
       * Shorthand for operations on sysctllock.  The read-side macros take the
       * caller's rm_priotracker.  SYSCTL_INIT() initializes the lock with
       * RM_SLEEPABLE, and SYSCTL_SLEEP() calls rm_sleep() on channel 'ch',
       * passing sysctllock as the lock argument.
       */
  109 #define SYSCTL_WLOCK()          rm_wlock(&sysctllock)
  110 #define SYSCTL_WUNLOCK()        rm_wunlock(&sysctllock)
  111 #define SYSCTL_RLOCK(tracker)   rm_rlock(&sysctllock, (tracker))
  112 #define SYSCTL_RUNLOCK(tracker) rm_runlock(&sysctllock, (tracker))
  113 #define SYSCTL_WLOCKED()        rm_wowned(&sysctllock)
  114 #define SYSCTL_ASSERT_LOCKED()  rm_assert(&sysctllock, RA_LOCKED)
  115 #define SYSCTL_ASSERT_WLOCKED() rm_assert(&sysctllock, RA_WLOCKED)
  116 #define SYSCTL_ASSERT_RLOCKED() rm_assert(&sysctllock, RA_RLOCKED)
  117 #define SYSCTL_INIT()           rm_init_flags(&sysctllock, "sysctl lock", \
  118                                     RM_SLEEPABLE)
  119 #define SYSCTL_SLEEP(ch, wmesg, timo)                                   \
  120                                 rm_sleep(ch, &sysctllock, 0, wmesg, timo)
  121 
  122 static int sysctl_root(SYSCTL_HANDLER_ARGS);
  123 
  124 /*
      * Root list: the top-level children of the MIB tree.
      * sysctl_search_oid() starts its walk from the first entry of this list.
      */
  125 struct sysctl_oid_list sysctl__children = RB_INITIALIZER(&sysctl__children);
  126 
      /* Forward declarations of file-local helpers used before their definitions. */
  127 static char*    sysctl_escape_name(const char*);
  128 static int      sysctl_remove_oid_locked(struct sysctl_oid *oidp, int del,
  129                     int recurse);
  130 static int      sysctl_old_kernel(struct sysctl_req *, const void *, size_t);
  131 static int      sysctl_new_kernel(struct sysctl_req *, void *, size_t);
  132 
      /*
       * Find the entry of 'list' whose oid_name equals 'name'.
       *
       * The list is walked with SYSCTL_FOREACH and names are compared with
       * strcmp().  The sysctl lock must be held (asserted).  Returns the
       * matching oid, or NULL when no entry carries that name.
       */
  133 static struct sysctl_oid *
  134 sysctl_find_oidname(const char *name, struct sysctl_oid_list *list)
  135 {
  136         struct sysctl_oid *oidp;
  137 
  138         SYSCTL_ASSERT_LOCKED();
  139         SYSCTL_FOREACH(oidp, list) {
  140                 if (strcmp(oidp->oid_name, name) == 0) {
  141                         return (oidp);
  142                 }
  143         }
  144         return (NULL);
  145 }
  146 
  147 /*
  148  * Initialization of the MIB tree.
  149  *
  150  * Order by number in each list.
      *
      * sysctl_wlock() acquires sysctllock exclusively.  It is the exported
      * wrapper around SYSCTL_WLOCK() for code that calls sysctl_register_oid()
      * or sysctl_unregister_oid() directly, since both assert that the write
      * lock is already held.
  151  */
  152 void
  153 sysctl_wlock(void)
  154 {
  155 
  156         SYSCTL_WLOCK();
  157 }
  158 
      /*
       * Release the exclusive hold on sysctllock; exported wrapper around
       * SYSCTL_WUNLOCK() and the counterpart of sysctl_wlock().
       */
  159 void
  160 sysctl_wunlock(void)
  161 {
  162 
  163         SYSCTL_WUNLOCK();
  164 }
  165 
      /*
       * Call oid's handler with the sysctl lock temporarily dropped.
       *
       * Entered with sysctllock held: read-locked through 'tracker' when
       * tracker is non-NULL, write-locked otherwise.  The lock is released in
       * that same mode before the handler runs and re-acquired in that mode
       * before returning.  For CTLFLAG_DYN oids, oid_running is incremented
       * across the call; when the count drops back to zero on an oid flagged
       * CTLFLAG_DYING, sleepers on &oid->oid_running are woken.
       *
       * Returns the handler's error value ('error' is also handed to the
       * sysctl_running fail point, which presumably may replace it).
       */
  166 static int
  167 sysctl_root_handler_locked(struct sysctl_oid *oid, void *arg1, intmax_t arg2,
  168     struct sysctl_req *req, struct rm_priotracker *tracker)
  169 {
  170         int error;
  171 
              /* Mark the handler as in flight before the lock is let go. */
  172         if (oid->oid_kind & CTLFLAG_DYN)
  173                 atomic_add_int(&oid->oid_running, 1);
  174 
  175         if (tracker != NULL)
  176                 SYSCTL_RUNLOCK(tracker);
  177         else
  178                 SYSCTL_WUNLOCK();
  179 
  180         /*
  181          * Treat set CTLFLAG_NEEDGIANT and unset CTLFLAG_MPSAFE flags the same,
  182          * until we're ready to remove all traces of Giant from sysctl(9).
  183          */
  184         if ((oid->oid_kind & CTLFLAG_NEEDGIANT) ||
  185             (!(oid->oid_kind & CTLFLAG_MPSAFE)))
  186                 mtx_lock(&Giant);
  187         error = oid->oid_handler(oid, arg1, arg2, req);
  188         if ((oid->oid_kind & CTLFLAG_NEEDGIANT) ||
  189             (!(oid->oid_kind & CTLFLAG_MPSAFE)))
  190                 mtx_unlock(&Giant);
  191 
  192         KFAIL_POINT_ERROR(_debug_fail_point, sysctl_running, error);
  193 
              /* Retake the lock in the mode the caller held it. */
  194         if (tracker != NULL)
  195                 SYSCTL_RLOCK(tracker);
  196         else
  197                 SYSCTL_WLOCK();
  198 
              /*
               * atomic_fetchadd_int() returns the previous value, so == 1 means
               * this was the last running invocation.
               */
  199         if (oid->oid_kind & CTLFLAG_DYN) {
  200                 if (atomic_fetchadd_int(&oid->oid_running, -1) == 1 &&
  201                     (oid->oid_kind & CTLFLAG_DYING) != 0)
  202                         wakeup(&oid->oid_running);
  203         }
  204 
  205         return (error);
  206 }
  207 
      /*
       * Seed a tunable oid from the kernel environment.
       *
       * The dotted name of oidp is assembled by following SYSCTL_PARENT() links
       * and writing each component right-to-left into 'path'.  That name is then
       * looked up in the kernel environment: integer types are parsed into
       * 'data' with getenv_array(), strings are fetched with kern_getenv().
       * When a value is found it is passed to the oid's handler as new data,
       * through a sysctl_req whose copy routines are sysctl_old_kernel and
       * sysctl_new_kernel.
       *
       * The handler is invoked with tracker == NULL, so
       * sysctl_root_handler_locked() drops and retakes the write lock around
       * it; the caller must therefore hold the sysctl write lock.
       * Unsupported oid types and absent variables return silently.
       */
  208 static void
  209 sysctl_load_tunable_by_oid_locked(struct sysctl_oid *oidp)
  210 {
  211         struct sysctl_req req;
  212         struct sysctl_oid *curr;
  213         char *penv = NULL;
  214         char path[96];
  215         ssize_t rem = sizeof(path);
  216         ssize_t len;
  217         uint8_t data[512] __aligned(sizeof(uint64_t));
  218         int size;
  219         int error;
  220 
              /* NUL-terminate at the very end; the name grows toward the front. */
  221         path[--rem] = 0;
  222 
              /*
               * 'rem' is the offset where the name built so far begins.  Every
               * component other than the leaf also reserves one byte for the
               * '.' that follows it.
               */
  223         for (curr = oidp; curr != NULL; curr = SYSCTL_PARENT(curr)) {
  224                 len = strlen(curr->oid_name);
  225                 rem -= len;
  226                 if (curr != oidp)
  227                         rem -= 1;
  228                 if (rem < 0) {
  229                         printf("OID path exceeds %d bytes\n", (int)sizeof(path));
  230                         return;
  231                 }
  232                 memcpy(path + rem, curr->oid_name, len);
  233                 if (curr != oidp)
  234                         path[rem + len] = '.';
  235         }
  236 
  237         memset(&req, 0, sizeof(req));
  238 
  239         req.td = curthread;
  240         req.oldfunc = sysctl_old_kernel;
  241         req.newfunc = sysctl_new_kernel;
  242         req.lock = REQ_UNWIRED;
  243 
              /*
               * Each integer case differs only in element size and signedness.
               * A zero return from getenv_array() is treated as "nothing to
               * load"; otherwise 'size' becomes the new-data length.
               */
  244         switch (oidp->oid_kind & CTLTYPE) {
  245         case CTLTYPE_INT:
  246                 if (getenv_array(path + rem, data, sizeof(data), &size,
  247                     sizeof(int), GETENV_SIGNED) == 0)
  248                         return;
  249                 req.newlen = size;
  250                 req.newptr = data;
  251                 break;
  252         case CTLTYPE_UINT:
  253                 if (getenv_array(path + rem, data, sizeof(data), &size,
  254                     sizeof(int), GETENV_UNSIGNED) == 0)
  255                         return;
  256                 req.newlen = size;
  257                 req.newptr = data;
  258                 break;
  259         case CTLTYPE_LONG:
  260                 if (getenv_array(path + rem, data, sizeof(data), &size,
  261                     sizeof(long), GETENV_SIGNED) == 0)
  262                         return;
  263                 req.newlen = size;
  264                 req.newptr = data;
  265                 break;
  266         case CTLTYPE_ULONG:
  267                 if (getenv_array(path + rem, data, sizeof(data), &size,
  268                     sizeof(long), GETENV_UNSIGNED) == 0)
  269                         return;
  270                 req.newlen = size;
  271                 req.newptr = data;
  272                 break;
  273         case CTLTYPE_S8:
  274                 if (getenv_array(path + rem, data, sizeof(data), &size,
  275                     sizeof(int8_t), GETENV_SIGNED) == 0)
  276                         return;
  277                 req.newlen = size;
  278                 req.newptr = data;
  279                 break;
  280         case CTLTYPE_S16:
  281                 if (getenv_array(path + rem, data, sizeof(data), &size,
  282                     sizeof(int16_t), GETENV_SIGNED) == 0)
  283                         return;
  284                 req.newlen = size;
  285                 req.newptr = data;
  286                 break;
  287         case CTLTYPE_S32:
  288                 if (getenv_array(path + rem, data, sizeof(data), &size,
  289                     sizeof(int32_t), GETENV_SIGNED) == 0)
  290                         return;
  291                 req.newlen = size;
  292                 req.newptr = data;
  293                 break;
  294         case CTLTYPE_S64:
  295                 if (getenv_array(path + rem, data, sizeof(data), &size,
  296                     sizeof(int64_t), GETENV_SIGNED) == 0)
  297                         return;
  298                 req.newlen = size;
  299                 req.newptr = data;
  300                 break;
  301         case CTLTYPE_U8:
  302                 if (getenv_array(path + rem, data, sizeof(data), &size,
  303                     sizeof(uint8_t), GETENV_UNSIGNED) == 0)
  304                         return;
  305                 req.newlen = size;
  306                 req.newptr = data;
  307                 break;
  308         case CTLTYPE_U16:
  309                 if (getenv_array(path + rem, data, sizeof(data), &size,
  310                     sizeof(uint16_t), GETENV_UNSIGNED) == 0)
  311                         return;
  312                 req.newlen = size;
  313                 req.newptr = data;
  314                 break;
  315         case CTLTYPE_U32:
  316                 if (getenv_array(path + rem, data, sizeof(data), &size,
  317                     sizeof(uint32_t), GETENV_UNSIGNED) == 0)
  318                         return;
  319                 req.newlen = size;
  320                 req.newptr = data;
  321                 break;
  322         case CTLTYPE_U64:
  323                 if (getenv_array(path + rem, data, sizeof(data), &size,
  324                     sizeof(uint64_t), GETENV_UNSIGNED) == 0)
  325                         return;
  326                 req.newlen = size;
  327                 req.newptr = data;
  328                 break;
  329         case CTLTYPE_STRING:
                      /* penv is released with freeenv() once the handler has run. */
  330                 penv = kern_getenv(path + rem);
  331                 if (penv == NULL)
  332                         return;
  333                 req.newlen = strlen(penv);
  334                 req.newptr = penv;
  335                 break;
  336         default:
  337                 return;
  338         }
  339         error = sysctl_root_handler_locked(oidp, oidp->oid_arg1,
  340             oidp->oid_arg2, &req, NULL);
  341         if (error != 0)
  342                 printf("Setting sysctl %s failed: %d\n", path + rem, error);
  343         if (penv != NULL)
  344                 freeenv(penv);
  345 }
  346 
  347 /*
  348  * Locate the path to a given oid.  Returns the length of the resulting path,
  349  * or -1 if the oid was not found.  nodes must have room for CTL_MAXNAME
  350  * elements.
      *
      * On success nodes[0 .. return value - 1] holds the oids from the top
      * level down to 'needle'.  The sysctl lock must be held (asserted).  The
      * search also gives up, returning -1, once it would have to descend past
      * CTL_MAXNAME levels.
  351  */
  352 static int
  353 sysctl_search_oid(struct sysctl_oid **nodes, struct sysctl_oid *needle)
  354 {
  355         int indx;
  356 
  357         SYSCTL_ASSERT_LOCKED();
  358         indx = 0;
  359         /*
  360          * Do a depth-first search of the oid tree, looking for 'needle'. Start
  361          * with the first child of the root.
  362          */
  363         nodes[indx] = RB_MIN(sysctl_oid_list, &sysctl__children);
  364         for (;;) {
                      /* nodes[indx] is the candidate currently examined at depth indx. */
  365                 if (nodes[indx] == needle)
  366                         return (indx + 1);
  367 
  368                 if (nodes[indx] == NULL) {
  369                         /* Node has no more siblings, so back up to parent. */
  370                         if (indx-- == 0) {
  371                                 /* Retreat to root, so give up. */
  372                                 break;
  373                         }
  374                 } else if ((nodes[indx]->oid_kind & CTLTYPE) == CTLTYPE_NODE) {
  375                         /* Node has children. */
  376                         if (++indx == CTL_MAXNAME) {
  377                                 /* Max search depth reached, so give up. */
  378                                 break;
  379                         }
  380                         /* Start with the first child. */
  381                         nodes[indx] = RB_MIN(sysctl_oid_list,
  382                             &nodes[indx - 1]->oid_children);
  383                         continue;
  384                 }
  385                 /* Consider next sibling. */
  386                 nodes[indx] = RB_NEXT(sysctl_oid_list, NULL, nodes[indx]);
  387         }
  388         return (-1);
  389 }
  390 
      /*
       * Print a warning that an existing leaf oid cannot be re-used.
       *
       * func - name of the calling function, used as the message prefix
       * leaf - the already registered leaf that collided
       *
       * The message is built in a fixed 128-byte on-stack sbuf that drains
       * through sbuf_printf_drain.  When sysctl_search_oid() can locate the
       * leaf, its full dotted path is printed; otherwise only the leaf's own
       * name is.  sysctl_search_oid() asserts the sysctl lock, so the caller
       * must hold it.
       */
  391 static void
  392 sysctl_warn_reuse(const char *func, struct sysctl_oid *leaf)
  393 {
  394         struct sysctl_oid *nodes[CTL_MAXNAME];
  395         char buf[128];
  396         struct sbuf sb;
  397         int rc, i;
  398 
  399         (void)sbuf_new(&sb, buf, sizeof(buf), SBUF_FIXEDLEN | SBUF_INCLUDENUL);
  400         sbuf_set_drain(&sb, sbuf_printf_drain, NULL);
  401 
              /*
               * Prefix with the caller-supplied name.  Using __func__ here would
               * always print "sysctl_warn_reuse" and leave 'func' unused.
               */
  402         sbuf_printf(&sb, "%s: can't re-use a leaf (", func);
  403 
  404         rc = sysctl_search_oid(nodes, leaf);
  405         if (rc > 0) {
                      /*
                       * "%.*s" with precision 0 or 1 prints "." after every
                       * component except the last.
                       */
  406                 for (i = 0; i < rc; i++)
  407                         sbuf_printf(&sb, "%s%.*s", nodes[i]->oid_name,
  408                             i != (rc - 1), ".");
  409         } else {
  410                 sbuf_printf(&sb, "%s", leaf->oid_name);
  411         }
  412         sbuf_printf(&sb, ")!\n");
  413 
  414         (void)sbuf_finish(&sb);
  415 }
  416 
  417 #ifdef SYSCTL_DEBUG
      /*
       * Debug-only handler that exercises the warning path: it takes the sysctl
       * read lock, calls sysctl_warn_reuse() on its own oid and returns 0.
       * Registered below under the _sysctl parent as "reuse_test".
       */
  418 static int
  419 sysctl_reuse_test(SYSCTL_HANDLER_ARGS)
  420 {
  421         struct rm_priotracker tracker;
  422 
  423         SYSCTL_RLOCK(&tracker);
  424         sysctl_warn_reuse(__func__, oidp);
  425         SYSCTL_RUNLOCK(&tracker);
  426         return (0);
  427 }
  428 SYSCTL_PROC(_sysctl, OID_AUTO, reuse_test,
  429     CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_MPSAFE, 0, 0, sysctl_reuse_test, "-",
  430     "");
  431 #endif
  432 
      /*
       * Link oidp into its parent's list.  The sysctl write lock must be held
       * (asserted).
       *
       * - If the parent already has an entry with the same name, oidp is not
       *   inserted: an existing node only gains a reference (oid_refcnt++),
       *   an existing leaf triggers a warning.
       * - A negative oid_number is replaced by an automatically chosen one,
       *   handed out downward from 0x7fffffff by a function-static counter.
       * - If the chosen number is already present in the parent, following
       *   numbers are probed; the probe restarts at CTL_AUTO_START on
       *   wraparound and panics once the restart budget ('timeout') is spent.
       * - After insertion, a non-node oid with CTLFLAG_TUN set and
       *   CTLFLAG_NOFETCH clear (and, under VIMAGE, CTLFLAG_VNET clear) has
       *   its value loaded once from the kernel environment.
       */
  433 void
  434 sysctl_register_oid(struct sysctl_oid *oidp)
  435 {
  436         struct sysctl_oid_list *parent = oidp->oid_parent;
  437         struct sysctl_oid *p, key;
  438         int oid_number;
  439         int timeout = 2;
  440 
  441         /*
  442          * First check if another oid with the same name already
  443          * exists in the parent's list.
  444          */
  445         SYSCTL_ASSERT_WLOCKED();
  446         p = sysctl_find_oidname(oidp->oid_name, parent);
  447         if (p != NULL) {
  448                 if ((p->oid_kind & CTLTYPE) == CTLTYPE_NODE) {
  449                         p->oid_refcnt++;
  450                         return;
  451                 } else {
  452                         sysctl_warn_reuse(__func__, p);
  453                         return;
  454                 }
  455         }
  456         /* get current OID number */
  457         oid_number = oidp->oid_number;
  458 
  459 #if (OID_AUTO >= 0)
  460 #error "OID_AUTO is expected to be a negative value"
  461 #endif  
  462         /*
  463          * Any negative OID number qualifies as OID_AUTO. Valid OID
  464          * numbers should always be positive.
  465          *
  466          * NOTE: DO NOT change the starting value here, change it in
  467          * <sys/sysctl.h>, and make sure it is at least 256 to
  468          * accommodate e.g. net.inet.raw as a static sysctl node.
  469          */
  470         if (oid_number < 0) {
  471                 static int newoid;
  472 
  473                 /*
  474                  * By decrementing the next OID number we spend less
  475                  * time inserting the OIDs into a sorted list.
  476                  */
  477                 if (--newoid < CTL_AUTO_START)
  478                         newoid = 0x7fffffff;
  479 
  480                 oid_number = newoid;
  481         }
  482 
  483         /*
  484          * Insert the OID into the parent's list sorted by OID number.
  485          */
              /* Only oid_number of 'key' is set; it serves as the lookup key. */
  486         key.oid_number = oid_number;
  487         p = RB_NFIND(sysctl_oid_list, parent, &key);
  488         while (p != NULL && oid_number == p->oid_number) {
  489                 /* get the next valid OID number */
  490                 if (oid_number < CTL_AUTO_START ||
  491                     oid_number == 0x7fffffff) {
  492                         /* wraparound - restart */
  493                         oid_number = CTL_AUTO_START;
  494                         /* don't loop forever */
  495                         if (!timeout--)
  496                                 panic("sysctl: Out of OID numbers\n");
  497                         key.oid_number = oid_number;
  498                         p = RB_NFIND(sysctl_oid_list, parent, &key);
  499                         continue;
  500                 }
  501                 p = RB_NEXT(sysctl_oid_list, NULL, p);
  502                 oid_number++;
  503         }
  504         /* check for non-auto OID number collision */
  505         if (oidp->oid_number >= 0 && oidp->oid_number < CTL_AUTO_START &&
  506             oid_number >= CTL_AUTO_START) {
  507                 printf("sysctl: OID number(%d) is already in use for '%s'\n",
  508                     oidp->oid_number, oidp->oid_name);
  509         }
  510         /* update the OID number, if any */
  511         oidp->oid_number = oid_number;
  512         RB_INSERT(sysctl_oid_list, parent, oidp);
  513 
  514         if ((oidp->oid_kind & CTLTYPE) != CTLTYPE_NODE &&
  515 #ifdef VIMAGE
  516             (oidp->oid_kind & CTLFLAG_VNET) == 0 &&
  517 #endif
  518             (oidp->oid_kind & CTLFLAG_TUN) != 0 &&
  519             (oidp->oid_kind & CTLFLAG_NOFETCH) == 0) {
  520                 /* only fetch value once */
  521                 oidp->oid_kind |= CTLFLAG_NOFETCH;
  522                 /* try to fetch value from kernel environment */
  523                 sysctl_load_tunable_by_oid_locked(oidp);
  524         }
  525 }
  526 
      /*
       * Register oidp, first flagging it CTLFLAG_DORMANT unless it is a node.
       * oidp must not already carry CTLFLAG_DORMANT (KASSERT).  Registration is
       * done by sysctl_register_oid(), which asserts the sysctl write lock.
       */
  527 void
  528 sysctl_register_disabled_oid(struct sysctl_oid *oidp)
  529 {
  530 
  531         /*
  532          * Mark the leaf as dormant if it's not to be immediately enabled.
  533          * We do not disable nodes as they can be shared between modules
  534          * and it is always safe to access a node.
  535          */
  536         KASSERT((oidp->oid_kind & CTLFLAG_DORMANT) == 0,
  537             ("internal flag is set in oid_kind"));
  538         if ((oidp->oid_kind & CTLTYPE) != CTLTYPE_NODE)
  539                 oidp->oid_kind |= CTLFLAG_DORMANT;
  540         sysctl_register_oid(oidp);
  541 }
  542 
      /*
       * Clear CTLFLAG_DORMANT on a non-node oid.  The sysctl write lock must be
       * held (asserted).  Nodes are left untouched and are expected never to
       * be dormant; a non-node oid is expected to be dormant on entry (both
       * checked with KASSERT).
       */
  543 void
  544 sysctl_enable_oid(struct sysctl_oid *oidp)
  545 {
  546 
  547         SYSCTL_ASSERT_WLOCKED();
  548         if ((oidp->oid_kind & CTLTYPE) == CTLTYPE_NODE) {
  549                 KASSERT((oidp->oid_kind & CTLFLAG_DORMANT) == 0,
  550                     ("sysctl node is marked as dormant"));
  551                 return;
  552         }
  553         KASSERT((oidp->oid_kind & CTLFLAG_DORMANT) != 0,
  554             ("enabling already enabled sysctl oid"));
  555         oidp->oid_kind &= ~CTLFLAG_DORMANT;
  556 }
  557 
      /*
       * Unlink oidp from its parent's list.  The sysctl write lock must be held
       * (asserted).  Nothing is returned: a failure is only reported with
       * printf().  An oid whose number is still OID_AUTO is reported as EINVAL
       * without touching the list; a NULL result from RB_REMOVE() is reported
       * as ENOENT.
       */
  558 void
  559 sysctl_unregister_oid(struct sysctl_oid *oidp)
  560 {
  561         int error;
  562 
  563         SYSCTL_ASSERT_WLOCKED();
  564         if (oidp->oid_number == OID_AUTO) {
  565                 error = EINVAL;
  566         } else {
  567                 error = ENOENT;
  568                 if (RB_REMOVE(sysctl_oid_list, oidp->oid_parent, oidp))
  569                         error = 0;
  570         }
  571 
  572         /* 
  573          * This can happen when a module fails to register and is
  574          * being unloaded afterwards.  It should not be a panic()
  575          * for normal use.
  576          */
  577         if (error) {
  578                 printf("%s: failed(%d) to unregister sysctl(%s)\n",
  579                     __func__, error, oidp->oid_name);
  580         }
  581 }
  582 
  583 /*
      * Initialize a new context to keep track of dynamically added sysctls.
      *
      * Returns EINVAL when c is NULL; otherwise sets c up as an empty tail
      * queue and returns 0.
      */
  584 int
  585 sysctl_ctx_init(struct sysctl_ctx_list *c)
  586 {
  587 
  588         if (c == NULL) {
  589                 return (EINVAL);
  590         }
  591 
  592         /*
  593          * No locking here, the caller is responsible for not adding
  594          * new nodes to a context until after this function has
  595          * returned.
  596          */
  597         TAILQ_INIT(c);
  598         return (0);
  599 }
  600 
  601 /*
      * Free the context, and destroy all dynamic oids registered in this context.
      *
      * Works in two passes under the sysctl write lock.  The first pass
      * deregisters every entry without freeing it (del == 0) and then registers
      * the deregistered ones again, so the tree is back in its prior state.
      * If any entry could not be deregistered the function stops there and
      * returns EBUSY.  Otherwise the second pass removes each oid for real
      * (del == 1) and frees its context entry; a failure at that point panics.
      * Returns 0 on success.
      *
      * NOTE(review): the list head itself is not reset after its entries are
      * freed; presumably callers re-run sysctl_ctx_init() before reusing the
      * context - confirm.
      */
  602 int
  603 sysctl_ctx_free(struct sysctl_ctx_list *clist)
  604 {
  605         struct sysctl_ctx_entry *e, *e1;
  606         int error;
  607 
  608         error = 0;
  609         /*
  610          * First perform a "dry run" to check if it's ok to remove oids.
  611          * XXX FIXME
  612          * XXX This algorithm is a hack. But I don't know any
  613          * XXX better solution for now...
  614          */
  615         SYSCTL_WLOCK();
  616         TAILQ_FOREACH(e, clist, link) {
  617                 error = sysctl_remove_oid_locked(e->entry, 0, 0);
  618                 if (error)
  619                         break;
  620         }
  621         /*
  622          * Restore deregistered entries, either from the end,
  623          * or from the place where error occurred.
  624          * e contains the entry that was not unregistered
  625          */
  626         if (error)
  627                 e1 = TAILQ_PREV(e, sysctl_ctx_list, link);
  628         else
  629                 e1 = TAILQ_LAST(clist, sysctl_ctx_list);
  630         while (e1 != NULL) {
  631                 sysctl_register_oid(e1->entry);
  632                 e1 = TAILQ_PREV(e1, sysctl_ctx_list, link);
  633         }
  634         if (error) {
  635                 SYSCTL_WUNLOCK();
  636                 return(EBUSY);
  637         }
  638         /* Now really delete the entries */
  639         e = TAILQ_FIRST(clist);
  640         while (e != NULL) {
                      /* Fetch the successor first: e is freed below. */
  641                 e1 = TAILQ_NEXT(e, link);
  642                 error = sysctl_remove_oid_locked(e->entry, 1, 0);
  643                 if (error)
  644                         panic("sysctl_remove_oid: corrupt tree, entry: %s",
  645                             e->entry->oid_name);
  646                 free(e, M_SYSCTLOID);
  647                 e = e1;
  648         }
  649         SYSCTL_WUNLOCK();
  650         return (error);
  651 }
  652 
  653 /*
      * Add an entry to the context.
      *
      * The sysctl write lock must be held (asserted).  Returns NULL when clist
      * or oidp is NULL.  Otherwise a new entry referring to oidp is allocated
      * from M_SYSCTLOID with M_WAITOK, inserted at the head of clist and
      * returned.
      */
  654 struct sysctl_ctx_entry *
  655 sysctl_ctx_entry_add(struct sysctl_ctx_list *clist, struct sysctl_oid *oidp)
  656 {
  657         struct sysctl_ctx_entry *e;
  658 
  659         SYSCTL_ASSERT_WLOCKED();
  660         if (clist == NULL || oidp == NULL)
  661                 return(NULL);
  662         e = malloc(sizeof(struct sysctl_ctx_entry), M_SYSCTLOID, M_WAITOK);
  663         e->entry = oidp;
  664         TAILQ_INSERT_HEAD(clist, e, link);
  665         return (e);
  666 }
  667 
  668 /*
      * Find an entry in the context.
      *
      * The sysctl write lock must be held (asserted).  Returns the entry of
      * clist whose 'entry' pointer equals oidp, or NULL when clist or oidp is
      * NULL or no entry matches.
      */
  669 struct sysctl_ctx_entry *
  670 sysctl_ctx_entry_find(struct sysctl_ctx_list *clist, struct sysctl_oid *oidp)
  671 {
  672         struct sysctl_ctx_entry *e;
  673 
  674         SYSCTL_ASSERT_WLOCKED();
  675         if (clist == NULL || oidp == NULL)
  676                 return(NULL);
  677         TAILQ_FOREACH(e, clist, link) {
  678                 if (e->entry == oidp)
  679                         return(e);
  680         }
              /* The loop ran to completion, so e is NULL here. */
  681         return (e);
  682 }
  683 
  684 /*
  685  * Delete an entry from the context.
  686  * NOTE: this function doesn't free oidp! You have to remove it
  687  * with sysctl_remove_oid().
      *
      * Unlike sysctl_ctx_entry_add() and sysctl_ctx_entry_find(), this
      * function takes the sysctl write lock itself.  Returns EINVAL when clist
      * or oidp is NULL, ENOENT when oidp has no entry in clist, and 0 after the
      * entry has been unlinked and freed.
  688  */
  689 int
  690 sysctl_ctx_entry_del(struct sysctl_ctx_list *clist, struct sysctl_oid *oidp)
  691 {
  692         struct sysctl_ctx_entry *e;
  693 
  694         if (clist == NULL || oidp == NULL)
  695                 return (EINVAL);
  696         SYSCTL_WLOCK();
  697         e = sysctl_ctx_entry_find(clist, oidp);
  698         if (e != NULL) {
  699                 TAILQ_REMOVE(clist, e, link);
                      /* The entry is already unlinked, so free it after unlocking. */
  700                 SYSCTL_WUNLOCK();
  701                 free(e, M_SYSCTLOID);
  702                 return (0);
  703         } else {
  704                 SYSCTL_WUNLOCK();
  705                 return (ENOENT);
  706         }
  707 }
  708 
  709 /*
  710  * Remove dynamically created sysctl trees.
  711  * oidp - top of the tree to be removed
  712  * del - if 0 - just deregister, otherwise free up entries as well
  713  * recurse - if != 0 traverse the subtree to be deleted
      *
      * Locking wrapper: takes the sysctl write lock, calls
      * sysctl_remove_oid_locked() and returns its result.
  714  */
  715 int
  716 sysctl_remove_oid(struct sysctl_oid *oidp, int del, int recurse)
  717 {
  718         int error;
  719 
  720         SYSCTL_WLOCK();
  721         error = sysctl_remove_oid_locked(oidp, del, recurse);
  722         SYSCTL_WUNLOCK();
  723         return (error);
  724 }
  725 
      /*
       * Remove the child of 'parent' called 'name'.
       *
       * Under the sysctl write lock the name is looked up in
       * parent->oid_children.  Returns ENOENT when no such child exists,
       * otherwise the result of sysctl_remove_oid_locked() for that child;
       * 'del' and 'recurse' are passed through unchanged.
       */
  726 int
  727 sysctl_remove_name(struct sysctl_oid *parent, const char *name,
  728     int del, int recurse)
  729 {
  730         struct sysctl_oid *p;
  731         int error;
  732 
  733         error = ENOENT;
  734         SYSCTL_WLOCK();
  735         p = sysctl_find_oidname(name, &parent->oid_children);
  736         if (p)
  737                 error = sysctl_remove_oid_locked(p, del, recurse);
  738         SYSCTL_WUNLOCK();
  739 
  740         return (error);
  741 }
  742 
  743 /*
  744  * Duplicate the provided string, escaping any illegal characters.  The result
  745  * must be freed when no longer in use.
  746  *
  747  * The list of illegal characters is ".".
      *
      * Every '.' in orig is replaced by the three characters "%25"; all other
      * characters are copied unchanged.  The copy is allocated from
      * M_SYSCTLOID with M_WAITOK.
  748  */
  749 static char*
  750 sysctl_escape_name(const char* orig)
  751 {
  752         int i, s = 0, d = 0, nillegals = 0;
  753         char *new;
  754 
  755         /* First count the number of illegal characters */
  756         for (i = 0; orig[i] != '\0'; i++) {
  757                 if (orig[i] == '.')
  758                         nillegals++;
  759         }
  760 
  761         /* Allocate storage for new string */
              /*
               * i is strlen(orig); each escaped character grows by two bytes, and
               * one more byte holds the terminating NUL.
               */
  762         new = malloc(i + 2 * nillegals + 1, M_SYSCTLOID, M_WAITOK);
  763 
  764         /* Copy the name, escaping characters as we go */
  765         while (orig[s] != '\0') {
  766                 if (orig[s] == '.') {
  767                         /*
                               * Emit "%25" in place of '.'.  NOTE: 0x25 is the ASCII
                               * code of '%', not of '.' (which is 0x2e); the emitted
                               * sequence is kept as is because it is visible at
                               * runtime.
                               */
  768                         new[d++] = '%';
  769                         new[d++] = '2';
  770                         new[d++] = '5';
  771                         s++;
  772                 } else {
  773                         new[d++] = orig[s++];
  774                 }
  775         }
  776 
  777         /* Finally, nul-terminate */
  778         new[d] = '\0';
  779 
  780         return (new);
  781 }
  782 
  783 static int
  784 sysctl_remove_oid_locked(struct sysctl_oid *oidp, int del, int recurse)
  785 {
  786         struct sysctl_oid *p, *tmp;
  787         int error;
  788 
  789         SYSCTL_ASSERT_WLOCKED();
  790         if (oidp == NULL)
  791                 return(EINVAL);
  792         if ((oidp->oid_kind & CTLFLAG_DYN) == 0) {
  793                 printf("Warning: can't remove non-dynamic nodes (%s)!\n",
  794                     oidp->oid_name);
  795                 return (EINVAL);
  796         }
  797         /*
  798          * WARNING: normal method to do this should be through
  799          * sysctl_ctx_free(). Use recursing as the last resort
  800          * method to purge your sysctl tree of leftovers...
  801          * However, if some other code still references these nodes,
  802          * it will panic.
  803          */
  804         if ((oidp->oid_kind & CTLTYPE) == CTLTYPE_NODE) {
  805                 if (oidp->oid_refcnt == 1) {
  806                         for(p = RB_MIN(sysctl_oid_list, &oidp->oid_children);
  807                             p != NULL; p = tmp) {
  808                                 if (!recurse) {
  809                                         printf("Warning: failed attempt to "
  810                                             "remove oid %s with child %s\n",
  811                                             oidp->oid_name, p->oid_name);
  812                                         return (ENOTEMPTY);
  813                                 }
  814                                 tmp = RB_NEXT(sysctl_oid_list,
  815                                     &oidp->oid_children, p);
  816                                 error = sysctl_remove_oid_locked(p, del,
  817                                     recurse);
  818                                 if (error)
  819                                         return (error);
  820                         }
  821                 }
  822         }
  823         if (oidp->oid_refcnt > 1 ) {
  824                 oidp->oid_refcnt--;
  825         } else {
  826                 if (oidp->oid_refcnt == 0) {
  827                         printf("Warning: bad oid_refcnt=%u (%s)!\n",
  828                                 oidp->oid_refcnt, oidp->oid_name);
  829                         return (EINVAL);
  830                 }
  831                 sysctl_unregister_oid(oidp);
  832                 if (del) {
  833                         /*
  834                          * Wait for all threads running the handler to drain.
  835                          * This preserves the previous behavior when the
  836                          * sysctl lock was held across a handler invocation,
  837                          * and is necessary for module unload correctness.
  838                          */
  839                         while (oidp->oid_running > 0) {
  840                                 oidp->oid_kind |= CTLFLAG_DYING;
  841                                 SYSCTL_SLEEP(&oidp->oid_running, "oidrm", 0);
  842                         }
  843                         if (oidp->oid_descr)
  844                                 free(__DECONST(char *, oidp->oid_descr),
  845                                     M_SYSCTLOID);
  846                         if (oidp->oid_label)
  847                                 free(__DECONST(char *, oidp->oid_label),
  848                                     M_SYSCTLOID);
  849                         free(__DECONST(char *, oidp->oid_name), M_SYSCTLOID);
  850                         free(oidp, M_SYSCTLOID);
  851                 }
  852         }
  853         return (0);
  854 }
  855 /*
  856  * Create new sysctls at run time.
  857  * clist may point to a valid context initialized with sysctl_ctx_init().
  858  */
  859 struct sysctl_oid *
  860 sysctl_add_oid(struct sysctl_ctx_list *clist, struct sysctl_oid_list *parent,
  861         int number, const char *name, int kind, void *arg1, intmax_t arg2,
  862         int (*handler)(SYSCTL_HANDLER_ARGS), const char *fmt, const char *descr,
  863         const char *label)
  864 {
  865         struct sysctl_oid *oidp;
  866         char *escaped;
  867 
  868         /* You have to hook up somewhere.. */
  869         if (parent == NULL)
  870                 return(NULL);
  871         escaped = sysctl_escape_name(name);
  872         /* Check if the node already exists, otherwise create it */
  873         SYSCTL_WLOCK();
  874         oidp = sysctl_find_oidname(escaped, parent);
  875         if (oidp != NULL) {
  876                 free(escaped, M_SYSCTLOID);
  877                 if ((oidp->oid_kind & CTLTYPE) == CTLTYPE_NODE) {
  878                         oidp->oid_refcnt++;
  879                         /* Update the context */
  880                         if (clist != NULL)
  881                                 sysctl_ctx_entry_add(clist, oidp);
  882                         SYSCTL_WUNLOCK();
  883                         return (oidp);
  884                 } else {
  885                         sysctl_warn_reuse(__func__, oidp);
  886                         SYSCTL_WUNLOCK();
  887                         return (NULL);
  888                 }
  889         }
  890         oidp = malloc(sizeof(struct sysctl_oid), M_SYSCTLOID, M_WAITOK|M_ZERO);
  891         oidp->oid_parent = parent;
  892         RB_INIT(&oidp->oid_children);
  893         oidp->oid_number = number;
  894         oidp->oid_refcnt = 1;
  895         oidp->oid_name = escaped;
  896         oidp->oid_handler = handler;
  897         oidp->oid_kind = CTLFLAG_DYN | kind;
  898         oidp->oid_arg1 = arg1;
  899         oidp->oid_arg2 = arg2;
  900         oidp->oid_fmt = fmt;
  901         if (descr != NULL)
  902                 oidp->oid_descr = strdup(descr, M_SYSCTLOID);
  903         if (label != NULL)
  904                 oidp->oid_label = strdup(label, M_SYSCTLOID);
  905         /* Update the context, if used */
  906         if (clist != NULL)
  907                 sysctl_ctx_entry_add(clist, oidp);
  908         /* Register this oid */
  909         sysctl_register_oid(oidp);
  910         SYSCTL_WUNLOCK();
  911         return (oidp);
  912 }
  913 
  914 /*
  915  * Rename an existing oid.
  916  */
  917 void
  918 sysctl_rename_oid(struct sysctl_oid *oidp, const char *name)
  919 {
  920         char *newname;
  921         char *oldname;
  922 
  923         newname = strdup(name, M_SYSCTLOID);
  924         SYSCTL_WLOCK();
  925         oldname = __DECONST(char *, oidp->oid_name);
  926         oidp->oid_name = newname;
  927         SYSCTL_WUNLOCK();
  928         free(oldname, M_SYSCTLOID);
  929 }
  930 
  931 /*
  932  * Reparent an existing oid.
  933  */
  934 int
  935 sysctl_move_oid(struct sysctl_oid *oid, struct sysctl_oid_list *parent)
  936 {
  937         struct sysctl_oid *oidp;
  938 
  939         SYSCTL_WLOCK();
  940         if (oid->oid_parent == parent) {
  941                 SYSCTL_WUNLOCK();
  942                 return (0);
  943         }
  944         oidp = sysctl_find_oidname(oid->oid_name, parent);
  945         if (oidp != NULL) {
  946                 SYSCTL_WUNLOCK();
  947                 return (EEXIST);
  948         }
  949         sysctl_unregister_oid(oid);
  950         oid->oid_parent = parent;
  951         oid->oid_number = OID_AUTO;
  952         sysctl_register_oid(oid);
  953         SYSCTL_WUNLOCK();
  954         return (0);
  955 }
  956 
  957 /*
  958  * Register the kernel's oids on startup.
  959  */
  960 SET_DECLARE(sysctl_set, struct sysctl_oid);
  961 
  962 static void
  963 sysctl_register_all(void *arg)
  964 {
  965         struct sysctl_oid **oidp;
  966 
  967         sx_init(&sysctlmemlock, "sysctl mem");
  968         sx_init(&sysctlstringlock, "sysctl string handler");
  969         SYSCTL_INIT();
  970         SYSCTL_WLOCK();
  971         SET_FOREACH(oidp, sysctl_set)
  972                 sysctl_register_oid(*oidp);
  973         SYSCTL_WUNLOCK();
  974 }
  975 SYSINIT(sysctl, SI_SUB_KMEM, SI_ORDER_FIRST, sysctl_register_all, NULL);
  976 
  977 /*
  978  * "Staff-functions"
  979  *
  980  * These functions implement a presently undocumented interface 
  981  * used by the sysctl program to walk the tree, and get the type
  982  * so it can print the value.
  983  * This interface is under work and consideration, and should probably
  984  * be killed with a big axe by the first person who can find the time.
  985  * (be aware though, that the proper interface isn't as obvious as it
  986  * may seem; there are various conflicting requirements.)
  987  *
  988  * {CTL_SYSCTL, CTL_SYSCTL_DEBUG}               printf the entire MIB-tree.
  989  * {CTL_SYSCTL, CTL_SYSCTL_NAME, ...}           return the name of the "..."
  990  *                                              OID.
  991  * {CTL_SYSCTL, CTL_SYSCTL_NEXT, ...}           return the next OID, honoring
  992  *                                              CTLFLAG_SKIP.
  993  * {CTL_SYSCTL, CTL_SYSCTL_NAME2OID}            return the OID of the name in
  994  *                                              "new"
  995  * {CTL_SYSCTL, CTL_SYSCTL_OIDFMT, ...}         return the kind & format info
  996  *                                              for the "..." OID.
  997  * {CTL_SYSCTL, CTL_SYSCTL_OIDDESCR, ...}       return the description of the
  998  *                                              "..." OID.
  999  * {CTL_SYSCTL, CTL_SYSCTL_OIDLABEL, ...}       return the aggregation label of
 1000  *                                              the "..." OID.
 1001  * {CTL_SYSCTL, CTL_SYSCTL_NEXTNOSKIP, ...}     return the next OID, ignoring
 1002  *                                              CTLFLAG_SKIP.
 1003  */
 1004 
 1005 #ifdef SYSCTL_DEBUG
 1006 static void
 1007 sysctl_sysctl_debug_dump_node(struct sysctl_oid_list *l, int i)
 1008 {
 1009         int k;
 1010         struct sysctl_oid *oidp;
 1011 
 1012         SYSCTL_ASSERT_LOCKED();
 1013         SYSCTL_FOREACH(oidp, l) {
 1014                 for (k=0; k<i; k++)
 1015                         printf(" ");
 1016 
 1017                 printf("%d %s ", oidp->oid_number, oidp->oid_name);
 1018 
 1019                 printf("%c%c",
 1020                         oidp->oid_kind & CTLFLAG_RD ? 'R':' ',
 1021                         oidp->oid_kind & CTLFLAG_WR ? 'W':' ');
 1022 
 1023                 if (oidp->oid_handler)
 1024                         printf(" *Handler");
 1025 
 1026                 switch (oidp->oid_kind & CTLTYPE) {
 1027                         case CTLTYPE_NODE:
 1028                                 printf(" Node\n");
 1029                                 if (!oidp->oid_handler) {
 1030                                         sysctl_sysctl_debug_dump_node(
 1031                                             SYSCTL_CHILDREN(oidp), i + 2);
 1032                                 }
 1033                                 break;
 1034                         case CTLTYPE_INT:    printf(" Int\n"); break;
 1035                         case CTLTYPE_UINT:   printf(" u_int\n"); break;
 1036                         case CTLTYPE_LONG:   printf(" Long\n"); break;
 1037                         case CTLTYPE_ULONG:  printf(" u_long\n"); break;
 1038                         case CTLTYPE_STRING: printf(" String\n"); break;
 1039                         case CTLTYPE_S8:     printf(" int8_t\n"); break;
 1040                         case CTLTYPE_S16:    printf(" int16_t\n"); break;
 1041                         case CTLTYPE_S32:    printf(" int32_t\n"); break;
 1042                         case CTLTYPE_S64:    printf(" int64_t\n"); break;
 1043                         case CTLTYPE_U8:     printf(" uint8_t\n"); break;
 1044                         case CTLTYPE_U16:    printf(" uint16_t\n"); break;
 1045                         case CTLTYPE_U32:    printf(" uint32_t\n"); break;
 1046                         case CTLTYPE_U64:    printf(" uint64_t\n"); break;
 1047                         case CTLTYPE_OPAQUE: printf(" Opaque/struct\n"); break;
 1048                         default:             printf("\n");
 1049                 }
 1050         }
 1051 }
 1052 
 1053 static int
 1054 sysctl_sysctl_debug(SYSCTL_HANDLER_ARGS)
 1055 {
 1056         struct rm_priotracker tracker;
 1057         int error;
 1058 
 1059         error = priv_check(req->td, PRIV_SYSCTL_DEBUG);
 1060         if (error)
 1061                 return (error);
 1062         SYSCTL_RLOCK(&tracker);
 1063         sysctl_sysctl_debug_dump_node(&sysctl__children, 0);
 1064         SYSCTL_RUNLOCK(&tracker);
 1065         return (ENOENT);
 1066 }
 1067 
 1068 SYSCTL_PROC(_sysctl, CTL_SYSCTL_DEBUG, debug, CTLTYPE_STRING | CTLFLAG_RD |
 1069     CTLFLAG_MPSAFE, 0, 0, sysctl_sysctl_debug, "-", "");
 1070 #endif
 1071 
 1072 static int
 1073 sysctl_sysctl_name(SYSCTL_HANDLER_ARGS)
 1074 {
 1075         int *name = (int *) arg1;
 1076         u_int namelen = arg2;
 1077         int error;
 1078         struct sysctl_oid *oid, key;
 1079         struct sysctl_oid_list *lsp = &sysctl__children, *lsp2;
 1080         struct rm_priotracker tracker;
 1081         char buf[10];
 1082 
 1083         error = sysctl_wire_old_buffer(req, 0);
 1084         if (error)
 1085                 return (error);
 1086 
 1087         SYSCTL_RLOCK(&tracker);
 1088         while (namelen) {
 1089                 if (!lsp) {
 1090                         snprintf(buf,sizeof(buf),"%d",*name);
 1091                         if (req->oldidx)
 1092                                 error = SYSCTL_OUT(req, ".", 1);
 1093                         if (!error)
 1094                                 error = SYSCTL_OUT(req, buf, strlen(buf));
 1095                         if (error)
 1096                                 goto out;
 1097                         namelen--;
 1098                         name++;
 1099                         continue;
 1100                 }
 1101                 lsp2 = NULL;
 1102                 key.oid_number = *name;
 1103                 oid = RB_FIND(sysctl_oid_list, lsp, &key);
 1104                 if (oid) {
 1105                         if (req->oldidx)
 1106                                 error = SYSCTL_OUT(req, ".", 1);
 1107                         if (!error)
 1108                                 error = SYSCTL_OUT(req, oid->oid_name,
 1109                                         strlen(oid->oid_name));
 1110                         if (error)
 1111                                 goto out;
 1112 
 1113                         namelen--;
 1114                         name++;
 1115 
 1116                         if ((oid->oid_kind & CTLTYPE) == CTLTYPE_NODE &&
 1117                                 !oid->oid_handler)
 1118                                 lsp2 = SYSCTL_CHILDREN(oid);
 1119                 }
 1120                 lsp = lsp2;
 1121         }
 1122         error = SYSCTL_OUT(req, "", 1);
 1123  out:
 1124         SYSCTL_RUNLOCK(&tracker);
 1125         return (error);
 1126 }
 1127 
 1128 /*
 1129  * XXXRW/JA: Shouldn't return name data for nodes that we don't permit in
 1130  * capability mode.
 1131  */
 1132 static SYSCTL_NODE(_sysctl, CTL_SYSCTL_NAME, name, CTLFLAG_RD |
 1133     CTLFLAG_MPSAFE | CTLFLAG_CAPRD, sysctl_sysctl_name, "");
 1134 
 1135 enum sysctl_iter_action {
 1136         ITER_SIBLINGS,  /* No match at this oid; move on to the next sibling */
 1137         ITER_CHILDREN,  /* Oid is a node whose children must be iterated */
 1138         ITER_FOUND,     /* This oid is the one being looked for */
 1139 };
 1140 
 1141 /*
 1142  * Tries to find the next node for @name and @namelen.
 1143  *
 1144  * Returns next action to take. 
 1145  */
 1146 static enum sysctl_iter_action
 1147 sysctl_sysctl_next_node(struct sysctl_oid *oidp, int *name, unsigned int namelen,
 1148     bool honor_skip)
 1149 {
 1150 
 1151         if ((oidp->oid_kind & CTLFLAG_DORMANT) != 0)
 1152                 return (ITER_SIBLINGS);
 1153 
 1154         if (honor_skip && (oidp->oid_kind & CTLFLAG_SKIP) != 0)
 1155                 return (ITER_SIBLINGS);
 1156 
 1157         if (namelen == 0) {
 1158                 /*
 1159                  * We have reached a node with a full name match and are
 1160                  * looking for the next oid in its children.
 1161                  *
 1162                  * For CTL_SYSCTL_NEXTNOSKIP we are done.
 1163                  *
 1164                  * For CTL_SYSCTL_NEXT we skip CTLTYPE_NODE (unless it
 1165                  * has a handler) and move on to the children.
 1166                  */
 1167                 if (!honor_skip)
 1168                         return (ITER_FOUND);
 1169                 if ((oidp->oid_kind & CTLTYPE) != CTLTYPE_NODE) 
 1170                         return (ITER_FOUND);
 1171                 /* If node does not have an iterator, treat it as leaf */
 1172                 if (oidp->oid_handler) 
 1173                         return (ITER_FOUND);
 1174 
 1175                 /* Report oid as a node to iterate */
 1176                 return (ITER_CHILDREN);
 1177         }
 1178 
 1179         /*
 1180          * No match yet. Continue seeking the given name.
 1181          *
 1182          * We are iterating in order by oid_number, so skip oids lower
 1183          * than the one we are looking for.
 1184          *
 1185          * When the current oid_number is higher than the one we seek,
 1186          * that means we have reached the next oid in the sequence and
 1187          * should return it.
 1188          *
 1189          * If the oid_number matches the name at this level then we
 1190          * have to find a node to continue searching at the next level.
 1191          */
 1192         if (oidp->oid_number < *name)
 1193                 return (ITER_SIBLINGS);
 1194         if (oidp->oid_number > *name) {
 1195                 /*
 1196                  * We have reached the next oid.
 1197                  *
 1198                  * For CTL_SYSCTL_NEXTNOSKIP we are done.
 1199                  *
 1200                  * For CTL_SYSCTL_NEXT we skip CTLTYPE_NODE (unless it
 1201                  * has a handler) and move on to the children.
 1202                  */
 1203                 if (!honor_skip)
 1204                         return (ITER_FOUND);
 1205                 if ((oidp->oid_kind & CTLTYPE) != CTLTYPE_NODE)
 1206                         return (ITER_FOUND);
 1207                 /* If node does not have an iterator, treat it as leaf */
 1208                 if (oidp->oid_handler)
 1209                         return (ITER_FOUND);
 1210                 return (ITER_CHILDREN);
 1211         }
 1212 
 1213         /* match at a current level */
 1214         if ((oidp->oid_kind & CTLTYPE) != CTLTYPE_NODE)
 1215                 return (ITER_SIBLINGS);
 1216         if (oidp->oid_handler)
 1217                 return (ITER_SIBLINGS);
 1218 
 1219         return (ITER_CHILDREN);
 1220 }
 1221 
 1222 /*
 1223  * Recursively walk the sysctl subtree at lsp until we find the given name.
 1224  * Returns true and fills in next oid data in @next and @len if oid is found.
 1225  */
 1226 static bool
 1227 sysctl_sysctl_next_action(struct sysctl_oid_list *lsp, int *name, u_int namelen, 
 1228     int *next, int *len, int level, bool honor_skip)
 1229 {
 1230         struct sysctl_oid_list *next_lsp;
 1231         struct sysctl_oid *oidp = NULL, key;
 1232         bool success = false;
 1233         enum sysctl_iter_action action;
 1234 
 1235         SYSCTL_ASSERT_LOCKED();
 1236         /*
 1237          * Start the search at the requested oid.  But if not found, then scan
 1238          * through all children.
 1239          */
 1240         if (namelen > 0) {
 1241                 key.oid_number = *name;
 1242                 oidp = RB_FIND(sysctl_oid_list, lsp, &key);
 1243         }
 1244         if (!oidp)
 1245                 oidp = RB_MIN(sysctl_oid_list, lsp);
 1246         for(; oidp != NULL; oidp = RB_NEXT(sysctl_oid_list, lsp, oidp)) {
 1247                 action = sysctl_sysctl_next_node(oidp, name, namelen,
 1248                     honor_skip);
 1249                 if (action == ITER_SIBLINGS)
 1250                         continue;
 1251                 if (action == ITER_FOUND) {
 1252                         success = true;
 1253                         break;
 1254                 }
 1255                 KASSERT((action== ITER_CHILDREN), ("ret(%d)!=ITER_CHILDREN", action));
 1256 
 1257                 next_lsp = SYSCTL_CHILDREN(oidp);
 1258                 if (namelen == 0) {
 1259                         success = sysctl_sysctl_next_action(next_lsp, NULL, 0,
 1260                             next + 1, len, level + 1, honor_skip);
 1261                 } else {
 1262                         success = sysctl_sysctl_next_action(next_lsp, name + 1,
 1263                             namelen - 1, next + 1, len, level + 1, honor_skip);
 1264                         if (!success) {
 1265 
 1266                                 /*
 1267                                  * We maintain the invariant that current node oid
 1268                                  * is >= the oid provided in @name.
 1269                                  * As there are no usable children at this node,
 1270                                  *  current node oid is strictly > than the requested
 1271                                  *  oid.
 1272                                  * Hence, reduce namelen to 0 to allow for picking first
 1273                                  *  nodes/leafs in the next node in list.
 1274                                  */
 1275                                 namelen = 0;
 1276                         }
 1277                 }
 1278                 if (success)
 1279                         break;
 1280         }
 1281 
 1282         if (success) {
 1283                 *next = oidp->oid_number;
 1284                 if (level > *len)
 1285                         *len = level;
 1286         }
 1287 
 1288         return (success);
 1289 }
 1290 
 1291 static int
 1292 sysctl_sysctl_next(SYSCTL_HANDLER_ARGS)
 1293 {
 1294         int *name = (int *) arg1;
 1295         u_int namelen = arg2;
 1296         int len, error;
 1297         bool success;
 1298         struct sysctl_oid_list *lsp = &sysctl__children;
 1299         struct rm_priotracker tracker;
 1300         int next[CTL_MAXNAME];
 1301 
 1302         len = 0;
 1303         SYSCTL_RLOCK(&tracker);
 1304         success = sysctl_sysctl_next_action(lsp, name, namelen, next, &len, 1,
 1305             oidp->oid_number == CTL_SYSCTL_NEXT);
 1306         SYSCTL_RUNLOCK(&tracker);
 1307         if (!success)
 1308                 return (ENOENT);
 1309         error = SYSCTL_OUT(req, next, len * sizeof (int));
 1310         return (error);
 1311 }
 1312 
 1313 /*
 1314  * XXXRW/JA: Shouldn't return next data for nodes that we don't permit in
 1315  * capability mode.
 1316  */
 1317 static SYSCTL_NODE(_sysctl, CTL_SYSCTL_NEXT, next, CTLFLAG_RD |
 1318     CTLFLAG_MPSAFE | CTLFLAG_CAPRD, sysctl_sysctl_next, "");
 1319 
 1320 static SYSCTL_NODE(_sysctl, CTL_SYSCTL_NEXTNOSKIP, nextnoskip, CTLFLAG_RD |
 1321     CTLFLAG_MPSAFE | CTLFLAG_CAPRD, sysctl_sysctl_next, "");
 1322 
 1323 static int
 1324 name2oid(char *name, int *oid, int *len, struct sysctl_oid **oidpp)
 1325 {
 1326         struct sysctl_oid *oidp;
 1327         struct sysctl_oid_list *lsp = &sysctl__children;
 1328 
 1329         SYSCTL_ASSERT_LOCKED();
 1330 
 1331         for (*len = 0; *len < CTL_MAXNAME;) {
 1332                 oidp = sysctl_find_oidname(strsep(&name, "."), lsp);
 1333                 if (oidp == NULL)
 1334                         return (ENOENT);
 1335                 *oid++ = oidp->oid_number;
 1336                 (*len)++;
 1337 
 1338                 if (name == NULL || *name == '\0') {
 1339                         if (oidpp)
 1340                                 *oidpp = oidp;
 1341                         return (0);
 1342                 }
 1343 
 1344                 if ((oidp->oid_kind & CTLTYPE) != CTLTYPE_NODE)
 1345                         break;
 1346 
 1347                 if (oidp->oid_handler)
 1348                         break;
 1349 
 1350                 lsp = SYSCTL_CHILDREN(oidp);
 1351         }
 1352         return (ENOENT);
 1353 }
 1354 
 1355 static int
 1356 sysctl_sysctl_name2oid(SYSCTL_HANDLER_ARGS)
 1357 {
 1358         char *p;
 1359         int error, oid[CTL_MAXNAME], len = 0;
 1360         struct sysctl_oid *op = NULL;
 1361         struct rm_priotracker tracker;
 1362         char buf[32];
 1363 
 1364         if (!req->newlen) 
 1365                 return (ENOENT);
 1366         if (req->newlen >= MAXPATHLEN)  /* XXX arbitrary, undocumented */
 1367                 return (ENAMETOOLONG);
 1368 
 1369         p = buf;
 1370         if (req->newlen >= sizeof(buf))
 1371                 p = malloc(req->newlen+1, M_SYSCTL, M_WAITOK);
 1372 
 1373         error = SYSCTL_IN(req, p, req->newlen);
 1374         if (error) {
 1375                 if (p != buf)
 1376                         free(p, M_SYSCTL);
 1377                 return (error);
 1378         }
 1379 
 1380         p [req->newlen] = '\0';
 1381 
 1382         SYSCTL_RLOCK(&tracker);
 1383         error = name2oid(p, oid, &len, &op);
 1384         SYSCTL_RUNLOCK(&tracker);
 1385 
 1386         if (p != buf)
 1387                 free(p, M_SYSCTL);
 1388 
 1389         if (error)
 1390                 return (error);
 1391 
 1392         error = SYSCTL_OUT(req, oid, len * sizeof *oid);
 1393         return (error);
 1394 }
 1395 
 1396 /*
 1397  * XXXRW/JA: Shouldn't return name2oid data for nodes that we don't permit in
 1398  * capability mode.
 1399  */
 1400 SYSCTL_PROC(_sysctl, CTL_SYSCTL_NAME2OID, name2oid, CTLTYPE_INT | CTLFLAG_RW |
 1401     CTLFLAG_ANYBODY | CTLFLAG_MPSAFE | CTLFLAG_CAPRW, 0, 0,
 1402     sysctl_sysctl_name2oid, "I", "");
 1403 
 1404 static int
 1405 sysctl_sysctl_oidfmt(SYSCTL_HANDLER_ARGS)
 1406 {
 1407         struct sysctl_oid *oid;
 1408         struct rm_priotracker tracker;
 1409         int error;
 1410 
 1411         error = sysctl_wire_old_buffer(req, 0);
 1412         if (error)
 1413                 return (error);
 1414 
 1415         SYSCTL_RLOCK(&tracker);
 1416         error = sysctl_find_oid(arg1, arg2, &oid, NULL, req);
 1417         if (error)
 1418                 goto out;
 1419 
 1420         if (oid->oid_fmt == NULL) {
 1421                 error = ENOENT;
 1422                 goto out;
 1423         }
 1424         error = SYSCTL_OUT(req, &oid->oid_kind, sizeof(oid->oid_kind));
 1425         if (error)
 1426                 goto out;
 1427         error = SYSCTL_OUT(req, oid->oid_fmt, strlen(oid->oid_fmt) + 1);
 1428  out:
 1429         SYSCTL_RUNLOCK(&tracker);
 1430         return (error);
 1431 }
 1432 
 1433 static SYSCTL_NODE(_sysctl, CTL_SYSCTL_OIDFMT, oidfmt, CTLFLAG_RD |
 1434     CTLFLAG_MPSAFE | CTLFLAG_CAPRD, sysctl_sysctl_oidfmt, "");
 1435 
 1436 static int
 1437 sysctl_sysctl_oiddescr(SYSCTL_HANDLER_ARGS)
 1438 {
 1439         struct sysctl_oid *oid;
 1440         struct rm_priotracker tracker;
 1441         int error;
 1442 
 1443         error = sysctl_wire_old_buffer(req, 0);
 1444         if (error)
 1445                 return (error);
 1446 
 1447         SYSCTL_RLOCK(&tracker);
 1448         error = sysctl_find_oid(arg1, arg2, &oid, NULL, req);
 1449         if (error)
 1450                 goto out;
 1451 
 1452         if (oid->oid_descr == NULL) {
 1453                 error = ENOENT;
 1454                 goto out;
 1455         }
 1456         error = SYSCTL_OUT(req, oid->oid_descr, strlen(oid->oid_descr) + 1);
 1457  out:
 1458         SYSCTL_RUNLOCK(&tracker);
 1459         return (error);
 1460 }
 1461 
 1462 static SYSCTL_NODE(_sysctl, CTL_SYSCTL_OIDDESCR, oiddescr, CTLFLAG_RD |
 1463     CTLFLAG_MPSAFE|CTLFLAG_CAPRD, sysctl_sysctl_oiddescr, "");
 1464 
 1465 static int
 1466 sysctl_sysctl_oidlabel(SYSCTL_HANDLER_ARGS)
 1467 {
 1468         struct sysctl_oid *oid;
 1469         struct rm_priotracker tracker;
 1470         int error;
 1471 
 1472         error = sysctl_wire_old_buffer(req, 0);
 1473         if (error)
 1474                 return (error);
 1475 
 1476         SYSCTL_RLOCK(&tracker);
 1477         error = sysctl_find_oid(arg1, arg2, &oid, NULL, req);
 1478         if (error)
 1479                 goto out;
 1480 
 1481         if (oid->oid_label == NULL) {
 1482                 error = ENOENT;
 1483                 goto out;
 1484         }
 1485         error = SYSCTL_OUT(req, oid->oid_label, strlen(oid->oid_label) + 1);
 1486  out:
 1487         SYSCTL_RUNLOCK(&tracker);
 1488         return (error);
 1489 }
 1490 
 1491 static SYSCTL_NODE(_sysctl, CTL_SYSCTL_OIDLABEL, oidlabel, CTLFLAG_RD |
 1492     CTLFLAG_MPSAFE | CTLFLAG_CAPRD, sysctl_sysctl_oidlabel, "");
 1493 
 1494 /*
 1495  * Default "handler" functions.
 1496  */
 1497 
 1498 /*
 1499  * Handle a bool.
 1500  * Two cases:
 1501  *     a variable:  point arg1 at it.
 1502  *     a constant:  pass it in arg2.
 1503  */
 1504 
 1505 int
 1506 sysctl_handle_bool(SYSCTL_HANDLER_ARGS)
 1507 {
 1508         uint8_t temp;
 1509         int error;
 1510 
 1511         /*
 1512          * Attempt to get a coherent snapshot by making a copy of the data.
 1513          */
 1514         if (arg1)
 1515                 temp = *(bool *)arg1 ? 1 : 0;
 1516         else
 1517                 temp = arg2 ? 1 : 0;
 1518 
 1519         error = SYSCTL_OUT(req, &temp, sizeof(temp));
 1520         if (error || !req->newptr)
 1521                 return (error);
 1522 
 1523         if (!arg1)
 1524                 error = EPERM;
 1525         else {
 1526                 error = SYSCTL_IN(req, &temp, sizeof(temp));
 1527                 if (!error)
 1528                         *(bool *)arg1 = temp ? 1 : 0;
 1529         }
 1530         return (error);
 1531 }
 1532 
 1533 /*
 1534  * Handle an int8_t, signed or unsigned.
 1535  * Two cases:
 1536  *     a variable:  point arg1 at it.
 1537  *     a constant:  pass it in arg2.
 1538  */
 1539 
 1540 int
 1541 sysctl_handle_8(SYSCTL_HANDLER_ARGS)
 1542 {
 1543         int8_t tmpout;
 1544         int error = 0;
 1545 
 1546         /*
 1547          * Attempt to get a coherent snapshot by making a copy of the data.
 1548          */
 1549         if (arg1)
 1550                 tmpout = *(int8_t *)arg1;
 1551         else
 1552                 tmpout = arg2;
 1553         error = SYSCTL_OUT(req, &tmpout, sizeof(tmpout));
 1554 
 1555         if (error || !req->newptr)
 1556                 return (error);
 1557 
 1558         if (!arg1)
 1559                 error = EPERM;
 1560         else
 1561                 error = SYSCTL_IN(req, arg1, sizeof(tmpout));
 1562         return (error);
 1563 }
 1564 
 1565 /*
 1566  * Handle an int16_t, signed or unsigned.
 1567  * Two cases:
 1568  *     a variable:  point arg1 at it.
 1569  *     a constant:  pass it in arg2.
 1570  */
 1571 
 1572 int
 1573 sysctl_handle_16(SYSCTL_HANDLER_ARGS)
 1574 {
 1575         int16_t tmpout;
 1576         int error = 0;
 1577 
 1578         /*
 1579          * Attempt to get a coherent snapshot by making a copy of the data.
 1580          */
 1581         if (arg1)
 1582                 tmpout = *(int16_t *)arg1;
 1583         else
 1584                 tmpout = arg2;
 1585         error = SYSCTL_OUT(req, &tmpout, sizeof(tmpout));
 1586 
 1587         if (error || !req->newptr)
 1588                 return (error);
 1589 
 1590         if (!arg1)
 1591                 error = EPERM;
 1592         else
 1593                 error = SYSCTL_IN(req, arg1, sizeof(tmpout));
 1594         return (error);
 1595 }
 1596 
 1597 /*
 1598  * Handle an int32_t, signed or unsigned.
 1599  * Two cases:
 1600  *     a variable:  point arg1 at it.
 1601  *     a constant:  pass it in arg2.
 1602  */
 1603 
 1604 int
 1605 sysctl_handle_32(SYSCTL_HANDLER_ARGS)
 1606 {
 1607         int32_t tmpout;
 1608         int error = 0;
 1609 
 1610         /*
 1611          * Attempt to get a coherent snapshot by making a copy of the data.
 1612          */
 1613         if (arg1)
 1614                 tmpout = *(int32_t *)arg1;
 1615         else
 1616                 tmpout = arg2;
 1617         error = SYSCTL_OUT(req, &tmpout, sizeof(tmpout));
 1618 
 1619         if (error || !req->newptr)
 1620                 return (error);
 1621 
 1622         if (!arg1)
 1623                 error = EPERM;
 1624         else
 1625                 error = SYSCTL_IN(req, arg1, sizeof(tmpout));
 1626         return (error);
 1627 }
 1628 
 1629 /*
 1630  * Handle an int, signed or unsigned.
 1631  * Two cases:
 1632  *     a variable:  point arg1 at it.
 1633  *     a constant:  pass it in arg2.
 1634  */
 1635 
 1636 int
 1637 sysctl_handle_int(SYSCTL_HANDLER_ARGS)
 1638 {
 1639         int tmpout, error = 0;
 1640 
 1641         /*
 1642          * Attempt to get a coherent snapshot by making a copy of the data.
 1643          */
 1644         if (arg1)
 1645                 tmpout = *(int *)arg1;
 1646         else
 1647                 tmpout = arg2;
 1648         error = SYSCTL_OUT(req, &tmpout, sizeof(int));
 1649 
 1650         if (error || !req->newptr)
 1651                 return (error);
 1652 
 1653         if (!arg1)
 1654                 error = EPERM;
 1655         else
 1656                 error = SYSCTL_IN(req, arg1, sizeof(int));
 1657         return (error);
 1658 }
 1659 
 1660 /*
 1661  * Based on on sysctl_handle_int() convert milliseconds into ticks.
 1662  * Note: this is used by TCP.
 1663  */
 1664 
 1665 int
 1666 sysctl_msec_to_ticks(SYSCTL_HANDLER_ARGS)
 1667 {
 1668         int error, s, tt;
 1669 
 1670         tt = *(int *)arg1;
 1671         s = (int)((int64_t)tt * 1000 / hz);
 1672 
 1673         error = sysctl_handle_int(oidp, &s, 0, req);
 1674         if (error || !req->newptr)
 1675                 return (error);
 1676 
 1677         tt = (int)((int64_t)s * hz / 1000);
 1678         if (tt < 1)
 1679                 return (EINVAL);
 1680 
 1681         *(int *)arg1 = tt;
 1682         return (0);
 1683 }
 1684 
 1685 /*
 1686  * Handle a long, signed or unsigned.
 1687  * Two cases:
 1688  *     a variable:  point arg1 at it.
 1689  *     a constant:  pass it in arg2.
 1690  */
 1691 
 1692 int
 1693 sysctl_handle_long(SYSCTL_HANDLER_ARGS)
 1694 {
 1695         int error = 0;
 1696         long tmplong;
 1697 #ifdef SCTL_MASK32
 1698         int tmpint;
 1699 #endif
 1700 
 1701         /*
 1702          * Attempt to get a coherent snapshot by making a copy of the data.
 1703          */
 1704         if (arg1)
 1705                 tmplong = *(long *)arg1;
 1706         else
 1707                 tmplong = arg2;
 1708 #ifdef SCTL_MASK32
 1709         if (req->flags & SCTL_MASK32) {
 1710                 tmpint = tmplong;
 1711                 error = SYSCTL_OUT(req, &tmpint, sizeof(int));
 1712         } else
 1713 #endif
 1714                 error = SYSCTL_OUT(req, &tmplong, sizeof(long));
 1715 
 1716         if (error || !req->newptr)
 1717                 return (error);
 1718 
 1719         if (!arg1)
 1720                 error = EPERM;
 1721 #ifdef SCTL_MASK32
 1722         else if (req->flags & SCTL_MASK32) {
 1723                 error = SYSCTL_IN(req, &tmpint, sizeof(int));
 1724                 *(long *)arg1 = (long)tmpint;
 1725         }
 1726 #endif
 1727         else
 1728                 error = SYSCTL_IN(req, arg1, sizeof(long));
 1729         return (error);
 1730 }
 1731 
 1732 /*
 1733  * Handle a 64 bit int, signed or unsigned.
 1734  * Two cases:
 1735  *     a variable:  point arg1 at it.
 1736  *     a constant:  pass it in arg2.
 1737  */
 1738 int
 1739 sysctl_handle_64(SYSCTL_HANDLER_ARGS)
 1740 {
 1741         int error = 0;
 1742         uint64_t tmpout;
 1743 
 1744         /*
 1745          * Attempt to get a coherent snapshot by making a copy of the data.
 1746          */
 1747         if (arg1)
 1748                 tmpout = *(uint64_t *)arg1;
 1749         else
 1750                 tmpout = arg2;
 1751         error = SYSCTL_OUT(req, &tmpout, sizeof(uint64_t));
 1752 
 1753         if (error || !req->newptr)
 1754                 return (error);
 1755 
 1756         if (!arg1)
 1757                 error = EPERM;
 1758         else
 1759                 error = SYSCTL_IN(req, arg1, sizeof(uint64_t));
 1760         return (error);
 1761 }
 1762 
 1763 /*
 1764  * Handle our generic '\0' terminated 'C' string.
 1765  * Two cases:
 1766  *      a variable string:  point arg1 at it, arg2 is max length.
 1767  *      a constant string:  point arg1 at it, arg2 is zero.
 1768  */
 1769 
 1770 int
 1771 sysctl_handle_string(SYSCTL_HANDLER_ARGS)
 1772 {
 1773         char *tmparg;
 1774         size_t outlen;
 1775         int error = 0, ro_string = 0;
 1776 
 1777         /*
 1778          * If the sysctl isn't writable and isn't a preallocated tunable that
 1779          * can be modified by kenv(2), microoptimise and treat it as a
 1780          * read-only string.
 1781          * A zero-length buffer indicates a fixed size read-only
 1782          * string.  In ddb, don't worry about trying to make a malloced
 1783          * snapshot.
 1784          */
 1785         if ((oidp->oid_kind & (CTLFLAG_WR | CTLFLAG_TUN)) == 0 ||
 1786             arg2 == 0 || kdb_active) {
 1787                 arg2 = strlen((char *)arg1) + 1;
 1788                 ro_string = 1;
 1789         }
 1790 
 1791         if (req->oldptr != NULL) {
 1792                 if (ro_string) {
 1793                         tmparg = arg1;
 1794                         outlen = strlen(tmparg) + 1;
 1795                 } else {
 1796                         tmparg = malloc(arg2, M_SYSCTLTMP, M_WAITOK);
 1797                         sx_slock(&sysctlstringlock);
 1798                         memcpy(tmparg, arg1, arg2);
 1799                         sx_sunlock(&sysctlstringlock);
 1800                         outlen = strlen(tmparg) + 1;
 1801                 }
 1802 
 1803                 error = SYSCTL_OUT(req, tmparg, outlen);
 1804 
 1805                 if (!ro_string)
 1806                         free(tmparg, M_SYSCTLTMP);
 1807         } else {
 1808                 if (!ro_string)
 1809                         sx_slock(&sysctlstringlock);
 1810                 outlen = strlen((char *)arg1) + 1;
 1811                 if (!ro_string)
 1812                         sx_sunlock(&sysctlstringlock);
 1813                 error = SYSCTL_OUT(req, NULL, outlen);
 1814         }
 1815         if (error || !req->newptr)
 1816                 return (error);
 1817 
 1818         if (req->newlen - req->newidx >= arg2 ||
 1819             req->newlen - req->newidx < 0) {
 1820                 error = EINVAL;
 1821         } else if (req->newlen - req->newidx == 0) {
 1822                 sx_xlock(&sysctlstringlock);
 1823                 ((char *)arg1)[0] = '\0';
 1824                 sx_xunlock(&sysctlstringlock);
 1825         } else if (req->newfunc == sysctl_new_kernel) {
 1826                 arg2 = req->newlen - req->newidx;
 1827                 sx_xlock(&sysctlstringlock);
 1828                 error = SYSCTL_IN(req, arg1, arg2);
 1829                 if (error == 0) {
 1830                         ((char *)arg1)[arg2] = '\0';
 1831                         req->newidx += arg2;
 1832                 }
 1833                 sx_xunlock(&sysctlstringlock);
 1834         } else {
 1835                 arg2 = req->newlen - req->newidx;
 1836                 tmparg = malloc(arg2, M_SYSCTLTMP, M_WAITOK);
 1837 
 1838                 error = SYSCTL_IN(req, tmparg, arg2);
 1839                 if (error) {
 1840                         free(tmparg, M_SYSCTLTMP);
 1841                         return (error);
 1842                 }
 1843 
 1844                 sx_xlock(&sysctlstringlock);
 1845                 memcpy(arg1, tmparg, arg2);
 1846                 ((char *)arg1)[arg2] = '\0';
 1847                 sx_xunlock(&sysctlstringlock);
 1848                 free(tmparg, M_SYSCTLTMP);
 1849                 req->newidx += arg2;
 1850         }
 1851         return (error);
 1852 }
 1853 
 1854 /*
 1855  * Handle any kind of opaque data.
 1856  * arg1 points to it, arg2 is the size.
 1857  */
 1858 
 1859 int
 1860 sysctl_handle_opaque(SYSCTL_HANDLER_ARGS)
 1861 {
 1862         int error, tries;
 1863         u_int generation;
 1864         struct sysctl_req req2;
 1865 
 1866         /*
 1867          * Attempt to get a coherent snapshot, by using the thread
 1868          * pre-emption counter updated from within mi_switch() to
 1869          * determine if we were pre-empted during a bcopy() or
 1870          * copyout(). Make 3 attempts at doing this before giving up.
 1871          * If we encounter an error, stop immediately.
 1872          */
 1873         tries = 0;
 1874         req2 = *req;
 1875 retry:
 1876         generation = curthread->td_generation;
 1877         error = SYSCTL_OUT(req, arg1, arg2);
 1878         if (error)
 1879                 return (error);
 1880         tries++;
 1881         if (generation != curthread->td_generation && tries < 3) {
 1882                 *req = req2;
 1883                 goto retry;
 1884         }
 1885 
 1886         error = SYSCTL_IN(req, arg1, arg2);
 1887 
 1888         return (error);
 1889 }
 1890 
 1891 /*
 1892  * Based on on sysctl_handle_64() convert microseconds to a sbintime.
 1893  */
 1894 int
 1895 sysctl_usec_to_sbintime(SYSCTL_HANDLER_ARGS)
 1896 {
 1897         int error;
 1898         int64_t usec;
 1899 
 1900         usec = sbttous(*(sbintime_t *)arg1);
 1901 
 1902         error = sysctl_handle_64(oidp, &usec, 0, req);
 1903         if (error || !req->newptr)
 1904                 return (error);
 1905 
 1906         *(sbintime_t *)arg1 = ustosbt(usec);
 1907 
 1908         return (0);
 1909 }
 1910 
 1911 /*
 1912  * Based on on sysctl_handle_64() convert milliseconds to a sbintime.
 1913  */
 1914 int
 1915 sysctl_msec_to_sbintime(SYSCTL_HANDLER_ARGS)
 1916 {
 1917         int error;
 1918         int64_t msec;
 1919 
 1920         msec = sbttoms(*(sbintime_t *)arg1);
 1921 
 1922         error = sysctl_handle_64(oidp, &msec, 0, req);
 1923         if (error || !req->newptr)
 1924                 return (error);
 1925 
 1926         *(sbintime_t *)arg1 = mstosbt(msec);
 1927 
 1928         return (0);
 1929 }
 1930 
 1931 /*
 1932  * Convert seconds to a struct timeval.  Intended for use with
 1933  * intervals and thus does not permit negative seconds.
 1934  */
 1935 int
 1936 sysctl_sec_to_timeval(SYSCTL_HANDLER_ARGS)
 1937 {
 1938         struct timeval *tv;
 1939         int error, secs;
 1940 
 1941         tv = arg1;
 1942         secs = tv->tv_sec;
 1943 
 1944         error = sysctl_handle_int(oidp, &secs, 0, req);
 1945         if (error || req->newptr == NULL)
 1946                 return (error);
 1947 
 1948         if (secs < 0)
 1949                 return (EINVAL);
 1950         tv->tv_sec = secs;
 1951 
 1952         return (0);
 1953 }
 1954 
 1955 /*
 1956  * Transfer functions to/from kernel space.
 1957  * XXX: rather untested at this point
 1958  */
 1959 static int
 1960 sysctl_old_kernel(struct sysctl_req *req, const void *p, size_t l)
 1961 {
 1962         size_t i = 0;
 1963 
 1964         if (req->oldptr) {
 1965                 i = l;
 1966                 if (req->oldlen <= req->oldidx)
 1967                         i = 0;
 1968                 else
 1969                         if (i > req->oldlen - req->oldidx)
 1970                                 i = req->oldlen - req->oldidx;
 1971                 if (i > 0)
 1972                         bcopy(p, (char *)req->oldptr + req->oldidx, i);
 1973         }
 1974         req->oldidx += l;
 1975         if (req->oldptr && i != l)
 1976                 return (ENOMEM);
 1977         return (0);
 1978 }
 1979 
 1980 static int
 1981 sysctl_new_kernel(struct sysctl_req *req, void *p, size_t l)
 1982 {
 1983         if (!req->newptr)
 1984                 return (0);
 1985         if (req->newlen - req->newidx < l)
 1986                 return (EINVAL);
 1987         bcopy((const char *)req->newptr + req->newidx, p, l);
 1988         req->newidx += l;
 1989         return (0);
 1990 }
 1991 
 1992 int
 1993 kernel_sysctl(struct thread *td, int *name, u_int namelen, void *old,
 1994     size_t *oldlenp, void *new, size_t newlen, size_t *retval, int flags)
 1995 {
 1996         int error = 0;
 1997         struct sysctl_req req;
 1998 
 1999         bzero(&req, sizeof req);
 2000 
 2001         req.td = td;
 2002         req.flags = flags;
 2003 
 2004         if (oldlenp) {
 2005                 req.oldlen = *oldlenp;
 2006         }
 2007         req.validlen = req.oldlen;
 2008 
 2009         if (old) {
 2010                 req.oldptr= old;
 2011         }
 2012 
 2013         if (new != NULL) {
 2014                 req.newlen = newlen;
 2015                 req.newptr = new;
 2016         }
 2017 
 2018         req.oldfunc = sysctl_old_kernel;
 2019         req.newfunc = sysctl_new_kernel;
 2020         req.lock = REQ_UNWIRED;
 2021 
 2022         error = sysctl_root(0, name, namelen, &req);
 2023 
 2024         if (req.lock == REQ_WIRED && req.validlen > 0)
 2025                 vsunlock(req.oldptr, req.validlen);
 2026 
 2027         if (error && error != ENOMEM)
 2028                 return (error);
 2029 
 2030         if (retval) {
 2031                 if (req.oldptr && req.oldidx > req.validlen)
 2032                         *retval = req.validlen;
 2033                 else
 2034                         *retval = req.oldidx;
 2035         }
 2036         return (error);
 2037 }
 2038 
 2039 int
 2040 kernel_sysctlbyname(struct thread *td, char *name, void *old, size_t *oldlenp,
 2041     void *new, size_t newlen, size_t *retval, int flags)
 2042 {
 2043         int oid[CTL_MAXNAME];
 2044         size_t oidlen, plen;
 2045         int error;
 2046 
 2047         oid[0] = CTL_SYSCTL;
 2048         oid[1] = CTL_SYSCTL_NAME2OID;
 2049         oidlen = sizeof(oid);
 2050 
 2051         error = kernel_sysctl(td, oid, 2, oid, &oidlen,
 2052             (void *)name, strlen(name), &plen, flags);
 2053         if (error)
 2054                 return (error);
 2055 
 2056         error = kernel_sysctl(td, oid, plen / sizeof(int), old, oldlenp,
 2057             new, newlen, retval, flags);
 2058         return (error);
 2059 }
 2060 
 2061 /*
 2062  * Transfer function to/from user space.
 2063  */
 2064 static int
 2065 sysctl_old_user(struct sysctl_req *req, const void *p, size_t l)
 2066 {
 2067         size_t i, len, origidx;
 2068         int error;
 2069 
 2070         origidx = req->oldidx;
 2071         req->oldidx += l;
 2072         if (req->oldptr == NULL)
 2073                 return (0);
 2074         /*
 2075          * If we have not wired the user supplied buffer and we are currently
 2076          * holding locks, drop a witness warning, as it's possible that
 2077          * write operations to the user page can sleep.
 2078          */
 2079         if (req->lock != REQ_WIRED)
 2080                 WITNESS_WARN(WARN_GIANTOK | WARN_SLEEPOK, NULL,
 2081                     "sysctl_old_user()");
 2082         i = l;
 2083         len = req->validlen;
 2084         if (len <= origidx)
 2085                 i = 0;
 2086         else {
 2087                 if (i > len - origidx)
 2088                         i = len - origidx;
 2089                 if (req->lock == REQ_WIRED) {
 2090                         error = copyout_nofault(p, (char *)req->oldptr +
 2091                             origidx, i);
 2092                 } else
 2093                         error = copyout(p, (char *)req->oldptr + origidx, i);
 2094                 if (error != 0)
 2095                         return (error);
 2096         }
 2097         if (i < l)
 2098                 return (ENOMEM);
 2099         return (0);
 2100 }
 2101 
 2102 static int
 2103 sysctl_new_user(struct sysctl_req *req, void *p, size_t l)
 2104 {
 2105         int error;
 2106 
 2107         if (!req->newptr)
 2108                 return (0);
 2109         if (req->newlen - req->newidx < l)
 2110                 return (EINVAL);
 2111         WITNESS_WARN(WARN_GIANTOK | WARN_SLEEPOK, NULL,
 2112             "sysctl_new_user()");
 2113         error = copyin((const char *)req->newptr + req->newidx, p, l);
 2114         req->newidx += l;
 2115         return (error);
 2116 }
 2117 
 2118 /*
 2119  * Wire the user space destination buffer.  If set to a value greater than
 2120  * zero, the len parameter limits the maximum amount of wired memory.
 2121  */
 2122 int
 2123 sysctl_wire_old_buffer(struct sysctl_req *req, size_t len)
 2124 {
 2125         int ret;
 2126         size_t wiredlen;
 2127 
 2128         wiredlen = (len > 0 && len < req->oldlen) ? len : req->oldlen;
 2129         ret = 0;
 2130         if (req->lock != REQ_WIRED && req->oldptr &&
 2131             req->oldfunc == sysctl_old_user) {
 2132                 if (wiredlen != 0) {
 2133                         ret = vslock(req->oldptr, wiredlen);
 2134                         if (ret != 0) {
 2135                                 if (ret != ENOMEM)
 2136                                         return (ret);
 2137                                 wiredlen = 0;
 2138                         }
 2139                 }
 2140                 req->lock = REQ_WIRED;
 2141                 req->validlen = wiredlen;
 2142         }
 2143         return (0);
 2144 }
 2145 
 2146 int
 2147 sysctl_find_oid(int *name, u_int namelen, struct sysctl_oid **noid,
 2148     int *nindx, struct sysctl_req *req)
 2149 {
 2150         struct sysctl_oid_list *lsp;
 2151         struct sysctl_oid *oid;
 2152         struct sysctl_oid key;
 2153         int indx;
 2154 
 2155         SYSCTL_ASSERT_LOCKED();
 2156         lsp = &sysctl__children;
 2157         indx = 0;
 2158         while (indx < CTL_MAXNAME) {
 2159                 key.oid_number = name[indx];
 2160                 oid = RB_FIND(sysctl_oid_list, lsp, &key);
 2161                 if (oid == NULL)
 2162                         return (ENOENT);
 2163 
 2164                 indx++;
 2165                 if ((oid->oid_kind & CTLTYPE) == CTLTYPE_NODE) {
 2166                         if (oid->oid_handler != NULL || indx == namelen) {
 2167                                 *noid = oid;
 2168                                 if (nindx != NULL)
 2169                                         *nindx = indx;
 2170                                 KASSERT((oid->oid_kind & CTLFLAG_DYING) == 0,
 2171                                     ("%s found DYING node %p", __func__, oid));
 2172                                 return (0);
 2173                         }
 2174                         lsp = SYSCTL_CHILDREN(oid);
 2175                 } else if (indx == namelen) {
 2176                         if ((oid->oid_kind & CTLFLAG_DORMANT) != 0)
 2177                                 return (ENOENT);
 2178                         *noid = oid;
 2179                         if (nindx != NULL)
 2180                                 *nindx = indx;
 2181                         KASSERT((oid->oid_kind & CTLFLAG_DYING) == 0,
 2182                             ("%s found DYING node %p", __func__, oid));
 2183                         return (0);
 2184                 } else {
 2185                         return (ENOTDIR);
 2186                 }
 2187         }
 2188         return (ENOENT);
 2189 }
 2190 
 2191 /*
 2192  * Traverse our tree, and find the right node, execute whatever it points
 2193  * to, and return the resulting error code.
 2194  */
 2195 
 2196 static int
 2197 sysctl_root(SYSCTL_HANDLER_ARGS)
 2198 {
 2199         struct sysctl_oid *oid;
 2200         struct rm_priotracker tracker;
 2201         int error, indx, lvl;
 2202 
 2203         SYSCTL_RLOCK(&tracker);
 2204 
 2205         error = sysctl_find_oid(arg1, arg2, &oid, &indx, req);
 2206         if (error)
 2207                 goto out;
 2208 
 2209         if ((oid->oid_kind & CTLTYPE) == CTLTYPE_NODE) {
 2210                 /*
 2211                  * You can't call a sysctl when it's a node, but has
 2212                  * no handler.  Inform the user that it's a node.
 2213                  * The indx may or may not be the same as namelen.
 2214                  */
 2215                 if (oid->oid_handler == NULL) {
 2216                         error = EISDIR;
 2217                         goto out;
 2218                 }
 2219         }
 2220 
 2221         /* Is this sysctl writable? */
 2222         if (req->newptr && !(oid->oid_kind & CTLFLAG_WR)) {
 2223                 error = EPERM;
 2224                 goto out;
 2225         }
 2226 
 2227         KASSERT(req->td != NULL, ("sysctl_root(): req->td == NULL"));
 2228 
 2229 #ifdef CAPABILITY_MODE
 2230         /*
 2231          * If the process is in capability mode, then don't permit reading or
 2232          * writing unless specifically granted for the node.
 2233          */
 2234         if (IN_CAPABILITY_MODE(req->td)) {
 2235                 if ((req->oldptr && !(oid->oid_kind & CTLFLAG_CAPRD)) ||
 2236                     (req->newptr && !(oid->oid_kind & CTLFLAG_CAPWR))) {
 2237                         error = EPERM;
 2238                         goto out;
 2239                 }
 2240         }
 2241 #endif
 2242 
 2243         /* Is this sysctl sensitive to securelevels? */
 2244         if (req->newptr && (oid->oid_kind & CTLFLAG_SECURE)) {
 2245                 lvl = (oid->oid_kind & CTLMASK_SECURE) >> CTLSHIFT_SECURE;
 2246                 error = securelevel_gt(req->td->td_ucred, lvl);
 2247                 if (error)
 2248                         goto out;
 2249         }
 2250 
 2251         /* Is this sysctl writable by only privileged users? */
 2252         if (req->newptr && !(oid->oid_kind & CTLFLAG_ANYBODY)) {
 2253                 int priv;
 2254 
 2255                 if (oid->oid_kind & CTLFLAG_PRISON)
 2256                         priv = PRIV_SYSCTL_WRITEJAIL;
 2257 #ifdef VIMAGE
 2258                 else if ((oid->oid_kind & CTLFLAG_VNET) &&
 2259                      prison_owns_vnet(req->td->td_ucred))
 2260                         priv = PRIV_SYSCTL_WRITEJAIL;
 2261 #endif
 2262                 else
 2263                         priv = PRIV_SYSCTL_WRITE;
 2264                 error = priv_check(req->td, priv);
 2265                 if (error)
 2266                         goto out;
 2267         }
 2268 
 2269         if (!oid->oid_handler) {
 2270                 error = EINVAL;
 2271                 goto out;
 2272         }
 2273 
 2274         if ((oid->oid_kind & CTLTYPE) == CTLTYPE_NODE) {
 2275                 arg1 = (int *)arg1 + indx;
 2276                 arg2 -= indx;
 2277         } else {
 2278                 arg1 = oid->oid_arg1;
 2279                 arg2 = oid->oid_arg2;
 2280         }
 2281 #ifdef MAC
 2282         error = mac_system_check_sysctl(req->td->td_ucred, oid, arg1, arg2,
 2283             req);
 2284         if (error != 0)
 2285                 goto out;
 2286 #endif
 2287 #ifdef VIMAGE
 2288         if ((oid->oid_kind & CTLFLAG_VNET) && arg1 != NULL)
 2289                 arg1 = (void *)(curvnet->vnet_data_base + (uintptr_t)arg1);
 2290 #endif
 2291         error = sysctl_root_handler_locked(oid, arg1, arg2, req, &tracker);
 2292 
 2293 out:
 2294         SYSCTL_RUNLOCK(&tracker);
 2295         return (error);
 2296 }
 2297 
 2298 #ifndef _SYS_SYSPROTO_H_
 2299 struct __sysctl_args {
 2300         int     *name;
 2301         u_int   namelen;
 2302         void    *old;
 2303         size_t  *oldlenp;
 2304         void    *new;
 2305         size_t  newlen;
 2306 };
 2307 #endif
 2308 int
 2309 sys___sysctl(struct thread *td, struct __sysctl_args *uap)
 2310 {
 2311         int error, i, name[CTL_MAXNAME];
 2312         size_t j;
 2313 
 2314         if (uap->namelen > CTL_MAXNAME || uap->namelen < 2)
 2315                 return (EINVAL);
 2316 
 2317         error = copyin(uap->name, &name, uap->namelen * sizeof(int));
 2318         if (error)
 2319                 return (error);
 2320 
 2321         error = userland_sysctl(td, name, uap->namelen,
 2322                 uap->old, uap->oldlenp, 0,
 2323                 uap->new, uap->newlen, &j, 0);
 2324         if (error && error != ENOMEM)
 2325                 return (error);
 2326         if (uap->oldlenp) {
 2327                 i = copyout(&j, uap->oldlenp, sizeof(j));
 2328                 if (i)
 2329                         return (i);
 2330         }
 2331         return (error);
 2332 }
 2333 
 2334 int
 2335 kern___sysctlbyname(struct thread *td, const char *oname, size_t namelen,
 2336     void *old, size_t *oldlenp, void *new, size_t newlen, size_t *retval,
 2337     int flags, bool inkernel)
 2338 {
 2339         int oid[CTL_MAXNAME];
 2340         char namebuf[16];
 2341         char *name;
 2342         size_t oidlen;
 2343         int error;
 2344 
 2345         if (namelen > MAXPATHLEN || namelen == 0)
 2346                 return (EINVAL);
 2347         name = namebuf;
 2348         if (namelen > sizeof(namebuf))
 2349                 name = malloc(namelen, M_SYSCTL, M_WAITOK);
 2350         error = copyin(oname, name, namelen);
 2351         if (error != 0)
 2352                 goto out;
 2353 
 2354         oid[0] = CTL_SYSCTL;
 2355         oid[1] = CTL_SYSCTL_NAME2OID;
 2356         oidlen = sizeof(oid);
 2357         error = kernel_sysctl(td, oid, 2, oid, &oidlen, (void *)name, namelen,
 2358             retval, flags);
 2359         if (error != 0)
 2360                 goto out;
 2361         error = userland_sysctl(td, oid, *retval / sizeof(int), old, oldlenp,
 2362             inkernel, new, newlen, retval, flags);
 2363 
 2364 out:
 2365         if (namelen > sizeof(namebuf))
 2366                 free(name, M_SYSCTL);
 2367         return (error);
 2368 }
 2369 
 2370 #ifndef _SYS_SYSPROTO_H_
 2371 struct __sysctlbyname_args {
 2372         const char      *name;
 2373         size_t  namelen;
 2374         void    *old;
 2375         size_t  *oldlenp;
 2376         void    *new;
 2377         size_t  newlen;
 2378 };
 2379 #endif
 2380 int
 2381 sys___sysctlbyname(struct thread *td, struct __sysctlbyname_args *uap)
 2382 {
 2383         size_t rv;
 2384         int error;
 2385 
 2386         error = kern___sysctlbyname(td, uap->name, uap->namelen, uap->old,
 2387             uap->oldlenp, uap->new, uap->newlen, &rv, 0, 0);
 2388         if (error != 0)
 2389                 return (error);
 2390         if (uap->oldlenp != NULL)
 2391                 error = copyout(&rv, uap->oldlenp, sizeof(rv));
 2392 
 2393         return (error);
 2394 }
 2395 
 2396 /*
 2397  * This is used from various compatibility syscalls too.  That's why name
 2398  * must be in kernel space.
 2399  */
 2400 int
 2401 userland_sysctl(struct thread *td, int *name, u_int namelen, void *old,
 2402     size_t *oldlenp, int inkernel, const void *new, size_t newlen,
 2403     size_t *retval, int flags)
 2404 {
 2405         int error = 0, memlocked;
 2406         struct sysctl_req req;
 2407 
 2408         bzero(&req, sizeof req);
 2409 
 2410         req.td = td;
 2411         req.flags = flags;
 2412 
 2413         if (oldlenp) {
 2414                 if (inkernel) {
 2415                         req.oldlen = *oldlenp;
 2416                 } else {
 2417                         error = copyin(oldlenp, &req.oldlen, sizeof(*oldlenp));
 2418                         if (error)
 2419                                 return (error);
 2420                 }
 2421         }
 2422         req.validlen = req.oldlen;
 2423         req.oldptr = old;
 2424 
 2425         if (new != NULL) {
 2426                 req.newlen = newlen;
 2427                 req.newptr = new;
 2428         }
 2429 
 2430         req.oldfunc = sysctl_old_user;
 2431         req.newfunc = sysctl_new_user;
 2432         req.lock = REQ_UNWIRED;
 2433 
 2434 #ifdef KTRACE
 2435         if (KTRPOINT(curthread, KTR_SYSCTL))
 2436                 ktrsysctl(name, namelen);
 2437 #endif
 2438         memlocked = 0;
 2439         if (req.oldptr && req.oldlen > 4 * PAGE_SIZE) {
 2440                 memlocked = 1;
 2441                 sx_xlock(&sysctlmemlock);
 2442         }
 2443         CURVNET_SET(TD_TO_VNET(td));
 2444 
 2445         for (;;) {
 2446                 req.oldidx = 0;
 2447                 req.newidx = 0;
 2448                 error = sysctl_root(0, name, namelen, &req);
 2449                 if (error != EAGAIN)
 2450                         break;
 2451                 kern_yield(PRI_USER);
 2452         }
 2453 
 2454         CURVNET_RESTORE();
 2455 
 2456         if (req.lock == REQ_WIRED && req.validlen > 0)
 2457                 vsunlock(req.oldptr, req.validlen);
 2458         if (memlocked)
 2459                 sx_xunlock(&sysctlmemlock);
 2460 
 2461         if (error && error != ENOMEM)
 2462                 return (error);
 2463 
 2464         if (retval) {
 2465                 if (req.oldptr && req.oldidx > req.validlen)
 2466                         *retval = req.validlen;
 2467                 else
 2468                         *retval = req.oldidx;
 2469         }
 2470         return (error);
 2471 }
 2472 
 2473 /*
 2474  * Drain into a sysctl struct.  The user buffer should be wired if a page
 2475  * fault would cause issue.
 2476  */
 2477 static int
 2478 sbuf_sysctl_drain(void *arg, const char *data, int len)
 2479 {
 2480         struct sysctl_req *req = arg;
 2481         int error;
 2482 
 2483         error = SYSCTL_OUT(req, data, len);
 2484         KASSERT(error >= 0, ("Got unexpected negative value %d", error));
 2485         return (error == 0 ? len : -error);
 2486 }
 2487 
 2488 struct sbuf *
 2489 sbuf_new_for_sysctl(struct sbuf *s, char *buf, int length,
 2490     struct sysctl_req *req)
 2491 {
 2492 
 2493         /* Supply a default buffer size if none given. */
 2494         if (buf == NULL && length == 0)
 2495                 length = 64;
 2496         s = sbuf_new(s, buf, length, SBUF_FIXEDLEN | SBUF_INCLUDENUL);
 2497         sbuf_set_drain(s, sbuf_sysctl_drain, req);
 2498         return (s);
 2499 }
 2500 
 2501 #ifdef DDB
 2502 
 2503 /* The current OID the debugger is working with */
 2504 static struct sysctl_oid *g_ddb_oid;
 2505 
 2506 /* DB_SYSCTL_* flags of the request in progress; set by db_sysctl() and read by sysctl_old_ddb() */
 2507 static int g_ddb_sysctl_flags;
 2508 
 2509 /* Set to 1 by sysctl_old_ddb() once it is about to print; cleared by db_sysctl() before each handler run */
 2510 static int g_ddb_sysctl_printed;
 2511 /* Indexed by CTLTYPE: nonzero for the integer types that are printed as signed; every other entry is 0 */
 2512 static const int ctl_sign[CTLTYPE+1] = {
 2513         [CTLTYPE_INT] = 1,
 2514         [CTLTYPE_LONG] = 1,
 2515         [CTLTYPE_S8] = 1,
 2516         [CTLTYPE_S16] = 1,
 2517         [CTLTYPE_S32] = 1,
 2518         [CTLTYPE_S64] = 1,
 2519 };
 2520 /* Indexed by CTLTYPE: size in bytes of one value of each integer type; 0 for types not listed */
 2521 static const int ctl_size[CTLTYPE+1] = {
 2522         [CTLTYPE_INT] = sizeof(int),
 2523         [CTLTYPE_UINT] = sizeof(u_int),
 2524         [CTLTYPE_LONG] = sizeof(long),
 2525         [CTLTYPE_ULONG] = sizeof(u_long),
 2526         [CTLTYPE_S8] = sizeof(int8_t),
 2527         [CTLTYPE_S16] = sizeof(int16_t),
 2528         [CTLTYPE_S32] = sizeof(int32_t),
 2529         [CTLTYPE_S64] = sizeof(int64_t),
 2530         [CTLTYPE_U8] = sizeof(uint8_t),
 2531         [CTLTYPE_U16] = sizeof(uint16_t),
 2532         [CTLTYPE_U32] = sizeof(uint32_t),
 2533         [CTLTYPE_U64] = sizeof(uint64_t),
 2534 };
 2535 /* Flag bits selected by the command modifiers listed in db_sysctl_modifs */
 2536 #define DB_SYSCTL_NAME_ONLY     0x001   /* Compare with -N */
 2537 #define DB_SYSCTL_VALUE_ONLY    0x002   /* Compare with -n */
 2538 #define DB_SYSCTL_OPAQUE        0x004   /* Compare with -o */
 2539 #define DB_SYSCTL_HEX           0x008   /* Compare with -x */
 2540 
 2541 #define DB_SYSCTL_SAFE_ONLY     0x100   /* Only run handlers listed in db_safe_handlers */
 2542 /* Modifier letters accepted by the sysctl command; parallel to db_sysctl_modif_values */
 2543 static const char db_sysctl_modifs[] = {
 2544         'N', 'n', 'o', 'x',
 2545 };
 2546 /* Flag bit for the modifier letter at the same index of db_sysctl_modifs */
 2547 static const int db_sysctl_modif_values[] = {
 2548         DB_SYSCTL_NAME_ONLY, DB_SYSCTL_VALUE_ONLY,
 2549         DB_SYSCTL_OPAQUE, DB_SYSCTL_HEX,
 2550 };
 2551 
 2552 /* Handlers considered safe to print while recursing; consulted by db_oid_safe() */
 2553 static int (* const db_safe_handlers[])(SYSCTL_HANDLER_ARGS) = {
 2554         sysctl_handle_bool,
 2555         sysctl_handle_8,
 2556         sysctl_handle_16,
 2557         sysctl_handle_32,
 2558         sysctl_handle_64,
 2559         sysctl_handle_int,
 2560         sysctl_handle_long,
 2561         sysctl_handle_string,
 2562         sysctl_handle_opaque,
 2563 };
 2564 
 2565 /*
 2566  * Use in place of sysctl_old_kernel to print sysctl values.
 2567  *
 2568  * Compare to the output handling in show_var from sbin/sysctl/sysctl.c
 2569  */
 2570 static int
 2571 sysctl_old_ddb(struct sysctl_req *req, const void *ptr, size_t len)
 2572 {
 2573         const u_char *val, *p;
 2574         const char *sep1;
 2575         size_t intlen, slen;
 2576         uintmax_t umv;
 2577         intmax_t mv;
 2578         int sign, ctltype, hexlen, xflag, error;
 2579 
 2580         /* Suppress false-positive GCC uninitialized variable warnings */
 2581         mv = 0;
 2582         umv = 0;
 2583 
 2584         slen = len;
 2585         val = p = ptr;
 2586 
 2587         if (ptr == NULL) {
 2588                 error = 0;
 2589                 goto out;
 2590         }
 2591 
 2592         /* We are going to print */
 2593         g_ddb_sysctl_printed = 1;
 2594 
 2595         xflag = g_ddb_sysctl_flags & DB_SYSCTL_HEX;
 2596 
 2597         ctltype = (g_ddb_oid->oid_kind & CTLTYPE);
 2598         sign = ctl_sign[ctltype];
 2599         intlen = ctl_size[ctltype];
 2600 
 2601         switch (ctltype) {
 2602         case CTLTYPE_NODE:
 2603         case CTLTYPE_STRING:
 2604                 db_printf("%.*s", (int) len, (const char *) p);
 2605                 error = 0;
 2606                 goto out;
 2607 
 2608         case CTLTYPE_INT:
 2609         case CTLTYPE_UINT:
 2610         case CTLTYPE_LONG:
 2611         case CTLTYPE_ULONG:
 2612         case CTLTYPE_S8:
 2613         case CTLTYPE_S16:
 2614         case CTLTYPE_S32:
 2615         case CTLTYPE_S64:
 2616         case CTLTYPE_U8:
 2617         case CTLTYPE_U16:
 2618         case CTLTYPE_U32:
 2619         case CTLTYPE_U64:
 2620                 hexlen = 2 + (intlen * CHAR_BIT + 3) / 4;
 2621                 sep1 = "";
 2622                 while (len >= intlen) {
 2623                         switch (ctltype) {
 2624                         case CTLTYPE_INT:
 2625                         case CTLTYPE_UINT:
 2626                                 umv = *(const u_int *)p;
 2627                                 mv = *(const int *)p;
 2628                                 break;
 2629                         case CTLTYPE_LONG:
 2630                         case CTLTYPE_ULONG:
 2631                                 umv = *(const u_long *)p;
 2632                                 mv = *(const long *)p;
 2633                                 break;
 2634                         case CTLTYPE_S8:
 2635                         case CTLTYPE_U8:
 2636                                 umv = *(const uint8_t *)p;
 2637                                 mv = *(const int8_t *)p;
 2638                                 break;
 2639                         case CTLTYPE_S16:
 2640                         case CTLTYPE_U16:
 2641                                 umv = *(const uint16_t *)p;
 2642                                 mv = *(const int16_t *)p;
 2643                                 break;
 2644                         case CTLTYPE_S32:
 2645                         case CTLTYPE_U32:
 2646                                 umv = *(const uint32_t *)p;
 2647                                 mv = *(const int32_t *)p;
 2648                                 break;
 2649                         case CTLTYPE_S64:
 2650                         case CTLTYPE_U64:
 2651                                 umv = *(const uint64_t *)p;
 2652                                 mv = *(const int64_t *)p;
 2653                                 break;
 2654                         }
 2655 
 2656                         db_printf("%s", sep1);
 2657                         if (xflag)
 2658                                 db_printf("%#0*jx", hexlen, umv);
 2659                         else if (!sign)
 2660                                 db_printf("%ju", umv);
 2661                         else if (g_ddb_oid->oid_fmt[1] == 'K') {
 2662                                 /* Kelvins are currently unsupported. */
 2663                                 error = EOPNOTSUPP;
 2664                                 goto out;
 2665                         } else
 2666                                 db_printf("%jd", mv);
 2667 
 2668                         sep1 = " ";
 2669                         len -= intlen;
 2670                         p += intlen;
 2671                 }
 2672                 error = 0;
 2673                 goto out;
 2674 
 2675         case CTLTYPE_OPAQUE:
 2676                 /* TODO: Support struct functions. */
 2677 
 2678                 /* FALLTHROUGH */
 2679         default:
 2680                 db_printf("Format:%s Length:%zu Dump:0x",
 2681                     g_ddb_oid->oid_fmt, len);
 2682                 while (len-- && (xflag || p < val + 16))
 2683                         db_printf("%02x", *p++);
 2684                 if (!xflag && len > 16)
 2685                         db_printf("...");
 2686                 error = 0;
 2687                 goto out;
 2688         }
 2689 
 2690 out:
 2691         req->oldidx += slen;
 2692         return (error);
 2693 }
 2694 
 2695 /*
 2696  * Avoid setting new sysctl values from the debugger
 2697  */
 2698 static int
 2699 sysctl_new_ddb(struct sysctl_req *req, void *p, size_t l)
 2700 {
 2701 
 2702         if (!req->newptr)
 2703                 return (0);
 2704 
 2705         /* Changing sysctls from the debugger is currently unsupported */
 2706         return (EPERM);
 2707 }
 2708 
 2709 /*
 2710  * Run a sysctl handler with the DDB oldfunc and newfunc attached.
 2711  * Instead of copying any output to a buffer we'll dump it right to
 2712  * the console.
 2713  */
 2714 static int
 2715 db_sysctl(struct sysctl_oid *oidp, int *name, u_int namelen,
 2716     void *old, size_t *oldlenp, size_t *retval, int flags)
 2717 {
 2718         struct sysctl_req req;
 2719         int error;
 2720 
 2721         /* Setup the request */
 2722         bzero(&req, sizeof req);
 2723         req.td = kdb_thread;
 2724         req.oldfunc = sysctl_old_ddb;
 2725         req.newfunc = sysctl_new_ddb;
 2726         req.lock = REQ_UNWIRED;
 2727         if (oldlenp) {
 2728                 req.oldlen = *oldlenp;
 2729         }
 2730         req.validlen = req.oldlen;
 2731         if (old) {
 2732                 req.oldptr = old;
 2733         }
 2734 
 2735         /* Setup our globals for sysctl_old_ddb */
 2736         g_ddb_oid = oidp;
 2737         g_ddb_sysctl_flags = flags;
 2738         g_ddb_sysctl_printed = 0;
 2739 
 2740         error = sysctl_root(0, name, namelen, &req);
 2741 
 2742         /* Reset globals */
 2743         g_ddb_oid = NULL;
 2744         g_ddb_sysctl_flags = 0;
 2745 
 2746         if (retval) {
 2747                 if (req.oldptr && req.oldidx > req.validlen)
 2748                         *retval = req.validlen;
 2749                 else
 2750                         *retval = req.oldidx;
 2751         }
 2752         return (error);
 2753 }
 2754 
 2755 /*
 2756  * Show a sysctl's name
 2757  */
 2758 static void
 2759 db_show_oid_name(int *oid, size_t nlen)
 2760 {
 2761         struct sysctl_oid *oidp;
 2762         int qoid[CTL_MAXNAME + 2];
 2763         int error;
 2764 
 2765         qoid[0] = CTL_SYSCTL;
 2766         qoid[1] = CTL_SYSCTL_NAME;
 2767         memcpy(qoid + 2, oid, nlen * sizeof(int));
 2768 
 2769         error = sysctl_find_oid(qoid, nlen + 2, &oidp, NULL, NULL);
 2770         if (error)
 2771                 db_error("sysctl name oid");
 2772 
 2773         error = db_sysctl(oidp, qoid, nlen + 2, NULL, NULL, NULL, 0);
 2774         if (error)
 2775                 db_error("sysctl name");
 2776 }
 2777 
 2778 /*
 2779  * Check to see if an OID is safe to print from ddb.
 2780  */
 2781 static bool
 2782 db_oid_safe(const struct sysctl_oid *oidp)
 2783 {
 2784         for (unsigned int i = 0; i < nitems(db_safe_handlers); ++i) {
 2785                 if (oidp->oid_handler == db_safe_handlers[i])
 2786                         return (true);
 2787         }
 2788 
 2789         return (false);
 2790 }
 2791 
 2792 /*
 2793  * Show a sysctl at a specific OID
 2794  * Compare to the input handling in show_var from sbin/sysctl/sysctl.c
 2795  */
 2796 static int
 2797 db_show_oid(struct sysctl_oid *oidp, int *oid, size_t nlen, int flags)
 2798 {
 2799         int error, xflag, oflag, Nflag, nflag;
 2800         size_t len;
 2801 
 2802         xflag = flags & DB_SYSCTL_HEX;
 2803         oflag = flags & DB_SYSCTL_OPAQUE;
 2804         nflag = flags & DB_SYSCTL_VALUE_ONLY;
 2805         Nflag = flags & DB_SYSCTL_NAME_ONLY;
 2806 
 2807         if ((oidp->oid_kind & CTLTYPE) == CTLTYPE_OPAQUE &&
 2808             (!xflag && !oflag))
 2809                 return (0);
 2810 
 2811         if (Nflag) {
 2812                 db_show_oid_name(oid, nlen);
 2813                 error = 0;
 2814                 goto out;
 2815         }
 2816 
 2817         if (!nflag) {
 2818                 db_show_oid_name(oid, nlen);
 2819                 db_printf(": ");
 2820         }
 2821 
 2822         if ((flags & DB_SYSCTL_SAFE_ONLY) && !db_oid_safe(oidp)) {
 2823                 db_printf("Skipping, unsafe to print while recursing.");
 2824                 error = 0;
 2825                 goto out;
 2826         }
 2827 
 2828         /* Try once, and ask about the size */
 2829         len = 0;
 2830         error = db_sysctl(oidp, oid, nlen,
 2831             NULL, NULL, &len, flags);
 2832         if (error)
 2833                 goto out;
 2834 
 2835         if (!g_ddb_sysctl_printed)
 2836                 /* Lie about the size */
 2837                 error = db_sysctl(oidp, oid, nlen,
 2838                     (void *) 1, &len, NULL, flags);
 2839 
 2840 out:
 2841         db_printf("\n");
 2842         return (error);
 2843 }
 2844 
 2845 /*
 2846  * Show all sysctls under a specific OID
 2847  * Compare to sysctl_all from sbin/sysctl/sysctl.c
 2848  */
 2849 static int
 2850 db_show_sysctl_all(int *oid, size_t len, int flags)
 2851 {
 2852         struct sysctl_oid *oidp;
 2853         int qoid[CTL_MAXNAME + 2], next[CTL_MAXNAME];
 2854         size_t nlen;
 2855 
 2856         qoid[0] = CTL_SYSCTL;
 2857         qoid[1] = CTL_SYSCTL_NEXT;
 2858         if (len) {
 2859                 nlen = len;
 2860                 memcpy(&qoid[2], oid, nlen * sizeof(int));
 2861         } else {
 2862                 nlen = 1;
 2863                 qoid[2] = CTL_KERN;
 2864         }
 2865         for (;;) {
 2866                 int error;
 2867                 size_t nextsize = sizeof(next);
 2868 
 2869                 error = kernel_sysctl(kdb_thread, qoid, nlen + 2,
 2870                     next, &nextsize, NULL, 0, &nlen, 0);
 2871                 if (error != 0) {
 2872                         if (error == ENOENT)
 2873                                 return (0);
 2874                         else
 2875                                 db_error("sysctl(next)");
 2876                 }
 2877 
 2878                 nlen /= sizeof(int);
 2879 
 2880                 if (nlen < (unsigned int)len)
 2881                         return (0);
 2882 
 2883                 if (memcmp(&oid[0], &next[0], len * sizeof(int)) != 0)
 2884                         return (0);
 2885 
 2886                 /* Find the OID in question */
 2887                 error = sysctl_find_oid(next, nlen, &oidp, NULL, NULL);
 2888                 if (error)
 2889                         return (error);
 2890 
 2891                 (void)db_show_oid(oidp, next, nlen, flags | DB_SYSCTL_SAFE_ONLY);
 2892 
 2893                 if (db_pager_quit)
 2894                         return (0);
 2895 
 2896                 memcpy(&qoid[2 + len], &next[len], (nlen - len) * sizeof(int));
 2897         }
 2898 }
 2899 
 2900 /*
 2901  * Show a sysctl by its user facing string
 2902  */
 2903 static int
 2904 db_sysctlbyname(char *name, int flags)
 2905 {
 2906         struct sysctl_oid *oidp;
 2907         int oid[CTL_MAXNAME];
 2908         int error, nlen;
 2909 
 2910         error = name2oid(name, oid, &nlen, &oidp);
 2911         if (error) {
 2912                 return (error);
 2913         }
 2914 
 2915         if ((oidp->oid_kind & CTLTYPE) == CTLTYPE_NODE) {
 2916                 db_show_sysctl_all(oid, nlen, flags);
 2917         } else {
 2918                 error = db_show_oid(oidp, oid, nlen, flags);
 2919         }
 2920 
 2921         return (error);
 2922 }
 2923 
 2924 static void
 2925 db_sysctl_cmd_usage(void)
 2926 {
 2927         db_printf(
 2928             " sysctl [/Nnox] <sysctl>                                       \n"
 2929             "                                                               \n"
 2930             " <sysctl> The name of the sysctl to show.                      \n"
 2931             "                                                               \n"
 2932             " Show a sysctl by hooking into SYSCTL_IN and SYSCTL_OUT.       \n"
 2933             " This will work for most sysctls, but should not be used       \n"
 2934             " with sysctls that are known to malloc.                        \n"
 2935             "                                                               \n"
 2936             " While recursing any \"unsafe\" sysctls will be skipped.       \n"
 2937             " Call sysctl directly on the sysctl to try printing the        \n"
 2938             " skipped sysctl. This is unsafe and may make the ddb           \n"
 2939             " session unusable.                                             \n"
 2940             "                                                               \n"
 2941             " Arguments:                                                    \n"
 2942             "   /N      Display only the name of the sysctl.                \n"
 2943             "   /n      Display only the value of the sysctl.               \n"
 2944             "   /o      Display opaque values.                              \n"
 2945             "   /x      Display the sysctl in hex.                          \n"
 2946             "                                                               \n"
 2947             "For example:                                                   \n"
 2948             "sysctl vm.v_free_min                                           \n"
 2949             "vn.v_free_min: 12669                                           \n"
 2950             );
 2951 }
 2952 
 2953 /*
 2954  * Show a specific sysctl similar to sysctl (8).
 2955  */
 2956 DB_COMMAND_FLAGS(sysctl, db_sysctl_cmd, CS_OWN)
 2957 {
 2958         char name[TOK_STRING_SIZE];
 2959         int error, i, t, flags;
 2960 
 2961         /* Parse the modifiers */
 2962         t = db_read_token();
 2963         if (t == tSLASH || t == tMINUS) {
 2964                 t = db_read_token();
 2965                 if (t != tIDENT) {
 2966                         db_printf("Bad modifier\n");
 2967                         error = EINVAL;
 2968                         goto out;
 2969                 }
 2970                 db_strcpy(modif, db_tok_string);
 2971         }
 2972         else {
 2973                 db_unread_token(t);
 2974                 modif[0] = '\0';
 2975         }
 2976 
 2977         flags = 0;
 2978         for (i = 0; i < nitems(db_sysctl_modifs); i++) {
 2979                 if (strchr(modif, db_sysctl_modifs[i])) {
 2980                         flags |= db_sysctl_modif_values[i];
 2981                 }
 2982         }
 2983 
 2984         /* Parse the sysctl names */
 2985         t = db_read_token();
 2986         if (t != tIDENT) {
 2987                 db_printf("Need sysctl name\n");
 2988                 error = EINVAL;
 2989                 goto out;
 2990         }
 2991 
 2992         /* Copy the name into a temporary buffer */
 2993         db_strcpy(name, db_tok_string);
 2994 
 2995         /* Ensure there is no trailing cruft */
 2996         t = db_read_token();
 2997         if (t != tEOL) {
 2998                 db_printf("Unexpected sysctl argument\n");
 2999                 error = EINVAL;
 3000                 goto out;
 3001         }
 3002 
 3003         error = db_sysctlbyname(name, flags);
 3004         if (error == ENOENT) {
 3005                 db_printf("unknown oid: '%s'\n", db_tok_string);
 3006                 goto out;
 3007         } else if (error) {
 3008                 db_printf("%s: error: %d\n", db_tok_string, error);
 3009                 goto out;
 3010         }
 3011 
 3012 out:
 3013         /* Ensure we eat all of our text */
 3014         db_flush_lex();
 3015 
 3016         if (error == EINVAL) {
 3017                 db_sysctl_cmd_usage();
 3018         }
 3019 }
 3020 
 3021 #endif /* DDB */

Cache object: f4c5f331484414936819f43a6fefb339


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]


This page is part of the FreeBSD/Linux Kernel Cross-Reference, and was automatically generated using a modified version of the LXR engine.