The Design and Implementation of the FreeBSD Operating System, Second Edition
Now available: The Design and Implementation of the FreeBSD Operating System (Second Edition)


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]

FreeBSD/Linux Kernel Cross Reference
sys/kern/kern_sysctl.c

Version: -  FREEBSD  -  FREEBSD11  -  FREEBSD10  -  FREEBSD9  -  FREEBSD92  -  FREEBSD91  -  FREEBSD90  -  FREEBSD8  -  FREEBSD82  -  FREEBSD81  -  FREEBSD80  -  FREEBSD7  -  FREEBSD74  -  FREEBSD73  -  FREEBSD72  -  FREEBSD71  -  FREEBSD70  -  FREEBSD6  -  FREEBSD64  -  FREEBSD63  -  FREEBSD62  -  FREEBSD61  -  FREEBSD60  -  FREEBSD5  -  FREEBSD55  -  FREEBSD54  -  FREEBSD53  -  FREEBSD52  -  FREEBSD51  -  FREEBSD50  -  FREEBSD4  -  FREEBSD3  -  FREEBSD22  -  linux-2.6  -  linux-2.4.22  -  MK83  -  MK84  -  PLAN9  -  DFBSD  -  NETBSD  -  NETBSD5  -  NETBSD4  -  NETBSD3  -  NETBSD20  -  OPENBSD  -  xnu-517  -  xnu-792  -  xnu-792.6.70  -  xnu-1228  -  xnu-1456.1.26  -  xnu-1699.24.8  -  xnu-2050.18.24  -  OPENSOLARIS  -  minix-3-1-1 
SearchContext: -  none  -  3  -  10 

    1 /*-
    2  * Copyright (c) 1982, 1986, 1989, 1993
    3  *      The Regents of the University of California.  All rights reserved.
    4  *
    5  * This code is derived from software contributed to Berkeley by
    6  * Mike Karels at Berkeley Software Design, Inc.
    7  *
    8  * Quite extensively rewritten by Poul-Henning Kamp of the FreeBSD
    9  * project, to make these variables more userfriendly.
   10  *
   11  * Redistribution and use in source and binary forms, with or without
   12  * modification, are permitted provided that the following conditions
   13  * are met:
   14  * 1. Redistributions of source code must retain the above copyright
   15  *    notice, this list of conditions and the following disclaimer.
   16  * 2. Redistributions in binary form must reproduce the above copyright
   17  *    notice, this list of conditions and the following disclaimer in the
   18  *    documentation and/or other materials provided with the distribution.
   19  * 4. Neither the name of the University nor the names of its contributors
   20  *    may be used to endorse or promote products derived from this software
   21  *    without specific prior written permission.
   22  *
   23  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
   24  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
   25  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
   26  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
   27  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
   28  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
   29  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
   30  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
   31  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
   32  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
   33  * SUCH DAMAGE.
   34  *
   35  *      @(#)kern_sysctl.c       8.4 (Berkeley) 4/14/94
   36  */
   37 
   38 #include <sys/cdefs.h>
   39 __FBSDID("$FreeBSD: stable/10/sys/kern/kern_sysctl.c 324749 2017-10-19 08:00:34Z avg $");
   40 
   41 #include "opt_capsicum.h"
   42 #include "opt_compat.h"
   43 #include "opt_ktrace.h"
   44 
   45 #include <sys/param.h>
   46 #include <sys/fail.h>
   47 #include <sys/systm.h>
   48 #include <sys/capsicum.h>
   49 #include <sys/kernel.h>
   50 #include <sys/sysctl.h>
   51 #include <sys/malloc.h>
   52 #include <sys/priv.h>
   53 #include <sys/proc.h>
   54 #include <sys/jail.h>
   55 #include <sys/lock.h>
   56 #include <sys/mutex.h>
   57 #include <sys/sbuf.h>
   58 #include <sys/sx.h>
   59 #include <sys/sysproto.h>
   60 #include <sys/uio.h>
   61 #ifdef KTRACE
   62 #include <sys/ktrace.h>
   63 #endif
   64 
   65 #include <net/vnet.h>
   66 
   67 #include <security/mac/mac_framework.h>
   68 
   69 #include <vm/vm.h>
   70 #include <vm/vm_extern.h>
   71 
/*
 * Malloc types declared for this file.  In the code visible here,
 * M_SYSCTLOID tags dynamically created oids, their name and description
 * strings, child lists of dynamic nodes, and context entries.
 */
static MALLOC_DEFINE(M_SYSCTL, "sysctl", "sysctl internal magic");
static MALLOC_DEFINE(M_SYSCTLOID, "sysctloid", "sysctl dynamic oids");
static MALLOC_DEFINE(M_SYSCTLTMP, "sysctltmp", "sysctl temp output buffer");
   75 
   76 /*
   77  * The sysctllock protects the MIB tree.  It also protects sysctl
   78  * contexts used with dynamic sysctls.  The sysctl_register_oid() and
   79  * sysctl_unregister_oid() routines require the sysctllock to already
   80  * be held, so the sysctl_lock() and sysctl_unlock() routines are
   81  * provided for the few places in the kernel which need to use that
   82  * API rather than using the dynamic API.  Use of the dynamic API is
   83  * strongly encouraged for most code.
   84  *
   85  * The sysctlmemlock is used to limit the amount of user memory wired for
   86  * sysctl requests.  This is implemented by serializing any userland
   87  * sysctl requests larger than a single page via an exclusive lock.
   88  */
/* Exclusive lock guarding the MIB tree and dynamic sysctl contexts. */
static struct sx sysctllock;
/* Lock serializing large userland requests (initialized at startup). */
static struct sx sysctlmemlock;

/*
 * Convenience wrappers around sysctllock.  The lock is only ever taken
 * exclusively through these macros.  SYSCTL_SLEEP() sleeps on channel
 * 'ch' with sysctllock passed to sx_sleep() as the interlock.
 */
#define SYSCTL_XLOCK()          sx_xlock(&sysctllock)
#define SYSCTL_XUNLOCK()        sx_xunlock(&sysctllock)
#define SYSCTL_ASSERT_XLOCKED() sx_assert(&sysctllock, SA_XLOCKED)
#define SYSCTL_INIT()           sx_init(&sysctllock, "sysctl lock")
#define SYSCTL_SLEEP(ch, wmesg, timo)                                   \
                                sx_sleep(ch, &sysctllock, 0, wmesg, timo)

/* Forward declaration; the definition is later in this file. */
static int sysctl_root(SYSCTL_HANDLER_ARGS);

struct sysctl_oid_list sysctl__children; /* root list */

/* Worker for oid removal; must be called with sysctllock held. */
static int      sysctl_remove_oid_locked(struct sysctl_oid *oidp, int del,
                    int recurse);
  105 
  106 static struct sysctl_oid *
  107 sysctl_find_oidname(const char *name, struct sysctl_oid_list *list)
  108 {
  109         struct sysctl_oid *oidp;
  110 
  111         SYSCTL_ASSERT_XLOCKED();
  112         SLIST_FOREACH(oidp, list, oid_link) {
  113                 if (strcmp(oidp->oid_name, name) == 0) {
  114                         return (oidp);
  115                 }
  116         }
  117         return (NULL);
  118 }
  119 
  120 /*
  121  * Initialization of the MIB tree.
  122  *
  123  * Order by number in each list.
  124  */
/*
 * Acquire the sysctl lock exclusively.  Exported for callers that use
 * sysctl_register_oid()/sysctl_unregister_oid() directly, since those
 * routines assert that the lock is already held.
 */
void
sysctl_lock(void)
{

        SYSCTL_XLOCK();
}
  131 
/*
 * Release the exclusive sysctl lock taken with sysctl_lock().
 */
void
sysctl_unlock(void)
{

        SYSCTL_XUNLOCK();
}
  138 
/*
 * Link 'oidp' into its parent's list (oidp->oid_parent), keeping the
 * list sorted by OID number.  The sysctl lock must be held exclusively.
 *
 * If an oid with the same name is already present in the parent:
 *  - when the existing oid is a node, only its reference count is
 *    bumped and 'oidp' itself is NOT inserted;
 *  - when it is a leaf, a message is printed and nothing is inserted.
 *
 * A negative oidp->oid_number requests automatic numbering.  If the
 * chosen number is already taken, the next free number is searched for;
 * the function panics if it has to wrap around too many times.  On
 * successful insertion oidp->oid_number holds the number actually used.
 */
void
sysctl_register_oid(struct sysctl_oid *oidp)
{
        struct sysctl_oid_list *parent = oidp->oid_parent;
        struct sysctl_oid *p;
        struct sysctl_oid *q;
        int oid_number;
        int timeout = 2;        /* wraparounds allowed before panic */

        /*
         * First check if another oid with the same name already
         * exists in the parent's list.
         */
        SYSCTL_ASSERT_XLOCKED();
        p = sysctl_find_oidname(oidp->oid_name, parent);
        if (p != NULL) {
                if ((p->oid_kind & CTLTYPE) == CTLTYPE_NODE) {
                        /* Share the existing node instead of inserting. */
                        p->oid_refcnt++;
                        return;
                } else {
                        printf("can't re-use a leaf (%s)!\n", p->oid_name);
                        return;
                }
        }
        /* get current OID number */
        oid_number = oidp->oid_number;

#if (OID_AUTO >= 0)
#error "OID_AUTO is expected to be a negative value"
#endif  
        /*
         * Any negative OID number qualifies as OID_AUTO. Valid OID
         * numbers should always be positive.
         *
         * NOTE: DO NOT change the starting value here, change it in
         * <sys/sysctl.h>, and make sure it is at least 256 to
         * accommodate e.g. net.inet.raw as a static sysctl node.
         */
        if (oid_number < 0) {
                /* Last automatically handed-out number; persists across calls. */
                static int newoid;

                /*
                 * By decrementing the next OID number we spend less
                 * time inserting the OIDs into a sorted list.
                 */
                if (--newoid < CTL_AUTO_START)
                        newoid = 0x7fffffff;

                oid_number = newoid;
        }

        /*
         * Insert the OID into the parent's list sorted by OID number.
         */
retry:
        /* q trails p: it is the last element with a smaller number. */
        q = NULL;
        SLIST_FOREACH(p, parent, oid_link) {
                /* check if the current OID number is in use */
                if (oid_number == p->oid_number) {
                        /* get the next valid OID number */
                        if (oid_number < CTL_AUTO_START ||
                            oid_number == 0x7fffffff) {
                                /* wraparound - restart */
                                oid_number = CTL_AUTO_START;
                                /* don't loop forever */
                                if (!timeout--)
                                        panic("sysctl: Out of OID numbers\n");
                                goto retry;
                        } else {
                                /* Try the next number; keep scanning forward. */
                                oid_number++;
                        }
                } else if (oid_number < p->oid_number)
                        break;
                q = p;
        }
        /* check for non-auto OID number collision */
        if (oidp->oid_number >= 0 && oidp->oid_number < CTL_AUTO_START &&
            oid_number >= CTL_AUTO_START) {
                printf("sysctl: OID number(%d) is already in use for '%s'\n",
                    oidp->oid_number, oidp->oid_name);
        }
        /* update the OID number, if any */
        oidp->oid_number = oid_number;
        if (q != NULL)
                SLIST_INSERT_AFTER(q, oidp, oid_link);
        else
                SLIST_INSERT_HEAD(parent, oidp, oid_link);
}
  227 
  228 void
  229 sysctl_register_disabled_oid(struct sysctl_oid *oidp)
  230 {
  231 
  232         /*
  233          * Mark the leaf as dormant if it's not to be immediately enabled.
  234          * We do not disable nodes as they can be shared between modules
  235          * and it is always safe to access a node.
  236          */
  237         KASSERT((oidp->oid_kind & CTLFLAG_DORMANT) == 0,
  238             ("internal flag is set in oid_kind"));
  239         if ((oidp->oid_kind & CTLTYPE) != CTLTYPE_NODE)
  240                 oidp->oid_kind |= CTLFLAG_DORMANT;
  241         sysctl_register_oid(oidp);
  242 }
  243 
  244 void
  245 sysctl_enable_oid(struct sysctl_oid *oidp)
  246 {
  247 
  248         SYSCTL_ASSERT_XLOCKED();
  249         if ((oidp->oid_kind & CTLTYPE) == CTLTYPE_NODE) {
  250                 KASSERT((oidp->oid_kind & CTLFLAG_DORMANT) == 0,
  251                     ("sysctl node is marked as dormant"));
  252                 return;
  253         }
  254         KASSERT((oidp->oid_kind & CTLFLAG_DORMANT) != 0,
  255             ("enabling already enabled sysctl oid"));
  256         oidp->oid_kind &= ~CTLFLAG_DORMANT;
  257 }
  258 
  259 void
  260 sysctl_unregister_oid(struct sysctl_oid *oidp)
  261 {
  262         struct sysctl_oid *p;
  263         int error;
  264 
  265         SYSCTL_ASSERT_XLOCKED();
  266         error = ENOENT;
  267         if (oidp->oid_number == OID_AUTO) {
  268                 error = EINVAL;
  269         } else {
  270                 SLIST_FOREACH(p, oidp->oid_parent, oid_link) {
  271                         if (p == oidp) {
  272                                 SLIST_REMOVE(oidp->oid_parent, oidp,
  273                                     sysctl_oid, oid_link);
  274                                 error = 0;
  275                                 break;
  276                         }
  277                 }
  278         }
  279 
  280         /* 
  281          * This can happen when a module fails to register and is
  282          * being unloaded afterwards.  It should not be a panic()
  283          * for normal use.
  284          */
  285         if (error)
  286                 printf("%s: failed to unregister sysctl\n", __func__);
  287 }
  288 
  289 /* Initialize a new context to keep track of dynamically added sysctls. */
  290 int
  291 sysctl_ctx_init(struct sysctl_ctx_list *c)
  292 {
  293 
  294         if (c == NULL) {
  295                 return (EINVAL);
  296         }
  297 
  298         /*
  299          * No locking here, the caller is responsible for not adding
  300          * new nodes to a context until after this function has
  301          * returned.
  302          */
  303         TAILQ_INIT(c);
  304         return (0);
  305 }
  306 
/*
 * Free the context, and destroy all dynamic oids registered in this context.
 *
 * Works in three phases under the sysctl lock:
 *  1. a dry run that deregisters (without freeing) each entry's oid,
 *     stopping at the first failure;
 *  2. re-registration of everything phase 1 deregistered;
 *  3. if phase 1 saw no failure, the real removal: every oid is removed
 *     with del=1 and its context entry is freed.
 *
 * Returns EBUSY if the dry run failed (the context is left intact),
 * otherwise 0.  Panics if removal fails in phase 3.
 */
int
sysctl_ctx_free(struct sysctl_ctx_list *clist)
{
        struct sysctl_ctx_entry *e, *e1;
        int error;

        error = 0;
        /*
         * First perform a "dry run" to check if it's ok to remove oids.
         * XXX FIXME
         * XXX This algorithm is a hack. But I don't know any
         * XXX better solution for now...
         */
        SYSCTL_XLOCK();
        TAILQ_FOREACH(e, clist, link) {
                error = sysctl_remove_oid_locked(e->entry, 0, 0);
                if (error)
                        break;
        }
        /*
         * Restore deregistered entries, either from the end,
         * or from the place where error occurred.
         * e contains the entry that was not unregistered
         */
        if (error)
                e1 = TAILQ_PREV(e, sysctl_ctx_list, link);
        else
                e1 = TAILQ_LAST(clist, sysctl_ctx_list);
        /* Walk backwards so entries are re-registered in reverse order. */
        while (e1 != NULL) {
                sysctl_register_oid(e1->entry);
                e1 = TAILQ_PREV(e1, sysctl_ctx_list, link);
        }
        if (error) {
                SYSCTL_XUNLOCK();
                return(EBUSY);
        }
        /* Now really delete the entries */
        e = TAILQ_FIRST(clist);
        while (e != NULL) {
                /* Fetch the successor first: e is freed below. */
                e1 = TAILQ_NEXT(e, link);
                error = sysctl_remove_oid_locked(e->entry, 1, 0);
                if (error)
                        panic("sysctl_remove_oid: corrupt tree, entry: %s",
                            e->entry->oid_name);
                free(e, M_SYSCTLOID);
                e = e1;
        }
        SYSCTL_XUNLOCK();
        /* error is 0 here; any failure above would have panicked. */
        return (error);
}
  358 
  359 /* Add an entry to the context */
  360 struct sysctl_ctx_entry *
  361 sysctl_ctx_entry_add(struct sysctl_ctx_list *clist, struct sysctl_oid *oidp)
  362 {
  363         struct sysctl_ctx_entry *e;
  364 
  365         SYSCTL_ASSERT_XLOCKED();
  366         if (clist == NULL || oidp == NULL)
  367                 return(NULL);
  368         e = malloc(sizeof(struct sysctl_ctx_entry), M_SYSCTLOID, M_WAITOK);
  369         e->entry = oidp;
  370         TAILQ_INSERT_HEAD(clist, e, link);
  371         return (e);
  372 }
  373 
  374 /* Find an entry in the context */
  375 struct sysctl_ctx_entry *
  376 sysctl_ctx_entry_find(struct sysctl_ctx_list *clist, struct sysctl_oid *oidp)
  377 {
  378         struct sysctl_ctx_entry *e;
  379 
  380         SYSCTL_ASSERT_XLOCKED();
  381         if (clist == NULL || oidp == NULL)
  382                 return(NULL);
  383         TAILQ_FOREACH(e, clist, link) {
  384                 if(e->entry == oidp)
  385                         return(e);
  386         }
  387         return (e);
  388 }
  389 
  390 /*
  391  * Delete an entry from the context.
  392  * NOTE: this function doesn't free oidp! You have to remove it
  393  * with sysctl_remove_oid().
  394  */
  395 int
  396 sysctl_ctx_entry_del(struct sysctl_ctx_list *clist, struct sysctl_oid *oidp)
  397 {
  398         struct sysctl_ctx_entry *e;
  399 
  400         if (clist == NULL || oidp == NULL)
  401                 return (EINVAL);
  402         SYSCTL_XLOCK();
  403         e = sysctl_ctx_entry_find(clist, oidp);
  404         if (e != NULL) {
  405                 TAILQ_REMOVE(clist, e, link);
  406                 SYSCTL_XUNLOCK();
  407                 free(e, M_SYSCTLOID);
  408                 return (0);
  409         } else {
  410                 SYSCTL_XUNLOCK();
  411                 return (ENOENT);
  412         }
  413 }
  414 
/*
 * Remove dynamically created sysctl trees.
 * oidp - top of the tree to be removed
 * del - if 0 - just deregister, otherwise free up entries as well
 * recurse - if != 0 traverse the subtree to be deleted
 *
 * This is the self-locking entry point: it takes the sysctl lock,
 * delegates to sysctl_remove_oid_locked() and returns its result.
 */
int
sysctl_remove_oid(struct sysctl_oid *oidp, int del, int recurse)
{
        int error;

        SYSCTL_XLOCK();
        error = sysctl_remove_oid_locked(oidp, del, recurse);
        SYSCTL_XUNLOCK();
        return (error);
}
  431 
  432 int
  433 sysctl_remove_name(struct sysctl_oid *parent, const char *name,
  434     int del, int recurse)
  435 {
  436         struct sysctl_oid *p, *tmp;
  437         int error;
  438 
  439         error = ENOENT;
  440         SYSCTL_XLOCK();
  441         SLIST_FOREACH_SAFE(p, SYSCTL_CHILDREN(parent), oid_link, tmp) {
  442                 if (strcmp(p->oid_name, name) == 0) {
  443                         error = sysctl_remove_oid_locked(p, del, recurse);
  444                         break;
  445                 }
  446         }
  447         SYSCTL_XUNLOCK();
  448 
  449         return (error);
  450 }
  451 
  452 
/*
 * Worker for oid removal; the sysctl lock must be held exclusively.
 *
 * Returns:
 *  EINVAL    - oidp is NULL, is not a dynamic oid, or has a zero
 *              reference count;
 *  ENOTEMPTY - oidp is a node holding its last reference that still
 *              has children while 'recurse' is 0;
 *  other     - the error from removing a child during recursion;
 *  0         - success.
 *
 * If more than one reference is held, only the count is decremented.
 * Otherwise the oid is unlinked from its parent and, when 'del' is
 * set, its memory is released once no handler is running.
 */
static int
sysctl_remove_oid_locked(struct sysctl_oid *oidp, int del, int recurse)
{
        struct sysctl_oid *p, *tmp;
        int error;

        SYSCTL_ASSERT_XLOCKED();
        if (oidp == NULL)
                return(EINVAL);
        if ((oidp->oid_kind & CTLFLAG_DYN) == 0) {
                printf("Warning: can't remove non-dynamic nodes (%s)!\n",
                    oidp->oid_name);
                return (EINVAL);
        }
        /*
         * WARNING: normal method to do this should be through
         * sysctl_ctx_free(). Use recursing as the last resort
         * method to purge your sysctl tree of leftovers...
         * However, if some other code still references these nodes,
         * it will panic.
         */
        if ((oidp->oid_kind & CTLTYPE) == CTLTYPE_NODE) {
                /* Children are only touched when the last reference goes. */
                if (oidp->oid_refcnt == 1) {
                        SLIST_FOREACH_SAFE(p,
                            SYSCTL_CHILDREN(oidp), oid_link, tmp) {
                                if (!recurse) {
                                        printf("Warning: failed attempt to "
                                            "remove oid %s with child %s\n",
                                            oidp->oid_name, p->oid_name);
                                        return (ENOTEMPTY);
                                }
                                error = sysctl_remove_oid_locked(p, del,
                                    recurse);
                                if (error)
                                        return (error);
                        }
                        /* Release the child list head of the node. */
                        if (del)
                                free(SYSCTL_CHILDREN(oidp), M_SYSCTLOID);
                }
        }
        if (oidp->oid_refcnt > 1 ) {
                /* Still shared: just drop one reference. */
                oidp->oid_refcnt--;
        } else {
                if (oidp->oid_refcnt == 0) {
                        printf("Warning: bad oid_refcnt=%u (%s)!\n",
                                oidp->oid_refcnt, oidp->oid_name);
                        return (EINVAL);
                }
                sysctl_unregister_oid(oidp);
                if (del) {
                        /*
                         * Wait for all threads running the handler to drain.
                         * This preserves the previous behavior when the
                         * sysctl lock was held across a handler invocation,
                         * and is necessary for module unload correctness.
                         */
                        while (oidp->oid_running > 0) {
                                oidp->oid_kind |= CTLFLAG_DYING;
                                SYSCTL_SLEEP(&oidp->oid_running, "oidrm", 0);
                        }
                        if (oidp->oid_descr)
                                free(__DECONST(char *, oidp->oid_descr),
                                    M_SYSCTLOID);
                        free(__DECONST(char *, oidp->oid_name), M_SYSCTLOID);
                        free(oidp, M_SYSCTLOID);
                }
        }
        return (0);
}
  522 /*
  523  * Create new sysctls at run time.
  524  * clist may point to a valid context initialized with sysctl_ctx_init().
  525  */
  526 struct sysctl_oid *
  527 sysctl_add_oid(struct sysctl_ctx_list *clist, struct sysctl_oid_list *parent,
  528         int number, const char *name, int kind, void *arg1, intptr_t arg2,
  529         int (*handler)(SYSCTL_HANDLER_ARGS), const char *fmt, const char *descr)
  530 {
  531         struct sysctl_oid *oidp;
  532 
  533         /* You have to hook up somewhere.. */
  534         if (parent == NULL)
  535                 return(NULL);
  536         /* Check if the node already exists, otherwise create it */
  537         SYSCTL_XLOCK();
  538         oidp = sysctl_find_oidname(name, parent);
  539         if (oidp != NULL) {
  540                 if ((oidp->oid_kind & CTLTYPE) == CTLTYPE_NODE) {
  541                         oidp->oid_refcnt++;
  542                         /* Update the context */
  543                         if (clist != NULL)
  544                                 sysctl_ctx_entry_add(clist, oidp);
  545                         SYSCTL_XUNLOCK();
  546                         return (oidp);
  547                 } else {
  548                         SYSCTL_XUNLOCK();
  549                         printf("can't re-use a leaf (%s)!\n", name);
  550                         return (NULL);
  551                 }
  552         }
  553         oidp = malloc(sizeof(struct sysctl_oid), M_SYSCTLOID, M_WAITOK|M_ZERO);
  554         oidp->oid_parent = parent;
  555         SLIST_NEXT(oidp, oid_link) = NULL;
  556         oidp->oid_number = number;
  557         oidp->oid_refcnt = 1;
  558         oidp->oid_name = strdup(name, M_SYSCTLOID);
  559         oidp->oid_handler = handler;
  560         oidp->oid_kind = CTLFLAG_DYN | kind;
  561         if ((kind & CTLTYPE) == CTLTYPE_NODE) {
  562                 /* Allocate space for children */
  563                 SYSCTL_CHILDREN_SET(oidp, malloc(sizeof(struct sysctl_oid_list),
  564                     M_SYSCTLOID, M_WAITOK));
  565                 SLIST_INIT(SYSCTL_CHILDREN(oidp));
  566                 oidp->oid_arg2 = arg2;
  567         } else {
  568                 oidp->oid_arg1 = arg1;
  569                 oidp->oid_arg2 = arg2;
  570         }
  571         oidp->oid_fmt = fmt;
  572         if (descr)
  573                 oidp->oid_descr = strdup(descr, M_SYSCTLOID);
  574         /* Update the context, if used */
  575         if (clist != NULL)
  576                 sysctl_ctx_entry_add(clist, oidp);
  577         /* Register this oid */
  578         sysctl_register_oid(oidp);
  579         SYSCTL_XUNLOCK();
  580         return (oidp);
  581 }
  582 
  583 /*
  584  * Rename an existing oid.
  585  */
  586 void
  587 sysctl_rename_oid(struct sysctl_oid *oidp, const char *name)
  588 {
  589         char *newname;
  590         char *oldname;
  591 
  592         newname = strdup(name, M_SYSCTLOID);
  593         SYSCTL_XLOCK();
  594         oldname = __DECONST(char *, oidp->oid_name);
  595         oidp->oid_name = newname;
  596         SYSCTL_XUNLOCK();
  597         free(oldname, M_SYSCTLOID);
  598 }
  599 
  600 /*
  601  * Reparent an existing oid.
  602  */
  603 int
  604 sysctl_move_oid(struct sysctl_oid *oid, struct sysctl_oid_list *parent)
  605 {
  606         struct sysctl_oid *oidp;
  607 
  608         SYSCTL_XLOCK();
  609         if (oid->oid_parent == parent) {
  610                 SYSCTL_XUNLOCK();
  611                 return (0);
  612         }
  613         oidp = sysctl_find_oidname(oid->oid_name, parent);
  614         if (oidp != NULL) {
  615                 SYSCTL_XUNLOCK();
  616                 return (EEXIST);
  617         }
  618         sysctl_unregister_oid(oid);
  619         oid->oid_parent = parent;
  620         oid->oid_number = OID_AUTO;
  621         sysctl_register_oid(oid);
  622         SYSCTL_XUNLOCK();
  623         return (0);
  624 }
  625 
/*
 * Register the kernel's oids on startup.
 *
 * sysctl_register_all() initializes both sysctl locks and then, with
 * the sysctl lock held, registers every oid found in the 'sysctl_set'
 * set.  It is hooked into boot through the SYSINIT() below; 'arg' is
 * unused.
 */
SET_DECLARE(sysctl_set, struct sysctl_oid);

static void
sysctl_register_all(void *arg)
{
        struct sysctl_oid **oidp;

        sx_init(&sysctlmemlock, "sysctl mem");
        SYSCTL_INIT();
        SYSCTL_XLOCK();
        SET_FOREACH(oidp, sysctl_set)
                sysctl_register_oid(*oidp);
        SYSCTL_XUNLOCK();
}
SYSINIT(sysctl, SI_SUB_KMEM, SI_ORDER_ANY, sysctl_register_all, 0);
  644 
  645 /*
  646  * "Staff-functions"
  647  *
  648  * These functions implement a presently undocumented interface 
  649  * used by the sysctl program to walk the tree, and get the type
  650  * so it can print the value.
  651  * This interface is under work and consideration, and should probably
  652  * be killed with a big axe by the first person who can find the time.
  653  * (Be aware, though, that the proper interface isn't as obvious as it
  654  * may seem; there are various conflicting requirements.)
  655  *
  656  * {0,0}        printf the entire MIB-tree.
  657  * {0,1,...}    return the name of the "..." OID.
  658  * {0,2,...}    return the next OID.
  659  * {0,3}        return the OID of the name in "new"
  660  * {0,4,...}    return the kind & format info for the "..." OID.
  661  * {0,5,...}    return the description of the "..." OID.
  662  */
  663 
  664 #ifdef SYSCTL_DEBUG
  665 static void
  666 sysctl_sysctl_debug_dump_node(struct sysctl_oid_list *l, int i)
  667 {
  668         int k;
  669         struct sysctl_oid *oidp;
  670 
  671         SYSCTL_ASSERT_XLOCKED();
  672         SLIST_FOREACH(oidp, l, oid_link) {
  673 
  674                 for (k=0; k<i; k++)
  675                         printf(" ");
  676 
  677                 printf("%d %s ", oidp->oid_number, oidp->oid_name);
  678 
  679                 printf("%c%c",
  680                         oidp->oid_kind & CTLFLAG_RD ? 'R':' ',
  681                         oidp->oid_kind & CTLFLAG_WR ? 'W':' ');
  682 
  683                 if (oidp->oid_handler)
  684                         printf(" *Handler");
  685 
  686                 switch (oidp->oid_kind & CTLTYPE) {
  687                         case CTLTYPE_NODE:
  688                                 printf(" Node\n");
  689                                 if (!oidp->oid_handler) {
  690                                         sysctl_sysctl_debug_dump_node(
  691                                                 oidp->oid_arg1, i+2);
  692                                 }
  693                                 break;
  694                         case CTLTYPE_INT:    printf(" Int\n"); break;
  695                         case CTLTYPE_UINT:   printf(" u_int\n"); break;
  696                         case CTLTYPE_LONG:   printf(" Long\n"); break;
  697                         case CTLTYPE_ULONG:  printf(" u_long\n"); break;
  698                         case CTLTYPE_STRING: printf(" String\n"); break;
  699                         case CTLTYPE_U64:    printf(" uint64_t\n"); break;
  700                         case CTLTYPE_S64:    printf(" int64_t\n"); break;
  701                         case CTLTYPE_OPAQUE: printf(" Opaque/struct\n"); break;
  702                         default:             printf("\n");
  703                 }
  704 
  705         }
  706 }
  707 
  708 static int
  709 sysctl_sysctl_debug(SYSCTL_HANDLER_ARGS)
  710 {
  711         int error;
  712 
  713         error = priv_check(req->td, PRIV_SYSCTL_DEBUG);
  714         if (error)
  715                 return (error);
  716         SYSCTL_XLOCK();
  717         sysctl_sysctl_debug_dump_node(&sysctl__children, 0);
  718         SYSCTL_XUNLOCK();
  719         return (ENOENT);
  720 }
  721 
  722 SYSCTL_PROC(_sysctl, 0, debug, CTLTYPE_STRING|CTLFLAG_RD,
  723         0, 0, sysctl_sysctl_debug, "-", "");
  724 #endif
  725 
  726 static int
  727 sysctl_sysctl_name(SYSCTL_HANDLER_ARGS)
  728 {
  729         int *name = (int *) arg1;
  730         u_int namelen = arg2;
  731         int error = 0;
  732         struct sysctl_oid *oid;
  733         struct sysctl_oid_list *lsp = &sysctl__children, *lsp2;
  734         char buf[10];
  735 
  736         SYSCTL_XLOCK();
  737         while (namelen) {
  738                 if (!lsp) {
  739                         snprintf(buf,sizeof(buf),"%d",*name);
  740                         if (req->oldidx)
  741                                 error = SYSCTL_OUT(req, ".", 1);
  742                         if (!error)
  743                                 error = SYSCTL_OUT(req, buf, strlen(buf));
  744                         if (error)
  745                                 goto out;
  746                         namelen--;
  747                         name++;
  748                         continue;
  749                 }
  750                 lsp2 = 0;
  751                 SLIST_FOREACH(oid, lsp, oid_link) {
  752                         if (oid->oid_number != *name)
  753                                 continue;
  754 
  755                         if (req->oldidx)
  756                                 error = SYSCTL_OUT(req, ".", 1);
  757                         if (!error)
  758                                 error = SYSCTL_OUT(req, oid->oid_name,
  759                                         strlen(oid->oid_name));
  760                         if (error)
  761                                 goto out;
  762 
  763                         namelen--;
  764                         name++;
  765 
  766                         if ((oid->oid_kind & CTLTYPE) != CTLTYPE_NODE) 
  767                                 break;
  768 
  769                         if (oid->oid_handler)
  770                                 break;
  771 
  772                         lsp2 = SYSCTL_CHILDREN(oid);
  773                         break;
  774                 }
  775                 lsp = lsp2;
  776         }
  777         error = SYSCTL_OUT(req, "", 1);
  778  out:
  779         SYSCTL_XUNLOCK();
  780         return (error);
  781 }
  782 
/*
 * XXXRW/JA: Shouldn't return name data for nodes that we don't permit in
 * capability mode.
 *
 * Registers sysctl_sysctl_name() as the handler of oid number 1
 * ("name") under _sysctl, i.e. the {0,1,...} query described above.
 */
static SYSCTL_NODE(_sysctl, 1, name, CTLFLAG_RD | CTLFLAG_CAPRD,
    sysctl_sysctl_name, "");
  789 
  790 static int
  791 sysctl_sysctl_next_ls(struct sysctl_oid_list *lsp, int *name, u_int namelen, 
  792         int *next, int *len, int level, struct sysctl_oid **oidpp)
  793 {
  794         struct sysctl_oid *oidp;
  795 
  796         SYSCTL_ASSERT_XLOCKED();
  797         *len = level;
  798         SLIST_FOREACH(oidp, lsp, oid_link) {
  799                 *next = oidp->oid_number;
  800                 *oidpp = oidp;
  801 
  802                 if ((oidp->oid_kind & (CTLFLAG_SKIP | CTLFLAG_DORMANT)) != 0)
  803                         continue;
  804 
  805                 if (!namelen) {
  806                         if ((oidp->oid_kind & CTLTYPE) != CTLTYPE_NODE) 
  807                                 return (0);
  808                         if (oidp->oid_handler) 
  809                                 /* We really should call the handler here...*/
  810                                 return (0);
  811                         lsp = SYSCTL_CHILDREN(oidp);
  812                         if (!sysctl_sysctl_next_ls(lsp, 0, 0, next+1, 
  813                                 len, level+1, oidpp))
  814                                 return (0);
  815                         goto emptynode;
  816                 }
  817 
  818                 if (oidp->oid_number < *name)
  819                         continue;
  820 
  821                 if (oidp->oid_number > *name) {
  822                         if ((oidp->oid_kind & CTLTYPE) != CTLTYPE_NODE)
  823                                 return (0);
  824                         if (oidp->oid_handler)
  825                                 return (0);
  826                         lsp = SYSCTL_CHILDREN(oidp);
  827                         if (!sysctl_sysctl_next_ls(lsp, name+1, namelen-1, 
  828                                 next+1, len, level+1, oidpp))
  829                                 return (0);
  830                         goto next;
  831                 }
  832                 if ((oidp->oid_kind & CTLTYPE) != CTLTYPE_NODE)
  833                         continue;
  834 
  835                 if (oidp->oid_handler)
  836                         continue;
  837 
  838                 lsp = SYSCTL_CHILDREN(oidp);
  839                 if (!sysctl_sysctl_next_ls(lsp, name+1, namelen-1, next+1, 
  840                         len, level+1, oidpp))
  841                         return (0);
  842         next:
  843                 namelen = 1;
  844         emptynode:
  845                 *len = level;
  846         }
  847         return (1);
  848 }
  849 
  850 static int
  851 sysctl_sysctl_next(SYSCTL_HANDLER_ARGS)
  852 {
  853         int *name = (int *) arg1;
  854         u_int namelen = arg2;
  855         int i, j, error;
  856         struct sysctl_oid *oid;
  857         struct sysctl_oid_list *lsp = &sysctl__children;
  858         int newoid[CTL_MAXNAME];
  859 
  860         SYSCTL_XLOCK();
  861         i = sysctl_sysctl_next_ls(lsp, name, namelen, newoid, &j, 1, &oid);
  862         SYSCTL_XUNLOCK();
  863         if (i)
  864                 return (ENOENT);
  865         error = SYSCTL_OUT(req, newoid, j * sizeof (int));
  866         return (error);
  867 }
  868 
  869 /*
  870  * XXXRW/JA: Shouldn't return next data for nodes that we don't permit in
  871  * capability mode.
  872  */
  873 static SYSCTL_NODE(_sysctl, 2, next, CTLFLAG_RD | CTLFLAG_CAPRD,
  874     sysctl_sysctl_next, "");
  875 
  876 static int
  877 name2oid(char *name, int *oid, int *len, struct sysctl_oid **oidpp)
  878 {
  879         struct sysctl_oid *oidp;
  880         struct sysctl_oid_list *lsp = &sysctl__children;
  881         char *p;
  882 
  883         SYSCTL_ASSERT_XLOCKED();
  884 
  885         for (*len = 0; *len < CTL_MAXNAME;) {
  886                 p = strsep(&name, ".");
  887 
  888                 oidp = SLIST_FIRST(lsp);
  889                 for (;; oidp = SLIST_NEXT(oidp, oid_link)) {
  890                         if (oidp == NULL)
  891                                 return (ENOENT);
  892                         if (strcmp(p, oidp->oid_name) == 0)
  893                                 break;
  894                 }
  895                 *oid++ = oidp->oid_number;
  896                 (*len)++;
  897 
  898                 if (name == NULL || *name == '\0') {
  899                         if (oidpp)
  900                                 *oidpp = oidp;
  901                         return (0);
  902                 }
  903 
  904                 if ((oidp->oid_kind & CTLTYPE) != CTLTYPE_NODE)
  905                         break;
  906 
  907                 if (oidp->oid_handler)
  908                         break;
  909 
  910                 lsp = SYSCTL_CHILDREN(oidp);
  911         }
  912         return (ENOENT);
  913 }
  914 
  915 static int
  916 sysctl_sysctl_name2oid(SYSCTL_HANDLER_ARGS)
  917 {
  918         char *p;
  919         int error, oid[CTL_MAXNAME], len = 0;
  920         struct sysctl_oid *op = 0;
  921 
  922         if (!req->newlen) 
  923                 return (ENOENT);
  924         if (req->newlen >= MAXPATHLEN)  /* XXX arbitrary, undocumented */
  925                 return (ENAMETOOLONG);
  926 
  927         p = malloc(req->newlen+1, M_SYSCTL, M_WAITOK);
  928 
  929         error = SYSCTL_IN(req, p, req->newlen);
  930         if (error) {
  931                 free(p, M_SYSCTL);
  932                 return (error);
  933         }
  934 
  935         p [req->newlen] = '\0';
  936 
  937         SYSCTL_XLOCK();
  938         error = name2oid(p, oid, &len, &op);
  939         SYSCTL_XUNLOCK();
  940 
  941         free(p, M_SYSCTL);
  942 
  943         if (error)
  944                 return (error);
  945 
  946         error = SYSCTL_OUT(req, oid, len * sizeof *oid);
  947         return (error);
  948 }
  949 
  950 /*
  951  * XXXRW/JA: Shouldn't return name2oid data for nodes that we don't permit in
  952  * capability mode.
  953  */
  954 SYSCTL_PROC(_sysctl, 3, name2oid,
  955     CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_ANYBODY | CTLFLAG_MPSAFE
  956     | CTLFLAG_CAPRW, 0, 0, sysctl_sysctl_name2oid, "I", "");
  957 
  958 static int
  959 sysctl_sysctl_oidfmt(SYSCTL_HANDLER_ARGS)
  960 {
  961         struct sysctl_oid *oid;
  962         int error;
  963 
  964         SYSCTL_XLOCK();
  965         error = sysctl_find_oid(arg1, arg2, &oid, NULL, req);
  966         if (error)
  967                 goto out;
  968 
  969         if (oid->oid_fmt == NULL) {
  970                 error = ENOENT;
  971                 goto out;
  972         }
  973         error = SYSCTL_OUT(req, &oid->oid_kind, sizeof(oid->oid_kind));
  974         if (error)
  975                 goto out;
  976         error = SYSCTL_OUT(req, oid->oid_fmt, strlen(oid->oid_fmt) + 1);
  977  out:
  978         SYSCTL_XUNLOCK();
  979         return (error);
  980 }
  981 
  982 
  983 static SYSCTL_NODE(_sysctl, 4, oidfmt, CTLFLAG_RD|CTLFLAG_MPSAFE|CTLFLAG_CAPRD,
  984     sysctl_sysctl_oidfmt, "");
  985 
  986 static int
  987 sysctl_sysctl_oiddescr(SYSCTL_HANDLER_ARGS)
  988 {
  989         struct sysctl_oid *oid;
  990         int error;
  991 
  992         SYSCTL_XLOCK();
  993         error = sysctl_find_oid(arg1, arg2, &oid, NULL, req);
  994         if (error)
  995                 goto out;
  996 
  997         if (oid->oid_descr == NULL) {
  998                 error = ENOENT;
  999                 goto out;
 1000         }
 1001         error = SYSCTL_OUT(req, oid->oid_descr, strlen(oid->oid_descr) + 1);
 1002  out:
 1003         SYSCTL_XUNLOCK();
 1004         return (error);
 1005 }
 1006 
 1007 static SYSCTL_NODE(_sysctl, 5, oiddescr, CTLFLAG_RD|CTLFLAG_CAPRD,
 1008     sysctl_sysctl_oiddescr, "");
 1009 
 1010 /*
 1011  * Default "handler" functions.
 1012  */
 1013 
 1014 /*
 1015  * Handle an int, signed or unsigned.
 1016  * Two cases:
 1017  *     a variable:  point arg1 at it.
 1018  *     a constant:  pass it in arg2.
 1019  */
 1020 
 1021 int
 1022 sysctl_handle_int(SYSCTL_HANDLER_ARGS)
 1023 {
 1024         int tmpout, error = 0;
 1025 
 1026         /*
 1027          * Attempt to get a coherent snapshot by making a copy of the data.
 1028          */
 1029         if (arg1)
 1030                 tmpout = *(int *)arg1;
 1031         else
 1032                 tmpout = arg2;
 1033         error = SYSCTL_OUT(req, &tmpout, sizeof(int));
 1034 
 1035         if (error || !req->newptr)
 1036                 return (error);
 1037 
 1038         if (!arg1)
 1039                 error = EPERM;
 1040         else
 1041                 error = SYSCTL_IN(req, arg1, sizeof(int));
 1042         return (error);
 1043 }
 1044 
 1045 /*
 1046  * Based on on sysctl_handle_int() convert milliseconds into ticks.
 1047  * Note: this is used by TCP.
 1048  */
 1049 
 1050 int
 1051 sysctl_msec_to_ticks(SYSCTL_HANDLER_ARGS)
 1052 {
 1053         int error, s, tt;
 1054 
 1055         tt = *(int *)arg1;
 1056         s = (int)((int64_t)tt * 1000 / hz);
 1057 
 1058         error = sysctl_handle_int(oidp, &s, 0, req);
 1059         if (error || !req->newptr)
 1060                 return (error);
 1061 
 1062         tt = (int)((int64_t)s * hz / 1000);
 1063         if (tt < 1)
 1064                 return (EINVAL);
 1065 
 1066         *(int *)arg1 = tt;
 1067         return (0);
 1068 }
 1069 
 1070 
 1071 /*
 1072  * Handle a long, signed or unsigned.
 1073  * Two cases:
 1074  *     a variable:  point arg1 at it.
 1075  *     a constant:  pass it in arg2.
 1076  */
 1077 
 1078 int
 1079 sysctl_handle_long(SYSCTL_HANDLER_ARGS)
 1080 {
 1081         int error = 0;
 1082         long tmplong;
 1083 #ifdef SCTL_MASK32
 1084         int tmpint;
 1085 #endif
 1086 
 1087         /*
 1088          * Attempt to get a coherent snapshot by making a copy of the data.
 1089          */
 1090         if (arg1)
 1091                 tmplong = *(long *)arg1;
 1092         else
 1093                 tmplong = arg2;
 1094 #ifdef SCTL_MASK32
 1095         if (req->flags & SCTL_MASK32) {
 1096                 tmpint = tmplong;
 1097                 error = SYSCTL_OUT(req, &tmpint, sizeof(int));
 1098         } else
 1099 #endif
 1100                 error = SYSCTL_OUT(req, &tmplong, sizeof(long));
 1101 
 1102         if (error || !req->newptr)
 1103                 return (error);
 1104 
 1105         if (!arg1)
 1106                 error = EPERM;
 1107 #ifdef SCTL_MASK32
 1108         else if (req->flags & SCTL_MASK32) {
 1109                 error = SYSCTL_IN(req, &tmpint, sizeof(int));
 1110                 *(long *)arg1 = (long)tmpint;
 1111         }
 1112 #endif
 1113         else
 1114                 error = SYSCTL_IN(req, arg1, sizeof(long));
 1115         return (error);
 1116 }
 1117 
 1118 /*
 1119  * Handle a 64 bit int, signed or unsigned.
 1120  * Two cases:
 1121  *     a variable:  point arg1 at it.
 1122  *     a constant:  pass it in arg2.
 1123  */
 1124 int
 1125 sysctl_handle_64(SYSCTL_HANDLER_ARGS)
 1126 {
 1127         int error = 0;
 1128         uint64_t tmpout;
 1129 
 1130         /*
 1131          * Attempt to get a coherent snapshot by making a copy of the data.
 1132          */
 1133         if (arg1)
 1134                 tmpout = *(uint64_t *)arg1;
 1135         else
 1136                 tmpout = arg2;
 1137         error = SYSCTL_OUT(req, &tmpout, sizeof(uint64_t));
 1138 
 1139         if (error || !req->newptr)
 1140                 return (error);
 1141 
 1142         if (!arg1)
 1143                 error = EPERM;
 1144         else
 1145                 error = SYSCTL_IN(req, arg1, sizeof(uint64_t));
 1146         return (error);
 1147 }
 1148 
 1149 /*
 1150  * Handle our generic '\0' terminated 'C' string.
 1151  * Two cases:
 1152  *      a variable string:  point arg1 at it, arg2 is max length.
 1153  *      a constant string:  point arg1 at it, arg2 is zero.
 1154  */
 1155 
 1156 int
 1157 sysctl_handle_string(SYSCTL_HANDLER_ARGS)
 1158 {
 1159         int error=0;
 1160         char *tmparg;
 1161         size_t outlen;
 1162 
 1163         /*
 1164          * Attempt to get a coherent snapshot by copying to a
 1165          * temporary kernel buffer.
 1166          */
 1167 retry:
 1168         outlen = strlen((char *)arg1)+1;
 1169         tmparg = malloc(outlen, M_SYSCTLTMP, M_WAITOK);
 1170 
 1171         if (strlcpy(tmparg, (char *)arg1, outlen) >= outlen) {
 1172                 free(tmparg, M_SYSCTLTMP);
 1173                 goto retry;
 1174         }
 1175 
 1176         error = SYSCTL_OUT(req, tmparg, outlen);
 1177         free(tmparg, M_SYSCTLTMP);
 1178 
 1179         if (error || !req->newptr)
 1180                 return (error);
 1181 
 1182         if ((req->newlen - req->newidx) >= arg2) {
 1183                 error = EINVAL;
 1184         } else {
 1185                 arg2 = (req->newlen - req->newidx);
 1186                 error = SYSCTL_IN(req, arg1, arg2);
 1187                 ((char *)arg1)[arg2] = '\0';
 1188         }
 1189 
 1190         return (error);
 1191 }
 1192 
 1193 /*
 1194  * Handle any kind of opaque data.
 1195  * arg1 points to it, arg2 is the size.
 1196  */
 1197 
 1198 int
 1199 sysctl_handle_opaque(SYSCTL_HANDLER_ARGS)
 1200 {
 1201         int error, tries;
 1202         u_int generation;
 1203         struct sysctl_req req2;
 1204 
 1205         /*
 1206          * Attempt to get a coherent snapshot, by using the thread
 1207          * pre-emption counter updated from within mi_switch() to
 1208          * determine if we were pre-empted during a bcopy() or
 1209          * copyout(). Make 3 attempts at doing this before giving up.
 1210          * If we encounter an error, stop immediately.
 1211          */
 1212         tries = 0;
 1213         req2 = *req;
 1214 retry:
 1215         generation = curthread->td_generation;
 1216         error = SYSCTL_OUT(req, arg1, arg2);
 1217         if (error)
 1218                 return (error);
 1219         tries++;
 1220         if (generation != curthread->td_generation && tries < 3) {
 1221                 *req = req2;
 1222                 goto retry;
 1223         }
 1224 
 1225         error = SYSCTL_IN(req, arg1, arg2);
 1226 
 1227         return (error);
 1228 }
 1229 
 1230 /*
 1231  * Transfer functions to/from kernel space.
 1232  * XXX: rather untested at this point
 1233  */
 1234 static int
 1235 sysctl_old_kernel(struct sysctl_req *req, const void *p, size_t l)
 1236 {
 1237         size_t i = 0;
 1238 
 1239         if (req->oldptr) {
 1240                 i = l;
 1241                 if (req->oldlen <= req->oldidx)
 1242                         i = 0;
 1243                 else
 1244                         if (i > req->oldlen - req->oldidx)
 1245                                 i = req->oldlen - req->oldidx;
 1246                 if (i > 0)
 1247                         bcopy(p, (char *)req->oldptr + req->oldidx, i);
 1248         }
 1249         req->oldidx += l;
 1250         if (req->oldptr && i != l)
 1251                 return (ENOMEM);
 1252         return (0);
 1253 }
 1254 
 1255 static int
 1256 sysctl_new_kernel(struct sysctl_req *req, void *p, size_t l)
 1257 {
 1258         if (!req->newptr)
 1259                 return (0);
 1260         if (req->newlen - req->newidx < l)
 1261                 return (EINVAL);
 1262         bcopy((char *)req->newptr + req->newidx, p, l);
 1263         req->newidx += l;
 1264         return (0);
 1265 }
 1266 
 1267 int
 1268 kernel_sysctl(struct thread *td, int *name, u_int namelen, void *old,
 1269     size_t *oldlenp, void *new, size_t newlen, size_t *retval, int flags)
 1270 {
 1271         int error = 0;
 1272         struct sysctl_req req;
 1273 
 1274         bzero(&req, sizeof req);
 1275 
 1276         req.td = td;
 1277         req.flags = flags;
 1278 
 1279         if (oldlenp) {
 1280                 req.oldlen = *oldlenp;
 1281         }
 1282         req.validlen = req.oldlen;
 1283 
 1284         if (old) {
 1285                 req.oldptr= old;
 1286         }
 1287 
 1288         if (new != NULL) {
 1289                 req.newlen = newlen;
 1290                 req.newptr = new;
 1291         }
 1292 
 1293         req.oldfunc = sysctl_old_kernel;
 1294         req.newfunc = sysctl_new_kernel;
 1295         req.lock = REQ_UNWIRED;
 1296 
 1297         SYSCTL_XLOCK();
 1298         error = sysctl_root(0, name, namelen, &req);
 1299         SYSCTL_XUNLOCK();
 1300 
 1301         if (req.lock == REQ_WIRED && req.validlen > 0)
 1302                 vsunlock(req.oldptr, req.validlen);
 1303 
 1304         if (error && error != ENOMEM)
 1305                 return (error);
 1306 
 1307         if (retval) {
 1308                 if (req.oldptr && req.oldidx > req.validlen)
 1309                         *retval = req.validlen;
 1310                 else
 1311                         *retval = req.oldidx;
 1312         }
 1313         return (error);
 1314 }
 1315 
 1316 int
 1317 kernel_sysctlbyname(struct thread *td, char *name, void *old, size_t *oldlenp,
 1318     void *new, size_t newlen, size_t *retval, int flags)
 1319 {
 1320         int oid[CTL_MAXNAME];
 1321         size_t oidlen, plen;
 1322         int error;
 1323 
 1324         oid[0] = 0;             /* sysctl internal magic */
 1325         oid[1] = 3;             /* name2oid */
 1326         oidlen = sizeof(oid);
 1327 
 1328         error = kernel_sysctl(td, oid, 2, oid, &oidlen,
 1329             (void *)name, strlen(name), &plen, flags);
 1330         if (error)
 1331                 return (error);
 1332 
 1333         error = kernel_sysctl(td, oid, plen / sizeof(int), old, oldlenp,
 1334             new, newlen, retval, flags);
 1335         return (error);
 1336 }
 1337 
 1338 /*
 1339  * Transfer function to/from user space.
 1340  */
 1341 static int
 1342 sysctl_old_user(struct sysctl_req *req, const void *p, size_t l)
 1343 {
 1344         size_t i, len, origidx;
 1345         int error;
 1346 
 1347         origidx = req->oldidx;
 1348         req->oldidx += l;
 1349         if (req->oldptr == NULL)
 1350                 return (0);
 1351         /*
 1352          * If we have not wired the user supplied buffer and we are currently
 1353          * holding locks, drop a witness warning, as it's possible that
 1354          * write operations to the user page can sleep.
 1355          */
 1356         if (req->lock != REQ_WIRED)
 1357                 WITNESS_WARN(WARN_GIANTOK | WARN_SLEEPOK, NULL,
 1358                     "sysctl_old_user()");
 1359         i = l;
 1360         len = req->validlen;
 1361         if (len <= origidx)
 1362                 i = 0;
 1363         else {
 1364                 if (i > len - origidx)
 1365                         i = len - origidx;
 1366                 if (req->lock == REQ_WIRED) {
 1367                         error = copyout_nofault(p, (char *)req->oldptr +
 1368                             origidx, i);
 1369                 } else
 1370                         error = copyout(p, (char *)req->oldptr + origidx, i);
 1371                 if (error != 0)
 1372                         return (error);
 1373         }
 1374         if (i < l)
 1375                 return (ENOMEM);
 1376         return (0);
 1377 }
 1378 
 1379 static int
 1380 sysctl_new_user(struct sysctl_req *req, void *p, size_t l)
 1381 {
 1382         int error;
 1383 
 1384         if (!req->newptr)
 1385                 return (0);
 1386         if (req->newlen - req->newidx < l)
 1387                 return (EINVAL);
 1388         WITNESS_WARN(WARN_GIANTOK | WARN_SLEEPOK, NULL,
 1389             "sysctl_new_user()");
 1390         error = copyin((char *)req->newptr + req->newidx, p, l);
 1391         req->newidx += l;
 1392         return (error);
 1393 }
 1394 
 1395 /*
 1396  * Wire the user space destination buffer.  If set to a value greater than
 1397  * zero, the len parameter limits the maximum amount of wired memory.
 1398  */
 1399 int
 1400 sysctl_wire_old_buffer(struct sysctl_req *req, size_t len)
 1401 {
 1402         int ret;
 1403         size_t wiredlen;
 1404 
 1405         wiredlen = (len > 0 && len < req->oldlen) ? len : req->oldlen;
 1406         ret = 0;
 1407         if (req->lock != REQ_WIRED && req->oldptr &&
 1408             req->oldfunc == sysctl_old_user) {
 1409                 if (wiredlen != 0) {
 1410                         ret = vslock(req->oldptr, wiredlen);
 1411                         if (ret != 0) {
 1412                                 if (ret != ENOMEM)
 1413                                         return (ret);
 1414                                 wiredlen = 0;
 1415                         }
 1416                 }
 1417                 req->lock = REQ_WIRED;
 1418                 req->validlen = wiredlen;
 1419         }
 1420         return (0);
 1421 }
 1422 
 1423 int
 1424 sysctl_find_oid(int *name, u_int namelen, struct sysctl_oid **noid,
 1425     int *nindx, struct sysctl_req *req)
 1426 {
 1427         struct sysctl_oid_list *lsp;
 1428         struct sysctl_oid *oid;
 1429         int indx;
 1430 
 1431         SYSCTL_ASSERT_XLOCKED();
 1432         lsp = &sysctl__children;
 1433         indx = 0;
 1434         while (indx < CTL_MAXNAME) {
 1435                 SLIST_FOREACH(oid, lsp, oid_link) {
 1436                         if (oid->oid_number == name[indx])
 1437                                 break;
 1438                 }
 1439                 if (oid == NULL)
 1440                         return (ENOENT);
 1441 
 1442                 indx++;
 1443                 if ((oid->oid_kind & CTLTYPE) == CTLTYPE_NODE) {
 1444                         if (oid->oid_handler != NULL || indx == namelen) {
 1445                                 *noid = oid;
 1446                                 if (nindx != NULL)
 1447                                         *nindx = indx;
 1448                                 KASSERT((oid->oid_kind & CTLFLAG_DYING) == 0,
 1449                                     ("%s found DYING node %p", __func__, oid));
 1450                                 return (0);
 1451                         }
 1452                         lsp = SYSCTL_CHILDREN(oid);
 1453                 } else if (indx == namelen) {
 1454                         if ((oid->oid_kind & CTLFLAG_DORMANT) != 0)
 1455                                 return (ENOENT);
 1456                         *noid = oid;
 1457                         if (nindx != NULL)
 1458                                 *nindx = indx;
 1459                         KASSERT((oid->oid_kind & CTLFLAG_DYING) == 0,
 1460                             ("%s found DYING node %p", __func__, oid));
 1461                         return (0);
 1462                 } else {
 1463                         return (ENOTDIR);
 1464                 }
 1465         }
 1466         return (ENOENT);
 1467 }
 1468 
 1469 /*
 1470  * Traverse our tree, and find the right node, execute whatever it points
 1471  * to, and return the resulting error code.
 1472  */
 1473 
 1474 static int
 1475 sysctl_root(SYSCTL_HANDLER_ARGS)
 1476 {
 1477         struct sysctl_oid *oid;
 1478         int error, indx, lvl;
 1479 
 1480         SYSCTL_ASSERT_XLOCKED();
 1481 
 1482         error = sysctl_find_oid(arg1, arg2, &oid, &indx, req);
 1483         if (error)
 1484                 return (error);
 1485 
 1486         if ((oid->oid_kind & CTLTYPE) == CTLTYPE_NODE) {
 1487                 /*
 1488                  * You can't call a sysctl when it's a node, but has
 1489                  * no handler.  Inform the user that it's a node.
 1490                  * The indx may or may not be the same as namelen.
 1491                  */
 1492                 if (oid->oid_handler == NULL)
 1493                         return (EISDIR);
 1494         }
 1495 
 1496         /* Is this sysctl writable? */
 1497         if (req->newptr && !(oid->oid_kind & CTLFLAG_WR))
 1498                 return (EPERM);
 1499 
 1500         KASSERT(req->td != NULL, ("sysctl_root(): req->td == NULL"));
 1501 
 1502 #ifdef CAPABILITY_MODE
 1503         /*
 1504          * If the process is in capability mode, then don't permit reading or
 1505          * writing unless specifically granted for the node.
 1506          */
 1507         if (IN_CAPABILITY_MODE(req->td)) {
 1508                 if (req->oldptr && !(oid->oid_kind & CTLFLAG_CAPRD))
 1509                         return (EPERM);
 1510                 if (req->newptr && !(oid->oid_kind & CTLFLAG_CAPWR))
 1511                         return (EPERM);
 1512         }
 1513 #endif
 1514 
 1515         /* Is this sysctl sensitive to securelevels? */
 1516         if (req->newptr && (oid->oid_kind & CTLFLAG_SECURE)) {
 1517                 lvl = (oid->oid_kind & CTLMASK_SECURE) >> CTLSHIFT_SECURE;
 1518                 error = securelevel_gt(req->td->td_ucred, lvl);
 1519                 if (error)
 1520                         return (error);
 1521         }
 1522 
 1523         /* Is this sysctl writable by only privileged users? */
 1524         if (req->newptr && !(oid->oid_kind & CTLFLAG_ANYBODY)) {
 1525                 int priv;
 1526 
 1527                 if (oid->oid_kind & CTLFLAG_PRISON)
 1528                         priv = PRIV_SYSCTL_WRITEJAIL;
 1529 #ifdef VIMAGE
 1530                 else if ((oid->oid_kind & CTLFLAG_VNET) &&
 1531                      prison_owns_vnet(req->td->td_ucred))
 1532                         priv = PRIV_SYSCTL_WRITEJAIL;
 1533 #endif
 1534                 else
 1535                         priv = PRIV_SYSCTL_WRITE;
 1536                 error = priv_check(req->td, priv);
 1537                 if (error)
 1538                         return (error);
 1539         }
 1540 
 1541         if (!oid->oid_handler)
 1542                 return (EINVAL);
 1543 
 1544         if ((oid->oid_kind & CTLTYPE) == CTLTYPE_NODE) {
 1545                 arg1 = (int *)arg1 + indx;
 1546                 arg2 -= indx;
 1547         } else {
 1548                 arg1 = oid->oid_arg1;
 1549                 arg2 = oid->oid_arg2;
 1550         }
 1551 #ifdef MAC
 1552         error = mac_system_check_sysctl(req->td->td_ucred, oid, arg1, arg2,
 1553             req);
 1554         if (error != 0)
 1555                 return (error);
 1556 #endif
 1557         oid->oid_running++;
 1558         SYSCTL_XUNLOCK();
 1559 #ifdef VIMAGE
 1560         if ((oid->oid_kind & CTLFLAG_VNET) && arg1 != NULL)
 1561                 arg1 = (void *)(curvnet->vnet_data_base + (uintptr_t)arg1);
 1562 #endif
 1563         if (!(oid->oid_kind & CTLFLAG_MPSAFE))
 1564                 mtx_lock(&Giant);
 1565         error = oid->oid_handler(oid, arg1, arg2, req);
 1566         if (!(oid->oid_kind & CTLFLAG_MPSAFE))
 1567                 mtx_unlock(&Giant);
 1568 
 1569         KFAIL_POINT_ERROR(_debug_fail_point, sysctl_running, error);
 1570 
 1571         SYSCTL_XLOCK();
 1572         oid->oid_running--;
 1573         if (oid->oid_running == 0 && (oid->oid_kind & CTLFLAG_DYING) != 0)
 1574                 wakeup(&oid->oid_running);
 1575         return (error);
 1576 }
 1577 
 1578 #ifndef _SYS_SYSPROTO_H_
 1579 struct sysctl_args {
 1580         int     *name;
 1581         u_int   namelen;
 1582         void    *old;
 1583         size_t  *oldlenp;
 1584         void    *new;
 1585         size_t  newlen;
 1586 };
 1587 #endif
 1588 int
 1589 sys___sysctl(struct thread *td, struct sysctl_args *uap)
 1590 {
 1591         int error, i, name[CTL_MAXNAME];
 1592         size_t j;
 1593 
 1594         if (uap->namelen > CTL_MAXNAME || uap->namelen < 2)
 1595                 return (EINVAL);
 1596 
 1597         error = copyin(uap->name, &name, uap->namelen * sizeof(int));
 1598         if (error)
 1599                 return (error);
 1600 
 1601         error = userland_sysctl(td, name, uap->namelen,
 1602                 uap->old, uap->oldlenp, 0,
 1603                 uap->new, uap->newlen, &j, 0);
 1604         if (error && error != ENOMEM)
 1605                 return (error);
 1606         if (uap->oldlenp) {
 1607                 i = copyout(&j, uap->oldlenp, sizeof(j));
 1608                 if (i)
 1609                         return (i);
 1610         }
 1611         return (error);
 1612 }
 1613 
 1614 /*
 1615  * This is used from various compatibility syscalls too.  That's why name
 1616  * must be in kernel space.
 1617  */
 1618 int
 1619 userland_sysctl(struct thread *td, int *name, u_int namelen, void *old,
 1620     size_t *oldlenp, int inkernel, void *new, size_t newlen, size_t *retval,
 1621     int flags)
 1622 {
 1623         int error = 0, memlocked;
 1624         struct sysctl_req req;
 1625 
 1626         bzero(&req, sizeof req);
 1627 
 1628         req.td = td;
 1629         req.flags = flags;
 1630 
 1631         if (oldlenp) {
 1632                 if (inkernel) {
 1633                         req.oldlen = *oldlenp;
 1634                 } else {
 1635                         error = copyin(oldlenp, &req.oldlen, sizeof(*oldlenp));
 1636                         if (error)
 1637                                 return (error);
 1638                 }
 1639         }
 1640         req.validlen = req.oldlen;
 1641 
 1642         if (old) {
 1643                 if (!useracc(old, req.oldlen, VM_PROT_WRITE))
 1644                         return (EFAULT);
 1645                 req.oldptr= old;
 1646         }
 1647 
 1648         if (new != NULL) {
 1649                 if (!useracc(new, newlen, VM_PROT_READ))
 1650                         return (EFAULT);
 1651                 req.newlen = newlen;
 1652                 req.newptr = new;
 1653         }
 1654 
 1655         req.oldfunc = sysctl_old_user;
 1656         req.newfunc = sysctl_new_user;
 1657         req.lock = REQ_UNWIRED;
 1658 
 1659 #ifdef KTRACE
 1660         if (KTRPOINT(curthread, KTR_SYSCTL))
 1661                 ktrsysctl(name, namelen);
 1662 #endif
 1663 
 1664         if (req.oldlen > PAGE_SIZE) {
 1665                 memlocked = 1;
 1666                 sx_xlock(&sysctlmemlock);
 1667         } else
 1668                 memlocked = 0;
 1669         CURVNET_SET(TD_TO_VNET(td));
 1670 
 1671         for (;;) {
 1672                 req.oldidx = 0;
 1673                 req.newidx = 0;
 1674                 SYSCTL_XLOCK();
 1675                 error = sysctl_root(0, name, namelen, &req);
 1676                 SYSCTL_XUNLOCK();
 1677                 if (error != EAGAIN)
 1678                         break;
 1679                 kern_yield(PRI_USER);
 1680         }
 1681 
 1682         CURVNET_RESTORE();
 1683 
 1684         if (req.lock == REQ_WIRED && req.validlen > 0)
 1685                 vsunlock(req.oldptr, req.validlen);
 1686         if (memlocked)
 1687                 sx_xunlock(&sysctlmemlock);
 1688 
 1689         if (error && error != ENOMEM)
 1690                 return (error);
 1691 
 1692         if (retval) {
 1693                 if (req.oldptr && req.oldidx > req.validlen)
 1694                         *retval = req.validlen;
 1695                 else
 1696                         *retval = req.oldidx;
 1697         }
 1698         return (error);
 1699 }
 1700 
 1701 /*
 1702  * Drain into a sysctl struct.  The user buffer should be wired if a page
 1703  * fault would cause issue.
 1704  */
 1705 static int
 1706 sbuf_sysctl_drain(void *arg, const char *data, int len)
 1707 {
 1708         struct sysctl_req *req = arg;
 1709         int error;
 1710 
 1711         error = SYSCTL_OUT(req, data, len);
 1712         KASSERT(error >= 0, ("Got unexpected negative value %d", error));
 1713         return (error == 0 ? len : -error);
 1714 }
 1715 
 1716 struct sbuf *
 1717 sbuf_new_for_sysctl(struct sbuf *s, char *buf, int length,
 1718     struct sysctl_req *req)
 1719 {
 1720 
 1721         s = sbuf_new(s, buf, length, SBUF_FIXEDLEN);
 1722         sbuf_set_drain(s, sbuf_sysctl_drain, req);
 1723         return (s);
 1724 }

Cache object: 9d0a3eccf26f76557ecbb5fb70d8f096


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]


This page is part of the FreeBSD/Linux Kernel Cross-Reference, and was automatically generated using a modified version of the LXR engine.