The Design and Implementation of the FreeBSD Operating System, Second Edition
Now available: The Design and Implementation of the FreeBSD Operating System (Second Edition)


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]

FreeBSD/Linux Kernel Cross Reference
sys/kern/kern_sysctl.c

Version: -  FREEBSD  -  FREEBSD-13-STABLE  -  FREEBSD-13-0  -  FREEBSD-12-STABLE  -  FREEBSD-12-0  -  FREEBSD-11-STABLE  -  FREEBSD-11-0  -  FREEBSD-10-STABLE  -  FREEBSD-10-0  -  FREEBSD-9-STABLE  -  FREEBSD-9-0  -  FREEBSD-8-STABLE  -  FREEBSD-8-0  -  FREEBSD-7-STABLE  -  FREEBSD-7-0  -  FREEBSD-6-STABLE  -  FREEBSD-6-0  -  FREEBSD-5-STABLE  -  FREEBSD-5-0  -  FREEBSD-4-STABLE  -  FREEBSD-3-STABLE  -  FREEBSD22  -  l41  -  OPENBSD  -  linux-2.6  -  MK84  -  PLAN9  -  xnu-8792 
SearchContext: -  none  -  3  -  10 

    1 /*      $NetBSD: kern_sysctl.c,v 1.266 2020/08/27 14:11:57 riastradh Exp $      */
    2 
    3 /*-
    4  * Copyright (c) 2003, 2007, 2008 The NetBSD Foundation, Inc.
    5  * All rights reserved.
    6  *
    7  * This code is derived from software contributed to The NetBSD Foundation
    8  * by Andrew Brown.
    9  *
   10  * Redistribution and use in source and binary forms, with or without
   11  * modification, are permitted provided that the following conditions
   12  * are met:
   13  * 1. Redistributions of source code must retain the above copyright
   14  *    notice, this list of conditions and the following disclaimer.
   15  * 2. Redistributions in binary form must reproduce the above copyright
   16  *    notice, this list of conditions and the following disclaimer in the
   17  *    documentation and/or other materials provided with the distribution.
   18  *
   19  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
   20  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
   21  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
   22  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
   23  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
   24  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
   25  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
   26  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
   27  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
   28  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
   29  * POSSIBILITY OF SUCH DAMAGE.
   30  */
   31 
   32 /*-
   33  * Copyright (c) 1982, 1986, 1989, 1993
   34  *      The Regents of the University of California.  All rights reserved.
   35  *
   36  * This code is derived from software contributed to Berkeley by
   37  * Mike Karels at Berkeley Software Design, Inc.
   38  *
   39  * Redistribution and use in source and binary forms, with or without
   40  * modification, are permitted provided that the following conditions
   41  * are met:
   42  * 1. Redistributions of source code must retain the above copyright
   43  *    notice, this list of conditions and the following disclaimer.
   44  * 2. Redistributions in binary form must reproduce the above copyright
   45  *    notice, this list of conditions and the following disclaimer in the
   46  *    documentation and/or other materials provided with the distribution.
   47  * 3. Neither the name of the University nor the names of its contributors
   48  *    may be used to endorse or promote products derived from this software
   49  *    without specific prior written permission.
   50  *
   51  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
   52  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
   53  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
   54  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
   55  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
   56  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
   57  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
   58  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
   59  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
   60  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
   61  * SUCH DAMAGE.
   62  *
   63  *      @(#)kern_sysctl.c       8.9 (Berkeley) 5/20/95
   64  */
   65 
   66 /*
   67  * sysctl system call.
   68  */
   69 
   70 #define __COMPAT_SYSCTL
   71 
   72 #include <sys/cdefs.h>
   73 __KERNEL_RCSID(0, "$NetBSD: kern_sysctl.c,v 1.266 2020/08/27 14:11:57 riastradh Exp $");
   74 
   75 #ifdef _KERNEL_OPT
   76 #include "opt_defcorename.h"
   77 #endif
   78 
   79 #include "ksyms.h"
   80 
   81 #include <sys/param.h>
   82 #include <sys/types.h>
   83 
   84 #include <sys/buf.h>
   85 #include <sys/cprng.h>
   86 #include <sys/kauth.h>
   87 #include <sys/ksyms.h>
   88 #include <sys/ktrace.h>
   89 #include <sys/malloc.h>
   90 #include <sys/mount.h>
   91 #include <sys/once.h>
   92 #include <sys/rndsource.h>
   93 #include <sys/syscallargs.h>
   94 #include <sys/sysctl.h>
   95 #include <sys/systm.h>
   96 
   97 #include <crypto/blake2/blake2s.h>
   98 
      /*
       * NOTE(review): MAXDESCLEN is not referenced in the visible part of
       * the file; presumably it caps node description strings -- confirm
       * against sysctl_describe().
       */
   99 #define MAXDESCLEN      1024
      /* malloc(9) types: one for node structures, one for miscellaneous data */
  100 MALLOC_DEFINE(M_SYSCTLNODE, "sysctlnode", "sysctl node structures");
  101 MALLOC_DEFINE(M_SYSCTLDATA, "sysctldata", "misc sysctl data");
  102 
      /*
       * File-local helpers.  sysctl_mmap() is what sysctl_dispatch() hands
       * CTL_MMAP requests to.
       */
  103 static int sysctl_mmap(SYSCTLFN_PROTO);
  104 static int sysctl_alloc(struct sysctlnode *, int);
  105 static int sysctl_realloc(struct sysctlnode *);
  106 
      /*
       * Converters between the caller's node description and the kernel's
       * struct sysctlnode; used by sysctl_query() and sysctl_create().
       */
  107 static int sysctl_cvt_in(struct lwp *, int *, const void *, size_t,
  108                          struct sysctlnode *);
  109 static int sysctl_cvt_out(struct lwp *, int, const struct sysctlnode *,
  110                           void *, size_t, size_t *);
  111 
      /* bookkeeping for struct sysctllog (defined below) */
  112 static int sysctl_log_add(struct sysctllog **, const struct sysctlnode *);
  113 static int sysctl_log_realloc(struct sysctllog *);
  114 
      /*
       * Signature of the boot-time setup functions gathered in the
       * sysctl_funcs link set; sysctl_init() calls each one with NULL.
       */
  115 typedef void sysctl_setup_func(struct sysctllog **);
  116 
      /*
       * Debug printf.  The argument is pasted directly after "printf", so
       * it must carry its own parentheses: DPRINTF(("fmt %d\n", x)).
       * Expands to nothing unless SYSCTL_DEBUG is defined.
       */
  117 #ifdef SYSCTL_DEBUG
  118 #define DPRINTF(a)      printf a
  119 #else
  120 #define DPRINTF(a)
  121 #endif
  122 
      /*
       * Log handed (by reference) to the sysctl_setup_func functions and
       * maintained by sysctl_log_add()/sysctl_log_realloc().
       * NOTE(review): the field meanings below are inferred from the names
       * only; the code that fills them in is not in view -- confirm.
       */
  123 struct sysctllog {
  124         const struct sysctlnode *log_root;      /* presumably the tree the log refers to */
  125         int *log_num;                           /* presumably the recorded mib numbers */
  126         int log_size, log_left;                 /* presumably capacity / unused room */
  127 };
  128 
  129 /*
  130  * the "root" of the new sysctl tree
  131  */
      /*
       * sysctl_locate() falls back to this node when it is given no tree.
       * sysctl_num starts at 0 here and is overwritten with CREATE_BASE by
       * sysctl_init(); sysctl_finalize() later ORs CTLFLAG_PERMANENT into
       * sysctl_flags.
       */
  132 struct sysctlnode sysctl_root = {
  133         .sysctl_flags = SYSCTL_VERSION|
  134             CTLFLAG_ROOT|CTLFLAG_READWRITE|
  135             CTLTYPE_NODE,
  136         .sysctl_num = 0,
  137         .sysctl_size = sizeof(struct sysctlnode),
  138         .sysctl_name = "(root)",
  139 };
  140 
  141 /*
  142  * link set of functions that add nodes at boot time (see also
  143  * sysctl_buildtree())
  144  */
      /* sysctl_init() walks this set and calls every entry with a NULL log */
  145 __link_set_decl(sysctl_funcs, sysctl_setup_func);
  146 
  147 /*
  148  * The `sysctl_treelock' is intended to serialize access to the sysctl
  149  * tree.  XXX This has serious problems; allocating memory and
  150  * copying data out with the lock held is insane.
  151  */
      /*
       * Initialised in sysctl_init(); taken via sysctl_lock() or
       * sysctl_relock() and dropped via sysctl_unlock().
       */
  152 krwlock_t sysctl_treelock;
  153 
      /*
       * Initialised in sysctl_init() as a MUTEX_DEFAULT/IPL_NONE mutex.
       * NOTE(review): its users are outside the visible part of the file.
       */
  154 kmutex_t sysctl_file_marker_lock;
  155 
  156 /*
  157  * Attributes stored in the kernel.
  158  */
      /* NOTE(review): the *len variables presumably track the length in use -- confirm */
  159 char hostname[MAXHOSTNAMELEN];
  160 int hostnamelen;
  161 
  162 char domainname[MAXHOSTNAMELEN];
  163 int domainnamelen;
  164 
  165 long hostid;
  166 
      /*
       * Core file name template; falls back to "%n.core" when DEFCORENAME
       * has not been defined already (opt_defcorename.h is included above
       * under _KERNEL_OPT).
       */
  167 #ifndef DEFCORENAME
  168 #define DEFCORENAME     "%n.core"
  169 #endif
  170 char defcorename[MAXPATHLEN] = DEFCORENAME;
  171 
  172 /*
  173  * ********************************************************************
  174  * Section 0: Some simple glue
  175  * ********************************************************************
  176  * By wrapping copyin(), copyout(), and copyinstr() like this, we can
  177  * stop caring about who's calling us and simplify some code a bunch.
  178  * ********************************************************************
  179  */
  180 int
  181 sysctl_copyin(struct lwp *l, const void *uaddr, void *kaddr, size_t len)
  182 {
  183         int error;
  184 
  185         if (l != NULL) {
  186                 error = copyin(uaddr, kaddr, len);
  187                 ktrmibio(-1, UIO_WRITE, uaddr, len, error);
  188         } else {
  189                 error = kcopy(uaddr, kaddr, len);
  190         }
  191 
  192         return error;
  193 }
  194 
  195 int
  196 sysctl_copyout(struct lwp *l, const void *kaddr, void *uaddr, size_t len)
  197 {
  198         int error;
  199 
  200         if (l != NULL) {
  201                 error = copyout(kaddr, uaddr, len);
  202                 ktrmibio(-1, UIO_READ, uaddr, len, error);
  203         } else {
  204                 error = kcopy(kaddr, uaddr, len);
  205         }
  206         
  207         return error;
  208 }
  209 
  210 int
  211 sysctl_copyinstr(struct lwp *l, const void *uaddr, void *kaddr,
  212                  size_t len, size_t *done)
  213 {
  214         int error;
  215 
  216         if (l != NULL) {
  217                 error = copyinstr(uaddr, kaddr, len, done);
  218                 ktrmibio(-1, UIO_WRITE, uaddr, len, error);
  219         } else {
  220                 error = copystr(uaddr, kaddr, len, done);
  221         }
  222 
  223         return error;
  224 }
  225 
  226 /*
  227  * ********************************************************************
  228  * Initialize sysctl subsystem.
  229  * ********************************************************************
  230  */
  231 void
  232 sysctl_init(void)
  233 {
  234         sysctl_setup_func *const *sysctl_setup;
  235 
  236         rw_init(&sysctl_treelock);
  237 
  238         /*
  239          * dynamic mib numbers start here
  240          */
  241         sysctl_root.sysctl_num = CREATE_BASE;
  242         sysctl_basenode_init();
  243 
  244         __link_set_foreach(sysctl_setup, sysctl_funcs) {
  245                 (**sysctl_setup)(NULL);
  246         }
  247 
  248         mutex_init(&sysctl_file_marker_lock, MUTEX_DEFAULT, IPL_NONE);
  249 }
  250 
  251 /*
  252  * Setting this means no more permanent nodes can be added,
  253  * trees that claim to be readonly at the root now are, and if
  254  * the main tree is readonly, *everything* is.
  255  *
  256  * Also starts up the PRNG used for the "random" sysctl: it's
  257  * better to start it later than sooner.
  258  *
  259  * Call this at the end of kernel init.
  260  */
  261 void
  262 sysctl_finalize(void)
  263 {
  264 
  265         sysctl_root.sysctl_flags |= CTLFLAG_PERMANENT;
  266 }
  267 
  268 /*
  269  * ********************************************************************
  270  * The main native sysctl system call itself.
  271  * ********************************************************************
  272  */
  273 int
  274 sys___sysctl(struct lwp *l, const struct sys___sysctl_args *uap, register_t *retval)
  275 {
  276         /* {
  277                 syscallarg(const int *) name;
  278                 syscallarg(u_int) namelen;
  279                 syscallarg(void *) old;
  280                 syscallarg(size_t *) oldlenp;
  281                 syscallarg(const void *) new;
  282                 syscallarg(size_t) newlen;
  283         } */
  284         int error, nerror, name[CTL_MAXNAME];
  285         size_t oldlen, savelen, *oldlenp;
  286 
  287         /*
  288          * get oldlen
  289          */
  290         oldlen = 0;
  291         oldlenp = SCARG(uap, oldlenp);
  292         if (oldlenp != NULL) {
  293                 error = copyin(oldlenp, &oldlen, sizeof(oldlen));
  294                 if (error)
  295                         return (error);
  296         }
  297         savelen = oldlen;
  298 
  299         /*
  300          * top-level sysctl names may or may not be non-terminal, but
  301          * we don't care
  302          */
  303         if (SCARG(uap, namelen) > CTL_MAXNAME || SCARG(uap, namelen) < 1)
  304                 return (EINVAL);
  305         error = copyin(SCARG(uap, name), &name,
  306                        SCARG(uap, namelen) * sizeof(int));
  307         if (error)
  308                 return (error);
  309 
  310         ktrmib(name, SCARG(uap, namelen));
  311 
  312         sysctl_lock(SCARG(uap, newv) != NULL);
  313 
  314         /*
  315          * do sysctl work (NULL means main built-in default tree)
  316          */
  317         error = sysctl_dispatch(&name[0], SCARG(uap, namelen),
  318                                 SCARG(uap, oldv), &oldlen,
  319                                 SCARG(uap, newv), SCARG(uap, newlen),
  320                                 &name[0], l, NULL);
  321 
  322         /*
  323          * release the sysctl lock
  324          */
  325         sysctl_unlock();
  326 
  327         /*
  328          * set caller's oldlen to new value even in the face of an
  329          * error (if this gets an error and they didn't have one, they
  330          * get this one)
  331          */
  332         if (oldlenp) {
  333                 nerror = copyout(&oldlen, oldlenp, sizeof(oldlen));
  334                 if (error == 0)
  335                         error = nerror;
  336         }
  337 
  338         /*
  339          * if the only problem is that we weren't given enough space,
  340          * that's an ENOMEM error
  341          */
  342         if (error == 0 && SCARG(uap, oldv) != NULL && savelen < oldlen)
  343                 error = ENOMEM;
  344 
  345         return (error);
  346 }
  347 
  348 /*
  349  * ********************************************************************
  350  * Section 1: How the tree is used
  351  * ********************************************************************
  352  * Implementations of sysctl for emulations should typically need only
  353  * these three functions in this order: lock the tree, dispatch
  354  * request into it, unlock the tree.
  355  * ********************************************************************
  356  */
  357 void
  358 sysctl_lock(bool write)
  359 {
  360 
  361         if (write) {
  362                 rw_enter(&sysctl_treelock, RW_WRITER);
  363                 curlwp->l_pflag |= LP_SYSCTLWRITE;
  364         } else {
  365                 rw_enter(&sysctl_treelock, RW_READER);
  366                 curlwp->l_pflag &= ~LP_SYSCTLWRITE;
  367         }
  368 }
  369 
  370 void
  371 sysctl_relock(void)
  372 {
  373 
  374         if ((curlwp->l_pflag & LP_SYSCTLWRITE) != 0) {
  375                 rw_enter(&sysctl_treelock, RW_WRITER);
  376         } else {
  377                 rw_enter(&sysctl_treelock, RW_READER);
  378         }
  379 }
  380 
  381 /*
  382  * ********************************************************************
  383  * the main sysctl dispatch routine.  scans the given tree and picks a
  384  * function to call based on what it finds.
  385  * ********************************************************************
  386  */
  387 int
  388 sysctl_dispatch(SYSCTLFN_ARGS)
  389 {
  390         int error;
  391         sysctlfn fn;
  392         int ni;
  393 
  394         KASSERT(rw_lock_held(&sysctl_treelock));
  395 
  396         if (rnode && SYSCTL_VERS(rnode->sysctl_flags) != SYSCTL_VERSION) {
  397                 printf("sysctl_dispatch: rnode %p wrong version\n", rnode);
  398                 error = EINVAL;
  399                 goto out;
  400         }
  401 
  402         fn = NULL;
  403         error = sysctl_locate(l, name, namelen, &rnode, &ni);
  404 
  405         if (rnode->sysctl_func != NULL) {
  406                 /*
  407                  * the node we ended up at has a function, so call it.  it can
  408                  * hand off to query or create if it wants to.
  409                  */
  410                 fn = rnode->sysctl_func;
  411         } else if (error == 0) {
  412                 /*
  413                  * we found the node they were looking for, so do a lookup.
  414                  */
  415                 fn = (sysctlfn)sysctl_lookup; /* XXX may write to rnode */
  416         } else if (error == ENOENT && (ni + 1) == namelen && name[ni] < 0) {
  417                 /*
  418                  * prospective parent node found, but the terminal node was
  419                  * not.  generic operations associate with the parent.
  420                  */
  421                 switch (name[ni]) {
  422                 case CTL_QUERY:
  423                         fn = sysctl_query;
  424                         break;
  425                 case CTL_CREATE:
  426 #if NKSYMS > 0
  427                 case CTL_CREATESYM:
  428 #endif /* NKSYMS > 0 */
  429                         if (newp == NULL) {
  430                                 error = EINVAL;
  431                                 break;
  432                         }
  433                         KASSERT(rw_write_held(&sysctl_treelock));
  434                         fn = (sysctlfn)sysctl_create; /* we own the rnode */
  435                         break;
  436                 case CTL_DESTROY:
  437                         if (newp == NULL) {
  438                                 error = EINVAL;
  439                                 break;
  440                         }
  441                         KASSERT(rw_write_held(&sysctl_treelock));
  442                         fn = (sysctlfn)sysctl_destroy; /* we own the rnode */
  443                         break;
  444                 case CTL_MMAP:
  445                         fn = (sysctlfn)sysctl_mmap; /* we own the rnode */
  446                         break;
  447                 case CTL_DESCRIBE:
  448                         fn = sysctl_describe;
  449                         break;
  450                 default:
  451                         error = EOPNOTSUPP;
  452                         break;
  453                 }
  454         }
  455 
  456         /*
  457          * after all of that, maybe we found someone who knows how to
  458          * get us what we want?
  459          */
  460         if (fn != NULL)
  461                 error = (*fn)(name + ni, namelen - ni, oldp, oldlenp,
  462                               newp, newlen, name, l, rnode);
  463         else if (error == 0)
  464                 error = EOPNOTSUPP;
  465 
  466 out:
  467         return (error);
  468 }
  469 
  470 /*
  471  * ********************************************************************
  472  * Releases the tree lock.
  473  * ********************************************************************
  474  */
  475 void
  476 sysctl_unlock(void)
  477 {
  478 
  479         rw_exit(&sysctl_treelock);
  480 }
  481 
  482 /*
  483  * ********************************************************************
  484  * Section 2: The main tree interfaces
  485  * ********************************************************************
  486  * This is how sysctl_dispatch() does its work, and you can too, by
  487  * calling these routines from helpers (though typically only
  488  * sysctl_lookup() will be used).  The tree MUST BE LOCKED when these
  489  * are called.
  490  * ********************************************************************
  491  */
  492 
  493 /*
  494  * sysctl_locate -- Finds the node matching the given mib under the
  495  * given tree (via rv).  If no tree is given, we fall back to the
  496  * native tree.  The current process (via l) is used for access
  497  * control on the tree (some nodes may be traversable only by root) and
  498  * on return, nip will show how many numbers in the mib were consumed.
  499  */
  500 int
  501 sysctl_locate(struct lwp *l, const int *name, u_int namelen,
  502               const struct sysctlnode **rnode, int *nip)
  503 {
  504         const struct sysctlnode *node, *pnode;
  505         int tn, si, ni, error, alias;
  506 
  507         KASSERT(rw_lock_held(&sysctl_treelock));
  508 
  509         /*
  510          * basic checks and setup
  511          */
  512         if (*rnode == NULL)
  513                 *rnode = &sysctl_root;
  514         if (nip)
  515                 *nip = 0;
  516         if (namelen == 0)
  517                 return (0);
  518 
  519         /*
  520          * search starts from "root"
  521          */
  522         pnode = *rnode;
  523         if (SYSCTL_VERS(pnode->sysctl_flags) != SYSCTL_VERSION) {
  524                 printf("sysctl_locate: pnode %p wrong version\n", pnode);
  525                 return (EINVAL);
  526         }
  527         node = pnode->sysctl_child;
  528         error = 0;
  529 
  530         /*
  531          * scan for node to which new node should be attached
  532          */
  533         for (ni = 0; ni < namelen; ni++) {
  534                 /*
  535                  * walked off bottom of tree
  536                  */
  537                 if (node == NULL) {
  538                         if (SYSCTL_TYPE(pnode->sysctl_flags) == CTLTYPE_NODE)
  539                                 error = ENOENT;
  540                         else
  541                                 error = ENOTDIR;
  542                         break;
  543                 }
  544                 /*
  545                  * can anyone traverse this node or only root?
  546                  */
  547                 if (l != NULL && (pnode->sysctl_flags & CTLFLAG_PRIVATE) &&
  548                     (error = kauth_authorize_system(l->l_cred,
  549                     KAUTH_SYSTEM_SYSCTL, KAUTH_REQ_SYSTEM_SYSCTL_PRVT,
  550                     NULL, NULL, NULL)) != 0)
  551                         return (error);
  552                 /*
  553                  * find a child node with the right number
  554                  */
  555                 tn = name[ni];
  556                 alias = 0;
  557 
  558                 si = 0;
  559                 /*
  560                  * Note: ANYNUMBER only matches positive integers.
  561                  * Since ANYNUMBER is only permitted on single-node
  562                  * sub-trees (eg proc), check before the loop and skip
  563                  * it if we can.
  564                  */
  565                 if ((node[si].sysctl_flags & CTLFLAG_ANYNUMBER) && (tn >= 0))
  566                         goto foundit;
  567                 for (; si < pnode->sysctl_clen; si++) {
  568                         if (node[si].sysctl_num == tn) {
  569                                 if (node[si].sysctl_flags & CTLFLAG_ALIAS) {
  570                                         if (alias++ == 4)
  571                                                 break;
  572                                         else {
  573                                                 tn = node[si].sysctl_alias;
  574                                                 si = -1;
  575                                         }
  576                                 } else
  577                                         goto foundit;
  578                         }
  579                 }
  580                 /*
  581                  * if we ran off the end, it obviously doesn't exist
  582                  */
  583                 error = ENOENT;
  584                 break;
  585 
  586                 /*
  587                  * so far so good, move on down the line
  588                  */
  589           foundit:
  590                 pnode = &node[si];
  591                 if (SYSCTL_TYPE(pnode->sysctl_flags) == CTLTYPE_NODE)
  592                         node = node[si].sysctl_child;
  593                 else
  594                         node = NULL;
  595         }
  596 
  597         *rnode = pnode;
  598         if (nip)
  599                 *nip = ni;
  600 
  601         return (error);
  602 }
  603 
  604 /*
  605  * sysctl_query -- The auto-discovery engine.  Copies out the structs
  606  * describing nodes under the given node and handles overlay trees.
  607  */
  608 int
  609 sysctl_query(SYSCTLFN_ARGS)
  610 {
  611         int error, ni, elim, v;
  612         size_t out, left, t;
  613         const struct sysctlnode *enode, *onode;
  614         struct sysctlnode qnode;
  615 
  616         KASSERT(rw_lock_held(&sysctl_treelock));
  617 
  618         if (SYSCTL_VERS(rnode->sysctl_flags) != SYSCTL_VERSION) {
  619                 printf("sysctl_query: rnode %p wrong version\n", rnode);
  620                 return (EINVAL);
  621         }
  622 
  623         if (SYSCTL_TYPE(rnode->sysctl_flags) != CTLTYPE_NODE)
  624                 return (ENOTDIR);
  625         if (namelen != 1 || name[0] != CTL_QUERY)
  626                 return (EINVAL);
  627 
  628         error = 0;
  629         out = 0;
  630         left = *oldlenp;
  631         elim = 0;
  632         enode = NULL;
  633 
  634         /*
  635          * translate the given request to a current node
  636          */
  637         error = sysctl_cvt_in(l, &v, newp, newlen, &qnode);
  638         if (error)
  639                 return (error);
  640 
  641         /*
  642          * if the request specifies a version, check it
  643          */
  644         if (qnode.sysctl_ver != 0) {
  645                 enode = rnode;
  646                 if (qnode.sysctl_ver != enode->sysctl_ver &&
  647                     qnode.sysctl_ver != sysctl_rootof(enode)->sysctl_ver)
  648                         return (EINVAL);
  649         }
  650 
  651         /*
  652          * process has overlay tree
  653          */
  654         if (l && l->l_proc->p_emul->e_sysctlovly) {
  655                 enode = l->l_proc->p_emul->e_sysctlovly;
  656                 elim = (name - oname);
  657                 error = sysctl_locate(l, oname, elim, &enode, NULL);
  658                 if (error == 0) {
  659                         /* ah, found parent in overlay */
  660                         elim = enode->sysctl_clen;
  661                         enode = enode->sysctl_child;
  662                 } else {
  663                         error = 0;
  664                         elim = 0;
  665                         enode = NULL;
  666                 }
  667         }
  668 
  669         for (ni = 0; ni < rnode->sysctl_clen; ni++) {
  670                 onode = &rnode->sysctl_child[ni];
  671                 if (enode && enode->sysctl_num == onode->sysctl_num) {
  672                         if (SYSCTL_TYPE(enode->sysctl_flags) != CTLTYPE_NODE)
  673                                 onode = enode;
  674                         if (--elim > 0)
  675                                 enode++;
  676                         else
  677                                 enode = NULL;
  678                 }
  679                 error = sysctl_cvt_out(l, v, onode, oldp, left, &t);
  680                 if (error)
  681                         return (error);
  682                 if (oldp != NULL)
  683                         oldp = (char*)oldp + t;
  684                 out += t;
  685                 left -= MIN(left, t);
  686         }
  687 
  688         /*
  689          * overlay trees *MUST* be entirely consumed
  690          */
  691         KASSERT(enode == NULL);
  692 
  693         *oldlenp = out;
  694 
  695         return (error);
  696 }
  697 
  698 /*
  699  * sysctl_create -- Adds a node (the description of which is taken
  700  * from newp) to the tree, returning a copy of it in the space pointed
  701  * to by oldp.  In the event that the requested slot is already taken
  702  * (either by name or by number), the offending node is returned
  703  * instead.  Yes, this is complex, but we want to make sure everything
  704  * is proper.
  705  */
  706 #ifdef SYSCTL_DEBUG_CREATE
  707 int _sysctl_create(SYSCTLFN_ARGS);
  708 int
  709 _sysctl_create(SYSCTLFN_ARGS)
  710 #else
  711 int
  712 sysctl_create(SYSCTLFN_ARGS)
  713 #endif
  714 {
  715         struct sysctlnode nnode, *node, *pnode;
  716         int error, ni, at, nm, type, nsz, sz, flags, anum, v;
  717         void *own;
  718 
  719         KASSERT(rw_write_held(&sysctl_treelock));
  720 
  721         error = 0;
  722         own = NULL;
  723         anum = -1;
  724 
  725         if (SYSCTL_VERS(rnode->sysctl_flags) != SYSCTL_VERSION) {
  726                 printf("sysctl_create: rnode %p wrong version\n", rnode);
  727                 return (EINVAL);
  728         }
  729 
  730         if (namelen != 1 || (name[namelen - 1] != CTL_CREATE
  731 #if NKSYMS > 0
  732                              && name[namelen - 1] != CTL_CREATESYM
  733 #endif /* NKSYMS > 0 */
  734                              ))
  735                 return (EINVAL);
  736 
  737         /*
  738          * processes can only add nodes at securelevel 0, must be
  739          * root, and can't add nodes to a parent that's not writeable
  740          */
  741         if (l != NULL) {
  742 #ifndef SYSCTL_DISALLOW_CREATE
  743                 error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_SYSCTL,
  744                     KAUTH_REQ_SYSTEM_SYSCTL_ADD, NULL, NULL, NULL);
  745                 if (error)
  746                         return (error);
  747                 if (!(rnode->sysctl_flags & CTLFLAG_READWRITE))
  748 #endif /* SYSCTL_DISALLOW_CREATE */
  749                         return (EPERM);
  750         }
  751 
  752         /*
  753          * nothing can add a node if:
  754          * we've finished initial set up of this tree and
  755          * (the tree itself is not writeable or
  756          * the entire sysctl system is not writeable)
  757          */
  758         if ((sysctl_rootof(rnode)->sysctl_flags & CTLFLAG_PERMANENT) &&
  759             (!(sysctl_rootof(rnode)->sysctl_flags & CTLFLAG_READWRITE) ||
  760              !(sysctl_root.sysctl_flags & CTLFLAG_READWRITE)))
  761                 return (EPERM);
  762 
  763         /*
  764          * it must be a "node", not a "int" or something
  765          */
  766         if (SYSCTL_TYPE(rnode->sysctl_flags) != CTLTYPE_NODE)
  767                 return (ENOTDIR);
  768         if (rnode->sysctl_flags & CTLFLAG_ALIAS) {
  769                 printf("sysctl_create: attempt to add node to aliased "
  770                        "node %p\n", rnode);
  771                 return (EINVAL);
  772         }
  773         pnode = __UNCONST(rnode); /* we are adding children to this node */
  774 
  775         if (newp == NULL)
  776                 return (EINVAL);
  777         error = sysctl_cvt_in(l, &v, newp, newlen, &nnode);
  778         if (error)
  779                 return (error);
  780 
  781         /*
  782          * nodes passed in don't *have* parents
  783          */
  784         if (nnode.sysctl_parent != NULL)
  785                 return (EINVAL);
  786 
  787         /*
  788          * if we are indeed adding it, it should be a "good" name and
  789          * number
  790          */
  791         nm = nnode.sysctl_num;
  792 #if NKSYMS > 0
  793         if (nm == CTL_CREATESYM)
  794                 nm = CTL_CREATE;
  795 #endif /* NKSYMS > 0 */
  796         if (nm < 0 && nm != CTL_CREATE)
  797                 return (EINVAL);
  798 
  799         /*
  800          * the name can't start with a digit
  801          */
  802         if (nnode.sysctl_name[0] >= '' &&
  803             nnode.sysctl_name[0] <= '9')
  804                 return (EINVAL);
  805 
  806         /*
  807          * the name must be only alphanumerics or - or _, longer than
  808          * 0 bytes and less than SYSCTL_NAMELEN
  809          */
  810         nsz = 0;
  811         while (nsz < SYSCTL_NAMELEN && nnode.sysctl_name[nsz] != '\0') {
  812                 if ((nnode.sysctl_name[nsz] >= '' &&
  813                      nnode.sysctl_name[nsz] <= '9') ||
  814                     (nnode.sysctl_name[nsz] >= 'A' &&
  815                      nnode.sysctl_name[nsz] <= 'Z') ||
  816                     (nnode.sysctl_name[nsz] >= 'a' &&
  817                      nnode.sysctl_name[nsz] <= 'z') ||
  818                     nnode.sysctl_name[nsz] == '-' ||
  819                     nnode.sysctl_name[nsz] == '_')
  820                         nsz++;
  821                 else
  822                         return (EINVAL);
  823         }
  824         if (nsz == 0 || nsz == SYSCTL_NAMELEN)
  825                 return (EINVAL);
  826 
  827         /*
  828          * various checks revolve around size vs type, etc
  829          */
  830         type = SYSCTL_TYPE(nnode.sysctl_flags);
  831         flags = SYSCTL_FLAGS(nnode.sysctl_flags);
  832         sz = nnode.sysctl_size;
  833 
  834         /*
  835          * find out if there's a collision, and if so, let the caller
  836          * know what they collided with
  837          */
  838         node = pnode->sysctl_child;
  839         at = 0;
  840         if (node) {
  841                 if ((flags | node->sysctl_flags) & CTLFLAG_ANYNUMBER)
  842                         /* No siblings for a CTLFLAG_ANYNUMBER node */
  843                         return EINVAL;
  844                 for (ni = 0; ni < pnode->sysctl_clen; ni++) {
  845                         if (nm == node[ni].sysctl_num ||
  846                             strcmp(nnode.sysctl_name, node[ni].sysctl_name) == 0) {
  847                                 /*
  848                                  * ignore error here, since we
  849                                  * are already fixed on EEXIST
  850                                  */
  851                                 (void)sysctl_cvt_out(l, v, &node[ni], oldp,
  852                                                      *oldlenp, oldlenp);
  853                                 return (EEXIST);
  854                         }
  855                         if (nm > node[ni].sysctl_num)
  856                                 at++;
  857                 }
  858         }
  859 
  860         /*
  861          * use sysctl_ver to add to the tree iff it hasn't changed
  862          */
  863         if (nnode.sysctl_ver != 0) {
  864                 /*
  865                  * a specified value must match either the parent
  866                  * node's version or the root node's version
  867                  */
  868                 if (nnode.sysctl_ver != sysctl_rootof(rnode)->sysctl_ver &&
  869                     nnode.sysctl_ver != rnode->sysctl_ver) {
  870                         return (EINVAL);
  871                 }
  872         }
  873 
  874         /*
  875          * only the kernel can assign functions to entries
  876          */
  877         if (l != NULL && nnode.sysctl_func != NULL)
  878                 return (EPERM);
  879 
  880         /*
  881          * only the kernel can create permanent entries, and only then
  882          * before the kernel is finished setting itself up
  883          */
  884         if (l != NULL && (flags & ~SYSCTL_USERFLAGS))
  885                 return (EPERM);
  886         if ((flags & CTLFLAG_PERMANENT) &
  887             (sysctl_root.sysctl_flags & CTLFLAG_PERMANENT))
  888                 return (EPERM);
  889         if ((flags & (CTLFLAG_OWNDATA | CTLFLAG_IMMEDIATE)) ==
  890             (CTLFLAG_OWNDATA | CTLFLAG_IMMEDIATE))
  891                 return (EINVAL);
  892         if ((flags & CTLFLAG_IMMEDIATE) &&
  893             type != CTLTYPE_INT && type != CTLTYPE_QUAD && type != CTLTYPE_BOOL)
  894                 return (EINVAL);
  895 
  896         /*
  897          * check size, or set it if unset and we can figure it out.
  898          * kernel created nodes are allowed to have a function instead
  899          * of a size (or a data pointer).
  900          */
  901         switch (type) {
  902         case CTLTYPE_NODE:
  903                 /*
  904                  * only *i* can assert the size of a node
  905                  */
  906                 if (flags & CTLFLAG_ALIAS) {
  907                         anum = nnode.sysctl_alias;
  908                         if (anum < 0)
  909                                 return (EINVAL);
  910                         nnode.sysctl_alias = 0;
  911                 }
  912                 if (sz != 0 || nnode.sysctl_data != NULL)
  913                         return (EINVAL);
  914                 if (nnode.sysctl_csize != 0 ||
  915                     nnode.sysctl_clen != 0 ||
  916                     nnode.sysctl_child != 0)
  917                         return (EINVAL);
  918                 if (flags & CTLFLAG_OWNDATA)
  919                         return (EINVAL);
  920                 sz = sizeof(struct sysctlnode);
  921                 break;
  922         case CTLTYPE_INT:
  923                 /*
  924                  * since an int is an int, if the size is not given or
  925                  * is wrong, we can "int-uit" it.
  926                  */
  927                 if (sz != 0 && sz != sizeof(int))
  928                         return (EINVAL);
  929                 sz = sizeof(int);
  930                 break;
  931         case CTLTYPE_STRING:
  932                 /*
  933                  * strings are a little more tricky
  934                  */
  935                 if (sz == 0) {
  936                         if (l == NULL) {
  937                                 if (nnode.sysctl_func == NULL) {
  938                                         if (nnode.sysctl_data == NULL)
  939                                                 return (EINVAL);
  940                                         else
  941                                                 sz = strlen(nnode.sysctl_data) +
  942                                                     1;
  943                                 }
  944                         } else if (nnode.sysctl_data == NULL &&
  945                                  flags & CTLFLAG_OWNDATA) {
  946                                 return (EINVAL);
  947                         } else {
  948                                 char *vp, *e;
  949                                 size_t s;
  950 
  951                                 /*
  952                                  * we want a rough idea of what the
  953                                  * size is now
  954                                  */
  955                                 vp = malloc(PAGE_SIZE, M_SYSCTLDATA, M_WAITOK);
  956                                 if (vp == NULL)
  957                                         return (ENOMEM);
  958                                 e = nnode.sysctl_data;
  959                                 do {
  960                                         error = copyinstr(e, vp, PAGE_SIZE, &s);
  961                                         if (error) {
  962                                                 if (error != ENAMETOOLONG) {
  963                                                         free(vp, M_SYSCTLDATA);
  964                                                         return (error);
  965                                                 }
  966                                                 e += PAGE_SIZE;
  967                                                 if ((e - 32 * PAGE_SIZE) >
  968                                                     (char*)nnode.sysctl_data) {
  969                                                         free(vp, M_SYSCTLDATA);
  970                                                         return (ERANGE);
  971                                                 }
  972                                         }
  973                                 } while (error != 0);
  974                                 sz = s + (e - (char*)nnode.sysctl_data);
  975                                 free(vp, M_SYSCTLDATA);
  976                         }
  977                 }
  978                 break;
  979         case CTLTYPE_QUAD:
  980                 if (sz != 0 && sz != sizeof(u_quad_t))
  981                         return (EINVAL);
  982                 sz = sizeof(u_quad_t);
  983                 break;
  984         case CTLTYPE_BOOL:
  985                 /*
  986                  * since an bool is an bool, if the size is not given or
  987                  * is wrong, we can "intuit" it.
  988                  */
  989                 if (sz != 0 && sz != sizeof(bool))
  990                         return (EINVAL);
  991                 sz = sizeof(bool);
  992                 break;
  993         case CTLTYPE_STRUCT:
  994                 if (sz == 0) {
  995                         if (l != NULL || nnode.sysctl_func == NULL)
  996                                 return (EINVAL);
  997                         if (flags & CTLFLAG_OWNDATA)
  998                                 return (EINVAL);
  999                 }
 1000                 break;
 1001         default:
 1002                 return (EINVAL);
 1003         }
 1004 
 1005         /*
 1006          * at this point, if sz is zero, we *must* have a
 1007          * function to go with it and we can't own it.
 1008          */
 1009 
 1010         /*
 1011          *  l  ptr own
 1012          *  0   0   0  -> EINVAL (if no func)
 1013          *  0   0   1  -> own
 1014          *  0   1   0  -> kptr
 1015          *  0   1   1  -> kptr
 1016          *  1   0   0  -> EINVAL
 1017          *  1   0   1  -> own
 1018          *  1   1   0  -> kptr, no own (fault on lookup)
 1019          *  1   1   1  -> uptr, own
 1020          */
 1021         if (type != CTLTYPE_NODE) {
 1022                 if (sz != 0) {
 1023                         if (flags & CTLFLAG_OWNDATA) {
 1024                                 own = malloc(sz, M_SYSCTLDATA, M_WAITOK);
 1025                                 if (own == NULL)
 1026                                         return ENOMEM;
 1027                                 if (nnode.sysctl_data == NULL)
 1028                                         memset(own, 0, sz);
 1029                                 else {
 1030                                         error = sysctl_copyin(l,
 1031                                             nnode.sysctl_data, own, sz);
 1032                                         if (error != 0) {
 1033                                                 free(own, M_SYSCTLDATA);
 1034                                                 return (error);
 1035                                         }
 1036                                 }
 1037                         } else if ((nnode.sysctl_data != NULL) &&
 1038                                  !(flags & CTLFLAG_IMMEDIATE)) {
 1039 #if NKSYMS > 0
 1040                                 if (name[namelen - 1] == CTL_CREATESYM) {
 1041                                         char symname[128]; /* XXX enough? */
 1042                                         u_long symaddr;
 1043                                         size_t symlen;
 1044 
 1045                                         error = sysctl_copyinstr(l,
 1046                                             nnode.sysctl_data, symname,
 1047                                             sizeof(symname), &symlen);
 1048                                         if (error)
 1049                                                 return (error);
 1050                                         error = ksyms_getval(NULL, symname,
 1051                                             &symaddr, KSYMS_EXTERN);
 1052                                         if (error)
 1053                                                 return (error); /* EINVAL? */
 1054                                         nnode.sysctl_data = (void*)symaddr;
 1055                                 }
 1056 #endif /* NKSYMS > 0 */
 1057                                 /*
 1058                                  * Ideally, we'd like to verify here
 1059                                  * that this address is acceptable,
 1060                                  * but...
 1061                                  *
 1062                                  * - it might be valid now, only to
 1063                                  *   become invalid later
 1064                                  *
 1065                                  * - it might be invalid only for the
 1066                                  *   moment and valid later
 1067                                  *
 1068                                  * - or something else.
 1069                                  *
 1070                                  * Since we can't get a good answer,
 1071                                  * we'll just accept the address as
 1072                                  * given, and fault on individual
 1073                                  * lookups.
 1074                                  */
 1075                         }
 1076                 } else if (nnode.sysctl_func == NULL)
 1077                         return (EINVAL);
 1078         }
 1079 
 1080         /*
 1081          * a process can't assign a function to a node, and the kernel
 1082          * can't create a node that has no function or data.
 1083          * (XXX somewhat redundant check)
 1084          */
 1085         if (l != NULL || nnode.sysctl_func == NULL) {
 1086                 if (type != CTLTYPE_NODE &&
 1087                     !(flags & CTLFLAG_IMMEDIATE) &&
 1088                     nnode.sysctl_data == NULL &&
 1089                     own == NULL)
 1090                         return (EINVAL);
 1091         }
 1092 
 1093 #ifdef SYSCTL_DISALLOW_KWRITE
 1094         /*
 1095          * a process can't create a writable node unless it refers to
 1096          * new data.
 1097          */
 1098         if (l != NULL && own == NULL && type != CTLTYPE_NODE &&
 1099             (flags & CTLFLAG_READWRITE) != CTLFLAG_READONLY &&
 1100             !(flags & CTLFLAG_IMMEDIATE))
 1101                 return (EPERM);
 1102 #endif /* SYSCTL_DISALLOW_KWRITE */
 1103 
 1104         /*
 1105          * make sure there's somewhere to put the new stuff.
 1106          */
 1107         if (pnode->sysctl_child == NULL) {
 1108                 if (flags & CTLFLAG_ANYNUMBER)
 1109                         error = sysctl_alloc(pnode, 1);
 1110                 else
 1111                         error = sysctl_alloc(pnode, 0);
 1112                 if (error) {
 1113                         if (own != NULL)
 1114                                 free(own, M_SYSCTLDATA);
 1115                         return (error);
 1116                 }
 1117         }
 1118         node = pnode->sysctl_child;
 1119 
 1120         /*
 1121          * no collisions, so pick a good dynamic number if we need to.
 1122          */
 1123         if (nm == CTL_CREATE) {
 1124                 nm = ++sysctl_root.sysctl_num;
 1125                 for (ni = 0; ni < pnode->sysctl_clen; ni++) {
 1126                         if (nm == node[ni].sysctl_num) {
 1127                                 nm++;
 1128                                 ni = -1;
 1129                         } else if (nm > node[ni].sysctl_num)
 1130                                 at = ni + 1;
 1131                 }
 1132         }
 1133 
 1134         /*
 1135          * oops...ran out of space
 1136          */
 1137         if (pnode->sysctl_clen == pnode->sysctl_csize) {
 1138                 error = sysctl_realloc(pnode);
 1139                 if (error) {
 1140                         if (own != NULL)
 1141                                 free(own, M_SYSCTLDATA);
 1142                         return (error);
 1143                 }
 1144                 node = pnode->sysctl_child;
 1145         }
 1146 
 1147         /*
 1148          * insert new node data
 1149          */
 1150         if (at < pnode->sysctl_clen) {
 1151                 int t;
 1152 
 1153                 /*
 1154                  * move the nodes that should come after the new one
 1155                  */
 1156                 memmove(&node[at + 1], &node[at],
 1157                         (pnode->sysctl_clen - at) * sizeof(struct sysctlnode));
 1158                 memset(&node[at], 0, sizeof(struct sysctlnode));
 1159                 node[at].sysctl_parent = pnode;
 1160                 /*
 1161                  * and...reparent any children of any moved nodes
 1162                  */
 1163                 for (ni = at; ni <= pnode->sysctl_clen; ni++)
 1164                         if (node[ni].sysctl_child != NULL)
 1165                                 for (t = 0; t < node[ni].sysctl_csize; t++)
 1166                                         node[ni].sysctl_child[t].sysctl_parent =
 1167                                                 &node[ni];
 1168         }
 1169         node = &node[at];
 1170         pnode->sysctl_clen++;
 1171 
 1172         strlcpy(node->sysctl_name, nnode.sysctl_name,
 1173                 sizeof(node->sysctl_name));
 1174         node->sysctl_num = nm;
 1175         node->sysctl_size = sz;
 1176         node->sysctl_flags = SYSCTL_VERSION|type|flags; /* XXX other trees */
 1177         node->sysctl_csize = 0;
 1178         node->sysctl_clen = 0;
 1179         if (own) {
 1180                 node->sysctl_data = own;
 1181                 node->sysctl_flags |= CTLFLAG_OWNDATA;
 1182         } else if (flags & CTLFLAG_ALIAS) {
 1183                 node->sysctl_alias = anum;
 1184         } else if (flags & CTLFLAG_IMMEDIATE) {
 1185                 switch (type) {
 1186                 case CTLTYPE_BOOL:
 1187                         node->sysctl_bdata = nnode.sysctl_bdata;
 1188                         break;
 1189                 case CTLTYPE_INT:
 1190                         node->sysctl_idata = nnode.sysctl_idata;
 1191                         break;
 1192                 case CTLTYPE_QUAD:
 1193                         node->sysctl_qdata = nnode.sysctl_qdata;
 1194                         break;
 1195                 }
 1196         } else {
 1197                 node->sysctl_data = nnode.sysctl_data;
 1198                 node->sysctl_flags &= ~CTLFLAG_OWNDATA;
 1199         }
 1200         node->sysctl_func = nnode.sysctl_func;
 1201         node->sysctl_child = NULL;
 1202         /* node->sysctl_parent should already be done */
 1203 
 1204         /*
 1205          * update "version" on path to "root"
 1206          */
 1207         for (; rnode->sysctl_parent != NULL; rnode = rnode->sysctl_parent)
 1208                 ;
 1209         pnode = node;
 1210         for (nm = rnode->sysctl_ver + 1; pnode != NULL;
 1211              pnode = pnode->sysctl_parent)
 1212                 pnode->sysctl_ver = nm;
 1213 
 1214         /* If this fails, the node is already added - the user won't know! */
 1215         error = sysctl_cvt_out(l, v, node, oldp, *oldlenp, oldlenp);
 1216 
 1217         return (error);
 1218 }
 1219 
 1220 /*
 1221  * ********************************************************************
 1222  * A wrapper around sysctl_create() that prints the thing we're trying
 1223  * to add.
       *
       * Only compiled when SYSCTL_DEBUG_CREATE is defined.  It prints the
       * name path and the fields of the node description passed in newp,
       * forwards the unchanged arguments to _sysctl_create(), prints the
       * return code and hands that code back to the caller.
       *
       * NOTE(review): presumably the real creation routine is renamed to
       * _sysctl_create() under this option -- confirm where it is defined.
       *
       * NOTE(review): newp is dereferenced directly below, with no NULL
       * check and no copyin.  Confirm this cannot be reached with
       * newp == NULL or with a pointer that is not directly readable.
 1224  * ********************************************************************
 1225  */
 1226 #ifdef SYSCTL_DEBUG_CREATE
 1227 int
 1228 sysctl_create(SYSCTLFN_ARGS)
 1229 {
 1230         const struct sysctlnode *node;
              /*
               * nl: namelen plus the distance between name and oname.
               * Presumably oname is the start of the full name vector, so
               * nl counts components from there -- TODO confirm.
               */
 1231         int k, rc, ni, nl = namelen + (name - oname);
 1232 
 1233         node = newp;
 1234 
              /*
               * print all but the last name component, then the number
               * from the new node description in place of the last one
               */
 1235         printf("namelen %d (", nl);
 1236         for (ni = 0; ni < nl - 1; ni++)
 1237                 printf(" %d", oname[ni]);
 1238         printf(" %d )\t[%s]\tflags %08x (%08x %d %zu)\n",
 1239                k = node->sysctl_num, /* saved in k for the second report */
 1240                node->sysctl_name,
 1241                node->sysctl_flags,
 1242                SYSCTL_FLAGS(node->sysctl_flags),
 1243                SYSCTL_TYPE(node->sysctl_flags),
 1244                node->sysctl_size);
 1245 
 1246         node = rnode; /* NOTE(review): node is not read again in this function */
 1247         rc = _sysctl_create(SYSCTLFN_CALL(rnode));
 1248 
              /*
               * report the result using the same path and the number
               * captured in k before the call
               */
 1249         printf("sysctl_create(");
 1250         for (ni = 0; ni < nl - 1; ni++)
 1251                 printf(" %d", oname[ni]);
 1252         printf(" %d ) returned %d\n", k, rc);
 1253 
 1254         return (rc);
 1255 }
 1256 #endif /* SYSCTL_DEBUG_CREATE */
 1257 
 1258 /*
 1259  * sysctl_destroy -- Removes a node (as described by newp) from the
 1260  * given tree, returning (if successful) a copy of the dead node in
 1261  * oldp.  Since we're removing stuff, there's not much to check.
       *
       * The node to remove is looked up among the children of rnode by the
       * number in the description; a non-empty name and a non-zero version
       * in the description must also match.
       *
       * Returns 0 on success, otherwise:
       *   EINVAL    - rnode has the wrong sysctl version, the name is not
       *               exactly one CTL_DESTROY component, or newp is NULL
       *   EPERM     - the parent, the tree root or sysctl_root is not
       *               CTLFLAG_READWRITE, the node is CTLFLAG_PERMANENT, or
       *               a process asked while SYSCTL_DISALLOW_CREATE is
       *               defined
       *   ENOENT    - no child of rnode matches the description
       *   ENOTEMPTY - the target is a CTLTYPE_NODE that still has children
       * or whatever error kauth_authorize_system(), sysctl_cvt_in() or
       * sysctl_cvt_out() returned.
       *
       * Asserts that sysctl_treelock is write-held on entry.
 1262  */
 1263 int
 1264 sysctl_destroy(SYSCTLFN_ARGS)
 1265 {
 1266         struct sysctlnode *node, *pnode, onode, nnode;
 1267         int ni, error, v;
 1268 
 1269         KASSERT(rw_write_held(&sysctl_treelock));
 1270 
 1271         if (SYSCTL_VERS(rnode->sysctl_flags) != SYSCTL_VERSION) {
 1272                 printf("sysctl_destroy: rnode %p wrong version\n", rnode);
 1273                 return (EINVAL);
 1274         }
 1275 
 1276         error = 0;
 1277 
              /*
               * the only name component left must be CTL_DESTROY itself
               */
 1278         if (namelen != 1 || name[namelen - 1] != CTL_DESTROY)
 1279                 return (EINVAL);
 1280 
 1281         /*
 1282          * processes can only destroy nodes at securelevel 0, must be
 1283          * root, and can't remove nodes from a parent that's not
 1284          * writeable
               *
               * note the preprocessor conditional: when
               * SYSCTL_DISALLOW_CREATE is defined, the authorization call
               * and the writability test disappear and the return below
               * becomes unconditional for any request with l != NULL.
 1285          */
 1286         if (l != NULL) {
 1287 #ifndef SYSCTL_DISALLOW_CREATE
 1288                 error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_SYSCTL,
 1289                     KAUTH_REQ_SYSTEM_SYSCTL_DELETE, NULL, NULL, NULL);
 1290                 if (error)
 1291                         return (error);
 1292                 if (!(rnode->sysctl_flags & CTLFLAG_READWRITE))
 1293 #endif /* SYSCTL_DISALLOW_CREATE */
 1294                         return (EPERM);
 1295         }
 1296 
 1297         /*
 1298          * nothing can remove a node if:
 1299          * the node is permanent (checked later) or
 1300          * the tree itself is not writeable or
 1301          * the entire sysctl system is not writeable
 1302          *
 1303          * note that we ignore whether setup is complete or not,
 1304          * because these rules always apply.
 1305          */
 1306         if (!(sysctl_rootof(rnode)->sysctl_flags & CTLFLAG_READWRITE) ||
 1307             !(sysctl_root.sysctl_flags & CTLFLAG_READWRITE))
 1308                 return (EPERM);
 1309 
              /*
               * fetch the caller's description of the node to remove into
               * nnode; v is set by sysctl_cvt_in() and handed back to
               * sysctl_cvt_out() at the end
               */
 1310         if (newp == NULL)
 1311                 return (EINVAL);
 1312         error = sysctl_cvt_in(l, &v, newp, newlen, &nnode);
 1313         if (error)
 1314                 return (error);
 1315         memset(&onode, 0, sizeof(struct sysctlnode));
 1316 
              /*
               * scan the children of rnode for the described node; ni ends
               * up equal to sysctl_clen when nothing matched
               */
 1317         node = rnode->sysctl_child;
 1318         for (ni = 0; ni < rnode->sysctl_clen; ni++) {
 1319                 if (nnode.sysctl_num == node[ni].sysctl_num) {
 1320                         /*
 1321                          * if name specified, must match
 1322                          */
 1323                         if (nnode.sysctl_name[0] != '\0' &&
 1324                             strcmp(nnode.sysctl_name, node[ni].sysctl_name))
 1325                                 continue;
 1326                         /*
 1327                          * if version specified, must match
 1328                          */
 1329                         if (nnode.sysctl_ver != 0 &&
 1330                             nnode.sysctl_ver != node[ni].sysctl_ver)
 1331                                 continue;
 1332                         /*
 1333                          * this must be the one
 1334                          */
 1335                         break;
 1336                 }
 1337         }
 1338         if (ni == rnode->sysctl_clen)
 1339                 return (ENOENT);
 1340         node = &node[ni];
 1341         pnode = node->sysctl_parent;
 1342 
 1343         /*
 1344          * if the kernel says permanent, it is, so there.  nyah.
 1345          */
 1346         if (SYSCTL_FLAGS(node->sysctl_flags) & CTLFLAG_PERMANENT)
 1347                 return (EPERM);
 1348 
 1349         /*
 1350          * can't delete non-empty nodes
 1351          */
 1352         if (SYSCTL_TYPE(node->sysctl_flags) == CTLTYPE_NODE &&
 1353             node->sysctl_clen != 0)
 1354                 return (ENOTEMPTY);
 1355 
 1356         /*
 1357          * if the node "owns" data, release it now
               *
               * the owned pointers are cleared before the node is copied
               * into onode below, so the copy handed back to the caller
               * does not carry pointers to freed memory
 1358          */
 1359         if (node->sysctl_flags & CTLFLAG_OWNDATA) {
 1360                 if (node->sysctl_data != NULL)
 1361                         free(node->sysctl_data, M_SYSCTLDATA);
 1362                 node->sysctl_data = NULL;
 1363         }
 1364         if (node->sysctl_flags & CTLFLAG_OWNDESC) {
 1365                 if (node->sysctl_desc != NULL)
 1366                         /*XXXUNCONST*/
 1367                         free(__UNCONST(node->sysctl_desc), M_SYSCTLDATA);
 1368                 node->sysctl_desc = NULL;
 1369         }
 1370 
 1371         /*
 1372          * if the node to be removed is not the last one on the list,
 1373          * move the remaining nodes up, and reparent any grandchildren
               *
               * the moved entries now live at new addresses, so each child
               * of a moved CTLTYPE_NODE entry gets its sysctl_parent
               * pointed at the entry's new slot.  afterwards node is
               * re-aimed at the last slot, which is the one left vacant.
 1374          */
 1375         onode = *node;
 1376         if (ni < pnode->sysctl_clen - 1) {
 1377                 int t;
 1378 
 1379                 memmove(&pnode->sysctl_child[ni], &pnode->sysctl_child[ni + 1],
 1380                         (pnode->sysctl_clen - ni - 1) *
 1381                         sizeof(struct sysctlnode));
 1382                 for (; ni < pnode->sysctl_clen - 1; ni++)
 1383                         if (SYSCTL_TYPE(pnode->sysctl_child[ni].sysctl_flags) ==
 1384                             CTLTYPE_NODE)
 1385                                 for (t = 0;
 1386                                      t < pnode->sysctl_child[ni].sysctl_clen;
 1387                                      t++)
 1388                                         pnode->sysctl_child[ni].sysctl_child[t].
 1389                                                 sysctl_parent =
 1390                                                 &pnode->sysctl_child[ni];
 1391                 ni = pnode->sysctl_clen - 1;
 1392                 node = &pnode->sysctl_child[ni];
 1393         }
 1394 
 1395         /*
 1396          * reset the space we just vacated
 1397          */
 1398         memset(node, 0, sizeof(struct sysctlnode));
 1399         node->sysctl_parent = pnode;
 1400         pnode->sysctl_clen--;
 1401 
 1402         /*
 1403          * if this parent just lost its last child, nuke the creche
 1404          */
 1405         if (pnode->sysctl_clen == 0) {
 1406                 free(pnode->sysctl_child, M_SYSCTLNODE);
 1407                 pnode->sysctl_csize = 0;
 1408                 pnode->sysctl_child = NULL;
 1409         }
 1410 
 1411         /*
 1412          * update "version" on path to "root"
               *
               * rnode is first walked up to the node with no parent; that
               * node's version plus one is then stored in pnode and in
               * every ancestor of pnode.  ni is reused to hold the new
               * version number.
 1413          */
 1414         for (; rnode->sysctl_parent != NULL; rnode = rnode->sysctl_parent)
 1415                 ;
 1416         for (ni = rnode->sysctl_ver + 1; pnode != NULL;
 1417              pnode = pnode->sysctl_parent)
 1418                 pnode->sysctl_ver = ni;
 1419 
              /*
               * hand the saved copy of the removed node back through oldp;
               * the node is already gone even if this step fails
               */
 1420         error = sysctl_cvt_out(l, v, &onode, oldp, *oldlenp, oldlenp);
 1421 
 1422         return (error);
 1423 }
 1424 
 1425 /*
 1426  * sysctl_lookup -- Handles copyin/copyout of new and old values.
 1427  * Partial reads are globally allowed.  Only root can write to things
 1428  * unless the node says otherwise.
 1429  */
 1430 int
 1431 sysctl_lookup(SYSCTLFN_ARGS)
 1432 {
 1433         int error, rw;
 1434         size_t sz, len;
 1435         void *d;
 1436 
 1437         KASSERT(rw_lock_held(&sysctl_treelock));
 1438 
 1439         if (SYSCTL_VERS(rnode->sysctl_flags) != SYSCTL_VERSION) {
 1440                 printf("%s: rnode %p wrong version\n", __func__, rnode);
 1441                 return EINVAL;
 1442         }
 1443 
 1444         if (newlen == 0)
 1445                 newp = NULL;
 1446 
 1447         error = 0;
 1448 
 1449         /*
 1450          * you can't "look up" a node.  you can "query" it, but you
 1451          * can't "look it up".
 1452          */
 1453         if (SYSCTL_TYPE(rnode->sysctl_flags) == CTLTYPE_NODE || namelen != 0) {
 1454                 DPRINTF(("%s: can't lookup a node\n", __func__));
 1455                 return EINVAL;
 1456         }
 1457 
 1458         /*
 1459          * some nodes are private, so only root can look into them.
 1460          */
 1461         if (l != NULL && (rnode->sysctl_flags & CTLFLAG_PRIVATE) &&
 1462             (error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_SYSCTL,
 1463             KAUTH_REQ_SYSTEM_SYSCTL_PRVT, NULL, NULL, NULL)) != 0) {
 1464                 DPRINTF(("%s: private node\n", __func__));
 1465                 return error;
 1466         }
 1467 
 1468         /*
 1469          * if a node wants to be writable according to different rules
 1470          * other than "only root can write to stuff unless a flag is
 1471          * set", then it needs its own function which should have been
 1472          * called and not us.
 1473          */
 1474         if (l != NULL && newp != NULL &&
 1475             !(rnode->sysctl_flags & CTLFLAG_ANYWRITE) &&
 1476             (error = kauth_authorize_system(l->l_cred,
 1477             KAUTH_SYSTEM_SYSCTL, KAUTH_REQ_SYSTEM_SYSCTL_MODIFY, NULL, NULL,
 1478             NULL)) != 0) {
 1479                 DPRINTF(("%s: can't modify\n", __func__));
 1480                 return error;
 1481         }
 1482 
 1483         /*
 1484          * is this node supposedly writable?
 1485          */
 1486         rw = (rnode->sysctl_flags & CTLFLAG_READWRITE) ? 1 : 0;
 1487 
 1488         /*
 1489          * it appears not to be writable at this time, so if someone
 1490          * tried to write to it, we must tell them to go away
 1491          */
 1492         if (!rw && newp != NULL) {
 1493                 DPRINTF(("%s: not writable\n", __func__));
 1494                 return EPERM;
 1495         }
 1496 
 1497         /*
 1498          * step one, copy out the stuff we have presently
 1499          */
 1500         if (rnode->sysctl_flags & CTLFLAG_IMMEDIATE) {
 1501                 /*
 1502                  * note that we discard const here because we are
 1503                  * modifying the contents of the node (which is okay
 1504                  * because it's ours)
 1505                  *
 1506                  * It also doesn't matter which field of the union we pick.
 1507                  */
 1508                 d = __UNCONST(&rnode->sysctl_qdata);
 1509         } else
 1510                 d = rnode->sysctl_data;
 1511 
 1512         if (SYSCTL_TYPE(rnode->sysctl_flags) == CTLTYPE_STRING)
 1513                 sz = strlen(d) + 1; /* XXX@@@ possible fault here */
 1514         else
 1515                 sz = rnode->sysctl_size;
 1516         if (oldp != NULL) {
 1517                 error = sysctl_copyout(l, d, oldp, MIN(sz, *oldlenp));
 1518                 if (error) {
 1519                         DPRINTF(("%s: bad copyout %d\n", __func__, error));
 1520                         return error;
 1521                 }
 1522         }
 1523         *oldlenp = sz;
 1524 
 1525         /*
 1526          * are we done?
 1527          */
 1528         if (newp == NULL)
 1529                 return 0;
 1530 
 1531         /*
 1532          * hmm...not done.  must now "copy in" new value.  re-adjust
 1533          * sz to maximum value (strings are "weird").
 1534          */
 1535         sz = rnode->sysctl_size;
 1536         switch (SYSCTL_TYPE(rnode->sysctl_flags)) {
 1537         case CTLTYPE_BOOL: {
 1538                 bool tmp;
 1539                 /*
 1540                  * these data must be *exactly* the same size coming
 1541                  * in.  bool may only be true or false.
 1542                  */
 1543                 if (newlen != sz) {
 1544                         DPRINTF(("%s: bad size %zu != %zu\n", __func__, newlen,
 1545                             sz));
 1546                         return EINVAL;
 1547                 }
 1548                 error = sysctl_copyin(l, newp, &tmp, sz);
 1549                 if (error)
 1550                         break;
 1551                 if (tmp != true && tmp != false) {
 1552                         DPRINTF(("%s: tmp %d\n", __func__, tmp));
 1553                         return EINVAL;
 1554                 }
 1555                 *(bool *)d = tmp;
 1556                 break;
 1557         }
 1558         case CTLTYPE_INT:
 1559         case CTLTYPE_QUAD:
 1560         case CTLTYPE_STRUCT:
 1561                 /*
 1562                  * these data must be *exactly* the same size coming
 1563                  * in.
 1564                  */
 1565                 if (newlen != sz)
 1566                         goto bad_size;
 1567                 error = sysctl_copyin(l, newp, d, sz);
 1568                 rnd_add_data(NULL, d, sz, 0);
 1569                 break;
 1570         case CTLTYPE_STRING: {
 1571                 /*
 1572                  * strings, on the other hand, can be shorter, and we
 1573                  * let userland be sloppy about the trailing nul.
 1574                  */
 1575                 char *newbuf;
 1576 
 1577                 /*
 1578                  * too much new string?
 1579                  */
 1580                 if (newlen > sz)
 1581                         goto bad_size;
 1582 
 1583                 /*
 1584                  * temporary copy of new inbound string
 1585                  */
 1586                 len = MIN(sz, newlen);
 1587                 newbuf = malloc(len, M_SYSCTLDATA, M_WAITOK);
 1588                 if (newbuf == NULL) {
 1589                         DPRINTF(("%s: oomem %zu\n", __func__, len));
 1590                         return ENOMEM;
 1591                 }
 1592                 error = sysctl_copyin(l, newp, newbuf, len);
 1593                 if (error) {
 1594                         free(newbuf, M_SYSCTLDATA);
 1595                         DPRINTF(("%s: copyin %d\n", __func__, error));
 1596                         return error;
 1597                 }
 1598 
 1599                 /*
 1600                  * did they NUL terminate it, or do we have space
 1601                  * left to do it ourselves?
 1602                  */
 1603                 if (newbuf[len - 1] != '\0' && len == sz) {
 1604                         free(newbuf, M_SYSCTLDATA);
 1605                         DPRINTF(("%s: string too long\n", __func__));
 1606                         return EINVAL;
 1607                 }
 1608 
 1609                 /*
 1610                  * looks good, so pop it into place and zero the rest.
 1611                  */
 1612                 if (len > 0) {
 1613                         memcpy(d, newbuf, len);
 1614                         rnd_add_data(NULL, d, len, 0);
 1615                 }
 1616                 if (sz != len)
 1617                         memset((char*)d + len, 0, sz - len);
 1618                 free(newbuf, M_SYSCTLDATA);
 1619                 break;
 1620         }
 1621         default:
 1622                 DPRINTF(("%s: bad type\n", __func__));
 1623                 return EINVAL;
 1624         }
 1625         if (error) {
 1626                 DPRINTF(("%s: copyin %d\n", __func__, error));
 1627         }
 1628 
 1629         return error;
 1630 
 1631     bad_size:
 1632         DPRINTF(("%s: bad size %zu > %zu\n", __func__, newlen, sz));
 1633         return EINVAL;
 1634 }
 1635 
 1636 /*
 1637  * sysctl_mmap -- Dispatches sysctl mmap requests to those nodes that
 1638  * purport to handle it.  This interface isn't fully fleshed out yet,
 1639  * unfortunately.
 1640  */
 1641 static int
 1642 sysctl_mmap(SYSCTLFN_ARGS)
 1643 {
 1644         const struct sysctlnode *node;
 1645         struct sysctlnode nnode;
 1646         int error;
 1647         int sysctl_num;
 1648 
 1649         if (SYSCTL_VERS(rnode->sysctl_flags) != SYSCTL_VERSION) {
 1650                 printf("sysctl_mmap: rnode %p wrong version\n", rnode);
 1651                 return (EINVAL);
 1652         }
 1653 
 1654         /*
 1655          * let's just pretend that didn't happen, m'kay?
 1656          */
 1657         if (l == NULL)
 1658                 return (EPERM);
 1659 
 1660         /*
 1661          * is this a sysctlnode description of an mmap request?
 1662          */
 1663         if (newp == NULL || newlen != sizeof(struct sysctlnode))
 1664                 return (EINVAL);
 1665         error = sysctl_copyin(l, newp, &nnode, sizeof(nnode));
 1666         if (error)
 1667                 return (error);
 1668 
 1669         /*
 1670          * does the node they asked for exist?
 1671          */
 1672         if (namelen != 1)
 1673                 return (EOPNOTSUPP);
 1674         node = rnode;
 1675         sysctl_num = nnode.sysctl_num;
 1676         error = sysctl_locate(l, &sysctl_num, 1, &node, NULL);
 1677         if (error)
 1678                 return (error);
 1679 
 1680         /*
 1681          * does this node that we have found purport to handle mmap?
 1682          */
 1683         if (node->sysctl_func == NULL ||
 1684             !(node->sysctl_flags & CTLFLAG_MMAP))
 1685                 return (EOPNOTSUPP);
 1686 
 1687         /*
 1688          * well...okay, they asked for it.
 1689          */
 1690         return ((*node->sysctl_func)(SYSCTLFN_CALL(node)));
 1691 }
 1692 
 1693 int
 1694 sysctl_describe(SYSCTLFN_ARGS)
 1695 {
 1696         struct sysctldesc *d;
 1697         void *bf;
 1698         size_t sz, left, tot;
 1699         int i, error, v = -1;
 1700         struct sysctlnode *node;
 1701         struct sysctlnode dnode;
 1702 
 1703         if (SYSCTL_VERS(rnode->sysctl_flags) != SYSCTL_VERSION) {
 1704                 printf("sysctl_query: rnode %p wrong version\n", rnode);
 1705                 return (EINVAL);
 1706         }
 1707 
 1708         if (SYSCTL_TYPE(rnode->sysctl_flags) != CTLTYPE_NODE)
 1709                 return (ENOTDIR);
 1710         if (namelen != 1 || name[0] != CTL_DESCRIBE)
 1711                 return (EINVAL);
 1712 
 1713         /*
 1714          * get ready...
 1715          */
 1716         error = 0;
 1717         d = bf = malloc(MAXDESCLEN, M_TEMP, M_WAITOK);
 1718         if (bf == NULL)
 1719                 return ENOMEM;
 1720         tot = 0;
 1721         node = rnode->sysctl_child;
 1722         left = *oldlenp;
 1723 
 1724         /*
 1725          * no request -> all descriptions at this level
 1726          * request with desc unset -> just this node
 1727          * request with desc set -> set descr for this node
 1728          */
 1729         if (newp != NULL) {
 1730                 error = sysctl_cvt_in(l, &v, newp, newlen, &dnode);
 1731                 if (error)
 1732                         goto out;
 1733                 if (dnode.sysctl_desc != NULL) {
 1734                         /*
 1735                          * processes cannot set descriptions above
 1736                          * securelevel 0.  and must be root.  blah
 1737                          * blah blah.  a couple more checks are made
 1738                          * once we find the node we want.
 1739                          */
 1740                         if (l != NULL) {
 1741 #ifndef SYSCTL_DISALLOW_CREATE
 1742                                 error = kauth_authorize_system(l->l_cred,
 1743                                     KAUTH_SYSTEM_SYSCTL,
 1744                                     KAUTH_REQ_SYSTEM_SYSCTL_DESC, NULL,
 1745                                     NULL, NULL);
 1746                                 if (error)
 1747                                         goto out;
 1748 #else /* SYSCTL_DISALLOW_CREATE */
 1749                                 error = EPERM;
 1750                                 goto out;
 1751 #endif /* SYSCTL_DISALLOW_CREATE */
 1752                         }
 1753 
 1754                         /*
 1755                          * find node and try to set the description on it
 1756                          */
 1757                         for (i = 0; i < rnode->sysctl_clen; i++)
 1758                                 if (node[i].sysctl_num == dnode.sysctl_num)
 1759                                         break;
 1760                         if (i == rnode->sysctl_clen) {
 1761                                 error = ENOENT;
 1762                                 goto out;
 1763                         }
 1764                         node = &node[i];
 1765 
 1766                         /*
 1767                          * did the caller specify a node version?
 1768                          */
 1769                         if (dnode.sysctl_ver != 0 &&
 1770                             dnode.sysctl_ver != node->sysctl_ver) {
 1771                                 error = EINVAL;
 1772                                 goto out;
 1773                         }
 1774 
 1775                         /*
 1776                          * okay...some rules:
 1777                          * (1) if setup is done and the tree is
 1778                          *     read-only or the whole system is
 1779                          *     read-only
 1780                          * (2) no one can set a description on a
 1781                          *     permanent node (it must be set when
 1782                          *     using createv)
 1783                          * (3) processes cannot *change* a description
 1784                          * (4) processes *can*, however, set a
 1785                          *     description on a read-only node so that
 1786                          *     one can be created and then described
 1787                          *     in two steps
 1788                          * anything else come to mind?
 1789                          */
 1790                         if ((sysctl_root.sysctl_flags & CTLFLAG_PERMANENT) &&
 1791                             (!(sysctl_rootof(node)->sysctl_flags &
 1792                                CTLFLAG_READWRITE) ||
 1793                              !(sysctl_root.sysctl_flags & CTLFLAG_READWRITE))) {
 1794                                 error = EPERM;
 1795                                 goto out;
 1796                         }
 1797                         if (node->sysctl_flags & CTLFLAG_PERMANENT) {
 1798                                 error = EPERM;
 1799                                 goto out;
 1800                         }
 1801                         if (l != NULL && node->sysctl_desc != NULL) {
 1802                                 error = EPERM;
 1803                                 goto out;
 1804                         }
 1805 
 1806                         /*
 1807                          * right, let's go ahead.  the first step is
 1808                          * making the description into something the
 1809                          * node can "own", if need be.
 1810                          */
 1811                         if (l != NULL ||
 1812                             dnode.sysctl_flags & CTLFLAG_OWNDESC) {
 1813                                 char *nd, *k;
 1814 
 1815                                 k = malloc(MAXDESCLEN, M_TEMP, M_WAITOK);
 1816                                 if (k == NULL) {
 1817                                         error = ENOMEM;
 1818                                         goto out;
 1819                                 }
 1820                                 error = sysctl_copyinstr(l, dnode.sysctl_desc,
 1821                                                          k, MAXDESCLEN, &sz);
 1822                                 if (error) {
 1823                                         free(k, M_TEMP);
 1824                                         goto out;
 1825                                 }
 1826                                 nd = malloc(sz, M_SYSCTLDATA, M_WAITOK);
 1827                                 if (nd == NULL) {
 1828                                         free(k, M_TEMP);
 1829                                         error = ENOMEM;
 1830                                         goto out;
 1831                                 }
 1832                                 memcpy(nd, k, sz);
 1833                                 dnode.sysctl_flags |= CTLFLAG_OWNDESC;
 1834                                 dnode.sysctl_desc = nd;
 1835                                 free(k, M_TEMP);
 1836                         }
 1837 
 1838                         /*
 1839                          * now "release" the old description and
 1840                          * attach the new one.  ta-da.
 1841                          */
 1842                         if ((node->sysctl_flags & CTLFLAG_OWNDESC) &&
 1843                             node->sysctl_desc != NULL)
 1844                                 /*XXXUNCONST*/
 1845                                 free(__UNCONST(node->sysctl_desc), M_SYSCTLDATA);
 1846                         node->sysctl_desc = dnode.sysctl_desc;
 1847                         node->sysctl_flags |=
 1848                                 (dnode.sysctl_flags & CTLFLAG_OWNDESC);
 1849 
 1850                         /*
 1851                          * now we "fall out" and into the loop which
 1852                          * will copy the new description back out for
 1853                          * those interested parties
 1854                          */
 1855                 }
 1856         }
 1857 
 1858         /*
 1859          * scan for one description or just retrieve all descriptions
 1860          */
 1861         for (i = 0; i < rnode->sysctl_clen; i++) {
 1862                 /*
 1863                  * did they ask for the description of only one node?
 1864                  */
 1865                 if (v != -1 && node[i].sysctl_num != dnode.sysctl_num)
 1866                         continue;
 1867 
 1868                 /*
 1869                  * don't describe "private" nodes to non-suser users
 1870                  */
 1871                 if ((node[i].sysctl_flags & CTLFLAG_PRIVATE) && (l != NULL) &&
 1872                     !(kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_SYSCTL,
 1873                     KAUTH_REQ_SYSTEM_SYSCTL_PRVT, NULL, NULL, NULL)))
 1874                         continue;
 1875 
 1876                 /*
 1877                  * is this description "valid"?
 1878                  */
 1879                 memset(bf, 0, MAXDESCLEN);
 1880                 if (node[i].sysctl_desc == NULL)
 1881                         sz = 1;
 1882                 else if (copystr(node[i].sysctl_desc, &d->descr_str[0],
 1883                                  MAXDESCLEN - sizeof(*d), &sz) != 0) {
 1884                         /*
 1885                          * erase possible partial description
 1886                          */
 1887                         memset(bf, 0, MAXDESCLEN);
 1888                         sz = 1;
 1889                 }
 1890 
 1891                 /*
 1892                  * we've got it, stuff it into the caller's buffer
 1893                  */
 1894                 d->descr_num = node[i].sysctl_num;
 1895                 d->descr_ver = node[i].sysctl_ver;
 1896                 d->descr_len = sz; /* includes trailing nul */
 1897                 sz = (char *)NEXT_DESCR(d) - (char *)d;
 1898                 if (oldp != NULL && left >= sz) {
 1899                         error = sysctl_copyout(l, d, oldp, sz);
 1900                         if (error)
 1901                                 goto out;
 1902                         left -= sz;
 1903                         oldp = (void *)__sysc_desc_adv(oldp, d->descr_len);
 1904                 }
 1905                 tot += sz;
 1906 
 1907                 /*
 1908                  * if we get this far with v not "unset", they asked
 1909                  * for a specific node and we found it
 1910                  */
 1911                 if (v != -1)
 1912                         break;
 1913         }
 1914 
 1915         /*
 1916          * did we find it after all?
 1917          */
 1918         if (v != -1 && tot == 0)
 1919                 error = ENOENT;
 1920         else
 1921                 *oldlenp = tot;
 1922 
 1923 out:
 1924         free(bf, M_TEMP);
 1925         return (error);
 1926 }
 1927 
 1928 /*
 1929  * ********************************************************************
 1930  * Section 3: Create and destroy from inside the kernel
 1931  * ********************************************************************
 1932  * sysctl_createv() and sysctl_destroyv() are simpler-to-use
 1933  * interfaces for the kernel to fling new entries into the mib and rip
 1934  * them out later.  In the case of sysctl_createv(), the returned copy
 1935  * of the node (see sysctl_create()) will be translated back into a
 1936  * pointer to the actual node.
 1937  *
 1938  * Note that sysctl_createv() will return 0 if the create request
 1939  * matches an existing node (ala mkdir -p), and that sysctl_destroyv()
 1940  * will return 0 if the node to be destroyed already does not exist
 1941  * (aka rm -f) or if it is a parent of other nodes.
 1942  *
 1943  * This allows two (or more) different subsystems to assert sub-tree
 1944  * existence before populating their own nodes, and to remove their
 1945  * own nodes without orphaning the others when they are done.
 1946  * ********************************************************************
 1947  */
 1948 #undef sysctl_createv
 1949 int
 1950 sysctl_createv(struct sysctllog **log, int cflags,
 1951                const struct sysctlnode **rnode, const struct sysctlnode **cnode,
 1952                int flags, int type, const char *namep, const char *descr,
 1953                sysctlfn func, u_quad_t qv, void *newp, size_t newlen,
 1954                ...)
 1955 {
 1956         va_list ap;
 1957         int error, ni, namelen, name[CTL_MAXNAME];
 1958         const struct sysctlnode *root, *pnode;
 1959         struct sysctlnode nnode, onode, *dnode;
 1960         size_t sz;
 1961         const struct sysctlnode *snode __diagused;
 1962 
 1963         /*
 1964          * where are we putting this?
 1965          */
 1966         if (rnode != NULL && *rnode == NULL) {
 1967                 printf("sysctl_createv: rnode NULL\n");
 1968                 return (EINVAL);
 1969         }
 1970         root = rnode ? *rnode : NULL;
 1971         if (cnode != NULL)
 1972                 *cnode = NULL;
 1973         if (cflags != 0)
 1974                 return (EINVAL);
 1975 
 1976         /*
 1977          * what is it?
 1978          */
 1979         flags = SYSCTL_VERSION|SYSCTL_TYPE(type)|SYSCTL_FLAGS(flags);
 1980         if (log != NULL)
 1981                 flags &= ~CTLFLAG_PERMANENT;
 1982 
 1983         /*
 1984          * where do we put it?
 1985          */
 1986         va_start(ap, newlen);
 1987         namelen = 0;
 1988         error = 0;
 1989         ni = -1;
 1990         do {
 1991                 if (++ni == CTL_MAXNAME) {
 1992                         error = ENAMETOOLONG;
 1993                         break;
 1994                 }
 1995                 name[ni] = va_arg(ap, int);
 1996                 /*
 1997                  * sorry, this is not supported from here
 1998                  */
 1999                 if (name[ni] == CTL_CREATESYM) {
 2000                         error = EINVAL;
 2001                         break;
 2002                 }
 2003         } while (name[ni] != CTL_EOL && name[ni] != CTL_CREATE);
 2004         va_end(ap);
 2005         if (error)
 2006                 return error;
 2007         namelen = ni + (name[ni] == CTL_CREATE ? 1 : 0);
 2008 
 2009         /*
 2010          * what's it called
 2011          */
 2012         if (strlcpy(nnode.sysctl_name, namep, sizeof(nnode.sysctl_name)) >=
 2013             sizeof(nnode.sysctl_name))
 2014                 return (ENAMETOOLONG);
 2015 
 2016         /*
 2017          * cons up the description of the new node
 2018          */
 2019         nnode.sysctl_num = name[namelen - 1];
 2020         name[namelen - 1] = CTL_CREATE;
 2021         nnode.sysctl_size = newlen;
 2022         nnode.sysctl_flags = flags;
 2023         if (type == CTLTYPE_NODE) {
 2024                 nnode.sysctl_csize = 0;
 2025                 nnode.sysctl_clen = 0;
 2026                 nnode.sysctl_child = NULL;
 2027                 if (flags & CTLFLAG_ALIAS)
 2028                         nnode.sysctl_alias = qv;
 2029         } else if (flags & CTLFLAG_IMMEDIATE) {
 2030                 switch (type) {
 2031                 case CTLTYPE_BOOL:
 2032                         nnode.sysctl_bdata = qv;
 2033                         break;
 2034                 case CTLTYPE_INT:
 2035                         nnode.sysctl_idata = qv;
 2036                         break;
 2037                 case CTLTYPE_QUAD:
 2038                         nnode.sysctl_qdata = qv;
 2039                         break;
 2040                 default:
 2041                         return (EINVAL);
 2042                 }
 2043         } else {
 2044                 nnode.sysctl_data = newp;
 2045         }
 2046         nnode.sysctl_func = func;
 2047         nnode.sysctl_parent = NULL;
 2048         nnode.sysctl_ver = 0;
 2049 
 2050         /*
 2051          * initialize lock state -- we need locks if the main tree has
 2052          * been marked as complete, but since we could be called from
 2053          * either there, or from a device driver (say, at device
 2054          * insertion), or from a module (at module load time, say), we
 2055          * don't really want to "wait"...
 2056          */
 2057         sysctl_lock(true);
 2058 
 2059         /*
 2060          * locate the prospective parent of the new node, and if we
 2061          * find it, add the new node.
 2062          */
 2063         sz = sizeof(onode);
 2064         pnode = root;
 2065         error = sysctl_locate(NULL, &name[0], namelen - 1, &pnode, &ni);
 2066         if (error) {
 2067                 /*
 2068                  * XXX: If you are seeing this printf in early bringup
 2069                  * stages, perhaps your setfault is not functioning and
 2070                  * thus kcopy() is mis-behaving.
 2071                  */
 2072                 printf("sysctl_createv: sysctl_locate(%s) returned %d\n",
 2073                        nnode.sysctl_name, error);
 2074                 sysctl_unlock();
 2075                 return (error);
 2076         }
 2077         error = sysctl_create(&name[ni], namelen - ni, &onode, &sz,
 2078                               &nnode, sizeof(nnode), &name[0], NULL,
 2079                               pnode);
 2080 
 2081         /*
 2082          * unfortunately the node we wanted to create is already
 2083          * there.  if the node that's already there is a reasonable
 2084          * facsimile of the node we wanted to create, just pretend
 2085          * (for the caller's benefit) that we managed to create the
 2086          * node they wanted.
 2087          */
 2088         if (error == EEXIST) {
 2089                 /* name is the same as requested... */
 2090                 if (strcmp(nnode.sysctl_name, onode.sysctl_name) == 0 &&
 2091                     /* they want the same function... */
 2092                     nnode.sysctl_func == onode.sysctl_func &&
 2093                     /* number is the same as requested, or... */
 2094                     (nnode.sysctl_num == onode.sysctl_num ||
 2095                      /* they didn't pick a number... */
 2096                      nnode.sysctl_num == CTL_CREATE)) {
 2097                         /*
 2098                          * collision here from trying to create
 2099                          * something that already existed; let's give
 2100                          * our customers a hand and tell them they got
 2101                          * what they wanted.
 2102                          */
 2103 #ifdef SYSCTL_DEBUG_CREATE
 2104                         printf("cleared\n");
 2105 #endif /* SYSCTL_DEBUG_CREATE */
 2106                         error = 0;
 2107                 }
 2108         }
 2109 
 2110         if (error == 0 &&
 2111             (cnode != NULL || log != NULL || descr != NULL)) {
 2112                 /*
 2113                  * sysctl_create() gave us back a copy of the node,
 2114                  * but we need to know where it actually is...
 2115                  */
 2116                 pnode = root;
 2117                 error = sysctl_locate(NULL, &name[0], namelen - 1, &pnode, &ni);
 2118                 snode = pnode;
 2119 
 2120                 /*
 2121                  * manual scan of last layer so that aliased nodes
 2122                  * aren't followed.
 2123                  */
 2124                 if (error == 0) {
 2125                         for (ni = 0; ni < pnode->sysctl_clen; ni++)
 2126                                 if (pnode->sysctl_child[ni].sysctl_num ==
 2127                                     onode.sysctl_num)
 2128                                         break;
 2129                         if (ni < pnode->sysctl_clen)
 2130                                 pnode = &pnode->sysctl_child[ni];
 2131                         else
 2132                                 error = ENOENT;
 2133                 }
 2134 
 2135                 /*
 2136                  * not expecting an error here, but...
 2137                  */
 2138                 if (error == 0) {
 2139                         KASSERTMSG(pnode->sysctl_parent == snode,
 2140                             "sysctl parent mis-match pnode %s, snode %s",
 2141                             pnode->sysctl_name, snode->sysctl_name);
 2142                         if (log != NULL)
 2143                                 sysctl_log_add(log, pnode);
 2144                         if (cnode != NULL)
 2145                                 *cnode = pnode;
 2146                         if (descr != NULL) {
 2147                                 /*
 2148                                  * allow first caller to *set* a
 2149                                  * description actually to set it
 2150                                  * 
 2151                                  * discard const here so we can attach
 2152                                  * the description
 2153                                  */
 2154                                 dnode = __UNCONST(pnode);
 2155                                 if (pnode->sysctl_desc != NULL)
 2156                                         /* skip it...we've got one */;
 2157                                 else if (flags & CTLFLAG_OWNDESC) {
 2158                                         size_t l = strlen(descr) + 1;
 2159                                         char *d = malloc(l, M_SYSCTLDATA,
 2160                                                          M_WAITOK);
 2161                                         if (d != NULL) {
 2162                                                 memcpy(d, descr, l);
 2163                                                 dnode->sysctl_desc = d;
 2164                                                 dnode->sysctl_flags |=
 2165                                                     CTLFLAG_OWNDESC;
 2166                                         }
 2167                                 } else
 2168                                         dnode->sysctl_desc = descr;
 2169                         }
 2170                 } else {
 2171                         printf("sysctl_create succeeded but node not found?!\n");
 2172                         /*
 2173                          *  confusing, but the create said it
 2174                          * succeeded, so...
 2175                          */
 2176                         error = 0;
 2177                 }
 2178         }
 2179 
 2180         /*
 2181          * now it should be safe to release the lock state.  note that
 2182          * the pointer to the newly created node being passed back may
 2183          * not be "good" for very long.
 2184          */
 2185         sysctl_unlock();
 2186 
 2187         if (error != 0) {
 2188                 printf("sysctl_createv: sysctl_create(%s) returned %d\n",
 2189                        nnode.sysctl_name, error);
 2190 #if 0
 2191                 if (error != ENOENT)
 2192                         sysctl_dump(&onode);
 2193 #endif
 2194         }
 2195 
 2196         return (error);
 2197 }
 2198 
 2199 int
 2200 sysctl_destroyv(struct sysctlnode *rnode, ...)
 2201 {
 2202         va_list ap;
 2203         int error, name[CTL_MAXNAME], namelen, ni;
 2204         const struct sysctlnode *pnode, *node;
 2205         struct sysctlnode dnode, *onode;
 2206         size_t sz;
 2207 
 2208         va_start(ap, rnode);
 2209         namelen = 0;
 2210         ni = 0;
 2211         do {
 2212                 if (ni == CTL_MAXNAME) {
 2213                         va_end(ap);
 2214                         return (ENAMETOOLONG);
 2215                 }
 2216                 name[ni] = va_arg(ap, int);
 2217         } while (name[ni++] != CTL_EOL);
 2218         namelen = ni - 1;
 2219         va_end(ap);
 2220 
 2221         /*
 2222          * i can't imagine why we'd be destroying a node when the tree
 2223          * wasn't complete, but who knows?
 2224          */
 2225         sysctl_lock(true);
 2226 
 2227         /*
 2228          * where is it?
 2229          */
 2230         node = rnode;
 2231         error = sysctl_locate(NULL, &name[0], namelen - 1, &node, &ni);
 2232         if (error) {
 2233                 /* they want it gone and it's not there, so... */
 2234                 sysctl_unlock();
 2235                 return (error == ENOENT ? 0 : error);
 2236         }
 2237 
 2238         /*
 2239          * set up the deletion
 2240          */
 2241         pnode = node;
 2242         node = &dnode;
 2243         memset(&dnode, 0, sizeof(dnode));
 2244         dnode.sysctl_flags = SYSCTL_VERSION;
 2245         dnode.sysctl_num = name[namelen - 1];
 2246 
 2247         /*
 2248          * we found it, now let's nuke it
 2249          */
 2250         name[namelen - 1] = CTL_DESTROY;
 2251         sz = 0;
 2252         error = sysctl_destroy(&name[namelen - 1], 1, NULL, &sz,
 2253                                node, sizeof(*node), &name[0], NULL,
 2254                                pnode);
 2255         if (error == ENOTEMPTY) {
 2256                 /*
 2257                  * think of trying to delete "foo" when "foo.bar"
 2258                  * (which someone else put there) is still in
 2259                  * existence
 2260                  */
 2261                 error = 0;
 2262 
 2263                 /*
 2264                  * dunno who put the description there, but if this
 2265                  * node can ever be removed, we need to make sure the
 2266                  * string doesn't go out of context.  that means we
 2267                  * need to find the node that's still there (don't use
 2268                  * sysctl_locate() because that follows aliasing).
 2269                  */
 2270                 node = pnode->sysctl_child;
 2271                 for (ni = 0; ni < pnode->sysctl_clen; ni++)
 2272                         if (node[ni].sysctl_num == dnode.sysctl_num)
 2273                                 break;
 2274                 node = (ni < pnode->sysctl_clen) ? &node[ni] : NULL;
 2275 
 2276                 /*
 2277                  * if we found it, and this node has a description,
 2278                  * and this node can be released, and it doesn't
 2279                  * already own its own description...sigh.  :)
 2280                  */
 2281                 if (node != NULL && node->sysctl_desc != NULL &&
 2282                     !(node->sysctl_flags & CTLFLAG_PERMANENT) &&
 2283                     !(node->sysctl_flags & CTLFLAG_OWNDESC)) {
 2284                         char *d;
 2285 
 2286                         sz = strlen(node->sysctl_desc) + 1;
 2287                         d = malloc(sz, M_SYSCTLDATA, M_WAITOK);
 2288                         if (d != NULL) {
 2289                                 /*
 2290                                  * discard const so that we can
 2291                                  * re-attach the description
 2292                                  */
 2293                                 memcpy(d, node->sysctl_desc, sz);
 2294                                 onode = __UNCONST(node);
 2295                                 onode->sysctl_desc = d;
 2296                                 onode->sysctl_flags |= CTLFLAG_OWNDESC;
 2297                         } else {
 2298                                 /*
 2299                                  * XXX drop the description?  be
 2300                                  * afraid?  don't care?
 2301                                  */
 2302                         }
 2303                 }
 2304         }
 2305 
 2306         sysctl_unlock();
 2307 
 2308         return (error);
 2309 }
 2310 
 2311 /*
 2312  * ********************************************************************
 2313  * Deletes an entire n-ary tree.  Not recommended unless you know why
 2314  * you're doing it.  Personally, I don't know why you'd even think
 2315  * about it.
 2316  * ********************************************************************
 2317  */
 2318 void
 2319 sysctl_free(struct sysctlnode *rnode)
 2320 {
 2321         struct sysctlnode *node, *pnode;
 2322 
 2323         rw_enter(&sysctl_treelock, RW_WRITER);
 2324 
 2325         if (rnode == NULL)
 2326                 rnode = &sysctl_root;
 2327 
 2328         if (SYSCTL_VERS(rnode->sysctl_flags) != SYSCTL_VERSION) {
 2329                 printf("sysctl_free: rnode %p wrong version\n", rnode);
 2330                 rw_exit(&sysctl_treelock);
 2331                 return;
 2332         }
 2333 
 2334         pnode = rnode;
 2335 
 2336         node = pnode->sysctl_child;
 2337         do {
 2338                 while (node != NULL && pnode->sysctl_csize > 0) {
 2339                         while (node <
 2340                                &pnode->sysctl_child[pnode->sysctl_clen] &&
 2341                                (SYSCTL_TYPE(node->sysctl_flags) !=
 2342                                 CTLTYPE_NODE ||
 2343                                 node->sysctl_csize == 0)) {
 2344                                 if (SYSCTL_FLAGS(node->sysctl_flags) &
 2345                                     CTLFLAG_OWNDATA) {
 2346                                         if (node->sysctl_data != NULL) {
 2347                                                 free(node->sysctl_data,
 2348                                                      M_SYSCTLDATA);
 2349                                                 node->sysctl_data = NULL;
 2350                                         }
 2351                                 }
 2352                                 if (SYSCTL_FLAGS(node->sysctl_flags) &
 2353                                     CTLFLAG_OWNDESC) {
 2354                                         if (node->sysctl_desc != NULL) {
 2355                                                 /*XXXUNCONST*/
 2356                                                 free(__UNCONST(node->sysctl_desc),
 2357                                                      M_SYSCTLDATA);
 2358                                                 node->sysctl_desc = NULL;
 2359                                         }
 2360                                 }
 2361                                 node++;
 2362                         }
 2363                         if (node < &pnode->sysctl_child[pnode->sysctl_clen]) {
 2364                                 pnode = node;
 2365                                 node = node->sysctl_child;
 2366                         } else
 2367                                 break;
 2368                 }
 2369                 if (pnode->sysctl_child != NULL)
 2370                         free(pnode->sysctl_child, M_SYSCTLNODE);
 2371                 pnode->sysctl_clen = 0;
 2372                 pnode->sysctl_csize = 0;
 2373                 pnode->sysctl_child = NULL;
 2374                 node = pnode;
 2375                 pnode = node->sysctl_parent;
 2376         } while (pnode != NULL && node != rnode);
 2377 
 2378         rw_exit(&sysctl_treelock);
 2379 }
 2380 
 2381 void
 2382 sysctl_log_print(const struct sysctllog *slog)
 2383 {
 2384         int i, len;
 2385 
 2386         printf("root %p left %d size %d content", (const void *)slog->log_root,
 2387             slog->log_left, slog->log_size);
 2388 
 2389         for (len = 0, i = slog->log_left; i < slog->log_size; i++) {
 2390                 switch (len) {
 2391                 case 0:
 2392                         len = -1;
 2393                         printf(" version %d", slog->log_num[i]);
 2394                         break;
 2395                 case -1:
 2396                         len = -2;
 2397                         printf(" type %d", slog->log_num[i]);
 2398                         break;
 2399                 case -2:
 2400                         len =  slog->log_num[i];
 2401                         printf(" len %d:", slog->log_num[i]);
 2402                         if (len <= 0)
 2403                                 len = -1;
 2404                         break;
 2405                 default:
 2406                         len--;
 2407                         printf(" %d", slog->log_num[i]);
 2408                         break;
 2409                 }
 2410         }
 2411         printf(" end\n");
 2412 }
 2413 
 2414 int
 2415 sysctl_log_add(struct sysctllog **logp, const struct sysctlnode *node)
 2416 {
 2417         const int size0 = 16;
 2418         int name[CTL_MAXNAME], namelen, i;
 2419         const struct sysctlnode *pnode;
 2420         struct sysctllog *log;
 2421 
 2422         if (node->sysctl_flags & CTLFLAG_PERMANENT)
 2423                 return (0);
 2424 
 2425         if (logp == NULL)
 2426                 return (0);
 2427 
 2428         if (*logp == NULL) {
 2429                 log = malloc(sizeof(struct sysctllog),
 2430                        M_SYSCTLDATA, M_WAITOK);
 2431                 if (log == NULL) {
 2432                         /* XXX print error message? */
 2433                         return (-1);
 2434                 }
 2435                 log->log_num = malloc(size0 * sizeof(int),
 2436                        M_SYSCTLDATA, M_WAITOK);
 2437                 if (log->log_num == NULL) {
 2438                         /* XXX print error message? */
 2439                         free(log, M_SYSCTLDATA);
 2440                         return (-1);
 2441                 }
 2442                 memset(log->log_num, 0, size0 * sizeof(int));
 2443                 log->log_root = NULL;
 2444                 log->log_size = size0;
 2445                 log->log_left = size0;
 2446                 *logp = log;
 2447         } else
 2448                 log = *logp;
 2449 
 2450         /*
 2451          * check that the root is proper.  it's okay to record the
 2452          * address of the root of a tree.  it's the only thing that's
 2453          * guaranteed not to shift around as nodes come and go.
 2454          */
 2455         if (log->log_root == NULL)
 2456                 log->log_root = sysctl_rootof(node);
 2457         else if (log->log_root != sysctl_rootof(node)) {
 2458                 printf("sysctl: log %p root mismatch (%p)\n",
 2459                        log->log_root, sysctl_rootof(node));
 2460                 return (-1);
 2461         }
 2462 
 2463         /*
 2464          * we will copy out name in reverse order
 2465          */
 2466         for (pnode = node, namelen = 0;
 2467              pnode != NULL && !(pnode->sysctl_flags & CTLFLAG_ROOT);
 2468              pnode = pnode->sysctl_parent)
 2469                 name[namelen++] = pnode->sysctl_num;
 2470 
 2471         /*
 2472          * do we have space?
 2473          */
 2474         if (log->log_left < (namelen + 3))
 2475                 sysctl_log_realloc(log);
 2476         if (log->log_left < (namelen + 3))
 2477                 return (-1);
 2478 
 2479         /*
 2480          * stuff name in, then namelen, then node type, and finally,
 2481          * the version for non-node nodes.
 2482          */
 2483         for (i = 0; i < namelen && i < CTL_MAXNAME; i++)
 2484                 log->log_num[--log->log_left] = name[i];
 2485         log->log_num[--log->log_left] = namelen;
 2486         log->log_num[--log->log_left] = SYSCTL_TYPE(node->sysctl_flags);
 2487         if (log->log_num[log->log_left] != CTLTYPE_NODE)
 2488                 log->log_num[--log->log_left] = node->sysctl_ver;
 2489         else
 2490                 log->log_num[--log->log_left] = 0;
 2491 
 2492         return (0);
 2493 }
 2494 
 2495 void
 2496 sysctl_teardown(struct sysctllog **logp)
 2497 {
 2498         const struct sysctlnode *rnode;
 2499         struct sysctlnode node;
 2500         struct sysctllog *log;
 2501         uint namelen;
 2502         int *name, t, v, error, ni;
 2503         size_t sz;
 2504 
 2505         if (logp == NULL || *logp == NULL)
 2506                 return;
 2507         log = *logp;
 2508 
 2509         rw_enter(&sysctl_treelock, RW_WRITER);
 2510         memset(&node, 0, sizeof(node));
 2511 
 2512         while (log->log_left < log->log_size) {
 2513                 KASSERT((log->log_left + 3 < log->log_size) &&
 2514                         (log->log_left + log->log_num[log->log_left + 2] <=
 2515                          log->log_size));
 2516                 v = log->log_num[log->log_left++];
 2517                 t = log->log_num[log->log_left++];
 2518                 namelen = log->log_num[log->log_left++];
 2519                 name = &log->log_num[log->log_left];
 2520 
 2521                 node.sysctl_num = name[namelen - 1];
 2522                 node.sysctl_flags = SYSCTL_VERSION|t;
 2523                 node.sysctl_ver = v;
 2524 
 2525                 rnode = log->log_root;
 2526                 error = sysctl_locate(NULL, &name[0], namelen, &rnode, &ni);
 2527                 if (error == 0) {
 2528                         name[namelen - 1] = CTL_DESTROY;
 2529                         rnode = rnode->sysctl_parent;
 2530                         sz = 0;
 2531                         (void)sysctl_destroy(&name[namelen - 1], 1, NULL,
 2532                                              &sz, &node, sizeof(node),
 2533                                              &name[0], NULL, rnode);
 2534                 }
 2535 
 2536                 log->log_left += namelen;
 2537         }
 2538 
 2539         KASSERT(log->log_size == log->log_left);
 2540         free(log->log_num, M_SYSCTLDATA);
 2541         free(log, M_SYSCTLDATA);
 2542         *logp = NULL;
 2543 
 2544         rw_exit(&sysctl_treelock);
 2545 }
 2546 
 2547 /*
 2548  * ********************************************************************
 2549  * old_sysctl -- A routine to bridge old-style internal calls to the
 2550  * new infrastructure.
 2551  * ********************************************************************
 2552  */
 2553 int
 2554 old_sysctl(int *name, u_int namelen, void *oldp, size_t *oldlenp,
 2555            void *newp, size_t newlen, struct lwp *l)
 2556 {
 2557         int error;
 2558         size_t oldlen = 0;
 2559         size_t savelen;
 2560 
 2561         if (oldlenp) {
 2562                 oldlen = *oldlenp;
 2563         }
 2564         savelen = oldlen;
 2565 
 2566         sysctl_lock(newp != NULL);
 2567         error = sysctl_dispatch(name, namelen, oldp, &oldlen,
 2568                                 newp, newlen, name, l, NULL);
 2569         sysctl_unlock();
 2570         if (error == 0 && oldp != NULL && savelen < oldlen)
 2571                 error = ENOMEM;
 2572         if (oldlenp) {
 2573                 *oldlenp = oldlen;
 2574         }
 2575 
 2576         return (error);
 2577 }
 2578 
 2579 /*
 2580  * ********************************************************************
 2581  * Section 4: Generic helper routines
 2582  * ********************************************************************
 2583  * "helper" routines that can do more finely grained access control,
 2584  * construct structures from disparate information, create the
 2585  * appearance of more nodes and sub-trees, etc.  for example, if
 2586  * CTL_PROC wanted a helper function, it could respond to a CTL_QUERY
 2587  * with a dynamically created list of nodes that represented the
 2588  * currently running processes at that instant.
 2589  * ********************************************************************
 2590  */
 2591 
 2592 /*
 2593  * first, a few generic helpers that provide:
 2594  *
 2595  * sysctl_needfunc()            a readonly interface that emits a warning
 2596  * sysctl_notavail()            returns EOPNOTSUPP (generic error)
 2597  * sysctl_null()                an empty return buffer with no error
 2598  */
 2599 int
 2600 sysctl_needfunc(SYSCTLFN_ARGS)
 2601 {
 2602         int error;
 2603 
 2604         printf("!!SYSCTL_NEEDFUNC!!\n");
 2605 
 2606         if (newp != NULL || namelen != 0)
 2607                 return (EOPNOTSUPP);
 2608 
 2609         error = 0;
 2610         if (oldp != NULL)
 2611                 error = sysctl_copyout(l, rnode->sysctl_data, oldp,
 2612                                        MIN(rnode->sysctl_size, *oldlenp));
 2613         *oldlenp = rnode->sysctl_size;
 2614 
 2615         return (error);
 2616 }
 2617 
 2618 int
 2619 sysctl_notavail(SYSCTLFN_ARGS)
 2620 {
 2621 
 2622         if (namelen == 1 && name[0] == CTL_QUERY)
 2623                 return (sysctl_query(SYSCTLFN_CALL(rnode)));
 2624 
 2625         return (EOPNOTSUPP);
 2626 }
 2627 
 2628 int
 2629 sysctl_null(SYSCTLFN_ARGS)
 2630 {
 2631 
 2632         *oldlenp = 0;
 2633 
 2634         return (0);
 2635 }
 2636 
 2637 u_int
 2638 sysctl_map_flags(const u_int *map, u_int word)
 2639 {
 2640         u_int rv;
 2641 
 2642         for (rv = 0; *map != 0; map += 2)
 2643                 if ((word & map[0]) != 0)
 2644                         rv |= map[1];
 2645 
 2646         return rv;
 2647 }
 2648 
 2649 /*
 2650  * ********************************************************************
 2651  * Section 5: The machinery that makes it all go
 2652  * ********************************************************************
 2653  * Memory "manglement" routines.  Not much to this, eh?
 2654  * ********************************************************************
 2655  */
 2656 static int
 2657 sysctl_alloc(struct sysctlnode *p, int x)
 2658 {
 2659         int i;
 2660         struct sysctlnode *n;
 2661 
 2662         assert(p->sysctl_child == NULL);
 2663 
 2664         if (x == 1)
 2665                 n = malloc(sizeof(struct sysctlnode),
 2666                        M_SYSCTLNODE, M_WAITOK);
 2667         else
 2668                 n = malloc(SYSCTL_DEFSIZE * sizeof(struct sysctlnode),
 2669                        M_SYSCTLNODE, M_WAITOK);
 2670         if (n == NULL)
 2671                 return (ENOMEM);
 2672 
 2673         if (x == 1) {
 2674                 memset(n, 0, sizeof(struct sysctlnode));
 2675                 p->sysctl_csize = 1;
 2676         } else {
 2677                 memset(n, 0, SYSCTL_DEFSIZE * sizeof(struct sysctlnode));
 2678                 p->sysctl_csize = SYSCTL_DEFSIZE;
 2679         }
 2680         p->sysctl_clen = 0;
 2681 
 2682         for (i = 0; i < p->sysctl_csize; i++)
 2683                 n[i].sysctl_parent = p;
 2684 
 2685         p->sysctl_child = n;
 2686         return (0);
 2687 }
 2688 
 2689 static int
 2690 sysctl_realloc(struct sysctlnode *p)
 2691 {
 2692         int i, j, olen;
 2693         struct sysctlnode *n;
 2694 
 2695         assert(p->sysctl_csize == p->sysctl_clen);
 2696 
 2697         /*
 2698          * how many do we have...how many should we make?
 2699          */
 2700         olen = p->sysctl_clen;
 2701         n = malloc(2 * olen * sizeof(struct sysctlnode), M_SYSCTLNODE,
 2702                    M_WAITOK);
 2703         if (n == NULL)
 2704                 return (ENOMEM);
 2705 
 2706         /*
 2707          * move old children over...initialize new children
 2708          */
 2709         memcpy(n, p->sysctl_child, olen * sizeof(struct sysctlnode));
 2710         memset(&n[olen], 0, olen * sizeof(struct sysctlnode));
 2711         p->sysctl_csize = 2 * olen;
 2712 
 2713         /*
 2714          * reattach moved (and new) children to parent; if a moved
 2715          * child node has children, reattach the parent pointers of
 2716          * grandchildren
 2717          */
 2718         for (i = 0; i < p->sysctl_csize; i++) {
 2719                 n[i].sysctl_parent = p;
 2720                 if (n[i].sysctl_child != NULL) {
 2721                         for (j = 0; j < n[i].sysctl_csize; j++)
 2722                                 n[i].sysctl_child[j].sysctl_parent = &n[i];
 2723                 }
 2724         }
 2725 
 2726         /*
 2727          * get out with the old and in with the new
 2728          */
 2729         free(p->sysctl_child, M_SYSCTLNODE);
 2730         p->sysctl_child = n;
 2731 
 2732         return (0);
 2733 }
 2734 
 2735 static int
 2736 sysctl_log_realloc(struct sysctllog *log)
 2737 {
 2738         int *n, s, d;
 2739 
 2740         s = log->log_size * 2;
 2741         d = log->log_size;
 2742 
 2743         n = malloc(s * sizeof(int), M_SYSCTLDATA, M_WAITOK);
 2744         if (n == NULL)
 2745                 return (-1);
 2746 
 2747         memset(n, 0, s * sizeof(int));
 2748         memcpy(&n[d], log->log_num, d * sizeof(int));
 2749         free(log->log_num, M_SYSCTLDATA);
 2750         log->log_num = n;
 2751         if (d)
 2752                 log->log_left += d;
 2753         else
 2754                 log->log_left = s;
 2755         log->log_size = s;
 2756 
 2757         return (0);
 2758 }
 2759 
 2760 /*
 2761  * ********************************************************************
 2762  * Section 6: Conversion between API versions wrt the sysctlnode
 2763  * ********************************************************************
 2764  */
 2765 static int
 2766 sysctl_cvt_in(struct lwp *l, int *vp, const void *i, size_t sz,
 2767               struct sysctlnode *node)
 2768 {
 2769         int error, flags;
 2770 
 2771         if (i == NULL || sz < sizeof(flags))
 2772                 return (EINVAL);
 2773 
 2774         error = sysctl_copyin(l, i, &flags, sizeof(flags));
 2775         if (error)
 2776                 return (error);
 2777 
 2778 #if (SYSCTL_VERSION != SYSCTL_VERS_1)
 2779 #error sysctl_cvt_in: no support for SYSCTL_VERSION
 2780 #endif /*  (SYSCTL_VERSION != SYSCTL_VERS_1) */
 2781 
 2782         if (sz == sizeof(*node) &&
 2783             SYSCTL_VERS(flags) == SYSCTL_VERSION) {
 2784                 error = sysctl_copyin(l, i, node, sizeof(*node));
 2785                 if (error)
 2786                         return (error);
 2787                 *vp = SYSCTL_VERSION;
 2788                 return (0);
 2789         }
 2790 
 2791         return (EINVAL);
 2792 }
 2793 
 2794 static int
 2795 sysctl_cvt_out(struct lwp *l, int v, const struct sysctlnode *i,
 2796                void *ovp, size_t left, size_t *szp)
 2797 {
 2798         size_t sz = sizeof(*i);
 2799         const void *src = i;
 2800         int error;
 2801 
 2802         switch (v) {
 2803         case SYSCTL_VERS_0:
 2804                 return (EINVAL);
 2805 
 2806 #if (SYSCTL_VERSION != SYSCTL_VERS_1)
 2807 #error sysctl_cvt_out: no support for SYSCTL_VERSION
 2808 #endif /*  (SYSCTL_VERSION != SYSCTL_VERS_1) */
 2809 
 2810         case SYSCTL_VERSION:
 2811                 /* nothing more to do here */
 2812                 break;
 2813         }
 2814 
 2815         if (ovp != NULL && left >= sz) {
 2816                 error = sysctl_copyout(l, src, ovp, sz);
 2817                 if (error)
 2818                         return (error);
 2819         }
 2820 
 2821         if (szp != NULL)
 2822                 *szp = sz;
 2823 
 2824         return (0);
 2825 }
 2826 
 2827 static uint8_t address_key[32]; /* key used in address hashing */
 2828 static ONCE_DECL(random_inithook);
 2829 
 2830 static int
 2831 random_address_init(void)
 2832 {
 2833 
 2834         cprng_strong(kern_cprng, address_key, sizeof(address_key), 0);
 2835         return 0;
 2836 }
 2837 
 2838 void
 2839 hash_value(void *d, size_t ds, const void *s, size_t ss)
 2840 {
 2841 
 2842         RUN_ONCE(&random_inithook, random_address_init);
 2843         blake2s(d, ds, address_key, sizeof(address_key), s, ss);
 2844 }

Cache object: a49c34dd06a5d657d4f93f8a27dc05d3


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]


This page is part of the FreeBSD/Linux Kernel Cross-Reference, and was automatically generated using a modified version of the LXR engine.