sys/net/if_tap.c

    1 /*      $NetBSD: if_tap.c,v 1.128 2023/01/06 01:54:22 ozaki-r Exp $     */
    2 
    3 /*
    4  *  Copyright (c) 2003, 2004, 2008, 2009 The NetBSD Foundation.
    5  *  All rights reserved.
    6  *
    7  *  Redistribution and use in source and binary forms, with or without
    8  *  modification, are permitted provided that the following conditions
    9  *  are met:
   10  *  1. Redistributions of source code must retain the above copyright
   11  *     notice, this list of conditions and the following disclaimer.
   12  *  2. Redistributions in binary form must reproduce the above copyright
   13  *     notice, this list of conditions and the following disclaimer in the
   14  *     documentation and/or other materials provided with the distribution.
   15  *
   16  *  THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
   17  *  ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
   18  *  TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
   19  *  PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
   20  *  BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
   21  *  CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
   22  *  SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
   23  *  INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
   24  *  CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
   25  *  ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
   26  *  POSSIBILITY OF SUCH DAMAGE.
   27  */
   28 
   29 /*
   30  * tap(4) is a virtual Ethernet interface.  It appears as a real Ethernet
   31  * device to the system, but can also be accessed by userland through a
   32  * character device interface, which allows reading and injecting frames.
   33  */
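      /*
       * Illustrative userland sketch (not part of the driver, error
       * handling omitted): once tap0 exists, one read(2) returns one frame
       * and one write(2) injects one frame; the buffer size is an
       * assumption.
       *
       *        #include <fcntl.h>
       *        #include <unistd.h>
       *
       *        char frame[2048];
       *        int fd = open("/dev/tap0", O_RDWR);
       *        ssize_t n = read(fd, frame, sizeof(frame));
       *        if (n > 0)
       *                (void)write(fd, frame, (size_t)n);
       */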
   34 
   35 #include <sys/cdefs.h>
   36 __KERNEL_RCSID(0, "$NetBSD: if_tap.c,v 1.128 2023/01/06 01:54:22 ozaki-r Exp $");
   37 
   38 #if defined(_KERNEL_OPT)
   39 
   40 #include "opt_modular.h"
   41 #endif
   42 
   43 #include <sys/param.h>
   44 #include <sys/atomic.h>
   45 #include <sys/conf.h>
   46 #include <sys/cprng.h>
   47 #include <sys/device.h>
   48 #include <sys/file.h>
   49 #include <sys/filedesc.h>
   50 #include <sys/intr.h>
   51 #include <sys/kauth.h>
   52 #include <sys/kernel.h>
   53 #include <sys/kmem.h>
   54 #include <sys/module.h>
   55 #include <sys/mutex.h>
   56 #include <sys/condvar.h>
   57 #include <sys/poll.h>
   58 #include <sys/proc.h>
   59 #include <sys/select.h>
   60 #include <sys/sockio.h>
   61 #include <sys/stat.h>
   62 #include <sys/sysctl.h>
   63 #include <sys/systm.h>
   64 
   65 #include <net/if.h>
   66 #include <net/if_dl.h>
   67 #include <net/if_ether.h>
   68 #include <net/if_tap.h>
   69 #include <net/bpf.h>
   70 
   71 #include "ioconf.h"
   72 
   73 /*
   74  * sysctl node management
   75  *
   76  * It's not really possible to use a SYSCTL_SETUP block with the
   77  * current module implementation, so it is easier to just define
   78  * our own function.
   79  *
   80  * The handler function is a "helper" in Andrew Brown's sysctl
   81  * framework terminology.  It is used as a gateway for sysctl
   82  * requests over the nodes.
   83  *
   84  * tap_sysctl_clog allows the module to log the creation of nodes and
   85  * destroy them all at once using sysctl_teardown.
   86  */
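      /*
       * For illustration (assuming an interface named tap0 and sufficient
       * privilege), the per-interface node created later can be inspected
       * and changed with sysctl(8); the exact output format is indicative
       * only:
       *
       *        # sysctl net.link.tap.tap0
       *        net.link.tap.tap0 = f2:0b:a4:00:00:01
       *        # sysctl -w net.link.tap.tap0=f2:0b:a4:00:00:02
       */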
   87 static int      tap_node;
   88 static int      tap_sysctl_handler(SYSCTLFN_PROTO);
   89 static void     sysctl_tap_setup(struct sysctllog **);
   90 
   91 struct tap_softc {
   92         device_t        sc_dev;
   93         struct ethercom sc_ec;
   94         int             sc_flags;
   95 #define TAP_INUSE       0x00000001      /* tap device can only be opened once */
   96 #define TAP_ASYNCIO     0x00000002      /* user is using async I/O (SIGIO) on the device */
   97 #define TAP_NBIO        0x00000004      /* user wants calls to avoid blocking */
   98 #define TAP_GOING       0x00000008      /* interface is being destroyed */
   99         struct selinfo  sc_rsel;
  100         pid_t           sc_pgid; /* For async. IO */
  101         kmutex_t        sc_lock;
  102         kcondvar_t      sc_cv;
  103         void            *sc_sih;
  104         struct timespec sc_atime;
  105         struct timespec sc_mtime;
  106         struct timespec sc_btime;
  107 };
  108 
  109 /* autoconf(9) glue */
  110 
  111 static int      tap_match(device_t, cfdata_t, void *);
  112 static void     tap_attach(device_t, device_t, void *);
  113 static int      tap_detach(device_t, int);
  114 
  115 CFATTACH_DECL_NEW(tap, sizeof(struct tap_softc),
  116     tap_match, tap_attach, tap_detach, NULL);
  117 extern struct cfdriver tap_cd;
  118 
  119 /* Real device access routines */
  120 static int      tap_dev_close(struct tap_softc *);
  121 static int      tap_dev_read(int, struct uio *, int);
  122 static int      tap_dev_write(int, struct uio *, int);
  123 static int      tap_dev_ioctl(int, u_long, void *, struct lwp *);
  124 static int      tap_dev_poll(int, int, struct lwp *);
  125 static int      tap_dev_kqfilter(int, struct knote *);
  126 
  127 /* Fileops access routines */
  128 static int      tap_fops_close(file_t *);
  129 static int      tap_fops_read(file_t *, off_t *, struct uio *,
  130     kauth_cred_t, int);
  131 static int      tap_fops_write(file_t *, off_t *, struct uio *,
  132     kauth_cred_t, int);
  133 static int      tap_fops_ioctl(file_t *, u_long, void *);
  134 static int      tap_fops_poll(file_t *, int);
  135 static int      tap_fops_stat(file_t *, struct stat *);
  136 static int      tap_fops_kqfilter(file_t *, struct knote *);
  137 
  138 static const struct fileops tap_fileops = {
  139         .fo_name = "tap",
  140         .fo_read = tap_fops_read,
  141         .fo_write = tap_fops_write,
  142         .fo_ioctl = tap_fops_ioctl,
  143         .fo_fcntl = fnullop_fcntl,
  144         .fo_poll = tap_fops_poll,
  145         .fo_stat = tap_fops_stat,
  146         .fo_close = tap_fops_close,
  147         .fo_kqfilter = tap_fops_kqfilter,
  148         .fo_restart = fnullop_restart,
  149 };
  150 
  151 /* Helper for cloning open() */
  152 static int      tap_dev_cloner(struct lwp *);
  153 
  154 /* Character device routines */
  155 static int      tap_cdev_open(dev_t, int, int, struct lwp *);
  156 static int      tap_cdev_close(dev_t, int, int, struct lwp *);
  157 static int      tap_cdev_read(dev_t, struct uio *, int);
  158 static int      tap_cdev_write(dev_t, struct uio *, int);
  159 static int      tap_cdev_ioctl(dev_t, u_long, void *, int, struct lwp *);
  160 static int      tap_cdev_poll(dev_t, int, struct lwp *);
  161 static int      tap_cdev_kqfilter(dev_t, struct knote *);
  162 
  163 const struct cdevsw tap_cdevsw = {
  164         .d_open = tap_cdev_open,
  165         .d_close = tap_cdev_close,
  166         .d_read = tap_cdev_read,
  167         .d_write = tap_cdev_write,
  168         .d_ioctl = tap_cdev_ioctl,
  169         .d_stop = nostop,
  170         .d_tty = notty,
  171         .d_poll = tap_cdev_poll,
  172         .d_mmap = nommap,
  173         .d_kqfilter = tap_cdev_kqfilter,
  174         .d_discard = nodiscard,
  175         .d_flag = D_OTHER | D_MPSAFE
  176 };
  177 
  178 #define TAP_CLONER      0xfffff         /* Maximal minor value */
  179 
  180 /* kqueue-related routines */
  181 static void     tap_kqdetach(struct knote *);
  182 static int      tap_kqread(struct knote *, long);
  183 
  184 /*
  185  * Those are needed by the ifnet interface, and would typically be
  186  * there for any network interface driver.
  187  * Some other routines are optional: watchdog and drain.
  188  */
  189 static void     tap_start(struct ifnet *);
  190 static void     tap_stop(struct ifnet *, int);
  191 static int      tap_init(struct ifnet *);
  192 static int      tap_ioctl(struct ifnet *, u_long, void *);
  193 
  194 /* Internal functions */
  195 static int      tap_lifaddr(struct ifnet *, u_long, struct ifaliasreq *);
  196 static void     tap_softintr(void *);
  197 
  198 /*
  199  * tap is a clonable interface, although cloning is highly unrealistic
  200  * for a real Ethernet device.
  201  *
  202  * Here are the bits needed for a clonable interface.
  203  */
  204 static int      tap_clone_create(struct if_clone *, int);
  205 static int      tap_clone_destroy(struct ifnet *);
  206 
  207 struct if_clone tap_cloners = IF_CLONE_INITIALIZER("tap",
  208                                         tap_clone_create,
  209                                         tap_clone_destroy);
  210 
  211 /* Helper functions shared by the two cloning code paths */
  212 static struct tap_softc *       tap_clone_creator(int);
  213 int     tap_clone_destroyer(device_t);
  214 
  215 static struct sysctllog *tap_sysctl_clog;
  216 
  217 #ifdef _MODULE
  218 devmajor_t tap_bmajor = -1, tap_cmajor = -1;
  219 #endif
  220 
  221 static u_int tap_count;
  222 
  223 void
  224 tapattach(int n)
  225 {
  226 
  227         /*
  228          * Nothing to do here, initialization is handled by the
  229          * module initialization code in tapinit() below.
  230          */
  231 }
  232 
  233 static void
  234 tapinit(void)
  235 {
  236         int error;
  237 
  238 #ifdef _MODULE
  239         devsw_attach("tap", NULL, &tap_bmajor, &tap_cdevsw, &tap_cmajor);
  240 #endif
  241         error = config_cfattach_attach(tap_cd.cd_name, &tap_ca);
  242 
  243         if (error) {
  244                 aprint_error("%s: unable to register cfattach\n",
  245                     tap_cd.cd_name);
  246                 (void)config_cfdriver_detach(&tap_cd);
  247                 return;
  248         }
  249 
  250         if_clone_attach(&tap_cloners);
  251         sysctl_tap_setup(&tap_sysctl_clog);
  252 }
  253 
  254 static int
  255 tapdetach(void)
  256 {
  257         int error = 0;
  258 
  259         if_clone_detach(&tap_cloners);
  260 
  261         if (tap_count != 0) {
  262                 if_clone_attach(&tap_cloners);
  263                 return EBUSY;
  264         }
  265 
  266         error = config_cfattach_detach(tap_cd.cd_name, &tap_ca);
  267         if (error == 0) {
  268 #ifdef _MODULE
  269                 devsw_detach(NULL, &tap_cdevsw);
  270 #endif
  271                 sysctl_teardown(&tap_sysctl_clog);
  272         } else
  273                 if_clone_attach(&tap_cloners);
  274 
  275         return error;
  276 }
  277 
  278 /* Pretty much useless for a pseudo-device */
  279 static int
  280 tap_match(device_t parent, cfdata_t cfdata, void *arg)
  281 {
  282 
  283         return 1;
  284 }
  285 
  286 static void
  287 tap_attach(device_t parent, device_t self, void *aux)
  288 {
  289         struct tap_softc *sc = device_private(self);
  290         struct ifnet *ifp;
  291         const struct sysctlnode *node;
  292         int error;
  293         uint8_t enaddr[ETHER_ADDR_LEN] =
  294             { 0xf2, 0x0b, 0xa4, 0xff, 0xff, 0xff };
  295         char enaddrstr[3 * ETHER_ADDR_LEN];
  296 
  297         sc->sc_dev = self;
  298         sc->sc_sih = NULL;
  299         getnanotime(&sc->sc_btime);
  300         sc->sc_atime = sc->sc_mtime = sc->sc_btime;
  301         sc->sc_flags = 0;
  302         selinit(&sc->sc_rsel);
  303 
  304         cv_init(&sc->sc_cv, "tapread");
  305         mutex_init(&sc->sc_lock, MUTEX_DEFAULT, IPL_NET);
  306 
  307         if (!pmf_device_register(self, NULL, NULL))
  308                 aprint_error_dev(self, "couldn't establish power handler\n");
  309 
  310         /*
  311          * In order to obtain a unique initial Ethernet address on a host,
  312          * do some randomisation.  It's not meant for anything but avoiding
  313          * hard-coding an address.
  314          */
  315         cprng_fast(&enaddr[3], 3);
  316 
  317         aprint_verbose_dev(self, "Ethernet address %s\n",
  318             ether_snprintf(enaddrstr, sizeof(enaddrstr), enaddr));
  319 
  320         /*
  321          * One should note that an interface must do multicast in order
  322          * to support IPv6.
  323          */
  324         ifp = &sc->sc_ec.ec_if;
  325         strcpy(ifp->if_xname, device_xname(self));
  326         ifp->if_softc   = sc;
  327         ifp->if_flags   = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
  328 #ifdef NET_MPSAFE
  329         ifp->if_extflags = IFEF_MPSAFE;
  330 #endif
  331         ifp->if_ioctl   = tap_ioctl;
  332         ifp->if_start   = tap_start;
  333         ifp->if_stop    = tap_stop;
  334         ifp->if_init    = tap_init;
  335         IFQ_SET_READY(&ifp->if_snd);
  336 
  337         sc->sc_ec.ec_capabilities = ETHERCAP_VLAN_MTU | ETHERCAP_JUMBO_MTU;
  338 
  339         /* Those steps are mandatory for an Ethernet driver. */
  340         if_initialize(ifp);
  341         ifp->if_percpuq = if_percpuq_create(ifp);
  342         ether_ifattach(ifp, enaddr);
  343         /* Opening the device will bring the link state up. */
  344         ifp->if_link_state = LINK_STATE_DOWN;
  345         if_register(ifp);
  346 
  347         /*
  348          * Add a sysctl node for that interface.
  349          *
  350          * The pointer passed is not a string, but instead a pointer to
  351          * the softc structure, which we can use to build the string value on
  352          * the fly in the helper function of the node.  See the comments for
  353          * tap_sysctl_handler for details.
  354          *
  355          * Usually sysctl_createv is called with CTL_CREATE as the second-to-last
  356          * component.  However, we can allocate a number ourselves, as we are
  357          * the only consumer of the net.link.<iface> node.  In this case, the
  358          * unit number is conveniently used to number the node.  CTL_CREATE
  359          * would just work, too.
  360          */
  361         if ((error = sysctl_createv(NULL, 0, NULL,
  362             &node, CTLFLAG_READWRITE,
  363             CTLTYPE_STRING, device_xname(self), NULL,
  364             tap_sysctl_handler, 0, (void *)sc, 18,
  365             CTL_NET, AF_LINK, tap_node, device_unit(sc->sc_dev),
  366             CTL_EOL)) != 0)
  367                 aprint_error_dev(self,
  368                     "sysctl_createv returned %d, ignoring\n", error);
  369 }
  370 
  371 /*
  372  * When detaching, we do the inverse of what is done in the attach
  373  * routine, in reversed order.
  374  */
  375 static int
  376 tap_detach(device_t self, int flags)
  377 {
  378         struct tap_softc *sc = device_private(self);
  379         struct ifnet *ifp = &sc->sc_ec.ec_if;
  380         int error;
  381 
  382         sc->sc_flags |= TAP_GOING;
  383         tap_stop(ifp, 1);
  384         if_down(ifp);
  385 
  386         if (sc->sc_sih != NULL) {
  387                 softint_disestablish(sc->sc_sih);
  388                 sc->sc_sih = NULL;
  389         }
  390 
  391         /*
  392          * Destroying a single leaf is a very straightforward operation using
  393          * sysctl_destroyv.  One should be sure to always end the path with
  394          * CTL_EOL.
  395          */
  396         if ((error = sysctl_destroyv(NULL, CTL_NET, AF_LINK, tap_node,
  397             device_unit(sc->sc_dev), CTL_EOL)) != 0)
  398                 aprint_error_dev(self,
  399                     "sysctl_destroyv returned %d, ignoring\n", error);
  400         ether_ifdetach(ifp);
  401         if_detach(ifp);
  402         seldestroy(&sc->sc_rsel);
  403         mutex_destroy(&sc->sc_lock);
  404         cv_destroy(&sc->sc_cv);
  405 
  406         pmf_device_deregister(self);
  407 
  408         return 0;
  409 }
  410 
  411 /*
  412  * This is the function where we SEND packets.
  413  *
  414  * There is no 'receive' equivalent.  A typical driver will get
  415  * interrupts from the hardware, and from there will inject new packets
  416  * into the network stack.
  417  *
  418  * Once handled, a packet must be freed.  A real driver might not be able
  419  * to fit all the pending packets into the hardware, and is allowed to
  420  * return before having sent all the packets.  It should then use the
  421  * if_flags flag IFF_OACTIVE to notify the upper layer.
  422  *
  423  * There are also other flags one should check, such as IFF_PAUSE.
  424  *
  425  * It is our duty to make packets available to BPF listeners.
  426  *
  427  * You should be aware that this function is called by the Ethernet layer
  428  * at splnet().
  429  *
  430  * When the device is opened, we have to pass the packet(s) to the
  431  * userland.  For that we stay in OACTIVE mode while the userland gets
  432  * the packets, and we send a signal to the processes waiting to read.
  433  *
  434  * cv_broadcast(&sc->sc_cv) is the counterpart to the cv_wait_sig() call
  435  * in tap_dev_read, while selnotify() is used for kevent(2) and
  436  * poll(2) (which includes select(2)) listeners.
  437  */
  438 static void
  439 tap_start(struct ifnet *ifp)
  440 {
  441         struct tap_softc *sc = (struct tap_softc *)ifp->if_softc;
  442         struct mbuf *m0;
  443 
  444         mutex_enter(&sc->sc_lock);
  445         if ((sc->sc_flags & TAP_INUSE) == 0) {
  446                 /* Simply drop packets */
  447                 for (;;) {
  448                         IFQ_DEQUEUE(&ifp->if_snd, m0);
  449                         if (m0 == NULL)
  450                                 goto done;
  451 
  452                         if_statadd2(ifp, if_opackets, 1, if_obytes, m0->m_len);
  453                         bpf_mtap(ifp, m0, BPF_D_OUT);
  454 
  455                         m_freem(m0);
  456                 }
  457         } else if (!IFQ_IS_EMPTY(&ifp->if_snd)) {
  458                 ifp->if_flags |= IFF_OACTIVE;
  459                 cv_broadcast(&sc->sc_cv);
  460                 selnotify(&sc->sc_rsel, 0, 1);
  461                 if (sc->sc_flags & TAP_ASYNCIO) {
  462                         kpreempt_disable();
  463                         softint_schedule(sc->sc_sih);
  464                         kpreempt_enable();
  465                 }
  466         }
  467 done:
  468         mutex_exit(&sc->sc_lock);
  469 }
  470 
  471 static void
  472 tap_softintr(void *cookie)
  473 {
  474         struct tap_softc *sc;
  475         struct ifnet *ifp;
  476         int a, b;
  477 
  478         sc = cookie;
  479 
  480         if (sc->sc_flags & TAP_ASYNCIO) {
  481                 ifp = &sc->sc_ec.ec_if;
  482                 if (ifp->if_flags & IFF_RUNNING) {
  483                         a = POLL_IN;
  484                         b = POLLIN | POLLRDNORM;
  485                 } else {
  486                         a = POLL_HUP;
  487                         b = 0;
  488                 }
  489                 fownsignal(sc->sc_pgid, SIGIO, a, b, NULL);
  490         }
  491 }
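      /*
       * Illustrative userland sketch (not part of the driver) of how a
       * process arms the SIGIO delivery performed above; "fd" is an
       * already-open tap descriptor and "handler" an assumed signal
       * handler, with error handling omitted:
       *
       *        #include <sys/ioctl.h>
       *        #include <signal.h>
       *        #include <unistd.h>
       *
       *        int on = 1, owner = getpid();
       *        signal(SIGIO, handler);
       *        ioctl(fd, FIOSETOWN, &owner);    // recorded in sc_pgid
       *        ioctl(fd, FIOASYNC, &on);        // sets TAP_ASYNCIO
       */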
  492 
  493 /*
  494  * A typical driver will only contain the following handlers for
  495  * ioctl calls, except SIOCSIFPHYADDR.
  496  * The latter is a hack I used to set the Ethernet address of the
  497  * faked device.
  498  *
  499  * Note that ether_ioctl() has to be called under splnet().
  500  */
  501 static int
  502 tap_ioctl(struct ifnet *ifp, u_long cmd, void *data)
  503 {
  504         int s, error;
  505 
  506         s = splnet();
  507 
  508         switch (cmd) {
  509         case SIOCSIFPHYADDR:
  510                 error = tap_lifaddr(ifp, cmd, (struct ifaliasreq *)data);
  511                 break;
  512         default:
  513                 error = ether_ioctl(ifp, cmd, data);
  514                 if (error == ENETRESET)
  515                         error = 0;
  516                 break;
  517         }
  518 
  519         splx(s);
  520 
  521         return error;
  522 }
  523 
  524 /*
  525  * Helper function to set Ethernet address.  This has been replaced by
  526  * the generic SIOCALIFADDR ioctl on a PF_LINK socket.
  527  */
  528 static int
  529 tap_lifaddr(struct ifnet *ifp, u_long cmd, struct ifaliasreq *ifra)
  530 {
  531         const struct sockaddr *sa = &ifra->ifra_addr;
  532 
  533         if (sa->sa_family != AF_LINK)
  534                 return EINVAL;
  535 
  536         if_set_sadl(ifp, sa->sa_data, ETHER_ADDR_LEN, false);
  537 
  538         return 0;
  539 }
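      /*
       * Illustrative userland sketch (not part of the driver) of the
       * SIOCSIFPHYADDR path handled above.  The interface name and the
       * address are assumptions; it needs appropriate privilege and error
       * handling is omitted:
       *
       *        #include <sys/ioctl.h>
       *        #include <sys/socket.h>
       *        #include <net/if.h>
       *        #include <stdint.h>
       *        #include <string.h>
       *
       *        struct ifaliasreq ifra;
       *        const uint8_t mac[6] = { 0xf2, 0x0b, 0xa4, 0x00, 0x00, 0x01 };
       *        int s = socket(AF_INET, SOCK_DGRAM, 0);
       *
       *        memset(&ifra, 0, sizeof(ifra));
       *        strlcpy(ifra.ifra_name, "tap0", sizeof(ifra.ifra_name));
       *        ifra.ifra_addr.sa_family = AF_LINK;
       *        ifra.ifra_addr.sa_len = sizeof(ifra.ifra_addr);
       *        memcpy(ifra.ifra_addr.sa_data, mac, sizeof(mac));
       *        ioctl(s, SIOCSIFPHYADDR, &ifra);    // reaches tap_lifaddr()
       */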
  540 
  541 /*
  542  * _init() would typically be called when an interface goes up,
  543  * meaning it should configure itself into the state in which it
  544  * can send packets.
  545  */
  546 static int
  547 tap_init(struct ifnet *ifp)
  548 {
  549         ifp->if_flags |= IFF_RUNNING;
  550 
  551         tap_start(ifp);
  552 
  553         return 0;
  554 }
  555 
  556 /*
  557  * _stop() is called when an interface goes down.  It is our
  558  * responsibility to reflect that state by clearing the
  559  * IFF_RUNNING flag.
  560  *
  561  * We have to wake up all the sleeping processes to have the pending
  562  * read requests cancelled.
  563  */
  564 static void
  565 tap_stop(struct ifnet *ifp, int disable)
  566 {
  567         struct tap_softc *sc = (struct tap_softc *)ifp->if_softc;
  568 
  569         mutex_enter(&sc->sc_lock);
  570         ifp->if_flags &= ~IFF_RUNNING;
  571         cv_broadcast(&sc->sc_cv);
  572         selnotify(&sc->sc_rsel, 0, 1);
  573         if (sc->sc_flags & TAP_ASYNCIO) {
  574                 kpreempt_disable();
  575                 softint_schedule(sc->sc_sih);
  576                 kpreempt_enable();
  577         }
  578         mutex_exit(&sc->sc_lock);
  579 }
  580 
  581 /*
  582  * The 'create' command of ifconfig can be used to create
  583  * any numbered instance of a given device.  Thus we have to
  584  * make sure we have enough room in cd_devs to create the
  585  * user-specified instance.  config_attach_pseudo will do this
  586  * for us.
  587  */
  588 static int
  589 tap_clone_create(struct if_clone *ifc, int unit)
  590 {
  591 
  592         if (tap_clone_creator(unit) == NULL) {
  593                 aprint_error("%s%d: unable to attach an instance\n",
  594                     tap_cd.cd_name, unit);
  595                 return ENXIO;
  596         }
  597         atomic_inc_uint(&tap_count);
  598         return 0;
  599 }
  600 
  601 /*
  602  * tap(4) can be cloned in two ways:
  603  *   using 'ifconfig tap0 create', which will use the network
  604  *     interface cloning API, and call tap_clone_create above.
  605  *   opening the cloning device node, whose minor number is TAP_CLONER.
  606  *     See below for an explanation of how this part works.
  607  */
  608 static struct tap_softc *
  609 tap_clone_creator(int unit)
  610 {
  611         cfdata_t cf;
  612 
  613         cf = kmem_alloc(sizeof(*cf), KM_SLEEP);
  614         cf->cf_name = tap_cd.cd_name;
  615         cf->cf_atname = tap_ca.ca_name;
  616         if (unit == -1) {
  617                 /* let autoconf find the first free one */
  618                 cf->cf_unit = 0;
  619                 cf->cf_fstate = FSTATE_STAR;
  620         } else {
  621                 cf->cf_unit = unit;
  622                 cf->cf_fstate = FSTATE_NOTFOUND;
  623         }
  624 
  625         return device_private(config_attach_pseudo(cf));
  626 }
  627 
  628 /*
  629  * The clean design of if_clone and autoconf(9) makes that part
  630  * really straightforward.  The second argument of config_detach
  631  * means neither QUIET nor FORCED.
  632  */
  633 static int
  634 tap_clone_destroy(struct ifnet *ifp)
  635 {
  636         struct tap_softc *sc = ifp->if_softc;
  637         int error = tap_clone_destroyer(sc->sc_dev);
  638 
  639         if (error == 0)
  640                 atomic_dec_uint(&tap_count);
  641         return error;
  642 }
  643 
  644 int
  645 tap_clone_destroyer(device_t dev)
  646 {
  647         cfdata_t cf = device_cfdata(dev);
  648         int error;
  649 
  650         if ((error = config_detach(dev, 0)) != 0)
  651                 aprint_error_dev(dev, "unable to detach instance\n");
  652         kmem_free(cf, sizeof(*cf));
  653 
  654         return error;
  655 }
  656 
  657 /*
  658  * tap(4) is a bit of a hybrid device.  It can be used in two different
  659  * ways:
  660  *  1. ifconfig tapN create, then use /dev/tapN to read/write off it.
  661  *  2. open /dev/tap, get a new interface created and read/write off it.
  662  *     That interface is destroyed when the process that had it created exits.
  663  *
  664  * The first way is managed by the cdevsw structure, and you access interfaces
  665  * through a (major, minor) mapping:  tap4 is obtained by the minor number
  666  * 4.  The entry points for the cdevsw interface are prefixed by tap_cdev_.
  667  *
  668  * The second way is the so-called "cloning" device.  It's a special minor
  669  * number (chosen as the maximal number, to allow as many tap devices as
  670  * possible).  The user first opens the cloner (e.g., /dev/tap), and that
  671  * call ends in tap_cdev_open.  The actual place where it is handled is
  672  * tap_dev_cloner.
  673  *
  674  * A tap device cannot be opened more than once at a time, so the cdevsw
  675  * part of open() does nothing but note that the interface is in use and
  676  * hence ready to actually handle packets.
  677  */
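      /*
       * Illustrative userland sketch (not part of the driver, error
       * handling omitted) of the second way: open the cloner and ask the
       * freshly created interface for its name with TAPGIFNAME.
       *
       *        #include <sys/ioctl.h>
       *        #include <net/if.h>
       *        #include <net/if_tap.h>
       *        #include <fcntl.h>
       *
       *        struct ifreq ifr;
       *        int fd = open("/dev/tap", O_RDWR);  // ends up in tap_dev_cloner()
       *        ioctl(fd, TAPGIFNAME, &ifr);        // ifr.ifr_name is e.g. "tap0"
       */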
  678 
  679 static int
  680 tap_cdev_open(dev_t dev, int flags, int fmt, struct lwp *l)
  681 {
  682         struct tap_softc *sc;
  683 
  684         if (minor(dev) == TAP_CLONER)
  685                 return tap_dev_cloner(l);
  686 
  687         sc = device_lookup_private(&tap_cd, minor(dev));
  688         if (sc == NULL)
  689                 return ENXIO;
  690 
  691         /* The device can only be opened once */
  692         if (sc->sc_flags & TAP_INUSE)
  693                 return EBUSY;
  694         sc->sc_flags |= TAP_INUSE;
  695         if_link_state_change(&sc->sc_ec.ec_if, LINK_STATE_UP);
  696 
  697         return 0;
  698 }
  699 
  700 /*
  701  * There are several kinds of cloning devices, and the most simple is the one
  702  * tap(4) uses.  What it does is replace the file descriptor with a new one,
  703  * with its own fileops structure (which maps to the various read, write,
  704  * ioctl functions).  It starts by allocating a new file descriptor with
  705  * fd_allocfile, then actually creates the new tap device.
  706  *
  707  * Once those two steps are successful, we can re-wire the existing file
  708  * descriptor to its new self.  This is done with fd_clone():  it fills the fp
  709  * structure as needed (notably f_devunit gets filled with the fifth parameter
  710  * passed, the unit of the tap device, which will allow us to identify the
  711  * device later), and returns EMOVEFD.
  712  *
  713  * That magic value is interpreted by sys_open() which then replaces the
  714  * current file descriptor by the new one (through a magic member of struct
  715  * lwp, l_dupfd).
  716  *
  717  * The tap device is flagged as being busy since it otherwise could be
  718  * externally accessed through the corresponding device node with the cdevsw
  719  * interface.
  720  */
  721 
  722 static int
  723 tap_dev_cloner(struct lwp *l)
  724 {
  725         struct tap_softc *sc;
  726         file_t *fp;
  727         int error, fd;
  728 
  729         if ((error = fd_allocfile(&fp, &fd)) != 0)
  730                 return error;
  731 
  732         if ((sc = tap_clone_creator(-1)) == NULL) {
  733                 fd_abort(curproc, fp, fd);
  734                 return ENXIO;
  735         }
  736 
  737         sc->sc_flags |= TAP_INUSE;
  738         if_link_state_change(&sc->sc_ec.ec_if, LINK_STATE_UP);
  739 
  740         return fd_clone(fp, fd, FREAD | FWRITE, &tap_fileops,
  741             (void *)(intptr_t)device_unit(sc->sc_dev));
  742 }
  743 
  744 /*
  745  * While all other operations (read, write, ioctl, poll and kqfilter) are
  746  * really the same whether we are in cdevsw or fileops mode, the close()
  747  * function is slightly different in the two cases.
  748  *
  749  * As with the others, the core of it is shared in tap_dev_close.  What
  750  * it does is sufficient for the cdevsw interface, but the cloning interface
  751  * needs another thing:  the interface is destroyed when the process that
  752  * created it closes it.
  753  */
  754 static int
  755 tap_cdev_close(dev_t dev, int flags, int fmt, struct lwp *l)
  756 {
  757         struct tap_softc *sc = device_lookup_private(&tap_cd, minor(dev));
  758 
  759         if (sc == NULL)
  760                 return ENXIO;
  761 
  762         return tap_dev_close(sc);
  763 }
  764 
  765 /*
  766  * It might happen that the administrator used ifconfig to externally destroy
  767  * the interface.  In that case, tap_fops_close will be called while
  768  * tap_detach is already happening.  If we called it again from here, we
  769  * would deadlock.  TAP_GOING ensures that this situation doesn't happen.
  770  */
  771 static int
  772 tap_fops_close(file_t *fp)
  773 {
  774         struct tap_softc *sc;
  775         int unit = fp->f_devunit;
  776         int error;
  777 
  778         sc = device_lookup_private(&tap_cd, unit);
  779         if (sc == NULL)
  780                 return ENXIO;
  781 
  782         /* tap_dev_close currently always succeeds, but that might not
  783          * always be the case. */
  784         KERNEL_LOCK(1, NULL);
  785         if ((error = tap_dev_close(sc)) != 0) {
  786                 KERNEL_UNLOCK_ONE(NULL);
  787                 return error;
  788         }
  789 
  790         /* Destroy the device now that it is no longer useful,
  791          * unless it's already being destroyed. */
  792         if ((sc->sc_flags & TAP_GOING) != 0) {
  793                 KERNEL_UNLOCK_ONE(NULL);
  794                 return 0;
  795         }
  796 
  797         error = tap_clone_destroyer(sc->sc_dev);
  798         KERNEL_UNLOCK_ONE(NULL);
  799         return error;
  800 }
  801 
  802 static int
  803 tap_dev_close(struct tap_softc *sc)
  804 {
  805         struct ifnet *ifp;
  806         int s;
  807 
  808         s = splnet();
  809         /* Let tap_start handle packets again */
  810         ifp = &sc->sc_ec.ec_if;
  811         ifp->if_flags &= ~IFF_OACTIVE;
  812 
  813         /* Purge output queue */
  814         if (!(IFQ_IS_EMPTY(&ifp->if_snd))) {
  815                 struct mbuf *m;
  816 
  817                 for (;;) {
  818                         IFQ_DEQUEUE(&ifp->if_snd, m);
  819                         if (m == NULL)
  820                                 break;
  821 
  822                         if_statadd2(ifp, if_opackets, 1, if_obytes, m->m_len);
  823                         bpf_mtap(ifp, m, BPF_D_OUT);
  824                         m_freem(m);
  825                 }
  826         }
  827         splx(s);
  828 
  829         if (sc->sc_sih != NULL) {
  830                 softint_disestablish(sc->sc_sih);
  831                 sc->sc_sih = NULL;
  832         }
  833         sc->sc_flags &= ~(TAP_INUSE | TAP_ASYNCIO);
  834         if_link_state_change(ifp, LINK_STATE_DOWN);
  835 
  836         return 0;
  837 }
  838 
  839 static int
  840 tap_cdev_read(dev_t dev, struct uio *uio, int flags)
  841 {
  842 
  843         return tap_dev_read(minor(dev), uio, flags);
  844 }
  845 
  846 static int
  847 tap_fops_read(file_t *fp, off_t *offp, struct uio *uio,
  848     kauth_cred_t cred, int flags)
  849 {
  850         int error;
  851 
  852         KERNEL_LOCK(1, NULL);
  853         error = tap_dev_read(fp->f_devunit, uio, flags);
  854         KERNEL_UNLOCK_ONE(NULL);
  855         return error;
  856 }
  857 
  858 static int
  859 tap_dev_read(int unit, struct uio *uio, int flags)
  860 {
  861         struct tap_softc *sc = device_lookup_private(&tap_cd, unit);
  862         struct ifnet *ifp;
  863         struct mbuf *m, *n;
  864         int error = 0;
  865 
  866         if (sc == NULL)
  867                 return ENXIO;
  868 
  869         getnanotime(&sc->sc_atime);
  870 
  871         ifp = &sc->sc_ec.ec_if;
  872         if ((ifp->if_flags & IFF_UP) == 0)
  873                 return EHOSTDOWN;
  874 
  875         /* In the TAP_NBIO case, we have to make sure we won't be sleeping */
  876         if ((sc->sc_flags & TAP_NBIO) != 0) {
  877                 if (!mutex_tryenter(&sc->sc_lock))
  878                         return EWOULDBLOCK;
  879         } else
  880                 mutex_enter(&sc->sc_lock);
  881 
  882         if (IFQ_IS_EMPTY(&ifp->if_snd)) {
  883                 ifp->if_flags &= ~IFF_OACTIVE;
  884                 if (sc->sc_flags & TAP_NBIO)
  885                         error = EWOULDBLOCK;
  886                 else
  887                         error = cv_wait_sig(&sc->sc_cv, &sc->sc_lock);
  888 
  889                 if (error != 0) {
  890                         mutex_exit(&sc->sc_lock);
  891                         return error;
  892                 }
  893                 /* The device might have been downed */
  894                 if ((ifp->if_flags & IFF_UP) == 0) {
  895                         mutex_exit(&sc->sc_lock);
  896                         return EHOSTDOWN;
  897                 }
  898         }
  899 
  900         IFQ_DEQUEUE(&ifp->if_snd, m);
  901         mutex_exit(&sc->sc_lock);
  902 
  903         ifp->if_flags &= ~IFF_OACTIVE;
  904         if (m == NULL) {
  905                 error = 0;
  906                 goto out;
  907         }
  908 
  909         if_statadd2(ifp, if_opackets, 1,
  910             if_obytes, m->m_len);               /* XXX only first in chain */
  911         bpf_mtap(ifp, m, BPF_D_OUT);
  912         if ((error = pfil_run_hooks(ifp->if_pfil, &m, ifp, PFIL_OUT)) != 0)
  913                 goto out;
  914         if (m == NULL)
  915                 goto out;
  916 
  917         /*
  918          * One read is one packet.
  919          */
  920         do {
  921                 error = uiomove(mtod(m, void *),
  922                     uimin(m->m_len, uio->uio_resid), uio);
  923                 m = n = m_free(m);
  924         } while (m != NULL && uio->uio_resid > 0 && error == 0);
  925 
  926         if (m != NULL)
  927                 m_freem(m);
  928 
  929 out:
  930         return error;
  931 }
  932 
  933 static int
  934 tap_fops_stat(file_t *fp, struct stat *st)
  935 {
  936         int error = 0;
  937         struct tap_softc *sc;
  938         int unit = fp->f_devunit;
  939 
  940         (void)memset(st, 0, sizeof(*st));
  941 
  942         KERNEL_LOCK(1, NULL);
  943         sc = device_lookup_private(&tap_cd, unit);
  944         if (sc == NULL) {
  945                 error = ENXIO;
  946                 goto out;
  947         }
  948 
  949         st->st_dev = makedev(cdevsw_lookup_major(&tap_cdevsw), unit);
  950         st->st_atimespec = sc->sc_atime;
  951         st->st_mtimespec = sc->sc_mtime;
  952         st->st_ctimespec = st->st_birthtimespec = sc->sc_btime;
  953         st->st_uid = kauth_cred_geteuid(fp->f_cred);
  954         st->st_gid = kauth_cred_getegid(fp->f_cred);
  955 out:
  956         KERNEL_UNLOCK_ONE(NULL);
  957         return error;
  958 }
  959 
  960 static int
  961 tap_cdev_write(dev_t dev, struct uio *uio, int flags)
  962 {
  963 
  964         return tap_dev_write(minor(dev), uio, flags);
  965 }
  966 
  967 static int
  968 tap_fops_write(file_t *fp, off_t *offp, struct uio *uio,
  969     kauth_cred_t cred, int flags)
  970 {
  971         int error;
  972 
  973         KERNEL_LOCK(1, NULL);
  974         error = tap_dev_write(fp->f_devunit, uio, flags);
  975         KERNEL_UNLOCK_ONE(NULL);
  976         return error;
  977 }
  978 
  979 static int
  980 tap_dev_write(int unit, struct uio *uio, int flags)
  981 {
  982         struct tap_softc *sc =
  983             device_lookup_private(&tap_cd, unit);
  984         struct ifnet *ifp;
  985         struct mbuf *m, **mp;
  986         size_t len = 0;
  987         int error = 0;
  988 
  989         if (sc == NULL)
  990                 return ENXIO;
  991 
  992         getnanotime(&sc->sc_mtime);
  993         ifp = &sc->sc_ec.ec_if;
  994 
  995         /* One write, one packet, that's the rule */
  996         MGETHDR(m, M_DONTWAIT, MT_DATA);
  997         if (m == NULL) {
  998                 if_statinc(ifp, if_ierrors);
  999                 return ENOBUFS;
 1000         }
 1001         m->m_pkthdr.len = uio->uio_resid;
 1002 
 1003         mp = &m;
 1004         while (error == 0 && uio->uio_resid > 0) {
 1005                 if (*mp != m) {
 1006                         MGET(*mp, M_DONTWAIT, MT_DATA);
 1007                         if (*mp == NULL) {
 1008                                 error = ENOBUFS;
 1009                                 break;
 1010                         }
 1011                 }
 1012                 (*mp)->m_len = uimin(MHLEN, uio->uio_resid);
 1013                 len += (*mp)->m_len;
 1014                 error = uiomove(mtod(*mp, void *), (*mp)->m_len, uio);
 1015                 mp = &(*mp)->m_next;
 1016         }
 1017         if (error) {
 1018                 if_statinc(ifp, if_ierrors);
 1019                 m_freem(m);
 1020                 return error;
 1021         }
 1022 
 1023         m_set_rcvif(m, ifp);
 1024 
 1025         if_statadd2(ifp, if_ipackets, 1, if_ibytes, len);
 1026         bpf_mtap(ifp, m, BPF_D_IN);
 1027         if ((error = pfil_run_hooks(ifp->if_pfil, &m, ifp, PFIL_IN)) != 0)
 1028                 return error;
 1029         if (m == NULL)
 1030                 return 0;
 1031 
 1032         if_percpuq_enqueue(ifp->if_percpuq, m);
 1033 
 1034         return 0;
 1035 }
 1036 
 1037 static int
 1038 tap_cdev_ioctl(dev_t dev, u_long cmd, void *data, int flags, struct lwp *l)
 1039 {
 1040 
 1041         return tap_dev_ioctl(minor(dev), cmd, data, l);
 1042 }
 1043 
 1044 static int
 1045 tap_fops_ioctl(file_t *fp, u_long cmd, void *data)
 1046 {
 1047 
 1048         return tap_dev_ioctl(fp->f_devunit, cmd, data, curlwp);
 1049 }
 1050 
 1051 static int
 1052 tap_dev_ioctl(int unit, u_long cmd, void *data, struct lwp *l)
 1053 {
 1054         struct tap_softc *sc = device_lookup_private(&tap_cd, unit);
 1055 
 1056         if (sc == NULL)
 1057                 return ENXIO;
 1058 
 1059         switch (cmd) {
 1060         case FIONREAD:
 1061                 {
 1062                         struct ifnet *ifp = &sc->sc_ec.ec_if;
 1063                         struct mbuf *m;
 1064                         int s;
 1065 
 1066                         s = splnet();
 1067                         IFQ_POLL(&ifp->if_snd, m);
 1068 
 1069                         if (m == NULL)
 1070                                 *(int *)data = 0;
 1071                         else
 1072                                 *(int *)data = m->m_pkthdr.len;
 1073                         splx(s);
 1074                         return 0;
 1075                 }
 1076         case TIOCSPGRP:
 1077         case FIOSETOWN:
 1078                 return fsetown(&sc->sc_pgid, cmd, data);
 1079         case TIOCGPGRP:
 1080         case FIOGETOWN:
 1081                 return fgetown(sc->sc_pgid, cmd, data);
 1082         case FIOASYNC:
 1083                 if (*(int *)data) {
 1084                         if (sc->sc_sih == NULL) {
 1085                                 sc->sc_sih = softint_establish(SOFTINT_CLOCK,
 1086                                     tap_softintr, sc);
 1087                                 if (sc->sc_sih == NULL)
 1088                                         return EBUSY; /* XXX */
 1089                         }
 1090                         sc->sc_flags |= TAP_ASYNCIO;
 1091                 } else {
 1092                         sc->sc_flags &= ~TAP_ASYNCIO;
 1093                         if (sc->sc_sih != NULL) {
 1094                                 softint_disestablish(sc->sc_sih);
 1095                                 sc->sc_sih = NULL;
 1096                         }
 1097                 }
 1098                 return 0;
 1099         case FIONBIO:
 1100                 if (*(int *)data)
 1101                         sc->sc_flags |= TAP_NBIO;
 1102                 else
 1103                         sc->sc_flags &= ~TAP_NBIO;
 1104                 return 0;
 1105         case TAPGIFNAME:
 1106                 {
 1107                         struct ifreq *ifr = (struct ifreq *)data;
 1108                         struct ifnet *ifp = &sc->sc_ec.ec_if;
 1109 
 1110                         strlcpy(ifr->ifr_name, ifp->if_xname, IFNAMSIZ);
 1111                         return 0;
 1112                 }
 1113         default:
 1114                 return ENOTTY;
 1115         }
 1116 }
 1117 
 1118 static int
 1119 tap_cdev_poll(dev_t dev, int events, struct lwp *l)
 1120 {
 1121 
 1122         return tap_dev_poll(minor(dev), events, l);
 1123 }
 1124 
 1125 static int
 1126 tap_fops_poll(file_t *fp, int events)
 1127 {
 1128 
 1129         return tap_dev_poll(fp->f_devunit, events, curlwp);
 1130 }
 1131 
 1132 static int
 1133 tap_dev_poll(int unit, int events, struct lwp *l)
 1134 {
 1135         struct tap_softc *sc = device_lookup_private(&tap_cd, unit);
 1136         int revents = 0;
 1137 
 1138         if (sc == NULL)
 1139                 return POLLERR;
 1140 
 1141         if (events & (POLLIN | POLLRDNORM)) {
 1142                 struct ifnet *ifp = &sc->sc_ec.ec_if;
 1143                 struct mbuf *m;
 1144                 int s;
 1145 
 1146                 s = splnet();
 1147                 IFQ_POLL(&ifp->if_snd, m);
 1148 
 1149                 if (m != NULL)
 1150                         revents |= events & (POLLIN | POLLRDNORM);
 1151                 else {
 1152                         mutex_spin_enter(&sc->sc_lock);
 1153                         selrecord(l, &sc->sc_rsel);
 1154                         mutex_spin_exit(&sc->sc_lock);
 1155                 }
 1156                 splx(s);
 1157         }
 1158         revents |= events & (POLLOUT | POLLWRNORM);
 1159 
 1160         return revents;
 1161 }
 1162 
 1163 static struct filterops tap_read_filterops = {
 1164         .f_flags = FILTEROP_ISFD,
 1165         .f_attach = NULL,
 1166         .f_detach = tap_kqdetach,
 1167         .f_event = tap_kqread,
 1168 };
 1169 
 1170 static int
 1171 tap_cdev_kqfilter(dev_t dev, struct knote *kn)
 1172 {
 1173 
 1174         return tap_dev_kqfilter(minor(dev), kn);
 1175 }
 1176 
 1177 static int
 1178 tap_fops_kqfilter(file_t *fp, struct knote *kn)
 1179 {
 1180 
 1181         return tap_dev_kqfilter(fp->f_devunit, kn);
 1182 }
 1183 
 1184 static int
 1185 tap_dev_kqfilter(int unit, struct knote *kn)
 1186 {
 1187         struct tap_softc *sc = device_lookup_private(&tap_cd, unit);
 1188 
 1189         if (sc == NULL)
 1190                 return ENXIO;
 1191 
 1192         switch(kn->kn_filter) {
 1193         case EVFILT_READ:
 1194                 kn->kn_fop = &tap_read_filterops;
 1195                 kn->kn_hook = sc;
 1196                 KERNEL_LOCK(1, NULL);
 1197                 mutex_spin_enter(&sc->sc_lock);
 1198                 selrecord_knote(&sc->sc_rsel, kn);
 1199                 mutex_spin_exit(&sc->sc_lock);
 1200                 KERNEL_UNLOCK_ONE(NULL);
 1201                 break;
 1202 
 1203         case EVFILT_WRITE:
 1204                 kn->kn_fop = &seltrue_filtops;
 1205                 break;
 1206 
 1207         default:
 1208                 return EINVAL;
 1209         }
 1210 
 1211         return 0;
 1212 }
 1213 
 1214 static void
 1215 tap_kqdetach(struct knote *kn)
 1216 {
 1217         struct tap_softc *sc = (struct tap_softc *)kn->kn_hook;
 1218 
 1219         KERNEL_LOCK(1, NULL);
 1220         mutex_spin_enter(&sc->sc_lock);
 1221         selremove_knote(&sc->sc_rsel, kn);
 1222         mutex_spin_exit(&sc->sc_lock);
 1223         KERNEL_UNLOCK_ONE(NULL);
 1224 }
 1225 
 1226 static int
 1227 tap_kqread(struct knote *kn, long hint)
 1228 {
 1229         struct tap_softc *sc = (struct tap_softc *)kn->kn_hook;
 1230         struct ifnet *ifp = &sc->sc_ec.ec_if;
 1231         struct mbuf *m;
 1232         int s, rv;
 1233 
 1234         KERNEL_LOCK(1, NULL);
 1235         s = splnet();
 1236         IFQ_POLL(&ifp->if_snd, m);
 1237 
 1238         if (m == NULL)
 1239                 kn->kn_data = 0;
 1240         else
 1241                 kn->kn_data = m->m_pkthdr.len;
 1242         splx(s);
 1243         rv = (kn->kn_data != 0 ? 1 : 0);
 1244         KERNEL_UNLOCK_ONE(NULL);
 1245         return rv;
 1246 }
 1247 
 1248 /*
 1249  * sysctl management routines
 1250  * You can set the address of an interface through:
 1251  * net.link.tap.tap<number>
 1252  *
 1253  * Note the consistent use of tap_sysctl_clog in order to use
 1254  * sysctl_teardown at unload time.
 1255  *
 1256  * In the kernel you will find a lot of SYSCTL_SETUP blocks.  Those
 1257  * blocks register a function in a special section of the kernel
 1258  * (called a link set) which is used at init_sysctl() time to cycle
 1259  * through all those functions to create the kernel's sysctl tree.
 1260  *
 1261  * It is not possible to use link sets in a module, so the
 1262  * easiest approach is to simply call our own setup routine at load time.
 1263  *
 1264  * In the SYSCTL_SETUP blocks you find in the kernel, nodes have the
 1265  * CTLFLAG_PERMANENT flag, meaning they cannot be removed.  Once the
 1266  * whole kernel sysctl tree is built, it is not possible to add any
 1267  * permanent node.
 1268  *
 1269  * It should be noted that we're not saving the sysctlnode pointer
 1270  * returned to us when creating the "tap" node.  That structure
 1271  * cannot be trusted once out of the calling function, as it might
 1272  * get reused.  So we just save the MIB number, and always give the
 1273  * full path starting from the root for later calls to sysctl_createv
 1274  * and sysctl_destroyv.
 1275  */
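      /*
       * Illustrative userland sketch (not part of the driver): reading and
       * then replacing the address through sysctlbyname(3).  The node name
       * and the new address are assumptions; it needs sufficient privilege
       * and error handling is omitted.
       *
       *        #include <sys/sysctl.h>
       *        #include <string.h>
       *
       *        char cur[3 * 6];
       *        size_t len = sizeof(cur);
       *        const char *newaddr = "f2:0b:a4:00:00:02";
       *        sysctlbyname("net.link.tap.tap0", cur, &len, NULL, 0);
       *        sysctlbyname("net.link.tap.tap0", NULL, NULL,
       *            newaddr, strlen(newaddr) + 1);
       */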
 1276 static void
 1277 sysctl_tap_setup(struct sysctllog **clog)
 1278 {
 1279         const struct sysctlnode *node;
 1280         int error = 0;
 1281 
 1282         if ((error = sysctl_createv(clog, 0, NULL, NULL,
 1283             CTLFLAG_PERMANENT,
 1284             CTLTYPE_NODE, "link", NULL,
 1285             NULL, 0, NULL, 0,
 1286             CTL_NET, AF_LINK, CTL_EOL)) != 0)
 1287                 return;
 1288 
 1289         /*
 1290          * The first four parameters of sysctl_createv are for management.
 1291          *
 1292          * The four that follow, starting with the flags, describe
 1293          * the node.
 1294          *
 1295          * The next series of four set its value, through various possible
 1296          * means.
 1297          *
 1298          * Last but not least, the path to the node is described.  That path
 1299          * is relative to the given root (third argument).  Here we're
 1300          * starting from the root.
 1301          */
 1302         if ((error = sysctl_createv(clog, 0, NULL, &node,
 1303             CTLFLAG_PERMANENT,
 1304             CTLTYPE_NODE, "tap", NULL,
 1305             NULL, 0, NULL, 0,
 1306             CTL_NET, AF_LINK, CTL_CREATE, CTL_EOL)) != 0)
 1307                 return;
 1308         tap_node = node->sysctl_num;
 1309 }
 1310 
 1311 /*
 1312  * The helper functions make Andrew Brown's interface really
 1313  * shine.  It makes it possible to create the value on the fly whether
 1314  * the sysctl value is read or written.
 1315  *
 1316  * As shown as an example in the man page, the first step is to
 1317  * create a copy of the node to have sysctl_lookup work on it.
 1318  *
 1319  * Here, we have more work to do than just a copy, since we have
 1320  * to create the string.  The first step is to collect the actual
 1321  * value of the node, which is a convenient pointer to the softc
 1322  * of the interface.  From there we create the string and use it
 1323  * as the value, but only for the *copy* of the node.
 1324  *
 1325  * Then we let sysctl_lookup do the magic, which consists in
 1326  * setting oldp and newp as required by the operation.  When the
 1327  * value is read, that means that the string will be copied to
 1328  * the user, and when it is written, the new value will be copied
 1329  * into the addr array.
 1330  *
 1331  * If newp is NULL, the user was reading the value, so we don't
 1332  * have anything else to do.  If a new value was written, we
 1333  * have to check it.
 1334  *
 1335  * If it is incorrect, we can return an error and leave 'node' as
 1336  * it is:  since it is a copy of the actual node, the change will
 1337  * be forgotten.
 1338  *
 1339  * Upon a correct input, we commit the change to the ifnet
 1340  * structure of our interface.
 1341  */
 1342 static int
 1343 tap_sysctl_handler(SYSCTLFN_ARGS)
 1344 {
 1345         struct sysctlnode node;
 1346         struct tap_softc *sc;
 1347         struct ifnet *ifp;
 1348         int error;
 1349         size_t len;
 1350         char addr[3 * ETHER_ADDR_LEN];
 1351         uint8_t enaddr[ETHER_ADDR_LEN];
 1352 
 1353         node = *rnode;
 1354         sc = node.sysctl_data;
 1355         ifp = &sc->sc_ec.ec_if;
 1356         (void)ether_snprintf(addr, sizeof(addr), CLLADDR(ifp->if_sadl));
 1357         node.sysctl_data = addr;
 1358         error = sysctl_lookup(SYSCTLFN_CALL(&node));
 1359         if (error || newp == NULL)
 1360                 return error;
 1361 
 1362         len = strlen(addr);
 1363         if (len < 11 || len > 17)
 1364                 return EINVAL;
 1365 
 1366         /* Commit change */
 1367         if (ether_aton_r(enaddr, sizeof(enaddr), addr) != 0)
 1368                 return EINVAL;
 1369         if_set_sadl(ifp, enaddr, ETHER_ADDR_LEN, false);
 1370         return error;
 1371 }
 1372 
 1373 /*
 1374  * Module infrastructure
 1375  */
 1376 #include "if_module.h"
 1377 
 1378 IF_MODULE(MODULE_CLASS_DRIVER, tap, NULL)
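      /*
       * When built as a module, the driver is loaded and unloaded with
       * modload(8)/modunload(8).  The module name below is an assumption
       * based on the "if_" prefix that IF_MODULE adds:
       *
       *        # modload if_tap
       *        # ifconfig tap0 create
       *        # modunload if_tap
       */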
