The Design and Implementation of the FreeBSD Operating System, Second Edition
Now available: The Design and Implementation of the FreeBSD Operating System (Second Edition)


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]

FreeBSD/Linux Kernel Cross Reference
sys/net/if_tuntap.c

Version: -  FREEBSD  -  FREEBSD-13-STABLE  -  FREEBSD-13-0  -  FREEBSD-12-STABLE  -  FREEBSD-12-0  -  FREEBSD-11-STABLE  -  FREEBSD-11-0  -  FREEBSD-10-STABLE  -  FREEBSD-10-0  -  FREEBSD-9-STABLE  -  FREEBSD-9-0  -  FREEBSD-8-STABLE  -  FREEBSD-8-0  -  FREEBSD-7-STABLE  -  FREEBSD-7-0  -  FREEBSD-6-STABLE  -  FREEBSD-6-0  -  FREEBSD-5-STABLE  -  FREEBSD-5-0  -  FREEBSD-4-STABLE  -  FREEBSD-3-STABLE  -  FREEBSD22  -  l41  -  OPENBSD  -  linux-2.6  -  MK84  -  PLAN9  -  xnu-8792 
SearchContext: -  none  -  3  -  10 

    1 /*      $NetBSD: if_tun.c,v 1.14 1994/06/29 06:36:25 cgd Exp $  */
    2 /*-
    3  * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
    4  *
    5  * Copyright (C) 1999-2000 by Maksim Yevmenkin <m_evmenkin@yahoo.com>
    6  * All rights reserved.
    7  * Copyright (c) 2019 Kyle Evans <kevans@FreeBSD.org>
    8  *
    9  * Redistribution and use in source and binary forms, with or without
   10  * modification, are permitted provided that the following conditions
   11  * are met:
   12  *
   13  * 1. Redistributions of source code must retain the above copyright
   14  *    notice, this list of conditions and the following disclaimer.
   15  * 2. Redistributions in binary form must reproduce the above copyright
   16  *    notice, this list of conditions and the following disclaimer in the
   17  *    documentation and/or other materials provided with the distribution.
   18  *
   19  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
   20  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
   21  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
   22  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
   23  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
   24  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
   25  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
   26  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
   27  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
   28  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
   29  * SUCH DAMAGE.
   30  *
   31  * BASED ON:
   32  * -------------------------------------------------------------------------
   33  *
   34  * Copyright (c) 1988, Julian Onions <jpo@cs.nott.ac.uk>
   35  * Nottingham University 1987.
   36  *
   37  * This source may be freely distributed, however I would be interested
   38  * in any changes that are made.
   39  *
   40  * This driver takes packets off the IP i/f and hands them up to a
   41  * user process to have its wicked way with. This driver has its
   42  * roots in a similar driver written by Phil Cockcroft (formerly) at
   43  * UCL. This driver is based much more on read/write/poll mode of
   44  * operation though.
   45  *
   46  * $FreeBSD$
   47  */
   48 
   49 #include "opt_inet.h"
   50 #include "opt_inet6.h"
   51 
   52 #include <sys/param.h>
   53 #include <sys/lock.h>
   54 #include <sys/priv.h>
   55 #include <sys/proc.h>
   56 #include <sys/systm.h>
   57 #include <sys/jail.h>
   58 #include <sys/mbuf.h>
   59 #include <sys/module.h>
   60 #include <sys/socket.h>
   61 #include <sys/eventhandler.h>
   62 #include <sys/fcntl.h>
   63 #include <sys/filio.h>
   64 #include <sys/sockio.h>
   65 #include <sys/sx.h>
   66 #include <sys/syslog.h>
   67 #include <sys/ttycom.h>
   68 #include <sys/poll.h>
   69 #include <sys/selinfo.h>
   70 #include <sys/signalvar.h>
   71 #include <sys/filedesc.h>
   72 #include <sys/kernel.h>
   73 #include <sys/sysctl.h>
   74 #include <sys/conf.h>
   75 #include <sys/uio.h>
   76 #include <sys/malloc.h>
   77 #include <sys/random.h>
   78 #include <sys/ctype.h>
   79 
   80 #include <net/ethernet.h>
   81 #include <net/if.h>
   82 #include <net/if_var.h>
   83 #include <net/if_clone.h>
   84 #include <net/if_dl.h>
   85 #include <net/if_media.h>
   86 #include <net/if_private.h>
   87 #include <net/if_types.h>
   88 #include <net/if_vlan_var.h>
   89 #include <net/netisr.h>
   90 #include <net/route.h>
   91 #include <net/vnet.h>
   92 #include <netinet/in.h>
   93 #ifdef INET
   94 #include <netinet/ip.h>
   95 #endif
   96 #ifdef INET6
   97 #include <netinet/ip6.h>
   98 #include <netinet6/ip6_var.h>
   99 #endif
  100 #include <netinet/udp.h>
  101 #include <netinet/tcp.h>
  102 #include <net/bpf.h>
  103 #include <net/if_tap.h>
  104 #include <net/if_tun.h>
  105 
  106 #include <dev/virtio/network/virtio_net.h>
  107 
  108 #include <sys/queue.h>
  109 #include <sys/condvar.h>
  110 #include <security/mac/mac_framework.h>
  111 
struct tuntap_driver;

/*
 * Per-device software context, shared by the tun, tap and vmnet flavours.
 *
 * tun_list is protected by global tunmtx.  Other mutable fields are
 * protected by tun->tun_mtx, or by their owning subsystem.  tun_dev is
 * static for the duration of a tunnel interface.
 */
struct tuntap_softc {
        /* Linkage on the global tunhead list. */
        TAILQ_ENTRY(tuntap_softc)        tun_list;
        /* Alias device node, if any; destroy_dev(tun_dev) also handles it. */
        struct cdev                     *tun_alias;
        /* Character device backing this tunnel; set by tun_create_device(). */
        struct cdev                     *tun_dev;
        u_short                          tun_flags;     /* misc flags */
#define TUN_OPEN        0x0001
#define TUN_INITED      0x0002
#define TUN_UNUSED1     0x0008
#define TUN_UNUSED2     0x0010
#define TUN_LMODE       0x0020
#define TUN_RWAIT       0x0040
#define TUN_ASYNC       0x0080
#define TUN_IFHEAD      0x0100
/* Set by tun_destroy(); tun_busy_locked() then refuses new busy references. */
#define TUN_DYING       0x0200
/* Driver identity bits: tap and vmnet carry TUN_L2, vmnet adds TUN_VMNET. */
#define TUN_L2          0x0400
#define TUN_VMNET       0x0800

/* Bits compared against tuntap_driver.ident_flags to select a driver. */
#define TUN_DRIVER_IDENT_MASK   (TUN_L2 | TUN_VMNET)
#define TUN_READY               (TUN_OPEN | TUN_INITED)

        pid_t                    tun_pid;       /* owning pid */
        struct ifnet            *tun_ifp;       /* the interface */
        struct sigio            *tun_sigio;     /* async I/O info */
        struct tuntap_driver    *tun_drv;       /* appropriate driver */
        struct selinfo           tun_rsel;      /* read select */
        struct mtx               tun_mtx;       /* softc field mutex */
        struct cv                tun_cv;        /* for ref'd dev destroy */
        struct ether_addr        tun_ether;     /* remote address */
        int                      tun_busy;      /* busy count */
        int                      tun_vhdrlen;   /* virtio-net header length */
};
/* Map a softc to its network interface. */
#define TUN2IFP(sc)     ((sc)->tun_ifp)
  151 
  152 #define TUNDEBUG        if (tundebug) if_printf
  153 
  154 #define TUN_LOCK(tp)            mtx_lock(&(tp)->tun_mtx)
  155 #define TUN_UNLOCK(tp)          mtx_unlock(&(tp)->tun_mtx)
  156 #define TUN_LOCK_ASSERT(tp)     mtx_assert(&(tp)->tun_mtx, MA_OWNED);
  157 
  158 #define TUN_VMIO_FLAG_MASK      0x0fff
  159 
  160 /*
  161  * Interface capabilities of a tap device that supports the virtio-net
  162  * header.
  163  */
  164 #define TAP_VNET_HDR_CAPS       (IFCAP_HWCSUM | IFCAP_HWCSUM_IPV6       \
  165                                 | IFCAP_VLAN_HWCSUM                     \
  166                                 | IFCAP_TSO | IFCAP_LRO                 \
  167                                 | IFCAP_VLAN_HWTSO)
  168 
  169 #define TAP_ALL_OFFLOAD         (CSUM_TSO | CSUM_TCP | CSUM_UDP |\
  170                                     CSUM_TCP_IPV6 | CSUM_UDP_IPV6)
  171 
/*
 * All mutable global variables in if_tun are locked using tunmtx, with
 * the exception of tundebug, which is used unlocked, and the drivers' *clones,
 * which are static after setup.
 */
static struct mtx tunmtx;
/* Handler tags registered on MOD_LOAD and deregistered in tun_uninit(). */
static eventhandler_tag arrival_tag;    /* ifnet_arrival_event -> tunrename */
static eventhandler_tag clone_tag;      /* dev_clone -> tunclone */
/* Driver names; also used as the cdevsw d_name of each driver. */
static const char tunname[] = "tun";
static const char tapname[] = "tap";
static const char vmnetname[] = "vmnet";
static MALLOC_DEFINE(M_TUN, tunname, "Tunnel Interface");
static int tundebug = 0;        /* gates TUNDEBUG output */
static int tundclone = 1;       /* enable devfs cloning for tun */
static int tap_allow_uopen = 0; /* allow user devfs cloning */
static int tapuponopen = 0;     /* IFF_UP on open() */
static int tapdclone = 1;       /* enable devfs cloning */

/* Every softc created by tun_create_device(); protected by tunmtx. */
static TAILQ_HEAD(,tuntap_softc)        tunhead = TAILQ_HEAD_INITIALIZER(tunhead);
SYSCTL_INT(_debug, OID_AUTO, if_tun_debug, CTLFLAG_RW, &tundebug, 0, "");

/* Held exclusively by tun_destroy() while it clears the ifnet's if_softc. */
static struct sx tun_ioctl_sx;
SX_SYSINIT(tun_ioctl_sx, &tun_ioctl_sx, "tun_ioctl");

SYSCTL_DECL(_net_link);
/* tun */
static SYSCTL_NODE(_net_link, OID_AUTO, tun, CTLFLAG_RW | CTLFLAG_MPSAFE, 0,
    "IP tunnel software network interface");
SYSCTL_INT(_net_link_tun, OID_AUTO, devfs_cloning, CTLFLAG_RWTUN, &tundclone, 0,
    "Enable legacy devfs interface creation");

/* tap */
static SYSCTL_NODE(_net_link, OID_AUTO, tap, CTLFLAG_RW | CTLFLAG_MPSAFE, 0,
    "Ethernet tunnel software network interface");
SYSCTL_INT(_net_link_tap, OID_AUTO, user_open, CTLFLAG_RW, &tap_allow_uopen, 0,
    "Enable legacy devfs interface creation for all users");
SYSCTL_INT(_net_link_tap, OID_AUTO, up_on_open, CTLFLAG_RW, &tapuponopen, 0,
    "Bring interface up when /dev/tap is opened");
SYSCTL_INT(_net_link_tap, OID_AUTO, devfs_cloning, CTLFLAG_RWTUN, &tapdclone, 0,
    "Enable legacy devfs interface creation");
/* Same variable as debug.if_tun_debug above: one knob serves tun and tap. */
SYSCTL_INT(_net_link_tap, OID_AUTO, debug, CTLFLAG_RW, &tundebug, 0, "");
  213 
/* Device creation and busy-reference helpers. */
static int      tun_create_device(struct tuntap_driver *drv, int unit,
    struct ucred *cr, struct cdev **dev, const char *name);
static int      tun_busy_locked(struct tuntap_softc *tp);
static void     tun_unbusy_locked(struct tuntap_softc *tp);
static int      tun_busy(struct tuntap_softc *tp);
static void     tun_unbusy(struct tuntap_softc *tp);

/* Name parsing, event handlers and ifnet methods. */
static int      tuntap_name2info(const char *name, int *unit, int *flags);
static void     tunclone(void *arg, struct ucred *cred, char *name,
                    int namelen, struct cdev **dev);
static void     tuncreate(struct cdev *dev);
static void     tundtor(void *data);
static void     tunrename(void *arg, struct ifnet *ifp);
static int      tunifioctl(struct ifnet *, u_long, caddr_t);
static void     tuninit(struct ifnet *);
static void     tunifinit(void *xtp);
static int      tuntapmodevent(module_t, int, void *);
static int      tunoutput(struct ifnet *, struct mbuf *,
                    const struct sockaddr *, struct route *ro);
static void     tunstart(struct ifnet *);
static void     tunstart_l2(struct ifnet *);

/* Interface cloner callbacks; see the tuntap_drivers table. */
static int      tun_clone_match(struct if_clone *ifc, const char *name);
static int      tap_clone_match(struct if_clone *ifc, const char *name);
static int      vmnet_clone_match(struct if_clone *ifc, const char *name);
static int      tun_clone_create(struct if_clone *, char *, size_t,
                    struct ifc_data *, struct ifnet **);
static int      tun_clone_destroy(struct if_clone *, struct ifnet *, uint32_t);
static void     tun_vnethdr_set(struct ifnet *ifp, int vhdrlen);

/* Character device entry points, shared by all three drivers. */
static d_open_t         tunopen;
static d_read_t         tunread;
static d_write_t        tunwrite;
static d_ioctl_t        tunioctl;
static d_poll_t         tunpoll;
static d_kqfilter_t     tunkqfilter;

/* kqueue filter callbacks used by the filterops below. */
static int              tunkqread(struct knote *, long);
static int              tunkqwrite(struct knote *, long);
static void             tunkqdetach(struct knote *);
  254 
  255 static struct filterops tun_read_filterops = {
  256         .f_isfd =       1,
  257         .f_attach =     NULL,
  258         .f_detach =     tunkqdetach,
  259         .f_event =      tunkqread,
  260 };
  261 
  262 static struct filterops tun_write_filterops = {
  263         .f_isfd =       1,
  264         .f_attach =     NULL,
  265         .f_detach =     tunkqdetach,
  266         .f_event =      tunkqwrite,
  267 };
  268 
  269 static struct tuntap_driver {
  270         struct cdevsw            cdevsw;
  271         int                      ident_flags;
  272         struct unrhdr           *unrhdr;
  273         struct clonedevs        *clones;
  274         ifc_match_f             *clone_match_fn;
  275         ifc_create_f            *clone_create_fn;
  276         ifc_destroy_f           *clone_destroy_fn;
  277 } tuntap_drivers[] = {
  278         {
  279                 .ident_flags =  0,
  280                 .cdevsw =       {
  281                     .d_version =        D_VERSION,
  282                     .d_flags =          D_NEEDMINOR,
  283                     .d_open =           tunopen,
  284                     .d_read =           tunread,
  285                     .d_write =          tunwrite,
  286                     .d_ioctl =          tunioctl,
  287                     .d_poll =           tunpoll,
  288                     .d_kqfilter =       tunkqfilter,
  289                     .d_name =           tunname,
  290                 },
  291                 .clone_match_fn =       tun_clone_match,
  292                 .clone_create_fn =      tun_clone_create,
  293                 .clone_destroy_fn =     tun_clone_destroy,
  294         },
  295         {
  296                 .ident_flags =  TUN_L2,
  297                 .cdevsw =       {
  298                     .d_version =        D_VERSION,
  299                     .d_flags =          D_NEEDMINOR,
  300                     .d_open =           tunopen,
  301                     .d_read =           tunread,
  302                     .d_write =          tunwrite,
  303                     .d_ioctl =          tunioctl,
  304                     .d_poll =           tunpoll,
  305                     .d_kqfilter =       tunkqfilter,
  306                     .d_name =           tapname,
  307                 },
  308                 .clone_match_fn =       tap_clone_match,
  309                 .clone_create_fn =      tun_clone_create,
  310                 .clone_destroy_fn =     tun_clone_destroy,
  311         },
  312         {
  313                 .ident_flags =  TUN_L2 | TUN_VMNET,
  314                 .cdevsw =       {
  315                     .d_version =        D_VERSION,
  316                     .d_flags =          D_NEEDMINOR,
  317                     .d_open =           tunopen,
  318                     .d_read =           tunread,
  319                     .d_write =          tunwrite,
  320                     .d_ioctl =          tunioctl,
  321                     .d_poll =           tunpoll,
  322                     .d_kqfilter =       tunkqfilter,
  323                     .d_name =           vmnetname,
  324                 },
  325                 .clone_match_fn =       vmnet_clone_match,
  326                 .clone_create_fn =      tun_clone_create,
  327                 .clone_destroy_fn =     tun_clone_destroy,
  328         },
  329 };
  330 
/*
 * Pairs a driver from tuntap_drivers with the interface cloner attached for
 * it.  Entries are allocated in vnet_tun_init() and released in
 * vnet_tun_uninit().
 */
struct tuntap_driver_cloner {
        SLIST_ENTRY(tuntap_driver_cloner)        link;
        struct tuntap_driver                    *drv;
        struct if_clone                         *cloner;
};

/* List of driver/cloner pairs, defined per vnet via VNET_DEFINE_STATIC. */
VNET_DEFINE_STATIC(SLIST_HEAD(, tuntap_driver_cloner), tuntap_driver_cloners) =
    SLIST_HEAD_INITIALIZER(tuntap_driver_cloners);

#define V_tuntap_driver_cloners VNET(tuntap_driver_cloners)
  341 
  342 /*
  343  * Mechanism for marking a tunnel device as busy so that we can safely do some
  344  * orthogonal operations (such as operations on devices) without racing against
  345  * tun_destroy.  tun_destroy will wait on the condvar if we're at all busy or
  346  * open, to be woken up when the condition is alleviated.
  347  */
  348 static int
  349 tun_busy_locked(struct tuntap_softc *tp)
  350 {
  351 
  352         TUN_LOCK_ASSERT(tp);
  353         if ((tp->tun_flags & TUN_DYING) != 0) {
  354                 /*
  355                  * Perhaps unintuitive, but the device is busy going away.
  356                  * Other interpretations of EBUSY from tun_busy make little
  357                  * sense, since making a busy device even more busy doesn't
  358                  * sound like a problem.
  359                  */
  360                 return (EBUSY);
  361         }
  362 
  363         ++tp->tun_busy;
  364         return (0);
  365 }
  366 
  367 static void
  368 tun_unbusy_locked(struct tuntap_softc *tp)
  369 {
  370 
  371         TUN_LOCK_ASSERT(tp);
  372         KASSERT(tp->tun_busy != 0, ("tun_unbusy: called for non-busy tunnel"));
  373 
  374         --tp->tun_busy;
  375         /* Wake up anything that may be waiting on our busy tunnel. */
  376         if (tp->tun_busy == 0)
  377                 cv_broadcast(&tp->tun_cv);
  378 }
  379 
  380 static int
  381 tun_busy(struct tuntap_softc *tp)
  382 {
  383         int ret;
  384 
  385         TUN_LOCK(tp);
  386         ret = tun_busy_locked(tp);
  387         TUN_UNLOCK(tp);
  388         return (ret);
  389 }
  390 
  391 static void
  392 tun_unbusy(struct tuntap_softc *tp)
  393 {
  394 
  395         TUN_LOCK(tp);
  396         tun_unbusy_locked(tp);
  397         TUN_UNLOCK(tp);
  398 }
  399 
  400 /*
  401  * Sets unit and/or flags given the device name.  Must be called with correct
  402  * vnet context.
  403  */
  404 static int
  405 tuntap_name2info(const char *name, int *outunit, int *outflags)
  406 {
  407         struct tuntap_driver *drv;
  408         struct tuntap_driver_cloner *drvc;
  409         char *dname;
  410         int flags, unit;
  411         bool found;
  412 
  413         if (name == NULL)
  414                 return (EINVAL);
  415 
  416         /*
  417          * Needed for dev_stdclone, but dev_stdclone will not modify, it just
  418          * wants to be able to pass back a char * through the second param. We
  419          * will always set that as NULL here, so we'll fake it.
  420          */
  421         dname = __DECONST(char *, name);
  422         found = false;
  423 
  424         KASSERT(!SLIST_EMPTY(&V_tuntap_driver_cloners),
  425             ("tuntap_driver_cloners failed to initialize"));
  426         SLIST_FOREACH(drvc, &V_tuntap_driver_cloners, link) {
  427                 KASSERT(drvc->drv != NULL,
  428                     ("tuntap_driver_cloners entry not properly initialized"));
  429                 drv = drvc->drv;
  430 
  431                 if (strcmp(name, drv->cdevsw.d_name) == 0) {
  432                         found = true;
  433                         unit = -1;
  434                         flags = drv->ident_flags;
  435                         break;
  436                 }
  437 
  438                 if (dev_stdclone(dname, NULL, drv->cdevsw.d_name, &unit) == 1) {
  439                         found = true;
  440                         flags = drv->ident_flags;
  441                         break;
  442                 }
  443         }
  444 
  445         if (!found)
  446                 return (ENXIO);
  447 
  448         if (outunit != NULL)
  449                 *outunit = unit;
  450         if (outflags != NULL)
  451                 *outflags = flags;
  452         return (0);
  453 }
  454 
  455 /*
  456  * Get driver information from a set of flags specified.  Masks the identifying
  457  * part of the flags and compares it against all of the available
  458  * tuntap_drivers. Must be called with correct vnet context.
  459  */
  460 static struct tuntap_driver *
  461 tuntap_driver_from_flags(int tun_flags)
  462 {
  463         struct tuntap_driver *drv;
  464         struct tuntap_driver_cloner *drvc;
  465 
  466         KASSERT(!SLIST_EMPTY(&V_tuntap_driver_cloners),
  467             ("tuntap_driver_cloners failed to initialize"));
  468         SLIST_FOREACH(drvc, &V_tuntap_driver_cloners, link) {
  469                 KASSERT(drvc->drv != NULL,
  470                     ("tuntap_driver_cloners entry not properly initialized"));
  471                 drv = drvc->drv;
  472                 if ((tun_flags & TUN_DRIVER_IDENT_MASK) == drv->ident_flags)
  473                         return (drv);
  474         }
  475 
  476         return (NULL);
  477 }
  478 
  479 static int
  480 tun_clone_match(struct if_clone *ifc, const char *name)
  481 {
  482         int tunflags;
  483 
  484         if (tuntap_name2info(name, NULL, &tunflags) == 0) {
  485                 if ((tunflags & TUN_L2) == 0)
  486                         return (1);
  487         }
  488 
  489         return (0);
  490 }
  491 
  492 static int
  493 tap_clone_match(struct if_clone *ifc, const char *name)
  494 {
  495         int tunflags;
  496 
  497         if (tuntap_name2info(name, NULL, &tunflags) == 0) {
  498                 if ((tunflags & (TUN_L2 | TUN_VMNET)) == TUN_L2)
  499                         return (1);
  500         }
  501 
  502         return (0);
  503 }
  504 
  505 static int
  506 vmnet_clone_match(struct if_clone *ifc, const char *name)
  507 {
  508         int tunflags;
  509 
  510         if (tuntap_name2info(name, NULL, &tunflags) == 0) {
  511                 if ((tunflags & TUN_VMNET) != 0)
  512                         return (1);
  513         }
  514 
  515         return (0);
  516 }
  517 
  518 static int
  519 tun_clone_create(struct if_clone *ifc, char *name, size_t len,
  520     struct ifc_data *ifd, struct ifnet **ifpp)
  521 {
  522         struct tuntap_driver *drv;
  523         struct cdev *dev;
  524         int err, i, tunflags, unit;
  525 
  526         tunflags = 0;
  527         /* The name here tells us exactly what we're creating */
  528         err = tuntap_name2info(name, &unit, &tunflags);
  529         if (err != 0)
  530                 return (err);
  531 
  532         drv = tuntap_driver_from_flags(tunflags);
  533         if (drv == NULL)
  534                 return (ENXIO);
  535 
  536         if (unit != -1) {
  537                 /* If this unit number is still available that's okay. */
  538                 if (alloc_unr_specific(drv->unrhdr, unit) == -1)
  539                         return (EEXIST);
  540         } else {
  541                 unit = alloc_unr(drv->unrhdr);
  542         }
  543 
  544         snprintf(name, IFNAMSIZ, "%s%d", drv->cdevsw.d_name, unit);
  545 
  546         /* find any existing device, or allocate new unit number */
  547         dev = NULL;
  548         i = clone_create(&drv->clones, &drv->cdevsw, &unit, &dev, 0);
  549         /* No preexisting struct cdev *, create one */
  550         if (i != 0)
  551                 i = tun_create_device(drv, unit, NULL, &dev, name);
  552         if (i == 0) {
  553                 tuncreate(dev);
  554                 struct tuntap_softc *tp = dev->si_drv1;
  555                 *ifpp = tp->tun_ifp;
  556         }
  557 
  558         return (i);
  559 }
  560 
  561 static void
  562 tunclone(void *arg, struct ucred *cred, char *name, int namelen,
  563     struct cdev **dev)
  564 {
  565         char devname[SPECNAMELEN + 1];
  566         struct tuntap_driver *drv;
  567         int append_unit, i, u, tunflags;
  568         bool mayclone;
  569 
  570         if (*dev != NULL)
  571                 return;
  572 
  573         tunflags = 0;
  574         CURVNET_SET(CRED_TO_VNET(cred));
  575         if (tuntap_name2info(name, &u, &tunflags) != 0)
  576                 goto out;       /* Not recognized */
  577 
  578         if (u != -1 && u > IF_MAXUNIT)
  579                 goto out;       /* Unit number too high */
  580 
  581         mayclone = priv_check_cred(cred, PRIV_NET_IFCREATE) == 0;
  582         if ((tunflags & TUN_L2) != 0) {
  583                 /* tap/vmnet allow user open with a sysctl */
  584                 mayclone = (mayclone || tap_allow_uopen) && tapdclone;
  585         } else {
  586                 mayclone = mayclone && tundclone;
  587         }
  588 
  589         /*
  590          * If tun cloning is enabled, only the superuser can create an
  591          * interface.
  592          */
  593         if (!mayclone)
  594                 goto out;
  595 
  596         if (u == -1)
  597                 append_unit = 1;
  598         else
  599                 append_unit = 0;
  600 
  601         drv = tuntap_driver_from_flags(tunflags);
  602         if (drv == NULL)
  603                 goto out;
  604 
  605         /* find any existing device, or allocate new unit number */
  606         i = clone_create(&drv->clones, &drv->cdevsw, &u, dev, 0);
  607         if (i) {
  608                 if (append_unit) {
  609                         namelen = snprintf(devname, sizeof(devname), "%s%d",
  610                             name, u);
  611                         name = devname;
  612                 }
  613 
  614                 i = tun_create_device(drv, u, cred, dev, name);
  615         }
  616         if (i == 0)
  617                 if_clone_create(name, namelen, NULL);
  618 out:
  619         CURVNET_RESTORE();
  620 }
  621 
/*
 * Tear down a tunnel: device node, select/kqueue state, network interface,
 * unit number and finally the softc itself.  The visible callers unlink the
 * softc from tunhead before calling this.
 */
static void
tun_destroy(struct tuntap_softc *tp)
{

        TUN_LOCK(tp);
        /* From here on tun_busy_locked() fails with EBUSY. */
        tp->tun_flags |= TUN_DYING;
        /*
         * Wait for outstanding busy references to drain.  Both branches
         * leave tun_mtx released; cv_wait_unlock() does not reacquire it.
         */
        if (tp->tun_busy != 0)
                cv_wait_unlock(&tp->tun_cv, &tp->tun_mtx);
        else
                TUN_UNLOCK(tp);

        CURVNET_SET(TUN2IFP(tp)->if_vnet);

        /* destroy_dev will take care of any alias. */
        destroy_dev(tp->tun_dev);
        seldrain(&tp->tun_rsel);
        knlist_clear(&tp->tun_rsel.si_note, 0);
        knlist_destroy(&tp->tun_rsel.si_note);
        /* L2 (tap/vmnet) interfaces detach as Ethernet; tun detaches by hand. */
        if ((tp->tun_flags & TUN_L2) != 0) {
                ether_ifdetach(TUN2IFP(tp));
        } else {
                bpfdetach(TUN2IFP(tp));
                if_detach(TUN2IFP(tp));
        }
        /*
         * Clear the back pointer under the ioctl lock.
         * NOTE(review): presumably so that ioctl paths holding tun_ioctl_sx
         * see NULL rather than a softc about to be freed -- confirm against
         * tunifioctl.
         */
        sx_xlock(&tun_ioctl_sx);
        TUN2IFP(tp)->if_softc = NULL;
        sx_xunlock(&tun_ioctl_sx);
        /* Return the unit number to the driver's allocator. */
        free_unr(tp->tun_drv->unrhdr, TUN2IFP(tp)->if_dunit);
        if_free(TUN2IFP(tp));
        mtx_destroy(&tp->tun_mtx);
        cv_destroy(&tp->tun_cv);
        free(tp, M_TUN);
        CURVNET_RESTORE();
}
  656 
  657 static int
  658 tun_clone_destroy(struct if_clone *ifc __unused, struct ifnet *ifp, uint32_t flags)
  659 {
  660         struct tuntap_softc *tp = ifp->if_softc;
  661 
  662         mtx_lock(&tunmtx);
  663         TAILQ_REMOVE(&tunhead, tp, tun_list);
  664         mtx_unlock(&tunmtx);
  665         tun_destroy(tp);
  666 
  667         return (0);
  668 }
  669 
  670 static void
  671 vnet_tun_init(const void *unused __unused)
  672 {
  673         struct tuntap_driver *drv;
  674         struct tuntap_driver_cloner *drvc;
  675         int i;
  676 
  677         for (i = 0; i < nitems(tuntap_drivers); ++i) {
  678                 drv = &tuntap_drivers[i];
  679                 drvc = malloc(sizeof(*drvc), M_TUN, M_WAITOK | M_ZERO);
  680 
  681                 drvc->drv = drv;
  682                 struct if_clone_addreq req = {
  683                         .match_f = drv->clone_match_fn,
  684                         .create_f = drv->clone_create_fn,
  685                         .destroy_f = drv->clone_destroy_fn,
  686                 };
  687                 drvc->cloner = ifc_attach_cloner(drv->cdevsw.d_name, &req);
  688                 SLIST_INSERT_HEAD(&V_tuntap_driver_cloners, drvc, link);
  689         };
  690 }
  691 VNET_SYSINIT(vnet_tun_init, SI_SUB_PROTO_IF, SI_ORDER_ANY,
  692                 vnet_tun_init, NULL);
  693 
  694 static void
  695 vnet_tun_uninit(const void *unused __unused)
  696 {
  697         struct tuntap_driver_cloner *drvc;
  698 
  699         while (!SLIST_EMPTY(&V_tuntap_driver_cloners)) {
  700                 drvc = SLIST_FIRST(&V_tuntap_driver_cloners);
  701                 SLIST_REMOVE_HEAD(&V_tuntap_driver_cloners, link);
  702 
  703                 if_clone_detach(drvc->cloner);
  704                 free(drvc, M_TUN);
  705         }
  706 }
  707 VNET_SYSUNINIT(vnet_tun_uninit, SI_SUB_PROTO_IF, SI_ORDER_ANY,
  708     vnet_tun_uninit, NULL);
  709 
  710 static void
  711 tun_uninit(const void *unused __unused)
  712 {
  713         struct tuntap_driver *drv;
  714         struct tuntap_softc *tp;
  715         int i;
  716 
  717         EVENTHANDLER_DEREGISTER(ifnet_arrival_event, arrival_tag);
  718         EVENTHANDLER_DEREGISTER(dev_clone, clone_tag);
  719 
  720         mtx_lock(&tunmtx);
  721         while ((tp = TAILQ_FIRST(&tunhead)) != NULL) {
  722                 TAILQ_REMOVE(&tunhead, tp, tun_list);
  723                 mtx_unlock(&tunmtx);
  724                 tun_destroy(tp);
  725                 mtx_lock(&tunmtx);
  726         }
  727         mtx_unlock(&tunmtx);
  728         for (i = 0; i < nitems(tuntap_drivers); ++i) {
  729                 drv = &tuntap_drivers[i];
  730                 delete_unrhdr(drv->unrhdr);
  731                 clone_cleanup(&drv->clones);
  732         }
  733         mtx_destroy(&tunmtx);
  734 }
  735 SYSUNINIT(tun_uninit, SI_SUB_PROTO_IF, SI_ORDER_ANY, tun_uninit, NULL);
  736 
  737 static struct tuntap_driver *
  738 tuntap_driver_from_ifnet(const struct ifnet *ifp)
  739 {
  740         struct tuntap_driver *drv;
  741         int i;
  742 
  743         if (ifp == NULL)
  744                 return (NULL);
  745 
  746         for (i = 0; i < nitems(tuntap_drivers); ++i) {
  747                 drv = &tuntap_drivers[i];
  748                 if (strcmp(ifp->if_dname, drv->cdevsw.d_name) == 0)
  749                         return (drv);
  750         }
  751 
  752         return (NULL);
  753 }
  754 
  755 static int
  756 tuntapmodevent(module_t mod, int type, void *data)
  757 {
  758         struct tuntap_driver *drv;
  759         int i;
  760 
  761         switch (type) {
  762         case MOD_LOAD:
  763                 mtx_init(&tunmtx, "tunmtx", NULL, MTX_DEF);
  764                 for (i = 0; i < nitems(tuntap_drivers); ++i) {
  765                         drv = &tuntap_drivers[i];
  766                         clone_setup(&drv->clones);
  767                         drv->unrhdr = new_unrhdr(0, IF_MAXUNIT, &tunmtx);
  768                 }
  769                 arrival_tag = EVENTHANDLER_REGISTER(ifnet_arrival_event,
  770                    tunrename, 0, 1000);
  771                 if (arrival_tag == NULL)
  772                         return (ENOMEM);
  773                 clone_tag = EVENTHANDLER_REGISTER(dev_clone, tunclone, 0, 1000);
  774                 if (clone_tag == NULL)
  775                         return (ENOMEM);
  776                 break;
  777         case MOD_UNLOAD:
  778                 /* See tun_uninit, so it's done after the vnet_sysuninit() */
  779                 break;
  780         default:
  781                 return EOPNOTSUPP;
  782         }
  783         return 0;
  784 }
  785 
/* The real module: its event handler does the load-time setup. */
static moduledata_t tuntap_mod = {
        "if_tuntap",
        tuntapmodevent,
        0
};

/*
 * if_tun and if_tap are declared with no event handler of their own.
 * We'll only ever have these two, so no need for a macro.
 */
static moduledata_t tun_mod = { "if_tun", NULL, 0 };
static moduledata_t tap_mod = { "if_tap", NULL, 0 };

DECLARE_MODULE(if_tuntap, tuntap_mod, SI_SUB_PSEUDO, SI_ORDER_ANY);
MODULE_VERSION(if_tuntap, 1);
DECLARE_MODULE(if_tun, tun_mod, SI_SUB_PSEUDO, SI_ORDER_ANY);
MODULE_VERSION(if_tun, 1);
DECLARE_MODULE(if_tap, tap_mod, SI_SUB_PSEUDO, SI_ORDER_ANY);
MODULE_VERSION(if_tap, 1);
  802 
  803 static int
  804 tun_create_device(struct tuntap_driver *drv, int unit, struct ucred *cr,
  805     struct cdev **dev, const char *name)
  806 {
  807         struct make_dev_args args;
  808         struct tuntap_softc *tp;
  809         int error;
  810 
  811         tp = malloc(sizeof(*tp), M_TUN, M_WAITOK | M_ZERO);
  812         mtx_init(&tp->tun_mtx, "tun_mtx", NULL, MTX_DEF);
  813         cv_init(&tp->tun_cv, "tun_condvar");
  814         tp->tun_flags = drv->ident_flags;
  815         tp->tun_drv = drv;
  816 
  817         make_dev_args_init(&args);
  818         if (cr != NULL)
  819                 args.mda_flags = MAKEDEV_REF;
  820         args.mda_devsw = &drv->cdevsw;
  821         args.mda_cr = cr;
  822         args.mda_uid = UID_UUCP;
  823         args.mda_gid = GID_DIALER;
  824         args.mda_mode = 0600;
  825         args.mda_unit = unit;
  826         args.mda_si_drv1 = tp;
  827         error = make_dev_s(&args, dev, "%s", name);
  828         if (error != 0) {
  829                 free(tp, M_TUN);
  830                 return (error);
  831         }
  832 
  833         KASSERT((*dev)->si_drv1 != NULL,
  834             ("Failed to set si_drv1 at %s creation", name));
  835         tp->tun_dev = *dev;
  836         knlist_init_mtx(&tp->tun_rsel.si_note, &tp->tun_mtx);
  837         mtx_lock(&tunmtx);
  838         TAILQ_INSERT_TAIL(&tunhead, tp, tun_list);
  839         mtx_unlock(&tunmtx);
  840         return (0);
  841 }
  842 
  843 static void
  844 tunstart(struct ifnet *ifp)
  845 {
  846         struct tuntap_softc *tp = ifp->if_softc;
  847         struct mbuf *m;
  848 
  849         TUNDEBUG(ifp, "starting\n");
  850         if (ALTQ_IS_ENABLED(&ifp->if_snd)) {
  851                 IFQ_LOCK(&ifp->if_snd);
  852                 IFQ_POLL_NOLOCK(&ifp->if_snd, m);
  853                 if (m == NULL) {
  854                         IFQ_UNLOCK(&ifp->if_snd);
  855                         return;
  856                 }
  857                 IFQ_UNLOCK(&ifp->if_snd);
  858         }
  859 
  860         TUN_LOCK(tp);
  861         if (tp->tun_flags & TUN_RWAIT) {
  862                 tp->tun_flags &= ~TUN_RWAIT;
  863                 wakeup(tp);
  864         }
  865         selwakeuppri(&tp->tun_rsel, PZERO + 1);
  866         KNOTE_LOCKED(&tp->tun_rsel.si_note, 0);
  867         if (tp->tun_flags & TUN_ASYNC && tp->tun_sigio) {
  868                 TUN_UNLOCK(tp);
  869                 pgsigio(&tp->tun_sigio, SIGIO, 0);
  870         } else
  871                 TUN_UNLOCK(tp);
  872 }
  873 
  874 /*
  875  * tunstart_l2
  876  *
  877  * queue packets from higher level ready to put out
  878  */
  879 static void
  880 tunstart_l2(struct ifnet *ifp)
  881 {
  882         struct tuntap_softc     *tp = ifp->if_softc;
  883 
  884         TUNDEBUG(ifp, "starting\n");
  885 
  886         /*
  887          * do not junk pending output if we are in VMnet mode.
  888          * XXX: can this do any harm because of queue overflow?
  889          */
  890 
  891         TUN_LOCK(tp);
  892         if (((tp->tun_flags & TUN_VMNET) == 0) &&
  893             ((tp->tun_flags & TUN_READY) != TUN_READY)) {
  894                 struct mbuf *m;
  895 
  896                 /* Unlocked read. */
  897                 TUNDEBUG(ifp, "not ready, tun_flags = 0x%x\n", tp->tun_flags);
  898 
  899                 for (;;) {
  900                         IF_DEQUEUE(&ifp->if_snd, m);
  901                         if (m != NULL) {
  902                                 m_freem(m);
  903                                 if_inc_counter(ifp, IFCOUNTER_OERRORS, 1);
  904                         } else
  905                                 break;
  906                 }
  907                 TUN_UNLOCK(tp);
  908 
  909                 return;
  910         }
  911 
  912         ifp->if_drv_flags |= IFF_DRV_OACTIVE;
  913 
  914         if (!IFQ_IS_EMPTY(&ifp->if_snd)) {
  915                 if (tp->tun_flags & TUN_RWAIT) {
  916                         tp->tun_flags &= ~TUN_RWAIT;
  917                         wakeup(tp);
  918                 }
  919 
  920                 if ((tp->tun_flags & TUN_ASYNC) && (tp->tun_sigio != NULL)) {
  921                         TUN_UNLOCK(tp);
  922                         pgsigio(&tp->tun_sigio, SIGIO, 0);
  923                         TUN_LOCK(tp);
  924                 }
  925 
  926                 selwakeuppri(&tp->tun_rsel, PZERO+1);
  927                 KNOTE_LOCKED(&tp->tun_rsel.si_note, 0);
  928                 if_inc_counter(ifp, IFCOUNTER_OPACKETS, 1); /* obytes are counted in ether_output */
  929         }
  930 
  931         ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;
  932         TUN_UNLOCK(tp);
  933 } /* tunstart_l2 */
  934 
  935 /* XXX: should return an error code so it can fail. */
  936 static void
  937 tuncreate(struct cdev *dev)
  938 {
  939         struct tuntap_driver *drv;
  940         struct tuntap_softc *tp;
  941         struct ifnet *ifp;
  942         struct ether_addr eaddr;
  943         int iflags;
  944         u_char type;
  945 
  946         tp = dev->si_drv1;
  947         KASSERT(tp != NULL,
  948             ("si_drv1 should have been initialized at creation"));
  949 
  950         drv = tp->tun_drv;
  951         iflags = IFF_MULTICAST;
  952         if ((tp->tun_flags & TUN_L2) != 0) {
  953                 type = IFT_ETHER;
  954                 iflags |= IFF_BROADCAST | IFF_SIMPLEX;
  955         } else {
  956                 type = IFT_PPP;
  957                 iflags |= IFF_POINTOPOINT;
  958         }
  959         ifp = tp->tun_ifp = if_alloc(type);
  960         if (ifp == NULL)
  961                 panic("%s%d: failed to if_alloc() interface.\n",
  962                     drv->cdevsw.d_name, dev2unit(dev));
  963         ifp->if_softc = tp;
  964         if_initname(ifp, drv->cdevsw.d_name, dev2unit(dev));
  965         ifp->if_ioctl = tunifioctl;
  966         ifp->if_flags = iflags;
  967         IFQ_SET_MAXLEN(&ifp->if_snd, ifqmaxlen);
  968         ifp->if_capabilities |= IFCAP_LINKSTATE;
  969         ifp->if_capenable |= IFCAP_LINKSTATE;
  970 
  971         if ((tp->tun_flags & TUN_L2) != 0) {
  972                 ifp->if_init = tunifinit;
  973                 ifp->if_start = tunstart_l2;
  974 
  975                 ether_gen_addr(ifp, &eaddr);
  976                 ether_ifattach(ifp, eaddr.octet);
  977         } else {
  978                 ifp->if_mtu = TUNMTU;
  979                 ifp->if_start = tunstart;
  980                 ifp->if_output = tunoutput;
  981 
  982                 ifp->if_snd.ifq_drv_maxlen = 0;
  983                 IFQ_SET_READY(&ifp->if_snd);
  984 
  985                 if_attach(ifp);
  986                 bpfattach(ifp, DLT_NULL, sizeof(u_int32_t));
  987         }
  988 
  989         TUN_LOCK(tp);
  990         tp->tun_flags |= TUN_INITED;
  991         TUN_UNLOCK(tp);
  992 
  993         TUNDEBUG(ifp, "interface %s is created, minor = %#x\n",
  994             ifp->if_xname, dev2unit(dev));
  995 }
  996 
  997 static void
  998 tunrename(void *arg __unused, struct ifnet *ifp)
  999 {
 1000         struct tuntap_softc *tp;
 1001         int error;
 1002 
 1003         if ((ifp->if_flags & IFF_RENAMING) == 0)
 1004                 return;
 1005 
 1006         if (tuntap_driver_from_ifnet(ifp) == NULL)
 1007                 return;
 1008 
 1009         /*
 1010          * We need to grab the ioctl sx long enough to make sure the softc is
 1011          * still there.  If it is, we can safely try to busy the tun device.
 1012          * The busy may fail if the device is currently dying, in which case
 1013          * we do nothing.  If it doesn't fail, the busy count stops the device
 1014          * from dying until we've created the alias (that will then be
 1015          * subsequently destroyed).
 1016          */
 1017         sx_xlock(&tun_ioctl_sx);
 1018         tp = ifp->if_softc;
 1019         if (tp == NULL) {
 1020                 sx_xunlock(&tun_ioctl_sx);
 1021                 return;
 1022         }
 1023         error = tun_busy(tp);
 1024         sx_xunlock(&tun_ioctl_sx);
 1025         if (error != 0)
 1026                 return;
 1027         if (tp->tun_alias != NULL) {
 1028                 destroy_dev(tp->tun_alias);
 1029                 tp->tun_alias = NULL;
 1030         }
 1031 
 1032         if (strcmp(ifp->if_xname, tp->tun_dev->si_name) == 0)
 1033                 goto out;
 1034 
 1035         /*
 1036          * Failure's ok, aliases are created on a best effort basis.  If a
 1037          * tun user/consumer decides to rename the interface to conflict with
 1038          * another device (non-ifnet) on the system, we will assume they know
 1039          * what they are doing.  make_dev_alias_p won't touch tun_alias on
 1040          * failure, so we use it but ignore the return value.
 1041          */
 1042         make_dev_alias_p(MAKEDEV_CHECKNAME, &tp->tun_alias, tp->tun_dev, "%s",
 1043             ifp->if_xname);
 1044 out:
 1045         tun_unbusy(tp);
 1046 }
 1047 
 1048 static int
 1049 tunopen(struct cdev *dev, int flag, int mode, struct thread *td)
 1050 {
 1051         struct ifnet    *ifp;
 1052         struct tuntap_softc *tp;
 1053         int error __diagused, tunflags;
 1054 
 1055         tunflags = 0;
 1056         CURVNET_SET(TD_TO_VNET(td));
 1057         error = tuntap_name2info(dev->si_name, NULL, &tunflags);
 1058         if (error != 0) {
 1059                 CURVNET_RESTORE();
 1060                 return (error); /* Shouldn't happen */
 1061         }
 1062 
 1063         tp = dev->si_drv1;
 1064         KASSERT(tp != NULL,
 1065             ("si_drv1 should have been initialized at creation"));
 1066 
 1067         TUN_LOCK(tp);
 1068         if ((tp->tun_flags & TUN_INITED) == 0) {
 1069                 TUN_UNLOCK(tp);
 1070                 CURVNET_RESTORE();
 1071                 return (ENXIO);
 1072         }
 1073         if ((tp->tun_flags & (TUN_OPEN | TUN_DYING)) != 0) {
 1074                 TUN_UNLOCK(tp);
 1075                 CURVNET_RESTORE();
 1076                 return (EBUSY);
 1077         }
 1078 
 1079         error = tun_busy_locked(tp);
 1080         KASSERT(error == 0, ("Must be able to busy an unopen tunnel"));
 1081         ifp = TUN2IFP(tp);
 1082 
 1083         if ((tp->tun_flags & TUN_L2) != 0) {
 1084                 bcopy(IF_LLADDR(ifp), tp->tun_ether.octet,
 1085                     sizeof(tp->tun_ether.octet));
 1086 
 1087                 ifp->if_drv_flags |= IFF_DRV_RUNNING;
 1088                 ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;
 1089 
 1090                 if (tapuponopen)
 1091                         ifp->if_flags |= IFF_UP;
 1092         }
 1093 
 1094         tp->tun_pid = td->td_proc->p_pid;
 1095         tp->tun_flags |= TUN_OPEN;
 1096 
 1097         if_link_state_change(ifp, LINK_STATE_UP);
 1098         TUNDEBUG(ifp, "open\n");
 1099         TUN_UNLOCK(tp);
 1100 
 1101         /*
 1102          * This can fail with either ENOENT or EBUSY.  This is in the middle of
 1103          * d_open, so ENOENT should not be possible.  EBUSY is possible, but
 1104          * the only cdevpriv dtor being set will be tundtor and the softc being
 1105          * passed is constant for a given cdev.  We ignore the possible error
 1106          * because of this as either "unlikely" or "not actually a problem."
 1107          */
 1108         (void)devfs_set_cdevpriv(tp, tundtor);
 1109         CURVNET_RESTORE();
 1110         return (0);
 1111 }
 1112 
 1113 /*
 1114  * tundtor - tear down the device - mark i/f down & delete
 1115  * routing info
 1116  */
 1117 static void
 1118 tundtor(void *data)
 1119 {
 1120         struct proc *p;
 1121         struct tuntap_softc *tp;
 1122         struct ifnet *ifp;
 1123         bool l2tun;
 1124 
 1125         tp = data;
 1126         p = curproc;
 1127         ifp = TUN2IFP(tp);
 1128 
 1129         TUN_LOCK(tp);
 1130 
 1131         /*
 1132          * Realistically, we can't be obstinate here.  This only means that the
 1133          * tuntap device was closed out of order, and the last closer wasn't the
 1134          * controller.  These are still good to know about, though, as software
 1135          * should avoid multiple processes with a tuntap device open and
 1136          * ill-defined transfer of control (e.g., handoff, TUNSIFPID, close in
 1137          * parent).
 1138          */
 1139         if (p->p_pid != tp->tun_pid) {
 1140                 log(LOG_INFO,
 1141                     "pid %d (%s), %s: tun/tap protocol violation, non-controlling process closed last.\n",
 1142                     p->p_pid, p->p_comm, tp->tun_dev->si_name);
 1143         }
 1144 
 1145         /*
 1146          * junk all pending output
 1147          */
 1148         CURVNET_SET(ifp->if_vnet);
 1149 
 1150         l2tun = false;
 1151         if ((tp->tun_flags & TUN_L2) != 0) {
 1152                 l2tun = true;
 1153                 IF_DRAIN(&ifp->if_snd);
 1154         } else {
 1155                 IFQ_PURGE(&ifp->if_snd);
 1156         }
 1157 
 1158         /* For vmnet, we won't do most of the address/route bits */
 1159         if ((tp->tun_flags & TUN_VMNET) != 0 ||
 1160             (l2tun && (ifp->if_flags & IFF_LINK0) != 0))
 1161                 goto out;
 1162 
 1163         if (ifp->if_flags & IFF_UP) {
 1164                 TUN_UNLOCK(tp);
 1165                 if_down(ifp);
 1166                 TUN_LOCK(tp);
 1167         }
 1168 
 1169         /* Delete all addresses and routes which reference this interface. */
 1170         if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
 1171                 ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
 1172                 TUN_UNLOCK(tp);
 1173                 if_purgeaddrs(ifp);
 1174                 TUN_LOCK(tp);
 1175         }
 1176 
 1177 out:
 1178         if_link_state_change(ifp, LINK_STATE_DOWN);
 1179         CURVNET_RESTORE();
 1180 
 1181         funsetown(&tp->tun_sigio);
 1182         selwakeuppri(&tp->tun_rsel, PZERO + 1);
 1183         KNOTE_LOCKED(&tp->tun_rsel.si_note, 0);
 1184         TUNDEBUG (ifp, "closed\n");
 1185         tp->tun_flags &= ~TUN_OPEN;
 1186         tp->tun_pid = 0;
 1187         tun_vnethdr_set(ifp, 0);
 1188 
 1189         tun_unbusy_locked(tp);
 1190         TUN_UNLOCK(tp);
 1191 }
 1192 
 1193 static void
 1194 tuninit(struct ifnet *ifp)
 1195 {
 1196         struct tuntap_softc *tp = ifp->if_softc;
 1197 
 1198         TUNDEBUG(ifp, "tuninit\n");
 1199 
 1200         TUN_LOCK(tp);
 1201         ifp->if_drv_flags |= IFF_DRV_RUNNING;
 1202         if ((tp->tun_flags & TUN_L2) == 0) {
 1203                 ifp->if_flags |= IFF_UP;
 1204                 getmicrotime(&ifp->if_lastchange);
 1205                 TUN_UNLOCK(tp);
 1206         } else {
 1207                 ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;
 1208                 TUN_UNLOCK(tp);
 1209                 /* attempt to start output */
 1210                 tunstart_l2(ifp);
 1211         }
 1212 
 1213 }
 1214 
 1215 /*
 1216  * Used only for l2 tunnel.
 1217  */
 1218 static void
 1219 tunifinit(void *xtp)
 1220 {
 1221         struct tuntap_softc *tp;
 1222 
 1223         tp = (struct tuntap_softc *)xtp;
 1224         tuninit(tp->tun_ifp);
 1225 }
 1226 
 1227 /*
 1228  * To be called under TUN_LOCK. Update ifp->if_hwassist according to the
 1229  * current value of ifp->if_capenable.
 1230  */
 1231 static void
 1232 tun_caps_changed(struct ifnet *ifp)
 1233 {
 1234         uint64_t hwassist = 0;
 1235 
 1236         TUN_LOCK_ASSERT((struct tuntap_softc *)ifp->if_softc);
 1237         if (ifp->if_capenable & IFCAP_TXCSUM)
 1238                 hwassist |= CSUM_TCP | CSUM_UDP;
 1239         if (ifp->if_capenable & IFCAP_TXCSUM_IPV6)
 1240                 hwassist |= CSUM_TCP_IPV6
 1241                     | CSUM_UDP_IPV6;
 1242         if (ifp->if_capenable & IFCAP_TSO4)
 1243                 hwassist |= CSUM_IP_TSO;
 1244         if (ifp->if_capenable & IFCAP_TSO6)
 1245                 hwassist |= CSUM_IP6_TSO;
 1246         ifp->if_hwassist = hwassist;
 1247 }
 1248 
 1249 /*
 1250  * To be called under TUN_LOCK. Update tp->tun_vhdrlen and adjust
 1251  * if_capabilities and if_capenable as needed.
 1252  */
 1253 static void
 1254 tun_vnethdr_set(struct ifnet *ifp, int vhdrlen)
 1255 {
 1256         struct tuntap_softc *tp = ifp->if_softc;
 1257 
 1258         TUN_LOCK_ASSERT(tp);
 1259 
 1260         if (tp->tun_vhdrlen == vhdrlen)
 1261                 return;
 1262 
 1263         /*
 1264          * Update if_capabilities to reflect the
 1265          * functionalities offered by the virtio-net
 1266          * header.
 1267          */
 1268         if (vhdrlen != 0)
 1269                 ifp->if_capabilities |=
 1270                         TAP_VNET_HDR_CAPS;
 1271         else
 1272                 ifp->if_capabilities &=
 1273                         ~TAP_VNET_HDR_CAPS;
 1274         /*
 1275          * Disable any capabilities that we don't
 1276          * support anymore.
 1277          */
 1278         ifp->if_capenable &= ifp->if_capabilities;
 1279         tun_caps_changed(ifp);
 1280         tp->tun_vhdrlen = vhdrlen;
 1281 
 1282         TUNDEBUG(ifp, "vnet_hdr_len=%d, if_capabilities=%x\n",
 1283             vhdrlen, ifp->if_capabilities);
 1284 }
 1285 
 1286 /*
 1287  * Process an ioctl request.
 1288  */
 1289 static int
 1290 tunifioctl(struct ifnet *ifp, u_long cmd, caddr_t data)
 1291 {
 1292         struct ifreq *ifr = (struct ifreq *)data;
 1293         struct tuntap_softc *tp;
 1294         struct ifstat *ifs;
 1295         struct ifmediareq       *ifmr;
 1296         int             dummy, error = 0;
 1297         bool            l2tun;
 1298 
 1299         ifmr = NULL;
 1300         sx_xlock(&tun_ioctl_sx);
 1301         tp = ifp->if_softc;
 1302         if (tp == NULL) {
 1303                 error = ENXIO;
 1304                 goto bad;
 1305         }
 1306         l2tun = (tp->tun_flags & TUN_L2) != 0;
 1307         switch(cmd) {
 1308         case SIOCGIFSTATUS:
 1309                 ifs = (struct ifstat *)data;
 1310                 TUN_LOCK(tp);
 1311                 if (tp->tun_pid)
 1312                         snprintf(ifs->ascii, sizeof(ifs->ascii),
 1313                             "\tOpened by PID %d\n", tp->tun_pid);
 1314                 else
 1315                         ifs->ascii[0] = '\0';
 1316                 TUN_UNLOCK(tp);
 1317                 break;
 1318         case SIOCSIFADDR:
 1319                 if (l2tun)
 1320                         error = ether_ioctl(ifp, cmd, data);
 1321                 else
 1322                         tuninit(ifp);
 1323                 if (error == 0)
 1324                     TUNDEBUG(ifp, "address set\n");
 1325                 break;
 1326         case SIOCSIFMTU:
 1327                 ifp->if_mtu = ifr->ifr_mtu;
 1328                 TUNDEBUG(ifp, "mtu set\n");
 1329                 break;
 1330         case SIOCSIFFLAGS:
 1331         case SIOCADDMULTI:
 1332         case SIOCDELMULTI:
 1333                 break;
 1334         case SIOCGIFMEDIA:
 1335                 if (!l2tun) {
 1336                         error = EINVAL;
 1337                         break;
 1338                 }
 1339 
 1340                 ifmr = (struct ifmediareq *)data;
 1341                 dummy = ifmr->ifm_count;
 1342                 ifmr->ifm_count = 1;
 1343                 ifmr->ifm_status = IFM_AVALID;
 1344                 ifmr->ifm_active = IFM_ETHER;
 1345                 if (tp->tun_flags & TUN_OPEN)
 1346                         ifmr->ifm_status |= IFM_ACTIVE;
 1347                 ifmr->ifm_current = ifmr->ifm_active;
 1348                 if (dummy >= 1) {
 1349                         int media = IFM_ETHER;
 1350                         error = copyout(&media, ifmr->ifm_ulist, sizeof(int));
 1351                 }
 1352                 break;
 1353         case SIOCSIFCAP:
 1354                 TUN_LOCK(tp);
 1355                 ifp->if_capenable = ifr->ifr_reqcap;
 1356                 tun_caps_changed(ifp);
 1357                 TUN_UNLOCK(tp);
 1358                 VLAN_CAPABILITIES(ifp);
 1359                 break;
 1360         default:
 1361                 if (l2tun) {
 1362                         error = ether_ioctl(ifp, cmd, data);
 1363                 } else {
 1364                         error = EINVAL;
 1365                 }
 1366         }
 1367 bad:
 1368         sx_xunlock(&tun_ioctl_sx);
 1369         return (error);
 1370 }
 1371 
 1372 /*
 1373  * tunoutput - queue packets from higher level ready to put out.
 1374  */
 1375 static int
 1376 tunoutput(struct ifnet *ifp, struct mbuf *m0, const struct sockaddr *dst,
 1377     struct route *ro)
 1378 {
 1379         struct tuntap_softc *tp = ifp->if_softc;
 1380         u_short cached_tun_flags;
 1381         int error;
 1382         u_int32_t af;
 1383 
 1384         TUNDEBUG (ifp, "tunoutput\n");
 1385 
 1386 #ifdef MAC
 1387         error = mac_ifnet_check_transmit(ifp, m0);
 1388         if (error) {
 1389                 m_freem(m0);
 1390                 return (error);
 1391         }
 1392 #endif
 1393 
 1394         /* Could be unlocked read? */
 1395         TUN_LOCK(tp);
 1396         cached_tun_flags = tp->tun_flags;
 1397         TUN_UNLOCK(tp);
 1398         if ((cached_tun_flags & TUN_READY) != TUN_READY) {
 1399                 TUNDEBUG (ifp, "not ready 0%o\n", tp->tun_flags);
 1400                 m_freem (m0);
 1401                 return (EHOSTDOWN);
 1402         }
 1403 
 1404         if ((ifp->if_flags & IFF_UP) != IFF_UP) {
 1405                 m_freem (m0);
 1406                 return (EHOSTDOWN);
 1407         }
 1408 
 1409         /* BPF writes need to be handled specially. */
 1410         if (dst->sa_family == AF_UNSPEC)
 1411                 bcopy(dst->sa_data, &af, sizeof(af));
 1412         else
 1413                 af = RO_GET_FAMILY(ro, dst);
 1414 
 1415         if (bpf_peers_present(ifp->if_bpf))
 1416                 bpf_mtap2(ifp->if_bpf, &af, sizeof(af), m0);
 1417 
 1418         /* prepend sockaddr? this may abort if the mbuf allocation fails */
 1419         if (cached_tun_flags & TUN_LMODE) {
 1420                 /* allocate space for sockaddr */
 1421                 M_PREPEND(m0, dst->sa_len, M_NOWAIT);
 1422 
 1423                 /* if allocation failed drop packet */
 1424                 if (m0 == NULL) {
 1425                         if_inc_counter(ifp, IFCOUNTER_IQDROPS, 1);
 1426                         if_inc_counter(ifp, IFCOUNTER_OERRORS, 1);
 1427                         return (ENOBUFS);
 1428                 } else {
 1429                         bcopy(dst, m0->m_data, dst->sa_len);
 1430                 }
 1431         }
 1432 
 1433         if (cached_tun_flags & TUN_IFHEAD) {
 1434                 /* Prepend the address family */
 1435                 M_PREPEND(m0, 4, M_NOWAIT);
 1436 
 1437                 /* if allocation failed drop packet */
 1438                 if (m0 == NULL) {
 1439                         if_inc_counter(ifp, IFCOUNTER_IQDROPS, 1);
 1440                         if_inc_counter(ifp, IFCOUNTER_OERRORS, 1);
 1441                         return (ENOBUFS);
 1442                 } else
 1443                         *(u_int32_t *)m0->m_data = htonl(af);
 1444         } else {
 1445 #ifdef INET
 1446                 if (af != AF_INET)
 1447 #endif
 1448                 {
 1449                         m_freem(m0);
 1450                         return (EAFNOSUPPORT);
 1451                 }
 1452         }
 1453 
 1454         error = (ifp->if_transmit)(ifp, m0);
 1455         if (error)
 1456                 return (ENOBUFS);
 1457         if_inc_counter(ifp, IFCOUNTER_OPACKETS, 1);
 1458         return (0);
 1459 }
 1460 
 1461 /*
 1462  * the cdevsw interface is now pretty minimal.
 1463  */
 1464 static  int
 1465 tunioctl(struct cdev *dev, u_long cmd, caddr_t data, int flag,
 1466     struct thread *td)
 1467 {
 1468         struct ifreq ifr, *ifrp;
 1469         struct tuntap_softc *tp = dev->si_drv1;
 1470         struct ifnet *ifp = TUN2IFP(tp);
 1471         struct tuninfo *tunp;
 1472         int error, iflags, ival;
 1473         bool    l2tun;
 1474 
 1475         l2tun = (tp->tun_flags & TUN_L2) != 0;
 1476         if (l2tun) {
 1477                 /* tap specific ioctls */
 1478                 switch(cmd) {
 1479                 /* VMware/VMnet port ioctl's */
 1480 #if defined(COMPAT_FREEBSD6) || defined(COMPAT_FREEBSD5) || \
 1481     defined(COMPAT_FREEBSD4)
 1482                 case _IO('V', 0):
 1483                         ival = IOCPARM_IVAL(data);
 1484                         data = (caddr_t)&ival;
 1485                         /* FALLTHROUGH */
 1486 #endif
 1487                 case VMIO_SIOCSIFFLAGS: /* VMware/VMnet SIOCSIFFLAGS */
 1488                         iflags = *(int *)data;
 1489                         iflags &= TUN_VMIO_FLAG_MASK;
 1490                         iflags &= ~IFF_CANTCHANGE;
 1491                         iflags |= IFF_UP;
 1492 
 1493                         TUN_LOCK(tp);
 1494                         ifp->if_flags = iflags |
 1495                             (ifp->if_flags & IFF_CANTCHANGE);
 1496                         TUN_UNLOCK(tp);
 1497 
 1498                         return (0);
 1499                 case SIOCGIFADDR:       /* get MAC address of the remote side */
 1500                         TUN_LOCK(tp);
 1501                         bcopy(&tp->tun_ether.octet, data,
 1502                             sizeof(tp->tun_ether.octet));
 1503                         TUN_UNLOCK(tp);
 1504 
 1505                         return (0);
 1506                 case SIOCSIFADDR:       /* set MAC address of the remote side */
 1507                         TUN_LOCK(tp);
 1508                         bcopy(data, &tp->tun_ether.octet,
 1509                             sizeof(tp->tun_ether.octet));
 1510                         TUN_UNLOCK(tp);
 1511 
 1512                         return (0);
 1513                 case TAPSVNETHDR:
 1514                         ival = *(int *)data;
 1515                         if (ival != 0 &&
 1516                             ival != sizeof(struct virtio_net_hdr) &&
 1517                             ival != sizeof(struct virtio_net_hdr_mrg_rxbuf)) {
 1518                                 return (EINVAL);
 1519                         }
 1520                         TUN_LOCK(tp);
 1521                         tun_vnethdr_set(ifp, ival);
 1522                         TUN_UNLOCK(tp);
 1523 
 1524                         return (0);
 1525                 case TAPGVNETHDR:
 1526                         TUN_LOCK(tp);
 1527                         *(int *)data = tp->tun_vhdrlen;
 1528                         TUN_UNLOCK(tp);
 1529 
 1530                         return (0);
 1531                 }
 1532 
 1533                 /* Fall through to the common ioctls if unhandled */
 1534         } else {
 1535                 switch (cmd) {
 1536                 case TUNSLMODE:
 1537                         TUN_LOCK(tp);
 1538                         if (*(int *)data) {
 1539                                 tp->tun_flags |= TUN_LMODE;
 1540                                 tp->tun_flags &= ~TUN_IFHEAD;
 1541                         } else
 1542                                 tp->tun_flags &= ~TUN_LMODE;
 1543                         TUN_UNLOCK(tp);
 1544 
 1545                         return (0);
 1546                 case TUNSIFHEAD:
 1547                         TUN_LOCK(tp);
 1548                         if (*(int *)data) {
 1549                                 tp->tun_flags |= TUN_IFHEAD;
 1550                                 tp->tun_flags &= ~TUN_LMODE;
 1551                         } else
 1552                                 tp->tun_flags &= ~TUN_IFHEAD;
 1553                         TUN_UNLOCK(tp);
 1554 
 1555                         return (0);
 1556                 case TUNGIFHEAD:
 1557                         TUN_LOCK(tp);
 1558                         *(int *)data = (tp->tun_flags & TUN_IFHEAD) ? 1 : 0;
 1559                         TUN_UNLOCK(tp);
 1560 
 1561                         return (0);
 1562                 case TUNSIFMODE:
 1563                         /* deny this if UP */
 1564                         if (TUN2IFP(tp)->if_flags & IFF_UP)
 1565                                 return (EBUSY);
 1566 
 1567                         switch (*(int *)data & ~IFF_MULTICAST) {
 1568                         case IFF_POINTOPOINT:
 1569                         case IFF_BROADCAST:
 1570                                 TUN_LOCK(tp);
 1571                                 TUN2IFP(tp)->if_flags &=
 1572                                     ~(IFF_BROADCAST|IFF_POINTOPOINT|IFF_MULTICAST);
 1573                                 TUN2IFP(tp)->if_flags |= *(int *)data;
 1574                                 TUN_UNLOCK(tp);
 1575 
 1576                                 break;
 1577                         default:
 1578                                 return (EINVAL);
 1579                         }
 1580 
 1581                         return (0);
 1582                 case TUNSIFPID:
 1583                         TUN_LOCK(tp);
 1584                         tp->tun_pid = curthread->td_proc->p_pid;
 1585                         TUN_UNLOCK(tp);
 1586 
 1587                         return (0);
 1588                 }
 1589                 /* Fall through to the common ioctls if unhandled */
 1590         }
 1591 
 1592         switch (cmd) {
 1593         case TUNGIFNAME:
 1594                 ifrp = (struct ifreq *)data;
 1595                 strlcpy(ifrp->ifr_name, TUN2IFP(tp)->if_xname, IFNAMSIZ);
 1596 
 1597                 return (0);
 1598         case TUNSIFINFO:
 1599                 tunp = (struct tuninfo *)data;
 1600                 if (TUN2IFP(tp)->if_type != tunp->type)
 1601                         return (EPROTOTYPE);
 1602                 TUN_LOCK(tp);
 1603                 if (TUN2IFP(tp)->if_mtu != tunp->mtu) {
 1604                         strlcpy(ifr.ifr_name, if_name(TUN2IFP(tp)), IFNAMSIZ);
 1605                         ifr.ifr_mtu = tunp->mtu;
 1606                         CURVNET_SET(TUN2IFP(tp)->if_vnet);
 1607                         error = ifhwioctl(SIOCSIFMTU, TUN2IFP(tp),
 1608                             (caddr_t)&ifr, td);
 1609                         CURVNET_RESTORE();
 1610                         if (error) {
 1611                                 TUN_UNLOCK(tp);
 1612                                 return (error);
 1613                         }
 1614                 }
 1615                 TUN2IFP(tp)->if_baudrate = tunp->baudrate;
 1616                 TUN_UNLOCK(tp);
 1617                 break;
 1618         case TUNGIFINFO:
 1619                 tunp = (struct tuninfo *)data;
 1620                 TUN_LOCK(tp);
 1621                 tunp->mtu = TUN2IFP(tp)->if_mtu;
 1622                 tunp->type = TUN2IFP(tp)->if_type;
 1623                 tunp->baudrate = TUN2IFP(tp)->if_baudrate;
 1624                 TUN_UNLOCK(tp);
 1625                 break;
 1626         case TUNSDEBUG:
 1627                 tundebug = *(int *)data;
 1628                 break;
 1629         case TUNGDEBUG:
 1630                 *(int *)data = tundebug;
 1631                 break;
 1632         case FIONBIO:
 1633                 break;
 1634         case FIOASYNC:
 1635                 TUN_LOCK(tp);
 1636                 if (*(int *)data)
 1637                         tp->tun_flags |= TUN_ASYNC;
 1638                 else
 1639                         tp->tun_flags &= ~TUN_ASYNC;
 1640                 TUN_UNLOCK(tp);
 1641                 break;
 1642         case FIONREAD:
 1643                 if (!IFQ_IS_EMPTY(&TUN2IFP(tp)->if_snd)) {
 1644                         struct mbuf *mb;
 1645                         IFQ_LOCK(&TUN2IFP(tp)->if_snd);
 1646                         IFQ_POLL_NOLOCK(&TUN2IFP(tp)->if_snd, mb);
 1647                         for (*(int *)data = 0; mb != NULL; mb = mb->m_next)
 1648                                 *(int *)data += mb->m_len;
 1649                         IFQ_UNLOCK(&TUN2IFP(tp)->if_snd);
 1650                 } else
 1651                         *(int *)data = 0;
 1652                 break;
 1653         case FIOSETOWN:
 1654                 return (fsetown(*(int *)data, &tp->tun_sigio));
 1655 
 1656         case FIOGETOWN:
 1657                 *(int *)data = fgetown(&tp->tun_sigio);
 1658                 return (0);
 1659 
 1660         /* This is deprecated, FIOSETOWN should be used instead. */
 1661         case TIOCSPGRP:
 1662                 return (fsetown(-(*(int *)data), &tp->tun_sigio));
 1663 
 1664         /* This is deprecated, FIOGETOWN should be used instead. */
 1665         case TIOCGPGRP:
 1666                 *(int *)data = -fgetown(&tp->tun_sigio);
 1667                 return (0);
 1668 
 1669         default:
 1670                 return (ENOTTY);
 1671         }
 1672         return (0);
 1673 }
 1674 
 1675 /*
 1676  * The cdevsw read interface - reads a packet at a time, or at
 1677  * least as much of a packet as can be read.
 1678  */
 1679 static  int
 1680 tunread(struct cdev *dev, struct uio *uio, int flag)
 1681 {
 1682         struct tuntap_softc *tp = dev->si_drv1;
 1683         struct ifnet    *ifp = TUN2IFP(tp);
 1684         struct mbuf     *m;
 1685         size_t          len;
 1686         int             error = 0;
 1687 
 1688         TUNDEBUG (ifp, "read\n");
 1689         TUN_LOCK(tp);
 1690         if ((tp->tun_flags & TUN_READY) != TUN_READY) {
 1691                 TUN_UNLOCK(tp);
 1692                 TUNDEBUG (ifp, "not ready 0%o\n", tp->tun_flags);
 1693                 return (EHOSTDOWN);
 1694         }
 1695 
 1696         tp->tun_flags &= ~TUN_RWAIT;
 1697 
 1698         for (;;) {
 1699                 IFQ_DEQUEUE(&ifp->if_snd, m);
 1700                 if (m != NULL)
 1701                         break;
 1702                 if (flag & O_NONBLOCK) {
 1703                         TUN_UNLOCK(tp);
 1704                         return (EWOULDBLOCK);
 1705                 }
 1706                 tp->tun_flags |= TUN_RWAIT;
 1707                 error = mtx_sleep(tp, &tp->tun_mtx, PCATCH | (PZERO + 1),
 1708                     "tunread", 0);
 1709                 if (error != 0) {
 1710                         TUN_UNLOCK(tp);
 1711                         return (error);
 1712                 }
 1713         }
 1714         TUN_UNLOCK(tp);
 1715 
 1716         if ((tp->tun_flags & TUN_L2) != 0)
 1717                 BPF_MTAP(ifp, m);
 1718 
 1719         len = min(tp->tun_vhdrlen, uio->uio_resid);
 1720         if (len > 0) {
 1721                 struct virtio_net_hdr_mrg_rxbuf vhdr;
 1722 
 1723                 bzero(&vhdr, sizeof(vhdr));
 1724                 if (m->m_pkthdr.csum_flags & TAP_ALL_OFFLOAD) {
 1725                         m = virtio_net_tx_offload(ifp, m, false, &vhdr.hdr);
 1726                 }
 1727 
 1728                 TUNDEBUG(ifp, "txvhdr: f %u, gt %u, hl %u, "
 1729                     "gs %u, cs %u, co %u\n", vhdr.hdr.flags,
 1730                     vhdr.hdr.gso_type, vhdr.hdr.hdr_len,
 1731                     vhdr.hdr.gso_size, vhdr.hdr.csum_start,
 1732                     vhdr.hdr.csum_offset);
 1733                 error = uiomove(&vhdr, len, uio);
 1734         }
 1735 
 1736         while (m && uio->uio_resid > 0 && error == 0) {
 1737                 len = min(uio->uio_resid, m->m_len);
 1738                 if (len != 0)
 1739                         error = uiomove(mtod(m, void *), len, uio);
 1740                 m = m_free(m);
 1741         }
 1742 
 1743         if (m) {
 1744                 TUNDEBUG(ifp, "Dropping mbuf\n");
 1745                 m_freem(m);
 1746         }
 1747         return (error);
 1748 }
 1749 
 1750 static int
 1751 tunwrite_l2(struct tuntap_softc *tp, struct mbuf *m,
 1752             struct virtio_net_hdr_mrg_rxbuf *vhdr)
 1753 {
 1754         struct epoch_tracker et;
 1755         struct ether_header *eh;
 1756         struct ifnet *ifp;
 1757 
 1758         ifp = TUN2IFP(tp);
 1759 
 1760         /*
 1761          * Only pass a unicast frame to ether_input(), if it would
 1762          * actually have been received by non-virtual hardware.
 1763          */
 1764         if (m->m_len < sizeof(struct ether_header)) {
 1765                 m_freem(m);
 1766                 return (0);
 1767         }
 1768 
 1769         eh = mtod(m, struct ether_header *);
 1770 
 1771         if (eh && (ifp->if_flags & IFF_PROMISC) == 0 &&
 1772             !ETHER_IS_MULTICAST(eh->ether_dhost) &&
 1773             bcmp(eh->ether_dhost, IF_LLADDR(ifp), ETHER_ADDR_LEN) != 0) {
 1774                 m_freem(m);
 1775                 return (0);
 1776         }
 1777 
 1778         if (vhdr != NULL && virtio_net_rx_csum(m, &vhdr->hdr)) {
 1779                 m_freem(m);
 1780                 return (0);
 1781         }
 1782 
 1783         /* Pass packet up to parent. */
 1784         CURVNET_SET(ifp->if_vnet);
 1785         NET_EPOCH_ENTER(et);
 1786         (*ifp->if_input)(ifp, m);
 1787         NET_EPOCH_EXIT(et);
 1788         CURVNET_RESTORE();
 1789         /* ibytes are counted in parent */
 1790         if_inc_counter(ifp, IFCOUNTER_IPACKETS, 1);
 1791         return (0);
 1792 }
 1793 
 1794 static int
 1795 tunwrite_l3(struct tuntap_softc *tp, struct mbuf *m)
 1796 {
 1797         struct epoch_tracker et;
 1798         struct ifnet *ifp;
 1799         int family, isr;
 1800 
 1801         ifp = TUN2IFP(tp);
 1802         /* Could be unlocked read? */
 1803         TUN_LOCK(tp);
 1804         if (tp->tun_flags & TUN_IFHEAD) {
 1805                 TUN_UNLOCK(tp);
 1806                 if (m->m_len < sizeof(family) &&
 1807                 (m = m_pullup(m, sizeof(family))) == NULL)
 1808                         return (ENOBUFS);
 1809                 family = ntohl(*mtod(m, u_int32_t *));
 1810                 m_adj(m, sizeof(family));
 1811         } else {
 1812                 TUN_UNLOCK(tp);
 1813                 family = AF_INET;
 1814         }
 1815 
 1816         BPF_MTAP2(ifp, &family, sizeof(family), m);
 1817 
 1818         switch (family) {
 1819 #ifdef INET
 1820         case AF_INET:
 1821                 isr = NETISR_IP;
 1822                 break;
 1823 #endif
 1824 #ifdef INET6
 1825         case AF_INET6:
 1826                 isr = NETISR_IPV6;
 1827                 break;
 1828 #endif
 1829         default:
 1830                 m_freem(m);
 1831                 return (EAFNOSUPPORT);
 1832         }
 1833         random_harvest_queue(m, sizeof(*m), RANDOM_NET_TUN);
 1834         if_inc_counter(ifp, IFCOUNTER_IBYTES, m->m_pkthdr.len);
 1835         if_inc_counter(ifp, IFCOUNTER_IPACKETS, 1);
 1836         CURVNET_SET(ifp->if_vnet);
 1837         M_SETFIB(m, ifp->if_fib);
 1838         NET_EPOCH_ENTER(et);
 1839         netisr_dispatch(isr, m);
 1840         NET_EPOCH_EXIT(et);
 1841         CURVNET_RESTORE();
 1842         return (0);
 1843 }
 1844 
 1845 /*
 1846  * the cdevsw write interface - an atomic write is a packet - or else!
 1847  */
 1848 static  int
 1849 tunwrite(struct cdev *dev, struct uio *uio, int flag)
 1850 {
 1851         struct virtio_net_hdr_mrg_rxbuf vhdr;
 1852         struct tuntap_softc *tp;
 1853         struct ifnet    *ifp;
 1854         struct mbuf     *m;
 1855         uint32_t        mru;
 1856         int             align, vhdrlen, error;
 1857         bool            l2tun;
 1858 
 1859         tp = dev->si_drv1;
 1860         ifp = TUN2IFP(tp);
 1861         TUNDEBUG(ifp, "tunwrite\n");
 1862         if ((ifp->if_flags & IFF_UP) != IFF_UP)
 1863                 /* ignore silently */
 1864                 return (0);
 1865 
 1866         if (uio->uio_resid == 0)
 1867                 return (0);
 1868 
 1869         l2tun = (tp->tun_flags & TUN_L2) != 0;
 1870         mru = l2tun ? TAPMRU : TUNMRU;
 1871         vhdrlen = tp->tun_vhdrlen;
 1872         align = 0;
 1873         if (l2tun) {
 1874                 align = ETHER_ALIGN;
 1875                 mru += vhdrlen;
 1876         } else if ((tp->tun_flags & TUN_IFHEAD) != 0)
 1877                 mru += sizeof(uint32_t);        /* family */
 1878         if (uio->uio_resid < 0 || uio->uio_resid > mru) {
 1879                 TUNDEBUG(ifp, "len=%zd!\n", uio->uio_resid);
 1880                 return (EIO);
 1881         }
 1882 
 1883         if (vhdrlen > 0) {
 1884                 error = uiomove(&vhdr, vhdrlen, uio);
 1885                 if (error != 0)
 1886                         return (error);
 1887                 TUNDEBUG(ifp, "txvhdr: f %u, gt %u, hl %u, "
 1888                     "gs %u, cs %u, co %u\n", vhdr.hdr.flags,
 1889                     vhdr.hdr.gso_type, vhdr.hdr.hdr_len,
 1890                     vhdr.hdr.gso_size, vhdr.hdr.csum_start,
 1891                     vhdr.hdr.csum_offset);
 1892         }
 1893 
 1894         if ((m = m_uiotombuf(uio, M_NOWAIT, 0, align, M_PKTHDR)) == NULL) {
 1895                 if_inc_counter(ifp, IFCOUNTER_IERRORS, 1);
 1896                 return (ENOBUFS);
 1897         }
 1898 
 1899         m->m_pkthdr.rcvif = ifp;
 1900 #ifdef MAC
 1901         mac_ifnet_create_mbuf(ifp, m);
 1902 #endif
 1903 
 1904         if (l2tun)
 1905                 return (tunwrite_l2(tp, m, vhdrlen > 0 ? &vhdr : NULL));
 1906 
 1907         return (tunwrite_l3(tp, m));
 1908 }
 1909 
 1910 /*
 1911  * tunpoll - the poll interface, this is only useful on reads
 1912  * really. The write detect always returns true, write never blocks
 1913  * anyway, it either accepts the packet or drops it.
 1914  */
 1915 static  int
 1916 tunpoll(struct cdev *dev, int events, struct thread *td)
 1917 {
 1918         struct tuntap_softc *tp = dev->si_drv1;
 1919         struct ifnet    *ifp = TUN2IFP(tp);
 1920         int             revents = 0;
 1921 
 1922         TUNDEBUG(ifp, "tunpoll\n");
 1923 
 1924         if (events & (POLLIN | POLLRDNORM)) {
 1925                 IFQ_LOCK(&ifp->if_snd);
 1926                 if (!IFQ_IS_EMPTY(&ifp->if_snd)) {
 1927                         TUNDEBUG(ifp, "tunpoll q=%d\n", ifp->if_snd.ifq_len);
 1928                         revents |= events & (POLLIN | POLLRDNORM);
 1929                 } else {
 1930                         TUNDEBUG(ifp, "tunpoll waiting\n");
 1931                         selrecord(td, &tp->tun_rsel);
 1932                 }
 1933                 IFQ_UNLOCK(&ifp->if_snd);
 1934         }
 1935         revents |= events & (POLLOUT | POLLWRNORM);
 1936 
 1937         return (revents);
 1938 }
 1939 
 1940 /*
 1941  * tunkqfilter - support for the kevent() system call.
 1942  */
 1943 static int
 1944 tunkqfilter(struct cdev *dev, struct knote *kn)
 1945 {
 1946         struct tuntap_softc     *tp = dev->si_drv1;
 1947         struct ifnet    *ifp = TUN2IFP(tp);
 1948 
 1949         switch(kn->kn_filter) {
 1950         case EVFILT_READ:
 1951                 TUNDEBUG(ifp, "%s kqfilter: EVFILT_READ, minor = %#x\n",
 1952                     ifp->if_xname, dev2unit(dev));
 1953                 kn->kn_fop = &tun_read_filterops;
 1954                 break;
 1955 
 1956         case EVFILT_WRITE:
 1957                 TUNDEBUG(ifp, "%s kqfilter: EVFILT_WRITE, minor = %#x\n",
 1958                     ifp->if_xname, dev2unit(dev));
 1959                 kn->kn_fop = &tun_write_filterops;
 1960                 break;
 1961 
 1962         default:
 1963                 TUNDEBUG(ifp, "%s kqfilter: invalid filter, minor = %#x\n",
 1964                     ifp->if_xname, dev2unit(dev));
 1965                 return(EINVAL);
 1966         }
 1967 
 1968         kn->kn_hook = tp;
 1969         knlist_add(&tp->tun_rsel.si_note, kn, 0);
 1970 
 1971         return (0);
 1972 }
 1973 
 1974 /*
 1975  * Return true of there is data in the interface queue.
 1976  */
 1977 static int
 1978 tunkqread(struct knote *kn, long hint)
 1979 {
 1980         int                     ret;
 1981         struct tuntap_softc     *tp = kn->kn_hook;
 1982         struct cdev             *dev = tp->tun_dev;
 1983         struct ifnet    *ifp = TUN2IFP(tp);
 1984 
 1985         if ((kn->kn_data = ifp->if_snd.ifq_len) > 0) {
 1986                 TUNDEBUG(ifp,
 1987                     "%s have data in the queue.  Len = %d, minor = %#x\n",
 1988                     ifp->if_xname, ifp->if_snd.ifq_len, dev2unit(dev));
 1989                 ret = 1;
 1990         } else {
 1991                 TUNDEBUG(ifp,
 1992                     "%s waiting for data, minor = %#x\n", ifp->if_xname,
 1993                     dev2unit(dev));
 1994                 ret = 0;
 1995         }
 1996 
 1997         return (ret);
 1998 }
 1999 
 2000 /*
 2001  * Always can write, always return MTU in kn->data.
 2002  */
 2003 static int
 2004 tunkqwrite(struct knote *kn, long hint)
 2005 {
 2006         struct tuntap_softc     *tp = kn->kn_hook;
 2007         struct ifnet    *ifp = TUN2IFP(tp);
 2008 
 2009         kn->kn_data = ifp->if_mtu;
 2010 
 2011         return (1);
 2012 }
 2013 
 2014 static void
 2015 tunkqdetach(struct knote *kn)
 2016 {
 2017         struct tuntap_softc     *tp = kn->kn_hook;
 2018 
 2019         knlist_remove(&tp->tun_rsel.si_note, kn, 0);
 2020 }

Cache object: e9dbd4f1bcbdef09f43daa6cc4bde45f


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]


This page is part of the FreeBSD/Linux Kernel Cross-Reference, and was automatically generated using a modified version of the LXR engine.