The Design and Implementation of the FreeBSD Operating System, Second Edition
Now available: The Design and Implementation of the FreeBSD Operating System (Second Edition)


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]

FreeBSD/Linux Kernel Cross Reference
sys/netinet/ip_encap.c

Version: -  FREEBSD  -  FREEBSD-13-STABLE  -  FREEBSD-13-0  -  FREEBSD-12-STABLE  -  FREEBSD-12-0  -  FREEBSD-11-STABLE  -  FREEBSD-11-0  -  FREEBSD-10-STABLE  -  FREEBSD-10-0  -  FREEBSD-9-STABLE  -  FREEBSD-9-0  -  FREEBSD-8-STABLE  -  FREEBSD-8-0  -  FREEBSD-7-STABLE  -  FREEBSD-7-0  -  FREEBSD-6-STABLE  -  FREEBSD-6-0  -  FREEBSD-5-STABLE  -  FREEBSD-5-0  -  FREEBSD-4-STABLE  -  FREEBSD-3-STABLE  -  FREEBSD22  -  l41  -  OPENBSD  -  linux-2.6  -  MK84  -  PLAN9  -  xnu-8792 
SearchContext: -  none  -  3  -  10 

    1 /*      $NetBSD: ip_encap.c,v 1.77 2022/12/07 08:33:02 knakahara Exp $  */
    2 /*      $KAME: ip_encap.c,v 1.73 2001/10/02 08:30:58 itojun Exp $       */
    3 
    4 /*
    5  * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project.
    6  * All rights reserved.
    7  *
    8  * Redistribution and use in source and binary forms, with or without
    9  * modification, are permitted provided that the following conditions
   10  * are met:
   11  * 1. Redistributions of source code must retain the above copyright
   12  *    notice, this list of conditions and the following disclaimer.
   13  * 2. Redistributions in binary form must reproduce the above copyright
   14  *    notice, this list of conditions and the following disclaimer in the
   15  *    documentation and/or other materials provided with the distribution.
   16  * 3. Neither the name of the project nor the names of its contributors
   17  *    may be used to endorse or promote products derived from this software
   18  *    without specific prior written permission.
   19  *
   20  * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND
   21  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
   22  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
   23  * ARE DISCLAIMED.  IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE
   24  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
   25  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
   26  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
   27  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
   28  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
   29  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
   30  * SUCH DAMAGE.
   31  */
   32 /*
   33  * My grandfather said that there's a devil inside tunnelling technology...
   34  *
   35  * We have surprisingly many protocols that want packets with IP protocol
   36  * #4 or #41.  Here's a list of protocols that want protocol #41:
   37  *      RFC1933 configured tunnel
   38  *      RFC1933 automatic tunnel
   39  *      RFC2401 IPsec tunnel
   40  *      RFC2473 IPv6 generic packet tunnelling
   41  *      RFC2529 6over4 tunnel
   42  *      RFC3056 6to4 tunnel
   43  *      isatap tunnel
   44  *      mobile-ip6 (uses RFC2473)
   45  * Here's a list of protocols that want protocol #4:
   46  *      RFC1853 IPv4-in-IPv4 tunnelling
   47  *      RFC2003 IPv4 encapsulation within IPv4
   48  *      RFC2344 reverse tunnelling for mobile-ip4
   49  *      RFC2401 IPsec tunnel
   50  * Well, what can I say.  They impose different en/decapsulation mechanisms
   51  * from each other, so they need separate protocol handlers.  The only one
   52  * we can easily determine by protocol # is IPsec, which always has
   53  * AH/ESP/IPComp header right after outer IP header.
   54  *
   55  * So, clearly good old protosw does not work for protocol #4 and #41.
   56  * The code will let you match protocol via src/dst address pair.
   57  */
   58 /* XXX is M_NETADDR correct? */
   59 
   60 #include <sys/cdefs.h>
   61 __KERNEL_RCSID(0, "$NetBSD: ip_encap.c,v 1.77 2022/12/07 08:33:02 knakahara Exp $");
   62 
   63 #ifdef _KERNEL_OPT
   64 #include "opt_mrouting.h"
   65 #include "opt_inet.h"
   66 #include "opt_net_mpsafe.h"
   67 #endif
   68 
   69 #include <sys/param.h>
   70 #include <sys/systm.h>
   71 #include <sys/socket.h>
   72 #include <sys/socketvar.h> /* for softnet_lock */
   73 #include <sys/sockio.h>
   74 #include <sys/mbuf.h>
   75 #include <sys/errno.h>
   76 #include <sys/queue.h>
   77 #include <sys/kmem.h>
   78 #include <sys/mutex.h>
   79 #include <sys/condvar.h>
   80 #include <sys/psref.h>
   81 #include <sys/pslist.h>
   82 #include <sys/thmap.h>
   83 
   84 #include <net/if.h>
   85 
   86 #include <netinet/in.h>
   87 #include <netinet/in_systm.h>
   88 #include <netinet/ip.h>
   89 #include <netinet/ip_var.h>
   90 #include <netinet/ip_encap.h>
   91 #ifdef MROUTING
   92 #include <netinet/ip_mroute.h>
   93 #endif /* MROUTING */
   94 
   95 #ifdef INET6
   96 #include <netinet/ip6.h>
   97 #include <netinet6/ip6_var.h>
   98 #include <netinet6/ip6protosw.h> /* for struct ip6ctlparam */
   99 #include <netinet6/in6_var.h>
  100 #include <netinet6/in6_pcb.h>
  101 #include <netinet/icmp6.h>
  102 #endif
  103 
  104 #ifdef NET_MPSAFE
  105 #define ENCAP_MPSAFE    1
  106 #endif
  107 
  108 enum direction { INBOUND, OUTBOUND };
  109 
  110 #ifdef INET
  111 static struct encaptab *encap4_lookup(struct mbuf *, int, int, enum direction,
  112     struct psref *);
  113 #endif
  114 #ifdef INET6
  115 static struct encaptab *encap6_lookup(struct mbuf *, int, int, enum direction,
  116     struct psref *);
  117 #endif
  118 static int encap_add(struct encaptab *);
  119 static int encap_remove(struct encaptab *);
  120 static void encap_afcheck(int, const struct sockaddr *, const struct sockaddr *);
  121 static void encap_key_init(struct encap_key *, const struct sockaddr *,
  122     const struct sockaddr *);
  123 static void encap_key_inc(struct encap_key *);
  124 
  125 /*
  126  * In encap[46]_lookup(), ep->func can sleep(e.g. rtalloc1) while walking
  127  * encap_table. So, it cannot use pserialize_read_enter()
  128  */
  129 static struct {
  130         struct pslist_head      list;
  131         pserialize_t            psz;
  132         struct psref_class      *elem_class; /* for the element of et_list */
  133 } encaptab  __cacheline_aligned = {
  134         .list = PSLIST_INITIALIZER,
  135 };
  136 #define encap_table encaptab.list
  137 
  138 static struct {
  139         kmutex_t        lock;
  140         kcondvar_t      cv;
  141         struct lwp      *busy;
  142 } encap_whole __cacheline_aligned;
  143 
  144 static thmap_t *encap_map[2];   /* 0 for AF_INET, 1 for AF_INET6 */
  145 
  146 static bool encap_initialized = false;
  147 /*
  148  * must be done before other encap interfaces initialization.
  149  */
  150 void
  151 encapinit(void)
  152 {
  153 
  154         if (encap_initialized)
  155                 return;
  156 
  157         encaptab.psz = pserialize_create();
  158         encaptab.elem_class = psref_class_create("encapelem", IPL_SOFTNET);
  159 
  160         mutex_init(&encap_whole.lock, MUTEX_DEFAULT, IPL_NONE);
  161         cv_init(&encap_whole.cv, "ip_encap cv");
  162         encap_whole.busy = NULL;
  163 
  164         encap_initialized = true;
  165 }
  166 
  167 void
  168 encap_init(void)
  169 {
  170         static int initialized = 0;
  171 
  172         if (initialized)
  173                 return;
  174         initialized++;
  175 #if 0
  176         /*
  177          * we cannot use LIST_INIT() here, since drivers may want to call
  178          * encap_attach(), on driver attach.  encap_init() will be called
  179          * on AF_INET{,6} initialization, which happens after driver
  180          * initialization - using LIST_INIT() here can nuke encap_attach()
  181          * from drivers.
  182          */
  183         PSLIST_INIT(&encap_table);
  184 #endif
  185 
  186         encap_map[0] = thmap_create(0, NULL, THMAP_NOCOPY);
  187 #ifdef INET6
  188         encap_map[1] = thmap_create(0, NULL, THMAP_NOCOPY);
  189 #endif
  190 }
  191 
  192 #ifdef INET
  193 static struct encaptab *
  194 encap4_lookup(struct mbuf *m, int off, int proto, enum direction dir,
  195     struct psref *match_psref)
  196 {
  197         struct ip *ip;
  198         struct ip_pack4 pack;
  199         struct encaptab *ep, *match;
  200         int prio, matchprio;
  201         int s;
  202         thmap_t *emap = encap_map[0];
  203         struct encap_key key;
  204 
  205         KASSERT(m->m_len >= sizeof(*ip));
  206 
  207         ip = mtod(m, struct ip *);
  208 
  209         memset(&pack, 0, sizeof(pack));
  210         pack.p.sp_len = sizeof(pack);
  211         pack.mine.sin_family = pack.yours.sin_family = AF_INET;
  212         pack.mine.sin_len = pack.yours.sin_len = sizeof(struct sockaddr_in);
  213         if (dir == INBOUND) {
  214                 pack.mine.sin_addr = ip->ip_dst;
  215                 pack.yours.sin_addr = ip->ip_src;
  216         } else {
  217                 pack.mine.sin_addr = ip->ip_src;
  218                 pack.yours.sin_addr = ip->ip_dst;
  219         }
  220 
  221         match = NULL;
  222         matchprio = 0;
  223 
  224         s = pserialize_read_enter();
  225 
  226         encap_key_init(&key, sintosa(&pack.mine), sintosa(&pack.yours));
  227         while ((ep = thmap_get(emap, &key, sizeof(key))) != NULL) {
  228                 struct psref elem_psref;
  229 
  230                 KASSERT(ep->af == AF_INET);
  231 
  232                 if (ep->proto >= 0 && ep->proto != proto) {
  233                         encap_key_inc(&key);
  234                         continue;
  235                 }
  236 
  237                 psref_acquire(&elem_psref, &ep->psref,
  238                     encaptab.elem_class);
  239                 if (ep->func) {
  240                         pserialize_read_exit(s);
  241                         prio = (*ep->func)(m, off, proto, ep->arg);
  242                         s = pserialize_read_enter();
  243                 } else {
  244                         prio = pack.mine.sin_len + pack.yours.sin_len;
  245                 }
  246 
  247                 if (prio <= 0) {
  248                         psref_release(&elem_psref, &ep->psref,
  249                             encaptab.elem_class);
  250                         encap_key_inc(&key);
  251                         continue;
  252                 }
  253                 if (prio > matchprio) {
  254                         /* release last matched ep */
  255                         if (match != NULL)
  256                                 psref_release(match_psref, &match->psref,
  257                                     encaptab.elem_class);
  258 
  259                         psref_copy(match_psref, &elem_psref,
  260                             encaptab.elem_class);
  261                         matchprio = prio;
  262                         match = ep;
  263                 }
  264 
  265                 psref_release(&elem_psref, &ep->psref,
  266                     encaptab.elem_class);
  267                 encap_key_inc(&key);
  268         }
  269 
  270         PSLIST_READER_FOREACH(ep, &encap_table, struct encaptab, chain) {
  271                 struct psref elem_psref;
  272 
  273                 if (ep->af != AF_INET)
  274                         continue;
  275                 if (ep->proto >= 0 && ep->proto != proto)
  276                         continue;
  277 
  278                 psref_acquire(&elem_psref, &ep->psref,
  279                     encaptab.elem_class);
  280                 pserialize_read_exit(s);
  281                 /* ep->func is sleepable. e.g. rtalloc1 */
  282                 prio = (*ep->func)(m, off, proto, ep->arg);
  283                 s = pserialize_read_enter();
  284 
  285                 /*
  286                  * We prioritize the matches by using bit length of the
  287                  * matches.  user-supplied matching function
  288                  * should return the bit length of the matches (for example,
  289                  * if both src/dst are matched for IPv4, 64 should be returned).
  290                  * 0 or negative return value means "it did not match".
  291                  *
  292                  * We need to loop through all the possible candidates
  293                  * to get the best match - the search takes O(n) for
  294                  * n attachments (i.e. interfaces).
  295                  */
  296                 if (prio <= 0) {
  297                         psref_release(&elem_psref, &ep->psref,
  298                             encaptab.elem_class);
  299                         continue;
  300                 }
  301                 if (prio > matchprio) {
  302                         /* release last matched ep */
  303                         if (match != NULL)
  304                                 psref_release(match_psref, &match->psref,
  305                                     encaptab.elem_class);
  306 
  307                         psref_copy(match_psref, &elem_psref,
  308                             encaptab.elem_class);
  309                         matchprio = prio;
  310                         match = ep;
  311                 }
  312                 KASSERTMSG((match == NULL) || psref_held(&match->psref,
  313                         encaptab.elem_class),
  314                     "current match = %p, but not hold its psref", match);
  315 
  316                 psref_release(&elem_psref, &ep->psref,
  317                     encaptab.elem_class);
  318         }
  319         pserialize_read_exit(s);
  320 
  321         return match;
  322 }
  323 
  324 void
  325 encap4_input(struct mbuf *m, int off, int proto)
  326 {
  327         const struct encapsw *esw;
  328         struct encaptab *match;
  329         struct psref match_psref;
  330 
  331         match = encap4_lookup(m, off, proto, INBOUND, &match_psref);
  332         if (match) {
  333                 /* found a match, "match" has the best one */
  334                 esw = match->esw;
  335                 if (esw && esw->encapsw4.pr_input) {
  336                         (*esw->encapsw4.pr_input)(m, off, proto, match->arg);
  337                         psref_release(&match_psref, &match->psref,
  338                             encaptab.elem_class);
  339                 } else {
  340                         psref_release(&match_psref, &match->psref,
  341                             encaptab.elem_class);
  342                         m_freem(m);
  343                 }
  344                 return;
  345         }
  346 
  347         /* last resort: inject to raw socket */
  348         SOFTNET_LOCK_IF_NET_MPSAFE();
  349         rip_input(m, off, proto);
  350         SOFTNET_UNLOCK_IF_NET_MPSAFE();
  351 }
  352 #endif
  353 
  354 #ifdef INET6
  355 static struct encaptab *
  356 encap6_lookup(struct mbuf *m, int off, int proto, enum direction dir,
  357     struct psref *match_psref)
  358 {
  359         struct ip6_hdr *ip6;
  360         struct ip_pack6 pack;
  361         int prio, matchprio;
  362         int s;
  363         struct encaptab *ep, *match;
  364         thmap_t *emap = encap_map[1];
  365         struct encap_key key;
  366 
  367         KASSERT(m->m_len >= sizeof(*ip6));
  368 
  369         ip6 = mtod(m, struct ip6_hdr *);
  370 
  371         memset(&pack, 0, sizeof(pack));
  372         pack.p.sp_len = sizeof(pack);
  373         pack.mine.sin6_family = pack.yours.sin6_family = AF_INET6;
  374         pack.mine.sin6_len = pack.yours.sin6_len = sizeof(struct sockaddr_in6);
  375         if (dir == INBOUND) {
  376                 pack.mine.sin6_addr = ip6->ip6_dst;
  377                 pack.yours.sin6_addr = ip6->ip6_src;
  378         } else {
  379                 pack.mine.sin6_addr = ip6->ip6_src;
  380                 pack.yours.sin6_addr = ip6->ip6_dst;
  381         }
  382 
  383         match = NULL;
  384         matchprio = 0;
  385 
  386         s = pserialize_read_enter();
  387 
  388         encap_key_init(&key, sin6tosa(&pack.mine), sin6tosa(&pack.yours));
  389         while ((ep = thmap_get(emap, &key, sizeof(key))) != NULL) {
  390                 struct psref elem_psref;
  391 
  392                 KASSERT(ep->af == AF_INET6);
  393 
  394                 if (ep->proto >= 0 && ep->proto != proto) {
  395                         encap_key_inc(&key);
  396                         continue;
  397                 }
  398 
  399                 psref_acquire(&elem_psref, &ep->psref,
  400                     encaptab.elem_class);
  401                 if (ep->func) {
  402                         pserialize_read_exit(s);
  403                         prio = (*ep->func)(m, off, proto, ep->arg);
  404                         s = pserialize_read_enter();
  405                 } else {
  406                         prio = pack.mine.sin6_len + pack.yours.sin6_len;
  407                 }
  408 
  409                 if (prio <= 0) {
  410                         psref_release(&elem_psref, &ep->psref,
  411                             encaptab.elem_class);
  412                         encap_key_inc(&key);
  413                         continue;
  414                 }
  415                 if (prio > matchprio) {
  416                         /* release last matched ep */
  417                         if (match != NULL)
  418                                 psref_release(match_psref, &match->psref,
  419                                     encaptab.elem_class);
  420 
  421                         psref_copy(match_psref, &elem_psref,
  422                             encaptab.elem_class);
  423                         matchprio = prio;
  424                         match = ep;
  425                 }
  426                 psref_release(&elem_psref, &ep->psref,
  427                     encaptab.elem_class);
  428                 encap_key_inc(&key);
  429         }
  430 
  431         PSLIST_READER_FOREACH(ep, &encap_table, struct encaptab, chain) {
  432                 struct psref elem_psref;
  433 
  434                 if (ep->af != AF_INET6)
  435                         continue;
  436                 if (ep->proto >= 0 && ep->proto != proto)
  437                         continue;
  438 
  439                 psref_acquire(&elem_psref, &ep->psref,
  440                     encaptab.elem_class);
  441 
  442                 pserialize_read_exit(s);
  443                 /* ep->func is sleepable. e.g. rtalloc1 */
  444                 prio = (*ep->func)(m, off, proto, ep->arg);
  445                 s = pserialize_read_enter();
  446 
  447                 /* see encap4_lookup() for issues here */
  448                 if (prio <= 0) {
  449                         psref_release(&elem_psref, &ep->psref,
  450                             encaptab.elem_class);
  451                         continue;
  452                 }
  453                 if (prio > matchprio) {
  454                         /* release last matched ep */
  455                         if (match != NULL)
  456                                 psref_release(match_psref, &match->psref,
  457                                     encaptab.elem_class);
  458 
  459                         psref_copy(match_psref, &elem_psref,
  460                             encaptab.elem_class);
  461                         matchprio = prio;
  462                         match = ep;
  463                 }
  464                 KASSERTMSG((match == NULL) || psref_held(&match->psref,
  465                         encaptab.elem_class),
  466                     "current match = %p, but not hold its psref", match);
  467 
  468                 psref_release(&elem_psref, &ep->psref,
  469                     encaptab.elem_class);
  470         }
  471         pserialize_read_exit(s);
  472 
  473         return match;
  474 }
  475 
  476 int
  477 encap6_input(struct mbuf **mp, int *offp, int proto)
  478 {
  479         struct mbuf *m = *mp;
  480         const struct encapsw *esw;
  481         struct encaptab *match;
  482         struct psref match_psref;
  483         int rv;
  484 
  485         match = encap6_lookup(m, *offp, proto, INBOUND, &match_psref);
  486 
  487         if (match) {
  488                 /* found a match */
  489                 esw = match->esw;
  490                 if (esw && esw->encapsw6.pr_input) {
  491                         int ret;
  492                         ret = (*esw->encapsw6.pr_input)(mp, offp, proto,
  493                             match->arg);
  494                         psref_release(&match_psref, &match->psref,
  495                             encaptab.elem_class);
  496                         return ret;
  497                 } else {
  498                         psref_release(&match_psref, &match->psref,
  499                             encaptab.elem_class);
  500                         m_freem(m);
  501                         return IPPROTO_DONE;
  502                 }
  503         }
  504 
  505         /* last resort: inject to raw socket */
  506         SOFTNET_LOCK_IF_NET_MPSAFE();
  507         rv = rip6_input(mp, offp, proto);
  508         SOFTNET_UNLOCK_IF_NET_MPSAFE();
  509         return rv;
  510 }
  511 #endif
  512 
  513 static int
  514 encap_add(struct encaptab *ep)
  515 {
  516 
  517         KASSERT(encap_lock_held());
  518 
  519         PSLIST_WRITER_INSERT_HEAD(&encap_table, ep, chain);
  520 
  521         return 0;
  522 }
  523 
  524 static int
  525 encap_remove(struct encaptab *ep)
  526 {
  527         int error = 0;
  528 
  529         KASSERT(encap_lock_held());
  530 
  531         PSLIST_WRITER_REMOVE(ep, chain);
  532 
  533         return error;
  534 }
  535 
  536 static void
  537 encap_afcheck(int af, const struct sockaddr *sp, const struct sockaddr *dp)
  538 {
  539 
  540         KASSERT(sp != NULL && dp != NULL);
  541         KASSERT(sp->sa_len == dp->sa_len);
  542         KASSERT(af == sp->sa_family && af == dp->sa_family);
  543 
  544         socklen_t len __diagused = sockaddr_getsize_by_family(af);
  545         KASSERT(len != 0 && len == sp->sa_len && len == dp->sa_len);
  546 }
  547 
  548 const struct encaptab *
  549 encap_attach_func(int af, int proto,
  550     encap_priofunc_t *func,
  551     const struct encapsw *esw, void *arg)
  552 {
  553         struct encaptab *ep;
  554         int error;
  555 #ifndef ENCAP_MPSAFE
  556         int s;
  557 
  558         s = splsoftnet();
  559 #endif
  560 
  561         ASSERT_SLEEPABLE();
  562 
  563         /* sanity check on args */
  564         KASSERT(func != NULL);
  565         KASSERT(af == AF_INET
  566 #ifdef INET6
  567             || af == AF_INET6
  568 #endif
  569         );
  570 
  571         ep = kmem_alloc(sizeof(*ep), KM_SLEEP);
  572         memset(ep, 0, sizeof(*ep));
  573 
  574         ep->af = af;
  575         ep->proto = proto;
  576         ep->func = func;
  577         ep->esw = esw;
  578         ep->arg = arg;
  579         psref_target_init(&ep->psref, encaptab.elem_class);
  580 
  581         error = encap_add(ep);
  582         if (error)
  583                 goto gc;
  584 
  585         error = 0;
  586 #ifndef ENCAP_MPSAFE
  587         splx(s);
  588 #endif
  589         return ep;
  590 
  591 gc:
  592         kmem_free(ep, sizeof(*ep));
  593 #ifndef ENCAP_MPSAFE
  594         splx(s);
  595 #endif
  596         return NULL;
  597 }
  598 
  599 static void
  600 encap_key_init(struct encap_key *key,
  601     const struct sockaddr *local, const struct sockaddr *remote)
  602 {
  603 
  604         memset(key, 0, sizeof(*key));
  605 
  606         sockaddr_copy(&key->local_sa, sizeof(key->local_u), local);
  607         sockaddr_copy(&key->remote_sa, sizeof(key->remote_u), remote);
  608 }
  609 
  610 static void
  611 encap_key_inc(struct encap_key *key)
  612 {
  613 
  614         (key->seq)++;
  615 }
  616 
  617 static void
  618 encap_key_dec(struct encap_key *key)
  619 {
  620 
  621         (key->seq)--;
  622 }
  623 
  624 static void
  625 encap_key_copy(struct encap_key *dst, const struct encap_key *src)
  626 {
  627 
  628         memset(dst, 0, sizeof(*dst));
  629         *dst = *src;
  630 }
  631 
  632 /*
  633  * src is always my side, and dst is always remote side.
  634  * Return value will be necessary as input (cookie) for encap_detach().
  635  */
  636 const struct encaptab *
  637 encap_attach_addr(int af, int proto,
  638     const struct sockaddr *src, const struct sockaddr *dst,
  639     encap_priofunc_t *func,
  640     const struct encapsw *esw, void *arg)
  641 {
  642         struct encaptab *ep;
  643         size_t l;
  644         thmap_t *emap;
  645         void *retep;
  646         struct ip_pack4 *pack4;
  647 #ifdef INET6
  648         struct ip_pack6 *pack6;
  649 #endif
  650 
  651         ASSERT_SLEEPABLE();
  652 
  653         encap_afcheck(af, src, dst);
  654 
  655         switch (af) {
  656         case AF_INET:
  657                 l = sizeof(*pack4);
  658                 emap = encap_map[0];
  659                 break;
  660 #ifdef INET6
  661         case AF_INET6:
  662                 l = sizeof(*pack6);
  663                 emap = encap_map[1];
  664                 break;
  665 #endif
  666         default:
  667                 return NULL;
  668         }
  669 
  670         ep = kmem_zalloc(sizeof(*ep), KM_SLEEP);
  671         ep->addrpack = kmem_zalloc(l, KM_SLEEP);
  672         ep->addrpack->sa_len = l & 0xff;
  673         ep->af = af;
  674         ep->proto = proto;
  675         ep->flag = IP_ENCAP_ADDR_ENABLE;
  676         switch (af) {
  677         case AF_INET:
  678                 pack4 = (struct ip_pack4 *)ep->addrpack;
  679                 ep->src = (struct sockaddr *)&pack4->mine;
  680                 ep->dst = (struct sockaddr *)&pack4->yours;
  681                 break;
  682 #ifdef INET6
  683         case AF_INET6:
  684                 pack6 = (struct ip_pack6 *)ep->addrpack;
  685                 ep->src = (struct sockaddr *)&pack6->mine;
  686                 ep->dst = (struct sockaddr *)&pack6->yours;
  687                 break;
  688 #endif
  689         }
  690         memcpy(ep->src, src, src->sa_len);
  691         memcpy(ep->dst, dst, dst->sa_len);
  692         ep->esw = esw;
  693         ep->arg = arg;
  694         ep->func = func;
  695         psref_target_init(&ep->psref, encaptab.elem_class);
  696 
  697         encap_key_init(&ep->key, src, dst);
  698         while ((retep = thmap_put(emap, &ep->key, sizeof(ep->key), ep)) != ep)
  699                 encap_key_inc(&ep->key);
  700         return ep;
  701 }
  702 
  703 
  704 /* XXX encap4_ctlinput() is necessary if we set DF=1 on outer IPv4 header */
  705 
  706 #ifdef INET6
  707 void *
  708 encap6_ctlinput(int cmd, const struct sockaddr *sa, void *d0)
  709 {
  710         void *d = d0;
  711         struct ip6_hdr *ip6;
  712         struct mbuf *m;
  713         int off;
  714         struct ip6ctlparam *ip6cp = NULL;
  715         int nxt;
  716         int s;
  717         struct encaptab *ep;
  718         const struct encapsw *esw;
  719 
  720         if (sa->sa_family != AF_INET6 ||
  721             sa->sa_len != sizeof(struct sockaddr_in6))
  722                 return NULL;
  723 
  724         if ((unsigned)cmd >= PRC_NCMDS)
  725                 return NULL;
  726         if (cmd == PRC_HOSTDEAD)
  727                 d = NULL;
  728         else if (cmd == PRC_MSGSIZE)
  729                 ; /* special code is present, see below */
  730         else if (inet6ctlerrmap[cmd] == 0)
  731                 return NULL;
  732 
  733         /* if the parameter is from icmp6, decode it. */
  734         if (d != NULL) {
  735                 ip6cp = (struct ip6ctlparam *)d;
  736                 m = ip6cp->ip6c_m;
  737                 ip6 = ip6cp->ip6c_ip6;
  738                 off = ip6cp->ip6c_off;
  739                 nxt = ip6cp->ip6c_nxt;
  740 
  741                 if (ip6 && cmd == PRC_MSGSIZE) {
  742                         int valid = 0;
  743                         struct encaptab *match;
  744                         struct psref elem_psref;
  745 
  746                         /*
  747                         * Check to see if we have a valid encap configuration.
  748                         */
  749                         match = encap6_lookup(m, off, nxt, OUTBOUND,
  750                             &elem_psref);
  751                         if (match) {
  752                                 valid++;
  753                                 psref_release(&elem_psref, &match->psref,
  754                                     encaptab.elem_class);
  755                         }
  756 
  757                         /*
  758                         * Depending on the value of "valid" and routing table
  759                         * size (mtudisc_{hi,lo}wat), we will:
  760                         * - recalcurate the new MTU and create the
  761                         *   corresponding routing entry, or
  762                         * - ignore the MTU change notification.
  763                         */
  764                         icmp6_mtudisc_update((struct ip6ctlparam *)d, valid);
  765                 }
  766         } else {
  767                 m = NULL;
  768                 ip6 = NULL;
  769                 nxt = -1;
  770         }
  771 
  772         /* inform all listeners */
  773 
  774         s = pserialize_read_enter();
  775         PSLIST_READER_FOREACH(ep, &encap_table, struct encaptab, chain) {
  776                 struct psref elem_psref;
  777 
  778                 if (ep->af != AF_INET6)
  779                         continue;
  780                 if (ep->proto >= 0 && ep->proto != nxt)
  781                         continue;
  782 
  783                 /* should optimize by looking at address pairs */
  784 
  785                 /* XXX need to pass ep->arg or ep itself to listeners */
  786                 psref_acquire(&elem_psref, &ep->psref,
  787                     encaptab.elem_class);
  788                 esw = ep->esw;
  789                 if (esw && esw->encapsw6.pr_ctlinput) {
  790                         pserialize_read_exit(s);
  791                         /* pr_ctlinput is sleepable. e.g. rtcache_free */
  792                         (*esw->encapsw6.pr_ctlinput)(cmd, sa, d, ep->arg);
  793                         s = pserialize_read_enter();
  794                 }
  795                 psref_release(&elem_psref, &ep->psref,
  796                     encaptab.elem_class);
  797         }
  798         pserialize_read_exit(s);
  799 
  800         rip6_ctlinput(cmd, sa, d0);
  801         return NULL;
  802 }
  803 #endif
  804 
  805 static int
  806 encap_detach_addr(const struct encaptab *ep)
  807 {
  808         thmap_t *emap;
  809         struct encaptab *retep;
  810         struct encaptab *target;
  811         void *thgc;
  812         struct encap_key key;
  813 
  814         KASSERT(encap_lock_held());
  815         KASSERT(ep->flag & IP_ENCAP_ADDR_ENABLE);
  816 
  817         switch (ep->af) {
  818         case AF_INET:
  819                 emap = encap_map[0];
  820                 break;
  821 #ifdef INET6
  822         case AF_INET6:
  823                 emap = encap_map[1];
  824                 break;
  825 #endif
  826         default:
  827                 return EINVAL;
  828         }
  829 
  830         retep = thmap_del(emap, &ep->key, sizeof(ep->key));
  831         if (retep != ep) {
  832                 return ENOENT;
  833         }
  834         target = retep;
  835 
  836         /*
  837          * To keep continuity, decrement seq after detached encaptab.
  838          */
  839         encap_key_copy(&key, &ep->key);
  840         encap_key_inc(&key);
  841         while ((retep = thmap_del(emap, &key, sizeof(key))) != NULL) {
  842                 void *pp;
  843 
  844                 encap_key_dec(&retep->key);
  845                 pp = thmap_put(emap, &retep->key, sizeof(retep->key), retep);
  846                 KASSERT(retep == pp);
  847 
  848                 encap_key_inc(&key);
  849         }
  850 
  851         thgc = thmap_stage_gc(emap);
  852         pserialize_perform(encaptab.psz);
  853         thmap_gc(emap, thgc);
  854         psref_target_destroy(&target->psref, encaptab.elem_class);
  855         kmem_free(target->addrpack, target->addrpack->sa_len);
  856         kmem_free(target, sizeof(*target));
  857 
  858         return 0;
  859 }
  860 
  861 int
  862 encap_detach(const struct encaptab *cookie)
  863 {
  864         const struct encaptab *ep = cookie;
  865         struct encaptab *p;
  866         int error;
  867 
  868         KASSERT(encap_lock_held());
  869 
  870         if (ep->flag & IP_ENCAP_ADDR_ENABLE)
  871                 return encap_detach_addr(ep);
  872 
  873         PSLIST_WRITER_FOREACH(p, &encap_table, struct encaptab, chain) {
  874                 if (p == ep) {
  875                         error = encap_remove(p);
  876                         if (error)
  877                                 return error;
  878                         else
  879                                 break;
  880                 }
  881         }
  882         if (p == NULL)
  883                 return ENOENT;
  884 
  885         pserialize_perform(encaptab.psz);
  886         psref_target_destroy(&p->psref,
  887             encaptab.elem_class);
  888         kmem_free(p, sizeof(*p));
  889 
  890         return 0;
  891 }
  892 
  893 int
  894 encap_lock_enter(void)
  895 {
  896         int error;
  897 
  898         mutex_enter(&encap_whole.lock);
  899         while (encap_whole.busy != NULL) {
  900                 error = cv_wait_sig(&encap_whole.cv, &encap_whole.lock);
  901                 if (error) {
  902                         mutex_exit(&encap_whole.lock);
  903                         return error;
  904                 }
  905         }
  906         KASSERT(encap_whole.busy == NULL);
  907         encap_whole.busy = curlwp;
  908         mutex_exit(&encap_whole.lock);
  909 
  910         return 0;
  911 }
  912 
  913 void
  914 encap_lock_exit(void)
  915 {
  916 
  917         mutex_enter(&encap_whole.lock);
  918         KASSERT(encap_whole.busy == curlwp);
  919         encap_whole.busy = NULL;
  920         cv_broadcast(&encap_whole.cv);
  921         mutex_exit(&encap_whole.lock);
  922 }
  923 
  924 bool
  925 encap_lock_held(void)
  926 {
  927 
  928         return (encap_whole.busy == curlwp);
  929 }

Cache object: 0c2949d01038e6ed85f82673acd1333f


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]


This page is part of the FreeBSD/Linux Kernel Cross-Reference, and was automatically generated using a modified version of the LXR engine.