The Design and Implementation of the FreeBSD Operating System, Second Edition
Now available: The Design and Implementation of the FreeBSD Operating System (Second Edition)


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]

FreeBSD/Linux Kernel Cross Reference
sys/netpfil/ipfw/nat64/nat64lsn.c

Version: -  FREEBSD  -  FREEBSD-13-STABLE  -  FREEBSD-13-0  -  FREEBSD-12-STABLE  -  FREEBSD-12-0  -  FREEBSD-11-STABLE  -  FREEBSD-11-0  -  FREEBSD-10-STABLE  -  FREEBSD-10-0  -  FREEBSD-9-STABLE  -  FREEBSD-9-0  -  FREEBSD-8-STABLE  -  FREEBSD-8-0  -  FREEBSD-7-STABLE  -  FREEBSD-7-0  -  FREEBSD-6-STABLE  -  FREEBSD-6-0  -  FREEBSD-5-STABLE  -  FREEBSD-5-0  -  FREEBSD-4-STABLE  -  FREEBSD-3-STABLE  -  FREEBSD22  -  l41  -  OPENBSD  -  linux-2.6  -  MK84  -  PLAN9  -  xnu-8792 
SearchContext: -  none  -  3  -  10 

    1 /*-
    2  * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
    3  *
    4  * Copyright (c) 2015-2019 Yandex LLC
    5  * Copyright (c) 2015 Alexander V. Chernikov <melifaro@FreeBSD.org>
    6  * Copyright (c) 2016-2019 Andrey V. Elsukov <ae@FreeBSD.org>
    7  *
    8  * Redistribution and use in source and binary forms, with or without
    9  * modification, are permitted provided that the following conditions
   10  * are met:
   11  *
   12  * 1. Redistributions of source code must retain the above copyright
   13  *    notice, this list of conditions and the following disclaimer.
   14  * 2. Redistributions in binary form must reproduce the above copyright
   15  *    notice, this list of conditions and the following disclaimer in the
   16  *    documentation and/or other materials provided with the distribution.
   17  *
   18  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
   19  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
   20  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
   21  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
   22  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
   23  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
   24  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
   25  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
   26  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
   27  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
   28  */
   29 
   30 #include <sys/cdefs.h>
   31 __FBSDID("$FreeBSD$");
   32 
   33 #include <sys/param.h>
   34 #include <sys/systm.h>
   35 #include <sys/counter.h>
   36 #include <sys/ck.h>
   37 #include <sys/epoch.h>
   38 #include <sys/errno.h>
   39 #include <sys/hash.h>
   40 #include <sys/kernel.h>
   41 #include <sys/lock.h>
   42 #include <sys/malloc.h>
   43 #include <sys/mbuf.h>
   44 #include <sys/module.h>
   45 #include <sys/rmlock.h>
   46 #include <sys/socket.h>
   47 #include <sys/syslog.h>
   48 #include <sys/sysctl.h>
   49 
   50 #include <net/if.h>
   51 #include <net/if_var.h>
   52 #include <net/if_pflog.h>
   53 #include <net/pfil.h>
   54 
   55 #include <netinet/in.h>
   56 #include <netinet/ip.h>
   57 #include <netinet/ip_var.h>
   58 #include <netinet/ip_fw.h>
   59 #include <netinet/ip6.h>
   60 #include <netinet/icmp6.h>
   61 #include <netinet/ip_icmp.h>
   62 #include <netinet/tcp.h>
   63 #include <netinet/udp.h>
   64 #include <netinet6/in6_var.h>
   65 #include <netinet6/ip6_var.h>
   66 #include <netinet6/ip_fw_nat64.h>
   67 
   68 #include <netpfil/ipfw/ip_fw_private.h>
   69 #include <netpfil/pf/pf.h>
   70 
   71 #include "nat64lsn.h"
   72 
   73 MALLOC_DEFINE(M_NAT64LSN, "NAT64LSN", "NAT64LSN");
   74 
   75 #define NAT64LSN_EPOCH_ENTER(et)  NET_EPOCH_ENTER(et)
   76 #define NAT64LSN_EPOCH_EXIT(et)   NET_EPOCH_EXIT(et)
   77 #define NAT64LSN_EPOCH_ASSERT()   NET_EPOCH_ASSERT()
   78 #define NAT64LSN_EPOCH_CALL(c, f) NET_EPOCH_CALL((f), (c))
   79 
   80 static uma_zone_t nat64lsn_host_zone;
   81 static uma_zone_t nat64lsn_pgchunk_zone;
   82 static uma_zone_t nat64lsn_pg_zone;
   83 static uma_zone_t nat64lsn_aliaslink_zone;
   84 static uma_zone_t nat64lsn_state_zone;
   85 static uma_zone_t nat64lsn_job_zone;
   86 
   87 static void nat64lsn_periodic(void *data);
   88 #define PERIODIC_DELAY          4
   89 #define NAT64_LOOKUP(chain, cmd)        \
   90         (struct nat64lsn_cfg *)SRV_OBJECT((chain), (cmd)->arg1)
   91 /*
   92  * Delayed job queue, used to create new hosts
   93  * and new portgroups
   94  */
enum nat64lsn_jtype {
        JTYPE_NEWHOST = 1,      /* allocate a new IPv6 host entry */
        JTYPE_NEWPORTGROUP,     /* allocate a new portgroup for an alias */
        JTYPE_DESTROY,          /* free collected hosts/portgroups/chunks */
};
  100 
/*
 * One unit of deferred work for the jobs handler.  The union halves
 * are keyed by @jtype: the first anonymous struct carries the packet
 * and flow data needed to create a host/portgroup, the second carries
 * the lists of objects scheduled for destruction.
 */
struct nat64lsn_job_item {
        STAILQ_ENTRY(nat64lsn_job_item) entries;        /* job queue linkage */
        enum nat64lsn_jtype     jtype;                  /* selects union leg */

        union {
                struct { /* used by JTYPE_NEWHOST, JTYPE_NEWPORTGROUP */
                        struct mbuf             *m;     /* pending packet */
                        struct nat64lsn_host    *host;  /* host being created */
                        struct nat64lsn_state   *state; /* resulting state */
                        uint32_t                src6_hval; /* hosts hash value */
                        uint32_t                state_hval; /* states hash value */
                        struct ipfw_flow_id     f_id;   /* flow of @m */
                        in_addr_t               faddr;  /* IPv4 dest address */
                        uint16_t                port;   /* ULP source port/id */
                        uint8_t                 proto;  /* IPPROTO_* */
                        uint8_t                 done;   /* job completion flag */
                };
                struct { /* used by JTYPE_DESTROY */
                        struct nat64lsn_hosts_slist     hosts;      /* expired hosts */
                        struct nat64lsn_pg_slist        portgroups; /* expired PGs */
                        struct nat64lsn_pgchunk         *pgchunk;   /* chunk to free */
                        struct epoch_context            epoch_ctx;  /* deferred-free ctx */
                };
        };
};
  126 
  127 static struct mtx jmtx;
  128 #define JQUEUE_LOCK_INIT()      mtx_init(&jmtx, "qlock", NULL, MTX_DEF)
  129 #define JQUEUE_LOCK_DESTROY()   mtx_destroy(&jmtx)
  130 #define JQUEUE_LOCK()           mtx_lock(&jmtx)
  131 #define JQUEUE_UNLOCK()         mtx_unlock(&jmtx)
  132 
  133 static int nat64lsn_alloc_host(struct nat64lsn_cfg *cfg,
  134     struct nat64lsn_job_item *ji);
  135 static int nat64lsn_alloc_pg(struct nat64lsn_cfg *cfg,
  136     struct nat64lsn_job_item *ji);
  137 static struct nat64lsn_job_item *nat64lsn_create_job(
  138     struct nat64lsn_cfg *cfg, int jtype);
  139 static void nat64lsn_enqueue_job(struct nat64lsn_cfg *cfg,
  140     struct nat64lsn_job_item *ji);
  141 static void nat64lsn_job_destroy(epoch_context_t ctx);
  142 static void nat64lsn_destroy_host(struct nat64lsn_host *host);
  143 static void nat64lsn_destroy_pg(struct nat64lsn_pg *pg);
  144 
  145 static int nat64lsn_translate4(struct nat64lsn_cfg *cfg,
  146     const struct ipfw_flow_id *f_id, struct mbuf **mp);
  147 static int nat64lsn_translate6(struct nat64lsn_cfg *cfg,
  148     struct ipfw_flow_id *f_id, struct mbuf **mp);
  149 static int nat64lsn_translate6_internal(struct nat64lsn_cfg *cfg,
  150     struct mbuf **mp, struct nat64lsn_state *state, uint8_t flags);
  151 
  152 #define NAT64_BIT_TCP_FIN       0       /* FIN was seen */
  153 #define NAT64_BIT_TCP_SYN       1       /* First syn in->out */
  154 #define NAT64_BIT_TCP_ESTAB     2       /* Packet with Ack */
  155 #define NAT64_BIT_READY_IPV4    6       /* state is ready for translate4 */
  156 #define NAT64_BIT_STALE         7       /* state is going to be expired */
  157 
  158 #define NAT64_FLAG_FIN          (1 << NAT64_BIT_TCP_FIN)
  159 #define NAT64_FLAG_SYN          (1 << NAT64_BIT_TCP_SYN)
  160 #define NAT64_FLAG_ESTAB        (1 << NAT64_BIT_TCP_ESTAB)
  161 #define NAT64_FLAGS_TCP (NAT64_FLAG_SYN|NAT64_FLAG_ESTAB|NAT64_FLAG_FIN)
  162 
  163 #define NAT64_FLAG_READY        (1 << NAT64_BIT_READY_IPV4)
  164 #define NAT64_FLAG_STALE        (1 << NAT64_BIT_STALE)
  165 
  166 static inline uint8_t
  167 convert_tcp_flags(uint8_t flags)
  168 {
  169         uint8_t result;
  170 
  171         result = flags & (TH_FIN|TH_SYN);
  172         result |= (flags & TH_RST) >> 2; /* Treat RST as FIN */
  173         result |= (flags & TH_ACK) >> 2; /* Treat ACK as estab */
  174 
  175         return (result);
  176 }
  177 
  178 static void
  179 nat64lsn_log(struct pfloghdr *plog, struct mbuf *m, sa_family_t family,
  180     struct nat64lsn_state *state)
  181 {
  182 
  183         memset(plog, 0, sizeof(*plog));
  184         plog->length = PFLOG_HDRLEN;
  185         plog->af = family;
  186         plog->action = PF_NAT;
  187         plog->dir = PF_IN;
  188         plog->rulenr = htonl(state->ip_src);
  189         plog->subrulenr = htonl((uint32_t)(state->aport << 16) |
  190             (state->proto << 8) | (state->ip_dst & 0xff));
  191         plog->ruleset[0] = '\0';
  192         strlcpy(plog->ifname, "NAT64LSN", sizeof(plog->ifname));
  193         ipfw_bpf_mtap2(plog, PFLOG_HDRLEN, m);
  194 }
  195 
  196 #define HVAL(p, n, s)   jenkins_hash32((const uint32_t *)(p), (n), (s))
  197 #define HOST_HVAL(c, a) HVAL((a),\
  198     sizeof(struct in6_addr) / sizeof(uint32_t), (c)->hash_seed)
  199 #define HOSTS(c, v)     ((c)->hosts_hash[(v) & ((c)->hosts_hashsize - 1)])
  200 
  201 #define ALIASLINK_HVAL(c, f)    HVAL(&(f)->dst_ip6,\
  202     sizeof(struct in6_addr) * 2 / sizeof(uint32_t), (c)->hash_seed)
  203 #define ALIAS_BYHASH(c, v)      \
  204     ((c)->aliases[(v) & ((1 << (32 - (c)->plen4)) - 1)])
/*
 * Select the alias link (IPv4 alias address) used to translate
 * flows of @host.  Returns NULL when the host has no aliases yet.
 */
static struct nat64lsn_aliaslink*
nat64lsn_get_aliaslink(struct nat64lsn_cfg *cfg __unused,
    struct nat64lsn_host *host, const struct ipfw_flow_id *f_id __unused)
{

        /*
         * We can implement some different algorithms how
         * select an alias address (e.g. hashing on @f_id, the unused
         * parameters are kept for that purpose).
         * XXX: for now we use first available.
         */
        return (CK_SLIST_FIRST(&host->aliases));
}
  217 
  218 #define STATE_HVAL(c, d)        HVAL((d), 2, (c)->hash_seed)
  219 #define STATE_HASH(h, v)        \
  220     ((h)->states_hash[(v) & ((h)->states_hashsize - 1)])
  221 #define STATES_CHUNK(p, v)      \
  222     ((p)->chunks_count == 1 ? (p)->states : \
  223         ((p)->states_chunk[CHUNK_BY_FADDR(p, v)]))
  224 
  225 #ifdef __LP64__
  226 #define FREEMASK_FFSLL(pg, faddr)               \
  227     ffsll(*FREEMASK_CHUNK((pg), (faddr)))
  228 #define FREEMASK_BTR(pg, faddr, bit)    \
  229     ck_pr_btr_64(FREEMASK_CHUNK((pg), (faddr)), (bit))
  230 #define FREEMASK_BTS(pg, faddr, bit)    \
  231     ck_pr_bts_64(FREEMASK_CHUNK((pg), (faddr)), (bit))
  232 #define FREEMASK_ISSET(pg, faddr, bit)  \
  233     ISSET64(*FREEMASK_CHUNK((pg), (faddr)), (bit))
  234 #define FREEMASK_COPY(pg, n, out)       \
  235     (out) = ck_pr_load_64(FREEMASK_CHUNK((pg), (n)))
  236 #else
  237 static inline int
  238 freemask_ffsll(uint32_t *freemask)
  239 {
  240         int i;
  241 
  242         if ((i = ffsl(freemask[0])) != 0)
  243                 return (i);
  244         if ((i = ffsl(freemask[1])) != 0)
  245                 return (i + 32);
  246         return (0);
  247 }
  248 #define FREEMASK_FFSLL(pg, faddr)               \
  249     freemask_ffsll(FREEMASK_CHUNK((pg), (faddr)))
  250 #define FREEMASK_BTR(pg, faddr, bit)    \
  251     ck_pr_btr_32(FREEMASK_CHUNK((pg), (faddr)) + (bit) / 32, (bit) % 32)
  252 #define FREEMASK_BTS(pg, faddr, bit)    \
  253     ck_pr_bts_32(FREEMASK_CHUNK((pg), (faddr)) + (bit) / 32, (bit) % 32)
  254 #define FREEMASK_ISSET(pg, faddr, bit)  \
  255     ISSET32(*(FREEMASK_CHUNK((pg), (faddr)) + (bit) / 32), (bit) % 32)
  256 #define FREEMASK_COPY(pg, n, out)       \
  257     (out) = ck_pr_load_32(FREEMASK_CHUNK((pg), (n))) | \
  258         ((uint64_t)ck_pr_load_32(FREEMASK_CHUNK((pg), (n)) + 1) << 32)
  259 #endif /* !__LP64__ */
  260 
  261 #define NAT64LSN_TRY_PGCNT      32
/*
 * Find a portgroup (PG) that still has free state slots for @faddr.
 *
 * Lock-free: first the last-used PG cached in *pgptr is retried,
 * then up to NAT64LSN_TRY_PGCNT PGs are scanned linearly starting
 * from the cached index *pgidx.  The cached pointer and index are
 * published back with CAS, because the jobs handler may update them
 * concurrently.  Returns NULL when no PG with free states was found
 * within the scan window.
 */
static struct nat64lsn_pg*
nat64lsn_get_pg(uint32_t *chunkmask, uint32_t *pgmask,
    struct nat64lsn_pgchunk **chunks, struct nat64lsn_pg **pgptr,
    uint32_t *pgidx, in_addr_t faddr)
{
        struct nat64lsn_pg *pg, *oldpg;
        uint32_t idx, oldidx;
        int cnt;

        cnt = 0;
        /* First try last used PG */
        oldpg = pg = ck_pr_load_ptr(pgptr);
        idx = oldidx = ck_pr_load_32(pgidx);
        /* If pgidx is out of range, reset it to the first pgchunk */
        if (!ISSET32(*chunkmask, idx / 32))
                idx = 0;
        do {
                ck_pr_fence_load();
                if (pg != NULL && FREEMASK_BITCOUNT(pg, faddr) > 0) {
                        /*
                         * Found a PG with free states.  If it is not the
                         * cached one (cnt > 0), try to publish it as the
                         * new last-used PG.
                         * NOTE: it can be already updated by jobs handler,
                         *       thus we use CAS operation.
                         */
                        if (cnt > 0)
                                ck_pr_cas_ptr(pgptr, oldpg, pg);
                        return (pg);
                }
                /* Stop if idx is out of range */
                if (!ISSET32(*chunkmask, idx / 32))
                        break;

                if (ISSET32(pgmask[idx / 32], idx % 32))
                        pg = ck_pr_load_ptr(
                            &chunks[idx / 32]->pgptr[idx % 32]);
                else
                        pg = NULL;

                idx++;
        } while (++cnt < NAT64LSN_TRY_PGCNT);

        /* If pgidx is out of range, reset it to the first pgchunk */
        if (!ISSET32(*chunkmask, idx / 32))
                idx = 0;
        /* Remember where the scan stopped for the next caller. */
        ck_pr_cas_32(pgidx, oldidx, idx);
        return (NULL);
}
  310 
/*
 * Look up or create translation state for an IPv6->IPv4 flow.
 *
 * First the host's states hash is searched for an existing entry
 * matching (@proto, @faddr, @port, f_id->dst_port).  Otherwise a new
 * state is claimed from a portgroup of the host's alias: a free slot
 * is grabbed by atomically clearing its freemask bit, initialized,
 * inserted into the host's hash under HOST_LOCK, and only then marked
 * READY so that the IPv4->IPv6 path starts using it.
 * Must be called inside the network epoch.  Returns NULL when no
 * alias or no free state slot is available.
 */
static struct nat64lsn_state*
nat64lsn_get_state6to4(struct nat64lsn_cfg *cfg, struct nat64lsn_host *host,
    const struct ipfw_flow_id *f_id, uint32_t hval, in_addr_t faddr,
    uint16_t port, uint8_t proto)
{
        struct nat64lsn_aliaslink *link;
        struct nat64lsn_state *state;
        struct nat64lsn_pg *pg;
        int i, offset;

        NAT64LSN_EPOCH_ASSERT();

        /* Check that we already have state for given arguments */
        CK_SLIST_FOREACH(state, &STATE_HASH(host, hval), entries) {
                if (state->proto == proto && state->ip_dst == faddr &&
                    state->sport == port && state->dport == f_id->dst_port)
                        return (state);
        }

        link = nat64lsn_get_aliaslink(cfg, host, f_id);
        if (link == NULL)
                return (NULL);

        /* Pick a portgroup from the per-protocol chunk arrays. */
        switch (proto) {
        case IPPROTO_TCP:
                pg = nat64lsn_get_pg(
                    &link->alias->tcp_chunkmask, link->alias->tcp_pgmask,
                    link->alias->tcp, &link->alias->tcp_pg,
                    &link->alias->tcp_pgidx, faddr);
                break;
        case IPPROTO_UDP:
                pg = nat64lsn_get_pg(
                    &link->alias->udp_chunkmask, link->alias->udp_pgmask,
                    link->alias->udp, &link->alias->udp_pg,
                    &link->alias->udp_pgidx, faddr);
                break;
        case IPPROTO_ICMP:
                pg = nat64lsn_get_pg(
                    &link->alias->icmp_chunkmask, link->alias->icmp_pgmask,
                    link->alias->icmp, &link->alias->icmp_pg,
                    &link->alias->icmp_pgidx, faddr);
                break;
        default:
                panic("%s: wrong proto %d", __func__, proto);
        }
        if (pg == NULL)
                return (NULL);

        /* Check that PG has some free states */
        state = NULL;
        i = FREEMASK_BITCOUNT(pg, faddr);
        while (i-- > 0) {
                offset = FREEMASK_FFSLL(pg, faddr);
                if (offset == 0) {
                        /*
                         * We lost the race.
                         * No more free states in this PG.
                         */
                        break;
                }

                /* Lets try to atomically grab the state */
                if (FREEMASK_BTR(pg, faddr, offset - 1)) {
                        state = &STATES_CHUNK(pg, faddr)->state[offset - 1];
                        /* Initialize */
                        state->flags = proto != IPPROTO_TCP ? 0 :
                            convert_tcp_flags(f_id->_flags);
                        state->proto = proto;
                        state->aport = pg->base_port + offset - 1;
                        state->dport = f_id->dst_port;
                        state->sport = port;
                        state->ip6_dst = f_id->dst_ip6;
                        state->ip_dst = faddr;
                        state->ip_src = link->alias->addr;
                        state->hval = hval;
                        state->host = host;
                        SET_AGE(state->timestamp);

                        /* Insert new state into host's hash table */
                        HOST_LOCK(host);
                        CK_SLIST_INSERT_HEAD(&STATE_HASH(host, hval),
                            state, entries);
                        host->states_count++;
                        /*
                         * XXX: In case if host is going to be expired,
                         * reset NAT64LSN_DEADHOST flag.
                         */
                        host->flags &= ~NAT64LSN_DEADHOST;
                        HOST_UNLOCK(host);
                        NAT64STAT_INC(&cfg->base.stats, screated);
                        /* Mark the state as ready for translate4 */
                        ck_pr_fence_store();
                        ck_pr_bts_32(&state->flags, NAT64_BIT_READY_IPV4);
                        break;
                }
        }
        return (state);
}
  409 
  410 /*
  411  * Inspects icmp packets to see if the message contains different
  412  * packet header so we need to alter @addr and @port.
  413  */
  414 static int
  415 inspect_icmp_mbuf(struct mbuf **mp, uint8_t *proto, uint32_t *addr,
  416     uint16_t *port)
  417 {
  418         struct icmp *icmp;
  419         struct ip *ip;
  420         int off;
  421         uint8_t inner_proto;
  422 
  423         ip = mtod(*mp, struct ip *); /* Outer IP header */
  424         off = (ip->ip_hl << 2) + ICMP_MINLEN;
  425         if ((*mp)->m_len < off)
  426                 *mp = m_pullup(*mp, off);
  427         if (*mp == NULL)
  428                 return (ENOMEM);
  429 
  430         ip = mtod(*mp, struct ip *); /* Outer IP header */
  431         icmp = L3HDR(ip, struct icmp *);
  432         switch (icmp->icmp_type) {
  433         case ICMP_ECHO:
  434         case ICMP_ECHOREPLY:
  435                 /* Use icmp ID as distinguisher */
  436                 *port = ntohs(icmp->icmp_id);
  437                 return (0);
  438         case ICMP_UNREACH:
  439         case ICMP_TIMXCEED:
  440                 break;
  441         default:
  442                 return (EOPNOTSUPP);
  443         }
  444         /*
  445          * ICMP_UNREACH and ICMP_TIMXCEED contains IP header + 64 bits
  446          * of ULP header.
  447          */
  448         if ((*mp)->m_pkthdr.len < off + sizeof(struct ip) + ICMP_MINLEN)
  449                 return (EINVAL);
  450         if ((*mp)->m_len < off + sizeof(struct ip) + ICMP_MINLEN)
  451                 *mp = m_pullup(*mp, off + sizeof(struct ip) + ICMP_MINLEN);
  452         if (*mp == NULL)
  453                 return (ENOMEM);
  454         ip = mtodo(*mp, off); /* Inner IP header */
  455         inner_proto = ip->ip_p;
  456         off += ip->ip_hl << 2; /* Skip inner IP header */
  457         *addr = ntohl(ip->ip_src.s_addr);
  458         if ((*mp)->m_len < off + ICMP_MINLEN)
  459                 *mp = m_pullup(*mp, off + ICMP_MINLEN);
  460         if (*mp == NULL)
  461                 return (ENOMEM);
  462         switch (inner_proto) {
  463         case IPPROTO_TCP:
  464         case IPPROTO_UDP:
  465                 /* Copy source port from the header */
  466                 *port = ntohs(*((uint16_t *)mtodo(*mp, off)));
  467                 *proto = inner_proto;
  468                 return (0);
  469         case IPPROTO_ICMP:
  470                 /*
  471                  * We will translate only ICMP errors for our ICMP
  472                  * echo requests.
  473                  */
  474                 icmp = mtodo(*mp, off);
  475                 if (icmp->icmp_type != ICMP_ECHO)
  476                         return (EOPNOTSUPP);
  477                 *port = ntohs(icmp->icmp_id);
  478                 return (0);
  479         };
  480         return (EOPNOTSUPP);
  481 }
  482 
/*
 * Look up translation state for an IPv4->IPv6 flow by alias address
 * and destination @port.
 *
 * The port space is laid out as: 32 pgchunks per protocol, 32 PG
 * pointers per chunk, 64 states per PG; @port is decomposed into
 * (chunk_idx, pg_idx, state_idx) accordingly.  The state is returned
 * only if its freemask bit is clear (slot in use) and the READY flag
 * is set.  Lock-free; must be called inside the network epoch.
 */
static struct nat64lsn_state*
nat64lsn_get_state4to6(struct nat64lsn_cfg *cfg, struct nat64lsn_alias *alias,
    in_addr_t faddr, uint16_t port, uint8_t proto)
{
        struct nat64lsn_state *state;
        struct nat64lsn_pg *pg;
        int chunk_idx, pg_idx, state_idx;

        NAT64LSN_EPOCH_ASSERT();

        if (port < NAT64_MIN_PORT)
                return (NULL);
        /*
         * Alias keeps 32 pgchunks for each protocol.
         * Each pgchunk has 32 pointers to portgroup.
         * Each portgroup has 64 states for ports.
         */
        port -= NAT64_MIN_PORT;
        chunk_idx = port / 2048;        /* 32 PGs * 64 states */

        port -= chunk_idx * 2048;
        pg_idx = port / 64;
        state_idx = port % 64;

        /*
         * First check in proto_chunkmask that we have allocated PG chunk.
         * Then check in proto_pgmask that we have valid PG pointer.
         */
        pg = NULL;
        switch (proto) {
        case IPPROTO_TCP:
                if (ISSET32(alias->tcp_chunkmask, chunk_idx) &&
                    ISSET32(alias->tcp_pgmask[chunk_idx], pg_idx)) {
                        pg = alias->tcp[chunk_idx]->pgptr[pg_idx];
                        break;
                }
                return (NULL);
        case IPPROTO_UDP:
                if (ISSET32(alias->udp_chunkmask, chunk_idx) &&
                    ISSET32(alias->udp_pgmask[chunk_idx], pg_idx)) {
                        pg = alias->udp[chunk_idx]->pgptr[pg_idx];
                        break;
                }
                return (NULL);
        case IPPROTO_ICMP:
                if (ISSET32(alias->icmp_chunkmask, chunk_idx) &&
                    ISSET32(alias->icmp_pgmask[chunk_idx], pg_idx)) {
                        pg = alias->icmp[chunk_idx]->pgptr[pg_idx];
                        break;
                }
                return (NULL);
        default:
                panic("%s: wrong proto %d", __func__, proto);
        }
        if (pg == NULL)
                return (NULL);

        /* Freemask bit set means the slot is free, i.e. no state. */
        if (FREEMASK_ISSET(pg, faddr, state_idx))
                return (NULL);

        state = &STATES_CHUNK(pg, faddr)->state[state_idx];
        ck_pr_fence_load();
        /* Only hand out states fully initialized by the 6to4 path. */
        if (ck_pr_load_32(&state->flags) & NAT64_FLAG_READY)
                return (state);
        return (NULL);
}
  549 
/*
 * Reassemble IPv4 fragments, make PULLUP if needed, get some ULP fields
 * that might be unknown until reassembling is completed.
 * On success returns the (possibly new) mbuf with the IP header and
 * enough of the ULP header contiguous, and stores the ULP destination
 * port (TCP/UDP only) into @port.  Returns NULL on failure; the mbuf
 * is consumed in that case.
 */
static struct mbuf*
nat64lsn_reassemble4(struct nat64lsn_cfg *cfg, struct mbuf *m,
    uint16_t *port)
{
        struct ip *ip;
        int len;

        m = ip_reass(m);
        if (m == NULL)
                return (NULL);
        /* IP header must be contiguous after ip_reass() */
        ip = mtod(m, struct ip *);
        len = ip->ip_hl << 2;
        switch (ip->ip_p) {
        case IPPROTO_ICMP:
                len += ICMP_MINLEN; /* Enough to get icmp_id */
                break;
        case IPPROTO_TCP:
                len += sizeof(struct tcphdr);
                break;
        case IPPROTO_UDP:
                len += sizeof(struct udphdr);
                break;
        default:
                m_freem(m);
                NAT64STAT_INC(&cfg->base.stats, noproto);
                return (NULL);
        }
        if (m->m_len < len) {
                m = m_pullup(m, len);
                if (m == NULL) {
                        NAT64STAT_INC(&cfg->base.stats, nomem);
                        return (NULL);
                }
                /* m_pullup() may have relocated the header. */
                ip = mtod(m, struct ip *);
        }
        switch (ip->ip_p) {
        case IPPROTO_TCP:
                *port = ntohs(L3HDR(ip, struct tcphdr *)->th_dport);
                break;
        case IPPROTO_UDP:
                *port = ntohs(L3HDR(ip, struct udphdr *)->uh_dport);
                break;
        }
        return (m);
}
  600 
/*
 * Translate an inbound IPv4 packet to IPv6.
 *
 * The packet must be addressed to an alias inside [prefix4, pmask4].
 * Fragments are reassembled first; ICMP errors are inspected so the
 * lookup uses the embedded packet's address/port.  On a state match
 * the packet is rewritten via nat64_do_handle_ip4() with the IPv6
 * source built by embedding f_id->src_ip into the NAT64 prefix.
 * Returns an ipfw verdict; *mp is set to NULL when the packet was
 * consumed (NAT64RETURN).
 */
static int
nat64lsn_translate4(struct nat64lsn_cfg *cfg,
    const struct ipfw_flow_id *f_id, struct mbuf **mp)
{
        struct pfloghdr loghdr, *logdata;
        struct in6_addr src6;
        struct nat64lsn_state *state;
        struct nat64lsn_alias *alias;
        uint32_t addr, flags;
        uint16_t port, ts;
        int ret;
        uint8_t proto;

        addr = f_id->dst_ip;
        port = f_id->dst_port;
        proto = f_id->proto;
        if (addr < cfg->prefix4 || addr > cfg->pmask4) {
                NAT64STAT_INC(&cfg->base.stats, nomatch4);
                return (cfg->nomatch_verdict);
        }

        /* Reassemble fragments if needed */
        ret = ntohs(mtod(*mp, struct ip *)->ip_off);
        if ((ret & (IP_MF | IP_OFFMASK)) != 0) {
                *mp = nat64lsn_reassemble4(cfg, *mp, &port);
                if (*mp == NULL)
                        return (IP_FW_DENY);
        }

        /* Check if protocol is supported */
        switch (proto) {
        case IPPROTO_ICMP:
                /* May rewrite addr/port/proto from an embedded packet. */
                ret = inspect_icmp_mbuf(mp, &proto, &addr, &port);
                if (ret != 0) {
                        if (ret == ENOMEM) {
                                NAT64STAT_INC(&cfg->base.stats, nomem);
                                return (IP_FW_DENY);
                        }
                        NAT64STAT_INC(&cfg->base.stats, noproto);
                        return (cfg->nomatch_verdict);
                }
                /* Re-check: the embedded address may differ. */
                if (addr < cfg->prefix4 || addr > cfg->pmask4) {
                        NAT64STAT_INC(&cfg->base.stats, nomatch4);
                        return (cfg->nomatch_verdict);
                }
                /* FALLTHROUGH */
        case IPPROTO_TCP:
        case IPPROTO_UDP:
                break;
        default:
                NAT64STAT_INC(&cfg->base.stats, noproto);
                return (cfg->nomatch_verdict);
        }

        alias = &ALIAS_BYHASH(cfg, addr);
        MPASS(addr == alias->addr);

        /* Check that we have state for this port */
        state = nat64lsn_get_state4to6(cfg, alias, f_id->src_ip,
            port, proto);
        if (state == NULL) {
                NAT64STAT_INC(&cfg->base.stats, nomatch4);
                return (cfg->nomatch_verdict);
        }

        /* TODO: Check flags to see if we need to do some static mapping */

        /* Update some state fields if need */
        SET_AGE(ts);
        if (f_id->proto == IPPROTO_TCP)
                flags = convert_tcp_flags(f_id->_flags);
        else
                flags = 0;
        /* Avoid needless stores to shared state. */
        if (state->timestamp != ts)
                state->timestamp = ts;
        if ((state->flags & flags) != flags)
                state->flags |= flags;

        port = htons(state->sport);
        src6 = state->ip6_dst;

        if (cfg->base.flags & NAT64_LOG) {
                logdata = &loghdr;
                nat64lsn_log(logdata, *mp, AF_INET, state);
        } else
                logdata = NULL;

        /*
         * We already have src6 with embedded address, but it is possible,
         * that src_ip is different than state->ip_dst, this is why we
         * do embedding again.
         */
        nat64_embed_ip4(&src6, cfg->base.plat_plen, htonl(f_id->src_ip));
        ret = nat64_do_handle_ip4(*mp, &src6, &state->host->addr, port,
            &cfg->base, logdata);
        if (ret == NAT64SKIP)
                return (cfg->nomatch_verdict);
        if (ret == NAT64RETURN)
                *mp = NULL;
        return (IP_FW_DENY);
}
  702 
  703 /*
  704  * Check if particular state is stale and should be deleted.
  705  * Return 1 if true, 0 otherwise.
  706  */
  707 static int
  708 nat64lsn_check_state(struct nat64lsn_cfg *cfg, struct nat64lsn_state *state)
  709 {
  710         int age, ttl;
  711 
  712         /* State was marked as stale in previous pass. */
  713         if (ISSET32(state->flags, NAT64_BIT_STALE))
  714                 return (1);
  715 
  716         /* State is not yet initialized, it is going to be READY */
  717         if (!ISSET32(state->flags, NAT64_BIT_READY_IPV4))
  718                 return (0);
  719 
  720         age = GET_AGE(state->timestamp);
  721         switch (state->proto) {
  722         case IPPROTO_TCP:
  723                 if (ISSET32(state->flags, NAT64_BIT_TCP_FIN))
  724                         ttl = cfg->st_close_ttl;
  725                 else if (ISSET32(state->flags, NAT64_BIT_TCP_ESTAB))
  726                         ttl = cfg->st_estab_ttl;
  727                 else if (ISSET32(state->flags, NAT64_BIT_TCP_SYN))
  728                         ttl = cfg->st_syn_ttl;
  729                 else
  730                         ttl = cfg->st_syn_ttl;
  731                 if (age > ttl)
  732                         return (1);
  733                 break;
  734         case IPPROTO_UDP:
  735                 if (age > cfg->st_udp_ttl)
  736                         return (1);
  737                 break;
  738         case IPPROTO_ICMP:
  739                 if (age > cfg->st_icmp_ttl)
  740                         return (1);
  741                 break;
  742         }
  743         return (0);
  744 }
  745 
/*
 * Scan all states of a portgroup and expire stale entries.
 *
 * Expiration is a two-pass protocol: on the first pass a stale state is
 * unlinked from its host's hash and marked STALE; on a later pass (after
 * concurrent lock-free readers have had time to drain) its freemask bit
 * is set again, making the slot available for reuse.
 *
 * Returns 1 when the PG itself has been idle longer than
 * pg_delete_delay and may be destroyed, 0 when it must be kept.
 */
static int
nat64lsn_maintain_pg(struct nat64lsn_cfg *cfg, struct nat64lsn_pg *pg)
{
	struct nat64lsn_state *state;
	struct nat64lsn_host *host;
	uint64_t freemask;
	int c, i, update_age;

	update_age = 0;
	for (c = 0; c < pg->chunks_count; c++) {
		/* Snapshot the freemask; clear bits mark busy slots. */
		FREEMASK_COPY(pg, c, freemask);
		for (i = 0; i < 64; i++) {
			if (ISSET64(freemask, i))
				continue;
			state = &STATES_CHUNK(pg, c)->state[i];
			if (nat64lsn_check_state(cfg, state) == 0) {
				/* Still alive; remember to refresh PG age. */
				update_age = 1;
				continue;
			}
			/*
			 * Expire state:
			 * 1. Mark as STALE and unlink from host's hash.
			 * 2. Set bit in freemask.
			 */
			if (ISSET32(state->flags, NAT64_BIT_STALE)) {
				/*
				 * State was marked as STALE in previous
				 * pass. Now it is safe to release it.
				 */
				state->flags = 0;
				ck_pr_fence_store();
				FREEMASK_BTS(pg, c, i);
				NAT64STAT_INC(&cfg->base.stats, sdeleted);
				continue;
			}
			MPASS(state->flags & NAT64_FLAG_READY);

			/* First pass: unlink from the host's state hash. */
			host = state->host;
			HOST_LOCK(host);
			CK_SLIST_REMOVE(&STATE_HASH(host, state->hval),
			    state, nat64lsn_state, entries);
			host->states_count--;
			HOST_UNLOCK(host);

			/* Reset READY flag */
			ck_pr_btr_32(&state->flags, NAT64_BIT_READY_IPV4);
			/* And set STALE flag */
			ck_pr_bts_32(&state->flags, NAT64_BIT_STALE);
			ck_pr_fence_store();
			/*
			 * Now translate6 will not use this state, wait
			 * until it become safe for translate4, then mark
			 * state as free.
			 */
		}
	}

	/*
	 * We have some alive states, update timestamp.
	 */
	if (update_age)
		SET_AGE(pg->timestamp);

	if (GET_AGE(pg->timestamp) < cfg->pg_delete_delay)
		return (0);

	return (1);
}
  814 
  815 static void
  816 nat64lsn_expire_portgroups(struct nat64lsn_cfg *cfg,
  817     struct nat64lsn_pg_slist *portgroups)
  818 {
  819         struct nat64lsn_alias *alias;
  820         struct nat64lsn_pg *pg, *tpg, *firstpg, **pgptr;
  821         uint32_t *pgmask, *pgidx;
  822         int i, idx;
  823 
  824         for (i = 0; i < 1 << (32 - cfg->plen4); i++) {
  825                 alias = &cfg->aliases[i];
  826                 CK_SLIST_FOREACH_SAFE(pg, &alias->portgroups, entries, tpg) {
  827                         if (nat64lsn_maintain_pg(cfg, pg) == 0)
  828                                 continue;
  829                         /* Always keep first PG */
  830                         if (pg->base_port == NAT64_MIN_PORT)
  831                                 continue;
  832                         /*
  833                          * PG is expired, unlink it and schedule for
  834                          * deferred destroying.
  835                          */
  836                         idx = (pg->base_port - NAT64_MIN_PORT) / 64;
  837                         switch (pg->proto) {
  838                         case IPPROTO_TCP:
  839                                 pgmask = alias->tcp_pgmask;
  840                                 pgptr = &alias->tcp_pg;
  841                                 pgidx = &alias->tcp_pgidx;
  842                                 firstpg = alias->tcp[0]->pgptr[0];
  843                                 break;
  844                         case IPPROTO_UDP:
  845                                 pgmask = alias->udp_pgmask;
  846                                 pgptr = &alias->udp_pg;
  847                                 pgidx = &alias->udp_pgidx;
  848                                 firstpg = alias->udp[0]->pgptr[0];
  849                                 break;
  850                         case IPPROTO_ICMP:
  851                                 pgmask = alias->icmp_pgmask;
  852                                 pgptr = &alias->icmp_pg;
  853                                 pgidx = &alias->icmp_pgidx;
  854                                 firstpg = alias->icmp[0]->pgptr[0];
  855                                 break;
  856                         }
  857                         /* Reset the corresponding bit in pgmask array. */
  858                         ck_pr_btr_32(&pgmask[idx / 32], idx % 32);
  859                         ck_pr_fence_store();
  860                         /* If last used PG points to this PG, reset it. */
  861                         ck_pr_cas_ptr(pgptr, pg, firstpg);
  862                         ck_pr_cas_32(pgidx, idx, 0);
  863                         /* Unlink PG from alias's chain */
  864                         ALIAS_LOCK(alias);
  865                         CK_SLIST_REMOVE(&alias->portgroups, pg,
  866                             nat64lsn_pg, entries);
  867                         alias->portgroups_count--;
  868                         ALIAS_UNLOCK(alias);
  869                         /* And link to job's chain for deferred destroying */
  870                         NAT64STAT_INC(&cfg->base.stats, spgdeleted);
  871                         CK_SLIST_INSERT_HEAD(portgroups, pg, entries);
  872                 }
  873         }
  874 }
  875 
/*
 * Two-pass host expiration.  The first pass marks an idle host (no
 * states, older than host_delete_delay) with NAT64LSN_DEADHOST; the
 * next pass unlinks it from the hash and moves it to the job's list
 * for deferred, epoch-protected destruction.  A host that gained
 * states in between is resurrected instead.
 */
static void
nat64lsn_expire_hosts(struct nat64lsn_cfg *cfg,
    struct nat64lsn_hosts_slist *hosts)
{
	struct nat64lsn_host *host, *tmp;
	int i;

	for (i = 0; i < cfg->hosts_hashsize; i++) {
		CK_SLIST_FOREACH_SAFE(host, &cfg->hosts_hash[i],
		    entries, tmp) {
			/* Was the host marked dead in a previous pass? */
			if (host->flags & NAT64LSN_DEADHOST) {
				if (host->states_count > 0) {
					/* New states appeared: keep it. */
					host->flags &= ~NAT64LSN_DEADHOST;
					continue;
				}
				/*
				 * Unlink host from hash table and schedule
				 * it for deferred destroying.
				 */
				CFG_LOCK(cfg);
				CK_SLIST_REMOVE(&cfg->hosts_hash[i], host,
				    nat64lsn_host, entries);
				cfg->hosts_count--;
				CFG_UNLOCK(cfg);
				CK_SLIST_INSERT_HEAD(hosts, host, entries);
				continue;
			}
			if (GET_AGE(host->timestamp) < cfg->host_delete_delay)
				continue;
			if (host->states_count > 0)
				continue;
			/* Mark host as going to be expired in next pass */
			host->flags |= NAT64LSN_DEADHOST;
			ck_pr_fence_store();
		}
	}
}
  914 
/*
 * Placeholder for pgchunk reclamation.  The implementation below is
 * disabled (#if 0): it would scan each alias for fully-free TCP
 * pgchunks older than pgchunk_delete_delay and release them.  As it
 * stands this function always returns NULL, so no chunk is ever
 * reclaimed here.
 */
static struct nat64lsn_pgchunk*
nat64lsn_expire_pgchunk(struct nat64lsn_cfg *cfg)
{
#if 0
	struct nat64lsn_alias *alias;
	struct nat64lsn_pgchunk *chunk;
	uint32_t pgmask;
	int i, c;

	for (i = 0; i < 1 << (32 - cfg->plen4); i++) {
		alias = &cfg->aliases[i];
		if (GET_AGE(alias->timestamp) < cfg->pgchunk_delete_delay)
			continue;
		/* Always keep single chunk allocated */
		for (c = 1; c < 32; c++) {
			if ((alias->tcp_chunkmask & (1 << c)) == 0)
				break;
			chunk = ck_pr_load_ptr(&alias->tcp[c]);
			if (ck_pr_load_32(&alias->tcp_pgmask[c]) != 0)
				continue;
			ck_pr_btr_32(&alias->tcp_chunkmask, c);
			ck_pr_fence_load();
			if (ck_pr_load_32(&alias->tcp_pgmask[c]) != 0)
				continue;
		}
	}
#endif
	return (NULL);
}
  944 
#if 0
/*
 * Disabled draft: grow a host's states hash when it becomes too dense.
 * NOTE(review): this code is not compiled in and has known defects —
 * the malloc() call is missing the M_NAT64LSN type argument, the init
 * loop uses hash[i] where hash[j] was intended, and the freshly
 * allocated hash is leaked since it is never attached to the host.
 * Fix these before enabling.
 */
static void
nat64lsn_maintain_hosts(struct nat64lsn_cfg *cfg)
{
	struct nat64lsn_host *h;
	struct nat64lsn_states_slist *hash;
	int i, j, hsize;

	for (i = 0; i < cfg->hosts_hashsize; i++) {
		CK_SLIST_FOREACH(h, &cfg->hosts_hash[i], entries) {
			 if (h->states_count / 2 < h->states_hashsize ||
			     h->states_hashsize >= NAT64LSN_MAX_HSIZE)
				 continue;
			 hsize = h->states_hashsize * 2;
			 hash = malloc(sizeof(*hash)* hsize, M_NOWAIT);
			 if (hash == NULL)
				 continue;
			 for (j = 0; j < hsize; j++)
				CK_SLIST_INIT(&hash[i]);

			 ck_pr_bts_32(&h->flags, NAT64LSN_GROWHASH);
		}
	}
}
#endif
  970 
  971 /*
  972  * This procedure is used to perform various maintenance
  973  * on dynamic hash list. Currently it is called every 4 seconds.
  974  */
  975 static void
  976 nat64lsn_periodic(void *data)
  977 {
  978         struct nat64lsn_job_item *ji;
  979         struct nat64lsn_cfg *cfg;
  980 
  981         cfg = (struct nat64lsn_cfg *) data;
  982         CURVNET_SET(cfg->vp);
  983         if (cfg->hosts_count > 0) {
  984                 ji = uma_zalloc(nat64lsn_job_zone, M_NOWAIT);
  985                 if (ji != NULL) {
  986                         ji->jtype = JTYPE_DESTROY;
  987                         CK_SLIST_INIT(&ji->hosts);
  988                         CK_SLIST_INIT(&ji->portgroups);
  989                         nat64lsn_expire_hosts(cfg, &ji->hosts);
  990                         nat64lsn_expire_portgroups(cfg, &ji->portgroups);
  991                         ji->pgchunk = nat64lsn_expire_pgchunk(cfg);
  992                         NAT64LSN_EPOCH_CALL(&ji->epoch_ctx,
  993                             nat64lsn_job_destroy);
  994                 } else
  995                         NAT64STAT_INC(&cfg->base.stats, jnomem);
  996         }
  997         callout_schedule(&cfg->periodic, hz * PERIODIC_DELAY);
  998         CURVNET_RESTORE();
  999 }
 1000 
/*
 * Allocation error encoding: 10 * type + stage, where type 1 is host
 * allocation and type 2 is portgroup allocation.  Stage 0 always maps
 * to 0, i.e. HOST_ERROR(0)/PG_ERROR(0) mean success.
 */
#define ALLOC_ERROR(stage, type)        ((stage) ? 10 * (type) + (stage): 0)
#define HOST_ERROR(stage)               ALLOC_ERROR(stage, 1)
#define PG_ERROR(stage)                 ALLOC_ERROR(stage, 2)
/*
 * Allocate and initialize a host entry for the IPv6 source address of
 * the queued job, then obtain a translation state for it.  If the host
 * was already created by an earlier job, only the state is obtained.
 *
 * Returns HOST_ERROR(0) on success; non-zero stages identify the
 * allocation step that failed (see ALLOC_ERROR()).
 */
static int
nat64lsn_alloc_host(struct nat64lsn_cfg *cfg, struct nat64lsn_job_item *ji)
{
	char a[INET6_ADDRSTRLEN];
	struct nat64lsn_aliaslink *link;
	struct nat64lsn_host *host;
	struct nat64lsn_state *state;
	uint32_t hval, data[2];
	int i;

	/* Check that host was not yet added. */
	NAT64LSN_EPOCH_ASSERT();
	CK_SLIST_FOREACH(host, &HOSTS(cfg, ji->src6_hval), entries) {
		if (IN6_ARE_ADDR_EQUAL(&ji->f_id.src_ip6, &host->addr)) {
			/* The host was allocated in previous call. */
			ji->host = host;
			goto get_state;
		}
	}

	host = ji->host = uma_zalloc(nat64lsn_host_zone, M_NOWAIT);
	if (ji->host == NULL)
		return (HOST_ERROR(1));

	host->states_hashsize = NAT64LSN_HSIZE;
	host->states_hash = malloc(sizeof(struct nat64lsn_states_slist) *
	    host->states_hashsize, M_NAT64LSN, M_NOWAIT);
	if (host->states_hash == NULL) {
		uma_zfree(nat64lsn_host_zone, host);
		return (HOST_ERROR(2));
	}

	link = uma_zalloc(nat64lsn_aliaslink_zone, M_NOWAIT);
	if (link == NULL) {
		/* Roll back both prior allocations. */
		free(host->states_hash, M_NAT64LSN);
		uma_zfree(nat64lsn_host_zone, host);
		return (HOST_ERROR(3));
	}

	/* Initialize */
	HOST_LOCK_INIT(host);
	SET_AGE(host->timestamp);
	host->addr = ji->f_id.src_ip6;
	host->hval = ji->src6_hval;
	host->flags = 0;
	host->states_count = 0;
	host->states_hashsize = NAT64LSN_HSIZE;
	CK_SLIST_INIT(&host->aliases);
	for (i = 0; i < host->states_hashsize; i++)
		CK_SLIST_INIT(&host->states_hash[i]);

	/* Determine alias from flow hash. */
	hval = ALIASLINK_HVAL(cfg, &ji->f_id);
	link->alias = &ALIAS_BYHASH(cfg, hval);
	CK_SLIST_INSERT_HEAD(&host->aliases, link, host_entries);

	/* Publish the link on the alias under its lock. */
	ALIAS_LOCK(link->alias);
	CK_SLIST_INSERT_HEAD(&link->alias->hosts, link, alias_entries);
	link->alias->hosts_count++;
	ALIAS_UNLOCK(link->alias);

	/* Make the host visible to the packet path. */
	CFG_LOCK(cfg);
	CK_SLIST_INSERT_HEAD(&HOSTS(cfg, ji->src6_hval), host, entries);
	cfg->hosts_count++;
	CFG_UNLOCK(cfg);

get_state:
	/* Hash the remote address and ports to pick a state hash slot. */
	data[0] = ji->faddr;
	data[1] = (ji->f_id.dst_port << 16) | ji->port;
	ji->state_hval = hval = STATE_HVAL(cfg, data);
	state = nat64lsn_get_state6to4(cfg, host, &ji->f_id, hval,
	    ji->faddr, ji->port, ji->proto);
	/*
	 * We failed to obtain new state, used alias needs new PG.
	 * XXX: or another alias should be used.
	 */
	if (state == NULL) {
		/* Try to allocate new PG */
		if (nat64lsn_alloc_pg(cfg, ji) != PG_ERROR(0))
			return (HOST_ERROR(4));
		/* We assume that nat64lsn_alloc_pg() got state */
	} else
		ji->state = state;

	ji->done = 1;
	DPRINTF(DP_OBJ, "ALLOC HOST %s %p",
	    inet_ntop(AF_INET6, &host->addr, a, sizeof(a)), host);
	return (HOST_ERROR(0));
}
 1093 
 1094 static int
 1095 nat64lsn_find_pg_place(uint32_t *data)
 1096 {
 1097         int i;
 1098 
 1099         for (i = 0; i < 32; i++) {
 1100                 if (~data[i] == 0)
 1101                         continue;
 1102                 return (i * 32 + ffs(~data[i]) - 1);
 1103         }
 1104         return (-1);
 1105 }
 1106 
/*
 * Allocate a new portgroup (a block of 64 ports) for the given alias
 * and protocol, then publish it lock-free: the pgchunk pointer is
 * stored first, a store fence follows, and only then is the pgmask
 * bit set — so concurrent readers never see a set mask bit with an
 * unset pointer.
 *
 * Returns PG_ERROR(0) on success; other stage numbers identify the
 * allocation step that failed.
 */
static int
nat64lsn_alloc_proto_pg(struct nat64lsn_cfg *cfg,
    struct nat64lsn_alias *alias, uint32_t *chunkmask,
    uint32_t *pgmask, struct nat64lsn_pgchunk **chunks,
    struct nat64lsn_pg **pgptr, uint8_t proto)
{
	struct nat64lsn_pg *pg;
	int i, pg_idx, chunk_idx;

	/* Find place in pgchunk where PG can be added */
	pg_idx = nat64lsn_find_pg_place(pgmask);
	if (pg_idx < 0)	/* no more PGs */
		return (PG_ERROR(1));
	/* Check that we have allocated pgchunk for given PG index */
	chunk_idx = pg_idx / 32;
	if (!ISSET32(*chunkmask, chunk_idx)) {
		chunks[chunk_idx] = uma_zalloc(nat64lsn_pgchunk_zone,
		    M_NOWAIT);
		if (chunks[chunk_idx] == NULL)
			return (PG_ERROR(2));
		/* Publish the chunk before marking it present. */
		ck_pr_bts_32(chunkmask, chunk_idx);
		ck_pr_fence_store();
	}
	/* Allocate PG and states chunks */
	pg = uma_zalloc(nat64lsn_pg_zone, M_NOWAIT);
	if (pg == NULL)
		return (PG_ERROR(3));
	pg->chunks_count = cfg->states_chunks;
	if (pg->chunks_count > 1) {
		/* Multi-chunk PG: freemask and state chunks are arrays. */
		pg->freemask_chunk = malloc(pg->chunks_count *
		    sizeof(uint64_t), M_NAT64LSN, M_NOWAIT);
		if (pg->freemask_chunk == NULL) {
			uma_zfree(nat64lsn_pg_zone, pg);
			return (PG_ERROR(4));
		}
		pg->states_chunk = malloc(pg->chunks_count *
		    sizeof(struct nat64lsn_states_chunk *), M_NAT64LSN,
		    M_NOWAIT | M_ZERO);
		if (pg->states_chunk == NULL) {
			free(pg->freemask_chunk, M_NAT64LSN);
			uma_zfree(nat64lsn_pg_zone, pg);
			return (PG_ERROR(5));
		}
		for (i = 0; i < pg->chunks_count; i++) {
			pg->states_chunk[i] = uma_zalloc(
			    nat64lsn_state_zone, M_NOWAIT);
			if (pg->states_chunk[i] == NULL)
				goto states_failed;
		}
		/* All-ones freemask: every slot starts out free. */
		memset(pg->freemask_chunk, 0xff,
		    sizeof(uint64_t) * pg->chunks_count);
	} else {
		/* Single-chunk PG: inline freemask and states pointer. */
		pg->states = uma_zalloc(nat64lsn_state_zone, M_NOWAIT);
		if (pg->states == NULL) {
			uma_zfree(nat64lsn_pg_zone, pg);
			return (PG_ERROR(6));
		}
		memset(&pg->freemask64, 0xff, sizeof(uint64_t));
	}

	/* Initialize PG and hook it to pgchunk */
	SET_AGE(pg->timestamp);
	pg->proto = proto;
	pg->base_port = NAT64_MIN_PORT + 64 * pg_idx;
	/* Pointer first, fence, then mask bit — readers rely on this. */
	ck_pr_store_ptr(&chunks[chunk_idx]->pgptr[pg_idx % 32], pg);
	ck_pr_fence_store();
	ck_pr_bts_32(&pgmask[pg_idx / 32], pg_idx % 32);
	ck_pr_store_ptr(pgptr, pg);

	ALIAS_LOCK(alias);
	CK_SLIST_INSERT_HEAD(&alias->portgroups, pg, entries);
	SET_AGE(alias->timestamp);
	alias->portgroups_count++;
	ALIAS_UNLOCK(alias);
	NAT64STAT_INC(&cfg->base.stats, spgcreated);
	return (PG_ERROR(0));

states_failed:
	/* Unwind partially-allocated state chunks (uma_zfree(NULL) ok). */
	for (i = 0; i < pg->chunks_count; i++)
		uma_zfree(nat64lsn_state_zone, pg->states_chunk[i]);
	free(pg->freemask_chunk, M_NAT64LSN);
	free(pg->states_chunk, M_NAT64LSN);
	uma_zfree(nat64lsn_pg_zone, pg);
	return (PG_ERROR(7));
}
 1192 
 1193 static int
 1194 nat64lsn_alloc_pg(struct nat64lsn_cfg *cfg, struct nat64lsn_job_item *ji)
 1195 {
 1196         struct nat64lsn_aliaslink *link;
 1197         struct nat64lsn_alias *alias;
 1198         int ret;
 1199 
 1200         link = nat64lsn_get_aliaslink(cfg, ji->host, &ji->f_id);
 1201         if (link == NULL)
 1202                 return (PG_ERROR(1));
 1203 
 1204         /*
 1205          * TODO: check that we did not already allocated PG in
 1206          *       previous call.
 1207          */
 1208 
 1209         ret = 0;
 1210         alias = link->alias;
 1211         /* Find place in pgchunk where PG can be added */
 1212         switch (ji->proto) {
 1213         case IPPROTO_TCP:
 1214                 ret = nat64lsn_alloc_proto_pg(cfg, alias,
 1215                     &alias->tcp_chunkmask, alias->tcp_pgmask,
 1216                     alias->tcp, &alias->tcp_pg, ji->proto);
 1217                 break;
 1218         case IPPROTO_UDP:
 1219                 ret = nat64lsn_alloc_proto_pg(cfg, alias,
 1220                     &alias->udp_chunkmask, alias->udp_pgmask,
 1221                     alias->udp, &alias->udp_pg, ji->proto);
 1222                 break;
 1223         case IPPROTO_ICMP:
 1224                 ret = nat64lsn_alloc_proto_pg(cfg, alias,
 1225                     &alias->icmp_chunkmask, alias->icmp_pgmask,
 1226                     alias->icmp, &alias->icmp_pg, ji->proto);
 1227                 break;
 1228         default:
 1229                 panic("%s: wrong proto %d", __func__, ji->proto);
 1230         }
 1231         if (ret == PG_ERROR(1)) {
 1232                 /*
 1233                  * PG_ERROR(1) means that alias lacks free PGs
 1234                  * XXX: try next alias.
 1235                  */
 1236                 printf("NAT64LSN: %s: failed to obtain PG\n",
 1237                     __func__);
 1238                 return (ret);
 1239         }
 1240         if (ret == PG_ERROR(0)) {
 1241                 ji->state = nat64lsn_get_state6to4(cfg, ji->host, &ji->f_id,
 1242                     ji->state_hval, ji->faddr, ji->port, ji->proto);
 1243                 if (ji->state == NULL)
 1244                         ret = PG_ERROR(8);
 1245                 else
 1246                         ji->done = 1;
 1247         }
 1248         return (ret);
 1249 }
 1250 
/*
 * Callout handler: drain the job queue and execute the queued jobs
 * (host and portgroup allocation) inside the nat64lsn epoch section.
 * Packets whose jobs completed (ji->done) are re-injected into the
 * translator; all remaining mbufs are freed afterwards.
 */
static void
nat64lsn_do_request(void *data)
{
	struct epoch_tracker et;
	struct nat64lsn_job_head jhead;
	struct nat64lsn_job_item *ji, *ji2;
	struct nat64lsn_cfg *cfg;
	int jcount;
	uint8_t flags;

	cfg = (struct nat64lsn_cfg *)data;
	if (cfg->jlen == 0)
		return;

	CURVNET_SET(cfg->vp);
	STAILQ_INIT(&jhead);

	/* Grab queue */
	JQUEUE_LOCK();
	STAILQ_SWAP(&jhead, &cfg->jhead, nat64lsn_job_item);
	jcount = cfg->jlen;
	cfg->jlen = 0;
	JQUEUE_UNLOCK();

	/* TODO: check if we need to resize hash */

	NAT64STAT_INC(&cfg->base.stats, jcalls);
	DPRINTF(DP_JQUEUE, "count=%d", jcount);

	/*
	 * TODO:
	 * What we should do here is to build a hash
	 * to ensure we don't have lots of duplicate requests.
	 * Skip this for now.
	 *
	 * TODO: Limit per-call number of items
	 */

	NAT64LSN_EPOCH_ENTER(et);
	STAILQ_FOREACH(ji, &jhead, entries) {
		switch (ji->jtype) {
		case JTYPE_NEWHOST:
			if (nat64lsn_alloc_host(cfg, ji) != HOST_ERROR(0))
				NAT64STAT_INC(&cfg->base.stats, jhostfails);
			break;
		case JTYPE_NEWPORTGROUP:
			if (nat64lsn_alloc_pg(cfg, ji) != PG_ERROR(0))
				NAT64STAT_INC(&cfg->base.stats, jportfails);
			break;
		default:
			continue;
		}
		if (ji->done != 0) {
			/* Job succeeded: translate the held packet now. */
			flags = ji->proto != IPPROTO_TCP ? 0 :
			    convert_tcp_flags(ji->f_id._flags);
			nat64lsn_translate6_internal(cfg, &ji->m,
			    ji->state, flags);
			NAT64STAT_INC(&cfg->base.stats, jreinjected);
		}
	}
	NAT64LSN_EPOCH_EXIT(et);

	ji = STAILQ_FIRST(&jhead);
	while (ji != NULL) {
		ji2 = STAILQ_NEXT(ji, entries);
		/*
		 * In any case we must free the mbuf if the
		 * translator did not consume it (m_freem(NULL) is ok).
		 */
		m_freem(ji->m);
		uma_zfree(nat64lsn_job_zone, ji);
		ji = ji2;
	}
	CURVNET_RESTORE();
}
 1326 
 1327 static struct nat64lsn_job_item *
 1328 nat64lsn_create_job(struct nat64lsn_cfg *cfg, int jtype)
 1329 {
 1330         struct nat64lsn_job_item *ji;
 1331 
 1332         /*
 1333          * Do not try to lock possibly contested mutex if we're near the
 1334          * limit. Drop packet instead.
 1335          */
 1336         ji = NULL;
 1337         if (cfg->jlen >= cfg->jmaxlen)
 1338                 NAT64STAT_INC(&cfg->base.stats, jmaxlen);
 1339         else {
 1340                 ji = uma_zalloc(nat64lsn_job_zone, M_NOWAIT);
 1341                 if (ji == NULL)
 1342                         NAT64STAT_INC(&cfg->base.stats, jnomem);
 1343         }
 1344         if (ji == NULL) {
 1345                 NAT64STAT_INC(&cfg->base.stats, dropped);
 1346                 DPRINTF(DP_DROPS, "failed to create job");
 1347         } else {
 1348                 ji->jtype = jtype;
 1349                 ji->done = 0;
 1350         }
 1351         return (ji);
 1352 }
 1353 
/*
 * Append a job to the configuration's queue and arm the processing
 * callout if it is not already pending.  Called from the packet path;
 * the queue is drained by nat64lsn_do_request().
 */
static void
nat64lsn_enqueue_job(struct nat64lsn_cfg *cfg, struct nat64lsn_job_item *ji)
{

	JQUEUE_LOCK();
	STAILQ_INSERT_TAIL(&cfg->jhead, ji, entries);
	NAT64STAT_INC(&cfg->base.stats, jrequests);
	cfg->jlen++;

	/* Schedule the drain for the next tick unless already pending. */
	if (callout_pending(&cfg->jcallout) == 0)
		callout_reset(&cfg->jcallout, 1, nat64lsn_do_request, cfg);
	JQUEUE_UNLOCK();
}
 1367 
/*
 * Epoch callback: free the hosts and portgroups that the periodic
 * pass unlinked.  Runs only after all concurrent readers of the
 * lock-free lists are guaranteed to be gone.
 */
static void
nat64lsn_job_destroy(epoch_context_t ctx)
{
	struct nat64lsn_job_item *ji;
	struct nat64lsn_host *host;
	struct nat64lsn_pg *pg;
	int i;

	ji = __containerof(ctx, struct nat64lsn_job_item, epoch_ctx);
	MPASS(ji->jtype == JTYPE_DESTROY);
	while (!CK_SLIST_EMPTY(&ji->hosts)) {
		host = CK_SLIST_FIRST(&ji->hosts);
		CK_SLIST_REMOVE_HEAD(&ji->hosts, entries);
		if (host->states_count > 0) {
			/*
			 * XXX: The state has been created
			 * during host deletion.
			 */
			printf("NAT64LSN: %s: destroying host with %d "
			    "states\n", __func__, host->states_count);
		}
		nat64lsn_destroy_host(host);
	}
	while (!CK_SLIST_EMPTY(&ji->portgroups)) {
		pg = CK_SLIST_FIRST(&ji->portgroups);
		CK_SLIST_REMOVE_HEAD(&ji->portgroups, entries);
		/* A fully-free chunk has all 64 freemask bits set. */
		for (i = 0; i < pg->chunks_count; i++) {
			if (FREEMASK_BITCOUNT(pg, i) != 64) {
				/*
				 * XXX: The state has been created during
				 * PG deletion.
				 */
				printf("NAT64LSN: %s: destroying PG %p "
				    "with non-empty chunk %d\n", __func__,
				    pg, i);
			}
		}
		nat64lsn_destroy_pg(pg);
	}
	/* uma_zfree(NULL) is a no-op when no pgchunk was collected. */
	uma_zfree(nat64lsn_pgchunk_zone, ji->pgchunk);
	uma_zfree(nat64lsn_job_zone, ji);
}
 1410 
 1411 static int
 1412 nat64lsn_request_host(struct nat64lsn_cfg *cfg,
 1413     const struct ipfw_flow_id *f_id, struct mbuf **mp, uint32_t hval,
 1414     in_addr_t faddr, uint16_t port, uint8_t proto)
 1415 {
 1416         struct nat64lsn_job_item *ji;
 1417 
 1418         ji = nat64lsn_create_job(cfg, JTYPE_NEWHOST);
 1419         if (ji != NULL) {
 1420                 ji->m = *mp;
 1421                 ji->f_id = *f_id;
 1422                 ji->faddr = faddr;
 1423                 ji->port = port;
 1424                 ji->proto = proto;
 1425                 ji->src6_hval = hval;
 1426 
 1427                 nat64lsn_enqueue_job(cfg, ji);
 1428                 NAT64STAT_INC(&cfg->base.stats, jhostsreq);
 1429                 *mp = NULL;
 1430         }
 1431         return (IP_FW_DENY);
 1432 }
 1433 
 1434 static int
 1435 nat64lsn_request_pg(struct nat64lsn_cfg *cfg, struct nat64lsn_host *host,
 1436     const struct ipfw_flow_id *f_id, struct mbuf **mp, uint32_t hval,
 1437     in_addr_t faddr, uint16_t port, uint8_t proto)
 1438 {
 1439         struct nat64lsn_job_item *ji;
 1440 
 1441         ji = nat64lsn_create_job(cfg, JTYPE_NEWPORTGROUP);
 1442         if (ji != NULL) {
 1443                 ji->m = *mp;
 1444                 ji->f_id = *f_id;
 1445                 ji->faddr = faddr;
 1446                 ji->port = port;
 1447                 ji->proto = proto;
 1448                 ji->state_hval = hval;
 1449                 ji->host = host;
 1450 
 1451                 nat64lsn_enqueue_job(cfg, ji);
 1452                 NAT64STAT_INC(&cfg->base.stats, jportreq);
 1453                 *mp = NULL;
 1454         }
 1455         return (IP_FW_DENY);
 1456 }
 1457 
 1458 static int
 1459 nat64lsn_translate6_internal(struct nat64lsn_cfg *cfg, struct mbuf **mp,
 1460     struct nat64lsn_state *state, uint8_t flags)
 1461 {
 1462         struct pfloghdr loghdr, *logdata;
 1463         int ret;
 1464         uint16_t ts;
 1465 
 1466         /* Update timestamp and flags if needed */
 1467         SET_AGE(ts);
 1468         if (state->timestamp != ts)
 1469                 state->timestamp = ts;
 1470         if ((state->flags & flags) != 0)
 1471                 state->flags |= flags;
 1472 
 1473         if (cfg->base.flags & NAT64_LOG) {
 1474                 logdata = &loghdr;
 1475                 nat64lsn_log(logdata, *mp, AF_INET6, state);
 1476         } else
 1477                 logdata = NULL;
 1478 
 1479         ret = nat64_do_handle_ip6(*mp, htonl(state->ip_src),
 1480             htons(state->aport), &cfg->base, logdata);
 1481         if (ret == NAT64SKIP)
 1482                 return (cfg->nomatch_verdict);
 1483         if (ret == NAT64RETURN)
 1484                 *mp = NULL;
 1485         return (IP_FW_DENY);
 1486 }
 1487 
static int
nat64lsn_translate6(struct nat64lsn_cfg *cfg, struct ipfw_flow_id *f_id,
    struct mbuf **mp)
{
	/*
	 * Client-to-Internet (IPv6 -> IPv4) translation path: find or
	 * request the host entry and per-flow state, then hand off to
	 * nat64lsn_translate6_internal() for the actual rewrite.
	 */
	struct nat64lsn_state *state;
	struct nat64lsn_host *host;
	struct icmp6_hdr *icmp6;
	uint32_t addr, hval, data[2];
	int offset, proto;
	uint16_t port;
	uint8_t flags;

	/* Check if protocol is supported */
	port = f_id->src_port;
	proto = f_id->proto;
	switch (f_id->proto) {
	case IPPROTO_ICMPV6:
		/*
		 * For ICMPv6 echo reply/request we use icmp6_id as
		 * local port.
		 */
		offset = 0;
		proto = nat64_getlasthdr(*mp, &offset);
		if (proto < 0) {
			NAT64STAT_INC(&cfg->base.stats, dropped);
			DPRINTF(DP_DROPS, "mbuf isn't contigious");
			return (IP_FW_DENY);
		}
		if (proto == IPPROTO_ICMPV6) {
			icmp6 = mtodo(*mp, offset);
			if (icmp6->icmp6_type == ICMP6_ECHO_REQUEST ||
			    icmp6->icmp6_type == ICMP6_ECHO_REPLY)
				port = ntohs(icmp6->icmp6_id);
		}
		/* ICMPv6 states are keyed as ICMP on the IPv4 side. */
		proto = IPPROTO_ICMP;
		/* FALLTHROUGH */
	case IPPROTO_TCP:
	case IPPROTO_UDP:
		break;
	default:
		NAT64STAT_INC(&cfg->base.stats, noproto);
		return (cfg->nomatch_verdict);
	}

	/* Extract IPv4 from destination IPv6 address */
	addr = nat64_extract_ip4(&f_id->dst_ip6, cfg->base.plat_plen);
	if (addr == 0 || nat64_check_private_ip4(&cfg->base, addr) != 0) {
		char a[INET_ADDRSTRLEN];

		NAT64STAT_INC(&cfg->base.stats, dropped);
		DPRINTF(DP_DROPS, "dropped due to embedded IPv4 address %s",
		    inet_ntop(AF_INET, &addr, a, sizeof(a)));
		return (IP_FW_DENY); /* XXX: add extra stats? */
	}

	/* Try to find host */
	hval = HOST_HVAL(cfg, &f_id->src_ip6);
	CK_SLIST_FOREACH(host, &HOSTS(cfg, hval), entries) {
		if (IN6_ARE_ADDR_EQUAL(&f_id->src_ip6, &host->addr))
			break;
	}
	/* We use IPv4 address in host byte order */
	addr = ntohl(addr);
	/* Unknown source: queue asynchronous host creation. */
	if (host == NULL)
		return (nat64lsn_request_host(cfg, f_id, mp,
		    hval, addr, port, proto));

	flags = proto != IPPROTO_TCP ? 0 : convert_tcp_flags(f_id->_flags);

	/* State hash key: destination IPv4 + dst/src port pair. */
	data[0] = addr;
	data[1] = (f_id->dst_port << 16) | port;
	hval = STATE_HVAL(cfg, data);
	state = nat64lsn_get_state6to4(cfg, host, f_id, hval, addr,
	    port, proto);
	/* No free state slot: queue asynchronous port-group growth. */
	if (state == NULL)
		return (nat64lsn_request_pg(cfg, host, f_id, mp, hval, addr,
		    port, proto));
	return (nat64lsn_translate6_internal(cfg, mp, state, flags));
}
 1567 
 1568 /*
 1569  * Main dataplane entry point.
 1570  */
 1571 int
 1572 ipfw_nat64lsn(struct ip_fw_chain *ch, struct ip_fw_args *args,
 1573     ipfw_insn *cmd, int *done)
 1574 {
 1575         struct nat64lsn_cfg *cfg;
 1576         ipfw_insn *icmd;
 1577         int ret;
 1578 
 1579         IPFW_RLOCK_ASSERT(ch);
 1580 
 1581         *done = 0;      /* continue the search in case of failure */
 1582         icmd = cmd + 1;
 1583         if (cmd->opcode != O_EXTERNAL_ACTION ||
 1584             cmd->arg1 != V_nat64lsn_eid ||
 1585             icmd->opcode != O_EXTERNAL_INSTANCE ||
 1586             (cfg = NAT64_LOOKUP(ch, icmd)) == NULL)
 1587                 return (IP_FW_DENY);
 1588 
 1589         *done = 1;      /* terminate the search */
 1590 
 1591         switch (args->f_id.addr_type) {
 1592         case 4:
 1593                 ret = nat64lsn_translate4(cfg, &args->f_id, &args->m);
 1594                 break;
 1595         case 6:
 1596                 /*
 1597                  * Check that destination IPv6 address matches our prefix6.
 1598                  */
 1599                 if ((cfg->base.flags & NAT64LSN_ANYPREFIX) == 0 &&
 1600                     memcmp(&args->f_id.dst_ip6, &cfg->base.plat_prefix,
 1601                     cfg->base.plat_plen / 8) != 0) {
 1602                         ret = cfg->nomatch_verdict;
 1603                         break;
 1604                 }
 1605                 ret = nat64lsn_translate6(cfg, &args->f_id, &args->m);
 1606                 break;
 1607         default:
 1608                 ret = cfg->nomatch_verdict;
 1609         }
 1610 
 1611         if (ret != IP_FW_PASS && args->m != NULL) {
 1612                 m_freem(args->m);
 1613                 args->m = NULL;
 1614         }
 1615         return (ret);
 1616 }
 1617 
 1618 static int
 1619 nat64lsn_state_ctor(void *mem, int size, void *arg, int flags)
 1620 {
 1621         struct nat64lsn_states_chunk *chunk;
 1622         int i;
 1623 
 1624         chunk = (struct nat64lsn_states_chunk *)mem;
 1625         for (i = 0; i < 64; i++)
 1626                 chunk->state[i].flags = 0;
 1627         return (0);
 1628 }
 1629 
 1630 void
 1631 nat64lsn_init_internal(void)
 1632 {
 1633 
 1634         nat64lsn_host_zone = uma_zcreate("NAT64LSN hosts",
 1635             sizeof(struct nat64lsn_host), NULL, NULL, NULL, NULL,
 1636             UMA_ALIGN_PTR, 0);
 1637         nat64lsn_pgchunk_zone = uma_zcreate("NAT64LSN portgroup chunks",
 1638             sizeof(struct nat64lsn_pgchunk), NULL, NULL, NULL, NULL,
 1639             UMA_ALIGN_PTR, 0);
 1640         nat64lsn_pg_zone = uma_zcreate("NAT64LSN portgroups",
 1641             sizeof(struct nat64lsn_pg), NULL, NULL, NULL, NULL,
 1642             UMA_ALIGN_PTR, 0);
 1643         nat64lsn_aliaslink_zone = uma_zcreate("NAT64LSN links",
 1644             sizeof(struct nat64lsn_aliaslink), NULL, NULL, NULL, NULL,
 1645             UMA_ALIGN_PTR, 0);
 1646         nat64lsn_state_zone = uma_zcreate("NAT64LSN states",
 1647             sizeof(struct nat64lsn_states_chunk), nat64lsn_state_ctor,
 1648             NULL, NULL, NULL, UMA_ALIGN_PTR, 0);
 1649         nat64lsn_job_zone = uma_zcreate("NAT64LSN jobs",
 1650             sizeof(struct nat64lsn_job_item), NULL, NULL, NULL, NULL,
 1651             UMA_ALIGN_PTR, 0);
 1652         JQUEUE_LOCK_INIT();
 1653 }
 1654 
 1655 void
 1656 nat64lsn_uninit_internal(void)
 1657 {
 1658 
 1659         /* XXX: epoch_task drain */
 1660         JQUEUE_LOCK_DESTROY();
 1661         uma_zdestroy(nat64lsn_host_zone);
 1662         uma_zdestroy(nat64lsn_pgchunk_zone);
 1663         uma_zdestroy(nat64lsn_pg_zone);
 1664         uma_zdestroy(nat64lsn_aliaslink_zone);
 1665         uma_zdestroy(nat64lsn_state_zone);
 1666         uma_zdestroy(nat64lsn_job_zone);
 1667 }
 1668 
void
nat64lsn_start_instance(struct nat64lsn_cfg *cfg)
{

	/*
	 * Arm the periodic housekeeping callout for this instance;
	 * nat64lsn_periodic will fire after PERIODIC_DELAY seconds.
	 * Taken under the callout lock so start/stop cannot race.
	 */
	CALLOUT_LOCK(cfg);
	callout_reset(&cfg->periodic, hz * PERIODIC_DELAY,
	    nat64lsn_periodic, cfg);
	CALLOUT_UNLOCK(cfg);
}
 1678 
struct nat64lsn_cfg *
nat64lsn_init_instance(struct ip_fw_chain *ch, in_addr_t prefix, int plen)
{
	/*
	 * Allocate and initialize one NAT64LSN instance for the given
	 * IPv4 translation prefix (prefix/plen, host byte order).
	 * All allocations use M_WAITOK, so this never returns NULL.
	 */
	struct nat64lsn_cfg *cfg;
	struct nat64lsn_alias *alias;
	int i, naddr;

	cfg = malloc(sizeof(struct nat64lsn_cfg), M_NAT64LSN,
	    M_WAITOK | M_ZERO);

	CFG_LOCK_INIT(cfg);
	CALLOUT_LOCK_INIT(cfg);
	STAILQ_INIT(&cfg->jhead);
	cfg->vp = curvnet;
	COUNTER_ARRAY_ALLOC(cfg->base.stats.cnt, NAT64STATS, M_WAITOK);

	/* Per-instance random seed keeps hash buckets unpredictable. */
	cfg->hash_seed = arc4random();
	cfg->hosts_hashsize = NAT64LSN_HOSTS_HSIZE;
	cfg->hosts_hash = malloc(sizeof(struct nat64lsn_hosts_slist) *
	    cfg->hosts_hashsize, M_NAT64LSN, M_WAITOK | M_ZERO);
	for (i = 0; i < cfg->hosts_hashsize; i++)
		CK_SLIST_INIT(&cfg->hosts_hash[i]);

	/*
	 * One alias entry per IPv4 address in the prefix.
	 * NOTE(review): plen == 0 would shift by 32 (undefined
	 * behavior); presumably callers validate plen beforehand --
	 * verify at the configuration entry point.
	 */
	naddr = 1 << (32 - plen);
	cfg->prefix4 = prefix;
	cfg->pmask4 = prefix | (naddr - 1);
	cfg->plen4 = plen;
	cfg->aliases = malloc(sizeof(struct nat64lsn_alias) * naddr,
	    M_NAT64LSN, M_WAITOK | M_ZERO);
	for (i = 0; i < naddr; i++) {
		alias = &cfg->aliases[i];
		alias->addr = prefix + i; /* host byte order */
		CK_SLIST_INIT(&alias->hosts);
		ALIAS_LOCK_INIT(alias);
	}

	/* Callouts are armed later (nat64lsn_start_instance). */
	callout_init_mtx(&cfg->periodic, &cfg->periodic_lock, 0);
	callout_init(&cfg->jcallout, CALLOUT_MPSAFE);

	return (cfg);
}
 1720 
 1721 static void
 1722 nat64lsn_destroy_pg(struct nat64lsn_pg *pg)
 1723 {
 1724         int i;
 1725 
 1726         if (pg->chunks_count == 1) {
 1727                 uma_zfree(nat64lsn_state_zone, pg->states);
 1728         } else {
 1729                 for (i = 0; i < pg->chunks_count; i++)
 1730                         uma_zfree(nat64lsn_state_zone, pg->states_chunk[i]);
 1731                 free(pg->states_chunk, M_NAT64LSN);
 1732                 free(pg->freemask_chunk, M_NAT64LSN);
 1733         }
 1734         uma_zfree(nat64lsn_pg_zone, pg);
 1735 }
 1736 
 1737 static void
 1738 nat64lsn_destroy_alias(struct nat64lsn_cfg *cfg,
 1739     struct nat64lsn_alias *alias)
 1740 {
 1741         struct nat64lsn_pg *pg;
 1742         int i;
 1743 
 1744         while (!CK_SLIST_EMPTY(&alias->portgroups)) {
 1745                 pg = CK_SLIST_FIRST(&alias->portgroups);
 1746                 CK_SLIST_REMOVE_HEAD(&alias->portgroups, entries);
 1747                 nat64lsn_destroy_pg(pg);
 1748         }
 1749         for (i = 0; i < 32; i++) {
 1750                 if (ISSET32(alias->tcp_chunkmask, i))
 1751                         uma_zfree(nat64lsn_pgchunk_zone, alias->tcp[i]);
 1752                 if (ISSET32(alias->udp_chunkmask, i))
 1753                         uma_zfree(nat64lsn_pgchunk_zone, alias->udp[i]);
 1754                 if (ISSET32(alias->icmp_chunkmask, i))
 1755                         uma_zfree(nat64lsn_pgchunk_zone, alias->icmp[i]);
 1756         }
 1757         ALIAS_LOCK_DESTROY(alias);
 1758 }
 1759 
 1760 static void
 1761 nat64lsn_destroy_host(struct nat64lsn_host *host)
 1762 {
 1763         struct nat64lsn_aliaslink *link;
 1764 
 1765         while (!CK_SLIST_EMPTY(&host->aliases)) {
 1766                 link = CK_SLIST_FIRST(&host->aliases);
 1767                 CK_SLIST_REMOVE_HEAD(&host->aliases, host_entries);
 1768 
 1769                 ALIAS_LOCK(link->alias);
 1770                 CK_SLIST_REMOVE(&link->alias->hosts, link,
 1771                     nat64lsn_aliaslink, alias_entries);
 1772                 link->alias->hosts_count--;
 1773                 ALIAS_UNLOCK(link->alias);
 1774 
 1775                 uma_zfree(nat64lsn_aliaslink_zone, link);
 1776         }
 1777         HOST_LOCK_DESTROY(host);
 1778         free(host->states_hash, M_NAT64LSN);
 1779         uma_zfree(nat64lsn_host_zone, host);
 1780 }
 1781 
void
nat64lsn_destroy_instance(struct nat64lsn_cfg *cfg)
{
	/*
	 * Full instance teardown.  Order matters: both callouts are
	 * drained first so no timer can touch the structures while we
	 * free hosts, aliases, locks, counters and the cfg itself.
	 */
	struct nat64lsn_host *host;
	int i;

	CALLOUT_LOCK(cfg);
	callout_drain(&cfg->periodic);
	CALLOUT_UNLOCK(cfg);
	callout_drain(&cfg->jcallout);

	/* Release every host hanging off the hash table. */
	for (i = 0; i < cfg->hosts_hashsize; i++) {
		while (!CK_SLIST_EMPTY(&cfg->hosts_hash[i])) {
			host = CK_SLIST_FIRST(&cfg->hosts_hash[i]);
			CK_SLIST_REMOVE_HEAD(&cfg->hosts_hash[i], entries);
			nat64lsn_destroy_host(host);
		}
	}

	/* One alias per address in the /plen4 prefix. */
	for (i = 0; i < (1 << (32 - cfg->plen4)); i++)
		nat64lsn_destroy_alias(cfg, &cfg->aliases[i]);

	CALLOUT_LOCK_DESTROY(cfg);
	CFG_LOCK_DESTROY(cfg);
	COUNTER_ARRAY_FREE(cfg->base.stats.cnt, NAT64STATS);
	free(cfg->hosts_hash, M_NAT64LSN);
	free(cfg->aliases, M_NAT64LSN);
	free(cfg, M_NAT64LSN);
}

Cache object: de96c6cc3dcd9005b058a8e6d072eae2


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]


This page is part of the FreeBSD/Linux Linux Kernel Cross-Reference, and was automatically generated using a modified version of the LXR engine.