The Design and Implementation of the FreeBSD Operating System, Second Edition
Now available: The Design and Implementation of the FreeBSD Operating System (Second Edition)


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]

FreeBSD/Linux Kernel Cross Reference
sys/netpfil/ipfw/ip_fw_table.c

Version: -  FREEBSD  -  FREEBSD-13-STABLE  -  FREEBSD-13-0  -  FREEBSD-12-STABLE  -  FREEBSD-12-0  -  FREEBSD-11-STABLE  -  FREEBSD-11-0  -  FREEBSD-10-STABLE  -  FREEBSD-10-0  -  FREEBSD-9-STABLE  -  FREEBSD-9-0  -  FREEBSD-8-STABLE  -  FREEBSD-8-0  -  FREEBSD-7-STABLE  -  FREEBSD-7-0  -  FREEBSD-6-STABLE  -  FREEBSD-6-0  -  FREEBSD-5-STABLE  -  FREEBSD-5-0  -  FREEBSD-4-STABLE  -  FREEBSD-3-STABLE  -  FREEBSD22  -  l41  -  OPENBSD  -  linux-2.6  -  MK84  -  PLAN9  -  xnu-8792 
SearchContext: -  none  -  3  -  10 

    1 /*-
    2  * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
    3  *
    4  * Copyright (c) 2004 Ruslan Ermilov and Vsevolod Lobko.
    5  * Copyright (c) 2014 Yandex LLC
    6  * Copyright (c) 2014 Alexander V. Chernikov
    7  *
    8  * Redistribution and use in source and binary forms, with or without
    9  * modification, are permitted provided that the following conditions
   10  * are met:
   11  * 1. Redistributions of source code must retain the above copyright
   12  *    notice, this list of conditions and the following disclaimer.
   13  * 2. Redistributions in binary form must reproduce the above copyright
   14  *    notice, this list of conditions and the following disclaimer in the
   15  *    documentation and/or other materials provided with the distribution.
   16  *
   17  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
   18  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
   19  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
   20  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
   21  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
   22  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
   23  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
   24  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
   25  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
   26  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
   27  * SUCH DAMAGE.
   28  */
   29 
   30 #include <sys/cdefs.h>
   31 __FBSDID("$FreeBSD$");
   32 
   33 /*
   34  * Lookup table support for ipfw.
   35  *
   36  * This file contains handlers for all generic tables' operations:
   37  * add/del/flush entries, list/dump tables etc..
   38  *
   39  * Table data modification is protected by both UH and runtime lock
   40  * while reading configuration/data is protected by UH lock.
   41  *
   42  * Lookup algorithms for all table types are located in ip_fw_table_algo.c
   43  */
   44 
   45 #include "opt_ipfw.h"
   46 
   47 #include <sys/param.h>
   48 #include <sys/systm.h>
   49 #include <sys/malloc.h>
   50 #include <sys/kernel.h>
   51 #include <sys/lock.h>
   52 #include <sys/rwlock.h>
   53 #include <sys/rmlock.h>
   54 #include <sys/socket.h>
   55 #include <sys/socketvar.h>
   56 #include <sys/queue.h>
   57 #include <net/if.h>     /* ip_fw.h requires IFNAMSIZ */
   58 
   59 #include <netinet/in.h>
   60 #include <netinet/ip_var.h>     /* struct ipfw_rule_ref */
   61 #include <netinet/ip_fw.h>
   62 
   63 #include <netpfil/ipfw/ip_fw_private.h>
   64 #include <netpfil/ipfw/ip_fw_table.h>
   65 
   66  /*
       * Per-table configuration object kept by the generic table code.
       *
   67  * Table has the following `type` concepts:
   68  *
   69  * `no.type` represents lookup key type (addr, ifp, uid, etc..)
       * (NOTE(review): find_ref_table() in this file compares `no.subtype`
       * against the requested type - confirm which field is meant here.)
   70  * vmask represents bitmask of table values which are present at the moment.
   71  * Special IPFW_VTYPE_LEGACY ( (uint32_t)-1 ) represents old
   72  * single-value-for-all approach.
   73  */
   74 struct table_config {
   75         struct named_object     no;     /* named object header; code here uses its kidx, refcnt and subtype */
   76         uint8_t         tflags;         /* type flags */
   77         uint8_t         locked;         /* 1 if locked from changes */
   78         uint8_t         linked;         /* 1 if already linked */
   79         uint8_t         ochanged;       /* used by set swapping */
   80         uint8_t         vshared;        /* 1 if using shared value array */
   81         uint8_t         spare[3];
   82         uint32_t        count;          /* Number of records */
   83         uint32_t        limit;          /* Max number of records; 0 disables the limit check */
   84         uint32_t        vmask;          /* bitmask with supported values */
   85         uint32_t        ocount;         /* used by set swapping */
   86         uint64_t        gencnt;         /* generation count */
   87         char            tablename[64];  /* table name */
   88         struct table_algo       *ta;    /* Callbacks for given algo */
   89         void            *astate;        /* algorithm state */
   90         struct table_info       ti_copy;        /* data to put to table_info */
   91         struct namedobj_instance        *vi;    /* NOTE(review): not referenced in the visible code; purpose to be confirmed */
   92 };
   93 
   94 static int find_table_err(struct namedobj_instance *ni, struct tid_info *ti,
   95     struct table_config **tc);
   96 static struct table_config *find_table(struct namedobj_instance *ni,
   97     struct tid_info *ti);
   98 static struct table_config *alloc_table_config(struct ip_fw_chain *ch,
   99     struct tid_info *ti, struct table_algo *ta, char *adata, uint8_t tflags);
  100 static void free_table_config(struct namedobj_instance *ni,
  101     struct table_config *tc);
  102 static int create_table_internal(struct ip_fw_chain *ch, struct tid_info *ti,
  103     char *aname, ipfw_xtable_info *i, uint16_t *pkidx, int ref);
  104 static void link_table(struct ip_fw_chain *ch, struct table_config *tc);
  105 static void unlink_table(struct ip_fw_chain *ch, struct table_config *tc);
  106 static int find_ref_table(struct ip_fw_chain *ch, struct tid_info *ti,
  107     struct tentry_info *tei, uint32_t count, int op, struct table_config **ptc);
  108 #define OP_ADD  1
  109 #define OP_DEL  0
  110 static int export_tables(struct ip_fw_chain *ch, ipfw_obj_lheader *olh,
  111     struct sockopt_data *sd);
  112 static void export_table_info(struct ip_fw_chain *ch, struct table_config *tc,
  113     ipfw_xtable_info *i);
  114 static int dump_table_tentry(void *e, void *arg);
  115 static int dump_table_xentry(void *e, void *arg);
  116 
  117 static int swap_tables(struct ip_fw_chain *ch, struct tid_info *a,
  118     struct tid_info *b);
  119 
  120 static int check_table_name(const char *name);
  121 static int check_table_space(struct ip_fw_chain *ch, struct tableop_state *ts,
  122     struct table_config *tc, struct table_info *ti, uint32_t count);
  123 static int destroy_table(struct ip_fw_chain *ch, struct tid_info *ti);
  124 
  125 static struct table_algo *find_table_algo(struct tables_config *tableconf,
  126     struct tid_info *ti, char *name);
  127 
  128 static void objheader_to_ti(struct _ipfw_obj_header *oh, struct tid_info *ti);
  129 static void ntlv_to_ti(struct _ipfw_obj_ntlv *ntlv, struct tid_info *ti);
  130 
  131 #define CHAIN_TO_NI(chain)      (CHAIN_TO_TCFG(chain)->namehash) /* named-object hash of @chain's tables config */
  132 #define KIDX_TO_TI(ch, k)       (&(((struct table_info *)(ch)->tablestate)[k])) /* address of table_info slot @k in ch->tablestate */
  133 
  134 #define TA_BUF_SZ       128     /* Size in bytes of the on-stack buffer for add/delete state */
  135 
  136 void
  137 rollback_toperation_state(struct ip_fw_chain *ch, void *object)
  138 {
  139         struct tables_config *tcfg;
  140         struct op_state *os;
  141 
  142         tcfg = CHAIN_TO_TCFG(ch);
  143         TAILQ_FOREACH(os, &tcfg->state_list, next)
  144                 os->func(object, os);
  145 }
  146 
  147 void
  148 add_toperation_state(struct ip_fw_chain *ch, struct tableop_state *ts)
  149 {
  150         struct tables_config *tcfg;
  151 
  152         tcfg = CHAIN_TO_TCFG(ch);
  153         TAILQ_INSERT_HEAD(&tcfg->state_list, &ts->opstate, next);
  154 }
  155 
  156 void
  157 del_toperation_state(struct ip_fw_chain *ch, struct tableop_state *ts)
  158 {
  159         struct tables_config *tcfg;
  160 
  161         tcfg = CHAIN_TO_TCFG(ch);
  162         TAILQ_REMOVE(&tcfg->state_list, &ts->opstate, next);
  163 }
  164 
  165 void
  166 tc_ref(struct table_config *tc)
  167 {
  168 
  169         tc->no.refcnt++;
  170 }
  171 
  172 void
  173 tc_unref(struct table_config *tc)
  174 {
  175 
  176         tc->no.refcnt--;
  177 }
  178 
  179 static struct table_value *
  180 get_table_value(struct ip_fw_chain *ch, struct table_config *tc, uint32_t kidx)
  181 {
  182         struct table_value *pval;
  183 
  184         pval = (struct table_value *)ch->valuestate;
  185 
  186         return (&pval[kidx]);
  187 }
  188 
  189 /*
  190  * Checks if we're able to insert/update entry @tei into table
  191  * w.r.t @tc limits.
  192  * May alter @tei to indicate insertion error / insert
  193  * options.
  194  *
  195  * Returns 0 if operation can be performed/
  196  */
  197 static int
  198 check_table_limit(struct table_config *tc, struct tentry_info *tei)
  199 {
  200 
  201         if (tc->limit == 0 || tc->count < tc->limit)
  202                 return (0);
  203 
  204         if ((tei->flags & TEI_FLAGS_UPDATE) == 0) {
  205                 /* Notify userland on error cause */
  206                 tei->flags |= TEI_FLAGS_LIMIT;
  207                 return (EFBIG);
  208         }
  209 
  210         /*
  211          * We have UPDATE flag set.
  212          * Permit updating record (if found),
  213          * but restrict adding new one since we've
  214          * already hit the limit.
  215          */
  216         tei->flags |= TEI_FLAGS_DONTADD;
  217 
  218         return (0);
  219 }
  220 
  221 /*
  222  * Convert algorithm callback return code into
  223  * one of pre-defined states known by userland.
  224  */
  225 static void
  226 store_tei_result(struct tentry_info *tei, int op, int error, uint32_t num)
  227 {
  228         int flag;
  229 
  230         flag = 0;
  231 
  232         switch (error) {
  233         case 0:
  234                 if (op == OP_ADD && num != 0)
  235                         flag = TEI_FLAGS_ADDED;
  236                 if (op == OP_DEL)
  237                         flag = TEI_FLAGS_DELETED;
  238                 break;
  239         case ENOENT:
  240                 flag = TEI_FLAGS_NOTFOUND;
  241                 break;
  242         case EEXIST:
  243                 flag = TEI_FLAGS_EXISTS;
  244                 break;
  245         default:
  246                 flag = TEI_FLAGS_ERROR;
  247         }
  248 
  249         tei->flags |= flag;
  250 }
  251 
  252 /*
  253  * Creates and references table with default parameters.
  254  * Saves table config, algo and allocated kidx info @ptc, @pta and
  255  * @pkidx if non-zero.
  256  * Used for table auto-creation to support old binaries.
  257  *
  258  * Returns 0 on success.
  259  */
  260 static int
  261 create_table_compat(struct ip_fw_chain *ch, struct tid_info *ti,
  262     uint16_t *pkidx)
  263 {
  264         ipfw_xtable_info xi;
  265         int error;
  266 
  267         memset(&xi, 0, sizeof(xi));
  268         /* Set default value mask for legacy clients */
  269         xi.vmask = IPFW_VTYPE_LEGACY;
  270 
  271         error = create_table_internal(ch, ti, NULL, &xi, pkidx, 1);
  272         if (error != 0)
  273                 return (error);
  274 
  275         return (0);
  276 }
  277 
  278 /*
  279  * Find and reference existing table optionally
  280  * creating new one.
  281  *
  282  * Saves found table config into @ptc.
  283  * Note function may drop/acquire UH_WLOCK.
  284  * Returns 0 if table was found/created and referenced
  285  * or non-zero return code.
  286  */
  287 static int
  288 find_ref_table(struct ip_fw_chain *ch, struct tid_info *ti,
  289     struct tentry_info *tei, uint32_t count, int op,
  290     struct table_config **ptc)
  291 {
  292         struct namedobj_instance *ni;
  293         struct table_config *tc;
  294         uint16_t kidx;
  295         int error;
  296 
  297         IPFW_UH_WLOCK_ASSERT(ch);
  298 
  299         ni = CHAIN_TO_NI(ch);
  300         tc = NULL;
  301         if ((tc = find_table(ni, ti)) != NULL) {
  302                 /* check table type */
  303                 if (tc->no.subtype != ti->type)
  304                         return (EINVAL);
  305 
  306                 if (tc->locked != 0)
  307                         return (EACCES);
  308 
  309                 /* Try to exit early on limit hit */
  310                 if (op == OP_ADD && count == 1 &&
  311                     check_table_limit(tc, tei) != 0)
  312                         return (EFBIG);
  313 
  314                 /* Reference and return */
  315                 tc->no.refcnt++;
  316                 *ptc = tc;
  317                 return (0);
  318         }
  319 
  320         if (op == OP_DEL)
  321                 return (ESRCH);
  322 
  323         /* Compatibility mode: create new table for old clients */
  324         if ((tei->flags & TEI_FLAGS_COMPAT) == 0)
  325                 return (ESRCH);
  326 
  327         IPFW_UH_WUNLOCK(ch);
  328         error = create_table_compat(ch, ti, &kidx);
  329         IPFW_UH_WLOCK(ch);
  330 
  331         if (error != 0)
  332                 return (error);
  333 
  334         tc = (struct table_config *)ipfw_objhash_lookup_kidx(ni, kidx);
  335         KASSERT(tc != NULL, ("create_table_compat returned bad idx %d", kidx));
  336 
  337         /* OK, now we've got referenced table. */
  338         *ptc = tc;
  339         return (0);
  340 }
  341 
  342 /*
  343  * Rolls back already @added to @tc entries using state array @ta_buf_m.
  344  * Assume the following layout:
  345  * 1) ADD state (ta_buf_m[0] ... t_buf_m[added - 1]) for handling update cases
  346  * 2) DEL state (ta_buf_m[count[ ... t_buf_m[count + added - 1])
  347  *   for storing deleted state
  348  */
  349 static void
  350 rollback_added_entries(struct ip_fw_chain *ch, struct table_config *tc,
  351     struct table_info *tinfo, struct tentry_info *tei, caddr_t ta_buf_m,
  352     uint32_t count, uint32_t added)
  353 {
  354         struct table_algo *ta;
  355         struct tentry_info *ptei;
  356         caddr_t v, vv;
  357         size_t ta_buf_sz;
  358         int error __diagused, i;
  359         uint32_t num;
  360 
  361         IPFW_UH_WLOCK_ASSERT(ch);
  362 
  363         ta = tc->ta;
  364         ta_buf_sz = ta->ta_buf_size;
  365         v = ta_buf_m;
  366         vv = v + count * ta_buf_sz;
  367         for (i = 0; i < added; i++, v += ta_buf_sz, vv += ta_buf_sz) {
  368                 ptei = &tei[i];
  369                 if ((ptei->flags & TEI_FLAGS_UPDATED) != 0) {
  370                         /*
  371                          * We have old value stored by previous
  372                          * call in @ptei->value. Do add once again
  373                          * to restore it.
  374                          */
  375                         error = ta->add(tc->astate, tinfo, ptei, v, &num);
  376                         KASSERT(error == 0, ("rollback UPDATE fail"));
  377                         KASSERT(num == 0, ("rollback UPDATE fail2"));
  378                         continue;
  379                 }
  380 
  381                 error = ta->prepare_del(ch, ptei, vv);
  382                 KASSERT(error == 0, ("pre-rollback INSERT failed"));
  383                 error = ta->del(tc->astate, tinfo, ptei, vv, &num);
  384                 KASSERT(error == 0, ("rollback INSERT failed"));
  385                 tc->count -= num;
  386         }
  387 }
  388 
  389 /*
  390  * Prepares add/del state for all @count entries in @tei.
  391  * Uses either stack buffer (@ta_buf) or allocates a new one.
  392  * Stores pointer to allocated buffer back to @ta_buf.
  393  *
  394  * Returns 0 on success.
  395  */
  396 static int
  397 prepare_batch_buffer(struct ip_fw_chain *ch, struct table_algo *ta,
  398     struct tentry_info *tei, uint32_t count, int op, caddr_t *ta_buf)
  399 {
  400         caddr_t ta_buf_m, v;
  401         size_t ta_buf_sz, sz;
  402         struct tentry_info *ptei;
  403         int error, i;
  404 
  405         error = 0;
  406         ta_buf_sz = ta->ta_buf_size;
  407         if (count == 1) {
  408                 /* Single add/delete, use on-stack buffer */
  409                 memset(*ta_buf, 0, TA_BUF_SZ);
  410                 ta_buf_m = *ta_buf;
  411         } else {
  412                 /*
  413                  * Multiple adds/deletes, allocate larger buffer
  414                  *
  415                  * Note we need 2xcount buffer for add case:
  416                  * we have hold both ADD state
  417                  * and DELETE state (this may be needed
  418                  * if we need to rollback all changes)
  419                  */
  420                 sz = count * ta_buf_sz;
  421                 ta_buf_m = malloc((op == OP_ADD) ? sz * 2 : sz, M_TEMP,
  422                     M_WAITOK | M_ZERO);
  423         }
  424 
  425         v = ta_buf_m;
  426         for (i = 0; i < count; i++, v += ta_buf_sz) {
  427                 ptei = &tei[i];
  428                 error = (op == OP_ADD) ?
  429                     ta->prepare_add(ch, ptei, v) : ta->prepare_del(ch, ptei, v);
  430 
  431                 /*
  432                  * Some syntax error (incorrect mask, or address, or
  433                  * anything). Return error regardless of atomicity
  434                  * settings.
  435                  */
  436                 if (error != 0)
  437                         break;
  438         }
  439 
  440         *ta_buf = ta_buf_m;
  441         return (error);
  442 }
  443 
  444 /*
  445  * Flushes allocated state for each @count entries in @tei.
  446  * Frees @ta_buf_m if differs from stack buffer @ta_buf.
  447  */
  448 static void
  449 flush_batch_buffer(struct ip_fw_chain *ch, struct table_algo *ta,
  450     struct tentry_info *tei, uint32_t count, int rollback,
  451     caddr_t ta_buf_m, caddr_t ta_buf)
  452 {
  453         caddr_t v;
  454         struct tentry_info *ptei;
  455         size_t ta_buf_sz;
  456         int i;
  457 
  458         ta_buf_sz = ta->ta_buf_size;
  459 
  460         /* Run cleaning callback anyway */
  461         v = ta_buf_m;
  462         for (i = 0; i < count; i++, v += ta_buf_sz) {
  463                 ptei = &tei[i];
  464                 ta->flush_entry(ch, ptei, v);
  465                 if (ptei->ptv != NULL) {
  466                         free(ptei->ptv, M_IPFW);
  467                         ptei->ptv = NULL;
  468                 }
  469         }
  470 
  471         /* Clean up "deleted" state in case of rollback */
  472         if (rollback != 0) {
  473                 v = ta_buf_m + count * ta_buf_sz;
  474                 for (i = 0; i < count; i++, v += ta_buf_sz)
  475                         ta->flush_entry(ch, &tei[i], v);
  476         }
  477 
  478         if (ta_buf_m != ta_buf)
  479                 free(ta_buf_m, M_TEMP);
  480 }
  481 
  482 static void
  483 rollback_add_entry(void *object, struct op_state *_state)
  484 {
  485         struct ip_fw_chain *ch __diagused;
  486         struct tableop_state *ts;
  487 
  488         ts = (struct tableop_state *)_state;
  489 
  490         if (ts->tc != object && ts->ch != object)
  491                 return;
  492 
  493         ch = ts->ch;
  494 
  495         IPFW_UH_WLOCK_ASSERT(ch);
  496 
  497         /* Call specifid unlockers */
  498         rollback_table_values(ts);
  499 
  500         /* Indicate we've called */
  501         ts->modified = 1;
  502 }
  503 
  504 /*
  505  * Adds/updates one or more entries in table @ti.
  506  *
  507  * Function may drop/reacquire UH wlock multiple times due to
  508  * items alloc, algorithm callbacks (check_space), value linkage
  509  * (new values, value storage realloc), etc..
  510  * Other processes like other adds (which may involve storage resize),
  511  * table swaps (which changes table data and may change algo type),
  512  * table modify (which may change value mask) may be executed
  513  * simultaneously so we need to deal with it.
  514  *
  515  * The following approach was implemented:
  516  * we have per-chain linked list, protected with UH lock.
  517  * add_table_entry prepares special on-stack structure which is passed
  518  * to its descendants. Users add this structure to this list before unlock.
  519  * After performing needed operations and acquiring UH lock back, each user
  520  * checks if structure has changed. If true, it rolls local state back and
  521  * returns without error to the caller.
  522  * add_table_entry() on its own checks if structure has changed and restarts
  523  * its operation from the beginning (goto restart).
  524  *
  525  * Functions which are modifying fields of interest (currently
  526  *   resize_shared_value_storage() and swap_tables() )
  527  * traverses given list while holding UH lock immediately before
  528  * performing their operations calling function provided by list entry
  529  * ( currently rollback_add_entry  ) which performs rollback for all necessary
  530  * state and sets appropriate values in structure indicating rollback
  531  * has happened.
  532  *
  533  * Algo interaction:
  534  * Function references @ti first to ensure table won't
  535  * disappear or change its type.
  536  * After that, prepare_add callback is called for each @tei entry.
  537  * Next, we try to add each entry under UH lock + WLOCK
  538  * using add() callback.
  539  * Finally, we free all state by calling flush_entry callback
  540  * for each @tei.
  541  *
  542  * Returns 0 on success.
  543  */
  544 int
  545 add_table_entry(struct ip_fw_chain *ch, struct tid_info *ti,
  546     struct tentry_info *tei, uint8_t flags, uint32_t count)
  547 {
  548         struct table_config *tc;
  549         struct table_algo *ta;
  550         uint16_t kidx;
  551         int error, first_error, i, rollback;
  552         uint32_t num, numadd;
  553         struct tentry_info *ptei;
  554         struct tableop_state ts;
  555         char ta_buf[TA_BUF_SZ];
  556         caddr_t ta_buf_m, v;
  557 
  558         memset(&ts, 0, sizeof(ts));
  559         ta = NULL;
  560         IPFW_UH_WLOCK(ch);
  561 
  562         /*
  563          * Find and reference existing table.
  564          */
  565 restart:
  566         if (ts.modified != 0) {
                      /*
                       * The previous pass was invalidated: rollback_add_entry()
                       * has set ts.modified. Release the state prepared for
                       * that pass (UH lock is dropped around the flush) and
                       * start over with a clean @ts.
                       */
  567                 IPFW_UH_WUNLOCK(ch);
  568                 flush_batch_buffer(ch, ta, tei, count, rollback,
  569                     ta_buf_m, ta_buf);
  570                 memset(&ts, 0, sizeof(ts));
  571                 ta = NULL;
  572                 IPFW_UH_WLOCK(ch);
  573         }
  574 
  575         error = find_ref_table(ch, ti, tei, count, OP_ADD, &tc);
  576         if (error != 0) {
  577                 IPFW_UH_WUNLOCK(ch);
  578                 return (error);
  579         }
  580         ta = tc->ta;
  581 
  582         /* Fill in tablestate */
  583         ts.ch = ch;
  584         ts.opstate.func = rollback_add_entry;
  585         ts.tc = tc;
  586         ts.vshared = tc->vshared;
  587         ts.vmask = tc->vmask;
  588         ts.ta = ta;
  589         ts.tei = tei;
  590         ts.count = count;
              /* Set before any "goto cleanup"/"goto restart" below, both of which read it */
  591         rollback = 0;
              /*
               * Put @ts on the per-chain state list so that
               * rollback_toperation_state() can reach it while
               * UH lock is dropped.
               */
  592         add_toperation_state(ch, &ts);
  593         IPFW_UH_WUNLOCK(ch);
  594 
  595         /* Allocate memory and prepare record(s) */
  596         /* Pass stack buffer by default */
  597         ta_buf_m = ta_buf;
  598         error = prepare_batch_buffer(ch, ta, tei, count, OP_ADD, &ta_buf_m);
  599 
  600         IPFW_UH_WLOCK(ch);
  601         del_toperation_state(ch, &ts);
  602         /* Drop reference we've used in first search */
  603         tc->no.refcnt--;
  604 
  605         /* Check prepare_batch_buffer() error */
  606         if (error != 0)
  607                 goto cleanup;
  608 
  609         /*
  610          * Check if table swap has happened.
  611          * (so table algo might be changed).
  612          * Restart operation to achieve consistent behavior.
  613          */
  614         if (ts.modified != 0)
  615                 goto restart;
  616 
  617         /*
  618          * Link all values to shared/per-table value array.
  619          *
  620          * May release/reacquire UH_WLOCK.
  621          */
  622         error = ipfw_link_table_values(ch, &ts, flags);
  623         if (error != 0)
  624                 goto cleanup;
  625         if (ts.modified != 0)
  626                 goto restart;
  627 
  628         /*
  629          * Ensure we are able to add all entries without additional
  630          * memory allocations. May release/reacquire UH_WLOCK.
  631          */
  632         kidx = tc->no.kidx;
  633         error = check_table_space(ch, &ts, tc, KIDX_TO_TI(ch, kidx), count);
  634         if (error != 0)
  635                 goto cleanup;
  636         if (ts.modified != 0)
  637                 goto restart;
  638 
  639         /* We've got valid table in @tc. Let's try to add data */
  640         kidx = tc->no.kidx;
  641         ta = tc->ta;
  642         numadd = 0;
  643         first_error = 0;
  644 
  645         IPFW_WLOCK(ch);
  646 
  647         v = ta_buf_m;
  648         for (i = 0; i < count; i++, v += ta->ta_buf_size) {
  649                 ptei = &tei[i];
  650                 num = 0;
  651                 /* check limit before adding */
  652                 if ((error = check_table_limit(tc, ptei)) == 0) {
  653                         /*
  654                          * It should be safe to insert a record w/o
  655                          * a properly-linked value if atomicity is
  656                          * not required.
  657                          *
  658                          * If the added item does not have a valid value
  659                          * index, it would get rejected by ta->add().
  660                          * */
  661                         error = ta->add(tc->astate, KIDX_TO_TI(ch, kidx),
  662                             ptei, v, &num);
  663                         /* Set status flag to inform userland */
  664                         store_tei_result(ptei, OP_ADD, error, num);
  665                 }
  666                 if (error == 0) {
  667                         /* Update number of records to ease limit checking */
  668                         tc->count += num;
  669                         numadd += num;
  670                         continue;
  671                 }
  672 
  673                 if (first_error == 0)
  674                         first_error = error;
  675 
  676                 /*
  677                  * Some error has happened. Check our atomicity
  678                  * settings: continue if atomicity is not required,
  679                  * rollback changes otherwise.
  680                  */
  681                 if ((flags & IPFW_CTF_ATOMIC) == 0)
  682                         continue;
  683 
                      /* Undo entries 0 .. i - 1; entry i itself has failed */
  684                 rollback_added_entries(ch, tc, KIDX_TO_TI(ch, kidx),
  685                     tei, ta_buf_m, count, i);
  686 
  687                 rollback = 1;
  688                 break;
  689         }
  690 
  691         IPFW_WUNLOCK(ch);
  692 
  693         ipfw_garbage_table_values(ch, tc, tei, count, rollback);
  694 
  695         /* Permit post-add algorithm grow/rehash. */
  696         if (numadd != 0)
  697                 check_table_space(ch, NULL, tc, KIDX_TO_TI(ch, kidx), 0);
  698 
  699         /* Return first error to user, if any */
  700         error = first_error;
  701 
  702 cleanup:
              /* UH lock is released here; every jump to this label is expected to hold it */
  703         IPFW_UH_WUNLOCK(ch);
  704 
  705         flush_batch_buffer(ch, ta, tei, count, rollback, ta_buf_m, ta_buf);
  706 
  707         return (error);
  708 }
  709 
  710 /*
  711  * Deletes one or more entries in table @ti.
  712  *
  713  * Returns 0 on success.
  714  */
  715 int
  716 del_table_entry(struct ip_fw_chain *ch, struct tid_info *ti,
  717     struct tentry_info *tei, uint8_t flags, uint32_t count)
  718 {
  719         struct table_config *tc;
  720         struct table_algo *ta;
  721         struct tentry_info *ptei;
  722         uint16_t kidx;
  723         int error, first_error, i;
  724         uint32_t num, numdel;
  725         char ta_buf[TA_BUF_SZ];
  726         caddr_t ta_buf_m, v;
  727 
  728         /*
  729          * Find and reference existing table.
  730          */
  731         IPFW_UH_WLOCK(ch);
  732         error = find_ref_table(ch, ti, tei, count, OP_DEL, &tc);
  733         if (error != 0) {
  734                 IPFW_UH_WUNLOCK(ch);
  735                 return (error);
  736         }
  737         ta = tc->ta;
  738         IPFW_UH_WUNLOCK(ch);
  739 
  740         /* Allocate memory and prepare record(s) */
  741         /* Pass stack buffer by default */
  742         ta_buf_m = ta_buf;
  743         error = prepare_batch_buffer(ch, ta, tei, count, OP_DEL, &ta_buf_m);
  744         if (error != 0)
  745                 goto cleanup;
  746 
  747         IPFW_UH_WLOCK(ch);
  748 
  749         /* Drop reference we've used in first search */
  750         tc->no.refcnt--;
  751 
  752         /*
  753          * Check if table algo is still the same.
  754          * (changed ta may be the result of table swap).
  755          */
  756         if (ta != tc->ta) {
  757                 IPFW_UH_WUNLOCK(ch);
  758                 error = EINVAL;
  759                 goto cleanup;
  760         }
  761 
  762         kidx = tc->no.kidx;
  763         numdel = 0;
  764         first_error = 0;
  765 
  766         IPFW_WLOCK(ch);
  767         v = ta_buf_m;
  768         for (i = 0; i < count; i++, v += ta->ta_buf_size) {
  769                 ptei = &tei[i];
  770                 num = 0;
  771                 error = ta->del(tc->astate, KIDX_TO_TI(ch, kidx), ptei, v,
  772                     &num);
  773                 /* Save state for userland */
  774                 store_tei_result(ptei, OP_DEL, error, num);
  775                 if (error != 0 && first_error == 0)
  776                         first_error = error;
  777                 tc->count -= num;
  778                 numdel += num;
  779         }
  780         IPFW_WUNLOCK(ch);
  781 
  782         /* Unlink non-used values */
  783         ipfw_garbage_table_values(ch, tc, tei, count, 0);
  784 
  785         if (numdel != 0) {
  786                 /* Run post-del hook to permit shrinking */
  787                 check_table_space(ch, NULL, tc, KIDX_TO_TI(ch, kidx), 0);
  788         }
  789 
  790         IPFW_UH_WUNLOCK(ch);
  791 
  792         /* Return first error to user, if any */
  793         error = first_error;
  794 
  795 cleanup:
  796         flush_batch_buffer(ch, ta, tei, count, 0, ta_buf_m, ta_buf);
  797 
  798         return (error);
  799 }
  800 
  801 /*
  802  * Ensure that table @tc has enough space to add @count entries without
  803  * need for reallocation.
  804  *
  805  * Callbacks order:
  806  * 0) need_modify() (UH_WLOCK) - checks if @count items can be added w/o resize.
  807  *
  808  * 1) alloc_modify (no locks, M_WAITOK) - alloc new state based on @pflags.
  809  * 2) prepare_modify (UH_WLOCK) - copy old data into new storage
  810  * 3) modify (UH_WLOCK + WLOCK) - switch pointers
  811  * 4) flush_modify (UH_WLOCK) - free state, if needed
  812  *
  813  * Returns 0 on success.
  814  */
  815 static int
  816 check_table_space(struct ip_fw_chain *ch, struct tableop_state *ts,
  817     struct table_config *tc, struct table_info *ti, uint32_t count)
  818 {
  819         struct table_algo *ta;
  820         uint64_t pflags;
  821         char ta_buf[TA_BUF_SZ];
  822         int error;
  823 
  824         IPFW_UH_WLOCK_ASSERT(ch);
  825 
  826         error = 0;
  827         ta = tc->ta;
  828         if (ta->need_modify == NULL)
  829                 return (0);
  830 
  831         /* Acquire reference not to loose @tc between locks/unlocks */
  832         tc->no.refcnt++;
  833 
  834         /*
  835          * TODO: think about avoiding race between large add/large delete
  836          * operation on algorithm which implements shrinking along with
  837          * growing.
  838          */
  839         while (true) {
  840                 pflags = 0;
  841                 if (ta->need_modify(tc->astate, ti, count, &pflags) == 0) {
  842                         error = 0;
  843                         break;
  844                 }
  845 
  846                 /* We have to shrink/grow table */
  847                 if (ts != NULL)
  848                         add_toperation_state(ch, ts);
  849                 IPFW_UH_WUNLOCK(ch);
  850 
  851                 memset(&ta_buf, 0, sizeof(ta_buf));
  852                 error = ta->prepare_mod(ta_buf, &pflags);
  853 
  854                 IPFW_UH_WLOCK(ch);
  855                 if (ts != NULL)
  856                         del_toperation_state(ch, ts);
  857 
  858                 if (error != 0)
  859                         break;
  860 
  861                 if (ts != NULL && ts->modified != 0) {
  862                         /*
  863                          * Swap operation has happened
  864                          * so we're currently operating on other
  865                          * table data. Stop doing this.
  866                          */
  867                         ta->flush_mod(ta_buf);
  868                         break;
  869                 }
  870 
  871                 /* Check if we still need to alter table */
  872                 ti = KIDX_TO_TI(ch, tc->no.kidx);
  873                 if (ta->need_modify(tc->astate, ti, count, &pflags) == 0) {
  874                         IPFW_UH_WUNLOCK(ch);
  875 
  876                         /*
  877                          * Other thread has already performed resize.
  878                          * Flush our state and return.
  879                          */
  880                         ta->flush_mod(ta_buf);
  881                         break;
  882                 }
  883 
  884                 error = ta->fill_mod(tc->astate, ti, ta_buf, &pflags);
  885                 if (error == 0) {
  886                         /* Do actual modification */
  887                         IPFW_WLOCK(ch);
  888                         ta->modify(tc->astate, ti, ta_buf, pflags);
  889                         IPFW_WUNLOCK(ch);
  890                 }
  891 
  892                 /* Anyway, flush data and retry */
  893                 ta->flush_mod(ta_buf);
  894         }
  895 
  896         tc->no.refcnt--;
  897         return (error);
  898 }
  899 
  900 /*
  901  * Adds or deletes record in table.
  902  * Data layout (v0):
  903  * Request: [ ip_fw3_opheader ipfw_table_xentry ]
  904  *
  905  * Returns 0 on success
  906  */
  907 static int
  908 manage_table_ent_v0(struct ip_fw_chain *ch, ip_fw3_opheader *op3,
  909     struct sockopt_data *sd)
  910 {
  911         ipfw_table_xentry *xent;
  912         struct tentry_info tei;
  913         struct tid_info ti;
  914         struct table_value v;
  915         int error, hdrlen, read;
  916 
  917         hdrlen = offsetof(ipfw_table_xentry, k);
  918 
  919         /* Check minimum header size */
  920         if (sd->valsize < (sizeof(*op3) + hdrlen))
  921                 return (EINVAL);
  922 
  923         read = sizeof(ip_fw3_opheader);
  924 
  925         /* Check if xentry len field is valid */
  926         xent = (ipfw_table_xentry *)(op3 + 1);
  927         if (xent->len < hdrlen || xent->len + read > sd->valsize)
  928                 return (EINVAL);
  929 
  930         memset(&tei, 0, sizeof(tei));
  931         tei.paddr = &xent->k;
  932         tei.masklen = xent->masklen;
  933         ipfw_import_table_value_legacy(xent->value, &v);
  934         tei.pvalue = &v;
  935         /* Old requests compatibility */
  936         tei.flags = TEI_FLAGS_COMPAT;
  937         if (xent->type == IPFW_TABLE_ADDR) {
  938                 if (xent->len - hdrlen == sizeof(in_addr_t))
  939                         tei.subtype = AF_INET;
  940                 else
  941                         tei.subtype = AF_INET6;
  942         }
  943 
  944         memset(&ti, 0, sizeof(ti));
  945         ti.uidx = xent->tbl;
  946         ti.type = xent->type;
  947 
  948         error = (op3->opcode == IP_FW_TABLE_XADD) ?
  949             add_table_entry(ch, &ti, &tei, 0, 1) :
  950             del_table_entry(ch, &ti, &tei, 0, 1);
  951 
  952         return (error);
  953 }
  954 
  955 /*
  956  * Adds or deletes record in table.
  957  * Data layout (v1)(current):
  958  * Request: [ ipfw_obj_header
  959  *   ipfw_obj_ctlv(IPFW_TLV_TBLENT_LIST) [ ipfw_obj_tentry x N ]
  960  * ]
  961  *
  962  * Returns 0 on success
  963  */
  964 static int
  965 manage_table_ent_v1(struct ip_fw_chain *ch, ip_fw3_opheader *op3,
  966     struct sockopt_data *sd)
  967 {
  968         ipfw_obj_tentry *tent, *ptent;
  969         ipfw_obj_ctlv *ctlv;
  970         ipfw_obj_header *oh;
  971         struct tentry_info *ptei, tei, *tei_buf;
  972         struct tid_info ti;
  973         int error, i, kidx, read;
  974 
  975         /* Check minimum header size */
  976         if (sd->valsize < (sizeof(*oh) + sizeof(*ctlv)))
  977                 return (EINVAL);
  978 
  979         /* Check if passed data is too long */
  980         if (sd->valsize != sd->kavail)
  981                 return (EINVAL);
  982 
  983         oh = (ipfw_obj_header *)sd->kbuf;
  984 
  985         /* Basic length checks for TLVs */
  986         if (oh->ntlv.head.length != sizeof(oh->ntlv))
  987                 return (EINVAL);
  988 
  989         read = sizeof(*oh);
  990 
  991         ctlv = (ipfw_obj_ctlv *)(oh + 1);
  992         if (ctlv->head.length + read != sd->valsize)
  993                 return (EINVAL);
  994 
  995         read += sizeof(*ctlv);
  996         tent = (ipfw_obj_tentry *)(ctlv + 1);
  997         if (ctlv->count * sizeof(*tent) + read != sd->valsize)
  998                 return (EINVAL);
  999 
 1000         if (ctlv->count == 0)
 1001                 return (0);
 1002 
 1003         /*
 1004          * Mark entire buffer as "read".
 1005          * This instructs sopt api write it back
 1006          * after function return.
 1007          */
 1008         ipfw_get_sopt_header(sd, sd->valsize);
 1009 
 1010         /* Perform basic checks for each entry */
 1011         ptent = tent;
 1012         kidx = tent->idx;
 1013         for (i = 0; i < ctlv->count; i++, ptent++) {
 1014                 if (ptent->head.length != sizeof(*ptent))
 1015                         return (EINVAL);
 1016                 if (ptent->idx != kidx)
 1017                         return (ENOTSUP);
 1018         }
 1019 
 1020         /* Convert data into kernel request objects */
 1021         objheader_to_ti(oh, &ti);
 1022         ti.type = oh->ntlv.type;
 1023         ti.uidx = kidx;
 1024 
 1025         /* Use on-stack buffer for single add/del */
 1026         if (ctlv->count == 1) {
 1027                 memset(&tei, 0, sizeof(tei));
 1028                 tei_buf = &tei;
 1029         } else
 1030                 tei_buf = malloc(ctlv->count * sizeof(tei), M_TEMP,
 1031                     M_WAITOK | M_ZERO);
 1032 
 1033         ptei = tei_buf;
 1034         ptent = tent;
 1035         for (i = 0; i < ctlv->count; i++, ptent++, ptei++) {
 1036                 ptei->paddr = &ptent->k;
 1037                 ptei->subtype = ptent->subtype;
 1038                 ptei->masklen = ptent->masklen;
 1039                 if (ptent->head.flags & IPFW_TF_UPDATE)
 1040                         ptei->flags |= TEI_FLAGS_UPDATE;
 1041 
 1042                 ipfw_import_table_value_v1(&ptent->v.value);
 1043                 ptei->pvalue = (struct table_value *)&ptent->v.value;
 1044         }
 1045 
 1046         error = (oh->opheader.opcode == IP_FW_TABLE_XADD) ?
 1047             add_table_entry(ch, &ti, tei_buf, ctlv->flags, ctlv->count) :
 1048             del_table_entry(ch, &ti, tei_buf, ctlv->flags, ctlv->count);
 1049 
 1050         /* Translate result back to userland */
 1051         ptei = tei_buf;
 1052         ptent = tent;
 1053         for (i = 0; i < ctlv->count; i++, ptent++, ptei++) {
 1054                 if (ptei->flags & TEI_FLAGS_ADDED)
 1055                         ptent->result = IPFW_TR_ADDED;
 1056                 else if (ptei->flags & TEI_FLAGS_DELETED)
 1057                         ptent->result = IPFW_TR_DELETED;
 1058                 else if (ptei->flags & TEI_FLAGS_UPDATED)
 1059                         ptent->result = IPFW_TR_UPDATED;
 1060                 else if (ptei->flags & TEI_FLAGS_LIMIT)
 1061                         ptent->result = IPFW_TR_LIMIT;
 1062                 else if (ptei->flags & TEI_FLAGS_ERROR)
 1063                         ptent->result = IPFW_TR_ERROR;
 1064                 else if (ptei->flags & TEI_FLAGS_NOTFOUND)
 1065                         ptent->result = IPFW_TR_NOTFOUND;
 1066                 else if (ptei->flags & TEI_FLAGS_EXISTS)
 1067                         ptent->result = IPFW_TR_EXISTS;
 1068                 ipfw_export_table_value_v1(ptei->pvalue, &ptent->v.value);
 1069         }
 1070 
 1071         if (tei_buf != &tei)
 1072                 free(tei_buf, M_TEMP);
 1073 
 1074         return (error);
 1075 }
 1076 
 1077 /*
 1078  * Looks up an entry in given table.
 1079  * Data layout (v0)(current):
 1080  * Request: [ ipfw_obj_header ipfw_obj_tentry ]
 1081  * Reply: [ ipfw_obj_header ipfw_obj_tentry ]
 1082  *
 1083  * Returns 0 on success
 1084  */
 1085 static int
 1086 find_table_entry(struct ip_fw_chain *ch, ip_fw3_opheader *op3,
 1087     struct sockopt_data *sd)
 1088 {
 1089         ipfw_obj_tentry *tent;
 1090         ipfw_obj_header *oh;
 1091         struct tid_info ti;
 1092         struct table_config *tc;
 1093         struct table_algo *ta;
 1094         struct table_info *kti;
 1095         struct table_value *pval;
 1096         struct namedobj_instance *ni;
 1097         int error;
 1098         size_t sz;
 1099 
 1100         /* Check minimum header size */
 1101         sz = sizeof(*oh) + sizeof(*tent);
 1102         if (sd->valsize != sz)
 1103                 return (EINVAL);
 1104 
 1105         oh = (struct _ipfw_obj_header *)ipfw_get_sopt_header(sd, sz);
 1106         tent = (ipfw_obj_tentry *)(oh + 1);
 1107 
 1108         /* Basic length checks for TLVs */
 1109         if (oh->ntlv.head.length != sizeof(oh->ntlv))
 1110                 return (EINVAL);
 1111 
 1112         objheader_to_ti(oh, &ti);
 1113         ti.type = oh->ntlv.type;
 1114         ti.uidx = tent->idx;
 1115 
 1116         IPFW_UH_RLOCK(ch);
 1117         ni = CHAIN_TO_NI(ch);
 1118 
 1119         /*
 1120          * Find existing table and check its type .
 1121          */
 1122         ta = NULL;
 1123         if ((tc = find_table(ni, &ti)) == NULL) {
 1124                 IPFW_UH_RUNLOCK(ch);
 1125                 return (ESRCH);
 1126         }
 1127 
 1128         /* check table type */
 1129         if (tc->no.subtype != ti.type) {
 1130                 IPFW_UH_RUNLOCK(ch);
 1131                 return (EINVAL);
 1132         }
 1133 
 1134         kti = KIDX_TO_TI(ch, tc->no.kidx);
 1135         ta = tc->ta;
 1136 
 1137         if (ta->find_tentry == NULL)
 1138                 return (ENOTSUP);
 1139 
 1140         error = ta->find_tentry(tc->astate, kti, tent);
 1141         if (error == 0) {
 1142                 pval = get_table_value(ch, tc, tent->v.kidx);
 1143                 ipfw_export_table_value_v1(pval, &tent->v.value);
 1144         }
 1145         IPFW_UH_RUNLOCK(ch);
 1146 
 1147         return (error);
 1148 }
 1149 
 1150 /*
 1151  * Flushes all entries or destroys given table.
 1152  * Data layout (v0)(current):
 1153  * Request: [ ipfw_obj_header ]
 1154  *
 1155  * Returns 0 on success
 1156  */
 1157 static int
 1158 flush_table_v0(struct ip_fw_chain *ch, ip_fw3_opheader *op3,
 1159     struct sockopt_data *sd)
 1160 {
 1161         int error;
 1162         struct _ipfw_obj_header *oh;
 1163         struct tid_info ti;
 1164 
 1165         if (sd->valsize != sizeof(*oh))
 1166                 return (EINVAL);
 1167 
 1168         oh = (struct _ipfw_obj_header *)op3;
 1169         objheader_to_ti(oh, &ti);
 1170 
 1171         if (op3->opcode == IP_FW_TABLE_XDESTROY)
 1172                 error = destroy_table(ch, &ti);
 1173         else if (op3->opcode == IP_FW_TABLE_XFLUSH)
 1174                 error = flush_table(ch, &ti);
 1175         else
 1176                 return (ENOTSUP);
 1177 
 1178         return (error);
 1179 }
 1180 
 1181 static void
 1182 restart_flush(void *object, struct op_state *_state)
 1183 {
 1184         struct tableop_state *ts;
 1185 
 1186         ts = (struct tableop_state *)_state;
 1187 
 1188         if (ts->tc != object)
 1189                 return;
 1190 
 1191         /* Indicate we've called */
 1192         ts->modified = 1;
 1193 }
 1194 
 1195 /*
 1196  * Flushes given table.
 1197  *
 1198  * Function create new table instance with the same
 1199  * parameters, swaps it with old one and
 1200  * flushes state without holding runtime WLOCK.
 1201  *
 1202  * Returns 0 on success.
 1203  */
 1204 int
 1205 flush_table(struct ip_fw_chain *ch, struct tid_info *ti)
 1206 {
 1207         struct namedobj_instance *ni;
 1208         struct table_config *tc;
 1209         struct table_algo *ta;
 1210         struct table_info ti_old, ti_new, *tablestate;
 1211         void *astate_old, *astate_new;
 1212         char algostate[64], *pstate;
 1213         struct tableop_state ts;
 1214         int error, need_gc;
 1215         uint16_t kidx;
 1216         uint8_t tflags;
 1217 
 1218         /*
 1219          * Stage 1: save table algorithm.
 1220          * Reference found table to ensure it won't disappear.
 1221          */
 1222         IPFW_UH_WLOCK(ch);
 1223         ni = CHAIN_TO_NI(ch);
 1224         if ((tc = find_table(ni, ti)) == NULL) {
 1225                 IPFW_UH_WUNLOCK(ch);
 1226                 return (ESRCH);
 1227         }
 1228         need_gc = 0;
 1229         astate_new = NULL;
 1230         memset(&ti_new, 0, sizeof(ti_new));
 1231 restart:
 1232         /* Set up swap handler */
 1233         memset(&ts, 0, sizeof(ts));
 1234         ts.opstate.func = restart_flush;
 1235         ts.tc = tc;
 1236 
 1237         ta = tc->ta;
 1238         /* Do not flush readonly tables */
 1239         if ((ta->flags & TA_FLAG_READONLY) != 0) {
 1240                 IPFW_UH_WUNLOCK(ch);
 1241                 return (EACCES);
 1242         }
 1243         /* Save startup algo parameters */
 1244         if (ta->print_config != NULL) {
 1245                 ta->print_config(tc->astate, KIDX_TO_TI(ch, tc->no.kidx),
 1246                     algostate, sizeof(algostate));
 1247                 pstate = algostate;
 1248         } else
 1249                 pstate = NULL;
 1250         tflags = tc->tflags;
 1251         tc->no.refcnt++;
 1252         add_toperation_state(ch, &ts);
 1253         IPFW_UH_WUNLOCK(ch);
 1254 
 1255         /*
 1256          * Stage 1.5: if this is not the first attempt, destroy previous state
 1257          */
 1258         if (need_gc != 0) {
 1259                 ta->destroy(astate_new, &ti_new);
 1260                 need_gc = 0;
 1261         }
 1262 
 1263         /*
 1264          * Stage 2: allocate new table instance using same algo.
 1265          */
 1266         memset(&ti_new, 0, sizeof(struct table_info));
 1267         error = ta->init(ch, &astate_new, &ti_new, pstate, tflags);
 1268 
 1269         /*
 1270          * Stage 3: swap old state pointers with newly-allocated ones.
 1271          * Decrease refcount.
 1272          */
 1273         IPFW_UH_WLOCK(ch);
 1274         tc->no.refcnt--;
 1275         del_toperation_state(ch, &ts);
 1276 
 1277         if (error != 0) {
 1278                 IPFW_UH_WUNLOCK(ch);
 1279                 return (error);
 1280         }
 1281 
 1282         /*
 1283          * Restart operation if table swap has happened:
 1284          * even if algo may be the same, algo init parameters
 1285          * may change. Restart operation instead of doing
 1286          * complex checks.
 1287          */
 1288         if (ts.modified != 0) {
 1289                 /* Delay destroying data since we're holding UH lock */
 1290                 need_gc = 1;
 1291                 goto restart;
 1292         }
 1293 
 1294         ni = CHAIN_TO_NI(ch);
 1295         kidx = tc->no.kidx;
 1296         tablestate = (struct table_info *)ch->tablestate;
 1297 
 1298         IPFW_WLOCK(ch);
 1299         ti_old = tablestate[kidx];
 1300         tablestate[kidx] = ti_new;
 1301         IPFW_WUNLOCK(ch);
 1302 
 1303         astate_old = tc->astate;
 1304         tc->astate = astate_new;
 1305         tc->ti_copy = ti_new;
 1306         tc->count = 0;
 1307 
 1308         /* Notify algo on real @ti address */
 1309         if (ta->change_ti != NULL)
 1310                 ta->change_ti(tc->astate, &tablestate[kidx]);
 1311 
 1312         /*
 1313          * Stage 4: unref values.
 1314          */
 1315         ipfw_unref_table_values(ch, tc, ta, astate_old, &ti_old);
 1316         IPFW_UH_WUNLOCK(ch);
 1317 
 1318         /*
 1319          * Stage 5: perform real flush/destroy.
 1320          */
 1321         ta->destroy(astate_old, &ti_old);
 1322 
 1323         return (0);
 1324 }
 1325 
 1326 /*
 1327  * Swaps two tables.
 1328  * Data layout (v0)(current):
 1329  * Request: [ ipfw_obj_header ipfw_obj_ntlv ]
 1330  *
 1331  * Returns 0 on success
 1332  */
 1333 static int
 1334 swap_table(struct ip_fw_chain *ch, ip_fw3_opheader *op3,
 1335     struct sockopt_data *sd)
 1336 {
 1337         int error;
 1338         struct _ipfw_obj_header *oh;
 1339         struct tid_info ti_a, ti_b;
 1340 
 1341         if (sd->valsize != sizeof(*oh) + sizeof(ipfw_obj_ntlv))
 1342                 return (EINVAL);
 1343 
 1344         oh = (struct _ipfw_obj_header *)op3;
 1345         ntlv_to_ti(&oh->ntlv, &ti_a);
 1346         ntlv_to_ti((ipfw_obj_ntlv *)(oh + 1), &ti_b);
 1347 
 1348         error = swap_tables(ch, &ti_a, &ti_b);
 1349 
 1350         return (error);
 1351 }
 1352 
 1353 /*
 1354  * Swaps two tables of the same type/valtype.
 1355  *
 1356  * Checks if tables are compatible and limits
 1357  * permits swap, than actually perform swap.
 1358  *
 1359  * Each table consists of 2 different parts:
 1360  * config:
 1361  *   @tc (with name, set, kidx) and rule bindings, which is "stable".
 1362  *   number of items
 1363  *   table algo
 1364  * runtime:
 1365  *   runtime data @ti (ch->tablestate)
 1366  *   runtime cache in @tc
 1367  *   algo-specific data (@tc->astate)
 1368  *
 1369  * So we switch:
 1370  *  all runtime data
 1371  *   number of items
 1372  *   table algo
 1373  *
 1374  * After that we call @ti change handler for each table.
 1375  *
 1376  * Note that referencing @tc won't protect tc->ta from change.
 1377  * XXX: Do we need to restrict swap between locked tables?
 1378  * XXX: Do we need to exchange ftype?
 1379  *
 1380  * Returns 0 on success.
 1381  */
 1382 static int
 1383 swap_tables(struct ip_fw_chain *ch, struct tid_info *a,
 1384     struct tid_info *b)
 1385 {
 1386         struct namedobj_instance *ni;
 1387         struct table_config *tc_a, *tc_b;
 1388         struct table_algo *ta;
 1389         struct table_info ti, *tablestate;
 1390         void *astate;
 1391         uint32_t count;
 1392 
 1393         /*
 1394          * Stage 1: find both tables and ensure they are of
 1395          * the same type.
 1396          */
 1397         IPFW_UH_WLOCK(ch);
 1398         ni = CHAIN_TO_NI(ch);
 1399         if ((tc_a = find_table(ni, a)) == NULL) {
 1400                 IPFW_UH_WUNLOCK(ch);
 1401                 return (ESRCH);
 1402         }
 1403         if ((tc_b = find_table(ni, b)) == NULL) {
 1404                 IPFW_UH_WUNLOCK(ch);
 1405                 return (ESRCH);
 1406         }
 1407 
 1408         /* It is very easy to swap between the same table */
 1409         if (tc_a == tc_b) {
 1410                 IPFW_UH_WUNLOCK(ch);
 1411                 return (0);
 1412         }
 1413 
 1414         /* Check type and value are the same */
 1415         if (tc_a->no.subtype!=tc_b->no.subtype || tc_a->tflags!=tc_b->tflags) {
 1416                 IPFW_UH_WUNLOCK(ch);
 1417                 return (EINVAL);
 1418         }
 1419 
 1420         /* Check limits before swap */
 1421         if ((tc_a->limit != 0 && tc_b->count > tc_a->limit) ||
 1422             (tc_b->limit != 0 && tc_a->count > tc_b->limit)) {
 1423                 IPFW_UH_WUNLOCK(ch);
 1424                 return (EFBIG);
 1425         }
 1426 
 1427         /* Check if one of the tables is readonly */
 1428         if (((tc_a->ta->flags | tc_b->ta->flags) & TA_FLAG_READONLY) != 0) {
 1429                 IPFW_UH_WUNLOCK(ch);
 1430                 return (EACCES);
 1431         }
 1432 
 1433         /* Notify we're going to swap */
 1434         rollback_toperation_state(ch, tc_a);
 1435         rollback_toperation_state(ch, tc_b);
 1436 
 1437         /* Everything is fine, prepare to swap */
 1438         tablestate = (struct table_info *)ch->tablestate;
 1439         ti = tablestate[tc_a->no.kidx];
 1440         ta = tc_a->ta;
 1441         astate = tc_a->astate;
 1442         count = tc_a->count;
 1443 
 1444         IPFW_WLOCK(ch);
 1445         /* a <- b */
 1446         tablestate[tc_a->no.kidx] = tablestate[tc_b->no.kidx];
 1447         tc_a->ta = tc_b->ta;
 1448         tc_a->astate = tc_b->astate;
 1449         tc_a->count = tc_b->count;
 1450         /* b <- a */
 1451         tablestate[tc_b->no.kidx] = ti;
 1452         tc_b->ta = ta;
 1453         tc_b->astate = astate;
 1454         tc_b->count = count;
 1455         IPFW_WUNLOCK(ch);
 1456 
 1457         /* Ensure tc.ti copies are in sync */
 1458         tc_a->ti_copy = tablestate[tc_a->no.kidx];
 1459         tc_b->ti_copy = tablestate[tc_b->no.kidx];
 1460 
 1461         /* Notify both tables on @ti change */
 1462         if (tc_a->ta->change_ti != NULL)
 1463                 tc_a->ta->change_ti(tc_a->astate, &tablestate[tc_a->no.kidx]);
 1464         if (tc_b->ta->change_ti != NULL)
 1465                 tc_b->ta->change_ti(tc_b->astate, &tablestate[tc_b->no.kidx]);
 1466 
 1467         IPFW_UH_WUNLOCK(ch);
 1468 
 1469         return (0);
 1470 }
 1471 
 1472 /*
 1473  * Destroys table specified by @ti.
 1474  * Data layout (v0)(current):
 1475  * Request: [ ip_fw3_opheader ]
 1476  *
 1477  * Returns 0 on success
 1478  */
 1479 static int
 1480 destroy_table(struct ip_fw_chain *ch, struct tid_info *ti)
 1481 {
 1482         struct namedobj_instance *ni;
 1483         struct table_config *tc;
 1484 
 1485         IPFW_UH_WLOCK(ch);
 1486 
 1487         ni = CHAIN_TO_NI(ch);
 1488         if ((tc = find_table(ni, ti)) == NULL) {
 1489                 IPFW_UH_WUNLOCK(ch);
 1490                 return (ESRCH);
 1491         }
 1492 
 1493         /* Do not permit destroying referenced tables */
 1494         if (tc->no.refcnt > 0) {
 1495                 IPFW_UH_WUNLOCK(ch);
 1496                 return (EBUSY);
 1497         }
 1498 
 1499         IPFW_WLOCK(ch);
 1500         unlink_table(ch, tc);
 1501         IPFW_WUNLOCK(ch);
 1502 
 1503         /* Free obj index */
 1504         if (ipfw_objhash_free_idx(ni, tc->no.kidx) != 0)
 1505                 printf("Error unlinking kidx %d from table %s\n",
 1506                     tc->no.kidx, tc->tablename);
 1507 
 1508         /* Unref values used in tables while holding UH lock */
 1509         ipfw_unref_table_values(ch, tc, tc->ta, tc->astate, &tc->ti_copy);
 1510         IPFW_UH_WUNLOCK(ch);
 1511 
 1512         free_table_config(ni, tc);
 1513 
 1514         return (0);
 1515 }
 1516 
 1517 static uint32_t
 1518 roundup2p(uint32_t v)
 1519 {
 1520 
 1521         v--;
 1522         v |= v >> 1;
 1523         v |= v >> 2;
 1524         v |= v >> 4;
 1525         v |= v >> 8;
 1526         v |= v >> 16;
 1527         v++;
 1528 
 1529         return (v);
 1530 }
 1531 
 1532 /*
 1533  * Grow tables index.
 1534  *
 1535  * Returns 0 on success.
 1536  */
 1537 int
 1538 ipfw_resize_tables(struct ip_fw_chain *ch, unsigned int ntables)
 1539 {
 1540         unsigned int tbl;
 1541         struct namedobj_instance *ni;
 1542         void *new_idx, *old_tablestate, *tablestate;
 1543         struct table_info *ti;
 1544         struct table_config *tc;
 1545         int i, new_blocks;
 1546 
 1547         /* Check new value for validity */
 1548         if (ntables == 0)
 1549                 return (EINVAL);
 1550         if (ntables > IPFW_TABLES_MAX)
 1551                 ntables = IPFW_TABLES_MAX;
 1552         /* Alight to nearest power of 2 */
 1553         ntables = (unsigned int)roundup2p(ntables); 
 1554 
 1555         /* Allocate new pointers */
 1556         tablestate = malloc(ntables * sizeof(struct table_info),
 1557             M_IPFW, M_WAITOK | M_ZERO);
 1558 
 1559         ipfw_objhash_bitmap_alloc(ntables, (void *)&new_idx, &new_blocks);
 1560 
 1561         IPFW_UH_WLOCK(ch);
 1562 
 1563         tbl = (ntables >= V_fw_tables_max) ? V_fw_tables_max : ntables;
 1564         ni = CHAIN_TO_NI(ch);
 1565 
 1566         /* Temporary restrict decreasing max_tables */
 1567         if (ntables < V_fw_tables_max) {
 1568                 /*
 1569                  * FIXME: Check if we really can shrink
 1570                  */
 1571                 IPFW_UH_WUNLOCK(ch);
 1572                 return (EINVAL);
 1573         }
 1574 
 1575         /* Copy table info/indices */
 1576         memcpy(tablestate, ch->tablestate, sizeof(struct table_info) * tbl);
 1577         ipfw_objhash_bitmap_merge(ni, &new_idx, &new_blocks);
 1578 
 1579         IPFW_WLOCK(ch);
 1580 
 1581         /* Change pointers */
 1582         old_tablestate = ch->tablestate;
 1583         ch->tablestate = tablestate;
 1584         ipfw_objhash_bitmap_swap(ni, &new_idx, &new_blocks);
 1585 
 1586         V_fw_tables_max = ntables;
 1587 
 1588         IPFW_WUNLOCK(ch);
 1589 
 1590         /* Notify all consumers that their @ti pointer has changed */
 1591         ti = (struct table_info *)ch->tablestate;
 1592         for (i = 0; i < tbl; i++, ti++) {
 1593                 if (ti->lookup == NULL)
 1594                         continue;
 1595                 tc = (struct table_config *)ipfw_objhash_lookup_kidx(ni, i);
 1596                 if (tc == NULL || tc->ta->change_ti == NULL)
 1597                         continue;
 1598 
 1599                 tc->ta->change_ti(tc->astate, ti);
 1600         }
 1601 
 1602         IPFW_UH_WUNLOCK(ch);
 1603 
 1604         /* Free old pointers */
 1605         free(old_tablestate, M_IPFW);
 1606         ipfw_objhash_bitmap_free(new_idx, new_blocks);
 1607 
 1608         return (0);
 1609 }
 1610 
 1611 /*
 1612  * Lookup table's named object by its @kidx.
 1613  */
 1614 struct named_object *
 1615 ipfw_objhash_lookup_table_kidx(struct ip_fw_chain *ch, uint16_t kidx)
 1616 {
 1617 
 1618         return (ipfw_objhash_lookup_kidx(CHAIN_TO_NI(ch), kidx));
 1619 }
 1620 
 1621 /*
 1622  * Take reference to table specified in @ntlv.
 1623  * On success return its @kidx.
 1624  */
 1625 int
 1626 ipfw_ref_table(struct ip_fw_chain *ch, ipfw_obj_ntlv *ntlv, uint16_t *kidx)
 1627 {
 1628         struct tid_info ti;
 1629         struct table_config *tc;
 1630         int error;
 1631 
 1632         IPFW_UH_WLOCK_ASSERT(ch);
 1633 
 1634         ntlv_to_ti(ntlv, &ti);
 1635         error = find_table_err(CHAIN_TO_NI(ch), &ti, &tc);
 1636         if (error != 0)
 1637                 return (error);
 1638 
 1639         if (tc == NULL)
 1640                 return (ESRCH);
 1641 
 1642         tc_ref(tc);
 1643         *kidx = tc->no.kidx;
 1644 
 1645         return (0);
 1646 }
 1647 
 1648 void
 1649 ipfw_unref_table(struct ip_fw_chain *ch, uint16_t kidx)
 1650 {
 1651 
 1652         struct namedobj_instance *ni;
 1653         struct named_object *no;
 1654 
 1655         IPFW_UH_WLOCK_ASSERT(ch);
 1656         ni = CHAIN_TO_NI(ch);
 1657         no = ipfw_objhash_lookup_kidx(ni, kidx);
 1658         KASSERT(no != NULL, ("Table with index %d not found", kidx));
 1659         no->refcnt--;
 1660 }
 1661 
 1662 /*
 1663  * Lookup an arbitrary key @paddr of length @plen in table @tbl.
 1664  * Stores found value in @val.
 1665  *
 1666  * Returns 1 if key was found.
 1667  */
 1668 int
 1669 ipfw_lookup_table(struct ip_fw_chain *ch, uint16_t tbl, uint16_t plen,
 1670     void *paddr, uint32_t *val)
 1671 {
 1672         struct table_info *ti;
 1673 
 1674         ti = KIDX_TO_TI(ch, tbl);
 1675 
 1676         return (ti->lookup(ti, paddr, plen, val));
 1677 }
 1678 
 1679 /*
 1680  * Info/List/dump support for tables.
 1681  *
 1682  */
 1683 
 1684 /*
 1685  * High-level 'get' cmds sysctl handlers
 1686  */
 1687 
 1688 /*
 1689  * Lists all tables currently available in kernel.
 1690  * Data layout (v0)(current):
 1691  * Request: [ ipfw_obj_lheader ], size = ipfw_obj_lheader.size
 1692  * Reply: [ ipfw_obj_lheader ipfw_xtable_info x N ]
 1693  *
 1694  * Returns 0 on success
 1695  */
 1696 static int
 1697 list_tables(struct ip_fw_chain *ch, ip_fw3_opheader *op3,
 1698     struct sockopt_data *sd)
 1699 {
 1700         struct _ipfw_obj_lheader *olh;
 1701         int error;
 1702 
 1703         olh = (struct _ipfw_obj_lheader *)ipfw_get_sopt_header(sd,sizeof(*olh));
 1704         if (olh == NULL)
 1705                 return (EINVAL);
 1706         if (sd->valsize < olh->size)
 1707                 return (EINVAL);
 1708 
 1709         IPFW_UH_RLOCK(ch);
 1710         error = export_tables(ch, olh, sd);
 1711         IPFW_UH_RUNLOCK(ch);
 1712 
 1713         return (error);
 1714 }
 1715 
 1716 /*
 1717  * Store table info to buffer provided by @sd.
 1718  * Data layout (v0)(current):
 1719  * Request: [ ipfw_obj_header ipfw_xtable_info(empty)]
 1720  * Reply: [ ipfw_obj_header ipfw_xtable_info ]
 1721  *
 1722  * Returns 0 on success.
 1723  */
 1724 static int
 1725 describe_table(struct ip_fw_chain *ch, ip_fw3_opheader *op3,
 1726     struct sockopt_data *sd)
 1727 {
 1728         struct _ipfw_obj_header *oh;
 1729         struct table_config *tc;
 1730         struct tid_info ti;
 1731         size_t sz;
 1732 
 1733         sz = sizeof(*oh) + sizeof(ipfw_xtable_info);
 1734         oh = (struct _ipfw_obj_header *)ipfw_get_sopt_header(sd, sz);
 1735         if (oh == NULL)
 1736                 return (EINVAL);
 1737 
 1738         objheader_to_ti(oh, &ti);
 1739 
 1740         IPFW_UH_RLOCK(ch);
 1741         if ((tc = find_table(CHAIN_TO_NI(ch), &ti)) == NULL) {
 1742                 IPFW_UH_RUNLOCK(ch);
 1743                 return (ESRCH);
 1744         }
 1745 
 1746         export_table_info(ch, tc, (ipfw_xtable_info *)(oh + 1));
 1747         IPFW_UH_RUNLOCK(ch);
 1748 
 1749         return (0);
 1750 }
 1751 
 1752 /*
 1753  * Modifies existing table.
 1754  * Data layout (v0)(current):
 1755  * Request: [ ipfw_obj_header ipfw_xtable_info ]
 1756  *
 1757  * Returns 0 on success
 1758  */
 1759 static int
 1760 modify_table(struct ip_fw_chain *ch, ip_fw3_opheader *op3,
 1761     struct sockopt_data *sd)
 1762 {
 1763         struct _ipfw_obj_header *oh;
 1764         ipfw_xtable_info *i;
 1765         char *tname;
 1766         struct tid_info ti;
 1767         struct namedobj_instance *ni;
 1768         struct table_config *tc;
 1769 
 1770         if (sd->valsize != sizeof(*oh) + sizeof(ipfw_xtable_info))
 1771                 return (EINVAL);
 1772 
 1773         oh = (struct _ipfw_obj_header *)sd->kbuf;
 1774         i = (ipfw_xtable_info *)(oh + 1);
 1775 
 1776         /*
 1777          * Verify user-supplied strings.
 1778          * Check for null-terminated/zero-length strings/
 1779          */
 1780         tname = oh->ntlv.name;
 1781         if (check_table_name(tname) != 0)
 1782                 return (EINVAL);
 1783 
 1784         objheader_to_ti(oh, &ti);
 1785         ti.type = i->type;
 1786 
 1787         IPFW_UH_WLOCK(ch);
 1788         ni = CHAIN_TO_NI(ch);
 1789         if ((tc = find_table(ni, &ti)) == NULL) {
 1790                 IPFW_UH_WUNLOCK(ch);
 1791                 return (ESRCH);
 1792         }
 1793 
 1794         /* Do not support any modifications for readonly tables */
 1795         if ((tc->ta->flags & TA_FLAG_READONLY) != 0) {
 1796                 IPFW_UH_WUNLOCK(ch);
 1797                 return (EACCES);
 1798         }
 1799 
 1800         if ((i->mflags & IPFW_TMFLAGS_LIMIT) != 0)
 1801                 tc->limit = i->limit;
 1802         if ((i->mflags & IPFW_TMFLAGS_LOCK) != 0)
 1803                 tc->locked = ((i->flags & IPFW_TGFLAGS_LOCKED) != 0);
 1804         IPFW_UH_WUNLOCK(ch);
 1805 
 1806         return (0);
 1807 }
 1808 
 1809 /*
 1810  * Creates new table.
 1811  * Data layout (v0)(current):
 1812  * Request: [ ipfw_obj_header ipfw_xtable_info ]
 1813  *
 1814  * Returns 0 on success
 1815  */
 1816 static int
 1817 create_table(struct ip_fw_chain *ch, ip_fw3_opheader *op3,
 1818     struct sockopt_data *sd)
 1819 {
 1820         struct _ipfw_obj_header *oh;
 1821         ipfw_xtable_info *i;
 1822         char *tname, *aname;
 1823         struct tid_info ti;
 1824         struct namedobj_instance *ni;
 1825 
 1826         if (sd->valsize != sizeof(*oh) + sizeof(ipfw_xtable_info))
 1827                 return (EINVAL);
 1828 
 1829         oh = (struct _ipfw_obj_header *)sd->kbuf;
 1830         i = (ipfw_xtable_info *)(oh + 1);
 1831 
 1832         /*
 1833          * Verify user-supplied strings.
 1834          * Check for null-terminated/zero-length strings/
 1835          */
 1836         tname = oh->ntlv.name;
 1837         aname = i->algoname;
 1838         if (check_table_name(tname) != 0 ||
 1839             strnlen(aname, sizeof(i->algoname)) == sizeof(i->algoname))
 1840                 return (EINVAL);
 1841 
 1842         if (aname[0] == '\0') {
 1843                 /* Use default algorithm */
 1844                 aname = NULL;
 1845         }
 1846 
 1847         objheader_to_ti(oh, &ti);
 1848         ti.type = i->type;
 1849 
 1850         ni = CHAIN_TO_NI(ch);
 1851 
 1852         IPFW_UH_RLOCK(ch);
 1853         if (find_table(ni, &ti) != NULL) {
 1854                 IPFW_UH_RUNLOCK(ch);
 1855                 return (EEXIST);
 1856         }
 1857         IPFW_UH_RUNLOCK(ch);
 1858 
 1859         return (create_table_internal(ch, &ti, aname, i, NULL, 0));
 1860 }
 1861 
 1862 /*
 1863  * Creates new table based on @ti and @aname.
 1864  *
 1865  * Assume @aname to be checked and valid.
 1866  * Stores allocated table kidx inside @pkidx (if non-NULL).
 1867  * Reference created table if @compat is non-zero.
 1868  *
 1869  * Returns 0 on success.
 1870  */
 1871 static int
 1872 create_table_internal(struct ip_fw_chain *ch, struct tid_info *ti,
 1873     char *aname, ipfw_xtable_info *i, uint16_t *pkidx, int compat)
 1874 {
 1875         struct namedobj_instance *ni;
 1876         struct table_config *tc, *tc_new, *tmp;
 1877         struct table_algo *ta;
 1878         uint16_t kidx;
 1879 
 1880         ni = CHAIN_TO_NI(ch);
 1881 
 1882         ta = find_table_algo(CHAIN_TO_TCFG(ch), ti, aname);
 1883         if (ta == NULL)
 1884                 return (ENOTSUP);
 1885 
 1886         tc = alloc_table_config(ch, ti, ta, aname, i->tflags);
 1887         if (tc == NULL)
 1888                 return (ENOMEM);
 1889 
 1890         tc->vmask = i->vmask;
 1891         tc->limit = i->limit;
 1892         if (ta->flags & TA_FLAG_READONLY)
 1893                 tc->locked = 1;
 1894         else
 1895                 tc->locked = (i->flags & IPFW_TGFLAGS_LOCKED) != 0;
 1896 
 1897         IPFW_UH_WLOCK(ch);
 1898 
 1899         /* Check if table has been already created */
 1900         tc_new = find_table(ni, ti);
 1901         if (tc_new != NULL) {
 1902                 /*
 1903                  * Compat: do not fail if we're
 1904                  * requesting to create existing table
 1905                  * which has the same type
 1906                  */
 1907                 if (compat == 0 || tc_new->no.subtype != tc->no.subtype) {
 1908                         IPFW_UH_WUNLOCK(ch);
 1909                         free_table_config(ni, tc);
 1910                         return (EEXIST);
 1911                 }
 1912 
 1913                 /* Exchange tc and tc_new for proper refcounting & freeing */
 1914                 tmp = tc;
 1915                 tc = tc_new;
 1916                 tc_new = tmp;
 1917         } else {
 1918                 /* New table */
 1919                 if (ipfw_objhash_alloc_idx(ni, &kidx) != 0) {
 1920                         IPFW_UH_WUNLOCK(ch);
 1921                         printf("Unable to allocate table index."
 1922                             " Consider increasing net.inet.ip.fw.tables_max");
 1923                         free_table_config(ni, tc);
 1924                         return (EBUSY);
 1925                 }
 1926                 tc->no.kidx = kidx;
 1927                 tc->no.etlv = IPFW_TLV_TBL_NAME;
 1928 
 1929                 link_table(ch, tc);
 1930         }
 1931 
 1932         if (compat != 0)
 1933                 tc->no.refcnt++;
 1934         if (pkidx != NULL)
 1935                 *pkidx = tc->no.kidx;
 1936 
 1937         IPFW_UH_WUNLOCK(ch);
 1938 
 1939         if (tc_new != NULL)
 1940                 free_table_config(ni, tc_new);
 1941 
 1942         return (0);
 1943 }
 1944 
 1945 static void
 1946 ntlv_to_ti(ipfw_obj_ntlv *ntlv, struct tid_info *ti)
 1947 {
 1948 
 1949         memset(ti, 0, sizeof(struct tid_info));
 1950         ti->set = ntlv->set;
 1951         ti->uidx = ntlv->idx;
 1952         ti->tlvs = ntlv;
 1953         ti->tlen = ntlv->head.length;
 1954 }
 1955 
 1956 static void
 1957 objheader_to_ti(struct _ipfw_obj_header *oh, struct tid_info *ti)
 1958 {
 1959 
 1960         ntlv_to_ti(&oh->ntlv, ti);
 1961 }
 1962 
 1963 struct namedobj_instance *
 1964 ipfw_get_table_objhash(struct ip_fw_chain *ch)
 1965 {
 1966 
 1967         return (CHAIN_TO_NI(ch));
 1968 }
 1969 
 1970 /*
 1971  * Exports basic table info as name TLV.
 1972  * Used inside dump_static_rules() to provide info
 1973  * about all tables referenced by current ruleset.
 1974  *
 1975  * Returns 0 on success.
 1976  */
 1977 int
 1978 ipfw_export_table_ntlv(struct ip_fw_chain *ch, uint16_t kidx,
 1979     struct sockopt_data *sd)
 1980 {
 1981         struct namedobj_instance *ni;
 1982         struct named_object *no;
 1983         ipfw_obj_ntlv *ntlv;
 1984 
 1985         ni = CHAIN_TO_NI(ch);
 1986 
 1987         no = ipfw_objhash_lookup_kidx(ni, kidx);
 1988         KASSERT(no != NULL, ("invalid table kidx passed"));
 1989 
 1990         ntlv = (ipfw_obj_ntlv *)ipfw_get_sopt_space(sd, sizeof(*ntlv));
 1991         if (ntlv == NULL)
 1992                 return (ENOMEM);
 1993 
 1994         ntlv->head.type = IPFW_TLV_TBL_NAME;
 1995         ntlv->head.length = sizeof(*ntlv);
 1996         ntlv->idx = no->kidx;
 1997         strlcpy(ntlv->name, no->name, sizeof(ntlv->name));
 1998 
 1999         return (0);
 2000 }
 2001 
 2002 struct dump_args {                 /* Context handed to table foreach callbacks */
 2003         struct ip_fw_chain *ch;    /* chain the table belongs to */
 2004         struct table_info *ti;     /* table info passed to the algo callbacks */
 2005         struct table_config *tc;   /* config of the table being walked */
 2006         struct sockopt_data *sd;   /* output buffer used by the v0/v1 dumpers */
 2007         uint32_t cnt;              /* entries counted/emitted so far */
 2008         uint16_t uidx;             /* copied into the tbl/idx field of each exported entry */
 2009         int error;                 /* set to ENOMEM by dump_table_tentry() when space runs out */
 2010         uint32_t size;             /* legacy dump: walk stops once cnt reaches this value */
 2011         ipfw_table_entry *ent;     /* legacy dump: next output slot */
 2012         ta_foreach_f *f;           /* consumer callback for ipfw_foreach_table_tentry() */
 2013         void *farg;                /* argument passed to f */
 2014         ipfw_obj_tentry tent;      /* scratch entry filled by the algo's dump_tentry */
 2015 };
 2016 
 2017 static int
 2018 count_ext_entries(void *e, void *arg)
 2019 {
 2020         struct dump_args *da;
 2021 
 2022         da = (struct dump_args *)arg;
 2023         da->cnt++;
 2024 
 2025         return (0);
 2026 }
 2027 
 2028 /*
 2029  * Gets number of items from table either using
 2030  * internal counter or calling algo callback for
 2031  * externally-managed tables.
 2032  *
 2033  * Returns number of records.
 2034  */
 2035 static uint32_t
 2036 table_get_count(struct ip_fw_chain *ch, struct table_config *tc)
 2037 {
 2038         struct table_info *ti;
 2039         struct table_algo *ta;
 2040         struct dump_args da;
 2041 
 2042         ti = KIDX_TO_TI(ch, tc->no.kidx);
 2043         ta = tc->ta;
 2044 
 2045         /* Use internal counter for self-managed tables */
 2046         if ((ta->flags & TA_FLAG_READONLY) == 0)
 2047                 return (tc->count);
 2048 
 2049         /* Use callback to quickly get number of items */
 2050         if ((ta->flags & TA_FLAG_EXTCOUNTER) != 0)
 2051                 return (ta->get_count(tc->astate, ti));
 2052 
 2053         /* Count number of iterms ourselves */
 2054         memset(&da, 0, sizeof(da));
 2055         ta->foreach(tc->astate, ti, count_ext_entries, &da);
 2056 
 2057         return (da.cnt);
 2058 }
 2059 
 2060 /*
 2061  * Exports table @tc info into standard ipfw_xtable_info format.
 2062  */
 2063 static void
 2064 export_table_info(struct ip_fw_chain *ch, struct table_config *tc,
 2065     ipfw_xtable_info *i)
 2066 {
 2067         struct table_info *ti;
 2068         struct table_algo *ta;
 2069 
 2070         i->type = tc->no.subtype;
 2071         i->tflags = tc->tflags;
 2072         i->vmask = tc->vmask;
 2073         i->set = tc->no.set;
 2074         i->kidx = tc->no.kidx;
 2075         i->refcnt = tc->no.refcnt;
 2076         i->count = table_get_count(ch, tc);
 2077         i->limit = tc->limit;
 2078         i->flags |= (tc->locked != 0) ? IPFW_TGFLAGS_LOCKED : 0;
 2079         i->size = i->count * sizeof(ipfw_obj_tentry);
 2080         i->size += sizeof(ipfw_obj_header) + sizeof(ipfw_xtable_info);
 2081         strlcpy(i->tablename, tc->tablename, sizeof(i->tablename));
 2082         ti = KIDX_TO_TI(ch, tc->no.kidx);
 2083         ta = tc->ta;
 2084         if (ta->print_config != NULL) {
 2085                 /* Use algo function to print table config to string */
 2086                 ta->print_config(tc->astate, ti, i->algoname,
 2087                     sizeof(i->algoname));
 2088         } else
 2089                 strlcpy(i->algoname, ta->name, sizeof(i->algoname));
 2090         /* Dump algo-specific data, if possible */
 2091         if (ta->dump_tinfo != NULL) {
 2092                 ta->dump_tinfo(tc->astate, ti, &i->ta_info);
 2093                 i->ta_info.flags |= IPFW_TATFLAGS_DATA;
 2094         }
 2095 }
 2096 
 2097 struct dump_table_args {           /* Argument bundle for export_table_internal() */
 2098         struct ip_fw_chain *ch;    /* chain passed on to export_table_info() */
 2099         struct sockopt_data *sd;   /* buffer the per-table info is carved from */
 2100 };
 2101 
 2102 static int
 2103 export_table_internal(struct namedobj_instance *ni, struct named_object *no,
 2104     void *arg)
 2105 {
 2106         ipfw_xtable_info *i;
 2107         struct dump_table_args *dta;
 2108 
 2109         dta = (struct dump_table_args *)arg;
 2110 
 2111         i = (ipfw_xtable_info *)ipfw_get_sopt_space(dta->sd, sizeof(*i));
 2112         KASSERT(i != NULL, ("previously checked buffer is not enough"));
 2113 
 2114         export_table_info(dta->ch, (struct table_config *)no, i);
 2115         return (0);
 2116 }
 2117 
 2118 /*
 2119  * Export all tables as ipfw_xtable_info structures to
 2120  * storage provided by @sd.
 2121  *
 2122  * If supplied buffer is too small, fills in required size
 2123  * and returns ENOMEM.
 2124  * Returns 0 on success.
 2125  */
 2126 static int
 2127 export_tables(struct ip_fw_chain *ch, ipfw_obj_lheader *olh,
 2128     struct sockopt_data *sd)
 2129 {
 2130         uint32_t size;
 2131         uint32_t count;
 2132         struct dump_table_args dta;
 2133 
 2134         count = ipfw_objhash_count(CHAIN_TO_NI(ch));
 2135         size = count * sizeof(ipfw_xtable_info) + sizeof(ipfw_obj_lheader);
 2136 
 2137         /* Fill in header regadless of buffer size */
 2138         olh->count = count;
 2139         olh->objsize = sizeof(ipfw_xtable_info);
 2140 
 2141         if (size > olh->size) {
 2142                 olh->size = size;
 2143                 return (ENOMEM);
 2144         }
 2145 
 2146         olh->size = size;
 2147 
 2148         dta.ch = ch;
 2149         dta.sd = sd;
 2150 
 2151         ipfw_objhash_foreach(CHAIN_TO_NI(ch), export_table_internal, &dta);
 2152 
 2153         return (0);
 2154 }
 2155 
 2156 /*
 2157  * Dumps all table data
 2158  * Data layout (v1)(current):
 2159  * Request: [ ipfw_obj_header ], size = ipfw_xtable_info.size
 2160  * Reply: [ ipfw_obj_header ipfw_xtable_info ipfw_obj_tentry x N ]
 2161  *
 2162  * Returns 0 on success
 2163  */
 2164 static int
 2165 dump_table_v1(struct ip_fw_chain *ch, ip_fw3_opheader *op3,
 2166     struct sockopt_data *sd)
 2167 {
 2168         struct _ipfw_obj_header *oh;
 2169         ipfw_xtable_info *i;
 2170         struct tid_info ti;
 2171         struct table_config *tc;
 2172         struct table_algo *ta;
 2173         struct dump_args da;
 2174         uint32_t sz;
 2175 
 2176         sz = sizeof(ipfw_obj_header) + sizeof(ipfw_xtable_info);
 2177         oh = (struct _ipfw_obj_header *)ipfw_get_sopt_header(sd, sz);
 2178         if (oh == NULL)
 2179                 return (EINVAL);
 2180 
 2181         i = (ipfw_xtable_info *)(oh + 1);
 2182         objheader_to_ti(oh, &ti);
 2183 
 2184         IPFW_UH_RLOCK(ch);
 2185         if ((tc = find_table(CHAIN_TO_NI(ch), &ti)) == NULL) {
 2186                 IPFW_UH_RUNLOCK(ch);
 2187                 return (ESRCH);
 2188         }
 2189         export_table_info(ch, tc, i);
 2190 
 2191         if (sd->valsize < i->size) {
 2192                 /*
 2193                  * Submitted buffer size is not enough.
 2194                  * WE've already filled in @i structure with
 2195                  * relevant table info including size, so we
 2196                  * can return. Buffer will be flushed automatically.
 2197                  */
 2198                 IPFW_UH_RUNLOCK(ch);
 2199                 return (ENOMEM);
 2200         }
 2201 
 2202         /*
 2203          * Do the actual dump in eXtended format
 2204          */
 2205         memset(&da, 0, sizeof(da));
 2206         da.ch = ch;
 2207         da.ti = KIDX_TO_TI(ch, tc->no.kidx);
 2208         da.tc = tc;
 2209         da.sd = sd;
 2210 
 2211         ta = tc->ta;
 2212 
 2213         ta->foreach(tc->astate, da.ti, dump_table_tentry, &da);
 2214         IPFW_UH_RUNLOCK(ch);
 2215 
 2216         return (da.error);
 2217 }
 2218 
 2219 /*
 2220  * Dumps all table data
 2221  * Data layout (version 0)(legacy):
 2222  * Request: [ ipfw_xtable ], size = IP_FW_TABLE_XGETSIZE()
 2223  * Reply: [ ipfw_xtable ipfw_table_xentry x N ]
 2224  *
 2225  * Returns 0 on success
 2226  */
 2227 static int
 2228 dump_table_v0(struct ip_fw_chain *ch, ip_fw3_opheader *op3,
 2229     struct sockopt_data *sd)
 2230 {
 2231         ipfw_xtable *xtbl;
 2232         struct tid_info ti;
 2233         struct table_config *tc;
 2234         struct table_algo *ta;
 2235         struct dump_args da;
 2236         size_t sz, count;
 2237 
 2238         xtbl = (ipfw_xtable *)ipfw_get_sopt_header(sd, sizeof(ipfw_xtable));
 2239         if (xtbl == NULL)
 2240                 return (EINVAL);
 2241 
 2242         memset(&ti, 0, sizeof(ti));
 2243         ti.uidx = xtbl->tbl;
 2244 
 2245         IPFW_UH_RLOCK(ch);
 2246         if ((tc = find_table(CHAIN_TO_NI(ch), &ti)) == NULL) {
 2247                 IPFW_UH_RUNLOCK(ch);
 2248                 return (0);
 2249         }
 2250         count = table_get_count(ch, tc);
 2251         sz = count * sizeof(ipfw_table_xentry) + sizeof(ipfw_xtable);
 2252 
 2253         xtbl->cnt = count;
 2254         xtbl->size = sz;
 2255         xtbl->type = tc->no.subtype;
 2256         xtbl->tbl = ti.uidx;
 2257 
 2258         if (sd->valsize < sz) {
 2259                 /*
 2260                  * Submitted buffer size is not enough.
 2261                  * WE've already filled in @i structure with
 2262                  * relevant table info including size, so we
 2263                  * can return. Buffer will be flushed automatically.
 2264                  */
 2265                 IPFW_UH_RUNLOCK(ch);
 2266                 return (ENOMEM);
 2267         }
 2268 
 2269         /* Do the actual dump in eXtended format */
 2270         memset(&da, 0, sizeof(da));
 2271         da.ch = ch;
 2272         da.ti = KIDX_TO_TI(ch, tc->no.kidx);
 2273         da.tc = tc;
 2274         da.sd = sd;
 2275 
 2276         ta = tc->ta;
 2277 
 2278         ta->foreach(tc->astate, da.ti, dump_table_xentry, &da);
 2279         IPFW_UH_RUNLOCK(ch);
 2280 
 2281         return (0);
 2282 }
 2283 
 2284 /*
 2285  * Legacy function to retrieve number of items in table.
 2286  */
 2287 static int
 2288 get_table_size(struct ip_fw_chain *ch, ip_fw3_opheader *op3,
 2289     struct sockopt_data *sd)
 2290 {
 2291         uint32_t *tbl;
 2292         struct tid_info ti;
 2293         size_t sz;
 2294         int error;
 2295 
 2296         sz = sizeof(*op3) + sizeof(uint32_t);
 2297         op3 = (ip_fw3_opheader *)ipfw_get_sopt_header(sd, sz);
 2298         if (op3 == NULL)
 2299                 return (EINVAL);
 2300 
 2301         tbl = (uint32_t *)(op3 + 1);
 2302         memset(&ti, 0, sizeof(ti));
 2303         ti.uidx = *tbl;
 2304         IPFW_UH_RLOCK(ch);
 2305         error = ipfw_count_xtable(ch, &ti, tbl);
 2306         IPFW_UH_RUNLOCK(ch);
 2307         return (error);
 2308 }
 2309 
 2310 /*
 2311  * Legacy IP_FW_TABLE_GETSIZE handler
 2312  */
 2313 int
 2314 ipfw_count_table(struct ip_fw_chain *ch, struct tid_info *ti, uint32_t *cnt)
 2315 {
 2316         struct table_config *tc;
 2317 
 2318         if ((tc = find_table(CHAIN_TO_NI(ch), ti)) == NULL)
 2319                 return (ESRCH);
 2320         *cnt = table_get_count(ch, tc);
 2321         return (0);
 2322 }
 2323 
 2324 /*
 2325  * Legacy IP_FW_TABLE_XGETSIZE handler
 2326  */
 2327 int
 2328 ipfw_count_xtable(struct ip_fw_chain *ch, struct tid_info *ti, uint32_t *cnt)
 2329 {
 2330         struct table_config *tc;
 2331         uint32_t count;
 2332 
 2333         if ((tc = find_table(CHAIN_TO_NI(ch), ti)) == NULL) {
 2334                 *cnt = 0;
 2335                 return (0); /* 'table all list' requires success */
 2336         }
 2337 
 2338         count = table_get_count(ch, tc);
 2339         *cnt = count * sizeof(ipfw_table_xentry);
 2340         if (count > 0)
 2341                 *cnt += sizeof(ipfw_xtable);
 2342         return (0);
 2343 }
 2344 
 2345 static int
 2346 dump_table_entry(void *e, void *arg)
 2347 {
 2348         struct dump_args *da;
 2349         struct table_config *tc;
 2350         struct table_algo *ta;
 2351         ipfw_table_entry *ent;
 2352         struct table_value *pval;
 2353         int error;
 2354 
 2355         da = (struct dump_args *)arg;
 2356 
 2357         tc = da->tc;
 2358         ta = tc->ta;
 2359 
 2360         /* Out of memory, returning */
 2361         if (da->cnt == da->size)
 2362                 return (1);
 2363         ent = da->ent++;
 2364         ent->tbl = da->uidx;
 2365         da->cnt++;
 2366 
 2367         error = ta->dump_tentry(tc->astate, da->ti, e, &da->tent);
 2368         if (error != 0)
 2369                 return (error);
 2370 
 2371         ent->addr = da->tent.k.addr.s_addr;
 2372         ent->masklen = da->tent.masklen;
 2373         pval = get_table_value(da->ch, da->tc, da->tent.v.kidx);
 2374         ent->value = ipfw_export_table_value_legacy(pval);
 2375 
 2376         return (0);
 2377 }
 2378 
 2379 /*
 2380  * Dumps table in pre-8.1 legacy format.
 2381  */
 2382 int
 2383 ipfw_dump_table_legacy(struct ip_fw_chain *ch, struct tid_info *ti,
 2384     ipfw_table *tbl)
 2385 {
 2386         struct table_config *tc;
 2387         struct table_algo *ta;
 2388         struct dump_args da;
 2389 
 2390         tbl->cnt = 0;
 2391 
 2392         if ((tc = find_table(CHAIN_TO_NI(ch), ti)) == NULL)
 2393                 return (0);     /* XXX: We should return ESRCH */
 2394 
 2395         ta = tc->ta;
 2396 
 2397         /* This dump format supports IPv4 only */
 2398         if (tc->no.subtype != IPFW_TABLE_ADDR)
 2399                 return (0);
 2400 
 2401         memset(&da, 0, sizeof(da));
 2402         da.ch = ch;
 2403         da.ti = KIDX_TO_TI(ch, tc->no.kidx);
 2404         da.tc = tc;
 2405         da.ent = &tbl->ent[0];
 2406         da.size = tbl->size;
 2407 
 2408         tbl->cnt = 0;
 2409         ta->foreach(tc->astate, da.ti, dump_table_entry, &da);
 2410         tbl->cnt = da.cnt;
 2411 
 2412         return (0);
 2413 }
 2414 
 2415 /*
 2416  * Dumps table entry in eXtended format (v1)(current).
 2417  */
 2418 static int
 2419 dump_table_tentry(void *e, void *arg)
 2420 {
 2421         struct dump_args *da;
 2422         struct table_config *tc;
 2423         struct table_algo *ta;
 2424         struct table_value *pval;
 2425         ipfw_obj_tentry *tent;
 2426         int error;
 2427 
 2428         da = (struct dump_args *)arg;
 2429 
 2430         tc = da->tc;
 2431         ta = tc->ta;
 2432 
 2433         tent = (ipfw_obj_tentry *)ipfw_get_sopt_space(da->sd, sizeof(*tent));
 2434         /* Out of memory, returning */
 2435         if (tent == NULL) {
 2436                 da->error = ENOMEM;
 2437                 return (1);
 2438         }
 2439         tent->head.length = sizeof(ipfw_obj_tentry);
 2440         tent->idx = da->uidx;
 2441 
 2442         error = ta->dump_tentry(tc->astate, da->ti, e, tent);
 2443         if (error != 0)
 2444                 return (error);
 2445 
 2446         pval = get_table_value(da->ch, da->tc, tent->v.kidx);
 2447         ipfw_export_table_value_v1(pval, &tent->v.value);
 2448 
 2449         return (0);
 2450 }
 2451 
 2452 /*
 2453  * Dumps table entry in eXtended format (v0).
 2454  */
 2455 static int
 2456 dump_table_xentry(void *e, void *arg)
 2457 {
 2458         struct dump_args *da;
 2459         struct table_config *tc;
 2460         struct table_algo *ta;
 2461         ipfw_table_xentry *xent;
 2462         ipfw_obj_tentry *tent;
 2463         struct table_value *pval;
 2464         int error;
 2465 
 2466         da = (struct dump_args *)arg;
 2467 
 2468         tc = da->tc;
 2469         ta = tc->ta;
 2470 
 2471         xent = (ipfw_table_xentry *)ipfw_get_sopt_space(da->sd, sizeof(*xent));
 2472         /* Out of memory, returning */
 2473         if (xent == NULL)
 2474                 return (1);
 2475         xent->len = sizeof(ipfw_table_xentry);
 2476         xent->tbl = da->uidx;
 2477 
 2478         memset(&da->tent, 0, sizeof(da->tent));
 2479         tent = &da->tent;
 2480         error = ta->dump_tentry(tc->astate, da->ti, e, tent);
 2481         if (error != 0)
 2482                 return (error);
 2483 
 2484         /* Convert current format to previous one */
 2485         xent->masklen = tent->masklen;
 2486         pval = get_table_value(da->ch, da->tc, da->tent.v.kidx);
 2487         xent->value = ipfw_export_table_value_legacy(pval);
 2488         /* Apply some hacks */
 2489         if (tc->no.subtype == IPFW_TABLE_ADDR && tent->subtype == AF_INET) {
 2490                 xent->k.addr6.s6_addr32[3] = tent->k.addr.s_addr;
 2491                 xent->flags = IPFW_TCF_INET;
 2492         } else
 2493                 memcpy(&xent->k, &tent->k, sizeof(xent->k));
 2494 
 2495         return (0);
 2496 }
 2497 
 2498 /*
 2499  * Helper function to export table algo data
 2500  * to tentry format before calling user function.
 2501  *
 2502  * Returns 0 on success.
 2503  */
 2504 static int
 2505 prepare_table_tentry(void *e, void *arg)
 2506 {
 2507         struct dump_args *da;
 2508         struct table_config *tc;
 2509         struct table_algo *ta;
 2510         int error;
 2511 
 2512         da = (struct dump_args *)arg;
 2513 
 2514         tc = da->tc;
 2515         ta = tc->ta;
 2516 
 2517         error = ta->dump_tentry(tc->astate, da->ti, e, &da->tent);
 2518         if (error != 0)
 2519                 return (error);
 2520 
 2521         da->f(&da->tent, da->farg);
 2522 
 2523         return (0);
 2524 }
 2525 
 2526 /*
 2527  * Allow external consumers to read table entries in standard format.
 2528  */
 2529 int
 2530 ipfw_foreach_table_tentry(struct ip_fw_chain *ch, uint16_t kidx,
 2531     ta_foreach_f *f, void *arg)
 2532 {
 2533         struct namedobj_instance *ni;
 2534         struct table_config *tc;
 2535         struct table_algo *ta;
 2536         struct dump_args da;
 2537 
 2538         ni = CHAIN_TO_NI(ch);
 2539 
 2540         tc = (struct table_config *)ipfw_objhash_lookup_kidx(ni, kidx);
 2541         if (tc == NULL)
 2542                 return (ESRCH);
 2543 
 2544         ta = tc->ta;
 2545 
 2546         memset(&da, 0, sizeof(da));
 2547         da.ch = ch;
 2548         da.ti = KIDX_TO_TI(ch, tc->no.kidx);
 2549         da.tc = tc;
 2550         da.f = f;
 2551         da.farg = arg;
 2552 
 2553         ta->foreach(tc->astate, da.ti, prepare_table_tentry, &da);
 2554 
 2555         return (0);
 2556 }
 2557 
 2558 /*
 2559  * Table algorithms
 2560  */ 
 2561 
 2562 /*
 2563  * Finds algorithm by index, table type or supplied name.
 2564  *
 2565  * Returns pointer to algo or NULL.
 2566  */
 2567 static struct table_algo *
 2568 find_table_algo(struct tables_config *tcfg, struct tid_info *ti, char *name)
 2569 {
 2570         int i, l;
 2571         struct table_algo *ta;
 2572 
 2573         if (ti->type > IPFW_TABLE_MAXTYPE)
 2574                 return (NULL);
 2575 
 2576         /* Search by index */
 2577         if (ti->atype != 0) {
 2578                 if (ti->atype > tcfg->algo_count)
 2579                         return (NULL);
 2580                 return (tcfg->algo[ti->atype]);
 2581         }
 2582 
 2583         if (name == NULL) {
 2584                 /* Return default algorithm for given type if set */
 2585                 return (tcfg->def_algo[ti->type]);
 2586         }
 2587 
 2588         /* Search by name */
 2589         /* TODO: better search */
 2590         for (i = 1; i <= tcfg->algo_count; i++) {
 2591                 ta = tcfg->algo[i];
 2592 
 2593                 /*
 2594                  * One can supply additional algorithm
 2595                  * parameters so we compare only the first word
 2596                  * of supplied name:
 2597                  * 'addr:chash hsize=32'
 2598                  * '^^^^^^^^^'
 2599                  *
 2600                  */
 2601                 l = strlen(ta->name);
 2602                 if (strncmp(name, ta->name, l) != 0)
 2603                         continue;
 2604                 if (name[l] != '\0' && name[l] != ' ')
 2605                         continue;
 2606                 /* Check if we're requesting proper table type */
 2607                 if (ti->type != 0 && ti->type != ta->type)
 2608                         return (NULL);
 2609                 return (ta);
 2610         }
 2611 
 2612         return (NULL);
 2613 }
 2614 
 2615 /*
 2616  * Register new table algo @ta.
 2617  * Stores algo id inside @idx.
 2618  *
 2619  * Returns 0 on success.
 2620  */
 2621 int
 2622 ipfw_add_table_algo(struct ip_fw_chain *ch, struct table_algo *ta, size_t size,
 2623     int *idx)
 2624 {
 2625         struct tables_config *tcfg;
 2626         struct table_algo *ta_new;
 2627         size_t sz;
 2628 
 2629         if (size > sizeof(struct table_algo))
 2630                 return (EINVAL);
 2631 
 2632         /* Check for the required on-stack size for add/del */
 2633         sz = roundup2(ta->ta_buf_size, sizeof(void *));
 2634         if (sz > TA_BUF_SZ)
 2635                 return (EINVAL);
 2636 
 2637         KASSERT(ta->type <= IPFW_TABLE_MAXTYPE,("Increase IPFW_TABLE_MAXTYPE"));
 2638 
 2639         /* Copy algorithm data to stable storage. */
 2640         ta_new = malloc(sizeof(struct table_algo), M_IPFW, M_WAITOK | M_ZERO);
 2641         memcpy(ta_new, ta, size);
 2642 
 2643         tcfg = CHAIN_TO_TCFG(ch);
 2644 
 2645         KASSERT(tcfg->algo_count < 255, ("Increase algo array size"));
 2646 
 2647         tcfg->algo[++tcfg->algo_count] = ta_new;
 2648         ta_new->idx = tcfg->algo_count;
 2649 
 2650         /* Set algorithm as default one for given type */
 2651         if ((ta_new->flags & TA_FLAG_DEFAULT) != 0 &&
 2652             tcfg->def_algo[ta_new->type] == NULL)
 2653                 tcfg->def_algo[ta_new->type] = ta_new;
 2654 
 2655         *idx = ta_new->idx;
 2656 
 2657         return (0);
 2658 }
 2659 
 2660 /*
 2661  * Unregisters table algo using @idx as id.
 2662  * XXX: It is NOT safe to call this function in any place
 2663  * other than ipfw instance destroy handler.
 2664  */
 2665 void
 2666 ipfw_del_table_algo(struct ip_fw_chain *ch, int idx)
 2667 {
 2668         struct tables_config *tcfg;
 2669         struct table_algo *ta;
 2670 
 2671         tcfg = CHAIN_TO_TCFG(ch);
 2672 
 2673         KASSERT(idx <= tcfg->algo_count, ("algo idx %d out of range 1..%d",
 2674             idx, tcfg->algo_count));
 2675 
 2676         ta = tcfg->algo[idx];
 2677         KASSERT(ta != NULL, ("algo idx %d is NULL", idx));
 2678 
 2679         if (tcfg->def_algo[ta->type] == ta)
 2680                 tcfg->def_algo[ta->type] = NULL;
 2681 
 2682         free(ta, M_IPFW);
 2683 }
 2684 
 2685 /*
 2686  * Lists all table algorithms currently available.
 2687  * Data layout (v0)(current):
 2688  * Request: [ ipfw_obj_lheader ], size = ipfw_obj_lheader.size
 2689  * Reply: [ ipfw_obj_lheader ipfw_ta_info x N ]
 2690  *
 2691  * Returns 0 on success
 2692  */
 2693 static int
 2694 list_table_algo(struct ip_fw_chain *ch, ip_fw3_opheader *op3,
 2695     struct sockopt_data *sd)
 2696 {
 2697         struct _ipfw_obj_lheader *olh;
 2698         struct tables_config *tcfg;
 2699         ipfw_ta_info *i;
 2700         struct table_algo *ta;
 2701         uint32_t count, n, size;
 2702 
 2703         olh = (struct _ipfw_obj_lheader *)ipfw_get_sopt_header(sd,sizeof(*olh));
 2704         if (olh == NULL)
 2705                 return (EINVAL);
 2706         if (sd->valsize < olh->size)
 2707                 return (EINVAL);
 2708 
 2709         IPFW_UH_RLOCK(ch);
 2710         tcfg = CHAIN_TO_TCFG(ch);
 2711         count = tcfg->algo_count;
 2712         size = count * sizeof(ipfw_ta_info) + sizeof(ipfw_obj_lheader);
 2713 
 2714         /* Fill in header regadless of buffer size */
 2715         olh->count = count;
 2716         olh->objsize = sizeof(ipfw_ta_info);
 2717 
 2718         if (size > olh->size) {
 2719                 olh->size = size;
 2720                 IPFW_UH_RUNLOCK(ch);
 2721                 return (ENOMEM);
 2722         }
 2723         olh->size = size;
 2724 
 2725         for (n = 1; n <= count; n++) {
 2726                 i = (ipfw_ta_info *)ipfw_get_sopt_space(sd, sizeof(*i));
 2727                 KASSERT(i != NULL, ("previously checked buffer is not enough"));
 2728                 ta = tcfg->algo[n];
 2729                 strlcpy(i->algoname, ta->name, sizeof(i->algoname));
 2730                 i->type = ta->type;
 2731                 i->refcnt = ta->refcnt;
 2732         }
 2733 
 2734         IPFW_UH_RUNLOCK(ch);
 2735 
 2736         return (0);
 2737 }
 2738 
 2739 static int
 2740 classify_srcdst(ipfw_insn *cmd, uint16_t *puidx, uint8_t *ptype)
 2741 {
 2742         /* Basic IPv4/IPv6 or u32 lookups */
 2743         *puidx = cmd->arg1;
 2744         /* Assume ADDR by default */
 2745         *ptype = IPFW_TABLE_ADDR;
 2746         int v;
 2747                 
 2748         if (F_LEN(cmd) > F_INSN_SIZE(ipfw_insn_u32)) {
 2749                 /*
 2750                  * generic lookup. The key must be
 2751                  * in 32bit big-endian format.
 2752                  */
 2753                 v = ((ipfw_insn_u32 *)cmd)->d[1];
 2754                 switch (v) {
 2755                 case LOOKUP_DST_IP:
 2756                 case LOOKUP_SRC_IP:
 2757                         break;
 2758                 case LOOKUP_DST_PORT:
 2759                 case LOOKUP_SRC_PORT:
 2760                 case LOOKUP_UID:
 2761                 case LOOKUP_JAIL:
 2762                 case LOOKUP_DSCP:
 2763                         *ptype = IPFW_TABLE_NUMBER;
 2764                         break;
 2765                 case LOOKUP_DST_MAC:
 2766                 case LOOKUP_SRC_MAC:
 2767                         *ptype = IPFW_TABLE_MAC;
 2768                         break;
 2769                 }
 2770         }
 2771 
 2772         return (0);
 2773 }
 2774 
 2775 static int
 2776 classify_via(ipfw_insn *cmd, uint16_t *puidx, uint8_t *ptype)
 2777 {
 2778         ipfw_insn_if *cmdif;
 2779 
 2780         /* Interface table, possibly */
 2781         cmdif = (ipfw_insn_if *)cmd;
 2782         if (cmdif->name[0] != '\1')
 2783                 return (1);
 2784 
 2785         *ptype = IPFW_TABLE_INTERFACE;
 2786         *puidx = cmdif->p.kidx;
 2787 
 2788         return (0);
 2789 }
 2790 
 2791 static int
 2792 classify_flow(ipfw_insn *cmd, uint16_t *puidx, uint8_t *ptype)
 2793 {
 2794 
 2795         *puidx = cmd->arg1;
 2796         *ptype = IPFW_TABLE_FLOW;
 2797 
 2798         return (0);
 2799 }
 2800 
 2801 static int
 2802 classify_mac_lookup(ipfw_insn *cmd, uint16_t *puidx, uint8_t *ptype)
 2803 {
 2804         *puidx = cmd->arg1;
 2805         *ptype = IPFW_TABLE_MAC;
 2806         return (0);
 2807 }
 2808 
 2809 static void
 2810 update_arg1(ipfw_insn *cmd, uint16_t idx)
 2811 {
 2812 
 2813         cmd->arg1 = idx;
 2814 }
 2815 
 2816 static void
 2817 update_via(ipfw_insn *cmd, uint16_t idx)
 2818 {
 2819         ipfw_insn_if *cmdif;
 2820 
 2821         cmdif = (ipfw_insn_if *)cmd;
 2822         cmdif->p.kidx = idx;
 2823 }
 2824 
 2825 static int
 2826 table_findbyname(struct ip_fw_chain *ch, struct tid_info *ti,
 2827     struct named_object **pno)
 2828 {
 2829         struct table_config *tc;
 2830         int error;
 2831 
 2832         IPFW_UH_WLOCK_ASSERT(ch);
 2833 
 2834         error = find_table_err(CHAIN_TO_NI(ch), ti, &tc);
 2835         if (error != 0)
 2836                 return (error);
 2837 
 2838         *pno = &tc->no;
 2839         return (0);
 2840 }
 2841 
 2842 /* XXX: sets-sets! */
 2843 static struct named_object *
 2844 table_findbykidx(struct ip_fw_chain *ch, uint16_t idx)
 2845 {
 2846         struct namedobj_instance *ni;
 2847         struct table_config *tc;
 2848 
 2849         IPFW_UH_WLOCK_ASSERT(ch);
 2850         ni = CHAIN_TO_NI(ch);
 2851         tc = (struct table_config *)ipfw_objhash_lookup_kidx(ni, idx);
 2852         KASSERT(tc != NULL, ("Table with index %d not found", idx));
 2853 
 2854         return (&tc->no);
 2855 }
 2856 
 2857 static int
 2858 table_manage_sets(struct ip_fw_chain *ch, uint16_t set, uint8_t new_set,
 2859     enum ipfw_sets_cmd cmd)
 2860 {
 2861 
 2862         switch (cmd) {
 2863         case SWAP_ALL:
 2864         case TEST_ALL:
 2865         case MOVE_ALL:
 2866                 /*
 2867                  * Always return success, the real action and decision
 2868                  * should make table_manage_sets_all().
 2869                  */
 2870                 return (0);
 2871         case TEST_ONE:
 2872         case MOVE_ONE:
 2873                 /*
 2874                  * NOTE: we need to use ipfw_objhash_del/ipfw_objhash_add
 2875                  * if set number will be used in hash function. Currently
 2876                  * we can just use generic handler that replaces set value.
 2877                  */
 2878                 if (V_fw_tables_sets == 0)
 2879                         return (0);
 2880                 break;
 2881         case COUNT_ONE:
 2882                 /*
 2883                  * Return EOPNOTSUPP for COUNT_ONE when per-set sysctl is
 2884                  * disabled. This allow skip table's opcodes from additional
 2885                  * checks when specific rules moved to another set.
 2886                  */
 2887                 if (V_fw_tables_sets == 0)
 2888                         return (EOPNOTSUPP);
 2889         }
 2890         /* Use generic sets handler when per-set sysctl is enabled. */
 2891         return (ipfw_obj_manage_sets(CHAIN_TO_NI(ch), IPFW_TLV_TBL_NAME,
 2892             set, new_set, cmd));
 2893 }
 2894 
 2895 /*
 2896  * We register several opcode rewriters for lookup tables.
 2897  * All tables opcodes have the same ETLV type, but different subtype.
 2898  * To avoid invoking sets handler several times for XXX_ALL commands,
 2899  * we use separate manage_sets handler. O_RECV has the lowest value,
 2900  * so it should be called first.
 2901  */
 2902 static int
 2903 table_manage_sets_all(struct ip_fw_chain *ch, uint16_t set, uint8_t new_set,
 2904     enum ipfw_sets_cmd cmd)
 2905 {
 2906 
 2907         switch (cmd) {
 2908         case SWAP_ALL:
 2909         case TEST_ALL:
 2910                 /*
 2911                  * Return success for TEST_ALL, since nothing prevents
 2912                  * move rules from one set to another. All tables are
 2913                  * accessible from all sets when per-set tables sysctl
 2914                  * is disabled.
 2915                  */
 2916         case MOVE_ALL:
 2917                 if (V_fw_tables_sets == 0)
 2918                         return (0);
 2919                 break;
 2920         default:
 2921                 return (table_manage_sets(ch, set, new_set, cmd));
 2922         }
 2923         /* Use generic sets handler when per-set sysctl is enabled. */
 2924         return (ipfw_obj_manage_sets(CHAIN_TO_NI(ch), IPFW_TLV_TBL_NAME,
 2925             set, new_set, cmd));
 2926 }
 2927 
 2928 static struct opcode_obj_rewrite opcodes[] = {
 2929         {
 2930                 .opcode = O_IP_SRC_LOOKUP,
 2931                 .etlv = IPFW_TLV_TBL_NAME,
 2932                 .classifier = classify_srcdst,
 2933                 .update = update_arg1,
 2934                 .find_byname = table_findbyname,
 2935                 .find_bykidx = table_findbykidx,
 2936                 .create_object = create_table_compat,
 2937                 .manage_sets = table_manage_sets,
 2938         },
 2939         {
 2940                 .opcode = O_IP_DST_LOOKUP,
 2941                 .etlv = IPFW_TLV_TBL_NAME,
 2942                 .classifier = classify_srcdst,
 2943                 .update = update_arg1,
 2944                 .find_byname = table_findbyname,
 2945                 .find_bykidx = table_findbykidx,
 2946                 .create_object = create_table_compat,
 2947                 .manage_sets = table_manage_sets,
 2948         },
 2949         {
 2950                 .opcode = O_IP_FLOW_LOOKUP,
 2951                 .etlv = IPFW_TLV_TBL_NAME,
 2952                 .classifier = classify_flow,
 2953                 .update = update_arg1,
 2954                 .find_byname = table_findbyname,
 2955                 .find_bykidx = table_findbykidx,
 2956                 .create_object = create_table_compat,
 2957                 .manage_sets = table_manage_sets,
 2958         },
 2959         {
 2960                 .opcode = O_MAC_SRC_LOOKUP,
 2961                 .etlv = IPFW_TLV_TBL_NAME,
 2962                 .classifier = classify_mac_lookup,
 2963                 .update = update_arg1,
 2964                 .find_byname = table_findbyname,
 2965                 .find_bykidx = table_findbykidx,
 2966                 .create_object = create_table_compat,
 2967                 .manage_sets = table_manage_sets,
 2968         },
 2969         {
 2970                 .opcode = O_MAC_DST_LOOKUP,
 2971                 .etlv = IPFW_TLV_TBL_NAME,
 2972                 .classifier = classify_mac_lookup,
 2973                 .update = update_arg1,
 2974                 .find_byname = table_findbyname,
 2975                 .find_bykidx = table_findbykidx,
 2976                 .create_object = create_table_compat,
 2977                 .manage_sets = table_manage_sets,
 2978         },
 2979         {
 2980                 .opcode = O_XMIT,
 2981                 .etlv = IPFW_TLV_TBL_NAME,
 2982                 .classifier = classify_via,
 2983                 .update = update_via,
 2984                 .find_byname = table_findbyname,
 2985                 .find_bykidx = table_findbykidx,
 2986                 .create_object = create_table_compat,
 2987                 .manage_sets = table_manage_sets,
 2988         },
 2989         {
 2990                 .opcode = O_RECV,
 2991                 .etlv = IPFW_TLV_TBL_NAME,
 2992                 .classifier = classify_via,
 2993                 .update = update_via,
 2994                 .find_byname = table_findbyname,
 2995                 .find_bykidx = table_findbykidx,
 2996                 .create_object = create_table_compat,
 2997                 .manage_sets = table_manage_sets_all,
 2998         },
 2999         {
 3000                 .opcode = O_VIA,
 3001                 .etlv = IPFW_TLV_TBL_NAME,
 3002                 .classifier = classify_via,
 3003                 .update = update_via,
 3004                 .find_byname = table_findbyname,
 3005                 .find_bykidx = table_findbykidx,
 3006                 .create_object = create_table_compat,
 3007                 .manage_sets = table_manage_sets,
 3008         },
 3009 };
 3010 
 3011 static int
 3012 test_sets_cb(struct namedobj_instance *ni __unused, struct named_object *no,
 3013     void *arg __unused)
 3014 {
 3015 
 3016         /* Check that there aren't any tables in not default set */
 3017         if (no->set != 0)
 3018                 return (EBUSY);
 3019         return (0);
 3020 }
 3021 
 3022 /*
 3023  * Switch between "set 0" and "rule's set" table binding,
 3024  * Check all ruleset bindings and permits changing
 3025  * IFF each binding has both rule AND table in default set (set 0).
 3026  *
 3027  * Returns 0 on success.
 3028  */
 3029 int
 3030 ipfw_switch_tables_namespace(struct ip_fw_chain *ch, unsigned int sets)
 3031 {
 3032         struct opcode_obj_rewrite *rw;
 3033         struct namedobj_instance *ni;
 3034         struct named_object *no;
 3035         struct ip_fw *rule;
 3036         ipfw_insn *cmd;
 3037         int cmdlen, i, l;
 3038         uint16_t kidx;
 3039         uint8_t subtype;
 3040 
 3041         IPFW_UH_WLOCK(ch);
 3042 
 3043         if (V_fw_tables_sets == sets) {
 3044                 IPFW_UH_WUNLOCK(ch);
 3045                 return (0);
 3046         }
 3047         ni = CHAIN_TO_NI(ch);
 3048         if (sets == 0) {
 3049                 /*
 3050                  * Prevent disabling sets support if we have some tables
 3051                  * in not default sets.
 3052                  */
 3053                 if (ipfw_objhash_foreach_type(ni, test_sets_cb,
 3054                     NULL, IPFW_TLV_TBL_NAME) != 0) {
 3055                         IPFW_UH_WUNLOCK(ch);
 3056                         return (EBUSY);
 3057                 }
 3058         }
 3059         /*
 3060          * Scan all rules and examine tables opcodes.
 3061          */
 3062         for (i = 0; i < ch->n_rules; i++) {
 3063                 rule = ch->map[i];
 3064 
 3065                 l = rule->cmd_len;
 3066                 cmd = rule->cmd;
 3067                 cmdlen = 0;
 3068                 for ( ; l > 0 ; l -= cmdlen, cmd += cmdlen) {
 3069                         cmdlen = F_LEN(cmd);
 3070                         /* Check only tables opcodes */
 3071                         for (kidx = 0, rw = opcodes;
 3072                             rw < opcodes + nitems(opcodes); rw++) {
 3073                                 if (rw->opcode != cmd->opcode)
 3074                                         continue;
 3075                                 if (rw->classifier(cmd, &kidx, &subtype) == 0)
 3076                                         break;
 3077                         }
 3078                         if (kidx == 0)
 3079                                 continue;
 3080                         no = ipfw_objhash_lookup_kidx(ni, kidx);
 3081                         /* Check if both table object and rule has the set 0 */
 3082                         if (no->set != 0 || rule->set != 0) {
 3083                                 IPFW_UH_WUNLOCK(ch);
 3084                                 return (EBUSY);
 3085                         }
 3086                 }
 3087         }
 3088         V_fw_tables_sets = sets;
 3089         IPFW_UH_WUNLOCK(ch);
 3090         return (0);
 3091 }
 3092 
 3093 /*
 3094  * Checks table name for validity.
 3095  * Enforce basic length checks, the rest
 3096  * should be done in userland.
 3097  *
 3098  * Returns 0 if name is considered valid.
 3099  */
 3100 static int
 3101 check_table_name(const char *name)
 3102 {
 3103 
 3104         /*
 3105          * TODO: do some more complicated checks
 3106          */
 3107         return (ipfw_check_object_name_generic(name));
 3108 }
 3109 
 3110 /*
 3111  * Finds table config based on either legacy index
 3112  * or name in ntlv.
 3113  * Note @ti structure contains unchecked data from userland.
 3114  *
 3115  * Returns 0 in success and fills in @tc with found config
 3116  */
 3117 static int
 3118 find_table_err(struct namedobj_instance *ni, struct tid_info *ti,
 3119     struct table_config **tc)
 3120 {
 3121         char *name, bname[16];
 3122         struct named_object *no;
 3123         ipfw_obj_ntlv *ntlv;
 3124         uint32_t set;
 3125 
 3126         if (ti->tlvs != NULL) {
 3127                 ntlv = ipfw_find_name_tlv_type(ti->tlvs, ti->tlen, ti->uidx,
 3128                     IPFW_TLV_TBL_NAME);
 3129                 if (ntlv == NULL)
 3130                         return (EINVAL);
 3131                 name = ntlv->name;
 3132 
 3133                 /*
 3134                  * Use set provided by @ti instead of @ntlv one.
 3135                  * This is needed due to different sets behavior
 3136                  * controlled by V_fw_tables_sets.
 3137                  */
 3138                 set = (V_fw_tables_sets != 0) ? ti->set : 0;
 3139         } else {
 3140                 snprintf(bname, sizeof(bname), "%d", ti->uidx);
 3141                 name = bname;
 3142                 set = 0;
 3143         }
 3144 
 3145         no = ipfw_objhash_lookup_name(ni, set, name);
 3146         *tc = (struct table_config *)no;
 3147 
 3148         return (0);
 3149 }
 3150 
 3151 /*
 3152  * Finds table config based on either legacy index
 3153  * or name in ntlv.
 3154  * Note @ti structure contains unchecked data from userland.
 3155  *
 3156  * Returns pointer to table_config or NULL.
 3157  */
 3158 static struct table_config *
 3159 find_table(struct namedobj_instance *ni, struct tid_info *ti)
 3160 {
 3161         struct table_config *tc;
 3162 
 3163         if (find_table_err(ni, ti, &tc) != 0)
 3164                 return (NULL);
 3165 
 3166         return (tc);
 3167 }
 3168 
 3169 /*
 3170  * Allocate new table config structure using
 3171  * specified @algo and @aname.
 3172  *
 3173  * Returns pointer to config or NULL.
 3174  */
 3175 static struct table_config *
 3176 alloc_table_config(struct ip_fw_chain *ch, struct tid_info *ti,
 3177     struct table_algo *ta, char *aname, uint8_t tflags)
 3178 {
 3179         char *name, bname[16];
 3180         struct table_config *tc;
 3181         int error;
 3182         ipfw_obj_ntlv *ntlv;
 3183         uint32_t set;
 3184 
 3185         if (ti->tlvs != NULL) {
 3186                 ntlv = ipfw_find_name_tlv_type(ti->tlvs, ti->tlen, ti->uidx,
 3187                     IPFW_TLV_TBL_NAME);
 3188                 if (ntlv == NULL)
 3189                         return (NULL);
 3190                 name = ntlv->name;
 3191                 set = (V_fw_tables_sets == 0) ? 0 : ntlv->set;
 3192         } else {
 3193                 /* Compat part: convert number to string representation */
 3194                 snprintf(bname, sizeof(bname), "%d", ti->uidx);
 3195                 name = bname;
 3196                 set = 0;
 3197         }
 3198 
 3199         tc = malloc(sizeof(struct table_config), M_IPFW, M_WAITOK | M_ZERO);
 3200         tc->no.name = tc->tablename;
 3201         tc->no.subtype = ta->type;
 3202         tc->no.set = set;
 3203         tc->tflags = tflags;
 3204         tc->ta = ta;
 3205         strlcpy(tc->tablename, name, sizeof(tc->tablename));
 3206         /* Set "shared" value type by default */
 3207         tc->vshared = 1;
 3208 
 3209         /* Preallocate data structures for new tables */
 3210         error = ta->init(ch, &tc->astate, &tc->ti_copy, aname, tflags);
 3211         if (error != 0) {
 3212                 free(tc, M_IPFW);
 3213                 return (NULL);
 3214         }
 3215 
 3216         return (tc);
 3217 }
 3218 
 3219 /*
 3220  * Destroys table state and config.
 3221  */
 3222 static void
 3223 free_table_config(struct namedobj_instance *ni, struct table_config *tc)
 3224 {
 3225 
 3226         KASSERT(tc->linked == 0, ("free() on linked config"));
 3227         /* UH lock MUST NOT be held */
 3228 
 3229         /*
 3230          * We're using ta without any locking/referencing.
 3231          * TODO: fix this if we're going to use unloadable algos.
 3232          */
 3233         tc->ta->destroy(tc->astate, &tc->ti_copy);
 3234         free(tc, M_IPFW);
 3235 }
 3236 
 3237 /*
 3238  * Links @tc to @chain table named instance.
 3239  * Sets appropriate type/states in @chain table info.
 3240  */
 3241 static void
 3242 link_table(struct ip_fw_chain *ch, struct table_config *tc)
 3243 {
 3244         struct namedobj_instance *ni;
 3245         struct table_info *ti;
 3246         uint16_t kidx;
 3247 
 3248         IPFW_UH_WLOCK_ASSERT(ch);
 3249 
 3250         ni = CHAIN_TO_NI(ch);
 3251         kidx = tc->no.kidx;
 3252 
 3253         ipfw_objhash_add(ni, &tc->no);
 3254 
 3255         ti = KIDX_TO_TI(ch, kidx);
 3256         *ti = tc->ti_copy;
 3257 
 3258         /* Notify algo on real @ti address */
 3259         if (tc->ta->change_ti != NULL)
 3260                 tc->ta->change_ti(tc->astate, ti);
 3261 
 3262         tc->linked = 1;
 3263         tc->ta->refcnt++;
 3264 }
 3265 
 3266 /*
 3267  * Unlinks @tc from @chain table named instance.
 3268  * Zeroes states in @chain and stores them in @tc.
 3269  */
 3270 static void
 3271 unlink_table(struct ip_fw_chain *ch, struct table_config *tc)
 3272 {
 3273         struct namedobj_instance *ni;
 3274         struct table_info *ti;
 3275         uint16_t kidx;
 3276 
 3277         IPFW_UH_WLOCK_ASSERT(ch);
 3278         IPFW_WLOCK_ASSERT(ch);
 3279 
 3280         ni = CHAIN_TO_NI(ch);
 3281         kidx = tc->no.kidx;
 3282 
 3283         /* Clear state. @ti copy is already saved inside @tc */
 3284         ipfw_objhash_del(ni, &tc->no);
 3285         ti = KIDX_TO_TI(ch, kidx);
 3286         memset(ti, 0, sizeof(struct table_info));
 3287         tc->linked = 0;
 3288         tc->ta->refcnt--;
 3289 
 3290         /* Notify algo on real @ti address */
 3291         if (tc->ta->change_ti != NULL)
 3292                 tc->ta->change_ti(tc->astate, NULL);
 3293 }
 3294 
 3295 static struct ipfw_sopt_handler scodes[] = {
 3296         { IP_FW_TABLE_XCREATE,  0,      HDIR_SET,       create_table },
 3297         { IP_FW_TABLE_XDESTROY, 0,      HDIR_SET,       flush_table_v0 },
 3298         { IP_FW_TABLE_XFLUSH,   0,      HDIR_SET,       flush_table_v0 },
 3299         { IP_FW_TABLE_XMODIFY,  0,      HDIR_BOTH,      modify_table },
 3300         { IP_FW_TABLE_XINFO,    0,      HDIR_GET,       describe_table },
 3301         { IP_FW_TABLES_XLIST,   0,      HDIR_GET,       list_tables },
 3302         { IP_FW_TABLE_XLIST,    0,      HDIR_GET,       dump_table_v0 },
 3303         { IP_FW_TABLE_XLIST,    1,      HDIR_GET,       dump_table_v1 },
 3304         { IP_FW_TABLE_XADD,     0,      HDIR_BOTH,      manage_table_ent_v0 },
 3305         { IP_FW_TABLE_XADD,     1,      HDIR_BOTH,      manage_table_ent_v1 },
 3306         { IP_FW_TABLE_XDEL,     0,      HDIR_BOTH,      manage_table_ent_v0 },
 3307         { IP_FW_TABLE_XDEL,     1,      HDIR_BOTH,      manage_table_ent_v1 },
 3308         { IP_FW_TABLE_XFIND,    0,      HDIR_GET,       find_table_entry },
 3309         { IP_FW_TABLE_XSWAP,    0,      HDIR_SET,       swap_table },
 3310         { IP_FW_TABLES_ALIST,   0,      HDIR_GET,       list_table_algo },
 3311         { IP_FW_TABLE_XGETSIZE, 0,      HDIR_GET,       get_table_size },
 3312 };
 3313 
 3314 static int
 3315 destroy_table_locked(struct namedobj_instance *ni, struct named_object *no,
 3316     void *arg)
 3317 {
 3318 
 3319         unlink_table((struct ip_fw_chain *)arg, (struct table_config *)no);
 3320         if (ipfw_objhash_free_idx(ni, no->kidx) != 0)
 3321                 printf("Error unlinking kidx %d from table %s\n",
 3322                     no->kidx, no->name);
 3323         free_table_config(ni, (struct table_config *)no);
 3324         return (0);
 3325 }
 3326 
 3327 /*
 3328  * Shuts tables module down.
 3329  */
 3330 void
 3331 ipfw_destroy_tables(struct ip_fw_chain *ch, int last)
 3332 {
 3333 
 3334         IPFW_DEL_SOPT_HANDLER(last, scodes);
 3335         IPFW_DEL_OBJ_REWRITER(last, opcodes);
 3336 
 3337         /* Remove all tables from working set */
 3338         IPFW_UH_WLOCK(ch);
 3339         IPFW_WLOCK(ch);
 3340         ipfw_objhash_foreach(CHAIN_TO_NI(ch), destroy_table_locked, ch);
 3341         IPFW_WUNLOCK(ch);
 3342         IPFW_UH_WUNLOCK(ch);
 3343 
 3344         /* Free pointers itself */
 3345         free(ch->tablestate, M_IPFW);
 3346 
 3347         ipfw_table_value_destroy(ch, last);
 3348         ipfw_table_algo_destroy(ch);
 3349 
 3350         ipfw_objhash_destroy(CHAIN_TO_NI(ch));
 3351         free(CHAIN_TO_TCFG(ch), M_IPFW);
 3352 }
 3353 
 3354 /*
 3355  * Starts tables module.
 3356  */
 3357 int
 3358 ipfw_init_tables(struct ip_fw_chain *ch, int first)
 3359 {
 3360         struct tables_config *tcfg;
 3361 
 3362         /* Allocate pointers */
 3363         ch->tablestate = malloc(V_fw_tables_max * sizeof(struct table_info),
 3364             M_IPFW, M_WAITOK | M_ZERO);
 3365 
 3366         tcfg = malloc(sizeof(struct tables_config), M_IPFW, M_WAITOK | M_ZERO);
 3367         tcfg->namehash = ipfw_objhash_create(V_fw_tables_max);
 3368         ch->tblcfg = tcfg;
 3369 
 3370         ipfw_table_value_init(ch, first);
 3371         ipfw_table_algo_init(ch);
 3372 
 3373         IPFW_ADD_OBJ_REWRITER(first, opcodes);
 3374         IPFW_ADD_SOPT_HANDLER(first, scodes);
 3375         return (0);
 3376 }

Cache object: 916d6c08f6ce81c88ee5e9ea2a7d60fc


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]


This page is part of the FreeBSD/Linux Kernel Cross-Reference, and was automatically generated using a modified version of the LXR engine.