The Design and Implementation of the FreeBSD Operating System, Second Edition
Now available: The Design and Implementation of the FreeBSD Operating System (Second Edition)


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]

FreeBSD/Linux Kernel Cross Reference
sys/netinet/tcp_ratelimit.h

Version: -  FREEBSD  -  FREEBSD-13-STABLE  -  FREEBSD-13-0  -  FREEBSD-12-STABLE  -  FREEBSD-12-0  -  FREEBSD-11-STABLE  -  FREEBSD-11-0  -  FREEBSD-10-STABLE  -  FREEBSD-10-0  -  FREEBSD-9-STABLE  -  FREEBSD-9-0  -  FREEBSD-8-STABLE  -  FREEBSD-8-0  -  FREEBSD-7-STABLE  -  FREEBSD-7-0  -  FREEBSD-6-STABLE  -  FREEBSD-6-0  -  FREEBSD-5-STABLE  -  FREEBSD-5-0  -  FREEBSD-4-STABLE  -  FREEBSD-3-STABLE  -  FREEBSD22  -  l41  -  OPENBSD  -  linux-2.6  -  MK84  -  PLAN9  -  xnu-8792 
SearchContext: -  none  -  3  -  10 

    1 /*-
    2  *
    3  * SPDX-License-Identifier: BSD-3-Clause
    4  *
    5  * Copyright (c) 2018-2020
    6  *      Netflix Inc.
    7  *
    8  * Redistribution and use in source and binary forms, with or without
    9  * modification, are permitted provided that the following conditions
   10  * are met:
   11  * 1. Redistributions of source code must retain the above copyright
   12  *    notice, this list of conditions and the following disclaimer.
   13  * 2. Redistributions in binary form must reproduce the above copyright
   14  *    notice, this list of conditions and the following disclaimer in the
   15  *    documentation and/or other materials provided with the distribution.
   16  *
   17  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
   18  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
   19  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
   20  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
   21  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
   22  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
   23  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
   24  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
   25  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
   26  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
   27  * SUCH DAMAGE.
   28  * __FBSDID("$FreeBSD$");
   29  *
   30  */
   31 /**
   32  * Author: Randall Stewart <rrs@netflix.com>
   33  */
   34 #ifndef __tcp_ratelimit_h__
   35 #define __tcp_ratelimit_h__
   36 
   37 struct m_snd_tag;
   38 
/*
 * Divisors applied to the bandwidth when sizing a pacing burst; see
 * tcp_get_pacing_burst_size_w_divisor().  In the non-RATELIMIT version
 * of that function in this header, a divisor below RL_MIN_DIVISOR
 * (which includes 0) is replaced by RL_DEFAULT_DIVISOR.
 */
#define RL_MIN_DIVISOR 50
#define RL_DEFAULT_DIVISOR 1000
   41 
/*
 * Flags on an individual rate (presumably kept in the "flags" member of
 * struct tcp_hwrate_limit_table below -- confirm against the users).
 */
#define HDWRPACE_INITED         0x0001
#define HDWRPACE_TAGPRESENT     0x0002
#define HDWRPACE_IFPDEPARTED    0x0004
/*
 * One entry in a rate set's table of hardware pacing rates.  Entries are
 * reached through tcp_rate_set.rs_rlt and point back at their owning
 * rate set through ptbl.
 */
struct tcp_hwrate_limit_table {
        const struct tcp_rate_set *ptbl;        /* Pointer to parent table */
        struct m_snd_tag *tag;  /* Send tag if needed (chelsio) */
        long     rate;          /* Rate we get in Bytes per second (Bps) */
        long     using;         /* How many flows are using this hdwr rate. */
        long     rs_num_enobufs;        /* NOTE(review): looks like an ENOBUFS counter, see tcp_rl_log_enobuf() -- confirm */
        uint32_t time_between;  /* Time-Gap between packets at this rate */
        uint32_t flags;         /* NOTE(review): presumably the HDWRPACE_* bits above -- confirm */
};
   55 
/* Rateset flags */
#define RS_IS_DEFF      0x0001  /* It's a lagg, do a double lookup */
#define RS_IS_INTF      0x0002  /* It's a plain interface */
#define RS_NO_PRE       0x0004  /* The interface has set rates */
#define RS_INT_TBL      0x0010  /*
                                 * The table is the internal version
                                 * which has special setup requirements.
                                 */
#define RS_IS_DEAD      0x0020  /* The RS is on the dead list */
#define RS_FUNERAL_SCHD 0x0040  /* Is an epoch call scheduled to bury this guy? */
#define RS_INTF_NO_SUP  0x0100  /* The interface does not support rate limiting */
   67 
/*
 * A set of hardware pacing rates.  rs_rlt points at the table of
 * individual rates (struct tcp_hwrate_limit_table) and each of those
 * entries points back here through its ptbl member.  Rate sets are
 * linked together through the CK list entry "next".
 */
struct tcp_rate_set {
        struct sysctl_ctx_list sysctl_ctx;
        CK_LIST_ENTRY(tcp_rate_set) next;
        struct ifnet *rs_ifp;           /* NOTE(review): presumably the interface these rates belong to -- confirm */
        struct tcp_hwrate_limit_table *rs_rlt;  /* Table of rates; indexed by rs_highest_valid in tcp_hw_highest_rate() */
        uint64_t rs_flows_using;
        uint64_t rs_flow_limit;
        uint32_t rs_if_dunit;
        int rs_rate_cnt;
        int rs_min_seg;
        int rs_highest_valid;           /* Index into rs_rlt read by tcp_hw_highest_rate() */
        int rs_lowest_valid;            /* NOTE(review): presumably the rs_rlt index of the lowest valid rate -- confirm */
        int rs_disable;
        int rs_flags;                   /* NOTE(review): presumably the RS_* rateset flags -- confirm */
        struct epoch_context rs_epoch_ctx;
};

/* Declares struct head_tcp_rate_set, the CK list head type for tcp_rate_set. */
CK_LIST_HEAD(head_tcp_rate_set, tcp_rate_set);
   86 
/*
 * Request flags (NOTE(review): presumably passed in the "flags" argument
 * of tcp_set_pacing_rate() / tcp_chg_pacing_rate() -- confirm).
 */
#define RS_PACING_EXACT_MATCH   0x0001  /* Need an exact match for rate */
#define RS_PACING_GT            0x0002  /* Greater than requested */
#define RS_PACING_GEQ           0x0004  /* Greater than or equal to */
#define RS_PACING_LT            0x0008  /* Less than requested rate */
#define RS_PACING_SUB_OK        0x0010  /* If a rate can't be found get the
                                         * next best rate (highest or lowest). */
   94 #ifdef _KERNEL
/*
 * Fallback value, used only when ETHERNET_SEGMENT_SIZE has not already
 * been defined before this header is included.
 * NOTE(review): 1514 looks like a 1500-byte MTU plus a 14-byte Ethernet
 * header -- confirm.
 */
#ifndef ETHERNET_SEGMENT_SIZE
#define ETHERNET_SEGMENT_SIZE 1514
#endif
   98 #ifdef RATELIMIT
#define DETAILED_RATELIMIT_SYSCTL 1     /*
                                         * Undefine this if you don't want
                                         * detailed rates to appear in
                                         * net.inet.tcp.rl.
                                         * With the definition, each rate
                                         * shows up in your sysctl tree;
                                         * this can be big.
                                         */
  107 uint64_t inline
  108 tcp_hw_highest_rate(const struct tcp_hwrate_limit_table *rle)
  109 {
  110         return (rle->ptbl->rs_rlt[rle->ptbl->rs_highest_valid].rate);
  111 }
  112 
/*
 * Hardware rate-limiting interface, declared when RATELIMIT is defined.
 * Only the prototypes appear here; the definitions are not part of this
 * header.  When RATELIMIT is not defined, this header instead supplies
 * static stand-ins with the same names and signatures.
 */
uint64_t
tcp_hw_highest_rate_ifp(struct ifnet *ifp, struct inpcb *inp);

const struct tcp_hwrate_limit_table *
tcp_set_pacing_rate(struct tcpcb *tp, struct ifnet *ifp,
    uint64_t bytes_per_sec, int flags, int *error, uint64_t *lower_rate);

const struct tcp_hwrate_limit_table *
tcp_chg_pacing_rate(const struct tcp_hwrate_limit_table *crte,
    struct tcpcb *tp, struct ifnet *ifp,
    uint64_t bytes_per_sec, int flags, int *error, uint64_t *lower_rate);
void
tcp_rel_pacing_rate(const struct tcp_hwrate_limit_table *crte,
    struct tcpcb *tp);

uint32_t
tcp_get_pacing_burst_size_w_divisor(struct tcpcb *tp, uint64_t bw, uint32_t segsiz, int can_use_1mss,
    const struct tcp_hwrate_limit_table *te, int *err, int divisor);

void
tcp_rl_log_enobuf(const struct tcp_hwrate_limit_table *rte);
  134 
  135 #else
  136 static inline const struct tcp_hwrate_limit_table *
  137 tcp_set_pacing_rate(struct tcpcb *tp, struct ifnet *ifp,
  138     uint64_t bytes_per_sec, int flags, int *error, uint64_t *lower_rate)
  139 {
  140         if (error)
  141                 *error = EOPNOTSUPP;
  142         return (NULL);
  143 }
  144 
  145 static inline const struct tcp_hwrate_limit_table *
  146 tcp_chg_pacing_rate(const struct tcp_hwrate_limit_table *crte,
  147     struct tcpcb *tp, struct ifnet *ifp,
  148     uint64_t bytes_per_sec, int flags, int *error, uint64_t *lower_rate)
  149 {
  150         if (error)
  151                 *error = EOPNOTSUPP;
  152         return (NULL);
  153 }
  154 
  155 static inline void
  156 tcp_rel_pacing_rate(const struct tcp_hwrate_limit_table *crte,
  157     struct tcpcb *tp)
  158 {
  159         return;
  160 }
  161 
  162 static uint64_t inline
  163 tcp_hw_highest_rate(const struct tcp_hwrate_limit_table *rle)
  164 {
  165         return (0);
  166 }
  167 
  168 static uint64_t inline
  169 tcp_hw_highest_rate_ifp(struct ifnet *ifp, struct inpcb *inp)
  170 {
  171         return (0);
  172 }
  173 
  174 static inline uint32_t
  175 tcp_get_pacing_burst_size_w_divisor(struct tcpcb *tp, uint64_t bw, uint32_t segsiz, int can_use_1mss,
  176    const struct tcp_hwrate_limit_table *te, int *err, int divisor)
  177 {
  178         /*
  179          * We use the google formula to calculate the
  180          * TSO size. I.E.
  181          * bw < 24Meg
  182          *   tso = 2mss
  183          * else
  184          *   tso = min(bw/(div=1000), 64k)
  185          *
  186          * Note for these calculations we ignore the
  187          * packet overhead (enet hdr, ip hdr and tcp hdr).
  188          * We only get the google formula when we have
  189          * divisor = 1000, which is the default for now.
  190          */
  191         uint64_t bytes;
  192         uint32_t new_tso, min_tso_segs;
  193 
  194         /* It can't be zero */
  195         if ((divisor == 0) ||
  196             (divisor < RL_MIN_DIVISOR)) {
  197                 bytes = bw / RL_DEFAULT_DIVISOR;
  198         } else
  199                 bytes = bw / divisor;
  200         /* We can't ever send more than 65k in a TSO */
  201         if (bytes > 0xffff) {
  202                 bytes = 0xffff;
  203         }
  204         /* Round up */
  205         new_tso = (bytes + segsiz - 1) / segsiz;
  206         if (can_use_1mss)
  207                 min_tso_segs = 1;
  208         else
  209                 min_tso_segs = 2;
  210         if (new_tso < min_tso_segs)
  211                 new_tso = min_tso_segs;
  212         new_tso *= segsiz;
  213         return (new_tso);
  214 }
  215 
  216 /* Do nothing if RATELIMIT is not defined */
  217 static void
  218 tcp_rl_log_enobuf(const struct tcp_hwrate_limit_table *rte)
  219 {
  220 }
  221 
  222 #endif
  223 
  224 /*
  225  * Given a b/w and a segsiz, and optional hardware
  226  * rate limit, return the ideal size to burst
  227  * out at once. Note the parameter can_use_1mss
  228  * dictates if the transport will tolerate a 1mss
  229  * limit, if not it will bottom out at 2mss (think
  230  * delayed ack).
  231  */
  232 static inline uint32_t
  233 tcp_get_pacing_burst_size(struct tcpcb *tp, uint64_t bw, uint32_t segsiz, int can_use_1mss,
  234                           const struct tcp_hwrate_limit_table *te, int *err)
  235 {
  236 
  237         return (tcp_get_pacing_burst_size_w_divisor(tp, bw, segsiz,
  238                                                     can_use_1mss,
  239                                                     te, err, 0));
  240 }
  241 
  242 #endif
  243 #endif

Cache object: b7fea1bb9bc54ff28a8ac91bc3138942


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]


This page is part of the FreeBSD/Linux Kernel Cross-Reference, and was automatically generated using a modified version of the LXR engine.