FreeBSD/Linux Kernel Cross Reference
sys/net/if_poll.c


    1 /*-
    2  * Copyright (c) 2001-2002 Luigi Rizzo
    3  *
    4  * Supported by: the Xorp Project (www.xorp.org)
    5  *
    6  * Redistribution and use in source and binary forms, with or without
    7  * modification, are permitted provided that the following conditions
    8  * are met:
    9  * 1. Redistributions of source code must retain the above copyright
   10  *    notice, this list of conditions and the following disclaimer.
   11  * 2. Redistributions in binary form must reproduce the above copyright
   12  *    notice, this list of conditions and the following disclaimer in the
   13  *    documentation and/or other materials provided with the distribution.
   14  *
   15  * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND
   16  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
   17  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
   18  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE
   19  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
   20  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
   21  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
   22  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
   23  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
   24  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
   25  * SUCH DAMAGE.
   26  *
   27  * $FreeBSD: src/sys/kern/kern_poll.c,v 1.2.2.4 2002/06/27 23:26:33 luigi Exp $
   28  */
   29 
   30 #include "opt_ifpoll.h"
   31 
   32 #include <sys/param.h>
   33 #include <sys/kernel.h>
   34 #include <sys/ktr.h>
   35 #include <sys/malloc.h>
   36 #include <sys/serialize.h>
   37 #include <sys/socket.h>
   38 #include <sys/sysctl.h>
   39 
   40 #include <sys/thread2.h>
   41 #include <sys/msgport2.h>
   42 
   43 #include <machine/atomic.h>
   44 #include <machine/clock.h>
   45 #include <machine/smp.h>
   46 
   47 #include <net/if.h>
   48 #include <net/if_poll.h>
   49 #include <net/netmsg2.h>
   50 #include <net/netisr2.h>
   51 
   52 /*
   53  * Polling support for network device drivers.
   54  *
   55  * Drivers which support this feature try to register one status polling
   56  * handler and several TX/RX polling handlers with the polling code.
    57  * If the interface's if_npoll is called with a non-NULL second argument,
    58  * then a register operation is requested, else a deregister operation is
    59  * requested.  If the requested operation is "register", the driver should
    60  * set up the ifpoll_info passed in according to its own needs:
   61  *   ifpoll_info.ifpi_status.status_func == NULL
   62  *     No status polling handler will be installed on CPU(0)
   63  *   ifpoll_info.ifpi_rx[n].poll_func == NULL
   64  *     No RX polling handler will be installed on CPU(n)
   65  *   ifpoll_info.ifpi_tx[n].poll_func == NULL
   66  *     No TX polling handler will be installed on CPU(n)
   67  *
   68  * RX is polled at the specified polling frequency (net.ifpoll.X.pollhz).
   69  * TX and status polling could be done at lower frequency than RX frequency
    70  * (net.ifpoll.0.status_frac and net.ifpoll.X.tx_frac).  To avoid systimer
    71  * staggering at high frequency, the RX systimer piggybacks TX and status
    72  * polling onto its ticks (XXX).
   73  *
   74  * All of the registered polling handlers are called only if the interface
   75  * is marked as 'IFF_RUNNING and IFF_NPOLLING'.  However, the interface's
   76  * register and deregister function (ifnet.if_npoll) will be called even
    77  * if the interface is not marked with 'IFF_RUNNING'.
   78  *
   79  * If registration is successful, the driver must disable interrupts,
    80  * and further I/O is performed through the TX/RX polling handlers, which
   81  * are invoked (at least once per clock tick) with 3 arguments: the "arg"
   82  * passed at register time, a struct ifnet pointer, and a "count" limit.
   83  * The registered serializer will be held before calling the related
   84  * polling handler.
   85  *
   86  * The count limit specifies how much work the handler can do during the
   87  * call -- typically this is the number of packets to be received, or
   88  * transmitted, etc. (drivers are free to interpret this number, as long
   89  * as the max time spent in the function grows roughly linearly with the
   90  * count).
   91  *
   92  * A second variable controls the sharing of CPU between polling/kernel
   93  * network processing, and other activities (typically userlevel tasks):
   94  * net.ifpoll.X.{rx,tx}.user_frac (between 0 and 100, default 50) sets the
   95  * share of CPU allocated to user tasks.  CPU is allocated proportionally
   96  * to the shares, by dynamically adjusting the "count" (poll_burst).
   97  *
    98  * Other parameters should be left to their default values.
   99  * The following constraints hold
  100  *
  101  *      1 <= poll_burst <= poll_burst_max
  102  *      1 <= poll_each_burst <= poll_burst_max
   103  *      MIN_IOPOLL_BURST_MAX <= poll_burst_max <= MAX_IOPOLL_BURST_MAX
  104  */
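
/*
 * Example (illustrative sketch only, not part of the original file):
 * a hypothetical single-queue driver "xx" implementing the contract
 * described above might fill in its if_npoll method roughly as follows.
 * All xx_* names are made up for illustration; the ifpoll_info fields
 * match the stpoll/iopoll registration code below.
 *
 *      static void
 *      xx_npoll(struct ifnet *ifp, struct ifpoll_info *info)
 *      {
 *              struct xx_softc *sc = ifp->if_softc;
 *              int cpu = 0;            (cpu owning the single queue)
 *
 *              if (info != NULL) {
 *                      (register: install status/RX/TX handlers)
 *                      info->ifpi_status.status_func = xx_npoll_status;
 *                      info->ifpi_status.serializer = &sc->xx_serialize;
 *
 *                      info->ifpi_rx[cpu].poll_func = xx_npoll_rx;
 *                      info->ifpi_rx[cpu].arg = sc;
 *                      info->ifpi_rx[cpu].serializer = &sc->xx_serialize;
 *
 *                      info->ifpi_tx[cpu].poll_func = xx_npoll_tx;
 *                      info->ifpi_tx[cpu].arg = sc;
 *                      info->ifpi_tx[cpu].serializer = &sc->xx_serialize;
 *
 *                      (polling replaces interrupts while running)
 *                      if (ifp->if_flags & IFF_RUNNING)
 *                              xx_disable_intr(sc);
 *              } else {
 *                      (deregister: resume interrupt driven operation)
 *                      if (ifp->if_flags & IFF_RUNNING)
 *                              xx_enable_intr(sc);
 *              }
 *      }
 */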
  105 
  106 #define IFPOLL_LIST_LEN         128
  107 #define IFPOLL_FREQ_MAX         30000
  108 
  109 #define MIN_IOPOLL_BURST_MAX    10
  110 #define MAX_IOPOLL_BURST_MAX    5000
  111 #define IOPOLL_BURST_MAX        250     /* good for 1000Mbit net and HZ=6000 */
  112 
  113 #define IOPOLL_EACH_BURST       50
  114 #define IOPOLL_USER_FRAC        50
  115 
  116 #define IFPOLL_FREQ_DEFAULT     6000
  117 
  118 #define IFPOLL_TXFRAC_DEFAULT   1       /* 1/1 of the pollhz */
  119 #define IFPOLL_STFRAC_DEFAULT   120     /* 1/120 of the pollhz */
  120 
  121 #define IFPOLL_RX               0x1
  122 #define IFPOLL_TX               0x2
  123 
  124 union ifpoll_time {
  125         struct timeval          tv;
  126         uint64_t                tsc;
  127 };
  128 
  129 struct iopoll_rec {
  130         struct lwkt_serialize   *serializer;
  131         struct ifnet            *ifp;
  132         void                    *arg;
  133         ifpoll_iofn_t           poll_func;
  134 };
  135 
  136 struct iopoll_ctx {
  137         union ifpoll_time       prev_t;
  138         u_long                  short_ticks;            /* statistics */
  139         u_long                  lost_polls;             /* statistics */
  140         u_long                  suspect;                /* statistics */
  141         u_long                  stalled;                /* statistics */
  142         uint32_t                pending_polls;          /* state */
  143 
  144         struct netmsg_base      poll_netmsg;
  145         struct netmsg_base      poll_more_netmsg;
  146 
  147         int                     poll_cpuid;
  148         int                     pollhz;
  149         uint32_t                phase;                  /* state */
  150         int                     residual_burst;         /* state */
  151         uint32_t                poll_each_burst;        /* tunable */
  152         union ifpoll_time       poll_start_t;           /* state */
  153 
  154         uint32_t                poll_burst;             /* state */
  155         uint32_t                poll_burst_max;         /* tunable */
  156         uint32_t                user_frac;              /* tunable */
  157         uint32_t                kern_frac;              /* state */
  158 
  159         uint32_t                poll_handlers; /* next free entry in pr[]. */
  160         struct iopoll_rec       pr[IFPOLL_LIST_LEN];
  161 
  162         struct sysctl_ctx_list  poll_sysctl_ctx;
  163         struct sysctl_oid       *poll_sysctl_tree;
  164 } __cachealign;
  165 
  166 struct poll_comm {
  167         struct systimer         pollclock;
  168         int                     poll_cpuid;
  169 
  170         int                     stfrac_count;           /* state */
  171         int                     poll_stfrac;            /* tunable */
  172 
  173         int                     txfrac_count;           /* state */
  174         int                     poll_txfrac;            /* tunable */
  175 
  176         int                     pollhz;                 /* tunable */
  177 
  178         struct sysctl_ctx_list  sysctl_ctx;
  179         struct sysctl_oid       *sysctl_tree;
  180 } __cachealign;
  181 
  182 struct stpoll_rec {
  183         struct lwkt_serialize   *serializer;
  184         struct ifnet            *ifp;
  185         ifpoll_stfn_t           status_func;
  186 };
  187 
  188 struct stpoll_ctx {
  189         struct netmsg_base      poll_netmsg;
  190 
  191         uint32_t                poll_handlers; /* next free entry in pr[]. */
  192         struct stpoll_rec       pr[IFPOLL_LIST_LEN];
  193 
  194         struct sysctl_ctx_list  poll_sysctl_ctx;
  195         struct sysctl_oid       *poll_sysctl_tree;
  196 } __cachealign;
  197 
  198 struct iopoll_sysctl_netmsg {
  199         struct netmsg_base      base;
  200         struct iopoll_ctx       *ctx;
  201 };
  202 
  203 void            ifpoll_init_pcpu(int);
  204 static void     ifpoll_register_handler(netmsg_t);
  205 static void     ifpoll_deregister_handler(netmsg_t);
  206 
  207 /*
  208  * Status polling
  209  */
  210 static void     stpoll_init(void);
  211 static void     stpoll_handler(netmsg_t);
  212 static void     stpoll_clock(struct stpoll_ctx *);
  213 static int      stpoll_register(struct ifnet *, const struct ifpoll_status *);
  214 static int      stpoll_deregister(struct ifnet *);
  215 
  216 /*
  217  * RX/TX polling
  218  */
  219 static struct iopoll_ctx *iopoll_ctx_create(int, int);
  220 static void     iopoll_init(int);
  221 static void     rxpoll_handler(netmsg_t);
  222 static void     txpoll_handler(netmsg_t);
  223 static void     rxpollmore_handler(netmsg_t);
  224 static void     txpollmore_handler(netmsg_t);
  225 static void     iopoll_clock(struct iopoll_ctx *);
  226 static int      iopoll_register(struct ifnet *, struct iopoll_ctx *,
  227                     const struct ifpoll_io *);
  228 static int      iopoll_deregister(struct ifnet *, struct iopoll_ctx *);
  229 
  230 static void     iopoll_add_sysctl(struct sysctl_ctx_list *,
  231                     struct sysctl_oid_list *, struct iopoll_ctx *, int);
  232 static void     sysctl_burstmax_handler(netmsg_t);
  233 static int      sysctl_burstmax(SYSCTL_HANDLER_ARGS);
  234 static void     sysctl_eachburst_handler(netmsg_t);
  235 static int      sysctl_eachburst(SYSCTL_HANDLER_ARGS);
  236 
  237 /*
  238  * Common functions
  239  */
  240 static void     poll_comm_init(int);
  241 static void     poll_comm_start(int);
  242 static void     poll_comm_adjust_pollhz(struct poll_comm *);
  243 static void     poll_comm_systimer0(systimer_t, int, struct intrframe *);
  244 static void     poll_comm_systimer(systimer_t, int, struct intrframe *);
  245 static void     sysctl_pollhz_handler(netmsg_t);
  246 static void     sysctl_stfrac_handler(netmsg_t);
  247 static void     sysctl_txfrac_handler(netmsg_t);
  248 static int      sysctl_pollhz(SYSCTL_HANDLER_ARGS);
  249 static int      sysctl_stfrac(SYSCTL_HANDLER_ARGS);
  250 static int      sysctl_txfrac(SYSCTL_HANDLER_ARGS);
  251 static int      sysctl_compat_npoll_stfrac(SYSCTL_HANDLER_ARGS);
  252 static int      sysctl_compat_npoll_cpuid(SYSCTL_HANDLER_ARGS);
  253 
  254 static struct stpoll_ctx        stpoll_context;
  255 static struct poll_comm         *poll_common[MAXCPU];
  256 static struct iopoll_ctx        *rxpoll_context[MAXCPU];
  257 static struct iopoll_ctx        *txpoll_context[MAXCPU];
  258 
  259 SYSCTL_NODE(_net, OID_AUTO, ifpoll, CTLFLAG_RW, 0,
  260             "Network device polling parameters");
  261 
  262 static int      iopoll_burst_max = IOPOLL_BURST_MAX;
  263 static int      iopoll_each_burst = IOPOLL_EACH_BURST;
  264 static int      iopoll_user_frac = IOPOLL_USER_FRAC;
  265 
  266 static int      ifpoll_pollhz = IFPOLL_FREQ_DEFAULT;
  267 static int      ifpoll_stfrac = IFPOLL_STFRAC_DEFAULT;
  268 static int      ifpoll_txfrac = IFPOLL_TXFRAC_DEFAULT;
  269 
  270 TUNABLE_INT("net.ifpoll.burst_max", &iopoll_burst_max);
  271 TUNABLE_INT("net.ifpoll.each_burst", &iopoll_each_burst);
  272 TUNABLE_INT("net.ifpoll.user_frac", &iopoll_user_frac);
  273 TUNABLE_INT("net.ifpoll.pollhz", &ifpoll_pollhz);
  274 TUNABLE_INT("net.ifpoll.status_frac", &ifpoll_stfrac);
  275 TUNABLE_INT("net.ifpoll.tx_frac", &ifpoll_txfrac);
  276 
  277 #if !defined(KTR_IF_POLL)
  278 #define  KTR_IF_POLL            KTR_ALL
  279 #endif
  280 KTR_INFO_MASTER(if_poll);
  281 KTR_INFO(KTR_IF_POLL, if_poll, rx_start, 0, "rx start");
  282 KTR_INFO(KTR_IF_POLL, if_poll, rx_end, 1, "rx end");
  283 KTR_INFO(KTR_IF_POLL, if_poll, tx_start, 2, "tx start");
  284 KTR_INFO(KTR_IF_POLL, if_poll, tx_end, 3, "tx end");
  285 KTR_INFO(KTR_IF_POLL, if_poll, rx_mstart, 4, "rx more start");
  286 KTR_INFO(KTR_IF_POLL, if_poll, rx_mend, 5, "rx more end");
  287 KTR_INFO(KTR_IF_POLL, if_poll, tx_mstart, 6, "tx more start");
  288 KTR_INFO(KTR_IF_POLL, if_poll, tx_mend, 7, "tx more end");
  289 KTR_INFO(KTR_IF_POLL, if_poll, ioclock_start, 8, "ioclock start");
  290 KTR_INFO(KTR_IF_POLL, if_poll, ioclock_end, 9, "ioclock end");
  291 #define logpoll(name)   KTR_LOG(if_poll_ ## name)
  292 
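/*
 * Each cpu's polling systimer runs at a slightly different frequency:
 * IFPOLL_FREQ_ADJ adds a small per-cpu offset ((cpuid * 3) % 50 hz) to
 * the configured pollhz, presumably so that the per-cpu systimers do
 * not all fire in lockstep.  poll_comm_pollhz_div() applies the offset
 * when the frequency is stored and poll_comm_pollhz_conv() strips it
 * when the frequency is reported back through sysctl.
 */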
  293 #define IFPOLL_FREQ_ADJ(comm)   (((comm)->poll_cpuid * 3) % 50)
  294 
  295 static __inline int
  296 poll_comm_pollhz_div(const struct poll_comm *comm, int pollhz)
  297 {
  298         return pollhz + IFPOLL_FREQ_ADJ(comm);
  299 }
  300 
  301 static __inline int
  302 poll_comm_pollhz_conv(const struct poll_comm *comm, int pollhz)
  303 {
  304         return pollhz - IFPOLL_FREQ_ADJ(comm);
  305 }
  306 
  307 static __inline void
  308 ifpoll_sendmsg_oncpu(netmsg_t msg)
  309 {
  310         if (msg->lmsg.ms_flags & MSGF_DONE)
  311                 lwkt_sendmsg_oncpu(netisr_cpuport(mycpuid), &msg->lmsg);
  312 }
  313 
  314 static __inline void
  315 sched_stpoll(struct stpoll_ctx *st_ctx)
  316 {
  317         ifpoll_sendmsg_oncpu((netmsg_t)&st_ctx->poll_netmsg);
  318 }
  319 
  320 static __inline void
  321 sched_iopoll(struct iopoll_ctx *io_ctx)
  322 {
  323         ifpoll_sendmsg_oncpu((netmsg_t)&io_ctx->poll_netmsg);
  324 }
  325 
  326 static __inline void
  327 sched_iopollmore(struct iopoll_ctx *io_ctx)
  328 {
  329         ifpoll_sendmsg_oncpu((netmsg_t)&io_ctx->poll_more_netmsg);
  330 }
  331 
  332 static __inline void
  333 ifpoll_time_get(union ifpoll_time *t)
  334 {
  335         if (tsc_invariant)
  336                 t->tsc = rdtsc();
  337         else
  338                 microuptime(&t->tv);
  339 }
  340 
  341 /* Return time diff in us */
  342 static __inline int
  343 ifpoll_time_diff(const union ifpoll_time *s, const union ifpoll_time *e)
  344 {
  345         if (tsc_invariant) {
  346                 return (((e->tsc - s->tsc) * 1000000) / tsc_frequency);
  347         } else {
  348                 return ((e->tv.tv_usec - s->tv.tv_usec) +
  349                         (e->tv.tv_sec - s->tv.tv_sec) * 1000000);
  350         }
  351 }
  352 
  353 /*
  354  * Initialize per-cpu polling(4) context.  Called from kern_clock.c:
  355  */
  356 void
  357 ifpoll_init_pcpu(int cpuid)
  358 {
  359         if (cpuid >= ncpus2)
  360                 return;
  361 
  362         poll_comm_init(cpuid);
  363 
  364         if (cpuid == 0)
  365                 stpoll_init();
  366         iopoll_init(cpuid);
  367 
  368         poll_comm_start(cpuid);
  369 }
  370 
  371 int
  372 ifpoll_register(struct ifnet *ifp)
  373 {
  374         struct ifpoll_info *info;
  375         struct netmsg_base nmsg;
  376         int error;
  377 
  378         if (ifp->if_npoll == NULL) {
  379                 /* Device does not support polling */
  380                 return EOPNOTSUPP;
  381         }
  382 
  383         info = kmalloc(sizeof(*info), M_TEMP, M_WAITOK | M_ZERO);
  384 
  385         /*
  386          * Attempt to register.  Interlock with IFF_NPOLLING.
  387          */
  388 
  389         ifnet_serialize_all(ifp);
  390 
  391         if (ifp->if_flags & IFF_NPOLLING) {
  392                 /* Already polling */
  393                 ifnet_deserialize_all(ifp);
  394                 kfree(info, M_TEMP);
  395                 return EBUSY;
  396         }
  397 
  398         info->ifpi_ifp = ifp;
  399 
  400         ifp->if_flags |= IFF_NPOLLING;
  401         ifp->if_npoll(ifp, info);
  402 
  403         ifnet_deserialize_all(ifp);
  404 
  405         netmsg_init(&nmsg, NULL, &curthread->td_msgport,
  406                     0, ifpoll_register_handler);
  407         nmsg.lmsg.u.ms_resultp = info;
  408 
  409         error = lwkt_domsg(netisr_cpuport(0), &nmsg.lmsg, 0);
  410         if (error) {
  411                 if (!ifpoll_deregister(ifp)) {
  412                         if_printf(ifp, "ifpoll_register: "
  413                                   "ifpoll_deregister failed!\n");
  414                 }
  415         }
  416 
  417         kfree(info, M_TEMP);
  418         return error;
  419 }
  420 
  421 int
  422 ifpoll_deregister(struct ifnet *ifp)
  423 {
  424         struct netmsg_base nmsg;
  425         int error;
  426 
  427         if (ifp->if_npoll == NULL)
  428                 return EOPNOTSUPP;
  429 
  430         ifnet_serialize_all(ifp);
  431 
  432         if ((ifp->if_flags & IFF_NPOLLING) == 0) {
  433                 ifnet_deserialize_all(ifp);
  434                 return EINVAL;
  435         }
  436         ifp->if_flags &= ~IFF_NPOLLING;
  437 
  438         ifnet_deserialize_all(ifp);
  439 
  440         netmsg_init(&nmsg, NULL, &curthread->td_msgport,
  441                     0, ifpoll_deregister_handler);
  442         nmsg.lmsg.u.ms_resultp = ifp;
  443 
  444         error = lwkt_domsg(netisr_cpuport(0), &nmsg.lmsg, 0);
  445         if (!error) {
  446                 ifnet_serialize_all(ifp);
  447                 ifp->if_npoll(ifp, NULL);
  448                 ifnet_deserialize_all(ifp);
  449         }
  450         return error;
  451 }
  452 
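/*
 * ifpoll_register_handler() runs in the netisr thread on cpu0 first
 * (dispatched from ifpoll_register() above): it registers the status
 * handler (cpu0 only) and the RX/TX handlers for the current cpu,
 * adjusts that cpu's polling frequency, then forwards the same netmsg
 * to the next netisr cpu so that every cpu in [0, ncpus2) is visited.
 * The message is replied from the last cpu on success, or immediately
 * with an error code if any registration fails.
 */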
  453 static void
  454 ifpoll_register_handler(netmsg_t nmsg)
  455 {
  456         const struct ifpoll_info *info = nmsg->lmsg.u.ms_resultp;
  457         int cpuid = mycpuid, nextcpu;
  458         int error;
  459 
  460         KKASSERT(cpuid < ncpus2);
  461         KKASSERT(&curthread->td_msgport == netisr_cpuport(cpuid));
  462 
  463         if (cpuid == 0) {
  464                 error = stpoll_register(info->ifpi_ifp, &info->ifpi_status);
  465                 if (error)
  466                         goto failed;
  467         }
  468 
  469         error = iopoll_register(info->ifpi_ifp, rxpoll_context[cpuid],
  470                                 &info->ifpi_rx[cpuid]);
  471         if (error)
  472                 goto failed;
  473 
  474         error = iopoll_register(info->ifpi_ifp, txpoll_context[cpuid],
  475                                 &info->ifpi_tx[cpuid]);
  476         if (error)
  477                 goto failed;
  478 
  479         /* Adjust polling frequency, after all registration is done */
  480         poll_comm_adjust_pollhz(poll_common[cpuid]);
  481 
  482         nextcpu = cpuid + 1;
  483         if (nextcpu < ncpus2)
  484                 lwkt_forwardmsg(netisr_cpuport(nextcpu), &nmsg->lmsg);
  485         else
  486                 lwkt_replymsg(&nmsg->lmsg, 0);
  487         return;
  488 failed:
  489         lwkt_replymsg(&nmsg->lmsg, error);
  490 }
  491 
  492 static void
  493 ifpoll_deregister_handler(netmsg_t nmsg)
  494 {
  495         struct ifnet *ifp = nmsg->lmsg.u.ms_resultp;
  496         int cpuid = mycpuid, nextcpu;
  497 
  498         KKASSERT(cpuid < ncpus2);
  499         KKASSERT(&curthread->td_msgport == netisr_cpuport(cpuid));
  500 
  501         /* Ignore errors */
  502         if (cpuid == 0)
  503                 stpoll_deregister(ifp);
  504         iopoll_deregister(ifp, rxpoll_context[cpuid]);
  505         iopoll_deregister(ifp, txpoll_context[cpuid]);
  506 
  507         /* Adjust polling frequency, after all deregistration is done */
  508         poll_comm_adjust_pollhz(poll_common[cpuid]);
  509 
  510         nextcpu = cpuid + 1;
  511         if (nextcpu < ncpus2)
  512                 lwkt_forwardmsg(netisr_cpuport(nextcpu), &nmsg->lmsg);
  513         else
  514                 lwkt_replymsg(&nmsg->lmsg, 0);
  515 }
  516 
  517 static void
  518 stpoll_init(void)
  519 {
  520         struct stpoll_ctx *st_ctx = &stpoll_context;
  521         const struct poll_comm *comm = poll_common[0];
  522 
  523         sysctl_ctx_init(&st_ctx->poll_sysctl_ctx);
  524         st_ctx->poll_sysctl_tree = SYSCTL_ADD_NODE(&st_ctx->poll_sysctl_ctx,
  525                                    SYSCTL_CHILDREN(comm->sysctl_tree),
  526                                    OID_AUTO, "status", CTLFLAG_RD, 0, "");
  527 
  528         SYSCTL_ADD_UINT(&st_ctx->poll_sysctl_ctx,
  529                         SYSCTL_CHILDREN(st_ctx->poll_sysctl_tree),
  530                         OID_AUTO, "handlers", CTLFLAG_RD,
  531                         &st_ctx->poll_handlers, 0,
  532                         "Number of registered status poll handlers");
  533 
  534         netmsg_init(&st_ctx->poll_netmsg, NULL, &netisr_adone_rport,
  535                     0, stpoll_handler);
  536 }
  537 
  538 /*
  539  * stpoll_handler is scheduled by sched_stpoll when appropriate, typically
  540  * once per polling systimer tick.
  541  */
  542 static void
  543 stpoll_handler(netmsg_t msg)
  544 {
  545         struct stpoll_ctx *st_ctx = &stpoll_context;
  546         struct thread *td = curthread;
  547         int i;
  548 
  549         KKASSERT(&td->td_msgport == netisr_cpuport(0));
  550 
  551         crit_enter_quick(td);
  552 
  553         /* Reply ASAP */
  554         lwkt_replymsg(&msg->lmsg, 0);
  555 
  556         if (st_ctx->poll_handlers == 0) {
  557                 crit_exit_quick(td);
  558                 return;
  559         }
  560 
  561         for (i = 0; i < st_ctx->poll_handlers; ++i) {
  562                 const struct stpoll_rec *rec = &st_ctx->pr[i];
  563                 struct ifnet *ifp = rec->ifp;
  564 
  565                 if (!lwkt_serialize_try(rec->serializer))
  566                         continue;
  567 
  568                 if ((ifp->if_flags & (IFF_RUNNING | IFF_NPOLLING)) ==
  569                     (IFF_RUNNING | IFF_NPOLLING))
  570                         rec->status_func(ifp);
  571 
  572                 lwkt_serialize_exit(rec->serializer);
  573         }
  574 
  575         crit_exit_quick(td);
  576 }
  577 
  578 /*
   579  * Hook from status poll systimer.  Tries to schedule a status poll.
  580  * NOTE: Caller should hold critical section.
  581  */
  582 static void
  583 stpoll_clock(struct stpoll_ctx *st_ctx)
  584 {
  585         KKASSERT(mycpuid == 0);
  586 
  587         if (st_ctx->poll_handlers == 0)
  588                 return;
  589         sched_stpoll(st_ctx);
  590 }
  591 
  592 static int
  593 stpoll_register(struct ifnet *ifp, const struct ifpoll_status *st_rec)
  594 {
  595         struct stpoll_ctx *st_ctx = &stpoll_context;
  596         int error;
  597 
  598         KKASSERT(&curthread->td_msgport == netisr_cpuport(0));
  599 
  600         if (st_rec->status_func == NULL)
  601                 return 0;
  602 
  603         /*
  604          * Check if there is room.
  605          */
  606         if (st_ctx->poll_handlers >= IFPOLL_LIST_LEN) {
  607                 /*
  608                  * List full, cannot register more entries.
  609                  * This should never happen; if it does, it is probably a
  610                  * broken driver trying to register multiple times. Checking
  611                  * this at runtime is expensive, and won't solve the problem
   612                  * anyway, so just report a few times and then give up.
  613                  */
  614                 static int verbose = 10; /* XXX */
  615 
  616                 if (verbose > 0) {
  617                         kprintf("status poll handlers list full, "
  618                                 "maybe a broken driver ?\n");
  619                         verbose--;
  620                 }
  621                 error = ENOENT;
  622         } else {
  623                 struct stpoll_rec *rec = &st_ctx->pr[st_ctx->poll_handlers];
  624 
  625                 rec->ifp = ifp;
  626                 rec->serializer = st_rec->serializer;
  627                 rec->status_func = st_rec->status_func;
  628 
  629                 st_ctx->poll_handlers++;
  630                 error = 0;
  631         }
  632         return error;
  633 }
  634 
  635 static int
  636 stpoll_deregister(struct ifnet *ifp)
  637 {
  638         struct stpoll_ctx *st_ctx = &stpoll_context;
  639         int i, error;
  640 
  641         KKASSERT(&curthread->td_msgport == netisr_cpuport(0));
  642 
  643         for (i = 0; i < st_ctx->poll_handlers; ++i) {
  644                 if (st_ctx->pr[i].ifp == ifp) /* Found it */
  645                         break;
  646         }
  647         if (i == st_ctx->poll_handlers) {
  648                 error = ENOENT;
  649         } else {
  650                 st_ctx->poll_handlers--;
  651                 if (i < st_ctx->poll_handlers) {
  652                         /* Last entry replaces this one. */
  653                         st_ctx->pr[i] = st_ctx->pr[st_ctx->poll_handlers];
  654                 }
  655                 error = 0;
  656         }
  657         return error;
  658 }
  659 
  660 static __inline void
  661 iopoll_reset_state(struct iopoll_ctx *io_ctx)
  662 {
  663         crit_enter();
  664         io_ctx->poll_burst = io_ctx->poll_each_burst;
  665         io_ctx->pending_polls = 0;
  666         io_ctx->residual_burst = 0;
  667         io_ctx->phase = 0;
  668         io_ctx->kern_frac = 0;
  669         bzero(&io_ctx->poll_start_t, sizeof(io_ctx->poll_start_t));
  670         bzero(&io_ctx->prev_t, sizeof(io_ctx->prev_t));
  671         crit_exit();
  672 }
  673 
  674 static void
  675 iopoll_init(int cpuid)
  676 {
  677         KKASSERT(cpuid < ncpus2);
  678 
  679         rxpoll_context[cpuid] = iopoll_ctx_create(cpuid, IFPOLL_RX);
  680         txpoll_context[cpuid] = iopoll_ctx_create(cpuid, IFPOLL_TX);
  681 }
  682 
  683 static struct iopoll_ctx *
  684 iopoll_ctx_create(int cpuid, int poll_type)
  685 {
  686         struct poll_comm *comm;
  687         struct iopoll_ctx *io_ctx;
  688         const char *poll_type_str;
  689         netisr_fn_t handler, more_handler;
  690 
  691         KKASSERT(poll_type == IFPOLL_RX || poll_type == IFPOLL_TX);
  692 
  693         /*
  694          * Make sure that tunables are in sane state
  695          */
  696         if (iopoll_burst_max < MIN_IOPOLL_BURST_MAX)
  697                 iopoll_burst_max = MIN_IOPOLL_BURST_MAX;
  698         else if (iopoll_burst_max > MAX_IOPOLL_BURST_MAX)
  699                 iopoll_burst_max = MAX_IOPOLL_BURST_MAX;
  700 
  701         if (iopoll_each_burst > iopoll_burst_max)
  702                 iopoll_each_burst = iopoll_burst_max;
  703 
  704         comm = poll_common[cpuid];
  705 
  706         /*
  707          * Create the per-cpu polling context
  708          */
  709         io_ctx = kmalloc_cachealign(sizeof(*io_ctx), M_DEVBUF,
  710             M_WAITOK | M_ZERO);
  711 
  712         io_ctx->poll_each_burst = iopoll_each_burst;
  713         io_ctx->poll_burst_max = iopoll_burst_max;
  714         io_ctx->user_frac = iopoll_user_frac;
  715         if (poll_type == IFPOLL_RX)
  716                 io_ctx->pollhz = comm->pollhz;
  717         else
  718                 io_ctx->pollhz = comm->pollhz / (comm->poll_txfrac + 1);
  719         io_ctx->poll_cpuid = cpuid;
  720         iopoll_reset_state(io_ctx);
  721 
  722         if (poll_type == IFPOLL_RX) {
  723                 handler = rxpoll_handler;
  724                 more_handler = rxpollmore_handler;
  725         } else {
  726                 handler = txpoll_handler;
  727                 more_handler = txpollmore_handler;
  728         }
  729 
  730         netmsg_init(&io_ctx->poll_netmsg, NULL, &netisr_adone_rport,
  731             0, handler);
  732         io_ctx->poll_netmsg.lmsg.u.ms_resultp = io_ctx;
  733 
  734         netmsg_init(&io_ctx->poll_more_netmsg, NULL, &netisr_adone_rport,
  735             0, more_handler);
  736         io_ctx->poll_more_netmsg.lmsg.u.ms_resultp = io_ctx;
  737 
  738         /*
  739          * Initialize per-cpu sysctl nodes
  740          */
  741         if (poll_type == IFPOLL_RX)
  742                 poll_type_str = "rx";
  743         else
  744                 poll_type_str = "tx";
  745 
  746         sysctl_ctx_init(&io_ctx->poll_sysctl_ctx);
  747         io_ctx->poll_sysctl_tree = SYSCTL_ADD_NODE(&io_ctx->poll_sysctl_ctx,
  748                                    SYSCTL_CHILDREN(comm->sysctl_tree),
  749                                    OID_AUTO, poll_type_str, CTLFLAG_RD, 0, "");
  750         iopoll_add_sysctl(&io_ctx->poll_sysctl_ctx,
  751             SYSCTL_CHILDREN(io_ctx->poll_sysctl_tree), io_ctx, poll_type);
  752 
  753         return io_ctx;
  754 }
  755 
  756 /*
  757  * Hook from iopoll systimer.  Tries to schedule an iopoll, but keeps
  758  * track of lost ticks due to the previous handler taking too long.
   759  * Normally, this should not happen, because the polling handler should
  760  * run for a short time.  However, in some cases (e.g. when there are
  761  * changes in link status etc.) the drivers take a very long time
  762  * (even in the order of milliseconds) to reset and reconfigure the
  763  * device, causing apparent lost polls.
  764  *
  765  * The first part of the code is just for debugging purposes, and tries
   766  * to count how often hardclock ticks are shorter than they should be,
  767  * meaning either stray interrupts or delayed events.
  768  *
  769  * WARNING! called from fastint or IPI, the MP lock might not be held.
  770  * NOTE: Caller should hold critical section.
  771  */
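/*
 * Summary of the io_ctx->phase values used below and in the RX/TX
 * poll handlers (derived from the code):
 *   0 - idle, no poll cycle in progress
 *   1 - iopoll_clock() is about to schedule a poll
 *   2 - poll scheduled from iopoll_clock(), handler not yet run
 *   3 - rxpoll_handler()/txpoll_handler() running
 *   4 - pollmore handler scheduled
 *   5 - rxpollmore_handler()/txpollmore_handler() running (stays 5 while
 *       a residual RX burst is rescheduled)
 *   6 - pollmore handler rescheduled a poll to catch up on missed ticks
 */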
  772 static void
  773 iopoll_clock(struct iopoll_ctx *io_ctx)
  774 {
  775         union ifpoll_time t;
  776         int delta;
  777 
  778         KKASSERT(mycpuid == io_ctx->poll_cpuid);
  779 
  780         if (io_ctx->poll_handlers == 0)
  781                 return;
  782 
  783         logpoll(ioclock_start);
  784 
  785         ifpoll_time_get(&t);
  786         delta = ifpoll_time_diff(&io_ctx->prev_t, &t);
  787         if (delta * io_ctx->pollhz < 500000)
  788                 io_ctx->short_ticks++;
  789         else
  790                 io_ctx->prev_t = t;
  791 
  792         if (io_ctx->pending_polls > 100) {
  793                 /*
   794                  * Too many pending polls; assume it has stalled (not
   795                  * always true, see comment above).
  796                  */
  797                 io_ctx->stalled++;
  798                 io_ctx->pending_polls = 0;
  799                 io_ctx->phase = 0;
  800         }
  801 
  802         if (io_ctx->phase <= 2) {
  803                 if (io_ctx->phase != 0)
  804                         io_ctx->suspect++;
  805                 io_ctx->phase = 1;
  806                 sched_iopoll(io_ctx);
  807                 io_ctx->phase = 2;
  808         }
  809         if (io_ctx->pending_polls++ > 0)
  810                 io_ctx->lost_polls++;
  811 
  812         logpoll(ioclock_end);
  813 }
  814 
  815 /*
  816  * rxpoll_handler and txpoll_handler are scheduled by sched_iopoll when
  817  * appropriate, typically once per polling systimer tick.
  818  *
  819  * Note that the message is replied immediately in order to allow a new
  820  * ISR to be scheduled in the handler.
  821  */
  822 static void
  823 rxpoll_handler(netmsg_t msg)
  824 {
  825         struct iopoll_ctx *io_ctx;
  826         struct thread *td = curthread;
  827         int i, cycles;
  828 
  829         logpoll(rx_start);
  830 
  831         io_ctx = msg->lmsg.u.ms_resultp;
  832         KKASSERT(&td->td_msgport == netisr_cpuport(io_ctx->poll_cpuid));
  833 
  834         crit_enter_quick(td);
  835 
  836         /* Reply ASAP */
  837         lwkt_replymsg(&msg->lmsg, 0);
  838 
  839         if (io_ctx->poll_handlers == 0) {
  840                 crit_exit_quick(td);
  841                 logpoll(rx_end);
  842                 return;
  843         }
  844 
  845         io_ctx->phase = 3;
  846         if (io_ctx->residual_burst == 0) {
  847                 /* First call in this tick */
  848                 ifpoll_time_get(&io_ctx->poll_start_t);
  849                 io_ctx->residual_burst = io_ctx->poll_burst;
  850         }
  851         cycles = (io_ctx->residual_burst < io_ctx->poll_each_burst) ?
  852                  io_ctx->residual_burst : io_ctx->poll_each_burst;
  853         io_ctx->residual_burst -= cycles;
  854 
  855         for (i = 0; i < io_ctx->poll_handlers; i++) {
  856                 const struct iopoll_rec *rec = &io_ctx->pr[i];
  857                 struct ifnet *ifp = rec->ifp;
  858 
  859                 if (!lwkt_serialize_try(rec->serializer))
  860                         continue;
  861 
  862                 if ((ifp->if_flags & (IFF_RUNNING | IFF_NPOLLING)) ==
  863                     (IFF_RUNNING | IFF_NPOLLING))
  864                         rec->poll_func(ifp, rec->arg, cycles);
  865 
  866                 lwkt_serialize_exit(rec->serializer);
  867         }
  868 
  869         /*
  870          * Do a quick exit/enter to catch any higher-priority
  871          * interrupt sources.
  872          */
  873         crit_exit_quick(td);
  874         crit_enter_quick(td);
  875 
  876         sched_iopollmore(io_ctx);
  877         io_ctx->phase = 4;
  878 
  879         crit_exit_quick(td);
  880 
  881         logpoll(rx_end);
  882 }
  883 
  884 static void
  885 txpoll_handler(netmsg_t msg)
  886 {
  887         struct iopoll_ctx *io_ctx;
  888         struct thread *td = curthread;
  889         int i;
  890 
  891         logpoll(tx_start);
  892 
  893         io_ctx = msg->lmsg.u.ms_resultp;
  894         KKASSERT(&td->td_msgport == netisr_cpuport(io_ctx->poll_cpuid));
  895 
  896         crit_enter_quick(td);
  897 
  898         /* Reply ASAP */
  899         lwkt_replymsg(&msg->lmsg, 0);
  900 
  901         if (io_ctx->poll_handlers == 0) {
  902                 crit_exit_quick(td);
  903                 logpoll(tx_end);
  904                 return;
  905         }
  906 
  907         io_ctx->phase = 3;
  908 
  909         for (i = 0; i < io_ctx->poll_handlers; i++) {
  910                 const struct iopoll_rec *rec = &io_ctx->pr[i];
  911                 struct ifnet *ifp = rec->ifp;
  912 
  913                 if (!lwkt_serialize_try(rec->serializer))
  914                         continue;
  915 
  916                 if ((ifp->if_flags & (IFF_RUNNING | IFF_NPOLLING)) ==
  917                     (IFF_RUNNING | IFF_NPOLLING))
  918                         rec->poll_func(ifp, rec->arg, -1);
  919 
  920                 lwkt_serialize_exit(rec->serializer);
  921         }
  922 
  923         /*
  924          * Do a quick exit/enter to catch any higher-priority
  925          * interrupt sources.
  926          */
  927         crit_exit_quick(td);
  928         crit_enter_quick(td);
  929 
  930         sched_iopollmore(io_ctx);
  931         io_ctx->phase = 4;
  932 
  933         crit_exit_quick(td);
  934 
  935         logpoll(tx_end);
  936 }
  937 
  938 /*
   939  * rxpollmore_handler and txpollmore_handler are called after other netisrs,
  940  * possibly scheduling another rxpoll_handler or txpoll_handler call, or
  941  * adapting the burst size for the next cycle.
  942  *
  943  * It is very bad to fetch large bursts of packets from a single card at once,
  944  * because the burst could take a long time to be completely processed leading
  945  * to unfairness.  To reduce the problem, and also to account better for time
  946  * spent in network-related processing, we split the burst in smaller chunks
   947  * of fixed size, giving control to the other netisrs between chunks.  This
  948  * helps in improving the fairness, reducing livelock and accounting for the
  949  * work performed in low level handling.
  950  */
  951 static void
  952 rxpollmore_handler(netmsg_t msg)
  953 {
  954         struct thread *td = curthread;
  955         struct iopoll_ctx *io_ctx;
  956         union ifpoll_time t;
  957         int kern_load;
  958         uint32_t pending_polls;
  959 
  960         logpoll(rx_mstart);
  961 
  962         io_ctx = msg->lmsg.u.ms_resultp;
  963         KKASSERT(&td->td_msgport == netisr_cpuport(io_ctx->poll_cpuid));
  964 
  965         crit_enter_quick(td);
  966 
   967         /* Reply ASAP */
  968         lwkt_replymsg(&msg->lmsg, 0);
  969 
  970         if (io_ctx->poll_handlers == 0) {
  971                 crit_exit_quick(td);
  972                 logpoll(rx_mend);
  973                 return;
  974         }
  975 
  976         io_ctx->phase = 5;
  977         if (io_ctx->residual_burst > 0) {
  978                 sched_iopoll(io_ctx);
  979                 crit_exit_quick(td);
  980                 /* Will run immediately on return, followed by netisrs */
  981                 logpoll(rx_mend);
  982                 return;
  983         }
  984 
   985         /* Here we can account for time spent in iopoll handlers in this tick */
  986         ifpoll_time_get(&t);
  987         kern_load = ifpoll_time_diff(&io_ctx->poll_start_t, &t);
  988         kern_load = (kern_load * io_ctx->pollhz) / 10000; /* 0..100 */
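        /*
         * kern_load first holds the elapsed time in microseconds; dividing
         * by the tick length (1000000 / pollhz us) and multiplying by 100
         * reduces to (kern_load * pollhz) / 10000.  E.g. with pollhz = 6000
         * a tick is ~166us, so spending 50us in the handlers gives
         * 50 * 6000 / 10000 = 30 (30%).
         */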
  989         io_ctx->kern_frac = kern_load;
  990 
  991         if (kern_load > (100 - io_ctx->user_frac)) {
   992                 /* Try to decrease the burst size */
  993                 if (io_ctx->poll_burst > 1)
  994                         io_ctx->poll_burst--;
  995         } else {
  996                 if (io_ctx->poll_burst < io_ctx->poll_burst_max)
  997                         io_ctx->poll_burst++;
  998         }
  999 
 1000         io_ctx->pending_polls--;
 1001         pending_polls = io_ctx->pending_polls;
 1002 
 1003         if (pending_polls == 0) {
 1004                 /* We are done */
 1005                 io_ctx->phase = 0;
 1006         } else {
 1007                 /*
 1008                  * Last cycle was long and caused us to miss one or more
 1009                  * hardclock ticks.  Restart processing again, but slightly
  1010                  * reduce the burst size to prevent this from happening again.
 1011                  */
 1012                 io_ctx->poll_burst -= (io_ctx->poll_burst / 8);
 1013                 if (io_ctx->poll_burst < 1)
 1014                         io_ctx->poll_burst = 1;
 1015                 sched_iopoll(io_ctx);
 1016                 io_ctx->phase = 6;
 1017         }
 1018 
 1019         crit_exit_quick(td);
 1020 
 1021         logpoll(rx_mend);
 1022 }
 1023 
 1024 static void
 1025 txpollmore_handler(netmsg_t msg)
 1026 {
 1027         struct thread *td = curthread;
 1028         struct iopoll_ctx *io_ctx;
 1029         uint32_t pending_polls;
 1030 
 1031         logpoll(tx_mstart);
 1032 
 1033         io_ctx = msg->lmsg.u.ms_resultp;
 1034         KKASSERT(&td->td_msgport == netisr_cpuport(io_ctx->poll_cpuid));
 1035 
 1036         crit_enter_quick(td);
 1037 
  1038         /* Reply ASAP */
 1039         lwkt_replymsg(&msg->lmsg, 0);
 1040 
 1041         if (io_ctx->poll_handlers == 0) {
 1042                 crit_exit_quick(td);
 1043                 logpoll(tx_mend);
 1044                 return;
 1045         }
 1046 
 1047         io_ctx->phase = 5;
 1048 
 1049         io_ctx->pending_polls--;
 1050         pending_polls = io_ctx->pending_polls;
 1051 
 1052         if (pending_polls == 0) {
 1053                 /* We are done */
 1054                 io_ctx->phase = 0;
 1055         } else {
 1056                 /*
 1057                  * Last cycle was long and caused us to miss one or more
 1058                  * hardclock ticks.  Restart processing again.
 1059                  */
 1060                 sched_iopoll(io_ctx);
 1061                 io_ctx->phase = 6;
 1062         }
 1063 
 1064         crit_exit_quick(td);
 1065 
 1066         logpoll(tx_mend);
 1067 }
 1068 
 1069 static void
 1070 iopoll_add_sysctl(struct sysctl_ctx_list *ctx, struct sysctl_oid_list *parent,
 1071     struct iopoll_ctx *io_ctx, int poll_type)
 1072 {
 1073         if (poll_type == IFPOLL_RX) {
 1074                 SYSCTL_ADD_PROC(ctx, parent, OID_AUTO, "burst_max",
 1075                     CTLTYPE_UINT | CTLFLAG_RW, io_ctx, 0, sysctl_burstmax,
 1076                     "IU", "Max Polling burst size");
 1077 
 1078                 SYSCTL_ADD_PROC(ctx, parent, OID_AUTO, "each_burst",
 1079                     CTLTYPE_UINT | CTLFLAG_RW, io_ctx, 0, sysctl_eachburst,
 1080                     "IU", "Max size of each burst");
 1081 
 1082                 SYSCTL_ADD_UINT(ctx, parent, OID_AUTO, "burst", CTLFLAG_RD,
 1083                     &io_ctx->poll_burst, 0, "Current polling burst size");
 1084 
 1085                 SYSCTL_ADD_UINT(ctx, parent, OID_AUTO, "user_frac", CTLFLAG_RW,
 1086                     &io_ctx->user_frac, 0, "Desired user fraction of cpu time");
 1087 
 1088                 SYSCTL_ADD_UINT(ctx, parent, OID_AUTO, "kern_frac", CTLFLAG_RD,
 1089                     &io_ctx->kern_frac, 0, "Kernel fraction of cpu time");
 1090 
 1091                 SYSCTL_ADD_INT(ctx, parent, OID_AUTO, "residual_burst", CTLFLAG_RD,
 1092                     &io_ctx->residual_burst, 0,
 1093                     "# of residual cycles in burst");
 1094         }
 1095 
 1096         SYSCTL_ADD_UINT(ctx, parent, OID_AUTO, "phase", CTLFLAG_RD,
 1097             &io_ctx->phase, 0, "Polling phase");
 1098 
 1099         SYSCTL_ADD_ULONG(ctx, parent, OID_AUTO, "suspect", CTLFLAG_RW,
 1100             &io_ctx->suspect, "Suspected events");
 1101 
 1102         SYSCTL_ADD_ULONG(ctx, parent, OID_AUTO, "stalled", CTLFLAG_RW,
 1103             &io_ctx->stalled, "Potential stalls");
 1104 
 1105         SYSCTL_ADD_ULONG(ctx, parent, OID_AUTO, "short_ticks", CTLFLAG_RW,
 1106             &io_ctx->short_ticks,
 1107             "Hardclock ticks shorter than they should be");
 1108 
 1109         SYSCTL_ADD_ULONG(ctx, parent, OID_AUTO, "lost_polls", CTLFLAG_RW,
 1110             &io_ctx->lost_polls,
 1111             "How many times we would have lost a poll tick");
 1112 
 1113         SYSCTL_ADD_UINT(ctx, parent, OID_AUTO, "pending_polls", CTLFLAG_RD,
 1114             &io_ctx->pending_polls, 0, "Do we need to poll again");
 1115 
 1116         SYSCTL_ADD_UINT(ctx, parent, OID_AUTO, "handlers", CTLFLAG_RD,
 1117             &io_ctx->poll_handlers, 0, "Number of registered poll handlers");
 1118 }
 1119 
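/*
 * Per-cpu iopoll_ctx tunables are not modified directly from the sysctl
 * handlers.  sysctl_burstmax() and sysctl_eachburst() below carry the new
 * value in a netmsg to the netisr cpu owning the context, where it is
 * clamped and applied from that cpu's own thread without extra locking.
 */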
 1120 static void
 1121 sysctl_burstmax_handler(netmsg_t nmsg)
 1122 {
 1123         struct iopoll_sysctl_netmsg *msg = (struct iopoll_sysctl_netmsg *)nmsg;
 1124         struct iopoll_ctx *io_ctx;
 1125 
 1126         io_ctx = msg->ctx;
 1127         KKASSERT(&curthread->td_msgport == netisr_cpuport(io_ctx->poll_cpuid));
 1128 
 1129         io_ctx->poll_burst_max = nmsg->lmsg.u.ms_result;
 1130         if (io_ctx->poll_each_burst > io_ctx->poll_burst_max)
 1131                 io_ctx->poll_each_burst = io_ctx->poll_burst_max;
 1132         if (io_ctx->poll_burst > io_ctx->poll_burst_max)
 1133                 io_ctx->poll_burst = io_ctx->poll_burst_max;
 1134         if (io_ctx->residual_burst > io_ctx->poll_burst_max)
 1135                 io_ctx->residual_burst = io_ctx->poll_burst_max;
 1136 
 1137         lwkt_replymsg(&nmsg->lmsg, 0);
 1138 }
 1139 
 1140 static int
 1141 sysctl_burstmax(SYSCTL_HANDLER_ARGS)
 1142 {
 1143         struct iopoll_ctx *io_ctx = arg1;
 1144         struct iopoll_sysctl_netmsg msg;
 1145         uint32_t burst_max;
 1146         int error;
 1147 
 1148         burst_max = io_ctx->poll_burst_max;
 1149         error = sysctl_handle_int(oidp, &burst_max, 0, req);
 1150         if (error || req->newptr == NULL)
 1151                 return error;
 1152         if (burst_max < MIN_IOPOLL_BURST_MAX)
 1153                 burst_max = MIN_IOPOLL_BURST_MAX;
 1154         else if (burst_max > MAX_IOPOLL_BURST_MAX)
 1155                 burst_max = MAX_IOPOLL_BURST_MAX;
 1156 
 1157         netmsg_init(&msg.base, NULL, &curthread->td_msgport,
 1158                     0, sysctl_burstmax_handler);
 1159         msg.base.lmsg.u.ms_result = burst_max;
 1160         msg.ctx = io_ctx;
 1161 
 1162         return lwkt_domsg(netisr_cpuport(io_ctx->poll_cpuid),
 1163             &msg.base.lmsg, 0);
 1164 }
 1165 
 1166 static void
 1167 sysctl_eachburst_handler(netmsg_t nmsg)
 1168 {
 1169         struct iopoll_sysctl_netmsg *msg = (struct iopoll_sysctl_netmsg *)nmsg;
 1170         struct iopoll_ctx *io_ctx;
 1171         uint32_t each_burst;
 1172 
 1173         io_ctx = msg->ctx;
 1174         KKASSERT(&curthread->td_msgport == netisr_cpuport(io_ctx->poll_cpuid));
 1175 
 1176         each_burst = nmsg->lmsg.u.ms_result;
 1177         if (each_burst > io_ctx->poll_burst_max)
 1178                 each_burst = io_ctx->poll_burst_max;
 1179         else if (each_burst < 1)
 1180                 each_burst = 1;
 1181         io_ctx->poll_each_burst = each_burst;
 1182 
 1183         lwkt_replymsg(&nmsg->lmsg, 0);
 1184 }
 1185 
 1186 static int
 1187 sysctl_eachburst(SYSCTL_HANDLER_ARGS)
 1188 {
 1189         struct iopoll_ctx *io_ctx = arg1;
 1190         struct iopoll_sysctl_netmsg msg;
 1191         uint32_t each_burst;
 1192         int error;
 1193 
 1194         each_burst = io_ctx->poll_each_burst;
 1195         error = sysctl_handle_int(oidp, &each_burst, 0, req);
 1196         if (error || req->newptr == NULL)
 1197                 return error;
 1198 
 1199         netmsg_init(&msg.base, NULL, &curthread->td_msgport,
 1200                     0, sysctl_eachburst_handler);
 1201         msg.base.lmsg.u.ms_result = each_burst;
 1202         msg.ctx = io_ctx;
 1203 
 1204         return lwkt_domsg(netisr_cpuport(io_ctx->poll_cpuid),
 1205             &msg.base.lmsg, 0);
 1206 }
 1207 
 1208 static int
 1209 iopoll_register(struct ifnet *ifp, struct iopoll_ctx *io_ctx,
 1210                 const struct ifpoll_io *io_rec)
 1211 {
 1212         int error;
 1213 
 1214         KKASSERT(&curthread->td_msgport == netisr_cpuport(io_ctx->poll_cpuid));
 1215 
 1216         if (io_rec->poll_func == NULL)
 1217                 return 0;
 1218 
 1219         /*
 1220          * Check if there is room.
 1221          */
 1222         if (io_ctx->poll_handlers >= IFPOLL_LIST_LEN) {
 1223                 /*
 1224                  * List full, cannot register more entries.
 1225                  * This should never happen; if it does, it is probably a
 1226                  * broken driver trying to register multiple times. Checking
 1227                  * this at runtime is expensive, and won't solve the problem
  1228                  * anyway, so just report a few times and then give up.
 1229                  */
 1230                 static int verbose = 10; /* XXX */
 1231                 if (verbose > 0) {
 1232                         kprintf("io poll handlers list full, "
 1233                                 "maybe a broken driver ?\n");
 1234                         verbose--;
 1235                 }
 1236                 error = ENOENT;
 1237         } else {
 1238                 struct iopoll_rec *rec = &io_ctx->pr[io_ctx->poll_handlers];
 1239 
 1240                 rec->ifp = ifp;
 1241                 rec->serializer = io_rec->serializer;
 1242                 rec->arg = io_rec->arg;
 1243                 rec->poll_func = io_rec->poll_func;
 1244 
 1245                 io_ctx->poll_handlers++;
 1246                 error = 0;
 1247         }
 1248         return error;
 1249 }
 1250 
 1251 static int
 1252 iopoll_deregister(struct ifnet *ifp, struct iopoll_ctx *io_ctx)
 1253 {
 1254         int i, error;
 1255 
 1256         KKASSERT(&curthread->td_msgport == netisr_cpuport(io_ctx->poll_cpuid));
 1257 
 1258         for (i = 0; i < io_ctx->poll_handlers; ++i) {
 1259                 if (io_ctx->pr[i].ifp == ifp) /* Found it */
 1260                         break;
 1261         }
 1262         if (i == io_ctx->poll_handlers) {
 1263                 error = ENOENT;
 1264         } else {
 1265                 io_ctx->poll_handlers--;
 1266                 if (i < io_ctx->poll_handlers) {
 1267                         /* Last entry replaces this one. */
 1268                         io_ctx->pr[i] = io_ctx->pr[io_ctx->poll_handlers];
 1269                 }
 1270 
 1271                 if (io_ctx->poll_handlers == 0)
 1272                         iopoll_reset_state(io_ctx);
 1273                 error = 0;
 1274         }
 1275         return error;
 1276 }
 1277 
 1278 static void
 1279 poll_comm_init(int cpuid)
 1280 {
 1281         struct poll_comm *comm;
 1282         char cpuid_str[16];
 1283 
 1284         comm = kmalloc_cachealign(sizeof(*comm), M_DEVBUF, M_WAITOK | M_ZERO);
 1285 
 1286         if (ifpoll_stfrac < 1)
 1287                 ifpoll_stfrac = IFPOLL_STFRAC_DEFAULT;
 1288         if (ifpoll_txfrac < 1)
 1289                 ifpoll_txfrac = IFPOLL_TXFRAC_DEFAULT;
 1290 
 1291         comm->poll_cpuid = cpuid;
 1292         comm->pollhz = poll_comm_pollhz_div(comm, ifpoll_pollhz);
 1293         comm->poll_stfrac = ifpoll_stfrac - 1;
 1294         comm->poll_txfrac = ifpoll_txfrac - 1;
 1295 
 1296         ksnprintf(cpuid_str, sizeof(cpuid_str), "%d", cpuid);
 1297 
 1298         sysctl_ctx_init(&comm->sysctl_ctx);
 1299         comm->sysctl_tree = SYSCTL_ADD_NODE(&comm->sysctl_ctx,
 1300                             SYSCTL_STATIC_CHILDREN(_net_ifpoll),
 1301                             OID_AUTO, cpuid_str, CTLFLAG_RD, 0, "");
 1302 
 1303         SYSCTL_ADD_PROC(&comm->sysctl_ctx, SYSCTL_CHILDREN(comm->sysctl_tree),
 1304                         OID_AUTO, "pollhz", CTLTYPE_INT | CTLFLAG_RW,
 1305                         comm, 0, sysctl_pollhz,
 1306                         "I", "Device polling frequency");
 1307 
 1308         if (cpuid == 0) {
 1309                 SYSCTL_ADD_PROC(&comm->sysctl_ctx,
 1310                                 SYSCTL_CHILDREN(comm->sysctl_tree),
 1311                                 OID_AUTO, "status_frac",
 1312                                 CTLTYPE_INT | CTLFLAG_RW,
 1313                                 comm, 0, sysctl_stfrac,
 1314                                 "I", "# of cycles before status is polled");
 1315         }
 1316         SYSCTL_ADD_PROC(&comm->sysctl_ctx, SYSCTL_CHILDREN(comm->sysctl_tree),
 1317                         OID_AUTO, "tx_frac", CTLTYPE_INT | CTLFLAG_RW,
 1318                         comm, 0, sysctl_txfrac,
 1319                         "I", "# of cycles before TX is polled");
 1320 
 1321         poll_common[cpuid] = comm;
 1322 }
 1323 
 1324 static void
 1325 poll_comm_start(int cpuid)
 1326 {
 1327         struct poll_comm *comm = poll_common[cpuid];
 1328         systimer_func_t func;
 1329 
 1330         /*
 1331          * Initialize systimer
 1332          */
 1333         if (cpuid == 0)
 1334                 func = poll_comm_systimer0;
 1335         else
 1336                 func = poll_comm_systimer;
 1337         systimer_init_periodic_nq(&comm->pollclock, func, comm, 1);
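        /*
         * The systimer starts at the lowest frequency (1hz).  It is raised
         * to the configured pollhz by poll_comm_adjust_pollhz() once
         * polling handlers are registered on this cpu.
         */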
 1338 }
 1339 
 1340 static void
 1341 _poll_comm_systimer(struct poll_comm *comm)
 1342 {
 1343         iopoll_clock(rxpoll_context[comm->poll_cpuid]);
 1344         if (comm->txfrac_count-- == 0) {
 1345                 comm->txfrac_count = comm->poll_txfrac;
 1346                 iopoll_clock(txpoll_context[comm->poll_cpuid]);
 1347         }
 1348 }
 1349 
 1350 static void
 1351 poll_comm_systimer0(systimer_t info, int in_ipi __unused,
 1352     struct intrframe *frame __unused)
 1353 {
 1354         struct poll_comm *comm = info->data;
 1355         globaldata_t gd = mycpu;
 1356 
 1357         KKASSERT(comm->poll_cpuid == gd->gd_cpuid && gd->gd_cpuid == 0);
 1358 
 1359         crit_enter_gd(gd);
 1360 
 1361         if (comm->stfrac_count-- == 0) {
 1362                 comm->stfrac_count = comm->poll_stfrac;
 1363                 stpoll_clock(&stpoll_context);
 1364         }
 1365         _poll_comm_systimer(comm);
 1366 
 1367         crit_exit_gd(gd);
 1368 }
 1369 
 1370 static void
 1371 poll_comm_systimer(systimer_t info, int in_ipi __unused,
 1372     struct intrframe *frame __unused)
 1373 {
 1374         struct poll_comm *comm = info->data;
 1375         globaldata_t gd = mycpu;
 1376 
 1377         KKASSERT(comm->poll_cpuid == gd->gd_cpuid && gd->gd_cpuid != 0);
 1378 
 1379         crit_enter_gd(gd);
 1380         _poll_comm_systimer(comm);
 1381         crit_exit_gd(gd);
 1382 }
 1383 
 1384 static void
 1385 poll_comm_adjust_pollhz(struct poll_comm *comm)
 1386 {
 1387         uint32_t handlers;
 1388         int pollhz = 1;
 1389 
 1390         KKASSERT(&curthread->td_msgport == netisr_cpuport(comm->poll_cpuid));
 1391 
 1392         /*
 1393          * If there is no polling handler registered, set systimer
 1394          * frequency to the lowest value.  Polling systimer frequency
 1395          * will be adjusted to the requested value, once there are
 1396          * registered handlers.
 1397          */
 1398         handlers = rxpoll_context[mycpuid]->poll_handlers +
 1399                    txpoll_context[mycpuid]->poll_handlers;
 1400         if (comm->poll_cpuid == 0)
 1401                 handlers += stpoll_context.poll_handlers;
 1402         if (handlers)
 1403                 pollhz = comm->pollhz;
 1404         systimer_adjust_periodic(&comm->pollclock, pollhz);
 1405 }
 1406 
 1407 static int
 1408 sysctl_pollhz(SYSCTL_HANDLER_ARGS)
 1409 {
 1410         struct poll_comm *comm = arg1;
 1411         struct netmsg_base nmsg;
 1412         int error, phz;
 1413 
 1414         phz = poll_comm_pollhz_conv(comm, comm->pollhz);
 1415         error = sysctl_handle_int(oidp, &phz, 0, req);
 1416         if (error || req->newptr == NULL)
 1417                 return error;
 1418         if (phz <= 0)
 1419                 return EINVAL;
 1420         else if (phz > IFPOLL_FREQ_MAX)
 1421                 phz = IFPOLL_FREQ_MAX;
 1422 
 1423         netmsg_init(&nmsg, NULL, &curthread->td_msgport,
 1424                     0, sysctl_pollhz_handler);
 1425         nmsg.lmsg.u.ms_result = phz;
 1426 
 1427         return lwkt_domsg(netisr_cpuport(comm->poll_cpuid), &nmsg.lmsg, 0);
 1428 }
 1429 
 1430 static void
 1431 sysctl_pollhz_handler(netmsg_t nmsg)
 1432 {
 1433         struct poll_comm *comm = poll_common[mycpuid];
 1434 
 1435         KKASSERT(&curthread->td_msgport == netisr_cpuport(comm->poll_cpuid));
 1436 
 1437         /* Save polling frequency */
 1438         comm->pollhz = poll_comm_pollhz_div(comm, nmsg->lmsg.u.ms_result);
 1439 
 1440         /*
 1441          * Adjust cached pollhz
 1442          */
 1443         rxpoll_context[mycpuid]->pollhz = comm->pollhz;
 1444         txpoll_context[mycpuid]->pollhz =
 1445             comm->pollhz / (comm->poll_txfrac + 1);
 1446 
 1447         /*
 1448          * Adjust polling frequency
 1449          */
 1450         poll_comm_adjust_pollhz(comm);
 1451 
 1452         lwkt_replymsg(&nmsg->lmsg, 0);
 1453 }
 1454 
 1455 static int
 1456 sysctl_stfrac(SYSCTL_HANDLER_ARGS)
 1457 {
 1458         struct poll_comm *comm = arg1;
 1459         struct netmsg_base nmsg;
 1460         int error, stfrac;
 1461 
 1462         KKASSERT(comm->poll_cpuid == 0);
 1463 
 1464         stfrac = comm->poll_stfrac + 1;
 1465         error = sysctl_handle_int(oidp, &stfrac, 0, req);
 1466         if (error || req->newptr == NULL)
 1467                 return error;
 1468         if (stfrac < 1)
 1469                 return EINVAL;
 1470 
 1471         netmsg_init(&nmsg, NULL, &curthread->td_msgport,
 1472                     0, sysctl_stfrac_handler);
 1473         nmsg.lmsg.u.ms_result = stfrac - 1;
 1474 
 1475         return lwkt_domsg(netisr_cpuport(comm->poll_cpuid), &nmsg.lmsg, 0);
 1476 }
 1477 
 1478 static void
 1479 sysctl_stfrac_handler(netmsg_t nmsg)
 1480 {
 1481         struct poll_comm *comm = poll_common[mycpuid];
 1482         int stfrac = nmsg->lmsg.u.ms_result;
 1483 
 1484         KKASSERT(&curthread->td_msgport == netisr_cpuport(comm->poll_cpuid));
 1485 
 1486         crit_enter();
 1487         comm->poll_stfrac = stfrac;
 1488         if (comm->stfrac_count > comm->poll_stfrac)
 1489                 comm->stfrac_count = comm->poll_stfrac;
 1490         crit_exit();
 1491 
 1492         lwkt_replymsg(&nmsg->lmsg, 0);
 1493 }
 1494 
 1495 static int
 1496 sysctl_txfrac(SYSCTL_HANDLER_ARGS)
 1497 {
 1498         struct poll_comm *comm = arg1;
 1499         struct netmsg_base nmsg;
 1500         int error, txfrac;
 1501 
 1502         txfrac = comm->poll_txfrac + 1;
 1503         error = sysctl_handle_int(oidp, &txfrac, 0, req);
 1504         if (error || req->newptr == NULL)
 1505                 return error;
 1506         if (txfrac < 1)
 1507                 return EINVAL;
 1508 
 1509         netmsg_init(&nmsg, NULL, &curthread->td_msgport,
 1510                     0, sysctl_txfrac_handler);
 1511         nmsg.lmsg.u.ms_result = txfrac - 1;
 1512 
 1513         return lwkt_domsg(netisr_cpuport(comm->poll_cpuid), &nmsg.lmsg, 0);
 1514 }
 1515 
 1516 static void
 1517 sysctl_txfrac_handler(netmsg_t nmsg)
 1518 {
 1519         struct poll_comm *comm = poll_common[mycpuid];
 1520         int txfrac = nmsg->lmsg.u.ms_result;
 1521 
 1522         KKASSERT(&curthread->td_msgport == netisr_cpuport(comm->poll_cpuid));
 1523 
 1524         crit_enter();
 1525         comm->poll_txfrac = txfrac;
 1526         if (comm->txfrac_count > comm->poll_txfrac)
 1527                 comm->txfrac_count = comm->poll_txfrac;
 1528         crit_exit();
 1529 
 1530         lwkt_replymsg(&nmsg->lmsg, 0);
 1531 }
 1532 
 1533 void
 1534 ifpoll_compat_setup(struct ifpoll_compat *cp,
 1535     struct sysctl_ctx_list *sysctl_ctx,
 1536     struct sysctl_oid *sysctl_tree,
 1537     int unit, struct lwkt_serialize *slz)
 1538 {
 1539         cp->ifpc_stcount = 0;
 1540         cp->ifpc_stfrac = ((poll_common[0]->poll_stfrac + 1) *
 1541             howmany(IOPOLL_BURST_MAX, IOPOLL_EACH_BURST)) - 1;
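        /*
         * With the defaults: poll_stfrac = IFPOLL_STFRAC_DEFAULT - 1 = 119
         * and howmany(IOPOLL_BURST_MAX, IOPOLL_EACH_BURST) = howmany(250, 50)
         * = 5, so ifpc_stfrac = 120 * 5 - 1 = 599, i.e. the compat status
         * poll runs roughly once every 600 calls.
         */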
 1542 
 1543         cp->ifpc_cpuid = unit % ncpus2;
 1544         cp->ifpc_serializer = slz;
 1545 
 1546         if (sysctl_ctx != NULL && sysctl_tree != NULL) {
 1547                 SYSCTL_ADD_PROC(sysctl_ctx, SYSCTL_CHILDREN(sysctl_tree),
 1548                     OID_AUTO, "npoll_stfrac", CTLTYPE_INT | CTLFLAG_RW,
 1549                     cp, 0, sysctl_compat_npoll_stfrac, "I",
 1550                     "polling status frac");
 1551                 SYSCTL_ADD_PROC(sysctl_ctx, SYSCTL_CHILDREN(sysctl_tree),
 1552                     OID_AUTO, "npoll_cpuid", CTLTYPE_INT | CTLFLAG_RW,
 1553                     cp, 0, sysctl_compat_npoll_cpuid, "I",
 1554                     "polling cpuid");
 1555         }
 1556 }
 1557 
 1558 static int
 1559 sysctl_compat_npoll_stfrac(SYSCTL_HANDLER_ARGS)
 1560 {
 1561         struct ifpoll_compat *cp = arg1;
 1562         int error = 0, stfrac;
 1563 
 1564         lwkt_serialize_enter(cp->ifpc_serializer);
 1565 
 1566         stfrac = cp->ifpc_stfrac + 1;
 1567         error = sysctl_handle_int(oidp, &stfrac, 0, req);
 1568         if (!error && req->newptr != NULL) {
 1569                 if (stfrac < 1) {
 1570                         error = EINVAL;
 1571                 } else {
 1572                         cp->ifpc_stfrac = stfrac - 1;
 1573                         if (cp->ifpc_stcount > cp->ifpc_stfrac)
 1574                                 cp->ifpc_stcount = cp->ifpc_stfrac;
 1575                 }
 1576         }
 1577 
 1578         lwkt_serialize_exit(cp->ifpc_serializer);
 1579         return error;
 1580 }
 1581 
 1582 static int
 1583 sysctl_compat_npoll_cpuid(SYSCTL_HANDLER_ARGS)
 1584 {
 1585         struct ifpoll_compat *cp = arg1;
 1586         int error = 0, cpuid;
 1587 
 1588         lwkt_serialize_enter(cp->ifpc_serializer);
 1589 
 1590         cpuid = cp->ifpc_cpuid;
 1591         error = sysctl_handle_int(oidp, &cpuid, 0, req);
 1592         if (!error && req->newptr != NULL) {
 1593                 if (cpuid < 0 || cpuid >= ncpus2)
 1594                         error = EINVAL;
 1595                 else
 1596                         cp->ifpc_cpuid = cpuid;
 1597         }
 1598 
 1599         lwkt_serialize_exit(cp->ifpc_serializer);
 1600         return error;
 1601 }



This page is part of the FreeBSD/Linux Kernel Cross-Reference, and was automatically generated using a modified version of the LXR engine.