The Design and Implementation of the FreeBSD Operating System, Second Edition
Now available: The Design and Implementation of the FreeBSD Operating System (Second Edition)


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]

FreeBSD/Linux Kernel Cross Reference
sys/x86/x86/intr_machdep.c

Version: -  FREEBSD  -  FREEBSD-13-STABLE  -  FREEBSD-13-0  -  FREEBSD-12-STABLE  -  FREEBSD-12-0  -  FREEBSD-11-STABLE  -  FREEBSD-11-0  -  FREEBSD-10-STABLE  -  FREEBSD-10-0  -  FREEBSD-9-STABLE  -  FREEBSD-9-0  -  FREEBSD-8-STABLE  -  FREEBSD-8-0  -  FREEBSD-7-STABLE  -  FREEBSD-7-0  -  FREEBSD-6-STABLE  -  FREEBSD-6-0  -  FREEBSD-5-STABLE  -  FREEBSD-5-0  -  FREEBSD-4-STABLE  -  FREEBSD-3-STABLE  -  FREEBSD22  -  l41  -  OPENBSD  -  linux-2.6  -  MK84  -  PLAN9  -  xnu-8792 
SearchContext: -  none  -  3  -  10 

    1 /*-
    2  * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
    3  *
    4  * Copyright (c) 2003 John Baldwin <jhb@FreeBSD.org>
    5  *
    6  * Redistribution and use in source and binary forms, with or without
    7  * modification, are permitted provided that the following conditions
    8  * are met:
    9  * 1. Redistributions of source code must retain the above copyright
   10  *    notice, this list of conditions and the following disclaimer.
   11  * 2. Redistributions in binary form must reproduce the above copyright
   12  *    notice, this list of conditions and the following disclaimer in the
   13  *    documentation and/or other materials provided with the distribution.
   14  *
   15  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
   16  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
   17  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
   18  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
   19  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
   20  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
   21  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
   22  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
   23  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
   24  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
   25  * SUCH DAMAGE.
   26  *
   27  * $FreeBSD$
   28  */
   29 
   30 /*
   31  * Machine dependent interrupt code for x86.  For x86, we have to
   32  * deal with different PICs.  Thus, we use the passed in vector to lookup
   33  * an interrupt source associated with that vector.  The interrupt source
   34  * describes which PIC the source belongs to and includes methods to handle
   35  * that source.
   36  */
   37 
   38 #include "opt_atpic.h"
   39 #include "opt_ddb.h"
   40 #include "opt_smp.h"
   41 
   42 #include <sys/param.h>
   43 #include <sys/bus.h>
   44 #include <sys/interrupt.h>
   45 #include <sys/ktr.h>
   46 #include <sys/kernel.h>
   47 #include <sys/lock.h>
   48 #include <sys/malloc.h>
   49 #include <sys/mutex.h>
   50 #include <sys/proc.h>
   51 #include <sys/queue.h>
   52 #include <sys/sbuf.h>
   53 #include <sys/smp.h>
   54 #include <sys/sx.h>
   55 #include <sys/sysctl.h>
   56 #include <sys/syslog.h>
   57 #include <sys/systm.h>
   58 #include <sys/taskqueue.h>
   59 #include <sys/vmmeter.h>
   60 #include <machine/clock.h>
   61 #include <machine/intr_machdep.h>
   62 #include <machine/smp.h>
   63 #ifdef DDB
   64 #include <ddb/ddb.h>
   65 #endif
   66 
   67 #ifndef DEV_ATPIC
   68 #include <machine/segments.h>
   69 #include <machine/frame.h>
   70 #include <dev/ic/i8259.h>
   71 #include <x86/isa/icu.h>
   72 #include <isa/isareg.h>
   73 #endif
   74 
   75 #include <vm/vm.h>
   76 
   77 #define MAX_STRAY_LOG   5
   78 
   79 typedef void (*mask_fn)(void *);
   80 
   81 static int intrcnt_index;
   82 static struct intsrc **interrupt_sources;
   83 #ifdef SMP
   84 static struct intsrc **interrupt_sorted;
   85 static int intrbalance;
   86 SYSCTL_INT(_hw, OID_AUTO, intrbalance, CTLFLAG_RWTUN, &intrbalance, 0,
   87     "Interrupt auto-balance interval (seconds).  Zero disables.");
   88 static struct timeout_task intrbalance_task;
   89 #endif
   90 static struct sx intrsrc_lock;
   91 static struct mtx intrpic_lock;
   92 static struct mtx intrcnt_lock;
   93 static TAILQ_HEAD(pics_head, pic) pics;
   94 u_int num_io_irqs;
   95 
   96 #if defined(SMP) && !defined(EARLY_AP_STARTUP)
   97 static int assign_cpu;
   98 #endif
   99 
  100 u_long *intrcnt;
  101 char *intrnames;
  102 size_t sintrcnt = sizeof(intrcnt);
  103 size_t sintrnames = sizeof(intrnames);
  104 int nintrcnt;
  105 
  106 static MALLOC_DEFINE(M_INTR, "intr", "Interrupt Sources");
  107 
  108 static int      intr_assign_cpu(void *arg, int cpu);
  109 static void     intr_disable_src(void *arg);
  110 static void     intr_init(void *__dummy);
  111 static int      intr_pic_registered(struct pic *pic);
  112 static void     intrcnt_setname(const char *name, int index);
  113 static void     intrcnt_updatename(struct intsrc *is);
  114 static void     intrcnt_register(struct intsrc *is);
  115 
  116 /*
  117  * SYSINIT levels for SI_SUB_INTR:
  118  *
  119  * SI_ORDER_FIRST: Initialize locks and pics TAILQ, xen_hvm_cpu_init
  120  * SI_ORDER_SECOND: Xen PICs
  121  * SI_ORDER_THIRD: Add I/O APIC PICs, alloc MSI and Xen IRQ ranges
  122  * SI_ORDER_FOURTH: Add 8259A PICs
  123  * SI_ORDER_FOURTH + 1: Finalize interrupt count and add interrupt sources
  124  * SI_ORDER_MIDDLE: SMP interrupt counters
  125  * SI_ORDER_ANY: Enable interrupts on BSP
  126  */
  127 
  128 static int
  129 intr_pic_registered(struct pic *pic)
  130 {
  131         struct pic *p;
  132 
  133         TAILQ_FOREACH(p, &pics, pics) {
  134                 if (p == pic)
  135                         return (1);
  136         }
  137         return (0);
  138 }
  139 
  140 /*
  141  * Register a new interrupt controller (PIC).  This is to support suspend
  142  * and resume where we suspend/resume controllers rather than individual
  143  * sources.  This also allows controllers with no active sources (such as
  144  * 8259As in a system using the APICs) to participate in suspend and resume.
  145  */
  146 int
  147 intr_register_pic(struct pic *pic)
  148 {
  149         int error;
  150 
  151         mtx_lock(&intrpic_lock);
  152         if (intr_pic_registered(pic))
  153                 error = EBUSY;
  154         else {
  155                 TAILQ_INSERT_TAIL(&pics, pic, pics);
  156                 error = 0;
  157         }
  158         mtx_unlock(&intrpic_lock);
  159         return (error);
  160 }
  161 
  162 /*
  163  * Allocate interrupt source arrays and register interrupt sources
  164  * once the number of interrupts is known.
  165  */
static void
intr_init_sources(void *arg)
{
	struct pic *pic;

	MPASS(num_io_irqs > 0);

	interrupt_sources = mallocarray(num_io_irqs, sizeof(*interrupt_sources),
	    M_INTR, M_WAITOK | M_ZERO);
#ifdef SMP
	/* Scratch array used by intr_balance() to sort sources by load. */
	interrupt_sorted = mallocarray(num_io_irqs, sizeof(*interrupt_sorted),
	    M_INTR, M_WAITOK | M_ZERO);
#endif

	/*
	 * - 1 ??? dummy counter.
	 * - 2 counters for each I/O interrupt.
	 * - 1 counter for each CPU for lapic timer.
	 * - 1 counter for each CPU for the Hyper-V vmbus driver.
	 * - 8 counters for each CPU for IPI counters for SMP.
	 */
	nintrcnt = 1 + num_io_irqs * 2 + mp_ncpus * 2;
#ifdef COUNT_IPIS
	if (mp_ncpus > 1)
		nintrcnt += 8 * mp_ncpus;
#endif
	intrcnt = mallocarray(nintrcnt, sizeof(u_long), M_INTR, M_WAITOK |
	    M_ZERO);
	intrnames = mallocarray(nintrcnt, MAXCOMLEN + 1, M_INTR, M_WAITOK |
	    M_ZERO);
	/*
	 * Publish the true array sizes; until now sintrcnt/sintrnames held
	 * placeholder sizeof(pointer) values from their initializers.
	 */
	sintrcnt = nintrcnt * sizeof(u_long);
	sintrnames = nintrcnt * (MAXCOMLEN + 1);

	/* Slot 0 is the dummy counter; real sources start at index 1. */
	intrcnt_setname("???", 0);
	intrcnt_index = 1;

	/*
	 * NB: intrpic_lock is not held here to avoid LORs due to
	 * malloc() in intr_register_source().  However, we are still
	 * single-threaded at this point in startup so the list of
	 * PICs shouldn't change.
	 */
	TAILQ_FOREACH(pic, &pics, pics) {
		if (pic->pic_register_sources != NULL)
			pic->pic_register_sources(pic);
	}
}
SYSINIT(intr_init_sources, SI_SUB_INTR, SI_ORDER_FOURTH + 1, intr_init_sources,
    NULL);
  215 
  216 /*
  217  * Register a new interrupt source with the global interrupt system.
  218  * The global interrupts need to be disabled when this function is
  219  * called.
  220  */
int
intr_register_source(struct intsrc *isrc)
{
	int error, vector;

	KASSERT(intr_pic_registered(isrc->is_pic), ("unregistered PIC"));
	vector = isrc->is_pic->pic_vector(isrc);
	KASSERT(vector < num_io_irqs, ("IRQ %d too large (%u irqs)", vector,
	    num_io_irqs));
	/* Unlocked fast-path check; re-checked under the lock below. */
	if (interrupt_sources[vector] != NULL)
		return (EEXIST);
	error = intr_event_create(&isrc->is_event, isrc, 0, vector,
	    intr_disable_src, (mask_fn)isrc->is_pic->pic_enable_source,
	    (mask_fn)isrc->is_pic->pic_eoi_source, intr_assign_cpu, "irq%d:",
	    vector);
	if (error)
		return (error);
	sx_xlock(&intrsrc_lock);
	/* Lost a race with another registration; undo the event creation. */
	if (interrupt_sources[vector] != NULL) {
		sx_xunlock(&intrsrc_lock);
		intr_event_destroy(isrc->is_event);
		return (EEXIST);
	}
	intrcnt_register(isrc);
	interrupt_sources[vector] = isrc;
	isrc->is_handlers = 0;
	sx_xunlock(&intrsrc_lock);
	return (0);
}
  250 
  251 struct intsrc *
  252 intr_lookup_source(int vector)
  253 {
  254 
  255         if (vector < 0 || vector >= num_io_irqs)
  256                 return (NULL);
  257         return (interrupt_sources[vector]);
  258 }
  259 
int
intr_add_handler(const char *name, int vector, driver_filter_t filter,
    driver_intr_t handler, void *arg, enum intr_type flags, void **cookiep,
    int domain)
{
	struct intsrc *isrc;
	int error;

	isrc = intr_lookup_source(vector);
	if (isrc == NULL)
		return (EINVAL);
	error = intr_event_add_handler(isrc->is_event, name, filter, handler,
	    arg, intr_priority(flags), flags, cookiep);
	if (error == 0) {
		sx_xlock(&intrsrc_lock);
		intrcnt_updatename(isrc);
		isrc->is_handlers++;
		/*
		 * First handler on this source: record its domain and
		 * unmask the interrupt at the owning PIC.
		 */
		if (isrc->is_handlers == 1) {
			isrc->is_domain = domain;
			isrc->is_pic->pic_enable_intr(isrc);
			isrc->is_pic->pic_enable_source(isrc);
		}
		sx_xunlock(&intrsrc_lock);
	}
	return (error);
}
  286 
/*
 * Remove a handler previously added via intr_add_handler(); when the
 * last handler is removed the source is masked at its PIC.
 */
int
intr_remove_handler(void *cookie)
{
	struct intsrc *isrc;
	int error;

	isrc = intr_handler_source(cookie);
	error = intr_event_remove_handler(cookie);
	if (error == 0) {
		sx_xlock(&intrsrc_lock);
		isrc->is_handlers--;
		/* Last handler gone: mask and disable the source. */
		if (isrc->is_handlers == 0) {
			isrc->is_pic->pic_disable_source(isrc, PIC_NO_EOI);
			isrc->is_pic->pic_disable_intr(isrc);
		}
		intrcnt_updatename(isrc);
		sx_xunlock(&intrsrc_lock);
	}
	return (error);
}
  307 
  308 int
  309 intr_config_intr(int vector, enum intr_trigger trig, enum intr_polarity pol)
  310 {
  311         struct intsrc *isrc;
  312 
  313         isrc = intr_lookup_source(vector);
  314         if (isrc == NULL)
  315                 return (EINVAL);
  316         return (isrc->is_pic->pic_config_intr(isrc, trig, pol));
  317 }
  318 
  319 static void
  320 intr_disable_src(void *arg)
  321 {
  322         struct intsrc *isrc;
  323 
  324         isrc = arg;
  325         isrc->is_pic->pic_disable_source(isrc, PIC_EOI);
  326 }
  327 
void
intr_execute_handlers(struct intsrc *isrc, struct trapframe *frame)
{
	struct intr_event *ie;
	int vector;

	/*
	 * We count software interrupts when we process them.  The
	 * code here follows previous practice, but there's an
	 * argument for counting hardware interrupts when they're
	 * processed too.
	 */
	(*isrc->is_count)++;
	VM_CNT_INC(v_intr);

	ie = isrc->is_event;

	/*
	 * XXX: We assume that IRQ 0 is only used for the ISA timer
	 * device (clk).
	 */
	vector = isrc->is_pic->pic_vector(isrc);
	if (vector == 0)
		clkintr_pending = 1;

	/*
	 * For stray interrupts, mask and EOI the source, bump the
	 * stray count, and log the condition.  Logging is rate-limited
	 * to MAX_STRAY_LOG messages per source.
	 */
	if (intr_event_handle(ie, frame) != 0) {
		isrc->is_pic->pic_disable_source(isrc, PIC_EOI);
		(*isrc->is_straycount)++;
		if (*isrc->is_straycount < MAX_STRAY_LOG)
			log(LOG_ERR, "stray irq%d\n", vector);
		else if (*isrc->is_straycount == MAX_STRAY_LOG)
			log(LOG_CRIT,
			    "too many stray irq %d's: not logging anymore\n",
			    vector);
	}
}
  368 
/* Resume all registered PICs in registration order after a suspend. */
void
intr_resume(bool suspend_cancelled)
{
	struct pic *pic;

#ifndef DEV_ATPIC
	/* Without the atpic driver, quiesce the 8259As ourselves. */
	atpic_reset();
#endif
	mtx_lock(&intrpic_lock);
	TAILQ_FOREACH(pic, &pics, pics) {
		if (pic->pic_resume != NULL)
			pic->pic_resume(pic, suspend_cancelled);
	}
	mtx_unlock(&intrpic_lock);
}
  384 
  385 void
  386 intr_suspend(void)
  387 {
  388         struct pic *pic;
  389 
  390         mtx_lock(&intrpic_lock);
  391         TAILQ_FOREACH_REVERSE(pic, &pics, pics_head, pics) {
  392                 if (pic->pic_suspend != NULL)
  393                         pic->pic_suspend(pic);
  394         }
  395         mtx_unlock(&intrpic_lock);
  396 }
  397 
/*
 * intr_event callback to retarget a source at a given CPU.  Returns
 * EOPNOTSUPP on UP kernels.
 */
static int
intr_assign_cpu(void *arg, int cpu)
{
#ifdef SMP
	struct intsrc *isrc;
	int error;

#ifdef EARLY_AP_STARTUP
	MPASS(mp_ncpus == 1 || smp_started);

	/* Nothing to do if there is only a single CPU. */
	if (mp_ncpus > 1 && cpu != NOCPU) {
#else
	/*
	 * Don't do anything during early boot.  We will pick up the
	 * assignment once the APs are started.
	 */
	if (assign_cpu && cpu != NOCPU) {
#endif
		isrc = arg;
		sx_xlock(&intrsrc_lock);
		/* Ask the owning PIC to retarget; record on success. */
		error = isrc->is_pic->pic_assign_cpu(isrc, cpu_apic_ids[cpu]);
		if (error == 0)
			isrc->is_cpu = cpu;
		sx_xunlock(&intrsrc_lock);
	} else
		error = 0;
	return (error);
#else
	return (EOPNOTSUPP);
#endif
}
  430 
  431 static void
  432 intrcnt_setname(const char *name, int index)
  433 {
  434 
  435         snprintf(intrnames + (MAXCOMLEN + 1) * index, MAXCOMLEN + 1, "%-*s",
  436             MAXCOMLEN, name);
  437 }
  438 
  439 static void
  440 intrcnt_updatename(struct intsrc *is)
  441 {
  442 
  443         intrcnt_setname(is->is_event->ie_fullname, is->is_index);
  444 }
  445 
/*
 * Reserve two intrcnt slots for a new source: one for its interrupt
 * count and one (named "stray irqN") for its stray-interrupt count.
 */
static void
intrcnt_register(struct intsrc *is)
{
	char straystr[MAXCOMLEN + 1];

	KASSERT(is->is_event != NULL, ("%s: isrc with no event", __func__));
	mtx_lock_spin(&intrcnt_lock);
	MPASS(intrcnt_index + 2 <= nintrcnt);
	is->is_index = intrcnt_index;
	intrcnt_index += 2;
	snprintf(straystr, MAXCOMLEN + 1, "stray irq%d",
	    is->is_pic->pic_vector(is));
	intrcnt_updatename(is);
	is->is_count = &intrcnt[is->is_index];
	intrcnt_setname(straystr, is->is_index + 1);
	is->is_straycount = &intrcnt[is->is_index + 1];
	mtx_unlock_spin(&intrcnt_lock);
}
  464 
  465 void
  466 intrcnt_add(const char *name, u_long **countp)
  467 {
  468 
  469         mtx_lock_spin(&intrcnt_lock);
  470         MPASS(intrcnt_index < nintrcnt);
  471         *countp = &intrcnt[intrcnt_index];
  472         intrcnt_setname(name, intrcnt_index);
  473         intrcnt_index++;
  474         mtx_unlock_spin(&intrcnt_lock);
  475 }
  476 
  477 static void
  478 intr_init(void *dummy __unused)
  479 {
  480 
  481         TAILQ_INIT(&pics);
  482         mtx_init(&intrpic_lock, "intrpic", NULL, MTX_DEF);
  483         sx_init(&intrsrc_lock, "intrsrc");
  484         mtx_init(&intrcnt_lock, "intrcnt", NULL, MTX_SPIN);
  485 }
  486 SYSINIT(intr_init, SI_SUB_INTR, SI_ORDER_FIRST, intr_init, NULL);
  487 
/* Last interrupt SYSINIT (SI_ORDER_ANY): turn interrupts on. */
static void
intr_init_final(void *dummy __unused)
{

	/*
	 * Enable interrupts on the BSP after all of the interrupt
	 * controllers are initialized.  Device interrupts are still
	 * disabled in the interrupt controllers until interrupt
	 * handlers are registered.  Interrupts are enabled on each AP
	 * after their first context switch.
	 */
	enable_intr();
}
SYSINIT(intr_init_final, SI_SUB_INTR, SI_ORDER_ANY, intr_init_final, NULL);
  502 
#ifndef DEV_ATPIC
/* Initialize the two 8259A's to a known-good shutdown state. */
void
atpic_reset(void)
{

	/* Master 8259A: ICW1 restarts its initialization sequence. */
	outb(IO_ICU1, ICW1_RESET | ICW1_IC4);
	/* ICW2: base vector for the master's IRQ lines. */
	outb(IO_ICU1 + ICU_IMR_OFFSET, IDT_IO_INTS);
	/* ICW3: which master line has the slave cascaded. */
	outb(IO_ICU1 + ICU_IMR_OFFSET, IRQ_MASK(ICU_SLAVEID));
	outb(IO_ICU1 + ICU_IMR_OFFSET, MASTER_MODE);
	/* Mask all eight IRQ lines on the master. */
	outb(IO_ICU1 + ICU_IMR_OFFSET, 0xff);
	outb(IO_ICU1, OCW3_SEL | OCW3_RR);

	/* Slave 8259A: same init sequence, vectors offset by 8. */
	outb(IO_ICU2, ICW1_RESET | ICW1_IC4);
	outb(IO_ICU2 + ICU_IMR_OFFSET, IDT_IO_INTS + 8);
	outb(IO_ICU2 + ICU_IMR_OFFSET, ICU_SLAVEID);
	outb(IO_ICU2 + ICU_IMR_OFFSET, SLAVE_MODE);
	/* Mask all eight IRQ lines on the slave. */
	outb(IO_ICU2 + ICU_IMR_OFFSET, 0xff);
	outb(IO_ICU2, OCW3_SEL | OCW3_RR);
}
#endif
  524 
  525 /* Add a description to an active interrupt handler. */
  526 int
  527 intr_describe(u_int vector, void *ih, const char *descr)
  528 {
  529         struct intsrc *isrc;
  530         int error;
  531 
  532         isrc = intr_lookup_source(vector);
  533         if (isrc == NULL)
  534                 return (EINVAL);
  535         error = intr_event_describe_handler(isrc->is_event, ih, descr);
  536         if (error)
  537                 return (error);
  538         intrcnt_updatename(isrc);
  539         return (0);
  540 }
  541 
  542 void
  543 intr_reprogram(void)
  544 {
  545         struct intsrc *is;
  546         u_int v;
  547 
  548         sx_xlock(&intrsrc_lock);
  549         for (v = 0; v < num_io_irqs; v++) {
  550                 is = interrupt_sources[v];
  551                 if (is == NULL)
  552                         continue;
  553                 if (is->is_pic->pic_reprogram_pin != NULL)
  554                         is->is_pic->pic_reprogram_pin(is);
  555         }
  556         sx_xunlock(&intrsrc_lock);
  557 }
  558 
#ifdef DDB
/*
 * Dump data about interrupt handlers.  With the "v" modifier the dump
 * is verbose.  Stops early if the DDB pager is quit.
 */
DB_SHOW_COMMAND(irqs, db_show_irqs)
{
	struct intsrc **isrc;
	u_int i;
	int verbose;

	if (strcmp(modif, "v") == 0)
		verbose = 1;
	else
		verbose = 0;
	isrc = interrupt_sources;
	for (i = 0; i < num_io_irqs && !db_pager_quit; i++, isrc++)
		if (*isrc != NULL)
			db_dump_intr_event((*isrc)->is_event, verbose);
}
#endif
  579 
  580 #ifdef SMP
  581 /*
  582  * Support for balancing interrupt sources across CPUs.  For now we just
  583  * allocate CPUs round-robin.
  584  */
  585 
  586 cpuset_t intr_cpus = CPUSET_T_INITIALIZER(0x1);
  587 static int current_cpu[MAXMEMDOM];
  588 
  589 static void
  590 intr_init_cpus(void)
  591 {
  592         int i;
  593 
  594         for (i = 0; i < vm_ndomains; i++) {
  595                 current_cpu[i] = 0;
  596                 if (!CPU_ISSET(current_cpu[i], &intr_cpus) ||
  597                     !CPU_ISSET(current_cpu[i], &cpuset_domain[i]))
  598                         intr_next_cpu(i);
  599         }
  600 }
  601 
  602 /*
  603  * Return the CPU that the next interrupt source should use.  For now
  604  * this just returns the next local APIC according to round-robin.
  605  */
u_int
intr_next_cpu(int domain)
{
	u_int apic_id;

#ifdef EARLY_AP_STARTUP
	MPASS(mp_ncpus == 1 || smp_started);
	if (mp_ncpus == 1)
		return (PCPU_GET(apic_id));
#else
	/* Leave all interrupts on the BSP during boot. */
	if (!assign_cpu)
		return (PCPU_GET(apic_id));
#endif

	mtx_lock_spin(&icu_lock);
	/* Return the current cursor's APIC ID, then advance the cursor. */
	apic_id = cpu_apic_ids[current_cpu[domain]];
	do {
		current_cpu[domain]++;
		if (current_cpu[domain] > mp_maxid)
			current_cpu[domain] = 0;
	} while (!CPU_ISSET(current_cpu[domain], &intr_cpus) ||
	    !CPU_ISSET(current_cpu[domain], &cpuset_domain[domain]));
	mtx_unlock_spin(&icu_lock);
	return (apic_id);
}
  632 
  633 /* Attempt to bind the specified IRQ to the specified CPU. */
  634 int
  635 intr_bind(u_int vector, u_char cpu)
  636 {
  637         struct intsrc *isrc;
  638 
  639         isrc = intr_lookup_source(vector);
  640         if (isrc == NULL)
  641                 return (EINVAL);
  642         return (intr_event_bind(isrc->is_event, cpu));
  643 }
  644 
  645 /*
  646  * Add a CPU to our mask of valid CPUs that can be destinations of
  647  * interrupts.
  648  */
void
intr_add_cpu(u_int cpu)
{

	if (cpu >= MAXCPU)
		panic("%s: Invalid CPU ID", __func__);
	if (bootverbose)
		printf("INTR: Adding local APIC %d as a target\n",
		    cpu_apic_ids[cpu]);

	/* Mark this CPU as a valid interrupt destination. */
	CPU_SET(cpu, &intr_cpus);
}
  661 
  662 #ifdef EARLY_AP_STARTUP
  663 static void
  664 intr_smp_startup(void *arg __unused)
  665 {
  666 
  667         intr_init_cpus();
  668         return;
  669 }
  670 SYSINIT(intr_smp_startup, SI_SUB_SMP, SI_ORDER_SECOND, intr_smp_startup,
  671     NULL);
  672 
  673 #else
  674 /*
  675  * Distribute all the interrupt sources among the available CPUs once the
  676  * AP's have been launched.
  677  */
static void
intr_shuffle_irqs(void *arg __unused)
{
	struct intsrc *isrc;
	u_int cpu, i;

	intr_init_cpus();
	/* Don't bother on UP. */
	if (mp_ncpus == 1)
		return;

	/* Round-robin assign a CPU to each enabled source. */
	sx_xlock(&intrsrc_lock);
	/* From here on, intr_assign_cpu() acts on assignment requests. */
	assign_cpu = 1;
	for (i = 0; i < num_io_irqs; i++) {
		isrc = interrupt_sources[i];
		if (isrc != NULL && isrc->is_handlers > 0) {
			/*
			 * If this event is already bound to a CPU,
			 * then assign the source to that CPU instead
			 * of picking one via round-robin.  Note that
			 * this is careful to only advance the
			 * round-robin if the CPU assignment succeeds.
			 */
			cpu = isrc->is_event->ie_cpu;
			if (cpu == NOCPU)
				cpu = current_cpu[isrc->is_domain];
			if (isrc->is_pic->pic_assign_cpu(isrc,
			    cpu_apic_ids[cpu]) == 0) {
				isrc->is_cpu = cpu;
				if (isrc->is_event->ie_cpu == NOCPU)
					intr_next_cpu(isrc->is_domain);
			}
		}
	}
	sx_xunlock(&intrsrc_lock);
}
SYSINIT(intr_shuffle_irqs, SI_SUB_SMP, SI_ORDER_SECOND, intr_shuffle_irqs,
    NULL);
  717 #endif
  718 
  719 /*
  720  * TODO: Export this information in a non-MD fashion, integrate with vmstat -i.
  721  */
/*
 * hw.intrs sysctl handler: emit one line per registered source with its
 * name, counter index, CPU, domain, and interrupt count.
 */
static int
sysctl_hw_intrs(SYSCTL_HANDLER_ARGS)
{
	struct sbuf sbuf;
	struct intsrc *isrc;
	u_int i;
	int error;

	error = sysctl_wire_old_buffer(req, 0);
	if (error != 0)
		return (error);

	sbuf_new_for_sysctl(&sbuf, NULL, 128, req);
	sx_slock(&intrsrc_lock);
	for (i = 0; i < num_io_irqs; i++) {
		isrc = interrupt_sources[i];
		if (isrc == NULL)
			continue;
		/*
		 * NOTE(review): *isrc->is_count is u_long, so "%lu" would
		 * match its type exactly; "%ld" misprints counts above
		 * LONG_MAX.  Confirm before changing the format string.
		 */
		sbuf_printf(&sbuf, "%s:%d @cpu%d(domain%d): %ld\n",
		    isrc->is_event->ie_fullname,
		    isrc->is_index,
		    isrc->is_cpu,
		    isrc->is_domain,
		    *isrc->is_count);
	}

	sx_sunlock(&intrsrc_lock);
	error = sbuf_finish(&sbuf);
	sbuf_delete(&sbuf);
	return (error);
}
SYSCTL_PROC(_hw, OID_AUTO, intrs,
    CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_MPSAFE,
    0, 0, sysctl_hw_intrs, "A",
    "interrupt:number @cpu: count");
  757 
  758 /*
  759  * Compare two, possibly NULL, entries in the interrupt source array
  760  * by load.
  761  */
  762 static int
  763 intrcmp(const void *one, const void *two)
  764 {
  765         const struct intsrc *i1, *i2;
  766 
  767         i1 = *(const struct intsrc * const *)one;
  768         i2 = *(const struct intsrc * const *)two;
  769         if (i1 != NULL && i2 != NULL)
  770                 return (*i1->is_count - *i2->is_count);
  771         if (i1 != NULL)
  772                 return (1);
  773         if (i2 != NULL)
  774                 return (-1);
  775         return (0);
  776 }
  777 
  778 /*
  779  * Balance IRQs across available CPUs according to load.
  780  */
/*
 * Periodic taskqueue callback that redistributes IRQs across CPUs
 * according to observed load.  Sources explicitly bound to a CPU
 * (ie_cpu != NOCPU) are left alone.
 */
static void
intr_balance(void *dummy __unused, int pending __unused)
{
	struct intsrc *isrc;
	int interval;
	u_int cpu;
	int i;

	/* A zero hw.intrbalance interval disables balancing. */
	interval = intrbalance;
	if (interval == 0)
		goto out;

	/*
	 * Sort interrupts according to count.
	 */
	sx_xlock(&intrsrc_lock);
	memcpy(interrupt_sorted, interrupt_sources, num_io_irqs *
	    sizeof(interrupt_sorted[0]));
	qsort(interrupt_sorted, num_io_irqs, sizeof(interrupt_sorted[0]),
	    intrcmp);

	/*
	 * Restart the scan from the same location to avoid moving in the
	 * common case.
	 */
	intr_init_cpus();

	/*
	 * Assign round-robin from most loaded to least.
	 */
	for (i = num_io_irqs - 1; i >= 0; i--) {
		isrc = interrupt_sorted[i];
		if (isrc == NULL  || isrc->is_event->ie_cpu != NOCPU)
			continue;
		cpu = current_cpu[isrc->is_domain];
		intr_next_cpu(isrc->is_domain);
		if (isrc->is_cpu != cpu &&
		    isrc->is_pic->pic_assign_cpu(isrc,
		    cpu_apic_ids[cpu]) == 0)
			isrc->is_cpu = cpu;
	}
	sx_xunlock(&intrsrc_lock);
out:
	/* Re-arm; when disabled, re-check the tunable once a minute. */
	taskqueue_enqueue_timeout(taskqueue_thread, &intrbalance_task,
	    interval ? hz * interval : hz * 60);

}
  828 
/* Set up the balancing task and schedule its first run in ~1 second. */
static void
intr_balance_init(void *dummy __unused)
{

	TIMEOUT_TASK_INIT(taskqueue_thread, &intrbalance_task, 0, intr_balance,
	    NULL);
	taskqueue_enqueue_timeout(taskqueue_thread, &intrbalance_task, hz);
}
SYSINIT(intr_balance_init, SI_SUB_SMP, SI_ORDER_ANY, intr_balance_init, NULL);
  838 
  839 #else
  840 /*
  841  * Always route interrupts to the current processor in the UP case.
  842  */
u_int
intr_next_cpu(int domain)
{

	/* UP kernel: the current CPU is the only possible destination. */
	return (PCPU_GET(apic_id));
}
  849 #endif

Cache object: 9c6a5a19e2cf9ee2183e8e73103d7b6b


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]


This page is part of the FreeBSD/Linux Linux Kernel Cross-Reference, and was automatically generated using a modified version of the LXR engine.