The Design and Implementation of the FreeBSD Operating System, Second Edition
Now available: The Design and Implementation of the FreeBSD Operating System (Second Edition)


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]

FreeBSD/Linux Kernel Cross Reference
sys/kern/subr_smp.c

Version: -  FREEBSD  -  FREEBSD-12-STABLE  -  FREEBSD-12-0  -  FREEBSD-11-STABLE  -  FREEBSD-11-2  -  FREEBSD-11-1  -  FREEBSD-11-0  -  FREEBSD-10-STABLE  -  FREEBSD-10-4  -  FREEBSD-10-3  -  FREEBSD-10-2  -  FREEBSD-10-1  -  FREEBSD-10-0  -  FREEBSD-9-STABLE  -  FREEBSD-9-3  -  FREEBSD-9-2  -  FREEBSD-9-1  -  FREEBSD-9-0  -  FREEBSD-8-STABLE  -  FREEBSD-8-4  -  FREEBSD-8-3  -  FREEBSD-8-2  -  FREEBSD-8-1  -  FREEBSD-8-0  -  FREEBSD-7-STABLE  -  FREEBSD-7-4  -  FREEBSD-7-3  -  FREEBSD-7-2  -  FREEBSD-7-1  -  FREEBSD-7-0  -  FREEBSD-6-STABLE  -  FREEBSD-6-4  -  FREEBSD-6-3  -  FREEBSD-6-2  -  FREEBSD-6-1  -  FREEBSD-6-0  -  FREEBSD-5-STABLE  -  FREEBSD-5-5  -  FREEBSD-5-4  -  FREEBSD-5-3  -  FREEBSD-5-2  -  FREEBSD-5-1  -  FREEBSD-5-0  -  FREEBSD-4-STABLE  -  FREEBSD-3-STABLE  -  FREEBSD22  -  linux-2.6  -  linux-2.4.22  -  MK83  -  MK84  -  PLAN9  -  DFBSD  -  NETBSD  -  NETBSD5  -  NETBSD4  -  NETBSD3  -  NETBSD20  -  OPENBSD  -  xnu-517  -  xnu-792  -  xnu-792.6.70  -  xnu-1228  -  xnu-1456.1.26  -  xnu-1699.24.8  -  xnu-2050.18.24  -  OPENSOLARIS  -  minix-3-1-1 
SearchContext: -  none  -  3  -  10 

    1 /*-
    2  * Copyright (c) 2001, John Baldwin <jhb@FreeBSD.org>.
    3  * All rights reserved.
    4  *
    5  * Redistribution and use in source and binary forms, with or without
    6  * modification, are permitted provided that the following conditions
    7  * are met:
    8  * 1. Redistributions of source code must retain the above copyright
    9  *    notice, this list of conditions and the following disclaimer.
   10  * 2. Redistributions in binary form must reproduce the above copyright
   11  *    notice, this list of conditions and the following disclaimer in the
   12  *    documentation and/or other materials provided with the distribution.
   13  *
   14  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
   15  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
   16  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
   17  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
   18  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
   19  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
   20  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
   21  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
   22  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
   23  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
   24  * SUCH DAMAGE.
   25  */
   26 
   27 /*
   28  * This module holds the global variables and machine independent functions
   29  * used for the kernel SMP support.
   30  */
   31 
   32 #include <sys/cdefs.h>
   33 __FBSDID("$FreeBSD: stable/10/sys/kern/subr_smp.c 331910 2018-04-03 07:52:06Z avg $");
   34 
   35 #include <sys/param.h>
   36 #include <sys/systm.h>
   37 #include <sys/kernel.h>
   38 #include <sys/ktr.h>
   39 #include <sys/proc.h>
   40 #include <sys/bus.h>
   41 #include <sys/lock.h>
   42 #include <sys/mutex.h>
   43 #include <sys/pcpu.h>
   44 #include <sys/sched.h>
   45 #include <sys/smp.h>
   46 #include <sys/sysctl.h>
   47 
   48 #include <machine/cpu.h>
   49 #include <machine/smp.h>
   50 
   51 #include "opt_sched.h"
   52 
   53 #ifdef SMP
      /*
       * CPU sets used by the stop/restart code below.  generic_stop_cpus()
       * spins until its targets appear in stopped_cpus (suspended_cpus for
       * IPI_SUSPEND); for the non-suspend case generic_restart_cpus() stores
       * the set to release into started_cpus and spins until the targets have
       * left stopped_cpus.  Nothing in this file sets or clears the
       * individual bits of stopped_cpus or suspended_cpus.
       */
   54 volatile cpuset_t stopped_cpus;
   55 volatile cpuset_t started_cpus;
   56 volatile cpuset_t suspended_cpus;
      /* Neither mask is referenced again in this file. */
   57 cpuset_t hlt_cpus_mask;
   58 cpuset_t logical_cpus_mask;
   59 
      /* Not called from this file.  NOTE(review): presumably a hook for MD code. */
   60 void (*cpustop_restartfunc)(void);
   61 #endif
   62 
      /* Handler behind kern.smp.active; defined near the end of this file. */
   63 static int sysctl_kern_smp_active(SYSCTL_HANDLER_ARGS);
   64 
   65 /* This is used in modules that need to work in both SMP and UP. */
   66 cpuset_t all_cpus;
   67 
      /* CPU count; mp_start() sets it to 1 when SMP is disabled or not probed. */
   68 int mp_ncpus;
   69 /* export this for libkvm consumers. */
   70 int mp_maxcpus = MAXCPU;
   71 
      /*
       * Read by the kern.smp.active handler and checked by the IPI-sending
       * paths in this file before they do any cross-CPU work.
       */
   72 volatile int smp_started;
      /* Exported as kern.smp.maxid ("Max CPU ID."). */
   73 u_int mp_maxid;
   74 
   75 static SYSCTL_NODE(_kern, OID_AUTO, smp, CTLFLAG_RD|CTLFLAG_CAPRD, NULL,
   76     "Kernel SMP");
   77 
   78 SYSCTL_INT(_kern_smp, OID_AUTO, maxid, CTLFLAG_RD|CTLFLAG_CAPRD, &mp_maxid, 0,
   79     "Max CPU ID.");
   80 
   81 SYSCTL_INT(_kern_smp, OID_AUTO, maxcpus, CTLFLAG_RD|CTLFLAG_CAPRD, &mp_maxcpus,
   82     0, "Max number of CPUs that the system was compiled for.");
   83 
      /* Served by a handler rather than SYSCTL_INT because smp_started is volatile. */
   84 SYSCTL_PROC(_kern_smp, OID_AUTO, active, CTLFLAG_RD | CTLTYPE_INT, NULL, 0,
   85     sysctl_kern_smp_active, "I", "Indicates system is running in SMP mode");
   86 
   87 int smp_disabled = 0;   /* has smp been disabled? */
   88 SYSCTL_INT(_kern_smp, OID_AUTO, disabled, CTLFLAG_RDTUN|CTLFLAG_CAPRD,
   89     &smp_disabled, 0, "SMP has been disabled from the loader");
   90 TUNABLE_INT("kern.smp.disabled", &smp_disabled);
   91 
   92 int smp_cpus = 1;       /* how many cpu's running */
   93 SYSCTL_INT(_kern_smp, OID_AUTO, cpus, CTLFLAG_RD|CTLFLAG_CAPRD, &smp_cpus, 0,
   94     "Number of CPUs online");
   95 
      /* Selects one of the fake topologies built by smp_topo(). */
   96 int smp_topology = 0;   /* Which topology we're using. */
   97 SYSCTL_INT(_kern_smp, OID_AUTO, topology, CTLFLAG_RD, &smp_topology, 0,
   98     "Topology override setting; 0 is default provided by hardware.");
   99 TUNABLE_INT("kern.smp.topology", &smp_topology);
  100 
  101 #ifdef SMP
  102 /* Enable forwarding of a signal to a process running on a different CPU */
  103 static int forward_signal_enabled = 1;
  104 SYSCTL_INT(_kern_smp, OID_AUTO, forward_signal_enabled, CTLFLAG_RW,
  105            &forward_signal_enabled, 0,
  106            "Forwarding of a signal to a process on a different CPU");
  107 
  108 /* Variables needed for SMP rendezvous. */
      /*
       * smp_rendezvous_cpus() fills these in while holding smp_ipi_mtx and
       * smp_rendezvous_action() reads them on every participating CPU.
       * smp_rv_waiters[] holds one counter per phase: [0] entry, [1] setup
       * done, [2] action done, [3] rendezvous fully completed.
       */
  109 static volatile int smp_rv_ncpus;
  110 static void (*volatile smp_rv_setup_func)(void *arg);
  111 static void (*volatile smp_rv_action_func)(void *arg);
  112 static void (*volatile smp_rv_teardown_func)(void *arg);
  113 static void *volatile smp_rv_func_arg;
  114 static volatile int smp_rv_waiters[4];
  115 
  116 /* 
  117  * Shared mutex to restrict busywaits between smp_rendezvous() and
  118  * smp(_targeted)_tlb_shootdown().  A deadlock occurs if both of these
  119  * functions trigger at once and cause multiple CPUs to busywait with
  120  * interrupts disabled. 
       *
       * Initialized as a spin mutex in mp_start(); within this file it is
       * taken by smp_rendezvous_cpus() and, for IPI_SUSPEND, by
       * generic_stop_cpus().
  121  */
  122 struct mtx smp_ipi_mtx;
  123 
  124 /*
  125  * Let the MD SMP code initialize mp_maxid very early if it can.
       *
       * SYSINIT callback (SI_SUB_TUNABLES, SI_ORDER_FIRST) that only calls
       * cpu_mp_setmaxid(); the 'dummy' argument is unused.
  126  */
  127 static void
  128 mp_setmaxid(void *dummy)
  129 {
  130         cpu_mp_setmaxid();
  131 }
  132 SYSINIT(cpu_mp_setmaxid, SI_SUB_TUNABLES, SI_ORDER_FIRST, mp_setmaxid, NULL);
  133 
  134 /*
  135  * Call the MD SMP initialization code.
       *
       * SYSINIT callback (SI_SUB_CPU, SI_ORDER_THIRD); 'dummy' is unused.
       * Initializes smp_ipi_mtx as a spin mutex, then either falls back to a
       * single CPU or calls cpu_mp_start() and cpu_mp_announce().
  136  */
  137 static void
  138 mp_start(void *dummy)
  139 {
  140 
  141         mtx_init(&smp_ipi_mtx, "smp rendezvous", NULL, MTX_SPIN);
  142 
  143         /* Probe for MP hardware. */
  144         if (smp_disabled != 0 || cpu_mp_probe() == 0) {
                      /* Single-CPU fallback: all_cpus holds only the current CPU. */
  145                 mp_ncpus = 1;
  146                 CPU_SETOF(PCPU_GET(cpuid), &all_cpus);
  147                 return;
  148         }
  149 
  150         cpu_mp_start();
  151         printf("FreeBSD/SMP: Multiprocessor System Detected: %d CPUs\n",
  152             mp_ncpus);
  153         cpu_mp_announce();
  154 }
  155 SYSINIT(cpu_mp, SI_SUB_CPU, SI_ORDER_THIRD, mp_start, NULL);
  156 
      /*
       * Send IPI_AST to the CPU on which thread 'td' is running so that the
       * thread executes ast().  The thread lock of 'td' must be held and 'td'
       * must be running (both are asserted).
       *
       * Nothing is sent when SMP has not started, while 'cold' or 'panicstr'
       * is set, when forwarding is turned off through the
       * forward_signal_enabled sysctl, when 'td' is the current thread, or when
       * td->td_oncpu is NOCPU.
       */
  157 void
  158 forward_signal(struct thread *td)
  159 {
  160         int id;
  161 
  162         /*
  163          * signotify() has already set TDF_ASTPENDING and TDF_NEEDSIGCHECK on
  164          * this thread, so all we need to do is poke it if it is currently
  165          * executing so that it executes ast().
  166          */
  167         THREAD_LOCK_ASSERT(td, MA_OWNED);
  168         KASSERT(TD_IS_RUNNING(td),
  169             ("forward_signal: thread is not TDS_RUNNING"));
  170 
  171         CTR1(KTR_SMP, "forward_signal(%p)", td->td_proc);
  172 
  173         if (!smp_started || cold || panicstr)
  174                 return;
  175         if (!forward_signal_enabled)
  176                 return;
  177 
  178         /* No need to IPI ourself. */
  179         if (td == curthread)
  180                 return;
  181 
  182         id = td->td_oncpu;
  183         if (id == NOCPU)
  184                 return;
  185         ipi_cpu(id, IPI_AST);
  186 }
  187 
  188 /*
  189  * When called the executing CPU will send an IPI to all other CPUs
  190  *  requesting that they halt execution.
  191  *
  192  * Usually (but not necessarily) called with 'other_cpus' as its arg.
  193  *
  194  *  - Signals all CPUs in map to stop.
  195  *  - Waits for each to stop.
  196  *
  197  * Returns:
  198  *  -1: error
  199  *   0: NA
  200  *   1: ok
  201  *
       * 'type' selects the IPI that is sent: IPI_STOP, IPI_STOP_HARD or, on
       * amd64/i386 only, IPI_SUSPEND.
       *
       * As written, the function returns 0 when SMP has not been started and
       * 1 in every other case, including after the "timeout stopping cpus"
       * message has been printed; it never returns -1.
  202  */
  203 static int
  204 generic_stop_cpus(cpuset_t map, u_int type)
  205 {
  206 #ifdef KTR
  207         char cpusetbuf[CPUSETBUFSIZ];
  208 #endif
              /* ID of the CPU currently driving a stop, or NOCPU when idle. */
  209         static volatile u_int stopping_cpu = NOCPU;
  210         int i;
  211         volatile cpuset_t *cpus;
  212 
  213         KASSERT(
  214 #if defined(__amd64__) || defined(__i386__)
  215             type == IPI_STOP || type == IPI_STOP_HARD || type == IPI_SUSPEND,
  216 #else
  217             type == IPI_STOP || type == IPI_STOP_HARD,
  218 #endif
  219             ("%s: invalid stop type", __func__));
  220 
  221         if (!smp_started)
  222                 return (0);
  223 
  224         CTR2(KTR_SMP, "stop_cpus(%s) with %u type",
  225             cpusetobj_strprint(cpusetbuf, &map), type);
  226 
  227 #if defined(__amd64__) || defined(__i386__)
  228         /*
  229          * When suspending, ensure there are no IPIs in progress.
  230          * IPIs that have been issued, but not yet delivered (e.g.
  231          * not pending on a vCPU when running under virtualization)
  232          * will be lost, violating FreeBSD's assumption of reliable
  233          * IPI delivery.
  234          */
  235         if (type == IPI_SUSPEND)
  236                 mtx_lock_spin(&smp_ipi_mtx);
  237 #endif
  238 
              /*
               * Claim the stopping_cpu slot so that one CPU drives a stop at a
               * time.  A CPU that already owns the slot skips the wait; any
               * other CPU spins until the slot reads NOCPU and its
               * compare-and-set succeeds.
               */
  239         if (stopping_cpu != PCPU_GET(cpuid))
  240                 while (atomic_cmpset_int(&stopping_cpu, NOCPU,
  241                     PCPU_GET(cpuid)) == 0)
  242                         while (stopping_cpu != NOCPU)
  243                                 cpu_spinwait(); /* spin */
  244 
  245         /* send the stop IPI to all CPUs in map */
  246         ipi_selected(map, type);
  247 
              /* Pick the set in which the targets are expected to appear. */
  248 #if defined(__amd64__) || defined(__i386__)
  249         if (type == IPI_SUSPEND)
  250                 cpus = &suspended_cpus;
  251         else
  252 #endif
  253                 cpus = &stopped_cpus;
  254 
              /*
               * Wait until every CPU in map is present in that set, giving up
               * with a console message after 100000000 spins.
               */
  255         i = 0;
  256         while (!CPU_SUBSET(cpus, &map)) {
  257                 /* spin */
  258                 cpu_spinwait();
  259                 i++;
  260                 if (i == 100000000) {
  261                         printf("timeout stopping cpus\n");
  262                         break;
  263                 }
  264         }
  265 
  266 #if defined(__amd64__) || defined(__i386__)
  267         if (type == IPI_SUSPEND)
  268                 mtx_unlock_spin(&smp_ipi_mtx);
  269 #endif
  270 
              /* Release the slot for the next caller. */
  271         stopping_cpu = NOCPU;
  272         return (1);
  273 }
  274 
      /*
       * Stop the CPUs in 'map' using IPI_STOP.  Returns whatever
       * generic_stop_cpus() returns.
       */
  275 int
  276 stop_cpus(cpuset_t map)
  277 {
  278 
  279         return (generic_stop_cpus(map, IPI_STOP));
  280 }
  281 
      /*
       * Stop the CPUs in 'map' using IPI_STOP_HARD.  Returns whatever
       * generic_stop_cpus() returns.
       */
  282 int
  283 stop_cpus_hard(cpuset_t map)
  284 {
  285 
  286         return (generic_stop_cpus(map, IPI_STOP_HARD));
  287 }
  288 
  289 #if defined(__amd64__) || defined(__i386__)
      /*
       * Suspend the CPUs in 'map' using IPI_SUSPEND (amd64/i386 only).
       * Returns whatever generic_stop_cpus() returns.
       */
  290 int
  291 suspend_cpus(cpuset_t map)
  292 {
  293 
  294         return (generic_stop_cpus(map, IPI_SUSPEND));
  295 }
  296 #endif
  297 
  298 /*
  299  * Called by a CPU to restart stopped CPUs. 
  300  *
  301  * Usually (but not necessarily) called with 'stopped_cpus' as its arg.
  302  *
  303  *  - Signals all CPUs in map to restart.
  304  *  - Waits for each to restart.
  305  *
  306  * Returns:
  307  *  -1: error
  308  *   0: NA
  309  *   1: ok
       *
       * 'type' names the stop type being undone: IPI_STOP, IPI_STOP_HARD or,
       * on amd64/i386 only, IPI_SUSPEND.  No IPI is sent here; the set to
       * release is published with a release store and the function then spins
       * with no timeout.
       *
       * As written, the function returns 0 when SMP has not been started and
       * 1 otherwise; it never returns -1.
  310  */
  311 static int
  312 generic_restart_cpus(cpuset_t map, u_int type)
  313 {
  314 #ifdef KTR
  315         char cpusetbuf[CPUSETBUFSIZ];
  316 #endif
  317         volatile cpuset_t *cpus;
  318 
  319         KASSERT(
  320 #if defined(__amd64__) || defined(__i386__)
  321             type == IPI_STOP || type == IPI_STOP_HARD || type == IPI_SUSPEND,
  322 #else
  323             type == IPI_STOP || type == IPI_STOP_HARD,
  324 #endif
  325             ("%s: invalid stop type", __func__));
  326 
  327         if (!smp_started)
  328                 return (0);
  329 
  330         CTR1(KTR_SMP, "restart_cpus(%s)", cpusetobj_strprint(cpusetbuf, &map));
  331 
              /* Pick the set that the targets are expected to leave. */
  332 #if defined(__amd64__) || defined(__i386__)
  333         if (type == IPI_SUSPEND)
  334                 cpus = &resuming_cpus;
  335         else
  336 #endif
  337                 cpus = &stopped_cpus;
  338 
  339         /* signal other cpus to restart */
  340 #if defined(__amd64__) || defined(__i386__)
  341         if (type == IPI_SUSPEND)
  342                 CPU_COPY_STORE_REL(&map, &toresume_cpus);
  343         else
  344 #endif
  345                 CPU_COPY_STORE_REL(&map, &started_cpus);
  346 
  347         /* wait for each to clear its bit */
  348         while (CPU_OVERLAP(cpus, &map))
  349                 cpu_spinwait();
  350 
  351         return (1);
  352 }
  353 
      /*
       * Restart the CPUs in 'map' that were stopped with IPI_STOP.  Returns
       * whatever generic_restart_cpus() returns.
       */
  354 int
  355 restart_cpus(cpuset_t map)
  356 {
  357 
  358         return (generic_restart_cpus(map, IPI_STOP));
  359 }
  360 
  361 #if defined(__amd64__) || defined(__i386__)
      /*
       * Resume the CPUs in 'map' that were suspended with IPI_SUSPEND
       * (amd64/i386 only).  Returns whatever generic_restart_cpus() returns.
       */
  362 int
  363 resume_cpus(cpuset_t map)
  364 {
  365 
  366         return (generic_restart_cpus(map, IPI_SUSPEND));
  367 }
  368 #endif
  369 
  370 /*
  371  * All-CPU rendezvous.  CPUs are signalled, all execute the setup function 
  372  * (if specified), rendezvous, execute the action function (if specified),
  373  * rendezvous again, execute the teardown function (if specified), and then
  374  * resume.
  375  *
  376  * Note that the supplied external functions _must_ be reentrant and aware
  377  * that they are running in parallel and in an unknown lock context.
       *
       * This is the per-CPU half of a rendezvous: it is called directly by
       * smp_rendezvous_cpus() on the initiating CPU when that CPU is in the
       * map.  The parameters are read from the smp_rv_* variables once, after
       * the entry barrier, and only the local copies are used afterwards.
       * Passing smp_no_rendevous_barrier as the setup or teardown function
       * skips the corresponding barrier.
  378  */
  379 void
  380 smp_rendezvous_action(void)
  381 {
  382         struct thread *td;
  383         void *local_func_arg;
  384         void (*local_setup_func)(void*);
  385         void (*local_action_func)(void*);
  386         void (*local_teardown_func)(void*);
  387 #ifdef INVARIANTS
  388         int owepreempt;
  389 #endif
  390 
  391         /* Ensure we have up-to-date values. */
  392         atomic_add_acq_int(&smp_rv_waiters[0], 1);
  393         while (smp_rv_waiters[0] < smp_rv_ncpus)
  394                 cpu_spinwait();
  395 
  396         /* Fetch rendezvous parameters after acquire barrier. */
  397         local_func_arg = smp_rv_func_arg;
  398         local_setup_func = smp_rv_setup_func;
  399         local_action_func = smp_rv_action_func;
  400         local_teardown_func = smp_rv_teardown_func;
  401 
  402         /*
  403          * Use a nested critical section to prevent any preemptions
  404          * from occurring during a rendezvous action routine.
  405          * Specifically, if a rendezvous handler is invoked via an IPI
  406          * and the interrupted thread was in the critical_exit()
  407          * function after setting td_critnest to 0 but before
  408          * performing a deferred preemption, this routine can be
  409          * invoked with td_critnest set to 0 and td_owepreempt true.
  410          * In that case, a critical_exit() during the rendezvous
  411          * action would trigger a preemption which is not permitted in
  412          * a rendezvous action.  To fix this, wrap all of the
  413          * rendezvous action handlers in a critical section.  We
  414          * cannot use a regular critical section however as having
  415          * critical_exit() preempt from this routine would also be
  416          * problematic (the preemption must not occur before the IPI
  417          * has been acknowledged via an EOI).  Instead, we
  418          * intentionally ignore td_owepreempt when leaving the
  419          * critical section.  This should be harmless because we do
  420          * not permit rendezvous action routines to schedule threads,
  421          * and thus td_owepreempt should never transition from 0 to 1
  422          * during this routine.
  423          */
  424         td = curthread;
  425         td->td_critnest++;
  426 #ifdef INVARIANTS
  427         owepreempt = td->td_owepreempt;
  428 #endif
  429         
  430         /*
  431          * If requested, run a setup function before the main action
  432          * function.  Ensure all CPUs have completed the setup
  433          * function before moving on to the action function.
  434          */
  435         if (local_setup_func != smp_no_rendevous_barrier) {
                      /*
                       * Use the copies fetched after the acquire barrier, exactly
                       * as the action and teardown steps do, rather than reading
                       * the volatile globals a second time.
                       */
  436                 if (local_setup_func != NULL)
  437                         local_setup_func(local_func_arg);
  438                 atomic_add_int(&smp_rv_waiters[1], 1);
  439                 while (smp_rv_waiters[1] < smp_rv_ncpus)
  440                         cpu_spinwait();
  441         }
  442 
  443         if (local_action_func != NULL)
  444                 local_action_func(local_func_arg);
  445 
  446         if (local_teardown_func != smp_no_rendevous_barrier) {
  447                 /*
  448                  * Signal that the main action has been completed.  If a
  449                  * full exit rendezvous is requested, then all CPUs will
  450                  * wait here until all CPUs have finished the main action.
  451                  */
  452                 atomic_add_int(&smp_rv_waiters[2], 1);
  453                 while (smp_rv_waiters[2] < smp_rv_ncpus)
  454                         cpu_spinwait();
  455 
  456                 if (local_teardown_func != NULL)
  457                         local_teardown_func(local_func_arg);
  458         }
  459 
  460         /*
  461          * Signal that the rendezvous is fully completed by this CPU.
  462          * This means that no member of smp_rv_* pseudo-structure will be
  463          * accessed by this target CPU after this point; in particular,
  464          * memory pointed by smp_rv_func_arg.
  465          */
  466         atomic_add_int(&smp_rv_waiters[3], 1);
  467 
              /* Leave the hand-rolled critical section without honouring td_owepreempt. */
  468         td->td_critnest--;
  469         KASSERT(owepreempt == td->td_owepreempt,
  470             ("rendezvous action changed td_owepreempt"));
  471 }
  472 
      /*
       * Run a rendezvous on the CPUs in 'map'.  'setup_func', 'action_func'
       * and 'teardown_func' may each be NULL; 'arg' is passed to whichever of
       * them are called.
       *
       * Before SMP is started the three functions are simply run in order on
       * the calling CPU inside spinlock_enter()/spinlock_exit().  Otherwise
       * the parameters are published through the smp_rv_* variables under
       * smp_ipi_mtx, IPI_RENDEZVOUS is sent to the other CPUs in the map, the
       * calling CPU takes part if it is in the map, and the function returns
       * only after every participant has signalled completion.
       *
       * Panics if no CPU visited by CPU_FOREACH() is set in 'map'.
       */
  473 void
  474 smp_rendezvous_cpus(cpuset_t map,
  475         void (* setup_func)(void *), 
  476         void (* action_func)(void *),
  477         void (* teardown_func)(void *),
  478         void *arg)
  479 {
  480         int curcpumap, i, ncpus = 0;
  481 
  482         /* See the comments in the !SMP case. */
  483         if (!smp_started) {
  484                 spinlock_enter();
  485                 if (setup_func != NULL)
  486                         setup_func(arg);
  487                 if (action_func != NULL)
  488                         action_func(arg);
  489                 if (teardown_func != NULL)
  490                         teardown_func(arg);
  491                 spinlock_exit();
  492                 return;
  493         }
  494 
              /* Count the participants; this is the target value of every barrier. */
  495         CPU_FOREACH(i) {
  496                 if (CPU_ISSET(i, &map))
  497                         ncpus++;
  498         }
  499         if (ncpus == 0)
  500                 panic("ncpus is 0 with non-zero map");
  501 
  502         mtx_lock_spin(&smp_ipi_mtx);
  503 
  504         /* Pass rendezvous parameters via global variables. */
  505         smp_rv_ncpus = ncpus;
  506         smp_rv_setup_func = setup_func;
  507         smp_rv_action_func = action_func;
  508         smp_rv_teardown_func = teardown_func;
  509         smp_rv_func_arg = arg;
  510         smp_rv_waiters[1] = 0;
  511         smp_rv_waiters[2] = 0;
  512         smp_rv_waiters[3] = 0;
              /*
               * The release store is issued last; it pairs with the acquire
               * increment of smp_rv_waiters[0] in smp_rendezvous_action().
               */
  513         atomic_store_rel_int(&smp_rv_waiters[0], 0);
  514 
  515         /*
  516          * Signal other processors, which will enter the IPI with
  517          * interrupts off.
  518          */
              /* Remember whether we participate, then drop ourselves from the IPI set. */
  519         curcpumap = CPU_ISSET(curcpu, &map);
  520         CPU_CLR(curcpu, &map);
  521         ipi_selected(map, IPI_RENDEZVOUS);
  522 
  523         /* Check if the current CPU is in the map */
  524         if (curcpumap != 0)
  525                 smp_rendezvous_action();
  526 
  527         /*
  528          * Ensure that the master CPU waits for all the other
  529          * CPUs to finish the rendezvous, so that smp_rv_*
  530          * pseudo-structure and the arg are guaranteed to not
  531          * be in use.
  532          */
  533         while (atomic_load_acq_int(&smp_rv_waiters[3]) < ncpus)
  534                 cpu_spinwait();
  535 
  536         mtx_unlock_spin(&smp_ipi_mtx);
  537 }
  538 
      /*
       * Rendezvous on every CPU: forwards to smp_rendezvous_cpus() with
       * all_cpus as the map.
       */
  539 void
  540 smp_rendezvous(void (* setup_func)(void *), 
  541                void (* action_func)(void *),
  542                void (* teardown_func)(void *),
  543                void *arg)
  544 {
  545         smp_rendezvous_cpus(all_cpus, setup_func, action_func, teardown_func, arg);
  546 }
  547 
      /*
       * Backing storage for the topologies built by smp_topo_none(),
       * smp_topo_1level() and smp_topo_2level(); group[0] is always the root.
       */
  548 static struct cpu_group group[MAXCPU];
  549 
      /*
       * Return the root of the CPU topology.  smp_topology values 1 through 7
       * select one of the fake layouts below; any other value asks cpu_topo().
       *
       * Panics if the root's cg_count differs from mp_ncpus or if the
       * CPU_CMP() check of its cg_mask against all_cpus fails.
       */
  550 struct cpu_group *
  551 smp_topo(void)
  552 {
  553         char cpusetbuf[CPUSETBUFSIZ], cpusetbuf2[CPUSETBUFSIZ];
  554         struct cpu_group *top;
  555 
  556         /*
  557          * Check for a fake topology request for debugging purposes.
  558          */
  559         switch (smp_topology) {
  560         case 1:
  561                 /* Dual core with no sharing.  */
  562                 top = smp_topo_1level(CG_SHARE_NONE, 2, 0);
  563                 break;
  564         case 2:
  565                 /* No topology, all cpus are equal. */
  566                 top = smp_topo_none();
  567                 break;
  568         case 3:
  569                 /* Dual core with shared L2.  */
  570                 top = smp_topo_1level(CG_SHARE_L2, 2, 0);
  571                 break;
  572         case 4:
  573                 /* quad core, shared l3 among each package, private l2.  */
  574                 top = smp_topo_1level(CG_SHARE_L3, 4, 0);
  575                 break;
  576         case 5:
  577                 /* quad core,  2 dualcore parts on each package share l2.  */
  578                 top = smp_topo_2level(CG_SHARE_NONE, 2, CG_SHARE_L2, 2, 0);
  579                 break;
  580         case 6:
  581                 /* Single-core 2xHTT */
  582                 top = smp_topo_1level(CG_SHARE_L1, 2, CG_FLAG_HTT);
  583                 break;
  584         case 7:
  585                 /* quad core with a shared l3, 8 threads sharing L2.  */
  586                 top = smp_topo_2level(CG_SHARE_L3, 4, CG_SHARE_L2, 8,
  587                     CG_FLAG_SMT);
  588                 break;
  589         default:
  590                 /* Default, ask the system what it wants. */
  591                 top = cpu_topo();
  592                 break;
  593         }
  594         /*
  595          * Verify the returned topology.
  596          */
  597         if (top->cg_count != mp_ncpus)
  598                 panic("Built bad topology at %p.  CPU count %d != %d",
  599                     top, top->cg_count, mp_ncpus);
  600         if (CPU_CMP(&top->cg_mask, &all_cpus))
  601                 panic("Built bad topology at %p.  CPU mask (%s) != (%s)",
  602                     top, cpusetobj_strprint(cpusetbuf, &top->cg_mask),
  603                     cpusetobj_strprint(cpusetbuf2, &all_cpus));
  604         return (top);
  605 }
  606 
      /*
       * Build a flat topology in group[0]: one childless group that covers
       * all_cpus with mp_ncpus members, level CG_SHARE_NONE and no flags.
       * Returns a pointer to that group.
       */
  607 struct cpu_group *
  608 smp_topo_none(void)
  609 {
  610         struct cpu_group *top;
  611 
  612         top = &group[0];
  613         top->cg_parent = NULL;
  614         top->cg_child = NULL;
  615         top->cg_mask = all_cpus;
  616         top->cg_count = mp_ncpus;
  617         top->cg_children = 0;
  618         top->cg_level = CG_SHARE_NONE;
  619         top->cg_flags = 0;
  620         
  621         return (top);
  622 }
  623 
      /*
       * Initialize 'child' as a childless group covering 'count' consecutive
       * CPU IDs beginning at 'start', with sharing level 'share' and flags
       * 'flags', and attach it under 'parent'.  The child's mask and count are
       * then merged into 'parent' and each of its ancestors.
       *
       * Panics if the child's mask overlaps the mask already accumulated in
       * any of those groups.  Returns the first CPU ID after the range used.
       */
  624 static int
  625 smp_topo_addleaf(struct cpu_group *parent, struct cpu_group *child, int share,
  626     int count, int flags, int start)
  627 {
  628         char cpusetbuf[CPUSETBUFSIZ], cpusetbuf2[CPUSETBUFSIZ];
  629         cpuset_t mask;
  630         int i;
  631 
              /* Build the mask [start, start + count); 'start' ends one past it. */
  632         CPU_ZERO(&mask);
  633         for (i = 0; i < count; i++, start++)
  634                 CPU_SET(start, &mask);
  635         child->cg_parent = parent;
  636         child->cg_child = NULL;
  637         child->cg_children = 0;
  638         child->cg_level = share;
  639         child->cg_count = count;
  640         child->cg_flags = flags;
  641         child->cg_mask = mask;
  642         parent->cg_children++;
              /* Walk up to the root, folding the new leaf into every ancestor. */
  643         for (; parent != NULL; parent = parent->cg_parent) {
  644                 if (CPU_OVERLAP(&parent->cg_mask, &child->cg_mask))
  645                         panic("Duplicate children in %p.  mask (%s) child (%s)",
  646                             parent,
  647                             cpusetobj_strprint(cpusetbuf, &parent->cg_mask),
  648                             cpusetobj_strprint(cpusetbuf2, &child->cg_mask));
  649                 CPU_OR(&parent->cg_mask, &child->cg_mask);
  650                 parent->cg_count += child->cg_count;
  651         }
  652 
  653         return (start);
  654 }
  655 
      /*
       * Build a root-plus-leaves topology in group[]: group[0] is the root
       * (CG_SHARE_NONE) and group[1] onwards hold mp_ncpus / count leaf groups
       * of 'count' consecutive CPUs each, with sharing level 'share' and flags
       * 'flags'.  Returns the root.
       *
       * The root's mask, count and child count are only accumulated by
       * smp_topo_addleaf(); they are not reset here.
       */
  656 struct cpu_group *
  657 smp_topo_1level(int share, int count, int flags)
  658 {
  659         struct cpu_group *child;
  660         struct cpu_group *top;
  661         int packages;
  662         int cpu;
  663         int i;
  664 
  665         cpu = 0;
  666         top = &group[0];
  667         packages = mp_ncpus / count;
  668         top->cg_child = child = &group[1];
  669         top->cg_level = CG_SHARE_NONE;
  670         for (i = 0; i < packages; i++, child++)
  671                 cpu = smp_topo_addleaf(top, child, share, count, flags, cpu);
  672         return (top);
  673 }
  674 
      /*
       * Build a three-tier topology in group[]: group[0] is the root
       * (CG_SHARE_NONE), followed by mp_ncpus / (l2count * l1count) middle
       * groups with sharing level 'l2share', followed by the leaf groups.
       * Each middle group receives 'l2count' leaves of 'l1count' consecutive
       * CPUs with sharing level 'l1share' and flags 'l1flags'.  Returns the
       * root.
       */
  675 struct cpu_group *
  676 smp_topo_2level(int l2share, int l2count, int l1share, int l1count,
  677     int l1flags)
  678 {
  679         struct cpu_group *top;
  680         struct cpu_group *l1g;
  681         struct cpu_group *l2g;
  682         int cpu;
  683         int i;
  684         int j;
  685 
  686         cpu = 0;
  687         top = &group[0];
  688         l2g = &group[1];
  689         top->cg_child = l2g;
  690         top->cg_level = CG_SHARE_NONE;
  691         top->cg_children = mp_ncpus / (l2count * l1count);
              /* Leaves are laid out in group[] right after the last middle group. */
  692         l1g = l2g + top->cg_children;
  693         for (i = 0; i < top->cg_children; i++, l2g++) {
  694                 l2g->cg_parent = top;
  695                 l2g->cg_child = l1g;
  696                 l2g->cg_level = l2share;
  697                 for (j = 0; j < l2count; j++, l1g++)
  698                         cpu = smp_topo_addleaf(l2g, l1g, l1share, l1count,
  699                             l1flags, cpu);
  700         }
  701         return (top);
  702 }
  703 
  704 
      /*
       * Starting at 'top', descend to the childless group whose mask contains
       * 'cpu' and return it.  Returns NULL as soon as a visited group's mask
       * does not contain 'cpu'.
       */
  705 struct cpu_group *
  706 smp_topo_find(struct cpu_group *top, int cpu)
  707 {
  708         struct cpu_group *cg;
  709         cpuset_t mask;
  710         int children;
  711         int i;
  712 
  713         CPU_SETOF(cpu, &mask);
  714         cg = top;
  715         for (;;) {
  716                 if (!CPU_OVERLAP(&cg->cg_mask, &mask))
  717                         return (NULL);
  718                 if (cg->cg_children == 0)
  719                         return (cg);
                      /* Children are stored contiguously; pick the one that has 'cpu'. */
  720                 children = cg->cg_children;
  721                 for (i = 0, cg = cg->cg_child; i < children; cg++, i++)
  722                         if (CPU_OVERLAP(&cg->cg_mask, &mask))
  723                                 break;
  724         }
              /* Not reached: the loop above only exits through a return. */
  725         return (NULL);
  726 }
  727 #else /* !SMP */
  728 
      /*
       * !SMP version: 'map' is ignored and the non-NULL functions are run in
       * order (setup, action, teardown) with 'arg' on the only CPU.
       */
  729 void
  730 smp_rendezvous_cpus(cpuset_t map,
  731         void (*setup_func)(void *), 
  732         void (*action_func)(void *),
  733         void (*teardown_func)(void *),
  734         void *arg)
  735 {
  736         /*
  737          * In the !SMP case we just need to ensure the same initial conditions
  738          * as the SMP case.
  739          */
  740         spinlock_enter();
  741         if (setup_func != NULL)
  742                 setup_func(arg);
  743         if (action_func != NULL)
  744                 action_func(arg);
  745         if (teardown_func != NULL)
  746                 teardown_func(arg);
  747         spinlock_exit();
  748 }
  749 
      /*
       * !SMP version: runs the non-NULL functions in order (setup, action,
       * teardown) with 'arg' between spinlock_enter() and spinlock_exit().
       */
  750 void
  751 smp_rendezvous(void (*setup_func)(void *), 
  752                void (*action_func)(void *),
  753                void (*teardown_func)(void *),
  754                void *arg)
  755 {
  756 
  757         /* See the comments in the smp_rendezvous_cpus() case. */
  758         spinlock_enter();
  759         if (setup_func != NULL)
  760                 setup_func(arg);
  761         if (action_func != NULL)
  762                 action_func(arg);
  763         if (teardown_func != NULL)
  764                 teardown_func(arg);
  765         spinlock_exit();
  766 }
  767 
  768 /*
  769  * Provide dummy SMP support for UP kernels.  Modules that need to use SMP
  770  * APIs will still work using this dummy support.
       *
       * SYSINIT callback (SI_SUB_TUNABLES, SI_ORDER_FIRST); 'dummy' is unused.
       * Sets mp_ncpus to 1, mp_maxid to the current CPU ID and all_cpus to
       * that single CPU, and asserts that the ID is zero.
  771  */
  772 static void
  773 mp_setvariables_for_up(void *dummy)
  774 {
  775         mp_ncpus = 1;
  776         mp_maxid = PCPU_GET(cpuid);
  777         CPU_SETOF(mp_maxid, &all_cpus);
  778         KASSERT(PCPU_GET(cpuid) == 0, ("UP must have a CPU ID of zero"));
  779 }
  780 SYSINIT(cpu_mp_setvariables, SI_SUB_TUNABLES, SI_ORDER_FIRST,
  781     mp_setvariables_for_up, NULL);
  782 #endif /* SMP */
  783 
      /*
       * Sentinel for the setup/teardown arguments of a rendezvous.
       * smp_rendezvous_action() compares those pointers against this
       * function's address and skips the corresponding barrier and call when
       * they match.  The paths that call setup/teardown without that
       * comparison are the ones taken when SMP is not started (or in !SMP
       * kernels), which is what the assertion below checks.  'dummy' is
       * unused.
       */
  784 void
  785 smp_no_rendevous_barrier(void *dummy)
  786 {
  787 #ifdef SMP
  788         KASSERT((!smp_started),("smp_no_rendevous called and smp is started"));
  789 #endif
  790 }
  791 
  792 /*
  793  * Wait for the specified idle threads to switch once.  This ensures that even
  794  * preempted threads have cycled through the switch function once,
  795  * exiting their codepaths.  This allows us to change global pointers
  796  * with no other synchronization.
       *
       * 'map' selects the CPUs to wait for (absent CPUs are skipped); 'wmesg'
       * and 'prio' are passed straight to tsleep().  Returns 0 on success, or
       * the first tsleep() result other than EWOULDBLOCK, in which case the
       * remaining CPUs are not waited for.  The calling thread is unbound
       * before returning on either path.
  797  */
  798 int
  799 quiesce_cpus(cpuset_t map, const char *wmesg, int prio)
  800 {
  801         struct pcpu *pcpu;
  802         u_int gen[MAXCPU];
  803         int error;
  804         int cpu;
  805 
  806         error = 0;
              /* Pass 1: snapshot the idle thread generation of every selected CPU. */
  807         for (cpu = 0; cpu <= mp_maxid; cpu++) {
  808                 if (!CPU_ISSET(cpu, &map) || CPU_ABSENT(cpu))
  809                         continue;
  810                 pcpu = pcpu_find(cpu);
  811                 gen[cpu] = pcpu->pc_idlethread->td_generation;
  812         }
              /*
               * Pass 2: bind to each selected CPU in turn and sleep one tick at
               * a time until its idle thread generation differs from the
               * snapshot.
               */
  813         for (cpu = 0; cpu <= mp_maxid; cpu++) {
  814                 if (!CPU_ISSET(cpu, &map) || CPU_ABSENT(cpu))
  815                         continue;
  816                 pcpu = pcpu_find(cpu);
  817                 thread_lock(curthread);
  818                 sched_bind(curthread, cpu);
  819                 thread_unlock(curthread);
  820                 while (gen[cpu] == pcpu->pc_idlethread->td_generation) {
  821                         error = tsleep(quiesce_cpus, prio, wmesg, 1);
                              /* EWOULDBLOCK is the expected timeout; anything else aborts. */
  822                         if (error != EWOULDBLOCK)
  823                                 goto out;
  824                         error = 0;
  825                 }
  826         }
  827 out:
  828         thread_lock(curthread);
  829         sched_unbind(curthread);
  830         thread_unlock(curthread);
  831 
  832         return (error);
  833 }
  834 
  835 int
  836 quiesce_all_cpus(const char *wmesg, int prio)
  837 {
  838 
  839         return quiesce_cpus(all_cpus, wmesg, prio);
  840 }
  841 
  842 /* Extra care is taken with this sysctl because the data type is volatile */
      /*
       * Handler for kern.smp.active: copies smp_started into a local int and
       * hands that copy to SYSCTL_OUT().  Returns the SYSCTL_OUT() result.
       */
  843 static int
  844 sysctl_kern_smp_active(SYSCTL_HANDLER_ARGS)
  845 {
  846         int error, active;
  847 
  848         active = smp_started;
  849         error = SYSCTL_OUT(req, &active, sizeof(active));
  850         return (error);
  851 }
  852 

Cache object: ee260956954150ef1a162821238ceacc


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]


This page is part of the FreeBSD/Linux Kernel Cross-Reference, and was automatically generated using a modified version of the LXR engine.