FreeBSD/Linux Kernel Cross Reference
sys/kernel/rcutree_plugin.h


    1 /*
    2  * Read-Copy Update mechanism for mutual exclusion (tree-based version)
    3  * Internal non-public definitions that provide either classic
    4  * or preemptible semantics.
    5  *
    6  * This program is free software; you can redistribute it and/or modify
    7  * it under the terms of the GNU General Public License as published by
    8  * the Free Software Foundation; either version 2 of the License, or
    9  * (at your option) any later version.
   10  *
   11  * This program is distributed in the hope that it will be useful,
   12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
   13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   14  * GNU General Public License for more details.
   15  *
   16  * You should have received a copy of the GNU General Public License
   17  * along with this program; if not, write to the Free Software
   18  * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
   19  *
   20  * Copyright Red Hat, 2009
   21  * Copyright IBM Corporation, 2009
   22  *
   23  * Author: Ingo Molnar <mingo@elte.hu>
   24  *         Paul E. McKenney <paulmck@linux.vnet.ibm.com>
   25  */
   26 
   27 #include <linux/delay.h>
   28 #include <linux/gfp.h>
   29 #include <linux/oom.h>
   30 #include <linux/smpboot.h>
   31 
   32 #define RCU_KTHREAD_PRIO 1
   33 
   34 #ifdef CONFIG_RCU_BOOST
   35 #define RCU_BOOST_PRIO CONFIG_RCU_BOOST_PRIO
   36 #else
   37 #define RCU_BOOST_PRIO RCU_KTHREAD_PRIO
   38 #endif
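
/*
 * Note: these two values are SCHED_FIFO priorities.  RCU_KTHREAD_PRIO is
 * applied to the per-CPU callback kthreads in rcu_cpu_kthread_setup(), and
 * RCU_BOOST_PRIO to the per-rcu_node boost kthreads in
 * rcu_spawn_one_boost_kthread(), both later in this file.
 */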
   39 
   40 #ifdef CONFIG_RCU_NOCB_CPU
   41 static cpumask_var_t rcu_nocb_mask; /* CPUs to have callbacks offloaded. */
   42 static bool have_rcu_nocb_mask;     /* Was rcu_nocb_mask allocated? */
   43 static bool rcu_nocb_poll;          /* Offload kthreads are to poll. */
   44 module_param(rcu_nocb_poll, bool, 0444);
   45 static char __initdata nocb_buf[NR_CPUS * 5];
   46 #endif /* #ifdef CONFIG_RCU_NOCB_CPU */
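
/*
 * With CONFIG_RCU_NOCB_CPU, the CPUs in rcu_nocb_mask have their RCU
 * callbacks invoked by dedicated "no-CBs" kthreads instead of in softirq
 * context on the CPU that queued them; rcu_nocb_poll additionally makes
 * those kthreads poll for new callbacks rather than wait to be woken.
 * The resulting CPU set is reported at boot by
 * rcu_bootup_announce_oddness() below.
 */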
   47 
   48 /*
   49  * Check the RCU kernel configuration parameters and print informative
   50  * messages about anything out of the ordinary.  If you like #ifdef, you
   51  * will love this function.
   52  */
   53 static void __init rcu_bootup_announce_oddness(void)
   54 {
   55 #ifdef CONFIG_RCU_TRACE
   56         printk(KERN_INFO "\tRCU debugfs-based tracing is enabled.\n");
   57 #endif
   58 #if (defined(CONFIG_64BIT) && CONFIG_RCU_FANOUT != 64) || (!defined(CONFIG_64BIT) && CONFIG_RCU_FANOUT != 32)
   59         printk(KERN_INFO "\tCONFIG_RCU_FANOUT set to non-default value of %d\n",
   60                CONFIG_RCU_FANOUT);
   61 #endif
   62 #ifdef CONFIG_RCU_FANOUT_EXACT
   63         printk(KERN_INFO "\tHierarchical RCU autobalancing is disabled.\n");
   64 #endif
   65 #ifdef CONFIG_RCU_FAST_NO_HZ
   66         printk(KERN_INFO
   67                "\tRCU dyntick-idle grace-period acceleration is enabled.\n");
   68 #endif
   69 #ifdef CONFIG_PROVE_RCU
   70         printk(KERN_INFO "\tRCU lockdep checking is enabled.\n");
   71 #endif
   72 #ifdef CONFIG_RCU_TORTURE_TEST_RUNNABLE
   73         printk(KERN_INFO "\tRCU torture testing starts during boot.\n");
   74 #endif
   75 #if defined(CONFIG_TREE_PREEMPT_RCU) && !defined(CONFIG_RCU_CPU_STALL_VERBOSE)
   76         printk(KERN_INFO "\tDump stacks of tasks blocking RCU-preempt GP.\n");
   77 #endif
   78 #if defined(CONFIG_RCU_CPU_STALL_INFO)
   79         printk(KERN_INFO "\tAdditional per-CPU info printed with stalls.\n");
   80 #endif
   81 #if NUM_RCU_LVL_4 != 0
   82         printk(KERN_INFO "\tFour-level hierarchy is enabled.\n");
   83 #endif
   84         if (rcu_fanout_leaf != CONFIG_RCU_FANOUT_LEAF)
   85                 printk(KERN_INFO "\tExperimental boot-time adjustment of leaf fanout to %d.\n", rcu_fanout_leaf);
   86         if (nr_cpu_ids != NR_CPUS)
   87                 printk(KERN_INFO "\tRCU restricting CPUs from NR_CPUS=%d to nr_cpu_ids=%d.\n", NR_CPUS, nr_cpu_ids);
   88 #ifdef CONFIG_RCU_NOCB_CPU
   89         if (have_rcu_nocb_mask) {
   90                 if (cpumask_test_cpu(0, rcu_nocb_mask)) {
   91                         cpumask_clear_cpu(0, rcu_nocb_mask);
   92                         pr_info("\tCPU 0: illegal no-CBs CPU (cleared).\n");
   93                 }
   94                 cpulist_scnprintf(nocb_buf, sizeof(nocb_buf), rcu_nocb_mask);
   95                 pr_info("\tExperimental no-CBs CPUs: %s.\n", nocb_buf);
   96                 if (rcu_nocb_poll)
   97                         pr_info("\tExperimental polled no-CBs CPUs.\n");
   98         }
   99 #endif /* #ifdef CONFIG_RCU_NOCB_CPU */
  100 }
  101 
  102 #ifdef CONFIG_TREE_PREEMPT_RCU
  103 
  104 struct rcu_state rcu_preempt_state =
  105         RCU_STATE_INITIALIZER(rcu_preempt, call_rcu);
  106 DEFINE_PER_CPU(struct rcu_data, rcu_preempt_data);
  107 static struct rcu_state *rcu_state = &rcu_preempt_state;
  108 
  109 static int rcu_preempted_readers_exp(struct rcu_node *rnp);
  110 
  111 /*
  112  * Tell them what RCU they are running.
  113  */
  114 static void __init rcu_bootup_announce(void)
  115 {
  116         printk(KERN_INFO "Preemptible hierarchical RCU implementation.\n");
  117         rcu_bootup_announce_oddness();
  118 }
  119 
  120 /*
  121  * Return the number of RCU-preempt batches processed thus far
  122  * for debug and statistics.
  123  */
  124 long rcu_batches_completed_preempt(void)
  125 {
  126         return rcu_preempt_state.completed;
  127 }
  128 EXPORT_SYMBOL_GPL(rcu_batches_completed_preempt);
  129 
  130 /*
  131  * Return the number of RCU batches processed thus far for debug & stats.
  132  */
  133 long rcu_batches_completed(void)
  134 {
  135         return rcu_batches_completed_preempt();
  136 }
  137 EXPORT_SYMBOL_GPL(rcu_batches_completed);
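
/*
 * Usage sketch (illustrative only; example_full_gp_since() is hypothetical):
 * a debug/statistics consumer can snapshot the completed-batches counter and
 * later check whether a full grace period has elapsed.  An advance of at
 * least two is required, because an advance of one might correspond to a
 * grace period that was already in progress when the snapshot was taken.
 */
static bool example_full_gp_since(long snap)
{
        return rcu_batches_completed() - snap >= 2;
}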
  138 
  139 /*
  140  * Force a quiescent state for preemptible RCU.
  141  */
  142 void rcu_force_quiescent_state(void)
  143 {
  144         force_quiescent_state(&rcu_preempt_state);
  145 }
  146 EXPORT_SYMBOL_GPL(rcu_force_quiescent_state);
  147 
  148 /*
  149  * Record a preemptible-RCU quiescent state for the specified CPU.  Note
  150  * that this just means that the task currently running on the CPU is
  151  * not in an RCU read-side critical section.  There might be any number
  152  * of tasks blocked while in an RCU read-side critical section.
  153  *
  154  * Unlike the other rcu_*_qs() functions, callers to this function
  155  * must disable irqs in order to protect the assignment to
  156  * ->rcu_read_unlock_special.
  157  */
  158 static void rcu_preempt_qs(int cpu)
  159 {
  160         struct rcu_data *rdp = &per_cpu(rcu_preempt_data, cpu);
  161 
  162         if (rdp->passed_quiesce == 0)
  163                 trace_rcu_grace_period("rcu_preempt", rdp->gpnum, "cpuqs");
  164         rdp->passed_quiesce = 1;
  165         current->rcu_read_unlock_special &= ~RCU_READ_UNLOCK_NEED_QS;
  166 }
  167 
  168 /*
  169  * We have entered the scheduler, and the current task might soon be
  170  * context-switched away from.  If this task is in an RCU read-side
  171  * critical section, we will no longer be able to rely on the CPU to
  172  * record that fact, so we enqueue the task on the blkd_tasks list.
  173  * The task will dequeue itself when it exits the outermost enclosing
  174  * RCU read-side critical section.  Therefore, the current grace period
  175  * cannot be permitted to complete until the blkd_tasks list entries
  176  * predating the current grace period drain, in other words, until
  177  * rnp->gp_tasks becomes NULL.
  178  *
  179  * Caller must disable preemption.
  180  */
  181 static void rcu_preempt_note_context_switch(int cpu)
  182 {
  183         struct task_struct *t = current;
  184         unsigned long flags;
  185         struct rcu_data *rdp;
  186         struct rcu_node *rnp;
  187 
  188         if (t->rcu_read_lock_nesting > 0 &&
  189             (t->rcu_read_unlock_special & RCU_READ_UNLOCK_BLOCKED) == 0) {
  190 
  191                 /* Possibly blocking in an RCU read-side critical section. */
  192                 rdp = per_cpu_ptr(rcu_preempt_state.rda, cpu);
  193                 rnp = rdp->mynode;
  194                 raw_spin_lock_irqsave(&rnp->lock, flags);
  195                 t->rcu_read_unlock_special |= RCU_READ_UNLOCK_BLOCKED;
  196                 t->rcu_blocked_node = rnp;
  197 
  198                 /*
  199                  * If this CPU has already checked in, then this task
  200                  * will hold up the next grace period rather than the
  201                  * current grace period.  Queue the task accordingly.
  202                  * If the task is queued for the current grace period
  203                  * (i.e., this CPU has not yet passed through a quiescent
  204                  * state for the current grace period), then as long
  205                  * as that task remains queued, the current grace period
  206                  * cannot end.  Note that there is some uncertainty as
  207                  * to exactly when the current grace period started.
  208                  * We take a conservative approach, which can result
  209                  * in unnecessarily waiting on tasks that started very
  210                  * slightly after the current grace period began.  C'est
  211                  * la vie!!!
  212                  *
  213                  * But first, note that the current CPU must still be
  214                  * on line!
  215                  */
  216                 WARN_ON_ONCE((rdp->grpmask & rnp->qsmaskinit) == 0);
  217                 WARN_ON_ONCE(!list_empty(&t->rcu_node_entry));
  218                 if ((rnp->qsmask & rdp->grpmask) && rnp->gp_tasks != NULL) {
  219                         list_add(&t->rcu_node_entry, rnp->gp_tasks->prev);
  220                         rnp->gp_tasks = &t->rcu_node_entry;
  221 #ifdef CONFIG_RCU_BOOST
  222                         if (rnp->boost_tasks != NULL)
  223                                 rnp->boost_tasks = rnp->gp_tasks;
  224 #endif /* #ifdef CONFIG_RCU_BOOST */
  225                 } else {
  226                         list_add(&t->rcu_node_entry, &rnp->blkd_tasks);
  227                         if (rnp->qsmask & rdp->grpmask)
  228                                 rnp->gp_tasks = &t->rcu_node_entry;
  229                 }
  230                 trace_rcu_preempt_task(rdp->rsp->name,
  231                                        t->pid,
  232                                        (rnp->qsmask & rdp->grpmask)
  233                                        ? rnp->gpnum
  234                                        : rnp->gpnum + 1);
  235                 raw_spin_unlock_irqrestore(&rnp->lock, flags);
  236         } else if (t->rcu_read_lock_nesting < 0 &&
  237                    t->rcu_read_unlock_special) {
  238 
  239                 /*
  240                  * Complete exit from RCU read-side critical section on
  241                  * behalf of preempted instance of __rcu_read_unlock().
  242                  */
  243                 rcu_read_unlock_special(t);
  244         }
  245 
  246         /*
  247          * Either we were not in an RCU read-side critical section to
  248          * begin with, or we have now recorded that critical section
  249          * globally.  Either way, we can now note a quiescent state
  250          * for this CPU.  Again, if we were in an RCU read-side critical
  251          * section, and if that critical section was blocking the current
  252          * grace period, then the fact that the task has been enqueued
  253          * means that we continue to block the current grace period.
  254          */
  255         local_irq_save(flags);
  256         rcu_preempt_qs(cpu);
  257         local_irq_restore(flags);
  258 }
  259 
  260 /*
  261  * Check for preempted RCU readers blocking the current grace period
  262  * for the specified rcu_node structure.  If the caller needs a reliable
  263  * answer, it must hold the rcu_node's ->lock.
  264  */
  265 static int rcu_preempt_blocked_readers_cgp(struct rcu_node *rnp)
  266 {
  267         return rnp->gp_tasks != NULL;
  268 }
  269 
  270 /*
  271  * Record a quiescent state for all tasks that were previously queued
  272  * on the specified rcu_node structure and that were blocking the current
  273  * RCU grace period.  The caller must hold the specified rnp->lock with
  274  * irqs disabled, and this lock is released upon return, but irqs remain
  275  * disabled.
  276  */
  277 static void rcu_report_unblock_qs_rnp(struct rcu_node *rnp, unsigned long flags)
  278         __releases(rnp->lock)
  279 {
  280         unsigned long mask;
  281         struct rcu_node *rnp_p;
  282 
  283         if (rnp->qsmask != 0 || rcu_preempt_blocked_readers_cgp(rnp)) {
  284                 raw_spin_unlock_irqrestore(&rnp->lock, flags);
  285                 return;  /* Still need more quiescent states! */
  286         }
  287 
  288         rnp_p = rnp->parent;
  289         if (rnp_p == NULL) {
  290                 /*
  291                  * Either there is only one rcu_node in the tree,
  292                  * or tasks were kicked up to root rcu_node due to
  293                  * CPUs going offline.
  294                  */
  295                 rcu_report_qs_rsp(&rcu_preempt_state, flags);
  296                 return;
  297         }
  298 
  299         /* Report up the rest of the hierarchy. */
  300         mask = rnp->grpmask;
  301         raw_spin_unlock(&rnp->lock);    /* irqs remain disabled. */
  302         raw_spin_lock(&rnp_p->lock);    /* irqs already disabled. */
  303         rcu_report_qs_rnp(mask, &rcu_preempt_state, rnp_p, flags);
  304 }
  305 
  306 /*
  307  * Advance a ->blkd_tasks-list pointer to the next entry, returning
  308  * NULL instead if at the end of the list.
  309  */
  310 static struct list_head *rcu_next_node_entry(struct task_struct *t,
  311                                              struct rcu_node *rnp)
  312 {
  313         struct list_head *np;
  314 
  315         np = t->rcu_node_entry.next;
  316         if (np == &rnp->blkd_tasks)
  317                 np = NULL;
  318         return np;
  319 }
  320 
  321 /*
  322  * Handle special cases during rcu_read_unlock(), such as needing to
  323  * notify RCU core processing or task having blocked during the RCU
  324  * read-side critical section.
  325  */
  326 void rcu_read_unlock_special(struct task_struct *t)
  327 {
  328         int empty;
  329         int empty_exp;
  330         int empty_exp_now;
  331         unsigned long flags;
  332         struct list_head *np;
  333 #ifdef CONFIG_RCU_BOOST
  334         struct rt_mutex *rbmp = NULL;
  335 #endif /* #ifdef CONFIG_RCU_BOOST */
  336         struct rcu_node *rnp;
  337         int special;
  338 
  339         /* NMI handlers cannot block and cannot safely manipulate state. */
  340         if (in_nmi())
  341                 return;
  342 
  343         local_irq_save(flags);
  344 
  345         /*
  346          * If RCU core is waiting for this CPU to exit critical section,
  347          * let it know that we have done so.
  348          */
  349         special = t->rcu_read_unlock_special;
  350         if (special & RCU_READ_UNLOCK_NEED_QS) {
  351                 rcu_preempt_qs(smp_processor_id());
  352         }
  353 
  354         /* Hardware IRQ handlers cannot block. */
  355         if (in_irq() || in_serving_softirq()) {
  356                 local_irq_restore(flags);
  357                 return;
  358         }
  359 
  360         /* Clean up if blocked during RCU read-side critical section. */
  361         if (special & RCU_READ_UNLOCK_BLOCKED) {
  362                 t->rcu_read_unlock_special &= ~RCU_READ_UNLOCK_BLOCKED;
  363 
  364                 /*
  365                  * Remove this task from the list it blocked on.  The
  366                  * task can migrate while we acquire the lock, but at
  367  * most one time, so at most two passes through the loop.
  368                  */
  369                 for (;;) {
  370                         rnp = t->rcu_blocked_node;
  371                         raw_spin_lock(&rnp->lock);  /* irqs already disabled. */
  372                         if (rnp == t->rcu_blocked_node)
  373                                 break;
  374                         raw_spin_unlock(&rnp->lock); /* irqs remain disabled. */
  375                 }
  376                 empty = !rcu_preempt_blocked_readers_cgp(rnp);
  377                 empty_exp = !rcu_preempted_readers_exp(rnp);
  378                 smp_mb(); /* ensure expedited fastpath sees end of RCU c-s. */
  379                 np = rcu_next_node_entry(t, rnp);
  380                 list_del_init(&t->rcu_node_entry);
  381                 t->rcu_blocked_node = NULL;
  382                 trace_rcu_unlock_preempted_task("rcu_preempt",
  383                                                 rnp->gpnum, t->pid);
  384                 if (&t->rcu_node_entry == rnp->gp_tasks)
  385                         rnp->gp_tasks = np;
  386                 if (&t->rcu_node_entry == rnp->exp_tasks)
  387                         rnp->exp_tasks = np;
  388 #ifdef CONFIG_RCU_BOOST
  389                 if (&t->rcu_node_entry == rnp->boost_tasks)
  390                         rnp->boost_tasks = np;
  391                 /* Snapshot/clear ->rcu_boost_mutex with rcu_node lock held. */
  392                 if (t->rcu_boost_mutex) {
  393                         rbmp = t->rcu_boost_mutex;
  394                         t->rcu_boost_mutex = NULL;
  395                 }
  396 #endif /* #ifdef CONFIG_RCU_BOOST */
  397 
  398                 /*
  399                  * If this was the last task on the current list, and if
  400                  * we aren't waiting on any CPUs, report the quiescent state.
  401                  * Note that rcu_report_unblock_qs_rnp() releases rnp->lock,
  402                  * so we must take a snapshot of the expedited state.
  403                  */
  404                 empty_exp_now = !rcu_preempted_readers_exp(rnp);
  405                 if (!empty && !rcu_preempt_blocked_readers_cgp(rnp)) {
  406                         trace_rcu_quiescent_state_report("preempt_rcu",
  407                                                          rnp->gpnum,
  408                                                          0, rnp->qsmask,
  409                                                          rnp->level,
  410                                                          rnp->grplo,
  411                                                          rnp->grphi,
  412                                                          !!rnp->gp_tasks);
  413                         rcu_report_unblock_qs_rnp(rnp, flags);
  414                 } else {
  415                         raw_spin_unlock_irqrestore(&rnp->lock, flags);
  416                 }
  417 
  418 #ifdef CONFIG_RCU_BOOST
  419                 /* Unboost if we were boosted. */
  420                 if (rbmp)
  421                         rt_mutex_unlock(rbmp);
  422 #endif /* #ifdef CONFIG_RCU_BOOST */
  423 
  424                 /*
  425                  * If this was the last task on the expedited lists,
  426                  * then we need to report up the rcu_node hierarchy.
  427                  */
  428                 if (!empty_exp && empty_exp_now)
  429                         rcu_report_exp_rnp(&rcu_preempt_state, rnp, true);
  430         } else {
  431                 local_irq_restore(flags);
  432         }
  433 }
  434 
  435 #ifdef CONFIG_RCU_CPU_STALL_VERBOSE
  436 
  437 /*
  438  * Dump detailed information for all tasks blocking the current RCU
  439  * grace period on the specified rcu_node structure.
  440  */
  441 static void rcu_print_detail_task_stall_rnp(struct rcu_node *rnp)
  442 {
  443         unsigned long flags;
  444         struct task_struct *t;
  445 
  446         raw_spin_lock_irqsave(&rnp->lock, flags);
  447         if (!rcu_preempt_blocked_readers_cgp(rnp)) {
  448                 raw_spin_unlock_irqrestore(&rnp->lock, flags);
  449                 return;
  450         }
  451         t = list_entry(rnp->gp_tasks,
  452                        struct task_struct, rcu_node_entry);
  453         list_for_each_entry_continue(t, &rnp->blkd_tasks, rcu_node_entry)
  454                 sched_show_task(t);
  455         raw_spin_unlock_irqrestore(&rnp->lock, flags);
  456 }
  457 
  458 /*
  459  * Dump detailed information for all tasks blocking the current RCU
  460  * grace period.
  461  */
  462 static void rcu_print_detail_task_stall(struct rcu_state *rsp)
  463 {
  464         struct rcu_node *rnp = rcu_get_root(rsp);
  465 
  466         rcu_print_detail_task_stall_rnp(rnp);
  467         rcu_for_each_leaf_node(rsp, rnp)
  468                 rcu_print_detail_task_stall_rnp(rnp);
  469 }
  470 
  471 #else /* #ifdef CONFIG_RCU_CPU_STALL_VERBOSE */
  472 
  473 static void rcu_print_detail_task_stall(struct rcu_state *rsp)
  474 {
  475 }
  476 
  477 #endif /* #else #ifdef CONFIG_RCU_CPU_STALL_VERBOSE */
  478 
  479 #ifdef CONFIG_RCU_CPU_STALL_INFO
  480 
  481 static void rcu_print_task_stall_begin(struct rcu_node *rnp)
  482 {
  483         printk(KERN_ERR "\tTasks blocked on level-%d rcu_node (CPUs %d-%d):",
  484                rnp->level, rnp->grplo, rnp->grphi);
  485 }
  486 
  487 static void rcu_print_task_stall_end(void)
  488 {
  489         printk(KERN_CONT "\n");
  490 }
  491 
  492 #else /* #ifdef CONFIG_RCU_CPU_STALL_INFO */
  493 
  494 static void rcu_print_task_stall_begin(struct rcu_node *rnp)
  495 {
  496 }
  497 
  498 static void rcu_print_task_stall_end(void)
  499 {
  500 }
  501 
  502 #endif /* #else #ifdef CONFIG_RCU_CPU_STALL_INFO */
  503 
  504 /*
  505  * Scan the current list of tasks blocked within RCU read-side critical
  506  * sections, printing out the tid of each.
  507  */
  508 static int rcu_print_task_stall(struct rcu_node *rnp)
  509 {
  510         struct task_struct *t;
  511         int ndetected = 0;
  512 
  513         if (!rcu_preempt_blocked_readers_cgp(rnp))
  514                 return 0;
  515         rcu_print_task_stall_begin(rnp);
  516         t = list_entry(rnp->gp_tasks,
  517                        struct task_struct, rcu_node_entry);
  518         list_for_each_entry_continue(t, &rnp->blkd_tasks, rcu_node_entry) {
  519                 printk(KERN_CONT " P%d", t->pid);
  520                 ndetected++;
  521         }
  522         rcu_print_task_stall_end();
  523         return ndetected;
  524 }
  525 
  526 /*
  527  * Check that the list of blocked tasks for the newly completed grace
  528  * period is in fact empty.  It is a serious bug to complete a grace
  529  * period that still has RCU readers blocked!  This function must be
  530  * invoked -before- updating this rnp's ->gpnum, and the rnp's ->lock
  531  * must be held by the caller.
  532  *
  533  * Also, if there are blocked tasks on the list, they automatically
  534  * block the newly created grace period, so set up ->gp_tasks accordingly.
  535  */
  536 static void rcu_preempt_check_blocked_tasks(struct rcu_node *rnp)
  537 {
  538         WARN_ON_ONCE(rcu_preempt_blocked_readers_cgp(rnp));
  539         if (!list_empty(&rnp->blkd_tasks))
  540                 rnp->gp_tasks = rnp->blkd_tasks.next;
  541         WARN_ON_ONCE(rnp->qsmask);
  542 }
  543 
  544 #ifdef CONFIG_HOTPLUG_CPU
  545 
  546 /*
  547  * Handle tasklist migration for case in which all CPUs covered by the
  548  * specified rcu_node have gone offline.  Move them up to the root
  549  * rcu_node.  The reason for not just moving them to the immediate
  550  * parent is to remove the need for rcu_read_unlock_special() to
  551  * make more than two attempts to acquire the target rcu_node's lock.
  552  * Returns nonzero if there was previously at least one task blocking
  553  * the current grace period on the specified rcu_node structure; the
  554  * return value is a combination of the RCU_OFL_TASKS_NORM_GP and
  555  * RCU_OFL_TASKS_EXP_GP bits, indicating whether a normal and/or an
  556  * expedited grace period was being blocked.
  557  *
  558  * The caller must hold rnp->lock with irqs disabled.
  559  */
  560 static int rcu_preempt_offline_tasks(struct rcu_state *rsp,
  561                                      struct rcu_node *rnp,
  562                                      struct rcu_data *rdp)
  563 {
  564         struct list_head *lp;
  565         struct list_head *lp_root;
  566         int retval = 0;
  567         struct rcu_node *rnp_root = rcu_get_root(rsp);
  568         struct task_struct *t;
  569 
  570         if (rnp == rnp_root) {
  571                 WARN_ONCE(1, "Last CPU thought to be offlined?");
  572                 return 0;  /* Shouldn't happen: at least one CPU online. */
  573         }
  574 
  575         /* If we are on an internal node, complain bitterly. */
  576         WARN_ON_ONCE(rnp != rdp->mynode);
  577 
  578         /*
  579          * Move tasks up to root rcu_node.  Don't try to get fancy for
  580          * this corner-case operation -- just put this node's tasks
  581          * at the head of the root node's list, and update the root node's
  582          * ->gp_tasks and ->exp_tasks pointers to those of this node's,
  583          * if non-NULL.  This might result in waiting for more tasks than
  584          * absolutely necessary, but this is a good performance/complexity
  585          * tradeoff.
  586          */
  587         if (rcu_preempt_blocked_readers_cgp(rnp) && rnp->qsmask == 0)
  588                 retval |= RCU_OFL_TASKS_NORM_GP;
  589         if (rcu_preempted_readers_exp(rnp))
  590                 retval |= RCU_OFL_TASKS_EXP_GP;
  591         lp = &rnp->blkd_tasks;
  592         lp_root = &rnp_root->blkd_tasks;
  593         while (!list_empty(lp)) {
  594                 t = list_entry(lp->next, typeof(*t), rcu_node_entry);
  595                 raw_spin_lock(&rnp_root->lock); /* irqs already disabled */
  596                 list_del(&t->rcu_node_entry);
  597                 t->rcu_blocked_node = rnp_root;
  598                 list_add(&t->rcu_node_entry, lp_root);
  599                 if (&t->rcu_node_entry == rnp->gp_tasks)
  600                         rnp_root->gp_tasks = rnp->gp_tasks;
  601                 if (&t->rcu_node_entry == rnp->exp_tasks)
  602                         rnp_root->exp_tasks = rnp->exp_tasks;
  603 #ifdef CONFIG_RCU_BOOST
  604                 if (&t->rcu_node_entry == rnp->boost_tasks)
  605                         rnp_root->boost_tasks = rnp->boost_tasks;
  606 #endif /* #ifdef CONFIG_RCU_BOOST */
  607                 raw_spin_unlock(&rnp_root->lock); /* irqs still disabled */
  608         }
  609 
  610         rnp->gp_tasks = NULL;
  611         rnp->exp_tasks = NULL;
  612 #ifdef CONFIG_RCU_BOOST
  613         rnp->boost_tasks = NULL;
  614         /*
  615          * In case root is being boosted and leaf was not.  Make sure
  616          * that we boost the tasks blocking the current grace period
  617          * in this case.
  618          */
  619         raw_spin_lock(&rnp_root->lock); /* irqs already disabled */
  620         if (rnp_root->boost_tasks != NULL &&
  621             rnp_root->boost_tasks != rnp_root->gp_tasks &&
  622             rnp_root->boost_tasks != rnp_root->exp_tasks)
  623                 rnp_root->boost_tasks = rnp_root->gp_tasks;
  624         raw_spin_unlock(&rnp_root->lock); /* irqs still disabled */
  625 #endif /* #ifdef CONFIG_RCU_BOOST */
  626 
  627         return retval;
  628 }
  629 
  630 #endif /* #ifdef CONFIG_HOTPLUG_CPU */
  631 
  632 /*
  633  * Check for a quiescent state from the current CPU.  When a task blocks,
  634  * the task is recorded in the corresponding CPU's rcu_node structure,
  635  * which is checked elsewhere.
  636  *
  637  * Caller must disable hard irqs.
  638  */
  639 static void rcu_preempt_check_callbacks(int cpu)
  640 {
  641         struct task_struct *t = current;
  642 
  643         if (t->rcu_read_lock_nesting == 0) {
  644                 rcu_preempt_qs(cpu);
  645                 return;
  646         }
  647         if (t->rcu_read_lock_nesting > 0 &&
  648             per_cpu(rcu_preempt_data, cpu).qs_pending)
  649                 t->rcu_read_unlock_special |= RCU_READ_UNLOCK_NEED_QS;
  650 }
  651 
  652 #ifdef CONFIG_RCU_BOOST
  653 
  654 static void rcu_preempt_do_callbacks(void)
  655 {
  656         rcu_do_batch(&rcu_preempt_state, &__get_cpu_var(rcu_preempt_data));
  657 }
  658 
  659 #endif /* #ifdef CONFIG_RCU_BOOST */
  660 
  661 /*
  662  * Queue a preemptible-RCU callback for invocation after a grace period.
  663  */
  664 void call_rcu(struct rcu_head *head, void (*func)(struct rcu_head *rcu))
  665 {
  666         __call_rcu(head, func, &rcu_preempt_state, -1, 0);
  667 }
  668 EXPORT_SYMBOL_GPL(call_rcu);
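
/*
 * Usage sketch (illustrative only; "struct foo", foo_reclaim(), and
 * foo_remove() are hypothetical): the canonical call_rcu() pattern is to
 * unpublish the element first, then let the callback free it once all
 * pre-existing readers are done.
 */
struct foo {
        struct list_head list;
        struct rcu_head rcu;
        int data;
};

static void foo_reclaim(struct rcu_head *rcu)
{
        struct foo *fp = container_of(rcu, struct foo, rcu);

        kfree(fp);
}

/* Caller holds the update-side lock protecting the list. */
static void foo_remove(struct foo *fp)
{
        list_del_rcu(&fp->list);         /* Unpublish... */
        call_rcu(&fp->rcu, foo_reclaim); /* ...then free after a grace period. */
}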
  669 
  670 /*
  671  * Queue an RCU callback for lazy invocation after a grace period.
  672  * This will likely be later named something like "call_rcu_lazy()",
  673  * but this change will require some way of tagging the lazy RCU
  674  * callbacks in the list of pending callbacks.  Until then, this
  675  * function may only be called from __kfree_rcu().
  676  */
  677 void kfree_call_rcu(struct rcu_head *head,
  678                     void (*func)(struct rcu_head *rcu))
  679 {
  680         __call_rcu(head, func, &rcu_preempt_state, -1, 1);
  681 }
  682 EXPORT_SYMBOL_GPL(kfree_call_rcu);
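
/*
 * Usage sketch (illustrative only; reuses the hypothetical "struct foo"
 * from the call_rcu() sketch above): callers do not invoke kfree_call_rcu()
 * directly, they use the kfree_rcu() wrapper, naming the embedded rcu_head
 * field.
 */
/* Caller holds the update-side lock protecting the list. */
static void foo_remove_lazy(struct foo *fp)
{
        list_del_rcu(&fp->list);
        kfree_rcu(fp, rcu);     /* Like call_rcu() with a kfree() callback. */
}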
  683 
  684 /**
  685  * synchronize_rcu - wait until a grace period has elapsed.
  686  *
  687  * Control will return to the caller some time after a full grace
  688  * period has elapsed, in other words after all currently executing RCU
  689  * read-side critical sections have completed.  Note, however, that
  690  * upon return from synchronize_rcu(), the caller might well be executing
  691  * concurrently with new RCU read-side critical sections that began while
  692  * synchronize_rcu() was waiting.  RCU read-side critical sections are
  693  * delimited by rcu_read_lock() and rcu_read_unlock(), and may be nested.
  694  *
  695  * See the description of synchronize_sched() for more detailed information
  696  * on memory ordering guarantees.
  697  */
  698 void synchronize_rcu(void)
  699 {
  700         rcu_lockdep_assert(!lock_is_held(&rcu_bh_lock_map) &&
  701                            !lock_is_held(&rcu_lock_map) &&
  702                            !lock_is_held(&rcu_sched_lock_map),
  703                            "Illegal synchronize_rcu() in RCU read-side critical section");
  704         if (!rcu_scheduler_active)
  705                 return;
  706         if (rcu_expedited)
  707                 synchronize_rcu_expedited();
  708         else
  709                 wait_rcu_gp(call_rcu);
  710 }
  711 EXPORT_SYMBOL_GPL(synchronize_rcu);
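
/*
 * Usage sketch (illustrative only; gbl_foo, gbl_foo_lock, and the accessors
 * are hypothetical): the classic reader/updater pairing that
 * synchronize_rcu() supports, again using the hypothetical "struct foo"
 * from the call_rcu() sketch above.
 */
static struct foo __rcu *gbl_foo;
static DEFINE_SPINLOCK(gbl_foo_lock);

static int foo_read_data(void)
{
        struct foo *fp;
        int val;

        rcu_read_lock();
        fp = rcu_dereference(gbl_foo);
        val = fp ? fp->data : -1;
        rcu_read_unlock();
        return val;
}

static void foo_replace(struct foo *newfp)
{
        struct foo *old;

        spin_lock(&gbl_foo_lock);
        old = rcu_dereference_protected(gbl_foo,
                                        lockdep_is_held(&gbl_foo_lock));
        rcu_assign_pointer(gbl_foo, newfp);
        spin_unlock(&gbl_foo_lock);
        synchronize_rcu();      /* Wait for pre-existing readers to finish. */
        kfree(old);
}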
  712 
  713 static DECLARE_WAIT_QUEUE_HEAD(sync_rcu_preempt_exp_wq);
  714 static unsigned long sync_rcu_preempt_exp_count;
  715 static DEFINE_MUTEX(sync_rcu_preempt_exp_mutex);
  716 
  717 /*
  718  * Return non-zero if there are any tasks in RCU read-side critical
  719  * sections blocking the current preemptible-RCU expedited grace period.
  720  * If there is no preemptible-RCU expedited grace period currently in
  721  * progress, returns zero unconditionally.
  722  */
  723 static int rcu_preempted_readers_exp(struct rcu_node *rnp)
  724 {
  725         return rnp->exp_tasks != NULL;
  726 }
  727 
  728 /*
  729  * Return non-zero if there is no RCU expedited grace period in progress
  730  * for the specified rcu_node structure, in other words, if all CPUs and
  731  * tasks covered by the specified rcu_node structure have done their bit
  732  * for the current expedited grace period.  Works only for preemptible
  733  * RCU -- other RCU implementation use other means.
  734  *
  735  * Caller must hold sync_rcu_preempt_exp_mutex.
  736  */
  737 static int sync_rcu_preempt_exp_done(struct rcu_node *rnp)
  738 {
  739         return !rcu_preempted_readers_exp(rnp) &&
  740                ACCESS_ONCE(rnp->expmask) == 0;
  741 }
  742 
  743 /*
  744  * Report the exit from RCU read-side critical section for the last task
  745  * that queued itself during or before the current expedited preemptible-RCU
  746  * grace period.  This event is reported either to the rcu_node structure on
  747  * which the task was queued or to one of that rcu_node structure's ancestors,
  748  * recursively up the tree.  (Calm down, calm down, we do the recursion
  749  * iteratively!)
  750  *
  751  * Most callers will set the "wake" flag, but the task initiating the
  752  * expedited grace period need not wake itself.
  753  *
  754  * Caller must hold sync_rcu_preempt_exp_mutex.
  755  */
  756 static void rcu_report_exp_rnp(struct rcu_state *rsp, struct rcu_node *rnp,
  757                                bool wake)
  758 {
  759         unsigned long flags;
  760         unsigned long mask;
  761 
  762         raw_spin_lock_irqsave(&rnp->lock, flags);
  763         for (;;) {
  764                 if (!sync_rcu_preempt_exp_done(rnp)) {
  765                         raw_spin_unlock_irqrestore(&rnp->lock, flags);
  766                         break;
  767                 }
  768                 if (rnp->parent == NULL) {
  769                         raw_spin_unlock_irqrestore(&rnp->lock, flags);
  770                         if (wake)
  771                                 wake_up(&sync_rcu_preempt_exp_wq);
  772                         break;
  773                 }
  774                 mask = rnp->grpmask;
  775                 raw_spin_unlock(&rnp->lock); /* irqs remain disabled */
  776                 rnp = rnp->parent;
  777                 raw_spin_lock(&rnp->lock); /* irqs already disabled */
  778                 rnp->expmask &= ~mask;
  779         }
  780 }
  781 
  782 /*
  783  * Snapshot the tasks blocking the newly started preemptible-RCU expedited
  784  * grace period for the specified rcu_node structure.  If there are no such
  785  * tasks, report it up the rcu_node hierarchy.
  786  *
  787  * Caller must hold sync_rcu_preempt_exp_mutex and must exclude
  788  * CPU hotplug operations.
  789  */
  790 static void
  791 sync_rcu_preempt_exp_init(struct rcu_state *rsp, struct rcu_node *rnp)
  792 {
  793         unsigned long flags;
  794         int must_wait = 0;
  795 
  796         raw_spin_lock_irqsave(&rnp->lock, flags);
  797         if (list_empty(&rnp->blkd_tasks)) {
  798                 raw_spin_unlock_irqrestore(&rnp->lock, flags);
  799         } else {
  800                 rnp->exp_tasks = rnp->blkd_tasks.next;
  801                 rcu_initiate_boost(rnp, flags);  /* releases rnp->lock */
  802                 must_wait = 1;
  803         }
  804         if (!must_wait)
  805                 rcu_report_exp_rnp(rsp, rnp, false); /* Don't wake self. */
  806 }
  807 
  808 /**
  809  * synchronize_rcu_expedited - Brute-force RCU grace period
  810  *
  811  * Wait for an RCU-preempt grace period, but expedite it.  The basic
  812  * idea is to invoke synchronize_sched_expedited() to push all the tasks to
  813  * the ->blkd_tasks lists and wait for this list to drain.  This consumes
  814  * significant time on all CPUs and is unfriendly to real-time workloads,
  815  * so it is not recommended for any sort of common-case code.
  816  * In fact, if you are using synchronize_rcu_expedited() in a loop,
  817  * please restructure your code to batch your updates, and then use a
  818  * single synchronize_rcu() instead.
  819  *
  820  * Note that it is illegal to call this function while holding any lock
  821  * that is acquired by a CPU-hotplug notifier.  And yes, it is also illegal
  822  * to call this function from a CPU-hotplug notifier.  Failing to observe
  823  * these restrictions will result in deadlock.
  824  */
  825 void synchronize_rcu_expedited(void)
  826 {
  827         unsigned long flags;
  828         struct rcu_node *rnp;
  829         struct rcu_state *rsp = &rcu_preempt_state;
  830         unsigned long snap;
  831         int trycount = 0;
  832 
  833         smp_mb(); /* Caller's modifications seen first by other CPUs. */
  834         snap = ACCESS_ONCE(sync_rcu_preempt_exp_count) + 1;
  835         smp_mb(); /* Above access cannot bleed into critical section. */
  836 
  837         /*
  838          * Block CPU-hotplug operations.  This means that any CPU-hotplug
  839          * operation that finds an rcu_node structure with tasks in the
  840          * process of being boosted will know that all tasks blocking
  841          * this expedited grace period will already be in the process of
  842          * being boosted.  This simplifies the process of moving tasks
  843          * from leaf to root rcu_node structures.
  844          */
  845         get_online_cpus();
  846 
  847         /*
  848          * Acquire lock, falling back to synchronize_rcu() if too many
  849          * lock-acquisition failures.  Of course, if someone does the
  850          * expedited grace period for us, just leave.
  851          */
  852         while (!mutex_trylock(&sync_rcu_preempt_exp_mutex)) {
  853                 if (ULONG_CMP_LT(snap,
  854                     ACCESS_ONCE(sync_rcu_preempt_exp_count))) {
  855                         put_online_cpus();
  856                         goto mb_ret; /* Others did our work for us. */
  857                 }
  858                 if (trycount++ < 10) {
  859                         udelay(trycount * num_online_cpus());
  860                 } else {
  861                         put_online_cpus();
  862                         wait_rcu_gp(call_rcu);
  863                         return;
  864                 }
  865         }
  866         if (ULONG_CMP_LT(snap, ACCESS_ONCE(sync_rcu_preempt_exp_count))) {
  867                 put_online_cpus();
  868                 goto unlock_mb_ret; /* Others did our work for us. */
  869         }
  870 
  871         /* force all RCU readers onto ->blkd_tasks lists. */
  872         synchronize_sched_expedited();
  873 
  874         /* Initialize ->expmask for all non-leaf rcu_node structures. */
  875         rcu_for_each_nonleaf_node_breadth_first(rsp, rnp) {
  876                 raw_spin_lock_irqsave(&rnp->lock, flags);
  877                 rnp->expmask = rnp->qsmaskinit;
  878                 raw_spin_unlock_irqrestore(&rnp->lock, flags);
  879         }
  880 
  881         /* Snapshot current state of ->blkd_tasks lists. */
  882         rcu_for_each_leaf_node(rsp, rnp)
  883                 sync_rcu_preempt_exp_init(rsp, rnp);
  884         if (NUM_RCU_NODES > 1)
  885                 sync_rcu_preempt_exp_init(rsp, rcu_get_root(rsp));
  886 
  887         put_online_cpus();
  888 
  889         /* Wait for snapshotted ->blkd_tasks lists to drain. */
  890         rnp = rcu_get_root(rsp);
  891         wait_event(sync_rcu_preempt_exp_wq,
  892                    sync_rcu_preempt_exp_done(rnp));
  893 
  894         /* Clean up and exit. */
  895         smp_mb(); /* ensure expedited GP seen before counter increment. */
  896         ACCESS_ONCE(sync_rcu_preempt_exp_count)++;
  897 unlock_mb_ret:
  898         mutex_unlock(&sync_rcu_preempt_exp_mutex);
  899 mb_ret:
  900         smp_mb(); /* ensure subsequent action seen after grace period. */
  901 }
  902 EXPORT_SYMBOL_GPL(synchronize_rcu_expedited);
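
/*
 * Usage sketch (illustrative only; "struct bar", bar_list, and bar_lock are
 * hypothetical) of the restructuring suggested above: rather than paying for
 * one expedited grace period per element, unlink everything first, wait for
 * a single grace period, then free.  A separate updater-private list_head is
 * used so that concurrent readers can keep following ->list.next after
 * list_del_rcu().
 */
struct bar {
        struct list_head list;          /* Linkage in the RCU-protected list. */
        struct list_head reclaim_node;  /* Updater-private linkage. */
};
static LIST_HEAD(bar_list);
static DEFINE_SPINLOCK(bar_lock);

static void bar_remove_all(void)
{
        struct bar *bp, *tmp;
        LIST_HEAD(reclaim);

        spin_lock(&bar_lock);
        list_for_each_entry_safe(bp, tmp, &bar_list, list) {
                list_del_rcu(&bp->list);
                list_add(&bp->reclaim_node, &reclaim);
        }
        spin_unlock(&bar_lock);

        synchronize_rcu();      /* One grace period covers every removal. */

        list_for_each_entry_safe(bp, tmp, &reclaim, reclaim_node)
                kfree(bp);
}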
  903 
  904 /**
  905  * rcu_barrier - Wait until all in-flight call_rcu() callbacks complete.
  906  *
  907  * Note that this primitive does not necessarily wait for an RCU grace period
  908  * to complete.  For example, if there are no RCU callbacks queued anywhere
  909  * in the system, then rcu_barrier() is within its rights to return
  910  * immediately, without waiting for anything, much less an RCU grace period.
  911  */
  912 void rcu_barrier(void)
  913 {
  914         _rcu_barrier(&rcu_preempt_state);
  915 }
  916 EXPORT_SYMBOL_GPL(rcu_barrier);
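
/*
 * Usage sketch (illustrative only; example_module_exit() is hypothetical):
 * the usual reason to call rcu_barrier() is module unload, where callbacks
 * previously posted with call_rcu() must be invoked before the code and data
 * they reference go away.
 */
static void __exit example_module_exit(void)
{
        /*
         * 1. Stop posting new callbacks (unregister hooks, cancel timers).
         * 2. Wait for every already-queued callback to be invoked.
         * 3. Only then tear down module-private state.
         */
        rcu_barrier();
}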
  917 
  918 /*
  919  * Initialize preemptible RCU's state structures.
  920  */
  921 static void __init __rcu_init_preempt(void)
  922 {
  923         rcu_init_one(&rcu_preempt_state, &rcu_preempt_data);
  924 }
  925 
  926 #else /* #ifdef CONFIG_TREE_PREEMPT_RCU */
  927 
  928 static struct rcu_state *rcu_state = &rcu_sched_state;
  929 
  930 /*
  931  * Tell them what RCU they are running.
  932  */
  933 static void __init rcu_bootup_announce(void)
  934 {
  935         printk(KERN_INFO "Hierarchical RCU implementation.\n");
  936         rcu_bootup_announce_oddness();
  937 }
  938 
  939 /*
  940  * Return the number of RCU batches processed thus far for debug & stats.
  941  */
  942 long rcu_batches_completed(void)
  943 {
  944         return rcu_batches_completed_sched();
  945 }
  946 EXPORT_SYMBOL_GPL(rcu_batches_completed);
  947 
  948 /*
  949  * Force a quiescent state for RCU, which, because there is no preemptible
  950  * RCU, becomes the same as rcu-sched.
  951  */
  952 void rcu_force_quiescent_state(void)
  953 {
  954         rcu_sched_force_quiescent_state();
  955 }
  956 EXPORT_SYMBOL_GPL(rcu_force_quiescent_state);
  957 
  958 /*
  959  * Because preemptible RCU does not exist, we never have to check for
  960  * CPUs being in quiescent states.
  961  */
  962 static void rcu_preempt_note_context_switch(int cpu)
  963 {
  964 }
  965 
  966 /*
  967  * Because preemptible RCU does not exist, there are never any preempted
  968  * RCU readers.
  969  */
  970 static int rcu_preempt_blocked_readers_cgp(struct rcu_node *rnp)
  971 {
  972         return 0;
  973 }
  974 
  975 #ifdef CONFIG_HOTPLUG_CPU
  976 
  977 /* Because preemptible RCU does not exist, no quieting of tasks. */
  978 static void rcu_report_unblock_qs_rnp(struct rcu_node *rnp, unsigned long flags)
  979 {
  980         raw_spin_unlock_irqrestore(&rnp->lock, flags);
  981 }
  982 
  983 #endif /* #ifdef CONFIG_HOTPLUG_CPU */
  984 
  985 /*
  986  * Because preemptible RCU does not exist, we never have to check for
  987  * tasks blocked within RCU read-side critical sections.
  988  */
  989 static void rcu_print_detail_task_stall(struct rcu_state *rsp)
  990 {
  991 }
  992 
  993 /*
  994  * Because preemptible RCU does not exist, we never have to check for
  995  * tasks blocked within RCU read-side critical sections.
  996  */
  997 static int rcu_print_task_stall(struct rcu_node *rnp)
  998 {
  999         return 0;
 1000 }
 1001 
 1002 /*
 1003  * Because there is no preemptible RCU, there can be no readers blocked,
 1004  * so there is no need to check for blocked tasks.  Check only for
 1005  * bogus qsmask values.
 1006  */
 1007 static void rcu_preempt_check_blocked_tasks(struct rcu_node *rnp)
 1008 {
 1009         WARN_ON_ONCE(rnp->qsmask);
 1010 }
 1011 
 1012 #ifdef CONFIG_HOTPLUG_CPU
 1013 
 1014 /*
 1015  * Because preemptible RCU does not exist, it never needs to migrate
 1016  * tasks that were blocked within RCU read-side critical sections, and
 1017  * such non-existent tasks cannot possibly have been blocking the current
 1018  * grace period.
 1019  */
 1020 static int rcu_preempt_offline_tasks(struct rcu_state *rsp,
 1021                                      struct rcu_node *rnp,
 1022                                      struct rcu_data *rdp)
 1023 {
 1024         return 0;
 1025 }
 1026 
 1027 #endif /* #ifdef CONFIG_HOTPLUG_CPU */
 1028 
 1029 /*
 1030  * Because preemptible RCU does not exist, it never has any callbacks
 1031  * to check.
 1032  */
 1033 static void rcu_preempt_check_callbacks(int cpu)
 1034 {
 1035 }
 1036 
 1037 /*
 1038  * Queue an RCU callback for lazy invocation after a grace period.
 1039  * This will likely be later named something like "call_rcu_lazy()",
 1040  * but this change will require some way of tagging the lazy RCU
 1041  * callbacks in the list of pending callbacks.  Until then, this
 1042  * function may only be called from __kfree_rcu().
 1043  *
 1044  * Because there is no preemptible RCU, we use RCU-sched instead.
 1045  */
 1046 void kfree_call_rcu(struct rcu_head *head,
 1047                     void (*func)(struct rcu_head *rcu))
 1048 {
 1049         __call_rcu(head, func, &rcu_sched_state, -1, 1);
 1050 }
 1051 EXPORT_SYMBOL_GPL(kfree_call_rcu);
 1052 
 1053 /*
 1054  * Wait for an rcu-preempt grace period, but make it happen quickly.
 1055  * But because preemptible RCU does not exist, map to rcu-sched.
 1056  */
 1057 void synchronize_rcu_expedited(void)
 1058 {
 1059         synchronize_sched_expedited();
 1060 }
 1061 EXPORT_SYMBOL_GPL(synchronize_rcu_expedited);
 1062 
 1063 #ifdef CONFIG_HOTPLUG_CPU
 1064 
 1065 /*
 1066  * Because preemptible RCU does not exist, there is never any need to
 1067  * report on tasks preempted in RCU read-side critical sections during
 1068  * expedited RCU grace periods.
 1069  */
 1070 static void rcu_report_exp_rnp(struct rcu_state *rsp, struct rcu_node *rnp,
 1071                                bool wake)
 1072 {
 1073 }
 1074 
 1075 #endif /* #ifdef CONFIG_HOTPLUG_CPU */
 1076 
 1077 /*
 1078  * Because preemptible RCU does not exist, rcu_barrier() is just
 1079  * another name for rcu_barrier_sched().
 1080  */
 1081 void rcu_barrier(void)
 1082 {
 1083         rcu_barrier_sched();
 1084 }
 1085 EXPORT_SYMBOL_GPL(rcu_barrier);
 1086 
 1087 /*
 1088  * Because preemptible RCU does not exist, it need not be initialized.
 1089  */
 1090 static void __init __rcu_init_preempt(void)
 1091 {
 1092 }
 1093 
 1094 #endif /* #else #ifdef CONFIG_TREE_PREEMPT_RCU */
 1095 
 1096 #ifdef CONFIG_RCU_BOOST
 1097 
 1098 #include "rtmutex_common.h"
 1099 
 1100 #ifdef CONFIG_RCU_TRACE
 1101 
 1102 static void rcu_initiate_boost_trace(struct rcu_node *rnp)
 1103 {
 1104         if (list_empty(&rnp->blkd_tasks))
 1105                 rnp->n_balk_blkd_tasks++;
 1106         else if (rnp->exp_tasks == NULL && rnp->gp_tasks == NULL)
 1107                 rnp->n_balk_exp_gp_tasks++;
 1108         else if (rnp->gp_tasks != NULL && rnp->boost_tasks != NULL)
 1109                 rnp->n_balk_boost_tasks++;
 1110         else if (rnp->gp_tasks != NULL && rnp->qsmask != 0)
 1111                 rnp->n_balk_notblocked++;
 1112         else if (rnp->gp_tasks != NULL &&
 1113                  ULONG_CMP_LT(jiffies, rnp->boost_time))
 1114                 rnp->n_balk_notyet++;
 1115         else
 1116                 rnp->n_balk_nos++;
 1117 }
 1118 
 1119 #else /* #ifdef CONFIG_RCU_TRACE */
 1120 
 1121 static void rcu_initiate_boost_trace(struct rcu_node *rnp)
 1122 {
 1123 }
 1124 
 1125 #endif /* #else #ifdef CONFIG_RCU_TRACE */
 1126 
 1127 static void rcu_wake_cond(struct task_struct *t, int status)
 1128 {
 1129         /*
 1130          * If the thread is yielding, only wake it when this
 1131          * is invoked from idle
 1132          */
 1133         if (status != RCU_KTHREAD_YIELDING || is_idle_task(current))
 1134                 wake_up_process(t);
 1135 }
 1136 
 1137 /*
 1138  * Carry out RCU priority boosting on the task indicated by ->exp_tasks
 1139  * or ->boost_tasks, advancing the pointer to the next task in the
 1140  * ->blkd_tasks list.
 1141  *
 1142  * Note that irqs must be enabled: boosting the task can block.
 1143  * Returns 1 if there are more tasks needing to be boosted.
 1144  */
 1145 static int rcu_boost(struct rcu_node *rnp)
 1146 {
 1147         unsigned long flags;
 1148         struct rt_mutex mtx;
 1149         struct task_struct *t;
 1150         struct list_head *tb;
 1151 
 1152         if (rnp->exp_tasks == NULL && rnp->boost_tasks == NULL)
 1153                 return 0;  /* Nothing left to boost. */
 1154 
 1155         raw_spin_lock_irqsave(&rnp->lock, flags);
 1156 
 1157         /*
 1158          * Recheck under the lock: all tasks in need of boosting
 1159          * might exit their RCU read-side critical sections on their own.
 1160          */
 1161         if (rnp->exp_tasks == NULL && rnp->boost_tasks == NULL) {
 1162                 raw_spin_unlock_irqrestore(&rnp->lock, flags);
 1163                 return 0;
 1164         }
 1165 
 1166         /*
 1167          * Preferentially boost tasks blocking expedited grace periods.
 1168          * This cannot starve the normal grace periods because a second
 1169          * expedited grace period must boost all blocked tasks, including
 1170          * those blocking the pre-existing normal grace period.
 1171          */
 1172         if (rnp->exp_tasks != NULL) {
 1173                 tb = rnp->exp_tasks;
 1174                 rnp->n_exp_boosts++;
 1175         } else {
 1176                 tb = rnp->boost_tasks;
 1177                 rnp->n_normal_boosts++;
 1178         }
 1179         rnp->n_tasks_boosted++;
 1180 
 1181         /*
 1182          * We boost task t by manufacturing an rt_mutex that appears to
 1183          * be held by task t.  We leave a pointer to that rt_mutex where
 1184          * task t can find it, and task t will release the mutex when it
 1185          * exits its outermost RCU read-side critical section.  Then
 1186          * simply acquiring this artificial rt_mutex will boost task
 1187          * t's priority.  (Thanks to tglx for suggesting this approach!)
 1188          *
 1189          * Note that task t must acquire rnp->lock to remove itself from
 1190          * the ->blkd_tasks list, which it will do from exit() if from
 1191          * nowhere else.  We therefore are guaranteed that task t will
 1192          * stay around at least until we drop rnp->lock.  Note that
 1193          * rnp->lock also resolves races between our priority boosting
 1194          * and task t's exiting its outermost RCU read-side critical
 1195          * section.
 1196          */
 1197         t = container_of(tb, struct task_struct, rcu_node_entry);
 1198         rt_mutex_init_proxy_locked(&mtx, t);
 1199         t->rcu_boost_mutex = &mtx;
 1200         raw_spin_unlock_irqrestore(&rnp->lock, flags);
 1201         rt_mutex_lock(&mtx);  /* Side effect: boosts task t's priority. */
 1202         rt_mutex_unlock(&mtx);  /* Keep lockdep happy. */
 1203 
 1204         return ACCESS_ONCE(rnp->exp_tasks) != NULL ||
 1205                ACCESS_ONCE(rnp->boost_tasks) != NULL;
 1206 }
 1207 
 1208 /*
 1209  * Priority-boosting kthread.  One per leaf rcu_node and one for the
 1210  * root rcu_node.
 1211  */
 1212 static int rcu_boost_kthread(void *arg)
 1213 {
 1214         struct rcu_node *rnp = (struct rcu_node *)arg;
 1215         int spincnt = 0;
 1216         int more2boost;
 1217 
 1218         trace_rcu_utilization("Start boost kthread@init");
 1219         for (;;) {
 1220                 rnp->boost_kthread_status = RCU_KTHREAD_WAITING;
 1221                 trace_rcu_utilization("End boost kthread@rcu_wait");
 1222                 rcu_wait(rnp->boost_tasks || rnp->exp_tasks);
 1223                 trace_rcu_utilization("Start boost kthread@rcu_wait");
 1224                 rnp->boost_kthread_status = RCU_KTHREAD_RUNNING;
 1225                 more2boost = rcu_boost(rnp);
 1226                 if (more2boost)
 1227                         spincnt++;
 1228                 else
 1229                         spincnt = 0;
 1230                 if (spincnt > 10) {
 1231                         rnp->boost_kthread_status = RCU_KTHREAD_YIELDING;
 1232                         trace_rcu_utilization("End boost kthread@rcu_yield");
 1233                         schedule_timeout_interruptible(2);
 1234                         trace_rcu_utilization("Start boost kthread@rcu_yield");
 1235                         spincnt = 0;
 1236                 }
 1237         }
 1238         /* NOTREACHED */
 1239         trace_rcu_utilization("End boost kthread@notreached");
 1240         return 0;
 1241 }
 1242 
 1243 /*
 1244  * Check to see if it is time to start boosting RCU readers that are
 1245  * blocking the current grace period, and, if so, tell the per-rcu_node
 1246  * kthread to start boosting them.  If there is an expedited grace
 1247  * period in progress, it is always time to boost.
 1248  *
 1249  * The caller must hold rnp->lock, which this function releases.
 1250  * The ->boost_kthread_task is immortal, so we don't need to worry
 1251  * about it going away.
 1252  */
 1253 static void rcu_initiate_boost(struct rcu_node *rnp, unsigned long flags)
 1254 {
 1255         struct task_struct *t;
 1256 
 1257         if (!rcu_preempt_blocked_readers_cgp(rnp) && rnp->exp_tasks == NULL) {
 1258                 rnp->n_balk_exp_gp_tasks++;
 1259                 raw_spin_unlock_irqrestore(&rnp->lock, flags);
 1260                 return;
 1261         }
 1262         if (rnp->exp_tasks != NULL ||
 1263             (rnp->gp_tasks != NULL &&
 1264              rnp->boost_tasks == NULL &&
 1265              rnp->qsmask == 0 &&
 1266              ULONG_CMP_GE(jiffies, rnp->boost_time))) {
 1267                 if (rnp->exp_tasks == NULL)
 1268                         rnp->boost_tasks = rnp->gp_tasks;
 1269                 raw_spin_unlock_irqrestore(&rnp->lock, flags);
 1270                 t = rnp->boost_kthread_task;
 1271                 if (t)
 1272                         rcu_wake_cond(t, rnp->boost_kthread_status);
 1273         } else {
 1274                 rcu_initiate_boost_trace(rnp);
 1275                 raw_spin_unlock_irqrestore(&rnp->lock, flags);
 1276         }
 1277 }
 1278 
 1279 /*
 1280  * Wake up the per-CPU kthread to invoke RCU callbacks.
 1281  */
 1282 static void invoke_rcu_callbacks_kthread(void)
 1283 {
 1284         unsigned long flags;
 1285 
 1286         local_irq_save(flags);
 1287         __this_cpu_write(rcu_cpu_has_work, 1);
 1288         if (__this_cpu_read(rcu_cpu_kthread_task) != NULL &&
 1289             current != __this_cpu_read(rcu_cpu_kthread_task)) {
 1290                 rcu_wake_cond(__this_cpu_read(rcu_cpu_kthread_task),
 1291                               __this_cpu_read(rcu_cpu_kthread_status));
 1292         }
 1293         local_irq_restore(flags);
 1294 }
 1295 
 1296 /*
 1297  * Is the current CPU running the RCU-callbacks kthread?
 1298  * Caller must have preemption disabled.
 1299  */
 1300 static bool rcu_is_callbacks_kthread(void)
 1301 {
 1302         return __get_cpu_var(rcu_cpu_kthread_task) == current;
 1303 }
 1304 
 1305 #define RCU_BOOST_DELAY_JIFFIES DIV_ROUND_UP(CONFIG_RCU_BOOST_DELAY * HZ, 1000)
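
/*
 * RCU_BOOST_DELAY_JIFFIES converts the millisecond CONFIG_RCU_BOOST_DELAY
 * value into jiffies, rounding up.  For example, with HZ=250 and
 * CONFIG_RCU_BOOST_DELAY=500 this is DIV_ROUND_UP(500 * 250, 1000) = 125
 * jiffies, so boosting can begin half a second after the grace period starts
 * (see rcu_initiate_boost() above and rcu_preempt_boost_start_gp() below).
 */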
 1306 
 1307 /*
 1308  * Do priority-boost accounting for the start of a new grace period.
 1309  */
 1310 static void rcu_preempt_boost_start_gp(struct rcu_node *rnp)
 1311 {
 1312         rnp->boost_time = jiffies + RCU_BOOST_DELAY_JIFFIES;
 1313 }
 1314 
 1315 /*
 1316  * Create an RCU-boost kthread for the specified node if one does not
 1317  * already exist.  We only create this kthread for preemptible RCU.
 1318  * Returns zero if all is well, a negated errno otherwise.
 1319  */
 1320 static int __cpuinit rcu_spawn_one_boost_kthread(struct rcu_state *rsp,
 1321                                                  struct rcu_node *rnp)
 1322 {
 1323         int rnp_index = rnp - &rsp->node[0];
 1324         unsigned long flags;
 1325         struct sched_param sp;
 1326         struct task_struct *t;
 1327 
 1328         if (&rcu_preempt_state != rsp)
 1329                 return 0;
 1330 
 1331         if (!rcu_scheduler_fully_active || rnp->qsmaskinit == 0)
 1332                 return 0;
 1333 
 1334         rsp->boost = 1;
 1335         if (rnp->boost_kthread_task != NULL)
 1336                 return 0;
 1337         t = kthread_create(rcu_boost_kthread, (void *)rnp,
 1338                            "rcub/%d", rnp_index);
 1339         if (IS_ERR(t))
 1340                 return PTR_ERR(t);
 1341         raw_spin_lock_irqsave(&rnp->lock, flags);
 1342         rnp->boost_kthread_task = t;
 1343         raw_spin_unlock_irqrestore(&rnp->lock, flags);
 1344         sp.sched_priority = RCU_BOOST_PRIO;
 1345         sched_setscheduler_nocheck(t, SCHED_FIFO, &sp);
 1346         wake_up_process(t); /* get to TASK_INTERRUPTIBLE quickly. */
 1347         return 0;
 1348 }
 1349 
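/*
 * A minimal sketch (hypothetical names, not part of this file) of the
 * create/set-priority/wake pattern used by rcu_spawn_one_boost_kthread()
 * above: create the thread stopped, give it an RT priority, and only then
 * wake it.  Needs <linux/kthread.h> and <linux/sched.h>.
 */
static int demo_worker_fn(void *arg)
{
	while (!kthread_should_stop()) {
		/* ... do one unit of work, then sleep until woken ... */
		set_current_state(TASK_INTERRUPTIBLE);
		schedule();
	}
	return 0;
}

static int __init demo_spawn_worker(void)
{
	struct sched_param sp = { .sched_priority = 1 };
	struct task_struct *t;

	t = kthread_create(demo_worker_fn, NULL, "demo_worker/%d", 0);
	if (IS_ERR(t))
		return PTR_ERR(t);
	sched_setscheduler_nocheck(t, SCHED_FIFO, &sp);
	wake_up_process(t);	/* Thread starts only after its priority is set. */
	return 0;
}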
 1350 static void rcu_kthread_do_work(void)
 1351 {
 1352         rcu_do_batch(&rcu_sched_state, &__get_cpu_var(rcu_sched_data));
 1353         rcu_do_batch(&rcu_bh_state, &__get_cpu_var(rcu_bh_data));
 1354         rcu_preempt_do_callbacks();
 1355 }
 1356 
 1357 static void rcu_cpu_kthread_setup(unsigned int cpu)
 1358 {
 1359         struct sched_param sp;
 1360 
 1361         sp.sched_priority = RCU_KTHREAD_PRIO;
 1362         sched_setscheduler_nocheck(current, SCHED_FIFO, &sp);
 1363 }
 1364 
 1365 static void rcu_cpu_kthread_park(unsigned int cpu)
 1366 {
 1367         per_cpu(rcu_cpu_kthread_status, cpu) = RCU_KTHREAD_OFFCPU;
 1368 }
 1369 
 1370 static int rcu_cpu_kthread_should_run(unsigned int cpu)
 1371 {
 1372         return __get_cpu_var(rcu_cpu_has_work);
 1373 }
 1374 
 1375 /*
 1376  * Per-CPU kernel thread that invokes RCU callbacks.  This replaces the
 1377  * RCU softirq used in flavors and configurations of RCU that do not
 1378  * support RCU priority boosting.
 1379  */
 1380 static void rcu_cpu_kthread(unsigned int cpu)
 1381 {
 1382         unsigned int *statusp = &__get_cpu_var(rcu_cpu_kthread_status);
 1383         char work, *workp = &__get_cpu_var(rcu_cpu_has_work);
 1384         int spincnt;
 1385 
 1386         for (spincnt = 0; spincnt < 10; spincnt++) {
 1387                 trace_rcu_utilization("Start CPU kthread@rcu_wait");
 1388                 local_bh_disable();
 1389                 *statusp = RCU_KTHREAD_RUNNING;
 1390                 this_cpu_inc(rcu_cpu_kthread_loops);
 1391                 local_irq_disable();
 1392                 work = *workp;
 1393                 *workp = 0;
 1394                 local_irq_enable();
 1395                 if (work)
 1396                         rcu_kthread_do_work();
 1397                 local_bh_enable();
 1398                 if (*workp == 0) {
 1399                         trace_rcu_utilization("End CPU kthread@rcu_wait");
 1400                         *statusp = RCU_KTHREAD_WAITING;
 1401                         return;
 1402                 }
 1403         }
 1404         *statusp = RCU_KTHREAD_YIELDING;
 1405         trace_rcu_utilization("Start CPU kthread@rcu_yield");
 1406         schedule_timeout_interruptible(2);
 1407         trace_rcu_utilization("End CPU kthread@rcu_yield");
 1408         *statusp = RCU_KTHREAD_WAITING;
 1409 }
 1410 
 1411 /*
 1412  * Set the per-rcu_node kthread's affinity to cover all CPUs that are
 1413  * served by the rcu_node in question.  The CPU hotplug lock is still
 1414  * held, so the value of rnp->qsmaskinit will be stable.
 1415  *
 1416  * We don't include outgoingcpu in the affinity set; use -1 if there is
 1417  * no outgoing CPU.  If there are no CPUs left in the affinity set,
 1418  * this function allows the kthread to execute on any CPU.
 1419  */
 1420 static void rcu_boost_kthread_setaffinity(struct rcu_node *rnp, int outgoingcpu)
 1421 {
 1422         struct task_struct *t = rnp->boost_kthread_task;
 1423         unsigned long mask = rnp->qsmaskinit;
 1424         cpumask_var_t cm;
 1425         int cpu;
 1426 
 1427         if (!t)
 1428                 return;
 1429         if (!zalloc_cpumask_var(&cm, GFP_KERNEL))
 1430                 return;
 1431         for (cpu = rnp->grplo; cpu <= rnp->grphi; cpu++, mask >>= 1)
 1432                 if ((mask & 0x1) && cpu != outgoingcpu)
 1433                         cpumask_set_cpu(cpu, cm);
 1434         if (cpumask_weight(cm) == 0) {
 1435                 cpumask_setall(cm);
 1436                 for (cpu = rnp->grplo; cpu <= rnp->grphi; cpu++)
 1437                         cpumask_clear_cpu(cpu, cm);
 1438                 WARN_ON_ONCE(cpumask_weight(cm) == 0);
 1439         }
 1440         set_cpus_allowed_ptr(t, cm);
 1441         free_cpumask_var(cm);
 1442 }
 1443 
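/*
 * Worked example for the bit walk above (illustrative values): if the
 * rcu_node covers CPUs 8-15 (grplo=8, grphi=15) and qsmaskinit is 0x2d
 * (binary 101101, bits 0, 2, 3, and 5 set), then CPUs 8, 10, 11, and 13
 * land in the affinity mask, minus outgoingcpu if it is one of them.
 */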
 1444 static struct smp_hotplug_thread rcu_cpu_thread_spec = {
 1445         .store                  = &rcu_cpu_kthread_task,
 1446         .thread_should_run      = rcu_cpu_kthread_should_run,
 1447         .thread_fn              = rcu_cpu_kthread,
 1448         .thread_comm            = "rcuc/%u",
 1449         .setup                  = rcu_cpu_kthread_setup,
 1450         .park                   = rcu_cpu_kthread_park,
 1451 };
 1452 
 1453 /*
 1454  * Spawn all kthreads -- called as soon as the scheduler is running.
 1455  */
 1456 static int __init rcu_spawn_kthreads(void)
 1457 {
 1458         struct rcu_node *rnp;
 1459         int cpu;
 1460 
 1461         rcu_scheduler_fully_active = 1;
 1462         for_each_possible_cpu(cpu)
 1463                 per_cpu(rcu_cpu_has_work, cpu) = 0;
 1464         BUG_ON(smpboot_register_percpu_thread(&rcu_cpu_thread_spec));
 1465         rnp = rcu_get_root(rcu_state);
 1466         (void)rcu_spawn_one_boost_kthread(rcu_state, rnp);
 1467         if (NUM_RCU_NODES > 1) {
 1468                 rcu_for_each_leaf_node(rcu_state, rnp)
 1469                         (void)rcu_spawn_one_boost_kthread(rcu_state, rnp);
 1470         }
 1471         return 0;
 1472 }
 1473 early_initcall(rcu_spawn_kthreads);
 1474 
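/*
 * A minimal sketch (hypothetical names, not part of this file) of
 * registering a per-CPU kthread through the smpboot framework, mirroring
 * rcu_cpu_thread_spec above: the framework creates one "demo/%u" thread
 * per CPU, calls ->thread_should_run() to decide whether to invoke
 * ->thread_fn(), and parks/unparks the threads across CPU hotplug.
 * Needs <linux/smpboot.h> and <linux/percpu.h>.
 */
static DEFINE_PER_CPU(struct task_struct *, demo_task);
static DEFINE_PER_CPU(int, demo_has_work);

static int demo_should_run(unsigned int cpu)
{
	return __this_cpu_read(demo_has_work);
}

static void demo_thread_fn(unsigned int cpu)
{
	__this_cpu_write(demo_has_work, 0);
	/* ... process this CPU's pending work ... */
}

static struct smp_hotplug_thread demo_thread_spec = {
	.store			= &demo_task,
	.thread_should_run	= demo_should_run,
	.thread_fn		= demo_thread_fn,
	.thread_comm		= "demo/%u",
};

static int __init demo_register(void)
{
	return smpboot_register_percpu_thread(&demo_thread_spec);
}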
 1475 static void __cpuinit rcu_prepare_kthreads(int cpu)
 1476 {
 1477         struct rcu_data *rdp = per_cpu_ptr(rcu_state->rda, cpu);
 1478         struct rcu_node *rnp = rdp->mynode;
 1479 
 1480         /* Fire up the incoming CPU's kthread and leaf rcu_node kthread. */
 1481         if (rcu_scheduler_fully_active)
 1482                 (void)rcu_spawn_one_boost_kthread(rcu_state, rnp);
 1483 }
 1484 
 1485 #else /* #ifdef CONFIG_RCU_BOOST */
 1486 
 1487 static void rcu_initiate_boost(struct rcu_node *rnp, unsigned long flags)
 1488 {
 1489         raw_spin_unlock_irqrestore(&rnp->lock, flags);
 1490 }
 1491 
 1492 static void invoke_rcu_callbacks_kthread(void)
 1493 {
 1494         WARN_ON_ONCE(1);
 1495 }
 1496 
 1497 static bool rcu_is_callbacks_kthread(void)
 1498 {
 1499         return false;
 1500 }
 1501 
 1502 static void rcu_preempt_boost_start_gp(struct rcu_node *rnp)
 1503 {
 1504 }
 1505 
 1506 static void rcu_boost_kthread_setaffinity(struct rcu_node *rnp, int outgoingcpu)
 1507 {
 1508 }
 1509 
 1510 static int __init rcu_scheduler_really_started(void)
 1511 {
 1512         rcu_scheduler_fully_active = 1;
 1513         return 0;
 1514 }
 1515 early_initcall(rcu_scheduler_really_started);
 1516 
 1517 static void __cpuinit rcu_prepare_kthreads(int cpu)
 1518 {
 1519 }
 1520 
 1521 #endif /* #else #ifdef CONFIG_RCU_BOOST */
 1522 
 1523 #if !defined(CONFIG_RCU_FAST_NO_HZ)
 1524 
 1525 /*
 1526  * Check to see if any future RCU-related work will need to be done
 1527  * by the current CPU, even if none need be done immediately, returning
 1528  * 1 if so.  This function is part of the RCU implementation; it is -not-
 1529  * an exported member of the RCU API.
 1530  *
 1531  * Because we do not have RCU_FAST_NO_HZ, just check whether this CPU needs
 1532  * any flavor of RCU.
 1533  */
 1534 int rcu_needs_cpu(int cpu, unsigned long *delta_jiffies)
 1535 {
 1536         *delta_jiffies = ULONG_MAX;
 1537         return rcu_cpu_has_callbacks(cpu);
 1538 }
 1539 
 1540 /*
 1541  * Because we do not have RCU_FAST_NO_HZ, don't bother initializing for it.
 1542  */
 1543 static void rcu_prepare_for_idle_init(int cpu)
 1544 {
 1545 }
 1546 
 1547 /*
 1548  * Because we do not have RCU_FAST_NO_HZ, don't bother cleaning up
 1549  * after it.
 1550  */
 1551 static void rcu_cleanup_after_idle(int cpu)
 1552 {
 1553 }
 1554 
 1555 /*
 1556  * Do the idle-entry grace-period work, which, because CONFIG_RCU_FAST_NO_HZ=n,
 1557  * is nothing.
 1558  */
 1559 static void rcu_prepare_for_idle(int cpu)
 1560 {
 1561 }
 1562 
 1563 /*
 1564  * Don't bother keeping a running count of the number of RCU callbacks
 1565  * posted because CONFIG_RCU_FAST_NO_HZ=n.
 1566  */
 1567 static void rcu_idle_count_callbacks_posted(void)
 1568 {
 1569 }
 1570 
 1571 #else /* #if !defined(CONFIG_RCU_FAST_NO_HZ) */
 1572 
 1573 /*
 1574  * This code is invoked when a CPU goes idle, at which point we want
 1575  * to have the CPU do everything required for RCU so that it can enter
 1576  * the energy-efficient dyntick-idle mode.  This is handled by a
 1577  * state machine implemented by rcu_prepare_for_idle() below.
 1578  *
 1579  * The following four preprocessor symbols control this state machine:
 1580  *
 1581  * RCU_IDLE_FLUSHES gives the maximum number of times that we will attempt
 1582  *      to satisfy RCU.  Beyond this point, it is better to incur a periodic
 1583  *      scheduling-clock interrupt than to loop through the state machine
 1584  *      at full power.
 1585  * RCU_IDLE_OPT_FLUSHES gives the number of RCU_IDLE_FLUSHES that are
 1586  *      optional if RCU does not need anything immediately from this
 1587  *      CPU, even if this CPU still has RCU callbacks queued.  The first
 1588  *      few passes through the state machine are mandatory: we need to give
 1589  *      the state machine a chance to communicate a quiescent state
 1590  *      to the RCU core.
 1591  * RCU_IDLE_GP_DELAY gives the number of jiffies that a CPU is permitted
 1592  *      to sleep in dyntick-idle mode with RCU callbacks pending.  This
 1593  *      is sized to be roughly one RCU grace period.  Those energy-efficiency
 1594  *      benchmarkers who might otherwise be tempted to set this to a large
 1595  *      number, be warned: Setting RCU_IDLE_GP_DELAY too high can hang your
 1596  *      system.  And if you are -that- concerned about energy efficiency,
 1597  *      just power the system down and be done with it!
 1598  * RCU_IDLE_LAZY_GP_DELAY gives the number of jiffies that a CPU is
 1599  *      permitted to sleep in dyntick-idle mode with only lazy RCU
 1600  *      callbacks pending.  Setting this too high can OOM your system.
 1601  *
 1602  * The values below work well in practice.  If future workloads require
 1603  * adjustment, they can be converted into kernel config parameters, though
 1604  * making the state machine smarter might be a better option.
 1605  */
 1606 #define RCU_IDLE_FLUSHES 5              /* Number of dyntick-idle tries. */
 1607 #define RCU_IDLE_OPT_FLUSHES 3          /* Optional dyntick-idle tries. */
 1608 #define RCU_IDLE_GP_DELAY 4             /* Roughly one grace period. */
 1609 #define RCU_IDLE_LAZY_GP_DELAY (6 * HZ) /* Roughly six seconds. */
 1610 
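/*
 * Worked example for the delays above (illustrative values): with HZ=1000
 * and jiffies=1001, a CPU with non-lazy callbacks computes
 * round_up(1001 + RCU_IDLE_GP_DELAY, RCU_IDLE_GP_DELAY) = round_up(1005, 4)
 * = 1008, so its timer lands on a 4-jiffy boundary and CPUs going idle at
 * about the same time wake together.  With only lazy callbacks,
 * round_jiffies(jiffies + RCU_IDLE_LAZY_GP_DELAY) similarly aligns the
 * roughly six-second timer to a whole-second boundary so that it batches
 * with other once-per-second timer activity.
 */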
 1611 extern int tick_nohz_enabled;
 1612 
 1613 /*
 1614  * Does the specified flavor of RCU have non-lazy callbacks pending on
 1615  * the specified CPU?  Both RCU flavor and CPU are specified by the
 1616  * rcu_data structure.
 1617  */
 1618 static bool __rcu_cpu_has_nonlazy_callbacks(struct rcu_data *rdp)
 1619 {
 1620         return rdp->qlen != rdp->qlen_lazy;
 1621 }
 1622 
 1623 #ifdef CONFIG_TREE_PREEMPT_RCU
 1624 
 1625 /*
 1626  * Are there non-lazy RCU-preempt callbacks?  (There cannot be if there
 1627  * is no RCU-preempt in the kernel.)
 1628  */
 1629 static bool rcu_preempt_cpu_has_nonlazy_callbacks(int cpu)
 1630 {
 1631         struct rcu_data *rdp = &per_cpu(rcu_preempt_data, cpu);
 1632 
 1633         return __rcu_cpu_has_nonlazy_callbacks(rdp);
 1634 }
 1635 
 1636 #else /* #ifdef CONFIG_TREE_PREEMPT_RCU */
 1637 
 1638 static bool rcu_preempt_cpu_has_nonlazy_callbacks(int cpu)
 1639 {
 1640         return 0;
 1641 }
 1642 
 1643 #endif /* else #ifdef CONFIG_TREE_PREEMPT_RCU */
 1644 
 1645 /*
 1646  * Does any flavor of RCU have non-lazy callbacks on the specified CPU?
 1647  */
 1648 static bool rcu_cpu_has_nonlazy_callbacks(int cpu)
 1649 {
 1650         return __rcu_cpu_has_nonlazy_callbacks(&per_cpu(rcu_sched_data, cpu)) ||
 1651                __rcu_cpu_has_nonlazy_callbacks(&per_cpu(rcu_bh_data, cpu)) ||
 1652                rcu_preempt_cpu_has_nonlazy_callbacks(cpu);
 1653 }
 1654 
 1655 /*
 1656  * Allow the CPU to enter dyntick-idle mode if either: (1) There are no
 1657  * callbacks on this CPU, (2) this CPU has not yet attempted to enter
 1658  * dyntick-idle mode, or (3) this CPU is in the process of attempting to
 1659  * enter dyntick-idle mode.  Otherwise, if we have recently tried and failed
 1660  * to enter dyntick-idle mode, we refuse to try to enter it.  After all,
 1661  * it is better to incur scheduling-clock interrupts than to spin
 1662  * continuously for the same time duration!
 1663  *
 1664  * The delta_jiffies argument is used to store the time when RCU is
 1665  * going to need the CPU again if it still has callbacks.  The reason
 1666  * for this is that rcu_prepare_for_idle() might need to post a timer,
 1667  * but if so, it will do so after tick_nohz_stop_sched_tick() has set
 1668  * the wakeup time for this CPU.  This means that RCU's timer can be
 1669  * delayed until the wakeup time, which defeats the purpose of posting
 1670  * a timer.
 1671  */
 1672 int rcu_needs_cpu(int cpu, unsigned long *delta_jiffies)
 1673 {
 1674         struct rcu_dynticks *rdtp = &per_cpu(rcu_dynticks, cpu);
 1675 
 1676         /* Flag a new idle sojourn to the idle-entry state machine. */
 1677         rdtp->idle_first_pass = 1;
 1678         /* If no callbacks, RCU doesn't need the CPU. */
 1679         if (!rcu_cpu_has_callbacks(cpu)) {
 1680                 *delta_jiffies = ULONG_MAX;
 1681                 return 0;
 1682         }
 1683         if (rdtp->dyntick_holdoff == jiffies) {
 1684                 /* RCU recently tried and failed, so don't try again. */
 1685                 *delta_jiffies = 1;
 1686                 return 1;
 1687         }
 1688         /* Set up for the possibility that RCU will post a timer. */
 1689         if (rcu_cpu_has_nonlazy_callbacks(cpu)) {
 1690                 *delta_jiffies = round_up(RCU_IDLE_GP_DELAY + jiffies,
 1691                                           RCU_IDLE_GP_DELAY) - jiffies;
 1692         } else {
 1693                 *delta_jiffies = jiffies + RCU_IDLE_LAZY_GP_DELAY;
 1694                 *delta_jiffies = round_jiffies(*delta_jiffies) - jiffies;
 1695         }
 1696         return 0;
 1697 }
 1698 
 1699 /*
 1700  * Handler for smp_call_function_single().  The only point of this
 1701  * handler is to wake the CPU up, so the handler does only tracing.
 1702  */
 1703 void rcu_idle_demigrate(void *unused)
 1704 {
 1705         trace_rcu_prep_idle("Demigrate");
 1706 }
 1707 
 1708 /*
 1709  * Timer handler used to force CPU to start pushing its remaining RCU
 1710  * callbacks in the case where it entered dyntick-idle mode with callbacks
 1711  * pending.  The handler doesn't really need to do anything because the
 1712  * real work is done upon re-entry to idle, or by the next scheduling-clock
 1713  * interrupt should idle not be re-entered.
 1714  *
 1715  * One special case: the timer gets migrated without awakening the CPU
 1716  * on which the timer was scheduled.  In this case, we must wake up
 1717  * that CPU.  We do so with smp_call_function_single().
 1718  */
 1719 static void rcu_idle_gp_timer_func(unsigned long cpu_in)
 1720 {
 1721         int cpu = (int)cpu_in;
 1722 
 1723         trace_rcu_prep_idle("Timer");
 1724         if (cpu != smp_processor_id())
 1725                 smp_call_function_single(cpu, rcu_idle_demigrate, NULL, 0);
 1726         else
 1727                 WARN_ON_ONCE(1); /* Getting here can hang the system... */
 1728 }
 1729 
 1730 /*
 1731  * Initialize the timer used to pull CPUs out of dyntick-idle mode.
 1732  */
 1733 static void rcu_prepare_for_idle_init(int cpu)
 1734 {
 1735         struct rcu_dynticks *rdtp = &per_cpu(rcu_dynticks, cpu);
 1736 
 1737         rdtp->dyntick_holdoff = jiffies - 1;
 1738         setup_timer(&rdtp->idle_gp_timer, rcu_idle_gp_timer_func, cpu);
 1739         rdtp->idle_gp_timer_expires = jiffies - 1;
 1740         rdtp->idle_first_pass = 1;
 1741 }
 1742 
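/*
 * A minimal sketch (hypothetical names, not part of this file) of the
 * timer lifecycle used above: setup_timer() binds the handler and its
 * unsigned-long argument once, mod_timer_pinned() (re)arms the timer on
 * the current CPU, and del_timer() cancels it, doing nothing if the timer
 * is not pending.  Needs <linux/timer.h>.
 */
static struct timer_list demo_timer;

static void demo_timer_func(unsigned long data)
{
	pr_info("demo timer fired, argument %lu\n", data);
}

static void demo_timer_usage(int cpu)
{
	setup_timer(&demo_timer, demo_timer_func, (unsigned long)cpu);
	mod_timer_pinned(&demo_timer, jiffies + 4);	/* Fire in ~4 jiffies. */
	/* ... later, for example when leaving idle ... */
	del_timer(&demo_timer);
}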
 1743 /*
 1744  * Clean up for exit from idle.  Because we are exiting from idle, there
 1745  * is no longer any point to ->idle_gp_timer, so cancel it.  This will
 1746  * do nothing if this timer is not active, so just cancel it unconditionally.
 1747  */
 1748 static void rcu_cleanup_after_idle(int cpu)
 1749 {
 1750         struct rcu_dynticks *rdtp = &per_cpu(rcu_dynticks, cpu);
 1751 
 1752         del_timer(&rdtp->idle_gp_timer);
 1753         trace_rcu_prep_idle("Cleanup after idle");
 1754         rdtp->tick_nohz_enabled_snap = ACCESS_ONCE(tick_nohz_enabled);
 1755 }
 1756 
 1757 /*
 1758  * Check to see if any RCU-related work can be done by the current CPU,
 1759  * and if so, schedule a softirq to get it done.  This function is part
 1760  * of the RCU implementation; it is -not- an exported member of the RCU API.
 1761  *
 1762  * The idea is for the current CPU to clear out all work required by the
 1763  * RCU core for the current grace period, so that this CPU can be permitted
 1764  * to enter dyntick-idle mode.  In some cases, it will need to be awakened
 1765  * at the end of the grace period by whatever CPU ends the grace period.
 1766  * This allows CPUs to go dyntick-idle more quickly, and to reduce the
 1767  * number of wakeups by a modest integer factor.
 1768  *
 1769  * Because it is not legal to invoke rcu_process_callbacks() with irqs
 1770  * disabled, we do one pass of force_quiescent_state(), then do an
 1771  * invoke_rcu_core() to cause rcu_process_callbacks() to be invoked
 1772  * later.  The ->dyntick_drain field controls the sequencing.
 1773  *
 1774  * The caller must have disabled interrupts.
 1775  */
 1776 static void rcu_prepare_for_idle(int cpu)
 1777 {
 1778         struct timer_list *tp;
 1779         struct rcu_dynticks *rdtp = &per_cpu(rcu_dynticks, cpu);
 1780         int tne;
 1781 
 1782         /* Handle nohz enablement switches conservatively. */
 1783         tne = ACCESS_ONCE(tick_nohz_enabled);
 1784         if (tne != rdtp->tick_nohz_enabled_snap) {
 1785                 if (rcu_cpu_has_callbacks(cpu))
 1786                         invoke_rcu_core(); /* force nohz to see update. */
 1787                 rdtp->tick_nohz_enabled_snap = tne;
 1788                 return;
 1789         }
 1790         if (!tne)
 1791                 return;
 1792 
 1793         /* Adaptive-tick mode, where usermode execution is idle to RCU. */
 1794         if (!is_idle_task(current)) {
 1795                 rdtp->dyntick_holdoff = jiffies - 1;
 1796                 if (rcu_cpu_has_nonlazy_callbacks(cpu)) {
 1797                         trace_rcu_prep_idle("User dyntick with callbacks");
 1798                         rdtp->idle_gp_timer_expires =
 1799                                 round_up(jiffies + RCU_IDLE_GP_DELAY,
 1800                                          RCU_IDLE_GP_DELAY);
 1801                 } else if (rcu_cpu_has_callbacks(cpu)) {
 1802                         rdtp->idle_gp_timer_expires =
 1803                                 round_jiffies(jiffies + RCU_IDLE_LAZY_GP_DELAY);
 1804                         trace_rcu_prep_idle("User dyntick with lazy callbacks");
 1805                 } else {
 1806                         return;
 1807                 }
 1808                 tp = &rdtp->idle_gp_timer;
 1809                 mod_timer_pinned(tp, rdtp->idle_gp_timer_expires);
 1810                 return;
 1811         }
 1812 
 1813         /*
 1814          * If this is an idle re-entry, for example, due to use of
 1815          * RCU_NONIDLE() or the new idle-loop tracing API within the idle
 1816          * loop, then don't take any state-machine actions, unless the
 1817          * momentary exit from idle queued additional non-lazy callbacks.
 1818          * Instead, repost the ->idle_gp_timer if this CPU has callbacks
 1819          * pending.
 1820          */
 1821         if (!rdtp->idle_first_pass &&
 1822             (rdtp->nonlazy_posted == rdtp->nonlazy_posted_snap)) {
 1823                 if (rcu_cpu_has_callbacks(cpu)) {
 1824                         tp = &rdtp->idle_gp_timer;
 1825                         mod_timer_pinned(tp, rdtp->idle_gp_timer_expires);
 1826                 }
 1827                 return;
 1828         }
 1829         rdtp->idle_first_pass = 0;
 1830         rdtp->nonlazy_posted_snap = rdtp->nonlazy_posted - 1;
 1831 
 1832         /*
 1833          * If there are no callbacks on this CPU, enter dyntick-idle mode.
 1834          * Also reset state to avoid prejudicing later attempts.
 1835          */
 1836         if (!rcu_cpu_has_callbacks(cpu)) {
 1837                 rdtp->dyntick_holdoff = jiffies - 1;
 1838                 rdtp->dyntick_drain = 0;
 1839                 trace_rcu_prep_idle("No callbacks");
 1840                 return;
 1841         }
 1842 
 1843         /*
 1844          * If in holdoff mode, just return.  We will presumably have
 1845          * refrained from disabling the scheduling-clock tick.
 1846          */
 1847         if (rdtp->dyntick_holdoff == jiffies) {
 1848                 trace_rcu_prep_idle("In holdoff");
 1849                 return;
 1850         }
 1851 
 1852         /* Check and update the ->dyntick_drain sequencing. */
 1853         if (rdtp->dyntick_drain <= 0) {
 1854                 /* First time through, initialize the counter. */
 1855                 rdtp->dyntick_drain = RCU_IDLE_FLUSHES;
 1856         } else if (rdtp->dyntick_drain <= RCU_IDLE_OPT_FLUSHES &&
 1857                    !rcu_pending(cpu) &&
 1858                    !local_softirq_pending()) {
 1859                 /* Can we go dyntick-idle despite still having callbacks? */
 1860                 rdtp->dyntick_drain = 0;
 1861                 rdtp->dyntick_holdoff = jiffies;
 1862                 if (rcu_cpu_has_nonlazy_callbacks(cpu)) {
 1863                         trace_rcu_prep_idle("Dyntick with callbacks");
 1864                         rdtp->idle_gp_timer_expires =
 1865                                 round_up(jiffies + RCU_IDLE_GP_DELAY,
 1866                                          RCU_IDLE_GP_DELAY);
 1867                 } else {
 1868                         rdtp->idle_gp_timer_expires =
 1869                                 round_jiffies(jiffies + RCU_IDLE_LAZY_GP_DELAY);
 1870                         trace_rcu_prep_idle("Dyntick with lazy callbacks");
 1871                 }
 1872                 tp = &rdtp->idle_gp_timer;
 1873                 mod_timer_pinned(tp, rdtp->idle_gp_timer_expires);
 1874                 rdtp->nonlazy_posted_snap = rdtp->nonlazy_posted;
 1875                 return; /* Nothing more to do immediately. */
 1876         } else if (--(rdtp->dyntick_drain) <= 0) {
 1877                 /* We have hit the limit, so time to give up. */
 1878                 rdtp->dyntick_holdoff = jiffies;
 1879                 trace_rcu_prep_idle("Begin holdoff");
 1880                 invoke_rcu_core();  /* Force the CPU out of dyntick-idle. */
 1881                 return;
 1882         }
 1883 
 1884         /*
 1885          * Do one step of pushing the remaining RCU callbacks through
 1886          * the RCU core state machine.
 1887          */
 1888 #ifdef CONFIG_TREE_PREEMPT_RCU
 1889         if (per_cpu(rcu_preempt_data, cpu).nxtlist) {
 1890                 rcu_preempt_qs(cpu);
 1891                 force_quiescent_state(&rcu_preempt_state);
 1892         }
 1893 #endif /* #ifdef CONFIG_TREE_PREEMPT_RCU */
 1894         if (per_cpu(rcu_sched_data, cpu).nxtlist) {
 1895                 rcu_sched_qs(cpu);
 1896                 force_quiescent_state(&rcu_sched_state);
 1897         }
 1898         if (per_cpu(rcu_bh_data, cpu).nxtlist) {
 1899                 rcu_bh_qs(cpu);
 1900                 force_quiescent_state(&rcu_bh_state);
 1901         }
 1902 
 1903         /*
 1904          * If RCU callbacks are still pending, RCU still needs this CPU.
 1905          * So try forcing the callbacks through the grace period.
 1906          */
 1907         if (rcu_cpu_has_callbacks(cpu)) {
 1908                 trace_rcu_prep_idle("More callbacks");
 1909                 invoke_rcu_core();
 1910         } else {
 1911                 trace_rcu_prep_idle("Callbacks drained");
 1912         }
 1913 }
 1914 
 1915 /*
 1916  * Keep a running count of the number of non-lazy callbacks posted
 1917  * on this CPU.  This running counter (which is never decremented) allows
 1918  * rcu_prepare_for_idle() to detect when something out of the idle loop
 1919  * posts a callback, even if an equal number of callbacks are invoked.
 1920  * Of course, callbacks should only be posted from within a trace event
 1921  * designed to be called from idle or from within RCU_NONIDLE().
 1922  */
 1923 static void rcu_idle_count_callbacks_posted(void)
 1924 {
 1925         __this_cpu_add(rcu_dynticks.nonlazy_posted, 1);
 1926 }
 1927 
 1928 /*
 1929  * Data for flushing lazy RCU callbacks at OOM time.
 1930  */
 1931 static atomic_t oom_callback_count;
 1932 static DECLARE_WAIT_QUEUE_HEAD(oom_callback_wq);
 1933 
 1934 /*
 1935  * RCU OOM callback -- decrement the outstanding count and deliver the
 1936  * wake-up if we are the last one.
 1937  */
 1938 static void rcu_oom_callback(struct rcu_head *rhp)
 1939 {
 1940         if (atomic_dec_and_test(&oom_callback_count))
 1941                 wake_up(&oom_callback_wq);
 1942 }
 1943 
 1944 /*
 1945  * Post an rcu_oom_notify callback on the current CPU if it has at
 1946  * least one lazy callback.  This will unnecessarily post callbacks
 1947  * to CPUs that already have a non-lazy callback at the end of their
 1948  * callback list, but this is an infrequent operation, so accept some
 1949  * extra overhead to keep things simple.
 1950  */
 1951 static void rcu_oom_notify_cpu(void *unused)
 1952 {
 1953         struct rcu_state *rsp;
 1954         struct rcu_data *rdp;
 1955 
 1956         for_each_rcu_flavor(rsp) {
 1957                 rdp = __this_cpu_ptr(rsp->rda);
 1958                 if (rdp->qlen_lazy != 0) {
 1959                         atomic_inc(&oom_callback_count);
 1960                         rsp->call(&rdp->oom_head, rcu_oom_callback);
 1961                 }
 1962         }
 1963 }
 1964 
 1965 /*
 1966  * If low on memory, ensure that each CPU has a non-lazy callback.
 1967  * This will wake up CPUs that have only lazy callbacks, in turn
 1968  * ensuring that they free up the corresponding memory in a timely manner.
 1969  * Because an uncertain amount of memory will be freed in some uncertain
 1970  * timeframe, we do not claim to have freed anything.
 1971  */
 1972 static int rcu_oom_notify(struct notifier_block *self,
 1973                           unsigned long notused, void *nfreed)
 1974 {
 1975         int cpu;
 1976 
 1977         /* Wait for callbacks from earlier instance to complete. */
 1978         wait_event(oom_callback_wq, atomic_read(&oom_callback_count) == 0);
 1979 
 1980         /*
 1981          * Prevent premature wakeup: ensure that all increments happen
 1982          * before there is a chance of the counter reaching zero.
 1983          */
 1984         atomic_set(&oom_callback_count, 1);
 1985 
 1986         get_online_cpus();
 1987         for_each_online_cpu(cpu) {
 1988                 smp_call_function_single(cpu, rcu_oom_notify_cpu, NULL, 1);
 1989                 cond_resched();
 1990         }
 1991         put_online_cpus();
 1992 
 1993         /* Unconditionally decrement: no need to wake ourselves up. */
 1994         atomic_dec(&oom_callback_count);
 1995 
 1996         return NOTIFY_OK;
 1997 }
 1998 
 1999 static struct notifier_block rcu_oom_nb = {
 2000         .notifier_call = rcu_oom_notify
 2001 };
 2002 
 2003 static int __init rcu_register_oom_notifier(void)
 2004 {
 2005         register_oom_notifier(&rcu_oom_nb);
 2006         return 0;
 2007 }
 2008 early_initcall(rcu_register_oom_notifier);
 2009 
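/*
 * A minimal sketch (hypothetical names, not part of this file) of the
 * OOM-notifier pattern used above: a subsystem registers a notifier_block
 * whose callback runs when the system is about to go out of memory and
 * may release, or schedule the release of, any memory it can spare.
 * Needs <linux/oom.h> and <linux/notifier.h>.
 */
static int demo_oom_notify(struct notifier_block *self,
			   unsigned long unused, void *nfreed)
{
	/* Kick off reclaim of whatever memory this subsystem can give back. */
	return NOTIFY_OK;
}

static struct notifier_block demo_oom_nb = {
	.notifier_call = demo_oom_notify,
};

static int __init demo_register_oom_notifier(void)
{
	register_oom_notifier(&demo_oom_nb);
	return 0;
}
early_initcall(demo_register_oom_notifier);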
 2010 #endif /* #else #if !defined(CONFIG_RCU_FAST_NO_HZ) */
 2011 
 2012 #ifdef CONFIG_RCU_CPU_STALL_INFO
 2013 
 2014 #ifdef CONFIG_RCU_FAST_NO_HZ
 2015 
 2016 static void print_cpu_stall_fast_no_hz(char *cp, int cpu)
 2017 {
 2018         struct rcu_dynticks *rdtp = &per_cpu(rcu_dynticks, cpu);
 2019         struct timer_list *tltp = &rdtp->idle_gp_timer;
 2020         char c;
 2021 
 2022         c = rdtp->dyntick_holdoff == jiffies ? 'H' : '.';
 2023         if (timer_pending(tltp))
 2024                 sprintf(cp, "drain=%d %c timer=%lu",
 2025                         rdtp->dyntick_drain, c, tltp->expires - jiffies);
 2026         else
 2027                 sprintf(cp, "drain=%d %c timer not pending",
 2028                         rdtp->dyntick_drain, c);
 2029 }
 2030 
 2031 #else /* #ifdef CONFIG_RCU_FAST_NO_HZ */
 2032 
 2033 static void print_cpu_stall_fast_no_hz(char *cp, int cpu)
 2034 {
 2035         *cp = '\0';
 2036 }
 2037 
 2038 #endif /* #else #ifdef CONFIG_RCU_FAST_NO_HZ */
 2039 
 2040 /* Initiate the stall-info list. */
 2041 static void print_cpu_stall_info_begin(void)
 2042 {
 2043         printk(KERN_CONT "\n");
 2044 }
 2045 
 2046 /*
 2047  * Print out diagnostic information for the specified stalled CPU.
 2048  *
 2049  * If the specified CPU is aware of the current RCU grace period
 2050  * (flavor specified by rsp), then print the number of scheduling
 2051  * clock interrupts the CPU has taken during the time that it has
 2052  * been aware.  Otherwise, print the number of RCU grace periods
 2053  * that this CPU is ignorant of, for example, "1" if the CPU was
 2054  * aware of the previous grace period.
 2055  *
 2056  * Also print out idle and (if CONFIG_RCU_FAST_NO_HZ) idle-entry info.
 2057  */
 2058 static void print_cpu_stall_info(struct rcu_state *rsp, int cpu)
 2059 {
 2060         char fast_no_hz[72];
 2061         struct rcu_data *rdp = per_cpu_ptr(rsp->rda, cpu);
 2062         struct rcu_dynticks *rdtp = rdp->dynticks;
 2063         char *ticks_title;
 2064         unsigned long ticks_value;
 2065 
 2066         if (rsp->gpnum == rdp->gpnum) {
 2067                 ticks_title = "ticks this GP";
 2068                 ticks_value = rdp->ticks_this_gp;
 2069         } else {
 2070                 ticks_title = "GPs behind";
 2071                 ticks_value = rsp->gpnum - rdp->gpnum;
 2072         }
 2073         print_cpu_stall_fast_no_hz(fast_no_hz, cpu);
 2074         printk(KERN_ERR "\t%d: (%lu %s) idle=%03x/%llx/%d %s\n",
 2075                cpu, ticks_value, ticks_title,
 2076                atomic_read(&rdtp->dynticks) & 0xfff,
 2077                rdtp->dynticks_nesting, rdtp->dynticks_nmi_nesting,
 2078                fast_no_hz);
 2079 }
 2080 
 2081 /* Terminate the stall-info list. */
 2082 static void print_cpu_stall_info_end(void)
 2083 {
 2084         printk(KERN_ERR "\t");
 2085 }
 2086 
 2087 /* Zero ->ticks_this_gp for all flavors of RCU. */
 2088 static void zero_cpu_stall_ticks(struct rcu_data *rdp)
 2089 {
 2090         rdp->ticks_this_gp = 0;
 2091 }
 2092 
 2093 /* Increment ->ticks_this_gp for all flavors of RCU. */
 2094 static void increment_cpu_stall_ticks(void)
 2095 {
 2096         struct rcu_state *rsp;
 2097 
 2098         for_each_rcu_flavor(rsp)
 2099                 __this_cpu_ptr(rsp->rda)->ticks_this_gp++;
 2100 }
 2101 
 2102 #else /* #ifdef CONFIG_RCU_CPU_STALL_INFO */
 2103 
 2104 static void print_cpu_stall_info_begin(void)
 2105 {
 2106         printk(KERN_CONT " {");
 2107 }
 2108 
 2109 static void print_cpu_stall_info(struct rcu_state *rsp, int cpu)
 2110 {
 2111         printk(KERN_CONT " %d", cpu);
 2112 }
 2113 
 2114 static void print_cpu_stall_info_end(void)
 2115 {
 2116         printk(KERN_CONT "} ");
 2117 }
 2118 
 2119 static void zero_cpu_stall_ticks(struct rcu_data *rdp)
 2120 {
 2121 }
 2122 
 2123 static void increment_cpu_stall_ticks(void)
 2124 {
 2125 }
 2126 
 2127 #endif /* #else #ifdef CONFIG_RCU_CPU_STALL_INFO */
 2128 
 2129 #ifdef CONFIG_RCU_NOCB_CPU
 2130 
 2131 /*
 2132  * Offload callback processing from the boot-time-specified set of CPUs
 2133  * specified by rcu_nocb_mask.  For each CPU in the set, there is a
 2134  * kthread created that pulls the callbacks from the corresponding CPU,
 2135  * waits for a grace period to elapse, and invokes the callbacks.
 2136  * The no-CBs CPUs do a wake_up() on their kthread when they insert
 2137  * a callback into any empty list, unless the rcu_nocb_poll boot parameter
 2138  * has been specified, in which case each kthread actively polls its
 2139  * CPU.  (Which isn't so great for energy efficiency, but which does
 2140  * reduce RCU's overhead on that CPU.)
 2141  *
 2142  * This is intended to be used in conjunction with Frederic Weisbecker's
 2143  * adaptive-idle work, which would seriously reduce OS jitter on CPUs
 2144  * running CPU-bound user-mode computations.
 2145  *
 2146  * Offloading of callback processing could also in theory be used as
 2147  * an energy-efficiency measure because CPUs with no RCU callbacks
 2148  * queued are more aggressive about entering dyntick-idle mode.
 2149  */
 2150 
 2151 
 2152 /* Parse the boot-time rcu_nocb_mask CPU list from the kernel parameters. */
 2153 static int __init rcu_nocb_setup(char *str)
 2154 {
 2155         alloc_bootmem_cpumask_var(&rcu_nocb_mask);
 2156         have_rcu_nocb_mask = true;
 2157         cpulist_parse(str, rcu_nocb_mask);
 2158         return 1;
 2159 }
 2160 __setup("rcu_nocbs=", rcu_nocb_setup);
 2161 
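/*
 * Usage example (illustrative): booting with "rcu_nocbs=1-3,8" offloads
 * callback processing for CPUs 1, 2, 3, and 8; cpulist_parse() accepts the
 * usual comma-separated list of CPU numbers and ranges.  Whether the
 * corresponding "rcuo" kthreads block on their wait queues or poll is
 * controlled separately by the rcu_nocb_poll parameter (see
 * rcu_nocb_kthread() below).
 */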
 2162 /* Is the specified CPU a no-CBs CPU? */
 2163 static bool is_nocb_cpu(int cpu)
 2164 {
 2165         if (have_rcu_nocb_mask)
 2166                 return cpumask_test_cpu(cpu, rcu_nocb_mask);
 2167         return false;
 2168 }
 2169 
 2170 /*
 2171  * Enqueue the specified string of rcu_head structures onto the specified
 2172  * CPU's no-CBs lists.  The CPU is specified by rdp, the head of the
 2173  * string by rhp, and the tail of the string by rhtp.  The non-lazy/lazy
 2174  * counts are supplied by rhcount and rhcount_lazy.
 2175  *
 2176  * If warranted, also wake up the kthread servicing this CPU's queues.
 2177  */
 2178 static void __call_rcu_nocb_enqueue(struct rcu_data *rdp,
 2179                                     struct rcu_head *rhp,
 2180                                     struct rcu_head **rhtp,
 2181                                     int rhcount, int rhcount_lazy)
 2182 {
 2183         int len;
 2184         struct rcu_head **old_rhpp;
 2185         struct task_struct *t;
 2186 
 2187         /* Enqueue the callback on the nocb list and update counts. */
 2188         old_rhpp = xchg(&rdp->nocb_tail, rhtp);
 2189         ACCESS_ONCE(*old_rhpp) = rhp;
 2190         atomic_long_add(rhcount, &rdp->nocb_q_count);
 2191         atomic_long_add(rhcount_lazy, &rdp->nocb_q_count_lazy);
 2192 
 2193         /* If we are not being polled and there is a kthread, awaken it ... */
 2194         t = ACCESS_ONCE(rdp->nocb_kthread);
 2195         if (rcu_nocb_poll || !t)
 2196                 return;
 2197         len = atomic_long_read(&rdp->nocb_q_count);
 2198         if (old_rhpp == &rdp->nocb_head) {
 2199                 wake_up(&rdp->nocb_wq); /* ... only if queue was empty ... */
 2200                 rdp->qlen_last_fqs_check = 0;
 2201         } else if (len > rdp->qlen_last_fqs_check + qhimark) {
 2202                 wake_up_process(t); /* ... or if many callbacks queued. */
 2203                 rdp->qlen_last_fqs_check = LONG_MAX / 2;
 2204         }
 2205         return;
 2206 }
 2207 
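/*
 * A minimal sketch (hypothetical names, not part of this file) of the
 * lockless enqueue pattern used by __call_rcu_nocb_enqueue() above: the
 * tail pointer always points at either the head pointer (empty queue,
 * initialised with head = NULL and tail = &head) or at the last element's
 * ->next field, so a producer claims the old tail slot with an atomic
 * xchg() and then publishes its element through it.  A consumer walking
 * the list must tolerate briefly seeing a NULL ->next on the last element
 * while a producer sits between the xchg() and the store, which is what
 * the "next == NULL" wait in rcu_nocb_kthread() below handles.
 */
struct demo_item {
	struct demo_item *next;
};

struct demo_queue {
	struct demo_item *head;
	struct demo_item **tail;	/* == &head when the queue is empty. */
};

static void demo_enqueue(struct demo_queue *q, struct demo_item *item)
{
	struct demo_item **old_tail;

	item->next = NULL;
	old_tail = xchg(&q->tail, &item->next);	/* Claim the old tail slot. */
	ACCESS_ONCE(*old_tail) = item;		/* Publish the new element. */
}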
 2208 /*
 2209  * This is a helper for __call_rcu(), which invokes this when the normal
 2210  * callback queue is inoperable.  If this is not a no-CBs CPU, this
 2211  * function returns failure back to __call_rcu(), which can complain
 2212  * appropriately.
 2213  *
 2214  * Otherwise, this function queues the callback where the corresponding
 2215  * "rcuo" kthread can find it.
 2216  */
 2217 static bool __call_rcu_nocb(struct rcu_data *rdp, struct rcu_head *rhp,
 2218                             bool lazy)
 2219 {
 2220 
 2221         if (!is_nocb_cpu(rdp->cpu))
 2222                 return 0;
 2223         __call_rcu_nocb_enqueue(rdp, rhp, &rhp->next, 1, lazy);
 2224         return 1;
 2225 }
 2226 
 2227 /*
 2228  * Adopt orphaned callbacks on a no-CBs CPU, or return 0 if this is
 2229  * not a no-CBs CPU.
 2230  */
 2231 static bool __maybe_unused rcu_nocb_adopt_orphan_cbs(struct rcu_state *rsp,
 2232                                                      struct rcu_data *rdp)
 2233 {
 2234         long ql = rsp->qlen;
 2235         long qll = rsp->qlen_lazy;
 2236 
 2237         /* If this is not a no-CBs CPU, tell the caller to do it the old way. */
 2238         if (!is_nocb_cpu(smp_processor_id()))
 2239                 return 0;
 2240         rsp->qlen = 0;
 2241         rsp->qlen_lazy = 0;
 2242 
 2243         /* First, enqueue the donelist, if any.  This preserves CB ordering. */
 2244         if (rsp->orphan_donelist != NULL) {
 2245                 __call_rcu_nocb_enqueue(rdp, rsp->orphan_donelist,
 2246                                         rsp->orphan_donetail, ql, qll);
 2247                 ql = qll = 0;
 2248                 rsp->orphan_donelist = NULL;
 2249                 rsp->orphan_donetail = &rsp->orphan_donelist;
 2250         }
 2251         if (rsp->orphan_nxtlist != NULL) {
 2252                 __call_rcu_nocb_enqueue(rdp, rsp->orphan_nxtlist,
 2253                                         rsp->orphan_nxttail, ql, qll);
 2254                 ql = qll = 0;
 2255                 rsp->orphan_nxtlist = NULL;
 2256                 rsp->orphan_nxttail = &rsp->orphan_nxtlist;
 2257         }
 2258         return 1;
 2259 }
 2260 
 2261 /*
 2262  * There must be at least one non-no-CBs CPU in operation at any given
 2263  * time, because no-CBs CPUs are not capable of initiating grace periods
 2264  * independently.  This function therefore complains if the specified
 2265  * CPU is the last non-no-CBs CPU, allowing the CPU-hotplug system to
 2266  * avoid offlining the last such CPU.  (Recursion is a wonderful thing,
 2267  * but you have to have a base case!)
 2268  */
 2269 static bool nocb_cpu_expendable(int cpu)
 2270 {
 2271         cpumask_var_t non_nocb_cpus;
 2272         int ret;
 2273 
 2274         /*
 2275          * If there are no no-CB CPUs or if this CPU is not a no-CB CPU,
 2276          * then offlining this CPU is harmless.  Let it happen.
 2277          */
 2278         if (!have_rcu_nocb_mask || is_nocb_cpu(cpu))
 2279                 return 1;
 2280 
 2281         /* If no memory, play it safe and keep the CPU around. */
 2282         if (!alloc_cpumask_var(&non_nocb_cpus, GFP_NOIO))
 2283                 return 0;
 2284         cpumask_andnot(non_nocb_cpus, cpu_online_mask, rcu_nocb_mask);
 2285         cpumask_clear_cpu(cpu, non_nocb_cpus);
 2286         ret = !cpumask_empty(non_nocb_cpus);
 2287         free_cpumask_var(non_nocb_cpus);
 2288         return ret;
 2289 }
 2290 
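/*
 * Worked example for the mask logic above (illustrative values): with
 * CPUs 0-3 online and rcu_nocb_mask={2,3}, non_nocb_cpus is {0,1}; asking
 * about CPU 0 clears it from the mask, leaving {1}, so CPU 0 is
 * expendable.  With rcu_nocb_mask={1,2,3} instead, non_nocb_cpus is {0},
 * clearing CPU 0 empties the mask, and the function reports that CPU 0,
 * the last CPU able to initiate grace periods, must not be offlined.
 */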
 2291 /*
 2292  * Helper structure for remote registry of RCU callbacks.
 2293  * This is needed for when a no-CBs CPU needs to start a grace period.
 2294  * If it just invokes call_rcu(), the resulting callback will be queued,
 2295  * which can result in deadlock.
 2296  */
 2297 struct rcu_head_remote {
 2298         struct rcu_head *rhp;
 2299         call_rcu_func_t *crf;
 2300         void (*func)(struct rcu_head *rhp);
 2301 };
 2302 
 2303 /*
 2304  * Register a callback as specified by the rcu_head_remote struct.
 2305  * This function is intended to be invoked via smp_call_function_single().
 2306  */
 2307 static void call_rcu_local(void *arg)
 2308 {
 2309         struct rcu_head_remote *rhrp =
 2310                 container_of(arg, struct rcu_head_remote, rhp);
 2311 
 2312         rhrp->crf(rhrp->rhp, rhrp->func);
 2313 }
 2314 
 2315 /*
 2316  * Set up an rcu_head_remote structure and then invoke call_rcu_local()
 2317  * on CPU 0 (which is guaranteed to be a non-no-CBs CPU) via
 2318  * smp_call_function_single().
 2319  */
 2320 static void invoke_crf_remote(struct rcu_head *rhp,
 2321                               void (*func)(struct rcu_head *rhp),
 2322                               call_rcu_func_t crf)
 2323 {
 2324         struct rcu_head_remote rhr;
 2325 
 2326         rhr.rhp = rhp;
 2327         rhr.crf = crf;
 2328         rhr.func = func;
 2329         smp_call_function_single(0, call_rcu_local, &rhr, 1);
 2330 }
 2331 
 2332 /*
 2333  * Helper functions to be passed to wait_rcu_gp(), each of which
 2334  * invokes invoke_crf_remote() to register a callback appropriately.
 2335  */
 2336 static void __maybe_unused
 2337 call_rcu_preempt_remote(struct rcu_head *rhp,
 2338                         void (*func)(struct rcu_head *rhp))
 2339 {
 2340         invoke_crf_remote(rhp, func, call_rcu);
 2341 }
 2342 static void call_rcu_bh_remote(struct rcu_head *rhp,
 2343                                void (*func)(struct rcu_head *rhp))
 2344 {
 2345         invoke_crf_remote(rhp, func, call_rcu_bh);
 2346 }
 2347 static void call_rcu_sched_remote(struct rcu_head *rhp,
 2348                                   void (*func)(struct rcu_head *rhp))
 2349 {
 2350         invoke_crf_remote(rhp, func, call_rcu_sched);
 2351 }
 2352 
 2353 /*
 2354  * Per-rcu_data kthread, but only for no-CBs CPUs.  Each kthread invokes
 2355  * callbacks queued by the corresponding no-CBs CPU.
 2356  */
 2357 static int rcu_nocb_kthread(void *arg)
 2358 {
 2359         int c, cl;
 2360         struct rcu_head *list;
 2361         struct rcu_head *next;
 2362         struct rcu_head **tail;
 2363         struct rcu_data *rdp = arg;
 2364 
 2365         /* Each pass through this loop invokes one batch of callbacks */
 2366         for (;;) {
 2367                 /* If not polling, wait for next batch of callbacks. */
 2368                 if (!rcu_nocb_poll)
 2369                         wait_event(rdp->nocb_wq, rdp->nocb_head);
 2370                 list = ACCESS_ONCE(rdp->nocb_head);
 2371                 if (!list) {
 2372                         schedule_timeout_interruptible(1);
 2373                         continue;
 2374                 }
 2375 
 2376                 /*
 2377                  * Extract queued callbacks, update counts, and wait
 2378                  * for a grace period to elapse.
 2379                  */
 2380                 ACCESS_ONCE(rdp->nocb_head) = NULL;
 2381                 tail = xchg(&rdp->nocb_tail, &rdp->nocb_head);
 2382                 c = atomic_long_xchg(&rdp->nocb_q_count, 0);
 2383                 cl = atomic_long_xchg(&rdp->nocb_q_count_lazy, 0);
 2384                 ACCESS_ONCE(rdp->nocb_p_count) += c;
 2385                 ACCESS_ONCE(rdp->nocb_p_count_lazy) += cl;
 2386                 wait_rcu_gp(rdp->rsp->call_remote);
 2387 
 2388                 /* Each pass through the following loop invokes a callback. */
 2389                 trace_rcu_batch_start(rdp->rsp->name, cl, c, -1);
 2390                 c = cl = 0;
 2391                 while (list) {
 2392                         next = list->next;
 2393                         /* Wait for enqueuing to complete, if needed. */
 2394                         while (next == NULL && &list->next != tail) {
 2395                                 schedule_timeout_interruptible(1);
 2396                                 next = list->next;
 2397                         }
 2398                         debug_rcu_head_unqueue(list);
 2399                         local_bh_disable();
 2400                         if (__rcu_reclaim(rdp->rsp->name, list))
 2401                                 cl++;
 2402                         c++;
 2403                         local_bh_enable();
 2404                         list = next;
 2405                 }
 2406                 trace_rcu_batch_end(rdp->rsp->name, c, !!list, 0, 0, 1);
 2407                 ACCESS_ONCE(rdp->nocb_p_count) -= c;
 2408                 ACCESS_ONCE(rdp->nocb_p_count_lazy) -= cl;
 2409                 rdp->n_nocbs_invoked += c;
 2410         }
 2411         return 0;
 2412 }
 2413 
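/*
 * A minimal sketch (hypothetical names, not part of this file) of the
 * waitqueue handshake between __call_rcu_nocb_enqueue() (the producer,
 * which does wake_up()) and rcu_nocb_kthread() (the consumer, which does
 * wait_event()): the consumer sleeps until the tested condition becomes
 * true, and the producer makes the condition true before waking it.
 * Needs <linux/wait.h>.
 */
static DECLARE_WAIT_QUEUE_HEAD(demo_wq);
static int demo_work_available;

static void demo_producer(void)
{
	ACCESS_ONCE(demo_work_available) = 1;	/* Make the condition true... */
	wake_up(&demo_wq);			/* ...then wake the consumer. */
}

static void demo_consumer(void)
{
	wait_event(demo_wq, ACCESS_ONCE(demo_work_available));
	ACCESS_ONCE(demo_work_available) = 0;
	/* ... process the work ... */
}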
 2414 /* Initialize per-rcu_data variables for no-CBs CPUs. */
 2415 static void __init rcu_boot_init_nocb_percpu_data(struct rcu_data *rdp)
 2416 {
 2417         rdp->nocb_tail = &rdp->nocb_head;
 2418         init_waitqueue_head(&rdp->nocb_wq);
 2419 }
 2420 
 2421 /* Create a kthread for each RCU flavor for each no-CBs CPU. */
 2422 static void __init rcu_spawn_nocb_kthreads(struct rcu_state *rsp)
 2423 {
 2424         int cpu;
 2425         struct rcu_data *rdp;
 2426         struct task_struct *t;
 2427 
 2428         if (rcu_nocb_mask == NULL)
 2429                 return;
 2430         for_each_cpu(cpu, rcu_nocb_mask) {
 2431                 rdp = per_cpu_ptr(rsp->rda, cpu);
 2432                 t = kthread_run(rcu_nocb_kthread, rdp, "rcuo%d", cpu);
 2433                 BUG_ON(IS_ERR(t));
 2434                 ACCESS_ONCE(rdp->nocb_kthread) = t;
 2435         }
 2436 }
 2437 
 2438 /* Prevent __call_rcu() from enqueuing callbacks on no-CBs CPUs */
 2439 static void init_nocb_callback_list(struct rcu_data *rdp)
 2440 {
 2441         if (rcu_nocb_mask == NULL ||
 2442             !cpumask_test_cpu(rdp->cpu, rcu_nocb_mask))
 2443                 return;
 2444         rdp->nxttail[RCU_NEXT_TAIL] = NULL;
 2445 }
 2446 
 2447 /* Initialize the ->call_remote fields in the rcu_state structures. */
 2448 static void __init rcu_init_nocb(void)
 2449 {
 2450 #ifdef CONFIG_PREEMPT_RCU
 2451         rcu_preempt_state.call_remote = call_rcu_preempt_remote;
 2452 #endif /* #ifdef CONFIG_PREEMPT_RCU */
 2453         rcu_bh_state.call_remote = call_rcu_bh_remote;
 2454         rcu_sched_state.call_remote = call_rcu_sched_remote;
 2455 }
 2456 
 2457 #else /* #ifdef CONFIG_RCU_NOCB_CPU */
 2458 
 2459 static bool is_nocb_cpu(int cpu)
 2460 {
 2461         return false;
 2462 }
 2463 
 2464 static bool __call_rcu_nocb(struct rcu_data *rdp, struct rcu_head *rhp,
 2465                             bool lazy)
 2466 {
 2467         return 0;
 2468 }
 2469 
 2470 static bool __maybe_unused rcu_nocb_adopt_orphan_cbs(struct rcu_state *rsp,
 2471                                                      struct rcu_data *rdp)
 2472 {
 2473         return 0;
 2474 }
 2475 
 2476 static bool nocb_cpu_expendable(int cpu)
 2477 {
 2478         return 1;
 2479 }
 2480 
 2481 static void __init rcu_boot_init_nocb_percpu_data(struct rcu_data *rdp)
 2482 {
 2483 }
 2484 
 2485 static void __init rcu_spawn_nocb_kthreads(struct rcu_state *rsp)
 2486 {
 2487 }
 2488 
 2489 static void init_nocb_callback_list(struct rcu_data *rdp)
 2490 {
 2491 }
 2492 
 2493 static void __init rcu_init_nocb(void)
 2494 {
 2495 }
 2496 
 2497 #endif /* #else #ifdef CONFIG_RCU_NOCB_CPU */
