FreeBSD/Linux Kernel Cross Reference
sys/osfmk/i386/locks_i386.c

    1 /*
    2  * Copyright (c) 2000-2008 Apple Inc. All rights reserved.
    3  *
    4  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
    5  * 
    6  * This file contains Original Code and/or Modifications of Original Code
    7  * as defined in and that are subject to the Apple Public Source License
    8  * Version 2.0 (the 'License'). You may not use this file except in
    9  * compliance with the License. The rights granted to you under the License
   10  * may not be used to create, or enable the creation or redistribution of,
   11  * unlawful or unlicensed copies of an Apple operating system, or to
   12  * circumvent, violate, or enable the circumvention or violation of, any
   13  * terms of an Apple operating system software license agreement.
   14  * 
   15  * Please obtain a copy of the License at
   16  * http://www.opensource.apple.com/apsl/ and read it before using this file.
   17  * 
   18  * The Original Code and all software distributed under the License are
   19  * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
   20  * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
   21  * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
   22  * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
   23  * Please see the License for the specific language governing rights and
   24  * limitations under the License.
   25  * 
   26  * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
   27  */
   28 /*
   29  * @OSF_COPYRIGHT@
   30  */
   31 /* 
   32  * Mach Operating System
   33  * Copyright (c) 1991,1990,1989,1988,1987 Carnegie Mellon University
   34  * All Rights Reserved.
   35  * 
   36  * Permission to use, copy, modify and distribute this software and its
   37  * documentation is hereby granted, provided that both the copyright
   38  * notice and this permission notice appear in all copies of the
   39  * software, derivative works or modified versions, and any portions
   40  * thereof, and that both notices appear in supporting documentation.
   41  * 
   42  * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
   43  * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
   44  * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
   45  * 
   46  * Carnegie Mellon requests users of this software to return to
   47  * 
   48  *  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
   49  *  School of Computer Science
   50  *  Carnegie Mellon University
   51  *  Pittsburgh PA 15213-3890
   52  * 
   53  * any improvements or extensions that they make and grant Carnegie Mellon
   54  * the rights to redistribute these changes.
   55  */
   56 /*
   57  *      File:   kern/lock.c
   58  *      Author: Avadis Tevanian, Jr., Michael Wayne Young
   59  *      Date:   1985
   60  *
   61  *      Locking primitives implementation
   62  */
   63 
   64 #include <mach_ldebug.h>
   65 
   66 #include <kern/lock.h>
   67 #include <kern/locks.h>
   68 #include <kern/kalloc.h>
   69 #include <kern/misc_protos.h>
   70 #include <kern/thread.h>
   71 #include <kern/processor.h>
   72 #include <kern/cpu_data.h>
   73 #include <kern/cpu_number.h>
   74 #include <kern/sched_prim.h>
   75 #include <kern/xpr.h>
   76 #include <kern/debug.h>
   77 #include <string.h>
   78 
   79 #include <i386/machine_routines.h> /* machine_timeout_suspended() */
   80 #include <machine/machine_cpu.h>
   81 #include <i386/mp.h>
   82 
   83 #include <sys/kdebug.h>
   84 #include <mach/branch_predicates.h>
   85 
   86 /*
   87  * We need only enough declarations from the BSD-side to be able to
   88  * test if our probe is active, and to call __dtrace_probe().  Setting
   89  * NEED_DTRACE_DEFS gets a local copy of those definitions pulled in.
   90  */
   91 #if     CONFIG_DTRACE
   92 #define NEED_DTRACE_DEFS
   93 #include <../bsd/sys/lockstat.h>
   94 #endif
   95 
   96 #define LCK_RW_LCK_EXCLUSIVE_CODE       0x100
   97 #define LCK_RW_LCK_EXCLUSIVE1_CODE      0x101
   98 #define LCK_RW_LCK_SHARED_CODE          0x102
   99 #define LCK_RW_LCK_SH_TO_EX_CODE        0x103
  100 #define LCK_RW_LCK_SH_TO_EX1_CODE       0x104
  101 #define LCK_RW_LCK_EX_TO_SH_CODE        0x105
  102 
  103 #define LCK_RW_LCK_EX_WRITER_SPIN_CODE  0x106
  104 #define LCK_RW_LCK_EX_WRITER_WAIT_CODE  0x107
  105 #define LCK_RW_LCK_EX_READER_SPIN_CODE  0x108
  106 #define LCK_RW_LCK_EX_READER_WAIT_CODE  0x109
  107 #define LCK_RW_LCK_SHARED_SPIN_CODE     0x110
  108 #define LCK_RW_LCK_SHARED_WAIT_CODE     0x111
  109 #define LCK_RW_LCK_SH_TO_EX_SPIN_CODE   0x112
  110 #define LCK_RW_LCK_SH_TO_EX_WAIT_CODE   0x113
  111 
  112 
  113 #define ANY_LOCK_DEBUG  (USLOCK_DEBUG || LOCK_DEBUG || MUTEX_DEBUG)
  114 
  115 unsigned int LcksOpts=0;
  116 
  117 /* Forwards */
  118 
  119 #if     USLOCK_DEBUG
  120 /*
  121  *      Perform simple lock checks.
  122  */
  123 int     uslock_check = 1;
  124 int     max_lock_loops  = 100000000;
  125 decl_simple_lock_data(extern , printf_lock)
  126 decl_simple_lock_data(extern , panic_lock)
  127 #endif  /* USLOCK_DEBUG */
  128 
  129 
  130 /*
  131  *      We often want to know the addresses of the callers
  132  *      of the various lock routines.  However, this information
  133  *      is only used for debugging and statistics.
  134  */
  135 typedef void    *pc_t;
  136 #define INVALID_PC      ((void *) VM_MAX_KERNEL_ADDRESS)
  137 #define INVALID_THREAD  ((void *) VM_MAX_KERNEL_ADDRESS)
  138 #if     ANY_LOCK_DEBUG
  139 #define OBTAIN_PC(pc)   ((pc) = GET_RETURN_PC())
  140 #define DECL_PC(pc)     pc_t pc;
  141 #else   /* ANY_LOCK_DEBUG */
  142 #define DECL_PC(pc)
  143 #ifdef  lint
  144 /*
  145  *      Eliminate lint complaints about unused local pc variables.
  146  */
  147 #define OBTAIN_PC(pc)   ++pc
  148 #else   /* lint */
  149 #define OBTAIN_PC(pc)
  150 #endif  /* lint */
   151 #endif  /* ANY_LOCK_DEBUG */
  152 
  153 
  154 /*
  155  *      Portable lock package implementation of usimple_locks.
  156  */
  157 
  158 #if     USLOCK_DEBUG
  159 #define USLDBG(stmt)    stmt
  160 void            usld_lock_init(usimple_lock_t, unsigned short);
  161 void            usld_lock_pre(usimple_lock_t, pc_t);
  162 void            usld_lock_post(usimple_lock_t, pc_t);
  163 void            usld_unlock(usimple_lock_t, pc_t);
  164 void            usld_lock_try_pre(usimple_lock_t, pc_t);
  165 void            usld_lock_try_post(usimple_lock_t, pc_t);
  166 int             usld_lock_common_checks(usimple_lock_t, char *);
  167 #else   /* USLOCK_DEBUG */
  168 #define USLDBG(stmt)
  169 #endif  /* USLOCK_DEBUG */
  170 
  171 
  172 extern int lck_rw_grab_want(lck_rw_t *lck);
  173 extern int lck_rw_grab_shared(lck_rw_t *lck);
  174 extern int lck_rw_held_read_or_upgrade(lck_rw_t *lck);
  175 
  176 
  177 /*
  178  * Forward definitions
  179  */
  180 
  181 void lck_rw_lock_shared_gen(
  182         lck_rw_t        *lck);
  183 
  184 void lck_rw_lock_exclusive_gen(
  185         lck_rw_t        *lck);
  186 
  187 boolean_t lck_rw_lock_shared_to_exclusive_success(
  188         lck_rw_t        *lck);
  189 
  190 boolean_t lck_rw_lock_shared_to_exclusive_failure(
  191         lck_rw_t        *lck,
  192         int             prior_lock_state);
  193 
  194 void lck_rw_lock_exclusive_to_shared_gen(
  195         lck_rw_t        *lck,
  196         int             prior_lock_state);
  197 
  198 lck_rw_type_t lck_rw_done_gen(
  199         lck_rw_t        *lck,
  200         int             prior_lock_state);
  201 
  202 /*
  203  *      Routine:        lck_spin_alloc_init
  204  */
  205 lck_spin_t *
  206 lck_spin_alloc_init(
  207         lck_grp_t       *grp,
  208         lck_attr_t      *attr)
  209 {
  210         lck_spin_t      *lck;
  211 
  212         if ((lck = (lck_spin_t *)kalloc(sizeof(lck_spin_t))) != 0)
  213                 lck_spin_init(lck, grp, attr);
  214 
  215         return(lck);
  216 }
  217 
  218 /*
  219  *      Routine:        lck_spin_free
  220  */
  221 void
  222 lck_spin_free(
  223         lck_spin_t      *lck,
  224         lck_grp_t       *grp)
  225 {
  226         lck_spin_destroy(lck, grp);
  227         kfree(lck, sizeof(lck_spin_t));
  228 }
  229 
  230 /*
  231  *      Routine:        lck_spin_init
  232  */
  233 void
  234 lck_spin_init(
  235         lck_spin_t      *lck,
  236         lck_grp_t       *grp,
  237         __unused lck_attr_t     *attr)
  238 {
  239         usimple_lock_init((usimple_lock_t) lck, 0);
  240         lck_grp_reference(grp);
  241         lck_grp_lckcnt_incr(grp, LCK_TYPE_SPIN);
  242 }
  243 
  244 /*
  245  *      Routine:        lck_spin_destroy
  246  */
  247 void
  248 lck_spin_destroy(
  249         lck_spin_t      *lck,
  250         lck_grp_t       *grp)
  251 {
  252         if (lck->interlock == LCK_SPIN_TAG_DESTROYED)
  253                 return;
  254         lck->interlock = LCK_SPIN_TAG_DESTROYED;
  255         lck_grp_lckcnt_decr(grp, LCK_TYPE_SPIN);
  256         lck_grp_deallocate(grp);
  257         return;
  258 }
  259 
  260 /*
  261  *      Routine:        lck_spin_lock
  262  */
  263 void
  264 lck_spin_lock(
  265         lck_spin_t      *lck)
  266 {
  267         usimple_lock((usimple_lock_t) lck);
  268 }
  269 
  270 /*
  271  *      Routine:        lck_spin_unlock
  272  */
  273 void
  274 lck_spin_unlock(
  275         lck_spin_t      *lck)
  276 {
  277         usimple_unlock((usimple_lock_t) lck);
  278 }
  279 
  280 
  281 /*
  282  *      Routine:        lck_spin_try_lock
  283  */
  284 boolean_t
  285 lck_spin_try_lock(
  286         lck_spin_t      *lck)
  287 {
  288         return((boolean_t)usimple_lock_try((usimple_lock_t) lck));
  289 }
  290 
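/*
 * Usage sketch for the lck_spin_* interface above (illustrative only, not
 * part of the implementation).  It assumes the lck_grp_alloc_init(),
 * lck_grp_free(), LCK_GRP_ATTR_NULL and LCK_ATTR_NULL declarations from
 * <kern/locks.h>; the "example_" names are hypothetical.
 */
#if 0   /* illustrative sketch, not compiled */
static lck_grp_t        *example_grp;
static lck_spin_t       *example_slock;

static void
example_spin_setup(void)
{
        /* one group per subsystem, then one or more locks in that group */
        example_grp   = lck_grp_alloc_init("example", LCK_GRP_ATTR_NULL);
        example_slock = lck_spin_alloc_init(example_grp, LCK_ATTR_NULL);
}

static void
example_spin_use(void)
{
        lck_spin_lock(example_slock);   /* spins; returns with preemption disabled */
        /* ... touch the protected data; no blocking allowed while held ... */
        lck_spin_unlock(example_slock);
}

static void
example_spin_teardown(void)
{
        lck_spin_free(example_slock, example_grp);      /* destroy + kfree */
        lck_grp_free(example_grp);
}
#endif
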
  291 /*
  292  *      Initialize a usimple_lock.
  293  *
  294  *      No change in preemption state.
  295  */
  296 void
  297 usimple_lock_init(
  298         usimple_lock_t  l,
  299         __unused unsigned short tag)
  300 {
  301 #ifndef MACHINE_SIMPLE_LOCK
  302         USLDBG(usld_lock_init(l, tag));
  303         hw_lock_init(&l->interlock);
  304 #else
  305         simple_lock_init((simple_lock_t)l,tag);
  306 #endif
  307 }
  308 
  309 volatile uint32_t spinlock_owner_cpu = ~0;
  310 volatile usimple_lock_t spinlock_timed_out;
  311 
  312 static uint32_t spinlock_timeout_NMI(uintptr_t thread_addr) {
  313         uint64_t deadline;
  314         uint32_t i;
  315 
  316         for (i = 0; i < real_ncpus; i++) {
  317                 if ((uintptr_t)cpu_data_ptr[i]->cpu_active_thread == thread_addr) {
  318                         spinlock_owner_cpu = i;
  319                         if ((uint32_t) cpu_number() == i)
  320                                 break;
  321                         cpu_datap(i)->cpu_NMI_acknowledged = FALSE;
  322                         cpu_NMI_interrupt(i);
  323                         deadline = mach_absolute_time() + (LockTimeOut * 2);
  324                         while (mach_absolute_time() < deadline && cpu_datap(i)->cpu_NMI_acknowledged == FALSE)
  325                                 cpu_pause();
  326                         break;
  327                 }
  328         }
  329 
  330         return spinlock_owner_cpu;
  331 }
  332 
  333 /*
  334  *      Acquire a usimple_lock.
  335  *
  336  *      Returns with preemption disabled.  Note
  337  *      that the hw_lock routines are responsible for
  338  *      maintaining preemption state.
  339  */
  340 void
  341 usimple_lock(
  342         usimple_lock_t  l)
  343 {
  344 #ifndef MACHINE_SIMPLE_LOCK
  345         DECL_PC(pc);
  346 
  347         OBTAIN_PC(pc);
  348         USLDBG(usld_lock_pre(l, pc));
  349 
  350         if(__improbable(hw_lock_to(&l->interlock, LockTimeOutTSC) == 0))        {
  351                 boolean_t uslock_acquired = FALSE;
  352                 while (machine_timeout_suspended()) {
  353                         enable_preemption();
  354                         if ((uslock_acquired = hw_lock_to(&l->interlock, LockTimeOutTSC)))
  355                                 break;
  356                 }
  357 
  358                 if (uslock_acquired == FALSE) {
  359                         uint32_t lock_cpu;
  360                         uintptr_t lowner = (uintptr_t)l->interlock.lock_data;
  361                         spinlock_timed_out = l;
  362                         lock_cpu = spinlock_timeout_NMI(lowner);
  363                         panic("Spinlock acquisition timed out: lock=%p, lock owner thread=0x%lx, current_thread: %p, lock owner active on CPU 0x%x, current owner: 0x%lx", l, lowner,  current_thread(), lock_cpu, (uintptr_t)l->interlock.lock_data);
  364                 }
  365         }
  366         USLDBG(usld_lock_post(l, pc));
  367 #else
  368         simple_lock((simple_lock_t)l);
  369 #endif
  370 }
  371 
  372 
  373 /*
  374  *      Release a usimple_lock.
  375  *
  376  *      Returns with preemption enabled.  Note
  377  *      that the hw_lock routines are responsible for
  378  *      maintaining preemption state.
  379  */
  380 void
  381 usimple_unlock(
  382         usimple_lock_t  l)
  383 {
  384 #ifndef MACHINE_SIMPLE_LOCK
  385         DECL_PC(pc);
  386 
  387         OBTAIN_PC(pc);
  388         USLDBG(usld_unlock(l, pc));
  389         hw_lock_unlock(&l->interlock);
  390 #else
  391         simple_unlock_rwmb((simple_lock_t)l);
  392 #endif
  393 }
  394 
  395 
  396 /*
  397  *      Conditionally acquire a usimple_lock.
  398  *
  399  *      On success, returns with preemption disabled.
  400  *      On failure, returns with preemption in the same state
  401  *      as when first invoked.  Note that the hw_lock routines
  402  *      are responsible for maintaining preemption state.
  403  *
  404  *      XXX No stats are gathered on a miss; I preserved this
  405  *      behavior from the original assembly-language code, but
  406  *      doesn't it make sense to log misses?  XXX
  407  */
  408 unsigned int
  409 usimple_lock_try(
  410         usimple_lock_t  l)
  411 {
  412 #ifndef MACHINE_SIMPLE_LOCK
  413         unsigned int    success;
  414         DECL_PC(pc);
  415 
  416         OBTAIN_PC(pc);
  417         USLDBG(usld_lock_try_pre(l, pc));
  418         if ((success = hw_lock_try(&l->interlock))) {
  419                 USLDBG(usld_lock_try_post(l, pc));
  420         }
  421         return success;
  422 #else
  423         return(simple_lock_try((simple_lock_t)l));
  424 #endif
  425 }
  426 
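/*
 * Sketch of the conditional-acquire pattern described above (illustrative
 * only): on success the caller holds the lock with preemption disabled, on
 * failure preemption is left exactly as it was.  "example_" names are
 * hypothetical.
 */
#if 0   /* illustrative sketch, not compiled */
static boolean_t
example_try_update(usimple_lock_t l, int *counter)
{
        if (!usimple_lock_try(l))
                return (FALSE);         /* caller can retry or take a slow path */

        (*counter)++;                   /* protected update */
        usimple_unlock(l);              /* drops the lock, re-enables preemption */
        return (TRUE);
}
#endif
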
  427 #if     USLOCK_DEBUG
  428 /*
  429  *      States of a usimple_lock.  The default when initializing
  430  *      a usimple_lock is setting it up for debug checking.
  431  */
  432 #define USLOCK_CHECKED          0x0001          /* lock is being checked */
  433 #define USLOCK_TAKEN            0x0002          /* lock has been taken */
  434 #define USLOCK_INIT             0xBAA0          /* lock has been initialized */
  435 #define USLOCK_INITIALIZED      (USLOCK_INIT|USLOCK_CHECKED)
  436 #define USLOCK_CHECKING(l)      (uslock_check &&                        \
  437                                  ((l)->debug.state & USLOCK_CHECKED))
  438 
  439 /*
  440  *      Trace activities of a particularly interesting lock.
  441  */
  442 void    usl_trace(usimple_lock_t, int, pc_t, const char *);
  443 
  444 
  445 /*
  446  *      Initialize the debugging information contained
  447  *      in a usimple_lock.
  448  */
  449 void
  450 usld_lock_init(
  451         usimple_lock_t  l,
  452         __unused unsigned short tag)
  453 {
  454         if (l == USIMPLE_LOCK_NULL)
  455                 panic("lock initialization:  null lock pointer");
  456         l->lock_type = USLOCK_TAG;
  457         l->debug.state = uslock_check ? USLOCK_INITIALIZED : 0;
  458         l->debug.lock_cpu = l->debug.unlock_cpu = 0;
  459         l->debug.lock_pc = l->debug.unlock_pc = INVALID_PC;
  460         l->debug.lock_thread = l->debug.unlock_thread = INVALID_THREAD;
  461         l->debug.duration[0] = l->debug.duration[1] = 0;
   462         l->debug.unlock_cpu = 0;
   463         l->debug.unlock_pc = INVALID_PC;
   464         l->debug.unlock_thread = INVALID_THREAD;
  465 }
  466 
  467 
  468 /*
  469  *      These checks apply to all usimple_locks, not just
  470  *      those with USLOCK_CHECKED turned on.
  471  */
  472 int
  473 usld_lock_common_checks(
  474         usimple_lock_t  l,
  475         char            *caller)
  476 {
  477         if (l == USIMPLE_LOCK_NULL)
  478                 panic("%s:  null lock pointer", caller);
  479         if (l->lock_type != USLOCK_TAG)
  480                 panic("%s:  %p is not a usimple lock, 0x%x", caller, l, l->lock_type);
  481         if (!(l->debug.state & USLOCK_INIT))
  482                 panic("%s:  %p is not an initialized lock, 0x%x", caller, l, l->debug.state);
  483         return USLOCK_CHECKING(l);
  484 }
  485 
  486 
  487 /*
  488  *      Debug checks on a usimple_lock just before attempting
  489  *      to acquire it.
  490  */
  491 /* ARGSUSED */
  492 void
  493 usld_lock_pre(
  494         usimple_lock_t  l,
  495         pc_t            pc)
  496 {
  497         char    caller[] = "usimple_lock";
  498 
  499 
  500         if (!usld_lock_common_checks(l, caller))
  501                 return;
  502 
  503 /*
   504  *      Note that we have a weird case where we are getting a lock when we are
   505  *      in the process of putting the system to sleep.  We are running with no
   506  *      current threads, so we can't tell whether we are trying to retake a lock
   507  *      we already hold or whether a thread on another processor holds it.  We
   508  *      therefore skip this check when the locking thread is 0.
  509  */
  510 
  511         if ((l->debug.state & USLOCK_TAKEN) && l->debug.lock_thread &&
  512             l->debug.lock_thread == (void *) current_thread()) {
  513                 printf("%s:  lock %p already locked (at %p) by",
  514                       caller, l, l->debug.lock_pc);
  515                 printf(" current thread %p (new attempt at pc %p)\n",
  516                        l->debug.lock_thread, pc);
  517                 panic("%s", caller);
  518         }
  519         mp_disable_preemption();
  520         usl_trace(l, cpu_number(), pc, caller);
  521         mp_enable_preemption();
  522 }
  523 
  524 
  525 /*
  526  *      Debug checks on a usimple_lock just after acquiring it.
  527  *
  528  *      Pre-emption has been disabled at this point,
  529  *      so we are safe in using cpu_number.
  530  */
  531 void
  532 usld_lock_post(
  533         usimple_lock_t  l,
  534         pc_t            pc)
  535 {
  536         register int    mycpu;
  537         char    caller[] = "successful usimple_lock";
  538 
  539 
  540         if (!usld_lock_common_checks(l, caller))
  541                 return;
  542 
  543         if (!((l->debug.state & ~USLOCK_TAKEN) == USLOCK_INITIALIZED))
  544                 panic("%s:  lock %p became uninitialized",
  545                       caller, l);
  546         if ((l->debug.state & USLOCK_TAKEN))
  547                 panic("%s:  lock 0x%p became TAKEN by someone else",
  548                       caller, l);
  549 
  550         mycpu = cpu_number();
  551         l->debug.lock_thread = (void *)current_thread();
  552         l->debug.state |= USLOCK_TAKEN;
  553         l->debug.lock_pc = pc;
  554         l->debug.lock_cpu = mycpu;
  555 
  556         usl_trace(l, mycpu, pc, caller);
  557 }
  558 
  559 
  560 /*
  561  *      Debug checks on a usimple_lock just before
  562  *      releasing it.  Note that the caller has not
  563  *      yet released the hardware lock.
  564  *
  565  *      Preemption is still disabled, so there's
  566  *      no problem using cpu_number.
  567  */
  568 void
  569 usld_unlock(
  570         usimple_lock_t  l,
  571         pc_t            pc)
  572 {
  573         register int    mycpu;
  574         char    caller[] = "usimple_unlock";
  575 
  576 
  577         if (!usld_lock_common_checks(l, caller))
  578                 return;
  579 
  580         mycpu = cpu_number();
  581 
  582         if (!(l->debug.state & USLOCK_TAKEN))
  583                 panic("%s:  lock 0x%p hasn't been taken",
  584                       caller, l);
  585         if (l->debug.lock_thread != (void *) current_thread())
  586                 panic("%s:  unlocking lock 0x%p, owned by thread %p",
  587                       caller, l, l->debug.lock_thread);
  588         if (l->debug.lock_cpu != mycpu) {
  589                 printf("%s:  unlocking lock 0x%p on cpu 0x%x",
  590                        caller, l, mycpu);
  591                 printf(" (acquired on cpu 0x%x)\n", l->debug.lock_cpu);
  592                 panic("%s", caller);
  593         }
  594         usl_trace(l, mycpu, pc, caller);
  595 
  596         l->debug.unlock_thread = l->debug.lock_thread;
   597         l->debug.lock_thread = INVALID_THREAD;
  598         l->debug.state &= ~USLOCK_TAKEN;
  599         l->debug.unlock_pc = pc;
  600         l->debug.unlock_cpu = mycpu;
  601 }
  602 
  603 
  604 /*
  605  *      Debug checks on a usimple_lock just before
  606  *      attempting to acquire it.
  607  *
  608  *      Preemption isn't guaranteed to be disabled.
  609  */
  610 void
  611 usld_lock_try_pre(
  612         usimple_lock_t  l,
  613         pc_t            pc)
  614 {
  615         char    caller[] = "usimple_lock_try";
  616 
  617         if (!usld_lock_common_checks(l, caller))
  618                 return;
  619         mp_disable_preemption();
  620         usl_trace(l, cpu_number(), pc, caller);
  621         mp_enable_preemption();
  622 }
  623 
  624 
  625 /*
  626  *      Debug checks on a usimple_lock just after
  627  *      successfully attempting to acquire it.
  628  *
  629  *      Preemption has been disabled by the
  630  *      lock acquisition attempt, so it's safe
  631  *      to use cpu_number.
  632  */
  633 void
  634 usld_lock_try_post(
  635         usimple_lock_t  l,
  636         pc_t            pc)
  637 {
  638         register int    mycpu;
  639         char    caller[] = "successful usimple_lock_try";
  640 
  641         if (!usld_lock_common_checks(l, caller))
  642                 return;
  643 
  644         if (!((l->debug.state & ~USLOCK_TAKEN) == USLOCK_INITIALIZED))
  645                 panic("%s:  lock 0x%p became uninitialized",
  646                       caller, l);
  647         if ((l->debug.state & USLOCK_TAKEN))
  648                 panic("%s:  lock 0x%p became TAKEN by someone else",
  649                       caller, l);
  650 
  651         mycpu = cpu_number();
  652         l->debug.lock_thread = (void *) current_thread();
  653         l->debug.state |= USLOCK_TAKEN;
  654         l->debug.lock_pc = pc;
  655         l->debug.lock_cpu = mycpu;
  656 
  657         usl_trace(l, mycpu, pc, caller);
  658 }
  659 
  660 
  661 /*
  662  *      For very special cases, set traced_lock to point to a
  663  *      specific lock of interest.  The result is a series of
  664  *      XPRs showing lock operations on that lock.  The lock_seq
  665  *      value is used to show the order of those operations.
  666  */
  667 usimple_lock_t          traced_lock;
  668 unsigned int            lock_seq;
  669 
  670 void
  671 usl_trace(
  672         usimple_lock_t  l,
  673         int             mycpu,
  674         pc_t            pc,
  675         const char *    op_name)
  676 {
  677         if (traced_lock == l) {
  678                 XPR(XPR_SLOCK,
  679                     "seq %d, cpu %d, %s @ %x\n",
  680                     (uintptr_t) lock_seq, (uintptr_t) mycpu,
  681                     (uintptr_t) op_name, (uintptr_t) pc, 0);
  682                 lock_seq++;
  683         }
  684 }
  685 
  686 
  687 #endif  /* USLOCK_DEBUG */
  688 
  689 /*
  690  *      Routine:        lock_alloc
  691  *      Function:
  692  *              Allocate a lock for external users who cannot
  693  *              hard-code the structure definition into their
  694  *              objects.
  695  *              For now just use kalloc, but a zone is probably
  696  *              warranted.
  697  */
  698 lock_t *
  699 lock_alloc(
  700         boolean_t       can_sleep,
  701         unsigned short  tag,
  702         unsigned short  tag1)
  703 {
  704         lock_t          *l;
  705 
  706         if ((l = (lock_t *)kalloc(sizeof(lock_t))) != 0)
  707           lock_init(l, can_sleep, tag, tag1);
  708         return(l);
  709 }
  710 
  711 /*
  712  *      Routine:        lock_free
  713  *      Function:
  714  *              Free a lock allocated for external users.
  715  *              For now just use kfree, but a zone is probably
  716  *              warranted.
  717  */
  718 void
  719 lock_free(
  720         lock_t          *l)
  721 {
  722         kfree(l, sizeof(lock_t));
  723 }
  724 
  725           
  726 /*
  727  *      Routine:        lock_init
  728  *      Function:
  729  *              Initialize a lock; required before use.
  730  *              Note that clients declare the "struct lock"
  731  *              variables and then initialize them, rather
  732  *              than getting a new one from this module.
  733  */
  734 void
  735 lock_init(
  736         lock_t          *l,
  737         boolean_t       can_sleep,
  738         __unused unsigned short tag,
  739         __unused unsigned short tag1)
  740 {
  741         hw_lock_byte_init(&l->lck_rw_interlock);
  742         l->lck_rw_want_write = FALSE;
  743         l->lck_rw_want_upgrade = FALSE;
  744         l->lck_rw_shared_count = 0;
  745         l->lck_rw_can_sleep = can_sleep;
  746         l->lck_rw_tag = tag;
  747         l->lck_rw_priv_excl = 1;
  748         l->lck_r_waiting = l->lck_w_waiting = 0;
  749 }
  750 
  751 
  752 /*
  753  *      Sleep locks.  These use the same data structure and algorithm
  754  *      as the spin locks, but the process sleeps while it is waiting
  755  *      for the lock.  These work on uniprocessor systems.
  756  */
  757 
  758 #define DECREMENTER_TIMEOUT 1000000
  759 
  760 void
  761 lock_write(
  762         register lock_t * l)
  763 {
  764         lck_rw_lock_exclusive(l);
  765 }
  766 
  767 void
  768 lock_done(
  769         register lock_t * l)
  770 {
  771         (void) lck_rw_done(l);
  772 }
  773 
  774 void
  775 lock_read(
  776         register lock_t * l)
  777 {
  778         lck_rw_lock_shared(l);
  779 }
  780 
  781 
  782 /*
  783  *      Routine:        lock_read_to_write
  784  *      Function:
  785  *              Improves a read-only lock to one with
  786  *              write permission.  If another reader has
  787  *              already requested an upgrade to a write lock,
  788  *              no lock is held upon return.
  789  *
  790  *              Returns FALSE if the upgrade *failed*.
  791  */
  792 
  793 boolean_t
  794 lock_read_to_write(
  795         register lock_t * l)
  796 {
  797         return lck_rw_lock_shared_to_exclusive(l);
  798 }
  799 
  800 void
  801 lock_write_to_read(
  802         register lock_t * l)
  803 {
  804         lck_rw_lock_exclusive_to_shared(l);
  805 }
  806 
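/*
 * Sketch of the legacy lock_* compatibility interface above, which simply
 * forwards to the lck_rw_* primitives (illustrative only; "example_" names
 * are hypothetical, lock obtained via lock_alloc(TRUE, 0, 0)).
 */
#if 0   /* illustrative sketch, not compiled */
static void
example_legacy_rw(lock_t *l)
{
        lock_read(l);                           /* shared hold */

        if (lock_read_to_write(l)) {
                /* TRUE: upgraded, now held exclusively */
                /* ... modify the protected data ... */
                lock_write_to_read(l);          /* downgrade, still held shared */
                lock_done(l);
        } else {
                /* FALSE: the upgrade failed and the read hold was dropped */
                lock_write(l);                  /* fall back to a plain write lock */
                /* ... modify the protected data ... */
                lock_done(l);
        }
}
#endif
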
  807 
  808 
  809 /*
  810  *      Routine:        lck_rw_alloc_init
  811  */
  812 lck_rw_t *
  813 lck_rw_alloc_init(
  814         lck_grp_t       *grp,
  815         lck_attr_t      *attr) {
  816         lck_rw_t        *lck;
  817 
  818         if ((lck = (lck_rw_t *)kalloc(sizeof(lck_rw_t))) != 0) {
  819                 bzero(lck, sizeof(lck_rw_t));
  820                 lck_rw_init(lck, grp, attr);
  821         }
  822 
  823         return(lck);
  824 }
  825 
  826 /*
  827  *      Routine:        lck_rw_free
  828  */
  829 void
  830 lck_rw_free(
  831         lck_rw_t        *lck,
  832         lck_grp_t       *grp) {
  833         lck_rw_destroy(lck, grp);
  834         kfree(lck, sizeof(lck_rw_t));
  835 }
  836 
  837 /*
  838  *      Routine:        lck_rw_init
  839  */
  840 void
  841 lck_rw_init(
  842         lck_rw_t        *lck,
  843         lck_grp_t       *grp,
  844         lck_attr_t      *attr)
  845 {
  846         lck_attr_t      *lck_attr = (attr != LCK_ATTR_NULL) ?
  847                                         attr : &LockDefaultLckAttr;
  848 
  849         hw_lock_byte_init(&lck->lck_rw_interlock);
  850         lck->lck_rw_want_write = FALSE;
  851         lck->lck_rw_want_upgrade = FALSE;
  852         lck->lck_rw_shared_count = 0;
  853         lck->lck_rw_can_sleep = TRUE;
  854         lck->lck_r_waiting = lck->lck_w_waiting = 0;
  855         lck->lck_rw_tag = 0;
  856         lck->lck_rw_priv_excl = ((lck_attr->lck_attr_val &
  857                                 LCK_ATTR_RW_SHARED_PRIORITY) == 0);
  858 
  859         lck_grp_reference(grp);
  860         lck_grp_lckcnt_incr(grp, LCK_TYPE_RW);
  861 }
  862 
  863 /*
  864  *      Routine:        lck_rw_destroy
  865  */
  866 void
  867 lck_rw_destroy(
  868         lck_rw_t        *lck,
  869         lck_grp_t       *grp)
  870 {
  871         if (lck->lck_rw_tag == LCK_RW_TAG_DESTROYED)
  872                 return;
  873         lck->lck_rw_tag = LCK_RW_TAG_DESTROYED;
  874         lck_grp_lckcnt_decr(grp, LCK_TYPE_RW);
  875         lck_grp_deallocate(grp);
  876         return;
  877 }
  878 
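/*
 * Lifecycle sketch for the lck_rw_* interface (illustrative only): allocate
 * against a group, take the lock shared or exclusive, release with
 * lck_rw_done(), and free the lock when done.  Assumes lck_grp_alloc_init(),
 * lck_grp_free(), LCK_GRP_ATTR_NULL and LCK_ATTR_NULL from <kern/locks.h>;
 * "example_" names are hypothetical.
 */
#if 0   /* illustrative sketch, not compiled */
static void
example_rw_lifecycle(void)
{
        lck_grp_t       *grp = lck_grp_alloc_init("example.rw", LCK_GRP_ATTR_NULL);
        lck_rw_t        *rw  = lck_rw_alloc_init(grp, LCK_ATTR_NULL);

        lck_rw_lock_shared(rw);         /* many readers may hold this at once */
        /* ... read the protected data ... */
        lck_rw_done(rw);                /* returns LCK_RW_TYPE_SHARED */

        lck_rw_lock_exclusive(rw);      /* single writer */
        /* ... modify the protected data ... */
        lck_rw_done(rw);                /* returns LCK_RW_TYPE_EXCLUSIVE */

        lck_rw_free(rw, grp);           /* destroy + kfree */
        lck_grp_free(grp);
}
#endif
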
  879 /*
  880  *      Sleep locks.  These use the same data structure and algorithm
  881  *      as the spin locks, but the process sleeps while it is waiting
  882  *      for the lock.  These work on uniprocessor systems.
  883  */
  884 
  885 #define DECREMENTER_TIMEOUT 1000000
  886 
  887 #define RW_LOCK_READER_EVENT(x)         \
  888                 ((event_t) (((unsigned char*) (x)) + (offsetof(lck_rw_t, lck_rw_tag))))
  889 
  890 #define RW_LOCK_WRITER_EVENT(x)         \
  891                 ((event_t) (((unsigned char*) (x)) + (offsetof(lck_rw_t, lck_rw_pad8))))
  892 
  893 /*
  894  * We disable interrupts while holding the RW interlock to prevent an
  895  * interrupt from exacerbating hold time.
  896  * Hence, local helper functions lck_interlock_lock()/lck_interlock_unlock().
  897  */
  898 static boolean_t
  899 lck_interlock_lock(lck_rw_t *lck)
  900 {
  901         boolean_t       istate;
  902 
  903         istate = ml_set_interrupts_enabled(FALSE);      
  904         hw_lock_byte_lock(&lck->lck_rw_interlock);
  905 
  906         return istate;
  907 }
  908 
  909 static void
  910 lck_interlock_unlock(lck_rw_t *lck, boolean_t istate)
  911 {               
  912         hw_lock_byte_unlock(&lck->lck_rw_interlock);
  913         ml_set_interrupts_enabled(istate);
  914 }
  915 
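/*
 * Sketch of how the interlock helpers above are paired by the slow paths in
 * this file (illustrative only): the caller saves the returned interrupt
 * state and hands it back on release so the original state is restored.
 */
#if 0   /* illustrative sketch, not compiled */
static void
example_publish_waiter(lck_rw_t *lck)
{
        boolean_t       istate;

        istate = lck_interlock_lock(lck);       /* interrupts off, byte lock held */
        lck->lck_w_waiting = TRUE;              /* e.g. publish a waiter, as the
                                                 * blocking paths below do */
        lck_interlock_unlock(lck, istate);      /* byte lock dropped, interrupts restored */
}
#endif
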
  916 /*
  917  * This inline is used when busy-waiting for an rw lock.
  918  * If interrupts were disabled when the lock primitive was called,
  919  * we poll the IPI handler for pending tlb flushes.
  920  * XXX This is a hack to avoid deadlocking on the pmap_system_lock.
  921  */
  922 static inline void
  923 lck_rw_lock_pause(boolean_t interrupts_enabled)
  924 {
  925         if (!interrupts_enabled)
  926                 handle_pending_TLB_flushes();
  927         cpu_pause();
  928 }
  929 
  930 
  931 /*
  932  * compute the deadline to spin against when
  933  * waiting for a change of state on a lck_rw_t
  934  */
  935 static inline uint64_t
  936 lck_rw_deadline_for_spin(lck_rw_t *lck)
  937 {
  938         if (lck->lck_rw_can_sleep) {
  939                 if (lck->lck_r_waiting || lck->lck_w_waiting || lck->lck_rw_shared_count > machine_info.max_cpus) {
  940                         /*
  941                          * there are already threads waiting on this lock... this
  942                          * implies that they have spun beyond their deadlines waiting for 
  943                          * the desired state to show up so we will not bother spinning at this time...
  944                          *   or
  945                          * the current number of threads sharing this lock exceeds our capacity to run them
  946                          * concurrently and since all states we're going to spin for require the rw_shared_count
  947                          * to be at 0, we'll not bother spinning since the latency for this to happen is
  948                          * unpredictable...
  949                          */
  950                         return (mach_absolute_time());
  951                 }
  952                 return (mach_absolute_time() + MutexSpin);
  953         } else
  954                 return (mach_absolute_time() + (100000LL * 1000000000LL));
  955 }
  956 
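/*
 * The *_gen slow paths below all follow the same shape: spin against the
 * deadline computed by lck_rw_deadline_for_spin() and, only if it expires,
 * take the interlock and block.  A condensed sketch of that pattern
 * (illustrative only; the real routines add KERNEL_DEBUG tracing and
 * dtrace bookkeeping):
 */
#if 0   /* illustrative sketch, not compiled */
static void
example_spin_then_block(lck_rw_t *lck)
{
        boolean_t       istate = ml_get_interrupts_enabled();
        uint64_t        deadline = lck_rw_deadline_for_spin(lck);

        while (!lck_rw_grab_want(lck) && mach_absolute_time() < deadline)
                lck_rw_lock_pause(istate);      /* cpu_pause(); drain TLB IPIs if needed */

        /*
         * if the grab still failed, fall back to assert_wait()/thread_block()
         * under the interlock, as lck_rw_lock_exclusive_gen() does below
         */
}
#endif
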
  957 
  958 /*
  959  *      Routine:        lck_rw_lock_exclusive
  960  */
  961 void
  962 lck_rw_lock_exclusive_gen(
  963         lck_rw_t        *lck)
  964 {
  965         uint64_t        deadline = 0;
  966         int             slept = 0;
  967         int             gotlock = 0;
  968         int             lockheld = 0;
  969         wait_result_t   res = 0;
  970         boolean_t       istate = -1;
  971 
  972 #if     CONFIG_DTRACE
  973         boolean_t dtrace_ls_initialized = FALSE;
  974         boolean_t dtrace_rwl_excl_spin, dtrace_rwl_excl_block, dtrace_ls_enabled= FALSE;
  975         uint64_t wait_interval = 0;
  976         int readers_at_sleep = 0;
  977 #endif
  978 
  979         /*
  980          *      Try to acquire the lck_rw_want_write bit.
  981          */
  982         while ( !lck_rw_grab_want(lck)) {
  983 
  984 #if     CONFIG_DTRACE
  985                 if (dtrace_ls_initialized == FALSE) {
  986                         dtrace_ls_initialized = TRUE;
  987                         dtrace_rwl_excl_spin = (lockstat_probemap[LS_LCK_RW_LOCK_EXCL_SPIN] != 0);
  988                         dtrace_rwl_excl_block = (lockstat_probemap[LS_LCK_RW_LOCK_EXCL_BLOCK] != 0);
  989                         dtrace_ls_enabled = dtrace_rwl_excl_spin || dtrace_rwl_excl_block;
  990                         if (dtrace_ls_enabled) {
  991                                 /*
  992                                  * Either sleeping or spinning is happening,
  993                                  *  start a timing of our delay interval now.
  994                                  */
  995                                 readers_at_sleep = lck->lck_rw_shared_count;
  996                                 wait_interval = mach_absolute_time();
  997                         }
  998                 }
  999 #endif
 1000                 if (istate == -1)
 1001                         istate = ml_get_interrupts_enabled();
 1002 
 1003                 deadline = lck_rw_deadline_for_spin(lck);
 1004 
 1005                 KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_EX_WRITER_SPIN_CODE) | DBG_FUNC_START, (int)lck, 0, 0, 0, 0);
 1006                 
 1007                 while (((gotlock = lck_rw_grab_want(lck)) == 0) && mach_absolute_time() < deadline)
 1008                         lck_rw_lock_pause(istate);
 1009 
 1010                 KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_EX_WRITER_SPIN_CODE) | DBG_FUNC_END, (int)lck, 0, 0, gotlock, 0);
 1011 
 1012                 if (gotlock)
 1013                         break;
 1014                 /*
 1015                  * if we get here, the deadline has expired w/o us
 1016                  * being able to grab the lock exclusively
 1017                  * check to see if we're allowed to do a thread_block
 1018                  */
 1019                 if (lck->lck_rw_can_sleep) {
 1020 
 1021                         istate = lck_interlock_lock(lck);
 1022 
 1023                         if (lck->lck_rw_want_write) {
 1024 
 1025                                 KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_EX_WRITER_WAIT_CODE) | DBG_FUNC_START, (int)lck, 0, 0, 0, 0);
 1026 
 1027                                 lck->lck_w_waiting = TRUE;
 1028 
 1029                                 res = assert_wait(RW_LOCK_WRITER_EVENT(lck), THREAD_UNINT);
 1030                                 lck_interlock_unlock(lck, istate);
 1031 
 1032                                 if (res == THREAD_WAITING) {
 1033                                         res = thread_block(THREAD_CONTINUE_NULL);
 1034                                         slept++;
 1035                                 }
 1036                                 KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_EX_WRITER_WAIT_CODE) | DBG_FUNC_END, (int)lck, res, slept, 0, 0);
 1037                         } else {
 1038                                 lck->lck_rw_want_write = TRUE;
 1039                                 lck_interlock_unlock(lck, istate);
 1040                                 break;
 1041                         }
 1042                 }
 1043         }
 1044         /*
 1045          * Wait for readers (and upgrades) to finish...
 1046          * the test for these conditions must be done simultaneously with
 1047          * a check of the interlock not being held since
 1048          * the rw_shared_count will drop to 0 first and then want_upgrade
 1049          * will be set to 1 in the shared_to_exclusive scenario... those
 1050          * adjustments are done behind the interlock and represent an
 1051          * atomic change in state and must be considered as such
 1052          * however, once we see the read count at 0, the want_upgrade not set
 1053          * and the interlock not held, we are safe to proceed
 1054          */
 1055         while (lck_rw_held_read_or_upgrade(lck)) {
 1056 
 1057 #if     CONFIG_DTRACE
 1058                 /*
 1059                  * Either sleeping or spinning is happening, start
 1060                  * a timing of our delay interval now.  If we set it
 1061                  * to -1 we don't have accurate data so we cannot later
 1062                  * decide to record a dtrace spin or sleep event.
 1063                  */
 1064                 if (dtrace_ls_initialized == FALSE) {
 1065                         dtrace_ls_initialized = TRUE;
 1066                         dtrace_rwl_excl_spin = (lockstat_probemap[LS_LCK_RW_LOCK_EXCL_SPIN] != 0);
 1067                         dtrace_rwl_excl_block = (lockstat_probemap[LS_LCK_RW_LOCK_EXCL_BLOCK] != 0);
 1068                         dtrace_ls_enabled = dtrace_rwl_excl_spin || dtrace_rwl_excl_block;
 1069                         if (dtrace_ls_enabled) {
 1070                                 /*
 1071                                  * Either sleeping or spinning is happening,
 1072                                  *  start a timing of our delay interval now.
 1073                                  */
 1074                                 readers_at_sleep = lck->lck_rw_shared_count;
 1075                                 wait_interval = mach_absolute_time();
 1076                         }
 1077                 }
 1078 #endif
 1079                 if (istate == -1)
 1080                         istate = ml_get_interrupts_enabled();
 1081 
 1082                 deadline = lck_rw_deadline_for_spin(lck);
 1083 
 1084                 KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_EX_READER_SPIN_CODE) | DBG_FUNC_START, (int)lck, 0, 0, 0, 0);
 1085 
 1086                 while ((lockheld = lck_rw_held_read_or_upgrade(lck)) && mach_absolute_time() < deadline)
 1087                         lck_rw_lock_pause(istate);
 1088 
 1089                 KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_EX_READER_SPIN_CODE) | DBG_FUNC_END, (int)lck, 0, 0, lockheld, 0);
 1090 
 1091                 if ( !lockheld)
 1092                         break;
 1093                 /*
 1094                  * if we get here, the deadline has expired w/o us
 1095                  * being able to grab the lock exclusively
 1096                  * check to see if we're allowed to do a thread_block
 1097                  */
 1098                 if (lck->lck_rw_can_sleep) {
 1099 
 1100                         istate = lck_interlock_lock(lck);
 1101 
 1102                         if (lck->lck_rw_shared_count != 0 || lck->lck_rw_want_upgrade) {
 1103                                 KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_EX_READER_WAIT_CODE) | DBG_FUNC_START, (int)lck, 0, 0, 0, 0);
 1104 
 1105                                 lck->lck_w_waiting = TRUE;
 1106 
 1107                                 res = assert_wait(RW_LOCK_WRITER_EVENT(lck), THREAD_UNINT);
 1108                                 lck_interlock_unlock(lck, istate);
 1109 
 1110                                 if (res == THREAD_WAITING) {
 1111                                         res = thread_block(THREAD_CONTINUE_NULL);
 1112                                         slept++;
 1113                                 }
 1114                                 KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_EX_READER_WAIT_CODE) | DBG_FUNC_END, (int)lck, res, slept, 0, 0);
 1115                         } else {
 1116                                 lck_interlock_unlock(lck, istate);
 1117                                 /*
 1118                                  * must own the lock now, since we checked for
 1119                                  * readers or upgrade owner behind the interlock
 1120                                  * no need for a call to 'lck_rw_held_read_or_upgrade'
 1121                                  */
 1122                                 break;
 1123                         }
 1124                 }
 1125         }
 1126 
 1127 #if     CONFIG_DTRACE
 1128         /*
 1129          * Decide what latencies we suffered that are Dtrace events.
 1130          * If we have set wait_interval, then we either spun or slept.
 1131          * At least we get out from under the interlock before we record
 1132          * which is the best we can do here to minimize the impact
 1133          * of the tracing.
  1134          * If wait_interval was never set, then dtrace was not enabled when we
  1135          * started sleeping/spinning, so we don't record this event.
 1136          */
 1137         if (dtrace_ls_enabled == TRUE) {
 1138                 if (slept == 0) {
 1139                         LOCKSTAT_RECORD2(LS_LCK_RW_LOCK_EXCL_SPIN, lck,
 1140                             mach_absolute_time() - wait_interval, 1);
 1141                 } else {
 1142                         /*
 1143                          * For the blocking case, we also record if when we blocked
 1144                          * it was held for read or write, and how many readers.
 1145                          * Notice that above we recorded this before we dropped
 1146                          * the interlock so the count is accurate.
 1147                          */
 1148                         LOCKSTAT_RECORD4(LS_LCK_RW_LOCK_EXCL_BLOCK, lck,
 1149                             mach_absolute_time() - wait_interval, 1,
 1150                             (readers_at_sleep == 0 ? 1 : 0), readers_at_sleep);
 1151                 }
 1152         }
 1153         LOCKSTAT_RECORD(LS_LCK_RW_LOCK_EXCL_ACQUIRE, lck, 1);
 1154 #endif
 1155 }
 1156 
 1157 
 1158 /*
 1159  *      Routine:        lck_rw_done_gen
 1160  *
 1161  *      called from the assembly language wrapper...
 1162  *      prior_lock_state is the value in the 1st
 1163  *      word of the lock at the time of a successful
 1164  *      atomic compare and exchange with the new value...
 1165  *      it represents the state of the lock before we
 1166  *      decremented the rw_shared_count or cleared either
 1167  *      rw_want_upgrade or rw_want_write and
 1168  *      the lck_x_waiting bits...  since the wrapper
 1169  *      routine has already changed the state atomically, 
 1170  *      we just need to decide if we should
 1171  *      wake up anyone and what value to return... we do
 1172  *      this by examining the state of the lock before
 1173  *      we changed it
 1174  */
 1175 lck_rw_type_t
 1176 lck_rw_done_gen(
 1177         lck_rw_t        *lck,
 1178         int             prior_lock_state)
 1179 {
 1180         lck_rw_t        *fake_lck;
 1181         lck_rw_type_t   lock_type;
 1182 
 1183         /*
  1184          * prior_lock_state is a snapshot of the 1st word of the
  1185          * lock in question... we'll fake up a pointer to it
  1186          * and carefully not access anything beyond what's defined
 1187          * in the first word of a lck_rw_t
 1188          */
 1189         fake_lck = (lck_rw_t *)&prior_lock_state;
 1190 
 1191         if (fake_lck->lck_rw_shared_count <= 1) {
 1192                 if (fake_lck->lck_w_waiting)
 1193                         thread_wakeup(RW_LOCK_WRITER_EVENT(lck));
 1194 
 1195                 if (!(fake_lck->lck_rw_priv_excl && fake_lck->lck_w_waiting) && fake_lck->lck_r_waiting)
 1196                         thread_wakeup(RW_LOCK_READER_EVENT(lck));
 1197         }
 1198         if (fake_lck->lck_rw_shared_count)
 1199                 lock_type = LCK_RW_TYPE_SHARED;
 1200         else
 1201                 lock_type = LCK_RW_TYPE_EXCLUSIVE;
 1202 
 1203 #if CONFIG_DTRACE
 1204         LOCKSTAT_RECORD(LS_LCK_RW_DONE_RELEASE, lck, lock_type == LCK_RW_TYPE_SHARED ? 0 : 1);
 1205 #endif
 1206 
 1207         return(lock_type);
 1208 }
 1209 
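/*
 * The fake_lck trick above: prior_lock_state is the 32-bit word that the
 * fast path compare-and-exchanged, so taking its address and casting to
 * lck_rw_t * lets the bitfields of that first word be read by name.
 * Roughly (illustrative only):
 */
#if 0   /* illustrative sketch, not compiled */
static void
example_decode_prior_state(int prior_lock_state)
{
        lck_rw_t        *fake_lck = (lck_rw_t *)&prior_lock_state;

        /* only fields packed into the first word may be read via fake_lck */
        if (fake_lck->lck_w_waiting || fake_lck->lck_r_waiting) {
                /* a waiter was recorded when the snapshot was taken */
        }
}
#endif
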
 1210 
 1211 /*
 1212  *      Routine:        lck_rw_unlock
 1213  */
 1214 void
 1215 lck_rw_unlock(
 1216         lck_rw_t        *lck,
 1217         lck_rw_type_t   lck_rw_type)
 1218 {
 1219         if (lck_rw_type == LCK_RW_TYPE_SHARED)
 1220                 lck_rw_unlock_shared(lck);
 1221         else if (lck_rw_type == LCK_RW_TYPE_EXCLUSIVE)
 1222                 lck_rw_unlock_exclusive(lck);
 1223         else
 1224                 panic("lck_rw_unlock(): Invalid RW lock type: %d\n", lck_rw_type);
 1225 }
 1226 
 1227 
 1228 /*
 1229  *      Routine:        lck_rw_unlock_shared
 1230  */
 1231 void
 1232 lck_rw_unlock_shared(
 1233         lck_rw_t        *lck)
 1234 {
 1235         lck_rw_type_t   ret;
 1236 
 1237         ret = lck_rw_done(lck);
 1238 
 1239         if (ret != LCK_RW_TYPE_SHARED)
 1240                 panic("lck_rw_unlock(): lock held in mode: %d\n", ret);
 1241 }
 1242 
 1243 
 1244 /*
 1245  *      Routine:        lck_rw_unlock_exclusive
 1246  */
 1247 void
 1248 lck_rw_unlock_exclusive(
 1249         lck_rw_t        *lck)
 1250 {
 1251         lck_rw_type_t   ret;
 1252 
 1253         ret = lck_rw_done(lck);
 1254 
 1255         if (ret != LCK_RW_TYPE_EXCLUSIVE)
 1256                 panic("lck_rw_unlock_exclusive(): lock held in mode: %d\n", ret);
 1257 }
 1258 
 1259 
 1260 /*
 1261  *      Routine:        lck_rw_lock
 1262  */
 1263 void
 1264 lck_rw_lock(
 1265         lck_rw_t        *lck,
 1266         lck_rw_type_t   lck_rw_type)
 1267 {
 1268         if (lck_rw_type == LCK_RW_TYPE_SHARED)
 1269                 lck_rw_lock_shared(lck);
 1270         else if (lck_rw_type == LCK_RW_TYPE_EXCLUSIVE)
 1271                 lck_rw_lock_exclusive(lck);
 1272         else
 1273                 panic("lck_rw_lock(): Invalid RW lock type: %x\n", lck_rw_type);
 1274 }
 1275 
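/*
 * Sketch of the type-parameterized entry points above, which let a caller
 * choose shared vs. exclusive at run time (illustrative only; "example_"
 * names are hypothetical).
 */
#if 0   /* illustrative sketch, not compiled */
static void
example_rw_by_type(lck_rw_t *lck, boolean_t writer)
{
        lck_rw_type_t   type = writer ? LCK_RW_TYPE_EXCLUSIVE : LCK_RW_TYPE_SHARED;

        lck_rw_lock(lck, type);
        /* ... critical section ... */
        lck_rw_unlock(lck, type);       /* panics if held in a different mode */
}
#endif
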
 1276 
 1277 /*
 1278  *      Routine:        lck_rw_lock_shared_gen
 1279  *      Function:
 1280  *              assembly fast path code has determined that this lock
 1281  *              is held exclusively... this is where we spin/block
 1282  *              until we can acquire the lock in the shared mode
 1283  */
 1284 void
 1285 lck_rw_lock_shared_gen(
 1286         lck_rw_t        *lck)
 1287 {
 1288         uint64_t        deadline = 0;
 1289         int             gotlock = 0;
 1290         int             slept = 0;
 1291         wait_result_t   res = 0;
 1292         boolean_t       istate = -1;
 1293         
 1294 #if     CONFIG_DTRACE
 1295         uint64_t wait_interval = 0;
 1296         int readers_at_sleep = 0;
 1297         boolean_t dtrace_ls_initialized = FALSE;
 1298         boolean_t dtrace_rwl_shared_spin, dtrace_rwl_shared_block, dtrace_ls_enabled = FALSE;
 1299 #endif
 1300 
 1301         while ( !lck_rw_grab_shared(lck)) {
 1302 
 1303 #if     CONFIG_DTRACE
 1304                 if (dtrace_ls_initialized == FALSE) {
 1305                         dtrace_ls_initialized = TRUE;
 1306                         dtrace_rwl_shared_spin = (lockstat_probemap[LS_LCK_RW_LOCK_SHARED_SPIN] != 0);
 1307                         dtrace_rwl_shared_block = (lockstat_probemap[LS_LCK_RW_LOCK_SHARED_BLOCK] != 0);
 1308                         dtrace_ls_enabled = dtrace_rwl_shared_spin || dtrace_rwl_shared_block;
 1309                         if (dtrace_ls_enabled) {
 1310                                 /*
 1311                                  * Either sleeping or spinning is happening,
 1312                                  *  start a timing of our delay interval now.
 1313                                  */
 1314                                 readers_at_sleep = lck->lck_rw_shared_count;
 1315                                 wait_interval = mach_absolute_time();
 1316                         }
 1317                 }
 1318 #endif
 1319                 if (istate == -1)
 1320                         istate = ml_get_interrupts_enabled();
 1321 
 1322                 deadline = lck_rw_deadline_for_spin(lck);
 1323 
 1324                 KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_SHARED_SPIN_CODE) | DBG_FUNC_START,
 1325                              (int)lck, lck->lck_rw_want_write, lck->lck_rw_want_upgrade, 0, 0);
 1326 
 1327                 while (((gotlock = lck_rw_grab_shared(lck)) == 0) && mach_absolute_time() < deadline)
 1328                         lck_rw_lock_pause(istate);
 1329 
 1330                 KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_SHARED_SPIN_CODE) | DBG_FUNC_END,
 1331                              (int)lck, lck->lck_rw_want_write, lck->lck_rw_want_upgrade, gotlock, 0);
 1332 
 1333                 if (gotlock)
 1334                         break;
 1335                 /*
 1336                  * if we get here, the deadline has expired w/o us
 1337                  * being able to grab the lock for read
 1338                  * check to see if we're allowed to do a thread_block
 1339                  */
 1340                 if (lck->lck_rw_can_sleep) {
 1341 
 1342                         istate = lck_interlock_lock(lck);
 1343 
 1344                         if ((lck->lck_rw_want_write || lck->lck_rw_want_upgrade) &&
 1345                             ((lck->lck_rw_shared_count == 0) || lck->lck_rw_priv_excl)) {
 1346 
 1347                                 KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_SHARED_WAIT_CODE) | DBG_FUNC_START,
 1348                                              (int)lck, lck->lck_rw_want_write, lck->lck_rw_want_upgrade, 0, 0);
 1349 
 1350                                 lck->lck_r_waiting = TRUE;
 1351 
 1352                                 res = assert_wait(RW_LOCK_READER_EVENT(lck), THREAD_UNINT);
 1353                                 lck_interlock_unlock(lck, istate);
 1354 
 1355                                 if (res == THREAD_WAITING) {
 1356                                         res = thread_block(THREAD_CONTINUE_NULL);
 1357                                         slept++;
 1358                                 }
 1359                                 KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_SHARED_WAIT_CODE) | DBG_FUNC_END,
 1360                                              (int)lck, res, slept, 0, 0);
 1361                         } else {
 1362                                 lck->lck_rw_shared_count++;
 1363                                 lck_interlock_unlock(lck, istate);
 1364                                 break;
 1365                         }
 1366                 }
 1367         }
 1368 
 1369 #if     CONFIG_DTRACE
 1370         if (dtrace_ls_enabled == TRUE) {
 1371                 if (slept == 0) {
 1372                         LOCKSTAT_RECORD2(LS_LCK_RW_LOCK_SHARED_SPIN, lck, mach_absolute_time() - wait_interval, 0);
 1373                 } else {
 1374                         LOCKSTAT_RECORD4(LS_LCK_RW_LOCK_SHARED_BLOCK, lck,
 1375                             mach_absolute_time() - wait_interval, 0,
 1376                             (readers_at_sleep == 0 ? 1 : 0), readers_at_sleep);
 1377                 }
 1378         }
 1379         LOCKSTAT_RECORD(LS_LCK_RW_LOCK_SHARED_ACQUIRE, lck, 0);
 1380 #endif
 1381 }
 1382 
 1383 
 1384 /*
 1385  *      Routine:        lck_rw_lock_shared_to_exclusive_failure
 1386  *      Function:
 1387  *              assembly fast path code has already dropped our read
 1388  *              count and determined that someone else owns 'lck_rw_want_upgrade'
  1389  *              if 'lck_rw_shared_count' == 0, it has also already dropped 'lck_w_waiting';
 1390  *              all we need to do here is determine if a wakeup is needed
 1391  */
 1392 boolean_t
 1393 lck_rw_lock_shared_to_exclusive_failure(
 1394         lck_rw_t        *lck,
 1395         int             prior_lock_state)
 1396 {
 1397         lck_rw_t        *fake_lck;
 1398 
 1399         /*
  1400          * prior_lock_state is a snapshot of the 1st word of the
  1401          * lock in question... we'll fake up a pointer to it
  1402          * and carefully not access anything beyond what's defined
 1403          * in the first word of a lck_rw_t
 1404          */
 1405         fake_lck = (lck_rw_t *)&prior_lock_state;
 1406 
 1407         if (fake_lck->lck_w_waiting && fake_lck->lck_rw_shared_count == 1) {
 1408                 /*
 1409                  *      Someone else has requested upgrade.
 1410                  *      Since we've released the read lock, wake
 1411                  *      him up if he's blocked waiting
 1412                  */
 1413                 thread_wakeup(RW_LOCK_WRITER_EVENT(lck));
 1414         }
 1415         KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_SH_TO_EX_CODE) | DBG_FUNC_NONE,
 1416                      (int)lck, lck->lck_rw_shared_count, lck->lck_rw_want_upgrade, 0, 0);
 1417 
 1418         return (FALSE);
 1419 }
 1420 
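/*
 * The failure path above is what a caller of lck_rw_lock_shared_to_exclusive()
 * sees as a FALSE return: the shared hold has already been dropped, so the
 * caller must reacquire from scratch.  Sketch of the canonical pattern
 * (illustrative only):
 */
#if 0   /* illustrative sketch, not compiled */
static void
example_upgrade(lck_rw_t *lck)
{
        lck_rw_lock_shared(lck);

        if (!lck_rw_lock_shared_to_exclusive(lck)) {
                /* upgrade lost the race: no lock is held at this point */
                lck_rw_lock_exclusive(lck);
        }
        /* held exclusive on either path */
        lck_rw_done(lck);
}
#endif
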
 1421 
 1422 /*
  1423  *      Routine:        lck_rw_lock_shared_to_exclusive_success
 1424  *      Function:
 1425  *              assembly fast path code has already dropped our read
 1426  *              count and successfully acquired 'lck_rw_want_upgrade'
 1427  *              we just need to wait for the rest of the readers to drain
 1428  *              and then we can return as the exclusive holder of this lock
 1429  */
 1430 boolean_t
 1431 lck_rw_lock_shared_to_exclusive_success(
 1432         lck_rw_t        *lck)
 1433 {
 1434         uint64_t        deadline = 0;
 1435         int             slept = 0;
 1436         int             still_shared = 0;
 1437         wait_result_t   res;
 1438         boolean_t       istate = -1;
 1439 
 1440 #if     CONFIG_DTRACE
 1441         uint64_t wait_interval = 0;
 1442         int readers_at_sleep = 0;
 1443         boolean_t dtrace_ls_initialized = FALSE;
 1444         boolean_t dtrace_rwl_shared_to_excl_spin, dtrace_rwl_shared_to_excl_block, dtrace_ls_enabled = FALSE;
 1445 #endif
 1446 
 1447         while (lck->lck_rw_shared_count != 0) {
 1448 
 1449 #if     CONFIG_DTRACE
 1450                 if (dtrace_ls_initialized == FALSE) {
 1451                         dtrace_ls_initialized = TRUE;
 1452                         dtrace_rwl_shared_to_excl_spin = (lockstat_probemap[LS_LCK_RW_LOCK_SHARED_TO_EXCL_SPIN] != 0);
 1453                         dtrace_rwl_shared_to_excl_block = (lockstat_probemap[LS_LCK_RW_LOCK_SHARED_TO_EXCL_BLOCK] != 0);
 1454                         dtrace_ls_enabled = dtrace_rwl_shared_to_excl_spin || dtrace_rwl_shared_to_excl_block;
 1455                         if (dtrace_ls_enabled) {
 1456                                 /*
 1457                                  * Either sleeping or spinning is happening;
 1458                                  * start timing our delay interval now.
 1459                                  */
 1460                                 readers_at_sleep = lck->lck_rw_shared_count;
 1461                                 wait_interval = mach_absolute_time();
 1462                         }
 1463                 }
 1464 #endif
 1465                 if (istate == -1)
 1466                         istate = ml_get_interrupts_enabled();
 1467 
 1468                 deadline = lck_rw_deadline_for_spin(lck);
 1469 
 1470                 KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_SH_TO_EX_SPIN_CODE) | DBG_FUNC_START,
 1471                              (int)lck, lck->lck_rw_shared_count, 0, 0, 0);
 1472 
 1473                 while ((still_shared = lck->lck_rw_shared_count) && mach_absolute_time() < deadline)
 1474                         lck_rw_lock_pause(istate);
 1475 
 1476                 KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_SH_TO_EX_SPIN_CODE) | DBG_FUNC_END,
 1477                              (int)lck, lck->lck_rw_shared_count, 0, 0, 0);
 1478 
 1479                 if ( !still_shared)
 1480                         break;
 1481                 /*
 1482                  * if we get here, the deadline has expired without
 1483                  * the rw_shared_count having drained to 0;
 1484                  * check whether we're allowed to do a thread_block
 1485                  */
 1486                 if (lck->lck_rw_can_sleep) {
 1487                         
 1488                         istate = lck_interlock_lock(lck);
 1489                         
 1490                         if (lck->lck_rw_shared_count != 0) {
 1491                                 KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_SH_TO_EX_WAIT_CODE) | DBG_FUNC_START,
 1492                                              (int)lck, lck->lck_rw_shared_count, 0, 0, 0);
 1493 
 1494                                 lck->lck_w_waiting = TRUE;
 1495 
 1496                                 res = assert_wait(RW_LOCK_WRITER_EVENT(lck), THREAD_UNINT);
 1497                                 lck_interlock_unlock(lck, istate);
 1498 
 1499                                 if (res == THREAD_WAITING) {
 1500                                         res = thread_block(THREAD_CONTINUE_NULL);
 1501                                         slept++;
 1502                                 }
 1503                                 KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_SH_TO_EX_WAIT_CODE) | DBG_FUNC_END,
 1504                                              (int)lck, res, slept, 0, 0);
 1505                         } else {
 1506                                 lck_interlock_unlock(lck, istate);
 1507                                 break;
 1508                         }
 1509                 }
 1510         }
 1511 #if     CONFIG_DTRACE
 1512         /*
 1513          * 'slept' records whether we took the sleep path above; otherwise we only spun.
 1514          */
 1515         if (dtrace_ls_enabled == TRUE) {
 1516                 if (slept == 0) {
 1517                         LOCKSTAT_RECORD2(LS_LCK_RW_LOCK_SHARED_TO_EXCL_SPIN, lck, mach_absolute_time() - wait_interval, 0);
 1518                 } else {
 1519                         LOCKSTAT_RECORD4(LS_LCK_RW_LOCK_SHARED_TO_EXCL_BLOCK, lck,
 1520                             mach_absolute_time() - wait_interval, 1,
 1521                             (readers_at_sleep == 0 ? 1 : 0), readers_at_sleep);
 1522                 }
 1523         }
 1524         LOCKSTAT_RECORD(LS_LCK_RW_LOCK_SHARED_TO_EXCL_UPGRADE, lck, 1);
 1525 #endif
 1526         return (TRUE);
 1527 }
 1528 
 1529 
 1530 /*
 1531  *      Routine:        lck_rw_lock_exclusive_to_shared_gen
 1532  *      Function:
 1533  *              the assembly fast path has already dropped
 1534  *              our exclusive state and bumped lck_rw_shared_count;
 1535  *              all we need to do here is determine if anyone
 1536  *              needs to be awakened.
 1537  */
 1538 void
 1539 lck_rw_lock_exclusive_to_shared_gen(
 1540         lck_rw_t        *lck,
 1541         int             prior_lock_state)
 1542 {
 1543         lck_rw_t        *fake_lck;
 1544 
 1545         /*
 1546          * prior_lock_state is a snapshot of the 1st word of the
 1547          * lock in question... we'll fake up a pointer to it
 1548          * and carefully not access anything beyond what's defined
 1549          * in the first word of a lck_rw_t
 1550          */
 1551         fake_lck = (lck_rw_t *)&prior_lock_state;
 1552 
 1553         KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_EX_TO_SH_CODE) | DBG_FUNC_START,
 1554                              (int)lck, fake_lck->lck_rw_want_write, fake_lck->lck_rw_want_upgrade, 0, 0);
 1555 
 1556         /*
 1557          * don't wake up anyone waiting to take the lock exclusively
 1558          * since we hold a read count... when the read count drops to 0,
 1559          * the writers will be woken.
 1560          *
 1561          * wake up any waiting readers if there are no writers waiting, or
 1562          * if the lock is NOT marked rw_priv_excl (i.e. writers are not prioritized)
 1563          */
 1564         if (!(fake_lck->lck_rw_priv_excl && fake_lck->lck_w_waiting) && fake_lck->lck_r_waiting)
 1565                 thread_wakeup(RW_LOCK_READER_EVENT(lck));
 1566 
 1567         KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_EX_TO_SH_CODE) | DBG_FUNC_END,
 1568                              (int)lck, lck->lck_rw_want_write, lck->lck_rw_want_upgrade, lck->lck_rw_shared_count, 0);
 1569 
 1570 #if CONFIG_DTRACE
 1571         LOCKSTAT_RECORD(LS_LCK_RW_LOCK_EXCL_TO_SHARED_DOWNGRADE, lck, 0);
 1572 #endif
 1573 }
 1574 
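/*
 * Illustrative caller-side sketch (not part of the original file): a
 * downgrade trades the exclusive hold for a shared hold in one step, so
 * other readers can proceed while the caller finishes read-only work.
 * Names below are hypothetical.
 */
#include <kern/locks.h>

static void
downgrade_pattern_sketch(lck_rw_t *rwlock)
{
        lck_rw_lock_exclusive(rwlock);

        /* ... modify the protected data ... */

        lck_rw_lock_exclusive_to_shared(rwlock);

        /* ... read-only follow-up while other readers may run ... */

        lck_rw_unlock_shared(rwlock);
}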
 1575 
 1576 /*
 1577  *      Routine:        lck_rw_try_lock
 1578  */
 1579 boolean_t
 1580 lck_rw_try_lock(
 1581         lck_rw_t        *lck,
 1582         lck_rw_type_t   lck_rw_type)
 1583 {
 1584         if (lck_rw_type == LCK_RW_TYPE_SHARED)
 1585                 return(lck_rw_try_lock_shared(lck));
 1586         else if (lck_rw_type == LCK_RW_TYPE_EXCLUSIVE)
 1587                 return(lck_rw_try_lock_exclusive(lck));
 1588         else
 1589                 panic("lck_rw_try_lock(): Invalid rw lock type: %x\n", lck_rw_type);
 1590         return(FALSE);
 1591 }
 1592 
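/*
 * Illustrative sketch (not part of the original file): lck_rw_try_lock()
 * returns FALSE instead of blocking, so callers need an explicit
 * fallback.  Names below are hypothetical.
 */
#include <kern/locks.h>

static boolean_t
try_lock_pattern_sketch(lck_rw_t *rwlock)
{
        if (!lck_rw_try_lock(rwlock, LCK_RW_TYPE_SHARED)) {
                /* contended: the caller decides whether to retry, defer, or block */
                return (FALSE);
        }
        /* ... read-side work ... */
        lck_rw_unlock(rwlock, LCK_RW_TYPE_SHARED);

        return (TRUE);
}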
 1593 
 1594 void
 1595 lck_rw_assert(
 1596         lck_rw_t        *lck,
 1597         unsigned int    type)
 1598 {
 1599         switch (type) {
 1600         case LCK_RW_ASSERT_SHARED:
 1601                 if (lck->lck_rw_shared_count != 0) {
 1602                         return;
 1603                 }
 1604                 break;
 1605         case LCK_RW_ASSERT_EXCLUSIVE:
 1606                 if ((lck->lck_rw_want_write ||
 1607                      lck->lck_rw_want_upgrade) &&
 1608                     lck->lck_rw_shared_count == 0) {
 1609                         return;
 1610                 }
 1611                 break;
 1612         case LCK_RW_ASSERT_HELD:
 1613                 if (lck->lck_rw_want_write ||
 1614                     lck->lck_rw_want_upgrade ||
 1615                     lck->lck_rw_shared_count != 0) {
 1616                         return;
 1617                 }
 1618                 break;
 1619         default:
 1620                 break;
 1621         }
 1622 
 1623         panic("rw lock (%p) not held (mode=%u), first word %08x\n", lck, type, *(uint32_t *)lck);
 1624 }
 1625 
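/*
 * Illustrative sketch (not part of the original file): a routine whose
 * contract requires the caller to hold 'rwlock' can document and enforce
 * that with lck_rw_assert().  Whether this call is available to a given
 * kext is an assumption that depends on the release's exported KPI.
 */
#include <kern/locks.h>

static void
requires_lock_held_sketch(lck_rw_t *rwlock)
{
        /* panics with the lock's first word if the lock is not held */
        lck_rw_assert(rwlock, LCK_RW_ASSERT_HELD);

        /* ... code that is only safe while the lock is held ... */
}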
 1626 #ifdef  MUTEX_ZONE
 1627 extern zone_t lck_mtx_zone;
 1628 #endif
 1629 /*
 1630  *      Routine:        lck_mtx_alloc_init
 1631  */
 1632 lck_mtx_t *
 1633 lck_mtx_alloc_init(
 1634         lck_grp_t       *grp,
 1635         lck_attr_t      *attr)
 1636 {
 1637         lck_mtx_t       *lck;
 1638 #ifdef  MUTEX_ZONE
 1639         if ((lck = (lck_mtx_t *)zalloc(lck_mtx_zone)) != 0)
 1640                 lck_mtx_init(lck, grp, attr);
 1641 #else
 1642         if ((lck = (lck_mtx_t *)kalloc(sizeof(lck_mtx_t))) != 0)
 1643                 lck_mtx_init(lck, grp, attr);
 1644 #endif          
 1645         return(lck);
 1646 }
 1647 
 1648 /*
 1649  *      Routine:        lck_mtx_free
 1650  */
 1651 void
 1652 lck_mtx_free(
 1653         lck_mtx_t       *lck,
 1654         lck_grp_t       *grp)
 1655 {
 1656         lck_mtx_destroy(lck, grp);
 1657 #ifdef  MUTEX_ZONE
 1658         zfree(lck_mtx_zone, lck);
 1659 #else
 1660         kfree(lck, sizeof(lck_mtx_t));
 1661 #endif
 1662 }
 1663 
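/*
 * Illustrative lifecycle sketch (not part of the original file): a
 * dynamically allocated mutex is created against a lock group and is
 * returned with lck_mtx_free(), which destroys it before releasing the
 * storage (see lck_mtx_free() above).  Names below are hypothetical.
 */
#include <kern/locks.h>

static void
mutex_lifecycle_sketch(void)
{
        lck_grp_t       *grp;
        lck_mtx_t       *mtx;

        grp = lck_grp_alloc_init("example.group", LCK_GRP_ATTR_NULL);

        if ((mtx = lck_mtx_alloc_init(grp, LCK_ATTR_NULL)) == NULL) {
                lck_grp_free(grp);
                return;
        }
        lck_mtx_lock(mtx);
        /* ... critical section ... */
        lck_mtx_unlock(mtx);

        lck_mtx_free(mtx, grp);         /* destroys the lock, then frees its storage */
        lck_grp_free(grp);
}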
 1664 /*
 1665  *      Routine:        lck_mtx_ext_init
 1666  */
 1667 static void
 1668 lck_mtx_ext_init(
 1669         lck_mtx_ext_t   *lck,
 1670         lck_grp_t       *grp,
 1671         lck_attr_t      *attr)
 1672 {
 1673         bzero((void *)lck, sizeof(lck_mtx_ext_t));
 1674 
 1675         if ((attr->lck_attr_val) & LCK_ATTR_DEBUG) {
 1676                 lck->lck_mtx_deb.type = MUTEX_TAG;
 1677                 lck->lck_mtx_attr |= LCK_MTX_ATTR_DEBUG;
 1678         }
 1679 
 1680         lck->lck_mtx_grp = grp;
 1681 
 1682         if (grp->lck_grp_attr & LCK_GRP_ATTR_STAT)
 1683                 lck->lck_mtx_attr |= LCK_MTX_ATTR_STAT;
 1684 
 1685         lck->lck_mtx.lck_mtx_is_ext = 1;
 1686 #if     defined(__x86_64__)
 1687         lck->lck_mtx.lck_mtx_sw.lck_mtxd.lck_mtxd_pad32 = 0xFFFFFFFF;
 1688 #endif
 1689 }
 1690 
 1691 /*
 1692  *      Routine:        lck_mtx_init
 1693  */
 1694 void
 1695 lck_mtx_init(
 1696         lck_mtx_t       *lck,
 1697         lck_grp_t       *grp,
 1698         lck_attr_t      *attr)
 1699 {
 1700         lck_mtx_ext_t   *lck_ext;
 1701         lck_attr_t      *lck_attr;
 1702 
 1703         if (attr != LCK_ATTR_NULL)
 1704                 lck_attr = attr;
 1705         else
 1706                 lck_attr = &LockDefaultLckAttr;
 1707 
 1708         if ((lck_attr->lck_attr_val) & LCK_ATTR_DEBUG) {
 1709                 if ((lck_ext = (lck_mtx_ext_t *)kalloc(sizeof(lck_mtx_ext_t))) != 0) {
 1710                         lck_mtx_ext_init(lck_ext, grp, lck_attr);       
 1711                         lck->lck_mtx_tag = LCK_MTX_TAG_INDIRECT;
 1712                         lck->lck_mtx_ptr = lck_ext;
 1713                 }
 1714         } else {
 1715                 lck->lck_mtx_owner = 0;
 1716                 lck->lck_mtx_state = 0;
 1717         }
 1718 #if     defined(__x86_64__)
 1719         lck->lck_mtx_sw.lck_mtxd.lck_mtxd_pad32 = 0xFFFFFFFF;
 1720 #endif
 1721         lck_grp_reference(grp);
 1722         lck_grp_lckcnt_incr(grp, LCK_TYPE_MTX);
 1723 }
 1724 
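/*
 * Illustrative sketch (not part of the original file): setting
 * LCK_ATTR_DEBUG on the attribute is what steers lck_mtx_init() above
 * into allocating the indirect lck_mtx_ext_t form.  Names below are
 * hypothetical.
 */
#include <kern/locks.h>

static void
debug_mutex_init_sketch(lck_mtx_t *mtx, lck_grp_t *grp)
{
        lck_attr_t      *attr = lck_attr_alloc_init();

        lck_attr_setdebug(attr);        /* sets LCK_ATTR_DEBUG in lck_attr_val */

        /* with the debug attribute, lck_mtx_init() takes the indirect path */
        lck_mtx_init(mtx, grp, attr);

        lck_attr_free(attr);            /* the mutex keeps no reference to the attribute */
}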
 1725 /*
 1726  *      Routine:        lck_mtx_init_ext
 1727  */
 1728 void
 1729 lck_mtx_init_ext(
 1730         lck_mtx_t       *lck,
 1731         lck_mtx_ext_t   *lck_ext,
 1732         lck_grp_t       *grp,
 1733         lck_attr_t      *attr)
 1734 {
 1735         lck_attr_t      *lck_attr;
 1736 
 1737         if (attr != LCK_ATTR_NULL)
 1738                 lck_attr = attr;
 1739         else
 1740                 lck_attr = &LockDefaultLckAttr;
 1741 
 1742         if ((lck_attr->lck_attr_val) & LCK_ATTR_DEBUG) {
 1743                 lck_mtx_ext_init(lck_ext, grp, lck_attr);
 1744                 lck->lck_mtx_tag = LCK_MTX_TAG_INDIRECT;
 1745                 lck->lck_mtx_ptr = lck_ext;
 1746         } else {
 1747                 lck->lck_mtx_owner = 0;
 1748                 lck->lck_mtx_state = 0;
 1749         }
 1750 #if     defined(__x86_64__)
 1751         lck->lck_mtx_sw.lck_mtxd.lck_mtxd_pad32 = 0xFFFFFFFF;
 1752 #endif
 1753 
 1754         lck_grp_reference(grp);
 1755         lck_grp_lckcnt_incr(grp, LCK_TYPE_MTX);
 1756 }
 1757 
 1758 /*
 1759  *      Routine:        lck_mtx_destroy
 1760  */
 1761 void
 1762 lck_mtx_destroy(
 1763         lck_mtx_t       *lck,
 1764         lck_grp_t       *grp)
 1765 {
 1766         boolean_t lck_is_indirect;
 1767         
 1768         if (lck->lck_mtx_tag == LCK_MTX_TAG_DESTROYED)
 1769                 return;
 1770         lck_is_indirect = (lck->lck_mtx_tag == LCK_MTX_TAG_INDIRECT);
 1771 
 1772         lck_mtx_lock_mark_destroyed(lck);
 1773 
 1774         if (lck_is_indirect)
 1775                 kfree(lck->lck_mtx_ptr, sizeof(lck_mtx_ext_t));
 1776         lck_grp_lckcnt_decr(grp, LCK_TYPE_MTX);
 1777         lck_grp_deallocate(grp);
 1778         return;
 1779 }
 1780 
 1781 
 1782 #define LCK_MTX_LCK_WAIT_CODE           0x20
 1783 #define LCK_MTX_LCK_WAKEUP_CODE         0x21
 1784 #define LCK_MTX_LCK_SPIN_CODE           0x22
 1785 #define LCK_MTX_LCK_ACQUIRE_CODE        0x23
 1786 #define LCK_MTX_LCK_DEMOTE_CODE         0x24
 1787 
 1788 
 1789 /*
 1790  * Routine:     lck_mtx_unlock_wakeup_x86
 1791  *
 1792  * Invoked on unlock when there is
 1793  * contention (i.e. the assembly routine sees that
 1794  * mutex->lck_mtx_waiters != 0 or
 1795  * mutex->lck_mtx_promoted != 0).
 1796  *
 1797  * Neither the mutex nor the interlock is held.
 1798  */
 1799 void
 1800 lck_mtx_unlock_wakeup_x86 (
 1801         lck_mtx_t       *mutex,
 1802         int             prior_lock_state)
 1803 {
 1804         lck_mtx_t       fake_lck;
 1805 
 1806         /*
 1807          * prior_lock_state is a snapshot of the 2nd word of the
 1808          * lock in question... we'll fake up a lock with the bits
 1809          * copied into place and carefully not access anything
 1810          * beyond what's defined in the second word of a lck_mtx_t
 1811          */
 1812         fake_lck.lck_mtx_state = prior_lock_state;
 1813 
 1814         KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_MTX_LCK_WAKEUP_CODE) | DBG_FUNC_START,
 1815                      mutex, fake_lck.lck_mtx_promoted, fake_lck.lck_mtx_waiters, fake_lck.lck_mtx_pri, 0);
 1816 
 1817         if (__probable(fake_lck.lck_mtx_waiters)) {
 1818 
 1819                 if (fake_lck.lck_mtx_waiters > 1)
 1820                         thread_wakeup_one_with_pri((event_t)(((unsigned int*)mutex)+(sizeof(lck_mtx_t)-1)/sizeof(unsigned int)), fake_lck.lck_mtx_pri);
 1821                 else
 1822                         thread_wakeup_one((event_t)(((unsigned int*)mutex)+(sizeof(lck_mtx_t)-1)/sizeof(unsigned int)));
 1823         }
 1824 
 1825         if (__improbable(fake_lck.lck_mtx_promoted)) {
 1826                 thread_t        thread = current_thread();
 1827 
 1828 
 1829                 KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_MTX_LCK_DEMOTE_CODE) | DBG_FUNC_NONE,
 1830                              thread_tid(thread), thread->promotions, thread->sched_flags & TH_SFLAG_PROMOTED, 0, 0);
 1831 
 1832                 if (thread->promotions > 0) {
 1833                         spl_t   s = splsched();
 1834 
 1835                         thread_lock(thread);
 1836 
 1837                         if (--thread->promotions == 0 && (thread->sched_flags & TH_SFLAG_PROMOTED)) {
 1838 
 1839                                 thread->sched_flags &= ~TH_SFLAG_PROMOTED;
 1840 
 1841                                 if (thread->sched_flags & TH_SFLAG_DEPRESSED_MASK) {
 1842                                         KERNEL_DEBUG_CONSTANT(MACHDBG_CODE(DBG_MACH_SCHED, MACH_DEMOTE) | DBG_FUNC_NONE,
 1843                                                               thread->sched_pri, DEPRESSPRI, 0, mutex, 0);
 1844 
 1845                                         set_sched_pri(thread, DEPRESSPRI);
 1846                                 }
 1847                                 else {
 1848                                         if (thread->priority < thread->sched_pri) {
 1849                                                 KERNEL_DEBUG_CONSTANT(MACHDBG_CODE(DBG_MACH_SCHED, MACH_DEMOTE) | DBG_FUNC_NONE,
 1850                                                                       thread->sched_pri, thread->priority, 0, mutex, 0);
 1851 
 1852                                                 SCHED(compute_priority)(thread, FALSE);
 1853                                         }
 1854                                 }
 1855                         }
 1856                         thread_unlock(thread);
 1857                         splx(s);
 1858                 }
 1859         }
 1860         KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_MTX_LCK_WAKEUP_CODE) | DBG_FUNC_END,
 1861                      mutex, 0, mutex->lck_mtx_waiters, 0, 0);
 1862 }
 1863 
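/*
 * Illustrative sketch (not part of the original file) of the 'fake_lck'
 * idiom used above: a one-word snapshot of the lock is decoded by
 * viewing that word through a bit-field layout, and nothing beyond the
 * snapshotted word is ever touched.  The union and field names/widths
 * below are hypothetical, not the real lck_mtx_t layout.
 */
#include <stdint.h>

union state_snapshot {
        uint32_t        raw;                    /* the word as captured */
        struct {
                uint32_t waiters  : 16;         /* hypothetical fields */
                uint32_t pri      : 8;
                uint32_t promoted : 1;
                uint32_t unused   : 7;
        } bits;
};

static int
snapshot_needs_wakeup(uint32_t prior_lock_state)
{
        union state_snapshot snap;

        snap.raw = prior_lock_state;            /* decode the captured word */

        return (snap.bits.waiters != 0 || snap.bits.promoted != 0);
}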
 1864 
 1865 /*
 1866  * Routine:     lck_mtx_lock_acquire_x86
 1867  *
 1868  * Invoked on acquiring the mutex when there is
 1869  * contention (i.e. the assembly routine sees that
 1870  * mutex->lck_mtx_waiters != 0 or
 1871  * thread->was_promoted_on_wakeup != 0).
 1872  *
 1873  * The mutex is owned, the interlock is held and preemption is disabled.
 1874  */
 1875 void
 1876 lck_mtx_lock_acquire_x86(
 1877         lck_mtx_t       *mutex)
 1878 {
 1879         thread_t        thread = (thread_t)mutex->lck_mtx_owner;       /* owner == current thread; faster than current_thread() */
 1880         integer_t       priority;
 1881         spl_t           s;
 1882 
 1883         KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_MTX_LCK_ACQUIRE_CODE) | DBG_FUNC_START,
 1884                      mutex, thread->was_promoted_on_wakeup, mutex->lck_mtx_waiters, mutex->lck_mtx_pri, 0);
 1885 
 1886         if (mutex->lck_mtx_waiters)
 1887                 priority = mutex->lck_mtx_pri;
 1888         else
 1889                 priority = 0;
 1890 
 1891         /* 'thread' is initialized above so the DBG_FUNC_START trace can safely dereference it */
 1892 
 1893         if (thread->sched_pri < priority || thread->was_promoted_on_wakeup) {
 1894 
 1895                 KERNEL_DEBUG_CONSTANT(MACHDBG_CODE(DBG_MACH_SCHED, MACH_PROMOTE) | DBG_FUNC_NONE,
 1896                                       thread->sched_pri, priority, thread->was_promoted_on_wakeup, mutex, 0);
 1897 
 1898                 s = splsched();
 1899                 thread_lock(thread);
 1900 
 1901                 if (thread->sched_pri < priority)
 1902                         set_sched_pri(thread, priority);
 1903 
 1904                 if (mutex->lck_mtx_promoted == 0) {
 1905                         mutex->lck_mtx_promoted = 1;
 1906                         
 1907                         thread->promotions++;
 1908                         thread->sched_flags |= TH_SFLAG_PROMOTED;
 1909                 }
 1910                 thread->was_promoted_on_wakeup = 0;
 1911                 
 1912                 thread_unlock(thread);
 1913                 splx(s);
 1914         }
 1915         KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_MTX_LCK_ACQUIRE_CODE) | DBG_FUNC_END,
 1916                      mutex, 0, mutex->lck_mtx_waiters, 0, 0);
 1917 }
 1918 
 1919 
 1920 
 1921 /*
 1922  * Routine:     lck_mtx_lock_spinwait_x86
 1923  *
 1924  * Invoked trying to acquire a mutex when there is contention but
 1925  * the holder is running on another processor. We spin for up to a maximum
 1926  * time waiting for the lock to be released.
 1927  *
 1928  * Called with the interlock unlocked.
 1929  * returns 0 if mutex acquired
 1930  * returns 1 if we spun
 1931  * returns 2 if we didn't spin due to the holder not running
 1932  */
 1933 int
 1934 lck_mtx_lock_spinwait_x86(
 1935         lck_mtx_t       *mutex)
 1936 {
 1937         thread_t        holder;
 1938         uint64_t        deadline;
 1939         int             retval = 1;
 1940         int             loopcount = 0;
 1941 
 1942 
 1943         KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_MTX_LCK_SPIN_CODE) | DBG_FUNC_START,
 1944                      mutex, mutex->lck_mtx_owner, mutex->lck_mtx_waiters, 0, 0);
 1945 
 1946         deadline = mach_absolute_time() + MutexSpin;
 1947 
 1948         /*
 1949          * Spin while:
 1950          *   - mutex is locked, and
 1951          *   - it's locked as a spin lock, and
 1952          *   - owner is running on another processor, and
 1953          *   - owner (processor) is not idling, and
 1954          *   - we haven't spun for long enough.
 1955          */
 1956         do {
 1957                 if (__probable(lck_mtx_lock_grab_mutex(mutex))) {
 1958                         retval = 0;
 1959                         break;
 1960                 }
 1961                 if ((holder = (thread_t) mutex->lck_mtx_owner) != NULL) {
 1962 
 1963                         if ( !(holder->machine.specFlags & OnProc) ||
 1964                              (holder->state & TH_IDLE)) {
 1965                                 if (loopcount == 0)
 1966                                         retval = 2;
 1967                                 break;
 1968                         }
 1969                 }
 1970                 cpu_pause();
 1971 
 1972                 loopcount++;
 1973 
 1974         } while (mach_absolute_time() < deadline);
 1975 
 1976 
 1977 #if     CONFIG_DTRACE
 1978         /*
 1979          * The deadline encodes when we started spinning (deadline - MutexSpin),
 1980          * so if dtrace is active we can compute the elapsed spin time
 1981          * backwards from it.
 1982          *
 1983          * Note that we record a different probe id depending on whether
 1984          * this is a direct or an indirect mutex.  This lets dtrace
 1985          * processing penalize only lock groups that have debug/stats
 1986          * enabled, if desired.
 1987          */
 1988         if (__probable(mutex->lck_mtx_is_ext == 0)) {
 1989                 LOCKSTAT_RECORD(LS_LCK_MTX_LOCK_SPIN, mutex,
 1990                     mach_absolute_time() - (deadline - MutexSpin));
 1991         } else {
 1992                 LOCKSTAT_RECORD(LS_LCK_MTX_EXT_LOCK_SPIN, mutex,
 1993                     mach_absolute_time() - (deadline - MutexSpin));
 1994         }
 1995         /* The lockstat acquire event is recorded by the assembly code beneath us. */
 1996 #endif
 1997 
 1998         KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_MTX_LCK_SPIN_CODE) | DBG_FUNC_END,
 1999                      mutex, mutex->lck_mtx_owner, mutex->lck_mtx_waiters, retval, 0);
 2000 
 2001         return retval;
 2002 }
 2003 
 2004 
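/*
 * Minimal sketch (not part of the original file) of the adaptive-spin
 * policy implemented above: try to grab the lock, keep spinning only
 * while the holder is on-CPU, and give up once the time budget expires.
 * try_grab(), holder_is_running(), now() and pause_briefly() are
 * hypothetical stand-ins for lck_mtx_lock_grab_mutex(), the
 * OnProc/TH_IDLE checks, mach_absolute_time() and cpu_pause().
 */
#include <stdint.h>

extern int      try_grab(void);                 /* hypothetical */
extern int      holder_is_running(void);        /* hypothetical */
extern uint64_t now(void);                      /* hypothetical clock */
extern void     pause_briefly(void);            /* hypothetical cpu_pause() */

static int
adaptive_spin_sketch(uint64_t spin_budget)
{
        uint64_t        deadline = now() + spin_budget;
        int             loops = 0;

        do {
                if (try_grab())
                        return (0);             /* acquired while spinning */

                if (!holder_is_running())
                        return (loops == 0 ? 2 : 1);    /* don't spin on an off-CPU holder */

                pause_briefly();
                loops++;
        } while (now() < deadline);

        return (1);                             /* spun out; the caller should block */
}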
 2005 
 2006 /*
 2007  * Routine:     lck_mtx_lock_wait_x86
 2008  *
 2009  * Invoked in order to wait on contention.
 2010  *
 2011  * Called with the interlock locked and
 2012  * preemption disabled...
 2013  * returns with the interlock unlocked and preemption enabled.
 2014  */
 2015 void
 2016 lck_mtx_lock_wait_x86 (
 2017         lck_mtx_t       *mutex)
 2018 {
 2019         thread_t        self = current_thread();
 2020         thread_t        holder;
 2021         integer_t       priority;
 2022         spl_t           s;
 2023 #if     CONFIG_DTRACE
 2024         uint64_t        sleep_start = 0;
 2025 
 2026         if (lockstat_probemap[LS_LCK_MTX_LOCK_BLOCK] || lockstat_probemap[LS_LCK_MTX_EXT_LOCK_BLOCK]) {
 2027                 sleep_start = mach_absolute_time();
 2028         }
 2029 #endif
 2030         KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_MTX_LCK_WAIT_CODE) | DBG_FUNC_START,
 2031                      mutex, mutex->lck_mtx_owner, mutex->lck_mtx_waiters, mutex->lck_mtx_pri, 0);
 2032 
 2033         priority = self->sched_pri;
 2034 
 2035         if (priority < self->priority)
 2036                 priority = self->priority;
 2037         if (priority < BASEPRI_DEFAULT)
 2038                 priority = BASEPRI_DEFAULT;
 2039 
 2040         if (mutex->lck_mtx_waiters == 0 || priority > mutex->lck_mtx_pri)
 2041                 mutex->lck_mtx_pri = priority;
 2042         mutex->lck_mtx_waiters++;
 2043 
 2044         if ( (holder = (thread_t)mutex->lck_mtx_owner) &&
 2045              holder->sched_pri < mutex->lck_mtx_pri ) {
 2046 
 2047                 s = splsched();
 2048                 thread_lock(holder);
 2049 
 2050                 if (holder->sched_pri < mutex->lck_mtx_pri) {
 2051                         KERNEL_DEBUG_CONSTANT(
 2052                                 MACHDBG_CODE(DBG_MACH_SCHED, MACH_PROMOTE) | DBG_FUNC_NONE,
 2053                                 holder->sched_pri, priority, thread_tid(holder), mutex, 0);
 2054 
 2055                         set_sched_pri(holder, priority);
 2056                         
 2057                         if (mutex->lck_mtx_promoted == 0) {
 2058                                 holder->promotions++;
 2059                                 holder->sched_flags |= TH_SFLAG_PROMOTED;
 2060                                 
 2061                                 mutex->lck_mtx_promoted = 1;
 2062                         }
 2063                 }
 2064                 thread_unlock(holder);
 2065                 splx(s);
 2066         }
 2067         assert_wait((event_t)(((unsigned int*)mutex)+((sizeof(lck_mtx_t)-1)/sizeof(unsigned int))), THREAD_UNINT);
 2068 
 2069         lck_mtx_ilk_unlock(mutex);
 2070 
 2071         thread_block(THREAD_CONTINUE_NULL);
 2072 
 2073         KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_MTX_LCK_WAIT_CODE) | DBG_FUNC_END,
 2074                      mutex, mutex->lck_mtx_owner, mutex->lck_mtx_waiters, mutex->lck_mtx_pri, 0);
 2075 
 2076 #if     CONFIG_DTRACE
 2077         /*
 2078          * Record the DTrace lockstat probe for blocking; block time
 2079          * is measured from when this routine was entered.
 2080          */
 2081         if (sleep_start) {
 2082                 if (mutex->lck_mtx_is_ext == 0) {
 2083                         LOCKSTAT_RECORD(LS_LCK_MTX_LOCK_BLOCK, mutex,
 2084                             mach_absolute_time() - sleep_start);
 2085                 } else {
 2086                         LOCKSTAT_RECORD(LS_LCK_MTX_EXT_LOCK_BLOCK, mutex,
 2087                             mach_absolute_time() - sleep_start);
 2088                 }
 2089         }
 2090 #endif
 2091 }
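/*
 * Illustrative sketch (not part of the original file) of the waiter
 * priority computation in lck_mtx_lock_wait_x86() above: the priority
 * pushed onto the mutex (and possibly onto the holder) is the waiter's
 * scheduled or base priority, floored at BASEPRI_DEFAULT.  The floor is
 * passed in here so the sketch stays self-contained.
 */
static int
waiter_push_priority_sketch(int sched_pri, int base_pri, int basepri_default)
{
        int     priority = sched_pri;

        if (priority < base_pri)
                priority = base_pri;
        if (priority < basepri_default)
                priority = basepri_default;

        return (priority);
}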
