1 /* $OpenBSD: subr_witness.c,v 1.48 2022/02/21 14:16:49 jsg Exp $ */
2
3 /*-
4 * Copyright (c) 2008 Isilon Systems, Inc.
5 * Copyright (c) 2008 Ilya Maykov <ivmaykov@gmail.com>
6 * Copyright (c) 1998 Berkeley Software Design, Inc.
7 * All rights reserved.
8 *
9 * Redistribution and use in source and binary forms, with or without
10 * modification, are permitted provided that the following conditions
11 * are met:
12 * 1. Redistributions of source code must retain the above copyright
13 * notice, this list of conditions and the following disclaimer.
14 * 2. Redistributions in binary form must reproduce the above copyright
15 * notice, this list of conditions and the following disclaimer in the
16 * documentation and/or other materials provided with the distribution.
17 * 3. Berkeley Software Design Inc's name may not be used to endorse or
18 * promote products derived from this software without specific prior
19 * written permission.
20 *
21 * THIS SOFTWARE IS PROVIDED BY BERKELEY SOFTWARE DESIGN INC ``AS IS'' AND
22 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
23 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
24 * ARE DISCLAIMED. IN NO EVENT SHALL BERKELEY SOFTWARE DESIGN INC BE LIABLE
25 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
26 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
27 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
28 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
29 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
30 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
31 * SUCH DAMAGE.
32 *
33 * from BSDI Id: mutex_witness.c,v 1.1.2.20 2000/04/27 03:10:27 cp Exp
34 * and BSDI Id: synch_machdep.c,v 2.3.2.39 2000/04/27 03:10:25 cp Exp
35 */
36
37 /*
38 * Implementation of the `witness' lock verifier. Originally implemented for
39 * mutexes in BSD/OS. Extended to handle generic lock objects and lock
40 * classes in FreeBSD.
41 */
42
43 /*
44 * Main Entry: witness
45 * Pronunciation: 'wit-n&s
46 * Function: noun
47 * Etymology: Middle English witnesse, from Old English witnes knowledge,
48 * testimony, witness, from 2wit
49 * Date: before 12th century
50 * 1 : attestation of a fact or event : TESTIMONY
51 * 2 : one that gives evidence; specifically : one who testifies in
52 * a cause or before a judicial tribunal
53 * 3 : one asked to be present at a transaction so as to be able to
54 * testify to its having taken place
55 * 4 : one who has personal knowledge of something
56 * 5 a : something serving as evidence or proof : SIGN
57 * b : public affirmation by word or example of usually
58 * religious faith or conviction <the heroic witness to divine
59 * life -- Pilot>
60 * 6 capitalized : a member of the Jehovah's Witnesses
61 */
62
63 /*
64 * Special rules concerning Giant and lock orders:
65 *
66 * 1) Giant must be acquired before any other mutexes. Stated another way,
67 * no other mutex may be held when Giant is acquired.
68 *
69 * 2) Giant must be released when blocking on a sleepable lock.
70 *
71 * This rule is less obvious, but is a result of Giant providing the same
72 * semantics as spl(). Basically, when a thread sleeps, it must release
73 * Giant. When a thread blocks on a sleepable lock, it sleeps. Hence rule
74 * 2).
75 *
76 * 3) Giant may be acquired before or after sleepable locks.
77 *
78 * This rule is also not quite as obvious. Giant may be acquired after
79 * a sleepable lock because it is a non-sleepable lock and non-sleepable
80 * locks may always be acquired while holding a sleepable lock. The second
81 * case, Giant before a sleepable lock, follows from rule 2) above. Suppose
82 * you have two threads T1 and T2 and a sleepable lock X. Suppose that T1
83 * acquires X and blocks on Giant. Then suppose that T2 acquires Giant and
84 * blocks on X. When T2 blocks on X, T2 will release Giant allowing T1 to
85 * execute. Thus, acquiring Giant both before and after a sleepable lock
86 * will not result in a lock order reversal.
87 */
88
89 #include <sys/param.h>
90 #include <sys/systm.h>
91 #include <sys/kernel.h>
92 #include <sys/malloc.h>
93 #ifdef MULTIPROCESSOR
94 #include <sys/mplock.h>
95 #endif
96 #include <sys/mutex.h>
97 #include <sys/percpu.h>
98 #include <sys/proc.h>
99 #include <sys/sched.h>
100 #include <sys/stacktrace.h>
101 #include <sys/stdint.h>
102 #include <sys/sysctl.h>
103 #include <sys/syslog.h>
104 #include <sys/witness.h>
105
106 #include <machine/cpu.h>
107
108 #include <uvm/uvm_extern.h> /* uvm_pageboot_alloc */
109
110 #ifndef DDB
111 #error "DDB is required for WITNESS"
112 #endif
113
114 #include <machine/db_machdep.h>
115
116 #include <ddb/db_access.h>
117 #include <ddb/db_var.h>
118 #include <ddb/db_output.h>
119
120 #define LI_RECURSEMASK 0x0000ffff /* Recursion depth of lock instance. */
121 #define LI_EXCLUSIVE 0x00010000 /* Exclusive lock instance. */
122 #define LI_NORELEASE 0x00020000 /* Lock not allowed to be released. */
123
124 #ifndef WITNESS_COUNT
125 #define WITNESS_COUNT 1536
126 #endif
127 #define WITNESS_HASH_SIZE 251 /* Prime, gives load factor < 2 */
128 #define WITNESS_PENDLIST (1024 + MAXCPUS)
129
130 /* Allocate 256 KB of stack data space */
131 #define WITNESS_LO_DATA_COUNT 2048
132
133 /* Prime, gives load factor of ~2 at full load */
134 #define WITNESS_LO_HASH_SIZE 1021
135
136 /*
137 * XXX: This is somewhat bogus, as we assume here that at most 2048 threads
138 * will hold LOCK_NCHILDREN locks. We handle failure ok, and we should
139 * probably be safe for the most part, but it's still a SWAG.
140 */
141 #define LOCK_NCHILDREN 5
142 #define LOCK_CHILDCOUNT 2048
143
144 #define FULLGRAPH_SBUF_SIZE 512
145
146 /*
147 * These flags go in the witness relationship matrix and describe the
148 * relationship between any two struct witness objects.
149 */
150 #define WITNESS_UNRELATED 0x00 /* No lock order relation. */
151 #define WITNESS_PARENT 0x01 /* Parent, aka direct ancestor. */
152 #define WITNESS_ANCESTOR 0x02 /* Direct or indirect ancestor. */
153 #define WITNESS_CHILD 0x04 /* Child, aka direct descendant. */
154 #define WITNESS_DESCENDANT 0x08 /* Direct or indirect descendant. */
155 #define WITNESS_ANCESTOR_MASK (WITNESS_PARENT | WITNESS_ANCESTOR)
156 #define WITNESS_DESCENDANT_MASK (WITNESS_CHILD | WITNESS_DESCENDANT)
157 #define WITNESS_RELATED_MASK \
158 (WITNESS_ANCESTOR_MASK | WITNESS_DESCENDANT_MASK)
159 #define WITNESS_REVERSAL 0x10 /* A lock order reversal has been
160 * observed. */
161 #define WITNESS_RESERVED1 0x20 /* Unused flag, reserved. */
162 #define WITNESS_RESERVED2 0x40 /* Unused flag, reserved. */
163 #define WITNESS_LOCK_ORDER_KNOWN 0x80 /* This lock order is known. */
164
165 /* Descendant to ancestor flags */
166 #define WITNESS_DTOA(x) (((x) & WITNESS_RELATED_MASK) >> 2)
167
168 /* Ancestor to descendant flags */
169 #define WITNESS_ATOD(x) (((x) & WITNESS_RELATED_MASK) << 2)
170
171 #define WITNESS_INDEX_ASSERT(i) \
172 KASSERT((i) > 0 && (i) <= w_max_used_index && (i) < witness_count)
173
174 /*
175 * Lock classes. Each lock has a class which describes characteristics
176 * common to all types of locks of a given class.
177 *
178 * Spin locks in general must always protect against preemption, as it is
179 * an error to perform any type of context switch while holding a spin lock.
180 * Also, for an individual lock to be recursable, its class must allow
181 * recursion and the lock itself must explicitly allow recursion.
182 */
183
184 struct lock_class {
185 const char *lc_name;
186 u_int lc_flags;
187 };
188
189 union lock_stack {
190 union lock_stack *ls_next;
191 struct stacktrace ls_stack;
192 };
193
194 #define LC_SLEEPLOCK 0x00000001 /* Sleep lock. */
195 #define LC_SPINLOCK 0x00000002 /* Spin lock. */
196 #define LC_SLEEPABLE 0x00000004 /* Sleeping allowed with this lock. */
197 #define LC_RECURSABLE 0x00000008 /* Locks of this type may recurse. */
198 #define LC_UPGRADABLE 0x00000010 /* Upgrades and downgrades permitted. */
199
200 /*
201 * Lock instances. A lock instance is the data associated with a lock while
202 * it is held by witness. For example, a lock instance will hold the
203 * recursion count of a lock. Lock instances are held in lists. Spin locks
204 * are held in a per-cpu list while sleep locks are held in per-thread list.
205 */
206 struct lock_instance {
207 struct lock_object *li_lock;
208 union lock_stack *li_stack;
209 u_int li_flags;
210 };
211
212 /*
213 * A simple list type used to build the list of locks held by a thread
214 * or CPU. We can't simply embed the list in struct lock_object since a
215 * lock may be held by more than one thread if it is a shared lock. Locks
216 * are added to the head of the list, so we fill up each list entry from
217 * "the back" logically. To ease some of the arithmetic, we actually fill
218 * in each list entry the normal way (children[0] then children[1], etc.) but
219 * when we traverse the list we read children[count-1] as the first entry
220 * down to children[0] as the final entry.
221 */
222 struct lock_list_entry {
223 struct lock_list_entry *ll_next;
224 struct lock_instance ll_children[LOCK_NCHILDREN];
225 int ll_count;
226 };
227
228 /*
229 * The main witness structure. One of these per named lock type in the system
230 * (for example, "vnode interlock").
231 */
232 struct witness {
233 const struct lock_type *w_type;
234 const char *w_subtype;
235 uint32_t w_index; /* Index in the relationship matrix */
236 struct lock_class *w_class;
237 SLIST_ENTRY(witness) w_list; /* List of all witnesses. */
238 SLIST_ENTRY(witness) w_typelist; /* Witnesses of a type. */
239 SLIST_ENTRY(witness) w_hash_next; /* Linked list in
240 * hash buckets. */
241 uint16_t w_num_ancestors; /* direct/indirect
242 * ancestor count */
243 uint16_t w_num_descendants; /* direct/indirect
244 * descendant count */
245 int16_t w_ddb_level;
246 unsigned w_acquired:1;
247 unsigned w_displayed:1;
248 unsigned w_reversed:1;
249 };
250
251 SLIST_HEAD(witness_list, witness);
252
253 /*
254 * The witness hash table. Keys are witness names (const char *), elements are
255 * witness objects (struct witness *).
256 */
257 struct witness_hash {
258 struct witness_list wh_array[WITNESS_HASH_SIZE];
259 uint32_t wh_size;
260 uint32_t wh_count;
261 };
262
263 /*
264 * Key type for the lock order data hash table.
265 */
266 struct witness_lock_order_key {
267 uint16_t from;
268 uint16_t to;
269 };
270
271 struct witness_lock_order_data {
272 struct stacktrace wlod_stack;
273 struct witness_lock_order_key wlod_key;
274 struct witness_lock_order_data *wlod_next;
275 };
276
277 /*
278 * The witness lock order data hash table. Keys are witness index tuples
279 * (struct witness_lock_order_key), elements are lock order data objects
280 * (struct witness_lock_order_data).
281 */
282 struct witness_lock_order_hash {
283 struct witness_lock_order_data *wloh_array[WITNESS_LO_HASH_SIZE];
284 u_int wloh_size;
285 u_int wloh_count;
286 };
287
288 struct witness_pendhelp {
289 const struct lock_type *wh_type;
290 struct lock_object *wh_lock;
291 };
292
293 struct witness_cpu {
294 struct lock_list_entry *wc_spinlocks;
295 struct lock_list_entry *wc_lle_cache;
296 union lock_stack *wc_stk_cache;
297 unsigned int wc_lle_count;
298 unsigned int wc_stk_count;
299 } __aligned(CACHELINESIZE);
300
301 #define WITNESS_LLE_CACHE_MAX 8
302 #define WITNESS_STK_CACHE_MAX (WITNESS_LLE_CACHE_MAX * LOCK_NCHILDREN)
303
304 struct witness_cpu witness_cpu[MAXCPUS];
305
306 /*
307 * Returns 0 if one of the locks is a spin lock and the other is not.
308 * Returns 1 otherwise.
309 */
310 static __inline int
311 witness_lock_type_equal(struct witness *w1, struct witness *w2)
312 {
313
314 return ((w1->w_class->lc_flags & (LC_SLEEPLOCK | LC_SPINLOCK)) ==
315 (w2->w_class->lc_flags & (LC_SLEEPLOCK | LC_SPINLOCK)));
316 }
317
318 static __inline int
319 witness_lock_order_key_equal(const struct witness_lock_order_key *a,
320 const struct witness_lock_order_key *b)
321 {
322
323 return (a->from == b->from && a->to == b->to);
324 }
325
326 static int _isitmyx(struct witness *w1, struct witness *w2, int rmask,
327 const char *fname);
328 static void adopt(struct witness *parent, struct witness *child);
329 static struct witness *enroll(const struct lock_type *, const char *,
330 struct lock_class *);
331 static struct lock_instance *find_instance(struct lock_list_entry *list,
332 const struct lock_object *lock);
333 static int isitmychild(struct witness *parent, struct witness *child);
334 static int isitmydescendant(struct witness *parent, struct witness *child);
335 static void itismychild(struct witness *parent, struct witness *child);
336 #ifdef DDB
337 static void db_witness_add_fullgraph(struct witness *parent);
338 static void witness_ddb_compute_levels(void);
339 static void witness_ddb_display(int(*)(const char *fmt, ...));
340 static void witness_ddb_display_descendants(int(*)(const char *fmt, ...),
341 struct witness *, int indent);
342 static void witness_ddb_display_list(int(*prnt)(const char *fmt, ...),
343 struct witness_list *list);
344 static void witness_ddb_level_descendants(struct witness *parent, int l);
345 static void witness_ddb_list(struct proc *td);
346 #endif
347 static int witness_alloc_stacks(void);
348 static void witness_debugger(int dump);
349 static void witness_free(struct witness *m);
350 static struct witness *witness_get(void);
351 static uint32_t witness_hash_djb2(const uint8_t *key, uint32_t size);
352 static struct witness *witness_hash_get(const struct lock_type *,
353 const char *);
354 static void witness_hash_put(struct witness *w);
355 static void witness_init_hash_tables(void);
356 static void witness_increment_graph_generation(void);
357 static int witness_list_locks(struct lock_list_entry **,
358 int (*)(const char *, ...));
359 static void witness_lock_list_free(struct lock_list_entry *lle);
360 static struct lock_list_entry *witness_lock_list_get(void);
361 static void witness_lock_stack_free(union lock_stack *stack);
362 static union lock_stack *witness_lock_stack_get(void);
363 static int witness_lock_order_add(struct witness *parent,
364 struct witness *child);
365 static int witness_lock_order_check(struct witness *parent,
366 struct witness *child);
367 static struct witness_lock_order_data *witness_lock_order_get(
368 struct witness *parent,
369 struct witness *child);
370 static void witness_list_lock(struct lock_instance *instance,
371 int (*prnt)(const char *fmt, ...));
372 static void witness_setflag(struct lock_object *lock, int flag, int set);
373
374 /*
375 * If set to 0, lock order checking is disabled. If set to -1,
376 * witness is completely disabled. Otherwise witness performs full
377 * lock order checking for all locks. At runtime, lock order checking
378 * may be toggled. However, witness cannot be reenabled once it is
379 * completely disabled.
380 */
381 #ifdef WITNESS_WATCH
382 static int witness_watch = 3;
383 #else
384 static int witness_watch = 2;
385 #endif
386
387 #ifdef WITNESS_LOCKTRACE
388 static int witness_locktrace = 1;
389 #else
390 static int witness_locktrace = 0;
391 #endif
392
393 int witness_count = WITNESS_COUNT;
394 int witness_uninitialized_report = 5;
395
396 static struct mutex w_mtx;
397 static struct rwlock w_ctlock = RWLOCK_INITIALIZER("w_ctlock");
398
399 /* w_list */
400 static struct witness_list w_free = SLIST_HEAD_INITIALIZER(w_free);
401 static struct witness_list w_all = SLIST_HEAD_INITIALIZER(w_all);
402
403 /* w_typelist */
404 static struct witness_list w_spin = SLIST_HEAD_INITIALIZER(w_spin);
405 static struct witness_list w_sleep = SLIST_HEAD_INITIALIZER(w_sleep);
406
407 /* lock list */
408 static struct lock_list_entry *w_lock_list_free = NULL;
409 static struct witness_pendhelp pending_locks[WITNESS_PENDLIST];
410 static u_int pending_cnt;
411
412 static int w_free_cnt, w_spin_cnt, w_sleep_cnt;
413
414 static struct witness *w_data;
415 static uint8_t **w_rmatrix;
416 static struct lock_list_entry w_locklistdata[LOCK_CHILDCOUNT];
417 static struct witness_hash w_hash; /* The witness hash table. */
418
419 /* The lock order data hash */
420 static struct witness_lock_order_data w_lodata[WITNESS_LO_DATA_COUNT];
421 static struct witness_lock_order_data *w_lofree = NULL;
422 static struct witness_lock_order_hash w_lohash;
423 static int w_max_used_index = 0;
424 static unsigned int w_generation = 0;
425
426 static union lock_stack *w_lock_stack_free;
427 static unsigned int w_lock_stack_num;
428
429 static struct lock_class lock_class_kernel_lock = {
430 .lc_name = "kernel_lock",
431 .lc_flags = LC_SLEEPLOCK | LC_RECURSABLE | LC_SLEEPABLE
432 };
433
434 static struct lock_class lock_class_sched_lock = {
435 .lc_name = "sched_lock",
436 .lc_flags = LC_SPINLOCK | LC_RECURSABLE
437 };
438
439 static struct lock_class lock_class_mutex = {
440 .lc_name = "mutex",
441 .lc_flags = LC_SPINLOCK
442 };
443
444 static struct lock_class lock_class_rwlock = {
445 .lc_name = "rwlock",
446 .lc_flags = LC_SLEEPLOCK | LC_SLEEPABLE | LC_UPGRADABLE
447 };
448
449 static struct lock_class lock_class_rrwlock = {
450 .lc_name = "rrwlock",
451 .lc_flags = LC_SLEEPLOCK | LC_RECURSABLE | LC_SLEEPABLE |
452 LC_UPGRADABLE
453 };
454
455 static struct lock_class *lock_classes[] = {
456 &lock_class_kernel_lock,
457 &lock_class_sched_lock,
458 &lock_class_mutex,
459 &lock_class_rwlock,
460 &lock_class_rrwlock,
461 };
462
463 /*
464 * This global is set to 0 once it becomes safe to use the witness code.
465 */
466 static int witness_cold = 1;
467
468 /*
469 * This global is set to 1 once the static lock orders have been enrolled
470 * so that a warning can be issued for any spin locks enrolled later.
471 */
472 static int witness_spin_warn = 0;
473
474 /*
475 * The WITNESS-enabled diagnostic code. Note that the witness code does
476 * assume that the early boot is single-threaded at least until after this
477 * routine is completed.
478 */
void
witness_initialize(void)
{
	struct lock_object *lock;
	union lock_stack *stacks;
	struct witness *w;
	int i, s;

	/*
	 * Carve the witness array and the relationship matrix out of
	 * boot-time pages; this runs before malloc(9) is usable.
	 */
	w_data = (void *)uvm_pageboot_alloc(sizeof(struct witness) *
	    witness_count);
	memset(w_data, 0, sizeof(struct witness) * witness_count);

	/*
	 * The matrix is (witness_count + 1) x (witness_count + 1) because
	 * row/column 0 is deliberately left unused (see below).
	 */
	w_rmatrix = (void *)uvm_pageboot_alloc(sizeof(*w_rmatrix) *
	    (witness_count + 1));

	for (i = 0; i < witness_count + 1; i++) {
		w_rmatrix[i] = (void *)uvm_pageboot_alloc(
		    sizeof(*w_rmatrix[i]) * (witness_count + 1));
		memset(w_rmatrix[i], 0, sizeof(*w_rmatrix[i]) *
		    (witness_count + 1));
	}

	/* w_mtx itself must never be witness-checked. */
	mtx_init_flags(&w_mtx, IPL_HIGH, "witness lock", MTX_NOWITNESS);
	/*
	 * Populate the free list back to front so that the witness with
	 * index 0 ends up at the head, as asserted below.
	 */
	for (i = witness_count - 1; i >= 0; i--) {
		w = &w_data[i];
		memset(w, 0, sizeof(*w));
		w_data[i].w_index = i;	/* Witness index never changes. */
		witness_free(w);
	}
	KASSERTMSG(SLIST_FIRST(&w_free)->w_index == 0,
	    "%s: Invalid list of free witness objects", __func__);

	/* Witness with index 0 is not used to aid in debugging. */
	SLIST_REMOVE_HEAD(&w_free, w_list);
	w_free_cnt--;

	/* NOTE(review): rows 0..witness_count-1 were already zeroed above. */
	for (i = 0; i < witness_count; i++) {
		memset(w_rmatrix[i], 0, sizeof(*w_rmatrix[i]) *
		    (witness_count + 1));
	}

	/*
	 * Stack traces are only recorded when witness_locktrace is set;
	 * otherwise w_lock_stack_num stays 0 and the loop below is a
	 * no-op, so `stacks' is never dereferenced uninitialized.
	 */
	if (witness_locktrace) {
		w_lock_stack_num = LOCK_CHILDCOUNT * LOCK_NCHILDREN;
		stacks = (void *)uvm_pageboot_alloc(sizeof(*stacks) *
		    w_lock_stack_num);
	}

	/* Seed the free pools for stack and lock-list entries. */
	s = splhigh();
	for (i = 0; i < w_lock_stack_num; i++)
		witness_lock_stack_free(&stacks[i]);
	for (i = 0; i < LOCK_CHILDCOUNT; i++)
		witness_lock_list_free(&w_locklistdata[i]);
	splx(s);
	witness_init_hash_tables();
	witness_spin_warn = 1;

	/* Iterate through all locks and add them to witness. */
	for (i = 0; pending_locks[i].wh_lock != NULL; i++) {
		lock = pending_locks[i].wh_lock;
		KASSERTMSG(lock->lo_flags & LO_WITNESS,
		    "%s: lock %s is on pending list but not LO_WITNESS",
		    __func__, lock->lo_name);
		lock->lo_witness = enroll(pending_locks[i].wh_type,
		    lock->lo_name, LOCK_CLASS(lock));
	}

	/* Mark the witness code as being ready for use. */
	witness_cold = 0;
}
548
549 void
550 witness_init(struct lock_object *lock, const struct lock_type *type)
551 {
552 struct lock_class *class;
553
554 /* Various sanity checks. */
555 class = LOCK_CLASS(lock);
556 if ((lock->lo_flags & LO_RECURSABLE) != 0 &&
557 (class->lc_flags & LC_RECURSABLE) == 0)
558 panic("%s: lock (%s) %s can not be recursable",
559 __func__, class->lc_name, lock->lo_name);
560 if ((lock->lo_flags & LO_SLEEPABLE) != 0 &&
561 (class->lc_flags & LC_SLEEPABLE) == 0)
562 panic("%s: lock (%s) %s can not be sleepable",
563 __func__, class->lc_name, lock->lo_name);
564 if ((lock->lo_flags & LO_UPGRADABLE) != 0 &&
565 (class->lc_flags & LC_UPGRADABLE) == 0)
566 panic("%s: lock (%s) %s can not be upgradable",
567 __func__, class->lc_name, lock->lo_name);
568
569 /*
570 * If we shouldn't watch this lock, then just clear lo_witness.
571 * Record the type in case the lock becomes watched later.
572 * Otherwise, if witness_cold is set, then it is too early to
573 * enroll this lock, so defer it to witness_initialize() by adding
574 * it to the pending_locks list. If it is not too early, then enroll
575 * the lock now.
576 */
577 if (witness_watch < 1 || panicstr != NULL || db_active ||
578 (lock->lo_flags & LO_WITNESS) == 0) {
579 lock->lo_witness = NULL;
580 lock->lo_type = type;
581 } else if (witness_cold) {
582 pending_locks[pending_cnt].wh_lock = lock;
583 pending_locks[pending_cnt++].wh_type = type;
584 if (pending_cnt > WITNESS_PENDLIST)
585 panic("%s: pending locks list is too small, "
586 "increase WITNESS_PENDLIST",
587 __func__);
588 } else
589 lock->lo_witness = enroll(type, lock->lo_name, class);
590 }
591
/*
 * Returns nonzero when the lock object is the big kernel lock.  On
 * uniprocessor kernels there is no kernel_lock, so nothing matches.
 */
static inline int
is_kernel_lock(const struct lock_object *lock)
{
#ifdef MULTIPROCESSOR
	if (lock == &kernel_lock.mpl_lock_obj)
		return (1);
	return (0);
#else
	return (0);
#endif
}
601
602 #ifdef DDB
603 static void
604 witness_ddb_compute_levels(void)
605 {
606 struct witness *w;
607
608 /*
609 * First clear all levels.
610 */
611 SLIST_FOREACH(w, &w_all, w_list)
612 w->w_ddb_level = -1;
613
614 /*
615 * Look for locks with no parents and level all their descendants.
616 */
617 SLIST_FOREACH(w, &w_all, w_list) {
618 /* If the witness has ancestors (is not a root), skip it. */
619 if (w->w_num_ancestors > 0)
620 continue;
621 witness_ddb_level_descendants(w, 0);
622 }
623 }
624
625 static void
626 witness_ddb_level_descendants(struct witness *w, int l)
627 {
628 int i;
629
630 if (w->w_ddb_level >= l)
631 return;
632
633 w->w_ddb_level = l;
634 l++;
635
636 for (i = 1; i <= w_max_used_index; i++) {
637 if (w_rmatrix[w->w_index][i] & WITNESS_PARENT)
638 witness_ddb_level_descendants(&w_data[i], l);
639 }
640 }
641
/*
 * Print witness `w' indented by its depth, then recurse into every
 * matrix entry related to it via WITNESS_PARENT.  The w_displayed flag
 * keeps shared subtrees from being expanded more than once per display
 * pass (the flags are cleared by witness_ddb_display()).
 */
static void
witness_ddb_display_descendants(int(*prnt)(const char *fmt, ...),
    struct witness *w, int indent)
{
	int i;

	/* Indent proportionally to the depth in the graph. */
	for (i = 0; i < indent; i++)
		prnt(" ");
	prnt("%s (type: %s, depth: %d)",
	    w->w_type->lt_name, w->w_class->lc_name, w->w_ddb_level);
	if (w->w_displayed) {
		prnt(" -- (already displayed)\n");
		return;
	}
	w->w_displayed = 1;
	if (!w->w_acquired)
		prnt(" -- never acquired\n");
	else
		prnt("\n");
	indent++;
	WITNESS_INDEX_ASSERT(w->w_index);
	/* Recurse into all witnesses this row marks as WITNESS_PARENT. */
	for (i = 1; i <= w_max_used_index; i++) {
		if (w_rmatrix[w->w_index][i] & WITNESS_PARENT)
			witness_ddb_display_descendants(prnt, &w_data[i],
			    indent);
	}
}
669
670 static void
671 witness_ddb_display_list(int(*prnt)(const char *fmt, ...),
672 struct witness_list *list)
673 {
674 struct witness *w;
675
676 SLIST_FOREACH(w, list, w_typelist) {
677 if (!w->w_acquired || w->w_ddb_level > 0)
678 continue;
679
680 /* This lock has no ancestors - display its descendants. */
681 witness_ddb_display_descendants(prnt, w, 0);
682 }
683 }
684
/*
 * Top-level DDB dump of the whole witness graph: recompute levels,
 * print the sleep-lock and spin-lock hierarchies, then list any
 * witnesses that were never acquired.  `prnt' is the output routine
 * (typically db_printf).
 */
static void
witness_ddb_display(int(*prnt)(const char *fmt, ...))
{
	struct witness *w;

	KASSERTMSG(witness_cold == 0, "%s: witness_cold", __func__);
	witness_ddb_compute_levels();

	/* Clear all the displayed flags. */
	SLIST_FOREACH(w, &w_all, w_list)
		w->w_displayed = 0;

	/*
	 * First, handle sleep locks which have been acquired at least
	 * once.
	 */
	prnt("Sleep locks:\n");
	witness_ddb_display_list(prnt, &w_sleep);

	/*
	 * Now do spin locks which have been acquired at least once.
	 */
	prnt("\nSpin locks:\n");
	witness_ddb_display_list(prnt, &w_spin);

	/*
	 * Finally, any locks which have not been acquired yet.
	 */
	prnt("\nLocks which were never acquired:\n");
	SLIST_FOREACH(w, &w_all, w_list) {
		if (w->w_acquired)
			continue;
		prnt("%s (type: %s, depth: %d)\n", w->w_type->lt_name,
		    w->w_class->lc_name, w->w_ddb_level);
	}
}
721 #endif /* DDB */
722
/*
 * Explicitly establish the order "lock1 before lock2" in the witness
 * graph.  Both locks must already be known to witness.  Returns 0 on
 * success, or EINVAL if either lock is unknown or the opposite order
 * (explicit or implied) has already been recorded.
 */
int
witness_defineorder(struct lock_object *lock1, struct lock_object *lock2)
{

	if (witness_watch < 0 || panicstr != NULL || db_active)
		return (0);

	/* Require locks that witness knows about. */
	if (lock1 == NULL || lock1->lo_witness == NULL || lock2 == NULL ||
	    lock2->lo_witness == NULL)
		return (EINVAL);

	MUTEX_ASSERT_UNLOCKED(&w_mtx);
	mtx_enter(&w_mtx);

	/*
	 * If we already have either an explicit or implied lock order that
	 * is the other way around, then return an error.
	 */
	if (witness_watch &&
	    isitmydescendant(lock2->lo_witness, lock1->lo_witness)) {
		mtx_leave(&w_mtx);
		return (EINVAL);
	}

	/* Try to add the new order. */
	itismychild(lock1->lo_witness, lock2->lo_witness);
	mtx_leave(&w_mtx);
	return (0);
}
753
754 void
755 witness_checkorder(struct lock_object *lock, int flags,
756 struct lock_object *interlock)
757 {
758 struct lock_list_entry *lock_list, *lle;
759 struct lock_instance *lock1, *lock2, *plock;
760 struct lock_class *class, *iclass;
761 struct proc *p;
762 struct witness *w, *w1;
763 int i, j, s;
764
765 if (witness_cold || witness_watch < 1 || panicstr != NULL || db_active)
766 return;
767
768 if ((lock->lo_flags & LO_INITIALIZED) == 0) {
769 if (witness_uninitialized_report > 0) {
770 witness_uninitialized_report--;
771 printf("witness: lock_object uninitialized: %p\n", lock);
772 witness_debugger(1);
773 }
774 lock->lo_flags |= LO_INITIALIZED;
775 }
776
777 if ((lock->lo_flags & LO_WITNESS) == 0)
778 return;
779
780 w = lock->lo_witness;
781 class = LOCK_CLASS(lock);
782
783 if (w == NULL)
784 w = lock->lo_witness =
785 enroll(lock->lo_type, lock->lo_name, class);
786
787 p = curproc;
788
789 if (class->lc_flags & LC_SLEEPLOCK) {
790 /*
791 * Since spin locks include a critical section, this check
792 * implicitly enforces a lock order of all sleep locks before
793 * all spin locks.
794 */
795 lock_list = witness_cpu[cpu_number()].wc_spinlocks;
796 if (lock_list != NULL && lock_list->ll_count > 0) {
797 panic("acquiring blockable sleep lock with "
798 "spinlock or critical section held (%s) %s",
799 class->lc_name, lock->lo_name);
800 }
801
802 /*
803 * If this is the first lock acquired then just return as
804 * no order checking is needed.
805 */
806 lock_list = p->p_sleeplocks;
807 if (lock_list == NULL || lock_list->ll_count == 0)
808 return;
809 } else {
810
811 /*
812 * If this is the first lock, just return as no order
813 * checking is needed.
814 */
815 lock_list = witness_cpu[cpu_number()].wc_spinlocks;
816 if (lock_list == NULL || lock_list->ll_count == 0)
817 return;
818 }
819
820 s = splhigh();
821
822 /*
823 * Check to see if we are recursing on a lock we already own. If
824 * so, make sure that we don't mismatch exclusive and shared lock
825 * acquires.
826 */
827 lock1 = find_instance(lock_list, lock);
828 if (lock1 != NULL) {
829 if ((lock1->li_flags & LI_EXCLUSIVE) != 0 &&
830 (flags & LOP_EXCLUSIVE) == 0) {
831 printf("witness: shared lock of (%s) %s "
832 "while exclusively locked\n",
833 class->lc_name, lock->lo_name);
834 panic("excl->share");
835 }
836 if ((lock1->li_flags & LI_EXCLUSIVE) == 0 &&
837 (flags & LOP_EXCLUSIVE) != 0) {
838 printf("witness: exclusive lock of (%s) %s "
839 "while share locked\n",
840 class->lc_name, lock->lo_name);
841 panic("share->excl");
842 }
843 goto out_splx;
844 }
845
846 /* Warn if the interlock is not locked exactly once. */
847 if (interlock != NULL) {
848 iclass = LOCK_CLASS(interlock);
849 lock1 = find_instance(lock_list, interlock);
850 if (lock1 == NULL)
851 panic("interlock (%s) %s not locked",
852 iclass->lc_name, interlock->lo_name);
853 else if ((lock1->li_flags & LI_RECURSEMASK) != 0)
854 panic("interlock (%s) %s recursed",
855 iclass->lc_name, interlock->lo_name);
856 }
857
858 /*
859 * Find the previously acquired lock, but ignore interlocks.
860 */
861 plock = &lock_list->ll_children[lock_list->ll_count - 1];
862 if (interlock != NULL && plock->li_lock == interlock) {
863 if (lock_list->ll_count > 1)
864 plock =
865 &lock_list->ll_children[lock_list->ll_count - 2];
866 else {
867 lle = lock_list->ll_next;
868
869 /*
870 * The interlock is the only lock we hold, so
871 * simply return.
872 */
873 if (lle == NULL)
874 goto out_splx;
875 plock = &lle->ll_children[lle->ll_count - 1];
876 }
877 }
878
879 /*
880 * Try to perform most checks without a lock. If this succeeds we
881 * can skip acquiring the lock and return success. Otherwise we redo
882 * the check with the lock held to handle races with concurrent updates.
883 */
884 w1 = plock->li_lock->lo_witness;
885 if (witness_lock_order_check(w1, w))
886 goto out_splx;
887
888 mtx_enter(&w_mtx);
889 if (witness_lock_order_check(w1, w))
890 goto out;
891
892 witness_lock_order_add(w1, w);
893
894 /*
895 * Check for duplicate locks of the same type. Note that we only
896 * have to check for this on the last lock we just acquired. Any
897 * other cases will be caught as lock order violations.
898 */
899 if (w1 == w) {
900 i = w->w_index;
901 if (!(lock->lo_flags & LO_DUPOK) && !(flags & LOP_DUPOK) &&
902 !(w_rmatrix[i][i] & WITNESS_REVERSAL)) {
903 w_rmatrix[i][i] |= WITNESS_REVERSAL;
904 w->w_reversed = 1;
905 mtx_leave(&w_mtx);
906 printf("witness: acquiring duplicate lock of "
907 "same type: \"%s\"\n", w->w_type->lt_name);
908 printf(" 1st %s\n", plock->li_lock->lo_name);
909 printf(" 2nd %s\n", lock->lo_name);
910 witness_debugger(1);
911 } else
912 mtx_leave(&w_mtx);
913 goto out_splx;
914 }
915 MUTEX_ASSERT_LOCKED(&w_mtx);
916
917 /*
918 * If we know that the lock we are acquiring comes after
919 * the lock we most recently acquired in the lock order tree,
920 * then there is no need for any further checks.
921 */
922 if (isitmychild(w1, w))
923 goto out;
924
925 for (j = 0, lle = lock_list; lle != NULL; lle = lle->ll_next) {
926 for (i = lle->ll_count - 1; i >= 0; i--, j++) {
927
928 KASSERT(j < LOCK_CHILDCOUNT * LOCK_NCHILDREN);
929 lock1 = &lle->ll_children[i];
930
931 /*
932 * Ignore the interlock.
933 */
934 if (interlock == lock1->li_lock)
935 continue;
936
937 /*
938 * If this lock doesn't undergo witness checking,
939 * then skip it.
940 */
941 w1 = lock1->li_lock->lo_witness;
942 if (w1 == NULL) {
943 KASSERTMSG((lock1->li_lock->lo_flags &
944 LO_WITNESS) == 0,
945 "lock missing witness structure");
946 continue;
947 }
948
949 /*
950 * If we are locking Giant and this is a sleepable
951 * lock, then skip it.
952 */
953 if ((lock1->li_lock->lo_flags & LO_SLEEPABLE) != 0 &&
954 is_kernel_lock(lock))
955 continue;
956
957 /*
958 * If we are locking a sleepable lock and this lock
959 * is Giant, then skip it.
960 */
961 if ((lock->lo_flags & LO_SLEEPABLE) != 0 &&
962 is_kernel_lock(lock1->li_lock))
963 continue;
964
965 /*
966 * If we are locking a sleepable lock and this lock
967 * isn't sleepable, we want to treat it as a lock
			 * order violation to enforce a general lock order of
969 * sleepable locks before non-sleepable locks.
970 */
971 if (((lock->lo_flags & LO_SLEEPABLE) != 0 &&
972 (lock1->li_lock->lo_flags & LO_SLEEPABLE) == 0))
973 goto reversal;
974
975 /*
976 * If we are locking Giant and this is a non-sleepable
977 * lock, then treat it as a reversal.
978 */
979 if ((lock1->li_lock->lo_flags & LO_SLEEPABLE) == 0 &&
980 is_kernel_lock(lock))
981 goto reversal;
982
983 /*
			 * Check the lock order hierarchy for a reversal.
985 */
986 if (!isitmydescendant(w, w1))
987 continue;
988 reversal:
989
990 /*
991 * We have a lock order violation, check to see if it
992 * is allowed or has already been yelled about.
993 */
994
995 /* Bail if this violation is known */
996 if (w_rmatrix[w1->w_index][w->w_index] & WITNESS_REVERSAL)
997 goto out;
998
999 /* Record this as a violation */
1000 w_rmatrix[w1->w_index][w->w_index] |= WITNESS_REVERSAL;
1001 w_rmatrix[w->w_index][w1->w_index] |= WITNESS_REVERSAL;
1002 w->w_reversed = w1->w_reversed = 1;
1003 witness_increment_graph_generation();
1004 mtx_leave(&w_mtx);
1005
1006 /*
1007 * There are known LORs between VNODE locks. They are
1008 * not an indication of a bug. VNODE locks are flagged
1009 * as such (LO_IS_VNODE) and we don't yell if the LOR
1010 * is between 2 VNODE locks.
1011 */
1012 if ((lock->lo_flags & LO_IS_VNODE) != 0 &&
1013 (lock1->li_lock->lo_flags & LO_IS_VNODE) != 0)
1014 goto out_splx;
1015
1016 /*
1017 * Ok, yell about it.
1018 */
1019 printf("witness: ");
1020 if (((lock->lo_flags & LO_SLEEPABLE) != 0 &&
1021 (lock1->li_lock->lo_flags & LO_SLEEPABLE) == 0))
1022 printf("lock order reversal: "
1023 "(sleepable after non-sleepable)\n");
1024 else if ((lock1->li_lock->lo_flags & LO_SLEEPABLE) == 0
1025 && is_kernel_lock(lock))
1026 printf("lock order reversal: "
1027 "(Giant after non-sleepable)\n");
1028 else
1029 printf("lock order reversal:\n");
1030
1031 /*
1032 * Try to locate an earlier lock with
1033 * witness w in our list.
1034 */
1035 do {
1036 lock2 = &lle->ll_children[i];
1037 KASSERT(lock2->li_lock != NULL);
1038 if (lock2->li_lock->lo_witness == w)
1039 break;
1040 if (i == 0 && lle->ll_next != NULL) {
1041 lle = lle->ll_next;
1042 i = lle->ll_count - 1;
1043 KASSERT(i >= 0 && i < LOCK_NCHILDREN);
1044 } else
1045 i--;
1046 } while (i >= 0);
1047 if (i < 0) {
1048 printf(" 1st %p %s (%s)\n",
1049 lock1->li_lock, lock1->li_lock->lo_name,
1050 w1->w_type->lt_name);
1051 printf(" 2nd %p %s (%s)\n",
1052 lock, lock->lo_name, w->w_type->lt_name);
1053 } else {
1054 printf(" 1st %p %s (%s)\n",
1055 lock2->li_lock, lock2->li_lock->lo_name,
1056 lock2->li_lock->lo_witness->w_type->
1057 lt_name);
1058 printf(" 2nd %p %s (%s)\n",
1059 lock1->li_lock, lock1->li_lock->lo_name,
1060 w1->w_type->lt_name);
1061 printf(" 3rd %p %s (%s)\n", lock,
1062 lock->lo_name, w->w_type->lt_name);
1063 }
1064 if (witness_watch > 1) {
1065 struct witness_lock_order_data *wlod1, *wlod2;
1066
1067 mtx_enter(&w_mtx);
1068 wlod1 = witness_lock_order_get(w, w1);
1069 wlod2 = witness_lock_order_get(w1, w);
1070 mtx_leave(&w_mtx);
1071
1072 /*
1073 * It is safe to access saved stack traces,
1074 * w_type, and w_class without the lock.
1075 * Once written, they never change.
1076 */
1077
1078 if (wlod1 != NULL) {
1079 printf("lock order \"%s\"(%s) -> "
1080 "\"%s\"(%s) first seen at:\n",
1081 w->w_type->lt_name,
1082 w->w_class->lc_name,
1083 w1->w_type->lt_name,
1084 w1->w_class->lc_name);
1085 stacktrace_print(
1086 &wlod1->wlod_stack, printf);
1087 } else {
1088 printf("lock order data "
1089 "w2 -> w1 missing\n");
1090 }
1091 if (wlod2 != NULL) {
1092 printf("lock order \"%s\"(%s) -> "
1093 "\"%s\"(%s) first seen at:\n",
1094 w1->w_type->lt_name,
1095 w1->w_class->lc_name,
1096 w->w_type->lt_name,
1097 w->w_class->lc_name);
1098 stacktrace_print(
1099 &wlod2->wlod_stack, printf);
1100 } else {
1101 printf("lock order data "
1102 "w1 -> w2 missing\n");
1103 }
1104 }
1105 witness_debugger(0);
1106 goto out_splx;
1107 }
1108 }
1109
1110 /*
1111 * If requested, build a new lock order. However, don't build a new
1112 * relationship between a sleepable lock and Giant if it is in the
1113 * wrong direction. The correct lock order is that sleepable locks
1114 * always come before Giant.
1115 */
1116 if (flags & LOP_NEWORDER &&
1117 !(is_kernel_lock(plock->li_lock) &&
1118 (lock->lo_flags & LO_SLEEPABLE) != 0))
1119 itismychild(plock->li_lock->lo_witness, w);
1120 out:
1121 mtx_leave(&w_mtx);
1122 out_splx:
1123 splx(s);
1124 }
1125
/*
 * Record the acquisition of `lock' by the current thread.  The lock is
 * pushed onto the thread's sleep-lock list or the current CPU's
 * spin-lock list; if the lock is already held, only its recursion
 * count is bumped.
 */
void
witness_lock(struct lock_object *lock, int flags)
{
	struct lock_list_entry **lock_list, *lle;
	struct lock_instance *instance;
	struct proc *p;
	struct witness *w;
	int s;

	/* Bail out if witness is disabled, cold, or the system is wedged. */
	if (witness_cold || witness_watch < 0 || panicstr != NULL ||
	    db_active || (lock->lo_flags & LO_WITNESS) == 0)
		return;

	/* Lazily enroll locks that have no witness attached yet. */
	w = lock->lo_witness;
	if (w == NULL)
		w = lock->lo_witness =
		    enroll(lock->lo_type, lock->lo_name, LOCK_CLASS(lock));

	p = curproc;

	/* Determine lock list for this lock. */
	if (LOCK_CLASS(lock)->lc_flags & LC_SLEEPLOCK)
		lock_list = &p->p_sleeplocks;
	else
		lock_list = &witness_cpu[cpu_number()].wc_spinlocks;

	s = splhigh();

	/* Check to see if we are recursing on a lock we already own. */
	instance = find_instance(*lock_list, lock);
	if (instance != NULL) {
		/* Recursion count lives in the LI_RECURSEMASK bits. */
		instance->li_flags++;
		goto out;
	}

	/* Remember that this witness has been acquired at least once. */
	w->w_acquired = 1;

	/* Find the next open lock instance in the list and fill it. */
	lle = *lock_list;
	if (lle == NULL || lle->ll_count == LOCK_NCHILDREN) {
		/* Head entry is missing or full; prepend a fresh one. */
		lle = witness_lock_list_get();
		if (lle == NULL)
			goto out;
		lle->ll_next = *lock_list;
		*lock_list = lle;
	}
	instance = &lle->ll_children[lle->ll_count++];
	instance->li_lock = lock;
	if ((flags & LOP_EXCLUSIVE) != 0)
		instance->li_flags = LI_EXCLUSIVE;
	else
		instance->li_flags = 0;
	instance->li_stack = NULL;
	/* Optionally save the acquisition stack trace for later reports. */
	if (witness_locktrace) {
		instance->li_stack = witness_lock_stack_get();
		if (instance->li_stack != NULL)
			stacktrace_save(&instance->li_stack->ls_stack);
	}
out:
	splx(s);
}
1187
/*
 * Record that the calling thread upgraded its shared hold on `lock'
 * to an exclusive hold.  Sanity-checks that the lock is upgradable,
 * is a sleep lock, is currently held shared, and is not recursed.
 */
void
witness_upgrade(struct lock_object *lock, int flags)
{
	struct lock_instance *instance;
	struct lock_class *class;
	int s;

	KASSERTMSG(witness_cold == 0, "%s: witness_cold", __func__);
	/* Bail out if witness is disabled or the system is going down. */
	if (lock->lo_witness == NULL || witness_watch < 0 ||
	    panicstr != NULL || db_active)
		return;
	class = LOCK_CLASS(lock);
	if (witness_watch) {
		if ((lock->lo_flags & LO_UPGRADABLE) == 0)
			panic("upgrade of non-upgradable lock (%s) %s",
			    class->lc_name, lock->lo_name);
		if ((class->lc_flags & LC_SLEEPLOCK) == 0)
			panic("upgrade of non-sleep lock (%s) %s",
			    class->lc_name, lock->lo_name);
	}
	s = splhigh();
	/* Only sleep locks can be upgraded, so search p_sleeplocks. */
	instance = find_instance(curproc->p_sleeplocks, lock);
	if (instance == NULL) {
		panic("upgrade of unlocked lock (%s) %s",
		    class->lc_name, lock->lo_name);
		goto out;
	}
	if (witness_watch) {
		if ((instance->li_flags & LI_EXCLUSIVE) != 0)
			panic("upgrade of exclusive lock (%s) %s",
			    class->lc_name, lock->lo_name);
		if ((instance->li_flags & LI_RECURSEMASK) != 0)
			panic("upgrade of recursed lock (%s) %s r=%d",
			    class->lc_name, lock->lo_name,
			    instance->li_flags & LI_RECURSEMASK);
	}
	/* Flip the recorded instance to exclusive mode. */
	instance->li_flags |= LI_EXCLUSIVE;
out:
	splx(s);
}
1228
/*
 * Record that the calling thread downgraded its exclusive hold on
 * `lock' to a shared hold.  Sanity-checks that the lock is upgradable,
 * is a sleep lock, is currently held exclusive, and is not recursed.
 */
void
witness_downgrade(struct lock_object *lock, int flags)
{
	struct lock_instance *instance;
	struct lock_class *class;
	int s;

	KASSERTMSG(witness_cold == 0, "%s: witness_cold", __func__);
	/* Bail out if witness is disabled or the system is going down. */
	if (lock->lo_witness == NULL || witness_watch < 0 ||
	    panicstr != NULL || db_active)
		return;
	class = LOCK_CLASS(lock);
	if (witness_watch) {
		if ((lock->lo_flags & LO_UPGRADABLE) == 0)
			panic(
			    "downgrade of non-upgradable lock (%s) %s",
			    class->lc_name, lock->lo_name);
		if ((class->lc_flags & LC_SLEEPLOCK) == 0)
			panic("downgrade of non-sleep lock (%s) %s",
			    class->lc_name, lock->lo_name);
	}
	s = splhigh();
	/* Only sleep locks can be downgraded, so search p_sleeplocks. */
	instance = find_instance(curproc->p_sleeplocks, lock);
	if (instance == NULL) {
		panic("downgrade of unlocked lock (%s) %s",
		    class->lc_name, lock->lo_name);
		goto out;
	}
	if (witness_watch) {
		if ((instance->li_flags & LI_EXCLUSIVE) == 0)
			panic("downgrade of shared lock (%s) %s",
			    class->lc_name, lock->lo_name);
		if ((instance->li_flags & LI_RECURSEMASK) != 0)
			panic("downgrade of recursed lock (%s) %s r=%d",
			    class->lc_name, lock->lo_name,
			    instance->li_flags & LI_RECURSEMASK);
	}
	/* Drop the recorded instance back to shared mode. */
	instance->li_flags &= ~LI_EXCLUSIVE;
out:
	splx(s);
}
1270
/*
 * Record the release of `lock' by the current thread: find the matching
 * lock_instance, validate shared/exclusive and NORELEASE state, then
 * remove the instance from its list (or just decrement the recursion
 * count if the lock was recursed).
 */
void
witness_unlock(struct lock_object *lock, int flags)
{
	struct lock_list_entry **lock_list, *lle;
	struct lock_instance *instance;
	struct lock_class *class;
	struct proc *p;
	int i, j;
	int s;

	if (witness_cold || lock->lo_witness == NULL ||
	    panicstr != NULL || db_active)
		return;
	p = curproc;
	class = LOCK_CLASS(lock);

	/* Find lock instance associated with this lock. */
	if (class->lc_flags & LC_SLEEPLOCK)
		lock_list = &p->p_sleeplocks;
	else
		lock_list = &witness_cpu[cpu_number()].wc_spinlocks;

	s = splhigh();

	/* Remember the list head; needed for the maintenance code below. */
	lle = *lock_list;
	for (; *lock_list != NULL; lock_list = &(*lock_list)->ll_next)
		for (i = 0; i < (*lock_list)->ll_count; i++) {
			instance = &(*lock_list)->ll_children[i];
			if (instance->li_lock == lock)
				goto found;
		}

	/*
	 * When disabling WITNESS through witness_watch we could end up in
	 * having registered locks in the p_sleeplocks queue.
	 * We have to make sure we flush these queues, so just search for
	 * any registered locks and remove them.
	 */
	if (witness_watch > 0) {
		panic("lock (%s) %s not locked", class->lc_name, lock->lo_name);
	}
	goto out;

found:

	/* First, check for shared/exclusive mismatches. */
	if ((instance->li_flags & LI_EXCLUSIVE) != 0 && witness_watch > 0 &&
	    (flags & LOP_EXCLUSIVE) == 0) {
		printf("witness: shared unlock of (%s) %s "
		    "while exclusively locked\n",
		    class->lc_name, lock->lo_name);
		panic("excl->ushare");
	}
	if ((instance->li_flags & LI_EXCLUSIVE) == 0 && witness_watch > 0 &&
	    (flags & LOP_EXCLUSIVE) != 0) {
		printf("witness: exclusive unlock of (%s) %s "
		    "while share locked\n", class->lc_name, lock->lo_name);
		panic("share->uexcl");
	}
	/* If we are recursed, unrecurse. */
	if ((instance->li_flags & LI_RECURSEMASK) > 0) {
		instance->li_flags--;
		goto out;
	}
	/* The lock is now being dropped, check for NORELEASE flag */
	if ((instance->li_flags & LI_NORELEASE) != 0 && witness_watch > 0) {
		printf("witness: forbidden unlock of (%s) %s\n",
		    class->lc_name, lock->lo_name);
		panic("lock marked norelease");
	}

	/* Release the stack buffer, if any. */
	if (instance->li_stack != NULL) {
		witness_lock_stack_free(instance->li_stack);
		instance->li_stack = NULL;
	}

	/* Remove this item from the list. */
	for (j = i; j < (*lock_list)->ll_count - 1; j++)
		(*lock_list)->ll_children[j] =
		    (*lock_list)->ll_children[j + 1];
	(*lock_list)->ll_count--;

	/*
	 * In order to reduce contention on w_mtx, we want to always keep a
	 * head object into lists so that frequent allocation from the
	 * free witness pool (and subsequent locking) is avoided.
	 * In order to keep the current code simple, when the head
	 * object is totally unloaded it means also that we do not have
	 * further objects in the list, so the list ownership needs to be
	 * handed over to another object if the current head needs to be freed.
	 */
	if ((*lock_list)->ll_count == 0) {
		if (*lock_list == lle) {
			/* Keep the head entry if it is the only one left. */
			if (lle->ll_next == NULL)
				goto out;
		} else
			lle = *lock_list;
		*lock_list = lle->ll_next;
		witness_lock_list_free(lle);
	}
out:
	splx(s);
}
1375
/*
 * Called at thread teardown: a thread may not exit while still holding
 * sleep locks.  If it does, list the offending locks and panic;
 * otherwise release the thread's (empty) head lock_list_entry.
 */
void
witness_thread_exit(struct proc *p)
{
	struct lock_list_entry *lle;
	int i, n, s;

	lle = p->p_sleeplocks;
	if (lle == NULL || panicstr != NULL || db_active)
		return;
	if (lle->ll_count != 0) {
		/* Print every held lock before panicking. */
		for (n = 0; lle != NULL; lle = lle->ll_next)
			for (i = lle->ll_count - 1; i >= 0; i--) {
				if (n == 0)
					printf("witness: thread %p exiting "
					    "with the following locks held:\n",
					    p);
				n++;
				witness_list_lock(&lle->ll_children[i],
				    printf);
			}
		panic("thread %p cannot exit while holding sleeplocks", p);
	}
	/* Only the always-kept head entry should remain; free it. */
	KASSERT(lle->ll_next == NULL);
	s = splhigh();
	witness_lock_list_free(lle);
	splx(s);
}
1403
1404 /*
1405 * Warn if any locks other than 'lock' are held. Flags can be passed in to
1406 * exempt Giant and sleepable locks from the checks as well. If any
1407 * non-exempt locks are held, then a supplied message is printed to the
1408 * output channel along with a list of the offending locks. If indicated in the
1409 * flags then a failure results in a panic as well.
1410 */
1411 int
1412 witness_warn(int flags, struct lock_object *lock, const char *fmt, ...)
1413 {
1414 struct lock_list_entry *lock_list, *lle;
1415 struct lock_instance *lock1;
1416 struct proc *p;
1417 va_list ap;
1418 int i, n;
1419
1420 if (witness_cold || witness_watch < 1 || panicstr != NULL || db_active)
1421 return (0);
1422 n = 0;
1423 p = curproc;
1424 for (lle = p->p_sleeplocks; lle != NULL; lle = lle->ll_next)
1425 for (i = lle->ll_count - 1; i >= 0; i--) {
1426 lock1 = &lle->ll_children[i];
1427 if (lock1->li_lock == lock)
1428 continue;
1429 if (flags & WARN_KERNELOK &&
1430 is_kernel_lock(lock1->li_lock))
1431 continue;
1432 if (flags & WARN_SLEEPOK &&
1433 (lock1->li_lock->lo_flags & LO_SLEEPABLE) != 0)
1434 continue;
1435 if (n == 0) {
1436 printf("witness: ");
1437 va_start(ap, fmt);
1438 vprintf(fmt, ap);
1439 va_end(ap);
1440 printf(" with the following %slocks held:\n",
1441 (flags & WARN_SLEEPOK) != 0 ?
1442 "non-sleepable " : "");
1443 }
1444 n++;
1445 witness_list_lock(lock1, printf);
1446 }
1447
1448 lock_list = witness_cpu[cpu_number()].wc_spinlocks;
1449 if (lock_list != NULL && lock_list->ll_count != 0) {
1450 /*
1451 * We should only have one spinlock and as long as
1452 * the flags cannot match for this locks class,
1453 * check if the first spinlock is the one curproc
1454 * should hold.
1455 */
1456 lock1 = &lock_list->ll_children[lock_list->ll_count - 1];
1457 if (lock_list->ll_count == 1 && lock_list->ll_next == NULL &&
1458 lock1->li_lock == lock && n == 0)
1459 return (0);
1460
1461 printf("witness: ");
1462 va_start(ap, fmt);
1463 vprintf(fmt, ap);
1464 va_end(ap);
1465 printf(" with the following %slocks held:\n",
1466 (flags & WARN_SLEEPOK) != 0 ? "non-sleepable " : "");
1467 n += witness_list_locks(&lock_list, printf);
1468 }
1469 if (n > 0) {
1470 if (flags & WARN_PANIC)
1471 panic("%s", __func__);
1472 else
1473 witness_debugger(1);
1474 }
1475 return (n);
1476 }
1477
/*
 * Look up the witness for locks of the given type/subtype, creating
 * and registering a new one if none exists yet.  Returns NULL when
 * witness is disabled or no witness object could be allocated.
 */
static struct witness *
enroll(const struct lock_type *type, const char *subtype,
    struct lock_class *lock_class)
{
	struct witness *w;
	struct witness_list *typelist;

	KASSERT(type != NULL);

	if (witness_watch < 0 || panicstr != NULL || db_active)
		return (NULL);
	/* A lock class must be either spin or sleep. */
	if ((lock_class->lc_flags & LC_SPINLOCK)) {
		typelist = &w_spin;
	} else if ((lock_class->lc_flags & LC_SLEEPLOCK)) {
		typelist = &w_sleep;
	} else {
		panic("lock class %s is not sleep or spin",
		    lock_class->lc_name);
		return (NULL);
	}

	mtx_enter(&w_mtx);
	w = witness_hash_get(type, subtype);
	if (w)
		goto found;
	/* witness_get() releases w_mtx itself on failure. */
	if ((w = witness_get()) == NULL)
		return (NULL);
	w->w_type = type;
	w->w_subtype = subtype;
	w->w_class = lock_class;
	SLIST_INSERT_HEAD(&w_all, w, w_list);
	if (lock_class->lc_flags & LC_SPINLOCK) {
		SLIST_INSERT_HEAD(&w_spin, w, w_typelist);
		w_spin_cnt++;
	} else if (lock_class->lc_flags & LC_SLEEPLOCK) {
		SLIST_INSERT_HEAD(&w_sleep, w, w_typelist);
		w_sleep_cnt++;
	}

	/* Insert new witness into the hash */
	witness_hash_put(w);
	witness_increment_graph_generation();
	mtx_leave(&w_mtx);
	return (w);
found:
	mtx_leave(&w_mtx);
	/* An existing witness must agree on the lock class. */
	if (lock_class != w->w_class)
		panic("lock (%s) %s does not match earlier (%s) lock",
		    type->lt_name, lock_class->lc_name, w->w_class->lc_name);
	return (w);
}
1529
/*
 * Record that `child' may be acquired after `parent': set the direct
 * PARENT/CHILD bits in w_rmatrix and transitively propagate the
 * ancestor/descendant relation to all ancestors of `parent' and all
 * descendants of `child', keeping the per-witness counters in sync.
 * Expects w_mtx to be held once the system is past cold boot.
 */
static void
adopt(struct witness *parent, struct witness *child)
{
	int pi, ci, i, j;

	if (witness_cold == 0)
		MUTEX_ASSERT_LOCKED(&w_mtx);

	/* If the relationship is already known, there's no work to be done. */
	if (isitmychild(parent, child))
		return;

	/* When the structure of the graph changes, bump up the generation. */
	witness_increment_graph_generation();

	/*
	 * The hard part ... create the direct relationship, then propagate all
	 * indirect relationships.
	 */
	pi = parent->w_index;
	ci = child->w_index;
	WITNESS_INDEX_ASSERT(pi);
	WITNESS_INDEX_ASSERT(ci);
	KASSERT(pi != ci);
	w_rmatrix[pi][ci] |= WITNESS_PARENT;
	w_rmatrix[ci][pi] |= WITNESS_CHILD;

	/*
	 * If parent was not already an ancestor of child,
	 * then we increment the descendant and ancestor counters.
	 */
	if ((w_rmatrix[pi][ci] & WITNESS_ANCESTOR) == 0) {
		parent->w_num_descendants++;
		child->w_num_ancestors++;
	}

	/*
	 * Find each ancestor of 'pi'. Note that 'pi' itself is counted as
	 * an ancestor of 'pi' during this loop.
	 */
	for (i = 1; i <= w_max_used_index; i++) {
		if ((w_rmatrix[i][pi] & WITNESS_ANCESTOR_MASK) == 0 &&
		    (i != pi))
			continue;

		/* Find each descendant of 'i' and mark it as a descendant. */
		for (j = 1; j <= w_max_used_index; j++) {

			/*
			 * Skip children that are already marked as
			 * descendants of 'i'.
			 */
			if (w_rmatrix[i][j] & WITNESS_ANCESTOR_MASK)
				continue;

			/*
			 * We are only interested in descendants of 'ci'. Note
			 * that 'ci' itself is counted as a descendant of 'ci'.
			 */
			if ((w_rmatrix[ci][j] & WITNESS_ANCESTOR_MASK) == 0 &&
			    (j != ci))
				continue;
			w_rmatrix[i][j] |= WITNESS_ANCESTOR;
			w_rmatrix[j][i] |= WITNESS_DESCENDANT;
			w_data[i].w_num_descendants++;
			w_data[j].w_num_ancestors++;

			/*
			 * Make sure we aren't marking a node as both an
			 * ancestor and descendant. We should have caught
			 * this as a lock order reversal earlier.
			 */
			if ((w_rmatrix[i][j] & WITNESS_ANCESTOR_MASK) &&
			    (w_rmatrix[i][j] & WITNESS_DESCENDANT_MASK)) {
				printf("witness: rmatrix paradox! [%d][%d]=%d "
				    "both ancestor and descendant\n",
				    i, j, w_rmatrix[i][j]);
#ifdef DDB
				db_stack_dump();
#endif
				printf("witness disabled\n");
				witness_watch = -1;
			}
			if ((w_rmatrix[j][i] & WITNESS_ANCESTOR_MASK) &&
			    (w_rmatrix[j][i] & WITNESS_DESCENDANT_MASK)) {
				printf("witness: rmatrix paradox! [%d][%d]=%d "
				    "both ancestor and descendant\n",
				    j, i, w_rmatrix[j][i]);
#ifdef DDB
				db_stack_dump();
#endif
				printf("witness disabled\n");
				witness_watch = -1;
			}
		}
	}
}
1627
/*
 * Link `child' below `parent' in the lock-order graph via adopt(),
 * after checking that both witnesses exist and are of compatible
 * lock type.
 */
static void
itismychild(struct witness *parent, struct witness *child)
{
	KASSERT(child != NULL && parent != NULL);
	if (witness_cold == 0)
		MUTEX_ASSERT_LOCKED(&w_mtx);

	if (!witness_lock_type_equal(parent, child)) {
		/* Release w_mtx so the panic path does not hold it. */
		if (witness_cold == 0)
			mtx_leave(&w_mtx);
		panic(
		    "%s: parent \"%s\" (%s) and child \"%s\" (%s) are not "
		    "the same lock type", __func__, parent->w_type->lt_name,
		    parent->w_class->lc_name, child->w_type->lt_name,
		    child->w_class->lc_name);
	}
	adopt(parent, child);
}
1646
1647 /*
1648 * Generic code for the isitmy*() functions. The rmask parameter is the
1649 * expected relationship of w1 to w2.
1650 */
static int
_isitmyx(struct witness *w1, struct witness *w2, int rmask, const char *fname)
{
	unsigned char r1, r2;
	int i1, i2;

	i1 = w1->w_index;
	i2 = w2->w_index;
	WITNESS_INDEX_ASSERT(i1);
	WITNESS_INDEX_ASSERT(i2);
	/* Fetch both directions of the recorded relationship. */
	r1 = w_rmatrix[i1][i2] & WITNESS_RELATED_MASK;
	r2 = w_rmatrix[i2][i1] & WITNESS_RELATED_MASK;

	/* The flags on one better be the inverse of the flags on the other */
	if (!((WITNESS_ATOD(r1) == r2 && WITNESS_DTOA(r2) == r1) ||
	    (WITNESS_DTOA(r1) == r2 && WITNESS_ATOD(r2) == r1))) {
		/* Don't squawk if we're potentially racing with an update. */
		if (w_mtx.mtx_owner != curcpu())
			return (0);
		/* Matrix is inconsistent: report and disable the checker. */
		printf("witness: %s: rmatrix mismatch between %s (index %d) "
		    "and %s (index %d): w_rmatrix[%d][%d] == %x but "
		    "w_rmatrix[%d][%d] == %x\n",
		    fname, w1->w_type->lt_name, i1, w2->w_type->lt_name,
		    i2, i1, i2, r1,
		    i2, i1, r2);
#ifdef DDB
		db_stack_dump();
#endif
		printf("witness disabled\n");
		witness_watch = -1;
	}
	return (r1 & rmask);
}
1684
1685 /*
1686 * Checks if @child is a direct child of @parent.
1687 */
1688 static int
1689 isitmychild(struct witness *parent, struct witness *child)
1690 {
1691
1692 return (_isitmyx(parent, child, WITNESS_PARENT, __func__));
1693 }
1694
1695 /*
1696 * Checks if @descendant is a direct or indirect descendant of @ancestor.
1697 */
1698 static int
1699 isitmydescendant(struct witness *ancestor, struct witness *descendant)
1700 {
1701
1702 return (_isitmyx(ancestor, descendant, WITNESS_ANCESTOR_MASK,
1703 __func__));
1704 }
1705
/*
 * Allocate a fresh witness object from the free list.  Expects w_mtx
 * to be held (after boot).  On failure the mutex is released and NULL
 * is returned; on success the witness is returned zeroed (except for
 * its preserved index) with w_mtx still held.
 */
static struct witness *
witness_get(void)
{
	struct witness *w;
	int index;

	if (witness_cold == 0)
		MUTEX_ASSERT_LOCKED(&w_mtx);

	if (witness_watch < 0) {
		mtx_leave(&w_mtx);
		return (NULL);
	}
	if (SLIST_EMPTY(&w_free)) {
		/* Out of witnesses: permanently disable the checker. */
		witness_watch = -1;
		mtx_leave(&w_mtx);
		printf("WITNESS: unable to allocate a new witness object\n");
		return (NULL);
	}
	w = SLIST_FIRST(&w_free);
	SLIST_REMOVE_HEAD(&w_free, w_list);
	w_free_cnt--;
	index = w->w_index;
	/* Indices are handed out in strictly increasing order. */
	KASSERT(index > 0 && index == w_max_used_index + 1 &&
	    index < witness_count);
	/* Preserve w_index across the wipe of the structure. */
	memset(w, 0, sizeof(*w));
	w->w_index = index;
	if (index > w_max_used_index)
		w_max_used_index = index;
	return (w);
}
1737
1738 static void
1739 witness_free(struct witness *w)
1740 {
1741 SLIST_INSERT_HEAD(&w_free, w, w_list);
1742 w_free_cnt++;
1743 }
1744
/*
 * Allocate a zeroed lock_list_entry, preferring the per-CPU cache and
 * falling back to the global free list.  Returns NULL (and disables
 * witness on pool exhaustion) if no entry is available.  Must be
 * called at splhigh.
 */
static struct lock_list_entry *
witness_lock_list_get(void)
{
	struct lock_list_entry *lle;
	struct witness_cpu *wcpu = &witness_cpu[cpu_number()];

	if (witness_watch < 0)
		return (NULL);

	splassert(IPL_HIGH);

	/* Fast path: the per-CPU cache needs no mutex. */
	if (wcpu->wc_lle_count > 0) {
		lle = wcpu->wc_lle_cache;
		wcpu->wc_lle_cache = lle->ll_next;
		wcpu->wc_lle_count--;
		memset(lle, 0, sizeof(*lle));
		return (lle);
	}

	mtx_enter(&w_mtx);
	lle = w_lock_list_free;
	if (lle == NULL) {
		/* Pool exhausted: permanently disable the checker. */
		witness_watch = -1;
		mtx_leave(&w_mtx);
		printf("%s: witness exhausted\n", __func__);
		return (NULL);
	}
	w_lock_list_free = lle->ll_next;
	mtx_leave(&w_mtx);
	memset(lle, 0, sizeof(*lle));
	return (lle);
}
1777
1778 static void
1779 witness_lock_list_free(struct lock_list_entry *lle)
1780 {
1781 struct witness_cpu *wcpu = &witness_cpu[cpu_number()];
1782
1783 splassert(IPL_HIGH);
1784
1785 if (wcpu->wc_lle_count < WITNESS_LLE_CACHE_MAX) {
1786 lle->ll_next = wcpu->wc_lle_cache;
1787 wcpu->wc_lle_cache = lle;
1788 wcpu->wc_lle_count++;
1789 return;
1790 }
1791
1792 mtx_enter(&w_mtx);
1793 lle->ll_next = w_lock_list_free;
1794 w_lock_list_free = lle;
1795 mtx_leave(&w_mtx);
1796 }
1797
1798 static union lock_stack *
1799 witness_lock_stack_get(void)
1800 {
1801 union lock_stack *stack = NULL;
1802 struct witness_cpu *wcpu = &witness_cpu[cpu_number()];
1803
1804 splassert(IPL_HIGH);
1805
1806 if (wcpu->wc_stk_count > 0) {
1807 stack = wcpu->wc_stk_cache;
1808 wcpu->wc_stk_cache = stack->ls_next;
1809 wcpu->wc_stk_count--;
1810 return (stack);
1811 }
1812
1813 mtx_enter(&w_mtx);
1814 if (w_lock_stack_free != NULL) {
1815 stack = w_lock_stack_free;
1816 w_lock_stack_free = stack->ls_next;
1817 }
1818 mtx_leave(&w_mtx);
1819 return (stack);
1820 }
1821
1822 static void
1823 witness_lock_stack_free(union lock_stack *stack)
1824 {
1825 struct witness_cpu *wcpu = &witness_cpu[cpu_number()];
1826
1827 splassert(IPL_HIGH);
1828
1829 if (wcpu->wc_stk_count < WITNESS_STK_CACHE_MAX) {
1830 stack->ls_next = wcpu->wc_stk_cache;
1831 wcpu->wc_stk_cache = stack;
1832 wcpu->wc_stk_count++;
1833 return;
1834 }
1835
1836 mtx_enter(&w_mtx);
1837 stack->ls_next = w_lock_stack_free;
1838 w_lock_stack_free = stack;
1839 mtx_leave(&w_mtx);
1840 }
1841
1842 static struct lock_instance *
1843 find_instance(struct lock_list_entry *list, const struct lock_object *lock)
1844 {
1845 struct lock_list_entry *lle;
1846 struct lock_instance *instance;
1847 int i;
1848
1849 for (lle = list; lle != NULL; lle = lle->ll_next) {
1850 for (i = lle->ll_count - 1; i >= 0; i--) {
1851 instance = &lle->ll_children[i];
1852 if (instance->li_lock == lock)
1853 return (instance);
1854 }
1855 }
1856 return (NULL);
1857 }
1858
1859 static void
1860 witness_list_lock(struct lock_instance *instance,
1861 int (*prnt)(const char *fmt, ...))
1862 {
1863 struct lock_object *lock;
1864
1865 lock = instance->li_lock;
1866 prnt("%s %s %s", (instance->li_flags & LI_EXCLUSIVE) != 0 ?
1867 "exclusive" : "shared", LOCK_CLASS(lock)->lc_name, lock->lo_name);
1868 prnt(" r = %d (%p)\n", instance->li_flags & LI_RECURSEMASK, lock);
1869 if (instance->li_stack != NULL)
1870 stacktrace_print(&instance->li_stack->ls_stack, prnt);
1871 }
1872
1873 #ifdef DDB
1874 static int
1875 witness_thread_has_locks(struct proc *p)
1876 {
1877
1878 if (p->p_sleeplocks == NULL)
1879 return (0);
1880 return (p->p_sleeplocks->ll_count != 0);
1881 }
1882
1883 static int
1884 witness_process_has_locks(struct process *pr)
1885 {
1886 struct proc *p;
1887
1888 TAILQ_FOREACH(p, &pr->ps_threads, p_thr_link) {
1889 if (witness_thread_has_locks(p))
1890 return (1);
1891 }
1892 return (0);
1893 }
1894 #endif
1895
1896 int
1897 witness_list_locks(struct lock_list_entry **lock_list,
1898 int (*prnt)(const char *fmt, ...))
1899 {
1900 struct lock_list_entry *lle;
1901 int i, nheld;
1902
1903 nheld = 0;
1904 for (lle = *lock_list; lle != NULL; lle = lle->ll_next)
1905 for (i = lle->ll_count - 1; i >= 0; i--) {
1906 witness_list_lock(&lle->ll_children[i], prnt);
1907 nheld++;
1908 }
1909 return (nheld);
1910 }
1911
1912 /*
1913 * This is a bit risky at best. We call this function when we have timed
1914 * out acquiring a spin lock, and we assume that the other CPU is stuck
1915 * with this lock held. So, we go groveling around in the other CPU's
1916 * per-cpu data to try to find the lock instance for this spin lock to
1917 * see when it was last acquired.
1918 */
1919 void
1920 witness_display_spinlock(struct lock_object *lock, struct proc *owner,
1921 int (*prnt)(const char *fmt, ...))
1922 {
1923 struct lock_instance *instance;
1924
1925 if (owner->p_stat != SONPROC)
1926 return;
1927 instance = find_instance(
1928 witness_cpu[owner->p_cpu->ci_cpuid].wc_spinlocks, lock);
1929 if (instance != NULL)
1930 witness_list_lock(instance, prnt);
1931 }
1932
/*
 * Assert that `lock' is in the state described by `flags' (an LA_*
 * combination) for the current thread/CPU, panicking otherwise.
 */
void
witness_assert(const struct lock_object *lock, int flags)
{
	struct lock_instance *instance;
	struct lock_class *class;

	if (lock->lo_witness == NULL || witness_watch < 1 ||
	    panicstr != NULL || db_active)
		return;
	class = LOCK_CLASS(lock);
	/* Locate the instance in the appropriate held-locks list. */
	if ((class->lc_flags & LC_SLEEPLOCK) != 0)
		instance = find_instance(curproc->p_sleeplocks, lock);
	else if ((class->lc_flags & LC_SPINLOCK) != 0)
		instance = find_instance(
		    witness_cpu[cpu_number()].wc_spinlocks, lock);
	else {
		panic("lock (%s) %s is not sleep or spin!",
		    class->lc_name, lock->lo_name);
		return;
	}
	switch (flags) {
	case LA_UNLOCKED:
		if (instance != NULL)
			panic("lock (%s) %s locked",
			    class->lc_name, lock->lo_name);
		break;
	case LA_LOCKED:
	case LA_LOCKED | LA_RECURSED:
	case LA_LOCKED | LA_NOTRECURSED:
	case LA_SLOCKED:
	case LA_SLOCKED | LA_RECURSED:
	case LA_SLOCKED | LA_NOTRECURSED:
	case LA_XLOCKED:
	case LA_XLOCKED | LA_RECURSED:
	case LA_XLOCKED | LA_NOTRECURSED:
		if (instance == NULL) {
			panic("lock (%s) %s not locked",
			    class->lc_name, lock->lo_name);
			break;
		}
		/* Check exclusivity and recursion against the request. */
		if ((flags & LA_XLOCKED) != 0 &&
		    (instance->li_flags & LI_EXCLUSIVE) == 0)
			panic(
			    "lock (%s) %s not exclusively locked",
			    class->lc_name, lock->lo_name);
		if ((flags & LA_SLOCKED) != 0 &&
		    (instance->li_flags & LI_EXCLUSIVE) != 0)
			panic(
			    "lock (%s) %s exclusively locked",
			    class->lc_name, lock->lo_name);
		if ((flags & LA_RECURSED) != 0 &&
		    (instance->li_flags & LI_RECURSEMASK) == 0)
			panic("lock (%s) %s not recursed",
			    class->lc_name, lock->lo_name);
		if ((flags & LA_NOTRECURSED) != 0 &&
		    (instance->li_flags & LI_RECURSEMASK) != 0)
			panic("lock (%s) %s recursed",
			    class->lc_name, lock->lo_name);
		break;
	default:
		panic("invalid lock assertion");

	}
}
1997
1998 static void
1999 witness_setflag(struct lock_object *lock, int flag, int set)
2000 {
2001 struct lock_list_entry *lock_list;
2002 struct lock_instance *instance;
2003 struct lock_class *class;
2004
2005 if (lock->lo_witness == NULL || witness_watch < 0 ||
2006 panicstr != NULL || db_active)
2007 return;
2008 class = LOCK_CLASS(lock);
2009 if (class->lc_flags & LC_SLEEPLOCK)
2010 lock_list = curproc->p_sleeplocks;
2011 else
2012 lock_list = witness_cpu[cpu_number()].wc_spinlocks;
2013 instance = find_instance(lock_list, lock);
2014 if (instance == NULL) {
2015 panic("%s: lock (%s) %s not locked", __func__,
2016 class->lc_name, lock->lo_name);
2017 return;
2018 }
2019
2020 if (set)
2021 instance->li_flags |= flag;
2022 else
2023 instance->li_flags &= ~flag;
2024 }
2025
2026 void
2027 witness_norelease(struct lock_object *lock)
2028 {
2029
2030 witness_setflag(lock, LI_NORELEASE, 1);
2031 }
2032
2033 void
2034 witness_releaseok(struct lock_object *lock)
2035 {
2036
2037 witness_setflag(lock, LI_NORELEASE, 0);
2038 }
2039
2040 #ifdef DDB
/*
 * ddb helper: dump the locks held by thread `p', including the local
 * CPU's spin locks when `p' is curproc.
 */
static void
witness_ddb_list(struct proc *p)
{
	struct witness_cpu *wc = &witness_cpu[cpu_number()];

	KASSERTMSG(witness_cold == 0, "%s: witness_cold", __func__);
	KASSERTMSG(db_active, "%s: not in the debugger", __func__);

	if (witness_watch < 1)
		return;

	witness_list_locks(&p->p_sleeplocks, db_printf);

	/*
	 * We only handle spinlocks if p == curproc.  This is somewhat broken
	 * if p is currently executing on some other CPU and holds spin locks
	 * as we won't display those locks.  If we had a MI way of getting
	 * the per-cpu data for a given cpu then we could use
	 * p's CPU number to get the list of spinlocks for this thread
	 * and "fix" this.
	 *
	 * That still wouldn't really fix this unless we locked the scheduler
	 * lock or stopped the other CPU to make sure it wasn't changing the
	 * list out from under us. It is probably best to just not try to
	 * handle threads on other CPU's for now.
	 */
	if (p == curproc && wc->wc_spinlocks != NULL)
		witness_list_locks(&wc->wc_spinlocks, db_printf);
}
2070
2071 void
2072 db_witness_list(db_expr_t addr, int have_addr, db_expr_t count, char *modif)
2073 {
2074 struct proc *p;
2075
2076 if (have_addr)
2077 p = (struct proc *)addr;
2078 else
2079 p = curproc;
2080 witness_ddb_list(p);
2081 }
2082
/*
 * ddb "show all locks" command: dump the spin locks held on every CPU,
 * then the sleep locks held by every thread of every process that
 * holds any.
 */
void
db_witness_list_all(db_expr_t addr, int have_addr, db_expr_t count, char *modif)
{
	CPU_INFO_ITERATOR cii;
	struct cpu_info *ci;
	struct lock_list_entry *lock_list;
	struct process *pr;
	struct proc *p;

	/* Per-CPU spin lock lists first; skip CPUs holding nothing. */
	CPU_INFO_FOREACH(cii, ci) {
		lock_list = witness_cpu[CPU_INFO_UNIT(ci)].wc_spinlocks;
		if (lock_list == NULL || lock_list->ll_count == 0)
			continue;
		db_printf("CPU %d:\n", CPU_INFO_UNIT(ci));
		witness_list_locks(&lock_list, db_printf);
	}

	/*
	 * It would be nice to list only threads and processes that actually
	 * held sleep locks, but that information is currently not exported
	 * by WITNESS.
	 */
	LIST_FOREACH(pr, &allprocess, ps_list) {
		if (!witness_process_has_locks(pr))
			continue;
		TAILQ_FOREACH(p, &pr->ps_threads, p_thr_link) {
			if (!witness_thread_has_locks(p))
				continue;
			db_printf("Process %d (%s) thread %p (%d)\n",
			    pr->ps_pid, pr->ps_comm, p, p->p_tid);
			witness_ddb_list(p);
		}
	}
}
2117
2118 void
2119 witness_print_badstacks(void)
2120 {
2121 static struct witness tmp_w1, tmp_w2;
2122 static struct witness_lock_order_data tmp_data1, tmp_data2;
2123 struct witness_lock_order_data *data1, *data2;
2124 struct witness *w1, *w2;
2125 int error, generation, i, j;
2126
2127 if (witness_watch < 1) {
2128 db_printf("witness watch is disabled\n");
2129 return;
2130 }
2131 if (witness_cold) {
2132 db_printf("witness is cold\n");
2133 return;
2134 }
2135 error = 0;
2136
2137 memset(&tmp_w1, 0, sizeof(tmp_w1));
2138 memset(&tmp_w2, 0, sizeof(tmp_w2));
2139 memset(&tmp_data1, 0, sizeof(tmp_data1));
2140 memset(&tmp_data2, 0, sizeof(tmp_data2));
2141
2142 restart:
2143 mtx_enter(&w_mtx);
2144 generation = w_generation;
2145 mtx_leave(&w_mtx);
2146 db_printf("Number of known direct relationships is %d\n",
2147 w_lohash.wloh_count);
2148 for (i = 1; i < w_max_used_index; i++) {
2149 mtx_enter(&w_mtx);
2150 if (generation != w_generation) {
2151 mtx_leave(&w_mtx);
2152
2153 /* The graph has changed, try again. */
2154 db_printf("Lock graph changed, restarting trace.\n");
2155 goto restart;
2156 }
2157
2158 w1 = &w_data[i];
2159 if (w1->w_reversed == 0) {
2160 mtx_leave(&w_mtx);
2161 continue;
2162 }
2163
2164 /* Copy w1 locally so we can release the spin lock. */
2165 tmp_w1 = *w1;
2166 mtx_leave(&w_mtx);
2167
2168 if (tmp_w1.w_reversed == 0)
2169 continue;
2170 for (j = 1; j < w_max_used_index; j++) {
2171 if ((w_rmatrix[i][j] & WITNESS_REVERSAL) == 0 || i > j)
2172 continue;
2173
2174 mtx_enter(&w_mtx);
2175 if (generation != w_generation) {
2176 mtx_leave(&w_mtx);
2177
2178 /* The graph has changed, try again. */
2179 db_printf("Lock graph changed, "
2180 "restarting trace.\n");
2181 goto restart;
2182 }
2183
2184 w2 = &w_data[j];
2185 data1 = witness_lock_order_get(w1, w2);
2186 data2 = witness_lock_order_get(w2, w1);
2187
2188 /*
2189 * Copy information locally so we can release the
2190 * spin lock.
2191 */
2192 tmp_w2 = *w2;
2193
2194 if (data1)
2195 tmp_data1.wlod_stack = data1->wlod_stack;
2196 if (data2 && data2 != data1)
2197 tmp_data2.wlod_stack = data2->wlod_stack;
2198 mtx_leave(&w_mtx);
2199
2200 db_printf("\nLock order reversal between \"%s\"(%s) "
2201 "and \"%s\"(%s)!\n",
2202 tmp_w1.w_type->lt_name, tmp_w1.w_class->lc_name,
2203 tmp_w2.w_type->lt_name, tmp_w2.w_class->lc_name);
2204 if (data1) {
2205 db_printf("Lock order \"%s\"(%s) -> \"%s\"(%s) "
2206 "first seen at:\n",
2207 tmp_w1.w_type->lt_name,
2208 tmp_w1.w_class->lc_name,
2209 tmp_w2.w_type->lt_name,
2210 tmp_w2.w_class->lc_name);
2211 stacktrace_print(&tmp_data1.wlod_stack,
2212 db_printf);
2213 db_printf("\n");
2214 }
2215 if (data2 && data2 != data1) {
2216 db_printf("Lock order \"%s\"(%s) -> \"%s\"(%s) "
2217 "first seen at:\n",
2218 tmp_w2.w_type->lt_name,
2219 tmp_w2.w_class->lc_name,
2220 tmp_w1.w_type->lt_name,
2221 tmp_w1.w_class->lc_name);
2222 stacktrace_print(&tmp_data2.wlod_stack,
2223 db_printf);
2224 db_printf("\n");
2225 }
2226 }
2227 }
2228 mtx_enter(&w_mtx);
2229 if (generation != w_generation) {
2230 mtx_leave(&w_mtx);
2231
2232 /*
2233 * The graph changed while we were printing stack data,
2234 * try again.
2235 */
2236 db_printf("Lock graph changed, restarting trace.\n");
2237 goto restart;
2238 }
2239 mtx_leave(&w_mtx);
2240 }
2241
2242 void
2243 db_witness_display(db_expr_t addr, int have_addr, db_expr_t count, char *modif)
2244 {
2245 switch (modif[0]) {
2246 case 'b':
2247 witness_print_badstacks();
2248 break;
2249 default:
2250 witness_ddb_display(db_printf);
2251 break;
2252 }
2253 }
2254 #endif
2255
2256 void
2257 db_witness_print_fullgraph(void)
2258 {
2259 struct witness *w;
2260 int error;
2261
2262 if (witness_watch < 1) {
2263 db_printf("witness watch is disabled\n");
2264 return;
2265 }
2266 if (witness_cold) {
2267 db_printf("witness is cold\n");
2268 return;
2269 }
2270 error = 0;
2271
2272 mtx_enter(&w_mtx);
2273 SLIST_FOREACH(w, &w_all, w_list)
2274 w->w_displayed = 0;
2275 SLIST_FOREACH(w, &w_all, w_list)
2276 db_witness_add_fullgraph(w);
2277 mtx_leave(&w_mtx);
2278 }
2279
/*
 * Recursively print every direct lock-order edge reachable from `w' as
 * a "parent","child" pair.  The w_displayed marker (reset by
 * db_witness_print_fullgraph()) prevents visiting a node twice;
 * witnesses that were never acquired are skipped.
 */
static void
db_witness_add_fullgraph(struct witness *w)
{
	int i;

	if (w->w_displayed != 0 || w->w_acquired == 0)
		return;
	w->w_displayed = 1;

	WITNESS_INDEX_ASSERT(w->w_index);
	/* Emit one edge per direct child, then recurse into the child. */
	for (i = 1; i <= w_max_used_index; i++) {
		if (w_rmatrix[w->w_index][i] & WITNESS_PARENT) {
			db_printf("\"%s\",\"%s\"\n", w->w_type->lt_name,
			    w_data[i].w_type->lt_name);
			db_witness_add_fullgraph(&w_data[i]);
		}
	}
}
2298
/*
 * A simple hash function (Bernstein's djb2).  Takes a key pointer and a
 * key size.  If size == 0, interprets the key as a string and reads
 * until the null terminator.  Otherwise, reads the first size bytes.
 * Returns an unsigned 32-bit hash value computed from the key.
 *
 * Fix: the loop index was a signed `int' compared against the unsigned
 * `size' (sign-compare hazard), and the accumulator was `unsigned int'
 * while the function returns uint32_t; both now use uint32_t.
 */
static uint32_t
witness_hash_djb2(const uint8_t *key, uint32_t size)
{
	uint32_t hash = 5381;
	uint32_t i;

	/* hash = hash * 33 + key[i] */
	if (size)
		for (i = 0; i < size; i++)
			hash = ((hash << 5) + hash) + (uint32_t)key[i];
	else
		for (i = 0; key[i] != 0; i++)
			hash = ((hash << 5) + hash) + (uint32_t)key[i];

	return (hash);
}
2321
2322
2323 /*
2324 * Initializes the two witness hash tables. Called exactly once from
2325 * witness_initialize().
2326 */
static void
witness_init_hash_tables(void)
{
	int i;

	/* Must run before witness goes live. */
	KASSERT(witness_cold);

	/* Initialize the hash tables. */
	for (i = 0; i < WITNESS_HASH_SIZE; i++)
		SLIST_INIT(&w_hash.wh_array[i]);

	w_hash.wh_size = WITNESS_HASH_SIZE;
	w_hash.wh_count = 0;

	/* Initialize the lock order data hash. */
	w_lofree = NULL;
	/* Thread every static w_lodata entry onto the w_lofree free list. */
	for (i = 0; i < WITNESS_LO_DATA_COUNT; i++) {
		memset(&w_lodata[i], 0, sizeof(w_lodata[i]));
		w_lodata[i].wlod_next = w_lofree;
		w_lofree = &w_lodata[i];
	}
	w_lohash.wloh_size = WITNESS_LO_HASH_SIZE;
	w_lohash.wloh_count = 0;
	/* Start with every lock-order hash bucket empty. */
	for (i = 0; i < WITNESS_LO_HASH_SIZE; i++)
		w_lohash.wloh_array[i] = NULL;
}
2353
2354 static struct witness *
2355 witness_hash_get(const struct lock_type *type, const char *subtype)
2356 {
2357 struct witness *w;
2358 uint32_t hash;
2359
2360 KASSERT(type != NULL);
2361 if (witness_cold == 0)
2362 MUTEX_ASSERT_LOCKED(&w_mtx);
2363 hash = (uint32_t)((uintptr_t)type ^ (uintptr_t)subtype) %
2364 w_hash.wh_size;
2365 SLIST_FOREACH(w, &w_hash.wh_array[hash], w_hash_next) {
2366 if (w->w_type == type && w->w_subtype == subtype)
2367 goto out;
2368 }
2369
2370 out:
2371 return (w);
2372 }
2373
/*
 * Insert witness `w' into the witness hash table.  The (w_type,
 * w_subtype) pair must not already be present, and w must not already
 * be linked into a bucket.  Requires w_mtx once witness is warm.
 */
static void
witness_hash_put(struct witness *w)
{
	uint32_t hash;

	KASSERT(w != NULL);
	KASSERT(w->w_type != NULL);
	if (witness_cold == 0)
		MUTEX_ASSERT_LOCKED(&w_mtx);
	KASSERTMSG(witness_hash_get(w->w_type, w->w_subtype) == NULL,
	    "%s: trying to add a hash entry that already exists!", __func__);
	KASSERTMSG(SLIST_NEXT(w, w_hash_next) == NULL,
	    "%s: w->w_hash_next != NULL", __func__);

	/* Same bucket computation as witness_hash_get(). */
	hash = (uint32_t)((uintptr_t)w->w_type ^ (uintptr_t)w->w_subtype) %
	    w_hash.wh_size;
	SLIST_INSERT_HEAD(&w_hash.wh_array[hash], w, w_hash_next);
	w_hash.wh_count++;
}
2393
2394
2395 static struct witness_lock_order_data *
2396 witness_lock_order_get(struct witness *parent, struct witness *child)
2397 {
2398 struct witness_lock_order_data *data = NULL;
2399 struct witness_lock_order_key key;
2400 unsigned int hash;
2401
2402 KASSERT(parent != NULL && child != NULL);
2403 key.from = parent->w_index;
2404 key.to = child->w_index;
2405 WITNESS_INDEX_ASSERT(key.from);
2406 WITNESS_INDEX_ASSERT(key.to);
2407 if ((w_rmatrix[parent->w_index][child->w_index]
2408 & WITNESS_LOCK_ORDER_KNOWN) == 0)
2409 goto out;
2410
2411 hash = witness_hash_djb2((const char*)&key,
2412 sizeof(key)) % w_lohash.wloh_size;
2413 data = w_lohash.wloh_array[hash];
2414 while (data != NULL) {
2415 if (witness_lock_order_key_equal(&data->wlod_key, &key))
2416 break;
2417 data = data->wlod_next;
2418 }
2419
2420 out:
2421 return (data);
2422 }
2423
2424 /*
2425 * Verify that parent and child have a known relationship, are not the same,
2426 * and child is actually a child of parent. This is done without w_mtx
2427 * to avoid contention in the common case.
2428 */
2429 static int
2430 witness_lock_order_check(struct witness *parent, struct witness *child)
2431 {
2432
2433 if (parent != child &&
2434 w_rmatrix[parent->w_index][child->w_index]
2435 & WITNESS_LOCK_ORDER_KNOWN &&
2436 isitmychild(parent, child))
2437 return (1);
2438
2439 return (0);
2440 }
2441
2442 static int
2443 witness_lock_order_add(struct witness *parent, struct witness *child)
2444 {
2445 static int lofree_empty_reported = 0;
2446 struct witness_lock_order_data *data = NULL;
2447 struct witness_lock_order_key key;
2448 unsigned int hash;
2449
2450 KASSERT(parent != NULL && child != NULL);
2451 key.from = parent->w_index;
2452 key.to = child->w_index;
2453 WITNESS_INDEX_ASSERT(key.from);
2454 WITNESS_INDEX_ASSERT(key.to);
2455 if (w_rmatrix[parent->w_index][child->w_index]
2456 & WITNESS_LOCK_ORDER_KNOWN)
2457 return (1);
2458
2459 hash = witness_hash_djb2((const char*)&key,
2460 sizeof(key)) % w_lohash.wloh_size;
2461 w_rmatrix[parent->w_index][child->w_index] |= WITNESS_LOCK_ORDER_KNOWN;
2462 data = w_lofree;
2463 if (data == NULL) {
2464 if (!lofree_empty_reported) {
2465 lofree_empty_reported = 1;
2466 printf("witness: out of free lock order entries\n");
2467 }
2468 return (0);
2469 }
2470 w_lofree = data->wlod_next;
2471 data->wlod_next = w_lohash.wloh_array[hash];
2472 data->wlod_key = key;
2473 w_lohash.wloh_array[hash] = data;
2474 w_lohash.wloh_count++;
2475 stacktrace_save_at(&data->wlod_stack, 1);
2476 return (1);
2477 }
2478
2479 /* Call this whenever the structure of the witness graph changes. */
static void
witness_increment_graph_generation(void)
{

	if (witness_cold == 0)
		MUTEX_ASSERT_LOCKED(&w_mtx);
	/* Readers snapshot w_generation and restart when it moves. */
	w_generation++;
}
2488
2489 static void
2490 witness_debugger(int dump)
2491 {
2492 switch (witness_watch) {
2493 case 1:
2494 break;
2495 case 2:
2496 if (dump)
2497 db_stack_dump();
2498 break;
2499 case 3:
2500 if (dump)
2501 db_stack_dump();
2502 db_enter();
2503 break;
2504 default:
2505 panic("witness: locking error");
2506 }
2507 }
2508
/*
 * Ensure the pool of saved-stack buffers used by lock tracing holds one
 * entry per possible held lock (LOCK_CHILDCOUNT * LOCK_NCHILDREN),
 * allocating the shortfall.  Returns 0 on success or ENOMEM.  The
 * caller must hold w_ctlock exclusively.
 */
static int
witness_alloc_stacks(void)
{
	union lock_stack *stacks;
	unsigned int i, nstacks = LOCK_CHILDCOUNT * LOCK_NCHILDREN;

	rw_assert_wrlock(&w_ctlock);

	/* Pool already fully provisioned. */
	if (w_lock_stack_num >= nstacks)
		return (0);

	/* Only allocate the missing portion. */
	nstacks -= w_lock_stack_num;
	stacks = mallocarray(nstacks, sizeof(*stacks), M_WITNESS,
	    M_WAITOK | M_CANFAIL | M_ZERO);
	if (stacks == NULL)
		return (ENOMEM);

	/* Push the new buffers onto the shared free list under w_mtx. */
	mtx_enter(&w_mtx);
	for (i = 0; i < nstacks; i++) {
		stacks[i].ls_next = w_lock_stack_free;
		w_lock_stack_free = &stacks[i];
	}
	mtx_leave(&w_mtx);
	/*
	 * NOTE(review): updated outside w_mtx -- presumably serialized by
	 * the w_ctlock write lock asserted above; confirm all readers of
	 * w_lock_stack_num take w_ctlock as well.
	 */
	w_lock_stack_num += nstacks;

	return (0);
}
2536
/*
 * sysctl handler for the kern.witness.* subtree.  Supports
 * KERN_WITNESS_WATCH (delegated to witness_sysctl_watch()) and
 * KERN_WITNESS_LOCKTRACE (0/1; enabling allocates the stack-trace
 * buffers first).  All control operations are serialized by w_ctlock.
 */
int
witness_sysctl(int *name, u_int namelen, void *oldp, size_t *oldlenp,
    void *newp, size_t newlen)
{
	int error, value;

	if (namelen != 1)
		return (ENOTDIR);

	rw_enter_write(&w_ctlock);

	switch (name[0]) {
	case KERN_WITNESS_WATCH:
		error = witness_sysctl_watch(oldp, oldlenp, newp, newlen);
		break;
	case KERN_WITNESS_LOCKTRACE:
		value = witness_locktrace;
		error = sysctl_int(oldp, oldlenp, newp, newlen, &value);
		if (error == 0 && newp != NULL) {
			switch (value) {
			case 1:
				/* Enabling: stack buffers must exist first. */
				error = witness_alloc_stacks();
				/* FALLTHROUGH */
			case 0:
				if (error == 0)
					witness_locktrace = value;
				break;
			default:
				error = EINVAL;
				break;
			}
		}
		break;
	default:
		error = EOPNOTSUPP;
		break;
	}

	rw_exit_write(&w_ctlock);

	return (error);
}
2579
2580 int
2581 witness_sysctl_watch(void *oldp, size_t *oldlenp, void *newp, size_t newlen)
2582 {
2583 int error;
2584 int value;
2585
2586 value = witness_watch;
2587 error = sysctl_int_bounded(oldp, oldlenp, newp, newlen,
2588 &value, -1, 3);
2589 if (error == 0 && newp != NULL) {
2590 mtx_enter(&w_mtx);
2591 if (value < 0 || witness_watch >= 0)
2592 witness_watch = value;
2593 else
2594 error = EINVAL;
2595 mtx_leave(&w_mtx);
2596 }
2597 return (error);
2598 }
/* Cache object: 8c2a1886f84ef7804ae8ba714fbfb1c6 */