FreeBSD/Linux Kernel Cross Reference
sys/kern/kern_umtx.c
1 /*-
2 * Copyright (c) 2004, David Xu <davidxu@freebsd.org>
3 * Copyright (c) 2002, Jeffrey Roberson <jeff@freebsd.org>
4 * All rights reserved.
5 *
6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions
8 * are met:
9 * 1. Redistributions of source code must retain the above copyright
10 * notice unmodified, this list of conditions, and the following
11 * disclaimer.
12 * 2. Redistributions in binary form must reproduce the above copyright
13 * notice, this list of conditions and the following disclaimer in the
14 * documentation and/or other materials provided with the distribution.
15 *
16 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
17 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
18 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
19 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
20 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
21 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
22 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
23 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
24 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
25 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26 */
27
28 #include <sys/cdefs.h>
29 __FBSDID("$FreeBSD: releng/9.1/sys/kern/kern_umtx.c 234505 2012-04-20 21:40:31Z davide $");
30
31 #include "opt_compat.h"
32 #include "opt_umtx_profiling.h"
33
34 #include <sys/param.h>
35 #include <sys/kernel.h>
36 #include <sys/limits.h>
37 #include <sys/lock.h>
38 #include <sys/malloc.h>
39 #include <sys/mutex.h>
40 #include <sys/priv.h>
41 #include <sys/proc.h>
42 #include <sys/sched.h>
43 #include <sys/smp.h>
44 #include <sys/sysctl.h>
45 #include <sys/sysent.h>
46 #include <sys/systm.h>
47 #include <sys/sysproto.h>
48 #include <sys/syscallsubr.h>
49 #include <sys/eventhandler.h>
50 #include <sys/umtx.h>
51
52 #include <vm/vm.h>
53 #include <vm/vm_param.h>
54 #include <vm/pmap.h>
55 #include <vm/vm_map.h>
56 #include <vm/vm_object.h>
57
58 #include <machine/cpu.h>
59
60 #ifdef COMPAT_FREEBSD32
61 #include <compat/freebsd32/freebsd32_proto.h>
62 #endif
63
/* Operating modes accepted by the umutex lock routines. */
#define	_UMUTEX_TRY		1	/* report EBUSY instead of sleeping */
#define	_UMUTEX_WAIT		2	/* return once the mutex looks free */
66
67 /* Priority inheritance mutex info. */
68 struct umtx_pi {
69 /* Owner thread */
70 struct thread *pi_owner;
71
72 /* Reference count */
73 int pi_refcount;
74
75 /* List entry to link umtx holding by thread */
76 TAILQ_ENTRY(umtx_pi) pi_link;
77
78 /* List entry in hash */
79 TAILQ_ENTRY(umtx_pi) pi_hashlink;
80
81 /* List for waiters */
82 TAILQ_HEAD(,umtx_q) pi_blocked;
83
84 /* Identify a userland lock object */
85 struct umtx_key pi_key;
86 };
87
88 /* A userland synchronous object user. */
89 struct umtx_q {
90 /* Linked list for the hash. */
91 TAILQ_ENTRY(umtx_q) uq_link;
92
93 /* Umtx key. */
94 struct umtx_key uq_key;
95
96 /* Umtx flags. */
97 int uq_flags;
98 #define UQF_UMTXQ 0x0001
99
100 /* The thread waits on. */
101 struct thread *uq_thread;
102
103 /*
104 * Blocked on PI mutex. read can use chain lock
105 * or umtx_lock, write must have both chain lock and
106 * umtx_lock being hold.
107 */
108 struct umtx_pi *uq_pi_blocked;
109
110 /* On blocked list */
111 TAILQ_ENTRY(umtx_q) uq_lockq;
112
113 /* Thread contending with us */
114 TAILQ_HEAD(,umtx_pi) uq_pi_contested;
115
116 /* Inherited priority from PP mutex */
117 u_char uq_inherited_pri;
118
119 /* Spare queue ready to be reused */
120 struct umtxq_queue *uq_spare_queue;
121
122 /* The queue we on */
123 struct umtxq_queue *uq_cur_queue;
124 };
125
126 TAILQ_HEAD(umtxq_head, umtx_q);
127
128 /* Per-key wait-queue */
129 struct umtxq_queue {
130 struct umtxq_head head;
131 struct umtx_key key;
132 LIST_ENTRY(umtxq_queue) link;
133 int length;
134 };
135
136 LIST_HEAD(umtxq_list, umtxq_queue);
137
138 /* Userland lock object's wait-queue chain */
139 struct umtxq_chain {
140 /* Lock for this chain. */
141 struct mtx uc_lock;
142
143 /* List of sleep queues. */
144 struct umtxq_list uc_queue[2];
145 #define UMTX_SHARED_QUEUE 0
146 #define UMTX_EXCLUSIVE_QUEUE 1
147
148 LIST_HEAD(, umtxq_queue) uc_spare_queue;
149
150 /* Busy flag */
151 char uc_busy;
152
153 /* Chain lock waiters */
154 int uc_waiters;
155
156 /* All PI in the list */
157 TAILQ_HEAD(,umtx_pi) uc_pi_list;
158
159 #ifdef UMTX_PROFILING
160 int length;
161 int max_length;
162 #endif
163 };
164
/* Assert that the caller owns the chain mutex. */
#define	UMTXQ_LOCKED_ASSERT(uc)	mtx_assert(&(uc)->uc_lock, MA_OWNED)

/*
 * Assert that the chain is marked busy.  The VALUE of uc_busy is tested;
 * testing its address (as an earlier form of this macro did) is always
 * true and makes the assertion vacuous.
 */
#define	UMTXQ_BUSY_ASSERT(uc)	KASSERT((uc)->uc_busy, ("umtx chain is not busy"))
167
/*
 * Time-sharing priorities are deliberately not propagated, for security
 * reasons: a user could create a PI mutex, have thread A lock it and have
 * thread B block on it.  Because B is sleeping its priority is boosted,
 * and priority propagation would then boost A as well; A's priority would
 * never be lowered again even if it used 100% of the CPU, which is unfair
 * to other processes.  UPRI() therefore maps every time-sharing user
 * priority to PRI_MAX_TIMESHARE and passes other priorities through.
 */
#define	UPRI(td)	(((td)->td_user_pri >= PRI_MIN_TIMESHARE &&\
			 (td)->td_user_pri <= PRI_MAX_TIMESHARE) ?\
			 PRI_MAX_TIMESHARE : (td)->td_user_pri)

/* Parameters of the key hash; see umtxq_hash(). */
#define	GOLDEN_RATIO_PRIME	2654404609U
#define	UMTX_CHAINS		512
#define	UMTX_SHIFTS		(__WORD_BIT - 9)

/* Map the USYNC_PROCESS_SHARED bit of a flags word to a sharing mode. */
#define	GET_SHARE(flags)	\
    (((flags) & USYNC_PROCESS_SHARED) == 0 ? THREAD_SHARE : PROCESS_SHARE)

/* Spin budget used by umtxq_busy() before it falls back to sleeping. */
#define	BUSY_SPINS		200
189
190 static uma_zone_t umtx_pi_zone;
191 static struct umtxq_chain umtxq_chains[2][UMTX_CHAINS];
192 static MALLOC_DEFINE(M_UMTX, "umtx", "UMTX queue memory");
193 static int umtx_pi_allocated;
194
195 SYSCTL_NODE(_debug, OID_AUTO, umtx, CTLFLAG_RW, 0, "umtx debug");
196 SYSCTL_INT(_debug_umtx, OID_AUTO, umtx_pi_allocated, CTLFLAG_RD,
197 &umtx_pi_allocated, 0, "Allocated umtx_pi");
198
199 #ifdef UMTX_PROFILING
200 static long max_length;
201 SYSCTL_LONG(_debug_umtx, OID_AUTO, max_length, CTLFLAG_RD, &max_length, 0, "max_length");
202 static SYSCTL_NODE(_debug_umtx, OID_AUTO, chains, CTLFLAG_RD, 0, "umtx chain stats");
203 #endif
204
205 static void umtxq_sysinit(void *);
206 static void umtxq_hash(struct umtx_key *key);
207 static struct umtxq_chain *umtxq_getchain(struct umtx_key *key);
208 static void umtxq_lock(struct umtx_key *key);
209 static void umtxq_unlock(struct umtx_key *key);
210 static void umtxq_busy(struct umtx_key *key);
211 static void umtxq_unbusy(struct umtx_key *key);
212 static void umtxq_insert_queue(struct umtx_q *uq, int q);
213 static void umtxq_remove_queue(struct umtx_q *uq, int q);
214 static int umtxq_sleep(struct umtx_q *uq, const char *wmesg, int timo);
215 static int umtxq_count(struct umtx_key *key);
216 static struct umtx_pi *umtx_pi_alloc(int);
217 static void umtx_pi_free(struct umtx_pi *pi);
218 static int do_unlock_pp(struct thread *td, struct umutex *m, uint32_t flags);
219 static void umtx_thread_cleanup(struct thread *td);
220 static void umtx_exec_hook(void *arg __unused, struct proc *p __unused,
221 struct image_params *imgp __unused);
222 SYSINIT(umtx, SI_SUB_EVENTHANDLER+1, SI_ORDER_MIDDLE, umtxq_sysinit, NULL);
223
224 #define umtxq_signal(key, nwake) umtxq_signal_queue((key), (nwake), UMTX_SHARED_QUEUE)
225 #define umtxq_insert(uq) umtxq_insert_queue((uq), UMTX_SHARED_QUEUE)
226 #define umtxq_remove(uq) umtxq_remove_queue((uq), UMTX_SHARED_QUEUE)
227
228 static struct mtx umtx_lock;
229
#ifdef UMTX_PROFILING
/*
 * Create one sysctl node per hash slot under debug.umtx.chains, named by
 * the slot index, exposing the peak length of the slot in each of the two
 * chain banks as "max_length0" and "max_length1".
 */
static void
umtx_init_profiling(void)
{
	struct sysctl_oid *oid;
	char name[10];
	int slot;

	for (slot = 0; slot < UMTX_CHAINS; slot++) {
		snprintf(name, sizeof(name), "%d", slot);
		oid = SYSCTL_ADD_NODE(NULL,
		    SYSCTL_STATIC_CHILDREN(_debug_umtx_chains), OID_AUTO,
		    name, CTLFLAG_RD, NULL, "umtx hash stats");
		SYSCTL_ADD_INT(NULL, SYSCTL_CHILDREN(oid), OID_AUTO,
		    "max_length0", CTLFLAG_RD,
		    &umtxq_chains[0][slot].max_length, 0, NULL);
		SYSCTL_ADD_INT(NULL, SYSCTL_CHILDREN(oid), OID_AUTO,
		    "max_length1", CTLFLAG_RD,
		    &umtxq_chains[1][slot].max_length, 0, NULL);
	}
}
#endif
250
251 static void
252 umtxq_sysinit(void *arg __unused)
253 {
254 int i, j;
255
256 umtx_pi_zone = uma_zcreate("umtx pi", sizeof(struct umtx_pi),
257 NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0);
258 for (i = 0; i < 2; ++i) {
259 for (j = 0; j < UMTX_CHAINS; ++j) {
260 mtx_init(&umtxq_chains[i][j].uc_lock, "umtxql", NULL,
261 MTX_DEF | MTX_DUPOK);
262 LIST_INIT(&umtxq_chains[i][j].uc_queue[0]);
263 LIST_INIT(&umtxq_chains[i][j].uc_queue[1]);
264 LIST_INIT(&umtxq_chains[i][j].uc_spare_queue);
265 TAILQ_INIT(&umtxq_chains[i][j].uc_pi_list);
266 umtxq_chains[i][j].uc_busy = 0;
267 umtxq_chains[i][j].uc_waiters = 0;
268 #ifdef UMTX_PROFILING
269 umtxq_chains[i][j].length = 0;
270 umtxq_chains[i][j].max_length = 0;
271 #endif
272 }
273 }
274 #ifdef UMTX_PROFILING
275 umtx_init_profiling();
276 #endif
277 mtx_init(&umtx_lock, "umtx lock", NULL, MTX_SPIN);
278 EVENTHANDLER_REGISTER(process_exec, umtx_exec_hook, NULL,
279 EVENTHANDLER_PRI_ANY);
280 }
281
282 struct umtx_q *
283 umtxq_alloc(void)
284 {
285 struct umtx_q *uq;
286
287 uq = malloc(sizeof(struct umtx_q), M_UMTX, M_WAITOK | M_ZERO);
288 uq->uq_spare_queue = malloc(sizeof(struct umtxq_queue), M_UMTX, M_WAITOK | M_ZERO);
289 TAILQ_INIT(&uq->uq_spare_queue->head);
290 TAILQ_INIT(&uq->uq_pi_contested);
291 uq->uq_inherited_pri = PRI_MAX;
292 return (uq);
293 }
294
295 void
296 umtxq_free(struct umtx_q *uq)
297 {
298 MPASS(uq->uq_spare_queue != NULL);
299 free(uq->uq_spare_queue, M_UMTX);
300 free(uq, M_UMTX);
301 }
302
303 static inline void
304 umtxq_hash(struct umtx_key *key)
305 {
306 unsigned n = (uintptr_t)key->info.both.a + key->info.both.b;
307 key->hash = ((n * GOLDEN_RATIO_PRIME) >> UMTX_SHIFTS) % UMTX_CHAINS;
308 }
309
310 static inline struct umtxq_chain *
311 umtxq_getchain(struct umtx_key *key)
312 {
313 if (key->type <= TYPE_SEM)
314 return (&umtxq_chains[1][key->hash]);
315 return (&umtxq_chains[0][key->hash]);
316 }
317
318 /*
319 * Lock a chain.
320 */
321 static inline void
322 umtxq_lock(struct umtx_key *key)
323 {
324 struct umtxq_chain *uc;
325
326 uc = umtxq_getchain(key);
327 mtx_lock(&uc->uc_lock);
328 }
329
330 /*
331 * Unlock a chain.
332 */
333 static inline void
334 umtxq_unlock(struct umtx_key *key)
335 {
336 struct umtxq_chain *uc;
337
338 uc = umtxq_getchain(key);
339 mtx_unlock(&uc->uc_lock);
340 }
341
342 /*
343 * Set chain to busy state when following operation
344 * may be blocked (kernel mutex can not be used).
345 */
346 static inline void
347 umtxq_busy(struct umtx_key *key)
348 {
349 struct umtxq_chain *uc;
350
351 uc = umtxq_getchain(key);
352 mtx_assert(&uc->uc_lock, MA_OWNED);
353 if (uc->uc_busy) {
354 #ifdef SMP
355 if (smp_cpus > 1) {
356 int count = BUSY_SPINS;
357 if (count > 0) {
358 umtxq_unlock(key);
359 while (uc->uc_busy && --count > 0)
360 cpu_spinwait();
361 umtxq_lock(key);
362 }
363 }
364 #endif
365 while (uc->uc_busy) {
366 uc->uc_waiters++;
367 msleep(uc, &uc->uc_lock, 0, "umtxqb", 0);
368 uc->uc_waiters--;
369 }
370 }
371 uc->uc_busy = 1;
372 }
373
374 /*
375 * Unbusy a chain.
376 */
377 static inline void
378 umtxq_unbusy(struct umtx_key *key)
379 {
380 struct umtxq_chain *uc;
381
382 uc = umtxq_getchain(key);
383 mtx_assert(&uc->uc_lock, MA_OWNED);
384 KASSERT(uc->uc_busy != 0, ("not busy"));
385 uc->uc_busy = 0;
386 if (uc->uc_waiters)
387 wakeup_one(uc);
388 }
389
390 static struct umtxq_queue *
391 umtxq_queue_lookup(struct umtx_key *key, int q)
392 {
393 struct umtxq_queue *uh;
394 struct umtxq_chain *uc;
395
396 uc = umtxq_getchain(key);
397 UMTXQ_LOCKED_ASSERT(uc);
398 LIST_FOREACH(uh, &uc->uc_queue[q], link) {
399 if (umtx_key_match(&uh->key, key))
400 return (uh);
401 }
402
403 return (NULL);
404 }
405
406 static inline void
407 umtxq_insert_queue(struct umtx_q *uq, int q)
408 {
409 struct umtxq_queue *uh;
410 struct umtxq_chain *uc;
411
412 uc = umtxq_getchain(&uq->uq_key);
413 UMTXQ_LOCKED_ASSERT(uc);
414 KASSERT((uq->uq_flags & UQF_UMTXQ) == 0, ("umtx_q is already on queue"));
415 uh = umtxq_queue_lookup(&uq->uq_key, q);
416 if (uh != NULL) {
417 LIST_INSERT_HEAD(&uc->uc_spare_queue, uq->uq_spare_queue, link);
418 } else {
419 uh = uq->uq_spare_queue;
420 uh->key = uq->uq_key;
421 LIST_INSERT_HEAD(&uc->uc_queue[q], uh, link);
422 }
423 uq->uq_spare_queue = NULL;
424
425 TAILQ_INSERT_TAIL(&uh->head, uq, uq_link);
426 uh->length++;
427 #ifdef UMTX_PROFILING
428 uc->length++;
429 if (uc->length > uc->max_length) {
430 uc->max_length = uc->length;
431 if (uc->max_length > max_length)
432 max_length = uc->max_length;
433 }
434 #endif
435 uq->uq_flags |= UQF_UMTXQ;
436 uq->uq_cur_queue = uh;
437 return;
438 }
439
440 static inline void
441 umtxq_remove_queue(struct umtx_q *uq, int q)
442 {
443 struct umtxq_chain *uc;
444 struct umtxq_queue *uh;
445
446 uc = umtxq_getchain(&uq->uq_key);
447 UMTXQ_LOCKED_ASSERT(uc);
448 if (uq->uq_flags & UQF_UMTXQ) {
449 uh = uq->uq_cur_queue;
450 TAILQ_REMOVE(&uh->head, uq, uq_link);
451 uh->length--;
452 #ifdef UMTX_PROFILING
453 uc->length--;
454 #endif
455 uq->uq_flags &= ~UQF_UMTXQ;
456 if (TAILQ_EMPTY(&uh->head)) {
457 KASSERT(uh->length == 0,
458 ("inconsistent umtxq_queue length"));
459 LIST_REMOVE(uh, link);
460 } else {
461 uh = LIST_FIRST(&uc->uc_spare_queue);
462 KASSERT(uh != NULL, ("uc_spare_queue is empty"));
463 LIST_REMOVE(uh, link);
464 }
465 uq->uq_spare_queue = uh;
466 uq->uq_cur_queue = NULL;
467 }
468 }
469
470 /*
471 * Check if there are multiple waiters
472 */
473 static int
474 umtxq_count(struct umtx_key *key)
475 {
476 struct umtxq_chain *uc;
477 struct umtxq_queue *uh;
478
479 uc = umtxq_getchain(key);
480 UMTXQ_LOCKED_ASSERT(uc);
481 uh = umtxq_queue_lookup(key, UMTX_SHARED_QUEUE);
482 if (uh != NULL)
483 return (uh->length);
484 return (0);
485 }
486
487 /*
488 * Check if there are multiple PI waiters and returns first
489 * waiter.
490 */
491 static int
492 umtxq_count_pi(struct umtx_key *key, struct umtx_q **first)
493 {
494 struct umtxq_chain *uc;
495 struct umtxq_queue *uh;
496
497 *first = NULL;
498 uc = umtxq_getchain(key);
499 UMTXQ_LOCKED_ASSERT(uc);
500 uh = umtxq_queue_lookup(key, UMTX_SHARED_QUEUE);
501 if (uh != NULL) {
502 *first = TAILQ_FIRST(&uh->head);
503 return (uh->length);
504 }
505 return (0);
506 }
507
508 /*
509 * Wake up threads waiting on an userland object.
510 */
511
512 static int
513 umtxq_signal_queue(struct umtx_key *key, int n_wake, int q)
514 {
515 struct umtxq_chain *uc;
516 struct umtxq_queue *uh;
517 struct umtx_q *uq;
518 int ret;
519
520 ret = 0;
521 uc = umtxq_getchain(key);
522 UMTXQ_LOCKED_ASSERT(uc);
523 uh = umtxq_queue_lookup(key, q);
524 if (uh != NULL) {
525 while ((uq = TAILQ_FIRST(&uh->head)) != NULL) {
526 umtxq_remove_queue(uq, q);
527 wakeup(uq);
528 if (++ret >= n_wake)
529 return (ret);
530 }
531 }
532 return (ret);
533 }
534
535
536 /*
537 * Wake up specified thread.
538 */
539 static inline void
540 umtxq_signal_thread(struct umtx_q *uq)
541 {
542 struct umtxq_chain *uc;
543
544 uc = umtxq_getchain(&uq->uq_key);
545 UMTXQ_LOCKED_ASSERT(uc);
546 umtxq_remove(uq);
547 wakeup(uq);
548 }
549
550 /*
551 * Put thread into sleep state, before sleeping, check if
552 * thread was removed from umtx queue.
553 */
554 static inline int
555 umtxq_sleep(struct umtx_q *uq, const char *wmesg, int timo)
556 {
557 struct umtxq_chain *uc;
558 int error;
559
560 uc = umtxq_getchain(&uq->uq_key);
561 UMTXQ_LOCKED_ASSERT(uc);
562 if (!(uq->uq_flags & UQF_UMTXQ))
563 return (0);
564 error = msleep(uq, &uc->uc_lock, PCATCH, wmesg, timo);
565 if (error == EWOULDBLOCK)
566 error = ETIMEDOUT;
567 return (error);
568 }
569
570 /*
571 * Convert userspace address into unique logical address.
572 */
573 int
574 umtx_key_get(void *addr, int type, int share, struct umtx_key *key)
575 {
576 struct thread *td = curthread;
577 vm_map_t map;
578 vm_map_entry_t entry;
579 vm_pindex_t pindex;
580 vm_prot_t prot;
581 boolean_t wired;
582
583 key->type = type;
584 if (share == THREAD_SHARE) {
585 key->shared = 0;
586 key->info.private.vs = td->td_proc->p_vmspace;
587 key->info.private.addr = (uintptr_t)addr;
588 } else {
589 MPASS(share == PROCESS_SHARE || share == AUTO_SHARE);
590 map = &td->td_proc->p_vmspace->vm_map;
591 if (vm_map_lookup(&map, (vm_offset_t)addr, VM_PROT_WRITE,
592 &entry, &key->info.shared.object, &pindex, &prot,
593 &wired) != KERN_SUCCESS) {
594 return EFAULT;
595 }
596
597 if ((share == PROCESS_SHARE) ||
598 (share == AUTO_SHARE &&
599 VM_INHERIT_SHARE == entry->inheritance)) {
600 key->shared = 1;
601 key->info.shared.offset = entry->offset + entry->start -
602 (vm_offset_t)addr;
603 vm_object_reference(key->info.shared.object);
604 } else {
605 key->shared = 0;
606 key->info.private.vs = td->td_proc->p_vmspace;
607 key->info.private.addr = (uintptr_t)addr;
608 }
609 vm_map_lookup_done(map, entry);
610 }
611
612 umtxq_hash(key);
613 return (0);
614 }
615
616 /*
617 * Release key.
618 */
619 void
620 umtx_key_release(struct umtx_key *key)
621 {
622 if (key->shared)
623 vm_object_deallocate(key->info.shared.object);
624 }
625
626 /*
627 * Lock a umtx object.
628 */
629 static int
630 _do_lock_umtx(struct thread *td, struct umtx *umtx, u_long id, int timo)
631 {
632 struct umtx_q *uq;
633 u_long owner;
634 u_long old;
635 int error = 0;
636
637 uq = td->td_umtxq;
638
639 /*
640 * Care must be exercised when dealing with umtx structure. It
641 * can fault on any access.
642 */
643 for (;;) {
644 /*
645 * Try the uncontested case. This should be done in userland.
646 */
647 owner = casuword(&umtx->u_owner, UMTX_UNOWNED, id);
648
649 /* The acquire succeeded. */
650 if (owner == UMTX_UNOWNED)
651 return (0);
652
653 /* The address was invalid. */
654 if (owner == -1)
655 return (EFAULT);
656
657 /* If no one owns it but it is contested try to acquire it. */
658 if (owner == UMTX_CONTESTED) {
659 owner = casuword(&umtx->u_owner,
660 UMTX_CONTESTED, id | UMTX_CONTESTED);
661
662 if (owner == UMTX_CONTESTED)
663 return (0);
664
665 /* The address was invalid. */
666 if (owner == -1)
667 return (EFAULT);
668
669 /* If this failed the lock has changed, restart. */
670 continue;
671 }
672
673 /*
674 * If we caught a signal, we have retried and now
675 * exit immediately.
676 */
677 if (error != 0)
678 return (error);
679
680 if ((error = umtx_key_get(umtx, TYPE_SIMPLE_LOCK,
681 AUTO_SHARE, &uq->uq_key)) != 0)
682 return (error);
683
684 umtxq_lock(&uq->uq_key);
685 umtxq_busy(&uq->uq_key);
686 umtxq_insert(uq);
687 umtxq_unbusy(&uq->uq_key);
688 umtxq_unlock(&uq->uq_key);
689
690 /*
691 * Set the contested bit so that a release in user space
692 * knows to use the system call for unlock. If this fails
693 * either some one else has acquired the lock or it has been
694 * released.
695 */
696 old = casuword(&umtx->u_owner, owner, owner | UMTX_CONTESTED);
697
698 /* The address was invalid. */
699 if (old == -1) {
700 umtxq_lock(&uq->uq_key);
701 umtxq_remove(uq);
702 umtxq_unlock(&uq->uq_key);
703 umtx_key_release(&uq->uq_key);
704 return (EFAULT);
705 }
706
707 /*
708 * We set the contested bit, sleep. Otherwise the lock changed
709 * and we need to retry or we lost a race to the thread
710 * unlocking the umtx.
711 */
712 umtxq_lock(&uq->uq_key);
713 if (old == owner)
714 error = umtxq_sleep(uq, "umtx", timo);
715 umtxq_remove(uq);
716 umtxq_unlock(&uq->uq_key);
717 umtx_key_release(&uq->uq_key);
718 }
719
720 return (0);
721 }
722
723 /*
724 * Lock a umtx object.
725 */
726 static int
727 do_lock_umtx(struct thread *td, struct umtx *umtx, u_long id,
728 struct timespec *timeout)
729 {
730 struct timespec ts, ts2, ts3;
731 struct timeval tv;
732 int error;
733
734 if (timeout == NULL) {
735 error = _do_lock_umtx(td, umtx, id, 0);
736 /* Mutex locking is restarted if it is interrupted. */
737 if (error == EINTR)
738 error = ERESTART;
739 } else {
740 getnanouptime(&ts);
741 timespecadd(&ts, timeout);
742 TIMESPEC_TO_TIMEVAL(&tv, timeout);
743 for (;;) {
744 error = _do_lock_umtx(td, umtx, id, tvtohz(&tv));
745 if (error != ETIMEDOUT)
746 break;
747 getnanouptime(&ts2);
748 if (timespeccmp(&ts2, &ts, >=)) {
749 error = ETIMEDOUT;
750 break;
751 }
752 ts3 = ts;
753 timespecsub(&ts3, &ts2);
754 TIMESPEC_TO_TIMEVAL(&tv, &ts3);
755 }
756 /* Timed-locking is not restarted. */
757 if (error == ERESTART)
758 error = EINTR;
759 }
760 return (error);
761 }
762
763 /*
764 * Unlock a umtx object.
765 */
766 static int
767 do_unlock_umtx(struct thread *td, struct umtx *umtx, u_long id)
768 {
769 struct umtx_key key;
770 u_long owner;
771 u_long old;
772 int error;
773 int count;
774
775 /*
776 * Make sure we own this mtx.
777 */
778 owner = fuword(__DEVOLATILE(u_long *, &umtx->u_owner));
779 if (owner == -1)
780 return (EFAULT);
781
782 if ((owner & ~UMTX_CONTESTED) != id)
783 return (EPERM);
784
785 /* This should be done in userland */
786 if ((owner & UMTX_CONTESTED) == 0) {
787 old = casuword(&umtx->u_owner, owner, UMTX_UNOWNED);
788 if (old == -1)
789 return (EFAULT);
790 if (old == owner)
791 return (0);
792 owner = old;
793 }
794
795 /* We should only ever be in here for contested locks */
796 if ((error = umtx_key_get(umtx, TYPE_SIMPLE_LOCK, AUTO_SHARE,
797 &key)) != 0)
798 return (error);
799
800 umtxq_lock(&key);
801 umtxq_busy(&key);
802 count = umtxq_count(&key);
803 umtxq_unlock(&key);
804
805 /*
806 * When unlocking the umtx, it must be marked as unowned if
807 * there is zero or one thread only waiting for it.
808 * Otherwise, it must be marked as contested.
809 */
810 old = casuword(&umtx->u_owner, owner,
811 count <= 1 ? UMTX_UNOWNED : UMTX_CONTESTED);
812 umtxq_lock(&key);
813 umtxq_signal(&key,1);
814 umtxq_unbusy(&key);
815 umtxq_unlock(&key);
816 umtx_key_release(&key);
817 if (old == -1)
818 return (EFAULT);
819 if (old != owner)
820 return (EINVAL);
821 return (0);
822 }
823
824 #ifdef COMPAT_FREEBSD32
825
826 /*
827 * Lock a umtx object.
828 */
829 static int
830 _do_lock_umtx32(struct thread *td, uint32_t *m, uint32_t id, int timo)
831 {
832 struct umtx_q *uq;
833 uint32_t owner;
834 uint32_t old;
835 int error = 0;
836
837 uq = td->td_umtxq;
838
839 /*
840 * Care must be exercised when dealing with umtx structure. It
841 * can fault on any access.
842 */
843 for (;;) {
844 /*
845 * Try the uncontested case. This should be done in userland.
846 */
847 owner = casuword32(m, UMUTEX_UNOWNED, id);
848
849 /* The acquire succeeded. */
850 if (owner == UMUTEX_UNOWNED)
851 return (0);
852
853 /* The address was invalid. */
854 if (owner == -1)
855 return (EFAULT);
856
857 /* If no one owns it but it is contested try to acquire it. */
858 if (owner == UMUTEX_CONTESTED) {
859 owner = casuword32(m,
860 UMUTEX_CONTESTED, id | UMUTEX_CONTESTED);
861 if (owner == UMUTEX_CONTESTED)
862 return (0);
863
864 /* The address was invalid. */
865 if (owner == -1)
866 return (EFAULT);
867
868 /* If this failed the lock has changed, restart. */
869 continue;
870 }
871
872 /*
873 * If we caught a signal, we have retried and now
874 * exit immediately.
875 */
876 if (error != 0)
877 return (error);
878
879 if ((error = umtx_key_get(m, TYPE_SIMPLE_LOCK,
880 AUTO_SHARE, &uq->uq_key)) != 0)
881 return (error);
882
883 umtxq_lock(&uq->uq_key);
884 umtxq_busy(&uq->uq_key);
885 umtxq_insert(uq);
886 umtxq_unbusy(&uq->uq_key);
887 umtxq_unlock(&uq->uq_key);
888
889 /*
890 * Set the contested bit so that a release in user space
891 * knows to use the system call for unlock. If this fails
892 * either some one else has acquired the lock or it has been
893 * released.
894 */
895 old = casuword32(m, owner, owner | UMUTEX_CONTESTED);
896
897 /* The address was invalid. */
898 if (old == -1) {
899 umtxq_lock(&uq->uq_key);
900 umtxq_remove(uq);
901 umtxq_unlock(&uq->uq_key);
902 umtx_key_release(&uq->uq_key);
903 return (EFAULT);
904 }
905
906 /*
907 * We set the contested bit, sleep. Otherwise the lock changed
908 * and we need to retry or we lost a race to the thread
909 * unlocking the umtx.
910 */
911 umtxq_lock(&uq->uq_key);
912 if (old == owner)
913 error = umtxq_sleep(uq, "umtx", timo);
914 umtxq_remove(uq);
915 umtxq_unlock(&uq->uq_key);
916 umtx_key_release(&uq->uq_key);
917 }
918
919 return (0);
920 }
921
922 /*
923 * Lock a umtx object.
924 */
925 static int
926 do_lock_umtx32(struct thread *td, void *m, uint32_t id,
927 struct timespec *timeout)
928 {
929 struct timespec ts, ts2, ts3;
930 struct timeval tv;
931 int error;
932
933 if (timeout == NULL) {
934 error = _do_lock_umtx32(td, m, id, 0);
935 /* Mutex locking is restarted if it is interrupted. */
936 if (error == EINTR)
937 error = ERESTART;
938 } else {
939 getnanouptime(&ts);
940 timespecadd(&ts, timeout);
941 TIMESPEC_TO_TIMEVAL(&tv, timeout);
942 for (;;) {
943 error = _do_lock_umtx32(td, m, id, tvtohz(&tv));
944 if (error != ETIMEDOUT)
945 break;
946 getnanouptime(&ts2);
947 if (timespeccmp(&ts2, &ts, >=)) {
948 error = ETIMEDOUT;
949 break;
950 }
951 ts3 = ts;
952 timespecsub(&ts3, &ts2);
953 TIMESPEC_TO_TIMEVAL(&tv, &ts3);
954 }
955 /* Timed-locking is not restarted. */
956 if (error == ERESTART)
957 error = EINTR;
958 }
959 return (error);
960 }
961
962 /*
963 * Unlock a umtx object.
964 */
965 static int
966 do_unlock_umtx32(struct thread *td, uint32_t *m, uint32_t id)
967 {
968 struct umtx_key key;
969 uint32_t owner;
970 uint32_t old;
971 int error;
972 int count;
973
974 /*
975 * Make sure we own this mtx.
976 */
977 owner = fuword32(m);
978 if (owner == -1)
979 return (EFAULT);
980
981 if ((owner & ~UMUTEX_CONTESTED) != id)
982 return (EPERM);
983
984 /* This should be done in userland */
985 if ((owner & UMUTEX_CONTESTED) == 0) {
986 old = casuword32(m, owner, UMUTEX_UNOWNED);
987 if (old == -1)
988 return (EFAULT);
989 if (old == owner)
990 return (0);
991 owner = old;
992 }
993
994 /* We should only ever be in here for contested locks */
995 if ((error = umtx_key_get(m, TYPE_SIMPLE_LOCK, AUTO_SHARE,
996 &key)) != 0)
997 return (error);
998
999 umtxq_lock(&key);
1000 umtxq_busy(&key);
1001 count = umtxq_count(&key);
1002 umtxq_unlock(&key);
1003
1004 /*
1005 * When unlocking the umtx, it must be marked as unowned if
1006 * there is zero or one thread only waiting for it.
1007 * Otherwise, it must be marked as contested.
1008 */
1009 old = casuword32(m, owner,
1010 count <= 1 ? UMUTEX_UNOWNED : UMUTEX_CONTESTED);
1011 umtxq_lock(&key);
1012 umtxq_signal(&key,1);
1013 umtxq_unbusy(&key);
1014 umtxq_unlock(&key);
1015 umtx_key_release(&key);
1016 if (old == -1)
1017 return (EFAULT);
1018 if (old != owner)
1019 return (EINVAL);
1020 return (0);
1021 }
1022 #endif
1023
1024 /*
1025 * Fetch and compare value, sleep on the address if value is not changed.
1026 */
1027 static int
1028 do_wait(struct thread *td, void *addr, u_long id,
1029 struct timespec *timeout, int compat32, int is_private)
1030 {
1031 struct umtx_q *uq;
1032 struct timespec ts, ts2, ts3;
1033 struct timeval tv;
1034 u_long tmp;
1035 int error = 0;
1036
1037 uq = td->td_umtxq;
1038 if ((error = umtx_key_get(addr, TYPE_SIMPLE_WAIT,
1039 is_private ? THREAD_SHARE : AUTO_SHARE, &uq->uq_key)) != 0)
1040 return (error);
1041
1042 umtxq_lock(&uq->uq_key);
1043 umtxq_insert(uq);
1044 umtxq_unlock(&uq->uq_key);
1045 if (compat32 == 0)
1046 tmp = fuword(addr);
1047 else
1048 tmp = (unsigned int)fuword32(addr);
1049 if (tmp != id) {
1050 umtxq_lock(&uq->uq_key);
1051 umtxq_remove(uq);
1052 umtxq_unlock(&uq->uq_key);
1053 } else if (timeout == NULL) {
1054 umtxq_lock(&uq->uq_key);
1055 error = umtxq_sleep(uq, "uwait", 0);
1056 umtxq_remove(uq);
1057 umtxq_unlock(&uq->uq_key);
1058 } else {
1059 getnanouptime(&ts);
1060 timespecadd(&ts, timeout);
1061 TIMESPEC_TO_TIMEVAL(&tv, timeout);
1062 umtxq_lock(&uq->uq_key);
1063 for (;;) {
1064 error = umtxq_sleep(uq, "uwait", tvtohz(&tv));
1065 if (!(uq->uq_flags & UQF_UMTXQ)) {
1066 error = 0;
1067 break;
1068 }
1069 if (error != ETIMEDOUT)
1070 break;
1071 umtxq_unlock(&uq->uq_key);
1072 getnanouptime(&ts2);
1073 if (timespeccmp(&ts2, &ts, >=)) {
1074 error = ETIMEDOUT;
1075 umtxq_lock(&uq->uq_key);
1076 break;
1077 }
1078 ts3 = ts;
1079 timespecsub(&ts3, &ts2);
1080 TIMESPEC_TO_TIMEVAL(&tv, &ts3);
1081 umtxq_lock(&uq->uq_key);
1082 }
1083 umtxq_remove(uq);
1084 umtxq_unlock(&uq->uq_key);
1085 }
1086 umtx_key_release(&uq->uq_key);
1087 if (error == ERESTART)
1088 error = EINTR;
1089 return (error);
1090 }
1091
1092 /*
1093 * Wake up threads sleeping on the specified address.
1094 */
1095 int
1096 kern_umtx_wake(struct thread *td, void *uaddr, int n_wake, int is_private)
1097 {
1098 struct umtx_key key;
1099 int ret;
1100
1101 if ((ret = umtx_key_get(uaddr, TYPE_SIMPLE_WAIT,
1102 is_private ? THREAD_SHARE : AUTO_SHARE, &key)) != 0)
1103 return (ret);
1104 umtxq_lock(&key);
1105 ret = umtxq_signal(&key, n_wake);
1106 umtxq_unlock(&key);
1107 umtx_key_release(&key);
1108 return (0);
1109 }
1110
1111 /*
1112 * Lock PTHREAD_PRIO_NONE protocol POSIX mutex.
1113 */
1114 static int
1115 _do_lock_normal(struct thread *td, struct umutex *m, uint32_t flags, int timo,
1116 int mode)
1117 {
1118 struct umtx_q *uq;
1119 uint32_t owner, old, id;
1120 int error = 0;
1121
1122 id = td->td_tid;
1123 uq = td->td_umtxq;
1124
1125 /*
1126 * Care must be exercised when dealing with umtx structure. It
1127 * can fault on any access.
1128 */
1129 for (;;) {
1130 owner = fuword32(__DEVOLATILE(void *, &m->m_owner));
1131 if (mode == _UMUTEX_WAIT) {
1132 if (owner == UMUTEX_UNOWNED || owner == UMUTEX_CONTESTED)
1133 return (0);
1134 } else {
1135 /*
1136 * Try the uncontested case. This should be done in userland.
1137 */
1138 owner = casuword32(&m->m_owner, UMUTEX_UNOWNED, id);
1139
1140 /* The acquire succeeded. */
1141 if (owner == UMUTEX_UNOWNED)
1142 return (0);
1143
1144 /* The address was invalid. */
1145 if (owner == -1)
1146 return (EFAULT);
1147
1148 /* If no one owns it but it is contested try to acquire it. */
1149 if (owner == UMUTEX_CONTESTED) {
1150 owner = casuword32(&m->m_owner,
1151 UMUTEX_CONTESTED, id | UMUTEX_CONTESTED);
1152
1153 if (owner == UMUTEX_CONTESTED)
1154 return (0);
1155
1156 /* The address was invalid. */
1157 if (owner == -1)
1158 return (EFAULT);
1159
1160 /* If this failed the lock has changed, restart. */
1161 continue;
1162 }
1163 }
1164
1165 if ((flags & UMUTEX_ERROR_CHECK) != 0 &&
1166 (owner & ~UMUTEX_CONTESTED) == id)
1167 return (EDEADLK);
1168
1169 if (mode == _UMUTEX_TRY)
1170 return (EBUSY);
1171
1172 /*
1173 * If we caught a signal, we have retried and now
1174 * exit immediately.
1175 */
1176 if (error != 0)
1177 return (error);
1178
1179 if ((error = umtx_key_get(m, TYPE_NORMAL_UMUTEX,
1180 GET_SHARE(flags), &uq->uq_key)) != 0)
1181 return (error);
1182
1183 umtxq_lock(&uq->uq_key);
1184 umtxq_busy(&uq->uq_key);
1185 umtxq_insert(uq);
1186 umtxq_unlock(&uq->uq_key);
1187
1188 /*
1189 * Set the contested bit so that a release in user space
1190 * knows to use the system call for unlock. If this fails
1191 * either some one else has acquired the lock or it has been
1192 * released.
1193 */
1194 old = casuword32(&m->m_owner, owner, owner | UMUTEX_CONTESTED);
1195
1196 /* The address was invalid. */
1197 if (old == -1) {
1198 umtxq_lock(&uq->uq_key);
1199 umtxq_remove(uq);
1200 umtxq_unbusy(&uq->uq_key);
1201 umtxq_unlock(&uq->uq_key);
1202 umtx_key_release(&uq->uq_key);
1203 return (EFAULT);
1204 }
1205
1206 /*
1207 * We set the contested bit, sleep. Otherwise the lock changed
1208 * and we need to retry or we lost a race to the thread
1209 * unlocking the umtx.
1210 */
1211 umtxq_lock(&uq->uq_key);
1212 umtxq_unbusy(&uq->uq_key);
1213 if (old == owner)
1214 error = umtxq_sleep(uq, "umtxn", timo);
1215 umtxq_remove(uq);
1216 umtxq_unlock(&uq->uq_key);
1217 umtx_key_release(&uq->uq_key);
1218 }
1219
1220 return (0);
1221 }
1222
1223 /*
1224 * Lock PTHREAD_PRIO_NONE protocol POSIX mutex.
1225 */
1226 /*
1227 * Unlock PTHREAD_PRIO_NONE protocol POSIX mutex.
1228 */
1229 static int
1230 do_unlock_normal(struct thread *td, struct umutex *m, uint32_t flags)
1231 {
1232 struct umtx_key key;
1233 uint32_t owner, old, id;
1234 int error;
1235 int count;
1236
1237 id = td->td_tid;
1238 /*
1239 * Make sure we own this mtx.
1240 */
1241 owner = fuword32(__DEVOLATILE(uint32_t *, &m->m_owner));
1242 if (owner == -1)
1243 return (EFAULT);
1244
1245 if ((owner & ~UMUTEX_CONTESTED) != id)
1246 return (EPERM);
1247
1248 if ((owner & UMUTEX_CONTESTED) == 0) {
1249 old = casuword32(&m->m_owner, owner, UMUTEX_UNOWNED);
1250 if (old == -1)
1251 return (EFAULT);
1252 if (old == owner)
1253 return (0);
1254 owner = old;
1255 }
1256
1257 /* We should only ever be in here for contested locks */
1258 if ((error = umtx_key_get(m, TYPE_NORMAL_UMUTEX, GET_SHARE(flags),
1259 &key)) != 0)
1260 return (error);
1261
1262 umtxq_lock(&key);
1263 umtxq_busy(&key);
1264 count = umtxq_count(&key);
1265 umtxq_unlock(&key);
1266
1267 /*
1268 * When unlocking the umtx, it must be marked as unowned if
1269 * there is zero or one thread only waiting for it.
1270 * Otherwise, it must be marked as contested.
1271 */
1272 old = casuword32(&m->m_owner, owner,
1273 count <= 1 ? UMUTEX_UNOWNED : UMUTEX_CONTESTED);
1274 umtxq_lock(&key);
1275 umtxq_signal(&key,1);
1276 umtxq_unbusy(&key);
1277 umtxq_unlock(&key);
1278 umtx_key_release(&key);
1279 if (old == -1)
1280 return (EFAULT);
1281 if (old != owner)
1282 return (EINVAL);
1283 return (0);
1284 }
1285
1286 /*
1287 * Check if the mutex is available and wake up a waiter,
1288 * only for simple mutex.
1289 */
1290 static int
1291 do_wake_umutex(struct thread *td, struct umutex *m)
1292 {
1293 struct umtx_key key;
1294 uint32_t owner;
1295 uint32_t flags;
1296 int error;
1297 int count;
1298
1299 owner = fuword32(__DEVOLATILE(uint32_t *, &m->m_owner));
1300 if (owner == -1)
1301 return (EFAULT);
1302
1303 if ((owner & ~UMUTEX_CONTESTED) != 0)
1304 return (0);
1305
1306 flags = fuword32(&m->m_flags);
1307
1308 /* We should only ever be in here for contested locks */
1309 if ((error = umtx_key_get(m, TYPE_NORMAL_UMUTEX, GET_SHARE(flags),
1310 &key)) != 0)
1311 return (error);
1312
1313 umtxq_lock(&key);
1314 umtxq_busy(&key);
1315 count = umtxq_count(&key);
1316 umtxq_unlock(&key);
1317
1318 if (count <= 1)
1319 owner = casuword32(&m->m_owner, UMUTEX_CONTESTED, UMUTEX_UNOWNED);
1320
1321 umtxq_lock(&key);
1322 if (count != 0 && (owner & ~UMUTEX_CONTESTED) == 0)
1323 umtxq_signal(&key, 1);
1324 umtxq_unbusy(&key);
1325 umtxq_unlock(&key);
1326 umtx_key_release(&key);
1327 return (0);
1328 }
1329
1330 /*
1331 * Check if the mutex has waiters and tries to fix contention bit.
1332 */
1333 static int
1334 do_wake2_umutex(struct thread *td, struct umutex *m, uint32_t flags)
1335 {
1336 struct umtx_key key;
1337 uint32_t owner, old;
1338 int type;
1339 int error;
1340 int count;
1341
1342 switch(flags & (UMUTEX_PRIO_INHERIT | UMUTEX_PRIO_PROTECT)) {
1343 case 0:
1344 type = TYPE_NORMAL_UMUTEX;
1345 break;
1346 case UMUTEX_PRIO_INHERIT:
1347 type = TYPE_PI_UMUTEX;
1348 break;
1349 case UMUTEX_PRIO_PROTECT:
1350 type = TYPE_PP_UMUTEX;
1351 break;
1352 default:
1353 return (EINVAL);
1354 }
1355 if ((error = umtx_key_get(m, type, GET_SHARE(flags),
1356 &key)) != 0)
1357 return (error);
1358
1359 owner = 0;
1360 umtxq_lock(&key);
1361 umtxq_busy(&key);
1362 count = umtxq_count(&key);
1363 umtxq_unlock(&key);
1364 /*
1365 * Only repair contention bit if there is a waiter, this means the mutex
1366 * is still being referenced by userland code, otherwise don't update
1367 * any memory.
1368 */
1369 if (count > 1) {
1370 owner = fuword32(__DEVOLATILE(uint32_t *, &m->m_owner));
1371 while ((owner & UMUTEX_CONTESTED) ==0) {
1372 old = casuword32(&m->m_owner, owner,
1373 owner|UMUTEX_CONTESTED);
1374 if (old == owner)
1375 break;
1376 owner = old;
1377 }
1378 } else if (count == 1) {
1379 owner = fuword32(__DEVOLATILE(uint32_t *, &m->m_owner));
1380 while ((owner & ~UMUTEX_CONTESTED) != 0 &&
1381 (owner & UMUTEX_CONTESTED) == 0) {
1382 old = casuword32(&m->m_owner, owner,
1383 owner|UMUTEX_CONTESTED);
1384 if (old == owner)
1385 break;
1386 owner = old;
1387 }
1388 }
1389 umtxq_lock(&key);
1390 if (owner == -1) {
1391 error = EFAULT;
1392 umtxq_signal(&key, INT_MAX);
1393 }
1394 else if (count != 0 && (owner & ~UMUTEX_CONTESTED) == 0)
1395 umtxq_signal(&key, 1);
1396 umtxq_unbusy(&key);
1397 umtxq_unlock(&key);
1398 umtx_key_release(&key);
1399 return (error);
1400 }
1401
1402 static inline struct umtx_pi *
1403 umtx_pi_alloc(int flags)
1404 {
1405 struct umtx_pi *pi;
1406
1407 pi = uma_zalloc(umtx_pi_zone, M_ZERO | flags);
1408 TAILQ_INIT(&pi->pi_blocked);
1409 atomic_add_int(&umtx_pi_allocated, 1);
1410 return (pi);
1411 }
1412
/*
 * Hand a PI mutex record back to umtx_pi_zone and decrement the
 * umtx_pi_allocated counter.
 */
1413 static inline void
1414 umtx_pi_free(struct umtx_pi *pi)
1415 {
1416 uma_zfree(umtx_pi_zone, pi);
1417 atomic_add_int(&umtx_pi_allocated, -1);
1418 }
1419
1420 /*
1421 * Adjust the thread's position on a pi_state after its priority has been
1422 * changed.
1423 */
1424 static int
1425 umtx_pi_adjust_thread(struct umtx_pi *pi, struct thread *td)
1426 {
1427 struct umtx_q *uq, *uq1, *uq2;
1428 struct thread *td1;
1429
1430 mtx_assert(&umtx_lock, MA_OWNED);
1431 if (pi == NULL)
1432 return (0);
1433
1434 uq = td->td_umtxq;
1435
1436 /*
1437 * Check if the thread needs to be moved on the blocked chain.
1438 * It needs to be moved if either its priority is lower than
1439 * the previous thread or higher than the next thread.
1440 */
1441 uq1 = TAILQ_PREV(uq, umtxq_head, uq_lockq);
1442 uq2 = TAILQ_NEXT(uq, uq_lockq);
1443 if ((uq1 != NULL && UPRI(td) < UPRI(uq1->uq_thread)) ||
1444 (uq2 != NULL && UPRI(td) > UPRI(uq2->uq_thread))) {
1445 /*
1446 * Remove thread from blocked chain and determine where
1447 * it should be moved to.
1448 */
1449 TAILQ_REMOVE(&pi->pi_blocked, uq, uq_lockq);
1450 TAILQ_FOREACH(uq1, &pi->pi_blocked, uq_lockq) {
1451 td1 = uq1->uq_thread;
1452 MPASS(td1->td_proc->p_magic == P_MAGIC);
1453 if (UPRI(td1) > UPRI(td))
1454 break;
1455 }
1456
1457 if (uq1 == NULL)
1458 TAILQ_INSERT_TAIL(&pi->pi_blocked, uq, uq_lockq);
1459 else
1460 TAILQ_INSERT_BEFORE(uq1, uq, uq_lockq);
1461 }
1462 return (1);
1463 }
1464
1465 /*
1466 * Propagate priority when a thread is blocked on POSIX
1467 * PI mutex.
1468 */
1469 static void
1470 umtx_propagate_priority(struct thread *td)
1471 {
1472 struct umtx_q *uq;
1473 struct umtx_pi *pi;
1474 int pri;
1475
1476 mtx_assert(&umtx_lock, MA_OWNED);
1477 pri = UPRI(td);
1478 uq = td->td_umtxq;
1479 pi = uq->uq_pi_blocked;
1480 if (pi == NULL)
1481 return;
1482
1483 for (;;) {
1484 td = pi->pi_owner;
1485 if (td == NULL || td == curthread)
1486 return;
1487
1488 MPASS(td->td_proc != NULL);
1489 MPASS(td->td_proc->p_magic == P_MAGIC);
1490
1491 thread_lock(td);
1492 if (td->td_lend_user_pri > pri)
1493 sched_lend_user_prio(td, pri);
1494 else {
1495 thread_unlock(td);
1496 break;
1497 }
1498 thread_unlock(td);
1499
1500 /*
1501 * Pick up the lock that td is blocked on.
1502 */
1503 uq = td->td_umtxq;
1504 pi = uq->uq_pi_blocked;
1505 if (pi == NULL)
1506 break;
1507 /* Resort td on the list if needed. */
1508 umtx_pi_adjust_thread(pi, td);
1509 }
1510 }
1511
1512 /*
1513 * Unpropagate priority for a PI mutex when a thread blocked on
1514 * it is interrupted by signal or resumed by others.
1515 */
1516 static void
1517 umtx_repropagate_priority(struct umtx_pi *pi)
1518 {
1519 struct umtx_q *uq, *uq_owner;
1520 struct umtx_pi *pi2;
1521 int pri;
1522
1523 mtx_assert(&umtx_lock, MA_OWNED);
1524
1525 while (pi != NULL && pi->pi_owner != NULL) {
1526 pri = PRI_MAX;
1527 uq_owner = pi->pi_owner->td_umtxq;
1528
1529 TAILQ_FOREACH(pi2, &uq_owner->uq_pi_contested, pi_link) {
1530 uq = TAILQ_FIRST(&pi2->pi_blocked);
1531 if (uq != NULL) {
1532 if (pri > UPRI(uq->uq_thread))
1533 pri = UPRI(uq->uq_thread);
1534 }
1535 }
1536
1537 if (pri > uq_owner->uq_inherited_pri)
1538 pri = uq_owner->uq_inherited_pri;
1539 thread_lock(pi->pi_owner);
1540 sched_lend_user_prio(pi->pi_owner, pri);
1541 thread_unlock(pi->pi_owner);
1542 if ((pi = uq_owner->uq_pi_blocked) != NULL)
1543 umtx_pi_adjust_thread(pi, uq_owner->uq_thread);
1544 }
1545 }
1546
1547 /*
1548 * Insert a PI mutex into owned list.
1549 */
1550 static void
1551 umtx_pi_setowner(struct umtx_pi *pi, struct thread *owner)
1552 {
1553 struct umtx_q *uq_owner;
1554
1555 uq_owner = owner->td_umtxq;
1556 mtx_assert(&umtx_lock, MA_OWNED);
1557 if (pi->pi_owner != NULL)
1558 panic("pi_ower != NULL");
1559 pi->pi_owner = owner;
1560 TAILQ_INSERT_TAIL(&uq_owner->uq_pi_contested, pi, pi_link);
1561 }
1562
1563 /*
1564 * Claim ownership of a PI mutex.
1565 */
1566 static int
1567 umtx_pi_claim(struct umtx_pi *pi, struct thread *owner)
1568 {
1569 struct umtx_q *uq, *uq_owner;
1570
1571 uq_owner = owner->td_umtxq;
1572 mtx_lock_spin(&umtx_lock);
1573 if (pi->pi_owner == owner) {
1574 mtx_unlock_spin(&umtx_lock);
1575 return (0);
1576 }
1577
1578 if (pi->pi_owner != NULL) {
1579 /*
1580 * userland may have already messed the mutex, sigh.
1581 */
1582 mtx_unlock_spin(&umtx_lock);
1583 return (EPERM);
1584 }
1585 umtx_pi_setowner(pi, owner);
1586 uq = TAILQ_FIRST(&pi->pi_blocked);
1587 if (uq != NULL) {
1588 int pri;
1589
1590 pri = UPRI(uq->uq_thread);
1591 thread_lock(owner);
1592 if (pri < UPRI(owner))
1593 sched_lend_user_prio(owner, pri);
1594 thread_unlock(owner);
1595 }
1596 mtx_unlock_spin(&umtx_lock);
1597 return (0);
1598 }
1599
1600 /*
1601 * Adjust a thread's order position in its blocked PI mutex,
1602 * this may result new priority propagating process.
1603 */
1604 void
1605 umtx_pi_adjust(struct thread *td, u_char oldpri)
1606 {
1607 struct umtx_q *uq;
1608 struct umtx_pi *pi;
1609
1610 uq = td->td_umtxq;
1611 mtx_lock_spin(&umtx_lock);
1612 /*
1613 * Pick up the lock that td is blocked on.
1614 */
1615 pi = uq->uq_pi_blocked;
1616 if (pi != NULL) {
1617 umtx_pi_adjust_thread(pi, td);
1618 umtx_repropagate_priority(pi);
1619 }
1620 mtx_unlock_spin(&umtx_lock);
1621 }
1622
1623 /*
1624 * Sleep on a PI mutex.
1625 */
/*
 * Queue the calling thread (uq->uq_thread must be curthread) on the umtx
 * chain and on pi's blocked list, propagate its priority to the owner
 * chain and sleep until woken, signalled or timed out.
 *
 * owner is the thread id taken from the mutex word; it is used to look up
 * and record the owning thread when pi has no owner yet.  wmesg and timo
 * are passed to msleep().
 *
 * The chain must be locked and busy on entry (asserted below).  The chain
 * is un-busied before sleeping and its lock is released before returning.
 *
 * Returns 0 or the msleep() error, with EWOULDBLOCK mapped to ETIMEDOUT.
 */
1626 static int
1627 umtxq_sleep_pi(struct umtx_q *uq, struct umtx_pi *pi,
1628 uint32_t owner, const char *wmesg, int timo)
1629 {
1630 struct umtxq_chain *uc;
1631 struct thread *td, *td1;
1632 struct umtx_q *uq1;
1633 int pri;
1634 int error = 0;
1635
1636 td = uq->uq_thread;
1637 KASSERT(td == curthread, ("inconsistent uq_thread"));
1638 uc = umtxq_getchain(&uq->uq_key);
1639 UMTXQ_LOCKED_ASSERT(uc);
1640 UMTXQ_BUSY_ASSERT(uc);
1641 umtxq_insert(uq);
1642 mtx_lock_spin(&umtx_lock);
/*
 * No owner recorded yet: drop the spin lock for the thread lookup, then
 * re-check under the lock because the owner may have been set meanwhile.
 */
1643 if (pi->pi_owner == NULL) {
1644 mtx_unlock_spin(&umtx_lock);
1645 /* XXX Only look up thread in current process. */
1646 td1 = tdfind(owner, curproc->p_pid);
1647 mtx_lock_spin(&umtx_lock);
1648 if (td1 != NULL) {
1649 if (pi->pi_owner == NULL)
1650 umtx_pi_setowner(pi, td1);
1651 PROC_UNLOCK(td1->td_proc);
1652 }
1653 }
1654
/*
 * Insert ourselves ahead of the first blocked thread whose UPRI() value
 * is larger than ours, or at the tail when there is none.
 */
1655 TAILQ_FOREACH(uq1, &pi->pi_blocked, uq_lockq) {
1656 pri = UPRI(uq1->uq_thread);
1657 if (pri > UPRI(td))
1658 break;
1659 }
1660
1661 if (uq1 != NULL)
1662 TAILQ_INSERT_BEFORE(uq1, uq, uq_lockq);
1663 else
1664 TAILQ_INSERT_TAIL(&pi->pi_blocked, uq, uq_lockq);
1665
1666 uq->uq_pi_blocked = pi;
1667 thread_lock(td);
1668 td->td_flags |= TDF_UPIBLOCKED;
1669 thread_unlock(td);
1670 umtx_propagate_priority(td);
1671 mtx_unlock_spin(&umtx_lock);
1672 umtxq_unbusy(&uq->uq_key);
1673
/* Sleep only if we are still on the umtx queue at this point. */
1674 if (uq->uq_flags & UQF_UMTXQ) {
1675 error = msleep(uq, &uc->uc_lock, PCATCH, wmesg, timo);
1676 if (error == EWOULDBLOCK)
1677 error = ETIMEDOUT;
1678 if (uq->uq_flags & UQF_UMTXQ) {
1679 umtxq_remove(uq);
1680 }
1681 }
/*
 * Undo the blocked state: leave pi's blocked list and recompute the
 * priority lent along the owner chain.
 */
1682 mtx_lock_spin(&umtx_lock);
1683 uq->uq_pi_blocked = NULL;
1684 thread_lock(td);
1685 td->td_flags &= ~TDF_UPIBLOCKED;
1686 thread_unlock(td);
1687 TAILQ_REMOVE(&pi->pi_blocked, uq, uq_lockq);
1688 umtx_repropagate_priority(pi);
1689 mtx_unlock_spin(&umtx_lock);
1690 umtxq_unlock(&uq->uq_key);
1691
1692 return (error);
1693 }
1694
1695 /*
1696 * Add reference count for a PI mutex.
1697 */
1698 static void
1699 umtx_pi_ref(struct umtx_pi *pi)
1700 {
1701 struct umtxq_chain *uc;
1702
1703 uc = umtxq_getchain(&pi->pi_key);
1704 UMTXQ_LOCKED_ASSERT(uc);
1705 pi->pi_refcount++;
1706 }
1707
1708 /*
1709 * Decrease reference count for a PI mutex, if the counter
1710 * is decreased to zero, its memory space is freed.
1711 */
1712 static void
1713 umtx_pi_unref(struct umtx_pi *pi)
1714 {
1715 struct umtxq_chain *uc;
1716
1717 uc = umtxq_getchain(&pi->pi_key);
1718 UMTXQ_LOCKED_ASSERT(uc);
1719 KASSERT(pi->pi_refcount > 0, ("invalid reference count"));
1720 if (--pi->pi_refcount == 0) {
1721 mtx_lock_spin(&umtx_lock);
1722 if (pi->pi_owner != NULL) {
1723 TAILQ_REMOVE(&pi->pi_owner->td_umtxq->uq_pi_contested,
1724 pi, pi_link);
1725 pi->pi_owner = NULL;
1726 }
1727 KASSERT(TAILQ_EMPTY(&pi->pi_blocked),
1728 ("blocked queue not empty"));
1729 mtx_unlock_spin(&umtx_lock);
1730 TAILQ_REMOVE(&uc->uc_pi_list, pi, pi_hashlink);
1731 umtx_pi_free(pi);
1732 }
1733 }
1734
1735 /*
1736 * Find a PI mutex in hash table.
1737 */
1738 static struct umtx_pi *
1739 umtx_pi_lookup(struct umtx_key *key)
1740 {
1741 struct umtxq_chain *uc;
1742 struct umtx_pi *pi;
1743
1744 uc = umtxq_getchain(key);
1745 UMTXQ_LOCKED_ASSERT(uc);
1746
1747 TAILQ_FOREACH(pi, &uc->uc_pi_list, pi_hashlink) {
1748 if (umtx_key_match(&pi->pi_key, key)) {
1749 return (pi);
1750 }
1751 }
1752 return (NULL);
1753 }
1754
1755 /*
1756 * Insert a PI mutex into hash table.
1757 */
1758 static inline void
1759 umtx_pi_insert(struct umtx_pi *pi)
1760 {
1761 struct umtxq_chain *uc;
1762
1763 uc = umtxq_getchain(&pi->pi_key);
1764 UMTXQ_LOCKED_ASSERT(uc);
1765 TAILQ_INSERT_TAIL(&uc->uc_pi_list, pi, pi_hashlink);
1766 }
1767
1768 /*
1769 * Lock a PI mutex.
1770 */
/*
 * Acquire the priority-inheritance mutex m for td.
 *
 * flags are the mutex flags (sharing mode and UMUTEX_ERROR_CHECK are
 * consulted here), timo is the sleep timeout passed on to
 * umtxq_sleep_pi(), and a non-zero try makes the call fail with EBUSY
 * instead of sleeping.
 *
 * Returns 0 on success, EFAULT when the owner word cannot be accessed,
 * EDEADLK when error checking is enabled and td already owns the mutex,
 * EBUSY for a failed try-lock, or an error from umtx_key_get(),
 * umtx_pi_claim() or umtxq_sleep_pi().
 */
1771 static int
1772 _do_lock_pi(struct thread *td, struct umutex *m, uint32_t flags, int timo,
1773 int try)
1774 {
1775 struct umtx_q *uq;
1776 struct umtx_pi *pi, *new_pi;
1777 uint32_t id, owner, old;
1778 int error;
1779
1780 id = td->td_tid;
1781 uq = td->td_umtxq;
1782
1783 if ((error = umtx_key_get(m, TYPE_PI_UMUTEX, GET_SHARE(flags),
1784 &uq->uq_key)) != 0)
1785 return (error);
/*
 * Find the PI record for this key or create one.  A non-sleeping
 * allocation is tried first; if it fails the chain lock is dropped for a
 * sleeping allocation and the lookup is repeated, since another thread
 * may have inserted a record in the meantime.
 */
1786 umtxq_lock(&uq->uq_key);
1787 pi = umtx_pi_lookup(&uq->uq_key);
1788 if (pi == NULL) {
1789 new_pi = umtx_pi_alloc(M_NOWAIT);
1790 if (new_pi == NULL) {
1791 umtxq_unlock(&uq->uq_key);
1792 new_pi = umtx_pi_alloc(M_WAITOK);
1793 umtxq_lock(&uq->uq_key);
1794 pi = umtx_pi_lookup(&uq->uq_key);
1795 if (pi != NULL) {
1796 umtx_pi_free(new_pi);
1797 new_pi = NULL;
1798 }
1799 }
1800 if (new_pi != NULL) {
1801 new_pi->pi_key = uq->uq_key;
1802 umtx_pi_insert(new_pi);
1803 pi = new_pi;
1804 }
1805 }
/* Hold a reference on the record for the duration of this call. */
1806 umtx_pi_ref(pi);
1807 umtxq_unlock(&uq->uq_key);
1808
1809 /*
1810 * Care must be exercised when dealing with umtx structure. It
1811 * can fault on any access.
1812 */
1813 for (;;) {
1814 /*
1815 * Try the uncontested case. This should be done in userland.
1816 */
1817 owner = casuword32(&m->m_owner, UMUTEX_UNOWNED, id);
1818
1819 /* The acquire succeeded. */
1820 if (owner == UMUTEX_UNOWNED) {
1821 error = 0;
1822 break;
1823 }
1824
1825 /* The address was invalid. */
1826 if (owner == -1) {
1827 error = EFAULT;
1828 break;
1829 }
1830
1831 /* If no one owns it but it is contested try to acquire it. */
1832 if (owner == UMUTEX_CONTESTED) {
1833 owner = casuword32(&m->m_owner,
1834 UMUTEX_CONTESTED, id | UMUTEX_CONTESTED);
1835
/* We own the word now; record the ownership in the PI record too. */
1836 if (owner == UMUTEX_CONTESTED) {
1837 umtxq_lock(&uq->uq_key);
1838 umtxq_busy(&uq->uq_key);
1839 error = umtx_pi_claim(pi, td);
1840 umtxq_unbusy(&uq->uq_key);
1841 umtxq_unlock(&uq->uq_key);
1842 break;
1843 }
1844
1845 /* The address was invalid. */
1846 if (owner == -1) {
1847 error = EFAULT;
1848 break;
1849 }
1850
1851 /* If this failed the lock has changed, restart. */
1852 continue;
1853 }
1854
1855 if ((flags & UMUTEX_ERROR_CHECK) != 0 &&
1856 (owner & ~UMUTEX_CONTESTED) == id) {
1857 error = EDEADLK;
1858 break;
1859 }
1860
1861 if (try != 0) {
1862 error = EBUSY;
1863 break;
1864 }
1865
1866 /*
1867 * If we caught a signal, we have retried and now
1868 * exit immediately.
1869 */
1870 if (error != 0)
1871 break;
1872
/* Mark the chain busy while the contested bit is being set. */
1873 umtxq_lock(&uq->uq_key);
1874 umtxq_busy(&uq->uq_key);
1875 umtxq_unlock(&uq->uq_key);
1876
1877 /*
1878 * Set the contested bit so that a release in user space
1879 * knows to use the system call for unlock. If this fails
1880 * either some one else has acquired the lock or it has been
1881 * released.
1882 */
1883 old = casuword32(&m->m_owner, owner, owner | UMUTEX_CONTESTED);
1884
1885 /* The address was invalid. */
1886 if (old == -1) {
1887 umtxq_lock(&uq->uq_key);
1888 umtxq_unbusy(&uq->uq_key);
1889 umtxq_unlock(&uq->uq_key);
1890 error = EFAULT;
1891 break;
1892 }
1893
1894 umtxq_lock(&uq->uq_key);
1895 /*
1896 * We set the contested bit, sleep. Otherwise the lock changed
1897 * and we need to retry or we lost a race to the thread
1898 * unlocking the umtx.
1899 */
/* umtxq_sleep_pi() un-busies the chain and drops its lock itself. */
1900 if (old == owner)
1901 error = umtxq_sleep_pi(uq, pi, owner & ~UMUTEX_CONTESTED,
1902 "umtxpi", timo);
1903 else {
1904 umtxq_unbusy(&uq->uq_key);
1905 umtxq_unlock(&uq->uq_key);
1906 }
1907 }
1908
/* Drop the reference taken above; the last one frees the record. */
1909 umtxq_lock(&uq->uq_key);
1910 umtx_pi_unref(pi);
1911 umtxq_unlock(&uq->uq_key);
1912
1913 umtx_key_release(&uq->uq_key);
1914 return (error);
1915 }
1916
1917 /*
1918 * Unlock a PI mutex.
1919 */
/*
 * Release the priority-inheritance mutex m held by td.
 *
 * Returns 0 on success, EFAULT when the owner word cannot be accessed,
 * EPERM when td is not the recorded owner (either in the mutex word or
 * in the kernel's PI record), EINVAL when the owner word changed
 * underneath the final compare-and-swap, or the error from
 * umtx_key_get().
 */
1920 static int
1921 do_unlock_pi(struct thread *td, struct umutex *m, uint32_t flags)
1922 {
1923 struct umtx_key key;
1924 struct umtx_q *uq_first, *uq_first2, *uq_me;
1925 struct umtx_pi *pi, *pi2;
1926 uint32_t owner, old, id;
1927 int error;
1928 int count;
1929 int pri;
1930
1931 id = td->td_tid;
1932 /*
1933 * Make sure we own this mtx.
1934 */
1935 owner = fuword32(__DEVOLATILE(uint32_t *, &m->m_owner));
1936 if (owner == -1)
1937 return (EFAULT);
1938
1939 if ((owner & ~UMUTEX_CONTESTED) != id)
1940 return (EPERM);
1941
1942 /* This should be done in userland */
1943 if ((owner & UMUTEX_CONTESTED) == 0) {
1944 old = casuword32(&m->m_owner, owner, UMUTEX_UNOWNED);
1945 if (old == -1)
1946 return (EFAULT);
1947 if (old == owner)
1948 return (0);
1949 owner = old;
1950 }
1951
1952 /* We should only ever be in here for contested locks */
1953 if ((error = umtx_key_get(m, TYPE_PI_UMUTEX, GET_SHARE(flags),
1954 &key)) != 0)
1955 return (error);
1956
1957 umtxq_lock(&key);
1958 umtxq_busy(&key);
1959 count = umtxq_count_pi(&key, &uq_first);
/*
 * There is at least one waiter: give up ownership in the PI record,
 * recompute the priority lent to us from the PI mutexes we still have
 * contested, and wake the first waiter that is still on the umtx queue.
 */
1960 if (uq_first != NULL) {
1961 mtx_lock_spin(&umtx_lock);
1962 pi = uq_first->uq_pi_blocked;
1963 KASSERT(pi != NULL, ("pi == NULL?"));
1964 if (pi->pi_owner != curthread) {
1965 mtx_unlock_spin(&umtx_lock);
1966 umtxq_unbusy(&key);
1967 umtxq_unlock(&key);
1968 umtx_key_release(&key);
1969 /* userland messed the mutex */
1970 return (EPERM);
1971 }
1972 uq_me = curthread->td_umtxq;
1973 pi->pi_owner = NULL;
1974 TAILQ_REMOVE(&uq_me->uq_pi_contested, pi, pi_link);
1975 /* get highest priority thread which is still sleeping. */
1976 uq_first = TAILQ_FIRST(&pi->pi_blocked);
1977 while (uq_first != NULL &&
1978 (uq_first->uq_flags & UQF_UMTXQ) == 0) {
1979 uq_first = TAILQ_NEXT(uq_first, uq_lockq);
1980 }
/* Smallest UPRI() value among first waiters of our remaining PI mutexes. */
1981 pri = PRI_MAX;
1982 TAILQ_FOREACH(pi2, &uq_me->uq_pi_contested, pi_link) {
1983 uq_first2 = TAILQ_FIRST(&pi2->pi_blocked);
1984 if (uq_first2 != NULL) {
1985 if (pri > UPRI(uq_first2->uq_thread))
1986 pri = UPRI(uq_first2->uq_thread);
1987 }
1988 }
1989 thread_lock(curthread);
1990 sched_lend_user_prio(curthread, pri);
1991 thread_unlock(curthread);
1992 mtx_unlock_spin(&umtx_lock);
1993 if (uq_first)
1994 umtxq_signal_thread(uq_first);
1995 }
1996 umtxq_unlock(&key);
1997
1998 /*
1999 * When unlocking the umtx, it must be marked as unowned if
2000 * there is zero or one thread only waiting for it.
2001 * Otherwise, it must be marked as contested.
2002 */
2003 old = casuword32(&m->m_owner, owner,
2004 count <= 1 ? UMUTEX_UNOWNED : UMUTEX_CONTESTED);
2005
/* The chain stayed busy across the user-memory update; release it now. */
2006 umtxq_lock(&key);
2007 umtxq_unbusy(&key);
2008 umtxq_unlock(&key);
2009 umtx_key_release(&key);
2010 if (old == -1)
2011 return (EFAULT);
2012 if (old != owner)
2013 return (EINVAL);
2014 return (0);
2015 }
2016
2017 /*
2018 * Lock a PP mutex.
2019 */
/*
 * Acquire the priority-protect mutex m for td.
 *
 * flags are the mutex flags (sharing mode and UMUTEX_ERROR_CHECK are
 * consulted here), timo is the sleep timeout handed to umtxq_sleep(),
 * and a non-zero try makes the call fail with EBUSY instead of sleeping.
 *
 * Returns 0 on success, EINVAL when the ceiling read from m_ceilings[0]
 * is out of range or td's UPRI() value is below the ceiling priority,
 * EFAULT when the owner word cannot be accessed, EDEADLK when error
 * checking is enabled and td already owns the mutex, EBUSY for a failed
 * try-lock, or an error from umtx_key_get() or umtxq_sleep().
 */
2020 static int
2021 _do_lock_pp(struct thread *td, struct umutex *m, uint32_t flags, int timo,
2022 int try)
2023 {
2024 struct umtx_q *uq, *uq2;
2025 struct umtx_pi *pi;
2026 uint32_t ceiling;
2027 uint32_t owner, id;
2028 int error, pri, old_inherited_pri, su;
2029
2030 id = td->td_tid;
2031 uq = td->td_umtxq;
2032 if ((error = umtx_key_get(m, TYPE_PP_UMUTEX, GET_SHARE(flags),
2033 &uq->uq_key)) != 0)
2034 return (error);
/* su is non-zero when the PRIV_SCHED_RTPRIO privilege check succeeds. */
2035 su = (priv_check(td, PRIV_SCHED_RTPRIO) == 0);
2036 for (;;) {
2037 old_inherited_pri = uq->uq_inherited_pri;
2038 umtxq_lock(&uq->uq_key);
2039 umtxq_busy(&uq->uq_key);
2040 umtxq_unlock(&uq->uq_key);
2041
/*
 * ceiling is unsigned, so a stored value larger than RTP_PRIO_MAX (or a
 * failed fetch) wraps around and is caught by the range check below.
 */
2042 ceiling = RTP_PRIO_MAX - fuword32(&m->m_ceilings[0]);
2043 if (ceiling > RTP_PRIO_MAX) {
2044 error = EINVAL;
2045 goto out;
2046 }
2047
2048 mtx_lock_spin(&umtx_lock);
2049 if (UPRI(td) < PRI_MIN_REALTIME + ceiling) {
2050 mtx_unlock_spin(&umtx_lock);
2051 error = EINVAL;
2052 goto out;
2053 }
/* Only privileged threads have their priority raised to the ceiling. */
2054 if (su && PRI_MIN_REALTIME + ceiling < uq->uq_inherited_pri) {
2055 uq->uq_inherited_pri = PRI_MIN_REALTIME + ceiling;
2056 thread_lock(td);
2057 if (uq->uq_inherited_pri < UPRI(td))
2058 sched_lend_user_prio(td, uq->uq_inherited_pri);
2059 thread_unlock(td);
2060 }
2061 mtx_unlock_spin(&umtx_lock);
2062
/* The unlocked state of a PP mutex is UMUTEX_CONTESTED, not unowned. */
2063 owner = casuword32(&m->m_owner,
2064 UMUTEX_CONTESTED, id | UMUTEX_CONTESTED);
2065
2066 if (owner == UMUTEX_CONTESTED) {
2067 error = 0;
2068 break;
2069 }
2070
2071 /* The address was invalid. */
2072 if (owner == -1) {
2073 error = EFAULT;
2074 break;
2075 }
2076
2077 if ((flags & UMUTEX_ERROR_CHECK) != 0 &&
2078 (owner & ~UMUTEX_CONTESTED) == id) {
2079 error = EDEADLK;
2080 break;
2081 }
2082
2083 if (try != 0) {
2084 error = EBUSY;
2085 break;
2086 }
2087
2088 /*
2089 * If we caught a signal, we have retried and now
2090 * exit immediately.
2091 */
2092 if (error != 0)
2093 break;
2094
2095 umtxq_lock(&uq->uq_key);
2096 umtxq_insert(uq);
2097 umtxq_unbusy(&uq->uq_key);
2098 error = umtxq_sleep(uq, "umtxpp", timo);
2099 umtxq_remove(uq);
2100 umtxq_unlock(&uq->uq_key);
2101
/*
 * Before retrying, put the inherited priority back to its value from
 * the start of this iteration and recompute the lent priority from the
 * PI mutexes we have contested.
 */
2102 mtx_lock_spin(&umtx_lock);
2103 uq->uq_inherited_pri = old_inherited_pri;
2104 pri = PRI_MAX;
2105 TAILQ_FOREACH(pi, &uq->uq_pi_contested, pi_link) {
2106 uq2 = TAILQ_FIRST(&pi->pi_blocked);
2107 if (uq2 != NULL) {
2108 if (pri > UPRI(uq2->uq_thread))
2109 pri = UPRI(uq2->uq_thread);
2110 }
2111 }
2112 if (pri > uq->uq_inherited_pri)
2113 pri = uq->uq_inherited_pri;
2114 thread_lock(td);
2115 sched_lend_user_prio(td, pri);
2116 thread_unlock(td);
2117 mtx_unlock_spin(&umtx_lock);
2118 }
2119
/* The acquire failed: undo any priority change made in the last pass. */
2120 if (error != 0) {
2121 mtx_lock_spin(&umtx_lock);
2122 uq->uq_inherited_pri = old_inherited_pri;
2123 pri = PRI_MAX;
2124 TAILQ_FOREACH(pi, &uq->uq_pi_contested, pi_link) {
2125 uq2 = TAILQ_FIRST(&pi->pi_blocked);
2126 if (uq2 != NULL) {
2127 if (pri > UPRI(uq2->uq_thread))
2128 pri = UPRI(uq2->uq_thread);
2129 }
2130 }
2131 if (pri > uq->uq_inherited_pri)
2132 pri = uq->uq_inherited_pri;
2133 thread_lock(td);
2134 sched_lend_user_prio(td, pri);
2135 thread_unlock(td);
2136 mtx_unlock_spin(&umtx_lock);
2137 }
2138
/* Every exit path reaches here with the chain still marked busy. */
2139 out:
2140 umtxq_lock(&uq->uq_key);
2141 umtxq_unbusy(&uq->uq_key);
2142 umtxq_unlock(&uq->uq_key);
2143 umtx_key_release(&uq->uq_key);
2144 return (error);
2145 }
2146
2147 /*
2148 * Unlock a PP mutex.
2149 */
2150 static int
2151 do_unlock_pp(struct thread *td, struct umutex *m, uint32_t flags)
2152 {
2153 struct umtx_key key;
2154 struct umtx_q *uq, *uq2;
2155 struct umtx_pi *pi;
2156 uint32_t owner, id;
2157 uint32_t rceiling;
2158 int error, pri, new_inherited_pri, su;
2159
2160 id = td->td_tid;
2161 uq = td->td_umtxq;
2162 su = (priv_check(td, PRIV_SCHED_RTPRIO) == 0);
2163
2164 /*
2165 * Make sure we own this mtx.
2166 */
2167 owner = fuword32(__DEVOLATILE(uint32_t *, &m->m_owner));
2168 if (owner == -1)
2169 return (EFAULT);
2170
2171 if ((owner & ~UMUTEX_CONTESTED) != id)
2172 return (EPERM);
2173
2174 error = copyin(&m->m_ceilings[1], &rceiling, sizeof(uint32_t));
2175 if (error != 0)
2176 return (error);
2177
2178 if (rceiling == -1)
2179 new_inherited_pri = PRI_MAX;
2180 else {
2181 rceiling = RTP_PRIO_MAX - rceiling;
2182 if (rceiling > RTP_PRIO_MAX)
2183 return (EINVAL);
2184 new_inherited_pri = PRI_MIN_REALTIME + rceiling;
2185 }
2186
2187 if ((error = umtx_key_get(m, TYPE_PP_UMUTEX, GET_SHARE(flags),
2188 &key)) != 0)
2189 return (error);
2190 umtxq_lock(&key);
2191 umtxq_busy(&key);
2192 umtxq_unlock(&key);
2193 /*
2194 * For priority protected mutex, always set unlocked state
2195 * to UMUTEX_CONTESTED, so that userland always enters kernel
2196 * to lock the mutex, it is necessary because thread priority
2197 * has to be adjusted for such mutex.
2198 */
2199 error = suword32(__DEVOLATILE(uint32_t *, &m->m_owner),
2200 UMUTEX_CONTESTED);
2201
2202 umtxq_lock(&key);
2203 if (error == 0)
2204 umtxq_signal(&key, 1);
2205 umtxq_unbusy(&key);
2206 umtxq_unlock(&key);
2207
2208 if (error == -1)
2209 error = EFAULT;
2210 else {
2211 mtx_lock_spin(&umtx_lock);
2212 if (su != 0)
2213 uq->uq_inherited_pri = new_inherited_pri;
2214 pri = PRI_MAX;
2215 TAILQ_FOREACH(pi, &uq->uq_pi_contested, pi_link) {
2216 uq2 = TAILQ_FIRST(&pi->pi_blocked);
2217 if (uq2 != NULL) {
2218 if (pri > UPRI(uq2->uq_thread))
2219 pri = UPRI(uq2->uq_thread);
2220 }
2221 }
2222 if (pri > uq->uq_inherited_pri)
2223 pri = uq->uq_inherited_pri;
2224 thread_lock(td);
2225 sched_lend_user_prio(td, pri);
2226 thread_unlock(td);
2227 mtx_unlock_spin(&umtx_lock);
2228 }
2229 umtx_key_release(&key);
2230 return (error);
2231 }
2232
2233 static int
2234 do_set_ceiling(struct thread *td, struct umutex *m, uint32_t ceiling,
2235 uint32_t *old_ceiling)
2236 {
2237 struct umtx_q *uq;
2238 uint32_t save_ceiling;
2239 uint32_t owner, id;
2240 uint32_t flags;
2241 int error;
2242
2243 flags = fuword32(&m->m_flags);
2244 if ((flags & UMUTEX_PRIO_PROTECT) == 0)
2245 return (EINVAL);
2246 if (ceiling > RTP_PRIO_MAX)
2247 return (EINVAL);
2248 id = td->td_tid;
2249 uq = td->td_umtxq;
2250 if ((error = umtx_key_get(m, TYPE_PP_UMUTEX, GET_SHARE(flags),
2251 &uq->uq_key)) != 0)
2252 return (error);
2253 for (;;) {
2254 umtxq_lock(&uq->uq_key);
2255 umtxq_busy(&uq->uq_key);
2256 umtxq_unlock(&uq->uq_key);
2257
2258 save_ceiling = fuword32(&m->m_ceilings[0]);
2259
2260 owner = casuword32(&m->m_owner,
2261 UMUTEX_CONTESTED, id | UMUTEX_CONTESTED);
2262
2263 if (owner == UMUTEX_CONTESTED) {
2264 suword32(&m->m_ceilings[0], ceiling);
2265 suword32(__DEVOLATILE(uint32_t *, &m->m_owner),
2266 UMUTEX_CONTESTED);
2267 error = 0;
2268 break;
2269 }
2270
2271 /* The address was invalid. */
2272 if (owner == -1) {
2273 error = EFAULT;
2274 break;
2275 }
2276
2277 if ((owner & ~UMUTEX_CONTESTED) == id) {
2278 suword32(&m->m_ceilings[0], ceiling);
2279 error = 0;
2280 break;
2281 }
2282
2283 /*
2284 * If we caught a signal, we have retried and now
2285 * exit immediately.
2286 */
2287 if (error != 0)
2288 break;
2289
2290 /*
2291 * We set the contested bit, sleep. Otherwise the lock changed
2292 * and we need to retry or we lost a race to the thread
2293 * unlocking the umtx.
2294 */
2295 umtxq_lock(&uq->uq_key);
2296 umtxq_insert(uq);
2297 umtxq_unbusy(&uq->uq_key);
2298 error = umtxq_sleep(uq, "umtxpp", 0);
2299 umtxq_remove(uq);
2300 umtxq_unlock(&uq->uq_key);
2301 }
2302 umtxq_lock(&uq->uq_key);
2303 if (error == 0)
2304 umtxq_signal(&uq->uq_key, INT_MAX);
2305 umtxq_unbusy(&uq->uq_key);
2306 umtxq_unlock(&uq->uq_key);
2307 umtx_key_release(&uq->uq_key);
2308 if (error == 0 && old_ceiling != NULL)
2309 suword32(old_ceiling, save_ceiling);
2310 return (error);
2311 }
2312
2313 static int
2314 _do_lock_umutex(struct thread *td, struct umutex *m, int flags, int timo,
2315 int mode)
2316 {
2317 switch(flags & (UMUTEX_PRIO_INHERIT | UMUTEX_PRIO_PROTECT)) {
2318 case 0:
2319 return (_do_lock_normal(td, m, flags, timo, mode));
2320 case UMUTEX_PRIO_INHERIT:
2321 return (_do_lock_pi(td, m, flags, timo, mode));
2322 case UMUTEX_PRIO_PROTECT:
2323 return (_do_lock_pp(td, m, flags, timo, mode));
2324 }
2325 return (EINVAL);
2326 }
2327
2328 /*
2329 * Lock a userland POSIX mutex.
2330 */
2331 static int
2332 do_lock_umutex(struct thread *td, struct umutex *m,
2333 struct timespec *timeout, int mode)
2334 {
2335 struct timespec ts, ts2, ts3;
2336 struct timeval tv;
2337 uint32_t flags;
2338 int error;
2339
2340 flags = fuword32(&m->m_flags);
2341 if (flags == -1)
2342 return (EFAULT);
2343
2344 if (timeout == NULL) {
2345 error = _do_lock_umutex(td, m, flags, 0, mode);
2346 /* Mutex locking is restarted if it is interrupted. */
2347 if (error == EINTR && mode != _UMUTEX_WAIT)
2348 error = ERESTART;
2349 } else {
2350 getnanouptime(&ts);
2351 timespecadd(&ts, timeout);
2352 TIMESPEC_TO_TIMEVAL(&tv, timeout);
2353 for (;;) {
2354 error = _do_lock_umutex(td, m, flags, tvtohz(&tv), mode);
2355 if (error != ETIMEDOUT)
2356 break;
2357 getnanouptime(&ts2);
2358 if (timespeccmp(&ts2, &ts, >=)) {
2359 error = ETIMEDOUT;
2360 break;
2361 }
2362 ts3 = ts;
2363 timespecsub(&ts3, &ts2);
2364 TIMESPEC_TO_TIMEVAL(&tv, &ts3);
2365 }
2366 /* Timed-locking is not restarted. */
2367 if (error == ERESTART)
2368 error = EINTR;
2369 }
2370 return (error);
2371 }
2372
2373 /*
2374 * Unlock a userland POSIX mutex.
2375 */
2376 static int
2377 do_unlock_umutex(struct thread *td, struct umutex *m)
2378 {
2379 uint32_t flags;
2380
2381 flags = fuword32(&m->m_flags);
2382 if (flags == -1)
2383 return (EFAULT);
2384
2385 switch(flags & (UMUTEX_PRIO_INHERIT | UMUTEX_PRIO_PROTECT)) {
2386 case 0:
2387 return (do_unlock_normal(td, m, flags));
2388 case UMUTEX_PRIO_INHERIT:
2389 return (do_unlock_pi(td, m, flags));
2390 case UMUTEX_PRIO_PROTECT:
2391 return (do_unlock_pp(td, m, flags));
2392 }
2393
2394 return (EINVAL);
2395 }
2396
2397 static int
2398 do_cv_wait(struct thread *td, struct ucond *cv, struct umutex *m,
2399 struct timespec *timeout, u_long wflags)
2400 {
2401 struct umtx_q *uq;
2402 struct timeval tv;
2403 struct timespec cts, ets, tts;
2404 uint32_t flags;
2405 uint32_t clockid;
2406 int error;
2407
2408 uq = td->td_umtxq;
2409 flags = fuword32(&cv->c_flags);
2410 error = umtx_key_get(cv, TYPE_CV, GET_SHARE(flags), &uq->uq_key);
2411 if (error != 0)
2412 return (error);
2413
2414 if ((wflags & CVWAIT_CLOCKID) != 0) {
2415 clockid = fuword32(&cv->c_clockid);
2416 if (clockid < CLOCK_REALTIME ||
2417 clockid >= CLOCK_THREAD_CPUTIME_ID) {
2418 /* hmm, only HW clock id will work. */
2419 return (EINVAL);
2420 }
2421 } else {
2422 clockid = CLOCK_REALTIME;
2423 }
2424
2425 umtxq_lock(&uq->uq_key);
2426 umtxq_busy(&uq->uq_key);
2427 umtxq_insert(uq);
2428 umtxq_unlock(&uq->uq_key);
2429
2430 /*
2431 * Set c_has_waiters to 1 before releasing user mutex, also
2432 * don't modify cache line when unnecessary.
2433 */
2434 if (fuword32(__DEVOLATILE(uint32_t *, &cv->c_has_waiters)) == 0)
2435 suword32(__DEVOLATILE(uint32_t *, &cv->c_has_waiters), 1);
2436
2437 umtxq_lock(&uq->uq_key);
2438 umtxq_unbusy(&uq->uq_key);
2439 umtxq_unlock(&uq->uq_key);
2440
2441 error = do_unlock_umutex(td, m);
2442
2443 umtxq_lock(&uq->uq_key);
2444 if (error == 0) {
2445 if (timeout == NULL) {
2446 error = umtxq_sleep(uq, "ucond", 0);
2447 } else {
2448 if ((wflags & CVWAIT_ABSTIME) == 0) {
2449 kern_clock_gettime(td, clockid, &ets);
2450 timespecadd(&ets, timeout);
2451 tts = *timeout;
2452 } else { /* absolute time */
2453 ets = *timeout;
2454 tts = *timeout;
2455 kern_clock_gettime(td, clockid, &cts);
2456 timespecsub(&tts, &cts);
2457 }
2458 TIMESPEC_TO_TIMEVAL(&tv, &tts);
2459 for (;;) {
2460 error = umtxq_sleep(uq, "ucond", tvtohz(&tv));
2461 if (error != ETIMEDOUT)
2462 break;
2463 kern_clock_gettime(td, clockid, &cts);
2464 if (timespeccmp(&cts, &ets, >=)) {
2465 error = ETIMEDOUT;
2466 break;
2467 }
2468 tts = ets;
2469 timespecsub(&tts, &cts);
2470 TIMESPEC_TO_TIMEVAL(&tv, &tts);
2471 }
2472 }
2473 }
2474
2475 if ((uq->uq_flags & UQF_UMTXQ) == 0)
2476 error = 0;
2477 else {
2478 /*
2479 * This must be timeout,interrupted by signal or
2480 * surprious wakeup, clear c_has_waiter flag when
2481 * necessary.
2482 */
2483 umtxq_busy(&uq->uq_key);
2484 if ((uq->uq_flags & UQF_UMTXQ) != 0) {
2485 int oldlen = uq->uq_cur_queue->length;
2486 umtxq_remove(uq);
2487 if (oldlen == 1) {
2488 umtxq_unlock(&uq->uq_key);
2489 suword32(
2490 __DEVOLATILE(uint32_t *,
2491 &cv->c_has_waiters), 0);
2492 umtxq_lock(&uq->uq_key);
2493 }
2494 }
2495 umtxq_unbusy(&uq->uq_key);
2496 if (error == ERESTART)
2497 error = EINTR;
2498 }
2499
2500 umtxq_unlock(&uq->uq_key);
2501 umtx_key_release(&uq->uq_key);
2502 return (error);
2503 }
2504
2505 /*
2506 * Signal a userland condition variable.
2507 */
2508 static int
2509 do_cv_signal(struct thread *td, struct ucond *cv)
2510 {
2511 struct umtx_key key;
2512 int error, cnt, nwake;
2513 uint32_t flags;
2514
2515 flags = fuword32(&cv->c_flags);
2516 if ((error = umtx_key_get(cv, TYPE_CV, GET_SHARE(flags), &key)) != 0)
2517 return (error);
2518 umtxq_lock(&key);
2519 umtxq_busy(&key);
2520 cnt = umtxq_count(&key);
2521 nwake = umtxq_signal(&key, 1);
2522 if (cnt <= nwake) {
2523 umtxq_unlock(&key);
2524 error = suword32(
2525 __DEVOLATILE(uint32_t *, &cv->c_has_waiters), 0);
2526 umtxq_lock(&key);
2527 }
2528 umtxq_unbusy(&key);
2529 umtxq_unlock(&key);
2530 umtx_key_release(&key);
2531 return (error);
2532 }
2533
2534 static int
2535 do_cv_broadcast(struct thread *td, struct ucond *cv)
2536 {
2537 struct umtx_key key;
2538 int error;
2539 uint32_t flags;
2540
2541 flags = fuword32(&cv->c_flags);
2542 if ((error = umtx_key_get(cv, TYPE_CV, GET_SHARE(flags), &key)) != 0)
2543 return (error);
2544
2545 umtxq_lock(&key);
2546 umtxq_busy(&key);
2547 umtxq_signal(&key, INT_MAX);
2548 umtxq_unlock(&key);
2549
2550 error = suword32(__DEVOLATILE(uint32_t *, &cv->c_has_waiters), 0);
2551
2552 umtxq_lock(&key);
2553 umtxq_unbusy(&key);
2554 umtxq_unlock(&key);
2555
2556 umtx_key_release(&key);
2557 return (error);
2558 }
2559
/*
 * Acquire a userland rwlock for reading.
 *
 * The reader count held in rw_state is incremented with a compare-and-set
 * as long as none of the bits in wrflags is set.  wrflags always contains
 * URWLOCK_WRITE_OWNER; URWLOCK_WRITE_WAITERS is added unless either fflag
 * or the lock's own rw_flags carries URWLOCK_PREFER_READER.  When the lock
 * cannot be taken, URWLOCK_READ_WAITERS is set in rw_state, the thread
 * counts itself in rw_blocked_readers and sleeps on the queue until the
 * blocking bits clear or the sleep fails.
 *
 * timo is passed unchanged to umtxq_sleep().
 *
 * Returns 0 once the read lock is held, EAGAIN when the reader count is
 * already URWLOCK_MAX_READERS, or the error from umtx_key_get() or
 * umtxq_sleep().
 */
2560 static int
2561 do_rw_rdlock(struct thread *td, struct urwlock *rwlock, long fflag, int timo)
2562 {
2563 struct umtx_q *uq;
2564 uint32_t flags, wrflags;
2565 int32_t state, oldstate;
2566 int32_t blocked_readers;
2567 int error;
2568
2569 uq = td->td_umtxq;
2570 flags = fuword32(&rwlock->rw_flags);
2571 error = umtx_key_get(rwlock, TYPE_RWLOCK, GET_SHARE(flags), &uq->uq_key);
2572 if (error != 0)
2573 return (error);
2574
/* wrflags is the set of state bits that keep a reader out. */
2575 wrflags = URWLOCK_WRITE_OWNER;
2576 if (!(fflag & URWLOCK_PREFER_READER) && !(flags & URWLOCK_PREFER_READER))
2577 wrflags |= URWLOCK_WRITE_WAITERS;
2578
2579 for (;;) {
2580 state = fuword32(__DEVOLATILE(int32_t *, &rwlock->rw_state));
2581 /* try to lock it */
2582 while (!(state & wrflags)) {
2583 if (__predict_false(URWLOCK_READER_COUNT(state) == URWLOCK_MAX_READERS)) {
2584 umtx_key_release(&uq->uq_key);
2585 return (EAGAIN);
2586 }
2587 oldstate = casuword32(&rwlock->rw_state, state, state + 1);
2588 if (oldstate == state) {
2589 umtx_key_release(&uq->uq_key);
2590 return (0);
2591 }
2592 state = oldstate;
2593 }
2594
/*
 * error is only non-zero here after a failed umtxq_sleep() in a previous
 * iteration; the try-lock above was the last attempt, so give up.
 */
2595 if (error)
2596 break;
2597
2598 /* grab monitor lock */
2599 umtxq_lock(&uq->uq_key);
2600 umtxq_busy(&uq->uq_key);
2601 umtxq_unlock(&uq->uq_key);
2602
2603 /*
2604 * re-read the state, in case it changed between the try-lock above
2605 * and the check below
2606 */
2607 state = fuword32(__DEVOLATILE(int32_t *, &rwlock->rw_state));
2608
2609 /* set read contention bit */
2610 while ((state & wrflags) && !(state & URWLOCK_READ_WAITERS)) {
2611 oldstate = casuword32(&rwlock->rw_state, state, state | URWLOCK_READ_WAITERS);
2612 if (oldstate == state)
2613 goto sleep;
2614 state = oldstate;
2615 }
2616
2617 /* state is changed while setting flags, restart */
2618 if (!(state & wrflags)) {
2619 umtxq_lock(&uq->uq_key);
2620 umtxq_unbusy(&uq->uq_key);
2621 umtxq_unlock(&uq->uq_key);
2622 continue;
2623 }
2624
2625 sleep:
2626 /* contention bit is set, before sleeping, increase read waiter count */
2627 blocked_readers = fuword32(&rwlock->rw_blocked_readers);
2628 suword32(&rwlock->rw_blocked_readers, blocked_readers+1);
2629
/*
 * Sleep until the blocking bits are gone.  The queue is un-busied only
 * after this thread has been inserted, and is busied again before the
 * thread is removed.
 */
2630 while (state & wrflags) {
2631 umtxq_lock(&uq->uq_key);
2632 umtxq_insert(uq);
2633 umtxq_unbusy(&uq->uq_key);
2634
2635 error = umtxq_sleep(uq, "urdlck", timo);
2636
2637 umtxq_busy(&uq->uq_key);
2638 umtxq_remove(uq);
2639 umtxq_unlock(&uq->uq_key);
2640 if (error)
2641 break;
2642 state = fuword32(__DEVOLATILE(int32_t *, &rwlock->rw_state));
2643 }
2644
2645 /* decrease read waiter count, and may clear read contention bit */
2646 blocked_readers = fuword32(&rwlock->rw_blocked_readers);
2647 suword32(&rwlock->rw_blocked_readers, blocked_readers-1);
/* This thread was the last blocked reader: drop URWLOCK_READ_WAITERS. */
2648 if (blocked_readers == 1) {
2649 state = fuword32(__DEVOLATILE(int32_t *, &rwlock->rw_state));
2650 for (;;) {
2651 oldstate = casuword32(&rwlock->rw_state, state,
2652 state & ~URWLOCK_READ_WAITERS);
2653 if (oldstate == state)
2654 break;
2655 state = oldstate;
2656 }
2657 }
2658
2659 umtxq_lock(&uq->uq_key);
2660 umtxq_unbusy(&uq->uq_key);
2661 umtxq_unlock(&uq->uq_key);
2662 }
2663 umtx_key_release(&uq->uq_key);
2664 return (error);
2665 }
2666
2667 static int
2668 do_rw_rdlock2(struct thread *td, void *obj, long val, struct timespec *timeout)
2669 {
2670 struct timespec ts, ts2, ts3;
2671 struct timeval tv;
2672 int error;
2673
2674 getnanouptime(&ts);
2675 timespecadd(&ts, timeout);
2676 TIMESPEC_TO_TIMEVAL(&tv, timeout);
2677 for (;;) {
2678 error = do_rw_rdlock(td, obj, val, tvtohz(&tv));
2679 if (error != ETIMEDOUT)
2680 break;
2681 getnanouptime(&ts2);
2682 if (timespeccmp(&ts2, &ts, >=)) {
2683 error = ETIMEDOUT;
2684 break;
2685 }
2686 ts3 = ts;
2687 timespecsub(&ts3, &ts2);
2688 TIMESPEC_TO_TIMEVAL(&tv, &ts3);
2689 }
2690 if (error == ERESTART)
2691 error = EINTR;
2692 return (error);
2693 }
2694
/*
 * Acquire a userland rwlock for writing.
 *
 * URWLOCK_WRITE_OWNER is set in rw_state with a compare-and-set once the
 * lock has neither a write owner nor any readers.  Otherwise
 * URWLOCK_WRITE_WAITERS is set, the thread counts itself in
 * rw_blocked_writers and sleeps on UMTX_EXCLUSIVE_QUEUE until the lock
 * becomes free or the sleep fails.
 *
 * timo is passed unchanged to umtxq_sleep().
 *
 * Returns 0 once the write lock is held, or the error from
 * umtx_key_get() or umtxq_sleep().
 */
2695 static int
2696 do_rw_wrlock(struct thread *td, struct urwlock *rwlock, int timo)
2697 {
2698 struct umtx_q *uq;
2699 uint32_t flags;
2700 int32_t state, oldstate;
2701 int32_t blocked_writers;
2702 int32_t blocked_readers;
2703 int error;
2704
2705 uq = td->td_umtxq;
2706 flags = fuword32(&rwlock->rw_flags);
2707 error = umtx_key_get(rwlock, TYPE_RWLOCK, GET_SHARE(flags), &uq->uq_key);
2708 if (error != 0)
2709 return (error);
2710
2711 blocked_readers = 0;
2712 for (;;) {
2713 state = fuword32(__DEVOLATILE(int32_t *, &rwlock->rw_state));
/* try to lock it: no write owner and no readers */
2714 while (!(state & URWLOCK_WRITE_OWNER) && URWLOCK_READER_COUNT(state) == 0) {
2715 oldstate = casuword32(&rwlock->rw_state, state, state | URWLOCK_WRITE_OWNER);
2716 if (oldstate == state) {
2717 umtx_key_release(&uq->uq_key);
2718 return (0);
2719 }
2720 state = oldstate;
2721 }
2722
/*
 * A previous umtxq_sleep() failed and the try-lock above did not succeed.
 * blocked_readers is non-zero only when this thread was the last blocked
 * writer and readers were recorded as blocked at that time; if no writer
 * owns or waits for the lock, wake all threads on the shared queue before
 * giving up.
 */
2723 if (error) {
2724 if (!(state & (URWLOCK_WRITE_OWNER|URWLOCK_WRITE_WAITERS)) &&
2725 blocked_readers != 0) {
2726 umtxq_lock(&uq->uq_key);
2727 umtxq_busy(&uq->uq_key);
2728 umtxq_signal_queue(&uq->uq_key, INT_MAX, UMTX_SHARED_QUEUE);
2729 umtxq_unbusy(&uq->uq_key);
2730 umtxq_unlock(&uq->uq_key);
2731 }
2732
2733 break;
2734 }
2735
2736 /* grab monitor lock */
2737 umtxq_lock(&uq->uq_key);
2738 umtxq_busy(&uq->uq_key);
2739 umtxq_unlock(&uq->uq_key);
2740
2741 /*
2742 * re-read the state, in case it changed between the try-lock above
2743 * and the check below
2744 */
2745 state = fuword32(__DEVOLATILE(int32_t *, &rwlock->rw_state));
2746
/* set write contention bit */
2747 while (((state & URWLOCK_WRITE_OWNER) || URWLOCK_READER_COUNT(state) != 0) &&
2748 (state & URWLOCK_WRITE_WAITERS) == 0) {
2749 oldstate = casuword32(&rwlock->rw_state, state, state | URWLOCK_WRITE_WAITERS);
2750 if (oldstate == state)
2751 goto sleep;
2752 state = oldstate;
2753 }
2754
/* the lock became free while setting the flag, restart */
2755 if (!(state & URWLOCK_WRITE_OWNER) && URWLOCK_READER_COUNT(state) == 0) {
2756 umtxq_lock(&uq->uq_key);
2757 umtxq_unbusy(&uq->uq_key);
2758 umtxq_unlock(&uq->uq_key);
2759 continue;
2760 }
2761 sleep:
/* contention bit is set; count this thread as a blocked writer */
2762 blocked_writers = fuword32(&rwlock->rw_blocked_writers);
2763 suword32(&rwlock->rw_blocked_writers, blocked_writers+1);
2764
/*
 * Sleep until the lock has neither a write owner nor readers.  The queue
 * is un-busied only after this thread has been inserted, and is busied
 * again before the thread is removed.
 */
2765 while ((state & URWLOCK_WRITE_OWNER) || URWLOCK_READER_COUNT(state) != 0) {
2766 umtxq_lock(&uq->uq_key);
2767 umtxq_insert_queue(uq, UMTX_EXCLUSIVE_QUEUE);
2768 umtxq_unbusy(&uq->uq_key);
2769
2770 error = umtxq_sleep(uq, "uwrlck", timo);
2771
2772 umtxq_busy(&uq->uq_key);
2773 umtxq_remove_queue(uq, UMTX_EXCLUSIVE_QUEUE);
2774 umtxq_unlock(&uq->uq_key);
2775 if (error)
2776 break;
2777 state = fuword32(__DEVOLATILE(int32_t *, &rwlock->rw_state));
2778 }
2779
/*
 * Decrease the blocked writer count.  The last blocked writer clears
 * URWLOCK_WRITE_WAITERS and records how many readers are blocked, for the
 * error path at the top of the loop.
 */
2780 blocked_writers = fuword32(&rwlock->rw_blocked_writers);
2781 suword32(&rwlock->rw_blocked_writers, blocked_writers-1);
2782 if (blocked_writers == 1) {
2783 state = fuword32(__DEVOLATILE(int32_t *, &rwlock->rw_state));
2784 for (;;) {
2785 oldstate = casuword32(&rwlock->rw_state, state,
2786 state & ~URWLOCK_WRITE_WAITERS);
2787 if (oldstate == state)
2788 break;
2789 state = oldstate;
2790 }
2791 blocked_readers = fuword32(&rwlock->rw_blocked_readers);
2792 } else
2793 blocked_readers = 0;
2794
2795 umtxq_lock(&uq->uq_key);
2796 umtxq_unbusy(&uq->uq_key);
2797 umtxq_unlock(&uq->uq_key);
2798 }
2799
2800 umtx_key_release(&uq->uq_key);
2801 return (error);
2802 }
2803
2804 static int
2805 do_rw_wrlock2(struct thread *td, void *obj, struct timespec *timeout)
2806 {
2807 struct timespec ts, ts2, ts3;
2808 struct timeval tv;
2809 int error;
2810
2811 getnanouptime(&ts);
2812 timespecadd(&ts, timeout);
2813 TIMESPEC_TO_TIMEVAL(&tv, timeout);
2814 for (;;) {
2815 error = do_rw_wrlock(td, obj, tvtohz(&tv));
2816 if (error != ETIMEDOUT)
2817 break;
2818 getnanouptime(&ts2);
2819 if (timespeccmp(&ts2, &ts, >=)) {
2820 error = ETIMEDOUT;
2821 break;
2822 }
2823 ts3 = ts;
2824 timespecsub(&ts3, &ts2);
2825 TIMESPEC_TO_TIMEVAL(&tv, &ts3);
2826 }
2827 if (error == ERESTART)
2828 error = EINTR;
2829 return (error);
2830 }
2831
2832 static int
2833 do_rw_unlock(struct thread *td, struct urwlock *rwlock)
2834 {
2835 struct umtx_q *uq;
2836 uint32_t flags;
2837 int32_t state, oldstate;
2838 int error, q, count;
2839
2840 uq = td->td_umtxq;
2841 flags = fuword32(&rwlock->rw_flags);
2842 error = umtx_key_get(rwlock, TYPE_RWLOCK, GET_SHARE(flags), &uq->uq_key);
2843 if (error != 0)
2844 return (error);
2845
2846 state = fuword32(__DEVOLATILE(int32_t *, &rwlock->rw_state));
2847 if (state & URWLOCK_WRITE_OWNER) {
2848 for (;;) {
2849 oldstate = casuword32(&rwlock->rw_state, state,
2850 state & ~URWLOCK_WRITE_OWNER);
2851 if (oldstate != state) {
2852 state = oldstate;
2853 if (!(oldstate & URWLOCK_WRITE_OWNER)) {
2854 error = EPERM;
2855 goto out;
2856 }
2857 } else
2858 break;
2859 }
2860 } else if (URWLOCK_READER_COUNT(state) != 0) {
2861 for (;;) {
2862 oldstate = casuword32(&rwlock->rw_state, state,
2863 state - 1);
2864 if (oldstate != state) {
2865 state = oldstate;
2866 if (URWLOCK_READER_COUNT(oldstate) == 0) {
2867 error = EPERM;
2868 goto out;
2869 }
2870 }
2871 else
2872 break;
2873 }
2874 } else {
2875 error = EPERM;
2876 goto out;
2877 }
2878
2879 count = 0;
2880
2881 if (!(flags & URWLOCK_PREFER_READER)) {
2882 if (state & URWLOCK_WRITE_WAITERS) {
2883 count = 1;
2884 q = UMTX_EXCLUSIVE_QUEUE;
2885 } else if (state & URWLOCK_READ_WAITERS) {
2886 count = INT_MAX;
2887 q = UMTX_SHARED_QUEUE;
2888 }
2889 } else {
2890 if (state & URWLOCK_READ_WAITERS) {
2891 count = INT_MAX;
2892 q = UMTX_SHARED_QUEUE;
2893 } else if (state & URWLOCK_WRITE_WAITERS) {
2894 count = 1;
2895 q = UMTX_EXCLUSIVE_QUEUE;
2896 }
2897 }
2898
2899 if (count) {
2900 umtxq_lock(&uq->uq_key);
2901 umtxq_busy(&uq->uq_key);
2902 umtxq_signal_queue(&uq->uq_key, count, q);
2903 umtxq_unbusy(&uq->uq_key);
2904 umtxq_unlock(&uq->uq_key);
2905 }
2906 out:
2907 umtx_key_release(&uq->uq_key);
2908 return (error);
2909 }
2910
2911 static int
2912 do_sem_wait(struct thread *td, struct _usem *sem, struct timespec *timeout)
2913 {
2914 struct umtx_q *uq;
2915 struct timeval tv;
2916 struct timespec cts, ets, tts;
2917 uint32_t flags, count;
2918 int error;
2919
2920 uq = td->td_umtxq;
2921 flags = fuword32(&sem->_flags);
2922 error = umtx_key_get(sem, TYPE_SEM, GET_SHARE(flags), &uq->uq_key);
2923 if (error != 0)
2924 return (error);
2925 umtxq_lock(&uq->uq_key);
2926 umtxq_busy(&uq->uq_key);
2927 umtxq_insert(uq);
2928 umtxq_unlock(&uq->uq_key);
2929
2930 if (fuword32(__DEVOLATILE(uint32_t *, &sem->_has_waiters)) == 0)
2931 casuword32(__DEVOLATILE(uint32_t *, &sem->_has_waiters), 0, 1);
2932
2933 count = fuword32(__DEVOLATILE(uint32_t *, &sem->_count));
2934 if (count != 0) {
2935 umtxq_lock(&uq->uq_key);
2936 umtxq_unbusy(&uq->uq_key);
2937 umtxq_remove(uq);
2938 umtxq_unlock(&uq->uq_key);
2939 umtx_key_release(&uq->uq_key);
2940 return (0);
2941 }
2942
2943 umtxq_lock(&uq->uq_key);
2944 umtxq_unbusy(&uq->uq_key);
2945 umtxq_unlock(&uq->uq_key);
2946
2947 umtxq_lock(&uq->uq_key);
2948 if (timeout == NULL) {
2949 error = umtxq_sleep(uq, "usem", 0);
2950 } else {
2951 getnanouptime(&ets);
2952 timespecadd(&ets, timeout);
2953 TIMESPEC_TO_TIMEVAL(&tv, timeout);
2954 for (;;) {
2955 error = umtxq_sleep(uq, "usem", tvtohz(&tv));
2956 if (error != ETIMEDOUT)
2957 break;
2958 getnanouptime(&cts);
2959 if (timespeccmp(&cts, &ets, >=)) {
2960 error = ETIMEDOUT;
2961 break;
2962 }
2963 tts = ets;
2964 timespecsub(&tts, &cts);
2965 TIMESPEC_TO_TIMEVAL(&tv, &tts);
2966 }
2967 }
2968
2969 if ((uq->uq_flags & UQF_UMTXQ) == 0)
2970 error = 0;
2971 else {
2972 umtxq_remove(uq);
2973 if (error == ERESTART)
2974 error = EINTR;
2975 }
2976 umtxq_unlock(&uq->uq_key);
2977 umtx_key_release(&uq->uq_key);
2978 return (error);
2979 }
2980
2981 /*
2982 * Signal a userland condition variable.
2983 */
2984 static int
2985 do_sem_wake(struct thread *td, struct _usem *sem)
2986 {
2987 struct umtx_key key;
2988 int error, cnt, nwake;
2989 uint32_t flags;
2990
2991 flags = fuword32(&sem->_flags);
2992 if ((error = umtx_key_get(sem, TYPE_SEM, GET_SHARE(flags), &key)) != 0)
2993 return (error);
2994 umtxq_lock(&key);
2995 umtxq_busy(&key);
2996 cnt = umtxq_count(&key);
2997 nwake = umtxq_signal(&key, 1);
2998 if (cnt <= nwake) {
2999 umtxq_unlock(&key);
3000 error = suword32(
3001 __DEVOLATILE(uint32_t *, &sem->_has_waiters), 0);
3002 umtxq_lock(&key);
3003 }
3004 umtxq_unbusy(&key);
3005 umtxq_unlock(&key);
3006 umtx_key_release(&key);
3007 return (error);
3008 }
3009
3010 int
3011 sys__umtx_lock(struct thread *td, struct _umtx_lock_args *uap)
3012 /* struct umtx *umtx */
3013 {
3014 return _do_lock_umtx(td, uap->umtx, td->td_tid, 0);
3015 }
3016
3017 int
3018 sys__umtx_unlock(struct thread *td, struct _umtx_unlock_args *uap)
3019 /* struct umtx *umtx */
3020 {
3021 return do_unlock_umtx(td, uap->umtx, td->td_tid);
3022 }
3023
3024 inline int
3025 umtx_copyin_timeout(const void *addr, struct timespec *tsp)
3026 {
3027 int error;
3028
3029 error = copyin(addr, tsp, sizeof(struct timespec));
3030 if (error == 0) {
3031 if (tsp->tv_sec < 0 ||
3032 tsp->tv_nsec >= 1000000000 ||
3033 tsp->tv_nsec < 0)
3034 error = EINVAL;
3035 }
3036 return (error);
3037 }
3038
3039 static int
3040 __umtx_op_lock_umtx(struct thread *td, struct _umtx_op_args *uap)
3041 {
3042 struct timespec *ts, timeout;
3043 int error;
3044
3045 /* Allow a null timespec (wait forever). */
3046 if (uap->uaddr2 == NULL)
3047 ts = NULL;
3048 else {
3049 error = umtx_copyin_timeout(uap->uaddr2, &timeout);
3050 if (error != 0)
3051 return (error);
3052 ts = &timeout;
3053 }
3054 return (do_lock_umtx(td, uap->obj, uap->val, ts));
3055 }
3056
3057 static int
3058 __umtx_op_unlock_umtx(struct thread *td, struct _umtx_op_args *uap)
3059 {
3060 return (do_unlock_umtx(td, uap->obj, uap->val));
3061 }
3062
3063 static int
3064 __umtx_op_wait(struct thread *td, struct _umtx_op_args *uap)
3065 {
3066 struct timespec *ts, timeout;
3067 int error;
3068
3069 if (uap->uaddr2 == NULL)
3070 ts = NULL;
3071 else {
3072 error = umtx_copyin_timeout(uap->uaddr2, &timeout);
3073 if (error != 0)
3074 return (error);
3075 ts = &timeout;
3076 }
3077 return do_wait(td, uap->obj, uap->val, ts, 0, 0);
3078 }
3079
3080 static int
3081 __umtx_op_wait_uint(struct thread *td, struct _umtx_op_args *uap)
3082 {
3083 struct timespec *ts, timeout;
3084 int error;
3085
3086 if (uap->uaddr2 == NULL)
3087 ts = NULL;
3088 else {
3089 error = umtx_copyin_timeout(uap->uaddr2, &timeout);
3090 if (error != 0)
3091 return (error);
3092 ts = &timeout;
3093 }
3094 return do_wait(td, uap->obj, uap->val, ts, 1, 0);
3095 }
3096
3097 static int
3098 __umtx_op_wait_uint_private(struct thread *td, struct _umtx_op_args *uap)
3099 {
3100 struct timespec *ts, timeout;
3101 int error;
3102
3103 if (uap->uaddr2 == NULL)
3104 ts = NULL;
3105 else {
3106 error = umtx_copyin_timeout(uap->uaddr2, &timeout);
3107 if (error != 0)
3108 return (error);
3109 ts = &timeout;
3110 }
3111 return do_wait(td, uap->obj, uap->val, ts, 1, 1);
3112 }
3113
3114 static int
3115 __umtx_op_wake(struct thread *td, struct _umtx_op_args *uap)
3116 {
3117 return (kern_umtx_wake(td, uap->obj, uap->val, 0));
3118 }
3119
3120 #define BATCH_SIZE 128
3121 static int
3122 __umtx_op_nwake_private(struct thread *td, struct _umtx_op_args *uap)
3123 {
3124 int count = uap->val;
3125 void *uaddrs[BATCH_SIZE];
3126 char **upp = (char **)uap->obj;
3127 int tocopy;
3128 int error = 0;
3129 int i, pos = 0;
3130
3131 while (count > 0) {
3132 tocopy = count;
3133 if (tocopy > BATCH_SIZE)
3134 tocopy = BATCH_SIZE;
3135 error = copyin(upp+pos, uaddrs, tocopy * sizeof(char *));
3136 if (error != 0)
3137 break;
3138 for (i = 0; i < tocopy; ++i)
3139 kern_umtx_wake(td, uaddrs[i], INT_MAX, 1);
3140 count -= tocopy;
3141 pos += tocopy;
3142 }
3143 return (error);
3144 }
3145
3146 static int
3147 __umtx_op_wake_private(struct thread *td, struct _umtx_op_args *uap)
3148 {
3149 return (kern_umtx_wake(td, uap->obj, uap->val, 1));
3150 }
3151
3152 static int
3153 __umtx_op_lock_umutex(struct thread *td, struct _umtx_op_args *uap)
3154 {
3155 struct timespec *ts, timeout;
3156 int error;
3157
3158 /* Allow a null timespec (wait forever). */
3159 if (uap->uaddr2 == NULL)
3160 ts = NULL;
3161 else {
3162 error = umtx_copyin_timeout(uap->uaddr2, &timeout);
3163 if (error != 0)
3164 return (error);
3165 ts = &timeout;
3166 }
3167 return do_lock_umutex(td, uap->obj, ts, 0);
3168 }
3169
3170 static int
3171 __umtx_op_trylock_umutex(struct thread *td, struct _umtx_op_args *uap)
3172 {
3173 return do_lock_umutex(td, uap->obj, NULL, _UMUTEX_TRY);
3174 }
3175
3176 static int
3177 __umtx_op_wait_umutex(struct thread *td, struct _umtx_op_args *uap)
3178 {
3179 struct timespec *ts, timeout;
3180 int error;
3181
3182 /* Allow a null timespec (wait forever). */
3183 if (uap->uaddr2 == NULL)
3184 ts = NULL;
3185 else {
3186 error = umtx_copyin_timeout(uap->uaddr2, &timeout);
3187 if (error != 0)
3188 return (error);
3189 ts = &timeout;
3190 }
3191 return do_lock_umutex(td, uap->obj, ts, _UMUTEX_WAIT);
3192 }
3193
3194 static int
3195 __umtx_op_wake_umutex(struct thread *td, struct _umtx_op_args *uap)
3196 {
3197 return do_wake_umutex(td, uap->obj);
3198 }
3199
3200 static int
3201 __umtx_op_unlock_umutex(struct thread *td, struct _umtx_op_args *uap)
3202 {
3203 return do_unlock_umutex(td, uap->obj);
3204 }
3205
3206 static int
3207 __umtx_op_set_ceiling(struct thread *td, struct _umtx_op_args *uap)
3208 {
3209 return do_set_ceiling(td, uap->obj, uap->val, uap->uaddr1);
3210 }
3211
3212 static int
3213 __umtx_op_cv_wait(struct thread *td, struct _umtx_op_args *uap)
3214 {
3215 struct timespec *ts, timeout;
3216 int error;
3217
3218 /* Allow a null timespec (wait forever). */
3219 if (uap->uaddr2 == NULL)
3220 ts = NULL;
3221 else {
3222 error = umtx_copyin_timeout(uap->uaddr2, &timeout);
3223 if (error != 0)
3224 return (error);
3225 ts = &timeout;
3226 }
3227 return (do_cv_wait(td, uap->obj, uap->uaddr1, ts, uap->val));
3228 }
3229
3230 static int
3231 __umtx_op_cv_signal(struct thread *td, struct _umtx_op_args *uap)
3232 {
3233 return do_cv_signal(td, uap->obj);
3234 }
3235
3236 static int
3237 __umtx_op_cv_broadcast(struct thread *td, struct _umtx_op_args *uap)
3238 {
3239 return do_cv_broadcast(td, uap->obj);
3240 }
3241
3242 static int
3243 __umtx_op_rw_rdlock(struct thread *td, struct _umtx_op_args *uap)
3244 {
3245 struct timespec timeout;
3246 int error;
3247
3248 /* Allow a null timespec (wait forever). */
3249 if (uap->uaddr2 == NULL) {
3250 error = do_rw_rdlock(td, uap->obj, uap->val, 0);
3251 } else {
3252 error = umtx_copyin_timeout(uap->uaddr2, &timeout);
3253 if (error != 0)
3254 return (error);
3255 error = do_rw_rdlock2(td, uap->obj, uap->val, &timeout);
3256 }
3257 return (error);
3258 }
3259
3260 static int
3261 __umtx_op_rw_wrlock(struct thread *td, struct _umtx_op_args *uap)
3262 {
3263 struct timespec timeout;
3264 int error;
3265
3266 /* Allow a null timespec (wait forever). */
3267 if (uap->uaddr2 == NULL) {
3268 error = do_rw_wrlock(td, uap->obj, 0);
3269 } else {
3270 error = umtx_copyin_timeout(uap->uaddr2, &timeout);
3271 if (error != 0)
3272 return (error);
3273
3274 error = do_rw_wrlock2(td, uap->obj, &timeout);
3275 }
3276 return (error);
3277 }
3278
3279 static int
3280 __umtx_op_rw_unlock(struct thread *td, struct _umtx_op_args *uap)
3281 {
3282 return do_rw_unlock(td, uap->obj);
3283 }
3284
3285 static int
3286 __umtx_op_sem_wait(struct thread *td, struct _umtx_op_args *uap)
3287 {
3288 struct timespec *ts, timeout;
3289 int error;
3290
3291 /* Allow a null timespec (wait forever). */
3292 if (uap->uaddr2 == NULL)
3293 ts = NULL;
3294 else {
3295 error = umtx_copyin_timeout(uap->uaddr2, &timeout);
3296 if (error != 0)
3297 return (error);
3298 ts = &timeout;
3299 }
3300 return (do_sem_wait(td, uap->obj, ts));
3301 }
3302
3303 static int
3304 __umtx_op_sem_wake(struct thread *td, struct _umtx_op_args *uap)
3305 {
3306 return do_sem_wake(td, uap->obj);
3307 }
3308
3309 static int
3310 __umtx_op_wake2_umutex(struct thread *td, struct _umtx_op_args *uap)
3311 {
3312 return do_wake2_umutex(td, uap->obj, uap->val);
3313 }
3314
/* Signature shared by every _umtx_op request handler. */
3315 typedef int (*_umtx_op_func)(struct thread *td, struct _umtx_op_args *uap);
3316
/*
 * Native handler table.  sys__umtx_op() indexes it directly with uap->op
 * after checking that the value is below UMTX_OP_MAX, so the position of
 * each entry must match the request code named in its comment.
 */
3317 static _umtx_op_func op_table[] = {
3318 __umtx_op_lock_umtx, /* UMTX_OP_LOCK */
3319 __umtx_op_unlock_umtx, /* UMTX_OP_UNLOCK */
3320 __umtx_op_wait, /* UMTX_OP_WAIT */
3321 __umtx_op_wake, /* UMTX_OP_WAKE */
3322 __umtx_op_trylock_umutex, /* UMTX_OP_MUTEX_TRYLOCK */
3323 __umtx_op_lock_umutex, /* UMTX_OP_MUTEX_LOCK */
3324 __umtx_op_unlock_umutex, /* UMTX_OP_MUTEX_UNLOCK */
3325 __umtx_op_set_ceiling, /* UMTX_OP_SET_CEILING */
3326 __umtx_op_cv_wait, /* UMTX_OP_CV_WAIT*/
3327 __umtx_op_cv_signal, /* UMTX_OP_CV_SIGNAL */
3328 __umtx_op_cv_broadcast, /* UMTX_OP_CV_BROADCAST */
3329 __umtx_op_wait_uint, /* UMTX_OP_WAIT_UINT */
3330 __umtx_op_rw_rdlock, /* UMTX_OP_RW_RDLOCK */
3331 __umtx_op_rw_wrlock, /* UMTX_OP_RW_WRLOCK */
3332 __umtx_op_rw_unlock, /* UMTX_OP_RW_UNLOCK */
3333 __umtx_op_wait_uint_private, /* UMTX_OP_WAIT_UINT_PRIVATE */
3334 __umtx_op_wake_private, /* UMTX_OP_WAKE_PRIVATE */
3335 __umtx_op_wait_umutex, /* UMTX_OP_UMUTEX_WAIT */
3336 __umtx_op_wake_umutex, /* UMTX_OP_UMUTEX_WAKE */
3337 __umtx_op_sem_wait, /* UMTX_OP_SEM_WAIT */
3338 __umtx_op_sem_wake, /* UMTX_OP_SEM_WAKE */
3339 __umtx_op_nwake_private, /* UMTX_OP_NWAKE_PRIVATE */
3340 __umtx_op_wake2_umutex /* UMTX_OP_UMUTEX_WAKE2 */
3341 };
3342
3343 int
3344 sys__umtx_op(struct thread *td, struct _umtx_op_args *uap)
3345 {
3346 if ((unsigned)uap->op < UMTX_OP_MAX)
3347 return (*op_table[uap->op])(td, uap);
3348 return (EINVAL);
3349 }
3350
3351 #ifdef COMPAT_FREEBSD32
3352 int
3353 freebsd32_umtx_lock(struct thread *td, struct freebsd32_umtx_lock_args *uap)
3354 /* struct umtx *umtx */
3355 {
3356 return (do_lock_umtx32(td, (uint32_t *)uap->umtx, td->td_tid, NULL));
3357 }
3358
3359 int
3360 freebsd32_umtx_unlock(struct thread *td, struct freebsd32_umtx_unlock_args *uap)
3361 /* struct umtx *umtx */
3362 {
3363 return (do_unlock_umtx32(td, (uint32_t *)uap->umtx, td->td_tid));
3364 }
3365
/*
 * Timespec as copied in from a 32-bit process: two 32-bit words.
 *
 * The fields are signed so that a negative tv_sec or tv_nsec supplied by
 * userland is seen as negative.  With unsigned fields a "< 0" validity
 * check can never be true, and a negative timeout would be accepted as a
 * very large positive one.
 */
struct timespec32 {
	int32_t	tv_sec;
	int32_t	tv_nsec;
};
3370
3371 static inline int
3372 umtx_copyin_timeout32(void *addr, struct timespec *tsp)
3373 {
3374 struct timespec32 ts32;
3375 int error;
3376
3377 error = copyin(addr, &ts32, sizeof(struct timespec32));
3378 if (error == 0) {
3379 if (ts32.tv_sec < 0 ||
3380 ts32.tv_nsec >= 1000000000 ||
3381 ts32.tv_nsec < 0)
3382 error = EINVAL;
3383 else {
3384 tsp->tv_sec = ts32.tv_sec;
3385 tsp->tv_nsec = ts32.tv_nsec;
3386 }
3387 }
3388 return (error);
3389 }
3390
3391 static int
3392 __umtx_op_lock_umtx_compat32(struct thread *td, struct _umtx_op_args *uap)
3393 {
3394 struct timespec *ts, timeout;
3395 int error;
3396
3397 /* Allow a null timespec (wait forever). */
3398 if (uap->uaddr2 == NULL)
3399 ts = NULL;
3400 else {
3401 error = umtx_copyin_timeout32(uap->uaddr2, &timeout);
3402 if (error != 0)
3403 return (error);
3404 ts = &timeout;
3405 }
3406 return (do_lock_umtx32(td, uap->obj, uap->val, ts));
3407 }
3408
3409 static int
3410 __umtx_op_unlock_umtx_compat32(struct thread *td, struct _umtx_op_args *uap)
3411 {
3412 return (do_unlock_umtx32(td, uap->obj, (uint32_t)uap->val));
3413 }
3414
3415 static int
3416 __umtx_op_wait_compat32(struct thread *td, struct _umtx_op_args *uap)
3417 {
3418 struct timespec *ts, timeout;
3419 int error;
3420
3421 if (uap->uaddr2 == NULL)
3422 ts = NULL;
3423 else {
3424 error = umtx_copyin_timeout32(uap->uaddr2, &timeout);
3425 if (error != 0)
3426 return (error);
3427 ts = &timeout;
3428 }
3429 return do_wait(td, uap->obj, uap->val, ts, 1, 0);
3430 }
3431
3432 static int
3433 __umtx_op_lock_umutex_compat32(struct thread *td, struct _umtx_op_args *uap)
3434 {
3435 struct timespec *ts, timeout;
3436 int error;
3437
3438 /* Allow a null timespec (wait forever). */
3439 if (uap->uaddr2 == NULL)
3440 ts = NULL;
3441 else {
3442 error = umtx_copyin_timeout32(uap->uaddr2, &timeout);
3443 if (error != 0)
3444 return (error);
3445 ts = &timeout;
3446 }
3447 return do_lock_umutex(td, uap->obj, ts, 0);
3448 }
3449
3450 static int
3451 __umtx_op_wait_umutex_compat32(struct thread *td, struct _umtx_op_args *uap)
3452 {
3453 struct timespec *ts, timeout;
3454 int error;
3455
3456 /* Allow a null timespec (wait forever). */
3457 if (uap->uaddr2 == NULL)
3458 ts = NULL;
3459 else {
3460 error = umtx_copyin_timeout32(uap->uaddr2, &timeout);
3461 if (error != 0)
3462 return (error);
3463 ts = &timeout;
3464 }
3465 return do_lock_umutex(td, uap->obj, ts, _UMUTEX_WAIT);
3466 }
3467
3468 static int
3469 __umtx_op_cv_wait_compat32(struct thread *td, struct _umtx_op_args *uap)
3470 {
3471 struct timespec *ts, timeout;
3472 int error;
3473
3474 /* Allow a null timespec (wait forever). */
3475 if (uap->uaddr2 == NULL)
3476 ts = NULL;
3477 else {
3478 error = umtx_copyin_timeout32(uap->uaddr2, &timeout);
3479 if (error != 0)
3480 return (error);
3481 ts = &timeout;
3482 }
3483 return (do_cv_wait(td, uap->obj, uap->uaddr1, ts, uap->val));
3484 }
3485
3486 static int
3487 __umtx_op_rw_rdlock_compat32(struct thread *td, struct _umtx_op_args *uap)
3488 {
3489 struct timespec timeout;
3490 int error;
3491
3492 /* Allow a null timespec (wait forever). */
3493 if (uap->uaddr2 == NULL) {
3494 error = do_rw_rdlock(td, uap->obj, uap->val, 0);
3495 } else {
3496 error = umtx_copyin_timeout32(uap->uaddr2, &timeout);
3497 if (error != 0)
3498 return (error);
3499 error = do_rw_rdlock2(td, uap->obj, uap->val, &timeout);
3500 }
3501 return (error);
3502 }
3503
3504 static int
3505 __umtx_op_rw_wrlock_compat32(struct thread *td, struct _umtx_op_args *uap)
3506 {
3507 struct timespec timeout;
3508 int error;
3509
3510 /* Allow a null timespec (wait forever). */
3511 if (uap->uaddr2 == NULL) {
3512 error = do_rw_wrlock(td, uap->obj, 0);
3513 } else {
3514 error = umtx_copyin_timeout32(uap->uaddr2, &timeout);
3515 if (error != 0)
3516 return (error);
3517
3518 error = do_rw_wrlock2(td, uap->obj, &timeout);
3519 }
3520 return (error);
3521 }
3522
3523 static int
3524 __umtx_op_wait_uint_private_compat32(struct thread *td, struct _umtx_op_args *uap)
3525 {
3526 struct timespec *ts, timeout;
3527 int error;
3528
3529 if (uap->uaddr2 == NULL)
3530 ts = NULL;
3531 else {
3532 error = umtx_copyin_timeout32(uap->uaddr2, &timeout);
3533 if (error != 0)
3534 return (error);
3535 ts = &timeout;
3536 }
3537 return do_wait(td, uap->obj, uap->val, ts, 1, 1);
3538 }
3539
3540 static int
3541 __umtx_op_sem_wait_compat32(struct thread *td, struct _umtx_op_args *uap)
3542 {
3543 struct timespec *ts, timeout;
3544 int error;
3545
3546 /* Allow a null timespec (wait forever). */
3547 if (uap->uaddr2 == NULL)
3548 ts = NULL;
3549 else {
3550 error = umtx_copyin_timeout32(uap->uaddr2, &timeout);
3551 if (error != 0)
3552 return (error);
3553 ts = &timeout;
3554 }
3555 return (do_sem_wait(td, uap->obj, ts));
3556 }
3557
3558 static int
3559 __umtx_op_nwake_private32(struct thread *td, struct _umtx_op_args *uap)
3560 {
3561 int count = uap->val;
3562 uint32_t uaddrs[BATCH_SIZE];
3563 uint32_t **upp = (uint32_t **)uap->obj;
3564 int tocopy;
3565 int error = 0;
3566 int i, pos = 0;
3567
3568 while (count > 0) {
3569 tocopy = count;
3570 if (tocopy > BATCH_SIZE)
3571 tocopy = BATCH_SIZE;
3572 error = copyin(upp+pos, uaddrs, tocopy * sizeof(uint32_t));
3573 if (error != 0)
3574 break;
3575 for (i = 0; i < tocopy; ++i)
3576 kern_umtx_wake(td, (void *)(intptr_t)uaddrs[i],
3577 INT_MAX, 1);
3578 count -= tocopy;
3579 pos += tocopy;
3580 }
3581 return (error);
3582 }
3583
/*
 * Handler table for 32-bit processes.  freebsd32_umtx_op() indexes it
 * directly with uap->op after checking that the value is below
 * UMTX_OP_MAX, so the entries must stay in the same order as op_table.
 * Requests that copy in a timespec, or an array of user addresses, use a
 * 32-bit aware handler; the others share the native handler.
 */
3584 static _umtx_op_func op_table_compat32[] = {
3585 __umtx_op_lock_umtx_compat32, /* UMTX_OP_LOCK */
3586 __umtx_op_unlock_umtx_compat32, /* UMTX_OP_UNLOCK */
3587 __umtx_op_wait_compat32, /* UMTX_OP_WAIT */
3588 __umtx_op_wake, /* UMTX_OP_WAKE */
3589 __umtx_op_trylock_umutex, /* UMTX_OP_MUTEX_TRYLOCK */
3590 __umtx_op_lock_umutex_compat32, /* UMTX_OP_MUTEX_LOCK */
3591 __umtx_op_unlock_umutex, /* UMTX_OP_MUTEX_UNLOCK */
3592 __umtx_op_set_ceiling, /* UMTX_OP_SET_CEILING */
3593 __umtx_op_cv_wait_compat32, /* UMTX_OP_CV_WAIT*/
3594 __umtx_op_cv_signal, /* UMTX_OP_CV_SIGNAL */
3595 __umtx_op_cv_broadcast, /* UMTX_OP_CV_BROADCAST */
3596 __umtx_op_wait_compat32, /* UMTX_OP_WAIT_UINT */
3597 __umtx_op_rw_rdlock_compat32, /* UMTX_OP_RW_RDLOCK */
3598 __umtx_op_rw_wrlock_compat32, /* UMTX_OP_RW_WRLOCK */
3599 __umtx_op_rw_unlock, /* UMTX_OP_RW_UNLOCK */
3600 __umtx_op_wait_uint_private_compat32, /* UMTX_OP_WAIT_UINT_PRIVATE */
3601 __umtx_op_wake_private, /* UMTX_OP_WAKE_PRIVATE */
3602 __umtx_op_wait_umutex_compat32, /* UMTX_OP_UMUTEX_WAIT */
3603 __umtx_op_wake_umutex, /* UMTX_OP_UMUTEX_WAKE */
3604 __umtx_op_sem_wait_compat32, /* UMTX_OP_SEM_WAIT */
3605 __umtx_op_sem_wake, /* UMTX_OP_SEM_WAKE */
3606 __umtx_op_nwake_private32, /* UMTX_OP_NWAKE_PRIVATE */
3607 __umtx_op_wake2_umutex /* UMTX_OP_UMUTEX_WAKE2 */
3608 };
3609
3610 int
3611 freebsd32_umtx_op(struct thread *td, struct freebsd32_umtx_op_args *uap)
3612 {
3613 if ((unsigned)uap->op < UMTX_OP_MAX)
3614 return (*op_table_compat32[uap->op])(td,
3615 (struct _umtx_op_args *)uap);
3616 return (EINVAL);
3617 }
3618 #endif
3619
3620 void
3621 umtx_thread_init(struct thread *td)
3622 {
3623 td->td_umtxq = umtxq_alloc();
3624 td->td_umtxq->uq_thread = td;
3625 }
3626
3627 void
3628 umtx_thread_fini(struct thread *td)
3629 {
3630 umtxq_free(td->td_umtxq);
3631 }
3632
3633 /*
3634 * It will be called when new thread is created, e.g fork().
3635 */
3636 void
3637 umtx_thread_alloc(struct thread *td)
3638 {
3639 struct umtx_q *uq;
3640
3641 uq = td->td_umtxq;
3642 uq->uq_inherited_pri = PRI_MAX;
3643
3644 KASSERT(uq->uq_flags == 0, ("uq_flags != 0"));
3645 KASSERT(uq->uq_thread == td, ("uq_thread != td"));
3646 KASSERT(uq->uq_pi_blocked == NULL, ("uq_pi_blocked != NULL"));
3647 KASSERT(TAILQ_EMPTY(&uq->uq_pi_contested), ("uq_pi_contested is not empty"));
3648 }
3649
3650 /*
3651 * exec() hook.
3652 */
3653 static void
3654 umtx_exec_hook(void *arg __unused, struct proc *p __unused,
3655 struct image_params *imgp __unused)
3656 {
3657 umtx_thread_cleanup(curthread);
3658 }
3659
/*
 * thread_exit() hook: clean up the umtx data of the exiting thread.
 */
void
umtx_thread_exit(struct thread *td)
{

	umtx_thread_cleanup(td);
	return;
}
3668
3669 /*
3670 * clean up umtx data.
3671 */
3672 static void
3673 umtx_thread_cleanup(struct thread *td)
3674 {
3675 struct umtx_q *uq;
3676 struct umtx_pi *pi;
3677
3678 if ((uq = td->td_umtxq) == NULL)
3679 return;
3680
3681 mtx_lock_spin(&umtx_lock);
3682 uq->uq_inherited_pri = PRI_MAX;
3683 while ((pi = TAILQ_FIRST(&uq->uq_pi_contested)) != NULL) {
3684 pi->pi_owner = NULL;
3685 TAILQ_REMOVE(&uq->uq_pi_contested, pi, pi_link);
3686 }
3687 mtx_unlock_spin(&umtx_lock);
3688 thread_lock(td);
3689 sched_lend_user_prio(td, PRI_MAX);
3690 thread_unlock(td);
3691 }
Cache object: 2ecc3ea95138b2f410fd38debc1318aa
|