FreeBSD/Linux Kernel Cross Reference
sys/kern/kern_umtx.c
1 /*-
2 * Copyright (c) 2004, David Xu <davidxu@freebsd.org>
3 * Copyright (c) 2002, Jeffrey Roberson <jeff@freebsd.org>
4 * All rights reserved.
5 *
6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions
8 * are met:
9 * 1. Redistributions of source code must retain the above copyright
10 * notice unmodified, this list of conditions, and the following
11 * disclaimer.
12 * 2. Redistributions in binary form must reproduce the above copyright
13 * notice, this list of conditions and the following disclaimer in the
14 * documentation and/or other materials provided with the distribution.
15 *
16 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
17 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
18 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
19 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
20 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
21 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
22 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
23 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
24 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
25 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26 */
27
28 #include <sys/cdefs.h>
29 __FBSDID("$FreeBSD: releng/9.0/sys/kern/kern_umtx.c 225617 2011-09-16 13:58:51Z kmacy $");
30
31 #include "opt_compat.h"
32 #include <sys/param.h>
33 #include <sys/kernel.h>
34 #include <sys/limits.h>
35 #include <sys/lock.h>
36 #include <sys/malloc.h>
37 #include <sys/mutex.h>
38 #include <sys/priv.h>
39 #include <sys/proc.h>
40 #include <sys/sched.h>
41 #include <sys/smp.h>
42 #include <sys/sysctl.h>
43 #include <sys/sysent.h>
44 #include <sys/systm.h>
45 #include <sys/sysproto.h>
46 #include <sys/syscallsubr.h>
47 #include <sys/eventhandler.h>
48 #include <sys/umtx.h>
49
50 #include <vm/vm.h>
51 #include <vm/vm_param.h>
52 #include <vm/pmap.h>
53 #include <vm/vm_map.h>
54 #include <vm/vm_object.h>
55
56 #include <machine/cpu.h>
57
58 #ifdef COMPAT_FREEBSD32
59 #include <compat/freebsd32/freebsd32_proto.h>
60 #endif
61
62 #define _UMUTEX_TRY 1
63 #define _UMUTEX_WAIT 2
64
65 /* Priority inheritance mutex info. */
66 struct umtx_pi {
67 /* Owner thread */
68 struct thread *pi_owner;
69
70 /* Reference count */
71 int pi_refcount;
72
73 /* List entry to link umtx holding by thread (on uq_pi_contested) */
74 TAILQ_ENTRY(umtx_pi) pi_link;
75
76 /* List entry in hash (on the chain's uc_pi_list) */
77 TAILQ_ENTRY(umtx_pi) pi_hashlink;
78
79 /* List for waiters (umtx_q entries blocked on this PI mutex) */
80 TAILQ_HEAD(,umtx_q) pi_blocked;
81
82 /* Identify a userland lock object */
83 struct umtx_key pi_key;
84 };
85
86 /* A userland synchronous object user: one per thread, in td_umtxq. */
87 struct umtx_q {
88 /* Linked list for the hash. */
89 TAILQ_ENTRY(umtx_q) uq_link;
90
91 /* Umtx key. */
92 struct umtx_key uq_key;
93
94 /* Umtx flags. */
95 int uq_flags;
96 #define UQF_UMTXQ 0x0001
97
98 /* The thread this queue entry belongs to (the waiter). */
99 struct thread *uq_thread;
100
101 /*
102 * Blocked on PI mutex. Reads may hold either the chain lock
103 * or umtx_lock; writes must hold both the chain lock and
104 * umtx_lock.
105 */
106 struct umtx_pi *uq_pi_blocked;
107
108 /* Entry on a umtx_pi's pi_blocked list */
109 TAILQ_ENTRY(umtx_q) uq_lockq;
110
111 /* Contested PI mutexes associated with this thread (umtx_pi list) */
112 TAILQ_HEAD(,umtx_pi) uq_pi_contested;
113
114 /* Inherited priority from PP mutex */
115 u_char uq_inherited_pri;
116
117 /* Spare queue ready to be reused (see umtxq_insert/remove_queue) */
118 struct umtxq_queue *uq_spare_queue;
119
120 /* The queue this entry is currently sleeping on */
121 struct umtxq_queue *uq_cur_queue;
122 };
123
124 TAILQ_HEAD(umtxq_head, umtx_q);
125
126 /* Per-key wait-queue */
127 struct umtxq_queue {
128 struct umtxq_head head;	/* Waiters (umtx_q) sleeping on this key */
129 struct umtx_key key;	/* The key shared by all waiters on head */
130 LIST_ENTRY(umtxq_queue) link;	/* On uc_queue[] or uc_spare_queue */
131 int length;	/* Number of entries on head */
132 };
133
134 LIST_HEAD(umtxq_list, umtxq_queue);
135
136 /* Userland lock object's wait-queue chain (one hash bucket) */
137 struct umtxq_chain {
138 /* Lock for this chain. */
139 struct mtx uc_lock;
140
141 /* List of sleep queues. */
142 struct umtxq_list uc_queue[2];
143 #define UMTX_SHARED_QUEUE 0
144 #define UMTX_EXCLUSIVE_QUEUE 1
145
146 /* Recycled per-key queue heads available for reuse. */
147 LIST_HEAD(, umtxq_queue) uc_spare_queue;
148
149 /* Busy flag: set while an operation may block with uc_lock dropped. */
150 char uc_busy;
151
152 /* Chain lock waiters (threads sleeping in umtxq_busy) */
153 int uc_waiters;
154
155 /* All PI in the list */
156 TAILQ_HEAD(,umtx_pi) uc_pi_list;
157
158 };
158
159 #define UMTXQ_LOCKED_ASSERT(uc) mtx_assert(&(uc)->uc_lock, MA_OWNED)
/*
 * Assert that the chain is marked busy.  Test the flag's value, not its
 * address: "&(uc)->uc_busy" is always non-NULL, which made the original
 * assertion vacuously true.
 */
160 #define UMTXQ_BUSY_ASSERT(uc) KASSERT((uc)->uc_busy != 0, ("umtx chain is not busy"))
161
162 /*
163 * Don't propagate time-sharing priority; there is a security reason:
164 * a user could create a PI-mutex, have thread A lock it, and have
165 * another thread B block on it. Because B is sleeping, its priority
166 * would be boosted, and priority propagation would boost A's priority
167 * as well; A's priority would then never be lowered even if A were
168 * using 100% CPU, which is unfair to other processes.
169 */
170
/* Clamp time-sharing user priorities to PRI_MAX_TIMESHARE (see note above). */
171 #define UPRI(td) (((td)->td_user_pri >= PRI_MIN_TIMESHARE &&\
172 (td)->td_user_pri <= PRI_MAX_TIMESHARE) ?\
173 PRI_MAX_TIMESHARE : (td)->td_user_pri)
174
/* Fibonacci-hash multiplier; UMTX_SHIFTS keeps the top 9 bits (UMTX_CHAINS = 2^9). */
175 #define GOLDEN_RATIO_PRIME 2654404609U
176 #define UMTX_CHAINS 512
177 #define UMTX_SHIFTS (__WORD_BIT - 9)
178
/* Map user-visible USYNC_PROCESS_SHARED flag to the internal share type. */
179 #define GET_SHARE(flags) \
180 (((flags) & USYNC_PROCESS_SHARED) == 0 ? THREAD_SHARE : PROCESS_SHARE)
181
/* Spin iterations before sleeping when a chain is busy (SMP only). */
182 #define BUSY_SPINS 200
183
184 static uma_zone_t umtx_pi_zone;
185 static struct umtxq_chain umtxq_chains[2][UMTX_CHAINS];
186 static MALLOC_DEFINE(M_UMTX, "umtx", "UMTX queue memory");
187 static int umtx_pi_allocated;
188
189 SYSCTL_NODE(_debug, OID_AUTO, umtx, CTLFLAG_RW, 0, "umtx debug");
190 SYSCTL_INT(_debug_umtx, OID_AUTO, umtx_pi_allocated, CTLFLAG_RD,
191 &umtx_pi_allocated, 0, "Allocated umtx_pi");
192
193 static void umtxq_sysinit(void *);
194 static void umtxq_hash(struct umtx_key *key);
195 static struct umtxq_chain *umtxq_getchain(struct umtx_key *key);
196 static void umtxq_lock(struct umtx_key *key);
197 static void umtxq_unlock(struct umtx_key *key);
198 static void umtxq_busy(struct umtx_key *key);
199 static void umtxq_unbusy(struct umtx_key *key);
200 static void umtxq_insert_queue(struct umtx_q *uq, int q);
201 static void umtxq_remove_queue(struct umtx_q *uq, int q);
202 static int umtxq_sleep(struct umtx_q *uq, const char *wmesg, int timo);
203 static int umtxq_count(struct umtx_key *key);
204 static struct umtx_pi *umtx_pi_alloc(int);
205 static void umtx_pi_free(struct umtx_pi *pi);
206 static int do_unlock_pp(struct thread *td, struct umutex *m, uint32_t flags);
207 static void umtx_thread_cleanup(struct thread *td);
208 static void umtx_exec_hook(void *arg __unused, struct proc *p __unused,
209 struct image_params *imgp __unused);
210 SYSINIT(umtx, SI_SUB_EVENTHANDLER+1, SI_ORDER_MIDDLE, umtxq_sysinit, NULL);
211
212 #define umtxq_signal(key, nwake) umtxq_signal_queue((key), (nwake), UMTX_SHARED_QUEUE)
213 #define umtxq_insert(uq) umtxq_insert_queue((uq), UMTX_SHARED_QUEUE)
214 #define umtxq_remove(uq) umtxq_remove_queue((uq), UMTX_SHARED_QUEUE)
215
216 static struct mtx umtx_lock;
217
/*
 * One-time initialization: create the UMA zone for umtx_pi records,
 * initialize both chain tables, the global umtx spin lock, and register
 * the process_exec event hook.
 */
218 static void
219 umtxq_sysinit(void *arg __unused)
220 {
221 int i, j;
222
223 umtx_pi_zone = uma_zcreate("umtx pi", sizeof(struct umtx_pi),
224 NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0);
225 for (i = 0; i < 2; ++i) {
226 for (j = 0; j < UMTX_CHAINS; ++j) {
227 mtx_init(&umtxq_chains[i][j].uc_lock, "umtxql", NULL,
228 MTX_DEF | MTX_DUPOK);
229 LIST_INIT(&umtxq_chains[i][j].uc_queue[0]);
230 LIST_INIT(&umtxq_chains[i][j].uc_queue[1]);
231 LIST_INIT(&umtxq_chains[i][j].uc_spare_queue);
232 TAILQ_INIT(&umtxq_chains[i][j].uc_pi_list);
233 umtxq_chains[i][j].uc_busy = 0;
234 umtxq_chains[i][j].uc_waiters = 0;
235 }
236 }
237 mtx_init(&umtx_lock, "umtx lock", NULL, MTX_SPIN);
238 EVENTHANDLER_REGISTER(process_exec, umtx_exec_hook, NULL,
239 EVENTHANDLER_PRI_ANY);
240 }
241
/*
 * Allocate a per-thread umtx_q, including its spare per-key wait queue.
 * May sleep (M_WAITOK); never returns NULL.
 */
242 struct umtx_q *
243 umtxq_alloc(void)
244 {
245 struct umtx_q *uq;
246
247 uq = malloc(sizeof(struct umtx_q), M_UMTX, M_WAITOK | M_ZERO);
248 uq->uq_spare_queue = malloc(sizeof(struct umtxq_queue), M_UMTX, M_WAITOK | M_ZERO);
249 TAILQ_INIT(&uq->uq_spare_queue->head);
250 TAILQ_INIT(&uq->uq_pi_contested);
251 uq->uq_inherited_pri = PRI_MAX;
252 return (uq);
253 }
254
/* Free a umtx_q and its spare queue (which must still be attached). */
255 void
256 umtxq_free(struct umtx_q *uq)
257 {
258 MPASS(uq->uq_spare_queue != NULL);
259 free(uq->uq_spare_queue, M_UMTX);
260 free(uq, M_UMTX);
261 }
262
/* Compute key->hash (chain index) from the key's address pair. */
263 static inline void
264 umtxq_hash(struct umtx_key *key)
265 {
266 unsigned n = (uintptr_t)key->info.both.a + key->info.both.b;
267 key->hash = ((n * GOLDEN_RATIO_PRIME) >> UMTX_SHIFTS) % UMTX_CHAINS;
268 }
269
/*
 * Map a key to its wait-queue chain.  Types up to TYPE_SEM use the
 * second chain table; all others use the first.
 */
270 static inline struct umtxq_chain *
271 umtxq_getchain(struct umtx_key *key)
272 {
273 if (key->type <= TYPE_SEM)
274 return (&umtxq_chains[1][key->hash]);
275 return (&umtxq_chains[0][key->hash]);
276 }
277
278 /*
279 * Lock a chain.
280 */
281 static inline void
282 umtxq_lock(struct umtx_key *key)
283 {
284 struct umtxq_chain *uc;
285
286 uc = umtxq_getchain(key);
287 mtx_lock(&uc->uc_lock);
288 }
289
290 /*
291 * Unlock a chain.
292 */
293 static inline void
294 umtxq_unlock(struct umtx_key *key)
295 {
296 struct umtxq_chain *uc;
297
298 uc = umtxq_getchain(key);
299 mtx_unlock(&uc->uc_lock);
300 }
301
302 /*
303 * Set chain to busy state when following operation
304 * may be blocked (kernel mutex can not be used).
305 * Called and returns with the chain lock held.
306 */
306 static inline void
307 umtxq_busy(struct umtx_key *key)
308 {
309 struct umtxq_chain *uc;
310
311 uc = umtxq_getchain(key);
312 mtx_assert(&uc->uc_lock, MA_OWNED);
313 if (uc->uc_busy) {
314 #ifdef SMP
315 if (smp_cpus > 1) {
316 int count = BUSY_SPINS;
317 if (count > 0) {
/* Spin briefly with the chain lock dropped before sleeping. */
318 umtxq_unlock(key);
319 while (uc->uc_busy && --count > 0)
320 cpu_spinwait();
321 umtxq_lock(key);
322 }
323 }
324 #endif
/* Sleep until the current holder calls umtxq_unbusy(). */
325 while (uc->uc_busy) {
326 uc->uc_waiters++;
327 msleep(uc, &uc->uc_lock, 0, "umtxqb", 0);
328 uc->uc_waiters--;
329 }
330 }
331 uc->uc_busy = 1;
332 }
333
334 /*
335 * Unbusy a chain.  Caller must hold the chain lock and have the
336 * chain marked busy; wakes one thread sleeping in umtxq_busy().
337 */
337 static inline void
338 umtxq_unbusy(struct umtx_key *key)
339 {
340 struct umtxq_chain *uc;
341
342 uc = umtxq_getchain(key);
343 mtx_assert(&uc->uc_lock, MA_OWNED);
344 KASSERT(uc->uc_busy != 0, ("not busy"));
345 uc->uc_busy = 0;
346 if (uc->uc_waiters)
347 wakeup_one(uc);
348 }
349
/*
 * Find the per-key wait queue for (key, q) on the chain, or NULL if no
 * thread is currently queued on that key.  Chain lock must be held.
 */
350 static struct umtxq_queue *
351 umtxq_queue_lookup(struct umtx_key *key, int q)
352 {
353 struct umtxq_queue *uh;
354 struct umtxq_chain *uc;
355
356 uc = umtxq_getchain(key);
357 UMTXQ_LOCKED_ASSERT(uc);
358 LIST_FOREACH(uh, &uc->uc_queue[q], link) {
359 if (umtx_key_match(&uh->key, key))
360 return (uh);
361 }
362
363 return (NULL);
364 }
365
/*
 * Append uq to the per-key wait queue q.  If a queue for the key already
 * exists, uq's spare queue head is donated to the chain's spare list;
 * otherwise the spare becomes the new queue for this key.  Either way the
 * spare is consumed (umtxq_remove_queue restores one).  Chain lock held.
 */
366 static inline void
367 umtxq_insert_queue(struct umtx_q *uq, int q)
368 {
369 struct umtxq_queue *uh;
370 struct umtxq_chain *uc;
371
372 uc = umtxq_getchain(&uq->uq_key);
373 UMTXQ_LOCKED_ASSERT(uc);
374 KASSERT((uq->uq_flags & UQF_UMTXQ) == 0, ("umtx_q is already on queue"));
375 uh = umtxq_queue_lookup(&uq->uq_key, q);
376 if (uh != NULL) {
377 LIST_INSERT_HEAD(&uc->uc_spare_queue, uq->uq_spare_queue, link);
378 } else {
379 uh = uq->uq_spare_queue;
380 uh->key = uq->uq_key;
381 LIST_INSERT_HEAD(&uc->uc_queue[q], uh, link);
382 }
383 uq->uq_spare_queue = NULL;
384
385 TAILQ_INSERT_TAIL(&uh->head, uq, uq_link);
386 uh->length++;
387 uq->uq_flags |= UQF_UMTXQ;
388 uq->uq_cur_queue = uh;
389 return;
390 }
391
/*
 * Remove uq from its per-key wait queue (no-op if not queued).  Refills
 * uq->uq_spare_queue: with the now-empty queue head if uq was the last
 * waiter, otherwise with one taken from the chain's spare list — keeping
 * the one-spare-per-queued-thread invariant.  Chain lock held.
 */
392 static inline void
393 umtxq_remove_queue(struct umtx_q *uq, int q)
394 {
395 struct umtxq_chain *uc;
396 struct umtxq_queue *uh;
397
398 uc = umtxq_getchain(&uq->uq_key);
399 UMTXQ_LOCKED_ASSERT(uc);
400 if (uq->uq_flags & UQF_UMTXQ) {
401 uh = uq->uq_cur_queue;
402 TAILQ_REMOVE(&uh->head, uq, uq_link);
403 uh->length--;
404 uq->uq_flags &= ~UQF_UMTXQ;
405 if (TAILQ_EMPTY(&uh->head)) {
406 KASSERT(uh->length == 0,
407 ("inconsistent umtxq_queue length"));
408 LIST_REMOVE(uh, link);
409 } else {
410 uh = LIST_FIRST(&uc->uc_spare_queue);
411 KASSERT(uh != NULL, ("uc_spare_queue is empty"));
412 LIST_REMOVE(uh, link);
413 }
414 uq->uq_spare_queue = uh;
415 uq->uq_cur_queue = NULL;
416 }
417 }
418
419 /*
420 * Return the number of waiters on the shared queue for key
421 * (0 if none).  Chain lock must be held.
422 */
422 static int
423 umtxq_count(struct umtx_key *key)
424 {
425 struct umtxq_chain *uc;
426 struct umtxq_queue *uh;
427
428 uc = umtxq_getchain(key);
429 UMTXQ_LOCKED_ASSERT(uc);
430 uh = umtxq_queue_lookup(key, UMTX_SHARED_QUEUE);
431 if (uh != NULL)
432 return (uh->length);
433 return (0);
434 }
435
436 /*
437 * Return the number of PI waiters on key and store the first
438 * waiter in *first (NULL if none).  Chain lock must be held.
439 */
440 static int
441 umtxq_count_pi(struct umtx_key *key, struct umtx_q **first)
442 {
443 struct umtxq_chain *uc;
444 struct umtxq_queue *uh;
445
446 *first = NULL;
447 uc = umtxq_getchain(key);
448 UMTXQ_LOCKED_ASSERT(uc);
449 uh = umtxq_queue_lookup(key, UMTX_SHARED_QUEUE);
450 if (uh != NULL) {
451 *first = TAILQ_FIRST(&uh->head);
452 return (uh->length);
453 }
454 return (0);
455 }
456
457 /*
458 * Wake up to n_wake threads waiting on an userland object and return
459 * the number actually woken.  Chain lock must be held.
460 */
461 static int
462 umtxq_signal_queue(struct umtx_key *key, int n_wake, int q)
463 {
464 struct umtxq_chain *uc;
465 struct umtxq_queue *uh;
466 struct umtx_q *uq;
467 int ret;
468
469 ret = 0;
470 uc = umtxq_getchain(key);
471 UMTXQ_LOCKED_ASSERT(uc);
472 uh = umtxq_queue_lookup(key, q);
473 if (uh != NULL) {
/* Dequeue before wakeup so the woken thread sees UQF_UMTXQ clear. */
474 while ((uq = TAILQ_FIRST(&uh->head)) != NULL) {
475 umtxq_remove_queue(uq, q);
476 wakeup(uq);
477 if (++ret >= n_wake)
478 return (ret);
479 }
480 }
481 return (ret);
482 }
483
484
485 /*
486 * Wake up specified thread, removing it from its wait queue first.
487 * Chain lock must be held.
488 */
488 static inline void
489 umtxq_signal_thread(struct umtx_q *uq)
490 {
491 struct umtxq_chain *uc;
492
493 uc = umtxq_getchain(&uq->uq_key);
494 UMTXQ_LOCKED_ASSERT(uc);
495 umtxq_remove(uq);
496 wakeup(uq);
497 }
498
499 /*
500 * Put thread into sleep state; before sleeping, check if
501 * thread was removed from umtx queue (then return 0 immediately).
502 * Returns 0, ETIMEDOUT, or a signal-related error from msleep.
503 */
503 static inline int
504 umtxq_sleep(struct umtx_q *uq, const char *wmesg, int timo)
505 {
506 struct umtxq_chain *uc;
507 int error;
508
509 uc = umtxq_getchain(&uq->uq_key);
510 UMTXQ_LOCKED_ASSERT(uc);
511 if (!(uq->uq_flags & UQF_UMTXQ))
512 return (0);
513 error = msleep(uq, &uc->uc_lock, PCATCH, wmesg, timo);
514 if (error == EWOULDBLOCK)
515 error = ETIMEDOUT;
516 return (error);
517 }
518
519 /*
520 * Convert userspace address into unique logical address (umtx key).
521 * Private keys use (vmspace, address); shared keys reference the
522 * backing VM object and use (object, offset).  Returns 0 or EFAULT.
523 * On success with a shared key the caller owns an object reference
524 * and must call umtx_key_release().
525 */
522 int
523 umtx_key_get(void *addr, int type, int share, struct umtx_key *key)
524 {
525 struct thread *td = curthread;
526 vm_map_t map;
527 vm_map_entry_t entry;
528 vm_pindex_t pindex;
529 vm_prot_t prot;
530 boolean_t wired;
531
532 key->type = type;
533 if (share == THREAD_SHARE) {
534 key->shared = 0;
535 key->info.private.vs = td->td_proc->p_vmspace;
536 key->info.private.addr = (uintptr_t)addr;
537 } else {
538 MPASS(share == PROCESS_SHARE || share == AUTO_SHARE);
539 map = &td->td_proc->p_vmspace->vm_map;
540 if (vm_map_lookup(&map, (vm_offset_t)addr, VM_PROT_WRITE,
541 &entry, &key->info.shared.object, &pindex, &prot,
542 &wired) != KERN_SUCCESS) {
543 return EFAULT;
544 }
545
/* AUTO_SHARE becomes shared only if the mapping is inherit-share. */
546 if ((share == PROCESS_SHARE) ||
547 (share == AUTO_SHARE &&
548 VM_INHERIT_SHARE == entry->inheritance)) {
549 key->shared = 1;
550 key->info.shared.offset = entry->offset + entry->start -
551 (vm_offset_t)addr;
552 vm_object_reference(key->info.shared.object);
553 } else {
554 key->shared = 0;
555 key->info.private.vs = td->td_proc->p_vmspace;
556 key->info.private.addr = (uintptr_t)addr;
557 }
558 vm_map_lookup_done(map, entry);
559 }
560
561 umtxq_hash(key);
562 return (0);
563 }
564
565 /*
566 * Release key (drops the VM object reference taken for shared keys).
567 */
568 void
569 umtx_key_release(struct umtx_key *key)
570 {
571 if (key->shared)
572 vm_object_deallocate(key->info.shared.object);
573 }
574
575 /*
576 * Lock a umtx object.  id is the caller's thread id, timo an optional
577 * hz-based sleep timeout (0 = forever).  Returns 0 on success, EFAULT
578 * on a bad address, ETIMEDOUT, or a signal error from umtxq_sleep.
579 */
578 static int
579 _do_lock_umtx(struct thread *td, struct umtx *umtx, u_long id, int timo)
580 {
581 struct umtx_q *uq;
582 u_long owner;
583 u_long old;
584 int error = 0;
585
586 uq = td->td_umtxq;
587
588 /*
589 * Care must be exercised when dealing with umtx structure. It
590 * can fault on any access.
591 */
592 for (;;) {
593 /*
594 * Try the uncontested case. This should be done in userland.
595 */
596 owner = casuword(&umtx->u_owner, UMTX_UNOWNED, id);
597
598 /* The acquire succeeded. */
599 if (owner == UMTX_UNOWNED)
600 return (0);
601
602 /* The address was invalid. */
603 if (owner == -1)
604 return (EFAULT);
605
606 /* If no one owns it but it is contested try to acquire it. */
607 if (owner == UMTX_CONTESTED) {
608 owner = casuword(&umtx->u_owner,
609 UMTX_CONTESTED, id | UMTX_CONTESTED);
610
611 if (owner == UMTX_CONTESTED)
612 return (0);
613
614 /* The address was invalid. */
615 if (owner == -1)
616 return (EFAULT);
617
618 /* If this failed the lock has changed, restart. */
619 continue;
620 }
621
622 /*
623 * If we caught a signal, we have retried and now
624 * exit immediately.
625 */
626 if (error != 0)
627 return (error);
628
629 if ((error = umtx_key_get(umtx, TYPE_SIMPLE_LOCK,
630 AUTO_SHARE, &uq->uq_key)) != 0)
631 return (error);
632
/* Queue ourselves before publishing the contested bit. */
633 umtxq_lock(&uq->uq_key);
634 umtxq_busy(&uq->uq_key);
635 umtxq_insert(uq);
636 umtxq_unbusy(&uq->uq_key);
637 umtxq_unlock(&uq->uq_key);
638
639 /*
640 * Set the contested bit so that a release in user space
641 * knows to use the system call for unlock. If this fails
642 * either some one else has acquired the lock or it has been
643 * released.
644 */
645 old = casuword(&umtx->u_owner, owner, owner | UMTX_CONTESTED);
646
647 /* The address was invalid. */
648 if (old == -1) {
649 umtxq_lock(&uq->uq_key);
650 umtxq_remove(uq);
651 umtxq_unlock(&uq->uq_key);
652 umtx_key_release(&uq->uq_key);
653 return (EFAULT);
654 }
655
656 /*
657 * We set the contested bit, sleep. Otherwise the lock changed
658 * and we need to retry or we lost a race to the thread
659 * unlocking the umtx.
660 */
661 umtxq_lock(&uq->uq_key);
662 if (old == owner)
663 error = umtxq_sleep(uq, "umtx", timo);
664 umtxq_remove(uq);
665 umtxq_unlock(&uq->uq_key);
666 umtx_key_release(&uq->uq_key);
667 }
668
669 return (0);
670 }
671
672 /*
673 * Lock a umtx object, with an optional absolute-style timeout.
674 * Untimed locking is restartable (EINTR -> ERESTART); timed locking
675 * is not (ERESTART -> EINTR), since the timeout would restart anew.
676 */
675 static int
676 do_lock_umtx(struct thread *td, struct umtx *umtx, u_long id,
677 struct timespec *timeout)
678 {
679 struct timespec ts, ts2, ts3;
680 struct timeval tv;
681 int error;
682
683 if (timeout == NULL) {
684 error = _do_lock_umtx(td, umtx, id, 0);
685 /* Mutex locking is restarted if it is interrupted. */
686 if (error == EINTR)
687 error = ERESTART;
688 } else {
/* ts = absolute deadline on the uptime clock. */
689 getnanouptime(&ts);
690 timespecadd(&ts, timeout);
691 TIMESPEC_TO_TIMEVAL(&tv, timeout);
692 for (;;) {
693 error = _do_lock_umtx(td, umtx, id, tvtohz(&tv));
694 if (error != ETIMEDOUT)
695 break;
696 getnanouptime(&ts2);
697 if (timespeccmp(&ts2, &ts, >=)) {
698 error = ETIMEDOUT;
699 break;
700 }
/* Retry with the remaining time. */
701 ts3 = ts;
702 timespecsub(&ts3, &ts2);
703 TIMESPEC_TO_TIMEVAL(&tv, &ts3);
704 }
705 /* Timed-locking is not restarted. */
706 if (error == ERESTART)
707 error = EINTR;
708 }
709 return (error);
710 }
711
712 /*
713 * Unlock a umtx object.  Returns 0, EFAULT on a bad address, EPERM if
714 * the caller is not the owner, or EINVAL if the word changed under us.
715 */
715 static int
716 do_unlock_umtx(struct thread *td, struct umtx *umtx, u_long id)
717 {
718 struct umtx_key key;
719 u_long owner;
720 u_long old;
721 int error;
722 int count;
723
724 /*
725 * Make sure we own this mtx.
726 */
727 owner = fuword(__DEVOLATILE(u_long *, &umtx->u_owner));
728 if (owner == -1)
729 return (EFAULT);
730
731 if ((owner & ~UMTX_CONTESTED) != id)
732 return (EPERM);
733
734 /* This should be done in userland */
735 if ((owner & UMTX_CONTESTED) == 0) {
736 old = casuword(&umtx->u_owner, owner, UMTX_UNOWNED);
737 if (old == -1)
738 return (EFAULT);
739 if (old == owner)
740 return (0);
741 owner = old;
742 }
743
744 /* We should only ever be in here for contested locks */
745 if ((error = umtx_key_get(umtx, TYPE_SIMPLE_LOCK, AUTO_SHARE,
746 &key)) != 0)
747 return (error);
748
/* Busy the chain so the handoff below is atomic w.r.t. other lockers. */
749 umtxq_lock(&key);
750 umtxq_busy(&key);
751 count = umtxq_count(&key);
752 umtxq_unlock(&key);
753
754 /*
755 * When unlocking the umtx, it must be marked as unowned if
756 * there is zero or one thread only waiting for it.
757 * Otherwise, it must be marked as contested.
758 */
759 old = casuword(&umtx->u_owner, owner,
760 count <= 1 ? UMTX_UNOWNED : UMTX_CONTESTED);
761 umtxq_lock(&key);
762 umtxq_signal(&key,1);
763 umtxq_unbusy(&key);
764 umtxq_unlock(&key);
765 umtx_key_release(&key);
766 if (old == -1)
767 return (EFAULT);
768 if (old != owner)
769 return (EINVAL);
770 return (0);
771 }
772
773 #ifdef COMPAT_FREEBSD32
774
775 /*
776 * Lock a umtx object (32-bit compat variant of _do_lock_umtx;
777 * operates on a 32-bit lock word via casuword32).
778 */
778 static int
779 _do_lock_umtx32(struct thread *td, uint32_t *m, uint32_t id, int timo)
780 {
781 struct umtx_q *uq;
782 uint32_t owner;
783 uint32_t old;
784 int error = 0;
785
786 uq = td->td_umtxq;
787
788 /*
789 * Care must be exercised when dealing with umtx structure. It
790 * can fault on any access.
791 */
792 for (;;) {
793 /*
794 * Try the uncontested case. This should be done in userland.
795 */
796 owner = casuword32(m, UMUTEX_UNOWNED, id);
797
798 /* The acquire succeeded. */
799 if (owner == UMUTEX_UNOWNED)
800 return (0);
801
802 /* The address was invalid. */
803 if (owner == -1)
804 return (EFAULT);
805
806 /* If no one owns it but it is contested try to acquire it. */
807 if (owner == UMUTEX_CONTESTED) {
808 owner = casuword32(m,
809 UMUTEX_CONTESTED, id | UMUTEX_CONTESTED);
810 if (owner == UMUTEX_CONTESTED)
811 return (0);
812
813 /* The address was invalid. */
814 if (owner == -1)
815 return (EFAULT);
816
817 /* If this failed the lock has changed, restart. */
818 continue;
819 }
820
821 /*
822 * If we caught a signal, we have retried and now
823 * exit immediately.
824 */
825 if (error != 0)
826 return (error);
827
828 if ((error = umtx_key_get(m, TYPE_SIMPLE_LOCK,
829 AUTO_SHARE, &uq->uq_key)) != 0)
830 return (error);
831
/* Queue ourselves before publishing the contested bit. */
832 umtxq_lock(&uq->uq_key);
833 umtxq_busy(&uq->uq_key);
834 umtxq_insert(uq);
835 umtxq_unbusy(&uq->uq_key);
836 umtxq_unlock(&uq->uq_key);
837
838 /*
839 * Set the contested bit so that a release in user space
840 * knows to use the system call for unlock. If this fails
841 * either some one else has acquired the lock or it has been
842 * released.
843 */
844 old = casuword32(m, owner, owner | UMUTEX_CONTESTED);
845
846 /* The address was invalid. */
847 if (old == -1) {
848 umtxq_lock(&uq->uq_key);
849 umtxq_remove(uq);
850 umtxq_unlock(&uq->uq_key);
851 umtx_key_release(&uq->uq_key);
852 return (EFAULT);
853 }
854
855 /*
856 * We set the contested bit, sleep. Otherwise the lock changed
857 * and we need to retry or we lost a race to the thread
858 * unlocking the umtx.
859 */
860 umtxq_lock(&uq->uq_key);
861 if (old == owner)
862 error = umtxq_sleep(uq, "umtx", timo);
863 umtxq_remove(uq);
864 umtxq_unlock(&uq->uq_key);
865 umtx_key_release(&uq->uq_key);
866 }
867
868 return (0);
869 }
870
871 /*
872 * Lock a umtx object (32-bit compat variant of do_lock_umtx).
873 * Untimed locking is restartable; timed locking is not.
874 */
874 static int
875 do_lock_umtx32(struct thread *td, void *m, uint32_t id,
876 struct timespec *timeout)
877 {
878 struct timespec ts, ts2, ts3;
879 struct timeval tv;
880 int error;
881
882 if (timeout == NULL) {
883 error = _do_lock_umtx32(td, m, id, 0);
884 /* Mutex locking is restarted if it is interrupted. */
885 if (error == EINTR)
886 error = ERESTART;
887 } else {
/* ts = absolute deadline on the uptime clock. */
888 getnanouptime(&ts);
889 timespecadd(&ts, timeout);
890 TIMESPEC_TO_TIMEVAL(&tv, timeout);
891 for (;;) {
892 error = _do_lock_umtx32(td, m, id, tvtohz(&tv));
893 if (error != ETIMEDOUT)
894 break;
895 getnanouptime(&ts2);
896 if (timespeccmp(&ts2, &ts, >=)) {
897 error = ETIMEDOUT;
898 break;
899 }
/* Retry with the remaining time. */
900 ts3 = ts;
901 timespecsub(&ts3, &ts2);
902 TIMESPEC_TO_TIMEVAL(&tv, &ts3);
903 }
904 /* Timed-locking is not restarted. */
905 if (error == ERESTART)
906 error = EINTR;
907 }
908 return (error);
909 }
910
911 /*
912 * Unlock a umtx object (32-bit compat variant of do_unlock_umtx).
913 */
914 static int
915 do_unlock_umtx32(struct thread *td, uint32_t *m, uint32_t id)
916 {
917 struct umtx_key key;
918 uint32_t owner;
919 uint32_t old;
920 int error;
921 int count;
922
923 /*
924 * Make sure we own this mtx.
925 */
926 owner = fuword32(m);
927 if (owner == -1)
928 return (EFAULT);
929
930 if ((owner & ~UMUTEX_CONTESTED) != id)
931 return (EPERM);
932
933 /* This should be done in userland */
934 if ((owner & UMUTEX_CONTESTED) == 0) {
935 old = casuword32(m, owner, UMUTEX_UNOWNED);
936 if (old == -1)
937 return (EFAULT);
938 if (old == owner)
939 return (0);
940 owner = old;
941 }
942
943 /* We should only ever be in here for contested locks */
944 if ((error = umtx_key_get(m, TYPE_SIMPLE_LOCK, AUTO_SHARE,
945 &key)) != 0)
946 return (error);
947
/* Busy the chain so the handoff below is atomic w.r.t. other lockers. */
948 umtxq_lock(&key);
949 umtxq_busy(&key);
950 count = umtxq_count(&key);
951 umtxq_unlock(&key);
952
953 /*
954 * When unlocking the umtx, it must be marked as unowned if
955 * there is zero or one thread only waiting for it.
956 * Otherwise, it must be marked as contested.
957 */
958 old = casuword32(m, owner,
959 count <= 1 ? UMUTEX_UNOWNED : UMUTEX_CONTESTED);
960 umtxq_lock(&key);
961 umtxq_signal(&key,1);
962 umtxq_unbusy(&key);
963 umtxq_unlock(&key);
964 umtx_key_release(&key);
965 if (old == -1)
966 return (EFAULT);
967 if (old != owner)
968 return (EINVAL);
969 return (0);
970 }
971 #endif
972
973 /*
974 * Fetch and compare value, sleep on the address if value is not changed.
975 * Queue-then-read ordering closes the race with kern_umtx_wake: the
976 * waiter is visible on the queue before the word is (re)checked.
977 */
976 static int
977 do_wait(struct thread *td, void *addr, u_long id,
978 struct timespec *timeout, int compat32, int is_private)
979 {
980 struct umtx_q *uq;
981 struct timespec ts, ts2, ts3;
982 struct timeval tv;
983 u_long tmp;
984 int error = 0;
985
986 uq = td->td_umtxq;
987 if ((error = umtx_key_get(addr, TYPE_SIMPLE_WAIT,
988 is_private ? THREAD_SHARE : AUTO_SHARE, &uq->uq_key)) != 0)
989 return (error);
990
991 umtxq_lock(&uq->uq_key);
992 umtxq_insert(uq);
993 umtxq_unlock(&uq->uq_key);
994 if (compat32 == 0)
995 tmp = fuword(addr);
996 else
997 tmp = (unsigned int)fuword32(addr);
998 if (tmp != id) {
/* Value already changed: do not sleep. */
999 umtxq_lock(&uq->uq_key);
1000 umtxq_remove(uq);
1001 umtxq_unlock(&uq->uq_key);
1002 } else if (timeout == NULL) {
1003 umtxq_lock(&uq->uq_key);
1004 error = umtxq_sleep(uq, "uwait", 0);
1005 umtxq_remove(uq);
1006 umtxq_unlock(&uq->uq_key);
1007 } else {
1008 getnanouptime(&ts);
1009 timespecadd(&ts, timeout);
1010 TIMESPEC_TO_TIMEVAL(&tv, timeout);
1011 umtxq_lock(&uq->uq_key);
1012 for (;;) {
1013 error = umtxq_sleep(uq, "uwait", tvtohz(&tv));
/* Dequeued by a waker: success regardless of msleep status. */
1014 if (!(uq->uq_flags & UQF_UMTXQ)) {
1015 error = 0;
1016 break;
1017 }
1018 if (error != ETIMEDOUT)
1019 break;
1020 umtxq_unlock(&uq->uq_key);
1021 getnanouptime(&ts2);
1022 if (timespeccmp(&ts2, &ts, >=)) {
1023 error = ETIMEDOUT;
1024 umtxq_lock(&uq->uq_key);
1025 break;
1026 }
1027 ts3 = ts;
1028 timespecsub(&ts3, &ts2);
1029 TIMESPEC_TO_TIMEVAL(&tv, &ts3);
1030 umtxq_lock(&uq->uq_key);
1031 }
1032 umtxq_remove(uq);
1033 umtxq_unlock(&uq->uq_key);
1034 }
1035 umtx_key_release(&uq->uq_key);
1036 if (error == ERESTART)
1037 error = EINTR;
1038 return (error);
1039 }
1040
1041 /*
1042 * Wake up to n_wake threads sleeping on the specified address.
1043 * Returns 0 on success or EFAULT/EPERM-style errors from key lookup.
1044 */
1044 int
1045 kern_umtx_wake(struct thread *td, void *uaddr, int n_wake, int is_private)
1046 {
1047 struct umtx_key key;
1048 int ret;
1049
1050 if ((ret = umtx_key_get(uaddr, TYPE_SIMPLE_WAIT,
1051 is_private ? THREAD_SHARE : AUTO_SHARE, &key)) != 0)
1052 return (ret);
1053 umtxq_lock(&key);
1054 ret = umtxq_signal(&key, n_wake);
1055 umtxq_unlock(&key);
1056 umtx_key_release(&key);
1057 return (0);
1058 }
1059
1060 /*
1061 * Lock PTHREAD_PRIO_NONE protocol POSIX mutex.  mode selects plain
1062 * lock, _UMUTEX_TRY (return EBUSY instead of sleeping), or
1063 * _UMUTEX_WAIT (only wait for the mutex to become lockable, never
1064 * acquire it).  Returns 0, EFAULT, EDEADLK, EBUSY, ETIMEDOUT, or a
1065 * signal error.
1066 */
1063 static int
1064 _do_lock_normal(struct thread *td, struct umutex *m, uint32_t flags, int timo,
1065 int mode)
1066 {
1067 struct umtx_q *uq;
1068 uint32_t owner, old, id;
1069 int error = 0;
1070
1071 id = td->td_tid;
1072 uq = td->td_umtxq;
1073
1074 /*
1075 * Care must be exercised when dealing with umtx structure. It
1076 * can fault on any access.
1077 */
1078 for (;;) {
1079 owner = fuword32(__DEVOLATILE(void *, &m->m_owner));
1080 if (mode == _UMUTEX_WAIT) {
1081 if (owner == UMUTEX_UNOWNED || owner == UMUTEX_CONTESTED)
1082 return (0);
1083 } else {
1084 /*
1085 * Try the uncontested case. This should be done in userland.
1086 */
1087 owner = casuword32(&m->m_owner, UMUTEX_UNOWNED, id);
1088
1089 /* The acquire succeeded. */
1090 if (owner == UMUTEX_UNOWNED)
1091 return (0);
1092
1093 /* The address was invalid. */
1094 if (owner == -1)
1095 return (EFAULT);
1096
1097 /* If no one owns it but it is contested try to acquire it. */
1098 if (owner == UMUTEX_CONTESTED) {
1099 owner = casuword32(&m->m_owner,
1100 UMUTEX_CONTESTED, id | UMUTEX_CONTESTED);
1101
1102 if (owner == UMUTEX_CONTESTED)
1103 return (0);
1104
1105 /* The address was invalid. */
1106 if (owner == -1)
1107 return (EFAULT);
1108
1109 /* If this failed the lock has changed, restart. */
1110 continue;
1111 }
1112 }
1113
/* Self-lock detection for error-checking mutexes. */
1114 if ((flags & UMUTEX_ERROR_CHECK) != 0 &&
1115 (owner & ~UMUTEX_CONTESTED) == id)
1116 return (EDEADLK);
1117
1118 if (mode == _UMUTEX_TRY)
1119 return (EBUSY);
1120
1121 /*
1122 * If we caught a signal, we have retried and now
1123 * exit immediately.
1124 */
1125 if (error != 0)
1126 return (error);
1127
1128 if ((error = umtx_key_get(m, TYPE_NORMAL_UMUTEX,
1129 GET_SHARE(flags), &uq->uq_key)) != 0)
1130 return (error);
1131
/* Chain stays busy across the CAS; unbusied just before sleeping. */
1132 umtxq_lock(&uq->uq_key);
1133 umtxq_busy(&uq->uq_key);
1134 umtxq_insert(uq);
1135 umtxq_unlock(&uq->uq_key);
1136
1137 /*
1138 * Set the contested bit so that a release in user space
1139 * knows to use the system call for unlock. If this fails
1140 * either some one else has acquired the lock or it has been
1141 * released.
1142 */
1143 old = casuword32(&m->m_owner, owner, owner | UMUTEX_CONTESTED);
1144
1145 /* The address was invalid. */
1146 if (old == -1) {
1147 umtxq_lock(&uq->uq_key);
1148 umtxq_remove(uq);
1149 umtxq_unbusy(&uq->uq_key);
1150 umtxq_unlock(&uq->uq_key);
1151 umtx_key_release(&uq->uq_key);
1152 return (EFAULT);
1153 }
1154
1155 /*
1156 * We set the contested bit, sleep. Otherwise the lock changed
1157 * and we need to retry or we lost a race to the thread
1158 * unlocking the umtx.
1159 */
1160 umtxq_lock(&uq->uq_key);
1161 umtxq_unbusy(&uq->uq_key);
1162 if (old == owner)
1163 error = umtxq_sleep(uq, "umtxn", timo);
1164 umtxq_remove(uq);
1165 umtxq_unlock(&uq->uq_key);
1166 umtx_key_release(&uq->uq_key);
1167 }
1168
1169 return (0);
1170 }
1171
1175 /*
1176 * Unlock PTHREAD_PRIO_NONE protocol POSIX mutex.  Returns 0, EFAULT,
1177 * EPERM if the caller is not the owner, or EINVAL on a lost race.
1178 */
1178 static int
1179 do_unlock_normal(struct thread *td, struct umutex *m, uint32_t flags)
1180 {
1181 struct umtx_key key;
1182 uint32_t owner, old, id;
1183 int error;
1184 int count;
1185
1186 id = td->td_tid;
1187 /*
1188 * Make sure we own this mtx.
1189 */
1190 owner = fuword32(__DEVOLATILE(uint32_t *, &m->m_owner));
1191 if (owner == -1)
1192 return (EFAULT);
1193
1194 if ((owner & ~UMUTEX_CONTESTED) != id)
1195 return (EPERM);
1196
/* Uncontested fast path; should normally be handled in userland. */
1197 if ((owner & UMUTEX_CONTESTED) == 0) {
1198 old = casuword32(&m->m_owner, owner, UMUTEX_UNOWNED);
1199 if (old == -1)
1200 return (EFAULT);
1201 if (old == owner)
1202 return (0);
1203 owner = old;
1204 }
1205
1206 /* We should only ever be in here for contested locks */
1207 if ((error = umtx_key_get(m, TYPE_NORMAL_UMUTEX, GET_SHARE(flags),
1208 &key)) != 0)
1209 return (error);
1210
/* Busy the chain so the handoff below is atomic w.r.t. other lockers. */
1211 umtxq_lock(&key);
1212 umtxq_busy(&key);
1213 count = umtxq_count(&key);
1214 umtxq_unlock(&key);
1215
1216 /*
1217 * When unlocking the umtx, it must be marked as unowned if
1218 * there is zero or one thread only waiting for it.
1219 * Otherwise, it must be marked as contested.
1220 */
1221 old = casuword32(&m->m_owner, owner,
1222 count <= 1 ? UMUTEX_UNOWNED : UMUTEX_CONTESTED);
1223 umtxq_lock(&key);
1224 umtxq_signal(&key,1);
1225 umtxq_unbusy(&key);
1226 umtxq_unlock(&key);
1227 umtx_key_release(&key);
1228 if (old == -1)
1229 return (EFAULT);
1230 if (old != owner)
1231 return (EINVAL);
1232 return (0);
1233 }
1234
1235 /*
1236 * Check if the mutex is available and wake up a waiter,
1237 * only for simple mutex.
1238 */
1239 static int
1240 do_wake_umutex(struct thread *td, struct umutex *m)
1241 {
1242 struct umtx_key key;
1243 uint32_t owner;
1244 uint32_t flags;
1245 int error;
1246 int count;
1247 
1248 owner = fuword32(__DEVOLATILE(uint32_t *, &m->m_owner));
1249 if (owner == -1)
1250 return (EFAULT);
1251 
/* Still owned by some thread: nothing to wake up. */
1252 if ((owner & ~UMUTEX_CONTESTED) != 0)
1253 return (0);
1254 
1255 flags = fuword32(&m->m_flags);
1256 
1257 /* We should only ever be in here for contested locks */
1258 if ((error = umtx_key_get(m, TYPE_NORMAL_UMUTEX, GET_SHARE(flags),
1259 &key)) != 0)
1260 return (error);
1261 
1262 umtxq_lock(&key);
1263 umtxq_busy(&key);
1264 count = umtxq_count(&key);
1265 umtxq_unlock(&key);
1266 
/*
 * With at most one waiter left, try to clear the contested bit so
 * future unlocks can take the fast path.  The CAS result tells us
 * whether the word was still UMUTEX_CONTESTED.
 */
1267 if (count <= 1)
1268 owner = casuword32(&m->m_owner, UMUTEX_CONTESTED, UMUTEX_UNOWNED);
1269 
1270 umtxq_lock(&key);
/* Wake a waiter only if the mutex is (still) unowned. */
1271 if (count != 0 && (owner & ~UMUTEX_CONTESTED) == 0)
1272 umtxq_signal(&key, 1);
1273 umtxq_unbusy(&key);
1274 umtxq_unlock(&key);
1275 umtx_key_release(&key);
1276 return (0);
1277 }
1278
1279 static inline struct umtx_pi *
1280 umtx_pi_alloc(int flags)
1281 {
1282 struct umtx_pi *pi;
1283 
/*
 * Allocate a zeroed PI record.  With M_NOWAIT uma_zalloc() may
 * fail and return NULL; the original code would then fault in
 * TAILQ_INIT() below before the caller ever got to check for
 * NULL, so bail out here instead.
 */
1284 pi = uma_zalloc(umtx_pi_zone, M_ZERO | flags);
if (pi == NULL)
return (NULL);
1285 TAILQ_INIT(&pi->pi_blocked);
/* umtx_pi_allocated is a global statistic, updated atomically. */
1286 atomic_add_int(&umtx_pi_allocated, 1);
1287 return (pi);
1288 }
1289
1290 static inline void
1291 umtx_pi_free(struct umtx_pi *pi)
1292 {
/* Drop the allocation statistic, then return the record to its zone. */
1293 atomic_add_int(&umtx_pi_allocated, -1);
1294 uma_zfree(umtx_pi_zone, pi);
1295 }
1296
1297 /*
1298 * Adjust the thread's position on a pi_state after its priority has been
1299 * changed.
1300 */
1301 static int
1302 umtx_pi_adjust_thread(struct umtx_pi *pi, struct thread *td)
1303 {
1304 struct umtx_q *uq, *uq1, *uq2;
1305 struct thread *td1;
1306 
1307 mtx_assert(&umtx_lock, MA_OWNED);
1308 if (pi == NULL)
1309 return (0);
1310 
1311 uq = td->td_umtxq;
1312 
1313 /*
1314 * Check if the thread needs to be moved on the blocked chain.
1315 * It needs to be moved if either its priority is lower than
1316 * the previous thread or higher than the next thread.
1317 */
/* pi_blocked is kept sorted by UPRI (lower value = higher priority). */
1318 uq1 = TAILQ_PREV(uq, umtxq_head, uq_lockq);
1319 uq2 = TAILQ_NEXT(uq, uq_lockq);
1320 if ((uq1 != NULL && UPRI(td) < UPRI(uq1->uq_thread)) ||
1321 (uq2 != NULL && UPRI(td) > UPRI(uq2->uq_thread))) {
1322 /*
1323 * Remove thread from blocked chain and determine where
1324 * it should be moved to.
1325 */
1326 TAILQ_REMOVE(&pi->pi_blocked, uq, uq_lockq);
1327 TAILQ_FOREACH(uq1, &pi->pi_blocked, uq_lockq) {
1328 td1 = uq1->uq_thread;
1329 MPASS(td1->td_proc->p_magic == P_MAGIC);
/* Stop at the first entry with lower priority than td. */
1330 if (UPRI(td1) > UPRI(td))
1331 break;
1332 }
1333 
1334 if (uq1 == NULL)
1335 TAILQ_INSERT_TAIL(&pi->pi_blocked, uq, uq_lockq);
1336 else
1337 TAILQ_INSERT_BEFORE(uq1, uq, uq_lockq);
1338 }
/* Returns 1 when a PI chain existed (even if no move was needed). */
1339 return (1);
1340 }
1341
1342 /*
1343 * Propagate priority when a thread is blocked on POSIX
1344 * PI mutex.
1345 */
1346 static void
1347 umtx_propagate_priority(struct thread *td)
1348 {
1349 struct umtx_q *uq;
1350 struct umtx_pi *pi;
1351 int pri;
1352 
1353 mtx_assert(&umtx_lock, MA_OWNED);
/* pri is the priority being lent; it never changes while we walk. */
1354 pri = UPRI(td);
1355 uq = td->td_umtxq;
1356 pi = uq->uq_pi_blocked;
1357 if (pi == NULL)
1358 return;
1359 
/*
 * Walk the chain of lock owners: lend pri to each owner that
 * currently has a worse (numerically larger) lent priority,
 * following owners that are themselves blocked on a PI mutex.
 */
1360 for (;;) {
1361 td = pi->pi_owner;
1362 if (td == NULL || td == curthread)
1363 return;
1364 
1365 MPASS(td->td_proc != NULL);
1366 MPASS(td->td_proc->p_magic == P_MAGIC);
1367 
1368 thread_lock(td);
1369 if (td->td_lend_user_pri > pri)
1370 sched_lend_user_prio(td, pri);
1371 else {
/* Owner already runs at least this well; propagation stops. */
1372 thread_unlock(td);
1373 break;
1374 }
1375 thread_unlock(td);
1376 
1377 /*
1378 * Pick up the lock that td is blocked on.
1379 */
1380 uq = td->td_umtxq;
1381 pi = uq->uq_pi_blocked;
1382 if (pi == NULL)
1383 break;
1384 /* Resort td on the list if needed. */
1385 umtx_pi_adjust_thread(pi, td);
1386 }
1387 }
1388
1389 /*
1390 * Unpropagate priority for a PI mutex when a thread blocked on
1391 * it is interrupted by signal or resumed by others.
1392 */
1393 static void
1394 umtx_repropagate_priority(struct umtx_pi *pi)
1395 {
1396 struct umtx_q *uq, *uq_owner;
1397 struct umtx_pi *pi2;
1398 int pri;
1399 
1400 mtx_assert(&umtx_lock, MA_OWNED);
1401 
/*
 * Recompute each owner's lent priority from scratch: the best
 * (lowest UPRI) head waiter over all PI mutexes the owner holds,
 * capped by its PP-inherited priority.  Continue up the chain of
 * blocked owners until an unowned mutex ends the walk.
 */
1402 while (pi != NULL && pi->pi_owner != NULL) {
1403 pri = PRI_MAX;
1404 uq_owner = pi->pi_owner->td_umtxq;
1405 
1406 TAILQ_FOREACH(pi2, &uq_owner->uq_pi_contested, pi_link) {
/* The head of pi_blocked is the highest-priority waiter. */
1407 uq = TAILQ_FIRST(&pi2->pi_blocked);
1408 if (uq != NULL) {
1409 if (pri > UPRI(uq->uq_thread))
1410 pri = UPRI(uq->uq_thread);
1411 }
1412 }
1413 
1414 if (pri > uq_owner->uq_inherited_pri)
1415 pri = uq_owner->uq_inherited_pri;
1416 thread_lock(pi->pi_owner);
1417 sched_lend_user_prio(pi->pi_owner, pri);
1418 thread_unlock(pi->pi_owner);
/* If the owner itself is blocked on a PI mutex, keep walking up. */
1419 if ((pi = uq_owner->uq_pi_blocked) != NULL)
1420 umtx_pi_adjust_thread(pi, uq_owner->uq_thread);
1421 }
1422 }
1423
1424 /*
1425 * Insert a PI mutex into owned list.
1426 */
1427 static void
1428 umtx_pi_setowner(struct umtx_pi *pi, struct thread *owner)
1429 {
1430 struct umtx_q *uq_owner;
1431 
1432 uq_owner = owner->td_umtxq;
1433 mtx_assert(&umtx_lock, MA_OWNED);
/* A PI mutex must not already have an owner when one is assigned. */
1434 if (pi->pi_owner != NULL)
/* Fixed typo in the panic message ("pi_ower" -> "pi_owner"). */
1435 panic("pi_owner != NULL");
1436 pi->pi_owner = owner;
/* Track the mutex on the owner's list of contested PI mutexes. */
1437 TAILQ_INSERT_TAIL(&uq_owner->uq_pi_contested, pi, pi_link);
1438 }
1439
1440 /*
1441 * Claim ownership of a PI mutex.
1442 */
1443 static int
1444 umtx_pi_claim(struct umtx_pi *pi, struct thread *owner)
1445 {
1446 struct umtx_q *uq, *uq_owner;
1447 
1448 uq_owner = owner->td_umtxq;
1449 mtx_lock_spin(&umtx_lock);
/* Already ours: nothing to do. */
1450 if (pi->pi_owner == owner) {
1451 mtx_unlock_spin(&umtx_lock);
1452 return (0);
1453 }
1454 
1455 if (pi->pi_owner != NULL) {
1456 /*
1457 * userland may have already messed the mutex, sigh.
1458 */
1459 mtx_unlock_spin(&umtx_lock);
1460 return (EPERM);
1461 }
1462 umtx_pi_setowner(pi, owner);
/*
 * If waiters are already queued, the new owner may need to run at
 * the priority of the best waiter (head of the sorted list).
 */
1463 uq = TAILQ_FIRST(&pi->pi_blocked);
1464 if (uq != NULL) {
1465 int pri;
1466 
1467 pri = UPRI(uq->uq_thread);
1468 thread_lock(owner);
1469 if (pri < UPRI(owner))
1470 sched_lend_user_prio(owner, pri);
1471 thread_unlock(owner);
1472 }
1473 mtx_unlock_spin(&umtx_lock);
1474 return (0);
1475 }
1476
1477 /*
1478 * Adjust a thread's order position in its blocked PI mutex,
1479 * this may result new priority propagating process.
1480 */
1481 void
1482 umtx_pi_adjust(struct thread *td, u_char oldpri)
1483 {
1484 struct umtx_q *uq;
1485 struct umtx_pi *pi;
1486 
/* NOTE: oldpri is currently unused; kept for the external interface. */
1487 uq = td->td_umtxq;
1488 mtx_lock_spin(&umtx_lock);
1489 /*
1490 * Pick up the lock that td is blocked on.
1491 */
1492 pi = uq->uq_pi_blocked;
1493 if (pi != NULL) {
/* Re-sort td among the waiters, then rebuild lent priorities. */
1494 umtx_pi_adjust_thread(pi, td);
1495 umtx_repropagate_priority(pi);
1496 }
1497 mtx_unlock_spin(&umtx_lock);
1498 }
1499
1500 /*
1501 * Sleep on a PI mutex.
1502 */
1503 static int
1504 umtxq_sleep_pi(struct umtx_q *uq, struct umtx_pi *pi,
1505 uint32_t owner, const char *wmesg, int timo)
1506 {
1507 struct umtxq_chain *uc;
1508 struct thread *td, *td1;
1509 struct umtx_q *uq1;
1510 int pri;
1511 int error = 0;
1512 
1513 td = uq->uq_thread;
1514 KASSERT(td == curthread, ("inconsistent uq_thread"));
/* Caller must hold the chain lock and have the chain busied. */
1515 uc = umtxq_getchain(&uq->uq_key);
1516 UMTXQ_LOCKED_ASSERT(uc);
1517 UMTXQ_BUSY_ASSERT(uc);
1518 umtxq_insert(uq);
1519 mtx_lock_spin(&umtx_lock);
/*
 * Resolve the owner tid (read from the userspace lock word) to a
 * thread and record it as the PI owner, if not known yet.
 */
1520 if (pi->pi_owner == NULL) {
1521 mtx_unlock_spin(&umtx_lock);
1522 /* XXX Only look up thread in current process. */
1523 td1 = tdfind(owner, curproc->p_pid);
1524 mtx_lock_spin(&umtx_lock);
1525 if (td1 != NULL) {
1526 if (pi->pi_owner == NULL)
1527 umtx_pi_setowner(pi, td1);
/* tdfind() returns with the proc locked. */
1528 PROC_UNLOCK(td1->td_proc);
1529 }
1530 }
1531 
/* Insert ourselves in priority order on the blocked list. */
1532 TAILQ_FOREACH(uq1, &pi->pi_blocked, uq_lockq) {
1533 pri = UPRI(uq1->uq_thread);
1534 if (pri > UPRI(td))
1535 break;
1536 }
1537 
1538 if (uq1 != NULL)
1539 TAILQ_INSERT_BEFORE(uq1, uq, uq_lockq);
1540 else
1541 TAILQ_INSERT_TAIL(&pi->pi_blocked, uq, uq_lockq);
1542 
1543 uq->uq_pi_blocked = pi;
1544 thread_lock(td);
1545 td->td_flags |= TDF_UPIBLOCKED;
1546 thread_unlock(td);
/* Lend our priority up the chain of owners before sleeping. */
1547 umtx_propagate_priority(td);
1548 mtx_unlock_spin(&umtx_lock);
1549 umtxq_unbusy(&uq->uq_key);
1550 
/* Sleep interruptibly; map msleep's timeout code to ETIMEDOUT. */
1551 if (uq->uq_flags & UQF_UMTXQ) {
1552 error = msleep(uq, &uc->uc_lock, PCATCH, wmesg, timo);
1553 if (error == EWOULDBLOCK)
1554 error = ETIMEDOUT;
1555 if (uq->uq_flags & UQF_UMTXQ) {
1556 umtxq_remove(uq);
1557 }
1558 }
/* Undo blocked state and give back any priority we propagated. */
1559 mtx_lock_spin(&umtx_lock);
1560 uq->uq_pi_blocked = NULL;
1561 thread_lock(td);
1562 td->td_flags &= ~TDF_UPIBLOCKED;
1563 thread_unlock(td);
1564 TAILQ_REMOVE(&pi->pi_blocked, uq, uq_lockq);
1565 umtx_repropagate_priority(pi);
1566 mtx_unlock_spin(&umtx_lock);
1567 umtxq_unlock(&uq->uq_key);
1568 
1569 return (error);
1570 }
1571
1572 /*
1573 * Add reference count for a PI mutex.
1574 */
1575 static void
1576 umtx_pi_ref(struct umtx_pi *pi)
1577 {
/* The chain lock of the PI key protects the reference count. */
1581 UMTXQ_LOCKED_ASSERT(umtxq_getchain(&pi->pi_key));
1582 pi->pi_refcount++;
1583 }
1584
1585 /*
1586 * Decrease reference count for a PI mutex, if the counter
1587 * is decreased to zero, its memory space is freed.
1588 */
1589 static void
1590 umtx_pi_unref(struct umtx_pi *pi)
1591 {
1592 struct umtxq_chain *uc;
1593 
1594 uc = umtxq_getchain(&pi->pi_key);
1595 UMTXQ_LOCKED_ASSERT(uc);
1596 KASSERT(pi->pi_refcount > 0, ("invalid reference count"));
1597 if (--pi->pi_refcount == 0) {
/* Detach from the owner's contested list before freeing. */
1598 mtx_lock_spin(&umtx_lock);
1599 if (pi->pi_owner != NULL) {
1600 TAILQ_REMOVE(&pi->pi_owner->td_umtxq->uq_pi_contested,
1601 pi, pi_link);
1602 pi->pi_owner = NULL;
1603 }
1604 KASSERT(TAILQ_EMPTY(&pi->pi_blocked),
1605 ("blocked queue not empty"));
1606 mtx_unlock_spin(&umtx_lock);
/* Remove from the chain's hash list and release the memory. */
1607 TAILQ_REMOVE(&uc->uc_pi_list, pi, pi_hashlink);
1608 umtx_pi_free(pi);
1609 }
1610 }
1611
1612 /*
1613 * Find a PI mutex in hash table.
1614 */
1615 static struct umtx_pi *
1616 umtx_pi_lookup(struct umtx_key *key)
1617 {
1618 struct umtxq_chain *chain;
1619 struct umtx_pi *p;
1620 
/* The per-chain list may only be scanned with the chain locked. */
1621 chain = umtxq_getchain(key);
1622 UMTXQ_LOCKED_ASSERT(chain);
1623 
/* Linear scan of this chain's PI records for a matching key. */
1624 TAILQ_FOREACH(p, &chain->uc_pi_list, pi_hashlink)
1625 if (umtx_key_match(&p->pi_key, key))
1626 return (p);
1629 return (NULL);
1630 }
1631
1632 /*
1633 * Insert a PI mutex into hash table.
1634 */
1635 static inline void
1636 umtx_pi_insert(struct umtx_pi *pi)
1637 {
1638 struct umtxq_chain *chain;
1639 
/* Hash the PI record onto its key's chain; chain lock required. */
1640 chain = umtxq_getchain(&pi->pi_key);
1641 UMTXQ_LOCKED_ASSERT(chain);
1642 TAILQ_INSERT_TAIL(&chain->uc_pi_list, pi, pi_hashlink);
1643 }
1644
1645 /*
1646 * Lock a PI mutex.
1647 */
1648 static int
1649 _do_lock_pi(struct thread *td, struct umutex *m, uint32_t flags, int timo,
1650 int try)
1651 {
1652 struct umtx_q *uq;
1653 struct umtx_pi *pi, *new_pi;
1654 uint32_t id, owner, old;
1655 int error;
1656 
1657 id = td->td_tid;
1658 uq = td->td_umtxq;
1659 
1660 if ((error = umtx_key_get(m, TYPE_PI_UMUTEX, GET_SHARE(flags),
1661 &uq->uq_key)) != 0)
1662 return (error);
/*
 * Find or create the in-kernel PI record for this key.  The
 * M_NOWAIT attempt avoids dropping the chain lock; if it fails,
 * allocate with M_WAITOK unlocked and re-check for a racing insert.
 */
1663 umtxq_lock(&uq->uq_key);
1664 pi = umtx_pi_lookup(&uq->uq_key);
1665 if (pi == NULL) {
1666 new_pi = umtx_pi_alloc(M_NOWAIT);
1667 if (new_pi == NULL) {
1668 umtxq_unlock(&uq->uq_key);
1669 new_pi = umtx_pi_alloc(M_WAITOK);
1670 umtxq_lock(&uq->uq_key);
1671 pi = umtx_pi_lookup(&uq->uq_key);
1672 if (pi != NULL) {
/* Lost the race: somebody else inserted one meanwhile. */
1673 umtx_pi_free(new_pi);
1674 new_pi = NULL;
1675 }
1676 }
1677 if (new_pi != NULL) {
1678 new_pi->pi_key = uq->uq_key;
1679 umtx_pi_insert(new_pi);
1680 pi = new_pi;
1681 }
1682 }
/* Hold a reference on the PI record for the duration of the loop. */
1683 umtx_pi_ref(pi);
1684 umtxq_unlock(&uq->uq_key);
1685 
1686 /*
1687 * Care must be exercised when dealing with umtx structure. It
1688 * can fault on any access.
1689 */
1690 for (;;) {
1691 /*
1692 * Try the uncontested case. This should be done in userland.
1693 */
1694 owner = casuword32(&m->m_owner, UMUTEX_UNOWNED, id);
1695 
1696 /* The acquire succeeded. */
1697 if (owner == UMUTEX_UNOWNED) {
1698 error = 0;
1699 break;
1700 }
1701 
1702 /* The address was invalid. */
1703 if (owner == -1) {
1704 error = EFAULT;
1705 break;
1706 }
1707 
1708 /* If no one owns it but it is contested try to acquire it. */
1709 if (owner == UMUTEX_CONTESTED) {
1710 owner = casuword32(&m->m_owner,
1711 UMUTEX_CONTESTED, id | UMUTEX_CONTESTED);
1712 
1713 if (owner == UMUTEX_CONTESTED) {
/* Became kernel-visible owner; record it for PI bookkeeping. */
1714 umtxq_lock(&uq->uq_key);
1715 umtxq_busy(&uq->uq_key);
1716 error = umtx_pi_claim(pi, td);
1717 umtxq_unbusy(&uq->uq_key);
1718 umtxq_unlock(&uq->uq_key);
1719 break;
1720 }
1721 
1722 /* The address was invalid. */
1723 if (owner == -1) {
1724 error = EFAULT;
1725 break;
1726 }
1727 
1728 /* If this failed the lock has changed, restart. */
1729 continue;
1730 }
1731 
1732 if ((flags & UMUTEX_ERROR_CHECK) != 0 &&
1733 (owner & ~UMUTEX_CONTESTED) == id) {
1734 error = EDEADLK;
1735 break;
1736 }
1737 
1738 if (try != 0) {
1739 error = EBUSY;
1740 break;
1741 }
1742 
1743 /*
1744 * If we caught a signal, we have retried and now
1745 * exit immediately.
1746 */
1747 if (error != 0)
1748 break;
1749 
1750 umtxq_lock(&uq->uq_key);
1751 umtxq_busy(&uq->uq_key);
1752 umtxq_unlock(&uq->uq_key);
1753 
1754 /*
1755 * Set the contested bit so that a release in user space
1756 * knows to use the system call for unlock. If this fails
1757 * either some one else has acquired the lock or it has been
1758 * released.
1759 */
1760 old = casuword32(&m->m_owner, owner, owner | UMUTEX_CONTESTED);
1761 
1762 /* The address was invalid. */
1763 if (old == -1) {
1764 umtxq_lock(&uq->uq_key);
1765 umtxq_unbusy(&uq->uq_key);
1766 umtxq_unlock(&uq->uq_key);
1767 error = EFAULT;
1768 break;
1769 }
1770 
1771 umtxq_lock(&uq->uq_key);
1772 /*
1773 * We set the contested bit, sleep. Otherwise the lock changed
1774 * and we need to retry or we lost a race to the thread
1775 * unlocking the umtx.
1776 */
1777 if (old == owner)
/* Sleep on the PI queue; owner tid is passed for PI setup. */
1778 error = umtxq_sleep_pi(uq, pi, owner & ~UMUTEX_CONTESTED,
1779 "umtxpi", timo);
1780 else {
1781 umtxq_unbusy(&uq->uq_key);
1782 umtxq_unlock(&uq->uq_key);
1783 }
1784 }
1785 
/* Drop the PI reference taken before the loop. */
1786 umtxq_lock(&uq->uq_key);
1787 umtx_pi_unref(pi);
1788 umtxq_unlock(&uq->uq_key);
1789 
1790 umtx_key_release(&uq->uq_key);
1791 return (error);
1792 }
1793
1794 /*
1795 * Unlock a PI mutex.
1796 */
1797 static int
1798 do_unlock_pi(struct thread *td, struct umutex *m, uint32_t flags)
1799 {
1800 struct umtx_key key;
1801 struct umtx_q *uq_first, *uq_first2, *uq_me;
1802 struct umtx_pi *pi, *pi2;
1803 uint32_t owner, old, id;
1804 int error;
1805 int count;
1806 int pri;
1807 
1808 id = td->td_tid;
1809 /*
1810 * Make sure we own this mtx.
1811 */
1812 owner = fuword32(__DEVOLATILE(uint32_t *, &m->m_owner));
1813 if (owner == -1)
1814 return (EFAULT);
1815 
1816 if ((owner & ~UMUTEX_CONTESTED) != id)
1817 return (EPERM);
1818 
1819 /* This should be done in userland */
1820 if ((owner & UMUTEX_CONTESTED) == 0) {
1821 old = casuword32(&m->m_owner, owner, UMUTEX_UNOWNED);
1822 if (old == -1)
1823 return (EFAULT);
1824 if (old == owner)
1825 return (0);
1826 owner = old;
1827 }
1828 
1829 /* We should only ever be in here for contested locks */
1830 if ((error = umtx_key_get(m, TYPE_PI_UMUTEX, GET_SHARE(flags),
1831 &key)) != 0)
1832 return (error);
1833 
1834 umtxq_lock(&key);
1835 umtxq_busy(&key);
1836 count = umtxq_count_pi(&key, &uq_first);
1837 if (uq_first != NULL) {
1838 mtx_lock_spin(&umtx_lock);
1839 pi = uq_first->uq_pi_blocked;
1840 KASSERT(pi != NULL, ("pi == NULL?"));
1841 if (pi->pi_owner != curthread) {
1842 mtx_unlock_spin(&umtx_lock);
1843 umtxq_unbusy(&key);
1844 umtxq_unlock(&key);
1845 umtx_key_release(&key);
1846 /* userland messed the mutex */
1847 return (EPERM);
1848 }
/* Give up PI ownership before waking the successor. */
1849 uq_me = curthread->td_umtxq;
1850 pi->pi_owner = NULL;
1851 TAILQ_REMOVE(&uq_me->uq_pi_contested, pi, pi_link);
1852 /* get highest priority thread which is still sleeping. */
1853 uq_first = TAILQ_FIRST(&pi->pi_blocked);
1854 while (uq_first != NULL &&
1855 (uq_first->uq_flags & UQF_UMTXQ) == 0) {
1856 uq_first = TAILQ_NEXT(uq_first, uq_lockq);
1857 }
/*
 * Recompute our own lent priority from the PI mutexes we still
 * hold, now that this one no longer contributes.
 */
1858 pri = PRI_MAX;
1859 TAILQ_FOREACH(pi2, &uq_me->uq_pi_contested, pi_link) {
1860 uq_first2 = TAILQ_FIRST(&pi2->pi_blocked);
1861 if (uq_first2 != NULL) {
1862 if (pri > UPRI(uq_first2->uq_thread))
1863 pri = UPRI(uq_first2->uq_thread);
1864 }
1865 }
1866 thread_lock(curthread);
1867 sched_lend_user_prio(curthread, pri);
1868 thread_unlock(curthread);
1869 mtx_unlock_spin(&umtx_lock);
1870 if (uq_first)
1871 umtxq_signal_thread(uq_first);
1872 }
1873 umtxq_unlock(&key);
1874 
1875 /*
1876 * When unlocking the umtx, it must be marked as unowned if
1877 * there is zero or one thread only waiting for it.
1878 * Otherwise, it must be marked as contested.
1879 */
1880 old = casuword32(&m->m_owner, owner,
1881 count <= 1 ? UMUTEX_UNOWNED : UMUTEX_CONTESTED);
1882 
1883 umtxq_lock(&key);
1884 umtxq_unbusy(&key);
1885 umtxq_unlock(&key);
1886 umtx_key_release(&key);
1887 if (old == -1)
1888 return (EFAULT);
/* Lock word changed underneath us: userland corrupted the mutex. */
1889 if (old != owner)
1890 return (EINVAL);
1891 return (0);
1892 }
1893
1894 /*
1895 * Lock a PP mutex.
1896 */
1897 static int
1898 _do_lock_pp(struct thread *td, struct umutex *m, uint32_t flags, int timo,
1899 int try)
1900 {
1901 struct umtx_q *uq, *uq2;
1902 struct umtx_pi *pi;
1903 uint32_t ceiling;
1904 uint32_t owner, id;
1905 int error, pri, old_inherited_pri, su;
1906 
1907 id = td->td_tid;
1908 uq = td->td_umtxq;
1909 if ((error = umtx_key_get(m, TYPE_PP_UMUTEX, GET_SHARE(flags),
1910 &uq->uq_key)) != 0)
1911 return (error);
/* su: may this thread raise its own scheduling priority? */
1912 su = (priv_check(td, PRIV_SCHED_RTPRIO) == 0);
1913 for (;;) {
1914 old_inherited_pri = uq->uq_inherited_pri;
1915 umtxq_lock(&uq->uq_key);
1916 umtxq_busy(&uq->uq_key);
1917 umtxq_unlock(&uq->uq_key);
1918 
/* Convert the userland ceiling to a kernel priority offset. */
1919 ceiling = RTP_PRIO_MAX - fuword32(&m->m_ceilings[0]);
1920 if (ceiling > RTP_PRIO_MAX) {
1921 error = EINVAL;
1922 goto out;
1923 }
1924 
1925 mtx_lock_spin(&umtx_lock);
/* POSIX: a locker's priority must not exceed the ceiling. */
1926 if (UPRI(td) < PRI_MIN_REALTIME + ceiling) {
1927 mtx_unlock_spin(&umtx_lock);
1928 error = EINVAL;
1929 goto out;
1930 }
/* Boost ourselves to the ceiling priority before acquiring. */
1931 if (su && PRI_MIN_REALTIME + ceiling < uq->uq_inherited_pri) {
1932 uq->uq_inherited_pri = PRI_MIN_REALTIME + ceiling;
1933 thread_lock(td);
1934 if (uq->uq_inherited_pri < UPRI(td))
1935 sched_lend_user_prio(td, uq->uq_inherited_pri);
1936 thread_unlock(td);
1937 }
1938 mtx_unlock_spin(&umtx_lock);
1939 
/* PP mutexes are always acquired from the CONTESTED state. */
1940 owner = casuword32(&m->m_owner,
1941 UMUTEX_CONTESTED, id | UMUTEX_CONTESTED);
1942 
1943 if (owner == UMUTEX_CONTESTED) {
1944 error = 0;
1945 break;
1946 }
1947 
1948 /* The address was invalid. */
1949 if (owner == -1) {
1950 error = EFAULT;
1951 break;
1952 }
1953 
1954 if ((flags & UMUTEX_ERROR_CHECK) != 0 &&
1955 (owner & ~UMUTEX_CONTESTED) == id) {
1956 error = EDEADLK;
1957 break;
1958 }
1959 
1960 if (try != 0) {
1961 error = EBUSY;
1962 break;
1963 }
1964 
1965 /*
1966 * If we caught a signal, we have retried and now
1967 * exit immediately.
1968 */
1969 if (error != 0)
1970 break;
1971 
1972 umtxq_lock(&uq->uq_key);
1973 umtxq_insert(uq);
1974 umtxq_unbusy(&uq->uq_key);
1975 error = umtxq_sleep(uq, "umtxpp", timo);
1976 umtxq_remove(uq);
1977 umtxq_unlock(&uq->uq_key);
1978 
/* Acquire failed: restore and recompute our lent priority. */
1979 mtx_lock_spin(&umtx_lock);
1980 uq->uq_inherited_pri = old_inherited_pri;
1981 pri = PRI_MAX;
1982 TAILQ_FOREACH(pi, &uq->uq_pi_contested, pi_link) {
1983 uq2 = TAILQ_FIRST(&pi->pi_blocked);
1984 if (uq2 != NULL) {
1985 if (pri > UPRI(uq2->uq_thread))
1986 pri = UPRI(uq2->uq_thread);
1987 }
1988 }
1989 if (pri > uq->uq_inherited_pri)
1990 pri = uq->uq_inherited_pri;
1991 thread_lock(td);
1992 sched_lend_user_prio(td, pri);
1993 thread_unlock(td);
1994 mtx_unlock_spin(&umtx_lock);
1995 }
1996 
/* On any error, undo the ceiling boost taken for this attempt. */
1997 if (error != 0) {
1998 mtx_lock_spin(&umtx_lock);
1999 uq->uq_inherited_pri = old_inherited_pri;
2000 pri = PRI_MAX;
2001 TAILQ_FOREACH(pi, &uq->uq_pi_contested, pi_link) {
2002 uq2 = TAILQ_FIRST(&pi->pi_blocked);
2003 if (uq2 != NULL) {
2004 if (pri > UPRI(uq2->uq_thread))
2005 pri = UPRI(uq2->uq_thread);
2006 }
2007 }
2008 if (pri > uq->uq_inherited_pri)
2009 pri = uq->uq_inherited_pri;
2010 thread_lock(td);
2011 sched_lend_user_prio(td, pri);
2012 thread_unlock(td);
2013 mtx_unlock_spin(&umtx_lock);
2014 }
2015 
2016 out:
2017 umtxq_lock(&uq->uq_key);
2018 umtxq_unbusy(&uq->uq_key);
2019 umtxq_unlock(&uq->uq_key);
2020 umtx_key_release(&uq->uq_key);
2021 return (error);
2022 }
2023
2024 /*
2025 * Unlock a PP mutex.
2026 */
2027 static int
2028 do_unlock_pp(struct thread *td, struct umutex *m, uint32_t flags)
2029 {
2030 struct umtx_key key;
2031 struct umtx_q *uq, *uq2;
2032 struct umtx_pi *pi;
2033 uint32_t owner, id;
2034 uint32_t rceiling;
2035 int error, pri, new_inherited_pri, su;
2036 
2037 id = td->td_tid;
2038 uq = td->td_umtxq;
2039 su = (priv_check(td, PRIV_SCHED_RTPRIO) == 0);
2040 
2041 /*
2042 * Make sure we own this mtx.
2043 */
2044 owner = fuword32(__DEVOLATILE(uint32_t *, &m->m_owner));
2045 if (owner == -1)
2046 return (EFAULT);
2047 
2048 if ((owner & ~UMUTEX_CONTESTED) != id)
2049 return (EPERM);
2050 
/* m_ceilings[1] holds the priority to restore after unlock. */
2051 error = copyin(&m->m_ceilings[1], &rceiling, sizeof(uint32_t));
2052 if (error != 0)
2053 return (error);
2054 
2055 if (rceiling == -1)
2056 new_inherited_pri = PRI_MAX;
2057 else {
2058 rceiling = RTP_PRIO_MAX - rceiling;
2059 if (rceiling > RTP_PRIO_MAX)
2060 return (EINVAL);
2061 new_inherited_pri = PRI_MIN_REALTIME + rceiling;
2062 }
2063 
2064 if ((error = umtx_key_get(m, TYPE_PP_UMUTEX, GET_SHARE(flags),
2065 &key)) != 0)
2066 return (error);
2067 umtxq_lock(&key);
2068 umtxq_busy(&key);
2069 umtxq_unlock(&key);
2070 /*
2071 * For priority protected mutex, always set unlocked state
2072 * to UMUTEX_CONTESTED, so that userland always enters kernel
2073 * to lock the mutex, it is necessary because thread priority
2074 * has to be adjusted for such mutex.
2075 */
2076 error = suword32(__DEVOLATILE(uint32_t *, &m->m_owner),
2077 UMUTEX_CONTESTED);
2078 
2079 umtxq_lock(&key);
2080 if (error == 0)
2081 umtxq_signal(&key, 1);
2082 umtxq_unbusy(&key);
2083 umtxq_unlock(&key);
2084 
/* suword32() returns -1 on a userspace fault. */
2085 if (error == -1)
2086 error = EFAULT;
2087 else {
/* Recompute lent priority now that this mutex is released. */
2088 mtx_lock_spin(&umtx_lock);
2089 if (su != 0)
2090 uq->uq_inherited_pri = new_inherited_pri;
2091 pri = PRI_MAX;
2092 TAILQ_FOREACH(pi, &uq->uq_pi_contested, pi_link) {
2093 uq2 = TAILQ_FIRST(&pi->pi_blocked);
2094 if (uq2 != NULL) {
2095 if (pri > UPRI(uq2->uq_thread))
2096 pri = UPRI(uq2->uq_thread);
2097 }
2098 }
2099 if (pri > uq->uq_inherited_pri)
2100 pri = uq->uq_inherited_pri;
2101 thread_lock(td);
2102 sched_lend_user_prio(td, pri);
2103 thread_unlock(td);
2104 mtx_unlock_spin(&umtx_lock);
2105 }
2106 umtx_key_release(&key);
2107 return (error);
2108 }
2109
2110 static int
2111 do_set_ceiling(struct thread *td, struct umutex *m, uint32_t ceiling,
2112 uint32_t *old_ceiling)
2113 {
2114 struct umtx_q *uq;
2115 uint32_t save_ceiling;
2116 uint32_t owner, id;
2117 uint32_t flags;
2118 int error;
2119 
2120 flags = fuword32(&m->m_flags);
/* Ceilings only make sense for PRIO_PROTECT mutexes. */
2121 if ((flags & UMUTEX_PRIO_PROTECT) == 0)
2122 return (EINVAL);
2123 if (ceiling > RTP_PRIO_MAX)
2124 return (EINVAL);
2125 id = td->td_tid;
2126 uq = td->td_umtxq;
2127 if ((error = umtx_key_get(m, TYPE_PP_UMUTEX, GET_SHARE(flags),
2128 &uq->uq_key)) != 0)
2129 return (error);
/* error stays 0 from umtx_key_get; checked after a sleep below. */
2130 for (;;) {
2131 umtxq_lock(&uq->uq_key);
2132 umtxq_busy(&uq->uq_key);
2133 umtxq_unlock(&uq->uq_key);
2134 
2135 save_ceiling = fuword32(&m->m_ceilings[0]);
2136 
/* Try to take the (PP-style, CONTESTED-based) lock ourselves. */
2137 owner = casuword32(&m->m_owner,
2138 UMUTEX_CONTESTED, id | UMUTEX_CONTESTED);
2139 
2140 if (owner == UMUTEX_CONTESTED) {
/* Acquired: store the new ceiling and drop the lock again. */
2141 suword32(&m->m_ceilings[0], ceiling);
2142 suword32(__DEVOLATILE(uint32_t *, &m->m_owner),
2143 UMUTEX_CONTESTED);
2144 error = 0;
2145 break;
2146 }
2147 
2148 /* The address was invalid. */
2149 if (owner == -1) {
2150 error = EFAULT;
2151 break;
2152 }
2153 
/* We already own it: update the ceiling in place. */
2154 if ((owner & ~UMUTEX_CONTESTED) == id) {
2155 suword32(&m->m_ceilings[0], ceiling);
2156 error = 0;
2157 break;
2158 }
2159 
2160 /*
2161 * If we caught a signal, we have retried and now
2162 * exit immediately.
2163 */
2164 if (error != 0)
2165 break;
2166 
2167 /*
2168 * We set the contested bit, sleep. Otherwise the lock changed
2169 * and we need to retry or we lost a race to the thread
2170 * unlocking the umtx.
2171 */
2172 umtxq_lock(&uq->uq_key);
2173 umtxq_insert(uq);
2174 umtxq_unbusy(&uq->uq_key);
2175 error = umtxq_sleep(uq, "umtxpp", 0);
2176 umtxq_remove(uq);
2177 umtxq_unlock(&uq->uq_key);
2178 }
2179 umtxq_lock(&uq->uq_key);
/* Wake everyone: the ceiling (hence lockability) may have changed. */
2180 if (error == 0)
2181 umtxq_signal(&uq->uq_key, INT_MAX);
2182 umtxq_unbusy(&uq->uq_key);
2183 umtxq_unlock(&uq->uq_key);
2184 umtx_key_release(&uq->uq_key);
2185 if (error == 0 && old_ceiling != NULL)
2186 suword32(old_ceiling, save_ceiling);
2187 return (error);
2188 }
2189
2190 static int
2191 _do_lock_umutex(struct thread *td, struct umutex *m, int flags, int timo,
2192 int mode)
2193 {
2194 int protocol;
2195 
/* Dispatch on the mutex protocol encoded in the flag word. */
2196 protocol = flags & (UMUTEX_PRIO_INHERIT | UMUTEX_PRIO_PROTECT);
2197 if (protocol == 0)
2198 return (_do_lock_normal(td, m, flags, timo, mode));
2199 if (protocol == UMUTEX_PRIO_INHERIT)
2200 return (_do_lock_pi(td, m, flags, timo, mode));
2201 if (protocol == UMUTEX_PRIO_PROTECT)
2202 return (_do_lock_pp(td, m, flags, timo, mode));
/* Both protocol bits set is an invalid combination. */
2203 return (EINVAL);
2204 }
2204
2205 /*
2206 * Lock a userland POSIX mutex.
2207 */
2208 static int
2209 do_lock_umutex(struct thread *td, struct umutex *m,
2210 struct timespec *timeout, int mode)
2211 {
2212 struct timespec ts, ts2, ts3;
2213 struct timeval tv;
2214 uint32_t flags;
2215 int error;
2216 
2217 flags = fuword32(&m->m_flags);
2218 if (flags == -1)
2219 return (EFAULT);
2220 
2221 if (timeout == NULL) {
2222 error = _do_lock_umutex(td, m, flags, 0, mode);
2223 /* Mutex locking is restarted if it is interrupted. */
2224 if (error == EINTR && mode != _UMUTEX_WAIT)
2225 error = ERESTART;
2226 } else {
/*
 * Relative timeout: compute the absolute deadline on the
 * monotonic clock, then re-issue the lock attempt with the
 * remaining time after each ETIMEDOUT from the inner sleep.
 */
2227 getnanouptime(&ts);
2228 timespecadd(&ts, timeout);
2229 TIMESPEC_TO_TIMEVAL(&tv, timeout);
2230 for (;;) {
2231 error = _do_lock_umutex(td, m, flags, tvtohz(&tv), mode);
2232 if (error != ETIMEDOUT)
2233 break;
2234 getnanouptime(&ts2);
2235 if (timespeccmp(&ts2, &ts, >=)) {
2236 error = ETIMEDOUT;
2237 break;
2238 }
2239 ts3 = ts;
2240 timespecsub(&ts3, &ts2);
2241 TIMESPEC_TO_TIMEVAL(&tv, &ts3);
2242 }
2243 /* Timed-locking is not restarted. */
2244 if (error == ERESTART)
2245 error = EINTR;
2246 }
2247 return (error);
2248 }
2249
2250 /*
2251 * Unlock a userland POSIX mutex.
2252 */
2253 static int
2254 do_unlock_umutex(struct thread *td, struct umutex *m)
2255 {
2256 uint32_t flags;
2257 int protocol;
2258 
/* The flag word lives in userspace; fuword32() faults as -1. */
2259 flags = fuword32(&m->m_flags);
2260 if (flags == -1)
2261 return (EFAULT);
2262 
/* Dispatch to the unlock routine for the mutex protocol. */
2263 protocol = flags & (UMUTEX_PRIO_INHERIT | UMUTEX_PRIO_PROTECT);
2264 if (protocol == 0)
2265 return (do_unlock_normal(td, m, flags));
2266 if (protocol == UMUTEX_PRIO_INHERIT)
2267 return (do_unlock_pi(td, m, flags));
2268 if (protocol == UMUTEX_PRIO_PROTECT)
2269 return (do_unlock_pp(td, m, flags));
2270 
/* Both protocol bits set is an invalid combination. */
2271 return (EINVAL);
2272 }
2273
2274 static int
2275 do_cv_wait(struct thread *td, struct ucond *cv, struct umutex *m,
2276 struct timespec *timeout, u_long wflags)
2277 {
2278 struct umtx_q *uq;
2279 struct timeval tv;
2280 struct timespec cts, ets, tts;
2281 uint32_t flags;
2282 uint32_t clockid;
2283 int error;
2284 
2285 uq = td->td_umtxq;
2286 flags = fuword32(&cv->c_flags);
2287 error = umtx_key_get(cv, TYPE_CV, GET_SHARE(flags), &uq->uq_key);
2288 if (error != 0)
2289 return (error);
2290 
2291 if ((wflags & CVWAIT_CLOCKID) != 0) {
2292 clockid = fuword32(&cv->c_clockid);
2293 if (clockid < CLOCK_REALTIME ||
2294 clockid >= CLOCK_THREAD_CPUTIME_ID) {
2295 /* hmm, only HW clock id will work. */
/*
 * Fix: release the key reference obtained above; the
 * original code returned here without umtx_key_release()
 * and leaked the reference.
 */
umtx_key_release(&uq->uq_key);
2296 return (EINVAL);
2297 }
2298 } else {
2299 clockid = CLOCK_REALTIME;
2300 }
2301 
/* Queue ourselves before publishing c_has_waiters to userland. */
2302 umtxq_lock(&uq->uq_key);
2303 umtxq_busy(&uq->uq_key);
2304 umtxq_insert(uq);
2305 umtxq_unlock(&uq->uq_key);
2306 
2307 /*
2308 * Set c_has_waiters to 1 before releasing user mutex, also
2309 * don't modify cache line when unnecessary.
2310 */
2311 if (fuword32(__DEVOLATILE(uint32_t *, &cv->c_has_waiters)) == 0)
2312 suword32(__DEVOLATILE(uint32_t *, &cv->c_has_waiters), 1);
2313 
2314 umtxq_lock(&uq->uq_key);
2315 umtxq_unbusy(&uq->uq_key);
2316 umtxq_unlock(&uq->uq_key);
2317 
/* Drop the associated mutex, as condvar semantics require. */
2318 error = do_unlock_umutex(td, m);
2319 
2320 umtxq_lock(&uq->uq_key);
2321 if (error == 0) {
2322 if (timeout == NULL) {
2323 error = umtxq_sleep(uq, "ucond", 0);
2324 } else {
2325 if ((wflags & CVWAIT_ABSTIME) == 0) {
/* Relative timeout: deadline = now + timeout. */
2326 kern_clock_gettime(td, clockid, &ets);
2327 timespecadd(&ets, timeout);
2328 tts = *timeout;
2329 } else { /* absolute time */
2330 ets = *timeout;
2331 tts = *timeout;
2332 kern_clock_gettime(td, clockid, &cts);
2333 timespecsub(&tts, &cts);
2334 }
2335 TIMESPEC_TO_TIMEVAL(&tv, &tts);
/* Re-sleep with remaining time until deadline or wakeup. */
2336 for (;;) {
2337 error = umtxq_sleep(uq, "ucond", tvtohz(&tv));
2338 if (error != ETIMEDOUT)
2339 break;
2340 kern_clock_gettime(td, clockid, &cts);
2341 if (timespeccmp(&cts, &ets, >=)) {
2342 error = ETIMEDOUT;
2343 break;
2344 }
2345 tts = ets;
2346 timespecsub(&tts, &cts);
2347 TIMESPEC_TO_TIMEVAL(&tv, &tts);
2348 }
2349 }
2350 }
2351 
2352 if ((uq->uq_flags & UQF_UMTXQ) == 0)
2353 error = 0;
2354 else {
2355 /*
2356 * This must be timeout, interrupted by signal or
2357 * spurious wakeup; clear the c_has_waiters flag when
2358 * necessary.
2359 */
2360 umtxq_busy(&uq->uq_key);
2361 if ((uq->uq_flags & UQF_UMTXQ) != 0) {
/* We were the last waiter: clear the userland flag. */
2362 int oldlen = uq->uq_cur_queue->length;
2363 umtxq_remove(uq);
2364 if (oldlen == 1) {
2365 umtxq_unlock(&uq->uq_key);
2366 suword32(
2367 __DEVOLATILE(uint32_t *,
2368 &cv->c_has_waiters), 0);
2369 umtxq_lock(&uq->uq_key);
2370 }
2371 }
2372 umtxq_unbusy(&uq->uq_key);
2373 if (error == ERESTART)
2374 error = EINTR;
2375 }
2376 
2377 umtxq_unlock(&uq->uq_key);
2378 umtx_key_release(&uq->uq_key);
2379 return (error);
2380 }
2381
2382 /*
2383 * Signal a userland condition variable.
2384 */
2385 static int
2386 do_cv_signal(struct thread *td, struct ucond *cv)
2387 {
2388 struct umtx_key key;
2389 int error, cnt, nwake;
2390 uint32_t flags;
2391 
2392 flags = fuword32(&cv->c_flags);
2393 if ((error = umtx_key_get(cv, TYPE_CV, GET_SHARE(flags), &key)) != 0)
2394 return (error);
2395 umtxq_lock(&key);
2396 umtxq_busy(&key);
2397 cnt = umtxq_count(&key);
2398 nwake = umtxq_signal(&key, 1);
/* If we woke the last waiter, clear c_has_waiters in userland. */
2399 if (cnt <= nwake) {
2400 umtxq_unlock(&key);
2401 error = suword32(
2402 __DEVOLATILE(uint32_t *, &cv->c_has_waiters), 0);
2403 umtxq_lock(&key);
2404 }
2405 umtxq_unbusy(&key);
2406 umtxq_unlock(&key);
2407 umtx_key_release(&key);
2408 return (error);
2409 }
2410
2411 static int
2412 do_cv_broadcast(struct thread *td, struct ucond *cv)
2413 {
2414 struct umtx_key key;
2415 int error;
2416 uint32_t flags;
2417 
2418 flags = fuword32(&cv->c_flags);
2419 if ((error = umtx_key_get(cv, TYPE_CV, GET_SHARE(flags), &key)) != 0)
2420 return (error);
2421 
/* Wake every waiter on the condvar's queue. */
2422 umtxq_lock(&key);
2423 umtxq_busy(&key);
2424 umtxq_signal(&key, INT_MAX);
2425 umtxq_unlock(&key);
2426 
/* All waiters are gone, so clear the userland waiters flag. */
2427 error = suword32(__DEVOLATILE(uint32_t *, &cv->c_has_waiters), 0);
2428 
2429 umtxq_lock(&key);
2430 umtxq_unbusy(&key);
2431 umtxq_unlock(&key);
2432 
2433 umtx_key_release(&key);
2434 return (error);
2435 }
2436
2437 static int
2438 do_rw_rdlock(struct thread *td, struct urwlock *rwlock, long fflag, int timo)
2439 {
2440 struct umtx_q *uq;
2441 uint32_t flags, wrflags;
2442 int32_t state, oldstate;
2443 int32_t blocked_readers;
2444 int error;
2445 
2446 uq = td->td_umtxq;
2447 flags = fuword32(&rwlock->rw_flags);
2448 error = umtx_key_get(rwlock, TYPE_RWLOCK, GET_SHARE(flags), &uq->uq_key);
2449 if (error != 0)
2450 return (error);
2451 
/*
 * Unless readers are preferred, also treat pending writers
 * (WRITE_WAITERS) as a reason to block, giving writers priority.
 */
2452 wrflags = URWLOCK_WRITE_OWNER;
2453 if (!(fflag & URWLOCK_PREFER_READER) && !(flags & URWLOCK_PREFER_READER))
2454 wrflags |= URWLOCK_WRITE_WAITERS;
2455 
2456 for (;;) {
2457 state = fuword32(__DEVOLATILE(int32_t *, &rwlock->rw_state));
2458 /* try to lock it */
2459 while (!(state & wrflags)) {
2460 if (__predict_false(URWLOCK_READER_COUNT(state) == URWLOCK_MAX_READERS)) {
2461 umtx_key_release(&uq->uq_key);
2462 return (EAGAIN);
2463 }
/* The low bits are the reader count; +1 takes a read slot. */
2464 oldstate = casuword32(&rwlock->rw_state, state, state + 1);
2465 if (oldstate == state) {
2466 umtx_key_release(&uq->uq_key);
2467 return (0);
2468 }
2469 state = oldstate;
2470 }
2471 
2472 if (error)
2473 break;
2474 
2475 /* grab monitor lock */
2476 umtxq_lock(&uq->uq_key);
2477 umtxq_busy(&uq->uq_key);
2478 umtxq_unlock(&uq->uq_key);
2479 
2480 /*
2481 * re-read the state, in case it changed between the try-lock above
2482 * and the check below
2483 */
2484 state = fuword32(__DEVOLATILE(int32_t *, &rwlock->rw_state));
2485 
2486 /* set read contention bit */
2487 while ((state & wrflags) && !(state & URWLOCK_READ_WAITERS)) {
2488 oldstate = casuword32(&rwlock->rw_state, state, state | URWLOCK_READ_WAITERS);
2489 if (oldstate == state)
2490 goto sleep;
2491 state = oldstate;
2492 }
2493 
2494 /* state is changed while setting flags, restart */
2495 if (!(state & wrflags)) {
2496 umtxq_lock(&uq->uq_key);
2497 umtxq_unbusy(&uq->uq_key);
2498 umtxq_unlock(&uq->uq_key);
2499 continue;
2500 }
2501 
2502 sleep:
2503 /* contention bit is set, before sleeping, increase read waiter count */
2504 blocked_readers = fuword32(&rwlock->rw_blocked_readers);
2505 suword32(&rwlock->rw_blocked_readers, blocked_readers+1);
2506 
2507 while (state & wrflags) {
2508 umtxq_lock(&uq->uq_key);
2509 umtxq_insert(uq);
2510 umtxq_unbusy(&uq->uq_key);
2511 
2512 error = umtxq_sleep(uq, "urdlck", timo);
2513 
2514 umtxq_busy(&uq->uq_key);
2515 umtxq_remove(uq);
2516 umtxq_unlock(&uq->uq_key);
2517 if (error)
2518 break;
2519 state = fuword32(__DEVOLATILE(int32_t *, &rwlock->rw_state));
2520 }
2521 
2522 /* decrease read waiter count, and may clear read contention bit */
2523 blocked_readers = fuword32(&rwlock->rw_blocked_readers);
2524 suword32(&rwlock->rw_blocked_readers, blocked_readers-1);
/* blocked_readers is the pre-decrement value: 1 means we were last. */
2525 if (blocked_readers == 1) {
2526 state = fuword32(__DEVOLATILE(int32_t *, &rwlock->rw_state));
2527 for (;;) {
2528 oldstate = casuword32(&rwlock->rw_state, state,
2529 state & ~URWLOCK_READ_WAITERS);
2530 if (oldstate == state)
2531 break;
2532 state = oldstate;
2533 }
2534 }
2535 
2536 umtxq_lock(&uq->uq_key);
2537 umtxq_unbusy(&uq->uq_key);
2538 umtxq_unlock(&uq->uq_key);
2539 }
2540 umtx_key_release(&uq->uq_key);
2541 return (error);
2542 }
2543
/*
 * Timed variant of do_rw_rdlock(): retry the tick-granular sleep until
 * the absolute deadline (uptime + *timeout) passes.  ERESTART is mapped
 * to EINTR because the timeout is relative and cannot simply be replayed
 * by the syscall restart machinery.
 */
static int
do_rw_rdlock2(struct thread *td, void *obj, long val, struct timespec *timeout)
{
	struct timespec ts, ts2, ts3;
	struct timeval tv;
	int error;

	/* Convert the relative timeout into an absolute uptime deadline. */
	getnanouptime(&ts);
	timespecadd(&ts, timeout);
	TIMESPEC_TO_TIMEVAL(&tv, timeout);
	for (;;) {
		error = do_rw_rdlock(td, obj, val, tvtohz(&tv));
		if (error != ETIMEDOUT)
			break;
		getnanouptime(&ts2);
		if (timespeccmp(&ts2, &ts, >=)) {
			error = ETIMEDOUT;
			break;
		}
		/* Deadline not reached yet: sleep for the remainder. */
		ts3 = ts;
		timespecsub(&ts3, &ts2);
		TIMESPEC_TO_TIMEVAL(&tv, &ts3);
	}
	if (error == ERESTART)
		error = EINTR;
	return (error);
}
2571
/*
 * Acquire a userland rwlock in write (exclusive) mode.
 *
 * Mirrors do_rw_rdlock(): the lock word is updated in user memory via
 * compare-and-swap, and the kernel exclusive sleep queue is used only
 * under contention.  "timo" is a sleep timeout in ticks (0 = forever).
 */
static int
do_rw_wrlock(struct thread *td, struct urwlock *rwlock, int timo)
{
	struct umtx_q *uq;
	uint32_t flags;
	int32_t state, oldstate;
	int32_t blocked_writers;
	int32_t blocked_readers;
	int error;

	uq = td->td_umtxq;
	flags = fuword32(&rwlock->rw_flags);
	error = umtx_key_get(rwlock, TYPE_RWLOCK, GET_SHARE(flags), &uq->uq_key);
	if (error != 0)
		return (error);

	blocked_readers = 0;
	for (;;) {
		state = fuword32(__DEVOLATILE(int32_t *, &rwlock->rw_state));
		/* Try to take the lock: no owner and no active readers. */
		while (!(state & URWLOCK_WRITE_OWNER) && URWLOCK_READER_COUNT(state) == 0) {
			oldstate = casuword32(&rwlock->rw_state, state, state | URWLOCK_WRITE_OWNER);
			if (oldstate == state) {
				umtx_key_release(&uq->uq_key);
				return (0);
			}
			state = oldstate;
		}

		if (error) {
			/*
			 * Giving up (signal/timeout).  If we were the last
			 * writer of interest, let any blocked readers retry
			 * so they are not stranded behind a writer that left.
			 */
			if (!(state & (URWLOCK_WRITE_OWNER|URWLOCK_WRITE_WAITERS)) &&
			    blocked_readers != 0) {
				umtxq_lock(&uq->uq_key);
				umtxq_busy(&uq->uq_key);
				umtxq_signal_queue(&uq->uq_key, INT_MAX, UMTX_SHARED_QUEUE);
				umtxq_unbusy(&uq->uq_key);
				umtxq_unlock(&uq->uq_key);
			}

			break;
		}

		/* grab monitor lock */
		umtxq_lock(&uq->uq_key);
		umtxq_busy(&uq->uq_key);
		umtxq_unlock(&uq->uq_key);

		/*
		 * re-read the state, in case it changed between the try-lock above
		 * and the check below
		 */
		state = fuword32(__DEVOLATILE(int32_t *, &rwlock->rw_state));

		/* Lock still held: advertise a waiting writer. */
		while (((state & URWLOCK_WRITE_OWNER) || URWLOCK_READER_COUNT(state) != 0) &&
		       (state & URWLOCK_WRITE_WAITERS) == 0) {
			oldstate = casuword32(&rwlock->rw_state, state, state | URWLOCK_WRITE_WAITERS);
			if (oldstate == state)
				goto sleep;
			state = oldstate;
		}

		/* Lock was released while setting the flag: restart. */
		if (!(state & URWLOCK_WRITE_OWNER) && URWLOCK_READER_COUNT(state) == 0) {
			umtxq_lock(&uq->uq_key);
			umtxq_unbusy(&uq->uq_key);
			umtxq_unlock(&uq->uq_key);
			continue;
		}
sleep:
		/* Contention bit is set; count ourselves as a blocked writer. */
		blocked_writers = fuword32(&rwlock->rw_blocked_writers);
		suword32(&rwlock->rw_blocked_writers, blocked_writers+1);

		while ((state & URWLOCK_WRITE_OWNER) || URWLOCK_READER_COUNT(state) != 0) {
			umtxq_lock(&uq->uq_key);
			umtxq_insert_queue(uq, UMTX_EXCLUSIVE_QUEUE);
			umtxq_unbusy(&uq->uq_key);

			error = umtxq_sleep(uq, "uwrlck", timo);

			umtxq_busy(&uq->uq_key);
			umtxq_remove_queue(uq, UMTX_EXCLUSIVE_QUEUE);
			umtxq_unlock(&uq->uq_key);
			if (error)
				break;
			state = fuword32(__DEVOLATILE(int32_t *, &rwlock->rw_state));
		}

		blocked_writers = fuword32(&rwlock->rw_blocked_writers);
		suword32(&rwlock->rw_blocked_writers, blocked_writers-1);
		if (blocked_writers == 1) {
			/* Last blocked writer: clear WRITE_WAITERS. */
			state = fuword32(__DEVOLATILE(int32_t *, &rwlock->rw_state));
			for (;;) {
				oldstate = casuword32(&rwlock->rw_state, state,
					 state & ~URWLOCK_WRITE_WAITERS);
				if (oldstate == state)
					break;
				state = oldstate;
			}
			/*
			 * Remember how many readers are blocked so the
			 * error path above can wake them if we bail out.
			 */
			blocked_readers = fuword32(&rwlock->rw_blocked_readers);
		} else
			blocked_readers = 0;

		umtxq_lock(&uq->uq_key);
		umtxq_unbusy(&uq->uq_key);
		umtxq_unlock(&uq->uq_key);
	}

	umtx_key_release(&uq->uq_key);
	return (error);
}
2680
/*
 * Timed variant of do_rw_wrlock(): retry the tick-granular sleep until
 * the absolute deadline (uptime + *timeout) passes.  ERESTART is mapped
 * to EINTR since the relative timeout cannot be replayed on restart.
 */
static int
do_rw_wrlock2(struct thread *td, void *obj, struct timespec *timeout)
{
	struct timespec ts, ts2, ts3;
	struct timeval tv;
	int error;

	/* Convert the relative timeout into an absolute uptime deadline. */
	getnanouptime(&ts);
	timespecadd(&ts, timeout);
	TIMESPEC_TO_TIMEVAL(&tv, timeout);
	for (;;) {
		error = do_rw_wrlock(td, obj, tvtohz(&tv));
		if (error != ETIMEDOUT)
			break;
		getnanouptime(&ts2);
		if (timespeccmp(&ts2, &ts, >=)) {
			error = ETIMEDOUT;
			break;
		}
		/* Deadline not reached yet: sleep for the remainder. */
		ts3 = ts;
		timespecsub(&ts3, &ts2);
		TIMESPEC_TO_TIMEVAL(&tv, &ts3);
	}
	if (error == ERESTART)
		error = EINTR;
	return (error);
}
2708
/*
 * Release a userland rwlock held either in write or read mode, then
 * wake waiters according to the lock's preference policy.  Returns
 * EPERM if the lock word shows the lock is not actually held.
 */
static int
do_rw_unlock(struct thread *td, struct urwlock *rwlock)
{
	struct umtx_q *uq;
	uint32_t flags;
	int32_t state, oldstate;
	int error, q, count;

	uq = td->td_umtxq;
	flags = fuword32(&rwlock->rw_flags);
	error = umtx_key_get(rwlock, TYPE_RWLOCK, GET_SHARE(flags), &uq->uq_key);
	if (error != 0)
		return (error);

	state = fuword32(__DEVOLATILE(int32_t *, &rwlock->rw_state));
	if (state & URWLOCK_WRITE_OWNER) {
		/* Write unlock: clear the owner bit. */
		for (;;) {
			oldstate = casuword32(&rwlock->rw_state, state,
				state & ~URWLOCK_WRITE_OWNER);
			if (oldstate != state) {
				state = oldstate;
				/* Someone else already dropped ownership. */
				if (!(oldstate & URWLOCK_WRITE_OWNER)) {
					error = EPERM;
					goto out;
				}
			} else
				break;
		}
	} else if (URWLOCK_READER_COUNT(state) != 0) {
		/* Read unlock: decrement the reader count. */
		for (;;) {
			oldstate = casuword32(&rwlock->rw_state, state,
				state - 1);
			if (oldstate != state) {
				state = oldstate;
				if (URWLOCK_READER_COUNT(oldstate) == 0) {
					error = EPERM;
					goto out;
				}
			}
			else
				break;
		}
	} else {
		/* Lock is not held at all. */
		error = EPERM;
		goto out;
	}

	count = 0;

	/*
	 * Choose which queue to wake: writers first unless the lock
	 * prefers readers.  One writer is woken, or all readers.
	 */
	if (!(flags & URWLOCK_PREFER_READER)) {
		if (state & URWLOCK_WRITE_WAITERS) {
			count = 1;
			q = UMTX_EXCLUSIVE_QUEUE;
		} else if (state & URWLOCK_READ_WAITERS) {
			count = INT_MAX;
			q = UMTX_SHARED_QUEUE;
		}
	} else {
		if (state & URWLOCK_READ_WAITERS) {
			count = INT_MAX;
			q = UMTX_SHARED_QUEUE;
		} else if (state & URWLOCK_WRITE_WAITERS) {
			count = 1;
			q = UMTX_EXCLUSIVE_QUEUE;
		}
	}

	if (count) {
		umtxq_lock(&uq->uq_key);
		umtxq_busy(&uq->uq_key);
		umtxq_signal_queue(&uq->uq_key, count, q);
		umtxq_unbusy(&uq->uq_key);
		umtxq_unlock(&uq->uq_key);
	}
out:
	umtx_key_release(&uq->uq_key);
	return (error);
}
2787
/*
 * Wait on a userland semaphore: if _count is zero, sleep (optionally
 * with a relative timeout) until do_sem_wake() signals us.  The thread
 * is inserted on the sleep queue *before* the final check of _count so
 * a concurrent wake cannot be lost between check and sleep.
 */
static int
do_sem_wait(struct thread *td, struct _usem *sem, struct timespec *timeout)
{
	struct umtx_q *uq;
	struct timeval tv;
	struct timespec cts, ets, tts;
	uint32_t flags, count;
	int error;

	uq = td->td_umtxq;
	flags = fuword32(&sem->_flags);
	error = umtx_key_get(sem, TYPE_SEM, GET_SHARE(flags), &uq->uq_key);
	if (error != 0)
		return (error);
	umtxq_lock(&uq->uq_key);
	umtxq_busy(&uq->uq_key);
	umtxq_insert(uq);
	umtxq_unlock(&uq->uq_key);

	/* Advertise a waiter so userland takes the slow path on post. */
	if (fuword32(__DEVOLATILE(uint32_t *, &sem->_has_waiters)) == 0)
		casuword32(__DEVOLATILE(uint32_t *, &sem->_has_waiters), 0, 1);

	count = fuword32(__DEVOLATILE(uint32_t *, &sem->_count));
	if (count != 0) {
		/* The semaphore was posted meanwhile; no need to sleep. */
		umtxq_lock(&uq->uq_key);
		umtxq_unbusy(&uq->uq_key);
		umtxq_remove(uq);
		umtxq_unlock(&uq->uq_key);
		umtx_key_release(&uq->uq_key);
		return (0);
	}

	umtxq_lock(&uq->uq_key);
	umtxq_unbusy(&uq->uq_key);
	umtxq_unlock(&uq->uq_key);

	umtxq_lock(&uq->uq_key);
	if (timeout == NULL) {
		error = umtxq_sleep(uq, "usem", 0);
	} else {
		/* Relative timeout: loop until the uptime deadline. */
		getnanouptime(&ets);
		timespecadd(&ets, timeout);
		TIMESPEC_TO_TIMEVAL(&tv, timeout);
		for (;;) {
			error = umtxq_sleep(uq, "usem", tvtohz(&tv));
			if (error != ETIMEDOUT)
				break;
			getnanouptime(&cts);
			if (timespeccmp(&cts, &ets, >=)) {
				error = ETIMEDOUT;
				break;
			}
			tts = ets;
			timespecsub(&tts, &cts);
			TIMESPEC_TO_TIMEVAL(&tv, &tts);
		}
	}

	if ((uq->uq_flags & UQF_UMTXQ) == 0)
		/* Removed from the queue by a waker: we were signalled. */
		error = 0;
	else {
		umtxq_remove(uq);
		if (error == ERESTART)
			error = EINTR;
	}
	umtxq_unlock(&uq->uq_key);
	umtx_key_release(&uq->uq_key);
	return (error);
}
2857
/*
 * Signal a userland semaphore: wake one waiter and, if that empties
 * the queue, clear the _has_waiters hint in user memory.
 */
static int
do_sem_wake(struct thread *td, struct _usem *sem)
{
	struct umtx_key key;
	int error, cnt, nwake;
	uint32_t flags;

	flags = fuword32(&sem->_flags);
	if ((error = umtx_key_get(sem, TYPE_SEM, GET_SHARE(flags), &key)) != 0)
		return (error);
	umtxq_lock(&key);
	umtxq_busy(&key);
	cnt = umtxq_count(&key);
	nwake = umtxq_signal(&key, 1);
	if (cnt <= nwake) {
		/*
		 * No waiters remain; drop the queue lock (suword32 may
		 * fault/sleep) but keep the queue busy while clearing
		 * the user-visible waiters flag.
		 */
		umtxq_unlock(&key);
		error = suword32(
		    __DEVOLATILE(uint32_t *, &sem->_has_waiters), 0);
		umtxq_lock(&key);
	}
	umtxq_unbusy(&key);
	umtxq_unlock(&key);
	umtx_key_release(&key);
	return (error);
}
2886
/* _umtx_lock(2): lock the umtx on behalf of the calling thread's tid. */
int
sys__umtx_lock(struct thread *td, struct _umtx_lock_args *uap)
    /* struct umtx *umtx */
{
	return _do_lock_umtx(td, uap->umtx, td->td_tid, 0);
}

/* _umtx_unlock(2): unlock a umtx owned by the calling thread's tid. */
int
sys__umtx_unlock(struct thread *td, struct _umtx_unlock_args *uap)
    /* struct umtx *umtx */
{
	return do_unlock_umtx(td, uap->umtx, td->td_tid);
}
2900
2901 static int
2902 __umtx_op_lock_umtx(struct thread *td, struct _umtx_op_args *uap)
2903 {
2904 struct timespec *ts, timeout;
2905 int error;
2906
2907 /* Allow a null timespec (wait forever). */
2908 if (uap->uaddr2 == NULL)
2909 ts = NULL;
2910 else {
2911 error = copyin(uap->uaddr2, &timeout, sizeof(timeout));
2912 if (error != 0)
2913 return (error);
2914 if (timeout.tv_nsec >= 1000000000 ||
2915 timeout.tv_nsec < 0) {
2916 return (EINVAL);
2917 }
2918 ts = &timeout;
2919 }
2920 return (do_lock_umtx(td, uap->obj, uap->val, ts));
2921 }
2922
/* UMTX_OP_UNLOCK handler: release a umtx owned by id uap->val. */
static int
__umtx_op_unlock_umtx(struct thread *td, struct _umtx_op_args *uap)
{
	return (do_unlock_umtx(td, uap->obj, uap->val));
}
2928
2929 static int
2930 __umtx_op_wait(struct thread *td, struct _umtx_op_args *uap)
2931 {
2932 struct timespec *ts, timeout;
2933 int error;
2934
2935 if (uap->uaddr2 == NULL)
2936 ts = NULL;
2937 else {
2938 error = copyin(uap->uaddr2, &timeout, sizeof(timeout));
2939 if (error != 0)
2940 return (error);
2941 if (timeout.tv_nsec >= 1000000000 ||
2942 timeout.tv_nsec < 0)
2943 return (EINVAL);
2944 ts = &timeout;
2945 }
2946 return do_wait(td, uap->obj, uap->val, ts, 0, 0);
2947 }
2948
2949 static int
2950 __umtx_op_wait_uint(struct thread *td, struct _umtx_op_args *uap)
2951 {
2952 struct timespec *ts, timeout;
2953 int error;
2954
2955 if (uap->uaddr2 == NULL)
2956 ts = NULL;
2957 else {
2958 error = copyin(uap->uaddr2, &timeout, sizeof(timeout));
2959 if (error != 0)
2960 return (error);
2961 if (timeout.tv_nsec >= 1000000000 ||
2962 timeout.tv_nsec < 0)
2963 return (EINVAL);
2964 ts = &timeout;
2965 }
2966 return do_wait(td, uap->obj, uap->val, ts, 1, 0);
2967 }
2968
2969 static int
2970 __umtx_op_wait_uint_private(struct thread *td, struct _umtx_op_args *uap)
2971 {
2972 struct timespec *ts, timeout;
2973 int error;
2974
2975 if (uap->uaddr2 == NULL)
2976 ts = NULL;
2977 else {
2978 error = copyin(uap->uaddr2, &timeout, sizeof(timeout));
2979 if (error != 0)
2980 return (error);
2981 if (timeout.tv_nsec >= 1000000000 ||
2982 timeout.tv_nsec < 0)
2983 return (EINVAL);
2984 ts = &timeout;
2985 }
2986 return do_wait(td, uap->obj, uap->val, ts, 1, 1);
2987 }
2988
/* UMTX_OP_WAKE handler: wake up to uap->val threads waiting on the word. */
static int
__umtx_op_wake(struct thread *td, struct _umtx_op_args *uap)
{
	return (kern_umtx_wake(td, uap->obj, uap->val, 0));
}
2994
2995 #define BATCH_SIZE 128
2996 static int
2997 __umtx_op_nwake_private(struct thread *td, struct _umtx_op_args *uap)
2998 {
2999 int count = uap->val;
3000 void *uaddrs[BATCH_SIZE];
3001 char **upp = (char **)uap->obj;
3002 int tocopy;
3003 int error = 0;
3004 int i, pos = 0;
3005
3006 while (count > 0) {
3007 tocopy = count;
3008 if (tocopy > BATCH_SIZE)
3009 tocopy = BATCH_SIZE;
3010 error = copyin(upp+pos, uaddrs, tocopy * sizeof(char *));
3011 if (error != 0)
3012 break;
3013 for (i = 0; i < tocopy; ++i)
3014 kern_umtx_wake(td, uaddrs[i], INT_MAX, 1);
3015 count -= tocopy;
3016 pos += tocopy;
3017 }
3018 return (error);
3019 }
3020
/* UMTX_OP_WAKE_PRIVATE handler: wake waiters on a process-private word. */
static int
__umtx_op_wake_private(struct thread *td, struct _umtx_op_args *uap)
{
	return (kern_umtx_wake(td, uap->obj, uap->val, 1));
}
3026
3027 static int
3028 __umtx_op_lock_umutex(struct thread *td, struct _umtx_op_args *uap)
3029 {
3030 struct timespec *ts, timeout;
3031 int error;
3032
3033 /* Allow a null timespec (wait forever). */
3034 if (uap->uaddr2 == NULL)
3035 ts = NULL;
3036 else {
3037 error = copyin(uap->uaddr2, &timeout,
3038 sizeof(timeout));
3039 if (error != 0)
3040 return (error);
3041 if (timeout.tv_nsec >= 1000000000 ||
3042 timeout.tv_nsec < 0) {
3043 return (EINVAL);
3044 }
3045 ts = &timeout;
3046 }
3047 return do_lock_umutex(td, uap->obj, ts, 0);
3048 }
3049
/* UMTX_OP_MUTEX_TRYLOCK handler: non-blocking umutex acquisition. */
static int
__umtx_op_trylock_umutex(struct thread *td, struct _umtx_op_args *uap)
{
	return do_lock_umutex(td, uap->obj, NULL, _UMUTEX_TRY);
}
3055
3056 static int
3057 __umtx_op_wait_umutex(struct thread *td, struct _umtx_op_args *uap)
3058 {
3059 struct timespec *ts, timeout;
3060 int error;
3061
3062 /* Allow a null timespec (wait forever). */
3063 if (uap->uaddr2 == NULL)
3064 ts = NULL;
3065 else {
3066 error = copyin(uap->uaddr2, &timeout,
3067 sizeof(timeout));
3068 if (error != 0)
3069 return (error);
3070 if (timeout.tv_nsec >= 1000000000 ||
3071 timeout.tv_nsec < 0) {
3072 return (EINVAL);
3073 }
3074 ts = &timeout;
3075 }
3076 return do_lock_umutex(td, uap->obj, ts, _UMUTEX_WAIT);
3077 }
3078
/* UMTX_OP_MUTEX_WAKE handler: wake a thread waiting on a umutex. */
static int
__umtx_op_wake_umutex(struct thread *td, struct _umtx_op_args *uap)
{
	return do_wake_umutex(td, uap->obj);
}

/* UMTX_OP_MUTEX_UNLOCK handler: release a umutex. */
static int
__umtx_op_unlock_umutex(struct thread *td, struct _umtx_op_args *uap)
{
	return do_unlock_umutex(td, uap->obj);
}

/* UMTX_OP_SET_CEILING handler: set a PP umutex priority ceiling. */
static int
__umtx_op_set_ceiling(struct thread *td, struct _umtx_op_args *uap)
{
	return do_set_ceiling(td, uap->obj, uap->val, uap->uaddr1);
}
3096
3097 static int
3098 __umtx_op_cv_wait(struct thread *td, struct _umtx_op_args *uap)
3099 {
3100 struct timespec *ts, timeout;
3101 int error;
3102
3103 /* Allow a null timespec (wait forever). */
3104 if (uap->uaddr2 == NULL)
3105 ts = NULL;
3106 else {
3107 error = copyin(uap->uaddr2, &timeout,
3108 sizeof(timeout));
3109 if (error != 0)
3110 return (error);
3111 if (timeout.tv_nsec >= 1000000000 ||
3112 timeout.tv_nsec < 0) {
3113 return (EINVAL);
3114 }
3115 ts = &timeout;
3116 }
3117 return (do_cv_wait(td, uap->obj, uap->uaddr1, ts, uap->val));
3118 }
3119
/* UMTX_OP_CV_SIGNAL handler: wake one waiter on a ucond. */
static int
__umtx_op_cv_signal(struct thread *td, struct _umtx_op_args *uap)
{
	return do_cv_signal(td, uap->obj);
}

/* UMTX_OP_CV_BROADCAST handler: wake all waiters on a ucond. */
static int
__umtx_op_cv_broadcast(struct thread *td, struct _umtx_op_args *uap)
{
	return do_cv_broadcast(td, uap->obj);
}
3131
3132 static int
3133 __umtx_op_rw_rdlock(struct thread *td, struct _umtx_op_args *uap)
3134 {
3135 struct timespec timeout;
3136 int error;
3137
3138 /* Allow a null timespec (wait forever). */
3139 if (uap->uaddr2 == NULL) {
3140 error = do_rw_rdlock(td, uap->obj, uap->val, 0);
3141 } else {
3142 error = copyin(uap->uaddr2, &timeout,
3143 sizeof(timeout));
3144 if (error != 0)
3145 return (error);
3146 if (timeout.tv_nsec >= 1000000000 ||
3147 timeout.tv_nsec < 0) {
3148 return (EINVAL);
3149 }
3150 error = do_rw_rdlock2(td, uap->obj, uap->val, &timeout);
3151 }
3152 return (error);
3153 }
3154
3155 static int
3156 __umtx_op_rw_wrlock(struct thread *td, struct _umtx_op_args *uap)
3157 {
3158 struct timespec timeout;
3159 int error;
3160
3161 /* Allow a null timespec (wait forever). */
3162 if (uap->uaddr2 == NULL) {
3163 error = do_rw_wrlock(td, uap->obj, 0);
3164 } else {
3165 error = copyin(uap->uaddr2, &timeout,
3166 sizeof(timeout));
3167 if (error != 0)
3168 return (error);
3169 if (timeout.tv_nsec >= 1000000000 ||
3170 timeout.tv_nsec < 0) {
3171 return (EINVAL);
3172 }
3173
3174 error = do_rw_wrlock2(td, uap->obj, &timeout);
3175 }
3176 return (error);
3177 }
3178
/* UMTX_OP_RW_UNLOCK handler: release a urwlock (read or write mode). */
static int
__umtx_op_rw_unlock(struct thread *td, struct _umtx_op_args *uap)
{
	return do_rw_unlock(td, uap->obj);
}
3184
3185 static int
3186 __umtx_op_sem_wait(struct thread *td, struct _umtx_op_args *uap)
3187 {
3188 struct timespec *ts, timeout;
3189 int error;
3190
3191 /* Allow a null timespec (wait forever). */
3192 if (uap->uaddr2 == NULL)
3193 ts = NULL;
3194 else {
3195 error = copyin(uap->uaddr2, &timeout,
3196 sizeof(timeout));
3197 if (error != 0)
3198 return (error);
3199 if (timeout.tv_nsec >= 1000000000 ||
3200 timeout.tv_nsec < 0) {
3201 return (EINVAL);
3202 }
3203 ts = &timeout;
3204 }
3205 return (do_sem_wait(td, uap->obj, ts));
3206 }
3207
/* UMTX_OP_SEM_WAKE handler: post a userland semaphore. */
static int
__umtx_op_sem_wake(struct thread *td, struct _umtx_op_args *uap)
{
	return do_sem_wake(td, uap->obj);
}
3213
typedef int (*_umtx_op_func)(struct thread *td, struct _umtx_op_args *uap);

/*
 * Native _umtx_op() dispatch table.  Entry order must match the
 * numeric values of the UMTX_OP_* opcodes.
 */
static _umtx_op_func op_table[] = {
	__umtx_op_lock_umtx,		/* UMTX_OP_LOCK */
	__umtx_op_unlock_umtx,		/* UMTX_OP_UNLOCK */
	__umtx_op_wait,			/* UMTX_OP_WAIT */
	__umtx_op_wake,			/* UMTX_OP_WAKE */
	__umtx_op_trylock_umutex,	/* UMTX_OP_MUTEX_TRYLOCK */
	__umtx_op_lock_umutex,		/* UMTX_OP_MUTEX_LOCK */
	__umtx_op_unlock_umutex,	/* UMTX_OP_MUTEX_UNLOCK */
	__umtx_op_set_ceiling,		/* UMTX_OP_SET_CEILING */
	__umtx_op_cv_wait,		/* UMTX_OP_CV_WAIT*/
	__umtx_op_cv_signal,		/* UMTX_OP_CV_SIGNAL */
	__umtx_op_cv_broadcast,		/* UMTX_OP_CV_BROADCAST */
	__umtx_op_wait_uint,		/* UMTX_OP_WAIT_UINT */
	__umtx_op_rw_rdlock,		/* UMTX_OP_RW_RDLOCK */
	__umtx_op_rw_wrlock,		/* UMTX_OP_RW_WRLOCK */
	__umtx_op_rw_unlock,		/* UMTX_OP_RW_UNLOCK */
	__umtx_op_wait_uint_private,	/* UMTX_OP_WAIT_UINT_PRIVATE */
	__umtx_op_wake_private,		/* UMTX_OP_WAKE_PRIVATE */
	__umtx_op_wait_umutex,		/* UMTX_OP_UMUTEX_WAIT */
	__umtx_op_wake_umutex,		/* UMTX_OP_UMUTEX_WAKE */
	__umtx_op_sem_wait,		/* UMTX_OP_SEM_WAIT */
	__umtx_op_sem_wake,		/* UMTX_OP_SEM_WAKE */
	__umtx_op_nwake_private		/* UMTX_OP_NWAKE_PRIVATE */
};
3240
3241 int
3242 sys__umtx_op(struct thread *td, struct _umtx_op_args *uap)
3243 {
3244 if ((unsigned)uap->op < UMTX_OP_MAX)
3245 return (*op_table[uap->op])(td, uap);
3246 return (EINVAL);
3247 }
3248
3249 #ifdef COMPAT_FREEBSD32
/* 32-bit compat _umtx_lock(2): operate on a 32-bit umtx word. */
int
freebsd32_umtx_lock(struct thread *td, struct freebsd32_umtx_lock_args *uap)
    /* struct umtx *umtx */
{
	return (do_lock_umtx32(td, (uint32_t *)uap->umtx, td->td_tid, NULL));
}

/* 32-bit compat _umtx_unlock(2). */
int
freebsd32_umtx_unlock(struct thread *td, struct freebsd32_umtx_unlock_args *uap)
    /* struct umtx *umtx */
{
	return (do_unlock_umtx32(td, (uint32_t *)uap->umtx, td->td_tid));
}
3263
/* 32-bit userland layout of struct timespec. */
struct timespec32 {
	uint32_t tv_sec;
	uint32_t tv_nsec;
};

/*
 * Copy a 32-bit timespec in from userland and widen it to the native
 * struct timespec.  Both fields are unsigned 32-bit in the compat
 * layout, so the widened values are never negative; range-checking of
 * tv_nsec is left to the callers.
 */
static inline int
copyin_timeout32(void *addr, struct timespec *tsp)
{
	struct timespec32 ts32;
	int error;

	error = copyin(addr, &ts32, sizeof(struct timespec32));
	if (error == 0) {
		tsp->tv_sec = ts32.tv_sec;
		tsp->tv_nsec = ts32.tv_nsec;
	}
	return (error);
}
3282
3283 static int
3284 __umtx_op_lock_umtx_compat32(struct thread *td, struct _umtx_op_args *uap)
3285 {
3286 struct timespec *ts, timeout;
3287 int error;
3288
3289 /* Allow a null timespec (wait forever). */
3290 if (uap->uaddr2 == NULL)
3291 ts = NULL;
3292 else {
3293 error = copyin_timeout32(uap->uaddr2, &timeout);
3294 if (error != 0)
3295 return (error);
3296 if (timeout.tv_nsec >= 1000000000 ||
3297 timeout.tv_nsec < 0) {
3298 return (EINVAL);
3299 }
3300 ts = &timeout;
3301 }
3302 return (do_lock_umtx32(td, uap->obj, uap->val, ts));
3303 }
3304
/* 32-bit compat UMTX_OP_UNLOCK handler. */
static int
__umtx_op_unlock_umtx_compat32(struct thread *td, struct _umtx_op_args *uap)
{
	return (do_unlock_umtx32(td, uap->obj, (uint32_t)uap->val));
}
3310
3311 static int
3312 __umtx_op_wait_compat32(struct thread *td, struct _umtx_op_args *uap)
3313 {
3314 struct timespec *ts, timeout;
3315 int error;
3316
3317 if (uap->uaddr2 == NULL)
3318 ts = NULL;
3319 else {
3320 error = copyin_timeout32(uap->uaddr2, &timeout);
3321 if (error != 0)
3322 return (error);
3323 if (timeout.tv_nsec >= 1000000000 ||
3324 timeout.tv_nsec < 0)
3325 return (EINVAL);
3326 ts = &timeout;
3327 }
3328 return do_wait(td, uap->obj, uap->val, ts, 1, 0);
3329 }
3330
3331 static int
3332 __umtx_op_lock_umutex_compat32(struct thread *td, struct _umtx_op_args *uap)
3333 {
3334 struct timespec *ts, timeout;
3335 int error;
3336
3337 /* Allow a null timespec (wait forever). */
3338 if (uap->uaddr2 == NULL)
3339 ts = NULL;
3340 else {
3341 error = copyin_timeout32(uap->uaddr2, &timeout);
3342 if (error != 0)
3343 return (error);
3344 if (timeout.tv_nsec >= 1000000000 ||
3345 timeout.tv_nsec < 0)
3346 return (EINVAL);
3347 ts = &timeout;
3348 }
3349 return do_lock_umutex(td, uap->obj, ts, 0);
3350 }
3351
3352 static int
3353 __umtx_op_wait_umutex_compat32(struct thread *td, struct _umtx_op_args *uap)
3354 {
3355 struct timespec *ts, timeout;
3356 int error;
3357
3358 /* Allow a null timespec (wait forever). */
3359 if (uap->uaddr2 == NULL)
3360 ts = NULL;
3361 else {
3362 error = copyin_timeout32(uap->uaddr2, &timeout);
3363 if (error != 0)
3364 return (error);
3365 if (timeout.tv_nsec >= 1000000000 ||
3366 timeout.tv_nsec < 0)
3367 return (EINVAL);
3368 ts = &timeout;
3369 }
3370 return do_lock_umutex(td, uap->obj, ts, _UMUTEX_WAIT);
3371 }
3372
3373 static int
3374 __umtx_op_cv_wait_compat32(struct thread *td, struct _umtx_op_args *uap)
3375 {
3376 struct timespec *ts, timeout;
3377 int error;
3378
3379 /* Allow a null timespec (wait forever). */
3380 if (uap->uaddr2 == NULL)
3381 ts = NULL;
3382 else {
3383 error = copyin_timeout32(uap->uaddr2, &timeout);
3384 if (error != 0)
3385 return (error);
3386 if (timeout.tv_nsec >= 1000000000 ||
3387 timeout.tv_nsec < 0)
3388 return (EINVAL);
3389 ts = &timeout;
3390 }
3391 return (do_cv_wait(td, uap->obj, uap->uaddr1, ts, uap->val));
3392 }
3393
3394 static int
3395 __umtx_op_rw_rdlock_compat32(struct thread *td, struct _umtx_op_args *uap)
3396 {
3397 struct timespec timeout;
3398 int error;
3399
3400 /* Allow a null timespec (wait forever). */
3401 if (uap->uaddr2 == NULL) {
3402 error = do_rw_rdlock(td, uap->obj, uap->val, 0);
3403 } else {
3404 error = copyin_timeout32(uap->uaddr2, &timeout);
3405 if (error != 0)
3406 return (error);
3407 if (timeout.tv_nsec >= 1000000000 ||
3408 timeout.tv_nsec < 0) {
3409 return (EINVAL);
3410 }
3411 error = do_rw_rdlock2(td, uap->obj, uap->val, &timeout);
3412 }
3413 return (error);
3414 }
3415
3416 static int
3417 __umtx_op_rw_wrlock_compat32(struct thread *td, struct _umtx_op_args *uap)
3418 {
3419 struct timespec timeout;
3420 int error;
3421
3422 /* Allow a null timespec (wait forever). */
3423 if (uap->uaddr2 == NULL) {
3424 error = do_rw_wrlock(td, uap->obj, 0);
3425 } else {
3426 error = copyin_timeout32(uap->uaddr2, &timeout);
3427 if (error != 0)
3428 return (error);
3429 if (timeout.tv_nsec >= 1000000000 ||
3430 timeout.tv_nsec < 0) {
3431 return (EINVAL);
3432 }
3433
3434 error = do_rw_wrlock2(td, uap->obj, &timeout);
3435 }
3436 return (error);
3437 }
3438
3439 static int
3440 __umtx_op_wait_uint_private_compat32(struct thread *td, struct _umtx_op_args *uap)
3441 {
3442 struct timespec *ts, timeout;
3443 int error;
3444
3445 if (uap->uaddr2 == NULL)
3446 ts = NULL;
3447 else {
3448 error = copyin_timeout32(uap->uaddr2, &timeout);
3449 if (error != 0)
3450 return (error);
3451 if (timeout.tv_nsec >= 1000000000 ||
3452 timeout.tv_nsec < 0)
3453 return (EINVAL);
3454 ts = &timeout;
3455 }
3456 return do_wait(td, uap->obj, uap->val, ts, 1, 1);
3457 }
3458
3459 static int
3460 __umtx_op_sem_wait_compat32(struct thread *td, struct _umtx_op_args *uap)
3461 {
3462 struct timespec *ts, timeout;
3463 int error;
3464
3465 /* Allow a null timespec (wait forever). */
3466 if (uap->uaddr2 == NULL)
3467 ts = NULL;
3468 else {
3469 error = copyin_timeout32(uap->uaddr2, &timeout);
3470 if (error != 0)
3471 return (error);
3472 if (timeout.tv_nsec >= 1000000000 ||
3473 timeout.tv_nsec < 0)
3474 return (EINVAL);
3475 ts = &timeout;
3476 }
3477 return (do_sem_wait(td, uap->obj, ts));
3478 }
3479
3480 static int
3481 __umtx_op_nwake_private32(struct thread *td, struct _umtx_op_args *uap)
3482 {
3483 int count = uap->val;
3484 uint32_t uaddrs[BATCH_SIZE];
3485 uint32_t **upp = (uint32_t **)uap->obj;
3486 int tocopy;
3487 int error = 0;
3488 int i, pos = 0;
3489
3490 while (count > 0) {
3491 tocopy = count;
3492 if (tocopy > BATCH_SIZE)
3493 tocopy = BATCH_SIZE;
3494 error = copyin(upp+pos, uaddrs, tocopy * sizeof(uint32_t));
3495 if (error != 0)
3496 break;
3497 for (i = 0; i < tocopy; ++i)
3498 kern_umtx_wake(td, (void *)(intptr_t)uaddrs[i],
3499 INT_MAX, 1);
3500 count -= tocopy;
3501 pos += tocopy;
3502 }
3503 return (error);
3504 }
3505
/*
 * 32-bit compat dispatch table.  Entry order must match the UMTX_OP_*
 * opcode values, mirroring op_table above.  (The MUTEX_TRYLOCK and
 * MUTEX_LOCK labels were previously swapped; the handlers themselves
 * were always in the correct slots.)
 */
static _umtx_op_func op_table_compat32[] = {
	__umtx_op_lock_umtx_compat32,	/* UMTX_OP_LOCK */
	__umtx_op_unlock_umtx_compat32,	/* UMTX_OP_UNLOCK */
	__umtx_op_wait_compat32,	/* UMTX_OP_WAIT */
	__umtx_op_wake,			/* UMTX_OP_WAKE */
	__umtx_op_trylock_umutex,	/* UMTX_OP_MUTEX_TRYLOCK */
	__umtx_op_lock_umutex_compat32,	/* UMTX_OP_MUTEX_LOCK */
	__umtx_op_unlock_umutex,	/* UMTX_OP_MUTEX_UNLOCK */
	__umtx_op_set_ceiling,		/* UMTX_OP_SET_CEILING */
	__umtx_op_cv_wait_compat32,	/* UMTX_OP_CV_WAIT*/
	__umtx_op_cv_signal,		/* UMTX_OP_CV_SIGNAL */
	__umtx_op_cv_broadcast,		/* UMTX_OP_CV_BROADCAST */
	__umtx_op_wait_compat32,	/* UMTX_OP_WAIT_UINT */
	__umtx_op_rw_rdlock_compat32,	/* UMTX_OP_RW_RDLOCK */
	__umtx_op_rw_wrlock_compat32,	/* UMTX_OP_RW_WRLOCK */
	__umtx_op_rw_unlock,		/* UMTX_OP_RW_UNLOCK */
	__umtx_op_wait_uint_private_compat32,	/* UMTX_OP_WAIT_UINT_PRIVATE */
	__umtx_op_wake_private,		/* UMTX_OP_WAKE_PRIVATE */
	__umtx_op_wait_umutex_compat32, /* UMTX_OP_UMUTEX_WAIT */
	__umtx_op_wake_umutex,		/* UMTX_OP_UMUTEX_WAKE */
	__umtx_op_sem_wait_compat32,	/* UMTX_OP_SEM_WAIT */
	__umtx_op_sem_wake,		/* UMTX_OP_SEM_WAKE */
	__umtx_op_nwake_private32	/* UMTX_OP_NWAKE_PRIVATE */
};
3530
3531 int
3532 freebsd32_umtx_op(struct thread *td, struct freebsd32_umtx_op_args *uap)
3533 {
3534 if ((unsigned)uap->op < UMTX_OP_MAX)
3535 return (*op_table_compat32[uap->op])(td,
3536 (struct _umtx_op_args *)uap);
3537 return (EINVAL);
3538 }
3539 #endif
3540
/* Allocate and attach a umtx queue entry to a newly created thread. */
void
umtx_thread_init(struct thread *td)
{
	td->td_umtxq = umtxq_alloc();
	td->td_umtxq->uq_thread = td;
}

/* Release a thread's umtx queue entry at thread destruction. */
void
umtx_thread_fini(struct thread *td)
{
	umtxq_free(td->td_umtxq);
}
3553
/*
 * Called when a new thread is created, e.g. by fork(): reset the
 * inherited priority and sanity-check that the recycled umtx queue
 * entry carries no stale state.
 */
void
umtx_thread_alloc(struct thread *td)
{
	struct umtx_q *uq;

	uq = td->td_umtxq;
	uq->uq_inherited_pri = PRI_MAX;

	KASSERT(uq->uq_flags == 0, ("uq_flags != 0"));
	KASSERT(uq->uq_thread == td, ("uq_thread != td"));
	KASSERT(uq->uq_pi_blocked == NULL, ("uq_pi_blocked != NULL"));
	KASSERT(TAILQ_EMPTY(&uq->uq_pi_contested), ("uq_pi_contested is not empty"));
}
3570
/*
 * exec() hook: drop any priority-inheritance state the surviving
 * thread accumulated before the image was replaced.
 */
static void
umtx_exec_hook(void *arg __unused, struct proc *p __unused,
	struct image_params *imgp __unused)
{
	umtx_thread_cleanup(curthread);
}
3580
/*
 * thread_exit() hook: release the exiting thread's umtx PI state.
 */
void
umtx_thread_exit(struct thread *td)
{
	umtx_thread_cleanup(td);
}
3589
/*
 * Clean up umtx data: disown every priority-inheritance mutex still
 * contested by this thread and return any lent priority.
 */
static void
umtx_thread_cleanup(struct thread *td)
{
	struct umtx_q *uq;
	struct umtx_pi *pi;

	if ((uq = td->td_umtxq) == NULL)
		return;

	mtx_lock_spin(&umtx_lock);
	uq->uq_inherited_pri = PRI_MAX;
	while ((pi = TAILQ_FIRST(&uq->uq_pi_contested)) != NULL) {
		/* Orphan the PI record; this thread no longer owns it. */
		pi->pi_owner = NULL;
		TAILQ_REMOVE(&uq->uq_pi_contested, pi, pi_link);
	}
	mtx_unlock_spin(&umtx_lock);
	thread_lock(td);
	/* PRI_MAX lend means "no lent priority": restore base priority. */
	sched_lend_user_prio(td, PRI_MAX);
	thread_unlock(td);
}
Cache object: 4a13796edcad946a40653da375788c30
|