FreeBSD/Linux Kernel Cross Reference
sys/kern/kern_umtx.c
1 /*-
2 * Copyright (c) 2004, David Xu <davidxu@freebsd.org>
3 * Copyright (c) 2002, Jeffrey Roberson <jeff@freebsd.org>
4 * All rights reserved.
5 *
6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions
8 * are met:
9 * 1. Redistributions of source code must retain the above copyright
10 * notice unmodified, this list of conditions, and the following
11 * disclaimer.
12 * 2. Redistributions in binary form must reproduce the above copyright
13 * notice, this list of conditions and the following disclaimer in the
14 * documentation and/or other materials provided with the distribution.
15 *
16 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
17 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
18 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
19 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
20 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
21 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
22 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
23 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
24 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
25 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26 */
27
28 #include <sys/cdefs.h>
29 __FBSDID("$FreeBSD$");
30
31 #include "opt_compat.h"
32 #include "opt_umtx_profiling.h"
33
34 #include <sys/param.h>
35 #include <sys/kernel.h>
36 #include <sys/limits.h>
37 #include <sys/lock.h>
38 #include <sys/malloc.h>
39 #include <sys/mutex.h>
40 #include <sys/priv.h>
41 #include <sys/proc.h>
42 #include <sys/sched.h>
43 #include <sys/smp.h>
44 #include <sys/sysctl.h>
45 #include <sys/sysent.h>
46 #include <sys/systm.h>
47 #include <sys/sysproto.h>
48 #include <sys/syscallsubr.h>
49 #include <sys/eventhandler.h>
50 #include <sys/umtx.h>
51
52 #include <vm/vm.h>
53 #include <vm/vm_param.h>
54 #include <vm/pmap.h>
55 #include <vm/vm_map.h>
56 #include <vm/vm_object.h>
57
58 #include <machine/cpu.h>
59
60 #ifdef COMPAT_FREEBSD32
61 #include <compat/freebsd32/freebsd32_proto.h>
62 #endif
63
64 #define _UMUTEX_TRY 1
65 #define _UMUTEX_WAIT 2
66
67 /* Priority inheritance mutex info. */
68 struct umtx_pi {
69 /* Owner thread */
70 struct thread *pi_owner;
71
72 /* Reference count */
73 int pi_refcount;
74
75 /* List entry linking PI mutexes held by a thread */
76 TAILQ_ENTRY(umtx_pi) pi_link;
77
78 /* List entry in hash */
79 TAILQ_ENTRY(umtx_pi) pi_hashlink;
80
81 /* List for waiters */
82 TAILQ_HEAD(,umtx_q) pi_blocked;
83
84 /* Identify a userland lock object */
85 struct umtx_key pi_key;
86 };
87
88 /* A waiter on a userland synchronization object. */
89 struct umtx_q {
90 /* Linked list for the hash. */
91 TAILQ_ENTRY(umtx_q) uq_link;
92
93 /* Umtx key. */
94 struct umtx_key uq_key;
95
96 /* Umtx flags. */
97 int uq_flags;
98 #define UQF_UMTXQ 0x0001
99
100 /* The thread that is waiting. */
101 struct thread *uq_thread;
102
103 /*
104 * PI mutex this thread is blocked on.  Readers may hold either the
105 * chain lock or umtx_lock; writers must hold both the chain lock
106 * and umtx_lock.
107 */
108 struct umtx_pi *uq_pi_blocked;
109
110 /* On blocked list */
111 TAILQ_ENTRY(umtx_q) uq_lockq;
112
113 /* Contested PI mutexes owned by this thread */
114 TAILQ_HEAD(,umtx_pi) uq_pi_contested;
115
116 /* Inherited priority from PP mutex */
117 u_char uq_inherited_pri;
118
119 /* Spare queue ready to be reused */
120 struct umtxq_queue *uq_spare_queue;
121
122 /* The queue we are currently on */
123 struct umtxq_queue *uq_cur_queue;
124 };
125
126 TAILQ_HEAD(umtxq_head, umtx_q);
127
128 /* Per-key wait-queue */
129 struct umtxq_queue {
130 struct umtxq_head head;
131 struct umtx_key key;
132 LIST_ENTRY(umtxq_queue) link;
133 int length;
134 };
135
136 LIST_HEAD(umtxq_list, umtxq_queue);
137
138 /* Userland lock object's wait-queue chain */
139 struct umtxq_chain {
140 /* Lock for this chain. */
141 struct mtx uc_lock;
142
143 /* List of sleep queues. */
144 struct umtxq_list uc_queue[2];
145 #define UMTX_SHARED_QUEUE 0
146 #define UMTX_EXCLUSIVE_QUEUE 1
147
148 LIST_HEAD(, umtxq_queue) uc_spare_queue;
149
150 /* Busy flag */
151 char uc_busy;
152
153 /* Chain lock waiters */
154 int uc_waiters;
155
156 /* All PI mutexes hashed to this chain */
157 TAILQ_HEAD(,umtx_pi) uc_pi_list;
158
159 #ifdef UMTX_PROFILING
160 int length;
161 int max_length;
162 #endif
163 };
164
165 #define UMTXQ_LOCKED_ASSERT(uc) mtx_assert(&(uc)->uc_lock, MA_OWNED)
166 #define UMTXQ_BUSY_ASSERT(uc) KASSERT((uc)->uc_busy != 0, ("umtx chain is not busy"))
167
168 /*
169 * Don't propagate time-sharing priority; there is a security reason:
170 * a user could simply create a PI-mutex, let thread A lock the mutex,
171 * and let another thread B block on the mutex.  Because B is
172 * sleeping, its priority would be boosted; this would boost A's
173 * priority via priority propagation as well, and it would never be
174 * lowered even if A were using 100% CPU, which is unfair to other processes.
175 */
176
177 #define UPRI(td) (((td)->td_user_pri >= PRI_MIN_TIMESHARE &&\
178 (td)->td_user_pri <= PRI_MAX_TIMESHARE) ?\
179 PRI_MAX_TIMESHARE : (td)->td_user_pri)
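/*
 * Because larger numeric values mean lower priority, UPRI() maps every
 * time-sharing thread to PRI_MAX_TIMESHARE (the lowest time-sharing
 * priority), so only real-time user priorities are ever propagated
 * through PI/PP mutexes.
 */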
180
181 #define GOLDEN_RATIO_PRIME 2654404609U
182 #define UMTX_CHAINS 512
183 #define UMTX_SHIFTS (__WORD_BIT - 9)
184
185 #define GET_SHARE(flags) \
186 (((flags) & USYNC_PROCESS_SHARED) == 0 ? THREAD_SHARE : PROCESS_SHARE)
187
188 #define BUSY_SPINS 200
189
190 static uma_zone_t umtx_pi_zone;
191 static struct umtxq_chain umtxq_chains[2][UMTX_CHAINS];
192 static MALLOC_DEFINE(M_UMTX, "umtx", "UMTX queue memory");
193 static int umtx_pi_allocated;
194
195 static SYSCTL_NODE(_debug, OID_AUTO, umtx, CTLFLAG_RW, 0, "umtx debug");
196 SYSCTL_INT(_debug_umtx, OID_AUTO, umtx_pi_allocated, CTLFLAG_RD,
197 &umtx_pi_allocated, 0, "Allocated umtx_pi");
198
199 #ifdef UMTX_PROFILING
200 static long max_length;
201 SYSCTL_LONG(_debug_umtx, OID_AUTO, max_length, CTLFLAG_RD, &max_length, 0, "max_length");
202 static SYSCTL_NODE(_debug_umtx, OID_AUTO, chains, CTLFLAG_RD, 0, "umtx chain stats");
203 #endif
204
205 static void umtxq_sysinit(void *);
206 static void umtxq_hash(struct umtx_key *key);
207 static struct umtxq_chain *umtxq_getchain(struct umtx_key *key);
208 static void umtxq_lock(struct umtx_key *key);
209 static void umtxq_unlock(struct umtx_key *key);
210 static void umtxq_busy(struct umtx_key *key);
211 static void umtxq_unbusy(struct umtx_key *key);
212 static void umtxq_insert_queue(struct umtx_q *uq, int q);
213 static void umtxq_remove_queue(struct umtx_q *uq, int q);
214 static int umtxq_sleep(struct umtx_q *uq, const char *wmesg, int timo);
215 static int umtxq_count(struct umtx_key *key);
216 static struct umtx_pi *umtx_pi_alloc(int);
217 static void umtx_pi_free(struct umtx_pi *pi);
218 static int do_unlock_pp(struct thread *td, struct umutex *m, uint32_t flags);
219 static void umtx_thread_cleanup(struct thread *td);
220 static void umtx_exec_hook(void *arg __unused, struct proc *p __unused,
221 struct image_params *imgp __unused);
222 SYSINIT(umtx, SI_SUB_EVENTHANDLER+1, SI_ORDER_MIDDLE, umtxq_sysinit, NULL);
223
224 #define umtxq_signal(key, nwake) umtxq_signal_queue((key), (nwake), UMTX_SHARED_QUEUE)
225 #define umtxq_insert(uq) umtxq_insert_queue((uq), UMTX_SHARED_QUEUE)
226 #define umtxq_remove(uq) umtxq_remove_queue((uq), UMTX_SHARED_QUEUE)
227
228 static struct mtx umtx_lock;
229
230 #ifdef UMTX_PROFILING
231 static void
232 umtx_init_profiling(void)
233 {
234 struct sysctl_oid *chain_oid;
235 char chain_name[10];
236 int i;
237
238 for (i = 0; i < UMTX_CHAINS; ++i) {
239 snprintf(chain_name, sizeof(chain_name), "%d", i);
240 chain_oid = SYSCTL_ADD_NODE(NULL,
241 SYSCTL_STATIC_CHILDREN(_debug_umtx_chains), OID_AUTO,
242 chain_name, CTLFLAG_RD, NULL, "umtx hash stats");
243 SYSCTL_ADD_INT(NULL, SYSCTL_CHILDREN(chain_oid), OID_AUTO,
244 "max_length0", CTLFLAG_RD, &umtxq_chains[0][i].max_length, 0, NULL);
245 SYSCTL_ADD_INT(NULL, SYSCTL_CHILDREN(chain_oid), OID_AUTO,
246 "max_length1", CTLFLAG_RD, &umtxq_chains[1][i].max_length, 0, NULL);
247 }
248 }
249 #endif
250
251 static void
252 umtxq_sysinit(void *arg __unused)
253 {
254 int i, j;
255
256 umtx_pi_zone = uma_zcreate("umtx pi", sizeof(struct umtx_pi),
257 NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0);
258 for (i = 0; i < 2; ++i) {
259 for (j = 0; j < UMTX_CHAINS; ++j) {
260 mtx_init(&umtxq_chains[i][j].uc_lock, "umtxql", NULL,
261 MTX_DEF | MTX_DUPOK);
262 LIST_INIT(&umtxq_chains[i][j].uc_queue[0]);
263 LIST_INIT(&umtxq_chains[i][j].uc_queue[1]);
264 LIST_INIT(&umtxq_chains[i][j].uc_spare_queue);
265 TAILQ_INIT(&umtxq_chains[i][j].uc_pi_list);
266 umtxq_chains[i][j].uc_busy = 0;
267 umtxq_chains[i][j].uc_waiters = 0;
268 #ifdef UMTX_PROFILING
269 umtxq_chains[i][j].length = 0;
270 umtxq_chains[i][j].max_length = 0;
271 #endif
272 }
273 }
274 #ifdef UMTX_PROFILING
275 umtx_init_profiling();
276 #endif
277 mtx_init(&umtx_lock, "umtx lock", NULL, MTX_SPIN);
278 EVENTHANDLER_REGISTER(process_exec, umtx_exec_hook, NULL,
279 EVENTHANDLER_PRI_ANY);
280 }
281
282 struct umtx_q *
283 umtxq_alloc(void)
284 {
285 struct umtx_q *uq;
286
287 uq = malloc(sizeof(struct umtx_q), M_UMTX, M_WAITOK | M_ZERO);
288 uq->uq_spare_queue = malloc(sizeof(struct umtxq_queue), M_UMTX, M_WAITOK | M_ZERO);
289 TAILQ_INIT(&uq->uq_spare_queue->head);
290 TAILQ_INIT(&uq->uq_pi_contested);
291 uq->uq_inherited_pri = PRI_MAX;
292 return (uq);
293 }
294
295 void
296 umtxq_free(struct umtx_q *uq)
297 {
298 MPASS(uq->uq_spare_queue != NULL);
299 free(uq->uq_spare_queue, M_UMTX);
300 free(uq, M_UMTX);
301 }
302
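/*
 * The hash below is multiplicative (Fibonacci) hashing: the sum of the
 * two key words is multiplied by GOLDEN_RATIO_PRIME and shifted right
 * by UMTX_SHIFTS, keeping the top 9 bits of the 32-bit product to pick
 * one of the UMTX_CHAINS (512) buckets; the final modulo is only a
 * safeguard.
 */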
303 static inline void
304 umtxq_hash(struct umtx_key *key)
305 {
306 unsigned n = (uintptr_t)key->info.both.a + key->info.both.b;
307 key->hash = ((n * GOLDEN_RATIO_PRIME) >> UMTX_SHIFTS) % UMTX_CHAINS;
308 }
309
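/*
 * Two chain tables are used: keys whose type is at most TYPE_SEM (the
 * wait/condvar/semaphore style objects) hash into the second table and
 * lock-style objects into the first, presumably so that a lock and a
 * wait object layered on top of it never contend for the same chain
 * lock.
 */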
310 static inline struct umtxq_chain *
311 umtxq_getchain(struct umtx_key *key)
312 {
313 if (key->type <= TYPE_SEM)
314 return (&umtxq_chains[1][key->hash]);
315 return (&umtxq_chains[0][key->hash]);
316 }
317
318 /*
319 * Lock a chain.
320 */
321 static inline void
322 umtxq_lock(struct umtx_key *key)
323 {
324 struct umtxq_chain *uc;
325
326 uc = umtxq_getchain(key);
327 mtx_lock(&uc->uc_lock);
328 }
329
330 /*
331 * Unlock a chain.
332 */
333 static inline void
334 umtxq_unlock(struct umtx_key *key)
335 {
336 struct umtxq_chain *uc;
337
338 uc = umtxq_getchain(key);
339 mtx_unlock(&uc->uc_lock);
340 }
341
342 /*
343 * Mark the chain busy when the following operation may
344 * block (a kernel mutex cannot be held across it).
345 */
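/*
 * Sketch of the pattern used throughout this file (illustration only,
 * not a complete code path):
 *
 *	umtxq_lock(key);
 *	umtxq_busy(key);	(serialize against other wakers/sleepers)
 *	umtxq_unlock(key);	(drop the chain mutex)
 *	... fuword()/casuword() on the userland word, which may fault ...
 *	umtxq_lock(key);
 *	umtxq_unbusy(key);
 *	umtxq_unlock(key);
 */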
346 static inline void
347 umtxq_busy(struct umtx_key *key)
348 {
349 struct umtxq_chain *uc;
350
351 uc = umtxq_getchain(key);
352 mtx_assert(&uc->uc_lock, MA_OWNED);
353 if (uc->uc_busy) {
354 #ifdef SMP
355 if (smp_cpus > 1) {
356 int count = BUSY_SPINS;
357 if (count > 0) {
358 umtxq_unlock(key);
359 while (uc->uc_busy && --count > 0)
360 cpu_spinwait();
361 umtxq_lock(key);
362 }
363 }
364 #endif
365 while (uc->uc_busy) {
366 uc->uc_waiters++;
367 msleep(uc, &uc->uc_lock, 0, "umtxqb", 0);
368 uc->uc_waiters--;
369 }
370 }
371 uc->uc_busy = 1;
372 }
373
374 /*
375 * Unbusy a chain.
376 */
377 static inline void
378 umtxq_unbusy(struct umtx_key *key)
379 {
380 struct umtxq_chain *uc;
381
382 uc = umtxq_getchain(key);
383 mtx_assert(&uc->uc_lock, MA_OWNED);
384 KASSERT(uc->uc_busy != 0, ("not busy"));
385 uc->uc_busy = 0;
386 if (uc->uc_waiters)
387 wakeup_one(uc);
388 }
389
390 static struct umtxq_queue *
391 umtxq_queue_lookup(struct umtx_key *key, int q)
392 {
393 struct umtxq_queue *uh;
394 struct umtxq_chain *uc;
395
396 uc = umtxq_getchain(key);
397 UMTXQ_LOCKED_ASSERT(uc);
398 LIST_FOREACH(uh, &uc->uc_queue[q], link) {
399 if (umtx_key_match(&uh->key, key))
400 return (uh);
401 }
402
403 return (NULL);
404 }
405
406 static inline void
407 umtxq_insert_queue(struct umtx_q *uq, int q)
408 {
409 struct umtxq_queue *uh;
410 struct umtxq_chain *uc;
411
412 uc = umtxq_getchain(&uq->uq_key);
413 UMTXQ_LOCKED_ASSERT(uc);
414 KASSERT((uq->uq_flags & UQF_UMTXQ) == 0, ("umtx_q is already on queue"));
415 uh = umtxq_queue_lookup(&uq->uq_key, q);
416 if (uh != NULL) {
417 LIST_INSERT_HEAD(&uc->uc_spare_queue, uq->uq_spare_queue, link);
418 } else {
419 uh = uq->uq_spare_queue;
420 uh->key = uq->uq_key;
421 LIST_INSERT_HEAD(&uc->uc_queue[q], uh, link);
422 }
423 uq->uq_spare_queue = NULL;
424
425 TAILQ_INSERT_TAIL(&uh->head, uq, uq_link);
426 uh->length++;
427 #ifdef UMTX_PROFILING
428 uc->length++;
429 if (uc->length > uc->max_length) {
430 uc->max_length = uc->length;
431 if (uc->max_length > max_length)
432 max_length = uc->max_length;
433 }
434 #endif
435 uq->uq_flags |= UQF_UMTXQ;
436 uq->uq_cur_queue = uh;
437 return;
438 }
439
440 static inline void
441 umtxq_remove_queue(struct umtx_q *uq, int q)
442 {
443 struct umtxq_chain *uc;
444 struct umtxq_queue *uh;
445
446 uc = umtxq_getchain(&uq->uq_key);
447 UMTXQ_LOCKED_ASSERT(uc);
448 if (uq->uq_flags & UQF_UMTXQ) {
449 uh = uq->uq_cur_queue;
450 TAILQ_REMOVE(&uh->head, uq, uq_link);
451 uh->length--;
452 #ifdef UMTX_PROFILING
453 uc->length--;
454 #endif
455 uq->uq_flags &= ~UQF_UMTXQ;
456 if (TAILQ_EMPTY(&uh->head)) {
457 KASSERT(uh->length == 0,
458 ("inconsistent umtxq_queue length"));
459 LIST_REMOVE(uh, link);
460 } else {
461 uh = LIST_FIRST(&uc->uc_spare_queue);
462 KASSERT(uh != NULL, ("uc_spare_queue is empty"));
463 LIST_REMOVE(uh, link);
464 }
465 uq->uq_spare_queue = uh;
466 uq->uq_cur_queue = NULL;
467 }
468 }
469
470 /*
471 * Return the number of waiters for the key.
472 */
473 static int
474 umtxq_count(struct umtx_key *key)
475 {
476 struct umtxq_chain *uc;
477 struct umtxq_queue *uh;
478
479 uc = umtxq_getchain(key);
480 UMTXQ_LOCKED_ASSERT(uc);
481 uh = umtxq_queue_lookup(key, UMTX_SHARED_QUEUE);
482 if (uh != NULL)
483 return (uh->length);
484 return (0);
485 }
486
487 /*
488 * Return the number of PI waiters and, through *first, the
489 * first waiter.
490 */
491 static int
492 umtxq_count_pi(struct umtx_key *key, struct umtx_q **first)
493 {
494 struct umtxq_chain *uc;
495 struct umtxq_queue *uh;
496
497 *first = NULL;
498 uc = umtxq_getchain(key);
499 UMTXQ_LOCKED_ASSERT(uc);
500 uh = umtxq_queue_lookup(key, UMTX_SHARED_QUEUE);
501 if (uh != NULL) {
502 *first = TAILQ_FIRST(&uh->head);
503 return (uh->length);
504 }
505 return (0);
506 }
507
508 static int
509 umtxq_check_susp(struct thread *td)
510 {
511 struct proc *p;
512 int error;
513
514 /*
515 * The check for TDF_NEEDSUSPCHK is racy, but it is enough to
516 * eventually break the lockstep loop.
517 */
518 if ((td->td_flags & TDF_NEEDSUSPCHK) == 0)
519 return (0);
520 error = 0;
521 p = td->td_proc;
522 PROC_LOCK(p);
523 if (P_SHOULDSTOP(p) ||
524 ((p->p_flag & P_TRACED) && (td->td_dbgflags & TDB_SUSPEND))) {
525 if (p->p_flag & P_SINGLE_EXIT)
526 error = EINTR;
527 else
528 error = ERESTART;
529 }
530 PROC_UNLOCK(p);
531 return (error);
532 }
533
534 /*
535 * Wake up threads waiting on a userland object.
536 */
537
538 static int
539 umtxq_signal_queue(struct umtx_key *key, int n_wake, int q)
540 {
541 struct umtxq_chain *uc;
542 struct umtxq_queue *uh;
543 struct umtx_q *uq;
544 int ret;
545
546 ret = 0;
547 uc = umtxq_getchain(key);
548 UMTXQ_LOCKED_ASSERT(uc);
549 uh = umtxq_queue_lookup(key, q);
550 if (uh != NULL) {
551 while ((uq = TAILQ_FIRST(&uh->head)) != NULL) {
552 umtxq_remove_queue(uq, q);
553 wakeup(uq);
554 if (++ret >= n_wake)
555 return (ret);
556 }
557 }
558 return (ret);
559 }
560
561
562 /*
563 * Wake up specified thread.
564 */
565 static inline void
566 umtxq_signal_thread(struct umtx_q *uq)
567 {
568 struct umtxq_chain *uc;
569
570 uc = umtxq_getchain(&uq->uq_key);
571 UMTXQ_LOCKED_ASSERT(uc);
572 umtxq_remove(uq);
573 wakeup(uq);
574 }
575
576 /*
577 * Put the thread to sleep; before sleeping, check whether the
578 * thread has already been removed from the umtx queue.
579 */
580 static inline int
581 umtxq_sleep(struct umtx_q *uq, const char *wmesg, int timo)
582 {
583 struct umtxq_chain *uc;
584 int error;
585
586 uc = umtxq_getchain(&uq->uq_key);
587 UMTXQ_LOCKED_ASSERT(uc);
588 if (!(uq->uq_flags & UQF_UMTXQ))
589 return (0);
590 error = msleep(uq, &uc->uc_lock, PCATCH, wmesg, timo);
591 if (error == EWOULDBLOCK)
592 error = ETIMEDOUT;
593 return (error);
594 }
595
596 /*
597 * Convert a userspace address into a unique logical key.
598 */
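/*
 * A key takes one of two forms, both built below: for THREAD_SHARE
 * (process-private) objects it is the pair (vmspace, virtual address);
 * for PROCESS_SHARE (or auto-detected shared) objects it is the pair
 * (backing vm_object, offset within the object), with a reference held
 * on the object until umtx_key_release().
 */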
599 int
600 umtx_key_get(void *addr, int type, int share, struct umtx_key *key)
601 {
602 struct thread *td = curthread;
603 vm_map_t map;
604 vm_map_entry_t entry;
605 vm_pindex_t pindex;
606 vm_prot_t prot;
607 boolean_t wired;
608
609 key->type = type;
610 if (share == THREAD_SHARE) {
611 key->shared = 0;
612 key->info.private.vs = td->td_proc->p_vmspace;
613 key->info.private.addr = (uintptr_t)addr;
614 } else {
615 MPASS(share == PROCESS_SHARE || share == AUTO_SHARE);
616 map = &td->td_proc->p_vmspace->vm_map;
617 if (vm_map_lookup(&map, (vm_offset_t)addr, VM_PROT_WRITE,
618 &entry, &key->info.shared.object, &pindex, &prot,
619 &wired) != KERN_SUCCESS) {
620 return EFAULT;
621 }
622
623 if ((share == PROCESS_SHARE) ||
624 (share == AUTO_SHARE &&
625 VM_INHERIT_SHARE == entry->inheritance)) {
626 key->shared = 1;
627 key->info.shared.offset = entry->offset + entry->start -
628 (vm_offset_t)addr;
629 vm_object_reference(key->info.shared.object);
630 } else {
631 key->shared = 0;
632 key->info.private.vs = td->td_proc->p_vmspace;
633 key->info.private.addr = (uintptr_t)addr;
634 }
635 vm_map_lookup_done(map, entry);
636 }
637
638 umtxq_hash(key);
639 return (0);
640 }
641
642 /*
643 * Release key.
644 */
645 void
646 umtx_key_release(struct umtx_key *key)
647 {
648 if (key->shared)
649 vm_object_deallocate(key->info.shared.object);
650 }
651
652 /*
653 * Lock a umtx object.
654 */
655 static int
656 _do_lock_umtx(struct thread *td, struct umtx *umtx, u_long id, int timo)
657 {
658 struct umtx_q *uq;
659 u_long owner;
660 u_long old;
661 int error = 0;
662
663 uq = td->td_umtxq;
664
665 /*
666 * Care must be exercised when dealing with the umtx structure. It
667 * can fault on any access.
668 */
669 for (;;) {
670 /*
671 * Try the uncontested case. This should be done in userland.
672 */
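/*
 * For reference, the userland fast path mentioned above would look
 * roughly like this (illustrative sketch; the wrapper name is
 * hypothetical):
 *
 *	if (atomic_cmpset_acq_long(&umtx->u_owner, UMTX_UNOWNED, id))
 *		return (0);			(uncontested, no syscall)
 *	return (__umtx_lock_syscall(umtx));	(contested, enter kernel)
 */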
673 owner = casuword(&umtx->u_owner, UMTX_UNOWNED, id);
674
675 /* The acquire succeeded. */
676 if (owner == UMTX_UNOWNED)
677 return (0);
678
679 /* The address was invalid. */
680 if (owner == -1)
681 return (EFAULT);
682
683 /* If no one owns it but it is contested try to acquire it. */
684 if (owner == UMTX_CONTESTED) {
685 owner = casuword(&umtx->u_owner,
686 UMTX_CONTESTED, id | UMTX_CONTESTED);
687
688 if (owner == UMTX_CONTESTED)
689 return (0);
690
691 /* The address was invalid. */
692 if (owner == -1)
693 return (EFAULT);
694
695 error = umtxq_check_susp(td);
696 if (error != 0)
697 break;
698
699 /* If this failed the lock has changed, restart. */
700 continue;
701 }
702
703 /*
704 * If we caught a signal, we have retried and now
705 * exit immediately.
706 */
707 if (error != 0)
708 return (error);
709
710 if ((error = umtx_key_get(umtx, TYPE_SIMPLE_LOCK,
711 AUTO_SHARE, &uq->uq_key)) != 0)
712 return (error);
713
714 umtxq_lock(&uq->uq_key);
715 umtxq_busy(&uq->uq_key);
716 umtxq_insert(uq);
717 umtxq_unbusy(&uq->uq_key);
718 umtxq_unlock(&uq->uq_key);
719
720 /*
721 * Set the contested bit so that a release in user space
722 * knows to use the system call for unlock. If this fails
723 * either someone else has acquired the lock or it has been
724 * released.
725 */
726 old = casuword(&umtx->u_owner, owner, owner | UMTX_CONTESTED);
727
728 /* The address was invalid. */
729 if (old == -1) {
730 umtxq_lock(&uq->uq_key);
731 umtxq_remove(uq);
732 umtxq_unlock(&uq->uq_key);
733 umtx_key_release(&uq->uq_key);
734 return (EFAULT);
735 }
736
737 /*
738 * If we set the contested bit, sleep.  Otherwise the lock changed
739 * and we need to retry, or we lost a race to the thread
740 * unlocking the umtx.
741 */
742 umtxq_lock(&uq->uq_key);
743 if (old == owner)
744 error = umtxq_sleep(uq, "umtx", timo);
745 umtxq_remove(uq);
746 umtxq_unlock(&uq->uq_key);
747 umtx_key_release(&uq->uq_key);
748
749 if (error == 0)
750 error = umtxq_check_susp(td);
751 }
752
753 return (0);
754 }
755
756 /*
757 * Lock a umtx object.
758 */
759 static int
760 do_lock_umtx(struct thread *td, struct umtx *umtx, u_long id,
761 struct timespec *timeout)
762 {
763 struct timespec ts, ts2, ts3;
764 struct timeval tv;
765 int error;
766
767 if (timeout == NULL) {
768 error = _do_lock_umtx(td, umtx, id, 0);
769 /* Mutex locking is restarted if it is interrupted. */
770 if (error == EINTR)
771 error = ERESTART;
772 } else {
773 getnanouptime(&ts);
774 timespecadd(&ts, timeout);
775 TIMESPEC_TO_TIMEVAL(&tv, timeout);
776 for (;;) {
777 error = _do_lock_umtx(td, umtx, id, tvtohz(&tv));
778 if (error != ETIMEDOUT)
779 break;
780 getnanouptime(&ts2);
781 if (timespeccmp(&ts2, &ts, >=)) {
782 error = ETIMEDOUT;
783 break;
784 }
785 ts3 = ts;
786 timespecsub(&ts3, &ts2);
787 TIMESPEC_TO_TIMEVAL(&tv, &ts3);
788 }
789 /* Timed-locking is not restarted. */
790 if (error == ERESTART)
791 error = EINTR;
792 }
793 return (error);
794 }
795
796 /*
797 * Unlock a umtx object.
798 */
799 static int
800 do_unlock_umtx(struct thread *td, struct umtx *umtx, u_long id)
801 {
802 struct umtx_key key;
803 u_long owner;
804 u_long old;
805 int error;
806 int count;
807
808 /*
809 * Make sure we own this mtx.
810 */
811 owner = fuword(__DEVOLATILE(u_long *, &umtx->u_owner));
812 if (owner == -1)
813 return (EFAULT);
814
815 if ((owner & ~UMTX_CONTESTED) != id)
816 return (EPERM);
817
818 /* This should be done in userland */
819 if ((owner & UMTX_CONTESTED) == 0) {
820 old = casuword(&umtx->u_owner, owner, UMTX_UNOWNED);
821 if (old == -1)
822 return (EFAULT);
823 if (old == owner)
824 return (0);
825 owner = old;
826 }
827
828 /* We should only ever be in here for contested locks */
829 if ((error = umtx_key_get(umtx, TYPE_SIMPLE_LOCK, AUTO_SHARE,
830 &key)) != 0)
831 return (error);
832
833 umtxq_lock(&key);
834 umtxq_busy(&key);
835 count = umtxq_count(&key);
836 umtxq_unlock(&key);
837
838 /*
839 * When unlocking the umtx, it must be marked as unowned if
840 * at most one thread is waiting for it.
841 * Otherwise, it must be marked as contested.
842 */
843 old = casuword(&umtx->u_owner, owner,
844 count <= 1 ? UMTX_UNOWNED : UMTX_CONTESTED);
845 umtxq_lock(&key);
846 umtxq_signal(&key,1);
847 umtxq_unbusy(&key);
848 umtxq_unlock(&key);
849 umtx_key_release(&key);
850 if (old == -1)
851 return (EFAULT);
852 if (old != owner)
853 return (EINVAL);
854 return (0);
855 }
856
857 #ifdef COMPAT_FREEBSD32
858
859 /*
860 * Lock a umtx object.
861 */
862 static int
863 _do_lock_umtx32(struct thread *td, uint32_t *m, uint32_t id, int timo)
864 {
865 struct umtx_q *uq;
866 uint32_t owner;
867 uint32_t old;
868 int error = 0;
869
870 uq = td->td_umtxq;
871
872 /*
873 * Care must be exercised when dealing with the umtx structure. It
874 * can fault on any access.
875 */
876 for (;;) {
877 /*
878 * Try the uncontested case. This should be done in userland.
879 */
880 owner = casuword32(m, UMUTEX_UNOWNED, id);
881
882 /* The acquire succeeded. */
883 if (owner == UMUTEX_UNOWNED)
884 return (0);
885
886 /* The address was invalid. */
887 if (owner == -1)
888 return (EFAULT);
889
890 /* If no one owns it but it is contested try to acquire it. */
891 if (owner == UMUTEX_CONTESTED) {
892 owner = casuword32(m,
893 UMUTEX_CONTESTED, id | UMUTEX_CONTESTED);
894 if (owner == UMUTEX_CONTESTED)
895 return (0);
896
897 /* The address was invalid. */
898 if (owner == -1)
899 return (EFAULT);
900
901 error = umtxq_check_susp(td);
902 if (error != 0)
903 break;
904
905 /* If this failed the lock has changed, restart. */
906 continue;
907 }
908
909 /*
910 * If we caught a signal, we have retried and now
911 * exit immediately.
912 */
913 if (error != 0)
914 return (error);
915
916 if ((error = umtx_key_get(m, TYPE_SIMPLE_LOCK,
917 AUTO_SHARE, &uq->uq_key)) != 0)
918 return (error);
919
920 umtxq_lock(&uq->uq_key);
921 umtxq_busy(&uq->uq_key);
922 umtxq_insert(uq);
923 umtxq_unbusy(&uq->uq_key);
924 umtxq_unlock(&uq->uq_key);
925
926 /*
927 * Set the contested bit so that a release in user space
928 * knows to use the system call for unlock. If this fails
929 * either someone else has acquired the lock or it has been
930 * released.
931 */
932 old = casuword32(m, owner, owner | UMUTEX_CONTESTED);
933
934 /* The address was invalid. */
935 if (old == -1) {
936 umtxq_lock(&uq->uq_key);
937 umtxq_remove(uq);
938 umtxq_unlock(&uq->uq_key);
939 umtx_key_release(&uq->uq_key);
940 return (EFAULT);
941 }
942
943 /*
944 * If we set the contested bit, sleep.  Otherwise the lock changed
945 * and we need to retry, or we lost a race to the thread
946 * unlocking the umtx.
947 */
948 umtxq_lock(&uq->uq_key);
949 if (old == owner)
950 error = umtxq_sleep(uq, "umtx", timo);
951 umtxq_remove(uq);
952 umtxq_unlock(&uq->uq_key);
953 umtx_key_release(&uq->uq_key);
954
955 if (error == 0)
956 error = umtxq_check_susp(td);
957 }
958
959 return (0);
960 }
961
962 /*
963 * Lock a umtx object.
964 */
965 static int
966 do_lock_umtx32(struct thread *td, void *m, uint32_t id,
967 struct timespec *timeout)
968 {
969 struct timespec ts, ts2, ts3;
970 struct timeval tv;
971 int error;
972
973 if (timeout == NULL) {
974 error = _do_lock_umtx32(td, m, id, 0);
975 /* Mutex locking is restarted if it is interrupted. */
976 if (error == EINTR)
977 error = ERESTART;
978 } else {
979 getnanouptime(&ts);
980 timespecadd(&ts, timeout);
981 TIMESPEC_TO_TIMEVAL(&tv, timeout);
982 for (;;) {
983 error = _do_lock_umtx32(td, m, id, tvtohz(&tv));
984 if (error != ETIMEDOUT)
985 break;
986 getnanouptime(&ts2);
987 if (timespeccmp(&ts2, &ts, >=)) {
988 error = ETIMEDOUT;
989 break;
990 }
991 ts3 = ts;
992 timespecsub(&ts3, &ts2);
993 TIMESPEC_TO_TIMEVAL(&tv, &ts3);
994 }
995 /* Timed-locking is not restarted. */
996 if (error == ERESTART)
997 error = EINTR;
998 }
999 return (error);
1000 }
1001
1002 /*
1003 * Unlock a umtx object.
1004 */
1005 static int
1006 do_unlock_umtx32(struct thread *td, uint32_t *m, uint32_t id)
1007 {
1008 struct umtx_key key;
1009 uint32_t owner;
1010 uint32_t old;
1011 int error;
1012 int count;
1013
1014 /*
1015 * Make sure we own this mtx.
1016 */
1017 owner = fuword32(m);
1018 if (owner == -1)
1019 return (EFAULT);
1020
1021 if ((owner & ~UMUTEX_CONTESTED) != id)
1022 return (EPERM);
1023
1024 /* This should be done in userland */
1025 if ((owner & UMUTEX_CONTESTED) == 0) {
1026 old = casuword32(m, owner, UMUTEX_UNOWNED);
1027 if (old == -1)
1028 return (EFAULT);
1029 if (old == owner)
1030 return (0);
1031 owner = old;
1032 }
1033
1034 /* We should only ever be in here for contested locks */
1035 if ((error = umtx_key_get(m, TYPE_SIMPLE_LOCK, AUTO_SHARE,
1036 &key)) != 0)
1037 return (error);
1038
1039 umtxq_lock(&key);
1040 umtxq_busy(&key);
1041 count = umtxq_count(&key);
1042 umtxq_unlock(&key);
1043
1044 /*
1045 * When unlocking the umtx, it must be marked as unowned if
1046 * at most one thread is waiting for it.
1047 * Otherwise, it must be marked as contested.
1048 */
1049 old = casuword32(m, owner,
1050 count <= 1 ? UMUTEX_UNOWNED : UMUTEX_CONTESTED);
1051 umtxq_lock(&key);
1052 umtxq_signal(&key,1);
1053 umtxq_unbusy(&key);
1054 umtxq_unlock(&key);
1055 umtx_key_release(&key);
1056 if (old == -1)
1057 return (EFAULT);
1058 if (old != owner)
1059 return (EINVAL);
1060 return (0);
1061 }
1062 #endif
1063
1064 /*
1065 * Fetch and compare the value; sleep on the address if the value has not changed.
1066 */
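/*
 * This is the futex-style wait half of _umtx_op(2) (UMTX_OP_WAIT and
 * UMTX_OP_WAIT_UINT).  A typical userland caller (sketch with
 * hypothetical names) does:
 *
 *	while (atomic_load_acq_int(&futex_word) == EXPECTED_BUSY)
 *		_umtx_op(&futex_word, UMTX_OP_WAIT_UINT,
 *		    EXPECTED_BUSY, NULL, NULL);
 *
 * The kernel re-reads the word below and only sleeps if it still equals
 * the expected value, so a wake-up between the userland test and the
 * system call cannot be lost.
 */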
1067 static int
1068 do_wait(struct thread *td, void *addr, u_long id,
1069 struct timespec *timeout, int compat32, int is_private)
1070 {
1071 struct umtx_q *uq;
1072 struct timespec ts, ts2, ts3;
1073 struct timeval tv;
1074 u_long tmp;
1075 int error = 0;
1076
1077 uq = td->td_umtxq;
1078 if ((error = umtx_key_get(addr, TYPE_SIMPLE_WAIT,
1079 is_private ? THREAD_SHARE : AUTO_SHARE, &uq->uq_key)) != 0)
1080 return (error);
1081
1082 umtxq_lock(&uq->uq_key);
1083 umtxq_insert(uq);
1084 umtxq_unlock(&uq->uq_key);
1085 if (compat32 == 0)
1086 tmp = fuword(addr);
1087 else
1088 tmp = (unsigned int)fuword32(addr);
1089 if (tmp != id) {
1090 umtxq_lock(&uq->uq_key);
1091 umtxq_remove(uq);
1092 umtxq_unlock(&uq->uq_key);
1093 } else if (timeout == NULL) {
1094 umtxq_lock(&uq->uq_key);
1095 error = umtxq_sleep(uq, "uwait", 0);
1096 umtxq_remove(uq);
1097 umtxq_unlock(&uq->uq_key);
1098 } else {
1099 getnanouptime(&ts);
1100 timespecadd(&ts, timeout);
1101 TIMESPEC_TO_TIMEVAL(&tv, timeout);
1102 umtxq_lock(&uq->uq_key);
1103 for (;;) {
1104 error = umtxq_sleep(uq, "uwait", tvtohz(&tv));
1105 if (!(uq->uq_flags & UQF_UMTXQ)) {
1106 error = 0;
1107 break;
1108 }
1109 if (error != ETIMEDOUT)
1110 break;
1111 umtxq_unlock(&uq->uq_key);
1112 getnanouptime(&ts2);
1113 if (timespeccmp(&ts2, &ts, >=)) {
1114 error = ETIMEDOUT;
1115 umtxq_lock(&uq->uq_key);
1116 break;
1117 }
1118 ts3 = ts;
1119 timespecsub(&ts3, &ts2);
1120 TIMESPEC_TO_TIMEVAL(&tv, &ts3);
1121 umtxq_lock(&uq->uq_key);
1122 }
1123 umtxq_remove(uq);
1124 umtxq_unlock(&uq->uq_key);
1125 }
1126 umtx_key_release(&uq->uq_key);
1127 if (error == ERESTART)
1128 error = EINTR;
1129 return (error);
1130 }
1131
1132 /*
1133 * Wake up threads sleeping on the specified address.
1134 */
1135 int
1136 kern_umtx_wake(struct thread *td, void *uaddr, int n_wake, int is_private)
1137 {
1138 struct umtx_key key;
1139 int ret;
1140
1141 if ((ret = umtx_key_get(uaddr, TYPE_SIMPLE_WAIT,
1142 is_private ? THREAD_SHARE : AUTO_SHARE, &key)) != 0)
1143 return (ret);
1144 umtxq_lock(&key);
1145 ret = umtxq_signal(&key, n_wake);
1146 umtxq_unlock(&key);
1147 umtx_key_release(&key);
1148 return (0);
1149 }
1150
1151 /*
1152 * Lock PTHREAD_PRIO_NONE protocol POSIX mutex.
1153 */
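/*
 * The m_owner word of a struct umutex encodes the lock state: it is
 * UMUTEX_UNOWNED when free, holds the owner's thread id (td_tid) when
 * locked, and has UMUTEX_CONTESTED or'ed in once a waiter exists, e.g.
 *
 *	m_owner = id | UMUTEX_CONTESTED;
 *
 * A userland unlock that sees the contested bit then knows it must
 * enter the kernel to wake a waiter.
 */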
1154 static int
1155 _do_lock_normal(struct thread *td, struct umutex *m, uint32_t flags, int timo,
1156 int mode)
1157 {
1158 struct umtx_q *uq;
1159 uint32_t owner, old, id;
1160 int error = 0;
1161
1162 id = td->td_tid;
1163 uq = td->td_umtxq;
1164
1165 /*
1166 * Care must be exercised when dealing with the umtx structure. It
1167 * can fault on any access.
1168 */
1169 for (;;) {
1170 owner = fuword32(__DEVOLATILE(void *, &m->m_owner));
1171 if (mode == _UMUTEX_WAIT) {
1172 if (owner == UMUTEX_UNOWNED || owner == UMUTEX_CONTESTED)
1173 return (0);
1174 } else {
1175 /*
1176 * Try the uncontested case. This should be done in userland.
1177 */
1178 owner = casuword32(&m->m_owner, UMUTEX_UNOWNED, id);
1179
1180 /* The acquire succeeded. */
1181 if (owner == UMUTEX_UNOWNED)
1182 return (0);
1183
1184 /* The address was invalid. */
1185 if (owner == -1)
1186 return (EFAULT);
1187
1188 /* If no one owns it but it is contested try to acquire it. */
1189 if (owner == UMUTEX_CONTESTED) {
1190 owner = casuword32(&m->m_owner,
1191 UMUTEX_CONTESTED, id | UMUTEX_CONTESTED);
1192
1193 if (owner == UMUTEX_CONTESTED)
1194 return (0);
1195
1196 /* The address was invalid. */
1197 if (owner == -1)
1198 return (EFAULT);
1199
1200 error = umtxq_check_susp(td);
1201 if (error != 0)
1202 return (error);
1203
1204 /* If this failed the lock has changed, restart. */
1205 continue;
1206 }
1207 }
1208
1209 if ((flags & UMUTEX_ERROR_CHECK) != 0 &&
1210 (owner & ~UMUTEX_CONTESTED) == id)
1211 return (EDEADLK);
1212
1213 if (mode == _UMUTEX_TRY)
1214 return (EBUSY);
1215
1216 /*
1217 * If we caught a signal, we have retried and now
1218 * exit immediately.
1219 */
1220 if (error != 0)
1221 return (error);
1222
1223 if ((error = umtx_key_get(m, TYPE_NORMAL_UMUTEX,
1224 GET_SHARE(flags), &uq->uq_key)) != 0)
1225 return (error);
1226
1227 umtxq_lock(&uq->uq_key);
1228 umtxq_busy(&uq->uq_key);
1229 umtxq_insert(uq);
1230 umtxq_unlock(&uq->uq_key);
1231
1232 /*
1233 * Set the contested bit so that a release in user space
1234 * knows to use the system call for unlock. If this fails
1235 * either someone else has acquired the lock or it has been
1236 * released.
1237 */
1238 old = casuword32(&m->m_owner, owner, owner | UMUTEX_CONTESTED);
1239
1240 /* The address was invalid. */
1241 if (old == -1) {
1242 umtxq_lock(&uq->uq_key);
1243 umtxq_remove(uq);
1244 umtxq_unbusy(&uq->uq_key);
1245 umtxq_unlock(&uq->uq_key);
1246 umtx_key_release(&uq->uq_key);
1247 return (EFAULT);
1248 }
1249
1250 /*
1251 * If we set the contested bit, sleep.  Otherwise the lock changed
1252 * and we need to retry, or we lost a race to the thread
1253 * unlocking the umtx.
1254 */
1255 umtxq_lock(&uq->uq_key);
1256 umtxq_unbusy(&uq->uq_key);
1257 if (old == owner)
1258 error = umtxq_sleep(uq, "umtxn", timo);
1259 umtxq_remove(uq);
1260 umtxq_unlock(&uq->uq_key);
1261 umtx_key_release(&uq->uq_key);
1262
1263 if (error == 0)
1264 error = umtxq_check_susp(td);
1265 }
1266
1267 return (0);
1268 }
1269
1273 /*
1274 * Unlock PTHREAD_PRIO_NONE protocol POSIX mutex.
1275 */
1276 static int
1277 do_unlock_normal(struct thread *td, struct umutex *m, uint32_t flags)
1278 {
1279 struct umtx_key key;
1280 uint32_t owner, old, id;
1281 int error;
1282 int count;
1283
1284 id = td->td_tid;
1285 /*
1286 * Make sure we own this mtx.
1287 */
1288 owner = fuword32(__DEVOLATILE(uint32_t *, &m->m_owner));
1289 if (owner == -1)
1290 return (EFAULT);
1291
1292 if ((owner & ~UMUTEX_CONTESTED) != id)
1293 return (EPERM);
1294
1295 if ((owner & UMUTEX_CONTESTED) == 0) {
1296 old = casuword32(&m->m_owner, owner, UMUTEX_UNOWNED);
1297 if (old == -1)
1298 return (EFAULT);
1299 if (old == owner)
1300 return (0);
1301 owner = old;
1302 }
1303
1304 /* We should only ever be in here for contested locks */
1305 if ((error = umtx_key_get(m, TYPE_NORMAL_UMUTEX, GET_SHARE(flags),
1306 &key)) != 0)
1307 return (error);
1308
1309 umtxq_lock(&key);
1310 umtxq_busy(&key);
1311 count = umtxq_count(&key);
1312 umtxq_unlock(&key);
1313
1314 /*
1315 * When unlocking the umtx, it must be marked as unowned if
1316 * at most one thread is waiting for it.
1317 * Otherwise, it must be marked as contested.
1318 */
1319 old = casuword32(&m->m_owner, owner,
1320 count <= 1 ? UMUTEX_UNOWNED : UMUTEX_CONTESTED);
1321 umtxq_lock(&key);
1322 umtxq_signal(&key,1);
1323 umtxq_unbusy(&key);
1324 umtxq_unlock(&key);
1325 umtx_key_release(&key);
1326 if (old == -1)
1327 return (EFAULT);
1328 if (old != owner)
1329 return (EINVAL);
1330 return (0);
1331 }
1332
1333 /*
1334 * Check if the mutex is available and wake up a waiter;
1335 * this applies only to simple (PTHREAD_PRIO_NONE) mutexes.
1336 */
1337 static int
1338 do_wake_umutex(struct thread *td, struct umutex *m)
1339 {
1340 struct umtx_key key;
1341 uint32_t owner;
1342 uint32_t flags;
1343 int error;
1344 int count;
1345
1346 owner = fuword32(__DEVOLATILE(uint32_t *, &m->m_owner));
1347 if (owner == -1)
1348 return (EFAULT);
1349
1350 if ((owner & ~UMUTEX_CONTESTED) != 0)
1351 return (0);
1352
1353 flags = fuword32(&m->m_flags);
1354
1355 /* We should only ever be in here for contested locks */
1356 if ((error = umtx_key_get(m, TYPE_NORMAL_UMUTEX, GET_SHARE(flags),
1357 &key)) != 0)
1358 return (error);
1359
1360 umtxq_lock(&key);
1361 umtxq_busy(&key);
1362 count = umtxq_count(&key);
1363 umtxq_unlock(&key);
1364
1365 if (count <= 1)
1366 owner = casuword32(&m->m_owner, UMUTEX_CONTESTED, UMUTEX_UNOWNED);
1367
1368 umtxq_lock(&key);
1369 if (count != 0 && (owner & ~UMUTEX_CONTESTED) == 0)
1370 umtxq_signal(&key, 1);
1371 umtxq_unbusy(&key);
1372 umtxq_unlock(&key);
1373 umtx_key_release(&key);
1374 return (0);
1375 }
1376
1377 /*
1378 * Check if the mutex has waiters and try to repair the contested bit.
1379 */
1380 static int
1381 do_wake2_umutex(struct thread *td, struct umutex *m, uint32_t flags)
1382 {
1383 struct umtx_key key;
1384 uint32_t owner, old;
1385 int type;
1386 int error;
1387 int count;
1388
1389 switch(flags & (UMUTEX_PRIO_INHERIT | UMUTEX_PRIO_PROTECT)) {
1390 case 0:
1391 type = TYPE_NORMAL_UMUTEX;
1392 break;
1393 case UMUTEX_PRIO_INHERIT:
1394 type = TYPE_PI_UMUTEX;
1395 break;
1396 case UMUTEX_PRIO_PROTECT:
1397 type = TYPE_PP_UMUTEX;
1398 break;
1399 default:
1400 return (EINVAL);
1401 }
1402 if ((error = umtx_key_get(m, type, GET_SHARE(flags),
1403 &key)) != 0)
1404 return (error);
1405
1406 owner = 0;
1407 umtxq_lock(&key);
1408 umtxq_busy(&key);
1409 count = umtxq_count(&key);
1410 umtxq_unlock(&key);
1411 /*
1412 * Only repair the contested bit if there is a waiter; that means the
1413 * mutex is still being referenced by userland code.  Otherwise, don't
1414 * update any memory.
1415 */
1416 if (count > 1) {
1417 owner = fuword32(__DEVOLATILE(uint32_t *, &m->m_owner));
1418 while ((owner & UMUTEX_CONTESTED) == 0) {
1419 old = casuword32(&m->m_owner, owner,
1420 owner|UMUTEX_CONTESTED);
1421 if (old == owner)
1422 break;
1423 owner = old;
1424 if (old == -1)
1425 break;
1426 error = umtxq_check_susp(td);
1427 if (error != 0)
1428 break;
1429 }
1430 } else if (count == 1) {
1431 owner = fuword32(__DEVOLATILE(uint32_t *, &m->m_owner));
1432 while ((owner & ~UMUTEX_CONTESTED) != 0 &&
1433 (owner & UMUTEX_CONTESTED) == 0) {
1434 old = casuword32(&m->m_owner, owner,
1435 owner|UMUTEX_CONTESTED);
1436 if (old == owner)
1437 break;
1438 owner = old;
1439 if (old == -1)
1440 break;
1441 error = umtxq_check_susp(td);
1442 if (error != 0)
1443 break;
1444 }
1445 }
1446 umtxq_lock(&key);
1447 if (owner == -1) {
1448 error = EFAULT;
1449 umtxq_signal(&key, INT_MAX);
1450 }
1451 else if (count != 0 && (owner & ~UMUTEX_CONTESTED) == 0)
1452 umtxq_signal(&key, 1);
1453 umtxq_unbusy(&key);
1454 umtxq_unlock(&key);
1455 umtx_key_release(&key);
1456 return (error);
1457 }
1458
1459 static inline struct umtx_pi *
1460 umtx_pi_alloc(int flags)
1461 {
1462 struct umtx_pi *pi;
1463
1464 pi = uma_zalloc(umtx_pi_zone, M_ZERO | flags);
1465 TAILQ_INIT(&pi->pi_blocked);
1466 atomic_add_int(&umtx_pi_allocated, 1);
1467 return (pi);
1468 }
1469
1470 static inline void
1471 umtx_pi_free(struct umtx_pi *pi)
1472 {
1473 uma_zfree(umtx_pi_zone, pi);
1474 atomic_add_int(&umtx_pi_allocated, -1);
1475 }
1476
1477 /*
1478 * Adjust the thread's position on the PI mutex's blocked list after
1479 * its priority has been changed.
1480 */
1481 static int
1482 umtx_pi_adjust_thread(struct umtx_pi *pi, struct thread *td)
1483 {
1484 struct umtx_q *uq, *uq1, *uq2;
1485 struct thread *td1;
1486
1487 mtx_assert(&umtx_lock, MA_OWNED);
1488 if (pi == NULL)
1489 return (0);
1490
1491 uq = td->td_umtxq;
1492
1493 /*
1494 * Check if the thread needs to be moved on the blocked chain.
1495 * It needs to be moved if either its priority is lower than
1496 * the previous thread or higher than the next thread.
1497 */
1498 uq1 = TAILQ_PREV(uq, umtxq_head, uq_lockq);
1499 uq2 = TAILQ_NEXT(uq, uq_lockq);
1500 if ((uq1 != NULL && UPRI(td) < UPRI(uq1->uq_thread)) ||
1501 (uq2 != NULL && UPRI(td) > UPRI(uq2->uq_thread))) {
1502 /*
1503 * Remove thread from blocked chain and determine where
1504 * it should be moved to.
1505 */
1506 TAILQ_REMOVE(&pi->pi_blocked, uq, uq_lockq);
1507 TAILQ_FOREACH(uq1, &pi->pi_blocked, uq_lockq) {
1508 td1 = uq1->uq_thread;
1509 MPASS(td1->td_proc->p_magic == P_MAGIC);
1510 if (UPRI(td1) > UPRI(td))
1511 break;
1512 }
1513
1514 if (uq1 == NULL)
1515 TAILQ_INSERT_TAIL(&pi->pi_blocked, uq, uq_lockq);
1516 else
1517 TAILQ_INSERT_BEFORE(uq1, uq, uq_lockq);
1518 }
1519 return (1);
1520 }
1521
1522 /*
1523 * Propagate priority when a thread is blocked on POSIX
1524 * PI mutex.
1525 */
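/*
 * Illustrative scenario (made-up priority values, smaller == higher):
 * thread C (priority 100) blocks on PI mutex M2 owned by B (140),
 * which is itself blocked on PI mutex M1 owned by A (180).  The loop
 * below walks C -> M2 -> B -> M1 -> A, lending priority 100 to both
 * B and A, and stops at the first owner that is not blocked or that
 * already runs at least that high.
 */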
1526 static void
1527 umtx_propagate_priority(struct thread *td)
1528 {
1529 struct umtx_q *uq;
1530 struct umtx_pi *pi;
1531 int pri;
1532
1533 mtx_assert(&umtx_lock, MA_OWNED);
1534 pri = UPRI(td);
1535 uq = td->td_umtxq;
1536 pi = uq->uq_pi_blocked;
1537 if (pi == NULL)
1538 return;
1539
1540 for (;;) {
1541 td = pi->pi_owner;
1542 if (td == NULL || td == curthread)
1543 return;
1544
1545 MPASS(td->td_proc != NULL);
1546 MPASS(td->td_proc->p_magic == P_MAGIC);
1547
1548 thread_lock(td);
1549 if (td->td_lend_user_pri > pri)
1550 sched_lend_user_prio(td, pri);
1551 else {
1552 thread_unlock(td);
1553 break;
1554 }
1555 thread_unlock(td);
1556
1557 /*
1558 * Pick up the lock that td is blocked on.
1559 */
1560 uq = td->td_umtxq;
1561 pi = uq->uq_pi_blocked;
1562 if (pi == NULL)
1563 break;
1564 /* Resort td on the list if needed. */
1565 umtx_pi_adjust_thread(pi, td);
1566 }
1567 }
1568
1569 /*
1570 * Recompute the propagated priority for a PI mutex when a thread
1571 * blocked on it is interrupted by a signal or resumed by others.
1572 */
1573 static void
1574 umtx_repropagate_priority(struct umtx_pi *pi)
1575 {
1576 struct umtx_q *uq, *uq_owner;
1577 struct umtx_pi *pi2;
1578 int pri;
1579
1580 mtx_assert(&umtx_lock, MA_OWNED);
1581
1582 while (pi != NULL && pi->pi_owner != NULL) {
1583 pri = PRI_MAX;
1584 uq_owner = pi->pi_owner->td_umtxq;
1585
1586 TAILQ_FOREACH(pi2, &uq_owner->uq_pi_contested, pi_link) {
1587 uq = TAILQ_FIRST(&pi2->pi_blocked);
1588 if (uq != NULL) {
1589 if (pri > UPRI(uq->uq_thread))
1590 pri = UPRI(uq->uq_thread);
1591 }
1592 }
1593
1594 if (pri > uq_owner->uq_inherited_pri)
1595 pri = uq_owner->uq_inherited_pri;
1596 thread_lock(pi->pi_owner);
1597 sched_lend_user_prio(pi->pi_owner, pri);
1598 thread_unlock(pi->pi_owner);
1599 if ((pi = uq_owner->uq_pi_blocked) != NULL)
1600 umtx_pi_adjust_thread(pi, uq_owner->uq_thread);
1601 }
1602 }
1603
1604 /*
1605 * Insert a PI mutex into the owning thread's list.
1606 */
1607 static void
1608 umtx_pi_setowner(struct umtx_pi *pi, struct thread *owner)
1609 {
1610 struct umtx_q *uq_owner;
1611
1612 uq_owner = owner->td_umtxq;
1613 mtx_assert(&umtx_lock, MA_OWNED);
1614 if (pi->pi_owner != NULL)
1615 panic("pi_ower != NULL");
1616 pi->pi_owner = owner;
1617 TAILQ_INSERT_TAIL(&uq_owner->uq_pi_contested, pi, pi_link);
1618 }
1619
1620 /*
1621 * Claim ownership of a PI mutex.
1622 */
1623 static int
1624 umtx_pi_claim(struct umtx_pi *pi, struct thread *owner)
1625 {
1626 struct umtx_q *uq, *uq_owner;
1627
1628 uq_owner = owner->td_umtxq;
1629 mtx_lock_spin(&umtx_lock);
1630 if (pi->pi_owner == owner) {
1631 mtx_unlock_spin(&umtx_lock);
1632 return (0);
1633 }
1634
1635 if (pi->pi_owner != NULL) {
1636 /*
1637 * userland may have already messed the mutex, sigh.
1638 */
1639 mtx_unlock_spin(&umtx_lock);
1640 return (EPERM);
1641 }
1642 umtx_pi_setowner(pi, owner);
1643 uq = TAILQ_FIRST(&pi->pi_blocked);
1644 if (uq != NULL) {
1645 int pri;
1646
1647 pri = UPRI(uq->uq_thread);
1648 thread_lock(owner);
1649 if (pri < UPRI(owner))
1650 sched_lend_user_prio(owner, pri);
1651 thread_unlock(owner);
1652 }
1653 mtx_unlock_spin(&umtx_lock);
1654 return (0);
1655 }
1656
1657 /*
1658 * Adjust a thread's position in the blocked list of its PI mutex;
1659 * this may trigger a new round of priority propagation.
1660 */
1661 void
1662 umtx_pi_adjust(struct thread *td, u_char oldpri)
1663 {
1664 struct umtx_q *uq;
1665 struct umtx_pi *pi;
1666
1667 uq = td->td_umtxq;
1668 mtx_lock_spin(&umtx_lock);
1669 /*
1670 * Pick up the lock that td is blocked on.
1671 */
1672 pi = uq->uq_pi_blocked;
1673 if (pi != NULL) {
1674 umtx_pi_adjust_thread(pi, td);
1675 umtx_repropagate_priority(pi);
1676 }
1677 mtx_unlock_spin(&umtx_lock);
1678 }
1679
1680 /*
1681 * Sleep on a PI mutex.
1682 */
1683 static int
1684 umtxq_sleep_pi(struct umtx_q *uq, struct umtx_pi *pi,
1685 uint32_t owner, const char *wmesg, int timo)
1686 {
1687 struct umtxq_chain *uc;
1688 struct thread *td, *td1;
1689 struct umtx_q *uq1;
1690 int pri;
1691 int error = 0;
1692
1693 td = uq->uq_thread;
1694 KASSERT(td == curthread, ("inconsistent uq_thread"));
1695 uc = umtxq_getchain(&uq->uq_key);
1696 UMTXQ_LOCKED_ASSERT(uc);
1697 UMTXQ_BUSY_ASSERT(uc);
1698 umtxq_insert(uq);
1699 mtx_lock_spin(&umtx_lock);
1700 if (pi->pi_owner == NULL) {
1701 mtx_unlock_spin(&umtx_lock);
1702 /* XXX Only look up thread in current process. */
1703 td1 = tdfind(owner, curproc->p_pid);
1704 mtx_lock_spin(&umtx_lock);
1705 if (td1 != NULL) {
1706 if (pi->pi_owner == NULL)
1707 umtx_pi_setowner(pi, td1);
1708 PROC_UNLOCK(td1->td_proc);
1709 }
1710 }
1711
1712 TAILQ_FOREACH(uq1, &pi->pi_blocked, uq_lockq) {
1713 pri = UPRI(uq1->uq_thread);
1714 if (pri > UPRI(td))
1715 break;
1716 }
1717
1718 if (uq1 != NULL)
1719 TAILQ_INSERT_BEFORE(uq1, uq, uq_lockq);
1720 else
1721 TAILQ_INSERT_TAIL(&pi->pi_blocked, uq, uq_lockq);
1722
1723 uq->uq_pi_blocked = pi;
1724 thread_lock(td);
1725 td->td_flags |= TDF_UPIBLOCKED;
1726 thread_unlock(td);
1727 umtx_propagate_priority(td);
1728 mtx_unlock_spin(&umtx_lock);
1729 umtxq_unbusy(&uq->uq_key);
1730
1731 if (uq->uq_flags & UQF_UMTXQ) {
1732 error = msleep(uq, &uc->uc_lock, PCATCH, wmesg, timo);
1733 if (error == EWOULDBLOCK)
1734 error = ETIMEDOUT;
1735 if (uq->uq_flags & UQF_UMTXQ) {
1736 umtxq_remove(uq);
1737 }
1738 }
1739 mtx_lock_spin(&umtx_lock);
1740 uq->uq_pi_blocked = NULL;
1741 thread_lock(td);
1742 td->td_flags &= ~TDF_UPIBLOCKED;
1743 thread_unlock(td);
1744 TAILQ_REMOVE(&pi->pi_blocked, uq, uq_lockq);
1745 umtx_repropagate_priority(pi);
1746 mtx_unlock_spin(&umtx_lock);
1747 umtxq_unlock(&uq->uq_key);
1748
1749 return (error);
1750 }
1751
1752 /*
1753 * Add a reference to a PI mutex.
1754 */
1755 static void
1756 umtx_pi_ref(struct umtx_pi *pi)
1757 {
1758 struct umtxq_chain *uc;
1759
1760 uc = umtxq_getchain(&pi->pi_key);
1761 UMTXQ_LOCKED_ASSERT(uc);
1762 pi->pi_refcount++;
1763 }
1764
1765 /*
1766 * Decrease the reference count of a PI mutex; if the count
1767 * drops to zero, its memory is freed.
1768 */
1769 static void
1770 umtx_pi_unref(struct umtx_pi *pi)
1771 {
1772 struct umtxq_chain *uc;
1773
1774 uc = umtxq_getchain(&pi->pi_key);
1775 UMTXQ_LOCKED_ASSERT(uc);
1776 KASSERT(pi->pi_refcount > 0, ("invalid reference count"));
1777 if (--pi->pi_refcount == 0) {
1778 mtx_lock_spin(&umtx_lock);
1779 if (pi->pi_owner != NULL) {
1780 TAILQ_REMOVE(&pi->pi_owner->td_umtxq->uq_pi_contested,
1781 pi, pi_link);
1782 pi->pi_owner = NULL;
1783 }
1784 KASSERT(TAILQ_EMPTY(&pi->pi_blocked),
1785 ("blocked queue not empty"));
1786 mtx_unlock_spin(&umtx_lock);
1787 TAILQ_REMOVE(&uc->uc_pi_list, pi, pi_hashlink);
1788 umtx_pi_free(pi);
1789 }
1790 }
1791
1792 /*
1793 * Find a PI mutex in the hash table.
1794 */
1795 static struct umtx_pi *
1796 umtx_pi_lookup(struct umtx_key *key)
1797 {
1798 struct umtxq_chain *uc;
1799 struct umtx_pi *pi;
1800
1801 uc = umtxq_getchain(key);
1802 UMTXQ_LOCKED_ASSERT(uc);
1803
1804 TAILQ_FOREACH(pi, &uc->uc_pi_list, pi_hashlink) {
1805 if (umtx_key_match(&pi->pi_key, key)) {
1806 return (pi);
1807 }
1808 }
1809 return (NULL);
1810 }
1811
1812 /*
1813 * Insert a PI mutex into the hash table.
1814 */
1815 static inline void
1816 umtx_pi_insert(struct umtx_pi *pi)
1817 {
1818 struct umtxq_chain *uc;
1819
1820 uc = umtxq_getchain(&pi->pi_key);
1821 UMTXQ_LOCKED_ASSERT(uc);
1822 TAILQ_INSERT_TAIL(&uc->uc_pi_list, pi, pi_hashlink);
1823 }
1824
1825 /*
1826 * Lock a PI mutex.
1827 */
1828 static int
1829 _do_lock_pi(struct thread *td, struct umutex *m, uint32_t flags, int timo,
1830 int try)
1831 {
1832 struct umtx_q *uq;
1833 struct umtx_pi *pi, *new_pi;
1834 uint32_t id, owner, old;
1835 int error;
1836
1837 id = td->td_tid;
1838 uq = td->td_umtxq;
1839
1840 if ((error = umtx_key_get(m, TYPE_PI_UMUTEX, GET_SHARE(flags),
1841 &uq->uq_key)) != 0)
1842 return (error);
1843 umtxq_lock(&uq->uq_key);
1844 pi = umtx_pi_lookup(&uq->uq_key);
1845 if (pi == NULL) {
1846 new_pi = umtx_pi_alloc(M_NOWAIT);
1847 if (new_pi == NULL) {
1848 umtxq_unlock(&uq->uq_key);
1849 new_pi = umtx_pi_alloc(M_WAITOK);
1850 umtxq_lock(&uq->uq_key);
1851 pi = umtx_pi_lookup(&uq->uq_key);
1852 if (pi != NULL) {
1853 umtx_pi_free(new_pi);
1854 new_pi = NULL;
1855 }
1856 }
1857 if (new_pi != NULL) {
1858 new_pi->pi_key = uq->uq_key;
1859 umtx_pi_insert(new_pi);
1860 pi = new_pi;
1861 }
1862 }
1863 umtx_pi_ref(pi);
1864 umtxq_unlock(&uq->uq_key);
1865
1866 /*
1867 * Care must be exercised when dealing with the umtx structure. It
1868 * can fault on any access.
1869 */
1870 for (;;) {
1871 /*
1872 * Try the uncontested case. This should be done in userland.
1873 */
1874 owner = casuword32(&m->m_owner, UMUTEX_UNOWNED, id);
1875
1876 /* The acquire succeeded. */
1877 if (owner == UMUTEX_UNOWNED) {
1878 error = 0;
1879 break;
1880 }
1881
1882 /* The address was invalid. */
1883 if (owner == -1) {
1884 error = EFAULT;
1885 break;
1886 }
1887
1888 /* If no one owns it but it is contested try to acquire it. */
1889 if (owner == UMUTEX_CONTESTED) {
1890 owner = casuword32(&m->m_owner,
1891 UMUTEX_CONTESTED, id | UMUTEX_CONTESTED);
1892
1893 if (owner == UMUTEX_CONTESTED) {
1894 umtxq_lock(&uq->uq_key);
1895 umtxq_busy(&uq->uq_key);
1896 error = umtx_pi_claim(pi, td);
1897 umtxq_unbusy(&uq->uq_key);
1898 umtxq_unlock(&uq->uq_key);
1899 break;
1900 }
1901
1902 /* The address was invalid. */
1903 if (owner == -1) {
1904 error = EFAULT;
1905 break;
1906 }
1907
1908 error = umtxq_check_susp(td);
1909 if (error != 0)
1910 break;
1911
1912 /* If this failed the lock has changed, restart. */
1913 continue;
1914 }
1915
1916 if ((flags & UMUTEX_ERROR_CHECK) != 0 &&
1917 (owner & ~UMUTEX_CONTESTED) == id) {
1918 error = EDEADLK;
1919 break;
1920 }
1921
1922 if (try != 0) {
1923 error = EBUSY;
1924 break;
1925 }
1926
1927 /*
1928 * If we caught a signal, we have retried and now
1929 * exit immediately.
1930 */
1931 if (error != 0)
1932 break;
1933
1934 umtxq_lock(&uq->uq_key);
1935 umtxq_busy(&uq->uq_key);
1936 umtxq_unlock(&uq->uq_key);
1937
1938 /*
1939 * Set the contested bit so that a release in user space
1940 * knows to use the system call for unlock. If this fails
1941 * either someone else has acquired the lock or it has been
1942 * released.
1943 */
1944 old = casuword32(&m->m_owner, owner, owner | UMUTEX_CONTESTED);
1945
1946 /* The address was invalid. */
1947 if (old == -1) {
1948 umtxq_lock(&uq->uq_key);
1949 umtxq_unbusy(&uq->uq_key);
1950 umtxq_unlock(&uq->uq_key);
1951 error = EFAULT;
1952 break;
1953 }
1954
1955 umtxq_lock(&uq->uq_key);
1956 /*
1957 * If we set the contested bit, sleep.  Otherwise the lock changed
1958 * and we need to retry, or we lost a race to the thread
1959 * unlocking the umtx.
1960 */
1961 if (old == owner)
1962 error = umtxq_sleep_pi(uq, pi, owner & ~UMUTEX_CONTESTED,
1963 "umtxpi", timo);
1964 else {
1965 umtxq_unbusy(&uq->uq_key);
1966 umtxq_unlock(&uq->uq_key);
1967 }
1968
1969 error = umtxq_check_susp(td);
1970 if (error != 0)
1971 break;
1972 }
1973
1974 umtxq_lock(&uq->uq_key);
1975 umtx_pi_unref(pi);
1976 umtxq_unlock(&uq->uq_key);
1977
1978 umtx_key_release(&uq->uq_key);
1979 return (error);
1980 }
1981
1982 /*
1983 * Unlock a PI mutex.
1984 */
1985 static int
1986 do_unlock_pi(struct thread *td, struct umutex *m, uint32_t flags)
1987 {
1988 struct umtx_key key;
1989 struct umtx_q *uq_first, *uq_first2, *uq_me;
1990 struct umtx_pi *pi, *pi2;
1991 uint32_t owner, old, id;
1992 int error;
1993 int count;
1994 int pri;
1995
1996 id = td->td_tid;
1997 /*
1998 * Make sure we own this mtx.
1999 */
2000 owner = fuword32(__DEVOLATILE(uint32_t *, &m->m_owner));
2001 if (owner == -1)
2002 return (EFAULT);
2003
2004 if ((owner & ~UMUTEX_CONTESTED) != id)
2005 return (EPERM);
2006
2007 /* This should be done in userland */
2008 if ((owner & UMUTEX_CONTESTED) == 0) {
2009 old = casuword32(&m->m_owner, owner, UMUTEX_UNOWNED);
2010 if (old == -1)
2011 return (EFAULT);
2012 if (old == owner)
2013 return (0);
2014 owner = old;
2015 }
2016
2017 /* We should only ever be in here for contested locks */
2018 if ((error = umtx_key_get(m, TYPE_PI_UMUTEX, GET_SHARE(flags),
2019 &key)) != 0)
2020 return (error);
2021
2022 umtxq_lock(&key);
2023 umtxq_busy(&key);
2024 count = umtxq_count_pi(&key, &uq_first);
2025 if (uq_first != NULL) {
2026 mtx_lock_spin(&umtx_lock);
2027 pi = uq_first->uq_pi_blocked;
2028 KASSERT(pi != NULL, ("pi == NULL?"));
2029 if (pi->pi_owner != curthread) {
2030 mtx_unlock_spin(&umtx_lock);
2031 umtxq_unbusy(&key);
2032 umtxq_unlock(&key);
2033 umtx_key_release(&key);
2034 /* userland messed the mutex */
2035 return (EPERM);
2036 }
2037 uq_me = curthread->td_umtxq;
2038 pi->pi_owner = NULL;
2039 TAILQ_REMOVE(&uq_me->uq_pi_contested, pi, pi_link);
2040 /* get highest priority thread which is still sleeping. */
2041 uq_first = TAILQ_FIRST(&pi->pi_blocked);
2042 while (uq_first != NULL &&
2043 (uq_first->uq_flags & UQF_UMTXQ) == 0) {
2044 uq_first = TAILQ_NEXT(uq_first, uq_lockq);
2045 }
2046 pri = PRI_MAX;
2047 TAILQ_FOREACH(pi2, &uq_me->uq_pi_contested, pi_link) {
2048 uq_first2 = TAILQ_FIRST(&pi2->pi_blocked);
2049 if (uq_first2 != NULL) {
2050 if (pri > UPRI(uq_first2->uq_thread))
2051 pri = UPRI(uq_first2->uq_thread);
2052 }
2053 }
2054 thread_lock(curthread);
2055 sched_lend_user_prio(curthread, pri);
2056 thread_unlock(curthread);
2057 mtx_unlock_spin(&umtx_lock);
2058 if (uq_first)
2059 umtxq_signal_thread(uq_first);
2060 }
2061 umtxq_unlock(&key);
2062
2063 /*
2064 * When unlocking the umtx, it must be marked as unowned if
2065 * at most one thread is waiting for it.
2066 * Otherwise, it must be marked as contested.
2067 */
2068 old = casuword32(&m->m_owner, owner,
2069 count <= 1 ? UMUTEX_UNOWNED : UMUTEX_CONTESTED);
2070
2071 umtxq_lock(&key);
2072 umtxq_unbusy(&key);
2073 umtxq_unlock(&key);
2074 umtx_key_release(&key);
2075 if (old == -1)
2076 return (EFAULT);
2077 if (old != owner)
2078 return (EINVAL);
2079 return (0);
2080 }
2081
2082 /*
2083 * Lock a PP mutex.
2084 */
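/*
 * Sketch of priority protection: the userland ceiling stored in
 * m_ceilings[0] is an rtprio value which the code below converts to a
 * kernel priority as
 *
 *	PRI_MIN_REALTIME + (RTP_PRIO_MAX - m_ceilings[0])
 *
 * and, for threads allowed to use real-time priorities, uq_inherited_pri
 * is raised (numerically lowered) to at least that value while the
 * mutex is held.
 */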
2085 static int
2086 _do_lock_pp(struct thread *td, struct umutex *m, uint32_t flags, int timo,
2087 int try)
2088 {
2089 struct umtx_q *uq, *uq2;
2090 struct umtx_pi *pi;
2091 uint32_t ceiling;
2092 uint32_t owner, id;
2093 int error, pri, old_inherited_pri, su;
2094
2095 id = td->td_tid;
2096 uq = td->td_umtxq;
2097 if ((error = umtx_key_get(m, TYPE_PP_UMUTEX, GET_SHARE(flags),
2098 &uq->uq_key)) != 0)
2099 return (error);
2100 su = (priv_check(td, PRIV_SCHED_RTPRIO) == 0);
2101 for (;;) {
2102 old_inherited_pri = uq->uq_inherited_pri;
2103 umtxq_lock(&uq->uq_key);
2104 umtxq_busy(&uq->uq_key);
2105 umtxq_unlock(&uq->uq_key);
2106
2107 ceiling = RTP_PRIO_MAX - fuword32(&m->m_ceilings[0]);
2108 if (ceiling > RTP_PRIO_MAX) {
2109 error = EINVAL;
2110 goto out;
2111 }
2112
2113 mtx_lock_spin(&umtx_lock);
2114 if (UPRI(td) < PRI_MIN_REALTIME + ceiling) {
2115 mtx_unlock_spin(&umtx_lock);
2116 error = EINVAL;
2117 goto out;
2118 }
2119 if (su && PRI_MIN_REALTIME + ceiling < uq->uq_inherited_pri) {
2120 uq->uq_inherited_pri = PRI_MIN_REALTIME + ceiling;
2121 thread_lock(td);
2122 if (uq->uq_inherited_pri < UPRI(td))
2123 sched_lend_user_prio(td, uq->uq_inherited_pri);
2124 thread_unlock(td);
2125 }
2126 mtx_unlock_spin(&umtx_lock);
2127
2128 owner = casuword32(&m->m_owner,
2129 UMUTEX_CONTESTED, id | UMUTEX_CONTESTED);
2130
2131 if (owner == UMUTEX_CONTESTED) {
2132 error = 0;
2133 break;
2134 }
2135
2136 /* The address was invalid. */
2137 if (owner == -1) {
2138 error = EFAULT;
2139 break;
2140 }
2141
2142 if ((flags & UMUTEX_ERROR_CHECK) != 0 &&
2143 (owner & ~UMUTEX_CONTESTED) == id) {
2144 error = EDEADLK;
2145 break;
2146 }
2147
2148 if (try != 0) {
2149 error = EBUSY;
2150 break;
2151 }
2152
2153 /*
2154 * If we caught a signal, we have retried and now
2155 * exit immediately.
2156 */
2157 if (error != 0)
2158 break;
2159
2160 umtxq_lock(&uq->uq_key);
2161 umtxq_insert(uq);
2162 umtxq_unbusy(&uq->uq_key);
2163 error = umtxq_sleep(uq, "umtxpp", timo);
2164 umtxq_remove(uq);
2165 umtxq_unlock(&uq->uq_key);
2166
2167 mtx_lock_spin(&umtx_lock);
2168 uq->uq_inherited_pri = old_inherited_pri;
2169 pri = PRI_MAX;
2170 TAILQ_FOREACH(pi, &uq->uq_pi_contested, pi_link) {
2171 uq2 = TAILQ_FIRST(&pi->pi_blocked);
2172 if (uq2 != NULL) {
2173 if (pri > UPRI(uq2->uq_thread))
2174 pri = UPRI(uq2->uq_thread);
2175 }
2176 }
2177 if (pri > uq->uq_inherited_pri)
2178 pri = uq->uq_inherited_pri;
2179 thread_lock(td);
2180 sched_lend_user_prio(td, pri);
2181 thread_unlock(td);
2182 mtx_unlock_spin(&umtx_lock);
2183 }
2184
2185 if (error != 0) {
2186 mtx_lock_spin(&umtx_lock);
2187 uq->uq_inherited_pri = old_inherited_pri;
2188 pri = PRI_MAX;
2189 TAILQ_FOREACH(pi, &uq->uq_pi_contested, pi_link) {
2190 uq2 = TAILQ_FIRST(&pi->pi_blocked);
2191 if (uq2 != NULL) {
2192 if (pri > UPRI(uq2->uq_thread))
2193 pri = UPRI(uq2->uq_thread);
2194 }
2195 }
2196 if (pri > uq->uq_inherited_pri)
2197 pri = uq->uq_inherited_pri;
2198 thread_lock(td);
2199 sched_lend_user_prio(td, pri);
2200 thread_unlock(td);
2201 mtx_unlock_spin(&umtx_lock);
2202 }
2203
2204 out:
2205 umtxq_lock(&uq->uq_key);
2206 umtxq_unbusy(&uq->uq_key);
2207 umtxq_unlock(&uq->uq_key);
2208 umtx_key_release(&uq->uq_key);
2209 return (error);
2210 }
2211
2212 /*
2213 * Unlock a PP mutex.
2214 */
2215 static int
2216 do_unlock_pp(struct thread *td, struct umutex *m, uint32_t flags)
2217 {
2218 struct umtx_key key;
2219 struct umtx_q *uq, *uq2;
2220 struct umtx_pi *pi;
2221 uint32_t owner, id;
2222 uint32_t rceiling;
2223 int error, pri, new_inherited_pri, su;
2224
2225 id = td->td_tid;
2226 uq = td->td_umtxq;
2227 su = (priv_check(td, PRIV_SCHED_RTPRIO) == 0);
2228
2229 /*
2230 * Make sure we own this mtx.
2231 */
2232 owner = fuword32(__DEVOLATILE(uint32_t *, &m->m_owner));
2233 if (owner == -1)
2234 return (EFAULT);
2235
2236 if ((owner & ~UMUTEX_CONTESTED) != id)
2237 return (EPERM);
2238
2239 error = copyin(&m->m_ceilings[1], &rceiling, sizeof(uint32_t));
2240 if (error != 0)
2241 return (error);
2242
2243 if (rceiling == -1)
2244 new_inherited_pri = PRI_MAX;
2245 else {
2246 rceiling = RTP_PRIO_MAX - rceiling;
2247 if (rceiling > RTP_PRIO_MAX)
2248 return (EINVAL);
2249 new_inherited_pri = PRI_MIN_REALTIME + rceiling;
2250 }
2251
2252 if ((error = umtx_key_get(m, TYPE_PP_UMUTEX, GET_SHARE(flags),
2253 &key)) != 0)
2254 return (error);
2255 umtxq_lock(&key);
2256 umtxq_busy(&key);
2257 umtxq_unlock(&key);
2258 	/*
2259 	 * For a priority-protected mutex, always set the unlocked state
2260 	 * to UMUTEX_CONTESTED, so that userland always enters the kernel
2261 	 * to lock the mutex; this is necessary because the thread's
2262 	 * priority has to be adjusted for such a mutex.
2263 	 */
2264 error = suword32(__DEVOLATILE(uint32_t *, &m->m_owner),
2265 UMUTEX_CONTESTED);
2266
2267 umtxq_lock(&key);
2268 if (error == 0)
2269 umtxq_signal(&key, 1);
2270 umtxq_unbusy(&key);
2271 umtxq_unlock(&key);
2272
2273 if (error == -1)
2274 error = EFAULT;
2275 else {
2276 mtx_lock_spin(&umtx_lock);
2277 if (su != 0)
2278 uq->uq_inherited_pri = new_inherited_pri;
2279 pri = PRI_MAX;
2280 TAILQ_FOREACH(pi, &uq->uq_pi_contested, pi_link) {
2281 uq2 = TAILQ_FIRST(&pi->pi_blocked);
2282 if (uq2 != NULL) {
2283 if (pri > UPRI(uq2->uq_thread))
2284 pri = UPRI(uq2->uq_thread);
2285 }
2286 }
2287 if (pri > uq->uq_inherited_pri)
2288 pri = uq->uq_inherited_pri;
2289 thread_lock(td);
2290 sched_lend_user_prio(td, pri);
2291 thread_unlock(td);
2292 mtx_unlock_spin(&umtx_lock);
2293 }
2294 umtx_key_release(&key);
2295 return (error);
2296 }
2297
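/*
 * Set the priority ceiling of a PP mutex, returning the previous
 * ceiling through old_ceiling when requested.
 */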
2298 static int
2299 do_set_ceiling(struct thread *td, struct umutex *m, uint32_t ceiling,
2300 uint32_t *old_ceiling)
2301 {
2302 struct umtx_q *uq;
2303 uint32_t save_ceiling;
2304 uint32_t owner, id;
2305 uint32_t flags;
2306 int error;
2307
2308 flags = fuword32(&m->m_flags);
2309 if ((flags & UMUTEX_PRIO_PROTECT) == 0)
2310 return (EINVAL);
2311 if (ceiling > RTP_PRIO_MAX)
2312 return (EINVAL);
2313 id = td->td_tid;
2314 uq = td->td_umtxq;
2315 if ((error = umtx_key_get(m, TYPE_PP_UMUTEX, GET_SHARE(flags),
2316 &uq->uq_key)) != 0)
2317 return (error);
2318 for (;;) {
2319 umtxq_lock(&uq->uq_key);
2320 umtxq_busy(&uq->uq_key);
2321 umtxq_unlock(&uq->uq_key);
2322
2323 save_ceiling = fuword32(&m->m_ceilings[0]);
2324
2325 owner = casuword32(&m->m_owner,
2326 UMUTEX_CONTESTED, id | UMUTEX_CONTESTED);
2327
2328 if (owner == UMUTEX_CONTESTED) {
2329 suword32(&m->m_ceilings[0], ceiling);
2330 suword32(__DEVOLATILE(uint32_t *, &m->m_owner),
2331 UMUTEX_CONTESTED);
2332 error = 0;
2333 break;
2334 }
2335
2336 /* The address was invalid. */
2337 if (owner == -1) {
2338 error = EFAULT;
2339 break;
2340 }
2341
2342 if ((owner & ~UMUTEX_CONTESTED) == id) {
2343 suword32(&m->m_ceilings[0], ceiling);
2344 error = 0;
2345 break;
2346 }
2347
2348 		/*
2349 		 * If we caught a signal during the previous sleep, we have
2350 		 * already retried the lock; exit immediately.
2351 		 */
2352 if (error != 0)
2353 break;
2354
2355 		/*
2356 		 * We set the contested bit, so sleep. Otherwise the lock
2357 		 * changed and we need to retry, or we lost a race to the
2358 		 * thread unlocking the umtx.
2359 		 */
2360 umtxq_lock(&uq->uq_key);
2361 umtxq_insert(uq);
2362 umtxq_unbusy(&uq->uq_key);
2363 error = umtxq_sleep(uq, "umtxpp", 0);
2364 umtxq_remove(uq);
2365 umtxq_unlock(&uq->uq_key);
2366 }
2367 umtxq_lock(&uq->uq_key);
2368 if (error == 0)
2369 umtxq_signal(&uq->uq_key, INT_MAX);
2370 umtxq_unbusy(&uq->uq_key);
2371 umtxq_unlock(&uq->uq_key);
2372 umtx_key_release(&uq->uq_key);
2373 if (error == 0 && old_ceiling != NULL)
2374 suword32(old_ceiling, save_ceiling);
2375 return (error);
2376 }
2377
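/*
 * Dispatch a umutex lock request to the normal, priority-inheritance or
 * priority-protection implementation according to the mutex flags.
 */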
2378 static int
2379 _do_lock_umutex(struct thread *td, struct umutex *m, int flags, int timo,
2380 int mode)
2381 {
2382 switch(flags & (UMUTEX_PRIO_INHERIT | UMUTEX_PRIO_PROTECT)) {
2383 case 0:
2384 return (_do_lock_normal(td, m, flags, timo, mode));
2385 case UMUTEX_PRIO_INHERIT:
2386 return (_do_lock_pi(td, m, flags, timo, mode));
2387 case UMUTEX_PRIO_PROTECT:
2388 return (_do_lock_pp(td, m, flags, timo, mode));
2389 }
2390 return (EINVAL);
2391 }
2392
2393 /*
2394 * Lock a userland POSIX mutex.
2395 */
2396 static int
2397 do_lock_umutex(struct thread *td, struct umutex *m,
2398 struct timespec *timeout, int mode)
2399 {
2400 struct timespec ts, ts2, ts3;
2401 struct timeval tv;
2402 uint32_t flags;
2403 int error;
2404
2405 flags = fuword32(&m->m_flags);
2406 if (flags == -1)
2407 return (EFAULT);
2408
2409 if (timeout == NULL) {
2410 error = _do_lock_umutex(td, m, flags, 0, mode);
2411 /* Mutex locking is restarted if it is interrupted. */
2412 if (error == EINTR && mode != _UMUTEX_WAIT)
2413 error = ERESTART;
2414 } else {
2415 getnanouptime(&ts);
2416 timespecadd(&ts, timeout);
2417 TIMESPEC_TO_TIMEVAL(&tv, timeout);
2418 for (;;) {
2419 error = _do_lock_umutex(td, m, flags, tvtohz(&tv), mode);
2420 if (error != ETIMEDOUT)
2421 break;
2422 getnanouptime(&ts2);
2423 if (timespeccmp(&ts2, &ts, >=)) {
2424 error = ETIMEDOUT;
2425 break;
2426 }
2427 ts3 = ts;
2428 timespecsub(&ts3, &ts2);
2429 TIMESPEC_TO_TIMEVAL(&tv, &ts3);
2430 }
2431 /* Timed-locking is not restarted. */
2432 if (error == ERESTART)
2433 error = EINTR;
2434 }
2435 return (error);
2436 }
2437
2438 /*
2439 * Unlock a userland POSIX mutex.
2440 */
2441 static int
2442 do_unlock_umutex(struct thread *td, struct umutex *m)
2443 {
2444 uint32_t flags;
2445
2446 flags = fuword32(&m->m_flags);
2447 if (flags == -1)
2448 return (EFAULT);
2449
2450 switch(flags & (UMUTEX_PRIO_INHERIT | UMUTEX_PRIO_PROTECT)) {
2451 case 0:
2452 return (do_unlock_normal(td, m, flags));
2453 case UMUTEX_PRIO_INHERIT:
2454 return (do_unlock_pi(td, m, flags));
2455 case UMUTEX_PRIO_PROTECT:
2456 return (do_unlock_pp(td, m, flags));
2457 }
2458
2459 return (EINVAL);
2460 }
2461
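/*
 * Wait on a userland condition variable, releasing the associated
 * user mutex before sleeping.
 */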
2462 static int
2463 do_cv_wait(struct thread *td, struct ucond *cv, struct umutex *m,
2464 struct timespec *timeout, u_long wflags)
2465 {
2466 struct umtx_q *uq;
2467 struct timeval tv;
2468 struct timespec cts, ets, tts;
2469 uint32_t flags;
2470 uint32_t clockid;
2471 int error;
2472
2473 uq = td->td_umtxq;
2474 flags = fuword32(&cv->c_flags);
2475 error = umtx_key_get(cv, TYPE_CV, GET_SHARE(flags), &uq->uq_key);
2476 if (error != 0)
2477 return (error);
2478
2479 if ((wflags & CVWAIT_CLOCKID) != 0) {
2480 clockid = fuword32(&cv->c_clockid);
2481 if (clockid < CLOCK_REALTIME ||
2482 clockid >= CLOCK_THREAD_CPUTIME_ID) {
2483 			/* Only the predefined clock ids are supported. */
2484 return (EINVAL);
2485 }
2486 } else {
2487 clockid = CLOCK_REALTIME;
2488 }
2489
2490 umtxq_lock(&uq->uq_key);
2491 umtxq_busy(&uq->uq_key);
2492 umtxq_insert(uq);
2493 umtxq_unlock(&uq->uq_key);
2494
2495 	/*
2496 	 * Set c_has_waiters to 1 before releasing the user mutex, but
2497 	 * avoid dirtying the cache line when it is not necessary.
2498 	 */
2499 if (fuword32(__DEVOLATILE(uint32_t *, &cv->c_has_waiters)) == 0)
2500 suword32(__DEVOLATILE(uint32_t *, &cv->c_has_waiters), 1);
2501
2502 umtxq_lock(&uq->uq_key);
2503 umtxq_unbusy(&uq->uq_key);
2504 umtxq_unlock(&uq->uq_key);
2505
2506 error = do_unlock_umutex(td, m);
2507
2508 umtxq_lock(&uq->uq_key);
2509 if (error == 0) {
2510 if (timeout == NULL) {
2511 error = umtxq_sleep(uq, "ucond", 0);
2512 } else {
2513 if ((wflags & CVWAIT_ABSTIME) == 0) {
2514 kern_clock_gettime(td, clockid, &ets);
2515 timespecadd(&ets, timeout);
2516 tts = *timeout;
2517 } else { /* absolute time */
2518 ets = *timeout;
2519 tts = *timeout;
2520 kern_clock_gettime(td, clockid, &cts);
2521 timespecsub(&tts, &cts);
2522 }
2523 TIMESPEC_TO_TIMEVAL(&tv, &tts);
2524 for (;;) {
2525 error = umtxq_sleep(uq, "ucond", tvtohz(&tv));
2526 if (error != ETIMEDOUT)
2527 break;
2528 kern_clock_gettime(td, clockid, &cts);
2529 if (timespeccmp(&cts, &ets, >=)) {
2530 error = ETIMEDOUT;
2531 break;
2532 }
2533 tts = ets;
2534 timespecsub(&tts, &cts);
2535 TIMESPEC_TO_TIMEVAL(&tv, &tts);
2536 }
2537 }
2538 }
2539
2540 if ((uq->uq_flags & UQF_UMTXQ) == 0)
2541 error = 0;
2542 else {
2543 		/*
2544 		 * This must be a timeout, an interruption by a signal, or a
2545 		 * spurious wakeup; clear the c_has_waiters flag when
2546 		 * necessary.
2547 		 */
2548 umtxq_busy(&uq->uq_key);
2549 if ((uq->uq_flags & UQF_UMTXQ) != 0) {
2550 int oldlen = uq->uq_cur_queue->length;
2551 umtxq_remove(uq);
2552 if (oldlen == 1) {
2553 umtxq_unlock(&uq->uq_key);
2554 suword32(
2555 __DEVOLATILE(uint32_t *,
2556 &cv->c_has_waiters), 0);
2557 umtxq_lock(&uq->uq_key);
2558 }
2559 }
2560 umtxq_unbusy(&uq->uq_key);
2561 if (error == ERESTART)
2562 error = EINTR;
2563 }
2564
2565 umtxq_unlock(&uq->uq_key);
2566 umtx_key_release(&uq->uq_key);
2567 return (error);
2568 }
2569
2570 /*
2571 * Signal a userland condition variable.
2572 */
2573 static int
2574 do_cv_signal(struct thread *td, struct ucond *cv)
2575 {
2576 struct umtx_key key;
2577 int error, cnt, nwake;
2578 uint32_t flags;
2579
2580 flags = fuword32(&cv->c_flags);
2581 if ((error = umtx_key_get(cv, TYPE_CV, GET_SHARE(flags), &key)) != 0)
2582 return (error);
2583 umtxq_lock(&key);
2584 umtxq_busy(&key);
2585 cnt = umtxq_count(&key);
2586 nwake = umtxq_signal(&key, 1);
2587 if (cnt <= nwake) {
2588 umtxq_unlock(&key);
2589 error = suword32(
2590 __DEVOLATILE(uint32_t *, &cv->c_has_waiters), 0);
2591 umtxq_lock(&key);
2592 }
2593 umtxq_unbusy(&key);
2594 umtxq_unlock(&key);
2595 umtx_key_release(&key);
2596 return (error);
2597 }
2598
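/*
 * Broadcast a userland condition variable, waking up all waiters.
 */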
2599 static int
2600 do_cv_broadcast(struct thread *td, struct ucond *cv)
2601 {
2602 struct umtx_key key;
2603 int error;
2604 uint32_t flags;
2605
2606 flags = fuword32(&cv->c_flags);
2607 if ((error = umtx_key_get(cv, TYPE_CV, GET_SHARE(flags), &key)) != 0)
2608 return (error);
2609
2610 umtxq_lock(&key);
2611 umtxq_busy(&key);
2612 umtxq_signal(&key, INT_MAX);
2613 umtxq_unlock(&key);
2614
2615 error = suword32(__DEVOLATILE(uint32_t *, &cv->c_has_waiters), 0);
2616
2617 umtxq_lock(&key);
2618 umtxq_unbusy(&key);
2619 umtxq_unlock(&key);
2620
2621 umtx_key_release(&key);
2622 return (error);
2623 }
2624
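/*
 * Acquire a userland rwlock for reading.
 */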
2625 static int
2626 do_rw_rdlock(struct thread *td, struct urwlock *rwlock, long fflag, int timo)
2627 {
2628 struct umtx_q *uq;
2629 uint32_t flags, wrflags;
2630 int32_t state, oldstate;
2631 int32_t blocked_readers;
2632 int error;
2633
2634 uq = td->td_umtxq;
2635 flags = fuword32(&rwlock->rw_flags);
2636 error = umtx_key_get(rwlock, TYPE_RWLOCK, GET_SHARE(flags), &uq->uq_key);
2637 if (error != 0)
2638 return (error);
2639
2640 wrflags = URWLOCK_WRITE_OWNER;
2641 if (!(fflag & URWLOCK_PREFER_READER) && !(flags & URWLOCK_PREFER_READER))
2642 wrflags |= URWLOCK_WRITE_WAITERS;
2643
2644 for (;;) {
2645 state = fuword32(__DEVOLATILE(int32_t *, &rwlock->rw_state));
2646 /* try to lock it */
2647 while (!(state & wrflags)) {
2648 if (__predict_false(URWLOCK_READER_COUNT(state) == URWLOCK_MAX_READERS)) {
2649 umtx_key_release(&uq->uq_key);
2650 return (EAGAIN);
2651 }
2652 oldstate = casuword32(&rwlock->rw_state, state, state + 1);
2653 if (oldstate == -1) {
2654 umtx_key_release(&uq->uq_key);
2655 return (EFAULT);
2656 }
2657 if (oldstate == state) {
2658 umtx_key_release(&uq->uq_key);
2659 return (0);
2660 }
2661 error = umtxq_check_susp(td);
2662 if (error != 0)
2663 break;
2664 state = oldstate;
2665 }
2666
2667 if (error)
2668 break;
2669
2670 /* grab monitor lock */
2671 umtxq_lock(&uq->uq_key);
2672 umtxq_busy(&uq->uq_key);
2673 umtxq_unlock(&uq->uq_key);
2674
2675 /*
2676 * re-read the state, in case it changed between the try-lock above
2677 * and the check below
2678 */
2679 state = fuword32(__DEVOLATILE(int32_t *, &rwlock->rw_state));
2680
2681 /* set read contention bit */
2682 while ((state & wrflags) && !(state & URWLOCK_READ_WAITERS)) {
2683 oldstate = casuword32(&rwlock->rw_state, state, state | URWLOCK_READ_WAITERS);
2684 if (oldstate == -1) {
2685 error = EFAULT;
2686 break;
2687 }
2688 if (oldstate == state)
2689 goto sleep;
2690 state = oldstate;
2691 error = umtxq_check_susp(td);
2692 if (error != 0)
2693 break;
2694 }
2695 if (error != 0) {
2696 umtxq_lock(&uq->uq_key);
2697 umtxq_unbusy(&uq->uq_key);
2698 umtxq_unlock(&uq->uq_key);
2699 break;
2700 }
2701
2702 		/* the state changed while we were setting the flags, restart */
2703 if (!(state & wrflags)) {
2704 umtxq_lock(&uq->uq_key);
2705 umtxq_unbusy(&uq->uq_key);
2706 umtxq_unlock(&uq->uq_key);
2707 error = umtxq_check_susp(td);
2708 if (error != 0)
2709 break;
2710 continue;
2711 }
2712
2713 sleep:
2714 		/* the contention bit is set; increase the read-waiter count before sleeping */
2715 blocked_readers = fuword32(&rwlock->rw_blocked_readers);
2716 suword32(&rwlock->rw_blocked_readers, blocked_readers+1);
2717
2718 while (state & wrflags) {
2719 umtxq_lock(&uq->uq_key);
2720 umtxq_insert(uq);
2721 umtxq_unbusy(&uq->uq_key);
2722
2723 error = umtxq_sleep(uq, "urdlck", timo);
2724
2725 umtxq_busy(&uq->uq_key);
2726 umtxq_remove(uq);
2727 umtxq_unlock(&uq->uq_key);
2728 if (error)
2729 break;
2730 state = fuword32(__DEVOLATILE(int32_t *, &rwlock->rw_state));
2731 }
2732
2733 		/* decrease the read-waiter count, and possibly clear the read contention bit */
2734 blocked_readers = fuword32(&rwlock->rw_blocked_readers);
2735 suword32(&rwlock->rw_blocked_readers, blocked_readers-1);
2736 if (blocked_readers == 1) {
2737 state = fuword32(__DEVOLATILE(int32_t *, &rwlock->rw_state));
2738 for (;;) {
2739 oldstate = casuword32(&rwlock->rw_state, state,
2740 state & ~URWLOCK_READ_WAITERS);
2741 if (oldstate == -1) {
2742 error = EFAULT;
2743 break;
2744 }
2745 if (oldstate == state)
2746 break;
2747 state = oldstate;
2748 error = umtxq_check_susp(td);
2749 if (error != 0)
2750 break;
2751 }
2752 }
2753
2754 umtxq_lock(&uq->uq_key);
2755 umtxq_unbusy(&uq->uq_key);
2756 umtxq_unlock(&uq->uq_key);
2757 if (error != 0)
2758 break;
2759 }
2760 umtx_key_release(&uq->uq_key);
2761 return (error);
2762 }
2763
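/*
 * Timed variant of do_rw_rdlock(); retries until the relative timeout
 * expires.
 */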
2764 static int
2765 do_rw_rdlock2(struct thread *td, void *obj, long val, struct timespec *timeout)
2766 {
2767 struct timespec ts, ts2, ts3;
2768 struct timeval tv;
2769 int error;
2770
2771 getnanouptime(&ts);
2772 timespecadd(&ts, timeout);
2773 TIMESPEC_TO_TIMEVAL(&tv, timeout);
2774 for (;;) {
2775 error = do_rw_rdlock(td, obj, val, tvtohz(&tv));
2776 if (error != ETIMEDOUT)
2777 break;
2778 getnanouptime(&ts2);
2779 if (timespeccmp(&ts2, &ts, >=)) {
2780 error = ETIMEDOUT;
2781 break;
2782 }
2783 ts3 = ts;
2784 timespecsub(&ts3, &ts2);
2785 TIMESPEC_TO_TIMEVAL(&tv, &ts3);
2786 }
2787 if (error == ERESTART)
2788 error = EINTR;
2789 return (error);
2790 }
2791
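/*
 * Acquire a userland rwlock for writing.
 */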
2792 static int
2793 do_rw_wrlock(struct thread *td, struct urwlock *rwlock, int timo)
2794 {
2795 struct umtx_q *uq;
2796 uint32_t flags;
2797 int32_t state, oldstate;
2798 int32_t blocked_writers;
2799 int32_t blocked_readers;
2800 int error;
2801
2802 uq = td->td_umtxq;
2803 flags = fuword32(&rwlock->rw_flags);
2804 error = umtx_key_get(rwlock, TYPE_RWLOCK, GET_SHARE(flags), &uq->uq_key);
2805 if (error != 0)
2806 return (error);
2807
2808 blocked_readers = 0;
2809 for (;;) {
2810 state = fuword32(__DEVOLATILE(int32_t *, &rwlock->rw_state));
2811 while (!(state & URWLOCK_WRITE_OWNER) && URWLOCK_READER_COUNT(state) == 0) {
2812 oldstate = casuword32(&rwlock->rw_state, state, state | URWLOCK_WRITE_OWNER);
2813 if (oldstate == -1) {
2814 umtx_key_release(&uq->uq_key);
2815 return (EFAULT);
2816 }
2817 if (oldstate == state) {
2818 umtx_key_release(&uq->uq_key);
2819 return (0);
2820 }
2821 state = oldstate;
2822 error = umtxq_check_susp(td);
2823 if (error != 0)
2824 break;
2825 }
2826
2827 if (error) {
2828 if (!(state & (URWLOCK_WRITE_OWNER|URWLOCK_WRITE_WAITERS)) &&
2829 blocked_readers != 0) {
2830 umtxq_lock(&uq->uq_key);
2831 umtxq_busy(&uq->uq_key);
2832 umtxq_signal_queue(&uq->uq_key, INT_MAX, UMTX_SHARED_QUEUE);
2833 umtxq_unbusy(&uq->uq_key);
2834 umtxq_unlock(&uq->uq_key);
2835 }
2836
2837 break;
2838 }
2839
2840 /* grab monitor lock */
2841 umtxq_lock(&uq->uq_key);
2842 umtxq_busy(&uq->uq_key);
2843 umtxq_unlock(&uq->uq_key);
2844
2845 /*
2846 * re-read the state, in case it changed between the try-lock above
2847 * and the check below
2848 */
2849 state = fuword32(__DEVOLATILE(int32_t *, &rwlock->rw_state));
2850
2851 while (((state & URWLOCK_WRITE_OWNER) || URWLOCK_READER_COUNT(state) != 0) &&
2852 (state & URWLOCK_WRITE_WAITERS) == 0) {
2853 oldstate = casuword32(&rwlock->rw_state, state, state | URWLOCK_WRITE_WAITERS);
2854 if (oldstate == -1) {
2855 error = EFAULT;
2856 break;
2857 }
2858 if (oldstate == state)
2859 goto sleep;
2860 state = oldstate;
2861 error = umtxq_check_susp(td);
2862 if (error != 0)
2863 break;
2864 }
2865 if (error != 0) {
2866 umtxq_lock(&uq->uq_key);
2867 umtxq_unbusy(&uq->uq_key);
2868 umtxq_unlock(&uq->uq_key);
2869 break;
2870 }
2871
2872 if (!(state & URWLOCK_WRITE_OWNER) && URWLOCK_READER_COUNT(state) == 0) {
2873 umtxq_lock(&uq->uq_key);
2874 umtxq_unbusy(&uq->uq_key);
2875 umtxq_unlock(&uq->uq_key);
2876 error = umtxq_check_susp(td);
2877 if (error != 0)
2878 break;
2879 continue;
2880 }
2881 sleep:
2882 blocked_writers = fuword32(&rwlock->rw_blocked_writers);
2883 suword32(&rwlock->rw_blocked_writers, blocked_writers+1);
2884
2885 while ((state & URWLOCK_WRITE_OWNER) || URWLOCK_READER_COUNT(state) != 0) {
2886 umtxq_lock(&uq->uq_key);
2887 umtxq_insert_queue(uq, UMTX_EXCLUSIVE_QUEUE);
2888 umtxq_unbusy(&uq->uq_key);
2889
2890 error = umtxq_sleep(uq, "uwrlck", timo);
2891
2892 umtxq_busy(&uq->uq_key);
2893 umtxq_remove_queue(uq, UMTX_EXCLUSIVE_QUEUE);
2894 umtxq_unlock(&uq->uq_key);
2895 if (error)
2896 break;
2897 state = fuword32(__DEVOLATILE(int32_t *, &rwlock->rw_state));
2898 }
2899
2900 blocked_writers = fuword32(&rwlock->rw_blocked_writers);
2901 suword32(&rwlock->rw_blocked_writers, blocked_writers-1);
2902 if (blocked_writers == 1) {
2903 state = fuword32(__DEVOLATILE(int32_t *, &rwlock->rw_state));
2904 for (;;) {
2905 oldstate = casuword32(&rwlock->rw_state, state,
2906 state & ~URWLOCK_WRITE_WAITERS);
2907 if (oldstate == -1) {
2908 error = EFAULT;
2909 break;
2910 }
2911 if (oldstate == state)
2912 break;
2913 state = oldstate;
2914 error = umtxq_check_susp(td);
2915 				/*
2916 				 * We are leaving the URWLOCK_WRITE_WAITERS
2917 				 * flag behind, but this should not harm
2918 				 * correctness.
2919 				 */
2920 if (error != 0)
2921 break;
2922 }
2923 blocked_readers = fuword32(&rwlock->rw_blocked_readers);
2924 } else
2925 blocked_readers = 0;
2926
2927 umtxq_lock(&uq->uq_key);
2928 umtxq_unbusy(&uq->uq_key);
2929 umtxq_unlock(&uq->uq_key);
2930 }
2931
2932 umtx_key_release(&uq->uq_key);
2933 return (error);
2934 }
2935
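/*
 * Timed variant of do_rw_wrlock(); retries until the relative timeout
 * expires.
 */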
2936 static int
2937 do_rw_wrlock2(struct thread *td, void *obj, struct timespec *timeout)
2938 {
2939 struct timespec ts, ts2, ts3;
2940 struct timeval tv;
2941 int error;
2942
2943 getnanouptime(&ts);
2944 timespecadd(&ts, timeout);
2945 TIMESPEC_TO_TIMEVAL(&tv, timeout);
2946 for (;;) {
2947 error = do_rw_wrlock(td, obj, tvtohz(&tv));
2948 if (error != ETIMEDOUT)
2949 break;
2950 getnanouptime(&ts2);
2951 if (timespeccmp(&ts2, &ts, >=)) {
2952 error = ETIMEDOUT;
2953 break;
2954 }
2955 ts3 = ts;
2956 timespecsub(&ts3, &ts2);
2957 TIMESPEC_TO_TIMEVAL(&tv, &ts3);
2958 }
2959 if (error == ERESTART)
2960 error = EINTR;
2961 return (error);
2962 }
2963
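/*
 * Release a userland rwlock and wake up either queued writers or
 * readers, depending on the lock's preference flags.
 */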
2964 static int
2965 do_rw_unlock(struct thread *td, struct urwlock *rwlock)
2966 {
2967 struct umtx_q *uq;
2968 uint32_t flags;
2969 int32_t state, oldstate;
2970 int error, q, count;
2971
2972 uq = td->td_umtxq;
2973 flags = fuword32(&rwlock->rw_flags);
2974 error = umtx_key_get(rwlock, TYPE_RWLOCK, GET_SHARE(flags), &uq->uq_key);
2975 if (error != 0)
2976 return (error);
2977
2978 state = fuword32(__DEVOLATILE(int32_t *, &rwlock->rw_state));
2979 if (state & URWLOCK_WRITE_OWNER) {
2980 for (;;) {
2981 oldstate = casuword32(&rwlock->rw_state, state,
2982 state & ~URWLOCK_WRITE_OWNER);
2983 if (oldstate == -1) {
2984 error = EFAULT;
2985 goto out;
2986 }
2987 if (oldstate != state) {
2988 state = oldstate;
2989 if (!(oldstate & URWLOCK_WRITE_OWNER)) {
2990 error = EPERM;
2991 goto out;
2992 }
2993 error = umtxq_check_susp(td);
2994 if (error != 0)
2995 goto out;
2996 } else
2997 break;
2998 }
2999 } else if (URWLOCK_READER_COUNT(state) != 0) {
3000 for (;;) {
3001 oldstate = casuword32(&rwlock->rw_state, state,
3002 state - 1);
3003 if (oldstate == -1) {
3004 error = EFAULT;
3005 goto out;
3006 }
3007 if (oldstate != state) {
3008 state = oldstate;
3009 if (URWLOCK_READER_COUNT(oldstate) == 0) {
3010 error = EPERM;
3011 goto out;
3012 }
3013 error = umtxq_check_susp(td);
3014 if (error != 0)
3015 goto out;
3016 } else
3017 break;
3018 }
3019 } else {
3020 error = EPERM;
3021 goto out;
3022 }
3023
3024 count = 0;
3025
3026 if (!(flags & URWLOCK_PREFER_READER)) {
3027 if (state & URWLOCK_WRITE_WAITERS) {
3028 count = 1;
3029 q = UMTX_EXCLUSIVE_QUEUE;
3030 } else if (state & URWLOCK_READ_WAITERS) {
3031 count = INT_MAX;
3032 q = UMTX_SHARED_QUEUE;
3033 }
3034 } else {
3035 if (state & URWLOCK_READ_WAITERS) {
3036 count = INT_MAX;
3037 q = UMTX_SHARED_QUEUE;
3038 } else if (state & URWLOCK_WRITE_WAITERS) {
3039 count = 1;
3040 q = UMTX_EXCLUSIVE_QUEUE;
3041 }
3042 }
3043
3044 if (count) {
3045 umtxq_lock(&uq->uq_key);
3046 umtxq_busy(&uq->uq_key);
3047 umtxq_signal_queue(&uq->uq_key, count, q);
3048 umtxq_unbusy(&uq->uq_key);
3049 umtxq_unlock(&uq->uq_key);
3050 }
3051 out:
3052 umtx_key_release(&uq->uq_key);
3053 return (error);
3054 }
3055
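/*
 * Wait on a userland semaphore until its count becomes non-zero, a
 * wakeup arrives, or the optional timeout expires.
 */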
3056 static int
3057 do_sem_wait(struct thread *td, struct _usem *sem, struct timespec *timeout)
3058 {
3059 struct umtx_q *uq;
3060 struct timeval tv;
3061 struct timespec cts, ets, tts;
3062 uint32_t flags, count;
3063 int error;
3064
3065 uq = td->td_umtxq;
3066 flags = fuword32(&sem->_flags);
3067 error = umtx_key_get(sem, TYPE_SEM, GET_SHARE(flags), &uq->uq_key);
3068 if (error != 0)
3069 return (error);
3070 umtxq_lock(&uq->uq_key);
3071 umtxq_busy(&uq->uq_key);
3072 umtxq_insert(uq);
3073 umtxq_unlock(&uq->uq_key);
3074
3075 if (fuword32(__DEVOLATILE(uint32_t *, &sem->_has_waiters)) == 0)
3076 casuword32(__DEVOLATILE(uint32_t *, &sem->_has_waiters), 0, 1);
3077
3078 count = fuword32(__DEVOLATILE(uint32_t *, &sem->_count));
3079 if (count != 0) {
3080 umtxq_lock(&uq->uq_key);
3081 umtxq_unbusy(&uq->uq_key);
3082 umtxq_remove(uq);
3083 umtxq_unlock(&uq->uq_key);
3084 umtx_key_release(&uq->uq_key);
3085 return (0);
3086 }
3087
3088 umtxq_lock(&uq->uq_key);
3089 umtxq_unbusy(&uq->uq_key);
3090 umtxq_unlock(&uq->uq_key);
3091
3092 umtxq_lock(&uq->uq_key);
3093 if (timeout == NULL) {
3094 error = umtxq_sleep(uq, "usem", 0);
3095 } else {
3096 getnanouptime(&ets);
3097 timespecadd(&ets, timeout);
3098 TIMESPEC_TO_TIMEVAL(&tv, timeout);
3099 for (;;) {
3100 error = umtxq_sleep(uq, "usem", tvtohz(&tv));
3101 if (error != ETIMEDOUT)
3102 break;
3103 getnanouptime(&cts);
3104 if (timespeccmp(&cts, &ets, >=)) {
3105 error = ETIMEDOUT;
3106 break;
3107 }
3108 tts = ets;
3109 timespecsub(&tts, &cts);
3110 TIMESPEC_TO_TIMEVAL(&tv, &tts);
3111 }
3112 }
3113
3114 if ((uq->uq_flags & UQF_UMTXQ) == 0)
3115 error = 0;
3116 else {
3117 umtxq_remove(uq);
3118 /* A relative timeout cannot be restarted. */
3119 if (error == ERESTART && timeout != NULL)
3120 error = EINTR;
3121 }
3122 umtxq_unlock(&uq->uq_key);
3123 umtx_key_release(&uq->uq_key);
3124 return (error);
3125 }
3126
3127 /*
3128  * Wake up a waiter on a userland semaphore.
3129  */
3130 static int
3131 do_sem_wake(struct thread *td, struct _usem *sem)
3132 {
3133 struct umtx_key key;
3134 int error, cnt, nwake;
3135 uint32_t flags;
3136
3137 flags = fuword32(&sem->_flags);
3138 if ((error = umtx_key_get(sem, TYPE_SEM, GET_SHARE(flags), &key)) != 0)
3139 return (error);
3140 umtxq_lock(&key);
3141 umtxq_busy(&key);
3142 cnt = umtxq_count(&key);
3143 nwake = umtxq_signal(&key, 1);
3144 if (cnt <= nwake) {
3145 umtxq_unlock(&key);
3146 error = suword32(
3147 __DEVOLATILE(uint32_t *, &sem->_has_waiters), 0);
3148 umtxq_lock(&key);
3149 }
3150 umtxq_unbusy(&key);
3151 umtxq_unlock(&key);
3152 umtx_key_release(&key);
3153 return (error);
3154 }
3155
3156 int
3157 sys__umtx_lock(struct thread *td, struct _umtx_lock_args *uap)
3158 /* struct umtx *umtx */
3159 {
3160 return _do_lock_umtx(td, uap->umtx, td->td_tid, 0);
3161 }
3162
3163 int
3164 sys__umtx_unlock(struct thread *td, struct _umtx_unlock_args *uap)
3165 /* struct umtx *umtx */
3166 {
3167 return do_unlock_umtx(td, uap->umtx, td->td_tid);
3168 }
3169
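/*
 * Copy a timespec in from userland and validate it.
 */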
3170 inline int
3171 umtx_copyin_timeout(const void *addr, struct timespec *tsp)
3172 {
3173 int error;
3174
3175 error = copyin(addr, tsp, sizeof(struct timespec));
3176 if (error == 0) {
3177 if (tsp->tv_sec < 0 ||
3178 tsp->tv_nsec >= 1000000000 ||
3179 tsp->tv_nsec < 0)
3180 error = EINVAL;
3181 }
3182 return (error);
3183 }
3184
3185 static int
3186 __umtx_op_lock_umtx(struct thread *td, struct _umtx_op_args *uap)
3187 {
3188 struct timespec *ts, timeout;
3189 int error;
3190
3191 /* Allow a null timespec (wait forever). */
3192 if (uap->uaddr2 == NULL)
3193 ts = NULL;
3194 else {
3195 error = umtx_copyin_timeout(uap->uaddr2, &timeout);
3196 if (error != 0)
3197 return (error);
3198 ts = &timeout;
3199 }
3200 return (do_lock_umtx(td, uap->obj, uap->val, ts));
3201 }
3202
3203 static int
3204 __umtx_op_unlock_umtx(struct thread *td, struct _umtx_op_args *uap)
3205 {
3206 return (do_unlock_umtx(td, uap->obj, uap->val));
3207 }
3208
3209 static int
3210 __umtx_op_wait(struct thread *td, struct _umtx_op_args *uap)
3211 {
3212 struct timespec *ts, timeout;
3213 int error;
3214
3215 if (uap->uaddr2 == NULL)
3216 ts = NULL;
3217 else {
3218 error = umtx_copyin_timeout(uap->uaddr2, &timeout);
3219 if (error != 0)
3220 return (error);
3221 ts = &timeout;
3222 }
3223 return do_wait(td, uap->obj, uap->val, ts, 0, 0);
3224 }
3225
3226 static int
3227 __umtx_op_wait_uint(struct thread *td, struct _umtx_op_args *uap)
3228 {
3229 struct timespec *ts, timeout;
3230 int error;
3231
3232 if (uap->uaddr2 == NULL)
3233 ts = NULL;
3234 else {
3235 error = umtx_copyin_timeout(uap->uaddr2, &timeout);
3236 if (error != 0)
3237 return (error);
3238 ts = &timeout;
3239 }
3240 return do_wait(td, uap->obj, uap->val, ts, 1, 0);
3241 }
3242
3243 static int
3244 __umtx_op_wait_uint_private(struct thread *td, struct _umtx_op_args *uap)
3245 {
3246 struct timespec *ts, timeout;
3247 int error;
3248
3249 if (uap->uaddr2 == NULL)
3250 ts = NULL;
3251 else {
3252 error = umtx_copyin_timeout(uap->uaddr2, &timeout);
3253 if (error != 0)
3254 return (error);
3255 ts = &timeout;
3256 }
3257 return do_wait(td, uap->obj, uap->val, ts, 1, 1);
3258 }
3259
3260 static int
3261 __umtx_op_wake(struct thread *td, struct _umtx_op_args *uap)
3262 {
3263 return (kern_umtx_wake(td, uap->obj, uap->val, 0));
3264 }
3265
3266 #define BATCH_SIZE 128
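/*
 * Wake the waiters at each address in a userland-supplied array,
 * copying the addresses in at most BATCH_SIZE entries at a time.
 */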
3267 static int
3268 __umtx_op_nwake_private(struct thread *td, struct _umtx_op_args *uap)
3269 {
3270 int count = uap->val;
3271 void *uaddrs[BATCH_SIZE];
3272 char **upp = (char **)uap->obj;
3273 int tocopy;
3274 int error = 0;
3275 int i, pos = 0;
3276
3277 while (count > 0) {
3278 tocopy = count;
3279 if (tocopy > BATCH_SIZE)
3280 tocopy = BATCH_SIZE;
3281 error = copyin(upp+pos, uaddrs, tocopy * sizeof(char *));
3282 if (error != 0)
3283 break;
3284 for (i = 0; i < tocopy; ++i)
3285 kern_umtx_wake(td, uaddrs[i], INT_MAX, 1);
3286 count -= tocopy;
3287 pos += tocopy;
3288 }
3289 return (error);
3290 }
3291
3292 static int
3293 __umtx_op_wake_private(struct thread *td, struct _umtx_op_args *uap)
3294 {
3295 return (kern_umtx_wake(td, uap->obj, uap->val, 1));
3296 }
3297
3298 static int
3299 __umtx_op_lock_umutex(struct thread *td, struct _umtx_op_args *uap)
3300 {
3301 struct timespec *ts, timeout;
3302 int error;
3303
3304 /* Allow a null timespec (wait forever). */
3305 if (uap->uaddr2 == NULL)
3306 ts = NULL;
3307 else {
3308 error = umtx_copyin_timeout(uap->uaddr2, &timeout);
3309 if (error != 0)
3310 return (error);
3311 ts = &timeout;
3312 }
3313 return do_lock_umutex(td, uap->obj, ts, 0);
3314 }
3315
3316 static int
3317 __umtx_op_trylock_umutex(struct thread *td, struct _umtx_op_args *uap)
3318 {
3319 return do_lock_umutex(td, uap->obj, NULL, _UMUTEX_TRY);
3320 }
3321
3322 static int
3323 __umtx_op_wait_umutex(struct thread *td, struct _umtx_op_args *uap)
3324 {
3325 struct timespec *ts, timeout;
3326 int error;
3327
3328 /* Allow a null timespec (wait forever). */
3329 if (uap->uaddr2 == NULL)
3330 ts = NULL;
3331 else {
3332 error = umtx_copyin_timeout(uap->uaddr2, &timeout);
3333 if (error != 0)
3334 return (error);
3335 ts = &timeout;
3336 }
3337 return do_lock_umutex(td, uap->obj, ts, _UMUTEX_WAIT);
3338 }
3339
3340 static int
3341 __umtx_op_wake_umutex(struct thread *td, struct _umtx_op_args *uap)
3342 {
3343 return do_wake_umutex(td, uap->obj);
3344 }
3345
3346 static int
3347 __umtx_op_unlock_umutex(struct thread *td, struct _umtx_op_args *uap)
3348 {
3349 return do_unlock_umutex(td, uap->obj);
3350 }
3351
3352 static int
3353 __umtx_op_set_ceiling(struct thread *td, struct _umtx_op_args *uap)
3354 {
3355 return do_set_ceiling(td, uap->obj, uap->val, uap->uaddr1);
3356 }
3357
3358 static int
3359 __umtx_op_cv_wait(struct thread *td, struct _umtx_op_args *uap)
3360 {
3361 struct timespec *ts, timeout;
3362 int error;
3363
3364 /* Allow a null timespec (wait forever). */
3365 if (uap->uaddr2 == NULL)
3366 ts = NULL;
3367 else {
3368 error = umtx_copyin_timeout(uap->uaddr2, &timeout);
3369 if (error != 0)
3370 return (error);
3371 ts = &timeout;
3372 }
3373 return (do_cv_wait(td, uap->obj, uap->uaddr1, ts, uap->val));
3374 }
3375
3376 static int
3377 __umtx_op_cv_signal(struct thread *td, struct _umtx_op_args *uap)
3378 {
3379 return do_cv_signal(td, uap->obj);
3380 }
3381
3382 static int
3383 __umtx_op_cv_broadcast(struct thread *td, struct _umtx_op_args *uap)
3384 {
3385 return do_cv_broadcast(td, uap->obj);
3386 }
3387
3388 static int
3389 __umtx_op_rw_rdlock(struct thread *td, struct _umtx_op_args *uap)
3390 {
3391 struct timespec timeout;
3392 int error;
3393
3394 /* Allow a null timespec (wait forever). */
3395 if (uap->uaddr2 == NULL) {
3396 error = do_rw_rdlock(td, uap->obj, uap->val, 0);
3397 } else {
3398 error = umtx_copyin_timeout(uap->uaddr2, &timeout);
3399 if (error != 0)
3400 return (error);
3401 error = do_rw_rdlock2(td, uap->obj, uap->val, &timeout);
3402 }
3403 return (error);
3404 }
3405
3406 static int
3407 __umtx_op_rw_wrlock(struct thread *td, struct _umtx_op_args *uap)
3408 {
3409 struct timespec timeout;
3410 int error;
3411
3412 /* Allow a null timespec (wait forever). */
3413 if (uap->uaddr2 == NULL) {
3414 error = do_rw_wrlock(td, uap->obj, 0);
3415 } else {
3416 error = umtx_copyin_timeout(uap->uaddr2, &timeout);
3417 if (error != 0)
3418 return (error);
3419
3420 error = do_rw_wrlock2(td, uap->obj, &timeout);
3421 }
3422 return (error);
3423 }
3424
3425 static int
3426 __umtx_op_rw_unlock(struct thread *td, struct _umtx_op_args *uap)
3427 {
3428 return do_rw_unlock(td, uap->obj);
3429 }
3430
3431 static int
3432 __umtx_op_sem_wait(struct thread *td, struct _umtx_op_args *uap)
3433 {
3434 struct timespec *ts, timeout;
3435 int error;
3436
3437 /* Allow a null timespec (wait forever). */
3438 if (uap->uaddr2 == NULL)
3439 ts = NULL;
3440 else {
3441 error = umtx_copyin_timeout(uap->uaddr2, &timeout);
3442 if (error != 0)
3443 return (error);
3444 ts = &timeout;
3445 }
3446 return (do_sem_wait(td, uap->obj, ts));
3447 }
3448
3449 static int
3450 __umtx_op_sem_wake(struct thread *td, struct _umtx_op_args *uap)
3451 {
3452 return do_sem_wake(td, uap->obj);
3453 }
3454
3455 static int
3456 __umtx_op_wake2_umutex(struct thread *td, struct _umtx_op_args *uap)
3457 {
3458 return do_wake2_umutex(td, uap->obj, uap->val);
3459 }
3460
3461 typedef int (*_umtx_op_func)(struct thread *td, struct _umtx_op_args *uap);
3462
3463 static _umtx_op_func op_table[] = {
3464 __umtx_op_lock_umtx, /* UMTX_OP_LOCK */
3465 __umtx_op_unlock_umtx, /* UMTX_OP_UNLOCK */
3466 __umtx_op_wait, /* UMTX_OP_WAIT */
3467 __umtx_op_wake, /* UMTX_OP_WAKE */
3468 __umtx_op_trylock_umutex, /* UMTX_OP_MUTEX_TRYLOCK */
3469 __umtx_op_lock_umutex, /* UMTX_OP_MUTEX_LOCK */
3470 __umtx_op_unlock_umutex, /* UMTX_OP_MUTEX_UNLOCK */
3471 __umtx_op_set_ceiling, /* UMTX_OP_SET_CEILING */
3472 	__umtx_op_cv_wait,		/* UMTX_OP_CV_WAIT */
3473 __umtx_op_cv_signal, /* UMTX_OP_CV_SIGNAL */
3474 __umtx_op_cv_broadcast, /* UMTX_OP_CV_BROADCAST */
3475 __umtx_op_wait_uint, /* UMTX_OP_WAIT_UINT */
3476 __umtx_op_rw_rdlock, /* UMTX_OP_RW_RDLOCK */
3477 __umtx_op_rw_wrlock, /* UMTX_OP_RW_WRLOCK */
3478 __umtx_op_rw_unlock, /* UMTX_OP_RW_UNLOCK */
3479 __umtx_op_wait_uint_private, /* UMTX_OP_WAIT_UINT_PRIVATE */
3480 __umtx_op_wake_private, /* UMTX_OP_WAKE_PRIVATE */
3481 __umtx_op_wait_umutex, /* UMTX_OP_UMUTEX_WAIT */
3482 __umtx_op_wake_umutex, /* UMTX_OP_UMUTEX_WAKE */
3483 __umtx_op_sem_wait, /* UMTX_OP_SEM_WAIT */
3484 __umtx_op_sem_wake, /* UMTX_OP_SEM_WAKE */
3485 __umtx_op_nwake_private, /* UMTX_OP_NWAKE_PRIVATE */
3486 __umtx_op_wake2_umutex /* UMTX_OP_UMUTEX_WAKE2 */
3487 };
3488
3489 int
3490 sys__umtx_op(struct thread *td, struct _umtx_op_args *uap)
3491 {
3492 if ((unsigned)uap->op < UMTX_OP_MAX)
3493 return (*op_table[uap->op])(td, uap);
3494 return (EINVAL);
3495 }
3496
3497 #ifdef COMPAT_FREEBSD32
3498 int
3499 freebsd32_umtx_lock(struct thread *td, struct freebsd32_umtx_lock_args *uap)
3500 /* struct umtx *umtx */
3501 {
3502 return (do_lock_umtx32(td, (uint32_t *)uap->umtx, td->td_tid, NULL));
3503 }
3504
3505 int
3506 freebsd32_umtx_unlock(struct thread *td, struct freebsd32_umtx_unlock_args *uap)
3507 /* struct umtx *umtx */
3508 {
3509 return (do_unlock_umtx32(td, (uint32_t *)uap->umtx, td->td_tid));
3510 }
3511
3512 struct timespec32 {
3513 int32_t tv_sec;
3514 int32_t tv_nsec;
3515 };
3516
3517 static inline int
3518 umtx_copyin_timeout32(void *addr, struct timespec *tsp)
3519 {
3520 struct timespec32 ts32;
3521 int error;
3522
3523 error = copyin(addr, &ts32, sizeof(struct timespec32));
3524 if (error == 0) {
3525 if (ts32.tv_sec < 0 ||
3526 ts32.tv_nsec >= 1000000000 ||
3527 ts32.tv_nsec < 0)
3528 error = EINVAL;
3529 else {
3530 tsp->tv_sec = ts32.tv_sec;
3531 tsp->tv_nsec = ts32.tv_nsec;
3532 }
3533 }
3534 return (error);
3535 }
3536
3537 static int
3538 __umtx_op_lock_umtx_compat32(struct thread *td, struct _umtx_op_args *uap)
3539 {
3540 struct timespec *ts, timeout;
3541 int error;
3542
3543 /* Allow a null timespec (wait forever). */
3544 if (uap->uaddr2 == NULL)
3545 ts = NULL;
3546 else {
3547 error = umtx_copyin_timeout32(uap->uaddr2, &timeout);
3548 if (error != 0)
3549 return (error);
3550 ts = &timeout;
3551 }
3552 return (do_lock_umtx32(td, uap->obj, uap->val, ts));
3553 }
3554
3555 static int
3556 __umtx_op_unlock_umtx_compat32(struct thread *td, struct _umtx_op_args *uap)
3557 {
3558 return (do_unlock_umtx32(td, uap->obj, (uint32_t)uap->val));
3559 }
3560
3561 static int
3562 __umtx_op_wait_compat32(struct thread *td, struct _umtx_op_args *uap)
3563 {
3564 struct timespec *ts, timeout;
3565 int error;
3566
3567 if (uap->uaddr2 == NULL)
3568 ts = NULL;
3569 else {
3570 error = umtx_copyin_timeout32(uap->uaddr2, &timeout);
3571 if (error != 0)
3572 return (error);
3573 ts = &timeout;
3574 }
3575 return do_wait(td, uap->obj, uap->val, ts, 1, 0);
3576 }
3577
3578 static int
3579 __umtx_op_lock_umutex_compat32(struct thread *td, struct _umtx_op_args *uap)
3580 {
3581 struct timespec *ts, timeout;
3582 int error;
3583
3584 /* Allow a null timespec (wait forever). */
3585 if (uap->uaddr2 == NULL)
3586 ts = NULL;
3587 else {
3588 error = umtx_copyin_timeout32(uap->uaddr2, &timeout);
3589 if (error != 0)
3590 return (error);
3591 ts = &timeout;
3592 }
3593 return do_lock_umutex(td, uap->obj, ts, 0);
3594 }
3595
3596 static int
3597 __umtx_op_wait_umutex_compat32(struct thread *td, struct _umtx_op_args *uap)
3598 {
3599 struct timespec *ts, timeout;
3600 int error;
3601
3602 /* Allow a null timespec (wait forever). */
3603 if (uap->uaddr2 == NULL)
3604 ts = NULL;
3605 else {
3606 error = umtx_copyin_timeout32(uap->uaddr2, &timeout);
3607 if (error != 0)
3608 return (error);
3609 ts = &timeout;
3610 }
3611 return do_lock_umutex(td, uap->obj, ts, _UMUTEX_WAIT);
3612 }
3613
3614 static int
3615 __umtx_op_cv_wait_compat32(struct thread *td, struct _umtx_op_args *uap)
3616 {
3617 struct timespec *ts, timeout;
3618 int error;
3619
3620 /* Allow a null timespec (wait forever). */
3621 if (uap->uaddr2 == NULL)
3622 ts = NULL;
3623 else {
3624 error = umtx_copyin_timeout32(uap->uaddr2, &timeout);
3625 if (error != 0)
3626 return (error);
3627 ts = &timeout;
3628 }
3629 return (do_cv_wait(td, uap->obj, uap->uaddr1, ts, uap->val));
3630 }
3631
3632 static int
3633 __umtx_op_rw_rdlock_compat32(struct thread *td, struct _umtx_op_args *uap)
3634 {
3635 struct timespec timeout;
3636 int error;
3637
3638 /* Allow a null timespec (wait forever). */
3639 if (uap->uaddr2 == NULL) {
3640 error = do_rw_rdlock(td, uap->obj, uap->val, 0);
3641 } else {
3642 error = umtx_copyin_timeout32(uap->uaddr2, &timeout);
3643 if (error != 0)
3644 return (error);
3645 error = do_rw_rdlock2(td, uap->obj, uap->val, &timeout);
3646 }
3647 return (error);
3648 }
3649
3650 static int
3651 __umtx_op_rw_wrlock_compat32(struct thread *td, struct _umtx_op_args *uap)
3652 {
3653 struct timespec timeout;
3654 int error;
3655
3656 /* Allow a null timespec (wait forever). */
3657 if (uap->uaddr2 == NULL) {
3658 error = do_rw_wrlock(td, uap->obj, 0);
3659 } else {
3660 error = umtx_copyin_timeout32(uap->uaddr2, &timeout);
3661 if (error != 0)
3662 return (error);
3663
3664 error = do_rw_wrlock2(td, uap->obj, &timeout);
3665 }
3666 return (error);
3667 }
3668
3669 static int
3670 __umtx_op_wait_uint_private_compat32(struct thread *td, struct _umtx_op_args *uap)
3671 {
3672 struct timespec *ts, timeout;
3673 int error;
3674
3675 if (uap->uaddr2 == NULL)
3676 ts = NULL;
3677 else {
3678 error = umtx_copyin_timeout32(uap->uaddr2, &timeout);
3679 if (error != 0)
3680 return (error);
3681 ts = &timeout;
3682 }
3683 return do_wait(td, uap->obj, uap->val, ts, 1, 1);
3684 }
3685
3686 static int
3687 __umtx_op_sem_wait_compat32(struct thread *td, struct _umtx_op_args *uap)
3688 {
3689 struct timespec *ts, timeout;
3690 int error;
3691
3692 /* Allow a null timespec (wait forever). */
3693 if (uap->uaddr2 == NULL)
3694 ts = NULL;
3695 else {
3696 error = umtx_copyin_timeout32(uap->uaddr2, &timeout);
3697 if (error != 0)
3698 return (error);
3699 ts = &timeout;
3700 }
3701 return (do_sem_wait(td, uap->obj, ts));
3702 }
3703
3704 static int
3705 __umtx_op_nwake_private32(struct thread *td, struct _umtx_op_args *uap)
3706 {
3707 int count = uap->val;
3708 uint32_t uaddrs[BATCH_SIZE];
3709 uint32_t **upp = (uint32_t **)uap->obj;
3710 int tocopy;
3711 int error = 0;
3712 int i, pos = 0;
3713
3714 while (count > 0) {
3715 tocopy = count;
3716 if (tocopy > BATCH_SIZE)
3717 tocopy = BATCH_SIZE;
3718 error = copyin(upp+pos, uaddrs, tocopy * sizeof(uint32_t));
3719 if (error != 0)
3720 break;
3721 for (i = 0; i < tocopy; ++i)
3722 kern_umtx_wake(td, (void *)(intptr_t)uaddrs[i],
3723 INT_MAX, 1);
3724 count -= tocopy;
3725 pos += tocopy;
3726 }
3727 return (error);
3728 }
3729
3730 static _umtx_op_func op_table_compat32[] = {
3731 __umtx_op_lock_umtx_compat32, /* UMTX_OP_LOCK */
3732 __umtx_op_unlock_umtx_compat32, /* UMTX_OP_UNLOCK */
3733 __umtx_op_wait_compat32, /* UMTX_OP_WAIT */
3734 __umtx_op_wake, /* UMTX_OP_WAKE */
3735 	__umtx_op_trylock_umutex,	/* UMTX_OP_MUTEX_TRYLOCK */
3736 	__umtx_op_lock_umutex_compat32,	/* UMTX_OP_MUTEX_LOCK */
3737 __umtx_op_unlock_umutex, /* UMTX_OP_MUTEX_UNLOCK */
3738 __umtx_op_set_ceiling, /* UMTX_OP_SET_CEILING */
3739 	__umtx_op_cv_wait_compat32,	/* UMTX_OP_CV_WAIT */
3740 __umtx_op_cv_signal, /* UMTX_OP_CV_SIGNAL */
3741 __umtx_op_cv_broadcast, /* UMTX_OP_CV_BROADCAST */
3742 __umtx_op_wait_compat32, /* UMTX_OP_WAIT_UINT */
3743 __umtx_op_rw_rdlock_compat32, /* UMTX_OP_RW_RDLOCK */
3744 __umtx_op_rw_wrlock_compat32, /* UMTX_OP_RW_WRLOCK */
3745 __umtx_op_rw_unlock, /* UMTX_OP_RW_UNLOCK */
3746 __umtx_op_wait_uint_private_compat32, /* UMTX_OP_WAIT_UINT_PRIVATE */
3747 __umtx_op_wake_private, /* UMTX_OP_WAKE_PRIVATE */
3748 __umtx_op_wait_umutex_compat32, /* UMTX_OP_UMUTEX_WAIT */
3749 __umtx_op_wake_umutex, /* UMTX_OP_UMUTEX_WAKE */
3750 __umtx_op_sem_wait_compat32, /* UMTX_OP_SEM_WAIT */
3751 __umtx_op_sem_wake, /* UMTX_OP_SEM_WAKE */
3752 __umtx_op_nwake_private32, /* UMTX_OP_NWAKE_PRIVATE */
3753 __umtx_op_wake2_umutex /* UMTX_OP_UMUTEX_WAKE2 */
3754 };
3755
3756 int
3757 freebsd32_umtx_op(struct thread *td, struct freebsd32_umtx_op_args *uap)
3758 {
3759 if ((unsigned)uap->op < UMTX_OP_MAX)
3760 return (*op_table_compat32[uap->op])(td,
3761 (struct _umtx_op_args *)uap);
3762 return (EINVAL);
3763 }
3764 #endif
3765
3766 void
3767 umtx_thread_init(struct thread *td)
3768 {
3769 td->td_umtxq = umtxq_alloc();
3770 td->td_umtxq->uq_thread = td;
3771 }
3772
3773 void
3774 umtx_thread_fini(struct thread *td)
3775 {
3776 umtxq_free(td->td_umtxq);
3777 }
3778
3779 /*
3780  * Called when a new thread is created, e.g. by fork().
3781  */
3782 void
3783 umtx_thread_alloc(struct thread *td)
3784 {
3785 struct umtx_q *uq;
3786
3787 uq = td->td_umtxq;
3788 uq->uq_inherited_pri = PRI_MAX;
3789
3790 KASSERT(uq->uq_flags == 0, ("uq_flags != 0"));
3791 KASSERT(uq->uq_thread == td, ("uq_thread != td"));
3792 KASSERT(uq->uq_pi_blocked == NULL, ("uq_pi_blocked != NULL"));
3793 KASSERT(TAILQ_EMPTY(&uq->uq_pi_contested), ("uq_pi_contested is not empty"));
3794 }
3795
3796 /*
3797 * exec() hook.
3798 */
3799 static void
3800 umtx_exec_hook(void *arg __unused, struct proc *p __unused,
3801 struct image_params *imgp __unused)
3802 {
3803 umtx_thread_cleanup(curthread);
3804 }
3805
3806 /*
3807 * thread_exit() hook.
3808 */
3809 void
3810 umtx_thread_exit(struct thread *td)
3811 {
3812 umtx_thread_cleanup(td);
3813 }
3814
3815 /*
3816  * Clean up umtx data.
3817 */
3818 static void
3819 umtx_thread_cleanup(struct thread *td)
3820 {
3821 struct umtx_q *uq;
3822 struct umtx_pi *pi;
3823
3824 if ((uq = td->td_umtxq) == NULL)
3825 return;
3826
3827 mtx_lock_spin(&umtx_lock);
3828 uq->uq_inherited_pri = PRI_MAX;
3829 while ((pi = TAILQ_FIRST(&uq->uq_pi_contested)) != NULL) {
3830 pi->pi_owner = NULL;
3831 TAILQ_REMOVE(&uq->uq_pi_contested, pi, pi_link);
3832 }
3833 mtx_unlock_spin(&umtx_lock);
3834 thread_lock(td);
3835 sched_lend_user_prio(td, PRI_MAX);
3836 thread_unlock(td);
3837 }