FreeBSD/Linux Kernel Cross Reference
sys/kern/kern_umtx.c
1 /*-
2 * Copyright (c) 2004, David Xu <davidxu@freebsd.org>
3 * Copyright (c) 2002, Jeffrey Roberson <jeff@freebsd.org>
4 * All rights reserved.
5 *
6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions
8 * are met:
9 * 1. Redistributions of source code must retain the above copyright
10 * notice unmodified, this list of conditions, and the following
11 * disclaimer.
12 * 2. Redistributions in binary form must reproduce the above copyright
13 * notice, this list of conditions and the following disclaimer in the
14 * documentation and/or other materials provided with the distribution.
15 *
16 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
17 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
18 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
19 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
20 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
21 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
22 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
23 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
24 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
25 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26 */
27
28 #include <sys/cdefs.h>
29 __FBSDID("$FreeBSD: releng/8.0/sys/kern/kern_umtx.c 198030 2009-10-13 13:03:31Z attilio $");
30
31 #include "opt_compat.h"
32 #include <sys/param.h>
33 #include <sys/kernel.h>
34 #include <sys/limits.h>
35 #include <sys/lock.h>
36 #include <sys/malloc.h>
37 #include <sys/mutex.h>
38 #include <sys/priv.h>
39 #include <sys/proc.h>
40 #include <sys/sched.h>
41 #include <sys/smp.h>
42 #include <sys/sysctl.h>
43 #include <sys/sysent.h>
44 #include <sys/systm.h>
45 #include <sys/sysproto.h>
46 #include <sys/eventhandler.h>
47 #include <sys/umtx.h>
48
49 #include <vm/vm.h>
50 #include <vm/vm_param.h>
51 #include <vm/pmap.h>
52 #include <vm/vm_map.h>
53 #include <vm/vm_object.h>
54
55 #include <machine/cpu.h>
56
57 #ifdef COMPAT_IA32
58 #include <compat/freebsd32/freebsd32_proto.h>
59 #endif
60
/* Types of userland synchronization objects keyed by struct umtx_key. */
#define TYPE_SIMPLE_WAIT	0	/* plain wait/wake address (do_wait) */
#define TYPE_CV			1	/* condition variable */
#define TYPE_SIMPLE_LOCK	2	/* struct umtx simple lock */
#define TYPE_NORMAL_UMUTEX	3	/* PTHREAD_PRIO_NONE umutex */
#define TYPE_PI_UMUTEX		4	/* PTHREAD_PRIO_INHERIT umutex */
#define TYPE_PP_UMUTEX		5	/* PTHREAD_PRIO_PROTECT umutex */
#define TYPE_RWLOCK		6	/* reader/writer lock */

/* Internal lock modes for _do_lock_normal(). */
#define _UMUTEX_TRY		1	/* fail with EBUSY instead of sleeping */
#define _UMUTEX_WAIT		2	/* only wait until the lock is free */
71
/* Key to represent a unique userland synchronous object */
struct umtx_key {
	int	hash;		/* bucket index, filled in by umtxq_hash() */
	int	type;		/* one of the TYPE_* constants */
	int	shared;		/* non-zero: keyed by VM object (holds a ref) */
	union {
		/* Process-shared form: backing VM object plus offset. */
		struct {
			vm_object_t	object;
			uintptr_t	offset;
		} shared;
		/* Process-private form: owning vmspace plus address. */
		struct {
			struct vmspace	*vs;
			uintptr_t	addr;
		} private;
		/* Type-erased view used for hashing and comparison. */
		struct {
			void		*a;
			uintptr_t	b;
		} both;
	} info;
};
92
/* Priority inheritance mutex info. */
struct umtx_pi {
	/* Owner thread */
	struct thread		*pi_owner;

	/* Reference count */
	int			pi_refcount;

	/* List entry to link umtx holding by thread */
	TAILQ_ENTRY(umtx_pi)	pi_link;

	/* List entry in hash */
	TAILQ_ENTRY(umtx_pi)	pi_hashlink;

	/* List for waiters */
	TAILQ_HEAD(,umtx_q)	pi_blocked;

	/* Identify a userland lock object */
	struct umtx_key		pi_key;
};
113
/* A userland synchronous object user (one per thread, td->td_umtxq). */
struct umtx_q {
	/* Linked list for the hash. */
	TAILQ_ENTRY(umtx_q)	uq_link;

	/* Umtx key. */
	struct umtx_key		uq_key;

	/* Umtx flags. */
	int			uq_flags;
#define UQF_UMTXQ	0x0001	/* thread is enqueued on a chain sleep queue */

	/* The thread waits on. */
	struct thread		*uq_thread;

	/*
	 * Blocked on PI mutex. read can use chain lock
	 * or umtx_lock, write must have both chain lock and
	 * umtx_lock being hold.
	 */
	struct umtx_pi		*uq_pi_blocked;

	/* On blocked list */
	TAILQ_ENTRY(umtx_q)	uq_lockq;

	/* Thread contending with us */
	TAILQ_HEAD(,umtx_pi)	uq_pi_contested;

	/* Inherited priority from PP mutex */
	u_char			uq_inherited_pri;
};
145
TAILQ_HEAD(umtxq_head, umtx_q);

/* Userland lock object's wait-queue chain (hash bucket). */
struct umtxq_chain {
	/* Lock for this chain. */
	struct mtx		uc_lock;

	/* List of sleep queues. */
	struct umtxq_head	uc_queue[2];
#define UMTX_SHARED_QUEUE	0	/* default queue used by the macros below */
#define UMTX_EXCLUSIVE_QUEUE	1	/* NOTE(review): presumably rwlock writers — confirm */

	/* Busy flag; see umtxq_busy()/umtxq_unbusy(). */
	char			uc_busy;

	/* Chain lock waiters */
	int			uc_waiters;

	/* All PI in the list */
	TAILQ_HEAD(,umtx_pi)	uc_pi_list;
};
167
#define UMTXQ_LOCKED_ASSERT(uc)	mtx_assert(&(uc)->uc_lock, MA_OWNED)
/*
 * Assert that the chain is marked busy.  The previous form tested
 * "&(uc)->uc_busy" — the address of the flag, which is never NULL —
 * so the assertion could never fire.  Test the flag's value instead.
 */
#define UMTXQ_BUSY_ASSERT(uc)	KASSERT((uc)->uc_busy != 0, ("umtx chain is not busy"))
170
/*
 * Don't propagate time-sharing priority, there is a security reason,
 * a user can simply introduce PI-mutex, let thread A lock the mutex,
 * and let another thread B block on the mutex, because B is
 * sleeping, its priority will be boosted, this causes A's priority to
 * be boosted via priority propagating too and will never be lowered even
 * if it is using 100%CPU, this is unfair to other processes.
 */

/*
 * Effective user priority used for PI comparisons: time-sharing
 * priorities are clamped to PRI_MAX_TIMESHARE (see the note above).
 */
#define UPRI(td)	(((td)->td_user_pri >= PRI_MIN_TIMESHARE &&\
			  (td)->td_user_pri <= PRI_MAX_TIMESHARE) ?\
			 PRI_MAX_TIMESHARE : (td)->td_user_pri)

/* Multiplicative hash constant used by umtxq_hash(). */
#define	GOLDEN_RATIO_PRIME	2654404609U
/* Buckets per chain table; 128 == 1 << 7, matching UMTX_SHIFTS below. */
#define	UMTX_CHAINS		128
#define	UMTX_SHIFTS		(__WORD_BIT - 7)

/* Sharing modes accepted by umtx_key_get(). */
#define	THREAD_SHARE		0	/* private to one process */
#define	PROCESS_SHARE		1	/* shared across processes */
#define	AUTO_SHARE		2	/* decide from the VM mapping */

/* Derive the sharing mode from userland mutex flags. */
#define GET_SHARE(flags)	\
    (((flags) & USYNC_PROCESS_SHARED) == 0 ? THREAD_SHARE : PROCESS_SHARE)

/* Spin iterations in umtxq_busy() before sleeping (SMP only). */
#define	BUSY_SPINS		200
196
static uma_zone_t		umtx_pi_zone;
/* Row [1] serves wait/cv types, row [0] everything else; see umtxq_getchain(). */
static struct umtxq_chain	umtxq_chains[2][UMTX_CHAINS];
static MALLOC_DEFINE(M_UMTX, "umtx", "UMTX queue memory");
/* Count of live umtx_pi allocations, exported via sysctl below. */
static int			umtx_pi_allocated;

SYSCTL_NODE(_debug, OID_AUTO, umtx, CTLFLAG_RW, 0, "umtx debug");
SYSCTL_INT(_debug_umtx, OID_AUTO, umtx_pi_allocated, CTLFLAG_RD,
    &umtx_pi_allocated, 0, "Allocated umtx_pi");

static void umtxq_sysinit(void *);
static void umtxq_hash(struct umtx_key *key);
static struct umtxq_chain *umtxq_getchain(struct umtx_key *key);
static void umtxq_lock(struct umtx_key *key);
static void umtxq_unlock(struct umtx_key *key);
static void umtxq_busy(struct umtx_key *key);
static void umtxq_unbusy(struct umtx_key *key);
static void umtxq_insert_queue(struct umtx_q *uq, int q);
static void umtxq_remove_queue(struct umtx_q *uq, int q);
static int umtxq_sleep(struct umtx_q *uq, const char *wmesg, int timo);
static int umtxq_count(struct umtx_key *key);
static int umtx_key_match(const struct umtx_key *k1, const struct umtx_key *k2);
static int umtx_key_get(void *addr, int type, int share,
	struct umtx_key *key);
static void umtx_key_release(struct umtx_key *key);
static struct umtx_pi *umtx_pi_alloc(int);
static void umtx_pi_free(struct umtx_pi *pi);
static void umtx_pi_adjust_locked(struct thread *td, u_char oldpri);
static int do_unlock_pp(struct thread *td, struct umutex *m, uint32_t flags);
static void umtx_thread_cleanup(struct thread *td);
static void umtx_exec_hook(void *arg __unused, struct proc *p __unused,
	struct image_params *imgp __unused);
SYSINIT(umtx, SI_SUB_EVENTHANDLER+1, SI_ORDER_MIDDLE, umtxq_sysinit, NULL);

/* Shorthands that operate on the default (shared) sleep queue. */
#define umtxq_signal(key, nwake)	umtxq_signal_queue((key), (nwake), UMTX_SHARED_QUEUE)
#define umtxq_insert(uq)	umtxq_insert_queue((uq), UMTX_SHARED_QUEUE)
#define umtxq_remove(uq)	umtxq_remove_queue((uq), UMTX_SHARED_QUEUE)

/* Spin lock protecting PI state (see uq_pi_blocked locking note above). */
static struct mtx umtx_lock;
235
236 static void
237 umtxq_sysinit(void *arg __unused)
238 {
239 int i, j;
240
241 umtx_pi_zone = uma_zcreate("umtx pi", sizeof(struct umtx_pi),
242 NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0);
243 for (i = 0; i < 2; ++i) {
244 for (j = 0; j < UMTX_CHAINS; ++j) {
245 mtx_init(&umtxq_chains[i][j].uc_lock, "umtxql", NULL,
246 MTX_DEF | MTX_DUPOK);
247 TAILQ_INIT(&umtxq_chains[i][j].uc_queue[0]);
248 TAILQ_INIT(&umtxq_chains[i][j].uc_queue[1]);
249 TAILQ_INIT(&umtxq_chains[i][j].uc_pi_list);
250 umtxq_chains[i][j].uc_busy = 0;
251 umtxq_chains[i][j].uc_waiters = 0;
252 }
253 }
254 mtx_init(&umtx_lock, "umtx lock", NULL, MTX_SPIN);
255 EVENTHANDLER_REGISTER(process_exec, umtx_exec_hook, NULL,
256 EVENTHANDLER_PRI_ANY);
257 }
258
259 struct umtx_q *
260 umtxq_alloc(void)
261 {
262 struct umtx_q *uq;
263
264 uq = malloc(sizeof(struct umtx_q), M_UMTX, M_WAITOK | M_ZERO);
265 TAILQ_INIT(&uq->uq_pi_contested);
266 uq->uq_inherited_pri = PRI_MAX;
267 return (uq);
268 }
269
270 void
271 umtxq_free(struct umtx_q *uq)
272 {
273 free(uq, M_UMTX);
274 }
275
276 static inline void
277 umtxq_hash(struct umtx_key *key)
278 {
279 unsigned n = (uintptr_t)key->info.both.a + key->info.both.b;
280 key->hash = ((n * GOLDEN_RATIO_PRIME) >> UMTX_SHIFTS) % UMTX_CHAINS;
281 }
282
283 static inline int
284 umtx_key_match(const struct umtx_key *k1, const struct umtx_key *k2)
285 {
286 return (k1->type == k2->type &&
287 k1->info.both.a == k2->info.both.a &&
288 k1->info.both.b == k2->info.both.b);
289 }
290
291 static inline struct umtxq_chain *
292 umtxq_getchain(struct umtx_key *key)
293 {
294 if (key->type <= TYPE_CV)
295 return (&umtxq_chains[1][key->hash]);
296 return (&umtxq_chains[0][key->hash]);
297 }
298
299 /*
300 * Lock a chain.
301 */
302 static inline void
303 umtxq_lock(struct umtx_key *key)
304 {
305 struct umtxq_chain *uc;
306
307 uc = umtxq_getchain(key);
308 mtx_lock(&uc->uc_lock);
309 }
310
311 /*
312 * Unlock a chain.
313 */
314 static inline void
315 umtxq_unlock(struct umtx_key *key)
316 {
317 struct umtxq_chain *uc;
318
319 uc = umtxq_getchain(key);
320 mtx_unlock(&uc->uc_lock);
321 }
322
323 /*
324 * Set chain to busy state when following operation
325 * may be blocked (kernel mutex can not be used).
326 */
327 static inline void
328 umtxq_busy(struct umtx_key *key)
329 {
330 struct umtxq_chain *uc;
331
332 uc = umtxq_getchain(key);
333 mtx_assert(&uc->uc_lock, MA_OWNED);
334 if (uc->uc_busy) {
335 #ifdef SMP
336 if (smp_cpus > 1) {
337 int count = BUSY_SPINS;
338 if (count > 0) {
339 umtxq_unlock(key);
340 while (uc->uc_busy && --count > 0)
341 cpu_spinwait();
342 umtxq_lock(key);
343 }
344 }
345 #endif
346 while (uc->uc_busy) {
347 uc->uc_waiters++;
348 msleep(uc, &uc->uc_lock, 0, "umtxqb", 0);
349 uc->uc_waiters--;
350 }
351 }
352 uc->uc_busy = 1;
353 }
354
355 /*
356 * Unbusy a chain.
357 */
358 static inline void
359 umtxq_unbusy(struct umtx_key *key)
360 {
361 struct umtxq_chain *uc;
362
363 uc = umtxq_getchain(key);
364 mtx_assert(&uc->uc_lock, MA_OWNED);
365 KASSERT(uc->uc_busy != 0, ("not busy"));
366 uc->uc_busy = 0;
367 if (uc->uc_waiters)
368 wakeup_one(uc);
369 }
370
371 static inline void
372 umtxq_insert_queue(struct umtx_q *uq, int q)
373 {
374 struct umtxq_chain *uc;
375
376 uc = umtxq_getchain(&uq->uq_key);
377 UMTXQ_LOCKED_ASSERT(uc);
378 TAILQ_INSERT_TAIL(&uc->uc_queue[q], uq, uq_link);
379 uq->uq_flags |= UQF_UMTXQ;
380 }
381
382 static inline void
383 umtxq_remove_queue(struct umtx_q *uq, int q)
384 {
385 struct umtxq_chain *uc;
386
387 uc = umtxq_getchain(&uq->uq_key);
388 UMTXQ_LOCKED_ASSERT(uc);
389 if (uq->uq_flags & UQF_UMTXQ) {
390 TAILQ_REMOVE(&uc->uc_queue[q], uq, uq_link);
391 uq->uq_flags &= ~UQF_UMTXQ;
392 }
393 }
394
395 /*
396 * Check if there are multiple waiters
397 */
398 static int
399 umtxq_count(struct umtx_key *key)
400 {
401 struct umtxq_chain *uc;
402 struct umtx_q *uq;
403 int count = 0;
404
405 uc = umtxq_getchain(key);
406 UMTXQ_LOCKED_ASSERT(uc);
407 TAILQ_FOREACH(uq, &uc->uc_queue[UMTX_SHARED_QUEUE], uq_link) {
408 if (umtx_key_match(&uq->uq_key, key)) {
409 if (++count > 1)
410 break;
411 }
412 }
413 return (count);
414 }
415
416 /*
417 * Check if there are multiple PI waiters and returns first
418 * waiter.
419 */
420 static int
421 umtxq_count_pi(struct umtx_key *key, struct umtx_q **first)
422 {
423 struct umtxq_chain *uc;
424 struct umtx_q *uq;
425 int count = 0;
426
427 *first = NULL;
428 uc = umtxq_getchain(key);
429 UMTXQ_LOCKED_ASSERT(uc);
430 TAILQ_FOREACH(uq, &uc->uc_queue[UMTX_SHARED_QUEUE], uq_link) {
431 if (umtx_key_match(&uq->uq_key, key)) {
432 if (++count > 1)
433 break;
434 *first = uq;
435 }
436 }
437 return (count);
438 }
439
440 /*
441 * Wake up threads waiting on an userland object.
442 */
443
444 static int
445 umtxq_signal_queue(struct umtx_key *key, int n_wake, int q)
446 {
447 struct umtxq_chain *uc;
448 struct umtx_q *uq, *next;
449 int ret;
450
451 ret = 0;
452 uc = umtxq_getchain(key);
453 UMTXQ_LOCKED_ASSERT(uc);
454 TAILQ_FOREACH_SAFE(uq, &uc->uc_queue[q], uq_link, next) {
455 if (umtx_key_match(&uq->uq_key, key)) {
456 umtxq_remove_queue(uq, q);
457 wakeup(uq);
458 if (++ret >= n_wake)
459 break;
460 }
461 }
462 return (ret);
463 }
464
465
466 /*
467 * Wake up specified thread.
468 */
469 static inline void
470 umtxq_signal_thread(struct umtx_q *uq)
471 {
472 struct umtxq_chain *uc;
473
474 uc = umtxq_getchain(&uq->uq_key);
475 UMTXQ_LOCKED_ASSERT(uc);
476 umtxq_remove(uq);
477 wakeup(uq);
478 }
479
480 /*
481 * Put thread into sleep state, before sleeping, check if
482 * thread was removed from umtx queue.
483 */
484 static inline int
485 umtxq_sleep(struct umtx_q *uq, const char *wmesg, int timo)
486 {
487 struct umtxq_chain *uc;
488 int error;
489
490 uc = umtxq_getchain(&uq->uq_key);
491 UMTXQ_LOCKED_ASSERT(uc);
492 if (!(uq->uq_flags & UQF_UMTXQ))
493 return (0);
494 error = msleep(uq, &uc->uc_lock, PCATCH, wmesg, timo);
495 if (error == EWOULDBLOCK)
496 error = ETIMEDOUT;
497 return (error);
498 }
499
/*
 * Convert userspace address into unique logical address.
 *
 * For THREAD_SHARE the key is (vmspace, address).  Otherwise the VM map
 * is consulted: a shared mapping is keyed by (VM object, offset) so all
 * processes mapping the object agree on the key, and a reference is
 * taken on the object (dropped in umtx_key_release()).  Returns 0 or
 * EFAULT if the address is not mapped writable.
 */
static int
umtx_key_get(void *addr, int type, int share, struct umtx_key *key)
{
	struct thread *td = curthread;
	vm_map_t map;
	vm_map_entry_t entry;
	vm_pindex_t pindex;
	vm_prot_t prot;
	boolean_t wired;

	key->type = type;
	if (share == THREAD_SHARE) {
		key->shared = 0;
		key->info.private.vs = td->td_proc->p_vmspace;
		key->info.private.addr = (uintptr_t)addr;
	} else {
		MPASS(share == PROCESS_SHARE || share == AUTO_SHARE);
		map = &td->td_proc->p_vmspace->vm_map;
		if (vm_map_lookup(&map, (vm_offset_t)addr, VM_PROT_WRITE,
		    &entry, &key->info.shared.object, &pindex, &prot,
		    &wired) != KERN_SUCCESS) {
			return EFAULT;
		}

		/* AUTO_SHARE inspects the mapping's inheritance flag. */
		if ((share == PROCESS_SHARE) ||
		    (share == AUTO_SHARE &&
		     VM_INHERIT_SHARE == entry->inheritance)) {
			key->shared = 1;
			key->info.shared.offset = entry->offset + entry->start -
				(vm_offset_t)addr;
			vm_object_reference(key->info.shared.object);
		} else {
			key->shared = 0;
			key->info.private.vs = td->td_proc->p_vmspace;
			key->info.private.addr = (uintptr_t)addr;
		}
		vm_map_lookup_done(map, entry);
	}

	umtxq_hash(key);
	return (0);
}
545
546 /*
547 * Release key.
548 */
549 static inline void
550 umtx_key_release(struct umtx_key *key)
551 {
552 if (key->shared)
553 vm_object_deallocate(key->info.shared.object);
554 }
555
/*
 * Lock a umtx object.
 *
 * td   - current thread
 * umtx - userland address of the lock word
 * id   - caller-supplied owner identifier
 * timo - sleep timeout in ticks, 0 for no timeout
 *
 * Returns 0 on success, EFAULT on an invalid userland address, or the
 * error from an interrupted/timed-out sleep on the next loop iteration.
 */
static int
_do_lock_umtx(struct thread *td, struct umtx *umtx, u_long id, int timo)
{
	struct umtx_q *uq;
	u_long owner;
	u_long old;
	int error = 0;

	uq = td->td_umtxq;

	/*
	 * Care must be exercised when dealing with umtx structure. It
	 * can fault on any access.
	 */
	for (;;) {
		/*
		 * Try the uncontested case. This should be done in userland.
		 */
		owner = casuword(&umtx->u_owner, UMTX_UNOWNED, id);

		/* The acquire succeeded. */
		if (owner == UMTX_UNOWNED)
			return (0);

		/* The address was invalid. */
		if (owner == -1)
			return (EFAULT);

		/* If no one owns it but it is contested try to acquire it. */
		if (owner == UMTX_CONTESTED) {
			owner = casuword(&umtx->u_owner,
			    UMTX_CONTESTED, id | UMTX_CONTESTED);

			if (owner == UMTX_CONTESTED)
				return (0);

			/* The address was invalid. */
			if (owner == -1)
				return (EFAULT);

			/* If this failed the lock has changed, restart. */
			continue;
		}

		/*
		 * If we caught a signal, we have retried and now
		 * exit immediately.
		 */
		if (error != 0)
			return (error);

		if ((error = umtx_key_get(umtx, TYPE_SIMPLE_LOCK,
		    AUTO_SHARE, &uq->uq_key)) != 0)
			return (error);

		/* Queue ourselves before publishing the contested bit. */
		umtxq_lock(&uq->uq_key);
		umtxq_busy(&uq->uq_key);
		umtxq_insert(uq);
		umtxq_unbusy(&uq->uq_key);
		umtxq_unlock(&uq->uq_key);

		/*
		 * Set the contested bit so that a release in user space
		 * knows to use the system call for unlock. If this fails
		 * either some one else has acquired the lock or it has been
		 * released.
		 */
		old = casuword(&umtx->u_owner, owner, owner | UMTX_CONTESTED);

		/* The address was invalid. */
		if (old == -1) {
			umtxq_lock(&uq->uq_key);
			umtxq_remove(uq);
			umtxq_unlock(&uq->uq_key);
			umtx_key_release(&uq->uq_key);
			return (EFAULT);
		}

		/*
		 * We set the contested bit, sleep. Otherwise the lock changed
		 * and we need to retry or we lost a race to the thread
		 * unlocking the umtx.
		 */
		umtxq_lock(&uq->uq_key);
		if (old == owner)
			error = umtxq_sleep(uq, "umtx", timo);
		umtxq_remove(uq);
		umtxq_unlock(&uq->uq_key);
		umtx_key_release(&uq->uq_key);
	}

	return (0);
}
652
/*
 * Lock a umtx object.
 *
 * Without a timeout, sleep until acquired; EINTR becomes ERESTART so
 * the syscall transparently restarts.  With a timeout, the remaining
 * time is recomputed around each sleep against an absolute uptime
 * deadline, and ERESTART becomes EINTR (timed locking never restarts).
 */
static int
do_lock_umtx(struct thread *td, struct umtx *umtx, u_long id,
	struct timespec *timeout)
{
	struct timespec ts, ts2, ts3;
	struct timeval tv;
	int error;

	if (timeout == NULL) {
		error = _do_lock_umtx(td, umtx, id, 0);
		/* Mutex locking is restarted if it is interrupted. */
		if (error == EINTR)
			error = ERESTART;
	} else {
		/* "ts" holds the absolute (uptime-based) deadline. */
		getnanouptime(&ts);
		timespecadd(&ts, timeout);
		TIMESPEC_TO_TIMEVAL(&tv, timeout);
		for (;;) {
			error = _do_lock_umtx(td, umtx, id, tvtohz(&tv));
			if (error != ETIMEDOUT)
				break;
			getnanouptime(&ts2);
			if (timespeccmp(&ts2, &ts, >=)) {
				error = ETIMEDOUT;
				break;
			}
			/* Retry with the remaining time. */
			ts3 = ts;
			timespecsub(&ts3, &ts2);
			TIMESPEC_TO_TIMEVAL(&tv, &ts3);
		}
		/* Timed-locking is not restarted. */
		if (error == ERESTART)
			error = EINTR;
	}
	return (error);
}
692
/*
 * Unlock a umtx object.
 *
 * Verifies that "id" owns the lock word, releases it (marking it
 * contested if more than one waiter remains), and wakes one waiter.
 * Returns 0, EFAULT on a bad address, EPERM if the caller is not the
 * owner, or EINVAL if the word changed underneath the final CAS.
 */
static int
do_unlock_umtx(struct thread *td, struct umtx *umtx, u_long id)
{
	struct umtx_key key;
	u_long owner;
	u_long old;
	int error;
	int count;

	/*
	 * Make sure we own this mtx.
	 */
	owner = fuword(__DEVOLATILE(u_long *, &umtx->u_owner));
	if (owner == -1)
		return (EFAULT);

	if ((owner & ~UMTX_CONTESTED) != id)
		return (EPERM);

	/* This should be done in userland */
	if ((owner & UMTX_CONTESTED) == 0) {
		old = casuword(&umtx->u_owner, owner, UMTX_UNOWNED);
		if (old == -1)
			return (EFAULT);
		if (old == owner)
			return (0);
		owner = old;
	}

	/* We should only ever be in here for contested locks */
	if ((error = umtx_key_get(umtx, TYPE_SIMPLE_LOCK, AUTO_SHARE,
	    &key)) != 0)
		return (error);

	umtxq_lock(&key);
	umtxq_busy(&key);
	count = umtxq_count(&key);
	umtxq_unlock(&key);

	/*
	 * When unlocking the umtx, it must be marked as unowned if
	 * there is zero or one thread only waiting for it.
	 * Otherwise, it must be marked as contested.
	 */
	old = casuword(&umtx->u_owner, owner,
		count <= 1 ? UMTX_UNOWNED : UMTX_CONTESTED);
	umtxq_lock(&key);
	umtxq_signal(&key,1);
	umtxq_unbusy(&key);
	umtxq_unlock(&key);
	umtx_key_release(&key);
	if (old == -1)
		return (EFAULT);
	if (old != owner)
		return (EINVAL);
	return (0);
}
753
754 #ifdef COMPAT_IA32
755
/*
 * Lock a umtx object.
 *
 * 32-bit compat variant of _do_lock_umtx(): the lock word is a bare
 * uint32_t and the UMUTEX_* constants are used, otherwise the retry
 * loop is identical.
 */
static int
_do_lock_umtx32(struct thread *td, uint32_t *m, uint32_t id, int timo)
{
	struct umtx_q *uq;
	uint32_t owner;
	uint32_t old;
	int error = 0;

	uq = td->td_umtxq;

	/*
	 * Care must be exercised when dealing with umtx structure. It
	 * can fault on any access.
	 */
	for (;;) {
		/*
		 * Try the uncontested case. This should be done in userland.
		 */
		owner = casuword32(m, UMUTEX_UNOWNED, id);

		/* The acquire succeeded. */
		if (owner == UMUTEX_UNOWNED)
			return (0);

		/* The address was invalid. */
		if (owner == -1)
			return (EFAULT);

		/* If no one owns it but it is contested try to acquire it. */
		if (owner == UMUTEX_CONTESTED) {
			owner = casuword32(m,
			    UMUTEX_CONTESTED, id | UMUTEX_CONTESTED);
			if (owner == UMUTEX_CONTESTED)
				return (0);

			/* The address was invalid. */
			if (owner == -1)
				return (EFAULT);

			/* If this failed the lock has changed, restart. */
			continue;
		}

		/*
		 * If we caught a signal, we have retried and now
		 * exit immediately.
		 */
		if (error != 0)
			return (error);

		if ((error = umtx_key_get(m, TYPE_SIMPLE_LOCK,
		    AUTO_SHARE, &uq->uq_key)) != 0)
			return (error);

		/* Queue ourselves before publishing the contested bit. */
		umtxq_lock(&uq->uq_key);
		umtxq_busy(&uq->uq_key);
		umtxq_insert(uq);
		umtxq_unbusy(&uq->uq_key);
		umtxq_unlock(&uq->uq_key);

		/*
		 * Set the contested bit so that a release in user space
		 * knows to use the system call for unlock. If this fails
		 * either some one else has acquired the lock or it has been
		 * released.
		 */
		old = casuword32(m, owner, owner | UMUTEX_CONTESTED);

		/* The address was invalid. */
		if (old == -1) {
			umtxq_lock(&uq->uq_key);
			umtxq_remove(uq);
			umtxq_unlock(&uq->uq_key);
			umtx_key_release(&uq->uq_key);
			return (EFAULT);
		}

		/*
		 * We set the contested bit, sleep. Otherwise the lock changed
		 * and we need to retry or we lost a race to the thread
		 * unlocking the umtx.
		 */
		umtxq_lock(&uq->uq_key);
		if (old == owner)
			error = umtxq_sleep(uq, "umtx", timo);
		umtxq_remove(uq);
		umtxq_unlock(&uq->uq_key);
		umtx_key_release(&uq->uq_key);
	}

	return (0);
}
851
/*
 * Lock a umtx object.
 *
 * 32-bit compat variant of do_lock_umtx(); same timeout/restart
 * semantics (untimed: EINTR -> ERESTART, timed: ERESTART -> EINTR).
 */
static int
do_lock_umtx32(struct thread *td, void *m, uint32_t id,
	struct timespec *timeout)
{
	struct timespec ts, ts2, ts3;
	struct timeval tv;
	int error;

	if (timeout == NULL) {
		error = _do_lock_umtx32(td, m, id, 0);
		/* Mutex locking is restarted if it is interrupted. */
		if (error == EINTR)
			error = ERESTART;
	} else {
		/* "ts" holds the absolute (uptime-based) deadline. */
		getnanouptime(&ts);
		timespecadd(&ts, timeout);
		TIMESPEC_TO_TIMEVAL(&tv, timeout);
		for (;;) {
			error = _do_lock_umtx32(td, m, id, tvtohz(&tv));
			if (error != ETIMEDOUT)
				break;
			getnanouptime(&ts2);
			if (timespeccmp(&ts2, &ts, >=)) {
				error = ETIMEDOUT;
				break;
			}
			/* Retry with the remaining time. */
			ts3 = ts;
			timespecsub(&ts3, &ts2);
			TIMESPEC_TO_TIMEVAL(&tv, &ts3);
		}
		/* Timed-locking is not restarted. */
		if (error == ERESTART)
			error = EINTR;
	}
	return (error);
}
891
/*
 * Unlock a umtx object.
 *
 * 32-bit compat variant of do_unlock_umtx(); same return values
 * (0 / EFAULT / EPERM / EINVAL).
 */
static int
do_unlock_umtx32(struct thread *td, uint32_t *m, uint32_t id)
{
	struct umtx_key key;
	uint32_t owner;
	uint32_t old;
	int error;
	int count;

	/*
	 * Make sure we own this mtx.
	 */
	owner = fuword32(m);
	if (owner == -1)
		return (EFAULT);

	if ((owner & ~UMUTEX_CONTESTED) != id)
		return (EPERM);

	/* This should be done in userland */
	if ((owner & UMUTEX_CONTESTED) == 0) {
		old = casuword32(m, owner, UMUTEX_UNOWNED);
		if (old == -1)
			return (EFAULT);
		if (old == owner)
			return (0);
		owner = old;
	}

	/* We should only ever be in here for contested locks */
	if ((error = umtx_key_get(m, TYPE_SIMPLE_LOCK, AUTO_SHARE,
	    &key)) != 0)
		return (error);

	umtxq_lock(&key);
	umtxq_busy(&key);
	count = umtxq_count(&key);
	umtxq_unlock(&key);

	/*
	 * When unlocking the umtx, it must be marked as unowned if
	 * there is zero or one thread only waiting for it.
	 * Otherwise, it must be marked as contested.
	 */
	old = casuword32(m, owner,
		count <= 1 ? UMUTEX_UNOWNED : UMUTEX_CONTESTED);
	umtxq_lock(&key);
	umtxq_signal(&key,1);
	umtxq_unbusy(&key);
	umtxq_unlock(&key);
	umtx_key_release(&key);
	if (old == -1)
		return (EFAULT);
	if (old != owner)
		return (EINVAL);
	return (0);
}
952 #endif
953
/*
 * Fetch and compare value, sleep on the address if value is not changed.
 *
 * The word at "addr" is read as u_long (compat32 == 0) or uint32_t; if
 * it still equals "id" the thread sleeps until woken by
 * kern_umtx_wake(), the timeout expires, or a signal arrives.
 * ERESTART is mapped to EINTR before returning.
 */
static int
do_wait(struct thread *td, void *addr, u_long id,
	struct timespec *timeout, int compat32, int is_private)
{
	struct umtx_q *uq;
	struct timespec ts, ts2, ts3;
	struct timeval tv;
	u_long tmp;
	int error = 0;

	uq = td->td_umtxq;
	if ((error = umtx_key_get(addr, TYPE_SIMPLE_WAIT,
		is_private ? THREAD_SHARE : AUTO_SHARE, &uq->uq_key)) != 0)
		return (error);

	/*
	 * Enqueue before reading the word so a concurrent waker cannot
	 * slip in between the value check and the sleep.
	 */
	umtxq_lock(&uq->uq_key);
	umtxq_insert(uq);
	umtxq_unlock(&uq->uq_key);
	if (compat32 == 0)
		tmp = fuword(addr);
	else
		tmp = (unsigned int)fuword32(addr);
	if (tmp != id) {
		/* Value already changed; do not sleep. */
		umtxq_lock(&uq->uq_key);
		umtxq_remove(uq);
		umtxq_unlock(&uq->uq_key);
	} else if (timeout == NULL) {
		umtxq_lock(&uq->uq_key);
		error = umtxq_sleep(uq, "uwait", 0);
		umtxq_remove(uq);
		umtxq_unlock(&uq->uq_key);
	} else {
		/* "ts" holds the absolute (uptime-based) deadline. */
		getnanouptime(&ts);
		timespecadd(&ts, timeout);
		TIMESPEC_TO_TIMEVAL(&tv, timeout);
		umtxq_lock(&uq->uq_key);
		for (;;) {
			error = umtxq_sleep(uq, "uwait", tvtohz(&tv));
			/* Woken (dequeued) or a non-timeout error: done. */
			if (!(uq->uq_flags & UQF_UMTXQ))
				break;
			if (error != ETIMEDOUT)
				break;
			umtxq_unlock(&uq->uq_key);
			getnanouptime(&ts2);
			if (timespeccmp(&ts2, &ts, >=)) {
				error = ETIMEDOUT;
				umtxq_lock(&uq->uq_key);
				break;
			}
			/* Sleep again for the remaining time. */
			ts3 = ts;
			timespecsub(&ts3, &ts2);
			TIMESPEC_TO_TIMEVAL(&tv, &ts3);
			umtxq_lock(&uq->uq_key);
		}
		umtxq_remove(uq);
		umtxq_unlock(&uq->uq_key);
	}
	umtx_key_release(&uq->uq_key);
	if (error == ERESTART)
		error = EINTR;
	return (error);
}
1019
1020 /*
1021 * Wake up threads sleeping on the specified address.
1022 */
1023 int
1024 kern_umtx_wake(struct thread *td, void *uaddr, int n_wake, int is_private)
1025 {
1026 struct umtx_key key;
1027 int ret;
1028
1029 if ((ret = umtx_key_get(uaddr, TYPE_SIMPLE_WAIT,
1030 is_private ? THREAD_SHARE : AUTO_SHARE, &key)) != 0)
1031 return (ret);
1032 umtxq_lock(&key);
1033 ret = umtxq_signal(&key, n_wake);
1034 umtxq_unlock(&key);
1035 umtx_key_release(&key);
1036 return (0);
1037 }
1038
/*
 * Lock PTHREAD_PRIO_NONE protocol POSIX mutex.
 *
 * mode selects the behavior: _UMUTEX_TRY returns EBUSY instead of
 * sleeping, _UMUTEX_WAIT only waits until the mutex is free without
 * acquiring it, and 0 acquires with sleeping.  The owner id is the
 * thread tid.  Returns 0, EFAULT, EBUSY, EDEADLK (error-check mutex
 * relock), or a sleep error.
 */
static int
_do_lock_normal(struct thread *td, struct umutex *m, uint32_t flags, int timo,
	int mode)
{
	struct umtx_q *uq;
	uint32_t owner, old, id;
	int error = 0;

	id = td->td_tid;
	uq = td->td_umtxq;

	/*
	 * Care must be exercised when dealing with umtx structure. It
	 * can fault on any access.
	 */
	for (;;) {
		owner = fuword32(__DEVOLATILE(void *, &m->m_owner));
		if (mode == _UMUTEX_WAIT) {
			/* Wait-only mode: done as soon as it is unowned. */
			if (owner == UMUTEX_UNOWNED || owner == UMUTEX_CONTESTED)
				return (0);
		} else {
			/*
			 * Try the uncontested case. This should be done in userland.
			 */
			owner = casuword32(&m->m_owner, UMUTEX_UNOWNED, id);

			/* The acquire succeeded. */
			if (owner == UMUTEX_UNOWNED)
				return (0);

			/* The address was invalid. */
			if (owner == -1)
				return (EFAULT);

			/* If no one owns it but it is contested try to acquire it. */
			if (owner == UMUTEX_CONTESTED) {
				owner = casuword32(&m->m_owner,
				    UMUTEX_CONTESTED, id | UMUTEX_CONTESTED);

				if (owner == UMUTEX_CONTESTED)
					return (0);

				/* The address was invalid. */
				if (owner == -1)
					return (EFAULT);

				/* If this failed the lock has changed, restart. */
				continue;
			}
		}

		/* Error-check mutexes detect self-relock. */
		if ((flags & UMUTEX_ERROR_CHECK) != 0 &&
		    (owner & ~UMUTEX_CONTESTED) == id)
			return (EDEADLK);

		if (mode == _UMUTEX_TRY)
			return (EBUSY);

		/*
		 * If we caught a signal, we have retried and now
		 * exit immediately.
		 */
		if (error != 0)
			return (error);

		if ((error = umtx_key_get(m, TYPE_NORMAL_UMUTEX,
		    GET_SHARE(flags), &uq->uq_key)) != 0)
			return (error);

		/* Queue ourselves; chain stays busy across the CAS below. */
		umtxq_lock(&uq->uq_key);
		umtxq_busy(&uq->uq_key);
		umtxq_insert(uq);
		umtxq_unlock(&uq->uq_key);

		/*
		 * Set the contested bit so that a release in user space
		 * knows to use the system call for unlock. If this fails
		 * either some one else has acquired the lock or it has been
		 * released.
		 */
		old = casuword32(&m->m_owner, owner, owner | UMUTEX_CONTESTED);

		/* The address was invalid. */
		if (old == -1) {
			umtxq_lock(&uq->uq_key);
			umtxq_remove(uq);
			umtxq_unbusy(&uq->uq_key);
			umtxq_unlock(&uq->uq_key);
			umtx_key_release(&uq->uq_key);
			return (EFAULT);
		}

		/*
		 * We set the contested bit, sleep. Otherwise the lock changed
		 * and we need to retry or we lost a race to the thread
		 * unlocking the umtx.
		 */
		umtxq_lock(&uq->uq_key);
		umtxq_unbusy(&uq->uq_key);
		if (old == owner)
			error = umtxq_sleep(uq, "umtxn", timo);
		umtxq_remove(uq);
		umtxq_unlock(&uq->uq_key);
		umtx_key_release(&uq->uq_key);
	}

	return (0);
}
1150
/*
 * Unlock PTHREAD_PRIO_NONE protocol POSIX mutex.
 */
static int
do_unlock_normal(struct thread *td, struct umutex *m, uint32_t flags)
{
	struct umtx_key key;
	uint32_t owner, old, id;
	int error;
	int count;

	id = td->td_tid;
	/*
	 * Make sure we own this mtx.
	 */
	owner = fuword32(__DEVOLATILE(uint32_t *, &m->m_owner));
	if (owner == -1)
		return (EFAULT);

	if ((owner & ~UMUTEX_CONTESTED) != id)
		return (EPERM);

	/* Uncontested fast path; this should be done in userland. */
	if ((owner & UMUTEX_CONTESTED) == 0) {
		old = casuword32(&m->m_owner, owner, UMUTEX_UNOWNED);
		if (old == -1)
			return (EFAULT);
		if (old == owner)
			return (0);
		owner = old;
	}

	/* We should only ever be in here for contested locks */
	if ((error = umtx_key_get(m, TYPE_NORMAL_UMUTEX, GET_SHARE(flags),
	    &key)) != 0)
		return (error);

	umtxq_lock(&key);
	umtxq_busy(&key);
	count = umtxq_count(&key);
	umtxq_unlock(&key);

	/*
	 * When unlocking the umtx, it must be marked as unowned if
	 * there is zero or one thread only waiting for it.
	 * Otherwise, it must be marked as contested.
	 */
	old = casuword32(&m->m_owner, owner,
		count <= 1 ? UMUTEX_UNOWNED : UMUTEX_CONTESTED);
	umtxq_lock(&key);
	umtxq_signal(&key,1);
	umtxq_unbusy(&key);
	umtxq_unlock(&key);
	umtx_key_release(&key);
	if (old == -1)
		return (EFAULT);
	if (old != owner)
		return (EINVAL);
	return (0);
}
1213
/*
 * Check if the mutex is available and wake up a waiter,
 * only for simple mutex.
 *
 * If at most one waiter remains, the contested bit is also cleared so
 * userland can take the uncontested fast path again.
 */
static int
do_wake_umutex(struct thread *td, struct umutex *m)
{
	struct umtx_key key;
	uint32_t owner;
	uint32_t flags;
	int error;
	int count;

	owner = fuword32(__DEVOLATILE(uint32_t *, &m->m_owner));
	if (owner == -1)
		return (EFAULT);

	/* Still owned by some thread: nothing to do. */
	if ((owner & ~UMUTEX_CONTESTED) != 0)
		return (0);

	flags = fuword32(&m->m_flags);

	/* We should only ever be in here for contested locks */
	if ((error = umtx_key_get(m, TYPE_NORMAL_UMUTEX, GET_SHARE(flags),
	    &key)) != 0)
		return (error);

	umtxq_lock(&key);
	umtxq_busy(&key);
	count = umtxq_count(&key);
	umtxq_unlock(&key);

	if (count <= 1)
		owner = casuword32(&m->m_owner, UMUTEX_CONTESTED, UMUTEX_UNOWNED);

	umtxq_lock(&key);
	if (count != 0 && (owner & ~UMUTEX_CONTESTED) == 0)
		umtxq_signal(&key, 1);
	umtxq_unbusy(&key);
	umtxq_unlock(&key);
	umtx_key_release(&key);
	return (0);
}
1257
1258 static inline struct umtx_pi *
1259 umtx_pi_alloc(int flags)
1260 {
1261 struct umtx_pi *pi;
1262
1263 pi = uma_zalloc(umtx_pi_zone, M_ZERO | flags);
1264 TAILQ_INIT(&pi->pi_blocked);
1265 atomic_add_int(&umtx_pi_allocated, 1);
1266 return (pi);
1267 }
1268
1269 static inline void
1270 umtx_pi_free(struct umtx_pi *pi)
1271 {
1272 uma_zfree(umtx_pi_zone, pi);
1273 atomic_add_int(&umtx_pi_allocated, -1);
1274 }
1275
/*
 * Adjust the thread's position on a pi_state after its priority has been
 * changed.  The blocked queue is kept sorted so the highest-priority
 * (numerically lowest UPRI) waiter is first.  Returns 0 when there is
 * no PI mutex to adjust, 1 otherwise.
 */
static int
umtx_pi_adjust_thread(struct umtx_pi *pi, struct thread *td)
{
	struct umtx_q *uq, *uq1, *uq2;
	struct thread *td1;

	mtx_assert(&umtx_lock, MA_OWNED);
	if (pi == NULL)
		return (0);

	uq = td->td_umtxq;

	/*
	 * Check if the thread needs to be moved on the blocked chain.
	 * It needs to be moved if either its priority is lower than
	 * the previous thread or higher than the next thread.
	 */
	uq1 = TAILQ_PREV(uq, umtxq_head, uq_lockq);
	uq2 = TAILQ_NEXT(uq, uq_lockq);
	if ((uq1 != NULL && UPRI(td) < UPRI(uq1->uq_thread)) ||
	    (uq2 != NULL && UPRI(td) > UPRI(uq2->uq_thread))) {
		/*
		 * Remove thread from blocked chain and determine where
		 * it should be moved to.
		 */
		TAILQ_REMOVE(&pi->pi_blocked, uq, uq_lockq);
		TAILQ_FOREACH(uq1, &pi->pi_blocked, uq_lockq) {
			td1 = uq1->uq_thread;
			MPASS(td1->td_proc->p_magic == P_MAGIC);
			/* Stop at the first strictly lower-priority entry. */
			if (UPRI(td1) > UPRI(td))
				break;
		}

		if (uq1 == NULL)
			TAILQ_INSERT_TAIL(&pi->pi_blocked, uq, uq_lockq);
		else
			TAILQ_INSERT_BEFORE(uq1, uq, uq_lockq);
	}
	return (1);
}
1320
/*
 * Propagate priority when a thread is blocked on POSIX
 * PI mutex.  Walks the chain of blocked-on mutexes owner by owner,
 * lending the original thread's priority to each owner that currently
 * runs at a lower priority, until the chain ends or an owner already
 * runs at least as high.
 */
static void
umtx_propagate_priority(struct thread *td)
{
	struct umtx_q *uq;
	struct umtx_pi *pi;
	int pri;

	mtx_assert(&umtx_lock, MA_OWNED);
	pri = UPRI(td);
	uq = td->td_umtxq;
	pi = uq->uq_pi_blocked;
	if (pi == NULL)
		return;

	for (;;) {
		td = pi->pi_owner;
		if (td == NULL)
			return;

		MPASS(td->td_proc != NULL);
		MPASS(td->td_proc->p_magic == P_MAGIC);

		/* Owner already runs at equal or higher priority; stop. */
		if (UPRI(td) <= pri)
			return;

		thread_lock(td);
		sched_lend_user_prio(td, pri);
		thread_unlock(td);

		/*
		 * Pick up the lock that td is blocked on.
		 */
		uq = td->td_umtxq;
		pi = uq->uq_pi_blocked;
		/* Resort td on the list if needed. */
		if (!umtx_pi_adjust_thread(pi, td))
			break;
	}
}
1364
/*
 * Unpropagate priority for a PI mutex when a thread blocked on
 * it is interrupted by signal or resumed by others.  For each owner
 * along the blocking chain, recompute the lent priority from the top
 * waiters of all PI mutexes that owner still holds.
 */
static void
umtx_unpropagate_priority(struct umtx_pi *pi)
{
	struct umtx_q *uq, *uq_owner;
	struct umtx_pi *pi2;
	int pri, oldpri;

	mtx_assert(&umtx_lock, MA_OWNED);

	while (pi != NULL && pi->pi_owner != NULL) {
		pri = PRI_MAX;
		uq_owner = pi->pi_owner->td_umtxq;

		/* Best priority among top waiters of every owned PI mutex. */
		TAILQ_FOREACH(pi2, &uq_owner->uq_pi_contested, pi_link) {
			uq = TAILQ_FIRST(&pi2->pi_blocked);
			if (uq != NULL) {
				if (pri > UPRI(uq->uq_thread))
					pri = UPRI(uq->uq_thread);
			}
		}

		/* Never drop below the owner's own inherited (PP) priority. */
		if (pri > uq_owner->uq_inherited_pri)
			pri = uq_owner->uq_inherited_pri;
		thread_lock(pi->pi_owner);
		oldpri = pi->pi_owner->td_user_pri;
		sched_unlend_user_prio(pi->pi_owner, pri);
		thread_unlock(pi->pi_owner);
		/* If the owner is itself blocked, fix its queue position. */
		if (uq_owner->uq_pi_blocked != NULL)
			umtx_pi_adjust_locked(pi->pi_owner, oldpri);
		pi = uq_owner->uq_pi_blocked;
	}
}
1401
1402 /*
1403 * Insert a PI mutex into owned list.
1404 */
1405 static void
1406 umtx_pi_setowner(struct umtx_pi *pi, struct thread *owner)
1407 {
1408 struct umtx_q *uq_owner;
1409
1410 uq_owner = owner->td_umtxq;
1411 mtx_assert(&umtx_lock, MA_OWNED);
1412 if (pi->pi_owner != NULL)
1413 panic("pi_ower != NULL");
1414 pi->pi_owner = owner;
1415 TAILQ_INSERT_TAIL(&uq_owner->uq_pi_contested, pi, pi_link);
1416 }
1417
1418 /*
1419 * Claim ownership of a PI mutex.
1420 */
1421 static int
1422 umtx_pi_claim(struct umtx_pi *pi, struct thread *owner)
1423 {
1424 struct umtx_q *uq, *uq_owner;
1425
1426 uq_owner = owner->td_umtxq;
1427 mtx_lock_spin(&umtx_lock);
1428 if (pi->pi_owner == owner) {
1429 mtx_unlock_spin(&umtx_lock);
1430 return (0);
1431 }
1432
1433 if (pi->pi_owner != NULL) {
1434 /*
1435 * userland may have already messed the mutex, sigh.
1436 */
1437 mtx_unlock_spin(&umtx_lock);
1438 return (EPERM);
1439 }
1440 umtx_pi_setowner(pi, owner);
1441 uq = TAILQ_FIRST(&pi->pi_blocked);
1442 if (uq != NULL) {
1443 int pri;
1444
1445 pri = UPRI(uq->uq_thread);
1446 thread_lock(owner);
1447 if (pri < UPRI(owner))
1448 sched_lend_user_prio(owner, pri);
1449 thread_unlock(owner);
1450 }
1451 mtx_unlock_spin(&umtx_lock);
1452 return (0);
1453 }
1454
1455 static void
1456 umtx_pi_adjust_locked(struct thread *td, u_char oldpri)
1457 {
1458 struct umtx_q *uq;
1459 struct umtx_pi *pi;
1460
1461 uq = td->td_umtxq;
1462 /*
1463 * Pick up the lock that td is blocked on.
1464 */
1465 pi = uq->uq_pi_blocked;
1466 MPASS(pi != NULL);
1467
1468 /* Resort the turnstile on the list. */
1469 if (!umtx_pi_adjust_thread(pi, td))
1470 return;
1471
1472 /*
1473 * If our priority was lowered and we are at the head of the
1474 * turnstile, then propagate our new priority up the chain.
1475 */
1476 if (uq == TAILQ_FIRST(&pi->pi_blocked) && UPRI(td) < oldpri)
1477 umtx_propagate_priority(td);
1478 }
1479
1480 /*
1481 * Adjust a thread's order position in its blocked PI mutex,
1482 * this may result new priority propagating process.
1483 */
1484 void
1485 umtx_pi_adjust(struct thread *td, u_char oldpri)
1486 {
1487 struct umtx_q *uq;
1488 struct umtx_pi *pi;
1489
1490 uq = td->td_umtxq;
1491 mtx_lock_spin(&umtx_lock);
1492 /*
1493 * Pick up the lock that td is blocked on.
1494 */
1495 pi = uq->uq_pi_blocked;
1496 if (pi != NULL)
1497 umtx_pi_adjust_locked(td, oldpri);
1498 mtx_unlock_spin(&umtx_lock);
1499 }
1500
/*
 * Sleep on a PI mutex.  Queues the thread on the mutex's priority-
 * sorted blocked list, propagates its priority to the owner chain,
 * then msleep()s until woken, signalled, or timed out.  Called with
 * the umtxq chain locked and busy; returns with it unlocked.
 */
static int
umtxq_sleep_pi(struct umtx_q *uq, struct umtx_pi *pi,
	uint32_t owner, const char *wmesg, int timo)
{
	struct umtxq_chain *uc;
	struct thread *td, *td1;
	struct umtx_q *uq1;
	int pri;
	int error = 0;

	td = uq->uq_thread;
	KASSERT(td == curthread, ("inconsistent uq_thread"));
	uc = umtxq_getchain(&uq->uq_key);
	UMTXQ_LOCKED_ASSERT(uc);
	UMTXQ_BUSY_ASSERT(uc);
	umtxq_insert(uq);
	mtx_lock_spin(&umtx_lock);
	if (pi->pi_owner == NULL) {
		/* XXX
		 * Currently, we only support process private PI-mutex,
		 * non-contended PI-mutexes are locked in userland.
		 * Process shared PI-mutex should always be initialized
		 * by kernel and be registered in kernel, locking should
		 * always be done by kernel to avoid security problems.
		 * For process private PI-mutex, we can find owner
		 * thread and boost its priority safely.
		 */
		mtx_unlock_spin(&umtx_lock);
		PROC_LOCK(curproc);
		td1 = thread_find(curproc, owner);
		mtx_lock_spin(&umtx_lock);
		/* Recheck: another thread may have set the owner meanwhile. */
		if (td1 != NULL && pi->pi_owner == NULL) {
			uq1 = td1->td_umtxq;
			umtx_pi_setowner(pi, td1);
		}
		PROC_UNLOCK(curproc);
	}

	/* Enqueue in priority order: before the first lower-priority waiter. */
	TAILQ_FOREACH(uq1, &pi->pi_blocked, uq_lockq) {
		pri = UPRI(uq1->uq_thread);
		if (pri > UPRI(td))
			break;
	}

	if (uq1 != NULL)
		TAILQ_INSERT_BEFORE(uq1, uq, uq_lockq);
	else
		TAILQ_INSERT_TAIL(&pi->pi_blocked, uq, uq_lockq);

	uq->uq_pi_blocked = pi;
	thread_lock(td);
	td->td_flags |= TDF_UPIBLOCKED;
	thread_unlock(td);
	umtx_propagate_priority(td);
	mtx_unlock_spin(&umtx_lock);
	umtxq_unbusy(&uq->uq_key);

	/* Only sleep if we are still queued (not already woken). */
	if (uq->uq_flags & UQF_UMTXQ) {
		error = msleep(uq, &uc->uc_lock, PCATCH, wmesg, timo);
		if (error == EWOULDBLOCK)
			error = ETIMEDOUT;
		if (uq->uq_flags & UQF_UMTXQ) {
			umtxq_remove(uq);
		}
	}
	/* Undo the blocked state and withdraw any lent priority. */
	mtx_lock_spin(&umtx_lock);
	uq->uq_pi_blocked = NULL;
	thread_lock(td);
	td->td_flags &= ~TDF_UPIBLOCKED;
	thread_unlock(td);
	TAILQ_REMOVE(&pi->pi_blocked, uq, uq_lockq);
	umtx_unpropagate_priority(pi);
	mtx_unlock_spin(&umtx_lock);
	umtxq_unlock(&uq->uq_key);

	return (error);
}
1581
1582 /*
1583 * Add reference count for a PI mutex.
1584 */
1585 static void
1586 umtx_pi_ref(struct umtx_pi *pi)
1587 {
1588 struct umtxq_chain *uc;
1589
1590 uc = umtxq_getchain(&pi->pi_key);
1591 UMTXQ_LOCKED_ASSERT(uc);
1592 pi->pi_refcount++;
1593 }
1594
/*
 * Decrease reference count for a PI mutex, if the counter
 * is decreased to zero, its memory space is freed.
 */
static void
umtx_pi_unref(struct umtx_pi *pi)
{
	struct umtxq_chain *uc;

	uc = umtxq_getchain(&pi->pi_key);
	UMTXQ_LOCKED_ASSERT(uc);
	KASSERT(pi->pi_refcount > 0, ("invalid reference count"));
	if (--pi->pi_refcount == 0) {
		mtx_lock_spin(&umtx_lock);
		/* Detach from the owner's contested list before teardown. */
		if (pi->pi_owner != NULL) {
			TAILQ_REMOVE(&pi->pi_owner->td_umtxq->uq_pi_contested,
				pi, pi_link);
			pi->pi_owner = NULL;
		}
		KASSERT(TAILQ_EMPTY(&pi->pi_blocked),
			("blocked queue not empty"));
		mtx_unlock_spin(&umtx_lock);
		/* Remove from the chain's hash list and free. */
		TAILQ_REMOVE(&uc->uc_pi_list, pi, pi_hashlink);
		umtx_pi_free(pi);
	}
}
1621
1622 /*
1623 * Find a PI mutex in hash table.
1624 */
1625 static struct umtx_pi *
1626 umtx_pi_lookup(struct umtx_key *key)
1627 {
1628 struct umtxq_chain *uc;
1629 struct umtx_pi *pi;
1630
1631 uc = umtxq_getchain(key);
1632 UMTXQ_LOCKED_ASSERT(uc);
1633
1634 TAILQ_FOREACH(pi, &uc->uc_pi_list, pi_hashlink) {
1635 if (umtx_key_match(&pi->pi_key, key)) {
1636 return (pi);
1637 }
1638 }
1639 return (NULL);
1640 }
1641
/*
 * Insert a PI mutex into hash table.
 */
static inline void
umtx_pi_insert(struct umtx_pi *pi)
{
	struct umtxq_chain *uc;

	/* The chain lock protects the per-chain PI list. */
	uc = umtxq_getchain(&pi->pi_key);
	UMTXQ_LOCKED_ASSERT(uc);
	TAILQ_INSERT_TAIL(&uc->uc_pi_list, pi, pi_hashlink);
}
1654
/*
 * Lock a PI mutex.  Looks up (or creates) the kernel-side PI state,
 * then spins on a CAS loop against the userland owner word, sleeping
 * via umtxq_sleep_pi() when the mutex stays contested.
 */
static int
_do_lock_pi(struct thread *td, struct umutex *m, uint32_t flags, int timo,
	int try)
{
	struct umtx_q *uq;
	struct umtx_pi *pi, *new_pi;
	uint32_t id, owner, old;
	int error;

	id = td->td_tid;
	uq = td->td_umtxq;

	if ((error = umtx_key_get(m, TYPE_PI_UMUTEX, GET_SHARE(flags),
	    &uq->uq_key)) != 0)
		return (error);
	umtxq_lock(&uq->uq_key);
	pi = umtx_pi_lookup(&uq->uq_key);
	if (pi == NULL) {
		new_pi = umtx_pi_alloc(M_NOWAIT);
		if (new_pi == NULL) {
			/*
			 * Retry with a sleeping allocation; re-lookup since
			 * another thread may have inserted one meanwhile.
			 */
			umtxq_unlock(&uq->uq_key);
			new_pi = umtx_pi_alloc(M_WAITOK);
			umtxq_lock(&uq->uq_key);
			pi = umtx_pi_lookup(&uq->uq_key);
			if (pi != NULL) {
				umtx_pi_free(new_pi);
				new_pi = NULL;
			}
		}
		if (new_pi != NULL) {
			new_pi->pi_key = uq->uq_key;
			umtx_pi_insert(new_pi);
			pi = new_pi;
		}
	}
	umtx_pi_ref(pi);
	umtxq_unlock(&uq->uq_key);

	/*
	 * Care must be exercised when dealing with umtx structure. It
	 * can fault on any access.
	 */
	for (;;) {
		/*
		 * Try the uncontested case. This should be done in userland.
		 */
		owner = casuword32(&m->m_owner, UMUTEX_UNOWNED, id);

		/* The acquire succeeded. */
		if (owner == UMUTEX_UNOWNED) {
			error = 0;
			break;
		}

		/* The address was invalid. */
		if (owner == -1) {
			error = EFAULT;
			break;
		}

		/* If no one owns it but it is contested try to acquire it. */
		if (owner == UMUTEX_CONTESTED) {
			owner = casuword32(&m->m_owner,
			    UMUTEX_CONTESTED, id | UMUTEX_CONTESTED);

			if (owner == UMUTEX_CONTESTED) {
				/* We got it; record PI ownership in kernel. */
				umtxq_lock(&uq->uq_key);
				umtxq_busy(&uq->uq_key);
				error = umtx_pi_claim(pi, td);
				umtxq_unbusy(&uq->uq_key);
				umtxq_unlock(&uq->uq_key);
				break;
			}

			/* The address was invalid. */
			if (owner == -1) {
				error = EFAULT;
				break;
			}

			/* If this failed the lock has changed, restart. */
			continue;
		}

		if ((flags & UMUTEX_ERROR_CHECK) != 0 &&
		    (owner & ~UMUTEX_CONTESTED) == id) {
			error = EDEADLK;
			break;
		}

		if (try != 0) {
			error = EBUSY;
			break;
		}

		/*
		 * If we caught a signal, we have retried and now
		 * exit immediately.
		 */
		if (error != 0)
			break;

		umtxq_lock(&uq->uq_key);
		umtxq_busy(&uq->uq_key);
		umtxq_unlock(&uq->uq_key);

		/*
		 * Set the contested bit so that a release in user space
		 * knows to use the system call for unlock. If this fails
		 * either some one else has acquired the lock or it has been
		 * released.
		 */
		old = casuword32(&m->m_owner, owner, owner | UMUTEX_CONTESTED);

		/* The address was invalid. */
		if (old == -1) {
			umtxq_lock(&uq->uq_key);
			umtxq_unbusy(&uq->uq_key);
			umtxq_unlock(&uq->uq_key);
			error = EFAULT;
			break;
		}

		umtxq_lock(&uq->uq_key);
		/*
		 * We set the contested bit, sleep. Otherwise the lock changed
		 * and we need to retry or we lost a race to the thread
		 * unlocking the umtx.
		 */
		if (old == owner)
			error = umtxq_sleep_pi(uq, pi, owner & ~UMUTEX_CONTESTED,
				 "umtxpi", timo);
		else {
			umtxq_unbusy(&uq->uq_key);
			umtxq_unlock(&uq->uq_key);
		}
	}

	/* Drop our reference on the PI state. */
	umtxq_lock(&uq->uq_key);
	umtx_pi_unref(pi);
	umtxq_unlock(&uq->uq_key);

	umtx_key_release(&uq->uq_key);
	return (error);
}
1803
/*
 * Unlock a PI mutex.  Disowns the kernel PI state, recomputes the
 * caller's lent priority from its remaining owned mutexes, wakes the
 * highest-priority sleeping waiter, then releases the userland owner
 * word (clearing or keeping the contested bit based on waiter count).
 */
static int
do_unlock_pi(struct thread *td, struct umutex *m, uint32_t flags)
{
	struct umtx_key key;
	struct umtx_q *uq_first, *uq_first2, *uq_me;
	struct umtx_pi *pi, *pi2;
	uint32_t owner, old, id;
	int error;
	int count;
	int pri;

	id = td->td_tid;
	/*
	 * Make sure we own this mtx.
	 */
	owner = fuword32(__DEVOLATILE(uint32_t *, &m->m_owner));
	if (owner == -1)
		return (EFAULT);

	if ((owner & ~UMUTEX_CONTESTED) != id)
		return (EPERM);

	/* This should be done in userland */
	if ((owner & UMUTEX_CONTESTED) == 0) {
		old = casuword32(&m->m_owner, owner, UMUTEX_UNOWNED);
		if (old == -1)
			return (EFAULT);
		if (old == owner)
			return (0);
		owner = old;
	}

	/* We should only ever be in here for contested locks */
	if ((error = umtx_key_get(m, TYPE_PI_UMUTEX, GET_SHARE(flags),
	    &key)) != 0)
		return (error);

	umtxq_lock(&key);
	umtxq_busy(&key);
	count = umtxq_count_pi(&key, &uq_first);
	if (uq_first != NULL) {
		mtx_lock_spin(&umtx_lock);
		pi = uq_first->uq_pi_blocked;
		KASSERT(pi != NULL, ("pi == NULL?"));
		if (pi->pi_owner != curthread) {
			mtx_unlock_spin(&umtx_lock);
			umtxq_unbusy(&key);
			umtxq_unlock(&key);
			umtx_key_release(&key);
			/* userland messed the mutex */
			return (EPERM);
		}
		/* Disown the mutex before waking anyone. */
		uq_me = curthread->td_umtxq;
		pi->pi_owner = NULL;
		TAILQ_REMOVE(&uq_me->uq_pi_contested, pi, pi_link);
		/* get highest priority thread which is still sleeping. */
		uq_first = TAILQ_FIRST(&pi->pi_blocked);
		while (uq_first != NULL &&
			   (uq_first->uq_flags & UQF_UMTXQ) == 0) {
			uq_first = TAILQ_NEXT(uq_first, uq_lockq);
		}
		/* Recompute our lent priority from remaining owned mutexes. */
		pri = PRI_MAX;
		TAILQ_FOREACH(pi2, &uq_me->uq_pi_contested, pi_link) {
			uq_first2 = TAILQ_FIRST(&pi2->pi_blocked);
			if (uq_first2 != NULL) {
				if (pri > UPRI(uq_first2->uq_thread))
					pri = UPRI(uq_first2->uq_thread);
			}
		}
		thread_lock(curthread);
		sched_unlend_user_prio(curthread, pri);
		thread_unlock(curthread);
		mtx_unlock_spin(&umtx_lock);
		if (uq_first)
			umtxq_signal_thread(uq_first);
	}
	umtxq_unlock(&key);

	/*
	 * When unlocking the umtx, it must be marked as unowned if
	 * there is zero or one thread only waiting for it.
	 * Otherwise, it must be marked as contested.
	 */
	old = casuword32(&m->m_owner, owner,
		count <= 1 ? UMUTEX_UNOWNED : UMUTEX_CONTESTED);

	umtxq_lock(&key);
	umtxq_unbusy(&key);
	umtxq_unlock(&key);
	umtx_key_release(&key);
	if (old == -1)
		return (EFAULT);
	if (old != owner)
		return (EINVAL);
	return (0);
}
1903
1904 /*
1905 * Lock a PP mutex.
1906 */
1907 static int
1908 _do_lock_pp(struct thread *td, struct umutex *m, uint32_t flags, int timo,
1909 int try)
1910 {
1911 struct umtx_q *uq, *uq2;
1912 struct umtx_pi *pi;
1913 uint32_t ceiling;
1914 uint32_t owner, id;
1915 int error, pri, old_inherited_pri, su;
1916
1917 id = td->td_tid;
1918 uq = td->td_umtxq;
1919 if ((error = umtx_key_get(m, TYPE_PP_UMUTEX, GET_SHARE(flags),
1920 &uq->uq_key)) != 0)
1921 return (error);
1922 su = (priv_check(td, PRIV_SCHED_RTPRIO) == 0);
1923 for (;;) {
1924 old_inherited_pri = uq->uq_inherited_pri;
1925 umtxq_lock(&uq->uq_key);
1926 umtxq_busy(&uq->uq_key);
1927 umtxq_unlock(&uq->uq_key);
1928
1929 ceiling = RTP_PRIO_MAX - fuword32(&m->m_ceilings[0]);
1930 if (ceiling > RTP_PRIO_MAX) {
1931 error = EINVAL;
1932 goto out;
1933 }
1934
1935 mtx_lock_spin(&umtx_lock);
1936 if (UPRI(td) < PRI_MIN_REALTIME + ceiling) {
1937 mtx_unlock_spin(&umtx_lock);
1938 error = EINVAL;
1939 goto out;
1940 }
1941 if (su && PRI_MIN_REALTIME + ceiling < uq->uq_inherited_pri) {
1942 uq->uq_inherited_pri = PRI_MIN_REALTIME + ceiling;
1943 thread_lock(td);
1944 if (uq->uq_inherited_pri < UPRI(td))
1945 sched_lend_user_prio(td, uq->uq_inherited_pri);
1946 thread_unlock(td);
1947 }
1948 mtx_unlock_spin(&umtx_lock);
1949
1950 owner = casuword32(&m->m_owner,
1951 UMUTEX_CONTESTED, id | UMUTEX_CONTESTED);
1952
1953 if (owner == UMUTEX_CONTESTED) {
1954 error = 0;
1955 break;
1956 }
1957
1958 /* The address was invalid. */
1959 if (owner == -1) {
1960 error = EFAULT;
1961 break;
1962 }
1963
1964 if ((flags & UMUTEX_ERROR_CHECK) != 0 &&
1965 (owner & ~UMUTEX_CONTESTED) == id) {
1966 error = EDEADLK;
1967 break;
1968 }
1969
1970 if (try != 0) {
1971 error = EBUSY;
1972 break;
1973 }
1974
1975 /*
1976 * If we caught a signal, we have retried and now
1977 * exit immediately.
1978 */
1979 if (error != 0)
1980 break;
1981
1982 umtxq_lock(&uq->uq_key);
1983 umtxq_insert(uq);
1984 umtxq_unbusy(&uq->uq_key);
1985 error = umtxq_sleep(uq, "umtxpp", timo);
1986 umtxq_remove(uq);
1987 umtxq_unlock(&uq->uq_key);
1988
1989 mtx_lock_spin(&umtx_lock);
1990 uq->uq_inherited_pri = old_inherited_pri;
1991 pri = PRI_MAX;
1992 TAILQ_FOREACH(pi, &uq->uq_pi_contested, pi_link) {
1993 uq2 = TAILQ_FIRST(&pi->pi_blocked);
1994 if (uq2 != NULL) {
1995 if (pri > UPRI(uq2->uq_thread))
1996 pri = UPRI(uq2->uq_thread);
1997 }
1998 }
1999 if (pri > uq->uq_inherited_pri)
2000 pri = uq->uq_inherited_pri;
2001 thread_lock(td);
2002 sched_unlend_user_prio(td, pri);
2003 thread_unlock(td);
2004 mtx_unlock_spin(&umtx_lock);
2005 }
2006
2007 if (error != 0) {
2008 mtx_lock_spin(&umtx_lock);
2009 uq->uq_inherited_pri = old_inherited_pri;
2010 pri = PRI_MAX;
2011 TAILQ_FOREACH(pi, &uq->uq_pi_contested, pi_link) {
2012 uq2 = TAILQ_FIRST(&pi->pi_blocked);
2013 if (uq2 != NULL) {
2014 if (pri > UPRI(uq2->uq_thread))
2015 pri = UPRI(uq2->uq_thread);
2016 }
2017 }
2018 if (pri > uq->uq_inherited_pri)
2019 pri = uq->uq_inherited_pri;
2020 thread_lock(td);
2021 sched_unlend_user_prio(td, pri);
2022 thread_unlock(td);
2023 mtx_unlock_spin(&umtx_lock);
2024 }
2025
2026 out:
2027 umtxq_lock(&uq->uq_key);
2028 umtxq_unbusy(&uq->uq_key);
2029 umtxq_unlock(&uq->uq_key);
2030 umtx_key_release(&uq->uq_key);
2031 return (error);
2032 }
2033
/*
 * Unlock a PP mutex.  Stores UMUTEX_CONTESTED as the unlocked state,
 * wakes one waiter, then recomputes the caller's effective lent
 * priority from the saved ceiling and its remaining owned mutexes.
 */
static int
do_unlock_pp(struct thread *td, struct umutex *m, uint32_t flags)
{
	struct umtx_key key;
	struct umtx_q *uq, *uq2;
	struct umtx_pi *pi;
	uint32_t owner, id;
	uint32_t rceiling;
	int error, pri, new_inherited_pri, su;

	id = td->td_tid;
	uq = td->td_umtxq;
	su = (priv_check(td, PRIV_SCHED_RTPRIO) == 0);

	/*
	 * Make sure we own this mtx.
	 */
	owner = fuword32(__DEVOLATILE(uint32_t *, &m->m_owner));
	if (owner == -1)
		return (EFAULT);

	if ((owner & ~UMUTEX_CONTESTED) != id)
		return (EPERM);

	error = copyin(&m->m_ceilings[1], &rceiling, sizeof(uint32_t));
	if (error != 0)
		return (error);

	/* Translate the saved ceiling into an inherited priority. */
	if (rceiling == -1)
		new_inherited_pri = PRI_MAX;
	else {
		rceiling = RTP_PRIO_MAX - rceiling;
		if (rceiling > RTP_PRIO_MAX)
			return (EINVAL);
		new_inherited_pri = PRI_MIN_REALTIME + rceiling;
	}

	if ((error = umtx_key_get(m, TYPE_PP_UMUTEX, GET_SHARE(flags),
	    &key)) != 0)
		return (error);
	umtxq_lock(&key);
	umtxq_busy(&key);
	umtxq_unlock(&key);
	/*
	 * For priority protected mutex, always set unlocked state
	 * to UMUTEX_CONTESTED, so that userland always enters kernel
	 * to lock the mutex, it is necessary because thread priority
	 * has to be adjusted for such mutex.
	 */
	error = suword32(__DEVOLATILE(uint32_t *, &m->m_owner),
		UMUTEX_CONTESTED);

	umtxq_lock(&key);
	if (error == 0)
		umtxq_signal(&key, 1);
	umtxq_unbusy(&key);
	umtxq_unlock(&key);

	if (error == -1)
		error = EFAULT;
	else {
		/* Recompute the effective lent priority. */
		mtx_lock_spin(&umtx_lock);
		if (su != 0)
			uq->uq_inherited_pri = new_inherited_pri;
		pri = PRI_MAX;
		TAILQ_FOREACH(pi, &uq->uq_pi_contested, pi_link) {
			uq2 = TAILQ_FIRST(&pi->pi_blocked);
			if (uq2 != NULL) {
				if (pri > UPRI(uq2->uq_thread))
					pri = UPRI(uq2->uq_thread);
			}
		}
		if (pri > uq->uq_inherited_pri)
			pri = uq->uq_inherited_pri;
		thread_lock(td);
		sched_unlend_user_prio(td, pri);
		thread_unlock(td);
		mtx_unlock_spin(&umtx_lock);
	}
	umtx_key_release(&key);
	return (error);
}
2119
/*
 * Set the priority ceiling of a PP mutex, returning the previous
 * ceiling through 'old_ceiling' on success.  The mutex must be held
 * (or be briefly acquired here) while the ceiling is updated.
 */
static int
do_set_ceiling(struct thread *td, struct umutex *m, uint32_t ceiling,
	uint32_t *old_ceiling)
{
	struct umtx_q *uq;
	uint32_t save_ceiling;
	uint32_t owner, id;
	uint32_t flags;
	int error;

	flags = fuword32(&m->m_flags);
	/* Ceilings only make sense for priority protected mutexes. */
	if ((flags & UMUTEX_PRIO_PROTECT) == 0)
		return (EINVAL);
	if (ceiling > RTP_PRIO_MAX)
		return (EINVAL);
	id = td->td_tid;
	uq = td->td_umtxq;
	if ((error = umtx_key_get(m, TYPE_PP_UMUTEX, GET_SHARE(flags),
	   &uq->uq_key)) != 0)
		return (error);
	for (;;) {
		umtxq_lock(&uq->uq_key);
		umtxq_busy(&uq->uq_key);
		umtxq_unlock(&uq->uq_key);

		save_ceiling = fuword32(&m->m_ceilings[0]);

		/* Briefly take the unlocked mutex while storing the ceiling. */
		owner = casuword32(&m->m_owner,
		    UMUTEX_CONTESTED, id | UMUTEX_CONTESTED);

		if (owner == UMUTEX_CONTESTED) {
			suword32(&m->m_ceilings[0], ceiling);
			suword32(__DEVOLATILE(uint32_t *, &m->m_owner),
				UMUTEX_CONTESTED);
			error = 0;
			break;
		}

		/* The address was invalid. */
		if (owner == -1) {
			error = EFAULT;
			break;
		}

		/* We already own the mutex; just update the ceiling. */
		if ((owner & ~UMUTEX_CONTESTED) == id) {
			suword32(&m->m_ceilings[0], ceiling);
			error = 0;
			break;
		}

		/*
		 * If we caught a signal, we have retried and now
		 * exit immediately.
		 */
		if (error != 0)
			break;

		/*
		 * We set the contested bit, sleep. Otherwise the lock changed
		 * and we need to retry or we lost a race to the thread
		 * unlocking the umtx.
		 */
		umtxq_lock(&uq->uq_key);
		umtxq_insert(uq);
		umtxq_unbusy(&uq->uq_key);
		error = umtxq_sleep(uq, "umtxpp", 0);
		umtxq_remove(uq);
		umtxq_unlock(&uq->uq_key);
	}
	umtxq_lock(&uq->uq_key);
	if (error == 0)
		umtxq_signal(&uq->uq_key, INT_MAX);
	umtxq_unbusy(&uq->uq_key);
	umtxq_unlock(&uq->uq_key);
	umtx_key_release(&uq->uq_key);
	/* Report the previous ceiling to the caller on success. */
	if (error == 0 && old_ceiling != NULL)
		suword32(old_ceiling, save_ceiling);
	return (error);
}
2199
2200 static int
2201 _do_lock_umutex(struct thread *td, struct umutex *m, int flags, int timo,
2202 int mode)
2203 {
2204 switch(flags & (UMUTEX_PRIO_INHERIT | UMUTEX_PRIO_PROTECT)) {
2205 case 0:
2206 return (_do_lock_normal(td, m, flags, timo, mode));
2207 case UMUTEX_PRIO_INHERIT:
2208 return (_do_lock_pi(td, m, flags, timo, mode));
2209 case UMUTEX_PRIO_PROTECT:
2210 return (_do_lock_pp(td, m, flags, timo, mode));
2211 }
2212 return (EINVAL);
2213 }
2214
/*
 * Lock a userland POSIX mutex.  With no timeout the lock is retried
 * transparently on interrupt (ERESTART); with a timeout the remaining
 * time is recomputed against an absolute deadline on each retry and
 * interrupts surface as EINTR.
 */
static int
do_lock_umutex(struct thread *td, struct umutex *m,
	struct timespec *timeout, int mode)
{
	struct timespec ts, ts2, ts3;
	struct timeval tv;
	uint32_t flags;
	int error;

	flags = fuword32(&m->m_flags);
	if (flags == -1)
		return (EFAULT);

	if (timeout == NULL) {
		error = _do_lock_umutex(td, m, flags, 0, mode);
		/* Mutex locking is restarted if it is interrupted. */
		if (error == EINTR && mode != _UMUTEX_WAIT)
			error = ERESTART;
	} else {
		/* Compute an absolute deadline, then retry until it passes. */
		getnanouptime(&ts);
		timespecadd(&ts, timeout);
		TIMESPEC_TO_TIMEVAL(&tv, timeout);
		for (;;) {
			error = _do_lock_umutex(td, m, flags, tvtohz(&tv), mode);
			if (error != ETIMEDOUT)
				break;
			getnanouptime(&ts2);
			if (timespeccmp(&ts2, &ts, >=)) {
				error = ETIMEDOUT;
				break;
			}
			/* Sleep again for the remaining time only. */
			ts3 = ts;
			timespecsub(&ts3, &ts2);
			TIMESPEC_TO_TIMEVAL(&tv, &ts3);
		}
		/* Timed-locking is not restarted. */
		if (error == ERESTART)
			error = EINTR;
	}
	return (error);
}
2259
2260 /*
2261 * Unlock a userland POSIX mutex.
2262 */
2263 static int
2264 do_unlock_umutex(struct thread *td, struct umutex *m)
2265 {
2266 uint32_t flags;
2267
2268 flags = fuword32(&m->m_flags);
2269 if (flags == -1)
2270 return (EFAULT);
2271
2272 switch(flags & (UMUTEX_PRIO_INHERIT | UMUTEX_PRIO_PROTECT)) {
2273 case 0:
2274 return (do_unlock_normal(td, m, flags));
2275 case UMUTEX_PRIO_INHERIT:
2276 return (do_unlock_pi(td, m, flags));
2277 case UMUTEX_PRIO_PROTECT:
2278 return (do_unlock_pp(td, m, flags));
2279 }
2280
2281 return (EINVAL);
2282 }
2283
/*
 * Wait on a userland condition variable: queue the thread, mark the
 * condvar as having waiters, release the associated mutex, then sleep
 * (optionally with an absolute deadline computed from 'timeout').
 */
static int
do_cv_wait(struct thread *td, struct ucond *cv, struct umutex *m,
	struct timespec *timeout, u_long wflags)
{
	struct umtx_q *uq;
	struct timeval tv;
	struct timespec cts, ets, tts;
	uint32_t flags;
	int error;

	uq = td->td_umtxq;
	flags = fuword32(&cv->c_flags);
	error = umtx_key_get(cv, TYPE_CV, GET_SHARE(flags), &uq->uq_key);
	if (error != 0)
		return (error);
	umtxq_lock(&uq->uq_key);
	umtxq_busy(&uq->uq_key);
	umtxq_insert(uq);
	umtxq_unlock(&uq->uq_key);

	/*
	 * The magic thing is we should set c_has_waiters to 1 before
	 * releasing user mutex.
	 */
	suword32(__DEVOLATILE(uint32_t *, &cv->c_has_waiters), 1);

	umtxq_lock(&uq->uq_key);
	umtxq_unbusy(&uq->uq_key);
	umtxq_unlock(&uq->uq_key);

	error = do_unlock_umutex(td, m);

	umtxq_lock(&uq->uq_key);
	if (error == 0) {
		if ((wflags & UMTX_CHECK_UNPARKING) &&
		    (td->td_pflags & TDP_WAKEUP)) {
			/* Consume a pending unpark instead of sleeping. */
			td->td_pflags &= ~TDP_WAKEUP;
			error = EINTR;
		} else if (timeout == NULL) {
			error = umtxq_sleep(uq, "ucond", 0);
		} else {
			/* Sleep in slices until the absolute deadline. */
			getnanouptime(&ets);
			timespecadd(&ets, timeout);
			TIMESPEC_TO_TIMEVAL(&tv, timeout);
			for (;;) {
				error = umtxq_sleep(uq, "ucond", tvtohz(&tv));
				if (error != ETIMEDOUT)
					break;
				getnanouptime(&cts);
				if (timespeccmp(&cts, &ets, >=)) {
					error = ETIMEDOUT;
					break;
				}
				tts = ets;
				timespecsub(&tts, &cts);
				TIMESPEC_TO_TIMEVAL(&tv, &tts);
			}
		}
	}

	if (error != 0) {
		if ((uq->uq_flags & UQF_UMTXQ) == 0) {
			/*
			 * If we concurrently got do_cv_signal()d
			 * and we got an error or UNIX signals or a timeout,
			 * then, perform another umtxq_signal to avoid
			 * consuming the wakeup. This may cause spurious
			 * wakeup for another thread which was just queued,
			 * but SUSV3 explicitly allows spurious wakeup to
			 * occur, and indeed a kernel based implementation
			 * can not avoid it.
			 */
			if (!umtxq_signal(&uq->uq_key, 1))
				error = 0;
		}
		if (error == ERESTART)
			error = EINTR;
	}
	umtxq_remove(uq);
	umtxq_unlock(&uq->uq_key);
	umtx_key_release(&uq->uq_key);
	return (error);
}
2367
/*
 * Signal a userland condition variable.  Wakes one waiter and, if no
 * waiters remain after the wakeup, clears the userland c_has_waiters
 * flag so future signallers can skip the kernel.
 */
static int
do_cv_signal(struct thread *td, struct ucond *cv)
{
	struct umtx_key key;
	int error, cnt, nwake;
	uint32_t flags;

	flags = fuword32(&cv->c_flags);
	if ((error = umtx_key_get(cv, TYPE_CV, GET_SHARE(flags), &key)) != 0)
		return (error);
	umtxq_lock(&key);
	umtxq_busy(&key);
	cnt = umtxq_count(&key);
	nwake = umtxq_signal(&key, 1);
	/* Last waiter woken: clear the userland waiters flag. */
	if (cnt <= nwake) {
		umtxq_unlock(&key);
		error = suword32(
			__DEVOLATILE(uint32_t *, &cv->c_has_waiters), 0);
		umtxq_lock(&key);
	}
	umtxq_unbusy(&key);
	umtxq_unlock(&key);
	umtx_key_release(&key);
	return (error);
}
2396
/*
 * Broadcast a userland condition variable: wake every waiter and
 * clear the userland c_has_waiters flag.
 */
static int
do_cv_broadcast(struct thread *td, struct ucond *cv)
{
	struct umtx_key key;
	int error;
	uint32_t flags;

	flags = fuword32(&cv->c_flags);
	if ((error = umtx_key_get(cv, TYPE_CV, GET_SHARE(flags), &key)) != 0)
		return (error);

	/* Wake all waiters while the chain is marked busy. */
	umtxq_lock(&key);
	umtxq_busy(&key);
	umtxq_signal(&key, INT_MAX);
	umtxq_unlock(&key);

	/* Nobody is left waiting; clear the userland flag. */
	error = suword32(__DEVOLATILE(uint32_t *, &cv->c_has_waiters), 0);

	umtxq_lock(&key);
	umtxq_unbusy(&key);
	umtxq_unlock(&key);

	umtx_key_release(&key);
	return (error);
}
2422
/*
 * Acquire a userland rwlock for reading.  Spins on a CAS loop against
 * the userland state word; when a writer owns the lock (or, unless
 * readers are preferred, is waiting for it) the thread sets the read
 * contention bit, bumps the blocked-reader count, and sleeps.
 */
static int
do_rw_rdlock(struct thread *td, struct urwlock *rwlock, long fflag, int timo)
{
	struct umtx_q *uq;
	uint32_t flags, wrflags;
	int32_t state, oldstate;
	int32_t blocked_readers;
	int error;

	uq = td->td_umtxq;
	flags = fuword32(&rwlock->rw_flags);
	error = umtx_key_get(rwlock, TYPE_RWLOCK, GET_SHARE(flags), &uq->uq_key);
	if (error != 0)
		return (error);

	/* Unless readers are preferred, waiting writers also block us. */
	wrflags = URWLOCK_WRITE_OWNER;
	if (!(fflag & URWLOCK_PREFER_READER) && !(flags & URWLOCK_PREFER_READER))
		wrflags |= URWLOCK_WRITE_WAITERS;

	for (;;) {
		state = fuword32(__DEVOLATILE(int32_t *, &rwlock->rw_state));
		/* try to lock it */
		while (!(state & wrflags)) {
			if (__predict_false(URWLOCK_READER_COUNT(state) == URWLOCK_MAX_READERS)) {
				umtx_key_release(&uq->uq_key);
				return (EAGAIN);
			}
			oldstate = casuword32(&rwlock->rw_state, state, state + 1);
			if (oldstate == state) {
				umtx_key_release(&uq->uq_key);
				return (0);
			}
			state = oldstate;
		}

		if (error)
			break;

		/* grab monitor lock */
		umtxq_lock(&uq->uq_key);
		umtxq_busy(&uq->uq_key);
		umtxq_unlock(&uq->uq_key);

		/* set read contention bit */
		while ((state & wrflags) && !(state & URWLOCK_READ_WAITERS)) {
			oldstate = casuword32(&rwlock->rw_state, state, state | URWLOCK_READ_WAITERS);
			if (oldstate == state)
				goto sleep;
			state = oldstate;
		}

		/* state is changed while setting flags, restart */
		if (!(state & wrflags)) {
			umtxq_lock(&uq->uq_key);
			umtxq_unbusy(&uq->uq_key);
			umtxq_unlock(&uq->uq_key);
			continue;
		}

sleep:
		/* contention bit is set, before sleeping, increase read waiter count */
		blocked_readers = fuword32(&rwlock->rw_blocked_readers);
		suword32(&rwlock->rw_blocked_readers, blocked_readers+1);

		while (state & wrflags) {
			umtxq_lock(&uq->uq_key);
			umtxq_insert(uq);
			umtxq_unbusy(&uq->uq_key);

			error = umtxq_sleep(uq, "urdlck", timo);

			umtxq_busy(&uq->uq_key);
			umtxq_remove(uq);
			umtxq_unlock(&uq->uq_key);
			if (error)
				break;
			state = fuword32(__DEVOLATILE(int32_t *, &rwlock->rw_state));
		}

		/* decrease read waiter count, and may clear read contention bit */
		blocked_readers = fuword32(&rwlock->rw_blocked_readers);
		suword32(&rwlock->rw_blocked_readers, blocked_readers-1);
		if (blocked_readers == 1) {
			/* We were the last blocked reader; clear the bit. */
			state = fuword32(__DEVOLATILE(int32_t *, &rwlock->rw_state));
			for (;;) {
				oldstate = casuword32(&rwlock->rw_state, state,
					 state & ~URWLOCK_READ_WAITERS);
				if (oldstate == state)
					break;
				state = oldstate;
			}
		}

		umtxq_lock(&uq->uq_key);
		umtxq_unbusy(&uq->uq_key);
		umtxq_unlock(&uq->uq_key);
	}
	umtx_key_release(&uq->uq_key);
	return (error);
}
2523
2524 static int
2525 do_rw_rdlock2(struct thread *td, void *obj, long val, struct timespec *timeout)
2526 {
2527 struct timespec ts, ts2, ts3;
2528 struct timeval tv;
2529 int error;
2530
2531 getnanouptime(&ts);
2532 timespecadd(&ts, timeout);
2533 TIMESPEC_TO_TIMEVAL(&tv, timeout);
2534 for (;;) {
2535 error = do_rw_rdlock(td, obj, val, tvtohz(&tv));
2536 if (error != ETIMEDOUT)
2537 break;
2538 getnanouptime(&ts2);
2539 if (timespeccmp(&ts2, &ts, >=)) {
2540 error = ETIMEDOUT;
2541 break;
2542 }
2543 ts3 = ts;
2544 timespecsub(&ts3, &ts2);
2545 TIMESPEC_TO_TIMEVAL(&tv, &ts3);
2546 }
2547 if (error == ERESTART)
2548 error = EINTR;
2549 return (error);
2550 }
2551
/*
 * Acquire a userland read-write lock for writing.
 *
 * Mirrors do_rw_rdlock(): the rw_state word is manipulated in user
 * memory with casuword32() retry loops, writers sleep on the
 * exclusive queue, and 'timo' is a tick timeout (0 = forever).
 *
 * On a failed sleep, if this thread was the last writer and readers
 * are blocked, the readers are woken so they are not stranded behind
 * a URWLOCK_WRITE_WAITERS bit nobody will clear.
 */
static int
do_rw_wrlock(struct thread *td, struct urwlock *rwlock, int timo)
{
	struct umtx_q *uq;
	uint32_t flags;
	int32_t state, oldstate;
	int32_t blocked_writers;
	int32_t blocked_readers;
	int error;

	uq = td->td_umtxq;
	flags = fuword32(&rwlock->rw_flags);
	error = umtx_key_get(rwlock, TYPE_RWLOCK, GET_SHARE(flags), &uq->uq_key);
	if (error != 0)
		return (error);

	blocked_readers = 0;
	for (;;) {
		state = fuword32(__DEVOLATILE(int32_t *, &rwlock->rw_state));
		/* Try to take ownership while the lock is free. */
		while (!(state & URWLOCK_WRITE_OWNER) && URWLOCK_READER_COUNT(state) == 0) {
			oldstate = casuword32(&rwlock->rw_state, state, state | URWLOCK_WRITE_OWNER);
			if (oldstate == state) {
				umtx_key_release(&uq->uq_key);
				return (0);
			}
			state = oldstate;
		}

		if (error) {
			/*
			 * Sleep failed.  If no writer owns or waits on the
			 * lock any more but readers were left blocked by
			 * us, wake them before bailing out.
			 */
			if (!(state & (URWLOCK_WRITE_OWNER|URWLOCK_WRITE_WAITERS)) &&
			    blocked_readers != 0) {
				umtxq_lock(&uq->uq_key);
				umtxq_busy(&uq->uq_key);
				umtxq_signal_queue(&uq->uq_key, INT_MAX, UMTX_SHARED_QUEUE);
				umtxq_unbusy(&uq->uq_key);
				umtxq_unlock(&uq->uq_key);
			}

			break;
		}

		/* grab monitor lock */
		umtxq_lock(&uq->uq_key);
		umtxq_busy(&uq->uq_key);
		umtxq_unlock(&uq->uq_key);

		/* Set the write contention bit while the lock is held. */
		while (((state & URWLOCK_WRITE_OWNER) || URWLOCK_READER_COUNT(state) != 0) &&
		       (state & URWLOCK_WRITE_WAITERS) == 0) {
			oldstate = casuword32(&rwlock->rw_state, state, state | URWLOCK_WRITE_WAITERS);
			if (oldstate == state)
				goto sleep;
			state = oldstate;
		}

		/* Lock became free while setting the bit; restart. */
		if (!(state & URWLOCK_WRITE_OWNER) && URWLOCK_READER_COUNT(state) == 0) {
			umtxq_lock(&uq->uq_key);
			umtxq_unbusy(&uq->uq_key);
			umtxq_unlock(&uq->uq_key);
			continue;
		}
sleep:
		/* Before sleeping, advertise one more blocked writer. */
		blocked_writers = fuword32(&rwlock->rw_blocked_writers);
		suword32(&rwlock->rw_blocked_writers, blocked_writers+1);

		while ((state & URWLOCK_WRITE_OWNER) || URWLOCK_READER_COUNT(state) != 0) {
			umtxq_lock(&uq->uq_key);
			umtxq_insert_queue(uq, UMTX_EXCLUSIVE_QUEUE);
			umtxq_unbusy(&uq->uq_key);

			error = umtxq_sleep(uq, "uwrlck", timo);

			umtxq_busy(&uq->uq_key);
			umtxq_remove_queue(uq, UMTX_EXCLUSIVE_QUEUE);
			umtxq_unlock(&uq->uq_key);
			if (error)
				break;
			state = fuword32(__DEVOLATILE(int32_t *, &rwlock->rw_state));
		}

		blocked_writers = fuword32(&rwlock->rw_blocked_writers);
		suword32(&rwlock->rw_blocked_writers, blocked_writers-1);
		if (blocked_writers == 1) {
			/* Last blocked writer: clear the contention bit. */
			state = fuword32(__DEVOLATILE(int32_t *, &rwlock->rw_state));
			for (;;) {
				oldstate = casuword32(&rwlock->rw_state, state,
					 state & ~URWLOCK_WRITE_WAITERS);
				if (oldstate == state)
					break;
				state = oldstate;
			}
			/* Remember blocked readers for the error path above. */
			blocked_readers = fuword32(&rwlock->rw_blocked_readers);
		} else
			blocked_readers = 0;

		umtxq_lock(&uq->uq_key);
		umtxq_unbusy(&uq->uq_key);
		umtxq_unlock(&uq->uq_key);
	}

	umtx_key_release(&uq->uq_key);
	return (error);
}
2654
2655 static int
2656 do_rw_wrlock2(struct thread *td, void *obj, struct timespec *timeout)
2657 {
2658 struct timespec ts, ts2, ts3;
2659 struct timeval tv;
2660 int error;
2661
2662 getnanouptime(&ts);
2663 timespecadd(&ts, timeout);
2664 TIMESPEC_TO_TIMEVAL(&tv, timeout);
2665 for (;;) {
2666 error = do_rw_wrlock(td, obj, tvtohz(&tv));
2667 if (error != ETIMEDOUT)
2668 break;
2669 getnanouptime(&ts2);
2670 if (timespeccmp(&ts2, &ts, >=)) {
2671 error = ETIMEDOUT;
2672 break;
2673 }
2674 ts3 = ts;
2675 timespecsub(&ts3, &ts2);
2676 TIMESPEC_TO_TIMEVAL(&tv, &ts3);
2677 }
2678 if (error == ERESTART)
2679 error = EINTR;
2680 return (error);
2681 }
2682
/*
 * Release a userland read-write lock held by the caller, for either a
 * write owner or a reader, and wake the appropriate waiter queue.
 *
 * Returns EPERM if the lock is observed unowned in the mode being
 * released (e.g. releasing a write lock nobody write-owns).
 */
static int
do_rw_unlock(struct thread *td, struct urwlock *rwlock)
{
	struct umtx_q *uq;
	uint32_t flags;
	int32_t state, oldstate;
	int error, q, count;

	uq = td->td_umtxq;
	flags = fuword32(&rwlock->rw_flags);
	error = umtx_key_get(rwlock, TYPE_RWLOCK, GET_SHARE(flags), &uq->uq_key);
	if (error != 0)
		return (error);

	state = fuword32(__DEVOLATILE(int32_t *, &rwlock->rw_state));
	if (state & URWLOCK_WRITE_OWNER) {
		/* Clear write ownership with a CAS retry loop. */
		for (;;) {
			oldstate = casuword32(&rwlock->rw_state, state,
				state & ~URWLOCK_WRITE_OWNER);
			if (oldstate != state) {
				state = oldstate;
				/* Someone else already cleared ownership. */
				if (!(oldstate & URWLOCK_WRITE_OWNER)) {
					error = EPERM;
					goto out;
				}
			} else
				break;
		}
	} else if (URWLOCK_READER_COUNT(state) != 0) {
		/* Drop one reader with a CAS retry loop. */
		for (;;) {
			oldstate = casuword32(&rwlock->rw_state, state,
				state - 1);
			if (oldstate != state) {
				state = oldstate;
				/* Reader count raced to zero underneath us. */
				if (URWLOCK_READER_COUNT(oldstate) == 0) {
					error = EPERM;
					goto out;
				}
			}
			else
				break;
		}
	} else {
		/* Lock is not held at all. */
		error = EPERM;
		goto out;
	}

	count = 0;

	/*
	 * Pick which queue to wake: writers first by default, readers
	 * first when URWLOCK_PREFER_READER is set.  One writer is woken,
	 * or all readers.
	 */
	if (!(flags & URWLOCK_PREFER_READER)) {
		if (state & URWLOCK_WRITE_WAITERS) {
			count = 1;
			q = UMTX_EXCLUSIVE_QUEUE;
		} else if (state & URWLOCK_READ_WAITERS) {
			count = INT_MAX;
			q = UMTX_SHARED_QUEUE;
		}
	} else {
		if (state & URWLOCK_READ_WAITERS) {
			count = INT_MAX;
			q = UMTX_SHARED_QUEUE;
		} else if (state & URWLOCK_WRITE_WAITERS) {
			count = 1;
			q = UMTX_EXCLUSIVE_QUEUE;
		}
	}

	if (count) {
		umtxq_lock(&uq->uq_key);
		umtxq_busy(&uq->uq_key);
		umtxq_signal_queue(&uq->uq_key, count, q);
		umtxq_unbusy(&uq->uq_key);
		umtxq_unlock(&uq->uq_key);
	}
out:
	umtx_key_release(&uq->uq_key);
	return (error);
}
2761
/* System call: lock the struct umtx with the caller's tid, no timeout. */
int
_umtx_lock(struct thread *td, struct _umtx_lock_args *uap)
    /* struct umtx *umtx */
{
	return _do_lock_umtx(td, uap->umtx, td->td_tid, 0);
}
2768
/* System call: unlock a struct umtx owned by the caller's tid. */
int
_umtx_unlock(struct thread *td, struct _umtx_unlock_args *uap)
    /* struct umtx *umtx */
{
	return do_unlock_umtx(td, uap->umtx, td->td_tid);
}
2775
2776 static int
2777 __umtx_op_lock_umtx(struct thread *td, struct _umtx_op_args *uap)
2778 {
2779 struct timespec *ts, timeout;
2780 int error;
2781
2782 /* Allow a null timespec (wait forever). */
2783 if (uap->uaddr2 == NULL)
2784 ts = NULL;
2785 else {
2786 error = copyin(uap->uaddr2, &timeout, sizeof(timeout));
2787 if (error != 0)
2788 return (error);
2789 if (timeout.tv_nsec >= 1000000000 ||
2790 timeout.tv_nsec < 0) {
2791 return (EINVAL);
2792 }
2793 ts = &timeout;
2794 }
2795 return (do_lock_umtx(td, uap->obj, uap->val, ts));
2796 }
2797
/* UMTX_OP_UNLOCK: release the struct umtx; uap->val is the owner id. */
static int
__umtx_op_unlock_umtx(struct thread *td, struct _umtx_op_args *uap)
{
	return (do_unlock_umtx(td, uap->obj, uap->val));
}
2803
2804 static int
2805 __umtx_op_wait(struct thread *td, struct _umtx_op_args *uap)
2806 {
2807 struct timespec *ts, timeout;
2808 int error;
2809
2810 if (uap->uaddr2 == NULL)
2811 ts = NULL;
2812 else {
2813 error = copyin(uap->uaddr2, &timeout, sizeof(timeout));
2814 if (error != 0)
2815 return (error);
2816 if (timeout.tv_nsec >= 1000000000 ||
2817 timeout.tv_nsec < 0)
2818 return (EINVAL);
2819 ts = &timeout;
2820 }
2821 return do_wait(td, uap->obj, uap->val, ts, 0, 0);
2822 }
2823
2824 static int
2825 __umtx_op_wait_uint(struct thread *td, struct _umtx_op_args *uap)
2826 {
2827 struct timespec *ts, timeout;
2828 int error;
2829
2830 if (uap->uaddr2 == NULL)
2831 ts = NULL;
2832 else {
2833 error = copyin(uap->uaddr2, &timeout, sizeof(timeout));
2834 if (error != 0)
2835 return (error);
2836 if (timeout.tv_nsec >= 1000000000 ||
2837 timeout.tv_nsec < 0)
2838 return (EINVAL);
2839 ts = &timeout;
2840 }
2841 return do_wait(td, uap->obj, uap->val, ts, 1, 0);
2842 }
2843
2844 static int
2845 __umtx_op_wait_uint_private(struct thread *td, struct _umtx_op_args *uap)
2846 {
2847 struct timespec *ts, timeout;
2848 int error;
2849
2850 if (uap->uaddr2 == NULL)
2851 ts = NULL;
2852 else {
2853 error = copyin(uap->uaddr2, &timeout, sizeof(timeout));
2854 if (error != 0)
2855 return (error);
2856 if (timeout.tv_nsec >= 1000000000 ||
2857 timeout.tv_nsec < 0)
2858 return (EINVAL);
2859 ts = &timeout;
2860 }
2861 return do_wait(td, uap->obj, uap->val, ts, 1, 1);
2862 }
2863
/* UMTX_OP_WAKE: wake up to uap->val threads waiting on the shared word. */
static int
__umtx_op_wake(struct thread *td, struct _umtx_op_args *uap)
{
	return (kern_umtx_wake(td, uap->obj, uap->val, 0));
}
2869
/* UMTX_OP_WAKE_PRIVATE: wake waiters on a process-private word. */
static int
__umtx_op_wake_private(struct thread *td, struct _umtx_op_args *uap)
{
	return (kern_umtx_wake(td, uap->obj, uap->val, 1));
}
2875
2876 static int
2877 __umtx_op_lock_umutex(struct thread *td, struct _umtx_op_args *uap)
2878 {
2879 struct timespec *ts, timeout;
2880 int error;
2881
2882 /* Allow a null timespec (wait forever). */
2883 if (uap->uaddr2 == NULL)
2884 ts = NULL;
2885 else {
2886 error = copyin(uap->uaddr2, &timeout,
2887 sizeof(timeout));
2888 if (error != 0)
2889 return (error);
2890 if (timeout.tv_nsec >= 1000000000 ||
2891 timeout.tv_nsec < 0) {
2892 return (EINVAL);
2893 }
2894 ts = &timeout;
2895 }
2896 return do_lock_umutex(td, uap->obj, ts, 0);
2897 }
2898
/* UMTX_OP_MUTEX_TRYLOCK: non-blocking umutex lock attempt. */
static int
__umtx_op_trylock_umutex(struct thread *td, struct _umtx_op_args *uap)
{
	return do_lock_umutex(td, uap->obj, NULL, _UMUTEX_TRY);
}
2904
2905 static int
2906 __umtx_op_wait_umutex(struct thread *td, struct _umtx_op_args *uap)
2907 {
2908 struct timespec *ts, timeout;
2909 int error;
2910
2911 /* Allow a null timespec (wait forever). */
2912 if (uap->uaddr2 == NULL)
2913 ts = NULL;
2914 else {
2915 error = copyin(uap->uaddr2, &timeout,
2916 sizeof(timeout));
2917 if (error != 0)
2918 return (error);
2919 if (timeout.tv_nsec >= 1000000000 ||
2920 timeout.tv_nsec < 0) {
2921 return (EINVAL);
2922 }
2923 ts = &timeout;
2924 }
2925 return do_lock_umutex(td, uap->obj, ts, _UMUTEX_WAIT);
2926 }
2927
/* UMTX_OP_UMUTEX_WAKE: wake threads waiting on the umutex. */
static int
__umtx_op_wake_umutex(struct thread *td, struct _umtx_op_args *uap)
{
	return do_wake_umutex(td, uap->obj);
}
2933
/* UMTX_OP_MUTEX_UNLOCK: release the struct umutex at uap->obj. */
static int
__umtx_op_unlock_umutex(struct thread *td, struct _umtx_op_args *uap)
{
	return do_unlock_umutex(td, uap->obj);
}
2939
/*
 * UMTX_OP_SET_CEILING: set a priority-protect mutex's ceiling to
 * uap->val; the old ceiling is stored through uap->uaddr1.
 */
static int
__umtx_op_set_ceiling(struct thread *td, struct _umtx_op_args *uap)
{
	return do_set_ceiling(td, uap->obj, uap->val, uap->uaddr1);
}
2945
2946 static int
2947 __umtx_op_cv_wait(struct thread *td, struct _umtx_op_args *uap)
2948 {
2949 struct timespec *ts, timeout;
2950 int error;
2951
2952 /* Allow a null timespec (wait forever). */
2953 if (uap->uaddr2 == NULL)
2954 ts = NULL;
2955 else {
2956 error = copyin(uap->uaddr2, &timeout,
2957 sizeof(timeout));
2958 if (error != 0)
2959 return (error);
2960 if (timeout.tv_nsec >= 1000000000 ||
2961 timeout.tv_nsec < 0) {
2962 return (EINVAL);
2963 }
2964 ts = &timeout;
2965 }
2966 return (do_cv_wait(td, uap->obj, uap->uaddr1, ts, uap->val));
2967 }
2968
/* UMTX_OP_CV_SIGNAL: wake one waiter on the condition variable. */
static int
__umtx_op_cv_signal(struct thread *td, struct _umtx_op_args *uap)
{
	return do_cv_signal(td, uap->obj);
}
2974
/* UMTX_OP_CV_BROADCAST: wake all waiters on the condition variable. */
static int
__umtx_op_cv_broadcast(struct thread *td, struct _umtx_op_args *uap)
{
	return do_cv_broadcast(td, uap->obj);
}
2980
2981 static int
2982 __umtx_op_rw_rdlock(struct thread *td, struct _umtx_op_args *uap)
2983 {
2984 struct timespec timeout;
2985 int error;
2986
2987 /* Allow a null timespec (wait forever). */
2988 if (uap->uaddr2 == NULL) {
2989 error = do_rw_rdlock(td, uap->obj, uap->val, 0);
2990 } else {
2991 error = copyin(uap->uaddr2, &timeout,
2992 sizeof(timeout));
2993 if (error != 0)
2994 return (error);
2995 if (timeout.tv_nsec >= 1000000000 ||
2996 timeout.tv_nsec < 0) {
2997 return (EINVAL);
2998 }
2999 error = do_rw_rdlock2(td, uap->obj, uap->val, &timeout);
3000 }
3001 return (error);
3002 }
3003
3004 static int
3005 __umtx_op_rw_wrlock(struct thread *td, struct _umtx_op_args *uap)
3006 {
3007 struct timespec timeout;
3008 int error;
3009
3010 /* Allow a null timespec (wait forever). */
3011 if (uap->uaddr2 == NULL) {
3012 error = do_rw_wrlock(td, uap->obj, 0);
3013 } else {
3014 error = copyin(uap->uaddr2, &timeout,
3015 sizeof(timeout));
3016 if (error != 0)
3017 return (error);
3018 if (timeout.tv_nsec >= 1000000000 ||
3019 timeout.tv_nsec < 0) {
3020 return (EINVAL);
3021 }
3022
3023 error = do_rw_wrlock2(td, uap->obj, &timeout);
3024 }
3025 return (error);
3026 }
3027
/* UMTX_OP_RW_UNLOCK: release the urwlock (reader or writer side). */
static int
__umtx_op_rw_unlock(struct thread *td, struct _umtx_op_args *uap)
{
	return do_rw_unlock(td, uap->obj);
}
3033
/* Handler signature for one _umtx_op() operation. */
typedef int (*_umtx_op_func)(struct thread *td, struct _umtx_op_args *uap);

/*
 * Dispatch table for _umtx_op(), indexed by the UMTX_OP_* opcode.
 * Entry order must match the numeric values of the UMTX_OP_* constants.
 */
static _umtx_op_func op_table[] = {
	__umtx_op_lock_umtx,		/* UMTX_OP_LOCK */
	__umtx_op_unlock_umtx,		/* UMTX_OP_UNLOCK */
	__umtx_op_wait,			/* UMTX_OP_WAIT */
	__umtx_op_wake,			/* UMTX_OP_WAKE */
	__umtx_op_trylock_umutex,	/* UMTX_OP_MUTEX_TRYLOCK */
	__umtx_op_lock_umutex,		/* UMTX_OP_MUTEX_LOCK */
	__umtx_op_unlock_umutex,	/* UMTX_OP_MUTEX_UNLOCK */
	__umtx_op_set_ceiling,		/* UMTX_OP_SET_CEILING */
	__umtx_op_cv_wait,		/* UMTX_OP_CV_WAIT*/
	__umtx_op_cv_signal,		/* UMTX_OP_CV_SIGNAL */
	__umtx_op_cv_broadcast,		/* UMTX_OP_CV_BROADCAST */
	__umtx_op_wait_uint,		/* UMTX_OP_WAIT_UINT */
	__umtx_op_rw_rdlock,		/* UMTX_OP_RW_RDLOCK */
	__umtx_op_rw_wrlock,		/* UMTX_OP_RW_WRLOCK */
	__umtx_op_rw_unlock,		/* UMTX_OP_RW_UNLOCK */
	__umtx_op_wait_uint_private,	/* UMTX_OP_WAIT_UINT_PRIVATE */
	__umtx_op_wake_private,		/* UMTX_OP_WAKE_PRIVATE */
	__umtx_op_wait_umutex,		/* UMTX_OP_UMUTEX_WAIT */
	__umtx_op_wake_umutex		/* UMTX_OP_UMUTEX_WAKE */
};
3057
3058 int
3059 _umtx_op(struct thread *td, struct _umtx_op_args *uap)
3060 {
3061 if ((unsigned)uap->op < UMTX_OP_MAX)
3062 return (*op_table[uap->op])(td, uap);
3063 return (EINVAL);
3064 }
3065
3066 #ifdef COMPAT_IA32
/* 32-bit compat: lock a umtx word with the caller's tid, no timeout. */
int
freebsd32_umtx_lock(struct thread *td, struct freebsd32_umtx_lock_args *uap)
    /* struct umtx *umtx */
{
	return (do_lock_umtx32(td, (uint32_t *)uap->umtx, td->td_tid, NULL));
}
3073
/* 32-bit compat: unlock a umtx word owned by the caller's tid. */
int
freebsd32_umtx_unlock(struct thread *td, struct freebsd32_umtx_unlock_args *uap)
    /* struct umtx *umtx */
{
	return (do_unlock_umtx32(td, (uint32_t *)uap->umtx, td->td_tid));
}
3080
/*
 * Userland layout of struct timespec for 32-bit processes; widened to
 * the native struct timespec by copyin_timeout32().
 */
struct timespec32 {
	u_int32_t tv_sec;
	u_int32_t tv_nsec;
};
3085
3086 static inline int
3087 copyin_timeout32(void *addr, struct timespec *tsp)
3088 {
3089 struct timespec32 ts32;
3090 int error;
3091
3092 error = copyin(addr, &ts32, sizeof(struct timespec32));
3093 if (error == 0) {
3094 tsp->tv_sec = ts32.tv_sec;
3095 tsp->tv_nsec = ts32.tv_nsec;
3096 }
3097 return (error);
3098 }
3099
3100 static int
3101 __umtx_op_lock_umtx_compat32(struct thread *td, struct _umtx_op_args *uap)
3102 {
3103 struct timespec *ts, timeout;
3104 int error;
3105
3106 /* Allow a null timespec (wait forever). */
3107 if (uap->uaddr2 == NULL)
3108 ts = NULL;
3109 else {
3110 error = copyin_timeout32(uap->uaddr2, &timeout);
3111 if (error != 0)
3112 return (error);
3113 if (timeout.tv_nsec >= 1000000000 ||
3114 timeout.tv_nsec < 0) {
3115 return (EINVAL);
3116 }
3117 ts = &timeout;
3118 }
3119 return (do_lock_umtx32(td, uap->obj, uap->val, ts));
3120 }
3121
/* 32-bit compat UMTX_OP_UNLOCK: uap->val is the 32-bit owner id. */
static int
__umtx_op_unlock_umtx_compat32(struct thread *td, struct _umtx_op_args *uap)
{
	return (do_unlock_umtx32(td, uap->obj, (uint32_t)uap->val));
}
3127
3128 static int
3129 __umtx_op_wait_compat32(struct thread *td, struct _umtx_op_args *uap)
3130 {
3131 struct timespec *ts, timeout;
3132 int error;
3133
3134 if (uap->uaddr2 == NULL)
3135 ts = NULL;
3136 else {
3137 error = copyin_timeout32(uap->uaddr2, &timeout);
3138 if (error != 0)
3139 return (error);
3140 if (timeout.tv_nsec >= 1000000000 ||
3141 timeout.tv_nsec < 0)
3142 return (EINVAL);
3143 ts = &timeout;
3144 }
3145 return do_wait(td, uap->obj, uap->val, ts, 1, 0);
3146 }
3147
3148 static int
3149 __umtx_op_lock_umutex_compat32(struct thread *td, struct _umtx_op_args *uap)
3150 {
3151 struct timespec *ts, timeout;
3152 int error;
3153
3154 /* Allow a null timespec (wait forever). */
3155 if (uap->uaddr2 == NULL)
3156 ts = NULL;
3157 else {
3158 error = copyin_timeout32(uap->uaddr2, &timeout);
3159 if (error != 0)
3160 return (error);
3161 if (timeout.tv_nsec >= 1000000000 ||
3162 timeout.tv_nsec < 0)
3163 return (EINVAL);
3164 ts = &timeout;
3165 }
3166 return do_lock_umutex(td, uap->obj, ts, 0);
3167 }
3168
3169 static int
3170 __umtx_op_wait_umutex_compat32(struct thread *td, struct _umtx_op_args *uap)
3171 {
3172 struct timespec *ts, timeout;
3173 int error;
3174
3175 /* Allow a null timespec (wait forever). */
3176 if (uap->uaddr2 == NULL)
3177 ts = NULL;
3178 else {
3179 error = copyin_timeout32(uap->uaddr2, &timeout);
3180 if (error != 0)
3181 return (error);
3182 if (timeout.tv_nsec >= 1000000000 ||
3183 timeout.tv_nsec < 0)
3184 return (EINVAL);
3185 ts = &timeout;
3186 }
3187 return do_lock_umutex(td, uap->obj, ts, _UMUTEX_WAIT);
3188 }
3189
3190 static int
3191 __umtx_op_cv_wait_compat32(struct thread *td, struct _umtx_op_args *uap)
3192 {
3193 struct timespec *ts, timeout;
3194 int error;
3195
3196 /* Allow a null timespec (wait forever). */
3197 if (uap->uaddr2 == NULL)
3198 ts = NULL;
3199 else {
3200 error = copyin_timeout32(uap->uaddr2, &timeout);
3201 if (error != 0)
3202 return (error);
3203 if (timeout.tv_nsec >= 1000000000 ||
3204 timeout.tv_nsec < 0)
3205 return (EINVAL);
3206 ts = &timeout;
3207 }
3208 return (do_cv_wait(td, uap->obj, uap->uaddr1, ts, uap->val));
3209 }
3210
3211 static int
3212 __umtx_op_rw_rdlock_compat32(struct thread *td, struct _umtx_op_args *uap)
3213 {
3214 struct timespec timeout;
3215 int error;
3216
3217 /* Allow a null timespec (wait forever). */
3218 if (uap->uaddr2 == NULL) {
3219 error = do_rw_rdlock(td, uap->obj, uap->val, 0);
3220 } else {
3221 error = copyin(uap->uaddr2, &timeout,
3222 sizeof(timeout));
3223 if (error != 0)
3224 return (error);
3225 if (timeout.tv_nsec >= 1000000000 ||
3226 timeout.tv_nsec < 0) {
3227 return (EINVAL);
3228 }
3229 error = do_rw_rdlock2(td, uap->obj, uap->val, &timeout);
3230 }
3231 return (error);
3232 }
3233
3234 static int
3235 __umtx_op_rw_wrlock_compat32(struct thread *td, struct _umtx_op_args *uap)
3236 {
3237 struct timespec timeout;
3238 int error;
3239
3240 /* Allow a null timespec (wait forever). */
3241 if (uap->uaddr2 == NULL) {
3242 error = do_rw_wrlock(td, uap->obj, 0);
3243 } else {
3244 error = copyin_timeout32(uap->uaddr2, &timeout);
3245 if (error != 0)
3246 return (error);
3247 if (timeout.tv_nsec >= 1000000000 ||
3248 timeout.tv_nsec < 0) {
3249 return (EINVAL);
3250 }
3251
3252 error = do_rw_wrlock2(td, uap->obj, &timeout);
3253 }
3254 return (error);
3255 }
3256
3257 static int
3258 __umtx_op_wait_uint_private_compat32(struct thread *td, struct _umtx_op_args *uap)
3259 {
3260 struct timespec *ts, timeout;
3261 int error;
3262
3263 if (uap->uaddr2 == NULL)
3264 ts = NULL;
3265 else {
3266 error = copyin_timeout32(uap->uaddr2, &timeout);
3267 if (error != 0)
3268 return (error);
3269 if (timeout.tv_nsec >= 1000000000 ||
3270 timeout.tv_nsec < 0)
3271 return (EINVAL);
3272 ts = &timeout;
3273 }
3274 return do_wait(td, uap->obj, uap->val, ts, 1, 1);
3275 }
3276
/*
 * 32-bit compat dispatch table; must stay in the same UMTX_OP_* order
 * as the native op_table above.  (The MUTEX_TRYLOCK/MUTEX_LOCK
 * comments were previously swapped; the function order was correct.)
 */
static _umtx_op_func op_table_compat32[] = {
	__umtx_op_lock_umtx_compat32,	/* UMTX_OP_LOCK */
	__umtx_op_unlock_umtx_compat32,	/* UMTX_OP_UNLOCK */
	__umtx_op_wait_compat32,	/* UMTX_OP_WAIT */
	__umtx_op_wake,			/* UMTX_OP_WAKE */
	__umtx_op_trylock_umutex,	/* UMTX_OP_MUTEX_TRYLOCK */
	__umtx_op_lock_umutex_compat32,	/* UMTX_OP_MUTEX_LOCK */
	__umtx_op_unlock_umutex,	/* UMTX_OP_MUTEX_UNLOCK */
	__umtx_op_set_ceiling,		/* UMTX_OP_SET_CEILING */
	__umtx_op_cv_wait_compat32,	/* UMTX_OP_CV_WAIT*/
	__umtx_op_cv_signal,		/* UMTX_OP_CV_SIGNAL */
	__umtx_op_cv_broadcast,		/* UMTX_OP_CV_BROADCAST */
	__umtx_op_wait_compat32,	/* UMTX_OP_WAIT_UINT */
	__umtx_op_rw_rdlock_compat32,	/* UMTX_OP_RW_RDLOCK */
	__umtx_op_rw_wrlock_compat32,	/* UMTX_OP_RW_WRLOCK */
	__umtx_op_rw_unlock,		/* UMTX_OP_RW_UNLOCK */
	__umtx_op_wait_uint_private_compat32,	/* UMTX_OP_WAIT_UINT_PRIVATE */
	__umtx_op_wake_private,		/* UMTX_OP_WAKE_PRIVATE */
	__umtx_op_wait_umutex_compat32, /* UMTX_OP_UMUTEX_WAIT */
	__umtx_op_wake_umutex		/* UMTX_OP_UMUTEX_WAKE */
};
3298
3299 int
3300 freebsd32_umtx_op(struct thread *td, struct freebsd32_umtx_op_args *uap)
3301 {
3302 if ((unsigned)uap->op < UMTX_OP_MAX)
3303 return (*op_table_compat32[uap->op])(td,
3304 (struct _umtx_op_args *)uap);
3305 return (EINVAL);
3306 }
3307 #endif
3308
3309 void
3310 umtx_thread_init(struct thread *td)
3311 {
3312 td->td_umtxq = umtxq_alloc();
3313 td->td_umtxq->uq_thread = td;
3314 }
3315
/* Thread-destruction hook: free the per-thread umtx queue entry. */
void
umtx_thread_fini(struct thread *td)
{
	umtxq_free(td->td_umtxq);
}
3321
3322 /*
3323 * It will be called when new thread is created, e.g fork().
3324 */
void
umtx_thread_alloc(struct thread *td)
{
	struct umtx_q *uq;

	uq = td->td_umtxq;
	/* Start with no inherited (borrowed) priority. */
	uq->uq_inherited_pri = PRI_MAX;

	/* A recycled thread must not carry any stale umtx state. */
	KASSERT(uq->uq_flags == 0, ("uq_flags != 0"));
	KASSERT(uq->uq_thread == td, ("uq_thread != td"));
	KASSERT(uq->uq_pi_blocked == NULL, ("uq_pi_blocked != NULL"));
	KASSERT(TAILQ_EMPTY(&uq->uq_pi_contested), ("uq_pi_contested is not empty"));
}
3338
3339 /*
3340 * exec() hook.
3341 */
static void
umtx_exec_hook(void *arg __unused, struct proc *p __unused,
	struct image_params *imgp __unused)
{
	/* Drop the exec'ing thread's umtx state (PI ownership, etc.). */
	umtx_thread_cleanup(curthread);
}
3348
3349 /*
3350 * thread_exit() hook.
3351 */
/* thread_exit() hook: release the exiting thread's umtx state. */
void
umtx_thread_exit(struct thread *td)
{
	umtx_thread_cleanup(td);
}
3357
3358 /*
3359 * clean up umtx data.
3360 */
/*
 * Release a thread's umtx state: disown any priority-inheritance
 * mutexes still contested by it and drop any borrowed priority.
 * Called on thread exit and exec.
 */
static void
umtx_thread_cleanup(struct thread *td)
{
	struct umtx_q *uq;
	struct umtx_pi *pi;

	if ((uq = td->td_umtxq) == NULL)
		return;

	mtx_lock_spin(&umtx_lock);
	uq->uq_inherited_pri = PRI_MAX;
	/* Orphan every PI mutex this thread still owns contested. */
	while ((pi = TAILQ_FIRST(&uq->uq_pi_contested)) != NULL) {
		pi->pi_owner = NULL;
		TAILQ_REMOVE(&uq->uq_pi_contested, pi, pi_link);
	}
	/* Clear the "priority borrowed via umtx PI" flag. */
	thread_lock(td);
	td->td_flags &= ~TDF_UBORROWING;
	thread_unlock(td);
	mtx_unlock_spin(&umtx_lock);
}
Cache object: f4ea89f5b1361a38a3c3bafbf4968c76
|