FreeBSD/Linux Kernel Cross Reference
sys/kern/kern_umtx.c
1 /*-
2 * Copyright (c) 2004, David Xu <davidxu@freebsd.org>
3 * Copyright (c) 2002, Jeffrey Roberson <jeff@freebsd.org>
4 * All rights reserved.
5 *
6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions
8 * are met:
9 * 1. Redistributions of source code must retain the above copyright
10 * notice unmodified, this list of conditions, and the following
11 * disclaimer.
12 * 2. Redistributions in binary form must reproduce the above copyright
13 * notice, this list of conditions and the following disclaimer in the
14 * documentation and/or other materials provided with the distribution.
15 *
16 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
17 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
18 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
19 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
20 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
21 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
22 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
23 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
24 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
25 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26 */
27
28 #include <sys/cdefs.h>
29 __FBSDID("$FreeBSD: releng/7.3/sys/kern/kern_umtx.c 203694 2010-02-09 01:19:10Z davidxu $");
30
31 #include "opt_compat.h"
32 #include <sys/param.h>
33 #include <sys/kernel.h>
34 #include <sys/limits.h>
35 #include <sys/lock.h>
36 #include <sys/malloc.h>
37 #include <sys/mutex.h>
38 #include <sys/priv.h>
39 #include <sys/proc.h>
40 #include <sys/sched.h>
41 #include <sys/smp.h>
42 #include <sys/sysctl.h>
43 #include <sys/sysent.h>
44 #include <sys/systm.h>
45 #include <sys/sysproto.h>
46 #include <sys/eventhandler.h>
47 #include <sys/umtx.h>
48
49 #include <vm/vm.h>
50 #include <vm/vm_param.h>
51 #include <vm/pmap.h>
52 #include <vm/vm_map.h>
53 #include <vm/vm_object.h>
54
55 #include <machine/cpu.h>
56
57 #ifdef COMPAT_IA32
58 #include <compat/freebsd32/freebsd32_proto.h>
59 #endif
60
61 #define TYPE_SIMPLE_WAIT 0
62 #define TYPE_CV 1
63 #define TYPE_SIMPLE_LOCK 2
64 #define TYPE_NORMAL_UMUTEX 3
65 #define TYPE_PI_UMUTEX 4
66 #define TYPE_PP_UMUTEX 5
67 #define TYPE_RWLOCK 6
68
69 #define _UMUTEX_TRY 1
70 #define _UMUTEX_WAIT 2
71
/*
 * Key to represent a unique userland synchronous object.
 * A key identifies the object either by (vmspace, address) for
 * thread/process-private objects or by (vm_object, offset) for
 * process-shared ones; the 'both' view overlays the two so that
 * umtx_key_match() and umtxq_hash() can treat them uniformly.
 */
struct umtx_key {
	int	hash;		/* Chain index, computed by umtxq_hash(). */
	int	type;		/* TYPE_* object class. */
	int	shared;		/* Non-zero: 'shared' union member is valid
				   and a vm_object reference is held. */
	union {
		struct {
			vm_object_t	object;	/* Backing VM object. */
			uintptr_t	offset;	/* Offset within the object. */
		} shared;
		struct {
			struct vmspace	*vs;	/* Owning address space. */
			uintptr_t	addr;	/* User virtual address. */
		} private;
		struct {
			void		*a;	/* Overlays object / vs. */
			uintptr_t	b;	/* Overlays offset / addr. */
		} both;
	} info;
};
92
/*
 * Priority inheritance mutex info.
 * One umtx_pi exists per contested PI umutex; it tracks the current
 * owner thread and the queue of threads blocked on it so that
 * priority can be propagated to the owner.
 */
struct umtx_pi {
	/* Owner thread */
	struct thread		*pi_owner;

	/* Reference count */
	int			pi_refcount;

	/* List entry to link umtx held by a thread (uq_pi_contested). */
	TAILQ_ENTRY(umtx_pi)	pi_link;

	/* List entry in the chain's uc_pi_list hash bucket. */
	TAILQ_ENTRY(umtx_pi)	pi_hashlink;

	/* List of waiters blocked on this PI mutex. */
	TAILQ_HEAD(,umtx_q)	pi_blocked;

	/* Identifies the userland lock object backing this PI state. */
	struct umtx_key		pi_key;
};
113
/*
 * A userland synchronous object user: the per-thread record used to
 * queue a thread on a wait chain and to track its PI state.
 */
struct umtx_q {
	/* Linked list for the hash. */
	TAILQ_ENTRY(umtx_q)	uq_link;

	/* Umtx key identifying the object being waited on. */
	struct umtx_key		uq_key;

	/* Umtx flags. */
	int			uq_flags;
#define UQF_UMTXQ	0x0001	/* Currently linked on a chain queue. */

	/* The thread this record belongs to. */
	struct thread		*uq_thread;

	/*
	 * Blocked on PI mutex. read can use chain lock
	 * or umtx_lock, write must have both chain lock and
	 * umtx_lock being hold.
	 */
	struct umtx_pi		*uq_pi_blocked;

	/* Entry on the PI mutex's pi_blocked list. */
	TAILQ_ENTRY(umtx_q)	uq_lockq;

	/* PI mutexes owned by this thread that have waiters. */
	TAILQ_HEAD(,umtx_pi)	uq_pi_contested;

	/* Inherited priority from PP mutex */
	u_char			uq_inherited_pri;
};
145
146 TAILQ_HEAD(umtxq_head, umtx_q);
147
/*
 * Userland lock object's wait-queue chain: one hash bucket holding
 * the sleep queues, the busy flag that serializes blocking
 * operations, and the PI state list for keys hashing here.
 */
struct umtxq_chain {
	/* Lock for this chain. */
	struct mtx		uc_lock;

	/* List of sleep queues. */
	struct umtxq_head	uc_queue[2];
#define UMTX_SHARED_QUEUE	0
#define UMTX_EXCLUSIVE_QUEUE	1

	/* Busy flag: set while an operation that may block is in flight. */
	char			uc_busy;

	/* Number of threads sleeping for the chain to become un-busy. */
	int			uc_waiters;

	/* All PI state attached to keys hashing to this chain. */
	TAILQ_HEAD(,umtx_pi)	uc_pi_list;
};
167
#define UMTXQ_LOCKED_ASSERT(uc)	mtx_assert(&(uc)->uc_lock, MA_OWNED)
/*
 * Assert that the chain is marked busy.  The flag's VALUE must be
 * tested: the previous form, KASSERT(&(uc)->uc_busy, ...), asserted
 * the address of the member, which is always non-NULL, so the
 * assertion could never fire.
 */
#define UMTXQ_BUSY_ASSERT(uc)	KASSERT((uc)->uc_busy != 0, ("umtx chain is not busy"))
170
171 /*
172 * Don't propagate time-sharing priority, there is a security reason,
173 * a user can simply introduce PI-mutex, let thread A lock the mutex,
174 * and let another thread B block on the mutex, because B is
175 * sleeping, its priority will be boosted, this causes A's priority to
176 * be boosted via priority propagating too and will never be lowered even
177 * if it is using 100%CPU, this is unfair to other processes.
178 */
179
180 #define UPRI(td) (((td)->td_user_pri >= PRI_MIN_TIMESHARE &&\
181 (td)->td_user_pri <= PRI_MAX_TIMESHARE) ?\
182 PRI_MAX_TIMESHARE : (td)->td_user_pri)
183
184 #define GOLDEN_RATIO_PRIME 2654404609U
185 #define UMTX_CHAINS 128
186 #define UMTX_SHIFTS (__WORD_BIT - 7)
187
188 #define THREAD_SHARE 0
189 #define PROCESS_SHARE 1
190 #define AUTO_SHARE 2
191
192 #define GET_SHARE(flags) \
193 (((flags) & USYNC_PROCESS_SHARED) == 0 ? THREAD_SHARE : PROCESS_SHARE)
194
195 #define BUSY_SPINS 200
196
197 static uma_zone_t umtx_pi_zone;
198 static struct umtxq_chain umtxq_chains[2][UMTX_CHAINS];
199 static MALLOC_DEFINE(M_UMTX, "umtx", "UMTX queue memory");
200 static int umtx_pi_allocated;
201
202 SYSCTL_NODE(_debug, OID_AUTO, umtx, CTLFLAG_RW, 0, "umtx debug");
203 SYSCTL_INT(_debug_umtx, OID_AUTO, umtx_pi_allocated, CTLFLAG_RD,
204 &umtx_pi_allocated, 0, "Allocated umtx_pi");
205
206 static void umtxq_sysinit(void *);
207 static void umtxq_hash(struct umtx_key *key);
208 static struct umtxq_chain *umtxq_getchain(struct umtx_key *key);
209 static void umtxq_lock(struct umtx_key *key);
210 static void umtxq_unlock(struct umtx_key *key);
211 static void umtxq_busy(struct umtx_key *key);
212 static void umtxq_unbusy(struct umtx_key *key);
213 static void umtxq_insert_queue(struct umtx_q *uq, int q);
214 static void umtxq_remove_queue(struct umtx_q *uq, int q);
215 static int umtxq_sleep(struct umtx_q *uq, const char *wmesg, int timo);
216 static int umtxq_count(struct umtx_key *key);
217 static int umtx_key_match(const struct umtx_key *k1, const struct umtx_key *k2);
218 static int umtx_key_get(void *addr, int type, int share,
219 struct umtx_key *key);
220 static void umtx_key_release(struct umtx_key *key);
221 static struct umtx_pi *umtx_pi_alloc(int);
222 static void umtx_pi_free(struct umtx_pi *pi);
223 static void umtx_pi_adjust_locked(struct thread *td, u_char oldpri);
224 static int do_unlock_pp(struct thread *td, struct umutex *m, uint32_t flags);
225 static void umtx_thread_cleanup(struct thread *td);
226 static void umtx_exec_hook(void *arg __unused, struct proc *p __unused,
227 struct image_params *imgp __unused);
228 SYSINIT(umtx, SI_SUB_EVENTHANDLER+1, SI_ORDER_MIDDLE, umtxq_sysinit, NULL);
229
230 #define umtxq_signal(key, nwake) umtxq_signal_queue((key), (nwake), UMTX_SHARED_QUEUE)
231 #define umtxq_insert(uq) umtxq_insert_queue((uq), UMTX_SHARED_QUEUE)
232 #define umtxq_remove(uq) umtxq_remove_queue((uq), UMTX_SHARED_QUEUE)
233
234 static struct mtx umtx_lock;
235
236 static void
237 umtxq_sysinit(void *arg __unused)
238 {
239 int i, j;
240
241 umtx_pi_zone = uma_zcreate("umtx pi", sizeof(struct umtx_pi),
242 NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0);
243 for (i = 0; i < 2; ++i) {
244 for (j = 0; j < UMTX_CHAINS; ++j) {
245 mtx_init(&umtxq_chains[i][j].uc_lock, "umtxql", NULL,
246 MTX_DEF | MTX_DUPOK);
247 TAILQ_INIT(&umtxq_chains[i][j].uc_queue[0]);
248 TAILQ_INIT(&umtxq_chains[i][j].uc_queue[1]);
249 TAILQ_INIT(&umtxq_chains[i][j].uc_pi_list);
250 umtxq_chains[i][j].uc_busy = 0;
251 umtxq_chains[i][j].uc_waiters = 0;
252 }
253 }
254 mtx_init(&umtx_lock, "umtx lock", NULL, MTX_SPIN);
255 EVENTHANDLER_REGISTER(process_exec, umtx_exec_hook, NULL,
256 EVENTHANDLER_PRI_ANY);
257 }
258
259 struct umtx_q *
260 umtxq_alloc(void)
261 {
262 struct umtx_q *uq;
263
264 uq = malloc(sizeof(struct umtx_q), M_UMTX, M_WAITOK | M_ZERO);
265 TAILQ_INIT(&uq->uq_pi_contested);
266 uq->uq_inherited_pri = PRI_MAX;
267 return (uq);
268 }
269
/*
 * Release a umtx queue record previously obtained from umtxq_alloc().
 */
void
umtxq_free(struct umtx_q *uq)
{
	free(uq, M_UMTX);
}
275
276 static inline void
277 umtxq_hash(struct umtx_key *key)
278 {
279 unsigned n = (uintptr_t)key->info.both.a + key->info.both.b;
280 key->hash = ((n * GOLDEN_RATIO_PRIME) >> UMTX_SHIFTS) % UMTX_CHAINS;
281 }
282
283 static inline int
284 umtx_key_match(const struct umtx_key *k1, const struct umtx_key *k2)
285 {
286 return (k1->type == k2->type &&
287 k1->info.both.a == k2->info.both.a &&
288 k1->info.both.b == k2->info.both.b);
289 }
290
291 static inline struct umtxq_chain *
292 umtxq_getchain(struct umtx_key *key)
293 {
294 if (key->type <= TYPE_CV)
295 return (&umtxq_chains[1][key->hash]);
296 return (&umtxq_chains[0][key->hash]);
297 }
298
299 /*
300 * Lock a chain.
301 */
302 static inline void
303 umtxq_lock(struct umtx_key *key)
304 {
305 struct umtxq_chain *uc;
306
307 uc = umtxq_getchain(key);
308 mtx_lock(&uc->uc_lock);
309 }
310
311 /*
312 * Unlock a chain.
313 */
314 static inline void
315 umtxq_unlock(struct umtx_key *key)
316 {
317 struct umtxq_chain *uc;
318
319 uc = umtxq_getchain(key);
320 mtx_unlock(&uc->uc_lock);
321 }
322
/*
 * Set chain to busy state when following operation
 * may be blocked (kernel mutex can not be used).
 *
 * Called and returns with the chain lock held.  If the chain is
 * already busy, first spin briefly with the lock dropped (on MP
 * only), then sleep on the chain until the holder calls
 * umtxq_unbusy().
 */
static inline void
umtxq_busy(struct umtx_key *key)
{
	struct umtxq_chain *uc;

	uc = umtxq_getchain(key);
	mtx_assert(&uc->uc_lock, MA_OWNED);
	if (uc->uc_busy) {
#ifdef SMP
		if (smp_cpus > 1) {
			int count = BUSY_SPINS;
			if (count > 0) {
				/*
				 * Spin without the chain lock; uc_busy is
				 * re-checked under the lock below, so a
				 * stale read here only costs a sleep.
				 */
				umtxq_unlock(key);
				while (uc->uc_busy && --count > 0)
					cpu_spinwait();
				umtxq_lock(key);
			}
		}
#endif
		/* Re-test under the lock and sleep until unbusied. */
		while (uc->uc_busy) {
			uc->uc_waiters++;
			msleep(uc, &uc->uc_lock, 0, "umtxqb", 0);
			uc->uc_waiters--;
		}
	}
	uc->uc_busy = 1;
}
354
355 /*
356 * Unbusy a chain.
357 */
358 static inline void
359 umtxq_unbusy(struct umtx_key *key)
360 {
361 struct umtxq_chain *uc;
362
363 uc = umtxq_getchain(key);
364 mtx_assert(&uc->uc_lock, MA_OWNED);
365 KASSERT(uc->uc_busy != 0, ("not busy"));
366 uc->uc_busy = 0;
367 if (uc->uc_waiters)
368 wakeup_one(uc);
369 }
370
371 static inline void
372 umtxq_insert_queue(struct umtx_q *uq, int q)
373 {
374 struct umtxq_chain *uc;
375
376 uc = umtxq_getchain(&uq->uq_key);
377 UMTXQ_LOCKED_ASSERT(uc);
378 TAILQ_INSERT_TAIL(&uc->uc_queue[q], uq, uq_link);
379 uq->uq_flags |= UQF_UMTXQ;
380 }
381
382 static inline void
383 umtxq_remove_queue(struct umtx_q *uq, int q)
384 {
385 struct umtxq_chain *uc;
386
387 uc = umtxq_getchain(&uq->uq_key);
388 UMTXQ_LOCKED_ASSERT(uc);
389 if (uq->uq_flags & UQF_UMTXQ) {
390 TAILQ_REMOVE(&uc->uc_queue[q], uq, uq_link);
391 uq->uq_flags &= ~UQF_UMTXQ;
392 }
393 }
394
395 /*
396 * Check if there are multiple waiters
397 */
398 static int
399 umtxq_count(struct umtx_key *key)
400 {
401 struct umtxq_chain *uc;
402 struct umtx_q *uq;
403 int count = 0;
404
405 uc = umtxq_getchain(key);
406 UMTXQ_LOCKED_ASSERT(uc);
407 TAILQ_FOREACH(uq, &uc->uc_queue[UMTX_SHARED_QUEUE], uq_link) {
408 if (umtx_key_match(&uq->uq_key, key)) {
409 if (++count > 1)
410 break;
411 }
412 }
413 return (count);
414 }
415
/*
 * Check if there are multiple PI waiters and returns first
 * waiter.
 *
 * Like umtxq_count() the result is capped at 2.  Note the ordering
 * inside the loop: on the second match the loop breaks *before*
 * assigning '*first', so '*first' always points at the head waiter
 * (or is NULL when there are no matches).  Chain lock must be held.
 */
static int
umtxq_count_pi(struct umtx_key *key, struct umtx_q **first)
{
	struct umtxq_chain *uc;
	struct umtx_q *uq;
	int count = 0;

	*first = NULL;
	uc = umtxq_getchain(key);
	UMTXQ_LOCKED_ASSERT(uc);
	TAILQ_FOREACH(uq, &uc->uc_queue[UMTX_SHARED_QUEUE], uq_link) {
		if (umtx_key_match(&uq->uq_key, key)) {
			if (++count > 1)
				break;
			*first = uq;
		}
	}
	return (count);
}
439
440 /*
441 * Wake up threads waiting on an userland object.
442 */
443
444 static int
445 umtxq_signal_queue(struct umtx_key *key, int n_wake, int q)
446 {
447 struct umtxq_chain *uc;
448 struct umtx_q *uq, *next;
449 int ret;
450
451 ret = 0;
452 uc = umtxq_getchain(key);
453 UMTXQ_LOCKED_ASSERT(uc);
454 TAILQ_FOREACH_SAFE(uq, &uc->uc_queue[q], uq_link, next) {
455 if (umtx_key_match(&uq->uq_key, key)) {
456 umtxq_remove_queue(uq, q);
457 wakeup(uq);
458 if (++ret >= n_wake)
459 break;
460 }
461 }
462 return (ret);
463 }
464
465
466 /*
467 * Wake up specified thread.
468 */
469 static inline void
470 umtxq_signal_thread(struct umtx_q *uq)
471 {
472 struct umtxq_chain *uc;
473
474 uc = umtxq_getchain(&uq->uq_key);
475 UMTXQ_LOCKED_ASSERT(uc);
476 umtxq_remove(uq);
477 wakeup(uq);
478 }
479
480 /*
481 * Put thread into sleep state, before sleeping, check if
482 * thread was removed from umtx queue.
483 */
484 static inline int
485 umtxq_sleep(struct umtx_q *uq, const char *wmesg, int timo)
486 {
487 struct umtxq_chain *uc;
488 int error;
489
490 uc = umtxq_getchain(&uq->uq_key);
491 UMTXQ_LOCKED_ASSERT(uc);
492 if (!(uq->uq_flags & UQF_UMTXQ))
493 return (0);
494 error = msleep(uq, &uc->uc_lock, PCATCH, wmesg, timo);
495 if (error == EWOULDBLOCK)
496 error = ETIMEDOUT;
497 return (error);
498 }
499
/*
 * Convert userspace address into unique logical address.
 *
 * For THREAD_SHARE the key is simply (vmspace, address).  Otherwise
 * the address is looked up in the process map: if the object is
 * process-shared (or AUTO_SHARE with an inheritable-shared entry),
 * the key becomes (vm_object, offset) and a reference is taken on
 * the object (released by umtx_key_release()); otherwise it falls
 * back to the private form.  Returns 0 or EFAULT on a bad address.
 */
static int
umtx_key_get(void *addr, int type, int share, struct umtx_key *key)
{
	struct thread *td = curthread;
	vm_map_t map;
	vm_map_entry_t entry;
	vm_pindex_t pindex;
	vm_prot_t prot;
	boolean_t wired;

	key->type = type;
	if (share == THREAD_SHARE) {
		key->shared = 0;
		key->info.private.vs = td->td_proc->p_vmspace;
		key->info.private.addr = (uintptr_t)addr;
	} else {
		MPASS(share == PROCESS_SHARE || share == AUTO_SHARE);
		map = &td->td_proc->p_vmspace->vm_map;
		if (vm_map_lookup(&map, (vm_offset_t)addr, VM_PROT_WRITE,
		    &entry, &key->info.shared.object, &pindex, &prot,
		    &wired) != KERN_SUCCESS) {
			return EFAULT;
		}

		if ((share == PROCESS_SHARE) ||
		    (share == AUTO_SHARE &&
		     VM_INHERIT_SHARE == entry->inheritance)) {
			key->shared = 1;
			/* Object-relative offset of 'addr' in this mapping. */
			key->info.shared.offset = entry->offset + entry->start -
				(vm_offset_t)addr;
			vm_object_reference(key->info.shared.object);
		} else {
			key->shared = 0;
			key->info.private.vs = td->td_proc->p_vmspace;
			key->info.private.addr = (uintptr_t)addr;
		}
		/* Drop the map lock taken by vm_map_lookup(). */
		vm_map_lookup_done(map, entry);
	}

	umtxq_hash(key);
	return (0);
}
545
/*
 * Release a key obtained from umtx_key_get(): drop the vm_object
 * reference that was taken for process-shared keys.
 */
static inline void
umtx_key_release(struct umtx_key *key)
{
	if (key->shared)
		vm_object_deallocate(key->info.shared.object);
}
555
/*
 * Lock a umtx object (old-style struct umtx interface).
 *
 * Loops attempting compare-and-swap acquisition of the user word;
 * on contention the thread queues itself on the key's chain, sets
 * the contested bit, and sleeps for up to 'timo' ticks.  Returns 0
 * on success, EFAULT on a bad user address, or the sleep error
 * (e.g. EINTR/ERESTART/ETIMEDOUT) on the retry after a failed sleep.
 */
static int
_do_lock_umtx(struct thread *td, struct umtx *umtx, u_long id, int timo)
{
	struct umtx_q *uq;
	u_long owner;
	u_long old;
	int error = 0;

	uq = td->td_umtxq;

	/*
	 * Care must be exercised when dealing with umtx structure. It
	 * can fault on any access.
	 */
	for (;;) {
		/*
		 * Try the uncontested case. This should be done in userland.
		 */
		owner = casuword(&umtx->u_owner, UMTX_UNOWNED, id);

		/* The acquire succeeded. */
		if (owner == UMTX_UNOWNED)
			return (0);

		/* The address was invalid. */
		if (owner == -1)
			return (EFAULT);

		/* If no one owns it but it is contested try to acquire it. */
		if (owner == UMTX_CONTESTED) {
			owner = casuword(&umtx->u_owner,
			    UMTX_CONTESTED, id | UMTX_CONTESTED);

			if (owner == UMTX_CONTESTED)
				return (0);

			/* The address was invalid. */
			if (owner == -1)
				return (EFAULT);

			/* If this failed the lock has changed, restart. */
			continue;
		}

		/*
		 * If we caught a signal, we have retried and now
		 * exit immediately.
		 */
		if (error != 0)
			return (error);

		if ((error = umtx_key_get(umtx, TYPE_SIMPLE_LOCK,
		    AUTO_SHARE, &uq->uq_key)) != 0)
			return (error);

		/* Queue ourselves before publishing the contested bit. */
		umtxq_lock(&uq->uq_key);
		umtxq_busy(&uq->uq_key);
		umtxq_insert(uq);
		umtxq_unbusy(&uq->uq_key);
		umtxq_unlock(&uq->uq_key);

		/*
		 * Set the contested bit so that a release in user space
		 * knows to use the system call for unlock. If this fails
		 * either some one else has acquired the lock or it has been
		 * released.
		 */
		old = casuword(&umtx->u_owner, owner, owner | UMTX_CONTESTED);

		/* The address was invalid. */
		if (old == -1) {
			umtxq_lock(&uq->uq_key);
			umtxq_remove(uq);
			umtxq_unlock(&uq->uq_key);
			umtx_key_release(&uq->uq_key);
			return (EFAULT);
		}

		/*
		 * We set the contested bit, sleep. Otherwise the lock changed
		 * and we need to retry or we lost a race to the thread
		 * unlocking the umtx.
		 */
		umtxq_lock(&uq->uq_key);
		if (old == owner)
			error = umtxq_sleep(uq, "umtx", timo);
		umtxq_remove(uq);
		umtxq_unlock(&uq->uq_key);
		umtx_key_release(&uq->uq_key);
	}

	return (0);
}
652
/*
 * Lock a umtx object, with an optional relative timeout.
 *
 * Without a timeout the wait is unbounded and an interrupted lock
 * is made restartable (EINTR -> ERESTART).  With a timeout the
 * absolute deadline is computed once and the remaining time is
 * recomputed after every ETIMEDOUT retry; a timed lock is never
 * restarted (ERESTART -> EINTR).
 */
static int
do_lock_umtx(struct thread *td, struct umtx *umtx, u_long id,
	struct timespec *timeout)
{
	struct timespec ts, ts2, ts3;
	struct timeval tv;
	int error;

	if (timeout == NULL) {
		error = _do_lock_umtx(td, umtx, id, 0);
		/* Mutex locking is restarted if it is interrupted. */
		if (error == EINTR)
			error = ERESTART;
	} else {
		/* ts holds the absolute deadline on the uptime clock. */
		getnanouptime(&ts);
		timespecadd(&ts, timeout);
		TIMESPEC_TO_TIMEVAL(&tv, timeout);
		for (;;) {
			error = _do_lock_umtx(td, umtx, id, tvtohz(&tv));
			if (error != ETIMEDOUT)
				break;
			getnanouptime(&ts2);
			if (timespeccmp(&ts2, &ts, >=)) {
				error = ETIMEDOUT;
				break;
			}
			/* Deadline not reached: retry with the remainder. */
			ts3 = ts;
			timespecsub(&ts3, &ts2);
			TIMESPEC_TO_TIMEVAL(&tv, &ts3);
		}
		/* Timed-locking is not restarted. */
		if (error == ERESTART)
			error = EINTR;
	}
	return (error);
}
692
/*
 * Unlock a umtx object.
 *
 * Verifies ownership, handles the uncontested fast path with a
 * single CAS, and otherwise hands the lock off: the word is set to
 * UMTX_UNOWNED (<= 1 waiter) or UMTX_CONTESTED (> 1 waiter) and one
 * waiter is woken.  Returns 0, EFAULT on a bad address, EPERM if
 * the caller does not own the lock, or EINVAL if the word changed
 * underneath us.
 */
static int
do_unlock_umtx(struct thread *td, struct umtx *umtx, u_long id)
{
	struct umtx_key key;
	u_long owner;
	u_long old;
	int error;
	int count;

	/*
	 * Make sure we own this mtx.
	 */
	owner = fuword(__DEVOLATILE(u_long *, &umtx->u_owner));
	if (owner == -1)
		return (EFAULT);

	if ((owner & ~UMTX_CONTESTED) != id)
		return (EPERM);

	/* This should be done in userland */
	if ((owner & UMTX_CONTESTED) == 0) {
		old = casuword(&umtx->u_owner, owner, UMTX_UNOWNED);
		if (old == -1)
			return (EFAULT);
		if (old == owner)
			return (0);
		/* Someone set the contested bit; fall through. */
		owner = old;
	}

	/* We should only ever be in here for contested locks */
	if ((error = umtx_key_get(umtx, TYPE_SIMPLE_LOCK, AUTO_SHARE,
	    &key)) != 0)
		return (error);

	umtxq_lock(&key);
	umtxq_busy(&key);
	count = umtxq_count(&key);
	umtxq_unlock(&key);

	/*
	 * When unlocking the umtx, it must be marked as unowned if
	 * there is zero or one thread only waiting for it.
	 * Otherwise, it must be marked as contested.
	 */
	old = casuword(&umtx->u_owner, owner,
		count <= 1 ? UMTX_UNOWNED : UMTX_CONTESTED);
	umtxq_lock(&key);
	umtxq_signal(&key,1);
	umtxq_unbusy(&key);
	umtxq_unlock(&key);
	umtx_key_release(&key);
	if (old == -1)
		return (EFAULT);
	if (old != owner)
		return (EINVAL);
	return (0);
}
753
754 #ifdef COMPAT_IA32
755
/*
 * Lock a umtx object (COMPAT_IA32: 32-bit lock word).
 *
 * Mirror of _do_lock_umtx() operating on a 32-bit word with
 * casuword32(); see that function for the locking protocol.
 */
static int
_do_lock_umtx32(struct thread *td, uint32_t *m, uint32_t id, int timo)
{
	struct umtx_q *uq;
	uint32_t owner;
	uint32_t old;
	int error = 0;

	uq = td->td_umtxq;

	/*
	 * Care must be exercised when dealing with umtx structure. It
	 * can fault on any access.
	 */
	for (;;) {
		/*
		 * Try the uncontested case. This should be done in userland.
		 */
		owner = casuword32(m, UMUTEX_UNOWNED, id);

		/* The acquire succeeded. */
		if (owner == UMUTEX_UNOWNED)
			return (0);

		/* The address was invalid. */
		if (owner == -1)
			return (EFAULT);

		/* If no one owns it but it is contested try to acquire it. */
		if (owner == UMUTEX_CONTESTED) {
			owner = casuword32(m,
			    UMUTEX_CONTESTED, id | UMUTEX_CONTESTED);
			if (owner == UMUTEX_CONTESTED)
				return (0);

			/* The address was invalid. */
			if (owner == -1)
				return (EFAULT);

			/* If this failed the lock has changed, restart. */
			continue;
		}

		/*
		 * If we caught a signal, we have retried and now
		 * exit immediately.
		 */
		if (error != 0)
			return (error);

		if ((error = umtx_key_get(m, TYPE_SIMPLE_LOCK,
			AUTO_SHARE, &uq->uq_key)) != 0)
			return (error);

		/* Queue ourselves before publishing the contested bit. */
		umtxq_lock(&uq->uq_key);
		umtxq_busy(&uq->uq_key);
		umtxq_insert(uq);
		umtxq_unbusy(&uq->uq_key);
		umtxq_unlock(&uq->uq_key);

		/*
		 * Set the contested bit so that a release in user space
		 * knows to use the system call for unlock. If this fails
		 * either some one else has acquired the lock or it has been
		 * released.
		 */
		old = casuword32(m, owner, owner | UMUTEX_CONTESTED);

		/* The address was invalid. */
		if (old == -1) {
			umtxq_lock(&uq->uq_key);
			umtxq_remove(uq);
			umtxq_unlock(&uq->uq_key);
			umtx_key_release(&uq->uq_key);
			return (EFAULT);
		}

		/*
		 * We set the contested bit, sleep. Otherwise the lock changed
		 * and we need to retry or we lost a race to the thread
		 * unlocking the umtx.
		 */
		umtxq_lock(&uq->uq_key);
		if (old == owner)
			error = umtxq_sleep(uq, "umtx", timo);
		umtxq_remove(uq);
		umtxq_unlock(&uq->uq_key);
		umtx_key_release(&uq->uq_key);
	}

	return (0);
}
851
/*
 * Lock a umtx object (COMPAT_IA32), with an optional relative
 * timeout.  Mirror of do_lock_umtx(): untimed locks are restartable
 * (EINTR -> ERESTART), timed locks are not (ERESTART -> EINTR) and
 * recompute the remaining time after every ETIMEDOUT retry.
 */
static int
do_lock_umtx32(struct thread *td, void *m, uint32_t id,
	struct timespec *timeout)
{
	struct timespec ts, ts2, ts3;
	struct timeval tv;
	int error;

	if (timeout == NULL) {
		error = _do_lock_umtx32(td, m, id, 0);
		/* Mutex locking is restarted if it is interrupted. */
		if (error == EINTR)
			error = ERESTART;
	} else {
		/* ts holds the absolute deadline on the uptime clock. */
		getnanouptime(&ts);
		timespecadd(&ts, timeout);
		TIMESPEC_TO_TIMEVAL(&tv, timeout);
		for (;;) {
			error = _do_lock_umtx32(td, m, id, tvtohz(&tv));
			if (error != ETIMEDOUT)
				break;
			getnanouptime(&ts2);
			if (timespeccmp(&ts2, &ts, >=)) {
				error = ETIMEDOUT;
				break;
			}
			ts3 = ts;
			timespecsub(&ts3, &ts2);
			TIMESPEC_TO_TIMEVAL(&tv, &ts3);
		}
		/* Timed-locking is not restarted. */
		if (error == ERESTART)
			error = EINTR;
	}
	return (error);
}
891
/*
 * Unlock a umtx object (COMPAT_IA32: 32-bit lock word).
 * Mirror of do_unlock_umtx(); see that function for the handoff
 * protocol and return values.
 */
static int
do_unlock_umtx32(struct thread *td, uint32_t *m, uint32_t id)
{
	struct umtx_key key;
	uint32_t owner;
	uint32_t old;
	int error;
	int count;

	/*
	 * Make sure we own this mtx.
	 */
	owner = fuword32(m);
	if (owner == -1)
		return (EFAULT);

	if ((owner & ~UMUTEX_CONTESTED) != id)
		return (EPERM);

	/* This should be done in userland */
	if ((owner & UMUTEX_CONTESTED) == 0) {
		old = casuword32(m, owner, UMUTEX_UNOWNED);
		if (old == -1)
			return (EFAULT);
		if (old == owner)
			return (0);
		/* Someone set the contested bit; fall through. */
		owner = old;
	}

	/* We should only ever be in here for contested locks */
	if ((error = umtx_key_get(m, TYPE_SIMPLE_LOCK, AUTO_SHARE,
	    &key)) != 0)
		return (error);

	umtxq_lock(&key);
	umtxq_busy(&key);
	count = umtxq_count(&key);
	umtxq_unlock(&key);

	/*
	 * When unlocking the umtx, it must be marked as unowned if
	 * there is zero or one thread only waiting for it.
	 * Otherwise, it must be marked as contested.
	 */
	old = casuword32(m, owner,
		count <= 1 ? UMUTEX_UNOWNED : UMUTEX_CONTESTED);
	umtxq_lock(&key);
	umtxq_signal(&key,1);
	umtxq_unbusy(&key);
	umtxq_unlock(&key);
	umtx_key_release(&key);
	if (old == -1)
		return (EFAULT);
	if (old != owner)
		return (EINVAL);
	return (0);
}
952 #endif
953
/*
 * Fetch and compare value, sleep on the address if value is not changed.
 *
 * The thread is queued on the chain *before* the user word is read;
 * that ordering closes the window against a racing wake (the waker
 * removes queued entries under the chain lock, and umtxq_sleep()
 * returns immediately if we were dequeued).  Sleeps until woken,
 * signalled, or the optional relative timeout expires.  ERESTART is
 * always converted to EINTR since the value may have changed.
 */
static int
do_wait(struct thread *td, void *addr, u_long id,
	struct timespec *timeout, int compat32, int is_private)
{
	struct umtx_q *uq;
	struct timespec ts, ts2, ts3;
	struct timeval tv;
	u_long tmp;
	int error = 0;

	uq = td->td_umtxq;
	if ((error = umtx_key_get(addr, TYPE_SIMPLE_WAIT,
		is_private ? THREAD_SHARE : AUTO_SHARE, &uq->uq_key)) != 0)
		return (error);

	umtxq_lock(&uq->uq_key);
	umtxq_insert(uq);
	umtxq_unlock(&uq->uq_key);
	if (compat32 == 0)
		tmp = fuword(addr);
        else
		tmp = (unsigned int)fuword32(addr);
	if (tmp != id) {
		/* Value already changed: do not sleep. */
		umtxq_lock(&uq->uq_key);
		umtxq_remove(uq);
		umtxq_unlock(&uq->uq_key);
	} else if (timeout == NULL) {
		umtxq_lock(&uq->uq_key);
		error = umtxq_sleep(uq, "uwait", 0);
		umtxq_remove(uq);
		umtxq_unlock(&uq->uq_key);
	} else {
		/* ts holds the absolute deadline on the uptime clock. */
		getnanouptime(&ts);
		timespecadd(&ts, timeout);
		TIMESPEC_TO_TIMEVAL(&tv, timeout);
		umtxq_lock(&uq->uq_key);
		for (;;) {
			error = umtxq_sleep(uq, "uwait", tvtohz(&tv));
			if (!(uq->uq_flags & UQF_UMTXQ))
				break;
			if (error != ETIMEDOUT)
				break;
			umtxq_unlock(&uq->uq_key);
			getnanouptime(&ts2);
			if (timespeccmp(&ts2, &ts, >=)) {
				error = ETIMEDOUT;
				umtxq_lock(&uq->uq_key);
				break;
			}
			ts3 = ts;
			timespecsub(&ts3, &ts2);
			TIMESPEC_TO_TIMEVAL(&tv, &ts3);
			umtxq_lock(&uq->uq_key);
		}
		umtxq_remove(uq);
		umtxq_unlock(&uq->uq_key);
	}
	umtx_key_release(&uq->uq_key);
	if (error == ERESTART)
		error = EINTR;
	return (error);
}
1019
1020 /*
1021 * Wake up threads sleeping on the specified address.
1022 */
1023 int
1024 kern_umtx_wake(struct thread *td, void *uaddr, int n_wake, int is_private)
1025 {
1026 struct umtx_key key;
1027 int ret;
1028
1029 if ((ret = umtx_key_get(uaddr, TYPE_SIMPLE_WAIT,
1030 is_private ? THREAD_SHARE : AUTO_SHARE, &key)) != 0)
1031 return (ret);
1032 umtxq_lock(&key);
1033 ret = umtxq_signal(&key, n_wake);
1034 umtxq_unlock(&key);
1035 umtx_key_release(&key);
1036 return (0);
1037 }
1038
/*
 * Lock PTHREAD_PRIO_NONE protocol POSIX mutex.
 *
 * 'mode' selects the flavor: _UMUTEX_WAIT only waits for the mutex
 * to become available without acquiring it, _UMUTEX_TRY fails with
 * EBUSY instead of sleeping, and 0 is a normal (possibly timed)
 * lock.  With UMUTEX_ERROR_CHECK set, relocking by the owner
 * returns EDEADLK.  Returns 0, EFAULT, EBUSY, EDEADLK, or a sleep
 * error reported on the retry after a failed sleep.
 */
static int
_do_lock_normal(struct thread *td, struct umutex *m, uint32_t flags, int timo,
	int mode)
{
	struct umtx_q *uq;
	uint32_t owner, old, id;
	int error = 0;

	id = td->td_tid;
	uq = td->td_umtxq;

	/*
	 * Care must be exercised when dealing with umtx structure. It
	 * can fault on any access.
	 */
	for (;;) {
		owner = fuword32(__DEVOLATILE(void *, &m->m_owner));
		if (mode == _UMUTEX_WAIT) {
			/* Wait-only mode: done as soon as it is free. */
			if (owner == UMUTEX_UNOWNED || owner == UMUTEX_CONTESTED)
				return (0);
		} else {
			/*
			 * Try the uncontested case. This should be done in userland.
			 */
			owner = casuword32(&m->m_owner, UMUTEX_UNOWNED, id);

			/* The acquire succeeded. */
			if (owner == UMUTEX_UNOWNED)
				return (0);

			/* The address was invalid. */
			if (owner == -1)
				return (EFAULT);

			/* If no one owns it but it is contested try to acquire it. */
			if (owner == UMUTEX_CONTESTED) {
				owner = casuword32(&m->m_owner,
				    UMUTEX_CONTESTED, id | UMUTEX_CONTESTED);

				if (owner == UMUTEX_CONTESTED)
					return (0);

				/* The address was invalid. */
				if (owner == -1)
					return (EFAULT);

				/* If this failed the lock has changed, restart. */
				continue;
			}
		}

		if ((flags & UMUTEX_ERROR_CHECK) != 0 &&
		    (owner & ~UMUTEX_CONTESTED) == id)
			return (EDEADLK);

		if (mode == _UMUTEX_TRY)
			return (EBUSY);

		/*
		 * If we caught a signal, we have retried and now
		 * exit immediately.
		 */
		if (error != 0)
			return (error);

		if ((error = umtx_key_get(m, TYPE_NORMAL_UMUTEX,
		    GET_SHARE(flags), &uq->uq_key)) != 0)
			return (error);

		/* Queue ourselves before publishing the contested bit. */
		umtxq_lock(&uq->uq_key);
		umtxq_busy(&uq->uq_key);
		umtxq_insert(uq);
		umtxq_unlock(&uq->uq_key);

		/*
		 * Set the contested bit so that a release in user space
		 * knows to use the system call for unlock. If this fails
		 * either some one else has acquired the lock or it has been
		 * released.
		 */
		old = casuword32(&m->m_owner, owner, owner | UMUTEX_CONTESTED);

		/* The address was invalid. */
		if (old == -1) {
			umtxq_lock(&uq->uq_key);
			umtxq_remove(uq);
			umtxq_unbusy(&uq->uq_key);
			umtxq_unlock(&uq->uq_key);
			umtx_key_release(&uq->uq_key);
			return (EFAULT);
		}

		/*
		 * We set the contested bit, sleep. Otherwise the lock changed
		 * and we need to retry or we lost a race to the thread
		 * unlocking the umtx.
		 */
		umtxq_lock(&uq->uq_key);
		umtxq_unbusy(&uq->uq_key);
		if (old == owner)
			error = umtxq_sleep(uq, "umtxn", timo);
		umtxq_remove(uq);
		umtxq_unlock(&uq->uq_key);
		umtx_key_release(&uq->uq_key);
	}

	return (0);
}
1150
/*
 * Unlock PTHREAD_PRIO_NONE protocol POSIX mutex.
 */
static int
do_unlock_normal(struct thread *td, struct umutex *m, uint32_t flags)
{
	/*
	 * Unlock a PTHREAD_PRIO_NONE umutex: verify ownership, try the
	 * uncontested fast path with one CAS, else hand the word off as
	 * UMUTEX_UNOWNED (<= 1 waiter) or UMUTEX_CONTESTED (> 1 waiter)
	 * and wake one waiter.  Returns 0, EFAULT, EPERM, or EINVAL if
	 * the word changed underneath us.
	 */
	struct umtx_key key;
	uint32_t owner, old, id;
	int error;
	int count;

	id = td->td_tid;
	/*
	 * Make sure we own this mtx.
	 */
	owner = fuword32(__DEVOLATILE(uint32_t *, &m->m_owner));
	if (owner == -1)
		return (EFAULT);

	if ((owner & ~UMUTEX_CONTESTED) != id)
		return (EPERM);

	/* Uncontested fast path: clear the word with a single CAS. */
	if ((owner & UMUTEX_CONTESTED) == 0) {
		old = casuword32(&m->m_owner, owner, UMUTEX_UNOWNED);
		if (old == -1)
			return (EFAULT);
		if (old == owner)
			return (0);
		/* Someone set the contested bit; fall through. */
		owner = old;
	}

	/* We should only ever be in here for contested locks */
	if ((error = umtx_key_get(m, TYPE_NORMAL_UMUTEX, GET_SHARE(flags),
	    &key)) != 0)
		return (error);

	umtxq_lock(&key);
	umtxq_busy(&key);
	count = umtxq_count(&key);
	umtxq_unlock(&key);

	/*
	 * When unlocking the umtx, it must be marked as unowned if
	 * there is zero or one thread only waiting for it.
	 * Otherwise, it must be marked as contested.
	 */
	old = casuword32(&m->m_owner, owner,
		count <= 1 ? UMUTEX_UNOWNED : UMUTEX_CONTESTED);
	umtxq_lock(&key);
	umtxq_signal(&key,1);
	umtxq_unbusy(&key);
	umtxq_unlock(&key);
	umtx_key_release(&key);
	if (old == -1)
		return (EFAULT);
	if (old != owner)
		return (EINVAL);
	return (0);
}
1213
/*
 * Check if the mutex is available and wake up a waiter,
 * only for simple mutex.
 *
 * If the lock word shows no owner and at most one waiter is queued,
 * the CONTESTED marker is cleared; a waiter is then woken if the
 * word is (still) unowned.  Returns 0 or the umtx_key_get() error.
 */
static int
do_wake_umutex(struct thread *td, struct umutex *m)
{
	struct umtx_key key;
	uint32_t owner;
	uint32_t flags;
	int error;
	int count;

	owner = fuword32(__DEVOLATILE(uint32_t *, &m->m_owner));
	if (owner == -1)
		return (EFAULT);

	/* Still owned by someone: nothing to do. */
	if ((owner & ~UMUTEX_CONTESTED) != 0)
		return (0);

	flags = fuword32(&m->m_flags);

	/* We should only ever be in here for contested locks */
	if ((error = umtx_key_get(m, TYPE_NORMAL_UMUTEX, GET_SHARE(flags),
	    &key)) != 0)
		return (error);

	umtxq_lock(&key);
	umtxq_busy(&key);
	count = umtxq_count(&key);
	umtxq_unlock(&key);

	/* At most one waiter: try to drop the contested marker. */
	if (count <= 1)
		owner = casuword32(&m->m_owner, UMUTEX_CONTESTED, UMUTEX_UNOWNED);

	umtxq_lock(&key);
	if (count != 0 && (owner & ~UMUTEX_CONTESTED) == 0)
		umtxq_signal(&key, 1);
	umtxq_unbusy(&key);
	umtxq_unlock(&key);
	umtx_key_release(&key);
	return (0);
}
1257
1258 static inline struct umtx_pi *
1259 umtx_pi_alloc(int flags)
1260 {
1261 struct umtx_pi *pi;
1262
1263 pi = uma_zalloc(umtx_pi_zone, M_ZERO | flags);
1264 TAILQ_INIT(&pi->pi_blocked);
1265 atomic_add_int(&umtx_pi_allocated, 1);
1266 return (pi);
1267 }
1268
/*
 * Release a PI mutex tracking structure back to its zone and
 * decrement the outstanding-allocation counter.
 */
static inline void
umtx_pi_free(struct umtx_pi *pi)
{
	uma_zfree(umtx_pi_zone, pi);
	atomic_add_int(&umtx_pi_allocated, -1);
}
1275
/*
 * Adjust the thread's position on a pi_state after its priority has been
 * changed.  Returns 0 when the thread is not blocked on a PI mutex
 * (pi == NULL), 1 otherwise.  Caller must hold umtx_lock.
 */
static int
umtx_pi_adjust_thread(struct umtx_pi *pi, struct thread *td)
{
	struct umtx_q *uq, *uq1, *uq2;
	struct thread *td1;

	mtx_assert(&umtx_lock, MA_OWNED);
	if (pi == NULL)
		return (0);

	uq = td->td_umtxq;

	/*
	 * Check if the thread needs to be moved on the blocked chain.
	 * It needs to be moved if either its priority is lower than
	 * the previous thread or higher than the next thread.
	 */
	uq1 = TAILQ_PREV(uq, umtxq_head, uq_lockq);
	uq2 = TAILQ_NEXT(uq, uq_lockq);
	if ((uq1 != NULL && UPRI(td) < UPRI(uq1->uq_thread)) ||
	    (uq2 != NULL && UPRI(td) > UPRI(uq2->uq_thread))) {
		/*
		 * Remove thread from blocked chain and determine where
		 * it should be moved to.
		 */
		TAILQ_REMOVE(&pi->pi_blocked, uq, uq_lockq);
		TAILQ_FOREACH(uq1, &pi->pi_blocked, uq_lockq) {
			td1 = uq1->uq_thread;
			MPASS(td1->td_proc->p_magic == P_MAGIC);
			/* Stop at the first waiter with lower priority. */
			if (UPRI(td1) > UPRI(td))
				break;
		}

		if (uq1 == NULL)
			TAILQ_INSERT_TAIL(&pi->pi_blocked, uq, uq_lockq);
		else
			TAILQ_INSERT_BEFORE(uq1, uq, uq_lockq);
	}
	return (1);
}
1320
/*
 * Propagate priority when a thread is blocked on POSIX
 * PI mutex.  Walks the chain of lock owners, lending the blocked
 * thread's priority to every owner that currently runs at a worse
 * priority.  Caller must hold umtx_lock.
 */
static void
umtx_propagate_priority(struct thread *td)
{
	struct umtx_q *uq;
	struct umtx_pi *pi;
	int pri;

	mtx_assert(&umtx_lock, MA_OWNED);
	pri = UPRI(td);
	uq = td->td_umtxq;
	pi = uq->uq_pi_blocked;
	if (pi == NULL)
		return;

	for (;;) {
		td = pi->pi_owner;
		if (td == NULL)
			return;

		MPASS(td->td_proc != NULL);
		MPASS(td->td_proc->p_magic == P_MAGIC);

		/* Owner already runs at an equal or better priority. */
		if (UPRI(td) <= pri)
			return;

		thread_lock(td);
		sched_lend_user_prio(td, pri);
		thread_unlock(td);

		/*
		 * Pick up the lock that td is blocked on.
		 */
		uq = td->td_umtxq;
		pi = uq->uq_pi_blocked;
		/* Resort td on the list if needed. */
		if (!umtx_pi_adjust_thread(pi, td))
			break;
	}
}
1364
/*
 * Unpropagate priority for a PI mutex when a thread blocked on
 * it is interrupted by signal or resumed by others.
 * Each owner's lent priority is recomputed from the highest-priority
 * waiter over all PI mutexes it still holds.  Caller must hold
 * umtx_lock.
 */
static void
umtx_unpropagate_priority(struct umtx_pi *pi)
{
	struct umtx_q *uq, *uq_owner;
	struct umtx_pi *pi2;
	int pri, oldpri;

	mtx_assert(&umtx_lock, MA_OWNED);

	while (pi != NULL && pi->pi_owner != NULL) {
		pri = PRI_MAX;
		uq_owner = pi->pi_owner->td_umtxq;

		/* Best waiter priority over all contested mutexes held. */
		TAILQ_FOREACH(pi2, &uq_owner->uq_pi_contested, pi_link) {
			uq = TAILQ_FIRST(&pi2->pi_blocked);
			if (uq != NULL) {
				if (pri > UPRI(uq->uq_thread))
					pri = UPRI(uq->uq_thread);
			}
		}

		if (pri > uq_owner->uq_inherited_pri)
			pri = uq_owner->uq_inherited_pri;
		thread_lock(pi->pi_owner);
		oldpri = pi->pi_owner->td_user_pri;
		sched_unlend_user_prio(pi->pi_owner, pri);
		thread_unlock(pi->pi_owner);
		/* If the owner is itself blocked, continue up the chain. */
		if (uq_owner->uq_pi_blocked != NULL)
			umtx_pi_adjust_locked(pi->pi_owner, oldpri);
		pi = uq_owner->uq_pi_blocked;
	}
}
1401
1402 /*
1403 * Insert a PI mutex into owned list.
1404 */
1405 static void
1406 umtx_pi_setowner(struct umtx_pi *pi, struct thread *owner)
1407 {
1408 struct umtx_q *uq_owner;
1409
1410 uq_owner = owner->td_umtxq;
1411 mtx_assert(&umtx_lock, MA_OWNED);
1412 if (pi->pi_owner != NULL)
1413 panic("pi_ower != NULL");
1414 pi->pi_owner = owner;
1415 TAILQ_INSERT_TAIL(&uq_owner->uq_pi_contested, pi, pi_link);
1416 }
1417
1418 /*
1419 * Claim ownership of a PI mutex.
1420 */
1421 static int
1422 umtx_pi_claim(struct umtx_pi *pi, struct thread *owner)
1423 {
1424 struct umtx_q *uq, *uq_owner;
1425
1426 uq_owner = owner->td_umtxq;
1427 mtx_lock_spin(&umtx_lock);
1428 if (pi->pi_owner == owner) {
1429 mtx_unlock_spin(&umtx_lock);
1430 return (0);
1431 }
1432
1433 if (pi->pi_owner != NULL) {
1434 /*
1435 * userland may have already messed the mutex, sigh.
1436 */
1437 mtx_unlock_spin(&umtx_lock);
1438 return (EPERM);
1439 }
1440 umtx_pi_setowner(pi, owner);
1441 uq = TAILQ_FIRST(&pi->pi_blocked);
1442 if (uq != NULL) {
1443 int pri;
1444
1445 pri = UPRI(uq->uq_thread);
1446 thread_lock(owner);
1447 if (pri < UPRI(owner))
1448 sched_lend_user_prio(owner, pri);
1449 thread_unlock(owner);
1450 }
1451 mtx_unlock_spin(&umtx_lock);
1452 return (0);
1453 }
1454
/*
 * Re-sort a blocked thread on the PI mutex it sleeps on after its
 * priority changed, and propagate the new priority up the owner
 * chain when needed.  Caller must hold umtx_lock, and td must be
 * blocked on a PI mutex.
 */
static void
umtx_pi_adjust_locked(struct thread *td, u_char oldpri)
{
	struct umtx_q *uq;
	struct umtx_pi *pi;

	uq = td->td_umtxq;
	/*
	 * Pick up the lock that td is blocked on.
	 */
	pi = uq->uq_pi_blocked;
	MPASS(pi != NULL);

	/* Resort the turnstile on the list. */
	if (!umtx_pi_adjust_thread(pi, td))
		return;

	/*
	 * If our priority was lowered and we are at the head of the
	 * turnstile, then propagate our new priority up the chain.
	 */
	if (uq == TAILQ_FIRST(&pi->pi_blocked) && UPRI(td) < oldpri)
		umtx_propagate_priority(td);
}
1479
/*
 * Adjust a thread's order position in its blocked PI mutex,
 * this may result new priority propagating process.
 * Takes umtx_lock itself; a no-op when the thread is not blocked
 * on a PI mutex.
 */
void
umtx_pi_adjust(struct thread *td, u_char oldpri)
{
	struct umtx_q *uq;
	struct umtx_pi *pi;

	uq = td->td_umtxq;
	mtx_lock_spin(&umtx_lock);
	/*
	 * Pick up the lock that td is blocked on.
	 */
	pi = uq->uq_pi_blocked;
	if (pi != NULL)
		umtx_pi_adjust_locked(td, oldpri);
	mtx_unlock_spin(&umtx_lock);
}
1500
/*
 * Sleep on a PI mutex.
 *
 * The waiter is queued on the umtx sleep queue and on the mutex's
 * priority-sorted pi_blocked list, priority is propagated to the
 * owner, and the thread sleeps until woken, signalled or timed out.
 * Called with the chain locked and busy; returns with the chain
 * unlocked.
 */
static int
umtxq_sleep_pi(struct umtx_q *uq, struct umtx_pi *pi,
	uint32_t owner, const char *wmesg, int timo)
{
	struct umtxq_chain *uc;
	struct thread *td, *td1;
	struct umtx_q *uq1;
	int pri;
	int error = 0;

	td = uq->uq_thread;
	KASSERT(td == curthread, ("inconsistent uq_thread"));
	uc = umtxq_getchain(&uq->uq_key);
	UMTXQ_LOCKED_ASSERT(uc);
	UMTXQ_BUSY_ASSERT(uc);
	umtxq_insert(uq);
	mtx_lock_spin(&umtx_lock);
	if (pi->pi_owner == NULL) {
		/* XXX
		 * Current, We only support process private PI-mutex,
		 * non-contended PI-mutexes are locked in userland.
		 * Process shared PI-mutex should always be initialized
		 * by kernel and be registered in kernel, locking should
		 * always be done by kernel to avoid security problems.
		 * For process private PI-mutex, we can find owner
		 * thread and boost its priority safely.
		 */
		mtx_unlock_spin(&umtx_lock);
		PROC_LOCK(curproc);
		td1 = thread_find(curproc, owner);
		mtx_lock_spin(&umtx_lock);
		/* Re-check: the owner may have been set while unlocked. */
		if (td1 != NULL && pi->pi_owner == NULL) {
			uq1 = td1->td_umtxq;
			umtx_pi_setowner(pi, td1);
		}
		PROC_UNLOCK(curproc);
	}

	/*
	 * Keep pi_blocked sorted by user priority: insert before the
	 * first waiter whose priority is worse than ours.
	 */
	TAILQ_FOREACH(uq1, &pi->pi_blocked, uq_lockq) {
		pri = UPRI(uq1->uq_thread);
		if (pri > UPRI(td))
			break;
	}

	if (uq1 != NULL)
		TAILQ_INSERT_BEFORE(uq1, uq, uq_lockq);
	else
		TAILQ_INSERT_TAIL(&pi->pi_blocked, uq, uq_lockq);

	uq->uq_pi_blocked = pi;
	thread_lock(td);
	td->td_flags |= TDF_UPIBLOCKED;
	thread_unlock(td);
	umtx_propagate_priority(td);
	mtx_unlock_spin(&umtx_lock);
	umtxq_unbusy(&uq->uq_key);

	if (uq->uq_flags & UQF_UMTXQ) {
		error = msleep(uq, &uc->uc_lock, PCATCH, wmesg, timo);
		if (error == EWOULDBLOCK)
			error = ETIMEDOUT;
		if (uq->uq_flags & UQF_UMTXQ) {
			umtxq_remove(uq);
		}
	}
	/* Undo the blocked state and any priority propagation. */
	mtx_lock_spin(&umtx_lock);
	uq->uq_pi_blocked = NULL;
	thread_lock(td);
	td->td_flags &= ~TDF_UPIBLOCKED;
	thread_unlock(td);
	TAILQ_REMOVE(&pi->pi_blocked, uq, uq_lockq);
	umtx_unpropagate_priority(pi);
	mtx_unlock_spin(&umtx_lock);
	umtxq_unlock(&uq->uq_key);

	return (error);
}
1581
/*
 * Add reference count for a PI mutex.
 * Caller must hold the chain lock for the mutex's key.
 */
static void
umtx_pi_ref(struct umtx_pi *pi)
{
	struct umtxq_chain *uc;

	uc = umtxq_getchain(&pi->pi_key);
	UMTXQ_LOCKED_ASSERT(uc);
	pi->pi_refcount++;
}
1594
/*
 * Decrease reference count for a PI mutex, if the counter
 * is decreased to zero, its memory space is freed.
 * Caller must hold the chain lock for the mutex's key.
 */
static void
umtx_pi_unref(struct umtx_pi *pi)
{
	struct umtxq_chain *uc;

	uc = umtxq_getchain(&pi->pi_key);
	UMTXQ_LOCKED_ASSERT(uc);
	KASSERT(pi->pi_refcount > 0, ("invalid reference count"));
	if (--pi->pi_refcount == 0) {
		/* Detach from the owner's contested list, if any. */
		mtx_lock_spin(&umtx_lock);
		if (pi->pi_owner != NULL) {
			TAILQ_REMOVE(&pi->pi_owner->td_umtxq->uq_pi_contested,
			    pi, pi_link);
			pi->pi_owner = NULL;
		}
		KASSERT(TAILQ_EMPTY(&pi->pi_blocked),
		    ("blocked queue not empty"));
		mtx_unlock_spin(&umtx_lock);
		TAILQ_REMOVE(&uc->uc_pi_list, pi, pi_hashlink);
		umtx_pi_free(pi);
	}
}
1621
1622 /*
1623 * Find a PI mutex in hash table.
1624 */
1625 static struct umtx_pi *
1626 umtx_pi_lookup(struct umtx_key *key)
1627 {
1628 struct umtxq_chain *uc;
1629 struct umtx_pi *pi;
1630
1631 uc = umtxq_getchain(key);
1632 UMTXQ_LOCKED_ASSERT(uc);
1633
1634 TAILQ_FOREACH(pi, &uc->uc_pi_list, pi_hashlink) {
1635 if (umtx_key_match(&pi->pi_key, key)) {
1636 return (pi);
1637 }
1638 }
1639 return (NULL);
1640 }
1641
/*
 * Insert a PI mutex into hash table.
 * Caller must hold the chain lock for the mutex's key.
 */
static inline void
umtx_pi_insert(struct umtx_pi *pi)
{
	struct umtxq_chain *uc;

	uc = umtxq_getchain(&pi->pi_key);
	UMTXQ_LOCKED_ASSERT(uc);
	TAILQ_INSERT_TAIL(&uc->uc_pi_list, pi, pi_hashlink);
}
1654
/*
 * Lock a PI mutex.
 *
 * Looks up (or creates) the kernel-side PI tracking structure for
 * the mutex, then loops: try the uncontested CAS, claim an unowned
 * contested mutex, or set the contested bit and sleep with priority
 * propagation.  Returns 0 on success or an errno (EFAULT, EDEADLK,
 * EBUSY for try-lock, ETIMEDOUT/EINTR from the sleep).
 */
static int
_do_lock_pi(struct thread *td, struct umutex *m, uint32_t flags, int timo,
	int try)
{
	struct umtx_q *uq;
	struct umtx_pi *pi, *new_pi;
	uint32_t id, owner, old;
	int error;

	id = td->td_tid;
	uq = td->td_umtxq;

	if ((error = umtx_key_get(m, TYPE_PI_UMUTEX, GET_SHARE(flags),
	    &uq->uq_key)) != 0)
		return (error);
	umtxq_lock(&uq->uq_key);
	pi = umtx_pi_lookup(&uq->uq_key);
	if (pi == NULL) {
		new_pi = umtx_pi_alloc(M_NOWAIT);
		if (new_pi == NULL) {
			/*
			 * Drop the chain lock so we can sleep in the
			 * allocator, then re-check for a racing insert.
			 */
			umtxq_unlock(&uq->uq_key);
			new_pi = umtx_pi_alloc(M_WAITOK);
			umtxq_lock(&uq->uq_key);
			pi = umtx_pi_lookup(&uq->uq_key);
			if (pi != NULL) {
				umtx_pi_free(new_pi);
				new_pi = NULL;
			}
		}
		if (new_pi != NULL) {
			new_pi->pi_key = uq->uq_key;
			umtx_pi_insert(new_pi);
			pi = new_pi;
		}
	}
	umtx_pi_ref(pi);
	umtxq_unlock(&uq->uq_key);

	/*
	 * Care must be exercised when dealing with umtx structure. It
	 * can fault on any access.
	 */
	for (;;) {
		/*
		 * Try the uncontested case. This should be done in userland.
		 */
		owner = casuword32(&m->m_owner, UMUTEX_UNOWNED, id);

		/* The acquire succeeded. */
		if (owner == UMUTEX_UNOWNED) {
			error = 0;
			break;
		}

		/* The address was invalid. */
		if (owner == -1) {
			error = EFAULT;
			break;
		}

		/* If no one owns it but it is contested try to acquire it. */
		if (owner == UMUTEX_CONTESTED) {
			owner = casuword32(&m->m_owner,
			    UMUTEX_CONTESTED, id | UMUTEX_CONTESTED);

			if (owner == UMUTEX_CONTESTED) {
				umtxq_lock(&uq->uq_key);
				umtxq_busy(&uq->uq_key);
				error = umtx_pi_claim(pi, td);
				umtxq_unbusy(&uq->uq_key);
				umtxq_unlock(&uq->uq_key);
				break;
			}

			/* The address was invalid. */
			if (owner == -1) {
				error = EFAULT;
				break;
			}

			/* If this failed the lock has changed, restart. */
			continue;
		}

		if ((flags & UMUTEX_ERROR_CHECK) != 0 &&
		    (owner & ~UMUTEX_CONTESTED) == id) {
			error = EDEADLK;
			break;
		}

		if (try != 0) {
			error = EBUSY;
			break;
		}

		/*
		 * If we caught a signal, we have retried and now
		 * exit immediately.
		 */
		if (error != 0)
			break;

		umtxq_lock(&uq->uq_key);
		umtxq_busy(&uq->uq_key);
		umtxq_unlock(&uq->uq_key);

		/*
		 * Set the contested bit so that a release in user space
		 * knows to use the system call for unlock. If this fails
		 * either some one else has acquired the lock or it has been
		 * released.
		 */
		old = casuword32(&m->m_owner, owner, owner | UMUTEX_CONTESTED);

		/* The address was invalid. */
		if (old == -1) {
			umtxq_lock(&uq->uq_key);
			umtxq_unbusy(&uq->uq_key);
			umtxq_unlock(&uq->uq_key);
			error = EFAULT;
			break;
		}

		umtxq_lock(&uq->uq_key);
		/*
		 * We set the contested bit, sleep. Otherwise the lock changed
		 * and we need to retry or we lost a race to the thread
		 * unlocking the umtx.
		 */
		if (old == owner)
			error = umtxq_sleep_pi(uq, pi, owner & ~UMUTEX_CONTESTED,
			    "umtxpi", timo);
		else {
			umtxq_unbusy(&uq->uq_key);
			umtxq_unlock(&uq->uq_key);
		}
	}

	/* Drop our reference on the PI structure; may free it. */
	umtxq_lock(&uq->uq_key);
	umtx_pi_unref(pi);
	umtxq_unlock(&uq->uq_key);

	umtx_key_release(&uq->uq_key);
	return (error);
}
1803
/*
 * Unlock a PI mutex.
 *
 * Verifies ownership, hands ownership off in the kernel PI state,
 * recomputes the lent priority of the releasing thread, wakes the
 * best sleeping waiter, and finally updates the userland word
 * (unowned, or contested when more waiters remain).
 */
static int
do_unlock_pi(struct thread *td, struct umutex *m, uint32_t flags)
{
	struct umtx_key key;
	struct umtx_q *uq_first, *uq_first2, *uq_me;
	struct umtx_pi *pi, *pi2;
	uint32_t owner, old, id;
	int error;
	int count;
	int pri;

	id = td->td_tid;
	/*
	 * Make sure we own this mtx.
	 */
	owner = fuword32(__DEVOLATILE(uint32_t *, &m->m_owner));
	if (owner == -1)
		return (EFAULT);

	if ((owner & ~UMUTEX_CONTESTED) != id)
		return (EPERM);

	/* This should be done in userland */
	if ((owner & UMUTEX_CONTESTED) == 0) {
		old = casuword32(&m->m_owner, owner, UMUTEX_UNOWNED);
		if (old == -1)
			return (EFAULT);
		if (old == owner)
			return (0);
		owner = old;
	}

	/* We should only ever be in here for contested locks */
	if ((error = umtx_key_get(m, TYPE_PI_UMUTEX, GET_SHARE(flags),
	    &key)) != 0)
		return (error);

	umtxq_lock(&key);
	umtxq_busy(&key);
	count = umtxq_count_pi(&key, &uq_first);
	if (uq_first != NULL) {
		mtx_lock_spin(&umtx_lock);
		pi = uq_first->uq_pi_blocked;
		KASSERT(pi != NULL, ("pi == NULL?"));
		if (pi->pi_owner != curthread) {
			mtx_unlock_spin(&umtx_lock);
			umtxq_unbusy(&key);
			umtxq_unlock(&key);
			umtx_key_release(&key);
			/* userland messed the mutex */
			return (EPERM);
		}
		uq_me = curthread->td_umtxq;
		pi->pi_owner = NULL;
		TAILQ_REMOVE(&uq_me->uq_pi_contested, pi, pi_link);
		/* get highest priority thread which is still sleeping. */
		uq_first = TAILQ_FIRST(&pi->pi_blocked);
		while (uq_first != NULL &&
		    (uq_first->uq_flags & UQF_UMTXQ) == 0) {
			uq_first = TAILQ_NEXT(uq_first, uq_lockq);
		}
		/* Recompute lent priority from remaining contested mutexes. */
		pri = PRI_MAX;
		TAILQ_FOREACH(pi2, &uq_me->uq_pi_contested, pi_link) {
			uq_first2 = TAILQ_FIRST(&pi2->pi_blocked);
			if (uq_first2 != NULL) {
				if (pri > UPRI(uq_first2->uq_thread))
					pri = UPRI(uq_first2->uq_thread);
			}
		}
		thread_lock(curthread);
		sched_unlend_user_prio(curthread, pri);
		thread_unlock(curthread);
		mtx_unlock_spin(&umtx_lock);
		if (uq_first)
			umtxq_signal_thread(uq_first);
	}
	umtxq_unlock(&key);

	/*
	 * When unlocking the umtx, it must be marked as unowned if
	 * there is zero or one thread only waiting for it.
	 * Otherwise, it must be marked as contested.
	 */
	old = casuword32(&m->m_owner, owner,
	    count <= 1 ? UMUTEX_UNOWNED : UMUTEX_CONTESTED);

	umtxq_lock(&key);
	umtxq_unbusy(&key);
	umtxq_unlock(&key);
	umtx_key_release(&key);
	if (old == -1)
		return (EFAULT);
	if (old != owner)
		return (EINVAL);
	return (0);
}
1903
1904 /*
1905 * Lock a PP mutex.
1906 */
1907 static int
1908 _do_lock_pp(struct thread *td, struct umutex *m, uint32_t flags, int timo,
1909 int try)
1910 {
1911 struct umtx_q *uq, *uq2;
1912 struct umtx_pi *pi;
1913 uint32_t ceiling;
1914 uint32_t owner, id;
1915 int error, pri, old_inherited_pri, su;
1916
1917 id = td->td_tid;
1918 uq = td->td_umtxq;
1919 if ((error = umtx_key_get(m, TYPE_PP_UMUTEX, GET_SHARE(flags),
1920 &uq->uq_key)) != 0)
1921 return (error);
1922 su = (priv_check(td, PRIV_SCHED_RTPRIO) == 0);
1923 for (;;) {
1924 old_inherited_pri = uq->uq_inherited_pri;
1925 umtxq_lock(&uq->uq_key);
1926 umtxq_busy(&uq->uq_key);
1927 umtxq_unlock(&uq->uq_key);
1928
1929 ceiling = RTP_PRIO_MAX - fuword32(&m->m_ceilings[0]);
1930 if (ceiling > RTP_PRIO_MAX) {
1931 error = EINVAL;
1932 goto out;
1933 }
1934
1935 mtx_lock_spin(&umtx_lock);
1936 if (UPRI(td) < PRI_MIN_REALTIME + ceiling) {
1937 mtx_unlock_spin(&umtx_lock);
1938 error = EINVAL;
1939 goto out;
1940 }
1941 if (su && PRI_MIN_REALTIME + ceiling < uq->uq_inherited_pri) {
1942 uq->uq_inherited_pri = PRI_MIN_REALTIME + ceiling;
1943 thread_lock(td);
1944 if (uq->uq_inherited_pri < UPRI(td))
1945 sched_lend_user_prio(td, uq->uq_inherited_pri);
1946 thread_unlock(td);
1947 }
1948 mtx_unlock_spin(&umtx_lock);
1949
1950 owner = casuword32(&m->m_owner,
1951 UMUTEX_CONTESTED, id | UMUTEX_CONTESTED);
1952
1953 if (owner == UMUTEX_CONTESTED) {
1954 error = 0;
1955 break;
1956 }
1957
1958 /* The address was invalid. */
1959 if (owner == -1) {
1960 error = EFAULT;
1961 break;
1962 }
1963
1964 if ((flags & UMUTEX_ERROR_CHECK) != 0 &&
1965 (owner & ~UMUTEX_CONTESTED) == id) {
1966 error = EDEADLK;
1967 break;
1968 }
1969
1970 if (try != 0) {
1971 error = EBUSY;
1972 break;
1973 }
1974
1975 /*
1976 * If we caught a signal, we have retried and now
1977 * exit immediately.
1978 */
1979 if (error != 0)
1980 break;
1981
1982 umtxq_lock(&uq->uq_key);
1983 umtxq_insert(uq);
1984 umtxq_unbusy(&uq->uq_key);
1985 error = umtxq_sleep(uq, "umtxpp", timo);
1986 umtxq_remove(uq);
1987 umtxq_unlock(&uq->uq_key);
1988
1989 mtx_lock_spin(&umtx_lock);
1990 uq->uq_inherited_pri = old_inherited_pri;
1991 pri = PRI_MAX;
1992 TAILQ_FOREACH(pi, &uq->uq_pi_contested, pi_link) {
1993 uq2 = TAILQ_FIRST(&pi->pi_blocked);
1994 if (uq2 != NULL) {
1995 if (pri > UPRI(uq2->uq_thread))
1996 pri = UPRI(uq2->uq_thread);
1997 }
1998 }
1999 if (pri > uq->uq_inherited_pri)
2000 pri = uq->uq_inherited_pri;
2001 thread_lock(td);
2002 sched_unlend_user_prio(td, pri);
2003 thread_unlock(td);
2004 mtx_unlock_spin(&umtx_lock);
2005 }
2006
2007 if (error != 0) {
2008 mtx_lock_spin(&umtx_lock);
2009 uq->uq_inherited_pri = old_inherited_pri;
2010 pri = PRI_MAX;
2011 TAILQ_FOREACH(pi, &uq->uq_pi_contested, pi_link) {
2012 uq2 = TAILQ_FIRST(&pi->pi_blocked);
2013 if (uq2 != NULL) {
2014 if (pri > UPRI(uq2->uq_thread))
2015 pri = UPRI(uq2->uq_thread);
2016 }
2017 }
2018 if (pri > uq->uq_inherited_pri)
2019 pri = uq->uq_inherited_pri;
2020 thread_lock(td);
2021 sched_unlend_user_prio(td, pri);
2022 thread_unlock(td);
2023 mtx_unlock_spin(&umtx_lock);
2024 }
2025
2026 out:
2027 umtxq_lock(&uq->uq_key);
2028 umtxq_unbusy(&uq->uq_key);
2029 umtxq_unlock(&uq->uq_key);
2030 umtx_key_release(&uq->uq_key);
2031 return (error);
2032 }
2033
/*
 * Unlock a PP mutex.
 *
 * Verifies ownership, reads the relinquish ceiling from
 * m_ceilings[1], stores UMUTEX_CONTESTED as the unlocked state so
 * that lockers always enter the kernel, wakes one waiter, and
 * recomputes this thread's lent priority.
 */
static int
do_unlock_pp(struct thread *td, struct umutex *m, uint32_t flags)
{
	struct umtx_key key;
	struct umtx_q *uq, *uq2;
	struct umtx_pi *pi;
	uint32_t owner, id;
	uint32_t rceiling;
	int error, pri, new_inherited_pri, su;

	id = td->td_tid;
	uq = td->td_umtxq;
	su = (priv_check(td, PRIV_SCHED_RTPRIO) == 0);

	/*
	 * Make sure we own this mtx.
	 */
	owner = fuword32(__DEVOLATILE(uint32_t *, &m->m_owner));
	if (owner == -1)
		return (EFAULT);

	if ((owner & ~UMUTEX_CONTESTED) != id)
		return (EPERM);

	error = copyin(&m->m_ceilings[1], &rceiling, sizeof(uint32_t));
	if (error != 0)
		return (error);

	if (rceiling == -1)
		new_inherited_pri = PRI_MAX;
	else {
		rceiling = RTP_PRIO_MAX - rceiling;
		if (rceiling > RTP_PRIO_MAX)
			return (EINVAL);
		new_inherited_pri = PRI_MIN_REALTIME + rceiling;
	}

	if ((error = umtx_key_get(m, TYPE_PP_UMUTEX, GET_SHARE(flags),
	    &key)) != 0)
		return (error);
	umtxq_lock(&key);
	umtxq_busy(&key);
	umtxq_unlock(&key);
	/*
	 * For priority protected mutex, always set unlocked state
	 * to UMUTEX_CONTESTED, so that userland always enters kernel
	 * to lock the mutex, it is necessary because thread priority
	 * has to be adjusted for such mutex.
	 */
	error = suword32(__DEVOLATILE(uint32_t *, &m->m_owner),
	    UMUTEX_CONTESTED);

	umtxq_lock(&key);
	if (error == 0)
		umtxq_signal(&key, 1);
	umtxq_unbusy(&key);
	umtxq_unlock(&key);

	if (error == -1)
		error = EFAULT;
	else {
		/* Recompute our lent priority under the new ceiling. */
		mtx_lock_spin(&umtx_lock);
		if (su != 0)
			uq->uq_inherited_pri = new_inherited_pri;
		pri = PRI_MAX;
		TAILQ_FOREACH(pi, &uq->uq_pi_contested, pi_link) {
			uq2 = TAILQ_FIRST(&pi->pi_blocked);
			if (uq2 != NULL) {
				if (pri > UPRI(uq2->uq_thread))
					pri = UPRI(uq2->uq_thread);
			}
		}
		if (pri > uq->uq_inherited_pri)
			pri = uq->uq_inherited_pri;
		thread_lock(td);
		sched_unlend_user_prio(td, pri);
		thread_unlock(td);
		mtx_unlock_spin(&umtx_lock);
	}
	umtx_key_release(&key);
	return (error);
}
2119
/*
 * Change the priority ceiling of a PP mutex.
 *
 * Temporarily acquires the mutex (or recognizes that the caller
 * already owns it), stores the new ceiling, and on success writes
 * the old ceiling to *old_ceiling.  Sleeps and retries while the
 * mutex is held by another thread.
 */
static int
do_set_ceiling(struct thread *td, struct umutex *m, uint32_t ceiling,
	uint32_t *old_ceiling)
{
	struct umtx_q *uq;
	uint32_t save_ceiling;
	uint32_t owner, id;
	uint32_t flags;
	int error;

	flags = fuword32(&m->m_flags);
	/* Only meaningful for priority-protected mutexes. */
	if ((flags & UMUTEX_PRIO_PROTECT) == 0)
		return (EINVAL);
	if (ceiling > RTP_PRIO_MAX)
		return (EINVAL);
	id = td->td_tid;
	uq = td->td_umtxq;
	if ((error = umtx_key_get(m, TYPE_PP_UMUTEX, GET_SHARE(flags),
	    &uq->uq_key)) != 0)
		return (error);
	for (;;) {
		umtxq_lock(&uq->uq_key);
		umtxq_busy(&uq->uq_key);
		umtxq_unlock(&uq->uq_key);

		save_ceiling = fuword32(&m->m_ceilings[0]);

		owner = casuword32(&m->m_owner,
		    UMUTEX_CONTESTED, id | UMUTEX_CONTESTED);

		if (owner == UMUTEX_CONTESTED) {
			/* We took the mutex: update ceiling, release it. */
			suword32(&m->m_ceilings[0], ceiling);
			suword32(__DEVOLATILE(uint32_t *, &m->m_owner),
			    UMUTEX_CONTESTED);
			error = 0;
			break;
		}

		/* The address was invalid. */
		if (owner == -1) {
			error = EFAULT;
			break;
		}

		/* Caller already owns the mutex: just store the ceiling. */
		if ((owner & ~UMUTEX_CONTESTED) == id) {
			suword32(&m->m_ceilings[0], ceiling);
			error = 0;
			break;
		}

		/*
		 * If we caught a signal, we have retried and now
		 * exit immediately.
		 */
		if (error != 0)
			break;

		/*
		 * We set the contested bit, sleep. Otherwise the lock changed
		 * and we need to retry or we lost a race to the thread
		 * unlocking the umtx.
		 */
		umtxq_lock(&uq->uq_key);
		umtxq_insert(uq);
		umtxq_unbusy(&uq->uq_key);
		error = umtxq_sleep(uq, "umtxpp", 0);
		umtxq_remove(uq);
		umtxq_unlock(&uq->uq_key);
	}
	umtxq_lock(&uq->uq_key);
	if (error == 0)
		umtxq_signal(&uq->uq_key, INT_MAX);
	umtxq_unbusy(&uq->uq_key);
	umtxq_unlock(&uq->uq_key);
	umtx_key_release(&uq->uq_key);
	if (error == 0 && old_ceiling != NULL)
		suword32(old_ceiling, save_ceiling);
	return (error);
}
2199
2200 static int
2201 _do_lock_umutex(struct thread *td, struct umutex *m, int flags, int timo,
2202 int mode)
2203 {
2204 switch(flags & (UMUTEX_PRIO_INHERIT | UMUTEX_PRIO_PROTECT)) {
2205 case 0:
2206 return (_do_lock_normal(td, m, flags, timo, mode));
2207 case UMUTEX_PRIO_INHERIT:
2208 return (_do_lock_pi(td, m, flags, timo, mode));
2209 case UMUTEX_PRIO_PROTECT:
2210 return (_do_lock_pp(td, m, flags, timo, mode));
2211 }
2212 return (EINVAL);
2213 }
2214
/*
 * Lock a userland POSIX mutex.
 *
 * With no timeout, an interrupted lock is restarted (except for
 * _UMUTEX_WAIT).  With a timeout, the absolute deadline is computed
 * once and the lock is retried with the remaining time after each
 * ETIMEDOUT; timed locking is never restarted.
 */
static int
do_lock_umutex(struct thread *td, struct umutex *m,
	struct timespec *timeout, int mode)
{
	struct timespec ts, ts2, ts3;
	struct timeval tv;
	uint32_t flags;
	int error;

	flags = fuword32(&m->m_flags);
	if (flags == -1)
		return (EFAULT);

	if (timeout == NULL) {
		error = _do_lock_umutex(td, m, flags, 0, mode);
		/* Mutex locking is restarted if it is interrupted. */
		if (error == EINTR && mode != _UMUTEX_WAIT)
			error = ERESTART;
	} else {
		/* ts holds the absolute deadline. */
		getnanouptime(&ts);
		timespecadd(&ts, timeout);
		TIMESPEC_TO_TIMEVAL(&tv, timeout);
		for (;;) {
			error = _do_lock_umutex(td, m, flags, tvtohz(&tv), mode);
			if (error != ETIMEDOUT)
				break;
			getnanouptime(&ts2);
			if (timespeccmp(&ts2, &ts, >=)) {
				error = ETIMEDOUT;
				break;
			}
			/* Retry with the remaining time. */
			ts3 = ts;
			timespecsub(&ts3, &ts2);
			TIMESPEC_TO_TIMEVAL(&tv, &ts3);
		}
		/* Timed-locking is not restarted. */
		if (error == ERESTART)
			error = EINTR;
	}
	return (error);
}
2259
2260 /*
2261 * Unlock a userland POSIX mutex.
2262 */
2263 static int
2264 do_unlock_umutex(struct thread *td, struct umutex *m)
2265 {
2266 uint32_t flags;
2267
2268 flags = fuword32(&m->m_flags);
2269 if (flags == -1)
2270 return (EFAULT);
2271
2272 switch(flags & (UMUTEX_PRIO_INHERIT | UMUTEX_PRIO_PROTECT)) {
2273 case 0:
2274 return (do_unlock_normal(td, m, flags));
2275 case UMUTEX_PRIO_INHERIT:
2276 return (do_unlock_pi(td, m, flags));
2277 case UMUTEX_PRIO_PROTECT:
2278 return (do_unlock_pp(td, m, flags));
2279 }
2280
2281 return (EINVAL);
2282 }
2283
/*
 * Wait on a userland condition variable.
 *
 * Queues the thread, marks c_has_waiters, releases the associated
 * mutex, then sleeps (optionally with an absolute deadline derived
 * from timeout).  On a wakeup that raced with an error/signal/
 * timeout, the signal is re-posted so it is not consumed.
 */
static int
do_cv_wait(struct thread *td, struct ucond *cv, struct umutex *m,
	struct timespec *timeout, u_long wflags)
{
	struct umtx_q *uq;
	struct timeval tv;
	struct timespec cts, ets, tts;
	uint32_t flags;
	int error;

	uq = td->td_umtxq;
	flags = fuword32(&cv->c_flags);
	error = umtx_key_get(cv, TYPE_CV, GET_SHARE(flags), &uq->uq_key);
	if (error != 0)
		return (error);
	umtxq_lock(&uq->uq_key);
	umtxq_busy(&uq->uq_key);
	umtxq_insert(uq);
	umtxq_unlock(&uq->uq_key);

	/*
	 * The magic thing is we should set c_has_waiters to 1 before
	 * releasing user mutex.
	 */
	suword32(__DEVOLATILE(uint32_t *, &cv->c_has_waiters), 1);

	umtxq_lock(&uq->uq_key);
	umtxq_unbusy(&uq->uq_key);
	umtxq_unlock(&uq->uq_key);

	error = do_unlock_umutex(td, m);

	umtxq_lock(&uq->uq_key);
	if (error == 0) {
		if ((wflags & UMTX_CHECK_UNPARKING) &&
		    (td->td_pflags & TDP_WAKEUP)) {
			td->td_pflags &= ~TDP_WAKEUP;
			error = EINTR;
		} else if (timeout == NULL) {
			error = umtxq_sleep(uq, "ucond", 0);
		} else {
			/* ets holds the absolute deadline. */
			getnanouptime(&ets);
			timespecadd(&ets, timeout);
			TIMESPEC_TO_TIMEVAL(&tv, timeout);
			for (;;) {
				error = umtxq_sleep(uq, "ucond", tvtohz(&tv));
				if (error != ETIMEDOUT)
					break;
				getnanouptime(&cts);
				if (timespeccmp(&cts, &ets, >=)) {
					error = ETIMEDOUT;
					break;
				}
				tts = ets;
				timespecsub(&tts, &cts);
				TIMESPEC_TO_TIMEVAL(&tv, &tts);
			}
		}
	}

	if (error != 0) {
		if ((uq->uq_flags & UQF_UMTXQ) == 0) {
			/*
			 * If we concurrently got do_cv_signal()d
			 * and we got an error or UNIX signals or a timeout,
			 * then, perform another umtxq_signal to avoid
			 * consuming the wakeup. This may cause spurious
			 * wakeup for another thread which was just queued,
			 * but SUSV3 explicitly allows spurious wakeup to
			 * occur, and indeed a kernel based implementation
			 * can not avoid it.
			 */
			if (!umtxq_signal(&uq->uq_key, 1))
				error = 0;
		}
		if (error == ERESTART)
			error = EINTR;
	}
	umtxq_remove(uq);
	umtxq_unlock(&uq->uq_key);
	umtx_key_release(&uq->uq_key);
	return (error);
}
2367
/*
 * Signal a userland condition variable.
 * Wakes one waiter; when that was the last queued waiter,
 * c_has_waiters is cleared for userland's fast path.
 */
static int
do_cv_signal(struct thread *td, struct ucond *cv)
{
	struct umtx_key key;
	int error, cnt, nwake;
	uint32_t flags;

	flags = fuword32(&cv->c_flags);
	if ((error = umtx_key_get(cv, TYPE_CV, GET_SHARE(flags), &key)) != 0)
		return (error);
	umtxq_lock(&key);
	umtxq_busy(&key);
	cnt = umtxq_count(&key);
	nwake = umtxq_signal(&key, 1);
	if (cnt <= nwake) {
		/* No waiters remain: clear the userland flag. */
		umtxq_unlock(&key);
		error = suword32(
		    __DEVOLATILE(uint32_t *, &cv->c_has_waiters), 0);
		umtxq_lock(&key);
	}
	umtxq_unbusy(&key);
	umtxq_unlock(&key);
	umtx_key_release(&key);
	return (error);
}
2396
/*
 * Broadcast a userland condition variable: wake every waiter and
 * clear c_has_waiters for userland's fast path.
 */
static int
do_cv_broadcast(struct thread *td, struct ucond *cv)
{
	struct umtx_key key;
	int error;
	uint32_t flags;

	flags = fuword32(&cv->c_flags);
	if ((error = umtx_key_get(cv, TYPE_CV, GET_SHARE(flags), &key)) != 0)
		return (error);

	umtxq_lock(&key);
	umtxq_busy(&key);
	umtxq_signal(&key, INT_MAX);
	umtxq_unlock(&key);

	error = suword32(__DEVOLATILE(uint32_t *, &cv->c_has_waiters), 0);

	umtxq_lock(&key);
	umtxq_unbusy(&key);
	umtxq_unlock(&key);

	umtx_key_release(&key);
	return (error);
}
2422
/*
 * Acquire a userland rwlock for reading.
 *
 * Fast path: CAS-increment the reader count while no blocking
 * writer state is set.  Slow path: set URWLOCK_READ_WAITERS, bump
 * rw_blocked_readers and sleep until the writer state clears.
 * Unless reader preference is requested (fflag or rw_flags),
 * pending writers (URWLOCK_WRITE_WAITERS) also block new readers.
 */
static int
do_rw_rdlock(struct thread *td, struct urwlock *rwlock, long fflag, int timo)
{
	struct umtx_q *uq;
	uint32_t flags, wrflags;
	int32_t state, oldstate;
	int32_t blocked_readers;
	int error;

	uq = td->td_umtxq;
	flags = fuword32(&rwlock->rw_flags);
	error = umtx_key_get(rwlock, TYPE_RWLOCK, GET_SHARE(flags), &uq->uq_key);
	if (error != 0)
		return (error);

	wrflags = URWLOCK_WRITE_OWNER;
	if (!(fflag & URWLOCK_PREFER_READER) && !(flags & URWLOCK_PREFER_READER))
		wrflags |= URWLOCK_WRITE_WAITERS;

	for (;;) {
		state = fuword32(__DEVOLATILE(int32_t *, &rwlock->rw_state));
		/* try to lock it */
		while (!(state & wrflags)) {
			if (__predict_false(URWLOCK_READER_COUNT(state) == URWLOCK_MAX_READERS)) {
				umtx_key_release(&uq->uq_key);
				return (EAGAIN);
			}
			oldstate = casuword32(&rwlock->rw_state, state, state + 1);
			if (oldstate == state) {
				umtx_key_release(&uq->uq_key);
				return (0);
			}
			state = oldstate;
		}

		if (error)
			break;

		/* grab monitor lock */
		umtxq_lock(&uq->uq_key);
		umtxq_busy(&uq->uq_key);
		umtxq_unlock(&uq->uq_key);

		/*
		 * re-read the state, in case it changed between the try-lock above
		 * and the check below
		 */
		state = fuword32(__DEVOLATILE(int32_t *, &rwlock->rw_state));

		/* set read contention bit */
		while ((state & wrflags) && !(state & URWLOCK_READ_WAITERS)) {
			oldstate = casuword32(&rwlock->rw_state, state, state | URWLOCK_READ_WAITERS);
			if (oldstate == state)
				goto sleep;
			state = oldstate;
		}

		/* state is changed while setting flags, restart */
		if (!(state & wrflags)) {
			umtxq_lock(&uq->uq_key);
			umtxq_unbusy(&uq->uq_key);
			umtxq_unlock(&uq->uq_key);
			continue;
		}

sleep:
		/* contention bit is set, before sleeping, increase read waiter count */
		blocked_readers = fuword32(&rwlock->rw_blocked_readers);
		suword32(&rwlock->rw_blocked_readers, blocked_readers+1);

		while (state & wrflags) {
			umtxq_lock(&uq->uq_key);
			umtxq_insert(uq);
			umtxq_unbusy(&uq->uq_key);

			error = umtxq_sleep(uq, "urdlck", timo);

			umtxq_busy(&uq->uq_key);
			umtxq_remove(uq);
			umtxq_unlock(&uq->uq_key);
			if (error)
				break;
			state = fuword32(__DEVOLATILE(int32_t *, &rwlock->rw_state));
		}

		/* decrease read waiter count, and may clear read contention bit */
		blocked_readers = fuword32(&rwlock->rw_blocked_readers);
		suword32(&rwlock->rw_blocked_readers, blocked_readers-1);
		if (blocked_readers == 1) {
			/* We were the last blocked reader: clear the bit. */
			state = fuword32(__DEVOLATILE(int32_t *, &rwlock->rw_state));
			for (;;) {
				oldstate = casuword32(&rwlock->rw_state, state,
				    state & ~URWLOCK_READ_WAITERS);
				if (oldstate == state)
					break;
				state = oldstate;
			}
		}

		umtxq_lock(&uq->uq_key);
		umtxq_unbusy(&uq->uq_key);
		umtxq_unlock(&uq->uq_key);
	}
	umtx_key_release(&uq->uq_key);
	return (error);
}
2529
2530 static int
2531 do_rw_rdlock2(struct thread *td, void *obj, long val, struct timespec *timeout)
2532 {
2533 struct timespec ts, ts2, ts3;
2534 struct timeval tv;
2535 int error;
2536
2537 getnanouptime(&ts);
2538 timespecadd(&ts, timeout);
2539 TIMESPEC_TO_TIMEVAL(&tv, timeout);
2540 for (;;) {
2541 error = do_rw_rdlock(td, obj, val, tvtohz(&tv));
2542 if (error != ETIMEDOUT)
2543 break;
2544 getnanouptime(&ts2);
2545 if (timespeccmp(&ts2, &ts, >=)) {
2546 error = ETIMEDOUT;
2547 break;
2548 }
2549 ts3 = ts;
2550 timespecsub(&ts3, &ts2);
2551 TIMESPEC_TO_TIMEVAL(&tv, &ts3);
2552 }
2553 if (error == ERESTART)
2554 error = EINTR;
2555 return (error);
2556 }
2557
/*
 * Acquire a userland rwlock for writing.
 *
 * Mirrors do_rw_rdlock(): lock-word transitions are casuword32() CAS
 * loops on user memory, with the kernel umtx queue used only for
 * blocking.  Writers sleep on UMTX_EXCLUSIVE_QUEUE.
 *
 * Returns 0 on success or a sleep error from umtxq_sleep().
 */
static int
do_rw_wrlock(struct thread *td, struct urwlock *rwlock, int timo)
{
	struct umtx_q *uq;
	uint32_t flags;
	int32_t state, oldstate;
	int32_t blocked_writers;
	int32_t blocked_readers;
	int error;

	uq = td->td_umtxq;
	flags = fuword32(&rwlock->rw_flags);
	error = umtx_key_get(rwlock, TYPE_RWLOCK, GET_SHARE(flags), &uq->uq_key);
	if (error != 0)
		return (error);

	blocked_readers = 0;
	for (;;) {
		state = fuword32(__DEVOLATILE(int32_t *, &rwlock->rw_state));
		/* Try to take ownership while the lock is completely free. */
		while (!(state & URWLOCK_WRITE_OWNER) && URWLOCK_READER_COUNT(state) == 0) {
			oldstate = casuword32(&rwlock->rw_state, state, state | URWLOCK_WRITE_OWNER);
			if (oldstate == state) {
				umtx_key_release(&uq->uq_key);
				return (0);
			}
			state = oldstate;
		}

		/* error is set by a failed sleep in the previous iteration. */
		if (error) {
			/*
			 * If we were the writer readers had been deferring
			 * to, hand the lock back to the blocked readers
			 * before bailing out.
			 */
			if (!(state & (URWLOCK_WRITE_OWNER|URWLOCK_WRITE_WAITERS)) &&
			    blocked_readers != 0) {
				umtxq_lock(&uq->uq_key);
				umtxq_busy(&uq->uq_key);
				umtxq_signal_queue(&uq->uq_key, INT_MAX, UMTX_SHARED_QUEUE);
				umtxq_unbusy(&uq->uq_key);
				umtxq_unlock(&uq->uq_key);
			}

			break;
		}

		/* grab monitor lock */
		umtxq_lock(&uq->uq_key);
		umtxq_busy(&uq->uq_key);
		umtxq_unlock(&uq->uq_key);

		/*
		 * re-read the state, in case it changed between the try-lock above
		 * and the check below
		 */
		state = fuword32(__DEVOLATILE(int32_t *, &rwlock->rw_state));

		/* Set the write contention bit while the lock is held. */
		while (((state & URWLOCK_WRITE_OWNER) || URWLOCK_READER_COUNT(state) != 0) &&
		       (state & URWLOCK_WRITE_WAITERS) == 0) {
			oldstate = casuword32(&rwlock->rw_state, state, state | URWLOCK_WRITE_WAITERS);
			if (oldstate == state)
				goto sleep;
			state = oldstate;
		}

		/* Lock became free while setting the flag; restart. */
		if (!(state & URWLOCK_WRITE_OWNER) && URWLOCK_READER_COUNT(state) == 0) {
			umtxq_lock(&uq->uq_key);
			umtxq_unbusy(&uq->uq_key);
			umtxq_unlock(&uq->uq_key);
			continue;
		}
sleep:
		/* Announce ourselves as a blocked writer, then sleep. */
		blocked_writers = fuword32(&rwlock->rw_blocked_writers);
		suword32(&rwlock->rw_blocked_writers, blocked_writers+1);

		while ((state & URWLOCK_WRITE_OWNER) || URWLOCK_READER_COUNT(state) != 0) {
			umtxq_lock(&uq->uq_key);
			umtxq_insert_queue(uq, UMTX_EXCLUSIVE_QUEUE);
			umtxq_unbusy(&uq->uq_key);

			error = umtxq_sleep(uq, "uwrlck", timo);

			umtxq_busy(&uq->uq_key);
			umtxq_remove_queue(uq, UMTX_EXCLUSIVE_QUEUE);
			umtxq_unlock(&uq->uq_key);
			if (error)
				break;
			state = fuword32(__DEVOLATILE(int32_t *, &rwlock->rw_state));
		}

		/* Retract the blocked-writer count; last one clears the bit. */
		blocked_writers = fuword32(&rwlock->rw_blocked_writers);
		suword32(&rwlock->rw_blocked_writers, blocked_writers-1);
		if (blocked_writers == 1) {
			state = fuword32(__DEVOLATILE(int32_t *, &rwlock->rw_state));
			for (;;) {
				oldstate = casuword32(&rwlock->rw_state, state,
					 state & ~URWLOCK_WRITE_WAITERS);
				if (oldstate == state)
					break;
				state = oldstate;
			}
			/* Remember pending readers for the error path above. */
			blocked_readers = fuword32(&rwlock->rw_blocked_readers);
		} else
			blocked_readers = 0;

		umtxq_lock(&uq->uq_key);
		umtxq_unbusy(&uq->uq_key);
		umtxq_unlock(&uq->uq_key);
	}

	umtx_key_release(&uq->uq_key);
	return (error);
}
2666
2667 static int
2668 do_rw_wrlock2(struct thread *td, void *obj, struct timespec *timeout)
2669 {
2670 struct timespec ts, ts2, ts3;
2671 struct timeval tv;
2672 int error;
2673
2674 getnanouptime(&ts);
2675 timespecadd(&ts, timeout);
2676 TIMESPEC_TO_TIMEVAL(&tv, timeout);
2677 for (;;) {
2678 error = do_rw_wrlock(td, obj, tvtohz(&tv));
2679 if (error != ETIMEDOUT)
2680 break;
2681 getnanouptime(&ts2);
2682 if (timespeccmp(&ts2, &ts, >=)) {
2683 error = ETIMEDOUT;
2684 break;
2685 }
2686 ts3 = ts;
2687 timespecsub(&ts3, &ts2);
2688 TIMESPEC_TO_TIMEVAL(&tv, &ts3);
2689 }
2690 if (error == ERESTART)
2691 error = EINTR;
2692 return (error);
2693 }
2694
/*
 * Release a userland rwlock held by the caller (either the write owner
 * bit or one reader reference), then wake waiters according to the
 * lock's preference policy.
 *
 * Returns 0 on success or EPERM if the lock word shows the caller
 * cannot be holding the lock.
 */
static int
do_rw_unlock(struct thread *td, struct urwlock *rwlock)
{
	struct umtx_q *uq;
	uint32_t flags;
	int32_t state, oldstate;
	int error, q, count;

	uq = td->td_umtxq;
	flags = fuword32(&rwlock->rw_flags);
	error = umtx_key_get(rwlock, TYPE_RWLOCK, GET_SHARE(flags), &uq->uq_key);
	if (error != 0)
		return (error);

	state = fuword32(__DEVOLATILE(int32_t *, &rwlock->rw_state));
	if (state & URWLOCK_WRITE_OWNER) {
		/* CAS-clear the owner bit; EPERM if it vanished under us. */
		for (;;) {
			oldstate = casuword32(&rwlock->rw_state, state,
				state & ~URWLOCK_WRITE_OWNER);
			if (oldstate != state) {
				state = oldstate;
				if (!(oldstate & URWLOCK_WRITE_OWNER)) {
					error = EPERM;
					goto out;
				}
			} else
				break;
		}
	} else if (URWLOCK_READER_COUNT(state) != 0) {
		/* Drop one reader reference; EPERM if the count hit zero. */
		for (;;) {
			oldstate = casuword32(&rwlock->rw_state, state,
				state - 1);
			if (oldstate != state) {
				state = oldstate;
				if (URWLOCK_READER_COUNT(oldstate) == 0) {
					error = EPERM;
					goto out;
				}
			}
			else
				break;
		}
	} else {
		/* Lock word shows neither owner nor readers. */
		error = EPERM;
		goto out;
	}

	count = 0;

	/*
	 * Pick which queue to wake: writers first by default, readers
	 * first when URWLOCK_PREFER_READER is set.  Waking INT_MAX
	 * readers but only one writer avoids a thundering herd of
	 * writers.
	 */
	if (!(flags & URWLOCK_PREFER_READER)) {
		if (state & URWLOCK_WRITE_WAITERS) {
			count = 1;
			q = UMTX_EXCLUSIVE_QUEUE;
		} else if (state & URWLOCK_READ_WAITERS) {
			count = INT_MAX;
			q = UMTX_SHARED_QUEUE;
		}
	} else {
		if (state & URWLOCK_READ_WAITERS) {
			count = INT_MAX;
			q = UMTX_SHARED_QUEUE;
		} else if (state & URWLOCK_WRITE_WAITERS) {
			count = 1;
			q = UMTX_EXCLUSIVE_QUEUE;
		}
	}

	if (count) {
		umtxq_lock(&uq->uq_key);
		umtxq_busy(&uq->uq_key);
		umtxq_signal_queue(&uq->uq_key, count, q);
		umtxq_unbusy(&uq->uq_key);
		umtxq_unlock(&uq->uq_key);
	}
out:
	umtx_key_release(&uq->uq_key);
	return (error);
}
2773
2774 int
2775 _umtx_lock(struct thread *td, struct _umtx_lock_args *uap)
2776 /* struct umtx *umtx */
2777 {
2778 return _do_lock_umtx(td, uap->umtx, td->td_tid, 0);
2779 }
2780
2781 int
2782 _umtx_unlock(struct thread *td, struct _umtx_unlock_args *uap)
2783 /* struct umtx *umtx */
2784 {
2785 return do_unlock_umtx(td, uap->umtx, td->td_tid);
2786 }
2787
2788 static int
2789 __umtx_op_lock_umtx(struct thread *td, struct _umtx_op_args *uap)
2790 {
2791 struct timespec *ts, timeout;
2792 int error;
2793
2794 /* Allow a null timespec (wait forever). */
2795 if (uap->uaddr2 == NULL)
2796 ts = NULL;
2797 else {
2798 error = copyin(uap->uaddr2, &timeout, sizeof(timeout));
2799 if (error != 0)
2800 return (error);
2801 if (timeout.tv_nsec >= 1000000000 ||
2802 timeout.tv_nsec < 0) {
2803 return (EINVAL);
2804 }
2805 ts = &timeout;
2806 }
2807 return (do_lock_umtx(td, uap->obj, uap->val, ts));
2808 }
2809
2810 static int
2811 __umtx_op_unlock_umtx(struct thread *td, struct _umtx_op_args *uap)
2812 {
2813 return (do_unlock_umtx(td, uap->obj, uap->val));
2814 }
2815
2816 static int
2817 __umtx_op_wait(struct thread *td, struct _umtx_op_args *uap)
2818 {
2819 struct timespec *ts, timeout;
2820 int error;
2821
2822 if (uap->uaddr2 == NULL)
2823 ts = NULL;
2824 else {
2825 error = copyin(uap->uaddr2, &timeout, sizeof(timeout));
2826 if (error != 0)
2827 return (error);
2828 if (timeout.tv_nsec >= 1000000000 ||
2829 timeout.tv_nsec < 0)
2830 return (EINVAL);
2831 ts = &timeout;
2832 }
2833 return do_wait(td, uap->obj, uap->val, ts, 0, 0);
2834 }
2835
2836 static int
2837 __umtx_op_wait_uint(struct thread *td, struct _umtx_op_args *uap)
2838 {
2839 struct timespec *ts, timeout;
2840 int error;
2841
2842 if (uap->uaddr2 == NULL)
2843 ts = NULL;
2844 else {
2845 error = copyin(uap->uaddr2, &timeout, sizeof(timeout));
2846 if (error != 0)
2847 return (error);
2848 if (timeout.tv_nsec >= 1000000000 ||
2849 timeout.tv_nsec < 0)
2850 return (EINVAL);
2851 ts = &timeout;
2852 }
2853 return do_wait(td, uap->obj, uap->val, ts, 1, 0);
2854 }
2855
2856 static int
2857 __umtx_op_wait_uint_private(struct thread *td, struct _umtx_op_args *uap)
2858 {
2859 struct timespec *ts, timeout;
2860 int error;
2861
2862 if (uap->uaddr2 == NULL)
2863 ts = NULL;
2864 else {
2865 error = copyin(uap->uaddr2, &timeout, sizeof(timeout));
2866 if (error != 0)
2867 return (error);
2868 if (timeout.tv_nsec >= 1000000000 ||
2869 timeout.tv_nsec < 0)
2870 return (EINVAL);
2871 ts = &timeout;
2872 }
2873 return do_wait(td, uap->obj, uap->val, ts, 1, 1);
2874 }
2875
2876 static int
2877 __umtx_op_wake(struct thread *td, struct _umtx_op_args *uap)
2878 {
2879 return (kern_umtx_wake(td, uap->obj, uap->val, 0));
2880 }
2881
2882 static int
2883 __umtx_op_wake_private(struct thread *td, struct _umtx_op_args *uap)
2884 {
2885 return (kern_umtx_wake(td, uap->obj, uap->val, 1));
2886 }
2887
2888 static int
2889 __umtx_op_lock_umutex(struct thread *td, struct _umtx_op_args *uap)
2890 {
2891 struct timespec *ts, timeout;
2892 int error;
2893
2894 /* Allow a null timespec (wait forever). */
2895 if (uap->uaddr2 == NULL)
2896 ts = NULL;
2897 else {
2898 error = copyin(uap->uaddr2, &timeout,
2899 sizeof(timeout));
2900 if (error != 0)
2901 return (error);
2902 if (timeout.tv_nsec >= 1000000000 ||
2903 timeout.tv_nsec < 0) {
2904 return (EINVAL);
2905 }
2906 ts = &timeout;
2907 }
2908 return do_lock_umutex(td, uap->obj, ts, 0);
2909 }
2910
2911 static int
2912 __umtx_op_trylock_umutex(struct thread *td, struct _umtx_op_args *uap)
2913 {
2914 return do_lock_umutex(td, uap->obj, NULL, _UMUTEX_TRY);
2915 }
2916
2917 static int
2918 __umtx_op_wait_umutex(struct thread *td, struct _umtx_op_args *uap)
2919 {
2920 struct timespec *ts, timeout;
2921 int error;
2922
2923 /* Allow a null timespec (wait forever). */
2924 if (uap->uaddr2 == NULL)
2925 ts = NULL;
2926 else {
2927 error = copyin(uap->uaddr2, &timeout,
2928 sizeof(timeout));
2929 if (error != 0)
2930 return (error);
2931 if (timeout.tv_nsec >= 1000000000 ||
2932 timeout.tv_nsec < 0) {
2933 return (EINVAL);
2934 }
2935 ts = &timeout;
2936 }
2937 return do_lock_umutex(td, uap->obj, ts, _UMUTEX_WAIT);
2938 }
2939
2940 static int
2941 __umtx_op_wake_umutex(struct thread *td, struct _umtx_op_args *uap)
2942 {
2943 return do_wake_umutex(td, uap->obj);
2944 }
2945
2946 static int
2947 __umtx_op_unlock_umutex(struct thread *td, struct _umtx_op_args *uap)
2948 {
2949 return do_unlock_umutex(td, uap->obj);
2950 }
2951
2952 static int
2953 __umtx_op_set_ceiling(struct thread *td, struct _umtx_op_args *uap)
2954 {
2955 return do_set_ceiling(td, uap->obj, uap->val, uap->uaddr1);
2956 }
2957
2958 static int
2959 __umtx_op_cv_wait(struct thread *td, struct _umtx_op_args *uap)
2960 {
2961 struct timespec *ts, timeout;
2962 int error;
2963
2964 /* Allow a null timespec (wait forever). */
2965 if (uap->uaddr2 == NULL)
2966 ts = NULL;
2967 else {
2968 error = copyin(uap->uaddr2, &timeout,
2969 sizeof(timeout));
2970 if (error != 0)
2971 return (error);
2972 if (timeout.tv_nsec >= 1000000000 ||
2973 timeout.tv_nsec < 0) {
2974 return (EINVAL);
2975 }
2976 ts = &timeout;
2977 }
2978 return (do_cv_wait(td, uap->obj, uap->uaddr1, ts, uap->val));
2979 }
2980
2981 static int
2982 __umtx_op_cv_signal(struct thread *td, struct _umtx_op_args *uap)
2983 {
2984 return do_cv_signal(td, uap->obj);
2985 }
2986
2987 static int
2988 __umtx_op_cv_broadcast(struct thread *td, struct _umtx_op_args *uap)
2989 {
2990 return do_cv_broadcast(td, uap->obj);
2991 }
2992
2993 static int
2994 __umtx_op_rw_rdlock(struct thread *td, struct _umtx_op_args *uap)
2995 {
2996 struct timespec timeout;
2997 int error;
2998
2999 /* Allow a null timespec (wait forever). */
3000 if (uap->uaddr2 == NULL) {
3001 error = do_rw_rdlock(td, uap->obj, uap->val, 0);
3002 } else {
3003 error = copyin(uap->uaddr2, &timeout,
3004 sizeof(timeout));
3005 if (error != 0)
3006 return (error);
3007 if (timeout.tv_nsec >= 1000000000 ||
3008 timeout.tv_nsec < 0) {
3009 return (EINVAL);
3010 }
3011 error = do_rw_rdlock2(td, uap->obj, uap->val, &timeout);
3012 }
3013 return (error);
3014 }
3015
3016 static int
3017 __umtx_op_rw_wrlock(struct thread *td, struct _umtx_op_args *uap)
3018 {
3019 struct timespec timeout;
3020 int error;
3021
3022 /* Allow a null timespec (wait forever). */
3023 if (uap->uaddr2 == NULL) {
3024 error = do_rw_wrlock(td, uap->obj, 0);
3025 } else {
3026 error = copyin(uap->uaddr2, &timeout,
3027 sizeof(timeout));
3028 if (error != 0)
3029 return (error);
3030 if (timeout.tv_nsec >= 1000000000 ||
3031 timeout.tv_nsec < 0) {
3032 return (EINVAL);
3033 }
3034
3035 error = do_rw_wrlock2(td, uap->obj, &timeout);
3036 }
3037 return (error);
3038 }
3039
3040 static int
3041 __umtx_op_rw_unlock(struct thread *td, struct _umtx_op_args *uap)
3042 {
3043 return do_rw_unlock(td, uap->obj);
3044 }
3045
typedef int (*_umtx_op_func)(struct thread *td, struct _umtx_op_args *uap);

/*
 * Native _umtx_op() dispatch table, indexed by the UMTX_OP_* opcode.
 * Entry order must exactly match the UMTX_OP_* constant values.
 */
static _umtx_op_func op_table[] = {
	__umtx_op_lock_umtx,		/* UMTX_OP_LOCK */
	__umtx_op_unlock_umtx,		/* UMTX_OP_UNLOCK */
	__umtx_op_wait,			/* UMTX_OP_WAIT */
	__umtx_op_wake,			/* UMTX_OP_WAKE */
	__umtx_op_trylock_umutex,	/* UMTX_OP_MUTEX_TRYLOCK */
	__umtx_op_lock_umutex,		/* UMTX_OP_MUTEX_LOCK */
	__umtx_op_unlock_umutex,	/* UMTX_OP_MUTEX_UNLOCK */
	__umtx_op_set_ceiling,		/* UMTX_OP_SET_CEILING */
	__umtx_op_cv_wait,		/* UMTX_OP_CV_WAIT*/
	__umtx_op_cv_signal,		/* UMTX_OP_CV_SIGNAL */
	__umtx_op_cv_broadcast,		/* UMTX_OP_CV_BROADCAST */
	__umtx_op_wait_uint,		/* UMTX_OP_WAIT_UINT */
	__umtx_op_rw_rdlock,		/* UMTX_OP_RW_RDLOCK */
	__umtx_op_rw_wrlock,		/* UMTX_OP_RW_WRLOCK */
	__umtx_op_rw_unlock,		/* UMTX_OP_RW_UNLOCK */
	__umtx_op_wait_uint_private,	/* UMTX_OP_WAIT_UINT_PRIVATE */
	__umtx_op_wake_private,		/* UMTX_OP_WAKE_PRIVATE */
	__umtx_op_wait_umutex,		/* UMTX_OP_UMUTEX_WAIT */
	__umtx_op_wake_umutex		/* UMTX_OP_UMUTEX_WAKE */
};
3069
3070 int
3071 _umtx_op(struct thread *td, struct _umtx_op_args *uap)
3072 {
3073 if ((unsigned)uap->op < UMTX_OP_MAX)
3074 return (*op_table[uap->op])(td, uap);
3075 return (EINVAL);
3076 }
3077
3078 #ifdef COMPAT_IA32
3079 int
3080 freebsd32_umtx_lock(struct thread *td, struct freebsd32_umtx_lock_args *uap)
3081 /* struct umtx *umtx */
3082 {
3083 return (do_lock_umtx32(td, (uint32_t *)uap->umtx, td->td_tid, NULL));
3084 }
3085
3086 int
3087 freebsd32_umtx_unlock(struct thread *td, struct freebsd32_umtx_unlock_args *uap)
3088 /* struct umtx *umtx */
3089 {
3090 return (do_unlock_umtx32(td, (uint32_t *)uap->umtx, td->td_tid));
3091 }
3092
/*
 * Layout of struct timespec as seen by 32-bit (COMPAT_IA32) processes.
 * NOTE(review): tv_sec is declared unsigned here while the native
 * tv_sec is signed — verify this matches the 32-bit userland ABI.
 */
struct timespec32 {
	u_int32_t tv_sec;
	u_int32_t tv_nsec;
};
3097
3098 static inline int
3099 copyin_timeout32(void *addr, struct timespec *tsp)
3100 {
3101 struct timespec32 ts32;
3102 int error;
3103
3104 error = copyin(addr, &ts32, sizeof(struct timespec32));
3105 if (error == 0) {
3106 tsp->tv_sec = ts32.tv_sec;
3107 tsp->tv_nsec = ts32.tv_nsec;
3108 }
3109 return (error);
3110 }
3111
3112 static int
3113 __umtx_op_lock_umtx_compat32(struct thread *td, struct _umtx_op_args *uap)
3114 {
3115 struct timespec *ts, timeout;
3116 int error;
3117
3118 /* Allow a null timespec (wait forever). */
3119 if (uap->uaddr2 == NULL)
3120 ts = NULL;
3121 else {
3122 error = copyin_timeout32(uap->uaddr2, &timeout);
3123 if (error != 0)
3124 return (error);
3125 if (timeout.tv_nsec >= 1000000000 ||
3126 timeout.tv_nsec < 0) {
3127 return (EINVAL);
3128 }
3129 ts = &timeout;
3130 }
3131 return (do_lock_umtx32(td, uap->obj, uap->val, ts));
3132 }
3133
3134 static int
3135 __umtx_op_unlock_umtx_compat32(struct thread *td, struct _umtx_op_args *uap)
3136 {
3137 return (do_unlock_umtx32(td, uap->obj, (uint32_t)uap->val));
3138 }
3139
3140 static int
3141 __umtx_op_wait_compat32(struct thread *td, struct _umtx_op_args *uap)
3142 {
3143 struct timespec *ts, timeout;
3144 int error;
3145
3146 if (uap->uaddr2 == NULL)
3147 ts = NULL;
3148 else {
3149 error = copyin_timeout32(uap->uaddr2, &timeout);
3150 if (error != 0)
3151 return (error);
3152 if (timeout.tv_nsec >= 1000000000 ||
3153 timeout.tv_nsec < 0)
3154 return (EINVAL);
3155 ts = &timeout;
3156 }
3157 return do_wait(td, uap->obj, uap->val, ts, 1, 0);
3158 }
3159
3160 static int
3161 __umtx_op_lock_umutex_compat32(struct thread *td, struct _umtx_op_args *uap)
3162 {
3163 struct timespec *ts, timeout;
3164 int error;
3165
3166 /* Allow a null timespec (wait forever). */
3167 if (uap->uaddr2 == NULL)
3168 ts = NULL;
3169 else {
3170 error = copyin_timeout32(uap->uaddr2, &timeout);
3171 if (error != 0)
3172 return (error);
3173 if (timeout.tv_nsec >= 1000000000 ||
3174 timeout.tv_nsec < 0)
3175 return (EINVAL);
3176 ts = &timeout;
3177 }
3178 return do_lock_umutex(td, uap->obj, ts, 0);
3179 }
3180
3181 static int
3182 __umtx_op_wait_umutex_compat32(struct thread *td, struct _umtx_op_args *uap)
3183 {
3184 struct timespec *ts, timeout;
3185 int error;
3186
3187 /* Allow a null timespec (wait forever). */
3188 if (uap->uaddr2 == NULL)
3189 ts = NULL;
3190 else {
3191 error = copyin_timeout32(uap->uaddr2, &timeout);
3192 if (error != 0)
3193 return (error);
3194 if (timeout.tv_nsec >= 1000000000 ||
3195 timeout.tv_nsec < 0)
3196 return (EINVAL);
3197 ts = &timeout;
3198 }
3199 return do_lock_umutex(td, uap->obj, ts, _UMUTEX_WAIT);
3200 }
3201
3202 static int
3203 __umtx_op_cv_wait_compat32(struct thread *td, struct _umtx_op_args *uap)
3204 {
3205 struct timespec *ts, timeout;
3206 int error;
3207
3208 /* Allow a null timespec (wait forever). */
3209 if (uap->uaddr2 == NULL)
3210 ts = NULL;
3211 else {
3212 error = copyin_timeout32(uap->uaddr2, &timeout);
3213 if (error != 0)
3214 return (error);
3215 if (timeout.tv_nsec >= 1000000000 ||
3216 timeout.tv_nsec < 0)
3217 return (EINVAL);
3218 ts = &timeout;
3219 }
3220 return (do_cv_wait(td, uap->obj, uap->uaddr1, ts, uap->val));
3221 }
3222
3223 static int
3224 __umtx_op_rw_rdlock_compat32(struct thread *td, struct _umtx_op_args *uap)
3225 {
3226 struct timespec timeout;
3227 int error;
3228
3229 /* Allow a null timespec (wait forever). */
3230 if (uap->uaddr2 == NULL) {
3231 error = do_rw_rdlock(td, uap->obj, uap->val, 0);
3232 } else {
3233 error = copyin(uap->uaddr2, &timeout,
3234 sizeof(timeout));
3235 if (error != 0)
3236 return (error);
3237 if (timeout.tv_nsec >= 1000000000 ||
3238 timeout.tv_nsec < 0) {
3239 return (EINVAL);
3240 }
3241 error = do_rw_rdlock2(td, uap->obj, uap->val, &timeout);
3242 }
3243 return (error);
3244 }
3245
3246 static int
3247 __umtx_op_rw_wrlock_compat32(struct thread *td, struct _umtx_op_args *uap)
3248 {
3249 struct timespec timeout;
3250 int error;
3251
3252 /* Allow a null timespec (wait forever). */
3253 if (uap->uaddr2 == NULL) {
3254 error = do_rw_wrlock(td, uap->obj, 0);
3255 } else {
3256 error = copyin_timeout32(uap->uaddr2, &timeout);
3257 if (error != 0)
3258 return (error);
3259 if (timeout.tv_nsec >= 1000000000 ||
3260 timeout.tv_nsec < 0) {
3261 return (EINVAL);
3262 }
3263
3264 error = do_rw_wrlock2(td, uap->obj, &timeout);
3265 }
3266 return (error);
3267 }
3268
3269 static int
3270 __umtx_op_wait_uint_private_compat32(struct thread *td, struct _umtx_op_args *uap)
3271 {
3272 struct timespec *ts, timeout;
3273 int error;
3274
3275 if (uap->uaddr2 == NULL)
3276 ts = NULL;
3277 else {
3278 error = copyin_timeout32(uap->uaddr2, &timeout);
3279 if (error != 0)
3280 return (error);
3281 if (timeout.tv_nsec >= 1000000000 ||
3282 timeout.tv_nsec < 0)
3283 return (EINVAL);
3284 ts = &timeout;
3285 }
3286 return do_wait(td, uap->obj, uap->val, ts, 1, 1);
3287 }
3288
/*
 * 32-bit compat dispatch table; entry order must match op_table and
 * the UMTX_OP_* constant values.  (Only the comments on the two mutex
 * entries are corrected below — they were swapped; the function slots
 * themselves match the native table.)
 */
static _umtx_op_func op_table_compat32[] = {
	__umtx_op_lock_umtx_compat32,	/* UMTX_OP_LOCK */
	__umtx_op_unlock_umtx_compat32,	/* UMTX_OP_UNLOCK */
	__umtx_op_wait_compat32,	/* UMTX_OP_WAIT */
	__umtx_op_wake,			/* UMTX_OP_WAKE */
	__umtx_op_trylock_umutex,	/* UMTX_OP_MUTEX_TRYLOCK */
	__umtx_op_lock_umutex_compat32,	/* UMTX_OP_MUTEX_LOCK */
	__umtx_op_unlock_umutex,	/* UMTX_OP_MUTEX_UNLOCK */
	__umtx_op_set_ceiling,		/* UMTX_OP_SET_CEILING */
	__umtx_op_cv_wait_compat32,	/* UMTX_OP_CV_WAIT*/
	__umtx_op_cv_signal,		/* UMTX_OP_CV_SIGNAL */
	__umtx_op_cv_broadcast,		/* UMTX_OP_CV_BROADCAST */
	__umtx_op_wait_compat32,	/* UMTX_OP_WAIT_UINT */
	__umtx_op_rw_rdlock_compat32,	/* UMTX_OP_RW_RDLOCK */
	__umtx_op_rw_wrlock_compat32,	/* UMTX_OP_RW_WRLOCK */
	__umtx_op_rw_unlock,		/* UMTX_OP_RW_UNLOCK */
	__umtx_op_wait_uint_private_compat32,	/* UMTX_OP_WAIT_UINT_PRIVATE */
	__umtx_op_wake_private,		/* UMTX_OP_WAKE_PRIVATE */
	__umtx_op_wait_umutex_compat32, /* UMTX_OP_UMUTEX_WAIT */
	__umtx_op_wake_umutex		/* UMTX_OP_UMUTEX_WAKE */
};
3310
3311 int
3312 freebsd32_umtx_op(struct thread *td, struct freebsd32_umtx_op_args *uap)
3313 {
3314 if ((unsigned)uap->op < UMTX_OP_MAX)
3315 return (*op_table_compat32[uap->op])(td,
3316 (struct _umtx_op_args *)uap);
3317 return (EINVAL);
3318 }
3319 #endif
3320
3321 void
3322 umtx_thread_init(struct thread *td)
3323 {
3324 td->td_umtxq = umtxq_alloc();
3325 td->td_umtxq->uq_thread = td;
3326 }
3327
3328 void
3329 umtx_thread_fini(struct thread *td)
3330 {
3331 umtxq_free(td->td_umtxq);
3332 }
3333
3334 /*
3335 * It will be called when new thread is created, e.g fork().
3336 */
3337 void
3338 umtx_thread_alloc(struct thread *td)
3339 {
3340 struct umtx_q *uq;
3341
3342 uq = td->td_umtxq;
3343 uq->uq_inherited_pri = PRI_MAX;
3344
3345 KASSERT(uq->uq_flags == 0, ("uq_flags != 0"));
3346 KASSERT(uq->uq_thread == td, ("uq_thread != td"));
3347 KASSERT(uq->uq_pi_blocked == NULL, ("uq_pi_blocked != NULL"));
3348 KASSERT(TAILQ_EMPTY(&uq->uq_pi_contested), ("uq_pi_contested is not empty"));
3349 }
3350
3351 /*
3352 * exec() hook.
3353 */
3354 static void
3355 umtx_exec_hook(void *arg __unused, struct proc *p __unused,
3356 struct image_params *imgp __unused)
3357 {
3358 umtx_thread_cleanup(curthread);
3359 }
3360
/*
 * thread_exit() hook: release any priority-inheritance state still
 * attached to the exiting thread.
 */
void
umtx_thread_exit(struct thread *td)
{

	umtx_thread_cleanup(td);
}
3369
/*
 * clean up umtx data.
 *
 * Disowns every priority-inheritance mutex still contested by the
 * thread and resets its inherited priority, all under umtx_lock;
 * the thread lock is taken to clear TDF_UBORROWING.
 */
static void
umtx_thread_cleanup(struct thread *td)
{
	struct umtx_q *uq;
	struct umtx_pi *pi;

	if ((uq = td->td_umtxq) == NULL)
		return;

	mtx_lock_spin(&umtx_lock);
	uq->uq_inherited_pri = PRI_MAX;
	/* Orphan any PI mutexes this thread still owns contention on. */
	while ((pi = TAILQ_FIRST(&uq->uq_pi_contested)) != NULL) {
		pi->pi_owner = NULL;
		TAILQ_REMOVE(&uq->uq_pi_contested, pi, pi_link);
	}
	thread_lock(td);
	td->td_flags &= ~TDF_UBORROWING;
	thread_unlock(td);
	mtx_unlock_spin(&umtx_lock);
}
Cache object: dab09b1b7ec6d9baec0f1598473a97ea
|