FreeBSD/Linux Kernel Cross Reference
sys/kern/kern_umtx.c
1 /*-
2 * Copyright (c) 2004, David Xu <davidxu@freebsd.org>
3 * Copyright (c) 2002, Jeffrey Roberson <jeff@freebsd.org>
4 * All rights reserved.
5 *
6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions
8 * are met:
9 * 1. Redistributions of source code must retain the above copyright
10 * notice unmodified, this list of conditions, and the following
11 * disclaimer.
12 * 2. Redistributions in binary form must reproduce the above copyright
13 * notice, this list of conditions and the following disclaimer in the
14 * documentation and/or other materials provided with the distribution.
15 *
16 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
17 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
18 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
19 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
20 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
21 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
22 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
23 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
24 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
25 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26 */
27
28 #include <sys/cdefs.h>
29 __FBSDID("$FreeBSD$");
30
31 #include "opt_compat.h"
32 #include <sys/param.h>
33 #include <sys/kernel.h>
34 #include <sys/limits.h>
35 #include <sys/lock.h>
36 #include <sys/malloc.h>
37 #include <sys/mutex.h>
38 #include <sys/priv.h>
39 #include <sys/proc.h>
40 #include <sys/sched.h>
41 #include <sys/smp.h>
42 #include <sys/sysctl.h>
43 #include <sys/sysent.h>
44 #include <sys/systm.h>
45 #include <sys/sysproto.h>
46 #include <sys/eventhandler.h>
47 #include <sys/umtx.h>
48
49 #include <vm/vm.h>
50 #include <vm/vm_param.h>
51 #include <vm/pmap.h>
52 #include <vm/vm_map.h>
53 #include <vm/vm_object.h>
54
55 #include <machine/cpu.h>
56
57 #ifdef COMPAT_IA32
58 #include <compat/freebsd32/freebsd32_proto.h>
59 #endif
60
61 #define TYPE_SIMPLE_LOCK 0
62 #define TYPE_SIMPLE_WAIT 1
63 #define TYPE_NORMAL_UMUTEX 2
64 #define TYPE_PI_UMUTEX 3
65 #define TYPE_PP_UMUTEX 4
66 #define TYPE_CV 5
67 #define TYPE_RWLOCK 6
68
/*
 * Key to represent a unique userland synchronous object.
 * A process-shared object is identified by its backing VM object and
 * offset; a process-private one by the owning vmspace and user address.
 * The "both" member aliases whichever pair is in use so hashing and
 * comparison can treat the key uniformly.
 */
struct umtx_key {
	int	hash;	/* Chain index; computed by umtxq_hash(). */
	int	type;	/* One of the TYPE_* object kinds. */
	int	shared;	/* Non-zero: shared key, holds a vm_object ref. */
	union {
		struct {
			vm_object_t	object;	/* Backing VM object. */
			uintptr_t	offset;	/* Offset within the object. */
		} shared;
		struct {
			struct vmspace	*vs;	/* Owning address space. */
			uintptr_t	addr;	/* User virtual address. */
		} private;
		struct {
			void		*a;	/* Aliases object / vs. */
			uintptr_t	b;	/* Aliases offset / addr. */
		} both;
	} info;
};
89
/*
 * Priority inheritance mutex info.  One record per contested PI
 * umutex, shared by all waiters and reference-counted through
 * pi_refcount.
 */
struct umtx_pi {
	/* Owner thread */
	struct thread		*pi_owner;

	/* Reference count */
	int			pi_refcount;

	/* List entry to link umtx held by a thread (uq_pi_contested). */
	TAILQ_ENTRY(umtx_pi)	pi_link;

	/* List entry in the chain's uc_pi_list hash bucket. */
	TAILQ_ENTRY(umtx_pi)	pi_hashlink;

	/* Waiters blocked on this mutex, kept sorted by priority. */
	TAILQ_HEAD(,umtx_q)	pi_blocked;

	/* Identify the userland lock object this record belongs to. */
	struct umtx_key		pi_key;
};
110
/*
 * A userland synchronous object user: per-thread wait-queue entry,
 * reachable via td->td_umtxq.
 */
struct umtx_q {
	/* Linked list entry for the chain's wait queue. */
	TAILQ_ENTRY(umtx_q)	uq_link;

	/* Key of the object currently waited on. */
	struct umtx_key		uq_key;

	/* Umtx flags. */
	int			uq_flags;
#define UQF_UMTXQ	0x0001	/* Entry is on a chain queue. */

	/*
	 * Back pointer to the owning thread (presumably set when the
	 * thread is created; the assignment is not visible in this file
	 * chunk -- confirm against thread init code).
	 */
	struct thread		*uq_thread;

	/*
	 * Blocked on PI mutex. read can use chain lock
	 * or umtx_lock, write must have both chain lock and
	 * umtx_lock being hold.
	 */
	struct umtx_pi		*uq_pi_blocked;

	/* Entry on a umtx_pi's pi_blocked list. */
	TAILQ_ENTRY(umtx_q)	uq_lockq;

	/* PI mutexes owned by this thread that have waiters. */
	TAILQ_HEAD(,umtx_pi)	uq_pi_contested;

	/* Priority inherited from PP mutexes (PRI_MAX when none). */
	u_char			uq_inherited_pri;
};
142
143 TAILQ_HEAD(umtxq_head, umtx_q);
144
/*
 * Userland lock object's wait-queue chain.  There are UMTX_CHAINS of
 * these; a key hashes to exactly one chain, whose mutex protects the
 * queues and busy state.
 */
struct umtxq_chain {
	/* Lock for this chain. */
	struct mtx	uc_lock;

	/* Sleep queues: shared (readers/plain) and exclusive waiters. */
	struct umtxq_head	uc_queue[2];
#define UMTX_SHARED_QUEUE	0
#define UMTX_EXCLUSIVE_QUEUE	1

	/* Busy flag: set while an operation may block (see umtxq_busy). */
	char		uc_busy;

	/* Number of threads sleeping for the busy flag to clear. */
	int		uc_waiters;

	/* All umtx_pi records hashed to this chain. */
	TAILQ_HEAD(,umtx_pi)	uc_pi_list;
};
164
165 #define UMTXQ_LOCKED_ASSERT(uc) mtx_assert(&(uc)->uc_lock, MA_OWNED)
166
167 /*
168 * Don't propagate time-sharing priority, there is a security reason,
169 * a user can simply introduce PI-mutex, let thread A lock the mutex,
170 * and let another thread B block on the mutex, because B is
171 * sleeping, its priority will be boosted, this causes A's priority to
172 * be boosted via priority propagating too and will never be lowered even
173 * if it is using 100%CPU, this is unfair to other processes.
174 */
175
176 #define UPRI(td) (((td)->td_user_pri >= PRI_MIN_TIMESHARE &&\
177 (td)->td_user_pri <= PRI_MAX_TIMESHARE) ?\
178 PRI_MAX_TIMESHARE : (td)->td_user_pri)
179
180 #define GOLDEN_RATIO_PRIME 2654404609U
181 #define UMTX_CHAINS 128
182 #define UMTX_SHIFTS (__WORD_BIT - 7)
183
184 #define THREAD_SHARE 0
185 #define PROCESS_SHARE 1
186 #define AUTO_SHARE 2
187
188 #define GET_SHARE(flags) \
189 (((flags) & USYNC_PROCESS_SHARED) == 0 ? THREAD_SHARE : PROCESS_SHARE)
190
191 #define BUSY_SPINS 200
192
193 static uma_zone_t umtx_pi_zone;
194 static struct umtxq_chain umtxq_chains[UMTX_CHAINS];
195 static MALLOC_DEFINE(M_UMTX, "umtx", "UMTX queue memory");
196 static int umtx_pi_allocated;
197
198 SYSCTL_NODE(_debug, OID_AUTO, umtx, CTLFLAG_RW, 0, "umtx debug");
199 SYSCTL_INT(_debug_umtx, OID_AUTO, umtx_pi_allocated, CTLFLAG_RD,
200 &umtx_pi_allocated, 0, "Allocated umtx_pi");
201
202 static void umtxq_sysinit(void *);
203 static void umtxq_hash(struct umtx_key *key);
204 static struct umtxq_chain *umtxq_getchain(struct umtx_key *key);
205 static void umtxq_lock(struct umtx_key *key);
206 static void umtxq_unlock(struct umtx_key *key);
207 static void umtxq_busy(struct umtx_key *key);
208 static void umtxq_unbusy(struct umtx_key *key);
209 static void umtxq_insert_queue(struct umtx_q *uq, int q);
210 static void umtxq_remove_queue(struct umtx_q *uq, int q);
211 static int umtxq_sleep(struct umtx_q *uq, const char *wmesg, int timo);
212 static int umtxq_count(struct umtx_key *key);
213 static int umtx_key_match(const struct umtx_key *k1, const struct umtx_key *k2);
214 static int umtx_key_get(void *addr, int type, int share,
215 struct umtx_key *key);
216 static void umtx_key_release(struct umtx_key *key);
217 static struct umtx_pi *umtx_pi_alloc(int);
218 static void umtx_pi_free(struct umtx_pi *pi);
219 static void umtx_pi_adjust_locked(struct thread *td, u_char oldpri);
220 static int do_unlock_pp(struct thread *td, struct umutex *m, uint32_t flags);
221 static void umtx_thread_cleanup(struct thread *td);
222 static void umtx_exec_hook(void *arg __unused, struct proc *p __unused,
223 struct image_params *imgp __unused);
224 SYSINIT(umtx, SI_SUB_EVENTHANDLER+1, SI_ORDER_MIDDLE, umtxq_sysinit, NULL);
225
226 #define umtxq_signal(key, nwake) umtxq_signal_queue((key), (nwake), UMTX_SHARED_QUEUE)
227 #define umtxq_insert(uq) umtxq_insert_queue((uq), UMTX_SHARED_QUEUE)
228 #define umtxq_remove(uq) umtxq_remove_queue((uq), UMTX_SHARED_QUEUE)
229
230 static struct mtx umtx_lock;
231
/*
 * Module initialization: create the UMA zone for umtx_pi records,
 * initialize every wait-queue chain (mutex, both sleep queues, PI
 * list, busy state), the global umtx spin lock, and register the
 * process_exec event handler.
 */
static void
umtxq_sysinit(void *arg __unused)
{
	int i;

	umtx_pi_zone = uma_zcreate("umtx pi", sizeof(struct umtx_pi),
		NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0);
	for (i = 0; i < UMTX_CHAINS; ++i) {
		/*
		 * MTX_DUPOK: presumably some operations lock two chains
		 * at once (e.g. on two different keys) -- not visible in
		 * this file chunk.
		 */
		mtx_init(&umtxq_chains[i].uc_lock, "umtxql", NULL,
			MTX_DEF | MTX_DUPOK);
		TAILQ_INIT(&umtxq_chains[i].uc_queue[0]);
		TAILQ_INIT(&umtxq_chains[i].uc_queue[1]);
		TAILQ_INIT(&umtxq_chains[i].uc_pi_list);
		umtxq_chains[i].uc_busy = 0;
		umtxq_chains[i].uc_waiters = 0;
	}
	/* Spin lock: taken by PI code paths that run with thread locks. */
	mtx_init(&umtx_lock, "umtx lock", NULL, MTX_SPIN);
	EVENTHANDLER_REGISTER(process_exec, umtx_exec_hook, NULL,
		EVENTHANDLER_PRI_ANY);
}
252
253 struct umtx_q *
254 umtxq_alloc(void)
255 {
256 struct umtx_q *uq;
257
258 uq = malloc(sizeof(struct umtx_q), M_UMTX, M_WAITOK | M_ZERO);
259 TAILQ_INIT(&uq->uq_pi_contested);
260 uq->uq_inherited_pri = PRI_MAX;
261 return (uq);
262 }
263
/*
 * Release a umtx queue entry previously obtained from umtxq_alloc().
 */
void
umtxq_free(struct umtx_q *uq)
{
	free(uq, M_UMTX);
}
269
270 static inline void
271 umtxq_hash(struct umtx_key *key)
272 {
273 unsigned n = (uintptr_t)key->info.both.a + key->info.both.b;
274 key->hash = ((n * GOLDEN_RATIO_PRIME) >> UMTX_SHIFTS) % UMTX_CHAINS;
275 }
276
277 static inline int
278 umtx_key_match(const struct umtx_key *k1, const struct umtx_key *k2)
279 {
280 return (k1->type == k2->type &&
281 k1->info.both.a == k2->info.both.a &&
282 k1->info.both.b == k2->info.both.b);
283 }
284
285 static inline struct umtxq_chain *
286 umtxq_getchain(struct umtx_key *key)
287 {
288 return (&umtxq_chains[key->hash]);
289 }
290
291 /*
292 * Lock a chain.
293 */
294 static inline void
295 umtxq_lock(struct umtx_key *key)
296 {
297 struct umtxq_chain *uc;
298
299 uc = umtxq_getchain(key);
300 mtx_lock(&uc->uc_lock);
301 }
302
303 /*
304 * Unlock a chain.
305 */
306 static inline void
307 umtxq_unlock(struct umtx_key *key)
308 {
309 struct umtxq_chain *uc;
310
311 uc = umtxq_getchain(key);
312 mtx_unlock(&uc->uc_lock);
313 }
314
/*
 * Set chain to busy state when following operation
 * may be blocked (kernel mutex can not be used).
 *
 * Called with the chain locked; returns with it locked and uc_busy
 * set.  First spins briefly (dropping the chain lock so the busy
 * holder can progress), then sleeps on the chain until woken by
 * umtxq_unbusy().
 */
static inline void
umtxq_busy(struct umtx_key *key)
{
	struct umtxq_chain *uc;

	uc = umtxq_getchain(key);
	mtx_assert(&uc->uc_lock, MA_OWNED);
	if (uc->uc_busy) {
		int count = BUSY_SPINS;
		/*
		 * NOTE(review): count is initialized to BUSY_SPINS (200),
		 * so this test is always true; upstream gates the spin on
		 * an SMP check instead -- confirm intent.
		 */
		if (count > 0) {
			umtxq_unlock(key);
			/* uc_busy is read unlocked here; benign race --
			 * the locked re-check below is authoritative. */
			while (uc->uc_busy && --count > 0)
				cpu_spinwait();
			umtxq_lock(key);
		}
		while (uc->uc_busy != 0) {
			uc->uc_waiters++;
			msleep(uc, &uc->uc_lock, 0, "umtxqb", 0);
			uc->uc_waiters--;
		}
	}
	uc->uc_busy = 1;
}
342
343 /*
344 * Unbusy a chain.
345 */
346 static inline void
347 umtxq_unbusy(struct umtx_key *key)
348 {
349 struct umtxq_chain *uc;
350
351 uc = umtxq_getchain(key);
352 mtx_assert(&uc->uc_lock, MA_OWNED);
353 KASSERT(uc->uc_busy != 0, ("not busy"));
354 uc->uc_busy = 0;
355 if (uc->uc_waiters)
356 wakeup_one(uc);
357 }
358
359 static inline void
360 umtxq_insert_queue(struct umtx_q *uq, int q)
361 {
362 struct umtxq_chain *uc;
363
364 uc = umtxq_getchain(&uq->uq_key);
365 UMTXQ_LOCKED_ASSERT(uc);
366 TAILQ_INSERT_TAIL(&uc->uc_queue[q], uq, uq_link);
367 uq->uq_flags |= UQF_UMTXQ;
368 }
369
370 static inline void
371 umtxq_remove_queue(struct umtx_q *uq, int q)
372 {
373 struct umtxq_chain *uc;
374
375 uc = umtxq_getchain(&uq->uq_key);
376 UMTXQ_LOCKED_ASSERT(uc);
377 if (uq->uq_flags & UQF_UMTXQ) {
378 TAILQ_REMOVE(&uc->uc_queue[q], uq, uq_link);
379 uq->uq_flags &= ~UQF_UMTXQ;
380 }
381 }
382
383 /*
384 * Check if there are multiple waiters
385 */
386 static int
387 umtxq_count(struct umtx_key *key)
388 {
389 struct umtxq_chain *uc;
390 struct umtx_q *uq;
391 int count = 0;
392
393 uc = umtxq_getchain(key);
394 UMTXQ_LOCKED_ASSERT(uc);
395 TAILQ_FOREACH(uq, &uc->uc_queue[UMTX_SHARED_QUEUE], uq_link) {
396 if (umtx_key_match(&uq->uq_key, key)) {
397 if (++count > 1)
398 break;
399 }
400 }
401 return (count);
402 }
403
/*
 * Check if there are multiple PI waiters and returns first
 * waiter.
 *
 * Like umtxq_count() the count saturates at 2.  *first is set to the
 * first matching waiter (queue order), or NULL when there is none;
 * note it is assigned only on the first match because the second
 * match breaks out of the loop before the assignment.
 */
static int
umtxq_count_pi(struct umtx_key *key, struct umtx_q **first)
{
	struct umtxq_chain *uc;
	struct umtx_q *uq;
	int count = 0;

	*first = NULL;
	uc = umtxq_getchain(key);
	UMTXQ_LOCKED_ASSERT(uc);
	TAILQ_FOREACH(uq, &uc->uc_queue[UMTX_SHARED_QUEUE], uq_link) {
		if (umtx_key_match(&uq->uq_key, key)) {
			if (++count > 1)
				break;
			*first = uq;
		}
	}
	return (count);
}
427
428 /*
429 * Wake up threads waiting on an userland object.
430 */
431
432 static int
433 umtxq_signal_queue(struct umtx_key *key, int n_wake, int q)
434 {
435 struct umtxq_chain *uc;
436 struct umtx_q *uq, *next;
437 int ret;
438
439 ret = 0;
440 uc = umtxq_getchain(key);
441 UMTXQ_LOCKED_ASSERT(uc);
442 TAILQ_FOREACH_SAFE(uq, &uc->uc_queue[q], uq_link, next) {
443 if (umtx_key_match(&uq->uq_key, key)) {
444 umtxq_remove_queue(uq, q);
445 wakeup(uq);
446 if (++ret >= n_wake)
447 break;
448 }
449 }
450 return (ret);
451 }
452
453
454 /*
455 * Wake up specified thread.
456 */
457 static inline void
458 umtxq_signal_thread(struct umtx_q *uq)
459 {
460 struct umtxq_chain *uc;
461
462 uc = umtxq_getchain(&uq->uq_key);
463 UMTXQ_LOCKED_ASSERT(uc);
464 umtxq_remove(uq);
465 wakeup(uq);
466 }
467
468 /*
469 * Put thread into sleep state, before sleeping, check if
470 * thread was removed from umtx queue.
471 */
472 static inline int
473 umtxq_sleep(struct umtx_q *uq, const char *wmesg, int timo)
474 {
475 struct umtxq_chain *uc;
476 int error;
477
478 uc = umtxq_getchain(&uq->uq_key);
479 UMTXQ_LOCKED_ASSERT(uc);
480 if (!(uq->uq_flags & UQF_UMTXQ))
481 return (0);
482 error = msleep(uq, &uc->uc_lock, PCATCH, wmesg, timo);
483 if (error == EWOULDBLOCK)
484 error = ETIMEDOUT;
485 return (error);
486 }
487
/*
 * Convert userspace address into unique logical address (key).
 *
 * THREAD_SHARE keys are (vmspace, address).  For PROCESS_SHARE and
 * AUTO_SHARE the backing map entry is looked up; a shared key is
 * (object, offset) and takes a reference on the object which the
 * caller must drop via umtx_key_release().  AUTO_SHARE falls back to
 * a private key when the mapping is not inherit-shared.  Returns 0 or
 * EFAULT when the address is not mapped writable.
 */
static int
umtx_key_get(void *addr, int type, int share, struct umtx_key *key)
{
	struct thread *td = curthread;
	vm_map_t map;
	vm_map_entry_t entry;
	vm_pindex_t pindex;
	vm_prot_t prot;
	boolean_t wired;

	key->type = type;
	if (share == THREAD_SHARE) {
		key->shared = 0;
		key->info.private.vs = td->td_proc->p_vmspace;
		key->info.private.addr = (uintptr_t)addr;
	} else {
		MPASS(share == PROCESS_SHARE || share == AUTO_SHARE);
		map = &td->td_proc->p_vmspace->vm_map;
		if (vm_map_lookup(&map, (vm_offset_t)addr, VM_PROT_WRITE,
			&entry, &key->info.shared.object, &pindex, &prot,
			&wired) != KERN_SUCCESS) {
			return EFAULT;
		}

		if ((share == PROCESS_SHARE) ||
			(share == AUTO_SHARE &&
			 VM_INHERIT_SHARE == entry->inheritance)) {
			key->shared = 1;
			/* Object offset corresponding to addr within
			 * this map entry. */
			key->info.shared.offset = entry->offset + entry->start -
				(vm_offset_t)addr;
			vm_object_reference(key->info.shared.object);
		} else {
			key->shared = 0;
			key->info.private.vs = td->td_proc->p_vmspace;
			key->info.private.addr = (uintptr_t)addr;
		}
		vm_map_lookup_done(map, entry);
	}

	/* Fill in key->hash so the key maps to a chain. */
	umtxq_hash(key);
	return (0);
}
533
534 /*
535 * Release key.
536 */
537 static inline void
538 umtx_key_release(struct umtx_key *key)
539 {
540 if (key->shared)
541 vm_object_deallocate(key->info.shared.object);
542 }
543
/*
 * Lock a umtx object.
 *
 * Retry loop: try the uncontested CAS, then the contested-but-unowned
 * CAS; otherwise publish the contested bit and sleep until the owner
 * releases.  timo is in ticks (0 = sleep forever).  Returns 0 on
 * acquisition, EFAULT on a bad address, or the sleep error (EINTR /
 * ERESTART / ETIMEDOUT) once the loop retries after an interrupted
 * sleep.
 */
static int
_do_lock_umtx(struct thread *td, struct umtx *umtx, u_long id, int timo)
{
	struct umtx_q *uq;
	u_long owner;
	u_long old;
	int error = 0;

	uq = td->td_umtxq;

	/*
	 * Care must be exercised when dealing with umtx structure. It
	 * can fault on any access.
	 */
	for (;;) {
		/*
		 * Try the uncontested case. This should be done in userland.
		 */
		owner = casuword(&umtx->u_owner, UMTX_UNOWNED, id);

		/* The acquire succeeded. */
		if (owner == UMTX_UNOWNED)
			return (0);

		/* The address was invalid. */
		if (owner == -1)
			return (EFAULT);

		/* If no one owns it but it is contested try to acquire it. */
		if (owner == UMTX_CONTESTED) {
			owner = casuword(&umtx->u_owner,
				UMTX_CONTESTED, id | UMTX_CONTESTED);

			if (owner == UMTX_CONTESTED)
				return (0);

			/* The address was invalid. */
			if (owner == -1)
				return (EFAULT);

			/* If this failed the lock has changed, restart. */
			continue;
		}

		/*
		 * If we caught a signal, we have retried and now
		 * exit immediately.
		 */
		if (error != 0)
			return (error);

		if ((error = umtx_key_get(umtx, TYPE_SIMPLE_LOCK,
			AUTO_SHARE, &uq->uq_key)) != 0)
			return (error);

		/*
		 * Queue ourselves BEFORE setting the contested bit so a
		 * concurrent unlock cannot miss us.
		 */
		umtxq_lock(&uq->uq_key);
		umtxq_busy(&uq->uq_key);
		umtxq_insert(uq);
		umtxq_unbusy(&uq->uq_key);
		umtxq_unlock(&uq->uq_key);

		/*
		 * Set the contested bit so that a release in user space
		 * knows to use the system call for unlock. If this fails
		 * either some one else has acquired the lock or it has been
		 * released.
		 */
		old = casuword(&umtx->u_owner, owner, owner | UMTX_CONTESTED);

		/* The address was invalid. */
		if (old == -1) {
			umtxq_lock(&uq->uq_key);
			umtxq_remove(uq);
			umtxq_unlock(&uq->uq_key);
			umtx_key_release(&uq->uq_key);
			return (EFAULT);
		}

		/*
		 * We set the contested bit, sleep. Otherwise the lock changed
		 * and we need to retry or we lost a race to the thread
		 * unlocking the umtx.
		 */
		umtxq_lock(&uq->uq_key);
		if (old == owner)
			error = umtxq_sleep(uq, "umtx", timo);
		umtxq_remove(uq);
		umtxq_unlock(&uq->uq_key);
		umtx_key_release(&uq->uq_key);
	}

	/* Not reached: the loop exits only via the returns above. */
	return (0);
}
640
/*
 * Lock a umtx object, with an optional absolute-style timeout.
 *
 * The timeout is relative; it is converted to a deadline on the
 * uptime clock and the remaining time is recomputed after every
 * ETIMEDOUT retry of the inner lock attempt.
 */
static int
do_lock_umtx(struct thread *td, struct umtx *umtx, u_long id,
	struct timespec *timeout)
{
	struct timespec ts, ts2, ts3;
	struct timeval tv;
	int error;

	if (timeout == NULL) {
		error = _do_lock_umtx(td, umtx, id, 0);
		/* Mutex locking is restarted if it is interrupted. */
		if (error == EINTR)
			error = ERESTART;
	} else {
		/* ts = deadline on the uptime clock. */
		getnanouptime(&ts);
		timespecadd(&ts, timeout);
		TIMESPEC_TO_TIMEVAL(&tv, timeout);
		for (;;) {
			error = _do_lock_umtx(td, umtx, id, tvtohz(&tv));
			if (error != ETIMEDOUT)
				break;
			getnanouptime(&ts2);
			if (timespeccmp(&ts2, &ts, >=)) {
				error = ETIMEDOUT;
				break;
			}
			/* Sleep again for the remaining ts - ts2. */
			ts3 = ts;
			timespecsub(&ts3, &ts2);
			TIMESPEC_TO_TIMEVAL(&tv, &ts3);
		}
		/* Timed-locking is not restarted. */
		if (error == ERESTART)
			error = EINTR;
	}
	return (error);
}
680
/*
 * Unlock a umtx object.
 *
 * Validates ownership, handles the uncontested fast path with a
 * single CAS, and otherwise hands the lock off: the word is set to
 * UMTX_UNOWNED (<=1 waiter) or UMTX_CONTESTED (more), then one
 * waiter is woken.  Returns 0, EFAULT, EPERM (not the owner), or
 * EINVAL when the word changed under us.
 */
static int
do_unlock_umtx(struct thread *td, struct umtx *umtx, u_long id)
{
	struct umtx_key key;
	u_long owner;
	u_long old;
	int error;
	int count;

	/*
	 * Make sure we own this mtx.
	 */
	owner = fuword(__DEVOLATILE(u_long *, &umtx->u_owner));
	if (owner == -1)
		return (EFAULT);

	if ((owner & ~UMTX_CONTESTED) != id)
		return (EPERM);

	/* This should be done in userland */
	if ((owner & UMTX_CONTESTED) == 0) {
		old = casuword(&umtx->u_owner, owner, UMTX_UNOWNED);
		if (old == -1)
			return (EFAULT);
		if (old == owner)
			return (0);
		/* Lost the race; the word now carries the contested bit. */
		owner = old;
	}

	/* We should only ever be in here for contested locks */
	if ((error = umtx_key_get(umtx, TYPE_SIMPLE_LOCK, AUTO_SHARE,
		&key)) != 0)
		return (error);

	/* Busy the chain so waiter queueing is stable across the CAS. */
	umtxq_lock(&key);
	umtxq_busy(&key);
	count = umtxq_count(&key);
	umtxq_unlock(&key);

	/*
	 * When unlocking the umtx, it must be marked as unowned if
	 * there is zero or one thread only waiting for it.
	 * Otherwise, it must be marked as contested.
	 */
	old = casuword(&umtx->u_owner, owner,
		count <= 1 ? UMTX_UNOWNED : UMTX_CONTESTED);
	umtxq_lock(&key);
	umtxq_signal(&key,1);
	umtxq_unbusy(&key);
	umtxq_unlock(&key);
	umtx_key_release(&key);
	if (old == -1)
		return (EFAULT);
	if (old != owner)
		return (EINVAL);
	return (0);
}
741
742 #ifdef COMPAT_IA32
743
/*
 * Lock a umtx object (32-bit compat variant).
 *
 * Mirror of _do_lock_umtx() operating on a 32-bit lock word with
 * casuword32(); see that function for the retry-loop logic.  Note the
 * word is compared against -1 (i.e. (uint32_t)-1), the fault value
 * returned by casuword32().
 */
static int
_do_lock_umtx32(struct thread *td, uint32_t *m, uint32_t id, int timo)
{
	struct umtx_q *uq;
	uint32_t owner;
	uint32_t old;
	int error = 0;

	uq = td->td_umtxq;

	/*
	 * Care must be exercised when dealing with umtx structure. It
	 * can fault on any access.
	 */
	for (;;) {
		/*
		 * Try the uncontested case. This should be done in userland.
		 */
		owner = casuword32(m, UMUTEX_UNOWNED, id);

		/* The acquire succeeded. */
		if (owner == UMUTEX_UNOWNED)
			return (0);

		/* The address was invalid. */
		if (owner == -1)
			return (EFAULT);

		/* If no one owns it but it is contested try to acquire it. */
		if (owner == UMUTEX_CONTESTED) {
			owner = casuword32(m,
				UMUTEX_CONTESTED, id | UMUTEX_CONTESTED);
			if (owner == UMUTEX_CONTESTED)
				return (0);

			/* The address was invalid. */
			if (owner == -1)
				return (EFAULT);

			/* If this failed the lock has changed, restart. */
			continue;
		}

		/*
		 * If we caught a signal, we have retried and now
		 * exit immediately.
		 */
		if (error != 0)
			return (error);

		if ((error = umtx_key_get(m, TYPE_SIMPLE_LOCK,
			AUTO_SHARE, &uq->uq_key)) != 0)
			return (error);

		/* Queue before publishing the contested bit. */
		umtxq_lock(&uq->uq_key);
		umtxq_busy(&uq->uq_key);
		umtxq_insert(uq);
		umtxq_unbusy(&uq->uq_key);
		umtxq_unlock(&uq->uq_key);

		/*
		 * Set the contested bit so that a release in user space
		 * knows to use the system call for unlock. If this fails
		 * either some one else has acquired the lock or it has been
		 * released.
		 */
		old = casuword32(m, owner, owner | UMUTEX_CONTESTED);

		/* The address was invalid. */
		if (old == -1) {
			umtxq_lock(&uq->uq_key);
			umtxq_remove(uq);
			umtxq_unlock(&uq->uq_key);
			umtx_key_release(&uq->uq_key);
			return (EFAULT);
		}

		/*
		 * We set the contested bit, sleep. Otherwise the lock changed
		 * and we need to retry or we lost a race to the thread
		 * unlocking the umtx.
		 */
		umtxq_lock(&uq->uq_key);
		if (old == owner)
			error = umtxq_sleep(uq, "umtx", timo);
		umtxq_remove(uq);
		umtxq_unlock(&uq->uq_key);
		umtx_key_release(&uq->uq_key);
	}

	/* Not reached: the loop exits only via the returns above. */
	return (0);
}
839
/*
 * Lock a umtx object (32-bit compat variant), with optional timeout.
 * Same deadline/retry structure as do_lock_umtx().
 */
static int
do_lock_umtx32(struct thread *td, void *m, uint32_t id,
	struct timespec *timeout)
{
	struct timespec ts, ts2, ts3;
	struct timeval tv;
	int error;

	if (timeout == NULL) {
		error = _do_lock_umtx32(td, m, id, 0);
		/* Mutex locking is restarted if it is interrupted. */
		if (error == EINTR)
			error = ERESTART;
	} else {
		/* ts = deadline on the uptime clock. */
		getnanouptime(&ts);
		timespecadd(&ts, timeout);
		TIMESPEC_TO_TIMEVAL(&tv, timeout);
		for (;;) {
			error = _do_lock_umtx32(td, m, id, tvtohz(&tv));
			if (error != ETIMEDOUT)
				break;
			getnanouptime(&ts2);
			if (timespeccmp(&ts2, &ts, >=)) {
				error = ETIMEDOUT;
				break;
			}
			/* Sleep again for the remaining ts - ts2. */
			ts3 = ts;
			timespecsub(&ts3, &ts2);
			TIMESPEC_TO_TIMEVAL(&tv, &ts3);
		}
		/* Timed-locking is not restarted. */
		if (error == ERESTART)
			error = EINTR;
	}
	return (error);
}
879
/*
 * Unlock a umtx object (32-bit compat variant).
 * Mirror of do_unlock_umtx() using 32-bit user-word accessors.
 */
static int
do_unlock_umtx32(struct thread *td, uint32_t *m, uint32_t id)
{
	struct umtx_key key;
	uint32_t owner;
	uint32_t old;
	int error;
	int count;

	/*
	 * Make sure we own this mtx.
	 */
	owner = fuword32(m);
	if (owner == -1)
		return (EFAULT);

	if ((owner & ~UMUTEX_CONTESTED) != id)
		return (EPERM);

	/* This should be done in userland */
	if ((owner & UMUTEX_CONTESTED) == 0) {
		old = casuword32(m, owner, UMUTEX_UNOWNED);
		if (old == -1)
			return (EFAULT);
		if (old == owner)
			return (0);
		/* Lost the race; the word now carries the contested bit. */
		owner = old;
	}

	/* We should only ever be in here for contested locks */
	if ((error = umtx_key_get(m, TYPE_SIMPLE_LOCK, AUTO_SHARE,
		&key)) != 0)
		return (error);

	/* Busy the chain so waiter queueing is stable across the CAS. */
	umtxq_lock(&key);
	umtxq_busy(&key);
	count = umtxq_count(&key);
	umtxq_unlock(&key);

	/*
	 * When unlocking the umtx, it must be marked as unowned if
	 * there is zero or one thread only waiting for it.
	 * Otherwise, it must be marked as contested.
	 */
	old = casuword32(m, owner,
		count <= 1 ? UMUTEX_UNOWNED : UMUTEX_CONTESTED);
	umtxq_lock(&key);
	umtxq_signal(&key,1);
	umtxq_unbusy(&key);
	umtxq_unlock(&key);
	umtx_key_release(&key);
	if (old == -1)
		return (EFAULT);
	if (old != owner)
		return (EINVAL);
	return (0);
}
#endif
940 #endif
941
/*
 * Fetch and compare value, sleep on the address if value is not changed.
 *
 * The thread is queued BEFORE the user word is read, so a wakeup
 * between the read and the sleep is not lost (umtxq_sleep() returns
 * immediately once the entry has been dequeued).  compat32 selects a
 * 32-bit fetch of the word.  Returns 0 on wakeup/value-change,
 * ETIMEDOUT, or EINTR (ERESTART is mapped to EINTR: waits are not
 * restarted).
 */
static int
do_wait(struct thread *td, void *addr, u_long id,
	struct timespec *timeout, int compat32)
{
	struct umtx_q *uq;
	struct timespec ts, ts2, ts3;
	struct timeval tv;
	u_long tmp;
	int error = 0;

	uq = td->td_umtxq;
	if ((error = umtx_key_get(addr, TYPE_SIMPLE_WAIT, AUTO_SHARE,
		&uq->uq_key)) != 0)
		return (error);

	umtxq_lock(&uq->uq_key);
	umtxq_insert(uq);
	umtxq_unlock(&uq->uq_key);
	if (compat32 == 0)
		tmp = fuword(addr);
	else
		tmp = fuword32(addr);
	if (tmp != id) {
		/* Value already changed; don't sleep. */
		umtxq_lock(&uq->uq_key);
		umtxq_remove(uq);
		umtxq_unlock(&uq->uq_key);
	} else if (timeout == NULL) {
		umtxq_lock(&uq->uq_key);
		error = umtxq_sleep(uq, "uwait", 0);
		umtxq_remove(uq);
		umtxq_unlock(&uq->uq_key);
	} else {
		/* ts = deadline on the uptime clock. */
		getnanouptime(&ts);
		timespecadd(&ts, timeout);
		TIMESPEC_TO_TIMEVAL(&tv, timeout);
		umtxq_lock(&uq->uq_key);
		for (;;) {
			error = umtxq_sleep(uq, "uwait", tvtohz(&tv));
			if (!(uq->uq_flags & UQF_UMTXQ))
				break;
			if (error != ETIMEDOUT)
				break;
			/* Recompute the remaining time and sleep again. */
			umtxq_unlock(&uq->uq_key);
			getnanouptime(&ts2);
			if (timespeccmp(&ts2, &ts, >=)) {
				error = ETIMEDOUT;
				umtxq_lock(&uq->uq_key);
				break;
			}
			ts3 = ts;
			timespecsub(&ts3, &ts2);
			TIMESPEC_TO_TIMEVAL(&tv, &ts3);
			umtxq_lock(&uq->uq_key);
		}
		umtxq_remove(uq);
		umtxq_unlock(&uq->uq_key);
	}
	umtx_key_release(&uq->uq_key);
	if (error == ERESTART)
		error = EINTR;
	return (error);
}
1007
/*
 * Wake up threads sleeping on the specified address.
 *
 * NOTE(review): the number of woken threads returned by
 * umtxq_signal() is computed into "ret" but then discarded; other
 * revisions of this file report it to userland via td->td_retval[0].
 * Confirm whether that is intended here.
 */
int
kern_umtx_wake(struct thread *td, void *uaddr, int n_wake)
{
	struct umtx_key key;
	int ret;

	if ((ret = umtx_key_get(uaddr, TYPE_SIMPLE_WAIT, AUTO_SHARE,
		&key)) != 0)
		return (ret);
	umtxq_lock(&key);
	ret = umtxq_signal(&key, n_wake);
	umtxq_unlock(&key);
	umtx_key_release(&key);
	return (0);
}
1026
/*
 * Lock PTHREAD_PRIO_NONE protocol POSIX mutex.
 *
 * Same CAS retry structure as _do_lock_umtx(), plus:
 *  - UMUTEX_ERROR_CHECK: returns EDEADLK when the caller already
 *    owns the mutex;
 *  - try != 0: trylock semantics, returns EBUSY instead of sleeping.
 * The owner id is the thread tid.  timo is in ticks (0 = forever).
 */
static int
_do_lock_normal(struct thread *td, struct umutex *m, uint32_t flags, int timo,
	int try)
{
	struct umtx_q *uq;
	uint32_t owner, old, id;
	int error = 0;

	id = td->td_tid;
	uq = td->td_umtxq;

	/*
	 * Care must be exercised when dealing with umtx structure. It
	 * can fault on any access.
	 */
	for (;;) {
		/*
		 * Try the uncontested case. This should be done in userland.
		 */
		owner = casuword32(&m->m_owner, UMUTEX_UNOWNED, id);

		/* The acquire succeeded. */
		if (owner == UMUTEX_UNOWNED)
			return (0);

		/* The address was invalid. */
		if (owner == -1)
			return (EFAULT);

		/* If no one owns it but it is contested try to acquire it. */
		if (owner == UMUTEX_CONTESTED) {
			owner = casuword32(&m->m_owner,
				UMUTEX_CONTESTED, id | UMUTEX_CONTESTED);

			if (owner == UMUTEX_CONTESTED)
				return (0);

			/* The address was invalid. */
			if (owner == -1)
				return (EFAULT);

			/* If this failed the lock has changed, restart. */
			continue;
		}

		if ((flags & UMUTEX_ERROR_CHECK) != 0 &&
			(owner & ~UMUTEX_CONTESTED) == id)
			return (EDEADLK);

		if (try != 0)
			return (EBUSY);

		/*
		 * If we caught a signal, we have retried and now
		 * exit immediately.
		 */
		if (error != 0)
			return (error);

		if ((error = umtx_key_get(m, TYPE_NORMAL_UMUTEX,
			GET_SHARE(flags), &uq->uq_key)) != 0)
			return (error);

		/* Queue before publishing the contested bit. */
		umtxq_lock(&uq->uq_key);
		umtxq_busy(&uq->uq_key);
		umtxq_insert(uq);
		umtxq_unbusy(&uq->uq_key);
		umtxq_unlock(&uq->uq_key);

		/*
		 * Set the contested bit so that a release in user space
		 * knows to use the system call for unlock. If this fails
		 * either some one else has acquired the lock or it has been
		 * released.
		 */
		old = casuword32(&m->m_owner, owner, owner | UMUTEX_CONTESTED);

		/* The address was invalid. */
		if (old == -1) {
			umtxq_lock(&uq->uq_key);
			umtxq_remove(uq);
			umtxq_unlock(&uq->uq_key);
			umtx_key_release(&uq->uq_key);
			return (EFAULT);
		}

		/*
		 * We set the contested bit, sleep. Otherwise the lock changed
		 * and we need to retry or we lost a race to the thread
		 * unlocking the umtx.
		 */
		umtxq_lock(&uq->uq_key);
		if (old == owner)
			error = umtxq_sleep(uq, "umtxn", timo);
		umtxq_remove(uq);
		umtxq_unlock(&uq->uq_key);
		umtx_key_release(&uq->uq_key);
	}

	/* Not reached: the loop exits only via the returns above. */
	return (0);
}
1131
/*
 * Unlock PTHREAD_PRIO_NONE protocol POSIX mutex.
 *
 * (A stray duplicated "Lock ..." comment was removed here.)
 * Same hand-off protocol as do_unlock_umtx(): validate ownership by
 * tid, fast uncontested CAS, then mark UMUTEX_UNOWNED/CONTESTED based
 * on the waiter count and wake one waiter.
 */
static int
do_unlock_normal(struct thread *td, struct umutex *m, uint32_t flags)
{
	struct umtx_key key;
	uint32_t owner, old, id;
	int error;
	int count;

	id = td->td_tid;
	/*
	 * Make sure we own this mtx.
	 */
	owner = fuword32(__DEVOLATILE(uint32_t *, &m->m_owner));
	if (owner == -1)
		return (EFAULT);

	if ((owner & ~UMUTEX_CONTESTED) != id)
		return (EPERM);

	/* This should be done in userland */
	if ((owner & UMUTEX_CONTESTED) == 0) {
		old = casuword32(&m->m_owner, owner, UMUTEX_UNOWNED);
		if (old == -1)
			return (EFAULT);
		if (old == owner)
			return (0);
		/* Lost the race; the word now carries the contested bit. */
		owner = old;
	}

	/* We should only ever be in here for contested locks */
	if ((error = umtx_key_get(m, TYPE_NORMAL_UMUTEX, GET_SHARE(flags),
		&key)) != 0)
		return (error);

	/* Busy the chain so waiter queueing is stable across the CAS. */
	umtxq_lock(&key);
	umtxq_busy(&key);
	count = umtxq_count(&key);
	umtxq_unlock(&key);

	/*
	 * When unlocking the umtx, it must be marked as unowned if
	 * there is zero or one thread only waiting for it.
	 * Otherwise, it must be marked as contested.
	 */
	old = casuword32(&m->m_owner, owner,
		count <= 1 ? UMUTEX_UNOWNED : UMUTEX_CONTESTED);
	umtxq_lock(&key);
	umtxq_signal(&key,1);
	umtxq_unbusy(&key);
	umtxq_unlock(&key);
	umtx_key_release(&key);
	if (old == -1)
		return (EFAULT);
	if (old != owner)
		return (EINVAL);
	return (0);
}
1195
1196 static inline struct umtx_pi *
1197 umtx_pi_alloc(int flags)
1198 {
1199 struct umtx_pi *pi;
1200
1201 pi = uma_zalloc(umtx_pi_zone, M_ZERO | flags);
1202 TAILQ_INIT(&pi->pi_blocked);
1203 atomic_add_int(&umtx_pi_allocated, 1);
1204 return (pi);
1205 }
1206
1207 static inline void
1208 umtx_pi_free(struct umtx_pi *pi)
1209 {
1210 uma_zfree(umtx_pi_zone, pi);
1211 atomic_add_int(&umtx_pi_allocated, -1);
1212 }
1213
/*
 * Adjust the thread's position on a pi_state after its priority has been
 * changed.
 *
 * Keeps the pi_blocked list sorted by user priority (UPRI).  Requires
 * the global umtx_lock.  Returns 0 when pi is NULL, otherwise 1
 * (whether or not the entry actually moved).
 */
static int
umtx_pi_adjust_thread(struct umtx_pi *pi, struct thread *td)
{
	struct umtx_q *uq, *uq1, *uq2;
	struct thread *td1;

	mtx_assert(&umtx_lock, MA_OWNED);
	if (pi == NULL)
		return (0);

	uq = td->td_umtxq;

	/*
	 * Check if the thread needs to be moved on the blocked chain.
	 * It needs to be moved if either its priority is lower than
	 * the previous thread or higher than the next thread.
	 */
	uq1 = TAILQ_PREV(uq, umtxq_head, uq_lockq);
	uq2 = TAILQ_NEXT(uq, uq_lockq);
	if ((uq1 != NULL && UPRI(td) < UPRI(uq1->uq_thread)) ||
		(uq2 != NULL && UPRI(td) > UPRI(uq2->uq_thread))) {
		/*
		 * Remove thread from blocked chain and determine where
		 * it should be moved to.
		 */
		TAILQ_REMOVE(&pi->pi_blocked, uq, uq_lockq);
		TAILQ_FOREACH(uq1, &pi->pi_blocked, uq_lockq) {
			td1 = uq1->uq_thread;
			MPASS(td1->td_proc->p_magic == P_MAGIC);
			/* Insert before the first lower-priority waiter. */
			if (UPRI(td1) > UPRI(td))
				break;
		}

		if (uq1 == NULL)
			TAILQ_INSERT_TAIL(&pi->pi_blocked, uq, uq_lockq);
		else
			TAILQ_INSERT_BEFORE(uq1, uq, uq_lockq);
	}
	return (1);
}
1258
/*
 * Propagate priority when a thread is blocked on a POSIX PI mutex.
 *
 * Must be called with umtx_lock held.  Starting from the PI mutex that
 * 'td' is blocked on, walks the chain of lock owners, lending 'td's
 * priority to each owner whose priority is worse, and re-sorting each
 * owner on the blocked list of the mutex it in turn waits on.  The walk
 * stops when an owner already has equal or better priority, when a
 * mutex has no recorded owner, or when the owner is not itself blocked
 * on a PI mutex.
 */
static void
umtx_propagate_priority(struct thread *td)
{
        struct umtx_q *uq;
        struct umtx_pi *pi;
        int pri;

        mtx_assert(&umtx_lock, MA_OWNED);
        pri = UPRI(td);
        uq = td->td_umtxq;
        pi = uq->uq_pi_blocked;
        if (pi == NULL)
                return;

        for (;;) {
                td = pi->pi_owner;
                if (td == NULL)
                        return;

                MPASS(td->td_proc != NULL);
                MPASS(td->td_proc->p_magic == P_MAGIC);

                /* Owner already at least as good; nothing to lend. */
                if (UPRI(td) <= pri)
                        return;

                thread_lock(td);
                sched_lend_user_prio(td, pri);
                thread_unlock(td);

                /*
                 * Pick up the lock that td is blocked on.
                 */
                uq = td->td_umtxq;
                pi = uq->uq_pi_blocked;
                /* Resort td on the list if needed. */
                if (!umtx_pi_adjust_thread(pi, td))
                        break;
        }
}
1302
/*
 * Unpropagate priority for a PI mutex when a thread blocked on
 * it is interrupted by signal or resumed by others.
 *
 * Must be called with umtx_lock held.  For each owner along the chain,
 * recomputes the priority it should run at as the best (lowest UPRI
 * value) among the head waiters of all PI mutexes it still owns,
 * bounded by its own inherited ceiling priority, then un-lends down to
 * that value.
 */
static void
umtx_unpropagate_priority(struct umtx_pi *pi)
{
        struct umtx_q *uq, *uq_owner;
        struct umtx_pi *pi2;
        int pri, oldpri;

        mtx_assert(&umtx_lock, MA_OWNED);

        while (pi != NULL && pi->pi_owner != NULL) {
                pri = PRI_MAX;
                uq_owner = pi->pi_owner->td_umtxq;

                /*
                 * The head of each pi_blocked list is its best waiter,
                 * so only the first entry needs to be examined.
                 */
                TAILQ_FOREACH(pi2, &uq_owner->uq_pi_contested, pi_link) {
                        uq = TAILQ_FIRST(&pi2->pi_blocked);
                        if (uq != NULL) {
                                if (pri > UPRI(uq->uq_thread))
                                        pri = UPRI(uq->uq_thread);
                        }
                }

                if (pri > uq_owner->uq_inherited_pri)
                        pri = uq_owner->uq_inherited_pri;
                thread_lock(pi->pi_owner);
                oldpri = pi->pi_owner->td_user_pri;
                sched_unlend_user_prio(pi->pi_owner, pri);
                thread_unlock(pi->pi_owner);
                umtx_pi_adjust_locked(pi->pi_owner, oldpri);
                /* Continue up the chain if the owner is itself blocked. */
                pi = uq_owner->uq_pi_blocked;
        }
}
1338
1339 /*
1340 * Insert a PI mutex into owned list.
1341 */
1342 static void
1343 umtx_pi_setowner(struct umtx_pi *pi, struct thread *owner)
1344 {
1345 struct umtx_q *uq_owner;
1346
1347 uq_owner = owner->td_umtxq;
1348 mtx_assert(&umtx_lock, MA_OWNED);
1349 if (pi->pi_owner != NULL)
1350 panic("pi_ower != NULL");
1351 pi->pi_owner = owner;
1352 TAILQ_INSERT_TAIL(&uq_owner->uq_pi_contested, pi, pi_link);
1353 }
1354
/*
 * Claim ownership of a PI mutex.
 *
 * Returns 0 on success (including the case where 'owner' already owns
 * the mutex), or EPERM when some other thread is already recorded as
 * owner (userland corrupted the mutex).  On a successful claim, if
 * there are waiters, the best waiter's priority is lent to the new
 * owner.
 */
static int
umtx_pi_claim(struct umtx_pi *pi, struct thread *owner)
{
        struct umtx_q *uq, *uq_owner;

        uq_owner = owner->td_umtxq;
        mtx_lock_spin(&umtx_lock);
        if (pi->pi_owner == owner) {
                mtx_unlock_spin(&umtx_lock);
                return (0);
        }

        if (pi->pi_owner != NULL) {
                /*
                 * userland may have already messed the mutex, sigh.
                 */
                mtx_unlock_spin(&umtx_lock);
                return (EPERM);
        }
        umtx_pi_setowner(pi, owner);
        /* pi_blocked is priority sorted; the head is the best waiter. */
        uq = TAILQ_FIRST(&pi->pi_blocked);
        if (uq != NULL) {
                int pri;

                pri = UPRI(uq->uq_thread);
                thread_lock(owner);
                if (pri < UPRI(owner))
                        sched_lend_user_prio(owner, pri);
                thread_unlock(owner);
        }
        mtx_unlock_spin(&umtx_lock);
        return (0);
}
1391
/*
 * Re-sort 'td' on the blocked list of the PI mutex it waits on after a
 * priority change, and propagate the new priority up the owner chain if
 * 'td' became the best waiter and its priority improved.
 *
 * Must be called with umtx_lock held and with td actually blocked on a
 * PI mutex (uq_pi_blocked != NULL).  'oldpri' is td's priority before
 * the change.
 */
static void
umtx_pi_adjust_locked(struct thread *td, u_char oldpri)
{
        struct umtx_q *uq;
        struct umtx_pi *pi;

        uq = td->td_umtxq;
        /*
         * Pick up the lock that td is blocked on.
         */
        pi = uq->uq_pi_blocked;
        MPASS(pi != NULL);

        /* Resort the turnstile on the list. */
        if (!umtx_pi_adjust_thread(pi, td))
                return;

        /*
         * If our priority was lowered and we are at the head of the
         * turnstile, then propagate our new priority up the chain.
         */
        if (uq == TAILQ_FIRST(&pi->pi_blocked) && UPRI(td) < oldpri)
                umtx_propagate_priority(td);
}
1416
/*
 * Adjust a thread's order position in the PI mutex it is blocked on;
 * this may trigger a new round of priority propagation.
 *
 * External entry point (called from the scheduler side); takes
 * umtx_lock itself and is a no-op when the thread is not blocked on a
 * PI mutex.
 */
void
umtx_pi_adjust(struct thread *td, u_char oldpri)
{
        struct umtx_q *uq;
        struct umtx_pi *pi;

        uq = td->td_umtxq;
        mtx_lock_spin(&umtx_lock);
        /*
         * Pick up the lock that td is blocked on.
         */
        pi = uq->uq_pi_blocked;
        if (pi != NULL)
                umtx_pi_adjust_locked(td, oldpri);
        mtx_unlock_spin(&umtx_lock);
}
1437
/*
 * Sleep on a PI mutex.
 *
 * Called with the umtx queue chain locked.  Records the mutex owner
 * (if not yet known), inserts 'uq' into the mutex's priority-sorted
 * blocked list, propagates the caller's priority to the owner chain,
 * then sleeps until woken or timed out.  On return the thread has been
 * removed from the blocked list and any lent priority has been
 * recomputed.  Returns 0, ETIMEDOUT, or a signal-related error from
 * msleep().
 */
static int
umtxq_sleep_pi(struct umtx_q *uq, struct umtx_pi *pi,
        uint32_t owner, const char *wmesg, int timo)
{
        struct umtxq_chain *uc;
        struct thread *td, *td1;
        struct umtx_q *uq1;
        int pri;
        int error = 0;

        td = uq->uq_thread;
        KASSERT(td == curthread, ("inconsistent uq_thread"));
        uc = umtxq_getchain(&uq->uq_key);
        UMTXQ_LOCKED_ASSERT(uc);
        umtxq_insert(uq);
        if (pi->pi_owner == NULL) {
                /* XXX
                 * Currently, we only support process-private PI mutexes;
                 * non-contended PI mutexes are locked in userland.
                 * Process-shared PI mutexes should always be initialized
                 * by the kernel and be registered in the kernel, and
                 * locking should always be done by the kernel to avoid
                 * security problems.  For a process-private PI mutex, we
                 * can find the owner thread and boost its priority safely.
                 */
                PROC_LOCK(curproc);
                td1 = thread_find(curproc, owner);
                mtx_lock_spin(&umtx_lock);
                if (td1 != NULL && pi->pi_owner == NULL) {
                        /* NOTE(review): uq1 is assigned but unused here. */
                        uq1 = td1->td_umtxq;
                        umtx_pi_setowner(pi, td1);
                }
                PROC_UNLOCK(curproc);
        } else {
                mtx_lock_spin(&umtx_lock);
        }

        /* Insert into pi_blocked keeping it sorted by priority. */
        TAILQ_FOREACH(uq1, &pi->pi_blocked, uq_lockq) {
                pri = UPRI(uq1->uq_thread);
                if (pri > UPRI(td))
                        break;
        }

        if (uq1 != NULL)
                TAILQ_INSERT_BEFORE(uq1, uq, uq_lockq);
        else
                TAILQ_INSERT_TAIL(&pi->pi_blocked, uq, uq_lockq);

        uq->uq_pi_blocked = pi;
        thread_lock(td);
        td->td_flags |= TDF_UPIBLOCKED;
        thread_unlock(td);
        mtx_unlock_spin(&umtx_lock);
        umtxq_unlock(&uq->uq_key);

        mtx_lock_spin(&umtx_lock);
        umtx_propagate_priority(td);
        mtx_unlock_spin(&umtx_lock);

        umtxq_lock(&uq->uq_key);
        if (uq->uq_flags & UQF_UMTXQ) {
                error = msleep(uq, &uc->uc_lock, PCATCH, wmesg, timo);
                if (error == EWOULDBLOCK)
                        error = ETIMEDOUT;
                /* Still queued: interrupted/timed out, dequeue ourselves. */
                if (uq->uq_flags & UQF_UMTXQ) {
                        umtxq_busy(&uq->uq_key);
                        umtxq_remove(uq);
                        umtxq_unbusy(&uq->uq_key);
                }
        }
        umtxq_unlock(&uq->uq_key);

        mtx_lock_spin(&umtx_lock);
        uq->uq_pi_blocked = NULL;
        thread_lock(td);
        td->td_flags &= ~TDF_UPIBLOCKED;
        thread_unlock(td);
        TAILQ_REMOVE(&pi->pi_blocked, uq, uq_lockq);
        umtx_unpropagate_priority(pi);
        mtx_unlock_spin(&umtx_lock);

        umtxq_lock(&uq->uq_key);

        return (error);
}
1526
/*
 * Add a reference to a PI mutex tracking structure.
 * The queue chain lock protects the refcount.
 */
static void
umtx_pi_ref(struct umtx_pi *pi)
{
        struct umtxq_chain *uc;

        uc = umtxq_getchain(&pi->pi_key);
        UMTXQ_LOCKED_ASSERT(uc);
        pi->pi_refcount++;
}
1539
1540 /*
1541 * Decrease reference count for a PI mutex, if the counter
1542 * is decreased to zero, its memory space is freed.
1543 */
1544 static void
1545 umtx_pi_unref(struct umtx_pi *pi)
1546 {
1547 struct umtxq_chain *uc;
1548 int free = 0;
1549
1550 uc = umtxq_getchain(&pi->pi_key);
1551 UMTXQ_LOCKED_ASSERT(uc);
1552 KASSERT(pi->pi_refcount > 0, ("invalid reference count"));
1553 if (--pi->pi_refcount == 0) {
1554 mtx_lock_spin(&umtx_lock);
1555 if (pi->pi_owner != NULL) {
1556 TAILQ_REMOVE(&pi->pi_owner->td_umtxq->uq_pi_contested,
1557 pi, pi_link);
1558 pi->pi_owner = NULL;
1559 }
1560 KASSERT(TAILQ_EMPTY(&pi->pi_blocked),
1561 ("blocked queue not empty"));
1562 mtx_unlock_spin(&umtx_lock);
1563 TAILQ_REMOVE(&uc->uc_pi_list, pi, pi_hashlink);
1564 free = 1;
1565 }
1566 if (free)
1567 umtx_pi_free(pi);
1568 }
1569
1570 /*
1571 * Find a PI mutex in hash table.
1572 */
1573 static struct umtx_pi *
1574 umtx_pi_lookup(struct umtx_key *key)
1575 {
1576 struct umtxq_chain *uc;
1577 struct umtx_pi *pi;
1578
1579 uc = umtxq_getchain(key);
1580 UMTXQ_LOCKED_ASSERT(uc);
1581
1582 TAILQ_FOREACH(pi, &uc->uc_pi_list, pi_hashlink) {
1583 if (umtx_key_match(&pi->pi_key, key)) {
1584 return (pi);
1585 }
1586 }
1587 return (NULL);
1588 }
1589
/*
 * Insert a PI mutex tracking structure into its hash chain.
 * The chain lock must be held; pi_key must already be set.
 */
static inline void
umtx_pi_insert(struct umtx_pi *pi)
{
        struct umtxq_chain *uc;

        uc = umtxq_getchain(&pi->pi_key);
        UMTXQ_LOCKED_ASSERT(uc);
        TAILQ_INSERT_TAIL(&uc->uc_pi_list, pi, pi_hashlink);
}
1602
/*
 * Lock a PI mutex.
 *
 * Finds (or creates) the kernel-side umtx_pi for the userland mutex,
 * then loops attempting the userspace compare-and-swap.  On contention
 * it sets the contested bit and sleeps via umtxq_sleep_pi(), which
 * handles priority inheritance.  'try' != 0 requests trylock semantics
 * (EBUSY instead of sleeping); 'timo' is the sleep timeout in ticks
 * (0 = forever).  Returns 0, EFAULT, EDEADLK, EBUSY, ETIMEDOUT, or a
 * signal error.
 */
static int
_do_lock_pi(struct thread *td, struct umutex *m, uint32_t flags, int timo,
        int try)
{
        struct umtx_q *uq;
        struct umtx_pi *pi, *new_pi;
        uint32_t id, owner, old;
        int error;

        id = td->td_tid;
        uq = td->td_umtxq;

        if ((error = umtx_key_get(m, TYPE_PI_UMUTEX, GET_SHARE(flags),
            &uq->uq_key)) != 0)
                return (error);
        umtxq_lock(&uq->uq_key);
        pi = umtx_pi_lookup(&uq->uq_key);
        if (pi == NULL) {
                new_pi = umtx_pi_alloc(M_NOWAIT);
                if (new_pi == NULL) {
                        /*
                         * Retry with M_WAITOK; the chain lock must be
                         * dropped, so re-check for a racing insert.
                         */
                        umtxq_unlock(&uq->uq_key);
                        new_pi = umtx_pi_alloc(M_WAITOK);
                        new_pi->pi_key = uq->uq_key;
                        umtxq_lock(&uq->uq_key);
                        pi = umtx_pi_lookup(&uq->uq_key);
                        if (pi != NULL) {
                                umtx_pi_free(new_pi);
                                new_pi = NULL;
                        }
                }
                if (new_pi != NULL) {
                        /* NOTE(review): redundant on the M_WAITOK path. */
                        new_pi->pi_key = uq->uq_key;
                        umtx_pi_insert(new_pi);
                        pi = new_pi;
                }
        }
        umtx_pi_ref(pi);
        umtxq_unlock(&uq->uq_key);

        /*
         * Care must be exercised when dealing with umtx structure.  It
         * can fault on any access.
         */
        for (;;) {
                /*
                 * Try the uncontested case.  This should be done in userland.
                 */
                owner = casuword32(&m->m_owner, UMUTEX_UNOWNED, id);

                /* The acquire succeeded. */
                if (owner == UMUTEX_UNOWNED) {
                        error = 0;
                        break;
                }

                /* The address was invalid. */
                if (owner == -1) {
                        error = EFAULT;
                        break;
                }

                /* If no one owns it but it is contested try to acquire it. */
                if (owner == UMUTEX_CONTESTED) {
                        owner = casuword32(&m->m_owner,
                            UMUTEX_CONTESTED, id | UMUTEX_CONTESTED);

                        if (owner == UMUTEX_CONTESTED) {
                                umtxq_lock(&uq->uq_key);
                                error = umtx_pi_claim(pi, td);
                                umtxq_unlock(&uq->uq_key);
                                break;
                        }

                        /* The address was invalid. */
                        if (owner == -1) {
                                error = EFAULT;
                                break;
                        }

                        /* If this failed the lock has changed, restart. */
                        continue;
                }

                /* Self-deadlock detection for error-checking mutexes. */
                if ((flags & UMUTEX_ERROR_CHECK) != 0 &&
                    (owner & ~UMUTEX_CONTESTED) == id) {
                        error = EDEADLK;
                        break;
                }

                if (try != 0) {
                        error = EBUSY;
                        break;
                }

                /*
                 * If we caught a signal, we have retried and now
                 * exit immediately.
                 */
                if (error != 0)
                        break;

                umtxq_lock(&uq->uq_key);
                umtxq_busy(&uq->uq_key);
                umtxq_unlock(&uq->uq_key);

                /*
                 * Set the contested bit so that a release in user space
                 * knows to use the system call for unlock.  If this fails
                 * either some one else has acquired the lock or it has been
                 * released.
                 */
                old = casuword32(&m->m_owner, owner, owner | UMUTEX_CONTESTED);

                /* The address was invalid. */
                if (old == -1) {
                        umtxq_lock(&uq->uq_key);
                        umtxq_unbusy(&uq->uq_key);
                        umtxq_unlock(&uq->uq_key);
                        error = EFAULT;
                        break;
                }

                umtxq_lock(&uq->uq_key);
                umtxq_unbusy(&uq->uq_key);
                /*
                 * We set the contested bit, sleep.  Otherwise the lock changed
                 * and we need to retry or we lost a race to the thread
                 * unlocking the umtx.
                 */
                if (old == owner)
                        error = umtxq_sleep_pi(uq, pi, owner & ~UMUTEX_CONTESTED,
                            "umtxpi", timo);
                umtxq_unlock(&uq->uq_key);
        }

        umtxq_lock(&uq->uq_key);
        umtx_pi_unref(pi);
        umtxq_unlock(&uq->uq_key);

        umtx_key_release(&uq->uq_key);
        return (error);
}
1748
1749 /*
1750 * Unlock a PI mutex.
1751 */
1752 static int
1753 do_unlock_pi(struct thread *td, struct umutex *m, uint32_t flags)
1754 {
1755 struct umtx_key key;
1756 struct umtx_q *uq_first, *uq_first2, *uq_me;
1757 struct umtx_pi *pi, *pi2;
1758 uint32_t owner, old, id;
1759 int error;
1760 int count;
1761 int pri;
1762
1763 id = td->td_tid;
1764 /*
1765 * Make sure we own this mtx.
1766 */
1767 owner = fuword32(__DEVOLATILE(uint32_t *, &m->m_owner));
1768 if (owner == -1)
1769 return (EFAULT);
1770
1771 if ((owner & ~UMUTEX_CONTESTED) != id)
1772 return (EPERM);
1773
1774 /* This should be done in userland */
1775 if ((owner & UMUTEX_CONTESTED) == 0) {
1776 old = casuword32(&m->m_owner, owner, UMUTEX_UNOWNED);
1777 if (old == -1)
1778 return (EFAULT);
1779 if (old == owner)
1780 return (0);
1781 owner = old;
1782 }
1783
1784 /* We should only ever be in here for contested locks */
1785 if ((error = umtx_key_get(m, TYPE_PI_UMUTEX, GET_SHARE(flags),
1786 &key)) != 0)
1787 return (error);
1788
1789 umtxq_lock(&key);
1790 umtxq_busy(&key);
1791 count = umtxq_count_pi(&key, &uq_first);
1792 if (uq_first != NULL) {
1793 pi = uq_first->uq_pi_blocked;
1794 if (pi->pi_owner != curthread) {
1795 umtxq_unbusy(&key);
1796 umtxq_unlock(&key);
1797 /* userland messed the mutex */
1798 return (EPERM);
1799 }
1800 uq_me = curthread->td_umtxq;
1801 mtx_lock_spin(&umtx_lock);
1802 pi->pi_owner = NULL;
1803 TAILQ_REMOVE(&uq_me->uq_pi_contested, pi, pi_link);
1804 uq_first = TAILQ_FIRST(&pi->pi_blocked);
1805 pri = PRI_MAX;
1806 TAILQ_FOREACH(pi2, &uq_me->uq_pi_contested, pi_link) {
1807 uq_first2 = TAILQ_FIRST(&pi2->pi_blocked);
1808 if (uq_first2 != NULL) {
1809 if (pri > UPRI(uq_first2->uq_thread))
1810 pri = UPRI(uq_first2->uq_thread);
1811 }
1812 }
1813 thread_lock(curthread);
1814 sched_unlend_user_prio(curthread, pri);
1815 thread_unlock(curthread);
1816 mtx_unlock_spin(&umtx_lock);
1817 }
1818 umtxq_unlock(&key);
1819
1820 /*
1821 * When unlocking the umtx, it must be marked as unowned if
1822 * there is zero or one thread only waiting for it.
1823 * Otherwise, it must be marked as contested.
1824 */
1825 old = casuword32(&m->m_owner, owner,
1826 count <= 1 ? UMUTEX_UNOWNED : UMUTEX_CONTESTED);
1827
1828 umtxq_lock(&key);
1829 if (uq_first != NULL)
1830 umtxq_signal_thread(uq_first);
1831 umtxq_unbusy(&key);
1832 umtxq_unlock(&key);
1833 umtx_key_release(&key);
1834 if (old == -1)
1835 return (EFAULT);
1836 if (old != owner)
1837 return (EINVAL);
1838 return (0);
1839 }
1840
/*
 * Lock a PP (priority-protected / priority-ceiling) mutex.
 *
 * On each attempt the thread raises its inherited priority to the
 * mutex's ceiling (if privileged), then tries to CAS the owner word
 * from UMUTEX_CONTESTED to id|UMUTEX_CONTESTED; for PP mutexes the
 * released state is always UMUTEX_CONTESTED so lock always enters the
 * kernel.  On failure the previous inherited priority is restored
 * before sleeping/retrying.  Returns 0, EINVAL (bad ceiling or caller
 * priority better than ceiling), EFAULT, EDEADLK, EBUSY (try mode),
 * ETIMEDOUT, or a signal error.
 */
static int
_do_lock_pp(struct thread *td, struct umutex *m, uint32_t flags, int timo,
        int try)
{
        struct umtx_q *uq, *uq2;
        struct umtx_pi *pi;
        uint32_t ceiling;
        uint32_t owner, id;
        int error, pri, old_inherited_pri, su;

        id = td->td_tid;
        uq = td->td_umtxq;
        if ((error = umtx_key_get(m, TYPE_PP_UMUTEX, GET_SHARE(flags),
            &uq->uq_key)) != 0)
                return (error);
        /* Only privileged (rtprio-capable) threads may boost priority. */
        su = (priv_check(td, PRIV_SCHED_RTPRIO) == 0);
        for (;;) {
                old_inherited_pri = uq->uq_inherited_pri;
                umtxq_lock(&uq->uq_key);
                umtxq_busy(&uq->uq_key);
                umtxq_unlock(&uq->uq_key);

                /* Ceiling is stored inverted relative to RTP_PRIO_MAX. */
                ceiling = RTP_PRIO_MAX - fuword32(&m->m_ceilings[0]);
                if (ceiling > RTP_PRIO_MAX) {
                        error = EINVAL;
                        goto out;
                }

                mtx_lock_spin(&umtx_lock);
                /* Caller already runs better than the ceiling: protocol error. */
                if (UPRI(td) < PRI_MIN_REALTIME + ceiling) {
                        mtx_unlock_spin(&umtx_lock);
                        error = EINVAL;
                        goto out;
                }
                if (su && PRI_MIN_REALTIME + ceiling < uq->uq_inherited_pri) {
                        uq->uq_inherited_pri = PRI_MIN_REALTIME + ceiling;
                        thread_lock(td);
                        if (uq->uq_inherited_pri < UPRI(td))
                                sched_lend_user_prio(td, uq->uq_inherited_pri);
                        thread_unlock(td);
                }
                mtx_unlock_spin(&umtx_lock);

                owner = casuword32(&m->m_owner,
                    UMUTEX_CONTESTED, id | UMUTEX_CONTESTED);

                if (owner == UMUTEX_CONTESTED) {
                        error = 0;
                        break;
                }

                /* The address was invalid. */
                if (owner == -1) {
                        error = EFAULT;
                        break;
                }

                if ((flags & UMUTEX_ERROR_CHECK) != 0 &&
                    (owner & ~UMUTEX_CONTESTED) == id) {
                        error = EDEADLK;
                        break;
                }

                if (try != 0) {
                        error = EBUSY;
                        break;
                }

                /*
                 * If we caught a signal, we have retried and now
                 * exit immediately.
                 */
                if (error != 0)
                        break;

                umtxq_lock(&uq->uq_key);
                umtxq_insert(uq);
                umtxq_unbusy(&uq->uq_key);
                error = umtxq_sleep(uq, "umtxpp", timo);
                umtxq_remove(uq);
                umtxq_unlock(&uq->uq_key);

                /* Restore pre-attempt priority before retrying. */
                mtx_lock_spin(&umtx_lock);
                uq->uq_inherited_pri = old_inherited_pri;
                pri = PRI_MAX;
                TAILQ_FOREACH(pi, &uq->uq_pi_contested, pi_link) {
                        uq2 = TAILQ_FIRST(&pi->pi_blocked);
                        if (uq2 != NULL) {
                                if (pri > UPRI(uq2->uq_thread))
                                        pri = UPRI(uq2->uq_thread);
                        }
                }
                if (pri > uq->uq_inherited_pri)
                        pri = uq->uq_inherited_pri;
                thread_lock(td);
                sched_unlend_user_prio(td, pri);
                thread_unlock(td);
                mtx_unlock_spin(&umtx_lock);
        }

        /* On any failure, undo the priority boost from the last attempt. */
        if (error != 0) {
                mtx_lock_spin(&umtx_lock);
                uq->uq_inherited_pri = old_inherited_pri;
                pri = PRI_MAX;
                TAILQ_FOREACH(pi, &uq->uq_pi_contested, pi_link) {
                        uq2 = TAILQ_FIRST(&pi->pi_blocked);
                        if (uq2 != NULL) {
                                if (pri > UPRI(uq2->uq_thread))
                                        pri = UPRI(uq2->uq_thread);
                        }
                }
                if (pri > uq->uq_inherited_pri)
                        pri = uq->uq_inherited_pri;
                thread_lock(td);
                sched_unlend_user_prio(td, pri);
                thread_unlock(td);
                mtx_unlock_spin(&umtx_lock);
        }

out:
        umtxq_lock(&uq->uq_key);
        umtxq_unbusy(&uq->uq_key);
        umtxq_unlock(&uq->uq_key);
        umtx_key_release(&uq->uq_key);
        return (error);
}
1970
/*
 * Unlock a PP (priority-ceiling) mutex.
 *
 * Verifies ownership, reads the saved ceiling from m_ceilings[1],
 * writes UMUTEX_CONTESTED back to the owner word (so the next locker
 * always enters the kernel), wakes one waiter, and drops the caller's
 * inherited priority to the value derived from the remaining ceiling
 * and any PI mutexes it still holds.
 */
static int
do_unlock_pp(struct thread *td, struct umutex *m, uint32_t flags)
{
        struct umtx_key key;
        struct umtx_q *uq, *uq2;
        struct umtx_pi *pi;
        uint32_t owner, id;
        uint32_t rceiling;
        int error, pri, new_inherited_pri, su;

        id = td->td_tid;
        uq = td->td_umtxq;
        su = (priv_check(td, PRIV_SCHED_RTPRIO) == 0);

        /*
         * Make sure we own this mtx.
         */
        owner = fuword32(__DEVOLATILE(uint32_t *, &m->m_owner));
        if (owner == -1)
                return (EFAULT);

        if ((owner & ~UMUTEX_CONTESTED) != id)
                return (EPERM);

        error = copyin(&m->m_ceilings[1], &rceiling, sizeof(uint32_t));
        if (error != 0)
                return (error);

        if (rceiling == -1)
                new_inherited_pri = PRI_MAX;
        else {
                /* Ceiling is stored inverted relative to RTP_PRIO_MAX. */
                rceiling = RTP_PRIO_MAX - rceiling;
                if (rceiling > RTP_PRIO_MAX)
                        return (EINVAL);
                new_inherited_pri = PRI_MIN_REALTIME + rceiling;
        }

        if ((error = umtx_key_get(m, TYPE_PP_UMUTEX, GET_SHARE(flags),
            &key)) != 0)
                return (error);
        umtxq_lock(&key);
        umtxq_busy(&key);
        umtxq_unlock(&key);
        /*
         * For priority protected mutex, always set unlocked state
         * to UMUTEX_CONTESTED, so that userland always enters kernel
         * to lock the mutex, it is necessary because thread priority
         * has to be adjusted for such mutex.
         */
        error = suword32(__DEVOLATILE(uint32_t *, &m->m_owner),
            UMUTEX_CONTESTED);

        umtxq_lock(&key);
        if (error == 0)
                umtxq_signal(&key, 1);
        umtxq_unbusy(&key);
        umtxq_unlock(&key);

        /* suword32() returns -1 on fault. */
        if (error == -1)
                error = EFAULT;
        else {
                mtx_lock_spin(&umtx_lock);
                if (su != 0)
                        uq->uq_inherited_pri = new_inherited_pri;
                pri = PRI_MAX;
                TAILQ_FOREACH(pi, &uq->uq_pi_contested, pi_link) {
                        uq2 = TAILQ_FIRST(&pi->pi_blocked);
                        if (uq2 != NULL) {
                                if (pri > UPRI(uq2->uq_thread))
                                        pri = UPRI(uq2->uq_thread);
                        }
                }
                if (pri > uq->uq_inherited_pri)
                        pri = uq->uq_inherited_pri;
                thread_lock(td);
                sched_unlend_user_prio(td, pri);
                thread_unlock(td);
                mtx_unlock_spin(&umtx_lock);
        }
        umtx_key_release(&key);
        return (error);
}
2056
/*
 * Change the priority ceiling of a PP mutex.
 *
 * Acquires the (kernel-visible) mutex ownership first so the ceiling
 * update is atomic with respect to other lockers, stores the new value
 * in m_ceilings[0], and releases.  If the caller already owns the
 * mutex, the ceiling is simply overwritten.  On success the previous
 * ceiling is copied out via 'old_ceiling' when non-NULL.
 */
static int
do_set_ceiling(struct thread *td, struct umutex *m, uint32_t ceiling,
        uint32_t *old_ceiling)
{
        struct umtx_q *uq;
        uint32_t save_ceiling;
        uint32_t owner, id;
        uint32_t flags;
        int error;

        flags = fuword32(&m->m_flags);
        /* Only meaningful for priority-protected mutexes. */
        if ((flags & UMUTEX_PRIO_PROTECT) == 0)
                return (EINVAL);
        if (ceiling > RTP_PRIO_MAX)
                return (EINVAL);
        id = td->td_tid;
        uq = td->td_umtxq;
        if ((error = umtx_key_get(m, TYPE_PP_UMUTEX, GET_SHARE(flags),
            &uq->uq_key)) != 0)
                return (error);
        for (;;) {
                umtxq_lock(&uq->uq_key);
                umtxq_busy(&uq->uq_key);
                umtxq_unlock(&uq->uq_key);

                save_ceiling = fuword32(&m->m_ceilings[0]);

                owner = casuword32(&m->m_owner,
                    UMUTEX_CONTESTED, id | UMUTEX_CONTESTED);

                if (owner == UMUTEX_CONTESTED) {
                        /* Acquired the mutex: update ceiling and release. */
                        suword32(&m->m_ceilings[0], ceiling);
                        suword32(__DEVOLATILE(uint32_t *, &m->m_owner),
                            UMUTEX_CONTESTED);
                        error = 0;
                        break;
                }

                /* The address was invalid. */
                if (owner == -1) {
                        error = EFAULT;
                        break;
                }

                /* We already own it: just update the ceiling. */
                if ((owner & ~UMUTEX_CONTESTED) == id) {
                        suword32(&m->m_ceilings[0], ceiling);
                        error = 0;
                        break;
                }

                /*
                 * If we caught a signal, we have retried and now
                 * exit immediately.
                 */
                if (error != 0)
                        break;

                /*
                 * We set the contested bit, sleep.  Otherwise the lock changed
                 * and we need to retry or we lost a race to the thread
                 * unlocking the umtx.
                 */
                umtxq_lock(&uq->uq_key);
                umtxq_insert(uq);
                umtxq_unbusy(&uq->uq_key);
                error = umtxq_sleep(uq, "umtxpp", 0);
                umtxq_remove(uq);
                umtxq_unlock(&uq->uq_key);
        }
        umtxq_lock(&uq->uq_key);
        if (error == 0)
                umtxq_signal(&uq->uq_key, INT_MAX);
        umtxq_unbusy(&uq->uq_key);
        umtxq_unlock(&uq->uq_key);
        umtx_key_release(&uq->uq_key);
        if (error == 0 && old_ceiling != NULL)
                suword32(old_ceiling, save_ceiling);
        return (error);
}
2136
2137 static int
2138 _do_lock_umutex(struct thread *td, struct umutex *m, int flags, int timo,
2139 int try)
2140 {
2141 switch(flags & (UMUTEX_PRIO_INHERIT | UMUTEX_PRIO_PROTECT)) {
2142 case 0:
2143 return (_do_lock_normal(td, m, flags, timo, try));
2144 case UMUTEX_PRIO_INHERIT:
2145 return (_do_lock_pi(td, m, flags, timo, try));
2146 case UMUTEX_PRIO_PROTECT:
2147 return (_do_lock_pp(td, m, flags, timo, try));
2148 }
2149 return (EINVAL);
2150 }
2151
/*
 * Lock a userland POSIX mutex.
 *
 * With no timeout the wait is indefinite and an interrupted lock is
 * transparently restarted (EINTR -> ERESTART).  With a timeout, the
 * deadline is computed against the monotonic uptime clock and the
 * lock is retried until it succeeds, fails, or the deadline passes;
 * timed locking is not restarted (ERESTART -> EINTR), per POSIX.
 */
static int
do_lock_umutex(struct thread *td, struct umutex *m,
        struct timespec *timeout, int try)
{
        struct timespec ts, ts2, ts3;
        struct timeval tv;
        uint32_t flags;
        int error;

        flags = fuword32(&m->m_flags);
        if (flags == -1)
                return (EFAULT);

        if (timeout == NULL) {
                error = _do_lock_umutex(td, m, flags, 0, try);
                /* Mutex locking is restarted if it is interrupted. */
                if (error == EINTR)
                        error = ERESTART;
        } else {
                /* Absolute deadline = now + relative timeout. */
                getnanouptime(&ts);
                timespecadd(&ts, timeout);
                TIMESPEC_TO_TIMEVAL(&tv, timeout);
                for (;;) {
                        error = _do_lock_umutex(td, m, flags, tvtohz(&tv), try);
                        if (error != ETIMEDOUT)
                                break;
                        getnanouptime(&ts2);
                        if (timespeccmp(&ts2, &ts, >=)) {
                                error = ETIMEDOUT;
                                break;
                        }
                        /* Recompute the remaining time and retry. */
                        ts3 = ts;
                        timespecsub(&ts3, &ts2);
                        TIMESPEC_TO_TIMEVAL(&tv, &ts3);
                }
                /* Timed-locking is not restarted. */
                if (error == ERESTART)
                        error = EINTR;
        }
        return (error);
}
2196
2197 /*
2198 * Unlock a userland POSIX mutex.
2199 */
2200 static int
2201 do_unlock_umutex(struct thread *td, struct umutex *m)
2202 {
2203 uint32_t flags;
2204
2205 flags = fuword32(&m->m_flags);
2206 if (flags == -1)
2207 return (EFAULT);
2208
2209 switch(flags & (UMUTEX_PRIO_INHERIT | UMUTEX_PRIO_PROTECT)) {
2210 case 0:
2211 return (do_unlock_normal(td, m, flags));
2212 case UMUTEX_PRIO_INHERIT:
2213 return (do_unlock_pi(td, m, flags));
2214 case UMUTEX_PRIO_PROTECT:
2215 return (do_unlock_pp(td, m, flags));
2216 }
2217
2218 return (EINVAL);
2219 }
2220
/*
 * Wait on a userland condition variable.
 *
 * Queues the caller, marks c_has_waiters, releases the associated
 * mutex, then sleeps (optionally with a deadline against the monotonic
 * uptime clock).  On an error/timeout that raced with a concurrent
 * signal, the consumed wakeup is re-issued to avoid losing it.
 */
static int
do_cv_wait(struct thread *td, struct ucond *cv, struct umutex *m,
        struct timespec *timeout, u_long wflags)
{
        struct umtx_q *uq;
        struct timeval tv;
        struct timespec cts, ets, tts;
        uint32_t flags;
        int error;

        uq = td->td_umtxq;
        flags = fuword32(&cv->c_flags);
        error = umtx_key_get(cv, TYPE_CV, GET_SHARE(flags), &uq->uq_key);
        if (error != 0)
                return (error);
        umtxq_lock(&uq->uq_key);
        umtxq_busy(&uq->uq_key);
        umtxq_insert(uq);
        umtxq_unlock(&uq->uq_key);

        /*
         * The magic thing is we should set c_has_waiters to 1 before
         * releasing user mutex.
         */
        suword32(__DEVOLATILE(uint32_t *, &cv->c_has_waiters), 1);

        umtxq_lock(&uq->uq_key);
        umtxq_unbusy(&uq->uq_key);
        umtxq_unlock(&uq->uq_key);

        error = do_unlock_umutex(td, m);

        umtxq_lock(&uq->uq_key);
        if (error == 0) {
                if ((wflags & UMTX_CHECK_UNPARKING) &&
                    (td->td_pflags & TDP_WAKEUP)) {
                        /* Pending unpark consumed before sleeping. */
                        td->td_pflags &= ~TDP_WAKEUP;
                        error = EINTR;
                } else if (timeout == NULL) {
                        error = umtxq_sleep(uq, "ucond", 0);
                } else {
                        /* Absolute deadline = now + relative timeout. */
                        getnanouptime(&ets);
                        timespecadd(&ets, timeout);
                        TIMESPEC_TO_TIMEVAL(&tv, timeout);
                        for (;;) {
                                error = umtxq_sleep(uq, "ucond", tvtohz(&tv));
                                if (error != ETIMEDOUT)
                                        break;
                                getnanouptime(&cts);
                                if (timespeccmp(&cts, &ets, >=)) {
                                        error = ETIMEDOUT;
                                        break;
                                }
                                tts = ets;
                                timespecsub(&tts, &cts);
                                TIMESPEC_TO_TIMEVAL(&tv, &tts);
                        }
                }
        }

        if (error != 0) {
                if ((uq->uq_flags & UQF_UMTXQ) == 0) {
                        /*
                         * If we concurrently got do_cv_signal()d
                         * and we got an error or UNIX signals or a timeout,
                         * then, perform another umtxq_signal to avoid
                         * consuming the wakeup.  This may cause spurious
                         * wakeup for another thread which was just queued,
                         * but SUSV3 explicitly allows spurious wakeup to
                         * occur, and indeed a kernel based implementation
                         * can not avoid it.
                         */
                        if (!umtxq_signal(&uq->uq_key, 1))
                                error = 0;
                }
                if (error == ERESTART)
                        error = EINTR;
        }
        umtxq_remove(uq);
        umtxq_unlock(&uq->uq_key);
        umtx_key_release(&uq->uq_key);
        return (error);
}
2304
/*
 * Signal a userland condition variable: wake at most one waiter, and
 * clear c_has_waiters when the wakeup emptied the queue.
 */
static int
do_cv_signal(struct thread *td, struct ucond *cv)
{
        struct umtx_key key;
        int error, cnt, nwake;
        uint32_t flags;

        flags = fuword32(&cv->c_flags);
        if ((error = umtx_key_get(cv, TYPE_CV, GET_SHARE(flags), &key)) != 0)
                return (error);
        umtxq_lock(&key);
        umtxq_busy(&key);
        cnt = umtxq_count(&key);
        nwake = umtxq_signal(&key, 1);
        if (cnt <= nwake) {
                /* No waiters remain: clear the userland flag.
                 * The queue lock must be dropped around the user access;
                 * the busy state keeps the queue consistent meanwhile. */
                umtxq_unlock(&key);
                error = suword32(
                    __DEVOLATILE(uint32_t *, &cv->c_has_waiters), 0);
                umtxq_lock(&key);
        }
        umtxq_unbusy(&key);
        umtxq_unlock(&key);
        umtx_key_release(&key);
        return (error);
}
2333
/*
 * Broadcast a userland condition variable: wake every waiter and clear
 * the userland c_has_waiters flag.
 */
static int
do_cv_broadcast(struct thread *td, struct ucond *cv)
{
        struct umtx_key key;
        int error;
        uint32_t flags;

        flags = fuword32(&cv->c_flags);
        if ((error = umtx_key_get(cv, TYPE_CV, GET_SHARE(flags), &key)) != 0)
                return (error);

        umtxq_lock(&key);
        umtxq_busy(&key);
        umtxq_signal(&key, INT_MAX);
        umtxq_unlock(&key);

        /* Queue lock dropped for the user access; busy state held. */
        error = suword32(__DEVOLATILE(uint32_t *, &cv->c_has_waiters), 0);

        umtxq_lock(&key);
        umtxq_unbusy(&key);
        umtxq_unlock(&key);

        umtx_key_release(&key);
        return (error);
}
2359
/*
 * Acquire a userland rwlock for reading.
 *
 * Fast path: CAS-increment the reader count while no write owner (and,
 * unless reader-preferring, no write waiters) is present.  Slow path:
 * set URWLOCK_READ_WAITERS, bump rw_blocked_readers, and sleep on the
 * shared queue until the blocking writers are gone.  Returns 0, EAGAIN
 * when the reader count would overflow URWLOCK_MAX_READERS, EFAULT via
 * the key lookup, ETIMEDOUT, or a signal error.
 */
static int
do_rw_rdlock(struct thread *td, struct urwlock *rwlock, long fflag, int timo)
{
        struct umtx_q *uq;
        uint32_t flags, wrflags;
        int32_t state, oldstate;
        int32_t blocked_readers;
        int error;

        uq = td->td_umtxq;
        flags = fuword32(&rwlock->rw_flags);
        error = umtx_key_get(rwlock, TYPE_RWLOCK, GET_SHARE(flags), &uq->uq_key);
        if (error != 0)
                return (error);

        /* Writer-preferring locks also block readers behind write waiters. */
        wrflags = URWLOCK_WRITE_OWNER;
        if (!(fflag & URWLOCK_PREFER_READER) && !(flags & URWLOCK_PREFER_READER))
                wrflags |= URWLOCK_WRITE_WAITERS;

        for (;;) {
                state = fuword32(__DEVOLATILE(int32_t *, &rwlock->rw_state));
                /* try to lock it */
                while (!(state & wrflags)) {
                        if (__predict_false(URWLOCK_READER_COUNT(state) == URWLOCK_MAX_READERS)) {
                                umtx_key_release(&uq->uq_key);
                                return (EAGAIN);
                        }
                        oldstate = casuword32(&rwlock->rw_state, state, state + 1);
                        if (oldstate == state) {
                                umtx_key_release(&uq->uq_key);
                                return (0);
                        }
                        state = oldstate;
                }

                if (error)
                        break;

                /* grab monitor lock */
                umtxq_lock(&uq->uq_key);
                umtxq_busy(&uq->uq_key);
                umtxq_unlock(&uq->uq_key);

                /* set read contention bit */
                while ((state & wrflags) && !(state & URWLOCK_READ_WAITERS)) {
                        oldstate = casuword32(&rwlock->rw_state, state, state | URWLOCK_READ_WAITERS);
                        if (oldstate == state)
                                goto sleep;
                        state = oldstate;
                }

                /* state is changed while setting flags, restart */
                if (!(state & wrflags)) {
                        umtxq_lock(&uq->uq_key);
                        umtxq_unbusy(&uq->uq_key);
                        umtxq_unlock(&uq->uq_key);
                        continue;
                }

sleep:
                /* contention bit is set, before sleeping, increase read waiter count */
                blocked_readers = fuword32(&rwlock->rw_blocked_readers);
                suword32(&rwlock->rw_blocked_readers, blocked_readers+1);

                while (state & wrflags) {
                        umtxq_lock(&uq->uq_key);
                        umtxq_insert(uq);
                        umtxq_unbusy(&uq->uq_key);

                        error = umtxq_sleep(uq, "urdlck", timo);

                        umtxq_busy(&uq->uq_key);
                        umtxq_remove(uq);
                        umtxq_unlock(&uq->uq_key);
                        if (error)
                                break;
                        state = fuword32(__DEVOLATILE(int32_t *, &rwlock->rw_state));
                }

                /* decrease read waiter count, and may clear read contention bit */
                blocked_readers = fuword32(&rwlock->rw_blocked_readers);
                suword32(&rwlock->rw_blocked_readers, blocked_readers-1);
                if (blocked_readers == 1) {
                        /* We were the last blocked reader: clear the bit. */
                        state = fuword32(__DEVOLATILE(int32_t *, &rwlock->rw_state));
                        for (;;) {
                                oldstate = casuword32(&rwlock->rw_state, state,
                                         state & ~URWLOCK_READ_WAITERS);
                                if (oldstate == state)
                                        break;
                                state = oldstate;
                        }
                }

                umtxq_lock(&uq->uq_key);
                umtxq_unbusy(&uq->uq_key);
                umtxq_unlock(&uq->uq_key);
        }
        umtx_key_release(&uq->uq_key);
        return (error);
}
2460
/*
 * Timed variant of do_rw_rdlock(): retries the read lock against an
 * absolute deadline computed from the monotonic uptime clock, the same
 * retry pattern used by do_lock_umutex().  An interrupted timed wait is
 * reported as EINTR rather than being restarted.
 */
static int
do_rw_rdlock2(struct thread *td, void *obj, long val, struct timespec *timeout)
{
        struct timespec ts, ts2, ts3;
        struct timeval tv;
        int error;

        getnanouptime(&ts);
        timespecadd(&ts, timeout);
        TIMESPEC_TO_TIMEVAL(&tv, timeout);
        for (;;) {
                error = do_rw_rdlock(td, obj, val, tvtohz(&tv));
                if (error != ETIMEDOUT)
                        break;
                getnanouptime(&ts2);
                if (timespeccmp(&ts2, &ts, >=)) {
                        error = ETIMEDOUT;
                        break;
                }
                /* Recompute the remaining time and retry. */
                ts3 = ts;
                timespecsub(&ts3, &ts2);
                TIMESPEC_TO_TIMEVAL(&tv, &ts3);
        }
        if (error == ERESTART)
                error = EINTR;
        return (error);
}
2488
/*
 * Write-lock a userland rwlock.
 *
 * The kernel touches the lock word only through fuword32()/casuword32();
 * the fast path (no write owner, no readers) is one CAS that sets
 * URWLOCK_WRITE_OWNER.  On contention the thread publishes
 * URWLOCK_WRITE_WAITERS, bumps rw_blocked_writers, and sleeps on the
 * exclusive umtx queue until an unlocker wakes it, then retries.
 * 'timo' is a tick count for umtxq_sleep() (0 = sleep forever).
 * Returns 0 on success or an errno (ETIMEDOUT, ERESTART, or a failure
 * from umtx_key_get()).
 */
static int
do_rw_wrlock(struct thread *td, struct urwlock *rwlock, int timo)
{
	struct umtx_q *uq;
	uint32_t flags;
	int32_t state, oldstate;
	int32_t blocked_writers;
	int error;

	uq = td->td_umtxq;
	flags = fuword32(&rwlock->rw_flags);
	error = umtx_key_get(rwlock, TYPE_RWLOCK, GET_SHARE(flags), &uq->uq_key);
	if (error != 0)
		return (error);

	for (;;) {
		state = fuword32(__DEVOLATILE(int32_t *, &rwlock->rw_state));
		/* Fast path: lock is free, try to claim write ownership. */
		while (!(state & URWLOCK_WRITE_OWNER) && URWLOCK_READER_COUNT(state) == 0) {
			oldstate = casuword32(&rwlock->rw_state, state, state | URWLOCK_WRITE_OWNER);
			if (oldstate == state) {
				umtx_key_release(&uq->uq_key);
				return (0);
			}
			state = oldstate;
		}

		/* A previous sleep failed (timeout/signal); give up. */
		if (error)
			break;

		/* grab monitor lock */
		umtxq_lock(&uq->uq_key);
		umtxq_busy(&uq->uq_key);
		umtxq_unlock(&uq->uq_key);

		/*
		 * Still contended: advertise a waiting writer so the
		 * unlock path knows to enter the kernel and wake us.
		 */
		while (((state & URWLOCK_WRITE_OWNER) || URWLOCK_READER_COUNT(state) != 0) &&
		       (state & URWLOCK_WRITE_WAITERS) == 0) {
			oldstate = casuword32(&rwlock->rw_state, state, state | URWLOCK_WRITE_WAITERS);
			if (oldstate == state)
				goto sleep;
			state = oldstate;
		}

		/* Lock became free while setting the waiter bit; retry. */
		if (!(state & URWLOCK_WRITE_OWNER) && URWLOCK_READER_COUNT(state) == 0) {
			umtxq_lock(&uq->uq_key);
			umtxq_unbusy(&uq->uq_key);
			umtxq_unlock(&uq->uq_key);
			continue;
		}
sleep:
		/* Count ourselves among the blocked writers before sleeping. */
		blocked_writers = fuword32(&rwlock->rw_blocked_writers);
		suword32(&rwlock->rw_blocked_writers, blocked_writers+1);

		while ((state & URWLOCK_WRITE_OWNER) || URWLOCK_READER_COUNT(state) != 0) {
			umtxq_lock(&uq->uq_key);
			umtxq_insert_queue(uq, UMTX_EXCLUSIVE_QUEUE);
			umtxq_unbusy(&uq->uq_key);

			error = umtxq_sleep(uq, "uwrlck", timo);

			umtxq_busy(&uq->uq_key);
			umtxq_remove_queue(uq, UMTX_EXCLUSIVE_QUEUE);
			umtxq_unlock(&uq->uq_key);
			if (error)
				break;
			state = fuword32(__DEVOLATILE(int32_t *, &rwlock->rw_state));
		}

		/*
		 * Last blocked writer clears URWLOCK_WRITE_WAITERS so
		 * userland unlockers can go back to the fast path.
		 */
		blocked_writers = fuword32(&rwlock->rw_blocked_writers);
		suword32(&rwlock->rw_blocked_writers, blocked_writers-1);
		if (blocked_writers == 1) {
			state = fuword32(__DEVOLATILE(int32_t *, &rwlock->rw_state));
			for (;;) {
				oldstate = casuword32(&rwlock->rw_state, state,
					 state & ~URWLOCK_WRITE_WAITERS);
				if (oldstate == state)
					break;
				state = oldstate;
			}
		}

		umtxq_lock(&uq->uq_key);
		umtxq_unbusy(&uq->uq_key);
		umtxq_unlock(&uq->uq_key);
	}

	umtx_key_release(&uq->uq_key);
	return (error);
}
2577
2578 static int
2579 do_rw_wrlock2(struct thread *td, void *obj, struct timespec *timeout)
2580 {
2581 struct timespec ts, ts2, ts3;
2582 struct timeval tv;
2583 int error;
2584
2585 getnanouptime(&ts);
2586 timespecadd(&ts, timeout);
2587 TIMESPEC_TO_TIMEVAL(&tv, timeout);
2588 for (;;) {
2589 error = do_rw_wrlock(td, obj, tvtohz(&tv));
2590 if (error != ETIMEDOUT)
2591 break;
2592 getnanouptime(&ts2);
2593 if (timespeccmp(&ts2, &ts, >=)) {
2594 error = ETIMEDOUT;
2595 break;
2596 }
2597 ts3 = ts;
2598 timespecsub(&ts3, &ts2);
2599 TIMESPEC_TO_TIMEVAL(&tv, &ts3);
2600 }
2601 if (error == ERESTART)
2602 error = EINTR;
2603 return (error);
2604 }
2605
/*
 * Unlock a userland rwlock and wake the appropriate sleepers.
 *
 * Determines from rw_state whether the caller holds the lock as a
 * writer (clear URWLOCK_WRITE_OWNER) or a reader (decrement the
 * reader count) and applies the change with a CAS retry loop;
 * EPERM if the lock is not held in either mode.  Afterwards, waiters
 * are woken according to the waiter bits captured in 'state':
 * writer-preferring locks wake one writer first, otherwise all
 * readers (URWLOCK_PREFER_READER inverts that order).
 */
static int
do_rwlock_unlock(struct thread *td, struct urwlock *rwlock)
{
	struct umtx_q *uq;
	uint32_t flags;
	int32_t state, oldstate;
	int error, q, count;

	uq = td->td_umtxq;
	flags = fuword32(&rwlock->rw_flags);
	error = umtx_key_get(rwlock, TYPE_RWLOCK, GET_SHARE(flags), &uq->uq_key);
	if (error != 0)
		return (error);

	state = fuword32(__DEVOLATILE(int32_t *, &rwlock->rw_state));
	if (state & URWLOCK_WRITE_OWNER) {
		/* Release write ownership; CAS retried until it sticks. */
		for (;;) {
			oldstate = casuword32(&rwlock->rw_state, state,
				state & ~URWLOCK_WRITE_OWNER);
			if (oldstate != state) {
				state = oldstate;
				/* Someone else already cleared ownership. */
				if (!(oldstate & URWLOCK_WRITE_OWNER)) {
					error = EPERM;
					goto out;
				}
			} else
				break;
		}
	} else if (URWLOCK_READER_COUNT(state) != 0) {
		/* Drop one read reference; CAS retried until it sticks. */
		for (;;) {
			oldstate = casuword32(&rwlock->rw_state, state,
				state - 1);
			if (oldstate != state) {
				state = oldstate;
				/* Reader count raced to zero under us. */
				if (URWLOCK_READER_COUNT(oldstate) == 0) {
					error = EPERM;
					goto out;
				}
			}
			else
				break;
		}
	} else {
		/* Not held as writer and no readers: nothing to unlock. */
		error = EPERM;
		goto out;
	}

	count = 0;

	/* Pick which queue to wake based on the lock's preference flag. */
	if (!(flags & URWLOCK_PREFER_READER)) {
		if (state & URWLOCK_WRITE_WAITERS) {
			count = 1;
			q = UMTX_EXCLUSIVE_QUEUE;
		} else if (state & URWLOCK_READ_WAITERS) {
			count = INT_MAX;
			q = UMTX_SHARED_QUEUE;
		}
	} else {
		if (state & URWLOCK_READ_WAITERS) {
			count = INT_MAX;
			q = UMTX_SHARED_QUEUE;
		} else if (state & URWLOCK_WRITE_WAITERS) {
			count = 1;
			q = UMTX_EXCLUSIVE_QUEUE;
		}
	}

	if (count) {
		umtxq_lock(&uq->uq_key);
		umtxq_busy(&uq->uq_key);
		umtxq_signal_queue(&uq->uq_key, count, q);
		umtxq_unbusy(&uq->uq_key);
		umtxq_unlock(&uq->uq_key);
	}
out:
	umtx_key_release(&uq->uq_key);
	return (error);
}
2684
2685 int
2686 _umtx_lock(struct thread *td, struct _umtx_lock_args *uap)
2687 /* struct umtx *umtx */
2688 {
2689 return _do_lock_umtx(td, uap->umtx, td->td_tid, 0);
2690 }
2691
2692 int
2693 _umtx_unlock(struct thread *td, struct _umtx_unlock_args *uap)
2694 /* struct umtx *umtx */
2695 {
2696 return do_unlock_umtx(td, uap->umtx, td->td_tid);
2697 }
2698
2699 static int
2700 __umtx_op_lock_umtx(struct thread *td, struct _umtx_op_args *uap)
2701 {
2702 struct timespec *ts, timeout;
2703 int error;
2704
2705 /* Allow a null timespec (wait forever). */
2706 if (uap->uaddr2 == NULL)
2707 ts = NULL;
2708 else {
2709 error = copyin(uap->uaddr2, &timeout, sizeof(timeout));
2710 if (error != 0)
2711 return (error);
2712 if (timeout.tv_nsec >= 1000000000 ||
2713 timeout.tv_nsec < 0) {
2714 return (EINVAL);
2715 }
2716 ts = &timeout;
2717 }
2718 return (do_lock_umtx(td, uap->obj, uap->val, ts));
2719 }
2720
2721 static int
2722 __umtx_op_unlock_umtx(struct thread *td, struct _umtx_op_args *uap)
2723 {
2724 return (do_unlock_umtx(td, uap->obj, uap->val));
2725 }
2726
2727 static int
2728 __umtx_op_wait(struct thread *td, struct _umtx_op_args *uap)
2729 {
2730 struct timespec *ts, timeout;
2731 int error;
2732
2733 if (uap->uaddr2 == NULL)
2734 ts = NULL;
2735 else {
2736 error = copyin(uap->uaddr2, &timeout, sizeof(timeout));
2737 if (error != 0)
2738 return (error);
2739 if (timeout.tv_nsec >= 1000000000 ||
2740 timeout.tv_nsec < 0)
2741 return (EINVAL);
2742 ts = &timeout;
2743 }
2744 return do_wait(td, uap->obj, uap->val, ts, 0);
2745 }
2746
2747 static int
2748 __umtx_op_wait_uint(struct thread *td, struct _umtx_op_args *uap)
2749 {
2750 struct timespec *ts, timeout;
2751 int error;
2752
2753 if (uap->uaddr2 == NULL)
2754 ts = NULL;
2755 else {
2756 error = copyin(uap->uaddr2, &timeout, sizeof(timeout));
2757 if (error != 0)
2758 return (error);
2759 if (timeout.tv_nsec >= 1000000000 ||
2760 timeout.tv_nsec < 0)
2761 return (EINVAL);
2762 ts = &timeout;
2763 }
2764 return do_wait(td, uap->obj, uap->val, ts, 1);
2765 }
2766
2767 static int
2768 __umtx_op_wake(struct thread *td, struct _umtx_op_args *uap)
2769 {
2770 return (kern_umtx_wake(td, uap->obj, uap->val));
2771 }
2772
2773 static int
2774 __umtx_op_lock_umutex(struct thread *td, struct _umtx_op_args *uap)
2775 {
2776 struct timespec *ts, timeout;
2777 int error;
2778
2779 /* Allow a null timespec (wait forever). */
2780 if (uap->uaddr2 == NULL)
2781 ts = NULL;
2782 else {
2783 error = copyin(uap->uaddr2, &timeout,
2784 sizeof(timeout));
2785 if (error != 0)
2786 return (error);
2787 if (timeout.tv_nsec >= 1000000000 ||
2788 timeout.tv_nsec < 0) {
2789 return (EINVAL);
2790 }
2791 ts = &timeout;
2792 }
2793 return do_lock_umutex(td, uap->obj, ts, 0);
2794 }
2795
2796 static int
2797 __umtx_op_trylock_umutex(struct thread *td, struct _umtx_op_args *uap)
2798 {
2799 return do_lock_umutex(td, uap->obj, NULL, 1);
2800 }
2801
2802 static int
2803 __umtx_op_unlock_umutex(struct thread *td, struct _umtx_op_args *uap)
2804 {
2805 return do_unlock_umutex(td, uap->obj);
2806 }
2807
2808 static int
2809 __umtx_op_set_ceiling(struct thread *td, struct _umtx_op_args *uap)
2810 {
2811 return do_set_ceiling(td, uap->obj, uap->val, uap->uaddr1);
2812 }
2813
2814 static int
2815 __umtx_op_cv_wait(struct thread *td, struct _umtx_op_args *uap)
2816 {
2817 struct timespec *ts, timeout;
2818 int error;
2819
2820 /* Allow a null timespec (wait forever). */
2821 if (uap->uaddr2 == NULL)
2822 ts = NULL;
2823 else {
2824 error = copyin(uap->uaddr2, &timeout,
2825 sizeof(timeout));
2826 if (error != 0)
2827 return (error);
2828 if (timeout.tv_nsec >= 1000000000 ||
2829 timeout.tv_nsec < 0) {
2830 return (EINVAL);
2831 }
2832 ts = &timeout;
2833 }
2834 return (do_cv_wait(td, uap->obj, uap->uaddr1, ts, uap->val));
2835 }
2836
2837 static int
2838 __umtx_op_cv_signal(struct thread *td, struct _umtx_op_args *uap)
2839 {
2840 return do_cv_signal(td, uap->obj);
2841 }
2842
2843 static int
2844 __umtx_op_cv_broadcast(struct thread *td, struct _umtx_op_args *uap)
2845 {
2846 return do_cv_broadcast(td, uap->obj);
2847 }
2848
2849 static int
2850 __umtx_op_rw_rdlock(struct thread *td, struct _umtx_op_args *uap)
2851 {
2852 struct timespec timeout;
2853 int error;
2854
2855 /* Allow a null timespec (wait forever). */
2856 if (uap->uaddr2 == NULL) {
2857 error = do_rw_rdlock(td, uap->obj, uap->val, 0);
2858 } else {
2859 error = copyin(uap->uaddr2, &timeout,
2860 sizeof(timeout));
2861 if (error != 0)
2862 return (error);
2863 if (timeout.tv_nsec >= 1000000000 ||
2864 timeout.tv_nsec < 0) {
2865 return (EINVAL);
2866 }
2867 error = do_rw_rdlock2(td, uap->obj, uap->val, &timeout);
2868 }
2869 return (error);
2870 }
2871
2872 static int
2873 __umtx_op_rw_wrlock(struct thread *td, struct _umtx_op_args *uap)
2874 {
2875 struct timespec timeout;
2876 int error;
2877
2878 /* Allow a null timespec (wait forever). */
2879 if (uap->uaddr2 == NULL) {
2880 error = do_rw_wrlock(td, uap->obj, 0);
2881 } else {
2882 error = copyin(uap->uaddr2, &timeout,
2883 sizeof(timeout));
2884 if (error != 0)
2885 return (error);
2886 if (timeout.tv_nsec >= 1000000000 ||
2887 timeout.tv_nsec < 0) {
2888 return (EINVAL);
2889 }
2890
2891 error = do_rw_wrlock2(td, uap->obj, &timeout);
2892 }
2893 return (error);
2894 }
2895
2896 static int
2897 __umtx_op_rw_unlock(struct thread *td, struct _umtx_op_args *uap)
2898 {
2899 return do_rwlock_unlock(td, uap->obj);
2900 }
2901
/* Signature shared by every _umtx_op() operation handler. */
typedef int (*_umtx_op_func)(struct thread *td, struct _umtx_op_args *uap);

/*
 * Dispatch table for _umtx_op(), indexed by the UMTX_OP_* opcode;
 * entry order must match the opcode values defined in <sys/umtx.h>.
 */
static _umtx_op_func op_table[] = {
	__umtx_op_lock_umtx,		/* UMTX_OP_LOCK */
	__umtx_op_unlock_umtx,		/* UMTX_OP_UNLOCK */
	__umtx_op_wait,			/* UMTX_OP_WAIT */
	__umtx_op_wake,			/* UMTX_OP_WAKE */
	__umtx_op_trylock_umutex,	/* UMTX_OP_MUTEX_TRYLOCK */
	__umtx_op_lock_umutex,		/* UMTX_OP_MUTEX_LOCK */
	__umtx_op_unlock_umutex,	/* UMTX_OP_MUTEX_UNLOCK */
	__umtx_op_set_ceiling,		/* UMTX_OP_SET_CEILING */
	__umtx_op_cv_wait,		/* UMTX_OP_CV_WAIT*/
	__umtx_op_cv_signal,		/* UMTX_OP_CV_SIGNAL */
	__umtx_op_cv_broadcast,		/* UMTX_OP_CV_BROADCAST */
	__umtx_op_wait_uint,		/* UMTX_OP_WAIT_UINT */
	__umtx_op_rw_rdlock,		/* UMTX_OP_RW_RDLOCK */
	__umtx_op_rw_wrlock,		/* UMTX_OP_RW_WRLOCK */
	__umtx_op_rw_unlock		/* UMTX_OP_RW_UNLOCK */
};
2921
2922 int
2923 _umtx_op(struct thread *td, struct _umtx_op_args *uap)
2924 {
2925 if ((unsigned)uap->op < UMTX_OP_MAX)
2926 return (*op_table[uap->op])(td, uap);
2927 return (EINVAL);
2928 }
2929
2930 #ifdef COMPAT_IA32
2931 int
2932 freebsd32_umtx_lock(struct thread *td, struct freebsd32_umtx_lock_args *uap)
2933 /* struct umtx *umtx */
2934 {
2935 return (do_lock_umtx32(td, (uint32_t *)uap->umtx, td->td_tid, NULL));
2936 }
2937
2938 int
2939 freebsd32_umtx_unlock(struct thread *td, struct freebsd32_umtx_unlock_args *uap)
2940 /* struct umtx *umtx */
2941 {
2942 return (do_unlock_umtx32(td, (uint32_t *)uap->umtx, td->td_tid));
2943 }
2944
2945 struct timespec32 {
2946 u_int32_t tv_sec;
2947 u_int32_t tv_nsec;
2948 };
2949
2950 static inline int
2951 copyin_timeout32(void *addr, struct timespec *tsp)
2952 {
2953 struct timespec32 ts32;
2954 int error;
2955
2956 error = copyin(addr, &ts32, sizeof(struct timespec32));
2957 if (error == 0) {
2958 tsp->tv_sec = ts32.tv_sec;
2959 tsp->tv_nsec = ts32.tv_nsec;
2960 }
2961 return (error);
2962 }
2963
2964 static int
2965 __umtx_op_lock_umtx_compat32(struct thread *td, struct _umtx_op_args *uap)
2966 {
2967 struct timespec *ts, timeout;
2968 int error;
2969
2970 /* Allow a null timespec (wait forever). */
2971 if (uap->uaddr2 == NULL)
2972 ts = NULL;
2973 else {
2974 error = copyin_timeout32(uap->uaddr2, &timeout);
2975 if (error != 0)
2976 return (error);
2977 if (timeout.tv_nsec >= 1000000000 ||
2978 timeout.tv_nsec < 0) {
2979 return (EINVAL);
2980 }
2981 ts = &timeout;
2982 }
2983 return (do_lock_umtx32(td, uap->obj, uap->val, ts));
2984 }
2985
2986 static int
2987 __umtx_op_unlock_umtx_compat32(struct thread *td, struct _umtx_op_args *uap)
2988 {
2989 return (do_unlock_umtx32(td, uap->obj, (uint32_t)uap->val));
2990 }
2991
2992 static int
2993 __umtx_op_wait_compat32(struct thread *td, struct _umtx_op_args *uap)
2994 {
2995 struct timespec *ts, timeout;
2996 int error;
2997
2998 if (uap->uaddr2 == NULL)
2999 ts = NULL;
3000 else {
3001 error = copyin_timeout32(uap->uaddr2, &timeout);
3002 if (error != 0)
3003 return (error);
3004 if (timeout.tv_nsec >= 1000000000 ||
3005 timeout.tv_nsec < 0)
3006 return (EINVAL);
3007 ts = &timeout;
3008 }
3009 return do_wait(td, uap->obj, uap->val, ts, 1);
3010 }
3011
3012 static int
3013 __umtx_op_lock_umutex_compat32(struct thread *td, struct _umtx_op_args *uap)
3014 {
3015 struct timespec *ts, timeout;
3016 int error;
3017
3018 /* Allow a null timespec (wait forever). */
3019 if (uap->uaddr2 == NULL)
3020 ts = NULL;
3021 else {
3022 error = copyin_timeout32(uap->uaddr2, &timeout);
3023 if (error != 0)
3024 return (error);
3025 if (timeout.tv_nsec >= 1000000000 ||
3026 timeout.tv_nsec < 0)
3027 return (EINVAL);
3028 ts = &timeout;
3029 }
3030 return do_lock_umutex(td, uap->obj, ts, 0);
3031 }
3032
3033 static int
3034 __umtx_op_cv_wait_compat32(struct thread *td, struct _umtx_op_args *uap)
3035 {
3036 struct timespec *ts, timeout;
3037 int error;
3038
3039 /* Allow a null timespec (wait forever). */
3040 if (uap->uaddr2 == NULL)
3041 ts = NULL;
3042 else {
3043 error = copyin_timeout32(uap->uaddr2, &timeout);
3044 if (error != 0)
3045 return (error);
3046 if (timeout.tv_nsec >= 1000000000 ||
3047 timeout.tv_nsec < 0)
3048 return (EINVAL);
3049 ts = &timeout;
3050 }
3051 return (do_cv_wait(td, uap->obj, uap->uaddr1, ts, uap->val));
3052 }
3053
3054 static int
3055 __umtx_op_rw_rdlock_compat32(struct thread *td, struct _umtx_op_args *uap)
3056 {
3057 struct timespec timeout;
3058 int error;
3059
3060 /* Allow a null timespec (wait forever). */
3061 if (uap->uaddr2 == NULL) {
3062 error = do_rw_rdlock(td, uap->obj, uap->val, 0);
3063 } else {
3064 error = copyin(uap->uaddr2, &timeout,
3065 sizeof(timeout));
3066 if (error != 0)
3067 return (error);
3068 if (timeout.tv_nsec >= 1000000000 ||
3069 timeout.tv_nsec < 0) {
3070 return (EINVAL);
3071 }
3072 error = do_rw_rdlock2(td, uap->obj, uap->val, &timeout);
3073 }
3074 return (error);
3075 }
3076
3077 static int
3078 __umtx_op_rw_wrlock_compat32(struct thread *td, struct _umtx_op_args *uap)
3079 {
3080 struct timespec timeout;
3081 int error;
3082
3083 /* Allow a null timespec (wait forever). */
3084 if (uap->uaddr2 == NULL) {
3085 error = do_rw_wrlock(td, uap->obj, 0);
3086 } else {
3087 error = copyin_timeout32(uap->uaddr2, &timeout);
3088 if (error != 0)
3089 return (error);
3090 if (timeout.tv_nsec >= 1000000000 ||
3091 timeout.tv_nsec < 0) {
3092 return (EINVAL);
3093 }
3094
3095 error = do_rw_wrlock2(td, uap->obj, &timeout);
3096 }
3097 return (error);
3098 }
3099
/*
 * COMPAT_IA32 dispatch table for freebsd32_umtx_op(); indexed by the
 * UMTX_OP_* opcode and parallel to op_table above — entry order must
 * match the opcode values in <sys/umtx.h>.
 */
static _umtx_op_func op_table_compat32[] = {
	__umtx_op_lock_umtx_compat32,	/* UMTX_OP_LOCK */
	__umtx_op_unlock_umtx_compat32,	/* UMTX_OP_UNLOCK */
	__umtx_op_wait_compat32,	/* UMTX_OP_WAIT */
	__umtx_op_wake,			/* UMTX_OP_WAKE */
	__umtx_op_trylock_umutex,	/* UMTX_OP_MUTEX_TRYLOCK */
	__umtx_op_lock_umutex_compat32,	/* UMTX_OP_MUTEX_LOCK */
	__umtx_op_unlock_umutex,	/* UMTX_OP_MUTEX_UNLOCK */
	__umtx_op_set_ceiling,		/* UMTX_OP_SET_CEILING */
	__umtx_op_cv_wait_compat32,	/* UMTX_OP_CV_WAIT*/
	__umtx_op_cv_signal,		/* UMTX_OP_CV_SIGNAL */
	__umtx_op_cv_broadcast,		/* UMTX_OP_CV_BROADCAST */
	__umtx_op_wait_compat32,	/* UMTX_OP_WAIT_UINT */
	__umtx_op_rw_rdlock_compat32,	/* UMTX_OP_RW_RDLOCK */
	__umtx_op_rw_wrlock_compat32,	/* UMTX_OP_RW_WRLOCK */
	__umtx_op_rw_unlock		/* UMTX_OP_RW_UNLOCK */
};
3117
3118 int
3119 freebsd32_umtx_op(struct thread *td, struct freebsd32_umtx_op_args *uap)
3120 {
3121 if ((unsigned)uap->op < UMTX_OP_MAX)
3122 return (*op_table_compat32[uap->op])(td,
3123 (struct _umtx_op_args *)uap);
3124 return (EINVAL);
3125 }
3126 #endif
3127
3128 void
3129 umtx_thread_init(struct thread *td)
3130 {
3131 td->td_umtxq = umtxq_alloc();
3132 td->td_umtxq->uq_thread = td;
3133 }
3134
3135 void
3136 umtx_thread_fini(struct thread *td)
3137 {
3138 umtxq_free(td->td_umtxq);
3139 }
3140
/*
 * It will be called when new thread is created, e.g fork().
 * Resets the inherited priority and sanity-checks that the recycled
 * umtx_q carries no leftover state from a previous thread.
 */
void
umtx_thread_alloc(struct thread *td)
{
	struct umtx_q *uq;

	uq = td->td_umtxq;
	/* No priority inherited yet; PRI_MAX means "nothing borrowed". */
	uq->uq_inherited_pri = PRI_MAX;

	KASSERT(uq->uq_flags == 0, ("uq_flags != 0"));
	KASSERT(uq->uq_thread == td, ("uq_thread != td"));
	KASSERT(uq->uq_pi_blocked == NULL, ("uq_pi_blocked != NULL"));
	KASSERT(TAILQ_EMPTY(&uq->uq_pi_contested), ("uq_pi_contested is not empty"));
}
3157
3158 /*
3159 * exec() hook.
3160 */
3161 static void
3162 umtx_exec_hook(void *arg __unused, struct proc *p __unused,
3163 struct image_params *imgp __unused)
3164 {
3165 umtx_thread_cleanup(curthread);
3166 }
3167
/*
 * thread_exit() hook: release the exiting thread's umtx state.
 */
void
umtx_thread_exit(struct thread *td)
{

	umtx_thread_cleanup(td);
}
3176
/*
 * clean up umtx data.
 *
 * Called on thread exit and exec: sheds any priority inherited through
 * PI umutexes, disowns the PI records still listed as contested by this
 * thread, and clears the borrowed-priority flag.  All under umtx_lock;
 * the thread lock is taken inside it for the td_flags update.
 */
static void
umtx_thread_cleanup(struct thread *td)
{
	struct umtx_q *uq;
	struct umtx_pi *pi;

	if ((uq = td->td_umtxq) == NULL)
		return;

	mtx_lock_spin(&umtx_lock);
	/* Back to "nothing inherited". */
	uq->uq_inherited_pri = PRI_MAX;
	/* Disown every PI mutex still recorded against this thread. */
	while ((pi = TAILQ_FIRST(&uq->uq_pi_contested)) != NULL) {
		pi->pi_owner = NULL;
		TAILQ_REMOVE(&uq->uq_pi_contested, pi, pi_link);
	}
	thread_lock(td);
	td->td_flags &= ~TDF_UBORROWING;
	thread_unlock(td);
	mtx_unlock_spin(&umtx_lock);
}
Cache object: 6d7b4c4fa8230aa0d4d1ca7f6defb93b
|