FreeBSD/Linux Kernel Cross Reference
sys/kern/kern_umtx.c
1 /*-
2 * Copyright (c) 2004, David Xu <davidxu@freebsd.org>
3 * Copyright (c) 2002, Jeffrey Roberson <jeff@freebsd.org>
4 * All rights reserved.
5 *
6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions
8 * are met:
9 * 1. Redistributions of source code must retain the above copyright
10 * notice unmodified, this list of conditions, and the following
11 * disclaimer.
12 * 2. Redistributions in binary form must reproduce the above copyright
13 * notice, this list of conditions and the following disclaimer in the
14 * documentation and/or other materials provided with the distribution.
15 *
16 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
17 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
18 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
19 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
20 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
21 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
22 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
23 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
24 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
25 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26 */
27
28 #include <sys/cdefs.h>
29 __FBSDID("$FreeBSD$");
30
31 #include "opt_compat.h"
32 #include <sys/param.h>
33 #include <sys/kernel.h>
34 #include <sys/limits.h>
35 #include <sys/lock.h>
36 #include <sys/malloc.h>
37 #include <sys/mutex.h>
38 #include <sys/priv.h>
39 #include <sys/proc.h>
40 #include <sys/sched.h>
41 #include <sys/smp.h>
42 #include <sys/sysctl.h>
43 #include <sys/sysent.h>
44 #include <sys/systm.h>
45 #include <sys/sysproto.h>
46 #include <sys/eventhandler.h>
47 #include <sys/umtx.h>
48
49 #include <vm/vm.h>
50 #include <vm/vm_param.h>
51 #include <vm/pmap.h>
52 #include <vm/vm_map.h>
53 #include <vm/vm_object.h>
54
55 #include <machine/cpu.h>
56
57 #ifdef COMPAT_IA32
58 #include <compat/freebsd32/freebsd32_proto.h>
59 #endif
60
61 #define TYPE_SIMPLE_WAIT 0
62 #define TYPE_CV 1
63 #define TYPE_SIMPLE_LOCK 2
64 #define TYPE_NORMAL_UMUTEX 3
65 #define TYPE_PI_UMUTEX 4
66 #define TYPE_PP_UMUTEX 5
67 #define TYPE_RWLOCK 6
68
69 #define _UMUTEX_TRY 1
70 #define _UMUTEX_WAIT 2
71
72 /* Key to represent a unique userland synchronous object */
73 struct umtx_key {
74 int hash;
75 int type;
76 int shared;
77 union {
78 struct {
79 vm_object_t object;
80 uintptr_t offset;
81 } shared;
82 struct {
83 struct vmspace *vs;
84 uintptr_t addr;
85 } private;
86 struct {
87 void *a;
88 uintptr_t b;
89 } both;
90 } info;
91 };
92
93 /* Priority inheritance mutex info. */
94 struct umtx_pi {
95 /* Owner thread */
96 struct thread *pi_owner;
97
98 /* Reference count */
99 int pi_refcount;
100
101 /* List entry to link umtx holding by thread */
102 TAILQ_ENTRY(umtx_pi) pi_link;
103
104 /* List entry in hash */
105 TAILQ_ENTRY(umtx_pi) pi_hashlink;
106
107 /* List for waiters */
108 TAILQ_HEAD(,umtx_q) pi_blocked;
109
110 /* Identify a userland lock object */
111 struct umtx_key pi_key;
112 };
113
114 /* A userland synchronous object user. */
115 struct umtx_q {
116 /* Linked list for the hash. */
117 TAILQ_ENTRY(umtx_q) uq_link;
118
119 /* Umtx key. */
120 struct umtx_key uq_key;
121
122 /* Umtx flags. */
123 int uq_flags;
124 #define UQF_UMTXQ 0x0001
125
126 /* The thread waits on. */
127 struct thread *uq_thread;
128
129 /*
130 * Blocked on PI mutex. read can use chain lock
131 * or umtx_lock, write must have both chain lock and
132 * umtx_lock being hold.
133 */
134 struct umtx_pi *uq_pi_blocked;
135
136 /* On blocked list */
137 TAILQ_ENTRY(umtx_q) uq_lockq;
138
139 /* Thread contending with us */
140 TAILQ_HEAD(,umtx_pi) uq_pi_contested;
141
142 /* Inherited priority from PP mutex */
143 u_char uq_inherited_pri;
144 };
145
146 TAILQ_HEAD(umtxq_head, umtx_q);
147
148 /* Userland lock object's wait-queue chain */
149 struct umtxq_chain {
150 /* Lock for this chain. */
151 struct mtx uc_lock;
152
153 /* List of sleep queues. */
154 struct umtxq_head uc_queue[2];
155 #define UMTX_SHARED_QUEUE 0
156 #define UMTX_EXCLUSIVE_QUEUE 1
157
158 /* Busy flag */
159 char uc_busy;
160
161 /* Chain lock waiters */
162 int uc_waiters;
163
164 /* All PI in the list */
165 TAILQ_HEAD(,umtx_pi) uc_pi_list;
166 };
167
168 #define UMTXQ_LOCKED_ASSERT(uc) mtx_assert(&(uc)->uc_lock, MA_OWNED)
/*
 * Assert that the chain is marked busy.  Note: must test the VALUE of
 * uc_busy, not its address -- "&(uc)->uc_busy" is the address of a field
 * inside a static array and is therefore always non-NULL, which made the
 * original assertion a no-op that could never fire.
 */
169 #define UMTXQ_BUSY_ASSERT(uc) KASSERT((uc)->uc_busy != 0, ("umtx chain is not busy"))
170
171 /*
172 * Don't propagate time-sharing priority, there is a security reason,
173 * a user can simply introduce PI-mutex, let thread A lock the mutex,
174 * and let another thread B block on the mutex, because B is
175 * sleeping, its priority will be boosted, this causes A's priority to
176 * be boosted via priority propagating too and will never be lowered even
177 * if it is using 100%CPU, this is unfair to other processes.
178 */
179
180 #define UPRI(td) (((td)->td_user_pri >= PRI_MIN_TIMESHARE &&\
181 (td)->td_user_pri <= PRI_MAX_TIMESHARE) ?\
182 PRI_MAX_TIMESHARE : (td)->td_user_pri)
183
184 #define GOLDEN_RATIO_PRIME 2654404609U
185 #define UMTX_CHAINS 128
186 #define UMTX_SHIFTS (__WORD_BIT - 7)
187
188 #define THREAD_SHARE 0
189 #define PROCESS_SHARE 1
190 #define AUTO_SHARE 2
191
192 #define GET_SHARE(flags) \
193 (((flags) & USYNC_PROCESS_SHARED) == 0 ? THREAD_SHARE : PROCESS_SHARE)
194
195 #define BUSY_SPINS 200
196
197 static uma_zone_t umtx_pi_zone;
198 static struct umtxq_chain umtxq_chains[2][UMTX_CHAINS];
199 static MALLOC_DEFINE(M_UMTX, "umtx", "UMTX queue memory");
200 static int umtx_pi_allocated;
201
202 SYSCTL_NODE(_debug, OID_AUTO, umtx, CTLFLAG_RW, 0, "umtx debug");
203 SYSCTL_INT(_debug_umtx, OID_AUTO, umtx_pi_allocated, CTLFLAG_RD,
204 &umtx_pi_allocated, 0, "Allocated umtx_pi");
205
206 static void umtxq_sysinit(void *);
207 static void umtxq_hash(struct umtx_key *key);
208 static struct umtxq_chain *umtxq_getchain(struct umtx_key *key);
209 static void umtxq_lock(struct umtx_key *key);
210 static void umtxq_unlock(struct umtx_key *key);
211 static void umtxq_busy(struct umtx_key *key);
212 static void umtxq_unbusy(struct umtx_key *key);
213 static void umtxq_insert_queue(struct umtx_q *uq, int q);
214 static void umtxq_remove_queue(struct umtx_q *uq, int q);
215 static int umtxq_sleep(struct umtx_q *uq, const char *wmesg, int timo);
216 static int umtxq_count(struct umtx_key *key);
217 static int umtx_key_match(const struct umtx_key *k1, const struct umtx_key *k2);
218 static int umtx_key_get(void *addr, int type, int share,
219 struct umtx_key *key);
220 static void umtx_key_release(struct umtx_key *key);
221 static struct umtx_pi *umtx_pi_alloc(int);
222 static void umtx_pi_free(struct umtx_pi *pi);
223 static void umtx_pi_adjust_locked(struct thread *td, u_char oldpri);
224 static int do_unlock_pp(struct thread *td, struct umutex *m, uint32_t flags);
225 static void umtx_thread_cleanup(struct thread *td);
226 static void umtx_exec_hook(void *arg __unused, struct proc *p __unused,
227 struct image_params *imgp __unused);
228 SYSINIT(umtx, SI_SUB_EVENTHANDLER+1, SI_ORDER_MIDDLE, umtxq_sysinit, NULL);
229
230 #define umtxq_signal(key, nwake) umtxq_signal_queue((key), (nwake), UMTX_SHARED_QUEUE)
231 #define umtxq_insert(uq) umtxq_insert_queue((uq), UMTX_SHARED_QUEUE)
232 #define umtxq_remove(uq) umtxq_remove_queue((uq), UMTX_SHARED_QUEUE)
233
234 static struct mtx umtx_lock;
235
/*
 * SYSINIT hook: create the PI-mutex UMA zone, initialize every wait-queue
 * chain in both chain tables (index 0 and 1, see umtxq_getchain()), set up
 * the global spin lock used for priority propagation, and register the
 * exec-time cleanup hook.
 */
236 static void
237 umtxq_sysinit(void *arg __unused)
238 {
239 int i, j;
240
241 umtx_pi_zone = uma_zcreate("umtx pi", sizeof(struct umtx_pi),
242 NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0);
243 for (i = 0; i < 2; ++i) {
244 for (j = 0; j < UMTX_CHAINS; ++j) {
/* MTX_DUPOK: two chain locks may be held at once (e.g. cv code paths). */
245 mtx_init(&umtxq_chains[i][j].uc_lock, "umtxql", NULL,
246 MTX_DEF | MTX_DUPOK);
247 TAILQ_INIT(&umtxq_chains[i][j].uc_queue[0]);
248 TAILQ_INIT(&umtxq_chains[i][j].uc_queue[1]);
249 TAILQ_INIT(&umtxq_chains[i][j].uc_pi_list);
250 umtxq_chains[i][j].uc_busy = 0;
251 umtxq_chains[i][j].uc_waiters = 0;
252 }
253 }
/* Spin lock: taken while scheduling decisions (priority lending) are made. */
254 mtx_init(&umtx_lock, "umtx lock", NULL, MTX_SPIN);
255 EVENTHANDLER_REGISTER(process_exec, umtx_exec_hook, NULL,
256 EVENTHANDLER_PRI_ANY);
257 }
258
/*
 * Allocate and initialize a per-thread umtx queue entry.  May sleep
 * (M_WAITOK).  Caller owns the result and frees it with umtxq_free().
 */
259 struct umtx_q *
260 umtxq_alloc(void)
261 {
262 struct umtx_q *uq;
263
264 uq = malloc(sizeof(struct umtx_q), M_UMTX, M_WAITOK | M_ZERO);
265 TAILQ_INIT(&uq->uq_pi_contested);
/* PRI_MAX: no inherited priority yet (lowest possible boost). */
266 uq->uq_inherited_pri = PRI_MAX;
267 return (uq);
268 }
269
/*
 * Release a umtx queue entry previously obtained from umtxq_alloc().
 */
270 void
271 umtxq_free(struct umtx_q *uq)
272 {
273 free(uq, M_UMTX);
274 }
275
/*
 * Compute key->hash (a chain index in [0, UMTX_CHAINS)) from the two
 * identity words of the key, using a multiplicative (Fibonacci) hash.
 */
276 static inline void
277 umtxq_hash(struct umtx_key *key)
278 {
279 unsigned n = (uintptr_t)key->info.both.a + key->info.both.b;
280 key->hash = ((n * GOLDEN_RATIO_PRIME) >> UMTX_SHIFTS) % UMTX_CHAINS;
281 }
282
/*
 * Return non-zero iff two keys identify the same userland object:
 * same type and same (object/vmspace, offset/address) pair.
 */
283 static inline int
284 umtx_key_match(const struct umtx_key *k1, const struct umtx_key *k2)
285 {
286 return (k1->type == k2->type &&
287 k1->info.both.a == k2->info.both.a &&
288 k1->info.both.b == k2->info.both.b);
289 }
290
/*
 * Map a key to its wait-queue chain.  Wait/CV types (<= TYPE_CV) use the
 * second chain table so they never collide with mutex chains.
 */
291 static inline struct umtxq_chain *
292 umtxq_getchain(struct umtx_key *key)
293 {
294 if (key->type <= TYPE_CV)
295 return (&umtxq_chains[1][key->hash]);
296 return (&umtxq_chains[0][key->hash]);
297 }
298
299 /*
300 * Lock a chain.
301 */
302 static inline void
303 umtxq_lock(struct umtx_key *key)
304 {
305 struct umtxq_chain *uc;
306
/* Acquire the chain mutex covering this key's hash bucket. */
307 uc = umtxq_getchain(key);
308 mtx_lock(&uc->uc_lock);
309 }
310
311 /*
312 * Unlock a chain.
313 */
314 static inline void
315 umtxq_unlock(struct umtx_key *key)
316 {
317 struct umtxq_chain *uc;
318
/* Drop the chain mutex for this key's hash bucket. */
319 uc = umtxq_getchain(key);
320 mtx_unlock(&uc->uc_lock);
321 }
322
323 /*
324 * Set chain to busy state when following operation
325 * may be blocked (kernel mutex can not be used).
326 */
/*
 * Mark the chain busy; called with the chain lock held, returns with it
 * still held and uc_busy set.  If another thread already owns the busy
 * flag, first spin briefly on SMP (with the lock dropped) hoping the flag
 * clears, then fall back to sleeping on the chain address.
 */
327 static inline void
328 umtxq_busy(struct umtx_key *key)
329 {
330 struct umtxq_chain *uc;
331
332 uc = umtxq_getchain(key);
333 mtx_assert(&uc->uc_lock, MA_OWNED);
334 if (uc->uc_busy) {
335 #ifdef SMP
336 if (smp_cpus > 1) {
337 int count = BUSY_SPINS;
338 if (count > 0) {
/* Unlocked spin: uc_busy is re-checked under the lock below. */
339 umtxq_unlock(key);
340 while (uc->uc_busy && --count > 0)
341 cpu_spinwait();
342 umtxq_lock(key);
343 }
344 }
345 #endif
346 while (uc->uc_busy) {
347 uc->uc_waiters++;
348 msleep(uc, &uc->uc_lock, 0, "umtxqb", 0);
349 uc->uc_waiters--;
350 }
351 }
352 uc->uc_busy = 1;
353 }
354
355 /*
356 * Unbusy a chain.
357 */
/*
 * Clear the busy flag (chain lock must be held) and wake one thread
 * sleeping in umtxq_busy(), if any.
 */
358 static inline void
359 umtxq_unbusy(struct umtx_key *key)
360 {
361 struct umtxq_chain *uc;
362
363 uc = umtxq_getchain(key);
364 mtx_assert(&uc->uc_lock, MA_OWNED);
365 KASSERT(uc->uc_busy != 0, ("not busy"));
366 uc->uc_busy = 0;
367 if (uc->uc_waiters)
368 wakeup_one(uc);
369 }
370
/*
 * Append uq to sleep queue q (UMTX_SHARED_QUEUE or UMTX_EXCLUSIVE_QUEUE)
 * of its chain and flag it as enqueued.  Chain lock must be held.
 */
371 static inline void
372 umtxq_insert_queue(struct umtx_q *uq, int q)
373 {
374 struct umtxq_chain *uc;
375
376 uc = umtxq_getchain(&uq->uq_key);
377 UMTXQ_LOCKED_ASSERT(uc);
378 TAILQ_INSERT_TAIL(&uc->uc_queue[q], uq, uq_link);
379 uq->uq_flags |= UQF_UMTXQ;
380 }
381
/*
 * Remove uq from sleep queue q if it is still enqueued; harmless no-op
 * otherwise (a waker may already have dequeued it).  Chain lock held.
 */
382 static inline void
383 umtxq_remove_queue(struct umtx_q *uq, int q)
384 {
385 struct umtxq_chain *uc;
386
387 uc = umtxq_getchain(&uq->uq_key);
388 UMTXQ_LOCKED_ASSERT(uc);
389 if (uq->uq_flags & UQF_UMTXQ) {
390 TAILQ_REMOVE(&uc->uc_queue[q], uq, uq_link);
391 uq->uq_flags &= ~UQF_UMTXQ;
392 }
393 }
394
395 /*
396 * Check if there are multiple waiters
397 */
/*
 * Count waiters on the shared queue that match the key.  The caller only
 * needs to distinguish 0 / 1 / "more than one", so the scan stops at 2.
 * Chain lock must be held.
 */
398 static int
399 umtxq_count(struct umtx_key *key)
400 {
401 struct umtxq_chain *uc;
402 struct umtx_q *uq;
403 int count = 0;
404
405 uc = umtxq_getchain(key);
406 UMTXQ_LOCKED_ASSERT(uc);
407 TAILQ_FOREACH(uq, &uc->uc_queue[UMTX_SHARED_QUEUE], uq_link) {
408 if (umtx_key_match(&uq->uq_key, key)) {
409 if (++count > 1)
410 break;
411 }
412 }
413 return (count);
414 }
415
416 /*
417 * Check if there are multiple PI waiters and returns first
418 * waiter.
419 */
/*
 * As umtxq_count(), but also return the first matching waiter through
 * *first (NULL when there are no waiters).  Scan stops once a second
 * waiter is seen.  Chain lock must be held.
 */
420 static int
421 umtxq_count_pi(struct umtx_key *key, struct umtx_q **first)
422 {
423 struct umtxq_chain *uc;
424 struct umtx_q *uq;
425 int count = 0;
426
427 *first = NULL;
428 uc = umtxq_getchain(key);
429 UMTXQ_LOCKED_ASSERT(uc);
430 TAILQ_FOREACH(uq, &uc->uc_queue[UMTX_SHARED_QUEUE], uq_link) {
431 if (umtx_key_match(&uq->uq_key, key)) {
432 if (++count > 1)
433 break;
/* Only reached for the first match, so *first is the head waiter. */
434 *first = uq;
435 }
436 }
437 return (count);
438 }
439
440 /*
441 * Wake up threads waiting on an userland object.
442 */
443
/*
 * Dequeue and wake up to n_wake threads matching the key on queue q.
 * Returns the number of threads actually woken.  Chain lock must be held;
 * FOREACH_SAFE is required because waking removes entries mid-walk.
 */
444 static int
445 umtxq_signal_queue(struct umtx_key *key, int n_wake, int q)
446 {
447 struct umtxq_chain *uc;
448 struct umtx_q *uq, *next;
449 int ret;
450
451 ret = 0;
452 uc = umtxq_getchain(key);
453 UMTXQ_LOCKED_ASSERT(uc);
454 TAILQ_FOREACH_SAFE(uq, &uc->uc_queue[q], uq_link, next) {
455 if (umtx_key_match(&uq->uq_key, key)) {
456 umtxq_remove_queue(uq, q);
457 wakeup(uq);
458 if (++ret >= n_wake)
459 break;
460 }
461 }
462 return (ret);
463 }
464
465
466 /*
467 * Wake up specified thread.
468 */
/*
 * Dequeue and wake one specific waiter.  Chain lock must be held.
 */
469 static inline void
470 umtxq_signal_thread(struct umtx_q *uq)
471 {
472 struct umtxq_chain *uc;
473
474 uc = umtxq_getchain(&uq->uq_key);
475 UMTXQ_LOCKED_ASSERT(uc);
476 umtxq_remove(uq);
477 wakeup(uq);
478 }
479
480 /*
481 * Put thread into sleep state, before sleeping, check if
482 * thread was removed from umtx queue.
483 */
/*
 * Sleep on uq with the chain lock (released/reacquired by msleep).
 * Returns 0 immediately if the thread was already woken and dequeued,
 * otherwise the msleep() result with EWOULDBLOCK mapped to ETIMEDOUT.
 * PCATCH makes the sleep interruptible by signals (EINTR/ERESTART).
 */
484 static inline int
485 umtxq_sleep(struct umtx_q *uq, const char *wmesg, int timo)
486 {
487 struct umtxq_chain *uc;
488 int error;
489
490 uc = umtxq_getchain(&uq->uq_key);
491 UMTXQ_LOCKED_ASSERT(uc);
492 if (!(uq->uq_flags & UQF_UMTXQ))
493 return (0);
494 error = msleep(uq, &uc->uc_lock, PCATCH, wmesg, timo);
495 if (error == EWOULDBLOCK)
496 error = ETIMEDOUT;
497 return (error);
498 }
499
500 /*
501 * Convert userspace address into unique logical address.
502 */
/*
 * Build a umtx_key for a userspace address.  THREAD_SHARE keys are
 * (vmspace, address) pairs; PROCESS_SHARE -- and AUTO_SHARE mappings with
 * VM_INHERIT_SHARE -- resolve to (vm_object, offset) so that all processes
 * mapping the same object agree on the key.  A shared key holds a
 * reference on the object; release it with umtx_key_release().
 * Returns 0 or EFAULT if the address is not mapped writable.
 */
503 static int
504 umtx_key_get(void *addr, int type, int share, struct umtx_key *key)
505 {
506 struct thread *td = curthread;
507 vm_map_t map;
508 vm_map_entry_t entry;
509 vm_pindex_t pindex;
510 vm_prot_t prot;
511 boolean_t wired;
512
513 key->type = type;
514 if (share == THREAD_SHARE) {
515 key->shared = 0;
516 key->info.private.vs = td->td_proc->p_vmspace;
517 key->info.private.addr = (uintptr_t)addr;
518 } else {
519 MPASS(share == PROCESS_SHARE || share == AUTO_SHARE);
520 map = &td->td_proc->p_vmspace->vm_map;
521 if (vm_map_lookup(&map, (vm_offset_t)addr, VM_PROT_WRITE,
522 &entry, &key->info.shared.object, &pindex, &prot,
523 &wired) != KERN_SUCCESS) {
524 return EFAULT;
525 }
526
527 if ((share == PROCESS_SHARE) ||
528 (share == AUTO_SHARE &&
529 VM_INHERIT_SHARE == entry->inheritance)) {
530 key->shared = 1;
/* Offset of addr within the backing object, independent of mapping. */
531 key->info.shared.offset = entry->offset + entry->start -
532 (vm_offset_t)addr;
533 vm_object_reference(key->info.shared.object);
534 } else {
535 key->shared = 0;
536 key->info.private.vs = td->td_proc->p_vmspace;
537 key->info.private.addr = (uintptr_t)addr;
538 }
539 vm_map_lookup_done(map, entry);
540 }
541
542 umtxq_hash(key);
543 return (0);
544 }
545
546 /*
547 * Release key.
548 */
/*
 * Drop the vm_object reference taken by umtx_key_get() for shared keys;
 * private keys hold no reference, so this is then a no-op.
 */
549 static inline void
550 umtx_key_release(struct umtx_key *key)
551 {
552 if (key->shared)
553 vm_object_deallocate(key->info.shared.object);
554 }
555
556 /*
557 * Lock a umtx object.
558 */
/*
 * Acquire an old-style struct umtx for thread id 'id', sleeping up to
 * 'timo' ticks per wait (0 = forever).  Returns 0 on success, EFAULT if
 * the userland word faults, or the sleep error (EINTR/ERESTART/ETIMEDOUT).
 * All accesses to the userland word go through fault-tolerant cas
 * primitives; -1 from them means the address faulted.
 */
559 static int
560 _do_lock_umtx(struct thread *td, struct umtx *umtx, u_long id, int timo)
561 {
562 struct umtx_q *uq;
563 u_long owner;
564 u_long old;
565 int error = 0;
566
567 uq = td->td_umtxq;
568
569 /*
570 * Care must be exercised when dealing with umtx structure. It
571 * can fault on any access.
572 */
573 for (;;) {
574 /*
575 * Try the uncontested case. This should be done in userland.
576 */
577 owner = casuword(&umtx->u_owner, UMTX_UNOWNED, id);
578
579 /* The acquire succeeded. */
580 if (owner == UMTX_UNOWNED)
581 return (0);
582
583 /* The address was invalid. */
584 if (owner == -1)
585 return (EFAULT);
586
587 /* If no one owns it but it is contested try to acquire it. */
588 if (owner == UMTX_CONTESTED) {
589 owner = casuword(&umtx->u_owner,
590 UMTX_CONTESTED, id | UMTX_CONTESTED);
591
592 if (owner == UMTX_CONTESTED)
593 return (0);
594
595 /* The address was invalid. */
596 if (owner == -1)
597 return (EFAULT);
598
599 /* If this failed the lock has changed, restart. */
600 continue;
601 }
602
603 /*
604 * If we caught a signal, we have retried and now
605 * exit immediately.
606 */
607 if (error != 0)
608 return (error);
609
610 if ((error = umtx_key_get(umtx, TYPE_SIMPLE_LOCK,
611 AUTO_SHARE, &uq->uq_key)) != 0)
612 return (error);
613
/* Enqueue BEFORE setting the contested bit to avoid a lost wakeup. */
614 umtxq_lock(&uq->uq_key);
615 umtxq_busy(&uq->uq_key);
616 umtxq_insert(uq);
617 umtxq_unbusy(&uq->uq_key);
618 umtxq_unlock(&uq->uq_key);
619
620 /*
621 * Set the contested bit so that a release in user space
622 * knows to use the system call for unlock. If this fails
623 * either some one else has acquired the lock or it has been
624 * released.
625 */
626 old = casuword(&umtx->u_owner, owner, owner | UMTX_CONTESTED);
627
628 /* The address was invalid. */
629 if (old == -1) {
630 umtxq_lock(&uq->uq_key);
631 umtxq_remove(uq);
632 umtxq_unlock(&uq->uq_key);
633 umtx_key_release(&uq->uq_key);
634 return (EFAULT);
635 }
636
637 /*
638 * We set the contested bit, sleep. Otherwise the lock changed
639 * and we need to retry or we lost a race to the thread
640 * unlocking the umtx.
641 */
642 umtxq_lock(&uq->uq_key);
643 if (old == owner)
644 error = umtxq_sleep(uq, "umtx", timo);
645 umtxq_remove(uq);
646 umtxq_unlock(&uq->uq_key);
647 umtx_key_release(&uq->uq_key);
648 }
649
650 return (0);
651 }
652
653 /*
654 * Lock a umtx object.
655 */
/*
 * Timed/untimed wrapper around _do_lock_umtx().  With no timeout, an
 * interrupted lock is restarted (EINTR -> ERESTART).  With a timeout we
 * track an absolute uptime deadline and re-sleep with the remaining time
 * after each spurious ETIMEDOUT from a partial tick conversion; timed
 * locking is never restarted (ERESTART -> EINTR), as the timeout value
 * would otherwise be reused from scratch.
 */
656 static int
657 do_lock_umtx(struct thread *td, struct umtx *umtx, u_long id,
658 struct timespec *timeout)
659 {
660 struct timespec ts, ts2, ts3;
661 struct timeval tv;
662 int error;
663
664 if (timeout == NULL) {
665 error = _do_lock_umtx(td, umtx, id, 0);
666 /* Mutex locking is restarted if it is interrupted. */
667 if (error == EINTR)
668 error = ERESTART;
669 } else {
670 getnanouptime(&ts);
671 timespecadd(&ts, timeout);
672 TIMESPEC_TO_TIMEVAL(&tv, timeout);
673 for (;;) {
674 error = _do_lock_umtx(td, umtx, id, tvtohz(&tv));
675 if (error != ETIMEDOUT)
676 break;
677 getnanouptime(&ts2);
678 if (timespeccmp(&ts2, &ts, >=)) {
679 error = ETIMEDOUT;
680 break;
681 }
/* Deadline not reached yet: recompute remaining time and retry. */
682 ts3 = ts;
683 timespecsub(&ts3, &ts2);
684 TIMESPEC_TO_TIMEVAL(&tv, &ts3);
685 }
686 /* Timed-locking is not restarted. */
687 if (error == ERESTART)
688 error = EINTR;
689 }
690 return (error);
691 }
692
693 /*
694 * Unlock a umtx object.
695 */
/*
 * Release an old-style struct umtx owned by 'id'.  Fast path clears an
 * uncontested lock directly; contested path busies the chain, counts
 * waiters, stores UNOWNED (<=1 waiter) or CONTESTED (more), and wakes one
 * waiter.  Returns 0, EFAULT on a userland fault, EPERM if the caller is
 * not the owner, or EINVAL if the word changed underneath us.
 */
696 static int
697 do_unlock_umtx(struct thread *td, struct umtx *umtx, u_long id)
698 {
699 struct umtx_key key;
700 u_long owner;
701 u_long old;
702 int error;
703 int count;
704
705 /*
706 * Make sure we own this mtx.
707 */
708 owner = fuword(__DEVOLATILE(u_long *, &umtx->u_owner));
709 if (owner == -1)
710 return (EFAULT);
711
712 if ((owner & ~UMTX_CONTESTED) != id)
713 return (EPERM);
714
715 /* This should be done in userland */
716 if ((owner & UMTX_CONTESTED) == 0) {
717 old = casuword(&umtx->u_owner, owner, UMTX_UNOWNED);
718 if (old == -1)
719 return (EFAULT);
720 if (old == owner)
721 return (0);
/* Lost a race: the word now carries the contested bit; fall through. */
722 owner = old;
723 }
724
725 /* We should only ever be in here for contested locks */
726 if ((error = umtx_key_get(umtx, TYPE_SIMPLE_LOCK, AUTO_SHARE,
727 &key)) != 0)
728 return (error);
729
730 umtxq_lock(&key);
731 umtxq_busy(&key);
732 count = umtxq_count(&key);
733 umtxq_unlock(&key);
734
735 /*
736 * When unlocking the umtx, it must be marked as unowned if
737 * there is zero or one thread only waiting for it.
738 * Otherwise, it must be marked as contested.
739 */
740 old = casuword(&umtx->u_owner, owner,
741 count <= 1 ? UMTX_UNOWNED : UMTX_CONTESTED);
742 umtxq_lock(&key);
743 umtxq_signal(&key,1);
744 umtxq_unbusy(&key);
745 umtxq_unlock(&key);
746 umtx_key_release(&key);
747 if (old == -1)
748 return (EFAULT);
749 if (old != owner)
750 return (EINVAL);
751 return (0);
752 }
753
754 #ifdef COMPAT_IA32
755
756 /*
757 * Lock a umtx object.
758 */
/*
 * 32-bit (COMPAT_IA32) variant of _do_lock_umtx(): identical algorithm
 * operating on a 32-bit owner word with casuword32 and the UMUTEX_*
 * constants.  Keep in sync with _do_lock_umtx().
 */
759 static int
760 _do_lock_umtx32(struct thread *td, uint32_t *m, uint32_t id, int timo)
761 {
762 struct umtx_q *uq;
763 uint32_t owner;
764 uint32_t old;
765 int error = 0;
766
767 uq = td->td_umtxq;
768
769 /*
770 * Care must be exercised when dealing with umtx structure. It
771 * can fault on any access.
772 */
773 for (;;) {
774 /*
775 * Try the uncontested case. This should be done in userland.
776 */
777 owner = casuword32(m, UMUTEX_UNOWNED, id);
778
779 /* The acquire succeeded. */
780 if (owner == UMUTEX_UNOWNED)
781 return (0);
782
783 /* The address was invalid. */
784 if (owner == -1)
785 return (EFAULT);
786
787 /* If no one owns it but it is contested try to acquire it. */
788 if (owner == UMUTEX_CONTESTED) {
789 owner = casuword32(m,
790 UMUTEX_CONTESTED, id | UMUTEX_CONTESTED);
791 if (owner == UMUTEX_CONTESTED)
792 return (0);
793
794 /* The address was invalid. */
795 if (owner == -1)
796 return (EFAULT);
797
798 /* If this failed the lock has changed, restart. */
799 continue;
800 }
801
802 /*
803 * If we caught a signal, we have retried and now
804 * exit immediately.
805 */
806 if (error != 0)
807 return (error);
808
809 if ((error = umtx_key_get(m, TYPE_SIMPLE_LOCK,
810 AUTO_SHARE, &uq->uq_key)) != 0)
811 return (error);
812
/* Enqueue BEFORE setting the contested bit to avoid a lost wakeup. */
813 umtxq_lock(&uq->uq_key);
814 umtxq_busy(&uq->uq_key);
815 umtxq_insert(uq);
816 umtxq_unbusy(&uq->uq_key);
817 umtxq_unlock(&uq->uq_key);
818
819 /*
820 * Set the contested bit so that a release in user space
821 * knows to use the system call for unlock. If this fails
822 * either some one else has acquired the lock or it has been
823 * released.
824 */
825 old = casuword32(m, owner, owner | UMUTEX_CONTESTED);
826
827 /* The address was invalid. */
828 if (old == -1) {
829 umtxq_lock(&uq->uq_key);
830 umtxq_remove(uq);
831 umtxq_unlock(&uq->uq_key);
832 umtx_key_release(&uq->uq_key);
833 return (EFAULT);
834 }
835
836 /*
837 * We set the contested bit, sleep. Otherwise the lock changed
838 * and we need to retry or we lost a race to the thread
839 * unlocking the umtx.
840 */
841 umtxq_lock(&uq->uq_key);
842 if (old == owner)
843 error = umtxq_sleep(uq, "umtx", timo);
844 umtxq_remove(uq);
845 umtxq_unlock(&uq->uq_key);
846 umtx_key_release(&uq->uq_key);
847 }
848
849 return (0);
850 }
851
852 /*
853 * Lock a umtx object.
854 */
/*
 * 32-bit (COMPAT_IA32) variant of do_lock_umtx(): same restart/deadline
 * handling around _do_lock_umtx32().  Keep in sync with do_lock_umtx().
 */
855 static int
856 do_lock_umtx32(struct thread *td, void *m, uint32_t id,
857 struct timespec *timeout)
858 {
859 struct timespec ts, ts2, ts3;
860 struct timeval tv;
861 int error;
862
863 if (timeout == NULL) {
864 error = _do_lock_umtx32(td, m, id, 0);
865 /* Mutex locking is restarted if it is interrupted. */
866 if (error == EINTR)
867 error = ERESTART;
868 } else {
869 getnanouptime(&ts);
870 timespecadd(&ts, timeout);
871 TIMESPEC_TO_TIMEVAL(&tv, timeout);
872 for (;;) {
873 error = _do_lock_umtx32(td, m, id, tvtohz(&tv));
874 if (error != ETIMEDOUT)
875 break;
876 getnanouptime(&ts2);
877 if (timespeccmp(&ts2, &ts, >=)) {
878 error = ETIMEDOUT;
879 break;
880 }
/* Deadline not reached yet: recompute remaining time and retry. */
881 ts3 = ts;
882 timespecsub(&ts3, &ts2);
883 TIMESPEC_TO_TIMEVAL(&tv, &ts3);
884 }
885 /* Timed-locking is not restarted. */
886 if (error == ERESTART)
887 error = EINTR;
888 }
889 return (error);
890 }
891
892 /*
893 * Unlock a umtx object.
894 */
/*
 * 32-bit (COMPAT_IA32) variant of do_unlock_umtx(): identical algorithm
 * on a 32-bit owner word.  Keep in sync with do_unlock_umtx().
 */
895 static int
896 do_unlock_umtx32(struct thread *td, uint32_t *m, uint32_t id)
897 {
898 struct umtx_key key;
899 uint32_t owner;
900 uint32_t old;
901 int error;
902 int count;
903
904 /*
905 * Make sure we own this mtx.
906 */
907 owner = fuword32(m);
908 if (owner == -1)
909 return (EFAULT);
910
911 if ((owner & ~UMUTEX_CONTESTED) != id)
912 return (EPERM);
913
914 /* This should be done in userland */
915 if ((owner & UMUTEX_CONTESTED) == 0) {
916 old = casuword32(m, owner, UMUTEX_UNOWNED);
917 if (old == -1)
918 return (EFAULT);
919 if (old == owner)
920 return (0);
/* Lost a race: the word now carries the contested bit; fall through. */
921 owner = old;
922 }
923
924 /* We should only ever be in here for contested locks */
925 if ((error = umtx_key_get(m, TYPE_SIMPLE_LOCK, AUTO_SHARE,
926 &key)) != 0)
927 return (error);
928
929 umtxq_lock(&key);
930 umtxq_busy(&key);
931 count = umtxq_count(&key);
932 umtxq_unlock(&key);
933
934 /*
935 * When unlocking the umtx, it must be marked as unowned if
936 * there is zero or one thread only waiting for it.
937 * Otherwise, it must be marked as contested.
938 */
939 old = casuword32(m, owner,
940 count <= 1 ? UMUTEX_UNOWNED : UMUTEX_CONTESTED);
941 umtxq_lock(&key);
942 umtxq_signal(&key,1);
943 umtxq_unbusy(&key);
944 umtxq_unlock(&key);
945 umtx_key_release(&key);
946 if (old == -1)
947 return (EFAULT);
948 if (old != owner)
949 return (EINVAL);
950 return (0);
951 }
952 #endif
953
954 /*
955 * Fetch and compare value, sleep on the address if value is not changed.
956 */
/*
 * UMTX_OP_WAIT: sleep on 'addr' while the word there still equals 'id'.
 * The thread is enqueued BEFORE the word is read so a concurrent wake
 * after the comparison cannot be lost.  compat32 selects a 32-bit fetch;
 * is_private selects a per-process key (cheaper, no vm lookup).
 * Returns 0, ETIMEDOUT, or EINTR (ERESTART is mapped to EINTR because
 * the caller-supplied relative timeout must not be reused on restart).
 */
957 static int
958 do_wait(struct thread *td, void *addr, u_long id,
959 struct timespec *timeout, int compat32, int is_private)
960 {
961 struct umtx_q *uq;
962 struct timespec ts, ts2, ts3;
963 struct timeval tv;
964 u_long tmp;
965 int error = 0;
966
967 uq = td->td_umtxq;
968 if ((error = umtx_key_get(addr, TYPE_SIMPLE_WAIT,
969 is_private ? THREAD_SHARE : AUTO_SHARE, &uq->uq_key)) != 0)
970 return (error);
971
972 umtxq_lock(&uq->uq_key);
973 umtxq_insert(uq);
974 umtxq_unlock(&uq->uq_key);
975 if (compat32 == 0)
976 tmp = fuword(addr);
977 else
978 tmp = (unsigned int)fuword32(addr);
979 if (tmp != id) {
/* Value already changed: don't sleep, just dequeue. */
980 umtxq_lock(&uq->uq_key);
981 umtxq_remove(uq);
982 umtxq_unlock(&uq->uq_key);
983 } else if (timeout == NULL) {
984 umtxq_lock(&uq->uq_key);
985 error = umtxq_sleep(uq, "uwait", 0);
986 umtxq_remove(uq);
987 umtxq_unlock(&uq->uq_key);
988 } else {
989 getnanouptime(&ts);
990 timespecadd(&ts, timeout);
991 TIMESPEC_TO_TIMEVAL(&tv, timeout);
992 umtxq_lock(&uq->uq_key);
993 for (;;) {
994 error = umtxq_sleep(uq, "uwait", tvtohz(&tv));
995 if (!(uq->uq_flags & UQF_UMTXQ))
996 break;
997 if (error != ETIMEDOUT)
998 break;
999 umtxq_unlock(&uq->uq_key);
1000 getnanouptime(&ts2);
1001 if (timespeccmp(&ts2, &ts, >=)) {
1002 error = ETIMEDOUT;
1003 umtxq_lock(&uq->uq_key);
1004 break;
1005 }
/* Deadline not reached yet: recompute remaining time and retry. */
1006 ts3 = ts;
1007 timespecsub(&ts3, &ts2);
1008 TIMESPEC_TO_TIMEVAL(&tv, &ts3);
1009 umtxq_lock(&uq->uq_key);
1010 }
1011 umtxq_remove(uq);
1012 umtxq_unlock(&uq->uq_key);
1013 }
1014 umtx_key_release(&uq->uq_key);
1015 if (error == ERESTART)
1016 error = EINTR;
1017 return (error);
1018 }
1019
1020 /*
1021 * Wake up threads sleeping on the specified address.
1022 */
/*
 * UMTX_OP_WAKE: wake up to n_wake threads sleeping via do_wait() on
 * uaddr.  Returns 0 on success (the number woken is intentionally not
 * reported to the caller) or the umtx_key_get() error.
 */
1023 int
1024 kern_umtx_wake(struct thread *td, void *uaddr, int n_wake, int is_private)
1025 {
1026 struct umtx_key key;
1027 int ret;
1028
1029 if ((ret = umtx_key_get(uaddr, TYPE_SIMPLE_WAIT,
1030 is_private ? THREAD_SHARE : AUTO_SHARE, &key)) != 0)
1031 return (ret);
1032 umtxq_lock(&key);
1033 ret = umtxq_signal(&key, n_wake);
1034 umtxq_unlock(&key);
1035 umtx_key_release(&key);
1036 return (0);
1037 }
1038
1039 /*
1040 * Lock PTHREAD_PRIO_NONE protocol POSIX mutex.
1041 */
/*
 * Acquire a PTHREAD_PRIO_NONE struct umutex for the calling thread.
 * mode selects behavior: _UMUTEX_TRY returns EBUSY instead of sleeping,
 * _UMUTEX_WAIT only waits for the lock to become available without
 * acquiring it.  Returns 0, EFAULT on userland fault, EDEADLK for a
 * recursive attempt on an error-checking mutex, EBUSY (try mode), or a
 * sleep error.  Unlike the chain handling in _do_lock_umtx(), the chain
 * stays busy across the contested-bit CAS and is unbusied before sleep.
 */
1042 static int
1043 _do_lock_normal(struct thread *td, struct umutex *m, uint32_t flags, int timo,
1044 int mode)
1045 {
1046 struct umtx_q *uq;
1047 uint32_t owner, old, id;
1048 int error = 0;
1049
1050 id = td->td_tid;
1051 uq = td->td_umtxq;
1052
1053 /*
1054 * Care must be exercised when dealing with umtx structure. It
1055 * can fault on any access.
1056 */
1057 for (;;) {
1058 owner = fuword32(__DEVOLATILE(void *, &m->m_owner));
1059 if (mode == _UMUTEX_WAIT) {
/* Wait mode: done as soon as the mutex is takeable by userland. */
1060 if (owner == UMUTEX_UNOWNED || owner == UMUTEX_CONTESTED)
1061 return (0);
1062 } else {
1063 /*
1064 * Try the uncontested case. This should be done in userland.
1065 */
1066 owner = casuword32(&m->m_owner, UMUTEX_UNOWNED, id);
1067
1068 /* The acquire succeeded. */
1069 if (owner == UMUTEX_UNOWNED)
1070 return (0);
1071
1072 /* The address was invalid. */
1073 if (owner == -1)
1074 return (EFAULT);
1075
1076 /* If no one owns it but it is contested try to acquire it. */
1077 if (owner == UMUTEX_CONTESTED) {
1078 owner = casuword32(&m->m_owner,
1079 UMUTEX_CONTESTED, id | UMUTEX_CONTESTED);
1080
1081 if (owner == UMUTEX_CONTESTED)
1082 return (0);
1083
1084 /* The address was invalid. */
1085 if (owner == -1)
1086 return (EFAULT);
1087
1088 /* If this failed the lock has changed, restart. */
1089 continue;
1090 }
1091 }
1092
1093 if ((flags & UMUTEX_ERROR_CHECK) != 0 &&
1094 (owner & ~UMUTEX_CONTESTED) == id)
1095 return (EDEADLK);
1096
1097 if (mode == _UMUTEX_TRY)
1098 return (EBUSY);
1099
1100 /*
1101 * If we caught a signal, we have retried and now
1102 * exit immediately.
1103 */
1104 if (error != 0)
1105 return (error);
1106
1107 if ((error = umtx_key_get(m, TYPE_NORMAL_UMUTEX,
1108 GET_SHARE(flags), &uq->uq_key)) != 0)
1109 return (error);
1110
/* Enqueue and keep the chain busy until the contested bit is settled. */
1111 umtxq_lock(&uq->uq_key);
1112 umtxq_busy(&uq->uq_key);
1113 umtxq_insert(uq);
1114 umtxq_unlock(&uq->uq_key);
1115
1116 /*
1117 * Set the contested bit so that a release in user space
1118 * knows to use the system call for unlock. If this fails
1119 * either some one else has acquired the lock or it has been
1120 * released.
1121 */
1122 old = casuword32(&m->m_owner, owner, owner | UMUTEX_CONTESTED);
1123
1124 /* The address was invalid. */
1125 if (old == -1) {
1126 umtxq_lock(&uq->uq_key);
1127 umtxq_remove(uq);
1128 umtxq_unbusy(&uq->uq_key);
1129 umtxq_unlock(&uq->uq_key);
1130 umtx_key_release(&uq->uq_key);
1131 return (EFAULT);
1132 }
1133
1134 /*
1135 * We set the contested bit, sleep. Otherwise the lock changed
1136 * and we need to retry or we lost a race to the thread
1137 * unlocking the umtx.
1138 */
1139 umtxq_lock(&uq->uq_key);
1140 umtxq_unbusy(&uq->uq_key);
1141 if (old == owner)
1142 error = umtxq_sleep(uq, "umtxn", timo);
1143 umtxq_remove(uq);
1144 umtxq_unlock(&uq->uq_key);
1145 umtx_key_release(&uq->uq_key);
1146 }
1147
1148 return (0);
1149 }
1150
1151 /*
1152 * Unlock PTHREAD_PRIO_NONE protocol POSIX mutex.
1153 */
/*
 * Release a PTHREAD_PRIO_NONE struct umutex owned by the calling thread.
 * Same structure as do_unlock_umtx(): fast uncontested clear, then the
 * busy/count/cas/signal dance for the contested case.  Returns 0, EFAULT,
 * EPERM (not owner), or EINVAL (owner word changed underneath us).
 */
1157 static int
1158 do_unlock_normal(struct thread *td, struct umutex *m, uint32_t flags)
1159 {
1160 struct umtx_key key;
1161 uint32_t owner, old, id;
1162 int error;
1163 int count;
1164
1165 id = td->td_tid;
1166 /*
1167 * Make sure we own this mtx.
1168 */
1169 owner = fuword32(__DEVOLATILE(uint32_t *, &m->m_owner));
1170 if (owner == -1)
1171 return (EFAULT);
1172
1173 if ((owner & ~UMUTEX_CONTESTED) != id)
1174 return (EPERM);
1175
1176 if ((owner & UMUTEX_CONTESTED) == 0) {
1177 old = casuword32(&m->m_owner, owner, UMUTEX_UNOWNED);
1178 if (old == -1)
1179 return (EFAULT);
1180 if (old == owner)
1181 return (0);
/* Lost a race: the word now carries the contested bit; fall through. */
1182 owner = old;
1183 }
1184
1185 /* We should only ever be in here for contested locks */
1186 if ((error = umtx_key_get(m, TYPE_NORMAL_UMUTEX, GET_SHARE(flags),
1187 &key)) != 0)
1188 return (error);
1189
1190 umtxq_lock(&key);
1191 umtxq_busy(&key);
1192 count = umtxq_count(&key);
1193 umtxq_unlock(&key);
1194
1195 /*
1196 * When unlocking the umtx, it must be marked as unowned if
1197 * there is zero or one thread only waiting for it.
1198 * Otherwise, it must be marked as contested.
1199 */
1200 old = casuword32(&m->m_owner, owner,
1201 count <= 1 ? UMUTEX_UNOWNED : UMUTEX_CONTESTED);
1202 umtxq_lock(&key);
1203 umtxq_signal(&key,1);
1204 umtxq_unbusy(&key);
1205 umtxq_unlock(&key);
1206 umtx_key_release(&key);
1207 if (old == -1)
1208 return (EFAULT);
1209 if (old != owner)
1210 return (EINVAL);
1211 return (0);
1212 }
1213
1214 /*
1215 * Check if the mutex is available and wake up a waiter,
1216 * only for simple mutex.
1217 */
/*
 * UMTX_OP_MUTEX_WAKE: if the mutex is unowned, wake one waiter blocked in
 * _UMUTEX_WAIT mode.  With at most one waiter the contested bit is also
 * cleared so userland can take the fast path again.  Returns 0 or EFAULT.
 */
1218 static int
1219 do_wake_umutex(struct thread *td, struct umutex *m)
1220 {
1221 struct umtx_key key;
1222 uint32_t owner;
1223 uint32_t flags;
1224 int error;
1225
1226 int count;
1227
1227 owner = fuword32(__DEVOLATILE(uint32_t *, &m->m_owner));
1228 if (owner == -1)
1229 return (EFAULT);
1230
/* Someone still owns it; nothing to wake. */
1231 if ((owner & ~UMUTEX_CONTESTED) != 0)
1232 return (0);
1233
1234 flags = fuword32(&m->m_flags);
1235
1236 /* We should only ever be in here for contested locks */
1237 if ((error = umtx_key_get(m, TYPE_NORMAL_UMUTEX, GET_SHARE(flags),
1238 &key)) != 0)
1239 return (error);
1240
1241 umtxq_lock(&key);
1242 umtxq_busy(&key);
1243 count = umtxq_count(&key);
1244 umtxq_unlock(&key);
1245
1246 if (count <= 1)
1247 owner = casuword32(&m->m_owner, UMUTEX_CONTESTED, UMUTEX_UNOWNED);
1248
1249 umtxq_lock(&key);
1250 if (count != 0 && (owner & ~UMUTEX_CONTESTED) == 0)
1251 umtxq_signal(&key, 1);
1252 umtxq_unbusy(&key);
1253 umtxq_unlock(&key);
1254 umtx_key_release(&key);
1255 return (0);
1256 }
1257
1258 static inline struct umtx_pi *
1259 umtx_pi_alloc(int flags)
1260 {
1261 struct umtx_pi *pi;
1262
1263 pi = uma_zalloc(umtx_pi_zone, M_ZERO | flags);
1264 TAILQ_INIT(&pi->pi_blocked);
1265 atomic_add_int(&umtx_pi_allocated, 1);
1266 return (pi);
1267 }
1268
1269 static inline void
1270 umtx_pi_free(struct umtx_pi *pi)
1271 {
1272 uma_zfree(umtx_pi_zone, pi);
1273 atomic_add_int(&umtx_pi_allocated, -1);
1274 }
1275
1276 /*
1277 * Adjust the thread's position on a pi_state after its priority has been
1278 * changed.
1279 */
1280 static int
1281 umtx_pi_adjust_thread(struct umtx_pi *pi, struct thread *td)
1282 {
1283 struct umtx_q *uq, *uq1, *uq2;
1284 struct thread *td1;
1285
1286 mtx_assert(&umtx_lock, MA_OWNED);
1287 if (pi == NULL)
1288 return (0);
1289
1290 uq = td->td_umtxq;
1291
1292 /*
1293 * Check if the thread needs to be moved on the blocked chain.
1294 * It needs to be moved if either its priority is lower than
1295 * the previous thread or higher than the next thread.
1296 */
1297 uq1 = TAILQ_PREV(uq, umtxq_head, uq_lockq);
1298 uq2 = TAILQ_NEXT(uq, uq_lockq);
1299 if ((uq1 != NULL && UPRI(td) < UPRI(uq1->uq_thread)) ||
1300 (uq2 != NULL && UPRI(td) > UPRI(uq2->uq_thread))) {
1301 /*
1302 * Remove thread from blocked chain and determine where
1303 * it should be moved to.
1304 */
1305 TAILQ_REMOVE(&pi->pi_blocked, uq, uq_lockq);
1306 TAILQ_FOREACH(uq1, &pi->pi_blocked, uq_lockq) {
1307 td1 = uq1->uq_thread;
1308 MPASS(td1->td_proc->p_magic == P_MAGIC);
1309 if (UPRI(td1) > UPRI(td))
1310 break;
1311 }
1312
1313 if (uq1 == NULL)
1314 TAILQ_INSERT_TAIL(&pi->pi_blocked, uq, uq_lockq);
1315 else
1316 TAILQ_INSERT_BEFORE(uq1, uq, uq_lockq);
1317 }
1318 return (1);
1319 }
1320
1321 /*
1322 * Propagate priority when a thread is blocked on POSIX
1323 * PI mutex.
1324 */
1325 static void
1326 umtx_propagate_priority(struct thread *td)
1327 {
1328 struct umtx_q *uq;
1329 struct umtx_pi *pi;
1330 int pri;
1331
1332 mtx_assert(&umtx_lock, MA_OWNED);
1333 pri = UPRI(td);
1334 uq = td->td_umtxq;
1335 pi = uq->uq_pi_blocked;
1336 if (pi == NULL)
1337 return;
1338
1339 for (;;) {
1340 td = pi->pi_owner;
1341 if (td == NULL)
1342 return;
1343
1344 MPASS(td->td_proc != NULL);
1345 MPASS(td->td_proc->p_magic == P_MAGIC);
1346
1347 if (UPRI(td) <= pri)
1348 return;
1349
1350 thread_lock(td);
1351 sched_lend_user_prio(td, pri);
1352 thread_unlock(td);
1353
1354 /*
1355 * Pick up the lock that td is blocked on.
1356 */
1357 uq = td->td_umtxq;
1358 pi = uq->uq_pi_blocked;
1359 /* Resort td on the list if needed. */
1360 if (!umtx_pi_adjust_thread(pi, td))
1361 break;
1362 }
1363 }
1364
1365 /*
1366 * Unpropagate priority for a PI mutex when a thread blocked on
1367 * it is interrupted by signal or resumed by others.
1368 */
1369 static void
1370 umtx_unpropagate_priority(struct umtx_pi *pi)
1371 {
1372 struct umtx_q *uq, *uq_owner;
1373 struct umtx_pi *pi2;
1374 int pri, oldpri;
1375
1376 mtx_assert(&umtx_lock, MA_OWNED);
1377
1378 while (pi != NULL && pi->pi_owner != NULL) {
1379 pri = PRI_MAX;
1380 uq_owner = pi->pi_owner->td_umtxq;
1381
1382 TAILQ_FOREACH(pi2, &uq_owner->uq_pi_contested, pi_link) {
1383 uq = TAILQ_FIRST(&pi2->pi_blocked);
1384 if (uq != NULL) {
1385 if (pri > UPRI(uq->uq_thread))
1386 pri = UPRI(uq->uq_thread);
1387 }
1388 }
1389
1390 if (pri > uq_owner->uq_inherited_pri)
1391 pri = uq_owner->uq_inherited_pri;
1392 thread_lock(pi->pi_owner);
1393 oldpri = pi->pi_owner->td_user_pri;
1394 sched_unlend_user_prio(pi->pi_owner, pri);
1395 thread_unlock(pi->pi_owner);
1396 if (uq_owner->uq_pi_blocked != NULL)
1397 umtx_pi_adjust_locked(pi->pi_owner, oldpri);
1398 pi = uq_owner->uq_pi_blocked;
1399 }
1400 }
1401
1402 /*
1403 * Insert a PI mutex into owned list.
1404 */
1405 static void
1406 umtx_pi_setowner(struct umtx_pi *pi, struct thread *owner)
1407 {
1408 struct umtx_q *uq_owner;
1409
1410 uq_owner = owner->td_umtxq;
1411 mtx_assert(&umtx_lock, MA_OWNED);
1412 if (pi->pi_owner != NULL)
1413 panic("pi_ower != NULL");
1414 pi->pi_owner = owner;
1415 TAILQ_INSERT_TAIL(&uq_owner->uq_pi_contested, pi, pi_link);
1416 }
1417
1418 /*
1419 * Claim ownership of a PI mutex.
1420 */
1421 static int
1422 umtx_pi_claim(struct umtx_pi *pi, struct thread *owner)
1423 {
1424 struct umtx_q *uq, *uq_owner;
1425
1426 uq_owner = owner->td_umtxq;
1427 mtx_lock_spin(&umtx_lock);
1428 if (pi->pi_owner == owner) {
1429 mtx_unlock_spin(&umtx_lock);
1430 return (0);
1431 }
1432
1433 if (pi->pi_owner != NULL) {
1434 /*
1435 * userland may have already messed the mutex, sigh.
1436 */
1437 mtx_unlock_spin(&umtx_lock);
1438 return (EPERM);
1439 }
1440 umtx_pi_setowner(pi, owner);
1441 uq = TAILQ_FIRST(&pi->pi_blocked);
1442 if (uq != NULL) {
1443 int pri;
1444
1445 pri = UPRI(uq->uq_thread);
1446 thread_lock(owner);
1447 if (pri < UPRI(owner))
1448 sched_lend_user_prio(owner, pri);
1449 thread_unlock(owner);
1450 }
1451 mtx_unlock_spin(&umtx_lock);
1452 return (0);
1453 }
1454
/*
 * Re-sort td in the blocked queue of the PI mutex it sleeps on after
 * a priority change, propagating upward if td became the best waiter
 * with an improved priority.  Requires umtx_lock (asserted by callees);
 * td must be blocked on a PI mutex.
 */
static void
umtx_pi_adjust_locked(struct thread *td, u_char oldpri)
{
	struct umtx_q *uq;
	struct umtx_pi *pi;

	uq = td->td_umtxq;
	/*
	 * Pick up the lock that td is blocked on.
	 */
	pi = uq->uq_pi_blocked;
	MPASS(pi != NULL);

	/* Resort the turnstile on the list. */
	if (!umtx_pi_adjust_thread(pi, td))
		return;

	/*
	 * If our priority was lowered and we are at the head of the
	 * turnstile, then propagate our new priority up the chain.
	 */
	if (uq == TAILQ_FIRST(&pi->pi_blocked) && UPRI(td) < oldpri)
		umtx_propagate_priority(td);
}
1479
1480 /*
1481 * Adjust a thread's order position in its blocked PI mutex,
1482 * this may result new priority propagating process.
1483 */
1484 void
1485 umtx_pi_adjust(struct thread *td, u_char oldpri)
1486 {
1487 struct umtx_q *uq;
1488 struct umtx_pi *pi;
1489
1490 uq = td->td_umtxq;
1491 mtx_lock_spin(&umtx_lock);
1492 /*
1493 * Pick up the lock that td is blocked on.
1494 */
1495 pi = uq->uq_pi_blocked;
1496 if (pi != NULL)
1497 umtx_pi_adjust_locked(td, oldpri);
1498 mtx_unlock_spin(&umtx_lock);
1499 }
1500
1501 /*
1502 * Sleep on a PI mutex.
1503 */
1504 static int
1505 umtxq_sleep_pi(struct umtx_q *uq, struct umtx_pi *pi,
1506 uint32_t owner, const char *wmesg, int timo)
1507 {
1508 struct umtxq_chain *uc;
1509 struct thread *td, *td1;
1510 struct umtx_q *uq1;
1511 int pri;
1512 int error = 0;
1513
1514 td = uq->uq_thread;
1515 KASSERT(td == curthread, ("inconsistent uq_thread"));
1516 uc = umtxq_getchain(&uq->uq_key);
1517 UMTXQ_LOCKED_ASSERT(uc);
1518 UMTXQ_BUSY_ASSERT(uc);
1519 umtxq_insert(uq);
1520 mtx_lock_spin(&umtx_lock);
1521 if (pi->pi_owner == NULL) {
1522 /* XXX
1523 * Current, We only support process private PI-mutex,
1524 * non-contended PI-mutexes are locked in userland.
1525 * Process shared PI-mutex should always be initialized
1526 * by kernel and be registered in kernel, locking should
1527 * always be done by kernel to avoid security problems.
1528 * For process private PI-mutex, we can find owner
1529 * thread and boost its priority safely.
1530 */
1531 mtx_unlock_spin(&umtx_lock);
1532 PROC_LOCK(curproc);
1533 td1 = thread_find(curproc, owner);
1534 mtx_lock_spin(&umtx_lock);
1535 if (td1 != NULL && pi->pi_owner == NULL) {
1536 uq1 = td1->td_umtxq;
1537 umtx_pi_setowner(pi, td1);
1538 }
1539 PROC_UNLOCK(curproc);
1540 }
1541
1542 TAILQ_FOREACH(uq1, &pi->pi_blocked, uq_lockq) {
1543 pri = UPRI(uq1->uq_thread);
1544 if (pri > UPRI(td))
1545 break;
1546 }
1547
1548 if (uq1 != NULL)
1549 TAILQ_INSERT_BEFORE(uq1, uq, uq_lockq);
1550 else
1551 TAILQ_INSERT_TAIL(&pi->pi_blocked, uq, uq_lockq);
1552
1553 uq->uq_pi_blocked = pi;
1554 thread_lock(td);
1555 td->td_flags |= TDF_UPIBLOCKED;
1556 thread_unlock(td);
1557 umtx_propagate_priority(td);
1558 mtx_unlock_spin(&umtx_lock);
1559 umtxq_unbusy(&uq->uq_key);
1560
1561 if (uq->uq_flags & UQF_UMTXQ) {
1562 error = msleep(uq, &uc->uc_lock, PCATCH, wmesg, timo);
1563 if (error == EWOULDBLOCK)
1564 error = ETIMEDOUT;
1565 if (uq->uq_flags & UQF_UMTXQ) {
1566 umtxq_remove(uq);
1567 }
1568 }
1569 mtx_lock_spin(&umtx_lock);
1570 uq->uq_pi_blocked = NULL;
1571 thread_lock(td);
1572 td->td_flags &= ~TDF_UPIBLOCKED;
1573 thread_unlock(td);
1574 TAILQ_REMOVE(&pi->pi_blocked, uq, uq_lockq);
1575 umtx_unpropagate_priority(pi);
1576 mtx_unlock_spin(&umtx_lock);
1577 umtxq_unlock(&uq->uq_key);
1578
1579 return (error);
1580 }
1581
1582 /*
1583 * Add reference count for a PI mutex.
1584 */
1585 static void
1586 umtx_pi_ref(struct umtx_pi *pi)
1587 {
1588 struct umtxq_chain *uc;
1589
1590 uc = umtxq_getchain(&pi->pi_key);
1591 UMTXQ_LOCKED_ASSERT(uc);
1592 pi->pi_refcount++;
1593 }
1594
1595 /*
1596 * Decrease reference count for a PI mutex, if the counter
1597 * is decreased to zero, its memory space is freed.
1598 */
1599 static void
1600 umtx_pi_unref(struct umtx_pi *pi)
1601 {
1602 struct umtxq_chain *uc;
1603
1604 uc = umtxq_getchain(&pi->pi_key);
1605 UMTXQ_LOCKED_ASSERT(uc);
1606 KASSERT(pi->pi_refcount > 0, ("invalid reference count"));
1607 if (--pi->pi_refcount == 0) {
1608 mtx_lock_spin(&umtx_lock);
1609 if (pi->pi_owner != NULL) {
1610 TAILQ_REMOVE(&pi->pi_owner->td_umtxq->uq_pi_contested,
1611 pi, pi_link);
1612 pi->pi_owner = NULL;
1613 }
1614 KASSERT(TAILQ_EMPTY(&pi->pi_blocked),
1615 ("blocked queue not empty"));
1616 mtx_unlock_spin(&umtx_lock);
1617 TAILQ_REMOVE(&uc->uc_pi_list, pi, pi_hashlink);
1618 umtx_pi_free(pi);
1619 }
1620 }
1621
1622 /*
1623 * Find a PI mutex in hash table.
1624 */
1625 static struct umtx_pi *
1626 umtx_pi_lookup(struct umtx_key *key)
1627 {
1628 struct umtxq_chain *uc;
1629 struct umtx_pi *pi;
1630
1631 uc = umtxq_getchain(key);
1632 UMTXQ_LOCKED_ASSERT(uc);
1633
1634 TAILQ_FOREACH(pi, &uc->uc_pi_list, pi_hashlink) {
1635 if (umtx_key_match(&pi->pi_key, key)) {
1636 return (pi);
1637 }
1638 }
1639 return (NULL);
1640 }
1641
1642 /*
1643 * Insert a PI mutex into hash table.
1644 */
1645 static inline void
1646 umtx_pi_insert(struct umtx_pi *pi)
1647 {
1648 struct umtxq_chain *uc;
1649
1650 uc = umtxq_getchain(&pi->pi_key);
1651 UMTXQ_LOCKED_ASSERT(uc);
1652 TAILQ_INSERT_TAIL(&uc->uc_pi_list, pi, pi_hashlink);
1653 }
1654
1655 /*
1656 * Lock a PI mutex.
1657 */
1658 static int
1659 _do_lock_pi(struct thread *td, struct umutex *m, uint32_t flags, int timo,
1660 int try)
1661 {
1662 struct umtx_q *uq;
1663 struct umtx_pi *pi, *new_pi;
1664 uint32_t id, owner, old;
1665 int error;
1666
1667 id = td->td_tid;
1668 uq = td->td_umtxq;
1669
1670 if ((error = umtx_key_get(m, TYPE_PI_UMUTEX, GET_SHARE(flags),
1671 &uq->uq_key)) != 0)
1672 return (error);
1673 umtxq_lock(&uq->uq_key);
1674 pi = umtx_pi_lookup(&uq->uq_key);
1675 if (pi == NULL) {
1676 new_pi = umtx_pi_alloc(M_NOWAIT);
1677 if (new_pi == NULL) {
1678 umtxq_unlock(&uq->uq_key);
1679 new_pi = umtx_pi_alloc(M_WAITOK);
1680 umtxq_lock(&uq->uq_key);
1681 pi = umtx_pi_lookup(&uq->uq_key);
1682 if (pi != NULL) {
1683 umtx_pi_free(new_pi);
1684 new_pi = NULL;
1685 }
1686 }
1687 if (new_pi != NULL) {
1688 new_pi->pi_key = uq->uq_key;
1689 umtx_pi_insert(new_pi);
1690 pi = new_pi;
1691 }
1692 }
1693 umtx_pi_ref(pi);
1694 umtxq_unlock(&uq->uq_key);
1695
1696 /*
1697 * Care must be exercised when dealing with umtx structure. It
1698 * can fault on any access.
1699 */
1700 for (;;) {
1701 /*
1702 * Try the uncontested case. This should be done in userland.
1703 */
1704 owner = casuword32(&m->m_owner, UMUTEX_UNOWNED, id);
1705
1706 /* The acquire succeeded. */
1707 if (owner == UMUTEX_UNOWNED) {
1708 error = 0;
1709 break;
1710 }
1711
1712 /* The address was invalid. */
1713 if (owner == -1) {
1714 error = EFAULT;
1715 break;
1716 }
1717
1718 /* If no one owns it but it is contested try to acquire it. */
1719 if (owner == UMUTEX_CONTESTED) {
1720 owner = casuword32(&m->m_owner,
1721 UMUTEX_CONTESTED, id | UMUTEX_CONTESTED);
1722
1723 if (owner == UMUTEX_CONTESTED) {
1724 umtxq_lock(&uq->uq_key);
1725 umtxq_busy(&uq->uq_key);
1726 error = umtx_pi_claim(pi, td);
1727 umtxq_unbusy(&uq->uq_key);
1728 umtxq_unlock(&uq->uq_key);
1729 break;
1730 }
1731
1732 /* The address was invalid. */
1733 if (owner == -1) {
1734 error = EFAULT;
1735 break;
1736 }
1737
1738 /* If this failed the lock has changed, restart. */
1739 continue;
1740 }
1741
1742 if ((flags & UMUTEX_ERROR_CHECK) != 0 &&
1743 (owner & ~UMUTEX_CONTESTED) == id) {
1744 error = EDEADLK;
1745 break;
1746 }
1747
1748 if (try != 0) {
1749 error = EBUSY;
1750 break;
1751 }
1752
1753 /*
1754 * If we caught a signal, we have retried and now
1755 * exit immediately.
1756 */
1757 if (error != 0)
1758 break;
1759
1760 umtxq_lock(&uq->uq_key);
1761 umtxq_busy(&uq->uq_key);
1762 umtxq_unlock(&uq->uq_key);
1763
1764 /*
1765 * Set the contested bit so that a release in user space
1766 * knows to use the system call for unlock. If this fails
1767 * either some one else has acquired the lock or it has been
1768 * released.
1769 */
1770 old = casuword32(&m->m_owner, owner, owner | UMUTEX_CONTESTED);
1771
1772 /* The address was invalid. */
1773 if (old == -1) {
1774 umtxq_lock(&uq->uq_key);
1775 umtxq_unbusy(&uq->uq_key);
1776 umtxq_unlock(&uq->uq_key);
1777 error = EFAULT;
1778 break;
1779 }
1780
1781 umtxq_lock(&uq->uq_key);
1782 /*
1783 * We set the contested bit, sleep. Otherwise the lock changed
1784 * and we need to retry or we lost a race to the thread
1785 * unlocking the umtx.
1786 */
1787 if (old == owner)
1788 error = umtxq_sleep_pi(uq, pi, owner & ~UMUTEX_CONTESTED,
1789 "umtxpi", timo);
1790 else {
1791 umtxq_unbusy(&uq->uq_key);
1792 umtxq_unlock(&uq->uq_key);
1793 }
1794 }
1795
1796 umtxq_lock(&uq->uq_key);
1797 umtx_pi_unref(pi);
1798 umtxq_unlock(&uq->uq_key);
1799
1800 umtx_key_release(&uq->uq_key);
1801 return (error);
1802 }
1803
1804 /*
1805 * Unlock a PI mutex.
1806 */
1807 static int
1808 do_unlock_pi(struct thread *td, struct umutex *m, uint32_t flags)
1809 {
1810 struct umtx_key key;
1811 struct umtx_q *uq_first, *uq_first2, *uq_me;
1812 struct umtx_pi *pi, *pi2;
1813 uint32_t owner, old, id;
1814 int error;
1815 int count;
1816 int pri;
1817
1818 id = td->td_tid;
1819 /*
1820 * Make sure we own this mtx.
1821 */
1822 owner = fuword32(__DEVOLATILE(uint32_t *, &m->m_owner));
1823 if (owner == -1)
1824 return (EFAULT);
1825
1826 if ((owner & ~UMUTEX_CONTESTED) != id)
1827 return (EPERM);
1828
1829 /* This should be done in userland */
1830 if ((owner & UMUTEX_CONTESTED) == 0) {
1831 old = casuword32(&m->m_owner, owner, UMUTEX_UNOWNED);
1832 if (old == -1)
1833 return (EFAULT);
1834 if (old == owner)
1835 return (0);
1836 owner = old;
1837 }
1838
1839 /* We should only ever be in here for contested locks */
1840 if ((error = umtx_key_get(m, TYPE_PI_UMUTEX, GET_SHARE(flags),
1841 &key)) != 0)
1842 return (error);
1843
1844 umtxq_lock(&key);
1845 umtxq_busy(&key);
1846 count = umtxq_count_pi(&key, &uq_first);
1847 if (uq_first != NULL) {
1848 mtx_lock_spin(&umtx_lock);
1849 pi = uq_first->uq_pi_blocked;
1850 KASSERT(pi != NULL, ("pi == NULL?"));
1851 if (pi->pi_owner != curthread) {
1852 mtx_unlock_spin(&umtx_lock);
1853 umtxq_unbusy(&key);
1854 umtxq_unlock(&key);
1855 umtx_key_release(&key);
1856 /* userland messed the mutex */
1857 return (EPERM);
1858 }
1859 uq_me = curthread->td_umtxq;
1860 pi->pi_owner = NULL;
1861 TAILQ_REMOVE(&uq_me->uq_pi_contested, pi, pi_link);
1862 /* get highest priority thread which is still sleeping. */
1863 uq_first = TAILQ_FIRST(&pi->pi_blocked);
1864 while (uq_first != NULL &&
1865 (uq_first->uq_flags & UQF_UMTXQ) == 0) {
1866 uq_first = TAILQ_NEXT(uq_first, uq_lockq);
1867 }
1868 pri = PRI_MAX;
1869 TAILQ_FOREACH(pi2, &uq_me->uq_pi_contested, pi_link) {
1870 uq_first2 = TAILQ_FIRST(&pi2->pi_blocked);
1871 if (uq_first2 != NULL) {
1872 if (pri > UPRI(uq_first2->uq_thread))
1873 pri = UPRI(uq_first2->uq_thread);
1874 }
1875 }
1876 thread_lock(curthread);
1877 sched_unlend_user_prio(curthread, pri);
1878 thread_unlock(curthread);
1879 mtx_unlock_spin(&umtx_lock);
1880 if (uq_first)
1881 umtxq_signal_thread(uq_first);
1882 }
1883 umtxq_unlock(&key);
1884
1885 /*
1886 * When unlocking the umtx, it must be marked as unowned if
1887 * there is zero or one thread only waiting for it.
1888 * Otherwise, it must be marked as contested.
1889 */
1890 old = casuword32(&m->m_owner, owner,
1891 count <= 1 ? UMUTEX_UNOWNED : UMUTEX_CONTESTED);
1892
1893 umtxq_lock(&key);
1894 umtxq_unbusy(&key);
1895 umtxq_unlock(&key);
1896 umtx_key_release(&key);
1897 if (old == -1)
1898 return (EFAULT);
1899 if (old != owner)
1900 return (EINVAL);
1901 return (0);
1902 }
1903
1904 /*
1905 * Lock a PP mutex.
1906 */
1907 static int
1908 _do_lock_pp(struct thread *td, struct umutex *m, uint32_t flags, int timo,
1909 int try)
1910 {
1911 struct umtx_q *uq, *uq2;
1912 struct umtx_pi *pi;
1913 uint32_t ceiling;
1914 uint32_t owner, id;
1915 int error, pri, old_inherited_pri, su;
1916
1917 id = td->td_tid;
1918 uq = td->td_umtxq;
1919 if ((error = umtx_key_get(m, TYPE_PP_UMUTEX, GET_SHARE(flags),
1920 &uq->uq_key)) != 0)
1921 return (error);
1922 su = (priv_check(td, PRIV_SCHED_RTPRIO) == 0);
1923 for (;;) {
1924 old_inherited_pri = uq->uq_inherited_pri;
1925 umtxq_lock(&uq->uq_key);
1926 umtxq_busy(&uq->uq_key);
1927 umtxq_unlock(&uq->uq_key);
1928
1929 ceiling = RTP_PRIO_MAX - fuword32(&m->m_ceilings[0]);
1930 if (ceiling > RTP_PRIO_MAX) {
1931 error = EINVAL;
1932 goto out;
1933 }
1934
1935 mtx_lock_spin(&umtx_lock);
1936 if (UPRI(td) < PRI_MIN_REALTIME + ceiling) {
1937 mtx_unlock_spin(&umtx_lock);
1938 error = EINVAL;
1939 goto out;
1940 }
1941 if (su && PRI_MIN_REALTIME + ceiling < uq->uq_inherited_pri) {
1942 uq->uq_inherited_pri = PRI_MIN_REALTIME + ceiling;
1943 thread_lock(td);
1944 if (uq->uq_inherited_pri < UPRI(td))
1945 sched_lend_user_prio(td, uq->uq_inherited_pri);
1946 thread_unlock(td);
1947 }
1948 mtx_unlock_spin(&umtx_lock);
1949
1950 owner = casuword32(&m->m_owner,
1951 UMUTEX_CONTESTED, id | UMUTEX_CONTESTED);
1952
1953 if (owner == UMUTEX_CONTESTED) {
1954 error = 0;
1955 break;
1956 }
1957
1958 /* The address was invalid. */
1959 if (owner == -1) {
1960 error = EFAULT;
1961 break;
1962 }
1963
1964 if ((flags & UMUTEX_ERROR_CHECK) != 0 &&
1965 (owner & ~UMUTEX_CONTESTED) == id) {
1966 error = EDEADLK;
1967 break;
1968 }
1969
1970 if (try != 0) {
1971 error = EBUSY;
1972 break;
1973 }
1974
1975 /*
1976 * If we caught a signal, we have retried and now
1977 * exit immediately.
1978 */
1979 if (error != 0)
1980 break;
1981
1982 umtxq_lock(&uq->uq_key);
1983 umtxq_insert(uq);
1984 umtxq_unbusy(&uq->uq_key);
1985 error = umtxq_sleep(uq, "umtxpp", timo);
1986 umtxq_remove(uq);
1987 umtxq_unlock(&uq->uq_key);
1988
1989 mtx_lock_spin(&umtx_lock);
1990 uq->uq_inherited_pri = old_inherited_pri;
1991 pri = PRI_MAX;
1992 TAILQ_FOREACH(pi, &uq->uq_pi_contested, pi_link) {
1993 uq2 = TAILQ_FIRST(&pi->pi_blocked);
1994 if (uq2 != NULL) {
1995 if (pri > UPRI(uq2->uq_thread))
1996 pri = UPRI(uq2->uq_thread);
1997 }
1998 }
1999 if (pri > uq->uq_inherited_pri)
2000 pri = uq->uq_inherited_pri;
2001 thread_lock(td);
2002 sched_unlend_user_prio(td, pri);
2003 thread_unlock(td);
2004 mtx_unlock_spin(&umtx_lock);
2005 }
2006
2007 if (error != 0) {
2008 mtx_lock_spin(&umtx_lock);
2009 uq->uq_inherited_pri = old_inherited_pri;
2010 pri = PRI_MAX;
2011 TAILQ_FOREACH(pi, &uq->uq_pi_contested, pi_link) {
2012 uq2 = TAILQ_FIRST(&pi->pi_blocked);
2013 if (uq2 != NULL) {
2014 if (pri > UPRI(uq2->uq_thread))
2015 pri = UPRI(uq2->uq_thread);
2016 }
2017 }
2018 if (pri > uq->uq_inherited_pri)
2019 pri = uq->uq_inherited_pri;
2020 thread_lock(td);
2021 sched_unlend_user_prio(td, pri);
2022 thread_unlock(td);
2023 mtx_unlock_spin(&umtx_lock);
2024 }
2025
2026 out:
2027 umtxq_lock(&uq->uq_key);
2028 umtxq_unbusy(&uq->uq_key);
2029 umtxq_unlock(&uq->uq_key);
2030 umtx_key_release(&uq->uq_key);
2031 return (error);
2032 }
2033
2034 /*
2035 * Unlock a PP mutex.
2036 */
2037 static int
2038 do_unlock_pp(struct thread *td, struct umutex *m, uint32_t flags)
2039 {
2040 struct umtx_key key;
2041 struct umtx_q *uq, *uq2;
2042 struct umtx_pi *pi;
2043 uint32_t owner, id;
2044 uint32_t rceiling;
2045 int error, pri, new_inherited_pri, su;
2046
2047 id = td->td_tid;
2048 uq = td->td_umtxq;
2049 su = (priv_check(td, PRIV_SCHED_RTPRIO) == 0);
2050
2051 /*
2052 * Make sure we own this mtx.
2053 */
2054 owner = fuword32(__DEVOLATILE(uint32_t *, &m->m_owner));
2055 if (owner == -1)
2056 return (EFAULT);
2057
2058 if ((owner & ~UMUTEX_CONTESTED) != id)
2059 return (EPERM);
2060
2061 error = copyin(&m->m_ceilings[1], &rceiling, sizeof(uint32_t));
2062 if (error != 0)
2063 return (error);
2064
2065 if (rceiling == -1)
2066 new_inherited_pri = PRI_MAX;
2067 else {
2068 rceiling = RTP_PRIO_MAX - rceiling;
2069 if (rceiling > RTP_PRIO_MAX)
2070 return (EINVAL);
2071 new_inherited_pri = PRI_MIN_REALTIME + rceiling;
2072 }
2073
2074 if ((error = umtx_key_get(m, TYPE_PP_UMUTEX, GET_SHARE(flags),
2075 &key)) != 0)
2076 return (error);
2077 umtxq_lock(&key);
2078 umtxq_busy(&key);
2079 umtxq_unlock(&key);
2080 /*
2081 * For priority protected mutex, always set unlocked state
2082 * to UMUTEX_CONTESTED, so that userland always enters kernel
2083 * to lock the mutex, it is necessary because thread priority
2084 * has to be adjusted for such mutex.
2085 */
2086 error = suword32(__DEVOLATILE(uint32_t *, &m->m_owner),
2087 UMUTEX_CONTESTED);
2088
2089 umtxq_lock(&key);
2090 if (error == 0)
2091 umtxq_signal(&key, 1);
2092 umtxq_unbusy(&key);
2093 umtxq_unlock(&key);
2094
2095 if (error == -1)
2096 error = EFAULT;
2097 else {
2098 mtx_lock_spin(&umtx_lock);
2099 if (su != 0)
2100 uq->uq_inherited_pri = new_inherited_pri;
2101 pri = PRI_MAX;
2102 TAILQ_FOREACH(pi, &uq->uq_pi_contested, pi_link) {
2103 uq2 = TAILQ_FIRST(&pi->pi_blocked);
2104 if (uq2 != NULL) {
2105 if (pri > UPRI(uq2->uq_thread))
2106 pri = UPRI(uq2->uq_thread);
2107 }
2108 }
2109 if (pri > uq->uq_inherited_pri)
2110 pri = uq->uq_inherited_pri;
2111 thread_lock(td);
2112 sched_unlend_user_prio(td, pri);
2113 thread_unlock(td);
2114 mtx_unlock_spin(&umtx_lock);
2115 }
2116 umtx_key_release(&key);
2117 return (error);
2118 }
2119
/*
 * Set the priority ceiling of a PP mutex (pthread_mutex_setprioceiling).
 *
 * The mutex must briefly be acquired to change the ceiling safely:
 * either CAS it from the unlocked (UMUTEX_CONTESTED) state, or, if
 * we already own it, update in place.  Sleeps if another thread
 * holds it.  On success the previous ceiling is copied out through
 * old_ceiling (if non-NULL).
 */
static int
do_set_ceiling(struct thread *td, struct umutex *m, uint32_t ceiling,
	uint32_t *old_ceiling)
{
	struct umtx_q *uq;
	uint32_t save_ceiling;
	uint32_t owner, id;
	uint32_t flags;
	int error;

	flags = fuword32(&m->m_flags);
	/* Only meaningful for priority-protected mutexes. */
	if ((flags & UMUTEX_PRIO_PROTECT) == 0)
		return (EINVAL);
	if (ceiling > RTP_PRIO_MAX)
		return (EINVAL);
	id = td->td_tid;
	uq = td->td_umtxq;
	if ((error = umtx_key_get(m, TYPE_PP_UMUTEX, GET_SHARE(flags),
	   &uq->uq_key)) != 0)
		return (error);
	for (;;) {
		umtxq_lock(&uq->uq_key);
		umtxq_busy(&uq->uq_key);
		umtxq_unlock(&uq->uq_key);

		save_ceiling = fuword32(&m->m_ceilings[0]);

		/* Try to take the mutex from its PP-unlocked state. */
		owner = casuword32(&m->m_owner,
		    UMUTEX_CONTESTED, id | UMUTEX_CONTESTED);

		if (owner == UMUTEX_CONTESTED) {
			/* Got it: store the new ceiling, then release. */
			suword32(&m->m_ceilings[0], ceiling);
			suword32(__DEVOLATILE(uint32_t *, &m->m_owner),
				UMUTEX_CONTESTED);
			error = 0;
			break;
		}

		/* The address was invalid. */
		if (owner == -1) {
			error = EFAULT;
			break;
		}

		/* Already the owner: update the ceiling in place. */
		if ((owner & ~UMUTEX_CONTESTED) == id) {
			suword32(&m->m_ceilings[0], ceiling);
			error = 0;
			break;
		}

		/*
		 * If we caught a signal, we have retried and now
		 * exit immediately.
		 */
		if (error != 0)
			break;

		/*
		 * We set the contested bit, sleep. Otherwise the lock changed
		 * and we need to retry or we lost a race to the thread
		 * unlocking the umtx.
		 */
		umtxq_lock(&uq->uq_key);
		umtxq_insert(uq);
		umtxq_unbusy(&uq->uq_key);
		error = umtxq_sleep(uq, "umtxpp", 0);
		umtxq_remove(uq);
		umtxq_unlock(&uq->uq_key);
	}
	umtxq_lock(&uq->uq_key);
	if (error == 0)
		umtxq_signal(&uq->uq_key, INT_MAX);
	umtxq_unbusy(&uq->uq_key);
	umtxq_unlock(&uq->uq_key);
	umtx_key_release(&uq->uq_key);
	if (error == 0 && old_ceiling != NULL)
		suword32(old_ceiling, save_ceiling);
	return (error);
}
2199
2200 static int
2201 _do_lock_umutex(struct thread *td, struct umutex *m, int flags, int timo,
2202 int mode)
2203 {
2204 switch(flags & (UMUTEX_PRIO_INHERIT | UMUTEX_PRIO_PROTECT)) {
2205 case 0:
2206 return (_do_lock_normal(td, m, flags, timo, mode));
2207 case UMUTEX_PRIO_INHERIT:
2208 return (_do_lock_pi(td, m, flags, timo, mode));
2209 case UMUTEX_PRIO_PROTECT:
2210 return (_do_lock_pp(td, m, flags, timo, mode));
2211 }
2212 return (EINVAL);
2213 }
2214
2215 /*
2216 * Lock a userland POSIX mutex.
2217 */
2218 static int
2219 do_lock_umutex(struct thread *td, struct umutex *m,
2220 struct timespec *timeout, int mode)
2221 {
2222 struct timespec ts, ts2, ts3;
2223 struct timeval tv;
2224 uint32_t flags;
2225 int error;
2226
2227 flags = fuword32(&m->m_flags);
2228 if (flags == -1)
2229 return (EFAULT);
2230
2231 if (timeout == NULL) {
2232 error = _do_lock_umutex(td, m, flags, 0, mode);
2233 /* Mutex locking is restarted if it is interrupted. */
2234 if (error == EINTR && mode != _UMUTEX_WAIT)
2235 error = ERESTART;
2236 } else {
2237 getnanouptime(&ts);
2238 timespecadd(&ts, timeout);
2239 TIMESPEC_TO_TIMEVAL(&tv, timeout);
2240 for (;;) {
2241 error = _do_lock_umutex(td, m, flags, tvtohz(&tv), mode);
2242 if (error != ETIMEDOUT)
2243 break;
2244 getnanouptime(&ts2);
2245 if (timespeccmp(&ts2, &ts, >=)) {
2246 error = ETIMEDOUT;
2247 break;
2248 }
2249 ts3 = ts;
2250 timespecsub(&ts3, &ts2);
2251 TIMESPEC_TO_TIMEVAL(&tv, &ts3);
2252 }
2253 /* Timed-locking is not restarted. */
2254 if (error == ERESTART)
2255 error = EINTR;
2256 }
2257 return (error);
2258 }
2259
2260 /*
2261 * Unlock a userland POSIX mutex.
2262 */
2263 static int
2264 do_unlock_umutex(struct thread *td, struct umutex *m)
2265 {
2266 uint32_t flags;
2267
2268 flags = fuword32(&m->m_flags);
2269 if (flags == -1)
2270 return (EFAULT);
2271
2272 switch(flags & (UMUTEX_PRIO_INHERIT | UMUTEX_PRIO_PROTECT)) {
2273 case 0:
2274 return (do_unlock_normal(td, m, flags));
2275 case UMUTEX_PRIO_INHERIT:
2276 return (do_unlock_pi(td, m, flags));
2277 case UMUTEX_PRIO_PROTECT:
2278 return (do_unlock_pp(td, m, flags));
2279 }
2280
2281 return (EINVAL);
2282 }
2283
/*
 * Wait on a userland condition variable (pthread_cond_wait/_timedwait).
 *
 * Queues the thread, publishes c_has_waiters, releases the associated
 * mutex, then sleeps (optionally with an absolute deadline rebuilt
 * from 'timeout').  UMTX_CHECK_UNPARKING lets a pending unpark turn
 * into EINTR instead of sleeping.
 */
static int
do_cv_wait(struct thread *td, struct ucond *cv, struct umutex *m,
	struct timespec *timeout, u_long wflags)
{
	struct umtx_q *uq;
	struct timeval tv;
	struct timespec cts, ets, tts;
	uint32_t flags;
	int error;

	uq = td->td_umtxq;
	flags = fuword32(&cv->c_flags);
	error = umtx_key_get(cv, TYPE_CV, GET_SHARE(flags), &uq->uq_key);
	if (error != 0)
		return (error);
	umtxq_lock(&uq->uq_key);
	umtxq_busy(&uq->uq_key);
	umtxq_insert(uq);
	umtxq_unlock(&uq->uq_key);

	/*
	 * The magic thing is we should set c_has_waiters to 1 before
	 * releasing user mutex.
	 */
	suword32(__DEVOLATILE(uint32_t *, &cv->c_has_waiters), 1);

	umtxq_lock(&uq->uq_key);
	umtxq_unbusy(&uq->uq_key);
	umtxq_unlock(&uq->uq_key);

	/* Atomic with respect to wakeups: we are already queued. */
	error = do_unlock_umutex(td, m);

	umtxq_lock(&uq->uq_key);
	if (error == 0) {
		if ((wflags & UMTX_CHECK_UNPARKING) &&
		    (td->td_pflags & TDP_WAKEUP)) {
			/* Pending unpark: consume it rather than sleeping. */
			td->td_pflags &= ~TDP_WAKEUP;
			error = EINTR;
		} else if (timeout == NULL) {
			error = umtxq_sleep(uq, "ucond", 0);
		} else {
			/* ets is the absolute deadline on the uptime clock. */
			getnanouptime(&ets);
			timespecadd(&ets, timeout);
			TIMESPEC_TO_TIMEVAL(&tv, timeout);
			for (;;) {
				error = umtxq_sleep(uq, "ucond", tvtohz(&tv));
				if (error != ETIMEDOUT)
					break;
				getnanouptime(&cts);
				if (timespeccmp(&cts, &ets, >=)) {
					error = ETIMEDOUT;
					break;
				}
				tts = ets;
				timespecsub(&tts, &cts);
				TIMESPEC_TO_TIMEVAL(&tv, &tts);
			}
		}
	}

	if (error != 0) {
		if ((uq->uq_flags & UQF_UMTXQ) == 0) {
			/*
			 * If we concurrently got do_cv_signal()d
			 * and we got an error or UNIX signals or a timeout,
			 * then, perform another umtxq_signal to avoid
			 * consuming the wakeup. This may cause spurious
			 * wakeup for another thread which was just queued,
			 * but SUSV3 explicitly allows spurious wakeup to
			 * occur, and indeed a kernel based implementation
			 * can not avoid it.
			 */
			if (!umtxq_signal(&uq->uq_key, 1))
				error = 0;
		}
		if (error == ERESTART)
			error = EINTR;
	}
	umtxq_remove(uq);
	umtxq_unlock(&uq->uq_key);
	umtx_key_release(&uq->uq_key);
	return (error);
}
2367
2368 /*
2369 * Signal a userland condition variable.
2370 */
2371 static int
2372 do_cv_signal(struct thread *td, struct ucond *cv)
2373 {
2374 struct umtx_key key;
2375 int error, cnt, nwake;
2376 uint32_t flags;
2377
2378 flags = fuword32(&cv->c_flags);
2379 if ((error = umtx_key_get(cv, TYPE_CV, GET_SHARE(flags), &key)) != 0)
2380 return (error);
2381 umtxq_lock(&key);
2382 umtxq_busy(&key);
2383 cnt = umtxq_count(&key);
2384 nwake = umtxq_signal(&key, 1);
2385 if (cnt <= nwake) {
2386 umtxq_unlock(&key);
2387 error = suword32(
2388 __DEVOLATILE(uint32_t *, &cv->c_has_waiters), 0);
2389 umtxq_lock(&key);
2390 }
2391 umtxq_unbusy(&key);
2392 umtxq_unlock(&key);
2393 umtx_key_release(&key);
2394 return (error);
2395 }
2396
/*
 * Broadcast a userland condition variable: wake every waiter and
 * clear the userland c_has_waiters flag while the chain is busied.
 */
static int
do_cv_broadcast(struct thread *td, struct ucond *cv)
{
	struct umtx_key key;
	int error;
	uint32_t flags;

	flags = fuword32(&cv->c_flags);
	if ((error = umtx_key_get(cv, TYPE_CV, GET_SHARE(flags), &key)) != 0)
		return (error);

	umtxq_lock(&key);
	umtxq_busy(&key);
	umtxq_signal(&key, INT_MAX);
	umtxq_unlock(&key);

	/* Busy state still held: no new waiter can register meanwhile. */
	error = suword32(__DEVOLATILE(uint32_t *, &cv->c_has_waiters), 0);

	umtxq_lock(&key);
	umtxq_unbusy(&key);
	umtxq_unlock(&key);

	umtx_key_release(&key);
	return (error);
}
2422
/*
 * Acquire a userland rwlock for reading.
 *
 * Fast path CASes the reader count up while no writer (and, unless
 * reader-preferring, no write waiter) holds the lock.  The slow path
 * sets URWLOCK_READ_WAITERS, bumps rw_blocked_readers, and sleeps
 * until the blocking condition clears; the last blocked reader to
 * leave clears the READ_WAITERS bit.
 */
static int
do_rw_rdlock(struct thread *td, struct urwlock *rwlock, long fflag, int timo)
{
	struct umtx_q *uq;
	uint32_t flags, wrflags;
	int32_t state, oldstate;
	int32_t blocked_readers;
	int error;

	uq = td->td_umtxq;
	flags = fuword32(&rwlock->rw_flags);
	error = umtx_key_get(rwlock, TYPE_RWLOCK, GET_SHARE(flags), &uq->uq_key);
	if (error != 0)
		return (error);

	/* Bits that block a reader; writer-preference also blocks on waiters. */
	wrflags = URWLOCK_WRITE_OWNER;
	if (!(fflag & URWLOCK_PREFER_READER) && !(flags & URWLOCK_PREFER_READER))
		wrflags |= URWLOCK_WRITE_WAITERS;

	for (;;) {
		state = fuword32(__DEVOLATILE(int32_t *, &rwlock->rw_state));
		/* try to lock it */
		while (!(state & wrflags)) {
			if (__predict_false(URWLOCK_READER_COUNT(state) == URWLOCK_MAX_READERS)) {
				umtx_key_release(&uq->uq_key);
				return (EAGAIN);
			}
			/* Reader count lives in the low bits: +1 adds a reader. */
			oldstate = casuword32(&rwlock->rw_state, state, state + 1);
			if (oldstate == state) {
				umtx_key_release(&uq->uq_key);
				return (0);
			}
			state = oldstate;
		}

		if (error)
			break;

		/* grab monitor lock */
		umtxq_lock(&uq->uq_key);
		umtxq_busy(&uq->uq_key);
		umtxq_unlock(&uq->uq_key);

		/* set read contention bit */
		while ((state & wrflags) && !(state & URWLOCK_READ_WAITERS)) {
			oldstate = casuword32(&rwlock->rw_state, state, state | URWLOCK_READ_WAITERS);
			if (oldstate == state)
				goto sleep;
			state = oldstate;
		}

		/* state is changed while setting flags, restart */
		if (!(state & wrflags)) {
			umtxq_lock(&uq->uq_key);
			umtxq_unbusy(&uq->uq_key);
			umtxq_unlock(&uq->uq_key);
			continue;
		}

sleep:
		/* contention bit is set, before sleeping, increase read waiter count */
		blocked_readers = fuword32(&rwlock->rw_blocked_readers);
		suword32(&rwlock->rw_blocked_readers, blocked_readers+1);

		while (state & wrflags) {
			umtxq_lock(&uq->uq_key);
			umtxq_insert(uq);
			umtxq_unbusy(&uq->uq_key);

			error = umtxq_sleep(uq, "urdlck", timo);

			umtxq_busy(&uq->uq_key);
			umtxq_remove(uq);
			umtxq_unlock(&uq->uq_key);
			if (error)
				break;
			state = fuword32(__DEVOLATILE(int32_t *, &rwlock->rw_state));
		}

		/* decrease read waiter count, and may clear read contention bit */
		blocked_readers = fuword32(&rwlock->rw_blocked_readers);
		suword32(&rwlock->rw_blocked_readers, blocked_readers-1);
		if (blocked_readers == 1) {
			/* We were the last blocked reader: drop READ_WAITERS. */
			state = fuword32(__DEVOLATILE(int32_t *, &rwlock->rw_state));
			for (;;) {
				oldstate = casuword32(&rwlock->rw_state, state,
					 state & ~URWLOCK_READ_WAITERS);
				if (oldstate == state)
					break;
				state = oldstate;
			}
		}

		umtxq_lock(&uq->uq_key);
		umtxq_unbusy(&uq->uq_key);
		umtxq_unlock(&uq->uq_key);
	}
	umtx_key_release(&uq->uq_key);
	return (error);
}
2523
2524 static int
2525 do_rw_rdlock2(struct thread *td, void *obj, long val, struct timespec *timeout)
2526 {
2527 struct timespec ts, ts2, ts3;
2528 struct timeval tv;
2529 int error;
2530
2531 getnanouptime(&ts);
2532 timespecadd(&ts, timeout);
2533 TIMESPEC_TO_TIMEVAL(&tv, timeout);
2534 for (;;) {
2535 error = do_rw_rdlock(td, obj, val, tvtohz(&tv));
2536 if (error != ETIMEDOUT)
2537 break;
2538 getnanouptime(&ts2);
2539 if (timespeccmp(&ts2, &ts, >=)) {
2540 error = ETIMEDOUT;
2541 break;
2542 }
2543 ts3 = ts;
2544 timespecsub(&ts3, &ts2);
2545 TIMESPEC_TO_TIMEVAL(&tv, &ts3);
2546 }
2547 if (error == ERESTART)
2548 error = EINTR;
2549 return (error);
2550 }
2551
/*
 * Acquire a userland rwlock for writing.  Mirrors do_rw_rdlock(): the
 * lock word is updated in user memory with casuword32 and the kernel
 * supplies the exclusive-waiter sleep queue.  Returns 0 on success or
 * a sleep error.
 */
static int
do_rw_wrlock(struct thread *td, struct urwlock *rwlock, int timo)
{
	struct umtx_q *uq;
	uint32_t flags;
	int32_t state, oldstate;
	int32_t blocked_writers;
	int error;

	uq = td->td_umtxq;
	flags = fuword32(&rwlock->rw_flags);
	error = umtx_key_get(rwlock, TYPE_RWLOCK, GET_SHARE(flags), &uq->uq_key);
	if (error != 0)
		return (error);

	for (;;) {
		state = fuword32(__DEVOLATILE(int32_t *, &rwlock->rw_state));
		/* Lock is free when no writer owns it and no readers hold it. */
		while (!(state & URWLOCK_WRITE_OWNER) && URWLOCK_READER_COUNT(state) == 0) {
			oldstate = casuword32(&rwlock->rw_state, state, state | URWLOCK_WRITE_OWNER);
			if (oldstate == state) {
				umtx_key_release(&uq->uq_key);
				return (0);
			}
			/* CAS lost a race; retry with the value we saw. */
			state = oldstate;
		}

		/* A previous sleep failed (timeout/signal); report it. */
		if (error)
			break;

		/* grab monitor lock */
		umtxq_lock(&uq->uq_key);
		umtxq_busy(&uq->uq_key);
		umtxq_unlock(&uq->uq_key);

		/* Advertise write contention before sleeping. */
		while (((state & URWLOCK_WRITE_OWNER) || URWLOCK_READER_COUNT(state) != 0) &&
		       (state & URWLOCK_WRITE_WAITERS) == 0) {
			oldstate = casuword32(&rwlock->rw_state, state, state | URWLOCK_WRITE_WAITERS);
			if (oldstate == state)
				goto sleep;
			state = oldstate;
		}

		/* The lock became free while setting the flag; restart. */
		if (!(state & URWLOCK_WRITE_OWNER) && URWLOCK_READER_COUNT(state) == 0) {
			umtxq_lock(&uq->uq_key);
			umtxq_unbusy(&uq->uq_key);
			umtxq_unlock(&uq->uq_key);
			continue;
		}
sleep:
		/* Contention bit is set; count ourselves as a blocked writer. */
		blocked_writers = fuword32(&rwlock->rw_blocked_writers);
		suword32(&rwlock->rw_blocked_writers, blocked_writers+1);

		while ((state & URWLOCK_WRITE_OWNER) || URWLOCK_READER_COUNT(state) != 0) {
			umtxq_lock(&uq->uq_key);
			umtxq_insert_queue(uq, UMTX_EXCLUSIVE_QUEUE);
			umtxq_unbusy(&uq->uq_key);

			error = umtxq_sleep(uq, "uwrlck", timo);

			umtxq_busy(&uq->uq_key);
			umtxq_remove_queue(uq, UMTX_EXCLUSIVE_QUEUE);
			umtxq_unlock(&uq->uq_key);
			if (error)
				break;
			state = fuword32(__DEVOLATILE(int32_t *, &rwlock->rw_state));
		}

		/* Unblock; last blocked writer clears the waiters bit. */
		blocked_writers = fuword32(&rwlock->rw_blocked_writers);
		suword32(&rwlock->rw_blocked_writers, blocked_writers-1);
		if (blocked_writers == 1) {
			state = fuword32(__DEVOLATILE(int32_t *, &rwlock->rw_state));
			for (;;) {
				oldstate = casuword32(&rwlock->rw_state, state,
					 state & ~URWLOCK_WRITE_WAITERS);
				if (oldstate == state)
					break;
				state = oldstate;
			}
		}

		umtxq_lock(&uq->uq_key);
		umtxq_unbusy(&uq->uq_key);
		umtxq_unlock(&uq->uq_key);
	}

	umtx_key_release(&uq->uq_key);
	return (error);
}
2640
2641 static int
2642 do_rw_wrlock2(struct thread *td, void *obj, struct timespec *timeout)
2643 {
2644 struct timespec ts, ts2, ts3;
2645 struct timeval tv;
2646 int error;
2647
2648 getnanouptime(&ts);
2649 timespecadd(&ts, timeout);
2650 TIMESPEC_TO_TIMEVAL(&tv, timeout);
2651 for (;;) {
2652 error = do_rw_wrlock(td, obj, tvtohz(&tv));
2653 if (error != ETIMEDOUT)
2654 break;
2655 getnanouptime(&ts2);
2656 if (timespeccmp(&ts2, &ts, >=)) {
2657 error = ETIMEDOUT;
2658 break;
2659 }
2660 ts3 = ts;
2661 timespecsub(&ts3, &ts2);
2662 TIMESPEC_TO_TIMEVAL(&tv, &ts3);
2663 }
2664 if (error == ERESTART)
2665 error = EINTR;
2666 return (error);
2667 }
2668
/*
 * Release a userland rwlock held for either reading or writing, then
 * wake waiters.  Returns EPERM if the lock word shows the caller could
 * not actually hold the lock.
 */
static int
do_rw_unlock(struct thread *td, struct urwlock *rwlock)
{
	struct umtx_q *uq;
	uint32_t flags;
	int32_t state, oldstate;
	int error, q, count;

	uq = td->td_umtxq;
	flags = fuword32(&rwlock->rw_flags);
	error = umtx_key_get(rwlock, TYPE_RWLOCK, GET_SHARE(flags), &uq->uq_key);
	if (error != 0)
		return (error);

	state = fuword32(__DEVOLATILE(int32_t *, &rwlock->rw_state));
	if (state & URWLOCK_WRITE_OWNER) {
		/* Write-locked: clear the owner bit. */
		for (;;) {
			oldstate = casuword32(&rwlock->rw_state, state,
				state & ~URWLOCK_WRITE_OWNER);
			if (oldstate != state) {
				state = oldstate;
				/* Someone else already released it: not ours. */
				if (!(oldstate & URWLOCK_WRITE_OWNER)) {
					error = EPERM;
					goto out;
				}
			} else
				break;
		}
	} else if (URWLOCK_READER_COUNT(state) != 0) {
		/* Read-locked: drop one reader. */
		for (;;) {
			oldstate = casuword32(&rwlock->rw_state, state,
				state - 1);
			if (oldstate != state) {
				state = oldstate;
				/* Reader count raced to zero: not ours. */
				if (URWLOCK_READER_COUNT(oldstate) == 0) {
					error = EPERM;
					goto out;
				}
			}
			else
				break;
		}
	} else {
		/* Neither write- nor read-locked: caller holds nothing. */
		error = EPERM;
		goto out;
	}

	count = 0;

	/*
	 * Pick which queue to wake: one writer, or all readers, ordered by
	 * the lock's preference.  q is only read when count != 0, so it is
	 * always initialized before use.
	 */
	if (!(flags & URWLOCK_PREFER_READER)) {
		if (state & URWLOCK_WRITE_WAITERS) {
			count = 1;
			q = UMTX_EXCLUSIVE_QUEUE;
		} else if (state & URWLOCK_READ_WAITERS) {
			count = INT_MAX;
			q = UMTX_SHARED_QUEUE;
		}
	} else {
		if (state & URWLOCK_READ_WAITERS) {
			count = INT_MAX;
			q = UMTX_SHARED_QUEUE;
		} else if (state & URWLOCK_WRITE_WAITERS) {
			count = 1;
			q = UMTX_EXCLUSIVE_QUEUE;
		}
	}

	if (count) {
		umtxq_lock(&uq->uq_key);
		umtxq_busy(&uq->uq_key);
		umtxq_signal_queue(&uq->uq_key, count, q);
		umtxq_unbusy(&uq->uq_key);
		umtxq_unlock(&uq->uq_key);
	}
out:
	umtx_key_release(&uq->uq_key);
	return (error);
}
2747
/*
 * _umtx_lock(2): lock a userland umtx with the caller's tid, waiting
 * forever (no timeout).
 */
int
_umtx_lock(struct thread *td, struct _umtx_lock_args *uap)
    /* struct umtx *umtx */
{
	return _do_lock_umtx(td, uap->umtx, td->td_tid, 0);
}
2754
/*
 * _umtx_unlock(2): unlock a userland umtx owned by the caller's tid.
 */
int
_umtx_unlock(struct thread *td, struct _umtx_unlock_args *uap)
    /* struct umtx *umtx */
{
	return do_unlock_umtx(td, uap->umtx, td->td_tid);
}
2761
2762 static int
2763 __umtx_op_lock_umtx(struct thread *td, struct _umtx_op_args *uap)
2764 {
2765 struct timespec *ts, timeout;
2766 int error;
2767
2768 /* Allow a null timespec (wait forever). */
2769 if (uap->uaddr2 == NULL)
2770 ts = NULL;
2771 else {
2772 error = copyin(uap->uaddr2, &timeout, sizeof(timeout));
2773 if (error != 0)
2774 return (error);
2775 if (timeout.tv_nsec >= 1000000000 ||
2776 timeout.tv_nsec < 0) {
2777 return (EINVAL);
2778 }
2779 ts = &timeout;
2780 }
2781 return (do_lock_umtx(td, uap->obj, uap->val, ts));
2782 }
2783
/* UMTX_OP_UNLOCK handler: unlock the umtx owned by id uap->val. */
static int
__umtx_op_unlock_umtx(struct thread *td, struct _umtx_op_args *uap)
{
	return (do_unlock_umtx(td, uap->obj, uap->val));
}
2789
2790 static int
2791 __umtx_op_wait(struct thread *td, struct _umtx_op_args *uap)
2792 {
2793 struct timespec *ts, timeout;
2794 int error;
2795
2796 if (uap->uaddr2 == NULL)
2797 ts = NULL;
2798 else {
2799 error = copyin(uap->uaddr2, &timeout, sizeof(timeout));
2800 if (error != 0)
2801 return (error);
2802 if (timeout.tv_nsec >= 1000000000 ||
2803 timeout.tv_nsec < 0)
2804 return (EINVAL);
2805 ts = &timeout;
2806 }
2807 return do_wait(td, uap->obj, uap->val, ts, 0, 0);
2808 }
2809
2810 static int
2811 __umtx_op_wait_uint(struct thread *td, struct _umtx_op_args *uap)
2812 {
2813 struct timespec *ts, timeout;
2814 int error;
2815
2816 if (uap->uaddr2 == NULL)
2817 ts = NULL;
2818 else {
2819 error = copyin(uap->uaddr2, &timeout, sizeof(timeout));
2820 if (error != 0)
2821 return (error);
2822 if (timeout.tv_nsec >= 1000000000 ||
2823 timeout.tv_nsec < 0)
2824 return (EINVAL);
2825 ts = &timeout;
2826 }
2827 return do_wait(td, uap->obj, uap->val, ts, 1, 0);
2828 }
2829
2830 static int
2831 __umtx_op_wait_uint_private(struct thread *td, struct _umtx_op_args *uap)
2832 {
2833 struct timespec *ts, timeout;
2834 int error;
2835
2836 if (uap->uaddr2 == NULL)
2837 ts = NULL;
2838 else {
2839 error = copyin(uap->uaddr2, &timeout, sizeof(timeout));
2840 if (error != 0)
2841 return (error);
2842 if (timeout.tv_nsec >= 1000000000 ||
2843 timeout.tv_nsec < 0)
2844 return (EINVAL);
2845 ts = &timeout;
2846 }
2847 return do_wait(td, uap->obj, uap->val, ts, 1, 1);
2848 }
2849
/* UMTX_OP_WAKE handler: wake up to uap->val waiters (shared key). */
static int
__umtx_op_wake(struct thread *td, struct _umtx_op_args *uap)
{
	return (kern_umtx_wake(td, uap->obj, uap->val, 0));
}
2855
/* UMTX_OP_WAKE_PRIVATE handler: as above, with a process-private key. */
static int
__umtx_op_wake_private(struct thread *td, struct _umtx_op_args *uap)
{
	return (kern_umtx_wake(td, uap->obj, uap->val, 1));
}
2861
2862 static int
2863 __umtx_op_lock_umutex(struct thread *td, struct _umtx_op_args *uap)
2864 {
2865 struct timespec *ts, timeout;
2866 int error;
2867
2868 /* Allow a null timespec (wait forever). */
2869 if (uap->uaddr2 == NULL)
2870 ts = NULL;
2871 else {
2872 error = copyin(uap->uaddr2, &timeout,
2873 sizeof(timeout));
2874 if (error != 0)
2875 return (error);
2876 if (timeout.tv_nsec >= 1000000000 ||
2877 timeout.tv_nsec < 0) {
2878 return (EINVAL);
2879 }
2880 ts = &timeout;
2881 }
2882 return do_lock_umutex(td, uap->obj, ts, 0);
2883 }
2884
/* UMTX_OP_MUTEX_TRYLOCK handler: non-blocking umutex lock attempt. */
static int
__umtx_op_trylock_umutex(struct thread *td, struct _umtx_op_args *uap)
{
	return do_lock_umutex(td, uap->obj, NULL, _UMUTEX_TRY);
}
2890
2891 static int
2892 __umtx_op_wait_umutex(struct thread *td, struct _umtx_op_args *uap)
2893 {
2894 struct timespec *ts, timeout;
2895 int error;
2896
2897 /* Allow a null timespec (wait forever). */
2898 if (uap->uaddr2 == NULL)
2899 ts = NULL;
2900 else {
2901 error = copyin(uap->uaddr2, &timeout,
2902 sizeof(timeout));
2903 if (error != 0)
2904 return (error);
2905 if (timeout.tv_nsec >= 1000000000 ||
2906 timeout.tv_nsec < 0) {
2907 return (EINVAL);
2908 }
2909 ts = &timeout;
2910 }
2911 return do_lock_umutex(td, uap->obj, ts, _UMUTEX_WAIT);
2912 }
2913
/* UMTX_OP_UMUTEX_WAKE handler: wake a waiter blocked on the umutex. */
static int
__umtx_op_wake_umutex(struct thread *td, struct _umtx_op_args *uap)
{
	return do_wake_umutex(td, uap->obj);
}
2919
/* UMTX_OP_MUTEX_UNLOCK handler: release the umutex. */
static int
__umtx_op_unlock_umutex(struct thread *td, struct _umtx_op_args *uap)
{
	return do_unlock_umutex(td, uap->obj);
}
2925
/*
 * UMTX_OP_SET_CEILING handler: set the priority-protect ceiling to
 * uap->val; the old ceiling is stored through uap->uaddr1 if non-NULL.
 */
static int
__umtx_op_set_ceiling(struct thread *td, struct _umtx_op_args *uap)
{
	return do_set_ceiling(td, uap->obj, uap->val, uap->uaddr1);
}
2931
2932 static int
2933 __umtx_op_cv_wait(struct thread *td, struct _umtx_op_args *uap)
2934 {
2935 struct timespec *ts, timeout;
2936 int error;
2937
2938 /* Allow a null timespec (wait forever). */
2939 if (uap->uaddr2 == NULL)
2940 ts = NULL;
2941 else {
2942 error = copyin(uap->uaddr2, &timeout,
2943 sizeof(timeout));
2944 if (error != 0)
2945 return (error);
2946 if (timeout.tv_nsec >= 1000000000 ||
2947 timeout.tv_nsec < 0) {
2948 return (EINVAL);
2949 }
2950 ts = &timeout;
2951 }
2952 return (do_cv_wait(td, uap->obj, uap->uaddr1, ts, uap->val));
2953 }
2954
/* UMTX_OP_CV_SIGNAL handler: wake one waiter on the condvar. */
static int
__umtx_op_cv_signal(struct thread *td, struct _umtx_op_args *uap)
{
	return do_cv_signal(td, uap->obj);
}
2960
/* UMTX_OP_CV_BROADCAST handler: wake all waiters on the condvar. */
static int
__umtx_op_cv_broadcast(struct thread *td, struct _umtx_op_args *uap)
{
	return do_cv_broadcast(td, uap->obj);
}
2966
2967 static int
2968 __umtx_op_rw_rdlock(struct thread *td, struct _umtx_op_args *uap)
2969 {
2970 struct timespec timeout;
2971 int error;
2972
2973 /* Allow a null timespec (wait forever). */
2974 if (uap->uaddr2 == NULL) {
2975 error = do_rw_rdlock(td, uap->obj, uap->val, 0);
2976 } else {
2977 error = copyin(uap->uaddr2, &timeout,
2978 sizeof(timeout));
2979 if (error != 0)
2980 return (error);
2981 if (timeout.tv_nsec >= 1000000000 ||
2982 timeout.tv_nsec < 0) {
2983 return (EINVAL);
2984 }
2985 error = do_rw_rdlock2(td, uap->obj, uap->val, &timeout);
2986 }
2987 return (error);
2988 }
2989
2990 static int
2991 __umtx_op_rw_wrlock(struct thread *td, struct _umtx_op_args *uap)
2992 {
2993 struct timespec timeout;
2994 int error;
2995
2996 /* Allow a null timespec (wait forever). */
2997 if (uap->uaddr2 == NULL) {
2998 error = do_rw_wrlock(td, uap->obj, 0);
2999 } else {
3000 error = copyin(uap->uaddr2, &timeout,
3001 sizeof(timeout));
3002 if (error != 0)
3003 return (error);
3004 if (timeout.tv_nsec >= 1000000000 ||
3005 timeout.tv_nsec < 0) {
3006 return (EINVAL);
3007 }
3008
3009 error = do_rw_wrlock2(td, uap->obj, &timeout);
3010 }
3011 return (error);
3012 }
3013
/* UMTX_OP_RW_UNLOCK handler: release a read or write urwlock hold. */
static int
__umtx_op_rw_unlock(struct thread *td, struct _umtx_op_args *uap)
{
	return do_rw_unlock(td, uap->obj);
}
3019
typedef int (*_umtx_op_func)(struct thread *td, struct _umtx_op_args *uap);

/*
 * Dispatch table for _umtx_op(2), indexed by the UMTX_OP_* constant;
 * _umtx_op() bounds-checks the index against UMTX_OP_MAX.
 */
static _umtx_op_func op_table[] = {
	__umtx_op_lock_umtx,		/* UMTX_OP_LOCK */
	__umtx_op_unlock_umtx,		/* UMTX_OP_UNLOCK */
	__umtx_op_wait,			/* UMTX_OP_WAIT */
	__umtx_op_wake,			/* UMTX_OP_WAKE */
	__umtx_op_trylock_umutex,	/* UMTX_OP_MUTEX_TRYLOCK */
	__umtx_op_lock_umutex,		/* UMTX_OP_MUTEX_LOCK */
	__umtx_op_unlock_umutex,	/* UMTX_OP_MUTEX_UNLOCK */
	__umtx_op_set_ceiling,		/* UMTX_OP_SET_CEILING */
	__umtx_op_cv_wait,		/* UMTX_OP_CV_WAIT*/
	__umtx_op_cv_signal,		/* UMTX_OP_CV_SIGNAL */
	__umtx_op_cv_broadcast,		/* UMTX_OP_CV_BROADCAST */
	__umtx_op_wait_uint,		/* UMTX_OP_WAIT_UINT */
	__umtx_op_rw_rdlock,		/* UMTX_OP_RW_RDLOCK */
	__umtx_op_rw_wrlock,		/* UMTX_OP_RW_WRLOCK */
	__umtx_op_rw_unlock,		/* UMTX_OP_RW_UNLOCK */
	__umtx_op_wait_uint_private,	/* UMTX_OP_WAIT_UINT_PRIVATE */
	__umtx_op_wake_private,		/* UMTX_OP_WAKE_PRIVATE */
	__umtx_op_wait_umutex,		/* UMTX_OP_UMUTEX_WAIT */
	__umtx_op_wake_umutex		/* UMTX_OP_UMUTEX_WAKE */
};
3043
3044 int
3045 _umtx_op(struct thread *td, struct _umtx_op_args *uap)
3046 {
3047 if ((unsigned)uap->op < UMTX_OP_MAX)
3048 return (*op_table[uap->op])(td, uap);
3049 return (EINVAL);
3050 }
3051
3052 #ifdef COMPAT_IA32
/* 32-bit compat _umtx_lock(2): lock word is a 32-bit umtx. */
int
freebsd32_umtx_lock(struct thread *td, struct freebsd32_umtx_lock_args *uap)
    /* struct umtx *umtx */
{
	return (do_lock_umtx32(td, (uint32_t *)uap->umtx, td->td_tid, NULL));
}
3059
/* 32-bit compat _umtx_unlock(2). */
int
freebsd32_umtx_unlock(struct thread *td, struct freebsd32_umtx_unlock_args *uap)
    /* struct umtx *umtx */
{
	return (do_unlock_umtx32(td, (uint32_t *)uap->umtx, td->td_tid));
}
3066
/*
 * The 32-bit process layout of struct timespec.
 * NOTE(review): tv_nsec is declared unsigned, so a negative user value
 * arrives as a large positive number after widening; the callers'
 * range checks reject it, but confirm that is the intended handling.
 */
struct timespec32 {
	u_int32_t tv_sec;
	u_int32_t tv_nsec;
};
3071
3072 static inline int
3073 copyin_timeout32(void *addr, struct timespec *tsp)
3074 {
3075 struct timespec32 ts32;
3076 int error;
3077
3078 error = copyin(addr, &ts32, sizeof(struct timespec32));
3079 if (error == 0) {
3080 tsp->tv_sec = ts32.tv_sec;
3081 tsp->tv_nsec = ts32.tv_nsec;
3082 }
3083 return (error);
3084 }
3085
3086 static int
3087 __umtx_op_lock_umtx_compat32(struct thread *td, struct _umtx_op_args *uap)
3088 {
3089 struct timespec *ts, timeout;
3090 int error;
3091
3092 /* Allow a null timespec (wait forever). */
3093 if (uap->uaddr2 == NULL)
3094 ts = NULL;
3095 else {
3096 error = copyin_timeout32(uap->uaddr2, &timeout);
3097 if (error != 0)
3098 return (error);
3099 if (timeout.tv_nsec >= 1000000000 ||
3100 timeout.tv_nsec < 0) {
3101 return (EINVAL);
3102 }
3103 ts = &timeout;
3104 }
3105 return (do_lock_umtx32(td, uap->obj, uap->val, ts));
3106 }
3107
/* 32-bit compat UMTX_OP_UNLOCK handler. */
static int
__umtx_op_unlock_umtx_compat32(struct thread *td, struct _umtx_op_args *uap)
{
	return (do_unlock_umtx32(td, uap->obj, (uint32_t)uap->val));
}
3113
3114 static int
3115 __umtx_op_wait_compat32(struct thread *td, struct _umtx_op_args *uap)
3116 {
3117 struct timespec *ts, timeout;
3118 int error;
3119
3120 if (uap->uaddr2 == NULL)
3121 ts = NULL;
3122 else {
3123 error = copyin_timeout32(uap->uaddr2, &timeout);
3124 if (error != 0)
3125 return (error);
3126 if (timeout.tv_nsec >= 1000000000 ||
3127 timeout.tv_nsec < 0)
3128 return (EINVAL);
3129 ts = &timeout;
3130 }
3131 return do_wait(td, uap->obj, uap->val, ts, 1, 0);
3132 }
3133
3134 static int
3135 __umtx_op_lock_umutex_compat32(struct thread *td, struct _umtx_op_args *uap)
3136 {
3137 struct timespec *ts, timeout;
3138 int error;
3139
3140 /* Allow a null timespec (wait forever). */
3141 if (uap->uaddr2 == NULL)
3142 ts = NULL;
3143 else {
3144 error = copyin_timeout32(uap->uaddr2, &timeout);
3145 if (error != 0)
3146 return (error);
3147 if (timeout.tv_nsec >= 1000000000 ||
3148 timeout.tv_nsec < 0)
3149 return (EINVAL);
3150 ts = &timeout;
3151 }
3152 return do_lock_umutex(td, uap->obj, ts, 0);
3153 }
3154
3155 static int
3156 __umtx_op_wait_umutex_compat32(struct thread *td, struct _umtx_op_args *uap)
3157 {
3158 struct timespec *ts, timeout;
3159 int error;
3160
3161 /* Allow a null timespec (wait forever). */
3162 if (uap->uaddr2 == NULL)
3163 ts = NULL;
3164 else {
3165 error = copyin_timeout32(uap->uaddr2, &timeout);
3166 if (error != 0)
3167 return (error);
3168 if (timeout.tv_nsec >= 1000000000 ||
3169 timeout.tv_nsec < 0)
3170 return (EINVAL);
3171 ts = &timeout;
3172 }
3173 return do_lock_umutex(td, uap->obj, ts, _UMUTEX_WAIT);
3174 }
3175
3176 static int
3177 __umtx_op_cv_wait_compat32(struct thread *td, struct _umtx_op_args *uap)
3178 {
3179 struct timespec *ts, timeout;
3180 int error;
3181
3182 /* Allow a null timespec (wait forever). */
3183 if (uap->uaddr2 == NULL)
3184 ts = NULL;
3185 else {
3186 error = copyin_timeout32(uap->uaddr2, &timeout);
3187 if (error != 0)
3188 return (error);
3189 if (timeout.tv_nsec >= 1000000000 ||
3190 timeout.tv_nsec < 0)
3191 return (EINVAL);
3192 ts = &timeout;
3193 }
3194 return (do_cv_wait(td, uap->obj, uap->uaddr1, ts, uap->val));
3195 }
3196
3197 static int
3198 __umtx_op_rw_rdlock_compat32(struct thread *td, struct _umtx_op_args *uap)
3199 {
3200 struct timespec timeout;
3201 int error;
3202
3203 /* Allow a null timespec (wait forever). */
3204 if (uap->uaddr2 == NULL) {
3205 error = do_rw_rdlock(td, uap->obj, uap->val, 0);
3206 } else {
3207 error = copyin(uap->uaddr2, &timeout,
3208 sizeof(timeout));
3209 if (error != 0)
3210 return (error);
3211 if (timeout.tv_nsec >= 1000000000 ||
3212 timeout.tv_nsec < 0) {
3213 return (EINVAL);
3214 }
3215 error = do_rw_rdlock2(td, uap->obj, uap->val, &timeout);
3216 }
3217 return (error);
3218 }
3219
3220 static int
3221 __umtx_op_rw_wrlock_compat32(struct thread *td, struct _umtx_op_args *uap)
3222 {
3223 struct timespec timeout;
3224 int error;
3225
3226 /* Allow a null timespec (wait forever). */
3227 if (uap->uaddr2 == NULL) {
3228 error = do_rw_wrlock(td, uap->obj, 0);
3229 } else {
3230 error = copyin_timeout32(uap->uaddr2, &timeout);
3231 if (error != 0)
3232 return (error);
3233 if (timeout.tv_nsec >= 1000000000 ||
3234 timeout.tv_nsec < 0) {
3235 return (EINVAL);
3236 }
3237
3238 error = do_rw_wrlock2(td, uap->obj, &timeout);
3239 }
3240 return (error);
3241 }
3242
3243 static int
3244 __umtx_op_wait_uint_private_compat32(struct thread *td, struct _umtx_op_args *uap)
3245 {
3246 struct timespec *ts, timeout;
3247 int error;
3248
3249 if (uap->uaddr2 == NULL)
3250 ts = NULL;
3251 else {
3252 error = copyin_timeout32(uap->uaddr2, &timeout);
3253 if (error != 0)
3254 return (error);
3255 if (timeout.tv_nsec >= 1000000000 ||
3256 timeout.tv_nsec < 0)
3257 return (EINVAL);
3258 ts = &timeout;
3259 }
3260 return do_wait(td, uap->obj, uap->val, ts, 1, 1);
3261 }
3262
/*
 * 32-bit compat dispatch table, indexed by UMTX_OP_*; entry order must
 * match op_table above.  The MUTEX_TRYLOCK/MUTEX_LOCK comments were
 * previously swapped relative to the actual slot order — corrected here
 * (the functions themselves were already in the right slots).
 */
static _umtx_op_func op_table_compat32[] = {
	__umtx_op_lock_umtx_compat32,	/* UMTX_OP_LOCK */
	__umtx_op_unlock_umtx_compat32,	/* UMTX_OP_UNLOCK */
	__umtx_op_wait_compat32,	/* UMTX_OP_WAIT */
	__umtx_op_wake,			/* UMTX_OP_WAKE */
	__umtx_op_trylock_umutex,	/* UMTX_OP_MUTEX_TRYLOCK */
	__umtx_op_lock_umutex_compat32,	/* UMTX_OP_MUTEX_LOCK */
	__umtx_op_unlock_umutex,	/* UMTX_OP_MUTEX_UNLOCK */
	__umtx_op_set_ceiling,		/* UMTX_OP_SET_CEILING */
	__umtx_op_cv_wait_compat32,	/* UMTX_OP_CV_WAIT*/
	__umtx_op_cv_signal,		/* UMTX_OP_CV_SIGNAL */
	__umtx_op_cv_broadcast,		/* UMTX_OP_CV_BROADCAST */
	__umtx_op_wait_compat32,	/* UMTX_OP_WAIT_UINT */
	__umtx_op_rw_rdlock_compat32,	/* UMTX_OP_RW_RDLOCK */
	__umtx_op_rw_wrlock_compat32,	/* UMTX_OP_RW_WRLOCK */
	__umtx_op_rw_unlock,		/* UMTX_OP_RW_UNLOCK */
	__umtx_op_wait_uint_private_compat32,	/* UMTX_OP_WAIT_UINT_PRIVATE */
	__umtx_op_wake_private,		/* UMTX_OP_WAKE_PRIVATE */
	__umtx_op_wait_umutex_compat32,	/* UMTX_OP_UMUTEX_WAIT */
	__umtx_op_wake_umutex		/* UMTX_OP_UMUTEX_WAKE */
};
3284
3285 int
3286 freebsd32_umtx_op(struct thread *td, struct freebsd32_umtx_op_args *uap)
3287 {
3288 if ((unsigned)uap->op < UMTX_OP_MAX)
3289 return (*op_table_compat32[uap->op])(td,
3290 (struct _umtx_op_args *)uap);
3291 return (EINVAL);
3292 }
3293 #endif
3294
3295 void
3296 umtx_thread_init(struct thread *td)
3297 {
3298 td->td_umtxq = umtxq_alloc();
3299 td->td_umtxq->uq_thread = td;
3300 }
3301
/* Release the per-thread umtx queue entry allocated in umtx_thread_init(). */
void
umtx_thread_fini(struct thread *td)
{
	umtxq_free(td->td_umtxq);
}
3307
/*
 * It will be called when new thread is created, e.g fork().
 * Resets the inherited priority and asserts the recycled umtx_q is in
 * a clean state.
 */
void
umtx_thread_alloc(struct thread *td)
{
	struct umtx_q *uq;

	uq = td->td_umtxq;
	/* PRI_MAX means no priority is currently inherited. */
	uq->uq_inherited_pri = PRI_MAX;

	KASSERT(uq->uq_flags == 0, ("uq_flags != 0"));
	KASSERT(uq->uq_thread == td, ("uq_thread != td"));
	KASSERT(uq->uq_pi_blocked == NULL, ("uq_pi_blocked != NULL"));
	KASSERT(TAILQ_EMPTY(&uq->uq_pi_contested), ("uq_pi_contested is not empty"));
}
3324
/*
 * exec() hook.
 * Drops any umtx state the exec'ing thread still carries.
 */
static void
umtx_exec_hook(void *arg __unused, struct proc *p __unused,
	struct image_params *imgp __unused)
{
	umtx_thread_cleanup(curthread);
}
3334
/*
 * thread_exit() hook.
 */
void
umtx_thread_exit(struct thread *td)
{
	umtx_thread_cleanup(td);
}
3343
/*
 * clean up umtx data.
 * Disowns any priority-inheritance mutexes still contested by the
 * thread and drops any priority it borrowed through them.
 */
static void
umtx_thread_cleanup(struct thread *td)
{
	struct umtx_q *uq;
	struct umtx_pi *pi;

	if ((uq = td->td_umtxq) == NULL)
		return;

	mtx_lock_spin(&umtx_lock);
	/* Reset inherited priority; PRI_MAX means "none". */
	uq->uq_inherited_pri = PRI_MAX;
	/* Orphan every PI record we still own. */
	while ((pi = TAILQ_FIRST(&uq->uq_pi_contested)) != NULL) {
		pi->pi_owner = NULL;
		TAILQ_REMOVE(&uq->uq_pi_contested, pi, pi_link);
	}
	/* Clear the borrowed-priority flag under the thread lock. */
	thread_lock(td);
	td->td_flags &= ~TDF_UBORROWING;
	thread_unlock(td);
	mtx_unlock_spin(&umtx_lock);
}
Cache object: 5ea405c46ac74c11a61cd8754f232521
|