FreeBSD/Linux Kernel Cross Reference
sys/kern/kern_umtx.c
1 /*-
2 * Copyright (c) 2015, 2016 The FreeBSD Foundation
3 * Copyright (c) 2004, David Xu <davidxu@freebsd.org>
4 * Copyright (c) 2002, Jeffrey Roberson <jeff@freebsd.org>
5 * All rights reserved.
6 *
7 * Portions of this software were developed by Konstantin Belousov
8 * under sponsorship from the FreeBSD Foundation.
9 *
10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions
12 * are met:
13 * 1. Redistributions of source code must retain the above copyright
14 * notice unmodified, this list of conditions, and the following
15 * disclaimer.
16 * 2. Redistributions in binary form must reproduce the above copyright
17 * notice, this list of conditions and the following disclaimer in the
18 * documentation and/or other materials provided with the distribution.
19 *
20 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
21 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
22 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
23 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
24 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
25 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
26 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
27 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
28 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
29 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30 */
31
32 #include <sys/cdefs.h>
33 __FBSDID("$FreeBSD: releng/11.2/sys/kern/kern_umtx.c 330677 2018-03-09 01:17:03Z brooks $");
34
35 #include "opt_compat.h"
36 #include "opt_umtx_profiling.h"
37
38 #include <sys/param.h>
39 #include <sys/kernel.h>
40 #include <sys/fcntl.h>
41 #include <sys/file.h>
42 #include <sys/filedesc.h>
43 #include <sys/limits.h>
44 #include <sys/lock.h>
45 #include <sys/malloc.h>
46 #include <sys/mman.h>
47 #include <sys/mutex.h>
48 #include <sys/priv.h>
49 #include <sys/proc.h>
50 #include <sys/resource.h>
51 #include <sys/resourcevar.h>
52 #include <sys/rwlock.h>
53 #include <sys/sbuf.h>
54 #include <sys/sched.h>
55 #include <sys/smp.h>
56 #include <sys/sysctl.h>
57 #include <sys/sysent.h>
58 #include <sys/systm.h>
59 #include <sys/sysproto.h>
60 #include <sys/syscallsubr.h>
61 #include <sys/taskqueue.h>
62 #include <sys/time.h>
63 #include <sys/eventhandler.h>
64 #include <sys/umtx.h>
65
66 #include <security/mac/mac_framework.h>
67
68 #include <vm/vm.h>
69 #include <vm/vm_param.h>
70 #include <vm/pmap.h>
71 #include <vm/vm_map.h>
72 #include <vm/vm_object.h>
73
74 #include <machine/atomic.h>
75 #include <machine/cpu.h>
76
77 #ifdef COMPAT_FREEBSD32
78 #include <compat/freebsd32/freebsd32_proto.h>
79 #endif
80
81 #define _UMUTEX_TRY 1
82 #define _UMUTEX_WAIT 2
83
84 #ifdef UMTX_PROFILING
85 #define UPROF_PERC_BIGGER(w, f, sw, sf) \
86 (((w) > (sw)) || ((w) == (sw) && (f) > (sf)))
87 #endif
88
/* Priority inheritance mutex info. */
struct umtx_pi {
	/* Owner thread */
	struct thread *pi_owner;

	/* Reference count */
	int pi_refcount;

	/* List entry to link umtx_pi records held by a thread */
	TAILQ_ENTRY(umtx_pi) pi_link;

	/* List entry in hash */
	TAILQ_ENTRY(umtx_pi) pi_hashlink;

	/* List of waiters (umtx_q) blocked on this PI mutex */
	TAILQ_HEAD(,umtx_q) pi_blocked;

	/* Identify a userland lock object */
	struct umtx_key pi_key;
};
109
/* A userland synchronous object user. */
struct umtx_q {
	/* Linked list for the hash. */
	TAILQ_ENTRY(umtx_q) uq_link;

	/* Umtx key. */
	struct umtx_key uq_key;

	/* Umtx flags. */
	int uq_flags;
#define UQF_UMTXQ	0x0001	/* Set while queued on a umtxq_queue */

	/* The thread that this queue entry belongs to. */
	struct thread *uq_thread;

	/*
	 * PI mutex this thread is blocked on, if any.  Reads may use
	 * either the chain lock or umtx_lock; writes must hold both
	 * the chain lock and umtx_lock.
	 */
	struct umtx_pi *uq_pi_blocked;

	/* On blocked list */
	TAILQ_ENTRY(umtx_q) uq_lockq;

	/*
	 * Contended PI records (list elements are umtx_pi, linked via
	 * pi_link; original comment said "thread" — the head type shows
	 * otherwise).
	 */
	TAILQ_HEAD(,umtx_pi) uq_pi_contested;

	/* Inherited priority from PP mutex */
	u_char uq_inherited_pri;

	/* Spare queue ready to be reused */
	struct umtxq_queue *uq_spare_queue;

	/* The queue we are currently on (NULL while not queued) */
	struct umtxq_queue *uq_cur_queue;
};
147
TAILQ_HEAD(umtxq_head, umtx_q);

/* Per-key wait-queue: all umtx_q entries sleeping on one umtx_key. */
struct umtxq_queue {
	struct umtxq_head head;		/* Queued waiters */
	struct umtx_key key;		/* Key shared by all waiters on head */
	LIST_ENTRY(umtxq_queue) link;	/* Entry in a chain's uc_queue or uc_spare_queue */
	int length;			/* Number of waiters on head */
};
157
158 LIST_HEAD(umtxq_list, umtxq_queue);
159
/* Userland lock object's wait-queue chain */
struct umtxq_chain {
	/* Lock for this chain. */
	struct mtx uc_lock;

	/* List of sleep queues. */
	struct umtxq_list uc_queue[2];
#define UMTX_SHARED_QUEUE	0
#define UMTX_EXCLUSIVE_QUEUE	1

	/* Empty umtxq_queue headers kept around for reuse. */
	LIST_HEAD(, umtxq_queue) uc_spare_queue;

	/* Busy flag; guarded by uc_lock, see umtxq_busy()/umtxq_unbusy(). */
	char uc_busy;

	/* Count of threads sleeping for the busy flag to clear. */
	int uc_waiters;

	/* All PI mutexes hashed to this chain. */
	TAILQ_HEAD(,umtx_pi) uc_pi_list;

#ifdef UMTX_PROFILING
	u_int length;		/* Current number of wait queues on chain */
	u_int max_length;	/* High watermark of length */
#endif
};
186
187 #define UMTXQ_LOCKED_ASSERT(uc) mtx_assert(&(uc)->uc_lock, MA_OWNED)
188
/*
 * Do not propagate time-sharing priority; there is a security reason.
 * A user could otherwise game the scheduler: introduce a PI mutex, let
 * thread A lock it, and let another thread B block on it.  Because B is
 * sleeping, its priority is boosted, and priority propagation boosts
 * A's priority as well.  A's priority would then never be lowered, even
 * while it consumes 100% CPU, which is unfair to other processes.
 */
197
198 #define UPRI(td) (((td)->td_user_pri >= PRI_MIN_TIMESHARE &&\
199 (td)->td_user_pri <= PRI_MAX_TIMESHARE) ?\
200 PRI_MAX_TIMESHARE : (td)->td_user_pri)
201
202 #define GOLDEN_RATIO_PRIME 2654404609U
203 #define UMTX_CHAINS 512
204 #define UMTX_SHIFTS (__WORD_BIT - 9)
205
206 #define GET_SHARE(flags) \
207 (((flags) & USYNC_PROCESS_SHARED) == 0 ? THREAD_SHARE : PROCESS_SHARE)
208
209 #define BUSY_SPINS 200
210
/* Sleep-deadline state used by umtxq_sleep() and helpers. */
struct abs_timeout {
	int clockid;		/* Clock the deadline is measured against */
	bool is_abs_real;	/* TIMER_ABSTIME && CLOCK_REALTIME* */
	struct timespec cur;	/* Most recent reading of clockid */
	struct timespec end;	/* Absolute deadline on clockid */
};
217
#ifdef COMPAT_FREEBSD32
/*
 * 32-bit compat layout of struct umutex.  Must match the native struct
 * in total size and in the offset of m_spare; both invariants are
 * enforced by the static asserts below.
 */
struct umutex32 {
	volatile __lwpid_t m_owner;	/* Owner of the mutex */
	__uint32_t m_flags;		/* Flags of the mutex */
	__uint32_t m_ceilings[2];	/* Priority protect ceiling */
	__uint32_t m_rb_lnk;		/* Robust linkage */
	__uint32_t m_pad;
	__uint32_t m_spare[2];
};

_Static_assert(sizeof(struct umutex) == sizeof(struct umutex32), "umutex32");
_Static_assert(__offsetof(struct umutex, m_spare[0]) ==
    __offsetof(struct umutex32, m_spare[0]), "m_spare32");
#endif
232
233 int umtx_shm_vnobj_persistent = 0;
234 SYSCTL_INT(_kern_ipc, OID_AUTO, umtx_vnode_persistent, CTLFLAG_RWTUN,
235 &umtx_shm_vnobj_persistent, 0,
236 "False forces destruction of umtx attached to file, on last close");
237 static int umtx_max_rb = 1000;
238 SYSCTL_INT(_kern_ipc, OID_AUTO, umtx_max_robust, CTLFLAG_RWTUN,
239 &umtx_max_rb, 0,
240 "");
241
242 static uma_zone_t umtx_pi_zone;
243 static struct umtxq_chain umtxq_chains[2][UMTX_CHAINS];
244 static MALLOC_DEFINE(M_UMTX, "umtx", "UMTX queue memory");
245 static int umtx_pi_allocated;
246
247 static SYSCTL_NODE(_debug, OID_AUTO, umtx, CTLFLAG_RW, 0, "umtx debug");
248 SYSCTL_INT(_debug_umtx, OID_AUTO, umtx_pi_allocated, CTLFLAG_RD,
249 &umtx_pi_allocated, 0, "Allocated umtx_pi");
250 static int umtx_verbose_rb = 1;
251 SYSCTL_INT(_debug_umtx, OID_AUTO, robust_faults_verbose, CTLFLAG_RWTUN,
252 &umtx_verbose_rb, 0,
253 "");
254
255 #ifdef UMTX_PROFILING
256 static long max_length;
257 SYSCTL_LONG(_debug_umtx, OID_AUTO, max_length, CTLFLAG_RD, &max_length, 0, "max_length");
258 static SYSCTL_NODE(_debug_umtx, OID_AUTO, chains, CTLFLAG_RD, 0, "umtx chain stats");
259 #endif
260
261 static void abs_timeout_update(struct abs_timeout *timo);
262
263 static void umtx_shm_init(void);
264 static void umtxq_sysinit(void *);
265 static void umtxq_hash(struct umtx_key *key);
266 static struct umtxq_chain *umtxq_getchain(struct umtx_key *key);
267 static void umtxq_lock(struct umtx_key *key);
268 static void umtxq_unlock(struct umtx_key *key);
269 static void umtxq_busy(struct umtx_key *key);
270 static void umtxq_unbusy(struct umtx_key *key);
271 static void umtxq_insert_queue(struct umtx_q *uq, int q);
272 static void umtxq_remove_queue(struct umtx_q *uq, int q);
273 static int umtxq_sleep(struct umtx_q *uq, const char *wmesg, struct abs_timeout *);
274 static int umtxq_count(struct umtx_key *key);
275 static struct umtx_pi *umtx_pi_alloc(int);
276 static void umtx_pi_free(struct umtx_pi *pi);
277 static int do_unlock_pp(struct thread *td, struct umutex *m, uint32_t flags,
278 bool rb);
279 static void umtx_thread_cleanup(struct thread *td);
280 static void umtx_exec_hook(void *arg __unused, struct proc *p __unused,
281 struct image_params *imgp __unused);
282 SYSINIT(umtx, SI_SUB_EVENTHANDLER+1, SI_ORDER_MIDDLE, umtxq_sysinit, NULL);
283
284 #define umtxq_signal(key, nwake) umtxq_signal_queue((key), (nwake), UMTX_SHARED_QUEUE)
285 #define umtxq_insert(uq) umtxq_insert_queue((uq), UMTX_SHARED_QUEUE)
286 #define umtxq_remove(uq) umtxq_remove_queue((uq), UMTX_SHARED_QUEUE)
287
288 static struct mtx umtx_lock;
289
290 #ifdef UMTX_PROFILING
/*
 * Create one sysctl node per hash chain under debug.umtx.chains,
 * exporting the max_length watermark of both chain tables
 * (row 0 and row 1 of umtxq_chains) as read-only integers.
 */
static void
umtx_init_profiling(void)
{
	struct sysctl_oid *chain_oid;
	char chain_name[10];
	int i;

	for (i = 0; i < UMTX_CHAINS; ++i) {
		snprintf(chain_name, sizeof(chain_name), "%d", i);
		chain_oid = SYSCTL_ADD_NODE(NULL,
		    SYSCTL_STATIC_CHILDREN(_debug_umtx_chains), OID_AUTO,
		    chain_name, CTLFLAG_RD, NULL, "umtx hash stats");
		SYSCTL_ADD_INT(NULL, SYSCTL_CHILDREN(chain_oid), OID_AUTO,
		    "max_length0", CTLFLAG_RD, &umtxq_chains[0][i].max_length, 0, NULL);
		SYSCTL_ADD_INT(NULL, SYSCTL_CHILDREN(chain_oid), OID_AUTO,
		    "max_length1", CTLFLAG_RD, &umtxq_chains[1][i].max_length, 0, NULL);
	}
}
309
/*
 * sysctl handler: for each of the two chain tables, report the five
 * chains with the largest max_length watermark, each expressed as a
 * percentage of the table-wide total (whole percent plus a fractional
 * part in hundredths), together with the chain index.
 */
static int
sysctl_debug_umtx_chains_peaks(SYSCTL_HANDLER_ARGS)
{
	char buf[512];
	struct sbuf sb;
	struct umtxq_chain *uc;
	u_int fract, i, j, tot, whole;
	u_int sf0, sf1, sf2, sf3, sf4;	/* fractional parts of the top 5 */
	u_int si0, si1, si2, si3, si4;	/* chain indices of the top 5 */
	u_int sw0, sw1, sw2, sw3, sw4;	/* whole parts of the top 5 */

	sbuf_new(&sb, buf, sizeof(buf), SBUF_FIXEDLEN);
	for (i = 0; i < 2; i++) {
		/* First pass: sum all watermarks so percentages have a base. */
		tot = 0;
		for (j = 0; j < UMTX_CHAINS; ++j) {
			uc = &umtxq_chains[i][j];
			mtx_lock(&uc->uc_lock);
			tot += uc->max_length;
			mtx_unlock(&uc->uc_lock);
		}
		if (tot == 0)
			sbuf_printf(&sb, "%u) Empty ", i);
		else {
			sf0 = sf1 = sf2 = sf3 = sf4 = 0;
			si0 = si1 = si2 = si3 = si4 = 0;
			sw0 = sw1 = sw2 = sw3 = sw4 = 0;
			/*
			 * Second pass: keep the five largest
			 * (whole, fract) pairs, ordered sw0 >= .. >= sw4,
			 * via cascading comparisons.
			 */
			for (j = 0; j < UMTX_CHAINS; j++) {
				uc = &umtxq_chains[i][j];
				mtx_lock(&uc->uc_lock);
				whole = uc->max_length * 100;
				mtx_unlock(&uc->uc_lock);
				fract = (whole % tot) * 100;
				if (UPROF_PERC_BIGGER(whole, fract, sw0, sf0)) {
					sf0 = fract;
					si0 = j;
					sw0 = whole;
				} else if (UPROF_PERC_BIGGER(whole, fract, sw1,
				    sf1)) {
					sf1 = fract;
					si1 = j;
					sw1 = whole;
				} else if (UPROF_PERC_BIGGER(whole, fract, sw2,
				    sf2)) {
					sf2 = fract;
					si2 = j;
					sw2 = whole;
				} else if (UPROF_PERC_BIGGER(whole, fract, sw3,
				    sf3)) {
					sf3 = fract;
					si3 = j;
					sw3 = whole;
				} else if (UPROF_PERC_BIGGER(whole, fract, sw4,
				    sf4)) {
					sf4 = fract;
					si4 = j;
					sw4 = whole;
				}
			}
			sbuf_printf(&sb, "queue %u:\n", i);
			sbuf_printf(&sb, "1st: %u.%u%% idx: %u\n", sw0 / tot,
			    sf0 / tot, si0);
			sbuf_printf(&sb, "2nd: %u.%u%% idx: %u\n", sw1 / tot,
			    sf1 / tot, si1);
			sbuf_printf(&sb, "3rd: %u.%u%% idx: %u\n", sw2 / tot,
			    sf2 / tot, si2);
			sbuf_printf(&sb, "4th: %u.%u%% idx: %u\n", sw3 / tot,
			    sf3 / tot, si3);
			sbuf_printf(&sb, "5th: %u.%u%% idx: %u\n", sw4 / tot,
			    sf4 / tot, si4);
		}
	}
	sbuf_trim(&sb);
	sbuf_finish(&sb);
	sysctl_handle_string(oidp, sbuf_data(&sb), sbuf_len(&sb), req);
	sbuf_delete(&sb);
	return (0);
}
387
388 static int
389 sysctl_debug_umtx_chains_clear(SYSCTL_HANDLER_ARGS)
390 {
391 struct umtxq_chain *uc;
392 u_int i, j;
393 int clear, error;
394
395 clear = 0;
396 error = sysctl_handle_int(oidp, &clear, 0, req);
397 if (error != 0 || req->newptr == NULL)
398 return (error);
399
400 if (clear != 0) {
401 for (i = 0; i < 2; ++i) {
402 for (j = 0; j < UMTX_CHAINS; ++j) {
403 uc = &umtxq_chains[i][j];
404 mtx_lock(&uc->uc_lock);
405 uc->length = 0;
406 uc->max_length = 0;
407 mtx_unlock(&uc->uc_lock);
408 }
409 }
410 }
411 return (0);
412 }
413
414 SYSCTL_PROC(_debug_umtx_chains, OID_AUTO, clear,
415 CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, 0, 0,
416 sysctl_debug_umtx_chains_clear, "I", "Clear umtx chains statistics");
417 SYSCTL_PROC(_debug_umtx_chains, OID_AUTO, peaks,
418 CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_MPSAFE, 0, 0,
419 sysctl_debug_umtx_chains_peaks, "A", "Highest peaks in chains max length");
420 #endif
421
/*
 * SYSINIT hook: create the umtx_pi UMA zone, initialize every chain in
 * both chain tables (locks, queues, busy state, profiling counters),
 * set up the global umtx_lock, and register the exec hook and the
 * shared-memory subsystem.  Runs once at boot (see SYSINIT above).
 */
static void
umtxq_sysinit(void *arg __unused)
{
	int i, j;

	umtx_pi_zone = uma_zcreate("umtx pi", sizeof(struct umtx_pi),
	    NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0);
	for (i = 0; i < 2; ++i) {
		for (j = 0; j < UMTX_CHAINS; ++j) {
			/*
			 * MTX_DUPOK: operations may hold two chain
			 * locks of the same class at once.
			 */
			mtx_init(&umtxq_chains[i][j].uc_lock, "umtxql", NULL,
			    MTX_DEF | MTX_DUPOK);
			LIST_INIT(&umtxq_chains[i][j].uc_queue[0]);
			LIST_INIT(&umtxq_chains[i][j].uc_queue[1]);
			LIST_INIT(&umtxq_chains[i][j].uc_spare_queue);
			TAILQ_INIT(&umtxq_chains[i][j].uc_pi_list);
			umtxq_chains[i][j].uc_busy = 0;
			umtxq_chains[i][j].uc_waiters = 0;
#ifdef UMTX_PROFILING
			umtxq_chains[i][j].length = 0;
			umtxq_chains[i][j].max_length = 0;
#endif
		}
	}
#ifdef UMTX_PROFILING
	umtx_init_profiling();
#endif
	mtx_init(&umtx_lock, "umtx lock", NULL, MTX_DEF);
	EVENTHANDLER_REGISTER(process_exec, umtx_exec_hook, NULL,
	    EVENTHANDLER_PRI_ANY);
	umtx_shm_init();
}
453
454 struct umtx_q *
455 umtxq_alloc(void)
456 {
457 struct umtx_q *uq;
458
459 uq = malloc(sizeof(struct umtx_q), M_UMTX, M_WAITOK | M_ZERO);
460 uq->uq_spare_queue = malloc(sizeof(struct umtxq_queue), M_UMTX,
461 M_WAITOK | M_ZERO);
462 TAILQ_INIT(&uq->uq_spare_queue->head);
463 TAILQ_INIT(&uq->uq_pi_contested);
464 uq->uq_inherited_pri = PRI_MAX;
465 return (uq);
466 }
467
/*
 * Free a umtx_q allocated by umtxq_alloc().  The spare queue must be
 * attached, which (per umtxq_insert_queue/umtxq_remove_queue) implies
 * the entry is not currently on any wait queue.
 */
void
umtxq_free(struct umtx_q *uq)
{

	MPASS(uq->uq_spare_queue != NULL);
	free(uq->uq_spare_queue, M_UMTX);
	free(uq, M_UMTX);
}
476
477 static inline void
478 umtxq_hash(struct umtx_key *key)
479 {
480 unsigned n;
481
482 n = (uintptr_t)key->info.both.a + key->info.both.b;
483 key->hash = ((n * GOLDEN_RATIO_PRIME) >> UMTX_SHIFTS) % UMTX_CHAINS;
484 }
485
486 static inline struct umtxq_chain *
487 umtxq_getchain(struct umtx_key *key)
488 {
489
490 if (key->type <= TYPE_SEM)
491 return (&umtxq_chains[1][key->hash]);
492 return (&umtxq_chains[0][key->hash]);
493 }
494
495 /*
496 * Lock a chain.
497 */
498 static inline void
499 umtxq_lock(struct umtx_key *key)
500 {
501 struct umtxq_chain *uc;
502
503 uc = umtxq_getchain(key);
504 mtx_lock(&uc->uc_lock);
505 }
506
507 /*
508 * Unlock a chain.
509 */
510 static inline void
511 umtxq_unlock(struct umtx_key *key)
512 {
513 struct umtxq_chain *uc;
514
515 uc = umtxq_getchain(key);
516 mtx_unlock(&uc->uc_lock);
517 }
518
/*
 * Set chain to busy state when following operation
 * may be blocked (kernel mutex can not be used).
 *
 * Caller must hold the chain lock.  If the chain is already busy,
 * first spin briefly with the lock dropped (SMP only), then sleep on
 * the chain until the holder calls umtxq_unbusy().  Returns with the
 * chain lock held and uc_busy set by us.
 */
static inline void
umtxq_busy(struct umtx_key *key)
{
	struct umtxq_chain *uc;

	uc = umtxq_getchain(key);
	mtx_assert(&uc->uc_lock, MA_OWNED);
	if (uc->uc_busy) {
#ifdef SMP
		if (smp_cpus > 1) {
			int count = BUSY_SPINS;
			/* NOTE(review): BUSY_SPINS is 200, so this test is always true. */
			if (count > 0) {
				umtxq_unlock(key);
				/* Unlocked read of uc_busy: a stale value only costs a spin. */
				while (uc->uc_busy && --count > 0)
					cpu_spinwait();
				umtxq_lock(key);
			}
		}
#endif
		/* Re-check under the lock; sleep until the busy holder wakes us. */
		while (uc->uc_busy) {
			uc->uc_waiters++;
			msleep(uc, &uc->uc_lock, 0, "umtxqb", 0);
			uc->uc_waiters--;
		}
	}
	uc->uc_busy = 1;
}
550
551 /*
552 * Unbusy a chain.
553 */
554 static inline void
555 umtxq_unbusy(struct umtx_key *key)
556 {
557 struct umtxq_chain *uc;
558
559 uc = umtxq_getchain(key);
560 mtx_assert(&uc->uc_lock, MA_OWNED);
561 KASSERT(uc->uc_busy != 0, ("not busy"));
562 uc->uc_busy = 0;
563 if (uc->uc_waiters)
564 wakeup_one(uc);
565 }
566
/*
 * Convenience wrapper: take the chain lock, clear the busy flag
 * (waking one waiter if needed), and drop the lock again.
 */
static inline void
umtxq_unbusy_unlocked(struct umtx_key *key)
{

	umtxq_lock(key);
	umtxq_unbusy(key);
	umtxq_unlock(key);
}
575
576 static struct umtxq_queue *
577 umtxq_queue_lookup(struct umtx_key *key, int q)
578 {
579 struct umtxq_queue *uh;
580 struct umtxq_chain *uc;
581
582 uc = umtxq_getchain(key);
583 UMTXQ_LOCKED_ASSERT(uc);
584 LIST_FOREACH(uh, &uc->uc_queue[q], link) {
585 if (umtx_key_match(&uh->key, key))
586 return (uh);
587 }
588
589 return (NULL);
590 }
591
/*
 * Enqueue uq on the wait queue for its key on list q.  If a queue for
 * the key already exists, uq joins it and donates its spare queue
 * header to the chain's spare list; otherwise uq's spare header becomes
 * the new queue.  Either way uq_spare_queue is consumed (set NULL),
 * preserving the invariant that each queued entry has contributed
 * exactly one header.  Caller must hold the chain lock.
 */
static inline void
umtxq_insert_queue(struct umtx_q *uq, int q)
{
	struct umtxq_queue *uh;
	struct umtxq_chain *uc;

	uc = umtxq_getchain(&uq->uq_key);
	UMTXQ_LOCKED_ASSERT(uc);
	KASSERT((uq->uq_flags & UQF_UMTXQ) == 0, ("umtx_q is already on queue"));
	uh = umtxq_queue_lookup(&uq->uq_key, q);
	if (uh != NULL) {
		/* Existing queue: donate our spare header for later reuse. */
		LIST_INSERT_HEAD(&uc->uc_spare_queue, uq->uq_spare_queue, link);
	} else {
		/* No queue for this key yet: our spare header becomes it. */
		uh = uq->uq_spare_queue;
		uh->key = uq->uq_key;
		LIST_INSERT_HEAD(&uc->uc_queue[q], uh, link);
#ifdef UMTX_PROFILING
		uc->length++;
		if (uc->length > uc->max_length) {
			uc->max_length = uc->length;
			if (uc->max_length > max_length)
				max_length = uc->max_length;
		}
#endif
	}
	uq->uq_spare_queue = NULL;

	TAILQ_INSERT_TAIL(&uh->head, uq, uq_link);
	uh->length++;
	uq->uq_flags |= UQF_UMTXQ;
	uq->uq_cur_queue = uh;
	return;
}
625
/*
 * Remove uq from the wait queue it is on (list q).  A queue header is
 * handed back to uq as its spare: the queue's own header if uq was the
 * last waiter, otherwise one taken from the chain's spare list — the
 * inverse of the donation done in umtxq_insert_queue().  No-op if uq
 * is not queued.  Caller must hold the chain lock.
 */
static inline void
umtxq_remove_queue(struct umtx_q *uq, int q)
{
	struct umtxq_chain *uc;
	struct umtxq_queue *uh;

	uc = umtxq_getchain(&uq->uq_key);
	UMTXQ_LOCKED_ASSERT(uc);
	if (uq->uq_flags & UQF_UMTXQ) {
		uh = uq->uq_cur_queue;
		TAILQ_REMOVE(&uh->head, uq, uq_link);
		uh->length--;
		uq->uq_flags &= ~UQF_UMTXQ;
		if (TAILQ_EMPTY(&uh->head)) {
			KASSERT(uh->length == 0,
			    ("inconsistent umtxq_queue length"));
#ifdef UMTX_PROFILING
			uc->length--;
#endif
			/* Last waiter: reclaim the queue header itself. */
			LIST_REMOVE(uh, link);
		} else {
			/* Take a donated header from the spare list instead. */
			uh = LIST_FIRST(&uc->uc_spare_queue);
			KASSERT(uh != NULL, ("uc_spare_queue is empty"));
			LIST_REMOVE(uh, link);
		}
		uq->uq_spare_queue = uh;
		uq->uq_cur_queue = NULL;
	}
}
655
656 /*
657 * Check if there are multiple waiters
658 */
659 static int
660 umtxq_count(struct umtx_key *key)
661 {
662 struct umtxq_chain *uc;
663 struct umtxq_queue *uh;
664
665 uc = umtxq_getchain(key);
666 UMTXQ_LOCKED_ASSERT(uc);
667 uh = umtxq_queue_lookup(key, UMTX_SHARED_QUEUE);
668 if (uh != NULL)
669 return (uh->length);
670 return (0);
671 }
672
673 /*
674 * Check if there are multiple PI waiters and returns first
675 * waiter.
676 */
677 static int
678 umtxq_count_pi(struct umtx_key *key, struct umtx_q **first)
679 {
680 struct umtxq_chain *uc;
681 struct umtxq_queue *uh;
682
683 *first = NULL;
684 uc = umtxq_getchain(key);
685 UMTXQ_LOCKED_ASSERT(uc);
686 uh = umtxq_queue_lookup(key, UMTX_SHARED_QUEUE);
687 if (uh != NULL) {
688 *first = TAILQ_FIRST(&uh->head);
689 return (uh->length);
690 }
691 return (0);
692 }
693
694 static int
695 umtxq_check_susp(struct thread *td)
696 {
697 struct proc *p;
698 int error;
699
700 /*
701 * The check for TDF_NEEDSUSPCHK is racy, but it is enough to
702 * eventually break the lockstep loop.
703 */
704 if ((td->td_flags & TDF_NEEDSUSPCHK) == 0)
705 return (0);
706 error = 0;
707 p = td->td_proc;
708 PROC_LOCK(p);
709 if (P_SHOULDSTOP(p) ||
710 ((p->p_flag & P_TRACED) && (td->td_dbgflags & TDB_SUSPEND))) {
711 if (p->p_flag & P_SINGLE_EXIT)
712 error = EINTR;
713 else
714 error = ERESTART;
715 }
716 PROC_UNLOCK(p);
717 return (error);
718 }
719
720 /*
721 * Wake up threads waiting on an userland object.
722 */
723
724 static int
725 umtxq_signal_queue(struct umtx_key *key, int n_wake, int q)
726 {
727 struct umtxq_chain *uc;
728 struct umtxq_queue *uh;
729 struct umtx_q *uq;
730 int ret;
731
732 ret = 0;
733 uc = umtxq_getchain(key);
734 UMTXQ_LOCKED_ASSERT(uc);
735 uh = umtxq_queue_lookup(key, q);
736 if (uh != NULL) {
737 while ((uq = TAILQ_FIRST(&uh->head)) != NULL) {
738 umtxq_remove_queue(uq, q);
739 wakeup(uq);
740 if (++ret >= n_wake)
741 return (ret);
742 }
743 }
744 return (ret);
745 }
746
747
/*
 * Wake up specified thread: remove its entry from the shared queue
 * and issue a wakeup on it.  Caller must hold the chain lock.
 */
static inline void
umtxq_signal_thread(struct umtx_q *uq)
{
	struct umtxq_chain *uc;

	uc = umtxq_getchain(&uq->uq_key);
	UMTXQ_LOCKED_ASSERT(uc);
	umtxq_remove(uq);
	wakeup(uq);
}
761
/*
 * Convert a timespec interval to a tick count suitable for msleep(),
 * going through timeval because tvtohz() is the available primitive.
 */
static inline int
tstohz(const struct timespec *tsp)
{
	struct timeval tv;

	TIMESPEC_TO_TIMEVAL(&tv, tsp);
	return tvtohz(&tv);
}
770
771 static void
772 abs_timeout_init(struct abs_timeout *timo, int clockid, int absolute,
773 const struct timespec *timeout)
774 {
775
776 timo->clockid = clockid;
777 if (!absolute) {
778 timo->is_abs_real = false;
779 abs_timeout_update(timo);
780 timo->end = timo->cur;
781 timespecadd(&timo->end, timeout);
782 } else {
783 timo->end = *timeout;
784 timo->is_abs_real = clockid == CLOCK_REALTIME ||
785 clockid == CLOCK_REALTIME_FAST ||
786 clockid == CLOCK_REALTIME_PRECISE;
787 /*
788 * If is_abs_real, umtxq_sleep will read the clock
789 * after setting td_rtcgen; otherwise, read it here.
790 */
791 if (!timo->is_abs_real) {
792 abs_timeout_update(timo);
793 }
794 }
795 }
796
/*
 * Initialize an abs_timeout from a userland _umtx_time, which supplies
 * the clock id, the UMTX_ABSTIME flag, and the timeout value.
 */
static void
abs_timeout_init2(struct abs_timeout *timo, const struct _umtx_time *umtxtime)
{

	abs_timeout_init(timo, umtxtime->_clockid,
	    (umtxtime->_flags & UMTX_ABSTIME) != 0, &umtxtime->_timeout);
}
804
/* Refresh timo->cur with the current reading of the timeout's clock. */
static inline void
abs_timeout_update(struct abs_timeout *timo)
{

	kern_clock_gettime(curthread, timo->clockid, &timo->cur);
}
811
812 static int
813 abs_timeout_gethz(struct abs_timeout *timo)
814 {
815 struct timespec tts;
816
817 if (timespeccmp(&timo->end, &timo->cur, <=))
818 return (-1);
819 tts = timo->end;
820 timespecsub(&tts, &timo->cur);
821 return (tstohz(&tts));
822 }
823
824 static uint32_t
825 umtx_unlock_val(uint32_t flags, bool rb)
826 {
827
828 if (rb)
829 return (UMUTEX_RB_OWNERDEAD);
830 else if ((flags & UMUTEX_NONCONSISTENT) != 0)
831 return (UMUTEX_RB_NOTRECOV);
832 else
833 return (UMUTEX_UNOWNED);
834
835 }
836
/*
 * Put thread into sleep state, before sleeping, check if
 * thread was removed from umtx queue.
 *
 * Returns 0 if the entry was removed from the queue (i.e. we were
 * woken), ETIMEDOUT on deadline expiry, or EINTR/ERESTART on signal.
 * Caller must hold the chain lock; it is held again on return (msleep
 * is called with PDROP and the lock is re-taken each iteration).
 *
 * For absolute CLOCK_REALTIME* timeouts, td_rtcgen is set before each
 * clock read so that a clock step during the sleep forces a wakeup and
 * deadline re-evaluation.
 */
static inline int
umtxq_sleep(struct umtx_q *uq, const char *wmesg, struct abs_timeout *abstime)
{
	struct umtxq_chain *uc;
	int error, timo;

	if (abstime != NULL && abstime->is_abs_real) {
		curthread->td_rtcgen = atomic_load_acq_int(&rtc_generation);
		abs_timeout_update(abstime);
	}

	uc = umtxq_getchain(&uq->uq_key);
	UMTXQ_LOCKED_ASSERT(uc);
	for (;;) {
		/* Woken and dequeued by a signaller: done. */
		if (!(uq->uq_flags & UQF_UMTXQ)) {
			error = 0;
			break;
		}
		if (abstime != NULL) {
			timo = abs_timeout_gethz(abstime);
			if (timo < 0) {
				error = ETIMEDOUT;
				break;
			}
		} else
			timo = 0;
		/* PDROP: msleep returns with uc_lock released. */
		error = msleep(uq, &uc->uc_lock, PCATCH | PDROP, wmesg, timo);
		if (error == EINTR || error == ERESTART) {
			umtxq_lock(&uq->uq_key);
			break;
		}
		if (abstime != NULL) {
			/* Re-arm rtcgen before re-reading a realtime clock. */
			if (abstime->is_abs_real)
				curthread->td_rtcgen =
				    atomic_load_acq_int(&rtc_generation);
			abs_timeout_update(abstime);
		}
		umtxq_lock(&uq->uq_key);
	}

	curthread->td_rtcgen = 0;
	return (error);
}
884
/*
 * Convert userspace address into unique logical address.
 *
 * THREAD_SHARE keys are (vmspace, address) pairs.  PROCESS_SHARE keys
 * — and AUTO_SHARE addresses that land in a VM_INHERIT_SHARE mapping —
 * are (object, offset) pairs, with a reference held on the VM object;
 * the caller must release it with umtx_key_release().  Returns EFAULT
 * if the address is not mapped writable.
 */
int
umtx_key_get(const void *addr, int type, int share, struct umtx_key *key)
{
	struct thread *td = curthread;
	vm_map_t map;
	vm_map_entry_t entry;
	vm_pindex_t pindex;
	vm_prot_t prot;
	boolean_t wired;

	key->type = type;
	if (share == THREAD_SHARE) {
		key->shared = 0;
		key->info.private.vs = td->td_proc->p_vmspace;
		key->info.private.addr = (uintptr_t)addr;
	} else {
		MPASS(share == PROCESS_SHARE || share == AUTO_SHARE);
		map = &td->td_proc->p_vmspace->vm_map;
		if (vm_map_lookup(&map, (vm_offset_t)addr, VM_PROT_WRITE,
		    &entry, &key->info.shared.object, &pindex, &prot,
		    &wired) != KERN_SUCCESS) {
			return (EFAULT);
		}

		if ((share == PROCESS_SHARE) ||
		    (share == AUTO_SHARE &&
		     VM_INHERIT_SHARE == entry->inheritance)) {
			key->shared = 1;
			key->info.shared.offset = (vm_offset_t)addr -
			    entry->start + entry->offset;
			/* Keep the object alive past vm_map_lookup_done(). */
			vm_object_reference(key->info.shared.object);
		} else {
			key->shared = 0;
			key->info.private.vs = td->td_proc->p_vmspace;
			key->info.private.addr = (uintptr_t)addr;
		}
		vm_map_lookup_done(map, entry);
	}

	umtxq_hash(key);
	return (0);
}
930
/*
 * Release key.  Drops the VM object reference taken by umtx_key_get()
 * for shared keys; private keys hold no reference.
 */
void
umtx_key_release(struct umtx_key *key)
{
	if (key->shared)
		vm_object_deallocate(key->info.shared.object);
}
940
/*
 * Fetch and compare value, sleep on the address if value is not changed.
 *
 * Implements UMTX_OP_WAIT: queue first, then fetch the word (so a
 * concurrent wake between fetch and sleep is not lost), and sleep only
 * if the fetched value still equals 'id'.  compat32 selects a 32-bit
 * fetch; is_private selects a per-process key.  Returns 0 on wakeup or
 * value mismatch, ETIMEDOUT, EINTR, or EFAULT.
 */
static int
do_wait(struct thread *td, void *addr, u_long id,
    struct _umtx_time *timeout, int compat32, int is_private)
{
	struct abs_timeout timo;
	struct umtx_q *uq;
	u_long tmp;
	uint32_t tmp32;
	int error = 0;

	uq = td->td_umtxq;
	if ((error = umtx_key_get(addr, TYPE_SIMPLE_WAIT,
	    is_private ? THREAD_SHARE : AUTO_SHARE, &uq->uq_key)) != 0)
		return (error);

	if (timeout != NULL)
		abs_timeout_init2(&timo, timeout);

	/* Insert on the queue before reading the word: avoids lost wakeups. */
	umtxq_lock(&uq->uq_key);
	umtxq_insert(uq);
	umtxq_unlock(&uq->uq_key);
	if (compat32 == 0) {
		error = fueword(addr, &tmp);
		if (error != 0)
			error = EFAULT;
	} else {
		error = fueword32(addr, &tmp32);
		if (error == 0)
			tmp = tmp32;
		else
			error = EFAULT;
	}
	umtxq_lock(&uq->uq_key);
	if (error == 0) {
		if (tmp == id)
			error = umtxq_sleep(uq, "uwait", timeout == NULL ?
			    NULL : &timo);
		/* Dequeued by a waker: report success regardless of timeout/signal. */
		if ((uq->uq_flags & UQF_UMTXQ) == 0)
			error = 0;
		else
			umtxq_remove(uq);
	} else if ((uq->uq_flags & UQF_UMTXQ) != 0) {
		umtxq_remove(uq);
	}
	umtxq_unlock(&uq->uq_key);
	umtx_key_release(&uq->uq_key);
	if (error == ERESTART)
		error = EINTR;
	return (error);
}
994
995 /*
996 * Wake up threads sleeping on the specified address.
997 */
998 int
999 kern_umtx_wake(struct thread *td, void *uaddr, int n_wake, int is_private)
1000 {
1001 struct umtx_key key;
1002 int ret;
1003
1004 if ((ret = umtx_key_get(uaddr, TYPE_SIMPLE_WAIT,
1005 is_private ? THREAD_SHARE : AUTO_SHARE, &key)) != 0)
1006 return (ret);
1007 umtxq_lock(&key);
1008 umtxq_signal(&key, n_wake);
1009 umtxq_unlock(&key);
1010 umtx_key_release(&key);
1011 return (0);
1012 }
1013
/*
 * Lock PTHREAD_PRIO_NONE protocol POSIX mutex.
 *
 * mode selects behavior: _UMUTEX_TRY fails with EBUSY instead of
 * sleeping; _UMUTEX_WAIT only sleeps until the mutex looks
 * acquirable, without taking ownership.  Returns 0 on success,
 * EOWNERDEAD when a robust owner died (lock is acquired),
 * ENOTRECOVERABLE, EBUSY, EFAULT, ETIMEDOUT, or a signal error.
 * Never returns via the bottom of the loop except through an
 * explicit return.
 */
static int
do_lock_normal(struct thread *td, struct umutex *m, uint32_t flags,
    struct _umtx_time *timeout, int mode)
{
	struct abs_timeout timo;
	struct umtx_q *uq;
	uint32_t owner, old, id;
	int error, rv;

	id = td->td_tid;
	uq = td->td_umtxq;
	error = 0;
	if (timeout != NULL)
		abs_timeout_init2(&timo, timeout);

	/*
	 * Care must be exercised when dealing with umtx structure. It
	 * can fault on any access.
	 */
	for (;;) {
		rv = fueword32(&m->m_owner, &owner);
		if (rv == -1)
			return (EFAULT);
		if (mode == _UMUTEX_WAIT) {
			if (owner == UMUTEX_UNOWNED ||
			    owner == UMUTEX_CONTESTED ||
			    owner == UMUTEX_RB_OWNERDEAD ||
			    owner == UMUTEX_RB_NOTRECOV)
				return (0);
		} else {
			/*
			 * Robust mutex terminated. Kernel duty is to
			 * return EOWNERDEAD to the userspace. The
			 * umutex.m_flags UMUTEX_NONCONSISTENT is set
			 * by the common userspace code.
			 */
			if (owner == UMUTEX_RB_OWNERDEAD) {
				rv = casueword32(&m->m_owner,
				    UMUTEX_RB_OWNERDEAD, &owner,
				    id | UMUTEX_CONTESTED);
				if (rv == -1)
					return (EFAULT);
				if (owner == UMUTEX_RB_OWNERDEAD)
					return (EOWNERDEAD); /* success */
				rv = umtxq_check_susp(td);
				if (rv != 0)
					return (rv);
				continue;
			}
			if (owner == UMUTEX_RB_NOTRECOV)
				return (ENOTRECOVERABLE);

			/*
			 * Try the uncontested case.  This should be
			 * done in userland.
			 */
			rv = casueword32(&m->m_owner, UMUTEX_UNOWNED,
			    &owner, id);
			/* The address was invalid. */
			if (rv == -1)
				return (EFAULT);

			/* The acquire succeeded. */
			if (owner == UMUTEX_UNOWNED)
				return (0);

			/*
			 * If no one owns it but it is contested try
			 * to acquire it.
			 */
			if (owner == UMUTEX_CONTESTED) {
				rv = casueword32(&m->m_owner,
				    UMUTEX_CONTESTED, &owner,
				    id | UMUTEX_CONTESTED);
				/* The address was invalid. */
				if (rv == -1)
					return (EFAULT);

				if (owner == UMUTEX_CONTESTED)
					return (0);

				rv = umtxq_check_susp(td);
				if (rv != 0)
					return (rv);

				/*
				 * If this failed the lock has
				 * changed, restart.
				 */
				continue;
			}
		}

		if (mode == _UMUTEX_TRY)
			return (EBUSY);

		/*
		 * If we caught a signal, we have retried and now
		 * exit immediately.
		 */
		if (error != 0)
			return (error);

		if ((error = umtx_key_get(m, TYPE_NORMAL_UMUTEX,
		    GET_SHARE(flags), &uq->uq_key)) != 0)
			return (error);

		umtxq_lock(&uq->uq_key);
		umtxq_busy(&uq->uq_key);
		umtxq_insert(uq);
		umtxq_unlock(&uq->uq_key);

		/*
		 * Set the contested bit so that a release in user space
		 * knows to use the system call for unlock. If this fails
		 * either some one else has acquired the lock or it has been
		 * released.
		 */
		rv = casueword32(&m->m_owner, owner, &old,
		    owner | UMUTEX_CONTESTED);

		/* The address was invalid. */
		if (rv == -1) {
			umtxq_lock(&uq->uq_key);
			umtxq_remove(uq);
			umtxq_unbusy(&uq->uq_key);
			umtxq_unlock(&uq->uq_key);
			umtx_key_release(&uq->uq_key);
			return (EFAULT);
		}

		/*
		 * We set the contested bit, sleep. Otherwise the lock changed
		 * and we need to retry or we lost a race to the thread
		 * unlocking the umtx.
		 */
		umtxq_lock(&uq->uq_key);
		umtxq_unbusy(&uq->uq_key);
		if (old == owner)
			error = umtxq_sleep(uq, "umtxn", timeout == NULL ?
			    NULL : &timo);
		umtxq_remove(uq);
		umtxq_unlock(&uq->uq_key);
		umtx_key_release(&uq->uq_key);

		if (error == 0)
			error = umtxq_check_susp(td);
	}

	return (0);
}
1169
/*
 * Unlock PTHREAD_PRIO_NONE protocol POSIX mutex.
 *
 * rb marks a robust release (owner word becomes UMUTEX_RB_OWNERDEAD,
 * see umtx_unlock_val()).  Only the owning thread may unlock (EPERM
 * otherwise).  The uncontested case is a single CAS; the contested
 * case counts waiters under the busy chain, stores the new owner word
 * (keeping UMUTEX_CONTESTED when more than one waiter remains), and
 * wakes one waiter.  Returns 0, EPERM, EFAULT, or EINVAL if the owner
 * word changed underneath us.
 */
static int
do_unlock_normal(struct thread *td, struct umutex *m, uint32_t flags, bool rb)
{
	struct umtx_key key;
	uint32_t owner, old, id, newlock;
	int error, count;

	id = td->td_tid;
	/*
	 * Make sure we own this mtx.
	 */
	error = fueword32(&m->m_owner, &owner);
	if (error == -1)
		return (EFAULT);

	if ((owner & ~UMUTEX_CONTESTED) != id)
		return (EPERM);

	newlock = umtx_unlock_val(flags, rb);
	if ((owner & UMUTEX_CONTESTED) == 0) {
		error = casueword32(&m->m_owner, owner, &old, newlock);
		if (error == -1)
			return (EFAULT);
		if (old == owner)
			return (0);
		/* CAS lost: the contested bit appeared; fall through. */
		owner = old;
	}

	/* We should only ever be in here for contested locks */
	if ((error = umtx_key_get(m, TYPE_NORMAL_UMUTEX, GET_SHARE(flags),
	    &key)) != 0)
		return (error);

	umtxq_lock(&key);
	umtxq_busy(&key);
	count = umtxq_count(&key);
	umtxq_unlock(&key);

	/*
	 * When unlocking the umtx, it must be marked as unowned if
	 * there is zero or one thread only waiting for it.
	 * Otherwise, it must be marked as contested.
	 */
	if (count > 1)
		newlock |= UMUTEX_CONTESTED;
	error = casueword32(&m->m_owner, owner, &old, newlock);
	umtxq_lock(&key);
	umtxq_signal(&key, 1);
	umtxq_unbusy(&key);
	umtxq_unlock(&key);
	umtx_key_release(&key);
	if (error == -1)
		return (EFAULT);
	if (old != owner)
		return (EINVAL);
	return (0);
}
1230
/*
 * Check if the mutex is available and wake up a waiter,
 * only for simple mutex.
 *
 * Returns 0 (including the "nothing to do" case) or EFAULT if the
 * userspace mutex words cannot be accessed.
 */
static int
do_wake_umutex(struct thread *td, struct umutex *m)
{
	struct umtx_key key;
	uint32_t owner;
	uint32_t flags;
	int error;
	int count;

	error = fueword32(&m->m_owner, &owner);
	if (error == -1)
		return (EFAULT);

	/* Still owned and not in a robust terminal state: nothing to wake. */
	if ((owner & ~UMUTEX_CONTESTED) != 0 && owner != UMUTEX_RB_OWNERDEAD &&
	    owner != UMUTEX_RB_NOTRECOV)
		return (0);

	error = fueword32(&m->m_flags, &flags);
	if (error == -1)
		return (EFAULT);

	/* We should only ever be in here for contested locks */
	if ((error = umtx_key_get(m, TYPE_NORMAL_UMUTEX, GET_SHARE(flags),
	    &key)) != 0)
		return (error);

	umtxq_lock(&key);
	umtxq_busy(&key);
	count = umtxq_count(&key);
	umtxq_unlock(&key);

	/*
	 * At most one waiter remains: try to clear the contested bit so
	 * that the last waiter can acquire the lock in userspace without
	 * re-entering the kernel.
	 */
	if (count <= 1 && owner != UMUTEX_RB_OWNERDEAD &&
	    owner != UMUTEX_RB_NOTRECOV) {
		error = casueword32(&m->m_owner, UMUTEX_CONTESTED, &owner,
		    UMUTEX_UNOWNED);
		if (error == -1)
			error = EFAULT;
	}

	umtxq_lock(&key);
	/* Wake one waiter if the mutex is free or robust-dead. */
	if (error == 0 && count != 0 && ((owner & ~UMUTEX_CONTESTED) == 0 ||
	    owner == UMUTEX_RB_OWNERDEAD || owner == UMUTEX_RB_NOTRECOV))
		umtxq_signal(&key, 1);
	umtxq_unbusy(&key);
	umtxq_unlock(&key);
	umtx_key_release(&key);
	return (error);
}
1283
/*
 * Check if the mutex has waiters and tries to fix contention bit.
 *
 * Returns 0, EINVAL for an unsupported flag combination, EFAULT on an
 * invalid userspace address, or an error from umtxq_check_susp().
 */
static int
do_wake2_umutex(struct thread *td, struct umutex *m, uint32_t flags)
{
	struct umtx_key key;
	uint32_t owner, old;
	int type;
	int error;
	int count;

	/* Map the protocol/robust flag combination to a umtx key type. */
	switch (flags & (UMUTEX_PRIO_INHERIT | UMUTEX_PRIO_PROTECT |
	    UMUTEX_ROBUST)) {
	case 0:
	case UMUTEX_ROBUST:
		type = TYPE_NORMAL_UMUTEX;
		break;
	case UMUTEX_PRIO_INHERIT:
		type = TYPE_PI_UMUTEX;
		break;
	case (UMUTEX_PRIO_INHERIT | UMUTEX_ROBUST):
		type = TYPE_PI_ROBUST_UMUTEX;
		break;
	case UMUTEX_PRIO_PROTECT:
		type = TYPE_PP_UMUTEX;
		break;
	case (UMUTEX_PRIO_PROTECT | UMUTEX_ROBUST):
		type = TYPE_PP_ROBUST_UMUTEX;
		break;
	default:
		return (EINVAL);
	}
	if ((error = umtx_key_get(m, type, GET_SHARE(flags), &key)) != 0)
		return (error);

	owner = 0;
	umtxq_lock(&key);
	umtxq_busy(&key);
	count = umtxq_count(&key);
	umtxq_unlock(&key);
	/*
	 * Only repair contention bit if there is a waiter, this means the mutex
	 * is still being referenced by userland code, otherwise don't update
	 * any memory.
	 */
	if (count > 1) {
		/* Several waiters: make sure the contested bit is set. */
		error = fueword32(&m->m_owner, &owner);
		if (error == -1)
			error = EFAULT;
		while (error == 0 && (owner & UMUTEX_CONTESTED) == 0) {
			error = casueword32(&m->m_owner, owner, &old,
			    owner | UMUTEX_CONTESTED);
			if (error == -1) {
				error = EFAULT;
				break;
			}
			if (old == owner)
				break;
			/* CAS failed, retry with the value just read. */
			owner = old;
			error = umtxq_check_susp(td);
			if (error != 0)
				break;
		}
	} else if (count == 1) {
		/*
		 * Exactly one waiter: only set the contested bit while the
		 * mutex is actually owned, so an unowned mutex stays cheap
		 * to lock from userspace.
		 */
		error = fueword32(&m->m_owner, &owner);
		if (error == -1)
			error = EFAULT;
		while (error == 0 && (owner & ~UMUTEX_CONTESTED) != 0 &&
		    (owner & UMUTEX_CONTESTED) == 0) {
			error = casueword32(&m->m_owner, owner, &old,
			    owner | UMUTEX_CONTESTED);
			if (error == -1) {
				error = EFAULT;
				break;
			}
			if (old == owner)
				break;
			owner = old;
			error = umtxq_check_susp(td);
			if (error != 0)
				break;
		}
	}
	umtxq_lock(&key);
	if (error == EFAULT) {
		/* Bad mutex memory: wake everybody so nobody hangs. */
		umtxq_signal(&key, INT_MAX);
	} else if (count != 0 && ((owner & ~UMUTEX_CONTESTED) == 0 ||
	    owner == UMUTEX_RB_OWNERDEAD || owner == UMUTEX_RB_NOTRECOV))
		umtxq_signal(&key, 1);
	umtxq_unbusy(&key);
	umtxq_unlock(&key);
	umtx_key_release(&key);
	return (error);
}
1379
1380 static inline struct umtx_pi *
1381 umtx_pi_alloc(int flags)
1382 {
1383 struct umtx_pi *pi;
1384
1385 pi = uma_zalloc(umtx_pi_zone, M_ZERO | flags);
1386 TAILQ_INIT(&pi->pi_blocked);
1387 atomic_add_int(&umtx_pi_allocated, 1);
1388 return (pi);
1389 }
1390
1391 static inline void
1392 umtx_pi_free(struct umtx_pi *pi)
1393 {
1394 uma_zfree(umtx_pi_zone, pi);
1395 atomic_add_int(&umtx_pi_allocated, -1);
1396 }
1397
/*
 * Adjust the thread's position on a pi_state after its priority has been
 * changed.
 *
 * The pi_blocked list is kept sorted by user priority.  Called with
 * umtx_lock held; returns 0 if pi is NULL, 1 otherwise.
 */
static int
umtx_pi_adjust_thread(struct umtx_pi *pi, struct thread *td)
{
	struct umtx_q *uq, *uq1, *uq2;
	struct thread *td1;

	mtx_assert(&umtx_lock, MA_OWNED);
	if (pi == NULL)
		return (0);

	uq = td->td_umtxq;

	/*
	 * Check if the thread needs to be moved on the blocked chain.
	 * It needs to be moved if either its priority is lower than
	 * the previous thread or higher than the next thread.
	 */
	uq1 = TAILQ_PREV(uq, umtxq_head, uq_lockq);
	uq2 = TAILQ_NEXT(uq, uq_lockq);
	if ((uq1 != NULL && UPRI(td) < UPRI(uq1->uq_thread)) ||
	    (uq2 != NULL && UPRI(td) > UPRI(uq2->uq_thread))) {
		/*
		 * Remove thread from blocked chain and determine where
		 * it should be moved to.
		 */
		TAILQ_REMOVE(&pi->pi_blocked, uq, uq_lockq);
		TAILQ_FOREACH(uq1, &pi->pi_blocked, uq_lockq) {
			td1 = uq1->uq_thread;
			MPASS(td1->td_proc->p_magic == P_MAGIC);
			/* Insert before the first lower-priority waiter. */
			if (UPRI(td1) > UPRI(td))
				break;
		}

		if (uq1 == NULL)
			TAILQ_INSERT_TAIL(&pi->pi_blocked, uq, uq_lockq);
		else
			TAILQ_INSERT_BEFORE(uq1, uq, uq_lockq);
	}
	return (1);
}
1442
1443 static struct umtx_pi *
1444 umtx_pi_next(struct umtx_pi *pi)
1445 {
1446 struct umtx_q *uq_owner;
1447
1448 if (pi->pi_owner == NULL)
1449 return (NULL);
1450 uq_owner = pi->pi_owner->td_umtxq;
1451 if (uq_owner == NULL)
1452 return (NULL);
1453 return (uq_owner->uq_pi_blocked);
1454 }
1455
1456 /*
1457 * Floyd's Cycle-Finding Algorithm.
1458 */
1459 static bool
1460 umtx_pi_check_loop(struct umtx_pi *pi)
1461 {
1462 struct umtx_pi *pi1; /* fast iterator */
1463
1464 mtx_assert(&umtx_lock, MA_OWNED);
1465 if (pi == NULL)
1466 return (false);
1467 pi1 = pi;
1468 for (;;) {
1469 pi = umtx_pi_next(pi);
1470 if (pi == NULL)
1471 break;
1472 pi1 = umtx_pi_next(pi1);
1473 if (pi1 == NULL)
1474 break;
1475 pi1 = umtx_pi_next(pi1);
1476 if (pi1 == NULL)
1477 break;
1478 if (pi == pi1)
1479 return (true);
1480 }
1481 return (false);
1482 }
1483
/*
 * Propagate priority when a thread is blocked on POSIX
 * PI mutex.
 *
 * Walks the chain of lock owners starting from the mutex td blocks on,
 * lending td's priority to each owner whose lent priority is worse.
 * Called with umtx_lock held.
 */
static void
umtx_propagate_priority(struct thread *td)
{
	struct umtx_q *uq;
	struct umtx_pi *pi;
	int pri;

	mtx_assert(&umtx_lock, MA_OWNED);
	pri = UPRI(td);
	uq = td->td_umtxq;
	pi = uq->uq_pi_blocked;
	if (pi == NULL)
		return;
	/* A cycle would make the walk below spin forever; bail out. */
	if (umtx_pi_check_loop(pi))
		return;

	for (;;) {
		td = pi->pi_owner;
		if (td == NULL || td == curthread)
			return;

		MPASS(td->td_proc != NULL);
		MPASS(td->td_proc->p_magic == P_MAGIC);

		thread_lock(td);
		if (td->td_lend_user_pri > pri)
			sched_lend_user_prio(td, pri);
		else {
			/* Owner already runs at least this well; stop. */
			thread_unlock(td);
			break;
		}
		thread_unlock(td);

		/*
		 * Pick up the lock that td is blocked on.
		 */
		uq = td->td_umtxq;
		pi = uq->uq_pi_blocked;
		if (pi == NULL)
			break;
		/* Resort td on the list if needed. */
		umtx_pi_adjust_thread(pi, td);
	}
}
1532
/*
 * Unpropagate priority for a PI mutex when a thread blocked on
 * it is interrupted by signal or resumed by others.
 *
 * Recomputes each successive owner's lent priority from the best
 * waiter on every PI mutex the owner still holds, then follows the
 * chain upward.  Called with umtx_lock held.
 */
static void
umtx_repropagate_priority(struct umtx_pi *pi)
{
	struct umtx_q *uq, *uq_owner;
	struct umtx_pi *pi2;
	int pri;

	mtx_assert(&umtx_lock, MA_OWNED);

	/* A cycle would make the walk below spin forever; bail out. */
	if (umtx_pi_check_loop(pi))
		return;
	while (pi != NULL && pi->pi_owner != NULL) {
		pri = PRI_MAX;
		uq_owner = pi->pi_owner->td_umtxq;

		/* Best priority among waiters of all owned PI mutexes. */
		TAILQ_FOREACH(pi2, &uq_owner->uq_pi_contested, pi_link) {
			uq = TAILQ_FIRST(&pi2->pi_blocked);
			if (uq != NULL) {
				if (pri > UPRI(uq->uq_thread))
					pri = UPRI(uq->uq_thread);
			}
		}

		if (pri > uq_owner->uq_inherited_pri)
			pri = uq_owner->uq_inherited_pri;
		thread_lock(pi->pi_owner);
		sched_lend_user_prio(pi->pi_owner, pri);
		thread_unlock(pi->pi_owner);
		/* Continue with the mutex this owner is blocked on, if any. */
		if ((pi = uq_owner->uq_pi_blocked) != NULL)
			umtx_pi_adjust_thread(pi, uq_owner->uq_thread);
	}
}
1569
1570 /*
1571 * Insert a PI mutex into owned list.
1572 */
1573 static void
1574 umtx_pi_setowner(struct umtx_pi *pi, struct thread *owner)
1575 {
1576 struct umtx_q *uq_owner;
1577
1578 uq_owner = owner->td_umtxq;
1579 mtx_assert(&umtx_lock, MA_OWNED);
1580 MPASS(pi->pi_owner == NULL);
1581 pi->pi_owner = owner;
1582 TAILQ_INSERT_TAIL(&uq_owner->uq_pi_contested, pi, pi_link);
1583 }
1584
1585
1586 /*
1587 * Disown a PI mutex, and remove it from the owned list.
1588 */
1589 static void
1590 umtx_pi_disown(struct umtx_pi *pi)
1591 {
1592
1593 mtx_assert(&umtx_lock, MA_OWNED);
1594 TAILQ_REMOVE(&pi->pi_owner->td_umtxq->uq_pi_contested, pi, pi_link);
1595 pi->pi_owner = NULL;
1596 }
1597
/*
 * Claim ownership of a PI mutex.
 *
 * Returns 0 on success (including when "owner" already owns pi) or
 * EPERM when a different thread is recorded as owner, which means
 * userland corrupted the mutex state.  On success the best waiter's
 * priority is lent to the new owner.
 */
static int
umtx_pi_claim(struct umtx_pi *pi, struct thread *owner)
{
	struct umtx_q *uq;
	int pri;

	mtx_lock(&umtx_lock);
	if (pi->pi_owner == owner) {
		mtx_unlock(&umtx_lock);
		return (0);
	}

	if (pi->pi_owner != NULL) {
		/*
		 * userland may have already messed the mutex, sigh.
		 */
		mtx_unlock(&umtx_lock);
		return (EPERM);
	}
	umtx_pi_setowner(pi, owner);
	/* pi_blocked is priority sorted; its head is the best waiter. */
	uq = TAILQ_FIRST(&pi->pi_blocked);
	if (uq != NULL) {
		pri = UPRI(uq->uq_thread);
		thread_lock(owner);
		if (pri < UPRI(owner))
			sched_lend_user_prio(owner, pri);
		thread_unlock(owner);
	}
	mtx_unlock(&umtx_lock);
	return (0);
}
1632
1633 /*
1634 * Adjust a thread's order position in its blocked PI mutex,
1635 * this may result new priority propagating process.
1636 */
1637 void
1638 umtx_pi_adjust(struct thread *td, u_char oldpri)
1639 {
1640 struct umtx_q *uq;
1641 struct umtx_pi *pi;
1642
1643 uq = td->td_umtxq;
1644 mtx_lock(&umtx_lock);
1645 /*
1646 * Pick up the lock that td is blocked on.
1647 */
1648 pi = uq->uq_pi_blocked;
1649 if (pi != NULL) {
1650 umtx_pi_adjust_thread(pi, td);
1651 umtx_repropagate_priority(pi);
1652 }
1653 mtx_unlock(&umtx_lock);
1654 }
1655
/*
 * Sleep on a PI mutex.
 *
 * Entered with the umtxq chain locked and busy; returns with the chain
 * unlocked.  "owner" is the tid read from the userspace mutex word;
 * "shared" selects a system-wide tid lookup for process-shared mutexes.
 * The caller's priority is propagated to the owner chain before
 * sleeping and undone afterwards.
 */
static int
umtxq_sleep_pi(struct umtx_q *uq, struct umtx_pi *pi, uint32_t owner,
    const char *wmesg, struct abs_timeout *timo, bool shared)
{
	struct umtxq_chain *uc;
	struct thread *td, *td1;
	struct umtx_q *uq1;
	int error, pri;

	error = 0;
	td = uq->uq_thread;
	KASSERT(td == curthread, ("inconsistent uq_thread"));
	uc = umtxq_getchain(&uq->uq_key);
	UMTXQ_LOCKED_ASSERT(uc);
	KASSERT(uc->uc_busy != 0, ("umtx chain is not busy"));
	umtxq_insert(uq);
	mtx_lock(&umtx_lock);
	if (pi->pi_owner == NULL) {
		/*
		 * tdfind() may sleep, so drop umtx_lock around it and
		 * re-check pi_owner afterwards in case we raced.
		 */
		mtx_unlock(&umtx_lock);
		td1 = tdfind(owner, shared ? -1 : td->td_proc->p_pid);
		mtx_lock(&umtx_lock);
		if (td1 != NULL) {
			if (pi->pi_owner == NULL)
				umtx_pi_setowner(pi, td1);
			/* tdfind() returns with the proc locked. */
			PROC_UNLOCK(td1->td_proc);
		}
	}

	/* Keep pi_blocked sorted by user priority. */
	TAILQ_FOREACH(uq1, &pi->pi_blocked, uq_lockq) {
		pri = UPRI(uq1->uq_thread);
		if (pri > UPRI(td))
			break;
	}

	if (uq1 != NULL)
		TAILQ_INSERT_BEFORE(uq1, uq, uq_lockq);
	else
		TAILQ_INSERT_TAIL(&pi->pi_blocked, uq, uq_lockq);

	uq->uq_pi_blocked = pi;
	thread_lock(td);
	td->td_flags |= TDF_UPIBLOCKED;
	thread_unlock(td);
	umtx_propagate_priority(td);
	mtx_unlock(&umtx_lock);
	umtxq_unbusy(&uq->uq_key);

	error = umtxq_sleep(uq, wmesg, timo);
	umtxq_remove(uq);

	/* Undo the blocked state and give back any lent priority. */
	mtx_lock(&umtx_lock);
	uq->uq_pi_blocked = NULL;
	thread_lock(td);
	td->td_flags &= ~TDF_UPIBLOCKED;
	thread_unlock(td);
	TAILQ_REMOVE(&pi->pi_blocked, uq, uq_lockq);
	umtx_repropagate_priority(pi);
	mtx_unlock(&umtx_lock);
	umtxq_unlock(&uq->uq_key);

	return (error);
}
1721
1722 /*
1723 * Add reference count for a PI mutex.
1724 */
1725 static void
1726 umtx_pi_ref(struct umtx_pi *pi)
1727 {
1728 struct umtxq_chain *uc;
1729
1730 uc = umtxq_getchain(&pi->pi_key);
1731 UMTXQ_LOCKED_ASSERT(uc);
1732 pi->pi_refcount++;
1733 }
1734
/*
 * Decrease reference count for a PI mutex, if the counter
 * is decreased to zero, its memory space is freed.
 *
 * The umtxq chain lock for pi's key must be held.
 */
static void
umtx_pi_unref(struct umtx_pi *pi)
{
	struct umtxq_chain *uc;

	uc = umtxq_getchain(&pi->pi_key);
	UMTXQ_LOCKED_ASSERT(uc);
	KASSERT(pi->pi_refcount > 0, ("invalid reference count"));
	if (--pi->pi_refcount == 0) {
		mtx_lock(&umtx_lock);
		/* Drop ownership before the record goes away. */
		if (pi->pi_owner != NULL)
			umtx_pi_disown(pi);
		KASSERT(TAILQ_EMPTY(&pi->pi_blocked),
		    ("blocked queue not empty"));
		mtx_unlock(&umtx_lock);
		TAILQ_REMOVE(&uc->uc_pi_list, pi, pi_hashlink);
		umtx_pi_free(pi);
	}
}
1758
1759 /*
1760 * Find a PI mutex in hash table.
1761 */
1762 static struct umtx_pi *
1763 umtx_pi_lookup(struct umtx_key *key)
1764 {
1765 struct umtxq_chain *uc;
1766 struct umtx_pi *pi;
1767
1768 uc = umtxq_getchain(key);
1769 UMTXQ_LOCKED_ASSERT(uc);
1770
1771 TAILQ_FOREACH(pi, &uc->uc_pi_list, pi_hashlink) {
1772 if (umtx_key_match(&pi->pi_key, key)) {
1773 return (pi);
1774 }
1775 }
1776 return (NULL);
1777 }
1778
1779 /*
1780 * Insert a PI mutex into hash table.
1781 */
1782 static inline void
1783 umtx_pi_insert(struct umtx_pi *pi)
1784 {
1785 struct umtxq_chain *uc;
1786
1787 uc = umtxq_getchain(&pi->pi_key);
1788 UMTXQ_LOCKED_ASSERT(uc);
1789 TAILQ_INSERT_TAIL(&uc->uc_pi_list, pi, pi_hashlink);
1790 }
1791
/*
 * Lock a PI mutex.
 *
 * Retries the userspace CAS protocol until the lock is taken, an error
 * occurs, or (with "try") the lock is busy.  Returns 0, EOWNERDEAD for
 * a successfully acquired robust mutex whose owner died, EFAULT,
 * EDEADLK, EBUSY, ENOTRECOVERABLE, or a timeout/signal error from
 * umtxq_sleep_pi().
 */
static int
do_lock_pi(struct thread *td, struct umutex *m, uint32_t flags,
    struct _umtx_time *timeout, int try)
{
	struct abs_timeout timo;
	struct umtx_q *uq;
	struct umtx_pi *pi, *new_pi;
	uint32_t id, old_owner, owner, old;
	int error, rv;

	id = td->td_tid;
	uq = td->td_umtxq;

	if ((error = umtx_key_get(m, (flags & UMUTEX_ROBUST) != 0 ?
	    TYPE_PI_ROBUST_UMUTEX : TYPE_PI_UMUTEX, GET_SHARE(flags),
	    &uq->uq_key)) != 0)
		return (error);

	if (timeout != NULL)
		abs_timeout_init2(&timo, timeout);

	/*
	 * Find or create the kernel-side PI record for this mutex.  The
	 * M_NOWAIT attempt keeps the chain locked; on failure we drop the
	 * lock, allocate with M_WAITOK and re-check for a racing insert.
	 */
	umtxq_lock(&uq->uq_key);
	pi = umtx_pi_lookup(&uq->uq_key);
	if (pi == NULL) {
		new_pi = umtx_pi_alloc(M_NOWAIT);
		if (new_pi == NULL) {
			umtxq_unlock(&uq->uq_key);
			new_pi = umtx_pi_alloc(M_WAITOK);
			umtxq_lock(&uq->uq_key);
			pi = umtx_pi_lookup(&uq->uq_key);
			if (pi != NULL) {
				umtx_pi_free(new_pi);
				new_pi = NULL;
			}
		}
		if (new_pi != NULL) {
			new_pi->pi_key = uq->uq_key;
			umtx_pi_insert(new_pi);
			pi = new_pi;
		}
	}
	umtx_pi_ref(pi);
	umtxq_unlock(&uq->uq_key);

	/*
	 * Care must be exercised when dealing with umtx structure. It
	 * can fault on any access.
	 */
	for (;;) {
		/*
		 * Try the uncontested case. This should be done in userland.
		 */
		rv = casueword32(&m->m_owner, UMUTEX_UNOWNED, &owner, id);
		/* The address was invalid. */
		if (rv == -1) {
			error = EFAULT;
			break;
		}

		/* The acquire succeeded. */
		if (owner == UMUTEX_UNOWNED) {
			error = 0;
			break;
		}

		if (owner == UMUTEX_RB_NOTRECOV) {
			error = ENOTRECOVERABLE;
			break;
		}

		/* If no one owns it but it is contested try to acquire it. */
		if (owner == UMUTEX_CONTESTED || owner == UMUTEX_RB_OWNERDEAD) {
			old_owner = owner;
			rv = casueword32(&m->m_owner, owner, &owner,
			    id | UMUTEX_CONTESTED);
			/* The address was invalid. */
			if (rv == -1) {
				error = EFAULT;
				break;
			}

			if (owner == old_owner) {
				umtxq_lock(&uq->uq_key);
				umtxq_busy(&uq->uq_key);
				error = umtx_pi_claim(pi, td);
				umtxq_unbusy(&uq->uq_key);
				umtxq_unlock(&uq->uq_key);
				if (error != 0) {
					/*
					 * Since we're going to return an
					 * error, restore the m_owner to its
					 * previous, unowned state to avoid
					 * compounding the problem.
					 */
					(void)casuword32(&m->m_owner,
					    id | UMUTEX_CONTESTED,
					    old_owner);
				}
				if (error == 0 &&
				    old_owner == UMUTEX_RB_OWNERDEAD)
					error = EOWNERDEAD;
				break;
			}

			error = umtxq_check_susp(td);
			if (error != 0)
				break;

			/* If this failed the lock has changed, restart. */
			continue;
		}

		/* Recursive PI locking is not supported here. */
		if ((owner & ~UMUTEX_CONTESTED) == id) {
			error = EDEADLK;
			break;
		}

		if (try != 0) {
			error = EBUSY;
			break;
		}

		/*
		 * If we caught a signal, we have retried and now
		 * exit immediately.
		 */
		if (error != 0)
			break;

		umtxq_lock(&uq->uq_key);
		umtxq_busy(&uq->uq_key);
		umtxq_unlock(&uq->uq_key);

		/*
		 * Set the contested bit so that a release in user space
		 * knows to use the system call for unlock. If this fails
		 * either some one else has acquired the lock or it has been
		 * released.
		 */
		rv = casueword32(&m->m_owner, owner, &old, owner |
		    UMUTEX_CONTESTED);

		/* The address was invalid. */
		if (rv == -1) {
			umtxq_unbusy_unlocked(&uq->uq_key);
			error = EFAULT;
			break;
		}

		umtxq_lock(&uq->uq_key);
		/*
		 * We set the contested bit, sleep. Otherwise the lock changed
		 * and we need to retry or we lost a race to the thread
		 * unlocking the umtx. Note that the UMUTEX_RB_OWNERDEAD
		 * value for owner is impossible there.
		 */
		if (old == owner) {
			error = umtxq_sleep_pi(uq, pi,
			    owner & ~UMUTEX_CONTESTED,
			    "umtxpi", timeout == NULL ? NULL : &timo,
			    (flags & USYNC_PROCESS_SHARED) != 0);
			if (error != 0)
				continue;
		} else {
			umtxq_unbusy(&uq->uq_key);
			umtxq_unlock(&uq->uq_key);
		}

		error = umtxq_check_susp(td);
		if (error != 0)
			break;
	}

	umtxq_lock(&uq->uq_key);
	umtx_pi_unref(pi);
	umtxq_unlock(&uq->uq_key);

	umtx_key_release(&uq->uq_key);
	return (error);
}
1975
/*
 * Unlock a PI mutex.
 *
 * The caller must own the mutex.  Disowns the kernel PI record, hands
 * back any lent priority, wakes the best waiter, and rewrites the
 * userspace owner word.  Returns 0, EFAULT, EPERM (not owner or
 * userland corrupted the mutex), or EINVAL if the owner word changed
 * underneath us.
 */
static int
do_unlock_pi(struct thread *td, struct umutex *m, uint32_t flags, bool rb)
{
	struct umtx_key key;
	struct umtx_q *uq_first, *uq_first2, *uq_me;
	struct umtx_pi *pi, *pi2;
	uint32_t id, new_owner, old, owner;
	int count, error, pri;

	id = td->td_tid;
	/*
	 * Make sure we own this mtx.
	 */
	error = fueword32(&m->m_owner, &owner);
	if (error == -1)
		return (EFAULT);

	if ((owner & ~UMUTEX_CONTESTED) != id)
		return (EPERM);

	new_owner = umtx_unlock_val(flags, rb);

	/* This should be done in userland */
	if ((owner & UMUTEX_CONTESTED) == 0) {
		error = casueword32(&m->m_owner, owner, &old, new_owner);
		if (error == -1)
			return (EFAULT);
		if (old == owner)
			return (0);
		/* Lost a race; the contested bit was set meanwhile. */
		owner = old;
	}

	/* We should only ever be in here for contested locks */
	if ((error = umtx_key_get(m, (flags & UMUTEX_ROBUST) != 0 ?
	    TYPE_PI_ROBUST_UMUTEX : TYPE_PI_UMUTEX, GET_SHARE(flags),
	    &key)) != 0)
		return (error);

	umtxq_lock(&key);
	umtxq_busy(&key);
	count = umtxq_count_pi(&key, &uq_first);
	if (uq_first != NULL) {
		mtx_lock(&umtx_lock);
		pi = uq_first->uq_pi_blocked;
		KASSERT(pi != NULL, ("pi == NULL?"));
		if (pi->pi_owner != td && !(rb && pi->pi_owner == NULL)) {
			mtx_unlock(&umtx_lock);
			umtxq_unbusy(&key);
			umtxq_unlock(&key);
			umtx_key_release(&key);
			/* userland messed the mutex */
			return (EPERM);
		}
		uq_me = td->td_umtxq;
		if (pi->pi_owner == td)
			umtx_pi_disown(pi);
		/* get highest priority thread which is still sleeping. */
		uq_first = TAILQ_FIRST(&pi->pi_blocked);
		while (uq_first != NULL &&
		    (uq_first->uq_flags & UQF_UMTXQ) == 0) {
			uq_first = TAILQ_NEXT(uq_first, uq_lockq);
		}
		/*
		 * Recompute our own lent priority from the PI mutexes we
		 * still hold, now that this one has been dropped.
		 */
		pri = PRI_MAX;
		TAILQ_FOREACH(pi2, &uq_me->uq_pi_contested, pi_link) {
			uq_first2 = TAILQ_FIRST(&pi2->pi_blocked);
			if (uq_first2 != NULL) {
				if (pri > UPRI(uq_first2->uq_thread))
					pri = UPRI(uq_first2->uq_thread);
			}
		}
		thread_lock(td);
		sched_lend_user_prio(td, pri);
		thread_unlock(td);
		mtx_unlock(&umtx_lock);
		if (uq_first)
			umtxq_signal_thread(uq_first);
	} else {
		pi = umtx_pi_lookup(&key);
		/*
		 * A umtx_pi can exist if a signal or timeout removed the
		 * last waiter from the umtxq, but there is still
		 * a thread in do_lock_pi() holding the umtx_pi.
		 */
		if (pi != NULL) {
			/*
			 * The umtx_pi can be unowned, such as when a thread
			 * has just entered do_lock_pi(), allocated the
			 * umtx_pi, and unlocked the umtxq.
			 * If the current thread owns it, it must disown it.
			 */
			mtx_lock(&umtx_lock);
			if (pi->pi_owner == td)
				umtx_pi_disown(pi);
			mtx_unlock(&umtx_lock);
		}
	}
	umtxq_unlock(&key);

	/*
	 * When unlocking the umtx, it must be marked as unowned if
	 * there is zero or one thread only waiting for it.
	 * Otherwise, it must be marked as contested.
	 */

	if (count > 1)
		new_owner |= UMUTEX_CONTESTED;
	error = casueword32(&m->m_owner, owner, &old, new_owner);

	umtxq_unbusy_unlocked(&key);
	umtx_key_release(&key);
	if (error == -1)
		return (EFAULT);
	if (old != owner)
		return (EINVAL);
	return (0);
}
2095
2096 /*
2097 * Lock a PP mutex.
2098 */
2099 static int
2100 do_lock_pp(struct thread *td, struct umutex *m, uint32_t flags,
2101 struct _umtx_time *timeout, int try)
2102 {
2103 struct abs_timeout timo;
2104 struct umtx_q *uq, *uq2;
2105 struct umtx_pi *pi;
2106 uint32_t ceiling;
2107 uint32_t owner, id;
2108 int error, pri, old_inherited_pri, su, rv;
2109
2110 id = td->td_tid;
2111 uq = td->td_umtxq;
2112 if ((error = umtx_key_get(m, (flags & UMUTEX_ROBUST) != 0 ?
2113 TYPE_PP_ROBUST_UMUTEX : TYPE_PP_UMUTEX, GET_SHARE(flags),
2114 &uq->uq_key)) != 0)
2115 return (error);
2116
2117 if (timeout != NULL)
2118 abs_timeout_init2(&timo, timeout);
2119
2120 su = (priv_check(td, PRIV_SCHED_RTPRIO) == 0);
2121 for (;;) {
2122 old_inherited_pri = uq->uq_inherited_pri;
2123 umtxq_lock(&uq->uq_key);
2124 umtxq_busy(&uq->uq_key);
2125 umtxq_unlock(&uq->uq_key);
2126
2127 rv = fueword32(&m->m_ceilings[0], &ceiling);
2128 if (rv == -1) {
2129 error = EFAULT;
2130 goto out;
2131 }
2132 ceiling = RTP_PRIO_MAX - ceiling;
2133 if (ceiling > RTP_PRIO_MAX) {
2134 error = EINVAL;
2135 goto out;
2136 }
2137
2138 mtx_lock(&umtx_lock);
2139 if (UPRI(td) < PRI_MIN_REALTIME + ceiling) {
2140 mtx_unlock(&umtx_lock);
2141 error = EINVAL;
2142 goto out;
2143 }
2144 if (su && PRI_MIN_REALTIME + ceiling < uq->uq_inherited_pri) {
2145 uq->uq_inherited_pri = PRI_MIN_REALTIME + ceiling;
2146 thread_lock(td);
2147 if (uq->uq_inherited_pri < UPRI(td))
2148 sched_lend_user_prio(td, uq->uq_inherited_pri);
2149 thread_unlock(td);
2150 }
2151 mtx_unlock(&umtx_lock);
2152
2153 rv = casueword32(&m->m_owner, UMUTEX_CONTESTED, &owner,
2154 id | UMUTEX_CONTESTED);
2155 /* The address was invalid. */
2156 if (rv == -1) {
2157 error = EFAULT;
2158 break;
2159 }
2160
2161 if (owner == UMUTEX_CONTESTED) {
2162 error = 0;
2163 break;
2164 } else if (owner == UMUTEX_RB_OWNERDEAD) {
2165 rv = casueword32(&m->m_owner, UMUTEX_RB_OWNERDEAD,
2166 &owner, id | UMUTEX_CONTESTED);
2167 if (rv == -1) {
2168 error = EFAULT;
2169 break;
2170 }
2171 if (owner == UMUTEX_RB_OWNERDEAD) {
2172 error = EOWNERDEAD; /* success */
2173 break;
2174 }
2175 error = 0;
2176 } else if (owner == UMUTEX_RB_NOTRECOV) {
2177 error = ENOTRECOVERABLE;
2178 break;
2179 }
2180
2181 if (try != 0) {
2182 error = EBUSY;
2183 break;
2184 }
2185
2186 /*
2187 * If we caught a signal, we have retried and now
2188 * exit immediately.
2189 */
2190 if (error != 0)
2191 break;
2192
2193 umtxq_lock(&uq->uq_key);
2194 umtxq_insert(uq);
2195 umtxq_unbusy(&uq->uq_key);
2196 error = umtxq_sleep(uq, "umtxpp", timeout == NULL ?
2197 NULL : &timo);
2198 umtxq_remove(uq);
2199 umtxq_unlock(&uq->uq_key);
2200
2201 mtx_lock(&umtx_lock);
2202 uq->uq_inherited_pri = old_inherited_pri;
2203 pri = PRI_MAX;
2204 TAILQ_FOREACH(pi, &uq->uq_pi_contested, pi_link) {
2205 uq2 = TAILQ_FIRST(&pi->pi_blocked);
2206 if (uq2 != NULL) {
2207 if (pri > UPRI(uq2->uq_thread))
2208 pri = UPRI(uq2->uq_thread);
2209 }
2210 }
2211 if (pri > uq->uq_inherited_pri)
2212 pri = uq->uq_inherited_pri;
2213 thread_lock(td);
2214 sched_lend_user_prio(td, pri);
2215 thread_unlock(td);
2216 mtx_unlock(&umtx_lock);
2217 }
2218
2219 if (error != 0 && error != EOWNERDEAD) {
2220 mtx_lock(&umtx_lock);
2221 uq->uq_inherited_pri = old_inherited_pri;
2222 pri = PRI_MAX;
2223 TAILQ_FOREACH(pi, &uq->uq_pi_contested, pi_link) {
2224 uq2 = TAILQ_FIRST(&pi->pi_blocked);
2225 if (uq2 != NULL) {
2226 if (pri > UPRI(uq2->uq_thread))
2227 pri = UPRI(uq2->uq_thread);
2228 }
2229 }
2230 if (pri > uq->uq_inherited_pri)
2231 pri = uq->uq_inherited_pri;
2232 thread_lock(td);
2233 sched_lend_user_prio(td, pri);
2234 thread_unlock(td);
2235 mtx_unlock(&umtx_lock);
2236 }
2237
2238 out:
2239 umtxq_unbusy_unlocked(&uq->uq_key);
2240 umtx_key_release(&uq->uq_key);
2241 return (error);
2242 }
2243
/*
 * Unlock a PP mutex.
 *
 * The caller must own the mutex.  Restores the caller's inherited
 * priority from m_ceilings[1], wakes one waiter, and always leaves the
 * owner word with UMUTEX_CONTESTED set (see comment below).  Returns
 * 0, EFAULT, EPERM (not owner), EINVAL (bad ceiling), or a copyin
 * error.
 */
static int
do_unlock_pp(struct thread *td, struct umutex *m, uint32_t flags, bool rb)
{
	struct umtx_key key;
	struct umtx_q *uq, *uq2;
	struct umtx_pi *pi;
	uint32_t id, owner, rceiling;
	int error, pri, new_inherited_pri, su;

	id = td->td_tid;
	uq = td->td_umtxq;
	su = (priv_check(td, PRIV_SCHED_RTPRIO) == 0);

	/*
	 * Make sure we own this mtx.
	 */
	error = fueword32(&m->m_owner, &owner);
	if (error == -1)
		return (EFAULT);

	if ((owner & ~UMUTEX_CONTESTED) != id)
		return (EPERM);

	/* m_ceilings[1] holds the priority saved at lock time. */
	error = copyin(&m->m_ceilings[1], &rceiling, sizeof(uint32_t));
	if (error != 0)
		return (error);

	if (rceiling == -1)
		new_inherited_pri = PRI_MAX;
	else {
		rceiling = RTP_PRIO_MAX - rceiling;
		if (rceiling > RTP_PRIO_MAX)
			return (EINVAL);
		new_inherited_pri = PRI_MIN_REALTIME + rceiling;
	}

	if ((error = umtx_key_get(m, (flags & UMUTEX_ROBUST) != 0 ?
	    TYPE_PP_ROBUST_UMUTEX : TYPE_PP_UMUTEX, GET_SHARE(flags),
	    &key)) != 0)
		return (error);
	umtxq_lock(&key);
	umtxq_busy(&key);
	umtxq_unlock(&key);
	/*
	 * For priority protected mutex, always set unlocked state
	 * to UMUTEX_CONTESTED, so that userland always enters kernel
	 * to lock the mutex, it is necessary because thread priority
	 * has to be adjusted for such mutex.
	 */
	error = suword32(&m->m_owner, umtx_unlock_val(flags, rb) |
	    UMUTEX_CONTESTED);

	umtxq_lock(&key);
	if (error == 0)
		umtxq_signal(&key, 1);
	umtxq_unbusy(&key);
	umtxq_unlock(&key);

	if (error == -1)
		error = EFAULT;
	else {
		/* Recompute our lent priority without this PP boost. */
		mtx_lock(&umtx_lock);
		if (su != 0)
			uq->uq_inherited_pri = new_inherited_pri;
		pri = PRI_MAX;
		TAILQ_FOREACH(pi, &uq->uq_pi_contested, pi_link) {
			uq2 = TAILQ_FIRST(&pi->pi_blocked);
			if (uq2 != NULL) {
				if (pri > UPRI(uq2->uq_thread))
					pri = UPRI(uq2->uq_thread);
			}
		}
		if (pri > uq->uq_inherited_pri)
			pri = uq->uq_inherited_pri;
		thread_lock(td);
		sched_lend_user_prio(td, pri);
		thread_unlock(td);
		mtx_unlock(&umtx_lock);
	}
	umtx_key_release(&key);
	return (error);
}
2329
/*
 * Change the priority ceiling (m_ceilings[0]) of a PP mutex.
 *
 * The ceiling may be updated when the mutex is unowned (by briefly
 * taking it) or when the caller already owns it; otherwise the caller
 * sleeps until the mutex is released and retries.  On success the old
 * ceiling is stored through old_ceiling when non-NULL.  Returns 0,
 * EFAULT, EINVAL (not a PP mutex or ceiling out of range), EOWNERDEAD,
 * ENOTRECOVERABLE, or a signal error from the sleep.
 */
static int
do_set_ceiling(struct thread *td, struct umutex *m, uint32_t ceiling,
    uint32_t *old_ceiling)
{
	struct umtx_q *uq;
	uint32_t flags, id, owner, save_ceiling;
	int error, rv, rv1;

	error = fueword32(&m->m_flags, &flags);
	if (error == -1)
		return (EFAULT);
	if ((flags & UMUTEX_PRIO_PROTECT) == 0)
		return (EINVAL);
	if (ceiling > RTP_PRIO_MAX)
		return (EINVAL);
	id = td->td_tid;
	uq = td->td_umtxq;
	if ((error = umtx_key_get(m, (flags & UMUTEX_ROBUST) != 0 ?
	    TYPE_PP_ROBUST_UMUTEX : TYPE_PP_UMUTEX, GET_SHARE(flags),
	    &uq->uq_key)) != 0)
		return (error);
	for (;;) {
		umtxq_lock(&uq->uq_key);
		umtxq_busy(&uq->uq_key);
		umtxq_unlock(&uq->uq_key);

		rv = fueword32(&m->m_ceilings[0], &save_ceiling);
		if (rv == -1) {
			error = EFAULT;
			break;
		}

		/* Try to grab the mutex if it is currently unowned. */
		rv = casueword32(&m->m_owner, UMUTEX_CONTESTED, &owner,
		    id | UMUTEX_CONTESTED);
		if (rv == -1) {
			error = EFAULT;
			break;
		}

		if (owner == UMUTEX_CONTESTED) {
			/* We took it: store the ceiling, then release. */
			rv = suword32(&m->m_ceilings[0], ceiling);
			rv1 = suword32(&m->m_owner, UMUTEX_CONTESTED);
			error = (rv == 0 && rv1 == 0) ? 0: EFAULT;
			break;
		}

		if ((owner & ~UMUTEX_CONTESTED) == id) {
			/* We already own it: update in place. */
			rv = suword32(&m->m_ceilings[0], ceiling);
			error = rv == 0 ? 0 : EFAULT;
			break;
		}

		if (owner == UMUTEX_RB_OWNERDEAD) {
			error = EOWNERDEAD;
			break;
		} else if (owner == UMUTEX_RB_NOTRECOV) {
			error = ENOTRECOVERABLE;
			break;
		}

		/*
		 * If we caught a signal, we have retried and now
		 * exit immediately.
		 */
		if (error != 0)
			break;

		/*
		 * We set the contested bit, sleep. Otherwise the lock changed
		 * and we need to retry or we lost a race to the thread
		 * unlocking the umtx.
		 */
		umtxq_lock(&uq->uq_key);
		umtxq_insert(uq);
		umtxq_unbusy(&uq->uq_key);
		error = umtxq_sleep(uq, "umtxpp", NULL);
		umtxq_remove(uq);
		umtxq_unlock(&uq->uq_key);
	}
	umtxq_lock(&uq->uq_key);
	if (error == 0)
		umtxq_signal(&uq->uq_key, INT_MAX);
	umtxq_unbusy(&uq->uq_key);
	umtxq_unlock(&uq->uq_key);
	umtx_key_release(&uq->uq_key);
	if (error == 0 && old_ceiling != NULL) {
		rv = suword32(old_ceiling, save_ceiling);
		error = rv == 0 ? 0 : EFAULT;
	}
	return (error);
}
2421
2422 /*
2423 * Lock a userland POSIX mutex.
2424 */
2425 static int
2426 do_lock_umutex(struct thread *td, struct umutex *m,
2427 struct _umtx_time *timeout, int mode)
2428 {
2429 uint32_t flags;
2430 int error;
2431
2432 error = fueword32(&m->m_flags, &flags);
2433 if (error == -1)
2434 return (EFAULT);
2435
2436 switch (flags & (UMUTEX_PRIO_INHERIT | UMUTEX_PRIO_PROTECT)) {
2437 case 0:
2438 error = do_lock_normal(td, m, flags, timeout, mode);
2439 break;
2440 case UMUTEX_PRIO_INHERIT:
2441 error = do_lock_pi(td, m, flags, timeout, mode);
2442 break;
2443 case UMUTEX_PRIO_PROTECT:
2444 error = do_lock_pp(td, m, flags, timeout, mode);
2445 break;
2446 default:
2447 return (EINVAL);
2448 }
2449 if (timeout == NULL) {
2450 if (error == EINTR && mode != _UMUTEX_WAIT)
2451 error = ERESTART;
2452 } else {
2453 /* Timed-locking is not restarted. */
2454 if (error == ERESTART)
2455 error = EINTR;
2456 }
2457 return (error);
2458 }
2459
2460 /*
2461 * Unlock a userland POSIX mutex.
2462 */
2463 static int
2464 do_unlock_umutex(struct thread *td, struct umutex *m, bool rb)
2465 {
2466 uint32_t flags;
2467 int error;
2468
2469 error = fueword32(&m->m_flags, &flags);
2470 if (error == -1)
2471 return (EFAULT);
2472
2473 switch (flags & (UMUTEX_PRIO_INHERIT | UMUTEX_PRIO_PROTECT)) {
2474 case 0:
2475 return (do_unlock_normal(td, m, flags, rb));
2476 case UMUTEX_PRIO_INHERIT:
2477 return (do_unlock_pi(td, m, flags, rb));
2478 case UMUTEX_PRIO_PROTECT:
2479 return (do_unlock_pp(td, m, flags, rb));
2480 }
2481
2482 return (EINVAL);
2483 }
2484
/*
 * Wait on a userland condition variable.  The caller-held user mutex
 * 'm' is unlocked here before sleeping; userland re-acquires it after
 * the syscall returns.  The thread is inserted on the sleep queue
 * before the mutex is dropped so a signal between unlock and sleep
 * cannot be lost.
 */
static int
do_cv_wait(struct thread *td, struct ucond *cv, struct umutex *m,
    struct timespec *timeout, u_long wflags)
{
	struct abs_timeout timo;
	struct umtx_q *uq;
	uint32_t flags, clockid, hasw;
	int error;

	uq = td->td_umtxq;
	error = fueword32(&cv->c_flags, &flags);
	if (error == -1)
		return (EFAULT);
	error = umtx_key_get(cv, TYPE_CV, GET_SHARE(flags), &uq->uq_key);
	if (error != 0)
		return (error);

	if ((wflags & CVWAIT_CLOCKID) != 0) {
		/* Caller requested the clock id stored in the ucond. */
		error = fueword32(&cv->c_clockid, &clockid);
		if (error == -1) {
			umtx_key_release(&uq->uq_key);
			return (EFAULT);
		}
		if (clockid < CLOCK_REALTIME ||
		    clockid >= CLOCK_THREAD_CPUTIME_ID) {
			/* hmm, only HW clock id will work. */
			umtx_key_release(&uq->uq_key);
			return (EINVAL);
		}
	} else {
		clockid = CLOCK_REALTIME;
	}

	/* Enqueue before dropping the user mutex to avoid lost wakeups. */
	umtxq_lock(&uq->uq_key);
	umtxq_busy(&uq->uq_key);
	umtxq_insert(uq);
	umtxq_unlock(&uq->uq_key);

	/*
	 * Set c_has_waiters to 1 before releasing user mutex, also
	 * don't modify cache line when unnecessary.
	 */
	error = fueword32(&cv->c_has_waiters, &hasw);
	if (error == 0 && hasw == 0)
		suword32(&cv->c_has_waiters, 1);

	umtxq_unbusy_unlocked(&uq->uq_key);

	/* Release the user mutex; wakeups may arrive from here on. */
	error = do_unlock_umutex(td, m, false);

	if (timeout != NULL)
		abs_timeout_init(&timo, clockid, (wflags & CVWAIT_ABSTIME) != 0,
		    timeout);

	umtxq_lock(&uq->uq_key);
	if (error == 0) {
		error = umtxq_sleep(uq, "ucond", timeout == NULL ?
		    NULL : &timo);
	}

	if ((uq->uq_flags & UQF_UMTXQ) == 0)
		/* Taken off the queue by a wakeup: report success. */
		error = 0;
	else {
		/*
		 * This must be a timeout, interrupted by signal or
		 * spurious wakeup; clear the c_has_waiters flag when
		 * necessary.
		 */
		umtxq_busy(&uq->uq_key);
		if ((uq->uq_flags & UQF_UMTXQ) != 0) {
			int oldlen = uq->uq_cur_queue->length;
			umtxq_remove(uq);
			if (oldlen == 1) {
				/* We were the last waiter; clear the hint. */
				umtxq_unlock(&uq->uq_key);
				suword32(&cv->c_has_waiters, 0);
				umtxq_lock(&uq->uq_key);
			}
		}
		umtxq_unbusy(&uq->uq_key);
		if (error == ERESTART)
			error = EINTR;
	}

	umtxq_unlock(&uq->uq_key);
	umtx_key_release(&uq->uq_key);
	return (error);
}
2572
/*
 * Signal a userland condition variable: wake at most one waiter, and
 * clear the userland c_has_waiters hint when no waiters remain.
 */
static int
do_cv_signal(struct thread *td, struct ucond *cv)
{
	struct umtx_key key;
	int error, cnt, nwake;
	uint32_t flags;

	error = fueword32(&cv->c_flags, &flags);
	if (error == -1)
		return (EFAULT);
	if ((error = umtx_key_get(cv, TYPE_CV, GET_SHARE(flags), &key)) != 0)
		return (error);
	umtxq_lock(&key);
	umtxq_busy(&key);
	cnt = umtxq_count(&key);
	nwake = umtxq_signal(&key, 1);
	if (cnt <= nwake) {
		/*
		 * All known waiters were woken; clear c_has_waiters.
		 * The chain lock must be dropped to touch user memory,
		 * but the queue remains marked busy across the gap.
		 */
		umtxq_unlock(&key);
		error = suword32(&cv->c_has_waiters, 0);
		if (error == -1)
			error = EFAULT;
		umtxq_lock(&key);
	}
	umtxq_unbusy(&key);
	umtxq_unlock(&key);
	umtx_key_release(&key);
	return (error);
}
2604
/*
 * Broadcast a userland condition variable: wake all waiters and clear
 * the userland c_has_waiters hint.
 */
static int
do_cv_broadcast(struct thread *td, struct ucond *cv)
{
	struct umtx_key key;
	int error;
	uint32_t flags;

	error = fueword32(&cv->c_flags, &flags);
	if (error == -1)
		return (EFAULT);
	if ((error = umtx_key_get(cv, TYPE_CV, GET_SHARE(flags), &key)) != 0)
		return (error);

	umtxq_lock(&key);
	umtxq_busy(&key);
	umtxq_signal(&key, INT_MAX);
	umtxq_unlock(&key);

	/* Chain lock dropped, but the queue is still marked busy. */
	error = suword32(&cv->c_has_waiters, 0);
	if (error == -1)
		error = EFAULT;

	umtxq_unbusy_unlocked(&key);

	umtx_key_release(&key);
	return (error);
}
2632
/*
 * Acquire a userland rwlock for reading.  'fflag' may carry
 * URWLOCK_PREFER_READER to override the writer preference in the
 * lock's flags.  Returns 0 on success, EAGAIN when the reader count
 * would exceed URWLOCK_MAX_READERS, EFAULT on inaccessible user
 * memory, or an error from sleeping/suspension.
 */
static int
do_rw_rdlock(struct thread *td, struct urwlock *rwlock, long fflag, struct _umtx_time *timeout)
{
	struct abs_timeout timo;
	struct umtx_q *uq;
	uint32_t flags, wrflags;
	int32_t state, oldstate;
	int32_t blocked_readers;
	int error, error1, rv;

	uq = td->td_umtxq;
	error = fueword32(&rwlock->rw_flags, &flags);
	if (error == -1)
		return (EFAULT);
	error = umtx_key_get(rwlock, TYPE_RWLOCK, GET_SHARE(flags), &uq->uq_key);
	if (error != 0)
		return (error);

	if (timeout != NULL)
		abs_timeout_init2(&timo, timeout);

	/*
	 * Unless reader preference is in effect, back off not only when
	 * a writer owns the lock but also when writers are waiting.
	 */
	wrflags = URWLOCK_WRITE_OWNER;
	if (!(fflag & URWLOCK_PREFER_READER) && !(flags & URWLOCK_PREFER_READER))
		wrflags |= URWLOCK_WRITE_WAITERS;

	for (;;) {
		rv = fueword32(&rwlock->rw_state, &state);
		if (rv == -1) {
			umtx_key_release(&uq->uq_key);
			return (EFAULT);
		}

		/* try to lock it */
		while (!(state & wrflags)) {
			if (__predict_false(URWLOCK_READER_COUNT(state) == URWLOCK_MAX_READERS)) {
				umtx_key_release(&uq->uq_key);
				return (EAGAIN);
			}
			rv = casueword32(&rwlock->rw_state, state,
			    &oldstate, state + 1);
			if (rv == -1) {
				umtx_key_release(&uq->uq_key);
				return (EFAULT);
			}
			if (oldstate == state) {
				/* CAS succeeded: read lock acquired. */
				umtx_key_release(&uq->uq_key);
				return (0);
			}
			error = umtxq_check_susp(td);
			if (error != 0)
				break;
			state = oldstate;
		}

		if (error)
			break;

		/* grab monitor lock */
		umtxq_lock(&uq->uq_key);
		umtxq_busy(&uq->uq_key);
		umtxq_unlock(&uq->uq_key);

		/*
		 * re-read the state, in case it changed between the try-lock above
		 * and the check below
		 */
		rv = fueword32(&rwlock->rw_state, &state);
		if (rv == -1)
			error = EFAULT;

		/* set read contention bit */
		while (error == 0 && (state & wrflags) &&
		    !(state & URWLOCK_READ_WAITERS)) {
			rv = casueword32(&rwlock->rw_state, state,
			    &oldstate, state | URWLOCK_READ_WAITERS);
			if (rv == -1) {
				error = EFAULT;
				break;
			}
			if (oldstate == state)
				goto sleep;
			state = oldstate;
			error = umtxq_check_susp(td);
			if (error != 0)
				break;
		}
		if (error != 0) {
			umtxq_unbusy_unlocked(&uq->uq_key);
			break;
		}

		/* state is changed while setting flags, restart */
		if (!(state & wrflags)) {
			umtxq_unbusy_unlocked(&uq->uq_key);
			error = umtxq_check_susp(td);
			if (error != 0)
				break;
			continue;
		}

sleep:
		/* contention bit is set, before sleeping, increase read waiter count */
		rv = fueword32(&rwlock->rw_blocked_readers,
		    &blocked_readers);
		if (rv == -1) {
			umtxq_unbusy_unlocked(&uq->uq_key);
			error = EFAULT;
			break;
		}
		suword32(&rwlock->rw_blocked_readers, blocked_readers+1);

		while (state & wrflags) {
			umtxq_lock(&uq->uq_key);
			umtxq_insert(uq);
			umtxq_unbusy(&uq->uq_key);

			error = umtxq_sleep(uq, "urdlck", timeout == NULL ?
			    NULL : &timo);

			umtxq_busy(&uq->uq_key);
			umtxq_remove(uq);
			umtxq_unlock(&uq->uq_key);
			if (error)
				break;
			rv = fueword32(&rwlock->rw_state, &state);
			if (rv == -1) {
				error = EFAULT;
				break;
			}
		}

		/* decrease read waiter count, and may clear read contention bit */
		rv = fueword32(&rwlock->rw_blocked_readers,
		    &blocked_readers);
		if (rv == -1) {
			umtxq_unbusy_unlocked(&uq->uq_key);
			error = EFAULT;
			break;
		}
		suword32(&rwlock->rw_blocked_readers, blocked_readers-1);
		if (blocked_readers == 1) {
			/* Last blocked reader: clear URWLOCK_READ_WAITERS. */
			rv = fueword32(&rwlock->rw_state, &state);
			if (rv == -1) {
				umtxq_unbusy_unlocked(&uq->uq_key);
				error = EFAULT;
				break;
			}
			for (;;) {
				rv = casueword32(&rwlock->rw_state, state,
				    &oldstate, state & ~URWLOCK_READ_WAITERS);
				if (rv == -1) {
					error = EFAULT;
					break;
				}
				if (oldstate == state)
					break;
				state = oldstate;
				error1 = umtxq_check_susp(td);
				if (error1 != 0) {
					if (error == 0)
						error = error1;
					break;
				}
			}
		}

		umtxq_unbusy_unlocked(&uq->uq_key);
		if (error != 0)
			break;
	}
	umtx_key_release(&uq->uq_key);
	if (error == ERESTART)
		error = EINTR;
	return (error);
}
2808
/*
 * Acquire a userland rwlock for writing.  Returns 0 on success, EFAULT
 * on inaccessible user memory, or an error from sleeping/suspension.
 * 'blocked_readers' caches the reader-waiter count sampled when we
 * cleared URWLOCK_WRITE_WAITERS, so that a writer that gives up can
 * wake readers it may have stranded.
 */
static int
do_rw_wrlock(struct thread *td, struct urwlock *rwlock, struct _umtx_time *timeout)
{
	struct abs_timeout timo;
	struct umtx_q *uq;
	uint32_t flags;
	int32_t state, oldstate;
	int32_t blocked_writers;
	int32_t blocked_readers;
	int error, error1, rv;

	uq = td->td_umtxq;
	error = fueword32(&rwlock->rw_flags, &flags);
	if (error == -1)
		return (EFAULT);
	error = umtx_key_get(rwlock, TYPE_RWLOCK, GET_SHARE(flags), &uq->uq_key);
	if (error != 0)
		return (error);

	if (timeout != NULL)
		abs_timeout_init2(&timo, timeout);

	blocked_readers = 0;
	for (;;) {
		rv = fueword32(&rwlock->rw_state, &state);
		if (rv == -1) {
			umtx_key_release(&uq->uq_key);
			return (EFAULT);
		}
		/* Try to take the lock while there is no owner at all. */
		while (!(state & URWLOCK_WRITE_OWNER) && URWLOCK_READER_COUNT(state) == 0) {
			rv = casueword32(&rwlock->rw_state, state,
			    &oldstate, state | URWLOCK_WRITE_OWNER);
			if (rv == -1) {
				umtx_key_release(&uq->uq_key);
				return (EFAULT);
			}
			if (oldstate == state) {
				/* CAS succeeded: write lock acquired. */
				umtx_key_release(&uq->uq_key);
				return (0);
			}
			state = oldstate;
			error = umtxq_check_susp(td);
			if (error != 0)
				break;
		}

		if (error) {
			/*
			 * Give up.  If no writer owns or waits on the lock
			 * and we saw blocked readers on the previous pass,
			 * wake them so they are not stranded.
			 */
			if (!(state & (URWLOCK_WRITE_OWNER|URWLOCK_WRITE_WAITERS)) &&
			    blocked_readers != 0) {
				umtxq_lock(&uq->uq_key);
				umtxq_busy(&uq->uq_key);
				umtxq_signal_queue(&uq->uq_key, INT_MAX, UMTX_SHARED_QUEUE);
				umtxq_unbusy(&uq->uq_key);
				umtxq_unlock(&uq->uq_key);
			}

			break;
		}

		/* grab monitor lock */
		umtxq_lock(&uq->uq_key);
		umtxq_busy(&uq->uq_key);
		umtxq_unlock(&uq->uq_key);

		/*
		 * re-read the state, in case it changed between the try-lock above
		 * and the check below
		 */
		rv = fueword32(&rwlock->rw_state, &state);
		if (rv == -1)
			error = EFAULT;

		/* Announce ourselves by setting the write-waiters bit. */
		while (error == 0 && ((state & URWLOCK_WRITE_OWNER) ||
		    URWLOCK_READER_COUNT(state) != 0) &&
		    (state & URWLOCK_WRITE_WAITERS) == 0) {
			rv = casueword32(&rwlock->rw_state, state,
			    &oldstate, state | URWLOCK_WRITE_WAITERS);
			if (rv == -1) {
				error = EFAULT;
				break;
			}
			if (oldstate == state)
				goto sleep;
			state = oldstate;
			error = umtxq_check_susp(td);
			if (error != 0)
				break;
		}
		if (error != 0) {
			umtxq_unbusy_unlocked(&uq->uq_key);
			break;
		}

		/* Lock became free while we were busying the queue: retry. */
		if (!(state & URWLOCK_WRITE_OWNER) && URWLOCK_READER_COUNT(state) == 0) {
			umtxq_unbusy_unlocked(&uq->uq_key);
			error = umtxq_check_susp(td);
			if (error != 0)
				break;
			continue;
		}
sleep:
		/* Bump the userland blocked-writer count before sleeping. */
		rv = fueword32(&rwlock->rw_blocked_writers,
		    &blocked_writers);
		if (rv == -1) {
			umtxq_unbusy_unlocked(&uq->uq_key);
			error = EFAULT;
			break;
		}
		suword32(&rwlock->rw_blocked_writers, blocked_writers+1);

		while ((state & URWLOCK_WRITE_OWNER) || URWLOCK_READER_COUNT(state) != 0) {
			umtxq_lock(&uq->uq_key);
			umtxq_insert_queue(uq, UMTX_EXCLUSIVE_QUEUE);
			umtxq_unbusy(&uq->uq_key);

			error = umtxq_sleep(uq, "uwrlck", timeout == NULL ?
			    NULL : &timo);

			umtxq_busy(&uq->uq_key);
			umtxq_remove_queue(uq, UMTX_EXCLUSIVE_QUEUE);
			umtxq_unlock(&uq->uq_key);
			if (error)
				break;
			rv = fueword32(&rwlock->rw_state, &state);
			if (rv == -1) {
				error = EFAULT;
				break;
			}
		}

		rv = fueword32(&rwlock->rw_blocked_writers,
		    &blocked_writers);
		if (rv == -1) {
			umtxq_unbusy_unlocked(&uq->uq_key);
			error = EFAULT;
			break;
		}
		suword32(&rwlock->rw_blocked_writers, blocked_writers-1);
		if (blocked_writers == 1) {
			/* Last blocked writer: clear URWLOCK_WRITE_WAITERS. */
			rv = fueword32(&rwlock->rw_state, &state);
			if (rv == -1) {
				umtxq_unbusy_unlocked(&uq->uq_key);
				error = EFAULT;
				break;
			}
			for (;;) {
				rv = casueword32(&rwlock->rw_state, state,
				    &oldstate, state & ~URWLOCK_WRITE_WAITERS);
				if (rv == -1) {
					error = EFAULT;
					break;
				}
				if (oldstate == state)
					break;
				state = oldstate;
				error1 = umtxq_check_susp(td);
				/*
				 * We are leaving the URWLOCK_WRITE_WAITERS
				 * behind, but this should not harm the
				 * correctness.
				 */
				if (error1 != 0) {
					if (error == 0)
						error = error1;
					break;
				}
			}
			/* Sample reader count for the bail-out path above. */
			rv = fueword32(&rwlock->rw_blocked_readers,
			    &blocked_readers);
			if (rv == -1) {
				umtxq_unbusy_unlocked(&uq->uq_key);
				error = EFAULT;
				break;
			}
		} else
			blocked_readers = 0;

		umtxq_unbusy_unlocked(&uq->uq_key);
	}

	umtx_key_release(&uq->uq_key);
	if (error == ERESTART)
		error = EINTR;
	return (error);
}
2994
/*
 * Release a userland rwlock: drop either the write-owner bit or one
 * reader reference, then wake the appropriate waiter queue.  Returns
 * EPERM when the lock is not held in a compatible mode, EFAULT on
 * inaccessible user memory.
 */
static int
do_rw_unlock(struct thread *td, struct urwlock *rwlock)
{
	struct umtx_q *uq;
	uint32_t flags;
	int32_t state, oldstate;
	int error, rv, q, count;

	uq = td->td_umtxq;
	error = fueword32(&rwlock->rw_flags, &flags);
	if (error == -1)
		return (EFAULT);
	error = umtx_key_get(rwlock, TYPE_RWLOCK, GET_SHARE(flags), &uq->uq_key);
	if (error != 0)
		return (error);

	error = fueword32(&rwlock->rw_state, &state);
	if (error == -1) {
		error = EFAULT;
		goto out;
	}
	if (state & URWLOCK_WRITE_OWNER) {
		/* Clear the write-owner bit, retrying on concurrent updates. */
		for (;;) {
			rv = casueword32(&rwlock->rw_state, state,
			    &oldstate, state & ~URWLOCK_WRITE_OWNER);
			if (rv == -1) {
				error = EFAULT;
				goto out;
			}
			if (oldstate != state) {
				state = oldstate;
				if (!(oldstate & URWLOCK_WRITE_OWNER)) {
					/* Someone else already released it. */
					error = EPERM;
					goto out;
				}
				error = umtxq_check_susp(td);
				if (error != 0)
					goto out;
			} else
				break;
		}
	} else if (URWLOCK_READER_COUNT(state) != 0) {
		/* Drop one reader reference. */
		for (;;) {
			rv = casueword32(&rwlock->rw_state, state,
			    &oldstate, state - 1);
			if (rv == -1) {
				error = EFAULT;
				goto out;
			}
			if (oldstate != state) {
				state = oldstate;
				if (URWLOCK_READER_COUNT(oldstate) == 0) {
					error = EPERM;
					goto out;
				}
				error = umtxq_check_susp(td);
				if (error != 0)
					goto out;
			} else
				break;
		}
	} else {
		/* Neither write-owned nor read-locked. */
		error = EPERM;
		goto out;
	}

	count = 0;

	/*
	 * Choose which queue to wake: one writer normally takes
	 * precedence; with URWLOCK_PREFER_READER, readers go first.
	 */
	if (!(flags & URWLOCK_PREFER_READER)) {
		if (state & URWLOCK_WRITE_WAITERS) {
			count = 1;
			q = UMTX_EXCLUSIVE_QUEUE;
		} else if (state & URWLOCK_READ_WAITERS) {
			count = INT_MAX;
			q = UMTX_SHARED_QUEUE;
		}
	} else {
		if (state & URWLOCK_READ_WAITERS) {
			count = INT_MAX;
			q = UMTX_SHARED_QUEUE;
		} else if (state & URWLOCK_WRITE_WAITERS) {
			count = 1;
			q = UMTX_EXCLUSIVE_QUEUE;
		}
	}

	if (count) {
		umtxq_lock(&uq->uq_key);
		umtxq_busy(&uq->uq_key);
		umtxq_signal_queue(&uq->uq_key, count, q);
		umtxq_unbusy(&uq->uq_key);
		umtxq_unlock(&uq->uq_key);
	}
out:
	umtx_key_release(&uq->uq_key);
	return (error);
}
3092
3093 #if defined(COMPAT_FREEBSD9) || defined(COMPAT_FREEBSD10)
/*
 * Wait on an old-ABI userland semaphore (struct _usem); compiled only
 * for COMPAT_FREEBSD9/COMPAT_FREEBSD10.  Sets _has_waiters before
 * checking _count so a racing post cannot miss us.
 */
static int
do_sem_wait(struct thread *td, struct _usem *sem, struct _umtx_time *timeout)
{
	struct abs_timeout timo;
	struct umtx_q *uq;
	uint32_t flags, count, count1;
	int error, rv;

	uq = td->td_umtxq;
	error = fueword32(&sem->_flags, &flags);
	if (error == -1)
		return (EFAULT);
	error = umtx_key_get(sem, TYPE_SEM, GET_SHARE(flags), &uq->uq_key);
	if (error != 0)
		return (error);

	if (timeout != NULL)
		abs_timeout_init2(&timo, timeout);

	/* Enqueue before examining the semaphore to avoid lost wakeups. */
	umtxq_lock(&uq->uq_key);
	umtxq_busy(&uq->uq_key);
	umtxq_insert(uq);
	umtxq_unlock(&uq->uq_key);
	rv = casueword32(&sem->_has_waiters, 0, &count1, 1);
	if (rv == 0)
		rv = fueword32(&sem->_count, &count);
	if (rv == -1 || count != 0) {
		/* Fault, or the semaphore is available: do not sleep. */
		umtxq_lock(&uq->uq_key);
		umtxq_unbusy(&uq->uq_key);
		umtxq_remove(uq);
		umtxq_unlock(&uq->uq_key);
		umtx_key_release(&uq->uq_key);
		return (rv == -1 ? EFAULT : 0);
	}
	umtxq_lock(&uq->uq_key);
	umtxq_unbusy(&uq->uq_key);

	error = umtxq_sleep(uq, "usem", timeout == NULL ? NULL : &timo);

	if ((uq->uq_flags & UQF_UMTXQ) == 0)
		/* Taken off the queue by a wakeup: report success. */
		error = 0;
	else {
		umtxq_remove(uq);
		/* A relative timeout cannot be restarted. */
		if (error == ERESTART && timeout != NULL &&
		    (timeout->_flags & UMTX_ABSTIME) == 0)
			error = EINTR;
	}
	umtxq_unlock(&uq->uq_key);
	umtx_key_release(&uq->uq_key);
	return (error);
}
3146
/*
 * Signal a userland semaphore.
 */
static int
do_sem_wake(struct thread *td, struct _usem *sem)
{
	struct umtx_key key;
	int error, cnt;
	uint32_t flags;

	error = fueword32(&sem->_flags, &flags);
	if (error == -1)
		return (EFAULT);
	if ((error = umtx_key_get(sem, TYPE_SEM, GET_SHARE(flags), &key)) != 0)
		return (error);
	umtxq_lock(&key);
	umtxq_busy(&key);
	cnt = umtxq_count(&key);
	if (cnt > 0) {
		/*
		 * Check if count is greater than 0, this means the memory is
		 * still being referenced by user code, so we can safely
		 * update _has_waiters flag.
		 */
		if (cnt == 1) {
			/* Waking the only waiter: clear _has_waiters. */
			umtxq_unlock(&key);
			error = suword32(&sem->_has_waiters, 0);
			umtxq_lock(&key);
			if (error == -1)
				error = EFAULT;
		}
		umtxq_signal(&key, 1);
	}
	umtxq_unbusy(&key);
	umtxq_unlock(&key);
	umtx_key_release(&key);
	return (error);
}
3185 #endif
3186
3187 static int
3188 do_sem2_wait(struct thread *td, struct _usem2 *sem, struct _umtx_time *timeout)
3189 {
3190 struct abs_timeout timo;
3191 struct umtx_q *uq;
3192 uint32_t count, flags;
3193 int error, rv;
3194
3195 uq = td->td_umtxq;
3196 flags = fuword32(&sem->_flags);
3197 error = umtx_key_get(sem, TYPE_SEM, GET_SHARE(flags), &uq->uq_key);
3198 if (error != 0)
3199 return (error);
3200
3201 if (timeout != NULL)
3202 abs_timeout_init2(&timo, timeout);
3203
3204 umtxq_lock(&uq->uq_key);
3205 umtxq_busy(&uq->uq_key);
3206 umtxq_insert(uq);
3207 umtxq_unlock(&uq->uq_key);
3208 rv = fueword32(&sem->_count, &count);
3209 if (rv == -1) {
3210 umtxq_lock(&uq->uq_key);
3211 umtxq_unbusy(&uq->uq_key);
3212 umtxq_remove(uq);
3213 umtxq_unlock(&uq->uq_key);
3214 umtx_key_release(&uq->uq_key);
3215 return (EFAULT);
3216 }
3217 for (;;) {
3218 if (USEM_COUNT(count) != 0) {
3219 umtxq_lock(&uq->uq_key);
3220 umtxq_unbusy(&uq->uq_key);
3221 umtxq_remove(uq);
3222 umtxq_unlock(&uq->uq_key);
3223 umtx_key_release(&uq->uq_key);
3224 return (0);
3225 }
3226 if (count == USEM_HAS_WAITERS)
3227 break;
3228 rv = casueword32(&sem->_count, 0, &count, USEM_HAS_WAITERS);
3229 if (rv == -1) {
3230 umtxq_lock(&uq->uq_key);
3231 umtxq_unbusy(&uq->uq_key);
3232 umtxq_remove(uq);
3233 umtxq_unlock(&uq->uq_key);
3234 umtx_key_release(&uq->uq_key);
3235 return (EFAULT);
3236 }
3237 if (count == 0)
3238 break;
3239 }
3240 umtxq_lock(&uq->uq_key);
3241 umtxq_unbusy(&uq->uq_key);
3242
3243 error = umtxq_sleep(uq, "usem", timeout == NULL ? NULL : &timo);
3244
3245 if ((uq->uq_flags & UQF_UMTXQ) == 0)
3246 error = 0;
3247 else {
3248 umtxq_remove(uq);
3249 if (timeout != NULL && (timeout->_flags & UMTX_ABSTIME) == 0) {
3250 /* A relative timeout cannot be restarted. */
3251 if (error == ERESTART)
3252 error = EINTR;
3253 if (error == EINTR) {
3254 abs_timeout_update(&timo);
3255 timeout->_timeout = timo.end;
3256 timespecsub(&timeout->_timeout, &timo.cur);
3257 }
3258 }
3259 }
3260 umtxq_unlock(&uq->uq_key);
3261 umtx_key_release(&uq->uq_key);
3262 return (error);
3263 }
3264
/*
 * Signal a userland semaphore.
 */
static int
do_sem2_wake(struct thread *td, struct _usem2 *sem)
{
	struct umtx_key key;
	int error, cnt, rv;
	uint32_t count, flags;

	rv = fueword32(&sem->_flags, &flags);
	if (rv == -1)
		return (EFAULT);
	if ((error = umtx_key_get(sem, TYPE_SEM, GET_SHARE(flags), &key)) != 0)
		return (error);
	umtxq_lock(&key);
	umtxq_busy(&key);
	cnt = umtxq_count(&key);
	if (cnt > 0) {
		/*
		 * If this was the last sleeping thread, clear the waiters
		 * flag in _count.
		 */
		if (cnt == 1) {
			umtxq_unlock(&key);
			rv = fueword32(&sem->_count, &count);
			/* CAS-loop until the flag is clear or a fault occurs. */
			while (rv != -1 && count & USEM_HAS_WAITERS)
				rv = casueword32(&sem->_count, count, &count,
				    count & ~USEM_HAS_WAITERS);
			if (rv == -1)
				error = EFAULT;
			umtxq_lock(&key);
		}

		umtxq_signal(&key, 1);
	}
	umtxq_unbusy(&key);
	umtxq_unlock(&key);
	umtx_key_release(&key);
	return (error);
}
3306
3307 inline int
3308 umtx_copyin_timeout(const void *addr, struct timespec *tsp)
3309 {
3310 int error;
3311
3312 error = copyin(addr, tsp, sizeof(struct timespec));
3313 if (error == 0) {
3314 if (tsp->tv_sec < 0 ||
3315 tsp->tv_nsec >= 1000000000 ||
3316 tsp->tv_nsec < 0)
3317 error = EINVAL;
3318 }
3319 return (error);
3320 }
3321
3322 static inline int
3323 umtx_copyin_umtx_time(const void *addr, size_t size, struct _umtx_time *tp)
3324 {
3325 int error;
3326
3327 if (size <= sizeof(struct timespec)) {
3328 tp->_clockid = CLOCK_REALTIME;
3329 tp->_flags = 0;
3330 error = copyin(addr, &tp->_timeout, sizeof(struct timespec));
3331 } else
3332 error = copyin(addr, tp, sizeof(struct _umtx_time));
3333 if (error != 0)
3334 return (error);
3335 if (tp->_timeout.tv_sec < 0 ||
3336 tp->_timeout.tv_nsec >= 1000000000 || tp->_timeout.tv_nsec < 0)
3337 return (EINVAL);
3338 return (0);
3339 }
3340
/*
 * Handler for _umtx_op operation slots that are not implemented;
 * always fails with EOPNOTSUPP.
 */
static int
__umtx_op_unimpl(struct thread *td, struct _umtx_op_args *uap)
{

	return (EOPNOTSUPP);
}
3347
3348 static int
3349 __umtx_op_wait(struct thread *td, struct _umtx_op_args *uap)
3350 {
3351 struct _umtx_time timeout, *tm_p;
3352 int error;
3353
3354 if (uap->uaddr2 == NULL)
3355 tm_p = NULL;
3356 else {
3357 error = umtx_copyin_umtx_time(
3358 uap->uaddr2, (size_t)uap->uaddr1, &timeout);
3359 if (error != 0)
3360 return (error);
3361 tm_p = &timeout;
3362 }
3363 return (do_wait(td, uap->obj, uap->val, tm_p, 0, 0));
3364 }
3365
3366 static int
3367 __umtx_op_wait_uint(struct thread *td, struct _umtx_op_args *uap)
3368 {
3369 struct _umtx_time timeout, *tm_p;
3370 int error;
3371
3372 if (uap->uaddr2 == NULL)
3373 tm_p = NULL;
3374 else {
3375 error = umtx_copyin_umtx_time(
3376 uap->uaddr2, (size_t)uap->uaddr1, &timeout);
3377 if (error != 0)
3378 return (error);
3379 tm_p = &timeout;
3380 }
3381 return (do_wait(td, uap->obj, uap->val, tm_p, 1, 0));
3382 }
3383
3384 static int
3385 __umtx_op_wait_uint_private(struct thread *td, struct _umtx_op_args *uap)
3386 {
3387 struct _umtx_time *tm_p, timeout;
3388 int error;
3389
3390 if (uap->uaddr2 == NULL)
3391 tm_p = NULL;
3392 else {
3393 error = umtx_copyin_umtx_time(
3394 uap->uaddr2, (size_t)uap->uaddr1, &timeout);
3395 if (error != 0)
3396 return (error);
3397 tm_p = &timeout;
3398 }
3399 return (do_wait(td, uap->obj, uap->val, tm_p, 1, 1));
3400 }
3401
3402 static int
3403 __umtx_op_wake(struct thread *td, struct _umtx_op_args *uap)
3404 {
3405
3406 return (kern_umtx_wake(td, uap->obj, uap->val, 0));
3407 }
3408
3409 #define BATCH_SIZE 128
3410 static int
3411 __umtx_op_nwake_private(struct thread *td, struct _umtx_op_args *uap)
3412 {
3413 char *uaddrs[BATCH_SIZE], **upp;
3414 int count, error, i, pos, tocopy;
3415
3416 upp = (char **)uap->obj;
3417 error = 0;
3418 for (count = uap->val, pos = 0; count > 0; count -= tocopy,
3419 pos += tocopy) {
3420 tocopy = MIN(count, BATCH_SIZE);
3421 error = copyin(upp + pos, uaddrs, tocopy * sizeof(char *));
3422 if (error != 0)
3423 break;
3424 for (i = 0; i < tocopy; ++i)
3425 kern_umtx_wake(td, uaddrs[i], INT_MAX, 1);
3426 maybe_yield();
3427 }
3428 return (error);
3429 }
3430
3431 static int
3432 __umtx_op_wake_private(struct thread *td, struct _umtx_op_args *uap)
3433 {
3434
3435 return (kern_umtx_wake(td, uap->obj, uap->val, 1));
3436 }
3437
3438 static int
3439 __umtx_op_lock_umutex(struct thread *td, struct _umtx_op_args *uap)
3440 {
3441 struct _umtx_time *tm_p, timeout;
3442 int error;
3443
3444 /* Allow a null timespec (wait forever). */
3445 if (uap->uaddr2 == NULL)
3446 tm_p = NULL;
3447 else {
3448 error = umtx_copyin_umtx_time(
3449 uap->uaddr2, (size_t)uap->uaddr1, &timeout);
3450 if (error != 0)
3451 return (error);
3452 tm_p = &timeout;
3453 }
3454 return (do_lock_umutex(td, uap->obj, tm_p, 0));
3455 }
3456
3457 static int
3458 __umtx_op_trylock_umutex(struct thread *td, struct _umtx_op_args *uap)
3459 {
3460
3461 return (do_lock_umutex(td, uap->obj, NULL, _UMUTEX_TRY));
3462 }
3463
3464 static int
3465 __umtx_op_wait_umutex(struct thread *td, struct _umtx_op_args *uap)
3466 {
3467 struct _umtx_time *tm_p, timeout;
3468 int error;
3469
3470 /* Allow a null timespec (wait forever). */
3471 if (uap->uaddr2 == NULL)
3472 tm_p = NULL;
3473 else {
3474 error = umtx_copyin_umtx_time(
3475 uap->uaddr2, (size_t)uap->uaddr1, &timeout);
3476 if (error != 0)
3477 return (error);
3478 tm_p = &timeout;
3479 }
3480 return (do_lock_umutex(td, uap->obj, tm_p, _UMUTEX_WAIT));
3481 }
3482
3483 static int
3484 __umtx_op_wake_umutex(struct thread *td, struct _umtx_op_args *uap)
3485 {
3486
3487 return (do_wake_umutex(td, uap->obj));
3488 }
3489
3490 static int
3491 __umtx_op_unlock_umutex(struct thread *td, struct _umtx_op_args *uap)
3492 {
3493
3494 return (do_unlock_umutex(td, uap->obj, false));
3495 }
3496
3497 static int
3498 __umtx_op_set_ceiling(struct thread *td, struct _umtx_op_args *uap)
3499 {
3500
3501 return (do_set_ceiling(td, uap->obj, uap->val, uap->uaddr1));
3502 }
3503
3504 static int
3505 __umtx_op_cv_wait(struct thread *td, struct _umtx_op_args *uap)
3506 {
3507 struct timespec *ts, timeout;
3508 int error;
3509
3510 /* Allow a null timespec (wait forever). */
3511 if (uap->uaddr2 == NULL)
3512 ts = NULL;
3513 else {
3514 error = umtx_copyin_timeout(uap->uaddr2, &timeout);
3515 if (error != 0)
3516 return (error);
3517 ts = &timeout;
3518 }
3519 return (do_cv_wait(td, uap->obj, uap->uaddr1, ts, uap->val));
3520 }
3521
3522 static int
3523 __umtx_op_cv_signal(struct thread *td, struct _umtx_op_args *uap)
3524 {
3525
3526 return (do_cv_signal(td, uap->obj));
3527 }
3528
3529 static int
3530 __umtx_op_cv_broadcast(struct thread *td, struct _umtx_op_args *uap)
3531 {
3532
3533 return (do_cv_broadcast(td, uap->obj));
3534 }
3535
3536 static int
3537 __umtx_op_rw_rdlock(struct thread *td, struct _umtx_op_args *uap)
3538 {
3539 struct _umtx_time timeout;
3540 int error;
3541
3542 /* Allow a null timespec (wait forever). */
3543 if (uap->uaddr2 == NULL) {
3544 error = do_rw_rdlock(td, uap->obj, uap->val, 0);
3545 } else {
3546 error = umtx_copyin_umtx_time(uap->uaddr2,
3547 (size_t)uap->uaddr1, &timeout);
3548 if (error != 0)
3549 return (error);
3550 error = do_rw_rdlock(td, uap->obj, uap->val, &timeout);
3551 }
3552 return (error);
3553 }
3554
3555 static int
3556 __umtx_op_rw_wrlock(struct thread *td, struct _umtx_op_args *uap)
3557 {
3558 struct _umtx_time timeout;
3559 int error;
3560
3561 /* Allow a null timespec (wait forever). */
3562 if (uap->uaddr2 == NULL) {
3563 error = do_rw_wrlock(td, uap->obj, 0);
3564 } else {
3565 error = umtx_copyin_umtx_time(uap->uaddr2,
3566 (size_t)uap->uaddr1, &timeout);
3567 if (error != 0)
3568 return (error);
3569
3570 error = do_rw_wrlock(td, uap->obj, &timeout);
3571 }
3572 return (error);
3573 }
3574
3575 static int
3576 __umtx_op_rw_unlock(struct thread *td, struct _umtx_op_args *uap)
3577 {
3578
3579 return (do_rw_unlock(td, uap->obj));
3580 }
3581
3582 #if defined(COMPAT_FREEBSD9) || defined(COMPAT_FREEBSD10)
3583 static int
3584 __umtx_op_sem_wait(struct thread *td, struct _umtx_op_args *uap)
3585 {
3586 struct _umtx_time *tm_p, timeout;
3587 int error;
3588
3589 /* Allow a null timespec (wait forever). */
3590 if (uap->uaddr2 == NULL)
3591 tm_p = NULL;
3592 else {
3593 error = umtx_copyin_umtx_time(
3594 uap->uaddr2, (size_t)uap->uaddr1, &timeout);
3595 if (error != 0)
3596 return (error);
3597 tm_p = &timeout;
3598 }
3599 return (do_sem_wait(td, uap->obj, tm_p));
3600 }
3601
3602 static int
3603 __umtx_op_sem_wake(struct thread *td, struct _umtx_op_args *uap)
3604 {
3605
3606 return (do_sem_wake(td, uap->obj));
3607 }
3608 #endif
3609
3610 static int
3611 __umtx_op_wake2_umutex(struct thread *td, struct _umtx_op_args *uap)
3612 {
3613
3614 return (do_wake2_umutex(td, uap->obj, uap->val));
3615 }
3616
/*
 * _umtx_op(2) entry for UMTX_OP_SEM2_WAIT.  When an interrupted
 * relative wait is reported and the userland buffer is large enough
 * (a timespec follows the _umtx_time at uaddr2), the remaining time
 * computed by do_sem2_wait() is copied back out so userland can
 * resume the wait.
 */
static int
__umtx_op_sem2_wait(struct thread *td, struct _umtx_op_args *uap)
{
	struct _umtx_time *tm_p, timeout;
	size_t uasize;
	int error;

	/* Allow a null timespec (wait forever). */
	if (uap->uaddr2 == NULL) {
		uasize = 0;
		tm_p = NULL;
	} else {
		uasize = (size_t)uap->uaddr1;
		error = umtx_copyin_umtx_time(uap->uaddr2, uasize, &timeout);
		if (error != 0)
			return (error);
		tm_p = &timeout;
	}
	error = do_sem2_wait(td, uap->obj, tm_p);
	if (error == EINTR && uap->uaddr2 != NULL &&
	    (timeout._flags & UMTX_ABSTIME) == 0 &&
	    uasize >= sizeof(struct _umtx_time) + sizeof(struct timespec)) {
		/* Write the unslept time into the trailing timespec. */
		error = copyout(&timeout._timeout,
		    (struct _umtx_time *)uap->uaddr2 + 1,
		    sizeof(struct timespec));
		if (error == 0) {
			error = EINTR;
		}
	}

	return (error);
}
3649
3650 static int
3651 __umtx_op_sem2_wake(struct thread *td, struct _umtx_op_args *uap)
3652 {
3653
3654 return (do_sem2_wake(td, uap->obj));
3655 }
3656
/* Per-object list of umtx_shm registrations, stored in umtx_data. */
#define USHM_OBJ_UMTX(o)						\
    ((struct umtx_shm_obj_list *)(&(o)->umtx_data))

#define	USHMF_REG_LINKED	0x0001	/* linked on umtx_shm_registry[] */
#define	USHMF_OBJ_LINKED	0x0002	/* linked on the object's list */
/*
 * A shared-memory umtx registration: maps a shared umtx key to a
 * backing shmfd object.  Reference-counted; protected by umtx_shm_lock.
 */
struct umtx_shm_reg {
	TAILQ_ENTRY(umtx_shm_reg) ushm_reg_link; /* registry hash linkage */
	LIST_ENTRY(umtx_shm_reg) ushm_obj_link;	/* owning object's linkage */
	struct umtx_key		ushm_key;	/* shared key identifying us */
	struct ucred		*ushm_cred;	/* cred charged for the reg */
	struct shmfd		*ushm_obj;	/* backing shm object */
	u_int			ushm_refcnt;	/* reference count */
	u_int			ushm_flags;	/* USHMF_* flags */
};

LIST_HEAD(umtx_shm_obj_list, umtx_shm_reg);
TAILQ_HEAD(umtx_shm_reg_head, umtx_shm_reg);

static uma_zone_t umtx_shm_reg_zone;
/* Hash table of live registrations, indexed by umtx key hash. */
static struct umtx_shm_reg_head umtx_shm_registry[UMTX_CHAINS];
static struct mtx umtx_shm_lock;
/* Registrations awaiting deferred destruction by the taskqueue. */
static struct umtx_shm_reg_head umtx_shm_reg_delfree =
    TAILQ_HEAD_INITIALIZER(umtx_shm_reg_delfree);

static void umtx_shm_free_reg(struct umtx_shm_reg *reg);
3682
/*
 * Taskqueue handler: free registrations that were unlinked in a context
 * where sleeping was not allowed (VM object termination).  Moves the
 * global delfree queue to a local list under the lock, then frees each
 * entry with the lock dropped, since umtx_shm_free_reg() may sleep.
 */
3683 static void
3684 umtx_shm_reg_delfree_tq(void *context __unused, int pending __unused)
3685 {
3686 struct umtx_shm_reg_head d;
3687 struct umtx_shm_reg *reg, *reg1;
3688
3689 TAILQ_INIT(&d);
3690 mtx_lock(&umtx_shm_lock);
3691 TAILQ_CONCAT(&d, &umtx_shm_reg_delfree, ushm_reg_link);
3692 mtx_unlock(&umtx_shm_lock);
3693 TAILQ_FOREACH_SAFE(reg, &d, ushm_reg_link, reg1) {
3694 TAILQ_REMOVE(&d, reg, ushm_reg_link);
3695 umtx_shm_free_reg(reg);
3696 }
3697 }
3698
3699 static struct task umtx_shm_reg_delfree_task =
3700 TASK_INITIALIZER(0, umtx_shm_reg_delfree_tq, NULL);
3701
/*
 * Look up the registration for a shared key on its registry hash chain.
 * Caller must hold umtx_shm_lock.  On success returns the reg with an
 * additional reference taken for the caller; NULL if not registered.
 */
3702 static struct umtx_shm_reg *
3703 umtx_shm_find_reg_locked(const struct umtx_key *key)
3704 {
3705 struct umtx_shm_reg *reg;
3706 struct umtx_shm_reg_head *reg_head;
3707
3708 KASSERT(key->shared, ("umtx_p_find_rg: private key"));
3709 mtx_assert(&umtx_shm_lock, MA_OWNED);
3710 reg_head = &umtx_shm_registry[key->hash];
3711 TAILQ_FOREACH(reg, reg_head, ushm_reg_link) {
3712 KASSERT(reg->ushm_key.shared,
3713 ("non-shared key on reg %p %d", reg, reg->ushm_key.shared));
/* Shared keys match on backing object + offset, not on address. */
3714 if (reg->ushm_key.info.shared.object ==
3715 key->info.shared.object &&
3716 reg->ushm_key.info.shared.offset ==
3717 key->info.shared.offset) {
3718 KASSERT(reg->ushm_key.type == TYPE_SHM, ("TYPE_USHM"));
3719 KASSERT(reg->ushm_refcnt > 0,
3720 ("reg %p refcnt 0 onlist", reg));
3721 KASSERT((reg->ushm_flags & USHMF_REG_LINKED) != 0,
3722 ("reg %p not linked", reg));
3723 reg->ushm_refcnt++;
3724 return (reg);
3725 }
3726 }
3727 return (NULL);
3728 }
3729
3730 static struct umtx_shm_reg *
3731 umtx_shm_find_reg(const struct umtx_key *key)
3732 {
3733 struct umtx_shm_reg *reg;
3734
3735 mtx_lock(&umtx_shm_lock);
3736 reg = umtx_shm_find_reg_locked(key);
3737 mtx_unlock(&umtx_shm_lock);
3738 return (reg);
3739 }
3740
/*
 * Release all resources of an unlinked registration: the RLIMIT_UMTXP
 * accounting, the credential hold, the shm object reference, and the
 * reg itself.  May sleep; must not be called with umtx_shm_lock held.
 */
3741 static void
3742 umtx_shm_free_reg(struct umtx_shm_reg *reg)
3743 {
3744
3745 chgumtxcnt(reg->ushm_cred->cr_ruidinfo, -1, 0);
3746 crfree(reg->ushm_cred);
3747 shm_drop(reg->ushm_obj);
3748 uma_zfree(umtx_shm_reg_zone, reg);
3749 }
3750
/*
 * Drop one reference on reg with umtx_shm_lock held.  If the refcount
 * reaches zero (or force is set, e.g. on destroy/object termination),
 * unlink reg from the registry hash and the object's list.  Returns
 * true when the caller must free the reg (refcount hit zero); the free
 * itself is deferred to the caller since it may sleep.
 */
3751 static bool
3752 umtx_shm_unref_reg_locked(struct umtx_shm_reg *reg, bool force)
3753 {
3754 bool res;
3755
3756 mtx_assert(&umtx_shm_lock, MA_OWNED);
3757 KASSERT(reg->ushm_refcnt > 0, ("ushm_reg %p refcnt 0", reg));
3758 reg->ushm_refcnt--;
3759 res = reg->ushm_refcnt == 0;
3760 if (res || force) {
3761 if ((reg->ushm_flags & USHMF_REG_LINKED) != 0) {
3762 TAILQ_REMOVE(&umtx_shm_registry[reg->ushm_key.hash],
3763 reg, ushm_reg_link);
3764 reg->ushm_flags &= ~USHMF_REG_LINKED;
3765 }
3766 if ((reg->ushm_flags & USHMF_OBJ_LINKED) != 0) {
3767 LIST_REMOVE(reg, ushm_obj_link);
3768 reg->ushm_flags &= ~USHMF_OBJ_LINKED;
3769 }
3770 }
3771 return (res);
3772 }
3773
/*
 * Drop a reference on reg, freeing it if it was the last one.  With
 * force set (UMTX_SHM_DESTROY), first mark the backing VM object
 * OBJ_UMTXDEAD so umtx_shm_alive() reports the page dead to userspace.
 */
3774 static void
3775 umtx_shm_unref_reg(struct umtx_shm_reg *reg, bool force)
3776 {
3777 vm_object_t object;
3778 bool dofree;
3779
3780 if (force) {
3781 object = reg->ushm_obj->shm_object;
3782 VM_OBJECT_WLOCK(object);
3783 object->flags |= OBJ_UMTXDEAD;
3784 VM_OBJECT_WUNLOCK(object);
3785 }
3786 mtx_lock(&umtx_shm_lock);
3787 dofree = umtx_shm_unref_reg_locked(reg, force);
3788 mtx_unlock(&umtx_shm_lock);
/* Free outside the lock; umtx_shm_free_reg() may sleep. */
3789 if (dofree)
3790 umtx_shm_free_reg(reg);
3791 }
3792
/*
 * VM object constructor hook: initialize the object's (empty) list of
 * umtx shm registrations.
 */
3793 void
3794 umtx_shm_object_init(vm_object_t object)
3795 {
3796
3797 LIST_INIT(USHM_OBJ_UMTX(object));
3798 }
3799
/*
 * VM object termination hook: forcibly unreference every registration
 * still attached to the dying object.  Registrations whose refcount
 * dropped to zero cannot be freed here (freeing may sleep and this runs
 * under the object lock), so they are queued on umtx_shm_reg_delfree
 * and handed to the taskqueue.
 */
3800 void
3801 umtx_shm_object_terminated(vm_object_t object)
3802 {
3803 struct umtx_shm_reg *reg, *reg1;
3804 bool dofree;
3805
3806 dofree = false;
3807 mtx_lock(&umtx_shm_lock);
3808 LIST_FOREACH_SAFE(reg, USHM_OBJ_UMTX(object), ushm_obj_link, reg1) {
3809 if (umtx_shm_unref_reg_locked(reg, true)) {
3810 TAILQ_INSERT_TAIL(&umtx_shm_reg_delfree, reg,
3811 ushm_reg_link);
3812 dofree = true;
3813 }
3814 }
3815 mtx_unlock(&umtx_shm_lock);
3816 if (dofree)
3817 taskqueue_enqueue(taskqueue_thread, &umtx_shm_reg_delfree_task);
3818 }
3819
3820 static int
3821 umtx_shm_create_reg(struct thread *td, const struct umtx_key *key,
3822 struct umtx_shm_reg **res)
3823 {
3824 struct umtx_shm_reg *reg, *reg1;
3825 struct ucred *cred;
3826 int error;
3827
3828 reg = umtx_shm_find_reg(key);
3829 if (reg != NULL) {
3830 *res = reg;
3831 return (0);
3832 }
3833 cred = td->td_ucred;
3834 if (!chgumtxcnt(cred->cr_ruidinfo, 1, lim_cur(td, RLIMIT_UMTXP)))
3835 return (ENOMEM);
3836 reg = uma_zalloc(umtx_shm_reg_zone, M_WAITOK | M_ZERO);
3837 reg->ushm_refcnt = 1;
3838 bcopy(key, ®->ushm_key, sizeof(*key));
3839 reg->ushm_obj = shm_alloc(td->td_ucred, O_RDWR);
3840 reg->ushm_cred = crhold(cred);
3841 error = shm_dotruncate(reg->ushm_obj, PAGE_SIZE);
3842 if (error != 0) {
3843 umtx_shm_free_reg(reg);
3844 return (error);
3845 }
3846 mtx_lock(&umtx_shm_lock);
3847 reg1 = umtx_shm_find_reg_locked(key);
3848 if (reg1 != NULL) {
3849 mtx_unlock(&umtx_shm_lock);
3850 umtx_shm_free_reg(reg);
3851 *res = reg1;
3852 return (0);
3853 }
3854 reg->ushm_refcnt++;
3855 TAILQ_INSERT_TAIL(&umtx_shm_registry[key->hash], reg, ushm_reg_link);
3856 LIST_INSERT_HEAD(USHM_OBJ_UMTX(key->info.shared.object), reg,
3857 ushm_obj_link);
3858 reg->ushm_flags = USHMF_REG_LINKED | USHMF_OBJ_LINKED;
3859 mtx_unlock(&umtx_shm_lock);
3860 *res = reg;
3861 return (0);
3862 }
3863
/*
 * UMTX_SHM_ALIVE: check whether the shm page backing addr is still
 * usable.  Returns EFAULT if addr is unmapped, EINVAL if there is no
 * backing object, ENOTTY if the object was marked OBJ_UMTXDEAD by a
 * forced destroy, and 0 when alive.
 */
3864 static int
3865 umtx_shm_alive(struct thread *td, void *addr)
3866 {
3867 vm_map_t map;
3868 vm_map_entry_t entry;
3869 vm_object_t object;
3870 vm_pindex_t pindex;
3871 vm_prot_t prot;
3872 int res, ret;
3873 boolean_t wired;
3874
3875 map = &td->td_proc->p_vmspace->vm_map;
3876 res = vm_map_lookup(&map, (uintptr_t)addr, VM_PROT_READ, &entry,
3877 &object, &pindex, &prot, &wired);
3878 if (res != KERN_SUCCESS)
3879 return (EFAULT);
3880 if (object == NULL)
3881 ret = EINVAL;
3882 else
3883 ret = (object->flags & OBJ_UMTXDEAD) != 0 ? ENOTTY : 0;
3884 vm_map_lookup_done(map, entry);
3885 return (ret);
3886 }
3887
/*
 * One-time initialization of the shm registry: allocation zone, lock,
 * and the per-chain hash heads.
 */
3888 static void
3889 umtx_shm_init(void)
3890 {
3891 int i;
3892
3893 umtx_shm_reg_zone = uma_zcreate("umtx_shm", sizeof(struct umtx_shm_reg),
3894 NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0);
3895 mtx_init(&umtx_shm_lock, "umtxshm", NULL, MTX_DEF);
3896 for (i = 0; i < nitems(umtx_shm_registry); i++)
3897 TAILQ_INIT(&umtx_shm_registry[i]);
3898 }
3899
3900 static int
3901 umtx_shm(struct thread *td, void *addr, u_int flags)
3902 {
3903 struct umtx_key key;
3904 struct umtx_shm_reg *reg;
3905 struct file *fp;
3906 int error, fd;
3907
3908 if (__bitcount(flags & (UMTX_SHM_CREAT | UMTX_SHM_LOOKUP |
3909 UMTX_SHM_DESTROY| UMTX_SHM_ALIVE)) != 1)
3910 return (EINVAL);
3911 if ((flags & UMTX_SHM_ALIVE) != 0)
3912 return (umtx_shm_alive(td, addr));
3913 error = umtx_key_get(addr, TYPE_SHM, PROCESS_SHARE, &key);
3914 if (error != 0)
3915 return (error);
3916 KASSERT(key.shared == 1, ("non-shared key"));
3917 if ((flags & UMTX_SHM_CREAT) != 0) {
3918 error = umtx_shm_create_reg(td, &key, ®);
3919 } else {
3920 reg = umtx_shm_find_reg(&key);
3921 if (reg == NULL)
3922 error = ESRCH;
3923 }
3924 umtx_key_release(&key);
3925 if (error != 0)
3926 return (error);
3927 KASSERT(reg != NULL, ("no reg"));
3928 if ((flags & UMTX_SHM_DESTROY) != 0) {
3929 umtx_shm_unref_reg(reg, true);
3930 } else {
3931 #if 0
3932 #ifdef MAC
3933 error = mac_posixshm_check_open(td->td_ucred,
3934 reg->ushm_obj, FFLAGS(O_RDWR));
3935 if (error == 0)
3936 #endif
3937 error = shm_access(reg->ushm_obj, td->td_ucred,
3938 FFLAGS(O_RDWR));
3939 if (error == 0)
3940 #endif
3941 error = falloc_caps(td, &fp, &fd, O_CLOEXEC, NULL);
3942 if (error == 0) {
3943 shm_hold(reg->ushm_obj);
3944 finit(fp, FFLAGS(O_RDWR), DTYPE_SHM, reg->ushm_obj,
3945 &shm_ops);
3946 td->td_retval[0] = fd;
3947 fdrop(fp, td);
3948 }
3949 }
3950 umtx_shm_unref_reg(reg, false);
3951 return (error);
3952 }
3953
3954 static int
3955 __umtx_op_shm(struct thread *td, struct _umtx_op_args *uap)
3956 {
3957
3958 return (umtx_shm(td, uap->uaddr1, uap->val));
3959 }
3960
/*
 * Record the userspace addresses of the thread's robust mutex lists
 * (shared, private, and the "inactive" in-transition slot); consumed at
 * thread exit by umtx_thread_cleanup().  Always succeeds.
 */
3961 static int
3962 umtx_robust_lists(struct thread *td, struct umtx_robust_lists_params *rbp)
3963 {
3964
3965 td->td_rb_list = rbp->robust_list_offset;
3966 td->td_rbp_list = rbp->robust_priv_list_offset;
3967 td->td_rb_inact = rbp->robust_inact_offset;
3968 return (0);
3969 }
3970
3971 static int
3972 __umtx_op_robust_lists(struct thread *td, struct _umtx_op_args *uap)
3973 {
3974 struct umtx_robust_lists_params rb;
3975 int error;
3976
3977 if (uap->val > sizeof(rb))
3978 return (EINVAL);
3979 bzero(&rb, sizeof(rb));
3980 error = copyin(uap->uaddr1, &rb, uap->val);
3981 if (error != 0)
3982 return (error);
3983 return (umtx_robust_lists(td, &rb));
3984 }
3985
/* Handler signature for one _umtx_op(2) operation. */
3986 typedef int (*_umtx_op_func)(struct thread *td, struct _umtx_op_args *uap);
3987
/*
 * Native dispatch table for _umtx_op(2), indexed by uap->op.  The
 * legacy SEM ops are only wired up when the corresponding COMPAT
 * options are configured.
 */
3988 static const _umtx_op_func op_table[] = {
3989 [UMTX_OP_RESERVED0] = __umtx_op_unimpl,
3990 [UMTX_OP_RESERVED1] = __umtx_op_unimpl,
3991 [UMTX_OP_WAIT] = __umtx_op_wait,
3992 [UMTX_OP_WAKE] = __umtx_op_wake,
3993 [UMTX_OP_MUTEX_TRYLOCK] = __umtx_op_trylock_umutex,
3994 [UMTX_OP_MUTEX_LOCK] = __umtx_op_lock_umutex,
3995 [UMTX_OP_MUTEX_UNLOCK] = __umtx_op_unlock_umutex,
3996 [UMTX_OP_SET_CEILING] = __umtx_op_set_ceiling,
3997 [UMTX_OP_CV_WAIT] = __umtx_op_cv_wait,
3998 [UMTX_OP_CV_SIGNAL] = __umtx_op_cv_signal,
3999 [UMTX_OP_CV_BROADCAST] = __umtx_op_cv_broadcast,
4000 [UMTX_OP_WAIT_UINT] = __umtx_op_wait_uint,
4001 [UMTX_OP_RW_RDLOCK] = __umtx_op_rw_rdlock,
4002 [UMTX_OP_RW_WRLOCK] = __umtx_op_rw_wrlock,
4003 [UMTX_OP_RW_UNLOCK] = __umtx_op_rw_unlock,
4004 [UMTX_OP_WAIT_UINT_PRIVATE] = __umtx_op_wait_uint_private,
4005 [UMTX_OP_WAKE_PRIVATE] = __umtx_op_wake_private,
4006 [UMTX_OP_MUTEX_WAIT] = __umtx_op_wait_umutex,
4007 [UMTX_OP_MUTEX_WAKE] = __umtx_op_wake_umutex,
4008 #if defined(COMPAT_FREEBSD9) || defined(COMPAT_FREEBSD10)
4009 [UMTX_OP_SEM_WAIT] = __umtx_op_sem_wait,
4010 [UMTX_OP_SEM_WAKE] = __umtx_op_sem_wake,
4011 #else
4012 [UMTX_OP_SEM_WAIT] = __umtx_op_unimpl,
4013 [UMTX_OP_SEM_WAKE] = __umtx_op_unimpl,
4014 #endif
4015 [UMTX_OP_NWAKE_PRIVATE] = __umtx_op_nwake_private,
4016 [UMTX_OP_MUTEX_WAKE2] = __umtx_op_wake2_umutex,
4017 [UMTX_OP_SEM2_WAIT] = __umtx_op_sem2_wait,
4018 [UMTX_OP_SEM2_WAKE] = __umtx_op_sem2_wake,
4019 [UMTX_OP_SHM] = __umtx_op_shm,
4020 [UMTX_OP_ROBUST_LISTS] = __umtx_op_robust_lists,
4021 };
4022
4023 int
4024 sys__umtx_op(struct thread *td, struct _umtx_op_args *uap)
4025 {
4026
4027 if ((unsigned)uap->op < nitems(op_table))
4028 return (*op_table[uap->op])(td, uap);
4029 return (EINVAL);
4030 }
4031
4032 #ifdef COMPAT_FREEBSD32
4033
/* 32-bit layouts of struct timespec and struct _umtx_time as seen by
 * ILP32 userspace; widened by the copyin helpers below. */
4034 struct timespec32 {
4035 int32_t tv_sec;
4036 int32_t tv_nsec;
4037 };
4038
4039 struct umtx_time32 {
4040 struct timespec32 timeout;
4041 uint32_t flags;
4042 uint32_t clockid;
4043 };
4044
4045 static inline int
4046 umtx_copyin_timeout32(void *addr, struct timespec *tsp)
4047 {
4048 struct timespec32 ts32;
4049 int error;
4050
4051 error = copyin(addr, &ts32, sizeof(struct timespec32));
4052 if (error == 0) {
4053 if (ts32.tv_sec < 0 ||
4054 ts32.tv_nsec >= 1000000000 ||
4055 ts32.tv_nsec < 0)
4056 error = EINVAL;
4057 else {
4058 tsp->tv_sec = ts32.tv_sec;
4059 tsp->tv_nsec = ts32.tv_nsec;
4060 }
4061 }
4062 return (error);
4063 }
4064
4065 static inline int
4066 umtx_copyin_umtx_time32(const void *addr, size_t size, struct _umtx_time *tp)
4067 {
4068 struct umtx_time32 t32;
4069 int error;
4070
4071 t32.clockid = CLOCK_REALTIME;
4072 t32.flags = 0;
4073 if (size <= sizeof(struct timespec32))
4074 error = copyin(addr, &t32.timeout, sizeof(struct timespec32));
4075 else
4076 error = copyin(addr, &t32, sizeof(struct umtx_time32));
4077 if (error != 0)
4078 return (error);
4079 if (t32.timeout.tv_sec < 0 ||
4080 t32.timeout.tv_nsec >= 1000000000 || t32.timeout.tv_nsec < 0)
4081 return (EINVAL);
4082 tp->_timeout.tv_sec = t32.timeout.tv_sec;
4083 tp->_timeout.tv_nsec = t32.timeout.tv_nsec;
4084 tp->_flags = t32.flags;
4085 tp->_clockid = t32.clockid;
4086 return (0);
4087 }
4088
4089 static int
4090 __umtx_op_wait_compat32(struct thread *td, struct _umtx_op_args *uap)
4091 {
4092 struct _umtx_time *tm_p, timeout;
4093 int error;
4094
4095 if (uap->uaddr2 == NULL)
4096 tm_p = NULL;
4097 else {
4098 error = umtx_copyin_umtx_time32(uap->uaddr2,
4099 (size_t)uap->uaddr1, &timeout);
4100 if (error != 0)
4101 return (error);
4102 tm_p = &timeout;
4103 }
4104 return (do_wait(td, uap->obj, uap->val, tm_p, 1, 0));
4105 }
4106
4107 static int
4108 __umtx_op_lock_umutex_compat32(struct thread *td, struct _umtx_op_args *uap)
4109 {
4110 struct _umtx_time *tm_p, timeout;
4111 int error;
4112
4113 /* Allow a null timespec (wait forever). */
4114 if (uap->uaddr2 == NULL)
4115 tm_p = NULL;
4116 else {
4117 error = umtx_copyin_umtx_time32(uap->uaddr2,
4118 (size_t)uap->uaddr1, &timeout);
4119 if (error != 0)
4120 return (error);
4121 tm_p = &timeout;
4122 }
4123 return (do_lock_umutex(td, uap->obj, tm_p, 0));
4124 }
4125
4126 static int
4127 __umtx_op_wait_umutex_compat32(struct thread *td, struct _umtx_op_args *uap)
4128 {
4129 struct _umtx_time *tm_p, timeout;
4130 int error;
4131
4132 /* Allow a null timespec (wait forever). */
4133 if (uap->uaddr2 == NULL)
4134 tm_p = NULL;
4135 else {
4136 error = umtx_copyin_umtx_time32(uap->uaddr2,
4137 (size_t)uap->uaddr1, &timeout);
4138 if (error != 0)
4139 return (error);
4140 tm_p = &timeout;
4141 }
4142 return (do_lock_umutex(td, uap->obj, tm_p, _UMUTEX_WAIT));
4143 }
4144
4145 static int
4146 __umtx_op_cv_wait_compat32(struct thread *td, struct _umtx_op_args *uap)
4147 {
4148 struct timespec *ts, timeout;
4149 int error;
4150
4151 /* Allow a null timespec (wait forever). */
4152 if (uap->uaddr2 == NULL)
4153 ts = NULL;
4154 else {
4155 error = umtx_copyin_timeout32(uap->uaddr2, &timeout);
4156 if (error != 0)
4157 return (error);
4158 ts = &timeout;
4159 }
4160 return (do_cv_wait(td, uap->obj, uap->uaddr1, ts, uap->val));
4161 }
4162
4163 static int
4164 __umtx_op_rw_rdlock_compat32(struct thread *td, struct _umtx_op_args *uap)
4165 {
4166 struct _umtx_time timeout;
4167 int error;
4168
4169 /* Allow a null timespec (wait forever). */
4170 if (uap->uaddr2 == NULL) {
4171 error = do_rw_rdlock(td, uap->obj, uap->val, 0);
4172 } else {
4173 error = umtx_copyin_umtx_time32(uap->uaddr2,
4174 (size_t)uap->uaddr1, &timeout);
4175 if (error != 0)
4176 return (error);
4177 error = do_rw_rdlock(td, uap->obj, uap->val, &timeout);
4178 }
4179 return (error);
4180 }
4181
4182 static int
4183 __umtx_op_rw_wrlock_compat32(struct thread *td, struct _umtx_op_args *uap)
4184 {
4185 struct _umtx_time timeout;
4186 int error;
4187
4188 /* Allow a null timespec (wait forever). */
4189 if (uap->uaddr2 == NULL) {
4190 error = do_rw_wrlock(td, uap->obj, 0);
4191 } else {
4192 error = umtx_copyin_umtx_time32(uap->uaddr2,
4193 (size_t)uap->uaddr1, &timeout);
4194 if (error != 0)
4195 return (error);
4196 error = do_rw_wrlock(td, uap->obj, &timeout);
4197 }
4198 return (error);
4199 }
4200
4201 static int
4202 __umtx_op_wait_uint_private_compat32(struct thread *td, struct _umtx_op_args *uap)
4203 {
4204 struct _umtx_time *tm_p, timeout;
4205 int error;
4206
4207 if (uap->uaddr2 == NULL)
4208 tm_p = NULL;
4209 else {
4210 error = umtx_copyin_umtx_time32(
4211 uap->uaddr2, (size_t)uap->uaddr1,&timeout);
4212 if (error != 0)
4213 return (error);
4214 tm_p = &timeout;
4215 }
4216 return (do_wait(td, uap->obj, uap->val, tm_p, 1, 1));
4217 }
4218
#if defined(COMPAT_FREEBSD9) || defined(COMPAT_FREEBSD10)
/*
 * 32-bit compat legacy UMTX_OP_SEM_WAIT.  A NULL timeout pointer
 * means block indefinitely.
 */
static int
__umtx_op_sem_wait_compat32(struct thread *td, struct _umtx_op_args *uap)
{
	struct _umtx_time timeout, *tm_p;
	int error;

	tm_p = NULL;
	if (uap->uaddr2 != NULL) {
		error = umtx_copyin_umtx_time32(uap->uaddr2,
		    (size_t)uap->uaddr1, &timeout);
		if (error != 0)
			return (error);
		tm_p = &timeout;
	}
	return (do_sem_wait(td, uap->obj, tm_p));
}
#endif
4239
/*
 * 32-bit compat UMTX_OP_SEM2_WAIT.  On EINTR with a relative timeout,
 * and if userspace passed room for it (a timespec32 following the
 * umtx_time32), copy the remaining time back out so the caller can
 * restart the wait; the EINTR return is preserved.
 */
4240 static int
4241 __umtx_op_sem2_wait_compat32(struct thread *td, struct _umtx_op_args *uap)
4242 {
4243 struct _umtx_time *tm_p, timeout;
4244 size_t uasize;
4245 int error;
4246
4247 /* Allow a null timespec (wait forever). */
4248 if (uap->uaddr2 == NULL) {
4249 uasize = 0;
4250 tm_p = NULL;
4251 } else {
4252 uasize = (size_t)uap->uaddr1;
4253 error = umtx_copyin_umtx_time32(uap->uaddr2, uasize, &timeout);
4254 if (error != 0)
4255 return (error);
4256 tm_p = &timeout;
4257 }
4258 error = do_sem2_wait(td, uap->obj, tm_p);
4259 if (error == EINTR && uap->uaddr2 != NULL &&
4260 (timeout._flags & UMTX_ABSTIME) == 0 &&
4261 uasize >= sizeof(struct umtx_time32) + sizeof(struct timespec32)) {
/* Narrow the (already updated) remaining time back to 32 bits. */
4262 struct timespec32 remain32 = {
4263 .tv_sec = timeout._timeout.tv_sec,
4264 .tv_nsec = timeout._timeout.tv_nsec
4265 };
4266 error = copyout(&remain32,
4267 (struct umtx_time32 *)uap->uaddr2 + 1,
4268 sizeof(struct timespec32));
4269 if (error == 0) {
4270 error = EINTR;
4271 }
4272 }
4273
4274 return (error);
4275 }
4276
4277 static int
4278 __umtx_op_nwake_private32(struct thread *td, struct _umtx_op_args *uap)
4279 {
4280 uint32_t uaddrs[BATCH_SIZE], **upp;
4281 int count, error, i, pos, tocopy;
4282
4283 upp = (uint32_t **)uap->obj;
4284 error = 0;
4285 for (count = uap->val, pos = 0; count > 0; count -= tocopy,
4286 pos += tocopy) {
4287 tocopy = MIN(count, BATCH_SIZE);
4288 error = copyin(upp + pos, uaddrs, tocopy * sizeof(uint32_t));
4289 if (error != 0)
4290 break;
4291 for (i = 0; i < tocopy; ++i)
4292 kern_umtx_wake(td, (void *)(intptr_t)uaddrs[i],
4293 INT_MAX, 1);
4294 maybe_yield();
4295 }
4296 return (error);
4297 }
4298
/* 32-bit layout of struct umtx_robust_lists_params for ILP32 userspace. */
4299 struct umtx_robust_lists_params_compat32 {
4300 uint32_t robust_list_offset;
4301 uint32_t robust_priv_list_offset;
4302 uint32_t robust_inact_offset;
4303 };
4304
4305 static int
4306 __umtx_op_robust_lists_compat32(struct thread *td, struct _umtx_op_args *uap)
4307 {
4308 struct umtx_robust_lists_params rb;
4309 struct umtx_robust_lists_params_compat32 rb32;
4310 int error;
4311
4312 if (uap->val > sizeof(rb32))
4313 return (EINVAL);
4314 bzero(&rb, sizeof(rb));
4315 bzero(&rb32, sizeof(rb32));
4316 error = copyin(uap->uaddr1, &rb32, uap->val);
4317 if (error != 0)
4318 return (error);
4319 rb.robust_list_offset = rb32.robust_list_offset;
4320 rb.robust_priv_list_offset = rb32.robust_priv_list_offset;
4321 rb.robust_inact_offset = rb32.robust_inact_offset;
4322 return (umtx_robust_lists(td, &rb));
4323 }
4324
/*
 * Dispatch table for freebsd32 _umtx_op(2); 64-bit-clean handlers are
 * shared with the native table, the rest use compat32 wrappers.
 */
4325 static const _umtx_op_func op_table_compat32[] = {
4326 [UMTX_OP_RESERVED0] = __umtx_op_unimpl,
4327 [UMTX_OP_RESERVED1] = __umtx_op_unimpl,
4328 [UMTX_OP_WAIT] = __umtx_op_wait_compat32,
4329 [UMTX_OP_WAKE] = __umtx_op_wake,
4330 [UMTX_OP_MUTEX_TRYLOCK] = __umtx_op_trylock_umutex,
4331 [UMTX_OP_MUTEX_LOCK] = __umtx_op_lock_umutex_compat32,
4332 [UMTX_OP_MUTEX_UNLOCK] = __umtx_op_unlock_umutex,
4333 [UMTX_OP_SET_CEILING] = __umtx_op_set_ceiling,
4334 [UMTX_OP_CV_WAIT] = __umtx_op_cv_wait_compat32,
4335 [UMTX_OP_CV_SIGNAL] = __umtx_op_cv_signal,
4336 [UMTX_OP_CV_BROADCAST] = __umtx_op_cv_broadcast,
4337 [UMTX_OP_WAIT_UINT] = __umtx_op_wait_compat32,
4338 [UMTX_OP_RW_RDLOCK] = __umtx_op_rw_rdlock_compat32,
4339 [UMTX_OP_RW_WRLOCK] = __umtx_op_rw_wrlock_compat32,
4340 [UMTX_OP_RW_UNLOCK] = __umtx_op_rw_unlock,
4341 [UMTX_OP_WAIT_UINT_PRIVATE] = __umtx_op_wait_uint_private_compat32,
4342 [UMTX_OP_WAKE_PRIVATE] = __umtx_op_wake_private,
4343 [UMTX_OP_MUTEX_WAIT] = __umtx_op_wait_umutex_compat32,
4344 [UMTX_OP_MUTEX_WAKE] = __umtx_op_wake_umutex,
4345 #if defined(COMPAT_FREEBSD9) || defined(COMPAT_FREEBSD10)
4346 [UMTX_OP_SEM_WAIT] = __umtx_op_sem_wait_compat32,
4347 [UMTX_OP_SEM_WAKE] = __umtx_op_sem_wake,
4348 #else
4349 [UMTX_OP_SEM_WAIT] = __umtx_op_unimpl,
4350 [UMTX_OP_SEM_WAKE] = __umtx_op_unimpl,
4351 #endif
4352 [UMTX_OP_NWAKE_PRIVATE] = __umtx_op_nwake_private32,
4353 [UMTX_OP_MUTEX_WAKE2] = __umtx_op_wake2_umutex,
4354 [UMTX_OP_SEM2_WAIT] = __umtx_op_sem2_wait_compat32,
4355 [UMTX_OP_SEM2_WAKE] = __umtx_op_sem2_wake,
4356 [UMTX_OP_SHM] = __umtx_op_shm,
4357 [UMTX_OP_ROBUST_LISTS] = __umtx_op_robust_lists_compat32,
4358 };
4359
4360 int
4361 freebsd32_umtx_op(struct thread *td, struct freebsd32_umtx_op_args *uap)
4362 {
4363
4364 if ((unsigned)uap->op < nitems(op_table_compat32)) {
4365 return (*op_table_compat32[uap->op])(td,
4366 (struct _umtx_op_args *)uap);
4367 }
4368 return (EINVAL);
4369 }
4370 #endif
4371
/*
 * Allocate and attach the per-thread umtx queue structure; called once
 * when the thread structure is constructed.
 */
4372 void
4373 umtx_thread_init(struct thread *td)
4374 {
4375
4376 td->td_umtxq = umtxq_alloc();
4377 td->td_umtxq->uq_thread = td;
4378 }
4379
/* Release the per-thread umtx queue when the thread is destroyed. */
4380 void
4381 umtx_thread_fini(struct thread *td)
4382 {
4383
4384 umtxq_free(td->td_umtxq);
4385 }
4386
4387 /*
4388 * It will be called when new thread is created, e.g fork().
4389 */
4390 void
4391 umtx_thread_alloc(struct thread *td)
4392 {
4393 struct umtx_q *uq;
4394
/* Reset inherited priority; assert the recycled uq is clean. */
4395 uq = td->td_umtxq;
4396 uq->uq_inherited_pri = PRI_MAX;
4397
4398 KASSERT(uq->uq_flags == 0, ("uq_flags != 0"));
4399 KASSERT(uq->uq_thread == td, ("uq_thread != td"));
4400 KASSERT(uq->uq_pi_blocked == NULL, ("uq_pi_blocked != NULL"));
4401 KASSERT(TAILQ_EMPTY(&uq->uq_pi_contested), ("uq_pi_contested is not empty"));
4402 }
4403
4404 /*
4405 * exec() hook.
4406 *
4407 * Clear robust lists for all process' threads, not delaying the
4408 * cleanup to thread_exit hook, since the relevant address space is
4409 * destroyed right now.
4410 */
4411 static void
4412 umtx_exec_hook(void *arg __unused, struct proc *p,
4413 struct image_params *imgp __unused)
4414 {
4415 struct thread *td;
4416
4417 KASSERT(p == curproc, ("need curproc"));
/* exec() runs single-threaded; every other thread is suspended. */
4418 PROC_LOCK(p);
4419 KASSERT((p->p_flag & P_HADTHREADS) == 0 ||
4420 (p->p_flag & P_STOPPED_SINGLE) != 0,
4421 ("curproc must be single-threaded"));
4422 FOREACH_THREAD_IN_PROC(p, td) {
4423 KASSERT(td == curthread ||
4424 ((td->td_flags & TDF_BOUNDARY) != 0 && TD_IS_SUSPENDED(td)),
4425 ("running thread %p %p", p, td));
/* Drop the proc lock around cleanup, which touches userspace. */
4426 PROC_UNLOCK(p);
4427 umtx_thread_cleanup(td);
4428 PROC_LOCK(p);
4429 td->td_rb_list = td->td_rbp_list = td->td_rb_inact = 0;
4430 }
4431 PROC_UNLOCK(p);
4432 }
4433
4434 /*
4435 * thread_exit() hook.
4436 */
4437 void
4438 umtx_thread_exit(struct thread *td)
4439 {
4440
4441 umtx_thread_cleanup(td);
4442 }
4443
/*
 * Fetch a userspace pointer-sized word from ptr into *res, reading a
 * 32-bit value for ILP32 (compat32) processes.  Maps any fault to
 * EFAULT.
 */
4444 static int
4445 umtx_read_uptr(struct thread *td, uintptr_t ptr, uintptr_t *res)
4446 {
4447 u_long res1;
4448 #ifdef COMPAT_FREEBSD32
4449 uint32_t res32;
4450 #endif
4451 int error;
4452
4453 #ifdef COMPAT_FREEBSD32
4454 if (SV_PROC_FLAG(td->td_proc, SV_ILP32)) {
4455 error = fueword32((void *)ptr, &res32);
4456 if (error == 0)
4457 res1 = res32;
4458 } else
4459 #endif
4460 {
4461 error = fueword((void *)ptr, &res1);
4462 }
4463 if (error == 0)
4464 *res = res1;
4465 else
4466 error = EFAULT;
4467 return (error);
4468 }
4469
/*
 * Extract the robust-list link (m_rb_lnk) from a umutex image already
 * copied into the kernel, honoring the 32-bit layout for ILP32
 * processes.
 */
4470 static void
4471 umtx_read_rb_list(struct thread *td, struct umutex *m, uintptr_t *rb_list)
4472 {
4473 #ifdef COMPAT_FREEBSD32
4474 struct umutex32 m32;
4475
4476 if (SV_PROC_FLAG(td->td_proc, SV_ILP32)) {
4477 memcpy(&m32, m, sizeof(m32));
4478 *rb_list = m32.m_rb_lnk;
4479 } else
4480 #endif
4481 *rb_list = m->m_rb_lnk;
4482 }
4483
/*
 * Process one robust mutex at user address rbp: read its image, follow
 * the list link into *rb_list (if requested), and unlock it on behalf
 * of the exiting thread, marking it UMUTEX_RB_OWNERDEAD via
 * do_unlock_umutex(..., true).  An entry not owned by this thread is
 * EINVAL, except for the "inactive" slot which may legitimately have
 * been unlocked already.
 */
4484 static int
4485 umtx_handle_rb(struct thread *td, uintptr_t rbp, uintptr_t *rb_list, bool inact)
4486 {
4487 struct umutex m;
4488 int error;
4489
4490 KASSERT(td->td_proc == curproc, ("need current vmspace"));
4491 error = copyin((void *)rbp, &m, sizeof(m));
4492 if (error != 0)
4493 return (error);
4494 if (rb_list != NULL)
4495 umtx_read_rb_list(td, &m, rb_list);
4496 if ((m.m_flags & UMUTEX_ROBUST) == 0)
4497 return (EINVAL);
4498 if ((m.m_owner & ~UMUTEX_CONTESTED) != td->td_tid)
4499 /* inact is cleared after unlock, allow the inconsistency */
4500 return (inact ? 0 : EINVAL);
4501 return (do_unlock_umutex(td, (struct umutex *)rbp, true));
4502 }
4503
/*
 * Walk one robust mutex list of an exiting thread, unlocking each
 * entry.  The walk is bounded by umtx_max_rb to defend against cyclic
 * or hostile user lists.  If the in-transition mutex (*rb_inact) is
 * encountered on the list, it is handled here and the slot cleared so
 * the caller does not process it twice.  "name" tags diagnostics.
 */
4504 static void
4505 umtx_cleanup_rb_list(struct thread *td, uintptr_t rb_list, uintptr_t *rb_inact,
4506 const char *name)
4507 {
4508 int error, i;
4509 uintptr_t rbp;
4510 bool inact;
4511
4512 if (rb_list == 0)
4513 return;
4514 error = umtx_read_uptr(td, rb_list, &rbp);
4515 for (i = 0; error == 0 && rbp != 0 && i < umtx_max_rb; i++) {
4516 if (rbp == *rb_inact) {
4517 inact = true;
4518 *rb_inact = 0;
4519 } else
4520 inact = false;
/* umtx_handle_rb() advances rbp to the next list entry. */
4521 error = umtx_handle_rb(td, rbp, &rbp, inact);
4522 }
4523 if (i == umtx_max_rb && umtx_verbose_rb) {
4524 uprintf("comm %s pid %d: reached umtx %smax rb %d\n",
4525 td->td_proc->p_comm, td->td_proc->p_pid, name, umtx_max_rb);
4526 }
4527 if (error != 0 && umtx_verbose_rb) {
4528 uprintf("comm %s pid %d: handling %srb error %d\n",
4529 td->td_proc->p_comm, td->td_proc->p_pid, name, error);
4530 }
4531 }
4532
4533 /*
4534 * Clean up umtx data.
4535 */
4536 static void
4537 umtx_thread_cleanup(struct thread *td)
4538 {
4539 struct umtx_q *uq;
4540 struct umtx_pi *pi;
4541 uintptr_t rb_inact;
4542
4543 /*
4544 * Disown pi mutexes.
4545 */
4546 uq = td->td_umtxq;
4547 if (uq != NULL) {
4548 mtx_lock(&umtx_lock);
4549 uq->uq_inherited_pri = PRI_MAX;
4550 while ((pi = TAILQ_FIRST(&uq->uq_pi_contested)) != NULL) {
4551 pi->pi_owner = NULL;
4552 TAILQ_REMOVE(&uq->uq_pi_contested, pi, pi_link);
4553 }
4554 mtx_unlock(&umtx_lock);
/* Drop any priority lent to this thread by PI waiters. */
4555 thread_lock(td);
4556 sched_lend_user_prio(td, PRI_MAX);
4557 thread_unlock(td);
4558 }
4559
4560 /*
4561 * Handle terminated robust mutexes. Must be done after
4562 * robust pi disown, otherwise unlock could see unowned
4563 * entries.
4564 */
/* rb_inact points at a slot holding the in-transition mutex address. */
4565 rb_inact = td->td_rb_inact;
4566 if (rb_inact != 0)
4567 (void)umtx_read_uptr(td, rb_inact, &rb_inact);
4568 umtx_cleanup_rb_list(td, td->td_rb_list, &rb_inact, "");
4569 umtx_cleanup_rb_list(td, td->td_rbp_list, &rb_inact, "priv ");
4570 if (rb_inact != 0)
4571 (void)umtx_handle_rb(td, rb_inact, NULL, true);
4572 }
Cache object: 739dad9c0851e9edc411b9ff1713cbe9
|