FreeBSD/Linux Kernel Cross Reference
sys/kern/kern_umtx.c
1 /*-
2 * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
3 *
4 * Copyright (c) 2015, 2016 The FreeBSD Foundation
5 * Copyright (c) 2004, David Xu <davidxu@freebsd.org>
6 * Copyright (c) 2002, Jeffrey Roberson <jeff@freebsd.org>
7 * All rights reserved.
8 *
9 * Portions of this software were developed by Konstantin Belousov
10 * under sponsorship from the FreeBSD Foundation.
11 *
12 * Redistribution and use in source and binary forms, with or without
13 * modification, are permitted provided that the following conditions
14 * are met:
15 * 1. Redistributions of source code must retain the above copyright
16 * notice unmodified, this list of conditions, and the following
17 * disclaimer.
18 * 2. Redistributions in binary form must reproduce the above copyright
19 * notice, this list of conditions and the following disclaimer in the
20 * documentation and/or other materials provided with the distribution.
21 *
22 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
23 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
24 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
25 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
26 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
27 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
28 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
29 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
30 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
31 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
32 */
33
34 #include <sys/cdefs.h>
35 __FBSDID("$FreeBSD$");
36
37 #include "opt_umtx_profiling.h"
38
39 #include <sys/param.h>
40 #include <sys/kernel.h>
41 #include <sys/fcntl.h>
42 #include <sys/file.h>
43 #include <sys/filedesc.h>
44 #include <sys/limits.h>
45 #include <sys/lock.h>
46 #include <sys/malloc.h>
47 #include <sys/mman.h>
48 #include <sys/mutex.h>
49 #include <sys/priv.h>
50 #include <sys/proc.h>
51 #include <sys/resource.h>
52 #include <sys/resourcevar.h>
53 #include <sys/rwlock.h>
54 #include <sys/sbuf.h>
55 #include <sys/sched.h>
56 #include <sys/smp.h>
57 #include <sys/sysctl.h>
58 #include <sys/systm.h>
59 #include <sys/sysproto.h>
60 #include <sys/syscallsubr.h>
61 #include <sys/taskqueue.h>
62 #include <sys/time.h>
63 #include <sys/eventhandler.h>
64 #include <sys/umtx.h>
65 #include <sys/umtxvar.h>
66
67 #include <security/mac/mac_framework.h>
68
69 #include <vm/vm.h>
70 #include <vm/vm_param.h>
71 #include <vm/pmap.h>
72 #include <vm/vm_map.h>
73 #include <vm/vm_object.h>
74
75 #include <machine/atomic.h>
76 #include <machine/cpu.h>
77
78 #include <compat/freebsd32/freebsd32.h>
79 #ifdef COMPAT_FREEBSD32
80 #include <compat/freebsd32/freebsd32_proto.h>
81 #endif
82
83 #define _UMUTEX_TRY 1
84 #define _UMUTEX_WAIT 2
85
86 #ifdef UMTX_PROFILING
87 #define UPROF_PERC_BIGGER(w, f, sw, sf) \
88 (((w) > (sw)) || ((w) == (sw) && (f) > (sf)))
89 #endif
90
91 #define UMTXQ_LOCKED_ASSERT(uc) mtx_assert(&(uc)->uc_lock, MA_OWNED)
92 #ifdef INVARIANTS
93 #define UMTXQ_ASSERT_LOCKED_BUSY(key) do { \
94 struct umtxq_chain *uc; \
95 \
96 uc = umtxq_getchain(key); \
97 mtx_assert(&uc->uc_lock, MA_OWNED); \
98 KASSERT(uc->uc_busy != 0, ("umtx chain is not busy")); \
99 } while (0)
100 #else
101 #define UMTXQ_ASSERT_LOCKED_BUSY(key) do {} while (0)
102 #endif
103
104 /*
105 * Don't propagate time-sharing priority.  There is a security reason:
106 * a user can simply introduce a PI-mutex, let thread A lock the mutex,
107 * and let another thread B block on the mutex.  Because B is
108 * sleeping, its priority will be boosted; this causes A's priority to
109 * be boosted via priority propagation too, and it will never be lowered
110 * even if A is using 100% CPU, which is unfair to other processes.
111 */
112
113 #define UPRI(td) (((td)->td_user_pri >= PRI_MIN_TIMESHARE &&\
114 (td)->td_user_pri <= PRI_MAX_TIMESHARE) ?\
115 PRI_MAX_TIMESHARE : (td)->td_user_pri)
116
117 #define GOLDEN_RATIO_PRIME 2654404609U
118 #ifndef UMTX_CHAINS
119 #define UMTX_CHAINS 512
120 #endif
121 #define UMTX_SHIFTS (__WORD_BIT - 9)
122
123 #define GET_SHARE(flags) \
124 (((flags) & USYNC_PROCESS_SHARED) == 0 ? THREAD_SHARE : PROCESS_SHARE)
125
126 #define BUSY_SPINS 200
127
/*
 * Table of routines for moving umtx-related structures between user and
 * kernel memory, together with the sizes of those structures.
 * NOTE(review): the compat32 flag and the *_sz members suggest one
 * instance per userland ABI; the instances are not visible here, so
 * confirm against their definitions.
 */
128 struct umtx_copyops {
/* Fetch a timeout from user address uaddr into *tsp. */
129 int (*copyin_timeout)(const void *uaddr, struct timespec *tsp);
/* Fetch a struct _umtx_time of the given user-supplied size into *tp. */
130 int (*copyin_umtx_time)(const void *uaddr, size_t size,
131 struct _umtx_time *tp);
/* Fetch robust-list parameters of the given size into *rbp. */
132 int (*copyin_robust_lists)(const void *uaddr, size_t size,
133 struct umtx_robust_lists_params *rbp);
/* Store *tsp to user address uaddr; size presumably bounds the copy. */
134 int (*copyout_timeout)(void *uaddr, size_t size,
135 struct timespec *tsp);
/* Presumably the userland size of a timespec for this table. */
136 const size_t timespec_sz;
/* Presumably the userland size of a struct _umtx_time for this table. */
137 const size_t umtx_time_sz;
/* Presumably true when the table serves 32-bit compat callers. */
138 const bool compat32;
139 };
140
141 _Static_assert(sizeof(struct umutex) == sizeof(struct umutex32), "umutex32");
142 _Static_assert(__offsetof(struct umutex, m_spare[0]) ==
143 __offsetof(struct umutex32, m_spare[0]), "m_spare32");
144
145 int umtx_shm_vnobj_persistent = 0;
146 SYSCTL_INT(_kern_ipc, OID_AUTO, umtx_vnode_persistent, CTLFLAG_RWTUN,
147 &umtx_shm_vnobj_persistent, 0,
148 "False forces destruction of umtx attached to file, on last close");
149 static int umtx_max_rb = 1000;
150 SYSCTL_INT(_kern_ipc, OID_AUTO, umtx_max_robust, CTLFLAG_RWTUN,
151 &umtx_max_rb, 0,
152 "Maximum number of robust mutexes allowed for each thread");
153
154 static uma_zone_t umtx_pi_zone;
155 static struct umtxq_chain umtxq_chains[2][UMTX_CHAINS];
156 static MALLOC_DEFINE(M_UMTX, "umtx", "UMTX queue memory");
157 static int umtx_pi_allocated;
158
159 static SYSCTL_NODE(_debug, OID_AUTO, umtx, CTLFLAG_RW | CTLFLAG_MPSAFE, 0,
160 "umtx debug");
161 SYSCTL_INT(_debug_umtx, OID_AUTO, umtx_pi_allocated, CTLFLAG_RD,
162 &umtx_pi_allocated, 0, "Allocated umtx_pi");
163 static int umtx_verbose_rb = 1;
164 SYSCTL_INT(_debug_umtx, OID_AUTO, robust_faults_verbose, CTLFLAG_RWTUN,
165 &umtx_verbose_rb, 0,
166 "");
167
168 #ifdef UMTX_PROFILING
169 static long max_length;
170 SYSCTL_LONG(_debug_umtx, OID_AUTO, max_length, CTLFLAG_RD, &max_length, 0, "max_length");
171 static SYSCTL_NODE(_debug_umtx, OID_AUTO, chains, CTLFLAG_RD | CTLFLAG_MPSAFE, 0,
172 "umtx chain stats");
173 #endif
174
175 static inline void umtx_abs_timeout_init2(struct umtx_abs_timeout *timo,
176 const struct _umtx_time *umtxtime);
177
178 static void umtx_shm_init(void);
179 static void umtxq_sysinit(void *);
180 static void umtxq_hash(struct umtx_key *key);
181 static int do_unlock_pp(struct thread *td, struct umutex *m, uint32_t flags,
182 bool rb);
183 static void umtx_thread_cleanup(struct thread *td);
184 SYSINIT(umtx, SI_SUB_EVENTHANDLER+1, SI_ORDER_MIDDLE, umtxq_sysinit, NULL);
185
186 #define umtxq_signal(key, nwake) umtxq_signal_queue((key), (nwake), UMTX_SHARED_QUEUE)
187
188 static struct mtx umtx_lock;
189
190 #ifdef UMTX_PROFILING
191 static void
192 umtx_init_profiling(void)
193 {
194 struct sysctl_oid *chain_oid;
195 char chain_name[10];
196 int i;
197
198 for (i = 0; i < UMTX_CHAINS; ++i) {
199 snprintf(chain_name, sizeof(chain_name), "%d", i);
200 chain_oid = SYSCTL_ADD_NODE(NULL,
201 SYSCTL_STATIC_CHILDREN(_debug_umtx_chains), OID_AUTO,
202 chain_name, CTLFLAG_RD | CTLFLAG_MPSAFE, NULL,
203 "umtx hash stats");
204 SYSCTL_ADD_INT(NULL, SYSCTL_CHILDREN(chain_oid), OID_AUTO,
205 "max_length0", CTLFLAG_RD, &umtxq_chains[0][i].max_length, 0, NULL);
206 SYSCTL_ADD_INT(NULL, SYSCTL_CHILDREN(chain_oid), OID_AUTO,
207 "max_length1", CTLFLAG_RD, &umtxq_chains[1][i].max_length, 0, NULL);
208 }
209 }
210
211 static int
212 sysctl_debug_umtx_chains_peaks(SYSCTL_HANDLER_ARGS)
213 {
214 char buf[512];
215 struct sbuf sb;
216 struct umtxq_chain *uc;
217 u_int fract, i, j, tot, whole;
218 u_int sf0, sf1, sf2, sf3, sf4;
219 u_int si0, si1, si2, si3, si4;
220 u_int sw0, sw1, sw2, sw3, sw4;
221
222 sbuf_new(&sb, buf, sizeof(buf), SBUF_FIXEDLEN);
223 for (i = 0; i < 2; i++) {
224 tot = 0;
225 for (j = 0; j < UMTX_CHAINS; ++j) {
226 uc = &umtxq_chains[i][j];
227 mtx_lock(&uc->uc_lock);
228 tot += uc->max_length;
229 mtx_unlock(&uc->uc_lock);
230 }
231 if (tot == 0)
232 sbuf_printf(&sb, "%u) Empty ", i);
233 else {
234 sf0 = sf1 = sf2 = sf3 = sf4 = 0;
235 si0 = si1 = si2 = si3 = si4 = 0;
236 sw0 = sw1 = sw2 = sw3 = sw4 = 0;
237 for (j = 0; j < UMTX_CHAINS; j++) {
238 uc = &umtxq_chains[i][j];
239 mtx_lock(&uc->uc_lock);
240 whole = uc->max_length * 100;
241 mtx_unlock(&uc->uc_lock);
242 fract = (whole % tot) * 100;
243 if (UPROF_PERC_BIGGER(whole, fract, sw0, sf0)) {
244 sf0 = fract;
245 si0 = j;
246 sw0 = whole;
247 } else if (UPROF_PERC_BIGGER(whole, fract, sw1,
248 sf1)) {
249 sf1 = fract;
250 si1 = j;
251 sw1 = whole;
252 } else if (UPROF_PERC_BIGGER(whole, fract, sw2,
253 sf2)) {
254 sf2 = fract;
255 si2 = j;
256 sw2 = whole;
257 } else if (UPROF_PERC_BIGGER(whole, fract, sw3,
258 sf3)) {
259 sf3 = fract;
260 si3 = j;
261 sw3 = whole;
262 } else if (UPROF_PERC_BIGGER(whole, fract, sw4,
263 sf4)) {
264 sf4 = fract;
265 si4 = j;
266 sw4 = whole;
267 }
268 }
269 sbuf_printf(&sb, "queue %u:\n", i);
270 sbuf_printf(&sb, "1st: %u.%u%% idx: %u\n", sw0 / tot,
271 sf0 / tot, si0);
272 sbuf_printf(&sb, "2nd: %u.%u%% idx: %u\n", sw1 / tot,
273 sf1 / tot, si1);
274 sbuf_printf(&sb, "3rd: %u.%u%% idx: %u\n", sw2 / tot,
275 sf2 / tot, si2);
276 sbuf_printf(&sb, "4th: %u.%u%% idx: %u\n", sw3 / tot,
277 sf3 / tot, si3);
278 sbuf_printf(&sb, "5th: %u.%u%% idx: %u\n", sw4 / tot,
279 sf4 / tot, si4);
280 }
281 }
282 sbuf_trim(&sb);
283 sbuf_finish(&sb);
284 sysctl_handle_string(oidp, sbuf_data(&sb), sbuf_len(&sb), req);
285 sbuf_delete(&sb);
286 return (0);
287 }
288
289 static int
290 sysctl_debug_umtx_chains_clear(SYSCTL_HANDLER_ARGS)
291 {
292 struct umtxq_chain *uc;
293 u_int i, j;
294 int clear, error;
295
296 clear = 0;
297 error = sysctl_handle_int(oidp, &clear, 0, req);
298 if (error != 0 || req->newptr == NULL)
299 return (error);
300
301 if (clear != 0) {
302 for (i = 0; i < 2; ++i) {
303 for (j = 0; j < UMTX_CHAINS; ++j) {
304 uc = &umtxq_chains[i][j];
305 mtx_lock(&uc->uc_lock);
306 uc->length = 0;
307 uc->max_length = 0;
308 mtx_unlock(&uc->uc_lock);
309 }
310 }
311 }
312 return (0);
313 }
314
315 SYSCTL_PROC(_debug_umtx_chains, OID_AUTO, clear,
316 CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, 0, 0,
317 sysctl_debug_umtx_chains_clear, "I",
318 "Clear umtx chains statistics");
319 SYSCTL_PROC(_debug_umtx_chains, OID_AUTO, peaks,
320 CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_MPSAFE, 0, 0,
321 sysctl_debug_umtx_chains_peaks, "A",
322 "Highest peaks in chains max length");
323 #endif
324
325 static void
326 umtxq_sysinit(void *arg __unused)
327 {
328 int i, j;
329
330 umtx_pi_zone = uma_zcreate("umtx pi", sizeof(struct umtx_pi),
331 NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0);
332 for (i = 0; i < 2; ++i) {
333 for (j = 0; j < UMTX_CHAINS; ++j) {
334 mtx_init(&umtxq_chains[i][j].uc_lock, "umtxql", NULL,
335 MTX_DEF | MTX_DUPOK);
336 LIST_INIT(&umtxq_chains[i][j].uc_queue[0]);
337 LIST_INIT(&umtxq_chains[i][j].uc_queue[1]);
338 LIST_INIT(&umtxq_chains[i][j].uc_spare_queue);
339 TAILQ_INIT(&umtxq_chains[i][j].uc_pi_list);
340 umtxq_chains[i][j].uc_busy = 0;
341 umtxq_chains[i][j].uc_waiters = 0;
342 #ifdef UMTX_PROFILING
343 umtxq_chains[i][j].length = 0;
344 umtxq_chains[i][j].max_length = 0;
345 #endif
346 }
347 }
348 #ifdef UMTX_PROFILING
349 umtx_init_profiling();
350 #endif
351 mtx_init(&umtx_lock, "umtx lock", NULL, MTX_DEF);
352 umtx_shm_init();
353 }
354
355 struct umtx_q *
356 umtxq_alloc(void)
357 {
358 struct umtx_q *uq;
359
360 uq = malloc(sizeof(struct umtx_q), M_UMTX, M_WAITOK | M_ZERO);
361 uq->uq_spare_queue = malloc(sizeof(struct umtxq_queue), M_UMTX,
362 M_WAITOK | M_ZERO);
363 TAILQ_INIT(&uq->uq_spare_queue->head);
364 TAILQ_INIT(&uq->uq_pi_contested);
365 uq->uq_inherited_pri = PRI_MAX;
366 return (uq);
367 }
368
369 void
370 umtxq_free(struct umtx_q *uq)
371 {
372
373 MPASS(uq->uq_spare_queue != NULL);
374 free(uq->uq_spare_queue, M_UMTX);
375 free(uq, M_UMTX);
376 }
377
378 static inline void
379 umtxq_hash(struct umtx_key *key)
380 {
381 unsigned n;
382
383 n = (uintptr_t)key->info.both.a + key->info.both.b;
384 key->hash = ((n * GOLDEN_RATIO_PRIME) >> UMTX_SHIFTS) % UMTX_CHAINS;
385 }
386
387 struct umtxq_chain *
388 umtxq_getchain(struct umtx_key *key)
389 {
390
391 if (key->type <= TYPE_SEM)
392 return (&umtxq_chains[1][key->hash]);
393 return (&umtxq_chains[0][key->hash]);
394 }
395
396 /*
397 * Set chain to busy state when following operation
398 * may be blocked (kernel mutex can not be used).
399 */
400 void
401 umtxq_busy(struct umtx_key *key)
402 {
403 struct umtxq_chain *uc;
404
405 uc = umtxq_getchain(key);
406 mtx_assert(&uc->uc_lock, MA_OWNED);
407 if (uc->uc_busy) {
408 #ifdef SMP
409 if (smp_cpus > 1) {
410 int count = BUSY_SPINS;
411 if (count > 0) {
412 umtxq_unlock(key);
413 while (uc->uc_busy && --count > 0)
414 cpu_spinwait();
415 umtxq_lock(key);
416 }
417 }
418 #endif
419 while (uc->uc_busy) {
420 uc->uc_waiters++;
421 msleep(uc, &uc->uc_lock, 0, "umtxqb", 0);
422 uc->uc_waiters--;
423 }
424 }
425 uc->uc_busy = 1;
426 }
427
428 /*
429 * Unbusy a chain.
430 */
431 void
432 umtxq_unbusy(struct umtx_key *key)
433 {
434 struct umtxq_chain *uc;
435
436 uc = umtxq_getchain(key);
437 mtx_assert(&uc->uc_lock, MA_OWNED);
438 KASSERT(uc->uc_busy != 0, ("not busy"));
439 uc->uc_busy = 0;
440 if (uc->uc_waiters)
441 wakeup_one(uc);
442 }
443
/*
 * Convenience wrapper around umtxq_unbusy() for callers that do not hold
 * the chain lock: take the lock, unbusy the chain, drop the lock.
 */
void
umtxq_unbusy_unlocked(struct umtx_key *key)
{
	umtxq_lock(key);
	umtxq_unbusy(key);
	umtxq_unlock(key);
}
452
453 static struct umtxq_queue *
454 umtxq_queue_lookup(struct umtx_key *key, int q)
455 {
456 struct umtxq_queue *uh;
457 struct umtxq_chain *uc;
458
459 uc = umtxq_getchain(key);
460 UMTXQ_LOCKED_ASSERT(uc);
461 LIST_FOREACH(uh, &uc->uc_queue[q], link) {
462 if (umtx_key_match(&uh->key, key))
463 return (uh);
464 }
465
466 return (NULL);
467 }
468
469 void
470 umtxq_insert_queue(struct umtx_q *uq, int q)
471 {
472 struct umtxq_queue *uh;
473 struct umtxq_chain *uc;
474
475 uc = umtxq_getchain(&uq->uq_key);
476 UMTXQ_LOCKED_ASSERT(uc);
477 KASSERT((uq->uq_flags & UQF_UMTXQ) == 0, ("umtx_q is already on queue"));
478 uh = umtxq_queue_lookup(&uq->uq_key, q);
479 if (uh != NULL) {
480 LIST_INSERT_HEAD(&uc->uc_spare_queue, uq->uq_spare_queue, link);
481 } else {
482 uh = uq->uq_spare_queue;
483 uh->key = uq->uq_key;
484 LIST_INSERT_HEAD(&uc->uc_queue[q], uh, link);
485 #ifdef UMTX_PROFILING
486 uc->length++;
487 if (uc->length > uc->max_length) {
488 uc->max_length = uc->length;
489 if (uc->max_length > max_length)
490 max_length = uc->max_length;
491 }
492 #endif
493 }
494 uq->uq_spare_queue = NULL;
495
496 TAILQ_INSERT_TAIL(&uh->head, uq, uq_link);
497 uh->length++;
498 uq->uq_flags |= UQF_UMTXQ;
499 uq->uq_cur_queue = uh;
500 return;
501 }
502
503 void
504 umtxq_remove_queue(struct umtx_q *uq, int q)
505 {
506 struct umtxq_chain *uc;
507 struct umtxq_queue *uh;
508
509 uc = umtxq_getchain(&uq->uq_key);
510 UMTXQ_LOCKED_ASSERT(uc);
511 if (uq->uq_flags & UQF_UMTXQ) {
512 uh = uq->uq_cur_queue;
513 TAILQ_REMOVE(&uh->head, uq, uq_link);
514 uh->length--;
515 uq->uq_flags &= ~UQF_UMTXQ;
516 if (TAILQ_EMPTY(&uh->head)) {
517 KASSERT(uh->length == 0,
518 ("inconsistent umtxq_queue length"));
519 #ifdef UMTX_PROFILING
520 uc->length--;
521 #endif
522 LIST_REMOVE(uh, link);
523 } else {
524 uh = LIST_FIRST(&uc->uc_spare_queue);
525 KASSERT(uh != NULL, ("uc_spare_queue is empty"));
526 LIST_REMOVE(uh, link);
527 }
528 uq->uq_spare_queue = uh;
529 uq->uq_cur_queue = NULL;
530 }
531 }
532
533 /*
534 * Check if there are multiple waiters
535 */
536 int
537 umtxq_count(struct umtx_key *key)
538 {
539 struct umtxq_queue *uh;
540
541 UMTXQ_LOCKED_ASSERT(umtxq_getchain(key));
542 uh = umtxq_queue_lookup(key, UMTX_SHARED_QUEUE);
543 if (uh != NULL)
544 return (uh->length);
545 return (0);
546 }
547
548 /*
549 * Check if there are multiple PI waiters and returns first
550 * waiter.
551 */
552 static int
553 umtxq_count_pi(struct umtx_key *key, struct umtx_q **first)
554 {
555 struct umtxq_queue *uh;
556
557 *first = NULL;
558 UMTXQ_LOCKED_ASSERT(umtxq_getchain(key));
559 uh = umtxq_queue_lookup(key, UMTX_SHARED_QUEUE);
560 if (uh != NULL) {
561 *first = TAILQ_FIRST(&uh->head);
562 return (uh->length);
563 }
564 return (0);
565 }
566
567 /*
568 * Wake up threads waiting on an userland object by a bit mask.
569 */
570 int
571 umtxq_signal_mask(struct umtx_key *key, int n_wake, u_int bitset)
572 {
573 struct umtxq_queue *uh;
574 struct umtx_q *uq, *uq_temp;
575 int ret;
576
577 ret = 0;
578 UMTXQ_LOCKED_ASSERT(umtxq_getchain(key));
579 uh = umtxq_queue_lookup(key, UMTX_SHARED_QUEUE);
580 if (uh == NULL)
581 return (0);
582 TAILQ_FOREACH_SAFE(uq, &uh->head, uq_link, uq_temp) {
583 if ((uq->uq_bitset & bitset) == 0)
584 continue;
585 umtxq_remove_queue(uq, UMTX_SHARED_QUEUE);
586 wakeup_one(uq);
587 if (++ret >= n_wake)
588 break;
589 }
590 return (ret);
591 }
592
593 /*
594 * Wake up threads waiting on an userland object.
595 */
596
597 static int
598 umtxq_signal_queue(struct umtx_key *key, int n_wake, int q)
599 {
600 struct umtxq_queue *uh;
601 struct umtx_q *uq;
602 int ret;
603
604 ret = 0;
605 UMTXQ_LOCKED_ASSERT(umtxq_getchain(key));
606 uh = umtxq_queue_lookup(key, q);
607 if (uh != NULL) {
608 while ((uq = TAILQ_FIRST(&uh->head)) != NULL) {
609 umtxq_remove_queue(uq, q);
610 wakeup(uq);
611 if (++ret >= n_wake)
612 return (ret);
613 }
614 }
615 return (ret);
616 }
617
618 /*
619 * Wake up specified thread.
620 */
621 static inline void
622 umtxq_signal_thread(struct umtx_q *uq)
623 {
624
625 UMTXQ_LOCKED_ASSERT(umtxq_getchain(&uq->uq_key));
626 umtxq_remove(uq);
627 wakeup(uq);
628 }
629
630 /*
631 * Wake up a maximum of n_wake threads that are waiting on an userland
632 * object identified by key. The remaining threads are removed from queue
633 * identified by key and added to the queue identified by key2 (requeued).
634 * The n_requeue specifies an upper limit on the number of threads that
635 * are requeued to the second queue.
636 */
637 int
638 umtxq_requeue(struct umtx_key *key, int n_wake, struct umtx_key *key2,
639 int n_requeue)
640 {
641 struct umtxq_queue *uh;
642 struct umtx_q *uq, *uq_temp;
643 int ret;
644
645 ret = 0;
646 UMTXQ_LOCKED_ASSERT(umtxq_getchain(key));
647 UMTXQ_LOCKED_ASSERT(umtxq_getchain(key2));
648 uh = umtxq_queue_lookup(key, UMTX_SHARED_QUEUE);
649 if (uh == NULL)
650 return (0);
651 TAILQ_FOREACH_SAFE(uq, &uh->head, uq_link, uq_temp) {
652 if (++ret <= n_wake) {
653 umtxq_remove(uq);
654 wakeup_one(uq);
655 } else {
656 umtxq_remove(uq);
657 uq->uq_key = *key2;
658 umtxq_insert(uq);
659 if (ret - n_wake == n_requeue)
660 break;
661 }
662 }
663 return (ret);
664 }
665
666 static inline int
667 tstohz(const struct timespec *tsp)
668 {
669 struct timeval tv;
670
671 TIMESPEC_TO_TIMEVAL(&tv, tsp);
672 return tvtohz(&tv);
673 }
674
675 void
676 umtx_abs_timeout_init(struct umtx_abs_timeout *timo, int clockid,
677 int absolute, const struct timespec *timeout)
678 {
679
680 timo->clockid = clockid;
681 if (!absolute) {
682 timo->is_abs_real = false;
683 kern_clock_gettime(curthread, timo->clockid, &timo->cur);
684 timespecadd(&timo->cur, timeout, &timo->end);
685 } else {
686 timo->end = *timeout;
687 timo->is_abs_real = clockid == CLOCK_REALTIME ||
688 clockid == CLOCK_REALTIME_FAST ||
689 clockid == CLOCK_REALTIME_PRECISE ||
690 clockid == CLOCK_SECOND;
691 }
692 }
693
694 static void
695 umtx_abs_timeout_init2(struct umtx_abs_timeout *timo,
696 const struct _umtx_time *umtxtime)
697 {
698
699 umtx_abs_timeout_init(timo, umtxtime->_clockid,
700 (umtxtime->_flags & UMTX_ABSTIME) != 0, &umtxtime->_timeout);
701 }
702
703 static int
704 umtx_abs_timeout_getsbt(struct umtx_abs_timeout *timo, sbintime_t *sbt,
705 int *flags)
706 {
707 struct bintime bt, bbt;
708 struct timespec tts;
709 sbintime_t rem;
710
711 switch (timo->clockid) {
712
713 /* Clocks that can be converted into absolute time. */
714 case CLOCK_REALTIME:
715 case CLOCK_REALTIME_PRECISE:
716 case CLOCK_REALTIME_FAST:
717 case CLOCK_MONOTONIC:
718 case CLOCK_MONOTONIC_PRECISE:
719 case CLOCK_MONOTONIC_FAST:
720 case CLOCK_UPTIME:
721 case CLOCK_UPTIME_PRECISE:
722 case CLOCK_UPTIME_FAST:
723 case CLOCK_SECOND:
724 timespec2bintime(&timo->end, &bt);
725 switch (timo->clockid) {
726 case CLOCK_REALTIME:
727 case CLOCK_REALTIME_PRECISE:
728 case CLOCK_REALTIME_FAST:
729 case CLOCK_SECOND:
730 getboottimebin(&bbt);
731 bintime_sub(&bt, &bbt);
732 break;
733 }
734 if (bt.sec < 0)
735 return (ETIMEDOUT);
736 if (bt.sec >= (SBT_MAX >> 32)) {
737 *sbt = 0;
738 *flags = 0;
739 return (0);
740 }
741 *sbt = bttosbt(bt);
742
743 /*
744 * Check if the absolute time should be aligned to
745 * avoid firing multiple timer events in non-periodic
746 * timer mode.
747 */
748 switch (timo->clockid) {
749 case CLOCK_REALTIME_FAST:
750 case CLOCK_MONOTONIC_FAST:
751 case CLOCK_UPTIME_FAST:
752 rem = *sbt % tc_tick_sbt;
753 if (__predict_true(rem != 0))
754 *sbt += tc_tick_sbt - rem;
755 break;
756 case CLOCK_SECOND:
757 rem = *sbt % SBT_1S;
758 if (__predict_true(rem != 0))
759 *sbt += SBT_1S - rem;
760 break;
761 }
762 *flags = C_ABSOLUTE;
763 return (0);
764
765 /* Clocks that has to be periodically polled. */
766 case CLOCK_VIRTUAL:
767 case CLOCK_PROF:
768 case CLOCK_THREAD_CPUTIME_ID:
769 case CLOCK_PROCESS_CPUTIME_ID:
770 default:
771 kern_clock_gettime(curthread, timo->clockid, &timo->cur);
772 if (timespeccmp(&timo->end, &timo->cur, <=))
773 return (ETIMEDOUT);
774 timespecsub(&timo->end, &timo->cur, &tts);
775 *sbt = tick_sbt * tstohz(&tts);
776 *flags = C_HARDCLOCK;
777 return (0);
778 }
779 }
780
781 static uint32_t
782 umtx_unlock_val(uint32_t flags, bool rb)
783 {
784
785 if (rb)
786 return (UMUTEX_RB_OWNERDEAD);
787 else if ((flags & UMUTEX_NONCONSISTENT) != 0)
788 return (UMUTEX_RB_NOTRECOV);
789 else
790 return (UMUTEX_UNOWNED);
791
792 }
793
794 /*
795 * Put thread into sleep state, before sleeping, check if
796 * thread was removed from umtx queue.
797 */
798 int
799 umtxq_sleep(struct umtx_q *uq, const char *wmesg,
800 struct umtx_abs_timeout *timo)
801 {
802 struct umtxq_chain *uc;
803 sbintime_t sbt = 0;
804 int error, flags = 0;
805
806 uc = umtxq_getchain(&uq->uq_key);
807 UMTXQ_LOCKED_ASSERT(uc);
808 for (;;) {
809 if (!(uq->uq_flags & UQF_UMTXQ)) {
810 error = 0;
811 break;
812 }
813 if (timo != NULL) {
814 if (timo->is_abs_real)
815 curthread->td_rtcgen =
816 atomic_load_acq_int(&rtc_generation);
817 error = umtx_abs_timeout_getsbt(timo, &sbt, &flags);
818 if (error != 0)
819 break;
820 }
821 error = msleep_sbt(uq, &uc->uc_lock, PCATCH | PDROP, wmesg,
822 sbt, 0, flags);
823 uc = umtxq_getchain(&uq->uq_key);
824 mtx_lock(&uc->uc_lock);
825 if (error == EINTR || error == ERESTART)
826 break;
827 if (error == EWOULDBLOCK && (flags & C_ABSOLUTE) != 0) {
828 error = ETIMEDOUT;
829 break;
830 }
831 }
832
833 curthread->td_rtcgen = 0;
834 return (error);
835 }
836
837 /*
838 * Convert userspace address into unique logical address.
839 */
840 int
841 umtx_key_get(const void *addr, int type, int share, struct umtx_key *key)
842 {
843 struct thread *td = curthread;
844 vm_map_t map;
845 vm_map_entry_t entry;
846 vm_pindex_t pindex;
847 vm_prot_t prot;
848 boolean_t wired;
849
850 key->type = type;
851 if (share == THREAD_SHARE) {
852 key->shared = 0;
853 key->info.private.vs = td->td_proc->p_vmspace;
854 key->info.private.addr = (uintptr_t)addr;
855 } else {
856 MPASS(share == PROCESS_SHARE || share == AUTO_SHARE);
857 map = &td->td_proc->p_vmspace->vm_map;
858 if (vm_map_lookup(&map, (vm_offset_t)addr, VM_PROT_WRITE,
859 &entry, &key->info.shared.object, &pindex, &prot,
860 &wired) != KERN_SUCCESS) {
861 return (EFAULT);
862 }
863
864 if ((share == PROCESS_SHARE) ||
865 (share == AUTO_SHARE &&
866 VM_INHERIT_SHARE == entry->inheritance)) {
867 key->shared = 1;
868 key->info.shared.offset = (vm_offset_t)addr -
869 entry->start + entry->offset;
870 vm_object_reference(key->info.shared.object);
871 } else {
872 key->shared = 0;
873 key->info.private.vs = td->td_proc->p_vmspace;
874 key->info.private.addr = (uintptr_t)addr;
875 }
876 vm_map_lookup_done(map, entry);
877 }
878
879 umtxq_hash(key);
880 return (0);
881 }
882
883 /*
884 * Release key.
885 */
886 void
887 umtx_key_release(struct umtx_key *key)
888 {
889 if (key->shared)
890 vm_object_deallocate(key->info.shared.object);
891 }
892
893 #ifdef COMPAT_FREEBSD10
894 /*
895 * Lock a umtx object.
896 */
897 static int
898 do_lock_umtx(struct thread *td, struct umtx *umtx, u_long id,
899 const struct timespec *timeout)
900 {
901 struct umtx_abs_timeout timo;
902 struct umtx_q *uq;
903 u_long owner;
904 u_long old;
905 int error = 0;
906
907 uq = td->td_umtxq;
908 if (timeout != NULL)
909 umtx_abs_timeout_init(&timo, CLOCK_REALTIME, 0, timeout);
910
911 /*
912 * Care must be exercised when dealing with umtx structure. It
913 * can fault on any access.
914 */
915 for (;;) {
916 /*
917 * Try the uncontested case. This should be done in userland.
918 */
919 owner = casuword(&umtx->u_owner, UMTX_UNOWNED, id);
920
921 /* The acquire succeeded. */
922 if (owner == UMTX_UNOWNED)
923 return (0);
924
925 /* The address was invalid. */
926 if (owner == -1)
927 return (EFAULT);
928
929 /* If no one owns it but it is contested try to acquire it. */
930 if (owner == UMTX_CONTESTED) {
931 owner = casuword(&umtx->u_owner,
932 UMTX_CONTESTED, id | UMTX_CONTESTED);
933
934 if (owner == UMTX_CONTESTED)
935 return (0);
936
937 /* The address was invalid. */
938 if (owner == -1)
939 return (EFAULT);
940
941 error = thread_check_susp(td, false);
942 if (error != 0)
943 break;
944
945 /* If this failed the lock has changed, restart. */
946 continue;
947 }
948
949 /*
950 * If we caught a signal, we have retried and now
951 * exit immediately.
952 */
953 if (error != 0)
954 break;
955
956 if ((error = umtx_key_get(umtx, TYPE_SIMPLE_LOCK,
957 AUTO_SHARE, &uq->uq_key)) != 0)
958 return (error);
959
960 umtxq_lock(&uq->uq_key);
961 umtxq_busy(&uq->uq_key);
962 umtxq_insert(uq);
963 umtxq_unbusy(&uq->uq_key);
964 umtxq_unlock(&uq->uq_key);
965
966 /*
967 * Set the contested bit so that a release in user space
968 * knows to use the system call for unlock. If this fails
969 * either some one else has acquired the lock or it has been
970 * released.
971 */
972 old = casuword(&umtx->u_owner, owner, owner | UMTX_CONTESTED);
973
974 /* The address was invalid. */
975 if (old == -1) {
976 umtxq_lock(&uq->uq_key);
977 umtxq_remove(uq);
978 umtxq_unlock(&uq->uq_key);
979 umtx_key_release(&uq->uq_key);
980 return (EFAULT);
981 }
982
983 /*
984 * We set the contested bit, sleep. Otherwise the lock changed
985 * and we need to retry or we lost a race to the thread
986 * unlocking the umtx.
987 */
988 umtxq_lock(&uq->uq_key);
989 if (old == owner)
990 error = umtxq_sleep(uq, "umtx", timeout == NULL ? NULL :
991 &timo);
992 umtxq_remove(uq);
993 umtxq_unlock(&uq->uq_key);
994 umtx_key_release(&uq->uq_key);
995
996 if (error == 0)
997 error = thread_check_susp(td, false);
998 }
999
1000 if (timeout == NULL) {
1001 /* Mutex locking is restarted if it is interrupted. */
1002 if (error == EINTR)
1003 error = ERESTART;
1004 } else {
1005 /* Timed-locking is not restarted. */
1006 if (error == ERESTART)
1007 error = EINTR;
1008 }
1009 return (error);
1010 }
1011
1012 /*
1013 * Unlock a umtx object.
1014 */
1015 static int
1016 do_unlock_umtx(struct thread *td, struct umtx *umtx, u_long id)
1017 {
1018 struct umtx_key key;
1019 u_long owner;
1020 u_long old;
1021 int error;
1022 int count;
1023
1024 /*
1025 * Make sure we own this mtx.
1026 */
1027 owner = fuword(__DEVOLATILE(u_long *, &umtx->u_owner));
1028 if (owner == -1)
1029 return (EFAULT);
1030
1031 if ((owner & ~UMTX_CONTESTED) != id)
1032 return (EPERM);
1033
1034 /* This should be done in userland */
1035 if ((owner & UMTX_CONTESTED) == 0) {
1036 old = casuword(&umtx->u_owner, owner, UMTX_UNOWNED);
1037 if (old == -1)
1038 return (EFAULT);
1039 if (old == owner)
1040 return (0);
1041 owner = old;
1042 }
1043
1044 /* We should only ever be in here for contested locks */
1045 if ((error = umtx_key_get(umtx, TYPE_SIMPLE_LOCK, AUTO_SHARE,
1046 &key)) != 0)
1047 return (error);
1048
1049 umtxq_lock(&key);
1050 umtxq_busy(&key);
1051 count = umtxq_count(&key);
1052 umtxq_unlock(&key);
1053
1054 /*
1055 * When unlocking the umtx, it must be marked as unowned if
1056 * there is zero or one thread only waiting for it.
1057 * Otherwise, it must be marked as contested.
1058 */
1059 old = casuword(&umtx->u_owner, owner,
1060 count <= 1 ? UMTX_UNOWNED : UMTX_CONTESTED);
1061 umtxq_lock(&key);
1062 umtxq_signal(&key,1);
1063 umtxq_unbusy(&key);
1064 umtxq_unlock(&key);
1065 umtx_key_release(&key);
1066 if (old == -1)
1067 return (EFAULT);
1068 if (old != owner)
1069 return (EINVAL);
1070 return (0);
1071 }
1072
1073 #ifdef COMPAT_FREEBSD32
1074
1075 /*
1076 * Lock a umtx object.
1077 */
1078 static int
1079 do_lock_umtx32(struct thread *td, uint32_t *m, uint32_t id,
1080 const struct timespec *timeout)
1081 {
1082 struct umtx_abs_timeout timo;
1083 struct umtx_q *uq;
1084 uint32_t owner;
1085 uint32_t old;
1086 int error = 0;
1087
1088 uq = td->td_umtxq;
1089
1090 if (timeout != NULL)
1091 umtx_abs_timeout_init(&timo, CLOCK_REALTIME, 0, timeout);
1092
1093 /*
1094 * Care must be exercised when dealing with umtx structure. It
1095 * can fault on any access.
1096 */
1097 for (;;) {
1098 /*
1099 * Try the uncontested case. This should be done in userland.
1100 */
1101 owner = casuword32(m, UMUTEX_UNOWNED, id);
1102
1103 /* The acquire succeeded. */
1104 if (owner == UMUTEX_UNOWNED)
1105 return (0);
1106
1107 /* The address was invalid. */
1108 if (owner == -1)
1109 return (EFAULT);
1110
1111 /* If no one owns it but it is contested try to acquire it. */
1112 if (owner == UMUTEX_CONTESTED) {
1113 owner = casuword32(m,
1114 UMUTEX_CONTESTED, id | UMUTEX_CONTESTED);
1115 if (owner == UMUTEX_CONTESTED)
1116 return (0);
1117
1118 /* The address was invalid. */
1119 if (owner == -1)
1120 return (EFAULT);
1121
1122 error = thread_check_susp(td, false);
1123 if (error != 0)
1124 break;
1125
1126 /* If this failed the lock has changed, restart. */
1127 continue;
1128 }
1129
1130 /*
1131 * If we caught a signal, we have retried and now
1132 * exit immediately.
1133 */
1134 if (error != 0)
1135 return (error);
1136
1137 if ((error = umtx_key_get(m, TYPE_SIMPLE_LOCK,
1138 AUTO_SHARE, &uq->uq_key)) != 0)
1139 return (error);
1140
1141 umtxq_lock(&uq->uq_key);
1142 umtxq_busy(&uq->uq_key);
1143 umtxq_insert(uq);
1144 umtxq_unbusy(&uq->uq_key);
1145 umtxq_unlock(&uq->uq_key);
1146
1147 /*
1148 * Set the contested bit so that a release in user space
1149 * knows to use the system call for unlock. If this fails
1150 * either some one else has acquired the lock or it has been
1151 * released.
1152 */
1153 old = casuword32(m, owner, owner | UMUTEX_CONTESTED);
1154
1155 /* The address was invalid. */
1156 if (old == -1) {
1157 umtxq_lock(&uq->uq_key);
1158 umtxq_remove(uq);
1159 umtxq_unlock(&uq->uq_key);
1160 umtx_key_release(&uq->uq_key);
1161 return (EFAULT);
1162 }
1163
1164 /*
1165 * We set the contested bit, sleep. Otherwise the lock changed
1166 * and we need to retry or we lost a race to the thread
1167 * unlocking the umtx.
1168 */
1169 umtxq_lock(&uq->uq_key);
1170 if (old == owner)
1171 error = umtxq_sleep(uq, "umtx", timeout == NULL ?
1172 NULL : &timo);
1173 umtxq_remove(uq);
1174 umtxq_unlock(&uq->uq_key);
1175 umtx_key_release(&uq->uq_key);
1176
1177 if (error == 0)
1178 error = thread_check_susp(td, false);
1179 }
1180
1181 if (timeout == NULL) {
1182 /* Mutex locking is restarted if it is interrupted. */
1183 if (error == EINTR)
1184 error = ERESTART;
1185 } else {
1186 /* Timed-locking is not restarted. */
1187 if (error == ERESTART)
1188 error = EINTR;
1189 }
1190 return (error);
1191 }
1192
1193 /*
1194 * Unlock a umtx object.
1195 */
1196 static int
1197 do_unlock_umtx32(struct thread *td, uint32_t *m, uint32_t id)
1198 {
1199 struct umtx_key key;
1200 uint32_t owner;
1201 uint32_t old;
1202 int error;
1203 int count;
1204
1205 /*
1206 * Make sure we own this mtx.
1207 */
1208 owner = fuword32(m);
1209 if (owner == -1)
1210 return (EFAULT);
1211
1212 if ((owner & ~UMUTEX_CONTESTED) != id)
1213 return (EPERM);
1214
1215 /* This should be done in userland */
1216 if ((owner & UMUTEX_CONTESTED) == 0) {
1217 old = casuword32(m, owner, UMUTEX_UNOWNED);
1218 if (old == -1)
1219 return (EFAULT);
1220 if (old == owner)
1221 return (0);
1222 owner = old;
1223 }
1224
1225 /* We should only ever be in here for contested locks */
1226 if ((error = umtx_key_get(m, TYPE_SIMPLE_LOCK, AUTO_SHARE,
1227 &key)) != 0)
1228 return (error);
1229
1230 umtxq_lock(&key);
1231 umtxq_busy(&key);
1232 count = umtxq_count(&key);
1233 umtxq_unlock(&key);
1234
1235 /*
1236 * When unlocking the umtx, it must be marked as unowned if
1237 * there is zero or one thread only waiting for it.
1238 * Otherwise, it must be marked as contested.
1239 */
1240 old = casuword32(m, owner,
1241 count <= 1 ? UMUTEX_UNOWNED : UMUTEX_CONTESTED);
1242 umtxq_lock(&key);
1243 umtxq_signal(&key,1);
1244 umtxq_unbusy(&key);
1245 umtxq_unlock(&key);
1246 umtx_key_release(&key);
1247 if (old == -1)
1248 return (EFAULT);
1249 if (old != owner)
1250 return (EINVAL);
1251 return (0);
1252 }
1253 #endif /* COMPAT_FREEBSD32 */
1254 #endif /* COMPAT_FREEBSD10 */
1255
1256 /*
1257 * Fetch and compare value, sleep on the address if value is not changed.
1258 */
1259 static int
1260 do_wait(struct thread *td, void *addr, u_long id,
1261 struct _umtx_time *timeout, int compat32, int is_private)
1262 {
1263 struct umtx_abs_timeout timo;
1264 struct umtx_q *uq;
1265 u_long tmp;
1266 uint32_t tmp32;
1267 int error = 0;
1268
1269 uq = td->td_umtxq;
1270 if ((error = umtx_key_get(addr, TYPE_SIMPLE_WAIT,
1271 is_private ? THREAD_SHARE : AUTO_SHARE, &uq->uq_key)) != 0)
1272 return (error);
1273
1274 if (timeout != NULL)
1275 umtx_abs_timeout_init2(&timo, timeout);
1276
1277 umtxq_lock(&uq->uq_key);
1278 umtxq_insert(uq);
1279 umtxq_unlock(&uq->uq_key);
1280 if (compat32 == 0) {
1281 error = fueword(addr, &tmp);
1282 if (error != 0)
1283 error = EFAULT;
1284 } else {
1285 error = fueword32(addr, &tmp32);
1286 if (error == 0)
1287 tmp = tmp32;
1288 else
1289 error = EFAULT;
1290 }
1291 umtxq_lock(&uq->uq_key);
1292 if (error == 0) {
1293 if (tmp == id)
1294 error = umtxq_sleep(uq, "uwait", timeout == NULL ?
1295 NULL : &timo);
1296 if ((uq->uq_flags & UQF_UMTXQ) == 0)
1297 error = 0;
1298 else
1299 umtxq_remove(uq);
1300 } else if ((uq->uq_flags & UQF_UMTXQ) != 0) {
1301 umtxq_remove(uq);
1302 }
1303 umtxq_unlock(&uq->uq_key);
1304 umtx_key_release(&uq->uq_key);
1305 if (error == ERESTART)
1306 error = EINTR;
1307 return (error);
1308 }
1309
1310 /*
1311 * Wake up threads sleeping on the specified address.
1312 */
1313 int
1314 kern_umtx_wake(struct thread *td, void *uaddr, int n_wake, int is_private)
1315 {
1316 struct umtx_key key;
1317 int ret;
1318
1319 if ((ret = umtx_key_get(uaddr, TYPE_SIMPLE_WAIT,
1320 is_private ? THREAD_SHARE : AUTO_SHARE, &key)) != 0)
1321 return (ret);
1322 umtxq_lock(&key);
1323 umtxq_signal(&key, n_wake);
1324 umtxq_unlock(&key);
1325 umtx_key_release(&key);
1326 return (0);
1327 }
1328
1329 /*
1330 * Lock PTHREAD_PRIO_NONE protocol POSIX mutex.
 *
 * The owner word m->m_owner lives in user memory; every access goes
 * through fueword32()/casueword32() and a -1 result is reported as
 * EFAULT.  The id stored into the word is td->td_tid.
 *
 * mode selects the behaviour:
 *   _UMUTEX_WAIT  return 0 as soon as the owner word reads as
 *                 UMUTEX_UNOWNED, UMUTEX_CONTESTED,
 *                 UMUTEX_RB_OWNERDEAD or UMUTEX_RB_NOTRECOV, without
 *                 trying to take the lock;
 *   _UMUTEX_TRY   return EBUSY instead of sleeping;
 *   otherwise     sleep (wmesg "umtxn", optionally bounded by timeout)
 *                 until the lock can be taken.
 *
 * Returns 0 on acquisition, EOWNERDEAD when the lock was taken over
 * from the UMUTEX_RB_OWNERDEAD state, ENOTRECOVERABLE for
 * UMUTEX_RB_NOTRECOV, EFAULT, EBUSY, or an error from
 * thread_check_susp(), umtx_key_get() or umtxq_sleep().
1331 */
1332 static int
1333 do_lock_normal(struct thread *td, struct umutex *m, uint32_t flags,
1334 struct _umtx_time *timeout, int mode)
1335 {
1336 struct umtx_abs_timeout timo;
1337 struct umtx_q *uq;
1338 uint32_t owner, old, id;
1339 int error, rv;
1340
1341 id = td->td_tid;
1342 uq = td->td_umtxq;
1343 error = 0;
1344 if (timeout != NULL)
1345 umtx_abs_timeout_init2(&timo, timeout);
1346
1347 /*
1348 * Care must be exercised when dealing with umtx structure. It
1349 * can fault on any access.
1350 */
1351 for (;;) {
1352 rv = fueword32(&m->m_owner, &owner);
1353 if (rv == -1)
1354 return (EFAULT);
/* Wait mode only watches the owner word; no acquire is attempted. */
1355 if (mode == _UMUTEX_WAIT) {
1356 if (owner == UMUTEX_UNOWNED ||
1357 owner == UMUTEX_CONTESTED ||
1358 owner == UMUTEX_RB_OWNERDEAD ||
1359 owner == UMUTEX_RB_NOTRECOV)
1360 return (0);
1361 } else {
1362 /*
1363 * Robust mutex terminated. Kernel duty is to
1364 * return EOWNERDEAD to the userspace. The
1365 * umutex.m_flags UMUTEX_NONCONSISTENT is set
1366 * by the common userspace code.
1367 */
1368 if (owner == UMUTEX_RB_OWNERDEAD) {
1369 rv = casueword32(&m->m_owner,
1370 UMUTEX_RB_OWNERDEAD, &owner,
1371 id | UMUTEX_CONTESTED);
1372 if (rv == -1)
1373 return (EFAULT);
1374 if (rv == 0) {
1375 MPASS(owner == UMUTEX_RB_OWNERDEAD);
1376 return (EOWNERDEAD); /* success */
1377 }
1378 MPASS(rv == 1);
1379 rv = thread_check_susp(td, false);
1380 if (rv != 0)
1381 return (rv);
1382 continue;
1383 }
1384 if (owner == UMUTEX_RB_NOTRECOV)
1385 return (ENOTRECOVERABLE);
1386
1387 /*
1388 * Try the uncontested case. This should be
1389 * done in userland.
1390 */
1391 rv = casueword32(&m->m_owner, UMUTEX_UNOWNED,
1392 &owner, id);
1393 /* The address was invalid. */
1394 if (rv == -1)
1395 return (EFAULT);
1396
1397 /* The acquire succeeded. */
1398 if (rv == 0) {
1399 MPASS(owner == UMUTEX_UNOWNED);
1400 return (0);
1401 }
1402
1403 /*
1404 * If no one owns it but it is contested try
1405 * to acquire it.
1406 */
1407 MPASS(rv == 1);
1408 if (owner == UMUTEX_CONTESTED) {
1409 rv = casueword32(&m->m_owner,
1410 UMUTEX_CONTESTED, &owner,
1411 id | UMUTEX_CONTESTED);
1412 /* The address was invalid. */
1413 if (rv == -1)
1414 return (EFAULT);
1415 if (rv == 0) {
1416 MPASS(owner == UMUTEX_CONTESTED);
1417 return (0);
1418 }
1419 if (rv == 1) {
1420 rv = thread_check_susp(td, false);
1421 if (rv != 0)
1422 return (rv);
1423 }
1424
1425 /*
1426 * If this failed the lock has
1427 * changed, restart.
1428 */
1429 continue;
1430 }
1431
1432 /* rv == 1 but not contested, likely store failure */
1433 rv = thread_check_susp(td, false);
1434 if (rv != 0)
1435 return (rv);
1436 }
1437
1438 if (mode == _UMUTEX_TRY)
1439 return (EBUSY);
1440
1441 /*
1442 * If we caught a signal, we have retried and now
1443 * exit immediately.
1444 */
1445 if (error != 0)
1446 return (error);
1447
1448 if ((error = umtx_key_get(m, TYPE_NORMAL_UMUTEX,
1449 GET_SHARE(flags), &uq->uq_key)) != 0)
1450 return (error);
1451
/*
 * The chain is marked busy and this thread is queued before the
 * contested bit is written; busy is dropped on both paths below.
 */
1452 umtxq_lock(&uq->uq_key);
1453 umtxq_busy(&uq->uq_key);
1454 umtxq_insert(uq);
1455 umtxq_unlock(&uq->uq_key);
1456
1457 /*
1458 * Set the contested bit so that a release in user space
1459 * knows to use the system call for unlock. If this fails
1460 * either some one else has acquired the lock or it has been
1461 * released.
1462 */
1463 rv = casueword32(&m->m_owner, owner, &old,
1464 owner | UMUTEX_CONTESTED);
1465
1466 /* The address was invalid or casueword failed to store. */
1467 if (rv == -1 || rv == 1) {
1468 umtxq_lock(&uq->uq_key);
1469 umtxq_remove(uq);
1470 umtxq_unbusy(&uq->uq_key);
1471 umtxq_unlock(&uq->uq_key);
1472 umtx_key_release(&uq->uq_key);
1473 if (rv == -1)
1474 return (EFAULT);
1475 if (rv == 1) {
1476 rv = thread_check_susp(td, false);
1477 if (rv != 0)
1478 return (rv);
1479 }
1480 continue;
1481 }
1482
1483 /*
1484 * We set the contested bit, sleep. Otherwise the lock changed
1485 * and we need to retry or we lost a race to the thread
1486 * unlocking the umtx.
1487 */
1488 umtxq_lock(&uq->uq_key);
1489 umtxq_unbusy(&uq->uq_key);
1490 MPASS(old == owner);
1491 error = umtxq_sleep(uq, "umtxn", timeout == NULL ?
1492 NULL : &timo);
1493 umtxq_remove(uq);
1494 umtxq_unlock(&uq->uq_key);
1495 umtx_key_release(&uq->uq_key);
1496
/*
 * A sleep error is kept in `error' across the next iteration: the
 * acquire is retried once and the error is returned by the
 * "caught a signal" check above if the lock is still held.
 */
1497 if (error == 0)
1498 error = thread_check_susp(td, false);
1499 }
1500
/* NOTE(review): looks unreachable -- the loop above has no break. */
1501 return (0);
1502 }
1503
1504 /*
1505 * Unlock PTHREAD_PRIO_NONE protocol POSIX mutex.
1506 */
1507 static int
1508 do_unlock_normal(struct thread *td, struct umutex *m, uint32_t flags, bool rb)
1509 {
1510 struct umtx_key key;
1511 uint32_t owner, old, id, newlock;
1512 int error, count;
1513
1514 id = td->td_tid;
1515
1516 again:
1517 /*
1518 * Make sure we own this mtx.
1519 */
1520 error = fueword32(&m->m_owner, &owner);
1521 if (error == -1)
1522 return (EFAULT);
1523
1524 if ((owner & ~UMUTEX_CONTESTED) != id)
1525 return (EPERM);
1526
1527 newlock = umtx_unlock_val(flags, rb);
1528 if ((owner & UMUTEX_CONTESTED) == 0) {
1529 error = casueword32(&m->m_owner, owner, &old, newlock);
1530 if (error == -1)
1531 return (EFAULT);
1532 if (error == 1) {
1533 error = thread_check_susp(td, false);
1534 if (error != 0)
1535 return (error);
1536 goto again;
1537 }
1538 MPASS(old == owner);
1539 return (0);
1540 }
1541
1542 /* We should only ever be in here for contested locks */
1543 if ((error = umtx_key_get(m, TYPE_NORMAL_UMUTEX, GET_SHARE(flags),
1544 &key)) != 0)
1545 return (error);
1546
1547 umtxq_lock(&key);
1548 umtxq_busy(&key);
1549 count = umtxq_count(&key);
1550 umtxq_unlock(&key);
1551
1552 /*
1553 * When unlocking the umtx, it must be marked as unowned if
1554 * there is zero or one thread only waiting for it.
1555 * Otherwise, it must be marked as contested.
1556 */
1557 if (count > 1)
1558 newlock |= UMUTEX_CONTESTED;
1559 error = casueword32(&m->m_owner, owner, &old, newlock);
1560 umtxq_lock(&key);
1561 umtxq_signal(&key, 1);
1562 umtxq_unbusy(&key);
1563 umtxq_unlock(&key);
1564 umtx_key_release(&key);
1565 if (error == -1)
1566 return (EFAULT);
1567 if (error == 1) {
1568 if (old != owner)
1569 return (EINVAL);
1570 error = thread_check_susp(td, false);
1571 if (error != 0)
1572 return (error);
1573 goto again;
1574 }
1575 return (0);
1576 }
1577
1578 /*
1579 * Check if the mutex is available and wake up a waiter,
1580 * only for simple mutex.
1581 */
1582 static int
1583 do_wake_umutex(struct thread *td, struct umutex *m)
1584 {
1585 struct umtx_key key;
1586 uint32_t owner;
1587 uint32_t flags;
1588 int error;
1589 int count;
1590
1591 again:
1592 error = fueword32(&m->m_owner, &owner);
1593 if (error == -1)
1594 return (EFAULT);
1595
1596 if ((owner & ~UMUTEX_CONTESTED) != 0 && owner != UMUTEX_RB_OWNERDEAD &&
1597 owner != UMUTEX_RB_NOTRECOV)
1598 return (0);
1599
1600 error = fueword32(&m->m_flags, &flags);
1601 if (error == -1)
1602 return (EFAULT);
1603
1604 /* We should only ever be in here for contested locks */
1605 if ((error = umtx_key_get(m, TYPE_NORMAL_UMUTEX, GET_SHARE(flags),
1606 &key)) != 0)
1607 return (error);
1608
1609 umtxq_lock(&key);
1610 umtxq_busy(&key);
1611 count = umtxq_count(&key);
1612 umtxq_unlock(&key);
1613
1614 if (count <= 1 && owner != UMUTEX_RB_OWNERDEAD &&
1615 owner != UMUTEX_RB_NOTRECOV) {
1616 error = casueword32(&m->m_owner, UMUTEX_CONTESTED, &owner,
1617 UMUTEX_UNOWNED);
1618 if (error == -1) {
1619 error = EFAULT;
1620 } else if (error == 1) {
1621 umtxq_lock(&key);
1622 umtxq_unbusy(&key);
1623 umtxq_unlock(&key);
1624 umtx_key_release(&key);
1625 error = thread_check_susp(td, false);
1626 if (error != 0)
1627 return (error);
1628 goto again;
1629 }
1630 }
1631
1632 umtxq_lock(&key);
1633 if (error == 0 && count != 0) {
1634 MPASS((owner & ~UMUTEX_CONTESTED) == 0 ||
1635 owner == UMUTEX_RB_OWNERDEAD ||
1636 owner == UMUTEX_RB_NOTRECOV);
1637 umtxq_signal(&key, 1);
1638 }
1639 umtxq_unbusy(&key);
1640 umtxq_unlock(&key);
1641 umtx_key_release(&key);
1642 return (error);
1643 }
1644
1645 /*
1646 * Check if the mutex has waiters and tries to fix contention bit.
1647 */
1648 static int
1649 do_wake2_umutex(struct thread *td, struct umutex *m, uint32_t flags)
1650 {
1651 struct umtx_key key;
1652 uint32_t owner, old;
1653 int type;
1654 int error;
1655 int count;
1656
1657 switch (flags & (UMUTEX_PRIO_INHERIT | UMUTEX_PRIO_PROTECT |
1658 UMUTEX_ROBUST)) {
1659 case 0:
1660 case UMUTEX_ROBUST:
1661 type = TYPE_NORMAL_UMUTEX;
1662 break;
1663 case UMUTEX_PRIO_INHERIT:
1664 type = TYPE_PI_UMUTEX;
1665 break;
1666 case (UMUTEX_PRIO_INHERIT | UMUTEX_ROBUST):
1667 type = TYPE_PI_ROBUST_UMUTEX;
1668 break;
1669 case UMUTEX_PRIO_PROTECT:
1670 type = TYPE_PP_UMUTEX;
1671 break;
1672 case (UMUTEX_PRIO_PROTECT | UMUTEX_ROBUST):
1673 type = TYPE_PP_ROBUST_UMUTEX;
1674 break;
1675 default:
1676 return (EINVAL);
1677 }
1678 if ((error = umtx_key_get(m, type, GET_SHARE(flags), &key)) != 0)
1679 return (error);
1680
1681 owner = 0;
1682 umtxq_lock(&key);
1683 umtxq_busy(&key);
1684 count = umtxq_count(&key);
1685 umtxq_unlock(&key);
1686
1687 error = fueword32(&m->m_owner, &owner);
1688 if (error == -1)
1689 error = EFAULT;
1690
1691 /*
1692 * Only repair contention bit if there is a waiter, this means
1693 * the mutex is still being referenced by userland code,
1694 * otherwise don't update any memory.
1695 */
1696 while (error == 0 && (owner & UMUTEX_CONTESTED) == 0 &&
1697 (count > 1 || (count == 1 && (owner & ~UMUTEX_CONTESTED) != 0))) {
1698 error = casueword32(&m->m_owner, owner, &old,
1699 owner | UMUTEX_CONTESTED);
1700 if (error == -1) {
1701 error = EFAULT;
1702 break;
1703 }
1704 if (error == 0) {
1705 MPASS(old == owner);
1706 break;
1707 }
1708 owner = old;
1709 error = thread_check_susp(td, false);
1710 }
1711
1712 umtxq_lock(&key);
1713 if (error == EFAULT) {
1714 umtxq_signal(&key, INT_MAX);
1715 } else if (count != 0 && ((owner & ~UMUTEX_CONTESTED) == 0 ||
1716 owner == UMUTEX_RB_OWNERDEAD || owner == UMUTEX_RB_NOTRECOV))
1717 umtxq_signal(&key, 1);
1718 umtxq_unbusy(&key);
1719 umtxq_unlock(&key);
1720 umtx_key_release(&key);
1721 return (error);
1722 }
1723
1724 struct umtx_pi *
1725 umtx_pi_alloc(int flags)
1726 {
1727 struct umtx_pi *pi;
1728
1729 pi = uma_zalloc(umtx_pi_zone, M_ZERO | flags);
1730 TAILQ_INIT(&pi->pi_blocked);
1731 atomic_add_int(&umtx_pi_allocated, 1);
1732 return (pi);
1733 }
1734
1735 void
1736 umtx_pi_free(struct umtx_pi *pi)
1737 {
1738 uma_zfree(umtx_pi_zone, pi);
1739 atomic_add_int(&umtx_pi_allocated, -1);
1740 }
1741
1742 /*
1743 * Adjust the thread's position on a pi_state after its priority has been
1744 * changed.
1745 */
1746 static int
1747 umtx_pi_adjust_thread(struct umtx_pi *pi, struct thread *td)
1748 {
1749 struct umtx_q *uq, *uq1, *uq2;
1750 struct thread *td1;
1751
1752 mtx_assert(&umtx_lock, MA_OWNED);
1753 if (pi == NULL)
1754 return (0);
1755
1756 uq = td->td_umtxq;
1757
1758 /*
1759 * Check if the thread needs to be moved on the blocked chain.
1760 * It needs to be moved if either its priority is lower than
1761 * the previous thread or higher than the next thread.
1762 */
1763 uq1 = TAILQ_PREV(uq, umtxq_head, uq_lockq);
1764 uq2 = TAILQ_NEXT(uq, uq_lockq);
1765 if ((uq1 != NULL && UPRI(td) < UPRI(uq1->uq_thread)) ||
1766 (uq2 != NULL && UPRI(td) > UPRI(uq2->uq_thread))) {
1767 /*
1768 * Remove thread from blocked chain and determine where
1769 * it should be moved to.
1770 */
1771 TAILQ_REMOVE(&pi->pi_blocked, uq, uq_lockq);
1772 TAILQ_FOREACH(uq1, &pi->pi_blocked, uq_lockq) {
1773 td1 = uq1->uq_thread;
1774 MPASS(td1->td_proc->p_magic == P_MAGIC);
1775 if (UPRI(td1) > UPRI(td))
1776 break;
1777 }
1778
1779 if (uq1 == NULL)
1780 TAILQ_INSERT_TAIL(&pi->pi_blocked, uq, uq_lockq);
1781 else
1782 TAILQ_INSERT_BEFORE(uq1, uq, uq_lockq);
1783 }
1784 return (1);
1785 }
1786
1787 static struct umtx_pi *
1788 umtx_pi_next(struct umtx_pi *pi)
1789 {
1790 struct umtx_q *uq_owner;
1791
1792 if (pi->pi_owner == NULL)
1793 return (NULL);
1794 uq_owner = pi->pi_owner->td_umtxq;
1795 if (uq_owner == NULL)
1796 return (NULL);
1797 return (uq_owner->uq_pi_blocked);
1798 }
1799
1800 /*
1801 * Floyd's Cycle-Finding Algorithm.
1802 */
1803 static bool
1804 umtx_pi_check_loop(struct umtx_pi *pi)
1805 {
1806 struct umtx_pi *pi1; /* fast iterator */
1807
1808 mtx_assert(&umtx_lock, MA_OWNED);
1809 if (pi == NULL)
1810 return (false);
1811 pi1 = pi;
1812 for (;;) {
1813 pi = umtx_pi_next(pi);
1814 if (pi == NULL)
1815 break;
1816 pi1 = umtx_pi_next(pi1);
1817 if (pi1 == NULL)
1818 break;
1819 pi1 = umtx_pi_next(pi1);
1820 if (pi1 == NULL)
1821 break;
1822 if (pi == pi1)
1823 return (true);
1824 }
1825 return (false);
1826 }
1827
1828 /*
1829 * Propagate priority when a thread is blocked on POSIX
1830 * PI mutex.
1831 */
1832 static void
1833 umtx_propagate_priority(struct thread *td)
1834 {
1835 struct umtx_q *uq;
1836 struct umtx_pi *pi;
1837 int pri;
1838
1839 mtx_assert(&umtx_lock, MA_OWNED);
1840 pri = UPRI(td);
1841 uq = td->td_umtxq;
1842 pi = uq->uq_pi_blocked;
1843 if (pi == NULL)
1844 return;
1845 if (umtx_pi_check_loop(pi))
1846 return;
1847
1848 for (;;) {
1849 td = pi->pi_owner;
1850 if (td == NULL || td == curthread)
1851 return;
1852
1853 MPASS(td->td_proc != NULL);
1854 MPASS(td->td_proc->p_magic == P_MAGIC);
1855
1856 thread_lock(td);
1857 if (td->td_lend_user_pri > pri)
1858 sched_lend_user_prio(td, pri);
1859 else {
1860 thread_unlock(td);
1861 break;
1862 }
1863 thread_unlock(td);
1864
1865 /*
1866 * Pick up the lock that td is blocked on.
1867 */
1868 uq = td->td_umtxq;
1869 pi = uq->uq_pi_blocked;
1870 if (pi == NULL)
1871 break;
1872 /* Resort td on the list if needed. */
1873 umtx_pi_adjust_thread(pi, td);
1874 }
1875 }
1876
1877 /*
1878 * Unpropagate priority for a PI mutex when a thread blocked on
1879 * it is interrupted by signal or resumed by others.
1880 */
1881 static void
1882 umtx_repropagate_priority(struct umtx_pi *pi)
1883 {
1884 struct umtx_q *uq, *uq_owner;
1885 struct umtx_pi *pi2;
1886 int pri;
1887
1888 mtx_assert(&umtx_lock, MA_OWNED);
1889
1890 if (umtx_pi_check_loop(pi))
1891 return;
1892 while (pi != NULL && pi->pi_owner != NULL) {
1893 pri = PRI_MAX;
1894 uq_owner = pi->pi_owner->td_umtxq;
1895
1896 TAILQ_FOREACH(pi2, &uq_owner->uq_pi_contested, pi_link) {
1897 uq = TAILQ_FIRST(&pi2->pi_blocked);
1898 if (uq != NULL) {
1899 if (pri > UPRI(uq->uq_thread))
1900 pri = UPRI(uq->uq_thread);
1901 }
1902 }
1903
1904 if (pri > uq_owner->uq_inherited_pri)
1905 pri = uq_owner->uq_inherited_pri;
1906 thread_lock(pi->pi_owner);
1907 sched_lend_user_prio(pi->pi_owner, pri);
1908 thread_unlock(pi->pi_owner);
1909 if ((pi = uq_owner->uq_pi_blocked) != NULL)
1910 umtx_pi_adjust_thread(pi, uq_owner->uq_thread);
1911 }
1912 }
1913
1914 /*
1915 * Insert a PI mutex into owned list.
1916 */
1917 static void
1918 umtx_pi_setowner(struct umtx_pi *pi, struct thread *owner)
1919 {
1920 struct umtx_q *uq_owner;
1921
1922 uq_owner = owner->td_umtxq;
1923 mtx_assert(&umtx_lock, MA_OWNED);
1924 MPASS(pi->pi_owner == NULL);
1925 pi->pi_owner = owner;
1926 TAILQ_INSERT_TAIL(&uq_owner->uq_pi_contested, pi, pi_link);
1927 }
1928
1929 /*
1930 * Disown a PI mutex, and remove it from the owned list.
1931 */
1932 static void
1933 umtx_pi_disown(struct umtx_pi *pi)
1934 {
1935
1936 mtx_assert(&umtx_lock, MA_OWNED);
1937 TAILQ_REMOVE(&pi->pi_owner->td_umtxq->uq_pi_contested, pi, pi_link);
1938 pi->pi_owner = NULL;
1939 }
1940
1941 /*
1942 * Claim ownership of a PI mutex.
1943 */
1944 int
1945 umtx_pi_claim(struct umtx_pi *pi, struct thread *owner)
1946 {
1947 struct umtx_q *uq;
1948 int pri;
1949
1950 mtx_lock(&umtx_lock);
1951 if (pi->pi_owner == owner) {
1952 mtx_unlock(&umtx_lock);
1953 return (0);
1954 }
1955
1956 if (pi->pi_owner != NULL) {
1957 /*
1958 * userland may have already messed the mutex, sigh.
1959 */
1960 mtx_unlock(&umtx_lock);
1961 return (EPERM);
1962 }
1963 umtx_pi_setowner(pi, owner);
1964 uq = TAILQ_FIRST(&pi->pi_blocked);
1965 if (uq != NULL) {
1966 pri = UPRI(uq->uq_thread);
1967 thread_lock(owner);
1968 if (pri < UPRI(owner))
1969 sched_lend_user_prio(owner, pri);
1970 thread_unlock(owner);
1971 }
1972 mtx_unlock(&umtx_lock);
1973 return (0);
1974 }
1975
1976 /*
1977 * Adjust a thread's order position in its blocked PI mutex,
1978 * this may result new priority propagating process.
1979 */
1980 void
1981 umtx_pi_adjust(struct thread *td, u_char oldpri)
1982 {
1983 struct umtx_q *uq;
1984 struct umtx_pi *pi;
1985
1986 uq = td->td_umtxq;
1987 mtx_lock(&umtx_lock);
1988 /*
1989 * Pick up the lock that td is blocked on.
1990 */
1991 pi = uq->uq_pi_blocked;
1992 if (pi != NULL) {
1993 umtx_pi_adjust_thread(pi, td);
1994 umtx_repropagate_priority(pi);
1995 }
1996 mtx_unlock(&umtx_lock);
1997 }
1998
1999 /*
2000 * Sleep on a PI mutex.
2001 */
2002 int
2003 umtxq_sleep_pi(struct umtx_q *uq, struct umtx_pi *pi, uint32_t owner,
2004 const char *wmesg, struct umtx_abs_timeout *timo, bool shared)
2005 {
2006 struct thread *td, *td1;
2007 struct umtx_q *uq1;
2008 int error, pri;
2009 #ifdef INVARIANTS
2010 struct umtxq_chain *uc;
2011
2012 uc = umtxq_getchain(&pi->pi_key);
2013 #endif
2014 error = 0;
2015 td = uq->uq_thread;
2016 KASSERT(td == curthread, ("inconsistent uq_thread"));
2017 UMTXQ_LOCKED_ASSERT(umtxq_getchain(&uq->uq_key));
2018 KASSERT(uc->uc_busy != 0, ("umtx chain is not busy"));
2019 umtxq_insert(uq);
2020 mtx_lock(&umtx_lock);
2021 if (pi->pi_owner == NULL) {
2022 mtx_unlock(&umtx_lock);
2023 td1 = tdfind(owner, shared ? -1 : td->td_proc->p_pid);
2024 mtx_lock(&umtx_lock);
2025 if (td1 != NULL) {
2026 if (pi->pi_owner == NULL)
2027 umtx_pi_setowner(pi, td1);
2028 PROC_UNLOCK(td1->td_proc);
2029 }
2030 }
2031
2032 TAILQ_FOREACH(uq1, &pi->pi_blocked, uq_lockq) {
2033 pri = UPRI(uq1->uq_thread);
2034 if (pri > UPRI(td))
2035 break;
2036 }
2037
2038 if (uq1 != NULL)
2039 TAILQ_INSERT_BEFORE(uq1, uq, uq_lockq);
2040 else
2041 TAILQ_INSERT_TAIL(&pi->pi_blocked, uq, uq_lockq);
2042
2043 uq->uq_pi_blocked = pi;
2044 thread_lock(td);
2045 td->td_flags |= TDF_UPIBLOCKED;
2046 thread_unlock(td);
2047 umtx_propagate_priority(td);
2048 mtx_unlock(&umtx_lock);
2049 umtxq_unbusy(&uq->uq_key);
2050
2051 error = umtxq_sleep(uq, wmesg, timo);
2052 umtxq_remove(uq);
2053
2054 mtx_lock(&umtx_lock);
2055 uq->uq_pi_blocked = NULL;
2056 thread_lock(td);
2057 td->td_flags &= ~TDF_UPIBLOCKED;
2058 thread_unlock(td);
2059 TAILQ_REMOVE(&pi->pi_blocked, uq, uq_lockq);
2060 umtx_repropagate_priority(pi);
2061 mtx_unlock(&umtx_lock);
2062 umtxq_unlock(&uq->uq_key);
2063
2064 return (error);
2065 }
2066
2067 /*
2068 * Add reference count for a PI mutex.
2069 */
2070 void
2071 umtx_pi_ref(struct umtx_pi *pi)
2072 {
2073
2074 UMTXQ_LOCKED_ASSERT(umtxq_getchain(&pi->pi_key));
2075 pi->pi_refcount++;
2076 }
2077
2078 /*
2079 * Decrease reference count for a PI mutex, if the counter
2080 * is decreased to zero, its memory space is freed.
2081 */
2082 void
2083 umtx_pi_unref(struct umtx_pi *pi)
2084 {
2085 struct umtxq_chain *uc;
2086
2087 uc = umtxq_getchain(&pi->pi_key);
2088 UMTXQ_LOCKED_ASSERT(uc);
2089 KASSERT(pi->pi_refcount > 0, ("invalid reference count"));
2090 if (--pi->pi_refcount == 0) {
2091 mtx_lock(&umtx_lock);
2092 if (pi->pi_owner != NULL)
2093 umtx_pi_disown(pi);
2094 KASSERT(TAILQ_EMPTY(&pi->pi_blocked),
2095 ("blocked queue not empty"));
2096 mtx_unlock(&umtx_lock);
2097 TAILQ_REMOVE(&uc->uc_pi_list, pi, pi_hashlink);
2098 umtx_pi_free(pi);
2099 }
2100 }
2101
2102 /*
2103 * Find a PI mutex in hash table.
2104 */
2105 struct umtx_pi *
2106 umtx_pi_lookup(struct umtx_key *key)
2107 {
2108 struct umtxq_chain *uc;
2109 struct umtx_pi *pi;
2110
2111 uc = umtxq_getchain(key);
2112 UMTXQ_LOCKED_ASSERT(uc);
2113
2114 TAILQ_FOREACH(pi, &uc->uc_pi_list, pi_hashlink) {
2115 if (umtx_key_match(&pi->pi_key, key)) {
2116 return (pi);
2117 }
2118 }
2119 return (NULL);
2120 }
2121
2122 /*
2123 * Insert a PI mutex into hash table.
2124 */
2125 void
2126 umtx_pi_insert(struct umtx_pi *pi)
2127 {
2128 struct umtxq_chain *uc;
2129
2130 uc = umtxq_getchain(&pi->pi_key);
2131 UMTXQ_LOCKED_ASSERT(uc);
2132 TAILQ_INSERT_TAIL(&uc->uc_pi_list, pi, pi_hashlink);
2133 }
2134
2135 /*
2136 * Drop a PI mutex and wakeup a top waiter.
2137 */
2138 int
2139 umtx_pi_drop(struct thread *td, struct umtx_key *key, bool rb, int *count)
2140 {
2141 struct umtx_q *uq_first, *uq_first2, *uq_me;
2142 struct umtx_pi *pi, *pi2;
2143 int pri;
2144
2145 UMTXQ_ASSERT_LOCKED_BUSY(key);
2146 *count = umtxq_count_pi(key, &uq_first);
2147 if (uq_first != NULL) {
2148 mtx_lock(&umtx_lock);
2149 pi = uq_first->uq_pi_blocked;
2150 KASSERT(pi != NULL, ("pi == NULL?"));
2151 if (pi->pi_owner != td && !(rb && pi->pi_owner == NULL)) {
2152 mtx_unlock(&umtx_lock);
2153 /* userland messed the mutex */
2154 return (EPERM);
2155 }
2156 uq_me = td->td_umtxq;
2157 if (pi->pi_owner == td)
2158 umtx_pi_disown(pi);
2159 /* get highest priority thread which is still sleeping. */
2160 uq_first = TAILQ_FIRST(&pi->pi_blocked);
2161 while (uq_first != NULL &&
2162 (uq_first->uq_flags & UQF_UMTXQ) == 0) {
2163 uq_first = TAILQ_NEXT(uq_first, uq_lockq);
2164 }
2165 pri = PRI_MAX;
2166 TAILQ_FOREACH(pi2, &uq_me->uq_pi_contested, pi_link) {
2167 uq_first2 = TAILQ_FIRST(&pi2->pi_blocked);
2168 if (uq_first2 != NULL) {
2169 if (pri > UPRI(uq_first2->uq_thread))
2170 pri = UPRI(uq_first2->uq_thread);
2171 }
2172 }
2173 thread_lock(td);
2174 sched_lend_user_prio(td, pri);
2175 thread_unlock(td);
2176 mtx_unlock(&umtx_lock);
2177 if (uq_first)
2178 umtxq_signal_thread(uq_first);
2179 } else {
2180 pi = umtx_pi_lookup(key);
2181 /*
2182 * A umtx_pi can exist if a signal or timeout removed the
2183 * last waiter from the umtxq, but there is still
2184 * a thread in do_lock_pi() holding the umtx_pi.
2185 */
2186 if (pi != NULL) {
2187 /*
2188 * The umtx_pi can be unowned, such as when a thread
2189 * has just entered do_lock_pi(), allocated the
2190 * umtx_pi, and unlocked the umtxq.
2191 * If the current thread owns it, it must disown it.
2192 */
2193 mtx_lock(&umtx_lock);
2194 if (pi->pi_owner == td)
2195 umtx_pi_disown(pi);
2196 mtx_unlock(&umtx_lock);
2197 }
2198 }
2199 return (0);
2200 }
2201
2202 /*
2203 * Lock a PI mutex.
 *
 * The owner word m->m_owner lives in user memory and is accessed only
 * through casueword32()/casuword32(); a -1 result from casueword32()
 * becomes EFAULT.  The id stored into the word is td->td_tid.
 *
 * The key type is TYPE_PI_ROBUST_UMUTEX when UMUTEX_ROBUST is set in
 * flags and TYPE_PI_UMUTEX otherwise.  A umtx_pi for the key is looked
 * up or created, referenced for the duration of the call, and
 * unreferenced (together with the key) on every exit past that point.
 *
 * A non-zero `try' makes the function return EBUSY instead of
 * sleeping; timeout, when not NULL, bounds the sleep.
 *
 * Returns 0 on acquisition, EOWNERDEAD when the lock was taken over
 * from UMUTEX_RB_OWNERDEAD, ENOTRECOVERABLE, EDEADLK when td already
 * owns the lock, EBUSY, EFAULT, or an error from umtx_key_get(),
 * umtx_pi_claim(), thread_check_susp() or umtxq_sleep_pi().
2204 */
2205 static int
2206 do_lock_pi(struct thread *td, struct umutex *m, uint32_t flags,
2207 struct _umtx_time *timeout, int try)
2208 {
2209 struct umtx_abs_timeout timo;
2210 struct umtx_q *uq;
2211 struct umtx_pi *pi, *new_pi;
2212 uint32_t id, old_owner, owner, old;
2213 int error, rv;
2214
2215 id = td->td_tid;
2216 uq = td->td_umtxq;
2217
2218 if ((error = umtx_key_get(m, (flags & UMUTEX_ROBUST) != 0 ?
2219 TYPE_PI_ROBUST_UMUTEX : TYPE_PI_UMUTEX, GET_SHARE(flags),
2220 &uq->uq_key)) != 0)
2221 return (error);
2222
2223 if (timeout != NULL)
2224 umtx_abs_timeout_init2(&timo, timeout);
2225
/*
 * Find or create the umtx_pi.  The first allocation attempt is made
 * with M_NOWAIT while the chain lock is held; on failure the lock is
 * dropped for an M_WAITOK allocation and the lookup is repeated,
 * since another thread may have inserted one in the meantime.
 * NOTE(review): this fallback relies on umtx_pi_alloc(M_NOWAIT)
 * returning NULL on failure -- confirm that it does so rather than
 * touching the failed allocation.
 */
2226 umtxq_lock(&uq->uq_key);
2227 pi = umtx_pi_lookup(&uq->uq_key);
2228 if (pi == NULL) {
2229 new_pi = umtx_pi_alloc(M_NOWAIT);
2230 if (new_pi == NULL) {
2231 umtxq_unlock(&uq->uq_key);
2232 new_pi = umtx_pi_alloc(M_WAITOK);
2233 umtxq_lock(&uq->uq_key);
2234 pi = umtx_pi_lookup(&uq->uq_key);
2235 if (pi != NULL) {
2236 umtx_pi_free(new_pi);
2237 new_pi = NULL;
2238 }
2239 }
2240 if (new_pi != NULL) {
2241 new_pi->pi_key = uq->uq_key;
2242 umtx_pi_insert(new_pi);
2243 pi = new_pi;
2244 }
2245 }
2246 umtx_pi_ref(pi);
2247 umtxq_unlock(&uq->uq_key);
2248
2249 /*
2250 * Care must be exercised when dealing with umtx structure. It
2251 * can fault on any access.
2252 */
/*
 * `error' is 0 on entry to the loop (set by umtx_key_get() above) and
 * carries a sleep error into the following iteration; every `break'
 * leaves the result in `error' for the common exit below.
 */
2253 for (;;) {
2254 /*
2255 * Try the uncontested case. This should be done in userland.
2256 */
2257 rv = casueword32(&m->m_owner, UMUTEX_UNOWNED, &owner, id);
2258 /* The address was invalid. */
2259 if (rv == -1) {
2260 error = EFAULT;
2261 break;
2262 }
2263 /* The acquire succeeded. */
2264 if (rv == 0) {
2265 MPASS(owner == UMUTEX_UNOWNED);
2266 error = 0;
2267 break;
2268 }
2269
2270 if (owner == UMUTEX_RB_NOTRECOV) {
2271 error = ENOTRECOVERABLE;
2272 break;
2273 }
2274
2275 /*
2276 * Nobody owns it, but the acquire failed. This can happen
2277 * with ll/sc atomics.
2278 */
2279 if (owner == UMUTEX_UNOWNED) {
2280 error = thread_check_susp(td, true);
2281 if (error != 0)
2282 break;
2283 continue;
2284 }
2285
2286 /*
2287 * Avoid overwriting a possible error from sleep due
2288 * to the pending signal with suspension check result.
2289 */
2290 if (error == 0) {
2291 error = thread_check_susp(td, true);
2292 if (error != 0)
2293 break;
2294 }
2295
2296 /* If no one owns it but it is contested try to acquire it. */
2297 if (owner == UMUTEX_CONTESTED || owner == UMUTEX_RB_OWNERDEAD) {
2298 old_owner = owner;
2299 rv = casueword32(&m->m_owner, owner, &owner,
2300 id | UMUTEX_CONTESTED);
2301 /* The address was invalid. */
2302 if (rv == -1) {
2303 error = EFAULT;
2304 break;
2305 }
2306 if (rv == 1) {
2307 if (error == 0) {
2308 error = thread_check_susp(td, true);
2309 if (error != 0)
2310 break;
2311 }
2312
2313 /*
2314 * If this failed the lock could
2315 * changed, restart.
2316 */
2317 continue;
2318 }
2319
2320 MPASS(rv == 0);
2321 MPASS(owner == old_owner);
2322 umtxq_lock(&uq->uq_key);
2323 umtxq_busy(&uq->uq_key);
2324 error = umtx_pi_claim(pi, td);
2325 umtxq_unbusy(&uq->uq_key);
2326 umtxq_unlock(&uq->uq_key);
2327 if (error != 0) {
2328 /*
2329 * Since we're going to return an
2330 * error, restore the m_owner to its
2331 * previous, unowned state to avoid
2332 * compounding the problem.
2333 */
2334 (void)casuword32(&m->m_owner,
2335 id | UMUTEX_CONTESTED, old_owner);
2336 }
2337 if (error == 0 && old_owner == UMUTEX_RB_OWNERDEAD)
2338 error = EOWNERDEAD;
2339 break;
2340 }
2341
2342 if ((owner & ~UMUTEX_CONTESTED) == id) {
2343 error = EDEADLK;
2344 break;
2345 }
2346
2347 if (try != 0) {
2348 error = EBUSY;
2349 break;
2350 }
2351
2352 /*
2353 * If we caught a signal, we have retried and now
2354 * exit immediately.
2355 */
2356 if (error != 0)
2357 break;
2358
/*
 * Mark the chain busy before writing the contested bit; busy is
 * dropped via umtxq_unbusy_unlocked() on the two failure paths below,
 * and the sleep path hands it to umtxq_sleep_pi().
 */
2359 umtxq_lock(&uq->uq_key);
2360 umtxq_busy(&uq->uq_key);
2361 umtxq_unlock(&uq->uq_key);
2362
2363 /*
2364 * Set the contested bit so that a release in user space
2365 * knows to use the system call for unlock. If this fails
2366 * either some one else has acquired the lock or it has been
2367 * released.
2368 */
2369 rv = casueword32(&m->m_owner, owner, &old, owner |
2370 UMUTEX_CONTESTED);
2371
2372 /* The address was invalid. */
2373 if (rv == -1) {
2374 umtxq_unbusy_unlocked(&uq->uq_key);
2375 error = EFAULT;
2376 break;
2377 }
2378 if (rv == 1) {
2379 umtxq_unbusy_unlocked(&uq->uq_key);
2380 error = thread_check_susp(td, true);
2381 if (error != 0)
2382 break;
2383
2384 /*
2385 * The lock changed and we need to retry or we
2386 * lost a race to the thread unlocking the
2387 * umtx. Note that the UMUTEX_RB_OWNERDEAD
2388 * value for owner is impossible there.
2389 */
2390 continue;
2391 }
2392
2393 umtxq_lock(&uq->uq_key);
2394
2395 /* We set the contested bit, sleep. */
2396 MPASS(old == owner);
2397 error = umtxq_sleep_pi(uq, pi, owner & ~UMUTEX_CONTESTED,
2398 "umtxpi", timeout == NULL ? NULL : &timo,
2399 (flags & USYNC_PROCESS_SHARED) != 0);
/* On a sleep error, try once more before the check above gives up. */
2400 if (error != 0)
2401 continue;
2402
2403 error = thread_check_susp(td, false);
2404 if (error != 0)
2405 break;
2406 }
2407
/* Common exit: drop the umtx_pi reference and the key. */
2408 umtxq_lock(&uq->uq_key);
2409 umtx_pi_unref(pi);
2410 umtxq_unlock(&uq->uq_key);
2411
2412 umtx_key_release(&uq->uq_key);
2413 return (error);
2414 }
2415
2416 /*
2417 * Unlock a PI mutex.
2418 */
2419 static int
2420 do_unlock_pi(struct thread *td, struct umutex *m, uint32_t flags, bool rb)
2421 {
2422 struct umtx_key key;
2423 uint32_t id, new_owner, old, owner;
2424 int count, error;
2425
2426 id = td->td_tid;
2427
2428 usrloop:
2429 /*
2430 * Make sure we own this mtx.
2431 */
2432 error = fueword32(&m->m_owner, &owner);
2433 if (error == -1)
2434 return (EFAULT);
2435
2436 if ((owner & ~UMUTEX_CONTESTED) != id)
2437 return (EPERM);
2438
2439 new_owner = umtx_unlock_val(flags, rb);
2440
2441 /* This should be done in userland */
2442 if ((owner & UMUTEX_CONTESTED) == 0) {
2443 error = casueword32(&m->m_owner, owner, &old, new_owner);
2444 if (error == -1)
2445 return (EFAULT);
2446 if (error == 1) {
2447 error = thread_check_susp(td, true);
2448 if (error != 0)
2449 return (error);
2450 goto usrloop;
2451 }
2452 if (old == owner)
2453 return (0);
2454 owner = old;
2455 }
2456
2457 /* We should only ever be in here for contested locks */
2458 if ((error = umtx_key_get(m, (flags & UMUTEX_ROBUST) != 0 ?
2459 TYPE_PI_ROBUST_UMUTEX : TYPE_PI_UMUTEX, GET_SHARE(flags),
2460 &key)) != 0)
2461 return (error);
2462
2463 umtxq_lock(&key);
2464 umtxq_busy(&key);
2465 error = umtx_pi_drop(td, &key, rb, &count);
2466 if (error != 0) {
2467 umtxq_unbusy(&key);
2468 umtxq_unlock(&key);
2469 umtx_key_release(&key);
2470 /* userland messed the mutex */
2471 return (error);
2472 }
2473 umtxq_unlock(&key);
2474
2475 /*
2476 * When unlocking the umtx, it must be marked as unowned if
2477 * there is zero or one thread only waiting for it.
2478 * Otherwise, it must be marked as contested.
2479 */
2480
2481 if (count > 1)
2482 new_owner |= UMUTEX_CONTESTED;
2483 again:
2484 error = casueword32(&m->m_owner, owner, &old, new_owner);
2485 if (error == 1) {
2486 error = thread_check_susp(td, false);
2487 if (error == 0)
2488 goto again;
2489 }
2490 umtxq_unbusy_unlocked(&key);
2491 umtx_key_release(&key);
2492 if (error == -1)
2493 return (EFAULT);
2494 if (error == 0 && old != owner)
2495 return (EINVAL);
2496 return (error);
2497 }
2498
2499 /*
2500 * Lock a PP mutex.
2501 */
2502 static int
2503 do_lock_pp(struct thread *td, struct umutex *m, uint32_t flags,
2504 struct _umtx_time *timeout, int try)
2505 {
2506 struct umtx_abs_timeout timo;
2507 struct umtx_q *uq, *uq2;
2508 struct umtx_pi *pi;
2509 uint32_t ceiling;
2510 uint32_t owner, id;
2511 int error, pri, old_inherited_pri, su, rv;
2512
2513 id = td->td_tid;
2514 uq = td->td_umtxq;
2515 if ((error = umtx_key_get(m, (flags & UMUTEX_ROBUST) != 0 ?
2516 TYPE_PP_ROBUST_UMUTEX : TYPE_PP_UMUTEX, GET_SHARE(flags),
2517 &uq->uq_key)) != 0)
2518 return (error);
2519
2520 if (timeout != NULL)
2521 umtx_abs_timeout_init2(&timo, timeout);
2522
2523 su = (priv_check(td, PRIV_SCHED_RTPRIO) == 0);
2524 for (;;) {
2525 old_inherited_pri = uq->uq_inherited_pri;
2526 umtxq_lock(&uq->uq_key);
2527 umtxq_busy(&uq->uq_key);
2528 umtxq_unlock(&uq->uq_key);
2529
2530 rv = fueword32(&m->m_ceilings[0], &ceiling);
2531 if (rv == -1) {
2532 error = EFAULT;
2533 goto out;
2534 }
2535 ceiling = RTP_PRIO_MAX - ceiling;
2536 if (ceiling > RTP_PRIO_MAX) {
2537 error = EINVAL;
2538 goto out;
2539 }
2540
2541 mtx_lock(&umtx_lock);
2542 if (UPRI(td) < PRI_MIN_REALTIME + ceiling) {
2543 mtx_unlock(&umtx_lock);
2544 error = EINVAL;
2545 goto out;
2546 }
2547 if (su && PRI_MIN_REALTIME + ceiling < uq->uq_inherited_pri) {
2548 uq->uq_inherited_pri = PRI_MIN_REALTIME + ceiling;
2549 thread_lock(td);
2550 if (uq->uq_inherited_pri < UPRI(td))
2551 sched_lend_user_prio(td, uq->uq_inherited_pri);
2552 thread_unlock(td);
2553 }
2554 mtx_unlock(&umtx_lock);
2555
2556 rv = casueword32(&m->m_owner, UMUTEX_CONTESTED, &owner,
2557 id | UMUTEX_CONTESTED);
2558 /* The address was invalid. */
2559 if (rv == -1) {
2560 error = EFAULT;
2561 break;
2562 }
2563 if (rv == 0) {
2564 MPASS(owner == UMUTEX_CONTESTED);
2565 error = 0;
2566 break;
2567 }
2568 /* rv == 1 */
2569 if (owner == UMUTEX_RB_OWNERDEAD) {
2570 rv = casueword32(&m->m_owner, UMUTEX_RB_OWNERDEAD,
2571 &owner, id | UMUTEX_CONTESTED);
2572 if (rv == -1) {
2573 error = EFAULT;
2574 break;
2575 }
2576 if (rv == 0) {
2577 MPASS(owner == UMUTEX_RB_OWNERDEAD);
2578 error = EOWNERDEAD; /* success */
2579 break;
2580 }
2581
2582 /*
2583 * rv == 1, only check for suspension if we
2584 * did not already catched a signal. If we
2585 * get an error from the check, the same
2586 * condition is checked by the umtxq_sleep()
2587 * call below, so we should obliterate the
2588 * error to not skip the last loop iteration.
2589 */
2590 if (error == 0) {
2591 error = thread_check_susp(td, false);
2592 if (error == 0) {
2593 if (try != 0)
2594 error = EBUSY;
2595 else
2596 continue;
2597 }
2598 error = 0;
2599 }
2600 } else if (owner == UMUTEX_RB_NOTRECOV) {
2601 error = ENOTRECOVERABLE;
2602 }
2603
2604 if (try != 0)
2605 error = EBUSY;
2606
2607 /*
2608 * If we caught a signal, we have retried and now
2609 * exit immediately.
2610 */
2611 if (error != 0)
2612 break;
2613
2614 umtxq_lock(&uq->uq_key);
2615 umtxq_insert(uq);
2616 umtxq_unbusy(&uq->uq_key);
2617 error = umtxq_sleep(uq, "umtxpp", timeout == NULL ?
2618 NULL : &timo);
2619 umtxq_remove(uq);
2620 umtxq_unlock(&uq->uq_key);
2621
2622 mtx_lock(&umtx_lock);
2623 uq->uq_inherited_pri = old_inherited_pri;
2624 pri = PRI_MAX;
2625 TAILQ_FOREACH(pi, &uq->uq_pi_contested, pi_link) {
2626 uq2 = TAILQ_FIRST(&pi->pi_blocked);
2627 if (uq2 != NULL) {
2628 if (pri > UPRI(uq2->uq_thread))
2629 pri = UPRI(uq2->uq_thread);
2630 }
2631 }
2632 if (pri > uq->uq_inherited_pri)
2633 pri = uq->uq_inherited_pri;
2634 thread_lock(td);
2635 sched_lend_user_prio(td, pri);
2636 thread_unlock(td);
2637 mtx_unlock(&umtx_lock);
2638 }
2639
2640 if (error != 0 && error != EOWNERDEAD) {
2641 mtx_lock(&umtx_lock);
2642 uq->uq_inherited_pri = old_inherited_pri;
2643 pri = PRI_MAX;
2644 TAILQ_FOREACH(pi, &uq->uq_pi_contested, pi_link) {
2645 uq2 = TAILQ_FIRST(&pi->pi_blocked);
2646 if (uq2 != NULL) {
2647 if (pri > UPRI(uq2->uq_thread))
2648 pri = UPRI(uq2->uq_thread);
2649 }
2650 }
2651 if (pri > uq->uq_inherited_pri)
2652 pri = uq->uq_inherited_pri;
2653 thread_lock(td);
2654 sched_lend_user_prio(td, pri);
2655 thread_unlock(td);
2656 mtx_unlock(&umtx_lock);
2657 }
2658
2659 out:
2660 umtxq_unbusy_unlocked(&uq->uq_key);
2661 umtx_key_release(&uq->uq_key);
2662 return (error);
2663 }
2664
2665 /*
2666 * Unlock a PP mutex.
2667 */
2668 static int
2669 do_unlock_pp(struct thread *td, struct umutex *m, uint32_t flags, bool rb)
2670 {
2671 struct umtx_key key;
2672 struct umtx_q *uq, *uq2;
2673 struct umtx_pi *pi;
2674 uint32_t id, owner, rceiling;
2675 int error, pri, new_inherited_pri, su;
2676
2677 id = td->td_tid;
2678 uq = td->td_umtxq;
2679 su = (priv_check(td, PRIV_SCHED_RTPRIO) == 0);
2680
2681 /*
2682 * Make sure we own this mtx.
2683 */
2684 error = fueword32(&m->m_owner, &owner);
2685 if (error == -1)
2686 return (EFAULT);
2687
2688 if ((owner & ~UMUTEX_CONTESTED) != id)
2689 return (EPERM);
2690
2691 error = copyin(&m->m_ceilings[1], &rceiling, sizeof(uint32_t));
2692 if (error != 0)
2693 return (error);
2694
2695 if (rceiling == -1)
2696 new_inherited_pri = PRI_MAX;
2697 else {
2698 rceiling = RTP_PRIO_MAX - rceiling;
2699 if (rceiling > RTP_PRIO_MAX)
2700 return (EINVAL);
2701 new_inherited_pri = PRI_MIN_REALTIME + rceiling;
2702 }
2703
2704 if ((error = umtx_key_get(m, (flags & UMUTEX_ROBUST) != 0 ?
2705 TYPE_PP_ROBUST_UMUTEX : TYPE_PP_UMUTEX, GET_SHARE(flags),
2706 &key)) != 0)
2707 return (error);
2708 umtxq_lock(&key);
2709 umtxq_busy(&key);
2710 umtxq_unlock(&key);
2711 /*
2712 * For priority protected mutex, always set unlocked state
2713 * to UMUTEX_CONTESTED, so that userland always enters kernel
2714 * to lock the mutex, it is necessary because thread priority
2715 * has to be adjusted for such mutex.
2716 */
2717 error = suword32(&m->m_owner, umtx_unlock_val(flags, rb) |
2718 UMUTEX_CONTESTED);
2719
2720 umtxq_lock(&key);
2721 if (error == 0)
2722 umtxq_signal(&key, 1);
2723 umtxq_unbusy(&key);
2724 umtxq_unlock(&key);
2725
2726 if (error == -1)
2727 error = EFAULT;
2728 else {
2729 mtx_lock(&umtx_lock);
2730 if (su != 0)
2731 uq->uq_inherited_pri = new_inherited_pri;
2732 pri = PRI_MAX;
2733 TAILQ_FOREACH(pi, &uq->uq_pi_contested, pi_link) {
2734 uq2 = TAILQ_FIRST(&pi->pi_blocked);
2735 if (uq2 != NULL) {
2736 if (pri > UPRI(uq2->uq_thread))
2737 pri = UPRI(uq2->uq_thread);
2738 }
2739 }
2740 if (pri > uq->uq_inherited_pri)
2741 pri = uq->uq_inherited_pri;
2742 thread_lock(td);
2743 sched_lend_user_prio(td, pri);
2744 thread_unlock(td);
2745 mtx_unlock(&umtx_lock);
2746 }
2747 umtx_key_release(&key);
2748 return (error);
2749 }
2750
2751 static int
2752 do_set_ceiling(struct thread *td, struct umutex *m, uint32_t ceiling,
2753 uint32_t *old_ceiling)
2754 {
2755 struct umtx_q *uq;
2756 uint32_t flags, id, owner, save_ceiling;
2757 int error, rv, rv1;
2758
2759 error = fueword32(&m->m_flags, &flags);
2760 if (error == -1)
2761 return (EFAULT);
2762 if ((flags & UMUTEX_PRIO_PROTECT) == 0)
2763 return (EINVAL);
2764 if (ceiling > RTP_PRIO_MAX)
2765 return (EINVAL);
2766 id = td->td_tid;
2767 uq = td->td_umtxq;
2768 if ((error = umtx_key_get(m, (flags & UMUTEX_ROBUST) != 0 ?
2769 TYPE_PP_ROBUST_UMUTEX : TYPE_PP_UMUTEX, GET_SHARE(flags),
2770 &uq->uq_key)) != 0)
2771 return (error);
2772 for (;;) {
2773 umtxq_lock(&uq->uq_key);
2774 umtxq_busy(&uq->uq_key);
2775 umtxq_unlock(&uq->uq_key);
2776
2777 rv = fueword32(&m->m_ceilings[0], &save_ceiling);
2778 if (rv == -1) {
2779 error = EFAULT;
2780 break;
2781 }
2782
2783 rv = casueword32(&m->m_owner, UMUTEX_CONTESTED, &owner,
2784 id | UMUTEX_CONTESTED);
2785 if (rv == -1) {
2786 error = EFAULT;
2787 break;
2788 }
2789
2790 if (rv == 0) {
2791 MPASS(owner == UMUTEX_CONTESTED);
2792 rv = suword32(&m->m_ceilings[0], ceiling);
2793 rv1 = suword32(&m->m_owner, UMUTEX_CONTESTED);
2794 error = (rv == 0 && rv1 == 0) ? 0: EFAULT;
2795 break;
2796 }
2797
2798 if ((owner & ~UMUTEX_CONTESTED) == id) {
2799 rv = suword32(&m->m_ceilings[0], ceiling);
2800 error = rv == 0 ? 0 : EFAULT;
2801 break;
2802 }
2803
2804 if (owner == UMUTEX_RB_OWNERDEAD) {
2805 error = EOWNERDEAD;
2806 break;
2807 } else if (owner == UMUTEX_RB_NOTRECOV) {
2808 error = ENOTRECOVERABLE;
2809 break;
2810 }
2811
2812 /*
2813 * If we caught a signal, we have retried and now
2814 * exit immediately.
2815 */
2816 if (error != 0)
2817 break;
2818
2819 /*
2820 * We set the contested bit, sleep. Otherwise the lock changed
2821 * and we need to retry or we lost a race to the thread
2822 * unlocking the umtx.
2823 */
2824 umtxq_lock(&uq->uq_key);
2825 umtxq_insert(uq);
2826 umtxq_unbusy(&uq->uq_key);
2827 error = umtxq_sleep(uq, "umtxpp", NULL);
2828 umtxq_remove(uq);
2829 umtxq_unlock(&uq->uq_key);
2830 }
2831 umtxq_lock(&uq->uq_key);
2832 if (error == 0)
2833 umtxq_signal(&uq->uq_key, INT_MAX);
2834 umtxq_unbusy(&uq->uq_key);
2835 umtxq_unlock(&uq->uq_key);
2836 umtx_key_release(&uq->uq_key);
2837 if (error == 0 && old_ceiling != NULL) {
2838 rv = suword32(old_ceiling, save_ceiling);
2839 error = rv == 0 ? 0 : EFAULT;
2840 }
2841 return (error);
2842 }
2843
2844 /*
2845 * Lock a userland POSIX mutex.
2846 */
2847 static int
2848 do_lock_umutex(struct thread *td, struct umutex *m,
2849 struct _umtx_time *timeout, int mode)
2850 {
2851 uint32_t flags;
2852 int error;
2853
2854 error = fueword32(&m->m_flags, &flags);
2855 if (error == -1)
2856 return (EFAULT);
2857
2858 switch (flags & (UMUTEX_PRIO_INHERIT | UMUTEX_PRIO_PROTECT)) {
2859 case 0:
2860 error = do_lock_normal(td, m, flags, timeout, mode);
2861 break;
2862 case UMUTEX_PRIO_INHERIT:
2863 error = do_lock_pi(td, m, flags, timeout, mode);
2864 break;
2865 case UMUTEX_PRIO_PROTECT:
2866 error = do_lock_pp(td, m, flags, timeout, mode);
2867 break;
2868 default:
2869 return (EINVAL);
2870 }
2871 if (timeout == NULL) {
2872 if (error == EINTR && mode != _UMUTEX_WAIT)
2873 error = ERESTART;
2874 } else {
2875 /* Timed-locking is not restarted. */
2876 if (error == ERESTART)
2877 error = EINTR;
2878 }
2879 return (error);
2880 }
2881
2882 /*
2883 * Unlock a userland POSIX mutex.
2884 */
2885 static int
2886 do_unlock_umutex(struct thread *td, struct umutex *m, bool rb)
2887 {
2888 uint32_t flags;
2889 int error;
2890
2891 error = fueword32(&m->m_flags, &flags);
2892 if (error == -1)
2893 return (EFAULT);
2894
2895 switch (flags & (UMUTEX_PRIO_INHERIT | UMUTEX_PRIO_PROTECT)) {
2896 case 0:
2897 return (do_unlock_normal(td, m, flags, rb));
2898 case UMUTEX_PRIO_INHERIT:
2899 return (do_unlock_pi(td, m, flags, rb));
2900 case UMUTEX_PRIO_PROTECT:
2901 return (do_unlock_pp(td, m, flags, rb));
2902 }
2903
2904 return (EINVAL);
2905 }
2906
2907 static int
2908 do_cv_wait(struct thread *td, struct ucond *cv, struct umutex *m,
2909 struct timespec *timeout, u_long wflags)
2910 {
2911 struct umtx_abs_timeout timo;
2912 struct umtx_q *uq;
2913 uint32_t flags, clockid, hasw;
2914 int error;
2915
2916 uq = td->td_umtxq;
2917 error = fueword32(&cv->c_flags, &flags);
2918 if (error == -1)
2919 return (EFAULT);
2920 error = umtx_key_get(cv, TYPE_CV, GET_SHARE(flags), &uq->uq_key);
2921 if (error != 0)
2922 return (error);
2923
2924 if ((wflags & CVWAIT_CLOCKID) != 0) {
2925 error = fueword32(&cv->c_clockid, &clockid);
2926 if (error == -1) {
2927 umtx_key_release(&uq->uq_key);
2928 return (EFAULT);
2929 }
2930 if (clockid < CLOCK_REALTIME ||
2931 clockid >= CLOCK_THREAD_CPUTIME_ID) {
2932 /* hmm, only HW clock id will work. */
2933 umtx_key_release(&uq->uq_key);
2934 return (EINVAL);
2935 }
2936 } else {
2937 clockid = CLOCK_REALTIME;
2938 }
2939
2940 umtxq_lock(&uq->uq_key);
2941 umtxq_busy(&uq->uq_key);
2942 umtxq_insert(uq);
2943 umtxq_unlock(&uq->uq_key);
2944
2945 /*
2946 * Set c_has_waiters to 1 before releasing user mutex, also
2947 * don't modify cache line when unnecessary.
2948 */
2949 error = fueword32(&cv->c_has_waiters, &hasw);
2950 if (error == 0 && hasw == 0)
2951 suword32(&cv->c_has_waiters, 1);
2952
2953 umtxq_unbusy_unlocked(&uq->uq_key);
2954
2955 error = do_unlock_umutex(td, m, false);
2956
2957 if (timeout != NULL)
2958 umtx_abs_timeout_init(&timo, clockid,
2959 (wflags & CVWAIT_ABSTIME) != 0, timeout);
2960
2961 umtxq_lock(&uq->uq_key);
2962 if (error == 0) {
2963 error = umtxq_sleep(uq, "ucond", timeout == NULL ?
2964 NULL : &timo);
2965 }
2966
2967 if ((uq->uq_flags & UQF_UMTXQ) == 0)
2968 error = 0;
2969 else {
2970 /*
2971 * This must be timeout,interrupted by signal or
2972 * surprious wakeup, clear c_has_waiter flag when
2973 * necessary.
2974 */
2975 umtxq_busy(&uq->uq_key);
2976 if ((uq->uq_flags & UQF_UMTXQ) != 0) {
2977 int oldlen = uq->uq_cur_queue->length;
2978 umtxq_remove(uq);
2979 if (oldlen == 1) {
2980 umtxq_unlock(&uq->uq_key);
2981 suword32(&cv->c_has_waiters, 0);
2982 umtxq_lock(&uq->uq_key);
2983 }
2984 }
2985 umtxq_unbusy(&uq->uq_key);
2986 if (error == ERESTART)
2987 error = EINTR;
2988 }
2989
2990 umtxq_unlock(&uq->uq_key);
2991 umtx_key_release(&uq->uq_key);
2992 return (error);
2993 }
2994
2995 /*
2996 * Signal a userland condition variable.
2997 */
2998 static int
2999 do_cv_signal(struct thread *td, struct ucond *cv)
3000 {
3001 struct umtx_key key;
3002 int error, cnt, nwake;
3003 uint32_t flags;
3004
3005 error = fueword32(&cv->c_flags, &flags);
3006 if (error == -1)
3007 return (EFAULT);
3008 if ((error = umtx_key_get(cv, TYPE_CV, GET_SHARE(flags), &key)) != 0)
3009 return (error);
3010 umtxq_lock(&key);
3011 umtxq_busy(&key);
3012 cnt = umtxq_count(&key);
3013 nwake = umtxq_signal(&key, 1);
3014 if (cnt <= nwake) {
3015 umtxq_unlock(&key);
3016 error = suword32(&cv->c_has_waiters, 0);
3017 if (error == -1)
3018 error = EFAULT;
3019 umtxq_lock(&key);
3020 }
3021 umtxq_unbusy(&key);
3022 umtxq_unlock(&key);
3023 umtx_key_release(&key);
3024 return (error);
3025 }
3026
3027 static int
3028 do_cv_broadcast(struct thread *td, struct ucond *cv)
3029 {
3030 struct umtx_key key;
3031 int error;
3032 uint32_t flags;
3033
3034 error = fueword32(&cv->c_flags, &flags);
3035 if (error == -1)
3036 return (EFAULT);
3037 if ((error = umtx_key_get(cv, TYPE_CV, GET_SHARE(flags), &key)) != 0)
3038 return (error);
3039
3040 umtxq_lock(&key);
3041 umtxq_busy(&key);
3042 umtxq_signal(&key, INT_MAX);
3043 umtxq_unlock(&key);
3044
3045 error = suword32(&cv->c_has_waiters, 0);
3046 if (error == -1)
3047 error = EFAULT;
3048
3049 umtxq_unbusy_unlocked(&key);
3050
3051 umtx_key_release(&key);
3052 return (error);
3053 }
3054
3055 static int
3056 do_rw_rdlock(struct thread *td, struct urwlock *rwlock, long fflag,
3057 struct _umtx_time *timeout)
3058 {
3059 struct umtx_abs_timeout timo;
3060 struct umtx_q *uq;
3061 uint32_t flags, wrflags;
3062 int32_t state, oldstate;
3063 int32_t blocked_readers;
3064 int error, error1, rv;
3065
3066 uq = td->td_umtxq;
3067 error = fueword32(&rwlock->rw_flags, &flags);
3068 if (error == -1)
3069 return (EFAULT);
3070 error = umtx_key_get(rwlock, TYPE_RWLOCK, GET_SHARE(flags), &uq->uq_key);
3071 if (error != 0)
3072 return (error);
3073
3074 if (timeout != NULL)
3075 umtx_abs_timeout_init2(&timo, timeout);
3076
3077 wrflags = URWLOCK_WRITE_OWNER;
3078 if (!(fflag & URWLOCK_PREFER_READER) && !(flags & URWLOCK_PREFER_READER))
3079 wrflags |= URWLOCK_WRITE_WAITERS;
3080
3081 for (;;) {
3082 rv = fueword32(&rwlock->rw_state, &state);
3083 if (rv == -1) {
3084 umtx_key_release(&uq->uq_key);
3085 return (EFAULT);
3086 }
3087
3088 /* try to lock it */
3089 while (!(state & wrflags)) {
3090 if (__predict_false(URWLOCK_READER_COUNT(state) ==
3091 URWLOCK_MAX_READERS)) {
3092 umtx_key_release(&uq->uq_key);
3093 return (EAGAIN);
3094 }
3095 rv = casueword32(&rwlock->rw_state, state,
3096 &oldstate, state + 1);
3097 if (rv == -1) {
3098 umtx_key_release(&uq->uq_key);
3099 return (EFAULT);
3100 }
3101 if (rv == 0) {
3102 MPASS(oldstate == state);
3103 umtx_key_release(&uq->uq_key);
3104 return (0);
3105 }
3106 error = thread_check_susp(td, true);
3107 if (error != 0)
3108 break;
3109 state = oldstate;
3110 }
3111
3112 if (error)
3113 break;
3114
3115 /* grab monitor lock */
3116 umtxq_lock(&uq->uq_key);
3117 umtxq_busy(&uq->uq_key);
3118 umtxq_unlock(&uq->uq_key);
3119
3120 /*
3121 * re-read the state, in case it changed between the try-lock above
3122 * and the check below
3123 */
3124 rv = fueword32(&rwlock->rw_state, &state);
3125 if (rv == -1)
3126 error = EFAULT;
3127
3128 /* set read contention bit */
3129 while (error == 0 && (state & wrflags) &&
3130 !(state & URWLOCK_READ_WAITERS)) {
3131 rv = casueword32(&rwlock->rw_state, state,
3132 &oldstate, state | URWLOCK_READ_WAITERS);
3133 if (rv == -1) {
3134 error = EFAULT;
3135 break;
3136 }
3137 if (rv == 0) {
3138 MPASS(oldstate == state);
3139 goto sleep;
3140 }
3141 state = oldstate;
3142 error = thread_check_susp(td, false);
3143 if (error != 0)
3144 break;
3145 }
3146 if (error != 0) {
3147 umtxq_unbusy_unlocked(&uq->uq_key);
3148 break;
3149 }
3150
3151 /* state is changed while setting flags, restart */
3152 if (!(state & wrflags)) {
3153 umtxq_unbusy_unlocked(&uq->uq_key);
3154 error = thread_check_susp(td, true);
3155 if (error != 0)
3156 break;
3157 continue;
3158 }
3159
3160 sleep:
3161 /*
3162 * Contention bit is set, before sleeping, increase
3163 * read waiter count.
3164 */
3165 rv = fueword32(&rwlock->rw_blocked_readers,
3166 &blocked_readers);
3167 if (rv == -1) {
3168 umtxq_unbusy_unlocked(&uq->uq_key);
3169 error = EFAULT;
3170 break;
3171 }
3172 suword32(&rwlock->rw_blocked_readers, blocked_readers+1);
3173
3174 while (state & wrflags) {
3175 umtxq_lock(&uq->uq_key);
3176 umtxq_insert(uq);
3177 umtxq_unbusy(&uq->uq_key);
3178
3179 error = umtxq_sleep(uq, "urdlck", timeout == NULL ?
3180 NULL : &timo);
3181
3182 umtxq_busy(&uq->uq_key);
3183 umtxq_remove(uq);
3184 umtxq_unlock(&uq->uq_key);
3185 if (error)
3186 break;
3187 rv = fueword32(&rwlock->rw_state, &state);
3188 if (rv == -1) {
3189 error = EFAULT;
3190 break;
3191 }
3192 }
3193
3194 /* decrease read waiter count, and may clear read contention bit */
3195 rv = fueword32(&rwlock->rw_blocked_readers,
3196 &blocked_readers);
3197 if (rv == -1) {
3198 umtxq_unbusy_unlocked(&uq->uq_key);
3199 error = EFAULT;
3200 break;
3201 }
3202 suword32(&rwlock->rw_blocked_readers, blocked_readers-1);
3203 if (blocked_readers == 1) {
3204 rv = fueword32(&rwlock->rw_state, &state);
3205 if (rv == -1) {
3206 umtxq_unbusy_unlocked(&uq->uq_key);
3207 error = EFAULT;
3208 break;
3209 }
3210 for (;;) {
3211 rv = casueword32(&rwlock->rw_state, state,
3212 &oldstate, state & ~URWLOCK_READ_WAITERS);
3213 if (rv == -1) {
3214 error = EFAULT;
3215 break;
3216 }
3217 if (rv == 0) {
3218 MPASS(oldstate == state);
3219 break;
3220 }
3221 state = oldstate;
3222 error1 = thread_check_susp(td, false);
3223 if (error1 != 0) {
3224 if (error == 0)
3225 error = error1;
3226 break;
3227 }
3228 }
3229 }
3230
3231 umtxq_unbusy_unlocked(&uq->uq_key);
3232 if (error != 0)
3233 break;
3234 }
3235 umtx_key_release(&uq->uq_key);
3236 if (error == ERESTART)
3237 error = EINTR;
3238 return (error);
3239 }
3240
3241 static int
3242 do_rw_wrlock(struct thread *td, struct urwlock *rwlock, struct _umtx_time *timeout)
3243 {
3244 struct umtx_abs_timeout timo;
3245 struct umtx_q *uq;
3246 uint32_t flags;
3247 int32_t state, oldstate;
3248 int32_t blocked_writers;
3249 int32_t blocked_readers;
3250 int error, error1, rv;
3251
3252 uq = td->td_umtxq;
3253 error = fueword32(&rwlock->rw_flags, &flags);
3254 if (error == -1)
3255 return (EFAULT);
3256 error = umtx_key_get(rwlock, TYPE_RWLOCK, GET_SHARE(flags), &uq->uq_key);
3257 if (error != 0)
3258 return (error);
3259
3260 if (timeout != NULL)
3261 umtx_abs_timeout_init2(&timo, timeout);
3262
3263 blocked_readers = 0;
3264 for (;;) {
3265 rv = fueword32(&rwlock->rw_state, &state);
3266 if (rv == -1) {
3267 umtx_key_release(&uq->uq_key);
3268 return (EFAULT);
3269 }
3270 while ((state & URWLOCK_WRITE_OWNER) == 0 &&
3271 URWLOCK_READER_COUNT(state) == 0) {
3272 rv = casueword32(&rwlock->rw_state, state,
3273 &oldstate, state | URWLOCK_WRITE_OWNER);
3274 if (rv == -1) {
3275 umtx_key_release(&uq->uq_key);
3276 return (EFAULT);
3277 }
3278 if (rv == 0) {
3279 MPASS(oldstate == state);
3280 umtx_key_release(&uq->uq_key);
3281 return (0);
3282 }
3283 state = oldstate;
3284 error = thread_check_susp(td, true);
3285 if (error != 0)
3286 break;
3287 }
3288
3289 if (error) {
3290 if ((state & (URWLOCK_WRITE_OWNER |
3291 URWLOCK_WRITE_WAITERS)) == 0 &&
3292 blocked_readers != 0) {
3293 umtxq_lock(&uq->uq_key);
3294 umtxq_busy(&uq->uq_key);
3295 umtxq_signal_queue(&uq->uq_key, INT_MAX,
3296 UMTX_SHARED_QUEUE);
3297 umtxq_unbusy(&uq->uq_key);
3298 umtxq_unlock(&uq->uq_key);
3299 }
3300
3301 break;
3302 }
3303
3304 /* grab monitor lock */
3305 umtxq_lock(&uq->uq_key);
3306 umtxq_busy(&uq->uq_key);
3307 umtxq_unlock(&uq->uq_key);
3308
3309 /*
3310 * Re-read the state, in case it changed between the
3311 * try-lock above and the check below.
3312 */
3313 rv = fueword32(&rwlock->rw_state, &state);
3314 if (rv == -1)
3315 error = EFAULT;
3316
3317 while (error == 0 && ((state & URWLOCK_WRITE_OWNER) ||
3318 URWLOCK_READER_COUNT(state) != 0) &&
3319 (state & URWLOCK_WRITE_WAITERS) == 0) {
3320 rv = casueword32(&rwlock->rw_state, state,
3321 &oldstate, state | URWLOCK_WRITE_WAITERS);
3322 if (rv == -1) {
3323 error = EFAULT;
3324 break;
3325 }
3326 if (rv == 0) {
3327 MPASS(oldstate == state);
3328 goto sleep;
3329 }
3330 state = oldstate;
3331 error = thread_check_susp(td, false);
3332 if (error != 0)
3333 break;
3334 }
3335 if (error != 0) {
3336 umtxq_unbusy_unlocked(&uq->uq_key);
3337 break;
3338 }
3339
3340 if ((state & URWLOCK_WRITE_OWNER) == 0 &&
3341 URWLOCK_READER_COUNT(state) == 0) {
3342 umtxq_unbusy_unlocked(&uq->uq_key);
3343 error = thread_check_susp(td, false);
3344 if (error != 0)
3345 break;
3346 continue;
3347 }
3348 sleep:
3349 rv = fueword32(&rwlock->rw_blocked_writers,
3350 &blocked_writers);
3351 if (rv == -1) {
3352 umtxq_unbusy_unlocked(&uq->uq_key);
3353 error = EFAULT;
3354 break;
3355 }
3356 suword32(&rwlock->rw_blocked_writers, blocked_writers + 1);
3357
3358 while ((state & URWLOCK_WRITE_OWNER) ||
3359 URWLOCK_READER_COUNT(state) != 0) {
3360 umtxq_lock(&uq->uq_key);
3361 umtxq_insert_queue(uq, UMTX_EXCLUSIVE_QUEUE);
3362 umtxq_unbusy(&uq->uq_key);
3363
3364 error = umtxq_sleep(uq, "uwrlck", timeout == NULL ?
3365 NULL : &timo);
3366
3367 umtxq_busy(&uq->uq_key);
3368 umtxq_remove_queue(uq, UMTX_EXCLUSIVE_QUEUE);
3369 umtxq_unlock(&uq->uq_key);
3370 if (error)
3371 break;
3372 rv = fueword32(&rwlock->rw_state, &state);
3373 if (rv == -1) {
3374 error = EFAULT;
3375 break;
3376 }
3377 }
3378
3379 rv = fueword32(&rwlock->rw_blocked_writers,
3380 &blocked_writers);
3381 if (rv == -1) {
3382 umtxq_unbusy_unlocked(&uq->uq_key);
3383 error = EFAULT;
3384 break;
3385 }
3386 suword32(&rwlock->rw_blocked_writers, blocked_writers-1);
3387 if (blocked_writers == 1) {
3388 rv = fueword32(&rwlock->rw_state, &state);
3389 if (rv == -1) {
3390 umtxq_unbusy_unlocked(&uq->uq_key);
3391 error = EFAULT;
3392 break;
3393 }
3394 for (;;) {
3395 rv = casueword32(&rwlock->rw_state, state,
3396 &oldstate, state & ~URWLOCK_WRITE_WAITERS);
3397 if (rv == -1) {
3398 error = EFAULT;
3399 break;
3400 }
3401 if (rv == 0) {
3402 MPASS(oldstate == state);
3403 break;
3404 }
3405 state = oldstate;
3406 error1 = thread_check_susp(td, false);
3407 /*
3408 * We are leaving the URWLOCK_WRITE_WAITERS
3409 * behind, but this should not harm the
3410 * correctness.
3411 */
3412 if (error1 != 0) {
3413 if (error == 0)
3414 error = error1;
3415 break;
3416 }
3417 }
3418 rv = fueword32(&rwlock->rw_blocked_readers,
3419 &blocked_readers);
3420 if (rv == -1) {
3421 umtxq_unbusy_unlocked(&uq->uq_key);
3422 error = EFAULT;
3423 break;
3424 }
3425 } else
3426 blocked_readers = 0;
3427
3428 umtxq_unbusy_unlocked(&uq->uq_key);
3429 }
3430
3431 umtx_key_release(&uq->uq_key);
3432 if (error == ERESTART)
3433 error = EINTR;
3434 return (error);
3435 }
3436
3437 static int
3438 do_rw_unlock(struct thread *td, struct urwlock *rwlock)
3439 {
3440 struct umtx_q *uq;
3441 uint32_t flags;
3442 int32_t state, oldstate;
3443 int error, rv, q, count;
3444
3445 uq = td->td_umtxq;
3446 error = fueword32(&rwlock->rw_flags, &flags);
3447 if (error == -1)
3448 return (EFAULT);
3449 error = umtx_key_get(rwlock, TYPE_RWLOCK, GET_SHARE(flags), &uq->uq_key);
3450 if (error != 0)
3451 return (error);
3452
3453 error = fueword32(&rwlock->rw_state, &state);
3454 if (error == -1) {
3455 error = EFAULT;
3456 goto out;
3457 }
3458 if (state & URWLOCK_WRITE_OWNER) {
3459 for (;;) {
3460 rv = casueword32(&rwlock->rw_state, state,
3461 &oldstate, state & ~URWLOCK_WRITE_OWNER);
3462 if (rv == -1) {
3463 error = EFAULT;
3464 goto out;
3465 }
3466 if (rv == 1) {
3467 state = oldstate;
3468 if (!(oldstate & URWLOCK_WRITE_OWNER)) {
3469 error = EPERM;
3470 goto out;
3471 }
3472 error = thread_check_susp(td, true);
3473 if (error != 0)
3474 goto out;
3475 } else
3476 break;
3477 }
3478 } else if (URWLOCK_READER_COUNT(state) != 0) {
3479 for (;;) {
3480 rv = casueword32(&rwlock->rw_state, state,
3481 &oldstate, state - 1);
3482 if (rv == -1) {
3483 error = EFAULT;
3484 goto out;
3485 }
3486 if (rv == 1) {
3487 state = oldstate;
3488 if (URWLOCK_READER_COUNT(oldstate) == 0) {
3489 error = EPERM;
3490 goto out;
3491 }
3492 error = thread_check_susp(td, true);
3493 if (error != 0)
3494 goto out;
3495 } else
3496 break;
3497 }
3498 } else {
3499 error = EPERM;
3500 goto out;
3501 }
3502
3503 count = 0;
3504
3505 if (!(flags & URWLOCK_PREFER_READER)) {
3506 if (state & URWLOCK_WRITE_WAITERS) {
3507 count = 1;
3508 q = UMTX_EXCLUSIVE_QUEUE;
3509 } else if (state & URWLOCK_READ_WAITERS) {
3510 count = INT_MAX;
3511 q = UMTX_SHARED_QUEUE;
3512 }
3513 } else {
3514 if (state & URWLOCK_READ_WAITERS) {
3515 count = INT_MAX;
3516 q = UMTX_SHARED_QUEUE;
3517 } else if (state & URWLOCK_WRITE_WAITERS) {
3518 count = 1;
3519 q = UMTX_EXCLUSIVE_QUEUE;
3520 }
3521 }
3522
3523 if (count) {
3524 umtxq_lock(&uq->uq_key);
3525 umtxq_busy(&uq->uq_key);
3526 umtxq_signal_queue(&uq->uq_key, count, q);
3527 umtxq_unbusy(&uq->uq_key);
3528 umtxq_unlock(&uq->uq_key);
3529 }
3530 out:
3531 umtx_key_release(&uq->uq_key);
3532 return (error);
3533 }
3534
3535 #if defined(COMPAT_FREEBSD9) || defined(COMPAT_FREEBSD10)
3536 static int
3537 do_sem_wait(struct thread *td, struct _usem *sem, struct _umtx_time *timeout)
3538 {
3539 struct umtx_abs_timeout timo;
3540 struct umtx_q *uq;
3541 uint32_t flags, count, count1;
3542 int error, rv, rv1;
3543
3544 uq = td->td_umtxq;
3545 error = fueword32(&sem->_flags, &flags);
3546 if (error == -1)
3547 return (EFAULT);
3548 error = umtx_key_get(sem, TYPE_SEM, GET_SHARE(flags), &uq->uq_key);
3549 if (error != 0)
3550 return (error);
3551
3552 if (timeout != NULL)
3553 umtx_abs_timeout_init2(&timo, timeout);
3554
3555 again:
3556 umtxq_lock(&uq->uq_key);
3557 umtxq_busy(&uq->uq_key);
3558 umtxq_insert(uq);
3559 umtxq_unlock(&uq->uq_key);
3560 rv = casueword32(&sem->_has_waiters, 0, &count1, 1);
3561 if (rv != -1)
3562 rv1 = fueword32(&sem->_count, &count);
3563 if (rv == -1 || rv1 == -1 || count != 0 || (rv == 1 && count1 == 0)) {
3564 if (rv == 0)
3565 suword32(&sem->_has_waiters, 0);
3566 umtxq_lock(&uq->uq_key);
3567 umtxq_unbusy(&uq->uq_key);
3568 umtxq_remove(uq);
3569 umtxq_unlock(&uq->uq_key);
3570 if (rv == -1 || rv1 == -1) {
3571 error = EFAULT;
3572 goto out;
3573 }
3574 if (count != 0) {
3575 error = 0;
3576 goto out;
3577 }
3578 MPASS(rv == 1 && count1 == 0);
3579 rv = thread_check_susp(td, true);
3580 if (rv == 0)
3581 goto again;
3582 error = rv;
3583 goto out;
3584 }
3585 umtxq_lock(&uq->uq_key);
3586 umtxq_unbusy(&uq->uq_key);
3587
3588 error = umtxq_sleep(uq, "usem", timeout == NULL ? NULL : &timo);
3589
3590 if ((uq->uq_flags & UQF_UMTXQ) == 0)
3591 error = 0;
3592 else {
3593 umtxq_remove(uq);
3594 /* A relative timeout cannot be restarted. */
3595 if (error == ERESTART && timeout != NULL &&
3596 (timeout->_flags & UMTX_ABSTIME) == 0)
3597 error = EINTR;
3598 }
3599 umtxq_unlock(&uq->uq_key);
3600 out:
3601 umtx_key_release(&uq->uq_key);
3602 return (error);
3603 }
3604
3605 /*
3606 * Signal a userland semaphore.
3607 */
3608 static int
3609 do_sem_wake(struct thread *td, struct _usem *sem)
3610 {
3611 struct umtx_key key;
3612 int error, cnt;
3613 uint32_t flags;
3614
3615 error = fueword32(&sem->_flags, &flags);
3616 if (error == -1)
3617 return (EFAULT);
3618 if ((error = umtx_key_get(sem, TYPE_SEM, GET_SHARE(flags), &key)) != 0)
3619 return (error);
3620 umtxq_lock(&key);
3621 umtxq_busy(&key);
3622 cnt = umtxq_count(&key);
3623 if (cnt > 0) {
3624 /*
3625 * Check if count is greater than 0, this means the memory is
3626 * still being referenced by user code, so we can safely
3627 * update _has_waiters flag.
3628 */
3629 if (cnt == 1) {
3630 umtxq_unlock(&key);
3631 error = suword32(&sem->_has_waiters, 0);
3632 umtxq_lock(&key);
3633 if (error == -1)
3634 error = EFAULT;
3635 }
3636 umtxq_signal(&key, 1);
3637 }
3638 umtxq_unbusy(&key);
3639 umtxq_unlock(&key);
3640 umtx_key_release(&key);
3641 return (error);
3642 }
3643 #endif
3644
3645 static int
3646 do_sem2_wait(struct thread *td, struct _usem2 *sem, struct _umtx_time *timeout)
3647 {
3648 struct umtx_abs_timeout timo;
3649 struct umtx_q *uq;
3650 uint32_t count, flags;
3651 int error, rv;
3652
3653 uq = td->td_umtxq;
3654 flags = fuword32(&sem->_flags);
3655 if (timeout != NULL)
3656 umtx_abs_timeout_init2(&timo, timeout);
3657
3658 again:
3659 error = umtx_key_get(sem, TYPE_SEM, GET_SHARE(flags), &uq->uq_key);
3660 if (error != 0)
3661 return (error);
3662 umtxq_lock(&uq->uq_key);
3663 umtxq_busy(&uq->uq_key);
3664 umtxq_insert(uq);
3665 umtxq_unlock(&uq->uq_key);
3666 rv = fueword32(&sem->_count, &count);
3667 if (rv == -1) {
3668 umtxq_lock(&uq->uq_key);
3669 umtxq_unbusy(&uq->uq_key);
3670 umtxq_remove(uq);
3671 umtxq_unlock(&uq->uq_key);
3672 umtx_key_release(&uq->uq_key);
3673 return (EFAULT);
3674 }
3675 for (;;) {
3676 if (USEM_COUNT(count) != 0) {
3677 umtxq_lock(&uq->uq_key);
3678 umtxq_unbusy(&uq->uq_key);
3679 umtxq_remove(uq);
3680 umtxq_unlock(&uq->uq_key);
3681 umtx_key_release(&uq->uq_key);
3682 return (0);
3683 }
3684 if (count == USEM_HAS_WAITERS)
3685 break;
3686 rv = casueword32(&sem->_count, 0, &count, USEM_HAS_WAITERS);
3687 if (rv == 0)
3688 break;
3689 umtxq_lock(&uq->uq_key);
3690 umtxq_unbusy(&uq->uq_key);
3691 umtxq_remove(uq);
3692 umtxq_unlock(&uq->uq_key);
3693 umtx_key_release(&uq->uq_key);
3694 if (rv == -1)
3695 return (EFAULT);
3696 rv = thread_check_susp(td, true);
3697 if (rv != 0)
3698 return (rv);
3699 goto again;
3700 }
3701 umtxq_lock(&uq->uq_key);
3702 umtxq_unbusy(&uq->uq_key);
3703
3704 error = umtxq_sleep(uq, "usem", timeout == NULL ? NULL : &timo);
3705
3706 if ((uq->uq_flags & UQF_UMTXQ) == 0)
3707 error = 0;
3708 else {
3709 umtxq_remove(uq);
3710 if (timeout != NULL && (timeout->_flags & UMTX_ABSTIME) == 0) {
3711 /* A relative timeout cannot be restarted. */
3712 if (error == ERESTART)
3713 error = EINTR;
3714 if (error == EINTR) {
3715 kern_clock_gettime(curthread, timo.clockid,
3716 &timo.cur);
3717 timespecsub(&timo.end, &timo.cur,
3718 &timeout->_timeout);
3719 }
3720 }
3721 }
3722 umtxq_unlock(&uq->uq_key);
3723 umtx_key_release(&uq->uq_key);
3724 return (error);
3725 }
3726
3727 /*
3728 * Signal a userland semaphore.
3729 */
3730 static int
3731 do_sem2_wake(struct thread *td, struct _usem2 *sem)
3732 {
3733 struct umtx_key key;
3734 int error, cnt, rv;
3735 uint32_t count, flags;
3736
3737 rv = fueword32(&sem->_flags, &flags);
3738 if (rv == -1)
3739 return (EFAULT);
3740 if ((error = umtx_key_get(sem, TYPE_SEM, GET_SHARE(flags), &key)) != 0)
3741 return (error);
3742 umtxq_lock(&key);
3743 umtxq_busy(&key);
3744 cnt = umtxq_count(&key);
3745 if (cnt > 0) {
3746 /*
3747 * If this was the last sleeping thread, clear the waiters
3748 * flag in _count.
3749 */
3750 if (cnt == 1) {
3751 umtxq_unlock(&key);
3752 rv = fueword32(&sem->_count, &count);
3753 while (rv != -1 && count & USEM_HAS_WAITERS) {
3754 rv = casueword32(&sem->_count, count, &count,
3755 count & ~USEM_HAS_WAITERS);
3756 if (rv == 1) {
3757 rv = thread_check_susp(td, true);
3758 if (rv != 0)
3759 break;
3760 }
3761 }
3762 if (rv == -1)
3763 error = EFAULT;
3764 else if (rv > 0) {
3765 error = rv;
3766 }
3767 umtxq_lock(&key);
3768 }
3769
3770 umtxq_signal(&key, 1);
3771 }
3772 umtxq_unbusy(&key);
3773 umtxq_unlock(&key);
3774 umtx_key_release(&key);
3775 return (error);
3776 }
3777
3778 #ifdef COMPAT_FREEBSD10
3779 int
3780 freebsd10__umtx_lock(struct thread *td, struct freebsd10__umtx_lock_args *uap)
3781 {
3782 return (do_lock_umtx(td, uap->umtx, td->td_tid, 0));
3783 }
3784
3785 int
3786 freebsd10__umtx_unlock(struct thread *td,
3787 struct freebsd10__umtx_unlock_args *uap)
3788 {
3789 return (do_unlock_umtx(td, uap->umtx, td->td_tid));
3790 }
3791 #endif
3792
3793 inline int
3794 umtx_copyin_timeout(const void *uaddr, struct timespec *tsp)
3795 {
3796 int error;
3797
3798 error = copyin(uaddr, tsp, sizeof(*tsp));
3799 if (error == 0) {
3800 if (!timespecvalid_interval(tsp))
3801 error = EINVAL;
3802 }
3803 return (error);
3804 }
3805
3806 static inline int
3807 umtx_copyin_umtx_time(const void *uaddr, size_t size, struct _umtx_time *tp)
3808 {
3809 int error;
3810
3811 if (size <= sizeof(tp->_timeout)) {
3812 tp->_clockid = CLOCK_REALTIME;
3813 tp->_flags = 0;
3814 error = copyin(uaddr, &tp->_timeout, sizeof(tp->_timeout));
3815 } else
3816 error = copyin(uaddr, tp, sizeof(*tp));
3817 if (error != 0)
3818 return (error);
3819 if (!timespecvalid_interval(&tp->_timeout))
3820 return (EINVAL);
3821 return (0);
3822 }
3823
3824 static int
3825 umtx_copyin_robust_lists(const void *uaddr, size_t size,
3826 struct umtx_robust_lists_params *rb)
3827 {
3828
3829 if (size > sizeof(*rb))
3830 return (EINVAL);
3831 return (copyin(uaddr, rb, size));
3832 }
3833
3834 static int
3835 umtx_copyout_timeout(void *uaddr, size_t sz, struct timespec *tsp)
3836 {
3837
3838 /*
3839 * Should be guaranteed by the caller, sz == uaddr1 - sizeof(_umtx_time)
3840 * and we're only called if sz >= sizeof(timespec) as supplied in the
3841 * copyops.
3842 */
3843 KASSERT(sz >= sizeof(*tsp),
3844 ("umtx_copyops specifies incorrect sizes"));
3845
3846 return (copyout(tsp, uaddr, sizeof(*tsp)));
3847 }
3848
3849 #ifdef COMPAT_FREEBSD10
3850 static int
3851 __umtx_op_lock_umtx(struct thread *td, struct _umtx_op_args *uap,
3852 const struct umtx_copyops *ops)
3853 {
3854 struct timespec *ts, timeout;
3855 int error;
3856
3857 /* Allow a null timespec (wait forever). */
3858 if (uap->uaddr2 == NULL)
3859 ts = NULL;
3860 else {
3861 error = ops->copyin_timeout(uap->uaddr2, &timeout);
3862 if (error != 0)
3863 return (error);
3864 ts = &timeout;
3865 }
3866 #ifdef COMPAT_FREEBSD32
3867 if (ops->compat32)
3868 return (do_lock_umtx32(td, uap->obj, uap->val, ts));
3869 #endif
3870 return (do_lock_umtx(td, uap->obj, uap->val, ts));
3871 }
3872
3873 static int
3874 __umtx_op_unlock_umtx(struct thread *td, struct _umtx_op_args *uap,
3875 const struct umtx_copyops *ops)
3876 {
3877 #ifdef COMPAT_FREEBSD32
3878 if (ops->compat32)
3879 return (do_unlock_umtx32(td, uap->obj, uap->val));
3880 #endif
3881 return (do_unlock_umtx(td, uap->obj, uap->val));
3882 }
3883 #endif /* COMPAT_FREEBSD10 */
3884
3885 #if !defined(COMPAT_FREEBSD10)
3886 static int
3887 __umtx_op_unimpl(struct thread *td __unused, struct _umtx_op_args *uap __unused,
3888 const struct umtx_copyops *ops __unused)
3889 {
3890 return (EOPNOTSUPP);
3891 }
3892 #endif /* COMPAT_FREEBSD10 */
3893
3894 static int
3895 __umtx_op_wait(struct thread *td, struct _umtx_op_args *uap,
3896 const struct umtx_copyops *ops)
3897 {
3898 struct _umtx_time timeout, *tm_p;
3899 int error;
3900
3901 if (uap->uaddr2 == NULL)
3902 tm_p = NULL;
3903 else {
3904 error = ops->copyin_umtx_time(
3905 uap->uaddr2, (size_t)uap->uaddr1, &timeout);
3906 if (error != 0)
3907 return (error);
3908 tm_p = &timeout;
3909 }
3910 return (do_wait(td, uap->obj, uap->val, tm_p, ops->compat32, 0));
3911 }
3912
3913 static int
3914 __umtx_op_wait_uint(struct thread *td, struct _umtx_op_args *uap,
3915 const struct umtx_copyops *ops)
3916 {
3917 struct _umtx_time timeout, *tm_p;
3918 int error;
3919
3920 if (uap->uaddr2 == NULL)
3921 tm_p = NULL;
3922 else {
3923 error = ops->copyin_umtx_time(
3924 uap->uaddr2, (size_t)uap->uaddr1, &timeout);
3925 if (error != 0)
3926 return (error);
3927 tm_p = &timeout;
3928 }
3929 return (do_wait(td, uap->obj, uap->val, tm_p, 1, 0));
3930 }
3931
3932 static int
3933 __umtx_op_wait_uint_private(struct thread *td, struct _umtx_op_args *uap,
3934 const struct umtx_copyops *ops)
3935 {
3936 struct _umtx_time *tm_p, timeout;
3937 int error;
3938
3939 if (uap->uaddr2 == NULL)
3940 tm_p = NULL;
3941 else {
3942 error = ops->copyin_umtx_time(
3943 uap->uaddr2, (size_t)uap->uaddr1, &timeout);
3944 if (error != 0)
3945 return (error);
3946 tm_p = &timeout;
3947 }
3948 return (do_wait(td, uap->obj, uap->val, tm_p, 1, 1));
3949 }
3950
3951 static int
3952 __umtx_op_wake(struct thread *td, struct _umtx_op_args *uap,
3953 const struct umtx_copyops *ops __unused)
3954 {
3955
3956 return (kern_umtx_wake(td, uap->obj, uap->val, 0));
3957 }
3958
3959 #define BATCH_SIZE 128
3960 static int
3961 __umtx_op_nwake_private_native(struct thread *td, struct _umtx_op_args *uap)
3962 {
3963 char *uaddrs[BATCH_SIZE], **upp;
3964 int count, error, i, pos, tocopy;
3965
3966 upp = (char **)uap->obj;
3967 error = 0;
3968 for (count = uap->val, pos = 0; count > 0; count -= tocopy,
3969 pos += tocopy) {
3970 tocopy = MIN(count, BATCH_SIZE);
3971 error = copyin(upp + pos, uaddrs, tocopy * sizeof(char *));
3972 if (error != 0)
3973 break;
3974 for (i = 0; i < tocopy; ++i) {
3975 kern_umtx_wake(td, uaddrs[i], INT_MAX, 1);
3976 }
3977 maybe_yield();
3978 }
3979 return (error);
3980 }
3981
3982 static int
3983 __umtx_op_nwake_private_compat32(struct thread *td, struct _umtx_op_args *uap)
3984 {
3985 uint32_t uaddrs[BATCH_SIZE], *upp;
3986 int count, error, i, pos, tocopy;
3987
3988 upp = (uint32_t *)uap->obj;
3989 error = 0;
3990 for (count = uap->val, pos = 0; count > 0; count -= tocopy,
3991 pos += tocopy) {
3992 tocopy = MIN(count, BATCH_SIZE);
3993 error = copyin(upp + pos, uaddrs, tocopy * sizeof(uint32_t));
3994 if (error != 0)
3995 break;
3996 for (i = 0; i < tocopy; ++i) {
3997 kern_umtx_wake(td, (void *)(uintptr_t)uaddrs[i],
3998 INT_MAX, 1);
3999 }
4000 maybe_yield();
4001 }
4002 return (error);
4003 }
4004
4005 static int
4006 __umtx_op_nwake_private(struct thread *td, struct _umtx_op_args *uap,
4007 const struct umtx_copyops *ops)
4008 {
4009
4010 if (ops->compat32)
4011 return (__umtx_op_nwake_private_compat32(td, uap));
4012 return (__umtx_op_nwake_private_native(td, uap));
4013 }
4014
4015 static int
4016 __umtx_op_wake_private(struct thread *td, struct _umtx_op_args *uap,
4017 const struct umtx_copyops *ops __unused)
4018 {
4019
4020 return (kern_umtx_wake(td, uap->obj, uap->val, 1));
4021 }
4022
4023 static int
4024 __umtx_op_lock_umutex(struct thread *td, struct _umtx_op_args *uap,
4025 const struct umtx_copyops *ops)
4026 {
4027 struct _umtx_time *tm_p, timeout;
4028 int error;
4029
4030 /* Allow a null timespec (wait forever). */
4031 if (uap->uaddr2 == NULL)
4032 tm_p = NULL;
4033 else {
4034 error = ops->copyin_umtx_time(
4035 uap->uaddr2, (size_t)uap->uaddr1, &timeout);
4036 if (error != 0)
4037 return (error);
4038 tm_p = &timeout;
4039 }
4040 return (do_lock_umutex(td, uap->obj, tm_p, 0));
4041 }
4042
4043 static int
4044 __umtx_op_trylock_umutex(struct thread *td, struct _umtx_op_args *uap,
4045 const struct umtx_copyops *ops __unused)
4046 {
4047
4048 return (do_lock_umutex(td, uap->obj, NULL, _UMUTEX_TRY));
4049 }
4050
4051 static int
4052 __umtx_op_wait_umutex(struct thread *td, struct _umtx_op_args *uap,
4053 const struct umtx_copyops *ops)
4054 {
4055 struct _umtx_time *tm_p, timeout;
4056 int error;
4057
4058 /* Allow a null timespec (wait forever). */
4059 if (uap->uaddr2 == NULL)
4060 tm_p = NULL;
4061 else {
4062 error = ops->copyin_umtx_time(
4063 uap->uaddr2, (size_t)uap->uaddr1, &timeout);
4064 if (error != 0)
4065 return (error);
4066 tm_p = &timeout;
4067 }
4068 return (do_lock_umutex(td, uap->obj, tm_p, _UMUTEX_WAIT));
4069 }
4070
4071 static int
4072 __umtx_op_wake_umutex(struct thread *td, struct _umtx_op_args *uap,
4073 const struct umtx_copyops *ops __unused)
4074 {
4075
4076 return (do_wake_umutex(td, uap->obj));
4077 }
4078
4079 static int
4080 __umtx_op_unlock_umutex(struct thread *td, struct _umtx_op_args *uap,
4081 const struct umtx_copyops *ops __unused)
4082 {
4083
4084 return (do_unlock_umutex(td, uap->obj, false));
4085 }
4086
4087 static int
4088 __umtx_op_set_ceiling(struct thread *td, struct _umtx_op_args *uap,
4089 const struct umtx_copyops *ops __unused)
4090 {
4091
4092 return (do_set_ceiling(td, uap->obj, uap->val, uap->uaddr1));
4093 }
4094
4095 static int
4096 __umtx_op_cv_wait(struct thread *td, struct _umtx_op_args *uap,
4097 const struct umtx_copyops *ops)
4098 {
4099 struct timespec *ts, timeout;
4100 int error;
4101
4102 /* Allow a null timespec (wait forever). */
4103 if (uap->uaddr2 == NULL)
4104 ts = NULL;
4105 else {
4106 error = ops->copyin_timeout(uap->uaddr2, &timeout);
4107 if (error != 0)
4108 return (error);
4109 ts = &timeout;
4110 }
4111 return (do_cv_wait(td, uap->obj, uap->uaddr1, ts, uap->val));
4112 }
4113
4114 static int
4115 __umtx_op_cv_signal(struct thread *td, struct _umtx_op_args *uap,
4116 const struct umtx_copyops *ops __unused)
4117 {
4118
4119 return (do_cv_signal(td, uap->obj));
4120 }
4121
4122 static int
4123 __umtx_op_cv_broadcast(struct thread *td, struct _umtx_op_args *uap,
4124 const struct umtx_copyops *ops __unused)
4125 {
4126
4127 return (do_cv_broadcast(td, uap->obj));
4128 }
4129
4130 static int
4131 __umtx_op_rw_rdlock(struct thread *td, struct _umtx_op_args *uap,
4132 const struct umtx_copyops *ops)
4133 {
4134 struct _umtx_time timeout;
4135 int error;
4136
4137 /* Allow a null timespec (wait forever). */
4138 if (uap->uaddr2 == NULL) {
4139 error = do_rw_rdlock(td, uap->obj, uap->val, 0);
4140 } else {
4141 error = ops->copyin_umtx_time(uap->uaddr2,
4142 (size_t)uap->uaddr1, &timeout);
4143 if (error != 0)
4144 return (error);
4145 error = do_rw_rdlock(td, uap->obj, uap->val, &timeout);
4146 }
4147 return (error);
4148 }
4149
4150 static int
4151 __umtx_op_rw_wrlock(struct thread *td, struct _umtx_op_args *uap,
4152 const struct umtx_copyops *ops)
4153 {
4154 struct _umtx_time timeout;
4155 int error;
4156
4157 /* Allow a null timespec (wait forever). */
4158 if (uap->uaddr2 == NULL) {
4159 error = do_rw_wrlock(td, uap->obj, 0);
4160 } else {
4161 error = ops->copyin_umtx_time(uap->uaddr2,
4162 (size_t)uap->uaddr1, &timeout);
4163 if (error != 0)
4164 return (error);
4165
4166 error = do_rw_wrlock(td, uap->obj, &timeout);
4167 }
4168 return (error);
4169 }
4170
4171 static int
4172 __umtx_op_rw_unlock(struct thread *td, struct _umtx_op_args *uap,
4173 const struct umtx_copyops *ops __unused)
4174 {
4175
4176 return (do_rw_unlock(td, uap->obj));
4177 }
4178
4179 #if defined(COMPAT_FREEBSD9) || defined(COMPAT_FREEBSD10)
4180 static int
4181 __umtx_op_sem_wait(struct thread *td, struct _umtx_op_args *uap,
4182 const struct umtx_copyops *ops)
4183 {
4184 struct _umtx_time *tm_p, timeout;
4185 int error;
4186
4187 /* Allow a null timespec (wait forever). */
4188 if (uap->uaddr2 == NULL)
4189 tm_p = NULL;
4190 else {
4191 error = ops->copyin_umtx_time(
4192 uap->uaddr2, (size_t)uap->uaddr1, &timeout);
4193 if (error != 0)
4194 return (error);
4195 tm_p = &timeout;
4196 }
4197 return (do_sem_wait(td, uap->obj, tm_p));
4198 }
4199
4200 static int
4201 __umtx_op_sem_wake(struct thread *td, struct _umtx_op_args *uap,
4202 const struct umtx_copyops *ops __unused)
4203 {
4204
4205 return (do_sem_wake(td, uap->obj));
4206 }
4207 #endif
4208
4209 static int
4210 __umtx_op_wake2_umutex(struct thread *td, struct _umtx_op_args *uap,
4211 const struct umtx_copyops *ops __unused)
4212 {
4213
4214 return (do_wake2_umutex(td, uap->obj, uap->val));
4215 }
4216
4217 static int
4218 __umtx_op_sem2_wait(struct thread *td, struct _umtx_op_args *uap,
4219 const struct umtx_copyops *ops)
4220 {
4221 struct _umtx_time *tm_p, timeout;
4222 size_t uasize;
4223 int error;
4224
4225 /* Allow a null timespec (wait forever). */
4226 if (uap->uaddr2 == NULL) {
4227 uasize = 0;
4228 tm_p = NULL;
4229 } else {
4230 uasize = (size_t)uap->uaddr1;
4231 error = ops->copyin_umtx_time(uap->uaddr2, uasize, &timeout);
4232 if (error != 0)
4233 return (error);
4234 tm_p = &timeout;
4235 }
4236 error = do_sem2_wait(td, uap->obj, tm_p);
4237 if (error == EINTR && uap->uaddr2 != NULL &&
4238 (timeout._flags & UMTX_ABSTIME) == 0 &&
4239 uasize >= ops->umtx_time_sz + ops->timespec_sz) {
4240 error = ops->copyout_timeout(
4241 (void *)((uintptr_t)uap->uaddr2 + ops->umtx_time_sz),
4242 uasize - ops->umtx_time_sz, &timeout._timeout);
4243 if (error == 0) {
4244 error = EINTR;
4245 }
4246 }
4247
4248 return (error);
4249 }
4250
4251 static int
4252 __umtx_op_sem2_wake(struct thread *td, struct _umtx_op_args *uap,
4253 const struct umtx_copyops *ops __unused)
4254 {
4255
4256 return (do_sem2_wake(td, uap->obj));
4257 }
4258
4259 #define USHM_OBJ_UMTX(o) \
4260 ((struct umtx_shm_obj_list *)(&(o)->umtx_data))
4261
4262 #define USHMF_REG_LINKED 0x0001
4263 #define USHMF_OBJ_LINKED 0x0002
4264 struct umtx_shm_reg {
4265 TAILQ_ENTRY(umtx_shm_reg) ushm_reg_link;
4266 LIST_ENTRY(umtx_shm_reg) ushm_obj_link;
4267 struct umtx_key ushm_key;
4268 struct ucred *ushm_cred;
4269 struct shmfd *ushm_obj;
4270 u_int ushm_refcnt;
4271 u_int ushm_flags;
4272 };
4273
4274 LIST_HEAD(umtx_shm_obj_list, umtx_shm_reg);
4275 TAILQ_HEAD(umtx_shm_reg_head, umtx_shm_reg);
4276
4277 static uma_zone_t umtx_shm_reg_zone;
4278 static struct umtx_shm_reg_head umtx_shm_registry[UMTX_CHAINS];
4279 static struct mtx umtx_shm_lock;
4280 static struct umtx_shm_reg_head umtx_shm_reg_delfree =
4281 TAILQ_HEAD_INITIALIZER(umtx_shm_reg_delfree);
4282
4283 static void umtx_shm_free_reg(struct umtx_shm_reg *reg);
4284
4285 static void
4286 umtx_shm_reg_delfree_tq(void *context __unused, int pending __unused)
4287 {
4288 struct umtx_shm_reg_head d;
4289 struct umtx_shm_reg *reg, *reg1;
4290
4291 TAILQ_INIT(&d);
4292 mtx_lock(&umtx_shm_lock);
4293 TAILQ_CONCAT(&d, &umtx_shm_reg_delfree, ushm_reg_link);
4294 mtx_unlock(&umtx_shm_lock);
4295 TAILQ_FOREACH_SAFE(reg, &d, ushm_reg_link, reg1) {
4296 TAILQ_REMOVE(&d, reg, ushm_reg_link);
4297 umtx_shm_free_reg(reg);
4298 }
4299 }
4300
4301 static struct task umtx_shm_reg_delfree_task =
4302 TASK_INITIALIZER(0, umtx_shm_reg_delfree_tq, NULL);
4303
4304 static struct umtx_shm_reg *
4305 umtx_shm_find_reg_locked(const struct umtx_key *key)
4306 {
4307 struct umtx_shm_reg *reg;
4308 struct umtx_shm_reg_head *reg_head;
4309
4310 KASSERT(key->shared, ("umtx_p_find_rg: private key"));
4311 mtx_assert(&umtx_shm_lock, MA_OWNED);
4312 reg_head = &umtx_shm_registry[key->hash];
4313 TAILQ_FOREACH(reg, reg_head, ushm_reg_link) {
4314 KASSERT(reg->ushm_key.shared,
4315 ("non-shared key on reg %p %d", reg, reg->ushm_key.shared));
4316 if (reg->ushm_key.info.shared.object ==
4317 key->info.shared.object &&
4318 reg->ushm_key.info.shared.offset ==
4319 key->info.shared.offset) {
4320 KASSERT(reg->ushm_key.type == TYPE_SHM, ("TYPE_USHM"));
4321 KASSERT(reg->ushm_refcnt > 0,
4322 ("reg %p refcnt 0 onlist", reg));
4323 KASSERT((reg->ushm_flags & USHMF_REG_LINKED) != 0,
4324 ("reg %p not linked", reg));
4325 reg->ushm_refcnt++;
4326 return (reg);
4327 }
4328 }
4329 return (NULL);
4330 }
4331
4332 static struct umtx_shm_reg *
4333 umtx_shm_find_reg(const struct umtx_key *key)
4334 {
4335 struct umtx_shm_reg *reg;
4336
4337 mtx_lock(&umtx_shm_lock);
4338 reg = umtx_shm_find_reg_locked(key);
4339 mtx_unlock(&umtx_shm_lock);
4340 return (reg);
4341 }
4342
4343 static void
4344 umtx_shm_free_reg(struct umtx_shm_reg *reg)
4345 {
4346
4347 chgumtxcnt(reg->ushm_cred->cr_ruidinfo, -1, 0);
4348 crfree(reg->ushm_cred);
4349 shm_drop(reg->ushm_obj);
4350 uma_zfree(umtx_shm_reg_zone, reg);
4351 }
4352
4353 static bool
4354 umtx_shm_unref_reg_locked(struct umtx_shm_reg *reg, bool force)
4355 {
4356 bool res;
4357
4358 mtx_assert(&umtx_shm_lock, MA_OWNED);
4359 KASSERT(reg->ushm_refcnt > 0, ("ushm_reg %p refcnt 0", reg));
4360 reg->ushm_refcnt--;
4361 res = reg->ushm_refcnt == 0;
4362 if (res || force) {
4363 if ((reg->ushm_flags & USHMF_REG_LINKED) != 0) {
4364 TAILQ_REMOVE(&umtx_shm_registry[reg->ushm_key.hash],
4365 reg, ushm_reg_link);
4366 reg->ushm_flags &= ~USHMF_REG_LINKED;
4367 }
4368 if ((reg->ushm_flags & USHMF_OBJ_LINKED) != 0) {
4369 LIST_REMOVE(reg, ushm_obj_link);
4370 reg->ushm_flags &= ~USHMF_OBJ_LINKED;
4371 }
4372 }
4373 return (res);
4374 }
4375
4376 static void
4377 umtx_shm_unref_reg(struct umtx_shm_reg *reg, bool force)
4378 {
4379 vm_object_t object;
4380 bool dofree;
4381
4382 if (force) {
4383 object = reg->ushm_obj->shm_object;
4384 VM_OBJECT_WLOCK(object);
4385 vm_object_set_flag(object, OBJ_UMTXDEAD);
4386 VM_OBJECT_WUNLOCK(object);
4387 }
4388 mtx_lock(&umtx_shm_lock);
4389 dofree = umtx_shm_unref_reg_locked(reg, force);
4390 mtx_unlock(&umtx_shm_lock);
4391 if (dofree)
4392 umtx_shm_free_reg(reg);
4393 }
4394
4395 void
4396 umtx_shm_object_init(vm_object_t object)
4397 {
4398
4399 LIST_INIT(USHM_OBJ_UMTX(object));
4400 }
4401
4402 void
4403 umtx_shm_object_terminated(vm_object_t object)
4404 {
4405 struct umtx_shm_reg *reg, *reg1;
4406 bool dofree;
4407
4408 if (LIST_EMPTY(USHM_OBJ_UMTX(object)))
4409 return;
4410
4411 dofree = false;
4412 mtx_lock(&umtx_shm_lock);
4413 LIST_FOREACH_SAFE(reg, USHM_OBJ_UMTX(object), ushm_obj_link, reg1) {
4414 if (umtx_shm_unref_reg_locked(reg, true)) {
4415 TAILQ_INSERT_TAIL(&umtx_shm_reg_delfree, reg,
4416 ushm_reg_link);
4417 dofree = true;
4418 }
4419 }
4420 mtx_unlock(&umtx_shm_lock);
4421 if (dofree)
4422 taskqueue_enqueue(taskqueue_thread, &umtx_shm_reg_delfree_task);
4423 }
4424
4425 static int
4426 umtx_shm_create_reg(struct thread *td, const struct umtx_key *key,
4427 struct umtx_shm_reg **res)
4428 {
4429 struct umtx_shm_reg *reg, *reg1;
4430 struct ucred *cred;
4431 int error;
4432
4433 reg = umtx_shm_find_reg(key);
4434 if (reg != NULL) {
4435 *res = reg;
4436 return (0);
4437 }
4438 cred = td->td_ucred;
4439 if (!chgumtxcnt(cred->cr_ruidinfo, 1, lim_cur(td, RLIMIT_UMTXP)))
4440 return (ENOMEM);
4441 reg = uma_zalloc(umtx_shm_reg_zone, M_WAITOK | M_ZERO);
4442 reg->ushm_refcnt = 1;
4443 bcopy(key, ®->ushm_key, sizeof(*key));
4444 reg->ushm_obj = shm_alloc(td->td_ucred, O_RDWR, false);
4445 reg->ushm_cred = crhold(cred);
4446 error = shm_dotruncate(reg->ushm_obj, PAGE_SIZE);
4447 if (error != 0) {
4448 umtx_shm_free_reg(reg);
4449 return (error);
4450 }
4451 mtx_lock(&umtx_shm_lock);
4452 reg1 = umtx_shm_find_reg_locked(key);
4453 if (reg1 != NULL) {
4454 mtx_unlock(&umtx_shm_lock);
4455 umtx_shm_free_reg(reg);
4456 *res = reg1;
4457 return (0);
4458 }
4459 reg->ushm_refcnt++;
4460 TAILQ_INSERT_TAIL(&umtx_shm_registry[key->hash], reg, ushm_reg_link);
4461 LIST_INSERT_HEAD(USHM_OBJ_UMTX(key->info.shared.object), reg,
4462 ushm_obj_link);
4463 reg->ushm_flags = USHMF_REG_LINKED | USHMF_OBJ_LINKED;
4464 mtx_unlock(&umtx_shm_lock);
4465 *res = reg;
4466 return (0);
4467 }
4468
4469 static int
4470 umtx_shm_alive(struct thread *td, void *addr)
4471 {
4472 vm_map_t map;
4473 vm_map_entry_t entry;
4474 vm_object_t object;
4475 vm_pindex_t pindex;
4476 vm_prot_t prot;
4477 int res, ret;
4478 boolean_t wired;
4479
4480 map = &td->td_proc->p_vmspace->vm_map;
4481 res = vm_map_lookup(&map, (uintptr_t)addr, VM_PROT_READ, &entry,
4482 &object, &pindex, &prot, &wired);
4483 if (res != KERN_SUCCESS)
4484 return (EFAULT);
4485 if (object == NULL)
4486 ret = EINVAL;
4487 else
4488 ret = (object->flags & OBJ_UMTXDEAD) != 0 ? ENOTTY : 0;
4489 vm_map_lookup_done(map, entry);
4490 return (ret);
4491 }
4492
4493 static void
4494 umtx_shm_init(void)
4495 {
4496 int i;
4497
4498 umtx_shm_reg_zone = uma_zcreate("umtx_shm", sizeof(struct umtx_shm_reg),
4499 NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0);
4500 mtx_init(&umtx_shm_lock, "umtxshm", NULL, MTX_DEF);
4501 for (i = 0; i < nitems(umtx_shm_registry); i++)
4502 TAILQ_INIT(&umtx_shm_registry[i]);
4503 }
4504
4505 static int
4506 umtx_shm(struct thread *td, void *addr, u_int flags)
4507 {
4508 struct umtx_key key;
4509 struct umtx_shm_reg *reg;
4510 struct file *fp;
4511 int error, fd;
4512
4513 if (__bitcount(flags & (UMTX_SHM_CREAT | UMTX_SHM_LOOKUP |
4514 UMTX_SHM_DESTROY| UMTX_SHM_ALIVE)) != 1)
4515 return (EINVAL);
4516 if ((flags & UMTX_SHM_ALIVE) != 0)
4517 return (umtx_shm_alive(td, addr));
4518 error = umtx_key_get(addr, TYPE_SHM, PROCESS_SHARE, &key);
4519 if (error != 0)
4520 return (error);
4521 KASSERT(key.shared == 1, ("non-shared key"));
4522 if ((flags & UMTX_SHM_CREAT) != 0) {
4523 error = umtx_shm_create_reg(td, &key, ®);
4524 } else {
4525 reg = umtx_shm_find_reg(&key);
4526 if (reg == NULL)
4527 error = ESRCH;
4528 }
4529 umtx_key_release(&key);
4530 if (error != 0)
4531 return (error);
4532 KASSERT(reg != NULL, ("no reg"));
4533 if ((flags & UMTX_SHM_DESTROY) != 0) {
4534 umtx_shm_unref_reg(reg, true);
4535 } else {
4536 #if 0
4537 #ifdef MAC
4538 error = mac_posixshm_check_open(td->td_ucred,
4539 reg->ushm_obj, FFLAGS(O_RDWR));
4540 if (error == 0)
4541 #endif
4542 error = shm_access(reg->ushm_obj, td->td_ucred,
4543 FFLAGS(O_RDWR));
4544 if (error == 0)
4545 #endif
4546 error = falloc_caps(td, &fp, &fd, O_CLOEXEC, NULL);
4547 if (error == 0) {
4548 shm_hold(reg->ushm_obj);
4549 finit(fp, FFLAGS(O_RDWR), DTYPE_SHM, reg->ushm_obj,
4550 &shm_ops);
4551 td->td_retval[0] = fd;
4552 fdrop(fp, td);
4553 }
4554 }
4555 umtx_shm_unref_reg(reg, false);
4556 return (error);
4557 }
4558
4559 static int
4560 __umtx_op_shm(struct thread *td, struct _umtx_op_args *uap,
4561 const struct umtx_copyops *ops __unused)
4562 {
4563
4564 return (umtx_shm(td, uap->uaddr1, uap->val));
4565 }
4566
4567 static int
4568 __umtx_op_robust_lists(struct thread *td, struct _umtx_op_args *uap,
4569 const struct umtx_copyops *ops)
4570 {
4571 struct umtx_robust_lists_params rb;
4572 int error;
4573
4574 if (ops->compat32) {
4575 if ((td->td_pflags2 & TDP2_COMPAT32RB) == 0 &&
4576 (td->td_rb_list != 0 || td->td_rbp_list != 0 ||
4577 td->td_rb_inact != 0))
4578 return (EBUSY);
4579 } else if ((td->td_pflags2 & TDP2_COMPAT32RB) != 0) {
4580 return (EBUSY);
4581 }
4582
4583 bzero(&rb, sizeof(rb));
4584 error = ops->copyin_robust_lists(uap->uaddr1, uap->val, &rb);
4585 if (error != 0)
4586 return (error);
4587
4588 if (ops->compat32)
4589 td->td_pflags2 |= TDP2_COMPAT32RB;
4590
4591 td->td_rb_list = rb.robust_list_offset;
4592 td->td_rbp_list = rb.robust_priv_list_offset;
4593 td->td_rb_inact = rb.robust_inact_offset;
4594 return (0);
4595 }
4596
4597 #if defined(__i386__) || defined(__amd64__)
4598 /*
4599 * Provide the standard 32-bit definitions for x86, since native/compat32 use a
4600 * 32-bit time_t there. Other architectures just need the i386 definitions
4601 * along with their standard compat32.
4602 */
4603 struct timespecx32 {
4604 int64_t tv_sec;
4605 int32_t tv_nsec;
4606 };
4607
4608 struct umtx_timex32 {
4609 struct timespecx32 _timeout;
4610 uint32_t _flags;
4611 uint32_t _clockid;
4612 };
4613
4614 #ifndef __i386__
4615 #define timespeci386 timespec32
4616 #define umtx_timei386 umtx_time32
4617 #endif
4618 #else /* !__i386__ && !__amd64__ */
4619 /* 32-bit architectures can emulate i386, so define these almost everywhere. */
4620 struct timespeci386 {
4621 int32_t tv_sec;
4622 int32_t tv_nsec;
4623 };
4624
4625 struct umtx_timei386 {
4626 struct timespeci386 _timeout;
4627 uint32_t _flags;
4628 uint32_t _clockid;
4629 };
4630
4631 #if defined(__LP64__)
4632 #define timespecx32 timespec32
4633 #define umtx_timex32 umtx_time32
4634 #endif
4635 #endif
4636
4637 static int
4638 umtx_copyin_robust_lists32(const void *uaddr, size_t size,
4639 struct umtx_robust_lists_params *rbp)
4640 {
4641 struct umtx_robust_lists_params_compat32 rb32;
4642 int error;
4643
4644 if (size > sizeof(rb32))
4645 return (EINVAL);
4646 bzero(&rb32, sizeof(rb32));
4647 error = copyin(uaddr, &rb32, size);
4648 if (error != 0)
4649 return (error);
4650 CP(rb32, *rbp, robust_list_offset);
4651 CP(rb32, *rbp, robust_priv_list_offset);
4652 CP(rb32, *rbp, robust_inact_offset);
4653 return (0);
4654 }
4655
4656 #ifndef __i386__
4657 static inline int
4658 umtx_copyin_timeouti386(const void *uaddr, struct timespec *tsp)
4659 {
4660 struct timespeci386 ts32;
4661 int error;
4662
4663 error = copyin(uaddr, &ts32, sizeof(ts32));
4664 if (error == 0) {
4665 if (!timespecvalid_interval(&ts32))
4666 error = EINVAL;
4667 else {
4668 CP(ts32, *tsp, tv_sec);
4669 CP(ts32, *tsp, tv_nsec);
4670 }
4671 }
4672 return (error);
4673 }
4674
4675 static inline int
4676 umtx_copyin_umtx_timei386(const void *uaddr, size_t size, struct _umtx_time *tp)
4677 {
4678 struct umtx_timei386 t32;
4679 int error;
4680
4681 t32._clockid = CLOCK_REALTIME;
4682 t32._flags = 0;
4683 if (size <= sizeof(t32._timeout))
4684 error = copyin(uaddr, &t32._timeout, sizeof(t32._timeout));
4685 else
4686 error = copyin(uaddr, &t32, sizeof(t32));
4687 if (error != 0)
4688 return (error);
4689 if (!timespecvalid_interval(&t32._timeout))
4690 return (EINVAL);
4691 TS_CP(t32, *tp, _timeout);
4692 CP(t32, *tp, _flags);
4693 CP(t32, *tp, _clockid);
4694 return (0);
4695 }
4696
4697 static int
4698 umtx_copyout_timeouti386(void *uaddr, size_t sz, struct timespec *tsp)
4699 {
4700 struct timespeci386 remain32 = {
4701 .tv_sec = tsp->tv_sec,
4702 .tv_nsec = tsp->tv_nsec,
4703 };
4704
4705 /*
4706 * Should be guaranteed by the caller, sz == uaddr1 - sizeof(_umtx_time)
4707 * and we're only called if sz >= sizeof(timespec) as supplied in the
4708 * copyops.
4709 */
4710 KASSERT(sz >= sizeof(remain32),
4711 ("umtx_copyops specifies incorrect sizes"));
4712
4713 return (copyout(&remain32, uaddr, sizeof(remain32)));
4714 }
4715 #endif /* !__i386__ */
4716
4717 #if defined(__i386__) || defined(__LP64__)
4718 static inline int
4719 umtx_copyin_timeoutx32(const void *uaddr, struct timespec *tsp)
4720 {
4721 struct timespecx32 ts32;
4722 int error;
4723
4724 error = copyin(uaddr, &ts32, sizeof(ts32));
4725 if (error == 0) {
4726 if (!timespecvalid_interval(&ts32))
4727 error = EINVAL;
4728 else {
4729 CP(ts32, *tsp, tv_sec);
4730 CP(ts32, *tsp, tv_nsec);
4731 }
4732 }
4733 return (error);
4734 }
4735
4736 static inline int
4737 umtx_copyin_umtx_timex32(const void *uaddr, size_t size, struct _umtx_time *tp)
4738 {
4739 struct umtx_timex32 t32;
4740 int error;
4741
4742 t32._clockid = CLOCK_REALTIME;
4743 t32._flags = 0;
4744 if (size <= sizeof(t32._timeout))
4745 error = copyin(uaddr, &t32._timeout, sizeof(t32._timeout));
4746 else
4747 error = copyin(uaddr, &t32, sizeof(t32));
4748 if (error != 0)
4749 return (error);
4750 if (!timespecvalid_interval(&t32._timeout))
4751 return (EINVAL);
4752 TS_CP(t32, *tp, _timeout);
4753 CP(t32, *tp, _flags);
4754 CP(t32, *tp, _clockid);
4755 return (0);
4756 }
4757
4758 static int
4759 umtx_copyout_timeoutx32(void *uaddr, size_t sz, struct timespec *tsp)
4760 {
4761 struct timespecx32 remain32 = {
4762 .tv_sec = tsp->tv_sec,
4763 .tv_nsec = tsp->tv_nsec,
4764 };
4765
4766 /*
4767 * Should be guaranteed by the caller, sz == uaddr1 - sizeof(_umtx_time)
4768 * and we're only called if sz >= sizeof(timespec) as supplied in the
4769 * copyops.
4770 */
4771 KASSERT(sz >= sizeof(remain32),
4772 ("umtx_copyops specifies incorrect sizes"));
4773
4774 return (copyout(&remain32, uaddr, sizeof(remain32)));
4775 }
4776 #endif /* __i386__ || __LP64__ */
4777
4778 typedef int (*_umtx_op_func)(struct thread *td, struct _umtx_op_args *uap,
4779 const struct umtx_copyops *umtx_ops);
4780
4781 static const _umtx_op_func op_table[] = {
4782 #ifdef COMPAT_FREEBSD10
4783 [UMTX_OP_LOCK] = __umtx_op_lock_umtx,
4784 [UMTX_OP_UNLOCK] = __umtx_op_unlock_umtx,
4785 #else
4786 [UMTX_OP_LOCK] = __umtx_op_unimpl,
4787 [UMTX_OP_UNLOCK] = __umtx_op_unimpl,
4788 #endif
4789 [UMTX_OP_WAIT] = __umtx_op_wait,
4790 [UMTX_OP_WAKE] = __umtx_op_wake,
4791 [UMTX_OP_MUTEX_TRYLOCK] = __umtx_op_trylock_umutex,
4792 [UMTX_OP_MUTEX_LOCK] = __umtx_op_lock_umutex,
4793 [UMTX_OP_MUTEX_UNLOCK] = __umtx_op_unlock_umutex,
4794 [UMTX_OP_SET_CEILING] = __umtx_op_set_ceiling,
4795 [UMTX_OP_CV_WAIT] = __umtx_op_cv_wait,
4796 [UMTX_OP_CV_SIGNAL] = __umtx_op_cv_signal,
4797 [UMTX_OP_CV_BROADCAST] = __umtx_op_cv_broadcast,
4798 [UMTX_OP_WAIT_UINT] = __umtx_op_wait_uint,
4799 [UMTX_OP_RW_RDLOCK] = __umtx_op_rw_rdlock,
4800 [UMTX_OP_RW_WRLOCK] = __umtx_op_rw_wrlock,
4801 [UMTX_OP_RW_UNLOCK] = __umtx_op_rw_unlock,
4802 [UMTX_OP_WAIT_UINT_PRIVATE] = __umtx_op_wait_uint_private,
4803 [UMTX_OP_WAKE_PRIVATE] = __umtx_op_wake_private,
4804 [UMTX_OP_MUTEX_WAIT] = __umtx_op_wait_umutex,
4805 [UMTX_OP_MUTEX_WAKE] = __umtx_op_wake_umutex,
4806 #if defined(COMPAT_FREEBSD9) || defined(COMPAT_FREEBSD10)
4807 [UMTX_OP_SEM_WAIT] = __umtx_op_sem_wait,
4808 [UMTX_OP_SEM_WAKE] = __umtx_op_sem_wake,
4809 #else
4810 [UMTX_OP_SEM_WAIT] = __umtx_op_unimpl,
4811 [UMTX_OP_SEM_WAKE] = __umtx_op_unimpl,
4812 #endif
4813 [UMTX_OP_NWAKE_PRIVATE] = __umtx_op_nwake_private,
4814 [UMTX_OP_MUTEX_WAKE2] = __umtx_op_wake2_umutex,
4815 [UMTX_OP_SEM2_WAIT] = __umtx_op_sem2_wait,
4816 [UMTX_OP_SEM2_WAKE] = __umtx_op_sem2_wake,
4817 [UMTX_OP_SHM] = __umtx_op_shm,
4818 [UMTX_OP_ROBUST_LISTS] = __umtx_op_robust_lists,
4819 };
4820
4821 static const struct umtx_copyops umtx_native_ops = {
4822 .copyin_timeout = umtx_copyin_timeout,
4823 .copyin_umtx_time = umtx_copyin_umtx_time,
4824 .copyin_robust_lists = umtx_copyin_robust_lists,
4825 .copyout_timeout = umtx_copyout_timeout,
4826 .timespec_sz = sizeof(struct timespec),
4827 .umtx_time_sz = sizeof(struct _umtx_time),
4828 };
4829
4830 #ifndef __i386__
4831 static const struct umtx_copyops umtx_native_opsi386 = {
4832 .copyin_timeout = umtx_copyin_timeouti386,
4833 .copyin_umtx_time = umtx_copyin_umtx_timei386,
4834 .copyin_robust_lists = umtx_copyin_robust_lists32,
4835 .copyout_timeout = umtx_copyout_timeouti386,
4836 .timespec_sz = sizeof(struct timespeci386),
4837 .umtx_time_sz = sizeof(struct umtx_timei386),
4838 .compat32 = true,
4839 };
4840 #endif
4841
4842 #if defined(__i386__) || defined(__LP64__)
4843 /* i386 can emulate other 32-bit archs, too! */
4844 static const struct umtx_copyops umtx_native_opsx32 = {
4845 .copyin_timeout = umtx_copyin_timeoutx32,
4846 .copyin_umtx_time = umtx_copyin_umtx_timex32,
4847 .copyin_robust_lists = umtx_copyin_robust_lists32,
4848 .copyout_timeout = umtx_copyout_timeoutx32,
4849 .timespec_sz = sizeof(struct timespecx32),
4850 .umtx_time_sz = sizeof(struct umtx_timex32),
4851 .compat32 = true,
4852 };
4853
4854 #ifdef COMPAT_FREEBSD32
4855 #ifdef __amd64__
4856 #define umtx_native_ops32 umtx_native_opsi386
4857 #else
4858 #define umtx_native_ops32 umtx_native_opsx32
4859 #endif
4860 #endif /* COMPAT_FREEBSD32 */
4861 #endif /* __i386__ || __LP64__ */
4862
4863 #define UMTX_OP__FLAGS (UMTX_OP__32BIT | UMTX_OP__I386)
4864
4865 static int
4866 kern__umtx_op(struct thread *td, void *obj, int op, unsigned long val,
4867 void *uaddr1, void *uaddr2, const struct umtx_copyops *ops)
4868 {
4869 struct _umtx_op_args uap = {
4870 .obj = obj,
4871 .op = op & ~UMTX_OP__FLAGS,
4872 .val = val,
4873 .uaddr1 = uaddr1,
4874 .uaddr2 = uaddr2
4875 };
4876
4877 if ((uap.op >= nitems(op_table)))
4878 return (EINVAL);
4879 return ((*op_table[uap.op])(td, &uap, ops));
4880 }
4881
4882 int
4883 sys__umtx_op(struct thread *td, struct _umtx_op_args *uap)
4884 {
4885 static const struct umtx_copyops *umtx_ops;
4886
4887 umtx_ops = &umtx_native_ops;
4888 #ifdef __LP64__
4889 if ((uap->op & (UMTX_OP__32BIT | UMTX_OP__I386)) != 0) {
4890 if ((uap->op & UMTX_OP__I386) != 0)
4891 umtx_ops = &umtx_native_opsi386;
4892 else
4893 umtx_ops = &umtx_native_opsx32;
4894 }
4895 #elif !defined(__i386__)
4896 /* We consider UMTX_OP__32BIT a nop on !i386 ILP32. */
4897 if ((uap->op & UMTX_OP__I386) != 0)
4898 umtx_ops = &umtx_native_opsi386;
4899 #else
4900 /* Likewise, UMTX_OP__I386 is a nop on i386. */
4901 if ((uap->op & UMTX_OP__32BIT) != 0)
4902 umtx_ops = &umtx_native_opsx32;
4903 #endif
4904 return (kern__umtx_op(td, uap->obj, uap->op, uap->val, uap->uaddr1,
4905 uap->uaddr2, umtx_ops));
4906 }
4907
4908 #ifdef COMPAT_FREEBSD32
4909 #ifdef COMPAT_FREEBSD10
4910 int
4911 freebsd10_freebsd32__umtx_lock(struct thread *td,
4912 struct freebsd10_freebsd32__umtx_lock_args *uap)
4913 {
4914 return (do_lock_umtx32(td, (uint32_t *)uap->umtx, td->td_tid, NULL));
4915 }
4916
4917 int
4918 freebsd10_freebsd32__umtx_unlock(struct thread *td,
4919 struct freebsd10_freebsd32__umtx_unlock_args *uap)
4920 {
4921 return (do_unlock_umtx32(td, (uint32_t *)uap->umtx, td->td_tid));
4922 }
4923 #endif /* COMPAT_FREEBSD10 */
4924
4925 int
4926 freebsd32__umtx_op(struct thread *td, struct freebsd32__umtx_op_args *uap)
4927 {
4928
4929 return (kern__umtx_op(td, uap->obj, uap->op, uap->val, uap->uaddr1,
4930 uap->uaddr2, &umtx_native_ops32));
4931 }
4932 #endif /* COMPAT_FREEBSD32 */
4933
4934 void
4935 umtx_thread_init(struct thread *td)
4936 {
4937
4938 td->td_umtxq = umtxq_alloc();
4939 td->td_umtxq->uq_thread = td;
4940 }
4941
4942 void
4943 umtx_thread_fini(struct thread *td)
4944 {
4945
4946 umtxq_free(td->td_umtxq);
4947 }
4948
4949 /*
4950 * It will be called when new thread is created, e.g fork().
4951 */
4952 void
4953 umtx_thread_alloc(struct thread *td)
4954 {
4955 struct umtx_q *uq;
4956
4957 uq = td->td_umtxq;
4958 uq->uq_inherited_pri = PRI_MAX;
4959
4960 KASSERT(uq->uq_flags == 0, ("uq_flags != 0"));
4961 KASSERT(uq->uq_thread == td, ("uq_thread != td"));
4962 KASSERT(uq->uq_pi_blocked == NULL, ("uq_pi_blocked != NULL"));
4963 KASSERT(TAILQ_EMPTY(&uq->uq_pi_contested), ("uq_pi_contested is not empty"));
4964 }
4965
4966 /*
4967 * exec() hook.
4968 *
4969 * Clear robust lists for all process' threads, not delaying the
4970 * cleanup to thread exit, since the relevant address space is
4971 * destroyed right now.
4972 */
4973 void
4974 umtx_exec(struct proc *p)
4975 {
4976 struct thread *td;
4977
4978 KASSERT(p == curproc, ("need curproc"));
4979 KASSERT((p->p_flag & P_HADTHREADS) == 0 ||
4980 (p->p_flag & P_STOPPED_SINGLE) != 0,
4981 ("curproc must be single-threaded"));
4982 /*
4983 * There is no need to lock the list as only this thread can be
4984 * running.
4985 */
4986 FOREACH_THREAD_IN_PROC(p, td) {
4987 KASSERT(td == curthread ||
4988 ((td->td_flags & TDF_BOUNDARY) != 0 && TD_IS_SUSPENDED(td)),
4989 ("running thread %p %p", p, td));
4990 umtx_thread_cleanup(td);
4991 td->td_rb_list = td->td_rbp_list = td->td_rb_inact = 0;
4992 }
4993 }
4994
/*
 * Thread exit hook: run the umtx cleanup for the exiting thread.
 */
void
umtx_thread_exit(struct thread *td)
{
	umtx_thread_cleanup(td);
}
5004
5005 static int
5006 umtx_read_uptr(struct thread *td, uintptr_t ptr, uintptr_t *res, bool compat32)
5007 {
5008 u_long res1;
5009 uint32_t res32;
5010 int error;
5011
5012 if (compat32) {
5013 error = fueword32((void *)ptr, &res32);
5014 if (error == 0)
5015 res1 = res32;
5016 } else {
5017 error = fueword((void *)ptr, &res1);
5018 }
5019 if (error == 0)
5020 *res = res1;
5021 else
5022 error = EFAULT;
5023 return (error);
5024 }
5025
5026 static void
5027 umtx_read_rb_list(struct thread *td, struct umutex *m, uintptr_t *rb_list,
5028 bool compat32)
5029 {
5030 struct umutex32 m32;
5031
5032 if (compat32) {
5033 memcpy(&m32, m, sizeof(m32));
5034 *rb_list = m32.m_rb_lnk;
5035 } else {
5036 *rb_list = m->m_rb_lnk;
5037 }
5038 }
5039
5040 static int
5041 umtx_handle_rb(struct thread *td, uintptr_t rbp, uintptr_t *rb_list, bool inact,
5042 bool compat32)
5043 {
5044 struct umutex m;
5045 int error;
5046
5047 KASSERT(td->td_proc == curproc, ("need current vmspace"));
5048 error = copyin((void *)rbp, &m, sizeof(m));
5049 if (error != 0)
5050 return (error);
5051 if (rb_list != NULL)
5052 umtx_read_rb_list(td, &m, rb_list, compat32);
5053 if ((m.m_flags & UMUTEX_ROBUST) == 0)
5054 return (EINVAL);
5055 if ((m.m_owner & ~UMUTEX_CONTESTED) != td->td_tid)
5056 /* inact is cleared after unlock, allow the inconsistency */
5057 return (inact ? 0 : EINVAL);
5058 return (do_unlock_umutex(td, (struct umutex *)rbp, true));
5059 }
5060
5061 static void
5062 umtx_cleanup_rb_list(struct thread *td, uintptr_t rb_list, uintptr_t *rb_inact,
5063 const char *name, bool compat32)
5064 {
5065 int error, i;
5066 uintptr_t rbp;
5067 bool inact;
5068
5069 if (rb_list == 0)
5070 return;
5071 error = umtx_read_uptr(td, rb_list, &rbp, compat32);
5072 for (i = 0; error == 0 && rbp != 0 && i < umtx_max_rb; i++) {
5073 if (rbp == *rb_inact) {
5074 inact = true;
5075 *rb_inact = 0;
5076 } else
5077 inact = false;
5078 error = umtx_handle_rb(td, rbp, &rbp, inact, compat32);
5079 }
5080 if (i == umtx_max_rb && umtx_verbose_rb) {
5081 uprintf("comm %s pid %d: reached umtx %smax rb %d\n",
5082 td->td_proc->p_comm, td->td_proc->p_pid, name, umtx_max_rb);
5083 }
5084 if (error != 0 && umtx_verbose_rb) {
5085 uprintf("comm %s pid %d: handling %srb error %d\n",
5086 td->td_proc->p_comm, td->td_proc->p_pid, name, error);
5087 }
5088 }
5089
5090 /*
5091 * Clean up umtx data.
5092 */
5093 static void
5094 umtx_thread_cleanup(struct thread *td)
5095 {
5096 struct umtx_q *uq;
5097 struct umtx_pi *pi;
5098 uintptr_t rb_inact;
5099 bool compat32;
5100
5101 /*
5102 * Disown pi mutexes.
5103 */
5104 uq = td->td_umtxq;
5105 if (uq != NULL) {
5106 if (uq->uq_inherited_pri != PRI_MAX ||
5107 !TAILQ_EMPTY(&uq->uq_pi_contested)) {
5108 mtx_lock(&umtx_lock);
5109 uq->uq_inherited_pri = PRI_MAX;
5110 while ((pi = TAILQ_FIRST(&uq->uq_pi_contested)) != NULL) {
5111 pi->pi_owner = NULL;
5112 TAILQ_REMOVE(&uq->uq_pi_contested, pi, pi_link);
5113 }
5114 mtx_unlock(&umtx_lock);
5115 }
5116 sched_lend_user_prio_cond(td, PRI_MAX);
5117 }
5118
5119 compat32 = (td->td_pflags2 & TDP2_COMPAT32RB) != 0;
5120 td->td_pflags2 &= ~TDP2_COMPAT32RB;
5121
5122 if (td->td_rb_inact == 0 && td->td_rb_list == 0 && td->td_rbp_list == 0)
5123 return;
5124
5125 /*
5126 * Handle terminated robust mutexes. Must be done after
5127 * robust pi disown, otherwise unlock could see unowned
5128 * entries.
5129 */
5130 rb_inact = td->td_rb_inact;
5131 if (rb_inact != 0)
5132 (void)umtx_read_uptr(td, rb_inact, &rb_inact, compat32);
5133 umtx_cleanup_rb_list(td, td->td_rb_list, &rb_inact, "", compat32);
5134 umtx_cleanup_rb_list(td, td->td_rbp_list, &rb_inact, "priv ", compat32);
5135 if (rb_inact != 0)
5136 (void)umtx_handle_rb(td, rb_inact, NULL, true, compat32);
5137 }
Cache object: 0c66613badbac9ab8e73fabd1ce59b04
|