/* FreeBSD/Linux Kernel Cross Reference: sys/kern/kern_umtx.c */
1 /*-
2 * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
3 *
4 * Copyright (c) 2015, 2016 The FreeBSD Foundation
5 * Copyright (c) 2004, David Xu <davidxu@freebsd.org>
6 * Copyright (c) 2002, Jeffrey Roberson <jeff@freebsd.org>
7 * All rights reserved.
8 *
9 * Portions of this software were developed by Konstantin Belousov
10 * under sponsorship from the FreeBSD Foundation.
11 *
12 * Redistribution and use in source and binary forms, with or without
13 * modification, are permitted provided that the following conditions
14 * are met:
15 * 1. Redistributions of source code must retain the above copyright
16 * notice unmodified, this list of conditions, and the following
17 * disclaimer.
18 * 2. Redistributions in binary form must reproduce the above copyright
19 * notice, this list of conditions and the following disclaimer in the
20 * documentation and/or other materials provided with the distribution.
21 *
22 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
23 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
24 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
25 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
26 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
27 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
28 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
29 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
30 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
31 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
32 */
33
34 #include <sys/cdefs.h>
35 __FBSDID("$FreeBSD: releng/12.0/sys/kern/kern_umtx.c 340378 2018-11-12 18:21:17Z brooks $");
36
37 #include "opt_umtx_profiling.h"
38
39 #include <sys/param.h>
40 #include <sys/kernel.h>
41 #include <sys/fcntl.h>
42 #include <sys/file.h>
43 #include <sys/filedesc.h>
44 #include <sys/limits.h>
45 #include <sys/lock.h>
46 #include <sys/malloc.h>
47 #include <sys/mman.h>
48 #include <sys/mutex.h>
49 #include <sys/priv.h>
50 #include <sys/proc.h>
51 #include <sys/resource.h>
52 #include <sys/resourcevar.h>
53 #include <sys/rwlock.h>
54 #include <sys/sbuf.h>
55 #include <sys/sched.h>
56 #include <sys/smp.h>
57 #include <sys/sysctl.h>
58 #include <sys/sysent.h>
59 #include <sys/systm.h>
60 #include <sys/sysproto.h>
61 #include <sys/syscallsubr.h>
62 #include <sys/taskqueue.h>
63 #include <sys/time.h>
64 #include <sys/eventhandler.h>
65 #include <sys/umtx.h>
66
67 #include <security/mac/mac_framework.h>
68
69 #include <vm/vm.h>
70 #include <vm/vm_param.h>
71 #include <vm/pmap.h>
72 #include <vm/vm_map.h>
73 #include <vm/vm_object.h>
74
75 #include <machine/atomic.h>
76 #include <machine/cpu.h>
77
78 #ifdef COMPAT_FREEBSD32
79 #include <compat/freebsd32/freebsd32_proto.h>
80 #endif
81
82 #define _UMUTEX_TRY 1
83 #define _UMUTEX_WAIT 2
84
85 #ifdef UMTX_PROFILING
86 #define UPROF_PERC_BIGGER(w, f, sw, sf) \
87 (((w) > (sw)) || ((w) == (sw) && (f) > (sf)))
88 #endif
89
90 /* Priority inheritance mutex info. */
91 struct umtx_pi {
92 /* Owner thread */
93 struct thread *pi_owner;
94
95 /* Reference count */
96 int pi_refcount;
97
98 /* List entry to link umtx holding by thread */
99 TAILQ_ENTRY(umtx_pi) pi_link;
100
101 /* List entry in hash */
102 TAILQ_ENTRY(umtx_pi) pi_hashlink;
103
104 /* List for waiters */
105 TAILQ_HEAD(,umtx_q) pi_blocked;
106
107 /* Identify a userland lock object */
108 struct umtx_key pi_key;
109 };
110
111 /* A userland synchronous object user. */
112 struct umtx_q {
113 /* Linked list for the hash. */
114 TAILQ_ENTRY(umtx_q) uq_link;
115
116 /* Umtx key. */
117 struct umtx_key uq_key;
118
119 /* Umtx flags. */
120 int uq_flags;
121 #define UQF_UMTXQ 0x0001
122
123 /* The thread waits on. */
124 struct thread *uq_thread;
125
126 /*
127 * Blocked on PI mutex. read can use chain lock
128 * or umtx_lock, write must have both chain lock and
129 * umtx_lock being hold.
130 */
131 struct umtx_pi *uq_pi_blocked;
132
133 /* On blocked list */
134 TAILQ_ENTRY(umtx_q) uq_lockq;
135
136 /* Thread contending with us */
137 TAILQ_HEAD(,umtx_pi) uq_pi_contested;
138
139 /* Inherited priority from PP mutex */
140 u_char uq_inherited_pri;
141
142 /* Spare queue ready to be reused */
143 struct umtxq_queue *uq_spare_queue;
144
145 /* The queue we on */
146 struct umtxq_queue *uq_cur_queue;
147 };
148
149 TAILQ_HEAD(umtxq_head, umtx_q);
150
151 /* Per-key wait-queue */
152 struct umtxq_queue {
153 struct umtxq_head head;
154 struct umtx_key key;
155 LIST_ENTRY(umtxq_queue) link;
156 int length;
157 };
158
159 LIST_HEAD(umtxq_list, umtxq_queue);
160
161 /* Userland lock object's wait-queue chain */
162 struct umtxq_chain {
163 /* Lock for this chain. */
164 struct mtx uc_lock;
165
166 /* List of sleep queues. */
167 struct umtxq_list uc_queue[2];
168 #define UMTX_SHARED_QUEUE 0
169 #define UMTX_EXCLUSIVE_QUEUE 1
170
171 LIST_HEAD(, umtxq_queue) uc_spare_queue;
172
173 /* Busy flag */
174 char uc_busy;
175
176 /* Chain lock waiters */
177 int uc_waiters;
178
179 /* All PI in the list */
180 TAILQ_HEAD(,umtx_pi) uc_pi_list;
181
182 #ifdef UMTX_PROFILING
183 u_int length;
184 u_int max_length;
185 #endif
186 };
187
188 #define UMTXQ_LOCKED_ASSERT(uc) mtx_assert(&(uc)->uc_lock, MA_OWNED)
189
190 /*
191 * Don't propagate time-sharing priority, there is a security reason,
192 * a user can simply introduce PI-mutex, let thread A lock the mutex,
193 * and let another thread B block on the mutex, because B is
194 * sleeping, its priority will be boosted, this causes A's priority to
195 * be boosted via priority propagating too and will never be lowered even
196 * if it is using 100%CPU, this is unfair to other processes.
197 */
198
199 #define UPRI(td) (((td)->td_user_pri >= PRI_MIN_TIMESHARE &&\
200 (td)->td_user_pri <= PRI_MAX_TIMESHARE) ?\
201 PRI_MAX_TIMESHARE : (td)->td_user_pri)
202
203 #define GOLDEN_RATIO_PRIME 2654404609U
204 #ifndef UMTX_CHAINS
205 #define UMTX_CHAINS 512
206 #endif
207 #define UMTX_SHIFTS (__WORD_BIT - 9)
208
209 #define GET_SHARE(flags) \
210 (((flags) & USYNC_PROCESS_SHARED) == 0 ? THREAD_SHARE : PROCESS_SHARE)
211
212 #define BUSY_SPINS 200
213
214 struct abs_timeout {
215 int clockid;
216 bool is_abs_real; /* TIMER_ABSTIME && CLOCK_REALTIME* */
217 struct timespec cur;
218 struct timespec end;
219 };
220
#ifdef COMPAT_FREEBSD32
/* 32-bit layout of struct umutex, used by COMPAT_FREEBSD32 wrappers. */
struct umutex32 {
	volatile __lwpid_t	m_owner;	/* Owner of the mutex */
	__uint32_t		m_flags;	/* Flags of the mutex */
	__uint32_t		m_ceilings[2];	/* Priority protect ceiling */
	__uint32_t		m_rb_lnk;	/* Robust linkage */
	__uint32_t		m_pad;
	__uint32_t		m_spare[2];
};

/* Both ABIs must agree on size and on the m_spare offset. */
_Static_assert(sizeof(struct umutex) == sizeof(struct umutex32), "umutex32");
_Static_assert(__offsetof(struct umutex, m_spare[0]) ==
    __offsetof(struct umutex32, m_spare[0]), "m_spare32");
#endif
235
236 int umtx_shm_vnobj_persistent = 0;
237 SYSCTL_INT(_kern_ipc, OID_AUTO, umtx_vnode_persistent, CTLFLAG_RWTUN,
238 &umtx_shm_vnobj_persistent, 0,
239 "False forces destruction of umtx attached to file, on last close");
240 static int umtx_max_rb = 1000;
241 SYSCTL_INT(_kern_ipc, OID_AUTO, umtx_max_robust, CTLFLAG_RWTUN,
242 &umtx_max_rb, 0,
243 "");
244
245 static uma_zone_t umtx_pi_zone;
246 static struct umtxq_chain umtxq_chains[2][UMTX_CHAINS];
247 static MALLOC_DEFINE(M_UMTX, "umtx", "UMTX queue memory");
248 static int umtx_pi_allocated;
249
250 static SYSCTL_NODE(_debug, OID_AUTO, umtx, CTLFLAG_RW, 0, "umtx debug");
251 SYSCTL_INT(_debug_umtx, OID_AUTO, umtx_pi_allocated, CTLFLAG_RD,
252 &umtx_pi_allocated, 0, "Allocated umtx_pi");
253 static int umtx_verbose_rb = 1;
254 SYSCTL_INT(_debug_umtx, OID_AUTO, robust_faults_verbose, CTLFLAG_RWTUN,
255 &umtx_verbose_rb, 0,
256 "");
257
258 #ifdef UMTX_PROFILING
259 static long max_length;
260 SYSCTL_LONG(_debug_umtx, OID_AUTO, max_length, CTLFLAG_RD, &max_length, 0, "max_length");
261 static SYSCTL_NODE(_debug_umtx, OID_AUTO, chains, CTLFLAG_RD, 0, "umtx chain stats");
262 #endif
263
264 static void abs_timeout_update(struct abs_timeout *timo);
265
266 static void umtx_shm_init(void);
267 static void umtxq_sysinit(void *);
268 static void umtxq_hash(struct umtx_key *key);
269 static struct umtxq_chain *umtxq_getchain(struct umtx_key *key);
270 static void umtxq_lock(struct umtx_key *key);
271 static void umtxq_unlock(struct umtx_key *key);
272 static void umtxq_busy(struct umtx_key *key);
273 static void umtxq_unbusy(struct umtx_key *key);
274 static void umtxq_insert_queue(struct umtx_q *uq, int q);
275 static void umtxq_remove_queue(struct umtx_q *uq, int q);
276 static int umtxq_sleep(struct umtx_q *uq, const char *wmesg, struct abs_timeout *);
277 static int umtxq_count(struct umtx_key *key);
278 static struct umtx_pi *umtx_pi_alloc(int);
279 static void umtx_pi_free(struct umtx_pi *pi);
280 static int do_unlock_pp(struct thread *td, struct umutex *m, uint32_t flags,
281 bool rb);
282 static void umtx_thread_cleanup(struct thread *td);
283 static void umtx_exec_hook(void *arg __unused, struct proc *p __unused,
284 struct image_params *imgp __unused);
285 SYSINIT(umtx, SI_SUB_EVENTHANDLER+1, SI_ORDER_MIDDLE, umtxq_sysinit, NULL);
286
287 #define umtxq_signal(key, nwake) umtxq_signal_queue((key), (nwake), UMTX_SHARED_QUEUE)
288 #define umtxq_insert(uq) umtxq_insert_queue((uq), UMTX_SHARED_QUEUE)
289 #define umtxq_remove(uq) umtxq_remove_queue((uq), UMTX_SHARED_QUEUE)
290
291 static struct mtx umtx_lock;
292
293 #ifdef UMTX_PROFILING
294 static void
295 umtx_init_profiling(void)
296 {
297 struct sysctl_oid *chain_oid;
298 char chain_name[10];
299 int i;
300
301 for (i = 0; i < UMTX_CHAINS; ++i) {
302 snprintf(chain_name, sizeof(chain_name), "%d", i);
303 chain_oid = SYSCTL_ADD_NODE(NULL,
304 SYSCTL_STATIC_CHILDREN(_debug_umtx_chains), OID_AUTO,
305 chain_name, CTLFLAG_RD, NULL, "umtx hash stats");
306 SYSCTL_ADD_INT(NULL, SYSCTL_CHILDREN(chain_oid), OID_AUTO,
307 "max_length0", CTLFLAG_RD, &umtxq_chains[0][i].max_length, 0, NULL);
308 SYSCTL_ADD_INT(NULL, SYSCTL_CHILDREN(chain_oid), OID_AUTO,
309 "max_length1", CTLFLAG_RD, &umtxq_chains[1][i].max_length, 0, NULL);
310 }
311 }
312
313 static int
314 sysctl_debug_umtx_chains_peaks(SYSCTL_HANDLER_ARGS)
315 {
316 char buf[512];
317 struct sbuf sb;
318 struct umtxq_chain *uc;
319 u_int fract, i, j, tot, whole;
320 u_int sf0, sf1, sf2, sf3, sf4;
321 u_int si0, si1, si2, si3, si4;
322 u_int sw0, sw1, sw2, sw3, sw4;
323
324 sbuf_new(&sb, buf, sizeof(buf), SBUF_FIXEDLEN);
325 for (i = 0; i < 2; i++) {
326 tot = 0;
327 for (j = 0; j < UMTX_CHAINS; ++j) {
328 uc = &umtxq_chains[i][j];
329 mtx_lock(&uc->uc_lock);
330 tot += uc->max_length;
331 mtx_unlock(&uc->uc_lock);
332 }
333 if (tot == 0)
334 sbuf_printf(&sb, "%u) Empty ", i);
335 else {
336 sf0 = sf1 = sf2 = sf3 = sf4 = 0;
337 si0 = si1 = si2 = si3 = si4 = 0;
338 sw0 = sw1 = sw2 = sw3 = sw4 = 0;
339 for (j = 0; j < UMTX_CHAINS; j++) {
340 uc = &umtxq_chains[i][j];
341 mtx_lock(&uc->uc_lock);
342 whole = uc->max_length * 100;
343 mtx_unlock(&uc->uc_lock);
344 fract = (whole % tot) * 100;
345 if (UPROF_PERC_BIGGER(whole, fract, sw0, sf0)) {
346 sf0 = fract;
347 si0 = j;
348 sw0 = whole;
349 } else if (UPROF_PERC_BIGGER(whole, fract, sw1,
350 sf1)) {
351 sf1 = fract;
352 si1 = j;
353 sw1 = whole;
354 } else if (UPROF_PERC_BIGGER(whole, fract, sw2,
355 sf2)) {
356 sf2 = fract;
357 si2 = j;
358 sw2 = whole;
359 } else if (UPROF_PERC_BIGGER(whole, fract, sw3,
360 sf3)) {
361 sf3 = fract;
362 si3 = j;
363 sw3 = whole;
364 } else if (UPROF_PERC_BIGGER(whole, fract, sw4,
365 sf4)) {
366 sf4 = fract;
367 si4 = j;
368 sw4 = whole;
369 }
370 }
371 sbuf_printf(&sb, "queue %u:\n", i);
372 sbuf_printf(&sb, "1st: %u.%u%% idx: %u\n", sw0 / tot,
373 sf0 / tot, si0);
374 sbuf_printf(&sb, "2nd: %u.%u%% idx: %u\n", sw1 / tot,
375 sf1 / tot, si1);
376 sbuf_printf(&sb, "3rd: %u.%u%% idx: %u\n", sw2 / tot,
377 sf2 / tot, si2);
378 sbuf_printf(&sb, "4th: %u.%u%% idx: %u\n", sw3 / tot,
379 sf3 / tot, si3);
380 sbuf_printf(&sb, "5th: %u.%u%% idx: %u\n", sw4 / tot,
381 sf4 / tot, si4);
382 }
383 }
384 sbuf_trim(&sb);
385 sbuf_finish(&sb);
386 sysctl_handle_string(oidp, sbuf_data(&sb), sbuf_len(&sb), req);
387 sbuf_delete(&sb);
388 return (0);
389 }
390
391 static int
392 sysctl_debug_umtx_chains_clear(SYSCTL_HANDLER_ARGS)
393 {
394 struct umtxq_chain *uc;
395 u_int i, j;
396 int clear, error;
397
398 clear = 0;
399 error = sysctl_handle_int(oidp, &clear, 0, req);
400 if (error != 0 || req->newptr == NULL)
401 return (error);
402
403 if (clear != 0) {
404 for (i = 0; i < 2; ++i) {
405 for (j = 0; j < UMTX_CHAINS; ++j) {
406 uc = &umtxq_chains[i][j];
407 mtx_lock(&uc->uc_lock);
408 uc->length = 0;
409 uc->max_length = 0;
410 mtx_unlock(&uc->uc_lock);
411 }
412 }
413 }
414 return (0);
415 }
416
417 SYSCTL_PROC(_debug_umtx_chains, OID_AUTO, clear,
418 CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, 0, 0,
419 sysctl_debug_umtx_chains_clear, "I", "Clear umtx chains statistics");
420 SYSCTL_PROC(_debug_umtx_chains, OID_AUTO, peaks,
421 CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_MPSAFE, 0, 0,
422 sysctl_debug_umtx_chains_peaks, "A", "Highest peaks in chains max length");
423 #endif
424
425 static void
426 umtxq_sysinit(void *arg __unused)
427 {
428 int i, j;
429
430 umtx_pi_zone = uma_zcreate("umtx pi", sizeof(struct umtx_pi),
431 NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0);
432 for (i = 0; i < 2; ++i) {
433 for (j = 0; j < UMTX_CHAINS; ++j) {
434 mtx_init(&umtxq_chains[i][j].uc_lock, "umtxql", NULL,
435 MTX_DEF | MTX_DUPOK);
436 LIST_INIT(&umtxq_chains[i][j].uc_queue[0]);
437 LIST_INIT(&umtxq_chains[i][j].uc_queue[1]);
438 LIST_INIT(&umtxq_chains[i][j].uc_spare_queue);
439 TAILQ_INIT(&umtxq_chains[i][j].uc_pi_list);
440 umtxq_chains[i][j].uc_busy = 0;
441 umtxq_chains[i][j].uc_waiters = 0;
442 #ifdef UMTX_PROFILING
443 umtxq_chains[i][j].length = 0;
444 umtxq_chains[i][j].max_length = 0;
445 #endif
446 }
447 }
448 #ifdef UMTX_PROFILING
449 umtx_init_profiling();
450 #endif
451 mtx_init(&umtx_lock, "umtx lock", NULL, MTX_DEF);
452 EVENTHANDLER_REGISTER(process_exec, umtx_exec_hook, NULL,
453 EVENTHANDLER_PRI_ANY);
454 umtx_shm_init();
455 }
456
457 struct umtx_q *
458 umtxq_alloc(void)
459 {
460 struct umtx_q *uq;
461
462 uq = malloc(sizeof(struct umtx_q), M_UMTX, M_WAITOK | M_ZERO);
463 uq->uq_spare_queue = malloc(sizeof(struct umtxq_queue), M_UMTX,
464 M_WAITOK | M_ZERO);
465 TAILQ_INIT(&uq->uq_spare_queue->head);
466 TAILQ_INIT(&uq->uq_pi_contested);
467 uq->uq_inherited_pri = PRI_MAX;
468 return (uq);
469 }
470
471 void
472 umtxq_free(struct umtx_q *uq)
473 {
474
475 MPASS(uq->uq_spare_queue != NULL);
476 free(uq->uq_spare_queue, M_UMTX);
477 free(uq, M_UMTX);
478 }
479
480 static inline void
481 umtxq_hash(struct umtx_key *key)
482 {
483 unsigned n;
484
485 n = (uintptr_t)key->info.both.a + key->info.both.b;
486 key->hash = ((n * GOLDEN_RATIO_PRIME) >> UMTX_SHIFTS) % UMTX_CHAINS;
487 }
488
489 static inline struct umtxq_chain *
490 umtxq_getchain(struct umtx_key *key)
491 {
492
493 if (key->type <= TYPE_SEM)
494 return (&umtxq_chains[1][key->hash]);
495 return (&umtxq_chains[0][key->hash]);
496 }
497
498 /*
499 * Lock a chain.
500 */
501 static inline void
502 umtxq_lock(struct umtx_key *key)
503 {
504 struct umtxq_chain *uc;
505
506 uc = umtxq_getchain(key);
507 mtx_lock(&uc->uc_lock);
508 }
509
510 /*
511 * Unlock a chain.
512 */
513 static inline void
514 umtxq_unlock(struct umtx_key *key)
515 {
516 struct umtxq_chain *uc;
517
518 uc = umtxq_getchain(key);
519 mtx_unlock(&uc->uc_lock);
520 }
521
522 /*
523 * Set chain to busy state when following operation
524 * may be blocked (kernel mutex can not be used).
525 */
526 static inline void
527 umtxq_busy(struct umtx_key *key)
528 {
529 struct umtxq_chain *uc;
530
531 uc = umtxq_getchain(key);
532 mtx_assert(&uc->uc_lock, MA_OWNED);
533 if (uc->uc_busy) {
534 #ifdef SMP
535 if (smp_cpus > 1) {
536 int count = BUSY_SPINS;
537 if (count > 0) {
538 umtxq_unlock(key);
539 while (uc->uc_busy && --count > 0)
540 cpu_spinwait();
541 umtxq_lock(key);
542 }
543 }
544 #endif
545 while (uc->uc_busy) {
546 uc->uc_waiters++;
547 msleep(uc, &uc->uc_lock, 0, "umtxqb", 0);
548 uc->uc_waiters--;
549 }
550 }
551 uc->uc_busy = 1;
552 }
553
554 /*
555 * Unbusy a chain.
556 */
557 static inline void
558 umtxq_unbusy(struct umtx_key *key)
559 {
560 struct umtxq_chain *uc;
561
562 uc = umtxq_getchain(key);
563 mtx_assert(&uc->uc_lock, MA_OWNED);
564 KASSERT(uc->uc_busy != 0, ("not busy"));
565 uc->uc_busy = 0;
566 if (uc->uc_waiters)
567 wakeup_one(uc);
568 }
569
/* Take the chain lock just long enough to clear the busy flag. */
static inline void
umtxq_unbusy_unlocked(struct umtx_key *key)
{

	umtxq_lock(key);
	umtxq_unbusy(key);
	umtxq_unlock(key);
}
578
579 static struct umtxq_queue *
580 umtxq_queue_lookup(struct umtx_key *key, int q)
581 {
582 struct umtxq_queue *uh;
583 struct umtxq_chain *uc;
584
585 uc = umtxq_getchain(key);
586 UMTXQ_LOCKED_ASSERT(uc);
587 LIST_FOREACH(uh, &uc->uc_queue[q], link) {
588 if (umtx_key_match(&uh->key, key))
589 return (uh);
590 }
591
592 return (NULL);
593 }
594
595 static inline void
596 umtxq_insert_queue(struct umtx_q *uq, int q)
597 {
598 struct umtxq_queue *uh;
599 struct umtxq_chain *uc;
600
601 uc = umtxq_getchain(&uq->uq_key);
602 UMTXQ_LOCKED_ASSERT(uc);
603 KASSERT((uq->uq_flags & UQF_UMTXQ) == 0, ("umtx_q is already on queue"));
604 uh = umtxq_queue_lookup(&uq->uq_key, q);
605 if (uh != NULL) {
606 LIST_INSERT_HEAD(&uc->uc_spare_queue, uq->uq_spare_queue, link);
607 } else {
608 uh = uq->uq_spare_queue;
609 uh->key = uq->uq_key;
610 LIST_INSERT_HEAD(&uc->uc_queue[q], uh, link);
611 #ifdef UMTX_PROFILING
612 uc->length++;
613 if (uc->length > uc->max_length) {
614 uc->max_length = uc->length;
615 if (uc->max_length > max_length)
616 max_length = uc->max_length;
617 }
618 #endif
619 }
620 uq->uq_spare_queue = NULL;
621
622 TAILQ_INSERT_TAIL(&uh->head, uq, uq_link);
623 uh->length++;
624 uq->uq_flags |= UQF_UMTXQ;
625 uq->uq_cur_queue = uh;
626 return;
627 }
628
629 static inline void
630 umtxq_remove_queue(struct umtx_q *uq, int q)
631 {
632 struct umtxq_chain *uc;
633 struct umtxq_queue *uh;
634
635 uc = umtxq_getchain(&uq->uq_key);
636 UMTXQ_LOCKED_ASSERT(uc);
637 if (uq->uq_flags & UQF_UMTXQ) {
638 uh = uq->uq_cur_queue;
639 TAILQ_REMOVE(&uh->head, uq, uq_link);
640 uh->length--;
641 uq->uq_flags &= ~UQF_UMTXQ;
642 if (TAILQ_EMPTY(&uh->head)) {
643 KASSERT(uh->length == 0,
644 ("inconsistent umtxq_queue length"));
645 #ifdef UMTX_PROFILING
646 uc->length--;
647 #endif
648 LIST_REMOVE(uh, link);
649 } else {
650 uh = LIST_FIRST(&uc->uc_spare_queue);
651 KASSERT(uh != NULL, ("uc_spare_queue is empty"));
652 LIST_REMOVE(uh, link);
653 }
654 uq->uq_spare_queue = uh;
655 uq->uq_cur_queue = NULL;
656 }
657 }
658
659 /*
660 * Check if there are multiple waiters
661 */
662 static int
663 umtxq_count(struct umtx_key *key)
664 {
665 struct umtxq_queue *uh;
666
667 UMTXQ_LOCKED_ASSERT(umtxq_getchain(key));
668 uh = umtxq_queue_lookup(key, UMTX_SHARED_QUEUE);
669 if (uh != NULL)
670 return (uh->length);
671 return (0);
672 }
673
674 /*
675 * Check if there are multiple PI waiters and returns first
676 * waiter.
677 */
678 static int
679 umtxq_count_pi(struct umtx_key *key, struct umtx_q **first)
680 {
681 struct umtxq_queue *uh;
682
683 *first = NULL;
684 UMTXQ_LOCKED_ASSERT(umtxq_getchain(key));
685 uh = umtxq_queue_lookup(key, UMTX_SHARED_QUEUE);
686 if (uh != NULL) {
687 *first = TAILQ_FIRST(&uh->head);
688 return (uh->length);
689 }
690 return (0);
691 }
692
693 static int
694 umtxq_check_susp(struct thread *td)
695 {
696 struct proc *p;
697 int error;
698
699 /*
700 * The check for TDF_NEEDSUSPCHK is racy, but it is enough to
701 * eventually break the lockstep loop.
702 */
703 if ((td->td_flags & TDF_NEEDSUSPCHK) == 0)
704 return (0);
705 error = 0;
706 p = td->td_proc;
707 PROC_LOCK(p);
708 if (P_SHOULDSTOP(p) ||
709 ((p->p_flag & P_TRACED) && (td->td_dbgflags & TDB_SUSPEND))) {
710 if (p->p_flag & P_SINGLE_EXIT)
711 error = EINTR;
712 else
713 error = ERESTART;
714 }
715 PROC_UNLOCK(p);
716 return (error);
717 }
718
719 /*
720 * Wake up threads waiting on an userland object.
721 */
722
723 static int
724 umtxq_signal_queue(struct umtx_key *key, int n_wake, int q)
725 {
726 struct umtxq_queue *uh;
727 struct umtx_q *uq;
728 int ret;
729
730 ret = 0;
731 UMTXQ_LOCKED_ASSERT(umtxq_getchain(key));
732 uh = umtxq_queue_lookup(key, q);
733 if (uh != NULL) {
734 while ((uq = TAILQ_FIRST(&uh->head)) != NULL) {
735 umtxq_remove_queue(uq, q);
736 wakeup(uq);
737 if (++ret >= n_wake)
738 return (ret);
739 }
740 }
741 return (ret);
742 }
743
744
745 /*
746 * Wake up specified thread.
747 */
748 static inline void
749 umtxq_signal_thread(struct umtx_q *uq)
750 {
751
752 UMTXQ_LOCKED_ASSERT(umtxq_getchain(&uq->uq_key));
753 umtxq_remove(uq);
754 wakeup(uq);
755 }
756
757 static inline int
758 tstohz(const struct timespec *tsp)
759 {
760 struct timeval tv;
761
762 TIMESPEC_TO_TIMEVAL(&tv, tsp);
763 return tvtohz(&tv);
764 }
765
766 static void
767 abs_timeout_init(struct abs_timeout *timo, int clockid, int absolute,
768 const struct timespec *timeout)
769 {
770
771 timo->clockid = clockid;
772 if (!absolute) {
773 timo->is_abs_real = false;
774 abs_timeout_update(timo);
775 timespecadd(&timo->cur, timeout, &timo->end);
776 } else {
777 timo->end = *timeout;
778 timo->is_abs_real = clockid == CLOCK_REALTIME ||
779 clockid == CLOCK_REALTIME_FAST ||
780 clockid == CLOCK_REALTIME_PRECISE;
781 /*
782 * If is_abs_real, umtxq_sleep will read the clock
783 * after setting td_rtcgen; otherwise, read it here.
784 */
785 if (!timo->is_abs_real) {
786 abs_timeout_update(timo);
787 }
788 }
789 }
790
791 static void
792 abs_timeout_init2(struct abs_timeout *timo, const struct _umtx_time *umtxtime)
793 {
794
795 abs_timeout_init(timo, umtxtime->_clockid,
796 (umtxtime->_flags & UMTX_ABSTIME) != 0, &umtxtime->_timeout);
797 }
798
799 static inline void
800 abs_timeout_update(struct abs_timeout *timo)
801 {
802
803 kern_clock_gettime(curthread, timo->clockid, &timo->cur);
804 }
805
806 static int
807 abs_timeout_gethz(struct abs_timeout *timo)
808 {
809 struct timespec tts;
810
811 if (timespeccmp(&timo->end, &timo->cur, <=))
812 return (-1);
813 timespecsub(&timo->end, &timo->cur, &tts);
814 return (tstohz(&tts));
815 }
816
817 static uint32_t
818 umtx_unlock_val(uint32_t flags, bool rb)
819 {
820
821 if (rb)
822 return (UMUTEX_RB_OWNERDEAD);
823 else if ((flags & UMUTEX_NONCONSISTENT) != 0)
824 return (UMUTEX_RB_NOTRECOV);
825 else
826 return (UMUTEX_UNOWNED);
827
828 }
829
830 /*
831 * Put thread into sleep state, before sleeping, check if
832 * thread was removed from umtx queue.
833 */
834 static inline int
835 umtxq_sleep(struct umtx_q *uq, const char *wmesg, struct abs_timeout *abstime)
836 {
837 struct umtxq_chain *uc;
838 int error, timo;
839
840 if (abstime != NULL && abstime->is_abs_real) {
841 curthread->td_rtcgen = atomic_load_acq_int(&rtc_generation);
842 abs_timeout_update(abstime);
843 }
844
845 uc = umtxq_getchain(&uq->uq_key);
846 UMTXQ_LOCKED_ASSERT(uc);
847 for (;;) {
848 if (!(uq->uq_flags & UQF_UMTXQ)) {
849 error = 0;
850 break;
851 }
852 if (abstime != NULL) {
853 timo = abs_timeout_gethz(abstime);
854 if (timo < 0) {
855 error = ETIMEDOUT;
856 break;
857 }
858 } else
859 timo = 0;
860 error = msleep(uq, &uc->uc_lock, PCATCH | PDROP, wmesg, timo);
861 if (error == EINTR || error == ERESTART) {
862 umtxq_lock(&uq->uq_key);
863 break;
864 }
865 if (abstime != NULL) {
866 if (abstime->is_abs_real)
867 curthread->td_rtcgen =
868 atomic_load_acq_int(&rtc_generation);
869 abs_timeout_update(abstime);
870 }
871 umtxq_lock(&uq->uq_key);
872 }
873
874 curthread->td_rtcgen = 0;
875 return (error);
876 }
877
878 /*
879 * Convert userspace address into unique logical address.
880 */
881 int
882 umtx_key_get(const void *addr, int type, int share, struct umtx_key *key)
883 {
884 struct thread *td = curthread;
885 vm_map_t map;
886 vm_map_entry_t entry;
887 vm_pindex_t pindex;
888 vm_prot_t prot;
889 boolean_t wired;
890
891 key->type = type;
892 if (share == THREAD_SHARE) {
893 key->shared = 0;
894 key->info.private.vs = td->td_proc->p_vmspace;
895 key->info.private.addr = (uintptr_t)addr;
896 } else {
897 MPASS(share == PROCESS_SHARE || share == AUTO_SHARE);
898 map = &td->td_proc->p_vmspace->vm_map;
899 if (vm_map_lookup(&map, (vm_offset_t)addr, VM_PROT_WRITE,
900 &entry, &key->info.shared.object, &pindex, &prot,
901 &wired) != KERN_SUCCESS) {
902 return (EFAULT);
903 }
904
905 if ((share == PROCESS_SHARE) ||
906 (share == AUTO_SHARE &&
907 VM_INHERIT_SHARE == entry->inheritance)) {
908 key->shared = 1;
909 key->info.shared.offset = (vm_offset_t)addr -
910 entry->start + entry->offset;
911 vm_object_reference(key->info.shared.object);
912 } else {
913 key->shared = 0;
914 key->info.private.vs = td->td_proc->p_vmspace;
915 key->info.private.addr = (uintptr_t)addr;
916 }
917 vm_map_lookup_done(map, entry);
918 }
919
920 umtxq_hash(key);
921 return (0);
922 }
923
924 /*
925 * Release key.
926 */
927 void
928 umtx_key_release(struct umtx_key *key)
929 {
930 if (key->shared)
931 vm_object_deallocate(key->info.shared.object);
932 }
933
934 /*
935 * Fetch and compare value, sleep on the address if value is not changed.
936 */
937 static int
938 do_wait(struct thread *td, void *addr, u_long id,
939 struct _umtx_time *timeout, int compat32, int is_private)
940 {
941 struct abs_timeout timo;
942 struct umtx_q *uq;
943 u_long tmp;
944 uint32_t tmp32;
945 int error = 0;
946
947 uq = td->td_umtxq;
948 if ((error = umtx_key_get(addr, TYPE_SIMPLE_WAIT,
949 is_private ? THREAD_SHARE : AUTO_SHARE, &uq->uq_key)) != 0)
950 return (error);
951
952 if (timeout != NULL)
953 abs_timeout_init2(&timo, timeout);
954
955 umtxq_lock(&uq->uq_key);
956 umtxq_insert(uq);
957 umtxq_unlock(&uq->uq_key);
958 if (compat32 == 0) {
959 error = fueword(addr, &tmp);
960 if (error != 0)
961 error = EFAULT;
962 } else {
963 error = fueword32(addr, &tmp32);
964 if (error == 0)
965 tmp = tmp32;
966 else
967 error = EFAULT;
968 }
969 umtxq_lock(&uq->uq_key);
970 if (error == 0) {
971 if (tmp == id)
972 error = umtxq_sleep(uq, "uwait", timeout == NULL ?
973 NULL : &timo);
974 if ((uq->uq_flags & UQF_UMTXQ) == 0)
975 error = 0;
976 else
977 umtxq_remove(uq);
978 } else if ((uq->uq_flags & UQF_UMTXQ) != 0) {
979 umtxq_remove(uq);
980 }
981 umtxq_unlock(&uq->uq_key);
982 umtx_key_release(&uq->uq_key);
983 if (error == ERESTART)
984 error = EINTR;
985 return (error);
986 }
987
988 /*
989 * Wake up threads sleeping on the specified address.
990 */
991 int
992 kern_umtx_wake(struct thread *td, void *uaddr, int n_wake, int is_private)
993 {
994 struct umtx_key key;
995 int ret;
996
997 if ((ret = umtx_key_get(uaddr, TYPE_SIMPLE_WAIT,
998 is_private ? THREAD_SHARE : AUTO_SHARE, &key)) != 0)
999 return (ret);
1000 umtxq_lock(&key);
1001 umtxq_signal(&key, n_wake);
1002 umtxq_unlock(&key);
1003 umtx_key_release(&key);
1004 return (0);
1005 }
1006
1007 /*
1008 * Lock PTHREAD_PRIO_NONE protocol POSIX mutex.
1009 */
1010 static int
1011 do_lock_normal(struct thread *td, struct umutex *m, uint32_t flags,
1012 struct _umtx_time *timeout, int mode)
1013 {
1014 struct abs_timeout timo;
1015 struct umtx_q *uq;
1016 uint32_t owner, old, id;
1017 int error, rv;
1018
1019 id = td->td_tid;
1020 uq = td->td_umtxq;
1021 error = 0;
1022 if (timeout != NULL)
1023 abs_timeout_init2(&timo, timeout);
1024
1025 /*
1026 * Care must be exercised when dealing with umtx structure. It
1027 * can fault on any access.
1028 */
1029 for (;;) {
1030 rv = fueword32(&m->m_owner, &owner);
1031 if (rv == -1)
1032 return (EFAULT);
1033 if (mode == _UMUTEX_WAIT) {
1034 if (owner == UMUTEX_UNOWNED ||
1035 owner == UMUTEX_CONTESTED ||
1036 owner == UMUTEX_RB_OWNERDEAD ||
1037 owner == UMUTEX_RB_NOTRECOV)
1038 return (0);
1039 } else {
1040 /*
1041 * Robust mutex terminated. Kernel duty is to
1042 * return EOWNERDEAD to the userspace. The
1043 * umutex.m_flags UMUTEX_NONCONSISTENT is set
1044 * by the common userspace code.
1045 */
1046 if (owner == UMUTEX_RB_OWNERDEAD) {
1047 rv = casueword32(&m->m_owner,
1048 UMUTEX_RB_OWNERDEAD, &owner,
1049 id | UMUTEX_CONTESTED);
1050 if (rv == -1)
1051 return (EFAULT);
1052 if (owner == UMUTEX_RB_OWNERDEAD)
1053 return (EOWNERDEAD); /* success */
1054 rv = umtxq_check_susp(td);
1055 if (rv != 0)
1056 return (rv);
1057 continue;
1058 }
1059 if (owner == UMUTEX_RB_NOTRECOV)
1060 return (ENOTRECOVERABLE);
1061
1062
1063 /*
1064 * Try the uncontested case. This should be
1065 * done in userland.
1066 */
1067 rv = casueword32(&m->m_owner, UMUTEX_UNOWNED,
1068 &owner, id);
1069 /* The address was invalid. */
1070 if (rv == -1)
1071 return (EFAULT);
1072
1073 /* The acquire succeeded. */
1074 if (owner == UMUTEX_UNOWNED)
1075 return (0);
1076
1077 /*
1078 * If no one owns it but it is contested try
1079 * to acquire it.
1080 */
1081 if (owner == UMUTEX_CONTESTED) {
1082 rv = casueword32(&m->m_owner,
1083 UMUTEX_CONTESTED, &owner,
1084 id | UMUTEX_CONTESTED);
1085 /* The address was invalid. */
1086 if (rv == -1)
1087 return (EFAULT);
1088
1089 if (owner == UMUTEX_CONTESTED)
1090 return (0);
1091
1092 rv = umtxq_check_susp(td);
1093 if (rv != 0)
1094 return (rv);
1095
1096 /*
1097 * If this failed the lock has
1098 * changed, restart.
1099 */
1100 continue;
1101 }
1102 }
1103
1104 if (mode == _UMUTEX_TRY)
1105 return (EBUSY);
1106
1107 /*
1108 * If we caught a signal, we have retried and now
1109 * exit immediately.
1110 */
1111 if (error != 0)
1112 return (error);
1113
1114 if ((error = umtx_key_get(m, TYPE_NORMAL_UMUTEX,
1115 GET_SHARE(flags), &uq->uq_key)) != 0)
1116 return (error);
1117
1118 umtxq_lock(&uq->uq_key);
1119 umtxq_busy(&uq->uq_key);
1120 umtxq_insert(uq);
1121 umtxq_unlock(&uq->uq_key);
1122
1123 /*
1124 * Set the contested bit so that a release in user space
1125 * knows to use the system call for unlock. If this fails
1126 * either some one else has acquired the lock or it has been
1127 * released.
1128 */
1129 rv = casueword32(&m->m_owner, owner, &old,
1130 owner | UMUTEX_CONTESTED);
1131
1132 /* The address was invalid. */
1133 if (rv == -1) {
1134 umtxq_lock(&uq->uq_key);
1135 umtxq_remove(uq);
1136 umtxq_unbusy(&uq->uq_key);
1137 umtxq_unlock(&uq->uq_key);
1138 umtx_key_release(&uq->uq_key);
1139 return (EFAULT);
1140 }
1141
1142 /*
1143 * We set the contested bit, sleep. Otherwise the lock changed
1144 * and we need to retry or we lost a race to the thread
1145 * unlocking the umtx.
1146 */
1147 umtxq_lock(&uq->uq_key);
1148 umtxq_unbusy(&uq->uq_key);
1149 if (old == owner)
1150 error = umtxq_sleep(uq, "umtxn", timeout == NULL ?
1151 NULL : &timo);
1152 umtxq_remove(uq);
1153 umtxq_unlock(&uq->uq_key);
1154 umtx_key_release(&uq->uq_key);
1155
1156 if (error == 0)
1157 error = umtxq_check_susp(td);
1158 }
1159
1160 return (0);
1161 }
1162
1163 /*
1164 * Unlock PTHREAD_PRIO_NONE protocol POSIX mutex.
1165 */
1166 static int
1167 do_unlock_normal(struct thread *td, struct umutex *m, uint32_t flags, bool rb)
1168 {
1169 struct umtx_key key;
1170 uint32_t owner, old, id, newlock;
1171 int error, count;
1172
1173 id = td->td_tid;
1174 /*
1175 * Make sure we own this mtx.
1176 */
1177 error = fueword32(&m->m_owner, &owner);
1178 if (error == -1)
1179 return (EFAULT);
1180
1181 if ((owner & ~UMUTEX_CONTESTED) != id)
1182 return (EPERM);
1183
1184 newlock = umtx_unlock_val(flags, rb);
1185 if ((owner & UMUTEX_CONTESTED) == 0) {
1186 error = casueword32(&m->m_owner, owner, &old, newlock);
1187 if (error == -1)
1188 return (EFAULT);
1189 if (old == owner)
1190 return (0);
1191 owner = old;
1192 }
1193
1194 /* We should only ever be in here for contested locks */
1195 if ((error = umtx_key_get(m, TYPE_NORMAL_UMUTEX, GET_SHARE(flags),
1196 &key)) != 0)
1197 return (error);
1198
1199 umtxq_lock(&key);
1200 umtxq_busy(&key);
1201 count = umtxq_count(&key);
1202 umtxq_unlock(&key);
1203
1204 /*
1205 * When unlocking the umtx, it must be marked as unowned if
1206 * there is zero or one thread only waiting for it.
1207 * Otherwise, it must be marked as contested.
1208 */
1209 if (count > 1)
1210 newlock |= UMUTEX_CONTESTED;
1211 error = casueword32(&m->m_owner, owner, &old, newlock);
1212 umtxq_lock(&key);
1213 umtxq_signal(&key, 1);
1214 umtxq_unbusy(&key);
1215 umtxq_unlock(&key);
1216 umtx_key_release(&key);
1217 if (error == -1)
1218 return (EFAULT);
1219 if (old != owner)
1220 return (EINVAL);
1221 return (0);
1222 }
1223
1224 /*
1225 * Check if the mutex is available and wake up a waiter,
1226 * only for simple mutex.
1227 */
1228 static int
1229 do_wake_umutex(struct thread *td, struct umutex *m)
1230 {
1231 struct umtx_key key;
1232 uint32_t owner;
1233 uint32_t flags;
1234 int error;
1235 int count;
1236
1237 error = fueword32(&m->m_owner, &owner);
1238 if (error == -1)
1239 return (EFAULT);
1240
1241 if ((owner & ~UMUTEX_CONTESTED) != 0 && owner != UMUTEX_RB_OWNERDEAD &&
1242 owner != UMUTEX_RB_NOTRECOV)
1243 return (0);
1244
1245 error = fueword32(&m->m_flags, &flags);
1246 if (error == -1)
1247 return (EFAULT);
1248
1249 /* We should only ever be in here for contested locks */
1250 if ((error = umtx_key_get(m, TYPE_NORMAL_UMUTEX, GET_SHARE(flags),
1251 &key)) != 0)
1252 return (error);
1253
1254 umtxq_lock(&key);
1255 umtxq_busy(&key);
1256 count = umtxq_count(&key);
1257 umtxq_unlock(&key);
1258
1259 if (count <= 1 && owner != UMUTEX_RB_OWNERDEAD &&
1260 owner != UMUTEX_RB_NOTRECOV) {
1261 error = casueword32(&m->m_owner, UMUTEX_CONTESTED, &owner,
1262 UMUTEX_UNOWNED);
1263 if (error == -1)
1264 error = EFAULT;
1265 }
1266
1267 umtxq_lock(&key);
1268 if (error == 0 && count != 0 && ((owner & ~UMUTEX_CONTESTED) == 0 ||
1269 owner == UMUTEX_RB_OWNERDEAD || owner == UMUTEX_RB_NOTRECOV))
1270 umtxq_signal(&key, 1);
1271 umtxq_unbusy(&key);
1272 umtxq_unlock(&key);
1273 umtx_key_release(&key);
1274 return (error);
1275 }
1276
1277 /*
1278 * Check if the mutex has waiters and tries to fix contention bit.
1279 */
1280 static int
1281 do_wake2_umutex(struct thread *td, struct umutex *m, uint32_t flags)
1282 {
1283 struct umtx_key key;
1284 uint32_t owner, old;
1285 int type;
1286 int error;
1287 int count;
1288
1289 switch (flags & (UMUTEX_PRIO_INHERIT | UMUTEX_PRIO_PROTECT |
1290 UMUTEX_ROBUST)) {
1291 case 0:
1292 case UMUTEX_ROBUST:
1293 type = TYPE_NORMAL_UMUTEX;
1294 break;
1295 case UMUTEX_PRIO_INHERIT:
1296 type = TYPE_PI_UMUTEX;
1297 break;
1298 case (UMUTEX_PRIO_INHERIT | UMUTEX_ROBUST):
1299 type = TYPE_PI_ROBUST_UMUTEX;
1300 break;
1301 case UMUTEX_PRIO_PROTECT:
1302 type = TYPE_PP_UMUTEX;
1303 break;
1304 case (UMUTEX_PRIO_PROTECT | UMUTEX_ROBUST):
1305 type = TYPE_PP_ROBUST_UMUTEX;
1306 break;
1307 default:
1308 return (EINVAL);
1309 }
1310 if ((error = umtx_key_get(m, type, GET_SHARE(flags), &key)) != 0)
1311 return (error);
1312
1313 owner = 0;
1314 umtxq_lock(&key);
1315 umtxq_busy(&key);
1316 count = umtxq_count(&key);
1317 umtxq_unlock(&key);
1318 /*
1319 * Only repair contention bit if there is a waiter, this means the mutex
1320 * is still being referenced by userland code, otherwise don't update
1321 * any memory.
1322 */
1323 if (count > 1) {
1324 error = fueword32(&m->m_owner, &owner);
1325 if (error == -1)
1326 error = EFAULT;
1327 while (error == 0 && (owner & UMUTEX_CONTESTED) == 0) {
1328 error = casueword32(&m->m_owner, owner, &old,
1329 owner | UMUTEX_CONTESTED);
1330 if (error == -1) {
1331 error = EFAULT;
1332 break;
1333 }
1334 if (old == owner)
1335 break;
1336 owner = old;
1337 error = umtxq_check_susp(td);
1338 if (error != 0)
1339 break;
1340 }
1341 } else if (count == 1) {
1342 error = fueword32(&m->m_owner, &owner);
1343 if (error == -1)
1344 error = EFAULT;
1345 while (error == 0 && (owner & ~UMUTEX_CONTESTED) != 0 &&
1346 (owner & UMUTEX_CONTESTED) == 0) {
1347 error = casueword32(&m->m_owner, owner, &old,
1348 owner | UMUTEX_CONTESTED);
1349 if (error == -1) {
1350 error = EFAULT;
1351 break;
1352 }
1353 if (old == owner)
1354 break;
1355 owner = old;
1356 error = umtxq_check_susp(td);
1357 if (error != 0)
1358 break;
1359 }
1360 }
1361 umtxq_lock(&key);
1362 if (error == EFAULT) {
1363 umtxq_signal(&key, INT_MAX);
1364 } else if (count != 0 && ((owner & ~UMUTEX_CONTESTED) == 0 ||
1365 owner == UMUTEX_RB_OWNERDEAD || owner == UMUTEX_RB_NOTRECOV))
1366 umtxq_signal(&key, 1);
1367 umtxq_unbusy(&key);
1368 umtxq_unlock(&key);
1369 umtx_key_release(&key);
1370 return (error);
1371 }
1372
1373 static inline struct umtx_pi *
1374 umtx_pi_alloc(int flags)
1375 {
1376 struct umtx_pi *pi;
1377
1378 pi = uma_zalloc(umtx_pi_zone, M_ZERO | flags);
1379 TAILQ_INIT(&pi->pi_blocked);
1380 atomic_add_int(&umtx_pi_allocated, 1);
1381 return (pi);
1382 }
1383
1384 static inline void
1385 umtx_pi_free(struct umtx_pi *pi)
1386 {
1387 uma_zfree(umtx_pi_zone, pi);
1388 atomic_add_int(&umtx_pi_allocated, -1);
1389 }
1390
1391 /*
1392 * Adjust the thread's position on a pi_state after its priority has been
1393 * changed.
1394 */
1395 static int
1396 umtx_pi_adjust_thread(struct umtx_pi *pi, struct thread *td)
1397 {
1398 struct umtx_q *uq, *uq1, *uq2;
1399 struct thread *td1;
1400
1401 mtx_assert(&umtx_lock, MA_OWNED);
1402 if (pi == NULL)
1403 return (0);
1404
1405 uq = td->td_umtxq;
1406
1407 /*
1408 * Check if the thread needs to be moved on the blocked chain.
1409 * It needs to be moved if either its priority is lower than
1410 * the previous thread or higher than the next thread.
1411 */
1412 uq1 = TAILQ_PREV(uq, umtxq_head, uq_lockq);
1413 uq2 = TAILQ_NEXT(uq, uq_lockq);
1414 if ((uq1 != NULL && UPRI(td) < UPRI(uq1->uq_thread)) ||
1415 (uq2 != NULL && UPRI(td) > UPRI(uq2->uq_thread))) {
1416 /*
1417 * Remove thread from blocked chain and determine where
1418 * it should be moved to.
1419 */
1420 TAILQ_REMOVE(&pi->pi_blocked, uq, uq_lockq);
1421 TAILQ_FOREACH(uq1, &pi->pi_blocked, uq_lockq) {
1422 td1 = uq1->uq_thread;
1423 MPASS(td1->td_proc->p_magic == P_MAGIC);
1424 if (UPRI(td1) > UPRI(td))
1425 break;
1426 }
1427
1428 if (uq1 == NULL)
1429 TAILQ_INSERT_TAIL(&pi->pi_blocked, uq, uq_lockq);
1430 else
1431 TAILQ_INSERT_BEFORE(uq1, uq, uq_lockq);
1432 }
1433 return (1);
1434 }
1435
1436 static struct umtx_pi *
1437 umtx_pi_next(struct umtx_pi *pi)
1438 {
1439 struct umtx_q *uq_owner;
1440
1441 if (pi->pi_owner == NULL)
1442 return (NULL);
1443 uq_owner = pi->pi_owner->td_umtxq;
1444 if (uq_owner == NULL)
1445 return (NULL);
1446 return (uq_owner->uq_pi_blocked);
1447 }
1448
1449 /*
1450 * Floyd's Cycle-Finding Algorithm.
1451 */
1452 static bool
1453 umtx_pi_check_loop(struct umtx_pi *pi)
1454 {
1455 struct umtx_pi *pi1; /* fast iterator */
1456
1457 mtx_assert(&umtx_lock, MA_OWNED);
1458 if (pi == NULL)
1459 return (false);
1460 pi1 = pi;
1461 for (;;) {
1462 pi = umtx_pi_next(pi);
1463 if (pi == NULL)
1464 break;
1465 pi1 = umtx_pi_next(pi1);
1466 if (pi1 == NULL)
1467 break;
1468 pi1 = umtx_pi_next(pi1);
1469 if (pi1 == NULL)
1470 break;
1471 if (pi == pi1)
1472 return (true);
1473 }
1474 return (false);
1475 }
1476
1477 /*
1478 * Propagate priority when a thread is blocked on POSIX
1479 * PI mutex.
1480 */
1481 static void
1482 umtx_propagate_priority(struct thread *td)
1483 {
1484 struct umtx_q *uq;
1485 struct umtx_pi *pi;
1486 int pri;
1487
1488 mtx_assert(&umtx_lock, MA_OWNED);
1489 pri = UPRI(td);
1490 uq = td->td_umtxq;
1491 pi = uq->uq_pi_blocked;
1492 if (pi == NULL)
1493 return;
1494 if (umtx_pi_check_loop(pi))
1495 return;
1496
1497 for (;;) {
1498 td = pi->pi_owner;
1499 if (td == NULL || td == curthread)
1500 return;
1501
1502 MPASS(td->td_proc != NULL);
1503 MPASS(td->td_proc->p_magic == P_MAGIC);
1504
1505 thread_lock(td);
1506 if (td->td_lend_user_pri > pri)
1507 sched_lend_user_prio(td, pri);
1508 else {
1509 thread_unlock(td);
1510 break;
1511 }
1512 thread_unlock(td);
1513
1514 /*
1515 * Pick up the lock that td is blocked on.
1516 */
1517 uq = td->td_umtxq;
1518 pi = uq->uq_pi_blocked;
1519 if (pi == NULL)
1520 break;
1521 /* Resort td on the list if needed. */
1522 umtx_pi_adjust_thread(pi, td);
1523 }
1524 }
1525
1526 /*
1527 * Unpropagate priority for a PI mutex when a thread blocked on
1528 * it is interrupted by signal or resumed by others.
1529 */
1530 static void
1531 umtx_repropagate_priority(struct umtx_pi *pi)
1532 {
1533 struct umtx_q *uq, *uq_owner;
1534 struct umtx_pi *pi2;
1535 int pri;
1536
1537 mtx_assert(&umtx_lock, MA_OWNED);
1538
1539 if (umtx_pi_check_loop(pi))
1540 return;
1541 while (pi != NULL && pi->pi_owner != NULL) {
1542 pri = PRI_MAX;
1543 uq_owner = pi->pi_owner->td_umtxq;
1544
1545 TAILQ_FOREACH(pi2, &uq_owner->uq_pi_contested, pi_link) {
1546 uq = TAILQ_FIRST(&pi2->pi_blocked);
1547 if (uq != NULL) {
1548 if (pri > UPRI(uq->uq_thread))
1549 pri = UPRI(uq->uq_thread);
1550 }
1551 }
1552
1553 if (pri > uq_owner->uq_inherited_pri)
1554 pri = uq_owner->uq_inherited_pri;
1555 thread_lock(pi->pi_owner);
1556 sched_lend_user_prio(pi->pi_owner, pri);
1557 thread_unlock(pi->pi_owner);
1558 if ((pi = uq_owner->uq_pi_blocked) != NULL)
1559 umtx_pi_adjust_thread(pi, uq_owner->uq_thread);
1560 }
1561 }
1562
1563 /*
1564 * Insert a PI mutex into owned list.
1565 */
1566 static void
1567 umtx_pi_setowner(struct umtx_pi *pi, struct thread *owner)
1568 {
1569 struct umtx_q *uq_owner;
1570
1571 uq_owner = owner->td_umtxq;
1572 mtx_assert(&umtx_lock, MA_OWNED);
1573 MPASS(pi->pi_owner == NULL);
1574 pi->pi_owner = owner;
1575 TAILQ_INSERT_TAIL(&uq_owner->uq_pi_contested, pi, pi_link);
1576 }
1577
1578
1579 /*
1580 * Disown a PI mutex, and remove it from the owned list.
1581 */
1582 static void
1583 umtx_pi_disown(struct umtx_pi *pi)
1584 {
1585
1586 mtx_assert(&umtx_lock, MA_OWNED);
1587 TAILQ_REMOVE(&pi->pi_owner->td_umtxq->uq_pi_contested, pi, pi_link);
1588 pi->pi_owner = NULL;
1589 }
1590
1591 /*
1592 * Claim ownership of a PI mutex.
1593 */
1594 static int
1595 umtx_pi_claim(struct umtx_pi *pi, struct thread *owner)
1596 {
1597 struct umtx_q *uq;
1598 int pri;
1599
1600 mtx_lock(&umtx_lock);
1601 if (pi->pi_owner == owner) {
1602 mtx_unlock(&umtx_lock);
1603 return (0);
1604 }
1605
1606 if (pi->pi_owner != NULL) {
1607 /*
1608 * userland may have already messed the mutex, sigh.
1609 */
1610 mtx_unlock(&umtx_lock);
1611 return (EPERM);
1612 }
1613 umtx_pi_setowner(pi, owner);
1614 uq = TAILQ_FIRST(&pi->pi_blocked);
1615 if (uq != NULL) {
1616 pri = UPRI(uq->uq_thread);
1617 thread_lock(owner);
1618 if (pri < UPRI(owner))
1619 sched_lend_user_prio(owner, pri);
1620 thread_unlock(owner);
1621 }
1622 mtx_unlock(&umtx_lock);
1623 return (0);
1624 }
1625
1626 /*
1627 * Adjust a thread's order position in its blocked PI mutex,
1628 * this may result new priority propagating process.
1629 */
1630 void
1631 umtx_pi_adjust(struct thread *td, u_char oldpri)
1632 {
1633 struct umtx_q *uq;
1634 struct umtx_pi *pi;
1635
1636 uq = td->td_umtxq;
1637 mtx_lock(&umtx_lock);
1638 /*
1639 * Pick up the lock that td is blocked on.
1640 */
1641 pi = uq->uq_pi_blocked;
1642 if (pi != NULL) {
1643 umtx_pi_adjust_thread(pi, td);
1644 umtx_repropagate_priority(pi);
1645 }
1646 mtx_unlock(&umtx_lock);
1647 }
1648
1649 /*
1650 * Sleep on a PI mutex.
1651 */
1652 static int
1653 umtxq_sleep_pi(struct umtx_q *uq, struct umtx_pi *pi, uint32_t owner,
1654 const char *wmesg, struct abs_timeout *timo, bool shared)
1655 {
1656 struct thread *td, *td1;
1657 struct umtx_q *uq1;
1658 int error, pri;
1659 #ifdef INVARIANTS
1660 struct umtxq_chain *uc;
1661
1662 uc = umtxq_getchain(&pi->pi_key);
1663 #endif
1664 error = 0;
1665 td = uq->uq_thread;
1666 KASSERT(td == curthread, ("inconsistent uq_thread"));
1667 UMTXQ_LOCKED_ASSERT(umtxq_getchain(&uq->uq_key));
1668 KASSERT(uc->uc_busy != 0, ("umtx chain is not busy"));
1669 umtxq_insert(uq);
1670 mtx_lock(&umtx_lock);
1671 if (pi->pi_owner == NULL) {
1672 mtx_unlock(&umtx_lock);
1673 td1 = tdfind(owner, shared ? -1 : td->td_proc->p_pid);
1674 mtx_lock(&umtx_lock);
1675 if (td1 != NULL) {
1676 if (pi->pi_owner == NULL)
1677 umtx_pi_setowner(pi, td1);
1678 PROC_UNLOCK(td1->td_proc);
1679 }
1680 }
1681
1682 TAILQ_FOREACH(uq1, &pi->pi_blocked, uq_lockq) {
1683 pri = UPRI(uq1->uq_thread);
1684 if (pri > UPRI(td))
1685 break;
1686 }
1687
1688 if (uq1 != NULL)
1689 TAILQ_INSERT_BEFORE(uq1, uq, uq_lockq);
1690 else
1691 TAILQ_INSERT_TAIL(&pi->pi_blocked, uq, uq_lockq);
1692
1693 uq->uq_pi_blocked = pi;
1694 thread_lock(td);
1695 td->td_flags |= TDF_UPIBLOCKED;
1696 thread_unlock(td);
1697 umtx_propagate_priority(td);
1698 mtx_unlock(&umtx_lock);
1699 umtxq_unbusy(&uq->uq_key);
1700
1701 error = umtxq_sleep(uq, wmesg, timo);
1702 umtxq_remove(uq);
1703
1704 mtx_lock(&umtx_lock);
1705 uq->uq_pi_blocked = NULL;
1706 thread_lock(td);
1707 td->td_flags &= ~TDF_UPIBLOCKED;
1708 thread_unlock(td);
1709 TAILQ_REMOVE(&pi->pi_blocked, uq, uq_lockq);
1710 umtx_repropagate_priority(pi);
1711 mtx_unlock(&umtx_lock);
1712 umtxq_unlock(&uq->uq_key);
1713
1714 return (error);
1715 }
1716
1717 /*
1718 * Add reference count for a PI mutex.
1719 */
1720 static void
1721 umtx_pi_ref(struct umtx_pi *pi)
1722 {
1723
1724 UMTXQ_LOCKED_ASSERT(umtxq_getchain(&pi->pi_key));
1725 pi->pi_refcount++;
1726 }
1727
1728 /*
1729 * Decrease reference count for a PI mutex, if the counter
1730 * is decreased to zero, its memory space is freed.
1731 */
1732 static void
1733 umtx_pi_unref(struct umtx_pi *pi)
1734 {
1735 struct umtxq_chain *uc;
1736
1737 uc = umtxq_getchain(&pi->pi_key);
1738 UMTXQ_LOCKED_ASSERT(uc);
1739 KASSERT(pi->pi_refcount > 0, ("invalid reference count"));
1740 if (--pi->pi_refcount == 0) {
1741 mtx_lock(&umtx_lock);
1742 if (pi->pi_owner != NULL)
1743 umtx_pi_disown(pi);
1744 KASSERT(TAILQ_EMPTY(&pi->pi_blocked),
1745 ("blocked queue not empty"));
1746 mtx_unlock(&umtx_lock);
1747 TAILQ_REMOVE(&uc->uc_pi_list, pi, pi_hashlink);
1748 umtx_pi_free(pi);
1749 }
1750 }
1751
1752 /*
1753 * Find a PI mutex in hash table.
1754 */
1755 static struct umtx_pi *
1756 umtx_pi_lookup(struct umtx_key *key)
1757 {
1758 struct umtxq_chain *uc;
1759 struct umtx_pi *pi;
1760
1761 uc = umtxq_getchain(key);
1762 UMTXQ_LOCKED_ASSERT(uc);
1763
1764 TAILQ_FOREACH(pi, &uc->uc_pi_list, pi_hashlink) {
1765 if (umtx_key_match(&pi->pi_key, key)) {
1766 return (pi);
1767 }
1768 }
1769 return (NULL);
1770 }
1771
1772 /*
1773 * Insert a PI mutex into hash table.
1774 */
1775 static inline void
1776 umtx_pi_insert(struct umtx_pi *pi)
1777 {
1778 struct umtxq_chain *uc;
1779
1780 uc = umtxq_getchain(&pi->pi_key);
1781 UMTXQ_LOCKED_ASSERT(uc);
1782 TAILQ_INSERT_TAIL(&uc->uc_pi_list, pi, pi_hashlink);
1783 }
1784
1785 /*
1786 * Lock a PI mutex.
1787 */
1788 static int
1789 do_lock_pi(struct thread *td, struct umutex *m, uint32_t flags,
1790 struct _umtx_time *timeout, int try)
1791 {
1792 struct abs_timeout timo;
1793 struct umtx_q *uq;
1794 struct umtx_pi *pi, *new_pi;
1795 uint32_t id, old_owner, owner, old;
1796 int error, rv;
1797
1798 id = td->td_tid;
1799 uq = td->td_umtxq;
1800
1801 if ((error = umtx_key_get(m, (flags & UMUTEX_ROBUST) != 0 ?
1802 TYPE_PI_ROBUST_UMUTEX : TYPE_PI_UMUTEX, GET_SHARE(flags),
1803 &uq->uq_key)) != 0)
1804 return (error);
1805
1806 if (timeout != NULL)
1807 abs_timeout_init2(&timo, timeout);
1808
1809 umtxq_lock(&uq->uq_key);
1810 pi = umtx_pi_lookup(&uq->uq_key);
1811 if (pi == NULL) {
1812 new_pi = umtx_pi_alloc(M_NOWAIT);
1813 if (new_pi == NULL) {
1814 umtxq_unlock(&uq->uq_key);
1815 new_pi = umtx_pi_alloc(M_WAITOK);
1816 umtxq_lock(&uq->uq_key);
1817 pi = umtx_pi_lookup(&uq->uq_key);
1818 if (pi != NULL) {
1819 umtx_pi_free(new_pi);
1820 new_pi = NULL;
1821 }
1822 }
1823 if (new_pi != NULL) {
1824 new_pi->pi_key = uq->uq_key;
1825 umtx_pi_insert(new_pi);
1826 pi = new_pi;
1827 }
1828 }
1829 umtx_pi_ref(pi);
1830 umtxq_unlock(&uq->uq_key);
1831
1832 /*
1833 * Care must be exercised when dealing with umtx structure. It
1834 * can fault on any access.
1835 */
1836 for (;;) {
1837 /*
1838 * Try the uncontested case. This should be done in userland.
1839 */
1840 rv = casueword32(&m->m_owner, UMUTEX_UNOWNED, &owner, id);
1841 /* The address was invalid. */
1842 if (rv == -1) {
1843 error = EFAULT;
1844 break;
1845 }
1846
1847 /* The acquire succeeded. */
1848 if (owner == UMUTEX_UNOWNED) {
1849 error = 0;
1850 break;
1851 }
1852
1853 if (owner == UMUTEX_RB_NOTRECOV) {
1854 error = ENOTRECOVERABLE;
1855 break;
1856 }
1857
1858 /* If no one owns it but it is contested try to acquire it. */
1859 if (owner == UMUTEX_CONTESTED || owner == UMUTEX_RB_OWNERDEAD) {
1860 old_owner = owner;
1861 rv = casueword32(&m->m_owner, owner, &owner,
1862 id | UMUTEX_CONTESTED);
1863 /* The address was invalid. */
1864 if (rv == -1) {
1865 error = EFAULT;
1866 break;
1867 }
1868
1869 if (owner == old_owner) {
1870 umtxq_lock(&uq->uq_key);
1871 umtxq_busy(&uq->uq_key);
1872 error = umtx_pi_claim(pi, td);
1873 umtxq_unbusy(&uq->uq_key);
1874 umtxq_unlock(&uq->uq_key);
1875 if (error != 0) {
1876 /*
1877 * Since we're going to return an
1878 * error, restore the m_owner to its
1879 * previous, unowned state to avoid
1880 * compounding the problem.
1881 */
1882 (void)casuword32(&m->m_owner,
1883 id | UMUTEX_CONTESTED,
1884 old_owner);
1885 }
1886 if (error == 0 &&
1887 old_owner == UMUTEX_RB_OWNERDEAD)
1888 error = EOWNERDEAD;
1889 break;
1890 }
1891
1892 error = umtxq_check_susp(td);
1893 if (error != 0)
1894 break;
1895
1896 /* If this failed the lock has changed, restart. */
1897 continue;
1898 }
1899
1900 if ((owner & ~UMUTEX_CONTESTED) == id) {
1901 error = EDEADLK;
1902 break;
1903 }
1904
1905 if (try != 0) {
1906 error = EBUSY;
1907 break;
1908 }
1909
1910 /*
1911 * If we caught a signal, we have retried and now
1912 * exit immediately.
1913 */
1914 if (error != 0)
1915 break;
1916
1917 umtxq_lock(&uq->uq_key);
1918 umtxq_busy(&uq->uq_key);
1919 umtxq_unlock(&uq->uq_key);
1920
1921 /*
1922 * Set the contested bit so that a release in user space
1923 * knows to use the system call for unlock. If this fails
1924 * either some one else has acquired the lock or it has been
1925 * released.
1926 */
1927 rv = casueword32(&m->m_owner, owner, &old, owner |
1928 UMUTEX_CONTESTED);
1929
1930 /* The address was invalid. */
1931 if (rv == -1) {
1932 umtxq_unbusy_unlocked(&uq->uq_key);
1933 error = EFAULT;
1934 break;
1935 }
1936
1937 umtxq_lock(&uq->uq_key);
1938 /*
1939 * We set the contested bit, sleep. Otherwise the lock changed
1940 * and we need to retry or we lost a race to the thread
1941 * unlocking the umtx. Note that the UMUTEX_RB_OWNERDEAD
1942 * value for owner is impossible there.
1943 */
1944 if (old == owner) {
1945 error = umtxq_sleep_pi(uq, pi,
1946 owner & ~UMUTEX_CONTESTED,
1947 "umtxpi", timeout == NULL ? NULL : &timo,
1948 (flags & USYNC_PROCESS_SHARED) != 0);
1949 if (error != 0)
1950 continue;
1951 } else {
1952 umtxq_unbusy(&uq->uq_key);
1953 umtxq_unlock(&uq->uq_key);
1954 }
1955
1956 error = umtxq_check_susp(td);
1957 if (error != 0)
1958 break;
1959 }
1960
1961 umtxq_lock(&uq->uq_key);
1962 umtx_pi_unref(pi);
1963 umtxq_unlock(&uq->uq_key);
1964
1965 umtx_key_release(&uq->uq_key);
1966 return (error);
1967 }
1968
1969 /*
1970 * Unlock a PI mutex.
1971 */
1972 static int
1973 do_unlock_pi(struct thread *td, struct umutex *m, uint32_t flags, bool rb)
1974 {
1975 struct umtx_key key;
1976 struct umtx_q *uq_first, *uq_first2, *uq_me;
1977 struct umtx_pi *pi, *pi2;
1978 uint32_t id, new_owner, old, owner;
1979 int count, error, pri;
1980
1981 id = td->td_tid;
1982 /*
1983 * Make sure we own this mtx.
1984 */
1985 error = fueword32(&m->m_owner, &owner);
1986 if (error == -1)
1987 return (EFAULT);
1988
1989 if ((owner & ~UMUTEX_CONTESTED) != id)
1990 return (EPERM);
1991
1992 new_owner = umtx_unlock_val(flags, rb);
1993
1994 /* This should be done in userland */
1995 if ((owner & UMUTEX_CONTESTED) == 0) {
1996 error = casueword32(&m->m_owner, owner, &old, new_owner);
1997 if (error == -1)
1998 return (EFAULT);
1999 if (old == owner)
2000 return (0);
2001 owner = old;
2002 }
2003
2004 /* We should only ever be in here for contested locks */
2005 if ((error = umtx_key_get(m, (flags & UMUTEX_ROBUST) != 0 ?
2006 TYPE_PI_ROBUST_UMUTEX : TYPE_PI_UMUTEX, GET_SHARE(flags),
2007 &key)) != 0)
2008 return (error);
2009
2010 umtxq_lock(&key);
2011 umtxq_busy(&key);
2012 count = umtxq_count_pi(&key, &uq_first);
2013 if (uq_first != NULL) {
2014 mtx_lock(&umtx_lock);
2015 pi = uq_first->uq_pi_blocked;
2016 KASSERT(pi != NULL, ("pi == NULL?"));
2017 if (pi->pi_owner != td && !(rb && pi->pi_owner == NULL)) {
2018 mtx_unlock(&umtx_lock);
2019 umtxq_unbusy(&key);
2020 umtxq_unlock(&key);
2021 umtx_key_release(&key);
2022 /* userland messed the mutex */
2023 return (EPERM);
2024 }
2025 uq_me = td->td_umtxq;
2026 if (pi->pi_owner == td)
2027 umtx_pi_disown(pi);
2028 /* get highest priority thread which is still sleeping. */
2029 uq_first = TAILQ_FIRST(&pi->pi_blocked);
2030 while (uq_first != NULL &&
2031 (uq_first->uq_flags & UQF_UMTXQ) == 0) {
2032 uq_first = TAILQ_NEXT(uq_first, uq_lockq);
2033 }
2034 pri = PRI_MAX;
2035 TAILQ_FOREACH(pi2, &uq_me->uq_pi_contested, pi_link) {
2036 uq_first2 = TAILQ_FIRST(&pi2->pi_blocked);
2037 if (uq_first2 != NULL) {
2038 if (pri > UPRI(uq_first2->uq_thread))
2039 pri = UPRI(uq_first2->uq_thread);
2040 }
2041 }
2042 thread_lock(td);
2043 sched_lend_user_prio(td, pri);
2044 thread_unlock(td);
2045 mtx_unlock(&umtx_lock);
2046 if (uq_first)
2047 umtxq_signal_thread(uq_first);
2048 } else {
2049 pi = umtx_pi_lookup(&key);
2050 /*
2051 * A umtx_pi can exist if a signal or timeout removed the
2052 * last waiter from the umtxq, but there is still
2053 * a thread in do_lock_pi() holding the umtx_pi.
2054 */
2055 if (pi != NULL) {
2056 /*
2057 * The umtx_pi can be unowned, such as when a thread
2058 * has just entered do_lock_pi(), allocated the
2059 * umtx_pi, and unlocked the umtxq.
2060 * If the current thread owns it, it must disown it.
2061 */
2062 mtx_lock(&umtx_lock);
2063 if (pi->pi_owner == td)
2064 umtx_pi_disown(pi);
2065 mtx_unlock(&umtx_lock);
2066 }
2067 }
2068 umtxq_unlock(&key);
2069
2070 /*
2071 * When unlocking the umtx, it must be marked as unowned if
2072 * there is zero or one thread only waiting for it.
2073 * Otherwise, it must be marked as contested.
2074 */
2075
2076 if (count > 1)
2077 new_owner |= UMUTEX_CONTESTED;
2078 error = casueword32(&m->m_owner, owner, &old, new_owner);
2079
2080 umtxq_unbusy_unlocked(&key);
2081 umtx_key_release(&key);
2082 if (error == -1)
2083 return (EFAULT);
2084 if (old != owner)
2085 return (EINVAL);
2086 return (0);
2087 }
2088
2089 /*
2090 * Lock a PP mutex.
2091 */
2092 static int
2093 do_lock_pp(struct thread *td, struct umutex *m, uint32_t flags,
2094 struct _umtx_time *timeout, int try)
2095 {
2096 struct abs_timeout timo;
2097 struct umtx_q *uq, *uq2;
2098 struct umtx_pi *pi;
2099 uint32_t ceiling;
2100 uint32_t owner, id;
2101 int error, pri, old_inherited_pri, su, rv;
2102
2103 id = td->td_tid;
2104 uq = td->td_umtxq;
2105 if ((error = umtx_key_get(m, (flags & UMUTEX_ROBUST) != 0 ?
2106 TYPE_PP_ROBUST_UMUTEX : TYPE_PP_UMUTEX, GET_SHARE(flags),
2107 &uq->uq_key)) != 0)
2108 return (error);
2109
2110 if (timeout != NULL)
2111 abs_timeout_init2(&timo, timeout);
2112
2113 su = (priv_check(td, PRIV_SCHED_RTPRIO) == 0);
2114 for (;;) {
2115 old_inherited_pri = uq->uq_inherited_pri;
2116 umtxq_lock(&uq->uq_key);
2117 umtxq_busy(&uq->uq_key);
2118 umtxq_unlock(&uq->uq_key);
2119
2120 rv = fueword32(&m->m_ceilings[0], &ceiling);
2121 if (rv == -1) {
2122 error = EFAULT;
2123 goto out;
2124 }
2125 ceiling = RTP_PRIO_MAX - ceiling;
2126 if (ceiling > RTP_PRIO_MAX) {
2127 error = EINVAL;
2128 goto out;
2129 }
2130
2131 mtx_lock(&umtx_lock);
2132 if (UPRI(td) < PRI_MIN_REALTIME + ceiling) {
2133 mtx_unlock(&umtx_lock);
2134 error = EINVAL;
2135 goto out;
2136 }
2137 if (su && PRI_MIN_REALTIME + ceiling < uq->uq_inherited_pri) {
2138 uq->uq_inherited_pri = PRI_MIN_REALTIME + ceiling;
2139 thread_lock(td);
2140 if (uq->uq_inherited_pri < UPRI(td))
2141 sched_lend_user_prio(td, uq->uq_inherited_pri);
2142 thread_unlock(td);
2143 }
2144 mtx_unlock(&umtx_lock);
2145
2146 rv = casueword32(&m->m_owner, UMUTEX_CONTESTED, &owner,
2147 id | UMUTEX_CONTESTED);
2148 /* The address was invalid. */
2149 if (rv == -1) {
2150 error = EFAULT;
2151 break;
2152 }
2153
2154 if (owner == UMUTEX_CONTESTED) {
2155 error = 0;
2156 break;
2157 } else if (owner == UMUTEX_RB_OWNERDEAD) {
2158 rv = casueword32(&m->m_owner, UMUTEX_RB_OWNERDEAD,
2159 &owner, id | UMUTEX_CONTESTED);
2160 if (rv == -1) {
2161 error = EFAULT;
2162 break;
2163 }
2164 if (owner == UMUTEX_RB_OWNERDEAD) {
2165 error = EOWNERDEAD; /* success */
2166 break;
2167 }
2168 error = 0;
2169 } else if (owner == UMUTEX_RB_NOTRECOV) {
2170 error = ENOTRECOVERABLE;
2171 break;
2172 }
2173
2174 if (try != 0) {
2175 error = EBUSY;
2176 break;
2177 }
2178
2179 /*
2180 * If we caught a signal, we have retried and now
2181 * exit immediately.
2182 */
2183 if (error != 0)
2184 break;
2185
2186 umtxq_lock(&uq->uq_key);
2187 umtxq_insert(uq);
2188 umtxq_unbusy(&uq->uq_key);
2189 error = umtxq_sleep(uq, "umtxpp", timeout == NULL ?
2190 NULL : &timo);
2191 umtxq_remove(uq);
2192 umtxq_unlock(&uq->uq_key);
2193
2194 mtx_lock(&umtx_lock);
2195 uq->uq_inherited_pri = old_inherited_pri;
2196 pri = PRI_MAX;
2197 TAILQ_FOREACH(pi, &uq->uq_pi_contested, pi_link) {
2198 uq2 = TAILQ_FIRST(&pi->pi_blocked);
2199 if (uq2 != NULL) {
2200 if (pri > UPRI(uq2->uq_thread))
2201 pri = UPRI(uq2->uq_thread);
2202 }
2203 }
2204 if (pri > uq->uq_inherited_pri)
2205 pri = uq->uq_inherited_pri;
2206 thread_lock(td);
2207 sched_lend_user_prio(td, pri);
2208 thread_unlock(td);
2209 mtx_unlock(&umtx_lock);
2210 }
2211
2212 if (error != 0 && error != EOWNERDEAD) {
2213 mtx_lock(&umtx_lock);
2214 uq->uq_inherited_pri = old_inherited_pri;
2215 pri = PRI_MAX;
2216 TAILQ_FOREACH(pi, &uq->uq_pi_contested, pi_link) {
2217 uq2 = TAILQ_FIRST(&pi->pi_blocked);
2218 if (uq2 != NULL) {
2219 if (pri > UPRI(uq2->uq_thread))
2220 pri = UPRI(uq2->uq_thread);
2221 }
2222 }
2223 if (pri > uq->uq_inherited_pri)
2224 pri = uq->uq_inherited_pri;
2225 thread_lock(td);
2226 sched_lend_user_prio(td, pri);
2227 thread_unlock(td);
2228 mtx_unlock(&umtx_lock);
2229 }
2230
2231 out:
2232 umtxq_unbusy_unlocked(&uq->uq_key);
2233 umtx_key_release(&uq->uq_key);
2234 return (error);
2235 }
2236
2237 /*
2238 * Unlock a PP mutex.
2239 */
2240 static int
2241 do_unlock_pp(struct thread *td, struct umutex *m, uint32_t flags, bool rb)
2242 {
2243 struct umtx_key key;
2244 struct umtx_q *uq, *uq2;
2245 struct umtx_pi *pi;
2246 uint32_t id, owner, rceiling;
2247 int error, pri, new_inherited_pri, su;
2248
2249 id = td->td_tid;
2250 uq = td->td_umtxq;
2251 su = (priv_check(td, PRIV_SCHED_RTPRIO) == 0);
2252
2253 /*
2254 * Make sure we own this mtx.
2255 */
2256 error = fueword32(&m->m_owner, &owner);
2257 if (error == -1)
2258 return (EFAULT);
2259
2260 if ((owner & ~UMUTEX_CONTESTED) != id)
2261 return (EPERM);
2262
2263 error = copyin(&m->m_ceilings[1], &rceiling, sizeof(uint32_t));
2264 if (error != 0)
2265 return (error);
2266
2267 if (rceiling == -1)
2268 new_inherited_pri = PRI_MAX;
2269 else {
2270 rceiling = RTP_PRIO_MAX - rceiling;
2271 if (rceiling > RTP_PRIO_MAX)
2272 return (EINVAL);
2273 new_inherited_pri = PRI_MIN_REALTIME + rceiling;
2274 }
2275
2276 if ((error = umtx_key_get(m, (flags & UMUTEX_ROBUST) != 0 ?
2277 TYPE_PP_ROBUST_UMUTEX : TYPE_PP_UMUTEX, GET_SHARE(flags),
2278 &key)) != 0)
2279 return (error);
2280 umtxq_lock(&key);
2281 umtxq_busy(&key);
2282 umtxq_unlock(&key);
2283 /*
2284 * For priority protected mutex, always set unlocked state
2285 * to UMUTEX_CONTESTED, so that userland always enters kernel
2286 * to lock the mutex, it is necessary because thread priority
2287 * has to be adjusted for such mutex.
2288 */
2289 error = suword32(&m->m_owner, umtx_unlock_val(flags, rb) |
2290 UMUTEX_CONTESTED);
2291
2292 umtxq_lock(&key);
2293 if (error == 0)
2294 umtxq_signal(&key, 1);
2295 umtxq_unbusy(&key);
2296 umtxq_unlock(&key);
2297
2298 if (error == -1)
2299 error = EFAULT;
2300 else {
2301 mtx_lock(&umtx_lock);
2302 if (su != 0)
2303 uq->uq_inherited_pri = new_inherited_pri;
2304 pri = PRI_MAX;
2305 TAILQ_FOREACH(pi, &uq->uq_pi_contested, pi_link) {
2306 uq2 = TAILQ_FIRST(&pi->pi_blocked);
2307 if (uq2 != NULL) {
2308 if (pri > UPRI(uq2->uq_thread))
2309 pri = UPRI(uq2->uq_thread);
2310 }
2311 }
2312 if (pri > uq->uq_inherited_pri)
2313 pri = uq->uq_inherited_pri;
2314 thread_lock(td);
2315 sched_lend_user_prio(td, pri);
2316 thread_unlock(td);
2317 mtx_unlock(&umtx_lock);
2318 }
2319 umtx_key_release(&key);
2320 return (error);
2321 }
2322
/*
 * Set the priority ceiling of a PP mutex.
 *
 * Locks the (currently unowned) mutex word via CAS, stores the new
 * ceiling, and releases it; if the caller already owns the mutex the
 * ceiling is updated in place.  Otherwise the thread sleeps until the
 * mutex can be grabbed.  On success the previous ceiling is copied out
 * to '*old_ceiling' when non-NULL.  Returns 0, EINVAL (not a PP mutex
 * or ceiling out of range), EFAULT, EOWNERDEAD, ENOTRECOVERABLE, or a
 * sleep error.
 */
static int
do_set_ceiling(struct thread *td, struct umutex *m, uint32_t ceiling,
    uint32_t *old_ceiling)
{
	struct umtx_q *uq;
	uint32_t flags, id, owner, save_ceiling;
	int error, rv, rv1;

	error = fueword32(&m->m_flags, &flags);
	if (error == -1)
		return (EFAULT);
	if ((flags & UMUTEX_PRIO_PROTECT) == 0)
		return (EINVAL);
	if (ceiling > RTP_PRIO_MAX)
		return (EINVAL);
	id = td->td_tid;
	uq = td->td_umtxq;
	if ((error = umtx_key_get(m, (flags & UMUTEX_ROBUST) != 0 ?
	    TYPE_PP_ROBUST_UMUTEX : TYPE_PP_UMUTEX, GET_SHARE(flags),
	    &uq->uq_key)) != 0)
		return (error);
	for (;;) {
		umtxq_lock(&uq->uq_key);
		umtxq_busy(&uq->uq_key);
		umtxq_unlock(&uq->uq_key);

		/* Remember the old ceiling for the copy-out below. */
		rv = fueword32(&m->m_ceilings[0], &save_ceiling);
		if (rv == -1) {
			error = EFAULT;
			break;
		}

		rv = casueword32(&m->m_owner, UMUTEX_CONTESTED, &owner,
		    id | UMUTEX_CONTESTED);
		if (rv == -1) {
			error = EFAULT;
			break;
		}

		if (owner == UMUTEX_CONTESTED) {
			/* Grabbed the mutex: store ceiling, release it. */
			rv = suword32(&m->m_ceilings[0], ceiling);
			rv1 = suword32(&m->m_owner, UMUTEX_CONTESTED);
			error = (rv == 0 && rv1 == 0) ? 0: EFAULT;
			break;
		}

		if ((owner & ~UMUTEX_CONTESTED) == id) {
			/* We already own it: update in place. */
			rv = suword32(&m->m_ceilings[0], ceiling);
			error = rv == 0 ? 0 : EFAULT;
			break;
		}

		if (owner == UMUTEX_RB_OWNERDEAD) {
			error = EOWNERDEAD;
			break;
		} else if (owner == UMUTEX_RB_NOTRECOV) {
			error = ENOTRECOVERABLE;
			break;
		}

		/*
		 * If we caught a signal, we have retried and now
		 * exit immediately.
		 */
		if (error != 0)
			break;

		/*
		 * We set the contested bit, sleep. Otherwise the lock changed
		 * and we need to retry or we lost a race to the thread
		 * unlocking the umtx.
		 */
		umtxq_lock(&uq->uq_key);
		umtxq_insert(uq);
		umtxq_unbusy(&uq->uq_key);
		error = umtxq_sleep(uq, "umtxpp", NULL);
		umtxq_remove(uq);
		umtxq_unlock(&uq->uq_key);
	}
	umtxq_lock(&uq->uq_key);
	if (error == 0)
		umtxq_signal(&uq->uq_key, INT_MAX);
	umtxq_unbusy(&uq->uq_key);
	umtxq_unlock(&uq->uq_key);
	umtx_key_release(&uq->uq_key);
	if (error == 0 && old_ceiling != NULL) {
		rv = suword32(old_ceiling, save_ceiling);
		error = rv == 0 ? 0 : EFAULT;
	}
	return (error);
}
2414
2415 /*
2416 * Lock a userland POSIX mutex.
2417 */
2418 static int
2419 do_lock_umutex(struct thread *td, struct umutex *m,
2420 struct _umtx_time *timeout, int mode)
2421 {
2422 uint32_t flags;
2423 int error;
2424
2425 error = fueword32(&m->m_flags, &flags);
2426 if (error == -1)
2427 return (EFAULT);
2428
2429 switch (flags & (UMUTEX_PRIO_INHERIT | UMUTEX_PRIO_PROTECT)) {
2430 case 0:
2431 error = do_lock_normal(td, m, flags, timeout, mode);
2432 break;
2433 case UMUTEX_PRIO_INHERIT:
2434 error = do_lock_pi(td, m, flags, timeout, mode);
2435 break;
2436 case UMUTEX_PRIO_PROTECT:
2437 error = do_lock_pp(td, m, flags, timeout, mode);
2438 break;
2439 default:
2440 return (EINVAL);
2441 }
2442 if (timeout == NULL) {
2443 if (error == EINTR && mode != _UMUTEX_WAIT)
2444 error = ERESTART;
2445 } else {
2446 /* Timed-locking is not restarted. */
2447 if (error == ERESTART)
2448 error = EINTR;
2449 }
2450 return (error);
2451 }
2452
2453 /*
2454 * Unlock a userland POSIX mutex.
2455 */
2456 static int
2457 do_unlock_umutex(struct thread *td, struct umutex *m, bool rb)
2458 {
2459 uint32_t flags;
2460 int error;
2461
2462 error = fueword32(&m->m_flags, &flags);
2463 if (error == -1)
2464 return (EFAULT);
2465
2466 switch (flags & (UMUTEX_PRIO_INHERIT | UMUTEX_PRIO_PROTECT)) {
2467 case 0:
2468 return (do_unlock_normal(td, m, flags, rb));
2469 case UMUTEX_PRIO_INHERIT:
2470 return (do_unlock_pi(td, m, flags, rb));
2471 case UMUTEX_PRIO_PROTECT:
2472 return (do_unlock_pp(td, m, flags, rb));
2473 }
2474
2475 return (EINVAL);
2476 }
2477
/*
 * Wait on a userland condition variable.
 *
 * Implements the pthread_cond_wait() kernel side: atomically (with
 * respect to other umtx operations on 'cv') queue this thread, mark
 * the condvar as having waiters, drop the user mutex 'm', and sleep.
 * If CVWAIT_CLOCKID is set in wflags the clock id is read from
 * cv->c_clockid; CVWAIT_ABSTIME makes 'timeout' absolute.
 *
 * Returns 0 if signalled, ETIMEDOUT/EINTR/EFAULT/EINVAL otherwise.
 * Note the mutex is NOT re-acquired by the kernel; userland relocks it.
 */
static int
do_cv_wait(struct thread *td, struct ucond *cv, struct umutex *m,
    struct timespec *timeout, u_long wflags)
{
	struct abs_timeout timo;
	struct umtx_q *uq;
	uint32_t flags, clockid, hasw;
	int error;

	uq = td->td_umtxq;
	error = fueword32(&cv->c_flags, &flags);
	if (error == -1)
		return (EFAULT);
	error = umtx_key_get(cv, TYPE_CV, GET_SHARE(flags), &uq->uq_key);
	if (error != 0)
		return (error);

	if ((wflags & CVWAIT_CLOCKID) != 0) {
		error = fueword32(&cv->c_clockid, &clockid);
		if (error == -1) {
			umtx_key_release(&uq->uq_key);
			return (EFAULT);
		}
		if (clockid < CLOCK_REALTIME ||
		    clockid >= CLOCK_THREAD_CPUTIME_ID) {
			/* hmm, only HW clock id will work. */
			umtx_key_release(&uq->uq_key);
			return (EINVAL);
		}
	} else {
		clockid = CLOCK_REALTIME;
	}

	/* Queue ourselves while holding the chain busy. */
	umtxq_lock(&uq->uq_key);
	umtxq_busy(&uq->uq_key);
	umtxq_insert(uq);
	umtxq_unlock(&uq->uq_key);

	/*
	 * Set c_has_waiters to 1 before releasing user mutex, also
	 * don't modify cache line when unnecessary.
	 */
	error = fueword32(&cv->c_has_waiters, &hasw);
	if (error == 0 && hasw == 0)
		suword32(&cv->c_has_waiters, 1);

	umtxq_unbusy_unlocked(&uq->uq_key);

	/* Release the user mutex; any unlock error aborts the sleep. */
	error = do_unlock_umutex(td, m, false);

	if (timeout != NULL)
		abs_timeout_init(&timo, clockid, (wflags & CVWAIT_ABSTIME) != 0,
		    timeout);

	umtxq_lock(&uq->uq_key);
	if (error == 0) {
		error = umtxq_sleep(uq, "ucond", timeout == NULL ?
		    NULL : &timo);
	}

	/* UQF_UMTXQ cleared means a signal/broadcast removed us: success. */
	if ((uq->uq_flags & UQF_UMTXQ) == 0)
		error = 0;
	else {
		/*
		 * This must be a timeout, an interrupting signal, or a
		 * spurious wakeup; clear the c_has_waiters flag when we
		 * were the last queued waiter.
		 */
		umtxq_busy(&uq->uq_key);
		if ((uq->uq_flags & UQF_UMTXQ) != 0) {
			int oldlen = uq->uq_cur_queue->length;
			umtxq_remove(uq);
			if (oldlen == 1) {
				/* Drop the chain lock across the user-memory store. */
				umtxq_unlock(&uq->uq_key);
				suword32(&cv->c_has_waiters, 0);
				umtxq_lock(&uq->uq_key);
			}
		}
		umtxq_unbusy(&uq->uq_key);
		if (error == ERESTART)
			error = EINTR;
	}

	umtxq_unlock(&uq->uq_key);
	umtx_key_release(&uq->uq_key);
	return (error);
}
2565
2566 /*
2567 * Signal a userland condition variable.
2568 */
/*
 * Signal a userland condition variable: wake at most one waiter.
 * If that wakes the last queued waiter, clear c_has_waiters in user
 * memory (with the chain lock dropped around the store).
 */
static int
do_cv_signal(struct thread *td, struct ucond *cv)
{
	struct umtx_key key;
	int error, cnt, nwake;
	uint32_t flags;

	error = fueword32(&cv->c_flags, &flags);
	if (error == -1)
		return (EFAULT);
	if ((error = umtx_key_get(cv, TYPE_CV, GET_SHARE(flags), &key)) != 0)
		return (error);
	umtxq_lock(&key);
	umtxq_busy(&key);
	cnt = umtxq_count(&key);
	nwake = umtxq_signal(&key, 1);
	if (cnt <= nwake) {
		/* No waiters remain; chain lock cannot be held across suword32. */
		umtxq_unlock(&key);
		error = suword32(&cv->c_has_waiters, 0);
		if (error == -1)
			error = EFAULT;
		umtxq_lock(&key);
	}
	umtxq_unbusy(&key);
	umtxq_unlock(&key);
	umtx_key_release(&key);
	return (error);
}
2597
/*
 * Broadcast a userland condition variable: wake every queued waiter
 * and clear the c_has_waiters hint in user memory.
 */
static int
do_cv_broadcast(struct thread *td, struct ucond *cv)
{
	struct umtx_key key;
	int error;
	uint32_t flags;

	error = fueword32(&cv->c_flags, &flags);
	if (error == -1)
		return (EFAULT);
	if ((error = umtx_key_get(cv, TYPE_CV, GET_SHARE(flags), &key)) != 0)
		return (error);

	umtxq_lock(&key);
	umtxq_busy(&key);
	umtxq_signal(&key, INT_MAX);	/* wake all waiters */
	umtxq_unlock(&key);

	/* Chain stays busy (but unlocked) while touching user memory. */
	error = suword32(&cv->c_has_waiters, 0);
	if (error == -1)
		error = EFAULT;

	umtxq_unbusy_unlocked(&key);

	umtx_key_release(&key);
	return (error);
}
2625
/*
 * Acquire a userland rwlock for reading.
 *
 * Fast path: CAS-increment the reader count in rw_state while no
 * writer bits block us.  Slow path: busy the chain, set
 * URWLOCK_READ_WAITERS, bump rw_blocked_readers and sleep until the
 * blocking bits clear.  Unless reader-preference is requested (via
 * fflag or rw_flags), pending writers (URWLOCK_WRITE_WAITERS) also
 * block new readers to avoid writer starvation.
 */
static int
do_rw_rdlock(struct thread *td, struct urwlock *rwlock, long fflag, struct _umtx_time *timeout)
{
	struct abs_timeout timo;
	struct umtx_q *uq;
	uint32_t flags, wrflags;
	int32_t state, oldstate;
	int32_t blocked_readers;
	int error, error1, rv;

	uq = td->td_umtxq;
	error = fueword32(&rwlock->rw_flags, &flags);
	if (error == -1)
		return (EFAULT);
	error = umtx_key_get(rwlock, TYPE_RWLOCK, GET_SHARE(flags), &uq->uq_key);
	if (error != 0)
		return (error);

	if (timeout != NULL)
		abs_timeout_init2(&timo, timeout);

	/* Bits in rw_state that block a new reader. */
	wrflags = URWLOCK_WRITE_OWNER;
	if (!(fflag & URWLOCK_PREFER_READER) && !(flags & URWLOCK_PREFER_READER))
		wrflags |= URWLOCK_WRITE_WAITERS;

	for (;;) {
		rv = fueword32(&rwlock->rw_state, &state);
		if (rv == -1) {
			umtx_key_release(&uq->uq_key);
			return (EFAULT);
		}

		/* try to lock it */
		while (!(state & wrflags)) {
			if (__predict_false(URWLOCK_READER_COUNT(state) == URWLOCK_MAX_READERS)) {
				umtx_key_release(&uq->uq_key);
				return (EAGAIN);
			}
			rv = casueword32(&rwlock->rw_state, state,
			    &oldstate, state + 1);
			if (rv == -1) {
				umtx_key_release(&uq->uq_key);
				return (EFAULT);
			}
			if (oldstate == state) {
				/* CAS succeeded: read lock acquired. */
				umtx_key_release(&uq->uq_key);
				return (0);
			}
			error = umtxq_check_susp(td);
			if (error != 0)
				break;
			state = oldstate;
		}

		if (error)
			break;

		/* grab monitor lock */
		umtxq_lock(&uq->uq_key);
		umtxq_busy(&uq->uq_key);
		umtxq_unlock(&uq->uq_key);

		/*
		 * re-read the state, in case it changed between the try-lock above
		 * and the check below
		 */
		rv = fueword32(&rwlock->rw_state, &state);
		if (rv == -1)
			error = EFAULT;

		/* set read contention bit */
		while (error == 0 && (state & wrflags) &&
		    !(state & URWLOCK_READ_WAITERS)) {
			rv = casueword32(&rwlock->rw_state, state,
			    &oldstate, state | URWLOCK_READ_WAITERS);
			if (rv == -1) {
				error = EFAULT;
				break;
			}
			if (oldstate == state)
				goto sleep;
			state = oldstate;
			error = umtxq_check_susp(td);
			if (error != 0)
				break;
		}
		if (error != 0) {
			umtxq_unbusy_unlocked(&uq->uq_key);
			break;
		}

		/* state is changed while setting flags, restart */
		if (!(state & wrflags)) {
			umtxq_unbusy_unlocked(&uq->uq_key);
			error = umtxq_check_susp(td);
			if (error != 0)
				break;
			continue;
		}

sleep:
		/* contention bit is set, before sleeping, increase read waiter count */
		rv = fueword32(&rwlock->rw_blocked_readers,
		    &blocked_readers);
		if (rv == -1) {
			umtxq_unbusy_unlocked(&uq->uq_key);
			error = EFAULT;
			break;
		}
		suword32(&rwlock->rw_blocked_readers, blocked_readers+1);

		while (state & wrflags) {
			umtxq_lock(&uq->uq_key);
			umtxq_insert(uq);
			umtxq_unbusy(&uq->uq_key);

			error = umtxq_sleep(uq, "urdlck", timeout == NULL ?
			    NULL : &timo);

			umtxq_busy(&uq->uq_key);
			umtxq_remove(uq);
			umtxq_unlock(&uq->uq_key);
			if (error)
				break;
			rv = fueword32(&rwlock->rw_state, &state);
			if (rv == -1) {
				error = EFAULT;
				break;
			}
		}

		/* decrease read waiter count, and may clear read contention bit */
		rv = fueword32(&rwlock->rw_blocked_readers,
		    &blocked_readers);
		if (rv == -1) {
			umtxq_unbusy_unlocked(&uq->uq_key);
			error = EFAULT;
			break;
		}
		suword32(&rwlock->rw_blocked_readers, blocked_readers-1);
		if (blocked_readers == 1) {
			/* We were the last blocked reader: clear READ_WAITERS. */
			rv = fueword32(&rwlock->rw_state, &state);
			if (rv == -1) {
				umtxq_unbusy_unlocked(&uq->uq_key);
				error = EFAULT;
				break;
			}
			for (;;) {
				rv = casueword32(&rwlock->rw_state, state,
				    &oldstate, state & ~URWLOCK_READ_WAITERS);
				if (rv == -1) {
					error = EFAULT;
					break;
				}
				if (oldstate == state)
					break;
				state = oldstate;
				error1 = umtxq_check_susp(td);
				if (error1 != 0) {
					/* Keep the first error; do not overwrite it. */
					if (error == 0)
						error = error1;
					break;
				}
			}
		}

		umtxq_unbusy_unlocked(&uq->uq_key);
		if (error != 0)
			break;
	}
	umtx_key_release(&uq->uq_key);
	if (error == ERESTART)
		error = EINTR;
	return (error);
}
2801
/*
 * Acquire a userland rwlock for writing.
 *
 * Fast path: CAS URWLOCK_WRITE_OWNER into rw_state while it is neither
 * write-owned nor read-held.  Slow path: busy the chain, set
 * URWLOCK_WRITE_WAITERS, bump rw_blocked_writers and sleep on the
 * exclusive queue.  If this thread gives up (error) and there are
 * blocked readers but no writer activity left, the shared queue is
 * signalled so readers are not stranded.
 */
static int
do_rw_wrlock(struct thread *td, struct urwlock *rwlock, struct _umtx_time *timeout)
{
	struct abs_timeout timo;
	struct umtx_q *uq;
	uint32_t flags;
	int32_t state, oldstate;
	int32_t blocked_writers;
	int32_t blocked_readers;
	int error, error1, rv;

	uq = td->td_umtxq;
	error = fueword32(&rwlock->rw_flags, &flags);
	if (error == -1)
		return (EFAULT);
	error = umtx_key_get(rwlock, TYPE_RWLOCK, GET_SHARE(flags), &uq->uq_key);
	if (error != 0)
		return (error);

	if (timeout != NULL)
		abs_timeout_init2(&timo, timeout);

	blocked_readers = 0;
	for (;;) {
		rv = fueword32(&rwlock->rw_state, &state);
		if (rv == -1) {
			umtx_key_release(&uq->uq_key);
			return (EFAULT);
		}
		while (!(state & URWLOCK_WRITE_OWNER) && URWLOCK_READER_COUNT(state) == 0) {
			rv = casueword32(&rwlock->rw_state, state,
			    &oldstate, state | URWLOCK_WRITE_OWNER);
			if (rv == -1) {
				umtx_key_release(&uq->uq_key);
				return (EFAULT);
			}
			if (oldstate == state) {
				/* CAS succeeded: write lock acquired. */
				umtx_key_release(&uq->uq_key);
				return (0);
			}
			state = oldstate;
			error = umtxq_check_susp(td);
			if (error != 0)
				break;
		}

		if (error) {
			/*
			 * On failure, hand the lock opportunity to any
			 * blocked readers if no writer remains interested.
			 */
			if (!(state & (URWLOCK_WRITE_OWNER|URWLOCK_WRITE_WAITERS)) &&
			    blocked_readers != 0) {
				umtxq_lock(&uq->uq_key);
				umtxq_busy(&uq->uq_key);
				umtxq_signal_queue(&uq->uq_key, INT_MAX, UMTX_SHARED_QUEUE);
				umtxq_unbusy(&uq->uq_key);
				umtxq_unlock(&uq->uq_key);
			}

			break;
		}

		/* grab monitor lock */
		umtxq_lock(&uq->uq_key);
		umtxq_busy(&uq->uq_key);
		umtxq_unlock(&uq->uq_key);

		/*
		 * re-read the state, in case it changed between the try-lock above
		 * and the check below
		 */
		rv = fueword32(&rwlock->rw_state, &state);
		if (rv == -1)
			error = EFAULT;

		/* set write contention bit */
		while (error == 0 && ((state & URWLOCK_WRITE_OWNER) ||
		    URWLOCK_READER_COUNT(state) != 0) &&
		    (state & URWLOCK_WRITE_WAITERS) == 0) {
			rv = casueword32(&rwlock->rw_state, state,
			    &oldstate, state | URWLOCK_WRITE_WAITERS);
			if (rv == -1) {
				error = EFAULT;
				break;
			}
			if (oldstate == state)
				goto sleep;
			state = oldstate;
			error = umtxq_check_susp(td);
			if (error != 0)
				break;
		}
		if (error != 0) {
			umtxq_unbusy_unlocked(&uq->uq_key);
			break;
		}

		/* state changed while setting flags, restart */
		if (!(state & URWLOCK_WRITE_OWNER) && URWLOCK_READER_COUNT(state) == 0) {
			umtxq_unbusy_unlocked(&uq->uq_key);
			error = umtxq_check_susp(td);
			if (error != 0)
				break;
			continue;
		}
sleep:
		/* Contention bit set; count ourselves as a blocked writer. */
		rv = fueword32(&rwlock->rw_blocked_writers,
		    &blocked_writers);
		if (rv == -1) {
			umtxq_unbusy_unlocked(&uq->uq_key);
			error = EFAULT;
			break;
		}
		suword32(&rwlock->rw_blocked_writers, blocked_writers+1);

		while ((state & URWLOCK_WRITE_OWNER) || URWLOCK_READER_COUNT(state) != 0) {
			umtxq_lock(&uq->uq_key);
			umtxq_insert_queue(uq, UMTX_EXCLUSIVE_QUEUE);
			umtxq_unbusy(&uq->uq_key);

			error = umtxq_sleep(uq, "uwrlck", timeout == NULL ?
			    NULL : &timo);

			umtxq_busy(&uq->uq_key);
			umtxq_remove_queue(uq, UMTX_EXCLUSIVE_QUEUE);
			umtxq_unlock(&uq->uq_key);
			if (error)
				break;
			rv = fueword32(&rwlock->rw_state, &state);
			if (rv == -1) {
				error = EFAULT;
				break;
			}
		}

		rv = fueword32(&rwlock->rw_blocked_writers,
		    &blocked_writers);
		if (rv == -1) {
			umtxq_unbusy_unlocked(&uq->uq_key);
			error = EFAULT;
			break;
		}
		suword32(&rwlock->rw_blocked_writers, blocked_writers-1);
		if (blocked_writers == 1) {
			/* Last blocked writer: clear WRITE_WAITERS. */
			rv = fueword32(&rwlock->rw_state, &state);
			if (rv == -1) {
				umtxq_unbusy_unlocked(&uq->uq_key);
				error = EFAULT;
				break;
			}
			for (;;) {
				rv = casueword32(&rwlock->rw_state, state,
				    &oldstate, state & ~URWLOCK_WRITE_WAITERS);
				if (rv == -1) {
					error = EFAULT;
					break;
				}
				if (oldstate == state)
					break;
				state = oldstate;
				error1 = umtxq_check_susp(td);
				/*
				 * We are leaving the URWLOCK_WRITE_WAITERS
				 * behind, but this should not harm the
				 * correctness.
				 */
				if (error1 != 0) {
					if (error == 0)
						error = error1;
					break;
				}
			}
			/* Remember reader backlog for the error path above. */
			rv = fueword32(&rwlock->rw_blocked_readers,
			    &blocked_readers);
			if (rv == -1) {
				umtxq_unbusy_unlocked(&uq->uq_key);
				error = EFAULT;
				break;
			}
		} else
			blocked_readers = 0;

		umtxq_unbusy_unlocked(&uq->uq_key);
	}

	umtx_key_release(&uq->uq_key);
	if (error == ERESTART)
		error = EINTR;
	return (error);
}
2987
/*
 * Release a userland rwlock held by the caller.
 *
 * Clears URWLOCK_WRITE_OWNER (write unlock) or decrements the reader
 * count (read unlock) with a CAS retry loop; returns EPERM if the lock
 * is not actually held in the corresponding mode.  Then wakes waiters:
 * by default writers first (one), else all readers; reversed when
 * URWLOCK_PREFER_READER is set.
 */
static int
do_rw_unlock(struct thread *td, struct urwlock *rwlock)
{
	struct umtx_q *uq;
	uint32_t flags;
	int32_t state, oldstate;
	int error, rv, q, count;

	uq = td->td_umtxq;
	error = fueword32(&rwlock->rw_flags, &flags);
	if (error == -1)
		return (EFAULT);
	error = umtx_key_get(rwlock, TYPE_RWLOCK, GET_SHARE(flags), &uq->uq_key);
	if (error != 0)
		return (error);

	error = fueword32(&rwlock->rw_state, &state);
	if (error == -1) {
		error = EFAULT;
		goto out;
	}
	if (state & URWLOCK_WRITE_OWNER) {
		/* Write unlock: CAS the owner bit away, retrying on races. */
		for (;;) {
			rv = casueword32(&rwlock->rw_state, state,
			    &oldstate, state & ~URWLOCK_WRITE_OWNER);
			if (rv == -1) {
				error = EFAULT;
				goto out;
			}
			if (oldstate != state) {
				state = oldstate;
				if (!(oldstate & URWLOCK_WRITE_OWNER)) {
					/* Raced with another unlock: not owner. */
					error = EPERM;
					goto out;
				}
				error = umtxq_check_susp(td);
				if (error != 0)
					goto out;
			} else
				break;
		}
	} else if (URWLOCK_READER_COUNT(state) != 0) {
		/* Read unlock: CAS-decrement the reader count. */
		for (;;) {
			rv = casueword32(&rwlock->rw_state, state,
			    &oldstate, state - 1);
			if (rv == -1) {
				error = EFAULT;
				goto out;
			}
			if (oldstate != state) {
				state = oldstate;
				if (URWLOCK_READER_COUNT(oldstate) == 0) {
					error = EPERM;
					goto out;
				}
				error = umtxq_check_susp(td);
				if (error != 0)
					goto out;
			} else
				break;
		}
	} else {
		/* Lock not held at all. */
		error = EPERM;
		goto out;
	}

	count = 0;

	/* Pick which queue to wake, honouring the preference flag. */
	if (!(flags & URWLOCK_PREFER_READER)) {
		if (state & URWLOCK_WRITE_WAITERS) {
			count = 1;
			q = UMTX_EXCLUSIVE_QUEUE;
		} else if (state & URWLOCK_READ_WAITERS) {
			count = INT_MAX;
			q = UMTX_SHARED_QUEUE;
		}
	} else {
		if (state & URWLOCK_READ_WAITERS) {
			count = INT_MAX;
			q = UMTX_SHARED_QUEUE;
		} else if (state & URWLOCK_WRITE_WAITERS) {
			count = 1;
			q = UMTX_EXCLUSIVE_QUEUE;
		}
	}

	if (count) {
		umtxq_lock(&uq->uq_key);
		umtxq_busy(&uq->uq_key);
		umtxq_signal_queue(&uq->uq_key, count, q);
		umtxq_unbusy(&uq->uq_key);
		umtxq_unlock(&uq->uq_key);
	}
out:
	umtx_key_release(&uq->uq_key);
	return (error);
}
3085
3086 #if defined(COMPAT_FREEBSD9) || defined(COMPAT_FREEBSD10)
/*
 * Wait on a legacy (COMPAT_FREEBSD9/10) userland semaphore.
 *
 * Queues the thread, advertises a waiter via _has_waiters, then sleeps
 * unless _count is already non-zero (in which case userland can take
 * the semaphore and we return 0 immediately).
 */
static int
do_sem_wait(struct thread *td, struct _usem *sem, struct _umtx_time *timeout)
{
	struct abs_timeout timo;
	struct umtx_q *uq;
	uint32_t flags, count, count1;
	int error, rv;

	uq = td->td_umtxq;
	error = fueword32(&sem->_flags, &flags);
	if (error == -1)
		return (EFAULT);
	error = umtx_key_get(sem, TYPE_SEM, GET_SHARE(flags), &uq->uq_key);
	if (error != 0)
		return (error);

	if (timeout != NULL)
		abs_timeout_init2(&timo, timeout);

	umtxq_lock(&uq->uq_key);
	umtxq_busy(&uq->uq_key);
	umtxq_insert(uq);
	umtxq_unlock(&uq->uq_key);
	/* Set _has_waiters (if clear), then re-check _count under busy. */
	rv = casueword32(&sem->_has_waiters, 0, &count1, 1);
	if (rv == 0)
		rv = fueword32(&sem->_count, &count);
	if (rv == -1 || count != 0) {
		/* Fault, or semaphore became available: do not sleep. */
		umtxq_lock(&uq->uq_key);
		umtxq_unbusy(&uq->uq_key);
		umtxq_remove(uq);
		umtxq_unlock(&uq->uq_key);
		umtx_key_release(&uq->uq_key);
		return (rv == -1 ? EFAULT : 0);
	}
	umtxq_lock(&uq->uq_key);
	umtxq_unbusy(&uq->uq_key);

	error = umtxq_sleep(uq, "usem", timeout == NULL ? NULL : &timo);

	/* Removed from the queue by a waker: treat as success. */
	if ((uq->uq_flags & UQF_UMTXQ) == 0)
		error = 0;
	else {
		umtxq_remove(uq);
		/* A relative timeout cannot be restarted. */
		if (error == ERESTART && timeout != NULL &&
		    (timeout->_flags & UMTX_ABSTIME) == 0)
			error = EINTR;
	}
	umtxq_unlock(&uq->uq_key);
	umtx_key_release(&uq->uq_key);
	return (error);
}
3139
/*
 * Signal a userland semaphore (legacy _usem): wake one waiter, and if
 * it was the last one, clear the _has_waiters hint in user memory.
 */
static int
do_sem_wake(struct thread *td, struct _usem *sem)
{
	struct umtx_key key;
	int error, cnt;
	uint32_t flags;

	error = fueword32(&sem->_flags, &flags);
	if (error == -1)
		return (EFAULT);
	if ((error = umtx_key_get(sem, TYPE_SEM, GET_SHARE(flags), &key)) != 0)
		return (error);
	umtxq_lock(&key);
	umtxq_busy(&key);
	cnt = umtxq_count(&key);
	if (cnt > 0) {
		/*
		 * Check if count is greater than 0, this means the memory is
		 * still being referenced by user code, so we can safely
		 * update _has_waiters flag.
		 */
		if (cnt == 1) {
			/* Last waiter; drop chain lock across the user store. */
			umtxq_unlock(&key);
			error = suword32(&sem->_has_waiters, 0);
			umtxq_lock(&key);
			if (error == -1)
				error = EFAULT;
		}
		umtxq_signal(&key, 1);
	}
	umtxq_unbusy(&key);
	umtxq_unlock(&key);
	umtx_key_release(&key);
	return (error);
}
3178 #endif
3179
3180 static int
3181 do_sem2_wait(struct thread *td, struct _usem2 *sem, struct _umtx_time *timeout)
3182 {
3183 struct abs_timeout timo;
3184 struct umtx_q *uq;
3185 uint32_t count, flags;
3186 int error, rv;
3187
3188 uq = td->td_umtxq;
3189 flags = fuword32(&sem->_flags);
3190 error = umtx_key_get(sem, TYPE_SEM, GET_SHARE(flags), &uq->uq_key);
3191 if (error != 0)
3192 return (error);
3193
3194 if (timeout != NULL)
3195 abs_timeout_init2(&timo, timeout);
3196
3197 umtxq_lock(&uq->uq_key);
3198 umtxq_busy(&uq->uq_key);
3199 umtxq_insert(uq);
3200 umtxq_unlock(&uq->uq_key);
3201 rv = fueword32(&sem->_count, &count);
3202 if (rv == -1) {
3203 umtxq_lock(&uq->uq_key);
3204 umtxq_unbusy(&uq->uq_key);
3205 umtxq_remove(uq);
3206 umtxq_unlock(&uq->uq_key);
3207 umtx_key_release(&uq->uq_key);
3208 return (EFAULT);
3209 }
3210 for (;;) {
3211 if (USEM_COUNT(count) != 0) {
3212 umtxq_lock(&uq->uq_key);
3213 umtxq_unbusy(&uq->uq_key);
3214 umtxq_remove(uq);
3215 umtxq_unlock(&uq->uq_key);
3216 umtx_key_release(&uq->uq_key);
3217 return (0);
3218 }
3219 if (count == USEM_HAS_WAITERS)
3220 break;
3221 rv = casueword32(&sem->_count, 0, &count, USEM_HAS_WAITERS);
3222 if (rv == -1) {
3223 umtxq_lock(&uq->uq_key);
3224 umtxq_unbusy(&uq->uq_key);
3225 umtxq_remove(uq);
3226 umtxq_unlock(&uq->uq_key);
3227 umtx_key_release(&uq->uq_key);
3228 return (EFAULT);
3229 }
3230 if (count == 0)
3231 break;
3232 }
3233 umtxq_lock(&uq->uq_key);
3234 umtxq_unbusy(&uq->uq_key);
3235
3236 error = umtxq_sleep(uq, "usem", timeout == NULL ? NULL : &timo);
3237
3238 if ((uq->uq_flags & UQF_UMTXQ) == 0)
3239 error = 0;
3240 else {
3241 umtxq_remove(uq);
3242 if (timeout != NULL && (timeout->_flags & UMTX_ABSTIME) == 0) {
3243 /* A relative timeout cannot be restarted. */
3244 if (error == ERESTART)
3245 error = EINTR;
3246 if (error == EINTR) {
3247 abs_timeout_update(&timo);
3248 timespecsub(&timo.end, &timo.cur,
3249 &timeout->_timeout);
3250 }
3251 }
3252 }
3253 umtxq_unlock(&uq->uq_key);
3254 umtx_key_release(&uq->uq_key);
3255 return (error);
3256 }
3257
/*
 * Signal a userland semaphore (_usem2): wake one waiter, and if it was
 * the last sleeper, clear USEM_HAS_WAITERS in _count with a CAS loop.
 */
static int
do_sem2_wake(struct thread *td, struct _usem2 *sem)
{
	struct umtx_key key;
	int error, cnt, rv;
	uint32_t count, flags;

	rv = fueword32(&sem->_flags, &flags);
	if (rv == -1)
		return (EFAULT);
	if ((error = umtx_key_get(sem, TYPE_SEM, GET_SHARE(flags), &key)) != 0)
		return (error);
	umtxq_lock(&key);
	umtxq_busy(&key);
	cnt = umtxq_count(&key);
	if (cnt > 0) {
		/*
		 * If this was the last sleeping thread, clear the waiters
		 * flag in _count.
		 */
		if (cnt == 1) {
			/* Chain lock must be dropped around user accesses. */
			umtxq_unlock(&key);
			rv = fueword32(&sem->_count, &count);
			while (rv != -1 && count & USEM_HAS_WAITERS)
				rv = casueword32(&sem->_count, count, &count,
				    count & ~USEM_HAS_WAITERS);
			if (rv == -1)
				error = EFAULT;
			umtxq_lock(&key);
		}

		umtxq_signal(&key, 1);
	}
	umtxq_unbusy(&key);
	umtxq_unlock(&key);
	umtx_key_release(&key);
	return (error);
}
3299
3300 inline int
3301 umtx_copyin_timeout(const void *addr, struct timespec *tsp)
3302 {
3303 int error;
3304
3305 error = copyin(addr, tsp, sizeof(struct timespec));
3306 if (error == 0) {
3307 if (tsp->tv_sec < 0 ||
3308 tsp->tv_nsec >= 1000000000 ||
3309 tsp->tv_nsec < 0)
3310 error = EINVAL;
3311 }
3312 return (error);
3313 }
3314
3315 static inline int
3316 umtx_copyin_umtx_time(const void *addr, size_t size, struct _umtx_time *tp)
3317 {
3318 int error;
3319
3320 if (size <= sizeof(struct timespec)) {
3321 tp->_clockid = CLOCK_REALTIME;
3322 tp->_flags = 0;
3323 error = copyin(addr, &tp->_timeout, sizeof(struct timespec));
3324 } else
3325 error = copyin(addr, tp, sizeof(struct _umtx_time));
3326 if (error != 0)
3327 return (error);
3328 if (tp->_timeout.tv_sec < 0 ||
3329 tp->_timeout.tv_nsec >= 1000000000 || tp->_timeout.tv_nsec < 0)
3330 return (EINVAL);
3331 return (0);
3332 }
3333
/* Placeholder for retired/unimplemented UMTX_OP_* slots. */
static int
__umtx_op_unimpl(struct thread *td, struct _umtx_op_args *uap)
{

	return (EOPNOTSUPP);
}
3340
3341 static int
3342 __umtx_op_wait(struct thread *td, struct _umtx_op_args *uap)
3343 {
3344 struct _umtx_time timeout, *tm_p;
3345 int error;
3346
3347 if (uap->uaddr2 == NULL)
3348 tm_p = NULL;
3349 else {
3350 error = umtx_copyin_umtx_time(
3351 uap->uaddr2, (size_t)uap->uaddr1, &timeout);
3352 if (error != 0)
3353 return (error);
3354 tm_p = &timeout;
3355 }
3356 return (do_wait(td, uap->obj, uap->val, tm_p, 0, 0));
3357 }
3358
3359 static int
3360 __umtx_op_wait_uint(struct thread *td, struct _umtx_op_args *uap)
3361 {
3362 struct _umtx_time timeout, *tm_p;
3363 int error;
3364
3365 if (uap->uaddr2 == NULL)
3366 tm_p = NULL;
3367 else {
3368 error = umtx_copyin_umtx_time(
3369 uap->uaddr2, (size_t)uap->uaddr1, &timeout);
3370 if (error != 0)
3371 return (error);
3372 tm_p = &timeout;
3373 }
3374 return (do_wait(td, uap->obj, uap->val, tm_p, 1, 0));
3375 }
3376
3377 static int
3378 __umtx_op_wait_uint_private(struct thread *td, struct _umtx_op_args *uap)
3379 {
3380 struct _umtx_time *tm_p, timeout;
3381 int error;
3382
3383 if (uap->uaddr2 == NULL)
3384 tm_p = NULL;
3385 else {
3386 error = umtx_copyin_umtx_time(
3387 uap->uaddr2, (size_t)uap->uaddr1, &timeout);
3388 if (error != 0)
3389 return (error);
3390 tm_p = &timeout;
3391 }
3392 return (do_wait(td, uap->obj, uap->val, tm_p, 1, 1));
3393 }
3394
/* UMTX_OP_WAKE: wake up to 'val' waiters on a shared word. */
static int
__umtx_op_wake(struct thread *td, struct _umtx_op_args *uap)
{

	return (kern_umtx_wake(td, uap->obj, uap->val, 0));
}
3401
3402 #define BATCH_SIZE 128
3403 static int
3404 __umtx_op_nwake_private(struct thread *td, struct _umtx_op_args *uap)
3405 {
3406 char *uaddrs[BATCH_SIZE], **upp;
3407 int count, error, i, pos, tocopy;
3408
3409 upp = (char **)uap->obj;
3410 error = 0;
3411 for (count = uap->val, pos = 0; count > 0; count -= tocopy,
3412 pos += tocopy) {
3413 tocopy = MIN(count, BATCH_SIZE);
3414 error = copyin(upp + pos, uaddrs, tocopy * sizeof(char *));
3415 if (error != 0)
3416 break;
3417 for (i = 0; i < tocopy; ++i)
3418 kern_umtx_wake(td, uaddrs[i], INT_MAX, 1);
3419 maybe_yield();
3420 }
3421 return (error);
3422 }
3423
/* UMTX_OP_WAKE_PRIVATE: wake up to 'val' waiters on a private word. */
static int
__umtx_op_wake_private(struct thread *td, struct _umtx_op_args *uap)
{

	return (kern_umtx_wake(td, uap->obj, uap->val, 1));
}
3430
3431 static int
3432 __umtx_op_lock_umutex(struct thread *td, struct _umtx_op_args *uap)
3433 {
3434 struct _umtx_time *tm_p, timeout;
3435 int error;
3436
3437 /* Allow a null timespec (wait forever). */
3438 if (uap->uaddr2 == NULL)
3439 tm_p = NULL;
3440 else {
3441 error = umtx_copyin_umtx_time(
3442 uap->uaddr2, (size_t)uap->uaddr1, &timeout);
3443 if (error != 0)
3444 return (error);
3445 tm_p = &timeout;
3446 }
3447 return (do_lock_umutex(td, uap->obj, tm_p, 0));
3448 }
3449
/* UMTX_OP_MUTEX_TRYLOCK: non-blocking userland mutex lock attempt. */
static int
__umtx_op_trylock_umutex(struct thread *td, struct _umtx_op_args *uap)
{

	return (do_lock_umutex(td, uap->obj, NULL, _UMUTEX_TRY));
}
3456
3457 static int
3458 __umtx_op_wait_umutex(struct thread *td, struct _umtx_op_args *uap)
3459 {
3460 struct _umtx_time *tm_p, timeout;
3461 int error;
3462
3463 /* Allow a null timespec (wait forever). */
3464 if (uap->uaddr2 == NULL)
3465 tm_p = NULL;
3466 else {
3467 error = umtx_copyin_umtx_time(
3468 uap->uaddr2, (size_t)uap->uaddr1, &timeout);
3469 if (error != 0)
3470 return (error);
3471 tm_p = &timeout;
3472 }
3473 return (do_lock_umutex(td, uap->obj, tm_p, _UMUTEX_WAIT));
3474 }
3475
/* UMTX_OP_MUTEX_WAKE: wake a waiter blocked in UMTX_OP_MUTEX_WAIT. */
static int
__umtx_op_wake_umutex(struct thread *td, struct _umtx_op_args *uap)
{

	return (do_wake_umutex(td, uap->obj));
}
3482
/* UMTX_OP_MUTEX_UNLOCK: normal (non-robust-termination) unlock. */
static int
__umtx_op_unlock_umutex(struct thread *td, struct _umtx_op_args *uap)
{

	return (do_unlock_umutex(td, uap->obj, false));
}
3489
/*
 * UMTX_OP_SET_CEILING: set a PP mutex priority ceiling to 'val';
 * the old ceiling is stored through uaddr1 if non-NULL.
 */
static int
__umtx_op_set_ceiling(struct thread *td, struct _umtx_op_args *uap)
{

	return (do_set_ceiling(td, uap->obj, uap->val, uap->uaddr1));
}
3496
3497 static int
3498 __umtx_op_cv_wait(struct thread *td, struct _umtx_op_args *uap)
3499 {
3500 struct timespec *ts, timeout;
3501 int error;
3502
3503 /* Allow a null timespec (wait forever). */
3504 if (uap->uaddr2 == NULL)
3505 ts = NULL;
3506 else {
3507 error = umtx_copyin_timeout(uap->uaddr2, &timeout);
3508 if (error != 0)
3509 return (error);
3510 ts = &timeout;
3511 }
3512 return (do_cv_wait(td, uap->obj, uap->uaddr1, ts, uap->val));
3513 }
3514
/* UMTX_OP_CV_SIGNAL: wake one condvar waiter. */
static int
__umtx_op_cv_signal(struct thread *td, struct _umtx_op_args *uap)
{

	return (do_cv_signal(td, uap->obj));
}
3521
/* UMTX_OP_CV_BROADCAST: wake all condvar waiters. */
static int
__umtx_op_cv_broadcast(struct thread *td, struct _umtx_op_args *uap)
{

	return (do_cv_broadcast(td, uap->obj));
}
3528
3529 static int
3530 __umtx_op_rw_rdlock(struct thread *td, struct _umtx_op_args *uap)
3531 {
3532 struct _umtx_time timeout;
3533 int error;
3534
3535 /* Allow a null timespec (wait forever). */
3536 if (uap->uaddr2 == NULL) {
3537 error = do_rw_rdlock(td, uap->obj, uap->val, 0);
3538 } else {
3539 error = umtx_copyin_umtx_time(uap->uaddr2,
3540 (size_t)uap->uaddr1, &timeout);
3541 if (error != 0)
3542 return (error);
3543 error = do_rw_rdlock(td, uap->obj, uap->val, &timeout);
3544 }
3545 return (error);
3546 }
3547
3548 static int
3549 __umtx_op_rw_wrlock(struct thread *td, struct _umtx_op_args *uap)
3550 {
3551 struct _umtx_time timeout;
3552 int error;
3553
3554 /* Allow a null timespec (wait forever). */
3555 if (uap->uaddr2 == NULL) {
3556 error = do_rw_wrlock(td, uap->obj, 0);
3557 } else {
3558 error = umtx_copyin_umtx_time(uap->uaddr2,
3559 (size_t)uap->uaddr1, &timeout);
3560 if (error != 0)
3561 return (error);
3562
3563 error = do_rw_wrlock(td, uap->obj, &timeout);
3564 }
3565 return (error);
3566 }
3567
/* UMTX_OP_RW_UNLOCK: release a read or write hold on the rwlock. */
static int
__umtx_op_rw_unlock(struct thread *td, struct _umtx_op_args *uap)
{

	return (do_rw_unlock(td, uap->obj));
}
3574
3575 #if defined(COMPAT_FREEBSD9) || defined(COMPAT_FREEBSD10)
3576 static int
3577 __umtx_op_sem_wait(struct thread *td, struct _umtx_op_args *uap)
3578 {
3579 struct _umtx_time *tm_p, timeout;
3580 int error;
3581
3582 /* Allow a null timespec (wait forever). */
3583 if (uap->uaddr2 == NULL)
3584 tm_p = NULL;
3585 else {
3586 error = umtx_copyin_umtx_time(
3587 uap->uaddr2, (size_t)uap->uaddr1, &timeout);
3588 if (error != 0)
3589 return (error);
3590 tm_p = &timeout;
3591 }
3592 return (do_sem_wait(td, uap->obj, tm_p));
3593 }
3594
/* UMTX_OP_SEM_WAKE (compat FreeBSD 9/10): wake one _usem waiter. */
static int
__umtx_op_sem_wake(struct thread *td, struct _umtx_op_args *uap)
{

	return (do_sem_wake(td, uap->obj));
}
3601 #endif
3602
/* UMTX_OP_MUTEX_WAKE2: wake a mutex waiter; 'val' carries flags. */
static int
__umtx_op_wake2_umutex(struct thread *td, struct _umtx_op_args *uap)
{

	return (do_wake2_umutex(td, uap->obj, uap->val));
}
3609
/*
 * UMTX_OP_SEM2_WAIT: wait on a _usem2 with optional timeout.  If the
 * sleep is interrupted while a relative timeout is in effect and the
 * user buffer is large enough, the remaining time (computed by
 * do_sem2_wait()) is copied back to the timespec that follows the
 * struct _umtx_time at uaddr2, so userland can resume the wait.
 */
static int
__umtx_op_sem2_wait(struct thread *td, struct _umtx_op_args *uap)
{
	struct _umtx_time *tm_p, timeout;
	size_t uasize;
	int error;

	/* Allow a null timespec (wait forever). */
	if (uap->uaddr2 == NULL) {
		uasize = 0;
		tm_p = NULL;
	} else {
		uasize = (size_t)uap->uaddr1;
		error = umtx_copyin_umtx_time(uap->uaddr2, uasize, &timeout);
		if (error != 0)
			return (error);
		tm_p = &timeout;
	}
	error = do_sem2_wait(td, uap->obj, tm_p);
	if (error == EINTR && uap->uaddr2 != NULL &&
	    (timeout._flags & UMTX_ABSTIME) == 0 &&
	    uasize >= sizeof(struct _umtx_time) + sizeof(struct timespec)) {
		/* Write the remaining time just past the _umtx_time. */
		error = copyout(&timeout._timeout,
		    (struct _umtx_time *)uap->uaddr2 + 1,
		    sizeof(struct timespec));
		if (error == 0) {
			error = EINTR;
		}
	}

	return (error);
}
3642
/* UMTX_OP_SEM2_WAKE: wake one _usem2 waiter. */
static int
__umtx_op_sem2_wake(struct thread *td, struct _umtx_op_args *uap)
{

	return (do_sem2_wake(td, uap->obj));
}
3649
/* Per-vm_object list head of shm registrations, stored in umtx_data. */
#define USHM_OBJ_UMTX(o)						\
    ((struct umtx_shm_obj_list *)(&(o)->umtx_data))

#define USHMF_REG_LINKED	0x0001	/* on umtx_shm_registry hash list */
#define USHMF_OBJ_LINKED	0x0002	/* on the owning object's list */
/*
 * A registration mapping a shared umtx key to an anonymous shmfd
 * object, created by UMTX_SHM_CREAT and looked up by UMTX_SHM_LOOKUP.
 */
struct umtx_shm_reg {
	TAILQ_ENTRY(umtx_shm_reg) ushm_reg_link;	/* registry hash linkage */
	LIST_ENTRY(umtx_shm_reg) ushm_obj_link;	/* per-object linkage */
	struct umtx_key		ushm_key;	/* shared key identifying page */
	struct ucred		*ushm_cred;	/* creator's credentials */
	struct shmfd		*ushm_obj;	/* backing shared memory */
	u_int			ushm_refcnt;	/* reference count */
	u_int			ushm_flags;	/* USHMF_* linkage state */
};
3664
LIST_HEAD(umtx_shm_obj_list, umtx_shm_reg);
TAILQ_HEAD(umtx_shm_reg_head, umtx_shm_reg);

/* Zone, hash table and lock for shared-memory umtx registrations. */
static uma_zone_t umtx_shm_reg_zone;
static struct umtx_shm_reg_head umtx_shm_registry[UMTX_CHAINS];
static struct mtx umtx_shm_lock;
/* Registrations queued for deferred destruction by the delfree task. */
static struct umtx_shm_reg_head umtx_shm_reg_delfree =
    TAILQ_HEAD_INITIALIZER(umtx_shm_reg_delfree);

static void umtx_shm_free_reg(struct umtx_shm_reg *reg);
3675
/*
 * Taskqueue handler: drain the delfree list and destroy each queued
 * registration.  Freeing is deferred to task context because
 * umtx_shm_free_reg() may not be callable from the context that
 * dropped the last reference.
 */
static void
umtx_shm_reg_delfree_tq(void *context __unused, int pending __unused)
{
	struct umtx_shm_reg_head d;
	struct umtx_shm_reg *reg, *reg1;

	/* Grab the whole pending list under the lock, then free unlocked. */
	TAILQ_INIT(&d);
	mtx_lock(&umtx_shm_lock);
	TAILQ_CONCAT(&d, &umtx_shm_reg_delfree, ushm_reg_link);
	mtx_unlock(&umtx_shm_lock);
	TAILQ_FOREACH_SAFE(reg, &d, ushm_reg_link, reg1) {
		TAILQ_REMOVE(&d, reg, ushm_reg_link);
		umtx_shm_free_reg(reg);
	}
}
3691
/* Task that runs umtx_shm_reg_delfree_tq() to reap dead registrations. */
static struct task umtx_shm_reg_delfree_task =
    TASK_INITIALIZER(0, umtx_shm_reg_delfree_tq, NULL);
3694
/*
 * Look up a shared-memory registration by its (object, offset) key in
 * the registry hash chain.  Returns the registration with its
 * reference count bumped, or NULL if not found.  Caller must hold
 * umtx_shm_lock and pass a shared key.
 */
static struct umtx_shm_reg *
umtx_shm_find_reg_locked(const struct umtx_key *key)
{
	struct umtx_shm_reg *reg;
	struct umtx_shm_reg_head *reg_head;

	KASSERT(key->shared, ("umtx_p_find_rg: private key"));
	mtx_assert(&umtx_shm_lock, MA_OWNED);
	reg_head = &umtx_shm_registry[key->hash];
	TAILQ_FOREACH(reg, reg_head, ushm_reg_link) {
		KASSERT(reg->ushm_key.shared,
		    ("non-shared key on reg %p %d", reg, reg->ushm_key.shared));
		if (reg->ushm_key.info.shared.object ==
		    key->info.shared.object &&
		    reg->ushm_key.info.shared.offset ==
		    key->info.shared.offset) {
			KASSERT(reg->ushm_key.type == TYPE_SHM, ("TYPE_USHM"));
			KASSERT(reg->ushm_refcnt > 0,
			    ("reg %p refcnt 0 onlist", reg));
			KASSERT((reg->ushm_flags & USHMF_REG_LINKED) != 0,
			    ("reg %p not linked", reg));
			/* Hand out a new reference to the caller. */
			reg->ushm_refcnt++;
			return (reg);
		}
	}
	return (NULL);
}
3722
/*
 * Locked wrapper around umtx_shm_find_reg_locked().  Returns a
 * referenced registration or NULL.
 */
static struct umtx_shm_reg *
umtx_shm_find_reg(const struct umtx_key *key)
{
	struct umtx_shm_reg *reg;

	mtx_lock(&umtx_shm_lock);
	reg = umtx_shm_find_reg_locked(key);
	mtx_unlock(&umtx_shm_lock);
	return (reg);
}
3733
/*
 * Release all resources held by a registration: the RLIMIT_UMTXP
 * accounting charge, the credential reference, the backing shm object,
 * and the structure itself.  Must be called without umtx_shm_lock held.
 */
static void
umtx_shm_free_reg(struct umtx_shm_reg *reg)
{

	chgumtxcnt(reg->ushm_cred->cr_ruidinfo, -1, 0);
	crfree(reg->ushm_cred);
	shm_drop(reg->ushm_obj);
	uma_zfree(umtx_shm_reg_zone, reg);
}
3743
/*
 * Drop one reference on a registration while holding umtx_shm_lock.
 * When the count reaches zero, or when 'force' is set, the registration
 * is unlinked from the registry and from its object list.  Returns true
 * iff the caller must free the registration (refcount hit zero); the
 * actual free is deferred because it sleeps.
 */
static bool
umtx_shm_unref_reg_locked(struct umtx_shm_reg *reg, bool force)
{
	bool res;

	mtx_assert(&umtx_shm_lock, MA_OWNED);
	KASSERT(reg->ushm_refcnt > 0, ("ushm_reg %p refcnt 0", reg));
	reg->ushm_refcnt--;
	res = reg->ushm_refcnt == 0;
	if (res || force) {
		if ((reg->ushm_flags & USHMF_REG_LINKED) != 0) {
			TAILQ_REMOVE(&umtx_shm_registry[reg->ushm_key.hash],
			    reg, ushm_reg_link);
			reg->ushm_flags &= ~USHMF_REG_LINKED;
		}
		if ((reg->ushm_flags & USHMF_OBJ_LINKED) != 0) {
			LIST_REMOVE(reg, ushm_obj_link);
			reg->ushm_flags &= ~USHMF_OBJ_LINKED;
		}
	}
	return (res);
}
3766
/*
 * Drop a reference on a registration, freeing it if that was the last
 * one.  With 'force' (UMTX_SHM_DESTROY path) the backing VM object is
 * first marked OBJ_UMTXDEAD so that umtx_shm_alive() reports ENOTTY
 * for mappings of the destroyed region.
 */
static void
umtx_shm_unref_reg(struct umtx_shm_reg *reg, bool force)
{
	vm_object_t object;
	bool dofree;

	if (force) {
		object = reg->ushm_obj->shm_object;
		VM_OBJECT_WLOCK(object);
		object->flags |= OBJ_UMTXDEAD;
		VM_OBJECT_WUNLOCK(object);
	}
	mtx_lock(&umtx_shm_lock);
	dofree = umtx_shm_unref_reg_locked(reg, force);
	mtx_unlock(&umtx_shm_lock);
	if (dofree)
		umtx_shm_free_reg(reg);
}
3785
/*
 * VM object constructor hook: initialize the object's (empty) list of
 * umtx shm registrations.
 */
void
umtx_shm_object_init(vm_object_t object)
{

	LIST_INIT(USHM_OBJ_UMTX(object));
}
3792
3793 void
3794 umtx_shm_object_terminated(vm_object_t object)
3795 {
3796 struct umtx_shm_reg *reg, *reg1;
3797 bool dofree;
3798
3799 dofree = false;
3800 mtx_lock(&umtx_shm_lock);
3801 LIST_FOREACH_SAFE(reg, USHM_OBJ_UMTX(object), ushm_obj_link, reg1) {
3802 if (umtx_shm_unref_reg_locked(reg, true)) {
3803 TAILQ_INSERT_TAIL(&umtx_shm_reg_delfree, reg,
3804 ushm_reg_link);
3805 dofree = true;
3806 }
3807 }
3808 mtx_unlock(&umtx_shm_lock);
3809 if (dofree)
3810 taskqueue_enqueue(taskqueue_thread, &umtx_shm_reg_delfree_task);
3811 }
3812
3813 static int
3814 umtx_shm_create_reg(struct thread *td, const struct umtx_key *key,
3815 struct umtx_shm_reg **res)
3816 {
3817 struct umtx_shm_reg *reg, *reg1;
3818 struct ucred *cred;
3819 int error;
3820
3821 reg = umtx_shm_find_reg(key);
3822 if (reg != NULL) {
3823 *res = reg;
3824 return (0);
3825 }
3826 cred = td->td_ucred;
3827 if (!chgumtxcnt(cred->cr_ruidinfo, 1, lim_cur(td, RLIMIT_UMTXP)))
3828 return (ENOMEM);
3829 reg = uma_zalloc(umtx_shm_reg_zone, M_WAITOK | M_ZERO);
3830 reg->ushm_refcnt = 1;
3831 bcopy(key, ®->ushm_key, sizeof(*key));
3832 reg->ushm_obj = shm_alloc(td->td_ucred, O_RDWR);
3833 reg->ushm_cred = crhold(cred);
3834 error = shm_dotruncate(reg->ushm_obj, PAGE_SIZE);
3835 if (error != 0) {
3836 umtx_shm_free_reg(reg);
3837 return (error);
3838 }
3839 mtx_lock(&umtx_shm_lock);
3840 reg1 = umtx_shm_find_reg_locked(key);
3841 if (reg1 != NULL) {
3842 mtx_unlock(&umtx_shm_lock);
3843 umtx_shm_free_reg(reg);
3844 *res = reg1;
3845 return (0);
3846 }
3847 reg->ushm_refcnt++;
3848 TAILQ_INSERT_TAIL(&umtx_shm_registry[key->hash], reg, ushm_reg_link);
3849 LIST_INSERT_HEAD(USHM_OBJ_UMTX(key->info.shared.object), reg,
3850 ushm_obj_link);
3851 reg->ushm_flags = USHMF_REG_LINKED | USHMF_OBJ_LINKED;
3852 mtx_unlock(&umtx_shm_lock);
3853 *res = reg;
3854 return (0);
3855 }
3856
/*
 * UMTX_SHM_ALIVE: check whether the mapping at 'addr' still refers to a
 * live umtx shm region.  Returns 0 when alive, ENOTTY when the backing
 * object was marked OBJ_UMTXDEAD by a destroy, EFAULT/EINVAL when the
 * address does not resolve to a backed mapping.
 */
static int
umtx_shm_alive(struct thread *td, void *addr)
{
	vm_map_t map;
	vm_map_entry_t entry;
	vm_object_t object;
	vm_pindex_t pindex;
	vm_prot_t prot;
	int res, ret;
	boolean_t wired;

	map = &td->td_proc->p_vmspace->vm_map;
	res = vm_map_lookup(&map, (uintptr_t)addr, VM_PROT_READ, &entry,
	    &object, &pindex, &prot, &wired);
	if (res != KERN_SUCCESS)
		return (EFAULT);
	if (object == NULL)
		ret = EINVAL;
	else
		ret = (object->flags & OBJ_UMTXDEAD) != 0 ? ENOTTY : 0;
	vm_map_lookup_done(map, entry);
	return (ret);
}
3880
3881 static void
3882 umtx_shm_init(void)
3883 {
3884 int i;
3885
3886 umtx_shm_reg_zone = uma_zcreate("umtx_shm", sizeof(struct umtx_shm_reg),
3887 NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0);
3888 mtx_init(&umtx_shm_lock, "umtxshm", NULL, MTX_DEF);
3889 for (i = 0; i < nitems(umtx_shm_registry); i++)
3890 TAILQ_INIT(&umtx_shm_registry[i]);
3891 }
3892
3893 static int
3894 umtx_shm(struct thread *td, void *addr, u_int flags)
3895 {
3896 struct umtx_key key;
3897 struct umtx_shm_reg *reg;
3898 struct file *fp;
3899 int error, fd;
3900
3901 if (__bitcount(flags & (UMTX_SHM_CREAT | UMTX_SHM_LOOKUP |
3902 UMTX_SHM_DESTROY| UMTX_SHM_ALIVE)) != 1)
3903 return (EINVAL);
3904 if ((flags & UMTX_SHM_ALIVE) != 0)
3905 return (umtx_shm_alive(td, addr));
3906 error = umtx_key_get(addr, TYPE_SHM, PROCESS_SHARE, &key);
3907 if (error != 0)
3908 return (error);
3909 KASSERT(key.shared == 1, ("non-shared key"));
3910 if ((flags & UMTX_SHM_CREAT) != 0) {
3911 error = umtx_shm_create_reg(td, &key, ®);
3912 } else {
3913 reg = umtx_shm_find_reg(&key);
3914 if (reg == NULL)
3915 error = ESRCH;
3916 }
3917 umtx_key_release(&key);
3918 if (error != 0)
3919 return (error);
3920 KASSERT(reg != NULL, ("no reg"));
3921 if ((flags & UMTX_SHM_DESTROY) != 0) {
3922 umtx_shm_unref_reg(reg, true);
3923 } else {
3924 #if 0
3925 #ifdef MAC
3926 error = mac_posixshm_check_open(td->td_ucred,
3927 reg->ushm_obj, FFLAGS(O_RDWR));
3928 if (error == 0)
3929 #endif
3930 error = shm_access(reg->ushm_obj, td->td_ucred,
3931 FFLAGS(O_RDWR));
3932 if (error == 0)
3933 #endif
3934 error = falloc_caps(td, &fp, &fd, O_CLOEXEC, NULL);
3935 if (error == 0) {
3936 shm_hold(reg->ushm_obj);
3937 finit(fp, FFLAGS(O_RDWR), DTYPE_SHM, reg->ushm_obj,
3938 &shm_ops);
3939 td->td_retval[0] = fd;
3940 fdrop(fp, td);
3941 }
3942 }
3943 umtx_shm_unref_reg(reg, false);
3944 return (error);
3945 }
3946
/*
 * UMTX_OP_SHM dispatcher: address in uaddr1, UMTX_SHM_* flags in val.
 */
static int
__umtx_op_shm(struct thread *td, struct _umtx_op_args *uap)
{

	return (umtx_shm(td, uap->uaddr1, uap->val));
}
3953
/*
 * Record the userspace robust mutex list heads for the current thread;
 * they are walked by umtx_thread_cleanup() at thread exit / exec.
 */
static int
umtx_robust_lists(struct thread *td, struct umtx_robust_lists_params *rbp)
{

	td->td_rb_list = rbp->robust_list_offset;
	td->td_rbp_list = rbp->robust_priv_list_offset;
	td->td_rb_inact = rbp->robust_inact_offset;
	return (0);
}
3963
/*
 * UMTX_OP_ROBUST_LISTS: uaddr1 points at a umtx_robust_lists_params of
 * size uap->val.  Smaller (older) structures are accepted and the
 * missing tail is taken as zero, for forward compatibility.
 */
static int
__umtx_op_robust_lists(struct thread *td, struct _umtx_op_args *uap)
{
	struct umtx_robust_lists_params rb;
	int error;

	if (uap->val > sizeof(rb))
		return (EINVAL);
	bzero(&rb, sizeof(rb));
	error = copyin(uap->uaddr1, &rb, uap->val);
	if (error != 0)
		return (error);
	return (umtx_robust_lists(td, &rb));
}
3978
typedef int (*_umtx_op_func)(struct thread *td, struct _umtx_op_args *uap);

/*
 * Native _umtx_op() dispatch table, indexed by UMTX_OP_* code.
 */
static const _umtx_op_func op_table[] = {
	[UMTX_OP_RESERVED0]	= __umtx_op_unimpl,
	[UMTX_OP_RESERVED1]	= __umtx_op_unimpl,
	[UMTX_OP_WAIT]		= __umtx_op_wait,
	[UMTX_OP_WAKE]		= __umtx_op_wake,
	[UMTX_OP_MUTEX_TRYLOCK]	= __umtx_op_trylock_umutex,
	[UMTX_OP_MUTEX_LOCK]	= __umtx_op_lock_umutex,
	[UMTX_OP_MUTEX_UNLOCK]	= __umtx_op_unlock_umutex,
	[UMTX_OP_SET_CEILING]	= __umtx_op_set_ceiling,
	[UMTX_OP_CV_WAIT]	= __umtx_op_cv_wait,
	[UMTX_OP_CV_SIGNAL]	= __umtx_op_cv_signal,
	[UMTX_OP_CV_BROADCAST]	= __umtx_op_cv_broadcast,
	[UMTX_OP_WAIT_UINT]	= __umtx_op_wait_uint,
	[UMTX_OP_RW_RDLOCK]	= __umtx_op_rw_rdlock,
	[UMTX_OP_RW_WRLOCK]	= __umtx_op_rw_wrlock,
	[UMTX_OP_RW_UNLOCK]	= __umtx_op_rw_unlock,
	[UMTX_OP_WAIT_UINT_PRIVATE] = __umtx_op_wait_uint_private,
	[UMTX_OP_WAKE_PRIVATE]	= __umtx_op_wake_private,
	[UMTX_OP_MUTEX_WAIT]	= __umtx_op_wait_umutex,
	[UMTX_OP_MUTEX_WAKE]	= __umtx_op_wake_umutex,
#if defined(COMPAT_FREEBSD9) || defined(COMPAT_FREEBSD10)
	[UMTX_OP_SEM_WAIT]	= __umtx_op_sem_wait,
	[UMTX_OP_SEM_WAKE]	= __umtx_op_sem_wake,
#else
	/* Old _usem interface only exists for compat kernels. */
	[UMTX_OP_SEM_WAIT]	= __umtx_op_unimpl,
	[UMTX_OP_SEM_WAKE]	= __umtx_op_unimpl,
#endif
	[UMTX_OP_NWAKE_PRIVATE]	= __umtx_op_nwake_private,
	[UMTX_OP_MUTEX_WAKE2]	= __umtx_op_wake2_umutex,
	[UMTX_OP_SEM2_WAIT]	= __umtx_op_sem2_wait,
	[UMTX_OP_SEM2_WAKE]	= __umtx_op_sem2_wake,
	[UMTX_OP_SHM]		= __umtx_op_shm,
	[UMTX_OP_ROBUST_LISTS]	= __umtx_op_robust_lists,
};
4015
/*
 * The _umtx_op(2) system call entry point: validate the operation code
 * and dispatch through op_table.
 */
int
sys__umtx_op(struct thread *td, struct _umtx_op_args *uap)
{

	if ((unsigned)uap->op < nitems(op_table))
		return (*op_table[uap->op])(td, uap);
	return (EINVAL);
}
4024
4025 #ifdef COMPAT_FREEBSD32
4026
/* 32-bit userland layout of struct timespec. */
struct timespec32 {
	int32_t tv_sec;
	int32_t tv_nsec;
};

/* 32-bit userland layout of struct _umtx_time. */
struct umtx_time32 {
	struct	timespec32 timeout;
	uint32_t flags;
	uint32_t clockid;
};
4037
4038 static inline int
4039 umtx_copyin_timeout32(void *addr, struct timespec *tsp)
4040 {
4041 struct timespec32 ts32;
4042 int error;
4043
4044 error = copyin(addr, &ts32, sizeof(struct timespec32));
4045 if (error == 0) {
4046 if (ts32.tv_sec < 0 ||
4047 ts32.tv_nsec >= 1000000000 ||
4048 ts32.tv_nsec < 0)
4049 error = EINVAL;
4050 else {
4051 tsp->tv_sec = ts32.tv_sec;
4052 tsp->tv_nsec = ts32.tv_nsec;
4053 }
4054 }
4055 return (error);
4056 }
4057
/*
 * Copy in a 32-bit _umtx_time of 'size' bytes and widen it into the
 * native struct _umtx_time.  If the caller passed only a bare
 * timespec32, the flags default to 0 (relative) and the clock to
 * CLOCK_REALTIME.  Returns EINVAL for an invalid timeout.
 */
static inline int
umtx_copyin_umtx_time32(const void *addr, size_t size, struct _umtx_time *tp)
{
	struct umtx_time32 t32;
	int error;

	t32.clockid = CLOCK_REALTIME;
	t32.flags = 0;
	if (size <= sizeof(struct timespec32))
		error = copyin(addr, &t32.timeout, sizeof(struct timespec32));
	else
		error = copyin(addr, &t32, sizeof(struct umtx_time32));
	if (error != 0)
		return (error);
	if (t32.timeout.tv_sec < 0 ||
	    t32.timeout.tv_nsec >= 1000000000 || t32.timeout.tv_nsec < 0)
		return (EINVAL);
	tp->_timeout.tv_sec = t32.timeout.tv_sec;
	tp->_timeout.tv_nsec = t32.timeout.tv_nsec;
	tp->_flags = t32.flags;
	tp->_clockid = t32.clockid;
	return (0);
}
4081
/*
 * 32-bit compat UMTX_OP_WAIT / UMTX_OP_WAIT_UINT: uaddr1 carries the
 * size of the userspace timeout structure, uaddr2 points at it.
 */
static int
__umtx_op_wait_compat32(struct thread *td, struct _umtx_op_args *uap)
{
	struct _umtx_time *tm_p, timeout;
	int error;

	if (uap->uaddr2 == NULL)
		tm_p = NULL;	/* wait forever */
	else {
		error = umtx_copyin_umtx_time32(uap->uaddr2,
		    (size_t)uap->uaddr1, &timeout);
		if (error != 0)
			return (error);
		tm_p = &timeout;
	}
	return (do_wait(td, uap->obj, uap->val, tm_p, 1, 0));
}
4099
/*
 * 32-bit compat UMTX_OP_MUTEX_LOCK: lock the umutex at uap->obj with an
 * optional timeout copied in from 32-bit userland.
 */
static int
__umtx_op_lock_umutex_compat32(struct thread *td, struct _umtx_op_args *uap)
{
	struct _umtx_time *tm_p, timeout;
	int error;

	/* Allow a null timespec (wait forever). */
	if (uap->uaddr2 == NULL)
		tm_p = NULL;
	else {
		error = umtx_copyin_umtx_time32(uap->uaddr2,
		    (size_t)uap->uaddr1, &timeout);
		if (error != 0)
			return (error);
		tm_p = &timeout;
	}
	return (do_lock_umutex(td, uap->obj, tm_p, 0));
}
4118
/*
 * 32-bit compat UMTX_OP_MUTEX_WAIT: wait for the umutex to become
 * lockable without acquiring it (_UMUTEX_WAIT mode).
 */
static int
__umtx_op_wait_umutex_compat32(struct thread *td, struct _umtx_op_args *uap)
{
	struct _umtx_time *tm_p, timeout;
	int error;

	/* Allow a null timespec (wait forever). */
	if (uap->uaddr2 == NULL)
		tm_p = NULL;
	else {
		error = umtx_copyin_umtx_time32(uap->uaddr2,
		    (size_t)uap->uaddr1, &timeout);
		if (error != 0)
			return (error);
		tm_p = &timeout;
	}
	return (do_lock_umutex(td, uap->obj, tm_p, _UMUTEX_WAIT));
}
4137
/*
 * 32-bit compat UMTX_OP_CV_WAIT: condition variable at uap->obj,
 * associated mutex at uaddr1, optional absolute timeout at uaddr2.
 */
static int
__umtx_op_cv_wait_compat32(struct thread *td, struct _umtx_op_args *uap)
{
	struct timespec *ts, timeout;
	int error;

	/* Allow a null timespec (wait forever). */
	if (uap->uaddr2 == NULL)
		ts = NULL;
	else {
		error = umtx_copyin_timeout32(uap->uaddr2, &timeout);
		if (error != 0)
			return (error);
		ts = &timeout;
	}
	return (do_cv_wait(td, uap->obj, uap->uaddr1, ts, uap->val));
}
4155
4156 static int
4157 __umtx_op_rw_rdlock_compat32(struct thread *td, struct _umtx_op_args *uap)
4158 {
4159 struct _umtx_time timeout;
4160 int error;
4161
4162 /* Allow a null timespec (wait forever). */
4163 if (uap->uaddr2 == NULL) {
4164 error = do_rw_rdlock(td, uap->obj, uap->val, 0);
4165 } else {
4166 error = umtx_copyin_umtx_time32(uap->uaddr2,
4167 (size_t)uap->uaddr1, &timeout);
4168 if (error != 0)
4169 return (error);
4170 error = do_rw_rdlock(td, uap->obj, uap->val, &timeout);
4171 }
4172 return (error);
4173 }
4174
4175 static int
4176 __umtx_op_rw_wrlock_compat32(struct thread *td, struct _umtx_op_args *uap)
4177 {
4178 struct _umtx_time timeout;
4179 int error;
4180
4181 /* Allow a null timespec (wait forever). */
4182 if (uap->uaddr2 == NULL) {
4183 error = do_rw_wrlock(td, uap->obj, 0);
4184 } else {
4185 error = umtx_copyin_umtx_time32(uap->uaddr2,
4186 (size_t)uap->uaddr1, &timeout);
4187 if (error != 0)
4188 return (error);
4189 error = do_rw_wrlock(td, uap->obj, &timeout);
4190 }
4191 return (error);
4192 }
4193
4194 static int
4195 __umtx_op_wait_uint_private_compat32(struct thread *td, struct _umtx_op_args *uap)
4196 {
4197 struct _umtx_time *tm_p, timeout;
4198 int error;
4199
4200 if (uap->uaddr2 == NULL)
4201 tm_p = NULL;
4202 else {
4203 error = umtx_copyin_umtx_time32(
4204 uap->uaddr2, (size_t)uap->uaddr1,&timeout);
4205 if (error != 0)
4206 return (error);
4207 tm_p = &timeout;
4208 }
4209 return (do_wait(td, uap->obj, uap->val, tm_p, 1, 1));
4210 }
4211
#if defined(COMPAT_FREEBSD9) || defined(COMPAT_FREEBSD10)
/*
 * 32-bit compat UMTX_OP_SEM_WAIT for the legacy _usem interface.
 */
static int
__umtx_op_sem_wait_compat32(struct thread *td, struct _umtx_op_args *uap)
{
	struct _umtx_time *tm_p, timeout;
	int error;

	/* Allow a null timespec (wait forever). */
	if (uap->uaddr2 == NULL)
		tm_p = NULL;
	else {
		error = umtx_copyin_umtx_time32(uap->uaddr2,
		    (size_t)uap->uaddr1, &timeout);
		if (error != 0)
			return (error);
		tm_p = &timeout;
	}
	return (do_sem_wait(td, uap->obj, tm_p));
}
#endif
4232
/*
 * 32-bit compat UMTX_OP_SEM2_WAIT.  On EINTR with a relative timeout,
 * and when userland provided room for it (a timespec32 following the
 * umtx_time32), the remaining time is copied back so the caller can
 * restart the wait; a copyout failure overrides EINTR with EFAULT.
 */
static int
__umtx_op_sem2_wait_compat32(struct thread *td, struct _umtx_op_args *uap)
{
	struct _umtx_time *tm_p, timeout;
	size_t uasize;
	int error;

	/* Allow a null timespec (wait forever). */
	if (uap->uaddr2 == NULL) {
		uasize = 0;
		tm_p = NULL;
	} else {
		uasize = (size_t)uap->uaddr1;
		error = umtx_copyin_umtx_time32(uap->uaddr2, uasize, &timeout);
		if (error != 0)
			return (error);
		tm_p = &timeout;
	}
	error = do_sem2_wait(td, uap->obj, tm_p);
	if (error == EINTR && uap->uaddr2 != NULL &&
	    (timeout._flags & UMTX_ABSTIME) == 0 &&
	    uasize >= sizeof(struct umtx_time32) + sizeof(struct timespec32)) {
		/* Narrow the remaining time back to the 32-bit layout. */
		struct timespec32 remain32 = {
			.tv_sec = timeout._timeout.tv_sec,
			.tv_nsec = timeout._timeout.tv_nsec
		};
		error = copyout(&remain32,
		    (struct umtx_time32 *)uap->uaddr2 + 1,
		    sizeof(struct timespec32));
		if (error == 0) {
			error = EINTR;
		}
	}

	return (error);
}
4269
4270 static int
4271 __umtx_op_nwake_private32(struct thread *td, struct _umtx_op_args *uap)
4272 {
4273 uint32_t uaddrs[BATCH_SIZE], **upp;
4274 int count, error, i, pos, tocopy;
4275
4276 upp = (uint32_t **)uap->obj;
4277 error = 0;
4278 for (count = uap->val, pos = 0; count > 0; count -= tocopy,
4279 pos += tocopy) {
4280 tocopy = MIN(count, BATCH_SIZE);
4281 error = copyin(upp + pos, uaddrs, tocopy * sizeof(uint32_t));
4282 if (error != 0)
4283 break;
4284 for (i = 0; i < tocopy; ++i)
4285 kern_umtx_wake(td, (void *)(intptr_t)uaddrs[i],
4286 INT_MAX, 1);
4287 maybe_yield();
4288 }
4289 return (error);
4290 }
4291
/* 32-bit userland layout of struct umtx_robust_lists_params. */
struct umtx_robust_lists_params_compat32 {
	uint32_t robust_list_offset;
	uint32_t robust_priv_list_offset;
	uint32_t robust_inact_offset;
};
4297
/*
 * 32-bit compat UMTX_OP_ROBUST_LISTS: copy in the 32-bit parameter
 * structure (accepting shorter, older layouts) and widen the pointers
 * before recording them on the thread.
 */
static int
__umtx_op_robust_lists_compat32(struct thread *td, struct _umtx_op_args *uap)
{
	struct umtx_robust_lists_params rb;
	struct umtx_robust_lists_params_compat32 rb32;
	int error;

	if (uap->val > sizeof(rb32))
		return (EINVAL);
	bzero(&rb, sizeof(rb));
	bzero(&rb32, sizeof(rb32));
	error = copyin(uap->uaddr1, &rb32, uap->val);
	if (error != 0)
		return (error);
	rb.robust_list_offset = rb32.robust_list_offset;
	rb.robust_priv_list_offset = rb32.robust_priv_list_offset;
	rb.robust_inact_offset = rb32.robust_inact_offset;
	return (umtx_robust_lists(td, &rb));
}
4317
/*
 * Dispatch table for _umtx_op() issued by 32-bit processes; entries
 * whose argument layout matches the native ABI reuse the native
 * handlers directly.
 */
static const _umtx_op_func op_table_compat32[] = {
	[UMTX_OP_RESERVED0]	= __umtx_op_unimpl,
	[UMTX_OP_RESERVED1]	= __umtx_op_unimpl,
	[UMTX_OP_WAIT]		= __umtx_op_wait_compat32,
	[UMTX_OP_WAKE]		= __umtx_op_wake,
	[UMTX_OP_MUTEX_TRYLOCK]	= __umtx_op_trylock_umutex,
	[UMTX_OP_MUTEX_LOCK]	= __umtx_op_lock_umutex_compat32,
	[UMTX_OP_MUTEX_UNLOCK]	= __umtx_op_unlock_umutex,
	[UMTX_OP_SET_CEILING]	= __umtx_op_set_ceiling,
	[UMTX_OP_CV_WAIT]	= __umtx_op_cv_wait_compat32,
	[UMTX_OP_CV_SIGNAL]	= __umtx_op_cv_signal,
	[UMTX_OP_CV_BROADCAST]	= __umtx_op_cv_broadcast,
	[UMTX_OP_WAIT_UINT]	= __umtx_op_wait_compat32,
	[UMTX_OP_RW_RDLOCK]	= __umtx_op_rw_rdlock_compat32,
	[UMTX_OP_RW_WRLOCK]	= __umtx_op_rw_wrlock_compat32,
	[UMTX_OP_RW_UNLOCK]	= __umtx_op_rw_unlock,
	[UMTX_OP_WAIT_UINT_PRIVATE] = __umtx_op_wait_uint_private_compat32,
	[UMTX_OP_WAKE_PRIVATE]	= __umtx_op_wake_private,
	[UMTX_OP_MUTEX_WAIT]	= __umtx_op_wait_umutex_compat32,
	[UMTX_OP_MUTEX_WAKE]	= __umtx_op_wake_umutex,
#if defined(COMPAT_FREEBSD9) || defined(COMPAT_FREEBSD10)
	[UMTX_OP_SEM_WAIT]	= __umtx_op_sem_wait_compat32,
	[UMTX_OP_SEM_WAKE]	= __umtx_op_sem_wake,
#else
	/* Old _usem interface only exists for compat kernels. */
	[UMTX_OP_SEM_WAIT]	= __umtx_op_unimpl,
	[UMTX_OP_SEM_WAKE]	= __umtx_op_unimpl,
#endif
	[UMTX_OP_NWAKE_PRIVATE]	= __umtx_op_nwake_private32,
	[UMTX_OP_MUTEX_WAKE2]	= __umtx_op_wake2_umutex,
	[UMTX_OP_SEM2_WAIT]	= __umtx_op_sem2_wait_compat32,
	[UMTX_OP_SEM2_WAKE]	= __umtx_op_sem2_wake,
	[UMTX_OP_SHM]		= __umtx_op_shm,
	[UMTX_OP_ROBUST_LISTS]	= __umtx_op_robust_lists_compat32,
};
4352
/*
 * _umtx_op(2) entry point for 32-bit processes: validate the operation
 * code and dispatch through op_table_compat32.
 */
int
freebsd32__umtx_op(struct thread *td, struct freebsd32__umtx_op_args *uap)
{

	if ((unsigned)uap->op < nitems(op_table_compat32)) {
		return (*op_table_compat32[uap->op])(td,
		    (struct _umtx_op_args *)uap);
	}
	return (EINVAL);
}
4363 #endif
4364
/*
 * Thread constructor hook: allocate the per-thread umtx queue and bind
 * it to the thread.
 */
void
umtx_thread_init(struct thread *td)
{

	td->td_umtxq = umtxq_alloc();
	td->td_umtxq->uq_thread = td;
}
4372
/*
 * Thread destructor hook: release the per-thread umtx queue.
 */
void
umtx_thread_fini(struct thread *td)
{

	umtxq_free(td->td_umtxq);
}
4379
/*
 * It will be called when new thread is created, e.g fork().  Resets the
 * inherited priority and sanity-checks that the recycled umtx queue
 * carries no stale state.
 */
void
umtx_thread_alloc(struct thread *td)
{
	struct umtx_q *uq;

	uq = td->td_umtxq;
	uq->uq_inherited_pri = PRI_MAX;

	KASSERT(uq->uq_flags == 0, ("uq_flags != 0"));
	KASSERT(uq->uq_thread == td, ("uq_thread != td"));
	KASSERT(uq->uq_pi_blocked == NULL, ("uq_pi_blocked != NULL"));
	KASSERT(TAILQ_EMPTY(&uq->uq_pi_contested), ("uq_pi_contested is not empty"));
}
4396
/*
 * exec() hook.
 *
 * Clear robust lists for all process' threads, not delaying the
 * cleanup to thread_exit hook, since the relevant address space is
 * destroyed right now.
 */
static void
umtx_exec_hook(void *arg __unused, struct proc *p,
    struct image_params *imgp __unused)
{
	struct thread *td;

	KASSERT(p == curproc, ("need curproc"));
	PROC_LOCK(p);
	KASSERT((p->p_flag & P_HADTHREADS) == 0 ||
	    (p->p_flag & P_STOPPED_SINGLE) != 0,
	    ("curproc must be single-threaded"));
	FOREACH_THREAD_IN_PROC(p, td) {
		KASSERT(td == curthread ||
		    ((td->td_flags & TDF_BOUNDARY) != 0 && TD_IS_SUSPENDED(td)),
		    ("running thread %p %p", p, td));
		/*
		 * umtx_thread_cleanup() touches userspace, so drop the
		 * proc lock around it.  Safe to iterate afterwards: the
		 * process is single-threaded, so the list is stable.
		 */
		PROC_UNLOCK(p);
		umtx_thread_cleanup(td);
		PROC_LOCK(p);
		td->td_rb_list = td->td_rbp_list = td->td_rb_inact = 0;
	}
	PROC_UNLOCK(p);
}
4426
/*
 * thread_exit() hook.
 */
void
umtx_thread_exit(struct thread *td)
{

	umtx_thread_cleanup(td);
}
4436
/*
 * Read a userspace pointer-sized word at 'ptr' into *res, honoring the
 * process ABI: 32-bit processes store 32-bit pointers.  Returns EFAULT
 * on an inaccessible address.
 */
static int
umtx_read_uptr(struct thread *td, uintptr_t ptr, uintptr_t *res)
{
	u_long res1;
#ifdef COMPAT_FREEBSD32
	uint32_t res32;
#endif
	int error;

#ifdef COMPAT_FREEBSD32
	if (SV_PROC_FLAG(td->td_proc, SV_ILP32)) {
		error = fueword32((void *)ptr, &res32);
		if (error == 0)
			res1 = res32;
	} else
#endif
	{
		error = fueword((void *)ptr, &res1);
	}
	if (error == 0)
		*res = res1;
	else
		error = EFAULT;
	return (error);
}
4462
/*
 * Extract the robust-list link (m_rb_lnk) from an already copied-in
 * umutex image, using the 32-bit layout for ILP32 processes.
 */
static void
umtx_read_rb_list(struct thread *td, struct umutex *m, uintptr_t *rb_list)
{
#ifdef COMPAT_FREEBSD32
	struct umutex32 m32;

	if (SV_PROC_FLAG(td->td_proc, SV_ILP32)) {
		memcpy(&m32, m, sizeof(m32));
		*rb_list = m32.m_rb_lnk;
	} else
#endif
		*rb_list = m->m_rb_lnk;
}
4476
/*
 * Process one robust mutex at user address 'rbp': advance *rb_list to
 * the next entry (if requested) and unlock the mutex in RB (robust
 * termination) mode if this thread owns it.  'inact' marks the
 * inactive-mutex slot, where a non-owning entry is tolerated.
 */
static int
umtx_handle_rb(struct thread *td, uintptr_t rbp, uintptr_t *rb_list, bool inact)
{
	struct umutex m;
	int error;

	KASSERT(td->td_proc == curproc, ("need current vmspace"));
	error = copyin((void *)rbp, &m, sizeof(m));
	if (error != 0)
		return (error);
	if (rb_list != NULL)
		umtx_read_rb_list(td, &m, rb_list);
	if ((m.m_flags & UMUTEX_ROBUST) == 0)
		return (EINVAL);
	if ((m.m_owner & ~UMUTEX_CONTESTED) != td->td_tid)
		/* inact is cleared after unlock, allow the inconsistency */
		return (inact ? 0 : EINVAL);
	return (do_unlock_umutex(td, (struct umutex *)rbp, true));
}
4496
/*
 * Walk a userspace robust mutex list starting at 'rb_list', unlocking
 * each owned robust mutex.  Traversal is bounded by umtx_max_rb to
 * defend against malformed (e.g. cyclic) lists; *rb_inact is cleared
 * when the inactive mutex is found on the list.  'name' tags the
 * diagnostics ("" for shared, "priv " for private list).
 */
static void
umtx_cleanup_rb_list(struct thread *td, uintptr_t rb_list, uintptr_t *rb_inact,
    const char *name)
{
	int error, i;
	uintptr_t rbp;
	bool inact;

	if (rb_list == 0)
		return;
	error = umtx_read_uptr(td, rb_list, &rbp);
	for (i = 0; error == 0 && rbp != 0 && i < umtx_max_rb; i++) {
		if (rbp == *rb_inact) {
			inact = true;
			*rb_inact = 0;
		} else
			inact = false;
		error = umtx_handle_rb(td, rbp, &rbp, inact);
	}
	if (i == umtx_max_rb && umtx_verbose_rb) {
		uprintf("comm %s pid %d: reached umtx %smax rb %d\n",
		    td->td_proc->p_comm, td->td_proc->p_pid, name, umtx_max_rb);
	}
	if (error != 0 && umtx_verbose_rb) {
		uprintf("comm %s pid %d: handling %srb error %d\n",
		    td->td_proc->p_comm, td->td_proc->p_pid, name, error);
	}
}
4525
/*
 * Clean up umtx data.  Called at thread exit and from the exec hook:
 * disowns priority-inheritance mutexes still contested by this thread,
 * then walks the registered robust mutex lists, unlocking owned robust
 * mutexes so other threads observe UMUTEX_RB_OWNERDEAD.
 */
static void
umtx_thread_cleanup(struct thread *td)
{
	struct umtx_q *uq;
	struct umtx_pi *pi;
	uintptr_t rb_inact;

	/*
	 * Disown pi mutexes.
	 */
	uq = td->td_umtxq;
	if (uq != NULL) {
		mtx_lock(&umtx_lock);
		uq->uq_inherited_pri = PRI_MAX;
		while ((pi = TAILQ_FIRST(&uq->uq_pi_contested)) != NULL) {
			pi->pi_owner = NULL;
			TAILQ_REMOVE(&uq->uq_pi_contested, pi, pi_link);
		}
		mtx_unlock(&umtx_lock);
		/* Drop any priority lent to this thread via PI. */
		thread_lock(td);
		sched_lend_user_prio(td, PRI_MAX);
		thread_unlock(td);
	}

	/*
	 * Handle terminated robust mutexes.  Must be done after
	 * robust pi disown, otherwise unlock could see unowned
	 * entries.
	 */
	rb_inact = td->td_rb_inact;
	if (rb_inact != 0)
		(void)umtx_read_uptr(td, rb_inact, &rb_inact);
	umtx_cleanup_rb_list(td, td->td_rb_list, &rb_inact, "");
	umtx_cleanup_rb_list(td, td->td_rbp_list, &rb_inact, "priv ");
	if (rb_inact != 0)
		(void)umtx_handle_rb(td, rb_inact, NULL, true);
}
Cache object: 309e12c73e9e5ac29bacf88b3c23fe1c
|