FreeBSD/Linux Kernel Cross Reference
sys/kern/kern_umtx.c
1 /*-
2 * Copyright (c) 2015, 2016 The FreeBSD Foundation
3 * Copyright (c) 2004, David Xu <davidxu@freebsd.org>
4 * Copyright (c) 2002, Jeffrey Roberson <jeff@freebsd.org>
5 * All rights reserved.
6 *
7 * Portions of this software were developed by Konstantin Belousov
8 * under sponsorship from the FreeBSD Foundation.
9 *
10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions
12 * are met:
13 * 1. Redistributions of source code must retain the above copyright
14 * notice unmodified, this list of conditions, and the following
15 * disclaimer.
16 * 2. Redistributions in binary form must reproduce the above copyright
17 * notice, this list of conditions and the following disclaimer in the
18 * documentation and/or other materials provided with the distribution.
19 *
20 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
21 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
22 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
23 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
24 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
25 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
26 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
27 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
28 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
29 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30 */
31
32 #include <sys/cdefs.h>
33 __FBSDID("$FreeBSD: releng/11.1/sys/kern/kern_umtx.c 316120 2017-03-29 01:21:48Z vangyzen $");
34
35 #include "opt_compat.h"
36 #include "opt_umtx_profiling.h"
37
38 #include <sys/param.h>
39 #include <sys/kernel.h>
40 #include <sys/fcntl.h>
41 #include <sys/file.h>
42 #include <sys/filedesc.h>
43 #include <sys/limits.h>
44 #include <sys/lock.h>
45 #include <sys/malloc.h>
46 #include <sys/mman.h>
47 #include <sys/mutex.h>
48 #include <sys/priv.h>
49 #include <sys/proc.h>
50 #include <sys/resource.h>
51 #include <sys/resourcevar.h>
52 #include <sys/rwlock.h>
53 #include <sys/sbuf.h>
54 #include <sys/sched.h>
55 #include <sys/smp.h>
56 #include <sys/sysctl.h>
57 #include <sys/sysent.h>
58 #include <sys/systm.h>
59 #include <sys/sysproto.h>
60 #include <sys/syscallsubr.h>
61 #include <sys/taskqueue.h>
62 #include <sys/time.h>
63 #include <sys/eventhandler.h>
64 #include <sys/umtx.h>
65
66 #include <security/mac/mac_framework.h>
67
68 #include <vm/vm.h>
69 #include <vm/vm_param.h>
70 #include <vm/pmap.h>
71 #include <vm/vm_map.h>
72 #include <vm/vm_object.h>
73
74 #include <machine/atomic.h>
75 #include <machine/cpu.h>
76
77 #ifdef COMPAT_FREEBSD32
78 #include <compat/freebsd32/freebsd32_proto.h>
79 #endif
80
81 #define _UMUTEX_TRY 1
82 #define _UMUTEX_WAIT 2
83
84 #ifdef UMTX_PROFILING
85 #define UPROF_PERC_BIGGER(w, f, sw, sf) \
86 (((w) > (sw)) || ((w) == (sw) && (f) > (sf)))
87 #endif
88
89 /* Priority inheritance mutex info. */
90 struct umtx_pi {
91 /* Owner thread */
92 struct thread *pi_owner;
93
94 /* Reference count */
95 int pi_refcount;
96
97 /* List entry to link umtx holding by thread */
98 TAILQ_ENTRY(umtx_pi) pi_link;
99
100 /* List entry in hash */
101 TAILQ_ENTRY(umtx_pi) pi_hashlink;
102
103 /* List for waiters */
104 TAILQ_HEAD(,umtx_q) pi_blocked;
105
106 /* Identify a userland lock object */
107 struct umtx_key pi_key;
108 };
109
110 /* A userland synchronous object user. */
111 struct umtx_q {
112 /* Linked list for the hash. */
113 TAILQ_ENTRY(umtx_q) uq_link;
114
115 /* Umtx key. */
116 struct umtx_key uq_key;
117
118 /* Umtx flags. */
119 int uq_flags;
120 #define UQF_UMTXQ 0x0001
121
122 /* The thread waits on. */
123 struct thread *uq_thread;
124
125 /*
126 * Blocked on PI mutex. read can use chain lock
127 * or umtx_lock, write must have both chain lock and
128 * umtx_lock being hold.
129 */
130 struct umtx_pi *uq_pi_blocked;
131
132 /* On blocked list */
133 TAILQ_ENTRY(umtx_q) uq_lockq;
134
135 /* Thread contending with us */
136 TAILQ_HEAD(,umtx_pi) uq_pi_contested;
137
138 /* Inherited priority from PP mutex */
139 u_char uq_inherited_pri;
140
141 /* Spare queue ready to be reused */
142 struct umtxq_queue *uq_spare_queue;
143
144 /* The queue we on */
145 struct umtxq_queue *uq_cur_queue;
146 };
147
148 TAILQ_HEAD(umtxq_head, umtx_q);
149
150 /* Per-key wait-queue */
151 struct umtxq_queue {
152 struct umtxq_head head;
153 struct umtx_key key;
154 LIST_ENTRY(umtxq_queue) link;
155 int length;
156 };
157
158 LIST_HEAD(umtxq_list, umtxq_queue);
159
160 /* Userland lock object's wait-queue chain */
161 struct umtxq_chain {
162 /* Lock for this chain. */
163 struct mtx uc_lock;
164
165 /* List of sleep queues. */
166 struct umtxq_list uc_queue[2];
167 #define UMTX_SHARED_QUEUE 0
168 #define UMTX_EXCLUSIVE_QUEUE 1
169
170 LIST_HEAD(, umtxq_queue) uc_spare_queue;
171
172 /* Busy flag */
173 char uc_busy;
174
175 /* Chain lock waiters */
176 int uc_waiters;
177
178 /* All PI in the list */
179 TAILQ_HEAD(,umtx_pi) uc_pi_list;
180
181 #ifdef UMTX_PROFILING
182 u_int length;
183 u_int max_length;
184 #endif
185 };
186
187 #define UMTXQ_LOCKED_ASSERT(uc) mtx_assert(&(uc)->uc_lock, MA_OWNED)
188
189 /*
190 * Don't propagate time-sharing priority, there is a security reason,
191 * a user can simply introduce PI-mutex, let thread A lock the mutex,
192 * and let another thread B block on the mutex, because B is
193 * sleeping, its priority will be boosted, this causes A's priority to
194 * be boosted via priority propagating too and will never be lowered even
195 * if it is using 100%CPU, this is unfair to other processes.
196 */
197
198 #define UPRI(td) (((td)->td_user_pri >= PRI_MIN_TIMESHARE &&\
199 (td)->td_user_pri <= PRI_MAX_TIMESHARE) ?\
200 PRI_MAX_TIMESHARE : (td)->td_user_pri)
201
202 #define GOLDEN_RATIO_PRIME 2654404609U
203 #define UMTX_CHAINS 512
204 #define UMTX_SHIFTS (__WORD_BIT - 9)
205
206 #define GET_SHARE(flags) \
207 (((flags) & USYNC_PROCESS_SHARED) == 0 ? THREAD_SHARE : PROCESS_SHARE)
208
209 #define BUSY_SPINS 200
210
211 struct abs_timeout {
212 int clockid;
213 bool is_abs_real; /* TIMER_ABSTIME && CLOCK_REALTIME* */
214 struct timespec cur;
215 struct timespec end;
216 };
217
218 #ifdef COMPAT_FREEBSD32
219 struct umutex32 {
220 volatile __lwpid_t m_owner; /* Owner of the mutex */
221 __uint32_t m_flags; /* Flags of the mutex */
222 __uint32_t m_ceilings[2]; /* Priority protect ceiling */
223 __uint32_t m_rb_lnk; /* Robust linkage */
224 __uint32_t m_pad;
225 __uint32_t m_spare[2];
226 };
227
228 _Static_assert(sizeof(struct umutex) == sizeof(struct umutex32), "umutex32");
229 _Static_assert(__offsetof(struct umutex, m_spare[0]) ==
230 __offsetof(struct umutex32, m_spare[0]), "m_spare32");
231 #endif
232
233 int umtx_shm_vnobj_persistent = 0;
234 SYSCTL_INT(_kern_ipc, OID_AUTO, umtx_vnode_persistent, CTLFLAG_RWTUN,
235 &umtx_shm_vnobj_persistent, 0,
236 "False forces destruction of umtx attached to file, on last close");
237 static int umtx_max_rb = 1000;
238 SYSCTL_INT(_kern_ipc, OID_AUTO, umtx_max_robust, CTLFLAG_RWTUN,
239 &umtx_max_rb, 0,
240 "");
241
242 static uma_zone_t umtx_pi_zone;
243 static struct umtxq_chain umtxq_chains[2][UMTX_CHAINS];
244 static MALLOC_DEFINE(M_UMTX, "umtx", "UMTX queue memory");
245 static int umtx_pi_allocated;
246
247 static SYSCTL_NODE(_debug, OID_AUTO, umtx, CTLFLAG_RW, 0, "umtx debug");
248 SYSCTL_INT(_debug_umtx, OID_AUTO, umtx_pi_allocated, CTLFLAG_RD,
249 &umtx_pi_allocated, 0, "Allocated umtx_pi");
250 static int umtx_verbose_rb = 1;
251 SYSCTL_INT(_debug_umtx, OID_AUTO, robust_faults_verbose, CTLFLAG_RWTUN,
252 &umtx_verbose_rb, 0,
253 "");
254
255 #ifdef UMTX_PROFILING
256 static long max_length;
257 SYSCTL_LONG(_debug_umtx, OID_AUTO, max_length, CTLFLAG_RD, &max_length, 0, "max_length");
258 static SYSCTL_NODE(_debug_umtx, OID_AUTO, chains, CTLFLAG_RD, 0, "umtx chain stats");
259 #endif
260
261 static void abs_timeout_update(struct abs_timeout *timo);
262
263 static void umtx_shm_init(void);
264 static void umtxq_sysinit(void *);
265 static void umtxq_hash(struct umtx_key *key);
266 static struct umtxq_chain *umtxq_getchain(struct umtx_key *key);
267 static void umtxq_lock(struct umtx_key *key);
268 static void umtxq_unlock(struct umtx_key *key);
269 static void umtxq_busy(struct umtx_key *key);
270 static void umtxq_unbusy(struct umtx_key *key);
271 static void umtxq_insert_queue(struct umtx_q *uq, int q);
272 static void umtxq_remove_queue(struct umtx_q *uq, int q);
273 static int umtxq_sleep(struct umtx_q *uq, const char *wmesg, struct abs_timeout *);
274 static int umtxq_count(struct umtx_key *key);
275 static struct umtx_pi *umtx_pi_alloc(int);
276 static void umtx_pi_free(struct umtx_pi *pi);
277 static int do_unlock_pp(struct thread *td, struct umutex *m, uint32_t flags,
278 bool rb);
279 static void umtx_thread_cleanup(struct thread *td);
280 static void umtx_exec_hook(void *arg __unused, struct proc *p __unused,
281 struct image_params *imgp __unused);
282 SYSINIT(umtx, SI_SUB_EVENTHANDLER+1, SI_ORDER_MIDDLE, umtxq_sysinit, NULL);
283
284 #define umtxq_signal(key, nwake) umtxq_signal_queue((key), (nwake), UMTX_SHARED_QUEUE)
285 #define umtxq_insert(uq) umtxq_insert_queue((uq), UMTX_SHARED_QUEUE)
286 #define umtxq_remove(uq) umtxq_remove_queue((uq), UMTX_SHARED_QUEUE)
287
288 static struct mtx umtx_lock;
289
290 #ifdef UMTX_PROFILING
291 static void
292 umtx_init_profiling(void)
293 {
294 struct sysctl_oid *chain_oid;
295 char chain_name[10];
296 int i;
297
298 for (i = 0; i < UMTX_CHAINS; ++i) {
299 snprintf(chain_name, sizeof(chain_name), "%d", i);
300 chain_oid = SYSCTL_ADD_NODE(NULL,
301 SYSCTL_STATIC_CHILDREN(_debug_umtx_chains), OID_AUTO,
302 chain_name, CTLFLAG_RD, NULL, "umtx hash stats");
303 SYSCTL_ADD_INT(NULL, SYSCTL_CHILDREN(chain_oid), OID_AUTO,
304 "max_length0", CTLFLAG_RD, &umtxq_chains[0][i].max_length, 0, NULL);
305 SYSCTL_ADD_INT(NULL, SYSCTL_CHILDREN(chain_oid), OID_AUTO,
306 "max_length1", CTLFLAG_RD, &umtxq_chains[1][i].max_length, 0, NULL);
307 }
308 }
309
/*
 * Sysctl handler: for each of the two chain tables, report the five
 * buckets with the largest recorded peak queue lengths (max_length),
 * each expressed as a percentage of the summed peaks over all buckets
 * of that table.  Output is built in a fixed 512-byte sbuf.
 */
310 static int
311 sysctl_debug_umtx_chains_peaks(SYSCTL_HANDLER_ARGS)
312 {
313 char buf[512];
314 struct sbuf sb;
315 struct umtxq_chain *uc;
316 u_int fract, i, j, tot, whole;
317 u_int sf0, sf1, sf2, sf3, sf4;
318 u_int si0, si1, si2, si3, si4;
319 u_int sw0, sw1, sw2, sw3, sw4;
320 
321 sbuf_new(&sb, buf, sizeof(buf), SBUF_FIXEDLEN);
322 for (i = 0; i < 2; i++) {
/* First pass: total of all peaks, the percentage denominator. */
323 tot = 0;
324 for (j = 0; j < UMTX_CHAINS; ++j) {
325 uc = &umtxq_chains[i][j];
326 mtx_lock(&uc->uc_lock);
327 tot += uc->max_length;
328 mtx_unlock(&uc->uc_lock);
329 }
330 if (tot == 0)
331 sbuf_printf(&sb, "%u) Empty ", i);
332 else {
/*
 * Second pass: keep a running top-5 (swN = whole part scaled by 100,
 * sfN = fractional part, siN = bucket index), ordered by
 * UPROF_PERC_BIGGER which compares whole then fraction.
 */
333 sf0 = sf1 = sf2 = sf3 = sf4 = 0;
334 si0 = si1 = si2 = si3 = si4 = 0;
335 sw0 = sw1 = sw2 = sw3 = sw4 = 0;
336 for (j = 0; j < UMTX_CHAINS; j++) {
337 uc = &umtxq_chains[i][j];
338 mtx_lock(&uc->uc_lock);
339 whole = uc->max_length * 100;
340 mtx_unlock(&uc->uc_lock);
341 fract = (whole % tot) * 100;
342 if (UPROF_PERC_BIGGER(whole, fract, sw0, sf0)) {
343 sf0 = fract;
344 si0 = j;
345 sw0 = whole;
346 } else if (UPROF_PERC_BIGGER(whole, fract, sw1,
347 sf1)) {
348 sf1 = fract;
349 si1 = j;
350 sw1 = whole;
351 } else if (UPROF_PERC_BIGGER(whole, fract, sw2,
352 sf2)) {
353 sf2 = fract;
354 si2 = j;
355 sw2 = whole;
356 } else if (UPROF_PERC_BIGGER(whole, fract, sw3,
357 sf3)) {
358 sf3 = fract;
359 si3 = j;
360 sw3 = whole;
361 } else if (UPROF_PERC_BIGGER(whole, fract, sw4,
362 sf4)) {
363 sf4 = fract;
364 si4 = j;
365 sw4 = whole;
366 }
367 }
368 sbuf_printf(&sb, "queue %u:\n", i);
369 sbuf_printf(&sb, "1st: %u.%u%% idx: %u\n", sw0 / tot,
370 sf0 / tot, si0);
371 sbuf_printf(&sb, "2nd: %u.%u%% idx: %u\n", sw1 / tot,
372 sf1 / tot, si1);
373 sbuf_printf(&sb, "3rd: %u.%u%% idx: %u\n", sw2 / tot,
374 sf2 / tot, si2);
375 sbuf_printf(&sb, "4th: %u.%u%% idx: %u\n", sw3 / tot,
376 sf3 / tot, si3);
377 sbuf_printf(&sb, "5th: %u.%u%% idx: %u\n", sw4 / tot,
378 sf4 / tot, si4);
379 }
380 }
381 sbuf_trim(&sb);
382 sbuf_finish(&sb);
383 sysctl_handle_string(oidp, sbuf_data(&sb), sbuf_len(&sb), req);
384 sbuf_delete(&sb);
385 return (0);
386 }
387
388 static int
389 sysctl_debug_umtx_chains_clear(SYSCTL_HANDLER_ARGS)
390 {
391 struct umtxq_chain *uc;
392 u_int i, j;
393 int clear, error;
394
395 clear = 0;
396 error = sysctl_handle_int(oidp, &clear, 0, req);
397 if (error != 0 || req->newptr == NULL)
398 return (error);
399
400 if (clear != 0) {
401 for (i = 0; i < 2; ++i) {
402 for (j = 0; j < UMTX_CHAINS; ++j) {
403 uc = &umtxq_chains[i][j];
404 mtx_lock(&uc->uc_lock);
405 uc->length = 0;
406 uc->max_length = 0;
407 mtx_unlock(&uc->uc_lock);
408 }
409 }
410 }
411 return (0);
412 }
413
414 SYSCTL_PROC(_debug_umtx_chains, OID_AUTO, clear,
415 CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, 0, 0,
416 sysctl_debug_umtx_chains_clear, "I", "Clear umtx chains statistics");
417 SYSCTL_PROC(_debug_umtx_chains, OID_AUTO, peaks,
418 CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_MPSAFE, 0, 0,
419 sysctl_debug_umtx_chains_peaks, "A", "Highest peaks in chains max length");
420 #endif
421
422 static void
423 umtxq_sysinit(void *arg __unused)
424 {
425 int i, j;
426
427 umtx_pi_zone = uma_zcreate("umtx pi", sizeof(struct umtx_pi),
428 NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0);
429 for (i = 0; i < 2; ++i) {
430 for (j = 0; j < UMTX_CHAINS; ++j) {
431 mtx_init(&umtxq_chains[i][j].uc_lock, "umtxql", NULL,
432 MTX_DEF | MTX_DUPOK);
433 LIST_INIT(&umtxq_chains[i][j].uc_queue[0]);
434 LIST_INIT(&umtxq_chains[i][j].uc_queue[1]);
435 LIST_INIT(&umtxq_chains[i][j].uc_spare_queue);
436 TAILQ_INIT(&umtxq_chains[i][j].uc_pi_list);
437 umtxq_chains[i][j].uc_busy = 0;
438 umtxq_chains[i][j].uc_waiters = 0;
439 #ifdef UMTX_PROFILING
440 umtxq_chains[i][j].length = 0;
441 umtxq_chains[i][j].max_length = 0;
442 #endif
443 }
444 }
445 #ifdef UMTX_PROFILING
446 umtx_init_profiling();
447 #endif
448 mtx_init(&umtx_lock, "umtx lock", NULL, MTX_DEF);
449 EVENTHANDLER_REGISTER(process_exec, umtx_exec_hook, NULL,
450 EVENTHANDLER_PRI_ANY);
451 umtx_shm_init();
452 }
453
454 struct umtx_q *
455 umtxq_alloc(void)
456 {
457 struct umtx_q *uq;
458
459 uq = malloc(sizeof(struct umtx_q), M_UMTX, M_WAITOK | M_ZERO);
460 uq->uq_spare_queue = malloc(sizeof(struct umtxq_queue), M_UMTX,
461 M_WAITOK | M_ZERO);
462 TAILQ_INIT(&uq->uq_spare_queue->head);
463 TAILQ_INIT(&uq->uq_pi_contested);
464 uq->uq_inherited_pri = PRI_MAX;
465 return (uq);
466 }
467
468 void
469 umtxq_free(struct umtx_q *uq)
470 {
471
472 MPASS(uq->uq_spare_queue != NULL);
473 free(uq->uq_spare_queue, M_UMTX);
474 free(uq, M_UMTX);
475 }
476
477 static inline void
478 umtxq_hash(struct umtx_key *key)
479 {
480 unsigned n;
481
482 n = (uintptr_t)key->info.both.a + key->info.both.b;
483 key->hash = ((n * GOLDEN_RATIO_PRIME) >> UMTX_SHIFTS) % UMTX_CHAINS;
484 }
485
486 static inline struct umtxq_chain *
487 umtxq_getchain(struct umtx_key *key)
488 {
489
490 if (key->type <= TYPE_SEM)
491 return (&umtxq_chains[1][key->hash]);
492 return (&umtxq_chains[0][key->hash]);
493 }
494
495 /*
496 * Lock a chain.
497 */
498 static inline void
499 umtxq_lock(struct umtx_key *key)
500 {
501 struct umtxq_chain *uc;
502
503 uc = umtxq_getchain(key);
504 mtx_lock(&uc->uc_lock);
505 }
506
507 /*
508 * Unlock a chain.
509 */
510 static inline void
511 umtxq_unlock(struct umtx_key *key)
512 {
513 struct umtxq_chain *uc;
514
515 uc = umtxq_getchain(key);
516 mtx_unlock(&uc->uc_lock);
517 }
518
519 /*
520  * Set chain to busy state when following operation
521  * may be blocked (kernel mutex can not be used).
522  * Called and returns with the chain lock held; may drop and
523  * reacquire it while spinning or sleeping for the current holder.
524  */
523 static inline void
524 umtxq_busy(struct umtx_key *key)
525 {
526 struct umtxq_chain *uc;
527 
528 uc = umtxq_getchain(key);
529 mtx_assert(&uc->uc_lock, MA_OWNED);
530 if (uc->uc_busy) {
531 #ifdef SMP
/*
 * Adaptive spin: drop the chain lock and poll uc_busy up to
 * BUSY_SPINS times before paying for a sleep.  Only worthwhile
 * when another CPU could clear the flag concurrently.
 */
532 if (smp_cpus > 1) {
533 int count = BUSY_SPINS;
534 if (count > 0) {
535 umtxq_unlock(key);
536 while (uc->uc_busy && --count > 0)
537 cpu_spinwait();
538 umtxq_lock(key);
539 }
540 }
541 #endif
/* Still busy: sleep until the holder's umtxq_unbusy() wakes us. */
542 while (uc->uc_busy) {
543 uc->uc_waiters++;
544 msleep(uc, &uc->uc_lock, 0, "umtxqb", 0);
545 uc->uc_waiters--;
546 }
547 }
548 uc->uc_busy = 1;
549 }
550
551 /*
552 * Unbusy a chain.
553 */
554 static inline void
555 umtxq_unbusy(struct umtx_key *key)
556 {
557 struct umtxq_chain *uc;
558
559 uc = umtxq_getchain(key);
560 mtx_assert(&uc->uc_lock, MA_OWNED);
561 KASSERT(uc->uc_busy != 0, ("not busy"));
562 uc->uc_busy = 0;
563 if (uc->uc_waiters)
564 wakeup_one(uc);
565 }
566
/*
 * Convenience wrapper: take the chain lock around umtxq_unbusy()
 * for callers that do not already hold it.
 */
static inline void
umtxq_unbusy_unlocked(struct umtx_key *key)
{

	umtxq_lock(key);
	umtxq_unbusy(key);
	umtxq_unlock(key);
}
575
576 static struct umtxq_queue *
577 umtxq_queue_lookup(struct umtx_key *key, int q)
578 {
579 struct umtxq_queue *uh;
580 struct umtxq_chain *uc;
581
582 uc = umtxq_getchain(key);
583 UMTXQ_LOCKED_ASSERT(uc);
584 LIST_FOREACH(uh, &uc->uc_queue[q], link) {
585 if (umtx_key_match(&uh->key, key))
586 return (uh);
587 }
588
589 return (NULL);
590 }
591
/*
 * Enqueue uq on the wait queue for its key on queue index q, creating
 * the per-key queue head from uq's spare entry when this is the first
 * waiter.  uq always donates its spare entry (either as the new queue
 * head or to the chain's spare list); umtxq_remove_queue hands one
 * back.  Chain lock must be held.
 */
592 static inline void
593 umtxq_insert_queue(struct umtx_q *uq, int q)
594 {
595 struct umtxq_queue *uh;
596 struct umtxq_chain *uc;
597 
598 uc = umtxq_getchain(&uq->uq_key);
599 UMTXQ_LOCKED_ASSERT(uc);
600 KASSERT((uq->uq_flags & UQF_UMTXQ) == 0, ("umtx_q is already on queue"));
601 uh = umtxq_queue_lookup(&uq->uq_key, q);
602 if (uh != NULL) {
/* Queue for this key exists: park our spare entry on the chain. */
603 LIST_INSERT_HEAD(&uc->uc_spare_queue, uq->uq_spare_queue, link);
604 } else {
/* First waiter: our spare entry becomes the queue head. */
605 uh = uq->uq_spare_queue;
606 uh->key = uq->uq_key;
607 LIST_INSERT_HEAD(&uc->uc_queue[q], uh, link);
608 #ifdef UMTX_PROFILING
609 uc->length++;
610 if (uc->length > uc->max_length) {
611 uc->max_length = uc->length;
612 if (uc->max_length > max_length)
613 max_length = uc->max_length;
614 }
615 #endif
616 }
617 uq->uq_spare_queue = NULL;
618 
619 TAILQ_INSERT_TAIL(&uh->head, uq, uq_link);
620 uh->length++;
621 uq->uq_flags |= UQF_UMTXQ;
622 uq->uq_cur_queue = uh;
623 return;
624 }
625
/*
 * Dequeue uq from its current wait queue on queue index q (no-op if
 * not queued).  uq leaves with a spare queue entry again: either the
 * now-empty queue head itself, or one reclaimed from the chain's
 * spare list (balancing the donation made in umtxq_insert_queue).
 * Chain lock must be held.
 */
626 static inline void
627 umtxq_remove_queue(struct umtx_q *uq, int q)
628 {
629 struct umtxq_chain *uc;
630 struct umtxq_queue *uh;
631 
632 uc = umtxq_getchain(&uq->uq_key);
633 UMTXQ_LOCKED_ASSERT(uc);
634 if (uq->uq_flags & UQF_UMTXQ) {
635 uh = uq->uq_cur_queue;
636 TAILQ_REMOVE(&uh->head, uq, uq_link);
637 uh->length--;
638 uq->uq_flags &= ~UQF_UMTXQ;
639 if (TAILQ_EMPTY(&uh->head)) {
640 KASSERT(uh->length == 0,
641 ("inconsistent umtxq_queue length"));
642 #ifdef UMTX_PROFILING
643 uc->length--;
644 #endif
/* Last waiter left: the queue head becomes our spare entry. */
645 LIST_REMOVE(uh, link);
646 } else {
/* Queue still in use: reclaim a spare entry from the chain. */
647 uh = LIST_FIRST(&uc->uc_spare_queue);
648 KASSERT(uh != NULL, ("uc_spare_queue is empty"));
649 LIST_REMOVE(uh, link);
650 }
651 uq->uq_spare_queue = uh;
652 uq->uq_cur_queue = NULL;
653 }
654 }
655
656 /*
657 * Check if there are multiple waiters
658 */
659 static int
660 umtxq_count(struct umtx_key *key)
661 {
662 struct umtxq_chain *uc;
663 struct umtxq_queue *uh;
664
665 uc = umtxq_getchain(key);
666 UMTXQ_LOCKED_ASSERT(uc);
667 uh = umtxq_queue_lookup(key, UMTX_SHARED_QUEUE);
668 if (uh != NULL)
669 return (uh->length);
670 return (0);
671 }
672
673 /*
674 * Check if there are multiple PI waiters and returns first
675 * waiter.
676 */
677 static int
678 umtxq_count_pi(struct umtx_key *key, struct umtx_q **first)
679 {
680 struct umtxq_chain *uc;
681 struct umtxq_queue *uh;
682
683 *first = NULL;
684 uc = umtxq_getchain(key);
685 UMTXQ_LOCKED_ASSERT(uc);
686 uh = umtxq_queue_lookup(key, UMTX_SHARED_QUEUE);
687 if (uh != NULL) {
688 *first = TAILQ_FIRST(&uh->head);
689 return (uh->length);
690 }
691 return (0);
692 }
693
694 static int
695 umtxq_check_susp(struct thread *td)
696 {
697 struct proc *p;
698 int error;
699
700 /*
701 * The check for TDF_NEEDSUSPCHK is racy, but it is enough to
702 * eventually break the lockstep loop.
703 */
704 if ((td->td_flags & TDF_NEEDSUSPCHK) == 0)
705 return (0);
706 error = 0;
707 p = td->td_proc;
708 PROC_LOCK(p);
709 if (P_SHOULDSTOP(p) ||
710 ((p->p_flag & P_TRACED) && (td->td_dbgflags & TDB_SUSPEND))) {
711 if (p->p_flag & P_SINGLE_EXIT)
712 error = EINTR;
713 else
714 error = ERESTART;
715 }
716 PROC_UNLOCK(p);
717 return (error);
718 }
719
720 /*
721 * Wake up threads waiting on an userland object.
722 */
723
724 static int
725 umtxq_signal_queue(struct umtx_key *key, int n_wake, int q)
726 {
727 struct umtxq_chain *uc;
728 struct umtxq_queue *uh;
729 struct umtx_q *uq;
730 int ret;
731
732 ret = 0;
733 uc = umtxq_getchain(key);
734 UMTXQ_LOCKED_ASSERT(uc);
735 uh = umtxq_queue_lookup(key, q);
736 if (uh != NULL) {
737 while ((uq = TAILQ_FIRST(&uh->head)) != NULL) {
738 umtxq_remove_queue(uq, q);
739 wakeup(uq);
740 if (++ret >= n_wake)
741 return (ret);
742 }
743 }
744 return (ret);
745 }
746
747
748 /*
749 * Wake up specified thread.
750 */
751 static inline void
752 umtxq_signal_thread(struct umtx_q *uq)
753 {
754 struct umtxq_chain *uc;
755
756 uc = umtxq_getchain(&uq->uq_key);
757 UMTXQ_LOCKED_ASSERT(uc);
758 umtxq_remove(uq);
759 wakeup(uq);
760 }
761
762 static inline int
763 tstohz(const struct timespec *tsp)
764 {
765 struct timeval tv;
766
767 TIMESPEC_TO_TIMEVAL(&tv, tsp);
768 return tvtohz(&tv);
769 }
770
/*
 * Initialize an abs_timeout.  Relative timeouts are converted to an
 * absolute deadline against the current clock reading; absolute
 * timeouts are stored as given.  is_abs_real marks absolute deadlines
 * on a real-time clock, whose initial clock read is deferred to
 * umtxq_sleep (after td_rtcgen is set) per the comment below.
 */
771 static void
772 abs_timeout_init(struct abs_timeout *timo, int clockid, int absolute,
773 const struct timespec *timeout)
774 {
775 
776 timo->clockid = clockid;
777 if (!absolute) {
778 timo->is_abs_real = false;
779 abs_timeout_update(timo);
/* Deadline = now + relative interval. */
780 timo->end = timo->cur;
781 timespecadd(&timo->end, timeout);
782 } else {
783 timo->end = *timeout;
784 timo->is_abs_real = clockid == CLOCK_REALTIME ||
785 clockid == CLOCK_REALTIME_FAST ||
786 clockid == CLOCK_REALTIME_PRECISE;
787 /*
788 * If is_abs_real, umtxq_sleep will read the clock
789 * after setting td_rtcgen; otherwise, read it here.
790 */
791 if (!timo->is_abs_real) {
792 abs_timeout_update(timo);
793 }
794 }
795 }
796
/*
 * Initialize an abs_timeout from a userspace _umtx_time request,
 * honoring its clock id and UMTX_ABSTIME flag.
 */
797 static void
798 abs_timeout_init2(struct abs_timeout *timo, const struct _umtx_time *umtxtime)
799 {
800 
801 abs_timeout_init(timo, umtxtime->_clockid,
802 (umtxtime->_flags & UMTX_ABSTIME) != 0, &umtxtime->_timeout);
803 }
804
/*
 * Refresh the timeout's cached "current time" from its clock.
 */
805 static inline void
806 abs_timeout_update(struct abs_timeout *timo)
807 {
808 
809 kern_clock_gettime(curthread, timo->clockid, &timo->cur);
810 }
811
812 static int
813 abs_timeout_gethz(struct abs_timeout *timo)
814 {
815 struct timespec tts;
816
817 if (timespeccmp(&timo->end, &timo->cur, <=))
818 return (-1);
819 tts = timo->end;
820 timespecsub(&tts, &timo->cur);
821 return (tstohz(&tts));
822 }
823
824 static uint32_t
825 umtx_unlock_val(uint32_t flags, bool rb)
826 {
827
828 if (rb)
829 return (UMUTEX_RB_OWNERDEAD);
830 else if ((flags & UMUTEX_NONCONSISTENT) != 0)
831 return (UMUTEX_RB_NOTRECOV);
832 else
833 return (UMUTEX_UNOWNED);
834
835 }
836
837 /*
838 * Put thread into sleep state, before sleeping, check if
839 * thread was removed from umtx queue.  Returns 0 on wakeup (or if
840 * already dequeued), ETIMEDOUT on deadline expiry, or the
841 * EINTR/ERESTART from msleep.  Called and returns with the chain
842 * lock held; msleep(PDROP) releases it across each sleep.
843 */
841 static inline int
842 umtxq_sleep(struct umtx_q *uq, const char *wmesg, struct abs_timeout *abstime)
843 {
844 struct umtxq_chain *uc;
845 int error, timo;
846 
/*
 * For absolute real-time deadlines, record the RTC generation before
 * reading the clock — NOTE(review): presumably so a step of the
 * real-time clock interrupts the sleep and the deadline is
 * re-evaluated; confirm against the td_rtcgen consumers.
 */
847 if (abstime != NULL && abstime->is_abs_real) {
848 curthread->td_rtcgen = atomic_load_acq_int(&rtc_generation);
849 abs_timeout_update(abstime);
850 }
851 
852 uc = umtxq_getchain(&uq->uq_key);
853 UMTXQ_LOCKED_ASSERT(uc);
854 for (;;) {
/* Already taken off the queue: we were signalled; report success. */
855 if (!(uq->uq_flags & UQF_UMTXQ)) {
856 error = 0;
857 break;
858 }
859 if (abstime != NULL) {
860 timo = abs_timeout_gethz(abstime);
861 if (timo < 0) {
862 error = ETIMEDOUT;
863 break;
864 }
865 } else
866 timo = 0;
867 error = msleep(uq, &uc->uc_lock, PCATCH | PDROP, wmesg, timo);
868 if (error == EINTR || error == ERESTART) {
/* Re-take the chain lock dropped by PDROP before returning. */
869 umtxq_lock(&uq->uq_key);
870 break;
871 }
872 if (abstime != NULL) {
873 if (abstime->is_abs_real)
874 curthread->td_rtcgen =
875 atomic_load_acq_int(&rtc_generation);
876 abs_timeout_update(abstime);
877 }
878 umtxq_lock(&uq->uq_key);
879 }
880 
881 curthread->td_rtcgen = 0;
882 return (error);
883 }
884
885 /*
886 * Convert userspace address into unique logical address (fill *key).
887 * THREAD_SHARE keys are (vmspace, address) pairs; PROCESS_SHARE keys
888 * (and AUTO_SHARE addresses in VM_INHERIT_SHARE mappings) are
889 * (vm object, offset) pairs, holding a reference on the object that
890 * the caller must drop with umtx_key_release().  Returns 0 or EFAULT
891 * when the address is not mapped writable.
892 */
888 int
889 umtx_key_get(const void *addr, int type, int share, struct umtx_key *key)
890 {
891 struct thread *td = curthread;
892 vm_map_t map;
893 vm_map_entry_t entry;
894 vm_pindex_t pindex;
895 vm_prot_t prot;
896 boolean_t wired;
897 
898 key->type = type;
899 if (share == THREAD_SHARE) {
900 key->shared = 0;
901 key->info.private.vs = td->td_proc->p_vmspace;
902 key->info.private.addr = (uintptr_t)addr;
903 } else {
904 MPASS(share == PROCESS_SHARE || share == AUTO_SHARE);
905 map = &td->td_proc->p_vmspace->vm_map;
906 if (vm_map_lookup(&map, (vm_offset_t)addr, VM_PROT_WRITE,
907 &entry, &key->info.shared.object, &pindex, &prot,
908 &wired) != KERN_SUCCESS) {
909 return (EFAULT);
910 }
911 
912 if ((share == PROCESS_SHARE) ||
913 (share == AUTO_SHARE &&
914 VM_INHERIT_SHARE == entry->inheritance)) {
915 key->shared = 1;
/* Backing-object-relative offset is stable across processes. */
916 key->info.shared.offset = (vm_offset_t)addr -
917 entry->start + entry->offset;
918 vm_object_reference(key->info.shared.object);
919 } else {
/* AUTO_SHARE on a private mapping degrades to a private key. */
920 key->shared = 0;
921 key->info.private.vs = td->td_proc->p_vmspace;
922 key->info.private.addr = (uintptr_t)addr;
923 }
924 vm_map_lookup_done(map, entry);
925 }
926 
927 umtxq_hash(key);
928 return (0);
929 }
930
931 /*
932 * Release key.
933 */
934 void
935 umtx_key_release(struct umtx_key *key)
936 {
937 if (key->shared)
938 vm_object_deallocate(key->info.shared.object);
939 }
940
941 /*
942 * Fetch and compare value, sleep on the address if value is not
943 * changed (futex-style wait).  The word at addr is read as a u_long
944 * or, for compat32 callers, a uint32_t.  Returns 0 on wakeup or
945 * value mismatch, ETIMEDOUT, EINTR, or EFAULT.
946 */
944 static int
945 do_wait(struct thread *td, void *addr, u_long id,
946 struct _umtx_time *timeout, int compat32, int is_private)
947 {
948 struct abs_timeout timo;
949 struct umtx_q *uq;
950 u_long tmp;
951 uint32_t tmp32;
952 int error = 0;
953 
954 uq = td->td_umtxq;
955 if ((error = umtx_key_get(addr, TYPE_SIMPLE_WAIT,
956 is_private ? THREAD_SHARE : AUTO_SHARE, &uq->uq_key)) != 0)
957 return (error);
958 
959 if (timeout != NULL)
960 abs_timeout_init2(&timo, timeout);
961 
/*
 * Queue ourselves BEFORE reading the word: a waker that changes the
 * value after our read will still find us on the queue, so the
 * wakeup cannot be lost.
 */
962 umtxq_lock(&uq->uq_key);
963 umtxq_insert(uq);
964 umtxq_unlock(&uq->uq_key);
965 if (compat32 == 0) {
966 error = fueword(addr, &tmp);
967 if (error != 0)
968 error = EFAULT;
969 } else {
970 error = fueword32(addr, &tmp32);
971 if (error == 0)
972 tmp = tmp32;
973 else
974 error = EFAULT;
975 }
976 umtxq_lock(&uq->uq_key);
977 if (error == 0) {
/* Only sleep if the word still holds the expected value. */
978 if (tmp == id)
979 error = umtxq_sleep(uq, "uwait", timeout == NULL ?
980 NULL : &timo);
/* Dequeued by a waker: treat as success regardless of sleep result. */
981 if ((uq->uq_flags & UQF_UMTXQ) == 0)
982 error = 0;
983 else
984 umtxq_remove(uq);
985 } else if ((uq->uq_flags & UQF_UMTXQ) != 0) {
986 umtxq_remove(uq);
987 }
988 umtxq_unlock(&uq->uq_key);
989 umtx_key_release(&uq->uq_key);
990 if (error == ERESTART)
991 error = EINTR;
992 return (error);
993 }
994
995 /*
996 * Wake up threads sleeping on the specified address.
997 */
998 int
999 kern_umtx_wake(struct thread *td, void *uaddr, int n_wake, int is_private)
1000 {
1001 struct umtx_key key;
1002 int ret;
1003
1004 if ((ret = umtx_key_get(uaddr, TYPE_SIMPLE_WAIT,
1005 is_private ? THREAD_SHARE : AUTO_SHARE, &key)) != 0)
1006 return (ret);
1007 umtxq_lock(&key);
1008 umtxq_signal(&key, n_wake);
1009 umtxq_unlock(&key);
1010 umtx_key_release(&key);
1011 return (0);
1012 }
1013
1014 /*
1015 * Lock PTHREAD_PRIO_NONE protocol POSIX mutex.  mode selects plain
1016 * lock, _UMUTEX_TRY (fail with EBUSY instead of sleeping) or
1017 * _UMUTEX_WAIT (only wait for the mutex to become lockable, without
1018 * acquiring it).  Returns 0, EOWNERDEAD when a robust owner died,
1019 * ENOTRECOVERABLE, EBUSY, ETIMEDOUT, EINTR/ERESTART or EFAULT.
1020 * All userland accesses go through fueword32/casueword32 since the
1021 * umutex word can fault at any time.
1022 */
1017 static int
1018 do_lock_normal(struct thread *td, struct umutex *m, uint32_t flags,
1019 struct _umtx_time *timeout, int mode)
1020 {
1021 struct abs_timeout timo;
1022 struct umtx_q *uq;
1023 uint32_t owner, old, id;
1024 int error, rv;
1025 
1026 id = td->td_tid;
1027 uq = td->td_umtxq;
1028 error = 0;
1029 if (timeout != NULL)
1030 abs_timeout_init2(&timo, timeout);
1031 
1032 /*
1033 * Care must be exercised when dealing with umtx structure. It
1034 * can fault on any access.
1035 */
1036 for (;;) {
1037 rv = fueword32(&m->m_owner, &owner);
1038 if (rv == -1)
1039 return (EFAULT);
1040 if (mode == _UMUTEX_WAIT) {
/* Wait mode: done as soon as the mutex is (or can be) takeable. */
1041 if (owner == UMUTEX_UNOWNED ||
1042 owner == UMUTEX_CONTESTED ||
1043 owner == UMUTEX_RB_OWNERDEAD ||
1044 owner == UMUTEX_RB_NOTRECOV)
1045 return (0);
1046 } else {
1047 /*
1048 * Robust mutex terminated. Kernel duty is to
1049 * return EOWNERDEAD to the userspace. The
1050 * umutex.m_flags UMUTEX_NONCONSISTENT is set
1051 * by the common userspace code.
1052 */
1053 if (owner == UMUTEX_RB_OWNERDEAD) {
1054 rv = casueword32(&m->m_owner,
1055 UMUTEX_RB_OWNERDEAD, &owner,
1056 id | UMUTEX_CONTESTED);
1057 if (rv == -1)
1058 return (EFAULT);
1059 if (owner == UMUTEX_RB_OWNERDEAD)
1060 return (EOWNERDEAD); /* success */
1061 rv = umtxq_check_susp(td);
1062 if (rv != 0)
1063 return (rv);
1064 continue;
1065 }
1066 if (owner == UMUTEX_RB_NOTRECOV)
1067 return (ENOTRECOVERABLE);
1068 
1069 
1070 /*
1071 * Try the uncontested case. This should be
1072 * done in userland.
1073 */
1074 rv = casueword32(&m->m_owner, UMUTEX_UNOWNED,
1075 &owner, id);
1076 /* The address was invalid. */
1077 if (rv == -1)
1078 return (EFAULT);
1079 
1080 /* The acquire succeeded. */
1081 if (owner == UMUTEX_UNOWNED)
1082 return (0);
1083 
1084 /*
1085 * If no one owns it but it is contested try
1086 * to acquire it.
1087 */
1088 if (owner == UMUTEX_CONTESTED) {
1089 rv = casueword32(&m->m_owner,
1090 UMUTEX_CONTESTED, &owner,
1091 id | UMUTEX_CONTESTED);
1092 /* The address was invalid. */
1093 if (rv == -1)
1094 return (EFAULT);
1095 
1096 if (owner == UMUTEX_CONTESTED)
1097 return (0);
1098 
1099 rv = umtxq_check_susp(td);
1100 if (rv != 0)
1101 return (rv);
1102 
1103 /*
1104 * If this failed the lock has
1105 * changed, restart.
1106 */
1107 continue;
1108 }
1109 }
1110 
1111 if (mode == _UMUTEX_TRY)
1112 return (EBUSY);
1113 
1114 /*
1115 * If we caught a signal, we have retried and now
1116 * exit immediately.
1117 */
1118 if (error != 0)
1119 return (error);
1120 
1121 if ((error = umtx_key_get(m, TYPE_NORMAL_UMUTEX,
1122 GET_SHARE(flags), &uq->uq_key)) != 0)
1123 return (error);
1124 
/* Busy the chain and queue ourselves before touching m_owner again. */
1125 umtxq_lock(&uq->uq_key);
1126 umtxq_busy(&uq->uq_key);
1127 umtxq_insert(uq);
1128 umtxq_unlock(&uq->uq_key);
1129 
1130 /*
1131 * Set the contested bit so that a release in user space
1132 * knows to use the system call for unlock. If this fails
1133 * either some one else has acquired the lock or it has been
1134 * released.
1135 */
1136 rv = casueword32(&m->m_owner, owner, &old,
1137 owner | UMUTEX_CONTESTED);
1138 
1139 /* The address was invalid. */
1140 if (rv == -1) {
1141 umtxq_lock(&uq->uq_key);
1142 umtxq_remove(uq);
1143 umtxq_unbusy(&uq->uq_key);
1144 umtxq_unlock(&uq->uq_key);
1145 umtx_key_release(&uq->uq_key);
1146 return (EFAULT);
1147 }
1148 
1149 /*
1150 * We set the contested bit, sleep. Otherwise the lock changed
1151 * and we need to retry or we lost a race to the thread
1152 * unlocking the umtx.
1153 */
1154 umtxq_lock(&uq->uq_key);
1155 umtxq_unbusy(&uq->uq_key);
1156 if (old == owner)
1157 error = umtxq_sleep(uq, "umtxn", timeout == NULL ?
1158 NULL : &timo);
1159 umtxq_remove(uq);
1160 umtxq_unlock(&uq->uq_key);
1161 umtx_key_release(&uq->uq_key);
1162 
1163 if (error == 0)
1164 error = umtxq_check_susp(td);
1165 }
1166 
1167 return (0);
1168 }
1169
1170 /*
1171 * Unlock PTHREAD_PRIO_NONE protocol POSIX mutex.  rb requests the
1172 * robust owner-dead unlock value (see umtx_unlock_val).  Returns 0,
1173 * EPERM when the caller does not own the mutex, EFAULT, or EINVAL
1174 * when the owner word changed underneath us.
1175 */
1173 static int
1174 do_unlock_normal(struct thread *td, struct umutex *m, uint32_t flags, bool rb)
1175 {
1176 struct umtx_key key;
1177 uint32_t owner, old, id, newlock;
1178 int error, count;
1179 
1180 id = td->td_tid;
1181 /*
1182 * Make sure we own this mtx.
1183 */
1184 error = fueword32(&m->m_owner, &owner);
1185 if (error == -1)
1186 return (EFAULT);
1187 
1188 if ((owner & ~UMUTEX_CONTESTED) != id)
1189 return (EPERM);
1190 
1191 newlock = umtx_unlock_val(flags, rb);
/* Fast path: no contested bit, release with a single CAS. */
1192 if ((owner & UMUTEX_CONTESTED) == 0) {
1193 error = casueword32(&m->m_owner, owner, &old, newlock);
1194 if (error == -1)
1195 return (EFAULT);
1196 if (old == owner)
1197 return (0);
1198 owner = old;
1199 }
1200 
1201 /* We should only ever be in here for contested locks */
1202 if ((error = umtx_key_get(m, TYPE_NORMAL_UMUTEX, GET_SHARE(flags),
1203 &key)) != 0)
1204 return (error);
1205 
/* Busy the chain so the waiter count stays meaningful. */
1206 umtxq_lock(&key);
1207 umtxq_busy(&key);
1208 count = umtxq_count(&key);
1209 umtxq_unlock(&key);
1210 
1211 /*
1212 * When unlocking the umtx, it must be marked as unowned if
1213 * there is zero or one thread only waiting for it.
1214 * Otherwise, it must be marked as contested.
1215 */
1216 if (count > 1)
1217 newlock |= UMUTEX_CONTESTED;
1218 error = casueword32(&m->m_owner, owner, &old, newlock);
1219 umtxq_lock(&key);
1220 umtxq_signal(&key, 1);
1221 umtxq_unbusy(&key);
1222 umtxq_unlock(&key);
1223 umtx_key_release(&key);
1224 if (error == -1)
1225 return (EFAULT);
1226 if (old != owner)
1227 return (EINVAL);
1228 return (0);
1229 }
1230
/*
 * Check if the mutex is available and wake up a waiter,
 * only for simple mutex.
 *
 * Returns 0 if there was nothing to do or a waiter was signalled,
 * EFAULT on userspace access failure.
 */
static int
do_wake_umutex(struct thread *td, struct umutex *m)
{
	struct umtx_key key;
	uint32_t owner;
	uint32_t flags;
	int error;
	int count;

	error = fueword32(&m->m_owner, &owner);
	if (error == -1)
		return (EFAULT);

	/*
	 * Nothing to do unless the mutex is effectively free or in one
	 * of the robust terminal states (owner died / not recoverable).
	 */
	if ((owner & ~UMUTEX_CONTESTED) != 0 && owner != UMUTEX_RB_OWNERDEAD &&
	    owner != UMUTEX_RB_NOTRECOV)
		return (0);

	error = fueword32(&m->m_flags, &flags);
	if (error == -1)
		return (EFAULT);

	/* We should only ever be in here for contested locks */
	if ((error = umtx_key_get(m, TYPE_NORMAL_UMUTEX, GET_SHARE(flags),
	    &key)) != 0)
		return (error);

	umtxq_lock(&key);
	umtxq_busy(&key);
	count = umtxq_count(&key);
	umtxq_unlock(&key);

	/*
	 * With at most one waiter the contested bit can be cleared;
	 * the robust terminal states must be left intact for the
	 * next locker to observe.
	 */
	if (count <= 1 && owner != UMUTEX_RB_OWNERDEAD &&
	    owner != UMUTEX_RB_NOTRECOV) {
		error = casueword32(&m->m_owner, UMUTEX_CONTESTED, &owner,
		    UMUTEX_UNOWNED);
		if (error == -1)
			error = EFAULT;
	}

	/* Wake one waiter if the mutex is takeable right now. */
	umtxq_lock(&key);
	if (error == 0 && count != 0 && ((owner & ~UMUTEX_CONTESTED) == 0 ||
	    owner == UMUTEX_RB_OWNERDEAD || owner == UMUTEX_RB_NOTRECOV))
		umtxq_signal(&key, 1);
	umtxq_unbusy(&key);
	umtxq_unlock(&key);
	umtx_key_release(&key);
	return (error);
}
1283
/*
 * Check if the mutex has waiters and tries to fix contention bit.
 *
 * Used by userland to repair a mutex whose contested bit may be
 * missing; sets UMUTEX_CONTESTED when waiters exist and wakes a
 * waiter when the mutex appears takeable.  Returns 0, EINVAL for an
 * unknown protocol combination, or EFAULT on userspace access
 * failure.
 */
static int
do_wake2_umutex(struct thread *td, struct umutex *m, uint32_t flags)
{
	struct umtx_key key;
	uint32_t owner, old;
	int type;
	int error;
	int count;

	/* Map the protocol/robust flag combination to a key type. */
	switch (flags & (UMUTEX_PRIO_INHERIT | UMUTEX_PRIO_PROTECT |
	    UMUTEX_ROBUST)) {
	case 0:
	case UMUTEX_ROBUST:
		type = TYPE_NORMAL_UMUTEX;
		break;
	case UMUTEX_PRIO_INHERIT:
		type = TYPE_PI_UMUTEX;
		break;
	case (UMUTEX_PRIO_INHERIT | UMUTEX_ROBUST):
		type = TYPE_PI_ROBUST_UMUTEX;
		break;
	case UMUTEX_PRIO_PROTECT:
		type = TYPE_PP_UMUTEX;
		break;
	case (UMUTEX_PRIO_PROTECT | UMUTEX_ROBUST):
		type = TYPE_PP_ROBUST_UMUTEX;
		break;
	default:
		return (EINVAL);
	}
	if ((error = umtx_key_get(m, type, GET_SHARE(flags), &key)) != 0)
		return (error);

	owner = 0;
	umtxq_lock(&key);
	umtxq_busy(&key);
	count = umtxq_count(&key);
	umtxq_unlock(&key);
	/*
	 * Only repair contention bit if there is a waiter, this means the mutex
	 * is still being referenced by userland code, otherwise don't update
	 * any memory.
	 */
	if (count > 1) {
		/* Multiple waiters: force the contested bit on. */
		error = fueword32(&m->m_owner, &owner);
		if (error == -1)
			error = EFAULT;
		while (error == 0 && (owner & UMUTEX_CONTESTED) == 0) {
			error = casueword32(&m->m_owner, owner, &old,
			    owner | UMUTEX_CONTESTED);
			if (error == -1) {
				error = EFAULT;
				break;
			}
			if (old == owner)
				break;
			/* CAS raced with userland; retry with new value. */
			owner = old;
			error = umtxq_check_susp(td);
			if (error != 0)
				break;
		}
	} else if (count == 1) {
		/*
		 * Single waiter: only set the contested bit while the
		 * mutex is actually owned; an unowned word is left
		 * untouched so the waiter can take it directly.
		 */
		error = fueword32(&m->m_owner, &owner);
		if (error == -1)
			error = EFAULT;
		while (error == 0 && (owner & ~UMUTEX_CONTESTED) != 0 &&
		    (owner & UMUTEX_CONTESTED) == 0) {
			error = casueword32(&m->m_owner, owner, &old,
			    owner | UMUTEX_CONTESTED);
			if (error == -1) {
				error = EFAULT;
				break;
			}
			if (old == owner)
				break;
			owner = old;
			error = umtxq_check_susp(td);
			if (error != 0)
				break;
		}
	}
	umtxq_lock(&key);
	if (error == EFAULT) {
		/* Lock word is gone; wake everybody to avoid hangs. */
		umtxq_signal(&key, INT_MAX);
	} else if (count != 0 && ((owner & ~UMUTEX_CONTESTED) == 0 ||
	    owner == UMUTEX_RB_OWNERDEAD || owner == UMUTEX_RB_NOTRECOV))
		umtxq_signal(&key, 1);
	umtxq_unbusy(&key);
	umtxq_unlock(&key);
	umtx_key_release(&key);
	return (error);
}
1379
1380 static inline struct umtx_pi *
1381 umtx_pi_alloc(int flags)
1382 {
1383 struct umtx_pi *pi;
1384
1385 pi = uma_zalloc(umtx_pi_zone, M_ZERO | flags);
1386 TAILQ_INIT(&pi->pi_blocked);
1387 atomic_add_int(&umtx_pi_allocated, 1);
1388 return (pi);
1389 }
1390
1391 static inline void
1392 umtx_pi_free(struct umtx_pi *pi)
1393 {
1394 uma_zfree(umtx_pi_zone, pi);
1395 atomic_add_int(&umtx_pi_allocated, -1);
1396 }
1397
/*
 * Adjust the thread's position on a pi_state after its priority has been
 * changed.
 *
 * The blocked list of a PI mutex is kept sorted by user priority.
 * Returns 1 when 'td' is (still) queued on 'pi', 0 when pi is NULL.
 * Caller must hold umtx_lock.
 */
static int
umtx_pi_adjust_thread(struct umtx_pi *pi, struct thread *td)
{
	struct umtx_q *uq, *uq1, *uq2;
	struct thread *td1;

	mtx_assert(&umtx_lock, MA_OWNED);
	if (pi == NULL)
		return (0);

	uq = td->td_umtxq;

	/*
	 * Check if the thread needs to be moved on the blocked chain.
	 * It needs to be moved if either its priority is lower than
	 * the previous thread or higher than the next thread.
	 */
	uq1 = TAILQ_PREV(uq, umtxq_head, uq_lockq);
	uq2 = TAILQ_NEXT(uq, uq_lockq);
	if ((uq1 != NULL && UPRI(td) < UPRI(uq1->uq_thread)) ||
	    (uq2 != NULL && UPRI(td) > UPRI(uq2->uq_thread))) {
		/*
		 * Remove thread from blocked chain and determine where
		 * it should be moved to.
		 */
		TAILQ_REMOVE(&pi->pi_blocked, uq, uq_lockq);
		TAILQ_FOREACH(uq1, &pi->pi_blocked, uq_lockq) {
			td1 = uq1->uq_thread;
			MPASS(td1->td_proc->p_magic == P_MAGIC);
			/* Insert before the first lower-priority entry. */
			if (UPRI(td1) > UPRI(td))
				break;
		}

		if (uq1 == NULL)
			TAILQ_INSERT_TAIL(&pi->pi_blocked, uq, uq_lockq);
		else
			TAILQ_INSERT_BEFORE(uq1, uq, uq_lockq);
	}
	return (1);
}
1442
1443 static struct umtx_pi *
1444 umtx_pi_next(struct umtx_pi *pi)
1445 {
1446 struct umtx_q *uq_owner;
1447
1448 if (pi->pi_owner == NULL)
1449 return (NULL);
1450 uq_owner = pi->pi_owner->td_umtxq;
1451 if (uq_owner == NULL)
1452 return (NULL);
1453 return (uq_owner->uq_pi_blocked);
1454 }
1455
1456 /*
1457 * Floyd's Cycle-Finding Algorithm.
1458 */
1459 static bool
1460 umtx_pi_check_loop(struct umtx_pi *pi)
1461 {
1462 struct umtx_pi *pi1; /* fast iterator */
1463
1464 mtx_assert(&umtx_lock, MA_OWNED);
1465 if (pi == NULL)
1466 return (false);
1467 pi1 = pi;
1468 for (;;) {
1469 pi = umtx_pi_next(pi);
1470 if (pi == NULL)
1471 break;
1472 pi1 = umtx_pi_next(pi1);
1473 if (pi1 == NULL)
1474 break;
1475 pi1 = umtx_pi_next(pi1);
1476 if (pi1 == NULL)
1477 break;
1478 if (pi == pi1)
1479 return (true);
1480 }
1481 return (false);
1482 }
1483
/*
 * Propagate priority when a thread is blocked on POSIX
 * PI mutex.
 *
 * Walks the chain of lock owners starting from the mutex 'td' is
 * blocked on, lending td's priority to each owner whose lent
 * priority is worse.  Stops at the first owner that already runs at
 * an equal or better priority, at curthread, or at an unowned
 * mutex.  Caller must hold umtx_lock.
 */
static void
umtx_propagate_priority(struct thread *td)
{
	struct umtx_q *uq;
	struct umtx_pi *pi;
	int pri;

	mtx_assert(&umtx_lock, MA_OWNED);
	pri = UPRI(td);
	uq = td->td_umtxq;
	pi = uq->uq_pi_blocked;
	if (pi == NULL)
		return;
	/* A cyclic chain would make the walk below spin forever. */
	if (umtx_pi_check_loop(pi))
		return;

	for (;;) {
		td = pi->pi_owner;
		if (td == NULL || td == curthread)
			return;

		MPASS(td->td_proc != NULL);
		MPASS(td->td_proc->p_magic == P_MAGIC);

		thread_lock(td);
		if (td->td_lend_user_pri > pri)
			sched_lend_user_prio(td, pri);
		else {
			/* Owner already at least this good; done. */
			thread_unlock(td);
			break;
		}
		thread_unlock(td);

		/*
		 * Pick up the lock that td is blocked on.
		 */
		uq = td->td_umtxq;
		pi = uq->uq_pi_blocked;
		if (pi == NULL)
			break;
		/* Resort td on the list if needed. */
		umtx_pi_adjust_thread(pi, td);
	}
}
1532
/*
 * Unpropagate priority for a PI mutex when a thread blocked on
 * it is interrupted by signal or resumed by others.
 *
 * Recomputes, for each owner along the blocking chain, the best
 * priority still justified by its remaining contested mutexes and
 * its own PP-inherited priority, and lends that.  Caller must hold
 * umtx_lock.
 */
static void
umtx_repropagate_priority(struct umtx_pi *pi)
{
	struct umtx_q *uq, *uq_owner;
	struct umtx_pi *pi2;
	int pri;

	mtx_assert(&umtx_lock, MA_OWNED);

	/* Bail out on a cyclic (deadlocked) chain. */
	if (umtx_pi_check_loop(pi))
		return;
	while (pi != NULL && pi->pi_owner != NULL) {
		pri = PRI_MAX;
		uq_owner = pi->pi_owner->td_umtxq;

		/*
		 * Best priority among the heads of all blocked lists
		 * of the mutexes this owner still holds; each list is
		 * priority sorted, so only the first entry matters.
		 */
		TAILQ_FOREACH(pi2, &uq_owner->uq_pi_contested, pi_link) {
			uq = TAILQ_FIRST(&pi2->pi_blocked);
			if (uq != NULL) {
				if (pri > UPRI(uq->uq_thread))
					pri = UPRI(uq->uq_thread);
			}
		}

		if (pri > uq_owner->uq_inherited_pri)
			pri = uq_owner->uq_inherited_pri;
		thread_lock(pi->pi_owner);
		sched_lend_user_prio(pi->pi_owner, pri);
		thread_unlock(pi->pi_owner);
		/* Continue with the mutex the owner itself waits on. */
		if ((pi = uq_owner->uq_pi_blocked) != NULL)
			umtx_pi_adjust_thread(pi, uq_owner->uq_thread);
	}
}
1569
/*
 * Insert a PI mutex into owned list.
 *
 * Records 'owner' as the owner of 'pi' and links the mutex onto the
 * owner's list of contested mutexes.  The mutex must currently be
 * unowned; caller must hold umtx_lock.
 */
static void
umtx_pi_setowner(struct umtx_pi *pi, struct thread *owner)
{
	struct umtx_q *uq_owner;

	uq_owner = owner->td_umtxq;
	mtx_assert(&umtx_lock, MA_OWNED);
	if (pi->pi_owner != NULL)
		panic("pi_owner != NULL");
	pi->pi_owner = owner;
	TAILQ_INSERT_TAIL(&uq_owner->uq_pi_contested, pi, pi_link);
}
1585
1586
1587 /*
1588 * Disown a PI mutex, and remove it from the owned list.
1589 */
1590 static void
1591 umtx_pi_disown(struct umtx_pi *pi)
1592 {
1593
1594 mtx_assert(&umtx_lock, MA_OWNED);
1595 TAILQ_REMOVE(&pi->pi_owner->td_umtxq->uq_pi_contested, pi, pi_link);
1596 pi->pi_owner = NULL;
1597 }
1598
/*
 * Claim ownership of a PI mutex.
 *
 * Returns 0 when 'owner' now owns 'pi' (or already did), EPERM when
 * some other thread owns it (userland corrupted the mutex).  On a
 * successful claim, lends the priority of the best blocked waiter
 * to the new owner.
 */
static int
umtx_pi_claim(struct umtx_pi *pi, struct thread *owner)
{
	struct umtx_q *uq;
	int pri;

	mtx_lock(&umtx_lock);
	if (pi->pi_owner == owner) {
		mtx_unlock(&umtx_lock);
		return (0);
	}

	if (pi->pi_owner != NULL) {
		/*
		 * userland may have already messed the mutex, sigh.
		 */
		mtx_unlock(&umtx_lock);
		return (EPERM);
	}
	umtx_pi_setowner(pi, owner);
	/* Blocked list is priority sorted: first entry is the best. */
	uq = TAILQ_FIRST(&pi->pi_blocked);
	if (uq != NULL) {
		pri = UPRI(uq->uq_thread);
		thread_lock(owner);
		if (pri < UPRI(owner))
			sched_lend_user_prio(owner, pri);
		thread_unlock(owner);
	}
	mtx_unlock(&umtx_lock);
	return (0);
}
1633
/*
 * Adjust a thread's order position in its blocked PI mutex,
 * this may result new priority propagating process.
 *
 * Called by the scheduler after td's priority changed; 'oldpri' is
 * part of the interface but unused here.  No-op when td is not
 * blocked on a PI mutex.
 */
void
umtx_pi_adjust(struct thread *td, u_char oldpri)
{
	struct umtx_q *uq;
	struct umtx_pi *pi;

	uq = td->td_umtxq;
	mtx_lock(&umtx_lock);
	/*
	 * Pick up the lock that td is blocked on.
	 */
	pi = uq->uq_pi_blocked;
	if (pi != NULL) {
		/* Resort td in the waiter list, then re-lend priorities. */
		umtx_pi_adjust_thread(pi, td);
		umtx_repropagate_priority(pi);
	}
	mtx_unlock(&umtx_lock);
}
1656
/*
 * Sleep on a PI mutex.
 *
 * Queues 'uq' on both the umtx chain and the priority-sorted
 * blocked list of 'pi', records the mutex owner (looked up by the
 * 'owner' tid; cross-process when 'shared'), propagates the
 * sleeper's priority to the owner chain, then sleeps.  On wakeup,
 * undoes the queuing and re-lends priorities.  Called with the
 * chain locked and busied; returns with it unlocked.  Returns the
 * umtxq_sleep() error (0, EINTR/ERESTART, ETIMEDOUT).
 */
static int
umtxq_sleep_pi(struct umtx_q *uq, struct umtx_pi *pi, uint32_t owner,
    const char *wmesg, struct abs_timeout *timo, bool shared)
{
	struct umtxq_chain *uc;
	struct thread *td, *td1;
	struct umtx_q *uq1;
	int error, pri;

	error = 0;
	td = uq->uq_thread;
	KASSERT(td == curthread, ("inconsistent uq_thread"));
	uc = umtxq_getchain(&uq->uq_key);
	UMTXQ_LOCKED_ASSERT(uc);
	KASSERT(uc->uc_busy != 0, ("umtx chain is not busy"));
	umtxq_insert(uq);
	mtx_lock(&umtx_lock);
	if (pi->pi_owner == NULL) {
		/*
		 * Owner unknown yet; drop umtx_lock around tdfind()
		 * and re-check, since another thread may have set the
		 * owner meanwhile.
		 */
		mtx_unlock(&umtx_lock);
		td1 = tdfind(owner, shared ? -1 : td->td_proc->p_pid);
		mtx_lock(&umtx_lock);
		if (td1 != NULL) {
			if (pi->pi_owner == NULL)
				umtx_pi_setowner(pi, td1);
			/* tdfind() returned with the proc locked. */
			PROC_UNLOCK(td1->td_proc);
		}
	}

	/* Keep pi_blocked sorted by user priority. */
	TAILQ_FOREACH(uq1, &pi->pi_blocked, uq_lockq) {
		pri = UPRI(uq1->uq_thread);
		if (pri > UPRI(td))
			break;
	}

	if (uq1 != NULL)
		TAILQ_INSERT_BEFORE(uq1, uq, uq_lockq);
	else
		TAILQ_INSERT_TAIL(&pi->pi_blocked, uq, uq_lockq);

	uq->uq_pi_blocked = pi;
	thread_lock(td);
	td->td_flags |= TDF_UPIBLOCKED;
	thread_unlock(td);
	/* Lend our priority down the owner chain before sleeping. */
	umtx_propagate_priority(td);
	mtx_unlock(&umtx_lock);
	umtxq_unbusy(&uq->uq_key);

	error = umtxq_sleep(uq, wmesg, timo);
	umtxq_remove(uq);

	/* Undo the PI bookkeeping and give back any lent priority. */
	mtx_lock(&umtx_lock);
	uq->uq_pi_blocked = NULL;
	thread_lock(td);
	td->td_flags &= ~TDF_UPIBLOCKED;
	thread_unlock(td);
	TAILQ_REMOVE(&pi->pi_blocked, uq, uq_lockq);
	umtx_repropagate_priority(pi);
	mtx_unlock(&umtx_lock);
	umtxq_unlock(&uq->uq_key);

	return (error);
}
1722
/*
 * Add reference count for a PI mutex.
 *
 * The count is protected by the chain lock, which must be held.
 */
static void
umtx_pi_ref(struct umtx_pi *pi)
{
	struct umtxq_chain *uc;

	uc = umtxq_getchain(&pi->pi_key);
	UMTXQ_LOCKED_ASSERT(uc);
	pi->pi_refcount++;
}
1735
/*
 * Decrease reference count for a PI mutex, if the counter
 * is decreased to zero, its memory space is freed.
 *
 * On the final release the mutex is disowned if still owned,
 * unhashed from the chain, and returned to the zone.  The chain
 * lock must be held.
 */
static void
umtx_pi_unref(struct umtx_pi *pi)
{
	struct umtxq_chain *uc;

	uc = umtxq_getchain(&pi->pi_key);
	UMTXQ_LOCKED_ASSERT(uc);
	KASSERT(pi->pi_refcount > 0, ("invalid reference count"));
	if (--pi->pi_refcount == 0) {
		mtx_lock(&umtx_lock);
		if (pi->pi_owner != NULL)
			umtx_pi_disown(pi);
		KASSERT(TAILQ_EMPTY(&pi->pi_blocked),
		    ("blocked queue not empty"));
		mtx_unlock(&umtx_lock);
		TAILQ_REMOVE(&uc->uc_pi_list, pi, pi_hashlink);
		umtx_pi_free(pi);
	}
}
1759
1760 /*
1761 * Find a PI mutex in hash table.
1762 */
1763 static struct umtx_pi *
1764 umtx_pi_lookup(struct umtx_key *key)
1765 {
1766 struct umtxq_chain *uc;
1767 struct umtx_pi *pi;
1768
1769 uc = umtxq_getchain(key);
1770 UMTXQ_LOCKED_ASSERT(uc);
1771
1772 TAILQ_FOREACH(pi, &uc->uc_pi_list, pi_hashlink) {
1773 if (umtx_key_match(&pi->pi_key, key)) {
1774 return (pi);
1775 }
1776 }
1777 return (NULL);
1778 }
1779
/*
 * Insert a PI mutex into hash table.
 *
 * The chain lock must be held; pi_key must already be set.
 */
static inline void
umtx_pi_insert(struct umtx_pi *pi)
{
	struct umtxq_chain *uc;

	uc = umtxq_getchain(&pi->pi_key);
	UMTXQ_LOCKED_ASSERT(uc);
	TAILQ_INSERT_TAIL(&uc->uc_pi_list, pi, pi_hashlink);
}
1792
/*
 * Lock a PI mutex.
 *
 * Implements the PTHREAD_PRIO_INHERIT lock protocol: find or create
 * the kernel umtx_pi for this mutex, then loop trying to take the
 * userspace lock word by CAS, sleeping with priority inheritance
 * when it is held.  'try' requests trylock semantics (EBUSY instead
 * of sleeping).  Returns 0, EOWNERDEAD for a claimed robust mutex
 * whose owner died, ENOTRECOVERABLE, EDEADLK, EBUSY, EFAULT, or a
 * sleep error.
 */
static int
do_lock_pi(struct thread *td, struct umutex *m, uint32_t flags,
    struct _umtx_time *timeout, int try)
{
	struct abs_timeout timo;
	struct umtx_q *uq;
	struct umtx_pi *pi, *new_pi;
	uint32_t id, old_owner, owner, old;
	int error, rv;

	id = td->td_tid;
	uq = td->td_umtxq;

	if ((error = umtx_key_get(m, (flags & UMUTEX_ROBUST) != 0 ?
	    TYPE_PI_ROBUST_UMUTEX : TYPE_PI_UMUTEX, GET_SHARE(flags),
	    &uq->uq_key)) != 0)
		return (error);

	if (timeout != NULL)
		abs_timeout_init2(&timo, timeout);

	/*
	 * Find the umtx_pi for this key, creating one if needed.  The
	 * M_NOWAIT attempt keeps the chain locked; on failure, drop
	 * the lock for a sleeping allocation and re-check for a
	 * concurrent insert.
	 */
	umtxq_lock(&uq->uq_key);
	pi = umtx_pi_lookup(&uq->uq_key);
	if (pi == NULL) {
		new_pi = umtx_pi_alloc(M_NOWAIT);
		if (new_pi == NULL) {
			umtxq_unlock(&uq->uq_key);
			new_pi = umtx_pi_alloc(M_WAITOK);
			umtxq_lock(&uq->uq_key);
			pi = umtx_pi_lookup(&uq->uq_key);
			if (pi != NULL) {
				/* Lost the race; discard ours. */
				umtx_pi_free(new_pi);
				new_pi = NULL;
			}
		}
		if (new_pi != NULL) {
			new_pi->pi_key = uq->uq_key;
			umtx_pi_insert(new_pi);
			pi = new_pi;
		}
	}
	/* Hold a reference so the pi survives while we use it. */
	umtx_pi_ref(pi);
	umtxq_unlock(&uq->uq_key);

	/*
	 * Care must be exercised when dealing with umtx structure. It
	 * can fault on any access.
	 */
	for (;;) {
		/*
		 * Try the uncontested case. This should be done in userland.
		 */
		rv = casueword32(&m->m_owner, UMUTEX_UNOWNED, &owner, id);
		/* The address was invalid. */
		if (rv == -1) {
			error = EFAULT;
			break;
		}

		/* The acquire succeeded. */
		if (owner == UMUTEX_UNOWNED) {
			error = 0;
			break;
		}

		if (owner == UMUTEX_RB_NOTRECOV) {
			error = ENOTRECOVERABLE;
			break;
		}

		/* If no one owns it but it is contested try to acquire it. */
		if (owner == UMUTEX_CONTESTED || owner == UMUTEX_RB_OWNERDEAD) {
			old_owner = owner;
			rv = casueword32(&m->m_owner, owner, &owner,
			    id | UMUTEX_CONTESTED);
			/* The address was invalid. */
			if (rv == -1) {
				error = EFAULT;
				break;
			}

			if (owner == old_owner) {
				/* We grabbed it; record kernel ownership. */
				umtxq_lock(&uq->uq_key);
				umtxq_busy(&uq->uq_key);
				error = umtx_pi_claim(pi, td);
				umtxq_unbusy(&uq->uq_key);
				umtxq_unlock(&uq->uq_key);
				if (error != 0) {
					/*
					 * Since we're going to return an
					 * error, restore the m_owner to its
					 * previous, unowned state to avoid
					 * compounding the problem.
					 */
					(void)casuword32(&m->m_owner,
					    id | UMUTEX_CONTESTED,
					    old_owner);
				}
				if (error == 0 &&
				    old_owner == UMUTEX_RB_OWNERDEAD)
					error = EOWNERDEAD;
				break;
			}

			error = umtxq_check_susp(td);
			if (error != 0)
				break;

			/* If this failed the lock has changed, restart. */
			continue;
		}

		/* Locking ourselves would deadlock. */
		if ((owner & ~UMUTEX_CONTESTED) == id) {
			error = EDEADLK;
			break;
		}

		if (try != 0) {
			error = EBUSY;
			break;
		}

		/*
		 * If we caught a signal, we have retried and now
		 * exit immediately.
		 */
		if (error != 0)
			break;

		umtxq_lock(&uq->uq_key);
		umtxq_busy(&uq->uq_key);
		umtxq_unlock(&uq->uq_key);

		/*
		 * Set the contested bit so that a release in user space
		 * knows to use the system call for unlock. If this fails
		 * either some one else has acquired the lock or it has been
		 * released.
		 */
		rv = casueword32(&m->m_owner, owner, &old, owner |
		    UMUTEX_CONTESTED);

		/* The address was invalid. */
		if (rv == -1) {
			umtxq_unbusy_unlocked(&uq->uq_key);
			error = EFAULT;
			break;
		}

		umtxq_lock(&uq->uq_key);
		/*
		 * We set the contested bit, sleep. Otherwise the lock changed
		 * and we need to retry or we lost a race to the thread
		 * unlocking the umtx. Note that the UMUTEX_RB_OWNERDEAD
		 * value for owner is impossible there.
		 */
		if (old == owner) {
			error = umtxq_sleep_pi(uq, pi,
			    owner & ~UMUTEX_CONTESTED,
			    "umtxpi", timeout == NULL ? NULL : &timo,
			    (flags & USYNC_PROCESS_SHARED) != 0);
			if (error != 0)
				continue;
		} else {
			umtxq_unbusy(&uq->uq_key);
			umtxq_unlock(&uq->uq_key);
		}

		error = umtxq_check_susp(td);
		if (error != 0)
			break;
	}

	/* Drop our pi reference; may free the structure. */
	umtxq_lock(&uq->uq_key);
	umtx_pi_unref(pi);
	umtxq_unlock(&uq->uq_key);

	umtx_key_release(&uq->uq_key);
	return (error);
}
1976
/*
 * Unlock a PI mutex.
 *
 * Releases the userspace lock word, disowns the kernel umtx_pi,
 * recomputes the lent priority from the mutexes the caller still
 * holds, and wakes the highest-priority waiter.  'rb' selects the
 * robust unlock value.  Returns 0, EFAULT, EPERM when the caller
 * does not own the mutex, or EINVAL when the lock word changed
 * underneath us.
 */
static int
do_unlock_pi(struct thread *td, struct umutex *m, uint32_t flags, bool rb)
{
	struct umtx_key key;
	struct umtx_q *uq_first, *uq_first2, *uq_me;
	struct umtx_pi *pi, *pi2;
	uint32_t id, new_owner, old, owner;
	int count, error, pri;

	id = td->td_tid;
	/*
	 * Make sure we own this mtx.
	 */
	error = fueword32(&m->m_owner, &owner);
	if (error == -1)
		return (EFAULT);

	if ((owner & ~UMUTEX_CONTESTED) != id)
		return (EPERM);

	new_owner = umtx_unlock_val(flags, rb);

	/* This should be done in userland */
	if ((owner & UMUTEX_CONTESTED) == 0) {
		error = casueword32(&m->m_owner, owner, &old, new_owner);
		if (error == -1)
			return (EFAULT);
		if (old == owner)
			return (0);
		/* Lost a race: contested bit appeared meanwhile. */
		owner = old;
	}

	/* We should only ever be in here for contested locks */
	if ((error = umtx_key_get(m, (flags & UMUTEX_ROBUST) != 0 ?
	    TYPE_PI_ROBUST_UMUTEX : TYPE_PI_UMUTEX, GET_SHARE(flags),
	    &key)) != 0)
		return (error);

	umtxq_lock(&key);
	umtxq_busy(&key);
	count = umtxq_count_pi(&key, &uq_first);
	if (uq_first != NULL) {
		mtx_lock(&umtx_lock);
		pi = uq_first->uq_pi_blocked;
		KASSERT(pi != NULL, ("pi == NULL?"));
		if (pi->pi_owner != td && !(rb && pi->pi_owner == NULL)) {
			mtx_unlock(&umtx_lock);
			umtxq_unbusy(&key);
			umtxq_unlock(&key);
			umtx_key_release(&key);
			/* userland messed the mutex */
			return (EPERM);
		}
		uq_me = td->td_umtxq;
		if (pi->pi_owner == td)
			umtx_pi_disown(pi);
		/* get highest priority thread which is still sleeping. */
		uq_first = TAILQ_FIRST(&pi->pi_blocked);
		while (uq_first != NULL &&
		    (uq_first->uq_flags & UQF_UMTXQ) == 0) {
			uq_first = TAILQ_NEXT(uq_first, uq_lockq);
		}
		/*
		 * Recompute our lent priority from the mutexes we
		 * still hold, now that this one is disowned.
		 */
		pri = PRI_MAX;
		TAILQ_FOREACH(pi2, &uq_me->uq_pi_contested, pi_link) {
			uq_first2 = TAILQ_FIRST(&pi2->pi_blocked);
			if (uq_first2 != NULL) {
				if (pri > UPRI(uq_first2->uq_thread))
					pri = UPRI(uq_first2->uq_thread);
			}
		}
		thread_lock(td);
		sched_lend_user_prio(td, pri);
		thread_unlock(td);
		mtx_unlock(&umtx_lock);
		if (uq_first)
			umtxq_signal_thread(uq_first);
	} else {
		pi = umtx_pi_lookup(&key);
		/*
		 * A umtx_pi can exist if a signal or timeout removed the
		 * last waiter from the umtxq, but there is still
		 * a thread in do_lock_pi() holding the umtx_pi.
		 */
		if (pi != NULL) {
			/*
			 * The umtx_pi can be unowned, such as when a thread
			 * has just entered do_lock_pi(), allocated the
			 * umtx_pi, and unlocked the umtxq.
			 * If the current thread owns it, it must disown it.
			 */
			mtx_lock(&umtx_lock);
			if (pi->pi_owner == td)
				umtx_pi_disown(pi);
			mtx_unlock(&umtx_lock);
		}
	}
	umtxq_unlock(&key);

	/*
	 * When unlocking the umtx, it must be marked as unowned if
	 * there is zero or one thread only waiting for it.
	 * Otherwise, it must be marked as contested.
	 */

	if (count > 1)
		new_owner |= UMUTEX_CONTESTED;
	error = casueword32(&m->m_owner, owner, &old, new_owner);

	umtxq_unbusy_unlocked(&key);
	umtx_key_release(&key);
	if (error == -1)
		return (EFAULT);
	if (old != owner)
		return (EINVAL);
	return (0);
}
2096
2097 /*
2098 * Lock a PP mutex.
2099 */
2100 static int
2101 do_lock_pp(struct thread *td, struct umutex *m, uint32_t flags,
2102 struct _umtx_time *timeout, int try)
2103 {
2104 struct abs_timeout timo;
2105 struct umtx_q *uq, *uq2;
2106 struct umtx_pi *pi;
2107 uint32_t ceiling;
2108 uint32_t owner, id;
2109 int error, pri, old_inherited_pri, su, rv;
2110
2111 id = td->td_tid;
2112 uq = td->td_umtxq;
2113 if ((error = umtx_key_get(m, (flags & UMUTEX_ROBUST) != 0 ?
2114 TYPE_PP_ROBUST_UMUTEX : TYPE_PP_UMUTEX, GET_SHARE(flags),
2115 &uq->uq_key)) != 0)
2116 return (error);
2117
2118 if (timeout != NULL)
2119 abs_timeout_init2(&timo, timeout);
2120
2121 su = (priv_check(td, PRIV_SCHED_RTPRIO) == 0);
2122 for (;;) {
2123 old_inherited_pri = uq->uq_inherited_pri;
2124 umtxq_lock(&uq->uq_key);
2125 umtxq_busy(&uq->uq_key);
2126 umtxq_unlock(&uq->uq_key);
2127
2128 rv = fueword32(&m->m_ceilings[0], &ceiling);
2129 if (rv == -1) {
2130 error = EFAULT;
2131 goto out;
2132 }
2133 ceiling = RTP_PRIO_MAX - ceiling;
2134 if (ceiling > RTP_PRIO_MAX) {
2135 error = EINVAL;
2136 goto out;
2137 }
2138
2139 mtx_lock(&umtx_lock);
2140 if (UPRI(td) < PRI_MIN_REALTIME + ceiling) {
2141 mtx_unlock(&umtx_lock);
2142 error = EINVAL;
2143 goto out;
2144 }
2145 if (su && PRI_MIN_REALTIME + ceiling < uq->uq_inherited_pri) {
2146 uq->uq_inherited_pri = PRI_MIN_REALTIME + ceiling;
2147 thread_lock(td);
2148 if (uq->uq_inherited_pri < UPRI(td))
2149 sched_lend_user_prio(td, uq->uq_inherited_pri);
2150 thread_unlock(td);
2151 }
2152 mtx_unlock(&umtx_lock);
2153
2154 rv = casueword32(&m->m_owner, UMUTEX_CONTESTED, &owner,
2155 id | UMUTEX_CONTESTED);
2156 /* The address was invalid. */
2157 if (rv == -1) {
2158 error = EFAULT;
2159 break;
2160 }
2161
2162 if (owner == UMUTEX_CONTESTED) {
2163 error = 0;
2164 break;
2165 } else if (owner == UMUTEX_RB_OWNERDEAD) {
2166 rv = casueword32(&m->m_owner, UMUTEX_RB_OWNERDEAD,
2167 &owner, id | UMUTEX_CONTESTED);
2168 if (rv == -1) {
2169 error = EFAULT;
2170 break;
2171 }
2172 if (owner == UMUTEX_RB_OWNERDEAD) {
2173 error = EOWNERDEAD; /* success */
2174 break;
2175 }
2176 error = 0;
2177 } else if (owner == UMUTEX_RB_NOTRECOV) {
2178 error = ENOTRECOVERABLE;
2179 break;
2180 }
2181
2182 if (try != 0) {
2183 error = EBUSY;
2184 break;
2185 }
2186
2187 /*
2188 * If we caught a signal, we have retried and now
2189 * exit immediately.
2190 */
2191 if (error != 0)
2192 break;
2193
2194 umtxq_lock(&uq->uq_key);
2195 umtxq_insert(uq);
2196 umtxq_unbusy(&uq->uq_key);
2197 error = umtxq_sleep(uq, "umtxpp", timeout == NULL ?
2198 NULL : &timo);
2199 umtxq_remove(uq);
2200 umtxq_unlock(&uq->uq_key);
2201
2202 mtx_lock(&umtx_lock);
2203 uq->uq_inherited_pri = old_inherited_pri;
2204 pri = PRI_MAX;
2205 TAILQ_FOREACH(pi, &uq->uq_pi_contested, pi_link) {
2206 uq2 = TAILQ_FIRST(&pi->pi_blocked);
2207 if (uq2 != NULL) {
2208 if (pri > UPRI(uq2->uq_thread))
2209 pri = UPRI(uq2->uq_thread);
2210 }
2211 }
2212 if (pri > uq->uq_inherited_pri)
2213 pri = uq->uq_inherited_pri;
2214 thread_lock(td);
2215 sched_lend_user_prio(td, pri);
2216 thread_unlock(td);
2217 mtx_unlock(&umtx_lock);
2218 }
2219
2220 if (error != 0 && error != EOWNERDEAD) {
2221 mtx_lock(&umtx_lock);
2222 uq->uq_inherited_pri = old_inherited_pri;
2223 pri = PRI_MAX;
2224 TAILQ_FOREACH(pi, &uq->uq_pi_contested, pi_link) {
2225 uq2 = TAILQ_FIRST(&pi->pi_blocked);
2226 if (uq2 != NULL) {
2227 if (pri > UPRI(uq2->uq_thread))
2228 pri = UPRI(uq2->uq_thread);
2229 }
2230 }
2231 if (pri > uq->uq_inherited_pri)
2232 pri = uq->uq_inherited_pri;
2233 thread_lock(td);
2234 sched_lend_user_prio(td, pri);
2235 thread_unlock(td);
2236 mtx_unlock(&umtx_lock);
2237 }
2238
2239 out:
2240 umtxq_unbusy_unlocked(&uq->uq_key);
2241 umtx_key_release(&uq->uq_key);
2242 return (error);
2243 }
2244
/*
 * Unlock a PP mutex.
 *
 * Resets the lock word (keeping UMUTEX_CONTESTED set, see below),
 * wakes one waiter, restores the caller's inherited priority from
 * the relinquish-ceiling in m_ceilings[1], and re-lends the best
 * remaining priority.  'rb' selects the robust unlock value.
 * Returns 0, EFAULT, EPERM when the caller does not own the mutex,
 * EINVAL for a bad ceiling, or a copyin error.
 */
static int
do_unlock_pp(struct thread *td, struct umutex *m, uint32_t flags, bool rb)
{
	struct umtx_key key;
	struct umtx_q *uq, *uq2;
	struct umtx_pi *pi;
	uint32_t id, owner, rceiling;
	int error, pri, new_inherited_pri, su;

	id = td->td_tid;
	uq = td->td_umtxq;
	su = (priv_check(td, PRIV_SCHED_RTPRIO) == 0);

	/*
	 * Make sure we own this mtx.
	 */
	error = fueword32(&m->m_owner, &owner);
	if (error == -1)
		return (EFAULT);

	if ((owner & ~UMUTEX_CONTESTED) != id)
		return (EPERM);

	error = copyin(&m->m_ceilings[1], &rceiling, sizeof(uint32_t));
	if (error != 0)
		return (error);

	/* rceiling of -1 means: fall back to no boost at all. */
	if (rceiling == -1)
		new_inherited_pri = PRI_MAX;
	else {
		rceiling = RTP_PRIO_MAX - rceiling;
		if (rceiling > RTP_PRIO_MAX)
			return (EINVAL);
		new_inherited_pri = PRI_MIN_REALTIME + rceiling;
	}

	if ((error = umtx_key_get(m, (flags & UMUTEX_ROBUST) != 0 ?
	    TYPE_PP_ROBUST_UMUTEX : TYPE_PP_UMUTEX, GET_SHARE(flags),
	    &key)) != 0)
		return (error);
	umtxq_lock(&key);
	umtxq_busy(&key);
	umtxq_unlock(&key);
	/*
	 * For priority protected mutex, always set unlocked state
	 * to UMUTEX_CONTESTED, so that userland always enters kernel
	 * to lock the mutex, it is necessary because thread priority
	 * has to be adjusted for such mutex.
	 */
	error = suword32(&m->m_owner, umtx_unlock_val(flags, rb) |
	    UMUTEX_CONTESTED);

	umtxq_lock(&key);
	if (error == 0)
		umtxq_signal(&key, 1);
	umtxq_unbusy(&key);
	umtxq_unlock(&key);

	if (error == -1)
		error = EFAULT;
	else {
		/* Recompute and lend our remaining priority boost. */
		mtx_lock(&umtx_lock);
		if (su != 0)
			uq->uq_inherited_pri = new_inherited_pri;
		pri = PRI_MAX;
		TAILQ_FOREACH(pi, &uq->uq_pi_contested, pi_link) {
			uq2 = TAILQ_FIRST(&pi->pi_blocked);
			if (uq2 != NULL) {
				if (pri > UPRI(uq2->uq_thread))
					pri = UPRI(uq2->uq_thread);
			}
		}
		if (pri > uq->uq_inherited_pri)
			pri = uq->uq_inherited_pri;
		thread_lock(td);
		sched_lend_user_prio(td, pri);
		thread_unlock(td);
		mtx_unlock(&umtx_lock);
	}
	umtx_key_release(&key);
	return (error);
}
2330
/*
 * Set the priority ceiling of a PP mutex.
 *
 * Acquires the (always-"contested") PP lock word, stores the new
 * ceiling, and releases it again; if the caller already owns the
 * mutex only the ceiling is updated.  On success the previous
 * ceiling is copied out through 'old_ceiling' when non-NULL.
 * Returns 0, EINVAL for a non-PP mutex or out-of-range ceiling,
 * EOWNERDEAD/ENOTRECOVERABLE for robust terminal states, EFAULT,
 * or a sleep error.
 */
static int
do_set_ceiling(struct thread *td, struct umutex *m, uint32_t ceiling,
    uint32_t *old_ceiling)
{
	struct umtx_q *uq;
	uint32_t flags, id, owner, save_ceiling;
	int error, rv, rv1;

	error = fueword32(&m->m_flags, &flags);
	if (error == -1)
		return (EFAULT);
	if ((flags & UMUTEX_PRIO_PROTECT) == 0)
		return (EINVAL);
	if (ceiling > RTP_PRIO_MAX)
		return (EINVAL);
	id = td->td_tid;
	uq = td->td_umtxq;
	if ((error = umtx_key_get(m, (flags & UMUTEX_ROBUST) != 0 ?
	    TYPE_PP_ROBUST_UMUTEX : TYPE_PP_UMUTEX, GET_SHARE(flags),
	    &uq->uq_key)) != 0)
		return (error);
	for (;;) {
		umtxq_lock(&uq->uq_key);
		umtxq_busy(&uq->uq_key);
		umtxq_unlock(&uq->uq_key);

		/* Remember the old ceiling before changing anything. */
		rv = fueword32(&m->m_ceilings[0], &save_ceiling);
		if (rv == -1) {
			error = EFAULT;
			break;
		}

		/* Try to take the PP lock word. */
		rv = casueword32(&m->m_owner, UMUTEX_CONTESTED, &owner,
		    id | UMUTEX_CONTESTED);
		if (rv == -1) {
			error = EFAULT;
			break;
		}

		if (owner == UMUTEX_CONTESTED) {
			/* Got it: store the ceiling, then release. */
			rv = suword32(&m->m_ceilings[0], ceiling);
			rv1 = suword32(&m->m_owner, UMUTEX_CONTESTED);
			error = (rv == 0 && rv1 == 0) ? 0: EFAULT;
			break;
		}

		if ((owner & ~UMUTEX_CONTESTED) == id) {
			/* We already own it; just update the ceiling. */
			rv = suword32(&m->m_ceilings[0], ceiling);
			error = rv == 0 ? 0 : EFAULT;
			break;
		}

		if (owner == UMUTEX_RB_OWNERDEAD) {
			error = EOWNERDEAD;
			break;
		} else if (owner == UMUTEX_RB_NOTRECOV) {
			error = ENOTRECOVERABLE;
			break;
		}

		/*
		 * If we caught a signal, we have retried and now
		 * exit immediately.
		 */
		if (error != 0)
			break;

		/*
		 * We set the contested bit, sleep. Otherwise the lock changed
		 * and we need to retry or we lost a race to the thread
		 * unlocking the umtx.
		 */
		umtxq_lock(&uq->uq_key);
		umtxq_insert(uq);
		umtxq_unbusy(&uq->uq_key);
		error = umtxq_sleep(uq, "umtxpp", NULL);
		umtxq_remove(uq);
		umtxq_unlock(&uq->uq_key);
	}
	/* Wake everyone so they re-evaluate against the new ceiling. */
	umtxq_lock(&uq->uq_key);
	if (error == 0)
		umtxq_signal(&uq->uq_key, INT_MAX);
	umtxq_unbusy(&uq->uq_key);
	umtxq_unlock(&uq->uq_key);
	umtx_key_release(&uq->uq_key);
	if (error == 0 && old_ceiling != NULL) {
		rv = suword32(old_ceiling, save_ceiling);
		error = rv == 0 ? 0 : EFAULT;
	}
	return (error);
}
2422
2423 /*
2424 * Lock a userland POSIX mutex.
2425 */
2426 static int
2427 do_lock_umutex(struct thread *td, struct umutex *m,
2428 struct _umtx_time *timeout, int mode)
2429 {
2430 uint32_t flags;
2431 int error;
2432
2433 error = fueword32(&m->m_flags, &flags);
2434 if (error == -1)
2435 return (EFAULT);
2436
2437 switch (flags & (UMUTEX_PRIO_INHERIT | UMUTEX_PRIO_PROTECT)) {
2438 case 0:
2439 error = do_lock_normal(td, m, flags, timeout, mode);
2440 break;
2441 case UMUTEX_PRIO_INHERIT:
2442 error = do_lock_pi(td, m, flags, timeout, mode);
2443 break;
2444 case UMUTEX_PRIO_PROTECT:
2445 error = do_lock_pp(td, m, flags, timeout, mode);
2446 break;
2447 default:
2448 return (EINVAL);
2449 }
2450 if (timeout == NULL) {
2451 if (error == EINTR && mode != _UMUTEX_WAIT)
2452 error = ERESTART;
2453 } else {
2454 /* Timed-locking is not restarted. */
2455 if (error == ERESTART)
2456 error = EINTR;
2457 }
2458 return (error);
2459 }
2460
2461 /*
2462 * Unlock a userland POSIX mutex.
2463 */
2464 static int
2465 do_unlock_umutex(struct thread *td, struct umutex *m, bool rb)
2466 {
2467 uint32_t flags;
2468 int error;
2469
2470 error = fueword32(&m->m_flags, &flags);
2471 if (error == -1)
2472 return (EFAULT);
2473
2474 switch (flags & (UMUTEX_PRIO_INHERIT | UMUTEX_PRIO_PROTECT)) {
2475 case 0:
2476 return (do_unlock_normal(td, m, flags, rb));
2477 case UMUTEX_PRIO_INHERIT:
2478 return (do_unlock_pi(td, m, flags, rb));
2479 case UMUTEX_PRIO_PROTECT:
2480 return (do_unlock_pp(td, m, flags, rb));
2481 }
2482
2483 return (EINVAL);
2484 }
2485
/*
 * Wait on a userland condition variable cv, releasing the user mutex m
 * before sleeping.  wflags may carry CVWAIT_CLOCKID (take the clock id
 * from cv->c_clockid) and CVWAIT_ABSTIME (timeout is absolute).
 * Returns 0 when woken by signal/broadcast, else an errno.
 */
static int
do_cv_wait(struct thread *td, struct ucond *cv, struct umutex *m,
    struct timespec *timeout, u_long wflags)
{
	struct abs_timeout timo;
	struct umtx_q *uq;
	uint32_t flags, clockid, hasw;
	int error;

	uq = td->td_umtxq;
	error = fueword32(&cv->c_flags, &flags);
	if (error == -1)
		return (EFAULT);
	error = umtx_key_get(cv, TYPE_CV, GET_SHARE(flags), &uq->uq_key);
	if (error != 0)
		return (error);

	if ((wflags & CVWAIT_CLOCKID) != 0) {
		error = fueword32(&cv->c_clockid, &clockid);
		if (error == -1) {
			umtx_key_release(&uq->uq_key);
			return (EFAULT);
		}
		if (clockid < CLOCK_REALTIME ||
		    clockid >= CLOCK_THREAD_CPUTIME_ID) {
			/* hmm, only HW clock id will work. */
			umtx_key_release(&uq->uq_key);
			return (EINVAL);
		}
	} else {
		clockid = CLOCK_REALTIME;
	}

	/*
	 * Enqueue ourselves BEFORE dropping the user mutex, so a signal
	 * issued between unlock and sleep cannot be lost.
	 */
	umtxq_lock(&uq->uq_key);
	umtxq_busy(&uq->uq_key);
	umtxq_insert(uq);
	umtxq_unlock(&uq->uq_key);

	/*
	 * Set c_has_waiters to 1 before releasing user mutex, also
	 * don't modify cache line when unnecessary.
	 */
	error = fueword32(&cv->c_has_waiters, &hasw);
	if (error == 0 && hasw == 0)
		suword32(&cv->c_has_waiters, 1);

	umtxq_unbusy_unlocked(&uq->uq_key);

	error = do_unlock_umutex(td, m, false);

	if (timeout != NULL)
		abs_timeout_init(&timo, clockid, (wflags & CVWAIT_ABSTIME) != 0,
		    timeout);

	umtxq_lock(&uq->uq_key);
	if (error == 0) {
		error = umtxq_sleep(uq, "ucond", timeout == NULL ?
		    NULL : &timo);
	}

	/* Removed from the queue means we were signalled: success. */
	if ((uq->uq_flags & UQF_UMTXQ) == 0)
		error = 0;
	else {
		/*
		 * This must be timeout,interrupted by signal or
		 * surprious wakeup, clear c_has_waiter flag when
		 * necessary.
		 */
		umtxq_busy(&uq->uq_key);
		if ((uq->uq_flags & UQF_UMTXQ) != 0) {
			int oldlen = uq->uq_cur_queue->length;
			umtxq_remove(uq);
			if (oldlen == 1) {
				/*
				 * We were the last waiter; the user-space
				 * store may fault/sleep, so drop the queue
				 * lock around it.
				 */
				umtxq_unlock(&uq->uq_key);
				suword32(&cv->c_has_waiters, 0);
				umtxq_lock(&uq->uq_key);
			}
		}
		umtxq_unbusy(&uq->uq_key);
		if (error == ERESTART)
			error = EINTR;
	}

	umtxq_unlock(&uq->uq_key);
	umtx_key_release(&uq->uq_key);
	return (error);
}
2573
/*
 * Signal a userland condition variable: wake at most one waiter and,
 * when that empties the sleep queue, clear the user-visible
 * c_has_waiters flag so future signallers can skip the syscall.
 */
static int
do_cv_signal(struct thread *td, struct ucond *cv)
{
	struct umtx_key key;
	int error, cnt, nwake;
	uint32_t flags;

	error = fueword32(&cv->c_flags, &flags);
	if (error == -1)
		return (EFAULT);
	if ((error = umtx_key_get(cv, TYPE_CV, GET_SHARE(flags), &key)) != 0)
		return (error);
	umtxq_lock(&key);
	umtxq_busy(&key);
	cnt = umtxq_count(&key);
	nwake = umtxq_signal(&key, 1);
	if (cnt <= nwake) {
		/*
		 * No sleepers remain; drop the queue lock across the
		 * user-space store (it may fault/sleep).  The busy flag
		 * keeps other operations on this key out meanwhile.
		 */
		umtxq_unlock(&key);
		error = suword32(&cv->c_has_waiters, 0);
		if (error == -1)
			error = EFAULT;
		umtxq_lock(&key);
	}
	umtxq_unbusy(&key);
	umtxq_unlock(&key);
	umtx_key_release(&key);
	return (error);
}
2605
/*
 * Broadcast a userland condition variable: wake every waiter on the
 * queue and clear the user-visible c_has_waiters flag.
 */
static int
do_cv_broadcast(struct thread *td, struct ucond *cv)
{
	struct umtx_key key;
	int error;
	uint32_t flags;

	error = fueword32(&cv->c_flags, &flags);
	if (error == -1)
		return (EFAULT);
	if ((error = umtx_key_get(cv, TYPE_CV, GET_SHARE(flags), &key)) != 0)
		return (error);

	umtxq_lock(&key);
	umtxq_busy(&key);
	umtxq_signal(&key, INT_MAX);
	umtxq_unlock(&key);

	/*
	 * Queue is empty now; clear the flag with the queue lock
	 * dropped (user store may fault) but the key still busied.
	 */
	error = suword32(&cv->c_has_waiters, 0);
	if (error == -1)
		error = EFAULT;

	umtxq_unbusy_unlocked(&key);

	umtx_key_release(&key);
	return (error);
}
2633
/*
 * Acquire a userland rwlock for reading.  fflag may carry
 * URWLOCK_PREFER_READER to override the writer preference encoded in
 * the lock's flags; timeout is optional.  Fast path: CAS incrementing
 * the reader count in rw_state.  Slow path: set URWLOCK_READ_WAITERS,
 * bump rw_blocked_readers, and sleep on the shared queue.
 */
static int
do_rw_rdlock(struct thread *td, struct urwlock *rwlock, long fflag, struct _umtx_time *timeout)
{
	struct abs_timeout timo;
	struct umtx_q *uq;
	uint32_t flags, wrflags;
	int32_t state, oldstate;
	int32_t blocked_readers;
	int error, error1, rv;

	uq = td->td_umtxq;
	error = fueword32(&rwlock->rw_flags, &flags);
	if (error == -1)
		return (EFAULT);
	error = umtx_key_get(rwlock, TYPE_RWLOCK, GET_SHARE(flags), &uq->uq_key);
	if (error != 0)
		return (error);

	if (timeout != NULL)
		abs_timeout_init2(&timo, timeout);

	/*
	 * Unless reader preference is requested (either per-call or in
	 * the lock flags), pending writers also block new readers, to
	 * avoid starving writers.
	 */
	wrflags = URWLOCK_WRITE_OWNER;
	if (!(fflag & URWLOCK_PREFER_READER) && !(flags & URWLOCK_PREFER_READER))
		wrflags |= URWLOCK_WRITE_WAITERS;

	for (;;) {
		rv = fueword32(&rwlock->rw_state, &state);
		if (rv == -1) {
			umtx_key_release(&uq->uq_key);
			return (EFAULT);
		}

		/* try to lock it */
		while (!(state & wrflags)) {
			if (__predict_false(URWLOCK_READER_COUNT(state) == URWLOCK_MAX_READERS)) {
				umtx_key_release(&uq->uq_key);
				return (EAGAIN);
			}
			rv = casueword32(&rwlock->rw_state, state,
			    &oldstate, state + 1);
			if (rv == -1) {
				umtx_key_release(&uq->uq_key);
				return (EFAULT);
			}
			if (oldstate == state) {
				/* CAS succeeded: read lock acquired. */
				umtx_key_release(&uq->uq_key);
				return (0);
			}
			error = umtxq_check_susp(td);
			if (error != 0)
				break;
			state = oldstate;
		}

		if (error)
			break;

		/* grab monitor lock */
		umtxq_lock(&uq->uq_key);
		umtxq_busy(&uq->uq_key);
		umtxq_unlock(&uq->uq_key);

		/*
		 * re-read the state, in case it changed between the try-lock above
		 * and the check below
		 */
		rv = fueword32(&rwlock->rw_state, &state);
		if (rv == -1)
			error = EFAULT;

		/* set read contention bit */
		while (error == 0 && (state & wrflags) &&
		    !(state & URWLOCK_READ_WAITERS)) {
			rv = casueword32(&rwlock->rw_state, state,
			    &oldstate, state | URWLOCK_READ_WAITERS);
			if (rv == -1) {
				error = EFAULT;
				break;
			}
			if (oldstate == state)
				goto sleep;
			state = oldstate;
			error = umtxq_check_susp(td);
			if (error != 0)
				break;
		}
		if (error != 0) {
			umtxq_unbusy_unlocked(&uq->uq_key);
			break;
		}

		/* state is changed while setting flags, restart */
		if (!(state & wrflags)) {
			umtxq_unbusy_unlocked(&uq->uq_key);
			error = umtxq_check_susp(td);
			if (error != 0)
				break;
			continue;
		}

sleep:
		/* contention bit is set, before sleeping, increase read waiter count */
		rv = fueword32(&rwlock->rw_blocked_readers,
		    &blocked_readers);
		if (rv == -1) {
			umtxq_unbusy_unlocked(&uq->uq_key);
			error = EFAULT;
			break;
		}
		suword32(&rwlock->rw_blocked_readers, blocked_readers+1);

		while (state & wrflags) {
			umtxq_lock(&uq->uq_key);
			umtxq_insert(uq);
			umtxq_unbusy(&uq->uq_key);

			error = umtxq_sleep(uq, "urdlck", timeout == NULL ?
			    NULL : &timo);

			umtxq_busy(&uq->uq_key);
			umtxq_remove(uq);
			umtxq_unlock(&uq->uq_key);
			if (error)
				break;
			rv = fueword32(&rwlock->rw_state, &state);
			if (rv == -1) {
				error = EFAULT;
				break;
			}
		}

		/* decrease read waiter count, and may clear read contention bit */
		rv = fueword32(&rwlock->rw_blocked_readers,
		    &blocked_readers);
		if (rv == -1) {
			umtxq_unbusy_unlocked(&uq->uq_key);
			error = EFAULT;
			break;
		}
		suword32(&rwlock->rw_blocked_readers, blocked_readers-1);
		if (blocked_readers == 1) {
			/* We were the last blocked reader: clear the bit. */
			rv = fueword32(&rwlock->rw_state, &state);
			if (rv == -1) {
				umtxq_unbusy_unlocked(&uq->uq_key);
				error = EFAULT;
				break;
			}
			for (;;) {
				rv = casueword32(&rwlock->rw_state, state,
				    &oldstate, state & ~URWLOCK_READ_WAITERS);
				if (rv == -1) {
					error = EFAULT;
					break;
				}
				if (oldstate == state)
					break;
				state = oldstate;
				error1 = umtxq_check_susp(td);
				if (error1 != 0) {
					if (error == 0)
						error = error1;
					break;
				}
			}
		}

		umtxq_unbusy_unlocked(&uq->uq_key);
		if (error != 0)
			break;
	}
	umtx_key_release(&uq->uq_key);
	if (error == ERESTART)
		error = EINTR;
	return (error);
}
2809
/*
 * Acquire a userland rwlock for writing.  Fast path: CAS-set
 * URWLOCK_WRITE_OWNER while the lock is idle.  Slow path: set
 * URWLOCK_WRITE_WAITERS, bump rw_blocked_writers and sleep on the
 * exclusive queue.  If the last blocked writer gives up, any blocked
 * readers are woken so they are not stranded behind a stale
 * WRITE_WAITERS bit.
 */
static int
do_rw_wrlock(struct thread *td, struct urwlock *rwlock, struct _umtx_time *timeout)
{
	struct abs_timeout timo;
	struct umtx_q *uq;
	uint32_t flags;
	int32_t state, oldstate;
	int32_t blocked_writers;
	int32_t blocked_readers;
	int error, error1, rv;

	uq = td->td_umtxq;
	error = fueword32(&rwlock->rw_flags, &flags);
	if (error == -1)
		return (EFAULT);
	error = umtx_key_get(rwlock, TYPE_RWLOCK, GET_SHARE(flags), &uq->uq_key);
	if (error != 0)
		return (error);

	if (timeout != NULL)
		abs_timeout_init2(&timo, timeout);

	blocked_readers = 0;
	for (;;) {
		rv = fueword32(&rwlock->rw_state, &state);
		if (rv == -1) {
			umtx_key_release(&uq->uq_key);
			return (EFAULT);
		}
		/* Fast path: no owner and no readers. */
		while (!(state & URWLOCK_WRITE_OWNER) && URWLOCK_READER_COUNT(state) == 0) {
			rv = casueword32(&rwlock->rw_state, state,
			    &oldstate, state | URWLOCK_WRITE_OWNER);
			if (rv == -1) {
				umtx_key_release(&uq->uq_key);
				return (EFAULT);
			}
			if (oldstate == state) {
				/* CAS succeeded: write lock acquired. */
				umtx_key_release(&uq->uq_key);
				return (0);
			}
			state = oldstate;
			error = umtxq_check_susp(td);
			if (error != 0)
				break;
		}

		if (error) {
			/*
			 * Bailing out: if no writer remains interested,
			 * release the readers we may have blocked earlier.
			 */
			if (!(state & (URWLOCK_WRITE_OWNER|URWLOCK_WRITE_WAITERS)) &&
			    blocked_readers != 0) {
				umtxq_lock(&uq->uq_key);
				umtxq_busy(&uq->uq_key);
				umtxq_signal_queue(&uq->uq_key, INT_MAX, UMTX_SHARED_QUEUE);
				umtxq_unbusy(&uq->uq_key);
				umtxq_unlock(&uq->uq_key);
			}

			break;
		}

		/* grab monitor lock */
		umtxq_lock(&uq->uq_key);
		umtxq_busy(&uq->uq_key);
		umtxq_unlock(&uq->uq_key);

		/*
		 * re-read the state, in case it changed between the try-lock above
		 * and the check below
		 */
		rv = fueword32(&rwlock->rw_state, &state);
		if (rv == -1)
			error = EFAULT;

		/* Advertise a waiting writer in the state word. */
		while (error == 0 && ((state & URWLOCK_WRITE_OWNER) ||
		    URWLOCK_READER_COUNT(state) != 0) &&
		    (state & URWLOCK_WRITE_WAITERS) == 0) {
			rv = casueword32(&rwlock->rw_state, state,
			    &oldstate, state | URWLOCK_WRITE_WAITERS);
			if (rv == -1) {
				error = EFAULT;
				break;
			}
			if (oldstate == state)
				goto sleep;
			state = oldstate;
			error = umtxq_check_susp(td);
			if (error != 0)
				break;
		}
		if (error != 0) {
			umtxq_unbusy_unlocked(&uq->uq_key);
			break;
		}

		/* The lock became free while we were busy: retry. */
		if (!(state & URWLOCK_WRITE_OWNER) && URWLOCK_READER_COUNT(state) == 0) {
			umtxq_unbusy_unlocked(&uq->uq_key);
			error = umtxq_check_susp(td);
			if (error != 0)
				break;
			continue;
		}
sleep:
		rv = fueword32(&rwlock->rw_blocked_writers,
		    &blocked_writers);
		if (rv == -1) {
			umtxq_unbusy_unlocked(&uq->uq_key);
			error = EFAULT;
			break;
		}
		suword32(&rwlock->rw_blocked_writers, blocked_writers+1);

		while ((state & URWLOCK_WRITE_OWNER) || URWLOCK_READER_COUNT(state) != 0) {
			umtxq_lock(&uq->uq_key);
			umtxq_insert_queue(uq, UMTX_EXCLUSIVE_QUEUE);
			umtxq_unbusy(&uq->uq_key);

			error = umtxq_sleep(uq, "uwrlck", timeout == NULL ?
			    NULL : &timo);

			umtxq_busy(&uq->uq_key);
			umtxq_remove_queue(uq, UMTX_EXCLUSIVE_QUEUE);
			umtxq_unlock(&uq->uq_key);
			if (error)
				break;
			rv = fueword32(&rwlock->rw_state, &state);
			if (rv == -1) {
				error = EFAULT;
				break;
			}
		}

		rv = fueword32(&rwlock->rw_blocked_writers,
		    &blocked_writers);
		if (rv == -1) {
			umtxq_unbusy_unlocked(&uq->uq_key);
			error = EFAULT;
			break;
		}
		suword32(&rwlock->rw_blocked_writers, blocked_writers-1);
		if (blocked_writers == 1) {
			/* Last blocked writer: try to clear the waiters bit. */
			rv = fueword32(&rwlock->rw_state, &state);
			if (rv == -1) {
				umtxq_unbusy_unlocked(&uq->uq_key);
				error = EFAULT;
				break;
			}
			for (;;) {
				rv = casueword32(&rwlock->rw_state, state,
				    &oldstate, state & ~URWLOCK_WRITE_WAITERS);
				if (rv == -1) {
					error = EFAULT;
					break;
				}
				if (oldstate == state)
					break;
				state = oldstate;
				error1 = umtxq_check_susp(td);
				/*
				 * We are leaving the URWLOCK_WRITE_WAITERS
				 * behind, but this should not harm the
				 * correctness.
				 */
				if (error1 != 0) {
					if (error == 0)
						error = error1;
					break;
				}
			}
			/* Snapshot readers to wake if we error out above. */
			rv = fueword32(&rwlock->rw_blocked_readers,
			    &blocked_readers);
			if (rv == -1) {
				umtxq_unbusy_unlocked(&uq->uq_key);
				error = EFAULT;
				break;
			}
		} else
			blocked_readers = 0;

		umtxq_unbusy_unlocked(&uq->uq_key);
	}

	umtx_key_release(&uq->uq_key);
	if (error == ERESTART)
		error = EINTR;
	return (error);
}
2995
/*
 * Unlock a userland rwlock: clear the write-owner bit or drop one
 * reader (EPERM if the lock is not held), then wake waiters.  With
 * the default writer preference a single waiting writer is woken in
 * preference to all readers; URWLOCK_PREFER_READER flips that order.
 */
static int
do_rw_unlock(struct thread *td, struct urwlock *rwlock)
{
	struct umtx_q *uq;
	uint32_t flags;
	int32_t state, oldstate;
	int error, rv, q, count;

	uq = td->td_umtxq;
	error = fueword32(&rwlock->rw_flags, &flags);
	if (error == -1)
		return (EFAULT);
	error = umtx_key_get(rwlock, TYPE_RWLOCK, GET_SHARE(flags), &uq->uq_key);
	if (error != 0)
		return (error);

	error = fueword32(&rwlock->rw_state, &state);
	if (error == -1) {
		error = EFAULT;
		goto out;
	}
	if (state & URWLOCK_WRITE_OWNER) {
		/* Write-locked: CAS-clear the owner bit, retrying on races. */
		for (;;) {
			rv = casueword32(&rwlock->rw_state, state,
			    &oldstate, state & ~URWLOCK_WRITE_OWNER);
			if (rv == -1) {
				error = EFAULT;
				goto out;
			}
			if (oldstate != state) {
				state = oldstate;
				if (!(oldstate & URWLOCK_WRITE_OWNER)) {
					/* Owner bit vanished under us. */
					error = EPERM;
					goto out;
				}
				error = umtxq_check_susp(td);
				if (error != 0)
					goto out;
			} else
				break;
		}
	} else if (URWLOCK_READER_COUNT(state) != 0) {
		/* Read-locked: CAS-decrement the reader count. */
		for (;;) {
			rv = casueword32(&rwlock->rw_state, state,
			    &oldstate, state - 1);
			if (rv == -1) {
				error = EFAULT;
				goto out;
			}
			if (oldstate != state) {
				state = oldstate;
				if (URWLOCK_READER_COUNT(oldstate) == 0) {
					error = EPERM;
					goto out;
				}
				error = umtxq_check_susp(td);
				if (error != 0)
					goto out;
			} else
				break;
		}
	} else {
		/* Not locked at all. */
		error = EPERM;
		goto out;
	}

	count = 0;

	if (!(flags & URWLOCK_PREFER_READER)) {
		if (state & URWLOCK_WRITE_WAITERS) {
			count = 1;
			q = UMTX_EXCLUSIVE_QUEUE;
		} else if (state & URWLOCK_READ_WAITERS) {
			count = INT_MAX;
			q = UMTX_SHARED_QUEUE;
		}
	} else {
		if (state & URWLOCK_READ_WAITERS) {
			count = INT_MAX;
			q = UMTX_SHARED_QUEUE;
		} else if (state & URWLOCK_WRITE_WAITERS) {
			count = 1;
			q = UMTX_EXCLUSIVE_QUEUE;
		}
	}

	/* q is only read when count != 0, so it is always initialized here. */
	if (count) {
		umtxq_lock(&uq->uq_key);
		umtxq_busy(&uq->uq_key);
		umtxq_signal_queue(&uq->uq_key, count, q);
		umtxq_unbusy(&uq->uq_key);
		umtxq_unlock(&uq->uq_key);
	}
out:
	umtx_key_release(&uq->uq_key);
	return (error);
}
3093
3094 #if defined(COMPAT_FREEBSD9) || defined(COMPAT_FREEBSD10)
/*
 * Wait on a legacy userland semaphore (struct _usem), COMPAT_FREEBSD9/10
 * only.  Sets _has_waiters, re-checks _count, and sleeps; if _count was
 * already non-zero the wait returns 0 immediately (user mode retries
 * the decrement itself).
 */
static int
do_sem_wait(struct thread *td, struct _usem *sem, struct _umtx_time *timeout)
{
	struct abs_timeout timo;
	struct umtx_q *uq;
	uint32_t flags, count, count1;
	int error, rv;

	uq = td->td_umtxq;
	error = fueword32(&sem->_flags, &flags);
	if (error == -1)
		return (EFAULT);
	error = umtx_key_get(sem, TYPE_SEM, GET_SHARE(flags), &uq->uq_key);
	if (error != 0)
		return (error);

	if (timeout != NULL)
		abs_timeout_init2(&timo, timeout);

	/* Queue ourselves before touching user memory, to avoid lost wakeups. */
	umtxq_lock(&uq->uq_key);
	umtxq_busy(&uq->uq_key);
	umtxq_insert(uq);
	umtxq_unlock(&uq->uq_key);
	rv = casueword32(&sem->_has_waiters, 0, &count1, 1);
	if (rv == 0)
		rv = fueword32(&sem->_count, &count);
	if (rv == -1 || count != 0) {
		/* Fault, or the semaphore became available: don't sleep. */
		umtxq_lock(&uq->uq_key);
		umtxq_unbusy(&uq->uq_key);
		umtxq_remove(uq);
		umtxq_unlock(&uq->uq_key);
		umtx_key_release(&uq->uq_key);
		return (rv == -1 ? EFAULT : 0);
	}
	umtxq_lock(&uq->uq_key);
	umtxq_unbusy(&uq->uq_key);

	error = umtxq_sleep(uq, "usem", timeout == NULL ? NULL : &timo);

	/* Removed from the queue means we were woken: success. */
	if ((uq->uq_flags & UQF_UMTXQ) == 0)
		error = 0;
	else {
		umtxq_remove(uq);
		/* A relative timeout cannot be restarted. */
		if (error == ERESTART && timeout != NULL &&
		    (timeout->_flags & UMTX_ABSTIME) == 0)
			error = EINTR;
	}
	umtxq_unlock(&uq->uq_key);
	umtx_key_release(&uq->uq_key);
	return (error);
}
3147
/*
 * Signal a userland semaphore (legacy struct _usem): wake one sleeper
 * and, if it was the last one, clear the user-visible _has_waiters
 * flag.
 */
static int
do_sem_wake(struct thread *td, struct _usem *sem)
{
	struct umtx_key key;
	int error, cnt;
	uint32_t flags;

	error = fueword32(&sem->_flags, &flags);
	if (error == -1)
		return (EFAULT);
	if ((error = umtx_key_get(sem, TYPE_SEM, GET_SHARE(flags), &key)) != 0)
		return (error);
	umtxq_lock(&key);
	umtxq_busy(&key);
	cnt = umtxq_count(&key);
	if (cnt > 0) {
		/*
		 * Check if count is greater than 0, this means the memory is
		 * still being referenced by user code, so we can safely
		 * update _has_waiters flag.
		 */
		if (cnt == 1) {
			/* Drop the queue lock across the user store. */
			umtxq_unlock(&key);
			error = suword32(&sem->_has_waiters, 0);
			umtxq_lock(&key);
			if (error == -1)
				error = EFAULT;
		}
		umtxq_signal(&key, 1);
	}
	umtxq_unbusy(&key);
	umtxq_unlock(&key);
	umtx_key_release(&key);
	return (error);
}
3186 #endif
3187
3188 static int
3189 do_sem2_wait(struct thread *td, struct _usem2 *sem, struct _umtx_time *timeout)
3190 {
3191 struct abs_timeout timo;
3192 struct umtx_q *uq;
3193 uint32_t count, flags;
3194 int error, rv;
3195
3196 uq = td->td_umtxq;
3197 flags = fuword32(&sem->_flags);
3198 error = umtx_key_get(sem, TYPE_SEM, GET_SHARE(flags), &uq->uq_key);
3199 if (error != 0)
3200 return (error);
3201
3202 if (timeout != NULL)
3203 abs_timeout_init2(&timo, timeout);
3204
3205 umtxq_lock(&uq->uq_key);
3206 umtxq_busy(&uq->uq_key);
3207 umtxq_insert(uq);
3208 umtxq_unlock(&uq->uq_key);
3209 rv = fueword32(&sem->_count, &count);
3210 if (rv == -1) {
3211 umtxq_lock(&uq->uq_key);
3212 umtxq_unbusy(&uq->uq_key);
3213 umtxq_remove(uq);
3214 umtxq_unlock(&uq->uq_key);
3215 umtx_key_release(&uq->uq_key);
3216 return (EFAULT);
3217 }
3218 for (;;) {
3219 if (USEM_COUNT(count) != 0) {
3220 umtxq_lock(&uq->uq_key);
3221 umtxq_unbusy(&uq->uq_key);
3222 umtxq_remove(uq);
3223 umtxq_unlock(&uq->uq_key);
3224 umtx_key_release(&uq->uq_key);
3225 return (0);
3226 }
3227 if (count == USEM_HAS_WAITERS)
3228 break;
3229 rv = casueword32(&sem->_count, 0, &count, USEM_HAS_WAITERS);
3230 if (rv == -1) {
3231 umtxq_lock(&uq->uq_key);
3232 umtxq_unbusy(&uq->uq_key);
3233 umtxq_remove(uq);
3234 umtxq_unlock(&uq->uq_key);
3235 umtx_key_release(&uq->uq_key);
3236 return (EFAULT);
3237 }
3238 if (count == 0)
3239 break;
3240 }
3241 umtxq_lock(&uq->uq_key);
3242 umtxq_unbusy(&uq->uq_key);
3243
3244 error = umtxq_sleep(uq, "usem", timeout == NULL ? NULL : &timo);
3245
3246 if ((uq->uq_flags & UQF_UMTXQ) == 0)
3247 error = 0;
3248 else {
3249 umtxq_remove(uq);
3250 if (timeout != NULL && (timeout->_flags & UMTX_ABSTIME) == 0) {
3251 /* A relative timeout cannot be restarted. */
3252 if (error == ERESTART)
3253 error = EINTR;
3254 if (error == EINTR) {
3255 abs_timeout_update(&timo);
3256 timeout->_timeout = timo.end;
3257 timespecsub(&timeout->_timeout, &timo.cur);
3258 }
3259 }
3260 }
3261 umtxq_unlock(&uq->uq_key);
3262 umtx_key_release(&uq->uq_key);
3263 return (error);
3264 }
3265
/*
 * Signal a userland _usem2 semaphore: wake one sleeper and, if it was
 * the last one, clear USEM_HAS_WAITERS in the user-space count word.
 */
static int
do_sem2_wake(struct thread *td, struct _usem2 *sem)
{
	struct umtx_key key;
	int error, cnt, rv;
	uint32_t count, flags;

	rv = fueword32(&sem->_flags, &flags);
	if (rv == -1)
		return (EFAULT);
	if ((error = umtx_key_get(sem, TYPE_SEM, GET_SHARE(flags), &key)) != 0)
		return (error);
	umtxq_lock(&key);
	umtxq_busy(&key);
	cnt = umtxq_count(&key);
	if (cnt > 0) {
		/*
		 * If this was the last sleeping thread, clear the waiters
		 * flag in _count.
		 */
		if (cnt == 1) {
			/* Queue lock dropped: the user CAS may fault/sleep. */
			umtxq_unlock(&key);
			rv = fueword32(&sem->_count, &count);
			/* Retry the CAS until the bit is gone or a fault. */
			while (rv != -1 && count & USEM_HAS_WAITERS)
				rv = casueword32(&sem->_count, count, &count,
				    count & ~USEM_HAS_WAITERS);
			if (rv == -1)
				error = EFAULT;
			umtxq_lock(&key);
		}

		umtxq_signal(&key, 1);
	}
	umtxq_unbusy(&key);
	umtxq_unlock(&key);
	umtx_key_release(&key);
	return (error);
}
3307
3308 inline int
3309 umtx_copyin_timeout(const void *addr, struct timespec *tsp)
3310 {
3311 int error;
3312
3313 error = copyin(addr, tsp, sizeof(struct timespec));
3314 if (error == 0) {
3315 if (tsp->tv_sec < 0 ||
3316 tsp->tv_nsec >= 1000000000 ||
3317 tsp->tv_nsec < 0)
3318 error = EINVAL;
3319 }
3320 return (error);
3321 }
3322
3323 static inline int
3324 umtx_copyin_umtx_time(const void *addr, size_t size, struct _umtx_time *tp)
3325 {
3326 int error;
3327
3328 if (size <= sizeof(struct timespec)) {
3329 tp->_clockid = CLOCK_REALTIME;
3330 tp->_flags = 0;
3331 error = copyin(addr, &tp->_timeout, sizeof(struct timespec));
3332 } else
3333 error = copyin(addr, tp, sizeof(struct _umtx_time));
3334 if (error != 0)
3335 return (error);
3336 if (tp->_timeout.tv_sec < 0 ||
3337 tp->_timeout.tv_nsec >= 1000000000 || tp->_timeout.tv_nsec < 0)
3338 return (EINVAL);
3339 return (0);
3340 }
3341
/* Handler for unimplemented or retired UMTX_OP_* request codes. */
static int
__umtx_op_unimpl(struct thread *td, struct _umtx_op_args *uap)
{

	return (EOPNOTSUPP);
}
3348
3349 static int
3350 __umtx_op_wait(struct thread *td, struct _umtx_op_args *uap)
3351 {
3352 struct _umtx_time timeout, *tm_p;
3353 int error;
3354
3355 if (uap->uaddr2 == NULL)
3356 tm_p = NULL;
3357 else {
3358 error = umtx_copyin_umtx_time(
3359 uap->uaddr2, (size_t)uap->uaddr1, &timeout);
3360 if (error != 0)
3361 return (error);
3362 tm_p = &timeout;
3363 }
3364 return (do_wait(td, uap->obj, uap->val, tm_p, 0, 0));
3365 }
3366
3367 static int
3368 __umtx_op_wait_uint(struct thread *td, struct _umtx_op_args *uap)
3369 {
3370 struct _umtx_time timeout, *tm_p;
3371 int error;
3372
3373 if (uap->uaddr2 == NULL)
3374 tm_p = NULL;
3375 else {
3376 error = umtx_copyin_umtx_time(
3377 uap->uaddr2, (size_t)uap->uaddr1, &timeout);
3378 if (error != 0)
3379 return (error);
3380 tm_p = &timeout;
3381 }
3382 return (do_wait(td, uap->obj, uap->val, tm_p, 1, 0));
3383 }
3384
3385 static int
3386 __umtx_op_wait_uint_private(struct thread *td, struct _umtx_op_args *uap)
3387 {
3388 struct _umtx_time *tm_p, timeout;
3389 int error;
3390
3391 if (uap->uaddr2 == NULL)
3392 tm_p = NULL;
3393 else {
3394 error = umtx_copyin_umtx_time(
3395 uap->uaddr2, (size_t)uap->uaddr1, &timeout);
3396 if (error != 0)
3397 return (error);
3398 tm_p = &timeout;
3399 }
3400 return (do_wait(td, uap->obj, uap->val, tm_p, 1, 1));
3401 }
3402
/* UMTX_OP_WAKE: wake up to uap->val waiters on the shared address. */
static int
__umtx_op_wake(struct thread *td, struct _umtx_op_args *uap)
{

	return (kern_umtx_wake(td, uap->obj, uap->val, 0));
}
3409
/* Number of user pointers copied in per iteration below. */
#define BATCH_SIZE 128
/*
 * UMTX_OP_NWAKE_PRIVATE: uap->obj points at an array of uap->val user
 * addresses; wake all private waiters on each address.  Pointers are
 * copied in BATCH_SIZE at a time to bound the stack buffer.
 */
static int
__umtx_op_nwake_private(struct thread *td, struct _umtx_op_args *uap)
{
	char *uaddrs[BATCH_SIZE], **upp;
	int count, error, i, pos, tocopy;

	upp = (char **)uap->obj;
	error = 0;
	for (count = uap->val, pos = 0; count > 0; count -= tocopy,
	    pos += tocopy) {
		tocopy = MIN(count, BATCH_SIZE);
		error = copyin(upp + pos, uaddrs, tocopy * sizeof(char *));
		if (error != 0)
			break;
		for (i = 0; i < tocopy; ++i)
			kern_umtx_wake(td, uaddrs[i], INT_MAX, 1);
		/* Yield between batches to keep latency reasonable. */
		maybe_yield();
	}
	return (error);
}
3431
/* UMTX_OP_WAKE_PRIVATE: wake up to uap->val private waiters. */
static int
__umtx_op_wake_private(struct thread *td, struct _umtx_op_args *uap)
{

	return (kern_umtx_wake(td, uap->obj, uap->val, 1));
}
3438
3439 static int
3440 __umtx_op_lock_umutex(struct thread *td, struct _umtx_op_args *uap)
3441 {
3442 struct _umtx_time *tm_p, timeout;
3443 int error;
3444
3445 /* Allow a null timespec (wait forever). */
3446 if (uap->uaddr2 == NULL)
3447 tm_p = NULL;
3448 else {
3449 error = umtx_copyin_umtx_time(
3450 uap->uaddr2, (size_t)uap->uaddr1, &timeout);
3451 if (error != 0)
3452 return (error);
3453 tm_p = &timeout;
3454 }
3455 return (do_lock_umutex(td, uap->obj, tm_p, 0));
3456 }
3457
/* UMTX_OP_MUTEX_TRYLOCK: non-blocking lock attempt on a umutex. */
static int
__umtx_op_trylock_umutex(struct thread *td, struct _umtx_op_args *uap)
{

	return (do_lock_umutex(td, uap->obj, NULL, _UMUTEX_TRY));
}
3464
3465 static int
3466 __umtx_op_wait_umutex(struct thread *td, struct _umtx_op_args *uap)
3467 {
3468 struct _umtx_time *tm_p, timeout;
3469 int error;
3470
3471 /* Allow a null timespec (wait forever). */
3472 if (uap->uaddr2 == NULL)
3473 tm_p = NULL;
3474 else {
3475 error = umtx_copyin_umtx_time(
3476 uap->uaddr2, (size_t)uap->uaddr1, &timeout);
3477 if (error != 0)
3478 return (error);
3479 tm_p = &timeout;
3480 }
3481 return (do_lock_umutex(td, uap->obj, tm_p, _UMUTEX_WAIT));
3482 }
3483
/* UMTX_OP_MUTEX_WAKE: thin wrapper around do_wake_umutex(). */
static int
__umtx_op_wake_umutex(struct thread *td, struct _umtx_op_args *uap)
{

	return (do_wake_umutex(td, uap->obj));
}
3490
/* UMTX_OP_MUTEX_UNLOCK: normal unlock (rb == false: not robust cleanup). */
static int
__umtx_op_unlock_umutex(struct thread *td, struct _umtx_op_args *uap)
{

	return (do_unlock_umutex(td, uap->obj, false));
}
3497
/*
 * UMTX_OP_SET_CEILING: set the priority-protect ceiling to uap->val;
 * the previous ceiling is stored through uap->uaddr1 when non-NULL.
 */
static int
__umtx_op_set_ceiling(struct thread *td, struct _umtx_op_args *uap)
{

	return (do_set_ceiling(td, uap->obj, uap->val, uap->uaddr1));
}
3504
3505 static int
3506 __umtx_op_cv_wait(struct thread *td, struct _umtx_op_args *uap)
3507 {
3508 struct timespec *ts, timeout;
3509 int error;
3510
3511 /* Allow a null timespec (wait forever). */
3512 if (uap->uaddr2 == NULL)
3513 ts = NULL;
3514 else {
3515 error = umtx_copyin_timeout(uap->uaddr2, &timeout);
3516 if (error != 0)
3517 return (error);
3518 ts = &timeout;
3519 }
3520 return (do_cv_wait(td, uap->obj, uap->uaddr1, ts, uap->val));
3521 }
3522
/* UMTX_OP_CV_SIGNAL: wake one waiter on the condition variable. */
static int
__umtx_op_cv_signal(struct thread *td, struct _umtx_op_args *uap)
{

	return (do_cv_signal(td, uap->obj));
}
3529
/* UMTX_OP_CV_BROADCAST: wake all waiters on the condition variable. */
static int
__umtx_op_cv_broadcast(struct thread *td, struct _umtx_op_args *uap)
{

	return (do_cv_broadcast(td, uap->obj));
}
3536
3537 static int
3538 __umtx_op_rw_rdlock(struct thread *td, struct _umtx_op_args *uap)
3539 {
3540 struct _umtx_time timeout;
3541 int error;
3542
3543 /* Allow a null timespec (wait forever). */
3544 if (uap->uaddr2 == NULL) {
3545 error = do_rw_rdlock(td, uap->obj, uap->val, 0);
3546 } else {
3547 error = umtx_copyin_umtx_time(uap->uaddr2,
3548 (size_t)uap->uaddr1, &timeout);
3549 if (error != 0)
3550 return (error);
3551 error = do_rw_rdlock(td, uap->obj, uap->val, &timeout);
3552 }
3553 return (error);
3554 }
3555
3556 static int
3557 __umtx_op_rw_wrlock(struct thread *td, struct _umtx_op_args *uap)
3558 {
3559 struct _umtx_time timeout;
3560 int error;
3561
3562 /* Allow a null timespec (wait forever). */
3563 if (uap->uaddr2 == NULL) {
3564 error = do_rw_wrlock(td, uap->obj, 0);
3565 } else {
3566 error = umtx_copyin_umtx_time(uap->uaddr2,
3567 (size_t)uap->uaddr1, &timeout);
3568 if (error != 0)
3569 return (error);
3570
3571 error = do_rw_wrlock(td, uap->obj, &timeout);
3572 }
3573 return (error);
3574 }
3575
/* UMTX_OP_RW_UNLOCK: release a read or write hold on the urwlock. */
static int
__umtx_op_rw_unlock(struct thread *td, struct _umtx_op_args *uap)
{

	return (do_rw_unlock(td, uap->obj));
}
3582
3583 #if defined(COMPAT_FREEBSD9) || defined(COMPAT_FREEBSD10)
3584 static int
3585 __umtx_op_sem_wait(struct thread *td, struct _umtx_op_args *uap)
3586 {
3587 struct _umtx_time *tm_p, timeout;
3588 int error;
3589
3590 /* Allow a null timespec (wait forever). */
3591 if (uap->uaddr2 == NULL)
3592 tm_p = NULL;
3593 else {
3594 error = umtx_copyin_umtx_time(
3595 uap->uaddr2, (size_t)uap->uaddr1, &timeout);
3596 if (error != 0)
3597 return (error);
3598 tm_p = &timeout;
3599 }
3600 return (do_sem_wait(td, uap->obj, tm_p));
3601 }
3602
/* UMTX_OP_SEM_WAKE (legacy _usem): wake one semaphore sleeper. */
static int
__umtx_op_sem_wake(struct thread *td, struct _umtx_op_args *uap)
{

	return (do_sem_wake(td, uap->obj));
}
3609 #endif
3610
/* UMTX_OP_MUTEX_WAKE2: wrapper for do_wake2_umutex(), uap->val = flags. */
static int
__umtx_op_wake2_umutex(struct thread *td, struct _umtx_op_args *uap)
{

	return (do_wake2_umutex(td, uap->obj, uap->val));
}
3617
/*
 * UMTX_OP_SEM2_WAIT: wait on a _usem2.  When a relative wait is
 * interrupted and the caller supplied a buffer large enough for a
 * trailing timespec, the remaining time is copied out just past the
 * struct _umtx_time so user mode can restart with the remainder.
 */
static int
__umtx_op_sem2_wait(struct thread *td, struct _umtx_op_args *uap)
{
	struct _umtx_time *tm_p, timeout;
	size_t uasize;
	int error;

	/* Allow a null timespec (wait forever). */
	if (uap->uaddr2 == NULL) {
		uasize = 0;
		tm_p = NULL;
	} else {
		uasize = (size_t)uap->uaddr1;
		error = umtx_copyin_umtx_time(uap->uaddr2, uasize, &timeout);
		if (error != 0)
			return (error);
		tm_p = &timeout;
	}
	error = do_sem2_wait(td, uap->obj, tm_p);
	if (error == EINTR && uap->uaddr2 != NULL &&
	    (timeout._flags & UMTX_ABSTIME) == 0 &&
	    uasize >= sizeof(struct _umtx_time) + sizeof(struct timespec)) {
		/* do_sem2_wait() left the unslept time in timeout._timeout. */
		error = copyout(&timeout._timeout,
		    (struct _umtx_time *)uap->uaddr2 + 1,
		    sizeof(struct timespec));
		if (error == 0) {
			error = EINTR;
		}
	}

	return (error);
}
3650
/* UMTX_OP_SEM2_WAKE: wake one sleeper on a _usem2 semaphore. */
static int
__umtx_op_sem2_wake(struct thread *td, struct _umtx_op_args *uap)
{

	return (do_sem2_wake(td, uap->obj));
}
3657
/* Interpret an object's umtx_data as the list of its registrations. */
#define USHM_OBJ_UMTX(o)					\
    ((struct umtx_shm_obj_list *)(&(o)->umtx_data))

/* ushm_flags bits: entry is linked on the registry / the object list. */
#define	USHMF_REG_LINKED	0x0001
#define	USHMF_OBJ_LINKED	0x0002
/*
 * One registration of a shmfd-backed shared-memory page used as umtx
 * backing store, keyed by (shared object, offset).
 */
struct umtx_shm_reg {
	TAILQ_ENTRY(umtx_shm_reg) ushm_reg_link;	/* registry hash linkage */
	LIST_ENTRY(umtx_shm_reg) ushm_obj_link;	/* per-object linkage */
	struct umtx_key		ushm_key;	/* shared object + offset */
	struct ucred		*ushm_cred;	/* credential charged for it */
	struct shmfd		*ushm_obj;	/* backing shm object */
	u_int			ushm_refcnt;	/* reference count */
	u_int			ushm_flags;	/* USHMF_* */
};

LIST_HEAD(umtx_shm_obj_list, umtx_shm_reg);
TAILQ_HEAD(umtx_shm_reg_head, umtx_shm_reg);

/* Registry hashed by umtx key hash; all of it protected by umtx_shm_lock. */
static uma_zone_t umtx_shm_reg_zone;
static struct umtx_shm_reg_head umtx_shm_registry[UMTX_CHAINS];
static struct mtx umtx_shm_lock;
/* Entries parked here are destroyed later by umtx_shm_reg_delfree_tq(). */
static struct umtx_shm_reg_head umtx_shm_reg_delfree =
    TAILQ_HEAD_INITIALIZER(umtx_shm_reg_delfree);

static void umtx_shm_free_reg(struct umtx_shm_reg *reg);
3683
/*
 * Taskqueue callback: drain the deferred-free list of shared-memory
 * registrations and destroy each entry.  NOTE(review): presumably
 * entries are parked on umtx_shm_reg_delfree by contexts that cannot
 * free in place — confirm against the enqueuing code.
 */
static void
umtx_shm_reg_delfree_tq(void *context __unused, int pending __unused)
{
	struct umtx_shm_reg_head d;
	struct umtx_shm_reg *reg, *reg1;

	TAILQ_INIT(&d);
	/* Steal the whole pending list under the lock, then free unlocked. */
	mtx_lock(&umtx_shm_lock);
	TAILQ_CONCAT(&d, &umtx_shm_reg_delfree, ushm_reg_link);
	mtx_unlock(&umtx_shm_lock);
	TAILQ_FOREACH_SAFE(reg, &d, ushm_reg_link, reg1) {
		TAILQ_REMOVE(&d, reg, ushm_reg_link);
		umtx_shm_free_reg(reg);
	}
}

/* Task used to schedule umtx_shm_reg_delfree_tq() on a taskqueue. */
static struct task umtx_shm_reg_delfree_task =
    TASK_INITIALIZER(0, umtx_shm_reg_delfree_tq, NULL);
3702
3703 static struct umtx_shm_reg *
3704 umtx_shm_find_reg_locked(const struct umtx_key *key)
3705 {
3706 struct umtx_shm_reg *reg;
3707 struct umtx_shm_reg_head *reg_head;
3708
3709 KASSERT(key->shared, ("umtx_p_find_rg: private key"));
3710 mtx_assert(&umtx_shm_lock, MA_OWNED);
3711 reg_head = &umtx_shm_registry[key->hash];
3712 TAILQ_FOREACH(reg, reg_head, ushm_reg_link) {
3713 KASSERT(reg->ushm_key.shared,
3714 ("non-shared key on reg %p %d", reg, reg->ushm_key.shared));
3715 if (reg->ushm_key.info.shared.object ==
3716 key->info.shared.object &&
3717 reg->ushm_key.info.shared.offset ==
3718 key->info.shared.offset) {
3719 KASSERT(reg->ushm_key.type == TYPE_SHM, ("TYPE_USHM"));
3720 KASSERT(reg->ushm_refcnt > 0,
3721 ("reg %p refcnt 0 onlist", reg));
3722 KASSERT((reg->ushm_flags & USHMF_REG_LINKED) != 0,
3723 ("reg %p not linked", reg));
3724 reg->ushm_refcnt++;
3725 return (reg);
3726 }
3727 }
3728 return (NULL);
3729 }
3730
3731 static struct umtx_shm_reg *
3732 umtx_shm_find_reg(const struct umtx_key *key)
3733 {
3734 struct umtx_shm_reg *reg;
3735
3736 mtx_lock(&umtx_shm_lock);
3737 reg = umtx_shm_find_reg_locked(key);
3738 mtx_unlock(&umtx_shm_lock);
3739 return (reg);
3740 }
3741
/*
 * Release all resources held by a registry entry: undo the
 * RLIMIT_UMTXP charge, drop the cred and shm object references,
 * and return the entry to its zone.  Must not be called with
 * umtx_shm_lock held (shm_drop may sleep).
 */
static void
umtx_shm_free_reg(struct umtx_shm_reg *reg)
{

	/* The uncharge must read cr_ruidinfo before crfree() below. */
	chgumtxcnt(reg->ushm_cred->cr_ruidinfo, -1, 0);
	crfree(reg->ushm_cred);
	shm_drop(reg->ushm_obj);
	uma_zfree(umtx_shm_reg_zone, reg);
}
3751
/*
 * Drop one reference on a registry entry with umtx_shm_lock held.
 * When the count reaches zero, or when "force" requests eviction,
 * unlink the entry from the registry hash and from its object's
 * list.  Returns true when the caller must free the entry (refcount
 * hit zero); the actual free happens unlocked.
 */
static bool
umtx_shm_unref_reg_locked(struct umtx_shm_reg *reg, bool force)
{
	bool res;

	mtx_assert(&umtx_shm_lock, MA_OWNED);
	KASSERT(reg->ushm_refcnt > 0, ("ushm_reg %p refcnt 0", reg));
	reg->ushm_refcnt--;
	res = reg->ushm_refcnt == 0;
	if (res || force) {
		if ((reg->ushm_flags & USHMF_REG_LINKED) != 0) {
			TAILQ_REMOVE(&umtx_shm_registry[reg->ushm_key.hash],
			    reg, ushm_reg_link);
			reg->ushm_flags &= ~USHMF_REG_LINKED;
		}
		if ((reg->ushm_flags & USHMF_OBJ_LINKED) != 0) {
			LIST_REMOVE(reg, ushm_obj_link);
			reg->ushm_flags &= ~USHMF_OBJ_LINKED;
		}
	}
	return (res);
}
3774
3775 static void
3776 umtx_shm_unref_reg(struct umtx_shm_reg *reg, bool force)
3777 {
3778 vm_object_t object;
3779 bool dofree;
3780
3781 if (force) {
3782 object = reg->ushm_obj->shm_object;
3783 VM_OBJECT_WLOCK(object);
3784 object->flags |= OBJ_UMTXDEAD;
3785 VM_OBJECT_WUNLOCK(object);
3786 }
3787 mtx_lock(&umtx_shm_lock);
3788 dofree = umtx_shm_unref_reg_locked(reg, force);
3789 mtx_unlock(&umtx_shm_lock);
3790 if (dofree)
3791 umtx_shm_free_reg(reg);
3792 }
3793
/*
 * VM object constructor hook: start with an empty list of shared-umtx
 * registry entries hanging off the object.
 */
void
umtx_shm_object_init(vm_object_t object)
{

	LIST_INIT(USHM_OBJ_UMTX(object));
}
3800
3801 void
3802 umtx_shm_object_terminated(vm_object_t object)
3803 {
3804 struct umtx_shm_reg *reg, *reg1;
3805 bool dofree;
3806
3807 dofree = false;
3808 mtx_lock(&umtx_shm_lock);
3809 LIST_FOREACH_SAFE(reg, USHM_OBJ_UMTX(object), ushm_obj_link, reg1) {
3810 if (umtx_shm_unref_reg_locked(reg, true)) {
3811 TAILQ_INSERT_TAIL(&umtx_shm_reg_delfree, reg,
3812 ushm_reg_link);
3813 dofree = true;
3814 }
3815 }
3816 mtx_unlock(&umtx_shm_lock);
3817 if (dofree)
3818 taskqueue_enqueue(taskqueue_thread, &umtx_shm_reg_delfree_task);
3819 }
3820
/*
 * Find or create the registry entry for the given shared key, returning
 * a referenced entry in *res.  Returns ENOMEM when the RLIMIT_UMTXP
 * resource limit is exhausted, or an shm_dotruncate() error.
 */
static int
umtx_shm_create_reg(struct thread *td, const struct umtx_key *key,
    struct umtx_shm_reg **res)
{
	struct umtx_shm_reg *reg, *reg1;
	struct ucred *cred;
	int error;

	/* Fast path: entry exists; find_reg already took a reference. */
	reg = umtx_shm_find_reg(key);
	if (reg != NULL) {
		*res = reg;
		return (0);
	}
	cred = td->td_ucred;
	/* Charge the per-uid umtx limit before allocating anything. */
	if (!chgumtxcnt(cred->cr_ruidinfo, 1, lim_cur(td, RLIMIT_UMTXP)))
		return (ENOMEM);
	reg = uma_zalloc(umtx_shm_reg_zone, M_WAITOK | M_ZERO);
	reg->ushm_refcnt = 1;
	bcopy(key, &reg->ushm_key, sizeof(*key));
	/* One page of anonymous shared memory backs the entry. */
	reg->ushm_obj = shm_alloc(td->td_ucred, O_RDWR);
	reg->ushm_cred = crhold(cred);
	error = shm_dotruncate(reg->ushm_obj, PAGE_SIZE);
	if (error != 0) {
		umtx_shm_free_reg(reg);
		return (error);
	}
	/*
	 * Re-check under the lock: another thread may have created and
	 * linked an entry for the same key while we were allocating.
	 */
	mtx_lock(&umtx_shm_lock);
	reg1 = umtx_shm_find_reg_locked(key);
	if (reg1 != NULL) {
		mtx_unlock(&umtx_shm_lock);
		umtx_shm_free_reg(reg);
		*res = reg1;
		return (0);
	}
	/* Second reference is owned by the registry linkage itself. */
	reg->ushm_refcnt++;
	TAILQ_INSERT_TAIL(&umtx_shm_registry[key->hash], reg, ushm_reg_link);
	LIST_INSERT_HEAD(USHM_OBJ_UMTX(key->info.shared.object), reg,
	    ushm_obj_link);
	reg->ushm_flags = USHMF_REG_LINKED | USHMF_OBJ_LINKED;
	mtx_unlock(&umtx_shm_lock);
	*res = reg;
	return (0);
}
3864
/*
 * Implement UMTX_SHM_ALIVE: report whether the VM object backing the
 * mapping at addr is still usable for shared umtx operations.  Returns
 * EFAULT when addr is unmapped, EINVAL when there is no backing object,
 * ENOTTY when the object was marked dead by a forced unref, 0 otherwise.
 */
static int
umtx_shm_alive(struct thread *td, void *addr)
{
	vm_map_t map;
	vm_map_entry_t entry;
	vm_object_t object;
	vm_pindex_t pindex;
	vm_prot_t prot;
	int res, ret;
	boolean_t wired;

	map = &td->td_proc->p_vmspace->vm_map;
	res = vm_map_lookup(&map, (uintptr_t)addr, VM_PROT_READ, &entry,
	    &object, &pindex, &prot, &wired);
	if (res != KERN_SUCCESS)
		return (EFAULT);
	if (object == NULL)
		ret = EINVAL;
	else
		ret = (object->flags & OBJ_UMTXDEAD) != 0 ? ENOTTY : 0;
	/* vm_map_lookup() returned with the map locked; release it. */
	vm_map_lookup_done(map, entry);
	return (ret);
}
3888
3889 static void
3890 umtx_shm_init(void)
3891 {
3892 int i;
3893
3894 umtx_shm_reg_zone = uma_zcreate("umtx_shm", sizeof(struct umtx_shm_reg),
3895 NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0);
3896 mtx_init(&umtx_shm_lock, "umtxshm", NULL, MTX_DEF);
3897 for (i = 0; i < nitems(umtx_shm_registry); i++)
3898 TAILQ_INIT(&umtx_shm_registry[i]);
3899 }
3900
/*
 * Implement UMTX_OP_SHM: create, look up, destroy or probe the
 * anonymous shared memory object registered for the word at addr.
 * Exactly one UMTX_SHM_* mode flag must be set.  CREAT and LOOKUP
 * return a new O_CLOEXEC shm file descriptor in td_retval[0].
 */
static int
umtx_shm(struct thread *td, void *addr, u_int flags)
{
	struct umtx_key key;
	struct umtx_shm_reg *reg;
	struct file *fp;
	int error, fd;

	if (__bitcount(flags & (UMTX_SHM_CREAT | UMTX_SHM_LOOKUP |
	    UMTX_SHM_DESTROY| UMTX_SHM_ALIVE)) != 1)
		return (EINVAL);
	if ((flags & UMTX_SHM_ALIVE) != 0)
		return (umtx_shm_alive(td, addr));
	error = umtx_key_get(addr, TYPE_SHM, PROCESS_SHARE, &key);
	if (error != 0)
		return (error);
	KASSERT(key.shared == 1, ("non-shared key"));
	if ((flags & UMTX_SHM_CREAT) != 0) {
		error = umtx_shm_create_reg(td, &key, &reg);
	} else {
		reg = umtx_shm_find_reg(&key);
		if (reg == NULL)
			error = ESRCH;
	}
	umtx_key_release(&key);
	if (error != 0)
		return (error);
	KASSERT(reg != NULL, ("no reg"));
	if ((flags & UMTX_SHM_DESTROY) != 0) {
		/* Evict the entry; drops the registry's own reference. */
		umtx_shm_unref_reg(reg, true);
	} else {
#if 0
#ifdef MAC
		error = mac_posixshm_check_open(td->td_ucred,
		    reg->ushm_obj, FFLAGS(O_RDWR));
		if (error == 0)
#endif
			error = shm_access(reg->ushm_obj, td->td_ucred,
			    FFLAGS(O_RDWR));
		if (error == 0)
#endif
		/* Hand the backing shm object to userspace as an fd. */
		error = falloc_caps(td, &fp, &fd, O_CLOEXEC, NULL);
		if (error == 0) {
			shm_hold(reg->ushm_obj);
			finit(fp, FFLAGS(O_RDWR), DTYPE_SHM, reg->ushm_obj,
			    &shm_ops);
			td->td_retval[0] = fd;
			fdrop(fp, td);
		}
	}
	/* Drop the reference taken by lookup/creation above. */
	umtx_shm_unref_reg(reg, false);
	return (error);
}
3954
3955 static int
3956 __umtx_op_shm(struct thread *td, struct _umtx_op_args *uap)
3957 {
3958
3959 return (umtx_shm(td, uap->uaddr1, uap->val));
3960 }
3961
/*
 * Record the userspace robust mutex list heads for the calling thread;
 * they are walked by umtx_thread_cleanup() on thread exit or exec.
 */
static int
umtx_robust_lists(struct thread *td, struct umtx_robust_lists_params *rbp)
{

	td->td_rb_list = rbp->robust_list_offset;
	td->td_rbp_list = rbp->robust_priv_list_offset;
	td->td_rb_inact = rbp->robust_inact_offset;
	return (0);
}
3971
3972 static int
3973 __umtx_op_robust_lists(struct thread *td, struct _umtx_op_args *uap)
3974 {
3975 struct umtx_robust_lists_params rb;
3976 int error;
3977
3978 if (uap->val > sizeof(rb))
3979 return (EINVAL);
3980 bzero(&rb, sizeof(rb));
3981 error = copyin(uap->uaddr1, &rb, uap->val);
3982 if (error != 0)
3983 return (error);
3984 return (umtx_robust_lists(td, &rb));
3985 }
3986
typedef int (*_umtx_op_func)(struct thread *td, struct _umtx_op_args *uap);

/*
 * Native dispatch table for _umtx_op(2), indexed by the UMTX_OP_*
 * operation code; slots must stay in sync with sys/umtx.h.
 */
static const _umtx_op_func op_table[] = {
	[UMTX_OP_RESERVED0]	= __umtx_op_unimpl,
	[UMTX_OP_RESERVED1]	= __umtx_op_unimpl,
	[UMTX_OP_WAIT]		= __umtx_op_wait,
	[UMTX_OP_WAKE]		= __umtx_op_wake,
	[UMTX_OP_MUTEX_TRYLOCK]	= __umtx_op_trylock_umutex,
	[UMTX_OP_MUTEX_LOCK]	= __umtx_op_lock_umutex,
	[UMTX_OP_MUTEX_UNLOCK]	= __umtx_op_unlock_umutex,
	[UMTX_OP_SET_CEILING]	= __umtx_op_set_ceiling,
	[UMTX_OP_CV_WAIT]	= __umtx_op_cv_wait,
	[UMTX_OP_CV_SIGNAL]	= __umtx_op_cv_signal,
	[UMTX_OP_CV_BROADCAST]	= __umtx_op_cv_broadcast,
	[UMTX_OP_WAIT_UINT]	= __umtx_op_wait_uint,
	[UMTX_OP_RW_RDLOCK]	= __umtx_op_rw_rdlock,
	[UMTX_OP_RW_WRLOCK]	= __umtx_op_rw_wrlock,
	[UMTX_OP_RW_UNLOCK]	= __umtx_op_rw_unlock,
	[UMTX_OP_WAIT_UINT_PRIVATE] = __umtx_op_wait_uint_private,
	[UMTX_OP_WAKE_PRIVATE]	= __umtx_op_wake_private,
	[UMTX_OP_MUTEX_WAIT]	= __umtx_op_wait_umutex,
	[UMTX_OP_MUTEX_WAKE]	= __umtx_op_wake_umutex,
	/* Old-style semaphores are only served for legacy binaries. */
#if defined(COMPAT_FREEBSD9) || defined(COMPAT_FREEBSD10)
	[UMTX_OP_SEM_WAIT]	= __umtx_op_sem_wait,
	[UMTX_OP_SEM_WAKE]	= __umtx_op_sem_wake,
#else
	[UMTX_OP_SEM_WAIT]	= __umtx_op_unimpl,
	[UMTX_OP_SEM_WAKE]	= __umtx_op_unimpl,
#endif
	[UMTX_OP_NWAKE_PRIVATE]	= __umtx_op_nwake_private,
	[UMTX_OP_MUTEX_WAKE2]	= __umtx_op_wake2_umutex,
	[UMTX_OP_SEM2_WAIT]	= __umtx_op_sem2_wait,
	[UMTX_OP_SEM2_WAKE]	= __umtx_op_sem2_wake,
	[UMTX_OP_SHM]		= __umtx_op_shm,
	[UMTX_OP_ROBUST_LISTS]	= __umtx_op_robust_lists,
};
4023
4024 int
4025 sys__umtx_op(struct thread *td, struct _umtx_op_args *uap)
4026 {
4027
4028 if ((unsigned)uap->op < nitems(op_table))
4029 return (*op_table[uap->op])(td, uap);
4030 return (EINVAL);
4031 }
4032
4033 #ifdef COMPAT_FREEBSD32
4034
/* ILP32 layout of struct timespec as seen by 32-bit processes. */
struct timespec32 {
	int32_t tv_sec;
	int32_t tv_nsec;
};

/* ILP32 layout of struct _umtx_time. */
struct umtx_time32 {
	struct timespec32	timeout;
	uint32_t		flags;
	uint32_t		clockid;
};
4045
4046 static inline int
4047 umtx_copyin_timeout32(void *addr, struct timespec *tsp)
4048 {
4049 struct timespec32 ts32;
4050 int error;
4051
4052 error = copyin(addr, &ts32, sizeof(struct timespec32));
4053 if (error == 0) {
4054 if (ts32.tv_sec < 0 ||
4055 ts32.tv_nsec >= 1000000000 ||
4056 ts32.tv_nsec < 0)
4057 error = EINVAL;
4058 else {
4059 tsp->tv_sec = ts32.tv_sec;
4060 tsp->tv_nsec = ts32.tv_nsec;
4061 }
4062 }
4063 return (error);
4064 }
4065
4066 static inline int
4067 umtx_copyin_umtx_time32(const void *addr, size_t size, struct _umtx_time *tp)
4068 {
4069 struct umtx_time32 t32;
4070 int error;
4071
4072 t32.clockid = CLOCK_REALTIME;
4073 t32.flags = 0;
4074 if (size <= sizeof(struct timespec32))
4075 error = copyin(addr, &t32.timeout, sizeof(struct timespec32));
4076 else
4077 error = copyin(addr, &t32, sizeof(struct umtx_time32));
4078 if (error != 0)
4079 return (error);
4080 if (t32.timeout.tv_sec < 0 ||
4081 t32.timeout.tv_nsec >= 1000000000 || t32.timeout.tv_nsec < 0)
4082 return (EINVAL);
4083 tp->_timeout.tv_sec = t32.timeout.tv_sec;
4084 tp->_timeout.tv_nsec = t32.timeout.tv_nsec;
4085 tp->_flags = t32.flags;
4086 tp->_clockid = t32.clockid;
4087 return (0);
4088 }
4089
4090 static int
4091 __umtx_op_wait_compat32(struct thread *td, struct _umtx_op_args *uap)
4092 {
4093 struct _umtx_time *tm_p, timeout;
4094 int error;
4095
4096 if (uap->uaddr2 == NULL)
4097 tm_p = NULL;
4098 else {
4099 error = umtx_copyin_umtx_time32(uap->uaddr2,
4100 (size_t)uap->uaddr1, &timeout);
4101 if (error != 0)
4102 return (error);
4103 tm_p = &timeout;
4104 }
4105 return (do_wait(td, uap->obj, uap->val, tm_p, 1, 0));
4106 }
4107
4108 static int
4109 __umtx_op_lock_umutex_compat32(struct thread *td, struct _umtx_op_args *uap)
4110 {
4111 struct _umtx_time *tm_p, timeout;
4112 int error;
4113
4114 /* Allow a null timespec (wait forever). */
4115 if (uap->uaddr2 == NULL)
4116 tm_p = NULL;
4117 else {
4118 error = umtx_copyin_umtx_time(uap->uaddr2,
4119 (size_t)uap->uaddr1, &timeout);
4120 if (error != 0)
4121 return (error);
4122 tm_p = &timeout;
4123 }
4124 return (do_lock_umutex(td, uap->obj, tm_p, 0));
4125 }
4126
4127 static int
4128 __umtx_op_wait_umutex_compat32(struct thread *td, struct _umtx_op_args *uap)
4129 {
4130 struct _umtx_time *tm_p, timeout;
4131 int error;
4132
4133 /* Allow a null timespec (wait forever). */
4134 if (uap->uaddr2 == NULL)
4135 tm_p = NULL;
4136 else {
4137 error = umtx_copyin_umtx_time32(uap->uaddr2,
4138 (size_t)uap->uaddr1, &timeout);
4139 if (error != 0)
4140 return (error);
4141 tm_p = &timeout;
4142 }
4143 return (do_lock_umutex(td, uap->obj, tm_p, _UMUTEX_WAIT));
4144 }
4145
4146 static int
4147 __umtx_op_cv_wait_compat32(struct thread *td, struct _umtx_op_args *uap)
4148 {
4149 struct timespec *ts, timeout;
4150 int error;
4151
4152 /* Allow a null timespec (wait forever). */
4153 if (uap->uaddr2 == NULL)
4154 ts = NULL;
4155 else {
4156 error = umtx_copyin_timeout32(uap->uaddr2, &timeout);
4157 if (error != 0)
4158 return (error);
4159 ts = &timeout;
4160 }
4161 return (do_cv_wait(td, uap->obj, uap->uaddr1, ts, uap->val));
4162 }
4163
4164 static int
4165 __umtx_op_rw_rdlock_compat32(struct thread *td, struct _umtx_op_args *uap)
4166 {
4167 struct _umtx_time timeout;
4168 int error;
4169
4170 /* Allow a null timespec (wait forever). */
4171 if (uap->uaddr2 == NULL) {
4172 error = do_rw_rdlock(td, uap->obj, uap->val, 0);
4173 } else {
4174 error = umtx_copyin_umtx_time32(uap->uaddr2,
4175 (size_t)uap->uaddr1, &timeout);
4176 if (error != 0)
4177 return (error);
4178 error = do_rw_rdlock(td, uap->obj, uap->val, &timeout);
4179 }
4180 return (error);
4181 }
4182
4183 static int
4184 __umtx_op_rw_wrlock_compat32(struct thread *td, struct _umtx_op_args *uap)
4185 {
4186 struct _umtx_time timeout;
4187 int error;
4188
4189 /* Allow a null timespec (wait forever). */
4190 if (uap->uaddr2 == NULL) {
4191 error = do_rw_wrlock(td, uap->obj, 0);
4192 } else {
4193 error = umtx_copyin_umtx_time32(uap->uaddr2,
4194 (size_t)uap->uaddr1, &timeout);
4195 if (error != 0)
4196 return (error);
4197 error = do_rw_wrlock(td, uap->obj, &timeout);
4198 }
4199 return (error);
4200 }
4201
4202 static int
4203 __umtx_op_wait_uint_private_compat32(struct thread *td, struct _umtx_op_args *uap)
4204 {
4205 struct _umtx_time *tm_p, timeout;
4206 int error;
4207
4208 if (uap->uaddr2 == NULL)
4209 tm_p = NULL;
4210 else {
4211 error = umtx_copyin_umtx_time32(
4212 uap->uaddr2, (size_t)uap->uaddr1,&timeout);
4213 if (error != 0)
4214 return (error);
4215 tm_p = &timeout;
4216 }
4217 return (do_wait(td, uap->obj, uap->val, tm_p, 1, 1));
4218 }
4219
4220 #if defined(COMPAT_FREEBSD9) || defined(COMPAT_FREEBSD10)
4221 static int
4222 __umtx_op_sem_wait_compat32(struct thread *td, struct _umtx_op_args *uap)
4223 {
4224 struct _umtx_time *tm_p, timeout;
4225 int error;
4226
4227 /* Allow a null timespec (wait forever). */
4228 if (uap->uaddr2 == NULL)
4229 tm_p = NULL;
4230 else {
4231 error = umtx_copyin_umtx_time32(uap->uaddr2,
4232 (size_t)uap->uaddr1, &timeout);
4233 if (error != 0)
4234 return (error);
4235 tm_p = &timeout;
4236 }
4237 return (do_sem_wait(td, uap->obj, tm_p));
4238 }
4239 #endif
4240
static int
__umtx_op_sem2_wait_compat32(struct thread *td, struct _umtx_op_args *uap)
{
	struct _umtx_time *tm_p, timeout;
	size_t uasize;
	int error;

	/* Allow a null timespec (wait forever). */
	if (uap->uaddr2 == NULL) {
		uasize = 0;
		tm_p = NULL;
	} else {
		uasize = (size_t)uap->uaddr1;
		error = umtx_copyin_umtx_time32(uap->uaddr2, uasize, &timeout);
		if (error != 0)
			return (error);
		tm_p = &timeout;
	}
	error = do_sem2_wait(td, uap->obj, tm_p);
	/*
	 * For an interrupted relative wait, report the remaining time
	 * back to userspace in the timespec32 slot that follows the
	 * umtx_time32 argument, provided the caller supplied room for
	 * it.  do_sem2_wait() left the remainder in timeout._timeout.
	 */
	if (error == EINTR && uap->uaddr2 != NULL &&
	    (timeout._flags & UMTX_ABSTIME) == 0 &&
	    uasize >= sizeof(struct umtx_time32) + sizeof(struct timespec32)) {
		/* Narrow the native remainder to the ILP32 layout. */
		struct timespec32 remain32 = {
			.tv_sec = timeout._timeout.tv_sec,
			.tv_nsec = timeout._timeout.tv_nsec
		};
		error = copyout(&remain32,
		    (struct umtx_time32 *)uap->uaddr2 + 1,
		    sizeof(struct timespec32));
		/* Preserve EINTR unless the copyout itself failed. */
		if (error == 0) {
			error = EINTR;
		}
	}

	return (error);
}
4277
4278 static int
4279 __umtx_op_nwake_private32(struct thread *td, struct _umtx_op_args *uap)
4280 {
4281 uint32_t uaddrs[BATCH_SIZE], **upp;
4282 int count, error, i, pos, tocopy;
4283
4284 upp = (uint32_t **)uap->obj;
4285 error = 0;
4286 for (count = uap->val, pos = 0; count > 0; count -= tocopy,
4287 pos += tocopy) {
4288 tocopy = MIN(count, BATCH_SIZE);
4289 error = copyin(upp + pos, uaddrs, tocopy * sizeof(uint32_t));
4290 if (error != 0)
4291 break;
4292 for (i = 0; i < tocopy; ++i)
4293 kern_umtx_wake(td, (void *)(intptr_t)uaddrs[i],
4294 INT_MAX, 1);
4295 maybe_yield();
4296 }
4297 return (error);
4298 }
4299
/* ILP32 layout of struct umtx_robust_lists_params. */
struct umtx_robust_lists_params_compat32 {
	uint32_t robust_list_offset;
	uint32_t robust_priv_list_offset;
	uint32_t robust_inact_offset;
};
4305
4306 static int
4307 __umtx_op_robust_lists_compat32(struct thread *td, struct _umtx_op_args *uap)
4308 {
4309 struct umtx_robust_lists_params rb;
4310 struct umtx_robust_lists_params_compat32 rb32;
4311 int error;
4312
4313 if (uap->val > sizeof(rb32))
4314 return (EINVAL);
4315 bzero(&rb, sizeof(rb));
4316 bzero(&rb32, sizeof(rb32));
4317 error = copyin(uap->uaddr1, &rb32, uap->val);
4318 if (error != 0)
4319 return (error);
4320 rb.robust_list_offset = rb32.robust_list_offset;
4321 rb.robust_priv_list_offset = rb32.robust_priv_list_offset;
4322 rb.robust_inact_offset = rb32.robust_inact_offset;
4323 return (umtx_robust_lists(td, &rb));
4324 }
4325
/*
 * Dispatch table for _umtx_op(2) issued by 32-bit processes; ops whose
 * argument layouts differ under ILP32 get dedicated compat handlers.
 */
static const _umtx_op_func op_table_compat32[] = {
	[UMTX_OP_RESERVED0]	= __umtx_op_unimpl,
	[UMTX_OP_RESERVED1]	= __umtx_op_unimpl,
	[UMTX_OP_WAIT]		= __umtx_op_wait_compat32,
	[UMTX_OP_WAKE]		= __umtx_op_wake,
	[UMTX_OP_MUTEX_TRYLOCK]	= __umtx_op_trylock_umutex,
	[UMTX_OP_MUTEX_LOCK]	= __umtx_op_lock_umutex_compat32,
	[UMTX_OP_MUTEX_UNLOCK]	= __umtx_op_unlock_umutex,
	[UMTX_OP_SET_CEILING]	= __umtx_op_set_ceiling,
	[UMTX_OP_CV_WAIT]	= __umtx_op_cv_wait_compat32,
	[UMTX_OP_CV_SIGNAL]	= __umtx_op_cv_signal,
	[UMTX_OP_CV_BROADCAST]	= __umtx_op_cv_broadcast,
	[UMTX_OP_WAIT_UINT]	= __umtx_op_wait_compat32,
	[UMTX_OP_RW_RDLOCK]	= __umtx_op_rw_rdlock_compat32,
	[UMTX_OP_RW_WRLOCK]	= __umtx_op_rw_wrlock_compat32,
	[UMTX_OP_RW_UNLOCK]	= __umtx_op_rw_unlock,
	[UMTX_OP_WAIT_UINT_PRIVATE] = __umtx_op_wait_uint_private_compat32,
	[UMTX_OP_WAKE_PRIVATE]	= __umtx_op_wake_private,
	[UMTX_OP_MUTEX_WAIT]	= __umtx_op_wait_umutex_compat32,
	[UMTX_OP_MUTEX_WAKE]	= __umtx_op_wake_umutex,
	/* Old-style semaphores are only served for legacy binaries. */
#if defined(COMPAT_FREEBSD9) || defined(COMPAT_FREEBSD10)
	[UMTX_OP_SEM_WAIT]	= __umtx_op_sem_wait_compat32,
	[UMTX_OP_SEM_WAKE]	= __umtx_op_sem_wake,
#else
	[UMTX_OP_SEM_WAIT]	= __umtx_op_unimpl,
	[UMTX_OP_SEM_WAKE]	= __umtx_op_unimpl,
#endif
	[UMTX_OP_NWAKE_PRIVATE]	= __umtx_op_nwake_private32,
	[UMTX_OP_MUTEX_WAKE2]	= __umtx_op_wake2_umutex,
	[UMTX_OP_SEM2_WAIT]	= __umtx_op_sem2_wait_compat32,
	[UMTX_OP_SEM2_WAKE]	= __umtx_op_sem2_wake,
	[UMTX_OP_SHM]		= __umtx_op_shm,
	[UMTX_OP_ROBUST_LISTS]	= __umtx_op_robust_lists_compat32,
};
4360
4361 int
4362 freebsd32_umtx_op(struct thread *td, struct freebsd32_umtx_op_args *uap)
4363 {
4364
4365 if ((unsigned)uap->op < nitems(op_table_compat32)) {
4366 return (*op_table_compat32[uap->op])(td,
4367 (struct _umtx_op_args *)uap);
4368 }
4369 return (EINVAL);
4370 }
4371 #endif
4372
4373 void
4374 umtx_thread_init(struct thread *td)
4375 {
4376
4377 td->td_umtxq = umtxq_alloc();
4378 td->td_umtxq->uq_thread = td;
4379 }
4380
/* Release the per-thread umtx queue when the thread is destroyed. */
void
umtx_thread_fini(struct thread *td)
{

	umtxq_free(td->td_umtxq);
}
4387
/*
 * It will be called when new thread is created, e.g fork().
 */
void
umtx_thread_alloc(struct thread *td)
{
	struct umtx_q *uq;

	uq = td->td_umtxq;
	/* Reset inherited priority; the queue is recycled across reuse. */
	uq->uq_inherited_pri = PRI_MAX;

	/* The recycled queue must carry no stale umtx state. */
	KASSERT(uq->uq_flags == 0, ("uq_flags != 0"));
	KASSERT(uq->uq_thread == td, ("uq_thread != td"));
	KASSERT(uq->uq_pi_blocked == NULL, ("uq_pi_blocked != NULL"));
	KASSERT(TAILQ_EMPTY(&uq->uq_pi_contested), ("uq_pi_contested is not empty"));
}
4404
/*
 * exec() hook.
 *
 * Clear robust lists for all process' threads, not delaying the
 * cleanup to thread_exit hook, since the relevant address space is
 * destroyed right now.
 */
static void
umtx_exec_hook(void *arg __unused, struct proc *p,
    struct image_params *imgp __unused)
{
	struct thread *td;

	KASSERT(p == curproc, ("need curproc"));
	PROC_LOCK(p);
	/* exec() runs single-threaded; the sibling threads are parked. */
	KASSERT((p->p_flag & P_HADTHREADS) == 0 ||
	    (p->p_flag & P_STOPPED_SINGLE) != 0,
	    ("curproc must be single-threaded"));
	FOREACH_THREAD_IN_PROC(p, td) {
		KASSERT(td == curthread ||
		    ((td->td_flags & TDF_BOUNDARY) != 0 && TD_IS_SUSPENDED(td)),
		    ("running thread %p %p", p, td));
		/*
		 * Drop the proc lock around the cleanup: it walks
		 * userspace robust lists via copyin and may sleep.
		 * Suspended siblings cannot disappear meanwhile.
		 */
		PROC_UNLOCK(p);
		umtx_thread_cleanup(td);
		PROC_LOCK(p);
		/* The old address space is gone; forget the list heads. */
		td->td_rb_list = td->td_rbp_list = td->td_rb_inact = 0;
	}
	PROC_UNLOCK(p);
}
4434
/*
 * thread_exit() hook.
 */
void
umtx_thread_exit(struct thread *td)
{

	/* Disown PI mutexes and unlock robust mutexes held at exit. */
	umtx_thread_cleanup(td);
}
4444
/*
 * Fetch a userspace pointer-sized word at ptr into *res, reading a
 * 32-bit word for ILP32 processes.  Returns EFAULT on any fault.
 */
static int
umtx_read_uptr(struct thread *td, uintptr_t ptr, uintptr_t *res)
{
	u_long res1;
#ifdef COMPAT_FREEBSD32
	uint32_t res32;
#endif
	int error;

#ifdef COMPAT_FREEBSD32
	if (SV_PROC_FLAG(td->td_proc, SV_ILP32)) {
		/* 32-bit process: a user pointer is 4 bytes wide. */
		error = fueword32((void *)ptr, &res32);
		if (error == 0)
			res1 = res32;
	} else
#endif
	{
		error = fueword((void *)ptr, &res1);
	}
	if (error == 0)
		*res = res1;
	else
		error = EFAULT;
	return (error);
}
4470
/*
 * Extract the next-robust-mutex link (m_rb_lnk) from an already
 * copied-in mutex image, honoring the ILP32 umutex layout for
 * 32-bit processes.
 */
static void
umtx_read_rb_list(struct thread *td, struct umutex *m, uintptr_t *rb_list)
{
#ifdef COMPAT_FREEBSD32
	struct umutex32 m32;

	if (SV_PROC_FLAG(td->td_proc, SV_ILP32)) {
		/* Reinterpret the buffer under the 32-bit layout. */
		memcpy(&m32, m, sizeof(m32));
		*rb_list = m32.m_rb_lnk;
	} else
#endif
		*rb_list = m->m_rb_lnk;
}
4484
/*
 * Process one robust mutex at user address rbp during thread cleanup:
 * read it, optionally report the next-link through *rb_list, and
 * unlock it with the owner-died protocol if this thread owns it.
 * "inact" marks the inactive slot, where a mismatched owner is a
 * benign race rather than an error.
 */
static int
umtx_handle_rb(struct thread *td, uintptr_t rbp, uintptr_t *rb_list, bool inact)
{
	struct umutex m;
	int error;

	KASSERT(td->td_proc == curproc, ("need current vmspace"));
	error = copyin((void *)rbp, &m, sizeof(m));
	if (error != 0)
		return (error);
	/* Report the link even for mutexes we end up not unlocking. */
	if (rb_list != NULL)
		umtx_read_rb_list(td, &m, rb_list);
	if ((m.m_flags & UMUTEX_ROBUST) == 0)
		return (EINVAL);
	if ((m.m_owner & ~UMUTEX_CONTESTED) != td->td_tid)
		/* inact is cleared after unlock, allow the inconsistency */
		return (inact ? 0 : EINVAL);
	/* Owner-died unlock: sets UMUTEX_RB_OWNERDEAD for the next taker. */
	return (do_unlock_umutex(td, (struct umutex *)rbp, true));
}
4504
/*
 * Walk one userspace robust mutex list, unlocking every mutex still
 * owned by the exiting thread.  The walk is bounded by umtx_max_rb to
 * survive corrupted (e.g. cyclic) user lists; "name" tags diagnostics.
 * *rb_inact is zeroed when the inactive slot is encountered on the list.
 */
static void
umtx_cleanup_rb_list(struct thread *td, uintptr_t rb_list, uintptr_t *rb_inact,
    const char *name)
{
	int error, i;
	uintptr_t rbp;
	bool inact;

	if (rb_list == 0)
		return;
	error = umtx_read_uptr(td, rb_list, &rbp);
	for (i = 0; error == 0 && rbp != 0 && i < umtx_max_rb; i++) {
		if (rbp == *rb_inact) {
			inact = true;
			*rb_inact = 0;
		} else
			inact = false;
		/* umtx_handle_rb() advances rbp to the next list entry. */
		error = umtx_handle_rb(td, rbp, &rbp, inact);
	}
	if (i == umtx_max_rb && umtx_verbose_rb) {
		uprintf("comm %s pid %d: reached umtx %smax rb %d\n",
		    td->td_proc->p_comm, td->td_proc->p_pid, name, umtx_max_rb);
	}
	if (error != 0 && umtx_verbose_rb) {
		uprintf("comm %s pid %d: handling %srb error %d\n",
		    td->td_proc->p_comm, td->td_proc->p_pid, name, error);
	}
}
4533
/*
 * Clean up umtx data.
 */
static void
umtx_thread_cleanup(struct thread *td)
{
	struct umtx_q *uq;
	struct umtx_pi *pi;
	uintptr_t rb_inact;

	/*
	 * Disown pi mutexes.
	 */
	uq = td->td_umtxq;
	if (uq != NULL) {
		mtx_lock(&umtx_lock);
		uq->uq_inherited_pri = PRI_MAX;
		while ((pi = TAILQ_FIRST(&uq->uq_pi_contested)) != NULL) {
			pi->pi_owner = NULL;
			TAILQ_REMOVE(&uq->uq_pi_contested, pi, pi_link);
		}
		mtx_unlock(&umtx_lock);
		/* Drop any priority lent to us by blocked waiters. */
		thread_lock(td);
		sched_lend_user_prio(td, PRI_MAX);
		thread_unlock(td);
	}

	/*
	 * Handle terminated robust mutexes. Must be done after
	 * robust pi disown, otherwise unlock could see unowned
	 * entries.
	 */
	rb_inact = td->td_rb_inact;
	/* td_rb_inact points at a slot; dereference it to get the mutex. */
	if (rb_inact != 0)
		(void)umtx_read_uptr(td, rb_inact, &rb_inact);
	umtx_cleanup_rb_list(td, td->td_rb_list, &rb_inact, "");
	umtx_cleanup_rb_list(td, td->td_rbp_list, &rb_inact, "priv ");
	/* The inactive mutex was not on either list; unlock it directly. */
	if (rb_inact != 0)
		(void)umtx_handle_rb(td, rb_inact, NULL, true);
}
Cache object: ecfe3477b9f5b40e91ea05ea3eecaed9
|