/*-
 * Copyright (c) 1990 The Regents of the University of California.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	from: @(#)sys_machdep.c	5.5 (Berkeley) 1/19/91
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include "opt_capsicum.h"
#include "opt_kstack_pages.h"
#include "opt_ktrace.h"

#include <sys/param.h>
#include <sys/capsicum.h>
#include <sys/systm.h>
#include <sys/lock.h>
#include <sys/malloc.h>
#include <sys/mutex.h>
#include <sys/priv.h>
#include <sys/proc.h>
#include <sys/smp.h>
#include <sys/sysproto.h>
#ifdef KTRACE
#include <sys/ktrace.h>
#endif

#include <vm/vm.h>
#include <vm/pmap.h>
#include <vm/vm_map.h>
#include <vm/vm_extern.h>

#include <machine/atomic.h>
#include <machine/cpu.h>
#include <machine/pcb.h>
#include <machine/pcb_ext.h>
#include <machine/proc.h>
#include <machine/sysarch.h>

#include <security/audit/audit.h>

#include <vm/vm_kern.h>		/* for kernel_map */

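/*
 * Each LDT entry is 8 bytes, so LD_PER_PAGE (512) descriptors fill one
 * 4K page.  NEW_MAX_LD() rounds a requested descriptor count up to a
 * whole number of pages worth of descriptors (a count that is already a
 * multiple of LD_PER_PAGE gains one more page), and SIZE_FROM_LARGEST_LD()
 * converts such a count into bytes (<< 3 multiplies by the descriptor size).
 */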
#define MAX_LD                  8192
#define LD_PER_PAGE             512
#define NEW_MAX_LD(num)         rounddown2(num + LD_PER_PAGE, LD_PER_PAGE)
#define SIZE_FROM_LARGEST_LD(num) (NEW_MAX_LD(num) << 3)
#define NULL_LDT_BASE           ((caddr_t)NULL)

#ifdef SMP
static void set_user_ldt_rv(void *arg);
#endif
static int i386_set_ldt_data(struct thread *, int start, int num,
    union descriptor *descs);
static int i386_ldt_grow(struct thread *td, int len);

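/*
 * Fill in a segment descriptor for a 4GB read/write user segment with the
 * given linear base; used below for the %fs and %gs "based" selectors.
 */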
void
fill_based_sd(struct segment_descriptor *sdp, uint32_t base)
{

        sdp->sd_lobase = base & 0xffffff;
        sdp->sd_hibase = (base >> 24) & 0xff;
        sdp->sd_lolimit = 0xffff;       /* 4GB limit, wraps around */
        sdp->sd_hilimit = 0xf;
        sdp->sd_type = SDT_MEMRWA;
        sdp->sd_dpl = SEL_UPL;
        sdp->sd_p = 1;
        sdp->sd_xx = 0;
        sdp->sd_def32 = 1;
        sdp->sd_gran = 1;
}

/*
 * Construct special descriptors for "base" selectors.  Store them in
 * the PCB for later use by cpu_switch().  Store them in the GDT for
 * more immediate use.  The GDT entries are part of the current
 * context.  Callers must load related segment registers to complete
 * setting up the current context.
 */
void
set_fsbase(struct thread *td, uint32_t base)
{
        struct segment_descriptor sd;

        fill_based_sd(&sd, base);
        critical_enter();
        td->td_pcb->pcb_fsd = sd;
        PCPU_GET(fsgs_gdt)[0] = sd;
        critical_exit();
}

void
set_gsbase(struct thread *td, uint32_t base)
{
        struct segment_descriptor sd;

        fill_based_sd(&sd, base);
        critical_enter();
        td->td_pcb->pcb_gsd = sd;
        PCPU_GET(fsgs_gdt)[1] = sd;
        critical_exit();
}
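
/*
 * Userland reaches these helpers through sysarch(2).  A minimal,
 * illustrative sketch of pointing %gs at a thread control block from
 * user code (the tcb pointer is hypothetical; error handling omitted):
 *
 *	uint32_t base = (uint32_t)tcb;
 *	sysarch(I386_SET_GSBASE, &base);
 */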

#ifndef _SYS_SYSPROTO_H_
struct sysarch_args {
        int op;
        char *parms;
};
#endif

int
sysarch(struct thread *td, struct sysarch_args *uap)
{
        int error;
        union descriptor *lp;
        union {
                struct i386_ldt_args largs;
                struct i386_ioperm_args iargs;
                struct i386_get_xfpustate xfpu;
        } kargs;
        uint32_t base;
        struct segment_descriptor *sdp;

        AUDIT_ARG_CMD(uap->op);

#ifdef CAPABILITY_MODE
        /*
         * When adding new operations, add a new case statement here to
         * explicitly indicate whether or not the operation is safe to
         * perform in capability mode.
         */
        if (IN_CAPABILITY_MODE(td)) {
                switch (uap->op) {
                case I386_GET_LDT:
                case I386_SET_LDT:
                case I386_GET_IOPERM:
                case I386_GET_FSBASE:
                case I386_SET_FSBASE:
                case I386_GET_GSBASE:
                case I386_SET_GSBASE:
                case I386_GET_XFPUSTATE:
                        break;

                case I386_SET_IOPERM:
                default:
#ifdef KTRACE
                        if (KTRPOINT(td, KTR_CAPFAIL))
                                ktrcapfail(CAPFAIL_SYSCALL, NULL, NULL);
#endif
                        return (ECAPMODE);
                }
        }
#endif

        switch (uap->op) {
        case I386_GET_IOPERM:
        case I386_SET_IOPERM:
                if ((error = copyin(uap->parms, &kargs.iargs,
                    sizeof(struct i386_ioperm_args))) != 0)
                        return (error);
                break;
        case I386_GET_LDT:
        case I386_SET_LDT:
                if ((error = copyin(uap->parms, &kargs.largs,
                    sizeof(struct i386_ldt_args))) != 0)
                        return (error);
                break;
        case I386_GET_XFPUSTATE:
                if ((error = copyin(uap->parms, &kargs.xfpu,
                    sizeof(struct i386_get_xfpustate))) != 0)
                        return (error);
                break;
        default:
                break;
        }

        switch (uap->op) {
        case I386_GET_LDT:
                error = i386_get_ldt(td, &kargs.largs);
                break;
        case I386_SET_LDT:
                if (kargs.largs.descs != NULL) {
                        if (kargs.largs.num > MAX_LD)
                                return (EINVAL);
                        lp = malloc(kargs.largs.num * sizeof(union descriptor),
                            M_TEMP, M_WAITOK);
                        error = copyin(kargs.largs.descs, lp,
                            kargs.largs.num * sizeof(union descriptor));
                        if (error == 0)
                                error = i386_set_ldt(td, &kargs.largs, lp);
                        free(lp, M_TEMP);
                } else {
                        error = i386_set_ldt(td, &kargs.largs, NULL);
                }
                break;
        case I386_GET_IOPERM:
                error = i386_get_ioperm(td, &kargs.iargs);
                if (error == 0)
                        error = copyout(&kargs.iargs, uap->parms,
                            sizeof(struct i386_ioperm_args));
                break;
        case I386_SET_IOPERM:
                error = i386_set_ioperm(td, &kargs.iargs);
                break;
        case I386_VM86:
                error = vm86_sysarch(td, uap->parms);
                break;
        case I386_GET_FSBASE:
                sdp = &td->td_pcb->pcb_fsd;
                base = sdp->sd_hibase << 24 | sdp->sd_lobase;
                error = copyout(&base, uap->parms, sizeof(base));
                break;
        case I386_SET_FSBASE:
                error = copyin(uap->parms, &base, sizeof(base));
                if (error == 0) {
                        /*
                         * Construct the special descriptor for fsbase
                         * and arrange for doreti to load its selector
                         * soon enough.
                         */
                        set_fsbase(td, base);
                        td->td_frame->tf_fs = GSEL(GUFS_SEL, SEL_UPL);
                }
                break;
        case I386_GET_GSBASE:
                sdp = &td->td_pcb->pcb_gsd;
                base = sdp->sd_hibase << 24 | sdp->sd_lobase;
                error = copyout(&base, uap->parms, sizeof(base));
                break;
        case I386_SET_GSBASE:
                error = copyin(uap->parms, &base, sizeof(base));
                if (error == 0) {
                        /*
                         * Construct the special descriptor for gsbase.
                         * The selector is loaded immediately, since we
                         * normally only reload %gs on context switches.
                         */
                        set_gsbase(td, base);
                        load_gs(GSEL(GUGS_SEL, SEL_UPL));
                }
                break;
        case I386_GET_XFPUSTATE:
                if (kargs.xfpu.len > cpu_max_ext_state_size -
                    sizeof(union savefpu))
                        return (EINVAL);
                npxgetregs(td);
                error = copyout((char *)(get_pcb_user_save_td(td) + 1),
                    kargs.xfpu.addr, kargs.xfpu.len);
                break;
        default:
                error = EINVAL;
                break;
        }
        return (error);
}

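/*
 * Allocate a pcb extension containing a private TSS with an I/O
 * permission bitmap and a vm86 interrupt redirection map, then switch
 * the current CPU to the new TSS.
 */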
int
i386_extend_pcb(struct thread *td)
{
        int i, offset;
        u_long *addr;
        struct pcb_ext *ext;
        struct soft_segment_descriptor ssd = {
                0,                      /* segment base address (overwritten) */
                ctob(IOPAGES + 1) - 1,  /* length */
                SDT_SYS386TSS,          /* segment type */
                0,                      /* priority level */
                1,                      /* descriptor present */
                0, 0,
                0,                      /* default 32 size */
                0                       /* granularity */
        };

        ext = (struct pcb_ext *)kmem_malloc(kernel_arena, ctob(IOPAGES+1),
            M_WAITOK | M_ZERO);
        /* -16 is so we can convert a trapframe into vm86trapframe in place */
        ext->ext_tss.tss_esp0 = (vm_offset_t)td->td_pcb - 16;
        ext->ext_tss.tss_ss0 = GSEL(GDATA_SEL, SEL_KPL);
        /*
         * The last byte of the i/o map must be followed by a 0xff byte.
         * We arbitrarily allocate 16 bytes here, to keep the starting
         * address on a doubleword boundary.
         */
        offset = PAGE_SIZE - 16;
        ext->ext_tss.tss_ioopt =
            (offset - ((unsigned)&ext->ext_tss - (unsigned)ext)) << 16;
        ext->ext_iomap = (caddr_t)ext + offset;
        ext->ext_vm86.vm86_intmap = (caddr_t)ext + offset - 32;

        addr = (u_long *)ext->ext_vm86.vm86_intmap;
        for (i = 0; i < (ctob(IOPAGES) + 32 + 16) / sizeof(u_long); i++)
                *addr++ = ~0;

        ssd.ssd_base = (unsigned)&ext->ext_tss;
        ssd.ssd_limit -= ((unsigned)&ext->ext_tss - (unsigned)ext);
        ssdtosd(&ssd, &ext->ext_tssd);

        KASSERT(td == curthread, ("giving TSS to !curthread"));
        KASSERT(td->td_pcb->pcb_ext == 0, ("already have a TSS!"));

        /* Switch to the new TSS. */
        critical_enter();
        td->td_pcb->pcb_ext = ext;
        PCPU_SET(private_tss, 1);
        *PCPU_GET(tss_gdt) = ext->ext_tssd;
        ltr(GSEL(GPROC0_SEL, SEL_KPL));
        critical_exit();

        return (0);
}

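/*
 * Grant or revoke user access to a range of I/O ports.  In the TSS I/O
 * permission bitmap a clear bit allows access and a set bit traps, so
 * enabling a port clears its bit and disabling it sets the bit.
 */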
int
i386_set_ioperm(struct thread *td, struct i386_ioperm_args *uap)
{
        char *iomap;
        u_int i;
        int error;

        if ((error = priv_check(td, PRIV_IO)) != 0)
                return (error);
        if ((error = securelevel_gt(td->td_ucred, 0)) != 0)
                return (error);
        /*
         * XXX
         * While this is restricted to root, we should probably figure out
         * whether any other driver is using this i/o address, so as not to
         * cause confusion.  This probably requires a global 'usage registry'.
         */

        if (td->td_pcb->pcb_ext == 0)
                if ((error = i386_extend_pcb(td)) != 0)
                        return (error);
        iomap = (char *)td->td_pcb->pcb_ext->ext_iomap;

        if (uap->start > uap->start + uap->length ||
            uap->start + uap->length > IOPAGES * PAGE_SIZE * NBBY)
                return (EINVAL);

        for (i = uap->start; i < uap->start + uap->length; i++) {
                if (uap->enable)
                        iomap[i >> 3] &= ~(1 << (i & 7));
                else
                        iomap[i >> 3] |= (1 << (i & 7));
        }
        return (error);
}

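/*
 * Report the I/O permission state at uap->start and the length of the
 * run of consecutive ports that share that state.
 */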
int
i386_get_ioperm(struct thread *td, struct i386_ioperm_args *uap)
{
        int i, state;
        char *iomap;

        if (uap->start >= IOPAGES * PAGE_SIZE * NBBY)
                return (EINVAL);

        if (td->td_pcb->pcb_ext == 0) {
                uap->length = 0;
                goto done;
        }

        iomap = (char *)td->td_pcb->pcb_ext->ext_iomap;

        i = uap->start;
        state = (iomap[i >> 3] >> (i & 7)) & 1;
        uap->enable = !state;
        uap->length = 1;

        for (i = uap->start + 1; i < IOPAGES * PAGE_SIZE * NBBY; i++) {
                if (state != ((iomap[i >> 3] >> (i & 7)) & 1))
                        break;
                uap->length++;
        }

done:
        return (0);
}

/*
 * Update the GDT entry pointing to the LDT to point to the LDT of the
 * current process.  The locked variant below expects dt_lock to be held;
 * set_user_ldt() acquires and releases the lock around the update.
 */
static void
set_user_ldt_locked(struct mdproc *mdp)
{
        struct proc_ldt *pldt;
        int gdt_idx;

        mtx_assert(&dt_lock, MA_OWNED);

        pldt = mdp->md_ldt;
        gdt_idx = GUSERLDT_SEL;
        gdt_idx += PCPU_GET(cpuid) * NGDT;      /* always 0 on UP */
        gdt[gdt_idx].sd = pldt->ldt_sd;
        lldt(GSEL(GUSERLDT_SEL, SEL_KPL));
        PCPU_SET(currentldt, GSEL(GUSERLDT_SEL, SEL_KPL));
}

void
set_user_ldt(struct mdproc *mdp)
{

        mtx_lock_spin(&dt_lock);
        set_user_ldt_locked(mdp);
        mtx_unlock_spin(&dt_lock);
}

#ifdef SMP
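/*
 * smp_rendezvous() callback: each CPU reloads the LDT if its current
 * process belongs to the vmspace passed as the rendezvous argument.
 */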
static void
set_user_ldt_rv(void *arg)
{
        struct proc *p;

        p = curproc;
        if (arg == p->p_vmspace)
                set_user_ldt(&p->p_md);
}
#endif

/*
 * dt_lock must be held.  Returns with dt_lock held, although the lock is
 * dropped internally while the new LDT is being allocated.
 */
struct proc_ldt *
user_ldt_alloc(struct mdproc *mdp, int len)
{
        struct proc_ldt *pldt, *new_ldt;

        mtx_assert(&dt_lock, MA_OWNED);
        mtx_unlock_spin(&dt_lock);
        new_ldt = malloc(sizeof(struct proc_ldt), M_SUBPROC, M_WAITOK);

        new_ldt->ldt_len = len = NEW_MAX_LD(len);
        new_ldt->ldt_base = (caddr_t)kmem_malloc(kernel_arena,
            len * sizeof(union descriptor), M_WAITOK | M_ZERO);
        new_ldt->ldt_refcnt = 1;
        new_ldt->ldt_active = 0;

        mtx_lock_spin(&dt_lock);
        gdt_segs[GUSERLDT_SEL].ssd_base = (unsigned)new_ldt->ldt_base;
        gdt_segs[GUSERLDT_SEL].ssd_limit = len * sizeof(union descriptor) - 1;
        ssdtosd(&gdt_segs[GUSERLDT_SEL], &new_ldt->ldt_sd);

        if ((pldt = mdp->md_ldt) != NULL) {
                if (len > pldt->ldt_len)
                        len = pldt->ldt_len;
                bcopy(pldt->ldt_base, new_ldt->ldt_base,
                    len * sizeof(union descriptor));
        } else
                bcopy(ldt, new_ldt->ldt_base, sizeof(ldt));

        return (new_ldt);
}

/*
 * Must be called with dt_lock held.  Returns with dt_lock released.
 */
void
user_ldt_free(struct thread *td)
{
        struct mdproc *mdp;
        struct proc_ldt *pldt;

        mtx_assert(&dt_lock, MA_OWNED);
        mdp = &td->td_proc->p_md;
        if ((pldt = mdp->md_ldt) == NULL) {
                mtx_unlock_spin(&dt_lock);
                return;
        }

        if (td == curthread) {
                lldt(_default_ldt);
                PCPU_SET(currentldt, _default_ldt);
        }

        mdp->md_ldt = NULL;
        user_ldt_deref(pldt);
}

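/*
 * Drop a reference on an LDT.  The backing memory is freed when the last
 * reference goes away.  dt_lock must be held on entry and is released on
 * return in either case.
 */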
void
user_ldt_deref(struct proc_ldt *pldt)
{

        mtx_assert(&dt_lock, MA_OWNED);
        if (--pldt->ldt_refcnt == 0) {
                mtx_unlock_spin(&dt_lock);
                kmem_free(kernel_arena, (vm_offset_t)pldt->ldt_base,
                    pldt->ldt_len * sizeof(union descriptor));
                free(pldt, M_SUBPROC);
        } else
                mtx_unlock_spin(&dt_lock);
}

/*
 * Note for the authors of compat layers (Linux, etc.): copyout() in
 * the function below is not a problem since it presents data in the
 * arch-specific format (i.e. i386-specific in this case), not in
 * an OS-specific one.
 */
int
i386_get_ldt(struct thread *td, struct i386_ldt_args *uap)
{
        struct proc_ldt *pldt;
        char *data;
        u_int nldt, num;
        int error;

#ifdef DEBUG
        printf("i386_get_ldt: start=%u num=%u descs=%p\n",
            uap->start, uap->num, (void *)uap->descs);
#endif

        num = min(uap->num, MAX_LD);
        data = malloc(num * sizeof(union descriptor), M_TEMP, M_WAITOK);
        mtx_lock_spin(&dt_lock);
        pldt = td->td_proc->p_md.md_ldt;
        nldt = pldt != NULL ? pldt->ldt_len : NLDT;
        if (uap->start >= nldt) {
                num = 0;
        } else {
                num = min(num, nldt - uap->start);
                bcopy(pldt != NULL ?
                    &((union descriptor *)(pldt->ldt_base))[uap->start] :
                    &ldt[uap->start], data, num * sizeof(union descriptor));
        }
        mtx_unlock_spin(&dt_lock);
        error = copyout(data, uap->descs, num * sizeof(union descriptor));
        if (error == 0)
                td->td_retval[0] = num;
        free(data, M_TEMP);
        return (error);
}

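/*
 * Install user-supplied LDT descriptors.  Three cases are handled below:
 * descs == NULL frees a range of descriptors, LDT_AUTO_ALLOC with num == 1
 * picks the first free slot, and anything else overwrites an explicit
 * range after each descriptor has been validated.
 */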
int
i386_set_ldt(struct thread *td, struct i386_ldt_args *uap,
    union descriptor *descs)
{
        struct mdproc *mdp;
        struct proc_ldt *pldt;
        union descriptor *dp;
        u_int largest_ld, i;
        int error;

#ifdef DEBUG
        printf("i386_set_ldt: start=%u num=%u descs=%p\n",
            uap->start, uap->num, (void *)uap->descs);
#endif
        error = 0;
        mdp = &td->td_proc->p_md;

        if (descs == NULL) {
                /* Free descriptors */
                if (uap->start == 0 && uap->num == 0) {
                        /*
                         * Treat this as a special case, so userland needn't
                         * know magic number NLDT.
                         */
                        uap->start = NLDT;
                        uap->num = MAX_LD - NLDT;
                }
                mtx_lock_spin(&dt_lock);
                if ((pldt = mdp->md_ldt) == NULL ||
                    uap->start >= pldt->ldt_len) {
                        mtx_unlock_spin(&dt_lock);
                        return (0);
                }
                largest_ld = uap->start + uap->num;
                if (largest_ld > pldt->ldt_len)
                        largest_ld = pldt->ldt_len;
                for (i = uap->start; i < largest_ld; i++)
                        atomic_store_rel_64(&((uint64_t *)(pldt->ldt_base))[i],
                            0);
                mtx_unlock_spin(&dt_lock);
                return (0);
        }

        if (uap->start != LDT_AUTO_ALLOC || uap->num != 1) {
                /* verify range of descriptors to modify */
                largest_ld = uap->start + uap->num;
                if (uap->start >= MAX_LD || largest_ld > MAX_LD)
                        return (EINVAL);
        }

        /* Check descriptors for access violations */
        for (i = 0; i < uap->num; i++) {
                dp = &descs[i];

                switch (dp->sd.sd_type) {
                case SDT_SYSNULL:       /* system null */
                        dp->sd.sd_p = 0;
                        break;
                case SDT_SYS286TSS:     /* system 286 TSS available */
                case SDT_SYSLDT:        /* system local descriptor table */
                case SDT_SYS286BSY:     /* system 286 TSS busy */
                case SDT_SYSTASKGT:     /* system task gate */
                case SDT_SYS286IGT:     /* system 286 interrupt gate */
                case SDT_SYS286TGT:     /* system 286 trap gate */
                case SDT_SYSNULL2:      /* undefined by Intel */
                case SDT_SYS386TSS:     /* system 386 TSS available */
                case SDT_SYSNULL3:      /* undefined by Intel */
                case SDT_SYS386BSY:     /* system 386 TSS busy */
                case SDT_SYSNULL4:      /* undefined by Intel */
                case SDT_SYS386IGT:     /* system 386 interrupt gate */
                case SDT_SYS386TGT:     /* system 386 trap gate */
                case SDT_SYS286CGT:     /* system 286 call gate */
                case SDT_SYS386CGT:     /* system 386 call gate */
                        return (EACCES);

                /* memory segment types */
                case SDT_MEMEC:         /* memory execute only conforming */
                case SDT_MEMEAC:        /* memory execute only accessed conforming */
                case SDT_MEMERC:        /* memory execute read conforming */
                case SDT_MEMERAC:       /* memory execute read accessed conforming */
                        /* Must be "present" if executable and conforming. */
                        if (dp->sd.sd_p == 0)
                                return (EACCES);
                        break;
                case SDT_MEMRO:         /* memory read only */
                case SDT_MEMROA:        /* memory read only accessed */
                case SDT_MEMRW:         /* memory read write */
                case SDT_MEMRWA:        /* memory read write accessed */
                case SDT_MEMROD:        /* memory read only expand dwn limit */
                case SDT_MEMRODA:       /* memory read only expand dwn lim accessed */
                case SDT_MEMRWD:        /* memory read write expand dwn limit */
                case SDT_MEMRWDA:       /* memory read write expand dwn lim accessed */
                case SDT_MEME:          /* memory execute only */
                case SDT_MEMEA:         /* memory execute only accessed */
                case SDT_MEMER:         /* memory execute read */
                case SDT_MEMERA:        /* memory execute read accessed */
                        break;
                default:
                        return (EINVAL);
                }

                /* Only user (ring-3) descriptors may be present. */
                if (dp->sd.sd_p != 0 && dp->sd.sd_dpl != SEL_UPL)
                        return (EACCES);
        }

        if (uap->start == LDT_AUTO_ALLOC && uap->num == 1) {
                /* Allocate a free slot */
                mtx_lock_spin(&dt_lock);
                if ((pldt = mdp->md_ldt) == NULL) {
                        if ((error = i386_ldt_grow(td, NLDT + 1))) {
                                mtx_unlock_spin(&dt_lock);
                                return (error);
                        }
                        pldt = mdp->md_ldt;
                }
again:
                /*
                 * Start scanning a bit up to leave room for NVidia and
                 * Wine, which still use the "Blat" method of allocation.
                 */
                dp = &((union descriptor *)(pldt->ldt_base))[NLDT];
                for (i = NLDT; i < pldt->ldt_len; ++i) {
                        if (dp->sd.sd_type == SDT_SYSNULL)
                                break;
                        dp++;
                }
                if (i >= pldt->ldt_len) {
                        if ((error = i386_ldt_grow(td, pldt->ldt_len+1))) {
                                mtx_unlock_spin(&dt_lock);
                                return (error);
                        }
                        goto again;
                }
                uap->start = i;
                error = i386_set_ldt_data(td, i, 1, descs);
                mtx_unlock_spin(&dt_lock);
        } else {
                largest_ld = uap->start + uap->num;
                mtx_lock_spin(&dt_lock);
                if (!(error = i386_ldt_grow(td, largest_ld))) {
                        error = i386_set_ldt_data(td, uap->start, uap->num,
                            descs);
                }
                mtx_unlock_spin(&dt_lock);
        }
        if (error == 0)
                td->td_retval[0] = uap->start;
        return (error);
}

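/*
 * Copy already-validated descriptors into the process LDT starting at
 * the given slot.  Callers hold dt_lock.
 */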
static int
i386_set_ldt_data(struct thread *td, int start, int num,
    union descriptor *descs)
{
        struct mdproc *mdp;
        struct proc_ldt *pldt;
        uint64_t *dst, *src;
        int i;

        mtx_assert(&dt_lock, MA_OWNED);

        mdp = &td->td_proc->p_md;
        pldt = mdp->md_ldt;
        dst = (uint64_t *)(pldt->ldt_base);
        src = (uint64_t *)descs;

        /*
         * Atomic(9) is used only to get a 64-bit atomic store with
         * cmpxchg8b when available.  There is no 64-bit store operation
         * without release semantics.
         */
        for (i = 0; i < num; i++)
                atomic_store_rel_64(&dst[start + i], src[i]);
        return (0);
}

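/*
 * Ensure the process LDT can hold at least "len" descriptors, allocating
 * or replacing it as needed and propagating the new LDT to other CPUs.
 * Called and returns with dt_lock held, although the lock is dropped
 * internally around blocking operations.
 */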
static int
i386_ldt_grow(struct thread *td, int len)
{
        struct mdproc *mdp;
        struct proc_ldt *new_ldt, *pldt;
        caddr_t old_ldt_base;
        int old_ldt_len;

        mtx_assert(&dt_lock, MA_OWNED);

        if (len > MAX_LD)
                return (ENOMEM);
        if (len < NLDT + 1)
                len = NLDT + 1;

        mdp = &td->td_proc->p_md;
        old_ldt_base = NULL_LDT_BASE;
        old_ldt_len = 0;

        /* Allocate a user ldt. */
        if ((pldt = mdp->md_ldt) == NULL || len > pldt->ldt_len) {
                new_ldt = user_ldt_alloc(mdp, len);
                if (new_ldt == NULL)
                        return (ENOMEM);
                pldt = mdp->md_ldt;

                if (pldt != NULL) {
                        if (new_ldt->ldt_len <= pldt->ldt_len) {
                                /*
                                 * We just lost the race for allocation, so
                                 * free the new object and return.
                                 */
                                mtx_unlock_spin(&dt_lock);
                                kmem_free(kernel_arena,
                                    (vm_offset_t)new_ldt->ldt_base,
                                    new_ldt->ldt_len * sizeof(union descriptor));
                                free(new_ldt, M_SUBPROC);
                                mtx_lock_spin(&dt_lock);
                                return (0);
                        }

                        /*
                         * We have to substitute the current LDT entry for
                         * curproc with the new one since its size grew.
                         */
                        old_ldt_base = pldt->ldt_base;
                        old_ldt_len = pldt->ldt_len;
                        pldt->ldt_sd = new_ldt->ldt_sd;
                        pldt->ldt_base = new_ldt->ldt_base;
                        pldt->ldt_len = new_ldt->ldt_len;
                } else
                        mdp->md_ldt = pldt = new_ldt;
#ifdef SMP
                /*
                 * Signal other cpus to reload ldt.  We must release dt_lock
                 * here because the other CPUs will contend for it: their
                 * curthreads do not hold the lock and would block when
                 * trying to acquire it.
                 */
                mtx_unlock_spin(&dt_lock);
                smp_rendezvous(NULL, set_user_ldt_rv, NULL,
                    td->td_proc->p_vmspace);
#else
                set_user_ldt_locked(&td->td_proc->p_md);
                mtx_unlock_spin(&dt_lock);
#endif
                if (old_ldt_base != NULL_LDT_BASE) {
                        kmem_free(kernel_arena, (vm_offset_t)old_ldt_base,
                            old_ldt_len * sizeof(union descriptor));
                        free(new_ldt, M_SUBPROC);
                }
                mtx_lock_spin(&dt_lock);
        }
        return (0);
}