The Design and Implementation of the FreeBSD Operating System, Second Edition
Now available: The Design and Implementation of the FreeBSD Operating System (Second Edition)


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]

FreeBSD/Linux Kernel Cross Reference
sys/i386/i386/vm_machdep.c

Version: -  FREEBSD  -  FREEBSD-13-STABLE  -  FREEBSD-13-0  -  FREEBSD-12-STABLE  -  FREEBSD-12-0  -  FREEBSD-11-STABLE  -  FREEBSD-11-0  -  FREEBSD-10-STABLE  -  FREEBSD-10-0  -  FREEBSD-9-STABLE  -  FREEBSD-9-0  -  FREEBSD-8-STABLE  -  FREEBSD-8-0  -  FREEBSD-7-STABLE  -  FREEBSD-7-0  -  FREEBSD-6-STABLE  -  FREEBSD-6-0  -  FREEBSD-5-STABLE  -  FREEBSD-5-0  -  FREEBSD-4-STABLE  -  FREEBSD-3-STABLE  -  FREEBSD22  -  l41  -  OPENBSD  -  linux-2.6  -  MK84  -  PLAN9  -  xnu-8792 
SearchContext: -  none  -  3  -  10 

    1 /*-
    2  * SPDX-License-Identifier: BSD-4-Clause
    3  *
    4  * Copyright (c) 1982, 1986 The Regents of the University of California.
    5  * Copyright (c) 1989, 1990 William Jolitz
    6  * Copyright (c) 1994 John Dyson
    7  * All rights reserved.
    8  *
    9  * This code is derived from software contributed to Berkeley by
   10  * the Systems Programming Group of the University of Utah Computer
   11  * Science Department, and William Jolitz.
   12  *
   13  * Redistribution and use in source and binary forms, with or without
   14  * modification, are permitted provided that the following conditions
   15  * are met:
   16  * 1. Redistributions of source code must retain the above copyright
   17  *    notice, this list of conditions and the following disclaimer.
   18  * 2. Redistributions in binary form must reproduce the above copyright
   19  *    notice, this list of conditions and the following disclaimer in the
   20  *    documentation and/or other materials provided with the distribution.
   21  * 3. All advertising materials mentioning features or use of this software
   22  *    must display the following acknowledgement:
   23  *      This product includes software developed by the University of
   24  *      California, Berkeley and its contributors.
   25  * 4. Neither the name of the University nor the names of its contributors
   26  *    may be used to endorse or promote products derived from this software
   27  *    without specific prior written permission.
   28  *
   29  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
   30  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
   31  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
   32  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
   33  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
   34  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
   35  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
   36  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
   37  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
   38  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
   39  * SUCH DAMAGE.
   40  *
   41  *      from: @(#)vm_machdep.c  7.3 (Berkeley) 5/13/91
   42  *      Utah $Hdr: vm_machdep.c 1.16.1.1 89/06/23$
   43  */
   44 
   45 #include <sys/cdefs.h>
   46 __FBSDID("$FreeBSD$");
   47 
   48 #include "opt_isa.h"
   49 #include "opt_npx.h"
   50 #include "opt_reset.h"
   51 #include "opt_cpu.h"
   52 
   53 #include <sys/param.h>
   54 #include <sys/systm.h>
   55 #include <sys/bio.h>
   56 #include <sys/buf.h>
   57 #include <sys/kernel.h>
   58 #include <sys/ktr.h>
   59 #include <sys/lock.h>
   60 #include <sys/malloc.h>
   61 #include <sys/mbuf.h>
   62 #include <sys/mutex.h>
   63 #include <sys/pioctl.h>
   64 #include <sys/proc.h>
   65 #include <sys/sysent.h>
   66 #include <sys/sf_buf.h>
   67 #include <sys/smp.h>
   68 #include <sys/sched.h>
   69 #include <sys/sysctl.h>
   70 #include <sys/unistd.h>
   71 #include <sys/vnode.h>
   72 #include <sys/vmmeter.h>
   73 
   74 #include <machine/cpu.h>
   75 #include <machine/cputypes.h>
   76 #include <machine/md_var.h>
   77 #include <machine/pcb.h>
   78 #include <machine/pcb_ext.h>
   79 #include <machine/smp.h>
   80 #include <machine/vm86.h>
   81 
   82 #include <vm/vm.h>
   83 #include <vm/vm_extern.h>
   84 #include <vm/vm_kern.h>
   85 #include <vm/vm_page.h>
   86 #include <vm/vm_map.h>
   87 #include <vm/vm_param.h>
   88 
      /*
       * Fallback value for NSFBUFS, used only when it has not been defined
       * already; the value grows linearly with maxusers.
       */
   89 #ifndef NSFBUFS
   90 #define NSFBUFS         (512 + maxusers * 16)
   91 #endif
   92 
      /*
       * Compile-time check that the __OFFSETOF_MONITORBUF constant equals the
       * actual offset of pc_monitorbuf inside struct pcpu.
       */
   93 _Static_assert(__OFFSETOF_MONITORBUF == offsetof(struct pcpu, pc_monitorbuf),
   94     "__OFFSETOF_MONITORBUF does not correspond with offset of pc_monitorbuf.");
   95 
      /*
       * Return the address of the user FPU save area of thread td.  The area
       * ends at the top of td's kernel stack and spans cpu_max_ext_state_size
       * bytes rounded up to XSAVE_AREA_ALIGN.
       */
   96 union savefpu *
   97 get_pcb_user_save_td(struct thread *td)
   98 {
   99         vm_offset_t area, top;
  100 
  101         top = td->td_kstack + td->td_kstack_pages * PAGE_SIZE;
  102         area = top - roundup2(cpu_max_ext_state_size, XSAVE_AREA_ALIGN);
  103         KASSERT(area % XSAVE_AREA_ALIGN == 0, ("Unaligned pcb_user_save area"));
  104         return ((union savefpu *)area);
  105 }
  106 
      /*
       * Return the user FPU save area that belongs to pcb: the memory that
       * starts immediately after the pcb structure itself.
       */
  107 union savefpu *
  108 get_pcb_user_save_pcb(struct pcb *pcb)
  109 {
  110         union savefpu *save_area;
  111 
  112         save_area = (union savefpu *)(pcb + 1);
  113         return (save_area);
  114 }
  115 
      /*
       * Return the address of thread td's pcb.  Walking down from the top of
       * the kernel stack, the (aligned) extended FPU state area comes first
       * and the pcb lies directly below it.
       */
  116 struct pcb *
  117 get_pcb_td(struct thread *td)
  118 {
  119         vm_offset_t pcb_addr;
  120 
  121         pcb_addr = td->td_kstack + td->td_kstack_pages * PAGE_SIZE;
  122         pcb_addr -= roundup2(cpu_max_ext_state_size, XSAVE_AREA_ALIGN);
  123         pcb_addr -= sizeof(struct pcb);
  124         return ((struct pcb *)pcb_addr);
  125 }
  126 
      /*
       * Allocate a buffer of cpu_max_ext_state_size bytes for saving FPU state.
       * When XSAVE is in use, the xstate header inside the buffer is zeroed and
       * its xstate_bv field is set to xsave_mask.
       *
       * flags is handed to malloc(9) unchanged.  With M_NOWAIT the allocation
       * may fail; NULL is then returned without touching the buffer, and the
       * caller has to check for it.
       */
  127 void *
  128 alloc_fpusave(int flags)
  129 {
  130         void *res;
  131         struct savefpu_ymm *sf;
  132 
  133         res = malloc(cpu_max_ext_state_size, M_DEVBUF, flags);
              /* Only initialize the header if the allocation succeeded. */
  134         if (res != NULL && use_xsave) {
  135                 sf = (struct savefpu_ymm *)res;
  136                 bzero(&sf->sv_xstate.sx_hd, sizeof(sf->sv_xstate.sx_hd));
  137                 sf->sv_xstate.sx_hd.xstate_bv = xsave_mask;
  138         }
  139         return (res);
  140 }
  141 /*
  142  * Finish a fork operation, with process p2 nearly set up.
  143  * Copy and update the pcb, set up the stack so that the child
  144  * is ready to run and return to user mode.
       *
       * td1 is the thread whose state is copied, p2/td2 are the new process
       * and its thread, and flags holds the RF* bits of the request.  When
       * RFPROC is not set nothing is copied to td2: the only work done is to
       * give td1's process a private LDT if RFMEM is clear as well and the
       * LDT is currently shared.
  145  */
  146 void
  147 cpu_fork(struct thread *td1, struct proc *p2, struct thread *td2, int flags)
  148 {
  149         struct proc *p1;
  150         struct pcb *pcb2;
  151         struct mdproc *mdp2;
  152 
  153         p1 = td1->td_proc;
  154         if ((flags & RFPROC) == 0) {
  155                 if ((flags & RFMEM) == 0) {
  156                         /* unshare user LDT */
  157                         struct mdproc *mdp1 = &p1->p_md;
  158                         struct proc_ldt *pldt, *pldt1;
  159 
  160                         mtx_lock_spin(&dt_lock);
  161                         if ((pldt1 = mdp1->md_ldt) != NULL &&
  162                             pldt1->ldt_refcnt > 1) {
  163                                 pldt = user_ldt_alloc(mdp1, pldt1->ldt_len);
  164                                 if (pldt == NULL)
  165                                         panic("could not copy LDT");
  166                                 mdp1->md_ldt = pldt;
  167                                 set_user_ldt(mdp1);
                                      /*
                                       * NOTE(review): dt_lock is not unlocked
                                       * explicitly on this path; presumably
                                       * user_ldt_deref() releases it - confirm.
                                       */
  168                                 user_ldt_deref(pldt1);
  169                         } else
  170                                 mtx_unlock_spin(&dt_lock);
  171                 }
  172                 return;
  173         }
  174 
  175         /* Ensure that td1's pcb is up to date. */
  176         if (td1 == curthread)
  177                 td1->td_pcb->pcb_gs = rgs();
              /* Save the FPU state if td1 is the thread recorded in fpcurthread. */
  178         critical_enter();
  179         if (PCPU_GET(fpcurthread) == td1)
  180                 npxsave(td1->td_pcb->pcb_save);
  181         critical_exit();
  182 
  183         /* Point the pcb to the top of the stack */
  184         pcb2 = get_pcb_td(td2);
  185         td2->td_pcb = pcb2;
  186 
  187         /* Copy td1's pcb */
  188         bcopy(td1->td_pcb, pcb2, sizeof(*pcb2));
  189 
  190         /* Properly initialize pcb_save */
  191         pcb2->pcb_save = get_pcb_user_save_pcb(pcb2);
  192         bcopy(get_pcb_user_save_td(td1), get_pcb_user_save_pcb(pcb2),
  193             cpu_max_ext_state_size);
  194 
  195         /* Reset debug registers in the new process */
  196         x86_clear_dbregs(pcb2);
  197 
  198         /* Point mdproc and then copy over td1's contents */
  199         mdp2 = &p2->p_md;
  200         bcopy(&p1->p_md, mdp2, sizeof(*mdp2));
  201 
  202         /*
  203          * Create a new fresh stack for the new process.
  204          * Copy the trap frame for the return to user mode as if from a
  205          * syscall.  This copies most of the user mode register values.
  206          * The -VM86_STACK_SPACE (-16) is so we can expand the trapframe
  207          * if we go to vm86.
  208          */
  209         td2->td_frame = (struct trapframe *)((caddr_t)td2->td_pcb -
  210             VM86_STACK_SPACE) - 1;
  211         bcopy(td1->td_frame, td2->td_frame, sizeof(struct trapframe));
  212 
  213         td2->td_frame->tf_eax = 0;              /* Child returns zero */
  214         td2->td_frame->tf_eflags &= ~PSL_C;     /* success */
  215         td2->td_frame->tf_edx = 1;
  216 
  217         /*
  218          * If the parent process has the trap bit set (i.e. a debugger had
  219          * single stepped the process to the system call), we need to clear
  220          * the trap flag from the new frame unless the debugger had set PF_FORK
  221          * on the parent.  Otherwise, the child will receive a (likely
  222          * unexpected) SIGTRAP when it executes the first instruction after
  223          * returning to userland.
  224          */
  225         if ((p1->p_pfsflags & PF_FORK) == 0)
  226                 td2->td_frame->tf_eflags &= ~PSL_T;
  227 
  228         /*
  229          * Set registers for trampoline to user mode.  Leave space for the
  230          * return address on stack.  These are the kernel mode register values.
  231          */
  232 #if defined(PAE) || defined(PAE_TABLES)
  233         pcb2->pcb_cr3 = vtophys(vmspace_pmap(p2->p_vmspace)->pm_pdpt);
  234 #else
  235         pcb2->pcb_cr3 = vtophys(vmspace_pmap(p2->p_vmspace)->pm_pdir);
  236 #endif
  237         pcb2->pcb_edi = 0;
  238         pcb2->pcb_esi = (int)fork_return;       /* fork_trampoline argument */
  239         pcb2->pcb_ebp = 0;
  240         pcb2->pcb_esp = (int)td2->td_frame - sizeof(void *);
  241         pcb2->pcb_ebx = (int)td2;               /* fork_trampoline argument */
  242         pcb2->pcb_eip = (int)fork_trampoline + setidt_disp;
  243         /*-
  244          * pcb2->pcb_dr*:       cloned above.
  245          * pcb2->pcb_savefpu:   cloned above.
  246          * pcb2->pcb_flags:     cloned above.
  247          * pcb2->pcb_onfault:   cloned above (always NULL here?).
  248          * pcb2->pcb_gs:        cloned above.
  249          * pcb2->pcb_ext:       cleared below.
  250          */
  251 
  252         /*
  253          * XXX don't copy the i/o pages.  this should probably be fixed.
  254          */
  255         pcb2->pcb_ext = 0;
  256 
  257         /*
               * Copy the LDT, if necessary.  mdp2->md_ldt still holds the value
               * copied from p1: with RFMEM the same LDT is shared and only its
               * reference count is bumped, otherwise a new one of the same
               * length is allocated for p2.
               */
  258         mtx_lock_spin(&dt_lock);
  259         if (mdp2->md_ldt != NULL) {
  260                 if (flags & RFMEM) {
  261                         mdp2->md_ldt->ldt_refcnt++;
  262                 } else {
  263                         mdp2->md_ldt = user_ldt_alloc(mdp2,
  264                             mdp2->md_ldt->ldt_len);
  265                         if (mdp2->md_ldt == NULL)
  266                                 panic("could not copy LDT");
  267                 }
  268         }
  269         mtx_unlock_spin(&dt_lock);
  270 
  271         /* Setup to release spin count in fork_exit(). */
  272         td2->td_md.md_spinlock_count = 1;
  273         td2->td_md.md_saved_flags = PSL_KERNEL | PSL_I;
  274 
  275         /*
  276          * Now, cpu_switch() can schedule the new process.
  277          * pcb_esp is loaded pointing to the cpu_switch() stack frame
  278          * containing the return address when exiting cpu_switch.
  279          * This will normally be to fork_trampoline(), which will have
  280          * %ebx loaded with the new thread's pointer (td2, see pcb_ebx
  281          * above).  fork_trampoline() will set up a stack to call
  282          * fork_return() to complete the return to user-mode.
  283          */
  284 }
  285 
  286 /*
  287  * Redirect a freshly forked thread that has NOT been scheduled yet: the
  288  * function and argument kept in pcb_esi and pcb_ebx are replaced by func
  289  * and arg.
  290  * This is needed to make kernel threads stay in kernel mode.
  291  */
  292 void
  293 cpu_fork_kthread_handler(struct thread *td, void (*func)(void *), void *arg)
  294 {
  295         struct pcb *pcb;
  296 
  297         /* The trap frame follows the args: the call is func(arg, frame). */
  298         pcb = td->td_pcb;
  299         pcb->pcb_esi = (int)func;       /* function */
  300         pcb->pcb_ebx = (int)arg;        /* first arg */
  301 }
  302 
      /*
       * Release the custom LDT of td's process, if it has one.
       * NOTE(review): dt_lock is taken here and only unlocked on the no-LDT
       * path; presumably user_ldt_free() drops it otherwise - confirm.
       */
  303 void
  304 cpu_exit(struct thread *td)
  305 {
  306 
  307         mtx_lock_spin(&dt_lock);
  308         if (td->td_proc->p_md.md_ldt == NULL) {
  309                 /* No custom LDT: nothing to release. */
  310                 mtx_unlock_spin(&dt_lock);
  311                 return;
  312         }
  313 
  314         /* Reset pcb_gs and %gs first in case they refer to an LDT entry. */
  315         td->td_pcb->pcb_gs = _udatasel;
  316         load_gs(_udatasel);
  317         user_ldt_free(td);
  318 }
  319 
      /*
       * Per-thread exit work: drop the FPU state if td is the thread recorded
       * in fpcurthread, and turn off hardware breakpoints if td's pcb has
       * PCB_DBREGS set.
       */
  320 void
  321 cpu_thread_exit(struct thread *td)
  322 {
  323         struct pcb *pcb;
  324 
  325         critical_enter();
  326         if (PCPU_GET(fpcurthread) == td)
  327                 npxdrop();
  328         critical_exit();
  329         pcb = td->td_pcb;
  330         if ((pcb->pcb_flags & PCB_DBREGS) == 0)
  331                 return;
  332         reset_dbregs();         /* disable any hardware breakpoints */
  333         pcb->pcb_flags &= ~PCB_DBREGS;
  334 }
  335 
      /*
       * Free the pcb extension of td, if one is attached, and clear the
       * pointer to it.
       */
  336 void
  337 cpu_thread_clean(struct thread *td)
  338 {
  339         struct pcb *pcb = td->td_pcb;
  340 
  341         /* Nothing to do unless the thread has a pcb extension. */
  342         if (pcb->pcb_ext == NULL)
  343                 return;
  344 
  345         /*
  346          * XXX do we need to move the TSS off the allocated pages
  347          * before freeing them?  (not done here)
  348          */
  349         pmap_trm_free(pcb->pcb_ext, ctob(IOPAGES + 1));
  350         pcb->pcb_ext = NULL;
  351 }
  352 
      /*
       * Intentionally empty: this implementation does no work for td.
       * NOTE(review): presumably the machine-dependent hook invoked when a
       * thread is swapped in - confirm against the caller.
       */
  353 void
  354 cpu_thread_swapin(struct thread *td)
  355 {
  356 }
  357 
      /*
       * Intentionally empty: this implementation does no work for td.
       * NOTE(review): presumably the machine-dependent hook invoked when a
       * thread is swapped out - confirm against the caller.
       */
  358 void
  359 cpu_thread_swapout(struct thread *td)
  360 {
  361 }
  362 
      /*
       * Set up the machine-dependent pointers of thread td: td_pcb, td_frame,
       * and the pcb's pcb_ext and pcb_save fields.  With XSAVE in use the
       * xstate header of the save area is initialized as well.
       */
  363 void
  364 cpu_thread_alloc(struct thread *td)
  365 {
  366         struct pcb *pcb;
  367         struct xstate_hdr *xhdr;
  368 
  369         td->td_pcb = pcb = get_pcb_td(td);
              /* The trap frame sits VM86_STACK_SPACE bytes below the pcb. */
  370         td->td_frame = (struct trapframe *)((caddr_t)pcb -
  371             VM86_STACK_SPACE) - 1;
  372         pcb->pcb_ext = NULL; 
  373         pcb->pcb_save = get_pcb_user_save_pcb(pcb);
  374         if (use_xsave) {
                      /*
                       * The header is taken to start right after the first
                       * union savefpu of the save area; zero it and set
                       * xstate_bv to xsave_mask.
                       */
  375                 xhdr = (struct xstate_hdr *)(pcb->pcb_save + 1);
  376                 bzero(xhdr, sizeof(*xhdr));
  377                 xhdr->xstate_bv = xsave_mask;
  378         }
  379 }
  380 
      /*
       * Release the machine-dependent resources of td.  All the work is
       * delegated to cpu_thread_clean().
       */
  381 void
  382 cpu_thread_free(struct thread *td)
  383 {
  384 
  385         cpu_thread_clean(td);
  386 }
  387 
      /*
       * Always returns true; neither p nor map is examined.
       * NOTE(review): presumably tells exec that the existing vmspace may be
       * reused - confirm against the caller.
       */
  388 bool
  389 cpu_exec_vmspace_reuse(struct proc *p __unused, vm_map_t map __unused)
  390 {
  391 
  392         return (true);
  393 }
  394 
      /*
       * No machine-dependent requests are handled here: every call returns
       * EINVAL and all arguments are ignored.
       */
  395 int
  396 cpu_procctl(struct thread *td __unused, int idtype __unused, id_t id __unused,
  397     int com __unused, void *data __unused)
  398 {
  399 
  400         return (EINVAL);
  401 }
  402 
      /*
       * Store the outcome of a system call in td's trap frame.  error selects
       * what is written: 0, ERESTART and EJUSTRETURN are handled specially,
       * any other value is reported as a failure.
       */
  403 void
  404 cpu_set_syscall_retval(struct thread *td, int error)
  405 {
  406         struct trapframe *frame = td->td_frame;
  407 
  408         switch (error) {
  409         case 0:
  410                 /* Success: both return values, carry flag cleared. */
  411                 frame->tf_eax = td->td_retval[0];
  412                 frame->tf_edx = td->td_retval[1];
  413                 frame->tf_eflags &= ~PSL_C;
  414                 break;
  415         case ERESTART:
  416                 /*
  417                  * Reconstruct pc, assuming lcall $X,y is 7 bytes, int
  418                  * 0x80 is 2 bytes. We saved this in tf_err.
  419                  */
  420                 frame->tf_eip -= frame->tf_err;
  421                 break;
  422         case EJUSTRETURN:       /* the frame is left untouched */
  423                 break;
  424         default:
  425                 /* Failure: ABI-specific errno in %eax, carry flag set. */
  426                 frame->tf_eax = SV_ABI_ERRNO(td->td_proc, error);
  427                 frame->tf_eflags |= PSL_C;
  428                 break;
  429         }
  430 }
  431 
  432 /*
  433  * Initialize machine state, mostly pcb and trap frame for a new
  434  * thread, about to return to userspace.  Put enough state in the new
  435  * thread's PCB to get it to go back to the fork_return(), which
  436  * finalizes the thread state and handles peculiarities of the first
  437  * return to userspace for the new thread.
       *
       * td is the new thread and td0 the thread whose pcb, FPU save area and
       * trap frame are copied.
  438  */
  439 void
  440 cpu_copy_thread(struct thread *td, struct thread *td0)
  441 {
  442         struct pcb *pcb2;
  443 
  444         /* Work on the pcb that is already attached to td. */
  445         pcb2 = td->td_pcb;
  446 
  447         /*
  448          * Copy the upcall pcb.  This loads kernel regs.
  449          * Those not loaded individually below get their default
  450          * values here.
  451          */
  452         bcopy(td0->td_pcb, pcb2, sizeof(*pcb2));
  453         pcb2->pcb_flags &= ~(PCB_NPXINITDONE | PCB_NPXUSERINITDONE |
  454             PCB_KERNNPX);
              /* pcb_save was overwritten by the copy; point it at td's own area. */
  455         pcb2->pcb_save = get_pcb_user_save_pcb(pcb2);
  456         bcopy(get_pcb_user_save_td(td0), pcb2->pcb_save,
  457             cpu_max_ext_state_size);
  458 
  459         /*
  460          * Create a new fresh stack for the new thread.
  461          */
  462         bcopy(td0->td_frame, td->td_frame, sizeof(struct trapframe));
  463 
  464         /* If the current thread has the trap bit set (i.e. a debugger had
  465          * single stepped the process to the system call), we need to clear
  466          * the trap flag from the new frame. Otherwise, the new thread will
  467          * receive a (likely unexpected) SIGTRAP when it executes the first
  468          * instruction after returning to userland.
  469          */
  470         td->td_frame->tf_eflags &= ~PSL_T;
  471 
  472         /*
  473          * Set registers for trampoline to user mode.  Leave space for the
  474          * return address on stack.  These are the kernel mode register values.
  475          */
  476         pcb2->pcb_edi = 0;
  477         pcb2->pcb_esi = (int)fork_return;                   /* trampoline arg */
  478         pcb2->pcb_ebp = 0;
  479         pcb2->pcb_esp = (int)td->td_frame - sizeof(void *); /* trampoline arg */
  480         pcb2->pcb_ebx = (int)td;                            /* trampoline arg */
  481         pcb2->pcb_eip = (int)fork_trampoline + setidt_disp;
  482         pcb2->pcb_gs = rgs();
  483         /*
  484          * If we didn't copy the pcb, we'd need to do the following registers:
  485          * pcb2->pcb_cr3:       cloned above.
  486          * pcb2->pcb_dr*:       cloned above.
  487          * pcb2->pcb_savefpu:   cloned above.
  488          * pcb2->pcb_flags:     cloned above.
  489          * pcb2->pcb_onfault:   cloned above (always NULL here?).
  490          * pcb2->pcb_gs:        set from rgs() above.
  491          * pcb2->pcb_ext:       cleared below.
  492          */
  493         pcb2->pcb_ext = NULL;
  494 
  495         /* Setup to release spin count in fork_exit(). */
  496         td->td_md.md_spinlock_count = 1;
  497         td->td_md.md_saved_flags = PSL_KERNEL | PSL_I;
  498 }
  499 
  500 /*
  501  * Set that machine state for performing an upcall that starts
  502  * the entry function with the given argument.
       *
       * The trap frame of td is changed so that execution resumes at entry on
       * the stack described by stack, and a zero return address plus arg are
       * written to that stack with suword().  The results of the suword()
       * calls are not checked.
  503  */
  504 void
  505 cpu_set_upcall(struct thread *td, void (*entry)(void *), void *arg,
  506     stack_t *stack)
  507 {
  508 
  509         /* 
  510          * Do any extra cleaning that needs to be done.
  511          * The thread may have optional components
  512          * that are not present in a fresh thread.
  513          * This may be a recycled thread so make it look
  514          * as though it's newly allocated.
  515          */
  516         cpu_thread_clean(td);
  517 
  518         /*
  519          * Set the trap frame to point at the beginning of the entry
  520          * function.
               *
               * tf_esp is derived from the top of the supplied stack: step
               * down 4 bytes, round down to a multiple of 16, then step down
               * another 4 bytes.
  521          */
  522         td->td_frame->tf_ebp = 0; 
  523         td->td_frame->tf_esp =
  524             (((int)stack->ss_sp + stack->ss_size - 4) & ~0x0f) - 4;
  525         td->td_frame->tf_eip = (int)entry;
  526 
  527         /* Return address sentinel value to stop stack unwinding. */
  528         suword((void *)td->td_frame->tf_esp, 0);
  529 
  530         /* Pass the argument to the entry point. */
  531         suword((void *)(td->td_frame->tf_esp + sizeof(void *)),
  532             (int)arg);
  533 }
  534 
      /*
       * Install tls_base as the base of td's user TLS segment.  A descriptor
       * covering 4GB from tls_base is built and saved in the pcb (pcb_gsd);
       * when td is the current thread it is also written to the per-CPU
       * fsgs_gdt slot and %gs is reloaded.  Always returns 0.
       */
  535 int
  536 cpu_set_user_tls(struct thread *td, void *tls_base)
  537 {
  538         struct segment_descriptor sd;
  539         uint32_t base;
  540 
  541         /*
  542          * Construct a descriptor and store it in the pcb for
  543          * the next context switch.  Also store it in the gdt
  544          * so that the load of tf_fs into %fs will activate it
  545          * at return to userland.
               *
               * NOTE(review): the code below stores to pcb_gsd and reloads
               * %gs, not %fs; the sentence above may be stale - confirm.
  546          */
  547         base = (uint32_t)tls_base;
              /* The base is split: low 24 bits in sd_lobase, top 8 in sd_hibase. */
  548         sd.sd_lobase = base & 0xffffff;
  549         sd.sd_hibase = (base >> 24) & 0xff;
  550         sd.sd_lolimit = 0xffff; /* 4GB limit, wraps around */
  551         sd.sd_hilimit = 0xf;
  552         sd.sd_type  = SDT_MEMRWA;
  553         sd.sd_dpl   = SEL_UPL;
  554         sd.sd_p     = 1;
  555         sd.sd_xx    = 0;
  556         sd.sd_def32 = 1;
  557         sd.sd_gran  = 1;
  558         critical_enter();
  559         /* set %gs */
  560         td->td_pcb->pcb_gsd = sd;
  561         if (td == curthread) {
  562                 PCPU_GET(fsgs_gdt)[1] = sd;
  563                 load_gs(GSEL(GUGS_SEL, SEL_UPL));
  564         }
  565         critical_exit();
  566         return (0);
  567 }
  568 
  569 /*
  570  * Translate the kernel virtual address addr to a physical address via
  571  * pmap_kextract().  A result of 0 is treated as fatal.
  572  */
  573 vm_paddr_t
  574 kvtop(void *addr)
  575 {
  576         vm_paddr_t phys = pmap_kextract((vm_offset_t)addr);
  577 
  578         if (phys == 0)
  579                 panic("kvtop: zero page frame");
  580         return (phys);
  581 }
  582 
  583 /*
  584  * Map the sf_buf's page (sf->m) at its kernel virtual address (sf->kva)
  585  * by rewriting the PTE, then invalidate stale TLB entries as needed.
       * flags is only used on SMP, where it is passed to sf_buf_shootdown().
  586  */
  587 void
  588 sf_buf_map(struct sf_buf *sf, int flags)
  589 {
  590         pt_entry_t opte, *ptep;
  591 
  592         /*
  593          * Update the sf_buf's virtual-to-physical mapping, flushing the
  594          * virtual address from the TLB.  Since the reference count for
  595          * the sf_buf's old mapping was zero, that mapping is not
  596          * currently in use.  Consequently, there is no need to exchange
  597          * the old and new PTEs atomically, even under PAE.
  598          */
  599         ptep = vtopte(sf->kva);
  600         opte = *ptep;
  601         *ptep = VM_PAGE_TO_PHYS(sf->m) | PG_RW | PG_V |
  602             pmap_cache_bits(kernel_pmap, sf->m->md.pat_mode, 0);
  603 
  604         /*
  605          * Avoid unnecessary TLB invalidations: If the sf_buf's old
  606          * virtual-to-physical mapping was not used, then any processor
  607          * that has invalidated the sf_buf's virtual address from its TLB
  608          * since the last used mapping need not invalidate again.
               *
               * "Used" is detected by the old PTE having both PG_V and PG_A
               * set.  On SMP that resets sf->cpumask before the shootdown;
               * on UP it triggers a single page invalidation.
  609          */
  610 #ifdef SMP
  611         if ((opte & (PG_V | PG_A)) ==  (PG_V | PG_A))
  612                 CPU_ZERO(&sf->cpumask);
  613 
  614         sf_buf_shootdown(sf, flags);
  615 #else
  616         if ((opte & (PG_V | PG_A)) ==  (PG_V | PG_A))
  617                 pmap_invalidate_page(kernel_pmap, sf->kva);
  618 #endif
  619 }
  619 
  620 #ifdef SMP
      /*
       * Callback handed to smp_masked_invlpg() by sf_buf_shootdown().  It does
       * nothing; the current CPU's entry is already flushed by the caller.
       */
  621 static void
  622 sf_buf_shootdown_curcpu_cb(pmap_t pmap __unused,
  623     vm_offset_t addr1 __unused, vm_offset_t addr2 __unused)
  624 {
  625 }
  626 
      /*
       * Invalidate sf->kva in the TLBs of the CPUs that have not done so yet.
       * sf->cpumask records the CPUs that already flushed the address; each
       * CPU flushed here is added to it.  With SFB_CPUPRIVATE set in flags
       * only the current CPU is handled.
       */
  627 void
  628 sf_buf_shootdown(struct sf_buf *sf, int flags)
  629 {
  630         cpuset_t other_cpus;
  631         u_int cpuid;
  632 
              /* Stay on this CPU while cpuid is in use. */
  633         sched_pin();
  634         cpuid = PCPU_GET(cpuid);
  635         if (!CPU_ISSET(cpuid, &sf->cpumask)) {
  636                 CPU_SET(cpuid, &sf->cpumask);
  637                 invlpg(sf->kva);
  638         }
  639         if ((flags & SFB_CPUPRIVATE) == 0) {
                      /*
                       * Candidates are all CPUs except this one.
                       * NOTE(review): CPU_NAND() presumably removes the CPUs
                       * already present in sf->cpumask - confirm.
                       */
  640                 other_cpus = all_cpus;
  641                 CPU_CLR(cpuid, &other_cpus);
  642                 CPU_NAND(&other_cpus, &sf->cpumask);
  643                 if (!CPU_EMPTY(&other_cpus)) {
  644                         CPU_OR(&sf->cpumask, &other_cpus);
  645                         smp_masked_invlpg(other_cpus, sf->kva, kernel_pmap,
  646                             sf_buf_shootdown_curcpu_cb);
  647                 }
  648         }
  649         sched_unpin();
  650 }
  651 #endif
  652 
  653 /*
  654  * MD part of sf_buf_free().
       *
       * No work is done here: sf is not touched and 0 is always returned.
  655  */
  656 int
  657 sf_buf_unmap(struct sf_buf *sf)
  658 {
  659 
  660         return (0);
  661 }
  662 
      /*
       * Refresh the mapping of one sf_buf and invalidate the cache lines of
       * the page-sized range starting at sf->kva.
       */
  663 static void
  664 sf_buf_invalidate(struct sf_buf *sf)
  665 {
  666         vm_page_t page;
  667 
  668         /*
  669          * Re-enter the existing mapping through pmap_qenter() so that
  670          * the pte, and in particular its PAT settings, is recalculated.
  671          */
  672         page = sf->m;
  673         pmap_qenter(sf->kva, &page, 1);
  674         pmap_invalidate_cache_range(sf->kva, sf->kva + PAGE_SIZE);
  675 }
  676 
  677 /*
  678  * Invalidate the cache lines that may belong to the page, if
  679  * (possibly old) mapping of the page by sf buffer exists.  Returns
  680  * TRUE when mapping was found and cache invalidated.
       *
       * The lookup is delegated to sf_buf_process_page(), which is given
       * sf_buf_invalidate() as the per-buffer callback; its result is
       * returned unchanged.
  681  */
  682 boolean_t
  683 sf_buf_invalidate_cache(vm_page_t m)
  684 {
  685 
  686         return (sf_buf_process_page(m, sf_buf_invalidate));
  687 }
  688 
  689 /* Software interrupt handler for queued VM system processing. */
  690 void
  691 swi_vm(void *dummy)
  692 {
  693 
  694         /* dummy is unused; busdma_swi() runs only when work is pending. */
  695         if (busdma_swi_pending == 0)
  696                 return;
  697         busdma_swi();
      }
  698 
  699 /*
  700  * Tell whether addr lies in some physical memory region: returns 0 for
  701  * addresses inside the ``ISA memory hole'' (checked only when DEV_ISA is
  702  * defined) and 1 for everything else.  Currently used by the kernel
  703  * coredump code, where dumping the hole could cause indefinite hangs.
  704  */
  705 int
  706 is_physical_memory(vm_paddr_t addr)
  707 {
  708         int is_ram;
  709 
  710         is_ram = 1;
  711 #ifdef DEV_ISA
  712         /* The ISA ``memory hole'' covers [0xa0000, 0x100000). */
  713         if (addr >= 0xa0000 && addr < 0x100000)
  714                 is_ram = 0;
  715 #endif
  716 
  717         /*
  718          * stuff other tests for known memory-mapped devices (PCI?)
  719          * here
  720          */
  721         return (is_ram);
  722 }

Cache object: f4407431bb9bdd912e7766e1ac177ee1


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]


This page is part of the FreeBSD/Linux Kernel Cross-Reference, and was automatically generated using a modified version of the LXR engine.