FreeBSD/Linux Kernel Cross Reference
sys/pc/mmu.c


    1 /*
    2  * Memory mappings.  Life was easier when 2G of memory was enough.
    3  *
    4  * The kernel memory starts at KZERO, with the text loaded at KZERO+1M
    5  * (9load sits under 1M during the load).  The memory from KZERO to the
    6  * top of memory is mapped 1-1 with physical memory, starting at physical
    7  * address 0.  All kernel memory and data structures (i.e., the entries stored
    8  * into conf.mem) must sit in this physical range: if KZERO is at 0xF0000000,
    9  * then the kernel can only have 256MB of memory for itself.
   10  * 
   11  * The 256M below KZERO comprises three parts.  The lowest 4M is the
   12  * virtual page table, a virtual address representation of the current 
   13  * page table tree.  The second 4M is used for temporary per-process
   14  * mappings managed by kmap and kunmap.  The remaining 248M is used
   15  * for global (shared by all procs and all processors) device memory
   16  * mappings and managed by vmap and vunmap.  The total amount (256M)
   17  * could probably be reduced somewhat if desired.  The largest device
   18  * mapping is that of the video card, and even though modern video cards
   19  * have embarrassing amounts of memory, the video drivers only use one
   20  * frame buffer worth (at most 16M).  Each is described in more detail below.
   21  *
   22  * The VPT is a 4M frame constructed by inserting the pdb into itself.
   23  * This short-circuits one level of the page tables, with the result that 
   24  * the contents of second-level page tables can be accessed at VPT.  
   25  * We use the VPT to edit the page tables (see mmu) after inserting them
   26  * into the page directory.  It is a convenient mechanism for mapping what
   27  * might be otherwise-inaccessible pages.  The idea was borrowed from
   28  * the Exokernel.
   29  *
   30  * The VPT doesn't solve all our problems, because we still need to 
   31  * prepare page directories before we can install them.  For that, we
   32  * use tmpmap/tmpunmap, which map a single page at TMPADDR.
   33  */
   34 
   35 #include        "u.h"
   36 #include        "../port/lib.h"
   37 #include        "mem.h"
   38 #include        "dat.h"
   39 #include        "fns.h"
   40 #include        "io.h"
   41 
   42 /*
   43  * Simple segment descriptors with no translation.
   44  */
   45 #define DATASEGM(p)     { 0xFFFF, SEGG|SEGB|(0xF<<16)|SEGP|SEGPL(p)|SEGDATA|SEGW }
   46 #define EXECSEGM(p)     { 0xFFFF, SEGG|SEGD|(0xF<<16)|SEGP|SEGPL(p)|SEGEXEC|SEGR }
   47 #define EXEC16SEGM(p)   { 0xFFFF, SEGG|(0xF<<16)|SEGP|SEGPL(p)|SEGEXEC|SEGR }
   48 #define TSSSEGM(b,p)    { ((b)<<16)|sizeof(Tss),\
   49                           ((b)&0xFF000000)|(((b)>>16)&0xFF)|SEGTSS|SEGPL(p)|SEGP }
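      /*
       * Each of these describes a flat segment: base 0 and limit 0xFFFFF
       * in 4K units (SEGG), i.e. the whole 4G address space, so the
       * descriptors differ only in type (data vs. code), privilege level,
       * and default operand size; all real translation is left to paging.
       */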
   50 
   51 Segdesc gdt[NGDT] =
   52 {
   53 [NULLSEG]       { 0, 0},                /* null descriptor */
   54 [KDSEG]         DATASEGM(0),            /* kernel data/stack */
   55 [KESEG]         EXECSEGM(0),            /* kernel code */
   56 [UDSEG]         DATASEGM(3),            /* user data/stack */
   57 [UESEG]         EXECSEGM(3),            /* user code */
   58 [TSSSEG]        TSSSEGM(0,0),           /* tss segment */
   59 [KESEG16]               EXEC16SEGM(0),  /* kernel code 16-bit */
   60 };
   61 
   62 static int didmmuinit;
   63 static void taskswitch(ulong, ulong);
   64 static void memglobal(void);
   65 
   66 #define vpt ((ulong*)VPT)
   67 #define VPTX(va)                (((ulong)(va))>>12)
   68 #define vpd (vpt+VPTX(VPT))
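      /*
       * vpt[VPTX(va)] is the second-level entry that maps va and
       * vpd[PDX(va)] is the first-level (directory) entry for va:
       * once the pdb has been installed in its own PDX(VPT) slot
       * (see mmuinit below), the second-level tables appear in the
       * 4M window at VPT, and vpd = vpt+VPTX(VPT) is the page
       * directory itself as seen through that window.
       */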
   69 
   70 void
   71 mmuinit0(void)
   72 {
   73         memmove(m->gdt, gdt, sizeof gdt);
   74 }
   75 
   76 void
   77 mmuinit(void)
   78 {
   79         ulong x, *p;
   80         ushort ptr[3];
   81 
   82         didmmuinit = 1;
   83 
   84         if(0) print("vpt=%#.8ux vpd=%#p kmap=%#.8ux\n",
   85                 VPT, vpd, KMAP);
   86 
   87         memglobal();
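              /* create the VPT: map the pdb into its own PDX(VPT) slot */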
   88         m->pdb[PDX(VPT)] = PADDR(m->pdb)|PTEWRITE|PTEVALID;
   89         
   90         m->tss = malloc(sizeof(Tss));
   91         memset(m->tss, 0, sizeof(Tss));
   92         m->tss->iomap = 0xDFFF<<16;
   93 
   94         /*
   95          * We used to keep the GDT in the Mach structure, but it
   96          * turns out that that slows down access to the rest of the
   97          * page.  Since the Mach structure is accessed quite often,
   98          * it pays off anywhere from a factor of 1.25 to 2 on real
   99          * hardware to separate them (the AMDs are more sensitive
  100          * than Intels in this regard).  Under VMware it pays off
  101          * a factor of about 10 to 100.
  102          */
  103         memmove(m->gdt, gdt, sizeof gdt);
  104         x = (ulong)m->tss;
  105         m->gdt[TSSSEG].d0 = (x<<16)|sizeof(Tss);
  106         m->gdt[TSSSEG].d1 = (x&0xFF000000)|((x>>16)&0xFF)|SEGTSS|SEGPL(0)|SEGP;
  107 
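              /*
               * lgdt and lidt take a 6-byte pseudo-descriptor:
               * a 16-bit limit followed by a 32-bit linear base,
               * assembled here from three ushorts.
               */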
  108         ptr[0] = sizeof(gdt)-1;
  109         x = (ulong)m->gdt;
  110         ptr[1] = x & 0xFFFF;
  111         ptr[2] = (x>>16) & 0xFFFF;
  112         lgdt(ptr);
  113 
  114         ptr[0] = sizeof(Segdesc)*256-1;
  115         x = IDTADDR;
  116         ptr[1] = x & 0xFFFF;
  117         ptr[2] = (x>>16) & 0xFFFF;
  118         lidt(ptr);
  119 
  120         /* make kernel text unwritable */
  121         for(x = KTZERO; x < (ulong)etext; x += BY2PG){
  122                 p = mmuwalk(m->pdb, x, 2, 0);
  123                 if(p == nil)
  124                         panic("mmuinit");
  125                 *p &= ~PTEWRITE;
  126         }
  127 
  128         taskswitch(PADDR(m->pdb),  (ulong)m + BY2PG);
  129         ltr(TSSSEL);
  130 }
  131 
  132 /* 
  133  * On processors that support it, we set the PTEGLOBAL bit in
  134  * page table and page directory entries that map kernel memory.
  135  * Doing this tells the processor not to bother flushing them
  136  * from the TLB when doing the TLB flush associated with a 
  137  * context switch (write to CR3).  Since kernel memory mappings
  138  * are never removed, this is safe.  (If we ever remove kernel memory
  139  * mappings, we can do a full flush by turning off the PGE bit in CR4,
  140  * writing to CR3, and then turning the PGE bit back on.) 
  141  *
  142  * See also mmukmap below.
  143  * 
  144  * Processor support for the PTEGLOBAL bit is enabled in devarch.c.
  145  */
  146 static void
  147 memglobal(void)
  148 {
  149         int i, j;
  150         ulong *pde, *pte;
  151 
  152         /* only need to do this once, on bootstrap processor */
  153         if(m->machno != 0)
  154                 return;
  155 
  156         if(!m->havepge)
  157                 return;
  158 
  159         pde = m->pdb;
  160         for(i=PDX(KZERO); i<1024; i++){
  161                 if(pde[i] & PTEVALID){
  162                         pde[i] |= PTEGLOBAL;
  163                         if(!(pde[i] & PTESIZE)){
  164                                 pte = KADDR(pde[i]&~(BY2PG-1));
  165                                 for(j=0; j<1024; j++)
  166                                         if(pte[j] & PTEVALID)
  167                                                 pte[j] |= PTEGLOBAL;
  168                         }
  169                 }
  170         }                       
  171 }
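      /*
       * For reference, a full flush that also discards PTEGLOBAL entries
       * would look roughly like the sketch below.  It is not needed at
       * present, since kernel mappings are never removed; putcr4 is
       * assumed to be available alongside getcr4 in l.s.
       *
       *        cr4 = getcr4();
       *        putcr4(cr4 & ~0x80);        turn off PGE
       *        putcr3(getcr3());           flush the whole TLB
       *        putcr4(cr4);                turn PGE back on
       */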
  172 
  173 /*
  174  * Flush all the user-space and device-mapping mmu info
  175  * for this process, because something has been deleted.
  176  * It will be paged back in on demand.
  177  */
  178 void
  179 flushmmu(void)
  180 {
  181         int s;
  182 
  183         s = splhi();
  184         up->newtlb = 1;
  185         mmuswitch(up);
  186         splx(s);
  187 }
  188 
  189 /*
  190  * Flush a single page mapping from the tlb.
  191  */
  192 void
  193 flushpg(ulong va)
  194 {
  195         if(X86FAMILY(m->cpuidax) >= 4)
  196                 invlpg(va);
  197         else
  198                 putcr3(getcr3());
  199 }
  200         
  201 /*
  202  * Allocate a new page for a page directory. 
  203  * We keep a small cache of pre-initialized
  204  * page directories in each mach.
  205  */
  206 static Page*
  207 mmupdballoc(void)
  208 {
  209         int s;
  210         Page *page;
  211         ulong *pdb;
  212 
  213         s = splhi();
  214         m->pdballoc++;
  215         if(m->pdbpool == 0){
  216                 spllo();
  217                 page = newpage(0, 0, 0);
  218                 page->va = (ulong)vpd;
  219                 splhi();
  220                 pdb = tmpmap(page);
  221                 memmove(pdb, m->pdb, BY2PG);
  222                 pdb[PDX(VPT)] = page->pa|PTEWRITE|PTEVALID;     /* set up VPT */
  223                 tmpunmap(pdb);
  224         }else{
  225                 page = m->pdbpool;
  226                 m->pdbpool = page->next;
  227                 m->pdbcnt--;
  228         }
  229         splx(s);
  230         return page;
  231 }
  232 
  233 static void
  234 mmupdbfree(Proc *proc, Page *p)
  235 {
  236         if(islo())
  237                 panic("mmupdbfree: islo");
  238         m->pdbfree++;
  239         if(m->pdbcnt >= 10){
  240                 p->next = proc->mmufree;
  241                 proc->mmufree = p;
  242         }else{
  243                 p->next = m->pdbpool;
  244                 m->pdbpool = p;
  245                 m->pdbcnt++;
  246         }
  247 }
  248 
  249 /*
  250  * A user-space memory segment has been deleted, or the
  251  * process is exiting.  Clear all the pde entries for user-space
  252  * memory mappings and device mappings.  Any entries that
  253  * are needed will be paged back in as necessary.
  254  */
  255 static void
  256 mmuptefree(Proc* proc)
  257 {
  258         int s;
  259         ulong *pdb;
  260         Page **last, *page;
  261 
  262         if(proc->mmupdb == nil || proc->mmuused == nil)
  263                 return;
  264         s = splhi();
  265         pdb = tmpmap(proc->mmupdb);
  266         last = &proc->mmuused;
  267         for(page = *last; page; page = page->next){
  268                 pdb[page->daddr] = 0;
  269                 last = &page->next;
  270         }
  271         tmpunmap(pdb);
  272         splx(s);
  273         *last = proc->mmufree;
  274         proc->mmufree = proc->mmuused;
  275         proc->mmuused = 0;
  276 }
  277 
  278 static void
  279 taskswitch(ulong pdb, ulong stack)
  280 {
  281         Tss *tss;
  282 
  283         tss = m->tss;
  284         tss->ss0 = KDSEL;
  285         tss->esp0 = stack;
  286         tss->ss1 = KDSEL;
  287         tss->esp1 = stack;
  288         tss->ss2 = KDSEL;
  289         tss->esp2 = stack;
  290         putcr3(pdb);
  291 }
  292 
  293 void
  294 mmuswitch(Proc* proc)
  295 {
  296         ulong *pdb;
  297 
  298         if(proc->newtlb){
  299                 mmuptefree(proc);
  300                 proc->newtlb = 0;
  301         }
  302 
  303         if(proc->mmupdb){
  304                 pdb = tmpmap(proc->mmupdb);
  305                 pdb[PDX(MACHADDR)] = m->pdb[PDX(MACHADDR)];
  306                 tmpunmap(pdb);
  307                 taskswitch(proc->mmupdb->pa, (ulong)(proc->kstack+KSTACK));
  308         }else
  309                 taskswitch(PADDR(m->pdb), (ulong)(proc->kstack+KSTACK));
  310 }
  311 
  312 /*
  313  * Release any pages allocated for a page directory base or page-tables
  314  * for this process:
  315  *   switch to the prototype pdb for this processor (m->pdb);
  316  *   call mmuptefree() to place all pages used for page-tables (proc->mmuused)
  317  *   onto the process' free list (proc->mmufree). This has the side-effect of
  318  *   cleaning any user entries in the pdb (proc->mmupdb);
  319  *   if there's a pdb put it in the cache of pre-initialised pdb's
  320  *   for this processor (m->pdbpool) or on the process' free list;
  321  *   finally, place any pages freed back into the free pool (palloc).
  322  * This routine is only called from schedinit() with palloc locked.
  323  */
  324 void
  325 mmurelease(Proc* proc)
  326 {
  327         Page *page, *next;
  328         ulong *pdb;
  329 
  330         if(islo())
  331                 panic("mmurelease: islo");
  332         taskswitch(PADDR(m->pdb), (ulong)m + BY2PG);
  333         if(proc->kmaptable){
  334                 if(proc->mmupdb == nil)
  335                         panic("mmurelease: no mmupdb");
  336                 if(--proc->kmaptable->ref)
  337                         panic("mmurelease: kmap ref %d", proc->kmaptable->ref);
  338                 if(proc->nkmap)
  339                         panic("mmurelease: nkmap %d", proc->nkmap);
  340                 /*
  341                  * remove kmaptable from pdb before putting pdb up for reuse.
  342                  */
  343                 pdb = tmpmap(proc->mmupdb);
  344                 if(PPN(pdb[PDX(KMAP)]) != proc->kmaptable->pa)
  345                         panic("mmurelease: bad kmap pde %#.8lux kmap %#.8lux",
  346                                 pdb[PDX(KMAP)], proc->kmaptable->pa);
  347                 pdb[PDX(KMAP)] = 0;
  348                 tmpunmap(pdb);
  349                 /*
  350                  * move kmaptable to free list.
  351                  */
  352                 pagechainhead(proc->kmaptable);
  353                 proc->kmaptable = 0;
  354         }
  355         if(proc->mmupdb){
  356                 mmuptefree(proc);
  357                 mmupdbfree(proc, proc->mmupdb);
  358                 proc->mmupdb = 0;
  359         }
  360         for(page = proc->mmufree; page; page = next){
  361                 next = page->next;
  362                 if(--page->ref)
  363                         panic("mmurelease: page->ref %d", page->ref);
  364                 pagechainhead(page);
  365         }
  366         if(proc->mmufree && palloc.r.p)
  367                 wakeup(&palloc.r);
  368         proc->mmufree = 0;
  369 }
  370 
  371 /*
  372  * Allocate and install pdb for the current process.
  373  */
  374 static void
  375 upallocpdb(void)
  376 {
  377         int s;
  378         ulong *pdb;
  379         Page *page;
  380         
  381         if(up->mmupdb != nil)
  382                 return;
  383         page = mmupdballoc();
  384         s = splhi();
  385         if(up->mmupdb != nil){
  386                 /*
  387                  * Perhaps we got an interrupt while
  388                  * mmupdballoc was sleeping and that
  389                  * interrupt allocated an mmupdb?
  390                  * Seems unlikely.
  391                  */
  392                 mmupdbfree(up, page);
  393                 splx(s);
  394                 return;
  395         }
  396         pdb = tmpmap(page);
  397         pdb[PDX(MACHADDR)] = m->pdb[PDX(MACHADDR)];
  398         tmpunmap(pdb);
  399         up->mmupdb = page;
  400         putcr3(up->mmupdb->pa);
  401         splx(s);
  402 }
  403 
  404 /*
  405  * Update the mmu in response to a user fault.  pa may have PTEWRITE set.
  406  */
  407 void
  408 putmmu(ulong va, ulong pa, Page*)
  409 {
  410         int old, s;
  411         Page *page;
  412 
  413         if(up->mmupdb == nil)
  414                 upallocpdb();
  415 
  416         /*
  417          * We should be able to get through this with interrupts
  418          * turned on (if we get interrupted we'll just pick up 
  419          * where we left off) but we get many faults accessing
  420          * vpt[] near the end of this function, and they always happen
  421          * after the process has been switched out and then 
  422          * switched back, usually many times in a row (perhaps
  423          * it cannot switch back successfully for some reason).
  424          * 
  425          * In any event, I'm tired of searching for this bug.  
  426          * Turn off interrupts during putmmu even though
  427          * we shouldn't need to.                - rsc
  428          */
  429         
  430         s = splhi();
  431         if(!(vpd[PDX(va)]&PTEVALID)){
  432                 if(up->mmufree == 0){
  433                         spllo();
  434                         page = newpage(0, 0, 0);
  435                         splhi();
  436                 }
  437                 else{
  438                         page = up->mmufree;
  439                         up->mmufree = page->next;
  440                 }
  441                 vpd[PDX(va)] = PPN(page->pa)|PTEUSER|PTEWRITE|PTEVALID;
  442                 /* page is now mapped into the VPT - clear it */
  443                 memset((void*)(VPT+PDX(va)*BY2PG), 0, BY2PG);
  444                 page->daddr = PDX(va);
  445                 page->next = up->mmuused;
  446                 up->mmuused = page;
  447         }
  448         old = vpt[VPTX(va)];
  449         vpt[VPTX(va)] = pa|PTEUSER|PTEVALID;
  450         if(old&PTEVALID)
  451                 flushpg(va);
  452         if(getcr3() != up->mmupdb->pa)
  453                 print("bad cr3 %#.8lux %#.8lux\n", getcr3(), up->mmupdb->pa);
  454         splx(s);
  455 }
  456 
  457 /*
  458  * Double-check the user MMU.
  459  * Error checking only.
  460  */
  461 void
  462 checkmmu(ulong va, ulong pa)
  463 {
  464         if(up->mmupdb == 0)
  465                 return;
  466         if(!(vpd[PDX(va)]&PTEVALID) || !(vpt[VPTX(va)]&PTEVALID))
  467                 return;
  468         if(PPN(vpt[VPTX(va)]) != pa)
  469                 print("%ld %s: va=%#08lux pa=%#08lux pte=%#08lux\n",
  470                         up->pid, up->text,
  471                         va, pa, vpt[VPTX(va)]);
  472 }
  473 
  474 /*
  475  * Walk the page-table pointed to by pdb and return a pointer
  476  * to the entry for virtual address va at the requested level.
  477  * If the entry is invalid and create isn't requested then bail
  478  * out early. Otherwise, for the 2nd level walk, allocate a new
  479  * page-table page and register it in the 1st level.  This is used
  480  * only to edit kernel mappings, which use pages from kernel memory,
  481  * so it's okay to use KADDR to look at the tables.
  482  */
  483 ulong*
  484 mmuwalk(ulong* pdb, ulong va, int level, int create)
  485 {
  486         ulong *table;
  487         void *map;
  488 
  489         table = &pdb[PDX(va)];
  490         if(!(*table & PTEVALID) && create == 0)
  491                 return 0;
  492 
  493         switch(level){
  494 
  495         default:
  496                 return 0;
  497 
  498         case 1:
  499                 return table;
  500 
  501         case 2:
  502                 if(*table & PTESIZE)
  503                         panic("mmuwalk2: va %luX entry %luX", va, *table);
  504                 if(!(*table & PTEVALID)){
  505                         /*
  506                          * Have to call low-level allocator from
  507                          * memory.c if we haven't set up the xalloc
  508                          * tables yet.
  509                          */
  510                         if(didmmuinit)
  511                                 map = xspanalloc(BY2PG, BY2PG, 0);
  512                         else
  513                                 map = rampage();
  514                         if(map == nil)
  515                                 panic("mmuwalk xspanalloc failed");
  516                         *table = PADDR(map)|PTEWRITE|PTEVALID;
  517                 }
  518                 table = KADDR(PPN(*table));
  519                 return &table[PTX(va)];
  520         }
  521 }
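      /*
       * A typical call, sketching what mmuinit does above: find the
       * second-level entry for a kernel text page without creating any
       * missing tables, then clear its write permission.
       *
       *        ulong *pte;
       *
       *        if((pte = mmuwalk(m->pdb, KTZERO, 2, 0)) != nil)
       *                *pte &= ~PTEWRITE;
       */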
  522 
  523 /*
  524  * Device mappings are shared by all procs and processors and
  525  * live in the virtual range VMAP to VMAP+VMAPSIZE.  The master
  526  * copy of the mappings is stored in mach0->pdb, and they are
  527  * paged in from there as necessary by vmapsync during faults.
  528  */
  529 
  530 static Lock vmaplock;
  531 
  532 static int findhole(ulong *a, int n, int count);
  533 static ulong vmapalloc(ulong size);
  534 static void pdbunmap(ulong*, ulong, int);
  535 
  536 /*
  537  * Add a device mapping to the vmap range.
  538  */
  539 void*
  540 vmap(ulong pa, int size)
  541 {
  542         int osize;
  543         ulong o, va;
  544         
  545         /*
  546          * might be asking for less than a page.
  547          */
  548         osize = size;
  549         o = pa & (BY2PG-1);
  550         pa -= o;
  551         size += o;
  552 
  553         size = ROUND(size, BY2PG);
  554         if(pa == 0){
  555                 print("vmap pa=0 pc=%#p\n", getcallerpc(&pa));
  556                 return nil;
  557         }
  558         ilock(&vmaplock);
  559         if((va = vmapalloc(size)) == 0 
  560         || pdbmap(MACHP(0)->pdb, pa|PTEUNCACHED|PTEWRITE, va, size) < 0){
  561                 iunlock(&vmaplock);
  562                 return 0;
  563         }
  564         iunlock(&vmaplock);
  565         /* avoid trap on local processor
  566         for(i=0; i<size; i+=4*MB)
  567                 vmapsync(va+i);
  568         */
  569         USED(osize);
  570 //      print("  vmap %#.8lux %d => %#.8lux\n", pa+o, osize, va+o);
  571         return (void*)(va + o);
  572 }
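      /*
       * Typical use by a driver (the physical address and size are made
       * up, purely for illustration): map a device's register window,
       * use it through the returned pointer, and unmap it when done.
       *
       *        ulong *mmio;
       *
       *        if((mmio = vmap(0xFED00000, BY2PG)) == nil)
       *                ...fail...
       *        mmio[0] = ...;
       *        vunmap(mmio, BY2PG);
       */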
  573 
  574 static int
  575 findhole(ulong *a, int n, int count)
  576 {
  577         int have, i;
  578         
  579         have = 0;
  580         for(i=0; i<n; i++){
  581                 if(a[i] == 0)
  582                         have++;
  583                 else
  584                         have = 0;
  585                 if(have >= count)
  586                         return i+1 - have;
  587         }
  588         return -1;
  589 }
  590 
  591 /*
  592  * Look for free space in the vmap.
  593  */
  594 static ulong
  595 vmapalloc(ulong size)
  596 {
  597         int i, n, o;
  598         ulong *vpdb;
  599         int vpdbsize;
  600         
  601         vpdb = &MACHP(0)->pdb[PDX(VMAP)];
  602         vpdbsize = VMAPSIZE/(4*MB);
  603 
  604         if(size >= 4*MB){
  605                 n = (size+4*MB-1) / (4*MB);
  606                 if((o = findhole(vpdb, vpdbsize, n)) != -1)
  607                         return VMAP + o*4*MB;
  608                 return 0;
  609         }
  610         n = (size+BY2PG-1) / BY2PG;
  611         for(i=0; i<vpdbsize; i++)
  612                 if((vpdb[i]&PTEVALID) && !(vpdb[i]&PTESIZE))
  613                         if((o = findhole(KADDR(PPN(vpdb[i])), WD2PG, n)) != -1)
  614                                 return VMAP + i*4*MB + o*BY2PG;
  615         if((o = findhole(vpdb, vpdbsize, 1)) != -1)
  616                 return VMAP + o*4*MB;
  617                 
  618         /*
  619          * could span page directory entries, but not worth the trouble.
  620          * not going to be very much contention.
  621          */
  622         return 0;
  623 }
  624 
  625 /*
  626  * Remove a device mapping from the vmap range.
  627  * Since pdbunmap does not remove page tables, just entries,
  628  * the call need not be interlocked with vmap.
  629  */
  630 void
  631 vunmap(void *v, int size)
  632 {
  633         int i;
  634         ulong va, o;
  635         Mach *nm;
  636         Proc *p;
  637         
  638         /*
  639          * might not be aligned
  640          */
  641         va = (ulong)v;
  642         o = va&(BY2PG-1);
  643         va -= o;
  644         size += o;
  645         size = ROUND(size, BY2PG);
  646         
  647         if(size < 0 || va < VMAP || va+size > VMAP+VMAPSIZE)
  648                 panic("vunmap va=%#.8lux size=%#x pc=%#.8lux",
  649                         va, size, getcallerpc(&va));
  650 
  651         pdbunmap(MACHP(0)->pdb, va, size);
  652         
  653         /*
  654          * Flush mapping from all the tlbs and copied pdbs.
  655          * This can be (and is) slow, since it is called only rarely.
  656          * It is possible for vunmap to be called with up == nil,
  657          * e.g. from the reset/init driver routines during system
  658          * boot. In that case it suffices to flush the MACH(0) TLB
  659          * and return.
  660          */
  661         if(!active.thunderbirdsarego){
  662                 putcr3(PADDR(MACHP(0)->pdb));
  663                 return;
  664         }
  665         for(i=0; i<conf.nproc; i++){
  666                 p = proctab(i);
  667                 if(p->state == Dead)
  668                         continue;
  669                 if(p != up)
  670                         p->newtlb = 1;
  671         }
  672         for(i=0; i<conf.nmach; i++){
  673                 nm = MACHP(i);
  674                 if(nm != m)
  675                         nm->flushmmu = 1;
  676         }
  677         flushmmu();
  678         for(i=0; i<conf.nmach; i++){
  679                 nm = MACHP(i);
  680                 if(nm != m)
  681                         while((active.machs&(1<<nm->machno)) && nm->flushmmu)
  682                                 ;
  683         }
  684 }
  685 
  686 /*
  687  * Add kernel mappings for pa -> va for a section of size bytes.
  688  */
  689 int
  690 pdbmap(ulong *pdb, ulong pa, ulong va, int size)
  691 {
  692         int pse;
  693         ulong pgsz, *pte, *table;
  694         ulong flag, off;
  695         
  696         flag = pa&0xFFF;
  697         pa &= ~0xFFF;
  698 
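              /*
               * Use 4MB pages if we can: cpuiddx bit 3 (0x08) is the
               * PSE feature flag and cr4 bit 4 (0x10) says page-size
               * extensions have been switched on.
               */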
  699         if((MACHP(0)->cpuiddx & 0x08) && (getcr4() & 0x10))
  700                 pse = 1;
  701         else
  702                 pse = 0;
  703 
  704         for(off=0; off<size; off+=pgsz){
  705                 table = &pdb[PDX(va+off)];
  706                 if((*table&PTEVALID) && (*table&PTESIZE))
  707                         panic("vmap: va=%#.8lux pa=%#.8lux pde=%#.8lux",
  708                                 va+off, pa+off, *table);
  709 
  710                 /*
  711                  * Check if it can be mapped using a 4MB page:
  712                  * va, pa aligned and size >= 4MB and processor can do it.
  713                  */
  714                 if(pse && (pa+off)%(4*MB) == 0 && (va+off)%(4*MB) == 0 && (size-off) >= 4*MB){
  715                         *table = (pa+off)|flag|PTESIZE|PTEVALID;
  716                         pgsz = 4*MB;
  717                 }else{
  718                         pte = mmuwalk(pdb, va+off, 2, 1);
  719                         if(*pte&PTEVALID)
  720                                 panic("vmap: va=%#.8lux pa=%#.8lux pte=%#.8lux",
  721                                         va+off, pa+off, *pte);
  722                         *pte = (pa+off)|flag|PTEVALID;
  723                         pgsz = BY2PG;
  724                 }
  725         }
  726         return 0;
  727 }
  728 
  729 /*
  730  * Remove mappings.  Must already exist, for sanity.
  731  * Only used for kernel mappings, so okay to use KADDR.
  732  */
  733 static void
  734 pdbunmap(ulong *pdb, ulong va, int size)
  735 {
  736         ulong vae;
  737         ulong *table;
  738         
  739         vae = va+size;
  740         while(va < vae){
  741                 table = &pdb[PDX(va)];
  742                 if(!(*table & PTEVALID)){
  743                         panic("vunmap: not mapped");
  744                         /* 
  745                         va = (va+4*MB-1) & ~(4*MB-1);
  746                         continue;
  747                         */
  748                 }
  749                 if(*table & PTESIZE){
  750                         *table = 0;
  751                         va = (va+4*MB-1) & ~(4*MB-1);
  752                         continue;
  753                 }
  754                 table = KADDR(PPN(*table));
  755                 if(!(table[PTX(va)] & PTEVALID))
  756                         panic("vunmap: not mapped");
  757                 table[PTX(va)] = 0;
  758                 va += BY2PG;
  759         }
  760 }
  761 
  762 /*
  763  * Handle a fault by bringing vmap up to date.
  764  * Only copy pdb entries and they never go away,
  765  * so no locking needed.
  766  */
  767 int
  768 vmapsync(ulong va)
  769 {
  770         ulong entry, *table;
  771 
  772         if(va < VMAP || va >= VMAP+VMAPSIZE)
  773                 return 0;
  774 
  775         entry = MACHP(0)->pdb[PDX(va)];
  776         if(!(entry&PTEVALID))
  777                 return 0;
  778         if(!(entry&PTESIZE)){
  779                 /* make sure entry will help the fault */
  780                 table = KADDR(PPN(entry));
  781                 if(!(table[PTX(va)]&PTEVALID))
  782                         return 0;
  783         }
  784         vpd[PDX(va)] = entry;
  785         /*
  786          * TLB doesn't cache negative results, so no flush needed.
  787          */
  788         return 1;
  789 }
  790 
  791 
  792 /*
  793  * KMap is used to map individual pages into virtual memory.
  794  * It is rare to have more than a few KMaps at a time (in the 
  795  * absence of interrupts, only two at a time are ever used,
  796  * but interrupts can stack).  The mappings are local to a process,
  797  * so we can use the same range of virtual address space for
  798  * all processes without any coordination.
  799  */
  800 #define kpt (vpt+VPTX(KMAP))
  801 #define NKPT (KMAPSIZE/BY2PG)
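      /*
       * Typical (hypothetical) usage: map a page just long enough to
       * zero it.  On this port a KMap* is simply the mapped virtual
       * address, as kmap below shows.
       *
       *        KMap *k;
       *
       *        k = kmap(page);
       *        memset((void*)k, 0, BY2PG);
       *        kunmap(k);
       */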
  802 
  803 KMap*
  804 kmap(Page *page)
  805 {
  806         int i, o, s;
  807 
  808         if(up == nil)
  809                 panic("kmap: up=0 pc=%#.8lux", getcallerpc(&page));
  810         if(up->mmupdb == nil)
  811                 upallocpdb();
  812         if(up->nkmap < 0)
  813                 panic("kmap %lud %s: nkmap=%d", up->pid, up->text, up->nkmap);
  814         
  815         /*
  816          * Splhi shouldn't be necessary here, but paranoia reigns.
  817          * See comment in putmmu above.
  818          */
  819         s = splhi();
  820         up->nkmap++;
  821         if(!(vpd[PDX(KMAP)]&PTEVALID)){
  822                 /* allocate page directory */
  823                 if(KMAPSIZE > BY2XPG)
  824                         panic("bad kmapsize");
  825                 if(up->kmaptable != nil)
  826                         panic("kmaptable");
  827                 spllo();
  828                 up->kmaptable = newpage(0, 0, 0);
  829                 splhi();
  830                 vpd[PDX(KMAP)] = up->kmaptable->pa|PTEWRITE|PTEVALID;
  831                 flushpg((ulong)kpt);
  832                 memset(kpt, 0, BY2PG);
  833                 kpt[0] = page->pa|PTEWRITE|PTEVALID;
  834                 up->lastkmap = 0;
  835                 splx(s);
  836                 return (KMap*)KMAP;
  837         }
  838         if(up->kmaptable == nil)
  839                 panic("no kmaptable");
  840         o = up->lastkmap+1;
  841         for(i=0; i<NKPT; i++){
  842                 if(kpt[(i+o)%NKPT] == 0){
  843                         o = (i+o)%NKPT;
  844                         kpt[o] = page->pa|PTEWRITE|PTEVALID;
  845                         up->lastkmap = o;
  846                         splx(s);
  847                         return (KMap*)(KMAP+o*BY2PG);
  848                 }
  849         }
  850         panic("out of kmap");
  851         return nil;
  852 }
  853 
  854 void
  855 kunmap(KMap *k)
  856 {
  857         ulong va;
  858 
  859         va = (ulong)k;
  860         if(up->mmupdb == nil || !(vpd[PDX(KMAP)]&PTEVALID))
  861                 panic("kunmap: no kmaps");
  862         if(va < KMAP || va >= KMAP+KMAPSIZE)
  863                 panic("kunmap: bad address %#.8lux pc=%#p", va, getcallerpc(&k));
  864         if(!(vpt[VPTX(va)]&PTEVALID))
  865                 panic("kunmap: not mapped %#.8lux pc=%#p", va, getcallerpc(&k));
  866         up->nkmap--;
  867         if(up->nkmap < 0)
  868                 panic("kunmap %lud %s: nkmap=%d", up->pid, up->text, up->nkmap);
  869         vpt[VPTX(va)] = 0;
  870         flushpg(va);
  871 }
  872 
  873 /*
  874  * Temporary one-page mapping used to edit page directories.
  875  *
  876  * The fasttmp #define controls whether the code optimizes
  877  * the case where the page is already mapped in the physical
  878  * memory window.  
  879  */
  880 #define fasttmp 1
  881 
  882 void*
  883 tmpmap(Page *p)
  884 {
  885         ulong i;
  886         ulong *entry;
  887         
  888         if(islo())
  889                 panic("tmpaddr: islo");
  890 
  891         if(fasttmp && p->pa < -KZERO)
  892                 return KADDR(p->pa);
  893 
  894         /*
  895          * PDX(TMPADDR) == PDX(MACHADDR), so this
  896          * entry is private to the processor and shared 
  897          * between up->mmupdb (if any) and m->pdb.
  898          */
  899         entry = &vpt[VPTX(TMPADDR)];
  900         if(!(*entry&PTEVALID)){
  901                 for(i=KZERO; i<=CPU0MACH; i+=BY2PG)
  902                         print("%#p: *%#p=%#p (vpt=%#p index=%#p)\n", i, &vpt[VPTX(i)], vpt[VPTX(i)], vpt, VPTX(i));
  903                 panic("tmpmap: no entry");
  904         }
  905         if(PPN(*entry) != PPN(TMPADDR-KZERO))
  906                 panic("tmpmap: already mapped entry=%#.8lux", *entry);
  907         *entry = p->pa|PTEWRITE|PTEVALID;
  908         flushpg(TMPADDR);
  909         return (void*)TMPADDR;
  910 }
  911 
  912 void
  913 tmpunmap(void *v)
  914 {
  915         ulong *entry;
  916         
  917         if(islo())
  918                 panic("tmpaddr: islo");
  919         if(fasttmp && (ulong)v >= KZERO && v != (void*)TMPADDR)
  920                 return;
  921         if(v != (void*)TMPADDR)
  922                 panic("tmpunmap: bad address");
  923         entry = &vpt[VPTX(TMPADDR)];
  924         if(!(*entry&PTEVALID) || PPN(*entry) == PPN(PADDR(TMPADDR)))
   925                                 panic("tmpunmap: not mapped entry=%#.8lux", *entry);
  926         *entry = PPN(TMPADDR-KZERO)|PTEWRITE|PTEVALID;
  927         flushpg(TMPADDR);
  928 }
  929 
  930 /*
  931  * These could go back to being macros once the kernel is debugged,
  932  * but the extra checking is nice to have.
  933  */
  934 void*
  935 kaddr(ulong pa)
  936 {
  937         if(pa > (ulong)-KZERO)
  938                 panic("kaddr: pa=%#.8lux", pa);
  939         return (void*)(pa+KZERO);
  940 }
  941 
  942 ulong
  943 paddr(void *v)
  944 {
  945         ulong va;
  946         
  947         va = (ulong)v;
  948         if(va < KZERO)
  949                 panic("paddr: va=%#.8lux pc=%#p", va, getcallerpc(&v));
  950         return va-KZERO;
  951 }
  952 
  953 /*
  954  * More debugging.
  955  */
  956 void
  957 countpagerefs(ulong *ref, int print)
  958 {
  959         int i, n;
  960         Mach *mm;
  961         Page *pg;
  962         Proc *p;
  963         
  964         n = 0;
  965         for(i=0; i<conf.nproc; i++){
  966                 p = proctab(i);
  967                 if(p->mmupdb){
  968                         if(print){
  969                                 if(ref[pagenumber(p->mmupdb)])
  970                                         iprint("page %#.8lux is proc %d (pid %lud) pdb\n",
  971                                                 p->mmupdb->pa, i, p->pid);
  972                                 continue;
  973                         }
  974                         if(ref[pagenumber(p->mmupdb)]++ == 0)
  975                                 n++;
  976                         else
  977                                 iprint("page %#.8lux is proc %d (pid %lud) pdb but has other refs!\n",
  978                                         p->mmupdb->pa, i, p->pid);
  979                 }
  980                 if(p->kmaptable){
  981                         if(print){
  982                                 if(ref[pagenumber(p->kmaptable)])
  983                                         iprint("page %#.8lux is proc %d (pid %lud) kmaptable\n",
  984                                                 p->kmaptable->pa, i, p->pid);
  985                                 continue;
  986                         }
  987                         if(ref[pagenumber(p->kmaptable)]++ == 0)
  988                                 n++;
  989                         else
  990                                 iprint("page %#.8lux is proc %d (pid %lud) kmaptable but has other refs!\n",
  991                                         p->kmaptable->pa, i, p->pid);
  992                 }
  993                 for(pg=p->mmuused; pg; pg=pg->next){
  994                         if(print){
  995                                 if(ref[pagenumber(pg)])
  996                                         iprint("page %#.8lux is on proc %d (pid %lud) mmuused\n",
  997                                                 pg->pa, i, p->pid);
  998                                 continue;
  999                         }
 1000                         if(ref[pagenumber(pg)]++ == 0)
 1001                                 n++;
 1002                         else
 1003                                 iprint("page %#.8lux is on proc %d (pid %lud) mmuused but has other refs!\n",
 1004                                         pg->pa, i, p->pid);
 1005                 }
 1006                 for(pg=p->mmufree; pg; pg=pg->next){
 1007                         if(print){
 1008                                 if(ref[pagenumber(pg)])
 1009                                         iprint("page %#.8lux is on proc %d (pid %lud) mmufree\n",
 1010                                                 pg->pa, i, p->pid);
 1011                                 continue;
 1012                         }
 1013                         if(ref[pagenumber(pg)]++ == 0)
 1014                                 n++;
 1015                         else
 1016                                 iprint("page %#.8lux is on proc %d (pid %lud) mmufree but has other refs!\n",
 1017                                         pg->pa, i, p->pid);
 1018                 }
 1019         }
 1020         if(!print)
 1021                 iprint("%d pages in proc mmu\n", n);
 1022         n = 0;
 1023         for(i=0; i<conf.nmach; i++){
 1024                 mm = MACHP(i);
 1025                 for(pg=mm->pdbpool; pg; pg=pg->next){
 1026                         if(print){
 1027                                 if(ref[pagenumber(pg)])
 1028                                         iprint("page %#.8lux is in cpu%d pdbpool\n",
 1029                                                 pg->pa, i);
 1030                                 continue;
 1031                         }
 1032                         if(ref[pagenumber(pg)]++ == 0)
 1033                                 n++;
 1034                         else
 1035                                 iprint("page %#.8lux is in cpu%d pdbpool but has other refs!\n",
 1036                                         pg->pa, i);
 1037                 }
 1038         }
 1039         if(!print){
 1040                 iprint("%d pages in mach pdbpools\n", n);
 1041                 for(i=0; i<conf.nmach; i++)
 1042                         iprint("cpu%d: %d pdballoc, %d pdbfree\n",
 1043                                 i, MACHP(i)->pdballoc, MACHP(i)->pdbfree);
 1044         }
 1045 }
 1046 
 1047 void
 1048 checkfault(ulong, ulong)
 1049 {
 1050 }
 1051 
 1052 /*
 1053  * Return the number of bytes that can be accessed via KADDR(pa).
 1054  * If pa is not a valid argument to KADDR, return 0.
 1055  */
 1056 ulong
 1057 cankaddr(ulong pa)
 1058 {
 1059         if(pa >= -KZERO)
 1060                 return 0;
 1061         return -KZERO - pa;
 1062 }
 1063 
