The Design and Implementation of the FreeBSD Operating System, Second Edition
Now available: The Design and Implementation of the FreeBSD Operating System (Second Edition)


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]

FreeBSD/Linux Kernel Cross Reference
sys/vm/swap_pager.c

Version: -  FREEBSD  -  FREEBSD-13-STABLE  -  FREEBSD-13-0  -  FREEBSD-12-STABLE  -  FREEBSD-12-0  -  FREEBSD-11-STABLE  -  FREEBSD-11-0  -  FREEBSD-10-STABLE  -  FREEBSD-10-0  -  FREEBSD-9-STABLE  -  FREEBSD-9-0  -  FREEBSD-8-STABLE  -  FREEBSD-8-0  -  FREEBSD-7-STABLE  -  FREEBSD-7-0  -  FREEBSD-6-STABLE  -  FREEBSD-6-0  -  FREEBSD-5-STABLE  -  FREEBSD-5-0  -  FREEBSD-4-STABLE  -  FREEBSD-3-STABLE  -  FREEBSD22  -  l41  -  OPENBSD  -  linux-2.6  -  MK84  -  PLAN9  -  xnu-8792 
SearchContext: -  none  -  3  -  10 

    1 /*
    2  * Copyright (c) 1994 John S. Dyson
    3  * Copyright (c) 1990 University of Utah.
    4  * Copyright (c) 1991, 1993
    5  *      The Regents of the University of California.  All rights reserved.
    6  *
    7  * This code is derived from software contributed to Berkeley by
    8  * the Systems Programming Group of the University of Utah Computer
    9  * Science Department.
   10  *
   11  * Redistribution and use in source and binary forms, with or without
   12  * modification, are permitted provided that the following conditions
   13  * are met:
   14  * 1. Redistributions of source code must retain the above copyright
   15  *    notice, this list of conditions and the following disclaimer.
   16  * 2. Redistributions in binary form must reproduce the above copyright
   17  *    notice, this list of conditions and the following disclaimer in the
   18  *    documentation and/or other materials provided with the distribution.
   19  * 3. All advertising materials mentioning features or use of this software
   20  *    must display the following acknowledgement:
   21  *      This product includes software developed by the University of
   22  *      California, Berkeley and its contributors.
   23  * 4. Neither the name of the University nor the names of its contributors
   24  *    may be used to endorse or promote products derived from this software
   25  *    without specific prior written permission.
   26  *
   27  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
   28  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
   29  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
   30  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
   31  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
   32  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
   33  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
   34  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
   35  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
   36  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
   37  * SUCH DAMAGE.
   38  *
   39  * from: Utah $Hdr: swap_pager.c 1.4 91/04/30$
   40  *
   41  *      @(#)swap_pager.c        8.9 (Berkeley) 3/21/94
   42  * $FreeBSD$
   43  */
   44 
   45 /*
   46  * Quick hack to page to dedicated partition(s).
   47  * TODO:
   48  *      Add multiprocessor locks
   49  *      Deal with async writes in a better fashion
   50  */
   51 
   52 #include <sys/param.h>
   53 #include <sys/systm.h>
   54 #include <sys/kernel.h>
   55 #include <sys/proc.h>
   56 #include <sys/buf.h>
   57 #include <sys/vnode.h>
   58 #include <sys/malloc.h>
   59 #include <sys/vmmeter.h>
   60 #include <sys/rlist.h>
   61 
   62 #ifndef MAX_PAGEOUT_CLUSTER
   63 #define MAX_PAGEOUT_CLUSTER 16
   64 #endif
   65 
   66 #ifndef NPENDINGIO
   67 #define NPENDINGIO      16
   68 #endif
   69 
   70 #define SWB_NPAGES MAX_PAGEOUT_CLUSTER
   71 
   72 #include <vm/vm.h>
   73 #include <vm/vm_prot.h>
   74 #include <vm/vm_object.h>
   75 #include <vm/vm_page.h>
   76 #include <vm/vm_pager.h>
   77 #include <vm/vm_pageout.h>
   78 #include <vm/swap_pager.h>
   79 #include <vm/vm_extern.h>
   80 
static int nswiodone;			/* completed swap I/Os (managed elsewhere in this file) */
int swap_pager_full;			/* set when free swap falls below the low-water mark */
extern int vm_swap_size;		/* free swap remaining, in DEV_BSIZE disk blocks */
static int no_swap_space = 1;		/* cleared once swap space is observed to exist */
static int max_pageout_cluster;		/* pages per pageout cluster; may be scaled down at init */
struct rlisthdr swaplist;		/* resource list tracking free swap blocks */

TAILQ_HEAD(swpclean, swpagerclean);

typedef struct swpagerclean *swp_clean_t;

/*
 * State for one in-flight pageout cluster: a preallocated buf header,
 * the KVA window the pages are mapped into, and the pages themselves.
 * NPENDINGIO of these are statically allocated; swap_pager_swap_init
 * decides how many are actually usable.
 */
static struct swpagerclean {
        TAILQ_ENTRY(swpagerclean) spc_list;	/* linkage on free/inuse/done lists */
        int spc_flags;				/* SPC_* flags below */
        struct buf *spc_bp;			/* buf header preallocated at init time */
        vm_object_t spc_object;			/* object the pages belong to */
        vm_offset_t spc_kva;			/* KVA the cluster is mapped at */
        int spc_first;
        int spc_count;
        vm_page_t spc_m[MAX_PAGEOUT_CLUSTER];	/* pages in this cluster */
} swcleanlist[NPENDINGIO];


/* spc_flags values */
#define SPC_ERROR       0x01	/* error occurred on this clean structure */

#define SWB_EMPTY (-1)		/* marks a block slot with no swap space assigned */

/* list of completed page cleans */
static struct swpclean swap_pager_done;

/* list of pending page cleans */
static struct swpclean swap_pager_inuse;

/* list of free pager clean structs */
static struct swpclean swap_pager_free;
static int swap_pager_free_count;	/* number of entries on swap_pager_free */
static int swap_pager_free_pending;	/* nonzero when swap_pager_sync has work pending */

/* list of "named" anon region objects */
static struct pagerlst swap_pager_object_list;

/* list of "unnamed" anon region objects */
struct pagerlst swap_pager_un_object_list;

#define SWAP_FREE_NEEDED        0x1     /* need a swap block */
#define SWAP_FREE_NEEDED_BY_PAGEOUT 0x2
static int swap_pager_needflags;	/* SWAP_FREE_NEEDED* wakeup flags */

/* Object queues scanned by swap_pager_reclaim; NULL-terminated. */
static struct pagerlst *swp_qs[] = {
        &swap_pager_object_list, &swap_pager_un_object_list, (struct pagerlst *) 0
};
  133 
  134 /*
  135  * pagerops for OBJT_SWAP - "swap pager".
  136  */
  137 static vm_object_t
  138                 swap_pager_alloc __P((void *handle, vm_ooffset_t size,
  139                                       vm_prot_t prot, vm_ooffset_t offset));
  140 static void     swap_pager_dealloc __P((vm_object_t object));
  141 static boolean_t
  142                 swap_pager_haspage __P((vm_object_t object, vm_pindex_t pindex,
  143                                         int *before, int *after));
  144 static int      swap_pager_getpages __P((vm_object_t, vm_page_t *, int, int));
  145 static void     swap_pager_init __P((void));
  146 static void spc_free __P((swp_clean_t));
  147 
  148 struct pagerops swappagerops = {
  149         swap_pager_init,
  150         swap_pager_alloc,
  151         swap_pager_dealloc,
  152         swap_pager_getpages,
  153         swap_pager_putpages,
  154         swap_pager_haspage,
  155         swap_pager_sync
  156 };
  157 
static int npendingio;	/* usable entries in swcleanlist (clamped in swap_pager_init) */
static int dmmin;	/* swap allocation constant: disk blocks per page */
int dmmax;		/* swap allocation constant: twice a full cluster, in disk blocks */
  161 
  162 static int      swap_pager_block_index __P((vm_pindex_t pindex));
  163 static int      swap_pager_block_offset __P((vm_pindex_t pindex));
  164 static daddr_t *swap_pager_diskaddr __P((vm_object_t object,
  165                                           vm_pindex_t pindex, int *valid));
  166 static void     swap_pager_finish __P((swp_clean_t spc));
  167 static void     swap_pager_free_swap __P((vm_object_t object));
  168 static void     swap_pager_freeswapspace __P((vm_object_t object,
  169                                               unsigned int from,
  170                                               unsigned int to));
  171 static int      swap_pager_getswapspace __P((vm_object_t object,
  172                                              unsigned int amount,
  173                                              daddr_t *rtval));
  174 static void     swap_pager_iodone __P((struct buf *));
  175 static void     swap_pager_iodone1 __P((struct buf *bp));
  176 static void     swap_pager_reclaim __P((void));
  177 static void     swap_pager_ridpages __P((vm_page_t *m, int count,
  178                                          int reqpage));
  179 static void     swap_pager_setvalid __P((vm_object_t object,
  180                                          vm_offset_t offset, int valid));
  181 static __inline void    swapsizecheck __P((void));
  182 
  183 #define SWAPLOW (vm_swap_size < (512 * btodb(PAGE_SIZE)))
  184 
  185 static __inline void
  186 swapsizecheck()
  187 {
  188         if (vm_swap_size < 128 * btodb(PAGE_SIZE)) {
  189                 if (swap_pager_full == 0)
  190                         printf("swap_pager: out of swap space\n");
  191                 swap_pager_full = 1;
  192         } else if (vm_swap_size > 192 * btodb(PAGE_SIZE))
  193                 swap_pager_full = 0;
  194 }
  195 
  196 static void
  197 swap_pager_init()
  198 {
  199         int maxsafepending;
  200         TAILQ_INIT(&swap_pager_object_list);
  201         TAILQ_INIT(&swap_pager_un_object_list);
  202 
  203         /*
  204          * Initialize clean lists
  205          */
  206         TAILQ_INIT(&swap_pager_inuse);
  207         TAILQ_INIT(&swap_pager_done);
  208         TAILQ_INIT(&swap_pager_free);
  209         swap_pager_free_count = 0;
  210 
  211         /*
  212          * Calculate the swap allocation constants.
  213          */
  214         dmmin = PAGE_SIZE / DEV_BSIZE;
  215         dmmax = btodb(SWB_NPAGES * PAGE_SIZE) * 2;
  216 
  217         maxsafepending = cnt.v_free_min - cnt.v_free_reserved;
  218         npendingio = NPENDINGIO;
  219         max_pageout_cluster = MAX_PAGEOUT_CLUSTER;
  220 
  221         if ((2 * NPENDINGIO * MAX_PAGEOUT_CLUSTER) > maxsafepending) {
  222                 max_pageout_cluster = MAX_PAGEOUT_CLUSTER / 2;
  223                 npendingio = maxsafepending / (2 * max_pageout_cluster);
  224                 if (npendingio < 2)
  225                         npendingio = 2;
  226         }
  227 }
  228 
  229 void
  230 swap_pager_swap_init()
  231 {
  232         swp_clean_t spc;
  233         struct buf *bp;
  234         int i;
  235 
  236         /*
  237          * kva's are allocated here so that we dont need to keep doing
  238          * kmem_alloc pageables at runtime
  239          */
  240         for (i = 0, spc = swcleanlist; i < npendingio; i++, spc++) {
  241                 spc->spc_kva = kmem_alloc_pageable(pager_map, PAGE_SIZE * max_pageout_cluster);
  242                 if (!spc->spc_kva) {
  243                         break;
  244                 }
  245                 spc->spc_bp = malloc(sizeof(*bp), M_TEMP, M_KERNEL);
  246                 if (!spc->spc_bp) {
  247                         kmem_free_wakeup(pager_map, spc->spc_kva, PAGE_SIZE);
  248                         break;
  249                 }
  250                 spc->spc_flags = 0;
  251                 TAILQ_INSERT_TAIL(&swap_pager_free, spc, spc_list);
  252                 swap_pager_free_count++;
  253         }
  254 }
  255 
  256 int
  257 swap_pager_swp_alloc(object, wait)
  258         vm_object_t object;
  259         int wait;
  260 {
  261         sw_blk_t swb;
  262         int nblocks;
  263         int i, j;
  264 
  265         nblocks = (object->size + SWB_NPAGES - 1) / SWB_NPAGES;
  266         swb = malloc(nblocks * sizeof(*swb), M_VMPGDATA, wait);
  267         if (swb == NULL)
  268                 return 1;
  269 
  270         for (i = 0; i < nblocks; i++) {
  271                 swb[i].swb_valid = 0;
  272                 swb[i].swb_locked = 0;
  273                 for (j = 0; j < SWB_NPAGES; j++)
  274                         swb[i].swb_block[j] = SWB_EMPTY;
  275         }
  276 
  277         object->un_pager.swp.swp_nblocks = nblocks;
  278         object->un_pager.swp.swp_allocsize = 0;
  279         object->un_pager.swp.swp_blocks = swb;
  280         object->un_pager.swp.swp_poip = 0;
  281 
  282         if (object->handle != NULL) {
  283                 TAILQ_INSERT_TAIL(&swap_pager_object_list, object, pager_object_list);
  284         } else {
  285                 TAILQ_INSERT_TAIL(&swap_pager_un_object_list, object, pager_object_list);
  286         }
  287 
  288         return 0;
  289 }
  290 
  291 /*
  292  * Allocate an object and associated resources.
  293  * Note that if we are called from the pageout daemon (handle == NULL)
  294  * we should not wait for memory as it could resulting in deadlock.
  295  */
  296 static vm_object_t
  297 swap_pager_alloc(void *handle, vm_ooffset_t size, vm_prot_t prot,
  298                  vm_ooffset_t offset)
  299 {
  300         vm_object_t object;
  301 
  302         /*
  303          * If this is a "named" anonymous region, look it up and use the
  304          * object if it exists, otherwise allocate a new one.
  305          */
  306         if (handle) {
  307                 object = vm_pager_object_lookup(&swap_pager_object_list, handle);
  308                 if (object != NULL) {
  309                         vm_object_reference(object);
  310                 } else {
  311                         /*
  312                          * XXX - there is a race condition here. Two processes
  313                          * can request the same named object simultaneuously,
  314                          * and if one blocks for memory, the result is a disaster.
  315                          * Probably quite rare, but is yet another reason to just
  316                          * rip support of "named anonymous regions" out altogether.
  317                          */
  318                         object = vm_object_allocate(OBJT_SWAP,
  319                                 OFF_TO_IDX(offset + PAGE_MASK + size));
  320                         object->handle = handle;
  321                         (void) swap_pager_swp_alloc(object, M_WAITOK);
  322                 }
  323         } else {
  324                 object = vm_object_allocate(OBJT_SWAP,
  325                         OFF_TO_IDX(offset + PAGE_MASK + size));
  326                 (void) swap_pager_swp_alloc(object, M_WAITOK);
  327         }
  328 
  329         return (object);
  330 }
  331 
  332 /*
  333  * returns disk block associated with pager and offset
  334  * additionally, as a side effect returns a flag indicating
  335  * if the block has been written
  336  */
  337 
  338 static __inline daddr_t *
  339 swap_pager_diskaddr(object, pindex, valid)
  340         vm_object_t object;
  341         vm_pindex_t pindex;
  342         int *valid;
  343 {
  344         register sw_blk_t swb;
  345         int ix;
  346 
  347         if (valid)
  348                 *valid = 0;
  349         ix = pindex / SWB_NPAGES;
  350         if ((ix >= object->un_pager.swp.swp_nblocks) ||
  351             (pindex >= object->size)) {
  352                 return (FALSE);
  353         }
  354         swb = &object->un_pager.swp.swp_blocks[ix];
  355         ix = pindex % SWB_NPAGES;
  356         if (valid)
  357                 *valid = swb->swb_valid & (1 << ix);
  358         return &swb->swb_block[ix];
  359 }
  360 
  361 /*
  362  * Utility routine to set the valid (written) bit for
  363  * a block associated with a pager and offset
  364  */
  365 static void
  366 swap_pager_setvalid(object, offset, valid)
  367         vm_object_t object;
  368         vm_offset_t offset;
  369         int valid;
  370 {
  371         register sw_blk_t swb;
  372         int ix;
  373 
  374         ix = offset / SWB_NPAGES;
  375         if (ix >= object->un_pager.swp.swp_nblocks)
  376                 return;
  377 
  378         swb = &object->un_pager.swp.swp_blocks[ix];
  379         ix = offset % SWB_NPAGES;
  380         if (valid)
  381                 swb->swb_valid |= (1 << ix);
  382         else
  383                 swb->swb_valid &= ~(1 << ix);
  384         return;
  385 }
  386 
  387 /*
  388  * this routine allocates swap space with a fragmentation
  389  * minimization policy.
  390  */
  391 static int
  392 swap_pager_getswapspace(object, amount, rtval)
  393         vm_object_t object;
  394         unsigned int amount;
  395         daddr_t *rtval;
  396 {
  397         unsigned location;
  398 
  399         vm_swap_size -= amount;
  400                 
  401         if (!rlist_alloc(&swaplist, amount, &location)) {
  402                 vm_swap_size += amount;
  403                 return 0;
  404         } else {
  405                 swapsizecheck();
  406                 object->un_pager.swp.swp_allocsize += amount;
  407                 *rtval = location;
  408                 return 1;
  409         }
  410 }
  411 
  412 /*
  413  * this routine frees swap space with a fragmentation
  414  * minimization policy.
  415  */
  416 static void
  417 swap_pager_freeswapspace(object, from, to)
  418         vm_object_t object;
  419         unsigned int from;
  420         unsigned int to;
  421 {
  422         rlist_free(&swaplist, from, to);
  423         vm_swap_size += (to - from) + 1;
  424         object->un_pager.swp.swp_allocsize -= (to - from) + 1;
  425         swapsizecheck();
  426 }
  427 /*
  428  * this routine frees swap blocks from a specified pager
  429  */
  430 void
  431 swap_pager_freespace(object, start, size)
  432         vm_object_t object;
  433         vm_pindex_t start;
  434         vm_size_t size;
  435 {
  436         vm_pindex_t i;
  437         int s;
  438 
  439         s = splvm();
  440         for (i = start; i < start + size; i += 1) {
  441                 int valid;
  442                 daddr_t *addr = swap_pager_diskaddr(object, i, &valid);
  443 
  444                 if (addr && *addr != SWB_EMPTY) {
  445                         swap_pager_freeswapspace(object, *addr, *addr + btodb(PAGE_SIZE) - 1);
  446                         if (valid) {
  447                                 swap_pager_setvalid(object, i, 0);
  448                         }
  449                         *addr = SWB_EMPTY;
  450                 }
  451         }
  452         splx(s);
  453 }
  454 
  455 /*
  456  * same as freespace, but don't free, just force a DMZ next time
  457  */
  458 void
  459 swap_pager_dmzspace(object, start, size)
  460         vm_object_t object;
  461         vm_pindex_t start;
  462         vm_size_t size;
  463 {
  464         vm_pindex_t i;
  465         int s;
  466 
  467         s = splvm();
  468         for (i = start; i < start + size; i += 1) {
  469                 int valid;
  470                 daddr_t *addr = swap_pager_diskaddr(object, i, &valid);
  471 
  472                 if (addr && *addr != SWB_EMPTY) {
  473                         if (valid) {
  474                                 swap_pager_setvalid(object, i, 0);
  475                         }
  476                 }
  477         }
  478         splx(s);
  479 }
  480 
/*
 * Release every swap block still assigned to the object.  Physically
 * contiguous blocks are coalesced into runs so swap_pager_freeswapspace
 * (and thus rlist_free) is called as few times as possible.
 */
static void
swap_pager_free_swap(object)
	vm_object_t object;
{
	register int i, j;
	register sw_blk_t swb;
	int first_block=0, block_count=0;	/* current run: start block, length in disk blocks */
	int s;
	/*
	 * Free left over swap blocks
	 */
	swb = object->un_pager.swp.swp_blocks;
	if (swb == NULL) {
		return;
	}

	s = splvm();
	for (i = 0; i < object->un_pager.swp.swp_nblocks; i++, swb++) {
		for (j = 0; j < SWB_NPAGES; j++) {
			if (swb->swb_block[j] != SWB_EMPTY) {
				/*
				 * initially the length of the run is zero
				 */
				if (block_count == 0) {
					first_block = swb->swb_block[j];
					block_count = btodb(PAGE_SIZE);
					swb->swb_block[j] = SWB_EMPTY;
				/*
				 * if the new block can be included into the current run
				 */
				} else if (swb->swb_block[j] == first_block + block_count) {
					block_count += btodb(PAGE_SIZE);
					swb->swb_block[j] = SWB_EMPTY;
				/*
				 * terminate the previous run, and start a new one
				 */
				} else {
					swap_pager_freeswapspace(object, first_block,
					(unsigned) first_block + block_count - 1);
					first_block = swb->swb_block[j];
					block_count = btodb(PAGE_SIZE);
					swb->swb_block[j] = SWB_EMPTY;
				}
			}
		}
	}

	/* Flush the final run, if one is still open. */
	if (block_count) {
		swap_pager_freeswapspace(object, first_block,
			 (unsigned) first_block + block_count - 1);
	}
	splx(s);
}
  534 
  535 
/*
 * swap_pager_reclaim frees up over-allocated space from all pagers
 * this eliminates internal fragmentation due to allocation of space
 * for segments that are never swapped to. It has been written so that
 * it does not block until the rlist_free operation occurs; it keeps
 * the queues consistent.
 */

/*
 * Maximum number of blocks (pages) to reclaim per pass
 */
#define MAXRECLAIM 128

static void
swap_pager_reclaim()
{
	vm_object_t object;
	int i, j, k;
	int s;
	int reclaimcount;
	/* Scratch list of (disk block, owning object) pairs to free below. */
	static struct {
		int address;
		vm_object_t object;
	} reclaims[MAXRECLAIM];
	static int in_reclaim;	/* single-entry gate for this routine */

	/*
	 * allow only one process to be in the swap_pager_reclaim subroutine
	 */
	s = splvm();
	if (in_reclaim) {
		/* Another reclaim is running; wait for it and return. */
		tsleep(&in_reclaim, PSWP, "swrclm", 0);
		splx(s);
		return;
	}
	in_reclaim = 1;
	reclaimcount = 0;

	/* for each pager queue */
	for (k = 0; swp_qs[k]; k++) {

		object = TAILQ_FIRST(swp_qs[k]);
		while (object && (reclaimcount < MAXRECLAIM)) {

			/*
			 * see if any blocks associated with a pager has been
			 * allocated but not used (written)
			 */
			if ((object->flags & OBJ_DEAD) == 0 &&
				(object->paging_in_progress == 0)) {
				for (i = 0; i < object->un_pager.swp.swp_nblocks; i++) {
					sw_blk_t swb = &object->un_pager.swp.swp_blocks[i];

					/* Skip blocks currently locked for I/O. */
					if (swb->swb_locked)
						continue;
					for (j = 0; j < SWB_NPAGES; j++) {
						/* Allocated but never written: reclaimable. */
						if (swb->swb_block[j] != SWB_EMPTY &&
						    (swb->swb_valid & (1 << j)) == 0) {
							reclaims[reclaimcount].address = swb->swb_block[j];
							reclaims[reclaimcount++].object = object;
							swb->swb_block[j] = SWB_EMPTY;
							if (reclaimcount >= MAXRECLAIM)
								goto rfinished;
						}
					}
				}
			}
			object = TAILQ_NEXT(object, pager_object_list);
		}
	}

rfinished:

	/*
	 * free the blocks that have been added to the reclaim list
	 */
	for (i = 0; i < reclaimcount; i++) {
		swap_pager_freeswapspace(reclaims[i].object,
		    reclaims[i].address, reclaims[i].address + btodb(PAGE_SIZE) - 1);
	}
	splx(s);
	in_reclaim = 0;
	wakeup(&in_reclaim);
}
  620 
  621 
/*
 * swap_pager_copy moves the swap backing of a range of pages from
 * srcobject to dstobject and, when destroysource is set, destroys the
 * source pager's swap state afterwards.
 *
 *	srcoffset/dstoffset	base page indices in source/destination
 *	offset			extra page offset applied to the source
 *	destroysource		unlink source from the pager lists and
 *				free all of its swap metadata when done
 */

void
swap_pager_copy(srcobject, srcoffset, dstobject, dstoffset,
	offset, destroysource)
	vm_object_t srcobject;
	vm_pindex_t srcoffset;
	vm_object_t dstobject;
	vm_pindex_t dstoffset;
	vm_pindex_t offset;
	int destroysource;
{
	vm_pindex_t i;
	int origsize;
	int s;

	if (vm_swap_size)
		no_swap_space = 0;

	/* Remember the source's allocation for the sanity check below. */
	origsize = srcobject->un_pager.swp.swp_allocsize;

	/*
	 * remove the source object from the swap_pager internal queue
	 */
	if (destroysource) {
		if (srcobject->handle == NULL) {
			TAILQ_REMOVE(&swap_pager_un_object_list, srcobject, pager_object_list);
		} else {
			TAILQ_REMOVE(&swap_pager_object_list, srcobject, pager_object_list);
		}
	}

	s = splvm();
	/* Wait for pageouts in progress on the source to drain. */
	while (srcobject->un_pager.swp.swp_poip) {
		tsleep(srcobject, PVM, "spgout", 0);
	}

	/*
	 * clean all of the pages that are currently active and finished
	 */
	if (swap_pager_free_pending)
		swap_pager_sync();

	/*
	 * transfer source to destination
	 */
	for (i = 0; i < dstobject->size; i += 1) {
		int srcvalid, dstvalid;
		daddr_t *srcaddrp = swap_pager_diskaddr(srcobject,
				i + offset + srcoffset, &srcvalid);
		daddr_t *dstaddrp;

		/*
		 * see if the source has space allocated
		 */
		if (srcaddrp && *srcaddrp != SWB_EMPTY) {
			/*
			 * if the source is valid and the dest has no space,
			 * then copy the allocation from the source to the
			 * dest.
			 */
			if (srcvalid) {
				dstaddrp = swap_pager_diskaddr(dstobject, i + dstoffset,
							&dstvalid);
				/*
				 * if the dest already has a valid block,
				 * deallocate the source block without
				 * copying.
				 */
				if (!dstvalid && dstaddrp && *dstaddrp != SWB_EMPTY) {
					swap_pager_freeswapspace(dstobject, *dstaddrp,
						*dstaddrp + btodb(PAGE_SIZE) - 1);
					*dstaddrp = SWB_EMPTY;
				}
				if (dstaddrp && *dstaddrp == SWB_EMPTY) {
					/* Move the block and its accounting across. */
					*dstaddrp = *srcaddrp;
					*srcaddrp = SWB_EMPTY;
					dstobject->un_pager.swp.swp_allocsize += btodb(PAGE_SIZE);
					srcobject->un_pager.swp.swp_allocsize -= btodb(PAGE_SIZE);
					swap_pager_setvalid(dstobject, i + dstoffset, 1);
				}
			}
			/*
			 * if the source is not empty at this point, then
			 * deallocate the space.
			 */
			if (*srcaddrp != SWB_EMPTY) {
				swap_pager_freeswapspace(srcobject, *srcaddrp,
					*srcaddrp + btodb(PAGE_SIZE) - 1);
				*srcaddrp = SWB_EMPTY;
			}
		}
	}
	splx(s);

	/*
	 * Free left over swap blocks
	 */
	if (destroysource) {
		swap_pager_free_swap(srcobject);

		/* At this point all source swap should be accounted for. */
		if (srcobject->un_pager.swp.swp_allocsize) {
			printf("swap_pager_copy: *warning* pager with %d blocks (orig: %d)\n",
			    srcobject->un_pager.swp.swp_allocsize, origsize);
		}

		free(srcobject->un_pager.swp.swp_blocks, M_VMPGDATA);
		srcobject->un_pager.swp.swp_blocks = NULL;
	}
	return;
}
  736 
/*
 * Tear down the swap pager state of an object: unlink it from the
 * pager lists, wait for outstanding pageouts, release all of its swap
 * blocks, and free the block metadata.
 */
static void
swap_pager_dealloc(object)
	vm_object_t object;
{
	int s;
	sw_blk_t swb;

	/*
	 * Remove from list right away so lookups will fail if we block for
	 * pageout completion.
	 */
	if (object->handle == NULL) {
		TAILQ_REMOVE(&swap_pager_un_object_list, object, pager_object_list);
	} else {
		TAILQ_REMOVE(&swap_pager_object_list, object, pager_object_list);
	}

	/*
	 * Wait for all pageouts to finish and remove all entries from
	 * cleaning list.
	 */

	s = splvm();
	while (object->un_pager.swp.swp_poip) {
		tsleep(object, PVM, "swpout", 0);
	}
	splx(s);

	/* Process any clean structures that completed while we slept. */
	if (swap_pager_free_pending)
		swap_pager_sync();

	/*
	 * Free left over swap blocks
	 */
	swap_pager_free_swap(object);

	/* All swap should be gone now; warn if accounting disagrees. */
	if (object->un_pager.swp.swp_allocsize) {
		printf("swap_pager_dealloc: *warning* freeing pager with %d blocks\n",
		    object->un_pager.swp.swp_allocsize);
	}
	swb = object->un_pager.swp.swp_blocks;
	if (swb) {
		/*
		 * Free swap management resources
		 */
		free(swb, M_VMPGDATA);
		object->un_pager.swp.swp_blocks = NULL;
	}
}
  786 
  787 static __inline int
  788 swap_pager_block_index(pindex)
  789         vm_pindex_t pindex;
  790 {
  791         return (pindex / SWB_NPAGES);
  792 }
  793 
  794 static __inline int
  795 swap_pager_block_offset(pindex)
  796         vm_pindex_t pindex;
  797 {
  798         return (pindex % SWB_NPAGES);
  799 }
  800 
  801 /*
  802  * swap_pager_haspage returns TRUE if the pager has data that has
  803  * been written out.
  804  */
  805 static boolean_t
  806 swap_pager_haspage(object, pindex, before, after)
  807         vm_object_t object;
  808         vm_pindex_t pindex;
  809         int *before;
  810         int *after;
  811 {
  812         register sw_blk_t swb;
  813         int ix;
  814 
  815         if (before != NULL)
  816                 *before = 0;
  817         if (after != NULL)
  818                 *after = 0;
  819         ix = pindex / SWB_NPAGES;
  820         if (ix >= object->un_pager.swp.swp_nblocks) {
  821                 return (FALSE);
  822         }
  823         swb = &object->un_pager.swp.swp_blocks[ix];
  824         ix = pindex % SWB_NPAGES;
  825 
  826         if (swb->swb_block[ix] != SWB_EMPTY) {
  827 
  828                 if (swb->swb_valid & (1 << ix)) {
  829                         int tix;
  830                         if (before) {
  831                                 for(tix = ix - 1; tix >= 0; --tix) {
  832                                         if ((swb->swb_valid & (1 << tix)) == 0)
  833                                                 break;
  834                                         if ((swb->swb_block[tix] +
  835                                                 (ix - tix) * (PAGE_SIZE/DEV_BSIZE)) !=
  836                                                 swb->swb_block[ix])
  837                                                 break;
  838                                         (*before)++;
  839                                 }
  840                         }
  841 
  842                         if (after) {
  843                                 for(tix = ix + 1; tix < SWB_NPAGES; tix++) {
  844                                         if ((swb->swb_valid & (1 << tix)) == 0)
  845                                                 break;
  846                                         if ((swb->swb_block[tix] -
  847                                                 (tix - ix) * (PAGE_SIZE/DEV_BSIZE)) !=
  848                                                 swb->swb_block[ix])
  849                                                 break;
  850                                         (*after)++;
  851                                 }
  852                         }
  853 
  854                         return TRUE;
  855                 }
  856         }
  857         return (FALSE);
  858 }
  859 
  860 /*
  861  * Wakeup based upon spc state
  862  */
  863 static void
  864 spc_wakeup(void)
  865 {
  866         if( swap_pager_needflags & SWAP_FREE_NEEDED_BY_PAGEOUT) {
  867                 swap_pager_needflags &= ~SWAP_FREE_NEEDED_BY_PAGEOUT;
  868                 wakeup(&swap_pager_needflags);
  869         } else if ((swap_pager_needflags & SWAP_FREE_NEEDED) &&
  870                 swap_pager_free_count >= ((2 * npendingio) / 3)) {
  871                 swap_pager_needflags &= ~SWAP_FREE_NEEDED;
  872                 wakeup(&swap_pager_free);
  873         }
  874 }
  875 
  876 /*
  877  * Free an spc structure
  878  */
  879 static void
  880 spc_free(spc)
  881         swp_clean_t spc;
  882 {
  883         spc->spc_flags = 0;
  884         TAILQ_INSERT_TAIL(&swap_pager_free, spc, spc_list);
  885         swap_pager_free_count++;
  886         if (swap_pager_needflags) {
  887                 spc_wakeup();
  888         }
  889 }
  890 
  891 /*
  892  * swap_pager_ridpages is a convienience routine that deallocates all
  893  * but the required page.  this is usually used in error returns that
  894  * need to invalidate the "extra" readahead pages.
  895  */
  896 static void
  897 swap_pager_ridpages(m, count, reqpage)
  898         vm_page_t *m;
  899         int count;
  900         int reqpage;
  901 {
  902         int i;
  903 
  904         for (i = 0; i < count; i++) {
  905                 if (i != reqpage) {
  906                         vm_page_free(m[i]);
  907                 }
  908         }
  909 }
  910 
  911 /*
  912  * swap_pager_iodone1 is the completion routine for both reads and async writes
  913  */
  914 static void
  915 swap_pager_iodone1(bp)
  916         struct buf *bp;
  917 {
  918         bp->b_flags |= B_DONE;
  919         bp->b_flags &= ~B_ASYNC;
  920         wakeup(bp);
  921 }
  922 
/*
 * swap_pager_getpages: read "count" pages, m[0..count-1], from swap,
 * where m[reqpage] is the page actually faulted on.  Readahead pages
 * that are not contiguous with the required page on the swap device
 * are freed rather than read.  Returns VM_PAGER_OK, VM_PAGER_FAIL
 * (no swap assigned for the required page) or VM_PAGER_ERROR.
 */
static int
swap_pager_getpages(object, m, count, reqpage)
	vm_object_t object;
	vm_page_t *m;
	int count, reqpage;
{
	register struct buf *bp;
	sw_blk_t swb[count];
	register int s;
	int i;
	boolean_t rv;
	vm_offset_t kva, off[count];
	vm_pindex_t paging_offset;
	int reqaddr[count];
	int sequential;

	int first, last;
	int failed;
	int reqdskregion;

	object = m[reqpage]->object;
	paging_offset = OFF_TO_IDX(object->paging_offset);
	/* sequential access: faulting page directly follows last one read */
	sequential = (m[reqpage]->pindex == (object->last_read + 1));

	/*
	 * Look up the swap block and disk address for every candidate
	 * page; pages past the end of the swap metadata are discarded
	 * (or the whole request fails if the required page is affected).
	 */
	for (i = 0; i < count; i++) {
		vm_pindex_t fidx = m[i]->pindex + paging_offset;
		int ix = swap_pager_block_index(fidx);

		if (ix >= object->un_pager.swp.swp_nblocks) {
			int j;

			if (i <= reqpage) {
				/* required page has no metadata: fail */
				swap_pager_ridpages(m, count, reqpage);
				return (VM_PAGER_FAIL);
			}
			/* drop trailing readahead pages beyond metadata */
			for (j = i; j < count; j++) {
				vm_page_free(m[j]);
			}
			count = i;
			break;
		}
		swb[i] = &object->un_pager.swp.swp_blocks[ix];
		off[i] = swap_pager_block_offset(fidx);
		reqaddr[i] = swb[i]->swb_block[off[i]];
	}

	/* make sure that our required input request is existant */

	if (reqaddr[reqpage] == SWB_EMPTY ||
	    (swb[reqpage]->swb_valid & (1 << off[reqpage])) == 0) {
		swap_pager_ridpages(m, count, reqpage);
		return (VM_PAGER_FAIL);
	}
	/* all pages in one transfer must fall in the same dmmax region */
	reqdskregion = reqaddr[reqpage] / dmmax;

	/*
	 * search backwards for the first contiguous page to transfer
	 */
	failed = 0;
	first = 0;
	for (i = reqpage - 1; i >= 0; --i) {
		/*
		 * drop the page unless it is valid, disk-contiguous with
		 * the required page, and in the same dmmax region (no
		 * backward readahead on sequential access either)
		 */
		if (sequential || failed || (reqaddr[i] == SWB_EMPTY) ||
		    (swb[i]->swb_valid & (1 << off[i])) == 0 ||
		    (reqaddr[i] != (reqaddr[reqpage] + (i - reqpage) * btodb(PAGE_SIZE))) ||
		    ((reqaddr[i] / dmmax) != reqdskregion)) {
			failed = 1;
			vm_page_free(m[i]);
			if (first == 0)
				first = i + 1;
		}
	}
	/*
	 * search forwards for the last contiguous page to transfer
	 */
	failed = 0;
	last = count;
	for (i = reqpage + 1; i < count; i++) {
		if (failed || (reqaddr[i] == SWB_EMPTY) ||
		    (swb[i]->swb_valid & (1 << off[i])) == 0 ||
		    (reqaddr[i] != (reqaddr[reqpage] + (i - reqpage) * btodb(PAGE_SIZE))) ||
		    ((reqaddr[i] / dmmax) != reqdskregion)) {
			failed = 1;
			vm_page_free(m[i]);
			if (last == count)
				last = i;
		}
	}

	/* compact the surviving run [first, last) down to m[0..count-1] */
	count = last;
	if (first != 0) {
		for (i = first; i < count; i++) {
			m[i - first] = m[i];
			reqaddr[i - first] = reqaddr[i];
			off[i - first] = off[i];
		}
		count -= first;
		reqpage -= first;
	}
	/* hold the required page's swap block across the (blocking) I/O */
	++swb[reqpage]->swb_locked;

	/*
	 * at this point: "m" is a pointer to the array of vm_page_t for
	 * paging I/O "count" is the number of vm_page_t entries represented
	 * by "m" "object" is the vm_object_t for I/O "reqpage" is the index
	 * into "m" for the page actually faulted
	 */

	/*
	 * Get a swap buffer header to perform the IO
	 */
	bp = getpbuf();
	kva = (vm_offset_t) bp->b_data;

	/*
	 * map our page(s) into kva for input
	 */
	pmap_qenter(kva, m, count);

	bp->b_flags = B_BUSY | B_READ | B_CALL | B_PAGING;
	bp->b_iodone = swap_pager_iodone1;
	bp->b_proc = &proc0;	/* XXX (but without B_PHYS set this is ok) */
	bp->b_rcred = bp->b_wcred = bp->b_proc->p_ucred;
	crhold(bp->b_rcred);
	crhold(bp->b_wcred);
	bp->b_data = (caddr_t) kva;
	bp->b_blkno = reqaddr[0];
	bp->b_bcount = PAGE_SIZE * count;
	bp->b_bufsize = PAGE_SIZE * count;

	pbgetvp(swapdev_vp, bp);

	cnt.v_swapin++;
	cnt.v_swappgsin += count;
	/*
	 * perform the I/O
	 */
	VOP_STRATEGY(bp->b_vp, bp);

	/*
	 * wait for the sync I/O to complete
	 */
	s = splvm();
	while ((bp->b_flags & B_DONE) == 0) {
		/* diagnostic only: keep waiting after the 20 s timeout */
		if (tsleep(bp, PVM, "swread", hz*20)) {
			printf(
"swap_pager: indefinite wait buffer: device: %#lx, blkno: %ld, size: %ld\n",
			    (u_long)bp->b_dev, (long)bp->b_blkno,
			    (long)bp->b_bcount);
		}
	}

	if (bp->b_flags & B_ERROR) {
		printf(
"swap_pager: I/O error - pagein failed; blkno %ld, size %ld, error %d\n",
		    (long)bp->b_blkno, (long)bp->b_bcount, bp->b_error);
		rv = VM_PAGER_ERROR;
	} else {
		rv = VM_PAGER_OK;
	}

	splx(s);
	swb[reqpage]->swb_locked--;

	/*
	 * remove the mapping for kernel virtual
	 */
	pmap_qremove(kva, count);

	/*
	 * release the physical I/O buffer
	 */
	relpbuf(bp);
	/*
	 * finish up input if everything is ok
	 */
	if (rv == VM_PAGER_OK) {
		for (i = 0; i < count; i++) {
			m[i]->dirty = 0;
			vm_page_flag_clear(m[i], PG_ZERO);
			if (i != reqpage) {
				/*
				 * whether or not to leave the page
				 * activated is up in the air, but we
				 * should put the page on a page queue
				 * somewhere. (it already is in the
				 * object). After some emperical
				 * results, it is best to deactivate
				 * the readahead pages.
				 */
				vm_page_deactivate(m[i]);

				/*
				 * just in case someone was asking for
				 * this page we now tell them that it
				 * is ok to use
				 */
				m[i]->valid = VM_PAGE_BITS_ALL;
				vm_page_wakeup(m[i]);
			}
		}

		/* remember the last page read for sequential detection */
		m[reqpage]->object->last_read = m[count-1]->pindex;
	} else {
		swap_pager_ridpages(m, count, reqpage);
	}
	return (rv);
}
 1130 
/*
 * swap_pager_putpages: write pages m[0..count-1] out to swap.
 *
 * Swap space is allocated on demand for pages that have none; the
 * routine then selects the longest run of pages starting at the first
 * writable one whose swap blocks are disk-contiguous (and within one
 * dmmax region), clips it to max_pageout_cluster, and issues a single
 * write.  Per-page status is returned in rtvals[] (VM_PAGER_OK /
 * VM_PAGER_PEND / VM_PAGER_FAIL / VM_PAGER_AGAIN); the collective
 * status is the return value.  "sync" is forced TRUE for any caller
 * other than the pageout daemon; async writes use an spc cleaning
 * structure and complete via swap_pager_iodone.
 */
int
swap_pager_putpages(object, m, count, sync, rtvals)
	vm_object_t object;
	vm_page_t *m;
	int count;
	boolean_t sync;
	int *rtvals;
{
	register struct buf *bp;
	sw_blk_t swb[count];
	register int s;
	int i, j, ix, firstidx, lastidx;
	boolean_t rv;
	vm_offset_t kva, off, fidx;
	swp_clean_t spc;
	vm_pindex_t paging_pindex;
	int reqaddr[count];
	int failed;

	/* swap may have been added since we last failed */
	if (vm_swap_size)
		no_swap_space = 0;

	if (no_swap_space) {
		for (i = 0; i < count; i++)
			rtvals[i] = VM_PAGER_FAIL;
		return VM_PAGER_FAIL;
	}

	/* only the pageout daemon is allowed to do async pageouts */
	if (curproc != pageproc)
		sync = TRUE;

	object = m[0]->object;
	paging_pindex = OFF_TO_IDX(object->paging_offset);

	/*
	 * Locate (and lock) the swap block for every page, allocating
	 * swap space where none has been assigned yet.
	 */
	failed = 0;
	for (j = 0; j < count; j++) {
		fidx = m[j]->pindex + paging_pindex;
		ix = swap_pager_block_index(fidx);
		swb[j] = 0;
		if (ix >= object->un_pager.swp.swp_nblocks) {
			rtvals[j] = VM_PAGER_FAIL;
			failed = 1;
			continue;
		} else {
			rtvals[j] = VM_PAGER_OK;
		}
		swb[j] = &object->un_pager.swp.swp_blocks[ix];
		swb[j]->swb_locked++;
		if (failed) {
			/* an earlier page already failed: give up on the rest */
			rtvals[j] = VM_PAGER_FAIL;
			continue;
		}
		off = swap_pager_block_offset(fidx);
		reqaddr[j] = swb[j]->swb_block[off];
		if (reqaddr[j] == SWB_EMPTY) {
			daddr_t blk;
			int tries;
			int ntoget;

			tries = 0;
			s = splvm();

			/*
			 * if any other pages have been allocated in this
			 * block, we only try to get one page.
			 */
			for (i = 0; i < SWB_NPAGES; i++) {
				if (swb[j]->swb_block[i] != SWB_EMPTY)
					break;
			}

			ntoget = (i == SWB_NPAGES) ? SWB_NPAGES : 1;
			/*
			 * this code is alittle conservative, but works (the
			 * intent of this code is to allocate small chunks for
			 * small objects)
			 */
			if ((off == 0) && ((fidx + ntoget) > object->size)) {
				ntoget = object->size - fidx;
			}
	retrygetspace:
			if (!swap_pager_full && ntoget > 1 &&
			    swap_pager_getswapspace(object, ntoget * btodb(PAGE_SIZE),
				&blk)) {

				/* got a contiguous chunk; carve it into pages */
				for (i = 0; i < ntoget; i++) {
					swb[j]->swb_block[i] = blk + btodb(PAGE_SIZE) * i;
					swb[j]->swb_valid = 0;
				}

				reqaddr[j] = swb[j]->swb_block[off];
			} else if (!swap_pager_getswapspace(object, btodb(PAGE_SIZE),
				&swb[j]->swb_block[off])) {
				/*
				 * if the allocation has failed, we try to
				 * reclaim space and retry.
				 */
				if (++tries == 1) {
					swap_pager_reclaim();
					goto retrygetspace;
				}
				rtvals[j] = VM_PAGER_AGAIN;
				failed = 1;
				swap_pager_full = 1;
			} else {
				/* got a single page */
				reqaddr[j] = swb[j]->swb_block[off];
				swb[j]->swb_valid &= ~(1 << off);
			}
			splx(s);
		}
	}

	/*
	 * search forwards for the last contiguous page to transfer
	 */
	failed = 0;
	for (i = 0; i < count; i++) {
		/* run must be disk-contiguous and within one dmmax region */
		if (failed ||
			(reqaddr[i] != reqaddr[0] + i * btodb(PAGE_SIZE)) ||
		    ((reqaddr[i] / dmmax) != (reqaddr[0] / dmmax)) ||
		    (rtvals[i] != VM_PAGER_OK)) {
			failed = 1;
			if (rtvals[i] == VM_PAGER_OK)
				rtvals[i] = VM_PAGER_AGAIN;
		}
	}

	/*
	 * Find the first run of VM_PAGER_OK pages: firstidx is its start,
	 * ix its length.
	 */
	ix = 0;
	firstidx = -1;
	for (i = 0; i < count; i++) {
		if (rtvals[i] == VM_PAGER_OK) {
			ix++;
			if (firstidx == -1) {
				firstidx = i;
			}
		} else if (firstidx >= 0) {
			break;
		}
	}

	if (firstidx == -1) {
		/* nothing writable at all: retry everything later */
		for (i = 0; i < count; i++) {
			if (rtvals[i] == VM_PAGER_OK)
				rtvals[i] = VM_PAGER_AGAIN;
		}
		return VM_PAGER_AGAIN;
	}

	lastidx = firstidx + ix;

	/* clip the run to the maximum pageout cluster size */
	if (ix > max_pageout_cluster) {
		for (i = firstidx + max_pageout_cluster; i < lastidx; i++) {
			if (rtvals[i] == VM_PAGER_OK)
				rtvals[i] = VM_PAGER_AGAIN;
		}
		ix = max_pageout_cluster;
		lastidx = firstidx + ix;
	}

	/* unlock the swap blocks of pages excluded from the transfer */
	for (i = 0; i < firstidx; i++) {
		if (swb[i])
			swb[i]->swb_locked--;
	}

	for (i = lastidx; i < count; i++) {
		if (swb[i])
			swb[i]->swb_locked--;
	}

#ifdef INVARIANTS
	for (i = firstidx; i < lastidx; i++) {
		if (reqaddr[i] == SWB_EMPTY) {
			printf("I/O to empty block???? -- pindex: %d, i: %d\n",
				m[i]->pindex, i);
		}
	}
#endif

	/*
	 * Clean up all completed async pageouts.
	 */
	if (swap_pager_free_pending)
		swap_pager_sync();

	/*
	 * get a swap pager clean data structure, block until we get it
	 */
	if (curproc == pageproc) {
		if (swap_pager_free_count == 0) {
			s = splvm();
			while (swap_pager_free_count == 0) {
				swap_pager_needflags |= SWAP_FREE_NEEDED_BY_PAGEOUT;
			/*
			 * if it does not get one within a short time, then
			 * there is a potential deadlock, so we go-on trying
			 * to free pages.  It is important to block here as opposed
			 * to returning, thereby allowing the pageout daemon to continue.
			 * It is likely that pageout daemon will start suboptimally
			 * reclaiming vnode backed pages if we don't block.  Since the
			 * I/O subsystem is probably already fully utilized, might as
			 * well wait.
			 */
				if (tsleep(&swap_pager_needflags, PVM-1, "swpfre", hz/2)) {
					/* timed out: try reclaiming, else give up */
					if (swap_pager_free_pending)
						swap_pager_sync();
					if (swap_pager_free_count == 0) {
						for (i = firstidx; i < lastidx; i++) {
							rtvals[i] = VM_PAGER_AGAIN;
						}
						splx(s);
						return VM_PAGER_AGAIN;
					}
				} else {
					swap_pager_sync();
				}
			}
			splx(s);
		}

		spc = TAILQ_FIRST(&swap_pager_free);
		KASSERT(spc != NULL,
		    ("swap_pager_putpages: free queue is empty, %d expected\n",
		    swap_pager_free_count));
		TAILQ_REMOVE(&swap_pager_free, spc, spc_list);
		swap_pager_free_count--;

		/* spc carries its own preallocated buf and kva */
		kva = spc->spc_kva;
		bp = spc->spc_bp;
		bzero(bp, sizeof *bp);
		bp->b_spc = spc;
		bp->b_xflags = 0;
		bp->b_data = (caddr_t) kva;
	} else {
		/* synchronous path uses a plain physical I/O buffer */
		spc = NULL;
		bp = getpbuf();
		kva = (vm_offset_t) bp->b_data;
		bp->b_spc = NULL;
	}

	/*
	 * map our page(s) into kva for I/O
	 */
	pmap_qenter(kva, &m[firstidx], ix);

	/*
	 * get the base I/O offset into the swap file
	 */
	for (i = firstidx; i < lastidx ; i++) {
		fidx = m[i]->pindex + paging_pindex;
		off = swap_pager_block_offset(fidx);
		/*
		 * set the valid bit
		 */
		swb[i]->swb_valid |= (1 << off);
		/*
		 * and unlock the data structure
		 */
		swb[i]->swb_locked--;
	}

	bp->b_flags = B_BUSY | B_PAGING;
	bp->b_proc = &proc0;	/* XXX (but without B_PHYS set this is ok) */
	bp->b_rcred = bp->b_wcred = bp->b_proc->p_ucred;
	if (bp->b_rcred != NOCRED)
		crhold(bp->b_rcred);
	if (bp->b_wcred != NOCRED)
		crhold(bp->b_wcred);
	bp->b_blkno = reqaddr[firstidx];
	pbgetvp(swapdev_vp, bp);

	bp->b_bcount = PAGE_SIZE * ix;
	bp->b_bufsize = PAGE_SIZE * ix;

	s = splvm();
	swapdev_vp->v_numoutput++;

	/*
	 * If this is an async write we set up additional buffer fields and
	 * place a "cleaning" entry on the inuse queue.
	 */
	object->un_pager.swp.swp_poip++;

	if (spc) {
		spc->spc_flags = 0;
		spc->spc_object = object;
		bp->b_npages = ix;
		for (i = firstidx; i < lastidx; i++) {
			/* NOTE(review): spc_m is indexed from firstidx while
			 * b_pages is zero-based — confirm spc_m is sized for
			 * this addressing */
			spc->spc_m[i] = m[i];
			bp->b_pages[i - firstidx] = m[i];
			vm_page_protect(m[i], VM_PROT_READ);
			pmap_clear_modify(VM_PAGE_TO_PHYS(m[i]));
			m[i]->dirty = 0;
		}
		spc->spc_first = firstidx;
		spc->spc_count = ix;
		/*
		 * the completion routine for async writes
		 */
		bp->b_flags |= B_CALL;
		bp->b_iodone = swap_pager_iodone;
		bp->b_dirtyoff = 0;
		bp->b_dirtyend = bp->b_bcount;
		TAILQ_INSERT_TAIL(&swap_pager_inuse, spc, spc_list);
	} else {
		bp->b_flags |= B_CALL;
		bp->b_iodone = swap_pager_iodone1;
		bp->b_npages = ix;
		for (i = firstidx; i < lastidx; i++)
			bp->b_pages[i - firstidx] = m[i];
	}

	cnt.v_swapout++;
	cnt.v_swappgsout += ix;

	/*
	 * perform the I/O
	 */
	VOP_STRATEGY(bp->b_vp, bp);
	if (sync == FALSE) {
		/* async: report PEND now, swap_pager_iodone finishes later */
		if (swap_pager_free_pending) {
			swap_pager_sync();
		}
		for (i = firstidx; i < lastidx; i++) {
			rtvals[i] = VM_PAGER_PEND;
		}
		splx(s);
		return VM_PAGER_PEND;
	}

	/*
	 * wait for the sync I/O to complete
	 */
	while ((bp->b_flags & B_DONE) == 0) {
		tsleep(bp, PVM, "swwrt", 0);
	}

	if (bp->b_flags & B_ERROR) {
		printf(
"swap_pager: I/O error - pageout failed; blkno %ld, size %ld, error %d\n",
		    (long)bp->b_blkno, (long)bp->b_bcount, bp->b_error);
		rv = VM_PAGER_ERROR;
	} else {
		rv = VM_PAGER_OK;
	}

	object->un_pager.swp.swp_poip--;
	if (object->un_pager.swp.swp_poip == 0)
		wakeup(object);	/* swap_pager_dealloc sleeps on this */

	if (bp->b_vp)
		pbrelvp(bp);

	splx(s);

	/*
	 * remove the mapping for kernel virtual
	 */
	pmap_qremove(kva, ix);

	/*
	 * if we have written the page, then indicate that the page is clean.
	 */
	if (rv == VM_PAGER_OK) {
		for (i = firstidx; i < lastidx; i++) {
			if (rtvals[i] == VM_PAGER_OK) {
				pmap_clear_modify(VM_PAGE_TO_PHYS(m[i]));
				m[i]->dirty = 0;
				/*
				 * optimization, if a page has been read
				 * during the pageout process, we activate it.
				 */
				if (((m[i]->flags & (PG_WANTED|PG_REFERENCED)) ||
				    pmap_ts_referenced(VM_PAGE_TO_PHYS(m[i])))) {
					vm_page_activate(m[i]);
				}
			}
		}
	} else {
		for (i = firstidx; i < lastidx; i++) {
			rtvals[i] = rv;
		}
	}

	if (spc != NULL) {
		if (bp->b_rcred != NOCRED)
			crfree(bp->b_rcred);
		if (bp->b_wcred != NOCRED)
			crfree(bp->b_wcred);
		spc_free(spc);
	} else
		relpbuf(bp);
	if (swap_pager_free_pending)
		swap_pager_sync();

	return (rv);
}
 1527 
 1528 void
 1529 swap_pager_sync()
 1530 {
 1531         swp_clean_t spc;
 1532 
 1533         while (spc = TAILQ_FIRST(&swap_pager_done)) {
 1534                 swap_pager_finish(spc);
 1535         }
 1536         return;
 1537 }
 1538 
/*
 * swap_pager_finish: retire one completed async pageout.
 *
 * Removes the cleaning structure from the done queue, unmaps its KVA,
 * then either redirties the pages (on I/O error) or reactivates pages
 * that were referenced while the write was in flight.  Finally returns
 * the spc to the free list via spc_free().
 */
static void
swap_pager_finish(spc)
	register swp_clean_t spc;
{
	int i, s, lastidx;
	vm_object_t object;
	vm_page_t *ma;

	ma = spc->spc_m;
	object = spc->spc_object;
	lastidx = spc->spc_first + spc->spc_count;

	s = splvm();
	TAILQ_REMOVE(&swap_pager_done, spc, spc_list);
	splx(s);

	pmap_qremove(spc->spc_kva, spc->spc_count);

	/*
	 * If no error, mark as clean and inform the pmap system. If error,
	 * mark as dirty so we will try again. (XXX could get stuck doing
	 * this, should give up after awhile)
	 */
	if (spc->spc_flags & SPC_ERROR) {

		for (i = spc->spc_first; i < lastidx; i++) {
			printf("swap_pager_finish: I/O error, clean of page %lx failed\n",
			    (u_long) VM_PAGE_TO_PHYS(ma[i]));
			/* redirty so the pageout will be retried later */
			ma[i]->dirty = VM_PAGE_BITS_ALL;
			vm_page_io_finish(ma[i]);
		}

		/* error path must still drop paging-in-progress and wake waiters */
		vm_object_pip_subtract(object, spc->spc_count);
		if ((object->paging_in_progress == 0) &&
			(object->flags & OBJ_PIPWNT)) {
			vm_object_clear_flag(object, OBJ_PIPWNT);
			wakeup(object);
		}

	} else {
		for (i = spc->spc_first; i < lastidx; i++) {
			/* reactivate pages wanted/referenced during the pageout */
			if ((ma[i]->queue != PQ_ACTIVE) &&
			   ((ma[i]->flags & PG_WANTED) ||
				 pmap_ts_referenced(VM_PAGE_TO_PHYS(ma[i])))) {
				vm_page_activate(ma[i]);
			}
		}
	}

	nswiodone -= spc->spc_count;
	swap_pager_free_pending--;
	spc_free(spc);

	return;
}
 1594 
/*
 * swap_pager_iodone:
 *
 *	Completion handler for async swap pageout bufs (installed as the
 *	buf's b_iodone hook — NOTE(review): presumably called at interrupt
 *	time, hence the splvm bracket around the whole body; confirm against
 *	the buf setup code).  Moves the cleaning entry from the in-use to
 *	the done queue, settles per-object paging state on success, releases
 *	the buf's vnode and credentials, and issues the various wakeups that
 *	throttled writers and the pageout daemon may be sleeping on.  Final
 *	page/entry cleanup is deferred to swap_pager_finish().
 */
static void
swap_pager_iodone(bp)
        register struct buf *bp;
{
        int i, s, lastidx;
        register swp_clean_t spc;
        vm_object_t object;
        vm_page_t *ma;


        s = splvm();
        /* The cleaning entry was stashed in the buf when the I/O was issued. */
        spc = (swp_clean_t) bp->b_spc;
        TAILQ_REMOVE(&swap_pager_inuse, spc, spc_list);
        TAILQ_INSERT_TAIL(&swap_pager_done, spc, spc_list);

        object = spc->spc_object;

#if defined(DIAGNOSTIC)
        /* Sanity: the object must have at least this entry's pages in flight. */
        if (object->paging_in_progress < spc->spc_count)
                printf("swap_pager_iodone: paging_in_progress(%d) < spc_count(%d)\n",
                        object->paging_in_progress, spc->spc_count);
#endif

        if (bp->b_flags & B_ERROR) {
                /* Record the failure; swap_pager_finish() will re-dirty the
                 * pages and do the pip accounting for the error case. */
                spc->spc_flags |= SPC_ERROR;
                printf("swap_pager: I/O error - async %s failed; blkno %lu, size %ld, error %d\n",
                    (bp->b_flags & B_READ) ? "pagein" : "pageout",
                    (u_long) bp->b_blkno, bp->b_bcount, bp->b_error);
        } else {
                /* Success: drop paging-in-progress now and wake any waiter
                 * that asked to be notified when it reaches zero. */
                vm_object_pip_subtract(object, spc->spc_count);
                if ((object->paging_in_progress == 0) &&
                        (object->flags & OBJ_PIPWNT)) {
                        vm_object_clear_flag(object, OBJ_PIPWNT);
                        wakeup(object);
                }
                ma = spc->spc_m;
                lastidx = spc->spc_first + spc->spc_count;
                for (i = spc->spc_first; i < lastidx; i++) {
                        /*
                         * we wakeup any processes that are waiting on these pages.
                         */
                        vm_page_io_finish(ma[i]);
                }
        }

        /* Release the vnode association acquired for the transfer. */
        if (bp->b_vp)
                pbrelvp(bp);

        /* Drop the credential references held for the I/O. */
        if (bp->b_rcred != NOCRED)
                crfree(bp->b_rcred);
        if (bp->b_wcred != NOCRED)
                crfree(bp->b_wcred);

        nswiodone += spc->spc_count;
        swap_pager_free_pending++;
        /* Last outstanding pageout I/O on this object: wake its waiters. */
        if (--spc->spc_object->un_pager.swp.swp_poip == 0) {
                wakeup(spc->spc_object);
        }

        /* If someone is blocked waiting for a free cleaning entry and at
         * least half the pool is (or soon will be) available, wake them. */
        if (swap_pager_needflags &&
          ((swap_pager_free_count + swap_pager_free_pending) > (npendingio / 2))) {
                spc_wakeup();
        }

        /* No pageouts left in flight: unblock the pageout daemon if it is
         * waiting for pages to drain. */
        if ((TAILQ_FIRST(&swap_pager_inuse) == NULL) &&
                vm_pageout_pages_needed) {
                wakeup(&vm_pageout_pages_needed);
                vm_pageout_pages_needed = 0;
        }

        splx(s);
}

Cache object: 2987aaf6361c377d9bf30673a201cf0c


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]


This page is part of the FreeBSD/Linux Linux Kernel Cross-Reference, and was automatically generated using a modified version of the LXR engine.