FreeBSD/Linux Kernel Cross Reference
sys/kern/vfs_bio.c


    1 /*
    2  * Copyright (c) 1994,1997 John S. Dyson
    3  * All rights reserved.
    4  *
    5  * Redistribution and use in source and binary forms, with or without
    6  * modification, are permitted provided that the following conditions
    7  * are met:
    8  * 1. Redistributions of source code must retain the above copyright
    9  *    notice immediately at the beginning of the file, without modification,
   10  *    this list of conditions, and the following disclaimer.
   11  * 2. Absolutely no warranty of function or purpose is made by the author
   12  *              John S. Dyson.
   13  *
   14  * $FreeBSD$
   15  */
   16 
   17 /*
   18  * this file contains a new buffer I/O scheme implementing a coherent
   19  * VM object and buffer cache scheme.  Pains have been taken to make
   20  * sure that the performance degradation associated with schemes such
   21  * as this is not realized.
   22  *
   23  * Author:  John S. Dyson
   24  * Significant help during the development and debugging phases
   25  * was provided by David Greenman, also of the FreeBSD core team.
   26  *
   27  * see man buf(9) for more info.
   28  */
   29 
   30 #define VMIO
   31 #include <sys/param.h>
   32 #include <sys/systm.h>
   33 #include <sys/sysproto.h>
   34 #include <sys/kernel.h>
   35 #include <sys/sysctl.h>
   36 #include <sys/proc.h>
   37 #include <sys/vnode.h>
   38 #include <sys/vmmeter.h>
   39 #include <sys/lock.h>
   40 #include <miscfs/specfs/specdev.h>
   41 #include <vm/vm.h>
   42 #include <vm/vm_param.h>
   43 #include <vm/vm_prot.h>
   44 #include <vm/vm_kern.h>
   45 #include <vm/vm_pageout.h>
   46 #include <vm/vm_page.h>
   47 #include <vm/vm_object.h>
   48 #include <vm/vm_extern.h>
   49 #include <vm/vm_map.h>
   50 #include <sys/buf.h>
   51 #include <sys/mount.h>
   52 #include <sys/malloc.h>
   53 #include <sys/resourcevar.h>
   54 
   55 static MALLOC_DEFINE(M_BIOBUF, "BIO buffer", "BIO buffer");
   56 
   57 struct  bio_ops bioops;         /* I/O operation notification */
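
The bioops vector lets a dependency-tracking filesystem hook buffer events; within this excerpt only io_deallocate is invoked (see brelse() and getnewbuf()).  A minimal, hypothetical registration sketch, assuming only the hook signature visible from those call sites:

/*
 * Hypothetical example, not part of vfs_bio.c: a filesystem such as
 * soft updates could hook buffer deallocation through bioops.
 */
static void
example_io_deallocate(struct buf *bp)
{
        /* tear down any dependency state linked from bp->b_dep */
}

static void
example_fs_init(void)
{
        bioops.io_deallocate = example_io_deallocate;
}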
   58 
   59 #if 0   /* replaced by sched_sync */
   60 static void vfs_update __P((void));
   61 static struct   proc *updateproc;
   62 static struct kproc_desc up_kp = {
   63         "update",
   64         vfs_update,
   65         &updateproc
   66 };
   67 SYSINIT_KT(update, SI_SUB_KTHREAD_UPDATE, SI_ORDER_FIRST, kproc_start, &up_kp)
   68 #endif
   69 
   70 struct buf *buf;                /* buffer header pool */
   71 struct swqueue bswlist;
   72 
   73 static void vm_hold_free_pages(struct buf * bp, vm_offset_t from,
   74                 vm_offset_t to);
   75 static void vm_hold_load_pages(struct buf * bp, vm_offset_t from,
   76                 vm_offset_t to);
   77 static void vfs_buf_set_valid(struct buf *bp, vm_ooffset_t foff,
   78                               vm_offset_t off, vm_offset_t size,
   79                               vm_page_t m);
   80 static void vfs_page_set_valid(struct buf *bp, vm_ooffset_t off,
   81                                int pageno, vm_page_t m);
   82 static void vfs_clean_pages(struct buf * bp);
   83 static void vfs_setdirty(struct buf *bp);
   84 static void vfs_vmio_release(struct buf *bp);
   85 static void flushdirtybuffers(int slpflag, int slptimeo);
   86 
   87 int needsbuffer;
   88 
   89 /*
   90  * Internal update daemon, process 3
   91  *      The variable vfs_update_wakeup allows for internal syncs.
   92  */
   93 int vfs_update_wakeup;
   94 
   95 
   96 /*
   97  * buffers base kva
   98  */
   99 
  100 /*
  101  * bogus page -- for I/O to/from partially complete buffers
  102  * this is a temporary solution to the problem, but it is not
  103  * really that bad.  it would be better to split the buffer
  104  * for input in the case of buffers partially already in memory,
  105  * but the code is intricate enough already.
  106  */
  107 vm_page_t bogus_page;
  108 static vm_offset_t bogus_offset;
  109 
  110 static int bufspace, maxbufspace, vmiospace, maxvmiobufspace,
  111         bufmallocspace, maxbufmallocspace;
  112 int numdirtybuffers;
  113 static int lodirtybuffers, hidirtybuffers;
  114 static int numfreebuffers, lofreebuffers, hifreebuffers;
  115 static int kvafreespace;
  116 
  117 SYSCTL_INT(_vfs, OID_AUTO, numdirtybuffers, CTLFLAG_RD,
  118         &numdirtybuffers, 0, "");
  119 SYSCTL_INT(_vfs, OID_AUTO, lodirtybuffers, CTLFLAG_RW,
  120         &lodirtybuffers, 0, "");
  121 SYSCTL_INT(_vfs, OID_AUTO, hidirtybuffers, CTLFLAG_RW,
  122         &hidirtybuffers, 0, "");
  123 SYSCTL_INT(_vfs, OID_AUTO, numfreebuffers, CTLFLAG_RD,
  124         &numfreebuffers, 0, "");
  125 SYSCTL_INT(_vfs, OID_AUTO, lofreebuffers, CTLFLAG_RW,
  126         &lofreebuffers, 0, "");
  127 SYSCTL_INT(_vfs, OID_AUTO, hifreebuffers, CTLFLAG_RW,
  128         &hifreebuffers, 0, "");
  129 SYSCTL_INT(_vfs, OID_AUTO, maxbufspace, CTLFLAG_RW,
  130         &maxbufspace, 0, "");
  131 SYSCTL_INT(_vfs, OID_AUTO, bufspace, CTLFLAG_RD,
  132         &bufspace, 0, "");
  133 SYSCTL_INT(_vfs, OID_AUTO, maxvmiobufspace, CTLFLAG_RW,
  134         &maxvmiobufspace, 0, "");
  135 SYSCTL_INT(_vfs, OID_AUTO, vmiospace, CTLFLAG_RD,
  136         &vmiospace, 0, "");
  137 SYSCTL_INT(_vfs, OID_AUTO, maxmallocbufspace, CTLFLAG_RW,
  138         &maxbufmallocspace, 0, "");
  139 SYSCTL_INT(_vfs, OID_AUTO, bufmallocspace, CTLFLAG_RD,
  140         &bufmallocspace, 0, "");
  141 SYSCTL_INT(_vfs, OID_AUTO, kvafreespace, CTLFLAG_RD,
  142         &kvafreespace, 0, "");
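
These counters and limits are exported under the vfs sysctl tree; the CTLFLAG_RD entries are read-only statistics and the CTLFLAG_RW entries are tunable limits.  A hypothetical userland sketch, assuming the standard sysctlbyname(3) interface:

#include <sys/types.h>
#include <sys/sysctl.h>
#include <stdio.h>

int
main(void)
{
        int dirty, hi = 512;
        size_t len = sizeof(dirty);

        /* query a read-only (CTLFLAG_RD) statistic */
        if (sysctlbyname("vfs.numdirtybuffers", &dirty, &len, NULL, 0) == 0)
                printf("numdirtybuffers = %d\n", dirty);

        /* set a read-write (CTLFLAG_RW) limit; the value 512 is arbitrary */
        if (sysctlbyname("vfs.hidirtybuffers", NULL, NULL, &hi, sizeof(hi)) != 0)
                perror("vfs.hidirtybuffers");
        return (0);
}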
  143 
  144 static LIST_HEAD(bufhashhdr, buf) bufhashtbl[BUFHSZ], invalhash;
  145 struct bqueues bufqueues[BUFFER_QUEUES] = {0};
  146 
  147 extern int vm_swap_size;
  148 
  149 #define BUF_MAXUSE 24
  150 
  151 #define VFS_BIO_NEED_ANY 1
  152 #define VFS_BIO_NEED_LOWLIMIT 2
  153 #define VFS_BIO_NEED_FREE 4
  154 
  155 /*
  156  * Initialize buffer headers and related structures.
  157  */
  158 void
  159 bufinit()
  160 {
  161         struct buf *bp;
  162         int i;
  163 
  164         TAILQ_INIT(&bswlist);
  165         LIST_INIT(&invalhash);
  166 
  167         /* first, make a null hash table */
  168         for (i = 0; i < BUFHSZ; i++)
  169                 LIST_INIT(&bufhashtbl[i]);
  170 
  171         /* next, make a null set of free lists */
  172         for (i = 0; i < BUFFER_QUEUES; i++)
  173                 TAILQ_INIT(&bufqueues[i]);
  174 
  175         /* finally, initialize each buffer header and stick on empty q */
  176         for (i = 0; i < nbuf; i++) {
  177                 bp = &buf[i];
  178                 bzero(bp, sizeof *bp);
  179                 bp->b_flags = B_INVAL;  /* we're just an empty header */
  180                 bp->b_dev = NODEV;
  181                 bp->b_rcred = NOCRED;
  182                 bp->b_wcred = NOCRED;
  183                 bp->b_qindex = QUEUE_EMPTY;
  184                 bp->b_xflags = 0;
  185                 LIST_INIT(&bp->b_dep);
  186                 TAILQ_INSERT_TAIL(&bufqueues[QUEUE_EMPTY], bp, b_freelist);
  187                 LIST_INSERT_HEAD(&invalhash, bp, b_hash);
  188         }
  189 /*
  190  * maxbufspace is currently calculated to support all filesystem blocks
  191  * to be 8K.  If you happen to use a 16K filesystem, the size of the buffer
  192  * cache is still the same as it would be for 8K filesystems.  This
  193  * keeps the size of the buffer cache "in check" for big block filesystems.
  194  */
  195         maxbufspace = (nbuf + 8) * DFLTBSIZE;
  196 /*
  197  * reserve 1/3 of the buffers for metadata (VDIR) which might not be VMIO'ed
  198  */
  199         maxvmiobufspace = 2 * maxbufspace / 3;
  200 /*
  201  * Limit the amount of malloc memory since it is wired permanently into
  202  * the kernel space.  Even though this is accounted for in the buffer
  203  * allocation, we don't want the malloced region to grow uncontrolled.
  204  * The malloc scheme improves memory utilization significantly for average
  205  * (small) directories.
  206  */
  207         maxbufmallocspace = maxbufspace / 20;
  208 
  209 /*
  210  * Reduce the probability of deadlock conditions by limiting the
  211  * number of dirty buffers.
  212  */
  213         hidirtybuffers = nbuf / 8 + 20;
  214         lodirtybuffers = nbuf / 16 + 10;
  215         numdirtybuffers = 0;
  216         lofreebuffers = nbuf / 18 + 5;
  217         hifreebuffers = 2 * lofreebuffers;
  218         numfreebuffers = nbuf;
  219         kvafreespace = 0;
  220 
  221         bogus_offset = kmem_alloc_pageable(kernel_map, PAGE_SIZE);
  222         bogus_page = vm_page_alloc(kernel_object,
  223                         ((bogus_offset - VM_MIN_KERNEL_ADDRESS) >> PAGE_SHIFT),
  224                         VM_ALLOC_NORMAL);
  225 
  226 }
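
A worked example of the sizing above, assuming (hypothetically) nbuf = 1024 and DFLTBSIZE = 8192: maxbufspace = (1024 + 8) * 8192 = 8,454,144 bytes (about 8 MB), maxvmiobufspace = 2/3 of that (about 5.4 MB), maxbufmallocspace = 1/20 of it (about 413 KB), hidirtybuffers = 1024/8 + 20 = 148, lodirtybuffers = 1024/16 + 10 = 74, lofreebuffers = 1024/18 + 5 = 61, and hifreebuffers = 122.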
  227 
  228 /*
  229  * Free the kva allocation for a buffer
  230  * Must be called only at splbio or higher,
  231  *  as this is the only locking for buffer_map.
  232  */
  233 static void
  234 bfreekva(struct buf * bp)
  235 {
  236         if (bp->b_kvasize == 0)
  237                 return;
  238                 
  239         vm_map_delete(buffer_map,
  240                 (vm_offset_t) bp->b_kvabase,
  241                 (vm_offset_t) bp->b_kvabase + bp->b_kvasize);
  242 
  243         bp->b_kvasize = 0;
  244 
  245 }
  246 
  247 /*
  248  * remove the buffer from the appropriate free list
  249  */
  250 void
  251 bremfree(struct buf * bp)
  252 {
  253         int s = splbio();
  254 
  255         if (bp->b_qindex != QUEUE_NONE) {
  256                 if (bp->b_qindex == QUEUE_EMPTY) {
  257                         kvafreespace -= bp->b_kvasize;
  258                 }
  259                 TAILQ_REMOVE(&bufqueues[bp->b_qindex], bp, b_freelist);
  260                 bp->b_qindex = QUEUE_NONE;
  261         } else {
  262 #if !defined(MAX_PERF)
  263                 panic("bremfree: removing a buffer when not on a queue");
  264 #endif
  265         }
  266         if ((bp->b_flags & B_INVAL) ||
  267                 (bp->b_flags & (B_DELWRI|B_LOCKED)) == 0)
  268                 --numfreebuffers;
  269         splx(s);
  270 }
  271 
  272 
  273 /*
  274  * Get a buffer with the specified data.  Look in the cache first.
  275  */
  276 int
  277 bread(struct vnode * vp, daddr_t blkno, int size, struct ucred * cred,
  278     struct buf ** bpp)
  279 {
  280         struct buf *bp;
  281 
  282         bp = getblk(vp, blkno, size, 0, 0);
  283         *bpp = bp;
  284 
  285         /* if not found in cache, do some I/O */
  286         if ((bp->b_flags & B_CACHE) == 0) {
  287                 if (curproc != NULL)
  288                         curproc->p_stats->p_ru.ru_inblock++;
  289                 bp->b_flags |= B_READ;
  290                 bp->b_flags &= ~(B_DONE | B_ERROR | B_INVAL);
  291                 if (bp->b_rcred == NOCRED) {
  292                         if (cred != NOCRED)
  293                                 crhold(cred);
  294                         bp->b_rcred = cred;
  295                 }
  296                 vfs_busy_pages(bp, 0);
  297                 VOP_STRATEGY(vp, bp);
  298                 return (biowait(bp));
  299         }
  300         return (0);
  301 }
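
A hypothetical caller sketch (not part of this file) showing the usual pairing of bread() with brelse() on error and bqrelse() on success; bread() hands back a buffer through *bpp even when biowait() reports an error, so the caller still releases it:

static int
example_read_block(struct vnode *vp, daddr_t lbn, int bsize, caddr_t dst)
{
        struct buf *bp;
        int error;

        error = bread(vp, lbn, bsize, NOCRED, &bp);
        if (error) {
                brelse(bp);             /* discard the failed buffer */
                return (error);
        }
        bcopy(bp->b_data, dst, bsize);  /* consume the cached contents */
        bqrelse(bp);                    /* keep the valid buffer on the LRU queue */
        return (0);
}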
  302 
  303 /*
  304  * Operates like bread, but also starts asynchronous I/O on
  305  * read-ahead blocks.
  306  */
  307 int
  308 breadn(struct vnode * vp, daddr_t blkno, int size,
  309     daddr_t * rablkno, int *rabsize,
  310     int cnt, struct ucred * cred, struct buf ** bpp)
  311 {
  312         struct buf *bp, *rabp;
  313         int i;
  314         int rv = 0, readwait = 0;
  315 
  316         *bpp = bp = getblk(vp, blkno, size, 0, 0);
  317 
  318         /* if not found in cache, do some I/O */
  319         if ((bp->b_flags & B_CACHE) == 0) {
  320                 if (curproc != NULL)
  321                         curproc->p_stats->p_ru.ru_inblock++;
  322                 bp->b_flags |= B_READ;
  323                 bp->b_flags &= ~(B_DONE | B_ERROR | B_INVAL);
  324                 if (bp->b_rcred == NOCRED) {
  325                         if (cred != NOCRED)
  326                                 crhold(cred);
  327                         bp->b_rcred = cred;
  328                 }
  329                 vfs_busy_pages(bp, 0);
  330                 VOP_STRATEGY(vp, bp);
  331                 ++readwait;
  332         }
  333         for (i = 0; i < cnt; i++, rablkno++, rabsize++) {
  334                 if (inmem(vp, *rablkno))
  335                         continue;
  336                 rabp = getblk(vp, *rablkno, *rabsize, 0, 0);
  337 
  338                 if ((rabp->b_flags & B_CACHE) == 0) {
  339                         if (curproc != NULL)
  340                                 curproc->p_stats->p_ru.ru_inblock++;
  341                         rabp->b_flags |= B_READ | B_ASYNC;
  342                         rabp->b_flags &= ~(B_DONE | B_ERROR | B_INVAL);
  343                         if (rabp->b_rcred == NOCRED) {
  344                                 if (cred != NOCRED)
  345                                         crhold(cred);
  346                                 rabp->b_rcred = cred;
  347                         }
  348                         vfs_busy_pages(rabp, 0);
  349                         VOP_STRATEGY(vp, rabp);
  350                 } else {
  351                         brelse(rabp);
  352                 }
  353         }
  354 
  355         if (readwait) {
  356                 rv = biowait(bp);
  357         }
  358         return (rv);
  359 }
  360 
  361 /*
  362  * Write, release buffer on completion.  (Done by iodone
  363  * if async.)
  364  */
  365 int
  366 bwrite(struct buf * bp)
  367 {
  368         int oldflags, s;
  369         struct vnode *vp;
  370         struct mount *mp;
  371 
  372 
  373         if (bp->b_flags & B_INVAL) {
  374                 brelse(bp);
  375                 return (0);
  376         }
  377 
  378         oldflags = bp->b_flags;
  379 
  380 #if !defined(MAX_PERF)
  381         if ((bp->b_flags & B_BUSY) == 0)
  382                 panic("bwrite: buffer is not busy???");
  383 #endif
  384 
  385         bp->b_flags &= ~(B_READ | B_DONE | B_ERROR | B_DELWRI);
  386         bp->b_flags |= B_WRITEINPROG;
  387 
  388         s = splbio();
  389         if ((oldflags & B_DELWRI) == B_DELWRI) {
  390                 --numdirtybuffers;
  391                 reassignbuf(bp, bp->b_vp);
  392         }
  393 
  394         bp->b_vp->v_numoutput++;
  395         vfs_busy_pages(bp, 1);
  396         if (curproc != NULL)
  397                 curproc->p_stats->p_ru.ru_oublock++;
  398         splx(s);
  399         VOP_STRATEGY(bp->b_vp, bp);
  400 
  401         /*
  402          * Collect statistics on synchronous and asynchronous writes.
  403          * Writes to block devices are charged to their associated
  404          * filesystem (if any).
  405          */
  406         if ((vp = bp->b_vp) != NULL) {
  407                 if (vp->v_type == VBLK)
  408                         mp = vp->v_specmountpoint;
  409                 else
  410                         mp = vp->v_mount;
  411                 if (mp != NULL)
  412                         if ((oldflags & B_ASYNC) == 0)
  413                                 mp->mnt_stat.f_syncwrites++;
  414                         else
  415                                 mp->mnt_stat.f_asyncwrites++;
  416         }
  417 
  418         if ((oldflags & B_ASYNC) == 0) {
  419                 int rtval = biowait(bp);
  420                 brelse(bp);
  421                 return (rtval);
  422         }
  423         return (0);
  424 }
  425 
  426 void
  427 vfs_bio_need_satisfy(void) {
  428         ++numfreebuffers;
  429         if (!needsbuffer)
  430                 return;
  431         if (numdirtybuffers < lodirtybuffers) {
  432                 needsbuffer &= ~(VFS_BIO_NEED_ANY | VFS_BIO_NEED_LOWLIMIT);
  433         } else {
  434                 needsbuffer &= ~VFS_BIO_NEED_ANY;
  435         }
  436         if (numfreebuffers >= hifreebuffers) {
  437                 needsbuffer &= ~VFS_BIO_NEED_FREE;
  438         }
  439         wakeup(&needsbuffer);
  440 }
  441 
  442 /*
  443  * Delayed write. (Buffer is marked dirty).
  444  */
  445 void
  446 bdwrite(struct buf * bp)
  447 {
  448         struct vnode *vp;
  449 
  450 #if !defined(MAX_PERF)
  451         if ((bp->b_flags & B_BUSY) == 0) {
  452                 panic("bdwrite: buffer is not busy");
  453         }
  454 #endif
  455 
  456         if (bp->b_flags & B_INVAL) {
  457                 brelse(bp);
  458                 return;
  459         }
  460         bp->b_flags &= ~(B_READ|B_RELBUF);
  461         if ((bp->b_flags & B_DELWRI) == 0) {
  462                 bp->b_flags |= B_DONE | B_DELWRI;
  463                 reassignbuf(bp, bp->b_vp);
  464                 ++numdirtybuffers;
  465         }
  466 
  467         /*
  468          * This bmap keeps the system from needing to do the bmap later,
  469          * perhaps when the system is attempting to do a sync.  Since it
   470          * is likely that the indirect block -- or whatever other data structure
   471          * the filesystem needs -- is still in memory now, it is a good
  472          * thing to do this.  Note also, that if the pageout daemon is
  473          * requesting a sync -- there might not be enough memory to do
  474          * the bmap then...  So, this is important to do.
  475          */
  476         if (bp->b_lblkno == bp->b_blkno) {
  477                 VOP_BMAP(bp->b_vp, bp->b_lblkno, NULL, &bp->b_blkno, NULL, NULL);
  478         }
  479 
  480         /*
  481          * Set the *dirty* buffer range based upon the VM system dirty pages.
  482          */
  483         vfs_setdirty(bp);
  484 
  485         /*
  486          * We need to do this here to satisfy the vnode_pager and the
  487          * pageout daemon, so that it thinks that the pages have been
  488          * "cleaned".  Note that since the pages are in a delayed write
  489          * buffer -- the VFS layer "will" see that the pages get written
  490          * out on the next sync, or perhaps the cluster will be completed.
  491          */
  492         vfs_clean_pages(bp);
  493         bqrelse(bp);
  494 
  495         /*
  496          * XXX The soft dependency code is not prepared to
  497          * have I/O done when a bdwrite is requested. For
  498          * now we just let the write be delayed if it is
  499          * requested by the soft dependency code.
  500          */
  501         if ((vp = bp->b_vp) &&
  502             ((vp->v_type == VBLK && vp->v_specmountpoint &&
  503                   (vp->v_specmountpoint->mnt_flag & MNT_SOFTDEP)) ||
  504                  (vp->v_mount && (vp->v_mount->mnt_flag & MNT_SOFTDEP))))
  505                 return;
  506 
  507         if (numdirtybuffers >= hidirtybuffers)
  508                 flushdirtybuffers(0, 0);
  509 
  510         return;
  511 }
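
A hypothetical read-modify-write sketch using the delayed-write path: the block is dirtied in memory and handed back with bdwrite(), so no I/O happens until the update daemon, a sync, or buffer reclamation pushes it out:

static int
example_update_block(struct vnode *vp, daddr_t lbn, int bsize, int idx, int val)
{
        struct buf *bp;
        int error;

        error = bread(vp, lbn, bsize, NOCRED, &bp);
        if (error) {
                brelse(bp);
                return (error);
        }
        ((int *)bp->b_data)[idx] = val; /* modify the cached block in place */
        bdwrite(bp);                    /* mark B_DELWRI and release; write happens later */
        return (0);
}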
  512 
  513 
  514 /*
  515  * Same as first half of bdwrite, mark buffer dirty, but do not release it.
  516  * Check how this compares with vfs_setdirty(); XXX [JRE]
  517  */
  518 void
  519 bdirty(bp)
  520       struct buf *bp;
  521 {
  522         
  523         bp->b_flags &= ~(B_READ|B_RELBUF); /* XXX ??? check this */
  524         if ((bp->b_flags & B_DELWRI) == 0) {
  525                 bp->b_flags |= B_DONE | B_DELWRI; /* why done? XXX JRE */
  526                 reassignbuf(bp, bp->b_vp);
  527                 ++numdirtybuffers;
  528         }
  529 }
  530 
  531 /*
  532  * Asynchronous write.
  533  * Start output on a buffer, but do not wait for it to complete.
  534  * The buffer is released when the output completes.
  535  */
  536 void
  537 bawrite(struct buf * bp)
  538 {
  539         bp->b_flags |= B_ASYNC;
  540         (void) VOP_BWRITE(bp);
  541 }
  542 
  543 /*
  544  * Ordered write.
  545  * Start output on a buffer, and flag it so that the device will write
  546  * it in the order it was queued.  The buffer is released when the output
  547  * completes.
  548  */
  549 int
  550 bowrite(struct buf * bp)
  551 {
  552         bp->b_flags |= B_ORDERED|B_ASYNC;
  553         return (VOP_BWRITE(bp));
  554 }
  555 
  556 /*
  557  * Release a buffer.
  558  */
  559 void
  560 brelse(struct buf * bp)
  561 {
  562         int s;
  563 
  564         if (bp->b_flags & B_CLUSTER) {
  565                 relpbuf(bp);
  566                 return;
  567         }
  568 
  569         s = splbio();
  570 
  571         /* anyone need this block? */
  572         if (bp->b_flags & B_WANTED) {
  573                 bp->b_flags &= ~(B_WANTED | B_AGE);
  574                 wakeup(bp);
  575         } 
  576 
  577         if (bp->b_flags & B_LOCKED)
  578                 bp->b_flags &= ~B_ERROR;
  579 
  580         if ((bp->b_flags & (B_READ | B_ERROR)) == B_ERROR) {
  581                 bp->b_flags &= ~B_ERROR;
  582                 bdirty(bp);
  583         } else if ((bp->b_flags & (B_NOCACHE | B_INVAL | B_ERROR | B_FREEBUF)) ||
  584             (bp->b_bufsize <= 0)) {
  585                 bp->b_flags |= B_INVAL;
  586                 if (LIST_FIRST(&bp->b_dep) != NULL && bioops.io_deallocate)
  587                         (*bioops.io_deallocate)(bp);
  588                 if (bp->b_flags & B_DELWRI)
  589                         --numdirtybuffers;
  590                 bp->b_flags &= ~(B_DELWRI | B_CACHE | B_FREEBUF);
  591                 if ((bp->b_flags & B_VMIO) == 0) {
  592                         if (bp->b_bufsize)
  593                                 allocbuf(bp, 0);
  594                         if (bp->b_vp)
  595                                 brelvp(bp);
  596                 }
  597         }
  598 
  599         /*
  600          * We must clear B_RELBUF if B_DELWRI is set.  If vfs_vmio_release() 
  601          * is called with B_DELWRI set, the underlying pages may wind up
  602          * getting freed causing a previous write (bdwrite()) to get 'lost'
  603          * because pages associated with a B_DELWRI bp are marked clean.
  604          * 
  605          * We still allow the B_INVAL case to call vfs_vmio_release(), even
  606          * if B_DELWRI is set.
  607          */
  608 
  609         if (bp->b_flags & B_DELWRI)
  610                 bp->b_flags &= ~B_RELBUF;
  611 
  612         /*
  613          * VMIO buffer rundown.  It is not very necessary to keep a VMIO buffer
  614          * constituted, so the B_INVAL flag is used to *invalidate* the buffer,
  615          * but the VM object is kept around.  The B_NOCACHE flag is used to
  616          * invalidate the pages in the VM object.
  617          *
  618          * The b_{validoff,validend,dirtyoff,dirtyend} values are relative 
  619          * to b_offset and currently have byte granularity, whereas the
  620          * valid flags in the vm_pages have only DEV_BSIZE resolution.
  621          * The byte resolution fields are used to avoid unnecessary re-reads
  622          * of the buffer but the code really needs to be genericized so
  623          * other filesystem modules can take advantage of these fields.
  624          *
  625          * XXX this seems to cause performance problems.
  626          */
  627         if ((bp->b_flags & B_VMIO)
  628             && !(bp->b_vp->v_tag == VT_NFS &&
  629                  bp->b_vp->v_type != VBLK &&
  630                  (bp->b_flags & B_DELWRI) != 0)
  631 #ifdef notdef
  632             && (bp->b_vp->v_tag != VT_NFS
  633                 || bp->b_vp->v_type == VBLK
  634                 || (bp->b_flags & (B_NOCACHE | B_INVAL | B_ERROR))
  635                 || bp->b_validend == 0
  636                 || (bp->b_validoff == 0
  637                     && bp->b_validend == bp->b_bufsize))
  638 #endif
  639             ) {
  640 
  641                 int i, j, resid;
  642                 vm_page_t m;
  643                 off_t foff;
  644                 vm_pindex_t poff;
  645                 vm_object_t obj;
  646                 struct vnode *vp;
  647 
  648                 vp = bp->b_vp;
  649 
  650                 /*
  651                  * Get the base offset and length of the buffer.  Note that 
   652                  * for block sizes that are less than PAGE_SIZE, the b_data
  653                  * base of the buffer does not represent exactly b_offset and
  654                  * neither b_offset nor b_size are necessarily page aligned.
  655                  * Instead, the starting position of b_offset is:
  656                  *
  657                  *      b_data + (b_offset & PAGE_MASK)
  658                  *
   659                  * block sizes less than DEV_BSIZE (usually 512) are not 
  660                  * supported due to the page granularity bits (m->valid,
  661                  * m->dirty, etc...). 
  662                  *
  663                  * See man buf(9) for more information
  664                  */
  665 
  666                 resid = bp->b_bufsize;
  667                 foff = bp->b_offset;
  668 
  669                 for (i = 0; i < bp->b_npages; i++) {
  670                         m = bp->b_pages[i];
  671                         vm_page_flag_clear(m, PG_ZERO);
  672                         if (m == bogus_page) {
  673 
  674                                 obj = (vm_object_t) vp->v_object;
  675                                 poff = OFF_TO_IDX(bp->b_offset);
  676 
  677                                 for (j = i; j < bp->b_npages; j++) {
  678                                         m = bp->b_pages[j];
  679                                         if (m == bogus_page) {
  680                                                 m = vm_page_lookup(obj, poff + j);
  681 #if !defined(MAX_PERF)
  682                                                 if (!m) {
  683                                                         panic("brelse: page missing\n");
  684                                                 }
  685 #endif
  686                                                 bp->b_pages[j] = m;
  687                                         }
  688                                 }
  689 
  690                                 if ((bp->b_flags & B_INVAL) == 0) {
  691                                         pmap_qenter(trunc_page((vm_offset_t)bp->b_data), bp->b_pages, bp->b_npages);
  692                                 }
  693                         }
  694                         if (bp->b_flags & (B_NOCACHE|B_ERROR)) {
  695                                 int poffset = foff & PAGE_MASK;
  696                                 int presid = resid > (PAGE_SIZE - poffset) ?
  697                                         (PAGE_SIZE - poffset) : resid;
  698 
  699                                 KASSERT(presid >= 0, ("brelse: extra page"));
  700                                 vm_page_set_invalid(m, poffset, presid);
  701                         }
  702                         resid -= PAGE_SIZE - (foff & PAGE_MASK);
  703                         foff = (foff + PAGE_SIZE) & ~PAGE_MASK;
  704                 }
  705 
  706                 if (bp->b_flags & (B_INVAL | B_RELBUF))
  707                         vfs_vmio_release(bp);
  708 
  709         } else if (bp->b_flags & B_VMIO) {
  710 
  711                 if (bp->b_flags & (B_INVAL | B_RELBUF))
  712                         vfs_vmio_release(bp);
  713 
  714         }
  715                         
  716 #if !defined(MAX_PERF)
  717         if (bp->b_qindex != QUEUE_NONE)
  718                 panic("brelse: free buffer onto another queue???");
  719 #endif
  720 
  721         /* enqueue */
  722         /* buffers with no memory */
  723         if (bp->b_bufsize == 0) {
  724                 bp->b_flags |= B_INVAL;
  725                 bp->b_qindex = QUEUE_EMPTY;
  726                 TAILQ_INSERT_HEAD(&bufqueues[QUEUE_EMPTY], bp, b_freelist);
  727                 LIST_REMOVE(bp, b_hash);
  728                 LIST_INSERT_HEAD(&invalhash, bp, b_hash);
  729                 bp->b_dev = NODEV;
  730                 kvafreespace += bp->b_kvasize;
  731 
  732         /* buffers with junk contents */
  733         } else if (bp->b_flags & (B_ERROR | B_INVAL | B_NOCACHE | B_RELBUF)) {
  734                 bp->b_flags |= B_INVAL;
  735                 bp->b_qindex = QUEUE_AGE;
  736                 TAILQ_INSERT_HEAD(&bufqueues[QUEUE_AGE], bp, b_freelist);
  737                 LIST_REMOVE(bp, b_hash);
  738                 LIST_INSERT_HEAD(&invalhash, bp, b_hash);
  739                 bp->b_dev = NODEV;
  740 
  741         /* buffers that are locked */
  742         } else if (bp->b_flags & B_LOCKED) {
  743                 bp->b_qindex = QUEUE_LOCKED;
  744                 TAILQ_INSERT_TAIL(&bufqueues[QUEUE_LOCKED], bp, b_freelist);
  745 
  746         /* buffers with stale but valid contents */
  747         } else if (bp->b_flags & B_AGE) {
  748                 bp->b_qindex = QUEUE_AGE;
  749                 TAILQ_INSERT_TAIL(&bufqueues[QUEUE_AGE], bp, b_freelist);
  750 
   751         /* buffers with valid and quite potentially reusable contents */
  752         } else {
  753                 bp->b_qindex = QUEUE_LRU;
  754                 TAILQ_INSERT_TAIL(&bufqueues[QUEUE_LRU], bp, b_freelist);
  755         }
  756 
  757         if ((bp->b_flags & B_INVAL) ||
  758                 (bp->b_flags & (B_LOCKED|B_DELWRI)) == 0) {
  759                 if (bp->b_flags & B_DELWRI) {
  760                         --numdirtybuffers;
  761                         bp->b_flags &= ~B_DELWRI;
  762                 }
  763                 vfs_bio_need_satisfy();
  764         }
  765 
  766         /* unlock */
  767         bp->b_flags &= ~(B_ORDERED | B_WANTED | B_BUSY |
  768                 B_ASYNC | B_NOCACHE | B_AGE | B_RELBUF);
  769         splx(s);
  770 }
  771 
  772 /*
  773  * Release a buffer.
  774  */
  775 void
  776 bqrelse(struct buf * bp)
  777 {
  778         int s;
  779 
  780         s = splbio();
  781 
  782         /* anyone need this block? */
  783         if (bp->b_flags & B_WANTED) {
  784                 bp->b_flags &= ~(B_WANTED | B_AGE);
  785                 wakeup(bp);
  786         } 
  787 
  788 #if !defined(MAX_PERF)
  789         if (bp->b_qindex != QUEUE_NONE)
  790                 panic("bqrelse: free buffer onto another queue???");
  791 #endif
  792 
  793         if (bp->b_flags & B_LOCKED) {
  794                 bp->b_flags &= ~B_ERROR;
  795                 bp->b_qindex = QUEUE_LOCKED;
  796                 TAILQ_INSERT_TAIL(&bufqueues[QUEUE_LOCKED], bp, b_freelist);
  797                 /* buffers with stale but valid contents */
  798         } else {
  799                 bp->b_qindex = QUEUE_LRU;
  800                 TAILQ_INSERT_TAIL(&bufqueues[QUEUE_LRU], bp, b_freelist);
  801         }
  802 
  803         if ((bp->b_flags & (B_LOCKED|B_DELWRI)) == 0) {
  804                 vfs_bio_need_satisfy();
  805         }
  806 
  807         /* unlock */
  808         bp->b_flags &= ~(B_ORDERED | B_WANTED | B_BUSY |
  809                 B_ASYNC | B_NOCACHE | B_AGE | B_RELBUF);
  810         splx(s);
  811 }
  812 
  813 static void
  814 vfs_vmio_release(bp)
  815         struct buf *bp;
  816 {
  817         int i, s;
  818         vm_page_t m;
  819 
  820         s = splvm();
  821         for (i = 0; i < bp->b_npages; i++) {
  822                 m = bp->b_pages[i];
  823                 bp->b_pages[i] = NULL;
  824                 /*
  825                  * In order to keep page LRU ordering consistent, put
  826                  * everything on the inactive queue.
  827                  */
  828                 vm_page_unwire(m, 0);
  829                 /*
  830                  * We don't mess with busy pages, it is
  831                  * the responsibility of the process that
  832                  * busied the pages to deal with them.
  833                  */
  834                 if ((m->flags & PG_BUSY) || (m->busy != 0))
  835                         continue;
  836                         
  837                 if (m->wire_count == 0) {
  838                         vm_page_flag_clear(m, PG_ZERO);
  839                         /*
  840                          * Might as well free the page if we can and it has
  841                          * no valid data.
  842                          */
  843                         if ((bp->b_flags & B_ASYNC) == 0 && !m->valid && m->hold_count == 0) {
  844                                 vm_page_busy(m);
  845                                 vm_page_protect(m, VM_PROT_NONE);
  846                                 vm_page_free(m);
  847                         }
  848                 }
  849         }
  850         splx(s);
  851         bufspace -= bp->b_bufsize;
  852         vmiospace -= bp->b_bufsize;
  853         pmap_qremove(trunc_page((vm_offset_t) bp->b_data), bp->b_npages);
  854         bp->b_npages = 0;
  855         bp->b_bufsize = 0;
  856         bp->b_flags &= ~B_VMIO;
  857         if (bp->b_vp)
  858                 brelvp(bp);
  859 }
  860 
  861 /*
  862  * Check to see if a block is currently memory resident.
  863  */
  864 struct buf *
  865 gbincore(struct vnode * vp, daddr_t blkno)
  866 {
  867         struct buf *bp;
  868         struct bufhashhdr *bh;
  869 
  870         bh = BUFHASH(vp, blkno);
  871         bp = bh->lh_first;
  872 
  873         /* Search hash chain */
  874         while (bp != NULL) {
  875                 /* hit */
  876                 if (bp->b_vp == vp && bp->b_lblkno == blkno &&
  877                     (bp->b_flags & B_INVAL) == 0) {
  878                         break;
  879                 }
  880                 bp = bp->b_hash.le_next;
  881         }
  882         return (bp);
  883 }
  884 
  885 /*
  886  * this routine implements clustered async writes for
  887  * clearing out B_DELWRI buffers...  This is much better
  888  * than the old way of writing only one buffer at a time.
  889  */
  890 int
  891 vfs_bio_awrite(struct buf * bp)
  892 {
  893         int i;
  894         daddr_t lblkno = bp->b_lblkno;
  895         struct vnode *vp = bp->b_vp;
  896         int s;
  897         int ncl;
  898         struct buf *bpa;
  899         int nwritten;
  900         int size;
  901         int maxcl;
  902 
  903         s = splbio();
  904         /*
  905          * right now we support clustered writing only to regular files
  906          */
  907         if ((vp->v_type == VREG) && 
  908             (vp->v_mount != 0) && /* Only on nodes that have the size info */
  909             (bp->b_flags & (B_CLUSTEROK | B_INVAL)) == B_CLUSTEROK) {
  910 
  911                 size = vp->v_mount->mnt_stat.f_iosize;
  912                 maxcl = MAXPHYS / size;
  913 
  914                 for (i = 1; i < maxcl; i++) {
  915                         if ((bpa = gbincore(vp, lblkno + i)) &&
  916                             ((bpa->b_flags & (B_BUSY | B_DELWRI | B_CLUSTEROK | B_INVAL)) ==
  917                             (B_DELWRI | B_CLUSTEROK)) &&
  918                             (bpa->b_bufsize == size)) {
  919                                 if ((bpa->b_blkno == bpa->b_lblkno) ||
  920                                     (bpa->b_blkno != bp->b_blkno + ((i * size) >> DEV_BSHIFT)))
  921                                         break;
  922                         } else {
  923                                 break;
  924                         }
  925                 }
  926                 ncl = i;
  927                 /*
  928                  * this is a possible cluster write
  929                  */
  930                 if (ncl != 1) {
  931                         nwritten = cluster_wbuild(vp, size, lblkno, ncl);
  932                         splx(s);
  933                         return nwritten;
  934                 }
  935         }
  936 
  937         bremfree(bp);
  938         bp->b_flags |= B_BUSY | B_ASYNC;
  939 
  940         splx(s);
  941         /*
  942          * default (old) behavior, writing out only one block
  943          */
  944         nwritten = bp->b_bufsize;
  945         (void) VOP_BWRITE(bp);
  946         return nwritten;
  947 }
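
As a worked example of the clustering bound above, assuming (hypothetically) f_iosize = 8192 and MAXPHYS = 128 KB: maxcl = 131072 / 8192 = 16, so up to 16 logically consecutive, physically adjacent delayed-write buffers of the same size can be handed to cluster_wbuild() as one transfer; anything breaking that run falls back to writing the single buffer.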
  948 
  949 
  950 /*
  951  * Find a buffer header which is available for use.
  952  */
  953 static struct buf *
  954 getnewbuf(struct vnode *vp, daddr_t blkno,
  955         int slpflag, int slptimeo, int size, int maxsize)
  956 {
  957         struct buf *bp, *bp1;
  958         int nbyteswritten = 0;
  959         vm_offset_t addr;
  960         static int writerecursion = 0;
  961 
  962 start:
  963         if (bufspace >= maxbufspace)
  964                 goto trytofreespace;
  965 
  966         /* can we constitute a new buffer? */
  967         if ((bp = TAILQ_FIRST(&bufqueues[QUEUE_EMPTY]))) {
  968 #if !defined(MAX_PERF)
  969                 if (bp->b_qindex != QUEUE_EMPTY)
  970                         panic("getnewbuf: inconsistent EMPTY queue, qindex=%d",
  971                             bp->b_qindex);
  972 #endif
  973                 bp->b_flags |= B_BUSY;
  974                 bremfree(bp);
  975                 goto fillbuf;
  976         }
  977 trytofreespace:
  978         /*
  979          * We keep the file I/O from hogging metadata I/O
  980          * This is desirable because file data is cached in the
  981          * VM/Buffer cache even if a buffer is freed.
  982          */
  983         if ((bp = TAILQ_FIRST(&bufqueues[QUEUE_AGE]))) {
  984 #if !defined(MAX_PERF)
  985                 if (bp->b_qindex != QUEUE_AGE)
  986                         panic("getnewbuf: inconsistent AGE queue, qindex=%d",
  987                             bp->b_qindex);
  988 #endif
  989         } else if ((bp = TAILQ_FIRST(&bufqueues[QUEUE_LRU]))) {
  990 #if !defined(MAX_PERF)
  991                 if (bp->b_qindex != QUEUE_LRU)
  992                         panic("getnewbuf: inconsistent LRU queue, qindex=%d",
  993                             bp->b_qindex);
  994 #endif
  995         }
  996         if (!bp) {
  997                 /* wait for a free buffer of any kind */
  998                 needsbuffer |= VFS_BIO_NEED_ANY;
  999                 do
 1000                         if (tsleep(&needsbuffer, (PRIBIO + 4) | slpflag,
 1001                             "newbuf", slptimeo))
 1002                                 return (NULL);
 1003                 while (needsbuffer & VFS_BIO_NEED_ANY);
 1004                 return (0);
 1005         }
 1006         KASSERT(!(bp->b_flags & B_BUSY), 
 1007             ("getnewbuf: busy buffer on free list\n"));
 1008         /*
 1009          * We are fairly aggressive about freeing VMIO buffers, but since
 1010          * the buffering is intact without buffer headers, there is not
 1011          * much loss.  We gain by maintaining non-VMIOed metadata in buffers.
 1012          */
 1013         if ((bp->b_qindex == QUEUE_LRU) && (bp->b_usecount > 0)) {
 1014                 if ((bp->b_flags & B_VMIO) == 0 ||
 1015                         (vmiospace < maxvmiobufspace)) {
 1016                         --bp->b_usecount;
 1017                         TAILQ_REMOVE(&bufqueues[QUEUE_LRU], bp, b_freelist);
 1018                         if (TAILQ_FIRST(&bufqueues[QUEUE_LRU]) != NULL) {
 1019                                 TAILQ_INSERT_TAIL(&bufqueues[QUEUE_LRU], bp, b_freelist);
 1020                                 goto start;
 1021                         }
 1022                         TAILQ_INSERT_TAIL(&bufqueues[QUEUE_LRU], bp, b_freelist);
 1023                 }
 1024         }
 1025 
 1026 
 1027         /* if we are a delayed write, convert to an async write */
 1028         if ((bp->b_flags & (B_DELWRI | B_INVAL)) == B_DELWRI) {
 1029 
 1030                 /*
 1031                  * If our delayed write is likely to be used soon, then
 1032                  * recycle back onto the LRU queue.
 1033                  */
 1034                 if (vp && (bp->b_vp == vp) && (bp->b_qindex == QUEUE_LRU) &&
 1035                         (bp->b_lblkno >= blkno) && (maxsize > 0)) {
 1036 
 1037                         if (bp->b_usecount > 0) {
 1038                                 if (bp->b_lblkno < blkno + (MAXPHYS / maxsize)) {
 1039 
 1040                                         TAILQ_REMOVE(&bufqueues[QUEUE_LRU], bp, b_freelist);
 1041 
 1042                                         if (TAILQ_FIRST(&bufqueues[QUEUE_LRU]) != NULL) {
 1043                                                 TAILQ_INSERT_TAIL(&bufqueues[QUEUE_LRU], bp, b_freelist);
 1044                                                 bp->b_usecount--;
 1045                                                 goto start;
 1046                                         }
 1047                                         TAILQ_INSERT_TAIL(&bufqueues[QUEUE_LRU], bp, b_freelist);
 1048                                 }
 1049                         }
 1050                 }
 1051 
 1052                 /*
 1053                  * Certain layered filesystems can recursively re-enter the vfs_bio
 1054                  * code, due to delayed writes.  This helps keep the system from
 1055                  * deadlocking.
 1056                  * This hack to avoid premature panic is courtesy of alfred
 1057                  *   (alfred@freebsd.org)
 1058                  */
 1059                 if (writerecursion > 0) {
 1060                         if (writerecursion > 5) {
 1061                                 int loop = 0;
 1062 norecurse:
 1063                                 bp = TAILQ_FIRST(&bufqueues[QUEUE_AGE]);
 1064                                 while (bp) {
 1065                                         if ((bp->b_flags & B_DELWRI) == 0)
 1066                                                 break;
 1067                                         bp = TAILQ_NEXT(bp, b_freelist);
 1068                                 }
 1069                                 if (bp == NULL) {
 1070                                         bp = TAILQ_FIRST(&bufqueues[QUEUE_LRU]);
 1071                                         while (bp) {
 1072                                                 if ((bp->b_flags & B_DELWRI) == 0)
 1073                                                         break;
 1074                                                 bp = TAILQ_NEXT(bp, b_freelist);
 1075                                         }
 1076                                 }
 1077                                 if (bp == NULL) {
 1078                                         needsbuffer |= VFS_BIO_NEED_ANY;
 1079                                         if (tsleep(&needsbuffer, 
 1080                                                 (PRIBIO + 4) | slpflag, 
 1081                                                 "nbufhack", slptimeo+1))
 1082                                                         return (NULL);
 1083                                         if (loop++ < 5)
 1084                                                 goto norecurse;
 1085                                         else
 1086                                                 goto start;
 1087                                 }
 1088                         } else {
 1089                                 bremfree(bp);
 1090                                 bp->b_flags |= B_BUSY | B_AGE | B_ASYNC;
 1091                                 nbyteswritten += bp->b_bufsize;
 1092                                 ++writerecursion;
 1093                                 VOP_BWRITE(bp);
 1094                                 --writerecursion;
 1095                                 if (!slpflag && !slptimeo) {
 1096                                         return (0);
 1097                                 }
 1098                                 goto start;
 1099                         }
 1100                 } else {
 1101                         ++writerecursion;
 1102                         nbyteswritten += vfs_bio_awrite(bp);
 1103                         --writerecursion;
 1104                         if (!slpflag && !slptimeo) {
 1105                                 return (0);
 1106                         }
 1107                         goto start;
 1108                 }
 1109         }
 1110 
 1111         if (bp->b_flags & B_WANTED) {
 1112                 bp->b_flags &= ~B_WANTED;
 1113                 wakeup(bp);
 1114         }
 1115         bremfree(bp);
 1116         bp->b_flags |= B_BUSY;
 1117 
 1118         if (bp->b_flags & B_VMIO) {
 1119                 bp->b_flags &= ~B_ASYNC;
 1120                 vfs_vmio_release(bp);
 1121         }
 1122 
 1123         if (bp->b_vp)
 1124                 brelvp(bp);
 1125 
 1126 fillbuf:
 1127 
 1128         /* we are not free, nor do we contain interesting data */
 1129         if (bp->b_rcred != NOCRED) {
 1130                 crfree(bp->b_rcred);
 1131                 bp->b_rcred = NOCRED;
 1132         }
 1133         if (bp->b_wcred != NOCRED) {
 1134                 crfree(bp->b_wcred);
 1135                 bp->b_wcred = NOCRED;
 1136         }
 1137         if (LIST_FIRST(&bp->b_dep) != NULL &&
 1138             bioops.io_deallocate)
 1139                 (*bioops.io_deallocate)(bp);
 1140 
 1141         LIST_REMOVE(bp, b_hash);
 1142         LIST_INSERT_HEAD(&invalhash, bp, b_hash);
 1143         if (bp->b_bufsize) {
 1144                 allocbuf(bp, 0);
 1145         }
 1146         bp->b_flags = B_BUSY;
 1147         bp->b_dev = NODEV;
 1148         bp->b_vp = NULL;
 1149         bp->b_blkno = bp->b_lblkno = 0;
 1150         bp->b_offset = NOOFFSET;
 1151         bp->b_iodone = 0;
 1152         bp->b_error = 0;
 1153         bp->b_resid = 0;
 1154         bp->b_bcount = 0;
 1155         bp->b_npages = 0;
 1156         bp->b_dirtyoff = bp->b_dirtyend = 0;
 1157         bp->b_validoff = bp->b_validend = 0;
 1158         bp->b_usecount = 5;
 1159         /* Here, not kern_physio.c, is where this should be done*/
 1160         LIST_INIT(&bp->b_dep);
 1161 
 1162         maxsize = (maxsize + PAGE_MASK) & ~PAGE_MASK;
 1163 
 1164         /*
 1165          * we assume that buffer_map is not at address 0
 1166          */
 1167         addr = 0;
 1168         if (maxsize != bp->b_kvasize) {
 1169                 bfreekva(bp);
 1170                 
 1171 findkvaspace:
 1172                 /*
 1173                  * See if we have buffer kva space
 1174                  */
 1175                 if (vm_map_findspace(buffer_map,
 1176                         vm_map_min(buffer_map), maxsize, &addr)) {
 1177                         if (kvafreespace > 0) {
 1178                                 int totfree = 0, freed;
 1179                                 do {
 1180                                         freed = 0;
 1181                                         for (bp1 = TAILQ_FIRST(&bufqueues[QUEUE_EMPTY]);
 1182                                                 bp1 != NULL; bp1 = TAILQ_NEXT(bp1, b_freelist)) {
 1183                                                 if (bp1->b_kvasize != 0) {
 1184                                                         totfree += bp1->b_kvasize;
 1185                                                         freed = bp1->b_kvasize;
 1186                                                         bremfree(bp1);
 1187                                                         bfreekva(bp1);
 1188                                                         brelse(bp1);
 1189                                                         break;
 1190                                                 }
 1191                                         }
 1192                                 } while (freed);
 1193                                 /*
 1194                                  * if we found free space, then retry with the same buffer.
 1195                                  */
 1196                                 if (totfree)
 1197                                         goto findkvaspace;
 1198                         }
 1199                         bp->b_flags |= B_INVAL;
 1200                         brelse(bp);
 1201                         goto trytofreespace;
 1202                 }
 1203         }
 1204 
 1205         /*
 1206          * See if we are below our allocated minimum
 1207          */
 1208         if (bufspace >= (maxbufspace + nbyteswritten)) {
 1209                 bp->b_flags |= B_INVAL;
 1210                 brelse(bp);
 1211                 goto trytofreespace;
 1212         }
 1213 
 1214         /*
 1215          * create a map entry for the buffer -- in essence
 1216          * reserving the kva space.
 1217          */
 1218         if (addr) {
 1219                 vm_map_insert(buffer_map, NULL, 0,
 1220                         addr, addr + maxsize,
 1221                         VM_PROT_ALL, VM_PROT_ALL, MAP_NOFAULT);
 1222 
 1223                 bp->b_kvabase = (caddr_t) addr;
 1224                 bp->b_kvasize = maxsize;
 1225         }
 1226         bp->b_data = bp->b_kvabase;
 1227         
 1228         return (bp);
 1229 }
 1230 
 1231 static void
 1232 waitfreebuffers(int slpflag, int slptimeo) {
 1233         while (numfreebuffers < hifreebuffers) {
 1234                 flushdirtybuffers(slpflag, slptimeo);
 1235                 if (numfreebuffers >= hifreebuffers)
 1236                         break;
 1237                 needsbuffer |= VFS_BIO_NEED_FREE;
 1238                 if (tsleep(&needsbuffer, (PRIBIO + 4)|slpflag, "biofre", slptimeo))
 1239                         break;
 1240         }
 1241 }
 1242 
 1243 static void
 1244 flushdirtybuffers(int slpflag, int slptimeo) {
 1245         int s;
 1246         static pid_t flushing = 0;
 1247 
 1248         s = splbio();
 1249 
 1250         if (flushing) {
 1251                 if (flushing == curproc->p_pid) {
 1252                         splx(s);
 1253                         return;
 1254                 }
 1255                 while (flushing) {
 1256                         if (tsleep(&flushing, (PRIBIO + 4)|slpflag, "biofls", slptimeo)) {
 1257                                 splx(s);
 1258                                 return;
 1259                         }
 1260                 }
 1261         }
 1262         flushing = curproc->p_pid;
 1263 
 1264         while (numdirtybuffers > lodirtybuffers) {
 1265                 struct buf *bp;
 1266                 needsbuffer |= VFS_BIO_NEED_LOWLIMIT;
 1267                 bp = TAILQ_FIRST(&bufqueues[QUEUE_AGE]);
 1268                 if (bp == NULL)
 1269                         bp = TAILQ_FIRST(&bufqueues[QUEUE_LRU]);
 1270 
 1271                 while (bp && ((bp->b_flags & B_DELWRI) == 0)) {
 1272                         bp = TAILQ_NEXT(bp, b_freelist);
 1273                 }
 1274 
 1275                 if (bp) {
 1276                         vfs_bio_awrite(bp);
 1277                         continue;
 1278                 }
 1279                 break;
 1280         }
 1281 
 1282         flushing = 0;
 1283         wakeup(&flushing);
 1284         splx(s);
 1285 }
 1286 
 1287 /*
 1288  * Check to see if a block is currently memory resident.
 1289  */
 1290 struct buf *
 1291 incore(struct vnode * vp, daddr_t blkno)
 1292 {
 1293         struct buf *bp;
 1294 
 1295         int s = splbio();
 1296         bp = gbincore(vp, blkno);
 1297         splx(s);
 1298         return (bp);
 1299 }
 1300 
 1301 /*
 1302  * Returns true if no I/O is needed to access the
 1303  * associated VM object.  This is like incore except
 1304  * it also hunts around in the VM system for the data.
 1305  */
 1306 
 1307 int
 1308 inmem(struct vnode * vp, daddr_t blkno)
 1309 {
 1310         vm_object_t obj;
 1311         vm_offset_t toff, tinc, size;
 1312         vm_page_t m;
 1313         vm_ooffset_t off;
 1314 
 1315         if (incore(vp, blkno))
 1316                 return 1;
 1317         if (vp->v_mount == NULL)
 1318                 return 0;
 1319         if ((vp->v_object == NULL) || (vp->v_flag & VOBJBUF) == 0)
 1320                 return 0;
 1321 
 1322         obj = vp->v_object;
 1323         size = PAGE_SIZE;
 1324         if (size > vp->v_mount->mnt_stat.f_iosize)
 1325                 size = vp->v_mount->mnt_stat.f_iosize;
 1326         off = (vm_ooffset_t)blkno * (vm_ooffset_t)vp->v_mount->mnt_stat.f_iosize;
 1327 
 1328         for (toff = 0; toff < vp->v_mount->mnt_stat.f_iosize; toff += tinc) {
 1329                 m = vm_page_lookup(obj, OFF_TO_IDX(off + toff));
 1330                 if (!m)
 1331                         return 0;
 1332                 tinc = size;
 1333                 if (tinc > PAGE_SIZE - ((toff + off) & PAGE_MASK))
 1334                         tinc = PAGE_SIZE - ((toff + off) & PAGE_MASK);
 1335                 if (vm_page_is_valid(m,
 1336                     (vm_offset_t) ((toff + off) & PAGE_MASK), tinc) == 0)
 1337                         return 0;
 1338         }
 1339         return 1;
 1340 }
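
A worked example of the scan above, assuming (hypothetically) f_iosize = 8192 and PAGE_SIZE = 4096: the block spans two VM pages, so the loop runs twice with tinc = 4096 each time, and inmem() returns 1 only if vm_page_is_valid() confirms all 4096 bytes in both pages; a single missing or partially valid page makes it return 0.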
 1341 
 1342 /*
 1343  * now we set the dirty range for the buffer --
 1344  * for NFS -- if the file is mapped and pages have
 1345  * been written to, let it know.  We want the
 1346  * entire range of the buffer to be marked dirty if
 1347  * any of the pages have been written to for consistency
 1348  * with the b_validoff, b_validend set in the nfs write
 1349  * code, and used by the nfs read code.
 1350  */
 1351 static void
 1352 vfs_setdirty(struct buf *bp) {
 1353         int i;
 1354         vm_object_t object;
 1355         vm_offset_t boffset;
 1356 #if 0
 1357         vm_offset_t offset;
 1358 #endif
 1359 
 1360         /*
 1361          * We qualify the scan for modified pages on whether the
 1362          * object has been flushed yet.  The OBJ_WRITEABLE flag
 1363          * is not cleared simply by protecting pages off.
 1364          */
 1365         if ((bp->b_flags & B_VMIO) &&
 1366                 ((object = bp->b_pages[0]->object)->flags & (OBJ_WRITEABLE|OBJ_CLEANING))) {
 1367                 /*
 1368                  * test the pages to see if they have been modified directly
 1369                  * by users through the VM system.
 1370                  */
 1371                 for (i = 0; i < bp->b_npages; i++) {
 1372                         vm_page_flag_clear(bp->b_pages[i], PG_ZERO);
 1373                         vm_page_test_dirty(bp->b_pages[i]);
 1374                 }
 1375 
 1376                 /*
 1377                  * scan forwards for the first page modified
 1378                  */
 1379                 for (i = 0; i < bp->b_npages; i++) {
 1380                         if (bp->b_pages[i]->dirty) {
 1381                                 break;
 1382                         }
 1383                 }
 1384                 boffset = (i << PAGE_SHIFT) - (bp->b_offset & PAGE_MASK);
 1385                 if (boffset < bp->b_dirtyoff) {
 1386                         bp->b_dirtyoff = max(boffset, 0);
 1387                 }
 1388 
 1389                 /*
 1390                  * scan backwards for the last page modified
 1391                  */
 1392                 for (i = bp->b_npages - 1; i >= 0; --i) {
 1393                         if (bp->b_pages[i]->dirty) {
 1394                                 break;
 1395                         }
 1396                 }
 1397                 boffset = (i + 1);
 1398 #if 0
 1399                 offset = boffset + bp->b_pages[0]->pindex;
 1400                 if (offset >= object->size)
 1401                         boffset = object->size - bp->b_pages[0]->pindex;
 1402 #endif
 1403                 boffset = (boffset << PAGE_SHIFT) - (bp->b_offset & PAGE_MASK);
 1404                 if (bp->b_dirtyend < boffset)
 1405                         bp->b_dirtyend = min(boffset, bp->b_bufsize);
 1406         }
 1407 }
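
/*
 * Editor's illustrative sketch (not part of vfs_bio.c): the dirty-range
 * widening performed by vfs_setdirty() above, reduced to plain integer
 * arithmetic.  Per-page dirty state is passed as a flag array, 'pgoff'
 * stands for (b_offset & PAGE_MASK), and a hypothetical 4096-byte page
 * size is assumed; all sk_-prefixed names are invented for this sketch.
 */
#define SK_PAGE_SHIFT   12              /* assumed: 4096-byte pages */

static void
sk_dirty_range(const int *pagedirty, int npages, long pgoff,
    long bufsize, long *dirtyoff, long *dirtyend)
{
        int first, last;
        long boff;

        /* scan forwards for the first dirty page */
        for (first = 0; first < npages; first++)
                if (pagedirty[first])
                        break;
        if (first == npages)
                return;                 /* nothing was dirtied */

        /* scan backwards for the last dirty page */
        for (last = npages - 1; last >= 0; --last)
                if (pagedirty[last])
                        break;

        /*
         * Convert page indices into byte offsets within the buffer;
         * the buffer starts 'pgoff' bytes into its first page.
         */
        boff = ((long)first << SK_PAGE_SHIFT) - pgoff;
        if (boff < 0)
                boff = 0;
        if (boff < *dirtyoff)
                *dirtyoff = boff;

        boff = ((long)(last + 1) << SK_PAGE_SHIFT) - pgoff;
        if (boff > bufsize)
                boff = bufsize;
        if (*dirtyend < boff)
                *dirtyend = boff;
}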
 1408 
 1409 /*
 1410  * Get a block given a specified block and offset into a file/device.
 1411  */
 1412 struct buf *
 1413 getblk(struct vnode * vp, daddr_t blkno, int size, int slpflag, int slptimeo)
 1414 {
 1415         struct buf *bp;
 1416         int i, s;
 1417         struct bufhashhdr *bh;
 1418 
 1419 #if !defined(MAX_PERF)
 1420         if (size > MAXBSIZE)
 1421                 panic("getblk: size(%d) > MAXBSIZE(%d)\n", size, MAXBSIZE);
 1422 #endif
 1423 
 1424         s = splbio();
 1425 loop:
 1426         if (numfreebuffers < lofreebuffers) {
 1427                 waitfreebuffers(slpflag, slptimeo);
 1428         }
 1429 
 1430         if ((bp = gbincore(vp, blkno))) {
 1431                 if (bp->b_flags & B_BUSY) {
 1432 
 1433                         bp->b_flags |= B_WANTED;
 1434                         if (bp->b_usecount < BUF_MAXUSE)
 1435                                 ++bp->b_usecount;
 1436 
 1437                         if (!tsleep(bp,
 1438                                 (PRIBIO + 4) | slpflag, "getblk", slptimeo)) {
 1439                                 goto loop;
 1440                         }
 1441 
 1442                         splx(s);
 1443                         return (struct buf *) NULL;
 1444                 }
 1445                 bp->b_flags |= B_BUSY | B_CACHE;
 1446                 bremfree(bp);
 1447 
 1448                 /*
 1449                  * check for size inconsistencies (they shouldn't happen,
 1450                  * but they do when filesystems don't handle size changes
 1451                  * correctly).  We are conservative on metadata and don't just
 1452                  * extend the buffer but write (if needed) and re-constitute it.
 1453                  */
 1454 
 1455                 if (bp->b_bcount != size) {
 1456                         if ((bp->b_flags & B_VMIO) && (size <= bp->b_kvasize)) {
 1457                                 allocbuf(bp, size);
 1458                         } else {
 1459                                 if (bp->b_flags & B_DELWRI) {
 1460                                         bp->b_flags |= B_NOCACHE;
 1461                                         VOP_BWRITE(bp);
 1462                                 } else {
 1463                                         if ((bp->b_flags & B_VMIO) &&
 1464                                            (LIST_FIRST(&bp->b_dep) == NULL)) {
 1465                                                 bp->b_flags |= B_RELBUF;
 1466                                                 brelse(bp);
 1467                                         } else {
 1468                                                 bp->b_flags |= B_NOCACHE;
 1469                                                 VOP_BWRITE(bp);
 1470                                         }
 1471                                 }
 1472                                 goto loop;
 1473                         }
 1474                 }
 1475                 KASSERT(bp->b_offset != NOOFFSET, 
 1476                     ("getblk: no buffer offset"));
 1477                 /*
 1478                  * Check that the constituted buffer really deserves to have
 1479                  * the B_CACHE bit set.  B_VMIO type buffers might not
 1480                  * contain fully valid pages.  Normal (old-style) buffers
 1481                  * should be fully valid.
 1482                  */
 1483                 if (
 1484                     (bp->b_flags & (B_VMIO|B_CACHE)) == (B_VMIO|B_CACHE) &&
 1485                     (bp->b_vp->v_tag != VT_NFS || bp->b_validend <= 0)
 1486                 ) {
 1487                         int checksize = bp->b_bufsize;
 1488                         int poffset = bp->b_offset & PAGE_MASK;
 1489                         int resid;
 1490                         for (i = 0; i < bp->b_npages; i++) {
 1491                                 resid = (checksize > (PAGE_SIZE - poffset)) ?
 1492                                         (PAGE_SIZE - poffset) : checksize;
 1493                                 if (!vm_page_is_valid(bp->b_pages[i], poffset, resid)) {
 1494                                         bp->b_flags &= ~(B_CACHE | B_DONE);
 1495                                         break;
 1496                                 }
 1497                                 checksize -= resid;
 1498                                 poffset = 0;
 1499                         }
 1500                 }
 1501 
 1502                 if (bp->b_usecount < BUF_MAXUSE)
 1503                         ++bp->b_usecount;
 1504                 splx(s);
 1505                 return (bp);
 1506         } else {
 1507                 int bsize, maxsize, vmio;
 1508                 off_t offset;
 1509 
 1510                 if (vp->v_type == VBLK)
 1511                         bsize = DEV_BSIZE;
 1512                 else if (vp->v_mountedhere)
 1513                         bsize = vp->v_mountedhere->mnt_stat.f_iosize;
 1514                 else if (vp->v_mount)
 1515                         bsize = vp->v_mount->mnt_stat.f_iosize;
 1516                 else
 1517                         bsize = size;
 1518 
 1519                 offset = (off_t)blkno * bsize;
 1520                 vmio = (vp->v_object != 0) && (vp->v_flag & VOBJBUF);
 1521                 maxsize = vmio ? size + (offset & PAGE_MASK) : size;
 1522                 maxsize = imax(maxsize, bsize);
 1523 
 1524                 if ((bp = getnewbuf(vp, blkno,
 1525                         slpflag, slptimeo, size, maxsize)) == 0) {
 1526                         if (slpflag || slptimeo) {
 1527                                 splx(s);
 1528                                 return NULL;
 1529                         }
 1530                         goto loop;
 1531                 }
 1532 
 1533                 /*
 1534                  * This code is used to make sure that a buffer is not
 1535                  * created while the getnewbuf routine is blocked.
 1536                  * This can be a problem whether the vnode is locked or not.
 1537                  */
 1538                 if (gbincore(vp, blkno)) {
 1539                         bp->b_flags |= B_INVAL;
 1540                         brelse(bp);
 1541                         goto loop;
 1542                 }
 1543 
 1544                 /*
 1545                  * Insert the buffer into the hash, so that it can
 1546                  * be found by incore.
 1547                  */
 1548                 bp->b_blkno = bp->b_lblkno = blkno;
 1549                 bp->b_offset = offset;
 1550 
 1551                 bgetvp(vp, bp);
 1552                 LIST_REMOVE(bp, b_hash);
 1553                 bh = BUFHASH(vp, blkno);
 1554                 LIST_INSERT_HEAD(bh, bp, b_hash);
 1555 
 1556                 if (vmio) {
 1557                         bp->b_flags |= (B_VMIO | B_CACHE);
 1558 #if defined(VFS_BIO_DEBUG)
 1559                         if (vp->v_type != VREG && vp->v_type != VBLK)
 1560                                 printf("getblk: vmioing file type %d???\n", vp->v_type);
 1561 #endif
 1562                 } else {
 1563                         bp->b_flags &= ~B_VMIO;
 1564                 }
 1565 
 1566                 allocbuf(bp, size);
 1567 
 1568                 splx(s);
 1569                 return (bp);
 1570         }
 1571 }
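
/*
 * Editor's illustrative sketch (not part of vfs_bio.c): roughly how a
 * caller drives getblk() for a synchronous read, in the style of the
 * bread() interface.  If the buffer does not come back B_CACHE, the
 * caller converts it into a read request, busies its pages, hands it
 * to the vnode's strategy routine, and waits for completion; otherwise
 * the cached contents are used as-is.  Credential handling and error
 * cleanup are omitted, and the two-argument VOP_STRATEGY() form is an
 * assumption of this sketch.
 */
static int
sk_read_block(struct vnode *vp, daddr_t blkno, int size, struct buf **bpp)
{
        struct buf *bp;

        bp = getblk(vp, blkno, size, 0, 0);
        *bpp = bp;
        if ((bp->b_flags & B_CACHE) == 0) {
                /* not cached: turn the buffer into a read request */
                bp->b_flags |= B_READ;
                bp->b_flags &= ~(B_DONE | B_ERROR | B_INVAL);
                vfs_busy_pages(bp, 0);
                VOP_STRATEGY(vp, bp);
                return (biowait(bp));
        }
        return (0);
}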
 1572 
 1573 /*
 1574  * Get an empty, disassociated buffer of given size.
 1575  */
 1576 struct buf *
 1577 geteblk(int size)
 1578 {
 1579         struct buf *bp;
 1580         int s;
 1581 
 1582         s = splbio();
 1583         while ((bp = getnewbuf(0, (daddr_t) 0, 0, 0, size, MAXBSIZE)) == 0);
 1584         splx(s);
 1585         allocbuf(bp, size);
 1586         bp->b_flags |= B_INVAL; /* b_dep cleared by getnewbuf() */
 1587         return (bp);
 1588 }
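
/*
 * Editor's illustrative sketch (not part of vfs_bio.c): geteblk() hands
 * back a busy, vnode-less buffer that can be used as temporary storage
 * and then released.  Because B_INVAL is set, brelse() discards the
 * contents instead of caching them.  The sk_ name and the 8K size are
 * arbitrary choices for this sketch.
 */
static void
sk_scratch_buffer_example(void)
{
        struct buf *bp;

        bp = geteblk(8192);                     /* 8K of anonymous buffer space */
        bzero(bp->b_data, bp->b_bcount);        /* use b_data as scratch memory */
        brelse(bp);                             /* discarded, not re-cached */
}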
 1589 
 1590 
 1591 /*
 1592  * This code constitutes the buffer memory from either anonymous system
 1593  * memory (in the case of non-VMIO operations) or from an associated
 1594  * VM object (in the case of VMIO operations).
 1595  *
 1596  * Note that this code is tricky, and has many complications to resolve
 1597  * deadlock or inconsistent data situations.  Tread lightly!!!
 1598  *
 1599  * Modify the length of a buffer's underlying buffer storage without
 1600  * destroying information (unless, of course the buffer is shrinking).
 1601  */
 1602 int
 1603 allocbuf(struct buf * bp, int size)
 1604 {
 1605 
 1606         int s;
 1607         int newbsize, mbsize;
 1608         int i;
 1609 
 1610 #if !defined(MAX_PERF)
 1611         if (!(bp->b_flags & B_BUSY))
 1612                 panic("allocbuf: buffer not busy");
 1613 
 1614         if (bp->b_kvasize < size)
 1615                 panic("allocbuf: buffer too small");
 1616 #endif
 1617 
 1618         if ((bp->b_flags & B_VMIO) == 0) {
 1619                 caddr_t origbuf;
 1620                 int origbufsize;
 1621                 /*
 1622                  * Just get anonymous memory from the kernel
 1623                  */
 1624                 mbsize = (size + DEV_BSIZE - 1) & ~(DEV_BSIZE - 1);
 1625 #if !defined(NO_B_MALLOC)
 1626                 if (bp->b_flags & B_MALLOC)
 1627                         newbsize = mbsize;
 1628                 else
 1629 #endif
 1630                         newbsize = round_page(size);
 1631 
 1632                 if (newbsize < bp->b_bufsize) {
 1633 #if !defined(NO_B_MALLOC)
 1634                         /*
 1635                          * malloced buffers are not shrunk
 1636                          */
 1637                         if (bp->b_flags & B_MALLOC) {
 1638                                 if (newbsize) {
 1639                                         bp->b_bcount = size;
 1640                                 } else {
 1641                                         free(bp->b_data, M_BIOBUF);
 1642                                         bufspace -= bp->b_bufsize;
 1643                                         bufmallocspace -= bp->b_bufsize;
 1644                                         bp->b_data = bp->b_kvabase;
 1645                                         bp->b_bufsize = 0;
 1646                                         bp->b_bcount = 0;
 1647                                         bp->b_flags &= ~B_MALLOC;
 1648                                 }
 1649                                 return 1;
 1650                         }               
 1651 #endif
 1652                         vm_hold_free_pages(
 1653                             bp,
 1654                             (vm_offset_t) bp->b_data + newbsize,
 1655                             (vm_offset_t) bp->b_data + bp->b_bufsize);
 1656                 } else if (newbsize > bp->b_bufsize) {
 1657 #if !defined(NO_B_MALLOC)
 1658                         /*
 1659                          * We only use malloced memory on the first allocation,
 1660                          * and revert to page-allocated memory when the buffer grows.
 1661                          */
 1662                         if ( (bufmallocspace < maxbufmallocspace) &&
 1663                                 (bp->b_bufsize == 0) &&
 1664                                 (mbsize <= PAGE_SIZE/2)) {
 1665 
 1666                                 bp->b_data = malloc(mbsize, M_BIOBUF, M_WAITOK);
 1667                                 bp->b_bufsize = mbsize;
 1668                                 bp->b_bcount = size;
 1669                                 bp->b_flags |= B_MALLOC;
 1670                                 bufspace += mbsize;
 1671                                 bufmallocspace += mbsize;
 1672                                 return 1;
 1673                         }
 1674 #endif
 1675                         origbuf = NULL;
 1676                         origbufsize = 0;
 1677 #if !defined(NO_B_MALLOC)
 1678                         /*
 1679                          * If the buffer is growing on its other-than-first allocation,
 1680                          * then we revert to the page-allocation scheme.
 1681                          */
 1682                         if (bp->b_flags & B_MALLOC) {
 1683                                 origbuf = bp->b_data;
 1684                                 origbufsize = bp->b_bufsize;
 1685                                 bp->b_data = bp->b_kvabase;
 1686                                 bufspace -= bp->b_bufsize;
 1687                                 bufmallocspace -= bp->b_bufsize;
 1688                                 bp->b_bufsize = 0;
 1689                                 bp->b_flags &= ~B_MALLOC;
 1690                                 newbsize = round_page(newbsize);
 1691                         }
 1692 #endif
 1693                         vm_hold_load_pages(
 1694                             bp,
 1695                             (vm_offset_t) bp->b_data + bp->b_bufsize,
 1696                             (vm_offset_t) bp->b_data + newbsize);
 1697 #if !defined(NO_B_MALLOC)
 1698                         if (origbuf) {
 1699                                 bcopy(origbuf, bp->b_data, origbufsize);
 1700                                 free(origbuf, M_BIOBUF);
 1701                         }
 1702 #endif
 1703                 }
 1704         } else {
 1705                 vm_page_t m;
 1706                 int desiredpages;
 1707 
 1708                 newbsize = (size + DEV_BSIZE - 1) & ~(DEV_BSIZE - 1);
 1709                 desiredpages = (size == 0) ? 0 :
 1710                         num_pages((bp->b_offset & PAGE_MASK) + newbsize);
 1711 
 1712 #if !defined(NO_B_MALLOC)
 1713                 if (bp->b_flags & B_MALLOC)
 1714                         panic("allocbuf: VMIO buffer can't be malloced");
 1715 #endif
 1716 
 1717                 if (newbsize < bp->b_bufsize) {
 1718                         if (desiredpages < bp->b_npages) {
 1719                                 for (i = desiredpages; i < bp->b_npages; i++) {
 1720                                         /*
 1721                                          * the page is not freed here -- it
 1722                                          * is the responsibility of 
 1723                                          * vnode_pager_setsize.  However, we
 1724                                          * have to wait if it is busy in order
 1725                                          * to be able to unwire the page.
 1726                                          */
 1727                                         m = bp->b_pages[i];
 1728                                         KASSERT(m != bogus_page,
 1729                                             ("allocbuf: bogus page found"));
 1730 
 1731                                         while(vm_page_sleep(m, "biodep", &m->busy))
 1732                                                 ;
 1733 
 1734                                         bp->b_pages[i] = NULL;
 1735                                         vm_page_unwire(m, 0);
 1736                                 }
 1737                                 pmap_qremove((vm_offset_t) trunc_page((vm_offset_t)bp->b_data) +
 1738                                     (desiredpages << PAGE_SHIFT), (bp->b_npages - desiredpages));
 1739                                 bp->b_npages = desiredpages;
 1740                         }
 1741                 } else if (newbsize > bp->b_bufsize) {
 1742                         vm_object_t obj;
 1743                         vm_offset_t tinc, toff;
 1744                         vm_ooffset_t off;
 1745                         vm_pindex_t objoff;
 1746                         int pageindex, curbpnpages;
 1747                         struct vnode *vp;
 1748                         int bsize;
 1749                         int orig_validoff = bp->b_validoff;
 1750                         int orig_validend = bp->b_validend;
 1751 
 1752                         vp = bp->b_vp;
 1753 
 1754                         if (vp->v_type == VBLK)
 1755                                 bsize = DEV_BSIZE;
 1756                         else
 1757                                 bsize = vp->v_mount->mnt_stat.f_iosize;
 1758 
 1759                         if (bp->b_npages < desiredpages) {
 1760                                 obj = vp->v_object;
 1761                                 tinc = PAGE_SIZE;
 1762 
 1763                                 off = bp->b_offset;
 1764                                 KASSERT(bp->b_offset != NOOFFSET,
 1765                                     ("allocbuf: no buffer offset"));
 1766                                 curbpnpages = bp->b_npages;
 1767                 doretry:
 1768                                 bp->b_validoff = orig_validoff;
 1769                                 bp->b_validend = orig_validend;
 1770                                 bp->b_flags |= B_CACHE;
 1771                                 for (toff = 0; toff < newbsize; toff += tinc) {
 1772                                         objoff = OFF_TO_IDX(off + toff);
 1773                                         pageindex = objoff - OFF_TO_IDX(off);
 1774                                         tinc = PAGE_SIZE - ((off + toff) & PAGE_MASK);
 1775                                         if (pageindex < curbpnpages) {
 1776 
 1777                                                 m = bp->b_pages[pageindex];
 1778 #ifdef VFS_BIO_DIAG
 1779                                                 if (m->pindex != objoff)
 1780                                                         panic("allocbuf: page changed offset?!!!?");
 1781 #endif
 1782                                                 if (tinc > (newbsize - toff))
 1783                                                         tinc = newbsize - toff;
 1784                                                 if (bp->b_flags & B_CACHE)
 1785                                                         vfs_buf_set_valid(bp, off, toff, tinc, m);
 1786                                                 continue;
 1787                                         }
 1788                                         m = vm_page_lookup(obj, objoff);
 1789                                         if (!m) {
 1790                                                 m = vm_page_alloc(obj, objoff, VM_ALLOC_NORMAL);
 1791                                                 if (!m) {
 1792                                                         VM_WAIT;
 1793                                                         vm_pageout_deficit += (desiredpages - curbpnpages);
 1794                                                         goto doretry;
 1795                                                 }
 1796 
 1797                                                 vm_page_wire(m);
 1798                                                 vm_page_flag_clear(m, PG_BUSY);
 1799                                                 bp->b_flags &= ~B_CACHE;
 1800 
 1801                                         } else if (m->flags & PG_BUSY) {
 1802                                                 s = splvm();
 1803                                                 if (m->flags & PG_BUSY) {
 1804                                                         vm_page_flag_set(m, PG_WANTED);
 1805                                                         tsleep(m, PVM, "pgtblk", 0);
 1806                                                 }
 1807                                                 splx(s);
 1808                                                 goto doretry;
 1809                                         } else {
 1810                                                 if ((curproc != pageproc) &&
 1811                                                         ((m->queue - m->pc) == PQ_CACHE) &&
 1812                                                     ((cnt.v_free_count + cnt.v_cache_count) <
 1813                                                                 (cnt.v_free_min + cnt.v_cache_min))) {
 1814                                                         pagedaemon_wakeup();
 1815                                                 }
 1816                                                 if (tinc > (newbsize - toff))
 1817                                                         tinc = newbsize - toff;
 1818                                                 if (bp->b_flags & B_CACHE)
 1819                                                         vfs_buf_set_valid(bp, off, toff, tinc, m);
 1820                                                 vm_page_flag_clear(m, PG_ZERO);
 1821                                                 vm_page_wire(m);
 1822                                         }
 1823                                         bp->b_pages[pageindex] = m;
 1824                                         curbpnpages = pageindex + 1;
 1825                                 }
 1826                                 if (vp->v_tag == VT_NFS && 
 1827                                     vp->v_type != VBLK) {
 1828                                         if (bp->b_dirtyend > 0) {
 1829                                                 bp->b_validoff = min(bp->b_validoff, bp->b_dirtyoff);
 1830                                                 bp->b_validend = max(bp->b_validend, bp->b_dirtyend);
 1831                                         }
 1832                                         if (bp->b_validend == 0)
 1833                                                 bp->b_flags &= ~B_CACHE;
 1834                                 }
 1835                                 bp->b_data = (caddr_t) trunc_page((vm_offset_t)bp->b_data);
 1836                                 bp->b_npages = curbpnpages;
 1837                                 pmap_qenter((vm_offset_t) bp->b_data,
 1838                                         bp->b_pages, bp->b_npages);
 1839                                 ((vm_offset_t) bp->b_data) |= off & PAGE_MASK;
 1840                         }
 1841                 }
 1842         }
 1843         if (bp->b_flags & B_VMIO)
 1844                 vmiospace += (newbsize - bp->b_bufsize);
 1845         bufspace += (newbsize - bp->b_bufsize);
 1846         bp->b_bufsize = newbsize;
 1847         bp->b_bcount = size;
 1848         return 1;
 1849 }
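
/*
 * Editor's illustrative sketch (not part of vfs_bio.c): the two size
 * roundings allocbuf() applies to a VMIO buffer.  The byte count is
 * rounded up to whole DEV_BSIZE sectors, and the number of backing
 * pages is derived from the buffer's offset within its first page plus
 * that rounded size.  The 512-byte sector and 4096-byte page sizes and
 * all sk_-prefixed names are assumptions of this sketch.  For example,
 * a buffer that starts 1024 bytes into a page and is 8192 bytes long
 * needs 3 backing pages.
 */
#define SK_DEV_BSIZE    512
#define SK_PAGE_SHIFT   12
#define SK_PAGE_SIZE    (1 << SK_PAGE_SHIFT)
#define SK_PAGE_MASK    (SK_PAGE_SIZE - 1)

/* round a request up to a whole number of DEV_BSIZE sectors */
static int
sk_round_bsize(int size)
{
        return ((size + SK_DEV_BSIZE - 1) & ~(SK_DEV_BSIZE - 1));
}

/* pages needed to back 'newbsize' bytes starting 'pgoff' bytes into a page */
static int
sk_desired_pages(long pgoff, int newbsize)
{
        if (newbsize == 0)
                return (0);
        return ((pgoff + newbsize + SK_PAGE_MASK) >> SK_PAGE_SHIFT);
}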
 1850 
 1851 /*
 1852  * Wait for buffer I/O completion, returning error status.
 1853  */
 1854 int
 1855 biowait(register struct buf * bp)
 1856 {
 1857         int s;
 1858 
 1859         s = splbio();
 1860         while ((bp->b_flags & B_DONE) == 0)
 1861 #if defined(NO_SCHEDULE_MODS)
 1862                 tsleep(bp, PRIBIO, "biowait", 0);
 1863 #else
 1864                 if (bp->b_flags & B_READ)
 1865                         tsleep(bp, PRIBIO, "biord", 0);
 1866                 else
 1867                         tsleep(bp, PRIBIO, "biowr", 0);
 1868 #endif
 1869         splx(s);
 1870         if (bp->b_flags & B_EINTR) {
 1871                 bp->b_flags &= ~B_EINTR;
 1872                 return (EINTR);
 1873         }
 1874         if (bp->b_flags & B_ERROR) {
 1875                 return (bp->b_error ? bp->b_error : EIO);
 1876         } else {
 1877                 return (0);
 1878         }
 1879 }
 1880 
 1881 /*
 1882  * Finish I/O on a buffer, calling an optional function.
 1883  * This is usually called from interrupt level, so process blocking
 1884  * is not *a good idea*.
 1885  */
 1886 void
 1887 biodone(register struct buf * bp)
 1888 {
 1889         int s;
 1890 
 1891         s = splbio();
 1892 
 1893 #if !defined(MAX_PERF)
 1894         if (!(bp->b_flags & B_BUSY))
 1895                 panic("biodone: buffer not busy");
 1896 #endif
 1897 
 1898         if (bp->b_flags & B_DONE) {
 1899                 splx(s);
 1900 #if !defined(MAX_PERF)
 1901                 printf("biodone: buffer already done\n");
 1902 #endif
 1903                 return;
 1904         }
 1905         bp->b_flags |= B_DONE;
 1906 
 1907         if (bp->b_flags & B_FREEBUF) {
 1908                 brelse(bp);
 1909                 splx(s);
 1910                 return;
 1911         }
 1912 
 1913         if ((bp->b_flags & B_READ) == 0) {
 1914                 vwakeup(bp);
 1915         }
 1916 
 1917         /* call optional completion function if requested */
 1918         if (bp->b_flags & B_CALL) {
 1919                 bp->b_flags &= ~B_CALL;
 1920                 (*bp->b_iodone) (bp);
 1921                 splx(s);
 1922                 return;
 1923         }
 1924         if (LIST_FIRST(&bp->b_dep) != NULL && bioops.io_complete)
 1925                 (*bioops.io_complete)(bp);
 1926 
 1927         if (bp->b_flags & B_VMIO) {
 1928                 int i, resid;
 1929                 vm_ooffset_t foff;
 1930                 vm_page_t m;
 1931                 vm_object_t obj;
 1932                 int iosize;
 1933                 struct vnode *vp = bp->b_vp;
 1934 
 1935                 obj = vp->v_object;
 1936 
 1937 #if defined(VFS_BIO_DEBUG)
 1938                 if (vp->v_usecount == 0) {
 1939                         panic("biodone: zero vnode ref count");
 1940                 }
 1941 
 1942                 if (vp->v_object == NULL) {
 1943                         panic("biodone: missing VM object");
 1944                 }
 1945 
 1946                 if ((vp->v_flag & VOBJBUF) == 0) {
 1947                         panic("biodone: vnode is not setup for merged cache");
 1948                 }
 1949 #endif
 1950 
 1951                 foff = bp->b_offset;
 1952                 KASSERT(bp->b_offset != NOOFFSET,
 1953                     ("biodone: no buffer offset"));
 1954 
 1955 #if !defined(MAX_PERF)
 1956                 if (!obj) {
 1957                         panic("biodone: no object");
 1958                 }
 1959 #endif
 1960 #if defined(VFS_BIO_DEBUG)
 1961                 if (obj->paging_in_progress < bp->b_npages) {
 1962                         printf("biodone: paging in progress(%d) < bp->b_npages(%d)\n",
 1963                             obj->paging_in_progress, bp->b_npages);
 1964                 }
 1965 #endif
 1966                 iosize = bp->b_bufsize;
 1967                 for (i = 0; i < bp->b_npages; i++) {
 1968                         int bogusflag = 0;
 1969                         m = bp->b_pages[i];
 1970                         if (m == bogus_page) {
 1971                                 bogusflag = 1;
 1972                                 m = vm_page_lookup(obj, OFF_TO_IDX(foff));
 1973                                 if (!m) {
 1974 #if defined(VFS_BIO_DEBUG)
 1975                                         printf("biodone: page disappeared\n");
 1976 #endif
 1977                                         vm_object_pip_subtract(obj, 1);
 1978                                         continue;
 1979                                 }
 1980                                 bp->b_pages[i] = m;
 1981                                 pmap_qenter(trunc_page((vm_offset_t)bp->b_data), bp->b_pages, bp->b_npages);
 1982                         }
 1983 #if defined(VFS_BIO_DEBUG)
 1984                         if (OFF_TO_IDX(foff) != m->pindex) {
 1985                                 printf(
 1986 "biodone: foff(%lu)/m->pindex(%d) mismatch\n",
 1987                                     (unsigned long)foff, m->pindex);
 1988                         }
 1989 #endif
 1990                         resid = IDX_TO_OFF(m->pindex + 1) - foff;
 1991                         if (resid > iosize)
 1992                                 resid = iosize;
 1993 
 1994                         /*
 1995                          * In the write case, the valid and clean bits are
 1996                          * already changed correctly, so we only need to do this
 1997                          * here in the read case.
 1998                          */
 1999                         if ((bp->b_flags & B_READ) && !bogusflag && resid > 0) {
 2000                                 vfs_page_set_valid(bp, foff, i, m);
 2001                         }
 2002                         vm_page_flag_clear(m, PG_ZERO);
 2003 
 2004                         /*
 2005                          * when debugging new filesystems or buffer I/O methods, this
 2006                          * is the most common error that pops up.  if you see this, you
 2007                          * have not set the page busy flag correctly!!!
 2008                          */
 2009                         if (m->busy == 0) {
 2010 #if !defined(MAX_PERF)
 2011                                 printf("biodone: page busy < 0, "
 2012                                     "pindex: %d, foff: 0x(%x,%x), "
 2013                                     "resid: %d, index: %d\n",
 2014                                     (int) m->pindex, (int)(foff >> 32),
 2015                                                 (int) foff & 0xffffffff, resid, i);
 2016 #endif
 2017                                 if (vp->v_type != VBLK)
 2018 #if !defined(MAX_PERF)
 2019                                         printf(" iosize: %ld, lblkno: %d, flags: 0x%lx, npages: %d\n",
 2020                                             bp->b_vp->v_mount->mnt_stat.f_iosize,
 2021                                             (int) bp->b_lblkno,
 2022                                             bp->b_flags, bp->b_npages);
 2023                                 else
 2024                                         printf(" VDEV, lblkno: %d, flags: 0x%lx, npages: %d\n",
 2025                                             (int) bp->b_lblkno,
 2026                                             bp->b_flags, bp->b_npages);
 2027                                 printf(" valid: 0x%x, dirty: 0x%x, wired: %d\n",
 2028                                     m->valid, m->dirty, m->wire_count);
 2029 #endif
 2030                                 panic("biodone: page busy < 0\n");
 2031                         }
 2032                         vm_page_io_finish(m);
 2033                         vm_object_pip_subtract(obj, 1);
 2034                         foff += resid;
 2035                         iosize -= resid;
 2036                 }
 2037                 if (obj &&
 2038                         (obj->paging_in_progress == 0) &&
 2039                     (obj->flags & OBJ_PIPWNT)) {
 2040                         vm_object_clear_flag(obj, OBJ_PIPWNT);
 2041                         wakeup(obj);
 2042                 }
 2043         }
 2044         /*
 2045          * For asynchronous completions, release the buffer now.  brelse
 2046          * checks for B_WANTED and will do the wakeup there if necessary, so
 2047          * there is no need to do a wakeup here in the async case.
 2048          */
 2049 
 2050         if (bp->b_flags & B_ASYNC) {
 2051                 if ((bp->b_flags & (B_NOCACHE | B_INVAL | B_ERROR | B_RELBUF)) != 0)
 2052                         brelse(bp);
 2053                 else
 2054                         bqrelse(bp);
 2055         } else {
 2056                 bp->b_flags &= ~B_WANTED;
 2057                 wakeup(bp);
 2058         }
 2059         splx(s);
 2060 }
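
/*
 * Editor's illustrative sketch (not part of vfs_bio.c): what the
 * producer side of an I/O typically does when a transfer finishes.
 * The device or filesystem code records the residual byte count and
 * any error, then calls biodone(), which performs the VMIO page
 * bookkeeping above and either wakes a biowait() sleeper or releases
 * the buffer itself for B_ASYNC requests.  The sk_ name is an
 * invention of this sketch.
 */
static void
sk_io_finished(struct buf *bp, int error, long bytes_not_transferred)
{
        bp->b_resid = bytes_not_transferred;
        if (error) {
                bp->b_error = error;
                bp->b_flags |= B_ERROR;
        }
        biodone(bp);
}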
 2061 
 2062 #if 0   /* not with kirks code */
 2063 static int vfs_update_interval = 30;
 2064 
 2065 static void
 2066 vfs_update()
 2067 {
 2068         while (1) {
 2069                 tsleep(&vfs_update_wakeup, PUSER, "update",
 2070                     hz * vfs_update_interval);
 2071                 vfs_update_wakeup = 0;
 2072                 sync(curproc, NULL);
 2073         }
 2074 }
 2075 
 2076 static int
 2077 sysctl_kern_updateinterval SYSCTL_HANDLER_ARGS
 2078 {
 2079         int error = sysctl_handle_int(oidp,
 2080                 oidp->oid_arg1, oidp->oid_arg2, req);
 2081         if (!error)
 2082                 wakeup(&vfs_update_wakeup);
 2083         return error;
 2084 }
 2085 
 2086 SYSCTL_PROC(_kern, KERN_UPDATEINTERVAL, update, CTLTYPE_INT|CTLFLAG_RW,
 2087         &vfs_update_interval, 0, sysctl_kern_updateinterval, "I", "");
 2088 
 2089 #endif
 2090 
 2091 
 2092 /*
 2093  * This routine is called in lieu of biodone in the case of
 2094  * incomplete I/O.  This keeps the busy status for pages
 2095  * consistent.
 2096  */
 2097 void
 2098 vfs_unbusy_pages(struct buf * bp)
 2099 {
 2100         int i;
 2101 
 2102         if (bp->b_flags & B_VMIO) {
 2103                 struct vnode *vp = bp->b_vp;
 2104                 vm_object_t obj = vp->v_object;
 2105 
 2106                 for (i = 0; i < bp->b_npages; i++) {
 2107                         vm_page_t m = bp->b_pages[i];
 2108 
 2109                         if (m == bogus_page) {
 2110                                 m = vm_page_lookup(obj, OFF_TO_IDX(bp->b_offset) + i);
 2111 #if !defined(MAX_PERF)
 2112                                 if (!m) {
 2113                                         panic("vfs_unbusy_pages: page missing\n");
 2114                                 }
 2115 #endif
 2116                                 bp->b_pages[i] = m;
 2117                                 pmap_qenter(trunc_page((vm_offset_t)bp->b_data), bp->b_pages, bp->b_npages);
 2118                         }
 2119                         vm_object_pip_subtract(obj, 1);
 2120                         vm_page_flag_clear(m, PG_ZERO);
 2121                         vm_page_io_finish(m);
 2122                 }
 2123                 if (obj->paging_in_progress == 0 &&
 2124                     (obj->flags & OBJ_PIPWNT)) {
 2125                         vm_object_clear_flag(obj, OBJ_PIPWNT);
 2126                         wakeup(obj);
 2127                 }
 2128         }
 2129 }
 2130 
 2131 /*
 2132  * Set NFS' b_validoff and b_validend fields from the valid bits
 2133  * of a page.  If the consumer is not NFS, and the page is not
 2134  * valid for the entire range, clear the B_CACHE flag to force
 2135  * the consumer to re-read the page.
 2136  */
 2137 static void
 2138 vfs_buf_set_valid(struct buf *bp,
 2139                   vm_ooffset_t foff, vm_offset_t off, vm_offset_t size,
 2140                   vm_page_t m)
 2141 {
 2142         if (bp->b_vp->v_tag == VT_NFS && bp->b_vp->v_type != VBLK) {
 2143                 vm_offset_t svalid, evalid;
 2144                 int validbits = m->valid >> (((foff+off)&PAGE_MASK)/DEV_BSIZE);
 2145 
 2146                 /*
 2147                  * This only bothers with the first valid range in the
 2148                  * page.
 2149                  */
 2150                 svalid = off;
 2151                 while (validbits && !(validbits & 1)) {
 2152                         svalid += DEV_BSIZE;
 2153                         validbits >>= 1;
 2154                 }
 2155                 evalid = svalid;
 2156                 while (validbits & 1) {
 2157                         evalid += DEV_BSIZE;
 2158                         validbits >>= 1;
 2159                 }
 2160                 evalid = min(evalid, off + size);
 2161                 /*
 2162                  * Make sure this range is contiguous with the range
 2163                  * built up from previous pages.  If not, then we will
 2164                  * just use the range from the previous pages.
 2165                  */
 2166                 if (svalid == bp->b_validend) {
 2167                         bp->b_validoff = min(bp->b_validoff, svalid);
 2168                         bp->b_validend = max(bp->b_validend, evalid);
 2169                 }
 2170         } else if (!vm_page_is_valid(m,
 2171                                      (vm_offset_t) ((foff + off) & PAGE_MASK),
 2172                                      size)) {
 2173                 bp->b_flags &= ~B_CACHE;
 2174         }
 2175 }
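
/*
 * Editor's illustrative sketch (not part of vfs_bio.c): the valid-bit
 * scan used by vfs_buf_set_valid() above, in isolation.  A page keeps
 * one valid bit per DEV_BSIZE chunk; 'validbits' is assumed to already
 * be shifted so that bit 0 corresponds to the chunk containing 'off'.
 * The scan skips leading invalid chunks and then measures the first
 * contiguous run of valid ones.  The 512-byte chunk size and sk_ names
 * are assumptions of this sketch.
 */
#define SK_DEV_BSIZE    512

static void
sk_first_valid_run(int validbits, unsigned long off,
    unsigned long *svalid, unsigned long *evalid)
{
        *svalid = off;
        while (validbits && !(validbits & 1)) {
                *svalid += SK_DEV_BSIZE;
                validbits >>= 1;
        }
        *evalid = *svalid;
        while (validbits & 1) {
                *evalid += SK_DEV_BSIZE;
                validbits >>= 1;
        }
}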
 2176 
 2177 /*
 2178  * Set the valid bits in a page, taking care of the b_validoff,
 2179  * b_validend fields which NFS uses to optimise small reads.  Off is
 2180  * the offset within the file and pageno is the page index within the buf.
 2181  */
 2182 static void
 2183 vfs_page_set_valid(struct buf *bp, vm_ooffset_t off, int pageno, vm_page_t m)
 2184 {
 2185         struct vnode *vp = bp->b_vp;
 2186         vm_ooffset_t soff, eoff;
 2187 
 2188         soff = off;
 2189         eoff = (off + PAGE_SIZE) & ~PAGE_MASK;
 2190         if (eoff > bp->b_offset + bp->b_bufsize)
 2191                 eoff = bp->b_offset + bp->b_bufsize;
 2192         if (vp->v_tag == VT_NFS && vp->v_type != VBLK) {
 2193                 vm_ooffset_t sv, ev;
 2194                 vm_page_set_invalid(m,
 2195                     (vm_offset_t) (soff & PAGE_MASK),
 2196                     (vm_offset_t) (eoff - soff));
 2197                 sv = (bp->b_offset + bp->b_validoff + DEV_BSIZE - 1) & ~(DEV_BSIZE - 1);
 2198                 ev = (bp->b_offset + bp->b_validend + (DEV_BSIZE - 1)) & 
 2199                     ~(DEV_BSIZE - 1);
 2200                 soff = qmax(sv, soff);
 2201                 eoff = qmin(ev, eoff);
 2202         }
 2203         if (eoff > soff)
 2204                 vm_page_set_validclean(m,
 2205                (vm_offset_t) (soff & PAGE_MASK),
 2206                (vm_offset_t) (eoff - soff));
 2207 }
 2208 
 2209 /*
 2210  * This routine is called before a device strategy routine.
 2211  * It is used to tell the VM system that paging I/O is in
 2212  * progress, and treat the pages associated with the buffer
 2213  * almost as being PG_BUSY.  Also the object paging_in_progress
 2214  * flag is handled to make sure that the object doesn't become
 2215  * inconsistent.
 2216  */
 2217 void
 2218 vfs_busy_pages(struct buf * bp, int clear_modify)
 2219 {
 2220         int i, bogus;
 2221 
 2222         if (bp->b_flags & B_VMIO) {
 2223                 struct vnode *vp = bp->b_vp;
 2224                 vm_object_t obj = vp->v_object;
 2225                 vm_ooffset_t foff;
 2226 
 2227                 foff = bp->b_offset;
 2228                 KASSERT(bp->b_offset != NOOFFSET,
 2229                     ("vfs_busy_pages: no buffer offset"));
 2230                 vfs_setdirty(bp);
 2231 
 2232 retry:
 2233                 for (i = 0; i < bp->b_npages; i++) {
 2234                         vm_page_t m = bp->b_pages[i];
 2235                         if (vm_page_sleep(m, "vbpage", NULL))
 2236                                 goto retry;
 2237                 }
 2238 
 2239                 bogus = 0;
 2240                 for (i = 0; i < bp->b_npages; i++) {
 2241                         vm_page_t m = bp->b_pages[i];
 2242 
 2243                         vm_page_flag_clear(m, PG_ZERO);
 2244                         if ((bp->b_flags & B_CLUSTER) == 0) {
 2245                                 vm_object_pip_add(obj, 1);
 2246                                 vm_page_io_start(m);
 2247                         }
 2248 
 2249                         vm_page_protect(m, VM_PROT_NONE);
 2250                         if (clear_modify)
 2251                                 vfs_page_set_valid(bp, foff, i, m);
 2252                         else if (m->valid == VM_PAGE_BITS_ALL &&
 2253                                 (bp->b_flags & B_CACHE) == 0) {
 2254                                 bp->b_pages[i] = bogus_page;
 2255                                 bogus++;
 2256                         }
 2257                         foff = (foff + PAGE_SIZE) & ~PAGE_MASK;
 2258                 }
 2259                 if (bogus)
 2260                         pmap_qenter(trunc_page((vm_offset_t)bp->b_data), bp->b_pages, bp->b_npages);
 2261         }
 2262 }
 2263 
 2264 /*
 2265  * Tell the VM system that the pages associated with this buffer
 2266  * are clean.  This is used for delayed writes where the data is
 2267  * going to go to disk eventually without additional VM intervention.
 2268  */
 2269 void
 2270 vfs_clean_pages(struct buf * bp)
 2271 {
 2272         int i;
 2273 
 2274         if (bp->b_flags & B_VMIO) {
 2275                 vm_ooffset_t foff;
 2276                 foff = bp->b_offset;
 2277                 KASSERT(bp->b_offset != NOOFFSET,
 2278                     ("vfs_clean_pages: no buffer offset"));
 2279                 for (i = 0; i < bp->b_npages; i++) {
 2280                         vm_page_t m = bp->b_pages[i];
 2281                         vfs_page_set_valid(bp, foff, i, m);
 2282                         foff = (foff + PAGE_SIZE) & ~PAGE_MASK;
 2283                 }
 2284         }
 2285 }
 2286 
 2287 void
 2288 vfs_bio_clrbuf(struct buf *bp) {
 2289         int i, mask = 0;
 2290         caddr_t sa, ea;
 2291         if ((bp->b_flags & (B_VMIO | B_MALLOC)) == B_VMIO) {
 2292                 if( (bp->b_npages == 1) && (bp->b_bufsize < PAGE_SIZE) &&
 2293                     (bp->b_offset & PAGE_MASK) == 0) {
 2294                         mask = (1 << (bp->b_bufsize / DEV_BSIZE)) - 1;
 2295                         if (((bp->b_pages[0]->flags & PG_ZERO) == 0) &&
 2296                             ((bp->b_pages[0]->valid & mask) != mask)) {
 2297                                 bzero(bp->b_data, bp->b_bufsize);
 2298                         }
 2299                         bp->b_pages[0]->valid |= mask;
 2300                         bp->b_resid = 0;
 2301                         return;
 2302                 }
 2303                 ea = sa = bp->b_data;
 2304                 for(i=0;i<bp->b_npages;i++,sa=ea) {
 2305                         int j = ((u_long)sa & PAGE_MASK) / DEV_BSIZE;
 2306                         ea = (caddr_t)trunc_page((vm_offset_t)sa + PAGE_SIZE);
 2307                         ea = (caddr_t)ulmin((u_long)ea,
 2308                                 (u_long)bp->b_data + bp->b_bufsize);
 2309                         mask = ((1 << ((ea - sa) / DEV_BSIZE)) - 1) << j;
 2310                         if ((bp->b_pages[i]->valid & mask) == mask)
 2311                                 continue;
 2312                         if ((bp->b_pages[i]->valid & mask) == 0) {
 2313                                 if ((bp->b_pages[i]->flags & PG_ZERO) == 0) {
 2314                                         bzero(sa, ea - sa);
 2315                                 }
 2316                         } else {
 2317                                 for (; sa < ea; sa += DEV_BSIZE, j++) {
 2318                                         if (((bp->b_pages[i]->flags & PG_ZERO) == 0) &&
 2319                                                 (bp->b_pages[i]->valid & (1<<j)) == 0)
 2320                                                 bzero(sa, DEV_BSIZE);
 2321                                 }
 2322                         }
 2323                         bp->b_pages[i]->valid |= mask;
 2324                         vm_page_flag_clear(bp->b_pages[i], PG_ZERO);
 2325                 }
 2326                 bp->b_resid = 0;
 2327         } else {
 2328                 clrbuf(bp);
 2329         }
 2330 }
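
/*
 * Editor's illustrative sketch (not part of vfs_bio.c): the mask
 * arithmetic used by vfs_bio_clrbuf() above.  For the part of one page
 * covered by the buffer range [sa, ea), one valid bit is produced per
 * DEV_BSIZE chunk, shifted up to the chunk index of sa within its page.
 * With the assumed 4096-byte pages and 512-byte chunks, a range that
 * starts 1024 bytes into a page and covers 2048 bytes yields 0x3c
 * (bits 2 through 5).  The sk_ names are inventions of this sketch.
 */
#define SK_DEV_BSIZE    512
#define SK_PAGE_SHIFT   12
#define SK_PAGE_SIZE    (1 << SK_PAGE_SHIFT)
#define SK_PAGE_MASK    (SK_PAGE_SIZE - 1)

static int
sk_valid_mask(unsigned long sa, unsigned long ea)
{
        int j = (sa & SK_PAGE_MASK) / SK_DEV_BSIZE;

        return ((int)(((unsigned long)1 << ((ea - sa) / SK_DEV_BSIZE)) - 1) << j);
}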
 2331 
 2332 /*
 2333  * vm_hold_load_pages and vm_hold_free_pages get pages into
 2334  * a buffer's address space.  The pages are anonymous and are
 2335  * not associated with a file object.
 2336  */
 2337 void
 2338 vm_hold_load_pages(struct buf * bp, vm_offset_t from, vm_offset_t to)
 2339 {
 2340         vm_offset_t pg;
 2341         vm_page_t p;
 2342         int index;
 2343 
 2344         to = round_page(to);
 2345         from = round_page(from);
 2346         index = (from - trunc_page((vm_offset_t)bp->b_data)) >> PAGE_SHIFT;
 2347 
 2348         for (pg = from; pg < to; pg += PAGE_SIZE, index++) {
 2349 
 2350 tryagain:
 2351 
 2352                 p = vm_page_alloc(kernel_object,
 2353                         ((pg - VM_MIN_KERNEL_ADDRESS) >> PAGE_SHIFT),
 2354                     VM_ALLOC_NORMAL);
 2355                 if (!p) {
 2356                         vm_pageout_deficit += (to - from) >> PAGE_SHIFT;
 2357                         VM_WAIT;
 2358                         goto tryagain;
 2359                 }
 2360                 vm_page_wire(p);
 2361                 p->valid = VM_PAGE_BITS_ALL;
 2362                 vm_page_flag_clear(p, PG_ZERO);
 2363                 pmap_kenter(pg, VM_PAGE_TO_PHYS(p));
 2364                 bp->b_pages[index] = p;
 2365                 vm_page_wakeup(p);
 2366         }
 2367         bp->b_npages = index;
 2368 }
 2369 
 2370 void
 2371 vm_hold_free_pages(struct buf * bp, vm_offset_t from, vm_offset_t to)
 2372 {
 2373         vm_offset_t pg;
 2374         vm_page_t p;
 2375         int index, newnpages;
 2376 
 2377         from = round_page(from);
 2378         to = round_page(to);
 2379         newnpages = index = (from - trunc_page((vm_offset_t)bp->b_data)) >> PAGE_SHIFT;
 2380 
 2381         for (pg = from; pg < to; pg += PAGE_SIZE, index++) {
 2382                 p = bp->b_pages[index];
 2383                 if (p && (index < bp->b_npages)) {
 2384 #if !defined(MAX_PERF)
 2385                         if (p->busy) {
 2386                                 printf("vm_hold_free_pages: blkno: %d, lblkno: %d\n",
 2387                                         bp->b_blkno, bp->b_lblkno);
 2388                         }
 2389 #endif
 2390                         bp->b_pages[index] = NULL;
 2391                         pmap_kremove(pg);
 2392                         vm_page_busy(p);
 2393                         vm_page_unwire(p, 0);
 2394                         vm_page_free(p);
 2395                 }
 2396         }
 2397         bp->b_npages = newnpages;
 2398 }
 2399 
 2400 
 2401 #include "opt_ddb.h"
 2402 #ifdef DDB
 2403 #include <ddb/ddb.h>
 2404 
 2405 DB_SHOW_COMMAND(buffer, db_show_buffer)
 2406 {
 2407         /* get args */
 2408         struct buf *bp = (struct buf *)addr;
 2409 
 2410         if (!have_addr) {
 2411                 db_printf("usage: show buffer <addr>\n");
 2412                 return;
 2413         }
 2414 
 2415         db_printf("b_proc = %p,\nb_flags = 0x%b\n", (void *)bp->b_proc,
 2416                   (u_int)bp->b_flags, PRINT_BUF_FLAGS);
 2417         db_printf("b_error = %d, b_bufsize = %ld, b_bcount = %ld, "
 2418                   "b_resid = %ld\nb_dev = 0x%x, b_data = %p, "
 2419                   "b_blkno = %d, b_pblkno = %d\n",
 2420                   bp->b_error, bp->b_bufsize, bp->b_bcount, bp->b_resid,
 2421                   bp->b_dev, bp->b_data, bp->b_blkno, bp->b_pblkno);
 2422         if (bp->b_npages) {
 2423                 int i;
 2424                 db_printf("b_npages = %d, pages(OBJ, IDX, PA): ", bp->b_npages);
 2425                 for (i = 0; i < bp->b_npages; i++) {
 2426                         vm_page_t m;
 2427                         m = bp->b_pages[i];
 2428                         db_printf("(%p, 0x%lx, 0x%lx)", (void *)m->object,
 2429                             (u_long)m->pindex, (u_long)VM_PAGE_TO_PHYS(m));
 2430                         if ((i + 1) < bp->b_npages)
 2431                                 db_printf(",");
 2432                 }
 2433                 db_printf("\n");
 2434         }
 2435 }
 2436 #endif /* DDB */
