The Design and Implementation of the FreeBSD Operating System, Second Edition
Now available: The Design and Implementation of the FreeBSD Operating System (Second Edition)


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]

FreeBSD/Linux Kernel Cross Reference
sys/uvm/uvm_readahead.c

Version: -  FREEBSD  -  FREEBSD11  -  FREEBSD10  -  FREEBSD9  -  FREEBSD92  -  FREEBSD91  -  FREEBSD90  -  FREEBSD8  -  FREEBSD82  -  FREEBSD81  -  FREEBSD80  -  FREEBSD7  -  FREEBSD74  -  FREEBSD73  -  FREEBSD72  -  FREEBSD71  -  FREEBSD70  -  FREEBSD6  -  FREEBSD64  -  FREEBSD63  -  FREEBSD62  -  FREEBSD61  -  FREEBSD60  -  FREEBSD5  -  FREEBSD55  -  FREEBSD54  -  FREEBSD53  -  FREEBSD52  -  FREEBSD51  -  FREEBSD50  -  FREEBSD4  -  FREEBSD3  -  FREEBSD22  -  linux-2.6  -  linux-2.4.22  -  MK83  -  MK84  -  PLAN9  -  DFBSD  -  NETBSD  -  NETBSD5  -  NETBSD4  -  NETBSD3  -  NETBSD20  -  OPENBSD  -  xnu-517  -  xnu-792  -  xnu-792.6.70  -  xnu-1228  -  xnu-1456.1.26  -  xnu-1699.24.8  -  xnu-2050.18.24  -  OPENSOLARIS  -  minix-3-1-1 
SearchContext: -  none  -  3  -  10 

    1 /*      $NetBSD: uvm_readahead.c,v 1.2.28.1 2007/05/13 07:56:15 pavel Exp $     */
    2 
    3 /*-
    4  * Copyright (c)2003, 2005 YAMAMOTO Takashi,
    5  * All rights reserved.
    6  *
    7  * Redistribution and use in source and binary forms, with or without
    8  * modification, are permitted provided that the following conditions
    9  * are met:
   10  * 1. Redistributions of source code must retain the above copyright
   11  *    notice, this list of conditions and the following disclaimer.
   12  * 2. Redistributions in binary form must reproduce the above copyright
   13  *    notice, this list of conditions and the following disclaimer in the
   14  *    documentation and/or other materials provided with the distribution.
   15  *
   16  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
   17  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
   18  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
   19  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
   20  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
   21  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
   22  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
   23  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
   24  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
   25  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
   26  * SUCH DAMAGE.
   27  */
   28 
   29 /*
   30  * uvm_object read-ahead
   31  *
   32  * TODO:
   33  *      - tune.
   34  *      - handle multiple streams.
   35  *      - find a better way to deal with PGO_LOCKED pager requests.
   36  *        (currently just ignored)
   37  *      - consider the amount of memory in the system.
   38  *      - consider the speed of the underlying device.
   39  *      - consider filesystem block size / block layout.
   40  */
   41 
   42 #include <sys/cdefs.h>
   43 __KERNEL_RCSID(0, "$NetBSD: uvm_readahead.c,v 1.2.28.1 2007/05/13 07:56:15 pavel Exp $");
   44 
   45 #include <sys/param.h>
   46 #include <sys/pool.h>
   47 
   48 #include <uvm/uvm.h>
   49 #include <uvm/uvm_readahead.h>
   50 
/*
 * DPRINTF: debug trace output for the read-ahead code.
 *
 * => the single macro argument is a complete, parenthesized printf
 *    argument list, e.g. DPRINTF(("%s: x=%d\n", __func__, x)).
 * => expands to nothing unless READAHEAD_DEBUG is defined, so the
 *    arguments are not evaluated in normal builds.
 */
#if defined(READAHEAD_DEBUG)
#define DPRINTF(a)      printf a
#else /* defined(READAHEAD_DEBUG) */
#define DPRINTF(a)      /* nothing */
#endif /* defined(READAHEAD_DEBUG) */
   56 
   57 /*
   58  * uvm_ractx: read-ahead context.
   59  */
   60 
   61 struct uvm_ractx {
   62         int ra_flags;
   63 #define RA_VALID        1
   64         off_t ra_winstart;      /* window start offset */
   65         size_t ra_winsize;      /* window size */
   66         off_t ra_next;          /* next offset to read-ahead */
   67 };
   68 
/*
 * Read-ahead tuning constants, all derived from MAXPHYS.
 *
 * ra_startio() asserts that RA_IOCHUNK (== MAXPHYS) is a power of two.
 * NOTE(review): that is presumably why the sun2/sun3 value 0xe000 is
 * overridden with 0x8000 below -- confirm.
 *
 * RA_WINSIZE_SEQENTIAL is spelled "SEQENTIAL" [sic]; uvm_ra_request()
 * refers to it by that exact name.
 */
#if defined(sun2) || (defined(sun3) && defined(_SUN3_))
/* XXX: on sun2 and sun3 (but not sun3x) MAXPHYS is 0xe000 */
#undef MAXPHYS  
#define MAXPHYS         0x8000  /* XXX */
#endif

#define RA_WINSIZE_INIT MAXPHYS                 /* initial window size */
#define RA_WINSIZE_MAX  (MAXPHYS * 8)           /* max window size */
#define RA_WINSIZE_SEQENTIAL    RA_WINSIZE_MAX  /* fixed window size used for
                                                   SEQUENTIAL hint */
#define RA_MINSIZE      (MAXPHYS * 2)           /* min size to start i/o */
#define RA_IOCHUNK      MAXPHYS                 /* read-ahead i/o chunk size */
   81 
/* file-local helpers; definitions follow below. */
static off_t ra_startio(struct uvm_object *, off_t, size_t);
static struct uvm_ractx *ra_allocctx(void);
static void ra_freectx(struct uvm_ractx *);

/*
 * ractx_pool: pool that backs every struct uvm_ractx;
 * used only through ra_allocctx() and ra_freectx().
 */
static POOL_INIT(ractx_pool, sizeof(struct uvm_ractx), 0, 0, 0, "ractx",
    &pool_allocator_nointr);
   88 
   89 static struct uvm_ractx *
   90 ra_allocctx(void)
   91 {
   92 
   93         return pool_get(&ractx_pool, PR_NOWAIT);
   94 }
   95 
/*
 * ra_freectx: hand a context back to ractx_pool.
 *
 * => ra is passed straight to pool_put(); no checks are made here.
 */

static void
ra_freectx(struct uvm_ractx *ra)
{

        pool_put(&ractx_pool, ra);
}
  102 
  103 /*
  104  * ra_startio: start i/o for read-ahead.
  105  *
  106  * => start i/o for each RA_IOCHUNK sized chunk.
  107  * => return offset to which we started i/o.
  108  */
  109 
  110 static off_t
  111 ra_startio(struct uvm_object *uobj, off_t off, size_t sz)
  112 {
  113         const off_t endoff = off + sz;
  114 
  115         DPRINTF(("%s: uobj=%p, off=%" PRIu64 ", endoff=%" PRIu64 "\n",
  116             __func__, uobj, off, endoff));
  117         off = trunc_page(off);
  118         while (off < endoff) {
  119                 const size_t chunksize = RA_IOCHUNK;
  120                 int error;
  121                 size_t donebytes;
  122                 int npages;
  123                 int orignpages;
  124                 size_t bytelen;
  125 
  126                 KASSERT((chunksize & (chunksize - 1)) == 0);
  127                 KASSERT((off & PAGE_MASK) == 0);
  128                 bytelen = ((off + chunksize) & -(off_t)chunksize) - off;
  129                 KASSERT((bytelen & PAGE_MASK) == 0);
  130                 npages = orignpages = bytelen >> PAGE_SHIFT;
  131                 KASSERT(npages != 0);
  132 
  133                 /*
  134                  * use UVM_ADV_RANDOM to avoid recursion.
  135                  */
  136 
  137                 simple_lock(&uobj->vmobjlock);
  138                 error = (*uobj->pgops->pgo_get)(uobj, off, NULL,
  139                     &npages, 0, VM_PROT_READ, UVM_ADV_RANDOM, 0);
  140                 DPRINTF(("%s:  off=%" PRIu64 ", bytelen=%zu -> %d\n",
  141                     __func__, off, bytelen, error));
  142                 if (error != 0 && error != EBUSY) {
  143                         if (error != EINVAL) { /* maybe past EOF */
  144                                 DPRINTF(("%s: error=%d\n", __func__, error));
  145                         }
  146                         break;
  147                 }
  148                 KASSERT(orignpages == npages);
  149                 donebytes = orignpages << PAGE_SHIFT;
  150                 off += donebytes;
  151         }
  152 
  153         return off;
  154 }
  155 
  156 /* ------------------------------------------------------------ */
  157 
  158 /*
  159  * uvm_ra_allocctx: allocate a context.
  160  */
  161 
  162 struct uvm_ractx *
  163 uvm_ra_allocctx(void)
  164 {
  165         struct uvm_ractx *ra;
  166 
  167         ra = ra_allocctx();
  168         if (ra != NULL) {
  169                 ra->ra_flags = 0;
  170         }
  171 
  172         return ra;
  173 }
  174 
/*
 * uvm_ra_freectx: free a context.
 *
 * => ra must not be NULL (asserted).
 * => the context is handed to ra_freectx().
 */

void
uvm_ra_freectx(struct uvm_ractx *ra)
{

        KASSERT(ra != NULL);
        ra_freectx(ra);
}
  186 
  187 /*
  188  * uvm_ra_request: update a read-ahead context and start i/o if appropriate.
  189  *
  190  * => called when [reqoff, reqoff+reqsize) is requested.
  191  */
  192 
  193 void
  194 uvm_ra_request(struct uvm_ractx *ra, int advice, struct uvm_object *uobj,
  195     off_t reqoff, size_t reqsize)
  196 {
  197 
  198         if (ra == NULL || advice == UVM_ADV_RANDOM) {
  199                 return;
  200         }
  201 
  202         /*
  203          * XXX needs locking?  maybe.
  204          * but the worst effect is merely a bad read-ahead.
  205          */
  206 
  207         if (advice == UVM_ADV_SEQUENTIAL) {
  208 
  209                 /*
  210                  * always do read-ahead with a large window.
  211                  */
  212 
  213                 if ((ra->ra_flags & RA_VALID) == 0) {
  214                         ra->ra_winstart = ra->ra_next = 0;
  215                         ra->ra_flags |= RA_VALID;
  216                 }
  217                 if (reqoff < ra->ra_winstart) {
  218                         ra->ra_next = reqoff;
  219                 }
  220                 ra->ra_winsize = RA_WINSIZE_SEQENTIAL;
  221                 goto do_readahead;
  222         }
  223 
  224         /*
  225          * a request with UVM_ADV_NORMAL hint.  (ie. no hint)
  226          *
  227          * we keep a sliding window in order to determine:
  228          *      - if the previous read-ahead was successful or not.
  229          *      - how many bytes to read-ahead.
  230          */
  231 
  232         /*
  233          * if it's the first request for this context,
  234          * initialize context and return.
  235          */
  236 
  237         if ((ra->ra_flags & RA_VALID) == 0) {
  238 initialize:
  239                 ra->ra_winstart = ra->ra_next = reqoff + reqsize;
  240                 ra->ra_winsize = RA_WINSIZE_INIT;
  241                 ra->ra_flags |= RA_VALID;
  242                 goto done;
  243         }
  244 
  245         /*
  246          * if it isn't in our window,
  247          * initialize context and return.
  248          * (read-ahead miss)
  249          */
  250 
  251         if (reqoff < ra->ra_winstart ||
  252             ra->ra_winstart + ra->ra_winsize < reqoff) {
  253 
  254                 /*
  255                  * ... unless we seem to be reading the same chunk repeatedly.
  256                  *
  257                  * XXX should have some margin?
  258                  */
  259 
  260                 if (reqoff + reqsize == ra->ra_winstart) {
  261                         DPRINTF(("%s: %p: same block: off=%" PRIu64
  262                             ", size=%zd, winstart=%" PRIu64 "\n",
  263                             __func__, ra, reqoff, reqsize, ra->ra_winstart));
  264                         goto done;
  265                 }
  266                 goto initialize;
  267         }
  268 
  269         /*
  270          * it's in our window. (read-ahead hit)
  271          *      - start read-ahead i/o if appropriate.
  272          *      - advance and enlarge window.
  273          */
  274 
  275 do_readahead:
  276 
  277         /*
  278          * don't bother to read-ahead behind current request.
  279          */
  280 
  281         if (reqoff > ra->ra_next) {
  282                 ra->ra_next = reqoff;
  283         }
  284 
  285         /*
  286          * try to make [reqoff, reqoff+ra_winsize) in-core.
  287          * note that [reqoff, ra_next) is considered already done.
  288          */
  289 
  290         if (reqoff + ra->ra_winsize > ra->ra_next) {
  291                 off_t raoff = MAX(reqoff, ra->ra_next);
  292                 size_t rasize = reqoff + ra->ra_winsize - ra->ra_next;
  293 
  294 #if defined(DIAGNOSTIC)
  295                 if (rasize > RA_WINSIZE_MAX) {
  296 
  297                         /*
  298                          * shouldn't happen as far as we're protected by
  299                          * kernel_lock.
  300                          */
  301 
  302                         printf("%s: corrupted context", __func__);
  303                         rasize = RA_WINSIZE_MAX;
  304                 }
  305 #endif /* defined(DIAGNOSTIC) */
  306 
  307                 /*
  308                  * issue read-ahead only if we can start big enough i/o.
  309                  * otherwise we end up with a stream of small i/o.
  310                  */
  311 
  312                 if (rasize >= RA_MINSIZE) {
  313                         ra->ra_next = ra_startio(uobj, raoff, rasize);
  314                 }
  315         }
  316 
  317         /*
  318          * update window.
  319          *
  320          * enlarge window by reqsize, so that it grows in a predictable manner
  321          * regardless of the size of each read(2).
  322          */
  323 
  324         ra->ra_winstart = reqoff + reqsize;
  325         ra->ra_winsize = MIN(RA_WINSIZE_MAX, ra->ra_winsize + reqsize);
  326 
  327 done:;
  328 }

Cache object: c01ab117ef4ae36c41b3c8dd782a6c1f


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]


This page is part of the FreeBSD/Linux Kernel Cross-Reference, and was automatically generated using a modified version of the LXR engine.