The Design and Implementation of the FreeBSD Operating System, Second Edition
Now available: The Design and Implementation of the FreeBSD Operating System (Second Edition)


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]

FreeBSD/Linux Kernel Cross Reference
sys/uvm/uvm_readahead.c

Version: -  FREEBSD  -  FREEBSD-13-STABLE  -  FREEBSD-13-0  -  FREEBSD-12-STABLE  -  FREEBSD-12-0  -  FREEBSD-11-STABLE  -  FREEBSD-11-0  -  FREEBSD-10-STABLE  -  FREEBSD-10-0  -  FREEBSD-9-STABLE  -  FREEBSD-9-0  -  FREEBSD-8-STABLE  -  FREEBSD-8-0  -  FREEBSD-7-STABLE  -  FREEBSD-7-0  -  FREEBSD-6-STABLE  -  FREEBSD-6-0  -  FREEBSD-5-STABLE  -  FREEBSD-5-0  -  FREEBSD-4-STABLE  -  FREEBSD-3-STABLE  -  FREEBSD22  -  l41  -  OPENBSD  -  linux-2.6  -  MK84  -  PLAN9  -  xnu-8792 
SearchContext: -  none  -  3  -  10 

    1 /*      $NetBSD: uvm_readahead.c,v 1.5 2008/01/02 11:49:20 ad Exp $     */
    2 
    3 /*-
    4  * Copyright (c)2003, 2005 YAMAMOTO Takashi,
    5  * All rights reserved.
    6  *
    7  * Redistribution and use in source and binary forms, with or without
    8  * modification, are permitted provided that the following conditions
    9  * are met:
   10  * 1. Redistributions of source code must retain the above copyright
   11  *    notice, this list of conditions and the following disclaimer.
   12  * 2. Redistributions in binary form must reproduce the above copyright
   13  *    notice, this list of conditions and the following disclaimer in the
   14  *    documentation and/or other materials provided with the distribution.
   15  *
   16  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
   17  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
   18  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
   19  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
   20  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
   21  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
   22  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
   23  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
   24  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
   25  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
   26  * SUCH DAMAGE.
   27  */
   28 
   29 /*
   30  * uvm_object read-ahead
   31  *
   32  * TODO:
   33  *      - tune.
   34  *      - handle multiple streams.
   35  *      - find a better way to deal with PGO_LOCKED pager requests.
   36  *        (currently just ignored)
   37  *      - consider the amount of memory in the system.
   38  *      - consider the speed of the underlying device.
   39  *      - consider filesystem block size / block layout.
   40  */
   41 
   42 #include <sys/cdefs.h>
   43 __KERNEL_RCSID(0, "$NetBSD: uvm_readahead.c,v 1.5 2008/01/02 11:49:20 ad Exp $");
   44 
   45 #include <sys/param.h>
   46 #include <sys/pool.h>
   47 
   48 #include <uvm/uvm.h>
   49 #include <uvm/uvm_readahead.h>
   50 
   /*
    * DPRINTF: debug trace hook.
    *
    * => the argument is a complete, parenthesized printf argument list.
    * => expands to a printf call only when READAHEAD_DEBUG is defined;
    *    otherwise it expands to nothing.
    */
   #if !defined(READAHEAD_DEBUG)
   #define DPRINTF(a)      /* nothing */
   #else /* !defined(READAHEAD_DEBUG) */
   #define DPRINTF(a)      printf a
   #endif /* !defined(READAHEAD_DEBUG) */
   56 
   /*
    * uvm_ractx: per-stream read-ahead state.
    *
    * The window and the next offset are only meaningful once RA_VALID
    * has been set in ra_flags.
    */

   /* bit in ra_flags: the window fields below have been initialized */
   #define RA_VALID        1

   struct uvm_ractx {
           int ra_flags;           /* RA_VALID or 0 */
           off_t ra_winstart;      /* offset at which the window starts */
           size_t ra_winsize;      /* size of the window */
           off_t ra_next;          /* offset at which read-ahead continues */
   };
   68 
   /*
    * Read-ahead tunables.  All of them are byte counts derived from MAXPHYS.
    *
    * XXX: on sun2 and sun3 (but not sun3x) MAXPHYS is 0xe000, so it is
    * overridden with 0x8000 for this file.
    * NOTE(review): presumably because ra_startio asserts that RA_IOCHUNK is
    * a power of two, which 0xe000 is not -- confirm.
    */
   #if defined(sun2) || (defined(sun3) && defined(_SUN3_))
   #undef MAXPHYS
   #define MAXPHYS         0x8000  /* XXX */
   #endif

   /* window size given to a freshly initialized context */
   #define RA_WINSIZE_INIT         MAXPHYS

   /* upper bound for the window size */
   #define RA_WINSIZE_MAX          (MAXPHYS * 8)

   /*
    * fixed window size used for the SEQUENTIAL hint.
    * (the name is spelled SEQENTIAL; uvm_ra_request refers to it as such.)
    */
   #define RA_WINSIZE_SEQENTIAL    RA_WINSIZE_MAX

   /* smallest amount of read-ahead for which i/o is started */
   #define RA_MINSIZE              (MAXPHYS * 2)

   /* size of each read-ahead i/o chunk */
   #define RA_IOCHUNK              MAXPHYS
   81 
   82 static off_t ra_startio(struct uvm_object *, off_t, size_t);
   83 static struct uvm_ractx *ra_allocctx(void);
   84 static void ra_freectx(struct uvm_ractx *);
   85 
   86 static struct pool_cache ractx_cache;
   87 
   88 /*
   89  * uvm_ra_init: initialize readahead module.
   90  */
   91 
   92 void
   93 uvm_ra_init(void)
   94 {
   95 
   96         pool_cache_bootstrap(&ractx_cache, sizeof(struct uvm_ractx), 0, 0, 0,
   97             "ractx", NULL, IPL_NONE, NULL, NULL, NULL);
   98 }
   99 
  100 static struct uvm_ractx *
  101 ra_allocctx(void)
  102 {
  103 
  104         return pool_cache_get(&ractx_cache, PR_NOWAIT);
  105 }
  106 
  107 static void
  108 ra_freectx(struct uvm_ractx *ra)
  109 {
  110 
  111         pool_cache_put(&ractx_cache, ra);
  112 }
  113 
  114 /*
  115  * ra_startio: start i/o for read-ahead.
  116  *
  117  * => start i/o for each RA_IOCHUNK sized chunk.
  118  * => return offset to which we started i/o.
  119  */
  120 
  121 static off_t
  122 ra_startio(struct uvm_object *uobj, off_t off, size_t sz)
  123 {
  124         const off_t endoff = off + sz;
  125 
  126         DPRINTF(("%s: uobj=%p, off=%" PRIu64 ", endoff=%" PRIu64 "\n",
  127             __func__, uobj, off, endoff));
  128         off = trunc_page(off);
  129         while (off < endoff) {
  130                 const size_t chunksize = RA_IOCHUNK;
  131                 int error;
  132                 size_t donebytes;
  133                 int npages;
  134                 int orignpages;
  135                 size_t bytelen;
  136 
  137                 KASSERT((chunksize & (chunksize - 1)) == 0);
  138                 KASSERT((off & PAGE_MASK) == 0);
  139                 bytelen = ((off + chunksize) & -(off_t)chunksize) - off;
  140                 KASSERT((bytelen & PAGE_MASK) == 0);
  141                 npages = orignpages = bytelen >> PAGE_SHIFT;
  142                 KASSERT(npages != 0);
  143 
  144                 /*
  145                  * use UVM_ADV_RANDOM to avoid recursion.
  146                  */
  147 
  148                 error = (*uobj->pgops->pgo_get)(uobj, off, NULL,
  149                     &npages, 0, VM_PROT_READ, UVM_ADV_RANDOM, 0);
  150                 DPRINTF(("%s:  off=%" PRIu64 ", bytelen=%zu -> %d\n",
  151                     __func__, off, bytelen, error));
  152                 mutex_enter(&uobj->vmobjlock);
  153                 if (error != 0 && error != EBUSY) {
  154                         if (error != EINVAL) { /* maybe past EOF */
  155                                 DPRINTF(("%s: error=%d\n", __func__, error));
  156                         }
  157                         break;
  158                 }
  159                 KASSERT(orignpages == npages);
  160                 donebytes = orignpages << PAGE_SHIFT;
  161                 off += donebytes;
  162         }
  163 
  164         return off;
  165 }
  166 
  167 /* ------------------------------------------------------------ */
  168 
  169 /*
  170  * uvm_ra_allocctx: allocate a context.
  171  */
  172 
  173 struct uvm_ractx *
  174 uvm_ra_allocctx(void)
  175 {
  176         struct uvm_ractx *ra;
  177 
  178         ra = ra_allocctx();
  179         if (ra != NULL) {
  180                 ra->ra_flags = 0;
  181         }
  182 
  183         return ra;
  184 }
  185 
  /*
   * uvm_ra_freectx: release a read-ahead context.
   *
   * => ra must not be NULL (asserted).
   */

  void
  uvm_ra_freectx(struct uvm_ractx *ra)
  {
          KASSERT(ra != NULL);

          ra_freectx(ra);
  }
  197 
  198 /*
  199  * uvm_ra_request: update a read-ahead context and start i/o if appropriate.
  200  *
  201  * => called when [reqoff, reqoff+reqsize) is requested.
  202  * => object must be locked by caller, will return locked.
  203  */
  204 
  205 void
  206 uvm_ra_request(struct uvm_ractx *ra, int advice, struct uvm_object *uobj,
  207     off_t reqoff, size_t reqsize)
  208 {
  209 
  210         KASSERT(mutex_owned(&uobj->vmobjlock));
  211 
  212         if (ra == NULL || advice == UVM_ADV_RANDOM) {
  213                 return;
  214         }
  215 
  216         if (advice == UVM_ADV_SEQUENTIAL) {
  217 
  218                 /*
  219                  * always do read-ahead with a large window.
  220                  */
  221 
  222                 if ((ra->ra_flags & RA_VALID) == 0) {
  223                         ra->ra_winstart = ra->ra_next = 0;
  224                         ra->ra_flags |= RA_VALID;
  225                 }
  226                 if (reqoff < ra->ra_winstart) {
  227                         ra->ra_next = reqoff;
  228                 }
  229                 ra->ra_winsize = RA_WINSIZE_SEQENTIAL;
  230                 goto do_readahead;
  231         }
  232 
  233         /*
  234          * a request with UVM_ADV_NORMAL hint.  (ie. no hint)
  235          *
  236          * we keep a sliding window in order to determine:
  237          *      - if the previous read-ahead was successful or not.
  238          *      - how many bytes to read-ahead.
  239          */
  240 
  241         /*
  242          * if it's the first request for this context,
  243          * initialize context and return.
  244          */
  245 
  246         if ((ra->ra_flags & RA_VALID) == 0) {
  247 initialize:
  248                 ra->ra_winstart = ra->ra_next = reqoff + reqsize;
  249                 ra->ra_winsize = RA_WINSIZE_INIT;
  250                 ra->ra_flags |= RA_VALID;
  251                 goto done;
  252         }
  253 
  254         /*
  255          * if it isn't in our window,
  256          * initialize context and return.
  257          * (read-ahead miss)
  258          */
  259 
  260         if (reqoff < ra->ra_winstart ||
  261             ra->ra_winstart + ra->ra_winsize < reqoff) {
  262 
  263                 /*
  264                  * ... unless we seem to be reading the same chunk repeatedly.
  265                  *
  266                  * XXX should have some margin?
  267                  */
  268 
  269                 if (reqoff + reqsize == ra->ra_winstart) {
  270                         DPRINTF(("%s: %p: same block: off=%" PRIu64
  271                             ", size=%zd, winstart=%" PRIu64 "\n",
  272                             __func__, ra, reqoff, reqsize, ra->ra_winstart));
  273                         goto done;
  274                 }
  275                 goto initialize;
  276         }
  277 
  278         /*
  279          * it's in our window. (read-ahead hit)
  280          *      - start read-ahead i/o if appropriate.
  281          *      - advance and enlarge window.
  282          */
  283 
  284 do_readahead:
  285 
  286         /*
  287          * don't bother to read-ahead behind current request.
  288          */
  289 
  290         if (reqoff > ra->ra_next) {
  291                 ra->ra_next = reqoff;
  292         }
  293 
  294         /*
  295          * try to make [reqoff, reqoff+ra_winsize) in-core.
  296          * note that [reqoff, ra_next) is considered already done.
  297          */
  298 
  299         if (reqoff + ra->ra_winsize > ra->ra_next) {
  300                 off_t raoff = MAX(reqoff, ra->ra_next);
  301                 size_t rasize = reqoff + ra->ra_winsize - ra->ra_next;
  302 
  303 #if defined(DIAGNOSTIC)
  304                 if (rasize > RA_WINSIZE_MAX) {
  305                         printf("%s: corrupted context", __func__);
  306                         rasize = RA_WINSIZE_MAX;
  307                 }
  308 #endif /* defined(DIAGNOSTIC) */
  309 
  310                 /*
  311                  * issue read-ahead only if we can start big enough i/o.
  312                  * otherwise we end up with a stream of small i/o.
  313                  */
  314 
  315                 if (rasize >= RA_MINSIZE) {
  316                         ra->ra_next = ra_startio(uobj, raoff, rasize);
  317                 }
  318         }
  319 
  320         /*
  321          * update window.
  322          *
  323          * enlarge window by reqsize, so that it grows in a predictable manner
  324          * regardless of the size of each read(2).
  325          */
  326 
  327         ra->ra_winstart = reqoff + reqsize;
  328         ra->ra_winsize = MIN(RA_WINSIZE_MAX, ra->ra_winsize + reqsize);
  329 
  330 done:;
  331 }

Cache object: 62e10853c1a268ac84f1766382fc218a


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]


This page is part of the FreeBSD/Linux Kernel Cross-Reference, and was automatically generated using a modified version of the LXR engine.