FreeBSD/Linux Kernel Cross Reference
sys/kern/kern_memio.c


    1 /*-
    2  * Copyright (c) 1988 University of Utah.
    3  * Copyright (c) 1982, 1986, 1990 The Regents of the University of California.
    4  * All rights reserved.
    5  *
    6  * This code is derived from software contributed to Berkeley by
    7  * the Systems Programming Group of the University of Utah Computer
    8  * Science Department, and code derived from software contributed to
    9  * Berkeley by William Jolitz.
   10  *
   11  * Redistribution and use in source and binary forms, with or without
   12  * modification, are permitted provided that the following conditions
   13  * are met:
   14  * 1. Redistributions of source code must retain the above copyright
   15  *    notice, this list of conditions and the following disclaimer.
   16  * 2. Redistributions in binary form must reproduce the above copyright
   17  *    notice, this list of conditions and the following disclaimer in the
   18  *    documentation and/or other materials provided with the distribution.
   19  * 3. Neither the name of the University nor the names of its contributors
   20  *    may be used to endorse or promote products derived from this software
   21  *    without specific prior written permission.
   22  *
   23  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
   24  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
   25  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
   26  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
   27  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
   28  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
   29  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
   30  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
   31  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
   32  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
   33  * SUCH DAMAGE.
   34  *
   35  *      from: Utah $Hdr: mem.c 1.13 89/10/08$
   36  *      from: @(#)mem.c 7.2 (Berkeley) 5/9/91
   37  * $FreeBSD: src/sys/i386/i386/mem.c,v 1.79.2.9 2003/01/04 22:58:01 njl Exp $
   38  */
   39 
   40 /*
   41  * Memory special file
   42  */
   43 
   44 #include <sys/param.h>
   45 #include <sys/systm.h>
   46 #include <sys/buf.h>
   47 #include <sys/conf.h>
   48 #include <sys/fcntl.h>
   49 #include <sys/filio.h>
   50 #include <sys/kernel.h>
   51 #include <sys/malloc.h>
   52 #include <sys/memrange.h>
   53 #include <sys/proc.h>
   54 #include <sys/priv.h>
   55 #include <sys/random.h>
   56 #include <sys/signalvar.h>
   57 #include <sys/uio.h>
   58 #include <sys/vnode.h>
   59 
   60 #include <sys/signal2.h>
   61 #include <sys/mplock2.h>
   62 
   63 #include <vm/vm.h>
   64 #include <vm/pmap.h>
   65 #include <vm/vm_extern.h>
   66 
   67 
   68 static  d_open_t        mmopen;
   69 static  d_close_t       mmclose;
   70 static  d_read_t        mmread;
   71 static  d_write_t       mmwrite;
   72 static  d_ioctl_t       mmioctl;
   73 static  d_mmap_t        memmmap;
   74 static  d_kqfilter_t    mmkqfilter;
   75 
   76 #define CDEV_MAJOR 2
   77 static struct dev_ops mem_ops = {
   78         { "mem", 0, D_MPSAFE },
   79         .d_open =       mmopen,
   80         .d_close =      mmclose,
   81         .d_read =       mmread,
   82         .d_write =      mmwrite,
   83         .d_ioctl =      mmioctl,
   84         .d_kqfilter =   mmkqfilter,
   85         .d_mmap =       memmmap,
   86 };
   87 
   88 static int rand_bolt;
   89 static caddr_t  zbuf;
   90 static cdev_t   zerodev = NULL;
   91 
   92 MALLOC_DEFINE(M_MEMDESC, "memdesc", "memory range descriptors");
   93 static int mem_ioctl (cdev_t, u_long, caddr_t, int, struct ucred *);
   94 static int random_ioctl (cdev_t, u_long, caddr_t, int, struct ucred *);
   95 
   96 struct mem_range_softc mem_range_softc;
   97 
   98 
   99 static int
  100 mmopen(struct dev_open_args *ap)
  101 {
  102         cdev_t dev = ap->a_head.a_dev;
  103         int error;
  104 
  105         switch (minor(dev)) {
  106         case 0:
  107         case 1:
  108                 if (ap->a_oflags & FWRITE) {
  109                         if (securelevel > 0 || kernel_mem_readonly)
  110                                 return (EPERM);
  111                 }
  112                 error = 0;
  113                 break;
  114         case 14:
  115                 error = priv_check_cred(ap->a_cred, PRIV_ROOT, 0);
  116                 if (error != 0)
  117                         break;
  118                 if (securelevel > 0 || kernel_mem_readonly) {
  119                         error = EPERM;
  120                         break;
  121                 }
  122                 error = cpu_set_iopl();
  123                 break;
  124         default:
  125                 error = 0;
  126                 break;
  127         }
  128         return (error);
  129 }
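
For the minor-14 (/dev/io) path above, simply holding the device open is what grants a process x86 I/O-port access: open() runs the priv_check_cred()/securelevel checks and cpu_set_iopl(), and the mmclose() path below undoes it. A minimal userland sketch, assuming an x86 machine with securelevel <= 0; the port number 0x80 and the inb_port() wrapper are illustrative, not taken from this file:

#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>

/* Illustrative inline wrapper around the x86 "inb" instruction. */
static inline unsigned char
inb_port(unsigned short port)
{
        unsigned char v;

        __asm__ __volatile__("inb %1, %0" : "=a" (v) : "d" (port));
        return (v);
}

int
main(void)
{
        int fd;

        /* Opening /dev/io (minor 14) raises IOPL via cpu_set_iopl(). */
        fd = open("/dev/io", O_RDWR);
        if (fd < 0) {
                perror("open /dev/io");     /* EPERM if securelevel > 0 */
                return (1);
        }
        printf("port 0x80 = 0x%02x\n", inb_port(0x80));
        close(fd);                          /* mmclose() clears IOPL */
        return (0);
}
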
  130 
  131 static int
  132 mmclose(struct dev_close_args *ap)
  133 {
  134         cdev_t dev = ap->a_head.a_dev;
  135         int error;
  136 
  137         switch (minor(dev)) {
  138         case 14:
  139                 error = cpu_clr_iopl();
  140                 break;
  141         default:
  142                 error = 0;
  143                 break;
  144         }
  145         return (error);
  146 }
  147 
  148 
  149 static int
  150 mmrw(cdev_t dev, struct uio *uio, int flags)
  151 {
  152         int o;
  153         u_int c;
  154         u_int poolsize;
  155         u_long v;
  156         struct iovec *iov;
  157         int error = 0;
  158         caddr_t buf = NULL;
  159 
  160         while (uio->uio_resid > 0 && error == 0) {
  161                 iov = uio->uio_iov;
  162                 if (iov->iov_len == 0) {
  163                         uio->uio_iov++;
  164                         uio->uio_iovcnt--;
  165                         if (uio->uio_iovcnt < 0)
  166                                 panic("mmrw");
  167                         continue;
  168                 }
  169                 switch (minor(dev)) {
  170                 case 0:
  171                         /*
  172                          * minor device 0 is physical memory, /dev/mem 
  173                          */
  174                         v = uio->uio_offset;
  175                         v &= ~(long)PAGE_MASK;
  176                         pmap_kenter((vm_offset_t)ptvmmap, v);
  177                         o = (int)uio->uio_offset & PAGE_MASK;
  178                         c = (u_int)(PAGE_SIZE - ((uintptr_t)iov->iov_base & PAGE_MASK));
  179                         c = min(c, (u_int)(PAGE_SIZE - o));
  180                         c = min(c, (u_int)iov->iov_len);
  181                         error = uiomove((caddr_t)&ptvmmap[o], (int)c, uio);
  182                         pmap_kremove((vm_offset_t)ptvmmap);
  183                         continue;
  184 
  185                 case 1: {
  186                         /*
  187                          * minor device 1 is kernel memory, /dev/kmem 
  188                          */
  189                         vm_offset_t saddr, eaddr;
  190                         int prot;
  191 
  192                         c = iov->iov_len;
  193 
  194                         /*
  195                          * Make sure that all of the pages are currently 
  196                          * resident so that we don't create any zero-fill
  197                          * pages.
  198                          */
  199                         saddr = trunc_page(uio->uio_offset);
  200                         eaddr = round_page(uio->uio_offset + c);
  201                         if (saddr > eaddr)
  202                                 return EFAULT;
  203 
  204                         /*
  205                          * Make sure the kernel addresses are mapped.
  206                          * platform_direct_mapped() can be used to bypass
  207                          * default mapping via the page table (virtual kernels
  208                          * contain a lot of out-of-band data).
  209                          */
  210                         prot = VM_PROT_READ;
  211                         if (uio->uio_rw != UIO_READ)
  212                                 prot |= VM_PROT_WRITE;
  213                         error = kvm_access_check(saddr, eaddr, prot);
  214                         if (error)
  215                                 return (error);
  216                         error = uiomove((caddr_t)(vm_offset_t)uio->uio_offset,
  217                                         (int)c, uio);
  218                         continue;
  219                 }
  220                 case 2:
  221                         /*
  222                          * minor device 2 (/dev/null) is EOF/RATHOLE
  223                          */
  224                         if (uio->uio_rw == UIO_READ)
  225                                 return (0);
  226                         c = iov->iov_len;
  227                         break;
  228                 case 3:
  229                         /*
  230                          * minor device 3 (/dev/random) is source of filth
  231                          * on read, seeder on write
  232                          */
  233                         if (buf == NULL)
  234                                 buf = kmalloc(PAGE_SIZE, M_TEMP, M_WAITOK);
  235                         c = min(iov->iov_len, PAGE_SIZE);
  236                         if (uio->uio_rw == UIO_WRITE) {
  237                                 error = uiomove(buf, (int)c, uio);
  238                                 if (error == 0)
  239                                         error = add_buffer_randomness(buf, c);
  240                         } else {
  241                                 poolsize = read_random(buf, c);
  242                                 if (poolsize == 0) {
  243                                         if (buf)
  244                                                 kfree(buf, M_TEMP);
  245                                         if ((flags & IO_NDELAY) != 0)
  246                                                 return (EWOULDBLOCK);
  247                                         return (0);
  248                                 }
  249                                 c = min(c, poolsize);
  250                                 error = uiomove(buf, (int)c, uio);
  251                         }
  252                         continue;
  253                 case 4:
  254                         /*
  255                          * minor device 4 (/dev/urandom) is source of muck
  256                          * on read, writes are disallowed.
  257                          */
  258                         c = min(iov->iov_len, PAGE_SIZE);
  259                         if (uio->uio_rw == UIO_WRITE) {
  260                                 error = EPERM;
  261                                 break;
  262                         }
  263                         if (CURSIG(curthread->td_lwp) != 0) {
  264                                 /*
  265                                  * Use tsleep() to get the error code right.
  266                                  * It should return immediately.
  267                                  */
  268                                 error = tsleep(&rand_bolt, PCATCH, "urand", 1);
  269                                 if (error != 0 && error != EWOULDBLOCK)
  270                                         continue;
  271                         }
  272                         if (buf == NULL)
  273                                 buf = kmalloc(PAGE_SIZE, M_TEMP, M_WAITOK);
  274                         poolsize = read_random_unlimited(buf, c);
  275                         c = min(c, poolsize);
  276                         error = uiomove(buf, (int)c, uio);
  277                         continue;
  278                 case 12:
  279                         /*
  280                          * minor device 12 (/dev/zero) is source of nulls 
   281                          * on read, writes are disallowed.
  282                          */
  283                         if (uio->uio_rw == UIO_WRITE) {
  284                                 c = iov->iov_len;
  285                                 break;
  286                         }
  287                         if (zbuf == NULL) {
  288                                 zbuf = (caddr_t)kmalloc(PAGE_SIZE, M_TEMP,
  289                                     M_WAITOK | M_ZERO);
  290                         }
  291                         c = min(iov->iov_len, PAGE_SIZE);
  292                         error = uiomove(zbuf, (int)c, uio);
  293                         continue;
  294                 default:
  295                         return (ENODEV);
  296                 }
  297                 if (error)
  298                         break;
  299                 iov->iov_base = (char *)iov->iov_base + c;
  300                 iov->iov_len -= c;
  301                 uio->uio_offset += c;
  302                 uio->uio_resid -= c;
  303         }
  304         if (buf)
  305                 kfree(buf, M_TEMP);
  306         return (error);
  307 }
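
mmrw() above services every memory special file through ordinary read(2) and write(2). A minimal userland sketch exercising the /dev/urandom (minor 4) and /dev/zero (minor 12) branches; the buffer sizes and error handling are illustrative:

#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>

int
main(void)
{
        unsigned char rnd[16], zero[16];
        int rfd, zfd;
        ssize_t n;

        /* minor 4: read_random_unlimited() fills the buffer */
        rfd = open("/dev/urandom", O_RDONLY);
        /* minor 12: uiomove() copies out of the zeroed zbuf page */
        zfd = open("/dev/zero", O_RDONLY);
        if (rfd < 0 || zfd < 0) {
                perror("open");
                return (1);
        }

        n = read(rfd, rnd, sizeof(rnd));
        printf("got %zd random bytes, first = 0x%02x\n", n, rnd[0]);

        n = read(zfd, zero, sizeof(zero));
        printf("got %zd zero bytes, first = 0x%02x\n", n, zero[0]);

        close(rfd);
        close(zfd);
        return (0);
}
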
  308 
  309 static int
  310 mmread(struct dev_read_args *ap)
  311 {
  312         return(mmrw(ap->a_head.a_dev, ap->a_uio, ap->a_ioflag));
  313 }
  314 
  315 static int
  316 mmwrite(struct dev_write_args *ap)
  317 {
  318         return(mmrw(ap->a_head.a_dev, ap->a_uio, ap->a_ioflag));
  319 }
  320 
  321 
  322 
  323 
  324 
  325 /*******************************************************\
  326 * allow user processes to MMAP some memory sections     *
  327 * instead of going through read/write                   *
  328 \*******************************************************/
  329 
  330 static int
  331 memmmap(struct dev_mmap_args *ap)
  332 {
  333         cdev_t dev = ap->a_head.a_dev;
  334 
  335         switch (minor(dev)) {
  336         case 0:
  337                 /* 
  338                  * minor device 0 is physical memory 
  339                  */
  340 #if defined(__i386__)
  341                 ap->a_result = i386_btop(ap->a_offset);
  342 #elif defined(__x86_64__)
  343                 ap->a_result = x86_64_btop(ap->a_offset);
  344 #endif
  345                 return 0;
  346         case 1:
  347                 /*
  348                  * minor device 1 is kernel memory 
  349                  */
  350 #if defined(__i386__)
  351                 ap->a_result = i386_btop(vtophys(ap->a_offset));
  352 #elif defined(__x86_64__)
  353                 ap->a_result = x86_64_btop(vtophys(ap->a_offset));
  354 #endif
  355                 return 0;
  356 
  357         default:
  358                 return EINVAL;
  359         }
  360 }
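
memmmap() only translates a byte offset into a physical page index; the mapping itself is requested from userland with mmap(2) on /dev/mem. A hedged sketch, assuming sufficient privilege; PHYS_ADDR is a placeholder physical address, not something taken from this file:

#include <sys/mman.h>
#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>

#define PHYS_ADDR 0x000f0000UL  /* placeholder, page-aligned physical address */

int
main(void)
{
        volatile unsigned char *p;
        int fd;

        fd = open("/dev/mem", O_RDONLY);
        if (fd < 0) {
                perror("open /dev/mem");
                return (1);
        }

        /* The offset passed here reaches memmmap() as ap->a_offset. */
        p = mmap(NULL, 4096, PROT_READ, MAP_SHARED, fd, (off_t)PHYS_ADDR);
        if (p == MAP_FAILED) {
                perror("mmap");
                close(fd);
                return (1);
        }
        printf("byte at physical 0x%lx = 0x%02x\n",
            (unsigned long)PHYS_ADDR, p[0]);

        munmap((void *)p, 4096);
        close(fd);
        return (0);
}
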
  361 
  362 static int
  363 mmioctl(struct dev_ioctl_args *ap)
  364 {
  365         cdev_t dev = ap->a_head.a_dev;
  366         int error;
  367 
  368         get_mplock();
  369 
  370         switch (minor(dev)) {
  371         case 0:
  372                 error = mem_ioctl(dev, ap->a_cmd, ap->a_data,
  373                                   ap->a_fflag, ap->a_cred);
  374                 break;
  375         case 3:
  376         case 4:
  377                 error = random_ioctl(dev, ap->a_cmd, ap->a_data,
  378                                      ap->a_fflag, ap->a_cred);
  379                 break;
  380         default:
  381                 error = ENODEV;
  382                 break;
  383         }
  384 
  385         rel_mplock();
  386         return (error);
  387 }
  388 
  389 /*
  390  * Operations for changing memory attributes.
  391  *
  392  * This is basically just an ioctl shim for mem_range_attr_get
  393  * and mem_range_attr_set.
  394  */
  395 static int 
  396 mem_ioctl(cdev_t dev, u_long cmd, caddr_t data, int flags, struct ucred *cred)
  397 {
  398         int nd, error = 0;
  399         struct mem_range_op *mo = (struct mem_range_op *)data;
  400         struct mem_range_desc *md;
  401         
  402         /* is this for us? */
  403         if ((cmd != MEMRANGE_GET) &&
  404             (cmd != MEMRANGE_SET))
  405                 return (ENOTTY);
  406 
  407         /* any chance we can handle this? */
  408         if (mem_range_softc.mr_op == NULL)
  409                 return (EOPNOTSUPP);
  410 
  411         /* do we have any descriptors? */
  412         if (mem_range_softc.mr_ndesc == 0)
  413                 return (ENXIO);
  414 
  415         switch (cmd) {
  416         case MEMRANGE_GET:
  417                 nd = imin(mo->mo_arg[0], mem_range_softc.mr_ndesc);
  418                 if (nd > 0) {
  419                         md = (struct mem_range_desc *)
  420                                 kmalloc(nd * sizeof(struct mem_range_desc),
  421                                        M_MEMDESC, M_WAITOK);
  422                         error = mem_range_attr_get(md, &nd);
  423                         if (!error)
  424                                 error = copyout(md, mo->mo_desc, 
  425                                         nd * sizeof(struct mem_range_desc));
  426                         kfree(md, M_MEMDESC);
  427                 } else {
  428                         nd = mem_range_softc.mr_ndesc;
  429                 }
  430                 mo->mo_arg[0] = nd;
  431                 break;
  432                 
  433         case MEMRANGE_SET:
  434                 md = (struct mem_range_desc *)kmalloc(sizeof(struct mem_range_desc),
  435                                                     M_MEMDESC, M_WAITOK);
  436                 error = copyin(mo->mo_desc, md, sizeof(struct mem_range_desc));
  437                 /* clamp description string */
  438                 md->mr_owner[sizeof(md->mr_owner) - 1] = 0;
  439                 if (error == 0)
  440                         error = mem_range_attr_set(md, &mo->mo_arg[0]);
  441                 kfree(md, M_MEMDESC);
  442                 break;
  443         }
  444         return (error);
  445 }
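
The MEMRANGE_GET branch above can be driven from userland in the same two-step fashion memcontrol(8) uses: a first call with mo_arg[0] == 0 returns the descriptor count, and a second call fetches that many descriptors into mo_desc. A minimal sketch; the mr_base/mr_len field names are assumed to come from <sys/memrange.h>:

#include <sys/ioctl.h>
#include <sys/memrange.h>
#include <fcntl.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>

int
main(void)
{
        struct mem_range_op mo;
        struct mem_range_desc *md;
        int fd, i, nd;

        /* mmioctl() routes MEMRANGE_* to mem_ioctl() for minor 0 only. */
        fd = open("/dev/mem", O_RDONLY);
        if (fd < 0) {
                perror("open /dev/mem");
                return (1);
        }

        /* First call with mo_arg[0] == 0: kernel returns the descriptor count. */
        mo.mo_arg[0] = 0;
        if (ioctl(fd, MEMRANGE_GET, &mo) < 0) {
                perror("MEMRANGE_GET");         /* ENXIO if no range support */
                return (1);
        }
        nd = mo.mo_arg[0];

        /* Second call: fetch that many descriptors into our buffer. */
        md = calloc(nd, sizeof(*md));
        mo.mo_arg[0] = nd;
        mo.mo_desc = md;
        if (ioctl(fd, MEMRANGE_GET, &mo) < 0) {
                perror("MEMRANGE_GET");
                return (1);
        }

        for (i = 0; i < mo.mo_arg[0]; i++)
                printf("%#jx/%#jx %s\n", (uintmax_t)md[i].mr_base,
                    (uintmax_t)md[i].mr_len, md[i].mr_owner);

        free(md);
        close(fd);
        return (0);
}
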
  446 
  447 /*
  448  * Implementation-neutral, kernel-callable functions for manipulating
  449  * memory range attributes.
  450  */
  451 int
  452 mem_range_attr_get(struct mem_range_desc *mrd, int *arg)
  453 {
  454         /* can we handle this? */
  455         if (mem_range_softc.mr_op == NULL)
  456                 return (EOPNOTSUPP);
  457 
  458         if (*arg == 0) {
  459                 *arg = mem_range_softc.mr_ndesc;
  460         } else {
  461                 bcopy(mem_range_softc.mr_desc, mrd, (*arg) * sizeof(struct mem_range_desc));
  462         }
  463         return (0);
  464 }
  465 
  466 int
  467 mem_range_attr_set(struct mem_range_desc *mrd, int *arg)
  468 {
  469         /* can we handle this? */
  470         if (mem_range_softc.mr_op == NULL)
  471                 return (EOPNOTSUPP);
  472 
  473         return (mem_range_softc.mr_op->set(&mem_range_softc, mrd, arg));
  474 }
  475 
  476 void
  477 mem_range_AP_init(void)
  478 {
  479         if (mem_range_softc.mr_op && mem_range_softc.mr_op->initAP)
  480                 mem_range_softc.mr_op->initAP(&mem_range_softc);
  481 }
  482 
  483 static int 
  484 random_ioctl(cdev_t dev, u_long cmd, caddr_t data, int flags, struct ucred *cred)
  485 {
  486         int error;
  487         int intr;
  488         
  489         /*
  490          * Even inspecting the state is privileged, since it gives a hint
  491          * about how easily the randomness might be guessed.
  492          */
  493         error = 0;
  494 
  495         switch (cmd) {
  496         /* Really handled in upper layer */
  497         case FIOASYNC:
  498                 break;
  499         case MEM_SETIRQ:
  500                 intr = *(int16_t *)data;
  501                 if ((error = priv_check_cred(cred, PRIV_ROOT, 0)) != 0)
  502                         break;
  503                 if (intr < 0 || intr >= MAX_INTS)
  504                         return (EINVAL);
  505                 register_randintr(intr);
  506                 break;
  507         case MEM_CLEARIRQ:
  508                 intr = *(int16_t *)data;
  509                 if ((error = priv_check_cred(cred, PRIV_ROOT, 0)) != 0)
  510                         break;
  511                 if (intr < 0 || intr >= MAX_INTS)
  512                         return (EINVAL);
  513                 unregister_randintr(intr);
  514                 break;
  515         case MEM_RETURNIRQ:
  516                 error = ENOTSUP;
  517                 break;
  518         case MEM_FINDIRQ:
  519                 intr = *(int16_t *)data;
  520                 if ((error = priv_check_cred(cred, PRIV_ROOT, 0)) != 0)
  521                         break;
  522                 if (intr < 0 || intr >= MAX_INTS)
  523                         return (EINVAL);
  524                 intr = next_registered_randintr(intr);
  525                 if (intr == MAX_INTS)
  526                         return (ENOENT);
  527                 *(u_int16_t *)data = intr;
  528                 break;
  529         default:
  530                 error = ENOTSUP;
  531                 break;
  532         }
  533         return (error);
  534 }
  535 
  536 static int
  537 mm_filter_read(struct knote *kn, long hint)
  538 {
  539         return (1);
  540 }
  541 
  542 static int
  543 mm_filter_write(struct knote *kn, long hint)
  544 {
  545         return (1);
  546 }
  547 
  548 static void
  549 dummy_filter_detach(struct knote *kn) {}
  550 
  551 /* Implemented in kern_nrandom.c */
  552 static struct filterops random_read_filtops =
  553         { FILTEROP_ISFD|FILTEROP_MPSAFE, NULL, dummy_filter_detach, random_filter_read };
  554 
  555 static struct filterops mm_read_filtops =
  556         { FILTEROP_ISFD|FILTEROP_MPSAFE, NULL, dummy_filter_detach, mm_filter_read };
  557 
  558 static struct filterops mm_write_filtops =
  559         { FILTEROP_ISFD|FILTEROP_MPSAFE, NULL, dummy_filter_detach, mm_filter_write };
  560 
  561 int
  562 mmkqfilter(struct dev_kqfilter_args *ap)
  563 {
  564         struct knote *kn = ap->a_kn;
  565         cdev_t dev = ap->a_head.a_dev;
  566 
  567         ap->a_result = 0;
  568         switch (kn->kn_filter) {
  569         case EVFILT_READ:
  570                 switch (minor(dev)) {
  571                 case 3:
  572                         kn->kn_fop = &random_read_filtops;
  573                         break;
  574                 default:
  575                         kn->kn_fop = &mm_read_filtops;
  576                         break;
  577                 }
  578                 break;
  579         case EVFILT_WRITE:
  580                 kn->kn_fop = &mm_write_filtops;
  581                 break;
  582         default:
  583                 ap->a_result = EOPNOTSUPP;
  584                 return (0);
  585         }
  586 
  587         return (0);
  588 }
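
mmkqfilter() wires EVFILT_READ on /dev/random (minor 3) to random_read_filtops, so a process can wait for entropy with kqueue(2) rather than blocking in read(2). A short usage sketch; the non-blocking open and the 16-byte buffer are illustrative:

#include <sys/types.h>
#include <sys/event.h>
#include <sys/time.h>
#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>

int
main(void)
{
        struct kevent kev, ev;
        unsigned char buf[16];
        int fd, kq, n;

        fd = open("/dev/random", O_RDONLY | O_NONBLOCK);
        kq = kqueue();
        if (fd < 0 || kq < 0) {
                perror("setup");
                return (1);
        }

        /* EVFILT_READ on minor 3 selects random_read_filtops above. */
        EV_SET(&kev, fd, EVFILT_READ, EV_ADD, 0, 0, NULL);
        n = kevent(kq, &kev, 1, &ev, 1, NULL);  /* block until readable */
        if (n == 1)
                printf("read %zd random bytes\n", read(fd, buf, sizeof(buf)));

        close(kq);
        close(fd);
        return (0);
}
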
  589 
  590 int
  591 iszerodev(cdev_t dev)
  592 {
  593         return (zerodev == dev);
  594 }
  595 
  596 static void
  597 mem_drvinit(void *unused)
  598 {
  599 
  600         /* Initialise memory range handling */
  601         if (mem_range_softc.mr_op != NULL)
  602                 mem_range_softc.mr_op->init(&mem_range_softc);
  603 
  604         make_dev(&mem_ops, 0, UID_ROOT, GID_KMEM, 0640, "mem");
  605         make_dev(&mem_ops, 1, UID_ROOT, GID_KMEM, 0640, "kmem");
  606         make_dev(&mem_ops, 2, UID_ROOT, GID_WHEEL, 0666, "null");
  607         make_dev(&mem_ops, 3, UID_ROOT, GID_WHEEL, 0644, "random");
  608         make_dev(&mem_ops, 4, UID_ROOT, GID_WHEEL, 0644, "urandom");
  609         zerodev = make_dev(&mem_ops, 12, UID_ROOT, GID_WHEEL, 0666, "zero");
  610         make_dev(&mem_ops, 14, UID_ROOT, GID_WHEEL, 0600, "io");
  611 }
  612 
  613 SYSINIT(memdev,SI_SUB_DRIVERS,SI_ORDER_MIDDLE+CDEV_MAJOR,mem_drvinit,NULL)
  614 
