FreeBSD/Linux Kernel Cross Reference
sys/compat/linuxkpi/common/src/linux_page.c

/*-
 * Copyright (c) 2010 Isilon Systems, Inc.
 * Copyright (c) 2016 Matthew Macy (mmacy@mattmacy.io)
 * Copyright (c) 2017 Mellanox Technologies, Ltd.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice unmodified, this list of conditions, and the following
 *    disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/malloc.h>
#include <sys/kernel.h>
#include <sys/sysctl.h>
#include <sys/lock.h>
#include <sys/mutex.h>
#include <sys/rwlock.h>
#include <sys/proc.h>
#include <sys/sched.h>
#include <sys/memrange.h>

#include <machine/bus.h>

#include <vm/vm.h>
#include <vm/pmap.h>
#include <vm/vm_param.h>
#include <vm/vm_kern.h>
#include <vm/vm_object.h>
#include <vm/vm_map.h>
#include <vm/vm_page.h>
#include <vm/vm_pageout.h>
#include <vm/vm_pager.h>
#include <vm/vm_radix.h>
#include <vm/vm_reserv.h>
#include <vm/vm_extern.h>

#include <vm/uma.h>
#include <vm/uma_int.h>

#include <linux/gfp.h>
#include <linux/mm.h>
#include <linux/preempt.h>
#include <linux/fs.h>
#include <linux/shmem_fs.h>
#include <linux/kernel.h>
#include <linux/idr.h>
#include <linux/io.h>
#include <linux/io-mapping.h>

#ifdef __i386__
DEFINE_IDR(mtrr_idr);
static MALLOC_DEFINE(M_LKMTRR, "idr", "Linux MTRR compat");
extern int pat_works;
#endif

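/*
 * Fill in the Linux-style sysinfo structure from FreeBSD's physmem count;
 * only a subset of the fields is populated.
 */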
void
si_meminfo(struct sysinfo *si)
{
        si->totalram = physmem;
        si->totalhigh = 0;
        si->mem_unit = PAGE_SIZE;
}

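/*
 * Return the kernel virtual address of a page.  Pages owned by the kernel
 * object derive their address from their index into the kernel map; any
 * other page is translated through the direct map when one is available,
 * otherwise NULL is returned.
 */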
void *
linux_page_address(struct page *page)
{

        if (page->object != kernel_object) {
                return (PMAP_HAS_DMAP ?
                    ((void *)(uintptr_t)PHYS_TO_DMAP(VM_PAGE_TO_PHYS(page))) :
                    NULL);
        }
        return ((void *)(uintptr_t)(VM_MIN_KERNEL_ADDRESS +
            IDX_TO_OFF(page->pindex)));
}

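/*
 * Allocate 2^order wired pages.  On systems with a direct map the pages are
 * taken straight from the page allocator (contiguous and, when GFP_DMA32 is
 * set, below 4GB); without a direct map the allocation is backed by kernel
 * virtual memory obtained from linux_alloc_kmem().
 */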
vm_page_t
linux_alloc_pages(gfp_t flags, unsigned int order)
{
        vm_page_t page;

        if (PMAP_HAS_DMAP) {
                unsigned long npages = 1UL << order;
                int req = VM_ALLOC_WIRED;

                if ((flags & M_ZERO) != 0)
                        req |= VM_ALLOC_ZERO;
                if (order == 0 && (flags & GFP_DMA32) == 0) {
                        page = vm_page_alloc_noobj(req);
                        if (page == NULL)
                                return (NULL);
                } else {
                        vm_paddr_t pmax = (flags & GFP_DMA32) ?
                            BUS_SPACE_MAXADDR_32BIT : BUS_SPACE_MAXADDR;
        retry:
                        page = vm_page_alloc_noobj_contig(req, npages, 0, pmax,
                            PAGE_SIZE, 0, VM_MEMATTR_DEFAULT);
                        if (page == NULL) {
                                if (flags & M_WAITOK) {
                                        if (!vm_page_reclaim_contig(req,
                                            npages, 0, pmax, PAGE_SIZE, 0)) {
                                                vm_wait(NULL);
                                        }
                                        flags &= ~M_WAITOK;
                                        goto retry;
                                }
                                return (NULL);
                        }
                }
        } else {
                vm_offset_t vaddr;

                vaddr = linux_alloc_kmem(flags, order);
                if (vaddr == 0)
                        return (NULL);

                page = PHYS_TO_VM_PAGE(vtophys((void *)vaddr));

                KASSERT(vaddr == (vm_offset_t)page_address(page),
                    ("Page address mismatch"));
        }

        return (page);
}

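/*
 * Release 2^order pages previously obtained from linux_alloc_pages(),
 * unwiring and freeing each page, or returning the backing kernel memory
 * when no direct map is available.
 */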
void
linux_free_pages(vm_page_t page, unsigned int order)
{
        if (PMAP_HAS_DMAP) {
                unsigned long npages = 1UL << order;
                unsigned long x;

                for (x = 0; x != npages; x++) {
                        vm_page_t pgo = page + x;

                        if (vm_page_unwire_noq(pgo))
                                vm_page_free(pgo);
                }
        } else {
                vm_offset_t vaddr;

                vaddr = (vm_offset_t)page_address(page);

                linux_free_kmem(vaddr, order);
        }
}

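/*
 * Allocate 2^order pages of mapped kernel memory.  GFP_DMA32 requests are
 * satisfied with physically contiguous memory below 4GB.
 */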
vm_offset_t
linux_alloc_kmem(gfp_t flags, unsigned int order)
{
        size_t size = ((size_t)PAGE_SIZE) << order;
        void *addr;

        if ((flags & GFP_DMA32) == 0) {
                addr = kmem_malloc(size, flags & GFP_NATIVE_MASK);
        } else {
                addr = kmem_alloc_contig(size, flags & GFP_NATIVE_MASK, 0,
                    BUS_SPACE_MAXADDR_32BIT, PAGE_SIZE, 0, VM_MEMATTR_DEFAULT);
        }
        return ((vm_offset_t)addr);
}

void
linux_free_kmem(vm_offset_t addr, unsigned int order)
{
        size_t size = ((size_t)PAGE_SIZE) << order;

        kmem_free((void *)addr, size);
}

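/*
 * Fault in and hold the user pages backing the range of nr_pages pages
 * starting at "start" in the given map.  Returns nr_pages on success or
 * -EFAULT if any page could not be held.
 */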
static int
linux_get_user_pages_internal(vm_map_t map, unsigned long start, int nr_pages,
    int write, struct page **pages)
{
        vm_prot_t prot;
        size_t len;
        int count;

        prot = write ? (VM_PROT_READ | VM_PROT_WRITE) : VM_PROT_READ;
        len = ptoa((vm_offset_t)nr_pages);
        count = vm_fault_quick_hold_pages(map, start, len, prot, pages, nr_pages);
        return (count == -1 ? -EFAULT : nr_pages);
}

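/*
 * Lock-less variant of get_user_pages(): hold the pages that are already
 * resident and mapped with the required protection, stopping at the first
 * one that is not.  No page faults are taken; the return value is the
 * number of pages held, which may be fewer than nr_pages.
 */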
int
__get_user_pages_fast(unsigned long start, int nr_pages, int write,
    struct page **pages)
{
        vm_map_t map;
        vm_page_t *mp;
        vm_offset_t va;
        vm_offset_t end;
        vm_prot_t prot;
        int count;

        if (nr_pages == 0 || in_interrupt())
                return (0);

        MPASS(pages != NULL);
        map = &curthread->td_proc->p_vmspace->vm_map;
        end = start + ptoa((vm_offset_t)nr_pages);
        if (!vm_map_range_valid(map, start, end))
                return (-EINVAL);
        prot = write ? (VM_PROT_READ | VM_PROT_WRITE) : VM_PROT_READ;
        for (count = 0, mp = pages, va = start; va < end;
            mp++, va += PAGE_SIZE, count++) {
                *mp = pmap_extract_and_hold(map->pmap, va, prot);
                if (*mp == NULL)
                        break;

                if ((prot & VM_PROT_WRITE) != 0 &&
                    (*mp)->dirty != VM_PAGE_BITS_ALL) {
                        /*
                         * Explicitly dirty the physical page.  Otherwise, the
                         * caller's changes may go unnoticed because they are
                         * performed through an unmanaged mapping or by a DMA
                         * operation.
                         *
                         * The object lock is not held here.
                         * See vm_page_clear_dirty_mask().
                         */
                        vm_page_dirty(*mp);
                }
        }
        return (count);
}

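/*
 * Faulting variants of get_user_pages(): fault in and hold nr_pages pages
 * of the given task's (or the current process's) address space.  FOLL_WRITE
 * requests write access.
 */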
long
get_user_pages_remote(struct task_struct *task, struct mm_struct *mm,
    unsigned long start, unsigned long nr_pages, unsigned int gup_flags,
    struct page **pages, struct vm_area_struct **vmas)
{
        vm_map_t map;

        map = &task->task_thread->td_proc->p_vmspace->vm_map;
        return (linux_get_user_pages_internal(map, start, nr_pages,
            !!(gup_flags & FOLL_WRITE), pages));
}

long
get_user_pages(unsigned long start, unsigned long nr_pages,
    unsigned int gup_flags, struct page **pages, struct vm_area_struct **vmas)
{
        vm_map_t map;

        map = &curthread->td_proc->p_vmspace->vm_map;
        return (linux_get_user_pages_internal(map, start, nr_pages,
            !!(gup_flags & FOLL_WRITE), pages));
}

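/*
 * An address is considered a vmalloc address when it resolves to a UMA
 * slab, i.e. it was obtained from the kernel memory allocator.
 */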
int
is_vmalloc_addr(const void *addr)
{
        return (vtoslab((vm_offset_t)addr & ~UMA_SLAB_MASK) != NULL);
}

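/*
 * Insert the page identified by the given physical frame number into the
 * VMA's backing VM object at the faulting offset, removing it from its
 * current (typically shmem) object first if necessary, and apply the
 * requested cache attributes.  Called with the object write-locked.
 */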
vm_fault_t
lkpi_vmf_insert_pfn_prot_locked(struct vm_area_struct *vma, unsigned long addr,
    unsigned long pfn, pgprot_t prot)
{
        vm_object_t vm_obj = vma->vm_obj;
        vm_object_t tmp_obj;
        vm_page_t page;
        vm_pindex_t pindex;

        VM_OBJECT_ASSERT_WLOCKED(vm_obj);
        pindex = OFF_TO_IDX(addr - vma->vm_start);
        if (vma->vm_pfn_count == 0)
                vma->vm_pfn_first = pindex;
        MPASS(pindex <= OFF_TO_IDX(vma->vm_end));

retry:
        page = vm_page_grab(vm_obj, pindex, VM_ALLOC_NOCREAT);
        if (page == NULL) {
                page = PHYS_TO_VM_PAGE(IDX_TO_OFF(pfn));
                if (!vm_page_busy_acquire(page, VM_ALLOC_WAITFAIL))
                        goto retry;
                if (page->object != NULL) {
                        tmp_obj = page->object;
                        vm_page_xunbusy(page);
                        VM_OBJECT_WUNLOCK(vm_obj);
                        VM_OBJECT_WLOCK(tmp_obj);
                        if (page->object == tmp_obj &&
                            vm_page_busy_acquire(page, VM_ALLOC_WAITFAIL)) {
                                KASSERT(page->object == tmp_obj,
                                    ("page has changed identity"));
                                KASSERT((page->oflags & VPO_UNMANAGED) == 0,
                                    ("page does not belong to shmem"));
                                vm_pager_page_unswapped(page);
                                if (pmap_page_is_mapped(page)) {
                                        vm_page_xunbusy(page);
                                        VM_OBJECT_WUNLOCK(tmp_obj);
                                        printf("%s: page rename failed: page "
                                            "is mapped\n", __func__);
                                        VM_OBJECT_WLOCK(vm_obj);
                                        return (VM_FAULT_NOPAGE);
                                }
                                vm_page_remove(page);
                        }
                        VM_OBJECT_WUNLOCK(tmp_obj);
                        VM_OBJECT_WLOCK(vm_obj);
                        goto retry;
                }
                if (vm_page_insert(page, vm_obj, pindex)) {
                        vm_page_xunbusy(page);
                        return (VM_FAULT_OOM);
                }
                vm_page_valid(page);
        }
        pmap_page_set_memattr(page, pgprot2cachemode(prot));
        vma->vm_pfn_count++;

        return (VM_FAULT_NOPAGE);
}

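/*
 * Map "size" bytes worth of physical frames, starting at start_pfn, into
 * the VMA beginning at start_addr.  Retries on VM_FAULT_OOM after waiting
 * for free pages; on any other fault error the partially established
 * mappings are torn down with zap_vma_ptes() and -EFAULT is returned.
 */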
int
lkpi_remap_pfn_range(struct vm_area_struct *vma, unsigned long start_addr,
    unsigned long start_pfn, unsigned long size, pgprot_t prot)
{
        vm_object_t vm_obj;
        unsigned long addr, pfn;
        int err = 0;

        vm_obj = vma->vm_obj;

        VM_OBJECT_WLOCK(vm_obj);
        for (addr = start_addr, pfn = start_pfn;
            addr < start_addr + size;
            addr += PAGE_SIZE) {
                vm_fault_t ret;
retry:
                ret = lkpi_vmf_insert_pfn_prot_locked(vma, addr, pfn, prot);

                if ((ret & VM_FAULT_OOM) != 0) {
                        VM_OBJECT_WUNLOCK(vm_obj);
                        vm_wait(NULL);
                        VM_OBJECT_WLOCK(vm_obj);
                        goto retry;
                }

                if ((ret & VM_FAULT_ERROR) != 0) {
                        err = -EFAULT;
                        break;
                }

                pfn++;
        }
        VM_OBJECT_WUNLOCK(vm_obj);

        if (unlikely(err)) {
                zap_vma_ptes(vma, start_addr,
                    (pfn - start_pfn) << PAGE_SHIFT);
                return (err);
        }

        return (0);
}

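/*
 * Map part of an io_mapping into user space, converting the mapping's
 * cache attribute into page protection bits before delegating to
 * lkpi_remap_pfn_range().
 */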
int
lkpi_io_mapping_map_user(struct io_mapping *iomap,
    struct vm_area_struct *vma, unsigned long addr,
    unsigned long pfn, unsigned long size)
{
        pgprot_t prot;
        int ret;

        prot = cachemode2protval(iomap->attr);
        ret = lkpi_remap_pfn_range(vma, addr, pfn, size, prot);

        return (ret);
}

/*
 * Although the FreeBSD version of unmap_mapping_range has semantics and
 * parameter types compatible with the Linux version, the values passed in
 * are different:
 * @obj should match the vm_private_data field of the vm_area_struct returned
 *      by the mmap file operation handler; see linux_file_mmap_single().
 * @holelen should match the size of the area to be unmapped.
 */
void
lkpi_unmap_mapping_range(void *obj, loff_t const holebegin __unused,
    loff_t const holelen, int even_cows __unused)
{
        vm_object_t devobj;
        vm_page_t page;
        int i, page_count;

        devobj = cdev_pager_lookup(obj);
        if (devobj != NULL) {
                page_count = OFF_TO_IDX(holelen);

                VM_OBJECT_WLOCK(devobj);
retry:
                for (i = 0; i < page_count; i++) {
                        page = vm_page_lookup(devobj, i);
                        if (page == NULL)
                                continue;
                        if (!vm_page_busy_acquire(page, VM_ALLOC_WAITFAIL))
                                goto retry;
                        cdev_pager_free_page(devobj, page);
                }
                VM_OBJECT_WUNLOCK(devobj);
                vm_object_deallocate(devobj);
        }
}

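/*
 * Request write-combining for the physical range [base, base + size).  On
 * i386 systems without working PAT this programs an MTRR through
 * mem_range_attr_set() and remembers it in an IDR so it can be removed
 * later; everywhere else this is a no-op.
 */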
int
lkpi_arch_phys_wc_add(unsigned long base, unsigned long size)
{
#ifdef __i386__
        struct mem_range_desc *mrdesc;
        int error, id, act;

        /* If PAT is available, do nothing */
        if (pat_works)
                return (0);

        mrdesc = malloc(sizeof(*mrdesc), M_LKMTRR, M_WAITOK);
        mrdesc->mr_base = base;
        mrdesc->mr_len = size;
        mrdesc->mr_flags = MDF_WRITECOMBINE;
        strlcpy(mrdesc->mr_owner, "drm", sizeof(mrdesc->mr_owner));
        act = MEMRANGE_SET_UPDATE;
        error = mem_range_attr_set(mrdesc, &act);
        if (error == 0) {
                error = idr_get_new(&mtrr_idr, mrdesc, &id);
                MPASS(idr_find(&mtrr_idr, id) == mrdesc);
                if (error != 0) {
                        act = MEMRANGE_SET_REMOVE;
                        mem_range_attr_set(mrdesc, &act);
                }
        }
        if (error != 0) {
                free(mrdesc, M_LKMTRR);
                pr_warn(
                    "Failed to add WC MTRR for [%p-%p]: %d; "
                    "performance may suffer\n",
                    (void *)base, (void *)(base + size - 1), error);
        } else
                pr_warn("Successfully added WC MTRR for [%p-%p]\n",
                    (void *)base, (void *)(base + size - 1));

        return (error != 0 ? -error : id + __MTRR_ID_BASE);
#else
        return (0);
#endif
}

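/*
 * Undo a previous lkpi_arch_phys_wc_add(): look up the memory-range
 * descriptor by its IDR handle, remove the MTRR setting and free the
 * descriptor.  A handle below __MTRR_ID_BASE means the add failed and
 * there is nothing to undo.
 */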
void
lkpi_arch_phys_wc_del(int reg)
{
#ifdef __i386__
        struct mem_range_desc *mrdesc;
        int act;

        /* Check if arch_phys_wc_add() failed. */
        if (reg < __MTRR_ID_BASE)
                return;

        mrdesc = idr_find(&mtrr_idr, reg - __MTRR_ID_BASE);
        MPASS(mrdesc != NULL);
        idr_remove(&mtrr_idr, reg - __MTRR_ID_BASE);
        act = MEMRANGE_SET_REMOVE;
        mem_range_attr_set(mrdesc, &act);
        free(mrdesc, M_LKMTRR);
#endif
}

/*
 * This is a highly simplified version of the Linux page_frag_cache.
 * We only support fragment sizes up to a single page and we always
 * return a full page.  This may be wasteful for small objects, but the
 * only known consumer (mt76) asks for either a half page or a full
 * page.  If this were to become a problem we could implement a more
 * elaborate version.
 */
void *
linuxkpi_page_frag_alloc(struct page_frag_cache *pfc,
    size_t fragsz, gfp_t gfp)
{
        vm_page_t pages;

        if (fragsz == 0)
                return (NULL);

        KASSERT(fragsz <= PAGE_SIZE, ("%s: fragsz %zu > PAGE_SIZE not yet "
            "supported", __func__, fragsz));

        pages = alloc_pages(gfp, flsl(howmany(fragsz, PAGE_SIZE) - 1));
        if (pages == NULL)
                return (NULL);
        pfc->va = linux_page_address(pages);

        /* Passed in as "count" to __page_frag_cache_drain(). Unused by us. */
        pfc->pagecnt_bias = 0;

        return (pfc->va);
}

void
linuxkpi_page_frag_free(void *addr)
{
        vm_page_t page;

        page = PHYS_TO_VM_PAGE(vtophys(addr));
        linux_free_pages(page, 0);
}

void
linuxkpi__page_frag_cache_drain(struct page *page, size_t count __unused)
{

        linux_free_pages(page, 0);
}
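
/*
 * Illustrative usage sketch (not part of this file), assuming the usual
 * Linux names page_frag_alloc()/page_frag_free() map onto the functions
 * above and that rx_buf_size is a hypothetical size <= PAGE_SIZE:
 *
 *	struct page_frag_cache pfc = {};
 *	void *buf;
 *
 *	buf = page_frag_alloc(&pfc, rx_buf_size, GFP_KERNEL);
 *	if (buf != NULL) {
 *		... fill and hand off the buffer ...
 *		page_frag_free(buf);
 *	}
 */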
