FreeBSD/Linux Kernel Cross Reference
sys/mm/mprotect.c

/*
 *  mm/mprotect.c
 *
 *  (C) Copyright 1994 Linus Torvalds
 *  (C) Copyright 2002 Christoph Hellwig
 *
 *  Address space accounting code       <alan@lxorguk.ukuu.org.uk>
 *  (C) Copyright 2002 Red Hat Inc, All Rights Reserved
 */

#include <linux/mm.h>
#include <linux/hugetlb.h>
#include <linux/shm.h>
#include <linux/mman.h>
#include <linux/fs.h>
#include <linux/highmem.h>
#include <linux/security.h>
#include <linux/mempolicy.h>
#include <linux/personality.h>
#include <linux/syscalls.h>
#include <linux/swap.h>
#include <linux/swapops.h>
#include <linux/mmu_notifier.h>
#include <linux/migrate.h>
#include <linux/perf_event.h>
#include <asm/uaccess.h>
#include <asm/pgtable.h>
#include <asm/cacheflush.h>
#include <asm/tlbflush.h>

#ifndef pgprot_modify
static inline pgprot_t pgprot_modify(pgprot_t oldprot, pgprot_t newprot)
{
        return newprot;
}
#endif

static unsigned long change_pte_range(struct vm_area_struct *vma, pmd_t *pmd,
                unsigned long addr, unsigned long end, pgprot_t newprot,
                int dirty_accountable, int prot_numa, bool *ret_all_same_node)
{
        struct mm_struct *mm = vma->vm_mm;
        pte_t *pte, oldpte;
        spinlock_t *ptl;
        unsigned long pages = 0;
        bool all_same_node = true;
        int last_nid = -1;

        pte = pte_offset_map_lock(mm, pmd, addr, &ptl);
        arch_enter_lazy_mmu_mode();
        do {
                oldpte = *pte;
                if (pte_present(oldpte)) {
                        pte_t ptent;
                        bool updated = false;

                        ptent = ptep_modify_prot_start(mm, addr, pte);
                        if (!prot_numa) {
                                ptent = pte_modify(ptent, newprot);
                                updated = true;
                        } else {
                                struct page *page;

                                page = vm_normal_page(vma, addr, oldpte);
                                if (page) {
                                        int this_nid = page_to_nid(page);
                                        if (last_nid == -1)
                                                last_nid = this_nid;
                                        if (last_nid != this_nid)
                                                all_same_node = false;

                                        /* only check non-shared pages */
                                        if (!pte_numa(oldpte) &&
                                            page_mapcount(page) == 1) {
                                                ptent = pte_mknuma(ptent);
                                                updated = true;
                                        }
                                }
                        }

                        /*
                         * Avoid taking write faults for pages we know to be
                         * dirty.
                         */
                        if (dirty_accountable && pte_dirty(ptent)) {
                                ptent = pte_mkwrite(ptent);
                                updated = true;
                        }

                        if (updated)
                                pages++;
                        ptep_modify_prot_commit(mm, addr, pte, ptent);
                } else if (IS_ENABLED(CONFIG_MIGRATION) && !pte_file(oldpte)) {
                        swp_entry_t entry = pte_to_swp_entry(oldpte);

                        if (is_write_migration_entry(entry)) {
                                /*
                                 * A protection check is difficult, so just
                                 * be safe and disable write.
                                 */
                                make_migration_entry_read(&entry);
                                set_pte_at(mm, addr, pte,
                                        swp_entry_to_pte(entry));
                        }
                        pages++;
                }
        } while (pte++, addr += PAGE_SIZE, addr != end);
        arch_leave_lazy_mmu_mode();
        pte_unmap_unlock(pte - 1, ptl);

        *ret_all_same_node = all_same_node;
        return pages;
}

#ifdef CONFIG_NUMA_BALANCING
static inline void change_pmd_protnuma(struct mm_struct *mm, unsigned long addr,
                                       pmd_t *pmd)
{
        spin_lock(&mm->page_table_lock);
        set_pmd_at(mm, addr & PMD_MASK, pmd, pmd_mknuma(*pmd));
        spin_unlock(&mm->page_table_lock);
}
#else
static inline void change_pmd_protnuma(struct mm_struct *mm, unsigned long addr,
                                       pmd_t *pmd)
{
        BUG();
}
#endif /* CONFIG_NUMA_BALANCING */

static inline unsigned long change_pmd_range(struct vm_area_struct *vma,
                pud_t *pud, unsigned long addr, unsigned long end,
                pgprot_t newprot, int dirty_accountable, int prot_numa)
{
        pmd_t *pmd;
        unsigned long next;
        unsigned long pages = 0;
        bool all_same_node;

        pmd = pmd_offset(pud, addr);
        do {
                next = pmd_addr_end(addr, end);
                if (pmd_trans_huge(*pmd)) {
                        if (next - addr != HPAGE_PMD_SIZE)
                                split_huge_page_pmd(vma, addr, pmd);
                        else if (change_huge_pmd(vma, pmd, addr, newprot,
                                                 prot_numa)) {
                                pages += HPAGE_PMD_NR;
                                continue;
                        }
                        /* fall through */
                }
                if (pmd_none_or_clear_bad(pmd))
                        continue;
                pages += change_pte_range(vma, pmd, addr, next, newprot,
                                 dirty_accountable, prot_numa, &all_same_node);

                /*
                 * If we are changing protections for NUMA hinting faults then
                 * set pmd_numa if the examined pages were all on the same
                 * node. This allows a regular PMD to be handled as one fault
                 * and effectively batches the taking of the PTL.
                 */
                if (prot_numa && all_same_node)
                        change_pmd_protnuma(vma->vm_mm, addr, pmd);
        } while (pmd++, addr = next, addr != end);

        return pages;
}

static inline unsigned long change_pud_range(struct vm_area_struct *vma,
                pgd_t *pgd, unsigned long addr, unsigned long end,
                pgprot_t newprot, int dirty_accountable, int prot_numa)
{
        pud_t *pud;
        unsigned long next;
        unsigned long pages = 0;

        pud = pud_offset(pgd, addr);
        do {
                next = pud_addr_end(addr, end);
                if (pud_none_or_clear_bad(pud))
                        continue;
                pages += change_pmd_range(vma, pud, addr, next, newprot,
                                 dirty_accountable, prot_numa);
        } while (pud++, addr = next, addr != end);

        return pages;
}

static unsigned long change_protection_range(struct vm_area_struct *vma,
                unsigned long addr, unsigned long end, pgprot_t newprot,
                int dirty_accountable, int prot_numa)
{
        struct mm_struct *mm = vma->vm_mm;
        pgd_t *pgd;
        unsigned long next;
        unsigned long start = addr;
        unsigned long pages = 0;

        BUG_ON(addr >= end);
        pgd = pgd_offset(mm, addr);
        flush_cache_range(vma, addr, end);
        do {
                next = pgd_addr_end(addr, end);
                if (pgd_none_or_clear_bad(pgd))
                        continue;
                pages += change_pud_range(vma, pgd, addr, next, newprot,
                                 dirty_accountable, prot_numa);
        } while (pgd++, addr = next, addr != end);

        /* Only flush the TLB if we actually modified any entries: */
        if (pages)
                flush_tlb_range(vma, start, end);

        return pages;
}

unsigned long change_protection(struct vm_area_struct *vma, unsigned long start,
                       unsigned long end, pgprot_t newprot,
                       int dirty_accountable, int prot_numa)
{
        struct mm_struct *mm = vma->vm_mm;
        unsigned long pages;

        mmu_notifier_invalidate_range_start(mm, start, end);
        if (is_vm_hugetlb_page(vma))
                pages = hugetlb_change_protection(vma, start, end, newprot);
        else
                pages = change_protection_range(vma, start, end, newprot, dirty_accountable, prot_numa);
        mmu_notifier_invalidate_range_end(mm, start, end);

        return pages;
}

int
mprotect_fixup(struct vm_area_struct *vma, struct vm_area_struct **pprev,
        unsigned long start, unsigned long end, unsigned long newflags)
{
        struct mm_struct *mm = vma->vm_mm;
        unsigned long oldflags = vma->vm_flags;
        long nrpages = (end - start) >> PAGE_SHIFT;
        unsigned long charged = 0;
        pgoff_t pgoff;
        int error;
        int dirty_accountable = 0;

        if (newflags == oldflags) {
                *pprev = vma;
                return 0;
        }

        /*
         * If we make a private mapping writable we increase our commit;
         * but (without finer accounting) we cannot reduce our commit if we
         * make it unwritable again. hugetlb mappings were accounted for
         * even if read-only, so there is no need to account for them here.
         */
        if (newflags & VM_WRITE) {
                if (!(oldflags & (VM_ACCOUNT|VM_WRITE|VM_HUGETLB|
                                                VM_SHARED|VM_NORESERVE))) {
                        charged = nrpages;
                        if (security_vm_enough_memory_mm(mm, charged))
                                return -ENOMEM;
                        newflags |= VM_ACCOUNT;
                }
        }

        /*
         * First try to merge with previous and/or next vma.
         */
        pgoff = vma->vm_pgoff + ((start - vma->vm_start) >> PAGE_SHIFT);
        *pprev = vma_merge(mm, *pprev, start, end, newflags,
                        vma->anon_vma, vma->vm_file, pgoff, vma_policy(vma));
        if (*pprev) {
                vma = *pprev;
                goto success;
        }

        *pprev = vma;

        if (start != vma->vm_start) {
                error = split_vma(mm, vma, start, 1);
                if (error)
                        goto fail;
        }

        if (end != vma->vm_end) {
                error = split_vma(mm, vma, end, 0);
                if (error)
                        goto fail;
        }

success:
        /*
         * vm_flags and vm_page_prot are protected by the mmap_sem
         * held in write mode.
         */
        vma->vm_flags = newflags;
        vma->vm_page_prot = pgprot_modify(vma->vm_page_prot,
                                          vm_get_page_prot(newflags));

        if (vma_wants_writenotify(vma)) {
                vma->vm_page_prot = vm_get_page_prot(newflags & ~VM_SHARED);
                dirty_accountable = 1;
        }

        change_protection(vma, start, end, vma->vm_page_prot,
                          dirty_accountable, 0);

        vm_stat_account(mm, oldflags, vma->vm_file, -nrpages);
        vm_stat_account(mm, newflags, vma->vm_file, nrpages);
        perf_event_mmap(vma);
        return 0;

fail:
        vm_unacct_memory(charged);
        return error;
}

SYSCALL_DEFINE3(mprotect, unsigned long, start, size_t, len,
                unsigned long, prot)
{
        unsigned long vm_flags, nstart, end, tmp, reqprot;
        struct vm_area_struct *vma, *prev;
        int error = -EINVAL;
        const int grows = prot & (PROT_GROWSDOWN|PROT_GROWSUP);
        prot &= ~(PROT_GROWSDOWN|PROT_GROWSUP);
        if (grows == (PROT_GROWSDOWN|PROT_GROWSUP)) /* can't be both */
                return -EINVAL;

        if (start & ~PAGE_MASK)
                return -EINVAL;
        if (!len)
                return 0;
        len = PAGE_ALIGN(len);
        end = start + len;
        if (end <= start)
                return -ENOMEM;
        if (!arch_validate_prot(prot))
                return -EINVAL;

        reqprot = prot;
        /*
         * Does the application expect PROT_READ to imply PROT_EXEC?
         */
        if ((prot & PROT_READ) && (current->personality & READ_IMPLIES_EXEC))
                prot |= PROT_EXEC;

        vm_flags = calc_vm_prot_bits(prot);

        down_write(&current->mm->mmap_sem);

        vma = find_vma(current->mm, start);
        error = -ENOMEM;
        if (!vma)
                goto out;
        prev = vma->vm_prev;
        if (unlikely(grows & PROT_GROWSDOWN)) {
                if (vma->vm_start >= end)
                        goto out;
                start = vma->vm_start;
                error = -EINVAL;
                if (!(vma->vm_flags & VM_GROWSDOWN))
                        goto out;
        } else {
                if (vma->vm_start > start)
                        goto out;
                if (unlikely(grows & PROT_GROWSUP)) {
                        end = vma->vm_end;
                        error = -EINVAL;
                        if (!(vma->vm_flags & VM_GROWSUP))
                                goto out;
                }
        }
        if (start > vma->vm_start)
                prev = vma;

        for (nstart = start ; ; ) {
                unsigned long newflags;

                /* Here we know that vma->vm_start <= nstart < vma->vm_end. */

                newflags = vm_flags;
                newflags |= (vma->vm_flags & ~(VM_READ | VM_WRITE | VM_EXEC));

                /* newflags >> 4 shifts VM_MAY% into the position of VM_% */
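                /*
                 * Worked illustration of the check below (the bit values are
                 * assumed from the usual definitions in include/linux/mm.h:
                 * VM_READ 0x1, VM_WRITE 0x2, VM_EXEC 0x4, VM_MAYREAD 0x10,
                 * VM_MAYWRITE 0x20, VM_MAYEXEC 0x40): requesting PROT_WRITE
                 * on a VMA that carries VM_MAYREAD|VM_MAYEXEC but not
                 * VM_MAYWRITE leaves the VM_WRITE bit set in
                 * newflags & ~(newflags >> 4), so the request is rejected
                 * with -EACCES.
                 */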
                if ((newflags & ~(newflags >> 4)) & (VM_READ | VM_WRITE | VM_EXEC)) {
                        error = -EACCES;
                        goto out;
                }

                error = security_file_mprotect(vma, reqprot, prot);
                if (error)
                        goto out;

                tmp = vma->vm_end;
                if (tmp > end)
                        tmp = end;
                error = mprotect_fixup(vma, &prev, nstart, tmp, newflags);
                if (error)
                        goto out;
                nstart = tmp;

                if (nstart < prev->vm_end)
                        nstart = prev->vm_end;
                if (nstart >= end)
                        goto out;

                vma = prev->vm_next;
                if (!vma || vma->vm_start != nstart) {
                        error = -ENOMEM;
                        goto out;
                }
        }
out:
        up_write(&current->mm->mmap_sem);
        return error;
}
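
For context, here is a minimal userspace sketch of how the mprotect(2) system call implemented by SYSCALL_DEFINE3(mprotect, ...) above is typically reached. The address must be page aligned (the start & ~PAGE_MASK check above) and the protection is expressed with the PROT_* bits that calc_vm_prot_bits() converts to VM_* flags. Only standard POSIX calls are used; the one-page buffer and the choice of dropping write permission are illustrative assumptions, not anything taken from this file.

#include <stdio.h>
#include <sys/mman.h>
#include <unistd.h>

int main(void)
{
        long page = sysconf(_SC_PAGESIZE);

        /* Map one anonymous, readable and writable page. */
        char *buf = mmap(NULL, page, PROT_READ | PROT_WRITE,
                         MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
        if (buf == MAP_FAILED) {
                perror("mmap");
                return 1;
        }
        buf[0] = 42;

        /*
         * Revoke write permission.  The address is page aligned because it
         * came from mmap(), which is what the syscall above requires.
         */
        if (mprotect(buf, page, PROT_READ) == -1) {
                perror("mprotect");
                return 1;
        }

        /* Reading still works; a store to buf[0] now would raise SIGSEGV. */
        printf("first byte after mprotect: %d\n", buf[0]);

        munmap(buf, page);
        return 0;
}

Compiled with any C compiler (for example, cc demo.c, where demo.c is just a placeholder name), it prints the byte and exits; adding a store to buf[0] after the mprotect() call instead ends the process with SIGSEGV, which is the user-visible effect of the PTE protection change performed by change_pte_range() above.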

This page is part of the FreeBSD/Linux Kernel Cross-Reference and was automatically generated using a modified version of the LXR engine.