The Design and Implementation of the FreeBSD Operating System, Second Edition
Now available: The Design and Implementation of the FreeBSD Operating System (Second Edition)


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]

FreeBSD/Linux Kernel Cross Reference
sys/kernel/sys.c

Version: -  FREEBSD  -  FREEBSD-13-STABLE  -  FREEBSD-13-0  -  FREEBSD-12-STABLE  -  FREEBSD-12-0  -  FREEBSD-11-STABLE  -  FREEBSD-11-0  -  FREEBSD-10-STABLE  -  FREEBSD-10-0  -  FREEBSD-9-STABLE  -  FREEBSD-9-0  -  FREEBSD-8-STABLE  -  FREEBSD-8-0  -  FREEBSD-7-STABLE  -  FREEBSD-7-0  -  FREEBSD-6-STABLE  -  FREEBSD-6-0  -  FREEBSD-5-STABLE  -  FREEBSD-5-0  -  FREEBSD-4-STABLE  -  FREEBSD-3-STABLE  -  FREEBSD22  -  l41  -  OPENBSD  -  linux-2.6  -  MK84  -  PLAN9  -  xnu-8792 
SearchContext: -  none  -  3  -  10 

    1 /*
    2  *  linux/kernel/sys.c
    3  *
    4  *  Copyright (C) 1991, 1992  Linus Torvalds
    5  */
    6 
    7 #include <linux/export.h>
    8 #include <linux/mm.h>
    9 #include <linux/utsname.h>
   10 #include <linux/mman.h>
   11 #include <linux/reboot.h>
   12 #include <linux/prctl.h>
   13 #include <linux/highuid.h>
   14 #include <linux/fs.h>
   15 #include <linux/kmod.h>
   16 #include <linux/perf_event.h>
   17 #include <linux/resource.h>
   18 #include <linux/kernel.h>
   19 #include <linux/kexec.h>
   20 #include <linux/workqueue.h>
   21 #include <linux/capability.h>
   22 #include <linux/device.h>
   23 #include <linux/key.h>
   24 #include <linux/times.h>
   25 #include <linux/posix-timers.h>
   26 #include <linux/security.h>
   27 #include <linux/dcookies.h>
   28 #include <linux/suspend.h>
   29 #include <linux/tty.h>
   30 #include <linux/signal.h>
   31 #include <linux/cn_proc.h>
   32 #include <linux/getcpu.h>
   33 #include <linux/task_io_accounting_ops.h>
   34 #include <linux/seccomp.h>
   35 #include <linux/cpu.h>
   36 #include <linux/personality.h>
   37 #include <linux/ptrace.h>
   38 #include <linux/fs_struct.h>
   39 #include <linux/file.h>
   40 #include <linux/mount.h>
   41 #include <linux/gfp.h>
   42 #include <linux/syscore_ops.h>
   43 #include <linux/version.h>
   44 #include <linux/ctype.h>
   45 
   46 #include <linux/compat.h>
   47 #include <linux/syscalls.h>
   48 #include <linux/kprobes.h>
   49 #include <linux/user_namespace.h>
   50 
   51 #include <linux/kmsg_dump.h>
   52 /* Move somewhere else to avoid recompiling? */
   53 #include <generated/utsrelease.h>
   54 
   55 #include <asm/uaccess.h>
   56 #include <asm/io.h>
   57 #include <asm/unistd.h>
   58 
   59 #ifndef SET_UNALIGN_CTL
   60 # define SET_UNALIGN_CTL(a,b)   (-EINVAL)
   61 #endif
   62 #ifndef GET_UNALIGN_CTL
   63 # define GET_UNALIGN_CTL(a,b)   (-EINVAL)
   64 #endif
   65 #ifndef SET_FPEMU_CTL
   66 # define SET_FPEMU_CTL(a,b)     (-EINVAL)
   67 #endif
   68 #ifndef GET_FPEMU_CTL
   69 # define GET_FPEMU_CTL(a,b)     (-EINVAL)
   70 #endif
   71 #ifndef SET_FPEXC_CTL
   72 # define SET_FPEXC_CTL(a,b)     (-EINVAL)
   73 #endif
   74 #ifndef GET_FPEXC_CTL
   75 # define GET_FPEXC_CTL(a,b)     (-EINVAL)
   76 #endif
   77 #ifndef GET_ENDIAN
   78 # define GET_ENDIAN(a,b)        (-EINVAL)
   79 #endif
   80 #ifndef SET_ENDIAN
   81 # define SET_ENDIAN(a,b)        (-EINVAL)
   82 #endif
   83 #ifndef GET_TSC_CTL
   84 # define GET_TSC_CTL(a)         (-EINVAL)
   85 #endif
   86 #ifndef SET_TSC_CTL
   87 # define SET_TSC_CTL(a)         (-EINVAL)
   88 #endif
   89 
   90 /*
   91  * this is where the system-wide overflow UID and GID are defined, for
   92  * architectures that now have 32-bit UID/GID but didn't in the past
   93  */
   94 
   95 int overflowuid = DEFAULT_OVERFLOWUID;
   96 int overflowgid = DEFAULT_OVERFLOWGID;
   97 
   98 EXPORT_SYMBOL(overflowuid);
   99 EXPORT_SYMBOL(overflowgid);
  100 
  101 /*
  102  * the same as above, but for filesystems which can only store a 16-bit
  103  * UID and GID. as such, this is needed on all architectures
  104  */
  105 
  106 int fs_overflowuid = DEFAULT_FS_OVERFLOWUID;
  107 int fs_overflowgid = DEFAULT_FS_OVERFLOWUID;
  108 
  109 EXPORT_SYMBOL(fs_overflowuid);
  110 EXPORT_SYMBOL(fs_overflowgid);
  111 
  112 /*
  113  * this indicates whether you can reboot with ctrl-alt-del: the default is yes
       *
       * ctrl_alt_del() schedules a kernel_restart() when this is non-zero and
       * sends SIGINT through kill_cad_pid() when it is zero.  The reboot()
       * system call sets it via LINUX_REBOOT_CMD_CAD_ON / LINUX_REBOOT_CMD_CAD_OFF.
  114  */
  115 
  116 int C_A_D = 1;
  117 struct pid *cad_pid;    /* NOTE(review): presumably the pid kill_cad_pid() signals - confirm */
  118 EXPORT_SYMBOL(cad_pid);
  119 
  120 /*
  121  * If set, this is used for preparing the system to power off.
       * kernel_power_off() calls it, when non-NULL, right after
       * kernel_shutdown_prepare() and before disable_nonboot_cpus().
  122  */
  123 
  124 void (*pm_power_off_prepare)(void);
  125 
  126 /*
  127  * Returns true if current's euid is same as p's uid or euid,
  128  * or has CAP_SYS_NICE to p's user_ns.
  129  *
  130  * Called with rcu_read_lock, creds are safe
  131  */
  132 static bool set_one_prio_perm(struct task_struct *p)
  133 {
  134         const struct cred *cred = current_cred(), *pcred = __task_cred(p);
  135 
  136         if (uid_eq(pcred->uid,  cred->euid) ||
  137             uid_eq(pcred->euid, cred->euid))
  138                 return true;
  139         if (ns_capable(pcred->user_ns, CAP_SYS_NICE))
  140                 return true;
  141         return false;
  142 }
  143 
  144 /*
  145  * set the priority of a task
  146  * - the caller must hold the RCU read lock
  147  */
  148 static int set_one_prio(struct task_struct *p, int niceval, int error)
  149 {
  150         int no_nice;
  151 
  152         if (!set_one_prio_perm(p)) {
  153                 error = -EPERM;
  154                 goto out;
  155         }
  156         if (niceval < task_nice(p) && !can_nice(p, niceval)) {
  157                 error = -EACCES;
  158                 goto out;
  159         }
  160         no_nice = security_task_setnice(p, niceval);
  161         if (no_nice) {
  162                 error = no_nice;
  163                 goto out;
  164         }
  165         if (error == -ESRCH)
  166                 error = 0;
  167         set_user_nice(p, niceval);
  168 out:
  169         return error;
  170 }
  171 
  172 SYSCALL_DEFINE3(setpriority, int, which, int, who, int, niceval)
  173 {
  174         struct task_struct *g, *p;
  175         struct user_struct *user;
  176         const struct cred *cred = current_cred();
  177         int error = -EINVAL;
  178         struct pid *pgrp;
  179         kuid_t uid;
  180 
  181         if (which > PRIO_USER || which < PRIO_PROCESS)
  182                 goto out;
  183 
  184         /* normalize: avoid signed division (rounding problems) */
  185         error = -ESRCH;
  186         if (niceval < -20)
  187                 niceval = -20;
  188         if (niceval > 19)
  189                 niceval = 19;
  190 
  191         rcu_read_lock();
  192         read_lock(&tasklist_lock);
  193         switch (which) {
  194                 case PRIO_PROCESS:
  195                         if (who)
  196                                 p = find_task_by_vpid(who);
  197                         else
  198                                 p = current;
  199                         if (p)
  200                                 error = set_one_prio(p, niceval, error);
  201                         break;
  202                 case PRIO_PGRP:
  203                         if (who)
  204                                 pgrp = find_vpid(who);
  205                         else
  206                                 pgrp = task_pgrp(current);
  207                         do_each_pid_thread(pgrp, PIDTYPE_PGID, p) {
  208                                 error = set_one_prio(p, niceval, error);
  209                         } while_each_pid_thread(pgrp, PIDTYPE_PGID, p);
  210                         break;
  211                 case PRIO_USER:
  212                         uid = make_kuid(cred->user_ns, who);
  213                         user = cred->user;
  214                         if (!who)
  215                                 uid = cred->uid;
  216                         else if (!uid_eq(uid, cred->uid) &&
  217                                  !(user = find_user(uid)))
  218                                 goto out_unlock;        /* No processes for this user */
  219 
  220                         do_each_thread(g, p) {
  221                                 if (uid_eq(task_uid(p), uid))
  222                                         error = set_one_prio(p, niceval, error);
  223                         } while_each_thread(g, p);
  224                         if (!uid_eq(uid, cred->uid))
  225                                 free_uid(user);         /* For find_user() */
  226                         break;
  227         }
  228 out_unlock:
  229         read_unlock(&tasklist_lock);
  230         rcu_read_unlock();
  231 out:
  232         return error;
  233 }
  234 
  235 /*
  236  * Ugh. To avoid negative return values, "getpriority()" will
  237  * not return the normal nice-value, but a negated value that
  238  * has been offset by 20 (ie it returns 40..1 instead of -20..19)
  239  * to stay compatible.
  240  */
  241 SYSCALL_DEFINE2(getpriority, int, which, int, who)
  242 {
  243         struct task_struct *g, *p;
  244         struct user_struct *user;
  245         const struct cred *cred = current_cred();
  246         long niceval, retval = -ESRCH;
  247         struct pid *pgrp;
  248         kuid_t uid;
  249 
  250         if (which > PRIO_USER || which < PRIO_PROCESS)
  251                 return -EINVAL;
  252 
  253         rcu_read_lock();
  254         read_lock(&tasklist_lock);
  255         switch (which) {
  256                 case PRIO_PROCESS:
  257                         if (who)
  258                                 p = find_task_by_vpid(who);
  259                         else
  260                                 p = current;
  261                         if (p) {
  262                                 niceval = 20 - task_nice(p);
  263                                 if (niceval > retval)
  264                                         retval = niceval;
  265                         }
  266                         break;
  267                 case PRIO_PGRP:
  268                         if (who)
  269                                 pgrp = find_vpid(who);
  270                         else
  271                                 pgrp = task_pgrp(current);
  272                         do_each_pid_thread(pgrp, PIDTYPE_PGID, p) {
  273                                 niceval = 20 - task_nice(p);
  274                                 if (niceval > retval)
  275                                         retval = niceval;
  276                         } while_each_pid_thread(pgrp, PIDTYPE_PGID, p);
  277                         break;
  278                 case PRIO_USER:
  279                         uid = make_kuid(cred->user_ns, who);
  280                         user = cred->user;
  281                         if (!who)
  282                                 uid = cred->uid;
  283                         else if (!uid_eq(uid, cred->uid) &&
  284                                  !(user = find_user(uid)))
  285                                 goto out_unlock;        /* No processes for this user */
  286 
  287                         do_each_thread(g, p) {
  288                                 if (uid_eq(task_uid(p), uid)) {
  289                                         niceval = 20 - task_nice(p);
  290                                         if (niceval > retval)
  291                                                 retval = niceval;
  292                                 }
  293                         } while_each_thread(g, p);
  294                         if (!uid_eq(uid, cred->uid))
  295                                 free_uid(user);         /* for find_user() */
  296                         break;
  297         }
  298 out_unlock:
  299         read_unlock(&tasklist_lock);
  300         rcu_read_unlock();
  301 
  302         return retval;
  303 }
  304 
  305 /**
  306  *      emergency_restart - reboot the system
  307  *
  308  *      Without shutting down any hardware or taking any locks
  309  *      reboot the system.  This is called when we know we are in
  310  *      trouble so this is our best effort to reboot.  This is
  311  *      safe to call in interrupt context.
       *
       *      Requests a KMSG_DUMP_EMERG kernel-message dump and then hands
       *      over to machine_emergency_restart().
  312  */
  313 void emergency_restart(void)
  314 {
  315         kmsg_dump(KMSG_DUMP_EMERG);
  316         machine_emergency_restart();
  317 }
  318 EXPORT_SYMBOL_GPL(emergency_restart);
  319 
  320 void kernel_restart_prepare(char *cmd)
  321 {
              /*
               * Preparation steps performed by kernel_restart() before the
               * machine is actually restarted.  @cmd is handed to the reboot
               * notifiers unchanged (kernel_restart() may pass NULL).
               */
  322         blocking_notifier_call_chain(&reboot_notifier_list, SYS_RESTART, cmd);
  323         system_state = SYSTEM_RESTART;  /* set only after the notifiers have run */
  324         usermodehelper_disable();
  325         device_shutdown();
  326         syscore_shutdown();
  327 }
  328 
  329 /**
  330  *      register_reboot_notifier - Register function to be called at reboot time
  331  *      @nb: Info about notifier function to be called
  332  *
  333  *      Registers a function with the list of functions
  334  *      to be called at reboot time.
       *
       *      The block is added to reboot_notifier_list, the chain that
       *      kernel_restart_prepare() and kernel_shutdown_prepare() invoke.
  335  *
  336  *      Currently always returns zero, as blocking_notifier_chain_register()
  337  *      always returns zero.
  338  */
  339 int register_reboot_notifier(struct notifier_block *nb)
  340 {
  341         return blocking_notifier_chain_register(&reboot_notifier_list, nb);
  342 }
  343 EXPORT_SYMBOL(register_reboot_notifier);
  344 
  345 /**
  346  *      unregister_reboot_notifier - Unregister previously registered reboot notifier
  347  *      @nb: Hook to be unregistered
  348  *
  349  *      Unregisters a previously registered reboot
  350  *      notifier function.
       *
       *      The block is removed from reboot_notifier_list; the result of
       *      blocking_notifier_chain_unregister() is returned unchanged.
  351  *
  352  *      Returns zero on success, or %-ENOENT on failure.
  353  */
  354 int unregister_reboot_notifier(struct notifier_block *nb)
  355 {
  356         return blocking_notifier_chain_unregister(&reboot_notifier_list, nb);
  357 }
  358 EXPORT_SYMBOL(unregister_reboot_notifier);
  359 
  360 /**
  361  *      kernel_restart - reboot the system
  362  *      @cmd: pointer to buffer containing command to execute for restart
  363  *              or %NULL
  364  *
  365  *      Shutdown everything and perform a clean reboot.
  366  *      This is not safe to call in interrupt context.
  367  */
  368 void kernel_restart(char *cmd)
  369 {
  370         kernel_restart_prepare(cmd);
  371         disable_nonboot_cpus();
  372         if (!cmd)
  373                 printk(KERN_EMERG "Restarting system.\n");
  374         else
  375                 printk(KERN_EMERG "Restarting system with command '%s'.\n", cmd);
  376         kmsg_dump(KMSG_DUMP_RESTART);
  377         machine_restart(cmd);
  378 }
  379 EXPORT_SYMBOL_GPL(kernel_restart);
  380 
  381 static void kernel_shutdown_prepare(enum system_states state)
  382 {
  383         blocking_notifier_call_chain(&reboot_notifier_list,
  384                 (state == SYSTEM_HALT)?SYS_HALT:SYS_POWER_OFF, NULL);
  385         system_state = state;
  386         usermodehelper_disable();
  387         device_shutdown();
  388 }
  389 /**
  390  *      kernel_halt - halt the system
  391  *
  392  *      Shutdown everything and perform a clean system halt.
       *
       *      Sequence: kernel_shutdown_prepare(SYSTEM_HALT), syscore_shutdown(),
       *      an emergency-level log message, a KMSG_DUMP_HALT kernel-message
       *      dump, then machine_halt().
  393  */
  394 void kernel_halt(void)
  395 {
  396         kernel_shutdown_prepare(SYSTEM_HALT);
  397         syscore_shutdown();
  398         printk(KERN_EMERG "System halted.\n");
  399         kmsg_dump(KMSG_DUMP_HALT);
  400         machine_halt();
  401 }
  402 
  403 EXPORT_SYMBOL_GPL(kernel_halt);
  404 
  405 /**
  406  *      kernel_power_off - power_off the system
  407  *
  408  *      Shutdown everything and perform a clean system power_off.
       *
       *      Sequence: kernel_shutdown_prepare(SYSTEM_POWER_OFF), the optional
       *      pm_power_off_prepare() hook, disable_nonboot_cpus(),
       *      syscore_shutdown(), an emergency-level log message, a
       *      KMSG_DUMP_POWEROFF kernel-message dump, then machine_power_off().
  409  */
  410 void kernel_power_off(void)
  411 {
  412         kernel_shutdown_prepare(SYSTEM_POWER_OFF);
  413         if (pm_power_off_prepare)       /* hook is optional; skipped when NULL */
  414                 pm_power_off_prepare();
  415         disable_nonboot_cpus();
  416         syscore_shutdown();
  417         printk(KERN_EMERG "Power down.\n");
  418         kmsg_dump(KMSG_DUMP_POWEROFF);
  419         machine_power_off();
  420 }
  421 EXPORT_SYMBOL_GPL(kernel_power_off);
  422 
  423 static DEFINE_MUTEX(reboot_mutex);
  424 
  425 /*
  426  * Reboot system call: for obvious reasons only root may call it,
  427  * and even root needs to set up some magic numbers in the registers
  428  * so that some mistake won't make this reboot the whole machine.
  429  * You can also set the meaning of the ctrl-alt-del-key here.
  430  *
  431  * reboot doesn't sync: do that yourself before calling this.
  432  */
  433 SYSCALL_DEFINE4(reboot, int, magic1, int, magic2, unsigned int, cmd,
  434                 void __user *, arg)
  435 {
  436         char buffer[256];
  437         int ret = 0;
  438 
  439         /* We only trust the superuser with rebooting the system. */
  440         if (!capable(CAP_SYS_BOOT))
  441                 return -EPERM;
  442 
  443         /* For safety, we require "magic" arguments. */
  444         if (magic1 != LINUX_REBOOT_MAGIC1 ||
  445             (magic2 != LINUX_REBOOT_MAGIC2 &&
  446                         magic2 != LINUX_REBOOT_MAGIC2A &&
  447                         magic2 != LINUX_REBOOT_MAGIC2B &&
  448                         magic2 != LINUX_REBOOT_MAGIC2C))
  449                 return -EINVAL;
  450 
  451         /*
  452          * If pid namespaces are enabled and the current task is in a child
  453          * pid_namespace, the command is handled by reboot_pid_ns() which will
  454          * call do_exit().
  455          */
  456         ret = reboot_pid_ns(task_active_pid_ns(current), cmd);
  457         if (ret)
  458                 return ret;
  459 
  460         /* Instead of trying to make the power_off code look like
  461          * halt when pm_power_off is not set do it the easy way.
  462          */
  463         if ((cmd == LINUX_REBOOT_CMD_POWER_OFF) && !pm_power_off)
  464                 cmd = LINUX_REBOOT_CMD_HALT;
  465 
  466         mutex_lock(&reboot_mutex);
  467         switch (cmd) {
  468         case LINUX_REBOOT_CMD_RESTART:
  469                 kernel_restart(NULL);
  470                 break;
  471 
  472         case LINUX_REBOOT_CMD_CAD_ON:
  473                 C_A_D = 1;
  474                 break;
  475 
  476         case LINUX_REBOOT_CMD_CAD_OFF:
  477                 C_A_D = 0;
  478                 break;
  479 
  480         case LINUX_REBOOT_CMD_HALT:
  481                 kernel_halt();
  482                 do_exit(0);
  483                 panic("cannot halt");
  484 
  485         case LINUX_REBOOT_CMD_POWER_OFF:
  486                 kernel_power_off();
  487                 do_exit(0);
  488                 break;
  489 
  490         case LINUX_REBOOT_CMD_RESTART2:
  491                 if (strncpy_from_user(&buffer[0], arg, sizeof(buffer) - 1) < 0) {
  492                         ret = -EFAULT;
  493                         break;
  494                 }
  495                 buffer[sizeof(buffer) - 1] = '\0';
  496 
  497                 kernel_restart(buffer);
  498                 break;
  499 
  500 #ifdef CONFIG_KEXEC
  501         case LINUX_REBOOT_CMD_KEXEC:
  502                 ret = kernel_kexec();
  503                 break;
  504 #endif
  505 
  506 #ifdef CONFIG_HIBERNATION
  507         case LINUX_REBOOT_CMD_SW_SUSPEND:
  508                 ret = hibernate();
  509                 break;
  510 #endif
  511 
  512         default:
  513                 ret = -EINVAL;
  514                 break;
  515         }
  516         mutex_unlock(&reboot_mutex);
  517         return ret;
  518 }
  519 
  520 static void deferred_cad(struct work_struct *dummy)
  521 {
              /*
               * Work handler queued by ctrl_alt_del(): performs the restart
               * with no command string.  @dummy is not used.
               */
  522         kernel_restart(NULL);
  523 }
  524 
  525 /*
  526  * This function gets called by ctrl-alt-del - ie the keyboard interrupt.
  527  * As it's called within an interrupt, it may NOT sync: the only choice
  528  * is whether to reboot at once, or just ignore the ctrl-alt-del.
  529  */
  530 void ctrl_alt_del(void)
  531 {
  532         static DECLARE_WORK(cad_work, deferred_cad);
  533 
  534         if (C_A_D)
  535                 schedule_work(&cad_work);
  536         else
  537                 kill_cad_pid(SIGINT, 1);
  538 }
  539         
  540 /*
  541  * Unprivileged users may change the real gid to the effective gid
  542  * or vice versa.  (BSD-style)
  543  *
  544  * If you set the real gid at all, or set the effective gid to a value not
  545  * equal to the real gid, then the saved gid is set to the new effective gid.
  546  *
  547  * This makes it possible for a setgid program to completely drop its
  548  * privileges, which is often a useful assertion to make when you are doing
  549  * a security audit over a program.
  550  *
  551  * The general idea is that a program which uses just setregid() will be
  552  * 100% compatible with BSD.  A program which uses just setgid() will be
  553  * 100% compatible with POSIX with saved IDs. 
  554  *
  555  * SMP: There are not races, the GIDs are checked only by filesystem
  556  *      operations (as far as semantic preservation is concerned).
  557  */
  558 SYSCALL_DEFINE2(setregid, gid_t, rgid, gid_t, egid)
  559 {
  560         struct user_namespace *ns = current_user_ns();
  561         const struct cred *old;
  562         struct cred *new;
  563         int retval;
  564         kgid_t krgid, kegid;
  565 
  566         krgid = make_kgid(ns, rgid);
  567         kegid = make_kgid(ns, egid);
  568 
  569         if ((rgid != (gid_t) -1) && !gid_valid(krgid))
  570                 return -EINVAL;
  571         if ((egid != (gid_t) -1) && !gid_valid(kegid))
  572                 return -EINVAL;
  573 
  574         new = prepare_creds();
  575         if (!new)
  576                 return -ENOMEM;
  577         old = current_cred();
  578 
  579         retval = -EPERM;
  580         if (rgid != (gid_t) -1) {
  581                 if (gid_eq(old->gid, krgid) ||
  582                     gid_eq(old->egid, krgid) ||
  583                     nsown_capable(CAP_SETGID))
  584                         new->gid = krgid;
  585                 else
  586                         goto error;
  587         }
  588         if (egid != (gid_t) -1) {
  589                 if (gid_eq(old->gid, kegid) ||
  590                     gid_eq(old->egid, kegid) ||
  591                     gid_eq(old->sgid, kegid) ||
  592                     nsown_capable(CAP_SETGID))
  593                         new->egid = kegid;
  594                 else
  595                         goto error;
  596         }
  597 
  598         if (rgid != (gid_t) -1 ||
  599             (egid != (gid_t) -1 && !gid_eq(kegid, old->gid)))
  600                 new->sgid = new->egid;
  601         new->fsgid = new->egid;
  602 
  603         return commit_creds(new);
  604 
  605 error:
  606         abort_creds(new);
  607         return retval;
  608 }
  609 
  610 /*
  611  * setgid() is implemented like SysV w/ SAVED_IDS 
  612  *
  613  * SMP: Same implicit races as above.
  614  */
  615 SYSCALL_DEFINE1(setgid, gid_t, gid)
  616 {
  617         struct user_namespace *ns = current_user_ns();
  618         const struct cred *old;
  619         struct cred *new;
  620         int retval;
  621         kgid_t kgid;
  622 
  623         kgid = make_kgid(ns, gid);
  624         if (!gid_valid(kgid))
  625                 return -EINVAL;
  626 
  627         new = prepare_creds();
  628         if (!new)
  629                 return -ENOMEM;
  630         old = current_cred();
  631 
  632         retval = -EPERM;
  633         if (nsown_capable(CAP_SETGID))
  634                 new->gid = new->egid = new->sgid = new->fsgid = kgid;
  635         else if (gid_eq(kgid, old->gid) || gid_eq(kgid, old->sgid))
  636                 new->egid = new->fsgid = kgid;
  637         else
  638                 goto error;
  639 
  640         return commit_creds(new);
  641 
  642 error:
  643         abort_creds(new);
  644         return retval;
  645 }
  646 
  647 /*
  648  * change the user struct in a credentials set to match the new UID
  649  */
  650 static int set_user(struct cred *new)
  651 {
  652         struct user_struct *new_user;
  653 
  654         new_user = alloc_uid(new->uid);
  655         if (!new_user)
  656                 return -EAGAIN;
  657 
  658         /*
  659          * We don't fail in case of NPROC limit excess here because too many
  660          * poorly written programs don't check set*uid() return code, assuming
  661          * it never fails if called by root.  We may still enforce NPROC limit
  662          * for programs doing set*uid()+execve() by harmlessly deferring the
  663          * failure to the execve() stage.
  664          */
  665         if (atomic_read(&new_user->processes) >= rlimit(RLIMIT_NPROC) &&
  666                         new_user != INIT_USER)
  667                 current->flags |= PF_NPROC_EXCEEDED;
  668         else
  669                 current->flags &= ~PF_NPROC_EXCEEDED;
  670 
  671         free_uid(new->user);
  672         new->user = new_user;
  673         return 0;
  674 }
  675 
  676 /*
  677  * Unprivileged users may change the real uid to the effective uid
  678  * or vice versa.  (BSD-style)
  679  *
  680  * If you set the real uid at all, or set the effective uid to a value not
  681  * equal to the real uid, then the saved uid is set to the new effective uid.
  682  *
  683  * This makes it possible for a setuid program to completely drop its
  684  * privileges, which is often a useful assertion to make when you are doing
  685  * a security audit over a program.
  686  *
  687  * The general idea is that a program which uses just setreuid() will be
  688  * 100% compatible with BSD.  A program which uses just setuid() will be
  689  * 100% compatible with POSIX with saved IDs. 
  690  */
  691 SYSCALL_DEFINE2(setreuid, uid_t, ruid, uid_t, euid)
  692 {
  693         struct user_namespace *ns = current_user_ns();
  694         const struct cred *old;
  695         struct cred *new;
  696         int retval;
  697         kuid_t kruid, keuid;
  698 
  699         kruid = make_kuid(ns, ruid);
  700         keuid = make_kuid(ns, euid);
  701 
  702         if ((ruid != (uid_t) -1) && !uid_valid(kruid))
  703                 return -EINVAL;
  704         if ((euid != (uid_t) -1) && !uid_valid(keuid))
  705                 return -EINVAL;
  706 
  707         new = prepare_creds();
  708         if (!new)
  709                 return -ENOMEM;
  710         old = current_cred();
  711 
  712         retval = -EPERM;
  713         if (ruid != (uid_t) -1) {
  714                 new->uid = kruid;
  715                 if (!uid_eq(old->uid, kruid) &&
  716                     !uid_eq(old->euid, kruid) &&
  717                     !nsown_capable(CAP_SETUID))
  718                         goto error;
  719         }
  720 
  721         if (euid != (uid_t) -1) {
  722                 new->euid = keuid;
  723                 if (!uid_eq(old->uid, keuid) &&
  724                     !uid_eq(old->euid, keuid) &&
  725                     !uid_eq(old->suid, keuid) &&
  726                     !nsown_capable(CAP_SETUID))
  727                         goto error;
  728         }
  729 
  730         if (!uid_eq(new->uid, old->uid)) {
  731                 retval = set_user(new);
  732                 if (retval < 0)
  733                         goto error;
  734         }
  735         if (ruid != (uid_t) -1 ||
  736             (euid != (uid_t) -1 && !uid_eq(keuid, old->uid)))
  737                 new->suid = new->euid;
  738         new->fsuid = new->euid;
  739 
  740         retval = security_task_fix_setuid(new, old, LSM_SETID_RE);
  741         if (retval < 0)
  742                 goto error;
  743 
  744         return commit_creds(new);
  745 
  746 error:
  747         abort_creds(new);
  748         return retval;
  749 }
  750                 
  751 /*
  752  * setuid() is implemented like SysV with SAVED_IDS 
  753  * 
  754  * Note that SAVED_ID's is deficient in that a setuid root program
  755  * like sendmail, for example, cannot set its uid to be a normal 
  756  * user and then switch back, because if you're root, setuid() sets
  757  * the saved uid too.  If you don't like this, blame the bright people
  758  * in the POSIX committee and/or USG.  Note that the BSD-style setreuid()
  759  * will allow a root program to temporarily drop privileges and be able to
  760  * regain them by swapping the real and effective uid.  
  761  */
  762 SYSCALL_DEFINE1(setuid, uid_t, uid)
  763 {
  764         struct user_namespace *ns = current_user_ns();
  765         const struct cred *old;
  766         struct cred *new;
  767         int retval;
  768         kuid_t kuid;
  769 
  770         kuid = make_kuid(ns, uid);
  771         if (!uid_valid(kuid))
  772                 return -EINVAL;
  773 
  774         new = prepare_creds();
  775         if (!new)
  776                 return -ENOMEM;
  777         old = current_cred();
  778 
  779         retval = -EPERM;
  780         if (nsown_capable(CAP_SETUID)) {
  781                 new->suid = new->uid = kuid;
  782                 if (!uid_eq(kuid, old->uid)) {
  783                         retval = set_user(new);
  784                         if (retval < 0)
  785                                 goto error;
  786                 }
  787         } else if (!uid_eq(kuid, old->uid) && !uid_eq(kuid, new->suid)) {
  788                 goto error;
  789         }
  790 
  791         new->fsuid = new->euid = kuid;
  792 
  793         retval = security_task_fix_setuid(new, old, LSM_SETID_ID);
  794         if (retval < 0)
  795                 goto error;
  796 
  797         return commit_creds(new);
  798 
  799 error:
  800         abort_creds(new);
  801         return retval;
  802 }
  803 
  804 
  805 /*
  806  * This function implements a generic ability to update ruid, euid,
  807  * and suid.  This allows you to implement the 4.4 compatible seteuid().
  808  */
  809 SYSCALL_DEFINE3(setresuid, uid_t, ruid, uid_t, euid, uid_t, suid)
  810 {
  811         struct user_namespace *ns = current_user_ns();
  812         const struct cred *old;
  813         struct cred *new;
  814         int retval;
  815         kuid_t kruid, keuid, ksuid;
  816 
  817         kruid = make_kuid(ns, ruid);
  818         keuid = make_kuid(ns, euid);
  819         ksuid = make_kuid(ns, suid);
  820 
  821         if ((ruid != (uid_t) -1) && !uid_valid(kruid))
  822                 return -EINVAL;
  823 
  824         if ((euid != (uid_t) -1) && !uid_valid(keuid))
  825                 return -EINVAL;
  826 
  827         if ((suid != (uid_t) -1) && !uid_valid(ksuid))
  828                 return -EINVAL;
  829 
  830         new = prepare_creds();
  831         if (!new)
  832                 return -ENOMEM;
  833 
  834         old = current_cred();
  835 
  836         retval = -EPERM;
  837         if (!nsown_capable(CAP_SETUID)) {
  838                 if (ruid != (uid_t) -1        && !uid_eq(kruid, old->uid) &&
  839                     !uid_eq(kruid, old->euid) && !uid_eq(kruid, old->suid))
  840                         goto error;
  841                 if (euid != (uid_t) -1        && !uid_eq(keuid, old->uid) &&
  842                     !uid_eq(keuid, old->euid) && !uid_eq(keuid, old->suid))
  843                         goto error;
  844                 if (suid != (uid_t) -1        && !uid_eq(ksuid, old->uid) &&
  845                     !uid_eq(ksuid, old->euid) && !uid_eq(ksuid, old->suid))
  846                         goto error;
  847         }
  848 
  849         if (ruid != (uid_t) -1) {
  850                 new->uid = kruid;
  851                 if (!uid_eq(kruid, old->uid)) {
  852                         retval = set_user(new);
  853                         if (retval < 0)
  854                                 goto error;
  855                 }
  856         }
  857         if (euid != (uid_t) -1)
  858                 new->euid = keuid;
  859         if (suid != (uid_t) -1)
  860                 new->suid = ksuid;
  861         new->fsuid = new->euid;
  862 
  863         retval = security_task_fix_setuid(new, old, LSM_SETID_RES);
  864         if (retval < 0)
  865                 goto error;
  866 
  867         return commit_creds(new);
  868 
  869 error:
  870         abort_creds(new);
  871         return retval;
  872 }
  873 
  874 SYSCALL_DEFINE3(getresuid, uid_t __user *, ruidp, uid_t __user *, euidp, uid_t __user *, suidp)
  875 {
  876         const struct cred *cred = current_cred();
  877         int retval;
  878         uid_t ruid, euid, suid;
  879 
  880         ruid = from_kuid_munged(cred->user_ns, cred->uid);
  881         euid = from_kuid_munged(cred->user_ns, cred->euid);
  882         suid = from_kuid_munged(cred->user_ns, cred->suid);
  883 
  884         if (!(retval   = put_user(ruid, ruidp)) &&
  885             !(retval   = put_user(euid, euidp)))
  886                 retval = put_user(suid, suidp);
  887 
  888         return retval;
  889 }
  890 
  891 /*
  892  * Same as above, but for rgid, egid, sgid.
  893  */
  894 SYSCALL_DEFINE3(setresgid, gid_t, rgid, gid_t, egid, gid_t, sgid)
  895 {
  896         struct user_namespace *ns = current_user_ns();
  897         const struct cred *old;
  898         struct cred *new;
  899         int retval;
  900         kgid_t krgid, kegid, ksgid;
  901 
  902         krgid = make_kgid(ns, rgid);
  903         kegid = make_kgid(ns, egid);
  904         ksgid = make_kgid(ns, sgid);
  905 
  906         if ((rgid != (gid_t) -1) && !gid_valid(krgid))
  907                 return -EINVAL;
  908         if ((egid != (gid_t) -1) && !gid_valid(kegid))
  909                 return -EINVAL;
  910         if ((sgid != (gid_t) -1) && !gid_valid(ksgid))
  911                 return -EINVAL;
  912 
  913         new = prepare_creds();
  914         if (!new)
  915                 return -ENOMEM;
  916         old = current_cred();
  917 
  918         retval = -EPERM;
  919         if (!nsown_capable(CAP_SETGID)) {
  920                 if (rgid != (gid_t) -1        && !gid_eq(krgid, old->gid) &&
  921                     !gid_eq(krgid, old->egid) && !gid_eq(krgid, old->sgid))
  922                         goto error;
  923                 if (egid != (gid_t) -1        && !gid_eq(kegid, old->gid) &&
  924                     !gid_eq(kegid, old->egid) && !gid_eq(kegid, old->sgid))
  925                         goto error;
  926                 if (sgid != (gid_t) -1        && !gid_eq(ksgid, old->gid) &&
  927                     !gid_eq(ksgid, old->egid) && !gid_eq(ksgid, old->sgid))
  928                         goto error;
  929         }
  930 
  931         if (rgid != (gid_t) -1)
  932                 new->gid = krgid;
  933         if (egid != (gid_t) -1)
  934                 new->egid = kegid;
  935         if (sgid != (gid_t) -1)
  936                 new->sgid = ksgid;
  937         new->fsgid = new->egid;
  938 
  939         return commit_creds(new);
  940 
  941 error:
  942         abort_creds(new);
  943         return retval;
  944 }
  945 
  946 SYSCALL_DEFINE3(getresgid, gid_t __user *, rgidp, gid_t __user *, egidp, gid_t __user *, sgidp)
  947 {
  948         const struct cred *cred = current_cred();
  949         int retval;
  950         gid_t rgid, egid, sgid;
  951 
  952         rgid = from_kgid_munged(cred->user_ns, cred->gid);
  953         egid = from_kgid_munged(cred->user_ns, cred->egid);
  954         sgid = from_kgid_munged(cred->user_ns, cred->sgid);
  955 
  956         if (!(retval   = put_user(rgid, rgidp)) &&
  957             !(retval   = put_user(egid, egidp)))
  958                 retval = put_user(sgid, sgidp);
  959 
  960         return retval;
  961 }
  962 
  963 
  964 /*
  965  * "setfsuid()" sets the fsuid - the uid used for filesystem checks. This
  966  * is used for "access()" and for the NFS daemon (letting nfsd stay at
  967  * whatever uid it wants to). It normally shadows "euid", except when
  968  * explicitly set by setfsuid() or for access..
  969  */
  970 SYSCALL_DEFINE1(setfsuid, uid_t, uid)
  971 {
  972         const struct cred *old;
  973         struct cred *new;
  974         uid_t old_fsuid;
  975         kuid_t kuid;
  976 
  977         old = current_cred();
  978         old_fsuid = from_kuid_munged(old->user_ns, old->fsuid);
  979 
  980         kuid = make_kuid(old->user_ns, uid);
  981         if (!uid_valid(kuid))
  982                 return old_fsuid;
  983 
  984         new = prepare_creds();
  985         if (!new)
  986                 return old_fsuid;
  987 
  988         if (uid_eq(kuid, old->uid)  || uid_eq(kuid, old->euid)  ||
  989             uid_eq(kuid, old->suid) || uid_eq(kuid, old->fsuid) ||
  990             nsown_capable(CAP_SETUID)) {
  991                 if (!uid_eq(kuid, old->fsuid)) {
  992                         new->fsuid = kuid;
  993                         if (security_task_fix_setuid(new, old, LSM_SETID_FS) == 0)
  994                                 goto change_okay;
  995                 }
  996         }
  997 
  998         abort_creds(new);
  999         return old_fsuid;
 1000 
 1001 change_okay:
 1002         commit_creds(new);
 1003         return old_fsuid;
 1004 }
 1005 
 1006 /*
 1007  * Samma på svenska..
 1008  */
 1009 SYSCALL_DEFINE1(setfsgid, gid_t, gid)
 1010 {
 1011         const struct cred *old;
 1012         struct cred *new;
 1013         gid_t old_fsgid;
 1014         kgid_t kgid;
 1015 
 1016         old = current_cred();
 1017         old_fsgid = from_kgid_munged(old->user_ns, old->fsgid);
 1018 
 1019         kgid = make_kgid(old->user_ns, gid);
 1020         if (!gid_valid(kgid))
 1021                 return old_fsgid;
 1022 
 1023         new = prepare_creds();
 1024         if (!new)
 1025                 return old_fsgid;
 1026 
 1027         if (gid_eq(kgid, old->gid)  || gid_eq(kgid, old->egid)  ||
 1028             gid_eq(kgid, old->sgid) || gid_eq(kgid, old->fsgid) ||
 1029             nsown_capable(CAP_SETGID)) {
 1030                 if (!gid_eq(kgid, old->fsgid)) {
 1031                         new->fsgid = kgid;
 1032                         goto change_okay;
 1033                 }
 1034         }
 1035 
 1036         abort_creds(new);
 1037         return old_fsgid;
 1038 
 1039 change_okay:
 1040         commit_creds(new);
 1041         return old_fsgid;
 1042 }
 1043 
 1044 void do_sys_times(struct tms *tms)
 1045 {
 1046         cputime_t tgutime, tgstime, cutime, cstime;
 1047 
 1048         spin_lock_irq(&current->sighand->siglock);
 1049         thread_group_cputime_adjusted(current, &tgutime, &tgstime);
 1050         cutime = current->signal->cutime;
 1051         cstime = current->signal->cstime;
 1052         spin_unlock_irq(&current->sighand->siglock);
 1053         tms->tms_utime = cputime_to_clock_t(tgutime);
 1054         tms->tms_stime = cputime_to_clock_t(tgstime);
 1055         tms->tms_cutime = cputime_to_clock_t(cutime);
 1056         tms->tms_cstime = cputime_to_clock_t(cstime);
 1057 }
 1058 
 1059 SYSCALL_DEFINE1(times, struct tms __user *, tbuf)
 1060 {
 1061         if (tbuf) {
 1062                 struct tms tmp;
 1063 
 1064                 do_sys_times(&tmp);
 1065                 if (copy_to_user(tbuf, &tmp, sizeof(struct tms)))
 1066                         return -EFAULT;
 1067         }
 1068         force_successful_syscall_return();
 1069         return (long) jiffies_64_to_clock_t(get_jiffies_64());
 1070 }
 1071 
/*
 * setpgid(): move the process identified by @pid into the process group
 * identified by @pgid.  A @pid of 0 selects the caller's thread group
 * leader; a @pgid of 0 means "use @pid as the group id".
 *
 * Returns 0 on success, -EINVAL for a negative @pgid or a target that is
 * not a thread group leader, -ESRCH when the target does not exist or is
 * neither the caller nor one of its children, -EPERM / -EACCES for the
 * session, exec and session-leader restrictions checked below, or the error
 * returned by security_task_setpgid().
 *
 * This needs some heavy checking ...
 * I just haven't the stomach for it. I also don't fully
 * understand sessions/pgrp etc. Let somebody who does explain it.
 *
 * OK, I think I have the protection semantics right.... this is really
 * only important on a multi-user system anyway, to make sure one user
 * can't send a signal to a process owned by another.  -TYT, 12/12/91
 *
 * Auch. Had to add the 'did_exec' flag to conform completely to POSIX.
 * LBT 04.03.94
 */
SYSCALL_DEFINE2(setpgid, pid_t, pid, pid_t, pgid)
{
        struct task_struct *p;
        struct task_struct *group_leader = current->group_leader;
        struct pid *pgrp;
        int err;

        /* Resolve the "0 means default" forms of both arguments. */
        if (!pid)
                pid = task_pid_vnr(group_leader);
        if (!pgid)
                pgid = pid;
        if (pgid < 0)
                return -EINVAL;
        rcu_read_lock();

        /* From this point forward we keep holding onto the tasklist lock
         * so that our parent does not change from under us. -DaveM
         */
        write_lock_irq(&tasklist_lock);

        err = -ESRCH;
        p = find_task_by_vpid(pid);
        if (!p)
                goto out;

        /* Only a thread group leader can be moved. */
        err = -EINVAL;
        if (!thread_group_leader(p))
                goto out;

        if (same_thread_group(p->real_parent, group_leader)) {
                /* Target is a child of ours: same session, and not exec'd yet. */
                err = -EPERM;
                if (task_session(p) != task_session(group_leader))
                        goto out;
                err = -EACCES;
                if (p->did_exec)
                        goto out;
        } else {
                /* Not a child: the only other acceptable target is ourselves. */
                err = -ESRCH;
                if (p != group_leader)
                        goto out;
        }

        /* A session leader may not change its process group. */
        err = -EPERM;
        if (p->signal->leader)
                goto out;

        /* Default: the target becomes the leader of its own group. */
        pgrp = task_pid(p);
        if (pgid != pid) {
                struct task_struct *g;

                /*
                 * Joining another group: it must already have a member, and
                 * that member must be in the caller's session.  err is still
                 * -EPERM here.
                 */
                pgrp = find_vpid(pgid);
                g = pid_task(pgrp, PIDTYPE_PGID);
                if (!g || task_session(g) != task_session(group_leader))
                        goto out;
        }

        err = security_task_setpgid(p, pgid);
        if (err)
                goto out;

        /* Nothing to do if the target is already in the requested group. */
        if (task_pgrp(p) != pgrp)
                change_pid(p, PIDTYPE_PGID, pgrp);

        err = 0;
out:
        /* All paths lead to here, thus we are safe. -DaveM */
        write_unlock_irq(&tasklist_lock);
        rcu_read_unlock();
        return err;
}
 1154 
 1155 SYSCALL_DEFINE1(getpgid, pid_t, pid)
 1156 {
 1157         struct task_struct *p;
 1158         struct pid *grp;
 1159         int retval;
 1160 
 1161         rcu_read_lock();
 1162         if (!pid)
 1163                 grp = task_pgrp(current);
 1164         else {
 1165                 retval = -ESRCH;
 1166                 p = find_task_by_vpid(pid);
 1167                 if (!p)
 1168                         goto out;
 1169                 grp = task_pgrp(p);
 1170                 if (!grp)
 1171                         goto out;
 1172 
 1173                 retval = security_task_getpgid(p);
 1174                 if (retval)
 1175                         goto out;
 1176         }
 1177         retval = pid_vnr(grp);
 1178 out:
 1179         rcu_read_unlock();
 1180         return retval;
 1181 }
 1182 
 1183 #ifdef __ARCH_WANT_SYS_GETPGRP
 1184 
/* getpgrp(): the caller's own process group id, i.e. getpgid(0). */
SYSCALL_DEFINE0(getpgrp)
{
        return sys_getpgid(0);
}
 1189 
 1190 #endif
 1191 
 1192 SYSCALL_DEFINE1(getsid, pid_t, pid)
 1193 {
 1194         struct task_struct *p;
 1195         struct pid *sid;
 1196         int retval;
 1197 
 1198         rcu_read_lock();
 1199         if (!pid)
 1200                 sid = task_session(current);
 1201         else {
 1202                 retval = -ESRCH;
 1203                 p = find_task_by_vpid(pid);
 1204                 if (!p)
 1205                         goto out;
 1206                 sid = task_session(p);
 1207                 if (!sid)
 1208                         goto out;
 1209 
 1210                 retval = security_task_getsid(p);
 1211                 if (retval)
 1212                         goto out;
 1213         }
 1214         retval = pid_vnr(sid);
 1215 out:
 1216         rcu_read_unlock();
 1217         return retval;
 1218 }
 1219 
/*
 * setsid(): make the caller's thread group leader the leader of a new
 * session whose id is the leader's own pid.
 *
 * Returns the new session id on success, or -EPERM when the caller already
 * leads a session or a process group with that id already exists.
 */
SYSCALL_DEFINE0(setsid)
{
        struct task_struct *group_leader = current->group_leader;
        struct pid *sid = task_pid(group_leader);
        pid_t session = pid_vnr(sid);
        int err = -EPERM;

        write_lock_irq(&tasklist_lock);
        /* Fail if I am already a session leader */
        if (group_leader->signal->leader)
                goto out;

        /* Fail if a process group id already exists that equals the
         * proposed session id.
         */
        if (pid_task(sid, PIDTYPE_PGID))
                goto out;

        group_leader->signal->leader = 1;
        __set_special_pids(sid);

        /* The new session starts without the leader's previous tty. */
        proc_clear_tty(group_leader);

        err = session;
out:
        write_unlock_irq(&tasklist_lock);
        /* These two calls are made only on success, after the lock is dropped. */
        if (err > 0) {
                proc_sid_connector(group_leader);
                sched_autogroup_create_attach(group_leader);
        }
        return err;
}
 1252 
 1253 DECLARE_RWSEM(uts_sem);
 1254 
 1255 #ifdef COMPAT_UTS_MACHINE
 1256 #define override_architecture(name) \
 1257         (personality(current->personality) == PER_LINUX32 && \
 1258          copy_to_user(name->machine, COMPAT_UTS_MACHINE, \
 1259                       sizeof(COMPAT_UTS_MACHINE)))
 1260 #else
 1261 #define override_architecture(name)     0
 1262 #endif
 1263 
 1264 /*
 1265  * Work around broken programs that cannot handle "Linux 3.0".
 1266  * Instead we map 3.x to 2.6.40+x, so e.g. 3.0 would be 2.6.40
 1267  */
 1268 static int override_release(char __user *release, size_t len)
 1269 {
 1270         int ret = 0;
 1271 
 1272         if (current->personality & UNAME26) {
 1273                 const char *rest = UTS_RELEASE;
 1274                 char buf[65] = { 0 };
 1275                 int ndots = 0;
 1276                 unsigned v;
 1277                 size_t copy;
 1278 
 1279                 while (*rest) {
 1280                         if (*rest == '.' && ++ndots >= 3)
 1281                                 break;
 1282                         if (!isdigit(*rest) && *rest != '.')
 1283                                 break;
 1284                         rest++;
 1285                 }
 1286                 v = ((LINUX_VERSION_CODE >> 8) & 0xff) + 40;
 1287                 copy = clamp_t(size_t, len, 1, sizeof(buf));
 1288                 copy = scnprintf(buf, copy, "2.6.%u%s", v, rest);
 1289                 ret = copy_to_user(release, buf, copy + 1);
 1290         }
 1291         return ret;
 1292 }
 1293 
 1294 SYSCALL_DEFINE1(newuname, struct new_utsname __user *, name)
 1295 {
 1296         int errno = 0;
 1297 
 1298         down_read(&uts_sem);
 1299         if (copy_to_user(name, utsname(), sizeof *name))
 1300                 errno = -EFAULT;
 1301         up_read(&uts_sem);
 1302 
 1303         if (!errno && override_release(name->release, sizeof(name->release)))
 1304                 errno = -EFAULT;
 1305         if (!errno && override_architecture(name))
 1306                 errno = -EFAULT;
 1307         return errno;
 1308 }
 1309 
 1310 #ifdef __ARCH_WANT_SYS_OLD_UNAME
 1311 /*
 1312  * Old cruft
 1313  */
 1314 SYSCALL_DEFINE1(uname, struct old_utsname __user *, name)
 1315 {
 1316         int error = 0;
 1317 
 1318         if (!name)
 1319                 return -EFAULT;
 1320 
 1321         down_read(&uts_sem);
 1322         if (copy_to_user(name, utsname(), sizeof(*name)))
 1323                 error = -EFAULT;
 1324         up_read(&uts_sem);
 1325 
 1326         if (!error && override_release(name->release, sizeof(name->release)))
 1327                 error = -EFAULT;
 1328         if (!error && override_architecture(name))
 1329                 error = -EFAULT;
 1330         return error;
 1331 }
 1332 
 1333 SYSCALL_DEFINE1(olduname, struct oldold_utsname __user *, name)
 1334 {
 1335         int error;
 1336 
 1337         if (!name)
 1338                 return -EFAULT;
 1339         if (!access_ok(VERIFY_WRITE, name, sizeof(struct oldold_utsname)))
 1340                 return -EFAULT;
 1341 
 1342         down_read(&uts_sem);
 1343         error = __copy_to_user(&name->sysname, &utsname()->sysname,
 1344                                __OLD_UTS_LEN);
 1345         error |= __put_user(0, name->sysname + __OLD_UTS_LEN);
 1346         error |= __copy_to_user(&name->nodename, &utsname()->nodename,
 1347                                 __OLD_UTS_LEN);
 1348         error |= __put_user(0, name->nodename + __OLD_UTS_LEN);
 1349         error |= __copy_to_user(&name->release, &utsname()->release,
 1350                                 __OLD_UTS_LEN);
 1351         error |= __put_user(0, name->release + __OLD_UTS_LEN);
 1352         error |= __copy_to_user(&name->version, &utsname()->version,
 1353                                 __OLD_UTS_LEN);
 1354         error |= __put_user(0, name->version + __OLD_UTS_LEN);
 1355         error |= __copy_to_user(&name->machine, &utsname()->machine,
 1356                                 __OLD_UTS_LEN);
 1357         error |= __put_user(0, name->machine + __OLD_UTS_LEN);
 1358         up_read(&uts_sem);
 1359 
 1360         if (!error && override_architecture(name))
 1361                 error = -EFAULT;
 1362         if (!error && override_release(name->release, sizeof(name->release)))
 1363                 error = -EFAULT;
 1364         return error ? -EFAULT : 0;
 1365 }
 1366 #endif
 1367 
 1368 SYSCALL_DEFINE2(sethostname, char __user *, name, int, len)
 1369 {
 1370         int errno;
 1371         char tmp[__NEW_UTS_LEN];
 1372 
 1373         if (!ns_capable(current->nsproxy->uts_ns->user_ns, CAP_SYS_ADMIN))
 1374                 return -EPERM;
 1375 
 1376         if (len < 0 || len > __NEW_UTS_LEN)
 1377                 return -EINVAL;
 1378         down_write(&uts_sem);
 1379         errno = -EFAULT;
 1380         if (!copy_from_user(tmp, name, len)) {
 1381                 struct new_utsname *u = utsname();
 1382 
 1383                 memcpy(u->nodename, tmp, len);
 1384                 memset(u->nodename + len, 0, sizeof(u->nodename) - len);
 1385                 errno = 0;
 1386                 uts_proc_notify(UTS_PROC_HOSTNAME);
 1387         }
 1388         up_write(&uts_sem);
 1389         return errno;
 1390 }
 1391 
 1392 #ifdef __ARCH_WANT_SYS_GETHOSTNAME
 1393 
 1394 SYSCALL_DEFINE2(gethostname, char __user *, name, int, len)
 1395 {
 1396         int i, errno;
 1397         struct new_utsname *u;
 1398 
 1399         if (len < 0)
 1400                 return -EINVAL;
 1401         down_read(&uts_sem);
 1402         u = utsname();
 1403         i = 1 + strlen(u->nodename);
 1404         if (i > len)
 1405                 i = len;
 1406         errno = 0;
 1407         if (copy_to_user(name, u->nodename, i))
 1408                 errno = -EFAULT;
 1409         up_read(&uts_sem);
 1410         return errno;
 1411 }
 1412 
 1413 #endif
 1414 
 1415 /*
 1416  * Only setdomainname; getdomainname can be implemented by calling
 1417  * uname()
 1418  */
 1419 SYSCALL_DEFINE2(setdomainname, char __user *, name, int, len)
 1420 {
 1421         int errno;
 1422         char tmp[__NEW_UTS_LEN];
 1423 
 1424         if (!ns_capable(current->nsproxy->uts_ns->user_ns, CAP_SYS_ADMIN))
 1425                 return -EPERM;
 1426         if (len < 0 || len > __NEW_UTS_LEN)
 1427                 return -EINVAL;
 1428 
 1429         down_write(&uts_sem);
 1430         errno = -EFAULT;
 1431         if (!copy_from_user(tmp, name, len)) {
 1432                 struct new_utsname *u = utsname();
 1433 
 1434                 memcpy(u->domainname, tmp, len);
 1435                 memset(u->domainname + len, 0, sizeof(u->domainname) - len);
 1436                 errno = 0;
 1437                 uts_proc_notify(UTS_PROC_DOMAINNAME);
 1438         }
 1439         up_write(&uts_sem);
 1440         return errno;
 1441 }
 1442 
 1443 SYSCALL_DEFINE2(getrlimit, unsigned int, resource, struct rlimit __user *, rlim)
 1444 {
 1445         struct rlimit value;
 1446         int ret;
 1447 
 1448         ret = do_prlimit(current, resource, NULL, &value);
 1449         if (!ret)
 1450                 ret = copy_to_user(rlim, &value, sizeof(*rlim)) ? -EFAULT : 0;
 1451 
 1452         return ret;
 1453 }
 1454 
 1455 #ifdef __ARCH_WANT_SYS_OLD_GETRLIMIT
 1456 
 1457 /*
 1458  *      Back compatibility for getrlimit. Needed for some apps.
 1459  */
 1460  
 1461 SYSCALL_DEFINE2(old_getrlimit, unsigned int, resource,
 1462                 struct rlimit __user *, rlim)
 1463 {
 1464         struct rlimit x;
 1465         if (resource >= RLIM_NLIMITS)
 1466                 return -EINVAL;
 1467 
 1468         task_lock(current->group_leader);
 1469         x = current->signal->rlim[resource];
 1470         task_unlock(current->group_leader);
 1471         if (x.rlim_cur > 0x7FFFFFFF)
 1472                 x.rlim_cur = 0x7FFFFFFF;
 1473         if (x.rlim_max > 0x7FFFFFFF)
 1474                 x.rlim_max = 0x7FFFFFFF;
 1475         return copy_to_user(rlim, &x, sizeof(x))?-EFAULT:0;
 1476 }
 1477 
 1478 #endif
 1479 
 1480 static inline bool rlim64_is_infinity(__u64 rlim64)
 1481 {
 1482 #if BITS_PER_LONG < 64
 1483         return rlim64 >= ULONG_MAX;
 1484 #else
 1485         return rlim64 == RLIM64_INFINITY;
 1486 #endif
 1487 }
 1488 
 1489 static void rlim_to_rlim64(const struct rlimit *rlim, struct rlimit64 *rlim64)
 1490 {
 1491         if (rlim->rlim_cur == RLIM_INFINITY)
 1492                 rlim64->rlim_cur = RLIM64_INFINITY;
 1493         else
 1494                 rlim64->rlim_cur = rlim->rlim_cur;
 1495         if (rlim->rlim_max == RLIM_INFINITY)
 1496                 rlim64->rlim_max = RLIM64_INFINITY;
 1497         else
 1498                 rlim64->rlim_max = rlim->rlim_max;
 1499 }
 1500 
 1501 static void rlim64_to_rlim(const struct rlimit64 *rlim64, struct rlimit *rlim)
 1502 {
 1503         if (rlim64_is_infinity(rlim64->rlim_cur))
 1504                 rlim->rlim_cur = RLIM_INFINITY;
 1505         else
 1506                 rlim->rlim_cur = (unsigned long)rlim64->rlim_cur;
 1507         if (rlim64_is_infinity(rlim64->rlim_max))
 1508                 rlim->rlim_max = RLIM_INFINITY;
 1509         else
 1510                 rlim->rlim_max = (unsigned long)rlim64->rlim_max;
 1511 }
 1512 
/*
 * do_prlimit(): read and/or replace one resource limit of @tsk.
 *
 * @tsk:      task whose limit is accessed
 * @resource: limit index; must be below RLIM_NLIMITS
 * @new_rlim: limit to install, or NULL to leave the limit unchanged
 * @old_rlim: receives the previous limit, or NULL if not wanted
 *
 * Returns 0 on success, -EINVAL for a bad @resource or a soft limit above
 * the hard limit, -EPERM when RLIMIT_NOFILE would exceed sysctl_nr_open or
 * the hard limit is raised without CAP_SYS_RESOURCE, -ESRCH when @tsk no
 * longer has a sighand, or the error from security_task_setrlimit().
 *
 * make sure you are allowed to change @tsk limits before calling this
 */
int do_prlimit(struct task_struct *tsk, unsigned int resource,
                struct rlimit *new_rlim, struct rlimit *old_rlim)
{
        struct rlimit *rlim;
        int retval = 0;

        /* Argument checks that need no locking. */
        if (resource >= RLIM_NLIMITS)
                return -EINVAL;
        if (new_rlim) {
                if (new_rlim->rlim_cur > new_rlim->rlim_max)
                        return -EINVAL;
                if (resource == RLIMIT_NOFILE &&
                                new_rlim->rlim_max > sysctl_nr_open)
                        return -EPERM;
        }

        /* protect tsk->signal and tsk->sighand from disappearing */
        read_lock(&tasklist_lock);
        if (!tsk->sighand) {
                retval = -ESRCH;
                goto out;
        }

        rlim = tsk->signal->rlim + resource;
        /* The limit is read and written under the group leader's task lock. */
        task_lock(tsk->group_leader);
        if (new_rlim) {
                /* Keep the capable check against init_user_ns until
                   cgroups can contain all limits */
                if (new_rlim->rlim_max > rlim->rlim_max &&
                                !capable(CAP_SYS_RESOURCE))
                        retval = -EPERM;
                if (!retval)
                        retval = security_task_setrlimit(tsk->group_leader,
                                        resource, new_rlim);
                if (resource == RLIMIT_CPU && new_rlim->rlim_cur == 0) {
                        /*
                         * The caller is asking for an immediate RLIMIT_CPU
                         * expiry.  But we use the zero value to mean "it was
                         * never set".  So let's cheat and make it one second
                         * instead
                         */
                        new_rlim->rlim_cur = 1;
                }
        }
        /* Hand back the old value and install the new one only on success. */
        if (!retval) {
                if (old_rlim)
                        *old_rlim = *rlim;
                if (new_rlim)
                        *rlim = *new_rlim;
        }
        task_unlock(tsk->group_leader);

        /*
         * RLIMIT_CPU handling.   Note that the kernel fails to return an error
         * code if it rejected the user's attempt to set RLIMIT_CPU.  This is a
         * very long-standing error, and fixing it now risks breakage of
         * applications, so we live with it
         */
         if (!retval && new_rlim && resource == RLIMIT_CPU &&
                         new_rlim->rlim_cur != RLIM_INFINITY)
                update_rlimit_cpu(tsk, new_rlim->rlim_cur);
out:
        read_unlock(&tasklist_lock);
        return retval;
}
 1579 
 1580 /* rcu lock must be held */
 1581 static int check_prlimit_permission(struct task_struct *task)
 1582 {
 1583         const struct cred *cred = current_cred(), *tcred;
 1584 
 1585         if (current == task)
 1586                 return 0;
 1587 
 1588         tcred = __task_cred(task);
 1589         if (uid_eq(cred->uid, tcred->euid) &&
 1590             uid_eq(cred->uid, tcred->suid) &&
 1591             uid_eq(cred->uid, tcred->uid)  &&
 1592             gid_eq(cred->gid, tcred->egid) &&
 1593             gid_eq(cred->gid, tcred->sgid) &&
 1594             gid_eq(cred->gid, tcred->gid))
 1595                 return 0;
 1596         if (ns_capable(tcred->user_ns, CAP_SYS_RESOURCE))
 1597                 return 0;
 1598 
 1599         return -EPERM;
 1600 }
 1601 
 1602 SYSCALL_DEFINE4(prlimit64, pid_t, pid, unsigned int, resource,
 1603                 const struct rlimit64 __user *, new_rlim,
 1604                 struct rlimit64 __user *, old_rlim)
 1605 {
 1606         struct rlimit64 old64, new64;
 1607         struct rlimit old, new;
 1608         struct task_struct *tsk;
 1609         int ret;
 1610 
 1611         if (new_rlim) {
 1612                 if (copy_from_user(&new64, new_rlim, sizeof(new64)))
 1613                         return -EFAULT;
 1614                 rlim64_to_rlim(&new64, &new);
 1615         }
 1616 
 1617         rcu_read_lock();
 1618         tsk = pid ? find_task_by_vpid(pid) : current;
 1619         if (!tsk) {
 1620                 rcu_read_unlock();
 1621                 return -ESRCH;
 1622         }
 1623         ret = check_prlimit_permission(tsk);
 1624         if (ret) {
 1625                 rcu_read_unlock();
 1626                 return ret;
 1627         }
 1628         get_task_struct(tsk);
 1629         rcu_read_unlock();
 1630 
 1631         ret = do_prlimit(tsk, resource, new_rlim ? &new : NULL,
 1632                         old_rlim ? &old : NULL);
 1633 
 1634         if (!ret && old_rlim) {
 1635                 rlim_to_rlim64(&old, &old64);
 1636                 if (copy_to_user(old_rlim, &old64, sizeof(old64)))
 1637                         ret = -EFAULT;
 1638         }
 1639 
 1640         put_task_struct(tsk);
 1641         return ret;
 1642 }
 1643 
 1644 SYSCALL_DEFINE2(setrlimit, unsigned int, resource, struct rlimit __user *, rlim)
 1645 {
 1646         struct rlimit new_rlim;
 1647 
 1648         if (copy_from_user(&new_rlim, rlim, sizeof(*rlim)))
 1649                 return -EFAULT;
 1650         return do_prlimit(current, resource, &new_rlim, NULL);
 1651 }
 1652 
 1653 /*
 1654  * It would make sense to put struct rusage in the task_struct,
 1655  * except that would make the task_struct be *really big*.  After
 1656  * task_struct gets moved into malloc'ed memory, it would
 1657  * make sense to do this.  It will make moving the rest of the information
 1658  * a lot simpler!  (Which we're not doing right now because we're not
 1659  * measuring them yet).
 1660  *
 1661  * When sampling multiple threads for RUSAGE_SELF, under SMP we might have
 1662  * races with threads incrementing their own counters.  But since word
 1663  * reads are atomic, we either get new values or old values and we don't
 1664  * care which for the sums.  We always take the siglock to protect reading
 1665  * the c* fields from p->signal from races with exit.c updating those
 1666  * fields when reaping, so a sample either gets all the additions of a
 1667  * given child after it's reaped, or none so this sample is before reaping.
 1668  *
 * Locking:
 * We need to take the siglock for CHILDREN, SELF and BOTH
 * for the cases current multithreaded, non-current single threaded and
 * non-current multithreaded.  Thread traversal is now safe with
 * the siglock held.
 * Strictly speaking, we do not need to take the siglock if we are current and
 * single threaded, as no one else can take our signal_struct away, no one
 * else can reap the children to update signal->c* counters, and no one else
 * can race with the signal-> fields. If we do not take any lock, the
 * signal-> fields could be read out of order while another thread was just
 * exiting. So we should place a read memory barrier when we avoid the lock.
 * On the writer side, a write memory barrier is implied in __exit_signal
 * as __exit_signal releases the siglock spinlock after updating the signal->
 * fields. But we don't do this yet to keep things simple.
 1683  *
 1684  */
 1685 
 1686 static void accumulate_thread_rusage(struct task_struct *t, struct rusage *r)
 1687 {
 1688         r->ru_nvcsw += t->nvcsw;
 1689         r->ru_nivcsw += t->nivcsw;
 1690         r->ru_minflt += t->min_flt;
 1691         r->ru_majflt += t->maj_flt;
 1692         r->ru_inblock += task_io_get_inblock(t);
 1693         r->ru_oublock += task_io_get_oublock(t);
 1694 }
 1695 
/*
 * k_getrusage(): collect resource usage for task @p into the kernel-space
 * structure @r.
 *
 * @who selects what is counted: RUSAGE_THREAD (a single thread),
 * RUSAGE_SELF (the whole thread group), RUSAGE_CHILDREN (the c* totals kept
 * in p->signal) or RUSAGE_BOTH (children plus the thread group).  Any other
 * value hits BUG().
 *
 * @r is always zeroed first.  If the sighand lock of @p cannot be taken, the
 * function returns with @r left all zero.
 */
static void k_getrusage(struct task_struct *p, int who, struct rusage *r)
{
        struct task_struct *t;
        unsigned long flags;
        cputime_t tgutime, tgstime, utime, stime;
        unsigned long maxrss = 0;

        memset((char *) r, 0, sizeof *r);
        utime = stime = 0;

        if (who == RUSAGE_THREAD) {
                /*
                 * Single-thread case, done without the sighand lock.  Note
                 * that the CPU times are read from current while the other
                 * counters come from p.
                 */
                task_cputime_adjusted(current, &utime, &stime);
                accumulate_thread_rusage(p, r);
                maxrss = p->signal->maxrss;
                goto out;
        }

        if (!lock_task_sighand(p, &flags))
                return;

        switch (who) {
                case RUSAGE_BOTH:
                case RUSAGE_CHILDREN:
                        /* Start from the children totals stored in p->signal. */
                        utime = p->signal->cutime;
                        stime = p->signal->cstime;
                        r->ru_nvcsw = p->signal->cnvcsw;
                        r->ru_nivcsw = p->signal->cnivcsw;
                        r->ru_minflt = p->signal->cmin_flt;
                        r->ru_majflt = p->signal->cmaj_flt;
                        r->ru_inblock = p->signal->cinblock;
                        r->ru_oublock = p->signal->coublock;
                        maxrss = p->signal->cmaxrss;

                        if (who == RUSAGE_CHILDREN)
                                break;

                        /* RUSAGE_BOTH falls through to add the group's own usage. */

                case RUSAGE_SELF:
                        /*
                         * Add the counters kept in p->signal, then those of
                         * every thread reachable from p via next_thread().
                         */
                        thread_group_cputime_adjusted(p, &tgutime, &tgstime);
                        utime += tgutime;
                        stime += tgstime;
                        r->ru_nvcsw += p->signal->nvcsw;
                        r->ru_nivcsw += p->signal->nivcsw;
                        r->ru_minflt += p->signal->min_flt;
                        r->ru_majflt += p->signal->maj_flt;
                        r->ru_inblock += p->signal->inblock;
                        r->ru_oublock += p->signal->oublock;
                        if (maxrss < p->signal->maxrss)
                                maxrss = p->signal->maxrss;
                        t = p;
                        do {
                                accumulate_thread_rusage(t, r);
                                t = next_thread(t);
                        } while (t != p);
                        break;

                default:
                        BUG();
        }
        unlock_task_sighand(p, &flags);

out:
        cputime_to_timeval(utime, &r->ru_utime);
        cputime_to_timeval(stime, &r->ru_stime);

        /*
         * Except for RUSAGE_CHILDREN, let setmax_mm_hiwater_rss() update
         * maxrss from p's mm, when p has one.
         */
        if (who != RUSAGE_CHILDREN) {
                struct mm_struct *mm = get_task_mm(p);
                if (mm) {
                        setmax_mm_hiwater_rss(&maxrss, mm);
                        mmput(mm);
                }
        }
        r->ru_maxrss = maxrss * (PAGE_SIZE / 1024); /* convert pages to KBs */
}
 1769 
      /*
       * Collect the usage of @p for mode @who into a kernel-side struct rusage
       * and copy it out to the user buffer @ru.
       *
       * Returns 0 on success or -EFAULT if the copy to user space fails.
       * @who is not validated here; k_getrusage() hits BUG() on a value it
       * does not know.
       */
 1770 int getrusage(struct task_struct *p, int who, struct rusage __user *ru)
 1771 {
 1772         struct rusage r;
 1773         k_getrusage(p, who, &r);
 1774         return copy_to_user(ru, &r, sizeof(r)) ? -EFAULT : 0;
 1775 }
 1776 
      /*
       * getrusage() system call: report resource usage of the calling task.
       *
       * Only RUSAGE_SELF, RUSAGE_CHILDREN and RUSAGE_THREAD are accepted; any
       * other @who (including RUSAGE_BOTH) is rejected with -EINVAL.
       */
 1777 SYSCALL_DEFINE2(getrusage, int, who, struct rusage __user *, ru)
 1778 {
 1779         if (who != RUSAGE_SELF && who != RUSAGE_CHILDREN &&
 1780             who != RUSAGE_THREAD)
 1781                 return -EINVAL;
 1782         return getrusage(current, who, ru);
 1783 }
 1784 
      /*
       * umask() system call: install (@mask & S_IRWXUGO) as current->fs->umask
       * with a single xchg() and return the value that was stored before.
       */
 1785 SYSCALL_DEFINE1(umask, int, mask)
 1786 {
 1787         mask = xchg(&current->fs->umask, mask & S_IRWXUGO);
 1788         return mask;
 1789 }
 1790 
 1791 #ifdef CONFIG_CHECKPOINT_RESTORE
      /*
       * Replace mm->exe_file with the file open on descriptor @fd.
       *
       * Returns 0 on success, or:
       *   -EBADF   @fd does not refer to an open file
       *   -EACCES  the file is not a regular file or its mount is MNT_NOEXEC
       *   (error)  whatever inode_permission(..., MAY_EXEC) reports
       *   -EBUSY   a vma of @mm still maps the current exe_file
       *   -EPERM   MMF_EXE_FILE_CHANGED was already set on @mm
       *
       * The reference taken by fdget() is always dropped before returning.
       */
 1792 static int prctl_set_mm_exe_file(struct mm_struct *mm, unsigned int fd)
 1793 {
 1794         struct fd exe;
 1795         struct dentry *dentry;
 1796         int err;
 1797 
 1798         exe = fdget(fd);
 1799         if (!exe.file)
 1800                 return -EBADF;
 1801 
 1802         dentry = exe.file->f_path.dentry;
 1803 
 1804         /*
 1805          * Because the original mm->exe_file points to an executable file,
 1806          * make sure that this one is executable as well, to avoid breaking
 1807          * the overall picture.
 1808          */
 1809         err = -EACCES;
 1810         if (!S_ISREG(dentry->d_inode->i_mode)   ||
 1811             exe.file->f_path.mnt->mnt_flags & MNT_NOEXEC)
 1812                 goto exit;
 1813 
 1814         err = inode_permission(dentry->d_inode, MAY_EXEC);
 1815         if (err)
 1816                 goto exit;
 1817 
 1818         down_write(&mm->mmap_sem);
 1819 
 1820         /*
 1821          * Forbid changing mm->exe_file while the old file is still mapped.
 1822          */
 1823         err = -EBUSY;
 1824         if (mm->exe_file) {
 1825                 struct vm_area_struct *vma;
 1826 
 1827                 for (vma = mm->mmap; vma; vma = vma->vm_next)
 1828                         if (vma->vm_file &&
 1829                             path_equal(&vma->vm_file->f_path,
 1830                                        &mm->exe_file->f_path))
 1831                                 goto exit_unlock;
 1832         }
 1833 
 1834         /*
 1835          * The symlink can be changed only once, just to disallow arbitrary
 1836          * transitions malicious software might bring in. This means one
 1837          * could make a snapshot over all processes running and monitor
 1838          * /proc/pid/exe changes to notice unusual activity if needed.
 1839          */
 1840         err = -EPERM;
 1841         if (test_and_set_bit(MMF_EXE_FILE_CHANGED, &mm->flags))
 1842                 goto exit_unlock;
 1843 
 1844         err = 0;
 1845         set_mm_exe_file(mm, exe.file);  /* this grabs a reference to exe.file */
 1846 exit_unlock:
 1847         up_write(&mm->mmap_sem);
 1848 
 1849 exit:
 1850         fdput(exe);
 1851         return err;
 1852 }
 1853 
      /*
       * Back end of prctl(PR_SET_MM, ...): overwrite one field of current->mm.
       * @opt:  PR_SET_MM_* selector
       * @addr: new address for the field; for PR_SET_MM_EXE_FILE a file
       *        descriptor, for PR_SET_MM_AUXV the user address of the vector
       * @arg4: byte count for PR_SET_MM_AUXV, must be zero for everything else
       * @arg5: must be zero
       *
       * Returns 0 on success, -EINVAL for bad arguments, an unknown @opt or a
       * failed brk/rlimit check, -EPERM without CAP_SYS_RESOURCE, and -EFAULT
       * when no vma is found for a stack/arg/env address or the auxv copy
       * from user space fails.
       */
 1854 static int prctl_set_mm(int opt, unsigned long addr,
 1855                         unsigned long arg4, unsigned long arg5)
 1856 {
 1857         unsigned long rlim = rlimit(RLIMIT_DATA);
 1858         struct mm_struct *mm = current->mm;
 1859         struct vm_area_struct *vma;
 1860         int error;
 1861 
 1862         if (arg5 || (arg4 && opt != PR_SET_MM_AUXV))
 1863                 return -EINVAL;
 1864 
 1865         if (!capable(CAP_SYS_RESOURCE))
 1866                 return -EPERM;
 1867 
              /* Here addr carries a file descriptor, so skip the range check below */
 1868         if (opt == PR_SET_MM_EXE_FILE)
 1869                 return prctl_set_mm_exe_file(mm, (unsigned int)addr);
 1870 
 1871         if (addr >= TASK_SIZE || addr < mmap_min_addr)
 1872                 return -EINVAL;
 1873 
 1874         error = -EINVAL;
 1875 
              /*
               * NOTE(review): the mm fields below are written while mmap_sem
               * is held for read only.
               */
 1876         down_read(&mm->mmap_sem);
 1877         vma = find_vma(mm, addr);
 1878 
 1879         switch (opt) {
 1880         case PR_SET_MM_START_CODE:
 1881                 mm->start_code = addr;
 1882                 break;
 1883         case PR_SET_MM_END_CODE:
 1884                 mm->end_code = addr;
 1885                 break;
 1886         case PR_SET_MM_START_DATA:
 1887                 mm->start_data = addr;
 1888                 break;
 1889         case PR_SET_MM_END_DATA:
 1890                 mm->end_data = addr;
 1891                 break;
 1892 
 1893         case PR_SET_MM_START_BRK:
 1894                 if (addr <= mm->end_data)
 1895                         goto out;
 1896 
                      /* brk area plus data segment must fit in RLIMIT_DATA */
 1897                 if (rlim < RLIM_INFINITY &&
 1898                     (mm->brk - addr) +
 1899                     (mm->end_data - mm->start_data) > rlim)
 1900                         goto out;
 1901 
 1902                 mm->start_brk = addr;
 1903                 break;
 1904 
 1905         case PR_SET_MM_BRK:
 1906                 if (addr <= mm->end_data)
 1907                         goto out;
 1908 
                      /* brk area plus data segment must fit in RLIMIT_DATA */
 1909                 if (rlim < RLIM_INFINITY &&
 1910                     (addr - mm->start_brk) +
 1911                     (mm->end_data - mm->start_data) > rlim)
 1912                         goto out;
 1913 
 1914                 mm->brk = addr;
 1915                 break;
 1916 
 1917         /*
 1918          * If command line arguments and environment
 1919          * are placed somewhere else on stack, we can
 1920          * set them up here, ARG_START/END to setup
 1921          * command line arguments and ENV_START/END
 1922          * for environment.
 1923          */
 1924         case PR_SET_MM_START_STACK:
 1925         case PR_SET_MM_ARG_START:
 1926         case PR_SET_MM_ARG_END:
 1927         case PR_SET_MM_ENV_START:
 1928         case PR_SET_MM_ENV_END:
                      /* find_vma() found nothing for addr */
 1929                 if (!vma) {
 1930                         error = -EFAULT;
 1931                         goto out;
 1932                 }
 1933                 if (opt == PR_SET_MM_START_STACK)
 1934                         mm->start_stack = addr;
 1935                 else if (opt == PR_SET_MM_ARG_START)
 1936                         mm->arg_start = addr;
 1937                 else if (opt == PR_SET_MM_ARG_END)
 1938                         mm->arg_end = addr;
 1939                 else if (opt == PR_SET_MM_ENV_START)
 1940                         mm->env_start = addr;
 1941                 else if (opt == PR_SET_MM_ENV_END)
 1942                         mm->env_end = addr;
 1943                 break;
 1944 
 1945         /*
 1946          * This doesn't move auxiliary vector itself
 1947          * since it's pinned to mm_struct, but allow
 1948          * to fill vector with new values. It's up
 1949          * to a caller to provide sane values here
 1950          * otherwise user space tools which use this
 1951          * vector might be unhappy.
 1952          */
 1953         case PR_SET_MM_AUXV: {
 1954                 unsigned long user_auxv[AT_VECTOR_SIZE];
 1955 
 1956                 if (arg4 > sizeof(user_auxv))
 1957                         goto out;
                      /*
                       * The read lock is dropped before touching user memory;
                       * from here on this case returns directly instead of
                       * going through "out".
                       */
 1958                 up_read(&mm->mmap_sem);
 1959 
 1960                 if (copy_from_user(user_auxv, (const void __user *)addr, arg4))
 1961                         return -EFAULT;
 1962 
 1963                 /* Make sure the last entry is always AT_NULL */
 1964                 user_auxv[AT_VECTOR_SIZE - 2] = 0;
 1965                 user_auxv[AT_VECTOR_SIZE - 1] = 0;
 1966 
 1967                 BUILD_BUG_ON(sizeof(user_auxv) != sizeof(mm->saved_auxv));
 1968 
                      /*
                       * NOTE(review): only arg4 bytes are copied, so the
                       * AT_NULL pair forced above reaches mm->saved_auxv only
                       * when arg4 covers the last two slots.
                       */
 1969                 task_lock(current);
 1970                 memcpy(mm->saved_auxv, user_auxv, arg4);
 1971                 task_unlock(current);
 1972 
 1973                 return 0;
 1974         }
 1975         default:
 1976                 goto out;
 1977         }
 1978 
 1979         error = 0;
 1980 out:
 1981         up_read(&mm->mmap_sem);
 1982         return error;
 1983 }
 1984 
      /*
       * Store me->clear_child_tid at the user address @tid_addr.
       * Returns the result of put_user().
       */
 1985 static int prctl_get_tid_address(struct task_struct *me, int __user **tid_addr)
 1986 {
 1987         return put_user(me->clear_child_tid, tid_addr);
 1988 }
 1989 
 1990 #else /* CONFIG_CHECKPOINT_RESTORE */
      /*
       * Without CONFIG_CHECKPOINT_RESTORE both helpers are stubs that reject
       * the request with -EINVAL.
       */
 1991 static int prctl_set_mm(int opt, unsigned long addr,
 1992                         unsigned long arg4, unsigned long arg5)
 1993 {
 1994         return -EINVAL;
 1995 }
 1996 static int prctl_get_tid_address(struct task_struct *me, int __user **tid_addr)
 1997 {
 1998         return -EINVAL;
 1999 }
 2000 #endif
 2001 
      /*
       * prctl() system call: per-task control operations selected by @option.
       *
       * security_task_prctl() is consulted first; any result other than
       * -ENOSYS is returned as-is and the switch below is skipped.
       *
       * For several "get" options (PR_GET_DUMPABLE, PR_GET_TIMING,
       * PR_GET_TIMERSLACK, PR_MCE_KILL_GET, PR_GET_NO_NEW_PRIVS, ...) the
       * return value itself carries the result; others store it through the
       * user pointer passed in @arg2.  Unknown options yield -EINVAL.
       */
 2002 SYSCALL_DEFINE5(prctl, int, option, unsigned long, arg2, unsigned long, arg3,
 2003                 unsigned long, arg4, unsigned long, arg5)
 2004 {
 2005         struct task_struct *me = current;
 2006         unsigned char comm[sizeof(me->comm)];
 2007         long error;
 2008 
 2009         error = security_task_prctl(option, arg2, arg3, arg4, arg5);
 2010         if (error != -ENOSYS)
 2011                 return error;
 2012 
 2013         error = 0;
 2014         switch (option) {
 2015                 case PR_SET_PDEATHSIG:
 2016                         if (!valid_signal(arg2)) {
 2017                                 error = -EINVAL;
 2018                                 break;
 2019                         }
 2020                         me->pdeath_signal = arg2;
 2021                         break;
 2022                 case PR_GET_PDEATHSIG:
 2023                         error = put_user(me->pdeath_signal, (int __user *)arg2);
 2024                         break;
 2025                 case PR_GET_DUMPABLE:
 2026                         error = get_dumpable(me->mm);
 2027                         break;
 2028                 case PR_SET_DUMPABLE:
                              /* arg2 is unsigned long: only the "> 1" half can be true */
 2029                         if (arg2 < 0 || arg2 > 1) {
 2030                                 error = -EINVAL;
 2031                                 break;
 2032                         }
 2033                         set_dumpable(me->mm, arg2);
 2034                         break;
 2035 
 2036                 case PR_SET_UNALIGN:
 2037                         error = SET_UNALIGN_CTL(me, arg2);
 2038                         break;
 2039                 case PR_GET_UNALIGN:
 2040                         error = GET_UNALIGN_CTL(me, arg2);
 2041                         break;
 2042                 case PR_SET_FPEMU:
 2043                         error = SET_FPEMU_CTL(me, arg2);
 2044                         break;
 2045                 case PR_GET_FPEMU:
 2046                         error = GET_FPEMU_CTL(me, arg2);
 2047                         break;
 2048                 case PR_SET_FPEXC:
 2049                         error = SET_FPEXC_CTL(me, arg2);
 2050                         break;
 2051                 case PR_GET_FPEXC:
 2052                         error = GET_FPEXC_CTL(me, arg2);
 2053                         break;
 2054                 case PR_GET_TIMING:
 2055                         error = PR_TIMING_STATISTICAL;
 2056                         break;
 2057                 case PR_SET_TIMING:
                              /* PR_TIMING_STATISTICAL is the only mode accepted */
 2058                         if (arg2 != PR_TIMING_STATISTICAL)
 2059                                 error = -EINVAL;
 2060                         break;
 2061                 case PR_SET_NAME:
                              /*
                               * Terminate up front: at most sizeof(me->comm) - 1
                               * bytes are copied in from user space.
                               */
 2062                         comm[sizeof(me->comm)-1] = 0;
 2063                         if (strncpy_from_user(comm, (char __user *)arg2,
 2064                                               sizeof(me->comm) - 1) < 0)
 2065                                 return -EFAULT;
 2066                         set_task_comm(me, comm);
 2067                         proc_comm_connector(me);
 2068                         break;
 2069                 case PR_GET_NAME:
                              /* The whole comm buffer is copied out, not just the string */
 2070                         get_task_comm(comm, me);
 2071                         if (copy_to_user((char __user *)arg2, comm,
 2072                                          sizeof(comm)))
 2073                                 return -EFAULT;
 2074                         break;
 2075                 case PR_GET_ENDIAN:
 2076                         error = GET_ENDIAN(me, arg2);
 2077                         break;
 2078                 case PR_SET_ENDIAN:
 2079                         error = SET_ENDIAN(me, arg2);
 2080                         break;
 2081                 case PR_GET_SECCOMP:
 2082                         error = prctl_get_seccomp();
 2083                         break;
 2084                 case PR_SET_SECCOMP:
 2085                         error = prctl_set_seccomp(arg2, (char __user *)arg3);
 2086                         break;
 2087                 case PR_GET_TSC:
 2088                         error = GET_TSC_CTL(arg2);
 2089                         break;
 2090                 case PR_SET_TSC:
 2091                         error = SET_TSC_CTL(arg2);
 2092                         break;
 2093                 case PR_TASK_PERF_EVENTS_DISABLE:
 2094                         error = perf_event_task_disable();
 2095                         break;
 2096                 case PR_TASK_PERF_EVENTS_ENABLE:
 2097                         error = perf_event_task_enable();
 2098                         break;
 2099                 case PR_GET_TIMERSLACK:
 2100                         error = current->timer_slack_ns;
 2101                         break;
 2102                 case PR_SET_TIMERSLACK:
                              /*
                               * arg2 is unsigned, so "<= 0" is true only for
                               * zero, which restores the default slack.
                               */
 2103                         if (arg2 <= 0)
 2104                                 current->timer_slack_ns =
 2105                                         current->default_timer_slack_ns;
 2106                         else
 2107                                 current->timer_slack_ns = arg2;
 2108                         break;
 2109                 case PR_MCE_KILL:
 2110                         if (arg4 | arg5)
 2111                                 return -EINVAL;
 2112                         switch (arg2) {
 2113                         case PR_MCE_KILL_CLEAR:
 2114                                 if (arg3 != 0)
 2115                                         return -EINVAL;
 2116                                 current->flags &= ~PF_MCE_PROCESS;
 2117                                 break;
 2118                         case PR_MCE_KILL_SET:
                                      /*
                                       * NOTE(review): PF_MCE_PROCESS is set
                                       * before arg3 is validated, so the
                                       * -EINVAL path below leaves it set.
                                       */
 2119                                 current->flags |= PF_MCE_PROCESS;
 2120                                 if (arg3 == PR_MCE_KILL_EARLY)
 2121                                         current->flags |= PF_MCE_EARLY;
 2122                                 else if (arg3 == PR_MCE_KILL_LATE)
 2123                                         current->flags &= ~PF_MCE_EARLY;
 2124                                 else if (arg3 == PR_MCE_KILL_DEFAULT)
 2125                                         current->flags &=
 2126                                                 ~(PF_MCE_EARLY|PF_MCE_PROCESS);
 2127                                 else
 2128                                         return -EINVAL;
 2129                                 break;
 2130                         default:
 2131                                 return -EINVAL;
 2132                         }
 2133                         break;
 2134                 case PR_MCE_KILL_GET:
 2135                         if (arg2 | arg3 | arg4 | arg5)
 2136                                 return -EINVAL;
 2137                         if (current->flags & PF_MCE_PROCESS)
 2138                                 error = (current->flags & PF_MCE_EARLY) ?
 2139                                         PR_MCE_KILL_EARLY : PR_MCE_KILL_LATE;
 2140                         else
 2141                                 error = PR_MCE_KILL_DEFAULT;
 2142                         break;
 2143                 case PR_SET_MM:
 2144                         error = prctl_set_mm(arg2, arg3, arg4, arg5);
 2145                         break;
 2146                 case PR_GET_TID_ADDRESS:
 2147                         error = prctl_get_tid_address(me, (int __user **)arg2);
 2148                         break;
 2149                 case PR_SET_CHILD_SUBREAPER:
                              /* Any non-zero arg2 is normalised to 1 */
 2150                         me->signal->is_child_subreaper = !!arg2;
 2151                         break;
 2152                 case PR_GET_CHILD_SUBREAPER:
 2153                         error = put_user(me->signal->is_child_subreaper,
 2154                                          (int __user *) arg2);
 2155                         break;
 2156                 case PR_SET_NO_NEW_PRIVS:
                              /* Can only be set to 1; no path here clears it */
 2157                         if (arg2 != 1 || arg3 || arg4 || arg5)
 2158                                 return -EINVAL;
 2159 
 2160                         current->no_new_privs = 1;
 2161                         break;
 2162                 case PR_GET_NO_NEW_PRIVS:
 2163                         if (arg2 || arg3 || arg4 || arg5)
 2164                                 return -EINVAL;
 2165                         return current->no_new_privs ? 1 : 0;
 2166                 default:
 2167                         error = -EINVAL;
 2168                         break;
 2169         }
 2170         return error;
 2171 }
 2172 
      /*
       * getcpu() system call: report the CPU returned by
       * raw_smp_processor_id() and the node cpu_to_node() gives for it.
       *
       * @cpup and @nodep may each be NULL, in which case that value is not
       * written.  The third argument is not used.  Returns 0, or -EFAULT if
       * either put_user() fails.
       */
 2173 SYSCALL_DEFINE3(getcpu, unsigned __user *, cpup, unsigned __user *, nodep,
 2174                 struct getcpu_cache __user *, unused)
 2175 {
 2176         int err = 0;
 2177         int cpu = raw_smp_processor_id();
 2178         if (cpup)
 2179                 err |= put_user(cpu, cpup);
 2180         if (nodep)
 2181                 err |= put_user(cpu_to_node(cpu), nodep);
 2182         return err ? -EFAULT : 0;
 2183 }
 2184 
      /*
       * Command line that __orderly_poweroff() splits with argv_split() and
       * runs as a user-mode helper.
       */
 2185 char poweroff_cmd[POWEROFF_CMD_PATH_LEN] = "/sbin/poweroff";
 2186 
      /*
       * Cleanup callback handed to call_usermodehelper_fns() by
       * __orderly_poweroff(): frees the argv array stored in @info.
       */
 2187 static void argv_cleanup(struct subprocess_info *info)
 2188 {
 2189         argv_free(info->argv);
 2190 }
 2191 
      /*
       * Split poweroff_cmd into an argv array and launch it as a user-mode
       * helper with a fixed HOME/PATH environment.
       *
       * Returns -ENOMEM if argv_split() fails, otherwise the result of
       * call_usermodehelper_fns().
       */
 2192 static int __orderly_poweroff(void)
 2193 {
 2194         int argc;
 2195         char **argv;
 2196         static char *envp[] = {
 2197                 "HOME=/",
 2198                 "PATH=/sbin:/bin:/usr/sbin:/usr/bin",
 2199                 NULL
 2200         };
 2201         int ret;
 2202 
 2203         argv = argv_split(GFP_ATOMIC, poweroff_cmd, &argc);
 2204         if (argv == NULL) {
 2205                 printk(KERN_WARNING "%s failed to allocate memory for \"%s\"\n",
 2206                        __func__, poweroff_cmd);
 2207                 return -ENOMEM;
 2208         }
 2209 
              /* argv is normally released by the argv_cleanup() callback */
 2210         ret = call_usermodehelper_fns(argv[0], argv, envp, UMH_WAIT_EXEC,
 2211                                       NULL, argv_cleanup, NULL);
              /*
               * NOTE(review): presumably -ENOMEM means the helper was never
               * set up and argv_cleanup() did not run, hence the free here;
               * confirm no path returns -ENOMEM after the callback has run.
               */
 2212         if (ret == -ENOMEM)
 2213                 argv_free(argv);
 2214 
 2215         return ret;
 2216 }
 2217 
 2218 /**
 2219  * orderly_poweroff - Trigger an orderly system poweroff
 2220  * @force: force poweroff if command execution fails
 2221  *
 2222  * This may be called from any context to trigger a system shutdown.
 2223  * If the orderly shutdown cannot be started and @force is true, this
       * calls emergency_sync() and then kernel_power_off() directly.
       *
       * Returns the value from __orderly_poweroff(); non-zero means the
       * poweroff command could not be started.
 2224  */
 2225 int orderly_poweroff(bool force)
 2226 {
 2227         int ret = __orderly_poweroff();
 2228 
 2229         if (ret && force) {
 2230                 printk(KERN_WARNING "Failed to start orderly shutdown: "
 2231                        "forcing the issue\n");
 2232 
 2233                 /*
 2234                  * I guess this should try to kick off some daemon to sync and
 2235                  * poweroff asap.  Or not even bother syncing if we're doing an
 2236                  * emergency shutdown?
 2237                  */
 2238                 emergency_sync();
 2239                 kernel_power_off();
 2240         }
 2241 
 2242         return ret;
 2243 }
 2244 EXPORT_SYMBOL_GPL(orderly_poweroff);

Cache object: 8264fe5e6308828341db648fd3f12b17


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]


This page is part of the FreeBSD/Linux Kernel Cross-Reference, and was automatically generated using a modified version of the LXR engine.