The Design and Implementation of the FreeBSD Operating System, Second Edition
Now available: The Design and Implementation of the FreeBSD Operating System (Second Edition)


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]

FreeBSD/Linux Kernel Cross Reference
sys/kern/sys_procdesc.c

Version: -  FREEBSD  -  FREEBSD-13-STABLE  -  FREEBSD-13-0  -  FREEBSD-12-STABLE  -  FREEBSD-12-0  -  FREEBSD-11-STABLE  -  FREEBSD-11-0  -  FREEBSD-10-STABLE  -  FREEBSD-10-0  -  FREEBSD-9-STABLE  -  FREEBSD-9-0  -  FREEBSD-8-STABLE  -  FREEBSD-8-0  -  FREEBSD-7-STABLE  -  FREEBSD-7-0  -  FREEBSD-6-STABLE  -  FREEBSD-6-0  -  FREEBSD-5-STABLE  -  FREEBSD-5-0  -  FREEBSD-4-STABLE  -  FREEBSD-3-STABLE  -  FREEBSD22  -  l41  -  OPENBSD  -  linux-2.6  -  MK84  -  PLAN9  -  xnu-8792 
SearchContext: -  none  -  3  -  10 

    1 /*-
    2  * Copyright (c) 2009 Robert N. M. Watson
    3  * All rights reserved.
    4  *
    5  * This software was developed at the University of Cambridge Computer
    6  * Laboratory with support from a grant from Google, Inc.
    7  *
    8  * Redistribution and use in source and binary forms, with or without
    9  * modification, are permitted provided that the following conditions
   10  * are met:
   11  * 1. Redistributions of source code must retain the above copyright
   12  *    notice, this list of conditions and the following disclaimer.
   13  * 2. Redistributions in binary form must reproduce the above copyright
   14  *    notice, this list of conditions and the following disclaimer in the
   15  *    documentation and/or other materials provided with the distribution.
   16  *
   17  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
   18  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
   19  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
   20  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
   21  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
   22  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
   23  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
   24  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
   25  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
   26  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
   27  * SUCH DAMAGE.
   28  */
   29 
   30 /*-
   31  * FreeBSD process descriptor facility.
   32  *
   33  * Some processes are represented by a file descriptor, which will be used in
   34  * preference to signaling and pids for the purposes of process management,
   35  * and is, in effect, a form of capability.  When a process descriptor is
   36  * used with a process, it ceases to be visible to certain traditional UNIX
   37  * process facilities, such as waitpid(2).
   38  *
   39  * Some semantics:
   40  *
   41  * - At most one process descriptor will exist for any process, although
   42  *   references to that descriptor may be held from many processes (or even
   43  *   be in flight between processes over a local domain socket).
   44  * - Last close on the process descriptor will terminate the process using
   45  *   SIGKILL and reparent it to init so that there's a process to reap it
   46  *   when it's done exiting.
   47  * - If the process exits before the descriptor is closed, it will not
   48  *   generate SIGCHLD on termination, or be picked up by waitpid().
   49  * - The pdkill(2) system call may be used to deliver a signal to the process
   50  *   using its process descriptor.
   51  * - The pdwait4(2) system call may be used to block (or not) on a process
   52  *   descriptor to collect termination information.
   53  *
   54  * Open questions:
   55  *
   56  * - How to handle ptrace(2)?
   57  * - Will we want to add a pidtoprocdesc(2) system call to allow process
   58  *   descriptors to be created for processes without pdfork(2)?
   59  */
   60 
   61 #include <sys/cdefs.h>
   62 __FBSDID("$FreeBSD: releng/11.0/sys/kern/sys_procdesc.c 301573 2016-06-08 02:09:14Z oshogbo $");
   63 
   64 #include <sys/param.h>
   65 #include <sys/capsicum.h>
   66 #include <sys/fcntl.h>
   67 #include <sys/file.h>
   68 #include <sys/filedesc.h>
   69 #include <sys/kernel.h>
   70 #include <sys/lock.h>
   71 #include <sys/mutex.h>
   72 #include <sys/poll.h>
   73 #include <sys/proc.h>
   74 #include <sys/procdesc.h>
   75 #include <sys/resourcevar.h>
   76 #include <sys/stat.h>
   77 #include <sys/sysproto.h>
   78 #include <sys/sysctl.h>
   79 #include <sys/systm.h>
   80 #include <sys/ucred.h>
   81 #include <sys/user.h>
   82 
   83 #include <security/audit/audit.h>
   84 
   85 #include <vm/uma.h>
   86 
   87 FEATURE(process_descriptors, "Process Descriptors");
   88 
   89 static uma_zone_t procdesc_zone;
   90 
   91 static fo_poll_t        procdesc_poll;
   92 static fo_kqfilter_t    procdesc_kqfilter;
   93 static fo_stat_t        procdesc_stat;
   94 static fo_close_t       procdesc_close;
   95 static fo_fill_kinfo_t  procdesc_fill_kinfo;
   96 
   97 static struct fileops procdesc_ops = {
   98         .fo_read = invfo_rdwr,
   99         .fo_write = invfo_rdwr,
  100         .fo_truncate = invfo_truncate,
  101         .fo_ioctl = invfo_ioctl,
  102         .fo_poll = procdesc_poll,
  103         .fo_kqfilter = procdesc_kqfilter,
  104         .fo_stat = procdesc_stat,
  105         .fo_close = procdesc_close,
  106         .fo_chmod = invfo_chmod,
  107         .fo_chown = invfo_chown,
  108         .fo_sendfile = invfo_sendfile,
  109         .fo_fill_kinfo = procdesc_fill_kinfo,
  110         .fo_flags = DFLAG_PASSABLE,
  111 };
  112 
  113 /*
  114  * Initialize with VFS so that process descriptors are available along with
  115  * other file descriptor types.  As long as it runs before init(8) starts,
  116  * there shouldn't be a problem.
  117  */
  118 static void
  119 procdesc_init(void *dummy __unused)
  120 {
  121 
  122         procdesc_zone = uma_zcreate("procdesc", sizeof(struct procdesc),
  123             NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0);
  124         if (procdesc_zone == NULL)
  125                 panic("procdesc_init: procdesc_zone not initialized");
  126 }
  127 SYSINIT(vfs, SI_SUB_VFS, SI_ORDER_ANY, procdesc_init, NULL);
  128 
  129 /*
  130  * Return a locked process given a process descriptor, or ESRCH if it has
  131  * died.
  132  */
  133 int
  134 procdesc_find(struct thread *td, int fd, cap_rights_t *rightsp,
  135     struct proc **p)
  136 {
  137         struct procdesc *pd;
  138         struct file *fp;
  139         int error;
  140 
  141         error = fget(td, fd, rightsp, &fp);
  142         if (error)
  143                 return (error);
  144         if (fp->f_type != DTYPE_PROCDESC) {
  145                 error = EBADF;
  146                 goto out;
  147         }
  148         pd = fp->f_data;
  149         sx_slock(&proctree_lock);
  150         if (pd->pd_proc != NULL) {
  151                 *p = pd->pd_proc;
  152                 PROC_LOCK(*p);
  153         } else
  154                 error = ESRCH;
  155         sx_sunlock(&proctree_lock);
  156 out:
  157         fdrop(fp, td);
  158         return (error);
  159 }
  160 
  161 /*
  162  * Function to be used by procstat(1) sysctls when returning procdesc
  163  * information.
  164  */
  165 pid_t
  166 procdesc_pid(struct file *fp_procdesc)
  167 {
  168         struct procdesc *pd;
  169 
  170         KASSERT(fp_procdesc->f_type == DTYPE_PROCDESC,
  171            ("procdesc_pid: !procdesc"));
  172 
  173         pd = fp_procdesc->f_data;
  174         return (pd->pd_pid);
  175 }
  176 
  177 /*
  178  * Retrieve the PID associated with a process descriptor.
  179  */
  180 int
  181 kern_pdgetpid(struct thread *td, int fd, cap_rights_t *rightsp, pid_t *pidp)
  182 {
  183         struct file *fp;
  184         int error;
  185 
  186         error = fget(td, fd, rightsp, &fp);
  187         if (error)
  188                 return (error);
  189         if (fp->f_type != DTYPE_PROCDESC) {
  190                 error = EBADF;
  191                 goto out;
  192         }
  193         *pidp = procdesc_pid(fp);
  194 out:
  195         fdrop(fp, td);
  196         return (error);
  197 }
  198 
  199 /*
  200  * System call to return the pid of a process given its process descriptor.
  201  */
  202 int
  203 sys_pdgetpid(struct thread *td, struct pdgetpid_args *uap)
  204 {
  205         cap_rights_t rights;
  206         pid_t pid;
  207         int error;
  208 
  209         AUDIT_ARG_FD(uap->fd);
  210         error = kern_pdgetpid(td, uap->fd,
  211             cap_rights_init(&rights, CAP_PDGETPID), &pid);
  212         if (error == 0)
  213                 error = copyout(&pid, uap->pidp, sizeof(pid));
  214         return (error);
  215 }
  216 
  217 /*
  218  * When a new process is forked by pdfork(), a file descriptor is allocated
  219  * by the fork code first, then the process is forked, and then we get a
  220  * chance to set up the process descriptor.  Failure is not permitted at this
  221  * point, so procdesc_new() must succeed.
  222  */
  223 void
  224 procdesc_new(struct proc *p, int flags)
  225 {
  226         struct procdesc *pd;
  227 
  228         pd = uma_zalloc(procdesc_zone, M_WAITOK | M_ZERO);
  229         pd->pd_proc = p;
  230         pd->pd_pid = p->p_pid;
  231         p->p_procdesc = pd;
  232         pd->pd_flags = 0;
  233         if (flags & PD_DAEMON)
  234                 pd->pd_flags |= PDF_DAEMON;
  235         PROCDESC_LOCK_INIT(pd);
  236         knlist_init_mtx(&pd->pd_selinfo.si_note, &pd->pd_lock);
  237 
  238         /*
  239          * Process descriptors start out with two references: one from their
  240          * struct file, and the other from their struct proc.
  241          */
  242         refcount_init(&pd->pd_refcount, 2);
  243 }
  244 
  245 /*
  246  * Create a new process decriptor for the process that refers to it.
  247  */
  248 int
  249 procdesc_falloc(struct thread *td, struct file **resultfp, int *resultfd,
  250     int flags, struct filecaps *fcaps)
  251 {
  252         int fflags;
  253 
  254         fflags = 0;
  255         if (flags & PD_CLOEXEC)
  256                 fflags = O_CLOEXEC;
  257 
  258         return (falloc_caps(td, resultfp, resultfd, fflags, fcaps));
  259 }
  260 
  261 /*
  262  * Initialize a file with a process descriptor.
  263  */
  264 void
  265 procdesc_finit(struct procdesc *pdp, struct file *fp)
  266 {
  267 
  268         finit(fp, FREAD | FWRITE, DTYPE_PROCDESC, pdp, &procdesc_ops);
  269 }
  270 
  271 static void
  272 procdesc_free(struct procdesc *pd)
  273 {
  274 
  275         /*
  276          * When the last reference is released, we assert that the descriptor
  277          * has been closed, but not that the process has exited, as we will
  278          * detach the descriptor before the process dies if the descript is
  279          * closed, as we can't wait synchronously.
  280          */
  281         if (refcount_release(&pd->pd_refcount)) {
  282                 KASSERT(pd->pd_proc == NULL,
  283                     ("procdesc_free: pd_proc != NULL"));
  284                 KASSERT((pd->pd_flags & PDF_CLOSED),
  285                     ("procdesc_free: !PDF_CLOSED"));
  286 
  287                 knlist_destroy(&pd->pd_selinfo.si_note);
  288                 PROCDESC_LOCK_DESTROY(pd);
  289                 uma_zfree(procdesc_zone, pd);
  290         }
  291 }
  292 
  293 /*
  294  * procdesc_exit() - notify a process descriptor that its process is exiting.
  295  * We use the proctree_lock to ensure that process exit either happens
  296  * strictly before or strictly after a concurrent call to procdesc_close().
  297  */
  298 int
  299 procdesc_exit(struct proc *p)
  300 {
  301         struct procdesc *pd;
  302 
  303         sx_assert(&proctree_lock, SA_XLOCKED);
  304         PROC_LOCK_ASSERT(p, MA_OWNED);
  305         KASSERT(p->p_procdesc != NULL, ("procdesc_exit: p_procdesc NULL"));
  306 
  307         pd = p->p_procdesc;
  308 
  309         PROCDESC_LOCK(pd);
  310         KASSERT((pd->pd_flags & PDF_CLOSED) == 0 || p->p_pptr == initproc,
  311             ("procdesc_exit: closed && parent not init"));
  312 
  313         pd->pd_flags |= PDF_EXITED;
  314         pd->pd_xstat = KW_EXITCODE(p->p_xexit, p->p_xsig);
  315 
  316         /*
  317          * If the process descriptor has been closed, then we have nothing
  318          * to do; return 1 so that init will get SIGCHLD and do the reaping.
  319          * Clean up the procdesc now rather than letting it happen during
  320          * that reap.
  321          */
  322         if (pd->pd_flags & PDF_CLOSED) {
  323                 PROCDESC_UNLOCK(pd);
  324                 pd->pd_proc = NULL;
  325                 p->p_procdesc = NULL;
  326                 procdesc_free(pd);
  327                 return (1);
  328         }
  329         if (pd->pd_flags & PDF_SELECTED) {
  330                 pd->pd_flags &= ~PDF_SELECTED;
  331                 selwakeup(&pd->pd_selinfo);
  332         }
  333         KNOTE_LOCKED(&pd->pd_selinfo.si_note, NOTE_EXIT);
  334         PROCDESC_UNLOCK(pd);
  335         return (0);
  336 }
  337 
  338 /*
  339  * When a process descriptor is reaped, perhaps as a result of close() or
  340  * pdwait4(), release the process's reference on the process descriptor.
  341  */
  342 void
  343 procdesc_reap(struct proc *p)
  344 {
  345         struct procdesc *pd;
  346 
  347         sx_assert(&proctree_lock, SA_XLOCKED);
  348         KASSERT(p->p_procdesc != NULL, ("procdesc_reap: p_procdesc == NULL"));
  349 
  350         pd = p->p_procdesc;
  351         pd->pd_proc = NULL;
  352         p->p_procdesc = NULL;
  353         procdesc_free(pd);
  354 }
  355 
  356 /*
  357  * procdesc_close() - last close on a process descriptor.  If the process is
  358  * still running, terminate with SIGKILL (unless PDF_DAEMON is set) and let
  359  * init(8) clean up the mess; if not, we have to clean up the zombie ourselves.
  360  */
  361 static int
  362 procdesc_close(struct file *fp, struct thread *td)
  363 {
  364         struct procdesc *pd;
  365         struct proc *p;
  366 
  367         KASSERT(fp->f_type == DTYPE_PROCDESC, ("procdesc_close: !procdesc"));
  368 
  369         pd = fp->f_data;
  370         fp->f_ops = &badfileops;
  371         fp->f_data = NULL;
  372 
  373         sx_xlock(&proctree_lock);
  374         PROCDESC_LOCK(pd);
  375         pd->pd_flags |= PDF_CLOSED;
  376         PROCDESC_UNLOCK(pd);
  377         p = pd->pd_proc;
  378         if (p == NULL) {
  379                 /*
  380                  * This is the case where process' exit status was already
  381                  * collected and procdesc_reap() was already called.
  382                  */
  383                 sx_xunlock(&proctree_lock);
  384         } else {
  385                 PROC_LOCK(p);
  386                 if (p->p_state == PRS_ZOMBIE) {
  387                         /*
  388                          * If the process is already dead and just awaiting
  389                          * reaping, do that now.  This will release the
  390                          * process's reference to the process descriptor when it
  391                          * calls back into procdesc_reap().
  392                          */
  393                         PROC_SLOCK(p);
  394                         proc_reap(curthread, p, NULL, 0);
  395                 } else {
  396                         /*
  397                          * If the process is not yet dead, we need to kill it,
  398                          * but we can't wait around synchronously for it to go
  399                          * away, as that path leads to madness (and deadlocks).
  400                          * First, detach the process from its descriptor so that
  401                          * its exit status will be reported normally.
  402                          */
  403                         pd->pd_proc = NULL;
  404                         p->p_procdesc = NULL;
  405                         procdesc_free(pd);
  406 
  407                         /*
  408                          * Next, reparent it to init(8) so that there's someone
  409                          * to pick up the pieces; finally, terminate with
  410                          * prejudice.
  411                          */
  412                         p->p_sigparent = SIGCHLD;
  413                         proc_reparent(p, initproc);
  414                         if ((pd->pd_flags & PDF_DAEMON) == 0)
  415                                 kern_psignal(p, SIGKILL);
  416                         PROC_UNLOCK(p);
  417                         sx_xunlock(&proctree_lock);
  418                 }
  419         }
  420 
  421         /*
  422          * Release the file descriptor's reference on the process descriptor.
  423          */
  424         procdesc_free(pd);
  425         return (0);
  426 }
  427 
  428 static int
  429 procdesc_poll(struct file *fp, int events, struct ucred *active_cred,
  430     struct thread *td)
  431 {
  432         struct procdesc *pd;
  433         int revents;
  434 
  435         revents = 0;
  436         pd = fp->f_data;
  437         PROCDESC_LOCK(pd);
  438         if (pd->pd_flags & PDF_EXITED)
  439                 revents |= POLLHUP;
  440         if (revents == 0) {
  441                 selrecord(td, &pd->pd_selinfo);
  442                 pd->pd_flags |= PDF_SELECTED;
  443         }
  444         PROCDESC_UNLOCK(pd);
  445         return (revents);
  446 }
  447 
  448 static void
  449 procdesc_kqops_detach(struct knote *kn)
  450 {
  451         struct procdesc *pd;
  452 
  453         pd = kn->kn_fp->f_data;
  454         knlist_remove(&pd->pd_selinfo.si_note, kn, 0);
  455 }
  456 
  457 static int
  458 procdesc_kqops_event(struct knote *kn, long hint)
  459 {
  460         struct procdesc *pd;
  461         u_int event;
  462 
  463         pd = kn->kn_fp->f_data;
  464         if (hint == 0) {
  465                 /*
  466                  * Initial test after registration. Generate a NOTE_EXIT in
  467                  * case the process already terminated before registration.
  468                  */
  469                 event = pd->pd_flags & PDF_EXITED ? NOTE_EXIT : 0;
  470         } else {
  471                 /* Mask off extra data. */
  472                 event = (u_int)hint & NOTE_PCTRLMASK;
  473         }
  474 
  475         /* If the user is interested in this event, record it. */
  476         if (kn->kn_sfflags & event)
  477                 kn->kn_fflags |= event;
  478 
  479         /* Process is gone, so flag the event as finished. */
  480         if (event == NOTE_EXIT) {
  481                 kn->kn_flags |= EV_EOF | EV_ONESHOT;
  482                 if (kn->kn_fflags & NOTE_EXIT)
  483                         kn->kn_data = pd->pd_xstat;
  484                 if (kn->kn_fflags == 0)
  485                         kn->kn_flags |= EV_DROP;
  486                 return (1);
  487         }
  488 
  489         return (kn->kn_fflags != 0);
  490 }
  491 
  492 static struct filterops procdesc_kqops = {
  493         .f_isfd = 1,
  494         .f_detach = procdesc_kqops_detach,
  495         .f_event = procdesc_kqops_event,
  496 };
  497 
  498 static int
  499 procdesc_kqfilter(struct file *fp, struct knote *kn)
  500 {
  501         struct procdesc *pd;
  502 
  503         pd = fp->f_data;
  504         switch (kn->kn_filter) {
  505         case EVFILT_PROCDESC:
  506                 kn->kn_fop = &procdesc_kqops;
  507                 kn->kn_flags |= EV_CLEAR;
  508                 knlist_add(&pd->pd_selinfo.si_note, kn, 0);
  509                 return (0);
  510         default:
  511                 return (EINVAL);
  512         }
  513 }
  514 
  515 static int
  516 procdesc_stat(struct file *fp, struct stat *sb, struct ucred *active_cred,
  517     struct thread *td)
  518 {
  519         struct procdesc *pd;
  520         struct timeval pstart;
  521 
  522         /*
  523          * XXXRW: Perhaps we should cache some more information from the
  524          * process so that we can return it reliably here even after it has
  525          * died.  For example, caching its credential data.
  526          */
  527         bzero(sb, sizeof(*sb));
  528         pd = fp->f_data;
  529         sx_slock(&proctree_lock);
  530         if (pd->pd_proc != NULL) {
  531                 PROC_LOCK(pd->pd_proc);
  532 
  533                 /* Set birth and [acm] times to process start time. */
  534                 pstart = pd->pd_proc->p_stats->p_start;
  535                 timevaladd(&pstart, &boottime);
  536                 TIMEVAL_TO_TIMESPEC(&pstart, &sb->st_birthtim);
  537                 sb->st_atim = sb->st_birthtim;
  538                 sb->st_ctim = sb->st_birthtim;
  539                 sb->st_mtim = sb->st_birthtim;
  540                 if (pd->pd_proc->p_state != PRS_ZOMBIE)
  541                         sb->st_mode = S_IFREG | S_IRWXU;
  542                 else
  543                         sb->st_mode = S_IFREG;
  544                 sb->st_uid = pd->pd_proc->p_ucred->cr_ruid;
  545                 sb->st_gid = pd->pd_proc->p_ucred->cr_rgid;
  546                 PROC_UNLOCK(pd->pd_proc);
  547         } else
  548                 sb->st_mode = S_IFREG;
  549         sx_sunlock(&proctree_lock);
  550         return (0);
  551 }
  552 
  553 static int
  554 procdesc_fill_kinfo(struct file *fp, struct kinfo_file *kif,
  555     struct filedesc *fdp)
  556 {
  557         struct procdesc *pdp;
  558 
  559         kif->kf_type = KF_TYPE_PROCDESC;
  560         pdp = fp->f_data;
  561         kif->kf_un.kf_proc.kf_pid = pdp->pd_pid;
  562         return (0);
  563 }

Cache object: 612ba5460674c37aeebc0111fceb2c03


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]


This page is part of the FreeBSD/Linux Kernel Cross-Reference, and was automatically generated using a modified version of the LXR engine.