The Design and Implementation of the FreeBSD Operating System, Second Edition
Now available: The Design and Implementation of the FreeBSD Operating System (Second Edition)


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]

FreeBSD/Linux Kernel Cross Reference
sys/servers/rs/manager.c

Version: -  FREEBSD  -  FREEBSD-13-STABLE  -  FREEBSD-13-0  -  FREEBSD-12-STABLE  -  FREEBSD-12-0  -  FREEBSD-11-STABLE  -  FREEBSD-11-0  -  FREEBSD-10-STABLE  -  FREEBSD-10-0  -  FREEBSD-9-STABLE  -  FREEBSD-9-0  -  FREEBSD-8-STABLE  -  FREEBSD-8-0  -  FREEBSD-7-STABLE  -  FREEBSD-7-0  -  FREEBSD-6-STABLE  -  FREEBSD-6-0  -  FREEBSD-5-STABLE  -  FREEBSD-5-0  -  FREEBSD-4-STABLE  -  FREEBSD-3-STABLE  -  FREEBSD22  -  l41  -  OPENBSD  -  linux-2.6  -  MK84  -  PLAN9  -  xnu-8792 
SearchContext: -  none  -  3  -  10 

    1 /*
    2  * Changes:
    3  *   Jul 22, 2005:      Created  (Jorrit N. Herder)
    4  */
    5 
    6 #include "inc.h"
    7 #include <unistd.h>
    8 #include <sys/types.h>
    9 #include <sys/wait.h>
   10 #include <minix/dmap.h>
   11 
/* Allocate variables. These are the server-wide tables and counters that the
 * request handlers in this file operate on.
 */
struct rproc rproc[NR_SYS_PROCS];               /* system process table */
struct rproc *rproc_ptr[NR_PROCS];              /* mapping for fast access,
                                                 * indexed by r_proc_nr */
int nr_in_use;                                  /* number of services */
extern int errno;                               /* error status */

/* Prototypes for internal functions that do the hard work. */
FORWARD _PROTOTYPE( int start_service, (struct rproc *rp) );
FORWARD _PROTOTYPE( int stop_service, (struct rproc *rp,int how) );

/* Set by do_shutdown(); while TRUE, do_exit() releases slots of exited
 * services instead of restarting them.
 */
PRIVATE int shutting_down = FALSE;

/* Exit status used by the forked child in start_service() when both exec
 * attempts fail; do_exit() recognizes it and does not restart the service.
 */
#define EXEC_FAILED     49                      /* recognizable status */
   25 
   26 /*===========================================================================*
   27  *                                      do_up                                *
   28  *===========================================================================*/
   29 PUBLIC int do_up(m_ptr)
   30 message *m_ptr;                                 /* request message pointer */
   31 {
   32 /* A request was made to start a new system service. Dismember the request 
   33  * message and gather all information needed to start the service. Starting
   34  * is done by a helper routine.
   35  */
   36   register struct rproc *rp;                    /* system process table */
   37   int slot_nr;                                  /* local table entry */
   38   int arg_count;                                /* number of arguments */
   39   char *cmd_ptr;                                /* parse command string */
   40   enum dev_style dev_style;                     /* device style */
   41   int s;                                        /* status variable */
   42 
   43   /* See if there is a free entry in the table with system processes. */
   44   if (nr_in_use >= NR_SYS_PROCS) return(EAGAIN); 
   45   for (slot_nr = 0; slot_nr < NR_SYS_PROCS; slot_nr++) {
   46       rp = &rproc[slot_nr];                     /* get pointer to slot */
   47       if (! rp->r_flags & RS_IN_USE)            /* check if available */
   48           break;
   49   }
   50   nr_in_use ++;                                 /* update administration */
   51 
   52   /* Obtain command name and parameters. This is a space-separated string
   53    * that looks like "/sbin/service arg1 arg2 ...". Arguments are optional.
   54    */
   55   if (m_ptr->RS_CMD_LEN > MAX_COMMAND_LEN) return(E2BIG);
   56   if (OK!=(s=sys_datacopy(m_ptr->m_source, (vir_bytes) m_ptr->RS_CMD_ADDR, 
   57         SELF, (vir_bytes) rp->r_cmd, m_ptr->RS_CMD_LEN))) return(s);
   58   rp->r_cmd[m_ptr->RS_CMD_LEN] = '\0';          /* ensure it is terminated */
   59   if (rp->r_cmd[0] != '/') return(EINVAL);      /* insist on absolute path */
   60 
   61   /* Build argument vector to be passed to execute call. The format of the
   62    * arguments vector is: path, arguments, NULL. 
   63    */
   64   arg_count = 0;                                /* initialize arg count */
   65   rp->r_argv[arg_count++] = rp->r_cmd;          /* start with path */
   66   cmd_ptr = rp->r_cmd;                          /* do some parsing */ 
   67   while(*cmd_ptr != '\0') {                     /* stop at end of string */
   68       if (*cmd_ptr == ' ') {                    /* next argument */
   69           *cmd_ptr = '\0';                      /* terminate previous */
   70           while (*++cmd_ptr == ' ') ;           /* skip spaces */
   71           if (*cmd_ptr == '\0') break;          /* no arg following */
   72           if (arg_count>MAX_NR_ARGS+1) break;   /* arg vector full */
   73           rp->r_argv[arg_count++] = cmd_ptr;    /* add to arg vector */
   74       }
   75       cmd_ptr ++;                               /* continue parsing */
   76   }
   77   rp->r_argv[arg_count] = NULL;                 /* end with NULL pointer */
   78   rp->r_argc = arg_count;
   79 
   80   /* Initialize some fields. */
   81   rp->r_period = m_ptr->RS_PERIOD;
   82   rp->r_dev_nr = m_ptr->RS_DEV_MAJOR;
   83   rp->r_dev_style = STYLE_DEV; 
   84   rp->r_restarts = -1;                          /* will be incremented */
   85   
   86   /* All information was gathered. Now try to start the system service. */
   87   return(start_service(rp));
   88 }
   89 
   90 
   91 /*===========================================================================*
   92  *                              do_down                                      *
   93  *===========================================================================*/
   94 PUBLIC int do_down(message *m_ptr)
   95 {
   96   register struct rproc *rp;
   97   pid_t pid = (pid_t) m_ptr->RS_PID;
   98 
   99   for (rp=BEG_RPROC_ADDR; rp<END_RPROC_ADDR; rp++) {
  100       if (rp->r_flags & RS_IN_USE && rp->r_pid == pid) {
  101 #if VERBOSE
  102           printf("stopping %d (%d)\n", pid, m_ptr->RS_PID);
  103 #endif
  104           stop_service(rp,RS_EXITING);
  105           return(OK);
  106       }
  107   }
  108 #if VERBOSE
  109   printf("not found %d (%d)\n", pid, m_ptr->RS_PID);
  110 #endif
  111   return(ESRCH);
  112 }
  113 
  114 
  115 /*===========================================================================*
  116  *                              do_refresh                                   *
  117  *===========================================================================*/
  118 PUBLIC int do_refresh(message *m_ptr)
  119 {
  120   register struct rproc *rp;
  121   pid_t pid = (pid_t) m_ptr->RS_PID;
  122 
  123   for (rp=BEG_RPROC_ADDR; rp<END_RPROC_ADDR; rp++) {
  124       if (rp->r_flags & RS_IN_USE && rp->r_pid == pid) {
  125 #if VERBOSE
  126           printf("refreshing %d (%d)\n", pid, m_ptr->RS_PID);
  127 #endif
  128           stop_service(rp,RS_REFRESHING);
  129           return(OK);
  130       }
  131   }
  132 #if VERBOSE
  133   printf("not found %d (%d)\n", pid, m_ptr->RS_PID);
  134 #endif
  135   return(ESRCH);
  136 }
  137 
  138 /*===========================================================================*
  139  *                              do_rescue                                    *
  140  *===========================================================================*/
  141 PUBLIC int do_rescue(message *m_ptr)
  142 {
  143   char rescue_dir[MAX_RESCUE_DIR_LEN];
  144   int s;
  145 
  146   /* Copy rescue directory from user. */
  147   if (m_ptr->RS_CMD_LEN > MAX_RESCUE_DIR_LEN) return(E2BIG);
  148   if (OK!=(s=sys_datacopy(m_ptr->m_source, (vir_bytes) m_ptr->RS_CMD_ADDR, 
  149         SELF, (vir_bytes) rescue_dir, m_ptr->RS_CMD_LEN))) return(s);
  150   rescue_dir[m_ptr->RS_CMD_LEN] = '\0';         /* ensure it is terminated */
  151   if (rescue_dir[0] != '/') return(EINVAL);     /* insist on absolute path */
  152 
  153   /* Change RS' directory to the rescue directory. Provided that the needed
  154    * binaries are in the rescue dir, this makes recovery possible even if the 
  155    * (root) file system is no longer available, because no directory lookups
  156    * are required. Thus if an absolute path fails, we can try to strip the 
  157    * path an see if the command is in the rescue dir. 
  158    */
  159   if (chdir(rescue_dir) != 0) return(errno);
  160   return(OK);
  161 }
  162 
  163 /*===========================================================================*
  164  *                              do_shutdown                                  *
  165  *===========================================================================*/
  166 PUBLIC int do_shutdown(message *m_ptr)
  167 {
  168   /* Set flag so that RS server knows services shouldn't be restarted. */
  169   shutting_down = TRUE;
  170   return(OK);
  171 }
  172 
  173 /*===========================================================================*
  174  *                              do_exit                                      *
  175  *===========================================================================*/
  176 PUBLIC void do_exit(message *m_ptr)
  177 {
  178   register struct rproc *rp;
  179   pid_t exit_pid;
  180   int exit_status;
  181 
  182 #if VERBOSE
  183   printf("RS: got SIGCHLD signal, doing wait to get exited child.\n");
  184 #endif
  185 
  186   /* See which child exited and what the exit status is. This is done in a
  187    * loop because multiple childs may have exited, all reported by one 
  188    * SIGCHLD signal. The WNOHANG options is used to prevent blocking if, 
  189    * somehow, no exited child can be found. 
  190    */
  191   while ( (exit_pid = waitpid(-1, &exit_status, WNOHANG)) != 0 ) {
  192 
  193 #if VERBOSE
  194       printf("RS: proc %d, pid %d, ", rp->r_proc_nr, exit_pid); 
  195       if (WIFSIGNALED(exit_status)) {
  196           printf("killed, signal number %d\n", WTERMSIG(exit_status));
  197       } 
  198       else if (WIFEXITED(exit_status)) {
  199           printf("normal exit, status %d\n", WEXITSTATUS(exit_status));
  200       }
  201 #endif
  202 
  203       /* Search the system process table to see who exited. 
  204        * This should always succeed. 
  205        */
  206       for (rp=BEG_RPROC_ADDR; rp<END_RPROC_ADDR; rp++) {
  207           if ((rp->r_flags & RS_IN_USE) && rp->r_pid == exit_pid) {
  208 
  209               rproc_ptr[rp->r_proc_nr] = NULL;          /* invalidate */
  210 
  211               if ((rp->r_flags & RS_EXITING) || shutting_down) {
  212                   rp->r_flags = 0;                      /* release slot */
  213                   rproc_ptr[rp->r_proc_nr] = NULL;
  214               }
  215               else if(rp->r_flags & RS_REFRESHING) {
  216                       rp->r_restarts = -1;              /* reset counter */
  217                       start_service(rp);                /* direct restart */
  218               }
  219               else if (WIFEXITED(exit_status) &&
  220                       WEXITSTATUS(exit_status) == EXEC_FAILED) {
  221                   rp->r_flags = 0;                      /* release slot */
  222               }
  223               else {
  224 #if VERBOSE
  225                   printf("Unexpected exit. Restarting %s\n", rp->r_cmd);
  226 #endif
  227                   /* Determine what to do. If this is the first unexpected 
  228                    * exit, immediately restart this service. Otherwise use
  229                    * a binary exponetial backoff.
  230                    */
  231                   if (rp->r_restarts > 0) {
  232                       rp->r_backoff = 1 << MIN(rp->r_restarts,(BACKOFF_BITS-1));
  233                       rp->r_backoff = MIN(rp->r_backoff,MAX_BACKOFF); 
  234                   }
  235                   else {
  236                       start_service(rp);                /* direct restart */
  237                   }
  238               }
  239               break;
  240           }
  241       }
  242   }
  243 }
  244 
  245 /*===========================================================================*
  246  *                              do_period                                    *
  247  *===========================================================================*/
  248 PUBLIC void do_period(m_ptr)
  249 message *m_ptr;
  250 {
  251   register struct rproc *rp;
  252   clock_t now = m_ptr->NOTIFY_TIMESTAMP;
  253   int s;
  254 
  255   /* Search system services table. Only check slots that are in use. */
  256   for (rp=BEG_RPROC_ADDR; rp<END_RPROC_ADDR; rp++) {
  257       if (rp->r_flags & RS_IN_USE) {
  258 
  259           /* If the service is to be revived (because it repeatedly exited, 
  260            * and was not directly restarted), the binary backoff field is  
  261            * greater than zero. 
  262            */
  263           if (rp->r_backoff > 0) {
  264               rp->r_backoff -= 1;
  265               if (rp->r_backoff == 0) {
  266                   start_service(rp);
  267               }
  268           }
  269 
  270           /* If the service was signaled with a SIGTERM and fails to respond,
  271            * kill the system service with a SIGKILL signal.
  272            */
  273           else if (rp->r_stop_tm > 0 && now - rp->r_stop_tm > 2*RS_DELTA_T
  274            && rp->r_pid > 0) {
  275               kill(rp->r_pid, SIGKILL);         /* terminate */
  276           }
  277         
  278           /* There seems to be no special conditions. If the service has a 
  279            * period assigned check its status. 
  280            */
  281           else if (rp->r_period > 0) {
  282 
  283               /* Check if an answer to a status request is still pending. If 
  284                * the driver didn't respond within time, kill it to simulate 
  285                * a crash. The failure will be detected and the service will 
  286                * be restarted automatically.
  287                */
  288               if (rp->r_alive_tm < rp->r_check_tm) { 
  289                   if (now - rp->r_alive_tm > 2*rp->r_period &&
  290                       rp->r_pid > 0) { 
  291 #if VERBOSE
  292                       printf("RS: service %d reported late\n", rp->r_proc_nr); 
  293 #endif
  294                       kill(rp->r_pid, SIGKILL);         /* simulate crash */
  295                   }
  296               }
  297 
  298               /* No answer pending. Check if a period expired since the last
  299                * check and, if so request the system service's status.
  300                */
  301               else if (now - rp->r_check_tm > rp->r_period) {
  302 #if VERBOSE
  303                   printf("RS: status request sent to %d\n", rp->r_proc_nr); 
  304 #endif
  305                   notify(rp->r_proc_nr);                /* request status */
  306                   rp->r_check_tm = now;                 /* mark time */
  307               }
  308           }
  309       }
  310   }
  311 
  312   /* Reschedule a synchronous alarm for the next period. */
  313   if (OK != (s=sys_setalarm(RS_DELTA_T, 0)))
  314       panic("RS", "couldn't set alarm", s);
  315 }
  316 
  317 
  318 /*===========================================================================*
  319  *                              start_service                                *
  320  *===========================================================================*/
  321 PRIVATE int start_service(rp)
  322 struct rproc *rp;
  323 {
  324 /* Try to execute the given system service. Fork a new process. The child
  325  * process will be inhibited from running by the NO_PRIV flag. Only let the
  326  * child run once its privileges have been set by the parent.
  327  */
  328   int child_proc_nr;                            /* child process slot */
  329   pid_t child_pid;                              /* child's process id */
  330   char *file_only;
  331   int s;
  332   message m;
  333 
  334   /* Now fork and branch for parent and child process (and check for error). */
  335   child_pid = fork();
  336   switch(child_pid) {                                   /* see fork(2) */
  337   case -1:                                              /* fork failed */
  338       report("RS", "warning, fork() failed", errno);    /* shouldn't happen */
  339       return(errno);                                    /* return error */
  340 
  341   case 0:                                               /* child process */
  342       /* Try to execute the binary that has an absolute path. If this fails, 
  343        * e.g., because the root file system cannot be read, try to strip of
  344        * the path, and see if the command is in RS' current working dir.
  345        */
  346       execve(rp->r_argv[0], rp->r_argv, NULL);          /* POSIX execute */
  347       file_only = strrchr(rp->r_argv[0], '/') + 1;
  348       execve(file_only, rp->r_argv, NULL);              /* POSIX execute */
  349       printf("RS: exec failed for %s: %d\n", rp->r_argv[0], errno);
  350       exit(EXEC_FAILED);                                /* terminate child */
  351 
  352   default:                                              /* parent process */
  353       child_proc_nr = getnprocnr(child_pid);            /* get child slot */ 
  354       break;                                            /* continue below */
  355   }
  356 
  357   /* Only the parent process (the RS server) gets to this point. The child
  358    * is still inhibited from running because it's privilege structure is
  359    * not yet set. First try to set the device driver mapping at the FS.
  360    */
  361   if (rp->r_dev_nr > 0) {                               /* set driver map */
  362       if ((s=mapdriver(child_proc_nr, rp->r_dev_nr, rp->r_dev_style)) < 0) {
  363           report("RS", "couldn't map driver", errno);
  364           if(child_pid > 0) kill(child_pid, SIGKILL);   /* kill driver */
  365           else report("RS", "didn't kill pid", child_pid);
  366           rp->r_flags |= RS_EXITING;                    /* expect exit */
  367           return(s);                                    /* return error */
  368       }
  369   }
  370 
  371   /* The device driver mapping has been set, or the service was not a driver.
  372    * Now, set the privilege structure for the child process to let is run.
  373    * This should succeed: we tested number in use above.
  374    */
  375   m.PR_PROC_NR = child_proc_nr;
  376   if ((s = _taskcall(SYSTEM, SYS_PRIVCTL, &m)) < 0) {   /* set privileges */
  377       report("RS","call to SYSTEM failed", s);          /* to let child run */
  378       if(child_pid > 0) kill(child_pid, SIGKILL);       /* kill driver */
  379       else report("RS", "didn't kill pid", child_pid);
  380       rp->r_flags |= RS_EXITING;                        /* expect exit */
  381       return(s);                                        /* return error */
  382   }
  383 
  384 #if VERBOSE
  385       printf("RS: started '%s', major %d, pid %d, proc_nr %d\n", 
  386           rp->r_cmd, rp->r_dev_nr, child_pid, child_proc_nr);
  387 #endif
  388 
  389   /* The system service now has been successfully started. Update the rest
  390    * of the system process table that is maintain by the RS server. The only 
  391    * thing that can go wrong now, is that execution fails at the child. If 
  392    * that's the case, the child will exit. 
  393    */
  394   rp->r_flags = RS_IN_USE;                      /* mark slot in use */
  395   rp->r_restarts += 1;                          /* raise nr of restarts */
  396   rp->r_proc_nr = child_proc_nr;                /* set child details */
  397   rp->r_pid = child_pid;
  398   rp->r_check_tm = 0;                           /* not check yet */
  399   getuptime(&rp->r_alive_tm);                   /* currently alive */
  400   rp->r_stop_tm = 0;                            /* not exiting yet */
  401   rproc_ptr[child_proc_nr] = rp;                /* mapping for fast access */
  402   return(OK);
  403 }
  404 
  405 /*===========================================================================*
  406  *                              stop_service                                 *
  407  *===========================================================================*/
  408 PRIVATE int stop_service(rp,how)
  409 struct rproc *rp;
  410 int how;
  411 {
  412   /* Try to stop the system service. First send a SIGTERM signal to ask the
  413    * system service to terminate. If the service didn't install a signal 
  414    * handler, it will be killed. If it did and ignores the signal, we'll
  415    * find out because we record the time here and send a SIGKILL.
  416    */
  417 #if VERBOSE
  418   printf("RS tries to stop %s (pid %d)\n", rp->r_cmd, rp->r_pid);
  419 #endif
  420 
  421   rp->r_flags |= how;                           /* what to on exit? */
  422   if(rp->r_pid > 0) kill(rp->r_pid, SIGTERM);   /* first try friendly */
  423   else report("RS", "didn't kill pid", rp->r_pid);
  424   getuptime(&rp->r_stop_tm);                    /* record current time */
  425 }
  426 
  427 
  428 /*===========================================================================*
  429  *                              do_getsysinfo                                *
  430  *===========================================================================*/
  431 PUBLIC int do_getsysinfo(m_ptr)
  432 message *m_ptr;
  433 {
  434   vir_bytes src_addr, dst_addr;
  435   int dst_proc;
  436   size_t len;
  437   int s;
  438 
  439   switch(m_ptr->m1_i1) {
  440   case SI_PROC_TAB:
  441         src_addr = (vir_bytes) rproc;
  442         len = sizeof(struct rproc) * NR_SYS_PROCS;
  443         break; 
  444   default:
  445         return(EINVAL);
  446   }
  447 
  448   dst_proc = m_ptr->m_source;
  449   dst_addr = (vir_bytes) m_ptr->m1_p1;
  450   if (OK != (s=sys_datacopy(SELF, src_addr, dst_proc, dst_addr, len)))
  451         return(s);
  452   return(OK);
  453 }
  454 

Cache object: 5a9a1d299bd3f84664bcb83d8931a6a5


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]


This page is part of the FreeBSD/Linux Kernel Cross-Reference, and was automatically generated using a modified version of the LXR engine.