1 /*
2 * Changes:
3 * Jul 22, 2005: Created (Jorrit N. Herder)
4 */
5
6 #include "inc.h"
7 #include <unistd.h>
8 #include <sys/types.h>
9 #include <sys/wait.h>
10 #include <minix/dmap.h>
11
12 /* Allocate the variables that make up the server's service administration. */
13 struct rproc rproc[NR_SYS_PROCS]; /* system process table; one slot per service */
14 struct rproc *rproc_ptr[NR_PROCS]; /* slot lookup, indexed by process number */
15 int nr_in_use; /* number of services; compared to NR_SYS_PROCS in do_up() */
16 extern int errno; /* error status */
17
18 /* Prototypes for internal functions that do the hard work. */
19 FORWARD _PROTOTYPE( int start_service, (struct rproc *rp) );
20 FORWARD _PROTOTYPE( int stop_service, (struct rproc *rp,int how) );
21
22 PRIVATE int shutting_down = FALSE; /* set by do_shutdown(); do_exit() then releases slots instead of restarting */
23
24 #define EXEC_FAILED 49 /* exit status a forked child uses when both execve() attempts fail */
25
26 /*===========================================================================*
27 * do_up *
28 *===========================================================================*/
29 PUBLIC int do_up(m_ptr)
30 message *m_ptr; /* request message pointer */
31 {
32 /* A request was made to start a new system service. Dismember the request
33 * message and gather all information needed to start the service. Starting
34 * is done by a helper routine.
35 */
36 register struct rproc *rp; /* system process table */
37 int slot_nr; /* local table entry */
38 int arg_count; /* number of arguments */
39 char *cmd_ptr; /* parse command string */
40 enum dev_style dev_style; /* device style */
41 int s; /* status variable */
42
43 /* See if there is a free entry in the table with system processes. */
44 if (nr_in_use >= NR_SYS_PROCS) return(EAGAIN);
45 for (slot_nr = 0; slot_nr < NR_SYS_PROCS; slot_nr++) {
46 rp = &rproc[slot_nr]; /* get pointer to slot */
47 if (! rp->r_flags & RS_IN_USE) /* check if available */
48 break;
49 }
50 nr_in_use ++; /* update administration */
51
52 /* Obtain command name and parameters. This is a space-separated string
53 * that looks like "/sbin/service arg1 arg2 ...". Arguments are optional.
54 */
55 if (m_ptr->RS_CMD_LEN > MAX_COMMAND_LEN) return(E2BIG);
56 if (OK!=(s=sys_datacopy(m_ptr->m_source, (vir_bytes) m_ptr->RS_CMD_ADDR,
57 SELF, (vir_bytes) rp->r_cmd, m_ptr->RS_CMD_LEN))) return(s);
58 rp->r_cmd[m_ptr->RS_CMD_LEN] = '\0'; /* ensure it is terminated */
59 if (rp->r_cmd[0] != '/') return(EINVAL); /* insist on absolute path */
60
61 /* Build argument vector to be passed to execute call. The format of the
62 * arguments vector is: path, arguments, NULL.
63 */
64 arg_count = 0; /* initialize arg count */
65 rp->r_argv[arg_count++] = rp->r_cmd; /* start with path */
66 cmd_ptr = rp->r_cmd; /* do some parsing */
67 while(*cmd_ptr != '\0') { /* stop at end of string */
68 if (*cmd_ptr == ' ') { /* next argument */
69 *cmd_ptr = '\0'; /* terminate previous */
70 while (*++cmd_ptr == ' ') ; /* skip spaces */
71 if (*cmd_ptr == '\0') break; /* no arg following */
72 if (arg_count>MAX_NR_ARGS+1) break; /* arg vector full */
73 rp->r_argv[arg_count++] = cmd_ptr; /* add to arg vector */
74 }
75 cmd_ptr ++; /* continue parsing */
76 }
77 rp->r_argv[arg_count] = NULL; /* end with NULL pointer */
78 rp->r_argc = arg_count;
79
80 /* Initialize some fields. */
81 rp->r_period = m_ptr->RS_PERIOD;
82 rp->r_dev_nr = m_ptr->RS_DEV_MAJOR;
83 rp->r_dev_style = STYLE_DEV;
84 rp->r_restarts = -1; /* will be incremented */
85
86 /* All information was gathered. Now try to start the system service. */
87 return(start_service(rp));
88 }
89
90
91 /*===========================================================================*
92 * do_down *
93 *===========================================================================*/
94 PUBLIC int do_down(message *m_ptr)
95 {
96 register struct rproc *rp;
97 pid_t pid = (pid_t) m_ptr->RS_PID;
98
99 for (rp=BEG_RPROC_ADDR; rp<END_RPROC_ADDR; rp++) {
100 if (rp->r_flags & RS_IN_USE && rp->r_pid == pid) {
101 #if VERBOSE
102 printf("stopping %d (%d)\n", pid, m_ptr->RS_PID);
103 #endif
104 stop_service(rp,RS_EXITING);
105 return(OK);
106 }
107 }
108 #if VERBOSE
109 printf("not found %d (%d)\n", pid, m_ptr->RS_PID);
110 #endif
111 return(ESRCH);
112 }
113
114
115 /*===========================================================================*
116 * do_refresh *
117 *===========================================================================*/
118 PUBLIC int do_refresh(message *m_ptr)
119 {
120 register struct rproc *rp;
121 pid_t pid = (pid_t) m_ptr->RS_PID;
122
123 for (rp=BEG_RPROC_ADDR; rp<END_RPROC_ADDR; rp++) {
124 if (rp->r_flags & RS_IN_USE && rp->r_pid == pid) {
125 #if VERBOSE
126 printf("refreshing %d (%d)\n", pid, m_ptr->RS_PID);
127 #endif
128 stop_service(rp,RS_REFRESHING);
129 return(OK);
130 }
131 }
132 #if VERBOSE
133 printf("not found %d (%d)\n", pid, m_ptr->RS_PID);
134 #endif
135 return(ESRCH);
136 }
137
138 /*===========================================================================*
139 * do_rescue *
140 *===========================================================================*/
141 PUBLIC int do_rescue(message *m_ptr)
142 {
143 char rescue_dir[MAX_RESCUE_DIR_LEN];
144 int s;
145
146 /* Copy rescue directory from user. */
147 if (m_ptr->RS_CMD_LEN > MAX_RESCUE_DIR_LEN) return(E2BIG);
148 if (OK!=(s=sys_datacopy(m_ptr->m_source, (vir_bytes) m_ptr->RS_CMD_ADDR,
149 SELF, (vir_bytes) rescue_dir, m_ptr->RS_CMD_LEN))) return(s);
150 rescue_dir[m_ptr->RS_CMD_LEN] = '\0'; /* ensure it is terminated */
151 if (rescue_dir[0] != '/') return(EINVAL); /* insist on absolute path */
152
153 /* Change RS' directory to the rescue directory. Provided that the needed
154 * binaries are in the rescue dir, this makes recovery possible even if the
155 * (root) file system is no longer available, because no directory lookups
156 * are required. Thus if an absolute path fails, we can try to strip the
157 * path an see if the command is in the rescue dir.
158 */
159 if (chdir(rescue_dir) != 0) return(errno);
160 return(OK);
161 }
162
163 /*===========================================================================*
164 * do_shutdown *
165 *===========================================================================*/
166 PUBLIC int do_shutdown(message *m_ptr)
167 {
168 /* Set flag so that RS server knows services shouldn't be restarted. */
169 shutting_down = TRUE;
170 return(OK);
171 }
172
173 /*===========================================================================*
174 * do_exit *
175 *===========================================================================*/
176 PUBLIC void do_exit(message *m_ptr)
177 {
178 register struct rproc *rp;
179 pid_t exit_pid;
180 int exit_status;
181
182 #if VERBOSE
183 printf("RS: got SIGCHLD signal, doing wait to get exited child.\n");
184 #endif
185
186 /* See which child exited and what the exit status is. This is done in a
187 * loop because multiple childs may have exited, all reported by one
188 * SIGCHLD signal. The WNOHANG options is used to prevent blocking if,
189 * somehow, no exited child can be found.
190 */
191 while ( (exit_pid = waitpid(-1, &exit_status, WNOHANG)) != 0 ) {
192
193 #if VERBOSE
194 printf("RS: proc %d, pid %d, ", rp->r_proc_nr, exit_pid);
195 if (WIFSIGNALED(exit_status)) {
196 printf("killed, signal number %d\n", WTERMSIG(exit_status));
197 }
198 else if (WIFEXITED(exit_status)) {
199 printf("normal exit, status %d\n", WEXITSTATUS(exit_status));
200 }
201 #endif
202
203 /* Search the system process table to see who exited.
204 * This should always succeed.
205 */
206 for (rp=BEG_RPROC_ADDR; rp<END_RPROC_ADDR; rp++) {
207 if ((rp->r_flags & RS_IN_USE) && rp->r_pid == exit_pid) {
208
209 rproc_ptr[rp->r_proc_nr] = NULL; /* invalidate */
210
211 if ((rp->r_flags & RS_EXITING) || shutting_down) {
212 rp->r_flags = 0; /* release slot */
213 rproc_ptr[rp->r_proc_nr] = NULL;
214 }
215 else if(rp->r_flags & RS_REFRESHING) {
216 rp->r_restarts = -1; /* reset counter */
217 start_service(rp); /* direct restart */
218 }
219 else if (WIFEXITED(exit_status) &&
220 WEXITSTATUS(exit_status) == EXEC_FAILED) {
221 rp->r_flags = 0; /* release slot */
222 }
223 else {
224 #if VERBOSE
225 printf("Unexpected exit. Restarting %s\n", rp->r_cmd);
226 #endif
227 /* Determine what to do. If this is the first unexpected
228 * exit, immediately restart this service. Otherwise use
229 * a binary exponetial backoff.
230 */
231 if (rp->r_restarts > 0) {
232 rp->r_backoff = 1 << MIN(rp->r_restarts,(BACKOFF_BITS-1));
233 rp->r_backoff = MIN(rp->r_backoff,MAX_BACKOFF);
234 }
235 else {
236 start_service(rp); /* direct restart */
237 }
238 }
239 break;
240 }
241 }
242 }
243 }
244
245 /*===========================================================================*
246 * do_period *
247 *===========================================================================*/
248 PUBLIC void do_period(m_ptr)
249 message *m_ptr;
250 {
251 register struct rproc *rp;
252 clock_t now = m_ptr->NOTIFY_TIMESTAMP;
253 int s;
254
255 /* Search system services table. Only check slots that are in use. */
256 for (rp=BEG_RPROC_ADDR; rp<END_RPROC_ADDR; rp++) {
257 if (rp->r_flags & RS_IN_USE) {
258
259 /* If the service is to be revived (because it repeatedly exited,
260 * and was not directly restarted), the binary backoff field is
261 * greater than zero.
262 */
263 if (rp->r_backoff > 0) {
264 rp->r_backoff -= 1;
265 if (rp->r_backoff == 0) {
266 start_service(rp);
267 }
268 }
269
270 /* If the service was signaled with a SIGTERM and fails to respond,
271 * kill the system service with a SIGKILL signal.
272 */
273 else if (rp->r_stop_tm > 0 && now - rp->r_stop_tm > 2*RS_DELTA_T
274 && rp->r_pid > 0) {
275 kill(rp->r_pid, SIGKILL); /* terminate */
276 }
277
278 /* There seems to be no special conditions. If the service has a
279 * period assigned check its status.
280 */
281 else if (rp->r_period > 0) {
282
283 /* Check if an answer to a status request is still pending. If
284 * the driver didn't respond within time, kill it to simulate
285 * a crash. The failure will be detected and the service will
286 * be restarted automatically.
287 */
288 if (rp->r_alive_tm < rp->r_check_tm) {
289 if (now - rp->r_alive_tm > 2*rp->r_period &&
290 rp->r_pid > 0) {
291 #if VERBOSE
292 printf("RS: service %d reported late\n", rp->r_proc_nr);
293 #endif
294 kill(rp->r_pid, SIGKILL); /* simulate crash */
295 }
296 }
297
298 /* No answer pending. Check if a period expired since the last
299 * check and, if so request the system service's status.
300 */
301 else if (now - rp->r_check_tm > rp->r_period) {
302 #if VERBOSE
303 printf("RS: status request sent to %d\n", rp->r_proc_nr);
304 #endif
305 notify(rp->r_proc_nr); /* request status */
306 rp->r_check_tm = now; /* mark time */
307 }
308 }
309 }
310 }
311
312 /* Reschedule a synchronous alarm for the next period. */
313 if (OK != (s=sys_setalarm(RS_DELTA_T, 0)))
314 panic("RS", "couldn't set alarm", s);
315 }
316
317
318 /*===========================================================================*
319 * start_service *
320 *===========================================================================*/
321 PRIVATE int start_service(rp)
322 struct rproc *rp;
323 {
324 /* Try to execute the given system service. Fork a new process. The child
325 * process will be inhibited from running by the NO_PRIV flag. Only let the
326 * child run once its privileges have been set by the parent.
327 */
328 int child_proc_nr; /* child process slot */
329 pid_t child_pid; /* child's process id */
330 char *file_only;
331 int s;
332 message m;
333
334 /* Now fork and branch for parent and child process (and check for error). */
335 child_pid = fork();
336 switch(child_pid) { /* see fork(2) */
337 case -1: /* fork failed */
338 report("RS", "warning, fork() failed", errno); /* shouldn't happen */
339 return(errno); /* return error */
340
341 case 0: /* child process */
342 /* Try to execute the binary that has an absolute path. If this fails,
343 * e.g., because the root file system cannot be read, try to strip of
344 * the path, and see if the command is in RS' current working dir.
345 */
346 execve(rp->r_argv[0], rp->r_argv, NULL); /* POSIX execute */
347 file_only = strrchr(rp->r_argv[0], '/') + 1;
348 execve(file_only, rp->r_argv, NULL); /* POSIX execute */
349 printf("RS: exec failed for %s: %d\n", rp->r_argv[0], errno);
350 exit(EXEC_FAILED); /* terminate child */
351
352 default: /* parent process */
353 child_proc_nr = getnprocnr(child_pid); /* get child slot */
354 break; /* continue below */
355 }
356
357 /* Only the parent process (the RS server) gets to this point. The child
358 * is still inhibited from running because it's privilege structure is
359 * not yet set. First try to set the device driver mapping at the FS.
360 */
361 if (rp->r_dev_nr > 0) { /* set driver map */
362 if ((s=mapdriver(child_proc_nr, rp->r_dev_nr, rp->r_dev_style)) < 0) {
363 report("RS", "couldn't map driver", errno);
364 if(child_pid > 0) kill(child_pid, SIGKILL); /* kill driver */
365 else report("RS", "didn't kill pid", child_pid);
366 rp->r_flags |= RS_EXITING; /* expect exit */
367 return(s); /* return error */
368 }
369 }
370
371 /* The device driver mapping has been set, or the service was not a driver.
372 * Now, set the privilege structure for the child process to let is run.
373 * This should succeed: we tested number in use above.
374 */
375 m.PR_PROC_NR = child_proc_nr;
376 if ((s = _taskcall(SYSTEM, SYS_PRIVCTL, &m)) < 0) { /* set privileges */
377 report("RS","call to SYSTEM failed", s); /* to let child run */
378 if(child_pid > 0) kill(child_pid, SIGKILL); /* kill driver */
379 else report("RS", "didn't kill pid", child_pid);
380 rp->r_flags |= RS_EXITING; /* expect exit */
381 return(s); /* return error */
382 }
383
384 #if VERBOSE
385 printf("RS: started '%s', major %d, pid %d, proc_nr %d\n",
386 rp->r_cmd, rp->r_dev_nr, child_pid, child_proc_nr);
387 #endif
388
389 /* The system service now has been successfully started. Update the rest
390 * of the system process table that is maintain by the RS server. The only
391 * thing that can go wrong now, is that execution fails at the child. If
392 * that's the case, the child will exit.
393 */
394 rp->r_flags = RS_IN_USE; /* mark slot in use */
395 rp->r_restarts += 1; /* raise nr of restarts */
396 rp->r_proc_nr = child_proc_nr; /* set child details */
397 rp->r_pid = child_pid;
398 rp->r_check_tm = 0; /* not check yet */
399 getuptime(&rp->r_alive_tm); /* currently alive */
400 rp->r_stop_tm = 0; /* not exiting yet */
401 rproc_ptr[child_proc_nr] = rp; /* mapping for fast access */
402 return(OK);
403 }
404
405 /*===========================================================================*
406 * stop_service *
407 *===========================================================================*/
408 PRIVATE int stop_service(rp,how)
409 struct rproc *rp;
410 int how;
411 {
412 /* Try to stop the system service. First send a SIGTERM signal to ask the
413 * system service to terminate. If the service didn't install a signal
414 * handler, it will be killed. If it did and ignores the signal, we'll
415 * find out because we record the time here and send a SIGKILL.
416 */
417 #if VERBOSE
418 printf("RS tries to stop %s (pid %d)\n", rp->r_cmd, rp->r_pid);
419 #endif
420
421 rp->r_flags |= how; /* what to on exit? */
422 if(rp->r_pid > 0) kill(rp->r_pid, SIGTERM); /* first try friendly */
423 else report("RS", "didn't kill pid", rp->r_pid);
424 getuptime(&rp->r_stop_tm); /* record current time */
425 }
426
427
428 /*===========================================================================*
429 * do_getsysinfo *
430 *===========================================================================*/
431 PUBLIC int do_getsysinfo(m_ptr)
432 message *m_ptr;
433 {
434 vir_bytes src_addr, dst_addr;
435 int dst_proc;
436 size_t len;
437 int s;
438
439 switch(m_ptr->m1_i1) {
440 case SI_PROC_TAB:
441 src_addr = (vir_bytes) rproc;
442 len = sizeof(struct rproc) * NR_SYS_PROCS;
443 break;
444 default:
445 return(EINVAL);
446 }
447
448 dst_proc = m_ptr->m_source;
449 dst_addr = (vir_bytes) m_ptr->m1_p1;
450 if (OK != (s=sys_datacopy(SELF, src_addr, dst_proc, dst_addr, len)))
451 return(s);
452 return(OK);
453 }
454
Cache object: 5a9a1d299bd3f84664bcb83d8931a6a5
|