FreeBSD/Linux Kernel Cross Reference
sys/kernel/system.c
1 /* This task handles the interface between the kernel and user-level servers.
2 * System services can be accessed by doing a system call. System calls are
3 * transformed into request messages, which are handled by this task. By
4 * convention, a sys_call() is transformed in a SYS_CALL request message that
5 * is handled in a function named do_call().
6 *
7 * A private call vector is used to map all system calls to the functions that
8 * handle them. The actual handler functions are contained in separate files
9 * to keep this file clean. The call vector is used in the system task's main
10 * loop to handle all incoming requests.
11 *
12 * In addition to the main sys_task() entry point, which starts the main loop,
13 * there are several other minor entry points:
14 * get_priv: assign privilege structure to user or system process
15 * send_sig: send a signal directly to a system process
16 * cause_sig: take action to cause a signal to occur via PM
17 * umap_local: map virtual address in LOCAL_SEG to physical
18 * umap_remote: map virtual address in REMOTE_SEG to physical
19 * umap_bios: map virtual address in BIOS_SEG to physical
20 * virtual_copy: copy bytes from one virtual address to another
21 * get_randomness: accumulate randomness in a buffer
22 *
23 * Changes:
24 * Aug 04, 2005 check if system call is allowed (Jorrit N. Herder)
25 * Jul 20, 2005 send signal to services with message (Jorrit N. Herder)
26 * Jan 15, 2005 new, generalized virtual copy function (Jorrit N. Herder)
27 * Oct 10, 2004 dispatch system calls from call vector (Jorrit N. Herder)
28 * Sep 30, 2004 source code documentation updated (Jorrit N. Herder)
29 */
30
31 #include "kernel.h"
32 #include "system.h"
33 #include <stdlib.h>
34 #include <signal.h>
35 #include <unistd.h>
36 #include <sys/sigcontext.h>
37 #if (CHIP == INTEL)
38 #include <ibm/memory.h>
39 #include "protect.h"
40 #endif
41
42 /* Declaration of the call vector that defines the mapping of system calls
43 * to handler functions. The vector is initialized in sys_init() with map(),
44 * which makes sure the system call numbers are ok. No space is allocated,
45 * because the dummy is declared extern. If an illegal call is given, the
46 * array size will be negative and this won't compile.
47 */
48 PUBLIC int (*call_vec[NR_SYS_CALLS])(message *m_ptr);
49
50 #define map(call_nr, handler) \
51 {extern int dummy[NR_SYS_CALLS>(unsigned)(call_nr-KERNEL_CALL) ? 1:-1];} \
52 call_vec[(call_nr-KERNEL_CALL)] = (handler)
53
54 FORWARD _PROTOTYPE( void initialize, (void));
55
56 /*===========================================================================*
57 * sys_task *
58 *===========================================================================*/
59 PUBLIC void sys_task()
60 {
61 /* Main entry point of sys_task. Get the message and dispatch on type. */
62 static message m;
63 register int result;
64 register struct proc *caller_ptr;
65 unsigned int call_nr;
66 int s;
67
68 /* Initialize the system task. */
69 initialize();
70
71 while (TRUE) {
72 /* Get work. Block and wait until a request message arrives. */
73 receive(ANY, &m);
74 call_nr = (unsigned) m.m_type - KERNEL_CALL;
75 caller_ptr = proc_addr(m.m_source);
76
77 /* See if the caller made a valid request and try to handle it. */
78 if (! (priv(caller_ptr)->s_call_mask & (1<<call_nr))) {
79 #if DEBUG_ENABLE_IPC_WARNINGS
80 kprintf("SYSTEM: request %d from %d denied.\n", call_nr,m.m_source);
81 #endif
82 result = ECALLDENIED; /* illegal message type */
83 } else if (call_nr >= NR_SYS_CALLS) { /* check call number */
84 #if DEBUG_ENABLE_IPC_WARNINGS
85 kprintf("SYSTEM: illegal request %d from %d.\n", call_nr,m.m_source);
86 #endif
87 result = EBADREQUEST; /* illegal message type */
88 }
89 else {
90 result = (*call_vec[call_nr])(&m); /* handle the system call */
91 }
92
93 /* Send a reply, unless inhibited by a handler function. Use the kernel
94 * function lock_send() to prevent a system call trap. The destination
95 * is known to be blocked waiting for a message.
96 */
97 if (result != EDONTREPLY) {
98 m.m_type = result; /* report status of call */
99 if (OK != (s=lock_send(m.m_source, &m))) {
100 kprintf("SYSTEM, reply to %d failed: %d\n", m.m_source, s);
101 }
102 }
103 }
104 }
105
106 /*===========================================================================*
107 * initialize *
108 *===========================================================================*/
109 PRIVATE void initialize(void)
110 {
111 register struct priv *sp;
112 int i;
113
114 /* Initialize IRQ handler hooks. Mark all hooks available. */
115 for (i=0; i<NR_IRQ_HOOKS; i++) {
116 irq_hooks[i].proc_nr = NONE;
117 }
118
119 /* Initialize all alarm timers for all processes. */
120 for (sp=BEG_PRIV_ADDR; sp < END_PRIV_ADDR; sp++) {
121 tmr_inittimer(&(sp->s_alarm_timer));
122 }
123
124 /* Initialize the call vector to a safe default handler. Some system calls
125 * may be disabled or nonexistant. Then explicitely map known calls to their
126 * handler functions. This is done with a macro that gives a compile error
127 * if an illegal call number is used. The ordering is not important here.
128 */
129 for (i=0; i<NR_SYS_CALLS; i++) {
130 call_vec[i] = do_unused;
131 }
132
133 /* Process management. */
134 map(SYS_FORK, do_fork); /* a process forked a new process */
135 map(SYS_EXEC, do_exec); /* update process after execute */
136 map(SYS_EXIT, do_exit); /* clean up after process exit */
137 map(SYS_NICE, do_nice); /* set scheduling priority */
138 map(SYS_PRIVCTL, do_privctl); /* system privileges control */
139 map(SYS_TRACE, do_trace); /* request a trace operation */
140
141 /* Signal handling. */
142 map(SYS_KILL, do_kill); /* cause a process to be signaled */
143 map(SYS_GETKSIG, do_getksig); /* PM checks for pending signals */
144 map(SYS_ENDKSIG, do_endksig); /* PM finished processing signal */
145 map(SYS_SIGSEND, do_sigsend); /* start POSIX-style signal */
146 map(SYS_SIGRETURN, do_sigreturn); /* return from POSIX-style signal */
147
148 /* Device I/O. */
149 map(SYS_IRQCTL, do_irqctl); /* interrupt control operations */
150 map(SYS_DEVIO, do_devio); /* inb, inw, inl, outb, outw, outl */
151 map(SYS_SDEVIO, do_sdevio); /* phys_insb, _insw, _outsb, _outsw */
152 map(SYS_VDEVIO, do_vdevio); /* vector with devio requests */
153 map(SYS_INT86, do_int86); /* real-mode BIOS calls */
154
155 /* Memory management. */
156 map(SYS_NEWMAP, do_newmap); /* set up a process memory map */
157 map(SYS_SEGCTL, do_segctl); /* add segment and get selector */
158 map(SYS_MEMSET, do_memset); /* write char to memory area */
159
160 /* Copying. */
161 map(SYS_UMAP, do_umap); /* map virtual to physical address */
162 map(SYS_VIRCOPY, do_vircopy); /* use pure virtual addressing */
163 map(SYS_PHYSCOPY, do_physcopy); /* use physical addressing */
164 map(SYS_VIRVCOPY, do_virvcopy); /* vector with copy requests */
165 map(SYS_PHYSVCOPY, do_physvcopy); /* vector with copy requests */
166
167 /* Clock functionality. */
168 map(SYS_TIMES, do_times); /* get uptime and process times */
169 map(SYS_SETALARM, do_setalarm); /* schedule a synchronous alarm */
170
171 /* System control. */
172 map(SYS_ABORT, do_abort); /* abort MINIX */
173 map(SYS_GETINFO, do_getinfo); /* request system information */
174 map(SYS_IOPENABLE, do_iopenable); /* Enable I/O */
175 }
176
177 /*===========================================================================*
178 * get_priv *
179 *===========================================================================*/
180 PUBLIC int get_priv(rc, proc_type)
181 register struct proc *rc; /* new (child) process pointer */
182 int proc_type; /* system or user process flag */
183 {
184 /* Get a privilege structure. All user processes share the same privilege
185 * structure. System processes get their own privilege structure.
186 */
187 register struct priv *sp; /* privilege structure */
188
189 if (proc_type == SYS_PROC) { /* find a new slot */
190 for (sp = BEG_PRIV_ADDR; sp < END_PRIV_ADDR; ++sp)
191 if (sp->s_proc_nr == NONE && sp->s_id != USER_PRIV_ID) break;
192 if (sp->s_proc_nr != NONE) return(ENOSPC);
193 rc->p_priv = sp; /* assign new slot */
194 rc->p_priv->s_proc_nr = proc_nr(rc); /* set association */
195 rc->p_priv->s_flags = SYS_PROC; /* mark as privileged */
196 } else {
197 rc->p_priv = &priv[USER_PRIV_ID]; /* use shared slot */
198 rc->p_priv->s_proc_nr = INIT_PROC_NR; /* set association */
199 rc->p_priv->s_flags = 0; /* no initial flags */
200 }
201 return(OK);
202 }
203
204 /*===========================================================================*
205 * get_randomness *
206 *===========================================================================*/
207 PUBLIC void get_randomness(source)
208 int source;
209 {
210 /* On machines with the RDTSC (cycle counter read instruction - pentium
211 * and up), use that for high-resolution raw entropy gathering. Otherwise,
212 * use the realtime clock (tick resolution).
213 *
214 * Unfortunately this test is run-time - we don't want to bother with
215 * compiling different kernels for different machines.
216 *
217 * On machines without RDTSC, we use read_clock().
218 */
219 int r_next;
220 unsigned long tsc_high, tsc_low;
221
222 source %= RANDOM_SOURCES;
223 r_next= krandom.bin[source].r_next;
224 if (machine.processor > 486) {
225 read_tsc(&tsc_high, &tsc_low);
226 krandom.bin[source].r_buf[r_next] = tsc_low;
227 } else {
228 krandom.bin[source].r_buf[r_next] = read_clock();
229 }
230 if (krandom.bin[source].r_size < RANDOM_ELEMENTS) {
231 krandom.bin[source].r_size ++;
232 }
233 krandom.bin[source].r_next = (r_next + 1 ) % RANDOM_ELEMENTS;
234 }
235
236 /*===========================================================================*
237 * send_sig *
238 *===========================================================================*/
239 PUBLIC void send_sig(proc_nr, sig_nr)
240 int proc_nr; /* system process to be signalled */
241 int sig_nr; /* signal to be sent, 1 to _NSIG */
242 {
243 /* Notify a system process about a signal. This is straightforward. Simply
244 * set the signal that is to be delivered in the pending signals map and
245 * send a notification with source SYSTEM.
246 */
247 register struct proc *rp;
248
249 rp = proc_addr(proc_nr);
250 sigaddset(&priv(rp)->s_sig_pending, sig_nr);
251 lock_notify(SYSTEM, proc_nr);
252 }
253
254 /*===========================================================================*
255 * cause_sig *
256 *===========================================================================*/
257 PUBLIC void cause_sig(proc_nr, sig_nr)
258 int proc_nr; /* process to be signalled */
259 int sig_nr; /* signal to be sent, 1 to _NSIG */
260 {
261 /* A system process wants to send a signal to a process. Examples are:
262 * - HARDWARE wanting to cause a SIGSEGV after a CPU exception
263 * - TTY wanting to cause SIGINT upon getting a DEL
264 * - FS wanting to cause SIGPIPE for a broken pipe
265 * Signals are handled by sending a message to PM. This function handles the
266 * signals and makes sure the PM gets them by sending a notification. The
267 * process being signaled is blocked while PM has not finished all signals
268 * for it.
269 * Race conditions between calls to this function and the system calls that
270 * process pending kernel signals cannot exist. Signal related functions are
271 * only called when a user process causes a CPU exception and from the kernel
272 * process level, which runs to completion.
273 */
274 register struct proc *rp;
275
276 /* Check if the signal is already pending. Process it otherwise. */
277 rp = proc_addr(proc_nr);
278 if (! sigismember(&rp->p_pending, sig_nr)) {
279 sigaddset(&rp->p_pending, sig_nr);
280 if (! (rp->p_rts_flags & SIGNALED)) { /* other pending */
281 if (rp->p_rts_flags == 0) lock_dequeue(rp); /* make not ready */
282 rp->p_rts_flags |= SIGNALED | SIG_PENDING; /* update flags */
283 send_sig(PM_PROC_NR, SIGKSIG);
284 }
285 }
286 }
287
288 /*===========================================================================*
289 * umap_bios *
290 *===========================================================================*/
291 PUBLIC phys_bytes umap_bios(rp, vir_addr, bytes)
292 register struct proc *rp; /* pointer to proc table entry for process */
293 vir_bytes vir_addr; /* virtual address in BIOS segment */
294 vir_bytes bytes; /* # of bytes to be copied */
295 {
296 /* Calculate the physical memory address at the BIOS. Note: currently, BIOS
297 * address zero (the first BIOS interrupt vector) is not considered, as an
298 * error here, but since the physical address will be zero as well, the
299 * calling function will think an error occurred. This is not a problem,
300 * since no one uses the first BIOS interrupt vector.
301 */
302
303 /* Check all acceptable ranges. */
304 if (vir_addr >= BIOS_MEM_BEGIN && vir_addr + bytes <= BIOS_MEM_END)
305 return (phys_bytes) vir_addr;
306 else if (vir_addr >= BASE_MEM_TOP && vir_addr + bytes <= UPPER_MEM_END)
307 return (phys_bytes) vir_addr;
308
309 #if DEAD_CODE /* brutal fix, if the above is too restrictive */
310 if (vir_addr >= BIOS_MEM_BEGIN && vir_addr + bytes <= UPPER_MEM_END)
311 return (phys_bytes) vir_addr;
312 #endif
313
314 kprintf("Warning, error in umap_bios, virtual address 0x%x\n", vir_addr);
315 return 0;
316 }
317
318 /*===========================================================================*
319 * umap_local *
320 *===========================================================================*/
321 PUBLIC phys_bytes umap_local(rp, seg, vir_addr, bytes)
322 register struct proc *rp; /* pointer to proc table entry for process */
323 int seg; /* T, D, or S segment */
324 vir_bytes vir_addr; /* virtual address in bytes within the seg */
325 vir_bytes bytes; /* # of bytes to be copied */
326 {
327 /* Calculate the physical memory address for a given virtual address. */
328 vir_clicks vc; /* the virtual address in clicks */
329 phys_bytes pa; /* intermediate variables as phys_bytes */
330 #if (CHIP == INTEL)
331 phys_bytes seg_base;
332 #endif
333
334 /* If 'seg' is D it could really be S and vice versa. T really means T.
335 * If the virtual address falls in the gap, it causes a problem. On the
336 * 8088 it is probably a legal stack reference, since "stackfaults" are
337 * not detected by the hardware. On 8088s, the gap is called S and
338 * accepted, but on other machines it is called D and rejected.
339 * The Atari ST behaves like the 8088 in this respect.
340 */
341
342 if (bytes <= 0) return( (phys_bytes) 0);
343 if (vir_addr + bytes <= vir_addr) return 0; /* overflow */
344 vc = (vir_addr + bytes - 1) >> CLICK_SHIFT; /* last click of data */
345
346 #if (CHIP == INTEL) || (CHIP == M68000)
347 if (seg != T)
348 seg = (vc < rp->p_memmap[D].mem_vir + rp->p_memmap[D].mem_len ? D : S);
349 #else
350 if (seg != T)
351 seg = (vc < rp->p_memmap[S].mem_vir ? D : S);
352 #endif
353
354 if ((vir_addr>>CLICK_SHIFT) >= rp->p_memmap[seg].mem_vir +
355 rp->p_memmap[seg].mem_len) return( (phys_bytes) 0 );
356
357 if (vc >= rp->p_memmap[seg].mem_vir +
358 rp->p_memmap[seg].mem_len) return( (phys_bytes) 0 );
359
360 #if (CHIP == INTEL)
361 seg_base = (phys_bytes) rp->p_memmap[seg].mem_phys;
362 seg_base = seg_base << CLICK_SHIFT; /* segment origin in bytes */
363 #endif
364 pa = (phys_bytes) vir_addr;
365 #if (CHIP != M68000)
366 pa -= rp->p_memmap[seg].mem_vir << CLICK_SHIFT;
367 return(seg_base + pa);
368 #endif
369 #if (CHIP == M68000)
370 pa -= (phys_bytes)rp->p_memmap[seg].mem_vir << CLICK_SHIFT;
371 pa += (phys_bytes)rp->p_memmap[seg].mem_phys << CLICK_SHIFT;
372 return(pa);
373 #endif
374 }
375
376 /*===========================================================================*
377 * umap_remote *
378 *===========================================================================*/
379 PUBLIC phys_bytes umap_remote(rp, seg, vir_addr, bytes)
380 register struct proc *rp; /* pointer to proc table entry for process */
381 int seg; /* index of remote segment */
382 vir_bytes vir_addr; /* virtual address in bytes within the seg */
383 vir_bytes bytes; /* # of bytes to be copied */
384 {
385 /* Calculate the physical memory address for a given virtual address. */
386 struct far_mem *fm;
387
388 if (bytes <= 0) return( (phys_bytes) 0);
389 if (seg < 0 || seg >= NR_REMOTE_SEGS) return( (phys_bytes) 0);
390
391 fm = &rp->p_priv->s_farmem[seg];
392 if (! fm->in_use) return( (phys_bytes) 0);
393 if (vir_addr + bytes > fm->mem_len) return( (phys_bytes) 0);
394
395 return(fm->mem_phys + (phys_bytes) vir_addr);
396 }
397
398 /*===========================================================================*
399 * virtual_copy *
400 *===========================================================================*/
401 PUBLIC int virtual_copy(src_addr, dst_addr, bytes)
402 struct vir_addr *src_addr; /* source virtual address */
403 struct vir_addr *dst_addr; /* destination virtual address */
404 vir_bytes bytes; /* # of bytes to copy */
405 {
406 /* Copy bytes from virtual address src_addr to virtual address dst_addr.
407 * Virtual addresses can be in ABS, LOCAL_SEG, REMOTE_SEG, or BIOS_SEG.
408 */
409 struct vir_addr *vir_addr[2]; /* virtual source and destination address */
410 phys_bytes phys_addr[2]; /* absolute source and destination */
411 int seg_index;
412 int i;
413
414 /* Check copy count. */
415 if (bytes <= 0) return(EDOM);
416
417 /* Do some more checks and map virtual addresses to physical addresses. */
418 vir_addr[_SRC_] = src_addr;
419 vir_addr[_DST_] = dst_addr;
420 for (i=_SRC_; i<=_DST_; i++) {
421
422 /* Get physical address. */
423 switch((vir_addr[i]->segment & SEGMENT_TYPE)) {
424 case LOCAL_SEG:
425 seg_index = vir_addr[i]->segment & SEGMENT_INDEX;
426 phys_addr[i] = umap_local( proc_addr(vir_addr[i]->proc_nr),
427 seg_index, vir_addr[i]->offset, bytes );
428 break;
429 case REMOTE_SEG:
430 seg_index = vir_addr[i]->segment & SEGMENT_INDEX;
431 phys_addr[i] = umap_remote( proc_addr(vir_addr[i]->proc_nr),
432 seg_index, vir_addr[i]->offset, bytes );
433 break;
434 case BIOS_SEG:
435 phys_addr[i] = umap_bios( proc_addr(vir_addr[i]->proc_nr),
436 vir_addr[i]->offset, bytes );
437 break;
438 case PHYS_SEG:
439 phys_addr[i] = vir_addr[i]->offset;
440 break;
441 default:
442 return(EINVAL);
443 }
444
445 /* Check if mapping succeeded. */
446 if (phys_addr[i] <= 0 && vir_addr[i]->segment != PHYS_SEG)
447 return(EFAULT);
448 }
449
450 /* Now copy bytes between physical addresseses. */
451 phys_copy(phys_addr[_SRC_], phys_addr[_DST_], (phys_bytes) bytes);
452 return(OK);
453 }
454
Cache object: 8b63903ad2417dae6534545815dc26e9
|