The Design and Implementation of the FreeBSD Operating System, Second Edition
Now available: The Design and Implementation of the FreeBSD Operating System (Second Edition)


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]

FreeBSD/Linux Kernel Cross Reference
sys/kernel/system.c

Version: -  FREEBSD  -  FREEBSD-12-STABLE  -  FREEBSD-12-0  -  FREEBSD-11-STABLE  -  FREEBSD-11-2  -  FREEBSD-11-1  -  FREEBSD-11-0  -  FREEBSD-10-STABLE  -  FREEBSD-10-4  -  FREEBSD-10-3  -  FREEBSD-10-2  -  FREEBSD-10-1  -  FREEBSD-10-0  -  FREEBSD-9-STABLE  -  FREEBSD-9-3  -  FREEBSD-9-2  -  FREEBSD-9-1  -  FREEBSD-9-0  -  FREEBSD-8-STABLE  -  FREEBSD-8-4  -  FREEBSD-8-3  -  FREEBSD-8-2  -  FREEBSD-8-1  -  FREEBSD-8-0  -  FREEBSD-7-STABLE  -  FREEBSD-7-4  -  FREEBSD-7-3  -  FREEBSD-7-2  -  FREEBSD-7-1  -  FREEBSD-7-0  -  FREEBSD-6-STABLE  -  FREEBSD-6-4  -  FREEBSD-6-3  -  FREEBSD-6-2  -  FREEBSD-6-1  -  FREEBSD-6-0  -  FREEBSD-5-STABLE  -  FREEBSD-5-5  -  FREEBSD-5-4  -  FREEBSD-5-3  -  FREEBSD-5-2  -  FREEBSD-5-1  -  FREEBSD-5-0  -  FREEBSD-4-STABLE  -  FREEBSD-3-STABLE  -  FREEBSD22  -  linux-2.6  -  linux-2.4.22  -  MK83  -  MK84  -  PLAN9  -  DFBSD  -  NETBSD  -  NETBSD5  -  NETBSD4  -  NETBSD3  -  NETBSD20  -  OPENBSD  -  xnu-517  -  xnu-792  -  xnu-792.6.70  -  xnu-1228  -  xnu-1456.1.26  -  xnu-1699.24.8  -  xnu-2050.18.24  -  OPENSOLARIS  -  minix-3-1-1 
SearchContext: -  none  -  3  -  10 

    1 /* This task handles the interface between the kernel and user-level servers.
    2  * System services can be accessed by doing a system call. System calls are 
    3  * transformed into request messages, which are handled by this task. By 
    4  * convention, a sys_call() is transformed into a SYS_CALL request message that
    5  * is handled in a function named do_call(). 
    6  *
    7  * A private call vector is used to map all system calls to the functions that
    8  * handle them. The actual handler functions are contained in separate files
    9  * to keep this file clean. The call vector is used in the system task's main
   10  * loop to handle all incoming requests.  
   11  *
   12  * In addition to the main sys_task() entry point, which starts the main loop,
   13  * there are several other minor entry points:
   14  *   get_priv:          assign privilege structure to user or system process
   15  *   send_sig:          send a signal directly to a system process
   16  *   cause_sig:         take action to cause a signal to occur via PM
   17  *   umap_local:        map virtual address in LOCAL_SEG to physical 
   18  *   umap_remote:       map virtual address in REMOTE_SEG to physical 
   19  *   umap_bios:         map virtual address in BIOS_SEG to physical 
   20  *   virtual_copy:      copy bytes from one virtual address to another 
   21  *   get_randomness:    accumulate randomness in a buffer
   22  *
   23  * Changes:
   24  *   Aug 04, 2005   check if system call is allowed  (Jorrit N. Herder)
   25  *   Jul 20, 2005   send signal to services with message  (Jorrit N. Herder) 
   26  *   Jan 15, 2005   new, generalized virtual copy function  (Jorrit N. Herder)
   27  *   Oct 10, 2004   dispatch system calls from call vector  (Jorrit N. Herder)
   28  *   Sep 30, 2004   source code documentation updated  (Jorrit N. Herder)
   29  */
   30 
   31 #include "kernel.h"
   32 #include "system.h"
   33 #include <stdlib.h>
   34 #include <signal.h>
   35 #include <unistd.h>
   36 #include <sys/sigcontext.h>
   37 #if (CHIP == INTEL)
   38 #include <ibm/memory.h>
   39 #include "protect.h"
   40 #endif
   41 
   42 /* Declaration of the call vector that defines the mapping of system calls 
   43  * to handler functions. The vector is initialized in sys_init() with map(), 
   44  * which makes sure the system call numbers are ok. No space is allocated, 
   45  * because the dummy is declared extern. If an illegal call is given, the 
   46  * array size will be negative and this won't compile. 
   47  */
   48 PUBLIC int (*call_vec[NR_SYS_CALLS])(message *m_ptr);
   49 
   50 #define map(call_nr, handler) \
   51     {extern int dummy[NR_SYS_CALLS>(unsigned)(call_nr-KERNEL_CALL) ? 1:-1];} \
   52     call_vec[(call_nr-KERNEL_CALL)] = (handler)  
   53 
   54 FORWARD _PROTOTYPE( void initialize, (void));
   55 
   56 /*===========================================================================*
   57  *                              sys_task                                     *
   58  *===========================================================================*/
   59 PUBLIC void sys_task()
   60 {
   61 /* Main entry point of sys_task.  Get the message and dispatch on type. */
   62   static message m;
   63   register int result;
   64   register struct proc *caller_ptr;
   65   unsigned int call_nr;
   66   int s;
   67 
   68   /* Initialize the system task. */
   69   initialize();
   70 
   71   while (TRUE) {
   72       /* Get work. Block and wait until a request message arrives. */
   73       receive(ANY, &m);                 
   74       call_nr = (unsigned) m.m_type - KERNEL_CALL;      
   75       caller_ptr = proc_addr(m.m_source);       
   76 
   77       /* See if the caller made a valid request and try to handle it. */
   78       if (! (priv(caller_ptr)->s_call_mask & (1<<call_nr))) {
   79 #if DEBUG_ENABLE_IPC_WARNINGS
   80           kprintf("SYSTEM: request %d from %d denied.\n", call_nr,m.m_source);
   81 #endif
   82           result = ECALLDENIED;                 /* illegal message type */
   83       } else if (call_nr >= NR_SYS_CALLS) {             /* check call number */
   84 #if DEBUG_ENABLE_IPC_WARNINGS
   85           kprintf("SYSTEM: illegal request %d from %d.\n", call_nr,m.m_source);
   86 #endif
   87           result = EBADREQUEST;                 /* illegal message type */
   88       } 
   89       else {
   90           result = (*call_vec[call_nr])(&m);    /* handle the system call */
   91       }
   92 
   93       /* Send a reply, unless inhibited by a handler function. Use the kernel
   94        * function lock_send() to prevent a system call trap. The destination
   95        * is known to be blocked waiting for a message.
   96        */
   97       if (result != EDONTREPLY) {
   98           m.m_type = result;                    /* report status of call */
   99           if (OK != (s=lock_send(m.m_source, &m))) {
  100               kprintf("SYSTEM, reply to %d failed: %d\n", m.m_source, s);
  101           }
  102       }
  103   }
  104 }
  105 
  106 /*===========================================================================*
  107  *                              initialize                                   *
  108  *===========================================================================*/
  109 PRIVATE void initialize(void)
  110 {
  111   register struct priv *sp;
  112   int i;
  113 
  114   /* Initialize IRQ handler hooks. Mark all hooks available. */
  115   for (i=0; i<NR_IRQ_HOOKS; i++) {
  116       irq_hooks[i].proc_nr = NONE;
  117   }
  118 
  119   /* Initialize all alarm timers for all processes. */
  120   for (sp=BEG_PRIV_ADDR; sp < END_PRIV_ADDR; sp++) {
  121     tmr_inittimer(&(sp->s_alarm_timer));
  122   }
  123 
  124   /* Initialize the call vector to a safe default handler. Some system calls 
  125    * may be disabled or nonexistant. Then explicitely map known calls to their
  126    * handler functions. This is done with a macro that gives a compile error
  127    * if an illegal call number is used. The ordering is not important here.
  128    */
  129   for (i=0; i<NR_SYS_CALLS; i++) {
  130       call_vec[i] = do_unused;
  131   }
  132 
  133   /* Process management. */
  134   map(SYS_FORK, do_fork);               /* a process forked a new process */
  135   map(SYS_EXEC, do_exec);               /* update process after execute */
  136   map(SYS_EXIT, do_exit);               /* clean up after process exit */
  137   map(SYS_NICE, do_nice);               /* set scheduling priority */
  138   map(SYS_PRIVCTL, do_privctl);         /* system privileges control */
  139   map(SYS_TRACE, do_trace);             /* request a trace operation */
  140 
  141   /* Signal handling. */
  142   map(SYS_KILL, do_kill);               /* cause a process to be signaled */
  143   map(SYS_GETKSIG, do_getksig);         /* PM checks for pending signals */
  144   map(SYS_ENDKSIG, do_endksig);         /* PM finished processing signal */
  145   map(SYS_SIGSEND, do_sigsend);         /* start POSIX-style signal */
  146   map(SYS_SIGRETURN, do_sigreturn);     /* return from POSIX-style signal */
  147 
  148   /* Device I/O. */
  149   map(SYS_IRQCTL, do_irqctl);           /* interrupt control operations */ 
  150   map(SYS_DEVIO, do_devio);             /* inb, inw, inl, outb, outw, outl */ 
  151   map(SYS_SDEVIO, do_sdevio);           /* phys_insb, _insw, _outsb, _outsw */
  152   map(SYS_VDEVIO, do_vdevio);           /* vector with devio requests */ 
  153   map(SYS_INT86, do_int86);             /* real-mode BIOS calls */ 
  154 
  155   /* Memory management. */
  156   map(SYS_NEWMAP, do_newmap);           /* set up a process memory map */
  157   map(SYS_SEGCTL, do_segctl);           /* add segment and get selector */
  158   map(SYS_MEMSET, do_memset);           /* write char to memory area */
  159 
  160   /* Copying. */
  161   map(SYS_UMAP, do_umap);               /* map virtual to physical address */
  162   map(SYS_VIRCOPY, do_vircopy);         /* use pure virtual addressing */
  163   map(SYS_PHYSCOPY, do_physcopy);       /* use physical addressing */
  164   map(SYS_VIRVCOPY, do_virvcopy);       /* vector with copy requests */
  165   map(SYS_PHYSVCOPY, do_physvcopy);     /* vector with copy requests */
  166 
  167   /* Clock functionality. */
  168   map(SYS_TIMES, do_times);             /* get uptime and process times */
  169   map(SYS_SETALARM, do_setalarm);       /* schedule a synchronous alarm */
  170 
  171   /* System control. */
  172   map(SYS_ABORT, do_abort);             /* abort MINIX */
  173   map(SYS_GETINFO, do_getinfo);         /* request system information */ 
  174   map(SYS_IOPENABLE, do_iopenable);     /* Enable I/O */
  175 }
  176 
  177 /*===========================================================================*
  178  *                              get_priv                                     *
  179  *===========================================================================*/
  180 PUBLIC int get_priv(rc, proc_type)
  181 register struct proc *rc;               /* new (child) process pointer */
  182 int proc_type;                          /* system or user process flag */
  183 {
  184 /* Get a privilege structure. All user processes share the same privilege 
  185  * structure. System processes get their own privilege structure. 
  186  */
  187   register struct priv *sp;                     /* privilege structure */
  188 
  189   if (proc_type == SYS_PROC) {                  /* find a new slot */
  190       for (sp = BEG_PRIV_ADDR; sp < END_PRIV_ADDR; ++sp) 
  191           if (sp->s_proc_nr == NONE && sp->s_id != USER_PRIV_ID) break; 
  192       if (sp->s_proc_nr != NONE) return(ENOSPC);
  193       rc->p_priv = sp;                          /* assign new slot */
  194       rc->p_priv->s_proc_nr = proc_nr(rc);      /* set association */
  195       rc->p_priv->s_flags = SYS_PROC;           /* mark as privileged */
  196   } else {
  197       rc->p_priv = &priv[USER_PRIV_ID];         /* use shared slot */
  198       rc->p_priv->s_proc_nr = INIT_PROC_NR;     /* set association */
  199       rc->p_priv->s_flags = 0;                  /* no initial flags */
  200   }
  201   return(OK);
  202 }
  203 
  204 /*===========================================================================*
  205  *                              get_randomness                               *
  206  *===========================================================================*/
  207 PUBLIC void get_randomness(source)
  208 int source;
  209 {
  210 /* On machines with the RDTSC (cycle counter read instruction - pentium
  211  * and up), use that for high-resolution raw entropy gathering. Otherwise,
  212  * use the realtime clock (tick resolution).
  213  *
  214  * Unfortunately this test is run-time - we don't want to bother with
  215  * compiling different kernels for different machines.
  216  *
  217  * On machines without RDTSC, we use read_clock().
  218  */
  219   int r_next;
  220   unsigned long tsc_high, tsc_low;
  221 
  222   source %= RANDOM_SOURCES;
  223   r_next= krandom.bin[source].r_next;
  224   if (machine.processor > 486) {
  225       read_tsc(&tsc_high, &tsc_low);
  226       krandom.bin[source].r_buf[r_next] = tsc_low;
  227   } else {
  228       krandom.bin[source].r_buf[r_next] = read_clock();
  229   }
  230   if (krandom.bin[source].r_size < RANDOM_ELEMENTS) {
  231         krandom.bin[source].r_size ++;
  232   }
  233   krandom.bin[source].r_next = (r_next + 1 ) % RANDOM_ELEMENTS;
  234 }
  235 
  236 /*===========================================================================*
  237  *                              send_sig                                     *
  238  *===========================================================================*/
  239 PUBLIC void send_sig(proc_nr, sig_nr)
  240 int proc_nr;                    /* system process to be signalled */
  241 int sig_nr;                     /* signal to be sent, 1 to _NSIG */
  242 {
  243 /* Notify a system process about a signal. This is straightforward. Simply
  244  * set the signal that is to be delivered in the pending signals map and 
  245  * send a notification with source SYSTEM.
  246  */ 
  247   register struct proc *rp;
  248 
  249   rp = proc_addr(proc_nr);
  250   sigaddset(&priv(rp)->s_sig_pending, sig_nr);
  251   lock_notify(SYSTEM, proc_nr); 
  252 }
  253 
  254 /*===========================================================================*
  255  *                              cause_sig                                    *
  256  *===========================================================================*/
  257 PUBLIC void cause_sig(proc_nr, sig_nr)
  258 int proc_nr;                    /* process to be signalled */
  259 int sig_nr;                     /* signal to be sent, 1 to _NSIG */
  260 {
  261 /* A system process wants to send a signal to a process.  Examples are:
  262  *  - HARDWARE wanting to cause a SIGSEGV after a CPU exception
  263  *  - TTY wanting to cause SIGINT upon getting a DEL
  264  *  - FS wanting to cause SIGPIPE for a broken pipe 
  265  * Signals are handled by sending a message to PM.  This function handles the 
  266  * signals and makes sure the PM gets them by sending a notification. The 
  267  * process being signaled is blocked while PM has not finished all signals 
  268  * for it. 
  269  * Race conditions between calls to this function and the system calls that
  270  * process pending kernel signals cannot exist. Signal related functions are
  271  * only called when a user process causes a CPU exception and from the kernel 
  272  * process level, which runs to completion.
  273  */
  274   register struct proc *rp;
  275 
  276   /* Check if the signal is already pending. Process it otherwise. */
  277   rp = proc_addr(proc_nr);
  278   if (! sigismember(&rp->p_pending, sig_nr)) {
  279       sigaddset(&rp->p_pending, sig_nr);
  280       if (! (rp->p_rts_flags & SIGNALED)) {             /* other pending */
  281           if (rp->p_rts_flags == 0) lock_dequeue(rp);   /* make not ready */
  282           rp->p_rts_flags |= SIGNALED | SIG_PENDING;    /* update flags */
  283           send_sig(PM_PROC_NR, SIGKSIG);
  284       }
  285   }
  286 }
  287 
  288 /*===========================================================================*
  289  *                              umap_bios                                    *
  290  *===========================================================================*/
  291 PUBLIC phys_bytes umap_bios(rp, vir_addr, bytes)
  292 register struct proc *rp;       /* pointer to proc table entry for process */
  293 vir_bytes vir_addr;             /* virtual address in BIOS segment */
  294 vir_bytes bytes;                /* # of bytes to be copied */
  295 {
  296 /* Calculate the physical memory address at the BIOS. Note: currently, BIOS
  297  * address zero (the first BIOS interrupt vector) is not considered, as an 
  298  * error here, but since the physical address will be zero as well, the 
  299  * calling function will think an error occurred. This is not a problem,
  300  * since no one uses the first BIOS interrupt vector.  
  301  */
  302 
  303   /* Check all acceptable ranges. */
  304   if (vir_addr >= BIOS_MEM_BEGIN && vir_addr + bytes <= BIOS_MEM_END)
  305         return (phys_bytes) vir_addr;
  306   else if (vir_addr >= BASE_MEM_TOP && vir_addr + bytes <= UPPER_MEM_END)
  307         return (phys_bytes) vir_addr;
  308 
  309 #if DEAD_CODE   /* brutal fix, if the above is too restrictive */
  310   if (vir_addr >= BIOS_MEM_BEGIN && vir_addr + bytes <= UPPER_MEM_END)
  311         return (phys_bytes) vir_addr;
  312 #endif
  313 
  314   kprintf("Warning, error in umap_bios, virtual address 0x%x\n", vir_addr);
  315   return 0;
  316 }
  317 
  318 /*===========================================================================*
  319  *                              umap_local                                   *
  320  *===========================================================================*/
  321 PUBLIC phys_bytes umap_local(rp, seg, vir_addr, bytes)
  322 register struct proc *rp;       /* pointer to proc table entry for process */
  323 int seg;                        /* T, D, or S segment */
  324 vir_bytes vir_addr;             /* virtual address in bytes within the seg */
  325 vir_bytes bytes;                /* # of bytes to be copied */
  326 {
  327 /* Calculate the physical memory address for a given virtual address. */
  328   vir_clicks vc;                /* the virtual address in clicks */
  329   phys_bytes pa;                /* intermediate variables as phys_bytes */
  330 #if (CHIP == INTEL)
  331   phys_bytes seg_base;
  332 #endif
  333 
  334   /* If 'seg' is D it could really be S and vice versa.  T really means T.
  335    * If the virtual address falls in the gap,  it causes a problem. On the
  336    * 8088 it is probably a legal stack reference, since "stackfaults" are
  337    * not detected by the hardware.  On 8088s, the gap is called S and
  338    * accepted, but on other machines it is called D and rejected.
  339    * The Atari ST behaves like the 8088 in this respect.
  340    */
  341 
  342   if (bytes <= 0) return( (phys_bytes) 0);
  343   if (vir_addr + bytes <= vir_addr) return 0;   /* overflow */
  344   vc = (vir_addr + bytes - 1) >> CLICK_SHIFT;   /* last click of data */
  345 
  346 #if (CHIP == INTEL) || (CHIP == M68000)
  347   if (seg != T)
  348         seg = (vc < rp->p_memmap[D].mem_vir + rp->p_memmap[D].mem_len ? D : S);
  349 #else
  350   if (seg != T)
  351         seg = (vc < rp->p_memmap[S].mem_vir ? D : S);
  352 #endif
  353 
  354   if ((vir_addr>>CLICK_SHIFT) >= rp->p_memmap[seg].mem_vir + 
  355         rp->p_memmap[seg].mem_len) return( (phys_bytes) 0 );
  356 
  357   if (vc >= rp->p_memmap[seg].mem_vir + 
  358         rp->p_memmap[seg].mem_len) return( (phys_bytes) 0 );
  359 
  360 #if (CHIP == INTEL)
  361   seg_base = (phys_bytes) rp->p_memmap[seg].mem_phys;
  362   seg_base = seg_base << CLICK_SHIFT;   /* segment origin in bytes */
  363 #endif
  364   pa = (phys_bytes) vir_addr;
  365 #if (CHIP != M68000)
  366   pa -= rp->p_memmap[seg].mem_vir << CLICK_SHIFT;
  367   return(seg_base + pa);
  368 #endif
  369 #if (CHIP == M68000)
  370   pa -= (phys_bytes)rp->p_memmap[seg].mem_vir << CLICK_SHIFT;
  371   pa += (phys_bytes)rp->p_memmap[seg].mem_phys << CLICK_SHIFT;
  372   return(pa);
  373 #endif
  374 }
  375 
  376 /*===========================================================================*
  377  *                              umap_remote                                  *
  378  *===========================================================================*/
  379 PUBLIC phys_bytes umap_remote(rp, seg, vir_addr, bytes)
  380 register struct proc *rp;       /* pointer to proc table entry for process */
  381 int seg;                        /* index of remote segment */
  382 vir_bytes vir_addr;             /* virtual address in bytes within the seg */
  383 vir_bytes bytes;                /* # of bytes to be copied */
  384 {
  385 /* Calculate the physical memory address for a given virtual address. */
  386   struct far_mem *fm;
  387 
  388   if (bytes <= 0) return( (phys_bytes) 0);
  389   if (seg < 0 || seg >= NR_REMOTE_SEGS) return( (phys_bytes) 0);
  390 
  391   fm = &rp->p_priv->s_farmem[seg];
  392   if (! fm->in_use) return( (phys_bytes) 0);
  393   if (vir_addr + bytes > fm->mem_len) return( (phys_bytes) 0);
  394 
  395   return(fm->mem_phys + (phys_bytes) vir_addr); 
  396 }
  397 
  398 /*===========================================================================*
  399  *                              virtual_copy                                 *
  400  *===========================================================================*/
  401 PUBLIC int virtual_copy(src_addr, dst_addr, bytes)
  402 struct vir_addr *src_addr;      /* source virtual address */
  403 struct vir_addr *dst_addr;      /* destination virtual address */
  404 vir_bytes bytes;                /* # of bytes to copy  */
  405 {
  406 /* Copy bytes from virtual address src_addr to virtual address dst_addr. 
  407  * Virtual addresses can be in ABS, LOCAL_SEG, REMOTE_SEG, or BIOS_SEG.
  408  */
  409   struct vir_addr *vir_addr[2]; /* virtual source and destination address */
  410   phys_bytes phys_addr[2];      /* absolute source and destination */ 
  411   int seg_index;
  412   int i;
  413 
  414   /* Check copy count. */
  415   if (bytes <= 0) return(EDOM);
  416 
  417   /* Do some more checks and map virtual addresses to physical addresses. */
  418   vir_addr[_SRC_] = src_addr;
  419   vir_addr[_DST_] = dst_addr;
  420   for (i=_SRC_; i<=_DST_; i++) {
  421 
  422       /* Get physical address. */
  423       switch((vir_addr[i]->segment & SEGMENT_TYPE)) {
  424       case LOCAL_SEG:
  425           seg_index = vir_addr[i]->segment & SEGMENT_INDEX;
  426           phys_addr[i] = umap_local( proc_addr(vir_addr[i]->proc_nr), 
  427               seg_index, vir_addr[i]->offset, bytes );
  428           break;
  429       case REMOTE_SEG:
  430           seg_index = vir_addr[i]->segment & SEGMENT_INDEX;
  431           phys_addr[i] = umap_remote( proc_addr(vir_addr[i]->proc_nr), 
  432               seg_index, vir_addr[i]->offset, bytes );
  433           break;
  434       case BIOS_SEG:
  435           phys_addr[i] = umap_bios( proc_addr(vir_addr[i]->proc_nr),
  436               vir_addr[i]->offset, bytes );
  437           break;
  438       case PHYS_SEG:
  439           phys_addr[i] = vir_addr[i]->offset;
  440           break;
  441       default:
  442           return(EINVAL);
  443       }
  444 
  445       /* Check if mapping succeeded. */
  446       if (phys_addr[i] <= 0 && vir_addr[i]->segment != PHYS_SEG) 
  447           return(EFAULT);
  448   }
  449 
  450   /* Now copy bytes between physical addresseses. */
  451   phys_copy(phys_addr[_SRC_], phys_addr[_DST_], (phys_bytes) bytes);
  452   return(OK);
  453 }
  454 

Cache object: 8b63903ad2417dae6534545815dc26e9


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]


This page is part of the FreeBSD/Linux Kernel Cross-Reference, and was automatically generated using a modified version of the LXR engine.