The Design and Implementation of the FreeBSD Operating System, Second Edition
Now available: The Design and Implementation of the FreeBSD Operating System (Second Edition)


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]

FreeBSD/Linux Kernel Cross Reference
sys/port/sysproc.c

Version: -  FREEBSD  -  FREEBSD-13-STABLE  -  FREEBSD-13-0  -  FREEBSD-12-STABLE  -  FREEBSD-12-0  -  FREEBSD-11-STABLE  -  FREEBSD-11-0  -  FREEBSD-10-STABLE  -  FREEBSD-10-0  -  FREEBSD-9-STABLE  -  FREEBSD-9-0  -  FREEBSD-8-STABLE  -  FREEBSD-8-0  -  FREEBSD-7-STABLE  -  FREEBSD-7-0  -  FREEBSD-6-STABLE  -  FREEBSD-6-0  -  FREEBSD-5-STABLE  -  FREEBSD-5-0  -  FREEBSD-4-STABLE  -  FREEBSD-3-STABLE  -  FREEBSD22  -  l41  -  OPENBSD  -  linux-2.6  -  MK84  -  PLAN9  -  xnu-8792 
SearchContext: -  none  -  3  -  10 

    1 #include        "u.h"
    2 #include        "tos.h"
    3 #include        "../port/lib.h"
    4 #include        "mem.h"
    5 #include        "dat.h"
    6 #include        "fns.h"
    7 #include        "../port/error.h"
    8 #include        "edf.h"
    9 
   10 #include        <a.out.h>
   11 
   12 int     shargs(char*, int, char**);
   13 
   14 extern void checkpages(void);
   15 extern void checkpagerefs(void);
   16 
   17 long
   18 sysr1(ulong*)
   19 {
   20         checkpagerefs();
   21         return 0;
   22 }
   23 
   24 long
   25 sysrfork(ulong *arg)
   26 {
   27         Proc *p;
   28         int n, i;
   29         Fgrp *ofg;
   30         Pgrp *opg;
   31         Rgrp *org;
   32         Egrp *oeg;
   33         ulong pid, flag;
   34         Mach *wm;
   35 
   36         flag = arg[0];
   37         /* Check flags before we commit */
   38         if((flag & (RFFDG|RFCFDG)) == (RFFDG|RFCFDG))
   39                 error(Ebadarg);
   40         if((flag & (RFNAMEG|RFCNAMEG)) == (RFNAMEG|RFCNAMEG))
   41                 error(Ebadarg);
   42         if((flag & (RFENVG|RFCENVG)) == (RFENVG|RFCENVG))
   43                 error(Ebadarg);
   44 
   45         if((flag&RFPROC) == 0) {
   46                 if(flag & (RFMEM|RFNOWAIT))
   47                         error(Ebadarg);
   48                 if(flag & (RFFDG|RFCFDG)) {
   49                         ofg = up->fgrp;
   50                         if(flag & RFFDG)
   51                                 up->fgrp = dupfgrp(ofg);
   52                         else
   53                                 up->fgrp = dupfgrp(nil);
   54                         closefgrp(ofg);
   55                 }
   56                 if(flag & (RFNAMEG|RFCNAMEG)) {
   57                         opg = up->pgrp;
   58                         up->pgrp = newpgrp();
   59                         if(flag & RFNAMEG)
   60                                 pgrpcpy(up->pgrp, opg);
   61                         /* inherit noattach */
   62                         up->pgrp->noattach = opg->noattach;
   63                         closepgrp(opg);
   64                 }
   65                 if(flag & RFNOMNT)
   66                         up->pgrp->noattach = 1;
   67                 if(flag & RFREND) {
   68                         org = up->rgrp;
   69                         up->rgrp = newrgrp();
   70                         closergrp(org);
   71                 }
   72                 if(flag & (RFENVG|RFCENVG)) {
   73                         oeg = up->egrp;
   74                         up->egrp = smalloc(sizeof(Egrp));
   75                         up->egrp->ref = 1;
   76                         if(flag & RFENVG)
   77                                 envcpy(up->egrp, oeg);
   78                         closeegrp(oeg);
   79                 }
   80                 if(flag & RFNOTEG)
   81                         up->noteid = incref(&noteidalloc);
   82                 return 0;
   83         }
   84 
   85         p = newproc();
   86 
   87         p->fpsave = up->fpsave;
   88         p->scallnr = up->scallnr;
   89         p->s = up->s;
   90         p->nerrlab = 0;
   91         p->slash = up->slash;
   92         p->dot = up->dot;
   93         incref(p->dot);
   94 
   95         memmove(p->note, up->note, sizeof(p->note));
   96         p->privatemem = up->privatemem;
   97         p->noswap = up->noswap;
   98         p->nnote = up->nnote;
   99         p->notified = 0;
  100         p->lastnote = up->lastnote;
  101         p->notify = up->notify;
  102         p->ureg = up->ureg;
  103         p->dbgreg = 0;
  104 
  105         /* Make a new set of memory segments */
  106         n = flag & RFMEM;
  107         qlock(&p->seglock);
  108         if(waserror()){
  109                 qunlock(&p->seglock);
  110                 nexterror();
  111         }
  112         for(i = 0; i < NSEG; i++)
  113                 if(up->seg[i])
  114                         p->seg[i] = dupseg(up->seg, i, n);
  115         qunlock(&p->seglock);
  116         poperror();
  117 
  118         /* File descriptors */
  119         if(flag & (RFFDG|RFCFDG)) {
  120                 if(flag & RFFDG)
  121                         p->fgrp = dupfgrp(up->fgrp);
  122                 else
  123                         p->fgrp = dupfgrp(nil);
  124         }
  125         else {
  126                 p->fgrp = up->fgrp;
  127                 incref(p->fgrp);
  128         }
  129 
  130         /* Process groups */
  131         if(flag & (RFNAMEG|RFCNAMEG)) {
  132                 p->pgrp = newpgrp();
  133                 if(flag & RFNAMEG)
  134                         pgrpcpy(p->pgrp, up->pgrp);
  135                 /* inherit noattach */
  136                 p->pgrp->noattach = up->pgrp->noattach;
  137         }
  138         else {
  139                 p->pgrp = up->pgrp;
  140                 incref(p->pgrp);
  141         }
  142         if(flag & RFNOMNT)
  143                 up->pgrp->noattach = 1;
  144 
  145         if(flag & RFREND)
  146                 p->rgrp = newrgrp();
  147         else {
  148                 incref(up->rgrp);
  149                 p->rgrp = up->rgrp;
  150         }
  151 
  152         /* Environment group */
  153         if(flag & (RFENVG|RFCENVG)) {
  154                 p->egrp = smalloc(sizeof(Egrp));
  155                 p->egrp->ref = 1;
  156                 if(flag & RFENVG)
  157                         envcpy(p->egrp, up->egrp);
  158         }
  159         else {
  160                 p->egrp = up->egrp;
  161                 incref(p->egrp);
  162         }
  163         p->hang = up->hang;
  164         p->procmode = up->procmode;
  165 
  166         /* Craft a return frame which will cause the child to pop out of
  167          * the scheduler in user mode with the return register zero
  168          */
  169         forkchild(p, up->dbgreg);
  170 
  171         p->parent = up;
  172         p->parentpid = up->pid;
  173         if(flag&RFNOWAIT)
  174                 p->parentpid = 0;
  175         else {
  176                 lock(&up->exl);
  177                 up->nchild++;
  178                 unlock(&up->exl);
  179         }
  180         if((flag&RFNOTEG) == 0)
  181                 p->noteid = up->noteid;
  182 
  183         p->fpstate = up->fpstate;
  184         pid = p->pid;
  185         memset(p->time, 0, sizeof(p->time));
  186         p->time[TReal] = MACHP(0)->ticks;
  187 
  188         kstrdup(&p->text, up->text);
  189         kstrdup(&p->user, up->user);
  190         /*
  191          *  since the bss/data segments are now shareable,
  192          *  any mmu info about this process is now stale
  193          *  (i.e. has bad properties) and has to be discarded.
  194          */
  195         flushmmu();
  196         p->basepri = up->basepri;
  197         p->priority = up->basepri;
  198         p->fixedpri = up->fixedpri;
  199         p->mp = up->mp;
  200         wm = up->wired;
  201         if(wm)
  202                 procwired(p, wm->machno);
  203         ready(p);
  204         sched();
  205         return pid;
  206 }
  207 
  208 static ulong
  209 l2be(long l)
  210 {
  211         uchar *cp;
  212 
  213         cp = (uchar*)&l;
  214         return (cp[0]<<24) | (cp[1]<<16) | (cp[2]<<8) | cp[3];
  215 }
  216 
  217 long
  218 sysexec(ulong *arg)
  219 {
  220         Segment *s, *ts;
  221         ulong t, d, b;
  222         int i;
  223         Chan *tc;
  224         char **argv, **argp;
  225         char *a, *charp, *args, *file;
  226         char *progarg[sizeof(Exec)/2+1], *elem, progelem[64];
  227         ulong ssize, spage, nargs, nbytes, n, bssend;
  228         int indir;
  229         Exec exec;
  230         char line[sizeof(Exec)];
  231         Fgrp *f;
  232         Image *img;
  233         ulong magic, text, entry, data, bss;
  234         Tos *tos;
  235 
  236         validaddr(arg[0], 1, 0);
  237         file = (char*)arg[0];
  238         indir = 0;
  239         elem = nil;
  240         if(waserror()){
  241                 free(elem);
  242                 nexterror();
  243         }
  244         for(;;){
  245                 tc = namec(file, Aopen, OEXEC, 0);
  246                 if(waserror()){
  247                         cclose(tc);
  248                         nexterror();
  249                 }
  250                 if(!indir)
  251                         kstrdup(&elem, up->genbuf);
  252 
  253                 n = devtab[tc->type]->read(tc, &exec, sizeof(Exec), 0);
  254                 if(n < 2)
  255                         error(Ebadexec);
  256                 magic = l2be(exec.magic);
  257                 text = l2be(exec.text);
  258                 entry = l2be(exec.entry);
  259                 if(n==sizeof(Exec) && (magic == AOUT_MAGIC)){
  260                         if(text >= USTKTOP-UTZERO
  261                         || entry < UTZERO+sizeof(Exec)
  262                         || entry >= UTZERO+sizeof(Exec)+text)
  263                                 error(Ebadexec);
  264                         break; /* for binary */
  265                 }
  266 
  267                 /*
  268                  * Process #! /bin/sh args ...
  269                  */
  270                 memmove(line, &exec, sizeof(Exec));
  271                 if(indir || line[0]!='#' || line[1]!='!')
  272                         error(Ebadexec);
  273                 n = shargs(line, n, progarg);
  274                 if(n == 0)
  275                         error(Ebadexec);
  276                 indir = 1;
  277                 /*
  278                  * First arg becomes complete file name
  279                  */
  280                 progarg[n++] = file;
  281                 progarg[n] = 0;
  282                 validaddr(arg[1], BY2WD, 1);
  283                 arg[1] += BY2WD;
  284                 file = progarg[0];
  285                 if(strlen(elem) >= sizeof progelem)
  286                         error(Ebadexec);
  287                 strcpy(progelem, elem);
  288                 progarg[0] = progelem;
  289                 poperror();
  290                 cclose(tc);
  291         }
  292 
  293         data = l2be(exec.data);
  294         bss = l2be(exec.bss);
  295         t = (UTZERO+sizeof(Exec)+text+(BY2PG-1)) & ~(BY2PG-1);
  296         d = (t + data + (BY2PG-1)) & ~(BY2PG-1);
  297         bssend = t + data + bss;
  298         b = (bssend + (BY2PG-1)) & ~(BY2PG-1);
  299         if(t >= KZERO || d >= KZERO || b >= KZERO)
  300                 error(Ebadexec);
  301 
  302         /*
  303          * Args: pass 1: count
  304          */
  305         nbytes = sizeof(Tos);           /* hole for profiling clock at top of stack (and more) */
  306         nargs = 0;
  307         if(indir){
  308                 argp = progarg;
  309                 while(*argp){
  310                         a = *argp++;
  311                         nbytes += strlen(a) + 1;
  312                         nargs++;
  313                 }
  314         }
  315         evenaddr(arg[1]);
  316         argp = (char**)arg[1];
  317         validaddr((ulong)argp, BY2WD, 0);
  318         while(*argp){
  319                 a = *argp++;
  320                 if(((ulong)argp&(BY2PG-1)) < BY2WD)
  321                         validaddr((ulong)argp, BY2WD, 0);
  322                 validaddr((ulong)a, 1, 0);
  323                 nbytes += ((char*)vmemchr(a, 0, 0x7FFFFFFF) - a) + 1;
  324                 nargs++;
  325         }
  326         ssize = BY2WD*(nargs+1) + ((nbytes+(BY2WD-1)) & ~(BY2WD-1));
  327 
  328         /*
  329          * 8-byte align SP for those (e.g. sparc) that need it.
  330          * execregs() will subtract another 4 bytes for argc.
  331          */
  332         if((ssize+4) & 7)
  333                 ssize += 4;
  334         spage = (ssize+(BY2PG-1)) >> PGSHIFT;
  335 
  336         /*
  337          * Build the stack segment, putting it in kernel virtual for the moment
  338          */
  339         if(spage > TSTKSIZ)
  340                 error(Enovmem);
  341 
  342         qlock(&up->seglock);
  343         if(waserror()){
  344                 qunlock(&up->seglock);
  345                 nexterror();
  346         }
  347         up->seg[ESEG] = newseg(SG_STACK, TSTKTOP-USTKSIZE, USTKSIZE/BY2PG);
  348 
  349         /*
  350          * Args: pass 2: assemble; the pages will be faulted in
  351          */
  352         tos = (Tos*)(TSTKTOP - sizeof(Tos));
  353         tos->cyclefreq = m->cyclefreq;
  354         cycles((uvlong*)&tos->pcycles);
  355         tos->pcycles = -tos->pcycles;
  356         tos->kcycles = tos->pcycles;
  357         tos->clock = 0;
  358         argv = (char**)(TSTKTOP - ssize);
  359         charp = (char*)(TSTKTOP - nbytes);
  360         args = charp;
  361         if(indir)
  362                 argp = progarg;
  363         else
  364                 argp = (char**)arg[1];
  365 
  366         for(i=0; i<nargs; i++){
  367                 if(indir && *argp==0) {
  368                         indir = 0;
  369                         argp = (char**)arg[1];
  370                 }
  371                 *argv++ = charp + (USTKTOP-TSTKTOP);
  372                 n = strlen(*argp) + 1;
  373                 memmove(charp, *argp++, n);
  374                 charp += n;
  375         }
  376 
  377         free(up->text);
  378         up->text = elem;
  379         elem = nil;     /* so waserror() won't free elem */
  380         USED(elem);
  381 
  382         /* copy args; easiest from new process's stack */
  383         n = charp - args;
  384         if(n > 128)     /* don't waste too much space on huge arg lists */
  385                 n = 128;
  386         a = up->args;
  387         up->args = nil;
  388         free(a);
  389         up->args = smalloc(n);
  390         memmove(up->args, args, n);
  391         if(n>0 && up->args[n-1]!='\0'){
  392                 /* make sure last arg is NUL-terminated */
  393                 /* put NUL at UTF-8 character boundary */
  394                 for(i=n-1; i>0; --i)
  395                         if(fullrune(up->args+i, n-i))
  396                                 break;
  397                 up->args[i] = 0;
  398                 n = i+1;
  399         }
  400         up->nargs = n;
  401 
  402         /*
  403          * Committed.
  404          * Free old memory.
  405          * Special segments are maintained across exec
  406          */
  407         for(i = SSEG; i <= BSEG; i++) {
  408                 putseg(up->seg[i]);
  409                 /* prevent a second free if we have an error */
  410                 up->seg[i] = 0;
  411         }
  412         for(i = BSEG+1; i < NSEG; i++) {
  413                 s = up->seg[i];
  414                 if(s != 0 && (s->type&SG_CEXEC)) {
  415                         putseg(s);
  416                         up->seg[i] = 0;
  417                 }
  418         }
  419 
  420         /*
  421          * Close on exec
  422          */
  423         f = up->fgrp;
  424         for(i=0; i<=f->maxfd; i++)
  425                 fdclose(i, CCEXEC);
  426 
  427         /* Text.  Shared. Attaches to cache image if possible */
  428         /* attachimage returns a locked cache image */
  429         img = attachimage(SG_TEXT|SG_RONLY, tc, UTZERO, (t-UTZERO)>>PGSHIFT);
  430         ts = img->s;
  431         up->seg[TSEG] = ts;
  432         ts->flushme = 1;
  433         ts->fstart = 0;
  434         ts->flen = sizeof(Exec)+text;
  435         unlock(img);
  436 
  437         /* Data. Shared. */
  438         s = newseg(SG_DATA, t, (d-t)>>PGSHIFT);
  439         up->seg[DSEG] = s;
  440 
  441         /* Attached by hand */
  442         incref(img);
  443         s->image = img;
  444         s->fstart = ts->fstart+ts->flen;
  445         s->flen = data;
  446 
  447         /* BSS. Zero fill on demand */
  448         up->seg[BSEG] = newseg(SG_BSS, d, (b-d)>>PGSHIFT);
  449 
  450         /*
  451          * Move the stack
  452          */
  453         s = up->seg[ESEG];
  454         up->seg[ESEG] = 0;
  455         up->seg[SSEG] = s;
  456         qunlock(&up->seglock);
  457         poperror();     /* seglock */
  458         poperror();     /* elem */
  459         s->base = USTKTOP-USTKSIZE;
  460         s->top = USTKTOP;
  461         relocateseg(s, USTKTOP-TSTKTOP);
  462 
  463         /*
  464          *  '/' processes are higher priority (hack to make /ip more responsive).
  465          */
  466         if(devtab[tc->type]->dc == L'/')
  467                 up->basepri = PriRoot;
  468         up->priority = up->basepri;
  469         poperror();
  470         cclose(tc);
  471 
  472         /*
  473          *  At this point, the mmu contains info about the old address
  474          *  space and needs to be flushed
  475          */
  476         flushmmu();
  477         qlock(&up->debug);
  478         up->nnote = 0;
  479         up->notify = 0;
  480         up->notified = 0;
  481         up->privatemem = 0;
  482         procsetup(up);
  483         qunlock(&up->debug);
  484         if(up->hang)
  485                 up->procctl = Proc_stopme;
  486 
  487         return execregs(entry, ssize, nargs);
  488 }
  489 
  490 int
  491 shargs(char *s, int n, char **ap)
  492 {
  493         int i;
  494 
  495         s += 2;
  496         n -= 2;         /* skip #! */
  497         for(i=0; s[i]!='\n'; i++)
  498                 if(i == n-1)
  499                         return 0;
  500         s[i] = 0;
  501         *ap = 0;
  502         i = 0;
  503         for(;;) {
  504                 while(*s==' ' || *s=='\t')
  505                         s++;
  506                 if(*s == 0)
  507                         break;
  508                 i++;
  509                 *ap++ = s;
  510                 *ap = 0;
  511                 while(*s && *s!=' ' && *s!='\t')
  512                         s++;
  513                 if(*s == 0)
  514                         break;
  515                 else
  516                         *s++ = 0;
  517         }
  518         return i;
  519 }
  520 
  521 int
  522 return0(void*)
  523 {
  524         return 0;
  525 }
  526 
  527 long
  528 syssleep(ulong *arg)
  529 {
  530 
  531         int n;
  532 
  533         n = arg[0];
  534         if(n <= 0) {
  535                 if (up->edf && (up->edf->flags & Admitted))
  536                         edfyield();
  537                 else
  538                         yield();
  539                 return 0;
  540         }
  541         if(n < TK2MS(1))
  542                 n = TK2MS(1);
  543         tsleep(&up->sleep, return0, 0, n);
  544         return 0;
  545 }
  546 
  547 long
  548 sysalarm(ulong *arg)
  549 {
  550         return procalarm(arg[0]);
  551 }
  552 
  553 long
  554 sysexits(ulong *arg)
  555 {
  556         char *status;
  557         char *inval = "invalid exit string";
  558         char buf[ERRMAX];
  559 
  560         status = (char*)arg[0];
  561         if(status){
  562                 if(waserror())
  563                         status = inval;
  564                 else{
  565                         validaddr((ulong)status, 1, 0);
  566                         if(vmemchr(status, 0, ERRMAX) == 0){
  567                                 memmove(buf, status, ERRMAX);
  568                                 buf[ERRMAX-1] = 0;
  569                                 status = buf;
  570                         }
  571                         poperror();
  572                 }
  573 
  574         }
  575         pexit(status, 1);
  576         return 0;               /* not reached */
  577 }
  578 
  579 long
  580 sys_wait(ulong *arg)
  581 {
  582         int pid;
  583         Waitmsg w;
  584         OWaitmsg *ow;
  585 
  586         if(arg[0] == 0)
  587                 return pwait(nil);
  588 
  589         validaddr(arg[0], sizeof(OWaitmsg), 1);
  590         evenaddr(arg[0]);
  591         pid = pwait(&w);
  592         if(pid >= 0){
  593                 ow = (OWaitmsg*)arg[0];
  594                 readnum(0, ow->pid, NUMSIZE, w.pid, NUMSIZE);
  595                 readnum(0, ow->time+TUser*NUMSIZE, NUMSIZE, w.time[TUser], NUMSIZE);
  596                 readnum(0, ow->time+TSys*NUMSIZE, NUMSIZE, w.time[TSys], NUMSIZE);
  597                 readnum(0, ow->time+TReal*NUMSIZE, NUMSIZE, w.time[TReal], NUMSIZE);
  598                 strncpy(ow->msg, w.msg, sizeof(ow->msg));
  599                 ow->msg[sizeof(ow->msg)-1] = '\0';
  600         }
  601         return pid;
  602 }
  603 
  604 long
  605 sysawait(ulong *arg)
  606 {
  607         int i;
  608         int pid;
  609         Waitmsg w;
  610         ulong n;
  611 
  612         n = arg[1];
  613         validaddr(arg[0], n, 1);
  614         pid = pwait(&w);
  615         if(pid < 0)
  616                 return -1;
  617         i = snprint((char*)arg[0], n, "%d %lud %lud %lud %q",
  618                 w.pid,
  619                 w.time[TUser], w.time[TSys], w.time[TReal],
  620                 w.msg);
  621 
  622         return i;
  623 }
  624 
  625 void
  626 werrstr(char *fmt, ...)
  627 {
  628         va_list va;
  629 
  630         if(up == nil)
  631                 return;
  632 
  633         va_start(va, fmt);
  634         vseprint(up->syserrstr, up->syserrstr+ERRMAX, fmt, va);
  635         va_end(va);
  636 }
  637 
  638 static long
  639 generrstr(char *buf, uint nbuf)
  640 {
  641         char tmp[ERRMAX];
  642 
  643         if(nbuf == 0)
  644                 error(Ebadarg);
  645         validaddr((ulong)buf, nbuf, 1);
  646         if(nbuf > sizeof tmp)
  647                 nbuf = sizeof tmp;
  648         memmove(tmp, buf, nbuf);
  649 
  650         /* make sure it's NUL-terminated */
  651         tmp[nbuf-1] = '\0';
  652         memmove(buf, up->syserrstr, nbuf);
  653         buf[nbuf-1] = '\0';
  654         memmove(up->syserrstr, tmp, nbuf);
  655         return 0;
  656 }
  657 
  658 long
  659 syserrstr(ulong *arg)
  660 {
  661         return generrstr((char*)arg[0], arg[1]);
  662 }
  663 
  664 /* compatibility for old binaries */
  665 long
  666 sys_errstr(ulong *arg)
  667 {
  668         return generrstr((char*)arg[0], 64);
  669 }
  670 
  671 long
  672 sysnotify(ulong *arg)
  673 {
  674         if(arg[0] != 0)
  675                 validaddr(arg[0], sizeof(ulong), 0);
  676         up->notify = (int(*)(void*, char*))(arg[0]);
  677         return 0;
  678 }
  679 
  680 long
  681 sysnoted(ulong *arg)
  682 {
  683         if(arg[0]!=NRSTR && !up->notified)
  684                 error(Egreg);
  685         return 0;
  686 }
  687 
  688 long
  689 syssegbrk(ulong *arg)
  690 {
  691         int i;
  692         ulong addr;
  693         Segment *s;
  694 
  695         addr = arg[0];
  696         for(i = 0; i < NSEG; i++) {
  697                 s = up->seg[i];
  698                 if(s == 0 || addr < s->base || addr >= s->top)
  699                         continue;
  700                 switch(s->type&SG_TYPE) {
  701                 case SG_TEXT:
  702                 case SG_DATA:
  703                 case SG_STACK:
  704                         error(Ebadarg);
  705                 default:
  706                         return ibrk(arg[1], i);
  707                 }
  708         }
  709 
  710         error(Ebadarg);
  711         return 0;               /* not reached */
  712 }
  713 
  714 long
  715 syssegattach(ulong *arg)
  716 {
  717         return segattach(up, arg[0], (char*)arg[1], arg[2], arg[3]);
  718 }
  719 
  720 long
  721 syssegdetach(ulong *arg)
  722 {
  723         int i;
  724         ulong addr;
  725         Segment *s;
  726 
  727         qlock(&up->seglock);
  728         if(waserror()){
  729                 qunlock(&up->seglock);
  730                 nexterror();
  731         }
  732 
  733         s = 0;
  734         addr = arg[0];
  735         for(i = 0; i < NSEG; i++)
  736                 if(s = up->seg[i]) {
  737                         qlock(&s->lk);
  738                         if((addr >= s->base && addr < s->top) ||
  739                            (s->top == s->base && addr == s->base))
  740                                 goto found;
  741                         qunlock(&s->lk);
  742                 }
  743 
  744         error(Ebadarg);
  745 
  746 found:
  747         /*
  748          * Check we are not detaching the initial stack segment.
  749          */
  750         if(s == up->seg[SSEG]){
  751                 qunlock(&s->lk);
  752                 error(Ebadarg);
  753         }
  754         up->seg[i] = 0;
  755         qunlock(&s->lk);
  756         putseg(s);
  757         qunlock(&up->seglock);
  758         poperror();
  759 
  760         /* Ensure we flush any entries from the lost segment */
  761         flushmmu();
  762         return 0;
  763 }
  764 
  765 long
  766 syssegfree(ulong *arg)
  767 {
  768         Segment *s;
  769         ulong from, to;
  770 
  771         from = arg[0];
  772         s = seg(up, from, 1);
  773         if(s == nil)
  774                 error(Ebadarg);
  775         to = (from + arg[1]) & ~(BY2PG-1);
  776         from = PGROUND(from);
  777 
  778         if(to > s->top) {
  779                 qunlock(&s->lk);
  780                 error(Ebadarg);
  781         }
  782 
  783         mfreeseg(s, from, (to - from) / BY2PG);
  784         qunlock(&s->lk);
  785         flushmmu();
  786 
  787         return 0;
  788 }
  789 
  790 /* For binary compatibility */
  791 long
  792 sysbrk_(ulong *arg)
  793 {
  794         return ibrk(arg[0], BSEG);
  795 }
  796 
  797 long
  798 sysrendezvous(ulong *arg)
  799 {
  800         uintptr tag, val;
  801         Proc *p, **l;
  802 
  803         tag = arg[0];
  804         l = &REND(up->rgrp, tag);
  805         up->rendval = ~(uintptr)0;
  806 
  807         lock(up->rgrp);
  808         for(p = *l; p; p = p->rendhash) {
  809                 if(p->rendtag == tag) {
  810                         *l = p->rendhash;
  811                         val = p->rendval;
  812                         p->rendval = arg[1];
  813 
  814                         while(p->mach != 0)
  815                                 ;
  816                         ready(p);
  817                         unlock(up->rgrp);
  818                         return val;
  819                 }
  820                 l = &p->rendhash;
  821         }
  822 
  823         /* Going to sleep here */
  824         up->rendtag = tag;
  825         up->rendval = arg[1];
  826         up->rendhash = *l;
  827         *l = up;
  828         up->state = Rendezvous;
  829         unlock(up->rgrp);
  830 
  831         sched();
  832 
  833         return up->rendval;
  834 }
  835 
  836 /*
  837  * The implementation of semaphores is complicated by needing
  838  * to avoid rescheduling in syssemrelease, so that it is safe
  839  * to call from real-time processes.  This means syssemrelease
  840  * cannot acquire any qlocks, only spin locks.
  841  * 
  842  * Semacquire and semrelease must both manipulate the semaphore
  843  * wait list.  Lock-free linked lists only exist in theory, not
  844  * in practice, so the wait list is protected by a spin lock.
  845  * 
  846  * The semaphore value *addr is stored in user memory, so it
  847  * cannot be read or written while holding spin locks.
  848  * 
  849  * Thus, we can access the list only when holding the lock, and
  850  * we can access the semaphore only when not holding the lock.
  851  * This makes things interesting.  Note that sleep's condition function
  852  * is called while holding two locks - r and up->rlock - so it cannot
  853  * access the semaphore value either.
  854  * 
  855  * An acquirer announces its intention to try for the semaphore
  856  * by putting a Sema structure onto the wait list and then
  857  * setting Sema.waiting.  After one last check of semaphore,
  858  * the acquirer sleeps until Sema.waiting==0.  A releaser of n
  859  * must wake up n acquirers who have Sema.waiting set.  It does
  860  * this by clearing Sema.waiting and then calling wakeup.
  861  * 
  862  * There are three interesting races here.  
  863  
  864  * The first is that in this particular sleep/wakeup usage, a single
  865  * wakeup can rouse a process from two consecutive sleeps!  
  866  * The ordering is:
  867  * 
  868  *      (a) set Sema.waiting = 1
  869  *      (a) call sleep
  870  *      (b) set Sema.waiting = 0
  871  *      (a) check Sema.waiting inside sleep, return w/o sleeping
  872  *      (a) try for semaphore, fail
  873  *      (a) set Sema.waiting = 1
  874  *      (a) call sleep
  875  *      (b) call wakeup(a)
  876  *      (a) wake up again
  877  * 
  878  * This is okay - semacquire will just go around the loop
  879  * again.  It does mean that at the top of the for(;;) loop in
  880  * semacquire, phore.waiting might already be set to 1.
  881  * 
  882  * The second is that a releaser might wake an acquirer who is
  883  * interrupted before he can acquire the lock.  Since
  884  * release(n) issues only n wakeup calls -- only n can be used
  885  * anyway -- if the interrupted process is not going to use his
  886  * wakeup call he must pass it on to another acquirer.
  887  * 
  888  * The third race is similar to the second but more subtle.  An
  889  * acquirer sets waiting=1 and then does a final canacquire()
  890  * before going to sleep.  The opposite order would result in
  891  * missing wakeups that happen between canacquire and
  892  * waiting=1.  (In fact, the whole point of Sema.waiting is to
  893  * avoid missing wakeups between canacquire() and sleep().) But
  894  * there can be spurious wakeups between a successful
  895  * canacquire() and the following semdequeue().  This wakeup is
  896  * not useful to the acquirer, since he has already acquired
  897  * the semaphore.  Like in the previous case, though, the
  898  * acquirer must pass the wakeup call along.
  899  * 
  900  * This is all rather subtle.  The code below has been verified
  901  * with the spin model /sys/src/9/port/semaphore.p.  The
  902  * original code anticipated the second race but not the first
  903  * or third, which were caught only with spin.  The first race
  904  * is mentioned in /sys/doc/sleep.ps, but I'd forgotten about it.
  905  * It was lucky that my abstract model of sleep/wakeup still managed
  906  * to preserve that behavior.
  907  *
  908  * I remain slightly concerned about memory coherence
  909  * outside of locks.  The spin model does not take 
  910  * queued processor writes into account so we have to
  911  * think hard.  The only variables accessed outside locks
  912  * are the semaphore value itself and the boolean flag
  913  * Sema.waiting.  The value is only accessed with cmpswap,
  914  * whose job description includes doing the right thing as
  915  * far as memory coherence across processors.  That leaves
  916  * Sema.waiting.  To handle it, we call coherence() before each
  917  * read and after each write.           - rsc
  918  */
  919 
  920 /* Add semaphore p with addr a to list in seg. */
  921 static void
  922 semqueue(Segment *s, long *a, Sema *p)
  923 {
  924         memset(p, 0, sizeof *p);
  925         p->addr = a;
  926         lock(&s->sema); /* uses s->sema.Rendez.Lock, but no one else is */
  927         p->next = &s->sema;
  928         p->prev = s->sema.prev;
  929         p->next->prev = p;
  930         p->prev->next = p;
  931         unlock(&s->sema);
  932 }
  933 
  934 /* Remove semaphore p from list in seg. */
  935 static void
  936 semdequeue(Segment *s, Sema *p)
  937 {
  938         lock(&s->sema);
  939         p->next->prev = p->prev;
  940         p->prev->next = p->next;
  941         unlock(&s->sema);
  942 }
  943 
  944 /* Wake up n waiters with addr a on list in seg. */
  945 static void
  946 semwakeup(Segment *s, long *a, long n)
  947 {
  948         Sema *p;
  949         
  950         lock(&s->sema);
  951         for(p=s->sema.next; p!=&s->sema && n>0; p=p->next){
  952                 if(p->addr == a && p->waiting){
  953                         p->waiting = 0;
  954                         coherence();
  955                         wakeup(p);
  956                         n--;
  957                 }
  958         }
  959         unlock(&s->sema);
  960 }
  961 
  962 /* Add delta to semaphore and wake up waiters as appropriate. */
  963 static long
  964 semrelease(Segment *s, long *addr, long delta)
  965 {
  966         long value;
  967 
  968         do
  969                 value = *addr;
  970         while(!cmpswap(addr, value, value+delta));
  971         semwakeup(s, addr, delta);
  972         return value+delta;
  973 }
  974 
  975 /* Try to acquire semaphore using compare-and-swap */
  976 static int
  977 canacquire(long *addr)
  978 {
  979         long value;
  980         
  981         while((value=*addr) > 0)
  982                 if(cmpswap(addr, value, value-1))
  983                         return 1;
  984         return 0;
  985 }               
  986 
  987 /* Should we wake up? */
  988 static int
  989 semawoke(void *p)
  990 {
  991         coherence();
  992         return !((Sema*)p)->waiting;
  993 }
  994 
  995 /* Acquire semaphore (subtract 1). */
  996 static int
  997 semacquire(Segment *s, long *addr, int block)
  998 {
  999         int acquired;
 1000         Sema phore;
 1001 
 1002         if(canacquire(addr))
 1003                 return 1;
 1004         if(!block)
 1005                 return 0;
 1006 
 1007         acquired = 0;
 1008         semqueue(s, addr, &phore);
 1009         for(;;){
 1010                 phore.waiting = 1;
 1011                 coherence();
 1012                 if(canacquire(addr)){
 1013                         acquired = 1;
 1014                         break;
 1015                 }
 1016                 if(waserror())
 1017                         break;
 1018                 sleep(&phore, semawoke, &phore);
 1019                 poperror();
 1020         }
 1021         semdequeue(s, &phore);
 1022         coherence();    /* not strictly necessary due to lock in semdequeue */
 1023         if(!phore.waiting)
 1024                 semwakeup(s, addr, 1);
 1025         if(!acquired)
 1026                 nexterror();
 1027         return 1;
 1028 }
 1029 
 1030 long
 1031 syssemacquire(ulong *arg)
 1032 {
 1033         int block;
 1034         long *addr;
 1035         Segment *s;
 1036 
 1037         validaddr(arg[0], sizeof(long), 1);
 1038         evenaddr(arg[0]);
 1039         addr = (long*)arg[0];
 1040         block = arg[1];
 1041         
 1042         if((s = seg(up, (ulong)addr, 0)) == nil)
 1043                 error(Ebadarg);
 1044         if(*addr < 0)
 1045                 error(Ebadarg);
 1046         return semacquire(s, addr, block);
 1047 }
 1048 
 1049 long
 1050 syssemrelease(ulong *arg)
 1051 {
 1052         long *addr, delta;
 1053         Segment *s;
 1054 
 1055         validaddr(arg[0], sizeof(long), 1);
 1056         evenaddr(arg[0]);
 1057         addr = (long*)arg[0];
 1058         delta = arg[1];
 1059 
 1060         if((s = seg(up, (ulong)addr, 0)) == nil)
 1061                 error(Ebadarg);
 1062         if(delta < 0 || *addr < 0)
 1063                 error(Ebadarg);
 1064         return semrelease(s, addr, arg[1]);
 1065 }

Cache object: 61130316d2ef380df5f726b7493a194c


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]


This page is part of the FreeBSD/Linux Kernel Cross-Reference, and was automatically generated using a modified version of the LXR engine.