The Design and Implementation of the FreeBSD Operating System, Second Edition
Now available: The Design and Implementation of the FreeBSD Operating System (Second Edition)


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]

FreeBSD/Linux Kernel Cross Reference
sys/net/socket.c

Version: -  FREEBSD  -  FREEBSD-13-STABLE  -  FREEBSD-13-0  -  FREEBSD-12-STABLE  -  FREEBSD-12-0  -  FREEBSD-11-STABLE  -  FREEBSD-11-0  -  FREEBSD-10-STABLE  -  FREEBSD-10-0  -  FREEBSD-9-STABLE  -  FREEBSD-9-0  -  FREEBSD-8-STABLE  -  FREEBSD-8-0  -  FREEBSD-7-STABLE  -  FREEBSD-7-0  -  FREEBSD-6-STABLE  -  FREEBSD-6-0  -  FREEBSD-5-STABLE  -  FREEBSD-5-0  -  FREEBSD-4-STABLE  -  FREEBSD-3-STABLE  -  FREEBSD22  -  l41  -  OPENBSD  -  linux-2.6  -  MK84  -  PLAN9  -  xnu-8792 
SearchContext: -  none  -  3  -  10 

    1 /*
    2  * NET          An implementation of the SOCKET network access protocol.
    3  *
    4  * Version:     @(#)socket.c    1.1.93  18/02/95
    5  *
    6  * Authors:     Orest Zborowski, <obz@Kodak.COM>
    7  *              Ross Biro, <bir7@leland.Stanford.Edu>
    8  *              Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
    9  *
   10  * Fixes:
   11  *              Anonymous       :       NOTSOCK/BADF cleanup. Error fix in
   12  *                                      shutdown()
   13  *              Alan Cox        :       verify_area() fixes
   14  *              Alan Cox        :       Removed DDI
   15  *              Jonathan Kamens :       SOCK_DGRAM reconnect bug
   16  *              Alan Cox        :       Moved a load of checks to the very
   17  *                                      top level.
   18  *              Alan Cox        :       Move address structures to/from user
   19  *                                      mode above the protocol layers.
   20  *              Rob Janssen     :       Allow 0 length sends.
   21  *              Alan Cox        :       Asynchronous I/O support (cribbed from the
   22  *                                      tty drivers).
   23  *              Niibe Yutaka    :       Asynchronous I/O for writes (4.4BSD style)
   24  *              Jeff Uphoff     :       Made max number of sockets command-line
   25  *                                      configurable.
   26  *              Matti Aarnio    :       Made the number of sockets dynamic,
   27  *                                      to be allocated when needed, and mr.
   28  *                                      Uphoff's max is used as max to be
   29  *                                      allowed to allocate.
   30  *              Linus           :       Argh. removed all the socket allocation
   31  *                                      altogether: it's in the inode now.
   32  *              Alan Cox        :       Made sock_alloc()/sock_release() public
   33  *                                      for NetROM and future kernel nfsd type
   34  *                                      stuff.
   35  *              Alan Cox        :       sendmsg/recvmsg basics.
   36  *              Tom Dyas        :       Export net symbols.
   37  *              Marcin Dalecki  :       Fixed problems with CONFIG_NET="n".
   38  *              Alan Cox        :       Added thread locking to sys_* calls
   39  *                                      for sockets. May have errors at the
   40  *                                      moment.
   41  *              Kevin Buhr      :       Fixed the dumb errors in the above.
   42  *              Andi Kleen      :       Some small cleanups, optimizations,
   43  *                                      and fixed a copy_from_user() bug.
   44  *              Tigran Aivazian :       sys_send(args) calls sys_sendto(args, NULL, 0)
   45  *              Tigran Aivazian :       Made listen(2) backlog sanity checks 
   46  *                                      protocol-independent
   47  *
   48  *
   49  *              This program is free software; you can redistribute it and/or
   50  *              modify it under the terms of the GNU General Public License
   51  *              as published by the Free Software Foundation; either version
   52  *              2 of the License, or (at your option) any later version.
   53  *
   54  *
   55  *      This module is effectively the top level interface to the BSD socket
   56  *      paradigm. 
   57  *
   58  */
   59 
   60 #include <linux/config.h>
   61 #include <linux/mm.h>
   62 #include <linux/smp_lock.h>
   63 #include <linux/socket.h>
   64 #include <linux/file.h>
   65 #include <linux/net.h>
   66 #include <linux/interrupt.h>
   67 #include <linux/netdevice.h>
   68 #include <linux/proc_fs.h>
   69 #include <linux/wanrouter.h>
   70 #include <linux/netlink.h>
   71 #include <linux/rtnetlink.h>
   72 #include <linux/init.h>
   73 #include <linux/poll.h>
   74 #include <linux/cache.h>
   75 #include <linux/module.h>
   76 #include <linux/highmem.h>
   77 
   78 #if defined(CONFIG_KMOD) && defined(CONFIG_NET)
   79 #include <linux/kmod.h>
   80 #endif
   81 
   82 #include <asm/uaccess.h>
   83 
   84 #include <net/sock.h>
   85 #include <net/scm.h>
   86 #include <linux/netfilter.h>
   87 
   88 static int sock_no_open(struct inode *irrelevant, struct file *dontcare);
   89 static ssize_t sock_read(struct file *file, char *buf,
   90                          size_t size, loff_t *ppos);
   91 static ssize_t sock_write(struct file *file, const char *buf,
   92                           size_t size, loff_t *ppos);
   93 static int sock_mmap(struct file *file, struct vm_area_struct * vma);
   94 
   95 static int sock_close(struct inode *inode, struct file *file);
   96 static unsigned int sock_poll(struct file *file,
   97                               struct poll_table_struct *wait);
   98 static int sock_ioctl(struct inode *inode, struct file *file,
   99                       unsigned int cmd, unsigned long arg);
  100 static int sock_fasync(int fd, struct file *filp, int on);
  101 static ssize_t sock_readv(struct file *file, const struct iovec *vector,
  102                           unsigned long count, loff_t *ppos);
  103 static ssize_t sock_writev(struct file *file, const struct iovec *vector,
  104                           unsigned long count, loff_t *ppos);
  105 static ssize_t sock_sendpage(struct file *file, struct page *page,
  106                              int offset, size_t size, loff_t *ppos, int more);
  107 
  108 
  109 /*
  110  *      Socket files have a set of 'special' operations as well as the generic file ones. These don't appear
  111  *      in the operation structures but are done directly via the socketcall() multiplexor.
  112  */
  113 
  114 static struct file_operations socket_file_ops = {
  115         llseek:         no_llseek,
  116         read:           sock_read,
  117         write:          sock_write,
  118         poll:           sock_poll,
  119         ioctl:          sock_ioctl,
  120         mmap:           sock_mmap,
  121         open:           sock_no_open,   /* special open code to disallow open via /proc */
  122         release:        sock_close,
  123         fasync:         sock_fasync,
  124         readv:          sock_readv,
  125         writev:         sock_writev,
  126         sendpage:       sock_sendpage
  127 };
  128 
  129 /*
  130  *      The protocol list. Each protocol is registered in here.
  131  */
  132 
  133 static struct net_proto_family *net_families[NPROTO];
  134 
  135 #ifdef CONFIG_SMP
  136 static atomic_t net_family_lockct = ATOMIC_INIT(0);
  137 static spinlock_t net_family_lock = SPIN_LOCK_UNLOCKED;
  138 
  139 /* The strategy is: modifications net_family vector are short, do not
  140    sleep and veeery rare, but read access should be free of any exclusive
  141    locks.
  142  */
  143 
  144 static void net_family_write_lock(void)
  145 {
  146         spin_lock(&net_family_lock);
  147         while (atomic_read(&net_family_lockct) != 0) {
  148                 spin_unlock(&net_family_lock);
  149 
  150                 yield();
  151 
  152                 spin_lock(&net_family_lock);
  153         }
  154 }
  155 
  156 static __inline__ void net_family_write_unlock(void)
  157 {
  158         spin_unlock(&net_family_lock);
  159 }
  160 
  161 static __inline__ void net_family_read_lock(void)
  162 {
  163         atomic_inc(&net_family_lockct);
  164         spin_unlock_wait(&net_family_lock);
  165 }
  166 
  167 static __inline__ void net_family_read_unlock(void)
  168 {
  169         atomic_dec(&net_family_lockct);
  170 }
  171 
  172 #else
  173 #define net_family_write_lock() do { } while(0)
  174 #define net_family_write_unlock() do { } while(0)
  175 #define net_family_read_lock() do { } while(0)
  176 #define net_family_read_unlock() do { } while(0)
  177 #endif
  178 
  179 
  180 /*
  181  *      Statistics counters of the socket lists
  182  */
  183 
  184 static union {
  185         int     counter;
  186         char    __pad[SMP_CACHE_BYTES];
  187 } sockets_in_use[NR_CPUS] __cacheline_aligned = {{0}};
  188 
  189 /*
  190  *      Support routines. Move socket addresses back and forth across the kernel/user
  191  *      divide and look after the messy bits.
  192  */
  193 
  194 #define MAX_SOCK_ADDR   128             /* 108 for Unix domain - 
  195                                            16 for IP, 16 for IPX,
  196                                            24 for IPv6,
  197                                            about 80 for AX.25 
  198                                            must be at least one bigger than
  199                                            the AF_UNIX size (see net/unix/af_unix.c
  200                                            :unix_mkname()).  
  201                                          */
  202                                          
  203 /**
  204  *      move_addr_to_kernel     -       copy a socket address into kernel space
  205  *      @uaddr: Address in user space
  206  *      @kaddr: Address in kernel space
  207  *      @ulen: Length in user space
  208  *
  209  *      The address is copied into kernel space. If the provided address is
  210  *      too long an error code of -EINVAL is returned. If the copy gives
  211  *      invalid addresses -EFAULT is returned. On a success 0 is returned.
  212  */
  213 
  214 int move_addr_to_kernel(void *uaddr, int ulen, void *kaddr)
  215 {
  216         if(ulen<0||ulen>MAX_SOCK_ADDR)
  217                 return -EINVAL;
  218         if(ulen==0)
  219                 return 0;
  220         if(copy_from_user(kaddr,uaddr,ulen))
  221                 return -EFAULT;
  222         return 0;
  223 }
  224 
  225 /**
  226  *      move_addr_to_user       -       copy an address to user space
  227  *      @kaddr: kernel space address
  228  *      @klen: length of address in kernel
  229  *      @uaddr: user space address
  230  *      @ulen: pointer to user length field
  231  *
  232  *      The value pointed to by ulen on entry is the buffer length available.
  233  *      This is overwritten with the buffer space used. -EINVAL is returned
  234  *      if an overlong buffer is specified or a negative buffer size. -EFAULT
  235  *      is returned if either the buffer or the length field are not
  236  *      accessible.
  237  *      After copying the data up to the limit the user specifies, the true
  238  *      length of the data is written over the length limit the user
  239  *      specified. Zero is returned for a success.
  240  */
  241  
  242 int move_addr_to_user(void *kaddr, int klen, void *uaddr, int *ulen)
  243 {
  244         int err;
  245         int len;
  246 
  247         if((err=get_user(len, ulen)))
  248                 return err;
  249         if(len>klen)
  250                 len=klen;
  251         if(len<0 || len> MAX_SOCK_ADDR)
  252                 return -EINVAL;
  253         if(len)
  254         {
  255                 if(copy_to_user(uaddr,kaddr,len))
  256                         return -EFAULT;
  257         }
  258         /*
  259          *      "fromlen shall refer to the value before truncation.."
  260          *                      1003.1g
  261          */
  262         return __put_user(klen, ulen);
  263 }
  264 
  265 #define SOCKFS_MAGIC 0x534F434B
  266 static int sockfs_statfs(struct super_block *sb, struct statfs *buf)
  267 {
  268         buf->f_type = SOCKFS_MAGIC;
  269         buf->f_bsize = 1024;
  270         buf->f_namelen = 255;
  271         return 0;
  272 }
  273 
  274 static struct super_operations sockfs_ops = {
  275         statfs:         sockfs_statfs,
  276 };
  277 
  278 static struct super_block * sockfs_read_super(struct super_block *sb, void *data, int silent)
  279 {
  280         struct inode *root = new_inode(sb);
  281         if (!root)
  282                 return NULL;
  283         root->i_mode = S_IFDIR | S_IRUSR | S_IWUSR;
  284         root->i_uid = root->i_gid = 0;
  285         root->i_atime = root->i_mtime = root->i_ctime = CURRENT_TIME;
  286         sb->s_blocksize = 1024;
  287         sb->s_blocksize_bits = 10;
  288         sb->s_magic = SOCKFS_MAGIC;
  289         sb->s_op        = &sockfs_ops;
  290         sb->s_root = d_alloc(NULL, &(const struct qstr) { "socket:", 7, 0 });
  291         if (!sb->s_root) {
  292                 iput(root);
  293                 return NULL;
  294         }
  295         sb->s_root->d_sb = sb;
  296         sb->s_root->d_parent = sb->s_root;
  297         d_instantiate(sb->s_root, root);
  298         return sb;
  299 }
  300 
  301 static struct vfsmount *sock_mnt;
  302 static DECLARE_FSTYPE(sock_fs_type, "sockfs", sockfs_read_super, FS_NOMOUNT);
  303 static int sockfs_delete_dentry(struct dentry *dentry)
  304 {
  305         return 1;
  306 }
  307 static struct dentry_operations sockfs_dentry_operations = {
  308         d_delete:       sockfs_delete_dentry,
  309 };
  310 
  311 /*
  312  *      Obtains the first available file descriptor and sets it up for use.
  313  *
  314  *      This function creates file structure and maps it to fd space
  315  *      of current process. On success it returns file descriptor
  316  *      and file struct implicitly stored in sock->file.
  317  *      Note that another thread may close file descriptor before we return
  318  *      from this function. We use the fact that now we do not refer
  319  *      to socket after mapping. If one day we will need it, this
  320  *      function will increment ref. count on file by 1.
  321  *
  322  *      In any case returned fd MAY BE not valid!
  323  *      This race condition is unavoidable
  324  *      with shared fd spaces, we cannot solve it inside kernel,
  325  *      but we take care of internal coherence yet.
  326  */
  327 
  328 static int sock_map_fd(struct socket *sock)
  329 {
  330         int fd;
  331         struct qstr this;
  332         char name[32];
  333 
  334         /*
  335          *      Find a file descriptor suitable for return to the user. 
  336          */
  337 
  338         fd = get_unused_fd();
  339         if (fd >= 0) {
  340                 struct file *file = get_empty_filp();
  341 
  342                 if (!file) {
  343                         put_unused_fd(fd);
  344                         fd = -ENFILE;
  345                         goto out;
  346                 }
  347 
  348                 sprintf(name, "[%lu]", sock->inode->i_ino);
  349                 this.name = name;
  350                 this.len = strlen(name);
  351                 this.hash = sock->inode->i_ino;
  352 
  353                 file->f_dentry = d_alloc(sock_mnt->mnt_sb->s_root, &this);
  354                 if (!file->f_dentry) {
  355                         put_filp(file);
  356                         put_unused_fd(fd);
  357                         fd = -ENOMEM;
  358                         goto out;
  359                 }
  360                 file->f_dentry->d_op = &sockfs_dentry_operations;
  361                 d_add(file->f_dentry, sock->inode);
  362                 file->f_vfsmnt = mntget(sock_mnt);
  363 
  364                 sock->file = file;
  365                 file->f_op = sock->inode->i_fop = &socket_file_ops;
  366                 file->f_mode = 3;
  367                 file->f_flags = O_RDWR;
  368                 file->f_pos = 0;
  369                 fd_install(fd, file);
  370         }
  371 
  372 out:
  373         return fd;
  374 }
  375 
  376 extern __inline__ struct socket *socki_lookup(struct inode *inode)
  377 {
  378         return &inode->u.socket_i;
  379 }
  380 
  381 /**
  382  *      sockfd_lookup   -       Go from a file number to its socket slot
  383  *      @fd: file handle
  384  *      @err: pointer to an error code return
  385  *
  386  *      The file handle passed in is locked and the socket it is bound
  387  *      too is returned. If an error occurs the err pointer is overwritten
  388  *      with a negative errno code and NULL is returned. The function checks
  389  *      for both invalid handles and passing a handle which is not a socket.
  390  *
  391  *      On a success the socket object pointer is returned.
  392  */
  393 
  394 struct socket *sockfd_lookup(int fd, int *err)
  395 {
  396         struct file *file;
  397         struct inode *inode;
  398         struct socket *sock;
  399 
  400         if (!(file = fget(fd)))
  401         {
  402                 *err = -EBADF;
  403                 return NULL;
  404         }
  405 
  406         inode = file->f_dentry->d_inode;
  407         if (!inode->i_sock || !(sock = socki_lookup(inode)))
  408         {
  409                 *err = -ENOTSOCK;
  410                 fput(file);
  411                 return NULL;
  412         }
  413 
  414         if (sock->file != file) {
  415                 printk(KERN_ERR "socki_lookup: socket file changed!\n");
  416                 sock->file = file;
  417         }
  418         return sock;
  419 }
  420 
  421 extern __inline__ void sockfd_put(struct socket *sock)
  422 {
  423         fput(sock->file);
  424 }
  425 
  426 /**
  427  *      sock_alloc      -       allocate a socket
  428  *      
  429  *      Allocate a new inode and socket object. The two are bound together
  430  *      and initialised. The socket is then returned. If we are out of inodes
  431  *      NULL is returned.
  432  */
  433 
  434 struct socket *sock_alloc(void)
  435 {
  436         struct inode * inode;
  437         struct socket * sock;
  438 
  439         inode = new_inode(sock_mnt->mnt_sb);
  440         if (!inode)
  441                 return NULL;
  442 
  443         inode->i_dev = NODEV;
  444         sock = socki_lookup(inode);
  445 
  446         inode->i_mode = S_IFSOCK|S_IRWXUGO;
  447         inode->i_sock = 1;
  448         inode->i_uid = current->fsuid;
  449         inode->i_gid = current->fsgid;
  450 
  451         sock->inode = inode;
  452         init_waitqueue_head(&sock->wait);
  453         sock->fasync_list = NULL;
  454         sock->state = SS_UNCONNECTED;
  455         sock->flags = 0;
  456         sock->ops = NULL;
  457         sock->sk = NULL;
  458         sock->file = NULL;
  459 
  460         sockets_in_use[smp_processor_id()].counter++;
  461         return sock;
  462 }
  463 
  464 /*
  465  *      In theory you can't get an open on this inode, but /proc provides
  466  *      a back door. Remember to keep it shut otherwise you'll let the
  467  *      creepy crawlies in.
  468  */
  469   
  470 static int sock_no_open(struct inode *irrelevant, struct file *dontcare)
  471 {
  472         return -ENXIO;
  473 }
  474 
  475 /**
  476  *      sock_release    -       close a socket
  477  *      @sock: socket to close
  478  *
  479  *      The socket is released from the protocol stack if it has a release
  480  *      callback, and the inode is then released if the socket is bound to
  481  *      an inode not a file. 
  482  */
  483  
  484 void sock_release(struct socket *sock)
  485 {
  486         if (sock->ops) 
  487                 sock->ops->release(sock);
  488 
  489         if (sock->fasync_list)
  490                 printk(KERN_ERR "sock_release: fasync list not empty!\n");
  491 
  492         sockets_in_use[smp_processor_id()].counter--;
  493         if (!sock->file) {
  494                 iput(sock->inode);
  495                 return;
  496         }
  497         sock->file=NULL;
  498 }
  499 
  500 int sock_sendmsg(struct socket *sock, struct msghdr *msg, int size)
  501 {
  502         int err;
  503         struct scm_cookie scm;
  504 
  505         err = scm_send(sock, msg, &scm);
  506         if (err >= 0) {
  507                 err = sock->ops->sendmsg(sock, msg, size, &scm);
  508                 scm_destroy(&scm);
  509         }
  510         return err;
  511 }
  512 
  513 int sock_recvmsg(struct socket *sock, struct msghdr *msg, int size, int flags)
  514 {
  515         struct scm_cookie scm;
  516 
  517         memset(&scm, 0, sizeof(scm));
  518 
  519         size = sock->ops->recvmsg(sock, msg, size, flags, &scm);
  520         if (size >= 0)
  521                 scm_recv(sock, msg, &scm, flags);
  522 
  523         return size;
  524 }
  525 
  526 
  527 /*
  528  *      Read data from a socket. ubuf is a user mode pointer. We make sure the user
  529  *      area ubuf...ubuf+size-1 is writable before asking the protocol.
  530  */
  531 
  532 static ssize_t sock_read(struct file *file, char *ubuf,
  533                          size_t size, loff_t *ppos)
  534 {
  535         struct socket *sock;
  536         struct iovec iov;
  537         struct msghdr msg;
  538         int flags;
  539 
  540         if (ppos != &file->f_pos)
  541                 return -ESPIPE;
  542         if (size==0)            /* Match SYS5 behaviour */
  543                 return 0;
  544 
  545         sock = socki_lookup(file->f_dentry->d_inode); 
  546 
  547         msg.msg_name=NULL;
  548         msg.msg_namelen=0;
  549         msg.msg_iov=&iov;
  550         msg.msg_iovlen=1;
  551         msg.msg_control=NULL;
  552         msg.msg_controllen=0;
  553         iov.iov_base=ubuf;
  554         iov.iov_len=size;
  555         flags = !(file->f_flags & O_NONBLOCK) ? 0 : MSG_DONTWAIT;
  556 
  557         return sock_recvmsg(sock, &msg, size, flags);
  558 }
  559 
  560 
  561 /*
  562  *      Write data to a socket. We verify that the user area ubuf..ubuf+size-1
  563  *      is readable by the user process.
  564  */
  565 
  566 static ssize_t sock_write(struct file *file, const char *ubuf,
  567                           size_t size, loff_t *ppos)
  568 {
  569         struct socket *sock;
  570         struct msghdr msg;
  571         struct iovec iov;
  572         
  573         if (ppos != &file->f_pos)
  574                 return -ESPIPE;
  575         if(size==0)             /* Match SYS5 behaviour */
  576                 return 0;
  577 
  578         sock = socki_lookup(file->f_dentry->d_inode); 
  579 
  580         msg.msg_name=NULL;
  581         msg.msg_namelen=0;
  582         msg.msg_iov=&iov;
  583         msg.msg_iovlen=1;
  584         msg.msg_control=NULL;
  585         msg.msg_controllen=0;
  586         msg.msg_flags=!(file->f_flags & O_NONBLOCK) ? 0 : MSG_DONTWAIT;
  587         if (sock->type == SOCK_SEQPACKET)
  588                 msg.msg_flags |= MSG_EOR;
  589         iov.iov_base=(void *)ubuf;
  590         iov.iov_len=size;
  591         
  592         return sock_sendmsg(sock, &msg, size);
  593 }
  594 
  595 ssize_t sock_sendpage(struct file *file, struct page *page,
  596                       int offset, size_t size, loff_t *ppos, int more)
  597 {
  598         struct socket *sock;
  599         int flags;
  600 
  601         if (ppos != &file->f_pos)
  602                 return -ESPIPE;
  603 
  604         sock = socki_lookup(file->f_dentry->d_inode);
  605 
  606         flags = !(file->f_flags & O_NONBLOCK) ? 0 : MSG_DONTWAIT;
  607         if (more)
  608                 flags |= MSG_MORE;
  609 
  610         return sock->ops->sendpage(sock, page, offset, size, flags);
  611 }
  612 
  613 int sock_readv_writev(int type, struct inode * inode, struct file * file,
  614                       const struct iovec * iov, long count, long size)
  615 {
  616         struct msghdr msg;
  617         struct socket *sock;
  618 
  619         sock = socki_lookup(inode);
  620 
  621         msg.msg_name = NULL;
  622         msg.msg_namelen = 0;
  623         msg.msg_control = NULL;
  624         msg.msg_controllen = 0;
  625         msg.msg_iov = (struct iovec *) iov;
  626         msg.msg_iovlen = count;
  627         msg.msg_flags = (file->f_flags & O_NONBLOCK) ? MSG_DONTWAIT : 0;
  628 
  629         /* read() does a VERIFY_WRITE */
  630         if (type == VERIFY_WRITE)
  631                 return sock_recvmsg(sock, &msg, size, msg.msg_flags);
  632 
  633         if (sock->type == SOCK_SEQPACKET)
  634                 msg.msg_flags |= MSG_EOR;
  635 
  636         return sock_sendmsg(sock, &msg, size);
  637 }
  638 
  639 static ssize_t sock_readv(struct file *file, const struct iovec *vector,
  640                           unsigned long count, loff_t *ppos)
  641 {
  642         size_t tot_len = 0;
  643         int i;
  644         for (i = 0 ; i < count ; i++)
  645                 tot_len += vector[i].iov_len;
  646         return sock_readv_writev(VERIFY_WRITE, file->f_dentry->d_inode,
  647                                  file, vector, count, tot_len);
  648 }
  649         
  650 static ssize_t sock_writev(struct file *file, const struct iovec *vector,
  651                            unsigned long count, loff_t *ppos)
  652 {
  653         size_t tot_len = 0;
  654         int i;
  655         for (i = 0 ; i < count ; i++)
  656                 tot_len += vector[i].iov_len;
  657         return sock_readv_writev(VERIFY_READ, file->f_dentry->d_inode,
  658                                  file, vector, count, tot_len);
  659 }
  660 
  661 /*
  662  *      With an ioctl arg may well be a user mode pointer, but we don't know what to do
  663  *      with it - that's up to the protocol still.
  664  */
  665 
  666 int sock_ioctl(struct inode *inode, struct file *file, unsigned int cmd,
  667            unsigned long arg)
  668 {
  669         struct socket *sock;
  670         int err;
  671 
  672         unlock_kernel();
  673         sock = socki_lookup(inode);
  674         err = sock->ops->ioctl(sock, cmd, arg);
  675         lock_kernel();
  676 
  677         return err;
  678 }
  679 
  680 
  681 /* No kernel lock held - perfect */
  682 static unsigned int sock_poll(struct file *file, poll_table * wait)
  683 {
  684         struct socket *sock;
  685 
  686         /*
  687          *      We can't return errors to poll, so it's either yes or no. 
  688          */
  689         sock = socki_lookup(file->f_dentry->d_inode);
  690         return sock->ops->poll(file, sock, wait);
  691 }
  692 
  693 static int sock_mmap(struct file * file, struct vm_area_struct * vma)
  694 {
  695         struct socket *sock = socki_lookup(file->f_dentry->d_inode);
  696 
  697         return sock->ops->mmap(file, sock, vma);
  698 }
  699 
  700 int sock_close(struct inode *inode, struct file *filp)
  701 {
  702         /*
  703          *      It was possible the inode is NULL we were 
  704          *      closing an unfinished socket. 
  705          */
  706 
  707         if (!inode)
  708         {
  709                 printk(KERN_DEBUG "sock_close: NULL inode\n");
  710                 return 0;
  711         }
  712         sock_fasync(-1, filp, 0);
  713         sock_release(socki_lookup(inode));
  714         return 0;
  715 }
  716 
  717 /*
  718  *      Update the socket async list
  719  *
  720  *      Fasync_list locking strategy.
  721  *
  722  *      1. fasync_list is modified only under process context socket lock
  723  *         i.e. under semaphore.
  724  *      2. fasync_list is used under read_lock(&sk->callback_lock)
  725  *         or under socket lock.
  726  *      3. fasync_list can be used from softirq context, so that
  727  *         modification under socket lock have to be enhanced with
  728  *         write_lock_bh(&sk->callback_lock).
  729  *                                                      --ANK (990710)
  730  */
  731 
  732 static int sock_fasync(int fd, struct file *filp, int on)
  733 {
  734         struct fasync_struct *fa, *fna=NULL, **prev;
  735         struct socket *sock;
  736         struct sock *sk;
  737 
  738         if (on)
  739         {
  740                 fna=(struct fasync_struct *)kmalloc(sizeof(struct fasync_struct), GFP_KERNEL);
  741                 if(fna==NULL)
  742                         return -ENOMEM;
  743         }
  744 
  745         sock = socki_lookup(filp->f_dentry->d_inode);
  746         
  747         if ((sk=sock->sk) == NULL) {
  748                 if (fna)
  749                         kfree(fna);
  750                 return -EINVAL;
  751         }
  752 
  753         lock_sock(sk);
  754 
  755         prev=&(sock->fasync_list);
  756 
  757         for (fa=*prev; fa!=NULL; prev=&fa->fa_next,fa=*prev)
  758                 if (fa->fa_file==filp)
  759                         break;
  760 
  761         if(on)
  762         {
  763                 if(fa!=NULL)
  764                 {
  765                         write_lock_bh(&sk->callback_lock);
  766                         fa->fa_fd=fd;
  767                         write_unlock_bh(&sk->callback_lock);
  768 
  769                         kfree(fna);
  770                         goto out;
  771                 }
  772                 fna->fa_file=filp;
  773                 fna->fa_fd=fd;
  774                 fna->magic=FASYNC_MAGIC;
  775                 fna->fa_next=sock->fasync_list;
  776                 write_lock_bh(&sk->callback_lock);
  777                 sock->fasync_list=fna;
  778                 write_unlock_bh(&sk->callback_lock);
  779         }
  780         else
  781         {
  782                 if (fa!=NULL)
  783                 {
  784                         write_lock_bh(&sk->callback_lock);
  785                         *prev=fa->fa_next;
  786                         write_unlock_bh(&sk->callback_lock);
  787                         kfree(fa);
  788                 }
  789         }
  790 
  791 out:
  792         release_sock(sock->sk);
  793         return 0;
  794 }
  795 
  796 /* This function may be called only under socket lock or callback_lock */
  797 
  798 int sock_wake_async(struct socket *sock, int how, int band)
  799 {
  800         if (!sock || !sock->fasync_list)
  801                 return -1;
  802         switch (how)
  803         {
  804         case 1:
  805                 
  806                 if (test_bit(SOCK_ASYNC_WAITDATA, &sock->flags))
  807                         break;
  808                 goto call_kill;
  809         case 2:
  810                 if (!test_and_clear_bit(SOCK_ASYNC_NOSPACE, &sock->flags))
  811                         break;
  812                 /* fall through */
  813         case 0:
  814         call_kill:
  815                 __kill_fasync(sock->fasync_list, SIGIO, band);
  816                 break;
  817         case 3:
  818                 __kill_fasync(sock->fasync_list, SIGURG, band);
  819         }
  820         return 0;
  821 }
  822 
  823 
  824 int sock_create(int family, int type, int protocol, struct socket **res)
  825 {
  826         int i;
  827         struct socket *sock;
  828 
  829         /*
  830          *      Check protocol is in range
  831          */
  832         if (family < 0 || family >= NPROTO)
  833                 return -EAFNOSUPPORT;
  834         if (type < 0 || type >= SOCK_MAX)
  835                 return -EINVAL;
  836 
  837         /* Compatibility.
  838 
  839            This uglymoron is moved from INET layer to here to avoid
  840            deadlock in module load.
  841          */
  842         if (family == PF_INET && type == SOCK_PACKET) {
  843                 static int warned; 
  844                 if (!warned) {
  845                         warned = 1;
  846                         printk(KERN_INFO "%s uses obsolete (PF_INET,SOCK_PACKET)\n", current->comm);
  847                 }
  848                 family = PF_PACKET;
  849         }
  850                 
  851 #if defined(CONFIG_KMOD) && defined(CONFIG_NET)
  852         /* Attempt to load a protocol module if the find failed. 
  853          * 
  854          * 12/09/1996 Marcin: But! this makes REALLY only sense, if the user 
  855          * requested real, full-featured networking support upon configuration.
  856          * Otherwise module support will break!
  857          */
  858         if (net_families[family]==NULL)
  859         {
  860                 char module_name[30];
  861                 sprintf(module_name,"net-pf-%d",family);
  862                 request_module(module_name);
  863         }
  864 #endif
  865 
  866         net_family_read_lock();
  867         if (net_families[family] == NULL) {
  868                 i = -EAFNOSUPPORT;
  869                 goto out;
  870         }
  871 
  872 /*
  873  *      Allocate the socket and allow the family to set things up. if
  874  *      the protocol is 0, the family is instructed to select an appropriate
  875  *      default.
  876  */
  877 
  878         if (!(sock = sock_alloc())) 
  879         {
  880                 printk(KERN_WARNING "socket: no more sockets\n");
  881                 i = -ENFILE;            /* Not exactly a match, but its the
  882                                            closest posix thing */
  883                 goto out;
  884         }
  885 
  886         sock->type  = type;
  887 
  888         if ((i = net_families[family]->create(sock, protocol)) < 0) 
  889         {
  890                 sock_release(sock);
  891                 goto out;
  892         }
  893 
  894         *res = sock;
  895 
  896 out:
  897         net_family_read_unlock();
  898         return i;
  899 }
  900 
  901 asmlinkage long sys_socket(int family, int type, int protocol)
  902 {
  903         int retval;
  904         struct socket *sock;
  905 
  906         retval = sock_create(family, type, protocol, &sock);
  907         if (retval < 0)
  908                 goto out;
  909 
  910         retval = sock_map_fd(sock);
  911         if (retval < 0)
  912                 goto out_release;
  913 
  914 out:
  915         /* It may be already another descriptor 8) Not kernel problem. */
  916         return retval;
  917 
  918 out_release:
  919         sock_release(sock);
  920         return retval;
  921 }
  922 
  923 /*
  924  *      Create a pair of connected sockets.
  925  */
  926 
  927 asmlinkage long sys_socketpair(int family, int type, int protocol, int usockvec[2])
  928 {
  929         struct socket *sock1, *sock2;
  930         int fd1, fd2, err;
  931 
  932         /*
  933          * Obtain the first socket and check if the underlying protocol
  934          * supports the socketpair call.
  935          */
  936 
  937         err = sock_create(family, type, protocol, &sock1);
  938         if (err < 0)
  939                 goto out;
  940 
  941         err = sock_create(family, type, protocol, &sock2);
  942         if (err < 0)
  943                 goto out_release_1;
  944 
  945         err = sock1->ops->socketpair(sock1, sock2);
  946         if (err < 0) 
  947                 goto out_release_both;
  948 
  949         fd1 = fd2 = -1;
  950 
  951         err = sock_map_fd(sock1);
  952         if (err < 0)
  953                 goto out_release_both;
  954         fd1 = err;
  955 
  956         err = sock_map_fd(sock2);
  957         if (err < 0)
  958                 goto out_close_1;
  959         fd2 = err;
  960 
  961         /* fd1 and fd2 may be already another descriptors.
  962          * Not kernel problem.
  963          */
  964 
  965         err = put_user(fd1, &usockvec[0]); 
  966         if (!err)
  967                 err = put_user(fd2, &usockvec[1]);
  968         if (!err)
  969                 return 0;
  970 
  971         sys_close(fd2);
  972         sys_close(fd1);
  973         return err;
  974 
  975 out_close_1:
  976         sock_release(sock2);
  977         sys_close(fd1);
  978         return err;
  979 
  980 out_release_both:
  981         sock_release(sock2);
  982 out_release_1:
  983         sock_release(sock1);
  984 out:
  985         return err;
  986 }
  987 
  988 
  989 /*
  990  *      Bind a name to a socket. Nothing much to do here since it's
  991  *      the protocol's responsibility to handle the local address.
  992  *
  993  *      We move the socket address to kernel space before we call
  994  *      the protocol layer (having also checked the address is ok).
  995  */
  996 
  997 asmlinkage long sys_bind(int fd, struct sockaddr *umyaddr, int addrlen)
  998 {
  999         struct socket *sock;
 1000         char address[MAX_SOCK_ADDR];
 1001         int err;
 1002 
 1003         if((sock = sockfd_lookup(fd,&err))!=NULL)
 1004         {
 1005                 if((err=move_addr_to_kernel(umyaddr,addrlen,address))>=0)
 1006                         err = sock->ops->bind(sock, (struct sockaddr *)address, addrlen);
 1007                 sockfd_put(sock);
 1008         }                       
 1009         return err;
 1010 }
 1011 
 1012 
 1013 /*
 1014  *      Perform a listen. Basically, we allow the protocol to do anything
 1015  *      necessary for a listen, and if that works, we mark the socket as
 1016  *      ready for listening.
 1017  */
 1018 
 1019 asmlinkage long sys_listen(int fd, int backlog)
 1020 {
 1021         struct socket *sock;
 1022         int err;
 1023         
 1024         if ((sock = sockfd_lookup(fd, &err)) != NULL) {
 1025                 if ((unsigned) backlog > SOMAXCONN)
 1026                         backlog = SOMAXCONN;
 1027                 err=sock->ops->listen(sock, backlog);
 1028                 sockfd_put(sock);
 1029         }
 1030         return err;
 1031 }
 1032 
 1033 
 1034 /*
 1035  *      For accept, we attempt to create a new socket, set up the link
 1036  *      with the client, wake up the client, then return the new
 1037  *      connected fd. We collect the address of the connector in kernel
 1038  *      space and move it to user at the very end. This is unclean because
 1039  *      we open the socket then return an error.
 1040  *
 1041  *      1003.1g adds the ability to recvmsg() to query connection pending
 1042  *      status to recvmsg. We need to add that support in a way thats
 1043  *      clean when we restucture accept also.
 1044  */
 1045 
 1046 asmlinkage long sys_accept(int fd, struct sockaddr *upeer_sockaddr, int *upeer_addrlen)
 1047 {
 1048         struct socket *sock, *newsock;
 1049         int err, len;
 1050         char address[MAX_SOCK_ADDR];
 1051 
 1052         sock = sockfd_lookup(fd, &err);
 1053         if (!sock)
 1054                 goto out;
 1055 
 1056         err = -EMFILE;
 1057         if (!(newsock = sock_alloc())) 
 1058                 goto out_put;
 1059 
 1060         newsock->type = sock->type;
 1061         newsock->ops = sock->ops;
 1062 
 1063         err = sock->ops->accept(sock, newsock, sock->file->f_flags);
 1064         if (err < 0)
 1065                 goto out_release;
 1066 
 1067         if (upeer_sockaddr) {
 1068                 if(newsock->ops->getname(newsock, (struct sockaddr *)address, &len, 2)<0) {
 1069                         err = -ECONNABORTED;
 1070                         goto out_release;
 1071                 }
 1072                 err = move_addr_to_user(address, len, upeer_sockaddr, upeer_addrlen);
 1073                 if (err < 0)
 1074                         goto out_release;
 1075         }
 1076 
 1077         /* File flags are not inherited via accept() unlike another OSes. */
 1078 
 1079         if ((err = sock_map_fd(newsock)) < 0)
 1080                 goto out_release;
 1081 
 1082 out_put:
 1083         sockfd_put(sock);
 1084 out:
 1085         return err;
 1086 
 1087 out_release:
 1088         sock_release(newsock);
 1089         goto out_put;
 1090 }
 1091 
 1092 
 1093 /*
 1094  *      Attempt to connect to a socket with the server address.  The address
 1095  *      is in user space so we verify it is OK and move it to kernel space.
 1096  *
 1097  *      For 1003.1g we need to add clean support for a bind to AF_UNSPEC to
 1098  *      break bindings
 1099  *
 1100  *      NOTE: 1003.1g draft 6.3 is broken with respect to AX.25/NetROM and
 1101  *      other SEQPACKET protocols that take time to connect() as it doesn't
 1102  *      include the -EINPROGRESS status for such sockets.
 1103  */
 1104 
 1105 asmlinkage long sys_connect(int fd, struct sockaddr *uservaddr, int addrlen)
 1106 {
 1107         struct socket *sock;
 1108         char address[MAX_SOCK_ADDR];
 1109         int err;
 1110 
 1111         sock = sockfd_lookup(fd, &err);
 1112         if (!sock)
 1113                 goto out;
 1114         err = move_addr_to_kernel(uservaddr, addrlen, address);
 1115         if (err < 0)
 1116                 goto out_put;
 1117         err = sock->ops->connect(sock, (struct sockaddr *) address, addrlen,
 1118                                  sock->file->f_flags);
 1119 out_put:
 1120         sockfd_put(sock);
 1121 out:
 1122         return err;
 1123 }
 1124 
 1125 /*
 1126  *      Get the local address ('name') of a socket object. Move the obtained
 1127  *      name to user space.
 1128  */
 1129 
 1130 asmlinkage long sys_getsockname(int fd, struct sockaddr *usockaddr, int *usockaddr_len)
 1131 {
 1132         struct socket *sock;
 1133         char address[MAX_SOCK_ADDR];
 1134         int len, err;
 1135         
 1136         sock = sockfd_lookup(fd, &err);
 1137         if (!sock)
 1138                 goto out;
 1139         err = sock->ops->getname(sock, (struct sockaddr *)address, &len, 0);
 1140         if (err)
 1141                 goto out_put;
 1142         err = move_addr_to_user(address, len, usockaddr, usockaddr_len);
 1143 
 1144 out_put:
 1145         sockfd_put(sock);
 1146 out:
 1147         return err;
 1148 }
 1149 
 1150 /*
 1151  *      Get the remote address ('name') of a socket object. Move the obtained
 1152  *      name to user space.
 1153  */
 1154 
 1155 asmlinkage long sys_getpeername(int fd, struct sockaddr *usockaddr, int *usockaddr_len)
 1156 {
 1157         struct socket *sock;
 1158         char address[MAX_SOCK_ADDR];
 1159         int len, err;
 1160 
 1161         if ((sock = sockfd_lookup(fd, &err))!=NULL)
 1162         {
 1163                 err = sock->ops->getname(sock, (struct sockaddr *)address, &len, 1);
 1164                 if (!err)
 1165                         err=move_addr_to_user(address,len, usockaddr, usockaddr_len);
 1166                 sockfd_put(sock);
 1167         }
 1168         return err;
 1169 }
 1170 
 1171 /*
 1172  *      Send a datagram to a given address. We move the address into kernel
 1173  *      space and check the user space data area is readable before invoking
 1174  *      the protocol.
 1175  */
 1176 
 1177 asmlinkage long sys_sendto(int fd, void * buff, size_t len, unsigned flags,
 1178                            struct sockaddr *addr, int addr_len)
 1179 {
 1180         struct socket *sock;
 1181         char address[MAX_SOCK_ADDR];
 1182         int err;
 1183         struct msghdr msg;
 1184         struct iovec iov;
 1185         
 1186         sock = sockfd_lookup(fd, &err);
 1187         if (!sock)
 1188                 goto out;
 1189         iov.iov_base=buff;
 1190         iov.iov_len=len;
 1191         msg.msg_name=NULL;
 1192         msg.msg_iov=&iov;
 1193         msg.msg_iovlen=1;
 1194         msg.msg_control=NULL;
 1195         msg.msg_controllen=0;
 1196         msg.msg_namelen=0;
 1197         if(addr)
 1198         {
 1199                 err = move_addr_to_kernel(addr, addr_len, address);
 1200                 if (err < 0)
 1201                         goto out_put;
 1202                 msg.msg_name=address;
 1203                 msg.msg_namelen=addr_len;
 1204         }
 1205         if (sock->file->f_flags & O_NONBLOCK)
 1206                 flags |= MSG_DONTWAIT;
 1207         msg.msg_flags = flags;
 1208         err = sock_sendmsg(sock, &msg, len);
 1209 
 1210 out_put:                
 1211         sockfd_put(sock);
 1212 out:
 1213         return err;
 1214 }
 1215 
 1216 /*
 1217  *      Send a datagram down a socket. 
 1218  */
 1219 
 1220 asmlinkage long sys_send(int fd, void * buff, size_t len, unsigned flags)
 1221 {
 1222         return sys_sendto(fd, buff, len, flags, NULL, 0);
 1223 }
 1224 
 1225 /*
 1226  *      Receive a frame from the socket and optionally record the address of the 
 1227  *      sender. We verify the buffers are writable and if needed move the
 1228  *      sender address from kernel to user space.
 1229  */
 1230 
 1231 asmlinkage long sys_recvfrom(int fd, void * ubuf, size_t size, unsigned flags,
 1232                              struct sockaddr *addr, int *addr_len)
 1233 {
 1234         struct socket *sock;
 1235         struct iovec iov;
 1236         struct msghdr msg;
 1237         char address[MAX_SOCK_ADDR];
 1238         int err,err2;
 1239 
 1240         sock = sockfd_lookup(fd, &err);
 1241         if (!sock)
 1242                 goto out;
 1243 
 1244         msg.msg_control=NULL;
 1245         msg.msg_controllen=0;
 1246         msg.msg_iovlen=1;
 1247         msg.msg_iov=&iov;
 1248         iov.iov_len=size;
 1249         iov.iov_base=ubuf;
 1250         msg.msg_name=address;
 1251         msg.msg_namelen=MAX_SOCK_ADDR;
 1252         if (sock->file->f_flags & O_NONBLOCK)
 1253                 flags |= MSG_DONTWAIT;
 1254         err=sock_recvmsg(sock, &msg, size, flags);
 1255 
 1256         if(err >= 0 && addr != NULL)
 1257         {
 1258                 err2=move_addr_to_user(address, msg.msg_namelen, addr, addr_len);
 1259                 if(err2<0)
 1260                         err=err2;
 1261         }
 1262         sockfd_put(sock);                       
 1263 out:
 1264         return err;
 1265 }
 1266 
 1267 /*
 1268  *      Receive a datagram from a socket. 
 1269  */
 1270 
 1271 asmlinkage long sys_recv(int fd, void * ubuf, size_t size, unsigned flags)
 1272 {
 1273         return sys_recvfrom(fd, ubuf, size, flags, NULL, NULL);
 1274 }
 1275 
 1276 /*
 1277  *      Set a socket option. Because we don't know the option lengths we have
 1278  *      to pass the user mode parameter for the protocols to sort out.
 1279  */
 1280 
 1281 asmlinkage long sys_setsockopt(int fd, int level, int optname, char *optval, int optlen)
 1282 {
 1283         int err;
 1284         struct socket *sock;
 1285 
 1286         if (optlen < 0)
 1287                 return -EINVAL;
 1288                         
 1289         if ((sock = sockfd_lookup(fd, &err))!=NULL)
 1290         {
 1291                 if (level == SOL_SOCKET)
 1292                         err=sock_setsockopt(sock,level,optname,optval,optlen);
 1293                 else
 1294                         err=sock->ops->setsockopt(sock, level, optname, optval, optlen);
 1295                 sockfd_put(sock);
 1296         }
 1297         return err;
 1298 }
 1299 
 1300 /*
 1301  *      Get a socket option. Because we don't know the option lengths we have
 1302  *      to pass a user mode parameter for the protocols to sort out.
 1303  */
 1304 
 1305 asmlinkage long sys_getsockopt(int fd, int level, int optname, char *optval, int *optlen)
 1306 {
 1307         int err;
 1308         struct socket *sock;
 1309 
 1310         if ((sock = sockfd_lookup(fd, &err))!=NULL)
 1311         {
 1312                 if (level == SOL_SOCKET)
 1313                         err=sock_getsockopt(sock,level,optname,optval,optlen);
 1314                 else
 1315                         err=sock->ops->getsockopt(sock, level, optname, optval, optlen);
 1316                 sockfd_put(sock);
 1317         }
 1318         return err;
 1319 }
 1320 
 1321 
 1322 /*
 1323  *      Shutdown a socket.
 1324  */
 1325 
 1326 asmlinkage long sys_shutdown(int fd, int how)
 1327 {
 1328         int err;
 1329         struct socket *sock;
 1330 
 1331         if ((sock = sockfd_lookup(fd, &err))!=NULL)
 1332         {
 1333                 err=sock->ops->shutdown(sock, how);
 1334                 sockfd_put(sock);
 1335         }
 1336         return err;
 1337 }
 1338 
 1339 /*
 1340  *      BSD sendmsg interface
 1341  */
 1342 
 1343 asmlinkage long sys_sendmsg(int fd, struct msghdr *msg, unsigned flags)
 1344 {
 1345         struct socket *sock;
 1346         char address[MAX_SOCK_ADDR];
 1347         struct iovec iovstack[UIO_FASTIOV], *iov = iovstack;
 1348         unsigned char ctl[sizeof(struct cmsghdr) + 20]; /* 20 is size of ipv6_pktinfo */
 1349         unsigned char *ctl_buf = ctl;
 1350         struct msghdr msg_sys;
 1351         int err, ctl_len, iov_size, total_len;
 1352         
 1353         err = -EFAULT;
 1354         if (copy_from_user(&msg_sys,msg,sizeof(struct msghdr)))
 1355                 goto out; 
 1356 
 1357         sock = sockfd_lookup(fd, &err);
 1358         if (!sock) 
 1359                 goto out;
 1360 
 1361         /* do not move before msg_sys is valid */
 1362         err = -EMSGSIZE;
 1363         if (msg_sys.msg_iovlen > UIO_MAXIOV)
 1364                 goto out_put;
 1365 
 1366         /* Check whether to allocate the iovec area*/
 1367         err = -ENOMEM;
 1368         iov_size = msg_sys.msg_iovlen * sizeof(struct iovec);
 1369         if (msg_sys.msg_iovlen > UIO_FASTIOV) {
 1370                 iov = sock_kmalloc(sock->sk, iov_size, GFP_KERNEL);
 1371                 if (!iov)
 1372                         goto out_put;
 1373         }
 1374 
 1375         /* This will also move the address data into kernel space */
 1376         err = verify_iovec(&msg_sys, iov, address, VERIFY_READ);
 1377         if (err < 0) 
 1378                 goto out_freeiov;
 1379         total_len = err;
 1380 
 1381         err = -ENOBUFS;
 1382 
 1383         if (msg_sys.msg_controllen > INT_MAX)
 1384                 goto out_freeiov;
 1385         ctl_len = msg_sys.msg_controllen; 
 1386         if (ctl_len) 
 1387         {
 1388                 if (ctl_len > sizeof(ctl))
 1389                 {
 1390                         ctl_buf = sock_kmalloc(sock->sk, ctl_len, GFP_KERNEL);
 1391                         if (ctl_buf == NULL) 
 1392                                 goto out_freeiov;
 1393                 }
 1394                 err = -EFAULT;
 1395                 if (copy_from_user(ctl_buf, msg_sys.msg_control, ctl_len))
 1396                         goto out_freectl;
 1397                 msg_sys.msg_control = ctl_buf;
 1398         }
 1399         msg_sys.msg_flags = flags;
 1400 
 1401         if (sock->file->f_flags & O_NONBLOCK)
 1402                 msg_sys.msg_flags |= MSG_DONTWAIT;
 1403         err = sock_sendmsg(sock, &msg_sys, total_len);
 1404 
 1405 out_freectl:
 1406         if (ctl_buf != ctl)    
 1407                 sock_kfree_s(sock->sk, ctl_buf, ctl_len);
 1408 out_freeiov:
 1409         if (iov != iovstack)
 1410                 sock_kfree_s(sock->sk, iov, iov_size);
 1411 out_put:
 1412         sockfd_put(sock);
 1413 out:       
 1414         return err;
 1415 }
 1416 
 1417 /*
 1418  *      BSD recvmsg interface
 1419  */
 1420 
 1421 asmlinkage long sys_recvmsg(int fd, struct msghdr *msg, unsigned int flags)
 1422 {
 1423         struct socket *sock;
 1424         struct iovec iovstack[UIO_FASTIOV];
 1425         struct iovec *iov=iovstack;
 1426         struct msghdr msg_sys;
 1427         unsigned long cmsg_ptr;
 1428         int err, iov_size, total_len, len;
 1429 
 1430         /* kernel mode address */
 1431         char addr[MAX_SOCK_ADDR];
 1432 
 1433         /* user mode address pointers */
 1434         struct sockaddr *uaddr;
 1435         int *uaddr_len;
 1436         
 1437         err=-EFAULT;
 1438         if (copy_from_user(&msg_sys,msg,sizeof(struct msghdr)))
 1439                 goto out;
 1440 
 1441         sock = sockfd_lookup(fd, &err);
 1442         if (!sock)
 1443                 goto out;
 1444 
 1445         err = -EMSGSIZE;
 1446         if (msg_sys.msg_iovlen > UIO_MAXIOV)
 1447                 goto out_put;
 1448         
 1449         /* Check whether to allocate the iovec area*/
 1450         err = -ENOMEM;
 1451         iov_size = msg_sys.msg_iovlen * sizeof(struct iovec);
 1452         if (msg_sys.msg_iovlen > UIO_FASTIOV) {
 1453                 iov = sock_kmalloc(sock->sk, iov_size, GFP_KERNEL);
 1454                 if (!iov)
 1455                         goto out_put;
 1456         }
 1457 
 1458         /*
 1459          *      Save the user-mode address (verify_iovec will change the
 1460          *      kernel msghdr to use the kernel address space)
 1461          */
 1462          
 1463         uaddr = msg_sys.msg_name;
 1464         uaddr_len = &msg->msg_namelen;
 1465         err = verify_iovec(&msg_sys, iov, addr, VERIFY_WRITE);
 1466         if (err < 0)
 1467                 goto out_freeiov;
 1468         total_len=err;
 1469 
 1470         cmsg_ptr = (unsigned long)msg_sys.msg_control;
 1471         msg_sys.msg_flags = 0;
 1472         
 1473         if (sock->file->f_flags & O_NONBLOCK)
 1474                 flags |= MSG_DONTWAIT;
 1475         err = sock_recvmsg(sock, &msg_sys, total_len, flags);
 1476         if (err < 0)
 1477                 goto out_freeiov;
 1478         len = err;
 1479 
 1480         if (uaddr != NULL) {
 1481                 err = move_addr_to_user(addr, msg_sys.msg_namelen, uaddr, uaddr_len);
 1482                 if (err < 0)
 1483                         goto out_freeiov;
 1484         }
 1485         err = __put_user(msg_sys.msg_flags, &msg->msg_flags);
 1486         if (err)
 1487                 goto out_freeiov;
 1488         err = __put_user((unsigned long)msg_sys.msg_control-cmsg_ptr, 
 1489                                                          &msg->msg_controllen);
 1490         if (err)
 1491                 goto out_freeiov;
 1492         err = len;
 1493 
 1494 out_freeiov:
 1495         if (iov != iovstack)
 1496                 sock_kfree_s(sock->sk, iov, iov_size);
 1497 out_put:
 1498         sockfd_put(sock);
 1499 out:
 1500         return err;
 1501 }
 1502 
 1503 
 1504 /*
 1505  *      Perform a file control on a socket file descriptor.
 1506  *
 1507  *      Doesn't acquire a fd lock, because no network fcntl
 1508  *      function sleeps currently.
 1509  */
 1510 
 1511 int sock_fcntl(struct file *filp, unsigned int cmd, unsigned long arg)
 1512 {
 1513         struct socket *sock;
 1514 
 1515         sock = socki_lookup (filp->f_dentry->d_inode);
 1516         if (sock && sock->ops)
 1517                 return sock_no_fcntl(sock, cmd, arg);
 1518         return(-EINVAL);
 1519 }
 1520 
 1521 /* Argument list sizes for sys_socketcall */
 1522 #define AL(x) ((x) * sizeof(unsigned long))
 1523 static unsigned char nargs[18]={AL(0),AL(3),AL(3),AL(3),AL(2),AL(3),
 1524                                 AL(3),AL(3),AL(4),AL(4),AL(4),AL(6),
 1525                                 AL(6),AL(2),AL(5),AL(5),AL(3),AL(3)};
 1526 #undef AL
 1527 
 1528 /*
 1529  *      System call vectors. 
 1530  *
 1531  *      Argument checking cleaned up. Saved 20% in size.
 1532  *  This function doesn't need to set the kernel lock because
 1533  *  it is set by the callees. 
 1534  */
 1535 
 1536 asmlinkage long sys_socketcall(int call, unsigned long *args)
 1537 {
 1538         unsigned long a[6];
 1539         unsigned long a0,a1;
 1540         int err;
 1541 
 1542         if(call<1||call>SYS_RECVMSG)
 1543                 return -EINVAL;
 1544 
 1545         /* copy_from_user should be SMP safe. */
 1546         if (copy_from_user(a, args, nargs[call]))
 1547                 return -EFAULT;
 1548                 
 1549         a0=a[0];
 1550         a1=a[1];
 1551         
 1552         switch(call) 
 1553         {
 1554                 case SYS_SOCKET:
 1555                         err = sys_socket(a0,a1,a[2]);
 1556                         break;
 1557                 case SYS_BIND:
 1558                         err = sys_bind(a0,(struct sockaddr *)a1, a[2]);
 1559                         break;
 1560                 case SYS_CONNECT:
 1561                         err = sys_connect(a0, (struct sockaddr *)a1, a[2]);
 1562                         break;
 1563                 case SYS_LISTEN:
 1564                         err = sys_listen(a0,a1);
 1565                         break;
 1566                 case SYS_ACCEPT:
 1567                         err = sys_accept(a0,(struct sockaddr *)a1, (int *)a[2]);
 1568                         break;
 1569                 case SYS_GETSOCKNAME:
 1570                         err = sys_getsockname(a0,(struct sockaddr *)a1, (int *)a[2]);
 1571                         break;
 1572                 case SYS_GETPEERNAME:
 1573                         err = sys_getpeername(a0, (struct sockaddr *)a1, (int *)a[2]);
 1574                         break;
 1575                 case SYS_SOCKETPAIR:
 1576                         err = sys_socketpair(a0,a1, a[2], (int *)a[3]);
 1577                         break;
 1578                 case SYS_SEND:
 1579                         err = sys_send(a0, (void *)a1, a[2], a[3]);
 1580                         break;
 1581                 case SYS_SENDTO:
 1582                         err = sys_sendto(a0,(void *)a1, a[2], a[3],
 1583                                          (struct sockaddr *)a[4], a[5]);
 1584                         break;
 1585                 case SYS_RECV:
 1586                         err = sys_recv(a0, (void *)a1, a[2], a[3]);
 1587                         break;
 1588                 case SYS_RECVFROM:
 1589                         err = sys_recvfrom(a0, (void *)a1, a[2], a[3],
 1590                                            (struct sockaddr *)a[4], (int *)a[5]);
 1591                         break;
 1592                 case SYS_SHUTDOWN:
 1593                         err = sys_shutdown(a0,a1);
 1594                         break;
 1595                 case SYS_SETSOCKOPT:
 1596                         err = sys_setsockopt(a0, a1, a[2], (char *)a[3], a[4]);
 1597                         break;
 1598                 case SYS_GETSOCKOPT:
 1599                         err = sys_getsockopt(a0, a1, a[2], (char *)a[3], (int *)a[4]);
 1600                         break;
 1601                 case SYS_SENDMSG:
 1602                         err = sys_sendmsg(a0, (struct msghdr *) a1, a[2]);
 1603                         break;
 1604                 case SYS_RECVMSG:
 1605                         err = sys_recvmsg(a0, (struct msghdr *) a1, a[2]);
 1606                         break;
 1607                 default:
 1608                         err = -EINVAL;
 1609                         break;
 1610         }
 1611         return err;
 1612 }
 1613 
 1614 /*
 1615  *      This function is called by a protocol handler that wants to
 1616  *      advertise its address family, and have it linked into the
 1617  *      SOCKET module.
 1618  */
 1619 
 1620 int sock_register(struct net_proto_family *ops)
 1621 {
 1622         int err;
 1623 
 1624         if (ops->family >= NPROTO) {
 1625                 printk(KERN_CRIT "protocol %d >= NPROTO(%d)\n", ops->family, NPROTO);
 1626                 return -ENOBUFS;
 1627         }
 1628         net_family_write_lock();
 1629         err = -EEXIST;
 1630         if (net_families[ops->family] == NULL) {
 1631                 net_families[ops->family]=ops;
 1632                 err = 0;
 1633         }
 1634         net_family_write_unlock();
 1635         return err;
 1636 }
 1637 
 1638 /*
 1639  *      This function is called by a protocol handler that wants to
 1640  *      remove its address family, and have it unlinked from the
 1641  *      SOCKET module.
 1642  */
 1643 
 1644 int sock_unregister(int family)
 1645 {
 1646         if (family < 0 || family >= NPROTO)
 1647                 return -1;
 1648 
 1649         net_family_write_lock();
 1650         net_families[family]=NULL;
 1651         net_family_write_unlock();
 1652         return 0;
 1653 }
 1654 
 1655 
 1656 extern void sk_init(void);
 1657 
 1658 #ifdef CONFIG_WAN_ROUTER
 1659 extern void wanrouter_init(void);
 1660 #endif
 1661 
 1662 #ifdef CONFIG_BLUEZ
 1663 extern void bluez_init(void);
 1664 #endif
 1665 
 1666 void __init sock_init(void)
 1667 {
 1668         int i;
 1669 
 1670         printk(KERN_INFO "Linux NET4.0 for Linux 2.4\n");
 1671         printk(KERN_INFO "Based upon Swansea University Computer Society NET3.039\n");
 1672 
 1673         /*
 1674          *      Initialize all address (protocol) families. 
 1675          */
 1676          
 1677         for (i = 0; i < NPROTO; i++) 
 1678                 net_families[i] = NULL;
 1679 
 1680         /*
 1681          *      Initialize sock SLAB cache.
 1682          */
 1683          
 1684         sk_init();
 1685 
 1686 #ifdef SLAB_SKB
 1687         /*
 1688          *      Initialize skbuff SLAB cache 
 1689          */
 1690         skb_init();
 1691 #endif
 1692 
 1693         /*
 1694          *      Wan router layer. 
 1695          */
 1696 
 1697 #ifdef CONFIG_WAN_ROUTER         
 1698         wanrouter_init();
 1699 #endif
 1700 
 1701         /*
 1702          *      Initialize the protocols module. 
 1703          */
 1704 
 1705         register_filesystem(&sock_fs_type);
 1706         sock_mnt = kern_mount(&sock_fs_type);
 1707         /* The real protocol initialization is performed when
 1708          *  do_initcalls is run.  
 1709          */
 1710 
 1711 
 1712         /*
 1713          * The netlink device handler may be needed early.
 1714          */
 1715 
 1716 #ifdef CONFIG_NET
 1717         rtnetlink_init();
 1718 #endif
 1719 #ifdef CONFIG_NETLINK_DEV
 1720         init_netlink();
 1721 #endif
 1722 #ifdef CONFIG_NETFILTER
 1723         netfilter_init();
 1724 #endif
 1725 
 1726 #ifdef CONFIG_BLUEZ
 1727         bluez_init();
 1728 #endif
 1729 }
 1730 
 1731 int socket_get_info(char *buffer, char **start, off_t offset, int length)
 1732 {
 1733         int len, cpu;
 1734         int counter = 0;
 1735 
 1736         for (cpu=0; cpu<smp_num_cpus; cpu++)
 1737                 counter += sockets_in_use[cpu_logical_map(cpu)].counter;
 1738 
 1739         /* It can be negative, by the way. 8) */
 1740         if (counter < 0)
 1741                 counter = 0;
 1742 
 1743         len = sprintf(buffer, "sockets: used %d\n", counter);
 1744         if (offset >= len)
 1745         {
 1746                 *start = buffer;
 1747                 return 0;
 1748         }
 1749         *start = buffer + offset;
 1750         len -= offset;
 1751         if (len > length)
 1752                 len = length;
 1753         if (len < 0)
 1754                 len = 0;
 1755         return len;
 1756 }

Cache object: 825e28c0e53a6c9a01d0dccfccb8b62e


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]


This page is part of the FreeBSD/Linux Kernel Cross-Reference, and was automatically generated using a modified version of the LXR engine.