The Design and Implementation of the FreeBSD Operating System, Second Edition
Now available: The Design and Implementation of the FreeBSD Operating System (Second Edition)


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]

FreeBSD/Linux Kernel Cross Reference
sys/net/ipv4/af_inet.c

Version: -  FREEBSD  -  FREEBSD-13-STABLE  -  FREEBSD-13-0  -  FREEBSD-12-STABLE  -  FREEBSD-12-0  -  FREEBSD-11-STABLE  -  FREEBSD-11-0  -  FREEBSD-10-STABLE  -  FREEBSD-10-0  -  FREEBSD-9-STABLE  -  FREEBSD-9-0  -  FREEBSD-8-STABLE  -  FREEBSD-8-0  -  FREEBSD-7-STABLE  -  FREEBSD-7-0  -  FREEBSD-6-STABLE  -  FREEBSD-6-0  -  FREEBSD-5-STABLE  -  FREEBSD-5-0  -  FREEBSD-4-STABLE  -  FREEBSD-3-STABLE  -  FREEBSD22  -  l41  -  OPENBSD  -  linux-2.6  -  MK84  -  PLAN9  -  xnu-8792 
SearchContext: -  none  -  3  -  10 

    1 /*
    2  * INET         An implementation of the TCP/IP protocol suite for the LINUX
    3  *              operating system.  INET is implemented using the  BSD Socket
    4  *              interface as the means of communication with the user level.
    5  *
    6  *              PF_INET protocol family socket handler.
    7  *
    8  * Version:     $Id: af_inet.c,v 1.136 2001/11/06 22:21:08 davem Exp $
    9  *
   10  * Authors:     Ross Biro, <bir7@leland.Stanford.Edu>
   11  *              Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
   12  *              Florian La Roche, <flla@stud.uni-sb.de>
   13  *              Alan Cox, <A.Cox@swansea.ac.uk>
   14  *
   15  * Changes (see also sock.c)
   16  *
   17  *              piggy,
   18  *              Karl Knutson    :       Socket protocol table
   19  *              A.N.Kuznetsov   :       Socket death error in accept().
   20  *              John Richardson :       Fix non blocking error in connect()
   21  *                                      so sockets that fail to connect
   22  *                                      don't return -EINPROGRESS.
   23  *              Alan Cox        :       Asynchronous I/O support
   24  *              Alan Cox        :       Keep correct socket pointer on sock structures
   25  *                                      when accept() ed
   26  *              Alan Cox        :       Semantics of SO_LINGER aren't state moved
   27  *                                      to close when you look carefully. With
   28  *                                      this fixed and the accept bug fixed 
   29  *                                      some RPC stuff seems happier.
   30  *              Niibe Yutaka    :       4.4BSD style write async I/O
   31  *              Alan Cox, 
   32  *              Tony Gale       :       Fixed reuse semantics.
   33  *              Alan Cox        :       bind() shouldn't abort existing but dead
   34  *                                      sockets. Stops FTP netin:.. I hope.
   35  *              Alan Cox        :       bind() works correctly for RAW sockets. Note
   36  *                                      that FreeBSD at least was broken in this respect
   37  *                                      so be careful with compatibility tests...
   38  *              Alan Cox        :       routing cache support
   39  *              Alan Cox        :       memzero the socket structure for compactness.
   40  *              Matt Day        :       nonblock connect error handler
   41  *              Alan Cox        :       Allow large numbers of pending sockets
   42  *                                      (eg for big web sites), but only if
   43  *                                      specifically application requested.
   44  *              Alan Cox        :       New buffering throughout IP. Used dumbly.
   45  *              Alan Cox        :       New buffering now used smartly.
   46  *              Alan Cox        :       BSD rather than common sense interpretation of
   47  *                                      listen.
   48  *              Germano Caronni :       Assorted small races.
   49  *              Alan Cox        :       sendmsg/recvmsg basic support.
   50  *              Alan Cox        :       Only sendmsg/recvmsg now supported.
   51  *              Alan Cox        :       Locked down bind (see security list).
   52  *              Alan Cox        :       Loosened bind a little.
   53  *              Mike McLagan    :       ADD/DEL DLCI Ioctls
   54  *      Willy Konynenberg       :       Transparent proxying support.
   55  *              David S. Miller :       New socket lookup architecture.
   56  *                                      Some other random speedups.
   57  *              Cyrus Durgin    :       Cleaned up file for kmod hacks.
   58  *              Andi Kleen      :       Fix inet_stream_connect TCP race.
   59  *
   60  *              This program is free software; you can redistribute it and/or
   61  *              modify it under the terms of the GNU General Public License
   62  *              as published by the Free Software Foundation; either version
   63  *              2 of the License, or (at your option) any later version.
   64  */
   65 
   66 #include <linux/config.h>
   67 #include <linux/errno.h>
   68 #include <linux/types.h>
   69 #include <linux/socket.h>
   70 #include <linux/in.h>
   71 #include <linux/kernel.h>
   72 #include <linux/major.h>
   73 #include <linux/sched.h>
   74 #include <linux/timer.h>
   75 #include <linux/string.h>
   76 #include <linux/sockios.h>
   77 #include <linux/net.h>
   78 #include <linux/fcntl.h>
   79 #include <linux/mm.h>
   80 #include <linux/interrupt.h>
   81 #include <linux/proc_fs.h>
   82 #include <linux/stat.h>
   83 #include <linux/init.h>
   84 #include <linux/poll.h>
   85 #include <linux/netfilter_ipv4.h>
   86 
   87 #include <asm/uaccess.h>
   88 #include <asm/system.h>
   89 
   90 #include <linux/smp_lock.h>
   91 #include <linux/inet.h>
   92 #include <linux/netdevice.h>
   93 #include <linux/brlock.h>
   94 #include <net/ip.h>
   95 #include <net/protocol.h>
   96 #include <net/arp.h>
   97 #include <net/route.h>
   98 #include <net/tcp.h>
   99 #include <net/udp.h>
  100 #include <linux/skbuff.h>
  101 #include <net/sock.h>
  102 #include <net/raw.h>
  103 #include <net/icmp.h>
  104 #include <net/ipip.h>
  105 #include <net/inet_common.h>
  106 #ifdef CONFIG_IP_MROUTE
  107 #include <linux/mroute.h>
  108 #endif
  109 #include <linux/if_bridge.h>
  110 #ifdef CONFIG_KMOD
  111 #include <linux/kmod.h>
  112 #endif
  113 #ifdef CONFIG_NET_DIVERT
  114 #include <linux/divert.h>
  115 #endif /* CONFIG_NET_DIVERT */
  116 #if defined(CONFIG_NET_RADIO) || defined(CONFIG_NET_PCMCIA_RADIO)
  117 #include <linux/wireless.h>             /* Note : will define WIRELESS_EXT */
  118 #endif  /* CONFIG_NET_RADIO || CONFIG_NET_PCMCIA_RADIO */
  119 
      /* Global MIB counter array: two struct linux_mib entries per possible
       * CPU (NR_CPUS*2).
       * NOTE(review): why two per CPU is not visible here -- confirm against
       * the code that updates net_statistics.
       */
  120 struct linux_mib net_statistics[NR_CPUS*2];
  121 
  122 #ifdef INET_REFCNT_DEBUG
      /* Debug-only count of INET sockets: incremented in inet_create(),
       * decremented and printed in inet_sock_destruct().
       */
  123 atomic_t inet_sock_nr;
  124 #endif
  125 
      /* Routines implemented in other files.
       * NOTE(review): the *_get_info prototypes look like /proc read
       * handlers -- confirm where they are registered.
       */
  126 extern int raw_get_info(char *, char **, off_t, int);
  127 extern int snmp_get_info(char *, char **, off_t, int);
  128 extern int netstat_get_info(char *, char **, off_t, int);
  129 extern int afinet_get_info(char *, char **, off_t, int);
  130 extern int tcp_get_info(char *, char **, off_t, int);
  131 extern int udp_get_info(char *, char **, off_t, int);
      /* Called from inet_release() before the socket is closed. */
  132 extern void ip_mc_drop_socket(struct sock *sk);
  133 
      /* DLCI ioctl entry point, declared only when CONFIG_DLCI is set. */
  134 #ifdef CONFIG_DLCI
  135 extern int dlci_ioctl(unsigned int, void*);
  136 #endif
  137 
      /* Optional ioctl hook pointers. Each one exists only when the matching
       * CONFIG option is set.
       * NOTE(review): presumably filled in by the respective driver/module;
       * neither the assignment nor the call sites are in view here.
       */
  138 #ifdef CONFIG_DLCI_MODULE
  139 int (*dlci_ioctl_hook)(unsigned int, void *);
  140 #endif
  141 
  142 #if defined(CONFIG_BRIDGE) || defined(CONFIG_BRIDGE_MODULE)
  143 int (*br_ioctl_hook)(unsigned long);
  144 #endif
  145 
  146 #if defined(CONFIG_VLAN_8021Q) || defined(CONFIG_VLAN_8021Q_MODULE)
  147 int (*vlan_ioctl_hook)(unsigned long arg);
  148 #endif
  149 
  150 /* The inetsw table contains everything that inet_create needs to
  151  * build a new socket.
       * It is an array of list heads with SOCK_MAX entries; inet_create()
       * walks the list at inetsw[sock->type].
  152  */
  153 struct list_head inetsw[SOCK_MAX];
  154 
  155 /* New destruction routine */
  156 
  157 void inet_sock_destruct(struct sock *sk)
  158 {
  159         __skb_queue_purge(&sk->receive_queue);
  160         __skb_queue_purge(&sk->error_queue);
  161 
  162         if (sk->type == SOCK_STREAM && sk->state != TCP_CLOSE) {
  163                 printk("Attempt to release TCP socket in state %d %p\n",
  164                        sk->state,
  165                        sk);
  166                 return;
  167         }
  168         if (!sk->dead) {
  169                 printk("Attempt to release alive inet socket %p\n", sk);
  170                 return;
  171         }
  172 
  173         BUG_TRAP(atomic_read(&sk->rmem_alloc) == 0);
  174         BUG_TRAP(atomic_read(&sk->wmem_alloc) == 0);
  175         BUG_TRAP(sk->wmem_queued == 0);
  176         BUG_TRAP(sk->forward_alloc == 0);
  177 
  178         if (sk->protinfo.af_inet.opt)
  179                 kfree(sk->protinfo.af_inet.opt);
  180         dst_release(sk->dst_cache);
  181 #ifdef INET_REFCNT_DEBUG
  182         atomic_dec(&inet_sock_nr);
  183         printk(KERN_DEBUG "INET socket %p released, %d are still alive\n", sk, atomic_read(&inet_sock_nr));
  184 #endif
  185 }
  186 
  187 void inet_sock_release(struct sock *sk)
  188 {
  189         if (sk->prot->destroy)
  190                 sk->prot->destroy(sk);
  191 
  192         /* Observation: when inet_sock_release is called, processes have
  193          * no access to socket. But net still has.
  194          * Step one, detach it from networking:
  195          *
  196          * A. Remove from hash tables.
  197          */
  198 
  199         sk->prot->unhash(sk);
  200 
  201         /* In this point socket cannot receive new packets,
  202          * but it is possible that some packets are in flight
  203          * because some CPU runs receiver and did hash table lookup
  204          * before we unhashed socket. They will achieve receive queue
  205          * and will be purged by socket destructor.
  206          *
  207          * Also we still have packets pending on receive
  208          * queue and probably, our own packets waiting in device queues.
  209          * sock_destroy will drain receive queue, but transmitted
  210          * packets will delay socket destruction until the last reference
  211          * will be released.
  212          */
  213 
  214         sock_orphan(sk);
  215 
  216 #ifdef INET_REFCNT_DEBUG
  217         if (atomic_read(&sk->refcnt) != 1) {
  218                 printk(KERN_DEBUG "Destruction inet %p delayed, c=%d\n", sk, atomic_read(&sk->refcnt));
  219         }
  220 #endif
  221         sock_put(sk);
  222 }
  223 
  224 
  225 /*
  226  *      The routines beyond this point handle the behaviour of an AF_INET
  227  *      socket object. Mostly it punts to the subprotocols of IP to do
  228  *      the work.
  229  */
  230  
  231 
  232 /*
  233  *      Set socket options on an inet socket.
  234  */
  235  
  236 int inet_setsockopt(struct socket *sock, int level, int optname,
  237                     char *optval, int optlen)
  238 {
  239         struct sock *sk=sock->sk;
  240 
  241         return sk->prot->setsockopt(sk,level,optname,optval,optlen);
  242 }
  243 
  244 /*
  245  *      Get a socket option on an AF_INET socket.
  246  *
  247  *      FIX: POSIX 1003.1g is very ambiguous here. It states that
  248  *      asynchronous errors should be reported by getsockopt. We assume
  249  *      this means if you specify SO_ERROR (otherwise whats the point of it).
  250  */
  251 
  252 int inet_getsockopt(struct socket *sock, int level, int optname,
  253                     char *optval, int *optlen)
  254 {
  255         struct sock *sk=sock->sk;
  256 
  257         return sk->prot->getsockopt(sk,level,optname,optval,optlen);
  258 }
  259 
  260 /*
  261  *      Automatically bind an unbound socket.
  262  */
  263 
  264 static int inet_autobind(struct sock *sk)
  265 {
  266         /* We may need to bind the socket. */
  267         lock_sock(sk);
  268         if (sk->num == 0) {
  269                 if (sk->prot->get_port(sk, 0) != 0) {
  270                         release_sock(sk);
  271                         return -EAGAIN;
  272                 }
  273                 sk->sport = htons(sk->num);
  274         }
  275         release_sock(sk);
  276         return 0;
  277 }
  278 
  279 /*
  280  *      Move a socket into listening state.
  281  */
  282  
  283 int inet_listen(struct socket *sock, int backlog)
  284 {
  285         struct sock *sk = sock->sk;
  286         unsigned char old_state;
  287         int err;
  288 
  289         lock_sock(sk);
  290 
  291         err = -EINVAL;
  292         if (sock->state != SS_UNCONNECTED || sock->type != SOCK_STREAM)
  293                 goto out;
  294 
  295         old_state = sk->state;
  296         if (!((1<<old_state)&(TCPF_CLOSE|TCPF_LISTEN)))
  297                 goto out;
  298 
  299         /* Really, if the socket is already in listen state
  300          * we can only allow the backlog to be adjusted.
  301          */
  302         if (old_state != TCP_LISTEN) {
  303                 err = tcp_listen_start(sk);
  304                 if (err)
  305                         goto out;
  306         }
  307         sk->max_ack_backlog = backlog;
  308         err = 0;
  309 
  310 out:
  311         release_sock(sk);
  312         return err;
  313 }
  314 
  315 /*
  316  *      Create an inet socket.
  317  */
  318 
  319 static int inet_create(struct socket *sock, int protocol)
  320 {
  321         struct sock *sk;
  322         struct list_head *p;
  323         struct inet_protosw *answer;
  324 
  325         sock->state = SS_UNCONNECTED;
  326         sk = sk_alloc(PF_INET, GFP_KERNEL, 1);
  327         if (sk == NULL) 
  328                 goto do_oom;
  329   
  330         /* Look for the requested type/protocol pair. */
  331         answer = NULL;
  332         br_read_lock_bh(BR_NETPROTO_LOCK);
  333         list_for_each(p, &inetsw[sock->type]) {
  334                 answer = list_entry(p, struct inet_protosw, list);
  335 
  336                 /* Check the non-wild match. */
  337                 if (protocol == answer->protocol) {
  338                         if (protocol != IPPROTO_IP)
  339                                 break;
  340                 } else {
  341                         /* Check for the two wild cases. */
  342                         if (IPPROTO_IP == protocol) {
  343                                 protocol = answer->protocol;
  344                                 break;
  345                         }
  346                         if (IPPROTO_IP == answer->protocol)
  347                                 break;
  348                 }
  349                 answer = NULL;
  350         }
  351         br_read_unlock_bh(BR_NETPROTO_LOCK);
  352 
  353         if (!answer)
  354                 goto free_and_badtype;
  355         if (answer->capability > 0 && !capable(answer->capability))
  356                 goto free_and_badperm;
  357         if (!protocol)
  358                 goto free_and_noproto;
  359 
  360         sock->ops = answer->ops;
  361         sk->prot = answer->prot;
  362         sk->no_check = answer->no_check;
  363         if (INET_PROTOSW_REUSE & answer->flags)
  364                 sk->reuse = 1;
  365 
  366         if (SOCK_RAW == sock->type) {
  367                 sk->num = protocol;
  368                 if (IPPROTO_RAW == protocol)
  369                         sk->protinfo.af_inet.hdrincl = 1;
  370         }
  371 
  372         if (ipv4_config.no_pmtu_disc)
  373                 sk->protinfo.af_inet.pmtudisc = IP_PMTUDISC_DONT;
  374         else
  375                 sk->protinfo.af_inet.pmtudisc = IP_PMTUDISC_WANT;
  376 
  377         sk->protinfo.af_inet.id = 0;
  378 
  379         sock_init_data(sock,sk);
  380 
  381         sk->destruct = inet_sock_destruct;
  382 
  383         sk->zapped      = 0;
  384         sk->family      = PF_INET;
  385         sk->protocol    = protocol;
  386 
  387         sk->backlog_rcv = sk->prot->backlog_rcv;
  388 
  389         sk->protinfo.af_inet.ttl        = sysctl_ip_default_ttl;
  390 
  391         sk->protinfo.af_inet.mc_loop    = 1;
  392         sk->protinfo.af_inet.mc_ttl     = 1;
  393         sk->protinfo.af_inet.mc_index   = 0;
  394         sk->protinfo.af_inet.mc_list    = NULL;
  395 
  396 #ifdef INET_REFCNT_DEBUG
  397         atomic_inc(&inet_sock_nr);
  398 #endif
  399 
  400         if (sk->num) {
  401                 /* It assumes that any protocol which allows
  402                  * the user to assign a number at socket
  403                  * creation time automatically
  404                  * shares.
  405                  */
  406                 sk->sport = htons(sk->num);
  407 
  408                 /* Add to protocol hash chains. */
  409                 sk->prot->hash(sk);
  410         }
  411 
  412         if (sk->prot->init) {
  413                 int err = sk->prot->init(sk);
  414                 if (err != 0) {
  415                         inet_sock_release(sk);
  416                         return err;
  417                 }
  418         }
  419         return 0;
  420 
  421 free_and_badtype:
  422         sk_free(sk);
  423         return -ESOCKTNOSUPPORT;
  424 
  425 free_and_badperm:
  426         sk_free(sk);
  427         return -EPERM;
  428 
  429 free_and_noproto:
  430         sk_free(sk);
  431         return -EPROTONOSUPPORT;
  432 
  433 do_oom:
  434         return -ENOBUFS;
  435 }
  436 
  437 
  438 /*
  439  *      The peer socket should always be NULL (or else). When we call this
  440  *      function we are destroying the object and from then on nobody
  441  *      should refer to it.
  442  */
  443  
  444 int inet_release(struct socket *sock)
  445 {
  446         struct sock *sk = sock->sk;
  447 
  448         if (sk) {
  449                 long timeout;
  450 
  451                 /* Applications forget to leave groups before exiting */
  452                 ip_mc_drop_socket(sk);
  453 
  454                 /* If linger is set, we don't return until the close
  455                  * is complete.  Otherwise we return immediately. The
  456                  * actually closing is done the same either way.
  457                  *
  458                  * If the close is due to the process exiting, we never
  459                  * linger..
  460                  */
  461                 timeout = 0;
  462                 if (sk->linger && !(current->flags & PF_EXITING))
  463                         timeout = sk->lingertime;
  464                 sock->sk = NULL;
  465                 sk->prot->close(sk, timeout);
  466         }
  467         return(0);
  468 }
  469 
  470 /* It is off by default, see below. */
  471 int sysctl_ip_nonlocal_bind;
  472 
  473 static int inet_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
  474 {
  475         struct sockaddr_in *addr=(struct sockaddr_in *)uaddr;
  476         struct sock *sk=sock->sk;
  477         unsigned short snum;
  478         int chk_addr_ret;
  479         int err;
  480 
  481         /* If the socket has its own bind function then use it. (RAW) */
  482         if(sk->prot->bind)
  483                 return sk->prot->bind(sk, uaddr, addr_len);
  484 
  485         if (addr_len < sizeof(struct sockaddr_in))
  486                 return -EINVAL;
  487 
  488         chk_addr_ret = inet_addr_type(addr->sin_addr.s_addr);
  489 
  490         /* Not specified by any standard per-se, however it breaks too
  491          * many applications when removed.  It is unfortunate since
  492          * allowing applications to make a non-local bind solves
  493          * several problems with systems using dynamic addressing.
  494          * (ie. your servers still start up even if your ISDN link
  495          *  is temporarily down)
  496          */
  497         if (sysctl_ip_nonlocal_bind == 0 && 
  498             sk->protinfo.af_inet.freebind == 0 &&
  499             addr->sin_addr.s_addr != INADDR_ANY &&
  500             chk_addr_ret != RTN_LOCAL &&
  501             chk_addr_ret != RTN_MULTICAST &&
  502             chk_addr_ret != RTN_BROADCAST)
  503                 return -EADDRNOTAVAIL;
  504 
  505         snum = ntohs(addr->sin_port);
  506         if (snum && snum < PROT_SOCK && !capable(CAP_NET_BIND_SERVICE))
  507                 return -EACCES;
  508 
  509         /*      We keep a pair of addresses. rcv_saddr is the one
  510          *      used by hash lookups, and saddr is used for transmit.
  511          *
  512          *      In the BSD API these are the same except where it
  513          *      would be illegal to use them (multicast/broadcast) in
  514          *      which case the sending device address is used.
  515          */
  516         lock_sock(sk);
  517 
  518         /* Check these errors (active socket, double bind). */
  519         err = -EINVAL;
  520         if ((sk->state != TCP_CLOSE)                    ||
  521             (sk->num != 0))
  522                 goto out;
  523 
  524         sk->rcv_saddr = sk->saddr = addr->sin_addr.s_addr;
  525         if (chk_addr_ret == RTN_MULTICAST || chk_addr_ret == RTN_BROADCAST)
  526                 sk->saddr = 0;  /* Use device */
  527 
  528         /* Make sure we are allowed to bind here. */
  529         if (sk->prot->get_port(sk, snum) != 0) {
  530                 sk->saddr = sk->rcv_saddr = 0;
  531                 err = -EADDRINUSE;
  532                 goto out;
  533         }
  534 
  535         if (sk->rcv_saddr)
  536                 sk->userlocks |= SOCK_BINDADDR_LOCK;
  537         if (snum)
  538                 sk->userlocks |= SOCK_BINDPORT_LOCK;
  539         sk->sport = htons(sk->num);
  540         sk->daddr = 0;
  541         sk->dport = 0;
  542         sk_dst_reset(sk);
  543         err = 0;
  544 out:
  545         release_sock(sk);
  546         return err;
  547 }
  548 
  549 int inet_dgram_connect(struct socket *sock, struct sockaddr * uaddr,
  550                        int addr_len, int flags)
  551 {
  552         struct sock *sk=sock->sk;
  553 
  554         if (uaddr->sa_family == AF_UNSPEC)
  555                 return sk->prot->disconnect(sk, flags);
  556 
  557         if (sk->num==0 && inet_autobind(sk) != 0)
  558                 return -EAGAIN;
  559         return sk->prot->connect(sk, (struct sockaddr *)uaddr, addr_len);
  560 }
  561 
      /*
       *      Sleep until the connection attempt on sk leaves the
       *      SYN_SENT/SYN_RECV states, a signal is pending for the current
       *      task, or the timeout runs out.
       *
       *      The socket lock is dropped (release_sock) around every sleep
       *      and retaken (lock_sock) afterwards, so the function is entered
       *      and left with the lock held; inet_stream_connect() calls it
       *      after lock_sock().
       *
       *      timeo is passed to schedule_timeout() and replaced by its
       *      return value on each pass.  The final value is returned; it is
       *      0 when schedule_timeout() reported that no time was left.
       */
  562 static long inet_wait_for_connect(struct sock *sk, long timeo)
  563 {
  564         DECLARE_WAITQUEUE(wait, current);
  565 
              /* Set the task state and join sk->sleep before the socket
               * state is tested for the first time.
               */
  566         __set_current_state(TASK_INTERRUPTIBLE);
  567         add_wait_queue(sk->sleep, &wait);
  568 
  569         /* Basic assumption: if someone sets sk->err, he _must_
  570          * change state of the socket from TCP_SYN_*.
  571          * Connect() does not allow to get error notifications
  572          * without closing the socket.
  573          */
  574         while ((1<<sk->state)&(TCPF_SYN_SENT|TCPF_SYN_RECV)) {
  575                 release_sock(sk);
  576                 timeo = schedule_timeout(timeo);
  577                 lock_sock(sk);
                      /* Stop on a signal or an expired timeout; the socket
                       * state is not looked at again in that case.
                       */
  578                 if (signal_pending(current) || !timeo)
  579                         break;
                      /* Going round again: re-arm the sleeping state before
                       * the loop condition re-reads sk->state.
                       */
  580                 set_current_state(TASK_INTERRUPTIBLE);
  581         }
  582         __set_current_state(TASK_RUNNING);
  583         remove_wait_queue(sk->sleep, &wait);
  584         return timeo;
  585 }
  586 
  587 /*
  588  *      Connect to a remote host. There is regrettably still a little
  589  *      TCP 'magic' in here.
  590  */
  591  
  592 int inet_stream_connect(struct socket *sock, struct sockaddr * uaddr,
  593                         int addr_len, int flags)
  594 {
  595         struct sock *sk=sock->sk;
  596         int err;
  597         long timeo;
  598 
  599         lock_sock(sk);
  600 
  601         if (uaddr->sa_family == AF_UNSPEC) {
  602                 err = sk->prot->disconnect(sk, flags);
  603                 sock->state = err ? SS_DISCONNECTING : SS_UNCONNECTED;
  604                 goto out;
  605         }
  606 
  607         switch (sock->state) {
  608         default:
  609                 err = -EINVAL;
  610                 goto out;
  611         case SS_CONNECTED:
  612                 err = -EISCONN;
  613                 goto out;
  614         case SS_CONNECTING:
  615                 err = -EALREADY;
  616                 /* Fall out of switch with err, set for this state */
  617                 break;
  618         case SS_UNCONNECTED:
  619                 err = -EISCONN;
  620                 if (sk->state != TCP_CLOSE) 
  621                         goto out;
  622 
  623                 err = sk->prot->connect(sk, uaddr, addr_len);
  624                 if (err < 0)
  625                         goto out;
  626 
  627                 sock->state = SS_CONNECTING;
  628 
  629                 /* Just entered SS_CONNECTING state; the only
  630                  * difference is that return value in non-blocking
  631                  * case is EINPROGRESS, rather than EALREADY.
  632                  */
  633                 err = -EINPROGRESS;
  634                 break;
  635         }
  636 
  637         timeo = sock_sndtimeo(sk, flags&O_NONBLOCK);
  638 
  639         if ((1<<sk->state)&(TCPF_SYN_SENT|TCPF_SYN_RECV)) {
  640                 /* Error code is set above */
  641                 if (!timeo || !inet_wait_for_connect(sk, timeo))
  642                         goto out;
  643 
  644                 err = sock_intr_errno(timeo);
  645                 if (signal_pending(current))
  646                         goto out;
  647         }
  648 
  649         /* Connection was closed by RST, timeout, ICMP error
  650          * or another process disconnected us.
  651          */
  652         if (sk->state == TCP_CLOSE)
  653                 goto sock_error;
  654 
  655         /* sk->err may be not zero now, if RECVERR was ordered by user
  656          * and error was received after socket entered established state.
  657          * Hence, it is handled normally after connect() return successfully.
  658          */
  659 
  660         sock->state = SS_CONNECTED;
  661         err = 0;
  662 out:
  663         release_sock(sk);
  664         return err;
  665 
  666 sock_error:
  667         err = sock_error(sk) ? : -ECONNABORTED;
  668         sock->state = SS_UNCONNECTED;
  669         if (sk->prot->disconnect(sk, flags))
  670                 sock->state = SS_DISCONNECTING;
  671         goto out;
  672 }
  673 
  674 /*
  675  *      Accept a pending connection. The TCP layer now gives BSD semantics.
  676  */
  677 
  678 int inet_accept(struct socket *sock, struct socket *newsock, int flags)
  679 {
  680         struct sock *sk1 = sock->sk;
  681         struct sock *sk2;
  682         int err = -EINVAL;
  683 
  684         if((sk2 = sk1->prot->accept(sk1,flags,&err)) == NULL)
  685                 goto do_err;
  686 
  687         lock_sock(sk2);
  688 
  689         BUG_TRAP((1<<sk2->state)&(TCPF_ESTABLISHED|TCPF_CLOSE_WAIT|TCPF_CLOSE));
  690 
  691         sock_graft(sk2, newsock);
  692 
  693         newsock->state = SS_CONNECTED;
  694         release_sock(sk2);
  695         return 0;
  696 
  697 do_err:
  698         return err;
  699 }
  700 
  701 
  702 /*
  703  *      This does both peername and sockname.
  704  */
  705  
  706 static int inet_getname(struct socket *sock, struct sockaddr *uaddr,
  707                  int *uaddr_len, int peer)
  708 {
  709         struct sock *sk         = sock->sk;
  710         struct sockaddr_in *sin = (struct sockaddr_in *)uaddr;
  711   
  712         sin->sin_family = AF_INET;
  713         if (peer) {
  714                 if (!sk->dport)
  715                         return -ENOTCONN;
  716                 if (((1<<sk->state)&(TCPF_CLOSE|TCPF_SYN_SENT)) && peer == 1)
  717                         return -ENOTCONN;
  718                 sin->sin_port = sk->dport;
  719                 sin->sin_addr.s_addr = sk->daddr;
  720         } else {
  721                 __u32 addr = sk->rcv_saddr;
  722                 if (!addr)
  723                         addr = sk->saddr;
  724                 sin->sin_port = sk->sport;
  725                 sin->sin_addr.s_addr = addr;
  726         }
  727         *uaddr_len = sizeof(*sin);
  728         return(0);
  729 }
  730 
  731 
  732 
  733 int inet_recvmsg(struct socket *sock, struct msghdr *msg, int size,
  734                  int flags, struct scm_cookie *scm)
  735 {
  736         struct sock *sk = sock->sk;
  737         int addr_len = 0;
  738         int err;
  739 
  740         err = sk->prot->recvmsg(sk, msg, size, flags&MSG_DONTWAIT,
  741                                 flags&~MSG_DONTWAIT, &addr_len);
  742         if (err >= 0)
  743                 msg->msg_namelen = addr_len;
  744         return err;
  745 }
  746 
  747 
  748 int inet_sendmsg(struct socket *sock, struct msghdr *msg, int size,
  749                  struct scm_cookie *scm)
  750 {
  751         struct sock *sk = sock->sk;
  752 
  753         /* We may need to bind the socket. */
  754         if (sk->num==0 && inet_autobind(sk) != 0)
  755                 return -EAGAIN;
  756 
  757         return sk->prot->sendmsg(sk, msg, size);
  758 }
  759 
  760 int inet_shutdown(struct socket *sock, int how)
  761 {
  762         struct sock *sk = sock->sk;
  763         int err = 0;
  764 
  765         /* This should really check to make sure
  766          * the socket is a TCP socket. (WHY AC...)
  767          */
  768         how++; /* maps 0->1 has the advantage of making bit 1 rcvs and
  769                        1->2 bit 2 snds.
  770                        2->3 */
  771         if ((how & ~SHUTDOWN_MASK) || how==0)   /* MAXINT->0 */
  772                 return -EINVAL;
  773 
  774         lock_sock(sk);
  775         if (sock->state == SS_CONNECTING) {
  776                 if ((1<<sk->state)&(TCPF_SYN_SENT|TCPF_SYN_RECV|TCPF_CLOSE))
  777                         sock->state = SS_DISCONNECTING;
  778                 else
  779                         sock->state = SS_CONNECTED;
  780         }
  781 
  782         switch (sk->state) {
  783         case TCP_CLOSE:
  784                 err = -ENOTCONN;
  785                 /* Hack to wake up other listeners, who can poll for
  786                    POLLHUP, even on eg. unconnected UDP sockets -- RR */
  787         default:
  788                 sk->shutdown |= how;
  789                 if (sk->prot->shutdown)
  790                         sk->prot->shutdown(sk, how);
  791                 break;
  792 
  793         /* Remaining two branches are temporary solution for missing
  794          * close() in multithreaded environment. It is _not_ a good idea,
  795          * but we have no choice until close() is repaired at VFS level.
  796          */
  797         case TCP_LISTEN:
  798                 if (!(how & RCV_SHUTDOWN))
  799                         break;
  800                 /* Fall through */
  801         case TCP_SYN_SENT:
  802                 err = sk->prot->disconnect(sk, O_NONBLOCK);
  803                 sock->state = err ? SS_DISCONNECTING : SS_UNCONNECTED;
  804                 break;
  805         }
  806 
  807         /* Wake up anyone sleeping in poll. */
  808         sk->state_change(sk);
  809         release_sock(sk);
  810         return err;
  811 }
  812 
  813 /*
  814  *      ioctl() calls you can issue on an INET socket. Most of these are
  815  *      device configuration and stuff and very rarely used. Some ioctls
  816  *      pass on to the socket itself.
  817  *
  818  *      NOTE: I like the idea of a module for the config stuff. ie ifconfig
  819  *      loads the devconfigure module does its configuring and unloads it.
  820  *      There's a good 20K of config code hanging around the kernel.
       *
       *      sock:   socket the ioctl was issued on; sock->sk supplies the
       *              owner (proc), the timestamp (stamp) and the protocol
       *              operations (prot) used below.
       *      cmd:    request number; selects the branch of the switch.
       *      arg:    request argument.  Most branches cast it to a pointer
       *              (user-space pointer for get_user/put_user/copy_to_user);
       *              the bridge/VLAN hooks and the protocol ioctl receive it
       *              unchanged.
       *
       *      Returns 0 or a negative errno.  Every branch of the switch
       *      returns directly; requests whose handler is not compiled in or
       *      not loaded yield -ENOPKG.  Requests not recognised here are
       *      offered to sk->prot->ioctl and finally to dev_ioctl().
  821  */
  822 
  823 static int inet_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
  824 {
  825         struct sock *sk = sock->sk;
  826         int err;
  827         int pid;
  828 
  829         switch(cmd) {
  830                 case FIOSETOWN:
  831                 case SIOCSPGRP:
  832                         err = get_user(pid, (int *) arg);
  833                         if (err)
  834                                 return err; 
                              /* The caller may name its own pid, or the negated
                               * value of its own process group; any other value
                               * requires CAP_NET_ADMIN.
                               */
  835                         if (current->pid != pid && current->pgrp != -pid && 
  836                             !capable(CAP_NET_ADMIN))
  837                                 return -EPERM;
  838                         sk->proc = pid;
  839                         return(0);
  840                 case FIOGETOWN:
  841                 case SIOCGPGRP:
  842                         return put_user(sk->proc, (int *)arg);
  843                 case SIOCGSTAMP:
                              /* A zero tv_sec is reported as -ENOENT rather than
                               * being copied out.
                               */
  844                         if(sk->stamp.tv_sec==0)
  845                                 return -ENOENT;
  846                         err = copy_to_user((void *)arg,&sk->stamp,sizeof(struct timeval));
                              /* Any non-zero copy_to_user() result becomes -EFAULT. */
  847                         if (err)
  848                                 err = -EFAULT;
  849                         return err;
                      /* Routing table requests. */
  850                 case SIOCADDRT:
  851                 case SIOCDELRT:
  852                 case SIOCRTMSG:
  853                         return(ip_rt_ioctl(cmd,(void *) arg));
                      /* ARP table requests. */
  854                 case SIOCDARP:
  855                 case SIOCGARP:
  856                 case SIOCSARP:
  857                         return(arp_ioctl(cmd,(void *) arg));
                      /* Interface address/flag requests handled by devinet_ioctl(). */
  858                 case SIOCGIFADDR:
  859                 case SIOCSIFADDR:
  860                 case SIOCGIFBRDADDR:
  861                 case SIOCSIFBRDADDR:
  862                 case SIOCGIFNETMASK:
  863                 case SIOCSIFNETMASK:
  864                 case SIOCGIFDSTADDR:
  865                 case SIOCSIFDSTADDR:
  866                 case SIOCSIFPFLAGS:     
  867                 case SIOCGIFPFLAGS:     
  868                 case SIOCSIFFLAGS:
  869                         return(devinet_ioctl(cmd,(void *) arg));
  870                 case SIOCGIFBR:
  871                 case SIOCSIFBR:
  872 #if defined(CONFIG_BRIDGE) || defined(CONFIG_BRIDGE_MODULE)
  873 #ifdef CONFIG_KMOD
                              /* No hook registered yet: ask for the module to be
                               * loaded, then re-test the hook below.
                               */
  874                         if (br_ioctl_hook == NULL)
  875                                 request_module("bridge");
  876 #endif
  877                         if (br_ioctl_hook != NULL)
  878                                 return br_ioctl_hook(arg);
  879 #endif
                              /* Bridge support not configured, or hook still unset. */
  880                         return -ENOPKG;
  881 
  882                 case SIOCGIFVLAN:
  883                 case SIOCSIFVLAN:
  884 #if defined(CONFIG_VLAN_8021Q) || defined(CONFIG_VLAN_8021Q_MODULE)
  885 #ifdef CONFIG_KMOD
  886                         if (vlan_ioctl_hook == NULL)
  887                                 request_module("8021q");
  888 #endif
  889                         if (vlan_ioctl_hook != NULL)
  890                                 return vlan_ioctl_hook(arg);
  891 #endif
                              /* VLAN support not configured, or hook still unset. */
  892                         return -ENOPKG;
  893 
  894                 case SIOCGIFDIVERT:
  895                 case SIOCSIFDIVERT:
  896 #ifdef CONFIG_NET_DIVERT
  897                         return divert_ioctl(cmd, (struct divert_cf *) arg);
  898 #else
  899                         return -ENOPKG;
  900 #endif  /* CONFIG_NET_DIVERT */
  901                         
  902                 case SIOCADDDLCI:
  903                 case SIOCDELDLCI:
  904 #ifdef CONFIG_DLCI
                              /* Built-in DLCI: the handler runs under the big
                               * kernel lock and this branch always returns here.
                               */
  905                         lock_kernel();
  906                         err = dlci_ioctl(cmd, (void *) arg);
  907                         unlock_kernel();
  908                         return err;
  909 #endif
  910 
  911 #ifdef CONFIG_DLCI_MODULE
  912 
  913 #ifdef CONFIG_KMOD
  914                         if (dlci_ioctl_hook == NULL)
  915                                 request_module("dlci");
  916 #endif
  917 
                              /* Modular DLCI: same call, made through the hook. */
  918                         if (dlci_ioctl_hook) {
  919                                 lock_kernel();
  920                                 err = (*dlci_ioctl_hook)(cmd, (void *) arg);
  921                                 unlock_kernel();
  922                                 return err;
  923                         }
  924 #endif
  925                         return -ENOPKG;
  926 
  927                 default:
                              /* The 16 device-private request numbers go straight
                               * to the device layer.
                               */
  928                         if ((cmd >= SIOCDEVPRIVATE) &&
  929                             (cmd <= (SIOCDEVPRIVATE + 15)))
  930                                 return(dev_ioctl(cmd,(void *) arg));
  931 
  932 #ifdef WIRELESS_EXT
  933                         if((cmd >= SIOCIWFIRST) && (cmd <= SIOCIWLAST))
  934                                 return(dev_ioctl(cmd,(void *) arg));
  935 #endif  /* WIRELESS_EXT */
  936 
                              /* Try the protocol's own ioctl first; fall back to
                               * dev_ioctl() when the protocol has none or answers
                               * -ENOIOCTLCMD.  Any other result is returned as is.
                               */
  937                         if (sk->prot->ioctl==NULL || (err=sk->prot->ioctl(sk, cmd, arg))==-ENOIOCTLCMD)
  938                                 return(dev_ioctl(cmd,(void *) arg));            
  939                         return err;
  940         }
  941         /*NOTREACHED*/
  942         return(0);
  943 }
  944 
      /*
       *      Socket-layer operation table for PF_INET stream sockets;
       *      inetsw_array[] pairs it with SOCK_STREAM / IPPROTO_TCP.
       *      socketpair and mmap are routed to the sock_no_* stubs.
       */
  945 struct proto_ops inet_stream_ops = {
  946         .family         = PF_INET,
  947 
  948         .release        = inet_release,
  949         .bind           = inet_bind,
  950         .connect        = inet_stream_connect,
  951         .socketpair     = sock_no_socketpair,
  952         .accept         = inet_accept,
  953         .getname        = inet_getname,
  954         .poll           = tcp_poll,
  955         .ioctl          = inet_ioctl,
  956         .listen         = inet_listen,
  957         .shutdown       = inet_shutdown,
  958         .setsockopt     = inet_setsockopt,
  959         .getsockopt     = inet_getsockopt,
  960         .sendmsg        = inet_sendmsg,
  961         .recvmsg        = inet_recvmsg,
  962         .mmap           = sock_no_mmap,
  963         .sendpage       = tcp_sendpage
  964 };
  965 
      /*
       *      Socket-layer operation table for PF_INET datagram-style sockets;
       *      inetsw_array[] uses it for both the SOCK_DGRAM (UDP) and the
       *      SOCK_RAW entries.  socketpair, accept, listen, mmap and
       *      sendpage are routed to the sock_no_* stubs.
       */
  966 struct proto_ops inet_dgram_ops = {
  967         .family         = PF_INET,
  968 
  969         .release        = inet_release,
  970         .bind           = inet_bind,
  971         .connect        = inet_dgram_connect,
  972         .socketpair     = sock_no_socketpair,
  973         .accept         = sock_no_accept,
  974         .getname        = inet_getname,
  975         .poll           = datagram_poll,
  976         .ioctl          = inet_ioctl,
  977         .listen         = sock_no_listen,
  978         .shutdown       = inet_shutdown,
  979         .setsockopt     = inet_setsockopt,
  980         .getsockopt     = inet_getsockopt,
  981         .sendmsg        = inet_sendmsg,
  982         .recvmsg        = inet_recvmsg,
  983         .mmap           = sock_no_mmap,
  984         .sendpage       = sock_no_sendpage,
  985 };
  986 
      /*
       *      Protocol family descriptor for PF_INET: inet_init() hands it to
       *      sock_register(), and socket creation goes through inet_create.
       */
  987 struct net_proto_family inet_family_ops = {
  988         .family = PF_INET,
  989         .create = inet_create
  990 };
  991 
  992 
  993 extern void tcp_init(void);
  994 extern void tcp_v4_init(struct net_proto_family *);
  995 
  996 /* Built-in protocol switch entries: at startup inet_init() passes each
  997  * element to inet_register_protosw(), which links it into inetsw[].
  998  */
  999 static struct inet_protosw inetsw_array[] =
 1000 {
 1001         {
 1002                 .type       = SOCK_STREAM,
 1003                 .protocol   = IPPROTO_TCP,
 1004                 .prot       = &tcp_prot,
 1005                 .ops        = &inet_stream_ops,
 1006                 .capability = -1,
 1007                 .no_check   = 0,
 1008                 .flags      = INET_PROTOSW_PERMANENT,
 1009         },
 1010 
 1011         {
 1012                 .type       = SOCK_DGRAM,
 1013                 .protocol   = IPPROTO_UDP,
 1014                 .prot       = &udp_prot,
 1015                 .ops        = &inet_dgram_ops,
 1016                 .capability = -1,
 1017                 .no_check   = UDP_CSUM_DEFAULT,
 1018                 .flags      = INET_PROTOSW_PERMANENT,
 1019         },
 1020 
 1021 
 1022         {
 1023                 .type       = SOCK_RAW,
 1024                 .protocol   = IPPROTO_IP, /* wild card */
 1025                 .prot       = &raw_prot,
 1026                 .ops        = &inet_dgram_ops,
 1027                 .capability = CAP_NET_RAW,
 1028                 .no_check   = UDP_CSUM_DEFAULT,
 1029                 .flags      = INET_PROTOSW_REUSE,
 1030         }
 1031 };
 1032 
      /* Number of elements in inetsw_array[]. */
 1033 #define INETSW_ARRAY_LEN (sizeof(inetsw_array) / sizeof(inetsw_array[0]))
 1034 
      /*
       *      Link a protocol switch entry into inetsw[p->type].
       *
       *      p:      entry to register; p->type selects the list, p->protocol
       *              is compared against the permanent entries already on it,
       *              and p->list is the node that gets linked in.
       *
       *      Nothing is returned.  The request is refused, with a KERN_ERR
       *      message, when p->type is not a valid index into inetsw[] or
       *      when a permanent entry with the same protocol number is already
       *      on the list.  The whole operation runs under the
       *      BR_NETPROTO_LOCK write lock.
       */
 1035 void
 1036 inet_register_protosw(struct inet_protosw *p)
 1037 {
 1038         struct list_head *lh;
 1039         struct inet_protosw *answer;
 1040         int protocol = p->protocol;
 1041         struct list_head *last_perm;
 1042 
 1043         br_write_lock_bh(BR_NETPROTO_LOCK);
 1044 
              /* inet_init() only sets up inetsw[0] .. inetsw[SOCK_MAX - 1], so
               * SOCK_MAX itself is already out of range and must be rejected
               * before inetsw[p->type] is touched.
               */
 1045         if (p->type >= SOCK_MAX)
 1046                 goto out_illegal;
 1047 
 1048         /* If we are trying to override a permanent protocol, bail. */
 1049         answer = NULL;
 1050         last_perm = &inetsw[p->type];
 1051         list_for_each(lh, &inetsw[p->type]) {
 1052                 answer = list_entry(lh, struct inet_protosw, list);
 1053 
 1054                 /* Check only the non-wild match. */
 1055                 if (INET_PROTOSW_PERMANENT & answer->flags) {
 1056                         if (protocol == answer->protocol)
 1057                                 break;
 1058                         last_perm = lh;
 1059                 }
 1060 
                      /* Not a conflict: clear answer so that it is non-NULL
                       * after the loop only when we left through the break.
                       */
 1061                 answer = NULL;
 1062         }
 1063         if (answer)
 1064                 goto out_permanent;
 1065 
 1066         /* Add the new entry after the last permanent entry if any, so that
 1067          * the new entry does not override a permanent entry when matched with
 1068          * a wild-card protocol. But it is allowed to override any existing
 1069          * non-permanent entry.  This means that when we remove this entry, the 
 1070          * system automatically returns to the old behavior.
 1071          */
 1072         list_add(&p->list, last_perm);
 1073 out:
 1074         br_write_unlock_bh(BR_NETPROTO_LOCK);
 1075         return;
 1076 
 1077 out_permanent:
 1078         printk(KERN_ERR "Attempt to override permanent protocol %d.\n",
 1079                protocol);
 1080         goto out;
 1081 
 1082 out_illegal:
 1083         printk(KERN_ERR
 1084                "Ignoring attempt to register illegal socket type %d.\n",
 1085                p->type);
 1086         goto out;
 1087 }
 1088 
      /*
       *      Unlink a protocol switch entry from its inetsw list.
       *
       *      p:      entry to remove.  Entries flagged INET_PROTOSW_PERMANENT
       *              are left in place and a KERN_ERR message is logged;
       *              anything else is removed with list_del() under the
       *              BR_NETPROTO_LOCK write lock.
       */
 1089 void
 1090 inet_unregister_protosw(struct inet_protosw *p)
 1091 {
 1092         if (p->flags & INET_PROTOSW_PERMANENT) {
 1093                 printk(KERN_ERR
 1094                        "Attempt to unregister permanent protocol %d.\n",
 1095                        p->protocol);
 1096                 return;
 1097         }
 1098         br_write_lock_bh(BR_NETPROTO_LOCK);
 1099         list_del(&p->list);
 1100         br_write_unlock_bh(BR_NETPROTO_LOCK);
 1101 }
 1102 
 1103 extern void ipfrag_init(void);
 1104 
 1105 /*
 1106  *      Called by socket.c on kernel startup.  
       *
       *      Brings up the PF_INET family: registers inet_family_ops with the
       *      socket layer, adds every protocol on the inet_protocol_base list,
       *      fills the inetsw[] lists from inetsw_array[], then initialises
       *      ARP, IP, TCP, ICMP, the optional tunnel and multicast-routing
       *      code, the /proc/net entries and the fragment handling, in that
       *      order.
       *
       *      Returns 0, or -EINVAL when struct inet_skb_parm does not fit
       *      into the sk_buff control block.  The function is hooked up via
       *      module_init() below.
 1107  */
 1108  
 1109 static int __init inet_init(void)
 1110 {
 1111         struct sk_buff *dummy_skb;
 1112         struct inet_protocol *p;
 1113         struct inet_protosw *q;
 1114         struct list_head *r;
 1115 
 1116         printk(KERN_INFO "NET4: Linux TCP/IP 1.0 for NET4.0\n");
 1117 
              /* dummy_skb is never assigned or dereferenced; it only gives
               * sizeof a way to name the cb member.  Despite the message
               * text, the failure path just returns -EINVAL.
               */
 1118         if (sizeof(struct inet_skb_parm) > sizeof(dummy_skb->cb)) {
 1119                 printk(KERN_CRIT "inet_proto_init: panic\n");
 1120                 return -EINVAL;
 1121         }
 1122 
 1123         /*
 1124          *      Tell SOCKET that we are alive... 
 1125          */
 1126    
              /* The result of sock_register() is deliberately discarded. */
 1127         (void) sock_register(&inet_family_ops);
 1128 
 1129         /*
 1130          *      Add all the protocols. 
 1131          */
 1132 
 1133         printk(KERN_INFO "IP Protocols: ");
              /* p->next is read before inet_add_protocol(p) is called --
               * presumably because that call relinks p; confirm against
               * inet_add_protocol().  tmp also decides whether a separator
               * or the terminating newline follows the name.
               */
 1134         for (p = inet_protocol_base; p != NULL;) {
 1135                 struct inet_protocol *tmp = (struct inet_protocol *) p->next;
 1136                 inet_add_protocol(p);
 1137                 printk("%s%s",p->name,tmp?", ":"\n");
 1138                 p = tmp;
 1139         }
 1140 
 1141         /* Register the socket-side information for inet_create. */
              /* List heads inetsw[0] .. inetsw[SOCK_MAX - 1] are initialised
               * here, before any entry is registered on them.
               */
 1142         for(r = &inetsw[0]; r < &inetsw[SOCK_MAX]; ++r)
 1143                 INIT_LIST_HEAD(r);
 1144 
 1145         for(q = inetsw_array; q < &inetsw_array[INETSW_ARRAY_LEN]; ++q)
 1146                 inet_register_protosw(q);
 1147 
 1148         /*
 1149          *      Set the ARP module up
 1150          */
 1151 
 1152         arp_init();
 1153 
 1154         /*
 1155          *      Set the IP module up
 1156          */
 1157 
 1158         ip_init();
 1159 
 1160         tcp_v4_init(&inet_family_ops);
 1161 
 1162         /* Setup TCP slab cache for open requests. */
 1163         tcp_init();
 1164 
 1165 
 1166         /*
 1167          *      Set the ICMP layer up
 1168          */
 1169 
 1170         icmp_init(&inet_family_ops);
 1171 
 1172         /* I wish inet_add_protocol had no constructor hook...
 1173            I had to move IPIP from net/ipv4/protocol.c :-( --ANK
 1174          */
 1175 #ifdef CONFIG_NET_IPIP
 1176         ipip_init();
 1177 #endif
 1178 #ifdef CONFIG_NET_IPGRE
 1179         ipgre_init();
 1180 #endif
 1181 
 1182         /*
 1183          *      Initialise the multicast router
 1184          */
 1185 #if defined(CONFIG_IP_MROUTE)
 1186         ip_mr_init();
 1187 #endif
 1188 
 1189         /*
 1190          *      Create all the /proc entries.
 1191          */
 1192 #ifdef CONFIG_PROC_FS
 1193         proc_net_create ("raw", 0, raw_get_info);
 1194         proc_net_create ("netstat", 0, netstat_get_info);
 1195         proc_net_create ("snmp", 0, snmp_get_info);
 1196         proc_net_create ("sockstat", 0, afinet_get_info);
 1197         proc_net_create ("tcp", 0, tcp_get_info);
 1198         proc_net_create ("udp", 0, udp_get_info);
 1199 #endif          /* CONFIG_PROC_FS */
 1200 
 1201         ipfrag_init();
 1202 
 1203         return 0;
 1204 }
      /* Register inet_init() as this file's initialisation entry point. */
 1205 module_init(inet_init);

Cache object: 94dfa07861597a8e6f1f74a20124ab06


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]


This page is part of the FreeBSD/Linux Linux Kernel Cross-Reference, and was automatically generated using a modified version of the LXR engine.