The Design and Implementation of the FreeBSD Operating System, Second Edition
Now available: The Design and Implementation of the FreeBSD Operating System (Second Edition)


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]

FreeBSD/Linux Kernel Cross Reference
sys/netinet/raw_ip.c

Version: -  FREEBSD  -  FREEBSD-13-STABLE  -  FREEBSD-13-0  -  FREEBSD-12-STABLE  -  FREEBSD-12-0  -  FREEBSD-11-STABLE  -  FREEBSD-11-0  -  FREEBSD-10-STABLE  -  FREEBSD-10-0  -  FREEBSD-9-STABLE  -  FREEBSD-9-0  -  FREEBSD-8-STABLE  -  FREEBSD-8-0  -  FREEBSD-7-STABLE  -  FREEBSD-7-0  -  FREEBSD-6-STABLE  -  FREEBSD-6-0  -  FREEBSD-5-STABLE  -  FREEBSD-5-0  -  FREEBSD-4-STABLE  -  FREEBSD-3-STABLE  -  FREEBSD22  -  l41  -  OPENBSD  -  linux-2.6  -  MK84  -  PLAN9  -  xnu-8792 
SearchContext: -  none  -  3  -  10 

    1 /*-
    2  * Copyright (c) 1982, 1986, 1988, 1993
    3  *      The Regents of the University of California.
    4  * All rights reserved.
    5  *
    6  * Redistribution and use in source and binary forms, with or without
    7  * modification, are permitted provided that the following conditions
    8  * are met:
    9  * 1. Redistributions of source code must retain the above copyright
   10  *    notice, this list of conditions and the following disclaimer.
   11  * 2. Redistributions in binary form must reproduce the above copyright
   12  *    notice, this list of conditions and the following disclaimer in the
   13  *    documentation and/or other materials provided with the distribution.
   14  * 4. Neither the name of the University nor the names of its contributors
   15  *    may be used to endorse or promote products derived from this software
   16  *    without specific prior written permission.
   17  *
   18  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
   19  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
   20  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
   21  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
   22  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
   23  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
   24  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
   25  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
   26  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
   27  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
   28  * SUCH DAMAGE.
   29  *
   30  *      @(#)raw_ip.c    8.7 (Berkeley) 5/15/95
   31  */
   32 
   33 #include <sys/cdefs.h>
   34 __FBSDID("$FreeBSD: releng/11.2/sys/netinet/raw_ip.c 331722 2018-03-29 02:50:57Z eadler $");
   35 
   36 #include "opt_inet.h"
   37 #include "opt_inet6.h"
   38 #include "opt_ipsec.h"
   39 
   40 #include <sys/param.h>
   41 #include <sys/jail.h>
   42 #include <sys/kernel.h>
   43 #include <sys/eventhandler.h>
   44 #include <sys/lock.h>
   45 #include <sys/malloc.h>
   46 #include <sys/mbuf.h>
   47 #include <sys/priv.h>
   48 #include <sys/proc.h>
   49 #include <sys/protosw.h>
   50 #include <sys/rmlock.h>
   51 #include <sys/rwlock.h>
   52 #include <sys/signalvar.h>
   53 #include <sys/socket.h>
   54 #include <sys/socketvar.h>
   55 #include <sys/sx.h>
   56 #include <sys/sysctl.h>
   57 #include <sys/systm.h>
   58 
   59 #include <vm/uma.h>
   60 
   61 #include <net/if.h>
   62 #include <net/if_var.h>
   63 #include <net/route.h>
   64 #include <net/vnet.h>
   65 
   66 #include <netinet/in.h>
   67 #include <netinet/in_systm.h>
   68 #include <netinet/in_pcb.h>
   69 #include <netinet/in_var.h>
   70 #include <netinet/if_ether.h>
   71 #include <netinet/ip.h>
   72 #include <netinet/ip_var.h>
   73 #include <netinet/ip_mroute.h>
   74 #include <netinet/ip_icmp.h>
   75 
   76 #include <netipsec/ipsec_support.h>
   77 
   78 #include <machine/stdarg.h>
   79 #include <security/mac/mac_framework.h>
   80 
/*
 * Per-vnet ip_defttl, initialised to IPDEFTTL and exported read/write as a
 * sysctl under _net_inet_ip (described there as the maximum TTL on IP
 * packets).  rip_output() does not read it; it takes the TTL from the pcb.
 */
VNET_DEFINE(int, ip_defttl) = IPDEFTTL;
SYSCTL_INT(_net_inet_ip, IPCTL_DEFTTL, ttl, CTLFLAG_VNET | CTLFLAG_RW,
    &VNET_NAME(ip_defttl), 0,
    "Maximum TTL on IP packets");
   85 
/*
 * Per-vnet pcb list head and pcbinfo for raw IP sockets.  Both are handed to
 * in_pcbinfo_init() by rip_init(); the V_ macros are the accessors used
 * throughout this file.
 */
VNET_DEFINE(struct inpcbhead, ripcb);
VNET_DEFINE(struct inpcbinfo, ripcbinfo);

#define V_ripcb                 VNET(ripcb)
#define V_ripcbinfo             VNET(ripcbinfo)
   91 
/*
 * Control and data hooks for ipfw, dummynet, divert and so on.
 * The data hooks are not used here but it is convenient
 * to keep them all in one place.
 *
 * Within this file only the two control hooks are called: rip_ctloutput()
 * forwards the IP_FW* options to ip_fw_ctl_ptr and the IP_DUMMYNET* options
 * to ip_dn_ctl_ptr, returning ENOPROTOOPT while the pointer is NULL.
 */
VNET_DEFINE(ip_fw_chk_ptr_t, ip_fw_chk_ptr) = NULL;
VNET_DEFINE(ip_fw_ctl_ptr_t, ip_fw_ctl_ptr) = NULL;

int     (*ip_dn_ctl_ptr)(struct sockopt *);
int     (*ip_dn_io_ptr)(struct mbuf **, int, struct ip_fw_args *);
void    (*ip_divert_ptr)(struct mbuf *, int);
int     (*ng_ipfw_input_p)(struct mbuf **, int,
                        struct ip_fw_args *, int);
  105 
  106 #ifdef INET
/*
 * Hooks for multicast routing. They all default to NULL, so leave them not
 * initialized and rely on BSS being set to 0.
 *
 * rip_ctloutput() is the consumer visible in this file: it calls
 * ip_mrouter_get / ip_mrouter_set for the MRT_* options and ip_rsvp_vif for
 * IP_RSVP_VIF_ON/OFF, and reports an error instead when the pointer is NULL.
 */

/*
 * The socket used to communicate with the multicast routing daemon.
 */
VNET_DEFINE(struct socket *, ip_mrouter);

/*
 * The various mrouter and rsvp functions.
 */
int (*ip_mrouter_set)(struct socket *, struct sockopt *);
int (*ip_mrouter_get)(struct socket *, struct sockopt *);
int (*ip_mrouter_done)(void);
int (*ip_mforward)(struct ip *, struct ifnet *, struct mbuf *,
                   struct ip_moptions *);
int (*mrt_ioctl)(u_long, caddr_t, int);
int (*legal_vif_num)(int);
u_long (*ip_mcast_src)(int);

int (*rsvp_input_p)(struct mbuf **, int *, int);
int (*ip_rsvp_vif)(struct socket *, struct sockopt *);
void (*ip_rsvp_force_done)(struct socket *);
  132 #endif /* INET */
  133 
/*
 * Protocol switch table; rip_input() consults it to see whether the raw
 * input routine is the registered handler for an unclaimed packet's protocol.
 */
extern  struct protosw inetsw[];

/*
 * Send and receive space settings for raw IP, both 9216 by default and
 * tunable at run time through the net.inet.raw sysctl nodes below.
 */
u_long  rip_sendspace = 9216;
SYSCTL_ULONG(_net_inet_raw, OID_AUTO, maxdgram, CTLFLAG_RW,
    &rip_sendspace, 0, "Maximum outgoing raw IP datagram size");

u_long  rip_recvspace = 9216;
SYSCTL_ULONG(_net_inet_raw, OID_AUTO, recvspace, CTLFLAG_RW,
    &rip_recvspace, 0, "Maximum space for incoming raw IP datagrams");
  143 
/*
 * Hash functions
 *
 * INP_PCBHASH_RAW() sums the protocol and both addresses, reduces the sum
 * modulo `mask' and adds one, so for unsigned operands it yields a value in
 * 1..mask and never 0.  Bucket 0 is used by rip_inshash() for pcbs that are
 * not fully specified, and rip_input() walks it separately.  The sum is
 * symmetric in laddr/faddr, which is why rip_input() may pass the packet's
 * source and destination in the opposite order from rip_inshash().
 *
 * NOTE(review): assumes ipi_hashmask is the table size minus one, so that
 * index `mask' is still inside ipi_hashbase -- confirm in in_pcbinfo_init().
 */

#define INP_PCBHASH_RAW_SIZE    256
#define INP_PCBHASH_RAW(proto, laddr, faddr, mask) \
        (((proto) + (laddr) + (faddr)) % (mask) + 1)
  151 
  152 #ifdef INET
  153 static void
  154 rip_inshash(struct inpcb *inp)
  155 {
  156         struct inpcbinfo *pcbinfo = inp->inp_pcbinfo;
  157         struct inpcbhead *pcbhash;
  158         int hash;
  159 
  160         INP_INFO_WLOCK_ASSERT(pcbinfo);
  161         INP_WLOCK_ASSERT(inp);
  162         
  163         if (inp->inp_ip_p != 0 &&
  164             inp->inp_laddr.s_addr != INADDR_ANY &&
  165             inp->inp_faddr.s_addr != INADDR_ANY) {
  166                 hash = INP_PCBHASH_RAW(inp->inp_ip_p, inp->inp_laddr.s_addr,
  167                     inp->inp_faddr.s_addr, pcbinfo->ipi_hashmask);
  168         } else
  169                 hash = 0;
  170         pcbhash = &pcbinfo->ipi_hashbase[hash];
  171         LIST_INSERT_HEAD(pcbhash, inp, inp_hash);
  172 }
  173 
/*
 * Unlink a raw pcb from whichever hash bucket rip_inshash() put it in.
 * The caller must hold the pcbinfo write lock and the pcb write lock.
 */
static void
rip_delhash(struct inpcb *inp)
{

        INP_INFO_WLOCK_ASSERT(inp->inp_pcbinfo);
        INP_WLOCK_ASSERT(inp);

        LIST_REMOVE(inp, inp_hash);
}
  183 #endif /* INET */
  184 
  185 /*
  186  * Raw interface to IP protocol.
  187  */
  188 
  189 /*
  190  * Initialize raw connection block q.
  191  */
/*
 * Event handler registered by rip_init() for maxsockets_change: re-applies
 * the current maxsockets value as the limit of the raw pcb zone.  The tag
 * argument is not used.
 */
static void
rip_zone_change(void *tag)
{

        uma_zone_set_max(V_ripcbinfo.ipi_zone, maxsockets);
}
  198 
  199 static int
  200 rip_inpcb_init(void *mem, int size, int flags)
  201 {
  202         struct inpcb *inp = mem;
  203 
  204         INP_LOCK_INIT(inp, "inp", "rawinp");
  205         return (0);
  206 }
  207 
/*
 * Initialise the raw IP pcbinfo for the current vnet with a hash table of
 * INP_PCBHASH_RAW_SIZE buckets and rip_inpcb_init() as the per-pcb
 * initialiser, then register rip_zone_change() so the pcb zone limit follows
 * later maxsockets changes.
 */
void
rip_init(void)
{

        in_pcbinfo_init(&V_ripcbinfo, "rip", &V_ripcb, INP_PCBHASH_RAW_SIZE,
            1, "ripcb", rip_inpcb_init, NULL, 0, IPI_HASHFIELDS_NONE);
        EVENTHANDLER_REGISTER(maxsockets_change, rip_zone_change, NULL,
            EVENTHANDLER_PRI_ANY);
}
  217 
  218 #ifdef VIMAGE
/*
 * VIMAGE-only teardown: releases the raw IP pcbinfo set up by rip_init().
 * Registered as a vnet SYSUNINIT below; the argument is unused.
 */
static void
rip_destroy(void *unused __unused)
{

        in_pcbinfo_destroy(&V_ripcbinfo);
}
VNET_SYSUNINIT(raw_ip, SI_SUB_PROTO_DOMAIN, SI_ORDER_FOURTH, rip_destroy, NULL);
  226 #endif
  227 
  228 #ifdef INET
  229 static int
  230 rip_append(struct inpcb *last, struct ip *ip, struct mbuf *n,
  231     struct sockaddr_in *ripsrc)
  232 {
  233         int policyfail = 0;
  234 
  235         INP_LOCK_ASSERT(last);
  236 
  237 #if defined(IPSEC) || defined(IPSEC_SUPPORT)
  238         /* check AH/ESP integrity. */
  239         if (IPSEC_ENABLED(ipv4)) {
  240                 if (IPSEC_CHECK_POLICY(ipv4, n, last) != 0)
  241                         policyfail = 1;
  242         }
  243 #endif /* IPSEC */
  244 #ifdef MAC
  245         if (!policyfail && mac_inpcb_check_deliver(last, n) != 0)
  246                 policyfail = 1;
  247 #endif
  248         /* Check the minimum TTL for socket. */
  249         if (last->inp_ip_minttl && last->inp_ip_minttl > ip->ip_ttl)
  250                 policyfail = 1;
  251         if (!policyfail) {
  252                 struct mbuf *opts = NULL;
  253                 struct socket *so;
  254 
  255                 so = last->inp_socket;
  256                 if ((last->inp_flags & INP_CONTROLOPTS) ||
  257                     (so->so_options & (SO_TIMESTAMP | SO_BINTIME)))
  258                         ip_savecontrol(last, &opts, ip, n);
  259                 SOCKBUF_LOCK(&so->so_rcv);
  260                 if (sbappendaddr_locked(&so->so_rcv,
  261                     (struct sockaddr *)ripsrc, n, opts) == 0) {
  262                         /* should notify about lost packet */
  263                         m_freem(n);
  264                         if (opts)
  265                                 m_freem(opts);
  266                         SOCKBUF_UNLOCK(&so->so_rcv);
  267                 } else
  268                         sorwakeup_locked(so);
  269         } else
  270                 m_freem(n);
  271         return (policyfail);
  272 }
  273 
  274 /*
  275  * Setup generic address and protocol structures for raw_input routine, then
  276  * pass them along with mbuf chain.
  277  */
  278 int
  279 rip_input(struct mbuf **mp, int *offp, int proto)
  280 {
  281         struct ifnet *ifp;
  282         struct mbuf *m = *mp;
  283         struct ip *ip = mtod(m, struct ip *);
  284         struct inpcb *inp, *last;
  285         struct sockaddr_in ripsrc;
  286         int hash;
  287 
  288         *mp = NULL;
  289 
  290         bzero(&ripsrc, sizeof(ripsrc));
  291         ripsrc.sin_len = sizeof(ripsrc);
  292         ripsrc.sin_family = AF_INET;
  293         ripsrc.sin_addr = ip->ip_src;
  294         last = NULL;
  295 
  296         ifp = m->m_pkthdr.rcvif;
  297 
  298         hash = INP_PCBHASH_RAW(proto, ip->ip_src.s_addr,
  299             ip->ip_dst.s_addr, V_ripcbinfo.ipi_hashmask);
  300         INP_INFO_RLOCK(&V_ripcbinfo);
  301         LIST_FOREACH(inp, &V_ripcbinfo.ipi_hashbase[hash], inp_hash) {
  302                 if (inp->inp_ip_p != proto)
  303                         continue;
  304 #ifdef INET6
  305                 /* XXX inp locking */
  306                 if ((inp->inp_vflag & INP_IPV4) == 0)
  307                         continue;
  308 #endif
  309                 if (inp->inp_laddr.s_addr != ip->ip_dst.s_addr)
  310                         continue;
  311                 if (inp->inp_faddr.s_addr != ip->ip_src.s_addr)
  312                         continue;
  313                 if (jailed_without_vnet(inp->inp_cred)) {
  314                         /*
  315                          * XXX: If faddr was bound to multicast group,
  316                          * jailed raw socket will drop datagram.
  317                          */
  318                         if (prison_check_ip4(inp->inp_cred, &ip->ip_dst) != 0)
  319                                 continue;
  320                 }
  321                 if (last != NULL) {
  322                         struct mbuf *n;
  323 
  324                         n = m_copy(m, 0, (int)M_COPYALL);
  325                         if (n != NULL)
  326                             (void) rip_append(last, ip, n, &ripsrc);
  327                         /* XXX count dropped packet */
  328                         INP_RUNLOCK(last);
  329                 }
  330                 INP_RLOCK(inp);
  331                 last = inp;
  332         }
  333         LIST_FOREACH(inp, &V_ripcbinfo.ipi_hashbase[0], inp_hash) {
  334                 if (inp->inp_ip_p && inp->inp_ip_p != proto)
  335                         continue;
  336 #ifdef INET6
  337                 /* XXX inp locking */
  338                 if ((inp->inp_vflag & INP_IPV4) == 0)
  339                         continue;
  340 #endif
  341                 if (!in_nullhost(inp->inp_laddr) &&
  342                     !in_hosteq(inp->inp_laddr, ip->ip_dst))
  343                         continue;
  344                 if (!in_nullhost(inp->inp_faddr) &&
  345                     !in_hosteq(inp->inp_faddr, ip->ip_src))
  346                         continue;
  347                 if (jailed_without_vnet(inp->inp_cred)) {
  348                         /*
  349                          * Allow raw socket in jail to receive multicast;
  350                          * assume process had PRIV_NETINET_RAW at attach,
  351                          * and fall through into normal filter path if so.
  352                          */
  353                         if (!IN_MULTICAST(ntohl(ip->ip_dst.s_addr)) &&
  354                             prison_check_ip4(inp->inp_cred, &ip->ip_dst) != 0)
  355                                 continue;
  356                 }
  357                 /*
  358                  * If this raw socket has multicast state, and we
  359                  * have received a multicast, check if this socket
  360                  * should receive it, as multicast filtering is now
  361                  * the responsibility of the transport layer.
  362                  */
  363                 if (inp->inp_moptions != NULL &&
  364                     IN_MULTICAST(ntohl(ip->ip_dst.s_addr))) {
  365                         /*
  366                          * If the incoming datagram is for IGMP, allow it
  367                          * through unconditionally to the raw socket.
  368                          *
  369                          * In the case of IGMPv2, we may not have explicitly
  370                          * joined the group, and may have set IFF_ALLMULTI
  371                          * on the interface. imo_multi_filter() may discard
  372                          * control traffic we actually need to see.
  373                          *
  374                          * Userland multicast routing daemons should continue
  375                          * filter the control traffic appropriately.
  376                          */
  377                         int blocked;
  378 
  379                         blocked = MCAST_PASS;
  380                         if (proto != IPPROTO_IGMP) {
  381                                 struct sockaddr_in group;
  382 
  383                                 bzero(&group, sizeof(struct sockaddr_in));
  384                                 group.sin_len = sizeof(struct sockaddr_in);
  385                                 group.sin_family = AF_INET;
  386                                 group.sin_addr = ip->ip_dst;
  387 
  388                                 blocked = imo_multi_filter(inp->inp_moptions,
  389                                     ifp,
  390                                     (struct sockaddr *)&group,
  391                                     (struct sockaddr *)&ripsrc);
  392                         }
  393 
  394                         if (blocked != MCAST_PASS) {
  395                                 IPSTAT_INC(ips_notmember);
  396                                 continue;
  397                         }
  398                 }
  399                 if (last != NULL) {
  400                         struct mbuf *n;
  401 
  402                         n = m_copy(m, 0, (int)M_COPYALL);
  403                         if (n != NULL)
  404                                 (void) rip_append(last, ip, n, &ripsrc);
  405                         /* XXX count dropped packet */
  406                         INP_RUNLOCK(last);
  407                 }
  408                 INP_RLOCK(inp);
  409                 last = inp;
  410         }
  411         INP_INFO_RUNLOCK(&V_ripcbinfo);
  412         if (last != NULL) {
  413                 if (rip_append(last, ip, m, &ripsrc) != 0)
  414                         IPSTAT_INC(ips_delivered);
  415                 INP_RUNLOCK(last);
  416         } else {
  417                 if (inetsw[ip_protox[ip->ip_p]].pr_input == rip_input) {
  418                         IPSTAT_INC(ips_noproto);
  419                         IPSTAT_DEC(ips_delivered);
  420                         icmp_error(m, ICMP_UNREACH, ICMP_UNREACH_PROTOCOL, 0, 0);
  421                 } else {
  422                         m_freem(m);
  423                 }
  424         }
  425         return (IPPROTO_DONE);
  426 }
  427 
  428 /*
  429  * Generate IP header and pass packet to ip_output.  Tack on options user may
  430  * have setup with control call.
  431  */
  432 int
  433 rip_output(struct mbuf *m, struct socket *so, ...)
  434 {
  435         struct ip *ip;
  436         int error;
  437         struct inpcb *inp = sotoinpcb(so);
  438         va_list ap;
  439         u_long dst;
  440         int flags = ((so->so_options & SO_DONTROUTE) ? IP_ROUTETOIF : 0) |
  441             IP_ALLOWBROADCAST;
  442 
  443         va_start(ap, so);
  444         dst = va_arg(ap, u_long);
  445         va_end(ap);
  446 
  447         /*
  448          * If the user handed us a complete IP packet, use it.  Otherwise,
  449          * allocate an mbuf for a header and fill it in.
  450          */
  451         if ((inp->inp_flags & INP_HDRINCL) == 0) {
  452                 if (m->m_pkthdr.len + sizeof(struct ip) > IP_MAXPACKET) {
  453                         m_freem(m);
  454                         return(EMSGSIZE);
  455                 }
  456                 M_PREPEND(m, sizeof(struct ip), M_NOWAIT);
  457                 if (m == NULL)
  458                         return(ENOBUFS);
  459 
  460                 INP_RLOCK(inp);
  461                 ip = mtod(m, struct ip *);
  462                 ip->ip_tos = inp->inp_ip_tos;
  463                 if (inp->inp_flags & INP_DONTFRAG)
  464                         ip->ip_off = htons(IP_DF);
  465                 else
  466                         ip->ip_off = htons(0);
  467                 ip->ip_p = inp->inp_ip_p;
  468                 ip->ip_len = htons(m->m_pkthdr.len);
  469                 ip->ip_src = inp->inp_laddr;
  470                 ip->ip_dst.s_addr = dst;
  471                 if (jailed(inp->inp_cred)) {
  472                         /*
  473                          * prison_local_ip4() would be good enough but would
  474                          * let a source of INADDR_ANY pass, which we do not
  475                          * want to see from jails.
  476                          */
  477                         if (ip->ip_src.s_addr == INADDR_ANY) {
  478                                 error = in_pcbladdr(inp, &ip->ip_dst, &ip->ip_src,
  479                                     inp->inp_cred);
  480                         } else {
  481                                 error = prison_local_ip4(inp->inp_cred,
  482                                     &ip->ip_src);
  483                         }
  484                         if (error != 0) {
  485                                 INP_RUNLOCK(inp);
  486                                 m_freem(m);
  487                                 return (error);
  488                         }
  489                 }
  490                 ip->ip_ttl = inp->inp_ip_ttl;
  491         } else {
  492                 if (m->m_pkthdr.len > IP_MAXPACKET) {
  493                         m_freem(m);
  494                         return(EMSGSIZE);
  495                 }
  496                 INP_RLOCK(inp);
  497                 ip = mtod(m, struct ip *);
  498                 error = prison_check_ip4(inp->inp_cred, &ip->ip_src);
  499                 if (error != 0) {
  500                         INP_RUNLOCK(inp);
  501                         m_freem(m);
  502                         return (error);
  503                 }
  504 
  505                 /*
  506                  * Don't allow both user specified and setsockopt options,
  507                  * and don't allow packet length sizes that will crash.
  508                  */
  509                 if (((ip->ip_hl != (sizeof (*ip) >> 2)) && inp->inp_options)
  510                     || (ntohs(ip->ip_len) != m->m_pkthdr.len)
  511                     || (ntohs(ip->ip_len) < (ip->ip_hl << 2))) {
  512                         INP_RUNLOCK(inp);
  513                         m_freem(m);
  514                         return (EINVAL);
  515                 }
  516                 /*
  517                  * This doesn't allow application to specify ID of zero,
  518                  * but we got this limitation from the beginning of history.
  519                  */
  520                 if (ip->ip_id == 0)
  521                         ip_fillid(ip);
  522 
  523                 /*
  524                  * XXX prevent ip_output from overwriting header fields.
  525                  */
  526                 flags |= IP_RAWOUTPUT;
  527                 IPSTAT_INC(ips_rawout);
  528         }
  529 
  530         if (inp->inp_flags & INP_ONESBCAST)
  531                 flags |= IP_SENDONES;
  532 
  533 #ifdef MAC
  534         mac_inpcb_create_mbuf(inp, m);
  535 #endif
  536 
  537         error = ip_output(m, inp->inp_options, NULL, flags,
  538             inp->inp_moptions, inp);
  539         INP_RUNLOCK(inp);
  540         return (error);
  541 }
  542 
  543 /*
  544  * Raw IP socket option processing.
  545  *
  546  * IMPORTANT NOTE regarding access control: Traditionally, raw sockets could
  547  * only be created by a privileged process, and as such, socket option
  548  * operations to manage system properties on any raw socket were allowed to
  549  * take place without explicit additional access control checks.  However,
  550  * raw sockets can now also be created in jail(), and therefore explicit
  551  * checks are now required.  Likewise, raw sockets can be used by a process
  552  * after it gives up privilege, so some caution is required.  For options
  553  * passed down to the IP layer via ip_ctloutput(), checks are assumed to be
  554  * performed in ip_ctloutput() and therefore no check occurs here.
  555  * Unilaterally checking priv_check() here breaks normal IP socket option
  556  * operations on raw sockets.
  557  *
  558  * When adding new socket options here, make sure to add access control
  559  * checks here as necessary.
  560  *
  561  * XXX-BZ inp locking?
  562  */
  563 int
  564 rip_ctloutput(struct socket *so, struct sockopt *sopt)
  565 {
  566         struct  inpcb *inp = sotoinpcb(so);
  567         int     error, optval;
  568 
  569         if (sopt->sopt_level != IPPROTO_IP) {
  570                 if ((sopt->sopt_level == SOL_SOCKET) &&
  571                     (sopt->sopt_name == SO_SETFIB)) {
  572                         inp->inp_inc.inc_fibnum = so->so_fibnum;
  573                         return (0);
  574                 }
  575                 return (EINVAL);
  576         }
  577 
  578         error = 0;
  579         switch (sopt->sopt_dir) {
  580         case SOPT_GET:
  581                 switch (sopt->sopt_name) {
  582                 case IP_HDRINCL:
  583                         optval = inp->inp_flags & INP_HDRINCL;
  584                         error = sooptcopyout(sopt, &optval, sizeof optval);
  585                         break;
  586 
  587                 case IP_FW3:    /* generic ipfw v.3 functions */
  588                 case IP_FW_ADD: /* ADD actually returns the body... */
  589                 case IP_FW_GET:
  590                 case IP_FW_TABLE_GETSIZE:
  591                 case IP_FW_TABLE_LIST:
  592                 case IP_FW_NAT_GET_CONFIG:
  593                 case IP_FW_NAT_GET_LOG:
  594                         if (V_ip_fw_ctl_ptr != NULL)
  595                                 error = V_ip_fw_ctl_ptr(sopt);
  596                         else
  597                                 error = ENOPROTOOPT;
  598                         break;
  599 
  600                 case IP_DUMMYNET3:      /* generic dummynet v.3 functions */
  601                 case IP_DUMMYNET_GET:
  602                         if (ip_dn_ctl_ptr != NULL)
  603                                 error = ip_dn_ctl_ptr(sopt);
  604                         else
  605                                 error = ENOPROTOOPT;
  606                         break ;
  607 
  608                 case MRT_INIT:
  609                 case MRT_DONE:
  610                 case MRT_ADD_VIF:
  611                 case MRT_DEL_VIF:
  612                 case MRT_ADD_MFC:
  613                 case MRT_DEL_MFC:
  614                 case MRT_VERSION:
  615                 case MRT_ASSERT:
  616                 case MRT_API_SUPPORT:
  617                 case MRT_API_CONFIG:
  618                 case MRT_ADD_BW_UPCALL:
  619                 case MRT_DEL_BW_UPCALL:
  620                         error = priv_check(curthread, PRIV_NETINET_MROUTE);
  621                         if (error != 0)
  622                                 return (error);
  623                         error = ip_mrouter_get ? ip_mrouter_get(so, sopt) :
  624                                 EOPNOTSUPP;
  625                         break;
  626 
  627                 default:
  628                         error = ip_ctloutput(so, sopt);
  629                         break;
  630                 }
  631                 break;
  632 
  633         case SOPT_SET:
  634                 switch (sopt->sopt_name) {
  635                 case IP_HDRINCL:
  636                         error = sooptcopyin(sopt, &optval, sizeof optval,
  637                                             sizeof optval);
  638                         if (error)
  639                                 break;
  640                         if (optval)
  641                                 inp->inp_flags |= INP_HDRINCL;
  642                         else
  643                                 inp->inp_flags &= ~INP_HDRINCL;
  644                         break;
  645 
  646                 case IP_FW3:    /* generic ipfw v.3 functions */
  647                 case IP_FW_ADD:
  648                 case IP_FW_DEL:
  649                 case IP_FW_FLUSH:
  650                 case IP_FW_ZERO:
  651                 case IP_FW_RESETLOG:
  652                 case IP_FW_TABLE_ADD:
  653                 case IP_FW_TABLE_DEL:
  654                 case IP_FW_TABLE_FLUSH:
  655                 case IP_FW_NAT_CFG:
  656                 case IP_FW_NAT_DEL:
  657                         if (V_ip_fw_ctl_ptr != NULL)
  658                                 error = V_ip_fw_ctl_ptr(sopt);
  659                         else
  660                                 error = ENOPROTOOPT;
  661                         break;
  662 
  663                 case IP_DUMMYNET3:      /* generic dummynet v.3 functions */
  664                 case IP_DUMMYNET_CONFIGURE:
  665                 case IP_DUMMYNET_DEL:
  666                 case IP_DUMMYNET_FLUSH:
  667                         if (ip_dn_ctl_ptr != NULL)
  668                                 error = ip_dn_ctl_ptr(sopt);
  669                         else
  670                                 error = ENOPROTOOPT ;
  671                         break ;
  672 
  673                 case IP_RSVP_ON:
  674                         error = priv_check(curthread, PRIV_NETINET_MROUTE);
  675                         if (error != 0)
  676                                 return (error);
  677                         error = ip_rsvp_init(so);
  678                         break;
  679 
  680                 case IP_RSVP_OFF:
  681                         error = priv_check(curthread, PRIV_NETINET_MROUTE);
  682                         if (error != 0)
  683                                 return (error);
  684                         error = ip_rsvp_done();
  685                         break;
  686 
  687                 case IP_RSVP_VIF_ON:
  688                 case IP_RSVP_VIF_OFF:
  689                         error = priv_check(curthread, PRIV_NETINET_MROUTE);
  690                         if (error != 0)
  691                                 return (error);
  692                         error = ip_rsvp_vif ?
  693                                 ip_rsvp_vif(so, sopt) : EINVAL;
  694                         break;
  695 
  696                 case MRT_INIT:
  697                 case MRT_DONE:
  698                 case MRT_ADD_VIF:
  699                 case MRT_DEL_VIF:
  700                 case MRT_ADD_MFC:
  701                 case MRT_DEL_MFC:
  702                 case MRT_VERSION:
  703                 case MRT_ASSERT:
  704                 case MRT_API_SUPPORT:
  705                 case MRT_API_CONFIG:
  706                 case MRT_ADD_BW_UPCALL:
  707                 case MRT_DEL_BW_UPCALL:
  708                         error = priv_check(curthread, PRIV_NETINET_MROUTE);
  709                         if (error != 0)
  710                                 return (error);
  711                         error = ip_mrouter_set ? ip_mrouter_set(so, sopt) :
  712                                         EOPNOTSUPP;
  713                         break;
  714 
  715                 default:
  716                         error = ip_ctloutput(so, sopt);
  717                         break;
  718                 }
  719                 break;
  720         }
  721 
  722         return (error);
  723 }
  724 
  725 /*
  726  * This function exists solely to receive the PRC_IFDOWN messages which are
  727  * sent by if_down().  It looks for an ifaddr whose ifa_addr is sa, and calls
  728  * in_ifadown() to remove all routes corresponding to that address.  It also
  729  * receives the PRC_IFUP messages from if_up() and reinstalls the interface
  730  * routes.
  731  */
  732 void
  733 rip_ctlinput(int cmd, struct sockaddr *sa, void *vip)
  734 {
  735         struct rm_priotracker in_ifa_tracker;
  736         struct in_ifaddr *ia;
  737         struct ifnet *ifp;
  738         int err;
  739         int flags;
  740 
  741         switch (cmd) {
  742         case PRC_IFDOWN:
  743                 IN_IFADDR_RLOCK(&in_ifa_tracker);
  744                 TAILQ_FOREACH(ia, &V_in_ifaddrhead, ia_link) {
  745                         if (ia->ia_ifa.ifa_addr == sa
  746                             && (ia->ia_flags & IFA_ROUTE)) {
  747                                 ifa_ref(&ia->ia_ifa);
  748                                 IN_IFADDR_RUNLOCK(&in_ifa_tracker);
  749                                 /*
  750                                  * in_scrubprefix() kills the interface route.
  751                                  */
  752                                 in_scrubprefix(ia, 0);
  753                                 /*
  754                                  * in_ifadown gets rid of all the rest of the
  755                                  * routes.  This is not quite the right thing
  756                                  * to do, but at least if we are running a
  757                                  * routing process they will come back.
  758                                  */
  759                                 in_ifadown(&ia->ia_ifa, 0);
  760                                 ifa_free(&ia->ia_ifa);
  761                                 break;
  762                         }
  763                 }
  764                 if (ia == NULL)         /* If ia matched, already unlocked. */
  765                         IN_IFADDR_RUNLOCK(&in_ifa_tracker);
  766                 break;
  767 
  768         case PRC_IFUP:
  769                 IN_IFADDR_RLOCK(&in_ifa_tracker);
  770                 TAILQ_FOREACH(ia, &V_in_ifaddrhead, ia_link) {
  771                         if (ia->ia_ifa.ifa_addr == sa)
  772                                 break;
  773                 }
  774                 if (ia == NULL || (ia->ia_flags & IFA_ROUTE)) {
  775                         IN_IFADDR_RUNLOCK(&in_ifa_tracker);
  776                         return;
  777                 }
  778                 ifa_ref(&ia->ia_ifa);
  779                 IN_IFADDR_RUNLOCK(&in_ifa_tracker);
  780                 flags = RTF_UP;
  781                 ifp = ia->ia_ifa.ifa_ifp;
  782 
  783                 if ((ifp->if_flags & IFF_LOOPBACK)
  784                     || (ifp->if_flags & IFF_POINTOPOINT))
  785                         flags |= RTF_HOST;
  786 
  787                 err = ifa_del_loopback_route((struct ifaddr *)ia, sa);
  788 
  789                 err = rtinit(&ia->ia_ifa, RTM_ADD, flags);
  790                 if (err == 0)
  791                         ia->ia_flags |= IFA_ROUTE;
  792 
  793                 err = ifa_add_loopback_route((struct ifaddr *)ia, sa);
  794 
  795                 ifa_free(&ia->ia_ifa);
  796                 break;
  797         }
  798 }
  799 
  800 static int
  801 rip_attach(struct socket *so, int proto, struct thread *td)
  802 {
  803         struct inpcb *inp;
  804         int error;
  805 
  806         inp = sotoinpcb(so);
  807         KASSERT(inp == NULL, ("rip_attach: inp != NULL"));
  808 
  809         error = priv_check(td, PRIV_NETINET_RAW);
  810         if (error)
  811                 return (error);
  812         if (proto >= IPPROTO_MAX || proto < 0)
  813                 return EPROTONOSUPPORT;
  814         error = soreserve(so, rip_sendspace, rip_recvspace);
  815         if (error)
  816                 return (error);
  817         INP_INFO_WLOCK(&V_ripcbinfo);
  818         error = in_pcballoc(so, &V_ripcbinfo);
  819         if (error) {
  820                 INP_INFO_WUNLOCK(&V_ripcbinfo);
  821                 return (error);
  822         }
  823         inp = (struct inpcb *)so->so_pcb;
  824         inp->inp_vflag |= INP_IPV4;
  825         inp->inp_ip_p = proto;
  826         inp->inp_ip_ttl = V_ip_defttl;
  827         rip_inshash(inp);
  828         INP_INFO_WUNLOCK(&V_ripcbinfo);
  829         INP_WUNLOCK(inp);
  830         return (0);
  831 }
  832 
  833 static void
  834 rip_detach(struct socket *so)
  835 {
  836         struct inpcb *inp;
  837 
  838         inp = sotoinpcb(so);
  839         KASSERT(inp != NULL, ("rip_detach: inp == NULL"));
  840         KASSERT(inp->inp_faddr.s_addr == INADDR_ANY, 
  841             ("rip_detach: not closed"));
  842 
  843         INP_INFO_WLOCK(&V_ripcbinfo);
  844         INP_WLOCK(inp);
  845         rip_delhash(inp);
  846         if (so == V_ip_mrouter && ip_mrouter_done)
  847                 ip_mrouter_done();
  848         if (ip_rsvp_force_done)
  849                 ip_rsvp_force_done(so);
  850         if (so == V_ip_rsvpd)
  851                 ip_rsvp_done();
  852         in_pcbdetach(inp);
  853         in_pcbfree(inp);
  854         INP_INFO_WUNLOCK(&V_ripcbinfo);
  855 }
  856 
  857 static void
  858 rip_dodisconnect(struct socket *so, struct inpcb *inp)
  859 {
  860         struct inpcbinfo *pcbinfo;
  861 
  862         pcbinfo = inp->inp_pcbinfo;
  863         INP_INFO_WLOCK(pcbinfo);
  864         INP_WLOCK(inp);
  865         rip_delhash(inp);
  866         inp->inp_faddr.s_addr = INADDR_ANY;
  867         rip_inshash(inp);
  868         SOCK_LOCK(so);
  869         so->so_state &= ~SS_ISCONNECTED;
  870         SOCK_UNLOCK(so);
  871         INP_WUNLOCK(inp);
  872         INP_INFO_WUNLOCK(pcbinfo);
  873 }
  874 
  875 static void
  876 rip_abort(struct socket *so)
  877 {
  878         struct inpcb *inp;
  879 
  880         inp = sotoinpcb(so);
  881         KASSERT(inp != NULL, ("rip_abort: inp == NULL"));
  882 
  883         rip_dodisconnect(so, inp);
  884 }
  885 
  886 static void
  887 rip_close(struct socket *so)
  888 {
  889         struct inpcb *inp;
  890 
  891         inp = sotoinpcb(so);
  892         KASSERT(inp != NULL, ("rip_close: inp == NULL"));
  893 
  894         rip_dodisconnect(so, inp);
  895 }
  896 
  897 static int
  898 rip_disconnect(struct socket *so)
  899 {
  900         struct inpcb *inp;
  901 
  902         if ((so->so_state & SS_ISCONNECTED) == 0)
  903                 return (ENOTCONN);
  904 
  905         inp = sotoinpcb(so);
  906         KASSERT(inp != NULL, ("rip_disconnect: inp == NULL"));
  907 
  908         rip_dodisconnect(so, inp);
  909         return (0);
  910 }
  911 
  912 static int
  913 rip_bind(struct socket *so, struct sockaddr *nam, struct thread *td)
  914 {
  915         struct sockaddr_in *addr = (struct sockaddr_in *)nam;
  916         struct inpcb *inp;
  917         int error;
  918 
  919         if (nam->sa_len != sizeof(*addr))
  920                 return (EINVAL);
  921 
  922         error = prison_check_ip4(td->td_ucred, &addr->sin_addr);
  923         if (error != 0)
  924                 return (error);
  925 
  926         inp = sotoinpcb(so);
  927         KASSERT(inp != NULL, ("rip_bind: inp == NULL"));
  928 
  929         if (TAILQ_EMPTY(&V_ifnet) ||
  930             (addr->sin_family != AF_INET && addr->sin_family != AF_IMPLINK) ||
  931             (addr->sin_addr.s_addr &&
  932              (inp->inp_flags & INP_BINDANY) == 0 &&
  933              ifa_ifwithaddr_check((struct sockaddr *)addr) == 0))
  934                 return (EADDRNOTAVAIL);
  935 
  936         INP_INFO_WLOCK(&V_ripcbinfo);
  937         INP_WLOCK(inp);
  938         rip_delhash(inp);
  939         inp->inp_laddr = addr->sin_addr;
  940         rip_inshash(inp);
  941         INP_WUNLOCK(inp);
  942         INP_INFO_WUNLOCK(&V_ripcbinfo);
  943         return (0);
  944 }
  945 
  946 static int
  947 rip_connect(struct socket *so, struct sockaddr *nam, struct thread *td)
  948 {
  949         struct sockaddr_in *addr = (struct sockaddr_in *)nam;
  950         struct inpcb *inp;
  951 
  952         if (nam->sa_len != sizeof(*addr))
  953                 return (EINVAL);
  954         if (TAILQ_EMPTY(&V_ifnet))
  955                 return (EADDRNOTAVAIL);
  956         if (addr->sin_family != AF_INET && addr->sin_family != AF_IMPLINK)
  957                 return (EAFNOSUPPORT);
  958 
  959         inp = sotoinpcb(so);
  960         KASSERT(inp != NULL, ("rip_connect: inp == NULL"));
  961 
  962         INP_INFO_WLOCK(&V_ripcbinfo);
  963         INP_WLOCK(inp);
  964         rip_delhash(inp);
  965         inp->inp_faddr = addr->sin_addr;
  966         rip_inshash(inp);
  967         soisconnected(so);
  968         INP_WUNLOCK(inp);
  969         INP_INFO_WUNLOCK(&V_ripcbinfo);
  970         return (0);
  971 }
  972 
  973 static int
  974 rip_shutdown(struct socket *so)
  975 {
  976         struct inpcb *inp;
  977 
  978         inp = sotoinpcb(so);
  979         KASSERT(inp != NULL, ("rip_shutdown: inp == NULL"));
  980 
  981         INP_WLOCK(inp);
  982         socantsendmore(so);
  983         INP_WUNLOCK(inp);
  984         return (0);
  985 }
  986 
  987 static int
  988 rip_send(struct socket *so, int flags, struct mbuf *m, struct sockaddr *nam,
  989     struct mbuf *control, struct thread *td)
  990 {
  991         struct inpcb *inp;
  992         u_long dst;
  993 
  994         inp = sotoinpcb(so);
  995         KASSERT(inp != NULL, ("rip_send: inp == NULL"));
  996 
  997         /*
  998          * Note: 'dst' reads below are unlocked.
  999          */
 1000         if (so->so_state & SS_ISCONNECTED) {
 1001                 if (nam) {
 1002                         m_freem(m);
 1003                         return (EISCONN);
 1004                 }
 1005                 dst = inp->inp_faddr.s_addr;    /* Unlocked read. */
 1006         } else {
 1007                 if (nam == NULL) {
 1008                         m_freem(m);
 1009                         return (ENOTCONN);
 1010                 }
 1011                 dst = ((struct sockaddr_in *)nam)->sin_addr.s_addr;
 1012         }
 1013         return (rip_output(m, so, dst));
 1014 }
 1015 #endif /* INET */
 1016 
 1017 static int
 1018 rip_pcblist(SYSCTL_HANDLER_ARGS)
 1019 {
 1020         int error, i, n;
 1021         struct inpcb *inp, **inp_list;
 1022         inp_gen_t gencnt;
 1023         struct xinpgen xig;
 1024 
 1025         /*
 1026          * The process of preparing the TCB list is too time-consuming and
 1027          * resource-intensive to repeat twice on every request.
 1028          */
 1029         if (req->oldptr == 0) {
 1030                 n = V_ripcbinfo.ipi_count;
 1031                 n += imax(n / 8, 10);
 1032                 req->oldidx = 2 * (sizeof xig) + n * sizeof(struct xinpcb);
 1033                 return (0);
 1034         }
 1035 
 1036         if (req->newptr != 0)
 1037                 return (EPERM);
 1038 
 1039         /*
 1040          * OK, now we're committed to doing something.
 1041          */
 1042         INP_INFO_RLOCK(&V_ripcbinfo);
 1043         gencnt = V_ripcbinfo.ipi_gencnt;
 1044         n = V_ripcbinfo.ipi_count;
 1045         INP_INFO_RUNLOCK(&V_ripcbinfo);
 1046 
 1047         xig.xig_len = sizeof xig;
 1048         xig.xig_count = n;
 1049         xig.xig_gen = gencnt;
 1050         xig.xig_sogen = so_gencnt;
 1051         error = SYSCTL_OUT(req, &xig, sizeof xig);
 1052         if (error)
 1053                 return (error);
 1054 
 1055         inp_list = malloc(n * sizeof *inp_list, M_TEMP, M_WAITOK);
 1056         if (inp_list == NULL)
 1057                 return (ENOMEM);
 1058 
 1059         INP_INFO_RLOCK(&V_ripcbinfo);
 1060         for (inp = LIST_FIRST(V_ripcbinfo.ipi_listhead), i = 0; inp && i < n;
 1061              inp = LIST_NEXT(inp, inp_list)) {
 1062                 INP_WLOCK(inp);
 1063                 if (inp->inp_gencnt <= gencnt &&
 1064                     cr_canseeinpcb(req->td->td_ucred, inp) == 0) {
 1065                         in_pcbref(inp);
 1066                         inp_list[i++] = inp;
 1067                 }
 1068                 INP_WUNLOCK(inp);
 1069         }
 1070         INP_INFO_RUNLOCK(&V_ripcbinfo);
 1071         n = i;
 1072 
 1073         error = 0;
 1074         for (i = 0; i < n; i++) {
 1075                 inp = inp_list[i];
 1076                 INP_RLOCK(inp);
 1077                 if (inp->inp_gencnt <= gencnt) {
 1078                         struct xinpcb xi;
 1079 
 1080                         bzero(&xi, sizeof(xi));
 1081                         xi.xi_len = sizeof xi;
 1082                         /* XXX should avoid extra copy */
 1083                         bcopy(inp, &xi.xi_inp, sizeof *inp);
 1084                         if (inp->inp_socket)
 1085                                 sotoxsocket(inp->inp_socket, &xi.xi_socket);
 1086                         INP_RUNLOCK(inp);
 1087                         error = SYSCTL_OUT(req, &xi, sizeof xi);
 1088                 } else
 1089                         INP_RUNLOCK(inp);
 1090         }
 1091         INP_INFO_WLOCK(&V_ripcbinfo);
 1092         for (i = 0; i < n; i++) {
 1093                 inp = inp_list[i];
 1094                 INP_RLOCK(inp);
 1095                 if (!in_pcbrele_rlocked(inp))
 1096                         INP_RUNLOCK(inp);
 1097         }
 1098         INP_INFO_WUNLOCK(&V_ripcbinfo);
 1099 
 1100         if (!error) {
 1101                 /*
 1102                  * Give the user an updated idea of our state.  If the
 1103                  * generation differs from what we told her before, she knows
 1104                  * that something happened while we were processing this
 1105                  * request, and it might be necessary to retry.
 1106                  */
 1107                 INP_INFO_RLOCK(&V_ripcbinfo);
 1108                 xig.xig_gen = V_ripcbinfo.ipi_gencnt;
 1109                 xig.xig_sogen = so_gencnt;
 1110                 xig.xig_count = V_ripcbinfo.ipi_count;
 1111                 INP_INFO_RUNLOCK(&V_ripcbinfo);
 1112                 error = SYSCTL_OUT(req, &xig, sizeof xig);
 1113         }
 1114         free(inp_list, M_TEMP);
 1115         return (error);
 1116 }
 1117 
 1118 SYSCTL_PROC(_net_inet_raw, OID_AUTO/*XXX*/, pcblist,
 1119     CTLTYPE_OPAQUE | CTLFLAG_RD, NULL, 0,
 1120     rip_pcblist, "S,xinpcb", "List of active raw IP sockets");
 1121 
 1122 #ifdef INET
 1123 struct pr_usrreqs rip_usrreqs = {
 1124         .pru_abort =            rip_abort,
 1125         .pru_attach =           rip_attach,
 1126         .pru_bind =             rip_bind,
 1127         .pru_connect =          rip_connect,
 1128         .pru_control =          in_control,
 1129         .pru_detach =           rip_detach,
 1130         .pru_disconnect =       rip_disconnect,
 1131         .pru_peeraddr =         in_getpeeraddr,
 1132         .pru_send =             rip_send,
 1133         .pru_shutdown =         rip_shutdown,
 1134         .pru_sockaddr =         in_getsockaddr,
 1135         .pru_sosetlabel =       in_pcbsosetlabel,
 1136         .pru_close =            rip_close,
 1137 };
 1138 #endif /* INET */

Cache object: 4afc2351131aec1342ed7ce264f79a67


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]


This page is part of the FreeBSD/Linux Kernel Cross-Reference, and was automatically generated using a modified version of the LXR engine.