The Design and Implementation of the FreeBSD Operating System, Second Edition
Now available: The Design and Implementation of the FreeBSD Operating System (Second Edition)


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]

FreeBSD/Linux Kernel Cross Reference
sys/netinet/raw_ip.c

Version: -  FREEBSD  -  FREEBSD-13-STABLE  -  FREEBSD-13-0  -  FREEBSD-12-STABLE  -  FREEBSD-12-0  -  FREEBSD-11-STABLE  -  FREEBSD-11-0  -  FREEBSD-10-STABLE  -  FREEBSD-10-0  -  FREEBSD-9-STABLE  -  FREEBSD-9-0  -  FREEBSD-8-STABLE  -  FREEBSD-8-0  -  FREEBSD-7-STABLE  -  FREEBSD-7-0  -  FREEBSD-6-STABLE  -  FREEBSD-6-0  -  FREEBSD-5-STABLE  -  FREEBSD-5-0  -  FREEBSD-4-STABLE  -  FREEBSD-3-STABLE  -  FREEBSD22  -  l41  -  OPENBSD  -  linux-2.6  -  MK84  -  PLAN9  -  xnu-8792 
SearchContext: -  none  -  3  -  10 

    1 /*-
    2  * SPDX-License-Identifier: BSD-3-Clause
    3  *
    4  * Copyright (c) 1982, 1986, 1988, 1993
    5  *      The Regents of the University of California.
    6  * All rights reserved.
    7  *
    8  * Redistribution and use in source and binary forms, with or without
    9  * modification, are permitted provided that the following conditions
   10  * are met:
   11  * 1. Redistributions of source code must retain the above copyright
   12  *    notice, this list of conditions and the following disclaimer.
   13  * 2. Redistributions in binary form must reproduce the above copyright
   14  *    notice, this list of conditions and the following disclaimer in the
   15  *    documentation and/or other materials provided with the distribution.
   16  * 3. Neither the name of the University nor the names of its contributors
   17  *    may be used to endorse or promote products derived from this software
   18  *    without specific prior written permission.
   19  *
   20  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
   21  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
   22  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
   23  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
   24  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
   25  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
   26  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
   27  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
   28  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
   29  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
   30  * SUCH DAMAGE.
   31  *
   32  *      @(#)raw_ip.c    8.7 (Berkeley) 5/15/95
   33  */
   34 
   35 #include <sys/cdefs.h>
   36 __FBSDID("$FreeBSD: releng/12.0/sys/netinet/raw_ip.c 340980 2018-11-26 16:36:38Z markj $");
   37 
   38 #include "opt_inet.h"
   39 #include "opt_inet6.h"
   40 #include "opt_ipsec.h"
   41 
   42 #include <sys/param.h>
   43 #include <sys/jail.h>
   44 #include <sys/kernel.h>
   45 #include <sys/eventhandler.h>
   46 #include <sys/lock.h>
   47 #include <sys/malloc.h>
   48 #include <sys/mbuf.h>
   49 #include <sys/priv.h>
   50 #include <sys/proc.h>
   51 #include <sys/protosw.h>
   52 #include <sys/rmlock.h>
   53 #include <sys/rwlock.h>
   54 #include <sys/signalvar.h>
   55 #include <sys/socket.h>
   56 #include <sys/socketvar.h>
   57 #include <sys/sx.h>
   58 #include <sys/sysctl.h>
   59 #include <sys/systm.h>
   60 
   61 #include <vm/uma.h>
   62 
   63 #include <net/if.h>
   64 #include <net/if_var.h>
   65 #include <net/route.h>
   66 #include <net/vnet.h>
   67 
   68 #include <netinet/in.h>
   69 #include <netinet/in_systm.h>
   70 #include <netinet/in_pcb.h>
   71 #include <netinet/in_var.h>
   72 #include <netinet/if_ether.h>
   73 #include <netinet/ip.h>
   74 #include <netinet/ip_var.h>
   75 #include <netinet/ip_mroute.h>
   76 #include <netinet/ip_icmp.h>
   77 
   78 #include <netipsec/ipsec_support.h>
   79 
   80 #include <machine/stdarg.h>
   81 #include <security/mac/mac_framework.h>
   82 
   83 VNET_DEFINE(int, ip_defttl) = IPDEFTTL;
   84 SYSCTL_INT(_net_inet_ip, IPCTL_DEFTTL, ttl, CTLFLAG_VNET | CTLFLAG_RW,
   85     &VNET_NAME(ip_defttl), 0,
   86     "Maximum TTL on IP packets");
   87 
   88 VNET_DEFINE(struct inpcbhead, ripcb);
   89 VNET_DEFINE(struct inpcbinfo, ripcbinfo);
   90 
   91 #define V_ripcb                 VNET(ripcb)
   92 #define V_ripcbinfo             VNET(ripcbinfo)
   93 
   94 /*
   95  * Control and data hooks for ipfw, dummynet, divert and so on.
   96  * The data hooks are not used here but it is convenient
   97  * to keep them all in one place.
   98  */
   99 VNET_DEFINE(ip_fw_chk_ptr_t, ip_fw_chk_ptr) = NULL;
  100 VNET_DEFINE(ip_fw_ctl_ptr_t, ip_fw_ctl_ptr) = NULL;
  101 
  102 int     (*ip_dn_ctl_ptr)(struct sockopt *);
  103 int     (*ip_dn_io_ptr)(struct mbuf **, int, struct ip_fw_args *);
  104 void    (*ip_divert_ptr)(struct mbuf *, int);
  105 int     (*ng_ipfw_input_p)(struct mbuf **, int,
  106                         struct ip_fw_args *, int);
  107 
  108 #ifdef INET
  109 /*
  110  * Hooks for multicast routing. They all default to NULL, so leave them not
  111  * initialized and rely on BSS being set to 0.
  112  */
  113 
  114 /*
  115  * The socket used to communicate with the multicast routing daemon.
  116  */
  117 VNET_DEFINE(struct socket *, ip_mrouter);
  118 
  119 /*
  120  * The various mrouter and rsvp functions.
  121  */
  122 int (*ip_mrouter_set)(struct socket *, struct sockopt *);
  123 int (*ip_mrouter_get)(struct socket *, struct sockopt *);
  124 int (*ip_mrouter_done)(void);
  125 int (*ip_mforward)(struct ip *, struct ifnet *, struct mbuf *,
  126                    struct ip_moptions *);
  127 int (*mrt_ioctl)(u_long, caddr_t, int);
  128 int (*legal_vif_num)(int);
  129 u_long (*ip_mcast_src)(int);
  130 
  131 int (*rsvp_input_p)(struct mbuf **, int *, int);
  132 int (*ip_rsvp_vif)(struct socket *, struct sockopt *);
  133 void (*ip_rsvp_force_done)(struct socket *);
  134 #endif /* INET */
  135 
  136 extern  struct protosw inetsw[];
  137 
  138 u_long  rip_sendspace = 9216;
  139 SYSCTL_ULONG(_net_inet_raw, OID_AUTO, maxdgram, CTLFLAG_RW,
  140     &rip_sendspace, 0, "Maximum outgoing raw IP datagram size");
  141 
  142 u_long  rip_recvspace = 9216;
  143 SYSCTL_ULONG(_net_inet_raw, OID_AUTO, recvspace, CTLFLAG_RW,
  144     &rip_recvspace, 0, "Maximum space for incoming raw IP datagrams");
  145 
  146 /*
  147  * Hash functions
  148  */
  149 
  150 #define INP_PCBHASH_RAW_SIZE    256
  151 #define INP_PCBHASH_RAW(proto, laddr, faddr, mask) \
  152         (((proto) + (laddr) + (faddr)) % (mask) + 1)
  153 
  154 #ifdef INET
  155 static void
  156 rip_inshash(struct inpcb *inp)
  157 {
  158         struct inpcbinfo *pcbinfo = inp->inp_pcbinfo;
  159         struct inpcbhead *pcbhash;
  160         int hash;
  161 
  162         INP_INFO_WLOCK_ASSERT(pcbinfo);
  163         INP_WLOCK_ASSERT(inp);
  164         
  165         if (inp->inp_ip_p != 0 &&
  166             inp->inp_laddr.s_addr != INADDR_ANY &&
  167             inp->inp_faddr.s_addr != INADDR_ANY) {
  168                 hash = INP_PCBHASH_RAW(inp->inp_ip_p, inp->inp_laddr.s_addr,
  169                     inp->inp_faddr.s_addr, pcbinfo->ipi_hashmask);
  170         } else
  171                 hash = 0;
  172         pcbhash = &pcbinfo->ipi_hashbase[hash];
  173         CK_LIST_INSERT_HEAD(pcbhash, inp, inp_hash);
  174 }
  175 
  176 static void
  177 rip_delhash(struct inpcb *inp)
  178 {
  179 
  180         INP_INFO_WLOCK_ASSERT(inp->inp_pcbinfo);
  181         INP_WLOCK_ASSERT(inp);
  182 
  183         CK_LIST_REMOVE(inp, inp_hash);
  184 }
  185 #endif /* INET */
  186 
  187 /*
  188  * Raw interface to IP protocol.
  189  */
  190 
  191 /*
  192  * Initialize raw connection block q.
  193  */
  194 static void
  195 rip_zone_change(void *tag)
  196 {
  197 
  198         uma_zone_set_max(V_ripcbinfo.ipi_zone, maxsockets);
  199 }
  200 
  201 static int
  202 rip_inpcb_init(void *mem, int size, int flags)
  203 {
  204         struct inpcb *inp = mem;
  205 
  206         INP_LOCK_INIT(inp, "inp", "rawinp");
  207         return (0);
  208 }
  209 
  210 void
  211 rip_init(void)
  212 {
  213 
  214         in_pcbinfo_init(&V_ripcbinfo, "rip", &V_ripcb, INP_PCBHASH_RAW_SIZE,
  215             1, "ripcb", rip_inpcb_init, IPI_HASHFIELDS_NONE);
  216         EVENTHANDLER_REGISTER(maxsockets_change, rip_zone_change, NULL,
  217             EVENTHANDLER_PRI_ANY);
  218 }
  219 
  220 #ifdef VIMAGE
  221 static void
  222 rip_destroy(void *unused __unused)
  223 {
  224 
  225         in_pcbinfo_destroy(&V_ripcbinfo);
  226 }
  227 VNET_SYSUNINIT(raw_ip, SI_SUB_PROTO_DOMAIN, SI_ORDER_FOURTH, rip_destroy, NULL);
  228 #endif
  229 
  230 #ifdef INET
  231 static int
  232 rip_append(struct inpcb *last, struct ip *ip, struct mbuf *n,
  233     struct sockaddr_in *ripsrc)
  234 {
  235         int policyfail = 0;
  236 
  237         INP_LOCK_ASSERT(last);
  238 
  239 #if defined(IPSEC) || defined(IPSEC_SUPPORT)
  240         /* check AH/ESP integrity. */
  241         if (IPSEC_ENABLED(ipv4)) {
  242                 if (IPSEC_CHECK_POLICY(ipv4, n, last) != 0)
  243                         policyfail = 1;
  244         }
  245 #endif /* IPSEC */
  246 #ifdef MAC
  247         if (!policyfail && mac_inpcb_check_deliver(last, n) != 0)
  248                 policyfail = 1;
  249 #endif
  250         /* Check the minimum TTL for socket. */
  251         if (last->inp_ip_minttl && last->inp_ip_minttl > ip->ip_ttl)
  252                 policyfail = 1;
  253         if (!policyfail) {
  254                 struct mbuf *opts = NULL;
  255                 struct socket *so;
  256 
  257                 so = last->inp_socket;
  258                 if ((last->inp_flags & INP_CONTROLOPTS) ||
  259                     (so->so_options & (SO_TIMESTAMP | SO_BINTIME)))
  260                         ip_savecontrol(last, &opts, ip, n);
  261                 SOCKBUF_LOCK(&so->so_rcv);
  262                 if (sbappendaddr_locked(&so->so_rcv,
  263                     (struct sockaddr *)ripsrc, n, opts) == 0) {
  264                         /* should notify about lost packet */
  265                         m_freem(n);
  266                         if (opts)
  267                                 m_freem(opts);
  268                         SOCKBUF_UNLOCK(&so->so_rcv);
  269                 } else
  270                         sorwakeup_locked(so);
  271         } else
  272                 m_freem(n);
  273         return (policyfail);
  274 }
  275 
  276 /*
  277  * Setup generic address and protocol structures for raw_input routine, then
  278  * pass them along with mbuf chain.
  279  */
  280 int
  281 rip_input(struct mbuf **mp, int *offp, int proto)
  282 {
  283         struct ifnet *ifp;
  284         struct mbuf *m = *mp;
  285         struct ip *ip = mtod(m, struct ip *);
  286         struct inpcb *inp, *last;
  287         struct sockaddr_in ripsrc;
  288         struct epoch_tracker et;
  289         int hash;
  290 
  291         *mp = NULL;
  292 
  293         bzero(&ripsrc, sizeof(ripsrc));
  294         ripsrc.sin_len = sizeof(ripsrc);
  295         ripsrc.sin_family = AF_INET;
  296         ripsrc.sin_addr = ip->ip_src;
  297         last = NULL;
  298 
  299         ifp = m->m_pkthdr.rcvif;
  300 
  301         hash = INP_PCBHASH_RAW(proto, ip->ip_src.s_addr,
  302             ip->ip_dst.s_addr, V_ripcbinfo.ipi_hashmask);
  303         INP_INFO_RLOCK_ET(&V_ripcbinfo, et);
  304         CK_LIST_FOREACH(inp, &V_ripcbinfo.ipi_hashbase[hash], inp_hash) {
  305                 if (inp->inp_ip_p != proto)
  306                         continue;
  307 #ifdef INET6
  308                 /* XXX inp locking */
  309                 if ((inp->inp_vflag & INP_IPV4) == 0)
  310                         continue;
  311 #endif
  312                 if (inp->inp_laddr.s_addr != ip->ip_dst.s_addr)
  313                         continue;
  314                 if (inp->inp_faddr.s_addr != ip->ip_src.s_addr)
  315                         continue;
  316                 if (last != NULL) {
  317                         struct mbuf *n;
  318 
  319                         n = m_copym(m, 0, M_COPYALL, M_NOWAIT);
  320                         if (n != NULL)
  321                             (void) rip_append(last, ip, n, &ripsrc);
  322                         /* XXX count dropped packet */
  323                         INP_RUNLOCK(last);
  324                         last = NULL;
  325                 }
  326                 INP_RLOCK(inp);
  327                 if (__predict_false(inp->inp_flags2 & INP_FREED))
  328                         goto skip_1;
  329                 if (jailed_without_vnet(inp->inp_cred)) {
  330                         /*
  331                          * XXX: If faddr was bound to multicast group,
  332                          * jailed raw socket will drop datagram.
  333                          */
  334                         if (prison_check_ip4(inp->inp_cred, &ip->ip_dst) != 0)
  335                                 goto skip_1;
  336                 }
  337                 last = inp;
  338                 continue;
  339         skip_1:
  340                 INP_RUNLOCK(inp);
  341         }
  342         CK_LIST_FOREACH(inp, &V_ripcbinfo.ipi_hashbase[0], inp_hash) {
  343                 if (inp->inp_ip_p && inp->inp_ip_p != proto)
  344                         continue;
  345 #ifdef INET6
  346                 /* XXX inp locking */
  347                 if ((inp->inp_vflag & INP_IPV4) == 0)
  348                         continue;
  349 #endif
  350                 if (!in_nullhost(inp->inp_laddr) &&
  351                     !in_hosteq(inp->inp_laddr, ip->ip_dst))
  352                         continue;
  353                 if (!in_nullhost(inp->inp_faddr) &&
  354                     !in_hosteq(inp->inp_faddr, ip->ip_src))
  355                         continue;
  356                 if (last != NULL) {
  357                         struct mbuf *n;
  358 
  359                         n = m_copym(m, 0, M_COPYALL, M_NOWAIT);
  360                         if (n != NULL)
  361                                 (void) rip_append(last, ip, n, &ripsrc);
  362                         /* XXX count dropped packet */
  363                         INP_RUNLOCK(last);
  364                         last = NULL;
  365                 }
  366                 INP_RLOCK(inp);
  367                 if (__predict_false(inp->inp_flags2 & INP_FREED))
  368                         goto skip_2;
  369                 if (jailed_without_vnet(inp->inp_cred)) {
  370                         /*
  371                          * Allow raw socket in jail to receive multicast;
  372                          * assume process had PRIV_NETINET_RAW at attach,
  373                          * and fall through into normal filter path if so.
  374                          */
  375                         if (!IN_MULTICAST(ntohl(ip->ip_dst.s_addr)) &&
  376                             prison_check_ip4(inp->inp_cred, &ip->ip_dst) != 0)
  377                                 goto skip_2;
  378                 }
  379                 /*
  380                  * If this raw socket has multicast state, and we
  381                  * have received a multicast, check if this socket
  382                  * should receive it, as multicast filtering is now
  383                  * the responsibility of the transport layer.
  384                  */
  385                 if (inp->inp_moptions != NULL &&
  386                     IN_MULTICAST(ntohl(ip->ip_dst.s_addr))) {
  387                         /*
  388                          * If the incoming datagram is for IGMP, allow it
  389                          * through unconditionally to the raw socket.
  390                          *
  391                          * In the case of IGMPv2, we may not have explicitly
  392                          * joined the group, and may have set IFF_ALLMULTI
  393                          * on the interface. imo_multi_filter() may discard
  394                          * control traffic we actually need to see.
  395                          *
  396                          * Userland multicast routing daemons should continue
  397                          * filter the control traffic appropriately.
  398                          */
  399                         int blocked;
  400 
  401                         blocked = MCAST_PASS;
  402                         if (proto != IPPROTO_IGMP) {
  403                                 struct sockaddr_in group;
  404 
  405                                 bzero(&group, sizeof(struct sockaddr_in));
  406                                 group.sin_len = sizeof(struct sockaddr_in);
  407                                 group.sin_family = AF_INET;
  408                                 group.sin_addr = ip->ip_dst;
  409 
  410                                 blocked = imo_multi_filter(inp->inp_moptions,
  411                                     ifp,
  412                                     (struct sockaddr *)&group,
  413                                     (struct sockaddr *)&ripsrc);
  414                         }
  415 
  416                         if (blocked != MCAST_PASS) {
  417                                 IPSTAT_INC(ips_notmember);
  418                                 goto skip_2;
  419                         }
  420                 }
  421                 last = inp;
  422                 continue;
  423         skip_2:
  424                 INP_RUNLOCK(inp);
  425         }
  426         INP_INFO_RUNLOCK_ET(&V_ripcbinfo, et);
  427         if (last != NULL) {
  428                 if (rip_append(last, ip, m, &ripsrc) != 0)
  429                         IPSTAT_INC(ips_delivered);
  430                 INP_RUNLOCK(last);
  431         } else {
  432                 if (inetsw[ip_protox[ip->ip_p]].pr_input == rip_input) {
  433                         IPSTAT_INC(ips_noproto);
  434                         IPSTAT_DEC(ips_delivered);
  435                         icmp_error(m, ICMP_UNREACH, ICMP_UNREACH_PROTOCOL, 0, 0);
  436                 } else {
  437                         m_freem(m);
  438                 }
  439         }
  440         return (IPPROTO_DONE);
  441 }
  442 
  443 /*
  444  * Generate IP header and pass packet to ip_output.  Tack on options user may
  445  * have setup with control call.
  446  */
  447 int
  448 rip_output(struct mbuf *m, struct socket *so, ...)
  449 {
  450         struct ip *ip;
  451         int error;
  452         struct inpcb *inp = sotoinpcb(so);
  453         va_list ap;
  454         u_long dst;
  455         int flags = ((so->so_options & SO_DONTROUTE) ? IP_ROUTETOIF : 0) |
  456             IP_ALLOWBROADCAST;
  457 
  458         va_start(ap, so);
  459         dst = va_arg(ap, u_long);
  460         va_end(ap);
  461 
  462         /*
  463          * If the user handed us a complete IP packet, use it.  Otherwise,
  464          * allocate an mbuf for a header and fill it in.
  465          */
  466         if ((inp->inp_flags & INP_HDRINCL) == 0) {
  467                 if (m->m_pkthdr.len + sizeof(struct ip) > IP_MAXPACKET) {
  468                         m_freem(m);
  469                         return(EMSGSIZE);
  470                 }
  471                 M_PREPEND(m, sizeof(struct ip), M_NOWAIT);
  472                 if (m == NULL)
  473                         return(ENOBUFS);
  474 
  475                 INP_RLOCK(inp);
  476                 ip = mtod(m, struct ip *);
  477                 ip->ip_tos = inp->inp_ip_tos;
  478                 if (inp->inp_flags & INP_DONTFRAG)
  479                         ip->ip_off = htons(IP_DF);
  480                 else
  481                         ip->ip_off = htons(0);
  482                 ip->ip_p = inp->inp_ip_p;
  483                 ip->ip_len = htons(m->m_pkthdr.len);
  484                 ip->ip_src = inp->inp_laddr;
  485                 ip->ip_dst.s_addr = dst;
  486                 if (jailed(inp->inp_cred)) {
  487                         /*
  488                          * prison_local_ip4() would be good enough but would
  489                          * let a source of INADDR_ANY pass, which we do not
  490                          * want to see from jails.
  491                          */
  492                         if (ip->ip_src.s_addr == INADDR_ANY) {
  493                                 error = in_pcbladdr(inp, &ip->ip_dst, &ip->ip_src,
  494                                     inp->inp_cred);
  495                         } else {
  496                                 error = prison_local_ip4(inp->inp_cred,
  497                                     &ip->ip_src);
  498                         }
  499                         if (error != 0) {
  500                                 INP_RUNLOCK(inp);
  501                                 m_freem(m);
  502                                 return (error);
  503                         }
  504                 }
  505                 ip->ip_ttl = inp->inp_ip_ttl;
  506         } else {
  507                 if (m->m_pkthdr.len > IP_MAXPACKET) {
  508                         m_freem(m);
  509                         return(EMSGSIZE);
  510                 }
  511                 INP_RLOCK(inp);
  512                 ip = mtod(m, struct ip *);
  513                 error = prison_check_ip4(inp->inp_cred, &ip->ip_src);
  514                 if (error != 0) {
  515                         INP_RUNLOCK(inp);
  516                         m_freem(m);
  517                         return (error);
  518                 }
  519 
  520                 /*
  521                  * Don't allow both user specified and setsockopt options,
  522                  * and don't allow packet length sizes that will crash.
  523                  */
  524                 if (((ip->ip_hl != (sizeof (*ip) >> 2)) && inp->inp_options)
  525                     || (ntohs(ip->ip_len) != m->m_pkthdr.len)
  526                     || (ntohs(ip->ip_len) < (ip->ip_hl << 2))) {
  527                         INP_RUNLOCK(inp);
  528                         m_freem(m);
  529                         return (EINVAL);
  530                 }
  531                 /*
  532                  * This doesn't allow application to specify ID of zero,
  533                  * but we got this limitation from the beginning of history.
  534                  */
  535                 if (ip->ip_id == 0)
  536                         ip_fillid(ip);
  537 
  538                 /*
  539                  * XXX prevent ip_output from overwriting header fields.
  540                  */
  541                 flags |= IP_RAWOUTPUT;
  542                 IPSTAT_INC(ips_rawout);
  543         }
  544 
  545         if (inp->inp_flags & INP_ONESBCAST)
  546                 flags |= IP_SENDONES;
  547 
  548 #ifdef MAC
  549         mac_inpcb_create_mbuf(inp, m);
  550 #endif
  551 
  552         error = ip_output(m, inp->inp_options, NULL, flags,
  553             inp->inp_moptions, inp);
  554         INP_RUNLOCK(inp);
  555         return (error);
  556 }
  557 
  558 /*
  559  * Raw IP socket option processing.
  560  *
  561  * IMPORTANT NOTE regarding access control: Traditionally, raw sockets could
  562  * only be created by a privileged process, and as such, socket option
  563  * operations to manage system properties on any raw socket were allowed to
  564  * take place without explicit additional access control checks.  However,
  565  * raw sockets can now also be created in jail(), and therefore explicit
  566  * checks are now required.  Likewise, raw sockets can be used by a process
  567  * after it gives up privilege, so some caution is required.  For options
  568  * passed down to the IP layer via ip_ctloutput(), checks are assumed to be
  569  * performed in ip_ctloutput() and therefore no check occurs here.
  570  * Unilaterally checking priv_check() here breaks normal IP socket option
  571  * operations on raw sockets.
  572  *
  573  * When adding new socket options here, make sure to add access control
  574  * checks here as necessary.
  575  *
  576  * XXX-BZ inp locking?
  577  */
  578 int
  579 rip_ctloutput(struct socket *so, struct sockopt *sopt)
  580 {
  581         struct  inpcb *inp = sotoinpcb(so);
  582         int     error, optval;
  583 
  584         if (sopt->sopt_level != IPPROTO_IP) {
  585                 if ((sopt->sopt_level == SOL_SOCKET) &&
  586                     (sopt->sopt_name == SO_SETFIB)) {
  587                         inp->inp_inc.inc_fibnum = so->so_fibnum;
  588                         return (0);
  589                 }
  590                 return (EINVAL);
  591         }
  592 
  593         error = 0;
  594         switch (sopt->sopt_dir) {
  595         case SOPT_GET:
  596                 switch (sopt->sopt_name) {
  597                 case IP_HDRINCL:
  598                         optval = inp->inp_flags & INP_HDRINCL;
  599                         error = sooptcopyout(sopt, &optval, sizeof optval);
  600                         break;
  601 
  602                 case IP_FW3:    /* generic ipfw v.3 functions */
  603                 case IP_FW_ADD: /* ADD actually returns the body... */
  604                 case IP_FW_GET:
  605                 case IP_FW_TABLE_GETSIZE:
  606                 case IP_FW_TABLE_LIST:
  607                 case IP_FW_NAT_GET_CONFIG:
  608                 case IP_FW_NAT_GET_LOG:
  609                         if (V_ip_fw_ctl_ptr != NULL)
  610                                 error = V_ip_fw_ctl_ptr(sopt);
  611                         else
  612                                 error = ENOPROTOOPT;
  613                         break;
  614 
  615                 case IP_DUMMYNET3:      /* generic dummynet v.3 functions */
  616                 case IP_DUMMYNET_GET:
  617                         if (ip_dn_ctl_ptr != NULL)
  618                                 error = ip_dn_ctl_ptr(sopt);
  619                         else
  620                                 error = ENOPROTOOPT;
  621                         break ;
  622 
  623                 case MRT_INIT:
  624                 case MRT_DONE:
  625                 case MRT_ADD_VIF:
  626                 case MRT_DEL_VIF:
  627                 case MRT_ADD_MFC:
  628                 case MRT_DEL_MFC:
  629                 case MRT_VERSION:
  630                 case MRT_ASSERT:
  631                 case MRT_API_SUPPORT:
  632                 case MRT_API_CONFIG:
  633                 case MRT_ADD_BW_UPCALL:
  634                 case MRT_DEL_BW_UPCALL:
  635                         error = priv_check(curthread, PRIV_NETINET_MROUTE);
  636                         if (error != 0)
  637                                 return (error);
  638                         error = ip_mrouter_get ? ip_mrouter_get(so, sopt) :
  639                                 EOPNOTSUPP;
  640                         break;
  641 
  642                 default:
  643                         error = ip_ctloutput(so, sopt);
  644                         break;
  645                 }
  646                 break;
  647 
  648         case SOPT_SET:
  649                 switch (sopt->sopt_name) {
  650                 case IP_HDRINCL:
  651                         error = sooptcopyin(sopt, &optval, sizeof optval,
  652                                             sizeof optval);
  653                         if (error)
  654                                 break;
  655                         if (optval)
  656                                 inp->inp_flags |= INP_HDRINCL;
  657                         else
  658                                 inp->inp_flags &= ~INP_HDRINCL;
  659                         break;
  660 
  661                 case IP_FW3:    /* generic ipfw v.3 functions */
  662                 case IP_FW_ADD:
  663                 case IP_FW_DEL:
  664                 case IP_FW_FLUSH:
  665                 case IP_FW_ZERO:
  666                 case IP_FW_RESETLOG:
  667                 case IP_FW_TABLE_ADD:
  668                 case IP_FW_TABLE_DEL:
  669                 case IP_FW_TABLE_FLUSH:
  670                 case IP_FW_NAT_CFG:
  671                 case IP_FW_NAT_DEL:
  672                         if (V_ip_fw_ctl_ptr != NULL)
  673                                 error = V_ip_fw_ctl_ptr(sopt);
  674                         else
  675                                 error = ENOPROTOOPT;
  676                         break;
  677 
  678                 case IP_DUMMYNET3:      /* generic dummynet v.3 functions */
  679                 case IP_DUMMYNET_CONFIGURE:
  680                 case IP_DUMMYNET_DEL:
  681                 case IP_DUMMYNET_FLUSH:
  682                         if (ip_dn_ctl_ptr != NULL)
  683                                 error = ip_dn_ctl_ptr(sopt);
  684                         else
  685                                 error = ENOPROTOOPT ;
  686                         break ;
  687 
  688                 case IP_RSVP_ON:
  689                         error = priv_check(curthread, PRIV_NETINET_MROUTE);
  690                         if (error != 0)
  691                                 return (error);
  692                         error = ip_rsvp_init(so);
  693                         break;
  694 
  695                 case IP_RSVP_OFF:
  696                         error = priv_check(curthread, PRIV_NETINET_MROUTE);
  697                         if (error != 0)
  698                                 return (error);
  699                         error = ip_rsvp_done();
  700                         break;
  701 
  702                 case IP_RSVP_VIF_ON:
  703                 case IP_RSVP_VIF_OFF:
  704                         error = priv_check(curthread, PRIV_NETINET_MROUTE);
  705                         if (error != 0)
  706                                 return (error);
  707                         error = ip_rsvp_vif ?
  708                                 ip_rsvp_vif(so, sopt) : EINVAL;
  709                         break;
  710 
  711                 case MRT_INIT:
  712                 case MRT_DONE:
  713                 case MRT_ADD_VIF:
  714                 case MRT_DEL_VIF:
  715                 case MRT_ADD_MFC:
  716                 case MRT_DEL_MFC:
  717                 case MRT_VERSION:
  718                 case MRT_ASSERT:
  719                 case MRT_API_SUPPORT:
  720                 case MRT_API_CONFIG:
  721                 case MRT_ADD_BW_UPCALL:
  722                 case MRT_DEL_BW_UPCALL:
  723                         error = priv_check(curthread, PRIV_NETINET_MROUTE);
  724                         if (error != 0)
  725                                 return (error);
  726                         error = ip_mrouter_set ? ip_mrouter_set(so, sopt) :
  727                                         EOPNOTSUPP;
  728                         break;
  729 
  730                 default:
  731                         error = ip_ctloutput(so, sopt);
  732                         break;
  733                 }
  734                 break;
  735         }
  736 
  737         return (error);
  738 }
  739 
  740 /*
  741  * This function exists solely to receive the PRC_IFDOWN messages which are
  742  * sent by if_down().  It looks for an ifaddr whose ifa_addr is sa, and calls
  743  * in_ifadown() to remove all routes corresponding to that address.  It also
  744  * receives the PRC_IFUP messages from if_up() and reinstalls the interface
  745  * routes.
       *
       * Addresses are matched by comparing the ifa_addr pointer itself with sa,
       * not the address bytes.  Any cmd other than PRC_IFDOWN or PRC_IFUP is
       * ignored, and vip is never referenced.
  746  */
  747 void
  748 rip_ctlinput(int cmd, struct sockaddr *sa, void *vip)
  749 {
  750         struct rm_priotracker in_ifa_tracker;
  751         struct in_ifaddr *ia;
  752         struct ifnet *ifp;
  753         int err;
  754         int flags;
  755 
  756         switch (cmd) {
  757         case PRC_IFDOWN:
                      /*
                       * Only an address that currently has IFA_ROUTE set is
                       * torn down; the search stops at the first such match.
                       */
  758                 IN_IFADDR_RLOCK(&in_ifa_tracker);
  759                 CK_STAILQ_FOREACH(ia, &V_in_ifaddrhead, ia_link) {
  760                         if (ia->ia_ifa.ifa_addr == sa
  761                             && (ia->ia_flags & IFA_ROUTE)) {
                                      /*
                                       * Take a reference before dropping the
                                       * read lock so ia can still be used by
                                       * the route calls below.
                                       */
  762                                 ifa_ref(&ia->ia_ifa);
  763                                 IN_IFADDR_RUNLOCK(&in_ifa_tracker);
  764                                 /*
  765                                  * in_scrubprefix() kills the interface route.
  766                                  */
  767                                 in_scrubprefix(ia, 0);
  768                                 /*
  769                                  * in_ifadown gets rid of all the rest of the
  770                                  * routes.  This is not quite the right thing
  771                                  * to do, but at least if we are running a
  772                                  * routing process they will come back.
  773                                  */
  774                                 in_ifadown(&ia->ia_ifa, 0);
  775                                 ifa_free(&ia->ia_ifa);
  776                                 break;
  777                         }
  778                 }
  779                 if (ia == NULL)         /* If ia matched, already unlocked. */
  780                         IN_IFADDR_RUNLOCK(&in_ifa_tracker);
  781                 break;
  782 
  783         case PRC_IFUP:
  784                 IN_IFADDR_RLOCK(&in_ifa_tracker);
  785                 CK_STAILQ_FOREACH(ia, &V_in_ifaddrhead, ia_link) {
  786                         if (ia->ia_ifa.ifa_addr == sa)
  787                                 break;
  788                 }
                      /*
                       * Nothing to do when no address matched or when the
                       * matching address already has its route (IFA_ROUTE).
                       */
  789                 if (ia == NULL || (ia->ia_flags & IFA_ROUTE)) {
  790                         IN_IFADDR_RUNLOCK(&in_ifa_tracker);
  791                         return;
  792                 }
  793                 ifa_ref(&ia->ia_ifa);
  794                 IN_IFADDR_RUNLOCK(&in_ifa_tracker);
  795                 flags = RTF_UP;
  796                 ifp = ia->ia_ifa.ifa_ifp;
  797 
                      /* Loopback and point-to-point interfaces get a host route. */
  798                 if ((ifp->if_flags & IFF_LOOPBACK)
  799                     || (ifp->if_flags & IFF_POINTOPOINT))
  800                         flags |= RTF_HOST;
  801 
                      /*
                       * The result stored in err here is overwritten by the
                       * rtinit() call below without being examined.
                       */
  802                 err = ifa_del_loopback_route((struct ifaddr *)ia, sa);
  803 
                      /* IFA_ROUTE is set only when rtinit() reports success (0). */
  804                 err = rtinit(&ia->ia_ifa, RTM_ADD, flags);
  805                 if (err == 0)
  806                         ia->ia_flags |= IFA_ROUTE;
  807 
                      /* This result is stored in err but not examined afterwards. */
  808                 err = ifa_add_loopback_route((struct ifaddr *)ia, sa);
  809 
  810                 ifa_free(&ia->ia_ifa);
  811                 break;
  812         }
  813 }
  814 
  815 static int
  816 rip_attach(struct socket *so, int proto, struct thread *td)
  817 {
              /*
               * Attach handler for raw IP sockets (installed as .pru_attach in
               * rip_usrreqs).  Allocates an inpcb for so and records proto in it.
               *
               * Returns 0 on success, the priv_check() error when td lacks
               * PRIV_NETINET_RAW, EPROTONOSUPPORT when proto is outside
               * [0, IPPROTO_MAX), or the error from soreserve()/in_pcballoc().
               */
  818         struct inpcb *inp;
  819         int error;
  820 
  821         inp = sotoinpcb(so);
  822         KASSERT(inp == NULL, ("rip_attach: inp != NULL"));
  823 
  824         error = priv_check(td, PRIV_NETINET_RAW);
  825         if (error)
  826                 return (error);
  827         if (proto >= IPPROTO_MAX || proto < 0)
  828                 return (EPROTONOSUPPORT);
  829         error = soreserve(so, rip_sendspace, rip_recvspace);
  830         if (error)
  831                 return (error);
  832         INP_INFO_WLOCK(&V_ripcbinfo);
  833         error = in_pcballoc(so, &V_ripcbinfo);
  834         if (error) {
  835                 INP_INFO_WUNLOCK(&V_ripcbinfo);
  836                 return (error);
  837         }
              /* in_pcballoc() stored the new pcb in so->so_pcb. */
  838         inp = (struct inpcb *)so->so_pcb;
  839         inp->inp_vflag |= INP_IPV4;
  840         inp->inp_ip_p = proto;
  841         inp->inp_ip_ttl = V_ip_defttl;
  842         rip_inshash(inp);
  843         INP_INFO_WUNLOCK(&V_ripcbinfo);
              /*
               * NOTE(review): no INP_WLOCK() is taken in this function, so
               * in_pcballoc() presumably returns the inpcb write-locked --
               * confirm against in_pcb.c.
               */
  844         INP_WUNLOCK(inp);
  845         return (0);
  846 }
  847 
  848 static void
  849 rip_detach(struct socket *so)
  850 {
              /*
               * Detach handler: removes the socket's inpcb from the raw IP hash,
               * runs the multicast-router and RSVP teardown hooks that apply to
               * this socket, then detaches and frees the inpcb.  The socket is
               * asserted to have no foreign address (inp_faddr == INADDR_ANY).
               */
  851         struct inpcb *inp;
  852 
  853         inp = sotoinpcb(so);
  854         KASSERT(inp != NULL, ("rip_detach: inp == NULL"));
  855         KASSERT(inp->inp_faddr.s_addr == INADDR_ANY, 
  856             ("rip_detach: not closed"));
  857 
  858         INP_INFO_WLOCK(&V_ripcbinfo);
  859         INP_WLOCK(inp);
  860         rip_delhash(inp);
              /* ip_mrouter_done is called only if it is non-NULL and so is V_ip_mrouter. */
  861         if (so == V_ip_mrouter && ip_mrouter_done)
  862                 ip_mrouter_done();
              /* ip_rsvp_force_done is called for every socket when it is non-NULL. */
  863         if (ip_rsvp_force_done)
  864                 ip_rsvp_force_done(so);
  865         if (so == V_ip_rsvpd)
  866                 ip_rsvp_done();
  867         in_pcbdetach(inp);
              /*
               * NOTE(review): the inpcb write lock taken above is never released
               * explicitly here; presumably in_pcbfree() consumes it -- confirm.
               */
  868         in_pcbfree(inp);
  869         INP_INFO_WUNLOCK(&V_ripcbinfo);
  870 }
  871 
  872 static void
  873 rip_dodisconnect(struct socket *so, struct inpcb *inp)
  874 {
              /*
               * Shared disconnect helper used by rip_abort(), rip_close() and
               * rip_disconnect(): resets the foreign address to INADDR_ANY and
               * clears SS_ISCONNECTED on the socket.
               */
  875         struct inpcbinfo *pcbinfo;
  876 
  877         pcbinfo = inp->inp_pcbinfo;
  878         INP_INFO_WLOCK(pcbinfo);
  879         INP_WLOCK(inp);
              /*
               * The pcb is taken out of the hash before inp_faddr changes and
               * re-inserted afterwards.
               */
  880         rip_delhash(inp);
  881         inp->inp_faddr.s_addr = INADDR_ANY;
  882         rip_inshash(inp);
              /* so_state is modified under the socket lock. */
  883         SOCK_LOCK(so);
  884         so->so_state &= ~SS_ISCONNECTED;
  885         SOCK_UNLOCK(so);
  886         INP_WUNLOCK(inp);
  887         INP_INFO_WUNLOCK(pcbinfo);
  888 }
  889 
  890 static void
  891 rip_abort(struct socket *so)
  892 {
              /*
               * Abort handler: asserts the socket has an inpcb and disconnects it
               * via rip_dodisconnect().  The body is the same as rip_close().
               */
  893         struct inpcb *inp;
  894 
  895         inp = sotoinpcb(so);
  896         KASSERT(inp != NULL, ("rip_abort: inp == NULL"));
  897 
  898         rip_dodisconnect(so, inp);
  899 }
  900 
  901 static void
  902 rip_close(struct socket *so)
  903 {
              /*
               * Close handler: asserts the socket has an inpcb and disconnects it
               * via rip_dodisconnect().  The body is the same as rip_abort().
               */
  904         struct inpcb *inp;
  905 
  906         inp = sotoinpcb(so);
  907         KASSERT(inp != NULL, ("rip_close: inp == NULL"));
  908 
  909         rip_dodisconnect(so, inp);
  910 }
  911 
  912 static int
  913 rip_disconnect(struct socket *so)
  914 {
              /*
               * Disconnect handler.  Returns ENOTCONN when SS_ISCONNECTED is not
               * set on the socket; otherwise disconnects via rip_dodisconnect()
               * and returns 0.
               */
  915         struct inpcb *inp;
  916 
              /* This so_state check is made before any lock is taken here. */
  917         if ((so->so_state & SS_ISCONNECTED) == 0)
  918                 return (ENOTCONN);
  919 
  920         inp = sotoinpcb(so);
  921         KASSERT(inp != NULL, ("rip_disconnect: inp == NULL"));
  922 
  923         rip_dodisconnect(so, inp);
  924         return (0);
  925 }
  926 
  927 static int
  928 rip_bind(struct socket *so, struct sockaddr *nam, struct thread *td)
  929 {
              /*
               * Bind handler: sets the socket's local address (inp_laddr) from nam.
               *
               * Returns 0 on success, EINVAL when nam->sa_len is not
               * sizeof(struct sockaddr_in), the error from prison_check_ip4()
               * when it is non-zero, or EADDRNOTAVAIL when the address fails
               * the checks below.
               */
  930         struct sockaddr_in *addr = (struct sockaddr_in *)nam;
  931         struct inpcb *inp;
  932         int error;
  933 
  934         if (nam->sa_len != sizeof(*addr))
  935                 return (EINVAL);
  936 
  937         error = prison_check_ip4(td->td_ucred, &addr->sin_addr);
  938         if (error != 0)
  939                 return (error);
  940 
  941         inp = sotoinpcb(so);
  942         KASSERT(inp != NULL, ("rip_bind: inp == NULL"));
  943 
              /*
               * Rejected when the V_ifnet list is empty, when the family is
               * neither AF_INET nor AF_IMPLINK, or when a non-zero address is
               * given, INP_BINDANY is clear, and ifa_ifwithaddr_check() returns 0
               * for it.
               */
  944         if (CK_STAILQ_EMPTY(&V_ifnet) ||
  945             (addr->sin_family != AF_INET && addr->sin_family != AF_IMPLINK) ||
  946             (addr->sin_addr.s_addr &&
  947              (inp->inp_flags & INP_BINDANY) == 0 &&
  948              ifa_ifwithaddr_check((struct sockaddr *)addr) == 0))
  949                 return (EADDRNOTAVAIL);
  950 
  951         INP_INFO_WLOCK(&V_ripcbinfo);
  952         INP_WLOCK(inp);
              /* Remove from the hash, change the local address, then re-insert. */
  953         rip_delhash(inp);
  954         inp->inp_laddr = addr->sin_addr;
  955         rip_inshash(inp);
  956         INP_WUNLOCK(inp);
  957         INP_INFO_WUNLOCK(&V_ripcbinfo);
  958         return (0);
  959 }
  960 
  961 static int
  962 rip_connect(struct socket *so, struct sockaddr *nam, struct thread *td)
  963 {
              /*
               * Connect handler: records nam's address as the socket's foreign
               * address (inp_faddr) and marks the socket connected.
               *
               * Returns 0 on success, EINVAL when nam->sa_len is not
               * sizeof(struct sockaddr_in), EADDRNOTAVAIL when the V_ifnet list
               * is empty, or EAFNOSUPPORT when the family is neither AF_INET nor
               * AF_IMPLINK.  td is not used.
               */
  964         struct sockaddr_in *addr = (struct sockaddr_in *)nam;
  965         struct inpcb *inp;
  966 
  967         if (nam->sa_len != sizeof(*addr))
  968                 return (EINVAL);
  969         if (CK_STAILQ_EMPTY(&V_ifnet))
  970                 return (EADDRNOTAVAIL);
  971         if (addr->sin_family != AF_INET && addr->sin_family != AF_IMPLINK)
  972                 return (EAFNOSUPPORT);
  973 
  974         inp = sotoinpcb(so);
  975         KASSERT(inp != NULL, ("rip_connect: inp == NULL"));
  976 
  977         INP_INFO_WLOCK(&V_ripcbinfo);
  978         INP_WLOCK(inp);
              /* Remove from the hash, change the foreign address, then re-insert. */
  979         rip_delhash(inp);
  980         inp->inp_faddr = addr->sin_addr;
  981         rip_inshash(inp);
  982         soisconnected(so);
  983         INP_WUNLOCK(inp);
  984         INP_INFO_WUNLOCK(&V_ripcbinfo);
  985         return (0);
  986 }
  987 
  988 static int
  989 rip_shutdown(struct socket *so)
  990 {
              /*
               * Shutdown handler: calls socantsendmore() on the socket while
               * holding the inpcb write lock.  Always returns 0.
               */
  991         struct inpcb *inp;
  992 
  993         inp = sotoinpcb(so);
  994         KASSERT(inp != NULL, ("rip_shutdown: inp == NULL"));
  995 
  996         INP_WLOCK(inp);
  997         socantsendmore(so);
  998         INP_WUNLOCK(inp);
  999         return (0);
 1000 }
 1001 
 1002 static int
 1003 rip_send(struct socket *so, int flags, struct mbuf *m, struct sockaddr *nam,
 1004     struct mbuf *control, struct thread *td)
 1005 {
              /*
               * Send handler: picks the destination address and hands the mbuf
               * to rip_output().
               *
               * On a connected socket nam must be NULL (otherwise EISCONN) and the
               * destination is inp_faddr; on an unconnected socket nam is required
               * (otherwise ENOTCONN) and supplies the destination.  On both error
               * paths m is freed here.  Otherwise the return value is that of
               * rip_output().  flags, control and td are not referenced.
               */
 1006         struct inpcb *inp;
 1007         u_long dst;
 1008 
 1009         inp = sotoinpcb(so);
 1010         KASSERT(inp != NULL, ("rip_send: inp == NULL"));
 1011 
 1012         /*
 1013          * Note: 'dst' reads below are unlocked.
 1014          */
 1015         if (so->so_state & SS_ISCONNECTED) {
 1016                 if (nam) {
 1017                         m_freem(m);
 1018                         return (EISCONN);
 1019                 }
 1020                 dst = inp->inp_faddr.s_addr;    /* Unlocked read. */
 1021         } else {
 1022                 if (nam == NULL) {
 1023                         m_freem(m);
 1024                         return (ENOTCONN);
 1025                 }
                      /* nam is read as a struct sockaddr_in without a length check here. */
 1026                 dst = ((struct sockaddr_in *)nam)->sin_addr.s_addr;
 1027         }
 1028         return (rip_output(m, so, dst));
 1029 }
 1030 #endif /* INET */
 1031 
 1032 static int
 1033 rip_pcblist(SYSCTL_HANDLER_ARGS)
 1034 {
              /*
               * Sysctl handler that copies out the list of raw IP pcbs.
               *
               * Output layout: one struct xinpgen header, one struct xinpcb per
               * reported pcb, then a trailing struct xinpgen with refreshed
               * generation and count values.
               *
               * Returns 0 on success (including the size-probe case where
               * req->oldptr is 0), EPERM when new data is supplied
               * (req->newptr != 0), or the first error returned by SYSCTL_OUT().
               */
 1035         int error, i, n;
 1036         struct inpcb *inp, **inp_list;
 1037         inp_gen_t gencnt;
 1038         struct xinpgen xig;
 1039         struct epoch_tracker et;
 1040 
 1041         /*
 1042          * The process of preparing the TCB list is too time-consuming and
 1043          * resource-intensive to repeat twice on every request.
 1044          */
              /*
               * Size probe: report room for two xinpgen records plus the current
               * pcb count padded by max(count / 8, 10) entries.
               */
 1045         if (req->oldptr == 0) {
 1046                 n = V_ripcbinfo.ipi_count;
 1047                 n += imax(n / 8, 10);
 1048                 req->oldidx = 2 * (sizeof xig) + n * sizeof(struct xinpcb);
 1049                 return (0);
 1050         }
 1051 
 1052         if (req->newptr != 0)
 1053                 return (EPERM);
 1054 
 1055         /*
 1056          * OK, now we're committed to doing something.
 1057          */
              /* Snapshot the generation count and pcb count under the info lock. */
 1058         INP_INFO_WLOCK(&V_ripcbinfo);
 1059         gencnt = V_ripcbinfo.ipi_gencnt;
 1060         n = V_ripcbinfo.ipi_count;
 1061         INP_INFO_WUNLOCK(&V_ripcbinfo);
 1062 
 1063         bzero(&xig, sizeof(xig));
 1064         xig.xig_len = sizeof xig;
 1065         xig.xig_count = n;
 1066         xig.xig_gen = gencnt;
 1067         xig.xig_sogen = so_gencnt;
 1068         error = SYSCTL_OUT(req, &xig, sizeof xig);
 1069         if (error)
 1070                 return (error);
 1071 
 1072         inp_list = malloc(n * sizeof *inp_list, M_TEMP, M_WAITOK);
 1073 
              /*
               * Collect at most n pcbs that are no newer than the snapshot
               * (inp_gencnt <= gencnt) and for which cr_canseeinpcb() returns 0
               * for the requesting credentials.  Each collected pcb gets a
               * reference so it can be used after the list lock is dropped.
               */
 1074         INP_INFO_RLOCK_ET(&V_ripcbinfo, et);
 1075         for (inp = CK_LIST_FIRST(V_ripcbinfo.ipi_listhead), i = 0; inp && i < n;
 1076              inp = CK_LIST_NEXT(inp, inp_list)) {
 1077                 INP_WLOCK(inp);
 1078                 if (inp->inp_gencnt <= gencnt &&
 1079                     cr_canseeinpcb(req->td->td_ucred, inp) == 0) {
 1080                         in_pcbref(inp);
 1081                         inp_list[i++] = inp;
 1082                 }
 1083                 INP_WUNLOCK(inp);
 1084         }
 1085         INP_INFO_RUNLOCK_ET(&V_ripcbinfo, et);
 1086         n = i;
 1087 
              /*
               * Copy the collected pcbs out.  Stop at the first SYSCTL_OUT()
               * failure so that a later successful copy cannot overwrite the
               * error; the references are still dropped by the loop that follows.
               */
 1088         error = 0;
 1089         for (i = 0; error == 0 && i < n; i++) {
 1090                 inp = inp_list[i];
 1091                 INP_RLOCK(inp);
 1092                 if (inp->inp_gencnt <= gencnt) {
 1093                         struct xinpcb xi;
 1094 
                              /* Convert under the pcb lock, copy out after dropping it. */
 1095                         in_pcbtoxinpcb(inp, &xi);
 1096                         INP_RUNLOCK(inp);
 1097                         error = SYSCTL_OUT(req, &xi, sizeof xi);
 1098                 } else
 1099                         INP_RUNLOCK(inp);
 1100         }
              /*
               * Drop every reference taken above, whether or not the pcb was
               * copied out.  NOTE(review): a non-zero in_pcbrele_rlocked() return
               * presumably means the pcb (and its lock) is already gone, which
               * is why the unlock is skipped -- confirm against in_pcb.c.
               */
 1101         INP_INFO_WLOCK(&V_ripcbinfo);
 1102         for (i = 0; i < n; i++) {
 1103                 inp = inp_list[i];
 1104                 INP_RLOCK(inp);
 1105                 if (!in_pcbrele_rlocked(inp))
 1106                         INP_RUNLOCK(inp);
 1107         }
 1108         INP_INFO_WUNLOCK(&V_ripcbinfo);
 1109 
 1110         if (!error) {
 1112                 /*
 1113                  * Give the user an updated idea of our state.  If the
 1114                  * generation differs from what we told her before, she knows
 1115                  * that something happened while we were processing this
 1116                  * request, and it might be necessary to retry.
 1117                  */
                      /* Uses the function-scope tracker 'et'; its earlier section has ended. */
 1118                 INP_INFO_RLOCK_ET(&V_ripcbinfo, et);
 1119                 xig.xig_gen = V_ripcbinfo.ipi_gencnt;
 1120                 xig.xig_sogen = so_gencnt;
 1121                 xig.xig_count = V_ripcbinfo.ipi_count;
 1122                 INP_INFO_RUNLOCK_ET(&V_ripcbinfo, et);
 1123                 error = SYSCTL_OUT(req, &xig, sizeof xig);
 1124         }
 1125         free(inp_list, M_TEMP);
 1126         return (error);
 1127 }
 1128 
 1129 SYSCTL_PROC(_net_inet_raw, OID_AUTO/*XXX*/, pcblist,
 1130     CTLTYPE_OPAQUE | CTLFLAG_RD, NULL, 0,
 1131     rip_pcblist, "S,xinpcb", "List of active raw IP sockets");
      /*
       * The declaration above registers rip_pcblist() as the handler for a
       * read-only (CTLFLAG_RD), opaque-typed node named "pcblist" under
       * _net_inet_raw, with format string "S,xinpcb".
       */
 1132 
 1133 #ifdef INET
 1134 struct pr_usrreqs rip_usrreqs = {
              /*
               * User-request dispatch table for raw IP sockets.  The rip_*
               * entries are the handlers defined in this file; in_control,
               * in_getpeeraddr, in_getsockaddr and in_pcbsosetlabel are defined
               * elsewhere.  Members not listed here are left to the
               * initializer's defaults.
               */
 1135         .pru_abort =            rip_abort,
 1136         .pru_attach =           rip_attach,
 1137         .pru_bind =             rip_bind,
 1138         .pru_connect =          rip_connect,
 1139         .pru_control =          in_control,
 1140         .pru_detach =           rip_detach,
 1141         .pru_disconnect =       rip_disconnect,
 1142         .pru_peeraddr =         in_getpeeraddr,
 1143         .pru_send =             rip_send,
 1144         .pru_shutdown =         rip_shutdown,
 1145         .pru_sockaddr =         in_getsockaddr,
 1146         .pru_sosetlabel =       in_pcbsosetlabel,
 1147         .pru_close =            rip_close,
 1148 };
 1149 #endif /* INET */

Cache object: 2442a57c428b6b9fd527ae91fd8c4d72


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]


This page is part of the FreeBSD/Linux Kernel Cross-Reference, and was automatically generated using a modified version of the LXR engine.