The Design and Implementation of the FreeBSD Operating System, Second Edition
Now available: The Design and Implementation of the FreeBSD Operating System (Second Edition)


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]

FreeBSD/Linux Kernel Cross Reference
sys/netinet/raw_ip.c

Version: -  FREEBSD  -  FREEBSD-13-STABLE  -  FREEBSD-13-0  -  FREEBSD-12-STABLE  -  FREEBSD-12-0  -  FREEBSD-11-STABLE  -  FREEBSD-11-0  -  FREEBSD-10-STABLE  -  FREEBSD-10-0  -  FREEBSD-9-STABLE  -  FREEBSD-9-0  -  FREEBSD-8-STABLE  -  FREEBSD-8-0  -  FREEBSD-7-STABLE  -  FREEBSD-7-0  -  FREEBSD-6-STABLE  -  FREEBSD-6-0  -  FREEBSD-5-STABLE  -  FREEBSD-5-0  -  FREEBSD-4-STABLE  -  FREEBSD-3-STABLE  -  FREEBSD22  -  l41  -  OPENBSD  -  linux-2.6  -  MK84  -  PLAN9  -  xnu-8792 
SearchContext: -  none  -  3  -  10 

    1 /*-
    2  * SPDX-License-Identifier: BSD-3-Clause
    3  *
    4  * Copyright (c) 1982, 1986, 1988, 1993
    5  *      The Regents of the University of California.
    6  * All rights reserved.
    7  *
    8  * Redistribution and use in source and binary forms, with or without
    9  * modification, are permitted provided that the following conditions
   10  * are met:
   11  * 1. Redistributions of source code must retain the above copyright
   12  *    notice, this list of conditions and the following disclaimer.
   13  * 2. Redistributions in binary form must reproduce the above copyright
   14  *    notice, this list of conditions and the following disclaimer in the
   15  *    documentation and/or other materials provided with the distribution.
   16  * 3. Neither the name of the University nor the names of its contributors
   17  *    may be used to endorse or promote products derived from this software
   18  *    without specific prior written permission.
   19  *
   20  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
   21  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
   22  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
   23  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
   24  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
   25  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
   26  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
   27  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
   28  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
   29  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
   30  * SUCH DAMAGE.
   31  *
   32  *      @(#)raw_ip.c    8.7 (Berkeley) 5/15/95
   33  */
   34 
   35 #include <sys/cdefs.h>
   36 __FBSDID("$FreeBSD$");
   37 
   38 #include "opt_inet.h"
   39 #include "opt_inet6.h"
   40 #include "opt_ipsec.h"
   41 #include "opt_route.h"
   42 
   43 #include <sys/param.h>
   44 #include <sys/jail.h>
   45 #include <sys/kernel.h>
   46 #include <sys/eventhandler.h>
   47 #include <sys/lock.h>
   48 #include <sys/malloc.h>
   49 #include <sys/mbuf.h>
   50 #include <sys/priv.h>
   51 #include <sys/proc.h>
   52 #include <sys/protosw.h>
   53 #include <sys/rwlock.h>
   54 #include <sys/signalvar.h>
   55 #include <sys/socket.h>
   56 #include <sys/socketvar.h>
   57 #include <sys/sx.h>
   58 #include <sys/sysctl.h>
   59 #include <sys/systm.h>
   60 
   61 #include <vm/uma.h>
   62 
   63 #include <net/if.h>
   64 #include <net/if_var.h>
   65 #include <net/route.h>
   66 #include <net/route/route_ctl.h>
   67 #include <net/vnet.h>
   68 
   69 #include <netinet/in.h>
   70 #include <netinet/in_systm.h>
   71 #include <netinet/in_fib.h>
   72 #include <netinet/in_pcb.h>
   73 #include <netinet/in_var.h>
   74 #include <netinet/if_ether.h>
   75 #include <netinet/ip.h>
   76 #include <netinet/ip_var.h>
   77 #include <netinet/ip_mroute.h>
   78 #include <netinet/ip_icmp.h>
   79 
   80 #include <netipsec/ipsec_support.h>
   81 
   82 #include <machine/stdarg.h>
   83 #include <security/mac/mac_framework.h>
   84 
   85 extern ipproto_input_t *ip_protox[];
   86 
   87 VNET_DEFINE(int, ip_defttl) = IPDEFTTL;
   88 SYSCTL_INT(_net_inet_ip, IPCTL_DEFTTL, ttl, CTLFLAG_VNET | CTLFLAG_RW,
   89     &VNET_NAME(ip_defttl), 0,
   90     "Maximum TTL on IP packets");
   91 
   92 VNET_DEFINE(struct inpcbinfo, ripcbinfo);
   93 #define V_ripcbinfo             VNET(ripcbinfo)
   94 
   95 /*
   96  * Control and data hooks for ipfw, dummynet, divert and so on.
   97  * The data hooks are not used here but it is convenient
   98  * to keep them all in one place.
   99  */
  100 VNET_DEFINE(ip_fw_chk_ptr_t, ip_fw_chk_ptr) = NULL;
  101 VNET_DEFINE(ip_fw_ctl_ptr_t, ip_fw_ctl_ptr) = NULL;
  102 
  103 int     (*ip_dn_ctl_ptr)(struct sockopt *);
  104 int     (*ip_dn_io_ptr)(struct mbuf **, struct ip_fw_args *);
  105 void    (*ip_divert_ptr)(struct mbuf *, bool);
  106 int     (*ng_ipfw_input_p)(struct mbuf **, struct ip_fw_args *, bool);
  107 
  108 #ifdef INET
  109 /*
  110  * Hooks for multicast routing. They all default to NULL, so leave them not
  111  * initialized and rely on BSS being set to 0.
  112  */
  113 
  114 /*
  115  * The socket used to communicate with the multicast routing daemon.
  116  */
  117 VNET_DEFINE(struct socket *, ip_mrouter);
  118 
  119 /*
  120  * The various mrouter and rsvp functions.
  121  */
  122 int (*ip_mrouter_set)(struct socket *, struct sockopt *);
  123 int (*ip_mrouter_get)(struct socket *, struct sockopt *);
  124 int (*ip_mrouter_done)(void);
  125 int (*ip_mforward)(struct ip *, struct ifnet *, struct mbuf *,
  126                    struct ip_moptions *);
  127 int (*mrt_ioctl)(u_long, caddr_t, int);
  128 int (*legal_vif_num)(int);
  129 u_long (*ip_mcast_src)(int);
  130 
  131 int (*rsvp_input_p)(struct mbuf **, int *, int);
  132 int (*ip_rsvp_vif)(struct socket *, struct sockopt *);
  133 void (*ip_rsvp_force_done)(struct socket *);
  134 #endif /* INET */
  135 
  136 u_long  rip_sendspace = 9216;
  137 SYSCTL_ULONG(_net_inet_raw, OID_AUTO, maxdgram, CTLFLAG_RW,
  138     &rip_sendspace, 0, "Maximum outgoing raw IP datagram size");
  139 
  140 u_long  rip_recvspace = 9216;
  141 SYSCTL_ULONG(_net_inet_raw, OID_AUTO, recvspace, CTLFLAG_RW,
  142     &rip_recvspace, 0, "Maximum space for incoming raw IP datagrams");
  143 
  144 /*
  145  * Hash functions
  146  */
  147 
  148 #define INP_PCBHASH_RAW_SIZE    256
  149 #define INP_PCBHASH_RAW(proto, laddr, faddr, mask) \
  150         (((proto) + (laddr) + (faddr)) % (mask) + 1)
  151 
  152 #ifdef INET
  153 static void
  154 rip_inshash(struct inpcb *inp)
  155 {
  156         struct inpcbinfo *pcbinfo = inp->inp_pcbinfo;
  157         struct inpcbhead *pcbhash;
  158         int hash;
  159 
  160         INP_HASH_WLOCK_ASSERT(pcbinfo);
  161         INP_WLOCK_ASSERT(inp);
  162 
  163         if (inp->inp_ip_p != 0 &&
  164             inp->inp_laddr.s_addr != INADDR_ANY &&
  165             inp->inp_faddr.s_addr != INADDR_ANY) {
  166                 hash = INP_PCBHASH_RAW(inp->inp_ip_p, inp->inp_laddr.s_addr,
  167                     inp->inp_faddr.s_addr, pcbinfo->ipi_hashmask);
  168         } else
  169                 hash = 0;
  170         pcbhash = &pcbinfo->ipi_hashbase[hash];
  171         CK_LIST_INSERT_HEAD(pcbhash, inp, inp_hash);
  172 }
  173 
  174 static void
  175 rip_delhash(struct inpcb *inp)
  176 {
  177 
  178         INP_HASH_WLOCK_ASSERT(inp->inp_pcbinfo);
  179         INP_WLOCK_ASSERT(inp);
  180 
  181         CK_LIST_REMOVE(inp, inp_hash);
  182 }
  183 #endif /* INET */
  184 
  185 INPCBSTORAGE_DEFINE(ripcbstor, inpcb, "rawinp", "ripcb", "rip", "riphash");
  186 
  187 static void
  188 rip_init(void *arg __unused)
  189 {
  190 
  191         in_pcbinfo_init(&V_ripcbinfo, &ripcbstor, INP_PCBHASH_RAW_SIZE, 1);
  192 }
  193 VNET_SYSINIT(rip_init, SI_SUB_PROTO_DOMAIN, SI_ORDER_THIRD, rip_init, NULL);
  194 
  195 #ifdef VIMAGE
  196 static void
  197 rip_destroy(void *unused __unused)
  198 {
  199 
  200         in_pcbinfo_destroy(&V_ripcbinfo);
  201 }
  202 VNET_SYSUNINIT(raw_ip, SI_SUB_PROTO_DOMAIN, SI_ORDER_FOURTH, rip_destroy, NULL);
  203 #endif
  204 
  205 #ifdef INET
  206 static int
  207 rip_append(struct inpcb *inp, struct ip *ip, struct mbuf *m,
  208     struct sockaddr_in *ripsrc)
  209 {
  210         struct socket *so = inp->inp_socket;
  211         struct mbuf *n, *opts = NULL;
  212 
  213         INP_LOCK_ASSERT(inp);
  214 
  215 #if defined(IPSEC) || defined(IPSEC_SUPPORT)
  216         /* check AH/ESP integrity. */
  217         if (IPSEC_ENABLED(ipv4) && IPSEC_CHECK_POLICY(ipv4, m, inp) != 0)
  218                 return (0);
  219 #endif /* IPSEC */
  220 #ifdef MAC
  221         if (mac_inpcb_check_deliver(inp, m) != 0)
  222                 return (0);
  223 #endif
  224         /* Check the minimum TTL for socket. */
  225         if (inp->inp_ip_minttl && inp->inp_ip_minttl > ip->ip_ttl)
  226                 return (0);
  227 
  228         if ((n = m_copym(m, 0, M_COPYALL, M_NOWAIT)) == NULL)
  229                 return (0);
  230 
  231         if ((inp->inp_flags & INP_CONTROLOPTS) ||
  232             (so->so_options & (SO_TIMESTAMP | SO_BINTIME)))
  233                 ip_savecontrol(inp, &opts, ip, n);
  234         SOCKBUF_LOCK(&so->so_rcv);
  235         if (sbappendaddr_locked(&so->so_rcv,
  236             (struct sockaddr *)ripsrc, n, opts) == 0) {
  237                 soroverflow_locked(so);
  238                 m_freem(n);
  239                 if (opts)
  240                         m_freem(opts);
  241                 return (0);
  242         }
  243         sorwakeup_locked(so);
  244 
  245         return (1);
  246 }
  247 
  248 struct rip_inp_match_ctx {
  249         struct ip *ip;
  250         int proto;
  251 };
  252 
  253 static bool
  254 rip_inp_match1(const struct inpcb *inp, void *v)
  255 {
  256         struct rip_inp_match_ctx *ctx = v;
  257 
  258         if (inp->inp_ip_p != ctx->proto)
  259                 return (false);
  260 #ifdef INET6
  261         /* XXX inp locking */
  262         if ((inp->inp_vflag & INP_IPV4) == 0)
  263                 return (false);
  264 #endif
  265         if (inp->inp_laddr.s_addr != ctx->ip->ip_dst.s_addr)
  266                 return (false);
  267         if (inp->inp_faddr.s_addr != ctx->ip->ip_src.s_addr)
  268                 return (false);
  269         return (true);
  270 }
  271 
  272 static bool
  273 rip_inp_match2(const struct inpcb *inp, void *v)
  274 {
  275         struct rip_inp_match_ctx *ctx = v;
  276 
  277         if (inp->inp_ip_p && inp->inp_ip_p != ctx->proto)
  278                 return (false);
  279 #ifdef INET6
  280         /* XXX inp locking */
  281         if ((inp->inp_vflag & INP_IPV4) == 0)
  282                 return (false);
  283 #endif
  284         if (!in_nullhost(inp->inp_laddr) &&
  285             !in_hosteq(inp->inp_laddr, ctx->ip->ip_dst))
  286                 return (false);
  287         if (!in_nullhost(inp->inp_faddr) &&
  288             !in_hosteq(inp->inp_faddr, ctx->ip->ip_src))
  289                 return (false);
  290         return (true);
  291 }
  292 
  293 /*
  294  * Setup generic address and protocol structures for raw_input routine, then
  295  * pass them along with mbuf chain.
  296  */
  297 int
  298 rip_input(struct mbuf **mp, int *offp, int proto)
  299 {
  300         struct rip_inp_match_ctx ctx = {
  301                 .ip = mtod(*mp, struct ip *),
  302                 .proto = proto,
  303         };
  304         struct inpcb_iterator inpi = INP_ITERATOR(&V_ripcbinfo,
  305             INPLOOKUP_RLOCKPCB, rip_inp_match1, &ctx);
  306         struct ifnet *ifp;
  307         struct mbuf *m = *mp;
  308         struct inpcb *inp;
  309         struct sockaddr_in ripsrc;
  310         int appended;
  311 
  312         *mp = NULL;
  313         appended = 0;
  314 
  315         bzero(&ripsrc, sizeof(ripsrc));
  316         ripsrc.sin_len = sizeof(ripsrc);
  317         ripsrc.sin_family = AF_INET;
  318         ripsrc.sin_addr = ctx.ip->ip_src;
  319 
  320         ifp = m->m_pkthdr.rcvif;
  321 
  322         inpi.hash = INP_PCBHASH_RAW(proto, ctx.ip->ip_src.s_addr,
  323             ctx.ip->ip_dst.s_addr, V_ripcbinfo.ipi_hashmask);
  324         while ((inp = inp_next(&inpi)) != NULL) {
  325                 INP_RLOCK_ASSERT(inp);
  326                 if (jailed_without_vnet(inp->inp_cred) &&
  327                     prison_check_ip4(inp->inp_cred, &ctx.ip->ip_dst) != 0) {
  328                         /*
  329                          * XXX: If faddr was bound to multicast group,
  330                          * jailed raw socket will drop datagram.
  331                          */
  332                         continue;
  333                 }
  334                 appended += rip_append(inp, ctx.ip, m, &ripsrc);
  335         }
  336 
  337         inpi.hash = 0;
  338         inpi.match = rip_inp_match2;
  339         MPASS(inpi.inp == NULL);
  340         while ((inp = inp_next(&inpi)) != NULL) {
  341                 INP_RLOCK_ASSERT(inp);
  342                 if (jailed_without_vnet(inp->inp_cred) &&
  343                     !IN_MULTICAST(ntohl(ctx.ip->ip_dst.s_addr)) &&
  344                     prison_check_ip4(inp->inp_cred, &ctx.ip->ip_dst) != 0)
  345                         /*
  346                          * Allow raw socket in jail to receive multicast;
  347                          * assume process had PRIV_NETINET_RAW at attach,
  348                          * and fall through into normal filter path if so.
  349                          */
  350                         continue;
  351                 /*
  352                  * If this raw socket has multicast state, and we
  353                  * have received a multicast, check if this socket
  354                  * should receive it, as multicast filtering is now
  355                  * the responsibility of the transport layer.
  356                  */
  357                 if (inp->inp_moptions != NULL &&
  358                     IN_MULTICAST(ntohl(ctx.ip->ip_dst.s_addr))) {
  359                         /*
  360                          * If the incoming datagram is for IGMP, allow it
  361                          * through unconditionally to the raw socket.
  362                          *
  363                          * In the case of IGMPv2, we may not have explicitly
  364                          * joined the group, and may have set IFF_ALLMULTI
  365                          * on the interface. imo_multi_filter() may discard
  366                          * control traffic we actually need to see.
  367                          *
  368                          * Userland multicast routing daemons should continue
  369                          * filter the control traffic appropriately.
  370                          */
  371                         int blocked;
  372 
  373                         blocked = MCAST_PASS;
  374                         if (proto != IPPROTO_IGMP) {
  375                                 struct sockaddr_in group;
  376 
  377                                 bzero(&group, sizeof(struct sockaddr_in));
  378                                 group.sin_len = sizeof(struct sockaddr_in);
  379                                 group.sin_family = AF_INET;
  380                                 group.sin_addr = ctx.ip->ip_dst;
  381 
  382                                 blocked = imo_multi_filter(inp->inp_moptions,
  383                                     ifp,
  384                                     (struct sockaddr *)&group,
  385                                     (struct sockaddr *)&ripsrc);
  386                         }
  387 
  388                         if (blocked != MCAST_PASS) {
  389                                 IPSTAT_INC(ips_notmember);
  390                                 continue;
  391                         }
  392                 }
  393                 appended += rip_append(inp, ctx.ip, m, &ripsrc);
  394         }
  395         if (appended == 0 && ip_protox[ctx.ip->ip_p] == rip_input) {
  396                 IPSTAT_INC(ips_noproto);
  397                 IPSTAT_DEC(ips_delivered);
  398                 icmp_error(m, ICMP_UNREACH, ICMP_UNREACH_PROTOCOL, 0, 0);
  399         } else
  400                 m_freem(m);
  401         return (IPPROTO_DONE);
  402 }
  403 
  404 /*
  405  * Generate IP header and pass packet to ip_output.  Tack on options user may
  406  * have setup with control call.
  407  */
  408 static int
  409 rip_send(struct socket *so, int pruflags, struct mbuf *m, struct sockaddr *nam,
  410     struct mbuf *control, struct thread *td)
  411 {
  412         struct epoch_tracker et;
  413         struct ip *ip;
  414         struct inpcb *inp;
  415         in_addr_t *dst;
  416         int error, flags, cnt, hlen;
  417         u_char opttype, optlen, *cp;
  418 
  419         inp = sotoinpcb(so);
  420         KASSERT(inp != NULL, ("rip_send: inp == NULL"));
  421 
  422         if (control != NULL) {
  423                 m_freem(control);
  424                 control = NULL;
  425         }
  426 
  427         if (so->so_state & SS_ISCONNECTED) {
  428                 if (nam) {
  429                         error = EISCONN;
  430                         m_freem(m);
  431                         return (error);
  432                 }
  433                 dst = &inp->inp_faddr.s_addr;
  434         } else {
  435                 if (nam == NULL)
  436                         error = ENOTCONN;
  437                 else if (nam->sa_family != AF_INET)
  438                         error = EAFNOSUPPORT;
  439                 else if (nam->sa_len != sizeof(struct sockaddr_in))
  440                         error = EINVAL;
  441                 else
  442                         error = 0;
  443                 if (error != 0) {
  444                         m_freem(m);
  445                         return (error);
  446                 }
  447                 dst = &((struct sockaddr_in *)nam)->sin_addr.s_addr;
  448         }
  449 
  450         flags = ((so->so_options & SO_DONTROUTE) ? IP_ROUTETOIF : 0) |
  451             IP_ALLOWBROADCAST;
  452 
  453         /*
  454          * If the user handed us a complete IP packet, use it.  Otherwise,
  455          * allocate an mbuf for a header and fill it in.
  456          */
  457         if ((inp->inp_flags & INP_HDRINCL) == 0) {
  458                 if (m->m_pkthdr.len + sizeof(struct ip) > IP_MAXPACKET) {
  459                         m_freem(m);
  460                         return(EMSGSIZE);
  461                 }
  462                 M_PREPEND(m, sizeof(struct ip), M_NOWAIT);
  463                 if (m == NULL)
  464                         return(ENOBUFS);
  465 
  466                 INP_RLOCK(inp);
  467                 ip = mtod(m, struct ip *);
  468                 ip->ip_tos = inp->inp_ip_tos;
  469                 if (inp->inp_flags & INP_DONTFRAG)
  470                         ip->ip_off = htons(IP_DF);
  471                 else
  472                         ip->ip_off = htons(0);
  473                 ip->ip_p = inp->inp_ip_p;
  474                 ip->ip_len = htons(m->m_pkthdr.len);
  475                 ip->ip_src = inp->inp_laddr;
  476                 ip->ip_dst.s_addr = *dst;
  477 #ifdef ROUTE_MPATH
  478                 if (CALC_FLOWID_OUTBOUND) {
  479                         uint32_t hash_type, hash_val;
  480 
  481                         hash_val = fib4_calc_software_hash(ip->ip_src,
  482                             ip->ip_dst, 0, 0, ip->ip_p, &hash_type);
  483                         m->m_pkthdr.flowid = hash_val;
  484                         M_HASHTYPE_SET(m, hash_type);
  485                         flags |= IP_NODEFAULTFLOWID;
  486                 }
  487 #endif
  488                 if (jailed(inp->inp_cred)) {
  489                         /*
  490                          * prison_local_ip4() would be good enough but would
  491                          * let a source of INADDR_ANY pass, which we do not
  492                          * want to see from jails.
  493                          */
  494                         if (ip->ip_src.s_addr == INADDR_ANY) {
  495                                 NET_EPOCH_ENTER(et);
  496                                 error = in_pcbladdr(inp, &ip->ip_dst,
  497                                     &ip->ip_src, inp->inp_cred);
  498                                 NET_EPOCH_EXIT(et);
  499                         } else {
  500                                 error = prison_local_ip4(inp->inp_cred,
  501                                     &ip->ip_src);
  502                         }
  503                         if (error != 0) {
  504                                 INP_RUNLOCK(inp);
  505                                 m_freem(m);
  506                                 return (error);
  507                         }
  508                 }
  509                 ip->ip_ttl = inp->inp_ip_ttl;
  510         } else {
  511                 if (m->m_pkthdr.len > IP_MAXPACKET) {
  512                         m_freem(m);
  513                         return (EMSGSIZE);
  514                 }
  515                 if (m->m_pkthdr.len < sizeof(*ip)) {
  516                         m_freem(m);
  517                         return (EINVAL);
  518                 }
  519                 m = m_pullup(m, sizeof(*ip));
  520                 if (m == NULL)
  521                         return (ENOMEM);
  522                 ip = mtod(m, struct ip *);
  523                 hlen = ip->ip_hl << 2;
  524                 if (m->m_len < hlen) {
  525                         m = m_pullup(m, hlen);
  526                         if (m == NULL)
  527                                 return (EINVAL);
  528                         ip = mtod(m, struct ip *);
  529                 }
  530 #ifdef ROUTE_MPATH
  531                 if (CALC_FLOWID_OUTBOUND) {
  532                         uint32_t hash_type, hash_val;
  533 
  534                         hash_val = fib4_calc_software_hash(ip->ip_dst,
  535                             ip->ip_src, 0, 0, ip->ip_p, &hash_type);
  536                         m->m_pkthdr.flowid = hash_val;
  537                         M_HASHTYPE_SET(m, hash_type);
  538                         flags |= IP_NODEFAULTFLOWID;
  539                 }
  540 #endif
  541                 INP_RLOCK(inp);
  542                 /*
  543                  * Don't allow both user specified and setsockopt options,
  544                  * and don't allow packet length sizes that will crash.
  545                  */
  546                 if ((hlen < sizeof (*ip))
  547                     || ((hlen > sizeof (*ip)) && inp->inp_options)
  548                     || (ntohs(ip->ip_len) != m->m_pkthdr.len)) {
  549                         INP_RUNLOCK(inp);
  550                         m_freem(m);
  551                         return (EINVAL);
  552                 }
  553                 error = prison_check_ip4(inp->inp_cred, &ip->ip_src);
  554                 if (error != 0) {
  555                         INP_RUNLOCK(inp);
  556                         m_freem(m);
  557                         return (error);
  558                 }
  559                 /*
  560                  * Don't allow IP options which do not have the required
  561                  * structure as specified in section 3.1 of RFC 791 on
  562                  * pages 15-23.
  563                  */
  564                 cp = (u_char *)(ip + 1);
  565                 cnt = hlen - sizeof (struct ip);
  566                 for (; cnt > 0; cnt -= optlen, cp += optlen) {
  567                         opttype = cp[IPOPT_OPTVAL];
  568                         if (opttype == IPOPT_EOL)
  569                                 break;
  570                         if (opttype == IPOPT_NOP) {
  571                                 optlen = 1;
  572                                 continue;
  573                         }
  574                         if (cnt < IPOPT_OLEN + sizeof(u_char)) {
  575                                 INP_RUNLOCK(inp);
  576                                 m_freem(m);
  577                                 return (EINVAL);
  578                         }
  579                         optlen = cp[IPOPT_OLEN];
  580                         if (optlen < IPOPT_OLEN + sizeof(u_char) ||
  581                             optlen > cnt) {
  582                                 INP_RUNLOCK(inp);
  583                                 m_freem(m);
  584                                 return (EINVAL);
  585                         }
  586                 }
  587                 /*
  588                  * This doesn't allow application to specify ID of zero,
  589                  * but we got this limitation from the beginning of history.
  590                  */
  591                 if (ip->ip_id == 0)
  592                         ip_fillid(ip);
  593 
  594                 /*
  595                  * XXX prevent ip_output from overwriting header fields.
  596                  */
  597                 flags |= IP_RAWOUTPUT;
  598                 IPSTAT_INC(ips_rawout);
  599         }
  600 
  601         if (inp->inp_flags & INP_ONESBCAST)
  602                 flags |= IP_SENDONES;
  603 
  604 #ifdef MAC
  605         mac_inpcb_create_mbuf(inp, m);
  606 #endif
  607 
  608         NET_EPOCH_ENTER(et);
  609         error = ip_output(m, inp->inp_options, NULL, flags,
  610             inp->inp_moptions, inp);
  611         NET_EPOCH_EXIT(et);
  612         INP_RUNLOCK(inp);
  613         return (error);
  614 }
  615 
  616 /*
  617  * Raw IP socket option processing.
  618  *
  619  * IMPORTANT NOTE regarding access control: Traditionally, raw sockets could
  620  * only be created by a privileged process, and as such, socket option
  621  * operations to manage system properties on any raw socket were allowed to
  622  * take place without explicit additional access control checks.  However,
  623  * raw sockets can now also be created in jail(), and therefore explicit
  624  * checks are now required.  Likewise, raw sockets can be used by a process
  625  * after it gives up privilege, so some caution is required.  For options
  626  * passed down to the IP layer via ip_ctloutput(), checks are assumed to be
  627  * performed in ip_ctloutput() and therefore no check occurs here.
  628  * Unilaterally checking priv_check() here breaks normal IP socket option
  629  * operations on raw sockets.
  630  *
  631  * When adding new socket options here, make sure to add access control
  632  * checks here as necessary.
  633  *
  634  * XXX-BZ inp locking?
  635  */
  636 int
  637 rip_ctloutput(struct socket *so, struct sockopt *sopt)
  638 {
  639         struct  inpcb *inp = sotoinpcb(so);
  640         int     error, optval;
  641 
  642         if (sopt->sopt_level != IPPROTO_IP) {
  643                 if ((sopt->sopt_level == SOL_SOCKET) &&
  644                     (sopt->sopt_name == SO_SETFIB)) {
  645                         inp->inp_inc.inc_fibnum = so->so_fibnum;
  646                         return (0);
  647                 }
  648                 return (EINVAL);
  649         }
  650 
  651         error = 0;
  652         switch (sopt->sopt_dir) {
  653         case SOPT_GET:
  654                 switch (sopt->sopt_name) {
  655                 case IP_HDRINCL:
  656                         optval = inp->inp_flags & INP_HDRINCL;
  657                         error = sooptcopyout(sopt, &optval, sizeof optval);
  658                         break;
  659 
  660                 case IP_FW3:    /* generic ipfw v.3 functions */
  661                 case IP_FW_ADD: /* ADD actually returns the body... */
  662                 case IP_FW_GET:
  663                 case IP_FW_TABLE_GETSIZE:
  664                 case IP_FW_TABLE_LIST:
  665                 case IP_FW_NAT_GET_CONFIG:
  666                 case IP_FW_NAT_GET_LOG:
  667                         if (V_ip_fw_ctl_ptr != NULL)
  668                                 error = V_ip_fw_ctl_ptr(sopt);
  669                         else
  670                                 error = ENOPROTOOPT;
  671                         break;
  672 
  673                 case IP_DUMMYNET3:      /* generic dummynet v.3 functions */
  674                 case IP_DUMMYNET_GET:
  675                         if (ip_dn_ctl_ptr != NULL)
  676                                 error = ip_dn_ctl_ptr(sopt);
  677                         else
  678                                 error = ENOPROTOOPT;
  679                         break ;
  680 
  681                 case MRT_INIT:
  682                 case MRT_DONE:
  683                 case MRT_ADD_VIF:
  684                 case MRT_DEL_VIF:
  685                 case MRT_ADD_MFC:
  686                 case MRT_DEL_MFC:
  687                 case MRT_VERSION:
  688                 case MRT_ASSERT:
  689                 case MRT_API_SUPPORT:
  690                 case MRT_API_CONFIG:
  691                 case MRT_ADD_BW_UPCALL:
  692                 case MRT_DEL_BW_UPCALL:
  693                         error = priv_check(curthread, PRIV_NETINET_MROUTE);
  694                         if (error != 0)
  695                                 return (error);
  696                         if (inp->inp_ip_p != IPPROTO_IGMP)
  697                                 return (EOPNOTSUPP);
  698                         error = ip_mrouter_get ? ip_mrouter_get(so, sopt) :
  699                                 EOPNOTSUPP;
  700                         break;
  701 
  702                 default:
  703                         error = ip_ctloutput(so, sopt);
  704                         break;
  705                 }
  706                 break;
  707 
  708         case SOPT_SET:
  709                 switch (sopt->sopt_name) {
  710                 case IP_HDRINCL:
  711                         error = sooptcopyin(sopt, &optval, sizeof optval,
  712                                             sizeof optval);
  713                         if (error)
  714                                 break;
  715                         if (optval)
  716                                 inp->inp_flags |= INP_HDRINCL;
  717                         else
  718                                 inp->inp_flags &= ~INP_HDRINCL;
  719                         break;
  720 
  721                 case IP_FW3:    /* generic ipfw v.3 functions */
  722                 case IP_FW_ADD:
  723                 case IP_FW_DEL:
  724                 case IP_FW_FLUSH:
  725                 case IP_FW_ZERO:
  726                 case IP_FW_RESETLOG:
  727                 case IP_FW_TABLE_ADD:
  728                 case IP_FW_TABLE_DEL:
  729                 case IP_FW_TABLE_FLUSH:
  730                 case IP_FW_NAT_CFG:
  731                 case IP_FW_NAT_DEL:
  732                         if (V_ip_fw_ctl_ptr != NULL)
  733                                 error = V_ip_fw_ctl_ptr(sopt);
  734                         else
  735                                 error = ENOPROTOOPT;
  736                         break;
  737 
  738                 case IP_DUMMYNET3:      /* generic dummynet v.3 functions */
  739                 case IP_DUMMYNET_CONFIGURE:
  740                 case IP_DUMMYNET_DEL:
  741                 case IP_DUMMYNET_FLUSH:
  742                         if (ip_dn_ctl_ptr != NULL)
  743                                 error = ip_dn_ctl_ptr(sopt);
  744                         else
  745                                 error = ENOPROTOOPT ;
  746                         break ;
  747 
  748                 case IP_RSVP_ON:
  749                         error = priv_check(curthread, PRIV_NETINET_MROUTE);
  750                         if (error != 0)
  751                                 return (error);
  752                         if (inp->inp_ip_p != IPPROTO_RSVP)
  753                                 return (EOPNOTSUPP);
  754                         error = ip_rsvp_init(so);
  755                         break;
  756 
  757                 case IP_RSVP_OFF:
  758                         error = priv_check(curthread, PRIV_NETINET_MROUTE);
  759                         if (error != 0)
  760                                 return (error);
  761                         error = ip_rsvp_done();
  762                         break;
  763 
  764                 case IP_RSVP_VIF_ON:
  765                 case IP_RSVP_VIF_OFF:
  766                         error = priv_check(curthread, PRIV_NETINET_MROUTE);
  767                         if (error != 0)
  768                                 return (error);
  769                         if (inp->inp_ip_p != IPPROTO_RSVP)
  770                                 return (EOPNOTSUPP);
  771                         error = ip_rsvp_vif ?
  772                                 ip_rsvp_vif(so, sopt) : EINVAL;
  773                         break;
  774 
  775                 case MRT_INIT:
  776                 case MRT_DONE:
  777                 case MRT_ADD_VIF:
  778                 case MRT_DEL_VIF:
  779                 case MRT_ADD_MFC:
  780                 case MRT_DEL_MFC:
  781                 case MRT_VERSION:
  782                 case MRT_ASSERT:
  783                 case MRT_API_SUPPORT:
  784                 case MRT_API_CONFIG:
  785                 case MRT_ADD_BW_UPCALL:
  786                 case MRT_DEL_BW_UPCALL:
  787                         error = priv_check(curthread, PRIV_NETINET_MROUTE);
  788                         if (error != 0)
  789                                 return (error);
  790                         if (inp->inp_ip_p != IPPROTO_IGMP)
  791                                 return (EOPNOTSUPP);
  792                         error = ip_mrouter_set ? ip_mrouter_set(so, sopt) :
  793                                         EOPNOTSUPP;
  794                         break;
  795 
  796                 default:
  797                         error = ip_ctloutput(so, sopt);
  798                         break;
  799                 }
  800                 break;
  801         }
  802 
  803         return (error);
  804 }
  805 
  806 void
  807 rip_ctlinput(struct icmp *icmp)
  808 {
  809 #if defined(IPSEC) || defined(IPSEC_SUPPORT)
  810         if (IPSEC_ENABLED(ipv4))
  811                 IPSEC_CTLINPUT(ipv4, icmp);
  812 #endif
  813 }
  814 
  815 static int
  816 rip_attach(struct socket *so, int proto, struct thread *td)
  817 {
  818         struct inpcb *inp;
  819         int error;
  820 
  821         inp = sotoinpcb(so);
  822         KASSERT(inp == NULL, ("rip_attach: inp != NULL"));
  823 
  824         error = priv_check(td, PRIV_NETINET_RAW);
  825         if (error)
  826                 return (error);
  827         if (proto >= IPPROTO_MAX || proto < 0)
  828                 return EPROTONOSUPPORT;
  829         error = soreserve(so, rip_sendspace, rip_recvspace);
  830         if (error)
  831                 return (error);
  832         error = in_pcballoc(so, &V_ripcbinfo);
  833         if (error)
  834                 return (error);
  835         inp = (struct inpcb *)so->so_pcb;
  836         inp->inp_ip_p = proto;
  837         inp->inp_ip_ttl = V_ip_defttl;
  838         INP_HASH_WLOCK(&V_ripcbinfo);
  839         rip_inshash(inp);
  840         INP_HASH_WUNLOCK(&V_ripcbinfo);
  841         INP_WUNLOCK(inp);
  842         return (0);
  843 }
  844 
      /*
       * Tear down the raw IP PCB of a socket.
       *
       * Asserts that a PCB is attached and that its foreign address is
       * already INADDR_ANY.  Shuts down multicast routing and RSVP state
       * owned by this socket, removes the PCB from the raw-IP hash, then
       * detaches and frees it.
       */
  845 static void
  846 rip_detach(struct socket *so)
  847 {
  848         struct inpcb *inp;
  849 
  850         inp = sotoinpcb(so);
  851         KASSERT(inp != NULL, ("rip_detach: inp == NULL"));
  852         KASSERT(inp->inp_faddr.s_addr == INADDR_ANY,
  853             ("rip_detach: not closed"));
  854 
  855         /* Disable mrouter first */
  856         if (so == V_ip_mrouter && ip_mrouter_done)
  857                 ip_mrouter_done();
  858 
              /* Unhash with the PCB write lock and the hash write lock held. */
  859         INP_WLOCK(inp);
  860         INP_HASH_WLOCK(&V_ripcbinfo);
  861         rip_delhash(inp);
  862         INP_HASH_WUNLOCK(&V_ripcbinfo);
  863 
              /*
               * ip_rsvp_force_done is an optional hook (checked for NULL);
               * ip_rsvp_done() runs only when this socket is V_ip_rsvpd.
               */
  864         if (ip_rsvp_force_done)
  865                 ip_rsvp_force_done(so);
  866         if (so == V_ip_rsvpd)
  867                 ip_rsvp_done();
              /*
               * NOTE(review): there is no INP_WUNLOCK() in this function;
               * presumably in_pcbfree() consumes the write lock -- confirm.
               */
  868         in_pcbdetach(inp);
  869         in_pcbfree(inp);
  870 }
  871 
      /*
       * Common disconnect helper used by rip_abort(), rip_close() and
       * rip_disconnect().
       *
       * Resets the PCB's foreign address to INADDR_ANY, re-inserting it
       * into the hash around the change, and clears SS_ISCONNECTED on the
       * socket.  The PCB write lock is held for the whole operation.
       */
  872 static void
  873 rip_dodisconnect(struct socket *so, struct inpcb *inp)
  874 {
  875         struct inpcbinfo *pcbinfo;
  876 
  877         pcbinfo = inp->inp_pcbinfo;
  878         INP_WLOCK(inp);
              /* Remove, modify, re-insert: all under the hash write lock. */
  879         INP_HASH_WLOCK(pcbinfo);
  880         rip_delhash(inp);
  881         inp->inp_faddr.s_addr = INADDR_ANY;
  882         rip_inshash(inp);
  883         INP_HASH_WUNLOCK(pcbinfo);
              /* so_state is updated under the socket lock. */
  884         SOCK_LOCK(so);
  885         so->so_state &= ~SS_ISCONNECTED;
  886         SOCK_UNLOCK(so);
  887         INP_WUNLOCK(inp);
  888 }
  889 
  890 static void
  891 rip_abort(struct socket *so)
  892 {
  893         struct inpcb *inp;
  894 
  895         inp = sotoinpcb(so);
  896         KASSERT(inp != NULL, ("rip_abort: inp == NULL"));
  897 
  898         rip_dodisconnect(so, inp);
  899 }
  900 
  901 static void
  902 rip_close(struct socket *so)
  903 {
  904         struct inpcb *inp;
  905 
  906         inp = sotoinpcb(so);
  907         KASSERT(inp != NULL, ("rip_close: inp == NULL"));
  908 
  909         rip_dodisconnect(so, inp);
  910 }
  911 
  912 static int
  913 rip_disconnect(struct socket *so)
  914 {
  915         struct inpcb *inp;
  916 
  917         if ((so->so_state & SS_ISCONNECTED) == 0)
  918                 return (ENOTCONN);
  919 
  920         inp = sotoinpcb(so);
  921         KASSERT(inp != NULL, ("rip_disconnect: inp == NULL"));
  922 
  923         rip_dodisconnect(so, inp);
  924         return (0);
  925 }
  926 
      /*
       * pr_bind handler: set the local address of a raw IP socket.
       *
       * Returns:
       *   EAFNOSUPPORT   - nam->sa_family is not AF_INET
       *   EINVAL         - nam->sa_len is not sizeof(struct sockaddr_in)
       *   (other)        - error from prison_check_ip4()
       *   EADDRNOTAVAIL  - no interfaces exist, the family check fails, or a
       *                    non-zero address is neither covered by
       *                    INP_BINDANY nor accepted by
       *                    ifa_ifwithaddr_check()
       *   0              - local address stored and PCB re-hashed
       */
  927 static int
  928 rip_bind(struct socket *so, struct sockaddr *nam, struct thread *td)
  929 {
  930         struct sockaddr_in *addr = (struct sockaddr_in *)nam;
  931         struct inpcb *inp;
  932         int error;
  933 
  934         if (nam->sa_family != AF_INET)
  935                 return (EAFNOSUPPORT);
  936         if (nam->sa_len != sizeof(*addr))
  937                 return (EINVAL);
  938 
  939         error = prison_check_ip4(td->td_ucred, &addr->sin_addr);
  940         if (error != 0)
  941                 return (error);
  942 
  943         inp = sotoinpcb(so);
  944         KASSERT(inp != NULL, ("rip_bind: inp == NULL"));
  945 
              /*
               * NOTE(review): sa_family was already required to be AF_INET
               * above; if sin_family aliases sa_family, the AF_IMPLINK
               * alternative below can never matter -- confirm.
               */
  946         if (CK_STAILQ_EMPTY(&V_ifnet) ||
  947             (addr->sin_family != AF_INET && addr->sin_family != AF_IMPLINK) ||
  948             (addr->sin_addr.s_addr &&
  949              (inp->inp_flags & INP_BINDANY) == 0 &&
  950              ifa_ifwithaddr_check((struct sockaddr *)addr) == 0))
  951                 return (EADDRNOTAVAIL);
  952 
              /* Re-hash around the address change, under PCB and hash write locks. */
  953         INP_WLOCK(inp);
  954         INP_HASH_WLOCK(&V_ripcbinfo);
  955         rip_delhash(inp);
  956         inp->inp_laddr = addr->sin_addr;
  957         rip_inshash(inp);
  958         INP_HASH_WUNLOCK(&V_ripcbinfo);
  959         INP_WUNLOCK(inp);
  960         return (0);
  961 }
  962 
      /*
       * pr_connect handler: set the foreign address of a raw IP socket and
       * mark it connected.
       *
       * Returns:
       *   EINVAL         - nam->sa_len is not sizeof(struct sockaddr_in)
       *   EADDRNOTAVAIL  - no network interfaces exist
       *   EAFNOSUPPORT   - sin_family is neither AF_INET nor AF_IMPLINK
       *   0              - foreign address stored, PCB re-hashed, and
       *                    soisconnected() called
       *
       * The 'td' argument is not used in this function.
       */
  963 static int
  964 rip_connect(struct socket *so, struct sockaddr *nam, struct thread *td)
  965 {
  966         struct sockaddr_in *addr = (struct sockaddr_in *)nam;
  967         struct inpcb *inp;
  968 
  969         if (nam->sa_len != sizeof(*addr))
  970                 return (EINVAL);
  971         if (CK_STAILQ_EMPTY(&V_ifnet))
  972                 return (EADDRNOTAVAIL);
  973         if (addr->sin_family != AF_INET && addr->sin_family != AF_IMPLINK)
  974                 return (EAFNOSUPPORT);
  975 
  976         inp = sotoinpcb(so);
  977         KASSERT(inp != NULL, ("rip_connect: inp == NULL"));
  978 
              /* Re-hash around the address change, under PCB and hash write locks. */
  979         INP_WLOCK(inp);
  980         INP_HASH_WLOCK(&V_ripcbinfo);
  981         rip_delhash(inp);
  982         inp->inp_faddr = addr->sin_addr;
  983         rip_inshash(inp);
  984         INP_HASH_WUNLOCK(&V_ripcbinfo);
              /* The socket is marked connected while the PCB lock is still held. */
  985         soisconnected(so);
  986         INP_WUNLOCK(inp);
  987         return (0);
  988 }
  989 
  990 static int
  991 rip_shutdown(struct socket *so)
  992 {
  993         struct inpcb *inp;
  994 
  995         inp = sotoinpcb(so);
  996         KASSERT(inp != NULL, ("rip_shutdown: inp == NULL"));
  997 
  998         INP_WLOCK(inp);
  999         socantsendmore(so);
 1000         INP_WUNLOCK(inp);
 1001         return (0);
 1002 }
 1003 #endif /* INET */
 1004 
      /*
       * Sysctl handler that exports the list of raw IP PCBs.
       *
       * Output layout: a leading struct xinpgen, one struct xinpcb for each
       * exported PCB, and a trailing struct xinpgen holding refreshed
       * counters.
       *
       * Returns EPERM on any attempt to write (req->newptr set), 0 after
       * answering a size probe (req->oldptr == 0), or the first error from
       * sysctl_wire_old_buffer() / SYSCTL_OUT().
       */
 1005 static int
 1006 rip_pcblist(SYSCTL_HANDLER_ARGS)
 1007 {
 1008         struct inpcb_iterator inpi = INP_ALL_ITERATOR(&V_ripcbinfo,
 1009             INPLOOKUP_RLOCKPCB);
 1010         struct xinpgen xig;
 1011         struct inpcb *inp;
 1012         int error;
 1013 
 1014         if (req->newptr != 0)
 1015                 return (EPERM);
 1016 
              /*
               * Size probe: report room for the current PCB count plus
               * slack of max(n / 8, 10) entries, and two xinpgen records.
               */
 1017         if (req->oldptr == 0) {
 1018                 int n;
 1019 
 1020                 n = V_ripcbinfo.ipi_count;
 1021                 n += imax(n / 8, 10);
 1022                 req->oldidx = 2 * (sizeof xig) + n * sizeof(struct xinpcb);
 1023                 return (0);
 1024         }
 1025 
 1026         if ((error = sysctl_wire_old_buffer(req, 0)) != 0)
 1027                 return (error);
 1028 
              /* Leading header: snapshot of count and generation numbers. */
 1029         bzero(&xig, sizeof(xig));
 1030         xig.xig_len = sizeof xig;
 1031         xig.xig_count = V_ripcbinfo.ipi_count;
 1032         xig.xig_gen = V_ripcbinfo.ipi_gencnt;
 1033         xig.xig_sogen = so_gencnt;
 1034         error = SYSCTL_OUT(req, &xig, sizeof xig);
 1035         if (error)
 1036                 return (error);
 1037 
              /*
               * Export only PCBs whose generation is not newer than the
               * snapshot and for which cr_canseeinpcb() returns 0 for the
               * requesting thread's credentials.
               */
 1038         while ((inp = inp_next(&inpi)) != NULL) {
 1039                 if (inp->inp_gencnt <= xig.xig_gen &&
 1040                     cr_canseeinpcb(req->td->td_ucred, inp) == 0) {
 1041                         struct xinpcb xi;
 1042 
 1043                         in_pcbtoxinpcb(inp, &xi);
 1044                         error = SYSCTL_OUT(req, &xi, sizeof xi);
 1045                         if (error) {
                                      /*
                                       * NOTE(review): presumably inp_next()
                                       * returns each PCB read-locked
                                       * (INPLOOKUP_RLOCKPCB) and drops that
                                       * lock on the next call, so leaving the
                                       * loop early needs this explicit unlock
                                       * -- confirm.
                                       */
 1046                                 INP_RUNLOCK(inp);
 1047                                 break;
 1048                         }
 1049                 }
 1050         }
 1051 
 1052         if (!error) {
 1053                 /*
 1054                  * Give the user an updated idea of our state.  If the
 1055                  * generation differs from what we told her before, she knows
 1056                  * that something happened while we were processing this
 1057                  * request, and it might be necessary to retry.
 1058                  */
 1059                 xig.xig_gen = V_ripcbinfo.ipi_gencnt;
 1060                 xig.xig_sogen = so_gencnt;
 1061                 xig.xig_count = V_ripcbinfo.ipi_count;
 1062                 error = SYSCTL_OUT(req, &xig, sizeof xig);
 1063         }
 1064 
 1065         return (error);
 1066 }
 1067 
      /*
       * Register the read-only, opaque "pcblist" node under _net_inet_raw;
       * reads are served by rip_pcblist() and the declared format is
       * "S,xinpcb".
       */
 1068 SYSCTL_PROC(_net_inet_raw, OID_AUTO/*XXX*/, pcblist,
 1069     CTLTYPE_OPAQUE | CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, 0,
 1070     rip_pcblist, "S,xinpcb",
 1071     "List of active raw IP sockets");
 1072 
 1073 #ifdef INET
 1074 struct protosw rip_protosw = {
 1075         .pr_type =              SOCK_RAW,
 1076         .pr_flags =             PR_ATOMIC|PR_ADDR,
 1077         .pr_ctloutput =         rip_ctloutput,
 1078         .pr_abort =             rip_abort,
 1079         .pr_attach =            rip_attach,
 1080         .pr_bind =              rip_bind,
 1081         .pr_connect =           rip_connect,
 1082         .pr_control =           in_control,
 1083         .pr_detach =            rip_detach,
 1084         .pr_disconnect =        rip_disconnect,
 1085         .pr_peeraddr =          in_getpeeraddr,
 1086         .pr_send =              rip_send,
 1087         .pr_shutdown =          rip_shutdown,
 1088         .pr_sockaddr =          in_getsockaddr,
 1089         .pr_sosetlabel =        in_pcbsosetlabel,
 1090         .pr_close =             rip_close
 1091 };
 1092 #endif /* INET */

Cache object: 6397a39222bfc361e2bc1cfb5165366e


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]


This page is part of the FreeBSD/Linux Kernel Cross-Reference, and was automatically generated using a modified version of the LXR engine.