FreeBSD/Linux Kernel Cross Reference
sys/netinet/raw_ip.c
1 /* $OpenBSD: raw_ip.c,v 1.151 2023/01/22 12:05:44 mvs Exp $ */
2 /* $NetBSD: raw_ip.c,v 1.25 1996/02/18 18:58:33 christos Exp $ */
3
4 /*
5 * Copyright (c) 1982, 1986, 1988, 1993
6 * The Regents of the University of California. All rights reserved.
7 *
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
10 * are met:
11 * 1. Redistributions of source code must retain the above copyright
12 * notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in the
15 * documentation and/or other materials provided with the distribution.
16 * 3. Neither the name of the University nor the names of its contributors
17 * may be used to endorse or promote products derived from this software
18 * without specific prior written permission.
19 *
20 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
21 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
24 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
25 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
26 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
27 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
28 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
29 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
30 * SUCH DAMAGE.
31 *
32 * @(#)COPYRIGHT 1.1 (NRL) 17 January 1995
33 *
34 * NRL grants permission for redistribution and use in source and binary
35 * forms, with or without modification, of the software and documentation
36 * created at NRL provided that the following conditions are met:
37 *
38 * 1. Redistributions of source code must retain the above copyright
39 * notice, this list of conditions and the following disclaimer.
40 * 2. Redistributions in binary form must reproduce the above copyright
41 * notice, this list of conditions and the following disclaimer in the
42 * documentation and/or other materials provided with the distribution.
43 * 3. All advertising materials mentioning features or use of this software
44 * must display the following acknowledgements:
45 * This product includes software developed by the University of
46 * California, Berkeley and its contributors.
47 * This product includes software developed at the Information
48 * Technology Division, US Naval Research Laboratory.
49 * 4. Neither the name of the NRL nor the names of its contributors
50 * may be used to endorse or promote products derived from this software
51 * without specific prior written permission.
52 *
53 * THE SOFTWARE PROVIDED BY NRL IS PROVIDED BY NRL AND CONTRIBUTORS ``AS
54 * IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
55 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
56 * PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NRL OR
57 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
58 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
59 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
60 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
61 * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
62 * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
63 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
64 *
65 * The views and conclusions contained in the software and documentation
66 * are those of the authors and should not be interpreted as representing
67 * official policies, either expressed or implied, of the US Naval
68 * Research Laboratory (NRL).
69 */
70
71 #include <sys/param.h>
72 #include <sys/systm.h>
73 #include <sys/mbuf.h>
74 #include <sys/socket.h>
75 #include <sys/protosw.h>
76 #include <sys/socketvar.h>
77
78 #include <net/if.h>
79 #include <net/if_var.h>
80 #include <net/route.h>
81
82 #include <netinet/in.h>
83 #include <netinet/ip.h>
84 #include <netinet/ip_mroute.h>
85 #include <netinet/ip_var.h>
86 #include <netinet/in_pcb.h>
87 #include <netinet/in_var.h>
88 #include <netinet/ip_icmp.h>
89
90 #include <net/pfvar.h>
91
92 #include "pf.h"
93
/*
 * PCB table for raw IPv4 sockets: initialized by rip_init(), handed to
 * in_pcballoc() by rip_attach() and walked by rip_input().
 */
94 struct inpcbtable rawcbtable;
95
96 /*
97 * Nominal space allocated to a raw ip socket.
98 */
/* Initial values of rip_sendspace and rip_recvspace respectively. */
99 #define RIPSNDQ 8192
100 #define RIPRCVQ 8192
101
102 /*
103 * Raw interface to IP protocol.
104 */
105
/*
 * User-request handler table for raw IPv4 sockets.  Attach, detach,
 * locking, bind, connect, disconnect, shutdown and send use the rip_*()
 * routines of this file; control, sockaddr and peeraddr are served by
 * the in_*() routines.
 */
106 const struct pr_usrreqs rip_usrreqs = {
107 .pru_attach = rip_attach,
108 .pru_detach = rip_detach,
109 .pru_lock = rip_lock,
110 .pru_unlock = rip_unlock,
111 .pru_bind = rip_bind,
112 .pru_connect = rip_connect,
113 .pru_disconnect = rip_disconnect,
114 .pru_shutdown = rip_shutdown,
115 .pru_send = rip_send,
116 .pru_control = in_control,
117 .pru_sockaddr = in_sockaddr,
118 .pru_peeraddr = in_peeraddr,
119 };
120
121 /*
122 * Initialize raw connection block q.
123 */
124 void
125 rip_init(void)
126 {
127 in_pcbinit(&rawcbtable, 1);
128 }
129
130 struct mbuf *rip_chkhdr(struct mbuf *, struct mbuf *);
131
/*
 * Deliver a received IPv4 packet to every raw socket in rawcbtable whose
 * filters accept it.
 *
 * mp:    in: pointer to the packet; the header is read from its front.
 * offp:  not used by this function.
 * proto: not used by this function (the protocol is read from the header).
 * af:    must be AF_INET (asserted).
 *
 * Matching PCBs are first collected on a local list while the table
 * mutex is held, then the packet is appended to each socket's receive
 * buffer after the mutex has been dropped.  The table's inpt_notify
 * rwlock is write-held across both phases.
 *
 * Always returns IPPROTO_DONE.  The packet is never handed back to the
 * caller: it is queued on a socket, freed, or passed to icmp_error().
 */
132 int
133 rip_input(struct mbuf **mp, int *offp, int proto, int af)
134 {
135 struct mbuf *m = *mp;
136 struct ip *ip = mtod(m, struct ip *);
137 struct inpcb *inp;
138 SIMPLEQ_HEAD(, inpcb) inpcblist;
139 struct in_addr *key;
140 struct counters_ref ref;
141 uint64_t *counters;
142 struct sockaddr_in ripsrc;
143
144 KASSERT(af == AF_INET);
145
/* Source address reported to each receiving socket via sbappendaddr(). */
146 memset(&ripsrc, 0, sizeof(ripsrc));
147 ripsrc.sin_family = AF_INET;
148 ripsrc.sin_len = sizeof(ripsrc);
149 ripsrc.sin_addr = ip->ip_src;
150
/*
 * key is the address compared against each PCB's bound local address.
 * It is the header destination unless pf diverted the packet with
 * PF_DIVERT_TO, in which case the divert address is used instead.
 */
151 key = &ip->ip_dst;
152 #if NPF > 0
153 if (m->m_pkthdr.pf.flags & PF_TAG_DIVERTED) {
154 struct pf_divert *divert;
155
156 divert = pf_find_divert(m);
157 KASSERT(divert != NULL);
158 switch (divert->type) {
159 case PF_DIVERT_TO:
160 key = &divert->addr.v4;
161 break;
162 case PF_DIVERT_REPLY:
163 break;
164 default:
165 panic("%s: unknown divert type %d, mbuf %p, divert %p",
166 __func__, divert->type, m, divert);
167 }
168 }
169 #endif
/*
 * Phase 1: scan the table under inpt_mtx and take a reference on every
 * PCB that accepts the packet.  A PCB is skipped when its socket cannot
 * receive any more, it is an IPv6 PCB, its routing table differs from
 * the packet's (compared through rtable_l2()), or a non-zero protocol,
 * local address or foreign address filter does not match.
 */
170 SIMPLEQ_INIT(&inpcblist);
171 rw_enter_write(&rawcbtable.inpt_notify);
172 mtx_enter(&rawcbtable.inpt_mtx);
173 TAILQ_FOREACH(inp, &rawcbtable.inpt_queue, inp_queue) {
174 if (inp->inp_socket->so_rcv.sb_state & SS_CANTRCVMORE)
175 continue;
176 #ifdef INET6
177 if (inp->inp_flags & INP_IPV6)
178 continue;
179 #endif
180 if (rtable_l2(inp->inp_rtableid) !=
181 rtable_l2(m->m_pkthdr.ph_rtableid))
182 continue;
183
184 if (inp->inp_ip.ip_p && inp->inp_ip.ip_p != ip->ip_p)
185 continue;
186 if (inp->inp_laddr.s_addr &&
187 inp->inp_laddr.s_addr != key->s_addr)
188 continue;
189 if (inp->inp_faddr.s_addr &&
190 inp->inp_faddr.s_addr != ip->ip_src.s_addr)
191 continue;
192
193 in_pcbref(inp);
194 SIMPLEQ_INSERT_TAIL(&inpcblist, inp, inp_notify);
195 }
196 mtx_leave(&rawcbtable.inpt_mtx);
197
/*
 * Nobody wants the packet: answer non-ICMP packets with a "protocol
 * unreachable" ICMP error and just free ICMP ones, then adjust the IP
 * statistics.
 * NOTE(review): ips_delivered is presumably decremented because the
 * caller already counted this packet as delivered - confirm.
 */
198 if (SIMPLEQ_EMPTY(&inpcblist)) {
199 rw_exit_write(&rawcbtable.inpt_notify);
200
201 if (ip->ip_p != IPPROTO_ICMP)
202 icmp_error(m, ICMP_UNREACH, ICMP_UNREACH_PROTOCOL,
203 0, 0);
204 else
205 m_freem(m);
206
207 counters = counters_enter(&ref, ipcounters);
208 counters[ips_noproto]++;
209 counters[ips_delivered]--;
210 counters_leave(&ref, ipcounters);
211
212 return IPPROTO_DONE;
213 }
214
/*
 * Phase 2: hand the packet to each collected PCB.  The last PCB on the
 * list receives the original mbuf chain, every earlier one a copy; if
 * the copy cannot be allocated that socket is silently skipped.
 */
215 while ((inp = SIMPLEQ_FIRST(&inpcblist)) != NULL) {
216 struct mbuf *n, *opts = NULL;
217
218 SIMPLEQ_REMOVE_HEAD(&inpcblist, inp_notify);
219 if (SIMPLEQ_EMPTY(&inpcblist))
220 n = m;
221 else
222 n = m_copym(m, 0, M_COPYALL, M_NOWAIT);
223 if (n != NULL) {
224 int ret;
225
/* Build control data only for sockets that asked for it. */
226 if (inp->inp_flags & INP_CONTROLOPTS ||
227 inp->inp_socket->so_options & SO_TIMESTAMP)
228 ip_savecontrol(inp, &opts, ip, n);
229
230 mtx_enter(&inp->inp_mtx);
231 ret = sbappendaddr(inp->inp_socket,
232 &inp->inp_socket->so_rcv,
233 sintosa(&ripsrc), n, opts);
234 mtx_leave(&inp->inp_mtx);
235
/* A zero return means the data was not queued, so it is freed here. */
236 if (ret == 0) {
237 /* should notify about lost packet */
238 m_freem(n);
239 m_freem(opts);
240 } else
241 sorwakeup(inp->inp_socket);
242 }
/* Drop the reference taken during the scan. */
243 in_pcbunref(inp);
244 }
245 rw_exit_write(&rawcbtable.inpt_notify);
246
247 return IPPROTO_DONE;
248 }
249
250 /*
251 * Generate IP header and pass packet to ip_output.
252 * Tack on options user may have setup with control call.
253 */
254 int
255 rip_output(struct mbuf *m, struct socket *so, struct sockaddr *dstaddr,
256 struct mbuf *control)
257 {
258 struct sockaddr_in *dst = satosin(dstaddr);
259 struct ip *ip;
260 struct inpcb *inp;
261 int flags, error;
262
263 inp = sotoinpcb(so);
264 flags = IP_ALLOWBROADCAST;
265
266 /*
267 * If the user handed us a complete IP packet, use it.
268 * Otherwise, allocate an mbuf for a header and fill it in.
269 */
270 if ((inp->inp_flags & INP_HDRINCL) == 0) {
271 if ((m->m_pkthdr.len + sizeof(struct ip)) > IP_MAXPACKET) {
272 m_freem(m);
273 return (EMSGSIZE);
274 }
275 M_PREPEND(m, sizeof(struct ip), M_DONTWAIT);
276 if (!m)
277 return (ENOBUFS);
278 ip = mtod(m, struct ip *);
279 ip->ip_tos = inp->inp_ip.ip_tos;
280 ip->ip_off = htons(0);
281 ip->ip_p = inp->inp_ip.ip_p;
282 ip->ip_len = htons(m->m_pkthdr.len);
283 ip->ip_src.s_addr = INADDR_ANY;
284 ip->ip_dst = dst->sin_addr;
285 ip->ip_ttl = inp->inp_ip.ip_ttl ? inp->inp_ip.ip_ttl : MAXTTL;
286 } else {
287 if (m->m_pkthdr.len > IP_MAXPACKET) {
288 m_freem(m);
289 return (EMSGSIZE);
290 }
291
292 m = rip_chkhdr(m, inp->inp_options);
293 if (m == NULL)
294 return (EINVAL);
295
296 ip = mtod(m, struct ip *);
297 if (ip->ip_id == 0)
298 ip->ip_id = htons(ip_randomid());
299 dst->sin_addr = ip->ip_dst;
300
301 /* XXX prevent ip_output from overwriting header fields */
302 flags |= IP_RAWOUTPUT;
303 ipstat_inc(ips_rawout);
304 }
305
306 if (ip->ip_src.s_addr == INADDR_ANY) {
307 error = in_pcbselsrc(&ip->ip_src, dst, inp);
308 if (error != 0)
309 return (error);
310 }
311
312 #ifdef INET6
313 /*
314 * A thought: Even though raw IP shouldn't be able to set IPv6
315 * multicast options, if it does, the last parameter to
316 * ip_output should be guarded against v6/v4 problems.
317 */
318 #endif
319 /* force routing table */
320 m->m_pkthdr.ph_rtableid = inp->inp_rtableid;
321
322 #if NPF > 0
323 if (inp->inp_socket->so_state & SS_ISCONNECTED &&
324 ip->ip_p != IPPROTO_ICMP)
325 pf_mbuf_link_inpcb(m, inp);
326 #endif
327
328 error = ip_output(m, inp->inp_options, &inp->inp_route, flags,
329 inp->inp_moptions, inp, 0);
330 return (error);
331 }
332
333 struct mbuf *
334 rip_chkhdr(struct mbuf *m, struct mbuf *options)
335 {
336 struct ip *ip;
337 int hlen, opt, optlen, cnt;
338 u_char *cp;
339
340 if (m->m_pkthdr.len < sizeof(struct ip)) {
341 m_freem(m);
342 return NULL;
343 }
344
345 m = m_pullup(m, sizeof (struct ip));
346 if (m == NULL)
347 return NULL;
348
349 ip = mtod(m, struct ip *);
350 hlen = ip->ip_hl << 2;
351
352 /* Don't allow packet length sizes that will crash. */
353 if (hlen < sizeof (struct ip) ||
354 ntohs(ip->ip_len) < hlen ||
355 ntohs(ip->ip_len) != m->m_pkthdr.len) {
356 m_freem(m);
357 return NULL;
358 }
359 m = m_pullup(m, hlen);
360 if (m == NULL)
361 return NULL;
362
363 ip = mtod(m, struct ip *);
364
365 if (ip->ip_v != IPVERSION) {
366 m_freem(m);
367 return NULL;
368 }
369
370 /*
371 * Don't allow both user specified and setsockopt options.
372 * If options are present verify them.
373 */
374 if (hlen != sizeof(struct ip)) {
375 if (options) {
376 m_freem(m);
377 return NULL;
378 } else {
379 cp = (u_char *)(ip + 1);
380 cnt = hlen - sizeof(struct ip);
381 for (; cnt > 0; cnt -= optlen, cp += optlen) {
382 opt = cp[IPOPT_OPTVAL];
383 if (opt == IPOPT_EOL)
384 break;
385 if (opt == IPOPT_NOP)
386 optlen = 1;
387 else {
388 if (cnt < IPOPT_OLEN + sizeof(*cp)) {
389 m_freem(m);
390 return NULL;
391 }
392 optlen = cp[IPOPT_OLEN];
393 if (optlen < IPOPT_OLEN + sizeof(*cp) ||
394 optlen > cnt) {
395 m_freem(m);
396 return NULL;
397 }
398 }
399 }
400 }
401 }
402
403 return m;
404 }
405
406 /*
407 * Raw IP socket option processing.
408 */
409 int
410 rip_ctloutput(int op, struct socket *so, int level, int optname,
411 struct mbuf *m)
412 {
413 struct inpcb *inp = sotoinpcb(so);
414 int error;
415
416 if (level != IPPROTO_IP)
417 return (EINVAL);
418
419 switch (optname) {
420
421 case IP_HDRINCL:
422 error = 0;
423 if (op == PRCO_SETOPT) {
424 if (m == NULL || m->m_len < sizeof (int))
425 error = EINVAL;
426 else if (*mtod(m, int *))
427 inp->inp_flags |= INP_HDRINCL;
428 else
429 inp->inp_flags &= ~INP_HDRINCL;
430 } else {
431 m->m_len = sizeof(int);
432 *mtod(m, int *) = inp->inp_flags & INP_HDRINCL;
433 }
434 return (error);
435
436 case MRT_INIT:
437 case MRT_DONE:
438 case MRT_ADD_VIF:
439 case MRT_DEL_VIF:
440 case MRT_ADD_MFC:
441 case MRT_DEL_MFC:
442 case MRT_VERSION:
443 case MRT_ASSERT:
444 case MRT_API_SUPPORT:
445 case MRT_API_CONFIG:
446 #ifdef MROUTING
447 switch (op) {
448 case PRCO_SETOPT:
449 error = ip_mrouter_set(so, optname, m);
450 break;
451 case PRCO_GETOPT:
452 error = ip_mrouter_get(so, optname, m);
453 break;
454 default:
455 error = EINVAL;
456 break;
457 }
458 return (error);
459 #else
460 return (EOPNOTSUPP);
461 #endif
462 }
463 return (ip_ctloutput(op, so, level, optname, m));
464 }
465
/*
 * Send and receive buffer sizes that rip_attach() passes to soreserve();
 * they start out as RIPSNDQ and RIPRCVQ.
 */
466 u_long rip_sendspace = RIPSNDQ;
467 u_long rip_recvspace = RIPRCVQ;
468
469 int
470 rip_attach(struct socket *so, int proto, int wait)
471 {
472 struct inpcb *inp;
473 int error;
474
475 if (so->so_pcb)
476 panic("rip_attach");
477 if ((so->so_state & SS_PRIV) == 0)
478 return EACCES;
479 if (proto < 0 || proto >= IPPROTO_MAX)
480 return EPROTONOSUPPORT;
481
482 if ((error = soreserve(so, rip_sendspace, rip_recvspace)))
483 return error;
484 NET_ASSERT_LOCKED();
485 if ((error = in_pcballoc(so, &rawcbtable, wait)))
486 return error;
487 inp = sotoinpcb(so);
488 inp->inp_ip.ip_p = proto;
489 return 0;
490 }
491
492 int
493 rip_detach(struct socket *so)
494 {
495 struct inpcb *inp = sotoinpcb(so);
496
497 soassertlocked(so);
498
499 if (inp == NULL)
500 return (EINVAL);
501
502 #ifdef MROUTING
503 if (so == ip_mrouter[inp->inp_rtableid])
504 ip_mrouter_done(so);
505 #endif
506 in_pcbdetach(inp);
507
508 return (0);
509 }
510
511 void
512 rip_lock(struct socket *so)
513 {
514 struct inpcb *inp = sotoinpcb(so);
515
516 NET_ASSERT_LOCKED();
517 mtx_enter(&inp->inp_mtx);
518 }
519
520 void
521 rip_unlock(struct socket *so)
522 {
523 struct inpcb *inp = sotoinpcb(so);
524
525 NET_ASSERT_LOCKED();
526 mtx_leave(&inp->inp_mtx);
527 }
528
529 int
530 rip_bind(struct socket *so, struct mbuf *nam, struct proc *p)
531 {
532 struct inpcb *inp = sotoinpcb(so);
533 struct sockaddr_in *addr;
534 int error;
535
536 soassertlocked(so);
537
538 if ((error = in_nam2sin(nam, &addr)))
539 return (error);
540
541 if (!((so->so_options & SO_BINDANY) ||
542 addr->sin_addr.s_addr == INADDR_ANY ||
543 addr->sin_addr.s_addr == INADDR_BROADCAST ||
544 in_broadcast(addr->sin_addr, inp->inp_rtableid) ||
545 ifa_ifwithaddr(sintosa(addr), inp->inp_rtableid)))
546 return (EADDRNOTAVAIL);
547
548 inp->inp_laddr = addr->sin_addr;
549
550 return (0);
551 }
552
553 int
554 rip_connect(struct socket *so, struct mbuf *nam)
555 {
556 struct inpcb *inp = sotoinpcb(so);
557 struct sockaddr_in *addr;
558 int error;
559
560 soassertlocked(so);
561
562 if ((error = in_nam2sin(nam, &addr)))
563 return (error);
564
565 inp->inp_faddr = addr->sin_addr;
566 soisconnected(so);
567
568 return (0);
569 }
570
571 int
572 rip_disconnect(struct socket *so)
573 {
574 struct inpcb *inp = sotoinpcb(so);
575
576 soassertlocked(so);
577
578 if ((so->so_state & SS_ISCONNECTED) == 0)
579 return (ENOTCONN);
580
581 soisdisconnected(so);
582 inp->inp_faddr.s_addr = INADDR_ANY;
583
584 return (0);
585 }
586
/*
 * Shut down the sending side of a raw socket: after socantsendmore()
 * the socket is marked as unable to send further data.
 * Always returns 0.
 */
int
rip_shutdown(struct socket *so)
{
	soassertlocked(so);

	socantsendmore(so);
	return (0);
}
599
600 int
601 rip_send(struct socket *so, struct mbuf *m, struct mbuf *nam,
602 struct mbuf *control)
603 {
604 struct inpcb *inp = sotoinpcb(so);
605 struct sockaddr_in dst;
606 int error;
607
608 soassertlocked(so);
609
610 /*
611 * Ship a packet out. The appropriate raw output
612 * routine handles any massaging necessary.
613 */
614 memset(&dst, 0, sizeof(dst));
615 dst.sin_family = AF_INET;
616 dst.sin_len = sizeof(dst);
617 if (so->so_state & SS_ISCONNECTED) {
618 if (nam) {
619 error = EISCONN;
620 goto out;
621 }
622 dst.sin_addr = inp->inp_faddr;
623 } else {
624 struct sockaddr_in *addr;
625
626 if (nam == NULL) {
627 error = ENOTCONN;
628 goto out;
629 }
630 if ((error = in_nam2sin(nam, &addr)))
631 goto out;
632 dst.sin_addr = addr->sin_addr;
633 }
634 #ifdef IPSEC
635 /* XXX Find an IPsec TDB */
636 #endif
637 error = rip_output(m, so, sintosa(&dst), NULL);
638 m = NULL;
639
640 out:
641 m_freem(control);
642 m_freem(m);
643
644 return (error);
645 }
Cache object: baa8d19535c42cb0e184137a533d32b0
|