1 /* $OpenBSD: ip_divert.c,v 1.89 2022/10/17 14:49:02 mvs Exp $ */
2
3 /*
4 * Copyright (c) 2009 Michele Marchetto <michele@openbsd.org>
5 *
6 * Permission to use, copy, modify, and distribute this software for any
7 * purpose with or without fee is hereby granted, provided that the above
8 * copyright notice and this permission notice appear in all copies.
9 *
10 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
11 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
12 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
13 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
14 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
15 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
16 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
17 */
18
19 #include <sys/param.h>
20 #include <sys/systm.h>
21 #include <sys/mbuf.h>
22 #include <sys/protosw.h>
23 #include <sys/socket.h>
24 #include <sys/socketvar.h>
25 #include <sys/sysctl.h>
26
27 #include <net/if.h>
28 #include <net/route.h>
29 #include <net/if_var.h>
30 #include <net/netisr.h>
31
32 #include <netinet/in.h>
33 #include <netinet/in_var.h>
34 #include <netinet/ip.h>
35 #include <netinet/ip_var.h>
36 #include <netinet/in_pcb.h>
37 #include <netinet/ip_divert.h>
38 #include <netinet/tcp.h>
39 #include <netinet/udp.h>
40 #include <netinet/ip_icmp.h>
41
42 #include <net/pfvar.h>
43
44 struct inpcbtable divbtable;
45 struct cpumem *divcounters;
46
47 #ifndef DIVERT_SENDSPACE
48 #define DIVERT_SENDSPACE (65536 + 100)
49 #endif
50 u_int divert_sendspace = DIVERT_SENDSPACE;
51 #ifndef DIVERT_RECVSPACE
52 #define DIVERT_RECVSPACE (65536 + 100)
53 #endif
54 u_int divert_recvspace = DIVERT_RECVSPACE;
55
56 #ifndef DIVERTHASHSIZE
57 #define DIVERTHASHSIZE 128
58 #endif
59
60 const struct sysctl_bounded_args divertctl_vars[] = {
61 { DIVERTCTL_RECVSPACE, &divert_recvspace, 0, INT_MAX },
62 { DIVERTCTL_SENDSPACE, &divert_sendspace, 0, INT_MAX },
63 };
64
65 const struct pr_usrreqs divert_usrreqs = {
66 .pru_attach = divert_attach,
67 .pru_detach = divert_detach,
68 .pru_lock = divert_lock,
69 .pru_unlock = divert_unlock,
70 .pru_bind = divert_bind,
71 .pru_shutdown = divert_shutdown,
72 .pru_send = divert_send,
73 .pru_control = in_control,
74 .pru_sockaddr = in_sockaddr,
75 .pru_peeraddr = in_peeraddr,
76 };
77
78 int divbhashsize = DIVERTHASHSIZE;
79
80 int divert_output(struct inpcb *, struct mbuf *, struct mbuf *,
81 struct mbuf *);
82 void
83 divert_init(void)
84 {
85 in_pcbinit(&divbtable, divbhashsize);
86 divcounters = counters_alloc(divs_ncounters);
87 }
88
89 int
90 divert_output(struct inpcb *inp, struct mbuf *m, struct mbuf *nam,
91 struct mbuf *control)
92 {
93 struct sockaddr_in *sin;
94 int error, min_hdrlen, off, dir;
95 struct ip *ip;
96
97 m_freem(control);
98
99 if ((error = in_nam2sin(nam, &sin)))
100 goto fail;
101
102 /* Do basic sanity checks. */
103 if (m->m_pkthdr.len < sizeof(struct ip))
104 goto fail;
105 if ((m = m_pullup(m, sizeof(struct ip))) == NULL) {
106 /* m_pullup() has freed the mbuf, so just return. */
107 divstat_inc(divs_errors);
108 return (ENOBUFS);
109 }
110 ip = mtod(m, struct ip *);
111 if (ip->ip_v != IPVERSION)
112 goto fail;
113 off = ip->ip_hl << 2;
114 if (off < sizeof(struct ip) || ntohs(ip->ip_len) < off ||
115 m->m_pkthdr.len < ntohs(ip->ip_len))
116 goto fail;
117
118 dir = (sin->sin_addr.s_addr == INADDR_ANY ? PF_OUT : PF_IN);
119
120 switch (ip->ip_p) {
121 case IPPROTO_TCP:
122 min_hdrlen = sizeof(struct tcphdr);
123 m->m_pkthdr.csum_flags |= M_TCP_CSUM_OUT;
124 break;
125 case IPPROTO_UDP:
126 min_hdrlen = sizeof(struct udphdr);
127 m->m_pkthdr.csum_flags |= M_UDP_CSUM_OUT;
128 break;
129 case IPPROTO_ICMP:
130 min_hdrlen = ICMP_MINLEN;
131 m->m_pkthdr.csum_flags |= M_ICMP_CSUM_OUT;
132 break;
133 default:
134 min_hdrlen = 0;
135 break;
136 }
137 if (min_hdrlen && m->m_pkthdr.len < off + min_hdrlen)
138 goto fail;
139
140 m->m_pkthdr.pf.flags |= PF_TAG_DIVERTED_PACKET;
141
142 if (dir == PF_IN) {
143 struct rtentry *rt;
144 struct ifnet *ifp;
145
146 rt = rtalloc(sintosa(sin), 0, inp->inp_rtableid);
147 if (!rtisvalid(rt) || !ISSET(rt->rt_flags, RTF_LOCAL)) {
148 rtfree(rt);
149 error = EADDRNOTAVAIL;
150 goto fail;
151 }
152 m->m_pkthdr.ph_ifidx = rt->rt_ifidx;
153 rtfree(rt);
154
155 /*
156 * Recalculate IP and protocol checksums for the inbound packet
157 * since the userspace application may have modified the packet
158 * prior to reinjection.
159 */
160 ip->ip_sum = 0;
161 ip->ip_sum = in_cksum(m, off);
162 in_proto_cksum_out(m, NULL);
163
164 ifp = if_get(m->m_pkthdr.ph_ifidx);
165 if (ifp == NULL) {
166 error = ENETDOWN;
167 goto fail;
168 }
169 ipv4_input(ifp, m);
170 if_put(ifp);
171 } else {
172 m->m_pkthdr.ph_rtableid = inp->inp_rtableid;
173
174 error = ip_output(m, NULL, &inp->inp_route,
175 IP_ALLOWBROADCAST | IP_RAWOUTPUT, NULL, NULL, 0);
176 }
177
178 divstat_inc(divs_opackets);
179 return (error);
180
181 fail:
182 m_freem(m);
183 divstat_inc(divs_errors);
184 return (error ? error : EINVAL);
185 }
186
187 void
188 divert_packet(struct mbuf *m, int dir, u_int16_t divert_port)
189 {
190 struct inpcb *inp = NULL;
191 struct socket *so;
192 struct sockaddr_in sin;
193
194 divstat_inc(divs_ipackets);
195
196 if (m->m_len < sizeof(struct ip) &&
197 (m = m_pullup(m, sizeof(struct ip))) == NULL) {
198 divstat_inc(divs_errors);
199 goto bad;
200 }
201
202 mtx_enter(&divbtable.inpt_mtx);
203 TAILQ_FOREACH(inp, &divbtable.inpt_queue, inp_queue) {
204 if (inp->inp_lport != divert_port)
205 continue;
206 in_pcbref(inp);
207 break;
208 }
209 mtx_leave(&divbtable.inpt_mtx);
210 if (inp == NULL) {
211 divstat_inc(divs_noport);
212 goto bad;
213 }
214
215 memset(&sin, 0, sizeof(sin));
216 sin.sin_family = AF_INET;
217 sin.sin_len = sizeof(sin);
218
219 if (dir == PF_IN) {
220 struct ifaddr *ifa;
221 struct ifnet *ifp;
222
223 ifp = if_get(m->m_pkthdr.ph_ifidx);
224 if (ifp == NULL) {
225 divstat_inc(divs_errors);
226 goto bad;
227 }
228 TAILQ_FOREACH(ifa, &ifp->if_addrlist, ifa_list) {
229 if (ifa->ifa_addr->sa_family != AF_INET)
230 continue;
231 sin.sin_addr = satosin(ifa->ifa_addr)->sin_addr;
232 break;
233 }
234 if_put(ifp);
235 }
236
237 mtx_enter(&inp->inp_mtx);
238 so = inp->inp_socket;
239 if (sbappendaddr(so, &so->so_rcv, sintosa(&sin), m, NULL) == 0) {
240 mtx_leave(&inp->inp_mtx);
241 divstat_inc(divs_fullsock);
242 goto bad;
243 }
244 mtx_leave(&inp->inp_mtx);
245 sorwakeup(so);
246
247 in_pcbunref(inp);
248 return;
249
250 bad:
251 if (inp != NULL)
252 in_pcbunref(inp);
253 m_freem(m);
254 }
255
256 int
257 divert_attach(struct socket *so, int proto, int wait)
258 {
259 int error;
260
261 if (so->so_pcb != NULL)
262 return EINVAL;
263 if ((so->so_state & SS_PRIV) == 0)
264 return EACCES;
265
266 error = in_pcballoc(so, &divbtable, wait);
267 if (error)
268 return error;
269
270 error = soreserve(so, divert_sendspace, divert_recvspace);
271 if (error)
272 return error;
273
274 sotoinpcb(so)->inp_flags |= INP_HDRINCL;
275 return (0);
276 }
277
278 int
279 divert_detach(struct socket *so)
280 {
281 struct inpcb *inp = sotoinpcb(so);
282
283 soassertlocked(so);
284
285 if (inp == NULL)
286 return (EINVAL);
287
288 in_pcbdetach(inp);
289 return (0);
290 }
291
292 void
293 divert_lock(struct socket *so)
294 {
295 struct inpcb *inp = sotoinpcb(so);
296
297 NET_ASSERT_LOCKED();
298 mtx_enter(&inp->inp_mtx);
299 }
300
301 void
302 divert_unlock(struct socket *so)
303 {
304 struct inpcb *inp = sotoinpcb(so);
305
306 NET_ASSERT_LOCKED();
307 mtx_leave(&inp->inp_mtx);
308 }
309
/*
 * Bind a divert socket: asserts the socket lock and passes the request
 * to in_pcbbind(), whose result is returned unchanged.
 */
int
divert_bind(struct socket *so, struct mbuf *addr, struct proc *p)
{
	soassertlocked(so);
	return (in_pcbbind(sotoinpcb(so), addr, p));
}
318
/*
 * Shut down the sending side of a divert socket via socantsendmore().
 * The socket must be locked.  Always returns 0.
 */
int
divert_shutdown(struct socket *so)
{
	soassertlocked(so);

	socantsendmore(so);

	return (0);
}
326
/*
 * Send handler for divert sockets: asserts the socket lock and passes
 * the data, address and control mbufs to divert_output() together with
 * the socket's PCB.  Returns divert_output()'s result.
 */
int
divert_send(struct socket *so, struct mbuf *m, struct mbuf *addr,
    struct mbuf *control)
{
	soassertlocked(so);
	return (divert_output(sotoinpcb(so), m, addr, control));
}
336
337 int
338 divert_sysctl_divstat(void *oldp, size_t *oldlenp, void *newp)
339 {
340 uint64_t counters[divs_ncounters];
341 struct divstat divstat;
342 u_long *words = (u_long *)&divstat;
343 int i;
344
345 CTASSERT(sizeof(divstat) == (nitems(counters) * sizeof(u_long)));
346 memset(&divstat, 0, sizeof divstat);
347 counters_read(divcounters, counters, nitems(counters));
348
349 for (i = 0; i < nitems(counters); i++)
350 words[i] = (u_long)counters[i];
351
352 return (sysctl_rdstruct(oldp, oldlenp, newp,
353 &divstat, sizeof(divstat)));
354 }
355
356 /*
357 * Sysctl for divert variables.
358 */
359 int
360 divert_sysctl(int *name, u_int namelen, void *oldp, size_t *oldlenp, void *newp,
361 size_t newlen)
362 {
363 int error;
364
365 /* All sysctl names at this level are terminal. */
366 if (namelen != 1)
367 return (ENOTDIR);
368
369 switch (name[0]) {
370 case DIVERTCTL_STATS:
371 return (divert_sysctl_divstat(oldp, oldlenp, newp));
372 default:
373 NET_LOCK();
374 error = sysctl_bounded_arr(divertctl_vars,
375 nitems(divertctl_vars), name, namelen, oldp, oldlenp, newp,
376 newlen);
377 NET_UNLOCK();
378 return (error);
379 }
380 /* NOTREACHED */
381 }
Cache object: 8d72d682b0143bd455a281b96c20a8e1
|