1 /*
2 * Copyright (c) 1982, 1986, 1988, 1990, 1993
3 * The Regents of the University of California. All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 * notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 * notice, this list of conditions and the following disclaimer in the
12 * documentation and/or other materials provided with the distribution.
13 * 3. All advertising materials mentioning features or use of this software
14 * must display the following acknowledgement:
15 * This product includes software developed by the University of
16 * California, Berkeley and its contributors.
17 * 4. Neither the name of the University nor the names of its contributors
18 * may be used to endorse or promote products derived from this software
19 * without specific prior written permission.
20 *
21 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
22 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
23 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
24 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
25 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
26 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
27 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
28 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
29 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
30 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
31 * SUCH DAMAGE.
32 *
33 * @(#)uipc_socket.c 8.3 (Berkeley) 4/15/94
34 * $FreeBSD$
35 */
36
37 #include <sys/param.h>
38 #include <sys/systm.h>
39 #include <sys/fcntl.h>
40 #include <sys/malloc.h>
41 #include <sys/mbuf.h>
42 #include <sys/domain.h>
43 #include <sys/kernel.h>
44 #include <sys/malloc.h>
45 #include <sys/poll.h>
46 #include <sys/proc.h>
47 #include <sys/protosw.h>
48 #include <sys/socket.h>
49 #include <sys/socketvar.h>
50 #include <sys/resourcevar.h>
51 #include <sys/signalvar.h>
52 #include <sys/sysctl.h>
53 #include <sys/uio.h>
54 #include <vm/vm_zone.h>
55
56 #include <machine/limits.h>
57
/* Zone from which all socket structures are allocated; see soalloc(). */
struct vm_zone *socket_zone;
so_gen_t so_gencnt;	/* generation count for sockets */

MALLOC_DEFINE(M_SONAME, "soname", "socket name");
MALLOC_DEFINE(M_PCB, "pcb", "protocol control block");

/* Upper bound for listen(2) backlogs; tunable at runtime via sysctl. */
static int somaxconn = SOMAXCONN;
SYSCTL_INT(_kern_ipc, KIPC_SOMAXCONN, somaxconn, CTLFLAG_RW, &somaxconn,
	   0, "");
67
68 /*
69 * Socket operation routines.
70 * These routines are called by the routines in
71 * sys_socket.c or from a system process, and
72 * implement the semantics of socket operations by
73 * switching out to the protocol specific routines.
74 */
75
76 /*
77 * Get a socket structure from our zone, and initialize it.
78 * We don't implement `waitok' yet (see comments in uipc_domain.c).
79 * Note that it would probably be better to allocate socket
80 * and PCB at the same time, but I'm not convinced that all
81 * the protocols can be easily modified to do this.
82 */
struct socket *
soalloc(waitok)
	int waitok;	/* XXX not implemented; zalloci() is used regardless */
{
	struct socket *so;

	so = zalloci(socket_zone);
	if (so) {
		/* XXX race condition for reentrant kernel */
		bzero(so, sizeof *so);
		/* Stamp the socket with a fresh generation number. */
		so->so_gencnt = ++so_gencnt;
		/* Remember the zone so sodealloc() can return the memory. */
		so->so_zone = socket_zone;
	}
	return so;
}
98
/*
 * Create a new socket of the given type in the given domain.
 * Looks up the protocol switch entry (by explicit protocol number, or
 * by type within the domain), allocates and initializes the socket
 * structure, and invokes the protocol's pru_attach hook.  On success
 * *aso receives the new socket and 0 is returned; otherwise an errno
 * value is returned and nothing is allocated.
 */
int
socreate(dom, aso, type, proto, p)
	int dom;		/* protocol family (PF_*) */
	struct socket **aso;	/* out: newly created socket */
	register int type;	/* SOCK_STREAM, SOCK_DGRAM, ... */
	int proto;		/* specific protocol, or 0 for the default */
	struct proc *p;		/* creating process; may be NULL */
{
	register struct protosw *prp;
	register struct socket *so;
	register int error;

	if (proto)
		prp = pffindproto(dom, proto, type);
	else
		prp = pffindtype(dom, type);
	if (prp == 0 || prp->pr_usrreqs->pru_attach == 0)
		return (EPROTONOSUPPORT);
	if (prp->pr_type != type)
		return (EPROTOTYPE);
	so = soalloc(p != 0);
	if (so == 0)
		return (ENOBUFS);

	TAILQ_INIT(&so->so_incomp);
	TAILQ_INIT(&so->so_comp);
	so->so_type = type;
	if (p != NULL) {
		/* Take a reference on the creator's credentials. */
		so->so_cred = p->p_cred;
		so->so_cred->p_refcnt++;
	} else
		so->so_cred = NULL;
	so->so_proto = prp;
	error = (*prp->pr_usrreqs->pru_attach)(so, proto, p);
	if (error) {
		/*
		 * No descriptor references the half-built socket;
		 * set SS_NOFDREF so sofree() will actually release it.
		 */
		so->so_state |= SS_NOFDREF;
		sofree(so);
		return (error);
	}
	*aso = so;
	return (0);
}
141
142 int
143 sobind(so, nam, p)
144 struct socket *so;
145 struct sockaddr *nam;
146 struct proc *p;
147 {
148 int s = splnet();
149 int error;
150
151 error = (*so->so_proto->pr_usrreqs->pru_bind)(so, nam, p);
152 splx(s);
153 return (error);
154 }
155
/*
 * Release a socket structure previously obtained from soalloc():
 * advance the generation count, drop the credential reference taken
 * in socreate() (freeing the ucred when the count hits zero), and
 * return the memory to its zone.
 */
void
sodealloc(so)
	struct socket *so;
{
	so->so_gencnt = ++so_gencnt;
	if (so->so_cred && --so->so_cred->p_refcnt == 0) {
		crfree(so->so_cred->pc_ucred);
		FREE(so->so_cred, M_SUBPROC);
	}
	zfreei(so->so_zone, so);
}
167
/*
 * Mark a socket as willing to accept connections.  The protocol is
 * notified first via pru_listen; on success SO_ACCEPTCONN is set and
 * the backlog is clamped to [0, somaxconn].  Returns 0 or an errno
 * value from the protocol.
 */
int
solisten(so, backlog, p)
	register struct socket *so;
	int backlog;
	struct proc *p;
{
	int s, error;

	s = splnet();
	error = (*so->so_proto->pr_usrreqs->pru_listen)(so, p);
	if (error) {
		splx(s);
		return (error);
	}
	/* Only enter listening state if the completed queue is empty. */
	if (so->so_comp.tqh_first == NULL)
		so->so_options |= SO_ACCEPTCONN;
	if (backlog < 0 || backlog > somaxconn)
		backlog = somaxconn;
	so->so_qlimit = backlog;
	splx(s);
	return (0);
}
190
/*
 * Free a socket that holds neither protocol state (so_pcb) nor a file
 * descriptor reference; if either reference remains this is a no-op.
 * An embryonic connection still queued on its listening socket is
 * unlinked from the incomplete/complete queue first.  Finally the
 * send buffer, the receive buffer (via sorflush) and the socket
 * structure itself are released.
 */
void
sofree(so)
	register struct socket *so;
{
	struct socket *head = so->so_head;

	/* Still referenced by a pcb or an open descriptor: keep it. */
	if (so->so_pcb || (so->so_state & SS_NOFDREF) == 0)
		return;
	if (head != NULL) {
		if (so->so_state & SS_INCOMP) {
			TAILQ_REMOVE(&head->so_incomp, so, so_list);
			head->so_incqlen--;
		} else if (so->so_state & SS_COMP) {
			TAILQ_REMOVE(&head->so_comp, so, so_list);
		} else {
			panic("sofree: not queued");
		}
		head->so_qlen--;
		so->so_state &= ~(SS_INCOMP|SS_COMP);
		so->so_head = NULL;
	}
	sbrelease(&so->so_snd);
	sorflush(so);
	sodealloc(so);
}
216
217 /*
218 * Close a socket on last file table reference removal.
219 * Initiate disconnect if connected.
220 * Free socket when disconnect complete.
221 */
int
soclose(so)
	register struct socket *so;
{
	int s = splnet();		/* conservative */
	int error = 0;

	/* Drop async-I/O (SIGIO) ownership before tearing anything down. */
	funsetown(so->so_sigio);
	if (so->so_options & SO_ACCEPTCONN) {
		struct socket *sp, *sonext;

		/* Abort every embryonic and unaccepted connection queued
		 * on this listening socket. */
		for (sp = so->so_incomp.tqh_first; sp != NULL; sp = sonext) {
			sonext = sp->so_list.tqe_next;
			(void) soabort(sp);
		}
		for (sp = so->so_comp.tqh_first; sp != NULL; sp = sonext) {
			sonext = sp->so_list.tqe_next;
			(void) soabort(sp);
		}
	}
	if (so->so_pcb == 0)
		goto discard;
	if (so->so_state & SS_ISCONNECTED) {
		if ((so->so_state & SS_ISDISCONNECTING) == 0) {
			error = sodisconnect(so);
			if (error)
				goto drop;
		}
		if (so->so_options & SO_LINGER) {
			/* Non-blocking socket mid-disconnect: don't wait. */
			if ((so->so_state & SS_ISDISCONNECTING) &&
			    (so->so_state & SS_NBIO))
				goto drop;
			/* Linger: sleep until disconnect completes or the
			 * linger interval (so_linger seconds) expires. */
			while (so->so_state & SS_ISCONNECTED) {
				error = tsleep((caddr_t)&so->so_timeo,
				    PSOCK | PCATCH, "soclos", so->so_linger * hz);
				if (error)
					break;
			}
		}
	}
drop:
	if (so->so_pcb) {
		int error2 = (*so->so_proto->pr_usrreqs->pru_detach)(so);
		/* Preserve the first error seen. */
		if (error == 0)
			error = error2;
	}
discard:
	if (so->so_state & SS_NOFDREF)
		panic("soclose: NOFDREF");
	so->so_state |= SS_NOFDREF;
	sofree(so);
	splx(s);
	return (error);
}
276
277 /*
278 * Must be called at splnet...
279 */
280 int
281 soabort(so)
282 struct socket *so;
283 {
284 int error;
285
286 error = (*so->so_proto->pr_usrreqs->pru_abort)(so);
287 if (error) {
288 sofree(so);
289 return error;
290 }
291 return (0);
292 }
293
294 int
295 soaccept(so, nam)
296 register struct socket *so;
297 struct sockaddr **nam;
298 {
299 int s = splnet();
300 int error;
301
302 if ((so->so_state & SS_NOFDREF) == 0)
303 panic("soaccept: !NOFDREF");
304 so->so_state &= ~SS_NOFDREF;
305 error = (*so->so_proto->pr_usrreqs->pru_accept)(so, nam);
306 splx(s);
307 return (error);
308 }
309
/*
 * Initiate a connection to the given address.  Listening sockets may
 * not connect.  Returns 0, EISCONN, or an errno value from the
 * protocol's pru_connect.
 */
int
soconnect(so, nam, p)
	register struct socket *so;
	struct sockaddr *nam;
	struct proc *p;
{
	int s;
	int error;

	if (so->so_options & SO_ACCEPTCONN)
		return (EOPNOTSUPP);
	s = splnet();
	/*
	 * If protocol is connection-based, can only connect once.
	 * Otherwise, if connected, try to disconnect first.
	 * This allows user to disconnect by connecting to, e.g.,
	 * a null address.
	 * NOTE: the sodisconnect() call is embedded in the condition;
	 * its failure also maps to EISCONN.
	 */
	if (so->so_state & (SS_ISCONNECTED|SS_ISCONNECTING) &&
	    ((so->so_proto->pr_flags & PR_CONNREQUIRED) ||
	    (error = sodisconnect(so))))
		error = EISCONN;
	else
		error = (*so->so_proto->pr_usrreqs->pru_connect)(so, nam, p);
	splx(s);
	return (error);
}
337
338 int
339 soconnect2(so1, so2)
340 register struct socket *so1;
341 struct socket *so2;
342 {
343 int s = splnet();
344 int error;
345
346 error = (*so1->so_proto->pr_usrreqs->pru_connect2)(so1, so2);
347 splx(s);
348 return (error);
349 }
350
351 int
352 sodisconnect(so)
353 register struct socket *so;
354 {
355 int s = splnet();
356 int error;
357
358 if ((so->so_state & SS_ISCONNECTED) == 0) {
359 error = ENOTCONN;
360 goto bad;
361 }
362 if (so->so_state & SS_ISDISCONNECTING) {
363 error = EALREADY;
364 goto bad;
365 }
366 error = (*so->so_proto->pr_usrreqs->pru_disconnect)(so);
367 bad:
368 splx(s);
369 return (error);
370 }
371
372 #define SBLOCKWAIT(f) (((f) & MSG_DONTWAIT) ? M_NOWAIT : M_WAITOK)
373 /*
374 * Send on a socket.
375 * If send must go all at once and message is larger than
376 * send buffering, then hard error.
377 * Lock against other senders.
378 * If must go all at once and not enough room now, then
379 * inform user that this would block and do nothing.
380 * Otherwise, if nonblocking, send as much as possible.
381 * The data to be sent is described by "uio" if nonzero,
382 * otherwise by the mbuf chain "top" (which must be null
383 * if uio is not). Data provided in mbuf chain must be small
384 * enough to send all at once.
385 *
386 * Returns nonzero on error, timeout or signal; callers
387 * must check for short counts if EINTR/ERESTART are returned.
388 * Data and control buffers are freed on return.
389 */
390 int
391 sosend(so, addr, uio, top, control, flags, p)
392 register struct socket *so;
393 struct sockaddr *addr;
394 struct uio *uio;
395 struct mbuf *top;
396 struct mbuf *control;
397 int flags;
398 struct proc *p;
399 {
400 struct mbuf **mp;
401 register struct mbuf *m;
402 register long space, len, resid;
403 int clen = 0, error, s, dontroute, mlen;
404 int atomic = sosendallatonce(so) || top;
405
406 if (uio)
407 resid = uio->uio_resid;
408 else
409 resid = top->m_pkthdr.len;
410 /*
411 * In theory resid should be unsigned.
412 * However, space must be signed, as it might be less than 0
413 * if we over-committed, and we must use a signed comparison
414 * of space and resid. On the other hand, a negative resid
415 * causes us to loop sending 0-length segments to the protocol.
416 *
417 * Also check to make sure that MSG_EOR isn't used on SOCK_STREAM
418 * type sockets since that's an error.
419 */
420 if (resid < 0 || so->so_type == SOCK_STREAM && (flags & MSG_EOR)) {
421 error = EINVAL;
422 goto out;
423 }
424
425 dontroute =
426 (flags & MSG_DONTROUTE) && (so->so_options & SO_DONTROUTE) == 0 &&
427 (so->so_proto->pr_flags & PR_ATOMIC);
428 if (p)
429 p->p_stats->p_ru.ru_msgsnd++;
430 if (control)
431 clen = control->m_len;
432 #define snderr(errno) { error = errno; splx(s); goto release; }
433
434 restart:
435 error = sblock(&so->so_snd, SBLOCKWAIT(flags));
436 if (error)
437 goto out;
438 do {
439 s = splnet();
440 if (so->so_state & SS_CANTSENDMORE)
441 snderr(EPIPE);
442 if (so->so_error) {
443 error = so->so_error;
444 so->so_error = 0;
445 splx(s);
446 goto release;
447 }
448 if ((so->so_state & SS_ISCONNECTED) == 0) {
449 /*
450 * `sendto' and `sendmsg' is allowed on a connection-
451 * based socket if it supports implied connect.
452 * Return ENOTCONN if not connected and no address is
453 * supplied.
454 */
455 if ((so->so_proto->pr_flags & PR_CONNREQUIRED) &&
456 (so->so_proto->pr_flags & PR_IMPLOPCL) == 0) {
457 if ((so->so_state & SS_ISCONFIRMING) == 0 &&
458 !(resid == 0 && clen != 0))
459 snderr(ENOTCONN);
460 } else if (addr == 0)
461 snderr(so->so_proto->pr_flags & PR_CONNREQUIRED ?
462 ENOTCONN : EDESTADDRREQ);
463 }
464 space = sbspace(&so->so_snd);
465 if (flags & MSG_OOB)
466 space += 1024;
467 if ((atomic && resid > so->so_snd.sb_hiwat) ||
468 clen > so->so_snd.sb_hiwat)
469 snderr(EMSGSIZE);
470 if (space < resid + clen && uio &&
471 (atomic || space < so->so_snd.sb_lowat || space < clen)) {
472 if (so->so_state & SS_NBIO)
473 snderr(EWOULDBLOCK);
474 sbunlock(&so->so_snd);
475 error = sbwait(&so->so_snd);
476 splx(s);
477 if (error)
478 goto out;
479 goto restart;
480 }
481 splx(s);
482 mp = ⊤
483 space -= clen;
484 do {
485 if (uio == NULL) {
486 /*
487 * Data is prepackaged in "top".
488 */
489 resid = 0;
490 if (flags & MSG_EOR)
491 top->m_flags |= M_EOR;
492 } else do {
493 if (top == 0) {
494 MGETHDR(m, M_WAIT, MT_DATA);
495 if (m == NULL) {
496 error = ENOBUFS;
497 goto release;
498 }
499 mlen = MHLEN;
500 m->m_pkthdr.len = 0;
501 m->m_pkthdr.rcvif = (struct ifnet *)0;
502 } else {
503 MGET(m, M_WAIT, MT_DATA);
504 if (m == NULL) {
505 error = ENOBUFS;
506 goto release;
507 }
508 mlen = MLEN;
509 }
510 if (resid >= MINCLSIZE) {
511 MCLGET(m, M_WAIT);
512 if ((m->m_flags & M_EXT) == 0)
513 goto nopages;
514 mlen = MCLBYTES;
515 len = min(min(mlen, resid), space);
516 } else {
517 nopages:
518 len = min(min(mlen, resid), space);
519 /*
520 * For datagram protocols, leave room
521 * for protocol headers in first mbuf.
522 */
523 if (atomic && top == 0 && len < mlen)
524 MH_ALIGN(m, len);
525 }
526 space -= len;
527 error = uiomove(mtod(m, caddr_t), (int)len, uio);
528 resid = uio->uio_resid;
529 m->m_len = len;
530 *mp = m;
531 top->m_pkthdr.len += len;
532 if (error)
533 goto release;
534 mp = &m->m_next;
535 if (resid <= 0) {
536 if (flags & MSG_EOR)
537 top->m_flags |= M_EOR;
538 break;
539 }
540 } while (space > 0 && atomic);
541 if (dontroute)
542 so->so_options |= SO_DONTROUTE;
543 s = splnet(); /* XXX */
544 error = (*so->so_proto->pr_usrreqs->pru_send)(so,
545 (flags & MSG_OOB) ? PRUS_OOB :
546 /*
547 * If the user set MSG_EOF, the protocol
548 * understands this flag and nothing left to
549 * send then use PRU_SEND_EOF instead of PRU_SEND.
550 */
551 ((flags & MSG_EOF) &&
552 (so->so_proto->pr_flags & PR_IMPLOPCL) &&
553 (resid <= 0)) ?
554 PRUS_EOF :
555 /* If there is more to send set PRUS_MORETOCOME */
556 (resid > 0 && space > 0) ? PRUS_MORETOCOME : 0,
557 top, addr, control, p);
558 splx(s);
559 if (dontroute)
560 so->so_options &= ~SO_DONTROUTE;
561 clen = 0;
562 control = 0;
563 top = 0;
564 mp = ⊤
565 if (error)
566 goto release;
567 } while (resid && space > 0);
568 } while (resid);
569
570 release:
571 sbunlock(&so->so_snd);
572 out:
573 if (top)
574 m_freem(top);
575 if (control)
576 m_freem(control);
577 return (error);
578 }
579
580 /*
581 * Implement receive operations on a socket.
582 * We depend on the way that records are added to the sockbuf
583 * by sbappend*. In particular, each record (mbufs linked through m_next)
584 * must begin with an address if the protocol so specifies,
585 * followed by an optional mbuf or mbufs containing ancillary data,
586 * and then zero or more mbufs of data.
587 * In order to avoid blocking network interrupts for the entire time here,
588 * we splx() while doing the actual copy to user space.
589 * Although the sockbuf is locked, new data may still be appended,
590 * and thus we must maintain consistency of the sockbuf during that time.
591 *
592 * The caller may receive the data as a single mbuf chain by supplying
593 * an mbuf **mp0 for use in returning the chain. The uio is then used
594 * only for the count in uio_resid.
595 */
int
soreceive(so, psa, uio, mp0, controlp, flagsp)
	register struct socket *so;
	struct sockaddr **psa;		/* out: sender address (PR_ADDR), or NULL */
	struct uio *uio;		/* destination / byte count */
	struct mbuf **mp0;		/* out: raw mbuf chain, or NULL to copy out */
	struct mbuf **controlp;		/* out: ancillary data chain, or NULL */
	int *flagsp;			/* in/out: MSG_* flags, or NULL */
{
	register struct mbuf *m, **mp;
	register int flags, len, error, s, offset;
	struct protosw *pr = so->so_proto;
	struct mbuf *nextrecord;
	int moff, type = 0;
	int orig_resid = uio->uio_resid;

	mp = mp0;
	if (psa)
		*psa = 0;
	if (controlp)
		*controlp = 0;
	if (flagsp)
		flags = *flagsp &~ MSG_EOR;
	else
		flags = 0;
	if (flags & MSG_OOB) {
		/* Out-of-band data comes straight from the protocol. */
		m = m_get(M_WAIT, MT_DATA);
		if (m == NULL)
			return (ENOBUFS);
		error = (*pr->pr_usrreqs->pru_rcvoob)(so, m, flags & MSG_PEEK);
		if (error)
			goto bad;
		do {
			error = uiomove(mtod(m, caddr_t),
			    (int) min(uio->uio_resid, m->m_len), uio);
			m = m_free(m);
		} while (uio->uio_resid && error == 0 && m);
bad:
		if (m)
			m_freem(m);
		return (error);
	}
	if (mp)
		*mp = (struct mbuf *)0;
	if (so->so_state & SS_ISCONFIRMING && uio->uio_resid)
		(*pr->pr_usrreqs->pru_rcvd)(so, 0);

restart:
	error = sblock(&so->so_rcv, SBLOCKWAIT(flags));
	if (error)
		return (error);
	s = splnet();

	m = so->so_rcv.sb_mb;
	/*
	 * If we have less data than requested, block awaiting more
	 * (subject to any timeout) if:
	 *   1. the current count is less than the low water mark, or
	 *   2. MSG_WAITALL is set, and it is possible to do the entire
	 *	receive operation at once if we block (resid <= hiwat).
	 *   3. MSG_DONTWAIT is not set
	 * If MSG_WAITALL is set but resid is larger than the receive buffer,
	 * we have to do the receive in sections, and thus risk returning
	 * a short count if a timeout or signal occurs after we start.
	 */
	if (m == 0 || (((flags & MSG_DONTWAIT) == 0 &&
	    so->so_rcv.sb_cc < uio->uio_resid) &&
	    (so->so_rcv.sb_cc < so->so_rcv.sb_lowat ||
	    ((flags & MSG_WAITALL) && uio->uio_resid <= so->so_rcv.sb_hiwat)) &&
	    m->m_nextpkt == 0 && (pr->pr_flags & PR_ATOMIC) == 0)) {
		KASSERT(m != 0 || !so->so_rcv.sb_cc, ("receive 1"));
		if (so->so_error) {
			if (m)
				goto dontblock;
			error = so->so_error;
			if ((flags & MSG_PEEK) == 0)
				so->so_error = 0;
			goto release;
		}
		if (so->so_state & SS_CANTRCVMORE) {
			if (m)
				goto dontblock;
			else
				goto release;
		}
		/* A queued OOB mark or end-of-record lets us proceed now. */
		for (; m; m = m->m_next)
			if (m->m_type == MT_OOBDATA || (m->m_flags & M_EOR)) {
				m = so->so_rcv.sb_mb;
				goto dontblock;
			}
		if ((so->so_state & (SS_ISCONNECTED|SS_ISCONNECTING)) == 0 &&
		    (so->so_proto->pr_flags & PR_CONNREQUIRED)) {
			error = ENOTCONN;
			goto release;
		}
		if (uio->uio_resid == 0)
			goto release;
		if ((so->so_state & SS_NBIO) || (flags & MSG_DONTWAIT)) {
			error = EWOULDBLOCK;
			goto release;
		}
		sbunlock(&so->so_rcv);
		error = sbwait(&so->so_rcv);
		splx(s);
		if (error)
			return (error);
		goto restart;
	}
dontblock:
	if (uio->uio_procp)
		uio->uio_procp->p_stats->p_ru.ru_msgrcv++;
	nextrecord = m->m_nextpkt;
	if (pr->pr_flags & PR_ADDR) {
		/* First mbuf of the record carries the sender's address. */
		KASSERT(m->m_type == MT_SONAME, ("receive 1a"));
		orig_resid = 0;
		if (psa)
			*psa = dup_sockaddr(mtod(m, struct sockaddr *),
					    mp0 == 0);
		if (flags & MSG_PEEK) {
			m = m->m_next;
		} else {
			sbfree(&so->so_rcv, m);
			MFREE(m, so->so_rcv.sb_mb);
			m = so->so_rcv.sb_mb;
		}
	}
	/* Next come any ancillary (control) mbufs in the record. */
	while (m && m->m_type == MT_CONTROL && error == 0) {
		if (flags & MSG_PEEK) {
			if (controlp)
				*controlp = m_copy(m, 0, m->m_len);
			m = m->m_next;
		} else {
			sbfree(&so->so_rcv, m);
			if (controlp) {
				if (pr->pr_domain->dom_externalize &&
				    mtod(m, struct cmsghdr *)->cmsg_type ==
				    SCM_RIGHTS)
					error = (*pr->pr_domain->dom_externalize)(m);
				*controlp = m;
				so->so_rcv.sb_mb = m->m_next;
				m->m_next = 0;
				m = so->so_rcv.sb_mb;
			} else {
				MFREE(m, so->so_rcv.sb_mb);
				m = so->so_rcv.sb_mb;
			}
		}
		if (controlp) {
			orig_resid = 0;
			controlp = &(*controlp)->m_next;
		}
	}
	if (m) {
		if ((flags & MSG_PEEK) == 0)
			m->m_nextpkt = nextrecord;
		type = m->m_type;
		if (type == MT_OOBDATA)
			flags |= MSG_OOB;
	}
	moff = 0;
	offset = 0;
	/* Main copy loop: drain data mbufs into the uio (or hand back via mp). */
	while (m && uio->uio_resid > 0 && error == 0) {
		if (m->m_type == MT_OOBDATA) {
			if (type != MT_OOBDATA)
				break;
		} else if (type == MT_OOBDATA)
			break;
		else
			KASSERT(m->m_type == MT_DATA || m->m_type == MT_HEADER,
			    ("receive 3"));
		so->so_state &= ~SS_RCVATMARK;
		len = uio->uio_resid;
		if (so->so_oobmark && len > so->so_oobmark - offset)
			len = so->so_oobmark - offset;
		if (len > m->m_len - moff)
			len = m->m_len - moff;
		/*
		 * If mp is set, just pass back the mbufs.
		 * Otherwise copy them out via the uio, then free.
		 * Sockbuf must be consistent here (points to current mbuf,
		 * it points to next record) when we drop priority;
		 * we must note any additions to the sockbuf when we
		 * block interrupts again.
		 */
		if (mp == 0) {
			splx(s);
			error = uiomove(mtod(m, caddr_t) + moff, (int)len, uio);
			s = splnet();
			if (error)
				goto release;
		} else
			uio->uio_resid -= len;
		if (len == m->m_len - moff) {
			/* Whole mbuf consumed. */
			if (m->m_flags & M_EOR)
				flags |= MSG_EOR;
			if (flags & MSG_PEEK) {
				m = m->m_next;
				moff = 0;
			} else {
				nextrecord = m->m_nextpkt;
				sbfree(&so->so_rcv, m);
				if (mp) {
					*mp = m;
					mp = &m->m_next;
					so->so_rcv.sb_mb = m = m->m_next;
					*mp = (struct mbuf *)0;
				} else {
					MFREE(m, so->so_rcv.sb_mb);
					m = so->so_rcv.sb_mb;
				}
				if (m)
					m->m_nextpkt = nextrecord;
			}
		} else {
			/* Partial mbuf consumed. */
			if (flags & MSG_PEEK)
				moff += len;
			else {
				if (mp)
					*mp = m_copym(m, 0, len, M_WAIT);
				m->m_data += len;
				m->m_len -= len;
				so->so_rcv.sb_cc -= len;
			}
		}
		if (so->so_oobmark) {
			if ((flags & MSG_PEEK) == 0) {
				so->so_oobmark -= len;
				if (so->so_oobmark == 0) {
					so->so_state |= SS_RCVATMARK;
					break;
				}
			} else {
				offset += len;
				if (offset == so->so_oobmark)
					break;
			}
		}
		if (flags & MSG_EOR)
			break;
		/*
		 * If the MSG_WAITALL flag is set (for non-atomic socket),
		 * we must not quit until "uio->uio_resid == 0" or an error
		 * termination.  If a signal/timeout occurs, return
		 * with a short count but without error.
		 * Keep sockbuf locked against other readers.
		 */
		while (flags & MSG_WAITALL && m == 0 && uio->uio_resid > 0 &&
		    !sosendallatonce(so) && !nextrecord) {
			if (so->so_error || so->so_state & SS_CANTRCVMORE)
				break;
			error = sbwait(&so->so_rcv);
			if (error) {
				sbunlock(&so->so_rcv);
				splx(s);
				/* Short count, no error — see comment above. */
				return (0);
			}
			m = so->so_rcv.sb_mb;
			if (m)
				nextrecord = m->m_nextpkt;
		}
	}

	if (m && pr->pr_flags & PR_ATOMIC) {
		/* Datagram not fully consumed: drop the remainder. */
		flags |= MSG_TRUNC;
		if ((flags & MSG_PEEK) == 0)
			(void) sbdroprecord(&so->so_rcv);
	}
	if ((flags & MSG_PEEK) == 0) {
		if (m == 0)
			so->so_rcv.sb_mb = nextrecord;
		if (pr->pr_flags & PR_WANTRCVD && so->so_pcb)
			(*pr->pr_usrreqs->pru_rcvd)(so, flags);
	}
	if (orig_resid == uio->uio_resid && orig_resid &&
	    (flags & MSG_EOR) == 0 && (so->so_state & SS_CANTRCVMORE) == 0) {
		/* Nothing was transferred; go back and wait for real data. */
		sbunlock(&so->so_rcv);
		splx(s);
		goto restart;
	}

	if (flagsp)
		*flagsp |= flags;
release:
	sbunlock(&so->so_rcv);
	splx(s);
	return (error);
}
883
884 int
885 soshutdown(so, how)
886 register struct socket *so;
887 register int how;
888 {
889 register struct protosw *pr = so->so_proto;
890
891 how++;
892 if (how & FREAD)
893 sorflush(so);
894 if (how & FWRITE)
895 return ((*pr->pr_usrreqs->pru_shutdown)(so));
896 return (0);
897 }
898
/*
 * Flush and discard everything queued on a socket's receive buffer.
 * The live sockbuf is snapshotted and zeroed while interrupts are
 * blocked, then the snapshot is released at normal priority; any
 * descriptor rights queued in it are disposed of via the domain's
 * dom_dispose hook first.
 */
void
sorflush(so)
	register struct socket *so;
{
	register struct sockbuf *sb = &so->so_rcv;
	register struct protosw *pr = so->so_proto;
	register int s;
	struct sockbuf asb;

	/* Make the lock acquisition below uninterruptible. */
	sb->sb_flags |= SB_NOINTR;
	(void) sblock(sb, M_WAITOK);
	s = splimp();
	socantrcvmore(so);
	sbunlock(sb);
	/* Snapshot the buffer, then clear the live one in place. */
	asb = *sb;
	bzero((caddr_t)sb, sizeof (*sb));
	splx(s);
	if (pr->pr_flags & PR_RIGHTS && pr->pr_domain->dom_dispose)
		(*pr->pr_domain->dom_dispose)(asb.sb_mb);
	sbrelease(&asb);
}
920
921 /*
922 * Perhaps this routine, and sooptcopyout(), below, ought to come in
923 * an additional variant to handle the case where the option value needs
924 * to be some kind of integer, but not a specific size.
925 * In addition to their use here, these functions are also called by the
926 * protocol-level pr_ctloutput() routines.
927 */
928 int
929 sooptcopyin(sopt, buf, len, minlen)
930 struct sockopt *sopt;
931 void *buf;
932 size_t len;
933 size_t minlen;
934 {
935 size_t valsize;
936
937 /*
938 * If the user gives us more than we wanted, we ignore it,
939 * but if we don't get the minimum length the caller
940 * wants, we return EINVAL. On success, sopt->sopt_valsize
941 * is set to however much we actually retrieved.
942 */
943 if ((valsize = sopt->sopt_valsize) < minlen)
944 return EINVAL;
945 if (valsize > len)
946 sopt->sopt_valsize = valsize = len;
947
948 if (sopt->sopt_p != 0)
949 return (copyin(sopt->sopt_val, buf, valsize));
950
951 bcopy(sopt->sopt_val, buf, valsize);
952 return 0;
953 }
954
/*
 * Set a socket option.  Options at a level other than SOL_SOCKET are
 * forwarded to the protocol's pr_ctloutput routine.  SOL_SOCKET
 * options are handled here; afterwards the protocol is also notified
 * (its errors are deliberately ignored) so it can shadow values it
 * cares about.  Returns 0 or an errno value.
 */
int
sosetopt(so, sopt)
	struct socket *so;
	struct sockopt *sopt;
{
	int error, optval;
	struct linger l;
	struct timeval tv;
	short val;

	error = 0;
	if (sopt->sopt_level != SOL_SOCKET) {
		if (so->so_proto && so->so_proto->pr_ctloutput)
			return ((*so->so_proto->pr_ctloutput)
				  (so, sopt));
		error = ENOPROTOOPT;
	} else {
		switch (sopt->sopt_name) {
		case SO_LINGER:
			error = sooptcopyin(sopt, &l, sizeof l, sizeof l);
			if (error)
				goto bad;

			so->so_linger = l.l_linger;
			if (l.l_onoff)
				so->so_options |= SO_LINGER;
			else
				so->so_options &= ~SO_LINGER;
			break;

		/* Boolean options map directly onto so_options bits. */
		case SO_DEBUG:
		case SO_KEEPALIVE:
		case SO_DONTROUTE:
		case SO_USELOOPBACK:
		case SO_BROADCAST:
		case SO_REUSEADDR:
		case SO_REUSEPORT:
		case SO_OOBINLINE:
		case SO_TIMESTAMP:
			error = sooptcopyin(sopt, &optval, sizeof optval,
					    sizeof optval);
			if (error)
				goto bad;
			if (optval)
				so->so_options |= sopt->sopt_name;
			else
				so->so_options &= ~sopt->sopt_name;
			break;

		case SO_SNDBUF:
		case SO_RCVBUF:
		case SO_SNDLOWAT:
		case SO_RCVLOWAT:
			error = sooptcopyin(sopt, &optval, sizeof optval,
					    sizeof optval);
			if (error)
				goto bad;

			/*
			 * Values < 1 make no sense for any of these
			 * options, so disallow them.
			 */
			if (optval < 1) {
				error = EINVAL;
				goto bad;
			}

			switch (sopt->sopt_name) {
			case SO_SNDBUF:
			case SO_RCVBUF:
				if (sbreserve(sopt->sopt_name == SO_SNDBUF ?
					      &so->so_snd : &so->so_rcv,
					      (u_long) optval) == 0) {
					error = ENOBUFS;
					goto bad;
				}
				break;

			/*
			 * Make sure the low-water is never greater than
			 * the high-water.
			 */
			case SO_SNDLOWAT:
				so->so_snd.sb_lowat =
				    (optval > so->so_snd.sb_hiwat) ?
				    so->so_snd.sb_hiwat : optval;
				break;
			case SO_RCVLOWAT:
				so->so_rcv.sb_lowat =
				    (optval > so->so_rcv.sb_hiwat) ?
				    so->so_rcv.sb_hiwat : optval;
				break;
			}
			break;

		case SO_SNDTIMEO:
		case SO_RCVTIMEO:
			error = sooptcopyin(sopt, &tv, sizeof tv,
					    sizeof tv);
			if (error)
				goto bad;

			/* Reject timeouts whose tick count won't fit in a short. */
			if (tv.tv_sec > SHRT_MAX / hz - hz) {
				error = EDOM;
				goto bad;
			}
			val = tv.tv_sec * hz + tv.tv_usec / tick;

			switch (sopt->sopt_name) {
			case SO_SNDTIMEO:
				so->so_snd.sb_timeo = val;
				break;
			case SO_RCVTIMEO:
				so->so_rcv.sb_timeo = val;
				break;
			}
			break;

		default:
			error = ENOPROTOOPT;
			break;
		}
		/* Let the protocol see successful SOL_SOCKET changes too. */
		if (error == 0 && so->so_proto && so->so_proto->pr_ctloutput) {
			(void) ((*so->so_proto->pr_ctloutput)
				  (so, sopt));
		}
	}
bad:
	return (error);
}
1085
1086 /* Helper routine for getsockopt */
1087 int
1088 sooptcopyout(sopt, buf, len)
1089 struct sockopt *sopt;
1090 void *buf;
1091 size_t len;
1092 {
1093 int error;
1094 size_t valsize;
1095
1096 error = 0;
1097
1098 /*
1099 * Documented get behavior is that we always return a value,
1100 * possibly truncated to fit in the user's buffer.
1101 * Traditional behavior is that we always tell the user
1102 * precisely how much we copied, rather than something useful
1103 * like the total amount we had available for her.
1104 * Note that this interface is not idempotent; the entire answer must
1105 * generated ahead of time.
1106 */
1107 valsize = min(len, sopt->sopt_valsize);
1108 sopt->sopt_valsize = valsize;
1109 if (sopt->sopt_val != 0) {
1110 if (sopt->sopt_p != 0)
1111 error = copyout(buf, sopt->sopt_val, valsize);
1112 else
1113 bcopy(buf, sopt->sopt_val, valsize);
1114 }
1115 return error;
1116 }
1117
/*
 * Get a socket option.  Levels other than SOL_SOCKET go to the
 * protocol's pr_ctloutput routine; SOL_SOCKET options are answered
 * from the socket structure itself.  Returns 0 or an errno value.
 */
int
sogetopt(so, sopt)
	struct socket *so;
	struct sockopt *sopt;
{
	int error, optval;
	struct linger l;
	struct timeval tv;

	error = 0;
	if (sopt->sopt_level != SOL_SOCKET) {
		if (so->so_proto && so->so_proto->pr_ctloutput) {
			return ((*so->so_proto->pr_ctloutput)
				  (so, sopt));
		} else
			return (ENOPROTOOPT);
	} else {
		switch (sopt->sopt_name) {
		case SO_LINGER:
			l.l_onoff = so->so_options & SO_LINGER;
			l.l_linger = so->so_linger;
			error = sooptcopyout(sopt, &l, sizeof l);
			break;

		/* Boolean options are read straight out of so_options. */
		case SO_USELOOPBACK:
		case SO_DONTROUTE:
		case SO_DEBUG:
		case SO_KEEPALIVE:
		case SO_REUSEADDR:
		case SO_REUSEPORT:
		case SO_BROADCAST:
		case SO_OOBINLINE:
		case SO_TIMESTAMP:
			optval = so->so_options & sopt->sopt_name;
integer:
			error = sooptcopyout(sopt, &optval, sizeof optval);
			break;

		case SO_TYPE:
			optval = so->so_type;
			goto integer;

		case SO_ERROR:
			/* Reading the pending error clears it. */
			optval = so->so_error;
			so->so_error = 0;
			goto integer;

		case SO_SNDBUF:
			optval = so->so_snd.sb_hiwat;
			goto integer;

		case SO_RCVBUF:
			optval = so->so_rcv.sb_hiwat;
			goto integer;

		case SO_SNDLOWAT:
			optval = so->so_snd.sb_lowat;
			goto integer;

		case SO_RCVLOWAT:
			optval = so->so_rcv.sb_lowat;
			goto integer;

		case SO_SNDTIMEO:
		case SO_RCVTIMEO:
			optval = (sopt->sopt_name == SO_SNDTIMEO ?
				  so->so_snd.sb_timeo : so->so_rcv.sb_timeo);

			/* Convert the tick count back to a timeval. */
			tv.tv_sec = optval / hz;
			tv.tv_usec = (optval % hz) * tick;
			error = sooptcopyout(sopt, &tv, sizeof tv);
			break;

		default:
			error = ENOPROTOOPT;
			break;
		}
		return (error);
	}
}
1198
1199 void
1200 sohasoutofband(so)
1201 register struct socket *so;
1202 {
1203 if (so->so_sigio != NULL)
1204 pgsigio(so->so_sigio, SIGURG, 0);
1205 selwakeup(&so->so_rcv.sb_sel);
1206 }
1207
1208 int
1209 sopoll(struct socket *so, int events, struct ucred *cred, struct proc *p)
1210 {
1211 int revents = 0;
1212 int s = splnet();
1213
1214 if (events & (POLLIN | POLLRDNORM))
1215 if (soreadable(so))
1216 revents |= events & (POLLIN | POLLRDNORM);
1217
1218 if (events & (POLLOUT | POLLWRNORM))
1219 if (sowriteable(so))
1220 revents |= events & (POLLOUT | POLLWRNORM);
1221
1222 if (events & (POLLPRI | POLLRDBAND))
1223 if (so->so_oobmark || (so->so_state & SS_RCVATMARK))
1224 revents |= events & (POLLPRI | POLLRDBAND);
1225
1226 if (revents == 0) {
1227 if (events & (POLLIN | POLLPRI | POLLRDNORM | POLLRDBAND)) {
1228 selrecord(p, &so->so_rcv.sb_sel);
1229 so->so_rcv.sb_flags |= SB_SEL;
1230 }
1231
1232 if (events & (POLLOUT | POLLWRNORM)) {
1233 selrecord(p, &so->so_snd.sb_sel);
1234 so->so_snd.sb_flags |= SB_SEL;
1235 }
1236 }
1237
1238 splx(s);
1239 return (revents);
1240 }
Cache object: acfcfc6094f4dd0c24cd132219a48303
|