--- //depot/vendor/freebsd/src/sys/kern/uipc_socket.c	2005/11/02 13:50:33
+++ //depot/projects/netsmp/src/sys/kern/uipc_socket.c	2005/11/24 12:26:34
@@ -575,7 +575,295 @@
 	return (error);
 }
 
+/*
+ * sosend_copyin() accepts a uio and prepares an mbuf chain holding part or
+ * all of the data referenced by the uio.  If desired, it uses zero-copy.
+ * *space will be updated to reflect data copied in.
+ *
+ * NB: If atomic I/O is requested, the caller must already have checked that
+ * space can hold resid bytes.
+ *
+ * NB: In the event of an error, the caller may need to free the partial
+ * chain pointed to by *retmp.  The contents of both *uio and *space may be
+ * modified even in the case of an error.
+ *
+ * Returns 0 on success, ENOBUFS if an mbuf cannot be allocated, or the
+ * error reported by uiomove().
+ */
+static int
+sosend_copyin(struct uio *uio, struct mbuf **retmp, int atomic, long *space,
+    int flags)
+{
+	struct mbuf *m, **mp, *top;
+	long len, resid;
+	int error;
+#ifdef ZERO_COPY_SOCKETS
+	int cow_send;
+#endif
+
+	*retmp = top = NULL;
+	mp = &top;
+	len = 0;
+	resid = uio->uio_resid;
+	error = 0;
+	do {
+#ifdef ZERO_COPY_SOCKETS
+		cow_send = 0;
+#endif /* ZERO_COPY_SOCKETS */
+		if (resid >= MINCLSIZE) {
+#ifdef ZERO_COPY_SOCKETS
+			if (top == NULL) {
+				MGETHDR(m, M_TRYWAIT, MT_DATA);
+				if (m == NULL) {
+					error = ENOBUFS;
+					goto out;
+				}
+				m->m_pkthdr.len = 0;
+				m->m_pkthdr.rcvif = NULL;
+			} else {
+				MGET(m, M_TRYWAIT, MT_DATA);
+				if (m == NULL) {
+					error = ENOBUFS;
+					goto out;
+				}
+			}
+			if (so_zero_copy_send &&
+			    resid>=PAGE_SIZE &&
+			    *space>=PAGE_SIZE &&
+			    uio->uio_iov->iov_len>=PAGE_SIZE) {
+				so_zerocp_stats.size_ok++;
+				so_zerocp_stats.align_ok++;
+				cow_send = socow_setup(m, uio);
+				len = cow_send;
+			}
+			if (!cow_send) {
+				MCLGET(m, M_TRYWAIT);
+				if ((m->m_flags & M_EXT) == 0) {
+					m_free(m);
+					m = NULL;
+				} else {
+					len = min(min(MCLBYTES, resid),
+					    *space);
+				}
+			}
+#else /* ZERO_COPY_SOCKETS */
+			if (top == NULL) {
+				m = m_getcl(M_TRYWAIT, MT_DATA, M_PKTHDR);
+				if (m == NULL) {
+					error = ENOBUFS;
+					goto out;
+				}
+				m->m_pkthdr.len = 0;
+				m->m_pkthdr.rcvif = NULL;
+			} else
+				m = m_getcl(M_TRYWAIT, MT_DATA, 0);
+			len = min(min(MCLBYTES, resid), *space);
+#endif /* ZERO_COPY_SOCKETS */
+		} else {
+			if (top == NULL) {
+				m = m_gethdr(M_TRYWAIT, MT_DATA);
+				if (m == NULL) {
+					error = ENOBUFS;
+					goto out;
+				}
+				m->m_pkthdr.len = 0;
+				m->m_pkthdr.rcvif = NULL;
+
+				len = min(min(MHLEN, resid), *space);
+				/*
+				 * For datagram protocols, leave room
+				 * for protocol headers in first mbuf.
+				 */
+				if (atomic && len < MHLEN)
+					MH_ALIGN(m, len);
+			} else {
+				m = m_get(M_TRYWAIT, MT_DATA);
+				len = min(min(MLEN, resid), *space);
+			}
+		}
+		if (m == NULL) {
+			error = ENOBUFS;
+			goto out;
+		}
+
+		*space -= len;
+#ifdef ZERO_COPY_SOCKETS
+		if (cow_send)
+			error = 0;
+		else
+#endif /* ZERO_COPY_SOCKETS */
+		error = uiomove(mtod(m, void *), (int)len, uio);
+		resid = uio->uio_resid;
+		m->m_len = len;
+		*mp = m;
+		top->m_pkthdr.len += len;
+		if (error)
+			goto out;
+		mp = &m->m_next;
+		if (resid <= 0) {
+			if (flags & MSG_EOR)
+				top->m_flags |= M_EOR;
+			break;
+		}
+	} while (*space > 0 && atomic);
+out:
+	*retmp = top;
+	return (error);
+}
+
 #define	SBLOCKWAIT(f)	(((f) & MSG_DONTWAIT) ? M_NOWAIT : M_WAITOK)
+/*
+ * Record an error and jump to the enclosing function's "release" label,
+ * which is entered with the so_snd lock held and is responsible for it.
+ */
+#define	snderr(errno)	{ error = (errno); goto release; }
+
+/*
+ * sosend_dgram() is a cut-down sosend() for SOCK_DGRAM sockets whose
+ * protocol is PR_ATOMIC.  It does not sleep for send buffer space: a
+ * message larger than the space available fails with EMSGSIZE.  The data
+ * is handed to the protocol in a single pru_send call.  top and control
+ * are either passed to the protocol or freed before returning.
+ */
+int
+sosend_dgram(so, addr, uio, top, control, flags, td)
+	struct socket *so;
+	struct sockaddr *addr;
+	struct uio *uio;
+	struct mbuf *top;
+	struct mbuf *control;
+	int flags;
+	struct thread *td;
+{
+	long space, resid;
+	int clen = 0, error, dontroute;
+	int atomic = sosendallatonce(so) || top;
+
+	KASSERT(so->so_type == SOCK_DGRAM, ("sosend_dgram: !SOCK_DGRAM"));
+	KASSERT(so->so_proto->pr_flags & PR_ATOMIC,
+	    ("sosend_dgram: !PR_ATOMIC"));
+
+	if (uio != NULL)
+		resid = uio->uio_resid;
+	else
+		resid = top->m_pkthdr.len;
+	/*
+	 * In theory resid should be unsigned.
+	 * However, space must be signed, as it might be less than 0
+	 * if we over-committed, and we must use a signed comparison
+	 * of space and resid.  On the other hand, a negative resid
+	 * causes us to loop sending 0-length segments to the protocol.
+	 */
+	if (resid < 0) {
+		error = EINVAL;
+		goto out;
+	}
+
+	dontroute =
+	    (flags & MSG_DONTROUTE) && (so->so_options & SO_DONTROUTE) == 0;
+	if (td != NULL)
+		td->td_proc->p_stats->p_ru.ru_msgsnd++;
+	if (control != NULL)
+		clen = control->m_len;
+
+	SOCKBUF_LOCK(&so->so_snd);
+	if (so->so_snd.sb_state & SBS_CANTSENDMORE)
+		snderr(EPIPE);
+	if (so->so_error) {
+		error = so->so_error;
+		so->so_error = 0;
+		goto release;
+	}
+	if ((so->so_state & SS_ISCONNECTED) == 0) {
+		/*
+		 * `sendto' and `sendmsg' is allowed on a connection-
+		 * based socket if it supports implied connect.
+		 * Return ENOTCONN if not connected and no address is
+		 * supplied.
+		 */
+		if ((so->so_proto->pr_flags & PR_CONNREQUIRED) &&
+		    (so->so_proto->pr_flags & PR_IMPLOPCL) == 0) {
+			if ((so->so_state & SS_ISCONFIRMING) == 0 &&
+			    !(resid == 0 && clen != 0))
+				snderr(ENOTCONN);
+		} else if (addr == NULL)
+			snderr(so->so_proto->pr_flags & PR_CONNREQUIRED ?
+			    ENOTCONN : EDESTADDRREQ);
+	}
+
+	/*
+	 * Do we need MSG_OOB support in SOCK_DGRAM?  Signs here may be a
+	 * problem and need fixing.
+	 */
+	space = sbspace(&so->so_snd);
+	if (flags & MSG_OOB)
+		space += 1024;
+	space -= clen;
+	if (resid > space)
+		snderr(EMSGSIZE);
+	SOCKBUF_UNLOCK(&so->so_snd);
+	if (uio == NULL) {
+		resid = 0;
+		if (flags & MSG_EOR)
+			top->m_flags |= M_EOR;
+	} else {
+		error = sosend_copyin(uio, &top, atomic, &space, flags);
+		if (error)
+			goto out;
+		resid = uio->uio_resid;
+	}
+	KASSERT(resid == 0, ("sosend_dgram: resid != 0"));
+	/*
+	 * XXXRW: Frobbing SO_DONTROUTE here is even worse without sblock
+	 * than with.
+	 */
+	if (dontroute) {
+		SOCK_LOCK(so);
+		so->so_options |= SO_DONTROUTE;
+		SOCK_UNLOCK(so);
+	}
+	/*
+	 * XXX all the SBS_CANTSENDMORE checks previously
+	 * done could be out of date.  We could have received
+	 * a reset packet in an interrupt or maybe we slept
+	 * while doing page faults in uiomove() etc. We could
+	 * probably recheck again inside the locking protection
+	 * here, but there are probably other places that this
+	 * also happens.  We must rethink this.
+	 */
+	error = (*so->so_proto->pr_usrreqs->pru_send)(so,
+	    (flags & MSG_OOB) ? PRUS_OOB :
+	/*
+	 * If the user set MSG_EOF, the protocol
+	 * understands this flag and nothing left to
+	 * send then use PRU_SEND_EOF instead of PRU_SEND.
+	 */
+	    ((flags & MSG_EOF) &&
+	     (so->so_proto->pr_flags & PR_IMPLOPCL) &&
+	     (resid <= 0)) ?
+		PRUS_EOF :
+		/* If there is more to send set PRUS_MORETOCOME */
+		(resid > 0 && space > 0) ? PRUS_MORETOCOME : 0,
+	    top, addr, control, td);
+	if (dontroute) {
+		SOCK_LOCK(so);
+		so->so_options &= ~SO_DONTROUTE;
+		SOCK_UNLOCK(so);
+	}
+	clen = 0;
+	control = NULL;
+	top = NULL;
+	goto out;
+release:
+	SOCKBUF_UNLOCK(&so->so_snd);
+out:
+	if (top != NULL)
+		m_freem(top);
+	if (control != NULL)
+		m_freem(control);
+	return (error);
+}
+
 /*
  * Send on a socket.
  * If send must go all at once and message is larger than
@@ -619,14 +907,9 @@
 	int flags;
 	struct thread *td;
 {
-	struct mbuf **mp;
-	struct mbuf *m;
-	long space, len = 0, resid;
+	long space, resid;
 	int clen = 0, error, dontroute;
 	int atomic = sosendallatonce(so) || top;
-#ifdef ZERO_COPY_SOCKETS
-	int cow_send;
-#endif /* ZERO_COPY_SOCKETS */
 
 	if (uio != NULL)
 		resid = uio->uio_resid;
@@ -654,7 +937,6 @@
 		td->td_proc->p_stats->p_ru.ru_msgsnd++;
 	if (control != NULL)
 		clen = control->m_len;
-#define	snderr(errno)	{ error = (errno); goto release; }
 
 	SOCKBUF_LOCK(&so->so_snd);
 restart:
@@ -704,153 +986,61 @@
 			goto restart;
 		}
 		SOCKBUF_UNLOCK(&so->so_snd);
-		mp = &top;
 		space -= clen;
 		do {
-		    if (uio == NULL) {
-			/*
-			 * Data is prepackaged in "top".
-			 */
-			resid = 0;
-			if (flags & MSG_EOR)
-				top->m_flags |= M_EOR;
-		    } else do {
-#ifdef ZERO_COPY_SOCKETS
-			cow_send = 0;
-#endif /* ZERO_COPY_SOCKETS */
-			if (resid >= MINCLSIZE) {
-#ifdef ZERO_COPY_SOCKETS
-				if (top == NULL) {
-					MGETHDR(m, M_TRYWAIT, MT_DATA);
-					if (m == NULL) {
-						error = ENOBUFS;
-						SOCKBUF_LOCK(&so->so_snd);
-						goto release;
-					}
-					m->m_pkthdr.len = 0;
-					m->m_pkthdr.rcvif = NULL;
-				} else {
-					MGET(m, M_TRYWAIT, MT_DATA);
-					if (m == NULL) {
-						error = ENOBUFS;
-						SOCKBUF_LOCK(&so->so_snd);
-						goto release;
-					}
-				}
-				if (so_zero_copy_send &&
-				    resid>=PAGE_SIZE &&
-				    space>=PAGE_SIZE &&
-				    uio->uio_iov->iov_len>=PAGE_SIZE) {
-					so_zerocp_stats.size_ok++;
-					so_zerocp_stats.align_ok++;
-					cow_send = socow_setup(m, uio);
-					len = cow_send;
-				}
-				if (!cow_send) {
-					MCLGET(m, M_TRYWAIT);
-					if ((m->m_flags & M_EXT) == 0) {
-						m_free(m);
-						m = NULL;
-					} else {
-						len = min(min(MCLBYTES, resid), space);
-					}
-				}
-#else /* ZERO_COPY_SOCKETS */
-				if (top == NULL) {
-					m = m_getcl(M_TRYWAIT, MT_DATA, M_PKTHDR);
-					m->m_pkthdr.len = 0;
-					m->m_pkthdr.rcvif = NULL;
-				} else
-					m = m_getcl(M_TRYWAIT, MT_DATA, 0);
-				len = min(min(MCLBYTES, resid), space);
-#endif /* ZERO_COPY_SOCKETS */
+			if (uio == NULL) {
+				resid = 0;
+				if (flags & MSG_EOR)
+					top->m_flags |= M_EOR;
 			} else {
-				if (top == NULL) {
-					m = m_gethdr(M_TRYWAIT, MT_DATA);
-					m->m_pkthdr.len = 0;
-					m->m_pkthdr.rcvif = NULL;
-
-					len = min(min(MHLEN, resid), space);
-					/*
-					 * For datagram protocols, leave room
-					 * for protocol headers in first mbuf.
-					 */
-					if (atomic && m && len < MHLEN)
-						MH_ALIGN(m, len);
-				} else {
-					m = m_get(M_TRYWAIT, MT_DATA);
-					len = min(min(MLEN, resid), space);
+				error = sosend_copyin(uio, &top, atomic,
+				    &space, flags);
+				if (error != 0) {
+					SOCKBUF_LOCK(&so->so_snd);
+					goto release;
 				}
+				resid = uio->uio_resid;
 			}
-			if (m == NULL) {
-				error = ENOBUFS;
-				SOCKBUF_LOCK(&so->so_snd);
-				goto release;
+			if (dontroute) {
+				SOCK_LOCK(so);
+				so->so_options |= SO_DONTROUTE;
+				SOCK_UNLOCK(so);
 			}
-
-			space -= len;
-#ifdef ZERO_COPY_SOCKETS
-			if (cow_send)
-				error = 0;
-			else
-#endif /* ZERO_COPY_SOCKETS */
-			error = uiomove(mtod(m, void *), (int)len, uio);
-			resid = uio->uio_resid;
-			m->m_len = len;
-			*mp = m;
-			top->m_pkthdr.len += len;
-			if (error) {
-				SOCKBUF_LOCK(&so->so_snd);
-				goto release;
-			}
-			mp = &m->m_next;
-			if (resid <= 0) {
-				if (flags & MSG_EOR)
-					top->m_flags |= M_EOR;
-				break;
-			}
-		    } while (space > 0 && atomic);
-		    if (dontroute) {
-			    SOCK_LOCK(so);
-			    so->so_options |= SO_DONTROUTE;
-			    SOCK_UNLOCK(so);
-		    }
-		    /*
-		     * XXX all the SBS_CANTSENDMORE checks previously
-		     * done could be out of date.  We could have recieved
-		     * a reset packet in an interrupt or maybe we slept
-		     * while doing page faults in uiomove() etc. We could
-		     * probably recheck again inside the locking protection
-		     * here, but there are probably other places that this
-		     * also happens.  We must rethink this.
-		     */
-		    error = (*so->so_proto->pr_usrreqs->pru_send)(so,
-			(flags & MSG_OOB) ? PRUS_OOB :
+			/*
+			 * XXX all the SBS_CANTSENDMORE checks previously
+			 * done could be out of date.  We could have received
+			 * a reset packet in an interrupt or maybe we slept
+			 * while doing page faults in uiomove() etc. We could
+			 * probably recheck again inside the locking protection
+			 * here, but there are probably other places that this
+			 * also happens.  We must rethink this.
+			 */
+			error = (*so->so_proto->pr_usrreqs->pru_send)(so,
+			    (flags & MSG_OOB) ? PRUS_OOB :
 	/*
 	 * If the user set MSG_EOF, the protocol
 	 * understands this flag and nothing left to
 	 * send then use PRU_SEND_EOF instead of PRU_SEND.
 	 */
-			((flags & MSG_EOF) &&
-			 (so->so_proto->pr_flags & PR_IMPLOPCL) &&
-			 (resid <= 0)) ?
+			    ((flags & MSG_EOF) &&
+			     (so->so_proto->pr_flags & PR_IMPLOPCL) &&
+			     (resid <= 0)) ?
 				PRUS_EOF :
 			/* If there is more to send set PRUS_MORETOCOME */
-			(resid > 0 && space > 0) ? PRUS_MORETOCOME : 0,
-			top, addr, control, td);
-		    if (dontroute) {
-			    SOCK_LOCK(so);
-			    so->so_options &= ~SO_DONTROUTE;
-			    SOCK_UNLOCK(so);
-		    }
-		    clen = 0;
-		    control = NULL;
-		    top = NULL;
-		    mp = &top;
-		    if (error) {
-			SOCKBUF_LOCK(&so->so_snd);
-			goto release;
-		    }
+			    (resid > 0 && space > 0) ? PRUS_MORETOCOME : 0,
+			    top, addr, control, td);
+			if (dontroute) {
+				SOCK_LOCK(so);
+				so->so_options &= ~SO_DONTROUTE;
+				SOCK_UNLOCK(so);
+			}
+			clen = 0;
+			control = NULL;
+			top = NULL;
+			if (error) {
+				SOCKBUF_LOCK(&so->so_snd);
+				goto release;
+			}
 		} while (resid && space > 0);
 		SOCKBUF_LOCK(&so->so_snd);
 	} while (resid);
--- //depot/vendor/freebsd/src/sys/netinet/udp_usrreq.c	2005/11/18 20:15:44
+++ //depot/projects/netsmp/src/sys/netinet/udp_usrreq.c	2005/11/24 12:26:34
@@ -1115,5 +1115,36 @@
 	.pru_send =		udp_send,
 	.pru_shutdown =		udp_shutdown,
 	.pru_sockaddr =		udp_sockaddr,
+	.pru_sosend =		sosend,
 	.pru_sosetlabel =	in_pcbsosetlabel
 };
+
+/*
+ * Sysctl handler reporting (and, when a new value is supplied, selecting)
+ * whether udp_usrreqs.pru_sosend is sosend_dgram (1) or sosend (0).
+ */
+static int
+sysctl_net_inet_udp_sosend_dgram(SYSCTL_HANDLER_ARGS)
+{
+	int error, mode;
+
+	if (udp_usrreqs.pru_sosend == sosend_dgram)
+		mode = 1;
+	else
+		mode = 0;
+
+	error = sysctl_handle_int(oidp, &mode, 0, req);
+	if (error || !req->newptr)
+		return (error);
+
+	if (mode)
+		udp_usrreqs.pru_sosend = sosend_dgram;
+	else
+		udp_usrreqs.pru_sosend = sosend;
+
+	return (0);
+}
+
+SYSCTL_PROC(_net_inet_udp, OID_AUTO, sosend_dgram, CTLTYPE_INT |
+    CTLFLAG_RW, 0, 0, sysctl_net_inet_udp_sosend_dgram, "I",
+    "Use optimized sosend_dgram instead of sosend for UDP");
--- //depot/vendor/freebsd/src/sys/sys/socketvar.h	2005/11/02 13:50:33
+++ //depot/projects/netsmp/src/sys/sys/socketvar.h	2005/11/24 12:26:34
@@ -533,6 +533,9 @@
 int	sosend(struct socket *so, struct sockaddr *addr, struct uio *uio,
 	    struct mbuf *top, struct mbuf *control, int flags,
 	    struct thread *td);
+int	sosend_dgram(struct socket *so, struct sockaddr *addr,
+	    struct uio *uio, struct mbuf *top, struct mbuf *control,
+	    int flags, struct thread *td);
 int	sosetopt(struct socket *so, struct sockopt *sopt);
 int	soshutdown(struct socket *so, int how);
 void	sotoxsocket(struct socket *so, struct xsocket *xso);