The Design and Implementation of the FreeBSD Operating System, Second Edition
Now available: The Design and Implementation of the FreeBSD Operating System (Second Edition)


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]

FreeBSD/Linux Kernel Cross Reference
sys/dev/netmap/netmap_offloadings.c

Version: -  FREEBSD  -  FREEBSD-13-STABLE  -  FREEBSD-13-0  -  FREEBSD-12-STABLE  -  FREEBSD-12-0  -  FREEBSD-11-STABLE  -  FREEBSD-11-0  -  FREEBSD-10-STABLE  -  FREEBSD-10-0  -  FREEBSD-9-STABLE  -  FREEBSD-9-0  -  FREEBSD-8-STABLE  -  FREEBSD-8-0  -  FREEBSD-7-STABLE  -  FREEBSD-7-0  -  FREEBSD-6-STABLE  -  FREEBSD-6-0  -  FREEBSD-5-STABLE  -  FREEBSD-5-0  -  FREEBSD-4-STABLE  -  FREEBSD-3-STABLE  -  FREEBSD22  -  l41  -  OPENBSD  -  linux-2.6  -  MK84  -  PLAN9  -  xnu-8792 
SearchContext: -  none  -  3  -  10 

    1 /*-
    2  * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
    3  *
    4  * Copyright (C) 2014-2015 Vincenzo Maffione
    5  * All rights reserved.
    6  *
    7  * Redistribution and use in source and binary forms, with or without
    8  * modification, are permitted provided that the following conditions
    9  * are met:
   10  *   1. Redistributions of source code must retain the above copyright
   11  *      notice, this list of conditions and the following disclaimer.
   12  *   2. Redistributions in binary form must reproduce the above copyright
   13  *      notice, this list of conditions and the following disclaimer in the
   14  *      documentation and/or other materials provided with the distribution.
   15  *
   16  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
   17  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
   18  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
   19  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
   20  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
   21  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
   22  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
   23  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
   24  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
   25  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
   26  * SUCH DAMAGE.
   27  */
   28 
   29 /* $FreeBSD$ */
   30 
   31 #if defined(__FreeBSD__)
   32 #include <sys/cdefs.h> /* prerequisite */
   33 
   34 #include <sys/types.h>
   35 #include <sys/errno.h>
   36 #include <sys/param.h>  /* defines used in kernel.h */
   37 #include <sys/kernel.h> /* types used in module initialization */
   38 #include <sys/sockio.h>
   39 #include <sys/malloc.h>
   40 #include <sys/socketvar.h>      /* struct socket */
   41 #include <sys/socket.h> /* sockaddrs */
   42 #include <net/if.h>
   43 #include <net/if_var.h>
   44 #include <machine/bus.h>        /* bus_dmamap_* */
   45 #include <sys/endian.h>
   46 
   47 #elif defined(linux)
   48 
   49 #include "bsd_glue.h"
   50 
   51 #elif defined(__APPLE__)
   52 
   53 #warning OSX support is only partial
   54 #include "osx_glue.h"
   55 
   56 #else
   57 
   58 #error  Unsupported platform
   59 
   60 #endif /* unsupported */
   61 
   62 #include <net/netmap.h>
   63 #include <dev/netmap/netmap_kern.h>
   64 
   65 
   66 
   67 /* This routine is called by bdg_mismatch_datapath() when it finishes
   68  * accumulating bytes for a segment, in order to fix some fields in the
   69  * segment headers (which still contain the same content as the header
   70  * of the original GSO packet). 'pkt' points to the beginning of the IP
   71  * header of the segment, while 'len' is the length of the IP packet.
   72  */
   73 static void
   74 gso_fix_segment(uint8_t *pkt, size_t len, u_int ipv4, u_int iphlen, u_int tcp,
   75                 u_int idx, u_int segmented_bytes, u_int last_segment)
   76 {
   77         struct nm_iphdr *iph = (struct nm_iphdr *)(pkt);
   78         struct nm_ipv6hdr *ip6h = (struct nm_ipv6hdr *)(pkt);
   79         uint16_t *check = NULL;
   80         uint8_t *check_data = NULL;
   81 
   82         if (ipv4) {
   83                 /* Set the IPv4 "Total Length" field. */
   84                 iph->tot_len = htobe16(len);
   85                 nm_prdis("ip total length %u", be16toh(ip->tot_len));
   86 
   87                 /* Set the IPv4 "Identification" field. */
   88                 iph->id = htobe16(be16toh(iph->id) + idx);
   89                 nm_prdis("ip identification %u", be16toh(iph->id));
   90 
   91                 /* Compute and insert the IPv4 header checksum. */
   92                 iph->check = 0;
   93                 iph->check = nm_os_csum_ipv4(iph);
   94                 nm_prdis("IP csum %x", be16toh(iph->check));
   95         } else {
   96                 /* Set the IPv6 "Payload Len" field. */
   97                 ip6h->payload_len = htobe16(len-iphlen);
   98         }
   99 
  100         if (tcp) {
  101                 struct nm_tcphdr *tcph = (struct nm_tcphdr *)(pkt + iphlen);
  102 
  103                 /* Set the TCP sequence number. */
  104                 tcph->seq = htobe32(be32toh(tcph->seq) + segmented_bytes);
  105                 nm_prdis("tcp seq %u", be32toh(tcph->seq));
  106 
  107                 /* Zero the PSH and FIN TCP flags if this is not the last
  108                    segment. */
  109                 if (!last_segment)
  110                         tcph->flags &= ~(0x8 | 0x1);
  111                 nm_prdis("last_segment %u", last_segment);
  112 
  113                 check = &tcph->check;
  114                 check_data = (uint8_t *)tcph;
  115         } else { /* UDP */
  116                 struct nm_udphdr *udph = (struct nm_udphdr *)(pkt + iphlen);
  117 
  118                 /* Set the UDP 'Length' field. */
  119                 udph->len = htobe16(len-iphlen);
  120 
  121                 check = &udph->check;
  122                 check_data = (uint8_t *)udph;
  123         }
  124 
  125         /* Compute and insert TCP/UDP checksum. */
  126         *check = 0;
  127         if (ipv4)
  128                 nm_os_csum_tcpudp_ipv4(iph, check_data, len-iphlen, check);
  129         else
  130                 nm_os_csum_tcpudp_ipv6(ip6h, check_data, len-iphlen, check);
  131 
  132         nm_prdis("TCP/UDP csum %x", be16toh(*check));
  133 }
  134 
  135 static inline int
  136 vnet_hdr_is_bad(struct nm_vnet_hdr *vh)
  137 {
  138         uint8_t gso_type = vh->gso_type & ~VIRTIO_NET_HDR_GSO_ECN;
  139 
  140         return (
  141                 (gso_type != VIRTIO_NET_HDR_GSO_NONE &&
  142                  gso_type != VIRTIO_NET_HDR_GSO_TCPV4 &&
  143                  gso_type != VIRTIO_NET_HDR_GSO_UDP &&
  144                  gso_type != VIRTIO_NET_HDR_GSO_TCPV6)
  145                 ||
  146                  (vh->flags & ~(VIRTIO_NET_HDR_F_NEEDS_CSUM
  147                                | VIRTIO_NET_HDR_F_DATA_VALID))
  148                );
  149 }
  150 
  151 /* The VALE mismatch datapath implementation. */
  152 void
  153 bdg_mismatch_datapath(struct netmap_vp_adapter *na,
  154                       struct netmap_vp_adapter *dst_na,
  155                       const struct nm_bdg_fwd *ft_p,
  156                       struct netmap_ring *dst_ring,
  157                       u_int *j, u_int lim, u_int *howmany)
  158 {
  159         struct netmap_slot *dst_slot = NULL;
  160         struct nm_vnet_hdr *vh = NULL;
  161         const struct nm_bdg_fwd *ft_end = ft_p + ft_p->ft_frags;
  162 
  163         /* Source and destination pointers. */
  164         uint8_t *dst, *src;
  165         size_t src_len, dst_len;
  166 
  167         /* Indices and counters for the destination ring. */
  168         u_int j_start = *j;
  169         u_int j_cur = j_start;
  170         u_int dst_slots = 0;
  171 
  172         if (unlikely(ft_p == ft_end)) {
  173                 nm_prlim(1, "No source slots to process");
  174                 return;
  175         }
  176 
  177         /* Init source and dest pointers. */
  178         src = ft_p->ft_buf;
  179         src_len = ft_p->ft_len;
  180         dst_slot = &dst_ring->slot[j_cur];
  181         dst = NMB(&dst_na->up, dst_slot);
  182         dst_len = src_len;
  183 
  184         /* If the source port uses the offloadings, while destination doesn't,
  185          * we grab the source virtio-net header and do the offloadings here.
  186          */
  187         if (na->up.virt_hdr_len && !dst_na->up.virt_hdr_len) {
  188                 vh = (struct nm_vnet_hdr *)src;
  189                 /* Initial sanity check on the source virtio-net header. If
  190                  * something seems wrong, just drop the packet. */
  191                 if (src_len < na->up.virt_hdr_len) {
  192                         nm_prlim(1, "Short src vnet header, dropping");
  193                         return;
  194                 }
  195                 if (unlikely(vnet_hdr_is_bad(vh))) {
  196                         nm_prlim(1, "Bad src vnet header, dropping");
  197                         return;
  198                 }
  199         }
  200 
  201         /* We are processing the first input slot and there is a mismatch
  202          * between source and destination virt_hdr_len (SHL and DHL).
  203          * When the a client is using virtio-net headers, the header length
  204          * can be:
  205          *    - 10: the header corresponds to the struct nm_vnet_hdr
  206          *    - 12: the first 10 bytes correspond to the struct
  207          *          virtio_net_hdr, and the last 2 bytes store the
  208          *          "mergeable buffers" info, which is an optional
  209          *          hint that can be zeroed for compatibility
  210          *
  211          * The destination header is therefore built according to the
  212          * following table:
  213          *
  214          * SHL | DHL | destination header
  215          * -----------------------------
  216          *   0 |  10 | zero
  217          *   0 |  12 | zero
  218          *  10 |   0 | doesn't exist
  219          *  10 |  12 | first 10 bytes are copied from source header, last 2 are zero
  220          *  12 |   0 | doesn't exist
  221          *  12 |  10 | copied from the first 10 bytes of source header
  222          */
  223         bzero(dst, dst_na->up.virt_hdr_len);
  224         if (na->up.virt_hdr_len && dst_na->up.virt_hdr_len)
  225                 memcpy(dst, src, sizeof(struct nm_vnet_hdr));
  226         /* Skip the virtio-net headers. */
  227         src += na->up.virt_hdr_len;
  228         src_len -= na->up.virt_hdr_len;
  229         dst += dst_na->up.virt_hdr_len;
  230         dst_len = dst_na->up.virt_hdr_len + src_len;
  231 
  232         /* Here it could be dst_len == 0 (which implies src_len == 0),
  233          * so we avoid passing a zero length fragment.
  234          */
  235         if (dst_len == 0) {
  236                 ft_p++;
  237                 src = ft_p->ft_buf;
  238                 src_len = ft_p->ft_len;
  239                 dst_len = src_len;
  240         }
  241 
  242         if (vh && vh->gso_type != VIRTIO_NET_HDR_GSO_NONE) {
  243                 u_int gso_bytes = 0;
  244                 /* Length of the GSO packet header. */
  245                 u_int gso_hdr_len = 0;
  246                 /* Pointer to the GSO packet header. Assume it is in a single fragment. */
  247                 uint8_t *gso_hdr = NULL;
  248                 /* Index of the current segment. */
  249                 u_int gso_idx = 0;
  250                 /* Payload data bytes segmented so far (e.g. TCP data bytes). */
  251                 u_int segmented_bytes = 0;
  252                 /* Is this an IPv4 or IPv6 GSO packet? */
  253                 u_int ipv4 = 0;
  254                 /* Length of the IP header (20 if IPv4, 40 if IPv6). */
  255                 u_int iphlen = 0;
  256                 /* Length of the Ethernet header (18 if 802.1q, otherwise 14). */
  257                 u_int ethhlen = 14;
  258                 /* Is this a TCP or an UDP GSO packet? */
  259                 u_int tcp = ((vh->gso_type & ~VIRTIO_NET_HDR_GSO_ECN)
  260                                 == VIRTIO_NET_HDR_GSO_UDP) ? 0 : 1;
  261 
  262                 /* Segment the GSO packet contained into the input slots (frags). */
  263                 for (;;) {
  264                         size_t copy;
  265 
  266                         if (dst_slots >= *howmany) {
  267                                 /* We still have work to do, but we've run out of
  268                                  * dst slots, so we have to drop the packet. */
  269                                 nm_prdis(1, "Not enough slots, dropping GSO packet");
  270                                 return;
  271                         }
  272 
  273                         /* Grab the GSO header if we don't have it. */
  274                         if (!gso_hdr) {
  275                                 uint16_t ethertype;
  276 
  277                                 gso_hdr = src;
  278 
  279                                 /* Look at the 'Ethertype' field to see if this packet
  280                                  * is IPv4 or IPv6, taking into account VLAN
  281                                  * encapsulation. */
  282                                 for (;;) {
  283                                         if (src_len < ethhlen) {
  284                                                 nm_prlim(1, "Short GSO fragment [eth], dropping");
  285                                                 return;
  286                                         }
  287                                         ethertype = be16toh(*((uint16_t *)
  288                                                             (gso_hdr + ethhlen - 2)));
  289                                         if (ethertype != 0x8100) /* not 802.1q */
  290                                                 break;
  291                                         ethhlen += 4;
  292                                 }
  293                                 switch (ethertype) {
  294                                         case 0x0800:  /* IPv4 */
  295                                         {
  296                                                 struct nm_iphdr *iph = (struct nm_iphdr *)
  297                                                                         (gso_hdr + ethhlen);
  298 
  299                                                 if (src_len < ethhlen + 20) {
  300                                                         nm_prlim(1, "Short GSO fragment "
  301                                                               "[IPv4], dropping");
  302                                                         return;
  303                                                 }
  304                                                 ipv4 = 1;
  305                                                 iphlen = 4 * (iph->version_ihl & 0x0F);
  306                                                 break;
  307                                         }
  308                                         case 0x86DD:  /* IPv6 */
  309                                                 ipv4 = 0;
  310                                                 iphlen = 40;
  311                                                 break;
  312                                         default:
  313                                                 nm_prlim(1, "Unsupported ethertype, "
  314                                                       "dropping GSO packet");
  315                                                 return;
  316                                 }
  317                                 nm_prdis(3, "type=%04x", ethertype);
  318 
  319                                 if (src_len < ethhlen + iphlen) {
  320                                         nm_prlim(1, "Short GSO fragment [IP], dropping");
  321                                         return;
  322                                 }
  323 
  324                                 /* Compute gso_hdr_len. For TCP we need to read the
  325                                  * content of the 'Data Offset' field.
  326                                  */
  327                                 if (tcp) {
  328                                         struct nm_tcphdr *tcph = (struct nm_tcphdr *)
  329                                                                 (gso_hdr + ethhlen + iphlen);
  330 
  331                                         if (src_len < ethhlen + iphlen + 20) {
  332                                                 nm_prlim(1, "Short GSO fragment "
  333                                                                 "[TCP], dropping");
  334                                                 return;
  335                                         }
  336                                         gso_hdr_len = ethhlen + iphlen +
  337                                                       4 * (tcph->doff >> 4);
  338                                 } else {
  339                                         gso_hdr_len = ethhlen + iphlen + 8; /* UDP */
  340                                 }
  341 
  342                                 if (src_len < gso_hdr_len) {
  343                                         nm_prlim(1, "Short GSO fragment [TCP/UDP], dropping");
  344                                         return;
  345                                 }
  346 
  347                                 nm_prdis(3, "gso_hdr_len %u gso_mtu %d", gso_hdr_len,
  348                                                                    dst_na->mfs);
  349 
  350                                 /* Advance source pointers. */
  351                                 src += gso_hdr_len;
  352                                 src_len -= gso_hdr_len;
  353                                 if (src_len == 0) {
  354                                         ft_p++;
  355                                         if (ft_p == ft_end)
  356                                                 break;
  357                                         src = ft_p->ft_buf;
  358                                         src_len = ft_p->ft_len;
  359                                 }
  360                         }
  361 
  362                         /* Fill in the header of the current segment. */
  363                         if (gso_bytes == 0) {
  364                                 memcpy(dst, gso_hdr, gso_hdr_len);
  365                                 gso_bytes = gso_hdr_len;
  366                         }
  367 
  368                         /* Fill in data and update source and dest pointers. */
  369                         copy = src_len;
  370                         if (gso_bytes + copy > dst_na->mfs)
  371                                 copy = dst_na->mfs - gso_bytes;
  372                         memcpy(dst + gso_bytes, src, copy);
  373                         gso_bytes += copy;
  374                         src += copy;
  375                         src_len -= copy;
  376 
  377                         /* A segment is complete or we have processed all the
  378                            the GSO payload bytes. */
  379                         if (gso_bytes >= dst_na->mfs ||
  380                                 (src_len == 0 && ft_p + 1 == ft_end)) {
  381                                 /* After raw segmentation, we must fix some header
  382                                  * fields and compute checksums, in a protocol dependent
  383                                  * way. */
  384                                 gso_fix_segment(dst + ethhlen, gso_bytes - ethhlen,
  385                                                 ipv4, iphlen, tcp,
  386                                                 gso_idx, segmented_bytes,
  387                                                 src_len == 0 && ft_p + 1 == ft_end);
  388 
  389                                 nm_prdis("frame %u completed with %d bytes", gso_idx, (int)gso_bytes);
  390                                 dst_slot->len = gso_bytes;
  391                                 dst_slot->flags = 0;
  392                                 dst_slots++;
  393                                 segmented_bytes += gso_bytes - gso_hdr_len;
  394 
  395                                 gso_bytes = 0;
  396                                 gso_idx++;
  397 
  398                                 /* Next destination slot. */
  399                                 j_cur = nm_next(j_cur, lim);
  400                                 dst_slot = &dst_ring->slot[j_cur];
  401                                 dst = NMB(&dst_na->up, dst_slot);
  402                         }
  403 
  404                         /* Next input slot. */
  405                         if (src_len == 0) {
  406                                 ft_p++;
  407                                 if (ft_p == ft_end)
  408                                         break;
  409                                 src = ft_p->ft_buf;
  410                                 src_len = ft_p->ft_len;
  411                         }
  412                 }
  413                 nm_prdis(3, "%d bytes segmented", segmented_bytes);
  414 
  415         } else {
  416                 /* Address of a checksum field into a destination slot. */
  417                 uint16_t *check = NULL;
  418                 /* Accumulator for an unfolded checksum. */
  419                 rawsum_t csum = 0;
  420 
  421                 /* Process a non-GSO packet. */
  422 
  423                 /* Init 'check' if necessary. */
  424                 if (vh && (vh->flags & VIRTIO_NET_HDR_F_NEEDS_CSUM)) {
  425                         if (unlikely(vh->csum_offset + vh->csum_start > src_len))
  426                                 nm_prerr("invalid checksum request");
  427                         else
  428                                 check = (uint16_t *)(dst + vh->csum_start +
  429                                                 vh->csum_offset);
  430                 }
  431 
  432                 while (ft_p != ft_end) {
  433                         /* Init/update the packet checksum if needed. */
  434                         if (vh && (vh->flags & VIRTIO_NET_HDR_F_NEEDS_CSUM)) {
  435                                 if (!dst_slots)
  436                                         csum = nm_os_csum_raw(src + vh->csum_start,
  437                                                                 src_len - vh->csum_start, 0);
  438                                 else
  439                                         csum = nm_os_csum_raw(src, src_len, csum);
  440                         }
  441 
  442                         /* Round to a multiple of 64 */
  443                         src_len = (src_len + 63) & ~63;
  444 
  445                         if (ft_p->ft_flags & NS_INDIRECT) {
  446                                 if (copyin(src, dst, src_len)) {
  447                                         /* Invalid user pointer, pretend len is 0. */
  448                                         dst_len = 0;
  449                                 }
  450                         } else {
  451                                 memcpy(dst, src, (int)src_len);
  452                         }
  453                         dst_slot->len = dst_len;
  454                         dst_slots++;
  455 
  456                         /* Next destination slot. */
  457                         j_cur = nm_next(j_cur, lim);
  458                         dst_slot = &dst_ring->slot[j_cur];
  459                         dst = NMB(&dst_na->up, dst_slot);
  460 
  461                         /* Next source slot. */
  462                         ft_p++;
  463                         src = ft_p->ft_buf;
  464                         dst_len = src_len = ft_p->ft_len;
  465                 }
  466 
  467                 /* Finalize (fold) the checksum if needed. */
  468                 if (check && vh && (vh->flags & VIRTIO_NET_HDR_F_NEEDS_CSUM)) {
  469                         *check = nm_os_csum_fold(csum);
  470                 }
  471                 nm_prdis(3, "using %u dst_slots", dst_slots);
  472 
  473                 /* A second pass on the destination slots to set the slot flags,
  474                  * using the right number of destination slots.
  475                  */
  476                 while (j_start != j_cur) {
  477                         dst_slot = &dst_ring->slot[j_start];
  478                         dst_slot->flags = (dst_slots << 8)| NS_MOREFRAG;
  479                         j_start = nm_next(j_start, lim);
  480                 }
  481                 /* Clear NS_MOREFRAG flag on last entry. */
  482                 dst_slot->flags = (dst_slots << 8);
  483         }
  484 
  485         /* Update howmany and j. This is to commit the use of
  486          * those slots in the destination ring. */
  487         if (unlikely(dst_slots > *howmany)) {
  488                 nm_prerr("bug: slot allocation error");
  489         }
  490         *j = j_cur;
  491         *howmany -= dst_slots;
  492 }

Cache object: efe11bd2c05a770cf6d19696bafb5c69


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]


This page is part of the FreeBSD/Linux Kernel Cross-Reference, and was automatically generated using a modified version of the LXR engine.