The Design and Implementation of the FreeBSD Operating System, Second Edition
Now available: The Design and Implementation of the FreeBSD Operating System (Second Edition)


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]

FreeBSD/Linux Kernel Cross Reference
sys/dev/hyperv/hvsock/hv_sock.c

Version: -  FREEBSD  -  FREEBSD-13-STABLE  -  FREEBSD-13-0  -  FREEBSD-12-STABLE  -  FREEBSD-12-0  -  FREEBSD-11-STABLE  -  FREEBSD-11-0  -  FREEBSD-10-STABLE  -  FREEBSD-10-0  -  FREEBSD-9-STABLE  -  FREEBSD-9-0  -  FREEBSD-8-STABLE  -  FREEBSD-8-0  -  FREEBSD-7-STABLE  -  FREEBSD-7-0  -  FREEBSD-6-STABLE  -  FREEBSD-6-0  -  FREEBSD-5-STABLE  -  FREEBSD-5-0  -  FREEBSD-4-STABLE  -  FREEBSD-3-STABLE  -  FREEBSD22  -  l41  -  OPENBSD  -  linux-2.6  -  MK84  -  PLAN9  -  xnu-8792 
SearchContext: -  none  -  3  -  10 

    1 /*-
    2  * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
    3  *
    4  * Copyright (c) 2020 Microsoft Corp.
    5  * All rights reserved.
    6  *
    7  * Redistribution and use in source and binary forms, with or without
    8  * modification, are permitted provided that the following conditions
    9  * are met:
   10  * 1. Redistributions of source code must retain the above copyright
   11  *    notice unmodified, this list of conditions, and the following
   12  *    disclaimer.
   13  * 2. Redistributions in binary form must reproduce the above copyright
   14  *    notice, this list of conditions and the following disclaimer in the
   15  *    documentation and/or other materials provided with the distribution.
   16  *
   17  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
   18  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
   19  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
   20  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
   21  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
   22  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
   23  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
   24  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
   25  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
   26  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
   27  */
   28 
   29 #include <sys/cdefs.h>
   30 __FBSDID("$FreeBSD$");
   31 
   32 #include <sys/param.h>
   33 #include <sys/bus.h>
   34 #include <sys/domain.h>
   35 #include <sys/lock.h>
   36 #include <sys/kernel.h>
   37 #include <sys/types.h>
   38 #include <sys/malloc.h>
   39 #include <sys/module.h>
   40 #include <sys/mutex.h>
   41 #include <sys/proc.h>
   42 #include <sys/protosw.h>
   43 #include <sys/socket.h>
   44 #include <sys/sysctl.h>
   45 #include <sys/sysproto.h>
   46 #include <sys/systm.h>
   47 #include <sys/sockbuf.h>
   48 #include <sys/sx.h>
   49 #include <sys/uio.h>
   50 
   51 #include <net/vnet.h>
   52 
   53 #include <dev/hyperv/vmbus/vmbus_reg.h>
   54 
   55 #include "hv_sock.h"
   56 
   57 #define HVSOCK_DBG_NONE                 0x0
   58 #define HVSOCK_DBG_INFO                 0x1
   59 #define HVSOCK_DBG_ERR                  0x2
   60 #define HVSOCK_DBG_VERBOSE              0x3
   61 
   62 
   63 SYSCTL_NODE(_net, OID_AUTO, hvsock, CTLFLAG_RD, 0, "HyperV socket");
   64 
   65 static int hvs_dbg_level;
   66 SYSCTL_INT(_net_hvsock, OID_AUTO, hvs_dbg_level, CTLFLAG_RWTUN, &hvs_dbg_level,
   67     0, "hyperv socket debug level: 0 = none, 1 = info, 2 = error, 3 = verbose");
   68 
   69 
   70 #define HVSOCK_DBG(level, ...) do {                                     \
   71         if (hvs_dbg_level >= (level))                                   \
   72                 printf(__VA_ARGS__);                                    \
   73         } while (0)
   74 
   75 MALLOC_DEFINE(M_HVSOCK, "hyperv_socket", "hyperv socket control structures");
   76 
   77 static int hvs_dom_probe(void);
   78 
   79 /* The MTU is 16KB per host side's design */
   80 #define HVSOCK_MTU_SIZE         (1024 * 16)
   81 #define HVSOCK_SEND_BUF_SZ      (PAGE_SIZE - sizeof(struct vmpipe_proto_header))
   82 
   83 #define HVSOCK_HEADER_LEN       (sizeof(struct hvs_pkt_header))
   84 
   85 #define HVSOCK_PKT_LEN(payload_len)     (HVSOCK_HEADER_LEN + \
   86                                          roundup2(payload_len, 8) + \
   87                                          sizeof(uint64_t))
   88 
   89 /*
   90  * HyperV Transport sockets
   91  */
   92 static struct protosw hv_socket_protosw = {
   93         .pr_type =              SOCK_STREAM,
   94         .pr_protocol =          HYPERV_SOCK_PROTO_TRANS,
   95         .pr_flags =             PR_CONNREQUIRED,
   96         .pr_attach =            hvs_trans_attach,
   97         .pr_bind =              hvs_trans_bind,
   98         .pr_listen =            hvs_trans_listen,
   99         .pr_accept =            hvs_trans_accept,
  100         .pr_connect =           hvs_trans_connect,
  101         .pr_peeraddr =          hvs_trans_peeraddr,
  102         .pr_sockaddr =          hvs_trans_sockaddr,
  103         .pr_soreceive =         hvs_trans_soreceive,
  104         .pr_sosend =            hvs_trans_sosend,
  105         .pr_disconnect =        hvs_trans_disconnect,
  106         .pr_close =             hvs_trans_close,
  107         .pr_detach =            hvs_trans_detach,
  108         .pr_shutdown =          hvs_trans_shutdown,
  109         .pr_abort =             hvs_trans_abort,
  110 };
  111 
  112 static struct domain            hv_socket_domain = {
  113         .dom_family =           AF_HYPERV,
  114         .dom_name =             "hyperv",
  115         .dom_probe =            hvs_dom_probe,
  116         .dom_nprotosw =         1,
  117         .dom_protosw =          { &hv_socket_protosw },
  118 };
  119 
  120 DOMAIN_SET(hv_socket_);
  121 
  122 #define MAX_PORT                        ((uint32_t)0xFFFFFFFF)
  123 #define MIN_PORT                        ((uint32_t)0x0)
  124 
  125 /* 00000000-facb-11e6-bd58-64006a7986d3 */
  126 static const struct hyperv_guid srv_id_template = {
  127         .hv_guid = {
  128             0x00, 0x00, 0x00, 0x00, 0xcb, 0xfa, 0xe6, 0x11,
  129             0xbd, 0x58, 0x64, 0x00, 0x6a, 0x79, 0x86, 0xd3 }
  130 };
  131 
  132 static int              hvsock_br_callback(void *, int, void *);
  133 static uint32_t         hvsock_canread_check(struct hvs_pcb *);
  134 static uint32_t         hvsock_canwrite_check(struct hvs_pcb *);
  135 static int              hvsock_send_data(struct vmbus_channel *chan,
  136     struct uio *uio, uint32_t to_write, struct sockbuf *sb);
  137 
  138 
  139 
  140 /* Globals */
  141 static struct sx                hvs_trans_socks_sx;
  142 static struct mtx               hvs_trans_socks_mtx;
  143 static LIST_HEAD(, hvs_pcb)     hvs_trans_bound_socks;
  144 static LIST_HEAD(, hvs_pcb)     hvs_trans_connected_socks;
  145 static uint32_t                 previous_auto_bound_port;
  146 
  147 static void
  148 hvsock_print_guid(struct hyperv_guid *guid)
  149 {
  150         unsigned char *p = (unsigned char *)guid;
  151 
  152         HVSOCK_DBG(HVSOCK_DBG_INFO,
  153             "0x%x-0x%x-0x%x-0x%x-0x%x-0x%x-0x%x-0x%x-0x%x-0x%x-0x%x\n",
  154             *(unsigned int *)p,
  155             *((unsigned short *) &p[4]),
  156             *((unsigned short *) &p[6]),
  157             p[8], p[9], p[10], p[11], p[12], p[13], p[14], p[15]);
  158 }
  159 
  160 static bool
  161 is_valid_srv_id(const struct hyperv_guid *id)
  162 {
  163         return !memcmp(&id->hv_guid[4],
  164             &srv_id_template.hv_guid[4], sizeof(struct hyperv_guid) - 4);
  165 }
  166 
  167 static unsigned int
  168 get_port_by_srv_id(const struct hyperv_guid *srv_id)
  169 {
  170         return *((const unsigned int *)srv_id);
  171 }
  172 
  173 static void
  174 set_port_by_srv_id(struct hyperv_guid *srv_id, unsigned int port)
  175 {
  176         *((unsigned int *)srv_id) = port;
  177 }
  178 
  179 
  180 static void
  181 __hvs_remove_pcb_from_list(struct hvs_pcb *pcb, unsigned char list)
  182 {
  183         struct hvs_pcb *p = NULL;
  184 
  185         HVSOCK_DBG(HVSOCK_DBG_VERBOSE, "%s: pcb is %p\n", __func__, pcb);
  186 
  187         if (!pcb)
  188                 return;
  189 
  190         if (list & HVS_LIST_BOUND) {
  191                 LIST_FOREACH(p, &hvs_trans_bound_socks, bound_next)
  192                         if  (p == pcb)
  193                                 LIST_REMOVE(p, bound_next);
  194         }
  195 
  196         if (list & HVS_LIST_CONNECTED) {
  197                 LIST_FOREACH(p, &hvs_trans_connected_socks, connected_next)
  198                         if (p == pcb)
  199                                 LIST_REMOVE(pcb, connected_next);
  200         }
  201 }
  202 
  203 static void
  204 __hvs_remove_socket_from_list(struct socket *so, unsigned char list)
  205 {
  206         struct hvs_pcb *pcb = so2hvspcb(so);
  207 
  208         HVSOCK_DBG(HVSOCK_DBG_VERBOSE, "%s: pcb is %p\n", __func__, pcb);
  209 
  210         __hvs_remove_pcb_from_list(pcb, list);
  211 }
  212 
  213 static void
  214 __hvs_insert_socket_on_list(struct socket *so, unsigned char list)
  215 {
  216         struct hvs_pcb *pcb = so2hvspcb(so);
  217 
  218         if (list & HVS_LIST_BOUND)
  219                 LIST_INSERT_HEAD(&hvs_trans_bound_socks,
  220                    pcb, bound_next);
  221 
  222         if (list & HVS_LIST_CONNECTED)
  223                 LIST_INSERT_HEAD(&hvs_trans_connected_socks,
  224                    pcb, connected_next);
  225 }
  226 
  227 void
  228 hvs_remove_socket_from_list(struct socket *so, unsigned char list)
  229 {
  230         if (!so || !so->so_pcb) {
  231                 HVSOCK_DBG(HVSOCK_DBG_VERBOSE,
  232                     "%s: socket or so_pcb is null\n", __func__);
  233                 return;
  234         }
  235 
  236         mtx_lock(&hvs_trans_socks_mtx);
  237         __hvs_remove_socket_from_list(so, list);
  238         mtx_unlock(&hvs_trans_socks_mtx);
  239 }
  240 
  241 static void
  242 hvs_insert_socket_on_list(struct socket *so, unsigned char list)
  243 {
  244         if (!so || !so->so_pcb) {
  245                 HVSOCK_DBG(HVSOCK_DBG_VERBOSE,
  246                     "%s: socket or so_pcb is null\n", __func__);
  247                 return;
  248         }
  249 
  250         mtx_lock(&hvs_trans_socks_mtx);
  251         __hvs_insert_socket_on_list(so, list);
  252         mtx_unlock(&hvs_trans_socks_mtx);
  253 }
  254 
  255 static struct socket *
  256 __hvs_find_socket_on_list(struct sockaddr_hvs *addr, unsigned char list)
  257 {
  258         struct hvs_pcb *p = NULL;
  259 
  260         if (list & HVS_LIST_BOUND)
  261                 LIST_FOREACH(p, &hvs_trans_bound_socks, bound_next)
  262                         if (p->so != NULL &&
  263                             addr->hvs_port == p->local_addr.hvs_port)
  264                                 return p->so;
  265 
  266         if (list & HVS_LIST_CONNECTED)
  267                 LIST_FOREACH(p, &hvs_trans_connected_socks, connected_next)
  268                         if (p->so != NULL &&
  269                             addr->hvs_port == p->local_addr.hvs_port)
  270                                 return p->so;
  271 
  272         return NULL;
  273 }
  274 
  275 static struct socket *
  276 hvs_find_socket_on_list(struct sockaddr_hvs *addr, unsigned char list)
  277 {
  278         struct socket *s = NULL;
  279 
  280         mtx_lock(&hvs_trans_socks_mtx);
  281         s = __hvs_find_socket_on_list(addr, list);
  282         mtx_unlock(&hvs_trans_socks_mtx);
  283 
  284         return s;
  285 }
  286 
  287 static inline void
  288 hvs_addr_set(struct sockaddr_hvs *addr, unsigned int port)
  289 {
  290         memset(addr, 0, sizeof(*addr));
  291         addr->sa_family = AF_HYPERV;
  292         addr->sa_len = sizeof(*addr);
  293         addr->hvs_port = port;
  294 }
  295 
  296 void
  297 hvs_addr_init(struct sockaddr_hvs *addr, const struct hyperv_guid *svr_id)
  298 {
  299         hvs_addr_set(addr, get_port_by_srv_id(svr_id));
  300 }
  301 
  302 int
  303 hvs_trans_lock(void)
  304 {
  305         sx_xlock(&hvs_trans_socks_sx);
  306         return (0);
  307 }
  308 
  309 void
  310 hvs_trans_unlock(void)
  311 {
  312         sx_xunlock(&hvs_trans_socks_sx);
  313 }
  314 
  315 static int
  316 hvs_dom_probe(void)
  317 {
  318 
  319         /* Don't even give us a chance to attach on non-HyperV. */
  320         if (vm_guest != VM_GUEST_HV)
  321                 return (ENXIO);
  322         return (0);
  323 }
  324 
  325 static void
  326 hvs_trans_init(void *arg __unused)
  327 {
  328 
  329         HVSOCK_DBG(HVSOCK_DBG_VERBOSE,
  330             "%s: HyperV Socket hvs_trans_init called\n", __func__);
  331 
  332         /* Initialize Globals */
  333         previous_auto_bound_port = MAX_PORT;
  334         sx_init(&hvs_trans_socks_sx, "hvs_trans_sock_sx");
  335         mtx_init(&hvs_trans_socks_mtx,
  336             "hvs_trans_socks_mtx", NULL, MTX_DEF);
  337         LIST_INIT(&hvs_trans_bound_socks);
  338         LIST_INIT(&hvs_trans_connected_socks);
  339 }
  340 SYSINIT(hvs_trans_init, SI_SUB_PROTO_DOMAIN, SI_ORDER_THIRD,
  341     hvs_trans_init, NULL);
  342 
  343 /*
  344  * Called in two cases:
  345  * 1) When user calls socket();
  346  * 2) When we accept new incoming conneciton and call sonewconn().
  347  */
  348 int
  349 hvs_trans_attach(struct socket *so, int proto, struct thread *td)
  350 {
  351         struct hvs_pcb *pcb = so2hvspcb(so);
  352 
  353         HVSOCK_DBG(HVSOCK_DBG_VERBOSE,
  354             "%s: HyperV Socket hvs_trans_attach called\n", __func__);
  355 
  356         if (so->so_type != SOCK_STREAM)
  357                 return (ESOCKTNOSUPPORT);
  358 
  359         if (proto != 0 && proto != HYPERV_SOCK_PROTO_TRANS)
  360                 return (EPROTONOSUPPORT);
  361 
  362         if (pcb != NULL)
  363                 return (EISCONN);
  364         pcb = malloc(sizeof(struct hvs_pcb), M_HVSOCK, M_NOWAIT | M_ZERO);
  365         if (pcb == NULL)
  366                 return (ENOMEM);
  367 
  368         pcb->so = so;
  369         so->so_pcb = (void *)pcb;
  370 
  371         return (0);
  372 }
  373 
  374 void
  375 hvs_trans_detach(struct socket *so)
  376 {
  377         struct hvs_pcb *pcb;
  378 
  379         HVSOCK_DBG(HVSOCK_DBG_VERBOSE,
  380             "%s: HyperV Socket hvs_trans_detach called\n", __func__);
  381 
  382         (void) hvs_trans_lock();
  383         pcb = so2hvspcb(so);
  384         if (pcb == NULL) {
  385                 hvs_trans_unlock();
  386                 return;
  387         }
  388 
  389         if (SOLISTENING(so)) {
  390                 bzero(pcb, sizeof(*pcb));
  391                 free(pcb, M_HVSOCK);
  392         }
  393 
  394         so->so_pcb = NULL;
  395 
  396         hvs_trans_unlock();
  397 }
  398 
  399 int
  400 hvs_trans_bind(struct socket *so, struct sockaddr *addr, struct thread *td)
  401 {
  402         struct hvs_pcb *pcb = so2hvspcb(so);
  403         struct sockaddr_hvs *sa = (struct sockaddr_hvs *) addr;
  404         int error = 0;
  405 
  406         HVSOCK_DBG(HVSOCK_DBG_VERBOSE,
  407             "%s: HyperV Socket hvs_trans_bind called\n", __func__);
  408 
  409         if (sa == NULL) {
  410                 return (EINVAL);
  411         }
  412 
  413         if (pcb == NULL) {
  414                 return (EINVAL);
  415         }
  416 
  417         if (sa->sa_family != AF_HYPERV) {
  418                 HVSOCK_DBG(HVSOCK_DBG_ERR,
  419                     "%s: Not supported, sa_family is %u\n",
  420                     __func__, sa->sa_family);
  421                 return (EAFNOSUPPORT);
  422         }
  423         if (sa->sa_len != sizeof(*sa)) {
  424                 HVSOCK_DBG(HVSOCK_DBG_ERR,
  425                     "%s: Not supported, sa_len is %u\n",
  426                     __func__, sa->sa_len);
  427                 return (EINVAL);
  428         }
  429 
  430         HVSOCK_DBG(HVSOCK_DBG_VERBOSE,
  431             "%s: binding port = 0x%x\n", __func__, sa->hvs_port);
  432 
  433         mtx_lock(&hvs_trans_socks_mtx);
  434         if (__hvs_find_socket_on_list(sa,
  435             HVS_LIST_BOUND | HVS_LIST_CONNECTED)) {
  436                 error = EADDRINUSE;
  437         } else {
  438                 /*
  439                  * The address is available for us to bind.
  440                  * Add socket to the bound list.
  441                  */
  442                 hvs_addr_set(&pcb->local_addr, sa->hvs_port);
  443                 hvs_addr_set(&pcb->remote_addr, HVADDR_PORT_ANY);
  444                 __hvs_insert_socket_on_list(so, HVS_LIST_BOUND);
  445         }
  446         mtx_unlock(&hvs_trans_socks_mtx);
  447 
  448         return (error);
  449 }
  450 
  451 int
  452 hvs_trans_listen(struct socket *so, int backlog, struct thread *td)
  453 {
  454         struct hvs_pcb *pcb = so2hvspcb(so);
  455         struct socket *bound_so;
  456         int error;
  457 
  458         HVSOCK_DBG(HVSOCK_DBG_VERBOSE,
  459             "%s: HyperV Socket hvs_trans_listen called\n", __func__);
  460 
  461         if (pcb == NULL)
  462                 return (EINVAL);
  463 
  464         /* Check if the address is already bound and it was by us. */
  465         bound_so = hvs_find_socket_on_list(&pcb->local_addr, HVS_LIST_BOUND);
  466         if (bound_so == NULL || bound_so != so) {
  467                 HVSOCK_DBG(HVSOCK_DBG_ERR,
  468                     "%s: Address not bound or not by us.\n", __func__);
  469                 return (EADDRNOTAVAIL);
  470         }
  471 
  472         SOCK_LOCK(so);
  473         error = solisten_proto_check(so);
  474         if (error == 0)
  475                 solisten_proto(so, backlog);
  476         SOCK_UNLOCK(so);
  477 
  478         HVSOCK_DBG(HVSOCK_DBG_VERBOSE,
  479             "%s: HyperV Socket listen error = %d\n", __func__, error);
  480         return (error);
  481 }
  482 
  483 int
  484 hvs_trans_accept(struct socket *so, struct sockaddr **nam)
  485 {
  486         struct hvs_pcb *pcb = so2hvspcb(so);
  487 
  488         HVSOCK_DBG(HVSOCK_DBG_VERBOSE,
  489             "%s: HyperV Socket hvs_trans_accept called\n", __func__);
  490 
  491         if (pcb == NULL)
  492                 return (EINVAL);
  493 
  494         *nam = sodupsockaddr((struct sockaddr *) &pcb->remote_addr,
  495             M_NOWAIT);
  496 
  497         return ((*nam == NULL) ? ENOMEM : 0);
  498 }
  499 
  500 int
  501 hvs_trans_connect(struct socket *so, struct sockaddr *nam, struct thread *td)
  502 {
  503         struct hvs_pcb *pcb = so2hvspcb(so);
  504         struct sockaddr_hvs *raddr = (struct sockaddr_hvs *)nam;
  505         bool found_auto_bound_port = false;
  506         int i, error = 0;
  507 
  508         HVSOCK_DBG(HVSOCK_DBG_VERBOSE,
  509             "%s: HyperV Socket hvs_trans_connect called, remote port is %x\n",
  510             __func__, raddr->hvs_port);
  511 
  512         if (pcb == NULL)
  513                 return (EINVAL);
  514 
  515         /* Verify the remote address */
  516         if (raddr == NULL)
  517                 return (EINVAL);
  518         if (raddr->sa_family != AF_HYPERV)
  519                 return (EAFNOSUPPORT);
  520         if (raddr->sa_len != sizeof(*raddr))
  521                 return (EINVAL);
  522 
  523         mtx_lock(&hvs_trans_socks_mtx);
  524         if (so->so_state &
  525             (SS_ISCONNECTED|SS_ISDISCONNECTING|SS_ISCONNECTING)) {
  526                         HVSOCK_DBG(HVSOCK_DBG_ERR,
  527                             "%s: socket connect in progress\n",
  528                             __func__);
  529                         error = EINPROGRESS;
  530                         goto out;
  531         }
  532 
  533         /*
  534          * Find an available port for us to auto bind the local
  535          * address.
  536          */
  537         hvs_addr_set(&pcb->local_addr, 0);
  538 
  539         for (i = previous_auto_bound_port - 1;
  540             i != previous_auto_bound_port; i --) {
  541                 if (i == MIN_PORT)
  542                         i = MAX_PORT;
  543 
  544                 pcb->local_addr.hvs_port = i;
  545 
  546                 if (__hvs_find_socket_on_list(&pcb->local_addr,
  547                     HVS_LIST_BOUND | HVS_LIST_CONNECTED) == NULL) {
  548                         found_auto_bound_port = true;
  549                         previous_auto_bound_port = i;
  550                         HVSOCK_DBG(HVSOCK_DBG_VERBOSE,
  551                             "%s: found local bound port is %x\n",
  552                             __func__, pcb->local_addr.hvs_port);
  553                         break;
  554                 }
  555         }
  556 
  557         if (found_auto_bound_port == true) {
  558                 /* Found available port for auto bound, put on list */
  559                 __hvs_insert_socket_on_list(so, HVS_LIST_BOUND);
  560                 /* Set VM service ID */
  561                 pcb->vm_srv_id = srv_id_template;
  562                 set_port_by_srv_id(&pcb->vm_srv_id, pcb->local_addr.hvs_port);
  563                 /* Set host service ID and remote port */
  564                 pcb->host_srv_id = srv_id_template;
  565                 set_port_by_srv_id(&pcb->host_srv_id, raddr->hvs_port);
  566                 hvs_addr_set(&pcb->remote_addr, raddr->hvs_port);
  567 
  568                 /* Change the socket state to SS_ISCONNECTING */
  569                 soisconnecting(so);
  570         } else {
  571                 HVSOCK_DBG(HVSOCK_DBG_ERR,
  572                     "%s: No local port available for auto bound\n",
  573                     __func__);
  574                 error = EADDRINUSE;
  575         }
  576 
  577         HVSOCK_DBG(HVSOCK_DBG_INFO, "Connect vm_srv_id is ");
  578         hvsock_print_guid(&pcb->vm_srv_id);
  579         HVSOCK_DBG(HVSOCK_DBG_INFO, "Connect host_srv_id is ");
  580         hvsock_print_guid(&pcb->host_srv_id);
  581 
  582 out:
  583         mtx_unlock(&hvs_trans_socks_mtx);
  584 
  585         if (found_auto_bound_port == true)
  586                  vmbus_req_tl_connect(&pcb->vm_srv_id, &pcb->host_srv_id);
  587 
  588         return (error);
  589 }
  590 
  591 int
  592 hvs_trans_disconnect(struct socket *so)
  593 {
  594         struct hvs_pcb *pcb;
  595 
  596         HVSOCK_DBG(HVSOCK_DBG_VERBOSE,
  597             "%s: HyperV Socket hvs_trans_disconnect called\n", __func__);
  598 
  599         (void) hvs_trans_lock();
  600         pcb = so2hvspcb(so);
  601         if (pcb == NULL) {
  602                 hvs_trans_unlock();
  603                 return (EINVAL);
  604         }
  605 
  606         /* If socket is already disconnected, skip this */
  607         if ((so->so_state & SS_ISDISCONNECTED) == 0)
  608                 soisdisconnecting(so);
  609 
  610         hvs_trans_unlock();
  611 
  612         return (0);
  613 }
  614 
  615 struct hvs_callback_arg {
  616         struct uio *uio;
  617         struct sockbuf *sb;
  618 };
  619 
  620 int
  621 hvs_trans_soreceive(struct socket *so, struct sockaddr **paddr,
  622     struct uio *uio, struct mbuf **mp0, struct mbuf **controlp, int *flagsp)
  623 {
  624         struct hvs_pcb *pcb = so2hvspcb(so);
  625         struct sockbuf *sb;
  626         ssize_t orig_resid;
  627         uint32_t canread, to_read;
  628         int flags, error = 0;
  629         struct hvs_callback_arg cbarg;
  630 
  631         HVSOCK_DBG(HVSOCK_DBG_VERBOSE,
  632             "%s: HyperV Socket hvs_trans_soreceive called\n", __func__);
  633 
  634         if (so->so_type != SOCK_STREAM)
  635                 return (EINVAL);
  636         if (pcb == NULL)
  637                 return (EINVAL);
  638 
  639         if (flagsp != NULL)
  640                 flags = *flagsp &~ MSG_EOR;
  641         else
  642                 flags = 0;
  643 
  644         if (flags & MSG_PEEK)
  645                 return (EOPNOTSUPP);
  646 
  647         /* If no space to copy out anything */
  648         if (uio->uio_resid == 0 || uio->uio_rw != UIO_READ)
  649                 return (EINVAL);
  650 
  651         orig_resid = uio->uio_resid;
  652 
  653         /* Prevent other readers from entering the socket. */
  654         error = SOCK_IO_RECV_LOCK(so, SBLOCKWAIT(flags));
  655         if (error) {
  656                 HVSOCK_DBG(HVSOCK_DBG_ERR,
  657                     "%s: soiolock returned error = %d\n", __func__, error);
  658                 return (error);
  659         }
  660 
  661         sb = &so->so_rcv;
  662         SOCKBUF_LOCK(sb);
  663 
  664         cbarg.uio = uio;
  665         cbarg.sb = sb;
  666         /*
  667          * If the socket is closing, there might still be some data
  668          * in rx br to read. However we need to make sure
  669          * the channel is still open.
  670          */
  671         if ((sb->sb_state & SBS_CANTRCVMORE) &&
  672             (so->so_state & SS_ISDISCONNECTED)) {
  673                 /* Other thread already closed the channel */
  674                 error = EPIPE;
  675                 goto out;
  676         }
  677 
  678         while (true) {
  679                 while (uio->uio_resid > 0 &&
  680                     (canread = hvsock_canread_check(pcb)) > 0) {
  681                         to_read = MIN(canread, uio->uio_resid);
  682                         HVSOCK_DBG(HVSOCK_DBG_VERBOSE,
  683                             "%s: to_read = %u, skip = %u\n", __func__, to_read,
  684                             (unsigned int)(sizeof(struct hvs_pkt_header) +
  685                             pcb->recv_data_off));
  686 
  687                         error = vmbus_chan_recv_peek_call(pcb->chan, to_read,
  688                             sizeof(struct hvs_pkt_header) + pcb->recv_data_off,
  689                             hvsock_br_callback, (void *)&cbarg);
  690                         /*
  691                          * It is possible socket is disconnected becasue
  692                          * we released lock in hvsock_br_callback. So we
  693                          * need to check the state to make sure it is not
  694                          * disconnected.
  695                          */
  696                         if (error || so->so_state & SS_ISDISCONNECTED) {
  697                                 break;
  698                         }
  699 
  700                         pcb->recv_data_len -= to_read;
  701                         pcb->recv_data_off += to_read;
  702                 }
  703 
  704                 if (error)
  705                         break;
  706 
  707                 /* Abort if socket has reported problems. */
  708                 if (so->so_error) {
  709                         if (so->so_error == ESHUTDOWN &&
  710                             orig_resid > uio->uio_resid) {
  711                                 /*
  712                                  * Although we got a FIN, we also received
  713                                  * some data in this round. Delivery it
  714                                  * to user.
  715                                  */
  716                                 error = 0;
  717                         } else {
  718                                 if (so->so_error != ESHUTDOWN)
  719                                         error = so->so_error;
  720                         }
  721 
  722                         break;
  723                 }
  724 
  725                 /* Cannot received more. */
  726                 if (sb->sb_state & SBS_CANTRCVMORE)
  727                         break;
  728 
  729                 /* We are done if buffer has been filled */
  730                 if (uio->uio_resid == 0)
  731                         break;
  732 
  733                 if (!(flags & MSG_WAITALL) && orig_resid > uio->uio_resid)
  734                         break;
  735 
  736                 /* Buffer ring is empty and we shall not block */
  737                 if ((so->so_state & SS_NBIO) ||
  738                     (flags & (MSG_DONTWAIT|MSG_NBIO))) {
  739                         if (orig_resid == uio->uio_resid) {
  740                                 /* We have not read anything */
  741                                 error = EAGAIN;
  742                         }
  743                         HVSOCK_DBG(HVSOCK_DBG_VERBOSE,
  744                             "%s: non blocked read return, error %d.\n",
  745                             __func__, error);
  746                         break;
  747                 }
  748 
  749                 /*
  750                  * Wait and block until (more) data comes in.
  751                  * Note: Drops the sockbuf lock during wait.
  752                  */
  753                 error = sbwait(so, SO_RCV);
  754 
  755                 if (error)
  756                         break;
  757 
  758                 HVSOCK_DBG(HVSOCK_DBG_VERBOSE,
  759                     "%s: wake up from sbwait, read available is %u\n",
  760                     __func__, vmbus_chan_read_available(pcb->chan));
  761         }
  762 
  763 out:
  764         SOCKBUF_UNLOCK(sb);
  765         SOCK_IO_RECV_UNLOCK(so);
  766 
  767         /* We recieved a FIN in this call */
  768         if (so->so_error == ESHUTDOWN) {
  769                 if (so->so_snd.sb_state & SBS_CANTSENDMORE) {
  770                         /* Send has already closed */
  771                         soisdisconnecting(so);
  772                 } else {
  773                         /* Just close the receive side */
  774                         socantrcvmore(so);
  775                 }
  776         }
  777 
  778         HVSOCK_DBG(HVSOCK_DBG_VERBOSE,
  779             "%s: returning error = %d, so_error = %d\n",
  780             __func__, error, so->so_error);
  781 
  782         return (error);
  783 }
  784 
  785 int
  786 hvs_trans_sosend(struct socket *so, struct sockaddr *addr, struct uio *uio,
  787     struct mbuf *top, struct mbuf *controlp, int flags, struct thread *td)
  788 {
  789         struct hvs_pcb *pcb = so2hvspcb(so);
  790         struct sockbuf *sb;
  791         ssize_t orig_resid;
  792         uint32_t canwrite, to_write;
  793         int error = 0;
  794 
  795         HVSOCK_DBG(HVSOCK_DBG_VERBOSE,
  796             "%s: HyperV Socket hvs_trans_sosend called, uio_resid = %zd\n",
  797             __func__, uio->uio_resid);
  798 
  799         if (so->so_type != SOCK_STREAM)
  800                 return (EINVAL);
  801         if (pcb == NULL)
  802                 return (EINVAL);
  803 
  804         /* If nothing to send */
  805         if (uio->uio_resid == 0 || uio->uio_rw != UIO_WRITE)
  806                 return (EINVAL);
  807 
  808         orig_resid = uio->uio_resid;
  809 
  810         /* Prevent other writers from entering the socket. */
  811         error = SOCK_IO_SEND_LOCK(so, SBLOCKWAIT(flags));
  812         if (error) {
  813                 HVSOCK_DBG(HVSOCK_DBG_ERR,
  814                     "%s: soiolocak returned error = %d\n", __func__, error);
  815                 return (error);
  816         }
  817 
  818         sb = &so->so_snd;
  819         SOCKBUF_LOCK(sb);
  820 
  821         if ((sb->sb_state & SBS_CANTSENDMORE) ||
  822             so->so_error == ESHUTDOWN) {
  823                 error = EPIPE;
  824                 goto out;
  825         }
  826 
  827         while (uio->uio_resid > 0) {
  828                 canwrite = hvsock_canwrite_check(pcb);
  829                 if (canwrite == 0) {
  830                         /* We have sent some data */
  831                         if (orig_resid > uio->uio_resid)
  832                                 break;
  833                         /*
  834                          * We have not sent any data and it is
  835                          * non-blocked io
  836                          */
  837                         if (so->so_state & SS_NBIO ||
  838                             (flags & (MSG_NBIO | MSG_DONTWAIT)) != 0) {
  839                                 error = EWOULDBLOCK;
  840                                 break;
  841                         } else {
  842                                 /*
  843                                  * We are here because there is no space on
  844                                  * send buffer ring. Signal the other side
  845                                  * to read and free more space.
  846                                  * Sleep wait until space avaiable to send
  847                                  * Note: Drops the sockbuf lock during wait.
  848                                  */
  849                                 error = sbwait(so, SO_SND);
  850 
  851                                 if (error)
  852                                         break;
  853 
  854                                 HVSOCK_DBG(HVSOCK_DBG_VERBOSE,
  855                                     "%s: wake up from sbwait, space avail on "
  856                                     "tx ring is %u\n",
  857                                     __func__,
  858                                     vmbus_chan_write_available(pcb->chan));
  859 
  860                                 continue;
  861                         }
  862                 }
  863                 to_write = MIN(canwrite, uio->uio_resid);
  864                 to_write = MIN(to_write, HVSOCK_SEND_BUF_SZ);
  865 
  866                 HVSOCK_DBG(HVSOCK_DBG_VERBOSE,
  867                     "%s: canwrite is %u, to_write = %u\n", __func__,
  868                     canwrite, to_write);
  869                 error = hvsock_send_data(pcb->chan, uio, to_write, sb);
  870 
  871                 if (error)
  872                         break;
  873         }
  874 
  875 out:
  876         SOCKBUF_UNLOCK(sb);
  877         SOCK_IO_SEND_UNLOCK(so);
  878 
  879         return (error);
  880 }
  881 
  882 int
  883 hvs_trans_peeraddr(struct socket *so, struct sockaddr **nam)
  884 {
  885         struct hvs_pcb *pcb = so2hvspcb(so);
  886 
  887         HVSOCK_DBG(HVSOCK_DBG_VERBOSE,
  888             "%s: HyperV Socket hvs_trans_peeraddr called\n", __func__);
  889 
  890         if (pcb == NULL)
  891                 return (EINVAL);
  892 
  893         *nam = sodupsockaddr((struct sockaddr *) &pcb->remote_addr, M_NOWAIT);
  894 
  895         return ((*nam == NULL)? ENOMEM : 0);
  896 }
  897 
  898 int
  899 hvs_trans_sockaddr(struct socket *so, struct sockaddr **nam)
  900 {
  901         struct hvs_pcb *pcb = so2hvspcb(so);
  902 
  903         HVSOCK_DBG(HVSOCK_DBG_VERBOSE,
  904             "%s: HyperV Socket hvs_trans_sockaddr called\n", __func__);
  905 
  906         if (pcb == NULL)
  907                 return (EINVAL);
  908 
  909         *nam = sodupsockaddr((struct sockaddr *) &pcb->local_addr, M_NOWAIT);
  910 
  911         return ((*nam == NULL)? ENOMEM : 0);
  912 }
  913 
  914 void
  915 hvs_trans_close(struct socket *so)
  916 {
  917         struct hvs_pcb *pcb;
  918 
  919         HVSOCK_DBG(HVSOCK_DBG_VERBOSE,
  920             "%s: HyperV Socket hvs_trans_close called\n", __func__);
  921 
  922         (void) hvs_trans_lock();
  923         pcb = so2hvspcb(so);
  924         if (!pcb) {
  925                 hvs_trans_unlock();
  926                 return;
  927         }
  928 
  929         if (so->so_state & SS_ISCONNECTED) {
  930                 /* Send a FIN to peer */
  931                 HVSOCK_DBG(HVSOCK_DBG_VERBOSE,
  932                     "%s: hvs_trans_close sending a FIN to host\n", __func__);
  933                 (void) hvsock_send_data(pcb->chan, NULL, 0, NULL);
  934         }
  935 
  936         if (so->so_state &
  937             (SS_ISCONNECTED|SS_ISCONNECTING|SS_ISDISCONNECTING))
  938                 soisdisconnected(so);
  939 
  940         pcb->chan = NULL;
  941         pcb->so = NULL;
  942 
  943         if (SOLISTENING(so)) {
  944                 mtx_lock(&hvs_trans_socks_mtx);
  945                 /* Remove from bound list */
  946                 __hvs_remove_socket_from_list(so, HVS_LIST_BOUND);
  947                 mtx_unlock(&hvs_trans_socks_mtx);
  948         }
  949 
  950         hvs_trans_unlock();
  951 
  952         return;
  953 }
  954 
  955 void
  956 hvs_trans_abort(struct socket *so)
  957 {
  958         struct hvs_pcb *pcb = so2hvspcb(so);
  959 
  960         HVSOCK_DBG(HVSOCK_DBG_VERBOSE,
  961             "%s: HyperV Socket hvs_trans_abort called\n", __func__);
  962 
  963         (void) hvs_trans_lock();
  964         if (pcb == NULL) {
  965                 hvs_trans_unlock();
  966                 return;
  967         }
  968 
  969         if (SOLISTENING(so)) {
  970                 mtx_lock(&hvs_trans_socks_mtx);
  971                 /* Remove from bound list */
  972                 __hvs_remove_socket_from_list(so, HVS_LIST_BOUND);
  973                 mtx_unlock(&hvs_trans_socks_mtx);
  974         }
  975 
  976         if (so->so_state & SS_ISCONNECTED) {
  977                 (void) sodisconnect(so);
  978         }
  979         hvs_trans_unlock();
  980 
  981         return;
  982 }
  983 
  984 int
  985 hvs_trans_shutdown(struct socket *so)
  986 {
  987         struct hvs_pcb *pcb = so2hvspcb(so);
  988         struct sockbuf *sb;
  989 
  990         HVSOCK_DBG(HVSOCK_DBG_VERBOSE,
  991             "%s: HyperV Socket hvs_trans_shutdown called\n", __func__);
  992 
  993         if (pcb == NULL)
  994                 return (EINVAL);
  995 
  996         /*
  997          * Only get called with the shutdown method is SHUT_WR or
  998          * SHUT_RDWR.
  999          * When the method is SHUT_RD or SHUT_RDWR, the caller
 1000          * already set the SBS_CANTRCVMORE on receive side socket
 1001          * buffer.
 1002          */
 1003         if ((so->so_rcv.sb_state & SBS_CANTRCVMORE) == 0) {
 1004                 /*
 1005                  * SHUT_WR only case.
 1006                  * Receive side is still open. Just close
 1007                  * the send side.
 1008                  */
 1009                 socantsendmore(so);
 1010         } else {
 1011                 /* SHUT_RDWR case */
 1012                 if (so->so_state & SS_ISCONNECTED) {
 1013                         /* Send a FIN to peer */
 1014                         sb = &so->so_snd;
 1015                         SOCKBUF_LOCK(sb);
 1016                         (void) hvsock_send_data(pcb->chan, NULL, 0, sb);
 1017                         SOCKBUF_UNLOCK(sb);
 1018 
 1019                         soisdisconnecting(so);
 1020                 }
 1021         }
 1022 
 1023         return (0);
 1024 }
 1025 
 1026 /* In the VM, we support Hyper-V Sockets with AF_HYPERV, and the endpoint is
 1027  * <port> (see struct sockaddr_hvs).
 1028  *
 1029  * On the host, Hyper-V Sockets are supported by Winsock AF_HYPERV:
 1030  * https://docs.microsoft.com/en-us/virtualization/hyper-v-on-windows/user-
 1031  * guide/make-integration-service, and the endpoint is <VmID, ServiceId> with
 1032  * the below sockaddr:
 1033  *
 1034  * struct SOCKADDR_HV
 1035  * {
 1036  *    ADDRESS_FAMILY Family;
 1037  *    USHORT Reserved;
 1038  *    GUID VmId;
 1039  *    GUID ServiceId;
 1040  * };
 1041  * Note: VmID is not used by FreeBSD VM and actually it isn't transmitted via
 1042  * VMBus, because here it's obvious the host and the VM can easily identify
 1043  * each other. Though the VmID is useful on the host, especially in the case
 1044  * of Windows container, FreeBSD VM doesn't need it at all.
 1045  *
 1046  * To be compatible with similar infrastructure in Linux VMs, we have
 1047  * to limit the available GUID space of SOCKADDR_HV so that we can create
 1048  * a mapping between FreeBSD AF_HYPERV port and SOCKADDR_HV Service GUID.
 1049  * The rule of writing Hyper-V Sockets apps on the host and in FreeBSD VM is:
 1050  *
 1051  ****************************************************************************
 1052  * The only valid Service GUIDs, from the perspectives of both the host and *
 1053  * FreeBSD VM, that can be connected by the other end, must conform to this *
 1054  * format: <port>-facb-11e6-bd58-64006a7986d3.                              *
 1055  ****************************************************************************
 1056  *
 1057  * When we write apps on the host to connect(), the GUID ServiceID is used.
 1058  * When we write apps in FreeBSD VM to connect(), we only need to specify the
 1059  * port and the driver will form the GUID and use that to request the host.
 1060  *
 1061  * From the perspective of FreeBSD VM, the remote ephemeral port (i.e. the
 1062  * auto-generated remote port for a connect request initiated by the host's
 1063  * connect()) is set to HVADDR_PORT_UNKNOWN, which is not really used on the
 1064  * FreeBSD guest.
 1065  */
 1066 
 /*
  * Older Hyper-V hosts (vmbus version 'VMBUS_VERSION_WIN10' or before)
  * restrict the Hyper-V socket ring buffer size to six 4K pages. Newer
  * Hyper-V hosts don't have this limit.
  */
 #define HVS_RINGBUF_RCV_SIZE    (6 * PAGE_SIZE)
 #define HVS_RINGBUF_SND_SIZE    (6 * PAGE_SIZE)
 #define HVS_RINGBUF_MAX_SIZE    (64 * PAGE_SIZE)
 1075 
 1076 struct hvsock_sc {
 1077         device_t                dev;
 1078         struct hvs_pcb          *pcb;
 1079         struct vmbus_channel    *channel;
 1080 };
 1081 
 1082 static bool
 1083 hvsock_chan_readable(struct vmbus_channel *chan)
 1084 {
 1085         uint32_t readable = vmbus_chan_read_available(chan);
 1086 
 1087         return (readable >= HVSOCK_PKT_LEN(0));
 1088 }
 1089 
 1090 static void
 1091 hvsock_chan_cb(struct vmbus_channel *chan, void *context)
 1092 {
 1093         struct hvs_pcb *pcb = (struct hvs_pcb *) context;
 1094         struct socket *so;
 1095         uint32_t canwrite;
 1096 
 1097         HVSOCK_DBG(HVSOCK_DBG_VERBOSE,
 1098             "%s: host send us a wakeup on rb data, pcb = %p\n",
 1099             __func__, pcb);
 1100 
 1101         /*
 1102          * Check if the socket is still attached and valid.
 1103          * Here we know channel is still open. Need to make
 1104          * sure the socket has not been closed or freed.
 1105          */
 1106         (void) hvs_trans_lock();
 1107         so = hsvpcb2so(pcb);
 1108 
 1109         if (pcb->chan != NULL && so != NULL) {
 1110                 /*
 1111                  * Wake up reader if there are data to read.
 1112                  */
 1113                 SOCKBUF_LOCK(&(so)->so_rcv);
 1114 
 1115                 HVSOCK_DBG(HVSOCK_DBG_VERBOSE,
 1116                     "%s: read available = %u\n", __func__,
 1117                     vmbus_chan_read_available(pcb->chan));
 1118 
 1119                 if (hvsock_chan_readable(pcb->chan))
 1120                         sorwakeup_locked(so);
 1121                 else
 1122                         SOCKBUF_UNLOCK(&(so)->so_rcv);
 1123 
 1124                 /*
 1125                  * Wake up sender if space becomes available to write.
 1126                  */
 1127                 SOCKBUF_LOCK(&(so)->so_snd);
 1128                 canwrite = hvsock_canwrite_check(pcb);
 1129 
 1130                 HVSOCK_DBG(HVSOCK_DBG_VERBOSE,
 1131                     "%s: canwrite = %u\n", __func__, canwrite);
 1132 
 1133                 if (canwrite > 0) {
 1134                         sowwakeup_locked(so);
 1135                 } else {
 1136                         SOCKBUF_UNLOCK(&(so)->so_snd);
 1137                 }
 1138         }
 1139 
 1140         hvs_trans_unlock();
 1141 
 1142         return;
 1143 }
 1144 
 1145 static int
 1146 hvsock_br_callback(void *datap, int cplen, void *cbarg)
 1147 {
 1148         struct hvs_callback_arg *arg = (struct hvs_callback_arg *)cbarg;
 1149         struct uio *uio = arg->uio;
 1150         struct sockbuf *sb = arg->sb;
 1151         int error = 0;
 1152 
 1153         if (cbarg == NULL || datap == NULL)
 1154                 return (EINVAL);
 1155 
 1156         HVSOCK_DBG(HVSOCK_DBG_VERBOSE,
 1157             "%s: called, uio_rw = %s, uio_resid = %zd, cplen = %u, "
 1158             "datap = %p\n",
 1159             __func__, (uio->uio_rw == UIO_READ) ? "read from br":"write to br",
 1160             uio->uio_resid, cplen, datap);
 1161 
 1162         if (sb)
 1163                 SOCKBUF_UNLOCK(sb);
 1164 
 1165         error = uiomove(datap, cplen, uio);
 1166 
 1167         if (sb)
 1168                 SOCKBUF_LOCK(sb);
 1169 
 1170         HVSOCK_DBG(HVSOCK_DBG_VERBOSE,
 1171             "%s: after uiomove, uio_resid = %zd, error = %d\n",
 1172             __func__, uio->uio_resid, error);
 1173 
 1174         return (error);
 1175 }
 1176 
 1177 static int
 1178 hvsock_send_data(struct vmbus_channel *chan, struct uio *uio,
 1179     uint32_t to_write, struct sockbuf *sb)
 1180 {
 1181         struct hvs_pkt_header hvs_pkt;
 1182         int hvs_pkthlen, hvs_pktlen, pad_pktlen, hlen, error = 0;
 1183         uint64_t pad = 0;
 1184         struct iovec iov[3];
 1185         struct hvs_callback_arg cbarg;
 1186 
 1187         if (chan == NULL)
 1188                 return (ENOTCONN);
 1189 
 1190         hlen = sizeof(struct vmbus_chanpkt_hdr);
 1191         hvs_pkthlen = sizeof(struct hvs_pkt_header);
 1192         hvs_pktlen = hvs_pkthlen + to_write;
 1193         pad_pktlen = VMBUS_CHANPKT_TOTLEN(hvs_pktlen);
 1194 
 1195         HVSOCK_DBG(HVSOCK_DBG_VERBOSE,
 1196             "%s: hlen = %u, hvs_pkthlen = %u, hvs_pktlen = %u, "
 1197             "pad_pktlen = %u, data_len = %u\n",
 1198             __func__, hlen, hvs_pkthlen, hvs_pktlen, pad_pktlen, to_write);
 1199 
 1200         hvs_pkt.chan_pkt_hdr.cph_type = VMBUS_CHANPKT_TYPE_INBAND;
 1201         hvs_pkt.chan_pkt_hdr.cph_flags = 0;
 1202         VMBUS_CHANPKT_SETLEN(hvs_pkt.chan_pkt_hdr.cph_hlen, hlen);
 1203         VMBUS_CHANPKT_SETLEN(hvs_pkt.chan_pkt_hdr.cph_tlen, pad_pktlen);
 1204         hvs_pkt.chan_pkt_hdr.cph_xactid = 0;
 1205 
 1206         hvs_pkt.vmpipe_pkt_hdr.vmpipe_pkt_type = 1;
 1207         hvs_pkt.vmpipe_pkt_hdr.vmpipe_data_size = to_write;
 1208 
 1209         cbarg.uio = uio;
 1210         cbarg.sb = sb;
 1211 
 1212         if (uio && to_write > 0) {
 1213                 iov[0].iov_base = &hvs_pkt;
 1214                 iov[0].iov_len = hvs_pkthlen;
 1215                 iov[1].iov_base = NULL;
 1216                 iov[1].iov_len = to_write;
 1217                 iov[2].iov_base = &pad;
 1218                 iov[2].iov_len = pad_pktlen - hvs_pktlen;
 1219 
 1220                 error = vmbus_chan_iov_send(chan, iov, 3,
 1221                     hvsock_br_callback, &cbarg);
 1222         } else {
 1223                 if (to_write == 0) {
 1224                         iov[0].iov_base = &hvs_pkt;
 1225                         iov[0].iov_len = hvs_pkthlen;
 1226                         iov[1].iov_base = &pad;
 1227                         iov[1].iov_len = pad_pktlen - hvs_pktlen;
 1228                         error = vmbus_chan_iov_send(chan, iov, 2, NULL, NULL);
 1229                 }
 1230         }
 1231 
 1232         if (error) {
 1233                 HVSOCK_DBG(HVSOCK_DBG_VERBOSE,
 1234                     "%s: error = %d\n", __func__, error);
 1235         }
 1236 
 1237         return (error);
 1238 }
 1239 
 1240 /*
 1241  * Check if we have data on current ring buffer to read
 1242  * or not. If not, advance the ring buffer read index to
 1243  * next packet. Update the recev_data_len and recev_data_off
 1244  * to new value.
 1245  * Return the number of bytes can read.
 1246  */
 1247 static uint32_t
 1248 hvsock_canread_check(struct hvs_pcb *pcb)
 1249 {
 1250         uint32_t advance;
 1251         uint32_t tlen, hlen, dlen;
 1252         uint32_t bytes_canread = 0;
 1253         int error;
 1254 
 1255         if (pcb == NULL || pcb->chan == NULL) {
 1256                 pcb->so->so_error = EIO;
 1257                 return (0);
 1258         }
 1259 
 1260         /* Still have data not read yet on current packet */
 1261         if (pcb->recv_data_len > 0)
 1262                 return (pcb->recv_data_len);
 1263 
 1264         if (pcb->rb_init)
 1265                 advance =
 1266                     VMBUS_CHANPKT_GETLEN(pcb->hvs_pkt.chan_pkt_hdr.cph_tlen);
 1267         else
 1268                 advance = 0;
 1269 
 1270         bytes_canread = vmbus_chan_read_available(pcb->chan);
 1271 
 1272         HVSOCK_DBG(HVSOCK_DBG_VERBOSE,
 1273             "%s: bytes_canread on br = %u, advance = %u\n",
 1274             __func__, bytes_canread, advance);
 1275 
 1276         if (pcb->rb_init && bytes_canread == (advance + sizeof(uint64_t))) {
 1277                 /*
 1278                  * Nothing to read. Need to advance the rindex before
 1279                  * calling sbwait, so host knows to wake us up when data
 1280                  * is available to read on rb.
 1281                  */
 1282                 error = vmbus_chan_recv_idxadv(pcb->chan, advance);
 1283                 if (error) {
 1284                         HVSOCK_DBG(HVSOCK_DBG_ERR,
 1285                             "%s: after calling vmbus_chan_recv_idxadv, "
 1286                             "got error = %d\n",  __func__, error);
 1287                         return (0);
 1288                 } else {
 1289                         pcb->rb_init = false;
 1290                         pcb->recv_data_len = 0;
 1291                         pcb->recv_data_off = 0;
 1292                         bytes_canread = vmbus_chan_read_available(pcb->chan);
 1293 
 1294                         HVSOCK_DBG(HVSOCK_DBG_VERBOSE,
 1295                             "%s: advanced %u bytes, "
 1296                             " bytes_canread on br now = %u\n",
 1297                             __func__, advance, bytes_canread);
 1298 
 1299                         if (bytes_canread == 0)
 1300                                 return (0);
 1301                         else
 1302                                 advance = 0;
 1303                 }
 1304         }
 1305 
 1306         if (bytes_canread <
 1307             advance + (sizeof(struct hvs_pkt_header) + sizeof(uint64_t)))
 1308                 return (0);
 1309 
 1310         error = vmbus_chan_recv_peek(pcb->chan, &pcb->hvs_pkt,
 1311             sizeof(struct hvs_pkt_header), advance);
 1312 
 1313         /* Don't have anything to read */
 1314         if (error) {
 1315                 HVSOCK_DBG(HVSOCK_DBG_VERBOSE,
 1316                     "%s: after calling vmbus_chan_recv_peek, got error = %d\n",
 1317                     __func__, error);
 1318                 return (0);
 1319         }
 1320 
 1321         /*
 1322          * We just read in a new packet header. Do some sanity checks.
 1323          */
 1324         tlen = VMBUS_CHANPKT_GETLEN(pcb->hvs_pkt.chan_pkt_hdr.cph_tlen);
 1325         hlen = VMBUS_CHANPKT_GETLEN(pcb->hvs_pkt.chan_pkt_hdr.cph_hlen);
 1326         dlen = pcb->hvs_pkt.vmpipe_pkt_hdr.vmpipe_data_size;
 1327         if (__predict_false(hlen < sizeof(struct vmbus_chanpkt_hdr)) ||
 1328             __predict_false(hlen > tlen) ||
 1329             __predict_false(tlen < dlen + sizeof(struct hvs_pkt_header))) {
 1330                 HVSOCK_DBG(HVSOCK_DBG_VERBOSE,
 1331                     "invalid tlen(%u), hlen(%u) or dlen(%u)\n",
 1332                     tlen, hlen, dlen);
 1333                 pcb->so->so_error = EIO;
 1334                 return (0);
 1335         }
 1336         if (pcb->rb_init == false)
 1337                 pcb->rb_init = true;
 1338 
 1339         HVSOCK_DBG(HVSOCK_DBG_VERBOSE,
 1340             "Got new pkt tlen(%u), hlen(%u) or dlen(%u)\n",
 1341             tlen, hlen, dlen);
 1342 
 1343         /* The other side has sent a close FIN */
 1344         if (dlen == 0) {
 1345                 HVSOCK_DBG(HVSOCK_DBG_VERBOSE,
 1346                     "%s: Received FIN from other side\n", __func__);
 1347                 /* inform the caller by seting so_error to ESHUTDOWN */
 1348                 pcb->so->so_error = ESHUTDOWN;
 1349         }
 1350 
 1351         HVSOCK_DBG(HVSOCK_DBG_VERBOSE,
 1352             "%s: canread on receive ring is %u \n", __func__, dlen);
 1353 
 1354         pcb->recv_data_len = dlen;
 1355         pcb->recv_data_off = 0;
 1356 
 1357         return (pcb->recv_data_len);
 1358 }
 1359 
 1360 static uint32_t
 1361 hvsock_canwrite_check(struct hvs_pcb *pcb)
 1362 {
 1363         uint32_t writeable;
 1364         uint32_t ret;
 1365 
 1366         if (pcb == NULL || pcb->chan == NULL)
 1367                 return (0);
 1368 
 1369         writeable = vmbus_chan_write_available(pcb->chan);
 1370 
 1371         /*
 1372          * We must always reserve a 0-length-payload packet for the FIN.
 1373          */
 1374         HVSOCK_DBG(HVSOCK_DBG_VERBOSE,
 1375             "%s: writeable is %u, should be greater than %ju\n",
 1376             __func__, writeable,
 1377             (uintmax_t)(HVSOCK_PKT_LEN(1) + HVSOCK_PKT_LEN(0)));
 1378 
 1379         if (writeable < HVSOCK_PKT_LEN(1) + HVSOCK_PKT_LEN(0)) {
 1380                 /*
 1381                  * The Tx ring seems full.
 1382                  */
 1383                 return (0);
 1384         }
 1385 
 1386         ret = writeable - HVSOCK_PKT_LEN(0) - HVSOCK_PKT_LEN(0);
 1387 
 1388         HVSOCK_DBG(HVSOCK_DBG_VERBOSE,
 1389             "%s: available size is %u\n", __func__, rounddown2(ret, 8));
 1390 
 1391         return (rounddown2(ret, 8));
 1392 }
 1393 
 1394 static void
 1395 hvsock_set_chan_pending_send_size(struct vmbus_channel *chan)
 1396 {
 1397         vmbus_chan_set_pending_send_size(chan,
 1398             HVSOCK_PKT_LEN(HVSOCK_SEND_BUF_SZ));
 1399 }
 1400 
 1401 static int
 1402 hvsock_open_channel(struct vmbus_channel *chan, struct socket *so)
 1403 {
 1404         unsigned int rcvbuf, sndbuf;
 1405         struct hvs_pcb *pcb = so2hvspcb(so);
 1406         int ret;
 1407 
 1408         if (vmbus_current_version < VMBUS_VERSION_WIN10_V5) {
 1409                 sndbuf = HVS_RINGBUF_SND_SIZE;
 1410                 rcvbuf = HVS_RINGBUF_RCV_SIZE;
 1411         } else {
 1412                 sndbuf = MAX(so->so_snd.sb_hiwat, HVS_RINGBUF_SND_SIZE);
 1413                 sndbuf = MIN(sndbuf, HVS_RINGBUF_MAX_SIZE);
 1414                 sndbuf = rounddown2(sndbuf, PAGE_SIZE);
 1415                 rcvbuf = MAX(so->so_rcv.sb_hiwat, HVS_RINGBUF_RCV_SIZE);
 1416                 rcvbuf = MIN(rcvbuf, HVS_RINGBUF_MAX_SIZE);
 1417                 rcvbuf = rounddown2(rcvbuf, PAGE_SIZE);
 1418         }
 1419 
 1420         /*
 1421          * Can only read whatever user provided size of data
 1422          * from ring buffer. Turn off batched reading.
 1423          */
 1424         vmbus_chan_set_readbatch(chan, false);
 1425 
 1426         ret = vmbus_chan_open(chan, sndbuf, rcvbuf, NULL, 0,
 1427             hvsock_chan_cb, pcb);
 1428 
 1429         if (ret != 0) {
 1430                 HVSOCK_DBG(HVSOCK_DBG_ERR,
 1431                     "%s: failed to open hvsock channel, sndbuf = %u, "
 1432                     "rcvbuf = %u\n", __func__, sndbuf, rcvbuf);
 1433         } else {
 1434                 HVSOCK_DBG(HVSOCK_DBG_INFO,
 1435                     "%s: hvsock channel opened, sndbuf = %u, i"
 1436                     "rcvbuf = %u\n", __func__, sndbuf, rcvbuf);
 1437                 /*
 1438                  * Se the pending send size so to receive wakeup
 1439                  * signals from host when there is enough space on
 1440                  * rx buffer ring to write.
 1441                  */
 1442                 hvsock_set_chan_pending_send_size(chan);
 1443         }
 1444 
 1445         return ret;
 1446 }
 1447 
 1448 /*
 1449  * Guest is listening passively on the socket. Open channel and
 1450  * create a new socket for the conneciton.
 1451  */
 1452 static void
 1453 hvsock_open_conn_passive(struct vmbus_channel *chan, struct socket *so,
 1454     struct hvsock_sc *sc)
 1455 {
 1456         struct socket *new_so;
 1457         struct hvs_pcb *new_pcb, *pcb;
 1458         int error;
 1459 
 1460         /* Do nothing if socket is not listening */
 1461         if (!SOLISTENING(so)) {
 1462                 HVSOCK_DBG(HVSOCK_DBG_ERR,
 1463                     "%s: socket is not a listening one\n", __func__);
 1464                 return;
 1465         }
 1466 
 1467         /*
 1468          * Create a new socket. This will call pru_attach to complete
 1469          * the socket initialization and put the new socket onto
 1470          * listening socket's sol_incomp list, waiting to be promoted
 1471          * to sol_comp list.
 1472          * The new socket created has ref count 0. There is no other
 1473          * thread that changes the state of this new one at the
 1474          * moment, so we don't need to hold its lock while opening
 1475          * channel and filling out its pcb information.
 1476          */
 1477         new_so = sonewconn(so, 0);
 1478         if (!new_so)
 1479                 HVSOCK_DBG(HVSOCK_DBG_ERR,
 1480                     "%s: creating new socket failed\n", __func__);
 1481 
 1482         /*
 1483          * Now open the vmbus channel. If it fails, the socket will be
 1484          * on the listening socket's sol_incomp queue until it is
 1485          * replaced and aborted.
 1486          */
 1487         error = hvsock_open_channel(chan, new_so);
 1488         if (error) {
 1489                 new_so->so_error = error;
 1490                 return;
 1491         }
 1492 
 1493         pcb = so->so_pcb;
 1494         new_pcb = new_so->so_pcb;
 1495 
 1496         hvs_addr_set(&(new_pcb->local_addr), pcb->local_addr.hvs_port);
 1497         /* Remote port is unknown to guest in this type of conneciton */
 1498         hvs_addr_set(&(new_pcb->remote_addr), HVADDR_PORT_UNKNOWN);
 1499         new_pcb->chan = chan;
 1500         new_pcb->recv_data_len = 0;
 1501         new_pcb->recv_data_off = 0;
 1502         new_pcb->rb_init = false;
 1503 
 1504         new_pcb->vm_srv_id = *vmbus_chan_guid_type(chan);
 1505         new_pcb->host_srv_id = *vmbus_chan_guid_inst(chan);
 1506 
 1507         hvs_insert_socket_on_list(new_so, HVS_LIST_CONNECTED);
 1508 
 1509         sc->pcb = new_pcb;
 1510 
 1511         /*
 1512          * Change the socket state to SS_ISCONNECTED. This will promote
 1513          * the socket to sol_comp queue and wake up the thread which
 1514          * is accepting connection.
 1515          */
 1516         soisconnected(new_so);
 1517 }
 1518 
 1519 
 1520 /*
 1521  * Guest is actively connecting to host.
 1522  */
 1523 static void
 1524 hvsock_open_conn_active(struct vmbus_channel *chan, struct socket *so)
 1525 {
 1526         struct hvs_pcb *pcb;
 1527         int error;
 1528 
 1529         error = hvsock_open_channel(chan, so);
 1530         if (error) {
 1531                 so->so_error = error;
 1532                 return;
 1533         }
 1534 
 1535         pcb = so->so_pcb;
 1536         pcb->chan = chan;
 1537         pcb->recv_data_len = 0;
 1538         pcb->recv_data_off = 0;
 1539         pcb->rb_init = false;
 1540 
 1541         mtx_lock(&hvs_trans_socks_mtx);
 1542         __hvs_remove_socket_from_list(so, HVS_LIST_BOUND);
 1543         __hvs_insert_socket_on_list(so, HVS_LIST_CONNECTED);
 1544         mtx_unlock(&hvs_trans_socks_mtx);
 1545 
 1546         /*
 1547          * Change the socket state to SS_ISCONNECTED. This will wake up
 1548          * the thread sleeping in connect call.
 1549          */
 1550         soisconnected(so);
 1551 }
 1552 
 1553 static void
 1554 hvsock_open_connection(struct vmbus_channel *chan, struct hvsock_sc *sc)
 1555 {
 1556         struct hyperv_guid *inst_guid, *type_guid;
 1557         bool conn_from_host;
 1558         struct sockaddr_hvs addr;
 1559         struct socket *so;
 1560         struct hvs_pcb *pcb;
 1561 
 1562         type_guid = (struct hyperv_guid *) vmbus_chan_guid_type(chan);
 1563         inst_guid = (struct hyperv_guid *) vmbus_chan_guid_inst(chan);
 1564         conn_from_host = vmbus_chan_is_hvs_conn_from_host(chan);
 1565 
 1566         HVSOCK_DBG(HVSOCK_DBG_INFO, "type_guid is ");
 1567         hvsock_print_guid(type_guid);
 1568         HVSOCK_DBG(HVSOCK_DBG_INFO, "inst_guid is ");
 1569         hvsock_print_guid(inst_guid);
 1570         HVSOCK_DBG(HVSOCK_DBG_INFO, "connection %s host\n",
 1571             (conn_from_host == true ) ? "from" : "to");
 1572 
 1573         /*
 1574          * The listening port should be in [0, MAX_LISTEN_PORT]
 1575          */
 1576         if (!is_valid_srv_id(type_guid))
 1577                 return;
 1578 
 1579         /*
 1580          * There should be a bound socket already created no matter
 1581          * it is a passive or active connection.
 1582          * For host initiated connection (passive on guest side),
 1583          * the  type_guid contains the port which guest is bound and
 1584          * listening.
 1585          * For the guest initiated connection (active on guest side),
 1586          * the inst_guid contains the port that guest has auto bound
 1587          * to.
 1588          */
 1589         hvs_addr_init(&addr, conn_from_host ? type_guid : inst_guid);
 1590         so = hvs_find_socket_on_list(&addr, HVS_LIST_BOUND);
 1591         if (!so) {
 1592                 HVSOCK_DBG(HVSOCK_DBG_ERR,
 1593                     "%s: no bound socket found for port %u\n",
 1594                     __func__, addr.hvs_port);
 1595                 return;
 1596         }
 1597 
 1598         if (conn_from_host) {
 1599                 hvsock_open_conn_passive(chan, so, sc);
 1600         } else {
 1601                 (void) hvs_trans_lock();
 1602                 pcb = so->so_pcb;
 1603                 if (pcb && pcb->so) {
 1604                         sc->pcb = so2hvspcb(so);
 1605                         hvsock_open_conn_active(chan, so);
 1606                 } else {
 1607                         HVSOCK_DBG(HVSOCK_DBG_VERBOSE,
 1608                             "%s: channel detached before open\n", __func__);
 1609                 }
 1610                 hvs_trans_unlock();
 1611         }
 1612 
 1613 }
 1614 
 1615 static int
 1616 hvsock_probe(device_t dev)
 1617 {
 1618         struct vmbus_channel *channel = vmbus_get_channel(dev);
 1619 
 1620         if (!channel || !vmbus_chan_is_hvs(channel)) {
 1621                 HVSOCK_DBG(HVSOCK_DBG_VERBOSE,
 1622                     "hvsock_probe called but not a hvsock channel id %u\n",
 1623                     vmbus_chan_id(channel));
 1624 
 1625                 return ENXIO;
 1626         } else {
 1627                 HVSOCK_DBG(HVSOCK_DBG_VERBOSE,
 1628                     "hvsock_probe got a hvsock channel id %u\n",
 1629                     vmbus_chan_id(channel));
 1630 
 1631                 return BUS_PROBE_DEFAULT;
 1632         }
 1633 }
 1634 
 /*
  * Device attach method: open the connection on this device's VMBus
  * channel.
  *
  * Always returns 0; the outcome of hvsock_open_connection() is not
  * checked here.
  */
 1635 static int
 1636 hvsock_attach(device_t dev)
 1637 {
 1638         struct vmbus_channel *chan;
 1639         struct hvsock_sc *softc;
 1640 
 1641         chan = vmbus_get_channel(dev);
 1642         softc = device_get_softc(dev);
 1643         HVSOCK_DBG(HVSOCK_DBG_VERBOSE, "hvsock_attach called.\n");
 1644         hvsock_open_connection(chan, softc);
 1645 
 1646         /*
 1647          * Report success even if the open failed: the host will rescind the
 1648          * device in 30 seconds and vmbus_chan_msgproc_chrescind() cleans up.
 1649          */
 1650         return (0);
 1651 }
 1652 
 /*
  * Device detach method: tear down the socket-side state tied to this
  * device, then close its VMBus channel.
  *
  * If the softc still references a pcb, the owning socket (if any) is
  * marked disconnected, the pcb is removed from the bound and connected
  * lists, the socket's receive and send I/O locks are taken, and the pcb
  * is zeroed and freed.  The channel is closed whether or not a pcb was
  * attached.
  *
  * Always returns 0.
  */
 1653 static int
 1654 hvsock_detach(device_t dev)
 1655 {
 1656         struct hvsock_sc *sc = (struct hvsock_sc *)device_get_softc(dev);
 1657         struct socket *so;
 1658         int retry;
 1659 
 1660         if (bootverbose)
 1661                 device_printf(dev, "hvsock_detach called.\n");
 1662 
 1663         HVSOCK_DBG(HVSOCK_DBG_VERBOSE, "hvsock_detach called.\n");
 1664 
 1665         if (sc->pcb != NULL) {
                      /* Held until the pcb is freed; the result is ignored. */
 1666                 (void) hvs_trans_lock();
 1667 
 1668                 so = hsvpcb2so(sc->pcb);
 1669                 if (so) {
 1670                         /* Mark the connection closed on the socket side. */
 1671                         if (so->so_state &
 1672                             (SS_ISCONNECTED|SS_ISCONNECTING|SS_ISDISCONNECTING))
 1673                                 soisdisconnected(so);
 1674                 }
 1675 
                      /* Unlink the pcb from both the bound and connected lists. */
 1676                 mtx_lock(&hvs_trans_socks_mtx);
 1677                 __hvs_remove_pcb_from_list(sc->pcb,
 1678                     HVS_LIST_BOUND | HVS_LIST_CONNECTED);
 1679                 mtx_unlock(&hvs_trans_socks_mtx);
 1680 
 1681                 /*
 1682                  * Wait until no reader and no sender is working on the
 1683                  * buffer rings; both I/O locks stay held across the free.
                       *
                       * NOTE(review): only EWOULDBLOCK keeps the loops below
                       * spinning; any other return value ends the loop, there
                       * is no retry limit, and the unlocks further down are
                       * unconditional -- confirm no other error can occur.
 1684                  */
 1685                 if (so) {
                              /* retry is only used in the debug message. */
 1686                         retry = 0;
 1687                         while (SOCK_IO_RECV_LOCK(so, 0) == EWOULDBLOCK) {
 1688                                 /*
 1689                                  * Someone is reading, so the rx buffer
                                       * ring is busy.  Signal disconnect
                                       * again and pause before retrying.
 1690                                  */
 1691                                 soisdisconnected(so);
 1692                                 DELAY(500);
 1693                                 HVSOCK_DBG(HVSOCK_DBG_VERBOSE,
 1694                                     "waiting for rx reader to exit, "
 1695                                     "retry = %d\n", retry++);
 1696                         }
 1697                         retry = 0;
 1698                         while (SOCK_IO_SEND_LOCK(so, 0) == EWOULDBLOCK) {
 1699                                 /*
 1700                                  * Someone is sending, so the tx buffer
                                       * ring is busy.  Signal disconnect
                                       * again and pause before retrying.
 1701                                  */
 1702                                 soisdisconnected(so);
 1703                                 DELAY(500);
 1704                                 HVSOCK_DBG(HVSOCK_DBG_VERBOSE,
 1705                                     "waiting for tx sender to exit, "
 1706                                     "retry = %d\n", retry++);
 1707                         }
 1708                 }
 1709 
 1710 
                      /* Zero the pcb before freeing it and forget the pointer. */
 1711                 bzero(sc->pcb, sizeof(struct hvs_pcb));
 1712                 free(sc->pcb, M_HVSOCK);
 1713                 sc->pcb = NULL;
 1714 
 1715                 if (so) {
                              /*
                               * Release the I/O locks taken above and clear
                               * the socket's pcb pointer now that the pcb
                               * has been freed.
                               */
 1716                         SOCK_IO_RECV_UNLOCK(so);
 1717                         SOCK_IO_SEND_UNLOCK(so);
 1718                         so->so_pcb = NULL;
 1719                 }
 1720 
 1721                 hvs_trans_unlock();
 1722         }
 1723 
              /* Runs even when no pcb was attached to this device. */
 1724         vmbus_chan_close(vmbus_get_channel(dev));
 1725 
 1726         return (0);
 1727 }
 1728 
 /*
  * Device method table: maps the probe, attach and detach device methods
  * to this driver's implementations.
  */
 1729 static device_method_t hvsock_methods[] = {
 1730         /* Device interface */
 1731         DEVMETHOD(device_probe, hvsock_probe),
 1732         DEVMETHOD(device_attach, hvsock_attach),
 1733         DEVMETHOD(device_detach, hvsock_detach),
 1734         DEVMETHOD_END
 1735 };
 1736 
 /*
  * Driver description: the "hv_sock" name, the method table above, and
  * sizeof(struct hvsock_sc), presumably the per-device softc size (see
  * driver(9) for the field order).
  */
 1737 static driver_t hvsock_driver = {
 1738         "hv_sock",
 1739         hvsock_methods,
 1740         sizeof(struct hvsock_sc)
 1741 };
 1742 
 /*
  * Register hvsock_driver as module "hvsock" on the vmbus bus.  The two
  * trailing NULLs are presumably the module event handler and its
  * argument -- confirm against DRIVER_MODULE(9).  The module declares
  * version 1 and depends on vmbus (version arguments 1, 1, 1).
  */
 1743 DRIVER_MODULE(hvsock, vmbus, hvsock_driver, NULL, NULL);
 1744 MODULE_VERSION(hvsock, 1);
 1745 MODULE_DEPEND(hvsock, vmbus, 1, 1, 1);

Cache object: 25f04cb684f3fa5808a3eddff0258b11


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]


This page is part of the FreeBSD/Linux Kernel Cross-Reference, and was automatically generated using a modified version of the LXR engine.