The Design and Implementation of the FreeBSD Operating System, Second Edition
Now available: The Design and Implementation of the FreeBSD Operating System (Second Edition)


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]

FreeBSD/Linux Kernel Cross Reference
sys/kern/kern_dmsg.c

Version: -  FREEBSD  -  FREEBSD-13-STABLE  -  FREEBSD-13-0  -  FREEBSD-12-STABLE  -  FREEBSD-12-0  -  FREEBSD-11-STABLE  -  FREEBSD-11-0  -  FREEBSD-10-STABLE  -  FREEBSD-10-0  -  FREEBSD-9-STABLE  -  FREEBSD-9-0  -  FREEBSD-8-STABLE  -  FREEBSD-8-0  -  FREEBSD-7-STABLE  -  FREEBSD-7-0  -  FREEBSD-6-STABLE  -  FREEBSD-6-0  -  FREEBSD-5-STABLE  -  FREEBSD-5-0  -  FREEBSD-4-STABLE  -  FREEBSD-3-STABLE  -  FREEBSD22  -  l41  -  OPENBSD  -  linux-2.6  -  MK84  -  PLAN9  -  xnu-8792 
SearchContext: -  none  -  3  -  10 

    1 /*-
    2  * Copyright (c) 2012 The DragonFly Project.  All rights reserved.
    3  *
    4  * This code is derived from software contributed to The DragonFly Project
    5  * by Matthew Dillon <dillon@backplane.com>
    6  *
    7  * Redistribution and use in source and binary forms, with or without
    8  * modification, are permitted provided that the following conditions
    9  * are met:
   10  *
   11  * 1. Redistributions of source code must retain the above copyright
   12  *    notice, this list of conditions and the following disclaimer.
   13  * 2. Redistributions in binary form must reproduce the above copyright
   14  *    notice, this list of conditions and the following disclaimer in
   15  *    the documentation and/or other materials provided with the
   16  *    distribution.
   17  * 3. Neither the name of The DragonFly Project nor the names of its
   18  *    contributors may be used to endorse or promote products derived
   19  *    from this software without specific, prior written permission.
   20  *
   21  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
   22  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
   23  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
   24  * FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE
   25  * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
   26  * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
   27  * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
   28  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
   29  * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
   30  * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
   31  * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
   32  * SUCH DAMAGE.
   33  */
   34 /*
   35  * TODO: txcmd CREATE state is deferred by txmsgq, need to calculate
   36  *       a streaming response.  See subr_diskiocom()'s diskiodone().
   37  */
   38 #include <sys/param.h>
   39 #include <sys/types.h>
   40 #include <sys/kernel.h>
   41 #include <sys/conf.h>
   42 #include <sys/systm.h>
   43 #include <sys/queue.h>
   44 #include <sys/tree.h>
   45 #include <sys/malloc.h>
   46 #include <sys/mount.h>
   47 #include <sys/socket.h>
   48 #include <sys/vnode.h>
   49 #include <sys/file.h>
   50 #include <sys/proc.h>
   51 #include <sys/priv.h>
   52 #include <sys/thread.h>
   53 #include <sys/globaldata.h>
   54 #include <sys/limits.h>
   55 
   56 #include <sys/dmsg.h>
   57 
      /*
       * Generate the red-black tree support functions for the state trees
       * (iocom->staterd_tree / iocom->statewr_tree) and the circuit tree
       * (iocom->circ_tree), ordered by kdmsg_state_cmp() and
       * kdmsg_circuit_cmp() respectively.
       */
   58 RB_GENERATE(kdmsg_state_tree, kdmsg_state, rbnode, kdmsg_state_cmp);
   59 RB_GENERATE(kdmsg_circuit_tree, kdmsg_circuit, rbnode, kdmsg_circuit_cmp);
   60 
      /* Receive dispatch, circuit lookup and per-transaction state tracking. */
   61 static int kdmsg_msg_receive_handling(kdmsg_msg_t *msg);
   62 static int kdmsg_circ_msgrx(kdmsg_msg_t *msg);
   63 static int kdmsg_state_msgrx(kdmsg_msg_t *msg);
   64 static int kdmsg_state_msgtx(kdmsg_msg_t *msg);
   65 static void kdmsg_state_cleanuprx(kdmsg_msg_t *msg);
   66 static void kdmsg_state_cleanuptx(kdmsg_msg_t *msg);
   67 static void kdmsg_state_abort(kdmsg_state_t *state);
   68 static void kdmsg_state_free(kdmsg_state_t *state);
   69 
      /* Reader/writer service threads and automatic message handling. */
   70 static void kdmsg_iocom_thread_rd(void *arg);
   71 static void kdmsg_iocom_thread_wr(void *arg);
   72 static int kdmsg_autorxmsg(kdmsg_msg_t *msg);
   73 static void kdmsg_autocirc(kdmsg_msg_t *msg);
   74 static int kdmsg_autocirc_reply(kdmsg_state_t *state, kdmsg_msg_t *msg);
   75 
      /* Token held around circuit tree lookups in kdmsg_circ_msgrx(). */
   76 static struct lwkt_token kdmsg_token = LWKT_TOKEN_INITIALIZER(kdmsg_token);
   77 
   78 void
   79 kdmsg_circ_hold(kdmsg_circuit_t *circ)
   80 {
   81         atomic_add_int(&circ->refs, 1);
   82 }
   83 
   84 void
   85 kdmsg_circ_drop(kdmsg_circuit_t *circ)
   86 {
   87         kdmsg_iocom_t *iocom;
   88 
   89         if (atomic_fetchadd_int(&circ->refs, -1) == 1) {
   90                 KKASSERT(circ->span_state == NULL &&
   91                          circ->circ_state == NULL &&
   92                          circ->rcirc_state == NULL &&
   93                          circ->recorded == 0);
   94                 iocom = circ->iocom;
   95                 circ->iocom = NULL;
   96                 kfree(circ, iocom->mmsg);
   97         }
   98 }
   99 
  100 
  101 /*
  102  * Initialize the roll-up communications structure for a network
  103  * messaging session.  This function does not install the socket.
  104  */
  105 void
  106 kdmsg_iocom_init(kdmsg_iocom_t *iocom, void *handle, uint32_t flags,
  107                  struct malloc_type *mmsg,
  108                  int (*rcvmsg)(kdmsg_msg_t *msg))
  109 {
  110         bzero(iocom, sizeof(*iocom));
  111         iocom->handle = handle;
  112         iocom->mmsg = mmsg;
  113         iocom->rcvmsg = rcvmsg;
  114         iocom->flags = flags;
  115         lockinit(&iocom->msglk, "h2msg", 0, 0);
  116         TAILQ_INIT(&iocom->msgq);
  117         RB_INIT(&iocom->circ_tree);
  118         RB_INIT(&iocom->staterd_tree);
  119         RB_INIT(&iocom->statewr_tree);
  120 }
  121 
  122 /*
  123  * [Re]connect using the passed file pointer.  The caller must ref the
  124  * fp for us.  We own that ref now.
       *
       * Any existing reader/writer threads are told to exit and waited for,
       * the old descriptor (if any) is dropped, the control word and sequence
       * number are reset, and a fresh pair of threads named
       * "<subsysname>-msgrd" / "<subsysname>-msgwr" is created.  The whole
       * sequence runs with iocom->msglk held exclusively.
  125  */
  126 void
  127 kdmsg_iocom_reconnect(kdmsg_iocom_t *iocom, struct file *fp,
  128                       const char *subsysname)
  129 {
  130         /*
  131          * Destroy the current connection
               *
               * The service threads clear msgrd_td / msgwr_td and wakeup(iocom)
               * as they exit; the hz timeout makes this loop re-poll
               * periodically as well.
  132          */
  133         lockmgr(&iocom->msglk, LK_EXCLUSIVE);
  134         atomic_set_int(&iocom->msg_ctl, KDMSG_CLUSTERCTL_KILL);
  135         while (iocom->msgrd_td || iocom->msgwr_td) {
  136                 wakeup(&iocom->msg_ctl);
  137                 lksleep(iocom, &iocom->msglk, 0, "clstrkl", hz);
  138         }
  139 
  140         /*
  141          * Drop communications descriptor
  142          */
  143         if (iocom->msg_fp) {
  144                 fdrop(iocom->msg_fp);
  145                 iocom->msg_fp = NULL;
  146         }
  147 
  148         /*
  149          * Setup new communications descriptor
               *
               * Clearing msg_ctl also clears the KILL flag set above.
  150          */
  151         iocom->msg_ctl = 0;
  152         iocom->msg_fp = fp;
  153         iocom->msg_seq = 0;
  154         iocom->flags &= ~KDMSG_IOCOMF_EXITNOACC;
  155 
              /* Start the reader and writer threads for the new descriptor */
  156         lwkt_create(kdmsg_iocom_thread_rd, iocom, &iocom->msgrd_td,
  157                     NULL, 0, -1, "%s-msgrd", subsysname);
  158         lwkt_create(kdmsg_iocom_thread_wr, iocom, &iocom->msgwr_td,
  159                     NULL, 0, -1, "%s-msgwr", subsysname);
  160         lockmgr(&iocom->msglk, LK_RELEASE);
  161 }
  162 
  163 /*
  164  * Caller sets up iocom->auto_lnk_conn and iocom->auto_lnk_span, then calls
  165  * this function to handle the state machine for LNK_CONN and LNK_SPAN.
  166  *
  167  * NOTE: Caller typically also sets the IOCOMF_AUTOCONN, IOCOMF_AUTOSPAN,
  168  *       and IOCOMF_AUTOCIRC in the kdmsg_iocom_init() call.  Clients
  169  *       typically set IOCOMF_AUTOFORGE to automatically forged circuits
  170  *       for received SPANs.
  171  */
  172 static int kdmsg_lnk_conn_reply(kdmsg_state_t *state, kdmsg_msg_t *msg);
  173 static int kdmsg_lnk_span_reply(kdmsg_state_t *state, kdmsg_msg_t *msg);
  174 
  175 void
  176 kdmsg_iocom_autoinitiate(kdmsg_iocom_t *iocom,
  177                          void (*auto_callback)(kdmsg_msg_t *msg))
  178 {
  179         kdmsg_msg_t *msg;
  180 
  181         iocom->auto_callback = auto_callback;
  182 
  183         msg = kdmsg_msg_alloc(iocom, NULL,
  184                               DMSG_LNK_CONN | DMSGF_CREATE,
  185                               kdmsg_lnk_conn_reply, NULL);
  186         iocom->auto_lnk_conn.head = msg->any.head;
  187         msg->any.lnk_conn = iocom->auto_lnk_conn;
  188         iocom->conn_state = msg->state;
  189         kdmsg_msg_write(msg);
  190 }
  191 
  192 static
  193 int
  194 kdmsg_lnk_conn_reply(kdmsg_state_t *state, kdmsg_msg_t *msg)
  195 {
  196         kdmsg_iocom_t *iocom = state->iocom;
  197         kdmsg_msg_t *rmsg;
  198 
  199         if (msg->any.head.cmd & DMSGF_CREATE) {
  200                 rmsg = kdmsg_msg_alloc(iocom, NULL,
  201                                        DMSG_LNK_SPAN | DMSGF_CREATE,
  202                                        kdmsg_lnk_span_reply, NULL);
  203                 iocom->auto_lnk_span.head = rmsg->any.head;
  204                 rmsg->any.lnk_span = iocom->auto_lnk_span;
  205                 kdmsg_msg_write(rmsg);
  206         }
  207 
  208         /*
  209          * Process shim after the CONN is acknowledged and before the CONN
  210          * transaction is deleted.  For deletions this gives device drivers
  211          * the ability to interlock new operations on the circuit before
  212          * it becomes illegal and panics.
  213          */
  214         if (iocom->auto_callback)
  215                 iocom->auto_callback(msg);
  216 
  217         if ((state->txcmd & DMSGF_DELETE) == 0 &&
  218             (msg->any.head.cmd & DMSGF_DELETE)) {
  219                 iocom->conn_state = NULL;
  220                 kdmsg_msg_reply(msg, 0);
  221         }
  222 
  223         return (0);
  224 }
  225 
  226 static
  227 int
  228 kdmsg_lnk_span_reply(kdmsg_state_t *state, kdmsg_msg_t *msg)
  229 {
  230         /*
  231          * Be sure to process shim before terminating the SPAN
  232          * transaction.  Gives device drivers the ability to
  233          * interlock new operations on the circuit before it
  234          * becomes illegal and panics.
  235          */
  236         if (state->iocom->auto_callback)
  237                 state->iocom->auto_callback(msg);
  238 
  239         if ((state->txcmd & DMSGF_DELETE) == 0 &&
  240             (msg->any.head.cmd & DMSGF_DELETE)) {
  241                 kdmsg_msg_reply(msg, 0);
  242         }
  243         return (0);
  244 }
  245 
  246 /*
  247  * Disconnect and clean up
       *
       * Sets KDMSG_CLUSTERCTL_KILL, waits for both service threads to clear
       * their thread pointers, frees the cached freerd_state / freewr_state
       * structures and drops the reference on the communications descriptor.
       * Runs with iocom->msglk held exclusively.
  248  */
  249 void
  250 kdmsg_iocom_uninit(kdmsg_iocom_t *iocom)
  251 {
  252         kdmsg_state_t *state;
  253 
  254         /*
  255          * Ask the cluster controller to go away
  256          */
  257         lockmgr(&iocom->msglk, LK_EXCLUSIVE);
  258         atomic_set_int(&iocom->msg_ctl, KDMSG_CLUSTERCTL_KILL);
  259 
              /*
               * Keep kicking the threads (they sleep on &iocom->msg_ctl) and
               * sleep on iocom, re-polling at least every hz ticks, until
               * both have exited.
               */
  260         while (iocom->msgrd_td || iocom->msgwr_td) {
  261                 wakeup(&iocom->msg_ctl);
  262                 lksleep(iocom, &iocom->msglk, 0, "clstrkl", hz);
  263         }
  264 
  265         /*
  266          * Cleanup caches
  267          */
  268         if ((state = iocom->freerd_state) != NULL) {
  269                 iocom->freerd_state = NULL;
  270                 kdmsg_state_free(state);
  271         }
  272 
  273         if ((state = iocom->freewr_state) != NULL) {
  274                 iocom->freewr_state = NULL;
  275                 kdmsg_state_free(state);
  276         }
  277 
  278         /*
  279          * Drop communications descriptor
  280          */
  281         if (iocom->msg_fp) {
  282                 fdrop(iocom->msg_fp);
  283                 iocom->msg_fp = NULL;
  284         }
  285         lockmgr(&iocom->msglk, LK_RELEASE);
  286 }
  287 
  288 /*
  289  * Cluster controller thread.  Perform messaging functions.  We have one
  290  * thread for the reader and one for the writer.  The writer handles
  291  * shutdown requests (which should break the reader thread).
       *
       * Reader side: loops until KDMSG_CLUSTERCTL_KILL is set in msg_ctl or
       * a read/validation error occurs.  Each iteration reads a dmsg_hdr_t,
       * checks its magic and encoded size, reads any extended header bytes
       * and the aligned aux payload, then hands the message to circuit
       * tracking and kdmsg_msg_receive_handling().
       *
       * On exit it frees any partially received message, shuts down the
       * descriptor, sets KILL and KILLRX, waits for the writer to set KILLTX,
       * clears msgrd_td and wakes up sleepers on iocom.
       *
       * NOTE(review): hbytes is a size_t but is printed with %zd, and the
       * header-size upper bound is DMSG_AUX_MAX rather than a header-specific
       * limit -- confirm both are intended.
  292  */
  293 static
  294 void
  295 kdmsg_iocom_thread_rd(void *arg)
  296 {
  297         kdmsg_iocom_t *iocom = arg;
  298         dmsg_hdr_t hdr;
  299         kdmsg_msg_t *msg = NULL;
  300         size_t hbytes;
  301         size_t abytes;
  302         int error = 0;
  303 
  304         while ((iocom->msg_ctl & KDMSG_CLUSTERCTL_KILL) == 0) {
  305                 /*
  306                  * Retrieve the message from the pipe or socket.
  307                  */
  308                 error = fp_read(iocom->msg_fp, &hdr, sizeof(hdr),
  309                                 NULL, 1, UIO_SYSSPACE);
  310                 if (error)
  311                         break;
  312                 if (hdr.magic != DMSG_HDR_MAGIC) {
  313                         kprintf("kdmsg: bad magic: %04x\n", hdr.magic);
  314                         error = EINVAL;
  315                         break;
  316                 }
                      /* Total header size is encoded in the low bits of cmd */
  317                 hbytes = (hdr.cmd & DMSGF_SIZE) * DMSG_ALIGN;
  318                 if (hbytes < sizeof(hdr) || hbytes > DMSG_AUX_MAX) {
  319                         kprintf("kdmsg: bad header size %zd\n", hbytes);
  320                         error = EINVAL;
  321                         break;
  322                 }
  323                 /* XXX messy: mask cmd to avoid allocating state */
  324                 msg = kdmsg_msg_alloc(iocom, NULL,
  325                                       hdr.cmd & DMSGF_BASECMDMASK,
  326                                       NULL, NULL);
  327                 msg->any.head = hdr;
  328                 msg->hdr_size = hbytes;
                      /* Extended header bytes land directly after the base header */
  329                 if (hbytes > sizeof(hdr)) {
  330                         error = fp_read(iocom->msg_fp, &msg->any.head + 1,
  331                                         hbytes - sizeof(hdr),
  332                                         NULL, 1, UIO_SYSSPACE);
  333                         if (error) {
  334                                 kprintf("kdmsg: short msg received\n");
  335                                 error = EINVAL;
  336                                 break;
  337                         }
  338                 }
  339                 msg->aux_size = hdr.aux_bytes;
  340                 if (msg->aux_size > DMSG_AUX_MAX) {
  341                         kprintf("kdmsg: illegal msg payload size %zd\n",
  342                                 msg->aux_size);
  343                         error = EINVAL;
  344                         break;
  345                 }
                      /* The aux payload is read at its aligned size */
  346                 if (msg->aux_size) {
  347                         abytes = DMSG_DOALIGN(msg->aux_size);
  348                         msg->aux_data = kmalloc(abytes, iocom->mmsg, M_WAITOK);
  349                         msg->flags |= KDMSG_FLAG_AUXALLOC;
  350                         error = fp_read(iocom->msg_fp, msg->aux_data,
  351                                         abytes, NULL, 1, UIO_SYSSPACE);
  352                         if (error) {
  353                                 kprintf("kdmsg: short msg payload received\n");
  354                                 break;
  355                         }
  356                 }
  357 
                      /*
                       * A failed circuit lookup is deliberately ignored here.
                       * Once the message has been handed off, msg is cleared
                       * so the exit path below does not free it.
                       */
  358                 (void)kdmsg_circ_msgrx(msg);
  359                 error = kdmsg_msg_receive_handling(msg);
  360                 msg = NULL;
  361         }
  362 
  363         if (error)
  364                 kprintf("kdmsg: read failed error %d\n", error);
  365 
              /* msg is only non-NULL here if the loop broke out mid-message */
  366         lockmgr(&iocom->msglk, LK_EXCLUSIVE);
  367         if (msg)
  368                 kdmsg_msg_free(msg);
  369 
  370         /*
  371          * Shutdown the socket before waiting for the transmit side.
  372          *
  373          * If we are dying due to e.g. a socket disconnect versus being
  374          * killed explicitly we have to set KILL in order to kick the tx
  375          * side when it might not have any other work to do.  KILL might
  376          * already be set if we are in an unmount or reconnect.
  377          */
  378         fp_shutdown(iocom->msg_fp, SHUT_RDWR);
  379 
  380         atomic_set_int(&iocom->msg_ctl, KDMSG_CLUSTERCTL_KILL);
  381         wakeup(&iocom->msg_ctl);
  382 
  383         /*
  384          * Wait for the transmit side to drain remaining messages
  385          * before cleaning up the rx state.  The transmit side will
  386          * set KILLTX and wait for the rx side to completely finish
  387          * (set msgrd_td to NULL) before cleaning up any remaining
  388          * tx states.
  389          */
  390         lockmgr(&iocom->msglk, LK_RELEASE);
  391         atomic_set_int(&iocom->msg_ctl, KDMSG_CLUSTERCTL_KILLRX);
  392         wakeup(&iocom->msg_ctl);
  393         while ((iocom->msg_ctl & KDMSG_CLUSTERCTL_KILLTX) == 0) {
  394                 wakeup(&iocom->msg_ctl);
  395                 tsleep(iocom, 0, "clstrkw", hz);
  396         }
  397 
  398         iocom->msgrd_td = NULL;
  399 
  400         /*
  401          * iocom can be ripped out from under us at this point but
  402          * wakeup() is safe.
  403          */
  404         wakeup(iocom);
  405         lwkt_exit();
  406 }
  407 
      /*
       * Writer half of the cluster controller.
       *
       * Holding msglk, it drains iocom->msgq: each message is removed from
       * the queue, passed through kdmsg_state_msgtx(), written to msg_fp
       * (header first, then the aligned aux data if any) and finished with
       * kdmsg_state_cleanuptx().  msglk is released around the per-message
       * work and re-acquired afterwards.
       *
       * When KDMSG_CLUSTERCTL_KILL is set or an error occurs the thread
       * shuts the descriptor down, sets KILLTX, waits for the reader to
       * clear msgrd_td, then aborts remaining states in both trees and
       * drains msgq until everything is empty (panicking if that takes too
       * many passes).  Finally it clears msgwr_td and either calls
       * iocom->exit_func() or wakes up sleepers on iocom.
       */
  408 static
  409 void
  410 kdmsg_iocom_thread_wr(void *arg)
  411 {
  412         kdmsg_iocom_t *iocom = arg;
  413         kdmsg_msg_t *msg;
  414         kdmsg_state_t *state;
  415         ssize_t res;
  416         size_t abytes;
  417         int error = 0;
  418         int retries = 20;
  419 
  420         /*
  421          * Transmit loop
  422          */
  423         msg = NULL;
  424         lockmgr(&iocom->msglk, LK_EXCLUSIVE);
  425 
  426         while ((iocom->msg_ctl & KDMSG_CLUSTERCTL_KILL) == 0 && error == 0) {
  427                 /*
  428                  * Sleep if no messages pending.  Interlock with flag while
  429                  * holding msglk.
  430                  */
  431                 if (TAILQ_EMPTY(&iocom->msgq)) {
  432                         atomic_set_int(&iocom->msg_ctl,
  433                                        KDMSG_CLUSTERCTL_SLEEPING);
  434                         lksleep(&iocom->msg_ctl, &iocom->msglk, 0, "msgwr", hz);
  435                         atomic_clear_int(&iocom->msg_ctl,
  436                                          KDMSG_CLUSTERCTL_SLEEPING);
  437                 }
  438 
  439                 while ((msg = TAILQ_FIRST(&iocom->msgq)) != NULL) {
  440                         /*
  441                          * Remove msg from the transmit queue and do
  442                          * persist and half-closed state handling.
  443                          */
  444                         TAILQ_REMOVE(&iocom->msgq, msg, qentry);
  445                         lockmgr(&iocom->msglk, LK_RELEASE);
  446 
                              /*
                               * EALREADY discards the message without
                               * transmitting it and is not treated as an
                               * error; any other error ends the transmit loop.
                               * msglk is re-acquired before leaving the body.
                               */
  447                         error = kdmsg_state_msgtx(msg);
  448                         if (error == EALREADY) {
  449                                 error = 0;
  450                                 kdmsg_msg_free(msg);
  451                                 lockmgr(&iocom->msglk, LK_EXCLUSIVE);
  452                                 continue;
  453                         }
  454                         if (error) {
  455                                 kdmsg_msg_free(msg);
  456                                 lockmgr(&iocom->msglk, LK_EXCLUSIVE);
  457                                 break;
  458                         }
  459 
  460                         /*
  461                          * Dump the message to the pipe or socket.
  462                          *
  463                          * We have to clean up the message as if the transmit
  464                          * succeeded even if it failed.
                               *
                               * A short write without an error code is reported
                               * as EINVAL.
  465                          */
  466                         error = fp_write(iocom->msg_fp, &msg->any,
  467                                          msg->hdr_size, &res, UIO_SYSSPACE);
  468                         if (error || res != msg->hdr_size) {
  469                                 if (error == 0)
  470                                         error = EINVAL;
  471                                 kdmsg_state_cleanuptx(msg);
  472                                 lockmgr(&iocom->msglk, LK_EXCLUSIVE);
  473                                 break;
  474                         }
  475                         if (msg->aux_size) {
  476                                 abytes = DMSG_DOALIGN(msg->aux_size);
  477                                 error = fp_write(iocom->msg_fp,
  478                                                  msg->aux_data, abytes,
  479                                                  &res, UIO_SYSSPACE);
  480                                 if (error || res != abytes) {
  481                                         if (error == 0)
  482                                                 error = EINVAL;
  483                                         kdmsg_state_cleanuptx(msg);
  484                                         lockmgr(&iocom->msglk, LK_EXCLUSIVE);
  485                                         break;
  486                                 }
  487                         }
  488                         kdmsg_state_cleanuptx(msg);
  489                         lockmgr(&iocom->msglk, LK_EXCLUSIVE);
  490                 }
  491         }
  492 
  493         /*
  494          * Cleanup messages pending transmission and release msgq lock.
  495          */
  496         if (error)
  497                 kprintf("kdmsg: write failed error %d\n", error);
  498         kprintf("thread_wr: Terminating iocom\n");
  499 
  500         /*
  501          * Shutdown the socket.  This will cause the rx thread to get an
  502          * EOF and ensure that both threads get to a termination state.
  503          */
  504         fp_shutdown(iocom->msg_fp, SHUT_RDWR);
  505 
  506         /*
  507          * Set KILLTX (which the rx side waits for), then wait for the RX
  508          * side to completely finish before we clean out any remaining
  509          * command states.
  510          */
  511         lockmgr(&iocom->msglk, LK_RELEASE);
  512         atomic_set_int(&iocom->msg_ctl, KDMSG_CLUSTERCTL_KILLTX);
  513         wakeup(&iocom->msg_ctl);
  514         while (iocom->msgrd_td) {
  515                 wakeup(&iocom->msg_ctl);
  516                 tsleep(iocom, 0, "clstrkw", hz);
  517         }
  518         lockmgr(&iocom->msglk, LK_EXCLUSIVE);
  519 
  520         /*
  521          * Simulate received MSGF_DELETE's for any remaining states.
  522          * (For remote masters).
  523          *
  524          * Drain the message queue to handle any device initiated writes
  525          * due to state callbacks.
               *
               * msglk is dropped around kdmsg_state_abort(), and the scan
               * restarts from the top after each abort.
  526          */
  527 cleanuprd:
  528         kdmsg_drain_msgq(iocom);
  529         RB_FOREACH(state, kdmsg_state_tree, &iocom->staterd_tree) {
  530                 if ((state->rxcmd & DMSGF_DELETE) == 0) {
  531                         lockmgr(&iocom->msglk, LK_RELEASE);
  532                         kdmsg_state_abort(state);
  533                         lockmgr(&iocom->msglk, LK_EXCLUSIVE);
  534                         goto cleanuprd;
  535                 }
  536         }
  537 
  538         /*
  539          * Simulate received MSGF_DELETE's for any remaining states.
  540          * (For local masters).
  541          */
  542 cleanupwr:
  543         kdmsg_drain_msgq(iocom);
  544         RB_FOREACH(state, kdmsg_state_tree, &iocom->statewr_tree) {
  545                 if ((state->rxcmd & DMSGF_DELETE) == 0) {
  546                         lockmgr(&iocom->msglk, LK_RELEASE);
  547                         kdmsg_state_abort(state);
  548                         lockmgr(&iocom->msglk, LK_EXCLUSIVE);
  549                         goto cleanupwr;
  550                 }
  551         }
  552 
  553         /*
  554          * Retry until all work is done
               *
               * The retry counter is decremented on every pass through this
               * point and the thread panics when it reaches zero.
  555          */
  556         if (--retries == 0)
  557                 panic("kdmsg: comm thread shutdown couldn't drain");
  558         if (TAILQ_FIRST(&iocom->msgq) ||
  559             RB_ROOT(&iocom->staterd_tree) ||
  560             RB_ROOT(&iocom->statewr_tree)) {
  561                 goto cleanuprd;
  562         }
  563         iocom->flags |= KDMSG_IOCOMF_EXITNOACC;
  564 
  565         lockmgr(&iocom->msglk, LK_RELEASE);
  566 
  567         /*
  568          * The state trees had better be empty now
  569          */
  570         KKASSERT(RB_EMPTY(&iocom->staterd_tree));
  571         KKASSERT(RB_EMPTY(&iocom->statewr_tree));
  572         KKASSERT(iocom->conn_state == NULL);
  573 
  574         if (iocom->exit_func) {
  575                 /*
  576                  * iocom is invalid after we call the exit function.
  577                  */
  578                 iocom->msgwr_td = NULL;
  579                 iocom->exit_func(iocom);
  580         } else {
  581                 /*
  582                  * iocom can be ripped out from under us once msgwr_td is
  583                  * set to NULL.  The wakeup is safe.
  584                  */
  585                 iocom->msgwr_td = NULL;
  586                 wakeup(iocom);
  587         }
  588         lwkt_exit();
  589 }
  590 
  591 /*
  592  * This cleans out the pending transmit message queue, adjusting any
  593  * persistent states properly in the process.
  594  *
  595  * Caller must hold pmp->iocom.msglk
  596  */
  597 void
  598 kdmsg_drain_msgq(kdmsg_iocom_t *iocom)
  599 {
  600         kdmsg_msg_t *msg;
  601 
  602         /*
  603          * Clean out our pending transmit queue, executing the
  604          * appropriate state adjustments.  If this tries to open
  605          * any new outgoing transactions we have to loop up and
  606          * clean them out.
  607          */
  608         while ((msg = TAILQ_FIRST(&iocom->msgq)) != NULL) {
  609                 TAILQ_REMOVE(&iocom->msgq, msg, qentry);
  610                 lockmgr(&iocom->msglk, LK_RELEASE);
  611                 if (kdmsg_state_msgtx(msg))
  612                         kdmsg_msg_free(msg);
  613                 else
  614                         kdmsg_state_cleanuptx(msg);
  615                 lockmgr(&iocom->msglk, LK_EXCLUSIVE);
  616         }
  617 }
  618 
  619 /*
  620  * Do all processing required to handle a freshly received message
  621  * after its low level header has been validated.
  622  */
  623 static
  624 int
  625 kdmsg_msg_receive_handling(kdmsg_msg_t *msg)
  626 {
  627         kdmsg_iocom_t *iocom = msg->iocom;
  628         int error;
  629 
  630         /*
  631          * State machine tracking, state assignment for msg,
  632          * returns error and discard status.  Errors are fatal
  633          * to the connection except for EALREADY which forces
  634          * a discard without execution.
  635          */
  636         error = kdmsg_state_msgrx(msg);
  637         if (error) {
  638                 /*
  639                  * Raw protocol or connection error
  640                  */
  641                 kdmsg_msg_free(msg);
  642                 if (error == EALREADY)
  643                         error = 0;
  644         } else if (msg->state && msg->state->func) {
  645                 /*
  646                  * Message related to state which already has a
  647                  * handling function installed for it.
  648                  */
  649                 error = msg->state->func(msg->state, msg);
  650                 kdmsg_state_cleanuprx(msg);
  651         } else if (iocom->flags & KDMSG_IOCOMF_AUTOANY) {
  652                 error = kdmsg_autorxmsg(msg);
  653                 kdmsg_state_cleanuprx(msg);
  654         } else {
  655                 error = iocom->rcvmsg(msg);
  656                 kdmsg_state_cleanuprx(msg);
  657         }
  658         return error;
  659 }
  660 
  661 /*
  662  * Process circuit tracking (NEEDS WORK)
  663  */
  664 static
  665 int
  666 kdmsg_circ_msgrx(kdmsg_msg_t *msg)
  667 {
  668         kdmsg_circuit_t dummy;
  669         kdmsg_circuit_t *circ;
  670         int error = 0;
  671 
  672         if (msg->any.head.circuit) {
  673                 dummy.msgid = msg->any.head.circuit;
  674                 lwkt_gettoken(&kdmsg_token);
  675                 circ = RB_FIND(kdmsg_circuit_tree, &msg->iocom->circ_tree,
  676                                &dummy);
  677                 if (circ) {
  678                         msg->circ = circ;
  679                         kdmsg_circ_hold(circ);
  680                 }
  681                 if (circ == NULL) {
  682                         kprintf("KDMSG_CIRC_MSGRX CMD %08x: IOCOM %p "
  683                                 "Bad circuit %016jx\n",
  684                                 msg->any.head.cmd,
  685                                 msg->iocom,
  686                                 (intmax_t)msg->any.head.circuit);
  687                         kprintf("KDMSG_CIRC_MSGRX: Avail circuits: ");
  688                         RB_FOREACH(circ, kdmsg_circuit_tree,
  689                                    &msg->iocom->circ_tree) {
  690                                 kprintf(" %016jx", (intmax_t)circ->msgid);
  691                         }
  692                         kprintf("\n");
  693                         error = EINVAL;
  694                 }
  695                 lwkt_reltoken(&kdmsg_token);
  696         }
  697         return (error);
  698 }
  699 
  700 /*
  701  * Process state tracking for a message after reception, prior to
  702  * execution.
  703  *
  704  * Called with msglk held and the msg dequeued.
  705  *
  706  * All messages are called with dummy state and return actual state.
  707  * (One-off messages often just return the same dummy state).
  708  *
  709  * May request that the caller discard the message by returning EALREADY.
  710  * The message's state is not used in this case and is allowed to be NULL.
  711  *
  712  * --
  713  *
  714  * These routines handle persistent and command/reply message state via the
  715  * CREATE and DELETE flags.  The first message in a command or reply sequence
  716  * sets CREATE, the last message in a command or reply sequence sets DELETE.
  717  *
  718  * There can be any number of intermediate messages belonging to the same
  719  * sequence sent in between the CREATE message and the DELETE message,
  720  * which set neither flag.  This represents a streaming command or reply.
  721  *
  722  * Any command message received with CREATE set expects a reply sequence to
  723  * be returned.  Reply sequences work the same as command sequences except the
  724  * REPLY bit is also set.  Both the command side and reply side can
  725  * degenerate into a single message with both CREATE and DELETE set.  Note
  726  * that one side can be streaming and the other side not, or neither, or both.
  727  *
  728  * The msgid is unique for the initiator.  That is, two sides sending a new
  729  * message can use the same msgid without colliding.
  730  *
  731  * --
  732  *
  733  * ABORT sequences work by setting the ABORT flag along with normal message
  734  * state.  However, ABORTs can also be sent on half-closed messages, that is
  735  * even if the command or reply side has already sent a DELETE, as long as
  736  * the message has not been fully closed it can still send an ABORT+DELETE
  737  * to terminate the half-closed message state.
  738  *
  739  * Since ABORT+DELETEs can race we silently discard ABORT's for message
  740  * state which has already been fully closed.  REPLY+ABORT+DELETEs can
  741  * also race, and in this situation the other side might have already
  742  * initiated a new unrelated command with the same message id.  Since
  743  * the abort has not set the CREATE flag the situation can be detected
  744  * and the message will also be discarded.
  745  *
  746  * Non-blocking requests can be initiated with ABORT+CREATE[+DELETE].
  747  * The ABORT request is essentially integrated into the command instead
  748  * of being sent later on.  In this situation the command implementation
  749  * detects that CREATE and ABORT are both set (vs ABORT alone) and can
  750  * special-case non-blocking operation for the command.
  751  *
  752  * NOTE!  Messages with ABORT set without CREATE or DELETE are considered
  753  *        to be mid-stream aborts for command/reply sequences.  ABORTs on
  754  *        one-way messages are not supported.
  755  *
  756  * NOTE!  If a command sequence does not support aborts the ABORT flag is
  757  *        simply ignored.
  758  *
  759  * --
  760  *
  761  * One-off messages (no reply expected) are sent with neither CREATE nor DELETE
  762  * set.  One-off messages cannot be aborted and typically aren't processed
  763  * by these routines.  The REPLY bit can be used to distinguish whether a
  764  * one-off message is a command or reply.  For example, one-off replies
  765  * will typically just contain status updates.
  766  */
  767 static
  768 int
  769 kdmsg_state_msgrx(kdmsg_msg_t *msg)
  770 {
  771         kdmsg_iocom_t *iocom = msg->iocom;
  772         kdmsg_state_t *state;
  773         int error;
  774 
  775         /*
  776          * Make sure a state structure is ready to go in case we need a new
  777          * one.  This is the only routine which uses freerd_state so no
  778          * races are possible.
  779          */
  780         if ((state = iocom->freerd_state) == NULL) {
  781                 state = kmalloc(sizeof(*state), iocom->mmsg, M_WAITOK | M_ZERO);
  782                 state->flags = KDMSG_STATE_DYNAMIC;
  783                 iocom->freerd_state = state;
  784         }
  785 
  786         /*
  787          * Lock RB tree and locate existing persistent state, if any.
  788          *
  789          * If received msg is a command state is on staterd_tree.
  790          * If received msg is a reply state is on statewr_tree.
  791          */
  792         lockmgr(&iocom->msglk, LK_EXCLUSIVE);
  793 
  794         state->msgid = msg->any.head.msgid;
  795         state->circ = msg->circ;
  796         state->iocom = iocom;
  797         if (msg->any.head.cmd & DMSGF_REPLY)
  798                 state = RB_FIND(kdmsg_state_tree, &iocom->statewr_tree, state);
  799         else
  800                 state = RB_FIND(kdmsg_state_tree, &iocom->staterd_tree, state);
  801         msg->state = state;
  802 
  803         /*
  804          * Short-cut one-off or mid-stream messages (state may be NULL).
  805          */
  806         if ((msg->any.head.cmd & (DMSGF_CREATE | DMSGF_DELETE |
  807                                   DMSGF_ABORT)) == 0) {
  808                 lockmgr(&iocom->msglk, LK_RELEASE);
  809                 return(0);
  810         }
  811 
  812         /*
  813          * Switch on CREATE, DELETE, REPLY, and also handle ABORT from
  814          * inside the case statements.
  815          */
  816         switch(msg->any.head.cmd & (DMSGF_CREATE|DMSGF_DELETE|DMSGF_REPLY)) {
  817         case DMSGF_CREATE:
  818         case DMSGF_CREATE | DMSGF_DELETE:
  819                 /*
  820                  * New persistant command received.
  821                  */
  822                 if (state) {
  823                         kprintf("kdmsg_state_msgrx: duplicate transaction\n");
  824                         error = EINVAL;
  825                         break;
  826                 }
  827                 state = iocom->freerd_state;
  828                 iocom->freerd_state = NULL;
  829                 msg->state = state;
  830                 state->msg = msg;
  831                 state->icmd = msg->any.head.cmd & DMSGF_BASECMDMASK;
  832                 state->rxcmd = msg->any.head.cmd & ~DMSGF_DELETE;
  833                 state->txcmd = DMSGF_REPLY;
  834                 state->msgid = msg->any.head.msgid;
  835                 if ((state->circ = msg->circ) != NULL)
  836                         kdmsg_circ_hold(state->circ);
  837                 RB_INSERT(kdmsg_state_tree, &iocom->staterd_tree, state);
  838                 state->flags |= KDMSG_STATE_INSERTED;
  839                 error = 0;
  840                 break;
  841         case DMSGF_DELETE:
  842                 /*
  843                  * Persistent state is expected but might not exist if an
  844                  * ABORT+DELETE races the close.
  845                  */
  846                 if (state == NULL) {
  847                         if (msg->any.head.cmd & DMSGF_ABORT) {
  848                                 error = EALREADY;
  849                         } else {
  850                                 kprintf("kdmsg_state_msgrx: "
  851                                         "no state for DELETE\n");
  852                                 error = EINVAL;
  853                         }
  854                         break;
  855                 }
  856 
  857                 /*
  858                  * Handle another ABORT+DELETE case if the msgid has already
  859                  * been reused.
  860                  */
  861                 if ((state->rxcmd & DMSGF_CREATE) == 0) {
  862                         if (msg->any.head.cmd & DMSGF_ABORT) {
  863                                 error = EALREADY;
  864                         } else {
  865                                 kprintf("kdmsg_state_msgrx: "
  866                                         "state reused for DELETE\n");
  867                                 error = EINVAL;
  868                         }
  869                         break;
  870                 }
  871                 error = 0;
  872                 break;
  873         default:
  874                 /*
  875                  * Check for mid-stream ABORT command received, otherwise
  876                  * allow.
  877                  */
  878                 if (msg->any.head.cmd & DMSGF_ABORT) {
  879                         if (state == NULL ||
  880                             (state->rxcmd & DMSGF_CREATE) == 0) {
  881                                 error = EALREADY;
  882                                 break;
  883                         }
  884                 }
  885                 error = 0;
  886                 break;
  887         case DMSGF_REPLY | DMSGF_CREATE:
  888         case DMSGF_REPLY | DMSGF_CREATE | DMSGF_DELETE:
  889                 /*
  890                  * When receiving a reply with CREATE set the original
  891                  * persistent state message should already exist.
  892                  */
  893                 if (state == NULL) {
  894                         kprintf("kdmsg_state_msgrx: no state match for "
  895                                 "REPLY cmd=%08x msgid=%016jx\n",
  896                                 msg->any.head.cmd,
  897                                 (intmax_t)msg->any.head.msgid);
  898                         error = EINVAL;
  899                         break;
  900                 }
  901                 state->rxcmd = msg->any.head.cmd & ~DMSGF_DELETE;
  902                 error = 0;
  903                 break;
  904         case DMSGF_REPLY | DMSGF_DELETE:
  905                 /*
  906                  * Received REPLY+ABORT+DELETE in case where msgid has
  907                  * already been fully closed, ignore the message.
  908                  */
  909                 if (state == NULL) {
  910                         if (msg->any.head.cmd & DMSGF_ABORT) {
  911                                 error = EALREADY;
  912                         } else {
  913                                 kprintf("kdmsg_state_msgrx: no state match "
  914                                         "for REPLY|DELETE\n");
  915                                 error = EINVAL;
  916                         }
  917                         break;
  918                 }
  919 
  920                 /*
  921                  * Received REPLY+ABORT+DELETE in case where msgid has
  922                  * already been reused for an unrelated message,
  923                  * ignore the message.
  924                  */
  925                 if ((state->rxcmd & DMSGF_CREATE) == 0) {
  926                         if (msg->any.head.cmd & DMSGF_ABORT) {
  927                                 error = EALREADY;
  928                         } else {
  929                                 kprintf("kdmsg_state_msgrx: state reused "
  930                                         "for REPLY|DELETE\n");
  931                                 error = EINVAL;
  932                         }
  933                         break;
  934                 }
  935                 error = 0;
  936                 break;
  937         case DMSGF_REPLY:
  938                 /*
  939                  * Check for mid-stream ABORT reply received to sent command.
  940                  */
  941                 if (msg->any.head.cmd & DMSGF_ABORT) {
  942                         if (state == NULL ||
  943                             (state->rxcmd & DMSGF_CREATE) == 0) {
  944                                 error = EALREADY;
  945                                 break;
  946                         }
  947                 }
  948                 error = 0;
  949                 break;
  950         }
  951         lockmgr(&iocom->msglk, LK_RELEASE);
  952         return (error);
  953 }
  954 
  955 /*
  956  * Called instead of iocom->rcvmsg() if any of the AUTO flags are set.
  957  * This routine must call iocom->rcvmsg() for anything not automatically
  958  * handled.
  959  */
  960 static int
  961 kdmsg_autorxmsg(kdmsg_msg_t *msg)
  962 {
  963         kdmsg_iocom_t *iocom = msg->iocom;
  964         kdmsg_circuit_t *circ;
  965         int error = 0;
  966         uint32_t cmd;
  967 
  968         /*
  969          * Process a combination of the transaction command and the message
  970          * flags.  For the purposes of this routine, the message command is
  971          * only relevant when it initiates a transaction (where it is
  972          * recorded in icmd).
  973          */
  974         cmd = (msg->state ? msg->state->icmd : msg->any.head.cmd) &
  975               DMSGF_BASECMDMASK;
  976         cmd |= msg->any.head.cmd & (DMSGF_CREATE | DMSGF_DELETE | DMSGF_REPLY);
  977 
  978         switch(cmd) {
  979         case DMSG_LNK_CONN | DMSGF_CREATE:
  980         case DMSG_LNK_CONN | DMSGF_CREATE | DMSGF_DELETE:
  981                 /*
  982                  * Received LNK_CONN transaction.  Transmit response and
  983                  * leave transaction open, which allows the other end to
  984                  * start to the SPAN protocol.
  985                  *
  986                  * Handle shim after acknowledging the CONN.
  987                  */
  988                 if ((msg->any.head.cmd & DMSGF_DELETE) == 0) {
  989                         if (iocom->flags & KDMSG_IOCOMF_AUTOCONN) {
  990                                 kdmsg_msg_result(msg, 0);
  991                                 if (iocom->auto_callback)
  992                                         iocom->auto_callback(msg);
  993                         } else {
  994                                 error = iocom->rcvmsg(msg);
  995                         }
  996                         break;
  997                 }
  998                 /* fall through */
  999         case DMSG_LNK_CONN | DMSGF_DELETE:
 1000                 /*
 1001                  * This message is usually simulated after a link is lost
 1002                  * to clean up the transaction.
 1003                  */
 1004                 if (iocom->flags & KDMSG_IOCOMF_AUTOCONN) {
 1005                         if (iocom->auto_callback)
 1006                                 iocom->auto_callback(msg);
 1007                         kdmsg_msg_reply(msg, 0);
 1008                 } else {
 1009                         error = iocom->rcvmsg(msg);
 1010                 }
 1011                 break;
 1012         case DMSG_LNK_SPAN | DMSGF_CREATE:
 1013         case DMSG_LNK_SPAN | DMSGF_CREATE | DMSGF_DELETE:
 1014                 /*
 1015                  * Received LNK_SPAN transaction.  We do not have to respond
 1016                  * but we must leave the transaction open.
 1017                  *
 1018                  * If AUTOCIRC is set automatically initiate a virtual circuit
 1019                  * to the received span.  This will attach a kdmsg_circuit
 1020                  * to the SPAN state.  The circuit is lost when the span is
 1021                  * lost.
 1022                  *
 1023                  * Handle shim after acknowledging the SPAN.
 1024                  */
 1025                 if (iocom->flags & KDMSG_IOCOMF_AUTOSPAN) {
 1026                         if ((msg->any.head.cmd & DMSGF_DELETE) == 0) {
 1027                                 if (iocom->flags & KDMSG_IOCOMF_AUTOFORGE)
 1028                                         kdmsg_autocirc(msg);
 1029                                 if (iocom->auto_callback)
 1030                                         iocom->auto_callback(msg);
 1031                                 break;
 1032                         }
 1033                         /* fall through */
 1034                 } else {
 1035                         error = iocom->rcvmsg(msg);
 1036                         break;
 1037                 }
 1038                 /* fall through */
 1039         case DMSG_LNK_SPAN | DMSGF_DELETE:
 1040                 /*
 1041                  * Process shims (auto_callback) before cleaning up the
 1042                  * circuit structure and closing the transactions.  Device
 1043                  * driver should ensure that the circuit is not used after
 1044                  * the auto_callback() returns.
 1045                  *
 1046                  * Handle shim before closing the SPAN transaction.
 1047                  */
 1048                 if (iocom->flags & KDMSG_IOCOMF_AUTOSPAN) {
 1049                         if (iocom->auto_callback)
 1050                                 iocom->auto_callback(msg);
 1051                         if (iocom->flags & KDMSG_IOCOMF_AUTOFORGE)
 1052                                 kdmsg_autocirc(msg);
 1053                         kdmsg_msg_reply(msg, 0);
 1054                 } else {
 1055                         error = iocom->rcvmsg(msg);
 1056                 }
 1057                 break;
 1058         case DMSG_LNK_CIRC | DMSGF_CREATE:
 1059         case DMSG_LNK_CIRC | DMSGF_CREATE | DMSGF_DELETE:
 1060                 /*
 1061                  * Received LNK_CIRC transaction.  We must respond and should
 1062                  * leave the transaction open, allowing the circuit.  The
 1063                  * remote can start issuing commands to us over the circuit
 1064                  * even before we respond.
 1065                  */
 1066                 if (iocom->flags & KDMSG_IOCOMF_AUTOCIRC) {
 1067                         if ((msg->any.head.cmd & DMSGF_DELETE) == 0) {
 1068                                 circ = kmalloc(sizeof(*circ), iocom->mmsg,
 1069                                                M_WAITOK | M_ZERO);
 1070                                 lwkt_gettoken(&kdmsg_token);
 1071                                 msg->state->any.circ = circ;
 1072                                 circ->iocom = iocom;
 1073                                 circ->rcirc_state = msg->state;
 1074                                 kdmsg_circ_hold(circ);  /* for rcirc_state */
 1075                                 circ->weight = 0;
 1076                                 circ->msgid = circ->rcirc_state->msgid;
 1077                                 /* XXX no span link for received circuits */
 1078                                 kdmsg_circ_hold(circ);  /* for circ_state */
 1079 
 1080                                 if (RB_INSERT(kdmsg_circuit_tree,
 1081                                               &iocom->circ_tree, circ)) {
 1082                                         panic("duplicate circuitid allocated");
 1083                                 }
 1084                                 lwkt_reltoken(&kdmsg_token);
 1085                                 kdmsg_msg_result(msg, 0);
 1086 
 1087                                 /*
 1088                                  * Handle shim after adding the circuit and
 1089                                  * after acknowledging the CIRC.
 1090                                  */
 1091                                 if (iocom->auto_callback)
 1092                                         iocom->auto_callback(msg);
 1093                                 break;
 1094                         }
 1095                         /* fall through */
 1096                 } else {
 1097                         error = iocom->rcvmsg(msg);
 1098                         break;
 1099                 }
 1100                 /* fall through */
 1101         case DMSG_LNK_CIRC | DMSGF_DELETE:
 1102                 if (iocom->flags & KDMSG_IOCOMF_AUTOCIRC) {
 1103                         circ = msg->state->any.circ;
 1104                         if (circ == NULL)
 1105                                 break;
 1106 
 1107                         /*
 1108                          * Handle shim before terminating the circuit.
 1109                          */
 1110 #if 0
 1111                         kprintf("KDMSG VC: RECEIVE CIRC DELETE "
 1112                                 "IOCOM %p MSGID %016jx\n",
 1113                                 msg->iocom, circ->msgid);
 1114 #endif
 1115                         if (iocom->auto_callback)
 1116                                 iocom->auto_callback(msg);
 1117 
 1118                         KKASSERT(circ->rcirc_state == msg->state);
 1119                         lwkt_gettoken(&kdmsg_token);
 1120                         circ->rcirc_state = NULL;
 1121                         msg->state->any.circ = NULL;
 1122                         RB_REMOVE(kdmsg_circuit_tree, &iocom->circ_tree, circ);
 1123                         lwkt_reltoken(&kdmsg_token);
 1124                         kdmsg_circ_drop(circ);  /* for rcirc_state */
 1125                         kdmsg_msg_reply(msg, 0);
 1126                 } else {
 1127                         error = iocom->rcvmsg(msg);
 1128                 }
 1129                 break;
 1130         default:
 1131                 /*
 1132                  * Anything unhandled goes into rcvmsg.
 1133                  *
 1134                  * NOTE: Replies to link-level messages initiated by our side
 1135                  *       are handled by the state callback, they are NOT
 1136                  *       handled here.
 1137                  */
 1138                 error = iocom->rcvmsg(msg);
 1139                 break;
 1140         }
 1141         return (error);
 1142 }
 1143 
 1144 /*
 1145  * Handle automatic forging of virtual circuits based on received SPANs.
 1146  * (AUTOFORGE).  Note that other code handles tracking received circuit
 1147  * transactions (AUTOCIRC).
 1148  *
 1149  * We can ignore non-transactions here.  Use trans->icmd to test the
 1150  * transactional command (once past the CREATE the individual message
 1151  * commands are not usually the icmd).
 1152  *
 1153  * XXX locks
 1154  */
 1155 static
 1156 void
 1157 kdmsg_autocirc(kdmsg_msg_t *msg)
 1158 {
 1159         kdmsg_iocom_t *iocom = msg->iocom;
 1160         kdmsg_circuit_t *circ;
 1161         kdmsg_msg_t *xmsg;      /* CIRC */
 1162 
 1163         if (msg->state == NULL)
 1164                 return;
 1165 
 1166         /*
 1167          * Gaining the SPAN, automatically forge a circuit to the target.
 1168          *
 1169          * NOTE!! The shim is not executed until we receive an acknowlegement
 1170          *        to our forged LNK_CIRC (see kdmsg_autocirc_reply()).
 1171          */
 1172         if (msg->state->icmd == DMSG_LNK_SPAN &&
 1173             (msg->any.head.cmd & DMSGF_CREATE)) {
 1174                 circ = kmalloc(sizeof(*circ), iocom->mmsg, M_WAITOK | M_ZERO);
 1175                 lwkt_gettoken(&kdmsg_token);
 1176                 msg->state->any.circ = circ;
 1177                 circ->iocom = iocom;
 1178                 circ->span_state = msg->state;
 1179                 kdmsg_circ_hold(circ);  /* for span_state */
 1180                 xmsg = kdmsg_msg_alloc(iocom, NULL,
 1181                                        DMSG_LNK_CIRC | DMSGF_CREATE,
 1182                                        kdmsg_autocirc_reply, circ);
 1183                 circ->circ_state = xmsg->state;
 1184                 circ->weight = msg->any.lnk_span.dist;
 1185                 circ->msgid = circ->circ_state->msgid;
 1186                 kdmsg_circ_hold(circ);  /* for circ_state */
 1187 #if 0
 1188                 kprintf("KDMSG VC: CREATE SPAN->CIRC IOCOM %p MSGID %016jx\n",
 1189                         msg->iocom, circ->msgid);
 1190 #endif
 1191 
 1192                 if (RB_INSERT(kdmsg_circuit_tree, &iocom->circ_tree, circ))
 1193                         panic("duplicate circuitid allocated");
 1194                 lwkt_reltoken(&kdmsg_token);
 1195 
 1196                 xmsg->any.lnk_circ.target = msg->any.head.msgid;
 1197                 kdmsg_msg_write(xmsg);
 1198         }
 1199 
 1200         /*
 1201          * Losing the SPAN
 1202          *
 1203          * NOTE: When losing a SPAN, any circuits using the span should be
 1204          *       deleted by the remote end first.  XXX might not be ordered
 1205          *       on actual loss of connection.
 1206          */
 1207         if (msg->state->icmd == DMSG_LNK_SPAN &&
 1208             (msg->any.head.cmd & DMSGF_DELETE) &&
 1209             msg->state->any.circ) {
 1210                 circ = msg->state->any.circ;
 1211                 lwkt_gettoken(&kdmsg_token);
 1212                 circ->span_state = NULL;
 1213                 msg->state->any.circ = NULL;
 1214                 RB_REMOVE(kdmsg_circuit_tree, &iocom->circ_tree, circ);
 1215 #if 0
 1216                 kprintf("KDMSG VC: DELETE SPAN->CIRC IOCOM %p MSGID %016jx\n",
 1217                         msg->iocom, (intmax_t)circ->msgid);
 1218 #endif
 1219                 kdmsg_circ_drop(circ);  /* for span_state */
 1220                 lwkt_reltoken(&kdmsg_token);
 1221         }
 1222 }
 1223 
 1224 static
 1225 int
 1226 kdmsg_autocirc_reply(kdmsg_state_t *state, kdmsg_msg_t *msg)
 1227 {
 1228         kdmsg_iocom_t *iocom = state->iocom;
 1229         kdmsg_circuit_t *circ = state->any.circ;
 1230 
 1231         /*
 1232          * Call shim after receiving an acknowlegement to our forged
 1233          * circuit and before processing a received termination.
 1234          */
 1235         if (iocom->auto_callback)
 1236                 iocom->auto_callback(msg);
 1237 
 1238         /*
 1239          * If the remote is terminating the VC we terminate our side
 1240          */
 1241         if ((state->txcmd & DMSGF_DELETE) == 0 &&
 1242             (msg->any.head.cmd & DMSGF_DELETE)) {
 1243 #if 0
 1244                 kprintf("KDMSG VC: DELETE CIRC FROM REMOTE\n");
 1245 #endif
 1246                 lwkt_gettoken(&kdmsg_token);
 1247                 circ->circ_state = NULL;
 1248                 state->any.circ = NULL;
 1249                 kdmsg_circ_drop(circ);          /* for circ_state */
 1250                 lwkt_reltoken(&kdmsg_token);
 1251                 kdmsg_msg_reply(msg, 0);
 1252         }
 1253         return (0);
 1254 }
 1255 
 1256 /*
 1257  * Post-receive-handling message and state cleanup.  This routine is called
 1258  * after the state function handling/callback to properly dispose of the
 1259  * message and update or dispose of the state.
 1260  */
 1261 static
 1262 void
 1263 kdmsg_state_cleanuprx(kdmsg_msg_t *msg)
 1264 {
 1265         kdmsg_iocom_t *iocom = msg->iocom;
 1266         kdmsg_state_t *state;
 1267 
 1268         if ((state = msg->state) == NULL) {
 1269                 kdmsg_msg_free(msg);
 1270         } else if (msg->any.head.cmd & DMSGF_DELETE) {
 1271                 lockmgr(&iocom->msglk, LK_EXCLUSIVE);
 1272                 KKASSERT((state->rxcmd & DMSGF_DELETE) == 0);
 1273                 state->rxcmd |= DMSGF_DELETE;
 1274                 if (state->txcmd & DMSGF_DELETE) {
 1275                         KKASSERT(state->flags & KDMSG_STATE_INSERTED);
 1276                         if (state->rxcmd & DMSGF_REPLY) {
 1277                                 KKASSERT(msg->any.head.cmd &
 1278                                          DMSGF_REPLY);
 1279                                 RB_REMOVE(kdmsg_state_tree,
 1280                                           &iocom->statewr_tree, state);
 1281                         } else {
 1282                                 KKASSERT((msg->any.head.cmd &
 1283                                           DMSGF_REPLY) == 0);
 1284                                 RB_REMOVE(kdmsg_state_tree,
 1285                                           &iocom->staterd_tree, state);
 1286                         }
 1287                         state->flags &= ~KDMSG_STATE_INSERTED;
 1288                         if (msg != state->msg)
 1289                                 kdmsg_msg_free(msg);
 1290                         lockmgr(&iocom->msglk, LK_RELEASE);
 1291                         kdmsg_state_free(state);
 1292                 } else {
 1293                         if (msg != state->msg)
 1294                                 kdmsg_msg_free(msg);
 1295                         lockmgr(&iocom->msglk, LK_RELEASE);
 1296                 }
 1297         } else if (msg != state->msg) {
 1298                 kdmsg_msg_free(msg);
 1299         }
 1300 }
 1301 
 1302 /*
 1303  * Simulate receiving a message which terminates an active transaction
 1304  * state.  Our simulated received message must set DELETE and may also
 1305  * have to set CREATE.  It must also ensure that all fields are set such
 1306  * that the receive handling code can find the state (kdmsg_state_msgrx())
 1307  * or an endless loop will ensue.
 1308  *
 1309  * This is used when the other end of the link or virtual circuit is dead
 1310  * so the device driver gets a completed transaction for all pending states.
 1311  */
 1312 static
 1313 void
 1314 kdmsg_state_abort(kdmsg_state_t *state)
 1315 {
 1316         kdmsg_iocom_t *iocom = state->iocom;
 1317         kdmsg_msg_t *msg;
 1318 
 1319         /*
 1320          * Prevent recursive aborts which could otherwise occur if the
 1321          * simulated message reception runs state->func which then turns
 1322          * around and tries to reply to a broken circuit when then calls
 1323          * the state abort code again.
 1324          */
 1325         if (state->flags & KDMSG_STATE_ABORTING)
 1326                 return;
 1327         state->flags |= KDMSG_STATE_ABORTING;
 1328 
 1329         /*
 1330          * Simulatem essage reception
 1331          */
 1332         msg = kdmsg_msg_alloc(iocom, state->circ,
 1333                               DMSG_LNK_ERROR,
 1334                               NULL, NULL);
 1335         if ((state->rxcmd & DMSGF_CREATE) == 0)
 1336                 msg->any.head.cmd |= DMSGF_CREATE;
 1337         msg->any.head.cmd |= DMSGF_DELETE | (state->rxcmd & DMSGF_REPLY);
 1338         msg->any.head.error = DMSG_ERR_LOSTLINK;
 1339         msg->any.head.msgid = state->msgid;
 1340         msg->state = state;
 1341         kdmsg_msg_receive_handling(msg);
 1342 }
 1343 
 1344 /*
 1345  * Process state tracking for a message prior to transmission.
 1346  *
 1347  * Called with msglk held and the msg dequeued.  Returns non-zero if
 1348  * the message is bad and should be deleted by the caller.
 1349  *
 1350  * One-off messages are usually with dummy state and msg->state may be NULL
 1351  * in this situation.
 1352  *
 1353  * New transactions (when CREATE is set) will insert the state.
 1354  *
 1355  * May request that caller discard the message by setting *discardp to 1.
 1356  * A NULL state may be returned in this case.
 1357  */
 1358 static
 1359 int
 1360 kdmsg_state_msgtx(kdmsg_msg_t *msg)
 1361 {
 1362         kdmsg_iocom_t *iocom = msg->iocom;
 1363         kdmsg_state_t *state;
 1364         int error;
 1365 
 1366         /*
 1367          * Make sure a state structure is ready to go in case we need a new
 1368          * one.  This is the only routine which uses freewr_state so no
 1369          * races are possible.
 1370          */
 1371         if ((state = iocom->freewr_state) == NULL) {
 1372                 state = kmalloc(sizeof(*state), iocom->mmsg, M_WAITOK | M_ZERO);
 1373                 state->flags = KDMSG_STATE_DYNAMIC;
 1374                 state->iocom = iocom;
 1375                 iocom->freewr_state = state;
 1376         }
 1377 
 1378         /*
 1379          * Lock RB tree.  If persistent state is present it will have already
 1380          * been assigned to msg.
 1381          */
 1382         lockmgr(&iocom->msglk, LK_EXCLUSIVE);
 1383         state = msg->state;
 1384 
 1385         /*
 1386          * Short-cut one-off or mid-stream messages (state may be NULL).
 1387          */
 1388         if ((msg->any.head.cmd & (DMSGF_CREATE | DMSGF_DELETE |
 1389                                   DMSGF_ABORT)) == 0) {
 1390                 lockmgr(&iocom->msglk, LK_RELEASE);
 1391                 return(0);
 1392         }
 1393 
 1394 
 1395         /*
 1396          * Switch on CREATE, DELETE, REPLY, and also handle ABORT from
 1397          * inside the case statements.
 1398          */
 1399         switch(msg->any.head.cmd & (DMSGF_CREATE | DMSGF_DELETE |
 1400                                     DMSGF_REPLY)) {
 1401         case DMSGF_CREATE:
 1402         case DMSGF_CREATE | DMSGF_DELETE:
 1403                 /*
 1404                  * Insert the new persistent message state and mark
 1405                  * half-closed if DELETE is set.  Since this is a new
 1406                  * message it isn't possible to transition into the fully
 1407                  * closed state here.
 1408                  *
 1409                  * XXX state must be assigned and inserted by
 1410                  *     kdmsg_msg_write().  txcmd is assigned by us
 1411                  *     on-transmit.
 1412                  */
 1413                 KKASSERT(state != NULL);
 1414                 state->icmd = msg->any.head.cmd & DMSGF_BASECMDMASK;
 1415                 state->txcmd = msg->any.head.cmd & ~DMSGF_DELETE;
 1416                 state->rxcmd = DMSGF_REPLY;
 1417                 error = 0;
 1418                 break;
 1419         case DMSGF_DELETE:
 1420                 /*
 1421                  * Sent ABORT+DELETE in case where msgid has already
 1422                  * been fully closed, ignore the message.
 1423                  */
 1424                 if (state == NULL) {
 1425                         if (msg->any.head.cmd & DMSGF_ABORT) {
 1426                                 error = EALREADY;
 1427                         } else {
 1428                                 kprintf("kdmsg_state_msgtx: no state match "
 1429                                         "for DELETE cmd=%08x msgid=%016jx\n",
 1430                                         msg->any.head.cmd,
 1431                                         (intmax_t)msg->any.head.msgid);
 1432                                 error = EINVAL;
 1433                         }
 1434                         break;
 1435                 }
 1436 
 1437                 /*
 1438                  * Sent ABORT+DELETE in case where msgid has
 1439                  * already been reused for an unrelated message,
 1440                  * ignore the message.
 1441                  */
 1442                 if ((state->txcmd & DMSGF_CREATE) == 0) {
 1443                         if (msg->any.head.cmd & DMSGF_ABORT) {
 1444                                 error = EALREADY;
 1445                         } else {
 1446                                 kprintf("kdmsg_state_msgtx: state reused "
 1447                                         "for DELETE\n");
 1448                                 error = EINVAL;
 1449                         }
 1450                         break;
 1451                 }
 1452                 error = 0;
 1453                 break;
 1454         default:
 1455                 /*
 1456                  * Check for mid-stream ABORT command sent
 1457                  */
 1458                 if (msg->any.head.cmd & DMSGF_ABORT) {
 1459                         if (state == NULL ||
 1460                             (state->txcmd & DMSGF_CREATE) == 0) {
 1461                                 error = EALREADY;
 1462                                 break;
 1463                         }
 1464                 }
 1465                 error = 0;
 1466                 break;
 1467         case DMSGF_REPLY | DMSGF_CREATE:
 1468         case DMSGF_REPLY | DMSGF_CREATE | DMSGF_DELETE:
 1469                 /*
 1470                  * When transmitting a reply with CREATE set the original
 1471                  * persistent state message should already exist.
 1472                  */
 1473                 if (state == NULL) {
 1474                         kprintf("kdmsg_state_msgtx: no state match "
 1475                                 "for REPLY | CREATE\n");
 1476                         error = EINVAL;
 1477                         break;
 1478                 }
 1479                 state->txcmd = msg->any.head.cmd & ~DMSGF_DELETE;
 1480                 error = 0;
 1481                 break;
 1482         case DMSGF_REPLY | DMSGF_DELETE:
 1483                 /*
 1484                  * When transmitting a reply with DELETE set the original
 1485                  * persistent state message should already exist.
 1486                  *
 1487                  * This is very similar to the REPLY|CREATE|* case except
 1488                  * txcmd is already stored, so we just add the DELETE flag.
 1489                  *
 1490                  * Sent REPLY+ABORT+DELETE in case where msgid has
 1491                  * already been fully closed, ignore the message.
 1492                  */
 1493                 if (state == NULL) {
 1494                         if (msg->any.head.cmd & DMSGF_ABORT) {
 1495                                 error = EALREADY;
 1496                         } else {
 1497                                 kprintf("kdmsg_state_msgtx: no state match "
 1498                                         "for REPLY | DELETE\n");
 1499                                 error = EINVAL;
 1500                         }
 1501                         break;
 1502                 }
 1503 
 1504                 /*
 1505                  * Sent REPLY+ABORT+DELETE in case where msgid has already
 1506                  * been reused for an unrelated message, ignore the message.
 1507                  */
 1508                 if ((state->txcmd & DMSGF_CREATE) == 0) {
 1509                         if (msg->any.head.cmd & DMSGF_ABORT) {
 1510                                 error = EALREADY;
 1511                         } else {
 1512                                 kprintf("kdmsg_state_msgtx: state reused "
 1513                                         "for REPLY | DELETE\n");
 1514                                 error = EINVAL;
 1515                         }
 1516                         break;
 1517                 }
 1518                 error = 0;
 1519                 break;
 1520         case DMSGF_REPLY:
 1521                 /*
 1522                  * Check for mid-stream ABORT reply sent.
 1523                  *
 1524                  * One-off REPLY messages are allowed for e.g. status updates.
 1525                  */
 1526                 if (msg->any.head.cmd & DMSGF_ABORT) {
 1527                         if (state == NULL ||
 1528                             (state->txcmd & DMSGF_CREATE) == 0) {
 1529                                 error = EALREADY;
 1530                                 break;
 1531                         }
 1532                 }
 1533                 error = 0;
 1534                 break;
 1535         }
 1536         lockmgr(&iocom->msglk, LK_RELEASE);
 1537         return (error);
 1538 }
 1539 
 1540 static
 1541 void
 1542 kdmsg_state_cleanuptx(kdmsg_msg_t *msg)
 1543 {
 1544         kdmsg_iocom_t *iocom = msg->iocom;
 1545         kdmsg_state_t *state;
 1546 
 1547         if ((state = msg->state) == NULL) {
 1548                 kdmsg_msg_free(msg);
 1549         } else if (msg->any.head.cmd & DMSGF_DELETE) {
 1550                 lockmgr(&iocom->msglk, LK_EXCLUSIVE);
 1551                 KKASSERT((state->txcmd & DMSGF_DELETE) == 0);
 1552                 state->txcmd |= DMSGF_DELETE;
 1553                 if (state->rxcmd & DMSGF_DELETE) {
 1554                         KKASSERT(state->flags & KDMSG_STATE_INSERTED);
 1555                         if (state->txcmd & DMSGF_REPLY) {
 1556                                 KKASSERT(msg->any.head.cmd &
 1557                                          DMSGF_REPLY);
 1558                                 RB_REMOVE(kdmsg_state_tree,
 1559                                           &iocom->staterd_tree, state);
 1560                         } else {
 1561                                 KKASSERT((msg->any.head.cmd &
 1562                                           DMSGF_REPLY) == 0);
 1563                                 RB_REMOVE(kdmsg_state_tree,
 1564                                           &iocom->statewr_tree, state);
 1565                         }
 1566                         state->flags &= ~KDMSG_STATE_INSERTED;
 1567                         if (msg != state->msg)
 1568                                 kdmsg_msg_free(msg);
 1569                         lockmgr(&iocom->msglk, LK_RELEASE);
 1570                         kdmsg_state_free(state);
 1571                 } else {
 1572                         if (msg != state->msg)
 1573                                 kdmsg_msg_free(msg);
 1574                         lockmgr(&iocom->msglk, LK_RELEASE);
 1575                 }
 1576         } else if (msg != state->msg) {
 1577                 kdmsg_msg_free(msg);
 1578         }
 1579 }
 1580 
 1581 static
 1582 void
 1583 kdmsg_state_free(kdmsg_state_t *state)
 1584 {
 1585         kdmsg_iocom_t *iocom = state->iocom;
 1586         kdmsg_msg_t *msg;
 1587 
 1588         KKASSERT((state->flags & KDMSG_STATE_INSERTED) == 0);
 1589         msg = state->msg;
 1590         state->msg = NULL;
 1591         kfree(state, iocom->mmsg);
 1592         if (msg) {
 1593                 msg->state = NULL;
 1594                 kdmsg_msg_free(msg);
 1595         }
 1596 }
 1597 
 1598 kdmsg_msg_t *
 1599 kdmsg_msg_alloc(kdmsg_iocom_t *iocom, kdmsg_circuit_t *circ, uint32_t cmd,
 1600                 int (*func)(kdmsg_state_t *, kdmsg_msg_t *), void *data)
 1601 {
 1602         kdmsg_msg_t *msg;
 1603         kdmsg_state_t *state;
 1604         size_t hbytes;
 1605 
 1606         KKASSERT(iocom != NULL);
 1607         hbytes = (cmd & DMSGF_SIZE) * DMSG_ALIGN;
 1608         msg = kmalloc(offsetof(struct kdmsg_msg, any) + hbytes,
 1609                       iocom->mmsg, M_WAITOK | M_ZERO);
 1610         msg->hdr_size = hbytes;
 1611         msg->iocom = iocom;
 1612         msg->any.head.magic = DMSG_HDR_MAGIC;
 1613         msg->any.head.cmd = cmd;
 1614         if (circ) {
 1615                 kdmsg_circ_hold(circ);
 1616                 msg->circ = circ;
 1617                 msg->any.head.circuit = circ->msgid;
 1618         }
 1619 
 1620         if (cmd & DMSGF_CREATE) {
 1621                 /*
 1622                  * New transaction, requires tracking state and a unique
 1623                  * msgid to be allocated.
 1624                  */
 1625                 KKASSERT(msg->state == NULL);
 1626                 state = kmalloc(sizeof(*state), iocom->mmsg, M_WAITOK | M_ZERO);
 1627                 state->flags = KDMSG_STATE_DYNAMIC;
 1628                 state->func = func;
 1629                 state->any.any = data;
 1630                 state->msg = msg;
 1631                 state->msgid = (uint64_t)(uintptr_t)state;
 1632                 state->circ = circ;
 1633                 state->iocom = iocom;
 1634                 msg->state = state;
 1635                 if (circ)
 1636                         kdmsg_circ_hold(circ);
 1637                 /*msg->any.head.msgid = state->msgid;XXX*/
 1638 
 1639                 lockmgr(&iocom->msglk, LK_EXCLUSIVE);
 1640                 if (RB_INSERT(kdmsg_state_tree, &iocom->statewr_tree, state))
 1641                         panic("duplicate msgid allocated");
 1642                 state->flags |= KDMSG_STATE_INSERTED;
 1643                 msg->any.head.msgid = state->msgid;
 1644                 lockmgr(&iocom->msglk, LK_RELEASE);
 1645         }
 1646         return (msg);
 1647 }
 1648 
 1649 kdmsg_msg_t *
 1650 kdmsg_msg_alloc_state(kdmsg_state_t *state, uint32_t cmd,
 1651                       int (*func)(kdmsg_state_t *, kdmsg_msg_t *), void *data)
 1652 {
 1653         kdmsg_iocom_t *iocom = state->iocom;
 1654         kdmsg_msg_t *msg;
 1655         size_t hbytes;
 1656 
 1657         KKASSERT(iocom != NULL);
 1658         hbytes = (cmd & DMSGF_SIZE) * DMSG_ALIGN;
 1659         msg = kmalloc(offsetof(struct kdmsg_msg, any) + hbytes,
 1660                       iocom->mmsg, M_WAITOK | M_ZERO);
 1661         msg->hdr_size = hbytes;
 1662         msg->iocom = iocom;
 1663         msg->any.head.magic = DMSG_HDR_MAGIC;
 1664         msg->any.head.cmd = cmd;
 1665         msg->state = state;
 1666         if (state->circ) {
 1667                 kdmsg_circ_hold(state->circ);
 1668                 msg->circ = state->circ;
 1669                 msg->any.head.circuit = state->circ->msgid;
 1670         }
 1671         return(msg);
 1672 }
 1673 
 1674 void
 1675 kdmsg_msg_free(kdmsg_msg_t *msg)
 1676 {
 1677         kdmsg_iocom_t *iocom = msg->iocom;
 1678 
 1679         if ((msg->flags & KDMSG_FLAG_AUXALLOC) &&
 1680             msg->aux_data && msg->aux_size) {
 1681                 kfree(msg->aux_data, iocom->mmsg);
 1682                 msg->flags &= ~KDMSG_FLAG_AUXALLOC;
 1683         }
 1684         if (msg->circ) {
 1685                 kdmsg_circ_drop(msg->circ);
 1686                 msg->circ = NULL;
 1687         }
 1688         if (msg->state) {
 1689                 if (msg->state->msg == msg)
 1690                         msg->state->msg = NULL;
 1691                 msg->state = NULL;
 1692         }
 1693         msg->aux_data = NULL;
 1694         msg->aux_size = 0;
 1695         msg->iocom = NULL;
 1696         kfree(msg, iocom->mmsg);
 1697 }
 1698 
 1699 /*
 1700  * Circuits are tracked in a red-black tree by their circuit id (msgid).
 1701  */
 1702 int
 1703 kdmsg_circuit_cmp(kdmsg_circuit_t *circ1, kdmsg_circuit_t *circ2)
 1704 {
 1705         if (circ1->msgid < circ2->msgid)
 1706                 return(-1);
 1707         if (circ1->msgid > circ2->msgid)
 1708                 return(1);
 1709         return (0);
 1710 }
 1711 
 1712 /*
 1713  * Indexed messages are stored in a red-black tree indexed by their
 1714  * msgid.  Only persistent messages are indexed.
 1715  */
 1716 int
 1717 kdmsg_state_cmp(kdmsg_state_t *state1, kdmsg_state_t *state2)
 1718 {
 1719         if (state1->iocom < state2->iocom)
 1720                 return(-1);
 1721         if (state1->iocom > state2->iocom)
 1722                 return(1);
 1723         if (state1->circ < state2->circ)
 1724                 return(-1);
 1725         if (state1->circ > state2->circ)
 1726                 return(1);
 1727         if (state1->msgid < state2->msgid)
 1728                 return(-1);
 1729         if (state1->msgid > state2->msgid)
 1730                 return(1);
 1731         return(0);
 1732 }
 1733 
 1734 /*
 1735  * Write a message.  All requisit command flags have been set.
 1736  *
 1737  * If msg->state is non-NULL the message is written to the existing
 1738  * transaction.  msgid will be set accordingly.
 1739  *
 1740  * If msg->state is NULL and CREATE is set new state is allocated and
 1741  * (func, data) is installed.  A msgid is assigned.
 1742  *
 1743  * If msg->state is NULL and CREATE is not set the message is assumed
 1744  * to be a one-way message.  The originator must assign the msgid
 1745  * (or leave it 0, which is typical.
 1746  *
 1747  * This function merely queues the message to the management thread, it
 1748  * does not write to the message socket/pipe.
 1749  */
 1750 void
 1751 kdmsg_msg_write(kdmsg_msg_t *msg)
 1752 {
 1753         kdmsg_iocom_t *iocom = msg->iocom;
 1754         kdmsg_state_t *state;
 1755 
 1756         if (msg->state) {
 1757                 /*
 1758                  * Continuance or termination of existing transaction.
 1759                  * The transaction could have been initiated by either end.
 1760                  *
 1761                  * (Function callback and aux data for the receive side can
 1762                  * be replaced or left alone).
 1763                  */
 1764                 state = msg->state;
 1765                 msg->any.head.msgid = state->msgid;
 1766                 lockmgr(&iocom->msglk, LK_EXCLUSIVE);
 1767         } else {
 1768                 /*
 1769                  * One-off message (always uses msgid 0 to distinguish
 1770                  * between a possibly lost in-transaction message due to
 1771                  * competing aborts and a real one-off message?)
 1772                  */
 1773                 state = NULL;
 1774                 msg->any.head.msgid = 0;
 1775                 lockmgr(&iocom->msglk, LK_EXCLUSIVE);
 1776         }
 1777 
 1778         /*
 1779          * With AUTOCIRC and AUTOFORGE it is possible for the circuit to
 1780          * get ripped out in the rxthread while some other thread is
 1781          * holding a ref on it inbetween allocating and sending a dmsg.
 1782          */
 1783         if (msg->circ && msg->circ->rcirc_state == NULL &&
 1784             (msg->circ->span_state == NULL || msg->circ->circ_state == NULL)) {
 1785                 kprintf("kdmsg_msg_write: Attempt to write message to "
 1786                         "terminated circuit: msg %08x\n", msg->any.head.cmd);
 1787                 lockmgr(&iocom->msglk, LK_RELEASE);
 1788                 if (kdmsg_state_msgtx(msg)) {
 1789                         if (state == NULL || msg != state->msg)
 1790                                 kdmsg_msg_free(msg);
 1791                 } else if ((msg->state->rxcmd & DMSGF_DELETE) == 0) {
 1792                         /* XXX SMP races simulating a response here */
 1793                         kdmsg_state_t *state = msg->state;
 1794                         kdmsg_state_cleanuptx(msg);
 1795                         kdmsg_state_abort(state);
 1796                 } else {
 1797                         kdmsg_state_cleanuptx(msg);
 1798                 }
 1799                 return;
 1800         }
 1801 
 1802         /*
 1803          * This flag is not set until after the tx thread has drained
 1804          * the txmsgq and simulated responses.  After that point the
 1805          * txthread is dead and can no longer simulate responses.
 1806          *
 1807          * Device drivers should never try to send a message once this
 1808          * flag is set.  They should have detected (through the state
 1809          * closures) that the link is in trouble.
 1810          */
 1811         if (iocom->flags & KDMSG_IOCOMF_EXITNOACC) {
 1812                 lockmgr(&iocom->msglk, LK_RELEASE);
 1813                 panic("kdmsg_msg_write: Attempt to write message to "
 1814                       "terminated iocom\n");
 1815         }
 1816 
 1817         /*
 1818          * Finish up the msg fields.  Note that msg->aux_size and the
 1819          * aux_bytes stored in the message header represent the unaligned
 1820          * (actual) bytes of data, but the buffer is sized to an aligned
 1821          * size and the CRC is generated over the aligned length.
 1822          */
 1823         msg->any.head.salt = /* (random << 8) | */ (iocom->msg_seq & 255);
 1824         ++iocom->msg_seq;
 1825 
 1826         if (msg->aux_data && msg->aux_size) {
 1827                 uint32_t abytes = DMSG_DOALIGN(msg->aux_size);
 1828 
 1829                 msg->any.head.aux_bytes = msg->aux_size;
 1830                 msg->any.head.aux_crc = iscsi_crc32(msg->aux_data, abytes);
 1831         }
 1832         msg->any.head.hdr_crc = 0;
 1833         msg->any.head.hdr_crc = iscsi_crc32(msg->any.buf, msg->hdr_size);
 1834 
 1835         TAILQ_INSERT_TAIL(&iocom->msgq, msg, qentry);
 1836 
 1837         if (iocom->msg_ctl & KDMSG_CLUSTERCTL_SLEEPING) {
 1838                 atomic_clear_int(&iocom->msg_ctl,
 1839                                  KDMSG_CLUSTERCTL_SLEEPING);
 1840                 wakeup(&iocom->msg_ctl);
 1841         }
 1842 
 1843         lockmgr(&iocom->msglk, LK_RELEASE);
 1844 }
 1845 
 1846 /*
 1847  * Reply to a message and terminate our side of the transaction.
 1848  *
 1849  * If msg->state is non-NULL we are replying to a one-way message.
 1850  */
 1851 void
 1852 kdmsg_msg_reply(kdmsg_msg_t *msg, uint32_t error)
 1853 {
 1854         kdmsg_state_t *state = msg->state;
 1855         kdmsg_msg_t *nmsg;
 1856         uint32_t cmd;
 1857 
 1858         /*
 1859          * Reply with a simple error code and terminate the transaction.
 1860          */
 1861         cmd = DMSG_LNK_ERROR;
 1862 
 1863         /*
 1864          * Check if our direction has even been initiated yet, set CREATE.
 1865          *
 1866          * Check what direction this is (command or reply direction).  Note
 1867          * that txcmd might not have been initiated yet.
 1868          *
 1869          * If our direction has already been closed we just return without
 1870          * doing anything.
 1871          */
 1872         if (state) {
 1873                 if (state->txcmd & DMSGF_DELETE)
 1874                         return;
 1875                 if ((state->txcmd & DMSGF_CREATE) == 0)
 1876                         cmd |= DMSGF_CREATE;
 1877                 if (state->txcmd & DMSGF_REPLY)
 1878                         cmd |= DMSGF_REPLY;
 1879                 cmd |= DMSGF_DELETE;
 1880         } else {
 1881                 if ((msg->any.head.cmd & DMSGF_REPLY) == 0)
 1882                         cmd |= DMSGF_REPLY;
 1883         }
 1884 
 1885         /* XXX messy mask cmd to avoid allocating state */
 1886         nmsg = kdmsg_msg_alloc_state(state, cmd, NULL, NULL);
 1887         nmsg->any.head.error = error;
 1888         kdmsg_msg_write(nmsg);
 1889 }
 1890 
 1891 /*
 1892  * Reply to a message and continue our side of the transaction.
 1893  *
 1894  * If msg->state is non-NULL we are replying to a one-way message and this
 1895  * function degenerates into the same as kdmsg_msg_reply().
 1896  */
 1897 void
 1898 kdmsg_msg_result(kdmsg_msg_t *msg, uint32_t error)
 1899 {
 1900         kdmsg_state_t *state = msg->state;
 1901         kdmsg_msg_t *nmsg;
 1902         uint32_t cmd;
 1903 
 1904         /*
 1905          * Return a simple result code, do NOT terminate the transaction.
 1906          */
 1907         cmd = DMSG_LNK_ERROR;
 1908 
 1909         /*
 1910          * Check if our direction has even been initiated yet, set CREATE.
 1911          *
 1912          * Check what direction this is (command or reply direction).  Note
 1913          * that txcmd might not have been initiated yet.
 1914          *
 1915          * If our direction has already been closed we just return without
 1916          * doing anything.
 1917          */
 1918         if (state) {
 1919                 if (state->txcmd & DMSGF_DELETE)
 1920                         return;
 1921                 if ((state->txcmd & DMSGF_CREATE) == 0)
 1922                         cmd |= DMSGF_CREATE;
 1923                 if (state->txcmd & DMSGF_REPLY)
 1924                         cmd |= DMSGF_REPLY;
 1925                 /* continuing transaction, do not set MSGF_DELETE */
 1926         } else {
 1927                 if ((msg->any.head.cmd & DMSGF_REPLY) == 0)
 1928                         cmd |= DMSGF_REPLY;
 1929         }
 1930 
 1931         /* XXX messy mask cmd to avoid allocating state */
 1932         nmsg = kdmsg_msg_alloc_state(state, cmd, NULL, NULL);
 1933         nmsg->any.head.error = error;
 1934         kdmsg_msg_write(nmsg);
 1935 }
 1936 
 1937 /*
 1938  * Reply to a message and terminate our side of the transaction.
 1939  *
 1940  * If msg->state is non-NULL we are replying to a one-way message.
 1941  */
 1942 void
 1943 kdmsg_state_reply(kdmsg_state_t *state, uint32_t error)
 1944 {
 1945         kdmsg_msg_t *nmsg;
 1946         uint32_t cmd;
 1947 
 1948         /*
 1949          * Reply with a simple error code and terminate the transaction.
 1950          */
 1951         cmd = DMSG_LNK_ERROR;
 1952 
 1953         /*
 1954          * Check if our direction has even been initiated yet, set CREATE.
 1955          *
 1956          * Check what direction this is (command or reply direction).  Note
 1957          * that txcmd might not have been initiated yet.
 1958          *
 1959          * If our direction has already been closed we just return without
 1960          * doing anything.
 1961          */
 1962         KKASSERT(state);
 1963         if (state->txcmd & DMSGF_DELETE)
 1964                 return;
 1965         if ((state->txcmd & DMSGF_CREATE) == 0)
 1966                 cmd |= DMSGF_CREATE;
 1967         if (state->txcmd & DMSGF_REPLY)
 1968                 cmd |= DMSGF_REPLY;
 1969         cmd |= DMSGF_DELETE;
 1970 
 1971         /* XXX messy mask cmd to avoid allocating state */
 1972         nmsg = kdmsg_msg_alloc_state(state, cmd, NULL, NULL);
 1973         nmsg->any.head.error = error;
 1974         kdmsg_msg_write(nmsg);
 1975 }
 1976 
 1977 /*
 1978  * Reply to a message and continue our side of the transaction.
 1979  *
 1980  * If msg->state is non-NULL we are replying to a one-way message and this
 1981  * function degenerates into the same as kdmsg_msg_reply().
 1982  */
 1983 void
 1984 kdmsg_state_result(kdmsg_state_t *state, uint32_t error)
 1985 {
 1986         kdmsg_msg_t *nmsg;
 1987         uint32_t cmd;
 1988 
 1989         /*
 1990          * Return a simple result code, do NOT terminate the transaction.
 1991          */
 1992         cmd = DMSG_LNK_ERROR;
 1993 
 1994         /*
 1995          * Check if our direction has even been initiated yet, set CREATE.
 1996          *
 1997          * Check what direction this is (command or reply direction).  Note
 1998          * that txcmd might not have been initiated yet.
 1999          *
 2000          * If our direction has already been closed we just return without
 2001          * doing anything.
 2002          */
 2003         KKASSERT(state);
 2004         if (state->txcmd & DMSGF_DELETE)
 2005                 return;
 2006         if ((state->txcmd & DMSGF_CREATE) == 0)
 2007                 cmd |= DMSGF_CREATE;
 2008         if (state->txcmd & DMSGF_REPLY)
 2009                 cmd |= DMSGF_REPLY;
 2010         /* continuing transaction, do not set MSGF_DELETE */
 2011 
 2012         /* XXX messy mask cmd to avoid allocating state */
 2013         nmsg = kdmsg_msg_alloc_state(state, cmd, NULL, NULL);
 2014         nmsg->any.head.error = error;
 2015         kdmsg_msg_write(nmsg);
 2016 }

Cache object: 3d021383b5e41a099a422abf9f79644d


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]


This page is part of the FreeBSD/Linux Linux Kernel Cross-Reference, and was automatically generated using a modified version of the LXR engine.