The Design and Implementation of the FreeBSD Operating System, Second Edition
Now available: The Design and Implementation of the FreeBSD Operating System (Second Edition)


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]

FreeBSD/Linux Kernel Cross Reference
sys/contrib/rdma/krping/krping.c

Version: -  FREEBSD  -  FREEBSD-13-STABLE  -  FREEBSD-13-0  -  FREEBSD-12-STABLE  -  FREEBSD-12-0  -  FREEBSD-11-STABLE  -  FREEBSD-11-0  -  FREEBSD-10-STABLE  -  FREEBSD-10-0  -  FREEBSD-9-STABLE  -  FREEBSD-9-0  -  FREEBSD-8-STABLE  -  FREEBSD-8-0  -  FREEBSD-7-STABLE  -  FREEBSD-7-0  -  FREEBSD-6-STABLE  -  FREEBSD-6-0  -  FREEBSD-5-STABLE  -  FREEBSD-5-0  -  FREEBSD-4-STABLE  -  FREEBSD-3-STABLE  -  FREEBSD22  -  l41  -  OPENBSD  -  linux-2.6  -  MK84  -  PLAN9  -  xnu-8792 
SearchContext: -  none  -  3  -  10 

    1 /*
    2  * Copyright (c) 2005 Ammasso, Inc. All rights reserved.
    3  * Copyright (c) 2006-2009 Open Grid Computing, Inc. All rights reserved.
    4  *
    5  * This software is available to you under a choice of one of two
    6  * licenses.  You may choose to be licensed under the terms of the GNU
    7  * General Public License (GPL) Version 2, available from the file
    8  * COPYING in the main directory of this source tree, or the
    9  * OpenIB.org BSD license below:
   10  *
   11  *     Redistribution and use in source and binary forms, with or
   12  *     without modification, are permitted provided that the following
   13  *     conditions are met:
   14  *
   15  *      - Redistributions of source code must retain the above
   16  *        copyright notice, this list of conditions and the following
   17  *        disclaimer.
   18  *
   19  *      - Redistributions in binary form must reproduce the above
   20  *        copyright notice, this list of conditions and the following
   21  *        disclaimer in the documentation and/or other materials
   22  *        provided with the distribution.
   23  *
   24  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
   25  * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
   26  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
   27  * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
   28  * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
   29  * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
   30  * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
   31  * SOFTWARE.
   32  */
   33 
   34 #include <sys/cdefs.h>
   35 __FBSDID("$FreeBSD$");
   36 
   37 #include <linux/module.h>
   38 #include <linux/moduleparam.h>
   39 #include <linux/slab.h>
   40 #include <linux/err.h>
   41 #include <linux/string.h>
   42 #include <linux/list.h>
   43 #include <linux/in.h>
   44 #include <linux/device.h>
   45 #include <linux/pci.h>
   46 #include <linux/sched.h>
   47 #include <linux/wait.h>
   48 
   49 #include <asm/atomic.h>
   50 
   51 #include <rdma/ib_verbs.h>
   52 #include <rdma/rdma_cm.h>
   53 
   54 #include "krping.h"
   55 #include "getopt.h"
   56 
   57 #define PFX "krping: "
   58 
   59 extern int krping_debug;
   60 #define DEBUG_LOG(...) do { if (krping_debug) log(LOG_INFO, __VA_ARGS__); } while (0)
   61 #define BIND_INFO 1
   62 
   63 MODULE_AUTHOR("Steve Wise");
   64 MODULE_DESCRIPTION("RDMA ping server");
   65 MODULE_LICENSE("Dual BSD/GPL");
   66 MODULE_VERSION(krping, 1);
   67 MODULE_DEPEND(krping, linuxkpi, 1, 1, 1);
   68 
   69 static __inline uint64_t
   70 get_cycles(void)
   71 {
   72         uint32_t low, high;
   73         __asm __volatile("rdtsc" : "=a" (low), "=d" (high));
   74         return (low | ((u_int64_t)high << 32));
   75 }
   76 
   77 typedef uint64_t cycles_t;
   78 
   79 enum mem_type {
   80         DMA = 1,
   81         REG = 2,
   82 };
   83 
   84 static const struct krping_option krping_opts[] = {
   85         {"count", OPT_INT, 'C'},
   86         {"size", OPT_INT, 'S'},
   87         {"addr", OPT_STRING, 'a'},
   88         {"addr6", OPT_STRING, 'A'},
   89         {"port", OPT_INT, 'p'},
   90         {"verbose", OPT_NOPARAM, 'v'},
   91         {"validate", OPT_NOPARAM, 'V'},
   92         {"server", OPT_NOPARAM, 's'},
   93         {"client", OPT_NOPARAM, 'c'},
   94         {"server_inv", OPT_NOPARAM, 'I'},
   95         {"wlat", OPT_NOPARAM, 'l'},
   96         {"rlat", OPT_NOPARAM, 'L'},
   97         {"bw", OPT_NOPARAM, 'B'},
   98         {"duplex", OPT_NOPARAM, 'd'},
   99         {"tos", OPT_INT, 't'},
  100         {"txdepth", OPT_INT, 'T'},
  101         {"poll", OPT_NOPARAM, 'P'},
  102         {"local_dma_lkey", OPT_NOPARAM, 'Z'},
  103         {"read_inv", OPT_NOPARAM, 'R'},
  104         {"fr", OPT_NOPARAM, 'f'},
  105         {NULL, 0, 0}
  106 };
  107 
  108 #define htonll(x) cpu_to_be64((x))
  109 #define ntohll(x) cpu_to_be64((x))
  110 
  111 static DEFINE_MUTEX(krping_mutex);
  112 
  113 /*
  114  * List of running krping threads.
  115  */
  116 static LIST_HEAD(krping_cbs);
  117 
  118 /*
  119  * Invoke like this, one on each side, using the server's address on
  120  * the RDMA device (iw%d):
  121  *
  122  * /bin/echo server,port=9999,addr=192.168.69.142,validate > /proc/krping  
  123  * /bin/echo client,port=9999,addr=192.168.69.142,validate > /proc/krping  
  124  * /bin/echo client,port=9999,addr6=2001:db8:0:f101::1,validate > /proc/krping
  125  *
  126  * krping "ping/pong" loop:
  127  *      client sends source rkey/addr/len
  128  *      server receives source rkey/addr/len
  129  *      server rdma reads "ping" data from source
  130  *      server sends "go ahead" on rdma read completion
  131  *      client sends sink rkey/addr/len
  132  *      server receives sink rkey/addr/len
  133  *      server rdma writes "pong" data to sink
  134  *      server sends "go ahead" on rdma write completion
  135  *      <repeat loop>
  136  */
  137 
  138 /*
  139  * These states are used to signal events between the completion handler
  140  * and the main client or server thread.
  141  *
  142  * Once CONNECTED, they cycle through RDMA_READ_ADV, RDMA_WRITE_ADV,
  143  * and RDMA_WRITE_COMPLETE for each ping.
  144  */
  145 enum test_state {
  146         IDLE = 1,
  147         CONNECT_REQUEST,
  148         ADDR_RESOLVED,
  149         ROUTE_RESOLVED,
  150         CONNECTED,
  151         RDMA_READ_ADV,
  152         RDMA_READ_COMPLETE,
  153         RDMA_WRITE_ADV,
  154         RDMA_WRITE_COMPLETE,
  155         ERROR
  156 };
  157 
  158 struct krping_rdma_info {
  159         uint64_t buf;
  160         uint32_t rkey;
  161         uint32_t size;
  162 };
  163 
  164 /*
  165  * Default max buffer size for IO...
  166  */
  167 #define RPING_BUFSIZE 128*1024
  168 #define RPING_SQ_DEPTH 64
  169 
  170 /*
  171  * Control block struct.
  172  */
  173 struct krping_cb {
  174         int server;                     /* 0 iff client */
  175         struct ib_cq *cq;
  176         struct ib_pd *pd;
  177         struct ib_qp *qp;
  178 
  179         struct ib_mr *dma_mr;
  180 
  181         struct ib_fast_reg_page_list *page_list;
  182         int page_list_len;
  183         struct ib_reg_wr reg_mr_wr;
  184         struct ib_send_wr invalidate_wr;
  185         struct ib_mr *reg_mr;
  186         int server_invalidate;
  187         int read_inv;
  188         u8 key;
  189 
  190         struct ib_recv_wr rq_wr;        /* recv work request record */
  191         struct ib_sge recv_sgl;         /* recv single SGE */
  192         struct krping_rdma_info recv_buf __aligned(16); /* malloc'd buffer */
  193         u64 recv_dma_addr;
  194         DECLARE_PCI_UNMAP_ADDR(recv_mapping)
  195 
  196         struct ib_send_wr sq_wr;        /* send work requrest record */
  197         struct ib_sge send_sgl;
  198         struct krping_rdma_info send_buf __aligned(16); /* single send buf */
  199         u64 send_dma_addr;
  200         DECLARE_PCI_UNMAP_ADDR(send_mapping)
  201 
  202         struct ib_rdma_wr rdma_sq_wr;   /* rdma work request record */
  203         struct ib_sge rdma_sgl;         /* rdma single SGE */
  204         char *rdma_buf;                 /* used as rdma sink */
  205         u64  rdma_dma_addr;
  206         DECLARE_PCI_UNMAP_ADDR(rdma_mapping)
  207         struct ib_mr *rdma_mr;
  208 
  209         uint32_t remote_rkey;           /* remote guys RKEY */
  210         uint64_t remote_addr;           /* remote guys TO */
  211         uint32_t remote_len;            /* remote guys LEN */
  212 
  213         char *start_buf;                /* rdma read src */
  214         u64  start_dma_addr;
  215         DECLARE_PCI_UNMAP_ADDR(start_mapping)
  216         struct ib_mr *start_mr;
  217 
  218         enum test_state state;          /* used for cond/signalling */
  219         wait_queue_head_t sem;
  220         struct krping_stats stats;
  221 
  222         uint16_t port;                  /* dst port in NBO */
  223         u8 addr[16] __aligned(8);       /* dst addr in NBO */
  224         char *addr_str;                 /* dst addr string */
  225         uint8_t addr_type;              /* ADDR_FAMILY - IPv4/V6 */
  226         int verbose;                    /* verbose logging */
  227         int count;                      /* ping count */
  228         int size;                       /* ping data size */
  229         int validate;                   /* validate ping data */
  230         int wlat;                       /* run wlat test */
  231         int rlat;                       /* run rlat test */
  232         int bw;                         /* run bw test */
  233         int duplex;                     /* run bw full duplex test */
  234         int poll;                       /* poll or block for rlat test */
  235         int txdepth;                    /* SQ depth */
  236         int local_dma_lkey;             /* use 0 for lkey */
  237         int frtest;                     /* reg test */
  238         int tos;                        /* type of service */
  239 
  240         /* CM stuff */
  241         struct rdma_cm_id *cm_id;       /* connection on client side,*/
  242                                         /* listener on server side. */
  243         struct rdma_cm_id *child_cm_id; /* connection on server side */
  244         struct list_head list;
  245 };
  246 
  247 static int krping_cma_event_handler(struct rdma_cm_id *cma_id,
  248                                    struct rdma_cm_event *event)
  249 {
  250         int ret;
  251         struct krping_cb *cb = cma_id->context;
  252 
  253         DEBUG_LOG("cma_event type %d cma_id %p (%s)\n", event->event, cma_id,
  254                   (cma_id == cb->cm_id) ? "parent" : "child");
  255 
  256         switch (event->event) {
  257         case RDMA_CM_EVENT_ADDR_RESOLVED:
  258                 cb->state = ADDR_RESOLVED;
  259                 ret = rdma_resolve_route(cma_id, 2000);
  260                 if (ret) {
  261                         printk(KERN_ERR PFX "rdma_resolve_route error %d\n", 
  262                                ret);
  263                         wake_up_interruptible(&cb->sem);
  264                 }
  265                 break;
  266 
  267         case RDMA_CM_EVENT_ROUTE_RESOLVED:
  268                 cb->state = ROUTE_RESOLVED;
  269                 wake_up_interruptible(&cb->sem);
  270                 break;
  271 
  272         case RDMA_CM_EVENT_CONNECT_REQUEST:
  273                 cb->state = CONNECT_REQUEST;
  274                 cb->child_cm_id = cma_id;
  275                 DEBUG_LOG("child cma %p\n", cb->child_cm_id);
  276                 wake_up_interruptible(&cb->sem);
  277                 break;
  278 
  279         case RDMA_CM_EVENT_ESTABLISHED:
  280                 DEBUG_LOG("ESTABLISHED\n");
  281                 if (!cb->server) {
  282                         cb->state = CONNECTED;
  283                 }
  284                 wake_up_interruptible(&cb->sem);
  285                 break;
  286 
  287         case RDMA_CM_EVENT_ADDR_ERROR:
  288         case RDMA_CM_EVENT_ROUTE_ERROR:
  289         case RDMA_CM_EVENT_CONNECT_ERROR:
  290         case RDMA_CM_EVENT_UNREACHABLE:
  291         case RDMA_CM_EVENT_REJECTED:
  292                 printk(KERN_ERR PFX "cma event %d, error %d\n", event->event,
  293                        event->status);
  294                 cb->state = ERROR;
  295                 wake_up_interruptible(&cb->sem);
  296                 break;
  297 
  298         case RDMA_CM_EVENT_DISCONNECTED:
  299                 printk(KERN_ERR PFX "DISCONNECT EVENT...\n");
  300                 cb->state = ERROR;
  301                 wake_up_interruptible(&cb->sem);
  302                 break;
  303 
  304         case RDMA_CM_EVENT_DEVICE_REMOVAL:
  305                 printk(KERN_ERR PFX "cma detected device removal!!!!\n");
  306                 cb->state = ERROR;
  307                 wake_up_interruptible(&cb->sem);
  308                 break;
  309 
  310         default:
  311                 printk(KERN_ERR PFX "oof bad type!\n");
  312                 wake_up_interruptible(&cb->sem);
  313                 break;
  314         }
  315         return 0;
  316 }
  317 
  318 static int server_recv(struct krping_cb *cb, struct ib_wc *wc)
  319 {
  320         if (wc->byte_len != sizeof(cb->recv_buf)) {
  321                 printk(KERN_ERR PFX "Received bogus data, size %d\n", 
  322                        wc->byte_len);
  323                 return -1;
  324         }
  325 
  326         cb->remote_rkey = ntohl(cb->recv_buf.rkey);
  327         cb->remote_addr = ntohll(cb->recv_buf.buf);
  328         cb->remote_len  = ntohl(cb->recv_buf.size);
  329         DEBUG_LOG("Received rkey %x addr %llx len %d from peer\n",
  330                   cb->remote_rkey, (unsigned long long)cb->remote_addr, 
  331                   cb->remote_len);
  332 
  333         if (cb->state <= CONNECTED || cb->state == RDMA_WRITE_COMPLETE)
  334                 cb->state = RDMA_READ_ADV;
  335         else
  336                 cb->state = RDMA_WRITE_ADV;
  337 
  338         return 0;
  339 }
  340 
  341 static int client_recv(struct krping_cb *cb, struct ib_wc *wc)
  342 {
  343         if (wc->byte_len != sizeof(cb->recv_buf)) {
  344                 printk(KERN_ERR PFX "Received bogus data, size %d\n", 
  345                        wc->byte_len);
  346                 return -1;
  347         }
  348 
  349         if (cb->state == RDMA_READ_ADV)
  350                 cb->state = RDMA_WRITE_ADV;
  351         else
  352                 cb->state = RDMA_WRITE_COMPLETE;
  353 
  354         return 0;
  355 }
  356 
  357 static void krping_cq_event_handler(struct ib_cq *cq, void *ctx)
  358 {
  359         struct krping_cb *cb = ctx;
  360         struct ib_wc wc;
  361         const struct ib_recv_wr *bad_wr;
  362         int ret;
  363 
  364         BUG_ON(cb->cq != cq);
  365         if (cb->frtest) {
  366                 printk(KERN_ERR PFX "cq completion event in frtest!\n");
  367                 return;
  368         }
  369         if (!cb->wlat && !cb->rlat && !cb->bw)
  370                 ib_req_notify_cq(cb->cq, IB_CQ_NEXT_COMP);
  371         while ((ret = ib_poll_cq(cb->cq, 1, &wc)) == 1) {
  372                 if (wc.status) {
  373                         if (wc.status == IB_WC_WR_FLUSH_ERR) {
  374                                 DEBUG_LOG("cq flushed\n");
  375                                 continue;
  376                         } else {
  377                                 printk(KERN_ERR PFX "cq completion failed with "
  378                                        "wr_id %jx status %d opcode %d vender_err %x\n",
  379                                         (uintmax_t)wc.wr_id, wc.status, wc.opcode, wc.vendor_err);
  380                                 goto error;
  381                         }
  382                 }
  383                 if (cb->state == ERROR) {
  384                         printk(KERN_ERR PFX "cq completion in ERROR state\n");
  385                         return;
  386                 }
  387                 switch (wc.opcode) {
  388                 case IB_WC_SEND:
  389                         DEBUG_LOG("send completion\n");
  390                         cb->stats.send_bytes += cb->send_sgl.length;
  391                         cb->stats.send_msgs++;
  392                         break;
  393 
  394                 case IB_WC_RDMA_WRITE:
  395                         DEBUG_LOG("rdma write completion\n");
  396                         cb->stats.write_bytes += cb->rdma_sq_wr.wr.sg_list->length;
  397                         cb->stats.write_msgs++;
  398                         cb->state = RDMA_WRITE_COMPLETE;
  399                         wake_up_interruptible(&cb->sem);
  400                         break;
  401 
  402                 case IB_WC_RDMA_READ:
  403                         DEBUG_LOG("rdma read completion\n");
  404                         cb->stats.read_bytes += cb->rdma_sq_wr.wr.sg_list->length;
  405                         cb->stats.read_msgs++;
  406                         cb->state = RDMA_READ_COMPLETE;
  407                         wake_up_interruptible(&cb->sem);
  408                         break;
  409 
  410                 case IB_WC_RECV:
  411                         DEBUG_LOG("recv completion\n");
  412                         cb->stats.recv_bytes += sizeof(cb->recv_buf);
  413                         cb->stats.recv_msgs++;
  414                         if (cb->wlat || cb->rlat || cb->bw)
  415                                 ret = server_recv(cb, &wc);
  416                         else
  417                                 ret = cb->server ? server_recv(cb, &wc) :
  418                                                    client_recv(cb, &wc);
  419                         if (ret) {
  420                                 printk(KERN_ERR PFX "recv wc error: %d\n", ret);
  421                                 goto error;
  422                         }
  423 
  424                         ret = ib_post_recv(cb->qp, &cb->rq_wr, &bad_wr);
  425                         if (ret) {
  426                                 printk(KERN_ERR PFX "post recv error: %d\n", 
  427                                        ret);
  428                                 goto error;
  429                         }
  430                         wake_up_interruptible(&cb->sem);
  431                         break;
  432 
  433                 default:
  434                         printk(KERN_ERR PFX
  435                                "%s:%d Unexpected opcode %d, Shutting down\n",
  436                                __func__, __LINE__, wc.opcode);
  437                         goto error;
  438                 }
  439         }
  440         if (ret) {
  441                 printk(KERN_ERR PFX "poll error %d\n", ret);
  442                 goto error;
  443         }
  444         return;
  445 error:
  446         cb->state = ERROR;
  447         wake_up_interruptible(&cb->sem);
  448 }
  449 
  450 static int krping_accept(struct krping_cb *cb)
  451 {
  452         struct rdma_conn_param conn_param;
  453         int ret;
  454 
  455         DEBUG_LOG("accepting client connection request\n");
  456 
  457         memset(&conn_param, 0, sizeof conn_param);
  458         conn_param.responder_resources = 1;
  459         conn_param.initiator_depth = 1;
  460 
  461         ret = rdma_accept(cb->child_cm_id, &conn_param);
  462         if (ret) {
  463                 printk(KERN_ERR PFX "rdma_accept error: %d\n", ret);
  464                 return ret;
  465         }
  466 
  467         if (!cb->wlat && !cb->rlat && !cb->bw) {
  468                 wait_event_interruptible(cb->sem, cb->state >= CONNECTED);
  469                 if (cb->state == ERROR) {
  470                         printk(KERN_ERR PFX "wait for CONNECTED state %d\n", 
  471                                 cb->state);
  472                         return -1;
  473                 }
  474         }
  475         return 0;
  476 }
  477 
  478 static void krping_setup_wr(struct krping_cb *cb)
  479 {
  480         cb->recv_sgl.addr = cb->recv_dma_addr;
  481         cb->recv_sgl.length = sizeof cb->recv_buf;
  482         cb->recv_sgl.lkey = cb->pd->local_dma_lkey;
  483         cb->rq_wr.sg_list = &cb->recv_sgl;
  484         cb->rq_wr.num_sge = 1;
  485 
  486         cb->send_sgl.addr = cb->send_dma_addr;
  487         cb->send_sgl.length = sizeof cb->send_buf;
  488         cb->send_sgl.lkey = cb->pd->local_dma_lkey;
  489 
  490         cb->sq_wr.opcode = IB_WR_SEND;
  491         cb->sq_wr.send_flags = IB_SEND_SIGNALED;
  492         cb->sq_wr.sg_list = &cb->send_sgl;
  493         cb->sq_wr.num_sge = 1;
  494 
  495         if (cb->server || cb->wlat || cb->rlat || cb->bw) {
  496                 cb->rdma_sgl.addr = cb->rdma_dma_addr;
  497                 cb->rdma_sq_wr.wr.send_flags = IB_SEND_SIGNALED;
  498                 cb->rdma_sq_wr.wr.sg_list = &cb->rdma_sgl;
  499                 cb->rdma_sq_wr.wr.num_sge = 1;
  500         }
  501 
  502         /* 
  503          * A chain of 2 WRs, INVALDATE_MR + REG_MR.
  504          * both unsignaled.  The client uses them to reregister
  505          * the rdma buffers with a new key each iteration.
  506          */
  507         cb->reg_mr_wr.wr.opcode = IB_WR_REG_MR;
  508         cb->reg_mr_wr.mr = cb->reg_mr;
  509 
  510         cb->invalidate_wr.next = &cb->reg_mr_wr.wr;
  511         cb->invalidate_wr.opcode = IB_WR_LOCAL_INV;
  512 }
  513 
  514 static int krping_setup_buffers(struct krping_cb *cb)
  515 {
  516         int ret;
  517 
  518         DEBUG_LOG(PFX "krping_setup_buffers called on cb %p\n", cb);
  519 
  520         cb->recv_dma_addr = ib_dma_map_single(cb->pd->device,
  521                                    &cb->recv_buf, 
  522                                    sizeof(cb->recv_buf), DMA_BIDIRECTIONAL);
  523         pci_unmap_addr_set(cb, recv_mapping, cb->recv_dma_addr);
  524         cb->send_dma_addr = ib_dma_map_single(cb->pd->device,
  525                                            &cb->send_buf, sizeof(cb->send_buf),
  526                                            DMA_BIDIRECTIONAL);
  527         pci_unmap_addr_set(cb, send_mapping, cb->send_dma_addr);
  528 
  529         cb->rdma_buf = ib_dma_alloc_coherent(cb->pd->device, cb->size,
  530                                              &cb->rdma_dma_addr,
  531                                              GFP_KERNEL);
  532         if (!cb->rdma_buf) {
  533                 DEBUG_LOG(PFX "rdma_buf allocation failed\n");
  534                 ret = -ENOMEM;
  535                 goto bail;
  536         }
  537         pci_unmap_addr_set(cb, rdma_mapping, cb->rdma_dma_addr);
  538         cb->page_list_len = (((cb->size - 1) & PAGE_MASK) + PAGE_SIZE)
  539                                 >> PAGE_SHIFT;
  540         cb->reg_mr = ib_alloc_mr(cb->pd,  IB_MR_TYPE_MEM_REG,
  541                                  cb->page_list_len);
  542         if (IS_ERR(cb->reg_mr)) {
  543                 ret = PTR_ERR(cb->reg_mr);
  544                 DEBUG_LOG(PFX "recv_buf reg_mr failed %d\n", ret);
  545                 goto bail;
  546         }
  547         DEBUG_LOG(PFX "reg rkey 0x%x page_list_len %u\n",
  548                 cb->reg_mr->rkey, cb->page_list_len);
  549 
  550         if (!cb->server || cb->wlat || cb->rlat || cb->bw) {
  551 
  552                 cb->start_buf = ib_dma_alloc_coherent(cb->pd->device, cb->size,
  553                                                       &cb->start_dma_addr,
  554                                                       GFP_KERNEL);
  555                 if (!cb->start_buf) {
  556                         DEBUG_LOG(PFX "start_buf malloc failed\n");
  557                         ret = -ENOMEM;
  558                         goto bail;
  559                 }
  560                 pci_unmap_addr_set(cb, start_mapping, cb->start_dma_addr);
  561         }
  562 
  563         krping_setup_wr(cb);
  564         DEBUG_LOG(PFX "allocated & registered buffers...\n");
  565         return 0;
  566 bail:
  567         if (cb->reg_mr && !IS_ERR(cb->reg_mr))
  568                 ib_dereg_mr(cb->reg_mr);
  569         if (cb->rdma_mr && !IS_ERR(cb->rdma_mr))
  570                 ib_dereg_mr(cb->rdma_mr);
  571         if (cb->dma_mr && !IS_ERR(cb->dma_mr))
  572                 ib_dereg_mr(cb->dma_mr);
  573         if (cb->rdma_buf) {
  574                 ib_dma_free_coherent(cb->pd->device, cb->size, cb->rdma_buf,
  575                                      cb->rdma_dma_addr);
  576         }
  577         if (cb->start_buf) {
  578                 ib_dma_free_coherent(cb->pd->device, cb->size, cb->start_buf,
  579                                      cb->start_dma_addr);
  580         }
  581         return ret;
  582 }
  583 
  584 static void krping_free_buffers(struct krping_cb *cb)
  585 {
  586         DEBUG_LOG("krping_free_buffers called on cb %p\n", cb);
  587         
  588         if (cb->dma_mr)
  589                 ib_dereg_mr(cb->dma_mr);
  590         if (cb->rdma_mr)
  591                 ib_dereg_mr(cb->rdma_mr);
  592         if (cb->start_mr)
  593                 ib_dereg_mr(cb->start_mr);
  594         if (cb->reg_mr)
  595                 ib_dereg_mr(cb->reg_mr);
  596 
  597         dma_unmap_single(cb->pd->device->dma_device,
  598                          pci_unmap_addr(cb, recv_mapping),
  599                          sizeof(cb->recv_buf), DMA_BIDIRECTIONAL);
  600         dma_unmap_single(cb->pd->device->dma_device,
  601                          pci_unmap_addr(cb, send_mapping),
  602                          sizeof(cb->send_buf), DMA_BIDIRECTIONAL);
  603 
  604         ib_dma_free_coherent(cb->pd->device, cb->size, cb->rdma_buf,
  605                              cb->rdma_dma_addr);
  606 
  607         if (cb->start_buf) {
  608                 ib_dma_free_coherent(cb->pd->device, cb->size, cb->start_buf,
  609                                      cb->start_dma_addr);
  610         }
  611 }
  612 
  613 static int krping_create_qp(struct krping_cb *cb)
  614 {
  615         struct ib_qp_init_attr init_attr;
  616         int ret;
  617 
  618         memset(&init_attr, 0, sizeof(init_attr));
  619         init_attr.cap.max_send_wr = cb->txdepth;
  620         init_attr.cap.max_recv_wr = 2;
  621         
  622         /* For flush_qp() */
  623         init_attr.cap.max_send_wr++;
  624         init_attr.cap.max_recv_wr++;
  625 
  626         init_attr.cap.max_recv_sge = 1;
  627         init_attr.cap.max_send_sge = 1;
  628         init_attr.qp_type = IB_QPT_RC;
  629         init_attr.send_cq = cb->cq;
  630         init_attr.recv_cq = cb->cq;
  631         init_attr.sq_sig_type = IB_SIGNAL_REQ_WR;
  632 
  633         if (cb->server) {
  634                 ret = rdma_create_qp(cb->child_cm_id, cb->pd, &init_attr);
  635                 if (!ret)
  636                         cb->qp = cb->child_cm_id->qp;
  637         } else {
  638                 ret = rdma_create_qp(cb->cm_id, cb->pd, &init_attr);
  639                 if (!ret)
  640                         cb->qp = cb->cm_id->qp;
  641         }
  642 
  643         return ret;
  644 }
  645 
  646 static void krping_free_qp(struct krping_cb *cb)
  647 {
  648         ib_destroy_qp(cb->qp);
  649         ib_destroy_cq(cb->cq);
  650         ib_dealloc_pd(cb->pd);
  651 }
  652 
  653 static int krping_setup_qp(struct krping_cb *cb, struct rdma_cm_id *cm_id)
  654 {
  655         int ret;
  656         struct ib_cq_init_attr attr = {0};
  657 
  658         cb->pd = ib_alloc_pd(cm_id->device, 0);
  659         if (IS_ERR(cb->pd)) {
  660                 printk(KERN_ERR PFX "ib_alloc_pd failed\n");
  661                 return PTR_ERR(cb->pd);
  662         }
  663         DEBUG_LOG("created pd %p\n", cb->pd);
  664 
  665         strlcpy(cb->stats.name, cb->pd->device->name, sizeof(cb->stats.name));
  666 
  667         attr.cqe = cb->txdepth * 2;
  668         attr.comp_vector = 0;
  669         cb->cq = ib_create_cq(cm_id->device, krping_cq_event_handler, NULL,
  670                               cb, &attr);
  671         if (IS_ERR(cb->cq)) {
  672                 printk(KERN_ERR PFX "ib_create_cq failed\n");
  673                 ret = PTR_ERR(cb->cq);
  674                 goto err1;
  675         }
  676         DEBUG_LOG("created cq %p\n", cb->cq);
  677 
  678         if (!cb->wlat && !cb->rlat && !cb->bw && !cb->frtest) {
  679                 ret = ib_req_notify_cq(cb->cq, IB_CQ_NEXT_COMP);
  680                 if (ret) {
  681                         printk(KERN_ERR PFX "ib_create_cq failed\n");
  682                         goto err2;
  683                 }
  684         }
  685 
  686         ret = krping_create_qp(cb);
  687         if (ret) {
  688                 printk(KERN_ERR PFX "krping_create_qp failed: %d\n", ret);
  689                 goto err2;
  690         }
  691         DEBUG_LOG("created qp %p\n", cb->qp);
  692         return 0;
  693 err2:
  694         ib_destroy_cq(cb->cq);
  695 err1:
  696         ib_dealloc_pd(cb->pd);
  697         return ret;
  698 }
  699 
  700 /*
  701  * return the (possibly rebound) rkey for the rdma buffer.
  702  * REG mode: invalidate and rebind via reg wr.
  703  * other modes: just return the mr rkey.
  704  */
  705 static u32 krping_rdma_rkey(struct krping_cb *cb, u64 buf, int post_inv)
  706 {
  707         u32 rkey;
  708         const struct ib_send_wr *bad_wr;
  709         int ret;
  710         struct scatterlist sg = {0};
  711 
  712         cb->invalidate_wr.ex.invalidate_rkey = cb->reg_mr->rkey;
  713 
  714         /*
  715          * Update the reg key.
  716          */
  717         ib_update_fast_reg_key(cb->reg_mr, ++cb->key);
  718         cb->reg_mr_wr.key = cb->reg_mr->rkey;
  719 
  720         /*
  721          * Update the reg WR with new buf info.
  722          */
  723         if (buf == (u64)cb->start_dma_addr)
  724                 cb->reg_mr_wr.access = IB_ACCESS_REMOTE_READ;
  725         else
  726                 cb->reg_mr_wr.access = IB_ACCESS_REMOTE_WRITE | IB_ACCESS_LOCAL_WRITE;
  727         sg_dma_address(&sg) = buf;
  728         sg_dma_len(&sg) = cb->size;
  729 
  730         ret = ib_map_mr_sg(cb->reg_mr, &sg, 1, NULL, PAGE_SIZE);
  731         BUG_ON(ret <= 0 || ret > cb->page_list_len);
  732 
  733         DEBUG_LOG(PFX "post_inv = %d, reg_mr new rkey 0x%x pgsz %u len %u"
  734                 " iova_start %llx\n",
  735                 post_inv,
  736                 cb->reg_mr_wr.key,
  737                 cb->reg_mr->page_size,
  738                 (unsigned)cb->reg_mr->length,
  739                 (unsigned long long)cb->reg_mr->iova);
  740 
  741         if (post_inv)
  742                 ret = ib_post_send(cb->qp, &cb->invalidate_wr, &bad_wr);
  743         else
  744                 ret = ib_post_send(cb->qp, &cb->reg_mr_wr.wr, &bad_wr);
  745         if (ret) {
  746                 printk(KERN_ERR PFX "post send error %d\n", ret);
  747                 cb->state = ERROR;
  748         }
  749         rkey = cb->reg_mr->rkey;
  750         return rkey;
  751 }
  752 
  753 static void krping_format_send(struct krping_cb *cb, u64 buf)
  754 {
  755         struct krping_rdma_info *info = &cb->send_buf;
  756         u32 rkey;
  757 
  758         /*
  759          * Client side will do reg or mw bind before
  760          * advertising the rdma buffer.  Server side
  761          * sends have no data.
  762          */
  763         if (!cb->server || cb->wlat || cb->rlat || cb->bw) {
  764                 rkey = krping_rdma_rkey(cb, buf, !cb->server_invalidate);
  765                 info->buf = htonll(buf);
  766                 info->rkey = htonl(rkey);
  767                 info->size = htonl(cb->size);
  768                 DEBUG_LOG("RDMA addr %llx rkey %x len %d\n",
  769                           (unsigned long long)buf, rkey, cb->size);
  770         }
  771 }
  772 
  773 static void krping_test_server(struct krping_cb *cb)
  774 {
  775         const struct ib_send_wr *bad_wr;
  776         struct ib_send_wr inv;
  777         int ret;
  778 
  779         while (1) {
  780                 /* Wait for client's Start STAG/TO/Len */
  781                 wait_event_interruptible(cb->sem, cb->state >= RDMA_READ_ADV);
  782                 if (cb->state != RDMA_READ_ADV) {
  783                         printk(KERN_ERR PFX "wait for RDMA_READ_ADV state %d\n",
  784                                 cb->state);
  785                         break;
  786                 }
  787 
  788                 DEBUG_LOG("server received sink adv\n");
  789 
  790                 cb->rdma_sq_wr.rkey = cb->remote_rkey;
  791                 cb->rdma_sq_wr.remote_addr = cb->remote_addr;
  792                 cb->rdma_sq_wr.wr.sg_list->length = cb->remote_len;
  793                 cb->rdma_sgl.lkey = krping_rdma_rkey(cb, cb->rdma_dma_addr, !cb->read_inv);
  794                 cb->rdma_sq_wr.wr.next = NULL;
  795 
  796                 /* Issue RDMA Read. */
  797                 if (cb->read_inv)
  798                         cb->rdma_sq_wr.wr.opcode = IB_WR_RDMA_READ_WITH_INV;
  799                 else {
  800 
  801                         cb->rdma_sq_wr.wr.opcode = IB_WR_RDMA_READ;
  802                         /* 
  803                          * Immediately follow the read with a 
  804                          * fenced LOCAL_INV.
  805                          */
  806                         cb->rdma_sq_wr.wr.next = &inv;
  807                         memset(&inv, 0, sizeof inv);
  808                         inv.opcode = IB_WR_LOCAL_INV;
  809                         inv.ex.invalidate_rkey = cb->reg_mr->rkey;
  810                         inv.send_flags = IB_SEND_FENCE;
  811                 }
  812 
  813                 ret = ib_post_send(cb->qp, &cb->rdma_sq_wr.wr, &bad_wr);
  814                 if (ret) {
  815                         printk(KERN_ERR PFX "post send error %d\n", ret);
  816                         break;
  817                 }
  818                 cb->rdma_sq_wr.wr.next = NULL;
  819 
  820                 DEBUG_LOG("server posted rdma read req \n");
  821 
  822                 /* Wait for read completion */
  823                 wait_event_interruptible(cb->sem, 
  824                                          cb->state >= RDMA_READ_COMPLETE);
  825                 if (cb->state != RDMA_READ_COMPLETE) {
  826                         printk(KERN_ERR PFX 
  827                                "wait for RDMA_READ_COMPLETE state %d\n",
  828                                cb->state);
  829                         break;
  830                 }
  831                 DEBUG_LOG("server received read complete\n");
  832 
  833                 /* Display data in recv buf */
  834                 if (cb->verbose)
  835                         printk(KERN_INFO PFX "server ping data: %s\n",
  836                                 cb->rdma_buf);
  837 
  838                 /* Tell client to continue */
  839                 if (cb->server && cb->server_invalidate) {
  840                         cb->sq_wr.ex.invalidate_rkey = cb->remote_rkey;
  841                         cb->sq_wr.opcode = IB_WR_SEND_WITH_INV;
  842                         DEBUG_LOG("send-w-inv rkey 0x%x\n", cb->remote_rkey);
  843                 } 
  844                 ret = ib_post_send(cb->qp, &cb->sq_wr, &bad_wr);
  845                 if (ret) {
  846                         printk(KERN_ERR PFX "post send error %d\n", ret);
  847                         break;
  848                 }
  849                 DEBUG_LOG("server posted go ahead\n");
  850 
  851                 /* Wait for client's RDMA STAG/TO/Len */
  852                 wait_event_interruptible(cb->sem, cb->state >= RDMA_WRITE_ADV);
  853                 if (cb->state != RDMA_WRITE_ADV) {
  854                         printk(KERN_ERR PFX 
  855                                "wait for RDMA_WRITE_ADV state %d\n",
  856                                cb->state);
  857                         break;
  858                 }
  859                 DEBUG_LOG("server received sink adv\n");
  860 
  861                 /* RDMA Write echo data */
  862                 cb->rdma_sq_wr.wr.opcode = IB_WR_RDMA_WRITE;
  863                 cb->rdma_sq_wr.rkey = cb->remote_rkey;
  864                 cb->rdma_sq_wr.remote_addr = cb->remote_addr;
  865                 cb->rdma_sq_wr.wr.sg_list->length = strlen(cb->rdma_buf) + 1;
  866                 if (cb->local_dma_lkey)
  867                         cb->rdma_sgl.lkey = cb->pd->local_dma_lkey;
  868                 else 
  869                         cb->rdma_sgl.lkey = krping_rdma_rkey(cb, cb->rdma_dma_addr, 0);
  870                         
  871                 DEBUG_LOG("rdma write from lkey %x laddr %llx len %d\n",
  872                           cb->rdma_sq_wr.wr.sg_list->lkey,
  873                           (unsigned long long)cb->rdma_sq_wr.wr.sg_list->addr,
  874                           cb->rdma_sq_wr.wr.sg_list->length);
  875 
  876                 ret = ib_post_send(cb->qp, &cb->rdma_sq_wr.wr, &bad_wr);
  877                 if (ret) {
  878                         printk(KERN_ERR PFX "post send error %d\n", ret);
  879                         break;
  880                 }
  881 
  882                 /* Wait for completion */
  883                 ret = wait_event_interruptible(cb->sem, cb->state >= 
  884                                                          RDMA_WRITE_COMPLETE);
  885                 if (cb->state != RDMA_WRITE_COMPLETE) {
  886                         printk(KERN_ERR PFX 
  887                                "wait for RDMA_WRITE_COMPLETE state %d\n",
  888                                cb->state);
  889                         break;
  890                 }
  891                 DEBUG_LOG("server rdma write complete \n");
  892 
  893                 cb->state = CONNECTED;
  894 
  895                 /* Tell client to begin again */
  896                 if (cb->server && cb->server_invalidate) {
  897                         cb->sq_wr.ex.invalidate_rkey = cb->remote_rkey;
  898                         cb->sq_wr.opcode = IB_WR_SEND_WITH_INV;
  899                         DEBUG_LOG("send-w-inv rkey 0x%x\n", cb->remote_rkey);
  900                 } 
  901                 ret = ib_post_send(cb->qp, &cb->sq_wr, &bad_wr);
  902                 if (ret) {
  903                         printk(KERN_ERR PFX "post send error %d\n", ret);
  904                         break;
  905                 }
  906                 DEBUG_LOG("server posted go ahead\n");
  907         }
  908 }
  909 
  910 static void rlat_test(struct krping_cb *cb)
  911 {
  912         int scnt;
  913         int iters = cb->count;
  914         struct timeval start_tv, stop_tv;
  915         int ret;
  916         struct ib_wc wc;
  917         const struct ib_send_wr *bad_wr;
  918         int ne;
  919 
  920         scnt = 0;
  921         cb->rdma_sq_wr.wr.opcode = IB_WR_RDMA_READ;
  922         cb->rdma_sq_wr.rkey = cb->remote_rkey;
  923         cb->rdma_sq_wr.remote_addr = cb->remote_addr;
  924         cb->rdma_sq_wr.wr.sg_list->length = cb->size;
  925 
  926         microtime(&start_tv);
  927         if (!cb->poll) {
  928                 cb->state = RDMA_READ_ADV;
  929                 ib_req_notify_cq(cb->cq, IB_CQ_NEXT_COMP);
  930         }
  931         while (scnt < iters) {
  932 
  933                 cb->state = RDMA_READ_ADV;
  934                 ret = ib_post_send(cb->qp, &cb->rdma_sq_wr.wr, &bad_wr);
  935                 if (ret) {
  936                         printk(KERN_ERR PFX  
  937                                 "Couldn't post send: ret=%d scnt %d\n",
  938                                 ret, scnt);
  939                         return;
  940                 }
  941 
  942                 do {
  943                         if (!cb->poll) {
  944                                 wait_event_interruptible(cb->sem, 
  945                                         cb->state != RDMA_READ_ADV);
  946                                 if (cb->state == RDMA_READ_COMPLETE) {
  947                                         ne = 1;
  948                                         ib_req_notify_cq(cb->cq, 
  949                                                 IB_CQ_NEXT_COMP);
  950                                 } else {
  951                                         ne = -1;
  952                                 }
  953                         } else
  954                                 ne = ib_poll_cq(cb->cq, 1, &wc);
  955                         if (cb->state == ERROR) {
  956                                 printk(KERN_ERR PFX 
  957                                         "state == ERROR...bailing scnt %d\n", 
  958                                         scnt);
  959                                 return;
  960                         }
  961                 } while (ne == 0);
  962 
  963                 if (ne < 0) {
  964                         printk(KERN_ERR PFX "poll CQ failed %d\n", ne);
  965                         return;
  966                 }
  967                 if (cb->poll && wc.status != IB_WC_SUCCESS) {
  968                         printk(KERN_ERR PFX "Completion wth error at %s:\n",
  969                                 cb->server ? "server" : "client");
  970                         printk(KERN_ERR PFX "Failed status %d: wr_id %d\n",
  971                                 wc.status, (int) wc.wr_id);
  972                         return;
  973                 }
  974                 ++scnt;
  975         }
  976         microtime(&stop_tv);
  977 
  978         if (stop_tv.tv_usec < start_tv.tv_usec) {
  979                 stop_tv.tv_usec += 1000000;
  980                 stop_tv.tv_sec  -= 1;
  981         }
  982 
  983         printk(KERN_ERR PFX "delta sec %lu delta usec %lu iter %d size %d\n",
  984                 (unsigned long)(stop_tv.tv_sec - start_tv.tv_sec),
  985                 (unsigned long)(stop_tv.tv_usec - start_tv.tv_usec),
  986                 scnt, cb->size);
  987 }
  988 
  989 static void wlat_test(struct krping_cb *cb)
  990 {
  991         int ccnt, scnt, rcnt;
  992         int iters=cb->count;
  993         volatile char *poll_buf = (char *) cb->start_buf;
  994         char *buf = (char *)cb->rdma_buf;
  995         struct timeval start_tv, stop_tv;
  996         cycles_t *post_cycles_start = NULL;
  997         cycles_t *post_cycles_stop = NULL;
  998         cycles_t *poll_cycles_start = NULL;
  999         cycles_t *poll_cycles_stop = NULL;
 1000         cycles_t *last_poll_cycles_start = NULL;
 1001         cycles_t sum_poll = 0, sum_post = 0, sum_last_poll = 0;
 1002         int i;
 1003         int cycle_iters = 1000;
 1004 
 1005         ccnt = 0;
 1006         scnt = 0;
 1007         rcnt = 0;
 1008 
 1009         post_cycles_start = kmalloc(cycle_iters * sizeof(cycles_t), GFP_KERNEL);
 1010         if (!post_cycles_start) {
 1011                 printk(KERN_ERR PFX "%s kmalloc failed\n", __FUNCTION__);
 1012                 goto done;
 1013         }
 1014         post_cycles_stop = kmalloc(cycle_iters * sizeof(cycles_t), GFP_KERNEL);
 1015         if (!post_cycles_stop) {
 1016                 printk(KERN_ERR PFX "%s kmalloc failed\n", __FUNCTION__);
 1017                 goto done;
 1018         }
 1019         poll_cycles_start = kmalloc(cycle_iters * sizeof(cycles_t), GFP_KERNEL);
 1020         if (!poll_cycles_start) {
 1021                 printk(KERN_ERR PFX "%s kmalloc failed\n", __FUNCTION__);
 1022                 goto done;
 1023         }
 1024         poll_cycles_stop = kmalloc(cycle_iters * sizeof(cycles_t), GFP_KERNEL);
 1025         if (!poll_cycles_stop) {
 1026                 printk(KERN_ERR PFX "%s kmalloc failed\n", __FUNCTION__);
 1027                 goto done;
 1028         }
 1029         last_poll_cycles_start = kmalloc(cycle_iters * sizeof(cycles_t), 
 1030                 GFP_KERNEL);
 1031         if (!last_poll_cycles_start) {
 1032                 printk(KERN_ERR PFX "%s kmalloc failed\n", __FUNCTION__);
 1033                 goto done;
 1034         }
 1035         cb->rdma_sq_wr.wr.opcode = IB_WR_RDMA_WRITE;
 1036         cb->rdma_sq_wr.rkey = cb->remote_rkey;
 1037         cb->rdma_sq_wr.remote_addr = cb->remote_addr;
 1038         cb->rdma_sq_wr.wr.sg_list->length = cb->size;
 1039 
 1040         if (cycle_iters > iters)
 1041                 cycle_iters = iters;
 1042         microtime(&start_tv);
 1043         while (scnt < iters || ccnt < iters || rcnt < iters) {
 1044 
 1045                 /* Wait till buffer changes. */
 1046                 if (rcnt < iters && !(scnt < 1 && !cb->server)) {
 1047                         ++rcnt;
 1048                         while (*poll_buf != (char)rcnt) {
 1049                                 if (cb->state == ERROR) {
 1050                                         printk(KERN_ERR PFX 
 1051                                                 "state = ERROR, bailing\n");
 1052                                         goto done;
 1053                                 }
 1054                         }
 1055                 }
 1056 
 1057                 if (scnt < iters) {
 1058                         const struct ib_send_wr *bad_wr;
 1059 
 1060                         *buf = (char)scnt+1;
 1061                         if (scnt < cycle_iters)
 1062                                 post_cycles_start[scnt] = get_cycles();
 1063                         if (ib_post_send(cb->qp, &cb->rdma_sq_wr.wr, &bad_wr)) {
 1064                                 printk(KERN_ERR PFX  
 1065                                         "Couldn't post send: scnt=%d\n",
 1066                                         scnt);
 1067                                 goto done;
 1068                         }
 1069                         if (scnt < cycle_iters)
 1070                                 post_cycles_stop[scnt] = get_cycles();
 1071                         scnt++;
 1072                 }
 1073 
 1074                 if (ccnt < iters) {
 1075                         struct ib_wc wc;
 1076                         int ne;
 1077 
 1078                         if (ccnt < cycle_iters)
 1079                                 poll_cycles_start[ccnt] = get_cycles();
 1080                         do {
 1081                                 if (ccnt < cycle_iters)
 1082                                         last_poll_cycles_start[ccnt] = 
 1083                                                 get_cycles();
 1084                                 ne = ib_poll_cq(cb->cq, 1, &wc);
 1085                         } while (ne == 0);
 1086                         if (ccnt < cycle_iters)
 1087                                 poll_cycles_stop[ccnt] = get_cycles();
 1088                         ++ccnt;
 1089 
 1090                         if (ne < 0) {
 1091                                 printk(KERN_ERR PFX "poll CQ failed %d\n", ne);
 1092                                 goto done;
 1093                         }
 1094                         if (wc.status != IB_WC_SUCCESS) {
 1095                                 printk(KERN_ERR PFX 
 1096                                         "Completion wth error at %s:\n",
 1097                                         cb->server ? "server" : "client");
 1098                                 printk(KERN_ERR PFX 
 1099                                         "Failed status %d: wr_id %d\n",
 1100                                         wc.status, (int) wc.wr_id);
 1101                                 printk(KERN_ERR PFX 
 1102                                         "scnt=%d, rcnt=%d, ccnt=%d\n",
 1103                                         scnt, rcnt, ccnt);
 1104                                 goto done;
 1105                         }
 1106                 }
 1107         }
 1108         microtime(&stop_tv);
 1109 
 1110         if (stop_tv.tv_usec < start_tv.tv_usec) {
 1111                 stop_tv.tv_usec += 1000000;
 1112                 stop_tv.tv_sec  -= 1;
 1113         }
 1114 
 1115         for (i=0; i < cycle_iters; i++) {
 1116                 sum_post += post_cycles_stop[i] - post_cycles_start[i];
 1117                 sum_poll += poll_cycles_stop[i] - poll_cycles_start[i];
 1118                 sum_last_poll += poll_cycles_stop[i]-last_poll_cycles_start[i];
 1119         }
 1120         printk(KERN_ERR PFX 
 1121                 "delta sec %lu delta usec %lu iter %d size %d cycle_iters %d"
 1122                 " sum_post %llu sum_poll %llu sum_last_poll %llu\n",
 1123                 (unsigned long)(stop_tv.tv_sec - start_tv.tv_sec),
 1124                 (unsigned long)(stop_tv.tv_usec - start_tv.tv_usec),
 1125                 scnt, cb->size, cycle_iters,
 1126                 (unsigned long long)sum_post, (unsigned long long)sum_poll, 
 1127                 (unsigned long long)sum_last_poll);
 1128 done:
 1129         kfree(post_cycles_start);
 1130         kfree(post_cycles_stop);
 1131         kfree(poll_cycles_start);
 1132         kfree(poll_cycles_stop);
 1133         kfree(last_poll_cycles_start);
 1134 }
 1135 
 1136 static void bw_test(struct krping_cb *cb)
 1137 {
 1138         int ccnt, scnt;
 1139         int iters=cb->count;
 1140         struct timeval start_tv, stop_tv;
 1141         cycles_t *post_cycles_start = NULL;
 1142         cycles_t *post_cycles_stop = NULL;
 1143         cycles_t *poll_cycles_start = NULL;
 1144         cycles_t *poll_cycles_stop = NULL;
 1145         cycles_t *last_poll_cycles_start = NULL;
 1146         cycles_t sum_poll = 0, sum_post = 0, sum_last_poll = 0;
 1147         int i;
 1148         int cycle_iters = 1000;
 1149 
 1150         ccnt = 0;
 1151         scnt = 0;
 1152 
 1153         post_cycles_start = kmalloc(cycle_iters * sizeof(cycles_t), GFP_KERNEL);
 1154         if (!post_cycles_start) {
 1155                 printk(KERN_ERR PFX "%s kmalloc failed\n", __FUNCTION__);
 1156                 goto done;
 1157         }
 1158         post_cycles_stop = kmalloc(cycle_iters * sizeof(cycles_t), GFP_KERNEL);
 1159         if (!post_cycles_stop) {
 1160                 printk(KERN_ERR PFX "%s kmalloc failed\n", __FUNCTION__);
 1161                 goto done;
 1162         }
 1163         poll_cycles_start = kmalloc(cycle_iters * sizeof(cycles_t), GFP_KERNEL);
 1164         if (!poll_cycles_start) {
 1165                 printk(KERN_ERR PFX "%s kmalloc failed\n", __FUNCTION__);
 1166                 goto done;
 1167         }
 1168         poll_cycles_stop = kmalloc(cycle_iters * sizeof(cycles_t), GFP_KERNEL);
 1169         if (!poll_cycles_stop) {
 1170                 printk(KERN_ERR PFX "%s kmalloc failed\n", __FUNCTION__);
 1171                 goto done;
 1172         }
 1173         last_poll_cycles_start = kmalloc(cycle_iters * sizeof(cycles_t), 
 1174                 GFP_KERNEL);
 1175         if (!last_poll_cycles_start) {
 1176                 printk(KERN_ERR PFX "%s kmalloc failed\n", __FUNCTION__);
 1177                 goto done;
 1178         }
 1179         cb->rdma_sq_wr.wr.opcode = IB_WR_RDMA_WRITE;
 1180         cb->rdma_sq_wr.rkey = cb->remote_rkey;
 1181         cb->rdma_sq_wr.remote_addr = cb->remote_addr;
 1182         cb->rdma_sq_wr.wr.sg_list->length = cb->size;
 1183 
 1184         if (cycle_iters > iters)
 1185                 cycle_iters = iters;
 1186         microtime(&start_tv);
 1187         while (scnt < iters || ccnt < iters) {
 1188 
 1189                 while (scnt < iters && scnt - ccnt < cb->txdepth) {
 1190                         const struct ib_send_wr *bad_wr;
 1191 
 1192                         if (scnt < cycle_iters)
 1193                                 post_cycles_start[scnt] = get_cycles();
 1194                         if (ib_post_send(cb->qp, &cb->rdma_sq_wr.wr, &bad_wr)) {
 1195                                 printk(KERN_ERR PFX  
 1196                                         "Couldn't post send: scnt=%d\n",
 1197                                         scnt);
 1198                                 goto done;
 1199                         }
 1200                         if (scnt < cycle_iters)
 1201                                 post_cycles_stop[scnt] = get_cycles();
 1202                         ++scnt;
 1203                 }
 1204 
 1205                 if (ccnt < iters) {
 1206                         int ne;
 1207                         struct ib_wc wc;
 1208 
 1209                         if (ccnt < cycle_iters)
 1210                                 poll_cycles_start[ccnt] = get_cycles();
 1211                         do {
 1212                                 if (ccnt < cycle_iters)
 1213                                         last_poll_cycles_start[ccnt] = 
 1214                                                 get_cycles();
 1215                                 ne = ib_poll_cq(cb->cq, 1, &wc);
 1216                         } while (ne == 0);
 1217                         if (ccnt < cycle_iters)
 1218                                 poll_cycles_stop[ccnt] = get_cycles();
 1219                         ccnt += 1;
 1220 
 1221                         if (ne < 0) {
 1222                                 printk(KERN_ERR PFX "poll CQ failed %d\n", ne);
 1223                                 goto done;
 1224                         }
 1225                         if (wc.status != IB_WC_SUCCESS) {
 1226                                 printk(KERN_ERR PFX 
 1227                                         "Completion wth error at %s:\n",
 1228                                         cb->server ? "server" : "client");
 1229                                 printk(KERN_ERR PFX 
 1230                                         "Failed status %d: wr_id %d\n",
 1231                                         wc.status, (int) wc.wr_id);
 1232                                 goto done;
 1233                         }
 1234                 }
 1235         }
 1236         microtime(&stop_tv);
 1237 
 1238         if (stop_tv.tv_usec < start_tv.tv_usec) {
 1239                 stop_tv.tv_usec += 1000000;
 1240                 stop_tv.tv_sec  -= 1;
 1241         }
 1242 
 1243         for (i=0; i < cycle_iters; i++) {
 1244                 sum_post += post_cycles_stop[i] - post_cycles_start[i];
 1245                 sum_poll += poll_cycles_stop[i] - poll_cycles_start[i];
 1246                 sum_last_poll += poll_cycles_stop[i]-last_poll_cycles_start[i];
 1247         }
 1248         printk(KERN_ERR PFX 
 1249                 "delta sec %lu delta usec %lu iter %d size %d cycle_iters %d"
 1250                 " sum_post %llu sum_poll %llu sum_last_poll %llu\n",
 1251                 (unsigned long)(stop_tv.tv_sec - start_tv.tv_sec),
 1252                 (unsigned long)(stop_tv.tv_usec - start_tv.tv_usec),
 1253                 scnt, cb->size, cycle_iters, 
 1254                 (unsigned long long)sum_post, (unsigned long long)sum_poll, 
 1255                 (unsigned long long)sum_last_poll);
 1256 done:
 1257         kfree(post_cycles_start);
 1258         kfree(post_cycles_stop);
 1259         kfree(poll_cycles_start);
 1260         kfree(poll_cycles_stop);
 1261         kfree(last_poll_cycles_start);
 1262 }
 1263 
 1264 static void krping_rlat_test_server(struct krping_cb *cb)
 1265 {
 1266         const struct ib_send_wr *bad_wr;
 1267         struct ib_wc wc;
 1268         int ret;
 1269 
 1270         /* Spin waiting for client's Start STAG/TO/Len */
 1271         while (cb->state < RDMA_READ_ADV) {
 1272                 krping_cq_event_handler(cb->cq, cb);
 1273         }
 1274 
 1275         /* Send STAG/TO/Len to client */
 1276         krping_format_send(cb, cb->start_dma_addr);
 1277         ret = ib_post_send(cb->qp, &cb->sq_wr, &bad_wr);
 1278         if (ret) {
 1279                 printk(KERN_ERR PFX "post send error %d\n", ret);
 1280                 return;
 1281         }
 1282 
 1283         /* Spin waiting for send completion */
 1284         while ((ret = ib_poll_cq(cb->cq, 1, &wc) == 0));
 1285         if (ret < 0) {
 1286                 printk(KERN_ERR PFX "poll error %d\n", ret);
 1287                 return;
 1288         }
 1289         if (wc.status) {
 1290                 printk(KERN_ERR PFX "send completiong error %d\n", wc.status);
 1291                 return;
 1292         }
 1293 
 1294         wait_event_interruptible(cb->sem, cb->state == ERROR);
 1295 }
 1296 
 1297 static void krping_wlat_test_server(struct krping_cb *cb)
 1298 {
 1299         const struct ib_send_wr *bad_wr;
 1300         struct ib_wc wc;
 1301         int ret;
 1302 
 1303         /* Spin waiting for client's Start STAG/TO/Len */
 1304         while (cb->state < RDMA_READ_ADV) {
 1305                 krping_cq_event_handler(cb->cq, cb);
 1306         }
 1307 
 1308         /* Send STAG/TO/Len to client */
 1309         krping_format_send(cb, cb->start_dma_addr);
 1310         ret = ib_post_send(cb->qp, &cb->sq_wr, &bad_wr);
 1311         if (ret) {
 1312                 printk(KERN_ERR PFX "post send error %d\n", ret);
 1313                 return;
 1314         }
 1315 
 1316         /* Spin waiting for send completion */
 1317         while ((ret = ib_poll_cq(cb->cq, 1, &wc) == 0));
 1318         if (ret < 0) {
 1319                 printk(KERN_ERR PFX "poll error %d\n", ret);
 1320                 return;
 1321         }
 1322         if (wc.status) {
 1323                 printk(KERN_ERR PFX "send completiong error %d\n", wc.status);
 1324                 return;
 1325         }
 1326 
 1327         wlat_test(cb);
 1328         wait_event_interruptible(cb->sem, cb->state == ERROR);
 1329 }
 1330 
 1331 static void krping_bw_test_server(struct krping_cb *cb)
 1332 {
 1333         const struct ib_send_wr *bad_wr;
 1334         struct ib_wc wc;
 1335         int ret;
 1336 
 1337         /* Spin waiting for client's Start STAG/TO/Len */
 1338         while (cb->state < RDMA_READ_ADV) {
 1339                 krping_cq_event_handler(cb->cq, cb);
 1340         }
 1341 
 1342         /* Send STAG/TO/Len to client */
 1343         krping_format_send(cb, cb->start_dma_addr);
 1344         ret = ib_post_send(cb->qp, &cb->sq_wr, &bad_wr);
 1345         if (ret) {
 1346                 printk(KERN_ERR PFX "post send error %d\n", ret);
 1347                 return;
 1348         }
 1349 
 1350         /* Spin waiting for send completion */
 1351         while ((ret = ib_poll_cq(cb->cq, 1, &wc) == 0));
 1352         if (ret < 0) {
 1353                 printk(KERN_ERR PFX "poll error %d\n", ret);
 1354                 return;
 1355         }
 1356         if (wc.status) {
 1357                 printk(KERN_ERR PFX "send completiong error %d\n", wc.status);
 1358                 return;
 1359         }
 1360 
 1361         if (cb->duplex)
 1362                 bw_test(cb);
 1363         wait_event_interruptible(cb->sem, cb->state == ERROR);
 1364 }
 1365 
 1366 static int reg_supported(struct ib_device *dev)
 1367 {
 1368         u64 needed_flags = IB_DEVICE_MEM_MGT_EXTENSIONS;
 1369 
 1370         if ((dev->attrs.device_cap_flags & needed_flags) != needed_flags) {
 1371                 printk(KERN_ERR PFX 
 1372                         "Fastreg not supported - device_cap_flags 0x%llx\n",
 1373                         (unsigned long long)dev->attrs.device_cap_flags);
 1374                 return 0;
 1375         }
 1376         DEBUG_LOG("Fastreg supported - device_cap_flags 0x%llx\n",
 1377                 (unsigned long long)dev->attrs.device_cap_flags);
 1378         return 1;
 1379 }
 1380 
 1381 static void fill_sockaddr(struct sockaddr_storage *sin, struct krping_cb *cb)
 1382 {
 1383         memset(sin, 0, sizeof(*sin));
 1384 
 1385         if (cb->addr_type == AF_INET) {
 1386                 struct sockaddr_in *sin4 = (struct sockaddr_in *)sin;
 1387                 sin4->sin_len = sizeof(*sin4);
 1388                 sin4->sin_family = AF_INET;
 1389                 memcpy((void *)&sin4->sin_addr.s_addr, cb->addr, 4);
 1390                 sin4->sin_port = cb->port;
 1391         } else if (cb->addr_type == AF_INET6) {
 1392                 struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)sin;
 1393                 sin6->sin6_len = sizeof(*sin6);
 1394                 sin6->sin6_family = AF_INET6;
 1395                 memcpy((void *)&sin6->sin6_addr, cb->addr, 16);
 1396                 sin6->sin6_port = cb->port;
 1397         }
 1398 }
 1399 
 1400 static int krping_bind_server(struct krping_cb *cb)
 1401 {
 1402         struct sockaddr_storage sin;
 1403         int ret;
 1404 
 1405 
 1406         fill_sockaddr(&sin, cb);
 1407 
 1408         ret = rdma_bind_addr(cb->cm_id, (struct sockaddr *)&sin);
 1409         if (ret) {
 1410                 printk(KERN_ERR PFX "rdma_bind_addr error %d\n", ret);
 1411                 return ret;
 1412         }
 1413         DEBUG_LOG("rdma_bind_addr successful\n");
 1414 
 1415         DEBUG_LOG("rdma_listen\n");
 1416         ret = rdma_listen(cb->cm_id, 3);
 1417         if (ret) {
 1418                 printk(KERN_ERR PFX "rdma_listen failed: %d\n", ret);
 1419                 return ret;
 1420         }
 1421 
 1422         wait_event_interruptible(cb->sem, cb->state >= CONNECT_REQUEST);
 1423         if (cb->state != CONNECT_REQUEST) {
 1424                 printk(KERN_ERR PFX "wait for CONNECT_REQUEST state %d\n",
 1425                         cb->state);
 1426                 return -1;
 1427         }
 1428 
 1429         if (!reg_supported(cb->child_cm_id->device))
 1430                 return -EINVAL;
 1431 
 1432         return 0;
 1433 }
 1434 
 1435 static void krping_run_server(struct krping_cb *cb)
 1436 {
 1437         const struct ib_recv_wr *bad_wr;
 1438         int ret;
 1439 
 1440         ret = krping_bind_server(cb);
 1441         if (ret)
 1442                 return;
 1443 
 1444         ret = krping_setup_qp(cb, cb->child_cm_id);
 1445         if (ret) {
 1446                 printk(KERN_ERR PFX "setup_qp failed: %d\n", ret);
 1447                 goto err0;
 1448         }
 1449 
 1450         ret = krping_setup_buffers(cb);
 1451         if (ret) {
 1452                 printk(KERN_ERR PFX "krping_setup_buffers failed: %d\n", ret);
 1453                 goto err1;
 1454         }
 1455 
 1456         ret = ib_post_recv(cb->qp, &cb->rq_wr, &bad_wr);
 1457         if (ret) {
 1458                 printk(KERN_ERR PFX "ib_post_recv failed: %d\n", ret);
 1459                 goto err2;
 1460         }
 1461 
 1462         ret = krping_accept(cb);
 1463         if (ret) {
 1464                 printk(KERN_ERR PFX "connect error %d\n", ret);
 1465                 goto err2;
 1466         }
 1467 
 1468         if (cb->wlat)
 1469                 krping_wlat_test_server(cb);
 1470         else if (cb->rlat)
 1471                 krping_rlat_test_server(cb);
 1472         else if (cb->bw)
 1473                 krping_bw_test_server(cb);
 1474         else
 1475                 krping_test_server(cb);
 1476         rdma_disconnect(cb->child_cm_id);
 1477 err2:
 1478         krping_free_buffers(cb);
 1479 err1:
 1480         krping_free_qp(cb);
 1481 err0:
 1482         rdma_destroy_id(cb->child_cm_id);
 1483 }
 1484 
 1485 static void krping_test_client(struct krping_cb *cb)
 1486 {
 1487         int ping, start, cc, i, ret;
 1488         const struct ib_send_wr *bad_wr;
 1489         unsigned char c;
 1490 
 1491         start = 65;
 1492         for (ping = 0; !cb->count || ping < cb->count; ping++) {
 1493                 cb->state = RDMA_READ_ADV;
 1494 
 1495                 /* Put some ascii text in the buffer. */
 1496                 cc = sprintf(cb->start_buf, "rdma-ping-%d: ", ping);
 1497                 for (i = cc, c = start; i < cb->size; i++) {
 1498                         cb->start_buf[i] = c;
 1499                         c++;
 1500                         if (c > 122)
 1501                                 c = 65;
 1502                 }
 1503                 start++;
 1504                 if (start > 122)
 1505                         start = 65;
 1506                 cb->start_buf[cb->size - 1] = 0;
 1507 
 1508                 krping_format_send(cb, cb->start_dma_addr);
 1509                 if (cb->state == ERROR) {
 1510                         printk(KERN_ERR PFX "krping_format_send failed\n");
 1511                         break;
 1512                 }
 1513                 ret = ib_post_send(cb->qp, &cb->sq_wr, &bad_wr);
 1514                 if (ret) {
 1515                         printk(KERN_ERR PFX "post send error %d\n", ret);
 1516                         break;
 1517                 }
 1518 
 1519                 /* Wait for server to ACK */
 1520                 wait_event_interruptible(cb->sem, cb->state >= RDMA_WRITE_ADV);
 1521                 if (cb->state != RDMA_WRITE_ADV) {
 1522                         printk(KERN_ERR PFX 
 1523                                "wait for RDMA_WRITE_ADV state %d\n",
 1524                                cb->state);
 1525                         break;
 1526                 }
 1527 
 1528                 krping_format_send(cb, cb->rdma_dma_addr);
 1529                 ret = ib_post_send(cb->qp, &cb->sq_wr, &bad_wr);
 1530                 if (ret) {
 1531                         printk(KERN_ERR PFX "post send error %d\n", ret);
 1532                         break;
 1533                 }
 1534 
 1535                 /* Wait for the server to say the RDMA Write is complete. */
 1536                 wait_event_interruptible(cb->sem, 
 1537                                          cb->state >= RDMA_WRITE_COMPLETE);
 1538                 if (cb->state != RDMA_WRITE_COMPLETE) {
 1539                         printk(KERN_ERR PFX 
 1540                                "wait for RDMA_WRITE_COMPLETE state %d\n",
 1541                                cb->state);
 1542                         break;
 1543                 }
 1544 
 1545                 if (cb->validate)
 1546                         if (memcmp(cb->start_buf, cb->rdma_buf, cb->size)) {
 1547                                 printk(KERN_ERR PFX "data mismatch!\n");
 1548                                 break;
 1549                         }
 1550 
 1551                 if (cb->verbose)
 1552                         printk(KERN_INFO PFX "ping data: %s\n", cb->rdma_buf);
 1553 #ifdef SLOW_KRPING
 1554                 wait_event_interruptible_timeout(cb->sem, cb->state == ERROR, HZ);
 1555 #endif
 1556         }
 1557 }
 1558 
 1559 static void krping_rlat_test_client(struct krping_cb *cb)
 1560 {
 1561         const struct ib_send_wr *bad_wr;
 1562         struct ib_wc wc;
 1563         int ret;
 1564 
 1565         cb->state = RDMA_READ_ADV;
 1566 
 1567         /* Send STAG/TO/Len to client */
 1568         krping_format_send(cb, cb->start_dma_addr);
 1569         if (cb->state == ERROR) {
 1570                 printk(KERN_ERR PFX "krping_format_send failed\n");
 1571                 return;
 1572         }
 1573         ret = ib_post_send(cb->qp, &cb->sq_wr, &bad_wr);
 1574         if (ret) {
 1575                 printk(KERN_ERR PFX "post send error %d\n", ret);
 1576                 return;
 1577         }
 1578 
 1579         /* Spin waiting for send completion */
 1580         while ((ret = ib_poll_cq(cb->cq, 1, &wc) == 0));
 1581         if (ret < 0) {
 1582                 printk(KERN_ERR PFX "poll error %d\n", ret);
 1583                 return;
 1584         }
 1585         if (wc.status) {
 1586                 printk(KERN_ERR PFX "send completion error %d\n", wc.status);
 1587                 return;
 1588         }
 1589 
 1590         /* Spin waiting for server's Start STAG/TO/Len */
 1591         while (cb->state < RDMA_WRITE_ADV) {
 1592                 krping_cq_event_handler(cb->cq, cb);
 1593         }
 1594 
 1595 #if 0
 1596 {
 1597         int i;
 1598         struct timeval start, stop;
 1599         time_t sec;
 1600         suseconds_t usec;
 1601         unsigned long long elapsed;
 1602         struct ib_wc wc;
 1603         const struct ib_send_wr *bad_wr;
 1604         int ne;
 1605         
 1606         cb->rdma_sq_wr.wr.opcode = IB_WR_RDMA_WRITE;
 1607         cb->rdma_sq_wr.rkey = cb->remote_rkey;
 1608         cb->rdma_sq_wr.remote_addr = cb->remote_addr;
 1609         cb->rdma_sq_wr.wr.sg_list->length = 0;
 1610         cb->rdma_sq_wr.wr.num_sge = 0;
 1611 
 1612         microtime(&start);
 1613         for (i=0; i < 100000; i++) {
 1614                 if (ib_post_send(cb->qp, &cb->rdma_sq_wr.wr, &bad_wr)) {
 1615                         printk(KERN_ERR PFX  "Couldn't post send\n");
 1616                         return;
 1617                 }
 1618                 do {
 1619                         ne = ib_poll_cq(cb->cq, 1, &wc);
 1620                 } while (ne == 0);
 1621                 if (ne < 0) {
 1622                         printk(KERN_ERR PFX "poll CQ failed %d\n", ne);
 1623                         return;
 1624                 }
 1625                 if (wc.status != IB_WC_SUCCESS) {
 1626                         printk(KERN_ERR PFX "Completion wth error at %s:\n",
 1627                                 cb->server ? "server" : "client");
 1628                         printk(KERN_ERR PFX "Failed status %d: wr_id %d\n",
 1629                                 wc.status, (int) wc.wr_id);
 1630                         return;
 1631                 }
 1632         }
 1633         microtime(&stop);
 1634         
 1635         if (stop.tv_usec < start.tv_usec) {
 1636                 stop.tv_usec += 1000000;
 1637                 stop.tv_sec  -= 1;
 1638         }
 1639         sec     = stop.tv_sec - start.tv_sec;
 1640         usec    = stop.tv_usec - start.tv_usec;
 1641         elapsed = sec * 1000000 + usec;
 1642         printk(KERN_ERR PFX "0B-write-lat iters 100000 usec %llu\n", elapsed);
 1643 }
 1644 #endif
 1645 
 1646         rlat_test(cb);
 1647 }
 1648 
 1649 static void krping_wlat_test_client(struct krping_cb *cb)
 1650 {
 1651         const struct ib_send_wr *bad_wr;
 1652         struct ib_wc wc;
 1653         int ret;
 1654 
 1655         cb->state = RDMA_READ_ADV;
 1656 
 1657         /* Send STAG/TO/Len to client */
 1658         krping_format_send(cb, cb->start_dma_addr);
 1659         if (cb->state == ERROR) {
 1660                 printk(KERN_ERR PFX "krping_format_send failed\n");
 1661                 return;
 1662         }
 1663         ret = ib_post_send(cb->qp, &cb->sq_wr, &bad_wr);
 1664         if (ret) {
 1665                 printk(KERN_ERR PFX "post send error %d\n", ret);
 1666                 return;
 1667         }
 1668 
 1669         /* Spin waiting for send completion */
 1670         while ((ret = ib_poll_cq(cb->cq, 1, &wc) == 0));
 1671         if (ret < 0) {
 1672                 printk(KERN_ERR PFX "poll error %d\n", ret);
 1673                 return;
 1674         }
 1675         if (wc.status) {
 1676                 printk(KERN_ERR PFX "send completion error %d\n", wc.status);
 1677                 return;
 1678         }
 1679 
 1680         /* Spin waiting for server's Start STAG/TO/Len */
 1681         while (cb->state < RDMA_WRITE_ADV) {
 1682                 krping_cq_event_handler(cb->cq, cb);
 1683         }
 1684 
 1685         wlat_test(cb);
 1686 }
 1687 
 1688 static void krping_bw_test_client(struct krping_cb *cb)
 1689 {
 1690         const struct ib_send_wr *bad_wr;
 1691         struct ib_wc wc;
 1692         int ret;
 1693 
 1694         cb->state = RDMA_READ_ADV;
 1695 
 1696         /* Send STAG/TO/Len to client */
 1697         krping_format_send(cb, cb->start_dma_addr);
 1698         if (cb->state == ERROR) {
 1699                 printk(KERN_ERR PFX "krping_format_send failed\n");
 1700                 return;
 1701         }
 1702         ret = ib_post_send(cb->qp, &cb->sq_wr, &bad_wr);
 1703         if (ret) {
 1704                 printk(KERN_ERR PFX "post send error %d\n", ret);
 1705                 return;
 1706         }
 1707 
 1708         /* Spin waiting for send completion */
 1709         while ((ret = ib_poll_cq(cb->cq, 1, &wc) == 0));
 1710         if (ret < 0) {
 1711                 printk(KERN_ERR PFX "poll error %d\n", ret);
 1712                 return;
 1713         }
 1714         if (wc.status) {
 1715                 printk(KERN_ERR PFX "send completion error %d\n", wc.status);
 1716                 return;
 1717         }
 1718 
 1719         /* Spin waiting for server's Start STAG/TO/Len */
 1720         while (cb->state < RDMA_WRITE_ADV) {
 1721                 krping_cq_event_handler(cb->cq, cb);
 1722         }
 1723 
 1724         bw_test(cb);
 1725 }
 1726 
 1727 /*
 1728  * Manual qp flush test
 1729  */
 1730 static void flush_qp(struct krping_cb *cb)
 1731 {
 1732         struct ib_send_wr wr = { 0 };
 1733         const struct ib_send_wr *bad;
 1734         struct ib_recv_wr recv_wr = { 0 };
 1735         const struct ib_recv_wr *recv_bad;
 1736         struct ib_wc wc;
 1737         int ret;
 1738         int flushed = 0;
 1739         int ccnt = 0;
 1740 
 1741         rdma_disconnect(cb->cm_id);
 1742         DEBUG_LOG("disconnected!\n");
 1743 
 1744         wr.opcode = IB_WR_SEND;
 1745         wr.wr_id = 0xdeadbeefcafebabe;
 1746         ret = ib_post_send(cb->qp, &wr, &bad);
 1747         if (ret) {
 1748                 printk(KERN_ERR PFX "%s post_send failed ret %d\n", __func__, ret);
 1749                 return;
 1750         }
 1751 
 1752         recv_wr.wr_id = 0xcafebabedeadbeef;
 1753         ret = ib_post_recv(cb->qp, &recv_wr, &recv_bad);
 1754         if (ret) {
 1755                 printk(KERN_ERR PFX "%s post_recv failed ret %d\n", __func__, ret);
 1756                 return;
 1757         }
 1758 
 1759         /* poll until the flush WRs complete */
 1760         do {
 1761                 ret = ib_poll_cq(cb->cq, 1, &wc);
 1762                 if (ret < 0) {
 1763                         printk(KERN_ERR PFX "ib_poll_cq failed %d\n", ret);
 1764                         return;
 1765                 }
 1766                 if (ret == 0)
 1767                         continue;
 1768                 ccnt++;
 1769                 if (wc.wr_id == 0xdeadbeefcafebabe ||
 1770                     wc.wr_id == 0xcafebabedeadbeef)
 1771                         flushed++;
 1772         } while (flushed != 2);
 1773         DEBUG_LOG("qp_flushed! ccnt %u\n", ccnt);
 1774 }
 1775 
 1776 static void krping_fr_test(struct krping_cb *cb)
 1777 {
 1778         struct ib_send_wr inv;
 1779         const struct ib_send_wr *bad;
 1780         struct ib_reg_wr fr;
 1781         struct ib_wc wc;
 1782         u8 key = 0;
 1783         struct ib_mr *mr;
 1784         int ret;
 1785         int size = cb->size;
 1786         int plen = (((size - 1) & PAGE_MASK) + PAGE_SIZE) >> PAGE_SHIFT;
 1787         unsigned long start;
 1788         int count = 0;
 1789         int scnt = 0;
 1790         struct scatterlist sg = {0};
 1791 
 1792         mr = ib_alloc_mr(cb->pd, IB_MR_TYPE_MEM_REG, plen);
 1793         if (IS_ERR(mr)) {
 1794                 printk(KERN_ERR PFX "ib_alloc_mr failed %ld\n", PTR_ERR(mr));
 1795                 return;
 1796         }
 1797 
 1798         sg_dma_address(&sg) = (dma_addr_t)0xcafebabe0000ULL;
 1799         sg_dma_len(&sg) = size;
 1800         ret = ib_map_mr_sg(mr, &sg, 1, NULL, PAGE_SIZE);
 1801         if (ret <= 0) {
 1802                 printk(KERN_ERR PFX "ib_map_mr_sge err %d\n", ret);
 1803                 goto err2;
 1804         }
 1805 
 1806         memset(&fr, 0, sizeof fr);
 1807         fr.wr.opcode = IB_WR_REG_MR;
 1808         fr.access = IB_ACCESS_REMOTE_WRITE | IB_ACCESS_LOCAL_WRITE;
 1809         fr.mr = mr;
 1810         fr.wr.next = &inv;
 1811 
 1812         memset(&inv, 0, sizeof inv);
 1813         inv.opcode = IB_WR_LOCAL_INV;
 1814         inv.send_flags = IB_SEND_SIGNALED;
 1815         
 1816         DEBUG_LOG("fr_test: stag index 0x%x plen %u size %u depth %u\n", mr->rkey >> 8, plen, cb->size, cb->txdepth);
 1817         start = time_uptime;
 1818         while (!cb->count || count <= cb->count) {
 1819                 if (SIGPENDING(curthread)) {
 1820                         printk(KERN_ERR PFX "signal!\n");
 1821                         break;
 1822                 }
 1823                 if ((time_uptime - start) >= 9) {
 1824                         DEBUG_LOG("fr_test: pausing 1 second! count %u latest size %u plen %u\n", count, size, plen);
 1825                         wait_event_interruptible_timeout(cb->sem, cb->state == ERROR, HZ);
 1826                         if (cb->state == ERROR)
 1827                                 break;
 1828                         start = time_uptime;
 1829                 }       
 1830                 while (scnt < (cb->txdepth>>1)) {
 1831                         ib_update_fast_reg_key(mr, ++key);
 1832                         fr.key = mr->rkey;
 1833                         inv.ex.invalidate_rkey = mr->rkey;
 1834 
 1835                         size = arc4random() % cb->size;
 1836                         if (size == 0)
 1837                                 size = cb->size;
 1838                         sg_dma_len(&sg) = size;
 1839                         ret = ib_map_mr_sg(mr, &sg, 1, NULL, PAGE_SIZE);
 1840                         if (ret <= 0) {
 1841                                 printk(KERN_ERR PFX "ib_map_mr_sge err %d\n", ret);
 1842                                 goto err2;
 1843                         }
 1844                         ret = ib_post_send(cb->qp, &fr.wr, &bad);
 1845                         if (ret) {
 1846                                 printk(KERN_ERR PFX "ib_post_send failed %d\n", ret);
 1847                                 goto err2;      
 1848                         }
 1849                         scnt++;
 1850                 }
 1851 
 1852                 ret = ib_poll_cq(cb->cq, 1, &wc);
 1853                 if (ret < 0) {
 1854                         printk(KERN_ERR PFX "ib_poll_cq failed %d\n", ret);
 1855                         goto err2;      
 1856                 }
 1857                 if (ret == 1) {
 1858                         if (wc.status) {
 1859                                 printk(KERN_ERR PFX "completion error %u\n", wc.status);
 1860                                 goto err2;
 1861                         }
 1862                         count++;
 1863                         scnt--;
 1864                 }
 1865         }
 1866 err2:
 1867         flush_qp(cb);
 1868         DEBUG_LOG("fr_test: done!\n");
 1869         ib_dereg_mr(mr);
 1870 }
 1871 
 1872 static int krping_connect_client(struct krping_cb *cb)
 1873 {
 1874         struct rdma_conn_param conn_param;
 1875         int ret;
 1876 
 1877         memset(&conn_param, 0, sizeof conn_param);
 1878         conn_param.responder_resources = 1;
 1879         conn_param.initiator_depth = 1;
 1880         conn_param.retry_count = 10;
 1881 
 1882         ret = rdma_connect(cb->cm_id, &conn_param);
 1883         if (ret) {
 1884                 printk(KERN_ERR PFX "rdma_connect error %d\n", ret);
 1885                 return ret;
 1886         }
 1887 
 1888         wait_event_interruptible(cb->sem, cb->state >= CONNECTED);
 1889         if (cb->state == ERROR) {
 1890                 printk(KERN_ERR PFX "wait for CONNECTED state %d\n", cb->state);
 1891                 return -1;
 1892         }
 1893 
 1894         DEBUG_LOG("rdma_connect successful\n");
 1895         return 0;
 1896 }
 1897 
 1898 static int krping_bind_client(struct krping_cb *cb)
 1899 {
 1900         struct sockaddr_storage sin;
 1901         int ret;
 1902 
 1903         fill_sockaddr(&sin, cb);
 1904 
 1905         ret = rdma_resolve_addr(cb->cm_id, NULL, (struct sockaddr *)&sin, 2000);
 1906         if (ret) {
 1907                 printk(KERN_ERR PFX "rdma_resolve_addr error %d\n", ret);
 1908                 return ret;
 1909         }
 1910 
 1911         wait_event_interruptible(cb->sem, cb->state >= ROUTE_RESOLVED);
 1912         if (cb->state != ROUTE_RESOLVED) {
 1913                 printk(KERN_ERR PFX 
 1914                        "addr/route resolution did not resolve: state %d\n",
 1915                        cb->state);
 1916                 return -EINTR;
 1917         }
 1918 
 1919         if (!reg_supported(cb->cm_id->device))
 1920                 return -EINVAL;
 1921 
 1922         DEBUG_LOG("rdma_resolve_addr - rdma_resolve_route successful\n");
 1923         return 0;
 1924 }
 1925 
 1926 static void krping_run_client(struct krping_cb *cb)
 1927 {
 1928         const struct ib_recv_wr *bad_wr;
 1929         int ret;
 1930 
 1931         /* set type of service, if any */
 1932         if (cb->tos != 0)
 1933                 rdma_set_service_type(cb->cm_id, cb->tos);
 1934 
 1935         ret = krping_bind_client(cb);
 1936         if (ret)
 1937                 return;
 1938 
 1939         ret = krping_setup_qp(cb, cb->cm_id);
 1940         if (ret) {
 1941                 printk(KERN_ERR PFX "setup_qp failed: %d\n", ret);
 1942                 return;
 1943         }
 1944 
 1945         ret = krping_setup_buffers(cb);
 1946         if (ret) {
 1947                 printk(KERN_ERR PFX "krping_setup_buffers failed: %d\n", ret);
 1948                 goto err1;
 1949         }
 1950 
 1951         ret = ib_post_recv(cb->qp, &cb->rq_wr, &bad_wr);
 1952         if (ret) {
 1953                 printk(KERN_ERR PFX "ib_post_recv failed: %d\n", ret);
 1954                 goto err2;
 1955         }
 1956 
 1957         ret = krping_connect_client(cb);
 1958         if (ret) {
 1959                 printk(KERN_ERR PFX "connect error %d\n", ret);
 1960                 goto err2;
 1961         }
 1962 
 1963         if (cb->wlat)
 1964                 krping_wlat_test_client(cb);
 1965         else if (cb->rlat)
 1966                 krping_rlat_test_client(cb);
 1967         else if (cb->bw)
 1968                 krping_bw_test_client(cb);
 1969         else if (cb->frtest)
 1970                 krping_fr_test(cb);
 1971         else
 1972                 krping_test_client(cb);
 1973         rdma_disconnect(cb->cm_id);
 1974 err2:
 1975         krping_free_buffers(cb);
 1976 err1:
 1977         krping_free_qp(cb);
 1978 }
 1979 
 1980 static uint16_t
 1981 krping_get_ipv6_scope_id(char *name)
 1982 {
 1983         struct ifnet *ifp;
 1984         uint16_t retval;
 1985 
 1986         if (name == NULL)
 1987                 return (0);
 1988         CURVNET_SET_QUIET(TD_TO_VNET(curthread));
 1989         ifp = ifunit_ref(name);
 1990         CURVNET_RESTORE();
 1991         if (ifp == NULL)
 1992                 return (0);
 1993         retval = ifp->if_index;
 1994         if_rele(ifp);
 1995         return (retval);
 1996 }
 1997 
 1998 int krping_doit(char *cmd)
 1999 {
 2000         struct krping_cb *cb;
 2001         int op;
 2002         int ret = 0;
 2003         char *optarg;
 2004         char *scope;
 2005         unsigned long optint;
 2006 
 2007         cb = kzalloc(sizeof(*cb), GFP_KERNEL);
 2008         if (!cb)
 2009                 return -ENOMEM;
 2010 
 2011         mutex_lock(&krping_mutex);
 2012         list_add_tail(&cb->list, &krping_cbs);
 2013         mutex_unlock(&krping_mutex);
 2014 
 2015         cb->server = -1;
 2016         cb->state = IDLE;
 2017         cb->size = 64;
 2018         cb->txdepth = RPING_SQ_DEPTH;
 2019         init_waitqueue_head(&cb->sem);
 2020 
 2021         while ((op = krping_getopt("krping", &cmd, krping_opts, NULL, &optarg,
 2022                               &optint)) != 0) {
 2023                 switch (op) {
 2024                 case 'a':
 2025                         cb->addr_str = optarg;
 2026                         cb->addr_type = AF_INET;
 2027                         DEBUG_LOG("ipaddr (%s)\n", optarg);
 2028                         if (inet_pton(AF_INET, optarg, cb->addr) != 1) {
 2029                                 printk(KERN_ERR PFX "bad addr string %s\n",
 2030                                     optarg);
 2031                                 ret = EINVAL;
 2032                         }
 2033                         break;
 2034                 case 'A':
 2035                         cb->addr_str = optarg;
 2036                         cb->addr_type = AF_INET6;
 2037                         DEBUG_LOG("ipv6addr (%s)\n", optarg);
 2038                         scope = strstr(optarg, "%");
 2039                         /* extract scope ID, if any */
 2040                         if (scope != NULL)
 2041                                 *scope++ = 0;
 2042                         /* extract IPv6 network address */
 2043                         if (inet_pton(AF_INET6, optarg, cb->addr) != 1) {
 2044                                 printk(KERN_ERR PFX "bad addr string %s\n",
 2045                                     optarg);
 2046                                 ret = EINVAL;
 2047                         } else if (IN6_IS_SCOPE_LINKLOCAL((struct in6_addr *)cb->addr) ||
 2048                             IN6_IS_ADDR_MC_INTFACELOCAL((struct in6_addr *)cb->addr)) {
 2049                                 uint16_t scope_id = krping_get_ipv6_scope_id(scope);
 2050                                 DEBUG_LOG("ipv6 scope ID = %d\n", scope_id);
 2051                                 cb->addr[2] = scope_id >> 8;
 2052                                 cb->addr[3] = scope_id & 0xFF;
 2053                         }
 2054                         break;
 2055                 case 'p':
 2056                         cb->port = htons(optint);
 2057                         DEBUG_LOG("port %d\n", (int)optint);
 2058                         break;
 2059                 case 'P':
 2060                         cb->poll = 1;
 2061                         DEBUG_LOG("server\n");
 2062                         break;
 2063                 case 's':
 2064                         cb->server = 1;
 2065                         DEBUG_LOG("server\n");
 2066                         break;
 2067                 case 'c':
 2068                         cb->server = 0;
 2069                         DEBUG_LOG("client\n");
 2070                         break;
 2071                 case 'S':
 2072                         cb->size = optint;
 2073                         if ((cb->size < 1) ||
 2074                             (cb->size > RPING_BUFSIZE)) {
 2075                                 printk(KERN_ERR PFX "Invalid size %d "
 2076                                        "(valid range is 1 to %d)\n",
 2077                                        cb->size, RPING_BUFSIZE);
 2078                                 ret = EINVAL;
 2079                         } else
 2080                                 DEBUG_LOG("size %d\n", (int)optint);
 2081                         break;
 2082                 case 'C':
 2083                         cb->count = optint;
 2084                         if (cb->count < 0) {
 2085                                 printk(KERN_ERR PFX "Invalid count %d\n",
 2086                                         cb->count);
 2087                                 ret = EINVAL;
 2088                         } else
 2089                                 DEBUG_LOG("count %d\n", (int) cb->count);
 2090                         break;
 2091                 case 'v':
 2092                         cb->verbose++;
 2093                         DEBUG_LOG("verbose\n");
 2094                         break;
 2095                 case 'V':
 2096                         cb->validate++;
 2097                         DEBUG_LOG("validate data\n");
 2098                         break;
 2099                 case 'l':
 2100                         cb->wlat++;
 2101                         break;
 2102                 case 'L':
 2103                         cb->rlat++;
 2104                         break;
 2105                 case 'B':
 2106                         cb->bw++;
 2107                         break;
 2108                 case 'd':
 2109                         cb->duplex++;
 2110                         break;
 2111                 case 'I':
 2112                         cb->server_invalidate = 1;
 2113                         break;
 2114                 case 't':
 2115                         cb->tos = optint;
 2116                         DEBUG_LOG("type of service, tos=%d\n", (int) cb->tos);
 2117                         break;
 2118                 case 'T':
 2119                         cb->txdepth = optint;
 2120                         DEBUG_LOG("txdepth %d\n", (int) cb->txdepth);
 2121                         break;
 2122                 case 'Z':
 2123                         cb->local_dma_lkey = 1;
 2124                         DEBUG_LOG("using local dma lkey\n");
 2125                         break;
 2126                 case 'R':
 2127                         cb->read_inv = 1;
 2128                         DEBUG_LOG("using read-with-inv\n");
 2129                         break;
 2130                 case 'f':
 2131                         cb->frtest = 1;
 2132                         DEBUG_LOG("fast-reg test!\n");
 2133                         break;
 2134                 default:
 2135                         printk(KERN_ERR PFX "unknown opt %s\n", optarg);
 2136                         ret = -EINVAL;
 2137                         break;
 2138                 }
 2139         }
 2140         if (ret)
 2141                 goto out;
 2142 
 2143         if (cb->server == -1) {
 2144                 printk(KERN_ERR PFX "must be either client or server\n");
 2145                 ret = -EINVAL;
 2146                 goto out;
 2147         }
 2148 
 2149         if (cb->server && cb->frtest) {
 2150                 printk(KERN_ERR PFX "must be client to run frtest\n");
 2151                 ret = -EINVAL;
 2152                 goto out;
 2153         }
 2154 
 2155         if ((cb->frtest + cb->bw + cb->rlat + cb->wlat) > 1) {
 2156                 printk(KERN_ERR PFX "Pick only one test: fr, bw, rlat, wlat\n");
 2157                 ret = -EINVAL;
 2158                 goto out;
 2159         }
 2160 
 2161         if (cb->wlat || cb->rlat || cb->bw) {
 2162                 printk(KERN_ERR PFX "wlat, rlat, and bw tests only support mem_mode MR - which is no longer supported\n");
 2163                 ret = -EINVAL;
 2164                 goto out;
 2165         }
 2166 
 2167         cb->cm_id = rdma_create_id(TD_TO_VNET(curthread), krping_cma_event_handler, cb, RDMA_PS_TCP, IB_QPT_RC);
 2168         if (IS_ERR(cb->cm_id)) {
 2169                 ret = PTR_ERR(cb->cm_id);
 2170                 printk(KERN_ERR PFX "rdma_create_id error %d\n", ret);
 2171                 goto out;
 2172         }
 2173         DEBUG_LOG("created cm_id %p\n", cb->cm_id);
 2174 
 2175         if (cb->server)
 2176                 krping_run_server(cb);
 2177         else
 2178                 krping_run_client(cb);
 2179 
 2180         DEBUG_LOG("destroy cm_id %p\n", cb->cm_id);
 2181         rdma_destroy_id(cb->cm_id);
 2182 out:
 2183         mutex_lock(&krping_mutex);
 2184         list_del(&cb->list);
 2185         mutex_unlock(&krping_mutex);
 2186         kfree(cb);
 2187         return ret;
 2188 }
 2189 
 2190 void
 2191 krping_walk_cb_list(void (*f)(struct krping_stats *, void *), void *arg)
 2192 {
 2193         struct krping_cb *cb;
 2194 
 2195         mutex_lock(&krping_mutex);
 2196         list_for_each_entry(cb, &krping_cbs, list)
 2197             (*f)(cb->pd ? &cb->stats : NULL, arg);
 2198         mutex_unlock(&krping_mutex);
 2199 }
 2200 
 2201 void
 2202 krping_cancel_all(void)
 2203 {
 2204         struct krping_cb *cb;
 2205 
 2206         mutex_lock(&krping_mutex);
 2207         list_for_each_entry(cb, &krping_cbs, list) {
 2208                 cb->state = ERROR;
 2209                 wake_up_interruptible(&cb->sem);
 2210         }
 2211         mutex_unlock(&krping_mutex);
 2212 }
 2213 

Cache object: 9e5f734066d5edb31370024a9c6c7057


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]


This page is part of the FreeBSD/Linux Kernel Cross-Reference, and was automatically generated using a modified version of the LXR engine.