The Design and Implementation of the FreeBSD Operating System, Second Edition
Now available: The Design and Implementation of the FreeBSD Operating System (Second Edition)


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]

FreeBSD/Linux Kernel Cross Reference
sys/dev/iser/icl_iser.h

Version: -  FREEBSD  -  FREEBSD-13-STABLE  -  FREEBSD-13-0  -  FREEBSD-12-STABLE  -  FREEBSD-12-0  -  FREEBSD-11-STABLE  -  FREEBSD-11-0  -  FREEBSD-10-STABLE  -  FREEBSD-10-0  -  FREEBSD-9-STABLE  -  FREEBSD-9-0  -  FREEBSD-8-STABLE  -  FREEBSD-8-0  -  FREEBSD-7-STABLE  -  FREEBSD-7-0  -  FREEBSD-6-STABLE  -  FREEBSD-6-0  -  FREEBSD-5-STABLE  -  FREEBSD-5-0  -  FREEBSD-4-STABLE  -  FREEBSD-3-STABLE  -  FREEBSD22  -  l41  -  OPENBSD  -  linux-2.6  -  MK84  -  PLAN9  -  xnu-8792 
SearchContext: -  none  -  3  -  10 

    1 /* $FreeBSD$ */
    2 /*-
    3  * Copyright (c) 2015, Mellanox Technologies, Inc. All rights reserved.
    4  *
    5  * Redistribution and use in source and binary forms, with or without
    6  * modification, are permitted provided that the following conditions
    7  * are met:
    8  * 1. Redistributions of source code must retain the above copyright
    9  *    notice, this list of conditions and the following disclaimer.
   10  * 2. Redistributions in binary form must reproduce the above copyright
   11  *    notice, this list of conditions and the following disclaimer in the
   12  *    documentation and/or other materials provided with the distribution.
   13  *
   14  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
   15  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
   16  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
   17  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
   18  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
   19  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
   20  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
   21  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
   22  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
   23  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
   24  * SUCH DAMAGE.
   25  */
   26 
   27 #ifndef ICL_ISER_H
   28 #define ICL_ISER_H
   29 
   30 /*
   31  * iSCSI Common Layer for RDMA.
   32  */
   33 
   34 #include <sys/cdefs.h>
   35 #include <sys/param.h>
   36 #include <sys/capsicum.h>
   37 #include <sys/condvar.h>
   38 #include <sys/conf.h>
   39 #include <sys/file.h>
   40 #include <sys/kernel.h>
   41 #include <sys/kthread.h>
   42 #include <sys/lock.h>
   43 #include <sys/mbuf.h>
   44 #include <sys/mutex.h>
   45 #include <sys/module.h>
   46 #include <sys/protosw.h>
   47 #include <sys/socket.h>
   48 #include <sys/socketvar.h>
   49 #include <sys/sysctl.h>
   50 #include <sys/systm.h>
   51 #include <sys/sx.h>
   52 #include <sys/uio.h>
   53 #include <sys/taskqueue.h>
   54 #include <sys/bio.h>
   55 #include <vm/uma.h>
   56 #include <netinet/in.h>
   57 #include <netinet/tcp.h>
   58 #include <dev/iscsi/icl.h>
   59 #include <dev/iscsi/iscsi_proto.h>
   60 #include <icl_conn_if.h>
   61 #include <cam/cam.h>
   62 #include <cam/cam_ccb.h>
   63 #include <rdma/ib_verbs.h>
   64 #include <rdma/ib_fmr_pool.h>
   65 #include <rdma/rdma_cm.h>
   66 /* Logging helpers: each prints a severity tag, __func__, the format X and "\n". */
   67 /* X must be a string literal (it is pasted between literals); DBG needs iser_debug > 2. */
   68 #define ISER_DBG(X, ...)                                                \
   69         do {                                                            \
   70                 if (unlikely(iser_debug > 2))                           \
   71                         printf("DEBUG: %s: " X "\n",                    \
   72                                 __func__, ## __VA_ARGS__);              \
   73         } while (0)
   74 /* Printed only when iser_debug > 1. */
   75 #define ISER_INFO(X, ...)                                               \
   76         do {                                                            \
   77                 if (unlikely(iser_debug > 1))                           \
   78                         printf("INFO: %s: " X "\n",                     \
   79                                 __func__, ## __VA_ARGS__);              \
   80         } while (0)
   81 /* Printed only when iser_debug > 0. */
   82 #define ISER_WARN(X, ...)                                               \
   83         do {                                                            \
   84                 if (unlikely(iser_debug > 0)) {                         \
   85                         printf("WARNING: %s: " X "\n",                  \
   86                                 __func__, ## __VA_ARGS__);              \
   87                 }                                                       \
   88         } while (0)
   89 /* Printed unconditionally; expands to a single printf() expression (no do/while wrapper). */
   90 #define ISER_ERR(X, ...)                                                \
   91         printf("ERROR: %s: " X "\n", __func__, ## __VA_ARGS__)
   92 /* Single-bit flag values. NOTE(review): presumably bits of iser_hdr.flags - confirm. */
   93 #define ISER_VER                        0x10
   94 #define ISER_WSV                        0x08
   95 #define ISER_RSV                        0x04
   96 /* Two distinct 64-bit ids. NOTE(review): presumably reserved wr_id values - confirm. */
   97 #define ISER_FASTREG_LI_WRID            0xffffffffffffffffULL
   98 #define ISER_BEACON_WRID                0xfffffffffffffffeULL
   99 /* 4 KiB geometry: SIZE_4K is 4096 and MASK_4K clears the low 12 bits of a 64-bit value. */
  100 #define SHIFT_4K        12
  101 #define SIZE_4K (1ULL << SHIFT_4K)
  102 #define MASK_4K (~(SIZE_4K-1))
  103 
  104 /* support up to 512KB in one RDMA: 0x80000 bytes / 4K per entry = 128 sg entries */
  105 #define ISCSI_ISER_SG_TABLESIZE         (0x80000 >> SHIFT_4K)
  106 #define ISER_DEF_XMIT_CMDS_MAX 256
  107 
  108 /* the max RX (recv) WR supported by the iSER QP is defined by                 *
  109  * max_recv_wr = commands_max + recv_beacon                                    */
  110 #define ISER_QP_MAX_RECV_DTOS  (ISER_DEF_XMIT_CMDS_MAX + 1) /* 257 */
  111 #define ISER_MIN_POSTED_RX              (ISER_DEF_XMIT_CMDS_MAX >> 2) /* 64; also sizes ib_conn.rx_wr[] */
  112 
  113 /* QP settings */
  114 /* Maximal bounds on miscellaneous (non-command) PDUs, per direction */
  115 #define ISER_MAX_RX_MISC_PDUS           4 /* NOOP_IN(2) , ASYNC_EVENT(2)   */
  116 #define ISER_MAX_TX_MISC_PDUS           6 /* NOOP_OUT(2), TEXT(1), SCSI_TMFUNC(2), LOGOUT(1) */
  117 
  118 /* the max TX (send) WR supported by the iSER QP is defined by                 *
  119  * max_send_wr = T * (1 + D) + C ; T = max commands, C = misc PDUs (TX + RX),  *
  120  * D = max inflight dataouts expected per SCSI command. The tx posting &       *
  121  * completion handling code supports -EAGAIN scheme where tx is suspended till *
  122  * the QP has room for more send WR. D=8 comes from 64K/8K                     */
  123 
  124 #define ISER_INFLIGHT_DATAOUTS          8
  125 
  126 /* the send_beacon increases the max_send_wr by 1 (256 * 9 + 6 + 4 + 1 = 2315) */
  127 #define ISER_QP_MAX_REQ_DTOS            (ISER_DEF_XMIT_CMDS_MAX *    \
  128                                         (1 + ISER_INFLIGHT_DATAOUTS) + \
  129                                         ISER_MAX_TX_MISC_PDUS        + \
  130                                         ISER_MAX_RX_MISC_PDUS + 1)
  131 /* Commands that fit in send_wr send WRs: ((send_wr) - misc PDUs - 1 beacon) / (1 + D), integer division. */
  132 #define ISER_GET_MAX_XMIT_CMDS(send_wr) (((send_wr)                     \
  133                                          - ISER_MAX_TX_MISC_PDUS        \
  134                                          - ISER_MAX_RX_MISC_PDUS - 1) / \
  135                                          (1 + ISER_INFLIGHT_DATAOUTS))
  136 
  137 #define ISER_WC_BATCH_COUNT   16 /* entries in iser_comp.wcs[] */
  138 #define ISER_SIGNAL_CMD_COUNT 32
  139 
  140 /* Maximal QPs recommended per CQ. In case we use more QPs per CQ we might     *
  141  * encounter a CQ overrun state.                                               */
  142 #define ISCSI_ISER_MAX_CONN     8
  143 #define ISER_MAX_RX_LEN         (ISER_QP_MAX_RECV_DTOS * ISCSI_ISER_MAX_CONN) /* 257 * 8 = 2056 */
  144 #define ISER_MAX_TX_LEN         (ISER_QP_MAX_REQ_DTOS  * ISCSI_ISER_MAX_CONN) /* 2315 * 8 = 18520 */
  145 #define ISER_MAX_CQ_LEN         (ISER_MAX_RX_LEN + ISER_MAX_TX_LEN + \
  146                                  ISCSI_ISER_MAX_CONN) /* 2056 + 18520 + 8 = 20584 */
  147 /* Single-bit flag values. NOTE(review): presumably bits of iser_cm_hdr.flags - confirm. */
  148 #define ISER_ZBVA_NOT_SUPPORTED                0x80
  149 #define ISER_SEND_W_INV_NOT_SUPPORTED   0x40
  150 
  151 #define ISCSI_DEF_MAX_RECV_SEG_LEN      8192 /* data part of ISER_RX_LOGIN_SIZE */
  152 #define ISCSI_OPCODE_MASK               0x3f /* low 6 bits */
  153 /* Map a pointer to the embedded icl_conn / icl_pdu member back to its enclosing iSER object. */
  154 #define icl_to_iser_conn(ic) \
  155         container_of(ic, struct iser_conn, icl_conn)
  156 #define icl_to_iser_pdu(ip) \
  157         container_of(ip, struct icl_iser_pdu, icl_pdu)
  158 
  159 /**
  160  * struct iser_hdr - iSER header (packed: no padding between members)
  161  *
  162  * @flags:        flags support (zbva, remote_inv)
  163  * @rsvd:         reserved
  164  * @write_stag:   write rkey (declared big-endian)
  165  * @write_va:     write virtual address (declared big-endian)
  166  * @read_stag:    read rkey (declared big-endian)
  167  * @read_va:      read virtual address (declared big-endian)
  168  */
  169 struct iser_hdr {
  170         u8      flags;
  171         u8      rsvd[3];
  172         __be32  write_stag;
  173         __be64  write_va;
  174         __be32  read_stag;
  175         __be64  read_va;
  176 } __attribute__((packed));
  177 /* Packed header: one flags byte plus three reserved bytes. NOTE(review): presumably sent as CM private data - confirm. */
  178 struct iser_cm_hdr {
  179         u8      flags;
  180         u8      rsvd[3];
  181 } __packed;
  182 
  183 /* Constant PDU length calculations */
  184 #define ISER_HEADERS_LEN  (sizeof(struct iser_hdr) + ISCSI_BHS_SIZE) /* iSER header + iSCSI BHS */
  185 /* Regular receive: both headers plus a 128-byte data segment (size of iser_rx_desc.data). */
  186 #define ISER_RECV_DATA_SEG_LEN  128
  187 #define ISER_RX_PAYLOAD_SIZE    (ISER_HEADERS_LEN + ISER_RECV_DATA_SEG_LEN)
  188 /* Login receive: both headers plus ISCSI_DEF_MAX_RECV_SEG_LEN bytes of data. */
  189 #define ISER_RX_LOGIN_SIZE      (ISER_HEADERS_LEN + ISCSI_DEF_MAX_RECV_SEG_LEN)
  190 /* Connection life-cycle states; this is the type of iser_conn.state. */
  191 enum iser_conn_state {
  192         ISER_CONN_INIT,            /* descriptor allocd, no conn          */
  193         ISER_CONN_PENDING,         /* in the process of being established */
  194         ISER_CONN_UP,              /* up and running                      */
  195         ISER_CONN_TERMINATING,     /* in the process of being terminated  */
  196         ISER_CONN_DOWN,            /* shut down                           */
  197         ISER_CONN_STATES_NUM       /* number of states above (5), not a state */
  198 };
  199 /* Per-PDU progress marker; this is the type of icl_iser_pdu.status. */
  200 enum iser_task_status {
  201         ISER_TASK_STATUS_INIT = 0,
  202         ISER_TASK_STATUS_STARTED,
  203         ISER_TASK_STATUS_COMPLETED
  204 };
  205 /* Data transfer direction; dimensions the dir[], rdma_reg[] and data[] arrays of icl_iser_pdu. */
  206 enum iser_data_dir {
  207         ISER_DIR_IN = 0,           /* to initiator */
  208         ISER_DIR_OUT,              /* from initiator */
  209         ISER_DIRS_NUM              /* number of directions (2); used as an array size */
  210 };
  211 
  212 /**
  213  * struct iser_mem_reg - iSER memory registration info (one per direction in icl_iser_pdu)
  214  *
  215  * @sge:          memory region sg element (struct ib_sge)
  216  * @rkey:         memory region remote key
  217  * @mem_h:        untyped pointer to registration context (FMR/Fastreg)
  218  */
  219 struct iser_mem_reg {
  220         struct ib_sge    sge;
  221         u32              rkey;
  222         void            *mem_h;
  223 };
  224 /* Kind of PDU carried by a TX descriptor; this is the type of iser_tx_desc.type. */
  225 enum iser_desc_type {
  226         ISCSI_TX_CONTROL ,
  227         ISCSI_TX_SCSI_COMMAND,
  228         ISCSI_TX_DATAOUT
  229 };
  230 
  231 /**
  232  * struct iser_data_buf - iSER data buffer
  233  *
  234  * @sg:           pointer to the sg list
  235  * @size:         num entries of this sg
  236  * @data_len:     total buffer byte len
  237  * @dma_nents:    returned by dma_map_sg
  238  * @copy_buf:     allocated copy buf for SGs unaligned
  239  *                for rdma which are copied
  240  * @orig_sg:      pointer to the original sg list (in case
  241  *                we used a copy)
  242  * @sg_single:    SG-ified clone of a non SG SC or
  243  *                unaligned SG
  244  */
  245 struct iser_data_buf {
  246         struct scatterlist sgl[ISCSI_ISER_SG_TABLESIZE]; /* embedded sg table; not listed in the doc block above */
  247         void               *sg;
  248         int                size;
  249         unsigned long      data_len;
  250         unsigned int       dma_nents;
  251         char               *copy_buf;
  252         struct scatterlist *orig_sg;
  253         struct scatterlist sg_single;
  254   };
  255 
  256 /* fwd declarations */
  257 struct iser_conn;
  258 struct ib_conn;
  259 struct iser_device;
  260 
  261 /**
  262  * struct iser_tx_desc - iSER TX descriptor (for send wr_id)
  263  *
  264  * @iser_header:   iser header
  265  * @iscsi_header:  iscsi header (bhs), declared packed
  266  * @type:          command/control/dataout (enum iser_desc_type)
  267  * @dma_addr:      header buffer dma_address
  268  * @tx_sg:         sg[0] points to iser/iscsi headers
  269  *                 sg[1] optionally points to either of immediate data
  270  *                 unsolicited data-out or control
  271  * @num_sge:       number of sges used on this TX task (tx_sg has 2 entries)
  272  * @mapped:        indicates if the descriptor is dma mapped
  273  */
  274 struct iser_tx_desc {
  275         struct iser_hdr              iser_header;
  276         struct iscsi_bhs             iscsi_header __attribute__((packed));
  277         enum   iser_desc_type        type;
  278         u64                          dma_addr;
  279         struct ib_sge                tx_sg[2];
  280         int                          num_sge;
  281         bool                         mapped;
  282 };
  283 
  284 #define ISER_RX_PAD_SIZE        (256 - (ISER_RX_PAYLOAD_SIZE + \
  285                                         sizeof(u64) + sizeof(struct ib_sge))) /* 256 minus payload, dma_addr and rx_sg */
  286 /**
  287  * struct iser_rx_desc - iSER RX descriptor (for recv wr_id), packed
  288  *
  289  * @iser_header:   iser header
  290  * @iscsi_header:  iscsi header
  291  * @data:          received data segment (ISER_RECV_DATA_SEG_LEN bytes)
  292  * @dma_addr:      receive buffer dma address
  293  * @rx_sg:         ib_sge of receive buffer
  294  * @pad:           for sense data TODO: Modify to maximum sense length supported
  295  */
  296 struct iser_rx_desc {
  297         struct iser_hdr              iser_header;
  298         struct iscsi_bhs             iscsi_header;
  299         char                         data[ISER_RECV_DATA_SEG_LEN];
  300         u64                          dma_addr;
  301         struct ib_sge                rx_sg;
  302         char                         pad[ISER_RX_PAD_SIZE]; /* pads the struct to 256 bytes, assuming sizeof(struct iscsi_bhs) == ISCSI_BHS_SIZE - confirm */
  303 } __attribute__((packed));
  304 /* iSER per-PDU context: the generic icl_pdu plus its TX descriptor and per-direction data state. */
  305 struct icl_iser_pdu {
  306         struct icl_pdu               icl_pdu; /* embedded; icl_to_iser_pdu() recovers this struct from it */
  307         struct iser_tx_desc          desc;
  308         struct iser_conn             *iser_conn;
  309         enum iser_task_status        status;
  310         struct ccb_scsiio                        *csio;
  311         int                          command_sent;
  312         int                          dir[ISER_DIRS_NUM]; /* this and the two arrays below have one slot per enum iser_data_dir */
  313         struct iser_mem_reg          rdma_reg[ISER_DIRS_NUM];
  314         struct iser_data_buf         data[ISER_DIRS_NUM];
  315 };
  316 
  317 /**
  318  * struct iser_comp - iSER completion context
  319  *
  320  * @device:     pointer to device handle
  321  * @cq:         completion queue
  322  * @wcs:        work completion array (ISER_WC_BATCH_COUNT entries)
  323  * @tq:         taskqueue handle
  324  * @task:       task to run task_fn
  325  * @active_qps: Number of active QPs attached
  326  *              to completion context
  327  */
  328 struct iser_comp {
  329         struct iser_device      *device;
  330         struct ib_cq            *cq;
  331         struct ib_wc             wcs[ISER_WC_BATCH_COUNT];
  332         struct taskqueue        *tq;
  333         struct task             task;
  334         int                      active_qps;
  335 };
  336 
  337 /**
  338  * struct iser_device - iSER device handle
  339  *
  340  * @ib_device:     RDMA device
  341  * @pd:            Protection Domain for this device
  342  * @dev_attr:      Device attributes container
  343  * @mr:            Global DMA memory region
  344  * @event_handler: IB events handle routine
  345  * @ig_list:       entry in devices list (presumably ig.device_list - confirm)
  346  * @refcount:      Reference counter, dominated by open iser connections
  347  * @comps_used:    Number of completion contexts used, Min between online
  348  *                 cpus and device max completion vectors
  349  * @comps:         Dynamically allocated array of completion handlers
  350  */
  351 struct iser_device {
  352         struct ib_device             *ib_device;
  353         struct ib_pd                 *pd;
  354         struct ib_device_attr        dev_attr;
  355         struct ib_mr                 *mr;
  356         struct ib_event_handler      event_handler;
  357         struct list_head             ig_list;
  358         int                          refcount;
  359         int                          comps_used;
  360         struct iser_comp             *comps;
  361 };
  362 
  363 /**
  364  * struct iser_reg_resources - Fast registration resources
  365  *
  366  * @mr:         memory region
  367  * @mr_valid:   is mr valid indicator (1-bit field)
  368  */
  369 struct iser_reg_resources {
  370         struct ib_mr                     *mr;
  371         u8                                mr_valid:1;
  372 };
  373 
  374 /**
  375  * struct fast_reg_descriptor - Fast registration descriptor
  376  *
  377  * @list:           entry in connection fastreg pool (presumably ib_conn.fastreg.pool - confirm)
  378  * @rsc:            data buffer registration resources (struct iser_reg_resources)
  379  */
  380 struct fast_reg_descriptor {
  381         struct list_head                  list;
  382         struct iser_reg_resources         rsc;
  383 };
  384 
  385 
  386 /**
  387  * struct iser_beacon - beacon to signal all flush errors were drained
  388  *
  389  * @send:           send wr (shares storage with @recv in an anonymous union)
  390  * @recv:           recv wr (shares storage with @send in an anonymous union)
  391  * @flush_lock:     protects flush_cv
  392  * @flush_cv:       condition variable for beacon flush
  393  */
  394 struct iser_beacon {
  395         union {
  396                 struct ib_send_wr       send;
  397                 struct ib_recv_wr       recv;
  398         };
  399         struct mtx                   flush_lock;
  400         struct cv                    flush_cv;
  401 };
  402 
  403 /**
  404  * struct ib_conn - Infiniband related objects
  405  *
  406  * @cma_id:              rdma_cm connection manager handle
  407  * @qp:                  Connection Queue-pair
  408  * @device:              reference to iser device
  409  * @comp:                iser completion context
  410   */
  411 struct ib_conn {
  412         struct rdma_cm_id           *cma_id;
  413         struct ib_qp                *qp;
  414         int                          post_recv_buf_count; /* NOTE(review): presumably number of posted recv buffers - confirm */
  415         u8                           sig_count; /* NOTE(review): presumably counts sends toward ISER_SIGNAL_CMD_COUNT - confirm */
  416         struct ib_recv_wr            rx_wr[ISER_MIN_POSTED_RX]; /* ISER_MIN_POSTED_RX (64) recv work requests */
  417         struct iser_device          *device;
  418         struct iser_comp            *comp;
  419         struct iser_beacon           beacon; /* see struct iser_beacon */
  420         struct mtx               lock; /* NOTE(review): what this mutex protects is not visible in this header */
  421         union { /* registration state: the fmr and fastreg members share storage, so only one is in use */
  422                 struct {
  423                         struct ib_fmr_pool      *pool;
  424                         struct iser_page_vec    *page_vec;
  425                 } fmr;
  426                 struct {
  427                         struct list_head         pool;
  428                         int                      pool_size;
  429                 } fastreg;
  430         };
  431 };
  432 /* iSER connection: the generic icl_conn plus its RDMA-side state (ib_conn), login buffers and RX descriptors. */
  433 struct iser_conn {
  434         struct icl_conn             icl_conn; /* embedded; icl_to_iser_conn() recovers this struct from it */
  435         struct ib_conn               ib_conn;
  436         struct cv                    up_cv;
  437         struct list_head             conn_list; /* NOTE(review): presumably entry in ig.connlist - confirm */
  438         struct sx                                state_mutex; /* an sx lock, despite the "mutex" name */
  439         enum iser_conn_state         state;
  440         int                                              qp_max_recv_dtos;
  441         int                                              min_posted_rx;
  442         u16                          max_cmds;
  443         char                         *login_buf;
  444         char                         *login_req_buf, *login_resp_buf;
  445         u64                          login_req_dma, login_resp_dma;
  446         unsigned int                 rx_desc_head;
  447         struct iser_rx_desc          *rx_descs;
  448         u32                          num_rx_descs;
  449         bool                         handoff_done;
  450 };
  451 
  452 /**
  453  * struct iser_global: iSER global context
  454  *
  455  * @device_list_mutex:    protects device_list (sx lock)
  456  * @device_list:          iser devices global list
  457  * @connlist_mutex:       protects connlist (mtx)
  458  * @connlist:             iser connections global list
  459  * @desc_cache:           stale entry: this struct has no desc_cache member
  460  * @close_conns_mutex:    serializes conns closure (sx lock)
  461  */
  462 struct iser_global {
  463         struct sx        device_list_mutex;
  464         struct list_head  device_list;
  465         struct mtx        connlist_mutex;
  466         struct list_head  connlist;
  467         struct sx         close_conns_mutex;
  468 };
  469 /* Globals declared here and defined in another file. */
  470 extern struct iser_global ig; /* the iSER global context (struct iser_global) */
  471 extern int iser_debug; /* verbosity level tested by ISER_DBG / ISER_INFO / ISER_WARN */
  472 
  473 void
  474 iser_create_send_desc(struct iser_conn *, struct iser_tx_desc *);
  475 
  476 int
  477 iser_post_recvl(struct iser_conn *);
  478 
  479 int
  480 iser_post_recvm(struct iser_conn *, int);
  481 
  482 int
  483 iser_alloc_login_buf(struct iser_conn *iser_conn);
  484 
  485 void
  486 iser_free_login_buf(struct iser_conn *iser_conn);
  487 
  488 int
  489 iser_post_send(struct ib_conn *, struct iser_tx_desc *, bool);
  490 
  491 void
  492 iser_snd_completion(struct iser_tx_desc *, struct ib_conn *);
  493 
  494 void
  495 iser_rcv_completion(struct iser_rx_desc *, unsigned long,
  496                     struct ib_conn *);
  497 
  498 void
  499 iser_pdu_free(struct icl_conn *, struct icl_pdu *);
  500 
  501 struct icl_pdu *
  502 iser_new_pdu(struct icl_conn *ic, int flags);
  503 
  504 int
  505 iser_alloc_rx_descriptors(struct iser_conn *, int);
  506 
  507 void
  508 iser_free_rx_descriptors(struct iser_conn *);
  509 
  510 int
  511 iser_initialize_headers(struct icl_iser_pdu *, struct iser_conn *);
  512 
  513 int
  514 iser_send_control(struct iser_conn *, struct icl_iser_pdu *);
  515 
  516 int
  517 iser_send_command(struct iser_conn *, struct icl_iser_pdu *);
  518 
  519 int
  520 iser_reg_rdma_mem(struct icl_iser_pdu *, enum iser_data_dir);
  521 
  522 void
  523 iser_unreg_rdma_mem(struct icl_iser_pdu *, enum iser_data_dir);
  524 
  525 int
  526 iser_create_fastreg_pool(struct ib_conn *, unsigned);
  527 
  528 void
  529 iser_free_fastreg_pool(struct ib_conn *);
  530 
  531 int
  532 iser_dma_map_task_data(struct icl_iser_pdu *,
  533                        struct iser_data_buf *, enum iser_data_dir,
  534                        enum dma_data_direction);
  535 
  536 int
  537 iser_conn_terminate(struct iser_conn *);
  538 
  539 void
  540 iser_free_ib_conn_res(struct iser_conn *, bool);
  541 
  542 void
  543 iser_dma_unmap_task_data(struct icl_iser_pdu *, struct iser_data_buf *,
  544                          enum dma_data_direction);
  545 
  546 int
  547 iser_cma_handler(struct rdma_cm_id *, struct rdma_cm_event *);
  548 
  549 #endif /* !ICL_ISER_H */

Cache object: 24dd437730e5427c87a100bd586d1fbe


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]


This page is part of the FreeBSD/Linux Kernel Cross-Reference, and was automatically generated using a modified version of the LXR engine.