The Design and Implementation of the FreeBSD Operating System, Second Edition
Now available: The Design and Implementation of the FreeBSD Operating System (Second Edition)


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]

FreeBSD/Linux Kernel Cross Reference
sys/dev/pci/pci_iov.c

Version: -  FREEBSD  -  FREEBSD-13-STABLE  -  FREEBSD-13-0  -  FREEBSD-12-STABLE  -  FREEBSD-12-0  -  FREEBSD-11-STABLE  -  FREEBSD-11-0  -  FREEBSD-10-STABLE  -  FREEBSD-10-0  -  FREEBSD-9-STABLE  -  FREEBSD-9-0  -  FREEBSD-8-STABLE  -  FREEBSD-8-0  -  FREEBSD-7-STABLE  -  FREEBSD-7-0  -  FREEBSD-6-STABLE  -  FREEBSD-6-0  -  FREEBSD-5-STABLE  -  FREEBSD-5-0  -  FREEBSD-4-STABLE  -  FREEBSD-3-STABLE  -  FREEBSD22  -  l41  -  OPENBSD  -  linux-2.6  -  MK84  -  PLAN9  -  xnu-8792 
SearchContext: -  none  -  3  -  10 

    1 /*-
    2  * Copyright (c) 2013-2015 Sandvine Inc.
    3  * All rights reserved.
    4  *
    5  * Redistribution and use in source and binary forms, with or without
    6  * modification, are permitted provided that the following conditions
    7  * are met:
    8  * 1. Redistributions of source code must retain the above copyright
    9  *    notice, this list of conditions and the following disclaimer.
   10  * 2. Redistributions in binary form must reproduce the above copyright
   11  *    notice, this list of conditions and the following disclaimer in the
   12  *    documentation and/or other materials provided with the distribution.
   13  *
   14  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
   15  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
   16  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
   17  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
   18  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
   19  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
   20  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
   21  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
   22  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
   23  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
   24  * SUCH DAMAGE.
   25  */
   26 
   27 #include <sys/cdefs.h>
   28 __FBSDID("$FreeBSD: releng/11.2/sys/dev/pci/pci_iov.c 306461 2016-09-29 22:52:24Z jhb $");
   29 
   30 #include "opt_bus.h"
   31 
   32 #include <sys/param.h>
   33 #include <sys/conf.h>
   34 #include <sys/kernel.h>
   35 #include <sys/systm.h>
   36 #include <sys/bus.h>
   37 #include <sys/fcntl.h>
   38 #include <sys/ioccom.h>
   39 #include <sys/iov.h>
   40 #include <sys/linker.h>
   41 #include <sys/malloc.h>
   42 #include <sys/module.h>
   43 #include <sys/pciio.h>
   44 #include <sys/queue.h>
   45 #include <sys/rman.h>
   46 #include <sys/sysctl.h>
   47 
   48 #include <machine/bus.h>
   49 #include <machine/stdarg.h>
   50 
   51 #include <sys/nv.h>
   52 #include <sys/iov_schema.h>
   53 
   54 #include <dev/pci/pcireg.h>
   55 #include <dev/pci/pcivar.h>
   56 #include <dev/pci/pci_iov.h>
   57 #include <dev/pci/pci_private.h>
   58 #include <dev/pci/pci_iov_private.h>
   59 #include <dev/pci/schema_private.h>
   60 
   61 #include "pcib_if.h"
   62 
/* Malloc type tagging all allocations made by the SR-IOV subsystem. */
static MALLOC_DEFINE(M_SRIOV, "sr_iov", "PCI SR-IOV allocations");

static d_ioctl_t pci_iov_ioctl;

/*
 * Character-device switch for the per-PF /dev/iov/<name> control nodes
 * created in pci_iov_attach_method(); all configuration is done via ioctl.
 */
static struct cdevsw iov_cdevsw = {
        .d_version = D_VERSION,
        .d_name = "iov",
        .d_ioctl = pci_iov_ioctl
};

SYSCTL_DECL(_hw_pci);

/*
 * The maximum amount of memory we will allocate for user configuration of an
 * SR-IOV device.  1MB ought to be enough for anyone, but leave this 
 * configurable just in case.
 */
static u_long pci_iov_max_config = 1024 * 1024;
SYSCTL_ULONG(_hw_pci, OID_AUTO, iov_max_config, CTLFLAG_RWTUN,
    &pci_iov_max_config, 0, "Maximum allowed size of SR-IOV configuration.");


/*
 * Read/write a register in the PF's SR-IOV extended capability; "r" is an
 * offset relative to the capability base (iov_pos) and "w" is the access
 * width in bytes.
 */
#define IOV_READ(d, r, w) \
        pci_read_config((d)->cfg.dev, (d)->cfg.iov->iov_pos + r, w)

#define IOV_WRITE(d, r, v, w) \
        pci_write_config((d)->cfg.dev, (d)->cfg.iov->iov_pos + r, v, w)

static nvlist_t *pci_iov_build_schema(nvlist_t **pf_schema,
                    nvlist_t **vf_schema);
static void     pci_iov_build_pf_schema(nvlist_t *schema,
                    nvlist_t **driver_schema);
static void     pci_iov_build_vf_schema(nvlist_t *schema,
                    nvlist_t **driver_schema);
static nvlist_t *pci_iov_get_pf_subsystem_schema(void);
static nvlist_t *pci_iov_get_vf_subsystem_schema(void);
   99 
  100 int
  101 pci_iov_attach_name(device_t dev, struct nvlist *pf_schema,
  102     struct nvlist *vf_schema, const char *fmt, ...)
  103 {
  104         char buf[NAME_MAX + 1];
  105         va_list ap;
  106 
  107         va_start(ap, fmt);
  108         vsnprintf(buf, sizeof(buf), fmt, ap);
  109         va_end(ap);
  110         return (PCI_IOV_ATTACH_NAME(device_get_parent(dev), dev, pf_schema,
  111             vf_schema, buf));
  112 }
  113 
  114 int
  115 pci_iov_attach_method(device_t bus, device_t dev, nvlist_t *pf_schema,
  116     nvlist_t *vf_schema, const char *name)
  117 {
  118         device_t pcib;
  119         struct pci_devinfo *dinfo;
  120         struct pcicfg_iov *iov;
  121         nvlist_t *schema;
  122         uint32_t version;
  123         int error;
  124         int iov_pos;
  125 
  126         dinfo = device_get_ivars(dev);
  127         pcib = device_get_parent(bus);
  128         schema = NULL;
  129         
  130         error = pci_find_extcap(dev, PCIZ_SRIOV, &iov_pos);
  131 
  132         if (error != 0)
  133                 return (error);
  134 
  135         version = pci_read_config(dev, iov_pos, 4); 
  136         if (PCI_EXTCAP_VER(version) != 1) {
  137                 if (bootverbose)
  138                         device_printf(dev, 
  139                             "Unsupported version of SR-IOV (%d) detected\n",
  140                             PCI_EXTCAP_VER(version));
  141 
  142                 return (ENXIO);
  143         }
  144 
  145         iov = malloc(sizeof(*dinfo->cfg.iov), M_SRIOV, M_WAITOK | M_ZERO);
  146 
  147         mtx_lock(&Giant);
  148         if (dinfo->cfg.iov != NULL) {
  149                 error = EBUSY;
  150                 goto cleanup;
  151         }
  152         iov->iov_pos = iov_pos;
  153 
  154         schema = pci_iov_build_schema(&pf_schema, &vf_schema);
  155         if (schema == NULL) {
  156                 error = ENOMEM;
  157                 goto cleanup;
  158         }
  159 
  160         error = pci_iov_validate_schema(schema);
  161         if (error != 0)
  162                 goto cleanup;
  163         iov->iov_schema = schema;
  164 
  165         iov->iov_cdev = make_dev(&iov_cdevsw, device_get_unit(dev),
  166             UID_ROOT, GID_WHEEL, 0600, "iov/%s", name);
  167 
  168         if (iov->iov_cdev == NULL) {
  169                 error = ENOMEM;
  170                 goto cleanup;
  171         }
  172         
  173         dinfo->cfg.iov = iov;
  174         iov->iov_cdev->si_drv1 = dinfo;
  175         mtx_unlock(&Giant);
  176 
  177         return (0);
  178 
  179 cleanup:
  180         nvlist_destroy(schema);
  181         nvlist_destroy(pf_schema);
  182         nvlist_destroy(vf_schema);
  183         free(iov, M_SRIOV);
  184         mtx_unlock(&Giant);
  185         return (error);
  186 }
  187 
  188 int
  189 pci_iov_detach_method(device_t bus, device_t dev)
  190 {
  191         struct pci_devinfo *dinfo;
  192         struct pcicfg_iov *iov;
  193 
  194         mtx_lock(&Giant);
  195         dinfo = device_get_ivars(dev);
  196         iov = dinfo->cfg.iov;
  197 
  198         if (iov == NULL) {
  199                 mtx_unlock(&Giant);
  200                 return (0);
  201         }
  202 
  203         if (iov->iov_num_vfs != 0 || iov->iov_flags & IOV_BUSY) {
  204                 mtx_unlock(&Giant);
  205                 return (EBUSY);
  206         }
  207 
  208         dinfo->cfg.iov = NULL;
  209 
  210         if (iov->iov_cdev) {
  211                 destroy_dev(iov->iov_cdev);
  212                 iov->iov_cdev = NULL;
  213         }
  214         nvlist_destroy(iov->iov_schema);
  215 
  216         free(iov, M_SRIOV);
  217         mtx_unlock(&Giant);
  218 
  219         return (0);
  220 }
  221 
  222 static nvlist_t *
  223 pci_iov_build_schema(nvlist_t **pf, nvlist_t **vf)
  224 {
  225         nvlist_t *schema, *pf_driver, *vf_driver;
  226 
  227         /* We always take ownership of the schemas. */
  228         pf_driver = *pf;
  229         *pf = NULL;
  230         vf_driver = *vf;
  231         *vf = NULL;
  232 
  233         schema = pci_iov_schema_alloc_node();
  234         if (schema == NULL)
  235                 goto cleanup;
  236 
  237         pci_iov_build_pf_schema(schema, &pf_driver);
  238         pci_iov_build_vf_schema(schema, &vf_driver);
  239 
  240         if (nvlist_error(schema) != 0)
  241                 goto cleanup;
  242 
  243         return (schema);
  244 
  245 cleanup:
  246         nvlist_destroy(schema);
  247         nvlist_destroy(pf_driver);
  248         nvlist_destroy(vf_driver);
  249         return (NULL);
  250 }
  251 
  252 static void
  253 pci_iov_build_pf_schema(nvlist_t *schema, nvlist_t **driver_schema)
  254 {
  255         nvlist_t *pf_schema, *iov_schema;
  256 
  257         pf_schema = pci_iov_schema_alloc_node();
  258         if (pf_schema == NULL) {
  259                 nvlist_set_error(schema, ENOMEM);
  260                 return;
  261         }
  262 
  263         iov_schema = pci_iov_get_pf_subsystem_schema();
  264 
  265         /*
  266          * Note that if either *driver_schema or iov_schema is NULL, then
  267          * nvlist_move_nvlist will put the schema in the error state and
  268          * SR-IOV will fail to initialize later, so we don't have to explicitly
  269          * handle that case.
  270          */
  271         nvlist_move_nvlist(pf_schema, DRIVER_CONFIG_NAME, *driver_schema);
  272         nvlist_move_nvlist(pf_schema, IOV_CONFIG_NAME, iov_schema);
  273         nvlist_move_nvlist(schema, PF_CONFIG_NAME, pf_schema);
  274         *driver_schema = NULL;
  275 }
  276 
  277 static void
  278 pci_iov_build_vf_schema(nvlist_t *schema, nvlist_t **driver_schema)
  279 {
  280         nvlist_t *vf_schema, *iov_schema;
  281 
  282         vf_schema = pci_iov_schema_alloc_node();
  283         if (vf_schema == NULL) {
  284                 nvlist_set_error(schema, ENOMEM);
  285                 return;
  286         }
  287 
  288         iov_schema = pci_iov_get_vf_subsystem_schema();
  289 
  290         /*
  291          * Note that if either *driver_schema or iov_schema is NULL, then
  292          * nvlist_move_nvlist will put the schema in the error state and
  293          * SR-IOV will fail to initialize later, so we don't have to explicitly
  294          * handle that case.
  295          */
  296         nvlist_move_nvlist(vf_schema, DRIVER_CONFIG_NAME, *driver_schema);
  297         nvlist_move_nvlist(vf_schema, IOV_CONFIG_NAME, iov_schema);
  298         nvlist_move_nvlist(schema, VF_SCHEMA_NAME, vf_schema);
  299         *driver_schema = NULL;
  300 }
  301 
  302 static nvlist_t *
  303 pci_iov_get_pf_subsystem_schema(void)
  304 {
  305         nvlist_t *pf;
  306 
  307         pf = pci_iov_schema_alloc_node();
  308         if (pf == NULL)
  309                 return (NULL);
  310 
  311         pci_iov_schema_add_uint16(pf, "num_vfs", IOV_SCHEMA_REQUIRED, -1);
  312         pci_iov_schema_add_string(pf, "device", IOV_SCHEMA_REQUIRED, NULL);
  313 
  314         return (pf);
  315 }
  316 
  317 static nvlist_t *
  318 pci_iov_get_vf_subsystem_schema(void)
  319 {
  320         nvlist_t *vf;
  321 
  322         vf = pci_iov_schema_alloc_node();
  323         if (vf == NULL)
  324                 return (NULL);
  325 
  326         pci_iov_schema_add_bool(vf, "passthrough", IOV_SCHEMA_HASDEFAULT, 0);
  327 
  328         return (vf);
  329 }
  330 
  331 static int
  332 pci_iov_alloc_bar(struct pci_devinfo *dinfo, int bar, pci_addr_t bar_shift)
  333 {
  334         struct resource *res;
  335         struct pcicfg_iov *iov;
  336         device_t dev, bus;
  337         rman_res_t start, end;
  338         pci_addr_t bar_size;
  339         int rid;
  340 
  341         iov = dinfo->cfg.iov;
  342         dev = dinfo->cfg.dev;
  343         bus = device_get_parent(dev);
  344         rid = iov->iov_pos + PCIR_SRIOV_BAR(bar);
  345         bar_size = 1 << bar_shift;
  346 
  347         res = pci_alloc_multi_resource(bus, dev, SYS_RES_MEMORY, &rid, 0,
  348             ~0, 1, iov->iov_num_vfs, RF_ACTIVE);
  349 
  350         if (res == NULL)
  351                 return (ENXIO);
  352 
  353         iov->iov_bar[bar].res = res;
  354         iov->iov_bar[bar].bar_size = bar_size;
  355         iov->iov_bar[bar].bar_shift = bar_shift;
  356 
  357         start = rman_get_start(res);
  358         end = rman_get_end(res);
  359         return (rman_manage_region(&iov->rman, start, end));
  360 }
  361 
  362 static void
  363 pci_iov_add_bars(struct pcicfg_iov *iov, struct pci_devinfo *dinfo)
  364 {
  365         struct pci_iov_bar *bar;
  366         uint64_t bar_start;
  367         int i;
  368 
  369         for (i = 0; i <= PCIR_MAX_BAR_0; i++) {
  370                 bar = &iov->iov_bar[i];
  371                 if (bar->res != NULL) {
  372                         bar_start = rman_get_start(bar->res) +
  373                             dinfo->cfg.vf.index * bar->bar_size;
  374 
  375                         pci_add_bar(dinfo->cfg.dev, PCIR_BAR(i), bar_start,
  376                             bar->bar_shift);
  377                 }
  378         }
  379 }
  380 
  381 static int
  382 pci_iov_parse_config(struct pcicfg_iov *iov, struct pci_iov_arg *arg,
  383     nvlist_t **ret)
  384 {
  385         void *packed_config;
  386         nvlist_t *config;
  387         int error;
  388 
  389         config = NULL;
  390         packed_config = NULL;
  391 
  392         if (arg->len > pci_iov_max_config) {
  393                 error = EMSGSIZE;
  394                 goto out;
  395         }
  396 
  397         packed_config = malloc(arg->len, M_SRIOV, M_WAITOK);
  398 
  399         error = copyin(arg->config, packed_config, arg->len);
  400         if (error != 0)
  401                 goto out;
  402 
  403         config = nvlist_unpack(packed_config, arg->len, NV_FLAG_IGNORE_CASE);
  404         if (config == NULL) {
  405                 error = EINVAL;
  406                 goto out;
  407         }
  408 
  409         error = pci_iov_schema_validate_config(iov->iov_schema, config);
  410         if (error != 0)
  411                 goto out;
  412 
  413         error = nvlist_error(config);
  414         if (error != 0)
  415                 goto out;
  416 
  417         *ret = config;
  418         config = NULL;
  419 
  420 out:
  421         nvlist_destroy(config);
  422         free(packed_config, M_SRIOV);
  423         return (error);
  424 }
  425 
  426 /*
  427  * Set the ARI_EN bit in the lowest-numbered PCI function with the SR-IOV
  428  * capability.  This bit is only writeable on the lowest-numbered PF but
  429  * affects all PFs on the device.
  430  */
  431 static int
  432 pci_iov_set_ari(device_t bus)
  433 {
  434         device_t lowest;
  435         device_t *devlist;
  436         int i, error, devcount, lowest_func, lowest_pos, iov_pos, dev_func;
  437         uint16_t iov_ctl;
  438 
  439         /* If ARI is disabled on the downstream port there is nothing to do. */
  440         if (!PCIB_ARI_ENABLED(device_get_parent(bus)))
  441                 return (0);
  442 
  443         error = device_get_children(bus, &devlist, &devcount);
  444 
  445         if (error != 0)
  446                 return (error);
  447 
  448         lowest = NULL;
  449         for (i = 0; i < devcount; i++) {
  450                 if (pci_find_extcap(devlist[i], PCIZ_SRIOV, &iov_pos) == 0) {
  451                         dev_func = pci_get_function(devlist[i]);
  452                         if (lowest == NULL || dev_func < lowest_func) {
  453                                 lowest = devlist[i];
  454                                 lowest_func = dev_func;
  455                                 lowest_pos = iov_pos;
  456                         }
  457                 }
  458         }
  459         free(devlist, M_TEMP);
  460 
  461         /*
  462          * If we called this function some device must have the SR-IOV
  463          * capability.
  464          */
  465         KASSERT(lowest != NULL,
  466             ("Could not find child of %s with SR-IOV capability",
  467             device_get_nameunit(bus)));
  468 
  469         iov_ctl = pci_read_config(lowest, lowest_pos + PCIR_SRIOV_CTL, 2);
  470         iov_ctl |= PCIM_SRIOV_ARI_EN;
  471         pci_write_config(lowest, lowest_pos + PCIR_SRIOV_CTL, iov_ctl, 2);
  472         if ((pci_read_config(lowest, lowest_pos + PCIR_SRIOV_CTL, 2) &
  473             PCIM_SRIOV_ARI_EN) == 0) {
  474                 device_printf(lowest, "failed to enable ARI\n");
  475                 return (ENXIO);
  476         }
  477         return (0);
  478 }
  479 
/*
 * Program the SR-IOV System Page Size register to match the host's page
 * size.  Supported Page Sizes / System Page Size encode page sizes as a
 * bitmask relative to PCI_SRIOV_BASE_PAGE_SHIFT (bit n set means
 * 1 << (n + PCI_SRIOV_BASE_PAGE_SHIFT) bytes is supported/selected).
 * Returns ENXIO if the device cannot support the host page size.
 */
static int
pci_iov_config_page_size(struct pci_devinfo *dinfo)
{
        uint32_t page_cap, page_size;

        page_cap = IOV_READ(dinfo, PCIR_SRIOV_PAGE_CAP, 4);

        /*
         * If the system page size is less than the smallest SR-IOV page size
         * then round up to the smallest SR-IOV page size.
         */
        if (PAGE_SHIFT < PCI_SRIOV_BASE_PAGE_SHIFT)
                page_size = (1 << 0);
        else
                page_size = (1 << (PAGE_SHIFT - PCI_SRIOV_BASE_PAGE_SHIFT));

        /* Check that the device supports the system page size. */
        if (!(page_size & page_cap))
                return (ENXIO);

        IOV_WRITE(dinfo, PCIR_SRIOV_PAGE_SIZE, page_size, 4);
        return (0);
}
  503 
  504 static int
  505 pci_iov_init(device_t dev, uint16_t num_vfs, const nvlist_t *config)
  506 {
  507         const nvlist_t *device, *driver_config;
  508 
  509         device = nvlist_get_nvlist(config, PF_CONFIG_NAME);
  510         driver_config = nvlist_get_nvlist(device, DRIVER_CONFIG_NAME);
  511         return (PCI_IOV_INIT(dev, num_vfs, driver_config));
  512 }
  513 
  514 static int
  515 pci_iov_init_rman(device_t pf, struct pcicfg_iov *iov)
  516 {
  517         int error;
  518 
  519         iov->rman.rm_start = 0;
  520         iov->rman.rm_end = ~0;
  521         iov->rman.rm_type = RMAN_ARRAY;
  522         snprintf(iov->rman_name, sizeof(iov->rman_name), "%s VF I/O memory",
  523             device_get_nameunit(pf));
  524         iov->rman.rm_descr = iov->rman_name;
  525 
  526         error = rman_init(&iov->rman);
  527         if (error != 0)
  528                 return (error);
  529 
  530         iov->iov_flags |= IOV_RMAN_INITED;
  531         return (0);
  532 }
  533 
  534 static int
  535 pci_iov_alloc_bar_ea(struct pci_devinfo *dinfo, int bar)
  536 {
  537         struct pcicfg_iov *iov;
  538         rman_res_t start, end;
  539         struct resource *res;
  540         struct resource_list *rl;
  541         struct resource_list_entry *rle;
  542 
  543         rl = &dinfo->resources;
  544         iov = dinfo->cfg.iov;
  545 
  546         rle = resource_list_find(rl, SYS_RES_MEMORY,
  547             iov->iov_pos + PCIR_SRIOV_BAR(bar));
  548         if (rle == NULL)
  549                 rle = resource_list_find(rl, SYS_RES_IOPORT,
  550                     iov->iov_pos + PCIR_SRIOV_BAR(bar));
  551         if (rle == NULL)
  552                 return (ENXIO);
  553         res = rle->res;
  554 
  555         iov->iov_bar[bar].res = res;
  556         iov->iov_bar[bar].bar_size = rman_get_size(res) / iov->iov_num_vfs;
  557         iov->iov_bar[bar].bar_shift = pci_mapsize(iov->iov_bar[bar].bar_size);
  558 
  559         start = rman_get_start(res);
  560         end = rman_get_end(res);
  561 
  562         return (rman_manage_region(&iov->rman, start, end));
  563 }
  564 
/*
 * Discover and allocate all VF BARs for the PF: prefer Enhanced
 * Allocation (EA) described resources, and fall back to sizing each BAR
 * by the classic write-ones/read-back probe (pci_read_bar) followed by
 * pci_iov_alloc_bar().  Returns 0 on success or the first allocation
 * error.
 */
static int
pci_iov_setup_bars(struct pci_devinfo *dinfo)
{
        device_t dev;
        struct pcicfg_iov *iov;
        pci_addr_t bar_value, testval;
        int i, last_64, error;

        iov = dinfo->cfg.iov;
        dev = dinfo->cfg.dev;
        last_64 = 0;

        /* Populate the resource list from EA capability entries, if any. */
        pci_add_resources_ea(device_get_parent(dev), dev, 1);

        for (i = 0; i <= PCIR_MAX_BAR_0; i++) {
                /* First, try to use BARs allocated with EA */
                error = pci_iov_alloc_bar_ea(dinfo, i);
                if (error == 0)
                        continue;

                /* Allocate legacy-BAR only if EA is not enabled */
                if (pci_ea_is_enabled(dev, iov->iov_pos + PCIR_SRIOV_BAR(i)))
                        continue;

                /*
                 * If a PCI BAR is a 64-bit wide BAR, then it spans two
                 * consecutive registers.  Therefore if the last BAR that
                 * we looked at was a 64-bit BAR, we need to skip this
                 * register as it's the second half of the last BAR.
                 */
                if (!last_64) {
                        /* pci_read_bar() sets last_64 for 64-bit BARs. */
                        pci_read_bar(dev,
                            iov->iov_pos + PCIR_SRIOV_BAR(i),
                            &bar_value, &testval, &last_64);

                        /* testval == 0 means the BAR is not implemented. */
                        if (testval != 0) {
                                error = pci_iov_alloc_bar(dinfo, i,
                                   pci_mapsize(testval));
                                if (error != 0)
                                        return (error);
                        }
                } else
                        last_64 = 0;
        }

        return (0);
}
  612 
/*
 * Create a newbus child device for every configured VF.  Routing IDs
 * start at first_rid and advance by rid_stride per VF; the vendor ID is
 * inherited from the PF while the device ID comes from the SR-IOV
 * capability's VF Device ID register.  A VF whose PCI_IOV_ADD_VF call
 * fails is deleted again, but enumeration of the remaining VFs
 * continues.  Errors are not propagated to the caller.
 */
static void
pci_iov_enumerate_vfs(struct pci_devinfo *dinfo, const nvlist_t *config,
    uint16_t first_rid, uint16_t rid_stride)
{
        char device_name[VF_MAX_NAME];
        const nvlist_t *device, *driver_config, *iov_config;
        device_t bus, dev, vf;
        struct pcicfg_iov *iov;
        struct pci_devinfo *vfinfo;
        int i, error;
        uint16_t vid, did, next_rid;

        iov = dinfo->cfg.iov;
        dev = dinfo->cfg.dev;
        bus = device_get_parent(dev);
        next_rid = first_rid;
        vid = pci_get_vendor(dev);
        /* VFs report the VF Device ID, not the PF's own device ID. */
        did = IOV_READ(dinfo, PCIR_SRIOV_VF_DID, 2);

        for (i = 0; i < iov->iov_num_vfs; i++, next_rid += rid_stride) {
                /* Per-VF config subtrees are keyed VF_PREFIX + index. */
                snprintf(device_name, sizeof(device_name), VF_PREFIX"%d", i);
                device = nvlist_get_nvlist(config, device_name);
                iov_config = nvlist_get_nvlist(device, IOV_CONFIG_NAME);
                driver_config = nvlist_get_nvlist(device, DRIVER_CONFIG_NAME);

                vf = PCI_CREATE_IOV_CHILD(bus, dev, next_rid, vid, did);
                if (vf == NULL)
                        break;

                /*
                 * If we are creating passthrough devices then force the ppt
                 * driver to attach to prevent a VF driver from claiming the
                 * VFs.
                 */
                if (nvlist_get_bool(iov_config, "passthrough"))
                        device_set_devclass_fixed(vf, "ppt");

                vfinfo = device_get_ivars(vf);

                /* Link the VF back to the PF's IOV state for BAR lookups. */
                vfinfo->cfg.iov = iov;
                vfinfo->cfg.vf.index = i;

                pci_iov_add_bars(iov, vfinfo);

                error = PCI_IOV_ADD_VF(dev, i, driver_config);
                if (error != 0) {
                        device_printf(dev, "Failed to add VF %d\n", i);
                        device_delete_child(bus, vf);
                }
        }

        /* Probe and attach drivers for all of the new children. */
        bus_generic_attach(bus);
}
  666 
/*
 * Handler for the IOV config ioctl: validate the user-supplied
 * configuration, program the SR-IOV capability registers, allocate the
 * VF BARs, and enumerate the VF devices.  State is protected by Giant,
 * with IOV_BUSY held across the sleeping middle section.  On any failure
 * the device is unwound to the unconfigured state.
 */
static int
pci_iov_config(struct cdev *cdev, struct pci_iov_arg *arg)
{
        device_t bus, dev;
        struct pci_devinfo *dinfo;
        struct pcicfg_iov *iov;
        nvlist_t *config;
        int i, error;
        uint16_t rid_off, rid_stride;
        uint16_t first_rid, last_rid;
        uint16_t iov_ctl;
        uint16_t num_vfs, total_vfs;
        int iov_inited;

        mtx_lock(&Giant);
        dinfo = cdev->si_drv1;
        iov = dinfo->cfg.iov;
        dev = dinfo->cfg.dev;
        bus = device_get_parent(dev);
        iov_inited = 0;
        config = NULL;

        /* Only one configuration may exist or be in progress at a time. */
        if ((iov->iov_flags & IOV_BUSY) || iov->iov_num_vfs != 0) {
                mtx_unlock(&Giant);
                return (EBUSY);
        }
        iov->iov_flags |= IOV_BUSY;

        /* Copy in, unpack, and schema-validate the requested config. */
        error = pci_iov_parse_config(iov, arg, &config);
        if (error != 0)
                goto out;

        /* The device caps the VF count via its TotalVFs register. */
        num_vfs = pci_iov_config_get_num_vfs(config);
        total_vfs = IOV_READ(dinfo, PCIR_SRIOV_TOTAL_VFS, 2);
        if (num_vfs > total_vfs) {
                error = EINVAL;
                goto out;
        }

        error = pci_iov_config_page_size(dinfo);
        if (error != 0)
                goto out;

        /* ARI affects how VF routing IDs are assigned; set it up first. */
        error = pci_iov_set_ari(bus);
        if (error != 0)
                goto out;

        /* Give the PF driver a chance to prepare for num_vfs VFs. */
        error = pci_iov_init(dev, num_vfs, config);
        if (error != 0)
                goto out;
        iov_inited = 1;

        /* Set NumVFs before reading VF Offset/Stride, which may depend on it. */
        IOV_WRITE(dinfo, PCIR_SRIOV_NUM_VFS, num_vfs, 2);

        rid_off = IOV_READ(dinfo, PCIR_SRIOV_VF_OFF, 2);
        rid_stride = IOV_READ(dinfo, PCIR_SRIOV_VF_STRIDE, 2);

        first_rid = pci_get_rid(dev) + rid_off;
        last_rid = first_rid + (num_vfs - 1) * rid_stride;

        /* We don't yet support allocating extra bus numbers for VFs. */
        if (pci_get_bus(dev) != PCI_RID2BUS(last_rid)) {
                error = ENOSPC;
                goto out;
        }

        /* Keep VFs disabled until their BAR resources are in place. */
        iov_ctl = IOV_READ(dinfo, PCIR_SRIOV_CTL, 2);
        iov_ctl &= ~(PCIM_SRIOV_VF_EN | PCIM_SRIOV_VF_MSE);
        IOV_WRITE(dinfo, PCIR_SRIOV_CTL, iov_ctl, 2);

        error = pci_iov_init_rman(dev, iov);
        if (error != 0)
                goto out;

        iov->iov_num_vfs = num_vfs;

        error = pci_iov_setup_bars(dinfo);
        if (error != 0)
                goto out;

        /* Enable VFs and their memory space decoding. */
        iov_ctl = IOV_READ(dinfo, PCIR_SRIOV_CTL, 2);
        iov_ctl |= PCIM_SRIOV_VF_EN | PCIM_SRIOV_VF_MSE;
        IOV_WRITE(dinfo, PCIR_SRIOV_CTL, iov_ctl, 2);

        /*
         * Per specification, we must wait 100ms before accessing VFs.
         * NOTE(review): roundup(hz, 10) sleeps roughly one second at
         * common hz values -- well past the 100ms minimum; howmany(hz, 10)
         * would be the literal 100ms.  Confirm the longer wait is intended.
         */
        pause("iov", roundup(hz, 10));
        pci_iov_enumerate_vfs(dinfo, config, first_rid, rid_stride);

        nvlist_destroy(config);
        iov->iov_flags &= ~IOV_BUSY;
        mtx_unlock(&Giant);

        return (0);
out:
        /* Unwind everything done above; leave the PF unconfigured. */
        if (iov_inited)
                PCI_IOV_UNINIT(dev);

        for (i = 0; i <= PCIR_MAX_BAR_0; i++) {
                if (iov->iov_bar[i].res != NULL) {
                        pci_release_resource(bus, dev, SYS_RES_MEMORY,
                            iov->iov_pos + PCIR_SRIOV_BAR(i),
                            iov->iov_bar[i].res);
                        pci_delete_resource(bus, dev, SYS_RES_MEMORY,
                            iov->iov_pos + PCIR_SRIOV_BAR(i));
                        iov->iov_bar[i].res = NULL;
                }
        }

        if (iov->iov_flags & IOV_RMAN_INITED) {
                rman_fini(&iov->rman);
                iov->iov_flags &= ~IOV_RMAN_INITED;
        }

        nvlist_destroy(config);
        iov->iov_num_vfs = 0;
        iov->iov_flags &= ~IOV_BUSY;
        mtx_unlock(&Giant);
        return (error);
}
  786 
  787 void
  788 pci_iov_cfg_restore(device_t dev, struct pci_devinfo *dinfo)
  789 {
  790         struct pcicfg_iov *iov;
  791 
  792         iov = dinfo->cfg.iov;
  793 
  794         IOV_WRITE(dinfo, PCIR_SRIOV_PAGE_SIZE, iov->iov_page_size, 4);
  795         IOV_WRITE(dinfo, PCIR_SRIOV_NUM_VFS, iov->iov_num_vfs, 2);
  796         IOV_WRITE(dinfo, PCIR_SRIOV_CTL, iov->iov_ctl, 2);
  797 }
  798 
  799 void
  800 pci_iov_cfg_save(device_t dev, struct pci_devinfo *dinfo)
  801 {
  802         struct pcicfg_iov *iov;
  803 
  804         iov = dinfo->cfg.iov;
  805 
  806         iov->iov_page_size = IOV_READ(dinfo, PCIR_SRIOV_PAGE_SIZE, 4);
  807         iov->iov_ctl = IOV_READ(dinfo, PCIR_SRIOV_CTL, 2);
  808 }
  809 
  810 /* Return true if child is a VF of the given PF. */
  811 static int
  812 pci_iov_is_child_vf(struct pcicfg_iov *pf, device_t child)
  813 {
  814         struct pci_devinfo *vfinfo;
  815 
  816         vfinfo = device_get_ivars(child);
  817 
  818         if (!(vfinfo->cfg.flags & PCICFG_VF))
  819                 return (0);
  820 
  821         return (pf == vfinfo->cfg.iov);
  822 }
  823 
/*
 * Handler for the IOV delete ioctl: detach and delete every VF child,
 * disable VF enable/memory decoding in the SR-IOV capability, and
 * release the VF BAR resources, returning the PF to the unconfigured
 * state.  Fails with EBUSY if a config/delete is in flight and ECHILD
 * if no VFs are configured.
 */
static int
pci_iov_delete(struct cdev *cdev)
{
        device_t bus, dev, vf, *devlist;
        struct pci_devinfo *dinfo;
        struct pcicfg_iov *iov;
        int i, error, devcount;
        uint32_t iov_ctl;

        mtx_lock(&Giant);
        dinfo = cdev->si_drv1;
        iov = dinfo->cfg.iov;
        dev = dinfo->cfg.dev;
        bus = device_get_parent(dev);
        devlist = NULL;

        if (iov->iov_flags & IOV_BUSY) {
                mtx_unlock(&Giant);
                return (EBUSY);
        }

        /* Nothing to delete. */
        if (iov->iov_num_vfs == 0) {
                mtx_unlock(&Giant);
                return (ECHILD);
        }

        iov->iov_flags |= IOV_BUSY;

        error = device_get_children(bus, &devlist, &devcount);

        if (error != 0)
                goto out;

        /*
         * First pass: detach all VF drivers.  If any VF refuses to
         * detach, abort and leave the remaining VFs configured.
         */
        for (i = 0; i < devcount; i++) {
                vf = devlist[i];

                if (!pci_iov_is_child_vf(iov, vf))
                        continue;

                error = device_detach(vf);
                if (error != 0) {
                        device_printf(dev,
                           "Could not disable SR-IOV: failed to detach VF %s\n",
                            device_get_nameunit(vf));
                        goto out;
                }
        }

        /* Second pass: all detached successfully, delete the children. */
        for (i = 0; i < devcount; i++) {
                vf = devlist[i];

                if (pci_iov_is_child_vf(iov, vf))
                        device_delete_child(bus, vf);
        }
        PCI_IOV_UNINIT(dev);

        /* Clear VF enable and memory decoding, then zero NumVFs. */
        iov_ctl = IOV_READ(dinfo, PCIR_SRIOV_CTL, 2);
        iov_ctl &= ~(PCIM_SRIOV_VF_EN | PCIM_SRIOV_VF_MSE);
        IOV_WRITE(dinfo, PCIR_SRIOV_CTL, iov_ctl, 2);
        IOV_WRITE(dinfo, PCIR_SRIOV_NUM_VFS, 0, 2);

        iov->iov_num_vfs = 0;

        /* Release and delete the VF BAR window resources. */
        for (i = 0; i <= PCIR_MAX_BAR_0; i++) {
                if (iov->iov_bar[i].res != NULL) {
                        pci_release_resource(bus, dev, SYS_RES_MEMORY,
                            iov->iov_pos + PCIR_SRIOV_BAR(i),
                            iov->iov_bar[i].res);
                        pci_delete_resource(bus, dev, SYS_RES_MEMORY,
                            iov->iov_pos + PCIR_SRIOV_BAR(i));
                        iov->iov_bar[i].res = NULL;
                }
        }

        if (iov->iov_flags & IOV_RMAN_INITED) {
                rman_fini(&iov->rman);
                iov->iov_flags &= ~IOV_RMAN_INITED;
        }

        error = 0;
out:
        free(devlist, M_TEMP);
        iov->iov_flags &= ~IOV_BUSY;
        mtx_unlock(&Giant);
        return (error);
}
  910 
  911 static int
  912 pci_iov_get_schema_ioctl(struct cdev *cdev, struct pci_iov_schema *output)
  913 {
  914         struct pci_devinfo *dinfo;
  915         void *packed;
  916         size_t output_len, size;
  917         int error;
  918 
  919         packed = NULL;
  920 
  921         mtx_lock(&Giant);
  922         dinfo = cdev->si_drv1;
  923         packed = nvlist_pack(dinfo->cfg.iov->iov_schema, &size);
  924         mtx_unlock(&Giant);
  925 
  926         if (packed == NULL) {
  927                 error = ENOMEM;
  928                 goto fail;
  929         }
  930 
  931         output_len = output->len;
  932         output->len = size;
  933         if (size <= output_len) {
  934                 error = copyout(packed, output->schema, size);
  935 
  936                 if (error != 0)
  937                         goto fail;
  938 
  939                 output->error = 0;
  940         } else
  941                 /*
  942                  * If we return an error then the ioctl code won't copyout
  943                  * output back to userland, so we flag the error in the struct
  944                  * instead.
  945                  */
  946                 output->error = EMSGSIZE;
  947 
  948         error = 0;
  949 
  950 fail:
  951         free(packed, M_NVLIST);
  952 
  953         return (error);
  954 }
  955 
  956 static int
  957 pci_iov_ioctl(struct cdev *dev, u_long cmd, caddr_t data, int fflag,
  958     struct thread *td)
  959 {
  960 
  961         switch (cmd) {
  962         case IOV_CONFIG:
  963                 return (pci_iov_config(dev, (struct pci_iov_arg *)data));
  964         case IOV_DELETE:
  965                 return (pci_iov_delete(dev));
  966         case IOV_GET_SCHEMA:
  967                 return (pci_iov_get_schema_ioctl(dev,
  968                     (struct pci_iov_schema *)data));
  969         default:
  970                 return (EINVAL);
  971         }
  972 }
  973 
  974 struct resource *
  975 pci_vf_alloc_mem_resource(device_t dev, device_t child, int *rid,
  976     rman_res_t start, rman_res_t end, rman_res_t count, u_int flags)
  977 {
  978         struct pci_devinfo *dinfo;
  979         struct pcicfg_iov *iov;
  980         struct pci_map *map;
  981         struct resource *res;
  982         struct resource_list_entry *rle;
  983         rman_res_t bar_start, bar_end;
  984         pci_addr_t bar_length;
  985         int error;
  986 
  987         dinfo = device_get_ivars(child);
  988         iov = dinfo->cfg.iov;
  989 
  990         map = pci_find_bar(child, *rid);
  991         if (map == NULL)
  992                 return (NULL);
  993 
  994         bar_length = 1 << map->pm_size;
  995         bar_start = map->pm_value;
  996         bar_end = bar_start + bar_length - 1;
  997 
  998         /* Make sure that the resource fits the constraints. */
  999         if (bar_start >= end || bar_end <= bar_start || count != 1)
 1000                 return (NULL);
 1001 
 1002         /* Clamp the resource to the constraints if necessary. */
 1003         if (bar_start < start)
 1004                 bar_start = start;
 1005         if (bar_end > end)
 1006                 bar_end = end;
 1007         bar_length = bar_end - bar_start + 1;
 1008 
 1009         res = rman_reserve_resource(&iov->rman, bar_start, bar_end,
 1010             bar_length, flags, child);
 1011         if (res == NULL)
 1012                 return (NULL);
 1013 
 1014         rle = resource_list_add(&dinfo->resources, SYS_RES_MEMORY, *rid,
 1015             bar_start, bar_end, 1);
 1016         if (rle == NULL) {
 1017                 rman_release_resource(res);
 1018                 return (NULL);
 1019         }
 1020 
 1021         rman_set_rid(res, *rid);
 1022 
 1023         if (flags & RF_ACTIVE) {
 1024                 error = bus_activate_resource(child, SYS_RES_MEMORY, *rid, res);
 1025                 if (error != 0) {
 1026                         resource_list_delete(&dinfo->resources, SYS_RES_MEMORY,
 1027                             *rid);
 1028                         rman_release_resource(res);
 1029                         return (NULL);
 1030                 }
 1031         }
 1032         rle->res = res;
 1033 
 1034         return (res);
 1035 }
 1036 
 1037 int
 1038 pci_vf_release_mem_resource(device_t dev, device_t child, int rid,
 1039     struct resource *r)
 1040 {
 1041         struct pci_devinfo *dinfo;
 1042         struct resource_list_entry *rle;
 1043         int error;
 1044 
 1045         dinfo = device_get_ivars(child);
 1046 
 1047         if (rman_get_flags(r) & RF_ACTIVE) {
 1048                 error = bus_deactivate_resource(child, SYS_RES_MEMORY, rid, r);
 1049                 if (error != 0)
 1050                         return (error);
 1051         }
 1052 
 1053         rle = resource_list_find(&dinfo->resources, SYS_RES_MEMORY, rid);
 1054         if (rle != NULL) {
 1055                 rle->res = NULL;
 1056                 resource_list_delete(&dinfo->resources, SYS_RES_MEMORY,
 1057                     rid);
 1058         }
 1059 
 1060         return (rman_release_resource(r));
 1061 }
 1062 

Cache object: ff72250fd889ecfb8cba2d5975a13bc1


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]


This page is part of the FreeBSD/Linux Linux Kernel Cross-Reference, and was automatically generated using a modified version of the LXR engine.