FreeBSD/Linux Kernel Cross Reference
sys/dev/pci/pci_iov.c
/*-
 * Copyright (c) 2013-2015 Sandvine Inc.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include "opt_bus.h"

#include <sys/param.h>
#include <sys/conf.h>
#include <sys/kernel.h>
#include <sys/systm.h>
#include <sys/bus.h>
#include <sys/fcntl.h>
#include <sys/ioccom.h>
#include <sys/iov.h>
#include <sys/linker.h>
#include <sys/lock.h>
#include <sys/malloc.h>
#include <sys/module.h>
#include <sys/mutex.h>
#include <sys/pciio.h>
#include <sys/queue.h>
#include <sys/rman.h>
#include <sys/sysctl.h>

#include <machine/bus.h>
#include <machine/stdarg.h>

#include <sys/nv.h>
#include <sys/iov_schema.h>

#include <dev/pci/pcireg.h>
#include <dev/pci/pcivar.h>
#include <dev/pci/pci_iov.h>
#include <dev/pci/pci_private.h>
#include <dev/pci/pci_iov_private.h>
#include <dev/pci/schema_private.h>

#include "pcib_if.h"

static MALLOC_DEFINE(M_SRIOV, "sr_iov", "PCI SR-IOV allocations");

static d_ioctl_t pci_iov_ioctl;

static struct cdevsw iov_cdevsw = {
	.d_version = D_VERSION,
	.d_name = "iov",
	.d_ioctl = pci_iov_ioctl
};

SYSCTL_DECL(_hw_pci);

/*
 * The maximum amount of memory we will allocate for user configuration of an
 * SR-IOV device. 1MB ought to be enough for anyone, but leave this
 * configurable just in case.
 */
static u_long pci_iov_max_config = 1024 * 1024;
SYSCTL_ULONG(_hw_pci, OID_AUTO, iov_max_config, CTLFLAG_RWTUN,
    &pci_iov_max_config, 0, "Maximum allowed size of SR-IOV configuration.");
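
/*
 * For example, an administrator passing a larger configuration could raise
 * the limit at runtime or from loader.conf (the value here is hypothetical):
 *
 *	# sysctl hw.pci.iov_max_config=2097152
 */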

#define IOV_READ(d, r, w) \
	pci_read_config((d)->cfg.dev, (d)->cfg.iov->iov_pos + r, w)

#define IOV_WRITE(d, r, v, w) \
	pci_write_config((d)->cfg.dev, (d)->cfg.iov->iov_pos + r, v, w)

static nvlist_t	*pci_iov_build_schema(nvlist_t **pf_schema,
		    nvlist_t **vf_schema);
static void	pci_iov_build_pf_schema(nvlist_t *schema,
		    nvlist_t **driver_schema);
static void	pci_iov_build_vf_schema(nvlist_t *schema,
		    nvlist_t **driver_schema);
static int	pci_iov_delete_iov_children(struct pci_devinfo *dinfo);
static nvlist_t	*pci_iov_get_pf_subsystem_schema(void);
static nvlist_t	*pci_iov_get_vf_subsystem_schema(void);

int
pci_iov_attach_name(device_t dev, struct nvlist *pf_schema,
    struct nvlist *vf_schema, const char *fmt, ...)
{
	char buf[NAME_MAX + 1];
	va_list ap;

	va_start(ap, fmt);
	vsnprintf(buf, sizeof(buf), fmt, ap);
	va_end(ap);
	return (PCI_IOV_ATTACH(device_get_parent(dev), dev, pf_schema,
	    vf_schema, buf));
}
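
/*
 * A minimal sketch of how a PF driver might call this during attach, with
 * error handling omitted (driver-specific schema parameters would be added
 * to pf_schema/vf_schema first; see the subsystem schemas below):
 *
 *	pf_schema = pci_iov_schema_alloc_node();
 *	vf_schema = pci_iov_schema_alloc_node();
 *	error = pci_iov_attach_name(dev, pf_schema, vf_schema, "%s",
 *	    device_get_nameunit(dev));
 */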

int
pci_iov_attach_method(device_t bus, device_t dev, nvlist_t *pf_schema,
    nvlist_t *vf_schema, const char *name)
{
	struct pci_devinfo *dinfo;
	struct pcicfg_iov *iov;
	nvlist_t *schema;
	uint32_t version;
	int error;
	int iov_pos;

	dinfo = device_get_ivars(dev);
	schema = NULL;

	error = pci_find_extcap(dev, PCIZ_SRIOV, &iov_pos);
	if (error != 0)
		return (error);

	version = pci_read_config(dev, iov_pos, 4);
	if (PCI_EXTCAP_VER(version) != 1) {
		if (bootverbose)
			device_printf(dev,
			    "Unsupported version of SR-IOV (%d) detected\n",
			    PCI_EXTCAP_VER(version));
		return (ENXIO);
	}

	iov = malloc(sizeof(*dinfo->cfg.iov), M_SRIOV, M_WAITOK | M_ZERO);

	mtx_lock(&Giant);
	if (dinfo->cfg.iov != NULL) {
		error = EBUSY;
		goto cleanup;
	}
	iov->iov_pf = dev;
	iov->iov_pos = iov_pos;

	schema = pci_iov_build_schema(&pf_schema, &vf_schema);
	if (schema == NULL) {
		error = ENOMEM;
		goto cleanup;
	}

	error = pci_iov_validate_schema(schema);
	if (error != 0)
		goto cleanup;
	iov->iov_schema = schema;

	iov->iov_cdev = make_dev(&iov_cdevsw, device_get_unit(dev),
	    UID_ROOT, GID_WHEEL, 0600, "iov/%s", name);
	if (iov->iov_cdev == NULL) {
		error = ENOMEM;
		goto cleanup;
	}

	dinfo->cfg.iov = iov;
	iov->iov_cdev->si_drv1 = dinfo;
	mtx_unlock(&Giant);

	return (0);

cleanup:
	nvlist_destroy(schema);
	nvlist_destroy(pf_schema);
	nvlist_destroy(vf_schema);
	free(iov, M_SRIOV);
	mtx_unlock(&Giant);
	return (error);
}

int
pci_iov_detach_method(device_t bus, device_t dev)
{
	struct pci_devinfo *dinfo;
	struct pcicfg_iov *iov;
	int error;

	mtx_lock(&Giant);
	dinfo = device_get_ivars(dev);
	iov = dinfo->cfg.iov;

	if (iov == NULL) {
		mtx_unlock(&Giant);
		return (0);
	}

	if ((iov->iov_flags & IOV_BUSY) != 0) {
		mtx_unlock(&Giant);
		return (EBUSY);
	}

	error = pci_iov_delete_iov_children(dinfo);
	if (error != 0) {
		mtx_unlock(&Giant);
		return (error);
	}

	dinfo->cfg.iov = NULL;

	if (iov->iov_cdev) {
		destroy_dev(iov->iov_cdev);
		iov->iov_cdev = NULL;
	}
	nvlist_destroy(iov->iov_schema);

	free(iov, M_SRIOV);
	mtx_unlock(&Giant);

	return (0);
}

static nvlist_t *
pci_iov_build_schema(nvlist_t **pf, nvlist_t **vf)
{
	nvlist_t *schema, *pf_driver, *vf_driver;

	/* We always take ownership of the schemas. */
	pf_driver = *pf;
	*pf = NULL;
	vf_driver = *vf;
	*vf = NULL;

	schema = pci_iov_schema_alloc_node();
	if (schema == NULL)
		goto cleanup;

	pci_iov_build_pf_schema(schema, &pf_driver);
	pci_iov_build_vf_schema(schema, &vf_driver);

	if (nvlist_error(schema) != 0)
		goto cleanup;

	return (schema);

cleanup:
	nvlist_destroy(schema);
	nvlist_destroy(pf_driver);
	nvlist_destroy(vf_driver);
	return (NULL);
}
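
/*
 * The schema built above is an nvlist tree of roughly this shape (the keys
 * are the macros used by the helpers below):
 *
 *	schema
 *	    PF_CONFIG_NAME
 *		IOV_CONFIG_NAME		(num_vfs, device)
 *		DRIVER_CONFIG_NAME	(PF driver parameters)
 *	    VF_SCHEMA_NAME
 *		IOV_CONFIG_NAME		(passthrough)
 *		DRIVER_CONFIG_NAME	(VF driver parameters)
 */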

static void
pci_iov_build_pf_schema(nvlist_t *schema, nvlist_t **driver_schema)
{
	nvlist_t *pf_schema, *iov_schema;

	pf_schema = pci_iov_schema_alloc_node();
	if (pf_schema == NULL) {
		nvlist_set_error(schema, ENOMEM);
		return;
	}

	iov_schema = pci_iov_get_pf_subsystem_schema();

	/*
	 * Note that if either *driver_schema or iov_schema is NULL, then
	 * nvlist_move_nvlist will put the schema in the error state and
	 * SR-IOV will fail to initialize later, so we don't have to explicitly
	 * handle that case.
	 */
	nvlist_move_nvlist(pf_schema, DRIVER_CONFIG_NAME, *driver_schema);
	nvlist_move_nvlist(pf_schema, IOV_CONFIG_NAME, iov_schema);
	nvlist_move_nvlist(schema, PF_CONFIG_NAME, pf_schema);
	*driver_schema = NULL;
}

static void
pci_iov_build_vf_schema(nvlist_t *schema, nvlist_t **driver_schema)
{
	nvlist_t *vf_schema, *iov_schema;

	vf_schema = pci_iov_schema_alloc_node();
	if (vf_schema == NULL) {
		nvlist_set_error(schema, ENOMEM);
		return;
	}

	iov_schema = pci_iov_get_vf_subsystem_schema();

	/*
	 * Note that if either *driver_schema or iov_schema is NULL, then
	 * nvlist_move_nvlist will put the schema in the error state and
	 * SR-IOV will fail to initialize later, so we don't have to explicitly
	 * handle that case.
	 */
	nvlist_move_nvlist(vf_schema, DRIVER_CONFIG_NAME, *driver_schema);
	nvlist_move_nvlist(vf_schema, IOV_CONFIG_NAME, iov_schema);
	nvlist_move_nvlist(schema, VF_SCHEMA_NAME, vf_schema);
	*driver_schema = NULL;
}

static nvlist_t *
pci_iov_get_pf_subsystem_schema(void)
{
	nvlist_t *pf;

	pf = pci_iov_schema_alloc_node();
	if (pf == NULL)
		return (NULL);

	pci_iov_schema_add_uint16(pf, "num_vfs", IOV_SCHEMA_REQUIRED, -1);
	pci_iov_schema_add_string(pf, "device", IOV_SCHEMA_REQUIRED, NULL);

	return (pf);
}

static nvlist_t *
pci_iov_get_vf_subsystem_schema(void)
{
	nvlist_t *vf;

	vf = pci_iov_schema_alloc_node();
	if (vf == NULL)
		return (NULL);

	pci_iov_schema_add_bool(vf, "passthrough", IOV_SCHEMA_HASDEFAULT, 0);

	return (vf);
}
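
/*
 * Driver-specific parameters are carried in separate schemas passed to
 * pci_iov_attach_name() and land under DRIVER_CONFIG_NAME next to the
 * subsystem schemas above; for instance, a driver might declare a
 * hypothetical parameter:
 *
 *	pci_iov_schema_add_bool(vf_schema, "allow-set-mac",
 *	    IOV_SCHEMA_HASDEFAULT, 0);
 */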

static int
pci_iov_alloc_bar(struct pci_devinfo *dinfo, int bar, pci_addr_t bar_shift)
{
	struct resource *res;
	struct pcicfg_iov *iov;
	device_t dev, bus;
	rman_res_t start, end;
	pci_addr_t bar_size;
	int rid;

	iov = dinfo->cfg.iov;
	dev = dinfo->cfg.dev;
	bus = device_get_parent(dev);
	rid = iov->iov_pos + PCIR_SRIOV_BAR(bar);
	bar_size = 1 << bar_shift;

	res = pci_alloc_multi_resource(bus, dev, SYS_RES_MEMORY, &rid, 0,
	    ~0, 1, iov->iov_num_vfs, RF_ACTIVE);
	if (res == NULL)
		return (ENXIO);

	iov->iov_bar[bar].res = res;
	iov->iov_bar[bar].bar_size = bar_size;
	iov->iov_bar[bar].bar_shift = bar_shift;

	start = rman_get_start(res);
	end = rman_get_end(res);
	return (rman_manage_region(&iov->rman, start, end));
}

static void
pci_iov_add_bars(struct pcicfg_iov *iov, struct pci_devinfo *dinfo)
{
	struct pci_iov_bar *bar;
	uint64_t bar_start;
	int i;

	for (i = 0; i <= PCIR_MAX_BAR_0; i++) {
		bar = &iov->iov_bar[i];
		if (bar->res != NULL) {
			bar_start = rman_get_start(bar->res) +
			    dinfo->cfg.vf.index * bar->bar_size;

			pci_add_bar(dinfo->cfg.dev, PCIR_BAR(i), bar_start,
			    bar->bar_shift);
		}
	}
}
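
/*
 * Worked example with hypothetical numbers: if the PF's VF BAR 0 resource
 * starts at 0xd0000000 and bar_size is 16KB (0x4000), then VF 0 gets BAR 0
 * at 0xd0000000, VF 1 at 0xd0004000, VF 2 at 0xd0008000, and so on; each
 * VF's BAR is carved out of the single PF allocation.
 */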

static int
pci_iov_parse_config(struct pcicfg_iov *iov, struct pci_iov_arg *arg,
    nvlist_t **ret)
{
	void *packed_config;
	nvlist_t *config;
	int error;

	config = NULL;
	packed_config = NULL;

	if (arg->len > pci_iov_max_config) {
		error = EMSGSIZE;
		goto out;
	}

	packed_config = malloc(arg->len, M_SRIOV, M_WAITOK);

	error = copyin(arg->config, packed_config, arg->len);
	if (error != 0)
		goto out;

	config = nvlist_unpack(packed_config, arg->len, NV_FLAG_IGNORE_CASE);
	if (config == NULL) {
		error = EINVAL;
		goto out;
	}

	error = pci_iov_schema_validate_config(iov->iov_schema, config);
	if (error != 0)
		goto out;

	error = nvlist_error(config);
	if (error != 0)
		goto out;

	*ret = config;
	config = NULL;

out:
	nvlist_destroy(config);
	free(packed_config, M_SRIOV);
	return (error);
}
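
/*
 * The packed nvlist normally comes from iovctl(8). A configuration file
 * along these lines (the device name is hypothetical) would be packed by
 * iovctl into the nvlist validated above:
 *
 *	PF {
 *		device : "ix0";
 *		num_vfs : 4;
 *	}
 *
 *	DEFAULT {
 *		passthrough : false;
 *	}
 */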

/*
 * Set the ARI_EN bit in the lowest-numbered PCI function with the SR-IOV
 * capability. This bit is only writable on the lowest-numbered PF but
 * affects all PFs on the device.
 */
static int
pci_iov_set_ari(device_t bus)
{
	device_t lowest;
	device_t *devlist;
	int i, error, devcount, lowest_func, lowest_pos, iov_pos, dev_func;
	uint16_t iov_ctl;

	/* If ARI is disabled on the downstream port there is nothing to do. */
	if (!PCIB_ARI_ENABLED(device_get_parent(bus)))
		return (0);

	error = device_get_children(bus, &devlist, &devcount);
	if (error != 0)
		return (error);

	lowest = NULL;
	for (i = 0; i < devcount; i++) {
		if (pci_find_extcap(devlist[i], PCIZ_SRIOV, &iov_pos) == 0) {
			dev_func = pci_get_function(devlist[i]);
			if (lowest == NULL || dev_func < lowest_func) {
				lowest = devlist[i];
				lowest_func = dev_func;
				lowest_pos = iov_pos;
			}
		}
	}
	free(devlist, M_TEMP);

	/*
	 * If we called this function, some device must have the SR-IOV
	 * capability.
	 */
	KASSERT(lowest != NULL,
	    ("Could not find child of %s with SR-IOV capability",
	    device_get_nameunit(bus)));

	iov_ctl = pci_read_config(lowest, lowest_pos + PCIR_SRIOV_CTL, 2);
	iov_ctl |= PCIM_SRIOV_ARI_EN;
	pci_write_config(lowest, lowest_pos + PCIR_SRIOV_CTL, iov_ctl, 2);
	if ((pci_read_config(lowest, lowest_pos + PCIR_SRIOV_CTL, 2) &
	    PCIM_SRIOV_ARI_EN) == 0) {
		device_printf(lowest, "failed to enable ARI\n");
		return (ENXIO);
	}
	return (0);
}
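
/*
 * ARI matters here because the VF RIDs produced by the offset/stride
 * arithmetic in pci_iov_config() may use function numbers above 7; with
 * ARI enabled a device can expose up to 256 functions on its bus, the
 * device-number bits of the RID being reinterpreted as function bits.
 */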

static int
pci_iov_config_page_size(struct pci_devinfo *dinfo)
{
	uint32_t page_cap, page_size;

	page_cap = IOV_READ(dinfo, PCIR_SRIOV_PAGE_CAP, 4);

	/*
	 * If the system page size is less than the smallest SR-IOV page size
	 * then round up to the smallest SR-IOV page size.
	 */
	if (PAGE_SHIFT < PCI_SRIOV_BASE_PAGE_SHIFT)
		page_size = (1 << 0);
	else
		page_size = (1 << (PAGE_SHIFT - PCI_SRIOV_BASE_PAGE_SHIFT));

	/* Check that the device supports the system page size. */
	if (!(page_size & page_cap))
		return (ENXIO);

	IOV_WRITE(dinfo, PCIR_SRIOV_PAGE_SIZE, page_size, 4);
	return (0);
}
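
/*
 * For example, with 4KB system pages (PAGE_SHIFT == 12 ==
 * PCI_SRIOV_BASE_PAGE_SHIFT) the code above selects bit 0, i.e. the 4KB
 * SR-IOV page size; a hypothetical 16KB-page system (PAGE_SHIFT == 14)
 * would select bit 2 (1 << (14 - 12)).
 */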

static int
pci_iov_init(device_t dev, uint16_t num_vfs, const nvlist_t *config)
{
	const nvlist_t *device, *driver_config;

	device = nvlist_get_nvlist(config, PF_CONFIG_NAME);
	driver_config = nvlist_get_nvlist(device, DRIVER_CONFIG_NAME);
	return (PCI_IOV_INIT(dev, num_vfs, driver_config));
}

static int
pci_iov_init_rman(device_t pf, struct pcicfg_iov *iov)
{
	int error;

	iov->rman.rm_start = 0;
	iov->rman.rm_end = ~0;
	iov->rman.rm_type = RMAN_ARRAY;
	snprintf(iov->rman_name, sizeof(iov->rman_name), "%s VF I/O memory",
	    device_get_nameunit(pf));
	iov->rman.rm_descr = iov->rman_name;

	error = rman_init(&iov->rman);
	if (error != 0)
		return (error);

	iov->iov_flags |= IOV_RMAN_INITED;
	return (0);
}

static int
pci_iov_alloc_bar_ea(struct pci_devinfo *dinfo, int bar)
{
	struct pcicfg_iov *iov;
	rman_res_t start, end;
	struct resource *res;
	struct resource_list *rl;
	struct resource_list_entry *rle;

	rl = &dinfo->resources;
	iov = dinfo->cfg.iov;

	rle = resource_list_find(rl, SYS_RES_MEMORY,
	    iov->iov_pos + PCIR_SRIOV_BAR(bar));
	if (rle == NULL)
		rle = resource_list_find(rl, SYS_RES_IOPORT,
		    iov->iov_pos + PCIR_SRIOV_BAR(bar));
	if (rle == NULL)
		return (ENXIO);
	res = rle->res;

	iov->iov_bar[bar].res = res;
	iov->iov_bar[bar].bar_size = rman_get_size(res) / iov->iov_num_vfs;
	iov->iov_bar[bar].bar_shift = pci_mapsize(iov->iov_bar[bar].bar_size);

	start = rman_get_start(res);
	end = rman_get_end(res);

	return (rman_manage_region(&iov->rman, start, end));
}

static int
pci_iov_setup_bars(struct pci_devinfo *dinfo)
{
	device_t dev;
	struct pcicfg_iov *iov;
	pci_addr_t bar_value, testval;
	int i, last_64, error;

	iov = dinfo->cfg.iov;
	dev = dinfo->cfg.dev;
	last_64 = 0;

	pci_add_resources_ea(device_get_parent(dev), dev, 1);

	for (i = 0; i <= PCIR_MAX_BAR_0; i++) {
		/* First, try to use BARs allocated with EA. */
		error = pci_iov_alloc_bar_ea(dinfo, i);
		if (error == 0)
			continue;

		/*
		 * Fall back to legacy BAR allocation only if EA is not
		 * enabled for this BAR.
		 */
		if (pci_ea_is_enabled(dev, iov->iov_pos + PCIR_SRIOV_BAR(i)))
			continue;

		/*
		 * If a PCI BAR is a 64-bit wide BAR, then it spans two
		 * consecutive registers. Therefore if the last BAR that
		 * we looked at was a 64-bit BAR, we need to skip this
		 * register as it's the second half of the last BAR.
		 */
		if (!last_64) {
			pci_read_bar(dev,
			    iov->iov_pos + PCIR_SRIOV_BAR(i),
			    &bar_value, &testval, &last_64);

			if (testval != 0) {
				error = pci_iov_alloc_bar(dinfo, i,
				    pci_mapsize(testval));
				if (error != 0)
					return (error);
			}
		} else
			last_64 = 0;
	}

	return (0);
}

static void
pci_iov_enumerate_vfs(struct pci_devinfo *dinfo, const nvlist_t *config,
    uint16_t first_rid, uint16_t rid_stride)
{
	char device_name[VF_MAX_NAME];
	const nvlist_t *device, *driver_config, *iov_config;
	device_t bus, dev, vf;
	struct pcicfg_iov *iov;
	struct pci_devinfo *vfinfo;
	int i, error;
	uint16_t vid, did, next_rid;

	iov = dinfo->cfg.iov;
	dev = dinfo->cfg.dev;
	bus = device_get_parent(dev);
	next_rid = first_rid;
	vid = pci_get_vendor(dev);
	did = IOV_READ(dinfo, PCIR_SRIOV_VF_DID, 2);

	for (i = 0; i < iov->iov_num_vfs; i++, next_rid += rid_stride) {
		snprintf(device_name, sizeof(device_name), VF_PREFIX"%d", i);
		device = nvlist_get_nvlist(config, device_name);
		iov_config = nvlist_get_nvlist(device, IOV_CONFIG_NAME);
		driver_config = nvlist_get_nvlist(device, DRIVER_CONFIG_NAME);

		vf = PCI_CREATE_IOV_CHILD(bus, dev, next_rid, vid, did);
		if (vf == NULL)
			break;

		/*
		 * If we are creating passthrough devices then force the ppt
		 * driver to attach to prevent a VF driver from claiming the
		 * VFs.
		 */
		if (nvlist_get_bool(iov_config, "passthrough"))
			device_set_devclass_fixed(vf, "ppt");

		vfinfo = device_get_ivars(vf);

		vfinfo->cfg.iov = iov;
		vfinfo->cfg.vf.index = i;

		pci_iov_add_bars(iov, vfinfo);

		error = PCI_IOV_ADD_VF(dev, i, driver_config);
		if (error != 0) {
			device_printf(dev, "Failed to add VF %d\n", i);
			device_delete_child(bus, vf);
		}
	}

	bus_generic_attach(bus);
}

static int
pci_iov_config(struct cdev *cdev, struct pci_iov_arg *arg)
{
	device_t bus, dev;
	struct pci_devinfo *dinfo;
	struct pcicfg_iov *iov;
	nvlist_t *config;
	int i, error;
	uint16_t rid_off, rid_stride;
	uint16_t first_rid, last_rid;
	uint16_t iov_ctl;
	uint16_t num_vfs, total_vfs;
	int iov_inited;

	mtx_lock(&Giant);
	dinfo = cdev->si_drv1;
	iov = dinfo->cfg.iov;
	dev = dinfo->cfg.dev;
	bus = device_get_parent(dev);
	iov_inited = 0;
	config = NULL;

	if ((iov->iov_flags & IOV_BUSY) || iov->iov_num_vfs != 0) {
		mtx_unlock(&Giant);
		return (EBUSY);
	}
	iov->iov_flags |= IOV_BUSY;

	error = pci_iov_parse_config(iov, arg, &config);
	if (error != 0)
		goto out;

	num_vfs = pci_iov_config_get_num_vfs(config);
	total_vfs = IOV_READ(dinfo, PCIR_SRIOV_TOTAL_VFS, 2);
	if (num_vfs > total_vfs) {
		error = EINVAL;
		goto out;
	}

	error = pci_iov_config_page_size(dinfo);
	if (error != 0)
		goto out;

	error = pci_iov_set_ari(bus);
	if (error != 0)
		goto out;

	error = pci_iov_init(dev, num_vfs, config);
	if (error != 0)
		goto out;
	iov_inited = 1;

	IOV_WRITE(dinfo, PCIR_SRIOV_NUM_VFS, num_vfs, 2);

	rid_off = IOV_READ(dinfo, PCIR_SRIOV_VF_OFF, 2);
	rid_stride = IOV_READ(dinfo, PCIR_SRIOV_VF_STRIDE, 2);

	first_rid = pci_get_rid(dev) + rid_off;
	last_rid = first_rid + (num_vfs - 1) * rid_stride;

	/* We don't yet support allocating extra bus numbers for VFs. */
	if (pci_get_bus(dev) != PCI_RID2BUS(last_rid)) {
		error = ENOSPC;
		goto out;
	}
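
	/*
	 * Worked example with hypothetical values: a PF at RID 0x0800
	 * (bus 8, device 0, function 0) with rid_off 0x80 and rid_stride 2
	 * gives first_rid 0x0880; with num_vfs 4, last_rid is 0x0886, and
	 * PCI_RID2BUS(0x0886) is still bus 8, so the check above passes.
	 */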

	iov_ctl = IOV_READ(dinfo, PCIR_SRIOV_CTL, 2);
	iov_ctl &= ~(PCIM_SRIOV_VF_EN | PCIM_SRIOV_VF_MSE);
	IOV_WRITE(dinfo, PCIR_SRIOV_CTL, iov_ctl, 2);

	error = pci_iov_init_rman(dev, iov);
	if (error != 0)
		goto out;

	iov->iov_num_vfs = num_vfs;

	error = pci_iov_setup_bars(dinfo);
	if (error != 0)
		goto out;

	iov_ctl = IOV_READ(dinfo, PCIR_SRIOV_CTL, 2);
	iov_ctl |= PCIM_SRIOV_VF_EN | PCIM_SRIOV_VF_MSE;
	IOV_WRITE(dinfo, PCIR_SRIOV_CTL, iov_ctl, 2);

	/* Per specification, we must wait 100ms before accessing VFs. */
	pause("iov", roundup(hz, 10));
	pci_iov_enumerate_vfs(dinfo, config, first_rid, rid_stride);

	nvlist_destroy(config);
	iov->iov_flags &= ~IOV_BUSY;
	mtx_unlock(&Giant);

	return (0);
out:
	if (iov_inited)
		PCI_IOV_UNINIT(dev);

	for (i = 0; i <= PCIR_MAX_BAR_0; i++) {
		if (iov->iov_bar[i].res != NULL) {
			pci_release_resource(bus, dev, SYS_RES_MEMORY,
			    iov->iov_pos + PCIR_SRIOV_BAR(i),
			    iov->iov_bar[i].res);
			pci_delete_resource(bus, dev, SYS_RES_MEMORY,
			    iov->iov_pos + PCIR_SRIOV_BAR(i));
			iov->iov_bar[i].res = NULL;
		}
	}

	if (iov->iov_flags & IOV_RMAN_INITED) {
		rman_fini(&iov->rman);
		iov->iov_flags &= ~IOV_RMAN_INITED;
	}

	nvlist_destroy(config);
	iov->iov_num_vfs = 0;
	iov->iov_flags &= ~IOV_BUSY;
	mtx_unlock(&Giant);
	return (error);
}

void
pci_iov_cfg_restore(device_t dev, struct pci_devinfo *dinfo)
{
	struct pcicfg_iov *iov;

	iov = dinfo->cfg.iov;

	IOV_WRITE(dinfo, PCIR_SRIOV_PAGE_SIZE, iov->iov_page_size, 4);
	IOV_WRITE(dinfo, PCIR_SRIOV_NUM_VFS, iov->iov_num_vfs, 2);
	IOV_WRITE(dinfo, PCIR_SRIOV_CTL, iov->iov_ctl, 2);
}

void
pci_iov_cfg_save(device_t dev, struct pci_devinfo *dinfo)
{
	struct pcicfg_iov *iov;

	iov = dinfo->cfg.iov;

	iov->iov_page_size = IOV_READ(dinfo, PCIR_SRIOV_PAGE_SIZE, 4);
	iov->iov_ctl = IOV_READ(dinfo, PCIR_SRIOV_CTL, 2);
}

/* Return true if child is a VF of the given PF. */
static int
pci_iov_is_child_vf(struct pcicfg_iov *pf, device_t child)
{
	struct pci_devinfo *vfinfo;

	vfinfo = device_get_ivars(child);

	if (!(vfinfo->cfg.flags & PCICFG_VF))
		return (0);

	return (pf == vfinfo->cfg.iov);
}

static int
pci_iov_delete_iov_children(struct pci_devinfo *dinfo)
{
	device_t bus, dev, vf, *devlist;
	struct pcicfg_iov *iov;
	int i, error, devcount;
	uint32_t iov_ctl;

	mtx_assert(&Giant, MA_OWNED);

	iov = dinfo->cfg.iov;
	dev = dinfo->cfg.dev;
	bus = device_get_parent(dev);
	devlist = NULL;

	iov->iov_flags |= IOV_BUSY;

	error = device_get_children(bus, &devlist, &devcount);
	if (error != 0)
		goto out;

	for (i = 0; i < devcount; i++) {
		vf = devlist[i];

		if (!pci_iov_is_child_vf(iov, vf))
			continue;

		error = device_detach(vf);
		if (error != 0) {
			device_printf(dev,
			    "Could not disable SR-IOV: failed to detach VF %s\n",
			    device_get_nameunit(vf));
			goto out;
		}
	}

	for (i = 0; i < devcount; i++) {
		vf = devlist[i];

		if (pci_iov_is_child_vf(iov, vf))
			device_delete_child(bus, vf);
	}
	PCI_IOV_UNINIT(dev);

	iov_ctl = IOV_READ(dinfo, PCIR_SRIOV_CTL, 2);
	iov_ctl &= ~(PCIM_SRIOV_VF_EN | PCIM_SRIOV_VF_MSE);
	IOV_WRITE(dinfo, PCIR_SRIOV_CTL, iov_ctl, 2);
	IOV_WRITE(dinfo, PCIR_SRIOV_NUM_VFS, 0, 2);

	iov->iov_num_vfs = 0;

	for (i = 0; i <= PCIR_MAX_BAR_0; i++) {
		if (iov->iov_bar[i].res != NULL) {
			pci_release_resource(bus, dev, SYS_RES_MEMORY,
			    iov->iov_pos + PCIR_SRIOV_BAR(i),
			    iov->iov_bar[i].res);
			pci_delete_resource(bus, dev, SYS_RES_MEMORY,
			    iov->iov_pos + PCIR_SRIOV_BAR(i));
			iov->iov_bar[i].res = NULL;
		}
	}

	if (iov->iov_flags & IOV_RMAN_INITED) {
		rman_fini(&iov->rman);
		iov->iov_flags &= ~IOV_RMAN_INITED;
	}

	error = 0;
out:
	free(devlist, M_TEMP);
	iov->iov_flags &= ~IOV_BUSY;
	return (error);
}

static int
pci_iov_delete(struct cdev *cdev)
{
	struct pci_devinfo *dinfo;
	struct pcicfg_iov *iov;
	int error;

	mtx_lock(&Giant);
	dinfo = cdev->si_drv1;
	iov = dinfo->cfg.iov;

	if ((iov->iov_flags & IOV_BUSY) != 0) {
		error = EBUSY;
		goto out;
	}
	if (iov->iov_num_vfs == 0) {
		error = ECHILD;
		goto out;
	}

	error = pci_iov_delete_iov_children(dinfo);

out:
	mtx_unlock(&Giant);
	return (error);
}

static int
pci_iov_get_schema_ioctl(struct cdev *cdev, struct pci_iov_schema *output)
{
	struct pci_devinfo *dinfo;
	void *packed;
	size_t output_len, size;
	int error;

	packed = NULL;

	mtx_lock(&Giant);
	dinfo = cdev->si_drv1;
	packed = nvlist_pack(dinfo->cfg.iov->iov_schema, &size);
	mtx_unlock(&Giant);

	if (packed == NULL) {
		error = ENOMEM;
		goto fail;
	}

	output_len = output->len;
	output->len = size;
	if (size <= output_len) {
		error = copyout(packed, output->schema, size);
		if (error != 0)
			goto fail;

		output->error = 0;
	} else
		/*
		 * If we return an error then the ioctl code won't copyout
		 * output back to userland, so we flag the error in the struct
		 * instead.
		 */
		output->error = EMSGSIZE;

	error = 0;

fail:
	free(packed, M_NVLIST);

	return (error);
}
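
/*
 * A minimal userland sketch of the resulting two-call pattern, with error
 * handling omitted (fd is a descriptor for the /dev/iov node):
 *
 *	struct pci_iov_schema s = { .schema = NULL, .len = 0 };
 *
 *	ioctl(fd, IOV_GET_SCHEMA, &s);	(s.error == EMSGSIZE, s.len is set)
 *	s.schema = malloc(s.len);
 *	ioctl(fd, IOV_GET_SCHEMA, &s);
 *	schema = nvlist_unpack(s.schema, s.len, 0);
 */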

static int
pci_iov_ioctl(struct cdev *dev, u_long cmd, caddr_t data, int fflag,
    struct thread *td)
{

	switch (cmd) {
	case IOV_CONFIG:
		return (pci_iov_config(dev, (struct pci_iov_arg *)data));
	case IOV_DELETE:
		return (pci_iov_delete(dev));
	case IOV_GET_SCHEMA:
		return (pci_iov_get_schema_ioctl(dev,
		    (struct pci_iov_schema *)data));
	default:
		return (EINVAL);
	}
}
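
/*
 * These ioctls are normally driven by iovctl(8) against the /dev/iov/<name>
 * node created in pci_iov_attach_method(): IOV_CONFIG creates the VFs
 * described by a packed configuration nvlist, and IOV_DELETE destroys them
 * again.
 */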

struct resource *
pci_vf_alloc_mem_resource(device_t dev, device_t child, int *rid,
    rman_res_t start, rman_res_t end, rman_res_t count, u_int flags)
{
	struct pci_devinfo *dinfo;
	struct pcicfg_iov *iov;
	struct pci_map *map;
	struct resource *res;
	struct resource_list_entry *rle;
	rman_res_t bar_start, bar_end;
	pci_addr_t bar_length;
	int error;

	dinfo = device_get_ivars(child);
	iov = dinfo->cfg.iov;

	map = pci_find_bar(child, *rid);
	if (map == NULL)
		return (NULL);

	bar_length = 1 << map->pm_size;
	bar_start = map->pm_value;
	bar_end = bar_start + bar_length - 1;

	/* Make sure that the resource fits the constraints. */
	if (bar_start >= end || bar_end <= bar_start || count != 1)
		return (NULL);

	/* Clamp the resource to the constraints if necessary. */
	if (bar_start < start)
		bar_start = start;
	if (bar_end > end)
		bar_end = end;
	bar_length = bar_end - bar_start + 1;

	res = rman_reserve_resource(&iov->rman, bar_start, bar_end,
	    bar_length, flags, child);
	if (res == NULL)
		return (NULL);

	rle = resource_list_add(&dinfo->resources, SYS_RES_MEMORY, *rid,
	    bar_start, bar_end, 1);
	if (rle == NULL) {
		rman_release_resource(res);
		return (NULL);
	}

	rman_set_rid(res, *rid);

	if (flags & RF_ACTIVE) {
		error = bus_activate_resource(child, SYS_RES_MEMORY, *rid, res);
		if (error != 0) {
			resource_list_delete(&dinfo->resources, SYS_RES_MEMORY,
			    *rid);
			rman_release_resource(res);
			return (NULL);
		}
	}
	rle->res = res;

	return (res);
}

int
pci_vf_release_mem_resource(device_t dev, device_t child, int rid,
    struct resource *r)
{
	struct pci_devinfo *dinfo;
	struct resource_list_entry *rle;
	int error;

	dinfo = device_get_ivars(child);

	if (rman_get_flags(r) & RF_ACTIVE) {
		error = bus_deactivate_resource(child, SYS_RES_MEMORY, rid, r);
		if (error != 0)
			return (error);
	}

	rle = resource_list_find(&dinfo->resources, SYS_RES_MEMORY, rid);
	if (rle != NULL) {
		rle->res = NULL;
		resource_list_delete(&dinfo->resources, SYS_RES_MEMORY,
		    rid);
	}

	return (rman_release_resource(r));
}