1 /*-
2 * Copyright (c) 2016-2017 Microsoft Corp.
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 * notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 * notice, this list of conditions and the following disclaimer in the
12 * documentation and/or other materials provided with the distribution.
13 *
14 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
15 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
17 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
18 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
19 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
20 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
21 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
22 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
23 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
24 * SUCH DAMAGE.
25 */
26
27 #include <sys/cdefs.h>
28 __FBSDID("$FreeBSD$");
29
30 #ifdef NEW_PCIB
31
32 #include <sys/param.h>
33 #include <sys/systm.h>
34 #include <sys/types.h>
35 #include <sys/malloc.h>
36 #include <sys/module.h>
37 #include <sys/kernel.h>
38 #include <sys/queue.h>
39 #include <sys/lock.h>
40 #include <sys/sx.h>
41 #include <sys/smp.h>
42 #include <sys/sysctl.h>
43 #include <sys/bus.h>
44 #include <sys/rman.h>
45 #include <sys/mutex.h>
46 #include <sys/errno.h>
47
48 #include <vm/vm.h>
49 #include <vm/vm_param.h>
50 #include <vm/vm_kern.h>
51 #include <vm/pmap.h>
52
53 #include <machine/atomic.h>
54 #include <machine/bus.h>
55 #include <machine/frame.h>
56 #include <machine/pci_cfgreg.h>
57 #include <machine/resource.h>
58
59 #include <sys/pciio.h>
60 #include <dev/pci/pcireg.h>
61 #include <dev/pci/pcivar.h>
62 #include <dev/pci/pci_private.h>
63 #include <dev/pci/pcib_private.h>
64 #include "pcib_if.h"
65
66 #include <machine/intr_machdep.h>
67 #include <x86/apicreg.h>
68
69 #include <dev/hyperv/include/hyperv.h>
70 #include <dev/hyperv/include/hyperv_busdma.h>
71 #include <dev/hyperv/include/vmbus_xact.h>
72 #include <dev/hyperv/vmbus/vmbus_reg.h>
73 #include <dev/hyperv/vmbus/vmbus_chanvar.h>
74
75 #include "vmbus_if.h"
76
77 #if __FreeBSD_version < 1100000
78 typedef u_long rman_res_t;
79 #define RM_MAX_END (~(rman_res_t)0)
80 #endif
81
82 struct completion {
83 unsigned int done;
84 struct mtx lock;
85 };
86
87 static void
88 init_completion(struct completion *c)
89 {
90 memset(c, 0, sizeof(*c));
91 mtx_init(&c->lock, "hvcmpl", NULL, MTX_DEF);
92 c->done = 0;
93 }
94
95 static void
96 free_completion(struct completion *c)
97 {
98 mtx_destroy(&c->lock);
99 }
100
101 static void
102 complete(struct completion *c)
103 {
104 mtx_lock(&c->lock);
105 c->done++;
106 mtx_unlock(&c->lock);
107 wakeup(c);
108 }
109
110 static void
111 wait_for_completion(struct completion *c)
112 {
113 mtx_lock(&c->lock);
114 while (c->done == 0)
115 mtx_sleep(c, &c->lock, 0, "hvwfc", 0);
116 c->done--;
117 mtx_unlock(&c->lock);
118 }
119
120 /*
121 * Return: 0 if completed, a non-zero value if timed out.
122 */
123 static int
124 wait_for_completion_timeout(struct completion *c, int timeout)
125 {
126 int ret;
127
128 mtx_lock(&c->lock);
129
130 if (c->done == 0)
131 mtx_sleep(c, &c->lock, 0, "hvwfc", timeout);
132
133 if (c->done > 0) {
134 c->done--;
135 ret = 0;
136 } else {
137 ret = 1;
138 }
139
140 mtx_unlock(&c->lock);
141
142 return (ret);
143 }
144
/*
 * Pack a protocol version into 32 bits: the major number goes in the upper
 * 16 bits and the minor number in the lower 16 bits.
 *
 * The minor argument must be the one OR-ed into the low half; using the
 * major number there only happens to work when major == minor.
 */
#define PCI_MAKE_VERSION(major, minor) \
	((uint32_t)(((major) << 16) | (minor)))

/* Protocol versions this driver can request from the host. */
enum {
	PCI_PROTOCOL_VERSION_1_1 = PCI_MAKE_VERSION(1, 1),
	PCI_PROTOCOL_VERSION_CURRENT = PCI_PROTOCOL_VERSION_1_1
};
151
/* Total length of the configuration MMIO range. */
#define PCI_CONFIG_MMIO_LENGTH	0x2000
/* Offset added to a config-space register offset to form the MMIO address. */
#define CFG_PAGE_OFFSET		0x1000
/* Bytes between CFG_PAGE_OFFSET and the end of the MMIO range. */
#define CFG_PAGE_SIZE		(PCI_CONFIG_MMIO_LENGTH - CFG_PAGE_OFFSET)
155
/*
 * Message Types
 *
 * Every message type is expressed as an offset from PCI_MESSAGE_BASE.
 */

enum pci_message_type {
	/*
	 * Version 1.1
	 */
	PCI_MESSAGE_BASE		= 0x42490000,
	PCI_BUS_RELATIONS		= PCI_MESSAGE_BASE + 0x00,
	PCI_QUERY_BUS_RELATIONS		= PCI_MESSAGE_BASE + 0x01,
	/* Offsets 0x02 and 0x03 are not defined here. */
	PCI_POWER_STATE_CHANGE		= PCI_MESSAGE_BASE + 0x04,
	PCI_QUERY_RESOURCE_REQUIREMENTS	= PCI_MESSAGE_BASE + 0x05,
	PCI_QUERY_RESOURCE_RESOURCES	= PCI_MESSAGE_BASE + 0x06,
	PCI_BUS_D0ENTRY			= PCI_MESSAGE_BASE + 0x07,
	PCI_BUS_D0EXIT			= PCI_MESSAGE_BASE + 0x08,
	PCI_READ_BLOCK			= PCI_MESSAGE_BASE + 0x09,
	PCI_WRITE_BLOCK			= PCI_MESSAGE_BASE + 0x0A,
	PCI_EJECT			= PCI_MESSAGE_BASE + 0x0B,
	PCI_QUERY_STOP			= PCI_MESSAGE_BASE + 0x0C,
	PCI_REENABLE			= PCI_MESSAGE_BASE + 0x0D,
	PCI_QUERY_STOP_FAILED		= PCI_MESSAGE_BASE + 0x0E,
	PCI_EJECTION_COMPLETE		= PCI_MESSAGE_BASE + 0x0F,
	PCI_RESOURCES_ASSIGNED		= PCI_MESSAGE_BASE + 0x10,
	PCI_RESOURCES_RELEASED		= PCI_MESSAGE_BASE + 0x11,
	PCI_INVALIDATE_BLOCK		= PCI_MESSAGE_BASE + 0x12,
	PCI_QUERY_PROTOCOL_VERSION	= PCI_MESSAGE_BASE + 0x13,
	PCI_CREATE_INTERRUPT_MESSAGE	= PCI_MESSAGE_BASE + 0x14,
	PCI_DELETE_INTERRUPT_MESSAGE	= PCI_MESSAGE_BASE + 0x15,
	PCI_MESSAGE_MAXIMUM		/* one past the last type above */
};
187
/*
 * Structures defining the virtual PCI Express protocol.
 */

/* A protocol version, viewable either as two 16-bit halves or as one word. */
union pci_version {
	struct {
		uint16_t	minor_version;
		uint16_t	major_version;
	} parts;
	uint32_t	version;
} __packed;
199
/*
 * Slot/function encoding in the form Windows uses.  The Hyper-V parent
 * partition expects this representation whenever a slot is exchanged
 * with it.
 */
union win_slot_encoding {
	struct {
		uint32_t	slot:5;		/* PCI slot (device) number */
		uint32_t	func:3;		/* PCI function number */
		uint32_t	reserved:24;
	} bits;
	uint32_t	val;			/* all 32 bits at once */
} __packed;
213
214 struct pci_func_desc {
215 uint16_t v_id; /* vendor ID */
216 uint16_t d_id; /* device ID */
217 uint8_t rev;
218 uint8_t prog_intf;
219 uint8_t subclass;
220 uint8_t base_class;
221 uint32_t subsystem_id;
222 union win_slot_encoding wslot;
223 uint32_t ser; /* serial number */
224 } __packed;
225
226 struct hv_msi_desc {
227 uint8_t vector;
228 uint8_t delivery_mode;
229 uint16_t vector_count;
230 uint32_t reserved;
231 uint64_t cpu_mask;
232 } __packed;
233
234 struct tran_int_desc {
235 uint16_t reserved;
236 uint16_t vector_count;
237 uint32_t data;
238 uint64_t address;
239 } __packed;
240
241 struct pci_message {
242 uint32_t type;
243 } __packed;
244
245 struct pci_child_message {
246 struct pci_message message_type;
247 union win_slot_encoding wslot;
248 } __packed;
249
250 struct pci_incoming_message {
251 struct vmbus_chanpkt_hdr hdr;
252 struct pci_message message_type;
253 } __packed;
254
255 struct pci_response {
256 struct vmbus_chanpkt_hdr hdr;
257 int32_t status; /* negative values are failures */
258 } __packed;
259
260 struct pci_packet {
261 void (*completion_func)(void *context, struct pci_response *resp,
262 int resp_packet_size);
263 void *compl_ctxt;
264
265 struct pci_message message[0];
266 };
267
268 /*
269 * Specific message types supporting the PCI protocol.
270 */
271
272 struct pci_version_request {
273 struct pci_message message_type;
274 uint32_t protocol_version;
275 uint32_t is_last_attempt:1;
276 uint32_t reservedz:31;
277 } __packed;
278
279 struct pci_bus_d0_entry {
280 struct pci_message message_type;
281 uint32_t reserved;
282 uint64_t mmio_base;
283 } __packed;
284
285 struct pci_bus_relations {
286 struct pci_incoming_message incoming;
287 uint32_t device_count;
288 struct pci_func_desc func[0];
289 } __packed;
290
291 #define MAX_NUM_BARS (PCIR_MAX_BAR_0 + 1)
292 struct pci_q_res_req_response {
293 struct vmbus_chanpkt_hdr hdr;
294 int32_t status; /* negative values are failures */
295 uint32_t probed_bar[MAX_NUM_BARS];
296 } __packed;
297
298 struct pci_resources_assigned {
299 struct pci_message message_type;
300 union win_slot_encoding wslot;
301 uint8_t memory_range[0x14][MAX_NUM_BARS]; /* unused here */
302 uint32_t msi_descriptors;
303 uint32_t reserved[4];
304 } __packed;
305
306 struct pci_create_interrupt {
307 struct pci_message message_type;
308 union win_slot_encoding wslot;
309 struct hv_msi_desc int_desc;
310 } __packed;
311
312 struct pci_create_int_response {
313 struct pci_response response;
314 uint32_t reserved;
315 struct tran_int_desc int_desc;
316 } __packed;
317
318 struct pci_delete_interrupt {
319 struct pci_message message_type;
320 union win_slot_encoding wslot;
321 struct tran_int_desc int_desc;
322 } __packed;
323
324 struct pci_dev_incoming {
325 struct pci_incoming_message incoming;
326 union win_slot_encoding wslot;
327 } __packed;
328
329 struct pci_eject_response {
330 struct pci_message message_type;
331 union win_slot_encoding wslot;
332 uint32_t status;
333 } __packed;
334
/*
 * Driver specific state.
 */

/* Bus lifecycle; a rescan is only performed in hv_pcibus_installed. */
enum hv_pcibus_state {
	hv_pcibus_init		= 0,
	hv_pcibus_installed
};
343
344 struct hv_pcibus {
345 device_t pcib;
346 device_t pci_bus;
347 struct vmbus_pcib_softc *sc;
348
349 uint16_t pci_domain;
350
351 enum hv_pcibus_state state;
352
353 struct resource *cfg_res;
354
355 struct completion query_completion, *query_comp;
356
357 struct mtx config_lock; /* Avoid two threads writing index page */
358 struct mtx device_list_lock; /* Protect lists below */
359 TAILQ_HEAD(, hv_pci_dev) children;
360 TAILQ_HEAD(, hv_dr_state) dr_list;
361
362 volatile int detaching;
363 };
364
365 struct hv_pci_dev {
366 TAILQ_ENTRY(hv_pci_dev) link;
367
368 struct pci_func_desc desc;
369
370 bool reported_missing;
371
372 struct hv_pcibus *hbus;
373 struct task eject_task;
374
375 TAILQ_HEAD(, hv_irq_desc) irq_desc_list;
376
377 /*
378 * What would be observed if one wrote 0xFFFFFFFF to a BAR and then
379 * read it back, for each of the BAR offsets within config space.
380 */
381 uint32_t probed_bar[MAX_NUM_BARS];
382 };
383
384 /*
385 * Tracks "Device Relations" messages from the host, which must be both
386 * processed in order.
387 */
388 struct hv_dr_work {
389 struct task task;
390 struct hv_pcibus *bus;
391 };
392
393 struct hv_dr_state {
394 TAILQ_ENTRY(hv_dr_state) link;
395 uint32_t device_count;
396 struct pci_func_desc func[0];
397 };
398
399 struct hv_irq_desc {
400 TAILQ_ENTRY(hv_irq_desc) link;
401 struct tran_int_desc desc;
402 int irq;
403 };
404
/*
 * devfn packing: the 5-bit slot number sits above the 3-bit function
 * number.
 */
#define PCI_DEVFN(slot, func)	((((slot) & 0x1f) << 3) | ((func) & 0x07))
#define PCI_SLOT(devfn)		(((devfn) >> 3) & 0x1f)
#define PCI_FUNC(devfn)		((devfn) & 0x07)
408
409 static uint32_t
410 devfn_to_wslot(unsigned int devfn)
411 {
412 union win_slot_encoding wslot;
413
414 wslot.val = 0;
415 wslot.bits.slot = PCI_SLOT(devfn);
416 wslot.bits.func = PCI_FUNC(devfn);
417
418 return (wslot.val);
419 }
420
421 static unsigned int
422 wslot_to_devfn(uint32_t wslot)
423 {
424 union win_slot_encoding encoding;
425 unsigned int slot;
426 unsigned int func;
427
428 encoding.val = wslot;
429
430 slot = encoding.bits.slot;
431 func = encoding.bits.func;
432
433 return (PCI_DEVFN(slot, func));
434 }
435
/* Device softc: the VMBus channel plus the state hanging off it. */
struct vmbus_pcib_softc {
	struct vmbus_channel	*chan;
	void			*rx_buf;	/* initial receive buffer of the channel callback */

	struct taskqueue	*taskq;		/* runs the device-relations work */

	struct hv_pcibus	*hbus;
};
444
445 /* {44C4F61D-4444-4400-9D52-802E27EDE19F} */
446 static const struct hyperv_guid g_pass_through_dev_type = {
447 .hv_guid = {0x1D, 0xF6, 0xC4, 0x44, 0x44, 0x44, 0x00, 0x44,
448 0x9D, 0x52, 0x80, 0x2E, 0x27, 0xED, 0xE1, 0x9F}
449 };
450
451 struct hv_pci_compl {
452 struct completion host_event;
453 int32_t completion_status;
454 };
455
456 struct q_res_req_compl {
457 struct completion host_event;
458 struct hv_pci_dev *hpdev;
459 };
460
461 struct compose_comp_ctxt {
462 struct hv_pci_compl comp_pkt;
463 struct tran_int_desc int_desc;
464 };
465
466 /*
467 * It is possible the device is revoked during initialization.
468 * Check if this happens during wait.
469 * Return: 0 if response arrived, ENODEV if device revoked.
470 */
471 static int
472 wait_for_response(struct hv_pcibus *hbus, struct completion *c)
473 {
474 do {
475 if (vmbus_chan_is_revoked(hbus->sc->chan)) {
476 device_printf(hbus->pcib,
477 "The device is revoked.\n");
478 return (ENODEV);
479 }
480 } while (wait_for_completion_timeout(c, hz /10) != 0);
481
482 return 0;
483 }
484
485 static void
486 hv_pci_generic_compl(void *context, struct pci_response *resp,
487 int resp_packet_size)
488 {
489 struct hv_pci_compl *comp_pkt = context;
490
491 if (resp_packet_size >= sizeof(struct pci_response))
492 comp_pkt->completion_status = resp->status;
493 else
494 comp_pkt->completion_status = -1;
495
496 complete(&comp_pkt->host_event);
497 }
498
499 static void
500 q_resource_requirements(void *context, struct pci_response *resp,
501 int resp_packet_size)
502 {
503 struct q_res_req_compl *completion = context;
504 struct pci_q_res_req_response *q_res_req =
505 (struct pci_q_res_req_response *)resp;
506 int i;
507
508 if (resp->status < 0) {
509 printf("vmbus_pcib: failed to query resource requirements\n");
510 } else {
511 for (i = 0; i < MAX_NUM_BARS; i++)
512 completion->hpdev->probed_bar[i] =
513 q_res_req->probed_bar[i];
514 }
515
516 complete(&completion->host_event);
517 }
518
519 static void
520 hv_pci_compose_compl(void *context, struct pci_response *resp,
521 int resp_packet_size)
522 {
523 struct compose_comp_ctxt *comp_pkt = context;
524 struct pci_create_int_response *int_resp =
525 (struct pci_create_int_response *)resp;
526
527 comp_pkt->comp_pkt.completion_status = resp->status;
528 comp_pkt->int_desc = int_resp->int_desc;
529 complete(&comp_pkt->comp_pkt.host_event);
530 }
531
532 static void
533 hv_int_desc_free(struct hv_pci_dev *hpdev, struct hv_irq_desc *hid)
534 {
535 struct pci_delete_interrupt *int_pkt;
536 struct {
537 struct pci_packet pkt;
538 uint8_t buffer[sizeof(struct pci_delete_interrupt)];
539 } ctxt;
540
541 memset(&ctxt, 0, sizeof(ctxt));
542 int_pkt = (struct pci_delete_interrupt *)&ctxt.pkt.message;
543 int_pkt->message_type.type = PCI_DELETE_INTERRUPT_MESSAGE;
544 int_pkt->wslot.val = hpdev->desc.wslot.val;
545 int_pkt->int_desc = hid->desc;
546
547 vmbus_chan_send(hpdev->hbus->sc->chan, VMBUS_CHANPKT_TYPE_INBAND, 0,
548 int_pkt, sizeof(*int_pkt), 0);
549
550 free(hid, M_DEVBUF);
551 }
552
553 static void
554 hv_pci_delete_device(struct hv_pci_dev *hpdev)
555 {
556 struct hv_pcibus *hbus = hpdev->hbus;
557 struct hv_irq_desc *hid, *tmp_hid;
558 device_t pci_dev;
559 int devfn;
560
561 devfn = wslot_to_devfn(hpdev->desc.wslot.val);
562
563 bus_topo_lock();
564
565 pci_dev = pci_find_dbsf(hbus->pci_domain,
566 0, PCI_SLOT(devfn), PCI_FUNC(devfn));
567 if (pci_dev)
568 device_delete_child(hbus->pci_bus, pci_dev);
569
570 bus_topo_unlock();
571
572 mtx_lock(&hbus->device_list_lock);
573 TAILQ_REMOVE(&hbus->children, hpdev, link);
574 mtx_unlock(&hbus->device_list_lock);
575
576 TAILQ_FOREACH_SAFE(hid, &hpdev->irq_desc_list, link, tmp_hid)
577 hv_int_desc_free(hpdev, hid);
578
579 free(hpdev, M_DEVBUF);
580 }
581
582 static struct hv_pci_dev *
583 new_pcichild_device(struct hv_pcibus *hbus, struct pci_func_desc *desc)
584 {
585 struct hv_pci_dev *hpdev;
586 struct pci_child_message *res_req;
587 struct q_res_req_compl comp_pkt;
588 struct {
589 struct pci_packet pkt;
590 uint8_t buffer[sizeof(struct pci_child_message)];
591 } ctxt;
592 int ret;
593
594 hpdev = malloc(sizeof(*hpdev), M_DEVBUF, M_WAITOK | M_ZERO);
595 hpdev->hbus = hbus;
596
597 TAILQ_INIT(&hpdev->irq_desc_list);
598
599 init_completion(&comp_pkt.host_event);
600 comp_pkt.hpdev = hpdev;
601
602 ctxt.pkt.compl_ctxt = &comp_pkt;
603 ctxt.pkt.completion_func = q_resource_requirements;
604
605 res_req = (struct pci_child_message *)&ctxt.pkt.message;
606 res_req->message_type.type = PCI_QUERY_RESOURCE_REQUIREMENTS;
607 res_req->wslot.val = desc->wslot.val;
608
609 ret = vmbus_chan_send(hbus->sc->chan,
610 VMBUS_CHANPKT_TYPE_INBAND, VMBUS_CHANPKT_FLAG_RC,
611 res_req, sizeof(*res_req), (uint64_t)(uintptr_t)&ctxt.pkt);
612 if (ret)
613 goto err;
614
615 if (wait_for_response(hbus, &comp_pkt.host_event))
616 goto err;
617
618 free_completion(&comp_pkt.host_event);
619
620 hpdev->desc = *desc;
621
622 mtx_lock(&hbus->device_list_lock);
623 if (TAILQ_EMPTY(&hbus->children))
624 hbus->pci_domain = desc->ser & 0xFFFF;
625 TAILQ_INSERT_TAIL(&hbus->children, hpdev, link);
626 mtx_unlock(&hbus->device_list_lock);
627 return (hpdev);
628 err:
629 free_completion(&comp_pkt.host_event);
630 free(hpdev, M_DEVBUF);
631 return (NULL);
632 }
633
634 #if __FreeBSD_version < 1100000
635
636 /* Old versions don't have BUS_RESCAN(). Let's copy it from FreeBSD 11. */
637
638 static struct pci_devinfo *
639 pci_identify_function(device_t pcib, device_t dev, int domain, int busno,
640 int slot, int func, size_t dinfo_size)
641 {
642 struct pci_devinfo *dinfo;
643
644 dinfo = pci_read_device(pcib, domain, busno, slot, func, dinfo_size);
645 if (dinfo != NULL)
646 pci_add_child(dev, dinfo);
647
648 return (dinfo);
649 }
650
651 static int
652 pci_rescan(device_t dev)
653 {
654 #define REG(n, w) PCIB_READ_CONFIG(pcib, busno, s, f, n, w)
655 device_t pcib = device_get_parent(dev);
656 struct pci_softc *sc;
657 device_t child, *devlist, *unchanged;
658 int devcount, error, i, j, maxslots, oldcount;
659 int busno, domain, s, f, pcifunchigh;
660 uint8_t hdrtype;
661
662 /* No need to check for ARI on a rescan. */
663 error = device_get_children(dev, &devlist, &devcount);
664 if (error)
665 return (error);
666 if (devcount != 0) {
667 unchanged = malloc(devcount * sizeof(device_t), M_TEMP,
668 M_NOWAIT | M_ZERO);
669 if (unchanged == NULL) {
670 free(devlist, M_TEMP);
671 return (ENOMEM);
672 }
673 } else
674 unchanged = NULL;
675
676 sc = device_get_softc(dev);
677 domain = pcib_get_domain(dev);
678 busno = pcib_get_bus(dev);
679 maxslots = PCIB_MAXSLOTS(pcib);
680 for (s = 0; s <= maxslots; s++) {
681 /* If function 0 is not present, skip to the next slot. */
682 f = 0;
683 if (REG(PCIR_VENDOR, 2) == 0xffff)
684 continue;
685 pcifunchigh = 0;
686 hdrtype = REG(PCIR_HDRTYPE, 1);
687 if ((hdrtype & PCIM_HDRTYPE) > PCI_MAXHDRTYPE)
688 continue;
689 if (hdrtype & PCIM_MFDEV)
690 pcifunchigh = PCIB_MAXFUNCS(pcib);
691 for (f = 0; f <= pcifunchigh; f++) {
692 if (REG(PCIR_VENDOR, 2) == 0xffff)
693 continue;
694
695 /*
696 * Found a valid function. Check if a
697 * device_t for this device already exists.
698 */
699 for (i = 0; i < devcount; i++) {
700 child = devlist[i];
701 if (child == NULL)
702 continue;
703 if (pci_get_slot(child) == s &&
704 pci_get_function(child) == f) {
705 unchanged[i] = child;
706 goto next_func;
707 }
708 }
709
710 pci_identify_function(pcib, dev, domain, busno, s, f,
711 sizeof(struct pci_devinfo));
712 next_func:;
713 }
714 }
715
716 /* Remove devices that are no longer present. */
717 for (i = 0; i < devcount; i++) {
718 if (unchanged[i] != NULL)
719 continue;
720 device_delete_child(dev, devlist[i]);
721 }
722
723 free(devlist, M_TEMP);
724 oldcount = devcount;
725
726 /* Try to attach the devices just added. */
727 error = device_get_children(dev, &devlist, &devcount);
728 if (error) {
729 free(unchanged, M_TEMP);
730 return (error);
731 }
732
733 for (i = 0; i < devcount; i++) {
734 for (j = 0; j < oldcount; j++) {
735 if (devlist[i] == unchanged[j])
736 goto next_device;
737 }
738
739 device_probe_and_attach(devlist[i]);
740 next_device:;
741 }
742
743 free(unchanged, M_TEMP);
744 free(devlist, M_TEMP);
745 return (0);
746 #undef REG
747 }
748
749 #else
750
751 static int
752 pci_rescan(device_t dev)
753 {
754 return (BUS_RESCAN(dev));
755 }
756
757 #endif
758
759 static void
760 pci_devices_present_work(void *arg, int pending __unused)
761 {
762 struct hv_dr_work *dr_wrk = arg;
763 struct hv_dr_state *dr = NULL;
764 struct hv_pcibus *hbus;
765 uint32_t child_no;
766 bool found;
767 struct pci_func_desc *new_desc;
768 struct hv_pci_dev *hpdev, *tmp_hpdev;
769 struct completion *query_comp;
770 bool need_rescan = false;
771
772 hbus = dr_wrk->bus;
773 free(dr_wrk, M_DEVBUF);
774
775 /* Pull this off the queue and process it if it was the last one. */
776 mtx_lock(&hbus->device_list_lock);
777 while (!TAILQ_EMPTY(&hbus->dr_list)) {
778 dr = TAILQ_FIRST(&hbus->dr_list);
779 TAILQ_REMOVE(&hbus->dr_list, dr, link);
780
781 /* Throw this away if the list still has stuff in it. */
782 if (!TAILQ_EMPTY(&hbus->dr_list)) {
783 free(dr, M_DEVBUF);
784 continue;
785 }
786 }
787 mtx_unlock(&hbus->device_list_lock);
788
789 if (!dr)
790 return;
791
792 /* First, mark all existing children as reported missing. */
793 mtx_lock(&hbus->device_list_lock);
794 TAILQ_FOREACH(hpdev, &hbus->children, link)
795 hpdev->reported_missing = true;
796 mtx_unlock(&hbus->device_list_lock);
797
798 /* Next, add back any reported devices. */
799 for (child_no = 0; child_no < dr->device_count; child_no++) {
800 found = false;
801 new_desc = &dr->func[child_no];
802
803 mtx_lock(&hbus->device_list_lock);
804 TAILQ_FOREACH(hpdev, &hbus->children, link) {
805 if ((hpdev->desc.wslot.val ==
806 new_desc->wslot.val) &&
807 (hpdev->desc.v_id == new_desc->v_id) &&
808 (hpdev->desc.d_id == new_desc->d_id) &&
809 (hpdev->desc.ser == new_desc->ser)) {
810 hpdev->reported_missing = false;
811 found = true;
812 break;
813 }
814 }
815 mtx_unlock(&hbus->device_list_lock);
816
817 if (!found) {
818 if (!need_rescan)
819 need_rescan = true;
820
821 hpdev = new_pcichild_device(hbus, new_desc);
822 if (!hpdev)
823 printf("vmbus_pcib: failed to add a child\n");
824 }
825 }
826
827 /* Remove missing device(s), if any */
828 TAILQ_FOREACH_SAFE(hpdev, &hbus->children, link, tmp_hpdev) {
829 if (hpdev->reported_missing)
830 hv_pci_delete_device(hpdev);
831 }
832
833 /* Rescan the bus to find any new device, if necessary. */
834 if (hbus->state == hv_pcibus_installed && need_rescan)
835 pci_rescan(hbus->pci_bus);
836
837 /* Wake up hv_pci_query_relations(), if it's waiting. */
838 query_comp = hbus->query_comp;
839 if (query_comp) {
840 hbus->query_comp = NULL;
841 complete(query_comp);
842 }
843
844 free(dr, M_DEVBUF);
845 }
846
847 static struct hv_pci_dev *
848 get_pcichild_wslot(struct hv_pcibus *hbus, uint32_t wslot)
849 {
850 struct hv_pci_dev *hpdev, *ret = NULL;
851
852 mtx_lock(&hbus->device_list_lock);
853 TAILQ_FOREACH(hpdev, &hbus->children, link) {
854 if (hpdev->desc.wslot.val == wslot) {
855 ret = hpdev;
856 break;
857 }
858 }
859 mtx_unlock(&hbus->device_list_lock);
860
861 return (ret);
862 }
863
864 static void
865 hv_pci_devices_present(struct hv_pcibus *hbus,
866 struct pci_bus_relations *relations)
867 {
868 struct hv_dr_state *dr;
869 struct hv_dr_work *dr_wrk;
870 unsigned long dr_size;
871
872 if (hbus->detaching && relations->device_count > 0)
873 return;
874
875 dr_size = offsetof(struct hv_dr_state, func) +
876 (sizeof(struct pci_func_desc) * relations->device_count);
877 dr = malloc(dr_size, M_DEVBUF, M_WAITOK | M_ZERO);
878
879 dr->device_count = relations->device_count;
880 if (dr->device_count != 0)
881 memcpy(dr->func, relations->func,
882 sizeof(struct pci_func_desc) * dr->device_count);
883
884 mtx_lock(&hbus->device_list_lock);
885 TAILQ_INSERT_TAIL(&hbus->dr_list, dr, link);
886 mtx_unlock(&hbus->device_list_lock);
887
888 dr_wrk = malloc(sizeof(*dr_wrk), M_DEVBUF, M_WAITOK | M_ZERO);
889 dr_wrk->bus = hbus;
890 TASK_INIT(&dr_wrk->task, 0, pci_devices_present_work, dr_wrk);
891 taskqueue_enqueue(hbus->sc->taskq, &dr_wrk->task);
892 }
893
894 static void
895 hv_eject_device_work(void *arg, int pending __unused)
896 {
897 struct hv_pci_dev *hpdev = arg;
898 union win_slot_encoding wslot = hpdev->desc.wslot;
899 struct hv_pcibus *hbus = hpdev->hbus;
900 struct pci_eject_response *eject_pkt;
901 struct {
902 struct pci_packet pkt;
903 uint8_t buffer[sizeof(struct pci_eject_response)];
904 } ctxt;
905
906 hv_pci_delete_device(hpdev);
907
908 memset(&ctxt, 0, sizeof(ctxt));
909 eject_pkt = (struct pci_eject_response *)&ctxt.pkt.message;
910 eject_pkt->message_type.type = PCI_EJECTION_COMPLETE;
911 eject_pkt->wslot.val = wslot.val;
912 vmbus_chan_send(hbus->sc->chan, VMBUS_CHANPKT_TYPE_INBAND, 0,
913 eject_pkt, sizeof(*eject_pkt), 0);
914 }
915
916 static void
917 hv_pci_eject_device(struct hv_pci_dev *hpdev)
918 {
919 struct hv_pcibus *hbus = hpdev->hbus;
920 struct taskqueue *taskq;
921
922 if (hbus->detaching)
923 return;
924
925 /*
926 * Push this task into the same taskqueue on which
927 * vmbus_pcib_attach() runs, so we're sure this task can't run
928 * concurrently with vmbus_pcib_attach().
929 */
930 TASK_INIT(&hpdev->eject_task, 0, hv_eject_device_work, hpdev);
931 taskq = vmbus_chan_mgmt_tq(hbus->sc->chan);
932 taskqueue_enqueue(taskq, &hpdev->eject_task);
933 }
934
935 #define PCIB_PACKET_SIZE 0x100
936
937 static void
938 vmbus_pcib_on_channel_callback(struct vmbus_channel *chan, void *arg)
939 {
940 struct vmbus_pcib_softc *sc = arg;
941 struct hv_pcibus *hbus = sc->hbus;
942
943 void *buffer;
944 int bufferlen = PCIB_PACKET_SIZE;
945
946 struct pci_packet *comp_packet;
947 struct pci_response *response;
948 struct pci_incoming_message *new_msg;
949 struct pci_bus_relations *bus_rel;
950 struct pci_dev_incoming *dev_msg;
951 struct hv_pci_dev *hpdev;
952
953 buffer = sc->rx_buf;
954 do {
955 struct vmbus_chanpkt_hdr *pkt = buffer;
956 uint32_t bytes_rxed;
957 int ret;
958
959 bytes_rxed = bufferlen;
960 ret = vmbus_chan_recv_pkt(chan, pkt, &bytes_rxed);
961
962 if (ret == ENOBUFS) {
963 /* Handle large packet */
964 if (bufferlen > PCIB_PACKET_SIZE) {
965 free(buffer, M_DEVBUF);
966 buffer = NULL;
967 }
968
969 /* alloc new buffer */
970 buffer = malloc(bytes_rxed, M_DEVBUF, M_WAITOK | M_ZERO);
971 bufferlen = bytes_rxed;
972
973 continue;
974 }
975
976 if (ret != 0) {
977 /* ignore EIO or EAGAIN */
978 break;
979 }
980
981 if (bytes_rxed <= sizeof(struct pci_response))
982 continue;
983
984 switch (pkt->cph_type) {
985 case VMBUS_CHANPKT_TYPE_COMP:
986 comp_packet =
987 (struct pci_packet *)(uintptr_t)pkt->cph_xactid;
988 response = (struct pci_response *)pkt;
989 comp_packet->completion_func(comp_packet->compl_ctxt,
990 response, bytes_rxed);
991 break;
992 case VMBUS_CHANPKT_TYPE_INBAND:
993 new_msg = (struct pci_incoming_message *)buffer;
994
995 switch (new_msg->message_type.type) {
996 case PCI_BUS_RELATIONS:
997 bus_rel = (struct pci_bus_relations *)buffer;
998
999 if (bus_rel->device_count == 0)
1000 break;
1001
1002 if (bytes_rxed <
1003 offsetof(struct pci_bus_relations, func) +
1004 (sizeof(struct pci_func_desc) *
1005 (bus_rel->device_count)))
1006 break;
1007
1008 hv_pci_devices_present(hbus, bus_rel);
1009 break;
1010
1011 case PCI_EJECT:
1012 dev_msg = (struct pci_dev_incoming *)buffer;
1013 hpdev = get_pcichild_wslot(hbus,
1014 dev_msg->wslot.val);
1015
1016 if (hpdev)
1017 hv_pci_eject_device(hpdev);
1018
1019 break;
1020 default:
1021 printf("vmbus_pcib: Unknown msg type 0x%x\n",
1022 new_msg->message_type.type);
1023 break;
1024 }
1025 break;
1026 default:
1027 printf("vmbus_pcib: Unknown VMBus msg type %hd\n",
1028 pkt->cph_type);
1029 break;
1030 }
1031 } while (1);
1032
1033 if (bufferlen > PCIB_PACKET_SIZE)
1034 free(buffer, M_DEVBUF);
1035 }
1036
1037 static int
1038 hv_pci_protocol_negotiation(struct hv_pcibus *hbus)
1039 {
1040 struct pci_version_request *version_req;
1041 struct hv_pci_compl comp_pkt;
1042 struct {
1043 struct pci_packet pkt;
1044 uint8_t buffer[sizeof(struct pci_version_request)];
1045 } ctxt;
1046 int ret;
1047
1048 init_completion(&comp_pkt.host_event);
1049
1050 ctxt.pkt.completion_func = hv_pci_generic_compl;
1051 ctxt.pkt.compl_ctxt = &comp_pkt;
1052 version_req = (struct pci_version_request *)&ctxt.pkt.message;
1053 version_req->message_type.type = PCI_QUERY_PROTOCOL_VERSION;
1054 version_req->protocol_version = PCI_PROTOCOL_VERSION_CURRENT;
1055 version_req->is_last_attempt = 1;
1056
1057 ret = vmbus_chan_send(hbus->sc->chan, VMBUS_CHANPKT_TYPE_INBAND,
1058 VMBUS_CHANPKT_FLAG_RC, version_req, sizeof(*version_req),
1059 (uint64_t)(uintptr_t)&ctxt.pkt);
1060 if (!ret)
1061 ret = wait_for_response(hbus, &comp_pkt.host_event);
1062
1063 if (ret) {
1064 device_printf(hbus->pcib,
1065 "vmbus_pcib failed to request version: %d\n",
1066 ret);
1067 goto out;
1068 }
1069
1070 if (comp_pkt.completion_status < 0) {
1071 device_printf(hbus->pcib,
1072 "vmbus_pcib version negotiation failed: %x\n",
1073 comp_pkt.completion_status);
1074 ret = EPROTO;
1075 } else {
1076 ret = 0;
1077 }
1078 out:
1079 free_completion(&comp_pkt.host_event);
1080 return (ret);
1081 }
1082
1083 /* Ask the host to send along the list of child devices */
1084 static int
1085 hv_pci_query_relations(struct hv_pcibus *hbus)
1086 {
1087 struct pci_message message;
1088 int ret;
1089
1090 message.type = PCI_QUERY_BUS_RELATIONS;
1091 ret = vmbus_chan_send(hbus->sc->chan, VMBUS_CHANPKT_TYPE_INBAND, 0,
1092 &message, sizeof(message), 0);
1093 return (ret);
1094 }
1095
1096 static int
1097 hv_pci_enter_d0(struct hv_pcibus *hbus)
1098 {
1099 struct pci_bus_d0_entry *d0_entry;
1100 struct hv_pci_compl comp_pkt;
1101 struct {
1102 struct pci_packet pkt;
1103 uint8_t buffer[sizeof(struct pci_bus_d0_entry)];
1104 } ctxt;
1105 int ret;
1106
1107 /*
1108 * Tell the host that the bus is ready to use, and moved into the
1109 * powered-on state. This includes telling the host which region
1110 * of memory-mapped I/O space has been chosen for configuration space
1111 * access.
1112 */
1113 init_completion(&comp_pkt.host_event);
1114
1115 ctxt.pkt.completion_func = hv_pci_generic_compl;
1116 ctxt.pkt.compl_ctxt = &comp_pkt;
1117
1118 d0_entry = (struct pci_bus_d0_entry *)&ctxt.pkt.message;
1119 memset(d0_entry, 0, sizeof(*d0_entry));
1120 d0_entry->message_type.type = PCI_BUS_D0ENTRY;
1121 d0_entry->mmio_base = rman_get_start(hbus->cfg_res);
1122
1123 ret = vmbus_chan_send(hbus->sc->chan, VMBUS_CHANPKT_TYPE_INBAND,
1124 VMBUS_CHANPKT_FLAG_RC, d0_entry, sizeof(*d0_entry),
1125 (uint64_t)(uintptr_t)&ctxt.pkt);
1126 if (!ret)
1127 ret = wait_for_response(hbus, &comp_pkt.host_event);
1128
1129 if (ret)
1130 goto out;
1131
1132 if (comp_pkt.completion_status < 0) {
1133 device_printf(hbus->pcib, "vmbus_pcib failed to enable D0\n");
1134 ret = EPROTO;
1135 } else {
1136 ret = 0;
1137 }
1138
1139 out:
1140 free_completion(&comp_pkt.host_event);
1141 return (ret);
1142 }
1143
1144 /*
1145 * It looks this is only needed by Windows VM, but let's send the message too
1146 * just to make the host happy.
1147 */
1148 static int
1149 hv_send_resources_allocated(struct hv_pcibus *hbus)
1150 {
1151 struct pci_resources_assigned *res_assigned;
1152 struct hv_pci_compl comp_pkt;
1153 struct hv_pci_dev *hpdev;
1154 struct pci_packet *pkt;
1155 uint32_t wslot;
1156 int ret = 0;
1157
1158 pkt = malloc(sizeof(*pkt) + sizeof(*res_assigned),
1159 M_DEVBUF, M_WAITOK | M_ZERO);
1160
1161 for (wslot = 0; wslot < 256; wslot++) {
1162 hpdev = get_pcichild_wslot(hbus, wslot);
1163 if (!hpdev)
1164 continue;
1165
1166 init_completion(&comp_pkt.host_event);
1167
1168 memset(pkt, 0, sizeof(*pkt) + sizeof(*res_assigned));
1169 pkt->completion_func = hv_pci_generic_compl;
1170 pkt->compl_ctxt = &comp_pkt;
1171
1172 res_assigned = (struct pci_resources_assigned *)&pkt->message;
1173 res_assigned->message_type.type = PCI_RESOURCES_ASSIGNED;
1174 res_assigned->wslot.val = hpdev->desc.wslot.val;
1175
1176 ret = vmbus_chan_send(hbus->sc->chan,
1177 VMBUS_CHANPKT_TYPE_INBAND, VMBUS_CHANPKT_FLAG_RC,
1178 &pkt->message, sizeof(*res_assigned),
1179 (uint64_t)(uintptr_t)pkt);
1180 if (!ret)
1181 ret = wait_for_response(hbus, &comp_pkt.host_event);
1182
1183 free_completion(&comp_pkt.host_event);
1184
1185 if (ret)
1186 break;
1187
1188 if (comp_pkt.completion_status < 0) {
1189 ret = EPROTO;
1190 device_printf(hbus->pcib,
1191 "failed to send PCI_RESOURCES_ASSIGNED\n");
1192 break;
1193 }
1194 }
1195
1196 free(pkt, M_DEVBUF);
1197 return (ret);
1198 }
1199
1200 static int
1201 hv_send_resources_released(struct hv_pcibus *hbus)
1202 {
1203 struct pci_child_message pkt;
1204 struct hv_pci_dev *hpdev;
1205 uint32_t wslot;
1206 int ret;
1207
1208 for (wslot = 0; wslot < 256; wslot++) {
1209 hpdev = get_pcichild_wslot(hbus, wslot);
1210 if (!hpdev)
1211 continue;
1212
1213 pkt.message_type.type = PCI_RESOURCES_RELEASED;
1214 pkt.wslot.val = hpdev->desc.wslot.val;
1215
1216 ret = vmbus_chan_send(hbus->sc->chan,
1217 VMBUS_CHANPKT_TYPE_INBAND, 0, &pkt, sizeof(pkt), 0);
1218 if (ret)
1219 return (ret);
1220 }
1221
1222 return (0);
1223 }
1224
1225 #define hv_cfg_read(x, s) \
1226 static inline uint##x##_t hv_cfg_read_##s(struct hv_pcibus *bus, \
1227 bus_size_t offset) \
1228 { \
1229 return (bus_read_##s(bus->cfg_res, offset)); \
1230 }
1231
1232 #define hv_cfg_write(x, s) \
1233 static inline void hv_cfg_write_##s(struct hv_pcibus *bus, \
1234 bus_size_t offset, uint##x##_t val) \
1235 { \
1236 return (bus_write_##s(bus->cfg_res, offset, val)); \
1237 }
1238
1239 hv_cfg_read(8, 1)
1240 hv_cfg_read(16, 2)
1241 hv_cfg_read(32, 4)
1242
1243 hv_cfg_write(8, 1)
1244 hv_cfg_write(16, 2)
1245 hv_cfg_write(32, 4)
1246
1247 static void
1248 _hv_pcifront_read_config(struct hv_pci_dev *hpdev, int where, int size,
1249 uint32_t *val)
1250 {
1251 struct hv_pcibus *hbus = hpdev->hbus;
1252 bus_size_t addr = CFG_PAGE_OFFSET + where;
1253
1254 /*
1255 * If the attempt is to read the IDs or the ROM BAR, simulate that.
1256 */
1257 if (where + size <= PCIR_COMMAND) {
1258 memcpy(val, ((uint8_t *)&hpdev->desc.v_id) + where, size);
1259 } else if (where >= PCIR_REVID && where + size <=
1260 PCIR_CACHELNSZ) {
1261 memcpy(val, ((uint8_t *)&hpdev->desc.rev) + where -
1262 PCIR_REVID, size);
1263 } else if (where >= PCIR_SUBVEND_0 && where + size <=
1264 PCIR_BIOS) {
1265 memcpy(val, (uint8_t *)&hpdev->desc.subsystem_id + where -
1266 PCIR_SUBVEND_0, size);
1267 } else if (where >= PCIR_BIOS && where + size <=
1268 PCIR_CAP_PTR) {
1269 /* ROM BARs are unimplemented */
1270 *val = 0;
1271 } else if ((where >= PCIR_INTLINE && where + size <=
1272 PCIR_INTPIN) ||(where == PCIR_INTPIN && size == 1)) {
1273 /*
1274 * Interrupt Line and Interrupt PIN are hard-wired to zero
1275 * because this front-end only supports message-signaled
1276 * interrupts.
1277 */
1278 *val = 0;
1279 } else if (where + size <= CFG_PAGE_SIZE) {
1280 mtx_lock(&hbus->config_lock);
1281
1282 /* Choose the function to be read. */
1283 hv_cfg_write_4(hbus, 0, hpdev->desc.wslot.val);
1284
1285 /* Make sure the function was chosen before we start reading.*/
1286 mb();
1287
1288 /* Read from that function's config space. */
1289 switch (size) {
1290 case 1:
1291 *((uint8_t *)val) = hv_cfg_read_1(hbus, addr);
1292 break;
1293 case 2:
1294 *((uint16_t *)val) = hv_cfg_read_2(hbus, addr);
1295 break;
1296 default:
1297 *((uint32_t *)val) = hv_cfg_read_4(hbus, addr);
1298 break;
1299 }
1300 /*
1301 * Make sure the write was done before we release the lock,
1302 * allowing consecutive reads/writes.
1303 */
1304 mb();
1305
1306 mtx_unlock(&hbus->config_lock);
1307 } else {
1308 /* Invalid config read: it's unlikely to reach here. */
1309 memset(val, 0, size);
1310 }
1311 }
1312
1313 static void
1314 _hv_pcifront_write_config(struct hv_pci_dev *hpdev, int where, int size,
1315 uint32_t val)
1316 {
1317 struct hv_pcibus *hbus = hpdev->hbus;
1318 bus_size_t addr = CFG_PAGE_OFFSET + where;
1319
1320 /* SSIDs and ROM BARs are read-only */
1321 if (where >= PCIR_SUBVEND_0 && where + size <= PCIR_CAP_PTR)
1322 return;
1323
1324 if (where >= PCIR_COMMAND && where + size <= CFG_PAGE_SIZE) {
1325 mtx_lock(&hbus->config_lock);
1326
1327 /* Choose the function to be written. */
1328 hv_cfg_write_4(hbus, 0, hpdev->desc.wslot.val);
1329
1330 /* Make sure the function was chosen before we start writing.*/
1331 wmb();
1332
1333 /* Write to that function's config space. */
1334 switch (size) {
1335 case 1:
1336 hv_cfg_write_1(hbus, addr, (uint8_t)val);
1337 break;
1338 case 2:
1339 hv_cfg_write_2(hbus, addr, (uint16_t)val);
1340 break;
1341 default:
1342 hv_cfg_write_4(hbus, addr, (uint32_t)val);
1343 break;
1344 }
1345
1346 /*
1347 * Make sure the write was done before we release the lock,
1348 * allowing consecutive reads/writes.
1349 */
1350 mb();
1351
1352 mtx_unlock(&hbus->config_lock);
1353 } else {
1354 /* Invalid config write: it's unlikely to reach here. */
1355 return;
1356 }
1357 }
1358
1359 /*
1360 * The vPCI in some Hyper-V releases do not initialize the last 4
1361 * bit of BAR registers. This could result weird problems causing PCI
1362 * code fail to configure BAR correctly.
1363 *
1364 * Just write all 1's to those BARs whose probed values are not zero.
1365 * This seems to make the Hyper-V vPCI and pci_write_bar() to cooperate
1366 * correctly.
1367 */
1368
1369 static void
1370 vmbus_pcib_prepopulate_bars(struct hv_pcibus *hbus)
1371 {
1372 struct hv_pci_dev *hpdev;
1373 int i;
1374
1375 mtx_lock(&hbus->device_list_lock);
1376 TAILQ_FOREACH(hpdev, &hbus->children, link) {
1377 for (i = 0; i < 6; i++) {
1378 /* Ignore empty bar */
1379 if (hpdev->probed_bar[i] == 0)
1380 continue;
1381
1382 uint32_t bar_val = 0;
1383
1384 _hv_pcifront_read_config(hpdev, PCIR_BAR(i),
1385 4, &bar_val);
1386
1387 if (hpdev->probed_bar[i] != bar_val) {
1388 if (bootverbose)
1389 printf("vmbus_pcib: initialize bar %d "
1390 "by writing all 1s\n", i);
1391
1392 _hv_pcifront_write_config(hpdev, PCIR_BAR(i),
1393 4, 0xffffffff);
1394
1395 /* Now write the original value back */
1396 _hv_pcifront_write_config(hpdev, PCIR_BAR(i),
1397 4, bar_val);
1398 }
1399 }
1400 }
1401 mtx_unlock(&hbus->device_list_lock);
1402 }
1403
1404 static void
1405 vmbus_pcib_set_detaching(void *arg, int pending __unused)
1406 {
1407 struct hv_pcibus *hbus = arg;
1408
1409 atomic_set_int(&hbus->detaching, 1);
1410 }
1411
1412 static void
1413 vmbus_pcib_pre_detach(struct hv_pcibus *hbus)
1414 {
1415 struct task task;
1416
1417 TASK_INIT(&task, 0, vmbus_pcib_set_detaching, hbus);
1418
1419 /*
1420 * Make sure the channel callback won't push any possible new
1421 * PCI_BUS_RELATIONS and PCI_EJECT tasks to sc->taskq.
1422 */
1423 vmbus_chan_run_task(hbus->sc->chan, &task);
1424
1425 taskqueue_drain_all(hbus->sc->taskq);
1426 }
1427
1428
1429 /*
1430 * Standard probe entry point.
1431 *
1432 */
1433 static int
1434 vmbus_pcib_probe(device_t dev)
1435 {
1436 if (VMBUS_PROBE_GUID(device_get_parent(dev), dev,
1437 &g_pass_through_dev_type) == 0) {
1438 device_set_desc(dev, "Hyper-V PCI Express Pass Through");
1439 return (BUS_PROBE_DEFAULT);
1440 }
1441 return (ENXIO);
1442 }
1443
1444 /*
1445 * Standard attach entry point.
1446 *
1447 */
1448 static int
1449 vmbus_pcib_attach(device_t dev)
1450 {
1451 const int pci_ring_size = (4 * PAGE_SIZE);
1452 const struct hyperv_guid *inst_guid;
1453 struct vmbus_channel *channel;
1454 struct vmbus_pcib_softc *sc;
1455 struct hv_pcibus *hbus;
1456 int rid = 0;
1457 int ret;
1458
1459 hbus = malloc(sizeof(*hbus), M_DEVBUF, M_WAITOK | M_ZERO);
1460 hbus->pcib = dev;
1461
1462 channel = vmbus_get_channel(dev);
1463 inst_guid = vmbus_chan_guid_inst(channel);
1464 hbus->pci_domain = inst_guid->hv_guid[9] |
1465 (inst_guid->hv_guid[8] << 8);
1466
1467 mtx_init(&hbus->config_lock, "hbcfg", NULL, MTX_DEF);
1468 mtx_init(&hbus->device_list_lock, "hbdl", NULL, MTX_DEF);
1469 TAILQ_INIT(&hbus->children);
1470 TAILQ_INIT(&hbus->dr_list);
1471
1472 hbus->cfg_res = bus_alloc_resource(dev, SYS_RES_MEMORY, &rid,
1473 0, RM_MAX_END, PCI_CONFIG_MMIO_LENGTH,
1474 RF_ACTIVE | rman_make_alignment_flags(PAGE_SIZE));
1475
1476 if (!hbus->cfg_res) {
1477 device_printf(dev, "failed to get resource for cfg window\n");
1478 ret = ENXIO;
1479 goto free_bus;
1480 }
1481
1482 sc = device_get_softc(dev);
1483 sc->chan = channel;
1484 sc->rx_buf = malloc(PCIB_PACKET_SIZE, M_DEVBUF, M_WAITOK | M_ZERO);
1485 sc->hbus = hbus;
1486
1487 /*
1488 * The taskq is used to handle PCI_BUS_RELATIONS and PCI_EJECT
1489 * messages. NB: we can't handle the messages in the channel callback
1490 * directly, because the message handlers need to send new messages
1491 * to the host and waits for the host's completion messages, which
1492 * must also be handled by the channel callback.
1493 */
1494 sc->taskq = taskqueue_create("vmbus_pcib_tq", M_WAITOK,
1495 taskqueue_thread_enqueue, &sc->taskq);
1496 taskqueue_start_threads(&sc->taskq, 1, PI_NET, "vmbus_pcib_tq");
1497
1498 hbus->sc = sc;
1499
1500 init_completion(&hbus->query_completion);
1501 hbus->query_comp = &hbus->query_completion;
1502
1503 ret = vmbus_chan_open(sc->chan, pci_ring_size, pci_ring_size,
1504 NULL, 0, vmbus_pcib_on_channel_callback, sc);
1505 if (ret)
1506 goto free_res;
1507
1508 ret = hv_pci_protocol_negotiation(hbus);
1509 if (ret)
1510 goto vmbus_close;
1511
1512 ret = hv_pci_query_relations(hbus);
1513 if (!ret)
1514 ret = wait_for_response(hbus, hbus->query_comp);
1515
1516 if (ret)
1517 goto vmbus_close;
1518
1519 ret = hv_pci_enter_d0(hbus);
1520 if (ret)
1521 goto vmbus_close;
1522
1523 ret = hv_send_resources_allocated(hbus);
1524 if (ret)
1525 goto vmbus_close;
1526
1527 vmbus_pcib_prepopulate_bars(hbus);
1528
1529 hbus->pci_bus = device_add_child(dev, "pci", -1);
1530 if (!hbus->pci_bus) {
1531 device_printf(dev, "failed to create pci bus\n");
1532 ret = ENXIO;
1533 goto vmbus_close;
1534 }
1535
1536 bus_generic_attach(dev);
1537
1538 hbus->state = hv_pcibus_installed;
1539
1540 return (0);
1541
1542 vmbus_close:
1543 vmbus_pcib_pre_detach(hbus);
1544 vmbus_chan_close(sc->chan);
1545 free_res:
1546 taskqueue_free(sc->taskq);
1547 free_completion(&hbus->query_completion);
1548 free(sc->rx_buf, M_DEVBUF);
1549 bus_release_resource(dev, SYS_RES_MEMORY, 0, hbus->cfg_res);
1550 free_bus:
1551 mtx_destroy(&hbus->device_list_lock);
1552 mtx_destroy(&hbus->config_lock);
1553 free(hbus, M_DEVBUF);
1554 return (ret);
1555 }
1556
1557 /*
1558 * Standard detach entry point
1559 */
1560 static int
1561 vmbus_pcib_detach(device_t dev)
1562 {
1563 struct vmbus_pcib_softc *sc = device_get_softc(dev);
1564 struct hv_pcibus *hbus = sc->hbus;
1565 struct pci_message teardown_packet;
1566 struct pci_bus_relations relations;
1567 int ret;
1568
1569 vmbus_pcib_pre_detach(hbus);
1570
1571 if (hbus->state == hv_pcibus_installed)
1572 bus_generic_detach(dev);
1573
1574 /* Delete any children which might still exist. */
1575 memset(&relations, 0, sizeof(relations));
1576 hv_pci_devices_present(hbus, &relations);
1577
1578 ret = hv_send_resources_released(hbus);
1579 if (ret)
1580 device_printf(dev, "failed to send PCI_RESOURCES_RELEASED\n");
1581
1582 teardown_packet.type = PCI_BUS_D0EXIT;
1583 ret = vmbus_chan_send(sc->chan, VMBUS_CHANPKT_TYPE_INBAND, 0,
1584 &teardown_packet, sizeof(struct pci_message), 0);
1585 if (ret)
1586 device_printf(dev, "failed to send PCI_BUS_D0EXIT\n");
1587
1588 taskqueue_drain_all(hbus->sc->taskq);
1589 vmbus_chan_close(sc->chan);
1590 taskqueue_free(sc->taskq);
1591
1592 free_completion(&hbus->query_completion);
1593 free(sc->rx_buf, M_DEVBUF);
1594 bus_release_resource(dev, SYS_RES_MEMORY, 0, hbus->cfg_res);
1595
1596 mtx_destroy(&hbus->device_list_lock);
1597 mtx_destroy(&hbus->config_lock);
1598 free(hbus, M_DEVBUF);
1599
1600 return (0);
1601 }
1602
1603 static int
1604 vmbus_pcib_read_ivar(device_t dev, device_t child, int which, uintptr_t *val)
1605 {
1606 struct vmbus_pcib_softc *sc = device_get_softc(dev);
1607
1608 switch (which) {
1609 case PCIB_IVAR_DOMAIN:
1610 *val = sc->hbus->pci_domain;
1611 return (0);
1612
1613 case PCIB_IVAR_BUS:
1614 /* There is only bus 0. */
1615 *val = 0;
1616 return (0);
1617 }
1618 return (ENOENT);
1619 }
1620
1621 static int
1622 vmbus_pcib_write_ivar(device_t dev, device_t child, int which, uintptr_t val)
1623 {
1624 return (ENOENT);
1625 }
1626
1627 static struct resource *
1628 vmbus_pcib_alloc_resource(device_t dev, device_t child, int type, int *rid,
1629 rman_res_t start, rman_res_t end, rman_res_t count, u_int flags)
1630 {
1631 unsigned int bar_no;
1632 struct hv_pci_dev *hpdev;
1633 struct vmbus_pcib_softc *sc = device_get_softc(dev);
1634 struct resource *res;
1635 unsigned int devfn;
1636
1637 if (type == PCI_RES_BUS)
1638 return (pci_domain_alloc_bus(sc->hbus->pci_domain, child, rid,
1639 start, end, count, flags));
1640
1641 /* Devices with port I/O BAR are not supported. */
1642 if (type == SYS_RES_IOPORT)
1643 return (NULL);
1644
1645 if (type == SYS_RES_MEMORY) {
1646 devfn = PCI_DEVFN(pci_get_slot(child),
1647 pci_get_function(child));
1648 hpdev = get_pcichild_wslot(sc->hbus, devfn_to_wslot(devfn));
1649 if (!hpdev)
1650 return (NULL);
1651
1652 bar_no = PCI_RID2BAR(*rid);
1653 if (bar_no >= MAX_NUM_BARS)
1654 return (NULL);
1655
1656 /* Make sure a 32-bit BAR gets a 32-bit address */
1657 if (!(hpdev->probed_bar[bar_no] & PCIM_BAR_MEM_64))
1658 end = ulmin(end, 0xFFFFFFFF);
1659 }
1660
1661 res = bus_generic_alloc_resource(dev, child, type, rid,
1662 start, end, count, flags);
1663 /*
1664 * If this is a request for a specific range, assume it is
1665 * correct and pass it up to the parent.
1666 */
1667 if (res == NULL && start + count - 1 == end)
1668 res = bus_generic_alloc_resource(dev, child, type, rid,
1669 start, end, count, flags);
1670 return (res);
1671 }
1672
1673 static int
1674 vmbus_pcib_release_resource(device_t dev, device_t child, int type, int rid,
1675 struct resource *r)
1676 {
1677 struct vmbus_pcib_softc *sc = device_get_softc(dev);
1678
1679 if (type == PCI_RES_BUS)
1680 return (pci_domain_release_bus(sc->hbus->pci_domain, child,
1681 rid, r));
1682
1683 if (type == SYS_RES_IOPORT)
1684 return (EINVAL);
1685
1686 return (bus_generic_release_resource(dev, child, type, rid, r));
1687 }
1688
#if __FreeBSD_version >= 1100000
/*
 * Bus get_cpus method.  Answers the query on behalf of a child by asking
 * bus_get_cpus() about the bridge device itself.
 */
static int
vmbus_pcib_get_cpus(device_t pcib, device_t dev, enum cpu_sets op,
    size_t setsize, cpuset_t *cpuset)
{
	int error;

	error = bus_get_cpus(pcib, op, setsize, cpuset);
	return (error);
}
#endif
1697
1698 static uint32_t
1699 vmbus_pcib_read_config(device_t dev, u_int bus, u_int slot, u_int func,
1700 u_int reg, int bytes)
1701 {
1702 struct vmbus_pcib_softc *sc = device_get_softc(dev);
1703 struct hv_pci_dev *hpdev;
1704 unsigned int devfn = PCI_DEVFN(slot, func);
1705 uint32_t data = 0;
1706
1707 KASSERT(bus == 0, ("bus should be 0, but is %u", bus));
1708
1709 hpdev = get_pcichild_wslot(sc->hbus, devfn_to_wslot(devfn));
1710 if (!hpdev)
1711 return (~0);
1712
1713 _hv_pcifront_read_config(hpdev, reg, bytes, &data);
1714
1715 return (data);
1716 }
1717
1718 static void
1719 vmbus_pcib_write_config(device_t dev, u_int bus, u_int slot, u_int func,
1720 u_int reg, uint32_t data, int bytes)
1721 {
1722 struct vmbus_pcib_softc *sc = device_get_softc(dev);
1723 struct hv_pci_dev *hpdev;
1724 unsigned int devfn = PCI_DEVFN(slot, func);
1725
1726 KASSERT(bus == 0, ("bus should be 0, but is %u", bus));
1727
1728 hpdev = get_pcichild_wslot(sc->hbus, devfn_to_wslot(devfn));
1729 if (!hpdev)
1730 return;
1731
1732 _hv_pcifront_write_config(hpdev, reg, bytes, data);
1733 }
1734
1735 static int
1736 vmbus_pcib_route_intr(device_t pcib, device_t dev, int pin)
1737 {
1738 /* We only support MSI/MSI-X and don't support INTx interrupt. */
1739 return (PCI_INVALID_IRQ);
1740 }
1741
1742 static int
1743 vmbus_pcib_alloc_msi(device_t pcib, device_t dev, int count,
1744 int maxcount, int *irqs)
1745 {
1746 return (PCIB_ALLOC_MSI(device_get_parent(pcib), dev, count, maxcount,
1747 irqs));
1748 }
1749
1750 static int
1751 vmbus_pcib_release_msi(device_t pcib, device_t dev, int count, int *irqs)
1752 {
1753 return (PCIB_RELEASE_MSI(device_get_parent(pcib), dev, count, irqs));
1754 }
1755
1756 static int
1757 vmbus_pcib_alloc_msix(device_t pcib, device_t dev, int *irq)
1758 {
1759 return (PCIB_ALLOC_MSIX(device_get_parent(pcib), dev, irq));
1760 }
1761
1762 static int
1763 vmbus_pcib_release_msix(device_t pcib, device_t dev, int irq)
1764 {
1765 return (PCIB_RELEASE_MSIX(device_get_parent(pcib), dev, irq));
1766 }
1767
1768 #define MSI_INTEL_ADDR_DEST 0x000ff000
1769 #define MSI_INTEL_DATA_INTVEC IOART_INTVEC /* Interrupt vector. */
1770 #define MSI_INTEL_DATA_DELFIXED IOART_DELFIXED
1771
1772 static int
1773 vmbus_pcib_map_msi(device_t pcib, device_t child, int irq,
1774 uint64_t *addr, uint32_t *data)
1775 {
1776 unsigned int devfn;
1777 struct hv_pci_dev *hpdev;
1778
1779 uint64_t v_addr;
1780 uint32_t v_data;
1781 struct hv_irq_desc *hid, *tmp_hid;
1782 unsigned int cpu, vcpu_id;
1783 unsigned int vector;
1784
1785 struct vmbus_pcib_softc *sc = device_get_softc(pcib);
1786 struct pci_create_interrupt *int_pkt;
1787 struct compose_comp_ctxt comp;
1788 struct {
1789 struct pci_packet pkt;
1790 uint8_t buffer[sizeof(struct pci_create_interrupt)];
1791 } ctxt;
1792
1793 int ret;
1794
1795 devfn = PCI_DEVFN(pci_get_slot(child), pci_get_function(child));
1796 hpdev = get_pcichild_wslot(sc->hbus, devfn_to_wslot(devfn));
1797 if (!hpdev)
1798 return (ENOENT);
1799
1800 ret = PCIB_MAP_MSI(device_get_parent(pcib), child, irq,
1801 &v_addr, &v_data);
1802 if (ret)
1803 return (ret);
1804
1805 TAILQ_FOREACH_SAFE(hid, &hpdev->irq_desc_list, link, tmp_hid) {
1806 if (hid->irq == irq) {
1807 TAILQ_REMOVE(&hpdev->irq_desc_list, hid, link);
1808 hv_int_desc_free(hpdev, hid);
1809 break;
1810 }
1811 }
1812
1813 cpu = (v_addr & MSI_INTEL_ADDR_DEST) >> 12;
1814 vcpu_id = VMBUS_GET_VCPU_ID(device_get_parent(pcib), pcib, cpu);
1815 vector = v_data & MSI_INTEL_DATA_INTVEC;
1816
1817 init_completion(&comp.comp_pkt.host_event);
1818
1819 memset(&ctxt, 0, sizeof(ctxt));
1820 ctxt.pkt.completion_func = hv_pci_compose_compl;
1821 ctxt.pkt.compl_ctxt = ∁
1822
1823 int_pkt = (struct pci_create_interrupt *)&ctxt.pkt.message;
1824 int_pkt->message_type.type = PCI_CREATE_INTERRUPT_MESSAGE;
1825 int_pkt->wslot.val = hpdev->desc.wslot.val;
1826 int_pkt->int_desc.vector = vector;
1827 int_pkt->int_desc.vector_count = 1;
1828 int_pkt->int_desc.delivery_mode = MSI_INTEL_DATA_DELFIXED;
1829 int_pkt->int_desc.cpu_mask = 1ULL << vcpu_id;
1830
1831 ret = vmbus_chan_send(sc->chan, VMBUS_CHANPKT_TYPE_INBAND,
1832 VMBUS_CHANPKT_FLAG_RC, int_pkt, sizeof(*int_pkt),
1833 (uint64_t)(uintptr_t)&ctxt.pkt);
1834 if (ret) {
1835 free_completion(&comp.comp_pkt.host_event);
1836 return (ret);
1837 }
1838
1839 wait_for_completion(&comp.comp_pkt.host_event);
1840 free_completion(&comp.comp_pkt.host_event);
1841
1842 if (comp.comp_pkt.completion_status < 0)
1843 return (EPROTO);
1844
1845 *addr = comp.int_desc.address;
1846 *data = comp.int_desc.data;
1847
1848 hid = malloc(sizeof(struct hv_irq_desc), M_DEVBUF, M_WAITOK | M_ZERO);
1849 hid->irq = irq;
1850 hid->desc = comp.int_desc;
1851 TAILQ_INSERT_TAIL(&hpdev->irq_desc_list, hid, link);
1852
1853 return (0);
1854 }
1855
1856 static device_method_t vmbus_pcib_methods[] = {
1857 /* Device interface */
1858 DEVMETHOD(device_probe, vmbus_pcib_probe),
1859 DEVMETHOD(device_attach, vmbus_pcib_attach),
1860 DEVMETHOD(device_detach, vmbus_pcib_detach),
1861 DEVMETHOD(device_shutdown, bus_generic_shutdown),
1862 DEVMETHOD(device_suspend, bus_generic_suspend),
1863 DEVMETHOD(device_resume, bus_generic_resume),
1864
1865 /* Bus interface */
1866 DEVMETHOD(bus_read_ivar, vmbus_pcib_read_ivar),
1867 DEVMETHOD(bus_write_ivar, vmbus_pcib_write_ivar),
1868 DEVMETHOD(bus_alloc_resource, vmbus_pcib_alloc_resource),
1869 DEVMETHOD(bus_release_resource, vmbus_pcib_release_resource),
1870 DEVMETHOD(bus_activate_resource, bus_generic_activate_resource),
1871 DEVMETHOD(bus_deactivate_resource, bus_generic_deactivate_resource),
1872 DEVMETHOD(bus_setup_intr, bus_generic_setup_intr),
1873 DEVMETHOD(bus_teardown_intr, bus_generic_teardown_intr),
1874 #if __FreeBSD_version >= 1100000
1875 DEVMETHOD(bus_get_cpus, vmbus_pcib_get_cpus),
1876 #endif
1877
1878 /* pcib interface */
1879 DEVMETHOD(pcib_maxslots, pcib_maxslots),
1880 DEVMETHOD(pcib_read_config, vmbus_pcib_read_config),
1881 DEVMETHOD(pcib_write_config, vmbus_pcib_write_config),
1882 DEVMETHOD(pcib_route_interrupt, vmbus_pcib_route_intr),
1883 DEVMETHOD(pcib_alloc_msi, vmbus_pcib_alloc_msi),
1884 DEVMETHOD(pcib_release_msi, vmbus_pcib_release_msi),
1885 DEVMETHOD(pcib_alloc_msix, vmbus_pcib_alloc_msix),
1886 DEVMETHOD(pcib_release_msix, vmbus_pcib_release_msix),
1887 DEVMETHOD(pcib_map_msi, vmbus_pcib_map_msi),
1888 DEVMETHOD(pcib_request_feature, pcib_request_feature_allow),
1889
1890 DEVMETHOD_END
1891 };
1892
1893 DEFINE_CLASS_0(pcib, vmbus_pcib_driver, vmbus_pcib_methods,
1894 sizeof(struct vmbus_pcib_softc));
1895 DRIVER_MODULE(vmbus_pcib, vmbus, vmbus_pcib_driver, 0, 0);
1896 MODULE_DEPEND(vmbus_pcib, vmbus, 1, 1, 1);
1897 MODULE_DEPEND(vmbus_pcib, pci, 1, 1, 1);
1898
1899 #endif /* NEW_PCIB */
Cache object: 06c40e2fa4b754ace66c2661642e5391
|