/*-
 * Copyright (c) 2004 Marcel Moolenaar
 * Copyright (c) 2001 Doug Rabson
 * Copyright (c) 2016 The FreeBSD Foundation
 * All rights reserved.
 *
 * Portions of this software were developed by Konstantin Belousov
 * under sponsorship from the FreeBSD Foundation.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD: releng/11.2/sys/amd64/amd64/efirt_machdep.c 332028 2018-04-04 13:58:18Z kevans $");

#include <sys/param.h>
#include <sys/efi.h>
#include <sys/kernel.h>
#include <sys/linker.h>
#include <sys/lock.h>
#include <sys/module.h>
#include <sys/mutex.h>
#include <sys/clock.h>
#include <sys/proc.h>
#include <sys/rwlock.h>
#include <sys/sched.h>
#include <sys/sysctl.h>
#include <sys/systm.h>
#include <sys/vmmeter.h>
#include <isa/rtc.h>
#include <machine/fpu.h>
#include <machine/efi.h>
#include <machine/metadata.h>
#include <machine/md_var.h>
#include <machine/smp.h>
#include <machine/vmparam.h>
#include <vm/vm.h>
#include <vm/pmap.h>
#include <vm/vm_map.h>
#include <vm/vm_object.h>
#include <vm/vm_page.h>
#include <vm/vm_pager.h>

static pml4_entry_t *efi_pml4;
static vm_object_t obj_1t1_pt;
static vm_page_t efi_pml4_page;
static vm_pindex_t efi_1t1_idx;

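/*
 * Tear down the EFI 1:1 page table.  The page table pages were
 * grabbed wired but are not accounted to any pmap, so their wire
 * counts are dropped by hand before the backing VM object is
 * released.
 */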
void
efi_destroy_1t1_map(void)
{
	vm_page_t m;

	if (obj_1t1_pt != NULL) {
		VM_OBJECT_RLOCK(obj_1t1_pt);
		TAILQ_FOREACH(m, &obj_1t1_pt->memq, listq)
			m->wire_count = 0;
		atomic_subtract_int(&vm_cnt.v_wire_count,
		    obj_1t1_pt->resident_page_count);
		VM_OBJECT_RUNLOCK(obj_1t1_pt);
		vm_object_deallocate(obj_1t1_pt);
	}

	obj_1t1_pt = NULL;
	efi_pml4 = NULL;
	efi_pml4_page = NULL;
}

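/*
 * Grab the next wired, zero-filled page from the 1:1 page table
 * object to serve as a page table page.  Pages are allocated at
 * consecutive pindexes; the object lock must be held by the caller.
 */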
static vm_page_t
efi_1t1_page(void)
{

	return (vm_page_grab(obj_1t1_pt, efi_1t1_idx++, VM_ALLOC_NOBUSY |
	    VM_ALLOC_WIRED | VM_ALLOC_ZERO));
}

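/*
 * Walk the EFI page table for va, allocating any missing intermediate
 * levels (PML4 -> PDP -> PD) on demand, and return a pointer to the
 * leaf PTE slot.  The slot is asserted to be empty, since each va is
 * entered at most once while the map is built.
 */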
static pt_entry_t *
efi_1t1_pte(vm_offset_t va)
{
	pml4_entry_t *pml4e;
	pdp_entry_t *pdpe;
	pd_entry_t *pde;
	pt_entry_t *pte;
	vm_page_t m;
	vm_pindex_t pml4_idx, pdp_idx, pd_idx;
	vm_paddr_t mphys;

	pml4_idx = pmap_pml4e_index(va);
	pml4e = &efi_pml4[pml4_idx];
	if (*pml4e == 0) {
		m = efi_1t1_page();
		mphys = VM_PAGE_TO_PHYS(m);
		*pml4e = mphys | X86_PG_RW | X86_PG_V;
	} else {
		mphys = *pml4e & ~PAGE_MASK;
	}

	pdpe = (pdp_entry_t *)PHYS_TO_DMAP(mphys);
	pdp_idx = pmap_pdpe_index(va);
	pdpe += pdp_idx;
	if (*pdpe == 0) {
		m = efi_1t1_page();
		mphys = VM_PAGE_TO_PHYS(m);
		*pdpe = mphys | X86_PG_RW | X86_PG_V;
	} else {
		mphys = *pdpe & ~PAGE_MASK;
	}

	pde = (pd_entry_t *)PHYS_TO_DMAP(mphys);
	pd_idx = pmap_pde_index(va);
	pde += pd_idx;
	if (*pde == 0) {
		m = efi_1t1_page();
		mphys = VM_PAGE_TO_PHYS(m);
		*pde = mphys | X86_PG_RW | X86_PG_V;
	} else {
		mphys = *pde & ~PAGE_MASK;
	}

	pte = (pt_entry_t *)PHYS_TO_DMAP(mphys);
	pte += pmap_pte_index(va);
	KASSERT(*pte == 0, ("va %#jx *pt %#jx", va, *pte));

	return (pte);
}

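/*
 * Build the 1:1 map for all runtime segments in the firmware memory
 * map.  Each EFI_MD_ATTR_RT descriptor is validated (not already
 * virtually mapped, page-aligned, and below VM_MAXUSER_ADDRESS) and
 * then entered page by page with cache attributes derived from the
 * descriptor.  Returns false, after tearing down any partial map, if
 * a descriptor cannot be safely mapped.
 */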
bool
efi_create_1t1_map(struct efi_md *map, int ndesc, int descsz)
{
	struct efi_md *p;
	pt_entry_t *pte;
	vm_offset_t va;
	uint64_t idx;
	int bits, i, mode;

	obj_1t1_pt = vm_pager_allocate(OBJT_PHYS, NULL, ptoa(1 +
	    NPML4EPG + NPML4EPG * NPDPEPG + NPML4EPG * NPDPEPG * NPDEPG),
	    VM_PROT_ALL, 0, NULL);
	efi_1t1_idx = 0;
	VM_OBJECT_WLOCK(obj_1t1_pt);
	efi_pml4_page = efi_1t1_page();
	VM_OBJECT_WUNLOCK(obj_1t1_pt);
	efi_pml4 = (pml4_entry_t *)PHYS_TO_DMAP(VM_PAGE_TO_PHYS(efi_pml4_page));
	pmap_pinit_pml4(efi_pml4_page);

	for (i = 0, p = map; i < ndesc; i++, p = efi_next_descriptor(p,
	    descsz)) {
		if ((p->md_attr & EFI_MD_ATTR_RT) == 0)
			continue;
		if (p->md_virt != NULL && (uint64_t)p->md_virt != p->md_phys) {
			if (bootverbose)
				printf("EFI Runtime entry %d is mapped\n", i);
			goto fail;
		}
		if ((p->md_phys & EFI_PAGE_MASK) != 0) {
			if (bootverbose)
				printf("EFI Runtime entry %d is not aligned\n",
				    i);
			goto fail;
		}
		if (p->md_phys + p->md_pages * EFI_PAGE_SIZE < p->md_phys ||
		    p->md_phys + p->md_pages * EFI_PAGE_SIZE >=
		    VM_MAXUSER_ADDRESS) {
			printf("EFI Runtime entry %d is not mappable for RT: "
			    "base %#016jx %#jx pages\n",
			    i, (uintmax_t)p->md_phys,
			    (uintmax_t)p->md_pages);
			goto fail;
		}
		if ((p->md_attr & EFI_MD_ATTR_WB) != 0)
			mode = VM_MEMATTR_WRITE_BACK;
		else if ((p->md_attr & EFI_MD_ATTR_WT) != 0)
			mode = VM_MEMATTR_WRITE_THROUGH;
		else if ((p->md_attr & EFI_MD_ATTR_WC) != 0)
			mode = VM_MEMATTR_WRITE_COMBINING;
		else if ((p->md_attr & EFI_MD_ATTR_WP) != 0)
			mode = VM_MEMATTR_WRITE_PROTECTED;
		else if ((p->md_attr & EFI_MD_ATTR_UC) != 0)
			mode = VM_MEMATTR_UNCACHEABLE;
		else {
			if (bootverbose)
				printf("EFI Runtime entry %d mapping "
				    "attributes unsupported\n", i);
			mode = VM_MEMATTR_UNCACHEABLE;
		}
		bits = pmap_cache_bits(kernel_pmap, mode, FALSE) | X86_PG_RW |
		    X86_PG_V;
		VM_OBJECT_WLOCK(obj_1t1_pt);
		for (va = p->md_phys, idx = 0; idx < p->md_pages; idx++,
		    va += PAGE_SIZE) {
			pte = efi_1t1_pte(va);
			pte_store(pte, va | bits);
		}
		VM_OBJECT_WUNLOCK(obj_1t1_pt);
	}

	return (true);

fail:
	efi_destroy_1t1_map();
	return (false);
}


/*
 * Create an environment for the EFI runtime code call.  The most
 * important part is creating the required 1:1 physical->virtual
 * mappings for the runtime segments.  To do that, we manually create
 * a page table which unmaps userspace but gives the correct kernel
 * mappings.  The 1:1 mappings for the runtime segments usually occupy
 * the low 4G of the physical address space.
 *
 * The 1:1 mappings were chosen over the SetVirtualAddressMap() EFI RT
 * service because some BIOSes fail to correctly relocate themselves
 * on the call, requiring both the 1:1 and the virtual mapping.  As a
 * result, we must provide the 1:1 mapping anyway, so there is no
 * reason to bother with the virtual map, and no need to add
 * complexity to the loader.
 *
 * The fpu_kern_enter() call allows the firmware to use the FPU, as
 * mandated by the specification.  In particular, the CR0.TS bit is
 * cleared.  It also enters a critical section, giving us the
 * necessary protection against a context switch.
 *
 * There is no need to disable interrupts around the change of %cr3:
 * the kernel mappings are correct, while we only grabbed the
 * userspace portion of the VA.  Interrupt handlers must not access
 * userspace.  Having interrupts enabled avoids the problem of long
 * firmware/SMM operations negatively affecting IPIs, esp. TLB
 * shootdown requests.
 */
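/*
 * A sketch of the expected caller sequence; the actual driver logic
 * lives in sys/dev/efidev/efirt.c, and the sequence below is only
 * illustrative:
 *
 *	curpmap = &curproc->p_vmspace->vm_pmap;
 *	PMAP_LOCK(curpmap);		// satisfies PMAP_LOCK_ASSERT() below
 *	fpu_kern_enter(curthread, NULL, FPU_KERN_NOCTX);
 *	error = efi_arch_enter();	// switch %cr3 to the 1:1 map
 *	... call the EFI runtime service ...
 *	efi_arch_leave();		// restore the process %cr3
 *	fpu_kern_leave(curthread, NULL);
 *	PMAP_UNLOCK(curpmap);
 */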
int
efi_arch_enter(void)
{
	pmap_t curpmap;

	curpmap = PCPU_GET(curpmap);
	PMAP_LOCK_ASSERT(curpmap, MA_OWNED);

	/*
	 * The IPI TLB shootdown handler invltlb_pcid_handler() reloads
	 * %cr3 from curpmap->pm_cr3, which would disable the runtime
	 * segments mappings.  Block the handler's action by setting
	 * curpmap to an impossible value.  See also the comment in
	 * pmap.c:pmap_activate_sw().
	 */
	if (pmap_pcid_enabled && !invpcid_works)
		PCPU_SET(curpmap, NULL);

	load_cr3(VM_PAGE_TO_PHYS(efi_pml4_page) | (pmap_pcid_enabled ?
	    curpmap->pm_pcids[PCPU_GET(cpuid)].pm_pcid : 0));
	/*
	 * If PCID is enabled, the CR3_PCID_SAVE bit being clear in the
	 * loaded %cr3 causes TLB invalidation, so no explicit flush is
	 * needed in that case.
	 */
	if (!pmap_pcid_enabled)
		invltlb();
	return (0);
}

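/*
 * Undo efi_arch_enter(): restore the current process page table and,
 * in the PCID-without-INVPCID case, make curpmap valid again for the
 * TLB shootdown handlers.
 */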
void
efi_arch_leave(void)
{
	pmap_t curpmap;

	curpmap = &curproc->p_vmspace->vm_pmap;
	if (pmap_pcid_enabled && !invpcid_works)
		PCPU_SET(curpmap, curpmap);
	load_cr3(curpmap->pm_cr3 | (pmap_pcid_enabled ?
	    curpmap->pm_pcids[PCPU_GET(cpuid)].pm_pcid : 0));
	if (!pmap_pcid_enabled)
		invltlb();
}

/* XXX debug stuff */
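/*
 * Writing any value to the debug.efi_time sysctl queries the firmware
 * GetTime() runtime service and prints the result to the user's
 * terminal, e.g. (illustrative): sysctl debug.efi_time=1
 */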
static int
efi_time_sysctl_handler(SYSCTL_HANDLER_ARGS)
{
	struct efi_tm tm;
	int error, val;

	val = 0;
	error = sysctl_handle_int(oidp, &val, 0, req);
	if (error != 0 || req->newptr == NULL)
		return (error);
	error = efi_get_time(&tm);
	if (error == 0) {
		uprintf("EFI reports: Year %d Month %d Day %d Hour %d Min %d "
		    "Sec %d\n", tm.tm_year, tm.tm_mon, tm.tm_mday, tm.tm_hour,
		    tm.tm_min, tm.tm_sec);
	}
	return (error);
}

SYSCTL_PROC(_debug, OID_AUTO, efi_time, CTLTYPE_INT | CTLFLAG_RW, NULL, 0,
    efi_time_sysctl_handler, "I", "Query the EFI GetTime runtime service");