1 /*-
2 * Copyright (c) 2006 Peter Wemm
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 *
9 * 1. Redistributions of source code must retain the above copyright
10 * notice, this list of conditions and the following disclaimer.
11 * 2. Redistributions in binary form must reproduce the above copyright
12 * notice, this list of conditions and the following disclaimer in the
13 * documentation and/or other materials provided with the distribution.
14 *
15 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
16 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
17 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
18 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
19 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
20 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
21 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
22 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
23 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
24 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
25 */
26
27 #include <sys/cdefs.h>
28 __FBSDID("$FreeBSD: releng/9.2/sys/amd64/amd64/minidump_machdep.c 237839 2012-06-30 14:17:59Z avg $");
29
30 #include "opt_pmap.h"
31 #include "opt_watchdog.h"
32
33 #include <sys/param.h>
34 #include <sys/systm.h>
35 #include <sys/conf.h>
36 #include <sys/cons.h>
37 #include <sys/kernel.h>
38 #include <sys/kerneldump.h>
39 #include <sys/msgbuf.h>
40 #include <sys/watchdog.h>
41 #include <vm/vm.h>
42 #include <vm/vm_page.h>
43 #include <vm/pmap.h>
44 #include <machine/atomic.h>
45 #include <machine/elf.h>
46 #include <machine/md_var.h>
47 #include <machine/vmparam.h>
48 #include <machine/minidump.h>
49
50 CTASSERT(sizeof(struct kerneldumpheader) == 512);
51
52 /*
53 * Don't touch the first SIZEOF_METADATA bytes on the dump device. This
54 * is to protect us from metadata and to protect metadata from us.
55 */
56 #define SIZEOF_METADATA (64*1024)
57
58 #define MD_ALIGN(x) (((off_t)(x) + PAGE_MASK) & ~PAGE_MASK)
59 #define DEV_ALIGN(x) (((off_t)(x) + (DEV_BSIZE-1)) & ~(DEV_BSIZE-1))
60
61 uint64_t *vm_page_dump;
62 int vm_page_dump_size;
63
64 static struct kerneldumpheader kdh;
65 static off_t dumplo;
66
67 /* Handle chunked writes. */
68 static size_t fragsz;
69 static void *dump_va;
70 static size_t counter, progress, dumpsize;
71
72 CTASSERT(sizeof(*vm_page_dump) == 8);
73
74 static int
75 is_dumpable(vm_paddr_t pa)
76 {
77 vm_page_t m;
78 int i;
79
80 if ((m = vm_phys_paddr_to_vm_page(pa)) != NULL)
81 return ((m->flags & PG_NODUMP) == 0);
82 for (i = 0; dump_avail[i] != 0 || dump_avail[i + 1] != 0; i += 2) {
83 if (pa >= dump_avail[i] && pa < dump_avail[i + 1])
84 return (1);
85 }
86 return (0);
87 }
88
/*
 * Convert a page count into units of 256 pages, rounding up.
 * NOTE(review): this is megabytes only if pages are 4 KB -- confirm.
 */
#define	PG2MB(pgs)	(((pgs) + ((1 << 8) - 1)) >> 8)
90
91 static int
92 blk_flush(struct dumperinfo *di)
93 {
94 int error;
95
96 if (fragsz == 0)
97 return (0);
98
99 error = dump_write(di, dump_va, 0, dumplo, fragsz);
100 dumplo += fragsz;
101 fragsz = 0;
102 return (error);
103 }
104
/*
 * Progress is reported once per ten-percent band.  Each entry records the
 * inclusive percentage range of a band and whether it has been announced.
 */
static struct {
	int min_per;
	int max_per;
	int visited;
} progress_track[10] = {
	{ .min_per =  0, .max_per =  10, .visited = 0 },
	{ .min_per = 10, .max_per =  20, .visited = 0 },
	{ .min_per = 20, .max_per =  30, .visited = 0 },
	{ .min_per = 30, .max_per =  40, .visited = 0 },
	{ .min_per = 40, .max_per =  50, .visited = 0 },
	{ .min_per = 50, .max_per =  60, .visited = 0 },
	{ .min_per = 60, .max_per =  70, .visited = 0 },
	{ .min_per = 70, .max_per =  80, .visited = 0 },
	{ .min_per = 80, .max_per =  90, .visited = 0 },
	{ .min_per = 90, .max_per = 100, .visited = 0 }
};

/*
 * Print the percentage of the dump completed so far, at most once per band
 * of progress_track[].
 *
 * 'progress' is the number of bytes still to be written and 'dumpsize' the
 * total, so the completed percentage is 100 minus the remaining share.
 * The first band whose range contains the percentage decides the outcome:
 * if it was already announced nothing is printed.
 */
static void
report_progress(size_t progress, size_t dumpsize)
{
	int pct, band;

	pct = 100 - ((progress * 100) / dumpsize);
	for (band = 0; band < 10; band++) {
		if (pct >= progress_track[band].min_per &&
		    pct <= progress_track[band].max_per) {
			if (!progress_track[band].visited) {
				progress_track[band].visited = 1;
				printf("..%d%%", pct);
			}
			return;
		}
	}
}
138
139 static int
140 blk_write(struct dumperinfo *di, char *ptr, vm_paddr_t pa, size_t sz)
141 {
142 size_t len;
143 int error, i, c;
144 u_int maxdumpsz;
145
146 maxdumpsz = min(di->maxiosize, MAXDUMPPGS * PAGE_SIZE);
147 if (maxdumpsz == 0) /* seatbelt */
148 maxdumpsz = PAGE_SIZE;
149 error = 0;
150 if ((sz % PAGE_SIZE) != 0) {
151 printf("size not page aligned\n");
152 return (EINVAL);
153 }
154 if (ptr != NULL && pa != 0) {
155 printf("cant have both va and pa!\n");
156 return (EINVAL);
157 }
158 if (pa != 0 && (((uintptr_t)ptr) % PAGE_SIZE) != 0) {
159 printf("address not page aligned\n");
160 return (EINVAL);
161 }
162 if (ptr != NULL) {
163 /* If we're doing a virtual dump, flush any pre-existing pa pages */
164 error = blk_flush(di);
165 if (error)
166 return (error);
167 }
168 while (sz) {
169 len = maxdumpsz - fragsz;
170 if (len > sz)
171 len = sz;
172 counter += len;
173 progress -= len;
174 if (counter >> 24) {
175 report_progress(progress, dumpsize);
176 counter &= (1<<24) - 1;
177 }
178
179 wdog_kern_pat(WD_LASTVAL);
180
181 if (ptr) {
182 error = dump_write(di, ptr, 0, dumplo, len);
183 if (error)
184 return (error);
185 dumplo += len;
186 ptr += len;
187 sz -= len;
188 } else {
189 for (i = 0; i < len; i += PAGE_SIZE)
190 dump_va = pmap_kenter_temporary(pa + i, (i + fragsz) >> PAGE_SHIFT);
191 fragsz += len;
192 pa += len;
193 sz -= len;
194 if (fragsz == maxdumpsz) {
195 error = blk_flush(di);
196 if (error)
197 return (error);
198 }
199 }
200
201 /* Check for user abort. */
202 c = cncheckc();
203 if (c == 0x03)
204 return (ECANCELED);
205 if (c != -1)
206 printf(" (CTRL-C to abort) ");
207 }
208
209 return (0);
210 }
211
212 /* A fake page table page, to avoid having to handle both 4K and 2M pages */
213 static pd_entry_t fakepd[NPDEPG];
214
215 void
216 minidumpsys(struct dumperinfo *di)
217 {
218 uint32_t pmapsize;
219 vm_offset_t va;
220 int error;
221 uint64_t bits;
222 uint64_t *pdp, *pd, *pt, pa;
223 int i, j, k, n, bit;
224 int retry_count;
225 struct minidumphdr mdhdr;
226
227 retry_count = 0;
228 retry:
229 retry_count++;
230 counter = 0;
231 /* Walk page table pages, set bits in vm_page_dump */
232 pmapsize = 0;
233 pdp = (uint64_t *)PHYS_TO_DMAP(KPDPphys);
234 for (va = VM_MIN_KERNEL_ADDRESS; va < MAX(KERNBASE + NKPT * NBPDR,
235 kernel_vm_end); ) {
236 /*
237 * We always write a page, even if it is zero. Each
238 * page written corresponds to 1GB of space
239 */
240 pmapsize += PAGE_SIZE;
241 i = (va >> PDPSHIFT) & ((1ul << NPDPEPGSHIFT) - 1);
242 if ((pdp[i] & PG_V) == 0) {
243 va += NBPDP;
244 continue;
245 }
246
247 /*
248 * 1GB page is represented as 512 2MB pages in a dump.
249 */
250 if ((pdp[i] & PG_PS) != 0) {
251 va += NBPDP;
252 pa = pdp[i] & PG_PS_FRAME;
253 for (n = 0; n < NPDEPG * NPTEPG; n++) {
254 if (is_dumpable(pa))
255 dump_add_page(pa);
256 pa += PAGE_SIZE;
257 }
258 continue;
259 }
260
261 pd = (uint64_t *)PHYS_TO_DMAP(pdp[i] & PG_FRAME);
262 for (n = 0; n < NPDEPG; n++, va += NBPDR) {
263 j = (va >> PDRSHIFT) & ((1ul << NPDEPGSHIFT) - 1);
264
265 if ((pd[j] & PG_V) == 0)
266 continue;
267
268 if ((pd[j] & PG_PS) != 0) {
269 /* This is an entire 2M page. */
270 pa = pd[j] & PG_PS_FRAME;
271 for (k = 0; k < NPTEPG; k++) {
272 if (is_dumpable(pa))
273 dump_add_page(pa);
274 pa += PAGE_SIZE;
275 }
276 continue;
277 }
278
279 pa = pd[j] & PG_FRAME;
280 /* set bit for this PTE page */
281 if (is_dumpable(pa))
282 dump_add_page(pa);
283 /* and for each valid page in this 2MB block */
284 pt = (uint64_t *)PHYS_TO_DMAP(pd[j] & PG_FRAME);
285 for (k = 0; k < NPTEPG; k++) {
286 if ((pt[k] & PG_V) == 0)
287 continue;
288 pa = pt[k] & PG_FRAME;
289 if (is_dumpable(pa))
290 dump_add_page(pa);
291 }
292 }
293 }
294
295 /* Calculate dump size. */
296 dumpsize = pmapsize;
297 dumpsize += round_page(msgbufp->msg_size);
298 dumpsize += round_page(vm_page_dump_size);
299 for (i = 0; i < vm_page_dump_size / sizeof(*vm_page_dump); i++) {
300 bits = vm_page_dump[i];
301 while (bits) {
302 bit = bsfq(bits);
303 pa = (((uint64_t)i * sizeof(*vm_page_dump) * NBBY) + bit) * PAGE_SIZE;
304 /* Clear out undumpable pages now if needed */
305 if (is_dumpable(pa)) {
306 dumpsize += PAGE_SIZE;
307 } else {
308 dump_drop_page(pa);
309 }
310 bits &= ~(1ul << bit);
311 }
312 }
313 dumpsize += PAGE_SIZE;
314
315 /* Determine dump offset on device. */
316 if (di->mediasize < SIZEOF_METADATA + dumpsize + sizeof(kdh) * 2) {
317 error = E2BIG;
318 goto fail;
319 }
320 dumplo = di->mediaoffset + di->mediasize - dumpsize;
321 dumplo -= sizeof(kdh) * 2;
322 progress = dumpsize;
323
324 /* Initialize mdhdr */
325 bzero(&mdhdr, sizeof(mdhdr));
326 strcpy(mdhdr.magic, MINIDUMP_MAGIC);
327 mdhdr.version = MINIDUMP_VERSION;
328 mdhdr.msgbufsize = msgbufp->msg_size;
329 mdhdr.bitmapsize = vm_page_dump_size;
330 mdhdr.pmapsize = pmapsize;
331 mdhdr.kernbase = VM_MIN_KERNEL_ADDRESS;
332 mdhdr.dmapbase = DMAP_MIN_ADDRESS;
333 mdhdr.dmapend = DMAP_MAX_ADDRESS;
334
335 mkdumpheader(&kdh, KERNELDUMPMAGIC, KERNELDUMP_AMD64_VERSION, dumpsize, di->blocksize);
336
337 printf("Dumping %llu out of %ju MB:", (long long)dumpsize >> 20,
338 ptoa((uintmax_t)physmem) / 1048576);
339
340 /* Dump leader */
341 error = dump_write(di, &kdh, 0, dumplo, sizeof(kdh));
342 if (error)
343 goto fail;
344 dumplo += sizeof(kdh);
345
346 /* Dump my header */
347 bzero(&fakepd, sizeof(fakepd));
348 bcopy(&mdhdr, &fakepd, sizeof(mdhdr));
349 error = blk_write(di, (char *)&fakepd, 0, PAGE_SIZE);
350 if (error)
351 goto fail;
352
353 /* Dump msgbuf up front */
354 error = blk_write(di, (char *)msgbufp->msg_ptr, 0, round_page(msgbufp->msg_size));
355 if (error)
356 goto fail;
357
358 /* Dump bitmap */
359 error = blk_write(di, (char *)vm_page_dump, 0, round_page(vm_page_dump_size));
360 if (error)
361 goto fail;
362
363 /* Dump kernel page directory pages */
364 bzero(fakepd, sizeof(fakepd));
365 pdp = (uint64_t *)PHYS_TO_DMAP(KPDPphys);
366 for (va = VM_MIN_KERNEL_ADDRESS; va < MAX(KERNBASE + NKPT * NBPDR,
367 kernel_vm_end); va += NBPDP) {
368 i = (va >> PDPSHIFT) & ((1ul << NPDPEPGSHIFT) - 1);
369
370 /* We always write a page, even if it is zero */
371 if ((pdp[i] & PG_V) == 0) {
372 error = blk_write(di, (char *)&fakepd, 0, PAGE_SIZE);
373 if (error)
374 goto fail;
375 /* flush, in case we reuse fakepd in the same block */
376 error = blk_flush(di);
377 if (error)
378 goto fail;
379 continue;
380 }
381
382 /* 1GB page is represented as 512 2MB pages in a dump */
383 if ((pdp[i] & PG_PS) != 0) {
384 /* PDPE and PDP have identical layout in this case */
385 fakepd[0] = pdp[i];
386 for (j = 1; j < NPDEPG; j++)
387 fakepd[j] = fakepd[j - 1] + NBPDR;
388 error = blk_write(di, (char *)&fakepd, 0, PAGE_SIZE);
389 if (error)
390 goto fail;
391 /* flush, in case we reuse fakepd in the same block */
392 error = blk_flush(di);
393 if (error)
394 goto fail;
395 bzero(fakepd, sizeof(fakepd));
396 continue;
397 }
398
399 pd = (uint64_t *)PHYS_TO_DMAP(pdp[i] & PG_FRAME);
400 error = blk_write(di, (char *)pd, 0, PAGE_SIZE);
401 if (error)
402 goto fail;
403 error = blk_flush(di);
404 if (error)
405 goto fail;
406 }
407
408 /* Dump memory chunks */
409 /* XXX cluster it up and use blk_dump() */
410 for (i = 0; i < vm_page_dump_size / sizeof(*vm_page_dump); i++) {
411 bits = vm_page_dump[i];
412 while (bits) {
413 bit = bsfq(bits);
414 pa = (((uint64_t)i * sizeof(*vm_page_dump) * NBBY) + bit) * PAGE_SIZE;
415 error = blk_write(di, 0, pa, PAGE_SIZE);
416 if (error)
417 goto fail;
418 bits &= ~(1ul << bit);
419 }
420 }
421
422 error = blk_flush(di);
423 if (error)
424 goto fail;
425
426 /* Dump trailer */
427 error = dump_write(di, &kdh, 0, dumplo, sizeof(kdh));
428 if (error)
429 goto fail;
430 dumplo += sizeof(kdh);
431
432 /* Signal completion, signoff and exit stage left. */
433 dump_write(di, NULL, 0, 0, 0);
434 printf("\nDump complete\n");
435 return;
436
437 fail:
438 if (error < 0)
439 error = -error;
440
441 printf("\n");
442 if (error == ENOSPC) {
443 printf("Dump map grown while dumping. ");
444 if (retry_count < 5) {
445 printf("Retrying...\n");
446 goto retry;
447 }
448 printf("Dump failed.\n");
449 }
450 else if (error == ECANCELED)
451 printf("Dump aborted\n");
452 else if (error == E2BIG)
453 printf("Dump failed. Partition too small.\n");
454 else
455 printf("** DUMP FAILED (ERROR %d) **\n", error);
456 }
457
458 void
459 dump_add_page(vm_paddr_t pa)
460 {
461 int idx, bit;
462
463 pa >>= PAGE_SHIFT;
464 idx = pa >> 6; /* 2^6 = 64 */
465 bit = pa & 63;
466 atomic_set_long(&vm_page_dump[idx], 1ul << bit);
467 }
468
469 void
470 dump_drop_page(vm_paddr_t pa)
471 {
472 int idx, bit;
473
474 pa >>= PAGE_SHIFT;
475 idx = pa >> 6; /* 2^6 = 64 */
476 bit = pa & 63;
477 atomic_clear_long(&vm_page_dump[idx], 1ul << bit);
478 }
Cache object: 9f98386e1d7955e7d0006709cacc65b2
|