/*
 * SPDX-License-Identifier: CDDL 1.0
 *
 * Copyright 2022 Christos Margiolis <christos@FreeBSD.org>
 * Copyright 2022 Mark Johnston <markj@FreeBSD.org>
 */

#include <sys/param.h>
#include <sys/pcpu.h>

#include <machine/cpufunc.h>
#include <machine/md_var.h>

#include <sys/dtrace.h>
#include <cddl/dev/dtrace/dtrace_cddl.h>
#include <dis_tables.h>

#include "kinst.h"

#define	KINST_PUSHL_RBP		0x55
#define	KINST_STI		0xfb
#define	KINST_POPF		0x9d

#define	KINST_MODRM_MOD(b)	(((b) & 0xc0) >> 6)
#define	KINST_MODRM_REG(b)	(((b) & 0x38) >> 3)
#define	KINST_MODRM_RM(b)	((b) & 0x07)

#define	KINST_SIB_SCALE(s)	(((s) & 0xc0) >> 6)
#define	KINST_SIB_INDEX(s)	(((s) & 0x38) >> 3)
#define	KINST_SIB_BASE(s)	(((s) & 0x07) >> 0)

#define	KINST_REX_W(r)		(((r) & 0x08) >> 3)
#define	KINST_REX_R(r)		(((r) & 0x04) >> 2)
#define	KINST_REX_X(r)		(((r) & 0x02) >> 1)
#define	KINST_REX_B(r)		(((r) & 0x01) >> 0)
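
/*
 * Worked example of the decoding below (illustrative): the bytes 41 ff 14 c8
 * encode "call *(%r8,%rcx,8)".  REX = 0x41 (REX.B set), opcode = 0xff,
 * ModR/M = 0x14 (mod 0, reg 2 -> indirect call, rm 4 -> SIB follows) and
 * SIB = 0xc8 (scale 3, index 1 -> %rcx, base 0, extended by REX.B to %r8).
 * kinst_instr_dissect() records reg1 = 8, reg2 = 1, scale = 3 and a zero
 * displacement for such an instruction.
 */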

#define	KINST_F_CALL		0x0001	/* instruction is a "call" */
#define	KINST_F_DIRECT_CALL	0x0002	/* instruction is a direct call */
#define	KINST_F_RIPREL		0x0004	/* instruction is position-dependent */
#define	KINST_F_JMP		0x0008	/* instruction is a %rip-relative jmp */
#define	KINST_F_MOD_DIRECT	0x0010	/* operand is not a memory address */
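
/*
 * Examples of how these flags combine (illustrative, not exhaustive):
 * "call rel32" (e8) sets KINST_F_CALL | KINST_F_DIRECT_CALL;
 * "call *off(%rip)" (ff /2, mod 0, rm 5) sets KINST_F_CALL | KINST_F_RIPREL;
 * "call *%rax" (ff /2, mod 3) sets KINST_F_CALL | KINST_F_MOD_DIRECT;
 * short and near jumps (eb, e9, 70..7f, 0f 80..8f) set
 * KINST_F_JMP | KINST_F_RIPREL, while "jmp *%rax" (ff /4, mod 3) sets only
 * KINST_F_JMP and is copied to the trampoline verbatim.
 */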

/*
 * Per-CPU trampolines used when the interrupted thread is executing with
 * interrupts disabled. If an interrupt is raised while executing a trampoline,
 * the interrupt thread cannot safely overwrite its trampoline if it hits a
 * kinst probe while executing the interrupt handler.
 */
DPCPU_DEFINE_STATIC(uint8_t *, intr_tramp);

/*
 * Map ModR/M register bits to a trapframe offset.
 */
static int
kinst_regoff(int reg)
{
#define	_MATCH_REG(i, reg)			\
	case i:					\
		return (offsetof(struct trapframe, tf_ ## reg) / \
		    sizeof(register_t))
	switch (reg) {
	_MATCH_REG( 0, rax);
	_MATCH_REG( 1, rcx);
	_MATCH_REG( 2, rdx);
	_MATCH_REG( 3, rbx);
	_MATCH_REG( 4, rsp); /* SIB when mod != 3 */
	_MATCH_REG( 5, rbp);
	_MATCH_REG( 6, rsi);
	_MATCH_REG( 7, rdi);
	_MATCH_REG( 8, r8); /* REX.R is set */
	_MATCH_REG( 9, r9);
	_MATCH_REG(10, r10);
	_MATCH_REG(11, r11);
	_MATCH_REG(12, r12);
	_MATCH_REG(13, r13);
	_MATCH_REG(14, r14);
	_MATCH_REG(15, r15);
	}
#undef _MATCH_REG
	panic("%s: unhandled register index %d", __func__, reg);
}

/*
 * Obtain the specified register's value.
 */
static uint64_t
kinst_regval(struct trapframe *frame, int reg)
{
	if (reg == -1)
		return (0);
	return (((register_t *)frame)[kinst_regoff(reg)]);
}
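
/*
 * For example (illustrative): for "call *%r8" (41 ff d0), kinst_instr_dissect()
 * records reg1 = 8, and kinst_regval(frame, 8) fetches frame->tf_r8 when the
 * call is emulated in kinst_invop().
 */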

static uint32_t
kinst_riprel_disp(struct kinst_probe *kp, void *dst)
{
	return ((uint32_t)((intptr_t)kp->kp_patchpoint + kp->kp_md.disp -
	    (intptr_t)dst));
}
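
/*
 * kp_md.disp is maintained so that "patchpoint + tinstlen + disp" is the
 * operand's target; the expression above rebases the displacement so that the
 * copy in the trampoline, which ends at "tramp + tinstlen", resolves to the
 * same target.  For example (addresses are illustrative): a 7-byte
 * "mov 0x4d2(%rip),%rax" at 0xffffffff80300010 targets 0xffffffff803004e9;
 * copied to a trampoline at 0xffffffff82000000, its displacement becomes
 * 0xffffffff80300010 + 0x4d2 - 0xffffffff82000000, which added to the end of
 * the copy (0xffffffff82000007) again yields 0xffffffff803004e9.
 */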

static void
kinst_trampoline_populate(struct kinst_probe *kp, uint8_t *tramp)
{
	uint8_t *instr;
	uint32_t disp;
	int ilen;

	ilen = kp->kp_md.tinstlen;

	memcpy(tramp, kp->kp_md.template, ilen);
	if ((kp->kp_md.flags & KINST_F_RIPREL) != 0) {
		disp = kinst_riprel_disp(kp, tramp);
		memcpy(&tramp[kp->kp_md.dispoff], &disp, sizeof(uint32_t));
	}

	/*
	 * The following position-independent jmp takes us back to the
	 * original code. It is encoded as "jmp *0(%rip)" (six bytes),
	 * followed by the absolute address of the instruction following
	 * the one that was traced (eight bytes).
	 */
	tramp[ilen + 0] = 0xff;
	tramp[ilen + 1] = 0x25;
	tramp[ilen + 2] = 0x00;
	tramp[ilen + 3] = 0x00;
	tramp[ilen + 4] = 0x00;
	tramp[ilen + 5] = 0x00;
	instr = kp->kp_patchpoint + kp->kp_md.instlen;
	memcpy(&tramp[ilen + 6], &instr, sizeof(uintptr_t));
}
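
/*
 * The resulting trampoline layout (illustrative):
 *
 *	tramp + 0:		copied (possibly rewritten) instruction of
 *				tinstlen bytes, with any %rip-relative
 *				displacement rebased to the trampoline
 *	tramp + tinstlen:	ff 25 00 00 00 00	(jmp *0(%rip))
 *	tramp + tinstlen + 6:	8-byte absolute address of
 *				patchpoint + instlen
 */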

int
kinst_invop(uintptr_t addr, struct trapframe *frame, uintptr_t scratch)
{
	solaris_cpu_t *cpu;
	uintptr_t *stack, retaddr;
	struct kinst_probe *kp;
	struct kinst_probe_md *kpmd;
	uint8_t *tramp;

	stack = (uintptr_t *)frame->tf_rsp;
	cpu = &solaris_cpu[curcpu];

	LIST_FOREACH(kp, KINST_GETPROBE(addr), kp_hashnext) {
		if ((uintptr_t)kp->kp_patchpoint == addr)
			break;
	}
	if (kp == NULL)
		return (0);

	/*
	 * Report the address of the breakpoint for the benefit of consumers
	 * fetching register values with regs[].
	 */
	frame->tf_rip--;

	DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT);
	cpu->cpu_dtrace_caller = stack[0];
	DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_NOFAULT | CPU_DTRACE_BADADDR);
	dtrace_probe(kp->kp_id, 0, 0, 0, 0, 0);
	cpu->cpu_dtrace_caller = 0;

	kpmd = &kp->kp_md;
	if ((kpmd->flags & KINST_F_CALL) != 0) {
		/*
		 * dtrace_invop_start() reserves space on the stack to
		 * store the return address of the call instruction.
		 */
		retaddr = (uintptr_t)(kp->kp_patchpoint + kpmd->instlen);
		*(uintptr_t *)scratch = retaddr;

		if ((kpmd->flags & KINST_F_DIRECT_CALL) != 0) {
			frame->tf_rip = (uintptr_t)(kp->kp_patchpoint +
			    kpmd->disp + kpmd->instlen);
		} else {
			register_t rval;

			if (kpmd->reg1 == -1 && kpmd->reg2 == -1) {
				/* rip-relative */
				rval = frame->tf_rip + kpmd->instlen;
			} else {
				/* indirect */
				rval = kinst_regval(frame, kpmd->reg1) +
				    (kinst_regval(frame, kpmd->reg2) <<
				    kpmd->scale);
			}

			if ((kpmd->flags & KINST_F_MOD_DIRECT) != 0) {
				frame->tf_rip = rval + kpmd->disp;
			} else {
				frame->tf_rip =
				    *(uintptr_t *)(rval + kpmd->disp);
			}
		}
		return (DTRACE_INVOP_CALL);
	} else {
		if ((frame->tf_rflags & PSL_I) == 0)
			tramp = DPCPU_GET(intr_tramp);
		else
			tramp = curthread->t_kinst;
		if (tramp == NULL) {
			/*
			 * A trampoline allocation failed, so this probe is
			 * effectively disabled. Restore the original
			 * instruction.
			 *
			 * We can't safely print anything here, but the
			 * trampoline allocator should have left a breadcrumb in
			 * the dmesg.
			 */
			kinst_patch_tracepoint(kp, kp->kp_savedval);
			frame->tf_rip = (register_t)kp->kp_patchpoint;
		} else {
			kinst_trampoline_populate(kp, tramp);
			frame->tf_rip = (register_t)tramp;
		}
		return (DTRACE_INVOP_NOP);
	}
}

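/*
 * Overwrite the byte at the patch point, either installing the breakpoint or
 * restoring the original instruction byte.  Kernel text is mapped read-only,
 * so write protection (CR0.WP) is temporarily disabled via disable_wp(), and
 * interrupts are disabled so that nothing else can run on this CPU while
 * write protection is off.
 */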
void
kinst_patch_tracepoint(struct kinst_probe *kp, kinst_patchval_t val)
{
	register_t reg;
	int oldwp;

	reg = intr_disable();
	oldwp = disable_wp();
	*kp->kp_patchpoint = val;
	restore_wp(oldwp);
	intr_restore(reg);
}

static void
kinst_set_disp8(struct kinst_probe *kp, uint8_t byte)
{
	kp->kp_md.disp = (int64_t)(int8_t)byte;
}

static void
kinst_set_disp32(struct kinst_probe *kp, uint8_t *bytes)
{
	int32_t disp32;

	memcpy(&disp32, bytes, sizeof(disp32));
	kp->kp_md.disp = (int64_t)disp32;
}

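/*
 * Byte-fetch callback for the CDDL disassembler.  It advances the caller's
 * instruction pointer, so when dtrace_disx86() returns, *instr points just
 * past the decoded instruction; kinst_make_probe() relies on this to step to
 * the next instruction after a probe has been dissected.
 */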
static int
kinst_dis_get_byte(void *p)
{
	int ret;
	uint8_t **instr = p;

	ret = **instr;
	(*instr)++;

	return (ret);
}

/*
 * Set up all of the state needed to faithfully execute a probed instruction.
 *
 * In the simple case, we copy the instruction unmodified to a per-thread
 * trampoline, wherein it is followed by a jump back to the original code.
 * - Instructions can have %rip as an operand:
 *   - with %rip-relative addressing encoded in ModR/M, or
 *   - implicitly as a part of the instruction definition (jmp, call).
 * - Call instructions (which may be %rip-relative) need to push the correct
 *   return address onto the stack.
 *
 * Call instructions are simple enough to be emulated in software, so we simply
 * do not use the trampoline mechanism in that case. kinst_invop() will compute
 * the branch target using the address info computed here (register operands and
 * displacement).
 *
 * %rip-relative operands encoded using the ModR/M byte always use a 32-bit
 * displacement; when populating the trampoline the displacement is adjusted to
 * be relative to the trampoline address. Trampolines are always allocated
 * above KERNBASE for this reason.
 *
 * For other %rip-relative operands (just jumps) we take the same approach.
 * Instructions which specify an 8-bit displacement must be rewritten to use a
 * 32-bit displacement.
 */
static int
kinst_instr_dissect(struct kinst_probe *kp, uint8_t **instr)
{
	struct kinst_probe_md *kpmd;
	dis86_t d86;
	uint8_t *bytes, modrm, rex;
	int dispoff, i, ilen, opcidx;

	kpmd = &kp->kp_md;

	d86.d86_data = instr;
	d86.d86_get_byte = kinst_dis_get_byte;
	d86.d86_check_func = NULL;
	if (dtrace_disx86(&d86, SIZE64) != 0) {
		KINST_LOG("failed to disassemble instruction at: %p", *instr);
		return (EINVAL);
	}
	bytes = d86.d86_bytes;
	kpmd->instlen = kpmd->tinstlen = d86.d86_len;

	/*
	 * Skip over prefixes, save REX.
	 */
	rex = 0;
	for (i = 0; i < kpmd->instlen; i++) {
		switch (bytes[i]) {
		case 0xf0 ... 0xf3:
			/* group 1 */
			continue;
		case 0x26:
		case 0x2e:
		case 0x36:
		case 0x3e:
		case 0x64:
		case 0x65:
			/* group 2 */
			continue;
		case 0x66:
			/* group 3 */
			continue;
		case 0x67:
			/* group 4 */
			continue;
		case 0x40 ... 0x4f:
			/* REX */
			rex = bytes[i];
			continue;
		}
		break;
	}
	KASSERT(i < kpmd->instlen,
	    ("%s: failed to disassemble instruction at %p", __func__, bytes));
	opcidx = i;

	/*
	 * Identify instructions of interest by opcode: calls and jumps.
	 * Extract displacements.
	 */
	dispoff = -1;
	switch (bytes[opcidx]) {
	case 0x0f:
		switch (bytes[opcidx + 1]) {
		case 0x80 ... 0x8f:
			/* conditional jmp near */
			kpmd->flags |= KINST_F_JMP | KINST_F_RIPREL;
			dispoff = opcidx + 2;
			kinst_set_disp32(kp, &bytes[dispoff]);
			break;
		}
		break;
	case 0xe3:
		/*
		 * This is jrcxz/jecxz, which has only an 8-bit displacement
		 * form, so there is no straightforward way to translate it to
		 * use a 32-bit displacement.  Fortunately, it is rarely used.
		 */
		return (EINVAL);
	case 0x70 ... 0x7f:
		/* conditional jmp short */
		kpmd->flags |= KINST_F_JMP | KINST_F_RIPREL;
		dispoff = opcidx + 1;
		kinst_set_disp8(kp, bytes[dispoff]);
		break;
	case 0xe9:
		/* unconditional jmp near */
		kpmd->flags |= KINST_F_JMP | KINST_F_RIPREL;
		dispoff = opcidx + 1;
		kinst_set_disp32(kp, &bytes[dispoff]);
		break;
	case 0xeb:
		/* unconditional jmp short */
		kpmd->flags |= KINST_F_JMP | KINST_F_RIPREL;
		dispoff = opcidx + 1;
		kinst_set_disp8(kp, bytes[dispoff]);
		break;
	case 0xe8:
	case 0x9a:
		/* direct call */
		kpmd->flags |= KINST_F_CALL | KINST_F_DIRECT_CALL;
		dispoff = opcidx + 1;
		kinst_set_disp32(kp, &bytes[dispoff]);
		break;
	case 0xff:
		KASSERT(d86.d86_got_modrm,
		    ("no ModR/M byte for instr at %p", *instr - kpmd->instlen));
		switch (KINST_MODRM_REG(bytes[d86.d86_rmindex])) {
		case 0x02:
		case 0x03:
			/* indirect call */
			kpmd->flags |= KINST_F_CALL;
			break;
		case 0x04:
		case 0x05:
			/* indirect jump */
			kpmd->flags |= KINST_F_JMP;
			break;
		}
	}

	/*
	 * If there's a ModR/M byte, we need to check it to see if the operand
	 * is %rip-relative, and rewrite the displacement if so. If not, we
	 * might still have to extract operand info if this is a call
	 * instruction.
	 */
	if (d86.d86_got_modrm) {
		uint8_t mod, rm, sib;

		kpmd->reg1 = kpmd->reg2 = -1;

		modrm = bytes[d86.d86_rmindex];
		mod = KINST_MODRM_MOD(modrm);
		rm = KINST_MODRM_RM(modrm);
		if (mod == 0 && rm == 5) {
			kpmd->flags |= KINST_F_RIPREL;
			dispoff = d86.d86_rmindex + 1;
			kinst_set_disp32(kp, &bytes[dispoff]);
		} else if ((kpmd->flags & KINST_F_CALL) != 0) {
			bool havesib;

			havesib = (mod != 3 && rm == 4);
			dispoff = d86.d86_rmindex + (havesib ? 2 : 1);
			if (mod == 1)
				kinst_set_disp8(kp, bytes[dispoff]);
			else if (mod == 2)
				kinst_set_disp32(kp, &bytes[dispoff]);
			else if (mod == 3)
				kpmd->flags |= KINST_F_MOD_DIRECT;

			if (havesib) {
				sib = bytes[d86.d86_rmindex + 1];
				if (KINST_SIB_BASE(sib) != 5) {
					kpmd->reg1 = KINST_SIB_BASE(sib) |
					    (KINST_REX_B(rex) << 3);
				}
				kpmd->scale = KINST_SIB_SCALE(sib);
				kpmd->reg2 = KINST_SIB_INDEX(sib) |
				    (KINST_REX_X(rex) << 3);
			} else {
				kpmd->reg1 = rm | (KINST_REX_B(rex) << 3);
			}
		}
	}

	/*
	 * Calls are emulated in software; once operands are decoded we have
	 * nothing else to do.
	 */
	if ((kpmd->flags & KINST_F_CALL) != 0)
		return (0);

	/*
	 * Allocate and populate an instruction trampoline template.
	 *
	 * Position-independent instructions can simply be copied, but
	 * position-dependent instructions require some surgery: jump
	 * instructions with an 8-bit displacement need to be converted to use a
	 * 32-bit displacement, and the adjusted displacement needs to be
	 * computed.
	 */
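	/*
	 * For example (illustrative): a two-byte short jump "eb 10" targets
	 * patchpoint + 2 + 0x10.  Below it is rewritten as a five-byte "e9"
	 * near jump and its displacement is reduced by 3, preserving the
	 * invariant that patchpoint + tinstlen + disp names the target; the
	 * final 32-bit displacement is rebased to the trampoline by
	 * kinst_trampoline_populate().
	 */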
	ilen = kpmd->instlen;
	if ((kpmd->flags & KINST_F_RIPREL) != 0) {
		if ((kpmd->flags & KINST_F_JMP) == 0 ||
		    bytes[opcidx] == 0x0f ||
		    bytes[opcidx] == 0xe9 ||
		    bytes[opcidx] == 0xff) {
			memcpy(kpmd->template, bytes, dispoff);
			memcpy(&kpmd->template[dispoff + 4],
			    &bytes[dispoff + 4], ilen - (dispoff + 4));
			kpmd->dispoff = dispoff;
		} else if (bytes[opcidx] == 0xeb) {
			memcpy(kpmd->template, bytes, opcidx);
			kpmd->template[opcidx] = 0xe9;
			kpmd->dispoff = opcidx + 1;

			/* Instruction length changes from 2 to 5. */
			kpmd->tinstlen = 5;
			kpmd->disp -= 3;
		} else if (bytes[opcidx] >= 0x70 && bytes[opcidx] <= 0x7f) {
			memcpy(kpmd->template, bytes, opcidx);
			kpmd->template[opcidx] = 0x0f;
			kpmd->template[opcidx + 1] = bytes[opcidx] + 0x10;
			kpmd->dispoff = opcidx + 2;

			/* Instruction length changes from 2 to 6. */
			kpmd->tinstlen = 6;
			kpmd->disp -= 4;
		} else {
			panic("unhandled opcode %#x", bytes[opcidx]);
		}
	} else {
		memcpy(kpmd->template, bytes, ilen);
	}

	return (0);
}

int
kinst_make_probe(linker_file_t lf, int symindx, linker_symval_t *symval,
    void *opaque)
{
	struct kinst_probe *kp;
	dtrace_kinst_probedesc_t *pd;
	const char *func;
	int error, instrsize, n, off;
	uint8_t *instr, *limit;

	pd = opaque;
	func = symval->name;
	if (strcmp(func, pd->kpd_func) != 0 || strcmp(func, "trap_check") == 0)
		return (0);

	instr = (uint8_t *)symval->value;
	limit = (uint8_t *)symval->value + symval->size;
	if (instr >= limit)
		return (0);

	/*
	 * Ignore functions not beginning with the usual function prologue.
	 * These might correspond to exception handlers with which we should not
	 * meddle. This does however exclude functions which can be safely
	 * traced, such as cpu_switch().
	 */
	if (*instr != KINST_PUSHL_RBP)
		return (0);

	n = 0;
	while (instr < limit) {
		instrsize = dtrace_instr_size(instr);
		off = (int)(instr - (uint8_t *)symval->value);
		if (pd->kpd_off != -1 && off != pd->kpd_off) {
			instr += instrsize;
			continue;
		}

		/*
		 * Check for instructions which may enable interrupts. Such
		 * instructions are tricky to trace since it is unclear whether
		 * to use the per-thread or per-CPU trampolines. Since they are
		 * rare, we don't bother to implement special handling for them.
		 *
		 * If the caller specified an offset, return an error, otherwise
		 * silently ignore the instruction so that it remains possible
		 * to enable all instructions in a function.
		 */
		if (instrsize == 1 &&
		    (instr[0] == KINST_POPF || instr[0] == KINST_STI)) {
			if (pd->kpd_off != -1)
				return (EINVAL);
			instr += instrsize;
			continue;
		}

		/*
		 * Prevent separate dtrace(1) instances from creating copies of
		 * the same probe.
		 */
		LIST_FOREACH(kp, KINST_GETPROBE(instr), kp_hashnext) {
			if (strcmp(kp->kp_func, func) == 0 &&
			    strtol(kp->kp_name, NULL, 10) == off)
				return (0);
		}
		if (++n > KINST_PROBETAB_MAX) {
			KINST_LOG("probe list full: %d entries", n);
			return (ENOMEM);
		}
		kp = malloc(sizeof(struct kinst_probe), M_KINST,
		    M_WAITOK | M_ZERO);
		kp->kp_func = func;
		snprintf(kp->kp_name, sizeof(kp->kp_name), "%d", off);
		kp->kp_savedval = *instr;
		kp->kp_patchval = KINST_PATCHVAL;
		kp->kp_patchpoint = instr;

		error = kinst_instr_dissect(kp, &instr);
		if (error != 0)
			return (error);

		kinst_probe_create(kp, lf);
	}

	return (0);
}

int
kinst_md_init(void)
{
	uint8_t *tramp;
	int cpu;

	CPU_FOREACH(cpu) {
		tramp = kinst_trampoline_alloc(M_WAITOK);
		if (tramp == NULL)
			return (ENOMEM);
		DPCPU_ID_SET(cpu, intr_tramp, tramp);
	}

	return (0);
}

void
kinst_md_deinit(void)
{
	uint8_t *tramp;
	int cpu;

	CPU_FOREACH(cpu) {
		tramp = DPCPU_ID_GET(cpu, intr_tramp);
		if (tramp != NULL) {
			kinst_trampoline_dealloc(DPCPU_ID_GET(cpu, intr_tramp));
			DPCPU_ID_SET(cpu, intr_tramp, NULL);
		}
	}
}