1 /*-
2 * Copyright (c) 2016 Cavium
3 * All rights reserved.
4 *
5 * This software was developed by Semihalf.
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
10 * 1. Redistributions of source code must retain the above copyright
11 * notice, this list of conditions and the following disclaimer.
12 * 2. Redistributions in binary form must reproduce the above copyright
13 * notice, this list of conditions and the following disclaimer in the
14 * documentation and/or other materials provided with the distribution.
15 *
16 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
17 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
20 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
21 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
22 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
23 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
24 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
25 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
26 * SUCH DAMAGE.
27 */
28
29 #include <sys/cdefs.h>
30 __FBSDID("$FreeBSD: releng/11.2/sys/arm64/arm64/disassem.c 295505 2016-02-11 06:50:11Z wma $");
31 #include <sys/param.h>
32
33 #include <sys/systm.h>
34 #include <machine/disassem.h>
35 #include <machine/armreg.h>
36 #include <ddb/ddb.h>
37
/* Size of a token name buffer; the parser keeps names shorter than this */
#define ARM64_MAX_TOKEN_LEN 8
/* Number of operand token slots available in each instruction entry */
#define ARM64_MAX_TOKEN_CNT 10

/* Shift and mask selecting the two topmost opcode bits, op(31:30) */
#define ARM_INSN_SIZE_OFFSET 30
#define ARM_INSN_SIZE_MASK 0x3

/* Special options for instruction printing */
#define OP_SIGN_EXT (1UL << 0) /* Sign-extend immediate value */
#define OP_LITERAL (1UL << 1) /* Use literal (memory offset) */
#define OP_MULT_4 (1UL << 2) /* Multiply immediate by 4 */
#define OP_SF32 (1UL << 3) /* Force 32-bit access */
#define OP_SF_INV (1UL << 6) /* SF is inverted (1 means 32 bit access) */
50
/*
 * Names used for registers accessed as 32-bit, indexed by register number
 * (0-31).  Both the strings and the table itself are read-only.
 */
static const char * const w_reg[] = {
	"w0", "w1", "w2", "w3", "w4", "w5", "w6", "w7",
	"w8", "w9", "w10", "w11", "w12", "w13", "w14", "w15",
	"w16", "w17", "w18", "w19", "w20", "w21", "w22", "w23",
	"w24", "w25", "w26", "w27", "w28", "w29", "w30", "wSP",
};
57
/*
 * Names used for registers accessed as 64-bit, indexed by register number
 * (0-31).  Register 30 is shown as "LR" and register 31 as "SP".  Both the
 * strings and the table itself are read-only.
 */
static const char * const x_reg[] = {
	"x0", "x1", "x2", "x3", "x4", "x5", "x6", "x7",
	"x8", "x9", "x10", "x11", "x12", "x13", "x14", "x15",
	"x16", "x17", "x18", "x19", "x20", "x21", "x22", "x23",
	"x24", "x25", "x26", "x27", "x28", "x29", "LR", "SP",
};
64
/*
 * Shift mnemonics indexed by the value of a 2-bit SHIFT field.  The last
 * entry ("RSV") is a placeholder for the remaining encoding.  Both the
 * strings and the table itself are read-only.
 */
static const char * const shift_2[] = {
	"LSL", "LSR", "ASR", "RSV"
};
68
/*
 * Structure representing single token (operand) inside instruction.
 * name - name of operand (NUL-terminated, so at most
 *        ARM64_MAX_TOKEN_LEN - 1 characters)
 * pos - position within the instruction (in bits); this is the bit number
 *       of the operand's least significant bit
 * len - operand length (in bits)
 */
struct arm64_insn_token {
 char name[ARM64_MAX_TOKEN_LEN];
 int pos;
 int len;
};
80
/*
 * Define generic types for instruction printing.
 * The type stored in an instruction entry selects which operand syntax
 * disasm() prints for it.
 */
enum arm64_format_type {
 /*
  * OP <RD>, <RN>, <RM>{, <shift [LSL, LSR, ASR]> #<imm>} SF32/64
  * OP <RD>, <RN>, #<imm>{, <shift [0, 12]>} SF32/64
  */
 TYPE_01,
 /*
  * OP <RT>, [<RN>, #<imm>]{!}] SF32/64
  * OP <RT>, [<RN>], #<imm>{!} SF32/64
  * OP <RT>, <RN>, <RM> {, EXTEND AMOUNT }
  */
 TYPE_02,
 /* OP <RT>, #imm SF32/64 */
 TYPE_03,
};
92
93 /*
94 * Structure representing single parsed instruction format.
95 * name - opcode name
96 * format - opcode format in a human-readable way
97 * type - syntax type for printing
98 * special_ops - special options passed to a printer (if any)
99 * mask - bitmask for instruction matching
100 * pattern - pattern to look for
101 * tokens - array of tokens (operands) inside instruction
102 */
103 struct arm64_insn {
104 char* name;
105 char* format;
106 enum arm64_format_type type;
107 uint64_t special_ops;
108 uint32_t mask;
109 uint32_t pattern;
110 struct arm64_insn_token tokens[ARM64_MAX_TOKEN_CNT];
111 };
112
113 /*
114 * Specify instruction opcode format in a human-readable way. Use notation
115 * obtained from ARM Architecture Reference Manual for ARMv8-A.
116 *
117 * Format string description:
118 * Each group must be separated by "|". Group made of 0/1 is used to
119 * generate mask and pattern for instruction matching. Groups containing
120 * an operand token (in format NAME(length_bits)) are used to retrieve any
121 * operand data from the instruction. Names here must be meaningful
122 * and match the one described in the Manual.
123 *
124 * Token description:
125 * SF - "" represents 32-bit access, "1" represents 64-bit access
126 * SHIFT - type of shift (instruction dependent)
127 * IMM - immediate value
128 * Rx - register number
129 * OPTION - command specific options
130 * SCALE - scaling of immediate value
131 */
132 static struct arm64_insn arm64_i[] = {
133 { "add", "SF(1)|0001011|SHIFT(2)|0|RM(5)|IMM(6)|RN(5)|RD(5)",
134 TYPE_01, 0 },
135 { "mov", "SF(1)|001000100000000000000|RN(5)|RD(5)",
136 TYPE_01, 0 },
137 { "add", "SF(1)|0010001|SHIFT(2)|IMM(12)|RN(5)|RD(5)",
138 TYPE_01, 0 },
139 { "ldr", "1|SF(1)|111000010|IMM(9)|OPTION(2)|RN(5)|RT(5)",
140 TYPE_02, OP_SIGN_EXT }, /* ldr immediate post/pre index */
141 { "ldr", "1|SF(1)|11100101|IMM(12)|RN(5)|RT(5)",
142 TYPE_02, 0 }, /* ldr immediate unsigned */
143 { "ldr", "1|SF(1)|111000011|RM(5)|OPTION(3)|SCALE(1)|10|RN(5)|RT(5)",
144 TYPE_02, 0 }, /* ldr register */
145 { "ldr", "0|SF(1)|011000|IMM(19)|RT(5)",
146 TYPE_03, OP_SIGN_EXT | OP_LITERAL | OP_MULT_4 }, /* ldr literal */
147 { "ldrb", "00|111000010|IMM(9)|OPTION(2)|RN(5)|RT(5)",
148 TYPE_02, OP_SIGN_EXT | OP_SF32 }, /* ldrb immediate post/pre index */
149 { "ldrb", "00|11100101|IMM(12)|RN(5)|RT(5)",
150 TYPE_02, OP_SF32 }, /* ldrb immediate unsigned */
151 { "ldrb", "00|111000011|RM(5)|OPTION(3)|SCALE(1)|10|RN(5)|RT(5)",
152 TYPE_02, OP_SF32 }, /* ldrb register */
153 { "ldrh", "01|111000010|IMM(9)|OPTION(2)|RN(5)|RT(5)", TYPE_02,
154 OP_SIGN_EXT | OP_SF32 }, /* ldrh immediate post/pre index */
155 { "ldrh", "01|11100101|IMM(12)|RN(5)|RT(5)",
156 TYPE_02, OP_SF32 }, /* ldrh immediate unsigned */
157 { "ldrh", "01|111000011|RM(5)|OPTION(3)|SCALE(1)|10|RN(5)|RT(5)",
158 TYPE_02, OP_SF32 }, /* ldrh register */
159 { "ldrsb", "001110001|SF(1)|0|IMM(9)|OPTION(2)|RN(5)|RT(5)",
160 TYPE_02, OP_SIGN_EXT | OP_SF_INV }, /* ldrsb immediate post/pre index */
161 { "ldrsb", "001110011|SF(1)|IMM(12)|RN(5)|RT(5)",\
162 TYPE_02, OP_SF_INV}, /* ldrsb immediate unsigned */
163 { "ldrsb", "001110001|SF(1)|1|RM(5)|OPTION(3)|SCALE(1)|10|RN(5)|RT(5)",
164 TYPE_02, OP_SF_INV }, /* ldrsb register */
165 { "ldrsh", "011110001|SF(1)|0|IMM(9)|OPTION(2)|RN(5)|RT(5)",
166 TYPE_02, OP_SIGN_EXT | OP_SF_INV }, /* ldrsh immediate post/pre index */
167 { "ldrsh", "011110011|SF(1)|IMM(12)|RN(5)|RT(5)",
168 TYPE_02, OP_SF_INV}, /* ldrsh immediate unsigned */
169 { "ldrsh", "011110001|SF(1)|1|RM(5)|OPTION(3)|SCALE(1)|10|RN(5)|RT(5)",
170 TYPE_02, OP_SF_INV }, /* ldrsh register */
171 { "ldrsw", "10111000100|IMM(9)|OPTION(2)|RN(5)|RT(5)",
172 TYPE_02, OP_SIGN_EXT }, /* ldrsw immediate post/pre index */
173 { "ldrsw", "1011100110|IMM(12)|RN(5)|RT(5)",
174 TYPE_02, 0 }, /* ldrsw immediate unsigned */
175 { "ldrsw", "10111000101|RM(5)|OPTION(3)|SCALE(1)|10|RN(5)|RT(5)",
176 TYPE_02, 0 }, /* ldrsw register */
177 { "ldrsw", "10011000|IMM(19)|RT(5)",
178 TYPE_03, OP_SIGN_EXT | OP_LITERAL | OP_MULT_4 }, /* ldr literal */
179 { NULL, NULL }
180 };
181
182 static void
183 arm64_disasm_generate_masks(struct arm64_insn *tab)
184 {
185 uint32_t mask, val;
186 int a, i;
187 int len, ret;
188 int token = 0;
189 char *format;
190 int error;
191
192 while (tab->name != NULL) {
193 mask = 0;
194 val = 0;
195 format = tab->format;
196 token = 0;
197 error = 0;
198
199 /*
200 * For each entry analyze format strings from the
201 * left (i.e. from the MSB).
202 */
203 a = (INSN_SIZE * NBBY) - 1;
204 while (*format != '\0' && (a >= 0)) {
205 switch(*format) {
206 case '':
207 /* Bit is 0, add to mask and pattern */
208 mask |= (1 << a);
209 a--;
210 format++;
211 break;
212 case '1':
213 /* Bit is 1, add to mask and pattern */
214 mask |= (1 << a);
215 val |= (1 << a);
216 a--;
217 format++;
218 break;
219 case '|':
220 /* skip */
221 format++;
222 break;
223 default:
224 /* Token found, copy the name */
225 memset(tab->tokens[token].name, 0,
226 sizeof(tab->tokens[token].name));
227 i = 0;
228 while (*format != '(') {
229 tab->tokens[token].name[i] = *format;
230 i++;
231 format++;
232 if (i >= ARM64_MAX_TOKEN_LEN) {
233 printf("ERROR: token too long in op %s\n",
234 tab->name);
235 error = 1;
236 break;
237 }
238 }
239 if (error != 0)
240 break;
241
242 /* Read the length value */
243 ret = sscanf(format, "(%d)", &len);
244 if (ret == 1) {
245 if (token >= ARM64_MAX_TOKEN_CNT) {
246 printf("ERROR: to many tokens in op %s\n",
247 tab->name);
248 error = 1;
249 break;
250 }
251
252 a -= len;
253 tab->tokens[token].pos = a + 1;
254 tab->tokens[token].len = len;
255 token++;
256 }
257
258 /* Skip to the end of the token */
259 while (*format != 0 && *format != '|')
260 format++;
261 }
262 }
263
264 /* Write mask and pattern to the instruction array */
265 tab->mask = mask;
266 tab->pattern = val;
267
268 /*
269 * If we got here, format string must be parsed and "a"
270 * should point to -1. If it's not, wrong number of bits
271 * in format string. Mark this as invalid and prevent
272 * from being matched.
273 */
274 if (*format != 0 || (a != -1) || (error != 0)) {
275 tab->mask = 0;
276 tab->pattern = 0xffffffff;
277 printf("ERROR: skipping instruction op %s\n",
278 tab->name);
279 }
280
281 tab++;
282 }
283 }
284
285 static int
286 arm64_disasm_read_token(struct arm64_insn *insn, u_int opcode,
287 const char *token, int *val)
288 {
289 int i;
290
291 for (i = 0; i < ARM64_MAX_TOKEN_CNT; i++) {
292 if (strcmp(insn->tokens[i].name, token) == 0) {
293 *val = (opcode >> insn->tokens[i].pos &
294 ((1 << insn->tokens[i].len) - 1));
295 return (0);
296 }
297 }
298
299 return (EINVAL);
300 }
301
302 static int
303 arm64_disasm_read_token_sign_ext(struct arm64_insn *insn, u_int opcode,
304 const char *token, int *val)
305 {
306 int i;
307 int msk;
308
309 for (i = 0; i < ARM64_MAX_TOKEN_CNT; i++) {
310 if (strcmp(insn->tokens[i].name, token) == 0) {
311 msk = (1 << insn->tokens[i].len) - 1;
312 *val = ((opcode >> insn->tokens[i].pos) & msk);
313
314 /* If last bit is 1, sign-extend the value */
315 if (*val & (1 << (insn->tokens[i].len - 1)))
316 *val |= ~msk;
317
318 return (0);
319 }
320 }
321
322 return (EINVAL);
323 }
324
325 static const char *
326 arm64_reg(int b64, int num)
327 {
328
329 if (b64 != 0)
330 return (x_reg[num]);
331
332 return (w_reg[num]);
333 }
334
335 vm_offset_t
336 disasm(const struct disasm_interface *di, vm_offset_t loc, int altfmt)
337 {
338 struct arm64_insn *i_ptr = arm64_i;
339 uint32_t insn;
340 int matchp;
341 int ret;
342 int shift, rm, rt, rd, rn, imm, sf, idx, option, scale, amount;
343 int sign_ext;
344 int rm_absent;
345 /* Indicate if immediate should be outside or inside brackets */
346 int inside;
347 /* Print exclamation mark if pre-incremented */
348 int pre;
349
350 /* Initialize defaults, all are 0 except SF indicating 64bit access */
351 shift = rd = rm = rn = imm = idx = option = amount = scale = 0;
352 sign_ext = 0;
353 sf = 1;
354
355 matchp = 0;
356 insn = di->di_readword(loc);
357 while (i_ptr->name) {
358 /* If mask is 0 then the parser was not initialized yet */
359 if ((i_ptr->mask != 0) &&
360 ((insn & i_ptr->mask) == i_ptr->pattern)) {
361 matchp = 1;
362 break;
363 }
364 i_ptr++;
365 }
366 if (matchp == 0)
367 goto undefined;
368
369 /* Global options */
370 if (i_ptr->special_ops & OP_SF32)
371 sf = 0;
372
373 /* Global optional tokens */
374 arm64_disasm_read_token(i_ptr, insn, "SF", &sf);
375 if (i_ptr->special_ops & OP_SF_INV)
376 sf = 1 - sf;
377 if (arm64_disasm_read_token(i_ptr, insn, "SIGN", &sign_ext) == 0)
378 sign_ext = 1 - sign_ext;
379 if (i_ptr->special_ops & OP_SIGN_EXT)
380 sign_ext = 1;
381 if (sign_ext != 0)
382 arm64_disasm_read_token_sign_ext(i_ptr, insn, "IMM", &imm);
383 else
384 arm64_disasm_read_token(i_ptr, insn, "IMM", &imm);
385 if (i_ptr->special_ops & OP_MULT_4)
386 imm <<= 2;
387
388 /* Print opcode by type */
389 switch (i_ptr->type) {
390 case TYPE_01:
391 /* OP <RD>, <RN>, <RM>{, <shift [LSL, LSR, ASR]> #<imm>} SF32/64
392 OP <RD>, <RN>, #<imm>{, <shift [0, 12]>} SF32/64 */
393
394 /* Mandatory tokens */
395 ret = arm64_disasm_read_token(i_ptr, insn, "RD", &rd);
396 ret |= arm64_disasm_read_token(i_ptr, insn, "RN", &rn);
397 if (ret != 0) {
398 printf("ERROR: Missing mandatory token for op %s type %d\n",
399 i_ptr->name, i_ptr->type);
400 goto undefined;
401 }
402
403 /* Optional tokens */
404 arm64_disasm_read_token(i_ptr, insn, "SHIFT", &shift);
405 rm_absent = arm64_disasm_read_token(i_ptr, insn, "RM", &rm);
406
407 di->di_printf("%s\t%s, %s", i_ptr->name, arm64_reg(sf, rd),
408 arm64_reg(sf, rn));
409
410 /* If RM is present use it, otherwise use immediate notation */
411 if (rm_absent == 0) {
412 di->di_printf(", %s", arm64_reg(sf, rm));
413 if (imm != 0)
414 di->di_printf(", %s #%d", shift_2[shift], imm);
415 } else {
416 if (imm != 0 || shift != 0)
417 di->di_printf(", #0x%x", imm);
418 if (shift != 0)
419 di->di_printf(" LSL #12");
420 }
421 break;
422 case TYPE_02:
423 /* OP <RT>, [<RN>, #<imm>]{!}] SF32/64
424 OP <RT>, [<RN>], #<imm>{!} SF32/64
425 OP <RT>, <RN>, <RM> {, EXTEND AMOUNT } */
426
427 /* Mandatory tokens */
428 ret = arm64_disasm_read_token(i_ptr, insn, "RT", &rt);
429 ret |= arm64_disasm_read_token(i_ptr, insn, "RN", &rn);
430 if (ret != 0) {
431 printf("ERROR: Missing mandatory token for op %s type %d\n",
432 i_ptr->name, i_ptr->type);
433 goto undefined;
434 }
435
436 /* Optional tokens */
437 arm64_disasm_read_token(i_ptr, insn, "OPTION", &option);
438 arm64_disasm_read_token(i_ptr, insn, "SCALE", &scale);
439 rm_absent = arm64_disasm_read_token(i_ptr, insn, "RM", &rm);
440
441 if (rm_absent) {
442 /*
443 * In unsigned operation, shift immediate value
444 * and reset options to default.
445 */
446 if (sign_ext == 0) {
447 imm = imm << ((insn >> ARM_INSN_SIZE_OFFSET) &
448 ARM_INSN_SIZE_MASK);
449 option = 0;
450 }
451 switch (option) {
452 case 0x0:
453 pre = 0;
454 inside = 1;
455 break;
456 case 0x1:
457 pre = 0;
458 inside = 0;
459 break;
460 case 0x2:
461 default:
462 pre = 1;
463 inside = 1;
464 break;
465 }
466
467 di->di_printf("%s\t%s, ", i_ptr->name, arm64_reg(sf, rt));
468 if (inside != 0) {
469 di->di_printf("[%s", arm64_reg(1, rn));
470 if (imm != 0)
471 di->di_printf(", #%d", imm);
472 di->di_printf("]");
473 } else {
474 di->di_printf("[%s]", arm64_reg(1, rn));
475 if (imm != 0)
476 di->di_printf(", #%d", imm);
477 }
478 if (pre != 0)
479 di->di_printf("!");
480 } else {
481 /* Last bit of option field determines 32/64 bit offset */
482 di->di_printf("%s\t%s, [%s, %s", i_ptr->name,
483 arm64_reg(sf, rt), arm64_reg(1, rn),
484 arm64_reg(option & 1, rm));
485
486 /* Calculate amount, it's op(31:30) */
487 amount = (insn >> ARM_INSN_SIZE_OFFSET) &
488 ARM_INSN_SIZE_MASK;
489
490 switch (option) {
491 case 0x2:
492 di->di_printf(", uxtw #%d", amount);
493 break;
494 case 0x3:
495 if (scale != 0)
496 di->di_printf(", lsl #%d", amount);
497 break;
498 case 0x6:
499 di->di_printf(", sxtw #%d", amount);
500 break;
501 case 0x7:
502 di->di_printf(", sxts #%d", amount);
503 break;
504 default:
505 di->di_printf(", RSVD");
506 break;
507 }
508 di->di_printf("]");
509 }
510
511 break;
512
513 case TYPE_03:
514 /* OP <RT>, #imm SF32/64 */
515
516 /* Mandatory tokens */
517 ret = arm64_disasm_read_token(i_ptr, insn, "RT", &rt);
518 if (ret != 0) {
519 printf("ERROR: Missing mandatory token for op %s type %d\n",
520 i_ptr->name, i_ptr->type);
521 goto undefined;
522 }
523
524 di->di_printf("%s\t%s, ", i_ptr->name, arm64_reg(sf, rt));
525 if (i_ptr->special_ops & OP_LITERAL)
526 di->di_printf("0x%lx", loc + imm);
527 else
528 di->di_printf("#%d", imm);
529
530 break;
531 default:
532 goto undefined;
533 }
534
535 di->di_printf("\n");
536 return(loc + INSN_SIZE);
537
538 undefined:
539 di->di_printf("undefined\t%08x\n", insn);
540 return(loc + INSN_SIZE);
541 }
542
/*
 * Parse format strings at the very beginning: register
 * arm64_disasm_generate_masks() as a SYSINIT handler (subsystem
 * SI_SUB_DDB_SERVICES, order SI_ORDER_FIRST) with arm64_i as its argument.
 */
SYSINIT(arm64_disasm_generate_masks, SI_SUB_DDB_SERVICES,
 SI_ORDER_FIRST, arm64_disasm_generate_masks, arm64_i);
Cache object: e5c0a1ec2e267782030c7f71d8fd90bc
|