1 // clang++ -o kt-dump{,.cpp} -Wall -std=c++20
2
#include <assert.h>
#include <cstdint>
#include <cstdio>
#include <cstring>
#include <filesystem>
#include <fstream>
#include <iostream>
#include <mach-o/fat.h>
#include <mach-o/loader.h>
#include <optional>
#include <set>
#include <span>
#include <string>
#include <system_error>
#include <utility>
#include <vector>
14
15 /*
16 * kt-dump.cpp
17 *
18 * Tool to dump the kalloc type information from a given Mach-O binary.
19 * Usage:
20 * kt-dump [-f <simple|json|struct|stats>] <mach-o>
21 *
22 * The tool will scan the given Mach-O to find the __kalloc_type section.
23 * It will then walk that section using the kalloc_type_view definition
24 * provided below, in order to dump the type names and signatures that
25 * have been compiled into the binary.
26 *
27 * The output "format" can be specified with the -f option. The default
28 * format ("simple") will output the type name and the signature,
29 * enclosed in square brackets. The "json" format will print a JSON
30 * dictionary for each kalloc_type_view entry, including the type name,
31 * its size and the signature. The "struct" output format will use
32 * __builtin_dump_struct to dump a C-like representation of the view.
33 * Finally, if the "stats" output format is chosen, the tool will only
34 * show overall information about the __kalloc_type section.
35 *
36 * The tool supports both MH_KEXT_BUNDLE and kernel cache files. If a
37 * FAT Mach-O is provided, it must contain an arm64 slice.
38 */
39
40 /* Read in_path into out_vec */
41 static bool read_file(std::string in_path, std::vector<uint8_t> &out_vec);
42
43 /* Find a suitable arch span in a FAT file */
44 static std::optional<std::span<uint8_t> >
45 find_arm64_slice(const std::span<uint8_t> &contents);
46
/*
 * Note: zone_view and kalloc_type_view must be kept in sync with the
 * definitions in kalloc.h/zalloc.h, since the tool overlays them on the
 * raw bytes of the __kalloc_type section.
 */
struct zone_view {
    void       *zv_zone;
    void       *zv_stats;
    const char *zv_name;   /* resolved by the tool against __TEXT,__cstring */
    void       *zv_next;
};
54
55 struct kalloc_type_view {
56 struct zone_view kt_zv;
57 const char *kt_signature;
58 uint32_t kt_flags;
59 uint32_t kt_size;
60 void *kt_site;
61 void *unused;
62 };
63
64 template <typename T> struct macho_section {
65 section_64 section;
66 std::span<const T> contents;
67
68 macho_section(const section_64 &sec, std::span<uint8_t> data)
69 : section(sec),
70 contents(reinterpret_cast<T *>(
71 data.subspan(sec.offset, sec.size / sizeof(T)).data()),
72 sec.size / sizeof(T))
73 {
74 }
75 };
76
77 int
78 main(int argc, char const *argv[])
79 {
80 if (argc != 2 && argc != 4) {
81 std::cout << "Usage: " << argv[0]
82 << " [-f <simple|json|struct|stats>] <mach-o>\n";
83 return 1;
84 }
85
86 enum class out_fmt_type {
87 SIMPLE,
88 JSON,
89 STRUCT,
90 STATS
91 } out_fmt = out_fmt_type::SIMPLE;
92 std::string arg_str;
93 std::vector<uint8_t> file_contents;
94 uint32_t file_magic = 0;
95 std::span<uint8_t> slice_contents;
96 mach_header_64 *hdr = NULL;
97 std::optional<macho_section<kalloc_type_view> > sec_types;
98 std::optional<macho_section<char> > sec_cstring;
99 struct {
100 size_t uniq_structs_sz;
101 size_t names_sz;
102 size_t sig_sz;
103 } stats = {};
104
105 /* Parse command line args */
106 for (int i = 1; i < argc; i++) {
107 std::string arg(argv[i]);
108 if (arg == "-f") {
109 if (++i == argc) {
110 std::cerr << "Option " << arg << " requires an argument\n";
111 return 1;
112 }
113 arg = argv[i];
114 if (arg == "simple") {
115 out_fmt = out_fmt_type::SIMPLE;
116 } else if (arg == "json" || arg == "JSON") {
117 out_fmt = out_fmt_type::JSON;
118 } else if (arg == "struct") {
119 out_fmt = out_fmt_type::STRUCT;
120 } else if (arg == "stats") {
121 out_fmt = out_fmt_type::STATS;
122 } else {
123 std::cerr << "Unknown output format: " << arg << std::endl;
124 return 1;
125 }
126 } else {
127 /* Read the file specified as a positional arg */
128 if (!read_file(arg, file_contents)) {
129 std::cerr << "Failed to read file: " << arg << std::endl;
130 return 1;
131 }
132 }
133 }
134
135 file_magic = *reinterpret_cast<uint32_t *>(file_contents.data());
136 if (file_magic == MH_MAGIC_64) {
137 /* Single arch Mach-O file: the slice covers the whole file */
138 slice_contents = std::span(file_contents);
139 } else if (file_magic == FAT_CIGAM) {
140 /* FAT Mach-O: Retrieve the appropriate slice */
141 auto arch_span = find_arm64_slice(file_contents);
142 if (!arch_span) {
143 std::cerr << "Could not find a suitable arch\n";
144 return 1;
145 }
146 slice_contents = arch_span.value();
147 } else {
148 std::cerr << "Unsupported file magic: 0x" << std::hex << file_magic << "\n";
149 return 1;
150 }
151 assert(slice_contents.size() > sizeof(*hdr));
152 hdr = reinterpret_cast<mach_header_64 *>(slice_contents.data());
153 if (hdr->magic != MH_MAGIC_64) {
154 std::cerr << "Unsupported header magic: 0x" << std::hex << hdr->magic
155 << "\n";
156 return 1;
157 }
158
159 for (uint32_t cmds_offset = sizeof(*hdr); cmds_offset < hdr->sizeofcmds;) {
160 load_command *cmd =
161 reinterpret_cast<load_command *>(&slice_contents[cmds_offset]);
162 cmds_offset += cmd->cmdsize;
163 /* We only need to process LC_SEGMENT_64 */
164 if (cmd->cmd != LC_SEGMENT_64) {
165 continue;
166 }
167
168 segment_command_64 *seg_cmd = reinterpret_cast<segment_command_64 *>(cmd);
169 std::span<section_64> sections(reinterpret_cast<section_64 *>(seg_cmd + 1),
170 seg_cmd->nsects);
171 for (auto &sec : sections) {
172 std::string segname(sec.segname);
173 std::string sectname(sec.sectname);
174 if (sectname == "__kalloc_type") {
175 assert(!sec_types && "Multiple __kalloc_type sections?");
176 assert(sec.size % sizeof(kalloc_type_view) == 0 &&
177 "Check the definition of kalloc_type_view");
178 sec_types = macho_section<kalloc_type_view>(sec, slice_contents);
179 } else if (segname == "__TEXT" && sectname == "__cstring") {
180 sec_cstring = macho_section<char>(sec, slice_contents);
181 }
182 }
183 }
184
185 if (!sec_types) {
186 std::cerr << "Could not find __kalloc_type section\n";
187 return 1;
188 }
189 if (!sec_cstring) {
190 std::cerr << "Could not find __TEXT,__cstring section\n";
191 return 1;
192 }
193
194 std::set<std::pair<uint32_t, uint32_t> > dedup_entries;
195 std::set<uint32_t> dedup_strings;
196
197 for (auto &ktv : sec_types->contents) {
198 uintptr_t name_p = reinterpret_cast<uintptr_t>(ktv.kt_zv.zv_name);
199 uintptr_t signature_p = reinterpret_cast<uintptr_t>(ktv.kt_signature);
200 /*
201 * Compute the offsets into the __cstring section.
202 * This works for both single kexts (MH_KEXT_BUNDLE) and kernel caches.
203 * For the former, the __cstring section addr is the offset of the section
204 * into the slice. For the latter, the __cstring section addr is the virtual
205 * address of the section, and the fields are pointers into such space.
206 */
207 uint32_t name_off = (name_p - sec_cstring->section.addr) & 0xffffffff;
208 uint32_t sig_off = (signature_p - sec_cstring->section.addr) & 0xffffffff;
209
210 /* Only output the equal entries (same name/signature) once */
211 if (!dedup_entries.insert(std::make_tuple(name_off, sig_off)).second) {
212 continue;
213 }
214
215 stats.uniq_structs_sz += sizeof(ktv);
216
217 const char *name = &sec_cstring->contents[name_off];
218 const char *signature = &sec_cstring->contents[sig_off];
219 if (dedup_strings.insert(name_off).second) {
220 stats.names_sz += strlen(name) + 1;
221 }
222 if (dedup_strings.insert(sig_off).second) {
223 stats.sig_sz += strlen(signature) + 1;
224 }
225
226 switch (out_fmt) {
227 case out_fmt_type::SIMPLE:
228 std::cout << name << " [" << signature << "]\n";
229 break;
230 case out_fmt_type::JSON:
231 std::cout << "{\"name\":\"" << name << "\","
232 << "\"signature\":\"" << signature << "\","
233 << "\"size\":" << ktv.kt_size << "}\n";
234 break;
235 case out_fmt_type::STRUCT: {
236 /* Make a copy and fill in the pointers to the cstring section */
237 kalloc_type_view printable_view = ktv;
238 printable_view.kt_zv.zv_name = name;
239 printable_view.kt_signature = signature;
240 __builtin_dump_struct(&printable_view, &printf);
241 } break;
242 case out_fmt_type::STATS:
243 break;
244 }
245 }
246 if (out_fmt == out_fmt_type::STATS) {
247 std::cout << "__kalloc_type: " << sec_types->section.size << std::endl;
248 std::cout << "uniq structs: " << stats.uniq_structs_sz << std::endl;
249 std::cout << "names strings: " << stats.names_sz << std::endl;
250 std::cout << "signatures strings: " << stats.sig_sz << std::endl;
251 }
252
253 return 0;
254 }
255
/*
 * Read the whole file at in_path into out_vec.
 *
 * in_path: path of the file to read.
 * out_vec: receives the file bytes; left empty on failure.
 *
 * Returns true if the file was read completely. Returns false (without
 * throwing) if the size cannot be determined, the file cannot be opened,
 * or fewer bytes than expected could be read.
 */
static bool
read_file(std::string in_path, std::vector<uint8_t> &out_vec)
{
    out_vec.clear();

    const std::filesystem::path path(in_path);

    /* error_code overload: a missing file is reported, not thrown */
    std::error_code ec;
    const std::uintmax_t size = std::filesystem::file_size(path, ec);
    if (ec) {
        return false;
    }

    std::ifstream file(path, std::ifstream::binary);
    if (!file.is_open()) {
        return false;
    }

    out_vec.resize(static_cast<size_t>(size));
    file.read(reinterpret_cast<char *>(out_vec.data()),
        static_cast<std::streamsize>(out_vec.size()));
    /* A short read would leave zero-filled bytes that look like file data */
    if (static_cast<std::uintmax_t>(file.gcount()) != size) {
        out_vec.clear();
        return false;
    }
    return true;
}
267
268 static std::optional<std::span<uint8_t> >
269 find_arm64_slice(const std::span<uint8_t> &contents)
270 {
271 fat_header *fhdr = reinterpret_cast<fat_header *>(contents.data());
272 std::span<fat_arch> fat_archs(
273 reinterpret_cast<fat_arch *>(&contents[sizeof(fat_header)]),
274 OSSwapInt32(fhdr->nfat_arch));
275 std::optional<std::span<uint8_t> > chosen_span;
276 for (auto &arch : fat_archs) {
277 if (OSSwapInt32(arch.cputype) == CPU_TYPE_ARM64) {
278 if (OSSwapInt32(arch.cpusubtype) == CPU_SUBTYPE_ARM64E || !chosen_span) {
279 chosen_span =
280 contents.subspan(OSSwapInt32(arch.offset), OSSwapInt32(arch.size));
281 }
282 }
283 }
284 return chosen_span;
285 }
Cache object: e97caa58dd31470baf05709daa721e01
|