1 /*
2 * Copyright (c) 2008 The DragonFly Project. All rights reserved.
3 *
4 * This code is derived from software contributed to The DragonFly Project
5 * by Matthew Dillon <dillon@backplane.com>
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
10 *
11 * 1. Redistributions of source code must retain the above copyright
12 * notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in
15 * the documentation and/or other materials provided with the
16 * distribution.
17 * 3. Neither the name of The DragonFly Project nor the names of its
18 * contributors may be used to endorse or promote products derived
19 * from this software without specific, prior written permission.
20 *
21 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
24 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
25 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
26 * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
27 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
28 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
29 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
30 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
31 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
32 * SUCH DAMAGE.
33 */
34 /*
35 * HAMMER PFS ioctls - Manage pseudo-fs configurations
36 */
37
38 #include "hammer.h"
39
40 static int hammer_pfs_autodetect(struct hammer_ioc_pseudofs_rw *pfs,
41 hammer_inode_t ip);
42 static int hammer_pfs_rollback(hammer_transaction_t trans,
43 hammer_pseudofs_inmem_t pfsm,
44 hammer_tid_t trunc_tid);
45 static int hammer_pfs_delete_at_cursor(hammer_cursor_t cursor,
46 hammer_tid_t trunc_tid);
47
48 /*
49 * Get mirroring/pseudo-fs information
50 *
51 * NOTE: The ip used for ioctl is not necessarily related to the PFS
52 */
53 int
54 hammer_ioc_get_pseudofs(hammer_transaction_t trans, hammer_inode_t ip,
55 struct hammer_ioc_pseudofs_rw *pfs)
56 {
57 hammer_pseudofs_inmem_t pfsm;
58 u_int32_t localization;
59 int error;
60
61 if ((error = hammer_pfs_autodetect(pfs, ip)) != 0)
62 return(error);
63 localization = (u_int32_t)pfs->pfs_id << 16;
64 pfs->bytes = sizeof(struct hammer_pseudofs_data);
65 pfs->version = HAMMER_IOC_PSEUDOFS_VERSION;
66
67 pfsm = hammer_load_pseudofs(trans, localization, &error);
68 if (error) {
69 hammer_rel_pseudofs(trans->hmp, pfsm);
70 return(error);
71 }
72
73 /*
74 * If the PFS is a master the sync tid is set by normal operation
75 * rather than the mirroring code, and will always track the
76 * real HAMMER filesystem.
77 *
78 * We use flush_tid1, which is the highest fully committed TID.
79 * flush_tid2 is the TID most recently flushed, but the UNDO hasn't
80 * caught up to it yet so a crash will roll us back to flush_tid1.
81 */
82 if ((pfsm->pfsd.mirror_flags & HAMMER_PFSD_SLAVE) == 0)
83 pfsm->pfsd.sync_end_tid = trans->hmp->flush_tid1;
84
85 /*
86 * Copy out to userland.
87 */
88 error = 0;
89 if (pfs->ondisk && error == 0)
90 error = copyout(&pfsm->pfsd, pfs->ondisk, sizeof(pfsm->pfsd));
91 hammer_rel_pseudofs(trans->hmp, pfsm);
92 return(error);
93 }
94
95 /*
96 * Set mirroring/pseudo-fs information
97 *
98 * NOTE: The ip used for ioctl is not necessarily related to the PFS
99 */
100 int
101 hammer_ioc_set_pseudofs(hammer_transaction_t trans, hammer_inode_t ip,
102 struct ucred *cred, struct hammer_ioc_pseudofs_rw *pfs)
103 {
104 hammer_pseudofs_inmem_t pfsm;
105 u_int32_t localization;
106 int error;
107
108 if ((error = hammer_pfs_autodetect(pfs, ip)) != 0)
109 return(error);
110 localization = (u_int32_t)pfs->pfs_id << 16;
111 if (pfs->version != HAMMER_IOC_PSEUDOFS_VERSION)
112 error = EINVAL;
113 localization = (u_int32_t)pfs->pfs_id << 16;
114
115 if (error == 0 && pfs->ondisk) {
116 /*
117 * Load the PFS so we can modify our in-core copy. Ignore
118 * ENOENT errors.
119 */
120 pfsm = hammer_load_pseudofs(trans, localization, &error);
121 error = copyin(pfs->ondisk, &pfsm->pfsd, sizeof(pfsm->pfsd));
122
123 /*
124 * Save it back, create a root inode if we are in master
125 * mode and no root exists.
126 *
127 * We do not create root inodes for slaves, the root inode
128 * must be mirrored from the master.
129 */
130 if (error == 0 &&
131 (pfsm->pfsd.mirror_flags & HAMMER_PFSD_SLAVE) == 0) {
132 error = hammer_mkroot_pseudofs(trans, cred, pfsm);
133 }
134 if (error == 0)
135 error = hammer_save_pseudofs(trans, pfsm);
136
137 /*
138 * Wakeup anyone waiting for a TID update for this PFS
139 */
140 wakeup(&pfsm->pfsd.sync_end_tid);
141 hammer_rel_pseudofs(trans->hmp, pfsm);
142 }
143 return(error);
144 }
145
146 /*
147 * Upgrade a slave to a master
148 *
149 * This is fairly easy to do, but we must physically undo any partial syncs
150 * for transaction ids > sync_end_tid. Effective, we must do a partial
151 * rollback.
152 *
153 * NOTE: The ip used for ioctl is not necessarily related to the PFS
154 */
155 int
156 hammer_ioc_upgrade_pseudofs(hammer_transaction_t trans, hammer_inode_t ip,
157 struct hammer_ioc_pseudofs_rw *pfs)
158 {
159 hammer_pseudofs_inmem_t pfsm;
160 u_int32_t localization;
161 int error;
162
163 if ((error = hammer_pfs_autodetect(pfs, ip)) != 0)
164 return(error);
165 localization = (u_int32_t)pfs->pfs_id << 16;
166 if ((error = hammer_unload_pseudofs(trans, localization)) != 0)
167 return(error);
168
169 /*
170 * A master id must be set when upgrading
171 */
172 pfsm = hammer_load_pseudofs(trans, localization, &error);
173 if (error == 0) {
174 if ((pfsm->pfsd.mirror_flags & HAMMER_PFSD_SLAVE) != 0) {
175 error = hammer_pfs_rollback(trans, pfsm,
176 pfsm->pfsd.sync_end_tid + 1);
177 if (error == 0) {
178 pfsm->pfsd.mirror_flags &= ~HAMMER_PFSD_SLAVE;
179 error = hammer_save_pseudofs(trans, pfsm);
180 }
181 }
182 }
183 hammer_rel_pseudofs(trans->hmp, pfsm);
184 if (error == EINTR) {
185 pfs->head.flags |= HAMMER_IOC_HEAD_INTR;
186 error = 0;
187 }
188 return (error);
189 }
190
191 /*
192 * Downgrade a master to a slave
193 *
194 * This is really easy to do, just set the SLAVE flag and update sync_end_tid.
195 *
196 * We previously did not update sync_end_tid in consideration for a slave
197 * upgraded to a master and then downgraded again, but this completely breaks
198 * the case where one starts with a master and then downgrades to a slave,
199 * then upgrades again.
200 *
201 * NOTE: The ip used for ioctl is not necessarily related to the PFS
202 */
203 int
204 hammer_ioc_downgrade_pseudofs(hammer_transaction_t trans, hammer_inode_t ip,
205 struct hammer_ioc_pseudofs_rw *pfs)
206 {
207 hammer_mount_t hmp = trans->hmp;
208 hammer_pseudofs_inmem_t pfsm;
209 u_int32_t localization;
210 int error;
211
212 if ((error = hammer_pfs_autodetect(pfs, ip)) != 0)
213 return(error);
214 localization = (u_int32_t)pfs->pfs_id << 16;
215 if ((error = hammer_unload_pseudofs(trans, localization)) != 0)
216 return(error);
217
218 pfsm = hammer_load_pseudofs(trans, localization, &error);
219 if (error == 0) {
220 if ((pfsm->pfsd.mirror_flags & HAMMER_PFSD_SLAVE) == 0) {
221 pfsm->pfsd.mirror_flags |= HAMMER_PFSD_SLAVE;
222 if (pfsm->pfsd.sync_end_tid < hmp->flush_tid1)
223 pfsm->pfsd.sync_end_tid = hmp->flush_tid1;
224 error = hammer_save_pseudofs(trans, pfsm);
225 }
226 }
227 hammer_rel_pseudofs(trans->hmp, pfsm);
228 return (error);
229 }
230
231 /*
232 * Destroy a PFS
233 *
234 * We can destroy a PFS by scanning and deleting all of its records in the
235 * B-Tree. The hammer utility will delete the softlink in the primary
236 * filesystem.
237 *
238 * NOTE: The ip used for ioctl is not necessarily related to the PFS
239 */
240 int
241 hammer_ioc_destroy_pseudofs(hammer_transaction_t trans, hammer_inode_t ip,
242 struct hammer_ioc_pseudofs_rw *pfs)
243 {
244 hammer_pseudofs_inmem_t pfsm;
245 u_int32_t localization;
246 int error;
247
248 if ((error = hammer_pfs_autodetect(pfs, ip)) != 0)
249 return(error);
250 localization = (u_int32_t)pfs->pfs_id << 16;
251
252 if ((error = hammer_unload_pseudofs(trans, localization)) != 0)
253 return(error);
254
255 pfsm = hammer_load_pseudofs(trans, localization, &error);
256 if (error == 0) {
257 error = hammer_pfs_rollback(trans, pfsm, 0);
258 if (error == 0) {
259 pfsm->pfsd.mirror_flags |= HAMMER_PFSD_DELETED;
260 error = hammer_save_pseudofs(trans, pfsm);
261 }
262 }
263 hammer_rel_pseudofs(trans->hmp, pfsm);
264 if (error == EINTR) {
265 pfs->head.flags |= HAMMER_IOC_HEAD_INTR;
266 error = 0;
267 }
268 return(error);
269 }
270
271 /*
272 * Wait for the PFS to sync past the specified TID
273 */
274 int
275 hammer_ioc_wait_pseudofs(hammer_transaction_t trans, hammer_inode_t ip,
276 struct hammer_ioc_pseudofs_rw *pfs)
277 {
278 hammer_pseudofs_inmem_t pfsm;
279 struct hammer_pseudofs_data pfsd;
280 u_int32_t localization;
281 hammer_tid_t tid;
282 void *waitp;
283 int error;
284
285 if ((error = hammer_pfs_autodetect(pfs, ip)) != 0)
286 return(error);
287 localization = (u_int32_t)pfs->pfs_id << 16;
288
289 if ((error = copyin(pfs->ondisk, &pfsd, sizeof(pfsd))) != 0)
290 return(error);
291
292 pfsm = hammer_load_pseudofs(trans, localization, &error);
293 if (error == 0) {
294 if (pfsm->pfsd.mirror_flags & HAMMER_PFSD_SLAVE) {
295 tid = pfsm->pfsd.sync_end_tid;
296 waitp = &pfsm->pfsd.sync_end_tid;
297 } else {
298 tid = trans->hmp->flush_tid1;
299 waitp = &trans->hmp->flush_tid1;
300 }
301 if (tid <= pfsd.sync_end_tid)
302 tsleep(waitp, PCATCH, "hmrmwt", 0);
303 }
304 hammer_rel_pseudofs(trans->hmp, pfsm);
305 if (error == EINTR) {
306 pfs->head.flags |= HAMMER_IOC_HEAD_INTR;
307 error = 0;
308 }
309 return(error);
310 }
311
312
313 /*
314 * Auto-detect the pseudofs and do basic bounds checking.
315 */
316 static
317 int
318 hammer_pfs_autodetect(struct hammer_ioc_pseudofs_rw *pfs, hammer_inode_t ip)
319 {
320 int error = 0;
321
322 if (pfs->pfs_id == -1)
323 pfs->pfs_id = (int)(ip->obj_localization >> 16);
324 if (pfs->pfs_id < 0 || pfs->pfs_id >= HAMMER_MAX_PFS)
325 error = EINVAL;
326 if (pfs->bytes < sizeof(struct hammer_pseudofs_data))
327 error = EINVAL;
328 return(error);
329 }
330
331 /*
332 * Rollback the specified PFS to (trunc_tid - 1), removing everything
333 * greater or equal to trunc_tid. The PFS must not have been in no-mirror
334 * mode or the MIRROR_FILTERED scan will not work properly.
335 *
336 * This is typically used to remove any partial syncs when upgrading a
337 * slave to a master. It can theoretically also be used to rollback
338 * any PFS, including PFS#0, BUT ONLY TO POINTS THAT HAVE NOT YET BEEN
339 * PRUNED, and to points that are older only if they are on a retained
340 * (pruning softlink) boundary.
341 *
342 * Rollbacks destroy information. If you don't mind inode numbers changing
343 * a better way would be to cpdup a snapshot back onto the master.
344 */
static
int
hammer_pfs_rollback(hammer_transaction_t trans,
		    hammer_pseudofs_inmem_t pfsm,
		    hammer_tid_t trunc_tid)
{
	struct hammer_cmirror cmirror;
	struct hammer_cursor cursor;
	struct hammer_base_elm key_cur;
	int error;
	int seq;

	/*
	 * Start the scan at the absolute beginning of this PFS's key
	 * space.  key_cur tracks scan progress so an EDEADLK retry can
	 * resume where it left off instead of restarting from scratch.
	 */
	bzero(&cmirror, sizeof(cmirror));
	bzero(&key_cur, sizeof(key_cur));
	key_cur.localization = HAMMER_MIN_LOCALIZATION + pfsm->localization;
	key_cur.obj_id = HAMMER_MIN_OBJID;
	key_cur.key = HAMMER_MIN_KEY;
	key_cur.create_tid = 1;
	key_cur.rec_type = HAMMER_MIN_RECTYPE;

	/* Flusher sequence snapshot, used to throttle the scan below */
	seq = trans->hmp->flusher.done;

retry:
	error = hammer_init_cursor(trans, &cursor, NULL, NULL);
	if (error) {
		hammer_done_cursor(&cursor);
		goto failed;
	}
	/* Scan range: [key_cur .. end of this PFS's key space], inclusive */
	cursor.key_beg = key_cur;
	cursor.key_end.localization = HAMMER_MAX_LOCALIZATION +
				      pfsm->localization;
	cursor.key_end.obj_id = HAMMER_MAX_OBJID;
	cursor.key_end.key = HAMMER_MAX_KEY;
	cursor.key_end.create_tid = HAMMER_MAX_TID;
	cursor.key_end.rec_type = HAMMER_MAX_RECTYPE;

	cursor.flags |= HAMMER_CURSOR_END_INCLUSIVE;
	cursor.flags |= HAMMER_CURSOR_BACKEND;

	/*
	 * Do an optimized scan of only records created or modified
	 * >= trunc_tid, so we can fix up those records.  We must
	 * still check the TIDs but this greatly reduces the size of
	 * the scan.
	 */
	cursor.flags |= HAMMER_CURSOR_MIRROR_FILTERED;
	cursor.cmirror = &cmirror;
	cmirror.mirror_tid = trunc_tid;

	error = hammer_btree_first(&cursor);
	while (error == 0) {
		/*
		 * Abort the rollback if a signal is pending (returns
		 * EINTR, translated by our callers into HEAD_INTR).
		 */
		if (error == 0) {
			error = hammer_signal_check(trans->hmp);
			if (error)
				break;
		}

		/*
		 * We only care about leafs.  Internal nodes can be returned
		 * in mirror-filtered mode (they are used to generate SKIP
		 * mrecords), but we don't need them for this code.
		 *
		 * WARNING: See warnings in hammer_unlock_cursor() function.
		 */
		cursor.flags |= HAMMER_CURSOR_ATEDISK;
		if (cursor.node->ondisk->type == HAMMER_BTREE_TYPE_LEAF) {
			/* remember progress for an EDEADLK resume */
			key_cur = cursor.node->ondisk->elms[cursor.index].base;
			error = hammer_pfs_delete_at_cursor(&cursor, trunc_tid);
		}

		/*
		 * Throttle: if meta-data buffers or the UNDO FIFO are
		 * getting full, unlock the cursor and wait for the
		 * flusher to catch up before continuing the scan.
		 */
		while (hammer_flusher_meta_halflimit(trans->hmp) ||
		       hammer_flusher_undo_exhausted(trans, 2)) {
			hammer_unlock_cursor(&cursor);
			hammer_flusher_wait(trans->hmp, seq);
			hammer_lock_cursor(&cursor);
			seq = hammer_flusher_async_one(trans->hmp);
		}

		if (error == 0)
			error = hammer_btree_iterate(&cursor);
	}
	/* ENOENT just means the scan hit the end of the range */
	if (error == ENOENT)
		error = 0;
	hammer_done_cursor(&cursor);
	/* deadlock: rebuild the cursor and resume at key_cur */
	if (error == EDEADLK)
		goto retry;
failed:
	return(error);
}
437
438 /*
439 * Helper function - perform rollback on a B-Tree element given trunc_tid.
440 *
441 * If create_tid >= trunc_tid the record is physically destroyed.
442 * If delete_tid >= trunc_tid it will be set to 0, undeleting the record.
443 */
444 static
445 int
446 hammer_pfs_delete_at_cursor(hammer_cursor_t cursor, hammer_tid_t trunc_tid)
447 {
448 hammer_btree_leaf_elm_t elm;
449 int error;
450
451 elm = &cursor->node->ondisk->elms[cursor->index].leaf;
452 if (elm->base.create_tid < trunc_tid &&
453 elm->base.delete_tid < trunc_tid) {
454 return(0);
455 }
456
457 if (elm->base.create_tid >= trunc_tid) {
458 error = hammer_delete_at_cursor(
459 cursor, HAMMER_DELETE_DESTROY,
460 cursor->trans->tid, cursor->trans->time32,
461 1, NULL);
462 } else if (elm->base.delete_tid >= trunc_tid) {
463 error = hammer_delete_at_cursor(
464 cursor, HAMMER_DELETE_ADJUST,
465 0, 0,
466 1, NULL);
467 } else {
468 error = 0;
469 }
470 return(error);
471 }
472
Cache object: 99b8c32c5a45864ffeb88bc2342c3263
|