1 #
2 # This file and its contents are supplied under the terms of the
3 # Common Development and Distribution License ("CDDL"), version 1.0.
4 # You may only use this file in accordance with the terms of version
5 # 1.0 of the CDDL.
6 #
7 # A full copy of the text of the CDDL should have accompanied this
8 # source. A copy of the CDDL is also available via the Internet at
9 # http://www.illumos.org/license/CDDL.
10 #
11
12 #
13 # Copyright 2009 Sun Microsystems, Inc. All rights reserved.
14 # Use is subject to license terms.
15 # Copyright (c) 2012, 2019 by Delphix. All rights reserved.
16 # Copyright 2016 Nexenta Systems, Inc.
17 # Copyright (c) 2016, 2017 by Intel Corporation. All rights reserved.
18 # Copyright (c) 2017 Lawrence Livermore National Security, LLC.
19 # Copyright (c) 2017 Datto Inc.
20 # Copyright (c) 2017 Open-E, Inc. All Rights Reserved.
21 # Copyright 2019 Richard Elling
22 #
23
#
# Print the SCSI host number for the given disk.
#
# $1 disk name as it appears under /sys/block
#
# The entries of /sys/block/<disk>/device/scsi_device are listed and the
# text in front of the first ':' of each entry is written to stdout.
#
function get_scsi_host #disk
{
	typeset blkdev=$1
	typeset sysfs_dir=/sys/block/${blkdev}/device/scsi_device

	ls $sysfs_dir | cut -d: -f1
}
32
#
# Cause a scan of all scsi host adapters by default
#
# $1 optional host number; when given, only that host is rescanned
#
# On Linux this writes '- - -' to the "scan" attribute of the selected
# /sys/class/scsi_host/host* entries (through log_must). On other
# platforms nothing is done.
#
function scan_scsi_hosts
{
	typeset hostnum=${1}
	# Loop variable is local so that a caller's own "host" variable
	# is never overwritten by the iteration below.
	typeset host

	if is_linux; then
		if [[ -z $hostnum ]]; then
			for host in /sys/class/scsi_host/host*; do
				log_must eval "echo '- - -' > $host/scan"
			done
		else
			log_note "/sys/class/scsi_host/host$hostnum/scan"
			log_must eval \
			    "echo '- - -' > /sys/class/scsi_host/host$hostnum/scan"
		fi
	fi
}
54
#
# Wait for newly created block devices to have their minors created.
# Additional arguments can be passed to udevadm trigger, with the expected
# arguments to typically be a block device pathname. This is useful when
# waiting on a specific device to settle rather than triggering
# all devices and waiting for them all to settle.
#
# The udevadm settle timeout can be 120 or 180 seconds by default for
# some distros. If a long delay is experienced, it could be due to some
# strangeness in a malfunctioning device that isn't related to the devices
# under test. To help debug this condition, a notice is given if settle takes
# too long (more than 60 seconds).
#
# After the platform specific step, the given paths are polled (at most five
# rounds, sleeping "number of arguments" seconds between rounds) until all of
# them exist.
#
# Note: there is no meaningful return code if udevadm fails. Consumers
# should not expect a return code (do not call as argument to log_must)
#
function block_device_wait
{
	typeset start elapsed missing dev
	typeset -i i

	if is_linux; then
		# "$@" keeps every pathname a separate, unsplit argument.
		udevadm trigger "$@" 2>/dev/null
		start=$SECONDS
		udevadm settle
		elapsed=$((SECONDS - start))
		# Must be an arithmetic test: inside [[ ]] the ">" operator
		# compares strings, so e.g. 7 would sort after 60.
		if (( elapsed > 60 )); then
			log_note udevadm settle time too long: $elapsed
		fi
	elif is_freebsd; then
		if (( $# == 0 )); then
			# Do something that has to go through the geom event
			# queue to complete.
			sysctl kern.geom.conftxt >/dev/null
			return
		fi
	fi

	# Poll for the given paths to appear, but give up eventually.
	for (( i = 0; i < 5; ++i )); do
		missing=false
		for dev in "$@"; do
			if ! [[ -e $dev ]]; then
				missing=true
				break
			fi
		done
		if ! $missing; then
			break
		fi
		sleep ${#@}
	done
}
105
#
# Check if the given device is physical device
#
# $1 device name, optionally prefixed with $DEV_DSKDIR/ or $DEV_RDSKDIR/
#
# Linux:   the node under $DEV_DSKDIR must pass is_disk_device and
#          /sys/module/loop/parameters/max_part must be a regular file.
# FreeBSD: the node must pass is_disk_device and the bare name must match
#          one of the known disk driver name patterns.
# Others:  the bare name must look like c<N>[t<N>]d<N>.
#
# Returns 0 when the device qualifies, non-zero otherwise.
#
function is_physical_device #device
{
	typeset dev_name=${1#$DEV_DSKDIR/}
	dev_name=${dev_name#$DEV_RDSKDIR/}

	if is_linux; then
		is_disk_device "$DEV_DSKDIR/$dev_name" || return
		[[ -f /sys/module/loop/parameters/max_part ]]
		return
	fi

	if is_freebsd; then
		is_disk_device "$DEV_DSKDIR/$dev_name" || return
		echo $dev_name | grep -qE \
		    -e '^a?da[0-9]+$' \
		    -e '^md[0-9]+$' \
		    -e '^mfid[0-9]+$' \
		    -e '^nda[0-9]+$' \
		    -e '^nvd[0-9]+$' \
		    -e '^vtbd[0-9]+$'
		return
	fi

	echo $dev_name | grep -qE '^c[0-F]+([td][0-F]+)+$'
}
130
#
# Check if the given device is a real device (ie SCSI device)
#
# $1 disk name relative to $DEV_RDSKDIR (required)
#
# On Linux the result is whether the TYPE column printed by lsblk for the
# device contains "disk". On other platforms no check is made and the
# function returns 0.
#
function is_real_device #disk
{
	typeset target=$1

	if [[ -z $target ]]; then
		log_fail "No argument for disk given."
	fi

	is_linux || return 0

	lsblk $DEV_RDSKDIR/$target -o TYPE 2>/dev/null | grep -q disk
}
144
#
# Check if the given device is a loop device
#
# $1 disk name relative to $DEV_RDSKDIR (required)
#
# On Linux the result is whether the TYPE column printed by lsblk for the
# device contains "loop". On other platforms no check is made and the
# function returns 0.
#
function is_loop_device #disk
{
	typeset target=$1

	if [[ -z $target ]]; then
		log_fail "No argument for disk given."
	fi

	is_linux || return 0

	lsblk $DEV_RDSKDIR/$target -o TYPE 2>/dev/null | grep -q loop
}
158
#
# Linux:
# Check if the given device is a multipath device and if there is a symbolic
# link to a device mapper and to a disk
# Currently no support for dm devices alone without multipath
#
# FreeBSD:
# Check if the given device is a gmultipath device.
#
# Others:
# No multipath detection.
#
# $1 disk name relative to $DEV_MPATHDIR (required)
#
# Returns 0 for a multipath device, non-zero otherwise.
#
function is_mpath_device #disk
{
	typeset name=$1
	# Pessimistic default: "not multipath" unless a check below succeeds.
	typeset -i rc=1

	if [[ -z $name ]]; then
		log_fail "No argument for disk given."
	fi

	if is_linux; then
		if lsblk $DEV_MPATHDIR/$name -o TYPE 2>/dev/null | \
		    grep -q mpath; then
			# lsblk says mpath; the node must also be a symlink.
			readlink $DEV_MPATHDIR/$name > /dev/null 2>&1
			rc=$?
		fi
	elif is_freebsd; then
		is_disk_device $DEV_MPATHDIR/$name
		rc=$?
	fi

	return $rc
}
189
#
# Check if the given path is the appropriate sort of device special node.
#
# $1 path to test
#
# Returns 0 when the path is a character device (FreeBSD) or a block
# device (everywhere else), non-zero otherwise. An empty or missing path
# is never a device.
#
function is_disk_device #path
{
	typeset path=$1

	# The operand is quoted on purpose: with an empty path an unquoted
	# "test -b $path" collapses to "test -b", which is true.
	if is_freebsd; then
		# FreeBSD doesn't have block devices, only character devices.
		test -c "$path"
	else
		test -b "$path"
	fi
}
204
#
# Set the slice prefix for disk partitioning depending
# on whether the device is a real, multipath, or loop device.
# Currently all disks have to be of the same type, so only
# checks first disk to determine slice prefix.
#
# Reads:   DISKS (whitespace separated list), DISK_ARRAY_NUM
# Exports: SLICE_PREFIX ("" or "p")
#
# Linux only; a no-op elsewhere or when DISK_ARRAY_NUM is not positive.
# Calls log_fail when the first disk is none of the supported types.
#
function set_slice_prefix
{
	typeset disk

	is_linux || return 0
	(( 0 < $DISK_ARRAY_NUM )) || return 0

	# Only the first entry of $DISKS is ever examined, so it is taken
	# directly instead of looping over an index.
	disk="$(echo $DISKS | awk '{print $1}')"

	#
	# The awk exits 1 when the 18th character of the name is a digit and
	# the leading "!" turns that into success, i.e. the first arm is
	# taken for a multipath name with a digit in position 18, or for
	# any real device.
	# NOTE(review): confirm that this polarity (digit => no prefix) is
	# the intended one.
	#
	if is_mpath_device $disk && \
	    ! echo $disk | awk 'substr($1,18,1) ~ /^[[:digit:]]+$/ {exit 1}' || \
	    is_real_device $disk; then
		export SLICE_PREFIX=""
		return 0
	elif is_mpath_device $disk || is_loop_device $disk; then
		export SLICE_PREFIX="p"
		return 0
	else
		log_fail "$disk not supported for partitioning."
	fi
}
231
#
# Set the directory path ($DEV_DSKDIR) used for the devices listed in $DISKS.
# Currently all disks have to be of the same type, so only
# checks first disk to determine device directory
# default = /dev (linux)
# real disk = /dev (linux)
# multipath device = /dev/mapper (linux)
#
# Reads:   DISKS, DISK_ARRAY_NUM, DEV_MPATHDIR, DEV_RDSKDIR
# Exports: DEV_DSKDIR
#
# On Linux nothing is changed when DISK_ARRAY_NUM is not positive; on
# other platforms DEV_DSKDIR is always set to $DEV_RDSKDIR.
#
function set_device_dir
{
	typeset disk

	if ! is_linux; then
		export DEV_DSKDIR=$DEV_RDSKDIR
		return
	fi

	(( 0 < $DISK_ARRAY_NUM )) || return 0

	# Only the first entry of $DISKS decides, so take it directly
	# instead of looping over an index.
	disk="$(echo $DISKS | awk '{print $1}')"
	if is_mpath_device $disk; then
		export DEV_DSKDIR=$DEV_MPATHDIR
	else
		export DEV_DSKDIR=$DEV_RDSKDIR
	fi
	return 0
}
261
#
# Get the directory path of given device
#
# $1 device name or path
#
# Prints $DEV_DSKDIR on FreeBSD and for anything is_physical_device
# accepts. Otherwise the last path component is stripped (except for "/")
# and the remainder is printed, unless "$DEV_DSKDIR/<remainder>" is itself
# a disk device, in which case $DEV_DSKDIR is printed.
#
function get_device_dir #device
{
	typeset target=$1

	if is_freebsd || is_physical_device $target; then
		echo "$DEV_DSKDIR"
		return
	fi

	[[ $target == "/" ]] || target=${target%/*}

	if is_disk_device "$DEV_DSKDIR/$target"; then
		target="$DEV_DSKDIR"
	fi
	echo $target
}
281
#
# Get persistent name for given disk
#
# $1 device name relative to $DEV_DSKDIR
#
# On Linux, for real and multipath devices, the name is taken from the
# first matching disk/by-id entry in "udevadm info" output (the dm-uuid
# entry for multipath). In every other case the input is echoed back.
#
function get_persistent_disk_name #device
{
	typeset dev=$1

	if ! is_linux; then
		echo $dev
		return
	fi

	if is_real_device $dev; then
		udevadm info -q all -n $DEV_DSKDIR/$dev |
		    awk '/disk\/by-id/ {print $2; exit}' |
		    cut -d/ -f3
		return
	fi

	if is_mpath_device $dev; then
		udevadm info -q all -n $DEV_DSKDIR/$dev |
		    awk '/disk\/by-id\/dm-uuid/ {print $2; exit}' |
		    cut -d/ -f3
		return
	fi

	echo $dev
}
304
#
# Online or offline a disk on the system
#
# First checks state of disk. Test will fail if disk is not properly onlined
# or offlined. Online is a full rescan of SCSI disks by echoing to every
# host entry.
#
# $1 disk   disk name (real device or multipath map)
# $2 state  "online" or "offline"
# $3 host   optional SCSI host number, only used for "online"
#
# Linux only; on other platforms nothing is done. "lsscsi" output is used
# as the indicator of presence: a device that is listed counts as online.
#
function on_off_disk # disk state{online,offline} host
{
	typeset disk=$1
	typeset state=$2
	typeset host=$3
	# Scratch variables are local so nothing leaks into the caller.
	typeset dm_name dep dep_dir ss sd dev_state dev_delete
	typeset -i retries=0

	[[ -z $disk ]] || [[ -z $state ]] && \
	    log_fail "Arguments invalid or missing"

	if is_linux; then
		if [[ $state == "offline" ]] && ( is_mpath_device $disk ); then
			dm_name="$(readlink $DEV_DSKDIR/$disk | cut -d/ -f2)"
			dep="$(ls /sys/block/${dm_name}/slaves | awk '{print $1}')"
			# Remove path devices until the map has none left.
			while [[ -n $dep ]]; do
				#check if disk is online
				if lsscsi | grep -qF $dep; then
					dep_dir="/sys/block/${dm_name}"
					dep_dir+="/slaves/${dep}/device"
					ss="${dep_dir}/state"
					sd="${dep_dir}/delete"
					log_must eval "echo 'offline' > ${ss}"
					log_must eval "echo '1' > ${sd}"
					if lsscsi | grep -qF $dep; then
						log_fail "Offlining $disk failed"
					fi
				fi
				dep="$(ls /sys/block/$dm_name/slaves 2>/dev/null | awk '{print $1}')"
			done
		elif [[ $state == "offline" ]] && ( is_real_device $disk ); then
			#check if disk is online
			if lsscsi | grep -qF $disk; then
				dev_state="/sys/block/$disk/device/state"
				dev_delete="/sys/block/$disk/device/delete"
				log_must eval "echo 'offline' > ${dev_state}"
				log_must eval "echo '1' > ${dev_delete}"
				if lsscsi | grep -qF $disk; then
					log_fail "Offlining $disk failed"
				fi
			else
				log_note "$disk is already offline"
			fi
		elif [[ $state == "online" ]]; then
			#force a full rescan
			scan_scsi_hosts $host
			block_device_wait
			if is_mpath_device $disk; then
				dm_name="$(readlink $DEV_DSKDIR/$disk | cut -d/ -f2)"
				dep="$(ls /sys/block/$dm_name/slaves | awk '{print $1}')"
				# Onlining worked only if a path device exists
				# and is listed by lsscsi again; anything else
				# is a failure (same sense as the real-device
				# branch below).
				if [[ -z $dep ]] || ! lsscsi | grep -qF "$dep"; then
					log_fail "Onlining $disk failed"
				fi
			elif is_real_device $disk; then
				block_device_wait
				retries=0
				# Give the device a few seconds to show up.
				while ! lsscsi | grep -qF $disk; do
					if (( $retries > 2 )); then
						log_fail "Onlining $disk failed"
						break
					fi
					(( ++retries ))
					sleep 1
				done
			else
				log_fail "$disk is not a real dev"
			fi
		else
			log_fail "$disk failed to $state"
		fi
	fi
}
382
#
# Simulate disk removal
#
# $1 disk to take offline
#
# Offlines the disk through on_off_disk and then waits for block devices
# to settle.
#
function remove_disk #disk
{
	typeset victim=$1

	on_off_disk $victim "offline"
	block_device_wait
}
392
#
# Simulate disk insertion for the given SCSI host
#
# $1 disk to bring online
# $2 SCSI host number handed on to on_off_disk
#
# Onlines the disk through on_off_disk and then waits for block devices
# to settle.
#
function insert_disk #disk scsi_host
{
	typeset newdisk=$1
	typeset hostno=$2

	on_off_disk $newdisk "online" $hostno
	block_device_wait
}
403
#
# Load scsi_debug module with specified parameters
# $blksz can be either one of: < 512b | 512e | 4Kn >
#
# $1 dev_size_mb  passed as dev_size_mb=
# $2 add_host     passed as add_host=
# $3 num_tgts     passed as num_tgts=
# $4 max_luns     passed as max_luns=
# $5 blksz        512b -> sector_size=512  physblk_exp=0
#                 512e -> sector_size=512  physblk_exp=3
#                 4Kn  -> sector_size=4096 physblk_exp=0
#
# All five arguments are required. The module is only loaded on Linux; the
# test is reported unsupported when "modprobe -n" fails and fails when the
# module is already loaded or no scsi_debug device shows up in lsscsi.
#
function load_scsi_debug # dev_size_mb add_host num_tgts max_luns blksz
{
	typeset devsize=$1 hosts=$2 tgts=$3 luns=$4 blksz=$5
	typeset sector blkexp

	if [[ -z $devsize || -z $hosts || -z $tgts || -z $luns || \
	    -z $blksz ]]; then
		log_fail "Arguments invalid or missing"
	fi

	case "$blksz" in
	'512b')	sector=512;  blkexp=0 ;;
	'512e')	sector=512;  blkexp=3 ;;
	'4Kn')	sector=4096; blkexp=0 ;;
	*)	log_fail "Unsupported blksz value: $blksz" ;;
	esac

	is_linux || return 0

	# Dry run first: tells whether the module exists at all.
	if ! modprobe -n scsi_debug; then
		log_unsupported "Platform does not have scsi_debug module"
	fi

	if lsmod | grep -q scsi_debug; then
		log_fail "scsi_debug module already installed"
	else
		log_must modprobe scsi_debug dev_size_mb=$devsize \
		    add_host=$hosts num_tgts=$tgts max_luns=$luns \
		    sector_size=$sector physblk_exp=$blkexp
		block_device_wait
		if ! lsscsi | grep -q scsi_debug; then
			log_fail "scsi_debug module install failed"
		fi
	fi
}
452
#
# Unload scsi_debug module, if needed.
#
# Runs "modprobe -r scsi_debug" through log_must_retry with the arguments
# "in use" and 5 (presumably the retry-on message and the attempt limit --
# confirm against log_must_retry).
#
function unload_scsi_debug
{
	typeset -a unload_cmd=(modprobe -r scsi_debug)

	log_must_retry "in use" 5 "${unload_cmd[@]}"
}
460
#
# Get scsi_debug device name.
# Returns basename of scsi_debug device (for example "sdb").
#
# The name is the third '/'-separated component of the sixth column of the
# first lsscsi line mentioning scsi_debug. While that value is "-" the
# lookup is retried once per second, at most ten times in total; whatever
# was read last is printed.
#
function get_debug_device
{
	# Both are local so the caller's variables are left alone.
	typeset -i attempt
	typeset val

	for (( attempt = 1; attempt <= 10; attempt++ )); do
		val=$(lsscsi | awk '/scsi_debug/ {print $6; exit}' | cut -d/ -f3)

		# lsscsi can take time to settle
		if [[ $val != "-" ]]; then
			break
		fi
		sleep 1
	done
	echo "$val"
}
478
#
# Get actual devices used by the pool (i.e. linux sdb1 not sdb).
#
# $1 testpool  pool name handed to "zpool status -P"
# $2 devdir    device directory; used as an awk regular expression to pick
#              the status lines and stripped (with the following "/") from
#              each of them
#
# Prints the matching device names on one line, each followed by a space.
# Only produces output when $UNAME is Linux or FreeBSD.
#
function get_pool_devices #testpool #devdir
{
	typeset pool=$1
	typeset dir=$2

	if [[ $UNAME == "Linux" || $UNAME == "FreeBSD" ]]; then
		zpool status -P $pool | awk -v d="$dir" \
		    '$1 ~ d {sub(d "/", ""); printf("%s ", $1)}'
	fi
}
494
#
# Write to standard out giving the level, device name, offset and length
# of all blocks in an input file. The offset and length are in units of
# 512 byte blocks. In the case of mirrored vdevs, only the first
# device is listed, as the levels, blocks and offsets will be the same
# on other devices. Note that this function only works with mirrored
# or non-redundant pools, not raidz.
#
# The output of this function can be used to introduce corruption at
# varying levels of indirection.
#
# $1 input_file  existing regular file (log_fail is called otherwise)
#
# Output: one line per block, "<level> <path> <offset> <length>".
#
# NOTE(review): the final loop echoes one line for every path stored for
# the vdev, which does not obviously match "only the first device is
# listed" above -- confirm which of the two is intended.
#
function list_file_blocks # input_file
{
	typeset input_file=$1

	[[ -f $input_file ]] || log_fail "Couldn't find $input_file"

	# Dataset name as reported by zfs list for the file, the pool part of
	# that name (everything before the first "/"), and the file's object
	# number as returned by get_objnum.
	typeset ds="$(zfs list -H -o name $input_file)"
	typeset pool="${ds%%/*}"
	typeset objnum="$(get_objnum $input_file)"

	#
	# Establish a mapping between vdev ids as shown in a DVA and the
	# pathnames they correspond to in ${VDEV_MAP[][]}.
	#
	# The vdev bits in a DVA refer to the top level vdev id.
	# ${VDEV_MAP[$id]} is an array of the vdev paths within that vdev.
	#
	# The awk program prints shell statements ("typeset -a ...;" and
	# "VDEV_MAP[i][j]=path;") which are then executed by eval.
	#
	# NOTE(review): as seen here the two "children" patterns are textually
	# identical and the "left a top level vdev" pattern tests for a single
	# leading space. They presumably differed in the number of leading
	# spaces (nesting depth in the zdb -C output) -- verify the whitespace
	# inside these regular expressions against the real zdb output.
	#
	eval $(zdb -C $pool | awk '
	    BEGIN { printf "typeset -a VDEV_MAP;" }
	    function subscript(s) {
	        # "[#]" is more convenient than the bare "#"
	        match(s, /\[[0-9]*\]/)
	        return substr(s, RSTART, RLENGTH)
	    }
	    id && !/^ / {
	        # left a top level vdev
	        id = 0
	    }
	    id && $1 ~ /^path:$/ {
	        # found a vdev path; save it in the map
	        printf "VDEV_MAP%s%s=%s;", id, child, $2
	    }
	    /^ children/ {
	        # entering a top level vdev
	        id = subscript($0)
	        child = "[0]" # default in case there is no nested vdev
	        printf "typeset -a VDEV_MAP%s;", id
	    }
	    /^ children/ {
	        # entering a nested vdev (e.g. child of a top level mirror)
	        child = subscript($0)
	    }
	')

	#
	# The awk below parses the output of zdb, printing out the level
	# of each block along with vdev id, offset and length. The last
	# two are converted to decimal in the while loop. 4M is added to
	# the offset to compensate for the first two labels and boot
	# block. Lastly, the offset and length are printed in units of
	# 512B blocks for ease of use with dd.
	#
	typeset level vdev path offset length
	# Probe whether this awk accepts -n; if so it is used for the parsing
	# below.
	if awk -n '' 2>/dev/null; then
		# gawk needs -n to decode hex
		AWK='awk -n'
	else
		AWK='awk'
	fi
	# Presumably flushes pending writes so zdb sees the on-disk state --
	# confirm against sync_all_pools.
	sync_all_pools true
	# Lines between "Indirect blocks:" and the next empty line are parsed;
	# field 2 is the level and fields 3.. are "vdev:offset:length" triples.
	# stderr of the mapping loop is discarded.
	zdb -dddddd $ds $objnum | $AWK -v pad=$((4<<20)) -v bs=512 '
	    /^$/ { looking = 0 }
	    looking {
	        level = $2
	        field = 3
	        while (split($field, dva, ":") == 3) {
	            # top level vdev id
	            vdev = int(dva[1])
	            # offset + 4M label/boot pad in 512B blocks
	            offset = (int("0x"dva[2]) + pad) / bs
	            # length in 512B blocks
	            len = int("0x"dva[3]) / bs

	            print level, vdev, offset, len

	            ++field
	        }
	    }
	    /^Indirect blocks:/ { looking = 1 }
	' | \
	while read level vdev offset length; do
		for path in ${VDEV_MAP[$vdev][@]}; do
			echo "$level $path $offset $length"
		done
	done 2>/dev/null
}
592
#
# Overwrite with random data all blocks of a file that sit at the given
# level of indirection.
#
# $1 input_file     existing regular file (log_fail is called otherwise)
# $2 corrupt_level  numeric level, default 0; compared as "L<n>" with the
#                   level column printed by list_file_blocks
#
# Every matching "<level> <path> <offset> <length>" line results in a dd
# from /dev/urandom onto <path> (512 byte units, no truncation). On FreeBSD
# kern.geom.debugflags is set to 16 for the duration and restored after.
#
function corrupt_blocks_at_level # input_file corrupt_level
{
	typeset input_file=$1
	typeset corrupt_level="L${2:-0}"
	typeset level path offset length
	# Local, so the saved sysctl value neither leaks out of the function
	# nor overwrites a variable of the same name in the caller.
	typeset debugflags

	[[ -f $input_file ]] || log_fail "Couldn't find $input_file"

	if is_freebsd; then
		# Temporarily allow corrupting an inuse device.
		debugflags=$(sysctl -n kern.geom.debugflags)
		sysctl kern.geom.debugflags=16
	fi

	list_file_blocks $input_file | \
	    while read -r level path offset length; do
		if [[ $level = "$corrupt_level" ]]; then
			log_must dd if=/dev/urandom of=$path bs=512 \
			    count=$length seek=$offset conv=notrunc
		fi
	done

	if is_freebsd; then
		sysctl kern.geom.debugflags=$debugflags
	fi

	# This is necessary for pools made of loop devices.
	sync
}
622
#
# Overwrite 32 bytes of one vdev label with random data.
#
# $1 label_number  index 0-3 into the offset table below
# $2 vdev_path     file or device to damage; its size comes from stat_size
#
# Byte offsets used, with L = 256K (label size) and S = vdev size:
#   0: 128K - 32
#   1: L + 128K - 32
#   2: S - L - (128K + 32)
#   3: S - (128K + 32)
# Presumably these are the locations of the labels' 32 byte checksums --
# confirm against the on-disk label layout.
#
function corrupt_label_checksum # label_number vdev_path
{
	typeset label_size=$((256*1024))
	typeset vdev_size=$(stat_size ${2})
	typeset front=$((128*1024 - 32))
	typeset back=$((128*1024 + 32))
	typeset -a offsets

	offsets[0]=$front
	offsets[1]=$(($label_size + $front))
	offsets[2]=$(($vdev_size - $label_size - $back))
	offsets[3]=$(($vdev_size - $back))

	dd if=/dev/urandom of=${2} seek=${offsets[$1]} bs=1 count=32 \
	    conv=notrunc
}
Cache object: 367ab772271489e170641d1e676c6dc1
|