The Design and Implementation of the FreeBSD Operating System, Second Edition
Now available: The Design and Implementation of the FreeBSD Operating System (Second Edition)


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]

FreeBSD/Linux Kernel Cross Reference
sys/contrib/openzfs/cmd/zstream/zstream_redup.c

Version: -  FREEBSD  -  FREEBSD-13-STABLE  -  FREEBSD-13-0  -  FREEBSD-12-STABLE  -  FREEBSD-12-0  -  FREEBSD-11-STABLE  -  FREEBSD-11-0  -  FREEBSD-10-STABLE  -  FREEBSD-10-0  -  FREEBSD-9-STABLE  -  FREEBSD-9-0  -  FREEBSD-8-STABLE  -  FREEBSD-8-0  -  FREEBSD-7-STABLE  -  FREEBSD-7-0  -  FREEBSD-6-STABLE  -  FREEBSD-6-0  -  FREEBSD-5-STABLE  -  FREEBSD-5-0  -  FREEBSD-4-STABLE  -  FREEBSD-3-STABLE  -  FREEBSD22  -  l41  -  OPENBSD  -  linux-2.6  -  MK84  -  PLAN9  -  xnu-8792 
SearchContext: -  none  -  3  -  10 

    1 /*
    2  * CDDL HEADER START
    3  *
    4  * This file and its contents are supplied under the terms of the
    5  * Common Development and Distribution License ("CDDL"), version 1.0.
    6  * You may only use this file in accordance with the terms of version
    7  * 1.0 of the CDDL.
    8  *
    9  * A full copy of the text of the CDDL should have accompanied this
   10  * source.  A copy of the CDDL is also available via the Internet at
   11  * http://www.illumos.org/license/CDDL.
   12  *
   13  * CDDL HEADER END
   14  */
   15 
   16 /*
   17  * Copyright (c) 2020 by Delphix. All rights reserved.
   18  */
   19 
   20 #include <assert.h>
   21 #include <cityhash.h>
   22 #include <ctype.h>
   23 #include <errno.h>
   24 #include <fcntl.h>
   25 #include <libzfs.h>
   26 #include <libzutil.h>
   27 #include <stddef.h>
   28 #include <stdio.h>
   29 #include <stdlib.h>
   30 #include <string.h>
   31 #include <umem.h>
   32 #include <unistd.h>
   33 #include <sys/debug.h>
   34 #include <sys/stat.h>
   35 #include <sys/zfs_ioctl.h>
   36 #include <sys/zio_checksum.h>
   37 #include "zfs_fletcher.h"
   38 #include "zstream.h"
   39 
   40 
   41 #define MAX_RDT_PHYSMEM_PERCENT         20
   42 #define SMALLEST_POSSIBLE_MAX_RDT_MB            128
   43 
   44 typedef struct redup_entry {    /* one node of a hash bucket chain */
   45         struct redup_entry      *rde_next;      /* next node in the same bucket */
   46         uint64_t rde_guid;      /* key: guid passed to rdt_insert() */
   47         uint64_t rde_object;    /* key: object passed to rdt_insert() */
   48         uint64_t rde_offset;    /* key: offset passed to rdt_insert() */
   49         uint64_t rde_stream_offset;     /* value: input-stream offset of the record */
   50 } redup_entry_t;
   51 
   52 typedef struct redup_table {    /* chained hash table keyed by (guid, object, offset) */
   53         redup_entry_t   **redup_hash_array;     /* bucket heads, indexed by hashcode */
   54         umem_cache_t    *ddecache;      /* umem cache that entries are allocated from */
   55         uint64_t        ddt_count;      /* number of entries inserted so far */
   56         int             numhashbits;    /* number of hash bits used to pick a bucket */
   57 } redup_table_t;
   58 
   59 int
   60 highbit64(uint64_t i)
   61 {
   62         if (i == 0)
   63                 return (0);
   64 
   65         return (NBBY * sizeof (uint64_t) - __builtin_clzll(i));
   66 }
   67 
   68 void *
   69 safe_calloc(size_t n)
   70 {
   71         void *rv = calloc(1, n);
   72         if (rv == NULL) {
   73                 fprintf(stderr,
   74                     "Error: could not allocate %u bytes of memory\n",
   75                     (int)n);
   76                 exit(1);
   77         }
   78         return (rv);
   79 }
   80 
   81 /*
   82  * Safe version of fread(), exits on error.
   83  */
   84 int
   85 sfread(void *buf, size_t size, FILE *fp)
   86 {
   87         int rv = fread(buf, size, 1, fp);
   88         if (rv == 0 && ferror(fp)) {
   89                 (void) fprintf(stderr, "Error while reading file: %s\n",
   90                     strerror(errno));
   91                 exit(1);
   92         }
   93         return (rv);
   94 }
   95 
   96 /*
   97  * Safe version of pread(), exits on error.
   98  */
   99 static void
  100 spread(int fd, void *buf, size_t count, off_t offset)
  101 {
  102         ssize_t err = pread(fd, buf, count, offset);
  103         if (err == -1) {
  104                 (void) fprintf(stderr,
  105                     "Error while reading file: %s\n",
  106                     strerror(errno));
  107                 exit(1);
  108         } else if (err != count) {
  109                 (void) fprintf(stderr,
  110                     "Error while reading file: short read\n");
  111                 exit(1);
  112         }
  113 }
  114 
  115 static int
  116 dump_record(dmu_replay_record_t *drr, void *payload, int payload_len,
  117     zio_cksum_t *zc, int outfd)
  118 {
  119         assert(offsetof(dmu_replay_record_t, drr_u.drr_checksum.drr_checksum)
  120             == sizeof (dmu_replay_record_t) - sizeof (zio_cksum_t));
  121         fletcher_4_incremental_native(drr,
  122             offsetof(dmu_replay_record_t, drr_u.drr_checksum.drr_checksum), zc);
  123         if (drr->drr_type != DRR_BEGIN) {
  124                 assert(ZIO_CHECKSUM_IS_ZERO(&drr->drr_u.
  125                     drr_checksum.drr_checksum));
  126                 drr->drr_u.drr_checksum.drr_checksum = *zc;
  127         }
  128         fletcher_4_incremental_native(&drr->drr_u.drr_checksum.drr_checksum,
  129             sizeof (zio_cksum_t), zc);
  130         if (write(outfd, drr, sizeof (*drr)) == -1)
  131                 return (errno);
  132         if (payload_len != 0) {
  133                 fletcher_4_incremental_native(payload, payload_len, zc);
  134                 if (write(outfd, payload, payload_len) == -1)
  135                         return (errno);
  136         }
  137         return (0);
  138 }
  139 
  140 static void
  141 rdt_insert(redup_table_t *rdt,
  142     uint64_t guid, uint64_t object, uint64_t offset, uint64_t stream_offset)
  143 {
  144         uint64_t ch = cityhash4(guid, object, offset, 0);
  145         uint64_t hashcode = BF64_GET(ch, 0, rdt->numhashbits);
  146         redup_entry_t **rdepp;
  147 
  148         rdepp = &(rdt->redup_hash_array[hashcode]);
  149         redup_entry_t *rde = umem_cache_alloc(rdt->ddecache, UMEM_NOFAIL);
  150         rde->rde_next = *rdepp;
  151         rde->rde_guid = guid;
  152         rde->rde_object = object;
  153         rde->rde_offset = offset;
  154         rde->rde_stream_offset = stream_offset;
  155         *rdepp = rde;
  156         rdt->ddt_count++;
  157 }
  158 
  159 static void
  160 rdt_lookup(redup_table_t *rdt,
  161     uint64_t guid, uint64_t object, uint64_t offset,
  162     uint64_t *stream_offsetp)
  163 {
  164         uint64_t ch = cityhash4(guid, object, offset, 0);
  165         uint64_t hashcode = BF64_GET(ch, 0, rdt->numhashbits);
  166 
  167         for (redup_entry_t *rde = rdt->redup_hash_array[hashcode];
  168             rde != NULL; rde = rde->rde_next) {
  169                 if (rde->rde_guid == guid &&
  170                     rde->rde_object == object &&
  171                     rde->rde_offset == offset) {
  172                         *stream_offsetp = rde->rde_stream_offset;
  173                         return;
  174                 }
  175         }
  176         assert(!"could not find expected redup table entry");
  177 }
  178 
  179 /*
  180  * Convert a dedup stream (generated by "zfs send -D") to a
  181  * non-deduplicated stream.  The entire infd will be converted, including
  182  * any substreams in a stream package (generated by "zfs send -RD"). The
  183  * infd must be seekable.
  184  */
  185 static void
  186 zfs_redup_stream(int infd, int outfd, boolean_t verbose)
  187 {
  188         int bufsz = SPA_MAXBLOCKSIZE;
  189         dmu_replay_record_t thedrr = { 0 };
  190         dmu_replay_record_t *drr = &thedrr;
  191         redup_table_t rdt;
  192         zio_cksum_t stream_cksum;
  193         uint64_t numbuckets;
  194         uint64_t num_records = 0;
  195         uint64_t num_write_byref_records = 0;
  196 
  197 #ifdef _ILP32
  198         uint64_t max_rde_size = SMALLEST_POSSIBLE_MAX_RDT_MB << 20;
  199 #else
  200         uint64_t physmem = sysconf(_SC_PHYS_PAGES) * sysconf(_SC_PAGESIZE);
  201         uint64_t max_rde_size =
  202             MAX((physmem * MAX_RDT_PHYSMEM_PERCENT) / 100,
  203             SMALLEST_POSSIBLE_MAX_RDT_MB << 20);
  204 #endif
  205 
  206         numbuckets = max_rde_size / (sizeof (redup_entry_t));
  207 
  208         /*
  209          * numbuckets must be a power of 2.  Increase number to
  210          * a power of 2 if necessary.
  211          */
  212         if (!ISP2(numbuckets))
  213                 numbuckets = 1ULL << highbit64(numbuckets);
  214 
  215         rdt.redup_hash_array =
  216             safe_calloc(numbuckets * sizeof (redup_entry_t *));
  217         rdt.ddecache = umem_cache_create("rde", sizeof (redup_entry_t), 0,
  218             NULL, NULL, NULL, NULL, NULL, 0);
  219         rdt.numhashbits = highbit64(numbuckets) - 1;
  220         rdt.ddt_count = 0;
  221 
  222         char *buf = safe_calloc(bufsz);
  223         FILE *ofp = fdopen(infd, "r");
  224         long offset = ftell(ofp);
  225         int begin = 0;
  226         boolean_t seen = B_FALSE;
  227         while (sfread(drr, sizeof (*drr), ofp) != 0) {
  228                 num_records++;
  229 
  230                 /*
  231                  * We need to regenerate the checksum.
  232                  */
  233                 if (drr->drr_type != DRR_BEGIN) {
  234                         memset(&drr->drr_u.drr_checksum.drr_checksum, 0,
  235                             sizeof (drr->drr_u.drr_checksum.drr_checksum));
  236                 }
  237 
  238                 uint64_t payload_size = 0;
  239                 switch (drr->drr_type) {
  240                 case DRR_BEGIN:
  241                 {
  242                         struct drr_begin *drrb = &drr->drr_u.drr_begin;
  243                         int fflags;
  244                         ZIO_SET_CHECKSUM(&stream_cksum, 0, 0, 0, 0);
  245                         VERIFY0(begin++);
  246                         seen = B_TRUE;
  247 
  248                         assert(drrb->drr_magic == DMU_BACKUP_MAGIC);
  249 
  250                         /* clear the DEDUP feature flag for this stream */
  251                         fflags = DMU_GET_FEATUREFLAGS(drrb->drr_versioninfo);
  252                         fflags &= ~(DMU_BACKUP_FEATURE_DEDUP |
  253                             DMU_BACKUP_FEATURE_DEDUPPROPS);
  254                         /* cppcheck-suppress syntaxError */
  255                         DMU_SET_FEATUREFLAGS(drrb->drr_versioninfo, fflags);
  256 
  257                         uint32_t sz = drr->drr_payloadlen;
  258 
  259                         VERIFY3U(sz, <=, 1U << 28);
  260 
  261                         if (sz != 0) {
  262                                 if (sz > bufsz) {
  263                                         free(buf);
  264                                         buf = safe_calloc(sz);
  265                                         bufsz = sz;
  266                                 }
  267                                 (void) sfread(buf, sz, ofp);
  268                         }
  269                         payload_size = sz;
  270                         break;
  271                 }
  272 
  273                 case DRR_END:
  274                 {
  275                         struct drr_end *drre = &drr->drr_u.drr_end;
  276                         /*
  277                          * We would prefer to just check --begin == 0, but
  278                          * replication streams have an end of stream END
  279                          * record, so we must avoid tripping it.
  280                          */
  281                         VERIFY3B(seen, ==, B_TRUE);
  282                         begin--;
  283                         /*
  284                          * Use the recalculated checksum, unless this is
  285                          * the END record of a stream package, which has
  286                          * no checksum.
  287                          */
  288                         if (!ZIO_CHECKSUM_IS_ZERO(&drre->drr_checksum))
  289                                 drre->drr_checksum = stream_cksum;
  290                         break;
  291                 }
  292 
  293                 case DRR_OBJECT:
  294                 {
  295                         struct drr_object *drro = &drr->drr_u.drr_object;
  296                         VERIFY3S(begin, ==, 1);
  297 
  298                         if (drro->drr_bonuslen > 0) {
  299                                 payload_size = DRR_OBJECT_PAYLOAD_SIZE(drro);
  300                                 (void) sfread(buf, payload_size, ofp);
  301                         }
  302                         break;
  303                 }
  304 
  305                 case DRR_SPILL:
  306                 {
  307                         struct drr_spill *drrs = &drr->drr_u.drr_spill;
  308                         VERIFY3S(begin, ==, 1);
  309                         payload_size = DRR_SPILL_PAYLOAD_SIZE(drrs);
  310                         (void) sfread(buf, payload_size, ofp);
  311                         break;
  312                 }
  313 
  314                 case DRR_WRITE_BYREF:
  315                 {
  316                         struct drr_write_byref drrwb =
  317                             drr->drr_u.drr_write_byref;
  318                         VERIFY3S(begin, ==, 1);
  319 
  320                         num_write_byref_records++;
  321 
  322                         /*
  323                          * Look up in hash table by drrwb->drr_refguid,
  324                          * drr_refobject, drr_refoffset.  Replace this
  325                          * record with the found WRITE record, but with
  326                          * drr_object,drr_offset,drr_toguid replaced with ours.
  327                          */
  328                         uint64_t stream_offset = 0;
  329                         rdt_lookup(&rdt, drrwb.drr_refguid,
  330                             drrwb.drr_refobject, drrwb.drr_refoffset,
  331                             &stream_offset);
  332 
  333                         spread(infd, drr, sizeof (*drr), stream_offset);
  334 
  335                         assert(drr->drr_type == DRR_WRITE);
  336                         struct drr_write *drrw = &drr->drr_u.drr_write;
  337                         assert(drrw->drr_toguid == drrwb.drr_refguid);
  338                         assert(drrw->drr_object == drrwb.drr_refobject);
  339                         assert(drrw->drr_offset == drrwb.drr_refoffset);
  340 
  341                         payload_size = DRR_WRITE_PAYLOAD_SIZE(drrw);
  342                         spread(infd, buf, payload_size,
  343                             stream_offset + sizeof (*drr));
  344 
  345                         drrw->drr_toguid = drrwb.drr_toguid;
  346                         drrw->drr_object = drrwb.drr_object;
  347                         drrw->drr_offset = drrwb.drr_offset;
  348                         break;
  349                 }
  350 
  351                 case DRR_WRITE:
  352                 {
  353                         struct drr_write *drrw = &drr->drr_u.drr_write;
  354                         VERIFY3S(begin, ==, 1);
  355                         payload_size = DRR_WRITE_PAYLOAD_SIZE(drrw);
  356                         (void) sfread(buf, payload_size, ofp);
  357 
  358                         rdt_insert(&rdt, drrw->drr_toguid,
  359                             drrw->drr_object, drrw->drr_offset, offset);
  360                         break;
  361                 }
  362 
  363                 case DRR_WRITE_EMBEDDED:
  364                 {
  365                         struct drr_write_embedded *drrwe =
  366                             &drr->drr_u.drr_write_embedded;
  367                         VERIFY3S(begin, ==, 1);
  368                         payload_size =
  369                             P2ROUNDUP((uint64_t)drrwe->drr_psize, 8);
  370                         (void) sfread(buf, payload_size, ofp);
  371                         break;
  372                 }
  373 
  374                 case DRR_FREEOBJECTS:
  375                 case DRR_FREE:
  376                 case DRR_OBJECT_RANGE:
  377                         VERIFY3S(begin, ==, 1);
  378                         break;
  379 
  380                 default:
  381                         (void) fprintf(stderr, "INVALID record type 0x%x\n",
  382                             drr->drr_type);
  383                         /* should never happen, so assert */
  384                         assert(B_FALSE);
  385                 }
  386 
  387                 if (feof(ofp)) {
  388                         fprintf(stderr, "Error: unexpected end-of-file\n");
  389                         exit(1);
  390                 }
  391                 if (ferror(ofp)) {
  392                         fprintf(stderr, "Error while reading file: %s\n",
  393                             strerror(errno));
  394                         exit(1);
  395                 }
  396 
  397                 /*
  398                  * We need to recalculate the checksum, and it needs to be
  399                  * initially zero to do that.  BEGIN records don't have
  400                  * a checksum.
  401                  */
  402                 if (drr->drr_type != DRR_BEGIN) {
  403                         memset(&drr->drr_u.drr_checksum.drr_checksum, 0,
  404                             sizeof (drr->drr_u.drr_checksum.drr_checksum));
  405                 }
  406                 if (dump_record(drr, buf, payload_size,
  407                     &stream_cksum, outfd) != 0)
  408                         break;
  409                 if (drr->drr_type == DRR_END) {
  410                         /*
  411                          * Typically the END record is either the last
  412                          * thing in the stream, or it is followed
  413                          * by a BEGIN record (which also zeros the checksum).
  414                          * However, a stream package ends with two END
  415                          * records.  The last END record's checksum starts
  416                          * from zero.
  417                          */
  418                         ZIO_SET_CHECKSUM(&stream_cksum, 0, 0, 0, 0);
  419                 }
  420                 offset = ftell(ofp);
  421         }
  422 
  423         if (verbose) {
  424                 char mem_str[16];
  425                 zfs_nicenum(rdt.ddt_count * sizeof (redup_entry_t),
  426                     mem_str, sizeof (mem_str));
  427                 fprintf(stderr, "converted stream with %llu total records, "
  428                     "including %llu dedup records, using %sB memory.\n",
  429                     (long long)num_records,
  430                     (long long)num_write_byref_records,
  431                     mem_str);
  432         }
  433 
  434         umem_cache_destroy(rdt.ddecache);
  435         free(rdt.redup_hash_array);
  436         free(buf);
  437         (void) fclose(ofp);
  438 }
  439 
  440 int
  441 zstream_do_redup(int argc, char *argv[])
  442 {
  443         boolean_t verbose = B_FALSE;
  444         int c;
  445 
  446         while ((c = getopt(argc, argv, "v")) != -1) {
  447                 switch (c) {
  448                 case 'v':
  449                         verbose = B_TRUE;
  450                         break;
  451                 case '?':
  452                         (void) fprintf(stderr, "invalid option '%c'\n",
  453                             optopt);
  454                         zstream_usage();
  455                         break;
  456                 }
  457         }
  458 
  459         argc -= optind;
  460         argv += optind;
  461 
  462         if (argc != 1)
  463                 zstream_usage();
  464 
  465         const char *filename = argv[0];
  466 
  467         if (isatty(STDOUT_FILENO)) {
  468                 (void) fprintf(stderr,
  469                     "Error: Stream can not be written to a terminal.\n"
  470                     "You must redirect standard output.\n");
  471                 return (1);
  472         }
  473 
  474         int fd = open(filename, O_RDONLY);
  475         if (fd == -1) {
  476                 (void) fprintf(stderr,
  477                     "Error while opening file '%s': %s\n",
  478                     filename, strerror(errno));
  479                 exit(1);
  480         }
  481 
  482         fletcher_4_init();
  483         zfs_redup_stream(fd, STDOUT_FILENO, verbose);
  484         fletcher_4_fini();
  485 
  486         close(fd);
  487 
  488         return (0);
  489 }

Cache object: 17726a6be7b2fcd2f9f754cc2a9f213d


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]


This page is part of the FreeBSD/Linux Kernel Cross-Reference, and was automatically generated using a modified version of the LXR engine.