The Design and Implementation of the FreeBSD Operating System, Second Edition
Now available: The Design and Implementation of the FreeBSD Operating System (Second Edition)


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]

FreeBSD/Linux Kernel Cross Reference
sys/tools/tests/libMicro/libmicro.c

Version: -  FREEBSD  -  FREEBSD-13-STABLE  -  FREEBSD-13-0  -  FREEBSD-12-STABLE  -  FREEBSD-12-0  -  FREEBSD-11-STABLE  -  FREEBSD-11-0  -  FREEBSD-10-STABLE  -  FREEBSD-10-0  -  FREEBSD-9-STABLE  -  FREEBSD-9-0  -  FREEBSD-8-STABLE  -  FREEBSD-8-0  -  FREEBSD-7-STABLE  -  FREEBSD-7-0  -  FREEBSD-6-STABLE  -  FREEBSD-6-0  -  FREEBSD-5-STABLE  -  FREEBSD-5-0  -  FREEBSD-4-STABLE  -  FREEBSD-3-STABLE  -  FREEBSD22  -  l41  -  OPENBSD  -  linux-2.6  -  MK84  -  PLAN9  -  xnu-8792 
SearchContext: -  none  -  3  -  10 

    1 /*
    2  * CDDL HEADER START
    3  *
    4  * The contents of this file are subject to the terms
    5  * of the Common Development and Distribution License
    6  * (the "License").  You may not use this file except
    7  * in compliance with the License.
    8  *
    9  * You can obtain a copy of the license at
   10  * src/OPENSOLARIS.LICENSE
   11  * or http://www.opensolaris.org/os/licensing.
   12  * See the License for the specific language governing
   13  * permissions and limitations under the License.
   14  *
   15  * When distributing Covered Code, include this CDDL
   16  * HEADER in each file and include the License file at
   17  * usr/src/OPENSOLARIS.LICENSE.  If applicable,
   18  * add the following below this CDDL HEADER, with the
   19  * fields enclosed by brackets "[]" replaced with your
   20  * own identifying information: Portions Copyright [yyyy]
   21  * [name of copyright owner]
   22  *
   23  * CDDL HEADER END
   24  */
   25 
   26 /*
   27  * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
   28  * Use is subject to license terms.
   29  */
   30 
   31 /*
   32  * benchmarking routines
   33  */
   34 
   35 #include <sys/types.h>
   36 #include <sys/time.h>
   37 #include <sys/ipc.h>
   38 #include <sys/sem.h>
   39 #include <sys/mman.h>
   40 #include <sys/wait.h>
   41 #include <ctype.h>
   42 #include <string.h>
   43 #include <strings.h>
   44 #include <signal.h>
   45 #include <stdio.h>
   46 #include <unistd.h>
   47 #include <stdlib.h>
   48 #include <poll.h>
   49 #include <pthread.h>
   50 #include <dlfcn.h>
   51 #include <errno.h>
   52 #include <sys/resource.h>
   53 #include <math.h>
   54 #include <limits.h>
   55 
   56 #ifdef  __sun
   57 #include <sys/elf.h>
   58 #endif
   59 
   60 #include "libmicro.h"
   61 
   62 
   63 #if defined(__APPLE__)
   64 #include <mach/mach_time.h>
   65 
   66 long long
   67 gethrtime(void)
   68 {
   69    long long        elapsed;
   70    static long long        start;
   71    static mach_timebase_info_data_t    sTimebaseInfo = { 0, 0 };
   72 
   73    // If this is the first time we've run, get the timebase.
   74    // We can use denom == 0 to indicate that sTimebaseInfo is
   75    // uninitialised because it makes no sense to have a zero
   76    // denominator in a fraction.
   77 
   78    if ( sTimebaseInfo.denom == 0 ) {
   79        (void) mach_timebase_info(&sTimebaseInfo);
   80                 start = mach_absolute_time();
   81    }
   82 
   83    elapsed = mach_absolute_time() - start;
   84 
   85    // Convert to nanoseconds.
   86         // return (elapsed * (long long)sTimebaseInfo.numer)/(long long)sTimebaseInfo.denom;
   87         
   88         // Provided the final result is representable in 64 bits the following maneuver will
   89         // deliver that result without intermediate overflow.
   90         if (sTimebaseInfo.denom == sTimebaseInfo.numer)
   91                 return elapsed;
   92         else if (sTimebaseInfo.denom == 1)
   93                 return elapsed * (long long)sTimebaseInfo.numer;
   94         else {
   95        // Decompose elapsed = eta32 * 2^32 + eps32:
   96        long long eta32 = elapsed >> 32;
   97        long long eps32 = elapsed & 0x00000000ffffffffLL;
   98 
   99        long long numer = sTimebaseInfo.numer, denom = sTimebaseInfo.denom;
  100 
  101        // Form product of elapsed64 (decomposed) and numer:
  102        long long mu64 = numer * eta32;
  103        long long lambda64 = numer * eps32;
  104 
  105        // Divide the constituents by denom:
  106        long long q32 = mu64/denom;
  107        long long r32 = mu64 - (q32 * denom); // mu64 % denom
  108 
  109        return (q32 << 32) + ((r32 << 32) + lambda64)/denom;
  110         }
  111 }
  112 
  113 #endif
  114 
  115 /*
  116  * user visible globals
  117  */
  118 
  119 int                             lm_argc = 0;
  120 char **                         lm_argv = NULL;
  121 
  122 int                             lm_opt1;
  123 int                             lm_optA;
  124 int                             lm_optB;
  125 int                             lm_optC = 100;
  126 int                             lm_optD;
  127 int                             lm_optE;
  128 int                             lm_optH;
  129 int                             lm_optI;
  130 int                             lm_optL = 0;
  131 int                             lm_optM = 0;
  132 char                            *lm_optN;
  133 int                             lm_optP;
  134 int                             lm_optS;
  135 int                             lm_optT;
  136 int                             lm_optW;
  137 
  138 int                             lm_def1 = 0;
  139 int                             lm_defB = 0; /* use lm_nsecs_per_op */
  140 int                             lm_defD = 10;
  141 int                             lm_defH = 0;
  142 char                            *lm_defN = NULL;
  143 int                             lm_defP = 1;
  144 
  145 int                             lm_defS = 0;
  146 int                             lm_defT = 1;
  147 
  148 /*
  149  * default on fast platform, should be overridden by individual
  150  * benchmarks if significantly wrong in either direction.
  151  */
  152 
  153 int                             lm_nsecs_per_op = 5;
  154 
  155 char                            *lm_procpath;
  156 char                            lm_procname[STRSIZE];
  157 char                            lm_usage[STRSIZE];
  158 char                            lm_optstr[STRSIZE];
  159 char                            lm_header[STRSIZE];
  160 size_t                          lm_tsdsize = 0;
  161 
  162 
  163 /*
  164  *  Globals we do not export to the user
  165  */
  166 
  167 static barrier_t                *lm_barrier;
  168 static pid_t                    *pids = NULL;
  169 static pthread_t                *tids = NULL;
  170 static int                      pindex = -1;
  171 static void                     *tsdseg = NULL;
  172 static size_t                   tsdsize = 0;
  173 
  174 #ifdef USE_RDTSC
  175 static long long                lm_hz = 0;
  176 #endif
  177 
  178 
  179 /*
  180  * Forward references
  181  */
  182 
  183 static void             worker_process();
  184 static void             usage();
  185 static void             print_stats(barrier_t *);
  186 static void             print_histo(barrier_t *);
  187 static int              remove_outliers(double *, int, stats_t *);
  188 static long long        nsecs_overhead;
  189 static long long        nsecs_resolution;
  190 static long long        get_nsecs_overhead();
  191 static int              crunch_stats(double *, int, stats_t *);
  192 static void             compute_stats(barrier_t *);
  193 /*
  194  * main routine; renamed in this file to allow linking with other
  195  * files
  196  */
  197 
  198 int
  199 actual_main(int argc, char *argv[])
  200 {
  201         int                     i;
  202         int                     opt;
  203         extern char             *optarg;
  204         char                    *tmp;
  205         char                    optstr[256];
  206         barrier_t               *b;
  207         long long               startnsecs = getnsecs();
  208 
  209 #ifdef USE_RDTSC
  210         if (getenv("LIBMICRO_HZ") == NULL) {
  211                 (void) printf("LIBMICRO_HZ needed but not set\n");
  212                 exit(1);
  213         }
  214         lm_hz = strtoll(getenv("LIBMICRO_HZ"), NULL, 10);
  215 #endif
  216 
  217         lm_argc = argc;
  218         lm_argv = argv;
  219 
  220         /* before we do anything */
  221         (void) benchmark_init();
  222 
  223 
  224         nsecs_overhead = get_nsecs_overhead();
  225         nsecs_resolution = get_nsecs_resolution();
  226 
  227         /*
  228          * Set defaults
  229          */
  230 
  231         lm_opt1 = lm_def1;
  232         lm_optB = lm_defB;
  233         lm_optD = lm_defD;
  234         lm_optH = lm_defH;
  235         lm_optN = lm_defN;
  236         lm_optP = lm_defP;
  237 
  238         lm_optS = lm_defS;
  239         lm_optT = lm_defT;
  240 
  241         /*
  242          * squirrel away the path to the current
  243          * binary in a way that works on both
  244          * Linux and Solaris
  245          */
  246 
  247         if (*argv[0] == '/') {
  248                 lm_procpath = strdup(argv[0]);
  249                 *strrchr(lm_procpath, '/') = 0;
  250         } else {
  251                 char path[1024];
  252                 (void) getcwd(path, 1024);
  253                 (void) strcat(path, "/");
  254                 (void) strcat(path, argv[0]);
  255                 *strrchr(path, '/') = 0;
  256                 lm_procpath = strdup(path);
  257         }
  258 
  259         /*
  260          * name of binary
  261          */
  262 
  263         if ((tmp = strrchr(argv[0], '/')) == NULL)
  264                 (void) strcpy(lm_procname, argv[0]);
  265         else
  266                 (void) strcpy(lm_procname, tmp + 1);
  267 
  268         if (lm_optN == NULL) {
  269                 lm_optN = lm_procname;
  270         }
  271 
  272         /*
  273          * Parse command line arguments
  274          */
  275 
  276         (void) sprintf(optstr, "1AB:C:D:EHI:LMN:P:RST:VW?%s", lm_optstr);
  277         while ((opt = getopt(argc, argv, optstr)) != -1) {
  278                 switch (opt) {
  279                 case '1':
  280                         lm_opt1 = 1;
  281                         break;
  282                 case 'A':
  283                         lm_optA = 1;
  284                         break;
  285                 case 'B':
  286                         lm_optB = sizetoint(optarg);
  287                         break;
  288                 case 'C':
  289                         lm_optC = sizetoint(optarg);
  290                         break;
  291                 case 'D':
  292                         lm_optD = sizetoint(optarg);
  293                         break;
  294                 case 'E':
  295                         lm_optE = 1;
  296                         break;
  297                 case 'H':
  298                         lm_optH = 1;
  299                         break;
  300                 case 'I':
  301                         lm_optI = sizetoint(optarg);
  302                         break;
  303                 case 'L':
  304                         lm_optL = 1;
  305                         break;
  306                 case 'M':
  307                         lm_optM = 1;
  308                         break;
  309                 case 'N':
  310                         lm_optN = optarg;
  311                         break;
  312                 case 'P':
  313                         lm_optP = sizetoint(optarg);
  314                         break;
  315                 case 'S':
  316                         lm_optS = 1;
  317                         break;
  318                 case 'T':
  319                         lm_optT = sizetoint(optarg);
  320                         break;
  321                 case 'V':
  322                         (void) printf("%s\n", LIBMICRO_VERSION);
  323                         exit(0);
  324                         break;
  325                 case 'W':
  326                         lm_optW = 1;
  327                         lm_optS = 1;
  328                         break;
  329                 case '?':
  330                         usage();
  331                         exit(0);
  332                         break;
  333                 default:
  334                         if (benchmark_optswitch(opt, optarg) == -1) {
  335                                 usage();
  336                                 exit(0);
  337                         }
  338                 }
  339         }
  340 
  341         /* deal with implicit and overriding options */
  342         if (lm_opt1 && lm_optP > 1) {
  343                 lm_optP = 1;
  344                 (void) printf("warning: -1 overrides -P\n");
  345         }
  346 
  347         if (lm_optE) {
  348                 (void) fprintf(stderr, "Running:%20s", lm_optN);
  349                 (void) fflush(stderr);
  350         }
  351 
  352         if (lm_optB == 0) {
  353                 /*
  354                  * neither benchmark or user has specified the number
  355                  * of cnts/sample, so use computed value
  356                  */
  357                 if (lm_optI)
  358                         lm_nsecs_per_op = lm_optI;
  359 #define BLOCK_TOCK_DURATION 10000 /* number of raw timer "tocks" ideally comprising a block of work */
  360                 lm_optB = nsecs_resolution * BLOCK_TOCK_DURATION / lm_nsecs_per_op;
  361                 if (lm_optB == 0)
  362                         lm_optB = 1;
  363         }
  364 
  365         /*
  366          * now that the options are set
  367          */
  368 
  369         if (benchmark_initrun() == -1) {
  370                 exit(1);
  371         }
  372 
  373         /* allocate dynamic data */
  374         pids = (pid_t *)malloc(lm_optP * sizeof (pid_t));
  375         if (pids == NULL) {
  376                 perror("malloc(pids)");
  377                 exit(1);
  378         }
  379         tids = (pthread_t *)malloc(lm_optT * sizeof (pthread_t));
  380         if (tids == NULL) {
  381                 perror("malloc(tids)");
  382                 exit(1);
  383         }
  384 
  385         /* check that the case defines lm_tsdsize before proceeding */
  386         if (lm_tsdsize == (size_t)-1) {
  387                 (void) fprintf(stderr, "error in benchmark_init: "
  388                     "lm_tsdsize not set\n");
  389                 exit(1);
  390         }
  391 
  392         /* round up tsdsize to nearest 128 to eliminate false sharing */
  393         tsdsize = ((lm_tsdsize + 127) / 128) * 128;
  394 
  395         /* allocate sufficient TSD for each thread in each process */
  396         tsdseg = (void *)mmap(NULL, lm_optT * lm_optP * tsdsize + 8192,
  397             PROT_READ | PROT_WRITE, MAP_SHARED | MAP_ANON, -1, 0L);
  398         if (tsdseg == NULL) {
  399                 perror("mmap(tsd)");
  400                 exit(1);
  401         }
  402 
  403         /* initialise worker synchronisation */
  404         b = barrier_create(lm_optT * lm_optP, DATASIZE);
  405         if (b == NULL) {
  406                 perror("barrier_create()");
  407                 exit(1);
  408         }
  409         lm_barrier = b;
  410         b->ba_flag = 1;
  411 
  412         /* need this here so that parent and children can call exit() */
  413         (void) fflush(stdout);
  414         (void) fflush(stderr);
  415 
  416         /* when we started and when to stop */
  417 
  418         b->ba_starttime = getnsecs();
  419         b->ba_deadline = (long long) (b->ba_starttime + (lm_optD * 1000000LL));
  420 
  421         /* do the work */
  422         if (lm_opt1) {
  423                 /* single process, non-fork mode */
  424                 pindex = 0;
  425                 worker_process();
  426         } else {
  427                 /* create worker processes */
  428                 for (i = 0; i < lm_optP; i++) {
  429                         pids[i] = fork();
  430 
  431                         switch (pids[i]) {
  432                         case 0:
  433                                 pindex = i;
  434                                 worker_process();
  435                                 exit(0);
  436                                 break;
  437                         case -1:
  438                                 perror("fork");
  439                                 exit(1);
  440                                 break;
  441                         default:
  442                                 continue;
  443                         }
  444                 }
  445 
  446                 /* wait for worker processes */
  447                 for (i = 0; i < lm_optP; i++) {
  448                         if (pids[i] > 0) {
  449                                 (void) waitpid(pids[i], NULL, 0);
  450                         }
  451                 }
  452         }
  453 
  454         b->ba_endtime = getnsecs();
  455 
  456         /* compute results */
  457 
  458         compute_stats(b);
  459 
  460         /* print arguments benchmark was invoked with ? */
  461         if (lm_optL) {
  462                 int l;
  463                 (void) printf("# %s ", argv[0]);
  464                 for (l = 1; l < argc; l++) {
  465                         (void) printf("%s ", argv[l]);
  466                 }
  467                 (void) printf("\n");
  468         }
  469 
  470         /* print result header (unless suppressed) */
  471         if (!lm_optH) {
  472                 (void) printf("%12s %3s %3s %12s %12s %8s %8s %s\n",
  473                     "", "prc", "thr",
  474                     "usecs/call",
  475                     "samples", "errors", "cnt/samp", lm_header);
  476         }
  477 
  478         /* print result */
  479 
  480         (void) printf("%-12s %3d %3d %12.5f %12d %8lld %8d %s\n",
  481             lm_optN, lm_optP, lm_optT,
  482             (lm_optM?b->ba_corrected.st_mean:b->ba_corrected.st_median),
  483             b->ba_batches, b->ba_errors, lm_optB,
  484             benchmark_result());
  485 
  486         if (lm_optS) {
  487                 print_stats(b);
  488         }
  489 
  490         /* just incase something goes awry */
  491         (void) fflush(stdout);
  492         (void) fflush(stderr);
  493 
  494         /* cleanup by stages */
  495         (void) benchmark_finirun();
  496         (void) barrier_destroy(b);
  497         (void) benchmark_fini();
  498 
  499         if (lm_optE) {
  500                 (void) fprintf(stderr, " for %12.5f seconds\n",
  501                     (double)(getnsecs() - startnsecs) /
  502                     1.e9);
  503                 (void) fflush(stderr);
  504         }
  505         return (0);
  506 }
  507 
  508 void *
  509 worker_thread(void *arg)
  510 {
  511         result_t                r;
  512         long long               last_sleep = 0;
  513         long long               t;
  514 
  515         r.re_errors = benchmark_initworker(arg);
  516 
  517         while (lm_barrier->ba_flag) {
  518                 r.re_count = 0;
  519                 r.re_errors += benchmark_initbatch(arg);
  520 
  521                 /* sync to clock */
  522 
  523                 if (lm_optA && ((t = getnsecs()) - last_sleep) > 75000000LL) {
  524                         (void) poll(0, 0, 10);
  525                         last_sleep = t;
  526                 }
  527                 /* wait for it ... */
  528                 (void) barrier_queue(lm_barrier, NULL);
  529 
  530                 /* time the test */
  531                 r.re_t0 = getnsecs();
  532                 (void) benchmark(arg, &r);
  533                 r.re_t1 = getnsecs();
  534 
  535                 /* time to stop? */
  536                 if (r.re_t1 > lm_barrier->ba_deadline &&
  537                     (!lm_optC || lm_optC < lm_barrier->ba_batches)) {
  538                         lm_barrier->ba_flag = 0;
  539                 }
  540 
  541                 /* record results and sync */
  542                 (void) barrier_queue(lm_barrier, &r);
  543 
  544                 (void) benchmark_finibatch(arg);
  545 
  546                 r.re_errors = 0;
  547         }
  548 
  549         (void) benchmark_finiworker(arg);
  550 
  551         return (0);
  552 }
  553 
  554 void
  555 worker_process()
  556 {
  557         int                     i;
  558         void                    *tsd;
  559 
  560         for (i = 1; i < lm_optT; i++) {
  561                 tsd = gettsd(pindex, i);
  562                 if (pthread_create(&tids[i], NULL, worker_thread, tsd) != 0) {
  563                         perror("pthread_create");
  564                         exit(1);
  565                 }
  566         }
  567 
  568         tsd = gettsd(pindex, 0);
  569         (void) worker_thread(tsd);
  570 
  571         for (i = 1; i < lm_optT; i++) {
  572                 (void) pthread_join(tids[i], NULL);
  573         }
  574 }
  575 
  576 void
  577 usage()
  578 {
  579         (void) printf(
  580             "usage: %s\n"
  581             "       [-1] (single process; overrides -P > 1)\n"
  582             "       [-A] (align with clock)\n"
  583             "       [-B batch-size (default %d)]\n"
  584             "       [-C minimum number of samples (default 0)]\n"
  585             "       [-D duration in msecs (default %ds)]\n"
  586             "       [-E (echo name to stderr)]\n"
  587             "       [-H] (suppress headers)\n"
  588             "       [-I] nsecs per op (used to compute batch size)"
  589             "       [-L] (print argument line)\n"
  590             "       [-M] (reports mean rather than median)\n"
  591             "       [-N test-name (default '%s')]\n"
  592             "       [-P processes (default %d)]\n"
  593             "       [-S] (print detailed stats)\n"
  594             "       [-T threads (default %d)]\n"
  595             "       [-V] (print the libMicro version and exit)\n"
  596             "       [-W] (flag possible benchmark problems)\n"
  597             "%s\n",
  598             lm_procname,
  599             lm_defB, lm_defD, lm_procname, lm_defP, lm_defT,
  600             lm_usage);
  601 }
  602 
  603 void
  604 print_warnings(barrier_t *b)
  605 {
  606         int head = 0;
  607         int increase;
  608 
  609         if (b->ba_quant) {
  610                 if (!head++) {
  611                         (void) printf("#\n# WARNINGS\n");
  612                 }
  613                 increase = (int)(floor((nsecs_resolution * 100.0) /
  614                     ((double)lm_optB * b->ba_corrected.st_median * 1000.0)) +
  615                     1.0);
  616                 (void) printf("#     Quantization error likely;"
  617                     "increase batch size (-B option) %dX to avoid.\n",
  618                     increase);
  619         }
  620 
  621         /*
  622          * XXX should warn on median != mean by a lot
  623          */
  624 
  625         if (b->ba_errors) {
  626                 if (!head++) {
  627                         (void) printf("#\n# WARNINGS\n");
  628                 }
  629                 (void) printf("#     Errors occured during benchmark.\n");
  630         }
  631 }
  632 
  633 void
  634 print_stats(barrier_t *b)
  635 {
  636         (void) printf("#\n");
  637         (void) printf("# STATISTICS         %12s          %12s\n",
  638             "usecs/call (raw)",
  639             "usecs/call (outliers removed)");
  640 
  641         if (b->ba_count == 0) {
  642                 (void) printf("zero samples\n");
  643                 return;
  644         }
  645 
  646         (void) printf("#                    min %12.5f            %12.5f\n",
  647             b->ba_raw.st_min,
  648             b->ba_corrected.st_min);
  649 
  650         (void) printf("#                    max %12.5f            %12.5f\n",
  651             b->ba_raw.st_max,
  652             b->ba_corrected.st_max);
  653         (void) printf("#                   mean %12.5f            %12.5f\n",
  654             b->ba_raw.st_mean,
  655             b->ba_corrected.st_mean);
  656         (void) printf("#                 median %12.5f            %12.5f\n",
  657             b->ba_raw.st_median,
  658             b->ba_corrected.st_median);
  659         (void) printf("#                 stddev %12.5f            %12.5f\n",
  660             b->ba_raw.st_stddev,
  661             b->ba_corrected.st_stddev);
  662         (void) printf("#         standard error %12.5f            %12.5f\n",
  663             b->ba_raw.st_stderr,
  664             b->ba_corrected.st_stderr);
  665         (void) printf("#   99%% confidence level %12.5f            %12.5f\n",
  666             b->ba_raw.st_99confidence,
  667             b->ba_corrected.st_99confidence);
  668         (void) printf("#                   skew %12.5f            %12.5f\n",
  669             b->ba_raw.st_skew,
  670             b->ba_corrected.st_skew);
  671         (void) printf("#               kurtosis %12.5f            %12.5f\n",
  672             b->ba_raw.st_kurtosis,
  673             b->ba_corrected.st_kurtosis);
  674 
  675         (void) printf("#       time correlation %12.5f            %12.5f\n",
  676             b->ba_raw.st_timecorr,
  677             b->ba_corrected.st_timecorr);
  678         (void) printf("#\n");
  679 
  680         (void) printf("#           elasped time %12.5f\n", (b->ba_endtime -
  681             b->ba_starttime) / 1.0e9);
  682         (void) printf("#      number of samples %12d\n",   b->ba_batches);
  683         (void) printf("#     number of outliers %12d\n", b->ba_outliers);
  684         (void) printf("#      getnsecs overhead %12d\n", (int)nsecs_overhead);
  685 
  686         (void) printf("#\n");
  687         (void) printf("# DISTRIBUTION\n");
  688 
  689         print_histo(b);
  690 
  691         if (lm_optW) {
  692                 print_warnings(b);
  693         }
  694 }
  695 
  696 void
  697 update_stats(barrier_t *b, result_t *r)
  698 {
  699         double                  time;
  700         double                  nsecs_per_call;
  701 
  702         if (b->ba_waiters == 0) {
  703                 /* first thread only */
  704                 b->ba_t0 = r->re_t0;
  705                 b->ba_t1 = r->re_t1;
  706                 b->ba_count0 = 0;
  707                 b->ba_errors0 = 0;
  708         } else {
  709                 /* all but first thread */
  710                 if (r->re_t0 < b->ba_t0) {
  711                         b->ba_t0 = r->re_t0;
  712                 }
  713                 if (r->re_t1 > b->ba_t1) {
  714                         b->ba_t1 = r->re_t1;
  715                 }
  716         }
  717 
  718         b->ba_count0  += r->re_count;
  719         b->ba_errors0 += r->re_errors;
  720 
  721         if (b->ba_waiters == b->ba_hwm - 1) {
  722                 /* last thread only */
  723 
  724 
  725                 time = (double)b->ba_t1 - (double)b->ba_t0 -
  726                     (double)nsecs_overhead;
  727 
  728                 if (time < 100 * nsecs_resolution)
  729                         b->ba_quant++;
  730 
  731                 /*
  732                  * normalize by procs * threads if not -U
  733                  */
  734 
  735                 nsecs_per_call = time / (double)b->ba_count0 *
  736                     (double)(lm_optT * lm_optP);
  737 
  738                 b->ba_count  += b->ba_count0;
  739                 b->ba_errors += b->ba_errors0;
  740 
  741                 b->ba_data[b->ba_batches % b->ba_datasize] =
  742                     nsecs_per_call;
  743 
  744                 b->ba_batches++;
  745         }
  746 }
  747 
  748 #ifdef USE_SEMOP
  749 barrier_t *
  750 barrier_create(int hwm, int datasize)
  751 {
  752         struct sembuf           s[1];
  753         barrier_t               *b;
  754 
  755         /*LINTED*/
  756         b = (barrier_t *)mmap(NULL,
  757             sizeof (barrier_t) + (datasize - 1) * sizeof (double),
  758             PROT_READ | PROT_WRITE,
  759             MAP_SHARED | MAP_ANON, -1, 0L);
  760         if (b == (barrier_t *)MAP_FAILED) {
  761                 return (NULL);
  762         }
  763         b->ba_datasize = datasize;
  764 
  765         b->ba_flag  = 0;
  766         b->ba_hwm   = hwm;
  767         b->ba_semid = semget(IPC_PRIVATE, 3, 0600);
  768         if (b->ba_semid == -1) {
  769                 (void) munmap((void *)b, sizeof (barrier_t));
  770                 return (NULL);
  771         }
  772 
  773         /* [hwm - 1, 0, 0] */
  774         s[0].sem_num = 0;
  775         s[0].sem_op  = hwm - 1;
  776         s[0].sem_flg = 0;
  777         if (semop(b->ba_semid, s, 1) == -1) {
  778                 perror("semop(1)");
  779                 (void) semctl(b->ba_semid, 0, IPC_RMID);
  780                 (void) munmap((void *)b, sizeof (barrier_t));
  781                 return (NULL);
  782         }
  783 
  784         b->ba_waiters = 0;
  785         b->ba_phase = 0;
  786 
  787         b->ba_count = 0;
  788         b->ba_errors = 0;
  789 
  790         return (b);
  791 }
  792 
  793 int
  794 barrier_destroy(barrier_t *b)
  795 {
  796         (void) semctl(b->ba_semid, 0, IPC_RMID);
  797         (void) munmap((void *)b, sizeof (barrier_t));
  798 
  799         return (0);
  800 }
  801 
  802 int
  803 barrier_queue(barrier_t *b, result_t *r)
  804 {
  805         struct sembuf           s[2];
  806 
  807         /*
  808          * {s0(-(hwm-1))}
  809          * if ! nowait {s1(-(hwm-1))}
  810          *   (all other threads)
  811          *   update shared stats
  812          *   {s0(hwm-1), s1(1)}
  813          *   {s0(1), s2(-1)}
  814          * else
  815          *   (last thread)
  816          *   update shared stats
  817          *   {s2(hwm-1)}
  818          */
  819 
  820         s[0].sem_num = 0;
  821         s[0].sem_op  = -(b->ba_hwm - 1);
  822         s[0].sem_flg = 0;
  823         if (semop(b->ba_semid, s, 1) == -1) {
  824                 perror("semop(2)");
  825                 return (-1);
  826         }
  827 
  828         s[0].sem_num = 1;
  829         s[0].sem_op  = -(b->ba_hwm - 1);
  830         s[0].sem_flg = IPC_NOWAIT;
  831         if (semop(b->ba_semid, s, 1) == -1) {
  832                 if (errno != EAGAIN) {
  833                         perror("semop(3)");
  834                         return (-1);
  835                 }
  836 
  837                 /* all but the last thread */
  838 
  839                 if (r != NULL) {
  840                         update_stats(b, r);
  841                 }
  842 
  843                 b->ba_waiters++;
  844 
  845                 s[0].sem_num = 0;
  846                 s[0].sem_op  = b->ba_hwm - 1;
  847                 s[0].sem_flg = 0;
  848                 s[1].sem_num = 1;
  849                 s[1].sem_op  = 1;
  850                 s[1].sem_flg = 0;
  851                 if (semop(b->ba_semid, s, 2) == -1) {
  852                         perror("semop(4)");
  853                         return (-1);
  854                 }
  855 
  856                 s[0].sem_num = 0;
  857                 s[0].sem_op  = 1;
  858                 s[0].sem_flg = 0;
  859                 s[1].sem_num = 2;
  860                 s[1].sem_op  = -1;
  861                 s[1].sem_flg = 0;
  862                 if (semop(b->ba_semid, s, 2) == -1) {
  863                         perror("semop(5)");
  864                         return (-1);
  865                 }
  866 
  867         } else {
  868                 /* the last thread */
  869 
  870                 if (r != NULL) {
  871                         update_stats(b, r);
  872                 }
  873 
  874                 b->ba_waiters = 0;
  875                 b->ba_phase++;
  876 
  877                 s[0].sem_num = 2;
  878                 s[0].sem_op  = b->ba_hwm - 1;
  879                 s[0].sem_flg = 0;
  880                 if (semop(b->ba_semid, s, 1) == -1) {
  881                         perror("semop(6)");
  882                         return (-1);
  883                 }
  884         }
  885 
  886         return (0);
  887 }
  888 
  889 #else /* USE_SEMOP */
  890 
  891 barrier_t *
  892 barrier_create(int hwm, int datasize)
  893 {
  894         pthread_mutexattr_t     attr;
  895         pthread_condattr_t      cattr;
  896         barrier_t               *b;
  897 
  898         /*LINTED*/
  899         b = (barrier_t *)mmap(NULL,
  900             sizeof (barrier_t) + (datasize - 1) * sizeof (double),
  901             PROT_READ | PROT_WRITE,
  902             MAP_SHARED | MAP_ANON, -1, 0L);
  903         if (b == (barrier_t *)MAP_FAILED) {
  904                 return (NULL);
  905         }
  906         b->ba_datasize = datasize;
  907 
  908         b->ba_hwm = hwm;
  909         b->ba_flag  = 0;
  910 
  911         (void) pthread_mutexattr_init(&attr);
  912         (void) pthread_mutexattr_setpshared(&attr, PTHREAD_PROCESS_SHARED);
  913 
  914         (void) pthread_condattr_init(&cattr);
  915         (void) pthread_condattr_setpshared(&cattr, PTHREAD_PROCESS_SHARED);
  916 
  917         (void) pthread_mutex_init(&b->ba_lock, &attr);
  918         (void) pthread_cond_init(&b->ba_cv, &cattr);
  919 
  920         b->ba_waiters = 0;
  921         b->ba_phase = 0;
  922 
  923         b->ba_count = 0;
  924         b->ba_errors = 0;
  925 
  926         return (b);
  927 }
  928 
  929 int
  930 barrier_destroy(barrier_t *b)
  931 {
  932         (void) munmap((void *)b, sizeof (barrier_t));
  933 
  934         return (0);
  935 }
  936 
  937 int
  938 barrier_queue(barrier_t *b, result_t *r)
  939 {
  940         int                     phase;
  941 
  942         (void) pthread_mutex_lock(&b->ba_lock);
  943 
  944         if (r != NULL) {
  945                 update_stats(b, r);
  946         }
  947 
  948         phase = b->ba_phase;
  949 
  950         b->ba_waiters++;
  951         if (b->ba_hwm == b->ba_waiters) {
  952                 b->ba_waiters = 0;
  953                 b->ba_phase++;
  954                 (void) pthread_cond_broadcast(&b->ba_cv);
  955         }
  956 
  957         while (b->ba_phase == phase) {
  958                 (void) pthread_cond_wait(&b->ba_cv, &b->ba_lock);
  959         }
  960 
  961         (void) pthread_mutex_unlock(&b->ba_lock);
  962         return (0);
  963 }
  964 #endif /* USE_SEMOP */
  965 
  966 int
  967 gettindex()
  968 {
  969         int                     i;
  970 
  971         if (tids == NULL) {
  972                 return (-1);
  973         }
  974 
  975         for (i = 1; i < lm_optT; i++) {
  976                 if (pthread_self() == tids[i]) {
  977                         return (i);
  978                 }
  979         }
  980 
  981         return (0);
  982 }
  983 
  984 int
  985 getpindex()
  986 {
  987         return (pindex);
  988 }
  989 
  990 void *
  991 gettsd(int p, int t)
  992 {
  993         if ((p < 0) || (p >= lm_optP) || (t < 0) || (t >= lm_optT))
  994                 return (NULL);
  995 
  996         return ((void *)((unsigned long)tsdseg +
  997             (((p * lm_optT) + t) * tsdsize)));
  998 }
  999 
 1000 #if defined(__APPLE__)
 1001 int
 1002 gettsdindex(void *arg){
 1003         /*
 1004          * gettindex() can race with pthread_create() filling in tids[].
 1005          * This is an alternative approach to finding the calling thread's tsd in t
 1006 sdseg
 1007          */
 1008         return tsdsize ? ((unsigned long)arg - (unsigned long)tsdseg)/tsdsize : 0;
 1009 }
 1010 #endif /* __APPLE__ */
 1011 
 1012 #ifdef USE_GETHRTIME
 1013 long long
 1014 getnsecs()
 1015 {
 1016         return (gethrtime());
 1017 }
 1018 
 1019 long long
 1020 getusecs()
 1021 {
 1022         return (gethrtime() / 1000);
 1023 }
 1024 
 1025 #elif USE_RDTSC /* USE_GETHRTIME */
 1026 
 1027 __inline__ long long
 1028 rdtsc(void)
 1029 {
 1030         unsigned long long x;
 1031         __asm__ volatile(".byte 0x0f, 0x31" : "=A" (x));
 1032         return (x);
 1033 }
 1034 
 1035 long long
 1036 getusecs()
 1037 {
 1038         return (rdtsc() * 1000000 / lm_hz);
 1039 }
 1040 
 1041 long long
 1042 getnsecs()
 1043 {
 1044         return (rdtsc() * 1000000000 / lm_hz);
 1045 }
 1046 
 1047 #else /* USE_GETHRTIME */
 1048 
 1049 long long
 1050 getusecs()
 1051 {
 1052         struct timeval          tv;
 1053 
 1054         (void) gettimeofday(&tv, NULL);
 1055 
 1056         return ((long long)tv.tv_sec * 1000000LL + (long long) tv.tv_usec);
 1057 }
 1058 
 1059 long long
 1060 getnsecs()
 1061 {
 1062         struct timeval          tv;
 1063 
 1064         (void) gettimeofday(&tv, NULL);
 1065 
 1066         return ((long long)tv.tv_sec * 1000000000LL +
 1067             (long long) tv.tv_usec * 1000LL);
 1068 }
 1069 
 1070 #endif /* USE_GETHRTIME */
 1071 
 1072 int
 1073 setfdlimit(int limit)
 1074 {
 1075         struct rlimit rlimit;
 1076 
 1077         if (getrlimit(RLIMIT_NOFILE, &rlimit) < 0) {
 1078                 perror("getrlimit");
 1079                 exit(1);
 1080         }
 1081 
 1082         if (rlimit.rlim_cur > limit)
 1083                 return (0); /* no worries */
 1084 
 1085         rlimit.rlim_cur = limit;
 1086 
 1087         if (rlimit.rlim_max < limit)
 1088                 rlimit.rlim_max = limit;
 1089 
 1090         if (setrlimit(RLIMIT_NOFILE, &rlimit) < 0) {
 1091                 perror("setrlimit");
 1092                 exit(3);
 1093         }
 1094 
 1095         return (0);
 1096 }
 1097 
 1098 
 1099 #define KILOBYTE                1024
 1100 #define MEGABYTE                (KILOBYTE * KILOBYTE)
 1101 #define GIGABYTE                (KILOBYTE * MEGABYTE)
 1102 
 1103 long long
 1104 sizetoll(const char *arg)
 1105 {
 1106         int                     len = strlen(arg);
 1107         int                     i;
 1108         long long               mult = 1;
 1109 
 1110         if (len && isalpha(arg[len - 1])) {
 1111                 switch (arg[len - 1]) {
 1112 
 1113                 case 'k':
 1114                 case 'K':
 1115                         mult = KILOBYTE;
 1116                         break;
 1117                 case 'm':
 1118                 case 'M':
 1119                         mult = MEGABYTE;
 1120                         break;
 1121                 case 'g':
 1122                 case 'G':
 1123                         mult = GIGABYTE;
 1124                         break;
 1125                 default:
 1126                         return (-1);
 1127                 }
 1128 
 1129                 for (i = 0; i < len - 1; i++)
 1130                         if (!isdigit(arg[i]))
 1131                                 return (-1);
 1132         }
 1133 
 1134         return (mult * strtoll(arg, NULL, 10));
 1135 }
 1136 
 1137 int
 1138 sizetoint(const char *arg)
 1139 {
 1140         int                     len = strlen(arg);
 1141         int                     i;
 1142         long long               mult = 1;
 1143 
 1144         if (len && isalpha(arg[len - 1])) {
 1145                 switch (arg[len - 1]) {
 1146 
 1147                 case 'k':
 1148                 case 'K':
 1149                         mult = KILOBYTE;
 1150                         break;
 1151                 case 'm':
 1152                 case 'M':
 1153                         mult = MEGABYTE;
 1154                         break;
 1155                 case 'g':
 1156                 case 'G':
 1157                         mult = GIGABYTE;
 1158                         break;
 1159                 default:
 1160                         return (-1);
 1161                 }
 1162 
 1163                 for (i = 0; i < len - 1; i++)
 1164                         if (!isdigit(arg[i]))
 1165                                 return (-1);
 1166         }
 1167 
 1168         return (mult * atoi(arg));
 1169 }
 1170 
 1171 static void
 1172 print_bar(long count, long total)
 1173 {
 1174         int                     i;
 1175 
 1176         (void) putchar_unlocked(count ? '*' : ' ');
 1177         for (i = 1; i < (32 * count) / total; i++)
 1178                 (void) putchar_unlocked('*');
 1179         for (; i < 32; i++)
 1180                 (void) putchar_unlocked(' ');
 1181 }
 1182 
 1183 static int
 1184 doublecmp(const void *p1, const void *p2)
 1185 {
 1186         double a = *((double *)p1);
 1187         double b = *((double *)p2);
 1188 
 1189         if (a > b)
 1190                 return (1);
 1191         if (a < b)
 1192                 return (-1);
 1193         return (0);
 1194 }
 1195 
 1196 static void
 1197 print_histo(barrier_t *b)
 1198 {
 1199         int                     n;
 1200         int                     i;
 1201         int                     j;
 1202         int                     last;
 1203         long long               maxcount;
 1204         double                  sum;
 1205         long long               min;
 1206         long long               scale;
 1207         double                  x;
 1208         long long               y;
 1209         long long               count;
 1210         int                     i95;
 1211         double                  p95;
 1212         double                  r95;
 1213         double                  m95;
 1214         histo_t                 *histo;
 1215 
 1216         (void) printf("#        %12s %12s %32s %12s\n", "counts", "usecs/call",
 1217             "", "means");
 1218 
 1219         /* calculate how much data we've captured */
 1220         n = b->ba_batches > b->ba_datasize ? b->ba_datasize : b->ba_batches;
 1221 
 1222         /* find the 95th percentile - index, value and range */
 1223         qsort((void *)b->ba_data, n, sizeof (double), doublecmp);
 1224         min = b->ba_data[0] + 0.000001;
 1225         i95 = n * 95 / 100;
 1226         p95 = b->ba_data[i95];
 1227         r95 = p95 - min + 1;
 1228 
 1229         /* find a suitable min and scale */
 1230         i = 0;
 1231         x = r95 / (HISTOSIZE - 1);
 1232         while (x >= 10.0) {
 1233                 x /= 10.0;
 1234                 i++;
 1235         }
 1236         y = x + 0.9999999999;
 1237         while (i > 0) {
 1238                 y *= 10;
 1239                 i--;
 1240         }
 1241         min /= y;
 1242         min *= y;
 1243         scale = y * (HISTOSIZE - 1);
 1244         if (scale < (HISTOSIZE - 1)) {
 1245                 scale = (HISTOSIZE - 1);
 1246         }
 1247 
 1248         /* create and initialise the histogram */
 1249         histo = malloc(HISTOSIZE * sizeof (histo_t));
 1250         for (i = 0; i < HISTOSIZE; i++) {
 1251                 histo[i].sum = 0.0;
 1252                 histo[i].count = 0;
 1253         }
 1254 
 1255         /* populate the histogram */
 1256         last = 0;
 1257         sum = 0.0;
 1258         count = 0;
 1259         for (i = 0; i < i95; i++) {
 1260                 j = (HISTOSIZE - 1) * (b->ba_data[i] - min) / scale;
 1261 
 1262                 if (j >= HISTOSIZE) {
 1263                         (void) printf("panic!\n");
 1264                         j = HISTOSIZE - 1;
 1265                 }
 1266 
 1267                 histo[j].sum += b->ba_data[i];
 1268                 histo[j].count++;
 1269 
 1270                 sum += b->ba_data[i];
 1271                 count++;
 1272         }
 1273         m95 = sum / count;
 1274 
 1275         /* find the larges bucket */
 1276         maxcount = 0;
 1277         for (i = 0; i < HISTOSIZE; i++)
 1278                 if (histo[i].count > 0) {
 1279                         last = i;
 1280                         if (histo[i].count > maxcount)
 1281                                 maxcount = histo[i].count;
 1282                 }
 1283 
 1284         /* print the buckets */
 1285         for (i = 0; i <= last; i++) {
 1286                 (void) printf("#       %12lld %12.5f |", histo[i].count,
 1287                     (min + scale * (double)i / (HISTOSIZE - 1)));
 1288 
 1289                 print_bar(histo[i].count, maxcount);
 1290 
 1291                 if (histo[i].count > 0)
 1292                         (void) printf("%12.5f\n",
 1293                             histo[i].sum / histo[i].count);
 1294                 else
 1295                         (void) printf("%12s\n", "-");
 1296         }
 1297 
 1298         /* find the mean of values beyond the 95th percentile */
 1299         sum = 0.0;
 1300         count = 0;
 1301         for (i = i95; i < n; i++) {
 1302                 sum += b->ba_data[i];
 1303                 count++;
 1304         }
 1305 
 1306         /* print the >95% bucket summary */
 1307         (void) printf("#\n");
 1308         (void) printf("#       %12lld %12s |", count, "> 95%");
 1309         print_bar(count, maxcount);
 1310         if (count > 0)
 1311                 (void) printf("%12.5f\n", sum / count);
 1312         else
 1313                 (void) printf("%12s\n", "-");
 1314         (void) printf("#\n");
 1315         (void) printf("#       %12s %12.5f\n", "mean of 95%", m95);
 1316         (void) printf("#       %12s %12.5f\n", "95th %ile", p95);
 1317 
 1318         /* quantify any buffer overflow */
 1319         if (b->ba_batches > b->ba_datasize)
 1320                 (void) printf("#       %12s %12d\n", "data dropped",
 1321                     b->ba_batches - b->ba_datasize);
 1322 }
 1323 
 1324 static void
 1325 compute_stats(barrier_t *b)
 1326 {
 1327         int i;
 1328 
 1329         if (b->ba_batches > b->ba_datasize)
 1330                 b->ba_batches = b->ba_datasize;
 1331 
 1332         /*
 1333          * convert to usecs/call
 1334          */
 1335 
 1336         for (i = 0; i < b->ba_batches; i++)
 1337                 b->ba_data[i] /= 1000.0;
 1338 
 1339         /*
 1340          * do raw stats
 1341          */
 1342 
 1343         (void) crunch_stats(b->ba_data, b->ba_batches, &b->ba_raw);
 1344 
 1345         /*
 1346          * recursively apply 3 sigma rule to remove outliers
 1347          */
 1348 
 1349         b->ba_corrected = b->ba_raw;
 1350         b->ba_outliers = 0;
 1351 
 1352         if (b->ba_batches > 40) { /* remove outliers */
 1353                 int removed;
 1354 
 1355                 do {
 1356                         removed = remove_outliers(b->ba_data, b->ba_batches,
 1357                             &b->ba_corrected);
 1358                         b->ba_outliers += removed;
 1359                         b->ba_batches -= removed;
 1360                         (void) crunch_stats(b->ba_data, b->ba_batches,
 1361                             &b->ba_corrected);
 1362                         } while (removed != 0 && b->ba_batches > 40);
 1363         }
 1364 
 1365 }
 1366 
 1367 /*
 1368  * routine to compute various statistics on array of doubles.
 1369  */
 1370 
 1371 static int
 1372 crunch_stats(double *data, int count, stats_t *stats)
 1373 {
 1374         double a;
 1375         double std;
 1376         double diff;
 1377         double sk;
 1378         double ku;
 1379         double mean;
 1380         int i;
 1381         int bytes;
 1382         double *dupdata;
 1383 
 1384         /*
 1385          * first we need the mean
 1386          */
 1387 
 1388         mean = 0.0;
 1389 
 1390         for (i = 0; i < count; i++) {
 1391                 mean += data[i];
 1392         }
 1393 
 1394         mean /= count;
 1395 
 1396         stats->st_mean = mean;
 1397 
 1398         /*
 1399          * malloc and sort so we can do median
 1400          */
 1401 
 1402         dupdata = malloc(bytes = sizeof (double) * count);
 1403         (void) memcpy(dupdata, data, bytes);
 1404         qsort((void *)dupdata, count, sizeof (double), doublecmp);
 1405         stats->st_median   = dupdata[count/2];
 1406 
 1407         /*
 1408          * reuse dupdata to compute time correlation of data to
 1409          * detect interesting time-based trends
 1410          */
 1411 
 1412         for (i = 0; i < count; i++)
 1413                 dupdata[i] = (double)i;
 1414 
 1415         (void) fit_line(dupdata, data, count, &a, &stats->st_timecorr);
 1416         free(dupdata);
 1417 
 1418         std = 0.0;
 1419         sk  = 0.0;
 1420         ku  = 0.0;
 1421 
 1422         stats->st_max = -1;
 1423         stats->st_min = 1.0e99; /* hard to find portable values */
 1424 
 1425         for (i = 0; i < count; i++) {
 1426                 if (data[i] > stats->st_max)
 1427                         stats->st_max = data[i];
 1428                 if (data[i] < stats->st_min)
 1429                         stats->st_min = data[i];
 1430 
 1431                 diff = data[i] - mean;
 1432                 std += diff * diff;
 1433                 sk  += diff * diff * diff;
 1434                 ku  += diff * diff * diff * diff;
 1435         }
 1436 
 1437         stats->st_stddev   = std = sqrt(std/(double)(count - 1));
 1438         stats->st_stderr   = std / sqrt(count);
 1439         stats->st_99confidence = stats->st_stderr * 2.326;
 1440         stats->st_skew     = sk / (std * std * std) / (double)(count);
 1441         stats->st_kurtosis = ku / (std * std * std * std) /
 1442             (double)(count) - 3;
 1443 
 1444         return (0);
 1445 }
 1446 
 1447 /*
 1448  * does a least squares fit to the set of points x, y and
 1449  * fits a line y = a + bx.  Returns a, b
 1450  */
 1451 
 1452 int
 1453 fit_line(double *x, double *y, int count, double *a, double *b)
 1454 {
 1455         double sumx, sumy, sumxy, sumx2;
 1456         double denom;
 1457         int i;
 1458 
 1459         sumx = sumy = sumxy = sumx2 = 0.0;
 1460 
 1461         for (i = 0; i < count; i++) {
 1462                 sumx    += x[i];
 1463                 sumx2   += x[i] * x[i];
 1464                 sumy    += y[i];
 1465                 sumxy   += x[i] * y[i];
 1466         }
 1467 
 1468         denom = count * sumx2 - sumx * sumx;
 1469 
 1470         if (denom == 0.0)
 1471                 return (-1);
 1472 
 1473         *a = (sumy * sumx2 - sumx * sumxy) / denom;
 1474 
 1475         *b = (count * sumxy - sumx * sumy) / denom;
 1476 
 1477         return (0);
 1478 }
 1479 
 1480 /*
 1481  * empty function for measurement purposes
 1482  */
 1483 
 1484 int
 1485 nop()
 1486 {
 1487         return (1);
 1488 }
 1489 
 1490 #define NSECITER 1000
 1491 
 1492 static long long
 1493 get_nsecs_overhead()
 1494 {
 1495         long long s;
 1496 
 1497         double data[NSECITER];
 1498         stats_t stats;
 1499 
 1500         int i;
 1501         int count;
 1502         int outliers;
 1503 
 1504         (void) getnsecs(); /* warmup */
 1505         (void) getnsecs(); /* warmup */
 1506         (void) getnsecs(); /* warmup */
 1507 
 1508         i = 0;
 1509 
 1510         count = NSECITER;
 1511 
 1512         for (i = 0; i < count; i++) {
 1513                 s = getnsecs();
 1514                 data[i] = getnsecs() - s;
 1515         }
 1516 
 1517         (void) crunch_stats(data, count, &stats);
 1518 
 1519         while ((outliers = remove_outliers(data, count, &stats)) != 0) {
 1520                 count -= outliers;
 1521                 (void) crunch_stats(data, count, &stats);
 1522         }
 1523 
 1524         return ((long long)stats.st_mean);
 1525 
 1526 }
 1527 
 1528 long long
 1529 get_nsecs_resolution()
 1530 {
 1531         long long y[1000];
 1532 
 1533         int i, j, nops, res;
 1534         long long start, stop;
 1535 
 1536         /*
 1537          * first, figure out how many nops to use
 1538          * to get any delta between time measurements.
 1539          * use a minimum of one.
 1540          */
 1541 
 1542         /*
 1543          * warm cache
 1544          */
 1545 
 1546         stop = start = getnsecs();
 1547 
 1548         for (i = 1; i < 10000000; i++) {
 1549                 start = getnsecs();
 1550                 for (j = i; j; j--)
 1551                         ;
 1552                 stop = getnsecs();
 1553                 if (stop > start)
 1554                         break;
 1555         }
 1556 
 1557         nops = i;
 1558 
 1559         /*
 1560          * now collect data at linearly varying intervals
 1561          */
 1562 
 1563         for (i = 0; i < 1000; i++) {
 1564                 start = getnsecs();
 1565                 for (j = nops * i; j; j--)
 1566                         ;
 1567                 stop = getnsecs();
 1568                 y[i] = stop - start;
 1569         }
 1570 
 1571         /*
 1572          * find smallest positive difference between samples;
 1573          * this is the timer resolution
 1574          */
 1575 
 1576         res = 1<<30;
 1577 
 1578         for (i = 1; i < 1000; i++) {
 1579                 int diff = y[i] - y[i-1];
 1580 
 1581                 if (diff > 0 && res > diff)
 1582                         res = diff;
 1583 
 1584         }
 1585 
 1586         return (res);
 1587 }
 1588 
 1589 /*
 1590  * remove any data points from the array more than 3 sigma out
 1591  */
 1592 
 1593 static int
 1594 remove_outliers(double *data, int count, stats_t *stats)
 1595 {
 1596         double outmin = stats->st_mean - 3 * stats->st_stddev;
 1597         double outmax = stats->st_mean + 3 * stats->st_stddev;
 1598 
 1599         int i, j, outliers;
 1600 
 1601         for (outliers = i = j = 0; i < count; i++)
 1602                 if (data[i] > outmax || data[i] < outmin)
 1603                         outliers++;
 1604                 else
 1605                         data[j++] = data[i];
 1606 
 1607         return (outliers);
 1608 }

Cache object: a815133df98b25d7195bed2150eb1f90


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]


This page is part of the FreeBSD/Linux Kernel Cross-Reference, and was automatically generated using a modified version of the LXR engine.