| 
     1 /*
    2  * Copyright 2011-2015 Samy Al Bahra.
    3  * Copyright 2011 David Joseph.
    4  * All rights reserved.
    5  *
    6  * Redistribution and use in source and binary forms, with or without
    7  * modification, are permitted provided that the following conditions
    8  * are met:
    9  * 1. Redistributions of source code must retain the above copyright
   10  *    notice, this list of conditions and the following disclaimer.
   11  * 2. Redistributions in binary form must reproduce the above copyright
   12  *    notice, this list of conditions and the following disclaimer in the
   13  *    documentation and/or other materials provided with the distribution.
   14  *
   15  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
   16  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
   17  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
   18  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
   19  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
   20  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
   21  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
   22  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
   23  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
   24  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
   25  * SUCH DAMAGE.
   26  */
   27 
   28 #include <ck_barrier.h>
   29 #include <ck_cc.h>
   30 #include <ck_pr.h>
   31 #include <ck_spinlock.h>
   32 
   33 struct ck_barrier_combining_queue {
   34         struct ck_barrier_combining_group *head;
   35         struct ck_barrier_combining_group *tail;
   36 };
   37 
   38 static struct ck_barrier_combining_group *
   39 ck_barrier_combining_queue_dequeue(struct ck_barrier_combining_queue *queue)
   40 {
   41         struct ck_barrier_combining_group *front = NULL;
   42 
   43         if (queue->head != NULL) {
   44                 front = queue->head;
   45                 queue->head = queue->head->next;
   46         }
   47 
   48         return front;
   49 }
   50 
   51 static void
   52 ck_barrier_combining_insert(struct ck_barrier_combining_group *parent,
   53     struct ck_barrier_combining_group *tnode,
   54     struct ck_barrier_combining_group **child)
   55 {
   56 
   57         *child = tnode;
   58         tnode->parent = parent;
   59 
   60         /*
   61          * After inserting, we must increment the parent group's count for
   62          * number of threads expected to reach it; otherwise, the
   63          * barrier may end prematurely.
   64          */
   65         parent->k++;
   66         return;
   67 }
   68 
   69 /*
   70  * This implementation of software combining tree barriers
   71  * uses level order traversal to insert new thread groups
   72  * into the barrier's tree. We use a queue to implement this
   73  * traversal.
   74  */
   75 static void
   76 ck_barrier_combining_queue_enqueue(struct ck_barrier_combining_queue *queue,
   77     struct ck_barrier_combining_group *node_value)
   78 {
   79 
   80         node_value->next = NULL;
   81         if (queue->head == NULL) {
   82                 queue->head = queue->tail = node_value;
   83                 return;
   84         }
   85 
   86         queue->tail->next = node_value;
   87         queue->tail = node_value;
   88 
   89         return;
   90 }
   91 
   92 
   93 void
   94 ck_barrier_combining_group_init(struct ck_barrier_combining *root,
   95     struct ck_barrier_combining_group *tnode,
   96     unsigned int nthr)
   97 {
   98         struct ck_barrier_combining_group *node;
   99         struct ck_barrier_combining_queue queue;
  100 
  101         queue.head = queue.tail = NULL;
  102 
  103         tnode->k = nthr;
  104         tnode->count = 0;
  105         tnode->sense = 0;
  106         tnode->left = tnode->right = NULL;
  107 
  108         /*
  109          * Finds the first available node for linkage into the combining
  110          * tree. The use of a spinlock is excusable as this is a one-time
  111          * initialization cost.
  112          */
  113         ck_spinlock_fas_lock(&root->mutex);
  114         ck_barrier_combining_queue_enqueue(&queue, root->root);
  115         while (queue.head != NULL) {
  116                 node = ck_barrier_combining_queue_dequeue(&queue);
  117 
  118                 /* If the left child is free, link the group there. */
  119                 if (node->left == NULL) {
  120                         ck_barrier_combining_insert(node, tnode, &node->left);
  121                         goto leave;
  122                 }
  123 
  124                 /* If the right child is free, link the group there. */
  125                 if (node->right == NULL) {
  126                         ck_barrier_combining_insert(node, tnode, &node->right);
  127                         goto leave;
  128                 }
  129 
  130                 /*
  131                  * If unsuccessful, try inserting as a child of the children of the
  132                  * current node.
  133                  */
  134                 ck_barrier_combining_queue_enqueue(&queue, node->left);
  135                 ck_barrier_combining_queue_enqueue(&queue, node->right);
  136         }
  137 
  138 leave:
  139         ck_spinlock_fas_unlock(&root->mutex);
  140         return;
  141 }
  142 
  143 void
  144 ck_barrier_combining_init(struct ck_barrier_combining *root,
  145     struct ck_barrier_combining_group *init_root)
  146 {
  147 
  148         init_root->k = 0;
  149         init_root->count = 0;
  150         init_root->sense = 0;
  151         init_root->parent = init_root->left = init_root->right = NULL;
  152         ck_spinlock_fas_init(&root->mutex);
  153         root->root = init_root;
  154         return;
  155 }
  156 
  157 static void
  158 ck_barrier_combining_aux(struct ck_barrier_combining *barrier,
  159     struct ck_barrier_combining_group *tnode,
  160     unsigned int sense)
  161 {
  162 
  163         /*
  164          * If this is the last thread in the group, it moves on to the parent group.
  165          * Otherwise, it spins on this group's sense.
  166          */
  167         if (ck_pr_faa_uint(&tnode->count, 1) == tnode->k - 1) {
  168                 /*
  169                  * If we are and will be the last thread entering the barrier for the
  170                  * current group then signal the parent group if one exists.
  171                  */
  172                 if (tnode->parent != NULL)
  173                         ck_barrier_combining_aux(barrier, tnode->parent, sense);
  174 
  175                 /*
  176                  * Once the thread returns from its parent(s), it reinitializes the group's
  177                  * arrival count and signals other threads to continue by flipping the group
  178                  * sense. Order of these operations is not important since we assume a static
  179                  * number of threads are members of a barrier for the lifetime of the barrier.
  180                  * Since count is explicitly reinitialized, it is guaranteed that at any point
  181                  * tnode->count is equivalent to tnode->k if and only if that many threads
  182                  * are at the barrier.
  183                  */
  184                 ck_pr_store_uint(&tnode->count, 0);
  185                 ck_pr_fence_store();
  186                 ck_pr_store_uint(&tnode->sense, ~tnode->sense);
  187         } else {
  188                 while (sense != ck_pr_load_uint(&tnode->sense))
  189                         ck_pr_stall();
  190         }
  191         ck_pr_fence_memory();
  192 
  193         return;
  194 }
  195 
  196 void
  197 ck_barrier_combining(struct ck_barrier_combining *barrier,
  198     struct ck_barrier_combining_group *tnode,
  199     struct ck_barrier_combining_state *state)
  200 {
  201 
  202         ck_barrier_combining_aux(barrier, tnode, state->sense);
  203 
  204         /* Reverse the execution context's sense for the next barrier. */
  205         state->sense = ~state->sense;
  206         return;
  207 }
Cache object: f3ba57b8bfe4052d3a0557bb04172247 
 
 |