qpid-dev mailing list archives

From "ASF GitHub Bot (JIRA)" <j...@apache.org>
Subject [jira] [Commented] (DISPATCH-1372) alloc_pool intrusive linked list can be replaced by a linked stack
Date Tue, 16 Jul 2019 19:22:00 GMT

    [ https://issues.apache.org/jira/browse/DISPATCH-1372?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=16886399#comment-16886399 ]

ASF GitHub Bot commented on DISPATCH-1372:
------------------------------------------

ChugR commented on pull request #525: DISPATCH-1372 alloc_pool intrusive linked list can be replaced by a linked stack
URL: https://github.com/apache/qpid-dispatch/pull/525#discussion_r303900546
 
 

 ##########
 File path: src/alloc_pool.c
 ##########
 @@ -44,20 +46,145 @@ DEQ_DECLARE(qd_alloc_type_t, qd_alloc_type_list_t);
 #define PATTERN_BACK  0xbabecafe
 
 struct qd_alloc_item_t {
-    DEQ_LINKS(qd_alloc_item_t);
     uint32_t              sequence;
 #ifdef QD_MEMORY_DEBUG
     qd_alloc_type_desc_t *desc;
     uint32_t              header;
 #endif
 };
 
-DEQ_DECLARE(qd_alloc_item_t, qd_alloc_item_list_t);
+//128 has been chosen because many CPU architectures use an
+//adjacent-line prefetch optimization that loads
+//2 * cache-line bytes in a single batch
+#define CHUNK_SIZE 128/sizeof(void*)
 
+struct qd_alloc_chunk_t {
+    qd_alloc_chunk_t     *prev;                 //do not use DEQ_LINKS here: field position could affect access cost
+    qd_alloc_item_t      *items[CHUNK_SIZE];
+    qd_alloc_chunk_t     *next;
+};
+
+struct qd_alloc_linked_stack_t {
+    //the base
+    qd_alloc_chunk_t     *top_chunk;
+    uint32_t              top;                  //qd_alloc_item* top_item = top_chunk->items[top+1] <-> top > 0
+    uint64_t              size;
+    qd_alloc_chunk_t      base_chunk;
+};
+
+static inline void init_stack(qd_alloc_linked_stack_t *stack)
+{
+    stack->top_chunk = &stack->base_chunk;
+    stack->top_chunk->next = NULL;
+    stack->top = 0;
+    stack->size = 0;
+}
+
+static inline void prev_chunk_stack(qd_alloc_linked_stack_t *const stack)
+{
+    const uint32_t chunk_size = CHUNK_SIZE;
+    assert(stack->top == 0);
+    assert(stack->size != 0);
+    assert(stack->top_chunk != &stack->base_chunk);
+    qd_alloc_chunk_t *prev = stack->top_chunk->prev;
+    //TODO(franz):  stack->top_chunk could be passed externally and its next chain walked
+    //              to recycle the last chunk.
+    //              Just need to pay attention to null out released_chunk->prev->next
+    //              to make it unreachable from the stack
+    stack->top_chunk = prev;
+    stack->top = chunk_size;
+}
+
+static inline qd_alloc_item_t *pop_stack(qd_alloc_linked_stack_t *const stack)
+{
+    if (stack->top == 0) {
+        if (stack->size == 0) {
+            assert(stack->top_chunk == &stack->base_chunk);
+            return NULL;
+        }
+        prev_chunk_stack(stack);
+    }
+    stack->top--;
+    assert(stack->top >= 0 && stack->top < CHUNK_SIZE);
+    stack->size--;
+    assert(stack->size >= 0);
+    qd_alloc_item_t *item = stack->top_chunk->items[stack->top];
+    assert(item != NULL);
+    return item;
+}
+
+static inline void free_stack_chunks(qd_alloc_linked_stack_t *stack)
+{
+    assert(stack->size == 0);
+    //the assumption here is that next is always correctly set
+    qd_alloc_chunk_t *chunk = stack->base_chunk.next;
+    while (chunk != NULL) {
+        qd_alloc_chunk_t *next = chunk->next;
+        free(chunk);
+        chunk = next;
+    }
+}
+
+static inline void next_chunk_stack(qd_alloc_linked_stack_t *const stack)
+{
+    assert(stack->top == CHUNK_SIZE);
+    qd_alloc_chunk_t *top = stack->top_chunk->next;
+    if (top == NULL) {
+        top = NEW(qd_alloc_chunk_t);
+        stack->top_chunk->next = top;
+        top->prev = stack->top_chunk;
+        top->next = NULL;
+    }
+    assert(top->prev == stack->top_chunk);
+    assert(stack->top_chunk->next == top);
+    stack->top_chunk = top;
+    stack->top = 0;
+}
+
+static inline void push_stack(qd_alloc_linked_stack_t *stack, qd_alloc_item_t *item)
+{
+    const uint32_t chunk_size = CHUNK_SIZE;
+    if (stack->top == chunk_size) {
+        next_chunk_stack(stack);
+    }
+    stack->size++;
+    stack->top_chunk->items[stack->top] = item;
+    stack->top++;
+}
+
+static inline int unordered_move_stack(qd_alloc_linked_stack_t *from, qd_alloc_linked_stack_t *to, uint32_t length)
+{
+    length = from->size < length ? from->size : length;
 
 Review comment:
   How often is the _length_ value lowered because from->size is too small? When that happens
it will skew the QD_MEMORY_STATS counter stats->held_by_threads, since that counter is always
incremented or decremented by config->transfer_batch_size and not by the smaller value
assigned to _length_.
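   For illustration, a rough sketch of the accounting concern. The caller below is
hypothetical (only stats->held_by_threads and config->transfer_batch_size appear in the
review comment), and it assumes unordered_move_stack() returns the number of items actually
moved:

       uint32_t batch = desc->config->transfer_batch_size;
       int      moved = unordered_move_stack(&thread_free_list, &global_free_list, batch);
   #ifdef QD_MEMORY_STATS
       desc->stats->held_by_threads -= batch;  /* always adjusted by the full batch size...  */
       /* ...even on iterations where moved < batch because from->size was already smaller. */
   #endif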
 
----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
 
For queries about this service, please contact Infrastructure at:
users@infra.apache.org


> alloc_pool intrusive linked list can be replaced by a linked stack
> ------------------------------------------------------------------
>
>                 Key: DISPATCH-1372
>                 URL: https://issues.apache.org/jira/browse/DISPATCH-1372
>             Project: Qpid Dispatch
>          Issue Type: Improvement
>          Components: Routing Engine
>    Affects Versions: 1.8.0
>            Reporter: Francesco Nigro
>            Priority: Major
>         Attachments: DOOM-3-BFG-Technical-Note.pdf, image-2019-06-21-11-08-17-015.png,
>                      image-2019-06-21-11-09-02-228.png, linked_list_misses.svg, stack_list_misses.svg
>
>
> alloc_pool currently uses an intrusive linked list to avoid the need for external data
> structures to hold the data, saving expensive pointer chasing, but on modern architectures
> the data dependency between the current node and its next/prev pointers prevents the CPU
> prefetcher from streaming nodes speculatively.
> There are different approaches that could benefit from prefetching, but they need to
> decouple the stored data from its container, e.g. a linked stack.
> A linked stack is composed of doubly-linked chunks (allocated lazily) that make it possible
> for the CPU to prefetch the next/prev pointers, given that those are already contained in
> the current chunk (if any).
> Although it seems counter-intuitive (it introduces one more hop to reach the data), such a
> data structure is much more cache-friendly on modern architectures: I will attach some
> cache-miss analyses to show it.
>  
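
A rough usage sketch of the linked stack described above, reusing the names from the diff
earlier in this message; the items pushed here are stand-ins and the surrounding allocator
code is omitted:

    /* Sketch only (not part of the PR).  Items within a chunk are reached by
     * array index, so pointer chasing happens only once per CHUNK_SIZE
     * operations, when a chunk boundary is crossed. */
    static qd_alloc_item_t dummy[1000];          /* stand-ins for pooled items */
    qd_alloc_linked_stack_t stack;
    init_stack(&stack);

    for (int i = 0; i < 1000; i++)
        push_stack(&stack, &dummy[i]);

    qd_alloc_item_t *item;
    while ((item = pop_stack(&stack)) != NULL) {
        /* hand the item back to the allocator's caller */
    }

    free_stack_chunks(&stack);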



--
This message was sent by Atlassian JIRA
(v7.6.14#76016)

---------------------------------------------------------------------
To unsubscribe, e-mail: dev-unsubscribe@qpid.apache.org
For additional commands, e-mail: dev-help@qpid.apache.org

