The ROme OpTimistic Simulator  3.0.0
A General-Purpose Multithreaded Parallel/Distributed Simulation Platform
buddy.c
#include <mm/buddy/buddy.h>

#include <core/core.h>
#include <core/intrinsics.h>
#include <lp/lp.h>

#include <errno.h>
#include <limits.h>
#include <stdlib.h>
#include <string.h>

/* Navigation of the implicit binary tree stored in mm_state::longest:
 * node i has children 2i+1 and 2i+2, hence the arithmetic below. */
#define left_child(i) (((i) << 1U) + 1U)
#define right_child(i) (((i) << 1U) + 2U)
#define parent(i) ((((i) + 1) >> 1U) - 1U)
#define is_power_of_2(i) (!((i) & ((i) - 1)))
/* Smallest e such that 2^e > i; callers pass req_size - 1 to obtain the
 * exponent of the power-of-two chunk able to hold req_size bytes. */
#define next_exp_of_2(i) (sizeof(i) * CHAR_BIT - intrinsics_clz(i))
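
The helper macros above encode the buddy tree as an implicit, array-backed binary heap, and next_exp_of_2 rounds a size up to a power-of-two exponent. A minimal standalone sketch of the identities the allocator relies on; the macro bodies are copied here for illustration and __builtin_clz is assumed as a stand-in for intrinsics_clz:

#include <assert.h>
#include <limits.h>

/* Local copies, for illustration only. */
#define left_child(i) (((i) << 1U) + 1U)
#define right_child(i) (((i) << 1U) + 2U)
#define parent(i) ((((i) + 1) >> 1U) - 1U)
#define next_exp_of_2(i) (sizeof(i) * CHAR_BIT - __builtin_clz(i))

int main(void)
{
	/* The root is node 0; its children are 1 and 2 and both map back to it. */
	assert(left_child(0) == 1 && right_child(0) == 2);
	assert(parent(1) == 0 && parent(2) == 0);
	/* Parent/child round trips hold for every node, e.g. node 5. */
	assert(parent(left_child(5)) == 5 && parent(right_child(5)) == 5);
	/* next_exp_of_2(i) is the smallest e with 2^e > i: passing req_size - 1,
	 * as malloc_mt does, yields the chunk exponent for req_size bytes. */
	assert(next_exp_of_2(99U) == 7 && (1U << next_exp_of_2(99U)) == 128);
	return 0;
}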

void model_allocator_lp_init(void)
{
	struct mm_state *self = &current_lp->mm_state;
	uint_fast8_t node_size = B_TOTAL_EXP;

	/* in a completely free tree each node stores the exponent of its own
	 * chunk size, which decreases by one at every new tree level */
	for (uint_fast32_t i = 0;
	     i < sizeof(self->longest) / sizeof(*self->longest); ++i) {
		self->longest[i] = node_size;
		node_size -= is_power_of_2(i + 2);
	}

	self->used_mem = 0;
	array_init(self->logs);
#ifdef ROOTSIM_INCREMENTAL
	memset(self->dirty, 0, sizeof(self->dirty));
	self->dirty_mem = 0;
#endif
}

void model_allocator_lp_fini(void)
{
	/* release every checkpoint still referenced by this LP's log array */
	array_count_t i = array_count(current_lp->mm_state.logs);
	while (i--) {
		mm_free(array_get_at(current_lp->mm_state.logs, i).c);
	}
	array_fini(current_lp->mm_state.logs);
}

void *malloc_mt(size_t req_size)
{
	if (unlikely(!req_size))
		return NULL;

	struct mm_state *self = &current_lp->mm_state;

	/* exponent of the smallest power-of-two chunk able to hold req_size,
	 * clamped to the minimal block size */
	uint_fast8_t req_blks = max(next_exp_of_2(req_size - 1), B_BLOCK_EXP);

	if (unlikely(self->longest[0] < req_blks)) {
		errno = ENOMEM;
		log_log(LOG_WARN, "LP %p is out of memory!", current_lp);
		return NULL;
	}

	/* search the tree top-down for a suitable node */
	uint_fast8_t node_size = B_TOTAL_EXP;
	uint_fast32_t i = 0;
	while (node_size > req_blks) {
		/* descend to the left child if it can still accommodate the
		 * request, otherwise fall back to the right one */
		i = left_child(i);
		i += self->longest[i] < req_blks;
		--node_size;
	}

	/* mark the chosen chunk as fully allocated */
	self->longest[i] = 0;
	self->used_mem += 1 << node_size;
#ifdef ROOTSIM_INCREMENTAL
	bitmap_set(self->dirty, i >> B_BLOCK_EXP);
#endif
	uint_fast32_t offset = ((i + 1) << node_size) - (1 << B_TOTAL_EXP);

	/* propagate the updated longest values up to the root */
	while (i) {
		i = parent(i);
		self->longest[i] = max(
			self->longest[left_child(i)],
			self->longest[right_child(i)]
		);
#ifdef ROOTSIM_INCREMENTAL
		bitmap_set(self->dirty, i >> B_BLOCK_EXP);
#endif
	}

	return ((char *)self->base_mem) + offset;
}
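
For concreteness, assuming B_TOTAL_EXP = 17 and B_BLOCK_EXP = 6 (the values appearing in the buddy.h declarations referenced at the bottom of this page), the very first 100-byte request is rounded up to a 128-byte chunk and the descent above takes the leftmost path for ten levels, ending up at offset 0 of base_mem. A standalone sketch of just that index arithmetic, with no allocator state involved:

#include <assert.h>
#include <limits.h>

#define B_TOTAL_EXP 17U /* 128 KiB of per-LP memory */
#define B_BLOCK_EXP 6U  /* 64-byte minimal blocks */
#define left_child(i) (((i) << 1U) + 1U)
#define next_exp_of_2(i) (sizeof(i) * CHAR_BIT - __builtin_clz(i))

int main(void)
{
	unsigned req_size = 100;

	/* Same rounding malloc_mt performs. */
	unsigned req_blks = next_exp_of_2(req_size - 1);
	if (req_blks < B_BLOCK_EXP)
		req_blks = B_BLOCK_EXP;
	assert(req_blks == 7); /* 2^7 = 128 bytes get reserved */

	/* On a completely free tree every descent step keeps the left child. */
	unsigned i = 0;
	for (unsigned node_size = B_TOTAL_EXP; node_size > req_blks; --node_size)
		i = left_child(i);
	assert(i == 1023); /* ten left steps: i = 2^10 - 1 */

	/* The chosen chunk maps to offset 0 inside base_mem. */
	unsigned offset = ((i + 1) << req_blks) - (1 << B_TOTAL_EXP);
	assert(offset == 0);
	return 0;
}

Note that used_mem grows by the rounded chunk size (128 bytes here), not by the 100 bytes the model asked for.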

void *calloc_mt(size_t nmemb, size_t size)
{
	/* note: the nmemb * size product is not checked for overflow */
	size_t tot = nmemb * size;
	void *ret = malloc_mt(tot);

	if (likely(ret))
		memset(ret, 0, tot);

	return ret;
}

void free_mt(void *ptr)
{
	if (unlikely(!ptr))
		return;

	struct mm_state *self = &current_lp->mm_state;
	uint_fast8_t node_size = B_BLOCK_EXP;
	/* start from the leaf covering the freed pointer... */
	uint_fast32_t i =
		(((uintptr_t)ptr - (uintptr_t)self->base_mem) >> B_BLOCK_EXP) +
		(1 << (B_TOTAL_EXP - B_BLOCK_EXP)) - 1;

	/* ...and climb to the node which was actually allocated, i.e. the
	 * first ancestor whose longest value is 0 */
	for (; self->longest[i]; i = parent(i)) {
		++node_size;
	}

	/* mark the chunk as free again */
	self->longest[i] = node_size;
	self->used_mem -= 1 << node_size;
#ifdef ROOTSIM_INCREMENTAL
	bitmap_set(self->dirty, i >> B_BLOCK_EXP);
#endif
	/* climb towards the root, coalescing buddies that are both free */
	while (i) {
		i = parent(i);

		uint_fast8_t left_longest = self->longest[left_child(i)];
		uint_fast8_t right_longest = self->longest[right_child(i)];

		if (left_longest == node_size && right_longest == node_size) {
			self->longest[i] = node_size + 1;
		} else {
			self->longest[i] = max(left_longest, right_longest);
		}
#ifdef ROOTSIM_INCREMENTAL
		bitmap_set(self->dirty, i >> B_BLOCK_EXP);
#endif
		++node_size;
	}
}
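
Continuing the same hypothetical allocation, free_mt recovers the allocated node purely from the pointer: it starts at the leaf covering the freed offset and climbs until it meets the node whose longest value malloc_mt zeroed. A standalone sketch of that index arithmetic, under the same B_TOTAL_EXP = 17, B_BLOCK_EXP = 6 assumptions:

#include <assert.h>

#define B_TOTAL_EXP 17U
#define B_BLOCK_EXP 6U
#define parent(i) ((((i) + 1) >> 1U) - 1U)

int main(void)
{
	/* Offset inside base_mem of the pointer being freed
	 * (0 in the malloc_mt sketch above). */
	unsigned offset = 0;

	/* Starting leaf: leaves occupy the last 2^(B_TOTAL_EXP - B_BLOCK_EXP)
	 * slots of the implicit tree. */
	unsigned i = (offset >> B_BLOCK_EXP) + (1 << (B_TOTAL_EXP - B_BLOCK_EXP)) - 1;
	assert(i == 2047);

	/* The leaf itself was never zeroed (its longest value is still 6), so
	 * the climb moves one level up to node 1023, the 128-byte chunk that
	 * malloc_mt marked as allocated; it gets released with node_size == 7. */
	assert(parent(i) == 1023);
	return 0;
}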

void *realloc_mt(void *ptr, size_t req_size)
{
	if (!req_size) {
		free_mt(ptr);
		return NULL;
	}
	if (!ptr) {
		return malloc_mt(req_size);
	}

	/* a proper reallocation is not implemented yet */
	abort();
	return NULL;
}

/* Visits the buddy system tree and calls on_visit(offset, length) once for
 * each maximal chunk of base_mem which is currently allocated, i.e. for each
 * topmost node whose longest value is 0; completely free subtrees are skipped
 * altogether. Used below to checkpoint and restore only the used memory. */
#define buddy_tree_visit(longest, on_visit) \
__extension__({ \
	bool __vis = false; \
	uint_fast8_t __l = B_TOTAL_EXP; \
	uint_fast32_t __i = 0; \
	while (1) { \
		uint_fast8_t __lon = longest[__i]; \
		if (!__lon) { \
			uint_fast32_t __len = 1U << __l; \
			uint_fast32_t __o = \
				((__i + 1) << __l) - (1 << B_TOTAL_EXP); \
			on_visit(__o, __len); \
		} else if (__lon != __l) { \
			__i = left_child(__i) + __vis; \
			__vis = false; \
			__l--; \
			continue; \
		} \
		do { \
			__vis = !(__i & 1U); \
			__i = parent(__i); \
			__l++; \
		} while (__vis); \
\
		if (__l > B_TOTAL_EXP) break; \
		__vis = true; \
	} \
})

#ifdef ROOTSIM_INCREMENTAL

/* Marks as dirty the B_BLOCK_EXP-sized blocks spanned by a model write of
 * siz bytes starting at ptr. */
void __write_mem(void *ptr, size_t siz)
{
	struct mm_state *self = &current_lp->mm_state;
	uintptr_t diff = (uintptr_t)ptr - (uintptr_t)self->base_mem;
	/* writes outside of the LP-managed buffer are ignored */
	if (diff >= (1 << B_TOTAL_EXP))
		return;

	/* the first bits of the dirty bitmap track the blocks of the longest[]
	 * tree, so base_mem blocks are offset past them */
	uint_fast32_t i = (diff >> B_BLOCK_EXP) +
		(1 << (B_TOTAL_EXP - 2 * B_BLOCK_EXP + 1));

	/* turn siz into the index of the last touched block relative to the
	 * first one, then mark all of them */
	siz += diff & ((1 << B_BLOCK_EXP) - 1);
	--siz;
	siz >>= B_BLOCK_EXP;
	self->dirty_mem += siz;
	do {
		bitmap_set(self->dirty, i + siz);
	} while(siz--);
}

static struct mm_checkpoint *checkpoint_incremental_take(struct mm_state *self)
{
	uint_fast32_t bset = bitmap_count_set(self->dirty, sizeof(self->dirty));

	/* the checkpoint stores only the currently dirty blocks, packed one
	 * after the other starting at ret->longest */
	struct mm_checkpoint *ret = mm_alloc(
		offsetof(struct mm_checkpoint, longest) +
		bset * (1 << B_BLOCK_EXP));

	unsigned char *ptr = ret->longest;
	const unsigned char *src = self->longest;

#define copy_block_to_ckp(i) \
__extension__({ \
	memcpy(ptr, src + (i << B_BLOCK_EXP), 1 << B_BLOCK_EXP); \
	ptr += 1 << B_BLOCK_EXP; \
})

	bitmap_foreach_set(self->dirty, sizeof(self->dirty), copy_block_to_ckp);
#undef copy_block_to_ckp

	ret->used_mem = self->used_mem;
	memcpy(ret->dirty, self->dirty, sizeof(self->dirty));
	ret->is_incremental = true;
	return ret;
}

static void checkpoint_incremental_restore(struct mm_state *self,
	const struct mm_checkpoint *ckp)
{
	self->used_mem = ckp->used_mem;

	/* accumulate in self->dirty every block modified after ckp was taken:
	 * those are the blocks which actually need to be restored */
	array_count_t i = array_count(self->logs) - 1;
	const struct mm_checkpoint *cur_ckp = array_get_at(self->logs, i).c;

	while (cur_ckp != ckp) {
		bitmap_merge_or(self->dirty, cur_ckp->dirty,
				sizeof(self->dirty));
		cur_ckp = array_get_at(self->logs, --i).c;
	}

#define copy_dirty_block(i) \
__extension__({ \
	if (bitmap_check(self->dirty, i)) { \
		memcpy(self->longest + (i << B_BLOCK_EXP), ptr, \
			1 << B_BLOCK_EXP); \
		bitmap_reset(self->dirty, i); \
		bset--; \
	} \
	ptr += 1 << B_BLOCK_EXP; \
})

	/* walk the incremental checkpoints backwards, starting from ckp itself,
	 * restoring each still-dirty block from its most recent saved copy */
	uint_fast32_t bset = bitmap_count_set(self->dirty, sizeof(self->dirty));
	do {
		const unsigned char *ptr = cur_ckp->longest;
		bitmap_foreach_set(cur_ckp->dirty, sizeof(cur_ckp->dirty),
				copy_dirty_block);
		cur_ckp = array_get_at(self->logs, --i).c;
	} while (bset && cur_ckp->is_incremental);

#undef copy_dirty_block

	if (!bset)
		return;

	/* whatever is still dirty has to come from the last full checkpoint:
	 * first the blocks of the longest[] tree... */
#define copy_block_from_ckp(i) \
__extension__({ \
	memcpy(self->longest + (i << B_BLOCK_EXP), cur_ckp->longest + \
		(i << B_BLOCK_EXP), 1 << B_BLOCK_EXP); \
})

	const unsigned tree_bit_size =
		bitmap_required_size(1 << (B_TOTAL_EXP - 2 * B_BLOCK_EXP + 1));
	bitmap_foreach_set(self->dirty, tree_bit_size, copy_block_from_ckp);
#undef copy_block_from_ckp

	/* ...then the base_mem blocks, matching the chunks packed in the full
	 * checkpoint against the freshly restored tree */
#define buddy_block_dirty_from_ckp(offset, len) \
__extension__({ \
	uint_fast32_t b_off = (offset >> B_BLOCK_EXP) + \
		(1 << (B_TOTAL_EXP - 2 * B_BLOCK_EXP + 1)); \
	uint_fast32_t b_len = len >> B_BLOCK_EXP; \
	while (b_len--) { \
		if (bitmap_check(self->dirty, b_off)) { \
			memcpy(self->longest + (b_off << B_BLOCK_EXP), \
				ptr, (1 << B_BLOCK_EXP)); \
		} \
		ptr += 1 << B_BLOCK_EXP; \
		b_off++; \
	} \
})

	const unsigned char *ptr = cur_ckp->base_mem;
	buddy_tree_visit(self->longest, buddy_block_dirty_from_ckp);
#undef buddy_block_dirty_from_ckp
}

#endif

static struct mm_checkpoint *checkpoint_full_take(const struct mm_state *self)
{
	/* the checkpoint stores the whole longest[] tree, but only the
	 * currently allocated chunks of base_mem */
	struct mm_checkpoint *ret = mm_alloc(
		offsetof(struct mm_checkpoint, base_mem) + self->used_mem);

#ifdef ROOTSIM_INCREMENTAL
	ret->is_incremental = false;
	memcpy(ret->dirty, self->dirty, sizeof(self->dirty));
#endif

	ret->used_mem = self->used_mem;
	memcpy(ret->longest, self->longest, sizeof(ret->longest));

#define buddy_block_copy_to_ckp(offset, len) \
__extension__({ \
	memcpy(ptr, self->base_mem + offset, len); \
	ptr += len; \
})

	unsigned char *ptr = ret->base_mem;
	buddy_tree_visit(self->longest, buddy_block_copy_to_ckp);

#undef buddy_block_copy_to_ckp
	return ret;
}

static void checkpoint_full_restore(struct mm_state *self,
	const struct mm_checkpoint *ckp)
{
	self->used_mem = ckp->used_mem;
	memcpy(self->longest, ckp->longest, sizeof(self->longest));

	/* the tree restored above drives the copy of the used chunks back to
	 * their original offsets in base_mem */
#define buddy_block_copy_from_ckp(offset, len) \
__extension__({ \
	memcpy(self->base_mem + offset, ptr, len); \
	ptr += len; \
})

	const unsigned char *ptr = ckp->base_mem;
	buddy_tree_visit(self->longest, buddy_block_copy_from_ckp);

#undef buddy_block_copy_from_ckp
}

void model_allocator_checkpoint_take(array_count_t ref_i)
{
	/* checkpoints are taken only every B_LOG_FREQUENCY reference indexes */
	if (ref_i % B_LOG_FREQUENCY)
		return;

	struct mm_state *self = &current_lp->mm_state;
	struct mm_checkpoint *ckp;

#ifdef ROOTSIM_INCREMENTAL
	/* an incremental checkpoint is convenient only while the dirtied
	 * memory stays below B_LOG_INCREMENTAL_THRESHOLD times the used one */
	if (self->dirty_mem < self->used_mem * B_LOG_INCREMENTAL_THRESHOLD) {
		ckp = checkpoint_incremental_take(self);
	} else {
#endif
		ckp = checkpoint_full_take(self);
#ifdef ROOTSIM_INCREMENTAL
	}
	memset(self->dirty, 0, sizeof(self->dirty));
#endif

	struct mm_log mm_log = {
		.ref_i = ref_i,
		.c = ckp
	};
	array_push(self->logs, mm_log);
}

void model_allocator_checkpoint_next_force_full(void)
{
#ifdef ROOTSIM_INCREMENTAL
	/* saturating dirty_mem makes the next checkpoint a full one */
	struct mm_state *self = &current_lp->mm_state;
	self->dirty_mem = self->used_mem * B_LOG_INCREMENTAL_THRESHOLD;
#endif
}

array_count_t model_allocator_checkpoint_restore(array_count_t ref_i)
{
	struct mm_state *self = &current_lp->mm_state;

	/* find the most recent checkpoint not newer than ref_i */
	array_count_t i = array_count(self->logs) - 1;
	while (array_get_at(self->logs, i).ref_i > ref_i) {
		i--;
	}

	const struct mm_checkpoint *ckp = array_get_at(self->logs, i).c;
#ifdef ROOTSIM_INCREMENTAL
	if (ckp->is_incremental) {
		checkpoint_incremental_restore(self, ckp);
	} else {
#endif
		checkpoint_full_restore(self, ckp);
#ifdef ROOTSIM_INCREMENTAL
	}
	memset(self->dirty, 0, sizeof(self->dirty));
	self->dirty_mem = 0;
#endif

	/* drop the checkpoints which are now in the future */
	for (array_count_t j = array_count(self->logs) - 1; j > i; --j) {
		mm_free(array_get_at(self->logs, j).c);
	}
	array_count(self->logs) = i + 1;

	return array_get_at(self->logs, i).ref_i;
}

array_count_t model_allocator_fossil_lp_collect(array_count_t tgt_ref_i)
{
	struct mm_state *self = &current_lp->mm_state;

	/* find the most recent checkpoint not newer than tgt_ref_i... */
	array_count_t log_i = array_count(self->logs) - 1;
	array_count_t ref_i = array_get_at(self->logs, log_i).ref_i;
	while (ref_i > tgt_ref_i) {
		--log_i;
		ref_i = array_get_at(self->logs, log_i).ref_i;
	}

#ifdef ROOTSIM_INCREMENTAL
	/* ...then back up to a full checkpoint, since incremental ones need
	 * their predecessors in order to be restorable */
	while (array_get_at(self->logs, log_i).c->is_incremental) {
		--log_i;
		ref_i = array_get_at(self->logs, log_i).ref_i;
	}
#endif

	/* rebase the reference indexes of the surviving checkpoints and free
	 * the older, no longer needed ones */
	array_count_t j = array_count(self->logs);
	while (j > log_i) {
		--j;
		array_get_at(self->logs, j).ref_i -= ref_i;
	}
	while (j--) {
		mm_free(array_get_at(self->logs, j).c);
	}

	array_truncate_first(self->logs, log_i);
	return ref_i;
}
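
Fossil collection keeps the most recent checkpoint not newer than tgt_ref_i (backing up further to a full checkpoint in incremental builds), frees everything older, and rebases the reference indexes of the survivors. A simplified standalone model of that bookkeeping, using hypothetical ref_i values and assuming B_LOG_FREQUENCY == 3 purely for illustration:

#include <assert.h>

int main(void)
{
	/* Reference indexes of the retained checkpoints, oldest first. */
	unsigned ref[] = {0, 3, 6, 9};
	unsigned count = sizeof(ref) / sizeof(*ref);
	unsigned tgt_ref_i = 7;

	/* Most recent checkpoint not newer than the target. */
	unsigned log_i = count - 1;
	while (ref[log_i] > tgt_ref_i)
		--log_i;
	unsigned ref_i = ref[log_i];
	assert(log_i == 2 && ref_i == 6);

	/* Surviving checkpoints are rebased; in the real code the older ones
	 * are then freed and dropped by array_truncate_first(). */
	for (unsigned j = log_i; j < count; ++j)
		ref[j] -= ref_i;
	assert(ref[2] == 0 && ref[3] == 3);

	/* The pre-rebase index of the new base checkpoint, 6, is what
	 * model_allocator_fossil_lp_collect returns to its caller. */
	return 0;
}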

Referenced symbols (brief descriptions from the generated documentation):

buddy.h: A Buddy System implementation.
core.h: Core ROOT-Sim functionalities.
intrinsics.h: Easier access to compiler extensions.
lp.h: LP construction functions.
array_count_t (array.h:21): uint_fast32_t, the type used to handle dynamic arrays' element counts and capacities.
array_count (array.h:47): gets the count of elements contained in a dynamic array.
bitmap_required_size (bitmap.h:56): computes the required size of a bitmap.
bitmap_set (bitmap.h:84): sets a bit in a bitmap.
bitmap_count_set (bitmap.h:117): counts the occurrences of set bits in a bitmap.
bitmap_foreach_set (bitmap.h:175): executes a user-supplied function for each set bit in a bitmap.
bitmap_merge_or (bitmap.h:200): merges a bitmap into another one by OR-ing all the bits.
likely (core.h:57): optimizes the branch as likely taken.
unlikely (core.h:59): optimizes the branch as likely not taken.
log_log (log.h:49): produces a log.
LOG_WARN (log.h:31): the logging level reserved to unexpected, non-deal-breaking conditions.
mm_alloc (mm.h:26): a version of the stdlib malloc() used internally.
mm_free (mm.h:61): a version of the stdlib free() used internally.
lp_ctx::mm_state (lp.h:29): the memory allocator state of this LP.
mm_state (buddy.h:24): the checkpointable memory context assigned to a single LP.
mm_state::base_mem (buddy.h:40): unsigned char base_mem[1U << 17U], the memory buffer served to the model.
mm_state::dirty_mem (buddy.h:43): uint_fast32_t, the byte count of the memory dirtied by writes.
mm_checkpoint (buddy.h:58): a restorable checkpoint of the memory context assigned to a single LP.
mm_checkpoint::is_incremental (buddy.h:61): if set, this checkpoint is incremental, else it is a full one.
mm_checkpoint::dirty (buddy.h:70): block_bitmap dirty[bitmap_required_size((1 << (17U - 2 * 6U + 1)) + (1 << (17U - 6U)))], the checkpoint of the dirty bitmap.
mm_checkpoint::used_mem (buddy.h:73): uint_fast32_t, the used memory in bytes when this checkpoint was taken.
mm_checkpoint::longest (buddy.h:75): uint_least8_t longest[1U << (17U - 6U + 1)], the checkpointed binary tree representing the buddy system.
mm_checkpoint::base_mem (buddy.h:77): unsigned char base_mem[], the checkpointed memory buffer assigned to the model.