#include <stdatomic.h>

/* ... */

/* count of nodes that still have to settle the in-flight GVT round */
static _Atomic(nid_t) missing_nodes;

/* per-thread flag implementing the two-color (green/red) message scheme */
__thread bool gvt_phase_green = false;
/* per-thread, per-destination-node count of remote messages sent */
__thread unsigned remote_msg_sent[MAX_NODES] = {0};
/* per-color balance of received remote messages, charged against the
 * reduced send totals; it reaches zero when none is still in flight */
atomic_int remote_msg_received[2];
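
/*
 * The counters above implement a two-color message-accounting scheme in
 * the style of Mattern's GVT algorithm: each thread counts the remote
 * messages it sends per destination node, resetting the counts at every
 * color flip; the per-node totals are later reduced across nodes, and a
 * round only closes once the received balance of the collected color
 * drops to zero, i.e. no counted message is still in transit.
 */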

/* ... */

static inline simtime_t gvt_node_reduce(void)
{
	/* ... */
	candidate = min(reducing_p[i], candidate);
	/* ... */
}
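
/*
 * A minimal sketch of the elided reduction above, assuming reducing_p
 * holds one GVT candidate per worker thread; "n_threads" is an assumed
 * name for the per-node worker count, the other names appear in the
 * listing.
 */
static inline simtime_t gvt_node_reduce_sketch(void)
{
	simtime_t candidate = reducing_p[0];    /* first thread's candidate */
	for(unsigned i = 1; i < n_threads; ++i) /* fold in the other threads */
		candidate = min(reducing_p[i], candidate);
	return candidate;                       /* node-wide minimum */
}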

/* ... */

	static atomic_uint c_a = 0;
	static atomic_uint c_b = 0;

	if(likely(thread_phase == tphase_rdy)) {
		/* no round in progress: elided logic decides whether to
		 * start one (e.g. on a timer) */
		if(likely(!atomic_load_explicit(&c_a,
		    memory_order_relaxed)))
			/* ... */
		/* threads of the previous round still hold c_b: back off */
		if(unlikely(atomic_load_explicit(
		    &c_b, memory_order_relaxed)))
			/* ... */
		stats_time_start(STATS_GVT);
		/* join the round: check in on the phase-A counter */
		thread_phase = tphase_A;
		atomic_fetch_add_explicit(&c_a, 1U, memory_order_relaxed);
	}

	switch(thread_phase) {
	default:
		__builtin_unreachable();
	/* elided case labels reconstructed from the state transitions */
	case tphase_A:
		/* wait until every worker thread has checked in */
		if(atomic_load_explicit(&c_a, memory_order_relaxed)
		    /* ... */) {
			reducing_p[rid] = min(current_gvt, this_t);
			thread_phase = tphase_B;
			atomic_fetch_add_explicit(&c_b, 1U,
			    memory_order_release);
		}
		break;
	case tphase_B:
		/* wait until every thread has published its candidate */
		if(atomic_load_explicit(&c_b, memory_order_acquire)
		    /* ... */) {
			thread_phase = tphase_wait;
			atomic_fetch_sub_explicit(&c_a, 1U,
			    memory_order_relaxed);
			/* ... */
			stats_time_take(STATS_GVT);
			return gvt_node_reduce();
		}
		break;
	case tphase_wait:
		/* when c_a drains to zero the round is over: release c_b */
		if(!atomic_load_explicit(&c_a, memory_order_relaxed)) {
			atomic_fetch_sub_explicit(&c_b, 1U,
			    memory_order_relaxed);
			thread_phase = tphase_rdy;
		}
		break;
	}
	/* ... */
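
/*
 * A plausible phase enumeration for the distributed machine below; a
 * sketch, not the verbatim source. tphase_rdy = 0 and tphase_A = 1 are
 * implied by "thread_phase = red_round" further down, and the relative
 * order of the four B/C states by "tphase_B + (2 * red_round) + !rid".
 * The name tphase_C never appears in the excerpt and is assumed, as is
 * the placement of the remaining enumerators.
 */
enum thread_phase_sketch {
	tphase_rdy_ = 0,      /* no round in progress */
	tphase_A_,            /* checked in, waiting for the other threads */
	tphase_B_,            /* green half, non-reducer thread */
	tphase_B_reduce_,     /* green half, reducer thread (rid == 0) */
	tphase_C_,            /* red half, non-reducer thread */
	tphase_C_reduce_,     /* red half, reducer thread */
	tphase_B_rdone_,      /* waiting for the inter-node sum to finish */
	tphase_C_rdone_,      /* waiting for the inter-node min to finish */
	tphase_B_wait_msgs_,  /* draining in-flight messages of one color */
	tphase_wait_          /* round epilogue, waiting on c_b */
};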

/* ... */

static atomic_uint c_a = 0;

/* ... */
	/* a distributed round has started: check in on phase A */
	stats_time_start(STATS_GVT);
	/* ... */
	thread_phase = tphase_A;
	atomic_fetch_add_explicit(&c_a, 1U, memory_order_relaxed);
/* ... */
	/* one fewer node outstanding for the in-flight round */
	atomic_fetch_sub_explicit(&missing_nodes, 1U, memory_order_relaxed);
/* ... */
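
/*
 * The two fragments above plausibly sit in the handlers that react to a
 * remote node starting a round and to a node reporting that all of its
 * messages are accounted for; note that c_a now lives at file scope,
 * presumably so both the handlers and the phase machine below can reach
 * it. A sketch under that assumption; both function names are invented
 * for illustration.
 */
static void gvt_start_handler_sketch(void)
{
	stats_time_start(STATS_GVT);
	thread_phase = tphase_A; /* join phase A immediately */
	atomic_fetch_add_explicit(&c_a, 1U, memory_order_relaxed);
}

static void gvt_node_done_handler_sketch(void)
{
	atomic_fetch_sub_explicit(&missing_nodes, 1U, memory_order_relaxed);
}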

/* ... */

	static atomic_uint c_b = 0;
	static unsigned remote_msg_to_receive;
	static __thread bool red_round = false;

	if(likely(thread_phase == tphase_rdy)) {
		/* no round in progress: elided logic decides whether to
		 * start one */
		if(likely(!atomic_load_explicit(&c_a,
		    memory_order_relaxed)))
			/* ... */
		/* some node has not settled the previous round: back off */
		if(unlikely(atomic_load_explicit(
		    &missing_nodes, memory_order_relaxed)))
			/* ... */
		/* arm the node-completion counter for the new round */
		atomic_store_explicit(&missing_nodes, n_nodes,
		    memory_order_relaxed);
		/* ... */
		stats_time_start(STATS_GVT);
		/* ... */
		thread_phase = tphase_A;
		atomic_fetch_add_explicit(&c_a, 1U, memory_order_relaxed);
	}

	switch(thread_phase) {
	default:
		__builtin_unreachable();
	/* elided case labels reconstructed from the state transitions */
	case tphase_A:
		/* wait until every local thread has checked in; thread 0
		 * (rid == 0) becomes the reducer for this half-round */
		if(atomic_load_explicit(&c_a, memory_order_relaxed)
		    /* ... */) {
			thread_phase = tphase_B + (2 * red_round) + !rid;
			/* ... */
			red_round = !red_round;
			/* ... */
			/* green half (presumably one branch): flush this
			 * thread's send counters into the node totals */
			for(nid_t i = 0; i < n_nodes; ++i)
				atomic_fetch_add_explicit(&sent_tot[i],
				    remote_msg_sent[i],
				    memory_order_relaxed);
			atomic_fetch_add_explicit(&c_b, 1U,
			    memory_order_release);
			memset(remote_msg_sent, 0,
			    sizeof(unsigned) * n_nodes);
			gvt_phase_green = !gvt_phase_green;
			current_gvt = min(current_gvt, this_t);
			/* ... */
			/* red half (presumably the other branch): publish
			 * this thread's GVT candidate */
			reducing_p[rid] = min(current_gvt, this_t);
			/* ... */
			atomic_fetch_add_explicit(&c_b, 1U,
			    memory_order_release);
			/* ... */
		}
		break;
	case tphase_B:
		/* green half, non-reducer: wait for the reducer to reset
		 * c_a, then go wait for the in-flight messages */
		if(!atomic_load_explicit(&c_a, memory_order_acquire))
			thread_phase = tphase_B_wait_msgs;
		break;
	case tphase_B_reduce:
		/* green half, reducer: once every thread has flushed, the
		 * elided call starts the inter-node sum of the counters */
		if(atomic_load_explicit(&c_b, memory_order_acquire) ==
		    /* ... */) {
			/* ... */
			    &remote_msg_to_receive);
			thread_phase = tphase_B_rdone;
		}
		break;
	case tphase_B_rdone:
		/* presumably entered once the sum reduction completed */
		if(/* ... */) {
			/* charge the expected count against the current
			 * color's received balance */
			atomic_fetch_sub_explicit(remote_msg_received +
			    !gvt_phase_green, remote_msg_to_receive,
			    memory_order_relaxed);
			/* ... */
			atomic_store_explicit(&c_a, 0, memory_order_release);
			memset(sent_tot, 0, sizeof(atomic_uint) * n_nodes);
			thread_phase = tphase_B_wait_msgs;
		}
		break;
	case tphase_C:
		/* red half, non-reducer: wait for the reducer's c_a reset */
		if(!atomic_load_explicit(&c_a, memory_order_relaxed)) {
			atomic_fetch_sub_explicit(&c_b, 1U,
			    memory_order_relaxed);
			thread_phase = tphase_wait;
		}
		break;
	case tphase_C_reduce:
		/* red half, reducer: fold the local candidates; the elided
		 * inter-node min reduction presumably follows */
		if(atomic_load_explicit(&c_b, memory_order_acquire) ==
		    /* ... */) {
			*reducing_p = gvt_node_reduce();
			/* ... */
			thread_phase = tphase_C_rdone;
		}
		break;
	case tphase_C_rdone:
		if(/* ... */) {
			atomic_fetch_sub_explicit(&c_b, 1U,
			    memory_order_relaxed);
			/* ... */
			atomic_store_explicit(&c_a, 0, memory_order_relaxed);
			/* ... */
			thread_phase = tphase_wait;
		}
		break;
	case tphase_B_wait_msgs:
		/* wait until no message of the counted color is in flight */
		if(!atomic_load_explicit(remote_msg_received +
		    !gvt_phase_green, memory_order_relaxed)) {
			thread_phase = tphase_wait;
			atomic_fetch_sub_explicit(&c_b, 1U,
			    memory_order_relaxed);
		}
		break;
	case tphase_wait:
		if(!atomic_load_explicit(&c_b, memory_order_relaxed)) {
			/* ... */
			/* chain into the red half, or go back to rdy */
			thread_phase = red_round;
			/* ... */
			atomic_fetch_add_explicit(&c_a, 1U,
			    memory_order_relaxed);
			/* ... */
			stats_time_take(STATS_GVT);
			/* ... */
		}
		break;
	}
	/* ... */
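
/*
 * Assuming the phase enumeration sketched earlier, one distributed GVT
 * round chains two halves through tphase_wait: a green half that flushes
 * and sum-reduces the per-node send counters and then drains the counted
 * in-flight messages, followed immediately (red_round is true) by a red
 * half that publishes the per-thread candidates and presumably
 * min-reduces them across nodes into the final GVT, after which
 * red_round is false again and every thread returns to tphase_rdy.
 */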

/* ... */
	if(unlikely(thread_phase && current_gvt > msg_t))
		/* ... */
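
/*
 * A plausible completion of the hook above (a sketch, not the verbatim
 * source): while a GVT round is in flight (thread_phase != tphase_rdy,
 * which the truth test relies on being 0), extracting a message with a
 * timestamp below this thread's candidate must lower the candidate.
 * The function name and signature are assumptions.
 */
static inline void gvt_on_msg_extraction_sketch(simtime_t msg_t)
{
	if(unlikely(thread_phase && current_gvt > msg_t))
		current_gvt = msg_t;
}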