forked from JuliaLang/julia
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathatomics.h
289 lines (280 loc) · 10.9 KB
/
atomics.h
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
// This file is a part of Julia. License is MIT: https://julialang.org/license
#ifndef JL_ATOMICS_H
#define JL_ATOMICS_H
// Low-level atomic operations
#if defined(__i386__) && defined(__GNUC__) && !defined(__SSE2__)
# error Julia can only be built for architectures above Pentium 4. Pass -march=pentium4, or set MARCH=pentium4 and ensure that -march is not passed separately with an older architecture.
#endif
#ifdef _COMPILER_MICROSOFT_
# include <intrin.h>
# include <type_traits>
#endif
#if defined(_CPU_X86_64_) || defined(_CPU_X86_)
# include <immintrin.h>
#endif
#ifndef _OS_WINDOWS_
# include <pthread.h>
#endif
#include <signal.h>
/**
* Thread synchronization primitives:
*
* These roughly follows the c11/c++11 memory model and the act as memory
* barriers at both the compiler level and the hardware level.
* The only exception is the GC safepoint and GC state transitions for which
* we use only a compiler (signal) barrier and use the signal handler to do the
* synchronization in order to lower the mutator overhead as much as possible.
*
* We use the compiler intrinsics to implement a similar API to the c11/c++11
* one instead of using it directly because,
*
* 1. We support GCC 4.7 and GCC add support for c11 atomics in 4.9.
* Luckily, the __atomic intrinsics were added in GCC 4.7.
* 2. (most importantly) we need interoperability between code written
* in different languages.
* The current c++ standard (c++14) does not allow using c11 atomic
* functions or types and there's currently no guarantee that the two
* types are compatible (although most of them probably are).
* We also need to access these atomic variables from the LLVM JIT code
* which is very hard unless the layout of the object is fully
* specified.
*/
#if defined(__GNUC__)
# define jl_signal_fence() __atomic_signal_fence(__ATOMIC_SEQ_CST)
# define jl_atomic_fetch_add_relaxed(obj, arg) \
__atomic_fetch_add(obj, arg, __ATOMIC_RELAXED)
# define jl_atomic_fetch_add(obj, arg) \
__atomic_fetch_add(obj, arg, __ATOMIC_SEQ_CST)
# define jl_atomic_add_fetch(obj, arg) \
__atomic_add_fetch(obj, arg, __ATOMIC_SEQ_CST)
# define jl_atomic_fetch_and_relaxed(obj, arg) \
__atomic_fetch_and(obj, arg, __ATOMIC_RELAXED)
# define jl_atomic_fetch_and(obj, arg) \
__atomic_fetch_and(obj, arg, __ATOMIC_SEQ_CST)
# define jl_atomic_fetch_or_relaxed(obj, arg) \
__atomic_fetch_or(obj, arg, __ATOMIC_RELAXED)
# define jl_atomic_fetch_or(obj, arg) \
__atomic_fetch_or(obj, arg, __ATOMIC_SEQ_CST)
// Returns the original value of `obj`
// Use the legacy __sync builtins for now, this can also be written using
// the __atomic builtins or c11 atomics with GNU extension or c11 _Generic
# define jl_atomic_compare_exchange(obj, expected, desired) \
__sync_val_compare_and_swap(obj, expected, desired)
# define jl_atomic_bool_compare_exchange(obj, expected, desired) \
__sync_bool_compare_and_swap(obj, expected, desired)
# define jl_atomic_exchange(obj, desired) \
__atomic_exchange_n(obj, desired, __ATOMIC_SEQ_CST)
# define jl_atomic_exchange_generic(obj, desired, orig)\
__atomic_exchange(obj, desired, orig, __ATOMIC_SEQ_CST)
# define jl_atomic_exchange_relaxed(obj, desired) \
__atomic_exchange_n(obj, desired, __ATOMIC_RELAXED)
// TODO: Maybe add jl_atomic_compare_exchange_weak for spin lock
# define jl_atomic_store(obj, val) \
__atomic_store_n(obj, val, __ATOMIC_SEQ_CST)
# if defined(__clang__) || defined(__ICC) || defined(__INTEL_COMPILER) || \
!(defined(_CPU_X86_) || defined(_CPU_X86_64_))
// ICC and Clang doesn't have this bug...
# define jl_atomic_store_release(obj, val) \
__atomic_store_n(obj, val, __ATOMIC_RELEASE)
# else
// Workaround a GCC bug when using store with release order by using the
// stronger version instead.
// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=67458
# define jl_atomic_store_release(obj, val) do { \
jl_signal_fence(); \
__atomic_store_n(obj, val, __ATOMIC_RELEASE); \
} while (0)
# endif
# define jl_atomic_load(obj) \
__atomic_load_n(obj, __ATOMIC_SEQ_CST)
# define jl_atomic_load_acquire(obj) \
__atomic_load_n(obj, __ATOMIC_ACQUIRE)
# define jl_atomic_load_relaxed(obj) \
__atomic_load_n(obj, __ATOMIC_RELAXED)
#elif defined(_COMPILER_MICROSOFT_)
# define jl_signal_fence() _ReadWriteBarrier()
// add
template<typename T, typename T2>
static inline typename std::enable_if<sizeof(T) == 1, T>::type
jl_atomic_fetch_add(T *obj, T2 arg)
{
return (T)_InterlockedExchangeAdd8((volatile char*)obj, (char)arg);
}
template<typename T, typename T2>
static inline typename std::enable_if<sizeof(T) == 2, T>::type
jl_atomic_fetch_add(T *obj, T2 arg)
{
return (T)_InterlockedExchangeAdd16((volatile short*)obj, (short)arg);
}
template<typename T, typename T2>
static inline typename std::enable_if<sizeof(T) == 4, T>::type
jl_atomic_fetch_add(T *obj, T2 arg)
{
return (T)_InterlockedExchangeAdd((volatile LONG*)obj, (LONG)arg);
}
template<typename T, typename T2>
static inline typename std::enable_if<sizeof(T) == 8, T>::type
jl_atomic_fetch_add(T *obj, T2 arg)
{
return (T)_InterlockedExchangeAdd64((volatile __int64*)obj, (__int64)arg);
}
// TODO: jl_atomic_exchange_generic
#define jl_atomic_fetch_add_relaxed(obj, arg) jl_atomic_fetch_add(obj, arg)
// and
template<typename T, typename T2>
static inline typename std::enable_if<sizeof(T) == 1, T>::type
jl_atomic_fetch_and(T *obj, T2 arg)
{
return (T)_InterlockedAnd8((volatile char*)obj, (char)arg);
}
template<typename T, typename T2>
static inline typename std::enable_if<sizeof(T) == 2, T>::type
jl_atomic_fetch_and(T *obj, T2 arg)
{
return (T)_InterlockedAnd16((volatile short*)obj, (short)arg);
}
template<typename T, typename T2>
static inline typename std::enable_if<sizeof(T) == 4, T>::type
jl_atomic_fetch_and(T *obj, T2 arg)
{
return (T)_InterlockedAnd((volatile LONG*)obj, (LONG)arg);
}
template<typename T, typename T2>
static inline typename std::enable_if<sizeof(T) == 8, T>::type
jl_atomic_fetch_and(T *obj, T2 arg)
{
return (T)_InterlockedAnd64((volatile __int64*)obj, (__int64)arg);
}
#define jl_atomic_fetch_and_relaxed(obj, arg) jl_atomic_fetch_and(obj, arg)
// or
template<typename T, typename T2>
static inline typename std::enable_if<sizeof(T) == 1, T>::type
jl_atomic_fetch_or(T *obj, T2 arg)
{
return (T)_InterlockedOr8((volatile char*)obj, (char)arg);
}
template<typename T, typename T2>
static inline typename std::enable_if<sizeof(T) == 2, T>::type
jl_atomic_fetch_or(T *obj, T2 arg)
{
return (T)_InterlockedOr16((volatile short*)obj, (short)arg);
}
template<typename T, typename T2>
static inline typename std::enable_if<sizeof(T) == 4, T>::type
jl_atomic_fetch_or(T *obj, T2 arg)
{
return (T)_InterlockedOr((volatile LONG*)obj, (LONG)arg);
}
template<typename T, typename T2>
static inline typename std::enable_if<sizeof(T) == 8, T>::type
jl_atomic_fetch_or(T *obj, T2 arg)
{
return (T)_InterlockedOr64((volatile __int64*)obj, (__int64)arg);
}
#define jl_atomic_fetch_or_relaxed(obj, arg) jl_atomic_fetch_or(obj, arg)
// Returns the original value of `obj`
template<typename T, typename T2, typename T3>
static inline typename std::enable_if<sizeof(T) == 1, T>::type
jl_atomic_compare_exchange(volatile T *obj, T2 expected, T3 desired)
{
return (T)_InterlockedCompareExchange8((volatile char*)obj,
(char)desired, (char)expected);
}
template<typename T, typename T2, typename T3>
static inline typename std::enable_if<sizeof(T) == 2, T>::type
jl_atomic_compare_exchange(volatile T *obj, T2 expected, T3 desired)
{
return (T)_InterlockedCompareExchange16((volatile short*)obj,
(short)desired, (short)expected);
}
template<typename T, typename T2, typename T3>
static inline typename std::enable_if<sizeof(T) == 4, T>::type
jl_atomic_compare_exchange(volatile T *obj, T2 expected, T3 desired)
{
return (T)_InterlockedCompareExchange((volatile LONG*)obj,
(LONG)desired, (LONG)expected);
}
template<typename T, typename T2, typename T3>
static inline typename std::enable_if<sizeof(T) == 8, T>::type
jl_atomic_compare_exchange(volatile T *obj, T2 expected, T3 desired)
{
return (T)_InterlockedCompareExchange64((volatile __int64*)obj,
(__int64)desired, (__int64)expected);
}
// TODO: jl_atomic_bool_compare_exchange
// atomic exchange
template<typename T, typename T2>
static inline typename std::enable_if<sizeof(T) == 1, T>::type
jl_atomic_exchange(volatile T *obj, T2 val)
{
return _InterlockedExchange8((volatile char*)obj, (char)val);
}
template<typename T, typename T2>
static inline typename std::enable_if<sizeof(T) == 2, T>::type
jl_atomic_exchange(volatile T *obj, T2 val)
{
return _InterlockedExchange16((volatile short*)obj, (short)val);
}
template<typename T, typename T2>
static inline typename std::enable_if<sizeof(T) == 4, T>::type
jl_atomic_exchange(volatile T *obj, T2 val)
{
return _InterlockedExchange((volatile LONG*)obj, (LONG)val);
}
template<typename T, typename T2>
static inline typename std::enable_if<sizeof(T) == 8, T>::type
jl_atomic_exchange(volatile T *obj, T2 val)
{
return _InterlockedExchange64((volatile __int64*)obj, (__int64)val);
}
#define jl_atomic_exchange_relaxed(obj, val) jl_atomic_exchange(obj, val)
// atomic stores
template<typename T, typename T2>
static inline typename std::enable_if<sizeof(T) == 1>::type
jl_atomic_store(volatile T *obj, T2 val)
{
_InterlockedExchange8((volatile char*)obj, (char)val);
}
template<typename T, typename T2>
static inline typename std::enable_if<sizeof(T) == 2>::type
jl_atomic_store(volatile T *obj, T2 val)
{
_InterlockedExchange16((volatile short*)obj, (short)val);
}
template<typename T, typename T2>
static inline typename std::enable_if<sizeof(T) == 4>::type
jl_atomic_store(volatile T *obj, T2 val)
{
_InterlockedExchange((volatile LONG*)obj, (LONG)val);
}
template<typename T, typename T2>
static inline typename std::enable_if<sizeof(T) == 8>::type
jl_atomic_store(volatile T *obj, T2 val)
{
_InterlockedExchange64((volatile __int64*)obj, (__int64)val);
}
template<typename T, typename T2>
static inline void jl_atomic_store_release(volatile T *obj, T2 val)
{
jl_signal_fence();
*obj = (T)val;
}
// atomic loads
template<typename T>
static inline T jl_atomic_load(volatile T *obj)
{
// Trick to generate cheaper instructions compare to `_InterlockedOr`
// Note that we don't care whether the exchange succeeded or not...
return jl_atomic_compare_exchange(obj, T(0), T(0));
}
template<typename T>
static inline T jl_atomic_load_acquire(volatile T *obj)
{
T val = *obj;
jl_signal_fence();
return val;
}
#else
# error "No atomic operations supported."
#endif
#endif // JL_ATOMICS_H