libstdc++
simd_detail.h
1 // Internal macros for the simd implementation -*- C++ -*-
2 
3 // Copyright (C) 2020-2024 Free Software Foundation, Inc.
4 //
5 // This file is part of the GNU ISO C++ Library. This library is free
6 // software; you can redistribute it and/or modify it under the
7 // terms of the GNU General Public License as published by the
8 // Free Software Foundation; either version 3, or (at your option)
9 // any later version.
10 
11 // This library is distributed in the hope that it will be useful,
12 // but WITHOUT ANY WARRANTY; without even the implied warranty of
13 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 // GNU General Public License for more details.
15 
16 // Under Section 7 of GPL version 3, you are granted additional
17 // permissions described in the GCC Runtime Library Exception, version
18 // 3.1, as published by the Free Software Foundation.
19 
20 // You should have received a copy of the GNU General Public License and
21 // a copy of the GCC Runtime Library Exception along with this program;
22 // see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
23 // <http://www.gnu.org/licenses/>.
24 
25 #ifndef _GLIBCXX_EXPERIMENTAL_SIMD_DETAIL_H_
26 #define _GLIBCXX_EXPERIMENTAL_SIMD_DETAIL_H_
27 
28 #if __cplusplus >= 201703L
29 
30 #include <cstddef>
31 #include <cstdint>
32 
33 /// @cond undocumented
34 
35 #define _GLIBCXX_SIMD_BEGIN_NAMESPACE \
36  namespace std _GLIBCXX_VISIBILITY(default) \
37  { \
38  _GLIBCXX_BEGIN_NAMESPACE_VERSION \
39  namespace experimental { \
40  inline namespace parallelism_v2 {
41 #define _GLIBCXX_SIMD_END_NAMESPACE \
42  } \
43  } \
44  _GLIBCXX_END_NAMESPACE_VERSION \
45  }
46 
// ISA extension detection. Each _GLIBCXX_SIMD_HAVE_XXX macro is always
// defined: 1 when the corresponding compiler macro test succeeds, 0 otherwise.
// ARM {{{
#ifdef __ARM_NEON
# define _GLIBCXX_SIMD_HAVE_NEON 1
// NEON_A32: NEON together with __ARM_ARCH >= 8 or an __aarch64__ target.
# if __ARM_ARCH >= 8 || defined(__aarch64__)
#  define _GLIBCXX_SIMD_HAVE_NEON_A32 1
# else
#  define _GLIBCXX_SIMD_HAVE_NEON_A32 0
# endif
// NEON_A64: NEON together with an __aarch64__ target.
# ifdef __aarch64__
#  define _GLIBCXX_SIMD_HAVE_NEON_A64 1
# else
#  define _GLIBCXX_SIMD_HAVE_NEON_A64 0
# endif
#else
# define _GLIBCXX_SIMD_HAVE_NEON 0
# define _GLIBCXX_SIMD_HAVE_NEON_A32 0
# define _GLIBCXX_SIMD_HAVE_NEON_A64 0
#endif
// SVE requires a fixed vector length (__ARM_FEATURE_SVE_BITS > 0) and
// __ARM_FEATURE_SVE_VECTOR_OPERATORS equal to 1.
#if __ARM_FEATURE_SVE_BITS > 0 && __ARM_FEATURE_SVE_VECTOR_OPERATORS == 1
# define _GLIBCXX_SIMD_HAVE_SVE 1
#else
# define _GLIBCXX_SIMD_HAVE_SVE 0
#endif
#if defined(__ARM_FEATURE_SVE2)
# define _GLIBCXX_SIMD_HAVE_SVE2 1
#else
# define _GLIBCXX_SIMD_HAVE_SVE2 0
#endif
//}}}
// x86 {{{
// Every flag below is 1 when the matching compiler macro is defined, else 0.
#if defined(__MMX__)
# define _GLIBCXX_SIMD_HAVE_MMX 1
#else
# define _GLIBCXX_SIMD_HAVE_MMX 0
#endif
// SSE and SSE2 are also reported whenever __x86_64__ is defined.
#if defined(__SSE__) || defined(__x86_64__)
# define _GLIBCXX_SIMD_HAVE_SSE 1
#else
# define _GLIBCXX_SIMD_HAVE_SSE 0
#endif
#if defined(__SSE2__) || defined(__x86_64__)
# define _GLIBCXX_SIMD_HAVE_SSE2 1
#else
# define _GLIBCXX_SIMD_HAVE_SSE2 0
#endif
#if defined(__SSE3__)
# define _GLIBCXX_SIMD_HAVE_SSE3 1
#else
# define _GLIBCXX_SIMD_HAVE_SSE3 0
#endif
#if defined(__SSSE3__)
# define _GLIBCXX_SIMD_HAVE_SSSE3 1
#else
# define _GLIBCXX_SIMD_HAVE_SSSE3 0
#endif
#if defined(__SSE4_1__)
# define _GLIBCXX_SIMD_HAVE_SSE4_1 1
#else
# define _GLIBCXX_SIMD_HAVE_SSE4_1 0
#endif
#if defined(__SSE4_2__)
# define _GLIBCXX_SIMD_HAVE_SSE4_2 1
#else
# define _GLIBCXX_SIMD_HAVE_SSE4_2 0
#endif
#if defined(__XOP__)
# define _GLIBCXX_SIMD_HAVE_XOP 1
#else
# define _GLIBCXX_SIMD_HAVE_XOP 0
#endif
#if defined(__AVX__)
# define _GLIBCXX_SIMD_HAVE_AVX 1
#else
# define _GLIBCXX_SIMD_HAVE_AVX 0
#endif
#if defined(__AVX2__)
# define _GLIBCXX_SIMD_HAVE_AVX2 1
#else
# define _GLIBCXX_SIMD_HAVE_AVX2 0
#endif
// Note the naming: the compiler macro is __BMI__, the flag is ..._BMI1.
#if defined(__BMI__)
# define _GLIBCXX_SIMD_HAVE_BMI1 1
#else
# define _GLIBCXX_SIMD_HAVE_BMI1 0
#endif
#if defined(__BMI2__)
# define _GLIBCXX_SIMD_HAVE_BMI2 1
#else
# define _GLIBCXX_SIMD_HAVE_BMI2 0
#endif
#if defined(__LZCNT__)
# define _GLIBCXX_SIMD_HAVE_LZCNT 1
#else
# define _GLIBCXX_SIMD_HAVE_LZCNT 0
#endif
#if defined(__SSE4A__)
# define _GLIBCXX_SIMD_HAVE_SSE4A 1
#else
# define _GLIBCXX_SIMD_HAVE_SSE4A 0
#endif
#if defined(__FMA__)
# define _GLIBCXX_SIMD_HAVE_FMA 1
#else
# define _GLIBCXX_SIMD_HAVE_FMA 0
#endif
#if defined(__FMA4__)
# define _GLIBCXX_SIMD_HAVE_FMA4 1
#else
# define _GLIBCXX_SIMD_HAVE_FMA4 0
#endif
#if defined(__F16C__)
# define _GLIBCXX_SIMD_HAVE_F16C 1
#else
# define _GLIBCXX_SIMD_HAVE_F16C 0
#endif
#if defined(__POPCNT__)
# define _GLIBCXX_SIMD_HAVE_POPCNT 1
#else
# define _GLIBCXX_SIMD_HAVE_POPCNT 0
#endif
// AVX-512 family: one flag per compiler macro, 1 when defined and 0 otherwise.
#if defined(__AVX512F__)
# define _GLIBCXX_SIMD_HAVE_AVX512F 1
#else
# define _GLIBCXX_SIMD_HAVE_AVX512F 0
#endif
#if defined(__AVX512DQ__)
# define _GLIBCXX_SIMD_HAVE_AVX512DQ 1
#else
# define _GLIBCXX_SIMD_HAVE_AVX512DQ 0
#endif
#if defined(__AVX512VL__)
# define _GLIBCXX_SIMD_HAVE_AVX512VL 1
#else
# define _GLIBCXX_SIMD_HAVE_AVX512VL 0
#endif
#if defined(__AVX512BW__)
# define _GLIBCXX_SIMD_HAVE_AVX512BW 1
#else
# define _GLIBCXX_SIMD_HAVE_AVX512BW 0
#endif
#if defined(__AVX512BITALG__)
# define _GLIBCXX_SIMD_HAVE_AVX512BITALG 1
#else
# define _GLIBCXX_SIMD_HAVE_AVX512BITALG 0
#endif
#if defined(__AVX512VBMI2__)
# define _GLIBCXX_SIMD_HAVE_AVX512VBMI2 1
#else
# define _GLIBCXX_SIMD_HAVE_AVX512VBMI2 0
#endif
#if defined(__AVX512VBMI__)
# define _GLIBCXX_SIMD_HAVE_AVX512VBMI 1
#else
# define _GLIBCXX_SIMD_HAVE_AVX512VBMI 0
#endif
#if defined(__AVX512IFMA__)
# define _GLIBCXX_SIMD_HAVE_AVX512IFMA 1
#else
# define _GLIBCXX_SIMD_HAVE_AVX512IFMA 0
#endif
#if defined(__AVX512CD__)
# define _GLIBCXX_SIMD_HAVE_AVX512CD 1
#else
# define _GLIBCXX_SIMD_HAVE_AVX512CD 0
#endif
#if defined(__AVX512VNNI__)
# define _GLIBCXX_SIMD_HAVE_AVX512VNNI 1
#else
# define _GLIBCXX_SIMD_HAVE_AVX512VNNI 0
#endif
#if defined(__AVX512VPOPCNTDQ__)
# define _GLIBCXX_SIMD_HAVE_AVX512VPOPCNTDQ 1
#else
# define _GLIBCXX_SIMD_HAVE_AVX512VPOPCNTDQ 0
#endif
#if defined(__AVX512VP2INTERSECT__)
# define _GLIBCXX_SIMD_HAVE_AVX512VP2INTERSECT 1
#else
# define _GLIBCXX_SIMD_HAVE_AVX512VP2INTERSECT 0
#endif
226 
227 #if _GLIBCXX_SIMD_HAVE_SSE
228 #define _GLIBCXX_SIMD_HAVE_SSE_ABI 1
229 #else
230 #define _GLIBCXX_SIMD_HAVE_SSE_ABI 0
231 #endif
232 #if _GLIBCXX_SIMD_HAVE_SSE2
233 #define _GLIBCXX_SIMD_HAVE_FULL_SSE_ABI 1
234 #else
235 #define _GLIBCXX_SIMD_HAVE_FULL_SSE_ABI 0
236 #endif
237 
238 #if _GLIBCXX_SIMD_HAVE_AVX
239 #define _GLIBCXX_SIMD_HAVE_AVX_ABI 1
240 #else
241 #define _GLIBCXX_SIMD_HAVE_AVX_ABI 0
242 #endif
243 #if _GLIBCXX_SIMD_HAVE_AVX2
244 #define _GLIBCXX_SIMD_HAVE_FULL_AVX_ABI 1
245 #else
246 #define _GLIBCXX_SIMD_HAVE_FULL_AVX_ABI 0
247 #endif
248 
249 #if _GLIBCXX_SIMD_HAVE_AVX512F
250 #define _GLIBCXX_SIMD_HAVE_AVX512_ABI 1
251 #else
252 #define _GLIBCXX_SIMD_HAVE_AVX512_ABI 0
253 #endif
254 #if _GLIBCXX_SIMD_HAVE_AVX512BW
255 #define _GLIBCXX_SIMD_HAVE_FULL_AVX512_ABI 1
256 #else
257 #define _GLIBCXX_SIMD_HAVE_FULL_AVX512_ABI 0
258 #endif
259 
260 #if defined __x86_64__ && !_GLIBCXX_SIMD_HAVE_SSE2
261 #error "Use of SSE2 is required on AMD64"
262 #endif
263 //}}}
264 
// Compiler-dependent spellings: when __clang__ is defined both macros expand
// to nothing; otherwise they expand to the attributes shown.
#ifndef __clang__
# define _GLIBCXX_SIMD_NORMAL_MATH \
  [[__gnu__::__optimize__("finite-math-only,no-signed-zeros")]]
# define _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA __attribute__((__always_inline__))
#else
# define _GLIBCXX_SIMD_NORMAL_MATH
# define _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA
#endif

// Inlining control for function declarations.
#define _GLIBCXX_SIMD_ALWAYS_INLINE [[__gnu__::__always_inline__]] inline
#define _GLIBCXX_SIMD_INTRINSIC \
  [[__gnu__::__always_inline__, __gnu__::__artificial__]] inline
#define _GLIBCXX_SIMD_NEVER_INLINE [[__gnu__::__noinline__]]

// Branch hints: forward the expression unchanged to __builtin_expect with an
// expected value of 1 (likely) or 0 (unlikely).
#define _GLIBCXX_SIMD_IS_LIKELY(__expr) __builtin_expect(__expr, 1)
#define _GLIBCXX_SIMD_IS_UNLIKELY(__expr) __builtin_expect(__expr, 0)
279 
// constexpr is used for these two macros only when none of the following
// holds: SVE was detected, __STRICT_ANSI__ is nonzero, or __clang__ is defined.
// In any of those cases _GLIBCXX_SIMD_CONSTEXPR is empty and the API variant
// falls back to plain const.
#if !_GLIBCXX_SIMD_HAVE_SVE && !__STRICT_ANSI__ && !defined(__clang__)
# define _GLIBCXX_SIMD_CONSTEXPR constexpr
# define _GLIBCXX_SIMD_USE_CONSTEXPR_API constexpr
#else
# define _GLIBCXX_SIMD_CONSTEXPR
# define _GLIBCXX_SIMD_USE_CONSTEXPR_API const
#endif

// This one depends on the compiler only: const when __clang__ is defined,
// constexpr otherwise.
#ifndef __clang__
# define _GLIBCXX_SIMD_USE_CONSTEXPR constexpr
#else
# define _GLIBCXX_SIMD_USE_CONSTEXPR const
#endif
293 
// Operator lists. Each LIST macro invokes the given function-like macro once
// per operator of its group, in the order written.
#define _GLIBCXX_SIMD_LIST_BINARY(__op_macro) \
  __op_macro(|) __op_macro(&) __op_macro(^)
#define _GLIBCXX_SIMD_LIST_SHIFTS(__op_macro) \
  __op_macro(<<) __op_macro(>>)
#define _GLIBCXX_SIMD_LIST_ARITHMETICS(__op_macro) \
  __op_macro(+) __op_macro(-) __op_macro(*) __op_macro(/) __op_macro(%)

// The ALL variants expand the matching list and then append
// static_assert(true) without a trailing semicolon (presumably so that the
// use site can supply the semicolon itself).
#define _GLIBCXX_SIMD_ALL_BINARY(__op_macro) \
  _GLIBCXX_SIMD_LIST_BINARY(__op_macro) static_assert(true)
#define _GLIBCXX_SIMD_ALL_SHIFTS(__op_macro) \
  _GLIBCXX_SIMD_LIST_SHIFTS(__op_macro) static_assert(true)
#define _GLIBCXX_SIMD_ALL_ARITHMETICS(__op_macro) \
  _GLIBCXX_SIMD_LIST_ARITHMETICS(__op_macro) static_assert(true)
305 
// Opt-out switch: when _GLIBCXX_SIMD_NO_ALWAYS_INLINE is defined, the three
// inlining macros are redefined without any always_inline attribute (two become
// plain `inline`, the lambda variant becomes empty).
#if defined(_GLIBCXX_SIMD_NO_ALWAYS_INLINE)
# undef _GLIBCXX_SIMD_INTRINSIC
# undef _GLIBCXX_SIMD_ALWAYS_INLINE
# undef _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA
# define _GLIBCXX_SIMD_INTRINSIC inline
# define _GLIBCXX_SIMD_ALWAYS_INLINE inline
# define _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA
#endif
314 
// _GLIBCXX_SIMD_X86INTRIN is 0 only when neither the SSE flag nor the MMX flag
// is set; otherwise it is 1.
#if !_GLIBCXX_SIMD_HAVE_SSE && !_GLIBCXX_SIMD_HAVE_MMX
# define _GLIBCXX_SIMD_X86INTRIN 0
#else
# define _GLIBCXX_SIMD_X86INTRIN 1
#endif
320 
// workaround macros {{{
// Use aliasing loads to help GCC understand the data accesses better.
// This also seems to hide a miscompilation on swap(x[i], x[i + 1]) with
// fixed_size_simd<float, 16> x.
#define _GLIBCXX_SIMD_USE_ALIASING_LOADS 1

// Bad codegen for 8 Byte memcpy to __vector_type_t<char, 16>.
#define _GLIBCXX_SIMD_WORKAROUND_PR90424 1

// Integer division not optimized. Left undefined when __clang__ is defined.
#if !defined(__clang__)
# define _GLIBCXX_SIMD_WORKAROUND_PR90993 1
#endif

// The following workarounds are defined only when _GLIBCXX_SIMD_X86INTRIN is
// nonzero; otherwise they stay undefined.
#if _GLIBCXX_SIMD_X86INTRIN
// Vector conversions on x86 not optimized.
# define _GLIBCXX_SIMD_WORKAROUND_PR85048 1
// Very bad codegen for extraction and concatenation of 128/256 "subregisters"
// with sizeof(element type) < 8: https://godbolt.org/g/mqUsgM
# define _GLIBCXX_SIMD_WORKAROUND_XXX_1 1
// Bad codegen for zero-extend using simple concat(__x, 0).
# define _GLIBCXX_SIMD_WORKAROUND_XXX_3 1
#endif

// https://github.com/cplusplus/parallelism-ts/issues/65 (incorrect return type
// of static_simd_cast)
#define _GLIBCXX_SIMD_FIX_P2TS_ISSUE65 1

// https://github.com/cplusplus/parallelism-ts/issues/66 (incorrect SFINAE
// constraint on (static)_simd_cast)
#define _GLIBCXX_SIMD_FIX_P2TS_ISSUE66 1
// }}}
359 
360 /// @endcond
361 
362 #endif // __cplusplus >= 201703L
363 #endif // _GLIBCXX_EXPERIMENTAL_SIMD_DETAIL_H_
364 
365 // vim: foldmethod=marker