// Definition of the public simd interfaces -*- C++ -*-

// Copyright (C) 2020-2023 Free Software Foundation, Inc.
//
// This file is part of the GNU ISO C++ Library. This library is free
// software; you can redistribute it and/or modify it under the
// terms of the GNU General Public License as published by the
// Free Software Foundation; either version 3, or (at your option)
// any later version.

// This library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.

// Under Section 7 of GPL version 3, you are granted additional
// permissions described in the GCC Runtime Library Exception, version
// 3.1, as published by the Free Software Foundation.

// You should have received a copy of the GNU General Public License and
// a copy of the GCC Runtime Library Exception along with this program;
// see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
// <http://www.gnu.org/licenses/>.

#ifndef _GLIBCXX_EXPERIMENTAL_SIMD_H
#define _GLIBCXX_EXPERIMENTAL_SIMD_H

#if __cplusplus >= 201703L

#include "simd_detail.h"
#include "numeric_traits.h"
#include <bit>
#include <bitset>
#ifdef _GLIBCXX_DEBUG_UB
#include <cstdio> // for stderr
#endif
#include <cstring>
#include <cmath>
#include <functional>
#include <iosfwd>
#include <utility>

#if _GLIBCXX_SIMD_X86INTRIN
#include <x86intrin.h>
#elif _GLIBCXX_SIMD_HAVE_NEON
#pragma GCC diagnostic push
// narrowing conversion of '__a' from 'uint64_t' {aka 'long long unsigned int'} to
// 'int64x1_t' {aka 'long long int'} [-Wnarrowing]
#pragma GCC diagnostic ignored "-Wnarrowing"
#include <arm_neon.h>
#pragma GCC diagnostic pop
#endif

/** @ingroup ts_simd
 * @{
 */
/* There are several closely related types, with the following naming
 * convention:
 * _Tp: vectorizable (arithmetic) type (or any type)
 * _TV: __vector_type_t<_Tp, _Np>
 * _TW: _SimdWrapper<_Tp, _Np>
 * _TI: __intrinsic_type_t<_Tp, _Np>
 * _TVT: _VectorTraits<_TV> or _VectorTraits<_TW>
 * If one additional type is needed use _U instead of _T.
 * Otherwise use _T\d, _TV\d, _TW\d, _TI\d, _TVT\d.
 *
 * More naming conventions:
 * _Ap or _Abi: An ABI tag from the simd_abi namespace
 * _Ip: often used for integer types with sizeof(_Ip) == sizeof(_Tp),
 *      _IV, _IW as for _TV, _TW
 * _Np: number of elements (not bytes)
 * _Bytes: number of bytes
 *
 * Variable names:
 * __k: mask object (vector- or bitmask)
 */
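// Illustrative sketch (hypothetical, not part of the header): a helper
// following the conventions above would be declared along these lines:
//   template <typename _TV, typename _TVT = _VectorTraits<_TV>,
//             typename _Tp = typename _TVT::value_type>
//     _Tp __hypothetical_first_element(_TV __v);
// where _TV is a builtin vector type and _Tp its element type.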
_GLIBCXX_SIMD_BEGIN_NAMESPACE

#if !_GLIBCXX_SIMD_X86INTRIN
using __m128 [[__gnu__::__vector_size__(16)]] = float;
using __m128d [[__gnu__::__vector_size__(16)]] = double;
using __m128i [[__gnu__::__vector_size__(16)]] = long long;
using __m256 [[__gnu__::__vector_size__(32)]] = float;
using __m256d [[__gnu__::__vector_size__(32)]] = double;
using __m256i [[__gnu__::__vector_size__(32)]] = long long;
using __m512 [[__gnu__::__vector_size__(64)]] = float;
using __m512d [[__gnu__::__vector_size__(64)]] = double;
using __m512i [[__gnu__::__vector_size__(64)]] = long long;
#endif

namespace simd_abi {
// simd_abi forward declarations {{{
// implementation details:
struct _Scalar;

template <int _Np>
  struct _Fixed;

// There are two major ABIs that appear on different architectures.
// Both have non-boolean values packed into an N Byte register
// -> #elements = N / sizeof(T)
// Masks differ:
// 1. Use value vector registers for masks (all 0 or all 1)
// 2. Use bitmasks (mask registers) with one bit per value in the corresponding
//    value vector
//
// Both can be partially used, masking off the rest when doing horizontal
// operations or operations that can trap (e.g. FP_INVALID or integer division
// by 0). This is encoded as the number of used bytes.
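// For example (illustrative): comparing two 16-Byte float vectors under
// _VecBuiltin<16> yields a 16-Byte vector mask whose four elements are either
// all-zeros or all-ones, whereas comparing 16 floats under _VecBltnBtmsk<64>
// yields a bitmask register (__mmask16 on AVX-512) with one bit per element.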
template <int _UsedBytes>
  struct _VecBuiltin;

template <int _UsedBytes>
  struct _VecBltnBtmsk;

template <typename _Tp, int _Np>
  using _VecN = _VecBuiltin<sizeof(_Tp) * _Np>;

template <int _UsedBytes = 16>
  using _Sse = _VecBuiltin<_UsedBytes>;

template <int _UsedBytes = 32>
  using _Avx = _VecBuiltin<_UsedBytes>;

template <int _UsedBytes = 64>
  using _Avx512 = _VecBltnBtmsk<_UsedBytes>;

template <int _UsedBytes = 16>
  using _Neon = _VecBuiltin<_UsedBytes>;

// implementation-defined:
using __sse = _Sse<>;
using __avx = _Avx<>;
using __avx512 = _Avx512<>;
using __neon = _Neon<>;
using __neon128 = _Neon<16>;
using __neon64 = _Neon<8>;

// standard:
template <typename _Tp, size_t _Np, typename...>
  struct deduce;

template <int _Np>
  using fixed_size = _Fixed<_Np>;

using scalar = _Scalar;

// }}}
} // namespace simd_abi
// forward declarations is_simd(_mask), simd(_mask), simd_size {{{
template <typename _Tp>
  struct is_simd;

template <typename _Tp>
  struct is_simd_mask;

template <typename _Tp, typename _Abi>
  class simd;

template <typename _Tp, typename _Abi>
  class simd_mask;

template <typename _Tp, typename _Abi>
  struct simd_size;

// }}}
// load/store flags {{{
struct element_aligned_tag
{
  template <typename _Tp, typename _Up = typename _Tp::value_type>
    static constexpr size_t _S_alignment = alignof(_Up);

  template <typename _Tp, typename _Up>
    _GLIBCXX_SIMD_INTRINSIC static constexpr _Up*
    _S_apply(_Up* __ptr)
    { return __ptr; }
};

struct vector_aligned_tag
{
  template <typename _Tp, typename _Up = typename _Tp::value_type>
    static constexpr size_t _S_alignment
      = std::__bit_ceil(sizeof(_Up) * _Tp::size());

  template <typename _Tp, typename _Up>
    _GLIBCXX_SIMD_INTRINSIC static constexpr _Up*
    _S_apply(_Up* __ptr)
    { return static_cast<_Up*>(__builtin_assume_aligned(__ptr, _S_alignment<_Tp, _Up>)); }
};

template <size_t _Np> struct overaligned_tag
{
  template <typename _Tp, typename _Up = typename _Tp::value_type>
    static constexpr size_t _S_alignment = _Np;

  template <typename _Tp, typename _Up>
    _GLIBCXX_SIMD_INTRINSIC static constexpr _Up*
    _S_apply(_Up* __ptr)
    { return static_cast<_Up*>(__builtin_assume_aligned(__ptr, _Np)); }
};

inline constexpr element_aligned_tag element_aligned = {};

inline constexpr vector_aligned_tag vector_aligned = {};

template <size_t _Np>
  inline constexpr overaligned_tag<_Np> overaligned = {};
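// Example usage (illustrative, not part of the header):
//   alignas(memory_alignment_v<simd<float>>) float __mem[simd<float>::size()] = {};
//   simd<float> __v(__mem, vector_aligned); // promises __bit_ceil(sizeof(float) * size())
//   __v.copy_to(__mem, element_aligned);    // only alignof(float) is assumed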

// }}}
template <size_t _Xp>
  using _SizeConstant = integral_constant<size_t, _Xp>;
// constexpr feature detection{{{
constexpr inline bool __have_mmx = _GLIBCXX_SIMD_HAVE_MMX;
constexpr inline bool __have_sse = _GLIBCXX_SIMD_HAVE_SSE;
constexpr inline bool __have_sse2 = _GLIBCXX_SIMD_HAVE_SSE2;
constexpr inline bool __have_sse3 = _GLIBCXX_SIMD_HAVE_SSE3;
constexpr inline bool __have_ssse3 = _GLIBCXX_SIMD_HAVE_SSSE3;
constexpr inline bool __have_sse4_1 = _GLIBCXX_SIMD_HAVE_SSE4_1;
constexpr inline bool __have_sse4_2 = _GLIBCXX_SIMD_HAVE_SSE4_2;
constexpr inline bool __have_xop = _GLIBCXX_SIMD_HAVE_XOP;
constexpr inline bool __have_avx = _GLIBCXX_SIMD_HAVE_AVX;
constexpr inline bool __have_avx2 = _GLIBCXX_SIMD_HAVE_AVX2;
constexpr inline bool __have_bmi = _GLIBCXX_SIMD_HAVE_BMI1;
constexpr inline bool __have_bmi2 = _GLIBCXX_SIMD_HAVE_BMI2;
constexpr inline bool __have_lzcnt = _GLIBCXX_SIMD_HAVE_LZCNT;
constexpr inline bool __have_sse4a = _GLIBCXX_SIMD_HAVE_SSE4A;
constexpr inline bool __have_fma = _GLIBCXX_SIMD_HAVE_FMA;
constexpr inline bool __have_fma4 = _GLIBCXX_SIMD_HAVE_FMA4;
constexpr inline bool __have_f16c = _GLIBCXX_SIMD_HAVE_F16C;
constexpr inline bool __have_popcnt = _GLIBCXX_SIMD_HAVE_POPCNT;
constexpr inline bool __have_avx512f = _GLIBCXX_SIMD_HAVE_AVX512F;
constexpr inline bool __have_avx512dq = _GLIBCXX_SIMD_HAVE_AVX512DQ;
constexpr inline bool __have_avx512vl = _GLIBCXX_SIMD_HAVE_AVX512VL;
constexpr inline bool __have_avx512bw = _GLIBCXX_SIMD_HAVE_AVX512BW;
constexpr inline bool __have_avx512dq_vl = __have_avx512dq && __have_avx512vl;
constexpr inline bool __have_avx512bw_vl = __have_avx512bw && __have_avx512vl;
constexpr inline bool __have_avx512bitalg = _GLIBCXX_SIMD_HAVE_AVX512BITALG;
constexpr inline bool __have_avx512vbmi2 = _GLIBCXX_SIMD_HAVE_AVX512VBMI2;
constexpr inline bool __have_avx512vbmi = _GLIBCXX_SIMD_HAVE_AVX512VBMI;
constexpr inline bool __have_avx512ifma = _GLIBCXX_SIMD_HAVE_AVX512IFMA;
constexpr inline bool __have_avx512cd = _GLIBCXX_SIMD_HAVE_AVX512CD;
constexpr inline bool __have_avx512vnni = _GLIBCXX_SIMD_HAVE_AVX512VNNI;
constexpr inline bool __have_avx512vpopcntdq = _GLIBCXX_SIMD_HAVE_AVX512VPOPCNTDQ;
constexpr inline bool __have_avx512vp2intersect = _GLIBCXX_SIMD_HAVE_AVX512VP2INTERSECT;

constexpr inline bool __have_neon = _GLIBCXX_SIMD_HAVE_NEON;
constexpr inline bool __have_neon_a32 = _GLIBCXX_SIMD_HAVE_NEON_A32;
constexpr inline bool __have_neon_a64 = _GLIBCXX_SIMD_HAVE_NEON_A64;
constexpr inline bool __support_neon_float =
#if defined __GCC_IEC_559
  __GCC_IEC_559 == 0;
#elif defined __FAST_MATH__
  true;
#else
  false;
#endif

#ifdef _ARCH_PWR10
constexpr inline bool __have_power10vec = true;
#else
constexpr inline bool __have_power10vec = false;
#endif
#ifdef __POWER9_VECTOR__
constexpr inline bool __have_power9vec = true;
#else
constexpr inline bool __have_power9vec = false;
#endif
#if defined __POWER8_VECTOR__
constexpr inline bool __have_power8vec = true;
#else
constexpr inline bool __have_power8vec = __have_power9vec;
#endif
#if defined __VSX__
constexpr inline bool __have_power_vsx = true;
#else
constexpr inline bool __have_power_vsx = __have_power8vec;
#endif
#if defined __ALTIVEC__
constexpr inline bool __have_power_vmx = true;
#else
constexpr inline bool __have_power_vmx = __have_power_vsx;
#endif

// }}}

namespace __detail
{
#ifdef math_errhandling
  // Determines _S_handle_fpexcept from math_errhandling if it is defined and expands to a constant
  // expression. math_errhandling may expand to an extern symbol, in which case a constexpr value
  // must be guessed.
  template <int = math_errhandling>
    constexpr bool
    __handle_fpexcept_impl(int)
    { return math_errhandling & MATH_ERREXCEPT; }
#endif

  // Fallback if math_errhandling doesn't work: with fast-math assume floating-point exceptions are
  // ignored, otherwise implement correct exception behavior.
  constexpr bool
  __handle_fpexcept_impl(float)
  {
#if defined __FAST_MATH__
    return false;
#else
    return true;
#endif
  }

  /// True if math functions must raise floating-point exceptions as specified by C17.
  static constexpr bool _S_handle_fpexcept = __handle_fpexcept_impl(0);

  constexpr std::uint_least64_t
  __floating_point_flags()
  {
    std::uint_least64_t __flags = 0;
    if constexpr (_S_handle_fpexcept)
      __flags |= 1;
#ifdef __FAST_MATH__
    __flags |= 1 << 1;
#elif __FINITE_MATH_ONLY__
    __flags |= 2 << 1;
#elif __GCC_IEC_559 < 2
    __flags |= 3 << 1;
#endif
    __flags |= (__FLT_EVAL_METHOD__ + 1) << 3;
    return __flags;
  }

  constexpr std::uint_least64_t
  __machine_flags()
  {
    if constexpr (__have_mmx || __have_sse)
      return __have_mmx
        | (__have_sse << 1)
        | (__have_sse2 << 2)
        | (__have_sse3 << 3)
        | (__have_ssse3 << 4)
        | (__have_sse4_1 << 5)
        | (__have_sse4_2 << 6)
        | (__have_xop << 7)
        | (__have_avx << 8)
        | (__have_avx2 << 9)
        | (__have_bmi << 10)
        | (__have_bmi2 << 11)
        | (__have_lzcnt << 12)
        | (__have_sse4a << 13)
        | (__have_fma << 14)
        | (__have_fma4 << 15)
        | (__have_f16c << 16)
        | (__have_popcnt << 17)
        | (__have_avx512f << 18)
        | (__have_avx512dq << 19)
        | (__have_avx512vl << 20)
        | (__have_avx512bw << 21)
        | (__have_avx512bitalg << 22)
        | (__have_avx512vbmi2 << 23)
        | (__have_avx512vbmi << 24)
        | (__have_avx512ifma << 25)
        | (__have_avx512cd << 26)
        | (__have_avx512vnni << 27)
        | (__have_avx512vpopcntdq << 28)
        | (__have_avx512vp2intersect << 29);
    else if constexpr (__have_neon)
      return __have_neon
        | (__have_neon_a32 << 1)
        | (__have_neon_a64 << 2)
        | (__support_neon_float << 3);
    else if constexpr (__have_power_vmx)
      return __have_power_vmx
        | (__have_power_vsx << 1)
        | (__have_power8vec << 2)
        | (__have_power9vec << 3)
        | (__have_power10vec << 4);
    else
      return 0;
  }

  namespace
  {
    struct _OdrEnforcer {};
  }

  template <std::uint_least64_t...>
    struct _MachineFlagsTemplate {};

  /**@internal
   * Use this type as default template argument to all function templates that
   * are not declared always_inline. It ensures that a function
   * specialization that the compiler decides not to inline has a unique symbol
   * (_OdrEnforcer) or a symbol matching the machine/architecture flags
   * (_MachineFlagsTemplate). This helps to avoid ODR violations in cases where
   * users link TUs compiled with different flags. This is especially important
   * for using simd in libraries.
   */
  using __odr_helper
    = conditional_t<__machine_flags() == 0, _OdrEnforcer,
                    _MachineFlagsTemplate<__machine_flags(), __floating_point_flags()>>;
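
  // Illustrative sketch (hypothetical, not part of the header): a function
  // template in this library that is not always_inline would be declared as
  //   template <typename _Tp, typename = __detail::__odr_helper>
  //     _Tp __hypothetical_slow_path(_Tp);
  // so that TUs compiled with different machine flags instantiate distinct
  // symbols instead of silently violating the ODR.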

  struct _Minimum
  {
    template <typename _Tp>
      _GLIBCXX_SIMD_INTRINSIC constexpr
      _Tp
      operator()(_Tp __a, _Tp __b) const
      {
        using std::min;
        return min(__a, __b);
      }
  };

  struct _Maximum
  {
    template <typename _Tp>
      _GLIBCXX_SIMD_INTRINSIC constexpr
      _Tp
      operator()(_Tp __a, _Tp __b) const
      {
        using std::max;
        return max(__a, __b);
      }
  };
} // namespace __detail

// unrolled/pack execution helpers
// __execute_n_times{{{
template <typename _Fp, size_t... _I>
  [[__gnu__::__flatten__]] _GLIBCXX_SIMD_INTRINSIC constexpr
  void
  __execute_on_index_sequence(_Fp&& __f, index_sequence<_I...>)
  { ((void)__f(_SizeConstant<_I>()), ...); }

template <typename _Fp>
  _GLIBCXX_SIMD_INTRINSIC constexpr void
  __execute_on_index_sequence(_Fp&&, index_sequence<>)
  { }

template <size_t _Np, typename _Fp>
  _GLIBCXX_SIMD_INTRINSIC constexpr void
  __execute_n_times(_Fp&& __f)
  {
    __execute_on_index_sequence(static_cast<_Fp&&>(__f),
                                make_index_sequence<_Np>{});
  }

// }}}
// __generate_from_n_evaluations{{{
template <typename _R, typename _Fp, size_t... _I>
  [[__gnu__::__flatten__]] _GLIBCXX_SIMD_INTRINSIC constexpr
  _R
  __execute_on_index_sequence_with_return(_Fp&& __f, index_sequence<_I...>)
  { return _R{__f(_SizeConstant<_I>())...}; }

template <size_t _Np, typename _R, typename _Fp>
  _GLIBCXX_SIMD_INTRINSIC constexpr _R
  __generate_from_n_evaluations(_Fp&& __f)
  {
    return __execute_on_index_sequence_with_return<_R>(
             static_cast<_Fp&&>(__f), make_index_sequence<_Np>{});
  }
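
// Example (illustrative, not part of the header): evaluates the callable with
// _SizeConstant<0> ... _SizeConstant<3> and braced-initializes the result:
//   __generate_from_n_evaluations<4, array<int, 4>>(
//     [](auto __i) { return int(__i) * 2; })   // yields {0, 2, 4, 6}
// __execute_n_times<_Np> above applies the same unrolling without a result.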

// }}}
// __call_with_n_evaluations{{{
template <size_t... _I, typename _F0, typename _FArgs>
  [[__gnu__::__flatten__]] _GLIBCXX_SIMD_INTRINSIC constexpr
  auto
  __call_with_n_evaluations(index_sequence<_I...>, _F0&& __f0, _FArgs&& __fargs)
  { return __f0(__fargs(_SizeConstant<_I>())...); }

template <size_t _Np, typename _F0, typename _FArgs>
  _GLIBCXX_SIMD_INTRINSIC constexpr auto
  __call_with_n_evaluations(_F0&& __f0, _FArgs&& __fargs)
  {
    return __call_with_n_evaluations(make_index_sequence<_Np>{},
                                     static_cast<_F0&&>(__f0),
                                     static_cast<_FArgs&&>(__fargs));
  }

// }}}
// __call_with_subscripts{{{
template <size_t _First = 0, size_t... _It, typename _Tp, typename _Fp>
  [[__gnu__::__flatten__]] _GLIBCXX_SIMD_INTRINSIC constexpr
  auto
  __call_with_subscripts(_Tp&& __x, index_sequence<_It...>, _Fp&& __fun)
  { return __fun(__x[_First + _It]...); }

template <size_t _Np, size_t _First = 0, typename _Tp, typename _Fp>
  _GLIBCXX_SIMD_INTRINSIC constexpr auto
  __call_with_subscripts(_Tp&& __x, _Fp&& __fun)
  {
    return __call_with_subscripts<_First>(static_cast<_Tp&&>(__x),
                                          make_index_sequence<_Np>(),
                                          static_cast<_Fp&&>(__fun));
  }

// }}}

// vvv ---- type traits ---- vvv
// integer type aliases{{{
using _UChar = unsigned char;
using _SChar = signed char;
using _UShort = unsigned short;
using _UInt = unsigned int;
using _ULong = unsigned long;
using _ULLong = unsigned long long;
using _LLong = long long;

//}}}
// __first_of_pack{{{
template <typename _T0, typename...>
  struct __first_of_pack
  { using type = _T0; };

template <typename... _Ts>
  using __first_of_pack_t = typename __first_of_pack<_Ts...>::type;

//}}}
// __value_type_or_identity_t {{{
template <typename _Tp>
  typename _Tp::value_type
  __value_type_or_identity_impl(int);

template <typename _Tp>
  _Tp
  __value_type_or_identity_impl(float);

template <typename _Tp>
  using __value_type_or_identity_t
    = decltype(__value_type_or_identity_impl<_Tp>(int()));
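
// E.g. (illustrative): __value_type_or_identity_t<simd<int>> is int (the
// nested value_type is found via the int overload), whereas
// __value_type_or_identity_t<float> falls back to float itself.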

// }}}
// __is_vectorizable {{{
template <typename _Tp>
  struct __is_vectorizable : public is_arithmetic<_Tp> {};

template <>
  struct __is_vectorizable<bool> : public false_type {};

template <typename _Tp>
  inline constexpr bool __is_vectorizable_v = __is_vectorizable<_Tp>::value;

// Deduces to a vectorizable type
template <typename _Tp, typename = enable_if_t<__is_vectorizable_v<_Tp>>>
  using _Vectorizable = _Tp;

// }}}
// _LoadStorePtr / __is_possible_loadstore_conversion {{{
template <typename _Ptr, typename _ValueType>
  struct __is_possible_loadstore_conversion
  : conjunction<__is_vectorizable<_Ptr>, __is_vectorizable<_ValueType>> {};

template <>
  struct __is_possible_loadstore_conversion<bool, bool> : true_type {};

// Deduces to a type allowed for load/store with the given value type.
template <typename _Ptr, typename _ValueType,
          typename = enable_if_t<
            __is_possible_loadstore_conversion<_Ptr, _ValueType>::value>>
  using _LoadStorePtr = _Ptr;

// }}}
// __is_bitmask{{{
template <typename _Tp, typename = void_t<>>
  struct __is_bitmask : false_type {};

template <typename _Tp>
  inline constexpr bool __is_bitmask_v = __is_bitmask<_Tp>::value;

// the __mmaskXX case:
template <typename _Tp>
  struct __is_bitmask<_Tp,
                      void_t<decltype(declval<unsigned&>() = declval<_Tp>() & 1u)>>
  : true_type {};

// }}}
// __int_for_sizeof{{{
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wpedantic"
template <size_t _Bytes>
  constexpr auto
  __int_for_sizeof()
  {
    static_assert(_Bytes > 0);
    if constexpr (_Bytes == sizeof(int))
      return int();
    else if constexpr (_Bytes == sizeof(_SChar))
      return _SChar();
    else if constexpr (_Bytes == sizeof(short))
      return short();
    else if constexpr (_Bytes == sizeof(long))
      return long();
    else if constexpr (_Bytes == sizeof(_LLong))
      return _LLong();
    #ifdef __SIZEOF_INT128__
    else if constexpr (_Bytes == sizeof(__int128))
      return __int128();
    #endif // __SIZEOF_INT128__
    else if constexpr (_Bytes % sizeof(int) == 0)
      {
        constexpr size_t _Np = _Bytes / sizeof(int);
        struct _Ip
        {
          int _M_data[_Np];

          _GLIBCXX_SIMD_INTRINSIC constexpr _Ip
          operator&(_Ip __rhs) const
          {
            return __generate_from_n_evaluations<_Np, _Ip>(
                     [&](auto __i) _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA {
                       return __rhs._M_data[__i] & _M_data[__i];
                     });
          }

          _GLIBCXX_SIMD_INTRINSIC constexpr _Ip
          operator|(_Ip __rhs) const
          {
            return __generate_from_n_evaluations<_Np, _Ip>(
                     [&](auto __i) _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA {
                       return __rhs._M_data[__i] | _M_data[__i];
                     });
          }

          _GLIBCXX_SIMD_INTRINSIC constexpr _Ip
          operator^(_Ip __rhs) const
          {
            return __generate_from_n_evaluations<_Np, _Ip>(
                     [&](auto __i) _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA {
                       return __rhs._M_data[__i] ^ _M_data[__i];
                     });
          }

          _GLIBCXX_SIMD_INTRINSIC constexpr _Ip
          operator~() const
          {
            return __generate_from_n_evaluations<_Np, _Ip>(
                     [&](auto __i) _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA { return ~_M_data[__i]; });
          }
        };
        return _Ip{};
      }
    else
      static_assert(_Bytes == 0, "this should be unreachable");
  }
#pragma GCC diagnostic pop

template <typename _Tp>
  using __int_for_sizeof_t = decltype(__int_for_sizeof<sizeof(_Tp)>());

template <size_t _Np>
  using __int_with_sizeof_t = decltype(__int_for_sizeof<_Np>());
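
// E.g. (illustrative): __int_for_sizeof_t<double> is long on LP64 targets
// (long long where long is 4 Bytes), and __int_with_sizeof_t<16> is __int128
// where available, otherwise the bitwise-ops-only struct _Ip of four ints.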

// }}}
// __is_fixed_size_abi{{{
template <typename _Tp>
  struct __is_fixed_size_abi : false_type {};

template <int _Np>
  struct __is_fixed_size_abi<simd_abi::fixed_size<_Np>> : true_type {};

template <typename _Tp>
  inline constexpr bool __is_fixed_size_abi_v = __is_fixed_size_abi<_Tp>::value;

// }}}
// __is_scalar_abi {{{
template <typename _Abi>
  constexpr bool
  __is_scalar_abi()
  { return is_same_v<simd_abi::scalar, _Abi>; }

// }}}
// __abi_bytes_v {{{
template <template <int> class _Abi, int _Bytes>
  constexpr int
  __abi_bytes_impl(_Abi<_Bytes>*)
  { return _Bytes; }

template <typename _Tp>
  constexpr int
  __abi_bytes_impl(_Tp*)
  { return -1; }

template <typename _Abi>
  inline constexpr int __abi_bytes_v
    = __abi_bytes_impl(static_cast<_Abi*>(nullptr));

// }}}
// __is_builtin_bitmask_abi {{{
template <typename _Abi>
  constexpr bool
  __is_builtin_bitmask_abi()
  { return is_same_v<simd_abi::_VecBltnBtmsk<__abi_bytes_v<_Abi>>, _Abi>; }

// }}}
// __is_sse_abi {{{
template <typename _Abi>
  constexpr bool
  __is_sse_abi()
  {
    constexpr auto _Bytes = __abi_bytes_v<_Abi>;
    return _Bytes <= 16 && is_same_v<simd_abi::_VecBuiltin<_Bytes>, _Abi>;
  }

// }}}
// __is_avx_abi {{{
template <typename _Abi>
  constexpr bool
  __is_avx_abi()
  {
    constexpr auto _Bytes = __abi_bytes_v<_Abi>;
    return _Bytes > 16 && _Bytes <= 32
             && is_same_v<simd_abi::_VecBuiltin<_Bytes>, _Abi>;
  }

// }}}
// __is_avx512_abi {{{
template <typename _Abi>
  constexpr bool
  __is_avx512_abi()
  {
    constexpr auto _Bytes = __abi_bytes_v<_Abi>;
    return _Bytes <= 64 && is_same_v<simd_abi::_Avx512<_Bytes>, _Abi>;
  }

// }}}
// __is_neon_abi {{{
template <typename _Abi>
  constexpr bool
  __is_neon_abi()
  {
    constexpr auto _Bytes = __abi_bytes_v<_Abi>;
    return _Bytes <= 16 && is_same_v<simd_abi::_VecBuiltin<_Bytes>, _Abi>;
  }

// }}}
// __make_dependent_t {{{
template <typename, typename _Up>
  struct __make_dependent
  { using type = _Up; };

template <typename _Tp, typename _Up>
  using __make_dependent_t = typename __make_dependent<_Tp, _Up>::type;

// }}}
// ^^^ ---- type traits ---- ^^^

// __invoke_ub{{{
template <typename... _Args>
  [[noreturn]] _GLIBCXX_SIMD_ALWAYS_INLINE void
  __invoke_ub([[maybe_unused]] const char* __msg, [[maybe_unused]] const _Args&... __args)
  {
#ifdef _GLIBCXX_DEBUG_UB
    __builtin_fprintf(stderr, __msg, __args...);
    __builtin_trap();
#else
    __builtin_unreachable();
#endif
  }

// }}}
// __assert_unreachable{{{
template <typename _Tp>
  struct __assert_unreachable
  { static_assert(!is_same_v<_Tp, _Tp>, "this should be unreachable"); };

// }}}
// __size_or_zero_v {{{
template <typename _Tp, typename _Ap, size_t _Np = simd_size<_Tp, _Ap>::value>
  constexpr size_t
  __size_or_zero_dispatch(int)
  { return _Np; }

template <typename _Tp, typename _Ap>
  constexpr size_t
  __size_or_zero_dispatch(float)
  { return 0; }

template <typename _Tp, typename _Ap>
  inline constexpr size_t __size_or_zero_v
    = __size_or_zero_dispatch<_Tp, _Ap>(0);

// }}}
// __div_roundup {{{
inline constexpr size_t
__div_roundup(size_t __a, size_t __b)
{ return (__a + __b - 1) / __b; }
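
// E.g. (illustrative): __div_roundup(10, 4) == 3, since 10 elements need
// three groups of four.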

// }}}
// _ExactBool{{{
class _ExactBool
{
  const bool _M_data;

public:
  _GLIBCXX_SIMD_INTRINSIC constexpr
  _ExactBool(bool __b) : _M_data(__b) {}

  _ExactBool(int) = delete;

  _GLIBCXX_SIMD_INTRINSIC constexpr
  operator bool() const
  { return _M_data; }
};

// }}}
// __may_alias{{{
/**@internal
 * Helper __may_alias<_Tp> that turns _Tp into the type to be used for an
 * aliasing pointer. This adds the __may_alias attribute to _Tp (with compilers
 * that support it).
 */
template <typename _Tp>
  using __may_alias [[__gnu__::__may_alias__]] = _Tp;
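
// Example (illustrative, not part of the header): loading through an aliasing
// pointer without violating strict aliasing:
//   const __may_alias<_ULLong>* __p
//     = reinterpret_cast<const __may_alias<_ULLong>*>(__mem);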

// }}}
// _UnsupportedBase {{{
// simd and simd_mask base for unsupported <_Tp, _Abi>
struct _UnsupportedBase
{
  _UnsupportedBase() = delete;
  _UnsupportedBase(const _UnsupportedBase&) = delete;
  _UnsupportedBase& operator=(const _UnsupportedBase&) = delete;
  ~_UnsupportedBase() = delete;
};

// }}}
// _InvalidTraits {{{
/**
 * @internal
 * Defines the implementation of a given <_Tp, _Abi>.
 *
 * Implementations must ensure that only valid <_Tp, _Abi> instantiations are
 * possible. Static assertions in the type definition do not suffice. It is
 * important that SFINAE works.
 */
struct _InvalidTraits
{
  using _IsValid = false_type;
  using _SimdBase = _UnsupportedBase;
  using _MaskBase = _UnsupportedBase;

  static constexpr size_t _S_full_size = 0;
  static constexpr bool _S_is_partial = false;

  static constexpr size_t _S_simd_align = 1;
  struct _SimdImpl;
  struct _SimdMember {};
  struct _SimdCastType;

  static constexpr size_t _S_mask_align = 1;
  struct _MaskImpl;
  struct _MaskMember {};
  struct _MaskCastType;
};

// }}}
// _SimdTraits {{{
template <typename _Tp, typename _Abi, typename = void_t<>>
  struct _SimdTraits : _InvalidTraits {};

// }}}
// __private_init, __bitset_init{{{
/**
 * @internal
 * Tag used for private init constructor of simd and simd_mask
 */
inline constexpr struct _PrivateInit {} __private_init = {};

inline constexpr struct _BitsetInit {} __bitset_init = {};

// }}}
// __is_narrowing_conversion<_From, _To>{{{
template <typename _From, typename _To, bool = is_arithmetic_v<_From>,
          bool = is_arithmetic_v<_To>>
  struct __is_narrowing_conversion;

// ignore "signed/unsigned mismatch" in the following trait.
// The implicit conversions will do the right thing here.
template <typename _From, typename _To>
  struct __is_narrowing_conversion<_From, _To, true, true>
  : public __bool_constant<(
      __digits_v<_From> > __digits_v<_To>
        || __finite_max_v<_From> > __finite_max_v<_To>
        || __finite_min_v<_From> < __finite_min_v<_To>
        || (is_signed_v<_From> && is_unsigned_v<_To>))> {};

template <typename _Tp>
  struct __is_narrowing_conversion<_Tp, bool, true, true>
  : public true_type {};

template <>
  struct __is_narrowing_conversion<bool, bool, true, true>
  : public false_type {};

template <typename _Tp>
  struct __is_narrowing_conversion<_Tp, _Tp, true, true>
  : public false_type {};

template <typename _From, typename _To>
  struct __is_narrowing_conversion<_From, _To, false, true>
  : public negation<is_convertible<_From, _To>> {};

// }}}
// __converts_to_higher_integer_rank{{{
template <typename _From, typename _To, bool = (sizeof(_From) < sizeof(_To))>
  struct __converts_to_higher_integer_rank : public true_type {};

// this may fail for char -> short if sizeof(char) == sizeof(short)
template <typename _From, typename _To>
  struct __converts_to_higher_integer_rank<_From, _To, false>
  : public is_same<decltype(declval<_From>() + declval<_To>()), _To> {};
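
// E.g. (illustrative): char -> int converts to a higher rank because
// decltype(char() + int()) is int; int -> unsigned int also qualifies (equal
// size, but the usual arithmetic conversions yield unsigned int).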

// }}}
// __data(simd/simd_mask) {{{
template <typename _Tp, typename _Ap>
  _GLIBCXX_SIMD_INTRINSIC constexpr const auto&
  __data(const simd<_Tp, _Ap>& __x);

template <typename _Tp, typename _Ap>
  _GLIBCXX_SIMD_INTRINSIC constexpr auto&
  __data(simd<_Tp, _Ap>& __x);

template <typename _Tp, typename _Ap>
  _GLIBCXX_SIMD_INTRINSIC constexpr const auto&
  __data(const simd_mask<_Tp, _Ap>& __x);

template <typename _Tp, typename _Ap>
  _GLIBCXX_SIMD_INTRINSIC constexpr auto&
  __data(simd_mask<_Tp, _Ap>& __x);

// }}}
// _SimdConverter {{{
template <typename _FromT, typename _FromA, typename _ToT, typename _ToA,
          typename = void>
  struct _SimdConverter;

template <typename _Tp, typename _Ap>
  struct _SimdConverter<_Tp, _Ap, _Tp, _Ap, void>
  {
    template <typename _Up>
      _GLIBCXX_SIMD_INTRINSIC const _Up&
      operator()(const _Up& __x)
      { return __x; }
  };

// }}}
// __to_value_type_or_member_type {{{
template <typename _V>
  _GLIBCXX_SIMD_INTRINSIC constexpr auto
  __to_value_type_or_member_type(const _V& __x) -> decltype(__data(__x))
  { return __data(__x); }

template <typename _V>
  _GLIBCXX_SIMD_INTRINSIC constexpr const typename _V::value_type&
  __to_value_type_or_member_type(const typename _V::value_type& __x)
  { return __x; }

// }}}
// __bool_storage_member_type{{{
template <size_t _Size>
  struct __bool_storage_member_type;

template <size_t _Size>
  using __bool_storage_member_type_t =
    typename __bool_storage_member_type<_Size>::type;

// }}}
// _SimdTuple {{{
// why not tuple?
// 1. tuple gives no guarantee about the storage order, but I require storage
//    equivalent to array<_Tp, _Np>
// 2. direct access to the element type (first template argument)
// 3. enforces equal element type, only different _Abi types are allowed
template <typename _Tp, typename... _Abis>
  struct _SimdTuple;

//}}}
// __fixed_size_storage_t {{{
template <typename _Tp, int _Np>
  struct __fixed_size_storage;

template <typename _Tp, int _Np>
  using __fixed_size_storage_t = typename __fixed_size_storage<_Tp, _Np>::type;

// }}}
// _SimdWrapper fwd decl{{{
template <typename _Tp, size_t _Size, typename = void_t<>>
  struct _SimdWrapper;

template <typename _Tp>
  using _SimdWrapper8 = _SimdWrapper<_Tp, 8 / sizeof(_Tp)>;
template <typename _Tp>
  using _SimdWrapper16 = _SimdWrapper<_Tp, 16 / sizeof(_Tp)>;
template <typename _Tp>
  using _SimdWrapper32 = _SimdWrapper<_Tp, 32 / sizeof(_Tp)>;
template <typename _Tp>
  using _SimdWrapper64 = _SimdWrapper<_Tp, 64 / sizeof(_Tp)>;

// }}}
// __is_simd_wrapper {{{
template <typename _Tp>
  struct __is_simd_wrapper : false_type {};

template <typename _Tp, size_t _Np>
  struct __is_simd_wrapper<_SimdWrapper<_Tp, _Np>> : true_type {};

template <typename _Tp>
  inline constexpr bool __is_simd_wrapper_v = __is_simd_wrapper<_Tp>::value;

// }}}
// _BitOps {{{
struct _BitOps
{
  // _S_bit_iteration {{{
  template <typename _Tp, typename _Fp>
    static void
    _S_bit_iteration(_Tp __mask, _Fp&& __f)
    {
      static_assert(sizeof(_ULLong) >= sizeof(_Tp));
      conditional_t<sizeof(_Tp) <= sizeof(_UInt), _UInt, _ULLong> __k;
      if constexpr (is_convertible_v<_Tp, decltype(__k)>)
        __k = __mask;
      else
        __k = __mask.to_ullong();
      while (__k)
        {
          __f(std::__countr_zero(__k));
          __k &= (__k - 1);
        }
    }

  //}}}
};
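
// Example (illustrative): _BitOps::_S_bit_iteration(0b1010u, __f) invokes
// __f(1) and then __f(3); each iteration clears the lowest set bit via
// __k &= __k - 1.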

//}}}
// __increment, __decrement {{{
template <typename _Tp = void>
  struct __increment
  { constexpr _Tp operator()(_Tp __a) const { return ++__a; } };

template <>
  struct __increment<void>
  {
    template <typename _Tp>
      constexpr _Tp
      operator()(_Tp __a) const
      { return ++__a; }
  };

template <typename _Tp = void>
  struct __decrement
  { constexpr _Tp operator()(_Tp __a) const { return --__a; } };

template <>
  struct __decrement<void>
  {
    template <typename _Tp>
      constexpr _Tp
      operator()(_Tp __a) const
      { return --__a; }
  };

// }}}
// _ValuePreserving(OrInt) {{{
template <typename _From, typename _To,
          typename = enable_if_t<negation<
            __is_narrowing_conversion<__remove_cvref_t<_From>, _To>>::value>>
  using _ValuePreserving = _From;

template <typename _From, typename _To,
          typename _DecayedFrom = __remove_cvref_t<_From>,
          typename = enable_if_t<conjunction<
            is_convertible<_From, _To>,
            disjunction<
              is_same<_DecayedFrom, _To>, is_same<_DecayedFrom, int>,
              conjunction<is_same<_DecayedFrom, _UInt>, is_unsigned<_To>>,
              negation<__is_narrowing_conversion<_DecayedFrom, _To>>>>::value>>
  using _ValuePreservingOrInt = _From;

// }}}
// __intrinsic_type {{{
template <typename _Tp, size_t _Bytes, typename = void_t<>>
  struct __intrinsic_type;

template <typename _Tp, size_t _Size>
  using __intrinsic_type_t =
    typename __intrinsic_type<_Tp, _Size * sizeof(_Tp)>::type;

template <typename _Tp>
  using __intrinsic_type2_t = typename __intrinsic_type<_Tp, 2>::type;
template <typename _Tp>
  using __intrinsic_type4_t = typename __intrinsic_type<_Tp, 4>::type;
template <typename _Tp>
  using __intrinsic_type8_t = typename __intrinsic_type<_Tp, 8>::type;
template <typename _Tp>
  using __intrinsic_type16_t = typename __intrinsic_type<_Tp, 16>::type;
template <typename _Tp>
  using __intrinsic_type32_t = typename __intrinsic_type<_Tp, 32>::type;
template <typename _Tp>
  using __intrinsic_type64_t = typename __intrinsic_type<_Tp, 64>::type;

// }}}
// _BitMask {{{
template <size_t _Np, bool _Sanitized = false>
  struct _BitMask;

template <size_t _Np, bool _Sanitized>
  struct __is_bitmask<_BitMask<_Np, _Sanitized>, void> : true_type {};

template <size_t _Np>
  using _SanitizedBitMask = _BitMask<_Np, true>;

template <size_t _Np, bool _Sanitized>
  struct _BitMask
  {
    static_assert(_Np > 0);

    static constexpr size_t _NBytes = __div_roundup(_Np, __CHAR_BIT__);

    using _Tp = conditional_t<_Np == 1, bool,
                              make_unsigned_t<__int_with_sizeof_t<std::min(
                                sizeof(_ULLong), std::__bit_ceil(_NBytes))>>>;

    static constexpr int _S_array_size = __div_roundup(_NBytes, sizeof(_Tp));

    _Tp _M_bits[_S_array_size];

    static constexpr int _S_unused_bits
      = _Np == 1 ? 0 : _S_array_size * sizeof(_Tp) * __CHAR_BIT__ - _Np;

    static constexpr _Tp _S_bitmask = +_Tp(~_Tp()) >> _S_unused_bits;
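
    // Illustrative example: for _BitMask<70>, _NBytes == 9, _Tp == _ULLong,
    // _S_array_size == 2, _S_unused_bits == 58, and _S_bitmask == 0x3f (only
    // the 6 low bits of the last array element are used).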

    constexpr _BitMask() noexcept = default;

    constexpr _BitMask(unsigned long long __x) noexcept
    : _M_bits{static_cast<_Tp>(__x)} {}

    _BitMask(bitset<_Np> __x) noexcept : _BitMask(__x.to_ullong()) {}

    constexpr _BitMask(const _BitMask&) noexcept = default;

    template <bool _RhsSanitized, typename = enable_if_t<_RhsSanitized == false
                                                           && _Sanitized == true>>
      constexpr _BitMask(const _BitMask<_Np, _RhsSanitized>& __rhs) noexcept
      : _BitMask(__rhs._M_sanitized()) {}

    constexpr operator _SimdWrapper<bool, _Np>() const noexcept
    {
      static_assert(_S_array_size == 1);
      return _M_bits[0];
    }

    // precondition: is sanitized
    constexpr _Tp
    _M_to_bits() const noexcept
    {
      static_assert(_S_array_size == 1);
      return _M_bits[0];
    }

    // precondition: is sanitized
    constexpr unsigned long long
    to_ullong() const noexcept
    {
      static_assert(_S_array_size == 1);
      return _M_bits[0];
    }

    // precondition: is sanitized
    constexpr unsigned long
    to_ulong() const noexcept
    {
      static_assert(_S_array_size == 1);
      return _M_bits[0];
    }

    constexpr bitset<_Np>
    _M_to_bitset() const noexcept
    {
      static_assert(_S_array_size == 1);
      return _M_bits[0];
    }

    constexpr decltype(auto)
    _M_sanitized() const noexcept
    {
      if constexpr (_Sanitized)
        return *this;
      else if constexpr (_Np == 1)
        return _SanitizedBitMask<_Np>(_M_bits[0]);
      else
        {
          _SanitizedBitMask<_Np> __r = {};
          for (int __i = 0; __i < _S_array_size; ++__i)
            __r._M_bits[__i] = _M_bits[__i];
          if constexpr (_S_unused_bits > 0)
            __r._M_bits[_S_array_size - 1] &= _S_bitmask;
          return __r;
        }
    }

    template <size_t _Mp, bool _LSanitized>
      constexpr _BitMask<_Np + _Mp, _Sanitized>
      _M_prepend(_BitMask<_Mp, _LSanitized> __lsb) const noexcept
      {
        constexpr size_t _RN = _Np + _Mp;
        using _Rp = _BitMask<_RN, _Sanitized>;
        if constexpr (_Rp::_S_array_size == 1)
          {
            _Rp __r{{_M_bits[0]}};
            __r._M_bits[0] <<= _Mp;
            __r._M_bits[0] |= __lsb._M_sanitized()._M_bits[0];
            return __r;
          }
        else
          __assert_unreachable<_Rp>();
      }

    // Return a new _BitMask with size _NewSize while dropping _DropLsb least
    // significant bits. If the operation implicitly produces a sanitized bitmask,
    // the result type will have _Sanitized set.
    template <size_t _DropLsb, size_t _NewSize = _Np - _DropLsb>
      constexpr auto
      _M_extract() const noexcept
      {
        static_assert(_Np > _DropLsb);
        static_assert(_DropLsb + _NewSize <= sizeof(_ULLong) * __CHAR_BIT__,
                      "not implemented for bitmasks larger than one ullong");
        if constexpr (_NewSize == 1)
          // must sanitize because the return _Tp is bool
          return _SanitizedBitMask<1>(_M_bits[0] & (_Tp(1) << _DropLsb));
        else
          return _BitMask<_NewSize,
                          ((_NewSize + _DropLsb == sizeof(_Tp) * __CHAR_BIT__
                              && _NewSize + _DropLsb <= _Np)
                             || ((_Sanitized || _Np == sizeof(_Tp) * __CHAR_BIT__)
                                   && _NewSize + _DropLsb >= _Np))>(_M_bits[0]
                                                                      >> _DropLsb);
      }

    // True if all bits are set. Implicitly sanitizes if _Sanitized == false.
    constexpr bool
    all() const noexcept
    {
      if constexpr (_Np == 1)
        return _M_bits[0];
      else if constexpr (!_Sanitized)
        return _M_sanitized().all();
      else
        {
          constexpr _Tp __allbits = ~_Tp();
          for (int __i = 0; __i < _S_array_size - 1; ++__i)
            if (_M_bits[__i] != __allbits)
              return false;
          return _M_bits[_S_array_size - 1] == _S_bitmask;
        }
    }

    // True if at least one bit is set. Implicitly sanitizes if _Sanitized ==
    // false.
    constexpr bool
    any() const noexcept
    {
      if constexpr (_Np == 1)
        return _M_bits[0];
      else if constexpr (!_Sanitized)
        return _M_sanitized().any();
      else
        {
          for (int __i = 0; __i < _S_array_size - 1; ++__i)
            if (_M_bits[__i] != 0)
              return true;
          return _M_bits[_S_array_size - 1] != 0;
        }
    }

    // True if no bit is set. Implicitly sanitizes if _Sanitized == false.
    constexpr bool
    none() const noexcept
    {
      if constexpr (_Np == 1)
        return !_M_bits[0];
      else if constexpr (!_Sanitized)
        return _M_sanitized().none();
      else
        {
          for (int __i = 0; __i < _S_array_size - 1; ++__i)
            if (_M_bits[__i] != 0)
              return false;
          return _M_bits[_S_array_size - 1] == 0;
        }
    }

    // Returns the number of set bits. Implicitly sanitizes if _Sanitized ==
    // false.
    constexpr int
    count() const noexcept
    {
      if constexpr (_Np == 1)
        return _M_bits[0];
      else if constexpr (!_Sanitized)
        return _M_sanitized().count();
      else
        {
          int __result = __builtin_popcountll(_M_bits[0]);
          for (int __i = 1; __i < _S_array_size; ++__i)
            __result += __builtin_popcountll(_M_bits[__i]);
          return __result;
        }
    }

    // Returns the bit at offset __i as bool.
    constexpr bool
    operator[](size_t __i) const noexcept
    {
      if constexpr (_Np == 1)
        return _M_bits[0];
      else if constexpr (_S_array_size == 1)
        return (_M_bits[0] >> __i) & 1;
      else
        {
          const size_t __j = __i / (sizeof(_Tp) * __CHAR_BIT__);
          const size_t __shift = __i % (sizeof(_Tp) * __CHAR_BIT__);
          return (_M_bits[__j] >> __shift) & 1;
        }
    }

    template <size_t __i>
      constexpr bool
      operator[](_SizeConstant<__i>) const noexcept
      {
        static_assert(__i < _Np);
        constexpr size_t __j = __i / (sizeof(_Tp) * __CHAR_BIT__);
        constexpr size_t __shift = __i % (sizeof(_Tp) * __CHAR_BIT__);
        return static_cast<bool>(_M_bits[__j] & (_Tp(1) << __shift));
      }

    // Set the bit at offset __i to __x.
    constexpr void
    set(size_t __i, bool __x) noexcept
    {
      if constexpr (_Np == 1)
        _M_bits[0] = __x;
      else if constexpr (_S_array_size == 1)
        {
          _M_bits[0] &= ~_Tp(_Tp(1) << __i);
          _M_bits[0] |= _Tp(_Tp(__x) << __i);
        }
      else
        {
          const size_t __j = __i / (sizeof(_Tp) * __CHAR_BIT__);
          const size_t __shift = __i % (sizeof(_Tp) * __CHAR_BIT__);
          _M_bits[__j] &= ~_Tp(_Tp(1) << __shift);
          _M_bits[__j] |= _Tp(_Tp(__x) << __shift);
        }
    }

    template <size_t __i>
      constexpr void
      set(_SizeConstant<__i>, bool __x) noexcept
      {
        static_assert(__i < _Np);
        if constexpr (_Np == 1)
          _M_bits[0] = __x;
        else
          {
            constexpr size_t __j = __i / (sizeof(_Tp) * __CHAR_BIT__);
            constexpr size_t __shift = __i % (sizeof(_Tp) * __CHAR_BIT__);
            constexpr _Tp __mask = ~_Tp(_Tp(1) << __shift);
            _M_bits[__j] &= __mask;
            _M_bits[__j] |= _Tp(_Tp(__x) << __shift);
          }
      }

    // Inverts all bits. Sanitized input leads to sanitized output.
    constexpr _BitMask
    operator~() const noexcept
    {
      if constexpr (_Np == 1)
        return !_M_bits[0];
      else
        {
          _BitMask __result{};
          for (int __i = 0; __i < _S_array_size - 1; ++__i)
            __result._M_bits[__i] = ~_M_bits[__i];
          if constexpr (_Sanitized)
            __result._M_bits[_S_array_size - 1]
              = _M_bits[_S_array_size - 1] ^ _S_bitmask;
          else
            __result._M_bits[_S_array_size - 1] = ~_M_bits[_S_array_size - 1];
          return __result;
        }
    }

    constexpr _BitMask&
    operator^=(const _BitMask& __b) & noexcept
    {
      __execute_n_times<_S_array_size>(
        [&](auto __i) _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA { _M_bits[__i] ^= __b._M_bits[__i]; });
      return *this;
    }

    constexpr _BitMask&
    operator|=(const _BitMask& __b) & noexcept
    {
      __execute_n_times<_S_array_size>(
        [&](auto __i) _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA { _M_bits[__i] |= __b._M_bits[__i]; });
      return *this;
    }

    constexpr _BitMask&
    operator&=(const _BitMask& __b) & noexcept
    {
      __execute_n_times<_S_array_size>(
        [&](auto __i) _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA { _M_bits[__i] &= __b._M_bits[__i]; });
      return *this;
    }

    friend constexpr _BitMask
    operator^(const _BitMask& __a, const _BitMask& __b) noexcept
    {
      _BitMask __r = __a;
      __r ^= __b;
      return __r;
    }

    friend constexpr _BitMask
    operator|(const _BitMask& __a, const _BitMask& __b) noexcept
    {
      _BitMask __r = __a;
      __r |= __b;
      return __r;
    }

    friend constexpr _BitMask
    operator&(const _BitMask& __a, const _BitMask& __b) noexcept
    {
      _BitMask __r = __a;
      __r &= __b;
      return __r;
    }

    _GLIBCXX_SIMD_INTRINSIC
    constexpr bool
    _M_is_constprop() const
    {
      if constexpr (_S_array_size == 0)
        return __builtin_constant_p(_M_bits[0]);
      else
        {
          for (int __i = 0; __i < _S_array_size; ++__i)
            if (!__builtin_constant_p(_M_bits[__i]))
              return false;
          return true;
        }
    }
  };

// }}}

// vvv ---- builtin vector types [[gnu::vector_size(N)]] and operations ---- vvv
// __min_vector_size {{{
template <typename _Tp = void>
  static inline constexpr int __min_vector_size = 2 * sizeof(_Tp);

#if _GLIBCXX_SIMD_HAVE_NEON
template <>
  inline constexpr int __min_vector_size<void> = 8;
#else
template <>
  inline constexpr int __min_vector_size<void> = 16;
#endif
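
// E.g. (illustrative): __min_vector_size<short> == 4, while the unspecialized
// __min_vector_size<> (i.e. <void>) is the smallest full vector register:
// 8 Bytes with NEON, 16 Bytes otherwise.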

// }}}
// __vector_type {{{
template <typename _Tp, size_t _Np, typename = void>
  struct __vector_type_n {};

// substitution failure for the 0-element case
template <typename _Tp>
  struct __vector_type_n<_Tp, 0, void> {};

// special case 1-element to be _Tp itself
template <typename _Tp>
  struct __vector_type_n<_Tp, 1, enable_if_t<__is_vectorizable_v<_Tp>>>
  { using type = _Tp; };

// else, use GNU-style builtin vector types
template <typename _Tp, size_t _Np>
  struct __vector_type_n<_Tp, _Np, enable_if_t<__is_vectorizable_v<_Tp> && _Np >= 2>>
  {
    static constexpr size_t _S_Np2 = std::__bit_ceil(_Np * sizeof(_Tp));

    static constexpr size_t _S_Bytes =
#ifdef __i386__
      // Using [[gnu::vector_size(8)]] would wreak havoc on the FPU because
      // those objects are passed via MMX registers and nothing ever calls EMMS.
      _S_Np2 == 8 ? 16 :
#endif
      _S_Np2 < __min_vector_size<_Tp> ? __min_vector_size<_Tp>
                                      : _S_Np2;

    using type [[__gnu__::__vector_size__(_S_Bytes)]] = _Tp;
  };
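
// E.g. (illustrative): __vector_type_t<float, 3> is the 16-Byte vector of
// four floats (std::__bit_ceil(3 * sizeof(float)) == 16), while
// __vector_type_t<double, 1> is plain double.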

template <typename _Tp, size_t _Bytes, size_t = _Bytes % sizeof(_Tp)>
  struct __vector_type;

template <typename _Tp, size_t _Bytes>
  struct __vector_type<_Tp, _Bytes, 0>
  : __vector_type_n<_Tp, _Bytes / sizeof(_Tp)> {};

template <typename _Tp, size_t _Size>
  using __vector_type_t = typename __vector_type_n<_Tp, _Size>::type;

template <typename _Tp>
  using __vector_type2_t = typename __vector_type<_Tp, 2>::type;
template <typename _Tp>
  using __vector_type4_t = typename __vector_type<_Tp, 4>::type;
template <typename _Tp>
  using __vector_type8_t = typename __vector_type<_Tp, 8>::type;
template <typename _Tp>
  using __vector_type16_t = typename __vector_type<_Tp, 16>::type;
template <typename _Tp>
  using __vector_type32_t = typename __vector_type<_Tp, 32>::type;
template <typename _Tp>
  using __vector_type64_t = typename __vector_type<_Tp, 64>::type;

// }}}
// __is_vector_type {{{
template <typename _Tp, typename = void_t<>>
  struct __is_vector_type : false_type {};

template <typename _Tp>
  struct __is_vector_type<
    _Tp, void_t<typename __vector_type<
           remove_reference_t<decltype(declval<_Tp>()[0])>, sizeof(_Tp)>::type>>
  : is_same<_Tp, typename __vector_type<
                   remove_reference_t<decltype(declval<_Tp>()[0])>,
                   sizeof(_Tp)>::type> {};

template <typename _Tp>
  inline constexpr bool __is_vector_type_v = __is_vector_type<_Tp>::value;

// }}}
// __is_intrinsic_type {{{
#if _GLIBCXX_SIMD_HAVE_SSE_ABI
template <typename _Tp>
  using __is_intrinsic_type = __is_vector_type<_Tp>;
#else // not SSE (x86)
template <typename _Tp, typename = void_t<>>
  struct __is_intrinsic_type : false_type {};

template <typename _Tp>
  struct __is_intrinsic_type<
    _Tp, void_t<typename __intrinsic_type<
           remove_reference_t<decltype(declval<_Tp>()[0])>, sizeof(_Tp)>::type>>
  : is_same<_Tp, typename __intrinsic_type<
                   remove_reference_t<decltype(declval<_Tp>()[0])>,
                   sizeof(_Tp)>::type> {};
#endif

template <typename _Tp>
  inline constexpr bool __is_intrinsic_type_v = __is_intrinsic_type<_Tp>::value;

// }}}
// _VectorTraits{{{
template <typename _Tp, typename = void_t<>>
  struct _VectorTraitsImpl;

template <typename _Tp>
  struct _VectorTraitsImpl<_Tp, enable_if_t<__is_vector_type_v<_Tp>
                                              || __is_intrinsic_type_v<_Tp>>>
  {
    using type = _Tp;
    using value_type = remove_reference_t<decltype(declval<_Tp>()[0])>;
    static constexpr int _S_full_size = sizeof(_Tp) / sizeof(value_type);
    using _Wrapper = _SimdWrapper<value_type, _S_full_size>;
    template <typename _Up, int _W = _S_full_size>
      static constexpr bool _S_is
        = is_same_v<value_type, _Up> && _W == _S_full_size;
  };

template <typename _Tp, size_t _Np>
  struct _VectorTraitsImpl<_SimdWrapper<_Tp, _Np>,
                           void_t<__vector_type_t<_Tp, _Np>>>
  {
    using type = __vector_type_t<_Tp, _Np>;
    using value_type = _Tp;
    static constexpr int _S_full_size = sizeof(type) / sizeof(value_type);
    using _Wrapper = _SimdWrapper<_Tp, _Np>;
    static constexpr bool _S_is_partial = (_Np < _S_full_size);
    static constexpr int _S_partial_width = _Np;
    template <typename _Up, int _W = _S_full_size>
      static constexpr bool _S_is
        = is_same_v<value_type, _Up> && _W == _S_full_size;
  };

template <typename _Tp, typename = typename _VectorTraitsImpl<_Tp>::type>
  using _VectorTraits = _VectorTraitsImpl<_Tp>;

// }}}
// __as_vector{{{
template <typename _V>
  _GLIBCXX_SIMD_INTRINSIC constexpr auto
  __as_vector(_V __x)
  {
    if constexpr (__is_vector_type_v<_V>)
      return __x;
    else if constexpr (is_simd<_V>::value || is_simd_mask<_V>::value)
      {
        if constexpr (__is_fixed_size_abi_v<typename _V::abi_type>)
          {
            static_assert(is_simd<_V>::value);
            static_assert(_V::abi_type::template __traits<
                            typename _V::value_type>::_SimdMember::_S_tuple_size == 1);
            return __as_vector(__data(__x).first);
          }
        else if constexpr (_V::size() > 1)
          return __data(__x)._M_data;
        else
          {
            static_assert(is_simd<_V>::value);
            using _Tp = typename _V::value_type;
#ifdef __i386__
            constexpr auto __bytes = sizeof(_Tp) == 8 ? 16 : sizeof(_Tp);
            using _RV [[__gnu__::__vector_size__(__bytes)]] = _Tp;
#else
            using _RV [[__gnu__::__vector_size__(sizeof(_Tp))]] = _Tp;
#endif
            return _RV{__data(__x)};
          }
      }
    else if constexpr (__is_vectorizable_v<_V>)
      return __vector_type_t<_V, 2>{__x};
    else
      return __x._M_data;
  }

// }}}
// __as_wrapper{{{
template <size_t _Np = 0, typename _V>
  _GLIBCXX_SIMD_INTRINSIC constexpr auto
  __as_wrapper(_V __x)
  {
    if constexpr (__is_vector_type_v<_V>)
      return _SimdWrapper<typename _VectorTraits<_V>::value_type,
                          (_Np > 0 ? _Np : _VectorTraits<_V>::_S_full_size)>(__x);
    else if constexpr (is_simd<_V>::value || is_simd_mask<_V>::value)
      {
        static_assert(_V::size() == _Np);
        return __data(__x);
      }
    else
      {
        static_assert(_V::_S_size == _Np);
        return __x;
      }
  }

// }}}
// __intrin_bitcast{{{
template <typename _To, typename _From>
  _GLIBCXX_SIMD_INTRINSIC constexpr _To
  __intrin_bitcast(_From __v)
  {
    static_assert((__is_vector_type_v<_From> || __is_intrinsic_type_v<_From>)
                    && (__is_vector_type_v<_To> || __is_intrinsic_type_v<_To>));
    if constexpr (sizeof(_To) == sizeof(_From))
      return reinterpret_cast<_To>(__v);
    else if constexpr (sizeof(_From) > sizeof(_To))
      if constexpr (sizeof(_To) >= 16)
        return reinterpret_cast<const __may_alias<_To>&>(__v);
      else
        {
          _To __r;
          __builtin_memcpy(&__r, &__v, sizeof(_To));
          return __r;
        }
#if _GLIBCXX_SIMD_X86INTRIN && !defined __clang__
    else if constexpr (__have_avx && sizeof(_From) == 16 && sizeof(_To) == 32)
      return reinterpret_cast<_To>(__builtin_ia32_ps256_ps(
        reinterpret_cast<__vector_type_t<float, 4>>(__v)));
    else if constexpr (__have_avx512f && sizeof(_From) == 16
                         && sizeof(_To) == 64)
      return reinterpret_cast<_To>(__builtin_ia32_ps512_ps(
        reinterpret_cast<__vector_type_t<float, 4>>(__v)));
    else if constexpr (__have_avx512f && sizeof(_From) == 32
                         && sizeof(_To) == 64)
      return reinterpret_cast<_To>(__builtin_ia32_ps512_256ps(
        reinterpret_cast<__vector_type_t<float, 8>>(__v)));
#endif // _GLIBCXX_SIMD_X86INTRIN
    else if constexpr (sizeof(__v) <= 8)
      return reinterpret_cast<_To>(
        __vector_type_t<__int_for_sizeof_t<_From>, sizeof(_To) / sizeof(_From)>{
          reinterpret_cast<__int_for_sizeof_t<_From>>(__v)});
    else
      {
        static_assert(sizeof(_To) > sizeof(_From));
        _To __r = {};
        __builtin_memcpy(&__r, &__v, sizeof(_From));
        return __r;
      }
  }

// }}}
// __vector_bitcast{{{
template <typename _To, size_t _NN = 0, typename _From,
          typename _FromVT = _VectorTraits<_From>,
          size_t _Np = _NN == 0 ? sizeof(_From) / sizeof(_To) : _NN>
  _GLIBCXX_SIMD_INTRINSIC constexpr __vector_type_t<_To, _Np>
  __vector_bitcast(_From __x)
  {
    using _R = __vector_type_t<_To, _Np>;
    return __intrin_bitcast<_R>(__x);
  }

template <typename _To, size_t _NN = 0, typename _Tp, size_t _Nx,
          size_t _Np
            = _NN == 0 ? sizeof(_SimdWrapper<_Tp, _Nx>) / sizeof(_To) : _NN>
  _GLIBCXX_SIMD_INTRINSIC constexpr __vector_type_t<_To, _Np>
  __vector_bitcast(const _SimdWrapper<_Tp, _Nx>& __x)
  {
    static_assert(_Np > 1);
    return __intrin_bitcast<__vector_type_t<_To, _Np>>(__x._M_data);
  }

// }}}
// __convert_x86 declarations {{{
#ifdef _GLIBCXX_SIMD_WORKAROUND_PR85048
template <typename _To, typename _Tp, typename _TVT = _VectorTraits<_Tp>>
  _To __convert_x86(_Tp);

template <typename _To, typename _Tp, typename _TVT = _VectorTraits<_Tp>>
  _To __convert_x86(_Tp, _Tp);

template <typename _To, typename _Tp, typename _TVT = _VectorTraits<_Tp>>
  _To __convert_x86(_Tp, _Tp, _Tp, _Tp);

template <typename _To, typename _Tp, typename _TVT = _VectorTraits<_Tp>>
  _To __convert_x86(_Tp, _Tp, _Tp, _Tp, _Tp, _Tp, _Tp, _Tp);

template <typename _To, typename _Tp, typename _TVT = _VectorTraits<_Tp>>
  _To __convert_x86(_Tp, _Tp, _Tp, _Tp, _Tp, _Tp, _Tp, _Tp, _Tp, _Tp, _Tp, _Tp,
                    _Tp, _Tp, _Tp, _Tp);
#endif // _GLIBCXX_SIMD_WORKAROUND_PR85048

//}}}
// __bit_cast {{{
template <typename _To, typename _From>
  _GLIBCXX_SIMD_INTRINSIC constexpr _To
  __bit_cast(const _From __x)
  {
#if __has_builtin(__builtin_bit_cast)
    return __builtin_bit_cast(_To, __x);
#else
    static_assert(sizeof(_To) == sizeof(_From));
    constexpr bool __to_is_vectorizable
      = is_arithmetic_v<_To> || is_enum_v<_To>;
    constexpr bool __from_is_vectorizable
      = is_arithmetic_v<_From> || is_enum_v<_From>;
    if constexpr (__is_vector_type_v<_To> && __is_vector_type_v<_From>)
      return reinterpret_cast<_To>(__x);
    else if constexpr (__is_vector_type_v<_To> && __from_is_vectorizable)
      {
        using _FV [[__gnu__::__vector_size__(sizeof(_From))]] = _From;
        return reinterpret_cast<_To>(_FV{__x});
      }
    else if constexpr (__to_is_vectorizable && __from_is_vectorizable)
      {
        using _TV [[__gnu__::__vector_size__(sizeof(_To))]] = _To;
        using _FV [[__gnu__::__vector_size__(sizeof(_From))]] = _From;
        return reinterpret_cast<_TV>(_FV{__x})[0];
      }
    else if constexpr (__to_is_vectorizable && __is_vector_type_v<_From>)
      {
        using _TV [[__gnu__::__vector_size__(sizeof(_To))]] = _To;
        return reinterpret_cast<_TV>(__x)[0];
      }
    else
      {
        _To __r;
        __builtin_memcpy(reinterpret_cast<char*>(&__r),
                         reinterpret_cast<const char*>(&__x), sizeof(_To));
        return __r;
      }
#endif
  }
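
// Example (illustrative, assuming IEEE 754 binary32 float):
//   __bit_cast<_UInt>(1.f) == 0x3f800000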

// }}}
// __to_intrin {{{
template <typename _Tp, typename _TVT = _VectorTraits<_Tp>,
          typename _R = __intrinsic_type_t<typename _TVT::value_type, _TVT::_S_full_size>>
  _GLIBCXX_SIMD_INTRINSIC constexpr _R
  __to_intrin(_Tp __x)
  {
    static_assert(sizeof(__x) <= sizeof(_R),
                  "__to_intrin may never drop values off the end");
    if constexpr (sizeof(__x) == sizeof(_R))
      return reinterpret_cast<_R>(__as_vector(__x));
    else
      {
        using _Up = __int_for_sizeof_t<_Tp>;
        return reinterpret_cast<_R>(
          __vector_type_t<_Up, sizeof(_R) / sizeof(_Up)>{__bit_cast<_Up>(__x)});
      }
  }

// }}}
// __make_vector{{{
template <typename _Tp, typename... _Args>
  _GLIBCXX_SIMD_INTRINSIC constexpr __vector_type_t<_Tp, sizeof...(_Args)>
  __make_vector(const _Args&... __args)
  { return __vector_type_t<_Tp, sizeof...(_Args)>{static_cast<_Tp>(__args)...}; }
1816
1817// }}}
1818// __vector_broadcast{{{
1819template <size_t _Np, typename _Tp, size_t... _I>
1820 _GLIBCXX_SIMD_INTRINSIC constexpr __vector_type_t<_Tp, _Np>
1821 __vector_broadcast_impl(_Tp __x, index_sequence<_I...>)
1822 { return __vector_type_t<_Tp, _Np>{((void)_I, __x)...}; }
1823
1824template <size_t _Np, typename _Tp>
1825 _GLIBCXX_SIMD_INTRINSIC constexpr __vector_type_t<_Tp, _Np>
1826 __vector_broadcast(_Tp __x)
1827 { return __vector_broadcast_impl<_Np, _Tp>(__x, make_index_sequence<_Np>()); }
1828
1829// }}}
1830// __generate_vector{{{
1831template <typename _Tp, size_t _Np, typename _Gp, size_t... _I>
1832 _GLIBCXX_SIMD_INTRINSIC constexpr __vector_type_t<_Tp, _Np>
1833 __generate_vector_impl(_Gp&& __gen, index_sequence<_I...>)
1834 { return __vector_type_t<_Tp, _Np>{ static_cast<_Tp>(__gen(_SizeConstant<_I>()))...}; }
1835
1836template <typename _V, typename _VVT = _VectorTraits<_V>, typename _Gp>
1837 _GLIBCXX_SIMD_INTRINSIC constexpr _V
1838 __generate_vector(_Gp&& __gen)
1839 {
1840 if constexpr (__is_vector_type_v<_V>)
1841 return __generate_vector_impl<typename _VVT::value_type,
1842 _VVT::_S_full_size>(
1843 static_cast<_Gp&&>(__gen), make_index_sequence<_VVT::_S_full_size>());
1844 else
1845 return __generate_vector_impl<typename _VVT::value_type,
1846 _VVT::_S_partial_width>(
1847 static_cast<_Gp&&>(__gen),
1848 make_index_sequence<_VVT::_S_partial_width>());
1849 }
1850
1851template <typename _Tp, size_t _Np, typename _Gp>
1852 _GLIBCXX_SIMD_INTRINSIC constexpr __vector_type_t<_Tp, _Np>
1853 __generate_vector(_Gp&& __gen)
1854 {
1855 return __generate_vector_impl<_Tp, _Np>(static_cast<_Gp&&>(__gen),
1856 make_index_sequence<_Np>());
1857 }
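
// Usage sketch (illustrative): the generator is invoked with _SizeConstant
// arguments, so the index is usable as a constant expression in the callback:
//   constexpr auto __iota
//     = __generate_vector<int, 4>([](auto __i) { return int(__i); });
//   // __iota == {0, 1, 2, 3}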
1858
1859// }}}
1860// __xor{{{
1861template <typename _TW>
1862 _GLIBCXX_SIMD_INTRINSIC constexpr _TW
1863 __xor(_TW __a, _TW __b) noexcept
1864 {
1865 if constexpr (__is_vector_type_v<_TW> || __is_simd_wrapper_v<_TW>)
1866 {
1867 using _Tp = typename conditional_t<__is_simd_wrapper_v<_TW>, _TW,
1868 _VectorTraitsImpl<_TW>>::value_type;
1869 if constexpr (is_floating_point_v<_Tp>)
1870 {
1871 using _Ip = make_unsigned_t<__int_for_sizeof_t<_Tp>>;
1872 return __vector_bitcast<_Tp>(__vector_bitcast<_Ip>(__a)
1873 ^ __vector_bitcast<_Ip>(__b));
1874 }
1875 else if constexpr (__is_vector_type_v<_TW>)
1876 return __a ^ __b;
1877 else
1878 return __a._M_data ^ __b._M_data;
1879 }
1880 else
1881 return __a ^ __b;
1882 }
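
// Illustrative sketch (assuming __v is a 4 x float vector): flipping the sign
// bit of every lane via __xor with a -0.f broadcast. The float operands are
// bit-cast to unsigned integers internally because GNU vector extensions do
// not define bitwise operators for floating-point vectors:
//   const auto __sign = __vector_broadcast<4>(-0.f); // 4 x 0x8000'0000
//   __vector_type_t<float, 4> __negated = __xor(__v, __sign);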
1883
1884// }}}
1885// __or{{{
1886template <typename _TW>
1887 _GLIBCXX_SIMD_INTRINSIC constexpr _TW
1888 __or(_TW __a, _TW __b) noexcept
1889 {
1890 if constexpr (__is_vector_type_v<_TW> || __is_simd_wrapper_v<_TW>)
1891 {
1892 using _Tp = typename conditional_t<__is_simd_wrapper_v<_TW>, _TW,
1893 _VectorTraitsImpl<_TW>>::value_type;
1894 if constexpr (is_floating_point_v<_Tp>)
1895 {
1896 using _Ip = make_unsigned_t<__int_for_sizeof_t<_Tp>>;
1897 return __vector_bitcast<_Tp>(__vector_bitcast<_Ip>(__a)
1898 | __vector_bitcast<_Ip>(__b));
1899 }
1900 else if constexpr (__is_vector_type_v<_TW>)
1901 return __a | __b;
1902 else
1903 return __a._M_data | __b._M_data;
1904 }
1905 else
1906 return __a | __b;
1907 }
1908
1909// }}}
1910// __and{{{
1911template <typename _TW>
1912 _GLIBCXX_SIMD_INTRINSIC constexpr _TW
1913 __and(_TW __a, _TW __b) noexcept
1914 {
1915 if constexpr (__is_vector_type_v<_TW> || __is_simd_wrapper_v<_TW>)
1916 {
1917 using _Tp = typename conditional_t<__is_simd_wrapper_v<_TW>, _TW,
1918 _VectorTraitsImpl<_TW>>::value_type;
1919 if constexpr (is_floating_point_v<_Tp>)
1920 {
1921 using _Ip = make_unsigned_t<__int_for_sizeof_t<_Tp>>;
1922 return __vector_bitcast<_Tp>(__vector_bitcast<_Ip>(__a)
1923 & __vector_bitcast<_Ip>(__b));
1924 }
1925 else if constexpr (__is_vector_type_v<_TW>)
1926 return __a & __b;
1927 else
1928 return __a._M_data & __b._M_data;
1929 }
1930 else
1931 return __a & __b;
1932 }
1933
1934// }}}
1935// __andnot{{{
1936#if _GLIBCXX_SIMD_X86INTRIN && !defined __clang__
1937static constexpr struct
1938{
1939 _GLIBCXX_SIMD_INTRINSIC __v4sf
1940 operator()(__v4sf __a, __v4sf __b) const noexcept
1941 { return __builtin_ia32_andnps(__a, __b); }
1942
1943 _GLIBCXX_SIMD_INTRINSIC __v2df
1944 operator()(__v2df __a, __v2df __b) const noexcept
1945 { return __builtin_ia32_andnpd(__a, __b); }
1946
1947 _GLIBCXX_SIMD_INTRINSIC __v2di
1948 operator()(__v2di __a, __v2di __b) const noexcept
1949 { return __builtin_ia32_pandn128(__a, __b); }
1950
1951 _GLIBCXX_SIMD_INTRINSIC __v8sf
1952 operator()(__v8sf __a, __v8sf __b) const noexcept
1953 { return __builtin_ia32_andnps256(__a, __b); }
1954
1955 _GLIBCXX_SIMD_INTRINSIC __v4df
1956 operator()(__v4df __a, __v4df __b) const noexcept
1957 { return __builtin_ia32_andnpd256(__a, __b); }
1958
1959 _GLIBCXX_SIMD_INTRINSIC __v4di
1960 operator()(__v4di __a, __v4di __b) const noexcept
1961 {
1962 if constexpr (__have_avx2)
1963 return __builtin_ia32_andnotsi256(__a, __b);
1964 else
1965 return reinterpret_cast<__v4di>(
1966 __builtin_ia32_andnpd256(reinterpret_cast<__v4df>(__a),
1967 reinterpret_cast<__v4df>(__b)));
1968 }
1969
1970 _GLIBCXX_SIMD_INTRINSIC __v16sf
1971 operator()(__v16sf __a, __v16sf __b) const noexcept
1972 {
1973 if constexpr (__have_avx512dq)
1974 return _mm512_andnot_ps(__a, __b);
1975 else
1976 return reinterpret_cast<__v16sf>(
1977 _mm512_andnot_si512(reinterpret_cast<__v8di>(__a),
1978 reinterpret_cast<__v8di>(__b)));
1979 }
1980
1981 _GLIBCXX_SIMD_INTRINSIC __v8df
1982 operator()(__v8df __a, __v8df __b) const noexcept
1983 {
1984 if constexpr (__have_avx512dq)
1985 return _mm512_andnot_pd(__a, __b);
1986 else
1987 return reinterpret_cast<__v8df>(
1988 _mm512_andnot_si512(reinterpret_cast<__v8di>(__a),
1989 reinterpret_cast<__v8di>(__b)));
1990 }
1991
1992 _GLIBCXX_SIMD_INTRINSIC __v8di
1993 operator()(__v8di __a, __v8di __b) const noexcept
1994 { return _mm512_andnot_si512(__a, __b); }
1995} _S_x86_andnot;
1996#endif // _GLIBCXX_SIMD_X86INTRIN && !__clang__
1997
1998template <typename _TW>
1999 _GLIBCXX_SIMD_INTRINSIC constexpr _TW
2000 __andnot(_TW __a, _TW __b) noexcept
2001 {
2002 if constexpr (__is_vector_type_v<_TW> || __is_simd_wrapper_v<_TW>)
2003 {
2004 using _TVT = conditional_t<__is_simd_wrapper_v<_TW>, _TW,
2005 _VectorTraitsImpl<_TW>>;
2006 using _Tp = typename _TVT::value_type;
2007#if _GLIBCXX_SIMD_X86INTRIN && !defined __clang__
2008 if constexpr (sizeof(_TW) >= 16)
2009 {
2010 const auto __ai = __to_intrin(__a);
2011 const auto __bi = __to_intrin(__b);
2012 if (!__builtin_is_constant_evaluated()
2013 && !(__builtin_constant_p(__ai) && __builtin_constant_p(__bi)))
2014 {
2015 const auto __r = _S_x86_andnot(__ai, __bi);
2016 if constexpr (is_convertible_v<decltype(__r), _TW>)
2017 return __r;
2018 else
2019 return reinterpret_cast<typename _TVT::type>(__r);
2020 }
2021 }
2022#endif // _GLIBCXX_SIMD_X86INTRIN && !__clang__
2023 using _Ip = make_unsigned_t<__int_for_sizeof_t<_Tp>>;
2024 return __vector_bitcast<_Tp>(~__vector_bitcast<_Ip>(__a)
2025 & __vector_bitcast<_Ip>(__b));
2026 }
2027 else
2028 return ~__a & __b;
2029 }
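
// Note the operand order: __andnot(__a, __b) computes ~__a & __b, matching
// the x86 ANDNPS/PANDN convention. Illustrative mask-clearing sketch, with
// __k_all and __k_done assumed to be same-type vector masks:
//   auto __k_remaining = __andnot(__k_done, __k_all);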
2030
2031// }}}
2032// __not{{{
2033template <typename _Tp, typename _TVT = _VectorTraits<_Tp>>
2034 _GLIBCXX_SIMD_INTRINSIC constexpr _Tp
2035 __not(_Tp __a) noexcept
2036 {
2037 if constexpr (is_floating_point_v<typename _TVT::value_type>)
2038 return reinterpret_cast<typename _TVT::type>(
2039 ~__vector_bitcast<unsigned>(__a));
2040 else
2041 return ~__a;
2042 }
2043
2044// }}}
2045// __vec_shuffle{{{
2046template <typename _T0, typename _T1, typename _Fun, size_t... _Is>
2047 _GLIBCXX_SIMD_INTRINSIC constexpr
2048 __vector_type_t<remove_reference_t<decltype(declval<_T0>()[0])>, sizeof...(_Is)>
2049 __vec_shuffle(_T0 __x, _T1 __y, index_sequence<_Is...> __seq, _Fun __idx_perm)
2050 {
2051 constexpr int _N0 = sizeof(__x) / sizeof(__x[0]);
2052 constexpr int _N1 = sizeof(__y) / sizeof(__y[0]);
2053 using _Tp = remove_reference_t<decltype(declval<_T0>()[0])>;
2054 using _RV [[maybe_unused]] = __vector_type_t<_Tp, sizeof...(_Is)>;
2055#if __has_builtin(__builtin_shufflevector)
2056#ifdef __clang__
2057 // Clang requires _T0 == _T1
2058 if constexpr (sizeof(__x) > sizeof(__y) and _N1 == 1)
2059 return __vec_shuffle(__x, _T0{__y[0]}, __seq, __idx_perm);
2060 else if constexpr (sizeof(__x) > sizeof(__y))
2061 return __vec_shuffle(__x, __intrin_bitcast<_T0>(__y), __seq, __idx_perm);
2062 else if constexpr (sizeof(__x) < sizeof(__y) and _N0 == 1)
2063 return __vec_shuffle(_T1{__x[0]}, __y, __seq, [=](int __i) {
2064 __i = __idx_perm(__i);
2065 return __i < _N0 ? __i : __i - _N0 + _N1;
2066 });
2067 else if constexpr (sizeof(__x) < sizeof(__y))
2068 return __vec_shuffle(__intrin_bitcast<_T1>(__x), __y, __seq, [=](int __i) {
2069 __i = __idx_perm(__i);
2070 return __i < _N0 ? __i : __i - _N0 + _N1;
2071 });
2072 else
2073#endif
2074 {
2075 const auto __r = __builtin_shufflevector(__x, __y, [=] {
2076 constexpr int __j = __idx_perm(_Is);
2077 static_assert(__j < _N0 + _N1);
2078 return __j;
2079 }()...);
2080#ifdef __i386__
2081 if constexpr (sizeof(__r) == sizeof(_RV))
2082 return __r;
2083 else
2084 return _RV {__r[_Is]...};
2085#else
2086 return __r;
2087#endif
2088 }
2089#else
2090 return _RV {
2091 [=]() -> _Tp {
2092 constexpr int __j = __idx_perm(_Is);
2093 static_assert(__j < _N0 + _N1);
2094 if constexpr (__j < 0)
2095 return 0;
2096 else if constexpr (__j < _N0)
2097 return __x[__j];
2098 else
2099 return __y[__j - _N0];
2100 }()...
2101 };
2102#endif
2103 }
2104
2105template <typename _T0, typename _Fun, typename _Seq>
2106 _GLIBCXX_SIMD_INTRINSIC constexpr auto
2107 __vec_shuffle(_T0 __x, _Seq __seq, _Fun __idx_perm)
2108 { return __vec_shuffle(__x, _T0(), __seq, __idx_perm); }
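
// Usage sketch (illustrative): reversing the lanes of a 4 x int vector __v
// with the single-input overload. __idx_perm must be callable in constant
// expressions; indices refer to the concatenation of both inputs, and a
// negative index yields a zero lane in the fallback path:
//   auto __rev = __vec_shuffle(__v, make_index_sequence<4>(),
//                              [](int __i) { return 3 - __i; });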
2109
2110// }}}
2111// __concat{{{
2112template <typename _Tp, typename _TVT = _VectorTraits<_Tp>,
2113 typename _R = __vector_type_t<typename _TVT::value_type, _TVT::_S_full_size * 2>>
2114 constexpr _R
2115 __concat(_Tp a_, _Tp b_)
2116 {
2117#ifdef _GLIBCXX_SIMD_WORKAROUND_XXX_1
2118 using _W
2119 = conditional_t<is_floating_point_v<typename _TVT::value_type>, double,
2120 conditional_t<(sizeof(_Tp) >= 2 * sizeof(long long)),
2121 long long, typename _TVT::value_type>>;
2122 constexpr int input_width = sizeof(_Tp) / sizeof(_W);
2123 const auto __a = __vector_bitcast<_W>(a_);
2124 const auto __b = __vector_bitcast<_W>(b_);
2125 using _Up = __vector_type_t<_W, sizeof(_R) / sizeof(_W)>;
2126#else
2127 constexpr int input_width = _TVT::_S_full_size;
2128 const _Tp& __a = a_;
2129 const _Tp& __b = b_;
2130 using _Up = _R;
2131#endif
2132 if constexpr (input_width == 2)
2133 return reinterpret_cast<_R>(_Up{__a[0], __a[1], __b[0], __b[1]});
2134 else if constexpr (input_width == 4)
2135 return reinterpret_cast<_R>(
2136 _Up{__a[0], __a[1], __a[2], __a[3], __b[0], __b[1], __b[2], __b[3]});
2137 else if constexpr (input_width == 8)
2138 return reinterpret_cast<_R>(
2139 _Up{__a[0], __a[1], __a[2], __a[3], __a[4], __a[5], __a[6], __a[7],
2140 __b[0], __b[1], __b[2], __b[3], __b[4], __b[5], __b[6], __b[7]});
2141 else if constexpr (input_width == 16)
2142 return reinterpret_cast<_R>(
2143 _Up{__a[0], __a[1], __a[2], __a[3], __a[4], __a[5], __a[6],
2144 __a[7], __a[8], __a[9], __a[10], __a[11], __a[12], __a[13],
2145 __a[14], __a[15], __b[0], __b[1], __b[2], __b[3], __b[4],
2146 __b[5], __b[6], __b[7], __b[8], __b[9], __b[10], __b[11],
2147 __b[12], __b[13], __b[14], __b[15]});
2148 else if constexpr (input_width == 32)
2149 return reinterpret_cast<_R>(
2150 _Up{__a[0], __a[1], __a[2], __a[3], __a[4], __a[5], __a[6],
2151 __a[7], __a[8], __a[9], __a[10], __a[11], __a[12], __a[13],
2152 __a[14], __a[15], __a[16], __a[17], __a[18], __a[19], __a[20],
2153 __a[21], __a[22], __a[23], __a[24], __a[25], __a[26], __a[27],
2154 __a[28], __a[29], __a[30], __a[31], __b[0], __b[1], __b[2],
2155 __b[3], __b[4], __b[5], __b[6], __b[7], __b[8], __b[9],
2156 __b[10], __b[11], __b[12], __b[13], __b[14], __b[15], __b[16],
2157 __b[17], __b[18], __b[19], __b[20], __b[21], __b[22], __b[23],
2158 __b[24], __b[25], __b[26], __b[27], __b[28], __b[29], __b[30],
2159 __b[31]});
2160 }
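
// Illustrative: concatenating two 4 x float vectors yields one 8 x float
// vector with __a4 in lanes 0-3 and __b4 in lanes 4-7. The workaround branch
// above only changes the element type used for building the result, not the
// resulting bit pattern:
//   __vector_type_t<float, 8> __v8 = __concat(__a4, __b4);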
2161
2162// }}}
2163// __zero_extend {{{
2164template <typename _Tp, typename _TVT = _VectorTraits<_Tp>>
2165 struct _ZeroExtendProxy
2166 {
2167 using value_type = typename _TVT::value_type;
2168 static constexpr size_t _Np = _TVT::_S_full_size;
2169 const _Tp __x;
2170
2171 template <typename _To, typename _ToVT = _VectorTraits<_To>,
2172 typename
2173 = enable_if_t<is_same_v<typename _ToVT::value_type, value_type>>>
2174 _GLIBCXX_SIMD_INTRINSIC operator _To() const
2175 {
2176 constexpr size_t _ToN = _ToVT::_S_full_size;
2177 if constexpr (_ToN == _Np)
2178 return __x;
2179 else if constexpr (_ToN == 2 * _Np)
2180 {
2181#ifdef _GLIBCXX_SIMD_WORKAROUND_XXX_3
2182 if constexpr (__have_avx && _TVT::template _S_is<float, 4>)
2183 return __vector_bitcast<value_type>(
2184 _mm256_insertf128_ps(__m256(), __x, 0));
2185 else if constexpr (__have_avx && _TVT::template _S_is<double, 2>)
2186 return __vector_bitcast<value_type>(
2187 _mm256_insertf128_pd(__m256d(), __x, 0));
2188 else if constexpr (__have_avx2 && _Np * sizeof(value_type) == 16)
2189 return __vector_bitcast<value_type>(
2190 _mm256_insertf128_si256(__m256i(), __to_intrin(__x), 0));
2191 else if constexpr (__have_avx512f && _TVT::template _S_is<float, 8>)
2192 {
2193 if constexpr (__have_avx512dq)
2194 return __vector_bitcast<value_type>(
2195 _mm512_insertf32x8(__m512(), __x, 0));
2196 else
2197 return reinterpret_cast<__m512>(
2198 _mm512_insertf64x4(__m512d(),
2199 reinterpret_cast<__m256d>(__x), 0));
2200 }
2201 else if constexpr (__have_avx512f
2202 && _TVT::template _S_is<double, 4>)
2203 return __vector_bitcast<value_type>(
2204 _mm512_insertf64x4(__m512d(), __x, 0));
2205 else if constexpr (__have_avx512f && _Np * sizeof(value_type) == 32)
2206 return __vector_bitcast<value_type>(
2207 _mm512_inserti64x4(__m512i(), __to_intrin(__x), 0));
2208#endif
2209 return __concat(__x, _Tp());
2210 }
2211 else if constexpr (_ToN == 4 * _Np)
2212 {
2213#ifdef _GLIBCXX_SIMD_WORKAROUND_XXX_3
2214 if constexpr (__have_avx512dq && _TVT::template _S_is<double, 2>)
2215 {
2216 return __vector_bitcast<value_type>(
2217 _mm512_insertf64x2(__m512d(), __x, 0));
2218 }
2219 else if constexpr (__have_avx512f
2220 && is_floating_point_v<value_type>)
2221 {
2222 return __vector_bitcast<value_type>(
2223 _mm512_insertf32x4(__m512(), reinterpret_cast<__m128>(__x),
2224 0));
2225 }
2226 else if constexpr (__have_avx512f && _Np * sizeof(value_type) == 16)
2227 {
2228 return __vector_bitcast<value_type>(
2229 _mm512_inserti32x4(__m512i(), __to_intrin(__x), 0));
2230 }
2231#endif
2232 return __concat(__concat(__x, _Tp()),
2233 __vector_type_t<value_type, _Np * 2>());
2234 }
2235 else if constexpr (_ToN == 8 * _Np)
2236 return __concat(operator __vector_type_t<value_type, _Np * 4>(),
2237 __vector_type_t<value_type, _Np * 4>());
2238 else if constexpr (_ToN == 16 * _Np)
2239 return __concat(operator __vector_type_t<value_type, _Np * 8>(),
2240 __vector_type_t<value_type, _Np * 8>());
2241 else
2242 __assert_unreachable<_Tp>();
2243 }
2244 };
2245
2246template <typename _Tp, typename _TVT = _VectorTraits<_Tp>>
2247 _GLIBCXX_SIMD_INTRINSIC _ZeroExtendProxy<_Tp, _TVT>
2248 __zero_extend(_Tp __x)
2249 { return {__x}; }
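
// Usage sketch (illustrative): the proxy converts to any wider vector of the
// same value_type, zero-filling the new lanes:
//   __vector_type_t<float, 4> __v4 = {1, 2, 3, 4};
//   __vector_type_t<float, 8> __v8 = __zero_extend(__v4); // lanes 4-7 == 0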
2250
2251// }}}
2252// __extract<_Np, By>{{{
2253template <int _Offset,
2254 int _SplitBy,
2255 typename _Tp,
2256 typename _TVT = _VectorTraits<_Tp>,
2257 typename _R = __vector_type_t<typename _TVT::value_type, _TVT::_S_full_size / _SplitBy>>
2258 _GLIBCXX_SIMD_INTRINSIC constexpr _R
2259 __extract(_Tp __in)
2260 {
2261 using value_type = typename _TVT::value_type;
2262#if _GLIBCXX_SIMD_X86INTRIN // {{{
2263 if constexpr (sizeof(_Tp) == 64 && _SplitBy == 4 && _Offset > 0)
2264 {
2265 if constexpr (__have_avx512dq && is_same_v<double, value_type>)
2266 return _mm512_extractf64x2_pd(__to_intrin(__in), _Offset);
2267 else if constexpr (is_floating_point_v<value_type>)
2268 return __vector_bitcast<value_type>(
2269 _mm512_extractf32x4_ps(__intrin_bitcast<__m512>(__in), _Offset));
2270 else
2271 return reinterpret_cast<_R>(
2272 _mm512_extracti32x4_epi32(__intrin_bitcast<__m512i>(__in),
2273 _Offset));
2274 }
2275 else
2276#endif // _GLIBCXX_SIMD_X86INTRIN }}}
2277 {
2278#ifdef _GLIBCXX_SIMD_WORKAROUND_XXX_1
2279 using _W = conditional_t<
2280 is_floating_point_v<value_type>, double,
2281 conditional_t<(sizeof(_R) >= 16), long long, value_type>>;
2282 static_assert(sizeof(_R) % sizeof(_W) == 0);
2283 constexpr int __return_width = sizeof(_R) / sizeof(_W);
2284 using _Up = __vector_type_t<_W, __return_width>;
2285 const auto __x = __vector_bitcast<_W>(__in);
2286#else
2287 constexpr int __return_width = _TVT::_S_full_size / _SplitBy;
2288 using _Up = _R;
2289 const __vector_type_t<value_type, _TVT::_S_full_size>& __x
2290 = __in; // only needed for _Tp = _SimdWrapper<value_type, _Np>
2291#endif
2292 constexpr int _O = _Offset * __return_width;
2293 return __call_with_subscripts<__return_width, _O>(
2294 __x, [](auto... __entries) _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA {
2295 return reinterpret_cast<_R>(_Up{__entries...});
2296 });
2297 }
2298 }
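
// Usage sketch (illustrative): splitting an 8 x int vector into its halves:
//   auto __lo = __extract<0, 2>(__v8); // lanes 0-3
//   auto __hi = __extract<1, 2>(__v8); // lanes 4-7
// The __lo128/__hi128 and __lo256/__hi256 helpers below are thin wrappers
// over this primitive.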
2299
2300// }}}
2301// __lo/__hi64[z]{{{
2302template <typename _Tp,
2303 typename _R = __vector_type8_t<typename _VectorTraits<_Tp>::value_type>>
2304 _GLIBCXX_SIMD_INTRINSIC constexpr _R
2305 __lo64(_Tp __x)
2306 {
2307 _R __r{};
2308 __builtin_memcpy(&__r, &__x, 8);
2309 return __r;
2310 }
2311
2312template <typename _Tp,
2313 typename _R = __vector_type8_t<typename _VectorTraits<_Tp>::value_type>>
2314 _GLIBCXX_SIMD_INTRINSIC constexpr _R
2315 __hi64(_Tp __x)
2316 {
2317 static_assert(sizeof(_Tp) == 16, "use __hi64z if you meant it");
2318 _R __r{};
2319 __builtin_memcpy(&__r, reinterpret_cast<const char*>(&__x) + 8, 8);
2320 return __r;
2321 }
2322
2323template <typename _Tp,
2324 typename _R = __vector_type8_t<typename _VectorTraits<_Tp>::value_type>>
2325 _GLIBCXX_SIMD_INTRINSIC constexpr _R
2326 __hi64z([[maybe_unused]] _Tp __x)
2327 {
2328 _R __r{};
2329 if constexpr (sizeof(_Tp) == 16)
2330 __builtin_memcpy(&__r, reinterpret_cast<const char*>(&__x) + 8, 8);
2331 return __r;
2332 }
2333
2334// }}}
2335// __lo/__hi128{{{
2336template <typename _Tp>
2337 _GLIBCXX_SIMD_INTRINSIC constexpr auto
2338 __lo128(_Tp __x)
2339 { return __extract<0, sizeof(_Tp) / 16>(__x); }
2340
2341template <typename _Tp>
2342 _GLIBCXX_SIMD_INTRINSIC constexpr auto
2343 __hi128(_Tp __x)
2344 {
2345 static_assert(sizeof(__x) == 32);
2346 return __extract<1, 2>(__x);
2347 }
2348
2349// }}}
2350// __lo/__hi256{{{
2351template <typename _Tp>
2352 _GLIBCXX_SIMD_INTRINSIC constexpr auto
2353 __lo256(_Tp __x)
2354 {
2355 static_assert(sizeof(__x) == 64);
2356 return __extract<0, 2>(__x);
2357 }
2358
2359template <typename _Tp>
2360 _GLIBCXX_SIMD_INTRINSIC constexpr auto
2361 __hi256(_Tp __x)
2362 {
2363 static_assert(sizeof(__x) == 64);
2364 return __extract<1, 2>(__x);
2365 }
2366
2367// }}}
2368// __auto_bitcast{{{
2369template <typename _Tp>
2370 struct _AutoCast
2371 {
2372 static_assert(__is_vector_type_v<_Tp>);
2373
2374 const _Tp __x;
2375
2376 template <typename _Up, typename _UVT = _VectorTraits<_Up>>
2377 _GLIBCXX_SIMD_INTRINSIC constexpr operator _Up() const
2378 { return __intrin_bitcast<typename _UVT::type>(__x); }
2379 };
2380
2381template <typename _Tp>
2382 _GLIBCXX_SIMD_INTRINSIC constexpr _AutoCast<_Tp>
2383 __auto_bitcast(const _Tp& __x)
2384 { return {__x}; }
2385
2386template <typename _Tp, size_t _Np>
2387 _GLIBCXX_SIMD_INTRINSIC constexpr
2388 _AutoCast<typename _SimdWrapper<_Tp, _Np>::_BuiltinType>
2389 __auto_bitcast(const _SimdWrapper<_Tp, _Np>& __x)
2390 { return {__x._M_data}; }
2391
2392// }}}
2393// ^^^ ---- builtin vector types [[gnu::vector_size(N)]] and operations ---- ^^^
2394
2395#if _GLIBCXX_SIMD_HAVE_SSE_ABI
2396// __bool_storage_member_type{{{
2397#if _GLIBCXX_SIMD_HAVE_AVX512F && _GLIBCXX_SIMD_X86INTRIN
2398template <size_t _Size>
2399 struct __bool_storage_member_type
2400 {
2401 static_assert((_Size & (_Size - 1)) != 0,
2402 "This trait may only be used for non-power-of-2 sizes. "
2403 "Power-of-2 sizes must be specialized.");
2404 using type =
2405 typename __bool_storage_member_type<std::__bit_ceil(_Size)>::type;
2406 };
2407
2408template <>
2409 struct __bool_storage_member_type<1> { using type = bool; };
2410
2411template <>
2412 struct __bool_storage_member_type<2> { using type = __mmask8; };
2413
2414template <>
2415 struct __bool_storage_member_type<4> { using type = __mmask8; };
2416
2417template <>
2418 struct __bool_storage_member_type<8> { using type = __mmask8; };
2419
2420template <>
2421 struct __bool_storage_member_type<16> { using type = __mmask16; };
2422
2423template <>
2424 struct __bool_storage_member_type<32> { using type = __mmask32; };
2425
2426template <>
2427 struct __bool_storage_member_type<64> { using type = __mmask64; };
2428#endif // _GLIBCXX_SIMD_HAVE_AVX512F
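
// Illustrative mapping (assuming AVX512F): non-power-of-2 mask sizes round up
// via __bit_ceil to the next mask-register type, e.g.
//   static_assert(is_same_v<typename __bool_storage_member_type<7>::type,
//                           __mmask8>);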
2429
2430// }}}
2431// __intrinsic_type (x86){{{
2432// the following excludes bool via __is_vectorizable
2433#if _GLIBCXX_SIMD_HAVE_SSE
2434template <typename _Tp, size_t _Bytes>
2435 struct __intrinsic_type<_Tp, _Bytes, enable_if_t<__is_vectorizable_v<_Tp> && _Bytes <= 64>>
2436 {
2437 static_assert(!is_same_v<_Tp, long double>,
2438 "no __intrinsic_type support for long double on x86");
2439
2440 static constexpr size_t _S_VBytes = _Bytes <= 16 ? 16 : _Bytes <= 32 ? 32 : 64;
2441
2442 using type [[__gnu__::__vector_size__(_S_VBytes)]]
2443 = conditional_t<is_integral_v<_Tp>, long long int, _Tp>;
2444 };
2445#endif // _GLIBCXX_SIMD_HAVE_SSE
2446
2447// }}}
2448#endif // _GLIBCXX_SIMD_HAVE_SSE_ABI
2449// __intrinsic_type (ARM){{{
2450#if _GLIBCXX_SIMD_HAVE_NEON
2451template <>
2452 struct __intrinsic_type<float, 8, void>
2453 { using type = float32x2_t; };
2454
2455template <>
2456 struct __intrinsic_type<float, 16, void>
2457 { using type = float32x4_t; };
2458
2459template <>
2460 struct __intrinsic_type<double, 8, void>
2461 {
2462#if _GLIBCXX_SIMD_HAVE_NEON_A64
2463 using type = float64x1_t;
2464#endif
2465 };
2466
2467template <>
2468 struct __intrinsic_type<double, 16, void>
2469 {
2470#if _GLIBCXX_SIMD_HAVE_NEON_A64
2471 using type = float64x2_t;
2472#endif
2473 };
2474
2475#define _GLIBCXX_SIMD_ARM_INTRIN(_Bits, _Np) \
2476template <> \
2477 struct __intrinsic_type<__int_with_sizeof_t<_Bits / 8>, \
2478 _Np * _Bits / 8, void> \
2479 { using type = int##_Bits##x##_Np##_t; }; \
2480template <> \
2481 struct __intrinsic_type<make_unsigned_t<__int_with_sizeof_t<_Bits / 8>>, \
2482 _Np * _Bits / 8, void> \
2483 { using type = uint##_Bits##x##_Np##_t; }
2484_GLIBCXX_SIMD_ARM_INTRIN(8, 8);
2485_GLIBCXX_SIMD_ARM_INTRIN(8, 16);
2486_GLIBCXX_SIMD_ARM_INTRIN(16, 4);
2487_GLIBCXX_SIMD_ARM_INTRIN(16, 8);
2488_GLIBCXX_SIMD_ARM_INTRIN(32, 2);
2489_GLIBCXX_SIMD_ARM_INTRIN(32, 4);
2490_GLIBCXX_SIMD_ARM_INTRIN(64, 1);
2491_GLIBCXX_SIMD_ARM_INTRIN(64, 2);
2492#undef _GLIBCXX_SIMD_ARM_INTRIN
2493
2494template <typename _Tp, size_t _Bytes>
2495 struct __intrinsic_type<_Tp, _Bytes, enable_if_t<__is_vectorizable_v<_Tp> && _Bytes <= 16>>
2496 {
2497 static constexpr int _SVecBytes = _Bytes <= 8 ? 8 : 16;
2498
2499 using _Ip = __int_for_sizeof_t<_Tp>;
2500
2501 using _Up = conditional_t<
2502 is_floating_point_v<_Tp>, _Tp,
2503 conditional_t<is_unsigned_v<_Tp>, make_unsigned_t<_Ip>, _Ip>>;
2504
2505 static_assert(!is_same_v<_Tp, _Up> || _SVecBytes != _Bytes,
2506 "should use explicit specialization above");
2507
2508 using type = typename __intrinsic_type<_Up, _SVecBytes>::type;
2509 };
2510#endif // _GLIBCXX_SIMD_HAVE_NEON
2511
2512// }}}
2513// __intrinsic_type (PPC){{{
2514#ifdef __ALTIVEC__
2515template <typename _Tp>
2516 struct __intrinsic_type_impl;
2517
2518#define _GLIBCXX_SIMD_PPC_INTRIN(_Tp) \
2519 template <> \
2520 struct __intrinsic_type_impl<_Tp> { using type = __vector _Tp; }
2521_GLIBCXX_SIMD_PPC_INTRIN(float);
2522#ifdef __VSX__
2523_GLIBCXX_SIMD_PPC_INTRIN(double);
2524#endif
2525_GLIBCXX_SIMD_PPC_INTRIN(signed char);
2526_GLIBCXX_SIMD_PPC_INTRIN(unsigned char);
2527_GLIBCXX_SIMD_PPC_INTRIN(signed short);
2528_GLIBCXX_SIMD_PPC_INTRIN(unsigned short);
2529_GLIBCXX_SIMD_PPC_INTRIN(signed int);
2530_GLIBCXX_SIMD_PPC_INTRIN(unsigned int);
2531#if defined __VSX__ || __SIZEOF_LONG__ == 4
2532_GLIBCXX_SIMD_PPC_INTRIN(signed long);
2533_GLIBCXX_SIMD_PPC_INTRIN(unsigned long);
2534#endif
2535#ifdef __VSX__
2536_GLIBCXX_SIMD_PPC_INTRIN(signed long long);
2537_GLIBCXX_SIMD_PPC_INTRIN(unsigned long long);
2538#endif
2539#undef _GLIBCXX_SIMD_PPC_INTRIN
2540
2541template <typename _Tp, size_t _Bytes>
2542 struct __intrinsic_type<_Tp, _Bytes, enable_if_t<__is_vectorizable_v<_Tp> && _Bytes <= 16>>
2543 {
2544 static constexpr bool _S_is_ldouble = is_same_v<_Tp, long double>;
2545
2546 // allow _Tp == long double with -mlong-double-64
2547 static_assert(!(_S_is_ldouble && sizeof(long double) > sizeof(double)),
2548 "no __intrinsic_type support for 128-bit floating point on PowerPC");
2549
2550#ifndef __VSX__
2551 static_assert(!(is_same_v<_Tp, double>
2552 || (_S_is_ldouble && sizeof(long double) == sizeof(double))),
2553 "no __intrinsic_type support for 64-bit floating point on PowerPC w/o VSX");
2554#endif
2555
2556 static constexpr auto __element_type()
2557 {
2558 if constexpr (is_floating_point_v<_Tp>)
2559 {
2560 if constexpr (_S_is_ldouble)
2561 return double {};
2562 else
2563 return _Tp {};
2564 }
2565 else if constexpr (is_signed_v<_Tp>)
2566 {
2567 if constexpr (sizeof(_Tp) == sizeof(_SChar))
2568 return _SChar {};
2569 else if constexpr (sizeof(_Tp) == sizeof(short))
2570 return short {};
2571 else if constexpr (sizeof(_Tp) == sizeof(int))
2572 return int {};
2573 else if constexpr (sizeof(_Tp) == sizeof(_LLong))
2574 return _LLong {};
2575 }
2576 else
2577 {
2578 if constexpr (sizeof(_Tp) == sizeof(_UChar))
2579 return _UChar {};
2580 else if constexpr (sizeof(_Tp) == sizeof(_UShort))
2581 return _UShort {};
2582 else if constexpr (sizeof(_Tp) == sizeof(_UInt))
2583 return _UInt {};
2584 else if constexpr (sizeof(_Tp) == sizeof(_ULLong))
2585 return _ULLong {};
2586 }
2587 }
2588
2589 using type = typename __intrinsic_type_impl<decltype(__element_type())>::type;
2590 };
2591#endif // __ALTIVEC__
2592
2593// }}}
2594// _SimdWrapper<bool>{{{1
2595template <size_t _Width>
2596 struct _SimdWrapper<bool, _Width,
2597 void_t<typename __bool_storage_member_type<_Width>::type>>
2598 {
2599 using _BuiltinType = typename __bool_storage_member_type<_Width>::type;
2600 using value_type = bool;
2601
2602 static constexpr size_t _S_full_size = sizeof(_BuiltinType) * __CHAR_BIT__;
2603
2604 _GLIBCXX_SIMD_INTRINSIC constexpr _SimdWrapper<bool, _S_full_size>
2605 __as_full_vector() const
2606 { return _M_data; }
2607
2608 _GLIBCXX_SIMD_INTRINSIC constexpr
2609 _SimdWrapper() = default;
2610
2611 _GLIBCXX_SIMD_INTRINSIC constexpr
2612 _SimdWrapper(_BuiltinType __k) : _M_data(__k) {}
2613
2614 _GLIBCXX_SIMD_INTRINSIC
2615 operator const _BuiltinType&() const
2616 { return _M_data; }
2617
2618 _GLIBCXX_SIMD_INTRINSIC
2619 operator _BuiltinType&()
2620 { return _M_data; }
2621
2622 _GLIBCXX_SIMD_INTRINSIC _BuiltinType
2623 __intrin() const
2624 { return _M_data; }
2625
2626 _GLIBCXX_SIMD_INTRINSIC constexpr value_type
2627 operator[](size_t __i) const
2628 { return _M_data & (_BuiltinType(1) << __i); }
2629
2630 template <size_t __i>
2631 _GLIBCXX_SIMD_INTRINSIC constexpr value_type
2632 operator[](_SizeConstant<__i>) const
2633 { return _M_data & (_BuiltinType(1) << __i); }
2634
2635 _GLIBCXX_SIMD_INTRINSIC constexpr void
2636 _M_set(size_t __i, value_type __x)
2637 {
2638 if (__x)
2639 _M_data |= (_BuiltinType(1) << __i);
2640 else
2641 _M_data &= ~(_BuiltinType(1) << __i);
2642 }
2643
2644 _GLIBCXX_SIMD_INTRINSIC constexpr bool
2645 _M_is_constprop() const
2646 { return __builtin_constant_p(_M_data); }
2647
2648 _GLIBCXX_SIMD_INTRINSIC constexpr bool
2649 _M_is_constprop_none_of() const
2650 {
2651 if (__builtin_constant_p(_M_data))
2652 {
2653 constexpr int __nbits = sizeof(_BuiltinType) * __CHAR_BIT__;
2654 constexpr _BuiltinType __active_mask
2655 = ~_BuiltinType() >> (__nbits - _Width);
2656 return (_M_data & __active_mask) == 0;
2657 }
2658 return false;
2659 }
2660
2661 _GLIBCXX_SIMD_INTRINSIC constexpr bool
2662 _M_is_constprop_all_of() const
2663 {
2664 if (__builtin_constant_p(_M_data))
2665 {
2666 constexpr int __nbits = sizeof(_BuiltinType) * __CHAR_BIT__;
2667 constexpr _BuiltinType __active_mask
2668 = ~_BuiltinType() >> (__nbits - _Width);
2669 return (_M_data & __active_mask) == __active_mask;
2670 }
2671 return false;
2672 }
2673
2674 _BuiltinType _M_data;
2675 };
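
// Illustrative sketch of the bitmask semantics (assuming an AVX512 target,
// where __bool_storage_member_type<4>::type is __mmask8): bit __i of _M_data
// holds lane __i, so
//   _SimdWrapper<bool, 4> __k(0b0101);
//   // __k[0] == true, __k[1] == false, __k[2] == true, __k[3] == false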
2676
2677// _SimdWrapperBase{{{1
2678template <bool _MustZeroInitPadding, typename _BuiltinType>
2679 struct _SimdWrapperBase;
2680
2681template <typename _BuiltinType>
2682 struct _SimdWrapperBase<false, _BuiltinType> // no padding or no SNaNs
2683 {
2684 _GLIBCXX_SIMD_INTRINSIC constexpr
2685 _SimdWrapperBase() = default;
2686
2687 _GLIBCXX_SIMD_INTRINSIC constexpr
2688 _SimdWrapperBase(_BuiltinType __init) : _M_data(__init) {}
2689
2690 _BuiltinType _M_data;
2691 };
2692
2693template <typename _BuiltinType>
2694 struct _SimdWrapperBase<true, _BuiltinType> // with padding that needs to
2695 // never become SNaN
2696 {
2697 _GLIBCXX_SIMD_INTRINSIC constexpr
2698 _SimdWrapperBase() : _M_data() {}
2699
2700 _GLIBCXX_SIMD_INTRINSIC constexpr
2701 _SimdWrapperBase(_BuiltinType __init) : _M_data(__init) {}
2702
2703 _BuiltinType _M_data;
2704 };
2705
2706// }}}
2707// _SimdWrapper{{{
2708struct _DisabledSimdWrapper;
2709
2710template <typename _Tp, size_t _Width>
2711 struct _SimdWrapper<
2712 _Tp, _Width,
2713 void_t<__vector_type_t<_Tp, _Width>, __intrinsic_type_t<_Tp, _Width>>>
2714 : _SimdWrapperBase<__has_iec559_behavior<__signaling_NaN, _Tp>::value
2715 && sizeof(_Tp) * _Width
2716 == sizeof(__vector_type_t<_Tp, _Width>),
2717 __vector_type_t<_Tp, _Width>>
2718 {
2719 static constexpr bool _S_need_default_init
2720 = __has_iec559_behavior<__signaling_NaN, _Tp>::value
2721 and sizeof(_Tp) * _Width == sizeof(__vector_type_t<_Tp, _Width>);
2722
2723 using _BuiltinType = __vector_type_t<_Tp, _Width>;
2724
2725 using _Base = _SimdWrapperBase<_S_need_default_init, _BuiltinType>;
2726
2727 static_assert(__is_vectorizable_v<_Tp>);
2728 static_assert(_Width >= 2); // 1 doesn't make sense, use _Tp directly then
2729
2730 using value_type = _Tp;
2731
2732 static inline constexpr size_t _S_full_size
2733 = sizeof(_BuiltinType) / sizeof(value_type);
2734 static inline constexpr int _S_size = _Width;
2735 static inline constexpr bool _S_is_partial = _S_full_size != _S_size;
2736
2737 using _Base::_M_data;
2738
2739 _GLIBCXX_SIMD_INTRINSIC constexpr _SimdWrapper<_Tp, _S_full_size>
2740 __as_full_vector() const
2741 { return _M_data; }
2742
2743 _GLIBCXX_SIMD_INTRINSIC constexpr
2744 _SimdWrapper(initializer_list<_Tp> __init)
2745 : _Base(__generate_from_n_evaluations<_Width, _BuiltinType>(
2746 [&](auto __i) _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA {
2747 return __init.begin()[__i.value];
2748 })) {}
2749
2750 _GLIBCXX_SIMD_INTRINSIC constexpr
2751 _SimdWrapper() = default;
2752
2753 _GLIBCXX_SIMD_INTRINSIC constexpr
2754 _SimdWrapper(const _SimdWrapper&) = default;
2755
2756 _GLIBCXX_SIMD_INTRINSIC constexpr
2757 _SimdWrapper(_SimdWrapper&&) = default;
2758
2759 _GLIBCXX_SIMD_INTRINSIC constexpr _SimdWrapper&
2760 operator=(const _SimdWrapper&) = default;
2761
2762 _GLIBCXX_SIMD_INTRINSIC constexpr _SimdWrapper&
2763 operator=(_SimdWrapper&&) = default;
2764
2765 // Convert from exactly matching __vector_type_t
2766 using _SimdWrapperBase<_S_need_default_init, _BuiltinType>::_SimdWrapperBase;
2767
2768 // Convert from __intrinsic_type_t if __intrinsic_type_t and __vector_type_t differ, otherwise
2769 // this ctor should not exist. Making the argument type unusable is our next best solution.
2770 _GLIBCXX_SIMD_INTRINSIC constexpr
2771 _SimdWrapper(conditional_t<is_same_v<_BuiltinType, __intrinsic_type_t<_Tp, _Width>>,
2772 _DisabledSimdWrapper, __intrinsic_type_t<_Tp, _Width>> __x)
2773 : _Base(__vector_bitcast<_Tp, _Width>(__x)) {}
2774
2775 // Convert from different __vector_type_t, but only if bit reinterpretation is a correct
2776 // conversion of the value_type
2777 template <typename _V, typename _TVT = _VectorTraits<_V>,
2778 typename = enable_if_t<sizeof(typename _TVT::value_type) == sizeof(_Tp)
2779 and sizeof(_V) == sizeof(_BuiltinType)
2780 and is_integral_v<_Tp>
2781 and is_integral_v<typename _TVT::value_type>>>
2782 _GLIBCXX_SIMD_INTRINSIC constexpr
2783 _SimdWrapper(_V __x)
2784 : _Base(reinterpret_cast<_BuiltinType>(__x)) {}
2785
2786 template <typename... _As,
2787 typename = enable_if_t<((is_same_v<simd_abi::scalar, _As> && ...)
2788 && sizeof...(_As) <= _Width)>>
2789 _GLIBCXX_SIMD_INTRINSIC constexpr
2790 operator _SimdTuple<_Tp, _As...>() const
2791 {
2792 return __generate_from_n_evaluations<sizeof...(_As), _SimdTuple<_Tp, _As...>>(
2793 [&](auto __i) constexpr _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA
2794 { return _M_data[int(__i)]; });
2795 }
2796
2797 _GLIBCXX_SIMD_INTRINSIC constexpr
2798 operator const _BuiltinType&() const
2799 { return _M_data; }
2800
2801 _GLIBCXX_SIMD_INTRINSIC constexpr
2802 operator _BuiltinType&()
2803 { return _M_data; }
2804
2805 _GLIBCXX_SIMD_INTRINSIC constexpr _Tp
2806 operator[](size_t __i) const
2807 { return _M_data[__i]; }
2808
2809 template <size_t __i>
2810 _GLIBCXX_SIMD_INTRINSIC constexpr _Tp
2811 operator[](_SizeConstant<__i>) const
2812 { return _M_data[__i]; }
2813
2814 _GLIBCXX_SIMD_INTRINSIC constexpr void
2815 _M_set(size_t __i, _Tp __x)
2816 {
2817 if (__builtin_is_constant_evaluated())
2818 _M_data = __generate_from_n_evaluations<_Width, _BuiltinType>([&](auto __j) {
2819 return __j == __i ? __x : _M_data[__j()];
2820 });
2821 else
2822 _M_data[__i] = __x;
2823 }
2824
2825 _GLIBCXX_SIMD_INTRINSIC
2826 constexpr bool
2827 _M_is_constprop() const
2828 { return __builtin_constant_p(_M_data); }
2829
2830 _GLIBCXX_SIMD_INTRINSIC constexpr bool
2831 _M_is_constprop_none_of() const
2832 {
2833 if (__builtin_constant_p(_M_data))
2834 {
2835 bool __r = true;
2836 if constexpr (is_floating_point_v<_Tp>)
2837 {
2838 using _Ip = __int_for_sizeof_t<_Tp>;
2839 const auto __intdata = __vector_bitcast<_Ip>(_M_data);
2840 __execute_n_times<_Width>(
2841 [&](auto __i) { __r &= __intdata[__i.value] == _Ip(); });
2842 }
2843 else
2844 __execute_n_times<_Width>(
2845 [&](auto __i) { __r &= _M_data[__i.value] == _Tp(); });
2846 if (__builtin_constant_p(__r))
2847 return __r;
2848 }
2849 return false;
2850 }
2851
2852 _GLIBCXX_SIMD_INTRINSIC constexpr bool
2853 _M_is_constprop_all_of() const
2854 {
2855 if (__builtin_constant_p(_M_data))
2856 {
2857 bool __r = true;
2858 if constexpr (is_floating_point_v<_Tp>)
2859 {
2860 using _Ip = __int_for_sizeof_t<_Tp>;
2861 const auto __intdata = __vector_bitcast<_Ip>(_M_data);
2862 __execute_n_times<_Width>(
2863 [&](auto __i) { __r &= __intdata[__i.value] == ~_Ip(); });
2864 }
2865 else
2866 __execute_n_times<_Width>(
2867 [&](auto __i) { __r &= _M_data[__i.value] == ~_Tp(); });
2868 if (__builtin_constant_p(__r))
2869 return __r;
2870 }
2871 return false;
2872 }
2873 };
2874
2875// }}}
2876
2877// __vectorized_sizeof {{{
2878template <typename _Tp>
2879 constexpr size_t
2880 __vectorized_sizeof()
2881 {
2882 if constexpr (!__is_vectorizable_v<_Tp>)
2883 return 0;
2884
2885 if constexpr (sizeof(_Tp) <= 8)
2886 {
2887 // X86:
2888 if constexpr (__have_avx512bw)
2889 return 64;
2890 if constexpr (__have_avx512f && sizeof(_Tp) >= 4)
2891 return 64;
2892 if constexpr (__have_avx2)
2893 return 32;
2894 if constexpr (__have_avx && is_floating_point_v<_Tp>)
2895 return 32;
2896 if constexpr (__have_sse2)
2897 return 16;
2898 if constexpr (__have_sse && is_same_v<_Tp, float>)
2899 return 16;
2900 /* The following is too much trouble because of mixed MMX and x87 code.
2901 * While nothing here explicitly uses MMX instructions or registers,
2902 * they are still emitted but no EMMS cleanup is done.
2903 if constexpr (__have_mmx && sizeof(_Tp) <= 4 && is_integral_v<_Tp>)
2904 return 8;
2905 */
2906
2907 // PowerPC:
2908 if constexpr (__have_power8vec
2909 || (__have_power_vmx && (sizeof(_Tp) < 8))
2910 || (__have_power_vsx && is_floating_point_v<_Tp>))
2911 return 16;
2912
2913 // ARM:
2914 if constexpr (__have_neon_a64)
2915 return 16;
2916 if constexpr (__have_neon_a32 and (not is_floating_point_v<_Tp>
2917 or is_same_v<_Tp, float>))
2918 return 16;
2919 if constexpr (__have_neon
2920 && sizeof(_Tp) < 8
2921 // Only allow fp if the user allows non-IEC559 fp (e.g.
2922 // via -ffast-math). ARMv7 NEON fp does not conform to
2923 // IEC559.
2924 && (__support_neon_float || !is_floating_point_v<_Tp>))
2925 return 16;
2926 }
2927
2928 return sizeof(_Tp);
2929 }
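
// Illustrative results (they depend on the enabled ISA extensions): on plain
// x86-64 (SSE2 baseline) __vectorized_sizeof<float>() is 16, with AVX2 it is
// 32, and with AVX-512F it is 64. Non-vectorizable types yield 0:
//   static_assert(__vectorized_sizeof<bool>() == 0);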
2930
2931// }}}
2932namespace simd_abi {
2933// most of simd_abi is defined in simd_detail.h
2934template <typename _Tp>
2935 inline constexpr int max_fixed_size
2936 = (__have_avx512bw && sizeof(_Tp) == 1) ? 64 : 32;
2937
2938// compatible {{{
2939#if defined __x86_64__ || defined __aarch64__
2940template <typename _Tp>
2941 using compatible = conditional_t<(sizeof(_Tp) <= 8), _VecBuiltin<16>, scalar>;
2942#elif defined __ARM_NEON
2943// FIXME: not sure, probably needs to be scalar (or dependent on the hard-float
2944// ABI?)
2945template <typename _Tp>
2946 using compatible
2947 = conditional_t<(sizeof(_Tp) < 8
2948 && (__support_neon_float || !is_floating_point_v<_Tp>)),
2949 _VecBuiltin<16>, scalar>;
2950#else
2951template <typename>
2952 using compatible = scalar;
2953#endif
2954
2955// }}}
2956// native {{{
2957template <typename _Tp>
2958 constexpr auto
2959 __determine_native_abi()
2960 {
2961 constexpr size_t __bytes = __vectorized_sizeof<_Tp>();
2962 if constexpr (__bytes == sizeof(_Tp))
2963 return static_cast<scalar*>(nullptr);
2964 else if constexpr (__have_avx512vl || (__have_avx512f && __bytes == 64))
2965 return static_cast<_VecBltnBtmsk<__bytes>*>(nullptr);
2966 else
2967 return static_cast<_VecBuiltin<__bytes>*>(nullptr);
2968 }
2969
2970template <typename _Tp, typename = enable_if_t<__is_vectorizable_v<_Tp>>>
2971 using native = remove_pointer_t<decltype(__determine_native_abi<_Tp>())>;
2972
2973// }}}
2974// __default_abi {{{
2975#if defined _GLIBCXX_SIMD_DEFAULT_ABI
2976template <typename _Tp>
2977 using __default_abi = _GLIBCXX_SIMD_DEFAULT_ABI<_Tp>;
2978#else
2979template <typename _Tp>
2980 using __default_abi = compatible<_Tp>;
2981#endif
2982
2983// }}}
2984} // namespace simd_abi
2985
2986// traits {{{1
2987template <typename _Tp>
2988 struct is_simd_flag_type
2989 : false_type
2990 {};
2991
2992template <>
2993 struct is_simd_flag_type<element_aligned_tag>
2994 : true_type
2995 {};
2996
2997template <>
2998 struct is_simd_flag_type<vector_aligned_tag>
2999 : true_type
3000 {};
3001
3002template <size_t _Np>
3003 struct is_simd_flag_type<overaligned_tag<_Np>>
3004 : __bool_constant<(_Np > 0) and __has_single_bit(_Np)>
3005 {};
3006
3007template <typename _Tp>
3008 inline constexpr bool is_simd_flag_type_v = is_simd_flag_type<_Tp>::value;
3009
3010template <typename _Tp, typename = enable_if_t<is_simd_flag_type_v<_Tp>>>
3011 using _IsSimdFlagType = _Tp;
3012
3013// is_abi_tag {{{2
3014template <typename _Tp, typename = void_t<>>
3015 struct is_abi_tag : false_type {};
3016
3017template <typename _Tp>
3018 struct is_abi_tag<_Tp, void_t<typename _Tp::_IsValidAbiTag>>
3019 : public _Tp::_IsValidAbiTag {};
3020
3021template <typename _Tp>
3022 inline constexpr bool is_abi_tag_v = is_abi_tag<_Tp>::value;
3023
3024// is_simd(_mask) {{{2
3025template <typename _Tp>
3026 struct is_simd : public false_type {};
3027
3028template <typename _Tp>
3029 inline constexpr bool is_simd_v = is_simd<_Tp>::value;
3030
3031template <typename _Tp>
3032 struct is_simd_mask : public false_type {};
3033
3034template <typename _Tp>
3035 inline constexpr bool is_simd_mask_v = is_simd_mask<_Tp>::value;
3036
3037// simd_size {{{2
3038template <typename _Tp, typename _Abi, typename = void>
3039 struct __simd_size_impl {};
3040
3041template <typename _Tp, typename _Abi>
3042 struct __simd_size_impl<
3043 _Tp, _Abi,
3044 enable_if_t<conjunction_v<__is_vectorizable<_Tp>, is_abi_tag<_Abi>>>>
3045 : _SizeConstant<_Abi::template _S_size<_Tp>> {};
3046
3047template <typename _Tp, typename _Abi = simd_abi::__default_abi<_Tp>>
3048 struct simd_size : __simd_size_impl<_Tp, _Abi> {};
3049
3050template <typename _Tp, typename _Abi = simd_abi::__default_abi<_Tp>>
3051 inline constexpr size_t simd_size_v = simd_size<_Tp, _Abi>::value;
3052
3053// simd_abi::deduce {{{2
3054template <typename _Tp, size_t _Np, typename = void>
3055 struct __deduce_impl;
3056
3057namespace simd_abi {
3058/**
3059 * @tparam _Tp The requested `value_type` for the elements.
3060 * @tparam _Np The requested number of elements.
3061 * @tparam _Abis This parameter is ignored, since this implementation cannot
3062 * make any use of it. Either a good native ABI is matched and used as `type`
3063 * alias, or the `fixed_size<_Np>` ABI is used, which internally is built from
3064 * the best matching native ABIs.
3065 */
3066template <typename _Tp, size_t _Np, typename...>
3067 struct deduce : __deduce_impl<_Tp, _Np> {};
3068
3069template <typename _Tp, size_t _Np, typename... _Abis>
3070 using deduce_t = typename deduce<_Tp, _Np, _Abis...>::type;
3071} // namespace simd_abi
3072
3073// }}}2
3074// rebind_simd {{{2
3075template <typename _Tp, typename _V, typename = void>
3076 struct rebind_simd;
3077
3078template <typename _Tp, typename _Up, typename _Abi>
3079 struct rebind_simd<_Tp, simd<_Up, _Abi>,
3080 void_t<simd_abi::deduce_t<_Tp, simd_size_v<_Up, _Abi>, _Abi>>>
3081 { using type = simd<_Tp, simd_abi::deduce_t<_Tp, simd_size_v<_Up, _Abi>, _Abi>>; };
3082
3083template <typename _Tp, typename _Up, typename _Abi>
3084 struct rebind_simd<_Tp, simd_mask<_Up, _Abi>,
3085 void_t<simd_abi::deduce_t<_Tp, simd_size_v<_Up, _Abi>, _Abi>>>
3086 { using type = simd_mask<_Tp, simd_abi::deduce_t<_Tp, simd_size_v<_Up, _Abi>, _Abi>>; };
3087
3088template <typename _Tp, typename _V>
3089 using rebind_simd_t = typename rebind_simd<_Tp, _V>::type;
3090
3091// resize_simd {{{2
3092template <int _Np, typename _V, typename = void>
3093 struct resize_simd;
3094
3095template <int _Np, typename _Tp, typename _Abi>
3096 struct resize_simd<_Np, simd<_Tp, _Abi>, void_t<simd_abi::deduce_t<_Tp, _Np, _Abi>>>
3097 { using type = simd<_Tp, simd_abi::deduce_t<_Tp, _Np, _Abi>>; };
3098
3099template <int _Np, typename _Tp, typename _Abi>
3100 struct resize_simd<_Np, simd_mask<_Tp, _Abi>, void_t<simd_abi::deduce_t<_Tp, _Np, _Abi>>>
3101 { using type = simd_mask<_Tp, simd_abi::deduce_t<_Tp, _Np, _Abi>>; };
3102
3103template <int _Np, typename _V>
3104 using resize_simd_t = typename resize_simd<_Np, _V>::type;
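
// Usage sketch (illustrative): rebind_simd keeps the element count while
// changing the value_type; resize_simd keeps the value_type while changing
// the element count:
//   using _V  = native_simd<float>;         // e.g. 8 lanes with AVX
//   using _VD = rebind_simd_t<double, _V>;  // 8 x double (ABI re-deduced)
//   static_assert(_VD::size() == _V::size());
//   using _V4 = resize_simd_t<4, _V>;       // 4 x float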
3105
3106// }}}2
3107// memory_alignment {{{2
3108template <typename _Tp, typename _Up = typename _Tp::value_type>
3109 struct memory_alignment
3110 : public _SizeConstant<vector_aligned_tag::_S_alignment<_Tp, _Up>> {};
3111
3112template <typename _Tp, typename _Up = typename _Tp::value_type>
3113 inline constexpr size_t memory_alignment_v = memory_alignment<_Tp, _Up>::value;
3114
3115// class template simd [simd] {{{1
3116template <typename _Tp, typename _Abi = simd_abi::__default_abi<_Tp>>
3117 class simd;
3118
3119template <typename _Tp, typename _Abi>
3120 struct is_simd<simd<_Tp, _Abi>> : public true_type {};
3121
3122template <typename _Tp>
3123 using native_simd = simd<_Tp, simd_abi::native<_Tp>>;
3124
3125template <typename _Tp, int _Np>
3126 using fixed_size_simd = simd<_Tp, simd_abi::fixed_size<_Np>>;
3127
3128template <typename _Tp, size_t _Np>
3129 using __deduced_simd = simd<_Tp, simd_abi::deduce_t<_Tp, _Np>>;
3130
3131// class template simd_mask [simd_mask] {{{1
3132template <typename _Tp, typename _Abi = simd_abi::__default_abi<_Tp>>
3133 class simd_mask;
3134
3135template <typename _Tp, typename _Abi>
3136 struct is_simd_mask<simd_mask<_Tp, _Abi>> : public true_type {};
3137
3138template <typename _Tp>
3139 using native_simd_mask = simd_mask<_Tp, simd_abi::native<_Tp>>;
3140
3141template <typename _Tp, int _Np>
3142 using fixed_size_simd_mask = simd_mask<_Tp, simd_abi::fixed_size<_Np>>;
3143
3144template <typename _Tp, size_t _Np>
3145 using __deduced_simd_mask = simd_mask<_Tp, simd_abi::deduce_t<_Tp, _Np>>;
3146
3147// casts [simd.casts] {{{1
3148// static_simd_cast {{{2
3149template <typename _Tp, typename _Up, typename _Ap, bool = is_simd_v<_Tp>, typename = void>
3150 struct __static_simd_cast_return_type;
3151
3152template <typename _Tp, typename _A0, typename _Up, typename _Ap>
3153 struct __static_simd_cast_return_type<simd_mask<_Tp, _A0>, _Up, _Ap, false, void>
3154 : __static_simd_cast_return_type<simd<_Tp, _A0>, _Up, _Ap> {};
3155
3156template <typename _Tp, typename _Up, typename _Ap>
3157 struct __static_simd_cast_return_type<
3158 _Tp, _Up, _Ap, true, enable_if_t<_Tp::size() == simd_size_v<_Up, _Ap>>>
3159 { using type = _Tp; };
3160
3161template <typename _Tp, typename _Ap>
3162 struct __static_simd_cast_return_type<_Tp, _Tp, _Ap, false,
3163#ifdef _GLIBCXX_SIMD_FIX_P2TS_ISSUE66
3164 enable_if_t<__is_vectorizable_v<_Tp>>
3165#else
3166 void
3167#endif
3168 >
3169 { using type = simd<_Tp, _Ap>; };
3170
3171template <typename _Tp, typename = void>
3172 struct __safe_make_signed { using type = _Tp; };
3173
3174template <typename _Tp>
3175 struct __safe_make_signed<_Tp, enable_if_t<is_integral_v<_Tp>>>
3176 {
3177 // the extra make_unsigned_t is because of PR85951
3178 using type = make_signed_t<make_unsigned_t<_Tp>>;
3179 };
3180
3181template <typename _Tp>
3182 using __safe_make_signed_t = typename __safe_make_signed<_Tp>::type;
3183
3184template <typename _Tp, typename _Up, typename _Ap>
3185 struct __static_simd_cast_return_type<_Tp, _Up, _Ap, false,
3186#ifdef _GLIBCXX_SIMD_FIX_P2TS_ISSUE66
3187 enable_if_t<__is_vectorizable_v<_Tp>>
3188#else
3189 void
3190#endif
3191 >
3192 {
3193 using type = conditional_t<
3194 (is_integral_v<_Up> && is_integral_v<_Tp> &&
3195#ifndef _GLIBCXX_SIMD_FIX_P2TS_ISSUE65
3196 is_signed_v<_Up> != is_signed_v<_Tp> &&
3197#endif
3198 is_same_v<__safe_make_signed_t<_Up>, __safe_make_signed_t<_Tp>>),
3199 simd<_Tp, _Ap>, fixed_size_simd<_Tp, simd_size_v<_Up, _Ap>>>;
3200 };
3201
3202template <typename _Tp, typename _Up, typename _Ap,
3203 typename _R
3204 = typename __static_simd_cast_return_type<_Tp, _Up, _Ap>::type>
3205 _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR _R
3206 static_simd_cast(const simd<_Up, _Ap>& __x)
3207 {
3208 if constexpr (is_same<_R, simd<_Up, _Ap>>::value)
3209 return __x;
3210 else
3211 {
3212 _SimdConverter<_Up, _Ap, typename _R::value_type, typename _R::abi_type>
3213 __c;
3214 return _R(__private_init, __c(__data(__x)));
3215 }
3216 }
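
// Usage sketch (illustrative): casting between integer simds that differ only
// in signedness keeps the ABI; casting to a different element width falls
// back to fixed_size (see __static_simd_cast_return_type above):
//   native_simd<int> __x = {};
//   auto __u = static_simd_cast<unsigned>(__x); // simd<unsigned, same ABI>
//   auto __s = static_simd_cast<short>(__x);    // fixed_size_simd<short, __x.size()>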
3217
3218namespace __proposed {
3219template <typename _Tp, typename _Up, typename _Ap,
3220 typename _R
3221 = typename __static_simd_cast_return_type<_Tp, _Up, _Ap>::type>
3222 _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR typename _R::mask_type
3223 static_simd_cast(const simd_mask<_Up, _Ap>& __x)
3224 {
3225 using _RM = typename _R::mask_type;
3226 return {__private_init, _RM::abi_type::_MaskImpl::template _S_convert<
3227 typename _RM::simd_type::value_type>(__x)};
3228 }
3229
3230template <typename _To, typename _Up, typename _Abi>
3231 _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR
3232 _To
3233 simd_bit_cast(const simd<_Up, _Abi>& __x)
3234 {
3235 using _Tp = typename _To::value_type;
3236 using _ToMember = typename _SimdTraits<_Tp, typename _To::abi_type>::_SimdMember;
3237 using _From = simd<_Up, _Abi>;
3238 using _FromMember = typename _SimdTraits<_Up, _Abi>::_SimdMember;
3239 // with concepts, the following should be constraints
3240 static_assert(sizeof(_To) == sizeof(_From));
3241 static_assert(is_trivially_copyable_v<_Tp> && is_trivially_copyable_v<_Up>);
3242 static_assert(is_trivially_copyable_v<_ToMember> && is_trivially_copyable_v<_FromMember>);
3243#if __has_builtin(__builtin_bit_cast)
3244 return {__private_init, __builtin_bit_cast(_ToMember, __data(__x))};
3245#else
3246 return {__private_init, __bit_cast<_ToMember>(__data(__x))};
3247#endif
3248 }
3249
3250template <typename _To, typename _Up, typename _Abi>
3251 _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR
3252 _To
3253 simd_bit_cast(const simd_mask<_Up, _Abi>& __x)
3254 {
3255 using _From = simd_mask<_Up, _Abi>;
3256 static_assert(sizeof(_To) == sizeof(_From));
3257 static_assert(is_trivially_copyable_v<_From>);
3258 // _To can be simd<T, A>, specifically simd<T, fixed_size<N>> in which case _To is not trivially
3259 // copyable.
3260 if constexpr (is_simd_v<_To>)
3261 {
3262 using _Tp = typename _To::value_type;
3263 using _ToMember = typename _SimdTraits<_Tp, typename _To::abi_type>::_SimdMember;
3264 static_assert(is_trivially_copyable_v<_ToMember>);
3265#if __has_builtin(__builtin_bit_cast)
3266 return {__private_init, __builtin_bit_cast(_ToMember, __x)};
3267#else
3268 return {__private_init, __bit_cast<_ToMember>(__x)};
3269#endif
3270 }
3271 else
3272 {
3273 static_assert(is_trivially_copyable_v<_To>);
3274#if __has_builtin(__builtin_bit_cast)
3275 return __builtin_bit_cast(_To, __x);
3276#else
3277 return __bit_cast<_To>(__x);
3278#endif
3279 }
3280 }
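
// Usage sketch (illustrative): reinterpret the bytes of a float simd as an
// equally sized int simd; both types must be trivially copyable and of equal
// sizeof, which is checked by the static_asserts above:
//   using _IV = rebind_simd_t<int, native_simd<float>>;
//   auto __bits = simd_bit_cast<_IV>(native_simd<float>(1.f));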
3281} // namespace __proposed
3282
3283// simd_cast {{{2
3284template <typename _Tp, typename _Up, typename _Ap,
3285 typename _To = __value_type_or_identity_t<_Tp>>
3286 _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR auto
3287 simd_cast(const simd<_ValuePreserving<_Up, _To>, _Ap>& __x)
3288 -> decltype(static_simd_cast<_Tp>(__x))
3289 { return static_simd_cast<_Tp>(__x); }
3290
3291namespace __proposed {
3292template <typename _Tp, typename _Up, typename _Ap,
3293 typename _To = __value_type_or_identity_t<_Tp>>
3294 _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR auto
3295 simd_cast(const simd_mask<_ValuePreserving<_Up, _To>, _Ap>& __x)
3296 -> decltype(static_simd_cast<_Tp>(__x))
3297 { return static_simd_cast<_Tp>(__x); }
3298} // namespace __proposed
3299
3300// }}}2
3301// resizing_simd_cast {{{
3302namespace __proposed {
3303/* Proposed spec:
3304
3305template <class T, class U, class Abi>
3306T resizing_simd_cast(const simd<U, Abi>& x)
3307
3308p1 Constraints:
3309 - is_simd_v<T> is true and
3310 - T::value_type is the same type as U
3311
3312p2 Returns:
3313 A simd object with the i^th element initialized to x[i] for all i in the
3314 range of [0, min(T::size(), simd_size_v<U, Abi>)). If T::size() is larger
3315 than simd_size_v<U, Abi>, the remaining elements are value-initialized.
3316
3317template <class T, class U, class Abi>
3318T resizing_simd_cast(const simd_mask<U, Abi>& x)
3319
3320p1 Constraints: is_simd_mask_v<T> is true
3321
3322p2 Returns:
3323 A simd_mask object with the i^th element initialized to x[i] for all i in
3324 the range of [0, min(T::size(), simd_size_v<U, Abi>)). If T::size() is larger
3325 than simd_size_v<U, Abi>, the remaining elements are initialized to false.
3326
3327 */
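
// Usage sketch (illustrative): widening a 4-element simd to 8 elements
// value-initializes the new lanes:
//   fixed_size_simd<float, 4> __x = {};
//   auto __y = resizing_simd_cast<fixed_size_simd<float, 8>>(__x);
//   // __y[__i] == __x[__i] for __i < 4; __y[__i] == 0.f otherwise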
3328
3329template <typename _Tp, typename _Up, typename _Ap>
3330 _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR enable_if_t<
3331 conjunction_v<is_simd<_Tp>, is_same<typename _Tp::value_type, _Up>>, _Tp>
3332 resizing_simd_cast(const simd<_Up, _Ap>& __x)
3333 {
3334 if constexpr (is_same_v<typename _Tp::abi_type, _Ap>)
3335 return __x;
3336 else if (__builtin_is_constant_evaluated())
3337 return _Tp([&](auto __i) constexpr {
3338 return __i < simd_size_v<_Up, _Ap> ? __x[__i] : _Up();
3339 });
3340 else if constexpr (simd_size_v<_Up, _Ap> == 1)
3341 {
3342 _Tp __r{};
3343 __r[0] = __x[0];
3344 return __r;
3345 }
3346 else if constexpr (_Tp::size() == 1)
3347 return __x[0];
3348 else if constexpr (sizeof(_Tp) == sizeof(__x)
3349 && !__is_fixed_size_abi_v<_Ap>)
3350 return {__private_init,
3351 __vector_bitcast<typename _Tp::value_type, _Tp::size()>(
3352 _Ap::_S_masked(__data(__x))._M_data)};
3353 else
3354 {
3355 _Tp __r{};
3356 __builtin_memcpy(&__data(__r), &__data(__x),
3357 sizeof(_Up)
3358 * std::min(_Tp::size(), simd_size_v<_Up, _Ap>));
3359 return __r;
3360 }
3361 }
3362
3363template <typename _Tp, typename _Up, typename _Ap>
3364 _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR
3365 enable_if_t<is_simd_mask_v<_Tp>, _Tp>
3366 resizing_simd_cast(const simd_mask<_Up, _Ap>& __x)
3367 {
3368 return {__private_init, _Tp::abi_type::_MaskImpl::template _S_convert<
3369 typename _Tp::simd_type::value_type>(__x)};
3370 }
3371} // namespace __proposed
3372
3373// }}}
3374// to_fixed_size {{{2
3375template <typename _Tp, int _Np>
3376 _GLIBCXX_SIMD_INTRINSIC fixed_size_simd<_Tp, _Np>
3377 to_fixed_size(const fixed_size_simd<_Tp, _Np>& __x)
3378 { return __x; }
3379
3380template <typename _Tp, int _Np>
3381 _GLIBCXX_SIMD_INTRINSIC fixed_size_simd_mask<_Tp, _Np>
3382 to_fixed_size(const fixed_size_simd_mask<_Tp, _Np>& __x)
3383 { return __x; }
3384
3385template <typename _Tp, typename _Ap>
3386 _GLIBCXX_SIMD_INTRINSIC fixed_size_simd<_Tp, simd_size_v<_Tp, _Ap>>
3387 to_fixed_size(const simd<_Tp, _Ap>& __x)
3388 {
3389 using _Rp = fixed_size_simd<_Tp, simd_size_v<_Tp, _Ap>>;
3390 return _Rp([&__x](auto __i) constexpr _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA { return __x[__i]; });
3391 }
3392
3393template <typename _Tp, typename _Ap>
3394 _GLIBCXX_SIMD_INTRINSIC fixed_size_simd_mask<_Tp, simd_size_v<_Tp, _Ap>>
3395 to_fixed_size(const simd_mask<_Tp, _Ap>& __x)
3396 {
3397 return {__private_init,
3398 [&](auto __i) constexpr _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA { return __x[__i]; }};
3399 }
3400
3401// to_native {{{2
3402template <typename _Tp, int _Np>
3403 _GLIBCXX_SIMD_INTRINSIC
3404 enable_if_t<(_Np == native_simd<_Tp>::size()), native_simd<_Tp>>
3405 to_native(const fixed_size_simd<_Tp, _Np>& __x)
3406 {
3407 alignas(memory_alignment_v<native_simd<_Tp>>) _Tp __mem[_Np];
3408 __x.copy_to(__mem, vector_aligned);
3409 return {__mem, vector_aligned};
3410 }
3411
3412template <typename _Tp, int _Np>
3413 _GLIBCXX_SIMD_INTRINSIC
3414 enable_if_t<(_Np == native_simd_mask<_Tp>::size()), native_simd_mask<_Tp>>
3415 to_native(const fixed_size_simd_mask<_Tp, _Np>& __x)
3416 {
3417 return native_simd_mask<_Tp>(
3418 __private_init,
3419 [&](auto __i) constexpr _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA { return __x[__i]; });
3420 }
3421
3422// to_compatible {{{2
3423template <typename _Tp, int _Np>
3424 _GLIBCXX_SIMD_INTRINSIC enable_if_t<(_Np == simd<_Tp>::size()), simd<_Tp>>
3425 to_compatible(const simd<_Tp, simd_abi::fixed_size<_Np>>& __x)
3426 {
3427 alignas(memory_alignment_v<simd<_Tp>>) _Tp __mem[_Np];
3428 __x.copy_to(__mem, vector_aligned);
3429 return {__mem, vector_aligned};
3430 }
3431
3432template <typename _Tp, int _Np>
3433 _GLIBCXX_SIMD_INTRINSIC
3434 enable_if_t<(_Np == simd_mask<_Tp>::size()), simd_mask<_Tp>>
3435 to_compatible(const simd_mask<_Tp, simd_abi::fixed_size<_Np>>& __x)
3436 {
3437 return simd_mask<_Tp>(
3438 __private_init,
3439 [&](auto __i) constexpr _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA { return __x[__i]; });
3440 }
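
// Usage sketch (illustrative): these helpers convert between ABI flavors
// without changing any values; the sizes must match for to_native and
// to_compatible:
//   simd<float> __c;                // compatible ABI
//   auto __f = to_fixed_size(__c);  // fixed_size_simd<float, __c.size()>
//   simd<float> __c2 = to_compatible(__f);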
3441
3442// masked assignment [simd_mask.where] {{{1
3443
3444// where_expression {{{1
3445// const_where_expression<M, T> {{{2
3446template <typename _M, typename _Tp>
3447 class const_where_expression
3448 {
3449 using _V = _Tp;
3450 static_assert(is_same_v<_V, __remove_cvref_t<_Tp>>);
3451
3452 struct _Wrapper { using value_type = _V; };
3453
3454 protected:
3455 using _Impl = typename _V::_Impl;
3456
3457 using value_type =
3458 typename conditional_t<is_arithmetic_v<_V>, _Wrapper, _V>::value_type;
3459
3460 _GLIBCXX_SIMD_INTRINSIC friend const _M&
3461 __get_mask(const const_where_expression& __x)
3462 { return __x._M_k; }
3463
3464 _GLIBCXX_SIMD_INTRINSIC friend const _Tp&
3465 __get_lvalue(const const_where_expression& __x)
3466 { return __x._M_value; }
3467
3468 const _M& _M_k;
3469 _Tp& _M_value;
3470
3471 public:
3472 const_where_expression(const const_where_expression&) = delete;
3473
3474 const_where_expression& operator=(const const_where_expression&) = delete;
3475
3476 _GLIBCXX_SIMD_INTRINSIC constexpr
3477 const_where_expression(const _M& __kk, const _Tp& dd)
3478 : _M_k(__kk), _M_value(const_cast<_Tp&>(dd)) {}
3479
3480 _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR _V
3481 operator-() const&&
3482 {
3483 return {__private_init,
3484 _Impl::template _S_masked_unary<negate>(__data(_M_k),
3485 __data(_M_value))};
3486 }
3487
3488 template <typename _Up, typename _Flags>
3489 [[nodiscard]] _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR _V
3490 copy_from(const _LoadStorePtr<_Up, value_type>* __mem, _IsSimdFlagType<_Flags>) const&&
3491 {
3492 return {__private_init,
3493 _Impl::_S_masked_load(__data(_M_value), __data(_M_k),
3494 _Flags::template _S_apply<_V>(__mem))};
3495 }
3496
3497 template <typename _Up, typename _Flags>
3498 _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR void
3499 copy_to(_LoadStorePtr<_Up, value_type>* __mem, _IsSimdFlagType<_Flags>) const&&
3500 {
3501 _Impl::_S_masked_store(__data(_M_value),
3502 _Flags::template _S_apply<_V>(__mem),
3503 __data(_M_k));
3504 }
3505 };
3506
3507// const_where_expression<bool, T> {{{2
3508template <typename _Tp>
3509 class const_where_expression<bool, _Tp>
3510 {
3511 using _M = bool;
3512 using _V = _Tp;
3513
3514 static_assert(is_same_v<_V, __remove_cvref_t<_Tp>>);
3515
3516 struct _Wrapper { using value_type = _V; };
3517
3518 protected:
3519 using value_type
3520 = typename conditional_t<is_arithmetic_v<_V>, _Wrapper, _V>::value_type;
3521
3522 _GLIBCXX_SIMD_INTRINSIC friend const _M&
3523 __get_mask(const const_where_expression& __x)
3524 { return __x._M_k; }
3525
3526 _GLIBCXX_SIMD_INTRINSIC friend const _Tp&
3527 __get_lvalue(const const_where_expression& __x)
3528 { return __x._M_value; }
3529
3530 const bool _M_k;
3531 _Tp& _M_value;
3532
3533 public:
3534 const_where_expression(const const_where_expression&) = delete;
3535 const_where_expression& operator=(const const_where_expression&) = delete;
3536
3537 _GLIBCXX_SIMD_INTRINSIC constexpr
3538 const_where_expression(const bool __kk, const _Tp& dd)
3539 : _M_k(__kk), _M_value(const_cast<_Tp&>(dd)) {}
3540
3541 _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR _V
3542 operator-() const&&
3543 { return _M_k ? -_M_value : _M_value; }
3544
3545 template <typename _Up, typename _Flags>
3546 [[nodiscard]] _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR _V
3547 copy_from(const _LoadStorePtr<_Up, value_type>* __mem, _IsSimdFlagType<_Flags>) const&&
3548 { return _M_k ? static_cast<_V>(__mem[0]) : _M_value; }
3549
3550 template <typename _Up, typename _Flags>
3551 _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR void
3552 copy_to(_LoadStorePtr<_Up, value_type>* __mem, _IsSimdFlagType<_Flags>) const&&
3553 {
3554 if (_M_k)
3555 __mem[0] = _M_value;
3556 }
3557 };
3558
3559// where_expression<M, T> {{{2
3560template <typename _M, typename _Tp>
3561 class where_expression : public const_where_expression<_M, _Tp>
3562 {
3563 using _Impl = typename const_where_expression<_M, _Tp>::_Impl;
3564
3565 static_assert(!is_const<_Tp>::value,
3566                  "where_expression may only be instantiated with a non-const "
3567 "_Tp parameter");
3568
3569 using typename const_where_expression<_M, _Tp>::value_type;
3570 using const_where_expression<_M, _Tp>::_M_k;
3571 using const_where_expression<_M, _Tp>::_M_value;
3572
3573 static_assert(
3574 is_same<typename _M::abi_type, typename _Tp::abi_type>::value, "");
3575 static_assert(_M::size() == _Tp::size(), "");
3576
3577 _GLIBCXX_SIMD_INTRINSIC friend constexpr _Tp&
3578 __get_lvalue(where_expression& __x)
3579 { return __x._M_value; }
3580
3581 public:
3582 where_expression(const where_expression&) = delete;
3583 where_expression& operator=(const where_expression&) = delete;
3584
3585 _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR
3586 where_expression(const _M& __kk, _Tp& dd)
3587 : const_where_expression<_M, _Tp>(__kk, dd) {}
3588
3589 template <typename _Up>
3590 _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR void
3591 operator=(_Up&& __x) &&
3592 {
3593 _Impl::_S_masked_assign(__data(_M_k), __data(_M_value),
3594 __to_value_type_or_member_type<_Tp>(
3595 static_cast<_Up&&>(__x)));
3596 }
3597
3598#define _GLIBCXX_SIMD_OP_(__op, __name) \
3599 template <typename _Up> \
3600 _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR void \
3601 operator __op##=(_Up&& __x)&& \
3602 { \
3603 _Impl::template _S_masked_cassign( \
3604 __data(_M_k), __data(_M_value), \
3605 __to_value_type_or_member_type<_Tp>(static_cast<_Up&&>(__x)), \
3606 [](auto __impl, auto __lhs, auto __rhs) \
3607 constexpr _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA \
3608 { return __impl.__name(__lhs, __rhs); }); \
3609 } \
3610 static_assert(true)
3611 _GLIBCXX_SIMD_OP_(+, _S_plus);
3612 _GLIBCXX_SIMD_OP_(-, _S_minus);
3613 _GLIBCXX_SIMD_OP_(*, _S_multiplies);
3614 _GLIBCXX_SIMD_OP_(/, _S_divides);
3615 _GLIBCXX_SIMD_OP_(%, _S_modulus);
3616 _GLIBCXX_SIMD_OP_(&, _S_bit_and);
3617 _GLIBCXX_SIMD_OP_(|, _S_bit_or);
3618 _GLIBCXX_SIMD_OP_(^, _S_bit_xor);
3619 _GLIBCXX_SIMD_OP_(<<, _S_shift_left);
3620 _GLIBCXX_SIMD_OP_(>>, _S_shift_right);
3621#undef _GLIBCXX_SIMD_OP_
3622
3623 _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR void
3624 operator++() &&
3625 {
3626 __data(_M_value)
3627 = _Impl::template _S_masked_unary<__increment>(__data(_M_k), __data(_M_value));
3628 }
3629
3630 _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR void
3631 operator++(int) &&
3632 {
3633 __data(_M_value)
3634 = _Impl::template _S_masked_unary<__increment>(__data(_M_k), __data(_M_value));
3635 }
3636
3637 _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR void
3638 operator--() &&
3639 {
3640 __data(_M_value)
3641 = _Impl::template _S_masked_unary<__decrement>(__data(_M_k), __data(_M_value));
3642 }
3643
3644 _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR void
3645 operator--(int) &&
3646 {
3647 __data(_M_value)
3648 = _Impl::template _S_masked_unary<__decrement>(__data(_M_k), __data(_M_value));
3649 }
3650
3651 // intentionally hides const_where_expression::copy_from
3652 template <typename _Up, typename _Flags>
3653 _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR void
3654 copy_from(const _LoadStorePtr<_Up, value_type>* __mem, _IsSimdFlagType<_Flags>) &&
3655 {
3656 __data(_M_value) = _Impl::_S_masked_load(__data(_M_value), __data(_M_k),
3657 _Flags::template _S_apply<_Tp>(__mem));
3658 }
3659 };
3660
3661// where_expression<bool, T> {{{2
3662template <typename _Tp>
3663 class where_expression<bool, _Tp>
3664 : public const_where_expression<bool, _Tp>
3665 {
3666 using _M = bool;
3667 using typename const_where_expression<_M, _Tp>::value_type;
3668 using const_where_expression<_M, _Tp>::_M_k;
3669 using const_where_expression<_M, _Tp>::_M_value;
3670
3671 public:
3672 where_expression(const where_expression&) = delete;
3673 where_expression& operator=(const where_expression&) = delete;
3674
3675 _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR
3676 where_expression(const _M& __kk, _Tp& dd)
3677 : const_where_expression<_M, _Tp>(__kk, dd) {}
3678
3679#define _GLIBCXX_SIMD_OP_(__op) \
3680 template <typename _Up> \
3681 _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR void \
3682 operator __op(_Up&& __x)&& \
3683 { if (_M_k) _M_value __op static_cast<_Up&&>(__x); }
3684
3685 _GLIBCXX_SIMD_OP_(=)
3686 _GLIBCXX_SIMD_OP_(+=)
3687 _GLIBCXX_SIMD_OP_(-=)
3688 _GLIBCXX_SIMD_OP_(*=)
3689 _GLIBCXX_SIMD_OP_(/=)
3690 _GLIBCXX_SIMD_OP_(%=)
3691 _GLIBCXX_SIMD_OP_(&=)
3692 _GLIBCXX_SIMD_OP_(|=)
3693 _GLIBCXX_SIMD_OP_(^=)
3694 _GLIBCXX_SIMD_OP_(<<=)
3695 _GLIBCXX_SIMD_OP_(>>=)
3696 #undef _GLIBCXX_SIMD_OP_
3697
3698 _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR void
3699 operator++() &&
3700 { if (_M_k) ++_M_value; }
3701
3702 _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR void
3703 operator++(int) &&
3704 { if (_M_k) ++_M_value; }
3705
3706 _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR void
3707 operator--() &&
3708 { if (_M_k) --_M_value; }
3709
3710 _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR void
3711 operator--(int) &&
3712 { if (_M_k) --_M_value; }
3713
3714 // intentionally hides const_where_expression::copy_from
3715 template <typename _Up, typename _Flags>
3716 _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR void
3717 copy_from(const _LoadStorePtr<_Up, value_type>* __mem, _IsSimdFlagType<_Flags>) &&
3718 { if (_M_k) _M_value = __mem[0]; }
3719 };
3720
3721// where {{{1
3722template <typename _Tp, typename _Ap>
3723 _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR
3724 where_expression<simd_mask<_Tp, _Ap>, simd<_Tp, _Ap>>
3725 where(const typename simd<_Tp, _Ap>::mask_type& __k, simd<_Tp, _Ap>& __value)
3726 { return {__k, __value}; }
3727
3728template <typename _Tp, typename _Ap>
3729 _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR
3730 const_where_expression<simd_mask<_Tp, _Ap>, simd<_Tp, _Ap>>
3731 where(const typename simd<_Tp, _Ap>::mask_type& __k, const simd<_Tp, _Ap>& __value)
3732 { return {__k, __value}; }
3733
3734template <typename _Tp, typename _Ap>
3735 _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR
3736 where_expression<simd_mask<_Tp, _Ap>, simd_mask<_Tp, _Ap>>
3737 where(const remove_const_t<simd_mask<_Tp, _Ap>>& __k, simd_mask<_Tp, _Ap>& __value)
3738 { return {__k, __value}; }
3739
3740template <typename _Tp, typename _Ap>
3741 _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR
3742 const_where_expression<simd_mask<_Tp, _Ap>, simd_mask<_Tp, _Ap>>
3743 where(const remove_const_t<simd_mask<_Tp, _Ap>>& __k, const simd_mask<_Tp, _Ap>& __value)
3744 { return {__k, __value}; }
3745
3746template <typename _Tp>
3747 _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR where_expression<bool, _Tp>
3748 where(_ExactBool __k, _Tp& __value)
3749 { return {__k, __value}; }
3750
3751template <typename _Tp>
3752 _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR const_where_expression<bool, _Tp>
3753 where(_ExactBool __k, const _Tp& __value)
3754 { return {__k, __value}; }
3755
3756template <typename _Tp, typename _Ap>
3757 _GLIBCXX_SIMD_CONSTEXPR void
3758 where(bool __k, simd<_Tp, _Ap>& __value) = delete;
3759
3760template <typename _Tp, typename _Ap>
3761 _GLIBCXX_SIMD_CONSTEXPR void
3762 where(bool __k, const simd<_Tp, _Ap>& __value) = delete;
3763
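// Example (editorial, illustrative sketch; assumes stdx = std::experimental):
// masked assignment through the rvalue where_expression returned by where().
//
//   stdx::native_simd<float> __v = /* ... */;
//   where(__v < 0.f, __v) = 0.f;     // zero the negative elements
//   where(__v > 10.f, __v) *= 0.5f;  // halve the elements greater than 10
//
// The deleted plain-bool overloads above ensure a scalar bool cannot
// accidentally select elements of a simd; scalar values go through the
// _ExactBool overloads instead.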
3764// proposed mask iterations {{{1
3765namespace __proposed {
3766template <size_t _Np>
3767 class where_range
3768 {
3769 const bitset<_Np> __bits;
3770
3771 public:
3772 where_range(bitset<_Np> __b) : __bits(__b) {}
3773
3774 class iterator
3775 {
3776 size_t __mask;
3777 size_t __bit;
3778
3779 _GLIBCXX_SIMD_INTRINSIC void
3780 __next_bit()
3781 { __bit = __builtin_ctzl(__mask); }
3782
3783 _GLIBCXX_SIMD_INTRINSIC void
3784 __reset_lsb()
3785 {
3786	  // clear the least significant set bit: 01100100 & (01100100 - 1) == 01100000
3787 __mask &= (__mask - 1);
3788 // __asm__("btr %1,%0" : "+r"(__mask) : "r"(__bit));
3789 }
3790
3791 public:
3792 iterator(decltype(__mask) __m) : __mask(__m) { __next_bit(); }
3793 iterator(const iterator&) = default;
3794 iterator(iterator&&) = default;
3795
3796 _GLIBCXX_SIMD_ALWAYS_INLINE size_t
3797 operator->() const
3798 { return __bit; }
3799
3800 _GLIBCXX_SIMD_ALWAYS_INLINE size_t
3801 operator*() const
3802 { return __bit; }
3803
3804 _GLIBCXX_SIMD_ALWAYS_INLINE iterator&
3805 operator++()
3806 {
3807 __reset_lsb();
3808 __next_bit();
3809 return *this;
3810 }
3811
3812 _GLIBCXX_SIMD_ALWAYS_INLINE iterator
3813 operator++(int)
3814 {
3815 iterator __tmp = *this;
3816 __reset_lsb();
3817 __next_bit();
3818 return __tmp;
3819 }
3820
3821 _GLIBCXX_SIMD_ALWAYS_INLINE bool
3822 operator==(const iterator& __rhs) const
3823 { return __mask == __rhs.__mask; }
3824
3825 _GLIBCXX_SIMD_ALWAYS_INLINE bool
3826 operator!=(const iterator& __rhs) const
3827 { return __mask != __rhs.__mask; }
3828 };
3829
3830 iterator
3831 begin() const
3832 { return __bits.to_ullong(); }
3833
3834 iterator
3835 end() const
3836 { return 0; }
3837 };
3838
3839template <typename _Tp, typename _Ap>
3840 where_range<simd_size_v<_Tp, _Ap>>
3841 where(const simd_mask<_Tp, _Ap>& __k)
3842 { return __k.__to_bitset(); }
3843
3844} // namespace __proposed
3845
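// Example (editorial, illustrative sketch): iterating the indices of all set
// mask elements via the proposed where(mask) range.
//
//   stdx::native_simd_mask<float> __k = /* ... */;
//   for (size_t __i : stdx::__proposed::where(__k))
//     std::printf("element %zu selected\n", __i);
//
// Each iteration step finds the least significant set bit (__builtin_ctzl)
// and then clears it (__mask &= __mask - 1), so the indices come out in
// ascending order.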
3846// }}}1
3847// reductions [simd.reductions] {{{1
3848template <typename _Tp, typename _Abi, typename _BinaryOperation = plus<>>
3849 _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR _Tp
3850 reduce(const simd<_Tp, _Abi>& __v, _BinaryOperation __binary_op = _BinaryOperation())
3851 { return _Abi::_SimdImpl::_S_reduce(__v, __binary_op); }
3852
3853template <typename _M, typename _V, typename _BinaryOperation = plus<>>
3854 _GLIBCXX_SIMD_INTRINSIC typename _V::value_type
3855 reduce(const const_where_expression<_M, _V>& __x,
3856 typename _V::value_type __identity_element, _BinaryOperation __binary_op)
3857 {
3858 if (__builtin_expect(none_of(__get_mask(__x)), false))
3859 return __identity_element;
3860
3861 _V __tmp = __identity_element;
3862 _V::_Impl::_S_masked_assign(__data(__get_mask(__x)), __data(__tmp),
3863 __data(__get_lvalue(__x)));
3864 return reduce(__tmp, __binary_op);
3865 }
3866
3867template <typename _M, typename _V>
3868 _GLIBCXX_SIMD_INTRINSIC typename _V::value_type
3869 reduce(const const_where_expression<_M, _V>& __x, plus<> __binary_op = {})
3870 { return reduce(__x, 0, __binary_op); }
3871
3872template <typename _M, typename _V>
3873 _GLIBCXX_SIMD_INTRINSIC typename _V::value_type
3874 reduce(const const_where_expression<_M, _V>& __x, multiplies<> __binary_op)
3875 { return reduce(__x, 1, __binary_op); }
3876
3877template <typename _M, typename _V>
3878 _GLIBCXX_SIMD_INTRINSIC typename _V::value_type
3879 reduce(const const_where_expression<_M, _V>& __x, bit_and<> __binary_op)
3880 { return reduce(__x, ~typename _V::value_type(), __binary_op); }
3881
3882template <typename _M, typename _V>
3883 _GLIBCXX_SIMD_INTRINSIC typename _V::value_type
3884 reduce(const const_where_expression<_M, _V>& __x, bit_or<> __binary_op)
3885 { return reduce(__x, 0, __binary_op); }
3886
3887template <typename _M, typename _V>
3888 _GLIBCXX_SIMD_INTRINSIC typename _V::value_type
3889 reduce(const const_where_expression<_M, _V>& __x, bit_xor<> __binary_op)
3890 { return reduce(__x, 0, __binary_op); }
3891
3892template <typename _Tp, typename _Abi>
3893 _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR _Tp
3894 hmin(const simd<_Tp, _Abi>& __v) noexcept
3895 { return _Abi::_SimdImpl::_S_reduce(__v, __detail::_Minimum()); }
3896
3897template <typename _Tp, typename _Abi>
3898 _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR _Tp
3899 hmax(const simd<_Tp, _Abi>& __v) noexcept
3900 { return _Abi::_SimdImpl::_S_reduce(__v, __detail::_Maximum()); }
3901
3902template <typename _M, typename _V>
3903 _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR
3904 typename _V::value_type
3905 hmin(const const_where_expression<_M, _V>& __x) noexcept
3906 {
3907 using _Tp = typename _V::value_type;
3908 constexpr _Tp __id_elem =
3909#ifdef __FINITE_MATH_ONLY__
3910 __finite_max_v<_Tp>;
3911#else
3912 __value_or<__infinity, _Tp>(__finite_max_v<_Tp>);
3913#endif
3914 _V __tmp = __id_elem;
3915 _V::_Impl::_S_masked_assign(__data(__get_mask(__x)), __data(__tmp),
3916 __data(__get_lvalue(__x)));
3917 return _V::abi_type::_SimdImpl::_S_reduce(__tmp, __detail::_Minimum());
3918 }
3919
3920template <typename _M, typename _V>
3921 _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR
3922 typename _V::value_type
3923 hmax(const const_where_expression<_M, _V>& __x) noexcept
3924 {
3925 using _Tp = typename _V::value_type;
3926 constexpr _Tp __id_elem =
3927#ifdef __FINITE_MATH_ONLY__
3928 __finite_min_v<_Tp>;
3929#else
3930 [] {
3931 if constexpr (__value_exists_v<__infinity, _Tp>)
3932 return -__infinity_v<_Tp>;
3933 else
3934 return __finite_min_v<_Tp>;
3935 }();
3936#endif
3937 _V __tmp = __id_elem;
3938 _V::_Impl::_S_masked_assign(__data(__get_mask(__x)), __data(__tmp),
3939 __data(__get_lvalue(__x)));
3940 return _V::abi_type::_SimdImpl::_S_reduce(__tmp, __detail::_Maximum());
3941 }
3942
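// Example (editorial, illustrative sketch): full and masked reductions.
//
//   stdx::native_simd<int> __v([](auto __i) { return int(__i) + 1; });
//   int __sum = stdx::reduce(__v);  // plus<> is the default operation
//   int __odd = stdx::reduce(where(__v % 2 != 0, __v), 0, std::plus<>());
//   int __lo = stdx::hmin(__v);
//   int __hi = stdx::hmax(__v);
//
// The masked overloads substitute the identity element for deselected
// elements before reducing, so an all-false mask yields the identity.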
3943// }}}1
3944// algorithms [simd.alg] {{{
3945template <typename _Tp, typename _Ap>
3946 _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR simd<_Tp, _Ap>
3947 min(const simd<_Tp, _Ap>& __a, const simd<_Tp, _Ap>& __b)
3948 { return {__private_init, _Ap::_SimdImpl::_S_min(__data(__a), __data(__b))}; }
3949
3950template <typename _Tp, typename _Ap>
3951 _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR simd<_Tp, _Ap>
3952 max(const simd<_Tp, _Ap>& __a, const simd<_Tp, _Ap>& __b)
3953 { return {__private_init, _Ap::_SimdImpl::_S_max(__data(__a), __data(__b))}; }
3954
3955template <typename _Tp, typename _Ap>
3956 _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR
3957 pair<simd<_Tp, _Ap>, simd<_Tp, _Ap>>
3958 minmax(const simd<_Tp, _Ap>& __a, const simd<_Tp, _Ap>& __b)
3959 {
3960 const auto pair_of_members
3961 = _Ap::_SimdImpl::_S_minmax(__data(__a), __data(__b));
3962 return {simd<_Tp, _Ap>(__private_init, pair_of_members.first),
3963 simd<_Tp, _Ap>(__private_init, pair_of_members.second)};
3964 }
3965
3966template <typename _Tp, typename _Ap>
3967 _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR simd<_Tp, _Ap>
3968 clamp(const simd<_Tp, _Ap>& __v, const simd<_Tp, _Ap>& __lo, const simd<_Tp, _Ap>& __hi)
3969 {
3970 using _Impl = typename _Ap::_SimdImpl;
3971 return {__private_init,
3972 _Impl::_S_min(__data(__hi),
3973 _Impl::_S_max(__data(__lo), __data(__v)))};
3974 }
3975
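// Example (editorial, illustrative sketch): the element-wise algorithms.
//
//   stdx::native_simd<float> __a = /* ... */, __b = /* ... */;
//   auto __mn = stdx::min(__a, __b);
//   auto [__lo, __hi] = stdx::minmax(__a, __b);
//   auto __cl = stdx::clamp(__a, __lo, __hi);
//
// Unlike std::clamp, all three arguments are vectors: each element of the
// first argument is clamped against the corresponding elements of __lo and
// __hi.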
3976// }}}
3977
3978template <size_t... _Sizes, typename _Tp, typename _Ap,
3979 typename = enable_if_t<((_Sizes + ...) == simd<_Tp, _Ap>::size())>>
3980 inline tuple<simd<_Tp, simd_abi::deduce_t<_Tp, _Sizes>>...>
3981 split(const simd<_Tp, _Ap>&);
3982
3983// __extract_part {{{
3984template <int _Index, int _Total, int _Combine = 1, typename _Tp, size_t _Np>
3985 _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_CONST constexpr
3986 conditional_t<_Np == _Total and _Combine == 1, _Tp, _SimdWrapper<_Tp, _Np / _Total * _Combine>>
3987 __extract_part(const _SimdWrapper<_Tp, _Np> __x);
3988
3989template <int _Index, int _Parts, int _Combine = 1, typename _Tp, typename _A0, typename... _As>
3990 _GLIBCXX_SIMD_INTRINSIC constexpr auto
3991 __extract_part(const _SimdTuple<_Tp, _A0, _As...>& __x);
3992
3993// }}}
3994// _SizeList {{{
3995template <size_t _V0, size_t... _Values>
3996 struct _SizeList
3997 {
3998 template <size_t _I>
3999 static constexpr size_t
4000 _S_at(_SizeConstant<_I> = {})
4001 {
4002 if constexpr (_I == 0)
4003 return _V0;
4004 else
4005 return _SizeList<_Values...>::template _S_at<_I - 1>();
4006 }
4007
4008 template <size_t _I>
4009 static constexpr auto
4010 _S_before(_SizeConstant<_I> = {})
4011 {
4012 if constexpr (_I == 0)
4013 return _SizeConstant<0>();
4014 else
4015 return _SizeConstant<
4016 _V0 + _SizeList<_Values...>::template _S_before<_I - 1>()>();
4017 }
4018
4019 template <size_t _Np>
4020 static constexpr auto
4021 _S_pop_front(_SizeConstant<_Np> = {})
4022 {
4023 if constexpr (_Np == 0)
4024 return _SizeList();
4025 else
4026 return _SizeList<_Values...>::template _S_pop_front<_Np - 1>();
4027 }
4028 };
4029
4030// }}}
4031// __extract_center {{{
4032template <typename _Tp, size_t _Np>
4033 _GLIBCXX_SIMD_INTRINSIC _SimdWrapper<_Tp, _Np / 2>
4034 __extract_center(_SimdWrapper<_Tp, _Np> __x)
4035 {
4036 static_assert(_Np >= 4);
4037 static_assert(_Np % 4 == 0); // x0 - x1 - x2 - x3 -> return {x1, x2}
4038#if _GLIBCXX_SIMD_X86INTRIN // {{{
4039 if constexpr (__have_avx512f && sizeof(_Tp) * _Np == 64)
4040 {
4041 const auto __intrin = __to_intrin(__x);
4042 if constexpr (is_integral_v<_Tp>)
4043 return __vector_bitcast<_Tp>(_mm512_castsi512_si256(
4044 _mm512_shuffle_i32x4(__intrin, __intrin,
4045 1 + 2 * 0x4 + 2 * 0x10 + 3 * 0x40)));
4046 else if constexpr (sizeof(_Tp) == 4)
4047 return __vector_bitcast<_Tp>(_mm512_castps512_ps256(
4048 _mm512_shuffle_f32x4(__intrin, __intrin,
4049 1 + 2 * 0x4 + 2 * 0x10 + 3 * 0x40)));
4050 else if constexpr (sizeof(_Tp) == 8)
4051 return __vector_bitcast<_Tp>(_mm512_castpd512_pd256(
4052 _mm512_shuffle_f64x2(__intrin, __intrin,
4053 1 + 2 * 0x4 + 2 * 0x10 + 3 * 0x40)));
4054 else
4055 __assert_unreachable<_Tp>();
4056 }
4057 else if constexpr (sizeof(_Tp) * _Np == 32 && is_floating_point_v<_Tp>)
4058 return __vector_bitcast<_Tp>(
4059 _mm_shuffle_pd(__lo128(__vector_bitcast<double>(__x)),
4060 __hi128(__vector_bitcast<double>(__x)), 1));
4061 else if constexpr (sizeof(__x) == 32 && sizeof(_Tp) * _Np <= 32)
4062 return __vector_bitcast<_Tp>(
4063 _mm_alignr_epi8(__hi128(__vector_bitcast<_LLong>(__x)),
4064 __lo128(__vector_bitcast<_LLong>(__x)),
4065 sizeof(_Tp) * _Np / 4));
4066 else
4067#endif // _GLIBCXX_SIMD_X86INTRIN }}}
4068 {
4069 __vector_type_t<_Tp, _Np / 2> __r;
4070 __builtin_memcpy(&__r,
4071 reinterpret_cast<const char*>(&__x)
4072 + sizeof(_Tp) * _Np / 4,
4073 sizeof(_Tp) * _Np / 2);
4074 return __r;
4075 }
4076 }
4077
4078template <typename _Tp, typename _A0, typename... _As>
4079 _GLIBCXX_SIMD_INTRINSIC
4080 _SimdWrapper<_Tp, _SimdTuple<_Tp, _A0, _As...>::_S_size() / 2>
4081 __extract_center(const _SimdTuple<_Tp, _A0, _As...>& __x)
4082 {
4083 if constexpr (sizeof...(_As) == 0)
4084 return __extract_center(__x.first);
4085 else
4086 return __extract_part<1, 4, 2>(__x);
4087 }
4088
4089// }}}
4090// __split_wrapper {{{
4091template <size_t... _Sizes, typename _Tp, typename... _As>
4092 auto
4093 __split_wrapper(_SizeList<_Sizes...>, const _SimdTuple<_Tp, _As...>& __x)
4094 {
4095 return split<_Sizes...>(
4096 fixed_size_simd<_Tp, _SimdTuple<_Tp, _As...>::_S_size()>(__private_init,
4097 __x));
4098 }
4099
4100// }}}
4101
4102// split<simd>(simd) {{{
4103template <typename _V, typename _Ap,
4104 size_t _Parts = simd_size_v<typename _V::value_type, _Ap> / _V::size()>
4105 enable_if_t<simd_size_v<typename _V::value_type, _Ap> == _Parts * _V::size()
4106 && is_simd_v<_V>, array<_V, _Parts>>
4107 split(const simd<typename _V::value_type, _Ap>& __x)
4108 {
4109 using _Tp = typename _V::value_type;
4110 if constexpr (_Parts == 1)
4111 {
4112 return {simd_cast<_V>(__x)};
4113 }
4114 else if (__x._M_is_constprop())
4115 {
4116 return __generate_from_n_evaluations<_Parts, array<_V, _Parts>>(
4117 [&](auto __i) constexpr _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA {
4118 return _V([&](auto __j) constexpr _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA
4119 { return __x[__i * _V::size() + __j]; });
4120 });
4121 }
4122 else if constexpr (
4123 __is_fixed_size_abi_v<_Ap>
4124 && (is_same_v<typename _V::abi_type, simd_abi::scalar>
4125 || (__is_fixed_size_abi_v<typename _V::abi_type>
4126 && sizeof(_V) == sizeof(_Tp) * _V::size() // _V doesn't have padding
4127 )))
4128 {
4129 // fixed_size -> fixed_size (w/o padding) or scalar
4130#ifdef _GLIBCXX_SIMD_USE_ALIASING_LOADS
4131 const __may_alias<_Tp>* const __element_ptr
4132 = reinterpret_cast<const __may_alias<_Tp>*>(&__data(__x));
4133 return __generate_from_n_evaluations<_Parts, array<_V, _Parts>>(
4134 [&](auto __i) constexpr _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA
4135 { return _V(__element_ptr + __i * _V::size(), vector_aligned); });
4136#else
4137 const auto& __xx = __data(__x);
4138 return __generate_from_n_evaluations<_Parts, array<_V, _Parts>>(
4139 [&](auto __i) constexpr _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA {
4140 [[maybe_unused]] constexpr size_t __offset
4141 = decltype(__i)::value * _V::size();
4142 return _V([&](auto __j) constexpr _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA {
4143 constexpr _SizeConstant<__j + __offset> __k;
4144 return __xx[__k];
4145 });
4146 });
4147#endif
4148 }
4149 else if constexpr (is_same_v<typename _V::abi_type, simd_abi::scalar>)
4150 {
4151 // normally memcpy should work here as well
4152 return __generate_from_n_evaluations<_Parts, array<_V, _Parts>>(
4153 [&](auto __i) constexpr _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA { return __x[__i]; });
4154 }
4155 else
4156 {
4157 return __generate_from_n_evaluations<_Parts, array<_V, _Parts>>(
4158 [&](auto __i) constexpr _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA {
4159 if constexpr (__is_fixed_size_abi_v<typename _V::abi_type>)
4160 return _V([&](auto __j) constexpr _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA {
4161 return __x[__i * _V::size() + __j];
4162 });
4163 else
4164 return _V(__private_init,
4165 __extract_part<decltype(__i)::value, _Parts>(__data(__x)));
4166 });
4167 }
4168 }
4169
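// Example (editorial, illustrative sketch): splitting into equally sized
// parts. The target size must divide the source size, otherwise the
// enable_if constraint removes this overload.
//
//   using _V8 = stdx::fixed_size_simd<float, 8>;
//   using _V4 = stdx::fixed_size_simd<float, 4>;
//   _V8 __x = /* ... */;
//   std::array<_V4, 2> __halves = stdx::split<_V4>(__x);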
4170// }}}
4171// split<simd_mask>(simd_mask) {{{
4172template <typename _V, typename _Ap,
4173 size_t _Parts = simd_size_v<typename _V::simd_type::value_type, _Ap> / _V::size()>
4174	  enable_if_t<is_simd_mask_v<_V> && simd_size_v<typename _V::simd_type::value_type,
4175	                                                _Ap> == _Parts * _V::size(), array<_V, _Parts>>
4176 split(const simd_mask<typename _V::simd_type::value_type, _Ap>& __x)
4177 {
4178 if constexpr (is_same_v<_Ap, typename _V::abi_type>)
4179 return {__x};
4180 else if constexpr (_Parts == 1)
4181 return {__proposed::static_simd_cast<_V>(__x)};
4182 else if constexpr (_Parts == 2 && __is_sse_abi<typename _V::abi_type>()
4183 && __is_avx_abi<_Ap>())
4184 return {_V(__private_init, __lo128(__data(__x))),
4185 _V(__private_init, __hi128(__data(__x)))};
4186 else if constexpr (_V::size() <= __CHAR_BIT__ * sizeof(_ULLong))
4187 {
4188 const bitset __bits = __x.__to_bitset();
4189 return __generate_from_n_evaluations<_Parts, array<_V, _Parts>>(
4190 [&](auto __i) constexpr _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA {
4191 constexpr size_t __offset = __i * _V::size();
4192 return _V(__bitset_init, (__bits >> __offset).to_ullong());
4193 });
4194 }
4195 else
4196 {
4197 return __generate_from_n_evaluations<_Parts, array<_V, _Parts>>(
4198 [&](auto __i) constexpr _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA {
4199 constexpr size_t __offset = __i * _V::size();
4200 return _V(__private_init,
4201 [&](auto __j) constexpr _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA {
4202 return __x[__j + __offset];
4203 });
4204 });
4205 }
4206 }
4207
4208// }}}
4209// split<_Sizes...>(simd) {{{
4210template <size_t... _Sizes, typename _Tp, typename _Ap, typename>
4211 _GLIBCXX_SIMD_ALWAYS_INLINE
4212 tuple<simd<_Tp, simd_abi::deduce_t<_Tp, _Sizes>>...>
4213 split(const simd<_Tp, _Ap>& __x)
4214 {
4215 using _SL = _SizeList<_Sizes...>;
4216 using _Tuple = tuple<__deduced_simd<_Tp, _Sizes>...>;
4217 constexpr size_t _Np = simd_size_v<_Tp, _Ap>;
4218 constexpr size_t _N0 = _SL::template _S_at<0>();
4219 using _V = __deduced_simd<_Tp, _N0>;
4220
4221 if (__x._M_is_constprop())
4222 return __generate_from_n_evaluations<sizeof...(_Sizes), _Tuple>(
4223 [&](auto __i) constexpr _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA {
4224 using _Vi = __deduced_simd<_Tp, _SL::_S_at(__i)>;
4225 constexpr size_t __offset = _SL::_S_before(__i);
4226 return _Vi([&](auto __j) constexpr _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA {
4227 return __x[__offset + __j];
4228 });
4229 });
4230 else if constexpr (_Np == _N0)
4231 {
4232 static_assert(sizeof...(_Sizes) == 1);
4233 return {simd_cast<_V>(__x)};
4234 }
4235 else if constexpr // split from fixed_size, such that __x::first.size == _N0
4236	  (__is_fixed_size_abi_v<_Ap>
4237	   && __fixed_size_storage_t<_Tp, _Np>::_S_first_size == _N0)
4238 {
4239 static_assert(
4240 !__is_fixed_size_abi_v<typename _V::abi_type>,
4241	  "How can <_Tp, _Np> be a single _SimdTuple entry but a "
4242 "fixed_size_simd "
4243 "when deduced?");
4244 // extract first and recurse (__split_wrapper is needed to deduce a new
4245 // _Sizes pack)
4246 return tuple_cat(make_tuple(_V(__private_init, __data(__x).first)),
4247 __split_wrapper(_SL::template _S_pop_front<1>(),
4248 __data(__x).second));
4249 }
4250 else if constexpr ((!__is_fixed_size_abi_v<simd_abi::deduce_t<_Tp, _Sizes>> && ...))
4251 {
4252 constexpr array<size_t, sizeof...(_Sizes)> __size = {_Sizes...};
4253 return __generate_from_n_evaluations<sizeof...(_Sizes), _Tuple>(
4254 [&](auto __i) constexpr {
4255 constexpr size_t __offset = [&]() {
4256 size_t __r = 0;
4257 for (unsigned __j = 0; __j < __i; ++__j)
4258 __r += __size[__j];
4259 return __r;
4260 }();
4261 return __deduced_simd<_Tp, __size[__i]>(
4262 __private_init,
4263 __extract_part<__offset, _Np, __size[__i]>(__data(__x)));
4264 });
4265 }
4266#ifdef _GLIBCXX_SIMD_USE_ALIASING_LOADS
4267 const __may_alias<_Tp>* const __element_ptr
4268 = reinterpret_cast<const __may_alias<_Tp>*>(&__x);
4269 return __generate_from_n_evaluations<sizeof...(_Sizes), _Tuple>(
4270 [&](auto __i) constexpr _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA {
4271 using _Vi = __deduced_simd<_Tp, _SL::_S_at(__i)>;
4272 constexpr size_t __offset = _SL::_S_before(__i);
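	    // editorial note: the following constants compute the largest
	    // power-of-2 alignment guaranteed at __element_ptr + __offset: the
	    // lowest set bit of the byte distance from the offset to the next
	    // __base_align boundary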
4273 constexpr size_t __base_align = alignof(simd<_Tp, _Ap>);
4274 constexpr size_t __a
4275 = __base_align - ((__offset * sizeof(_Tp)) % __base_align);
4276 constexpr size_t __b = ((__a - 1) & __a) ^ __a;
4277 constexpr size_t __alignment = __b == 0 ? __a : __b;
4278 return _Vi(__element_ptr + __offset, overaligned<__alignment>);
4279 });
4280#else
4281 return __generate_from_n_evaluations<sizeof...(_Sizes), _Tuple>(
4282 [&](auto __i) constexpr _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA {
4283 using _Vi = __deduced_simd<_Tp, _SL::_S_at(__i)>;
4284 const auto& __xx = __data(__x);
4285 using _Offset = decltype(_SL::_S_before(__i));
4286 return _Vi([&](auto __j) constexpr _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA {
4287 constexpr _SizeConstant<_Offset::value + __j> __k;
4288 return __xx[__k];
4289 });
4290 });
4291#endif
4292 }
4293
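// Example (editorial, illustrative sketch): splitting into unequally sized
// parts. The sizes must sum to the source width, as enforced by the
// enable_if on the declaration above.
//
//   stdx::fixed_size_simd<float, 8> __x = /* ... */;
//   auto [__a, __b, __c] = stdx::split<2, 2, 4>(__x);
//   // __a and __b have 2 elements each, __c has 4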
4294// }}}
4295
4296// __subscript_in_pack {{{
4297template <size_t _I, typename _Tp, typename _Ap, typename... _As>
4298 _GLIBCXX_SIMD_INTRINSIC constexpr _Tp
4299 __subscript_in_pack(const simd<_Tp, _Ap>& __x, const simd<_Tp, _As>&... __xs)
4300 {
4301 if constexpr (_I < simd_size_v<_Tp, _Ap>)
4302 return __x[_I];
4303 else
4304 return __subscript_in_pack<_I - simd_size_v<_Tp, _Ap>>(__xs...);
4305 }
4306
4307// }}}
4308// __store_pack_of_simd {{{
4309template <typename _Tp, typename _A0, typename... _As>
4310 _GLIBCXX_SIMD_INTRINSIC void
4311 __store_pack_of_simd(char* __mem, const simd<_Tp, _A0>& __x0, const simd<_Tp, _As>&... __xs)
4312 {
4313 constexpr size_t __n_bytes = sizeof(_Tp) * simd_size_v<_Tp, _A0>;
4314 __builtin_memcpy(__mem, &__data(__x0), __n_bytes);
4315 if constexpr (sizeof...(__xs) > 0)
4316 __store_pack_of_simd(__mem + __n_bytes, __xs...);
4317 }
4318
4319// }}}
4320// concat(simd...) {{{
4321template <typename _Tp, typename... _As, typename = __detail::__odr_helper>
4322 inline _GLIBCXX_SIMD_CONSTEXPR
4323 simd<_Tp, simd_abi::deduce_t<_Tp, (simd_size_v<_Tp, _As> + ...)>>
4324 concat(const simd<_Tp, _As>&... __xs)
4325 {
4326 constexpr int _Np = (simd_size_v<_Tp, _As> + ...);
4327 using _Abi = simd_abi::deduce_t<_Tp, _Np>;
4328 using _Rp = simd<_Tp, _Abi>;
4329 using _RW = typename _SimdTraits<_Tp, _Abi>::_SimdMember;
4330 if constexpr (sizeof...(__xs) == 1)
4331 return simd_cast<_Rp>(__xs...);
4332 else if ((... && __xs._M_is_constprop()))
4333 return _Rp([&](auto __i) constexpr _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA
4334 { return __subscript_in_pack<__i>(__xs...); });
4335 else if constexpr (__is_simd_wrapper_v<_RW> and sizeof...(__xs) == 2)
4336 {
4337 return {__private_init,
4338 __vec_shuffle(__as_vector(__xs)..., std::make_index_sequence<_RW::_S_full_size>(),
4339 [](int __i) {
4340 constexpr int __sizes[2] = {int(simd_size_v<_Tp, _As>)...};
4341 constexpr int __vsizes[2]
4342 = {int(sizeof(__as_vector(__xs)) / sizeof(_Tp))...};
4343 constexpr int __padding0 = __vsizes[0] - __sizes[0];
4344 return __i >= _Np ? -1 : __i < __sizes[0] ? __i : __i + __padding0;
4345 })};
4346 }
4347 else if constexpr (__is_simd_wrapper_v<_RW> and sizeof...(__xs) == 3)
4348 return [](const auto& __x0, const auto& __x1, const auto& __x2)
4349 _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA {
4350 return concat(concat(__x0, __x1), __x2);
4351 }(__xs...);
4352 else if constexpr (__is_simd_wrapper_v<_RW> and sizeof...(__xs) > 3)
4353 return [](const auto& __x0, const auto& __x1, const auto&... __rest)
4354 _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA {
4355 return concat(concat(__x0, __x1), concat(__rest...));
4356 }(__xs...);
4357 else
4358 {
4359 _Rp __r{};
4360 __store_pack_of_simd(reinterpret_cast<char*>(&__data(__r)), __xs...);
4361 return __r;
4362 }
4363 }
4364
4365// }}}
4366// concat(array<simd>) {{{
4367template <typename _Tp, typename _Abi, size_t _Np>
4368 _GLIBCXX_SIMD_ALWAYS_INLINE
4369 _GLIBCXX_SIMD_CONSTEXPR __deduced_simd<_Tp, simd_size_v<_Tp, _Abi> * _Np>
4370 concat(const array<simd<_Tp, _Abi>, _Np>& __x)
4371 {
4372 return __call_with_subscripts<_Np>(
4373 __x, [](const auto&... __xs) _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA {
4374 return concat(__xs...);
4375 });
4376 }
4377
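// Example (editorial, illustrative sketch): concat is the inverse of split.
//
//   stdx::fixed_size_simd<float, 8> __x = /* ... */;
//   auto [__a, __b] = stdx::split<4, 4>(__x);
//   auto __y = stdx::concat(__b, __a);  // 8 elements, halves swapped
//
//   std::array<stdx::native_simd<float>, 4> __arr = /* ... */;
//   auto __wide = stdx::concat(__arr);  // 4x the native width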
4378// }}}
4379
4380/// @cond undocumented
4381// _SmartReference {{{
4382template <typename _Up, typename _Accessor = _Up,
4383 typename _ValueType = typename _Up::value_type>
4384 class _SmartReference
4385 {
4386 friend _Accessor;
4387 int _M_index;
4388 _Up& _M_obj;
4389
4390 _GLIBCXX_SIMD_INTRINSIC constexpr _ValueType
4391 _M_read() const noexcept
4392 {
4393 if constexpr (is_arithmetic_v<_Up>)
4394 return _M_obj;
4395 else
4396 return _M_obj[_M_index];
4397 }
4398
4399 template <typename _Tp>
4400 _GLIBCXX_SIMD_INTRINSIC constexpr void
4401 _M_write(_Tp&& __x) const
4402 { _Accessor::_S_set(_M_obj, _M_index, static_cast<_Tp&&>(__x)); }
4403
4404 public:
4405 _GLIBCXX_SIMD_INTRINSIC constexpr
4406 _SmartReference(_Up& __o, int __i) noexcept
4407 : _M_index(__i), _M_obj(__o) {}
4408
4409 using value_type = _ValueType;
4410
4411 _GLIBCXX_SIMD_INTRINSIC
4412 _SmartReference(const _SmartReference&) = delete;
4413
4414 _GLIBCXX_SIMD_INTRINSIC constexpr
4415 operator value_type() const noexcept
4416 { return _M_read(); }
4417
4418 template <typename _Tp, typename = _ValuePreservingOrInt<__remove_cvref_t<_Tp>, value_type>>
4419 _GLIBCXX_SIMD_INTRINSIC constexpr _SmartReference
4420 operator=(_Tp&& __x) &&
4421 {
4422 _M_write(static_cast<_Tp&&>(__x));
4423 return {_M_obj, _M_index};
4424 }
4425
4426#define _GLIBCXX_SIMD_OP_(__op) \
4427 template <typename _Tp, \
4428 typename _TT = decltype(declval<value_type>() __op declval<_Tp>()), \
4429 typename = _ValuePreservingOrInt<__remove_cvref_t<_Tp>, _TT>, \
4430 typename = _ValuePreservingOrInt<_TT, value_type>> \
4431 _GLIBCXX_SIMD_INTRINSIC constexpr _SmartReference \
4432 operator __op##=(_Tp&& __x) && \
4433 { \
4434 const value_type& __lhs = _M_read(); \
4435 _M_write(__lhs __op __x); \
4436 return {_M_obj, _M_index}; \
4437 }
4438 _GLIBCXX_SIMD_ALL_ARITHMETICS(_GLIBCXX_SIMD_OP_);
4439 _GLIBCXX_SIMD_ALL_SHIFTS(_GLIBCXX_SIMD_OP_);
4440 _GLIBCXX_SIMD_ALL_BINARY(_GLIBCXX_SIMD_OP_);
4441#undef _GLIBCXX_SIMD_OP_
4442
4443 template <typename _Tp = void,
4444 typename = decltype(++declval<conditional_t<true, value_type, _Tp>&>())>
4445 _GLIBCXX_SIMD_INTRINSIC constexpr _SmartReference
4446 operator++() &&
4447 {
4448 value_type __x = _M_read();
4449 _M_write(++__x);
4450 return {_M_obj, _M_index};
4451 }
4452
4453 template <typename _Tp = void,
4454 typename = decltype(declval<conditional_t<true, value_type, _Tp>&>()++)>
4455 _GLIBCXX_SIMD_INTRINSIC constexpr value_type
4456 operator++(int) &&
4457 {
4458 const value_type __r = _M_read();
4459 value_type __x = __r;
4460 _M_write(++__x);
4461 return __r;
4462 }
4463
4464 template <typename _Tp = void,
4465 typename = decltype(--declval<conditional_t<true, value_type, _Tp>&>())>
4466 _GLIBCXX_SIMD_INTRINSIC constexpr _SmartReference
4467 operator--() &&
4468 {
4469 value_type __x = _M_read();
4470 _M_write(--__x);
4471 return {_M_obj, _M_index};
4472 }
4473
4474 template <typename _Tp = void,
4475 typename = decltype(declval<conditional_t<true, value_type, _Tp>&>()--)>
4476 _GLIBCXX_SIMD_INTRINSIC constexpr value_type
4477 operator--(int) &&
4478 {
4479 const value_type __r = _M_read();
4480 value_type __x = __r;
4481 _M_write(--__x);
4482 return __r;
4483 }
4484
4485 _GLIBCXX_SIMD_INTRINSIC friend void
4486 swap(_SmartReference&& __a, _SmartReference&& __b) noexcept(
4487 conjunction<
4488 is_nothrow_constructible<value_type, _SmartReference&&>,
4489 is_nothrow_assignable<_SmartReference&&, value_type&&>>::value)
4490 {
4491 value_type __tmp = static_cast<_SmartReference&&>(__a);
4492 static_cast<_SmartReference&&>(__a) = static_cast<value_type>(__b);
4493 static_cast<_SmartReference&&>(__b) = std::move(__tmp);
4494 }
4495
4496 _GLIBCXX_SIMD_INTRINSIC friend void
4497 swap(value_type& __a, _SmartReference&& __b) noexcept(
4498 conjunction<
4499 is_nothrow_constructible<value_type, value_type&&>,
4500 is_nothrow_assignable<value_type&, value_type&&>,
4501 is_nothrow_assignable<_SmartReference&&, value_type&&>>::value)
4502 {
4503 value_type __tmp(std::move(__a));
4504 __a = static_cast<value_type>(__b);
4505 static_cast<_SmartReference&&>(__b) = std::move(__tmp);
4506 }
4507
4508 _GLIBCXX_SIMD_INTRINSIC friend void
4509 swap(_SmartReference&& __a, value_type& __b) noexcept(
4510 conjunction<
4511 is_nothrow_constructible<value_type, _SmartReference&&>,
4512 is_nothrow_assignable<value_type&, value_type&&>,
4513 is_nothrow_assignable<_SmartReference&&, value_type&&>>::value)
4514 {
4515 value_type __tmp(__a);
4516 static_cast<_SmartReference&&>(__a) = std::move(__b);
4517 __b = std::move(__tmp);
4518 }
4519 };
4520
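// Example (editorial, illustrative sketch): _SmartReference is what
// simd::operator[] and simd_mask::operator[] return for mutable objects.
// Writes are routed through _Accessor::_S_set so the wrapped vector is
// updated in place:
//
//   stdx::native_simd<float> __v = 0.f;
//   __v[0] = 1.f;        // _SmartReference::operator=
//   __v[1] += 2.f;       // read-modify-write via _M_read/_M_write
//   float __x0 = __v[0]; // conversion through operator value_type
//
// The copy constructor is deleted and the mutating operators are
// &&-qualified: the reference is meant to be consumed immediately, not
// stored.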
4521// }}}
4522// __scalar_abi_wrapper {{{
4523template <int _Bytes>
4524 struct __scalar_abi_wrapper
4525 {
4526 template <typename _Tp> static constexpr size_t _S_full_size = 1;
4527 template <typename _Tp> static constexpr size_t _S_size = 1;
4528 template <typename _Tp> static constexpr size_t _S_is_partial = false;
4529
4530 template <typename _Tp, typename _Abi = simd_abi::scalar>
4531 static constexpr bool _S_is_valid_v
4532 = _Abi::template _IsValid<_Tp>::value && sizeof(_Tp) == _Bytes;
4533 };
4534
4535// }}}
4536// __decay_abi metafunction {{{
4537template <typename _Tp>
4538 struct __decay_abi { using type = _Tp; };
4539
4540template <int _Bytes>
4541 struct __decay_abi<__scalar_abi_wrapper<_Bytes>>
4542 { using type = simd_abi::scalar; };
4543
4544// }}}
4545// __find_next_valid_abi metafunction {{{1
4546// Given an ABI tag A<N>, find an N2 < N such that A<N2>::_S_is_valid_v<_Tp> is
4547// true, N2 is a power of 2, and A<N2>::_S_is_partial<_Tp> is false. The
4548// recursion stops once the resulting tag would hold fewer than 2 elements; in
4549// that case type::_S_is_valid_v<_Tp> may be false.
4550template <template <int> class _Abi, int _Bytes, typename _Tp>
4551 struct __find_next_valid_abi
4552 {
4553 static constexpr auto
4554 _S_choose()
4555 {
4556 constexpr int _NextBytes = std::__bit_ceil(_Bytes) / 2;
4557 using _NextAbi = _Abi<_NextBytes>;
4558 if constexpr (_NextBytes < sizeof(_Tp) * 2) // break recursion
4559 return _Abi<_Bytes>();
4560 else if constexpr (_NextAbi::template _S_is_partial<_Tp> == false
4561 && _NextAbi::template _S_is_valid_v<_Tp>)
4562 return _NextAbi();
4563 else
4564 return __find_next_valid_abi<_Abi, _NextBytes, _Tp>::_S_choose();
4565 }
4566
4567 using type = decltype(_S_choose());
4568 };
4569
4570template <int _Bytes, typename _Tp>
4571 struct __find_next_valid_abi<__scalar_abi_wrapper, _Bytes, _Tp>
4572 { using type = simd_abi::scalar; };
4573
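// Worked example (editorial; the outcome is target-dependent): for
// _Abi = simd_abi::_VecBuiltin, _Bytes = 24, _Tp = float the first step
// computes _NextBytes = __bit_ceil(24) / 2 = 16. If _VecBuiltin<16> is valid
// and not partial for float (e.g. SSE), it is selected; otherwise the search
// continues with 8, 4, ... until fewer than two floats would fit.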
4574// _AbiList {{{1
4575template <template <int> class...>
4576 struct _AbiList
4577 {
4578 template <typename, int> static constexpr bool _S_has_valid_abi = false;
4579 template <typename, int> using _FirstValidAbi = void;
4580 template <typename, int> using _BestAbi = void;
4581 };
4582
4583template <template <int> class _A0, template <int> class... _Rest>
4584 struct _AbiList<_A0, _Rest...>
4585 {
4586 template <typename _Tp, int _Np>
4587 static constexpr bool _S_has_valid_abi
4588      = _A0<sizeof(_Tp) * _Np>::template _S_is_valid_v<_Tp>
4589          || _AbiList<_Rest...>::template _S_has_valid_abi<_Tp, _Np>;
4590
4591 template <typename _Tp, int _Np>
4592 using _FirstValidAbi = conditional_t<
4593 _A0<sizeof(_Tp) * _Np>::template _S_is_valid_v<_Tp>,
4594 typename __decay_abi<_A0<sizeof(_Tp) * _Np>>::type,
4595 typename _AbiList<_Rest...>::template _FirstValidAbi<_Tp, _Np>>;
4596
4597 template <typename _Tp, int _Np>
4598 static constexpr auto
4599 _S_determine_best_abi()
4600 {
4601 static_assert(_Np >= 1);
4602 constexpr int _Bytes = sizeof(_Tp) * _Np;
4603 if constexpr (_Np == 1)
4604 return __make_dependent_t<_Tp, simd_abi::scalar>{};
4605 else
4606 {
4607 constexpr int __fullsize = _A0<_Bytes>::template _S_full_size<_Tp>;
4608 // _A0<_Bytes> is good if:
4609 // 1. The ABI tag is valid for _Tp
4610 // 2. The storage overhead is no more than padding to fill the next
4611 // power-of-2 number of bytes
4612	    if constexpr (_A0<_Bytes>::template _S_is_valid_v<_Tp>
4613	                    && __fullsize / 2 < _Np)
4614 return typename __decay_abi<_A0<_Bytes>>::type{};
4615 else
4616 {
4617 using _Bp =
4618 typename __find_next_valid_abi<_A0, _Bytes, _Tp>::type;
4619	        if constexpr (_Bp::template _S_is_valid_v<_Tp>
4620	                        && _Bp::template _S_size<_Tp> <= _Np)
4621 return _Bp{};
4622 else
4623 return
4624 typename _AbiList<_Rest...>::template _BestAbi<_Tp, _Np>{};
4625 }
4626 }
4627 }
4628
4629 template <typename _Tp, int _Np>
4630 using _BestAbi = decltype(_S_determine_best_abi<_Tp, _Np>());
4631 };
4632
4633// }}}1
4634
4635// the following lists all native ABIs, which makes them accessible to
4636// simd_abi::deduce and select_best_vector_type_t (for fixed_size). Order
4637// matters: Whatever comes first has higher priority.
4638using _AllNativeAbis = _AbiList<simd_abi::_VecBltnBtmsk, simd_abi::_VecBuiltin,
4639 __scalar_abi_wrapper>;
4640
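// Example (editorial; which tag wins depends on the enabled ISA): for
// simd_abi::deduce_t<float, 8> the list is searched in order, so a valid
// _VecBltnBtmsk (AVX-512 bitmask) tag is preferred over _VecBuiltin, and
// __scalar_abi_wrapper is the last resort, matching only single-element
// requests.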
4641// valid _SimdTraits specialization {{{1
4642template <typename _Tp, typename _Abi>
4643 struct _SimdTraits<_Tp, _Abi, void_t<typename _Abi::template _IsValid<_Tp>>>
4644 : _Abi::template __traits<_Tp> {};
4645
4646// __deduce_impl specializations {{{1
4647// try all native ABIs (including scalar) first
4648template <typename _Tp, size_t _Np>
4649 struct __deduce_impl<
4650 _Tp, _Np, enable_if_t<_AllNativeAbis::template _S_has_valid_abi<_Tp, _Np>>>
4651 { using type = _AllNativeAbis::_FirstValidAbi<_Tp, _Np>; };
4652
4653// fall back to fixed_size only if scalar and native ABIs don't match
4654template <typename _Tp, size_t _Np, typename = void>
4655 struct __deduce_fixed_size_fallback {};
4656
4657template <typename _Tp, size_t _Np>
4658 struct __deduce_fixed_size_fallback<_Tp, _Np,
4659 enable_if_t<simd_abi::fixed_size<_Np>::template _S_is_valid_v<_Tp>>>
4660 { using type = simd_abi::fixed_size<_Np>; };
4661
4662template <typename _Tp, size_t _Np, typename>
4663 struct __deduce_impl : public __deduce_fixed_size_fallback<_Tp, _Np> {};
4664
4665//}}}1
4666/// @endcond
4667
4668// simd_mask {{{
4669template <typename _Tp, typename _Abi>
4670 class simd_mask : public _SimdTraits<_Tp, _Abi>::_MaskBase
4671 {
4672 // types, tags, and friends {{{
4673 using _Traits = _SimdTraits<_Tp, _Abi>;
4674 using _MemberType = typename _Traits::_MaskMember;
4675
4676 // We map all masks with equal element sizeof to a single integer type, the
4677 // one given by __int_for_sizeof_t<_Tp>. This is the approach
4678 // [[gnu::vector_size(N)]] types take as well and it reduces the number of
4679 // template specializations in the implementation classes.
4680 using _Ip = __int_for_sizeof_t<_Tp>;
4681 static constexpr _Ip* _S_type_tag = nullptr;
4682
4683 friend typename _Traits::_MaskBase;
4684 friend class simd<_Tp, _Abi>; // to construct masks on return
4685 friend typename _Traits::_SimdImpl; // to construct masks on return and
4686 // inspect data on masked operations
4687 public:
4688 using _Impl = typename _Traits::_MaskImpl;
4689 friend _Impl;
4690
4691 // }}}
4692 // member types {{{
4693 using value_type = bool;
4694 using reference = _SmartReference<_MemberType, _Impl, value_type>;
4695 using simd_type = simd<_Tp, _Abi>;
4696 using abi_type = _Abi;
4697
4698 // }}}
4699 static constexpr size_t size() // {{{
4700 { return __size_or_zero_v<_Tp, _Abi>; }
4701
4702 // }}}
4703 // constructors & assignment {{{
4704 simd_mask() = default;
4705 simd_mask(const simd_mask&) = default;
4706 simd_mask(simd_mask&&) = default;
4707 simd_mask& operator=(const simd_mask&) = default;
4708 simd_mask& operator=(simd_mask&&) = default;
4709
4710 // }}}
4711 // access to internal representation (optional feature) {{{
4712 _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR explicit
4713 simd_mask(typename _Traits::_MaskCastType __init)
4714 : _M_data{__init} {}
4715    // conversion to the internal type is done in _MaskBase
4716
4717 // }}}
4718 // bitset interface (extension to be proposed) {{{
4719 // TS_FEEDBACK:
4720 // Conversion of simd_mask to and from bitset makes it much easier to
4721 // interface with other facilities. I suggest adding `static
4722 // simd_mask::from_bitset` and `simd_mask::to_bitset`.
4723 _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR static simd_mask
4724 __from_bitset(bitset<size()> bs)
4725 { return {__bitset_init, bs}; }
4726
4727 _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR bitset<size()>
4728 __to_bitset() const
4729 { return _Impl::_S_to_bits(_M_data)._M_to_bitset(); }
4730
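    // Example (editorial, illustrative sketch): the bitset round-trip
    // described in the TS_FEEDBACK note above.
    //
    //   stdx::native_simd_mask<float> __k = /* ... */;
    //   auto __bits = __k.__to_bitset();  // std::bitset<__k.size()>
    //   auto __k2 = stdx::native_simd_mask<float>::__from_bitset(__bits);
    //   // all_of(__k2 == __k) holds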
4731 // }}}
4732 // explicit broadcast constructor {{{
4733 _GLIBCXX_SIMD_ALWAYS_INLINE explicit _GLIBCXX_SIMD_CONSTEXPR
4734 simd_mask(value_type __x)
4735 : _M_data(_Impl::template _S_broadcast<_Ip>(__x)) {}
4736
4737 // }}}
4738 // implicit type conversion constructor {{{
4739 #ifdef _GLIBCXX_SIMD_ENABLE_IMPLICIT_MASK_CAST
4740 // proposed improvement
4741 template <typename _Up, typename _A2,
4742 typename = enable_if_t<simd_size_v<_Up, _A2> == size()>>
4743 _GLIBCXX_SIMD_ALWAYS_INLINE explicit(sizeof(_MemberType)
4744 != sizeof(typename _SimdTraits<_Up, _A2>::_MaskMember))
4745 simd_mask(const simd_mask<_Up, _A2>& __x)
4746 : simd_mask(__proposed::static_simd_cast<simd_mask>(__x)) {}
4747 #else
4748 // conforming to ISO/IEC 19570:2018
4749 template <typename _Up, typename = enable_if_t<conjunction<
4750 is_same<abi_type, simd_abi::fixed_size<size()>>,
4751 is_same<_Up, _Up>>::value>>
4752 _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR
4753 simd_mask(const simd_mask<_Up, simd_abi::fixed_size<size()>>& __x)
4754 : _M_data(_Impl::_S_from_bitmask(__data(__x), _S_type_tag)) {}
4755 #endif
4756
4757 // }}}
4758 // load constructor {{{
4759 template <typename _Flags>
4760 _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR
4761 simd_mask(const value_type* __mem, _IsSimdFlagType<_Flags>)
4762 : _M_data(_Impl::template _S_load<_Ip>(_Flags::template _S_apply<simd_mask>(__mem))) {}
4763
4764 template <typename _Flags>
4765 _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR
4766 simd_mask(const value_type* __mem, simd_mask __k, _IsSimdFlagType<_Flags>)
4767 : _M_data{}
4768 {
4769 _M_data = _Impl::_S_masked_load(_M_data, __k._M_data,
4770 _Flags::template _S_apply<simd_mask>(__mem));
4771 }
4772
4773 // }}}
4774 // loads [simd_mask.load] {{{
4775 template <typename _Flags>
4776 _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR void
4777 copy_from(const value_type* __mem, _IsSimdFlagType<_Flags>)
4778 { _M_data = _Impl::template _S_load<_Ip>(_Flags::template _S_apply<simd_mask>(__mem)); }
4779
4780 // }}}
4781 // stores [simd_mask.store] {{{
4782 template <typename _Flags>
4783 _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR void
4784 copy_to(value_type* __mem, _IsSimdFlagType<_Flags>) const
4785 { _Impl::_S_store(_M_data, _Flags::template _S_apply<simd_mask>(__mem)); }
4786
4787 // }}}
4788 // scalar access {{{
4789 _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR reference
4790 operator[](size_t __i)
4791 {
4792 if (__i >= size())
4793 __invoke_ub("Subscript %d is out of range [0, %d]", __i, size() - 1);
4794 return {_M_data, int(__i)};
4795 }
4796
4797 _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR value_type
4798 operator[](size_t __i) const
4799 {
4800 if (__i >= size())
4801 __invoke_ub("Subscript %d is out of range [0, %d]", __i, size() - 1);
4802 if constexpr (__is_scalar_abi<_Abi>())
4803 return _M_data;
4804 else
4805 return static_cast<bool>(_M_data[__i]);
4806 }
4807
4808 // }}}
4809 // negation {{{
4810 _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR simd_mask
4811 operator!() const
4812 { return {__private_init, _Impl::_S_bit_not(_M_data)}; }
4813
4814 // }}}
4815 // simd_mask binary operators [simd_mask.binary] {{{
4816 #ifdef _GLIBCXX_SIMD_ENABLE_IMPLICIT_MASK_CAST
4817 // simd_mask<int> && simd_mask<uint> needs disambiguation
4818 template <typename _Up, typename _A2,
4819 typename = enable_if_t<is_convertible_v<simd_mask<_Up, _A2>, simd_mask>>>
4820 _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend simd_mask
4821 operator&&(const simd_mask& __x, const simd_mask<_Up, _A2>& __y)
4822 {
4823 return {__private_init,
4824 _Impl::_S_logical_and(__x._M_data, simd_mask(__y)._M_data)};
4825 }
4826
4827 template <typename _Up, typename _A2,
4828 typename = enable_if_t<is_convertible_v<simd_mask<_Up, _A2>, simd_mask>>>
4829 _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend simd_mask
4830 operator||(const simd_mask& __x, const simd_mask<_Up, _A2>& __y)
4831 {
4832 return {__private_init,
4833 _Impl::_S_logical_or(__x._M_data, simd_mask(__y)._M_data)};
4834 }
4835 #endif // _GLIBCXX_SIMD_ENABLE_IMPLICIT_MASK_CAST
4836
4837 _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend simd_mask
4838 operator&&(const simd_mask& __x, const simd_mask& __y)
4839 { return {__private_init, _Impl::_S_logical_and(__x._M_data, __y._M_data)}; }
4840
4841 _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend simd_mask
4842 operator||(const simd_mask& __x, const simd_mask& __y)
4843 { return {__private_init, _Impl::_S_logical_or(__x._M_data, __y._M_data)}; }
4844
4845 _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend simd_mask
4846 operator&(const simd_mask& __x, const simd_mask& __y)
4847 { return {__private_init, _Impl::_S_bit_and(__x._M_data, __y._M_data)}; }
4848
4849 _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend simd_mask
4850 operator|(const simd_mask& __x, const simd_mask& __y)
4851 { return {__private_init, _Impl::_S_bit_or(__x._M_data, __y._M_data)}; }
4852
4853 _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend simd_mask
4854 operator^(const simd_mask& __x, const simd_mask& __y)
4855 { return {__private_init, _Impl::_S_bit_xor(__x._M_data, __y._M_data)}; }
4856
4857 _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend simd_mask&
4858 operator&=(simd_mask& __x, const simd_mask& __y)
4859 {
4860 __x._M_data = _Impl::_S_bit_and(__x._M_data, __y._M_data);
4861 return __x;
4862 }
4863
4864 _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend simd_mask&
4865 operator|=(simd_mask& __x, const simd_mask& __y)
4866 {
4867 __x._M_data = _Impl::_S_bit_or(__x._M_data, __y._M_data);
4868 return __x;
4869 }
4870
4871 _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend simd_mask&
4872 operator^=(simd_mask& __x, const simd_mask& __y)
4873 {
4874 __x._M_data = _Impl::_S_bit_xor(__x._M_data, __y._M_data);
4875 return __x;
4876 }
4877
4878 // }}}
4879 // simd_mask compares [simd_mask.comparison] {{{
4880 _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend simd_mask
4881 operator==(const simd_mask& __x, const simd_mask& __y)
4882 { return !operator!=(__x, __y); }
4883
4884 _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend simd_mask
4885 operator!=(const simd_mask& __x, const simd_mask& __y)
4886 { return {__private_init, _Impl::_S_bit_xor(__x._M_data, __y._M_data)}; }
4887
4888 // }}}
4889 // private_init ctor {{{
4890 _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR
4891 simd_mask(_PrivateInit, typename _Traits::_MaskMember __init)
4892 : _M_data(__init) {}
4893
4894 // }}}
4895 // private_init generator ctor {{{
4896 template <typename _Fp, typename = decltype(bool(declval<_Fp>()(size_t())))>
4897 _GLIBCXX_SIMD_INTRINSIC constexpr
4898 simd_mask(_PrivateInit, _Fp&& __gen)
4899 : _M_data()
4900 {
4901 __execute_n_times<size()>([&](auto __i) constexpr _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA {
4902 _Impl::_S_set(_M_data, __i, __gen(__i));
4903 });
4904 }
4905
4906 // }}}
4907 // bitset_init ctor {{{
4908 _GLIBCXX_SIMD_INTRINSIC constexpr
4909 simd_mask(_BitsetInit, bitset<size()> __init)
4910 : _M_data(_Impl::_S_from_bitmask(_SanitizedBitMask<size()>(__init), _S_type_tag))
4911 {}
4912
4913 // }}}
4914 // __cvt {{{
4915 // TS_FEEDBACK:
4916 // The conversion operator this implements should be a ctor on simd_mask.
4917    // Once you call .__cvt() on a simd_mask, the returned proxy converts implicitly to any simd_mask of equal size.
4918 // A useful variation: add `explicit(sizeof(_Tp) != sizeof(_Up))`
4919 struct _CvtProxy
4920 {
4921 template <typename _Up, typename _A2,
4922 typename = enable_if_t<simd_size_v<_Up, _A2> == simd_size_v<_Tp, _Abi>>>
4923 _GLIBCXX_SIMD_ALWAYS_INLINE
4924 operator simd_mask<_Up, _A2>() &&
4925 {
4926 using namespace std::experimental::__proposed;
4927 return static_simd_cast<simd_mask<_Up, _A2>>(_M_data);
4928 }
4929
4930 const simd_mask<_Tp, _Abi>& _M_data;
4931 };
4932
4933 _GLIBCXX_SIMD_INTRINSIC _CvtProxy
4934 __cvt() const
4935 { return {*this}; }
4936
4937 // }}}
4938 // operator?: overloads (suggested extension) {{{
4939 #ifdef __GXX_CONDITIONAL_IS_OVERLOADABLE__
4940 _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend simd_mask
4941 operator?:(const simd_mask& __k, const simd_mask& __where_true,
4942 const simd_mask& __where_false)
4943 {
4944 auto __ret = __where_false;
4945 _Impl::_S_masked_assign(__k._M_data, __ret._M_data, __where_true._M_data);
4946 return __ret;
4947 }
4948
4949 template <typename _U1, typename _U2,
4950 typename _Rp = simd<common_type_t<_U1, _U2>, _Abi>,
4951 typename = enable_if_t<conjunction_v<
4952 is_convertible<_U1, _Rp>, is_convertible<_U2, _Rp>,
4953 is_convertible<simd_mask, typename _Rp::mask_type>>>>
4954 _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend _Rp
4955 operator?:(const simd_mask& __k, const _U1& __where_true,
4956 const _U2& __where_false)
4957 {
4958 _Rp __ret = __where_false;
4959 _Rp::_Impl::_S_masked_assign(
4960 __data(static_cast<typename _Rp::mask_type>(__k)), __data(__ret),
4961 __data(static_cast<_Rp>(__where_true)));
4962 return __ret;
4963 }
4964
4965 #ifdef _GLIBCXX_SIMD_ENABLE_IMPLICIT_MASK_CAST
4966 template <typename _Kp, typename _Ak, typename _Up, typename _Au,
4967 typename = enable_if_t<
4968 conjunction_v<is_convertible<simd_mask<_Kp, _Ak>, simd_mask>,
4969 is_convertible<simd_mask<_Up, _Au>, simd_mask>>>>
4970 _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend simd_mask
4971 operator?:(const simd_mask<_Kp, _Ak>& __k, const simd_mask& __where_true,
4972 const simd_mask<_Up, _Au>& __where_false)
4973 {
4974 simd_mask __ret = __where_false;
4975 _Impl::_S_masked_assign(simd_mask(__k)._M_data, __ret._M_data,
4976 __where_true._M_data);
4977 return __ret;
4978 }
4979 #endif // _GLIBCXX_SIMD_ENABLE_IMPLICIT_MASK_CAST
4980 #endif // __GXX_CONDITIONAL_IS_OVERLOADABLE__
4981
4982 // }}}
4983 // _M_is_constprop {{{
4984 _GLIBCXX_SIMD_INTRINSIC constexpr bool
4985 _M_is_constprop() const
4986 {
4987 if constexpr (__is_scalar_abi<_Abi>())
4988 return __builtin_constant_p(_M_data);
4989 else
4990 return _M_data._M_is_constprop();
4991 }
4992
4993 // }}}
4994
4995 private:
4996 friend const auto& __data<_Tp, abi_type>(const simd_mask&);
4997 friend auto& __data<_Tp, abi_type>(simd_mask&);
4998 alignas(_Traits::_S_mask_align) _MemberType _M_data;
4999 };
5000
5001// }}}
5002
5003/// @cond undocumented
5004// __data(simd_mask) {{{
5005template <typename _Tp, typename _Ap>
5006 _GLIBCXX_SIMD_INTRINSIC constexpr const auto&
5007 __data(const simd_mask<_Tp, _Ap>& __x)
5008 { return __x._M_data; }
5009
5010template <typename _Tp, typename _Ap>
5011 _GLIBCXX_SIMD_INTRINSIC constexpr auto&
5012 __data(simd_mask<_Tp, _Ap>& __x)
5013 { return __x._M_data; }
5014
5015// }}}
5016/// @endcond
5017
5018// simd_mask reductions [simd_mask.reductions] {{{
5019template <typename _Tp, typename _Abi>
5020 _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR bool
5021 all_of(const simd_mask<_Tp, _Abi>& __k) noexcept
5022 {
5023 if (__builtin_is_constant_evaluated() || __k._M_is_constprop())
5024 {
5025 for (size_t __i = 0; __i < simd_size_v<_Tp, _Abi>; ++__i)
5026 if (!__k[__i])
5027 return false;
5028 return true;
5029 }
5030 else
5031 return _Abi::_MaskImpl::_S_all_of(__k);
5032 }
5033
5034template <typename _Tp, typename _Abi>
5035 _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR bool
5036 any_of(const simd_mask<_Tp, _Abi>& __k) noexcept
5037 {
5038 if (__builtin_is_constant_evaluated() || __k._M_is_constprop())
5039 {
5040 for (size_t __i = 0; __i < simd_size_v<_Tp, _Abi>; ++__i)
5041 if (__k[__i])
5042 return true;
5043 return false;
5044 }
5045 else
5046 return _Abi::_MaskImpl::_S_any_of(__k);
5047 }
5048
5049template <typename _Tp, typename _Abi>
5050 _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR bool
5051 none_of(const simd_mask<_Tp, _Abi>& __k) noexcept
5052 {
5053 if (__builtin_is_constant_evaluated() || __k._M_is_constprop())
5054 {
5055 for (size_t __i = 0; __i < simd_size_v<_Tp, _Abi>; ++__i)
5056 if (__k[__i])
5057 return false;
5058 return true;
5059 }
5060 else
5061 return _Abi::_MaskImpl::_S_none_of(__k);
5062 }
5063
5064template <typename _Tp, typename _Abi>
5065 _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR bool
5066 some_of(const simd_mask<_Tp, _Abi>& __k) noexcept
5067 {
5068 if (__builtin_is_constant_evaluated() || __k._M_is_constprop())
5069 {
5070 for (size_t __i = 1; __i < simd_size_v<_Tp, _Abi>; ++__i)
5071 if (__k[__i] != __k[__i - 1])
5072 return true;
5073 return false;
5074 }
5075 else
5076 return _Abi::_MaskImpl::_S_some_of(__k);
5077 }
5078
5079template <typename _Tp, typename _Abi>
5080 _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR int
5081 popcount(const simd_mask<_Tp, _Abi>& __k) noexcept
5082 {
5083 if (__builtin_is_constant_evaluated() || __k._M_is_constprop())
5084 {
5085 const int __r = __call_with_subscripts<simd_size_v<_Tp, _Abi>>(
5086 __k, [](auto... __elements) _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA {
5087 return ((__elements != 0) + ...);
5088 });
5089 if (__builtin_is_constant_evaluated() || __builtin_constant_p(__r))
5090 return __r;
5091 }
5092 return _Abi::_MaskImpl::_S_popcount(__k);
5093 }
5094
5095template <typename _Tp, typename _Abi>
5096 _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR int
5097 find_first_set(const simd_mask<_Tp, _Abi>& __k)
5098 {
5099 if (__builtin_is_constant_evaluated() || __k._M_is_constprop())
5100 {
5101 constexpr size_t _Np = simd_size_v<_Tp, _Abi>;
5102 const size_t _Idx = __call_with_n_evaluations<_Np>(
5103 [](auto... __indexes) _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA {
5104 return std::min({__indexes...});
5105 }, [&](auto __i) _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA {
5106 return __k[__i] ? +__i : _Np;
5107 });
5108 if (_Idx >= _Np)
5109 __invoke_ub("find_first_set(empty mask) is UB");
5110 if (__builtin_constant_p(_Idx))
5111 return _Idx;
5112 }
5113 return _Abi::_MaskImpl::_S_find_first_set(__k);
5114 }
5115
5116template <typename _Tp, typename _Abi>
5117 _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR int
5118 find_last_set(const simd_mask<_Tp, _Abi>& __k)
5119 {
5120 if (__builtin_is_constant_evaluated() || __k._M_is_constprop())
5121 {
5122 constexpr size_t _Np = simd_size_v<_Tp, _Abi>;
5123 const int _Idx = __call_with_n_evaluations<_Np>(
5124 [](auto... __indexes) _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA {
5125 return std::max({__indexes...});
5126 }, [&](auto __i) _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA {
5127 return __k[__i] ? int(__i) : -1;
5128 });
5129 if (_Idx < 0)
5130	  __invoke_ub("find_last_set(empty mask) is UB");
5131 if (__builtin_constant_p(_Idx))
5132 return _Idx;
5133 }
5134 return _Abi::_MaskImpl::_S_find_last_set(__k);
5135 }
5136
5137_GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR bool
5138all_of(_ExactBool __x) noexcept
5139{ return __x; }
5140
5141_GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR bool
5142any_of(_ExactBool __x) noexcept
5143{ return __x; }
5144
5145_GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR bool
5146none_of(_ExactBool __x) noexcept
5147{ return !__x; }
5148
5149_GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR bool
5150some_of(_ExactBool) noexcept
5151{ return false; }
5152
5153_GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR int
5154popcount(_ExactBool __x) noexcept
5155{ return __x; }
5156
5157_GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR int
5158find_first_set(_ExactBool)
5159{ return 0; }
5160
5161_GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR int
5162find_last_set(_ExactBool)
5163{ return 0; }
5164
5165// }}}
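// Usage sketch (editorial illustration, not part of this header): the
// reductions above consume a simd_mask produced by an element-wise
// compare; the _ExactBool overloads accept plain bool so the same calls
// also work for scalar results. Variable names are hypothetical.
//
//   namespace stdx = std::experimental;
//   stdx::native_simd<int> v([](auto i) { return int(i); }); // 0, 1, 2, ...
//   auto k = v > 1;
//   bool a = stdx::all_of(k);         // false: elements 0 and 1 are false
//   bool b = stdx::any_of(k);         // true whenever v.size() > 2
//   int  n = stdx::popcount(k);       // number of true elements
//   int  f = stdx::find_first_set(k); // 2; UB if no element is true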
5166
5167/// @cond undocumented
5168// _SimdIntOperators{{{1
5169template <typename _V, typename _Tp, typename _Abi, bool>
5170 class _SimdIntOperators {};
5171
5172template <typename _V, typename _Tp, typename _Abi>
5173 class _SimdIntOperators<_V, _Tp, _Abi, true>
5174 {
5175 using _Impl = typename _SimdTraits<_Tp, _Abi>::_SimdImpl;
5176
5177 _GLIBCXX_SIMD_INTRINSIC constexpr const _V&
5178 __derived() const
5179 { return *static_cast<const _V*>(this); }
5180
5181 template <typename _Up>
5182 _GLIBCXX_SIMD_INTRINSIC static _GLIBCXX_SIMD_CONSTEXPR _V
5183 _S_make_derived(_Up&& __d)
5184 { return {__private_init, static_cast<_Up&&>(__d)}; }
5185
5186 public:
5187 _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend
5188 _V&
5189 operator%=(_V& __lhs, const _V& __x)
5190 { return __lhs = __lhs % __x; }
5191
5192 _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend
5193 _V&
5194 operator&=(_V& __lhs, const _V& __x)
5195 { return __lhs = __lhs & __x; }
5196
5197 _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend
5198 _V&
5199 operator|=(_V& __lhs, const _V& __x)
5200 { return __lhs = __lhs | __x; }
5201
5202 _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend
5203 _V&
5204 operator^=(_V& __lhs, const _V& __x)
5205 { return __lhs = __lhs ^ __x; }
5206
5207 _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend
5208 _V&
5209 operator<<=(_V& __lhs, const _V& __x)
5210 { return __lhs = __lhs << __x; }
5211
5212 _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend
5213 _V&
5214 operator>>=(_V& __lhs, const _V& __x)
5215 { return __lhs = __lhs >> __x; }
5216
5217 _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend
5218 _V&
5219 operator<<=(_V& __lhs, int __x)
5220 { return __lhs = __lhs << __x; }
5221
5222 _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend
5223 _V&
5224 operator>>=(_V& __lhs, int __x)
5225 { return __lhs = __lhs >> __x; }
5226
5227 _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend
5228 _V
5229 operator%(const _V& __x, const _V& __y)
5230 {
5231 return _SimdIntOperators::_S_make_derived(
5232 _Impl::_S_modulus(__data(__x), __data(__y)));
5233 }
5234
5235 _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend
5236 _V
5237 operator&(const _V& __x, const _V& __y)
5238 {
5239 return _SimdIntOperators::_S_make_derived(
5240 _Impl::_S_bit_and(__data(__x), __data(__y)));
5241 }
5242
5243 _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend
5244 _V
5245 operator|(const _V& __x, const _V& __y)
5246 {
5247 return _SimdIntOperators::_S_make_derived(
5248 _Impl::_S_bit_or(__data(__x), __data(__y)));
5249 }
5250
5251 _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend
5252 _V
5253 operator^(const _V& __x, const _V& __y)
5254 {
5255 return _SimdIntOperators::_S_make_derived(
5256 _Impl::_S_bit_xor(__data(__x), __data(__y)));
5257 }
5258
5259 _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend
5260 _V
5261 operator<<(const _V& __x, const _V& __y)
5262 {
5263 return _SimdIntOperators::_S_make_derived(
5264 _Impl::_S_bit_shift_left(__data(__x), __data(__y)));
5265 }
5266
5267 _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend
5268 _V
5269 operator>>(const _V& __x, const _V& __y)
5270 {
5271 return _SimdIntOperators::_S_make_derived(
5272 _Impl::_S_bit_shift_right(__data(__x), __data(__y)));
5273 }
5274
5275 _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend
5276 _V
5277 operator<<(const _V& __x, int __y)
5278 {
5279 if (__y < 0)
5280 __invoke_ub("The behavior is undefined if the right operand of a "
5281 "shift operation is negative. [expr.shift]\nA shift by "
5282 "%d was requested",
5283 __y);
5284 if (size_t(__y) >= sizeof(declval<_Tp>() << __y) * __CHAR_BIT__)
5285 __invoke_ub(
5286 "The behavior is undefined if the right operand of a "
5287 "shift operation is greater than or equal to the width of the "
5288 "promoted left operand. [expr.shift]\nA shift by %d was requested",
5289 __y);
5290 return _SimdIntOperators::_S_make_derived(
5291 _Impl::_S_bit_shift_left(__data(__x), __y));
5292 }
5293
5294 _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend
5295 _V
5296 operator>>(const _V& __x, int __y)
5297 {
5298 if (__y < 0)
5299 __invoke_ub(
5300 "The behavior is undefined if the right operand of a shift "
5301 "operation is negative. [expr.shift]\nA shift by %d was requested",
5302 __y);
5303 if (size_t(__y) >= sizeof(declval<_Tp>() << __y) * __CHAR_BIT__)
5304 __invoke_ub(
5305 "The behavior is undefined if the right operand of a shift "
5306 "operation is greater than or equal to the width of the promoted "
5307 "left operand. [expr.shift]\nA shift by %d was requested",
5308 __y);
5309 return _SimdIntOperators::_S_make_derived(
5310 _Impl::_S_bit_shift_right(__data(__x), __y));
5311 }
5312
5313 // unary operators (for integral _Tp)
5314 _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR
5315 _V
5316 operator~() const
5317 { return {__private_init, _Impl::_S_complement(__derived()._M_data)}; }
5318 };
5319
5320//}}}1
5321/// @endcond
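// Usage sketch (editorial illustration, not part of this header):
// _SimdIntOperators injects the operators above into simd<_Tp, _Abi> only
// when _Tp is integral, so the following compiles for integer element
// types and is rejected for floating-point ones.
//
//   namespace stdx = std::experimental;
//   stdx::native_simd<unsigned> v(0x0F0Fu);
//   auto m = v % 4u;    // element-wise modulus
//   auto b = v & 0xFFu; // element-wise bit-and
//   auto s = v << 2;    // shift by scalar; negative or too-large shift
//                       // counts are UB (diagnosed via __invoke_ub when
//                       // _GLIBCXX_DEBUG_UB is defined)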
5322
5323// simd {{{
5324template <typename _Tp, typename _Abi>
5325 class simd : public _SimdIntOperators<
5326 simd<_Tp, _Abi>, _Tp, _Abi,
5327 conjunction<is_integral<_Tp>,
5328 typename _SimdTraits<_Tp, _Abi>::_IsValid>::value>,
5329 public _SimdTraits<_Tp, _Abi>::_SimdBase
5330 {
5331 using _Traits = _SimdTraits<_Tp, _Abi>;
5332 using _MemberType = typename _Traits::_SimdMember;
5333 using _CastType = typename _Traits::_SimdCastType;
5334 static constexpr _Tp* _S_type_tag = nullptr;
5335 friend typename _Traits::_SimdBase;
5336
5337 public:
5338 using _Impl = typename _Traits::_SimdImpl;
5339 friend _Impl;
5340 friend _SimdIntOperators<simd, _Tp, _Abi, true>;
5341
5342 using value_type = _Tp;
5343 using reference = _SmartReference<_MemberType, _Impl, value_type>;
5344 using mask_type = simd_mask<_Tp, _Abi>;
5345 using abi_type = _Abi;
5346
5347 static constexpr size_t size()
5348 { return __size_or_zero_v<_Tp, _Abi>; }
5349
5350 _GLIBCXX_SIMD_CONSTEXPR simd() = default;
5351 _GLIBCXX_SIMD_CONSTEXPR simd(const simd&) = default;
5352 _GLIBCXX_SIMD_CONSTEXPR simd(simd&&) noexcept = default;
5353 _GLIBCXX_SIMD_CONSTEXPR simd& operator=(const simd&) = default;
5354 _GLIBCXX_SIMD_CONSTEXPR simd& operator=(simd&&) noexcept = default;
5355
5356 // implicit broadcast constructor
5357 template <typename _Up,
5358 typename = enable_if_t<!is_same_v<__remove_cvref_t<_Up>, bool>>>
5359 _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR
5360 simd(_ValuePreservingOrInt<_Up, value_type>&& __x)
5361 : _M_data(
5362 _Impl::_S_broadcast(static_cast<value_type>(static_cast<_Up&&>(__x))))
5363 {}
5364
5365 // implicit type conversion constructor (convert from fixed_size to
5366 // fixed_size)
5367 template <typename _Up>
5368 _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR
5369 simd(const simd<_Up, simd_abi::fixed_size<size()>>& __x,
5370 enable_if_t<
5371 conjunction<
5372 is_same<simd_abi::fixed_size<size()>, abi_type>,
5373 negation<__is_narrowing_conversion<_Up, value_type>>,
5374 __converts_to_higher_integer_rank<_Up, value_type>>::value,
5375 void*> = nullptr)
5376 : simd{static_cast<array<_Up, size()>>(__x).data(), vector_aligned} {}
5377
5378 // explicit type conversion constructor
5379#ifdef _GLIBCXX_SIMD_ENABLE_STATIC_CAST
5380 template <typename _Up, typename _A2,
5381 typename = decltype(static_simd_cast<simd>(
5382 declval<const simd<_Up, _A2>&>()))>
5383 _GLIBCXX_SIMD_ALWAYS_INLINE explicit _GLIBCXX_SIMD_CONSTEXPR
5384 simd(const simd<_Up, _A2>& __x)
5385 : simd(static_simd_cast<simd>(__x)) {}
5386#endif // _GLIBCXX_SIMD_ENABLE_STATIC_CAST
5387
5388 // generator constructor
5389 template <typename _Fp>
5390 _GLIBCXX_SIMD_ALWAYS_INLINE explicit _GLIBCXX_SIMD_CONSTEXPR
5391 simd(_Fp&& __gen, _ValuePreservingOrInt<decltype(declval<_Fp>()(
5392 declval<_SizeConstant<0>&>())),
5393 value_type>* = nullptr)
5394 : _M_data(_Impl::_S_generator(static_cast<_Fp&&>(__gen), _S_type_tag)) {}
5395
5396 // load constructor
5397 template <typename _Up, typename _Flags>
5398 _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR
5399 simd(const _Up* __mem, _IsSimdFlagType<_Flags>)
5400 : _M_data(
5401 _Impl::_S_load(_Flags::template _S_apply<simd>(__mem), _S_type_tag))
5402 {}
5403
5404 // loads [simd.load]
5405 template <typename _Up, typename _Flags>
5406 _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR void
5407 copy_from(const _Vectorizable<_Up>* __mem, _IsSimdFlagType<_Flags>)
5408 {
5409 _M_data = static_cast<decltype(_M_data)>(
5410 _Impl::_S_load(_Flags::template _S_apply<simd>(__mem), _S_type_tag));
5411 }
5412
5413 // stores [simd.store]
5414 template <typename _Up, typename _Flags>
5415 _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR void
5416 copy_to(_Vectorizable<_Up>* __mem, _IsSimdFlagType<_Flags>) const
5417 {
5418 _Impl::_S_store(_M_data, _Flags::template _S_apply<simd>(__mem),
5419 _S_type_tag);
5420 }
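      // Usage sketch (editorial illustration, not part of this header):
      // copy_from/copy_to mirror the load constructor; the flag argument
      // (element_aligned, vector_aligned, or overaligned<N>) asserts the
      // alignment of the pointer.
      //
      //   namespace stdx = std::experimental;
      //   alignas(stdx::memory_alignment_v<stdx::native_simd<float>>)
      //     float buf[stdx::native_simd<float>::size()] = {};
      //   stdx::native_simd<float> v;
      //   v.copy_from(buf, stdx::vector_aligned); // load
      //   v += 1.f;
      //   v.copy_to(buf, stdx::vector_aligned);   // store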
5421
5422 // scalar access
5423 _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR reference
5424 operator[](size_t __i)
5425 { return {_M_data, int(__i)}; }
5426
5427 _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR value_type
5428 operator[]([[maybe_unused]] size_t __i) const
5429 {
5430 if constexpr (__is_scalar_abi<_Abi>())
5431 {
5432 _GLIBCXX_DEBUG_ASSERT(__i == 0);
5433 return _M_data;
5434 }
5435 else
5436 return _M_data[__i];
5437 }
5438
5439 // increment and decrement:
5440 _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR simd&
5441 operator++()
5442 {
5443 _Impl::_S_increment(_M_data);
5444 return *this;
5445 }
5446
5447 _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR simd
5448 operator++(int)
5449 {
5450 simd __r = *this;
5451 _Impl::_S_increment(_M_data);
5452 return __r;
5453 }
5454
5455 _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR simd&
5456 operator--()
5457 {
5458 _Impl::_S_decrement(_M_data);
5459 return *this;
5460 }
5461
5462 _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR simd
5463 operator--(int)
5464 {
5465 simd __r = *this;
5466 _Impl::_S_decrement(_M_data);
5467 return __r;
5468 }
5469
5470 // unary operators (for any _Tp)
5471 _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR mask_type
5472 operator!() const
5473 { return {__private_init, _Impl::_S_negate(_M_data)}; }
5474
5475 _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR simd
5476 operator+() const
5477 { return *this; }
5478
5479 _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR simd
5480 operator-() const
5481 { return {__private_init, _Impl::_S_unary_minus(_M_data)}; }
5482
5483 // access to internal representation (suggested extension)
5484 _GLIBCXX_SIMD_ALWAYS_INLINE explicit _GLIBCXX_SIMD_CONSTEXPR
5485 simd(_CastType __init) : _M_data(__init) {}
5486
5487 // compound assignment [simd.cassign]
5488 _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend simd&
5489 operator+=(simd& __lhs, const simd& __x)
5490 { return __lhs = __lhs + __x; }
5491
5492 _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend simd&
5493 operator-=(simd& __lhs, const simd& __x)
5494 { return __lhs = __lhs - __x; }
5495
5496 _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend simd&
5497 operator*=(simd& __lhs, const simd& __x)
5498 { return __lhs = __lhs * __x; }
5499
5500 _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend simd&
5501 operator/=(simd& __lhs, const simd& __x)
5502 { return __lhs = __lhs / __x; }
5503
5504 // binary operators [simd.binary]
5505 _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend simd
5506 operator+(const simd& __x, const simd& __y)
5507 { return {__private_init, _Impl::_S_plus(__x._M_data, __y._M_data)}; }
5508
5509 _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend simd
5510 operator-(const simd& __x, const simd& __y)
5511 { return {__private_init, _Impl::_S_minus(__x._M_data, __y._M_data)}; }
5512
5513 _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend simd
5514 operator*(const simd& __x, const simd& __y)
5515 { return {__private_init, _Impl::_S_multiplies(__x._M_data, __y._M_data)}; }
5516
5517 _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend simd
5518 operator/(const simd& __x, const simd& __y)
5519 { return {__private_init, _Impl::_S_divides(__x._M_data, __y._M_data)}; }
5520
5521 // compares [simd.comparison]
5522 _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend mask_type
5523 operator==(const simd& __x, const simd& __y)
5524 { return simd::_S_make_mask(_Impl::_S_equal_to(__x._M_data, __y._M_data)); }
5525
5526 _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend mask_type
5527 operator!=(const simd& __x, const simd& __y)
5528 {
5529 return simd::_S_make_mask(
5530 _Impl::_S_not_equal_to(__x._M_data, __y._M_data));
5531 }
5532
5533 _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend mask_type
5534 operator<(const simd& __x, const simd& __y)
5535 { return simd::_S_make_mask(_Impl::_S_less(__x._M_data, __y._M_data)); }
5536
5537 _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend mask_type
5538 operator<=(const simd& __x, const simd& __y)
5539 {
5540 return simd::_S_make_mask(_Impl::_S_less_equal(__x._M_data, __y._M_data));
5541 }
5542
5543 _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend mask_type
5544 operator>(const simd& __x, const simd& __y)
5545 { return simd::_S_make_mask(_Impl::_S_less(__y._M_data, __x._M_data)); }
5546
5547 _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend mask_type
5548 operator>=(const simd& __x, const simd& __y)
5549 {
5550 return simd::_S_make_mask(_Impl::_S_less_equal(__y._M_data, __x._M_data));
5551 }
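      // Usage sketch (editorial illustration, not part of this header):
      // compares are element-wise and yield mask_type, not bool; combine
      // the result with the reductions above or with where().
      //
      //   namespace stdx = std::experimental;
      //   stdx::native_simd<float> x(1.f), y(2.f);
      //   auto k = x < y;          // all true in this example
      //   if (stdx::all_of(k))
      //     stdx::where(k, x) = y; // masked assignment: x becomes 2.f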
5552
5553 // operator?: overloads (suggested extension) {{{
5554#ifdef __GXX_CONDITIONAL_IS_OVERLOADABLE__
5555 _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend simd
5556 operator?:(const mask_type& __k, const simd& __where_true,
5557 const simd& __where_false)
5558 {
5559 auto __ret = __where_false;
5560 _Impl::_S_masked_assign(__data(__k), __data(__ret), __data(__where_true));
5561 return __ret;
5562 }
5563
5564#endif // __GXX_CONDITIONAL_IS_OVERLOADABLE__
5565 // }}}
5566
5567	    // "private" because of the first argument's namespace
5568 _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR
5569 simd(_PrivateInit, const _MemberType& __init)
5570 : _M_data(__init) {}
5571
5572	    // "private" because of the first argument's namespace
5573 _GLIBCXX_SIMD_INTRINSIC
5574 simd(_BitsetInit, bitset<size()> __init) : _M_data()
5575 { where(mask_type(__bitset_init, __init), *this) = ~*this; }
5576
5577 _GLIBCXX_SIMD_INTRINSIC constexpr bool
5578 _M_is_constprop() const
5579 {
5580 if constexpr (__is_scalar_abi<_Abi>())
5581 return __builtin_constant_p(_M_data);
5582 else
5583 return _M_data._M_is_constprop();
5584 }
5585
5586 private:
5587 _GLIBCXX_SIMD_INTRINSIC static constexpr mask_type
5588 _S_make_mask(typename mask_type::_MemberType __k)
5589 { return {__private_init, __k}; }
5590
5591 friend const auto& __data<value_type, abi_type>(const simd&);
5592 friend auto& __data<value_type, abi_type>(simd&);
5593 alignas(_Traits::_S_simd_align) _MemberType _M_data;
5594 };
5595
5596// }}}
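// Usage sketch (editorial illustration, not part of this header): the simd
// constructors and hidden-friend operators above in action. Only
// value-preserving conversions (plus int) broadcast implicitly.
//
//   namespace stdx = std::experimental;
//   stdx::native_simd<double> a = 1.0;                           // broadcast
//   stdx::native_simd<double> b([](auto i) { return i * 0.5; }); // generator
//   auto c = -(a + b);   // element-wise arithmetic and unary minus
//   double first = c[0]; // scalar access: -1.0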
5597/// @cond undocumented
5598// __data {{{
5599template <typename _Tp, typename _Ap>
5600 _GLIBCXX_SIMD_INTRINSIC constexpr const auto&
5601 __data(const simd<_Tp, _Ap>& __x)
5602 { return __x._M_data; }
5603
5604template <typename _Tp, typename _Ap>
5605 _GLIBCXX_SIMD_INTRINSIC constexpr auto&
5606 __data(simd<_Tp, _Ap>& __x)
5607 { return __x._M_data; }
5608
5609// }}}
5610namespace __float_bitwise_operators { //{{{
5611template <typename _Tp, typename _Ap>
5612 _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR simd<_Tp, _Ap>
5613 operator^(const simd<_Tp, _Ap>& __a, const simd<_Tp, _Ap>& __b)
5614 { return {__private_init, _Ap::_SimdImpl::_S_bit_xor(__data(__a), __data(__b))}; }
5615
5616template <typename _Tp, typename _Ap>
5617 _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR simd<_Tp, _Ap>
5618 operator|(const simd<_Tp, _Ap>& __a, const simd<_Tp, _Ap>& __b)
5619 { return {__private_init, _Ap::_SimdImpl::_S_bit_or(__data(__a), __data(__b))}; }
5620
5621template <typename _Tp, typename _Ap>
5622 _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR simd<_Tp, _Ap>
5623 operator&(const simd<_Tp, _Ap>& __a, const simd<_Tp, _Ap>& __b)
5624 { return {__private_init, _Ap::_SimdImpl::_S_bit_and(__data(__a), __data(__b))}; }
5625
5626template <typename _Tp, typename _Ap>
5627 _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR
5628 enable_if_t<is_floating_point_v<_Tp>, simd<_Tp, _Ap>>
5629 operator~(const simd<_Tp, _Ap>& __a)
5630 { return {__private_init, _Ap::_SimdImpl::_S_complement(__data(__a))}; }
5631} // namespace __float_bitwise_operators }}}
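// Usage sketch (editorial illustration, not part of this header): the
// opt-in namespace above gives floating-point simd bitwise operators for
// bit-pattern manipulation, e.g. sign handling as used internally by the
// implementation's math functions.
//
//   namespace stdx = std::experimental;
//   using namespace stdx::__float_bitwise_operators;
//   stdx::native_simd<float> x(-2.f);
//   auto y = x ^ stdx::native_simd<float>(-0.f); // flip sign bit: 2.f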
5632/// @endcond
5633
5634/// @}
5635_GLIBCXX_SIMD_END_NAMESPACE
5636
5637#endif // __cplusplus >= 201703L
5638#endif // _GLIBCXX_EXPERIMENTAL_SIMD_H
5639
5640// vim: foldmethod=marker foldmarker={{{,}}}