// Definition of the public simd interfaces -*- C++ -*-

// Copyright (C) 2020-2022 Free Software Foundation, Inc.
//
// This file is part of the GNU ISO C++ Library. This library is free
// software; you can redistribute it and/or modify it under the
// terms of the GNU General Public License as published by the
// Free Software Foundation; either version 3, or (at your option)
// any later version.

// This library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.

// Under Section 7 of GPL version 3, you are granted additional
// permissions described in the GCC Runtime Library Exception, version
// 3.1, as published by the Free Software Foundation.

// You should have received a copy of the GNU General Public License and
// a copy of the GCC Runtime Library Exception along with this program;
// see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
// <http://www.gnu.org/licenses/>.

#ifndef _GLIBCXX_EXPERIMENTAL_SIMD_H
#define _GLIBCXX_EXPERIMENTAL_SIMD_H

#if __cplusplus >= 201703L

#include "simd_detail.h"
#include "numeric_traits.h"
#include <bit>
#include <bitset>
#ifdef _GLIBCXX_DEBUG_UB
#include <cstdio> // for stderr
#endif
#include <cstring>
#include <cmath>
#include <functional>
#include <iosfwd>
#include <utility>

#if _GLIBCXX_SIMD_X86INTRIN
#include <x86intrin.h>
#elif _GLIBCXX_SIMD_HAVE_NEON
#pragma GCC diagnostic push
// narrowing conversion of '__a' from 'uint64_t' {aka 'long long unsigned int'} to
// 'int64x1_t' {aka 'long long int'} [-Wnarrowing]
#pragma GCC diagnostic ignored "-Wnarrowing"
#include <arm_neon.h>
#pragma GCC diagnostic pop
#endif

/** @ingroup ts_simd
 * @{
 */
/* There are several closely related types, with the following naming
 * convention:
 * _Tp: vectorizable (arithmetic) type (or any type)
 * _TV: __vector_type_t<_Tp, _Np>
 * _TW: _SimdWrapper<_Tp, _Np>
 * _TI: __intrinsic_type_t<_Tp, _Np>
 * _TVT: _VectorTraits<_TV> or _VectorTraits<_TW>
 * If one additional type is needed use _U instead of _T.
 * Otherwise use _T\d, _TV\d, _TW\d, _TI\d, _TVT\d.
 *
 * More naming conventions:
 * _Ap or _Abi: An ABI tag from the simd_abi namespace
 * _Ip: often used for integer types with sizeof(_Ip) == sizeof(_Tp),
 *      _IV, _IW as for _TV, _TW
 * _Np: number of elements (not bytes)
 * _Bytes: number of bytes
 *
 * Variable names:
 * __k: mask object (vector- or bitmask)
 */
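// Illustrative sketch of the convention above for _Tp = float and _Np = 4
// (the concrete intrinsic type is an assumption for the example):
//
//   _Tp  -> float
//   _TV  -> __vector_type_t<float, 4>     // 16-Byte GNU vector
//   _TW  -> _SimdWrapper<float, 4>
//   _TI  -> __intrinsic_type_t<float, 4>  // e.g. __m128 on x86
//   _TVT -> _VectorTraits<_TV>            // or _VectorTraits<_TW>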
_GLIBCXX_SIMD_BEGIN_NAMESPACE

#if !_GLIBCXX_SIMD_X86INTRIN
using __m128 [[__gnu__::__vector_size__(16)]] = float;
using __m128d [[__gnu__::__vector_size__(16)]] = double;
using __m128i [[__gnu__::__vector_size__(16)]] = long long;
using __m256 [[__gnu__::__vector_size__(32)]] = float;
using __m256d [[__gnu__::__vector_size__(32)]] = double;
using __m256i [[__gnu__::__vector_size__(32)]] = long long;
using __m512 [[__gnu__::__vector_size__(64)]] = float;
using __m512d [[__gnu__::__vector_size__(64)]] = double;
using __m512i [[__gnu__::__vector_size__(64)]] = long long;
#endif

namespace simd_abi {
// simd_abi forward declarations {{{
// implementation details:
struct _Scalar;

template <int _Np>
  struct _Fixed;

// There are two major ABIs that appear on different architectures.
// Both have non-boolean values packed into an N Byte register
// -> #elements = N / sizeof(T)
// Masks differ:
// 1. Use value vector registers for masks (all 0 or all 1)
// 2. Use bitmasks (mask registers) with one bit per value in the corresponding
//    value vector
//
// Both can be partially used, masking off the rest when doing horizontal
// operations or operations that can trap (e.g. FP_INVALID or integer division
// by 0). This is encoded as the number of used bytes.
template <int _UsedBytes>
  struct _VecBuiltin;

template <int _UsedBytes>
  struct _VecBltnBtmsk;

template <typename _Tp, int _Np>
  using _VecN = _VecBuiltin<sizeof(_Tp) * _Np>;

template <int _UsedBytes = 16>
  using _Sse = _VecBuiltin<_UsedBytes>;

template <int _UsedBytes = 32>
  using _Avx = _VecBuiltin<_UsedBytes>;

template <int _UsedBytes = 64>
  using _Avx512 = _VecBltnBtmsk<_UsedBytes>;

template <int _UsedBytes = 16>
  using _Neon = _VecBuiltin<_UsedBytes>;

// implementation-defined:
using __sse = _Sse<>;
using __avx = _Avx<>;
using __avx512 = _Avx512<>;
using __neon = _Neon<>;
using __neon128 = _Neon<16>;
using __neon64 = _Neon<8>;
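// Illustrative (not normative) reading of the tags above: _VecBuiltin<16>
// (= __sse or __neon128) keeps 16 Bytes of values in one vector register and
// represents masks as all-zeros/all-ones vectors of the same width, whereas
// _VecBltnBtmsk<64> (= __avx512) keeps 64 Bytes of values and represents the
// mask as one bit per element in a dedicated mask register. E.g., assuming
// float elements:
//
//   simd<float, simd_abi::__sse>    // 4 floats, mask is a value vector
//   simd<float, simd_abi::__avx512> // 16 floats, mask is a 16-bit bitmask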

// standard:
template <typename _Tp, size_t _Np, typename...>
  struct deduce;

template <int _Np>
  using fixed_size = _Fixed<_Np>;

using scalar = _Scalar;

// }}}
} // namespace simd_abi
// forward declarations is_simd(_mask), simd(_mask), simd_size {{{
template <typename _Tp>
  struct is_simd;

template <typename _Tp>
  struct is_simd_mask;

template <typename _Tp, typename _Abi>
  class simd;

template <typename _Tp, typename _Abi>
  class simd_mask;

template <typename _Tp, typename _Abi>
  struct simd_size;

// }}}
// load/store flags {{{
struct element_aligned_tag
{
  template <typename _Tp, typename _Up = typename _Tp::value_type>
    static constexpr size_t _S_alignment = alignof(_Up);

  template <typename _Tp, typename _Up>
    _GLIBCXX_SIMD_INTRINSIC static constexpr _Up*
    _S_apply(_Up* __ptr)
    { return __ptr; }
};

struct vector_aligned_tag
{
  template <typename _Tp, typename _Up = typename _Tp::value_type>
    static constexpr size_t _S_alignment
      = std::__bit_ceil(sizeof(_Up) * _Tp::size());

  template <typename _Tp, typename _Up>
    _GLIBCXX_SIMD_INTRINSIC static constexpr _Up*
    _S_apply(_Up* __ptr)
    { return static_cast<_Up*>(__builtin_assume_aligned(__ptr, _S_alignment<_Tp, _Up>)); }
};

template <size_t _Np> struct overaligned_tag
{
  template <typename _Tp, typename _Up = typename _Tp::value_type>
    static constexpr size_t _S_alignment = _Np;

  template <typename _Tp, typename _Up>
    _GLIBCXX_SIMD_INTRINSIC static constexpr _Up*
    _S_apply(_Up* __ptr)
    { return static_cast<_Up*>(__builtin_assume_aligned(__ptr, _Np)); }
};

inline constexpr element_aligned_tag element_aligned = {};

inline constexpr vector_aligned_tag vector_aligned = {};

template <size_t _Np>
  inline constexpr overaligned_tag<_Np> overaligned = {};

// }}}
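// Example usage of the flag objects above (illustrative sketch; simd and its
// copy_from member are only declared further below):
//
//   float __mem[simd<float>::size()] = {};
//   simd<float> __v;
//   __v.copy_from(__mem, element_aligned); // assumes only alignof(float)
//   // With alignas(memory_alignment_v<simd<float>>) on __mem one may instead
//   // use vector_aligned, which permits aligned vector load instructions.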
template <size_t _Xp>
  using _SizeConstant = integral_constant<size_t, _Xp>;
// constexpr feature detection{{{
constexpr inline bool __have_mmx = _GLIBCXX_SIMD_HAVE_MMX;
constexpr inline bool __have_sse = _GLIBCXX_SIMD_HAVE_SSE;
constexpr inline bool __have_sse2 = _GLIBCXX_SIMD_HAVE_SSE2;
constexpr inline bool __have_sse3 = _GLIBCXX_SIMD_HAVE_SSE3;
constexpr inline bool __have_ssse3 = _GLIBCXX_SIMD_HAVE_SSSE3;
constexpr inline bool __have_sse4_1 = _GLIBCXX_SIMD_HAVE_SSE4_1;
constexpr inline bool __have_sse4_2 = _GLIBCXX_SIMD_HAVE_SSE4_2;
constexpr inline bool __have_xop = _GLIBCXX_SIMD_HAVE_XOP;
constexpr inline bool __have_avx = _GLIBCXX_SIMD_HAVE_AVX;
constexpr inline bool __have_avx2 = _GLIBCXX_SIMD_HAVE_AVX2;
constexpr inline bool __have_bmi = _GLIBCXX_SIMD_HAVE_BMI1;
constexpr inline bool __have_bmi2 = _GLIBCXX_SIMD_HAVE_BMI2;
constexpr inline bool __have_lzcnt = _GLIBCXX_SIMD_HAVE_LZCNT;
constexpr inline bool __have_sse4a = _GLIBCXX_SIMD_HAVE_SSE4A;
constexpr inline bool __have_fma = _GLIBCXX_SIMD_HAVE_FMA;
constexpr inline bool __have_fma4 = _GLIBCXX_SIMD_HAVE_FMA4;
constexpr inline bool __have_f16c = _GLIBCXX_SIMD_HAVE_F16C;
constexpr inline bool __have_popcnt = _GLIBCXX_SIMD_HAVE_POPCNT;
constexpr inline bool __have_avx512f = _GLIBCXX_SIMD_HAVE_AVX512F;
constexpr inline bool __have_avx512dq = _GLIBCXX_SIMD_HAVE_AVX512DQ;
constexpr inline bool __have_avx512vl = _GLIBCXX_SIMD_HAVE_AVX512VL;
constexpr inline bool __have_avx512bw = _GLIBCXX_SIMD_HAVE_AVX512BW;
constexpr inline bool __have_avx512dq_vl = __have_avx512dq && __have_avx512vl;
constexpr inline bool __have_avx512bw_vl = __have_avx512bw && __have_avx512vl;
constexpr inline bool __have_avx512bitalg = _GLIBCXX_SIMD_HAVE_AVX512BITALG;
constexpr inline bool __have_avx512vbmi2 = _GLIBCXX_SIMD_HAVE_AVX512VBMI2;
constexpr inline bool __have_avx512vbmi = _GLIBCXX_SIMD_HAVE_AVX512VBMI;
constexpr inline bool __have_avx512ifma = _GLIBCXX_SIMD_HAVE_AVX512IFMA;
constexpr inline bool __have_avx512cd = _GLIBCXX_SIMD_HAVE_AVX512CD;
constexpr inline bool __have_avx512vnni = _GLIBCXX_SIMD_HAVE_AVX512VNNI;
constexpr inline bool __have_avx512vpopcntdq = _GLIBCXX_SIMD_HAVE_AVX512VPOPCNTDQ;
constexpr inline bool __have_avx512vp2intersect = _GLIBCXX_SIMD_HAVE_AVX512VP2INTERSECT;

constexpr inline bool __have_neon = _GLIBCXX_SIMD_HAVE_NEON;
constexpr inline bool __have_neon_a32 = _GLIBCXX_SIMD_HAVE_NEON_A32;
constexpr inline bool __have_neon_a64 = _GLIBCXX_SIMD_HAVE_NEON_A64;
constexpr inline bool __support_neon_float =
#if defined __GCC_IEC_559
  __GCC_IEC_559 == 0;
#elif defined __FAST_MATH__
  true;
#else
  false;
#endif

#ifdef _ARCH_PWR10
constexpr inline bool __have_power10vec = true;
#else
constexpr inline bool __have_power10vec = false;
#endif
#ifdef __POWER9_VECTOR__
constexpr inline bool __have_power9vec = true;
#else
constexpr inline bool __have_power9vec = false;
#endif
#if defined __POWER8_VECTOR__
constexpr inline bool __have_power8vec = true;
#else
constexpr inline bool __have_power8vec = __have_power9vec;
#endif
#if defined __VSX__
constexpr inline bool __have_power_vsx = true;
#else
constexpr inline bool __have_power_vsx = __have_power8vec;
#endif
#if defined __ALTIVEC__
constexpr inline bool __have_power_vmx = true;
#else
constexpr inline bool __have_power_vmx = __have_power_vsx;
#endif

// }}}

namespace __detail
{
#ifdef math_errhandling
  // Determines _S_handle_fpexcept from math_errhandling if it is defined and expands to a constant
  // expression. math_errhandling may expand to an extern symbol, in which case a constexpr value
  // must be guessed.
  template <int = math_errhandling>
    constexpr bool
    __handle_fpexcept_impl(int)
    { return math_errhandling & MATH_ERREXCEPT; }
#endif

  // Fallback if math_errhandling doesn't work: with fast-math assume floating-point exceptions are
  // ignored, otherwise implement correct exception behavior.
  constexpr bool
  __handle_fpexcept_impl(float)
  {
#if defined __FAST_MATH__
    return false;
#else
    return true;
#endif
  }

  /// True if math functions must raise floating-point exceptions as specified by C17.
  static constexpr bool _S_handle_fpexcept = __handle_fpexcept_impl(0);

  constexpr std::uint_least64_t
  __floating_point_flags()
  {
    std::uint_least64_t __flags = 0;
    if constexpr (_S_handle_fpexcept)
      __flags |= 1;
#ifdef __FAST_MATH__
    __flags |= 1 << 1;
#elif __FINITE_MATH_ONLY__
    __flags |= 2 << 1;
#elif __GCC_IEC_559 < 2
    __flags |= 3 << 1;
#endif
    __flags |= (__FLT_EVAL_METHOD__ + 1) << 3;
    return __flags;
  }

  constexpr std::uint_least64_t
  __machine_flags()
  {
    if constexpr (__have_mmx || __have_sse)
      return __have_mmx
               | (__have_sse << 1)
               | (__have_sse2 << 2)
               | (__have_sse3 << 3)
               | (__have_ssse3 << 4)
               | (__have_sse4_1 << 5)
               | (__have_sse4_2 << 6)
               | (__have_xop << 7)
               | (__have_avx << 8)
               | (__have_avx2 << 9)
               | (__have_bmi << 10)
               | (__have_bmi2 << 11)
               | (__have_lzcnt << 12)
               | (__have_sse4a << 13)
               | (__have_fma << 14)
               | (__have_fma4 << 15)
               | (__have_f16c << 16)
               | (__have_popcnt << 17)
               | (__have_avx512f << 18)
               | (__have_avx512dq << 19)
               | (__have_avx512vl << 20)
               | (__have_avx512bw << 21)
               | (__have_avx512bitalg << 22)
               | (__have_avx512vbmi2 << 23)
               | (__have_avx512vbmi << 24)
               | (__have_avx512ifma << 25)
               | (__have_avx512cd << 26)
               | (__have_avx512vnni << 27)
               | (__have_avx512vpopcntdq << 28)
               | (__have_avx512vp2intersect << 29);
    else if constexpr (__have_neon)
      return __have_neon
               | (__have_neon_a32 << 1)
               | (__have_neon_a64 << 2)
               | (__support_neon_float << 3);
    else if constexpr (__have_power_vmx)
      return __have_power_vmx
               | (__have_power_vsx << 1)
               | (__have_power8vec << 2)
               | (__have_power9vec << 3)
               | (__have_power10vec << 4);
    else
      return 0;
  }

  namespace
  {
    struct _OdrEnforcer {};
  }

  template <std::uint_least64_t...>
    struct _MachineFlagsTemplate {};

  /**@internal
   * Use this type as default template argument to all function templates that
   * are not declared always_inline. It ensures that a function specialization
   * which the compiler decides not to inline has a unique symbol
   * (_OdrEnforcer) or a symbol matching the machine/architecture flags
   * (_MachineFlagsTemplate). This helps to avoid ODR violations in cases where
   * users link TUs compiled with different flags. This is especially important
   * for using simd in libraries.
   */
  using __odr_helper
    = conditional_t<__machine_flags() == 0, _OdrEnforcer,
                    _MachineFlagsTemplate<__machine_flags(), __floating_point_flags()>>;
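
  // Illustrative sketch of the intended use of __odr_helper (the function
  // name below is hypothetical, not part of this header):
  //
  //   template <typename _Tp, typename = __detail::__odr_helper>
  //     void __not_always_inline_helper(_Tp);
  //
  // A TU compiled with -mavx2 and a TU compiled without it then instantiate
  // symbols with different _MachineFlagsTemplate arguments, instead of two
  // incompatible definitions of one symbol (an ODR violation).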

  struct _Minimum
  {
    template <typename _Tp>
      _GLIBCXX_SIMD_INTRINSIC constexpr
      _Tp
      operator()(_Tp __a, _Tp __b) const
      {
        using std::min;
        return min(__a, __b);
      }
  };

  struct _Maximum
  {
    template <typename _Tp>
      _GLIBCXX_SIMD_INTRINSIC constexpr
      _Tp
      operator()(_Tp __a, _Tp __b) const
      {
        using std::max;
        return max(__a, __b);
      }
  };
} // namespace __detail

// unrolled/pack execution helpers
// __execute_n_times{{{
template <typename _Fp, size_t... _I>
  [[__gnu__::__flatten__]] _GLIBCXX_SIMD_INTRINSIC constexpr
  void
  __execute_on_index_sequence(_Fp&& __f, index_sequence<_I...>)
  { ((void)__f(_SizeConstant<_I>()), ...); }

template <typename _Fp>
  _GLIBCXX_SIMD_INTRINSIC constexpr void
  __execute_on_index_sequence(_Fp&&, index_sequence<>)
  { }

template <size_t _Np, typename _Fp>
  _GLIBCXX_SIMD_INTRINSIC constexpr void
  __execute_n_times(_Fp&& __f)
  {
    __execute_on_index_sequence(static_cast<_Fp&&>(__f),
                                make_index_sequence<_Np>{});
  }

// }}}
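// Example (illustrative): __execute_n_times invokes __f with _SizeConstant<0>
// ... _SizeConstant<_Np - 1>, so the index is usable in constant expressions:
//
//   int __a[4] = {1, 2, 3, 4};
//   __execute_n_times<4>([&](auto __i) { __a[__i] *= 2; });
//   // __a is now {2, 4, 6, 8}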
// __generate_from_n_evaluations{{{
template <typename _R, typename _Fp, size_t... _I>
  [[__gnu__::__flatten__]] _GLIBCXX_SIMD_INTRINSIC constexpr
  _R
  __execute_on_index_sequence_with_return(_Fp&& __f, index_sequence<_I...>)
  { return _R{__f(_SizeConstant<_I>())...}; }

template <size_t _Np, typename _R, typename _Fp>
  _GLIBCXX_SIMD_INTRINSIC constexpr _R
  __generate_from_n_evaluations(_Fp&& __f)
  {
    return __execute_on_index_sequence_with_return<_R>(
             static_cast<_Fp&&>(__f), make_index_sequence<_Np>{});
  }

// }}}
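// Example (illustrative): braced-init an aggregate from _Np evaluations,
// e.g. an array of squares:
//
//   auto __sq = __generate_from_n_evaluations<4, array<int, 4>>(
//                 [](auto __i) { return int(__i) * int(__i); }); // {0, 1, 4, 9}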
// __call_with_n_evaluations{{{
template <size_t... _I, typename _F0, typename _FArgs>
  [[__gnu__::__flatten__]] _GLIBCXX_SIMD_INTRINSIC constexpr
  auto
  __call_with_n_evaluations(index_sequence<_I...>, _F0&& __f0, _FArgs&& __fargs)
  { return __f0(__fargs(_SizeConstant<_I>())...); }

template <size_t _Np, typename _F0, typename _FArgs>
  _GLIBCXX_SIMD_INTRINSIC constexpr auto
  __call_with_n_evaluations(_F0&& __f0, _FArgs&& __fargs)
  {
    return __call_with_n_evaluations(make_index_sequence<_Np>{},
                                     static_cast<_F0&&>(__f0),
                                     static_cast<_FArgs&&>(__fargs));
  }

// }}}
// __call_with_subscripts{{{
template <size_t _First = 0, size_t... _It, typename _Tp, typename _Fp>
  [[__gnu__::__flatten__]] _GLIBCXX_SIMD_INTRINSIC constexpr
  auto
  __call_with_subscripts(_Tp&& __x, index_sequence<_It...>, _Fp&& __fun)
  { return __fun(__x[_First + _It]...); }

template <size_t _Np, size_t _First = 0, typename _Tp, typename _Fp>
  _GLIBCXX_SIMD_INTRINSIC constexpr auto
  __call_with_subscripts(_Tp&& __x, _Fp&& __fun)
  {
    return __call_with_subscripts<_First>(static_cast<_Tp&&>(__x),
                                          make_index_sequence<_Np>(),
                                          static_cast<_Fp&&>(__fun));
  }

// }}}
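// Examples (illustrative): __call_with_n_evaluations folds _Np generated
// values with a single callable; __call_with_subscripts does the same with
// _Np subscripted values of __x:
//
//   auto __sum = __call_with_n_evaluations<4>(
//                  [](auto... __vs) { return (__vs + ...); },
//                  [](auto __i) { return int(__i); }); // 0+1+2+3 == 6
//
//   int __a[4] = {1, 2, 3, 4};
//   auto __prod = __call_with_subscripts<4>(
//                   __a, [](auto... __vs) { return (__vs * ...); }); // 24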

// vvv ---- type traits ---- vvv
// integer type aliases{{{
using _UChar = unsigned char;
using _SChar = signed char;
using _UShort = unsigned short;
using _UInt = unsigned int;
using _ULong = unsigned long;
using _ULLong = unsigned long long;
using _LLong = long long;

//}}}
// __first_of_pack{{{
template <typename _T0, typename...>
  struct __first_of_pack
  { using type = _T0; };

template <typename... _Ts>
  using __first_of_pack_t = typename __first_of_pack<_Ts...>::type;

//}}}
// __value_type_or_identity_t {{{
template <typename _Tp>
  typename _Tp::value_type
  __value_type_or_identity_impl(int);

template <typename _Tp>
  _Tp
  __value_type_or_identity_impl(float);

template <typename _Tp>
  using __value_type_or_identity_t
    = decltype(__value_type_or_identity_impl<_Tp>(int()));

// }}}
// __is_vectorizable {{{
template <typename _Tp>
  struct __is_vectorizable : public is_arithmetic<_Tp> {};

template <>
  struct __is_vectorizable<bool> : public false_type {};

template <typename _Tp>
  inline constexpr bool __is_vectorizable_v = __is_vectorizable<_Tp>::value;

// Deduces to a vectorizable type
template <typename _Tp, typename = enable_if_t<__is_vectorizable_v<_Tp>>>
  using _Vectorizable = _Tp;

// }}}
// _LoadStorePtr / __is_possible_loadstore_conversion {{{
template <typename _Ptr, typename _ValueType>
  struct __is_possible_loadstore_conversion
  : conjunction<__is_vectorizable<_Ptr>, __is_vectorizable<_ValueType>> {};

template <>
  struct __is_possible_loadstore_conversion<bool, bool> : true_type {};

// Deduces to a type allowed for load/store with the given value type.
template <typename _Ptr, typename _ValueType,
          typename = enable_if_t<
            __is_possible_loadstore_conversion<_Ptr, _ValueType>::value>>
  using _LoadStorePtr = _Ptr;

// }}}
// __is_bitmask{{{
template <typename _Tp, typename = void_t<>>
  struct __is_bitmask : false_type {};

template <typename _Tp>
  inline constexpr bool __is_bitmask_v = __is_bitmask<_Tp>::value;

// the __mmaskXX case:
template <typename _Tp>
  struct __is_bitmask<_Tp,
    void_t<decltype(declval<unsigned&>() = declval<_Tp>() & 1u)>>
  : true_type {};

// }}}
// __int_for_sizeof{{{
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wpedantic"
template <size_t _Bytes>
  constexpr auto
  __int_for_sizeof()
  {
    static_assert(_Bytes > 0);
    if constexpr (_Bytes == sizeof(int))
      return int();
    else if constexpr (_Bytes == sizeof(_SChar))
      return _SChar();
    else if constexpr (_Bytes == sizeof(short))
      return short();
    else if constexpr (_Bytes == sizeof(long))
      return long();
    else if constexpr (_Bytes == sizeof(_LLong))
      return _LLong();
 #ifdef __SIZEOF_INT128__
    else if constexpr (_Bytes == sizeof(__int128))
      return __int128();
 #endif // __SIZEOF_INT128__
    else if constexpr (_Bytes % sizeof(int) == 0)
      {
        constexpr size_t _Np = _Bytes / sizeof(int);
        struct _Ip
        {
          int _M_data[_Np];

          _GLIBCXX_SIMD_INTRINSIC constexpr _Ip
          operator&(_Ip __rhs) const
          {
            return __generate_from_n_evaluations<_Np, _Ip>(
                     [&](auto __i) _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA {
                       return __rhs._M_data[__i] & _M_data[__i];
                     });
          }

          _GLIBCXX_SIMD_INTRINSIC constexpr _Ip
          operator|(_Ip __rhs) const
          {
            return __generate_from_n_evaluations<_Np, _Ip>(
                     [&](auto __i) _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA {
                       return __rhs._M_data[__i] | _M_data[__i];
                     });
          }

          _GLIBCXX_SIMD_INTRINSIC constexpr _Ip
          operator^(_Ip __rhs) const
          {
            return __generate_from_n_evaluations<_Np, _Ip>(
                     [&](auto __i) _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA {
                       return __rhs._M_data[__i] ^ _M_data[__i];
                     });
          }

          _GLIBCXX_SIMD_INTRINSIC constexpr _Ip
          operator~() const
          {
            return __generate_from_n_evaluations<_Np, _Ip>(
                     [&](auto __i) _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA { return ~_M_data[__i]; });
          }
        };
        return _Ip{};
      }
    else
      static_assert(_Bytes == 0, "this should be unreachable");
  }
#pragma GCC diagnostic pop

template <typename _Tp>
  using __int_for_sizeof_t = decltype(__int_for_sizeof<sizeof(_Tp)>());

template <size_t _Np>
  using __int_with_sizeof_t = decltype(__int_for_sizeof<_Np>());

// }}}
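// Examples (illustrative) of what __int_for_sizeof deduces:
//
//   static_assert(is_same_v<__int_with_sizeof_t<4>, int>);
//   static_assert(sizeof(__int_for_sizeof_t<double>) == 8); // long or _LLong
//
// For sizes with no matching integer type, the result is the local struct
// _Ip wrapping int[_Np], which still supports &, |, ^ and ~.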
// __is_fixed_size_abi{{{
template <typename _Tp>
  struct __is_fixed_size_abi : false_type {};

template <int _Np>
  struct __is_fixed_size_abi<simd_abi::fixed_size<_Np>> : true_type {};

template <typename _Tp>
  inline constexpr bool __is_fixed_size_abi_v = __is_fixed_size_abi<_Tp>::value;

// }}}
// __is_scalar_abi {{{
template <typename _Abi>
  constexpr bool
  __is_scalar_abi()
  { return is_same_v<simd_abi::scalar, _Abi>; }

// }}}
// __abi_bytes_v {{{
template <template <int> class _Abi, int _Bytes>
  constexpr int
  __abi_bytes_impl(_Abi<_Bytes>*)
  { return _Bytes; }

template <typename _Tp>
  constexpr int
  __abi_bytes_impl(_Tp*)
  { return -1; }

template <typename _Abi>
  inline constexpr int __abi_bytes_v
    = __abi_bytes_impl(static_cast<_Abi*>(nullptr));

// }}}
// __is_builtin_bitmask_abi {{{
template <typename _Abi>
  constexpr bool
  __is_builtin_bitmask_abi()
  { return is_same_v<simd_abi::_VecBltnBtmsk<__abi_bytes_v<_Abi>>, _Abi>; }

// }}}
// __is_sse_abi {{{
template <typename _Abi>
  constexpr bool
  __is_sse_abi()
  {
    constexpr auto _Bytes = __abi_bytes_v<_Abi>;
    return _Bytes <= 16 && is_same_v<simd_abi::_VecBuiltin<_Bytes>, _Abi>;
  }

// }}}
// __is_avx_abi {{{
template <typename _Abi>
  constexpr bool
  __is_avx_abi()
  {
    constexpr auto _Bytes = __abi_bytes_v<_Abi>;
    return _Bytes > 16 && _Bytes <= 32
             && is_same_v<simd_abi::_VecBuiltin<_Bytes>, _Abi>;
  }

// }}}
// __is_avx512_abi {{{
template <typename _Abi>
  constexpr bool
  __is_avx512_abi()
  {
    constexpr auto _Bytes = __abi_bytes_v<_Abi>;
    return _Bytes <= 64 && is_same_v<simd_abi::_Avx512<_Bytes>, _Abi>;
  }

// }}}
// __is_neon_abi {{{
template <typename _Abi>
  constexpr bool
  __is_neon_abi()
  {
    constexpr auto _Bytes = __abi_bytes_v<_Abi>;
    return _Bytes <= 16 && is_same_v<simd_abi::_VecBuiltin<_Bytes>, _Abi>;
  }

// }}}
// __make_dependent_t {{{
template <typename, typename _Up>
  struct __make_dependent
  { using type = _Up; };

template <typename _Tp, typename _Up>
  using __make_dependent_t = typename __make_dependent<_Tp, _Up>::type;

// }}}
// ^^^ ---- type traits ---- ^^^

// __invoke_ub{{{
template <typename... _Args>
  [[noreturn]] _GLIBCXX_SIMD_ALWAYS_INLINE void
  __invoke_ub([[maybe_unused]] const char* __msg, [[maybe_unused]] const _Args&... __args)
  {
#ifdef _GLIBCXX_DEBUG_UB
    __builtin_fprintf(stderr, __msg, __args...);
    __builtin_trap();
#else
    __builtin_unreachable();
#endif
  }

// }}}
// __assert_unreachable{{{
template <typename _Tp>
  struct __assert_unreachable
  { static_assert(!is_same_v<_Tp, _Tp>, "this should be unreachable"); };

// }}}
// __size_or_zero_v {{{
template <typename _Tp, typename _Ap, size_t _Np = simd_size<_Tp, _Ap>::value>
  constexpr size_t
  __size_or_zero_dispatch(int)
  { return _Np; }

template <typename _Tp, typename _Ap>
  constexpr size_t
  __size_or_zero_dispatch(float)
  { return 0; }

template <typename _Tp, typename _Ap>
  inline constexpr size_t __size_or_zero_v
    = __size_or_zero_dispatch<_Tp, _Ap>(0);

// }}}
// __div_roundup {{{
inline constexpr size_t
__div_roundup(size_t __a, size_t __b)
{ return (__a + __b - 1) / __b; }

// }}}
// _ExactBool{{{
class _ExactBool
{
  const bool _M_data;

public:
  _GLIBCXX_SIMD_INTRINSIC constexpr
  _ExactBool(bool __b) : _M_data(__b) {}

  _ExactBool(int) = delete;

  _GLIBCXX_SIMD_INTRINSIC constexpr
  operator bool() const
  { return _M_data; }
};

// }}}
// __may_alias{{{
/**@internal
 * Helper __may_alias<_Tp> that turns _Tp into the type to be used for an
 * aliasing pointer. This adds the __may_alias attribute to _Tp (with compilers
 * that support it).
 */
template <typename _Tp>
  using __may_alias [[__gnu__::__may_alias__]] = _Tp;

// }}}
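// Example (illustrative): __may_alias enables type-punned loads/stores
// without violating strict aliasing, e.g. loading a vector from an arbitrary
// float buffer __mem of sufficient size:
//
//   using _FloatV = __vector_type_t<float, 4>;
//   _FloatV __v = *reinterpret_cast<const __may_alias<_FloatV>*>(__mem);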
// _UnsupportedBase {{{
// simd and simd_mask base for unsupported <_Tp, _Abi>
struct _UnsupportedBase
{
  _UnsupportedBase() = delete;
  _UnsupportedBase(const _UnsupportedBase&) = delete;
  _UnsupportedBase& operator=(const _UnsupportedBase&) = delete;
  ~_UnsupportedBase() = delete;
};

// }}}
// _InvalidTraits {{{
/**
 * @internal
 * Defines the implementation of a given <_Tp, _Abi>.
 *
 * Implementations must ensure that only valid <_Tp, _Abi> instantiations are
 * possible. Static assertions in the type definition do not suffice. It is
 * important that SFINAE works.
 */
struct _InvalidTraits
{
  using _IsValid = false_type;
  using _SimdBase = _UnsupportedBase;
  using _MaskBase = _UnsupportedBase;

  static constexpr size_t _S_full_size = 0;
  static constexpr bool _S_is_partial = false;

  static constexpr size_t _S_simd_align = 1;
  struct _SimdImpl;
  struct _SimdMember {};
  struct _SimdCastType;

  static constexpr size_t _S_mask_align = 1;
  struct _MaskImpl;
  struct _MaskMember {};
  struct _MaskCastType;
};

// }}}
// _SimdTraits {{{
template <typename _Tp, typename _Abi, typename = void_t<>>
  struct _SimdTraits : _InvalidTraits {};

// }}}
// __private_init, __bitset_init{{{
/**
 * @internal
 * Tag used for private init constructor of simd and simd_mask
 */
inline constexpr struct _PrivateInit {} __private_init = {};

inline constexpr struct _BitsetInit {} __bitset_init = {};

// }}}
// __is_narrowing_conversion<_From, _To>{{{
template <typename _From, typename _To, bool = is_arithmetic_v<_From>,
          bool = is_arithmetic_v<_To>>
  struct __is_narrowing_conversion;

// ignore "signed/unsigned mismatch" in the following trait.
// The implicit conversions will do the right thing here.
template <typename _From, typename _To>
  struct __is_narrowing_conversion<_From, _To, true, true>
  : public __bool_constant<(
      __digits_v<_From> > __digits_v<_To>
        || __finite_max_v<_From> > __finite_max_v<_To>
        || __finite_min_v<_From> < __finite_min_v<_To>
        || (is_signed_v<_From> && is_unsigned_v<_To>))> {};

template <typename _Tp>
  struct __is_narrowing_conversion<_Tp, bool, true, true>
  : public true_type {};

template <>
  struct __is_narrowing_conversion<bool, bool, true, true>
  : public false_type {};

template <typename _Tp>
  struct __is_narrowing_conversion<_Tp, _Tp, true, true>
  : public false_type {};

template <typename _From, typename _To>
  struct __is_narrowing_conversion<_From, _To, false, true>
  : public negation<is_convertible<_From, _To>> {};

// }}}
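// Examples (illustrative) for the trait above:
//
//   static_assert(__is_narrowing_conversion<int, short>::value);    // digits
//   static_assert(__is_narrowing_conversion<int, unsigned>::value); // sign
//   static_assert(!__is_narrowing_conversion<short, int>::value);
//   static_assert(!__is_narrowing_conversion<float, double>::value);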
// __converts_to_higher_integer_rank{{{
template <typename _From, typename _To, bool = (sizeof(_From) < sizeof(_To))>
  struct __converts_to_higher_integer_rank : public true_type {};

// this may fail for char -> short if sizeof(char) == sizeof(short)
template <typename _From, typename _To>
  struct __converts_to_higher_integer_rank<_From, _To, false>
  : public is_same<decltype(declval<_From>() + declval<_To>()), _To> {};

// }}}
// __data(simd/simd_mask) {{{
template <typename _Tp, typename _Ap>
  _GLIBCXX_SIMD_INTRINSIC constexpr const auto&
  __data(const simd<_Tp, _Ap>& __x);

template <typename _Tp, typename _Ap>
  _GLIBCXX_SIMD_INTRINSIC constexpr auto&
  __data(simd<_Tp, _Ap>& __x);

template <typename _Tp, typename _Ap>
  _GLIBCXX_SIMD_INTRINSIC constexpr const auto&
  __data(const simd_mask<_Tp, _Ap>& __x);

template <typename _Tp, typename _Ap>
  _GLIBCXX_SIMD_INTRINSIC constexpr auto&
  __data(simd_mask<_Tp, _Ap>& __x);

// }}}
// _SimdConverter {{{
template <typename _FromT, typename _FromA, typename _ToT, typename _ToA,
          typename = void>
  struct _SimdConverter;

template <typename _Tp, typename _Ap>
  struct _SimdConverter<_Tp, _Ap, _Tp, _Ap, void>
  {
    template <typename _Up>
      _GLIBCXX_SIMD_INTRINSIC const _Up&
      operator()(const _Up& __x)
      { return __x; }
  };

// }}}
// __to_value_type_or_member_type {{{
template <typename _V>
  _GLIBCXX_SIMD_INTRINSIC constexpr auto
  __to_value_type_or_member_type(const _V& __x) -> decltype(__data(__x))
  { return __data(__x); }

template <typename _V>
  _GLIBCXX_SIMD_INTRINSIC constexpr const typename _V::value_type&
  __to_value_type_or_member_type(const typename _V::value_type& __x)
  { return __x; }

// }}}
// __bool_storage_member_type{{{
template <size_t _Size>
  struct __bool_storage_member_type;

template <size_t _Size>
  using __bool_storage_member_type_t =
    typename __bool_storage_member_type<_Size>::type;

// }}}
// _SimdTuple {{{
// why not tuple?
// 1. tuple gives no guarantee about the storage order, but I require storage
//    equivalent to array<_Tp, _Np>
// 2. direct access to the element type (first template argument)
// 3. enforces equal element type, only different _Abi types are allowed
template <typename _Tp, typename... _Abis>
  struct _SimdTuple;

//}}}
// __fixed_size_storage_t {{{
template <typename _Tp, int _Np>
  struct __fixed_size_storage;

template <typename _Tp, int _Np>
  using __fixed_size_storage_t = typename __fixed_size_storage<_Tp, _Np>::type;

// }}}
// _SimdWrapper fwd decl{{{
template <typename _Tp, size_t _Size, typename = void_t<>>
  struct _SimdWrapper;

template <typename _Tp>
  using _SimdWrapper8 = _SimdWrapper<_Tp, 8 / sizeof(_Tp)>;
template <typename _Tp>
  using _SimdWrapper16 = _SimdWrapper<_Tp, 16 / sizeof(_Tp)>;
template <typename _Tp>
  using _SimdWrapper32 = _SimdWrapper<_Tp, 32 / sizeof(_Tp)>;
template <typename _Tp>
  using _SimdWrapper64 = _SimdWrapper<_Tp, 64 / sizeof(_Tp)>;

// }}}
// __is_simd_wrapper {{{
template <typename _Tp>
  struct __is_simd_wrapper : false_type {};

template <typename _Tp, size_t _Np>
  struct __is_simd_wrapper<_SimdWrapper<_Tp, _Np>> : true_type {};

template <typename _Tp>
  inline constexpr bool __is_simd_wrapper_v = __is_simd_wrapper<_Tp>::value;

// }}}
// _BitOps {{{
struct _BitOps
{
  // _S_bit_iteration {{{
  template <typename _Tp, typename _Fp>
    static void
    _S_bit_iteration(_Tp __mask, _Fp&& __f)
    {
      static_assert(sizeof(_ULLong) >= sizeof(_Tp));
      conditional_t<sizeof(_Tp) <= sizeof(_UInt), _UInt, _ULLong> __k;
      if constexpr (is_convertible_v<_Tp, decltype(__k)>)
        __k = __mask;
      else
        __k = __mask.to_ullong();
      while (__k)
        {
          __f(std::__countr_zero(__k));
          __k &= (__k - 1); // clear the lowest set bit
        }
    }

  //}}}
};

//}}}
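// Example (illustrative): _S_bit_iteration visits the index of every set
// bit, from least to most significant:
//
//   _BitOps::_S_bit_iteration(0b10110u, [](auto __i) {
//     // called with __i == 1, then 2, then 4
//   });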
// __increment, __decrement {{{
template <typename _Tp = void>
  struct __increment
  { constexpr _Tp operator()(_Tp __a) const { return ++__a; } };

template <>
  struct __increment<void>
  {
    template <typename _Tp>
      constexpr _Tp
      operator()(_Tp __a) const
      { return ++__a; }
  };

template <typename _Tp = void>
  struct __decrement
  { constexpr _Tp operator()(_Tp __a) const { return --__a; } };

template <>
  struct __decrement<void>
  {
    template <typename _Tp>
      constexpr _Tp
      operator()(_Tp __a) const
      { return --__a; }
  };

// }}}
// _ValuePreserving(OrInt) {{{
template <typename _From, typename _To,
          typename = enable_if_t<negation<
            __is_narrowing_conversion<__remove_cvref_t<_From>, _To>>::value>>
  using _ValuePreserving = _From;

template <typename _From, typename _To,
          typename _DecayedFrom = __remove_cvref_t<_From>,
          typename = enable_if_t<conjunction<
            is_convertible<_From, _To>,
            disjunction<
              is_same<_DecayedFrom, _To>, is_same<_DecayedFrom, int>,
              conjunction<is_same<_DecayedFrom, _UInt>, is_unsigned<_To>>,
              negation<__is_narrowing_conversion<_DecayedFrom, _To>>>>::value>>
  using _ValuePreservingOrInt = _From;

// }}}
// __intrinsic_type {{{
template <typename _Tp, size_t _Bytes, typename = void_t<>>
  struct __intrinsic_type;

template <typename _Tp, size_t _Size>
  using __intrinsic_type_t =
    typename __intrinsic_type<_Tp, _Size * sizeof(_Tp)>::type;

template <typename _Tp>
  using __intrinsic_type2_t = typename __intrinsic_type<_Tp, 2>::type;
template <typename _Tp>
  using __intrinsic_type4_t = typename __intrinsic_type<_Tp, 4>::type;
template <typename _Tp>
  using __intrinsic_type8_t = typename __intrinsic_type<_Tp, 8>::type;
template <typename _Tp>
  using __intrinsic_type16_t = typename __intrinsic_type<_Tp, 16>::type;
template <typename _Tp>
  using __intrinsic_type32_t = typename __intrinsic_type<_Tp, 32>::type;
template <typename _Tp>
  using __intrinsic_type64_t = typename __intrinsic_type<_Tp, 64>::type;

// }}}
// _BitMask {{{
template <size_t _Np, bool _Sanitized = false>
  struct _BitMask;

template <size_t _Np, bool _Sanitized>
  struct __is_bitmask<_BitMask<_Np, _Sanitized>, void> : true_type {};

template <size_t _Np>
  using _SanitizedBitMask = _BitMask<_Np, true>;

template <size_t _Np, bool _Sanitized>
  struct _BitMask
  {
    static_assert(_Np > 0);

    static constexpr size_t _NBytes = __div_roundup(_Np, __CHAR_BIT__);

    using _Tp = conditional_t<_Np == 1, bool,
                              make_unsigned_t<__int_with_sizeof_t<std::min(
                                sizeof(_ULLong), std::__bit_ceil(_NBytes))>>>;

    static constexpr int _S_array_size = __div_roundup(_NBytes, sizeof(_Tp));

    _Tp _M_bits[_S_array_size];

    static constexpr int _S_unused_bits
      = _Np == 1 ? 0 : _S_array_size * sizeof(_Tp) * __CHAR_BIT__ - _Np;

    static constexpr _Tp _S_bitmask = +_Tp(~_Tp()) >> _S_unused_bits;

    constexpr _BitMask() noexcept = default;

    constexpr _BitMask(unsigned long long __x) noexcept
    : _M_bits{static_cast<_Tp>(__x)} {}

    _BitMask(bitset<_Np> __x) noexcept : _BitMask(__x.to_ullong()) {}

    constexpr _BitMask(const _BitMask&) noexcept = default;

    template <bool _RhsSanitized, typename = enable_if_t<_RhsSanitized == false
                                                           && _Sanitized == true>>
      constexpr _BitMask(const _BitMask<_Np, _RhsSanitized>& __rhs) noexcept
      : _BitMask(__rhs._M_sanitized()) {}

    constexpr operator _SimdWrapper<bool, _Np>() const noexcept
    {
      static_assert(_S_array_size == 1);
      return _M_bits[0];
    }

    // precondition: is sanitized
    constexpr _Tp
    _M_to_bits() const noexcept
    {
      static_assert(_S_array_size == 1);
      return _M_bits[0];
    }

    // precondition: is sanitized
    constexpr unsigned long long
    to_ullong() const noexcept
    {
      static_assert(_S_array_size == 1);
      return _M_bits[0];
    }

    // precondition: is sanitized
    constexpr unsigned long
    to_ulong() const noexcept
    {
      static_assert(_S_array_size == 1);
      return _M_bits[0];
    }

    constexpr bitset<_Np>
    _M_to_bitset() const noexcept
    {
      static_assert(_S_array_size == 1);
      return _M_bits[0];
    }

    constexpr decltype(auto)
    _M_sanitized() const noexcept
    {
      if constexpr (_Sanitized)
        return *this;
      else if constexpr (_Np == 1)
        return _SanitizedBitMask<_Np>(_M_bits[0]);
      else
        {
          _SanitizedBitMask<_Np> __r = {};
          for (int __i = 0; __i < _S_array_size; ++__i)
            __r._M_bits[__i] = _M_bits[__i];
          if constexpr (_S_unused_bits > 0)
            __r._M_bits[_S_array_size - 1] &= _S_bitmask;
          return __r;
        }
    }

    template <size_t _Mp, bool _LSanitized>
      constexpr _BitMask<_Np + _Mp, _Sanitized>
      _M_prepend(_BitMask<_Mp, _LSanitized> __lsb) const noexcept
      {
        constexpr size_t _RN = _Np + _Mp;
        using _Rp = _BitMask<_RN, _Sanitized>;
        if constexpr (_Rp::_S_array_size == 1)
          {
            _Rp __r{{_M_bits[0]}};
            __r._M_bits[0] <<= _Mp;
            __r._M_bits[0] |= __lsb._M_sanitized()._M_bits[0];
            return __r;
          }
        else
          __assert_unreachable<_Rp>();
      }

    // Return a new _BitMask with size _NewSize while dropping _DropLsb least
    // significant bits. If the operation implicitly produces a sanitized bitmask,
    // the result type will have _Sanitized set.
    template <size_t _DropLsb, size_t _NewSize = _Np - _DropLsb>
      constexpr auto
      _M_extract() const noexcept
      {
        static_assert(_Np > _DropLsb);
        static_assert(_DropLsb + _NewSize <= sizeof(_ULLong) * __CHAR_BIT__,
                      "not implemented for bitmasks larger than one ullong");
        if constexpr (_NewSize == 1)
          // must sanitize because the return _Tp is bool
          return _SanitizedBitMask<1>(_M_bits[0] & (_Tp(1) << _DropLsb));
        else
          return _BitMask<_NewSize,
                          ((_NewSize + _DropLsb == sizeof(_Tp) * __CHAR_BIT__
                              && _NewSize + _DropLsb <= _Np)
                             || ((_Sanitized || _Np == sizeof(_Tp) * __CHAR_BIT__)
                                   && _NewSize + _DropLsb >= _Np))>(_M_bits[0]
                                                                      >> _DropLsb);
      }

    // True if all bits are set. Implicitly sanitizes if _Sanitized == false.
    constexpr bool
    all() const noexcept
    {
      if constexpr (_Np == 1)
        return _M_bits[0];
      else if constexpr (!_Sanitized)
        return _M_sanitized().all();
      else
        {
          constexpr _Tp __allbits = ~_Tp();
          for (int __i = 0; __i < _S_array_size - 1; ++__i)
            if (_M_bits[__i] != __allbits)
              return false;
          return _M_bits[_S_array_size - 1] == _S_bitmask;
        }
    }

    // True if at least one bit is set. Implicitly sanitizes if _Sanitized ==
    // false.
    constexpr bool
    any() const noexcept
    {
      if constexpr (_Np == 1)
        return _M_bits[0];
      else if constexpr (!_Sanitized)
        return _M_sanitized().any();
      else
        {
          for (int __i = 0; __i < _S_array_size - 1; ++__i)
            if (_M_bits[__i] != 0)
              return true;
          return _M_bits[_S_array_size - 1] != 0;
        }
    }

    // True if no bit is set. Implicitly sanitizes if _Sanitized == false.
    constexpr bool
    none() const noexcept
    {
      if constexpr (_Np == 1)
        return !_M_bits[0];
      else if constexpr (!_Sanitized)
        return _M_sanitized().none();
      else
        {
          for (int __i = 0; __i < _S_array_size - 1; ++__i)
            if (_M_bits[__i] != 0)
              return false;
          return _M_bits[_S_array_size - 1] == 0;
        }
    }

    // Returns the number of set bits. Implicitly sanitizes if _Sanitized ==
    // false.
    constexpr int
    count() const noexcept
    {
      if constexpr (_Np == 1)
        return _M_bits[0];
      else if constexpr (!_Sanitized)
        return _M_sanitized().count();
      else
        {
          int __result = __builtin_popcountll(_M_bits[0]);
          for (int __i = 1; __i < _S_array_size; ++__i)
            __result += __builtin_popcountll(_M_bits[__i]);
          return __result;
        }
    }

    // Returns the bit at offset __i as bool.
    constexpr bool
    operator[](size_t __i) const noexcept
    {
      if constexpr (_Np == 1)
        return _M_bits[0];
      else if constexpr (_S_array_size == 1)
        return (_M_bits[0] >> __i) & 1;
      else
        {
          const size_t __j = __i / (sizeof(_Tp) * __CHAR_BIT__);
          const size_t __shift = __i % (sizeof(_Tp) * __CHAR_BIT__);
          return (_M_bits[__j] >> __shift) & 1;
        }
    }

    template <size_t __i>
      constexpr bool
      operator[](_SizeConstant<__i>) const noexcept
      {
        static_assert(__i < _Np);
        constexpr size_t __j = __i / (sizeof(_Tp) * __CHAR_BIT__);
        constexpr size_t __shift = __i % (sizeof(_Tp) * __CHAR_BIT__);
        return static_cast<bool>(_M_bits[__j] & (_Tp(1) << __shift));
      }

    // Set the bit at offset __i to __x.
    constexpr void
    set(size_t __i, bool __x) noexcept
    {
      if constexpr (_Np == 1)
        _M_bits[0] = __x;
      else if constexpr (_S_array_size == 1)
        {
          _M_bits[0] &= ~_Tp(_Tp(1) << __i);
          _M_bits[0] |= _Tp(_Tp(__x) << __i);
        }
      else
        {
          const size_t __j = __i / (sizeof(_Tp) * __CHAR_BIT__);
          const size_t __shift = __i % (sizeof(_Tp) * __CHAR_BIT__);
          _M_bits[__j] &= ~_Tp(_Tp(1) << __shift);
          _M_bits[__j] |= _Tp(_Tp(__x) << __shift);
        }
    }

    template <size_t __i>
      constexpr void
      set(_SizeConstant<__i>, bool __x) noexcept
      {
        static_assert(__i < _Np);
        if constexpr (_Np == 1)
          _M_bits[0] = __x;
        else
          {
            constexpr size_t __j = __i / (sizeof(_Tp) * __CHAR_BIT__);
            constexpr size_t __shift = __i % (sizeof(_Tp) * __CHAR_BIT__);
            constexpr _Tp __mask = ~_Tp(_Tp(1) << __shift);
            _M_bits[__j] &= __mask;
            _M_bits[__j] |= _Tp(_Tp(__x) << __shift);
          }
      }

    // Inverts all bits. Sanitized input leads to sanitized output.
    constexpr _BitMask
    operator~() const noexcept
    {
      if constexpr (_Np == 1)
        return !_M_bits[0];
      else
        {
          _BitMask __result{};
          for (int __i = 0; __i < _S_array_size - 1; ++__i)
            __result._M_bits[__i] = ~_M_bits[__i];
          if constexpr (_Sanitized)
            __result._M_bits[_S_array_size - 1]
              = _M_bits[_S_array_size - 1] ^ _S_bitmask;
          else
            __result._M_bits[_S_array_size - 1] = ~_M_bits[_S_array_size - 1];
          return __result;
        }
    }

    constexpr _BitMask&
    operator^=(const _BitMask& __b) & noexcept
    {
      __execute_n_times<_S_array_size>(
        [&](auto __i) _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA { _M_bits[__i] ^= __b._M_bits[__i]; });
      return *this;
    }

    constexpr _BitMask&
    operator|=(const _BitMask& __b) & noexcept
    {
      __execute_n_times<_S_array_size>(
        [&](auto __i) _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA { _M_bits[__i] |= __b._M_bits[__i]; });
      return *this;
    }

    constexpr _BitMask&
    operator&=(const _BitMask& __b) & noexcept
    {
      __execute_n_times<_S_array_size>(
        [&](auto __i) _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA { _M_bits[__i] &= __b._M_bits[__i]; });
      return *this;
    }

    friend constexpr _BitMask
    operator^(const _BitMask& __a, const _BitMask& __b) noexcept
    {
      _BitMask __r = __a;
      __r ^= __b;
      return __r;
    }

    friend constexpr _BitMask
    operator|(const _BitMask& __a, const _BitMask& __b) noexcept
    {
      _BitMask __r = __a;
      __r |= __b;
      return __r;
    }

    friend constexpr _BitMask
    operator&(const _BitMask& __a, const _BitMask& __b) noexcept
    {
      _BitMask __r = __a;
      __r &= __b;
      return __r;
    }

    _GLIBCXX_SIMD_INTRINSIC
    constexpr bool
    _M_is_constprop() const
    {
      if constexpr (_S_array_size == 0)
        return __builtin_constant_p(_M_bits[0]);
      else
        {
          for (int __i = 0; __i < _S_array_size; ++__i)
            if (!__builtin_constant_p(_M_bits[__i]))
              return false;
          return true;
        }
    }
  };

// }}}
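// Example (illustrative) of _BitMask usage:
//
//   _BitMask<8> __k(0b1011'0001u);
//   __k[0];       // true
//   __k[1];       // false
//   __k.count();  // 4
//   __k.all();    // false
//   __k.any();    // true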

// vvv ---- builtin vector types [[gnu::vector_size(N)]] and operations ---- vvv
// __min_vector_size {{{
template <typename _Tp = void>
  static inline constexpr int __min_vector_size = 2 * sizeof(_Tp);

#if _GLIBCXX_SIMD_HAVE_NEON
template <>
  inline constexpr int __min_vector_size<void> = 8;
#else
template <>
  inline constexpr int __min_vector_size<void> = 16;
#endif

// }}}
// __vector_type {{{
template <typename _Tp, size_t _Np, typename = void>
  struct __vector_type_n {};

// substitution failure for 0-element case
template <typename _Tp>
  struct __vector_type_n<_Tp, 0, void> {};

// special case 1-element to be _Tp itself
template <typename _Tp>
  struct __vector_type_n<_Tp, 1, enable_if_t<__is_vectorizable_v<_Tp>>>
  { using type = _Tp; };

// else, use GNU-style builtin vector types
template <typename _Tp, size_t _Np>
  struct __vector_type_n<_Tp, _Np, enable_if_t<__is_vectorizable_v<_Tp> && _Np >= 2>>
  {
    static constexpr size_t _S_Np2 = std::__bit_ceil(_Np * sizeof(_Tp));

    static constexpr size_t _S_Bytes =
#ifdef __i386__
      // Using [[gnu::vector_size(8)]] would wreak havoc on the FPU because
      // those objects are passed via MMX registers and nothing ever calls EMMS.
      _S_Np2 == 8 ? 16 :
#endif
      _S_Np2 < __min_vector_size<_Tp> ? __min_vector_size<_Tp>
                                      : _S_Np2;

    using type [[__gnu__::__vector_size__(_S_Bytes)]] = _Tp;
  };

template <typename _Tp, size_t _Bytes, size_t = _Bytes % sizeof(_Tp)>
  struct __vector_type;

template <typename _Tp, size_t _Bytes>
  struct __vector_type<_Tp, _Bytes, 0>
  : __vector_type_n<_Tp, _Bytes / sizeof(_Tp)> {};

template <typename _Tp, size_t _Size>
  using __vector_type_t = typename __vector_type_n<_Tp, _Size>::type;

template <typename _Tp>
  using __vector_type2_t = typename __vector_type<_Tp, 2>::type;
template <typename _Tp>
  using __vector_type4_t = typename __vector_type<_Tp, 4>::type;
template <typename _Tp>
  using __vector_type8_t = typename __vector_type<_Tp, 8>::type;
template <typename _Tp>
  using __vector_type16_t = typename __vector_type<_Tp, 16>::type;
template <typename _Tp>
  using __vector_type32_t = typename __vector_type<_Tp, 32>::type;
template <typename _Tp>
  using __vector_type64_t = typename __vector_type<_Tp, 64>::type;

// }}}
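// Examples (illustrative, assuming a non-i386 target) of the resulting types:
//
//   __vector_type_t<float, 4>  // 16-Byte GNU vector of 4 floats
//   __vector_type_t<float, 1>  // plain float (1-element special case)
//   __vector_type_t<float, 3>  // padded up to a 16-Byte vector of 4 floats
//   __vector_type8_t<short>    // 8-Byte vector of 4 shorts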
// __is_vector_type {{{
template <typename _Tp, typename = void_t<>>
  struct __is_vector_type : false_type {};

template <typename _Tp>
  struct __is_vector_type<
    _Tp, void_t<typename __vector_type<
           remove_reference_t<decltype(declval<_Tp>()[0])>, sizeof(_Tp)>::type>>
  : is_same<_Tp, typename __vector_type<
                   remove_reference_t<decltype(declval<_Tp>()[0])>,
                   sizeof(_Tp)>::type> {};

template <typename _Tp>
  inline constexpr bool __is_vector_type_v = __is_vector_type<_Tp>::value;

// }}}
// __is_intrinsic_type {{{
#if _GLIBCXX_SIMD_HAVE_SSE_ABI
template <typename _Tp>
  using __is_intrinsic_type = __is_vector_type<_Tp>;
#else // not SSE (x86)
template <typename _Tp, typename = void_t<>>
  struct __is_intrinsic_type : false_type {};

template <typename _Tp>
  struct __is_intrinsic_type<
    _Tp, void_t<typename __intrinsic_type<
           remove_reference_t<decltype(declval<_Tp>()[0])>, sizeof(_Tp)>::type>>
  : is_same<_Tp, typename __intrinsic_type<
                   remove_reference_t<decltype(declval<_Tp>()[0])>,
                   sizeof(_Tp)>::type> {};
#endif

template <typename _Tp>
  inline constexpr bool __is_intrinsic_type_v = __is_intrinsic_type<_Tp>::value;

// }}}
// _VectorTraits{{{
template <typename _Tp, typename = void_t<>>
  struct _VectorTraitsImpl;

template <typename _Tp>
  struct _VectorTraitsImpl<_Tp, enable_if_t<__is_vector_type_v<_Tp>
                                              || __is_intrinsic_type_v<_Tp>>>
  {
    using type = _Tp;
    using value_type = remove_reference_t<decltype(declval<_Tp>()[0])>;
    static constexpr int _S_full_size = sizeof(_Tp) / sizeof(value_type);
    using _Wrapper = _SimdWrapper<value_type, _S_full_size>;
    template <typename _Up, int _W = _S_full_size>
      static constexpr bool _S_is
        = is_same_v<value_type, _Up> && _W == _S_full_size;
  };

template <typename _Tp, size_t _Np>
  struct _VectorTraitsImpl<_SimdWrapper<_Tp, _Np>,
                           void_t<__vector_type_t<_Tp, _Np>>>
  {
    using type = __vector_type_t<_Tp, _Np>;
    using value_type = _Tp;
    static constexpr int _S_full_size = sizeof(type) / sizeof(value_type);
    using _Wrapper = _SimdWrapper<_Tp, _Np>;
    static constexpr bool _S_is_partial = (_Np < _S_full_size);
    static constexpr int _S_partial_width = _Np;
    template <typename _Up, int _W = _S_full_size>
      static constexpr bool _S_is
        = is_same_v<value_type, _Up> && _W == _S_full_size;
  };

template <typename _Tp, typename = typename _VectorTraitsImpl<_Tp>::type>
  using _VectorTraits = _VectorTraitsImpl<_Tp>;

// }}}
// __as_vector{{{
template <typename _V>
  _GLIBCXX_SIMD_INTRINSIC constexpr auto
  __as_vector(_V __x)
  {
    if constexpr (__is_vector_type_v<_V>)
      return __x;
    else if constexpr (is_simd<_V>::value || is_simd_mask<_V>::value)
      {
        if constexpr (__is_fixed_size_abi_v<typename _V::abi_type>)
          {
            static_assert(is_simd<_V>::value);
            static_assert(_V::abi_type::template __traits<
                            typename _V::value_type>::_SimdMember::_S_tuple_size == 1);
            return __as_vector(__data(__x).first);
          }
        else if constexpr (_V::size() > 1)
          return __data(__x)._M_data;
        else
          {
            static_assert(is_simd<_V>::value);
            using _Tp = typename _V::value_type;
#ifdef __i386__
            constexpr auto __bytes = sizeof(_Tp) == 8 ? 16 : sizeof(_Tp);
            using _RV [[__gnu__::__vector_size__(__bytes)]] = _Tp;
#else
            using _RV [[__gnu__::__vector_size__(sizeof(_Tp))]] = _Tp;
#endif
            return _RV{__data(__x)};
          }
      }
    else if constexpr (__is_vectorizable_v<_V>)
      return __vector_type_t<_V, 2>{__x};
    else
      return __x._M_data;
  }

// }}}
// __as_wrapper{{{
template <size_t _Np = 0, typename _V>
  _GLIBCXX_SIMD_INTRINSIC constexpr auto
  __as_wrapper(_V __x)
  {
    if constexpr (__is_vector_type_v<_V>)
      return _SimdWrapper<typename _VectorTraits<_V>::value_type,
                          (_Np > 0 ? _Np : _VectorTraits<_V>::_S_full_size)>(__x);
    else if constexpr (is_simd<_V>::value || is_simd_mask<_V>::value)
      {
        static_assert(_V::size() == _Np);
        return __data(__x);
      }
    else
      {
        static_assert(_V::_S_size == _Np);
        return __x;
      }
  }

// }}}
// __intrin_bitcast{{{
template <typename _To, typename _From>
  _GLIBCXX_SIMD_INTRINSIC constexpr _To
  __intrin_bitcast(_From __v)
  {
    static_assert((__is_vector_type_v<_From> || __is_intrinsic_type_v<_From>)
                    && (__is_vector_type_v<_To> || __is_intrinsic_type_v<_To>));
    if constexpr (sizeof(_To) == sizeof(_From))
      return reinterpret_cast<_To>(__v);
    else if constexpr (sizeof(_From) > sizeof(_To))
      if constexpr (sizeof(_To) >= 16)
        return reinterpret_cast<const __may_alias<_To>&>(__v);
      else
        {
          _To __r;
          __builtin_memcpy(&__r, &__v, sizeof(_To));
          return __r;
        }
#if _GLIBCXX_SIMD_X86INTRIN && !defined __clang__
    else if constexpr (__have_avx && sizeof(_From) == 16 && sizeof(_To) == 32)
      return reinterpret_cast<_To>(__builtin_ia32_ps256_ps(
               reinterpret_cast<__vector_type_t<float, 4>>(__v)));
    else if constexpr (__have_avx512f && sizeof(_From) == 16
                         && sizeof(_To) == 64)
      return reinterpret_cast<_To>(__builtin_ia32_ps512_ps(
               reinterpret_cast<__vector_type_t<float, 4>>(__v)));
    else if constexpr (__have_avx512f && sizeof(_From) == 32
                         && sizeof(_To) == 64)
      return reinterpret_cast<_To>(__builtin_ia32_ps512_256ps(
               reinterpret_cast<__vector_type_t<float, 8>>(__v)));
#endif // _GLIBCXX_SIMD_X86INTRIN
    else if constexpr (sizeof(__v) <= 8)
      return reinterpret_cast<_To>(
               __vector_type_t<__int_for_sizeof_t<_From>, sizeof(_To) / sizeof(_From)>{
                 reinterpret_cast<__int_for_sizeof_t<_From>>(__v)});
    else
      {
        static_assert(sizeof(_To) > sizeof(_From));
        _To __r = {};
        __builtin_memcpy(&__r, &__v, sizeof(_From));
        return __r;
      }
  }

// }}}
// __vector_bitcast{{{
template <typename _To, size_t _NN = 0, typename _From,
          typename _FromVT = _VectorTraits<_From>,
          size_t _Np = _NN == 0 ? sizeof(_From) / sizeof(_To) : _NN>
  _GLIBCXX_SIMD_INTRINSIC constexpr __vector_type_t<_To, _Np>
  __vector_bitcast(_From __x)
  {
    using _R = __vector_type_t<_To, _Np>;
    return __intrin_bitcast<_R>(__x);
  }

template <typename _To, size_t _NN = 0, typename _Tp, size_t _Nx,
          size_t _Np
            = _NN == 0 ? sizeof(_SimdWrapper<_Tp, _Nx>) / sizeof(_To) : _NN>
  _GLIBCXX_SIMD_INTRINSIC constexpr __vector_type_t<_To, _Np>
  __vector_bitcast(const _SimdWrapper<_Tp, _Nx>& __x)
  {
    static_assert(_Np > 1);
    return __intrin_bitcast<__vector_type_t<_To, _Np>>(__x._M_data);
  }

// }}}
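// Example (illustrative): reinterpret a vector's bytes as another element
// type of the same total size:
//
//   __vector_type_t<float, 4> __f = {1.f, 2.f, 3.f, 4.f};
//   auto __i = __vector_bitcast<int>(__f); // __vector_type_t<int, 4>
//   // __i[0] == 0x3f800000, the bit pattern of 1.f (assuming IEC 559 float)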
// __convert_x86 declarations {{{
#ifdef _GLIBCXX_SIMD_WORKAROUND_PR85048
template <typename _To, typename _Tp, typename _TVT = _VectorTraits<_Tp>>
  _To __convert_x86(_Tp);

template <typename _To, typename _Tp, typename _TVT = _VectorTraits<_Tp>>
  _To __convert_x86(_Tp, _Tp);

template <typename _To, typename _Tp, typename _TVT = _VectorTraits<_Tp>>
  _To __convert_x86(_Tp, _Tp, _Tp, _Tp);

template <typename _To, typename _Tp, typename _TVT = _VectorTraits<_Tp>>
  _To __convert_x86(_Tp, _Tp, _Tp, _Tp, _Tp, _Tp, _Tp, _Tp);

template <typename _To, typename _Tp, typename _TVT = _VectorTraits<_Tp>>
  _To __convert_x86(_Tp, _Tp, _Tp, _Tp, _Tp, _Tp, _Tp, _Tp, _Tp, _Tp, _Tp, _Tp,
                    _Tp, _Tp, _Tp, _Tp);
#endif // _GLIBCXX_SIMD_WORKAROUND_PR85048

//}}}
// __bit_cast {{{
template <typename _To, typename _From>
  _GLIBCXX_SIMD_INTRINSIC constexpr _To
  __bit_cast(const _From __x)
  {
#if __has_builtin(__builtin_bit_cast)
    return __builtin_bit_cast(_To, __x);
#else
    static_assert(sizeof(_To) == sizeof(_From));
    constexpr bool __to_is_vectorizable
      = is_arithmetic_v<_To> || is_enum_v<_To>;
    constexpr bool __from_is_vectorizable
      = is_arithmetic_v<_From> || is_enum_v<_From>;
    if constexpr (__is_vector_type_v<_To> && __is_vector_type_v<_From>)
      return reinterpret_cast<_To>(__x);
    else if constexpr (__is_vector_type_v<_To> && __from_is_vectorizable)
      {
        using _FV [[__gnu__::__vector_size__(sizeof(_From))]] = _From;
        return reinterpret_cast<_To>(_FV{__x});
      }
    else if constexpr (__to_is_vectorizable && __from_is_vectorizable)
      {
        using _TV [[__gnu__::__vector_size__(sizeof(_To))]] = _To;
        using _FV [[__gnu__::__vector_size__(sizeof(_From))]] = _From;
        return reinterpret_cast<_TV>(_FV{__x})[0];
      }
    else if constexpr (__to_is_vectorizable && __is_vector_type_v<_From>)
      {
        using _TV [[__gnu__::__vector_size__(sizeof(_To))]] = _To;
        return reinterpret_cast<_TV>(__x)[0];
      }
    else
      {
        _To __r;
        __builtin_memcpy(reinterpret_cast<char*>(&__r),
                         reinterpret_cast<const char*>(&__x), sizeof(_To));
        return __r;
      }
#endif
  }

// }}}
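// Example (illustrative): __bit_cast mirrors C++20 std::bit_cast for the
// cases needed here, also working between scalars and vector types. With a
// compiler providing __builtin_bit_cast it is usable in constant expressions:
//
//   static_assert(__bit_cast<unsigned>(1.f) == 0x3f800000u); // IEC 559 float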
// __to_intrin {{{
template <typename _Tp, typename _TVT = _VectorTraits<_Tp>,
          typename _R = __intrinsic_type_t<typename _TVT::value_type, _TVT::_S_full_size>>
  _GLIBCXX_SIMD_INTRINSIC constexpr _R
  __to_intrin(_Tp __x)
  {
    static_assert(sizeof(__x) <= sizeof(_R),
                  "__to_intrin may never drop values off the end");
    if constexpr (sizeof(__x) == sizeof(_R))
      return reinterpret_cast<_R>(__as_vector(__x));
    else
      {
        using _Up = __int_for_sizeof_t<_Tp>;
        return reinterpret_cast<_R>(
                 __vector_type_t<_Up, sizeof(_R) / sizeof(_Up)>{__bit_cast<_Up>(__x)});
      }
  }

// }}}
1811// __make_vector{{{
1812template <typename _Tp, typename... _Args>
1813 _GLIBCXX_SIMD_INTRINSIC constexpr __vector_type_t<_Tp, sizeof...(_Args)>
1814 __make_vector(const _Args&... __args)
1815 { return __vector_type_t<_Tp, sizeof...(_Args)>{static_cast<_Tp>(__args)...}; }
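// E.g. __make_vector<int>(0, 1, 2u, 3.f) yields __vector_type_t<int, 4>{0, 1, 2, 3};
// each argument is converted to _Tp individually via static_cast.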
1816
1817// }}}
1818// __vector_broadcast{{{
1819template <size_t _Np, typename _Tp, size_t... _I>
1820 _GLIBCXX_SIMD_INTRINSIC constexpr __vector_type_t<_Tp, _Np>
1821 __vector_broadcast_impl(_Tp __x, index_sequence<_I...>)
1822 { return __vector_type_t<_Tp, _Np>{((void)_I, __x)...}; }
1823
1824template <size_t _Np, typename _Tp>
1825 _GLIBCXX_SIMD_INTRINSIC constexpr __vector_type_t<_Tp, _Np>
1826 __vector_broadcast(_Tp __x)
1827 { return __vector_broadcast_impl<_Np, _Tp>(__x, make_index_sequence<_Np>()); }
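// E.g. __vector_broadcast<4>(1.f) yields __vector_type_t<float, 4>{1, 1, 1, 1}.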
1828
1829// }}}
1830// __generate_vector{{{
1831template <typename _Tp, size_t _Np, typename _Gp, size_t... _I>
1832  _GLIBCXX_SIMD_INTRINSIC constexpr __vector_type_t<_Tp, _Np>
1833  __generate_vector_impl(_Gp&& __gen, index_sequence<_I...>)
1834  { return __vector_type_t<_Tp, _Np>{static_cast<_Tp>(__gen(_SizeConstant<_I>()))...}; }
1835
1836template <typename _V, typename _VVT = _VectorTraits<_V>, typename _Gp>
1837 _GLIBCXX_SIMD_INTRINSIC constexpr _V
1838 __generate_vector(_Gp&& __gen)
1839 {
1840 if constexpr (__is_vector_type_v<_V>)
1841 return __generate_vector_impl<typename _VVT::value_type,
1842 _VVT::_S_full_size>(
1843 static_cast<_Gp&&>(__gen), make_index_sequence<_VVT::_S_full_size>());
1844 else
1845 return __generate_vector_impl<typename _VVT::value_type,
1846 _VVT::_S_partial_width>(
1847 static_cast<_Gp&&>(__gen),
1848 make_index_sequence<_VVT::_S_partial_width>());
1849 }
1850
1851template <typename _Tp, size_t _Np, typename _Gp>
1852 _GLIBCXX_SIMD_INTRINSIC constexpr __vector_type_t<_Tp, _Np>
1853 __generate_vector(_Gp&& __gen)
1854 {
1855 return __generate_vector_impl<_Tp, _Np>(static_cast<_Gp&&>(__gen),
1856 make_index_sequence<_Np>());
1857 }
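// Sketch: building an index (iota) vector; the generator is invoked with
// _SizeConstant<_I>(), so the lane index is usable as a constant expression:
//   auto __iota = __generate_vector<int, 4>([](auto __i) { return int(__i); });
//   // __iota == {0, 1, 2, 3}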
1858
1859// }}}
1860// __xor{{{
1861template <typename _TW>
1862 _GLIBCXX_SIMD_INTRINSIC constexpr _TW
1863 __xor(_TW __a, _TW __b) noexcept
1864 {
1865 if constexpr (__is_vector_type_v<_TW> || __is_simd_wrapper_v<_TW>)
1866 {
1867 using _Tp = typename conditional_t<__is_simd_wrapper_v<_TW>, _TW,
1868 _VectorTraitsImpl<_TW>>::value_type;
1869 if constexpr (is_floating_point_v<_Tp>)
1870 {
1871 using _Ip = make_unsigned_t<__int_for_sizeof_t<_Tp>>;
1872 return __vector_bitcast<_Tp>(__vector_bitcast<_Ip>(__a)
1873 ^ __vector_bitcast<_Ip>(__b));
1874 }
1875 else if constexpr (__is_vector_type_v<_TW>)
1876 return __a ^ __b;
1877 else
1878 return __a._M_data ^ __b._M_data;
1879 }
1880 else
1881 return __a ^ __b;
1882 }
1883
1884// }}}
1885// __or{{{
1886template <typename _TW>
1887 _GLIBCXX_SIMD_INTRINSIC constexpr _TW
1888 __or(_TW __a, _TW __b) noexcept
1889 {
1890 if constexpr (__is_vector_type_v<_TW> || __is_simd_wrapper_v<_TW>)
1891 {
1892 using _Tp = typename conditional_t<__is_simd_wrapper_v<_TW>, _TW,
1893 _VectorTraitsImpl<_TW>>::value_type;
1894 if constexpr (is_floating_point_v<_Tp>)
1895 {
1896 using _Ip = make_unsigned_t<__int_for_sizeof_t<_Tp>>;
1897 return __vector_bitcast<_Tp>(__vector_bitcast<_Ip>(__a)
1898 | __vector_bitcast<_Ip>(__b));
1899 }
1900 else if constexpr (__is_vector_type_v<_TW>)
1901 return __a | __b;
1902 else
1903 return __a._M_data | __b._M_data;
1904 }
1905 else
1906 return __a | __b;
1907 }
1908
1909// }}}
1910// __and{{{
1911template <typename _TW>
1912 _GLIBCXX_SIMD_INTRINSIC constexpr _TW
1913 __and(_TW __a, _TW __b) noexcept
1914 {
1915 if constexpr (__is_vector_type_v<_TW> || __is_simd_wrapper_v<_TW>)
1916 {
1917 using _Tp = typename conditional_t<__is_simd_wrapper_v<_TW>, _TW,
1918 _VectorTraitsImpl<_TW>>::value_type;
1919 if constexpr (is_floating_point_v<_Tp>)
1920 {
1921 using _Ip = make_unsigned_t<__int_for_sizeof_t<_Tp>>;
1922 return __vector_bitcast<_Tp>(__vector_bitcast<_Ip>(__a)
1923 & __vector_bitcast<_Ip>(__b));
1924 }
1925 else if constexpr (__is_vector_type_v<_TW>)
1926 return __a & __b;
1927 else
1928 return __a._M_data & __b._M_data;
1929 }
1930 else
1931 return __a & __b;
1932 }
1933
1934// }}}
1935// __andnot{{{
1936#if _GLIBCXX_SIMD_X86INTRIN && !defined __clang__
1937static constexpr struct
1938{
1939 _GLIBCXX_SIMD_INTRINSIC __v4sf
1940 operator()(__v4sf __a, __v4sf __b) const noexcept
1941 { return __builtin_ia32_andnps(__a, __b); }
1942
1943 _GLIBCXX_SIMD_INTRINSIC __v2df
1944 operator()(__v2df __a, __v2df __b) const noexcept
1945 { return __builtin_ia32_andnpd(__a, __b); }
1946
1947 _GLIBCXX_SIMD_INTRINSIC __v2di
1948 operator()(__v2di __a, __v2di __b) const noexcept
1949 { return __builtin_ia32_pandn128(__a, __b); }
1950
1951 _GLIBCXX_SIMD_INTRINSIC __v8sf
1952 operator()(__v8sf __a, __v8sf __b) const noexcept
1953 { return __builtin_ia32_andnps256(__a, __b); }
1954
1955 _GLIBCXX_SIMD_INTRINSIC __v4df
1956 operator()(__v4df __a, __v4df __b) const noexcept
1957 { return __builtin_ia32_andnpd256(__a, __b); }
1958
1959 _GLIBCXX_SIMD_INTRINSIC __v4di
1960 operator()(__v4di __a, __v4di __b) const noexcept
1961 {
1962 if constexpr (__have_avx2)
1963 return __builtin_ia32_andnotsi256(__a, __b);
1964 else
1965 return reinterpret_cast<__v4di>(
1966 __builtin_ia32_andnpd256(reinterpret_cast<__v4df>(__a),
1967 reinterpret_cast<__v4df>(__b)));
1968 }
1969
1970 _GLIBCXX_SIMD_INTRINSIC __v16sf
1971 operator()(__v16sf __a, __v16sf __b) const noexcept
1972 {
1973 if constexpr (__have_avx512dq)
1974 return _mm512_andnot_ps(__a, __b);
1975 else
1976 return reinterpret_cast<__v16sf>(
1977 _mm512_andnot_si512(reinterpret_cast<__v8di>(__a),
1978 reinterpret_cast<__v8di>(__b)));
1979 }
1980
1981 _GLIBCXX_SIMD_INTRINSIC __v8df
1982 operator()(__v8df __a, __v8df __b) const noexcept
1983 {
1984 if constexpr (__have_avx512dq)
1985 return _mm512_andnot_pd(__a, __b);
1986 else
1987 return reinterpret_cast<__v8df>(
1988 _mm512_andnot_si512(reinterpret_cast<__v8di>(__a),
1989 reinterpret_cast<__v8di>(__b)));
1990 }
1991
1992 _GLIBCXX_SIMD_INTRINSIC __v8di
1993 operator()(__v8di __a, __v8di __b) const noexcept
1994 { return _mm512_andnot_si512(__a, __b); }
1995} _S_x86_andnot;
1996#endif // _GLIBCXX_SIMD_X86INTRIN && !__clang__
1997
1998template <typename _TW>
1999 _GLIBCXX_SIMD_INTRINSIC constexpr _TW
2000 __andnot(_TW __a, _TW __b) noexcept
2001 {
2002 if constexpr (__is_vector_type_v<_TW> || __is_simd_wrapper_v<_TW>)
2003 {
2004 using _TVT = conditional_t<__is_simd_wrapper_v<_TW>, _TW,
2005 _VectorTraitsImpl<_TW>>;
2006 using _Tp = typename _TVT::value_type;
2007#if _GLIBCXX_SIMD_X86INTRIN && !defined __clang__
2008 if constexpr (sizeof(_TW) >= 16)
2009 {
2010 const auto __ai = __to_intrin(__a);
2011 const auto __bi = __to_intrin(__b);
2012 if (!__builtin_is_constant_evaluated()
2013 && !(__builtin_constant_p(__ai) && __builtin_constant_p(__bi)))
2014 {
2015 const auto __r = _S_x86_andnot(__ai, __bi);
2016 if constexpr (is_convertible_v<decltype(__r), _TW>)
2017 return __r;
2018 else
2019 return reinterpret_cast<typename _TVT::type>(__r);
2020 }
2021 }
2022#endif // _GLIBCXX_SIMD_X86INTRIN && !__clang__
2023 using _Ip = make_unsigned_t<__int_for_sizeof_t<_Tp>>;
2024 return __vector_bitcast<_Tp>(~__vector_bitcast<_Ip>(__a)
2025 & __vector_bitcast<_Ip>(__b));
2026 }
2027 else
2028 return ~__a & __b;
2029 }
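// Note the operand order: __andnot(__a, __b) computes ~__a & __b (matching
// the x86 ANDN semantics), i.e. it returns __b with all bits selected by __a
// cleared.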
2030
2031// }}}
2032// __not{{{
2033template <typename _Tp, typename _TVT = _VectorTraits<_Tp>>
2034 _GLIBCXX_SIMD_INTRINSIC constexpr _Tp
2035 __not(_Tp __a) noexcept
2036 {
2037 if constexpr (is_floating_point_v<typename _TVT::value_type>)
2038 return reinterpret_cast<typename _TVT::type>(
2039 ~__vector_bitcast<unsigned>(__a));
2040 else
2041 return ~__a;
2042 }
2043
2044// }}}
2045// __vec_shuffle{{{
2046template <typename _T0, typename _T1, typename _Fun, size_t... _Is>
2047 _GLIBCXX_SIMD_INTRINSIC constexpr
2048 __vector_type_t<remove_reference_t<decltype(declval<_T0>()[0])>, sizeof...(_Is)>
2049 __vec_shuffle(_T0 __x, _T1 __y, index_sequence<_Is...> __seq, _Fun __idx_perm)
2050 {
2051 constexpr int _N0 = sizeof(__x) / sizeof(__x[0]);
2052 constexpr int _N1 = sizeof(__y) / sizeof(__y[0]);
2053 using _Tp = remove_reference_t<decltype(declval<_T0>()[0])>;
2054 using _RV [[maybe_unused]] = __vector_type_t<_Tp, sizeof...(_Is)>;
2055#if __has_builtin(__builtin_shufflevector)
2056#ifdef __clang__
2057  // Clang's __builtin_shufflevector requires both vector arguments to have the same type (_T0 == _T1)
2058 if constexpr (sizeof(__x) > sizeof(__y) and _N1 == 1)
2059 return __vec_shuffle(__x, _T0{__y[0]}, __seq, __idx_perm);
2060 else if constexpr (sizeof(__x) > sizeof(__y))
2061 return __vec_shuffle(__x, __intrin_bitcast<_T0>(__y), __seq, __idx_perm);
2062 else if constexpr (sizeof(__x) < sizeof(__y) and _N0 == 1)
2063 return __vec_shuffle(_T1{__x[0]}, __y, __seq, [=](int __i) {
2064 __i = __idx_perm(__i);
2065 return __i < _N0 ? __i : __i - _N0 + _N1;
2066 });
2067 else if constexpr (sizeof(__x) < sizeof(__y))
2068 return __vec_shuffle(__intrin_bitcast<_T1>(__x), __y, __seq, [=](int __i) {
2069 __i = __idx_perm(__i);
2070 return __i < _N0 ? __i : __i - _N0 + _N1;
2071 });
2072 else
2073#endif
2074 {
2075 const auto __r = __builtin_shufflevector(__x, __y, [=] {
2076 constexpr int __j = __idx_perm(_Is);
2077 static_assert(__j < _N0 + _N1);
2078 return __j;
2079 }()...);
2080#ifdef __i386__
2081 if constexpr (sizeof(__r) == sizeof(_RV))
2082 return __r;
2083 else
2084 return _RV {__r[_Is]...};
2085#else
2086 return __r;
2087#endif
2088 }
2089#else
2090 return _RV {
2091 [=]() -> _Tp {
2092 constexpr int __j = __idx_perm(_Is);
2093 static_assert(__j < _N0 + _N1);
2094 if constexpr (__j < 0)
2095 return 0;
2096 else if constexpr (__j < _N0)
2097 return __x[__j];
2098 else
2099 return __y[__j - _N0];
2100 }()...
2101 };
2102#endif
2103 }
2104
2105template <typename _T0, typename _Fun, typename _Seq>
2106 _GLIBCXX_SIMD_INTRINSIC constexpr auto
2107 __vec_shuffle(_T0 __x, _Seq __seq, _Fun __idx_perm)
2108 { return __vec_shuffle(__x, _T0(), __seq, __idx_perm); }
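// Sketch: reversing the four lanes of a vector __v (hypothetical variable)
// via the index-permutation callback:
//   auto __rev = __vec_shuffle(__v, make_index_sequence<4>(),
//                              [](int __i) { return 3 - __i; });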
2109
2110// }}}
2111// __concat{{{
2112template <typename _Tp, typename _TVT = _VectorTraits<_Tp>,
2113 typename _R = __vector_type_t<typename _TVT::value_type, _TVT::_S_full_size * 2>>
2114 constexpr _R
2115 __concat(_Tp a_, _Tp b_)
2116 {
2117#ifdef _GLIBCXX_SIMD_WORKAROUND_XXX_1
2118 using _W
2119 = conditional_t<is_floating_point_v<typename _TVT::value_type>, double,
2120 conditional_t<(sizeof(_Tp) >= 2 * sizeof(long long)),
2121 long long, typename _TVT::value_type>>;
2122 constexpr int input_width = sizeof(_Tp) / sizeof(_W);
2123 const auto __a = __vector_bitcast<_W>(a_);
2124 const auto __b = __vector_bitcast<_W>(b_);
2125 using _Up = __vector_type_t<_W, sizeof(_R) / sizeof(_W)>;
2126#else
2127 constexpr int input_width = _TVT::_S_full_size;
2128 const _Tp& __a = a_;
2129 const _Tp& __b = b_;
2130 using _Up = _R;
2131#endif
2132 if constexpr (input_width == 2)
2133 return reinterpret_cast<_R>(_Up{__a[0], __a[1], __b[0], __b[1]});
2134 else if constexpr (input_width == 4)
2135 return reinterpret_cast<_R>(
2136 _Up{__a[0], __a[1], __a[2], __a[3], __b[0], __b[1], __b[2], __b[3]});
2137 else if constexpr (input_width == 8)
2138 return reinterpret_cast<_R>(
2139 _Up{__a[0], __a[1], __a[2], __a[3], __a[4], __a[5], __a[6], __a[7],
2140 __b[0], __b[1], __b[2], __b[3], __b[4], __b[5], __b[6], __b[7]});
2141 else if constexpr (input_width == 16)
2142 return reinterpret_cast<_R>(
2143 _Up{__a[0], __a[1], __a[2], __a[3], __a[4], __a[5], __a[6],
2144 __a[7], __a[8], __a[9], __a[10], __a[11], __a[12], __a[13],
2145 __a[14], __a[15], __b[0], __b[1], __b[2], __b[3], __b[4],
2146 __b[5], __b[6], __b[7], __b[8], __b[9], __b[10], __b[11],
2147 __b[12], __b[13], __b[14], __b[15]});
2148 else if constexpr (input_width == 32)
2149 return reinterpret_cast<_R>(
2150 _Up{__a[0], __a[1], __a[2], __a[3], __a[4], __a[5], __a[6],
2151 __a[7], __a[8], __a[9], __a[10], __a[11], __a[12], __a[13],
2152 __a[14], __a[15], __a[16], __a[17], __a[18], __a[19], __a[20],
2153 __a[21], __a[22], __a[23], __a[24], __a[25], __a[26], __a[27],
2154 __a[28], __a[29], __a[30], __a[31], __b[0], __b[1], __b[2],
2155 __b[3], __b[4], __b[5], __b[6], __b[7], __b[8], __b[9],
2156 __b[10], __b[11], __b[12], __b[13], __b[14], __b[15], __b[16],
2157 __b[17], __b[18], __b[19], __b[20], __b[21], __b[22], __b[23],
2158 __b[24], __b[25], __b[26], __b[27], __b[28], __b[29], __b[30],
2159 __b[31]});
2160 }
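// E.g. concatenating two 4-lane vectors into one 8-lane vector:
//   __vector_type_t<float, 4> __lo = {0, 1, 2, 3}, __hi = {4, 5, 6, 7};
//   __vector_type_t<float, 8> __v8 = __concat(__lo, __hi); // {0, 1, 2, 3, 4, 5, 6, 7}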
2161
2162// }}}
2163// __zero_extend {{{
2164template <typename _Tp, typename _TVT = _VectorTraits<_Tp>>
2165 struct _ZeroExtendProxy
2166 {
2167 using value_type = typename _TVT::value_type;
2168 static constexpr size_t _Np = _TVT::_S_full_size;
2169 const _Tp __x;
2170
2171 template <typename _To, typename _ToVT = _VectorTraits<_To>,
2172 typename
2173 = enable_if_t<is_same_v<typename _ToVT::value_type, value_type>>>
2174 _GLIBCXX_SIMD_INTRINSIC operator _To() const
2175 {
2176 constexpr size_t _ToN = _ToVT::_S_full_size;
2177 if constexpr (_ToN == _Np)
2178 return __x;
2179 else if constexpr (_ToN == 2 * _Np)
2180 {
2181#ifdef _GLIBCXX_SIMD_WORKAROUND_XXX_3
2182 if constexpr (__have_avx && _TVT::template _S_is<float, 4>)
2183 return __vector_bitcast<value_type>(
2184 _mm256_insertf128_ps(__m256(), __x, 0));
2185 else if constexpr (__have_avx && _TVT::template _S_is<double, 2>)
2186 return __vector_bitcast<value_type>(
2187 _mm256_insertf128_pd(__m256d(), __x, 0));
2188 else if constexpr (__have_avx2 && _Np * sizeof(value_type) == 16)
2189 return __vector_bitcast<value_type>(
2190 _mm256_insertf128_si256(__m256i(), __to_intrin(__x), 0));
2191 else if constexpr (__have_avx512f && _TVT::template _S_is<float, 8>)
2192 {
2193 if constexpr (__have_avx512dq)
2194 return __vector_bitcast<value_type>(
2195 _mm512_insertf32x8(__m512(), __x, 0));
2196 else
2197 return reinterpret_cast<__m512>(
2198 _mm512_insertf64x4(__m512d(),
2199 reinterpret_cast<__m256d>(__x), 0));
2200 }
2201 else if constexpr (__have_avx512f
2202 && _TVT::template _S_is<double, 4>)
2203 return __vector_bitcast<value_type>(
2204 _mm512_insertf64x4(__m512d(), __x, 0));
2205 else if constexpr (__have_avx512f && _Np * sizeof(value_type) == 32)
2206 return __vector_bitcast<value_type>(
2207 _mm512_inserti64x4(__m512i(), __to_intrin(__x), 0));
2208#endif
2209 return __concat(__x, _Tp());
2210 }
2211 else if constexpr (_ToN == 4 * _Np)
2212 {
2213#ifdef _GLIBCXX_SIMD_WORKAROUND_XXX_3
2214 if constexpr (__have_avx512dq && _TVT::template _S_is<double, 2>)
2215 {
2216 return __vector_bitcast<value_type>(
2217 _mm512_insertf64x2(__m512d(), __x, 0));
2218 }
2219 else if constexpr (__have_avx512f
2220 && is_floating_point_v<value_type>)
2221 {
2222 return __vector_bitcast<value_type>(
2223 _mm512_insertf32x4(__m512(), reinterpret_cast<__m128>(__x),
2224 0));
2225 }
2226 else if constexpr (__have_avx512f && _Np * sizeof(value_type) == 16)
2227 {
2228 return __vector_bitcast<value_type>(
2229 _mm512_inserti32x4(__m512i(), __to_intrin(__x), 0));
2230 }
2231#endif
2232 return __concat(__concat(__x, _Tp()),
2233 __vector_type_t<value_type, _Np * 2>());
2234 }
2235 else if constexpr (_ToN == 8 * _Np)
2236 return __concat(operator __vector_type_t<value_type, _Np * 4>(),
2237 __vector_type_t<value_type, _Np * 4>());
2238 else if constexpr (_ToN == 16 * _Np)
2239 return __concat(operator __vector_type_t<value_type, _Np * 8>(),
2240 __vector_type_t<value_type, _Np * 8>());
2241 else
2242 __assert_unreachable<_Tp>();
2243 }
2244 };
2245
2246template <typename _Tp, typename _TVT = _VectorTraits<_Tp>>
2247 _GLIBCXX_SIMD_INTRINSIC _ZeroExtendProxy<_Tp, _TVT>
2248 __zero_extend(_Tp __x)
2249 { return {__x}; }
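// The returned proxy converts implicitly to any wider vector type with the
// same value_type, zero-filling the new high lanes, e.g.:
//   __vector_type_t<float, 4> __v = {1, 2, 3, 4};
//   __vector_type_t<float, 8> __w = __zero_extend(__v); // {1, 2, 3, 4, 0, 0, 0, 0}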
2250
2251// }}}
2252// __extract<_Offset, _SplitBy>{{{
2253template <int _Offset,
2254 int _SplitBy,
2255 typename _Tp,
2256 typename _TVT = _VectorTraits<_Tp>,
2257 typename _R = __vector_type_t<typename _TVT::value_type, _TVT::_S_full_size / _SplitBy>>
2258 _GLIBCXX_SIMD_INTRINSIC constexpr _R
2259 __extract(_Tp __in)
2260 {
2261 using value_type = typename _TVT::value_type;
2262#if _GLIBCXX_SIMD_X86INTRIN // {{{
2263 if constexpr (sizeof(_Tp) == 64 && _SplitBy == 4 && _Offset > 0)
2264 {
2265 if constexpr (__have_avx512dq && is_same_v<double, value_type>)
2266 return _mm512_extractf64x2_pd(__to_intrin(__in), _Offset);
2267 else if constexpr (is_floating_point_v<value_type>)
2268 return __vector_bitcast<value_type>(
2269 _mm512_extractf32x4_ps(__intrin_bitcast<__m512>(__in), _Offset));
2270 else
2271 return reinterpret_cast<_R>(
2272 _mm512_extracti32x4_epi32(__intrin_bitcast<__m512i>(__in),
2273 _Offset));
2274 }
2275 else
2276#endif // _GLIBCXX_SIMD_X86INTRIN }}}
2277 {
2278#ifdef _GLIBCXX_SIMD_WORKAROUND_XXX_1
2279 using _W = conditional_t<
2280 is_floating_point_v<value_type>, double,
2281 conditional_t<(sizeof(_R) >= 16), long long, value_type>>;
2282 static_assert(sizeof(_R) % sizeof(_W) == 0);
2283 constexpr int __return_width = sizeof(_R) / sizeof(_W);
2284 using _Up = __vector_type_t<_W, __return_width>;
2285 const auto __x = __vector_bitcast<_W>(__in);
2286#else
2287 constexpr int __return_width = _TVT::_S_full_size / _SplitBy;
2288 using _Up = _R;
2289 const __vector_type_t<value_type, _TVT::_S_full_size>& __x
2290 = __in; // only needed for _Tp = _SimdWrapper<value_type, _Np>
2291#endif
2292 constexpr int _O = _Offset * __return_width;
2293 return __call_with_subscripts<__return_width, _O>(
2294 __x, [](auto... __entries) _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA {
2295 return reinterpret_cast<_R>(_Up{__entries...});
2296 });
2297 }
2298 }
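// E.g. __extract<1, 2>(__v8) returns the upper half of an 8-lane vector as a
// 4-lane vector; _Offset counts in units of the result width, not in lanes of
// the input.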
2299
2300// }}}
2301// __lo/__hi64[z]{{{
2302template <typename _Tp,
2303 typename _R = __vector_type8_t<typename _VectorTraits<_Tp>::value_type>>
2304 _GLIBCXX_SIMD_INTRINSIC constexpr _R
2305 __lo64(_Tp __x)
2306 {
2307 _R __r{};
2308 __builtin_memcpy(&__r, &__x, 8);
2309 return __r;
2310 }
2311
2312template <typename _Tp,
2313 typename _R = __vector_type8_t<typename _VectorTraits<_Tp>::value_type>>
2314 _GLIBCXX_SIMD_INTRINSIC constexpr _R
2315 __hi64(_Tp __x)
2316 {
2317 static_assert(sizeof(_Tp) == 16, "use __hi64z if you meant it");
2318 _R __r{};
2319 __builtin_memcpy(&__r, reinterpret_cast<const char*>(&__x) + 8, 8);
2320 return __r;
2321 }
2322
2323template <typename _Tp,
2324 typename _R = __vector_type8_t<typename _VectorTraits<_Tp>::value_type>>
2325 _GLIBCXX_SIMD_INTRINSIC constexpr _R
2326 __hi64z([[maybe_unused]] _Tp __x)
2327 {
2328 _R __r{};
2329 if constexpr (sizeof(_Tp) == 16)
2330 __builtin_memcpy(&__r, reinterpret_cast<const char*>(&__x) + 8, 8);
2331 return __r;
2332 }
2333
2334// }}}
2335// __lo/__hi128{{{
2336template <typename _Tp>
2337 _GLIBCXX_SIMD_INTRINSIC constexpr auto
2338 __lo128(_Tp __x)
2339 { return __extract<0, sizeof(_Tp) / 16>(__x); }
2340
2341template <typename _Tp>
2342 _GLIBCXX_SIMD_INTRINSIC constexpr auto
2343 __hi128(_Tp __x)
2344 {
2345 static_assert(sizeof(__x) == 32);
2346 return __extract<1, 2>(__x);
2347 }
2348
2349// }}}
2350// __lo/__hi256{{{
2351template <typename _Tp>
2352 _GLIBCXX_SIMD_INTRINSIC constexpr auto
2353 __lo256(_Tp __x)
2354 {
2355 static_assert(sizeof(__x) == 64);
2356 return __extract<0, 2>(__x);
2357 }
2358
2359template <typename _Tp>
2360 _GLIBCXX_SIMD_INTRINSIC constexpr auto
2361 __hi256(_Tp __x)
2362 {
2363 static_assert(sizeof(__x) == 64);
2364 return __extract<1, 2>(__x);
2365 }
2366
2367// }}}
2368// __auto_bitcast{{{
2369template <typename _Tp>
2370 struct _AutoCast
2371 {
2372 static_assert(__is_vector_type_v<_Tp>);
2373
2374 const _Tp __x;
2375
2376 template <typename _Up, typename _UVT = _VectorTraits<_Up>>
2377 _GLIBCXX_SIMD_INTRINSIC constexpr operator _Up() const
2378 { return __intrin_bitcast<typename _UVT::type>(__x); }
2379 };
2380
2381template <typename _Tp>
2382 _GLIBCXX_SIMD_INTRINSIC constexpr _AutoCast<_Tp>
2383 __auto_bitcast(const _Tp& __x)
2384 { return {__x}; }
2385
2386template <typename _Tp, size_t _Np>
2387 _GLIBCXX_SIMD_INTRINSIC constexpr
2388 _AutoCast<typename _SimdWrapper<_Tp, _Np>::_BuiltinType>
2389 __auto_bitcast(const _SimdWrapper<_Tp, _Np>& __x)
2390 { return {__x._M_data}; }
2391
2392// }}}
2393// ^^^ ---- builtin vector types [[gnu::vector_size(N)]] and operations ---- ^^^
2394
2395#if _GLIBCXX_SIMD_HAVE_SSE_ABI
2396// __bool_storage_member_type{{{
2397#if _GLIBCXX_SIMD_HAVE_AVX512F && _GLIBCXX_SIMD_X86INTRIN
2398template <size_t _Size>
2399 struct __bool_storage_member_type
2400 {
2401 static_assert((_Size & (_Size - 1)) != 0,
2402 "This trait may only be used for non-power-of-2 sizes. "
2403 "Power-of-2 sizes must be specialized.");
2404 using type =
2405 typename __bool_storage_member_type<std::__bit_ceil(_Size)>::type;
2406 };
2407
2408template <>
2409 struct __bool_storage_member_type<1> { using type = bool; };
2410
2411template <>
2412 struct __bool_storage_member_type<2> { using type = __mmask8; };
2413
2414template <>
2415 struct __bool_storage_member_type<4> { using type = __mmask8; };
2416
2417template <>
2418 struct __bool_storage_member_type<8> { using type = __mmask8; };
2419
2420template <>
2421 struct __bool_storage_member_type<16> { using type = __mmask16; };
2422
2423template <>
2424 struct __bool_storage_member_type<32> { using type = __mmask32; };
2425
2426template <>
2427 struct __bool_storage_member_type<64> { using type = __mmask64; };
2428#endif // _GLIBCXX_SIMD_HAVE_AVX512F
2429
2430// }}}
2431// __intrinsic_type (x86){{{
2432// the following excludes bool via __is_vectorizable
2433#if _GLIBCXX_SIMD_HAVE_SSE
2434template <typename _Tp, size_t _Bytes>
2435 struct __intrinsic_type<_Tp, _Bytes, enable_if_t<__is_vectorizable_v<_Tp> && _Bytes <= 64>>
2436 {
2437 static_assert(!is_same_v<_Tp, long double>,
2438 "no __intrinsic_type support for long double on x86");
2439
2440 static constexpr size_t _S_VBytes = _Bytes <= 16 ? 16 : _Bytes <= 32 ? 32 : 64;
2441
2442 using type [[__gnu__::__vector_size__(_S_VBytes)]]
2443 = conditional_t<is_integral_v<_Tp>, long long int, _Tp>;
2444 };
2445#endif // _GLIBCXX_SIMD_HAVE_SSE
2446
2447// }}}
2448#endif // _GLIBCXX_SIMD_HAVE_SSE_ABI
2449// __intrinsic_type (ARM){{{
2450#if _GLIBCXX_SIMD_HAVE_NEON
2451template <>
2452 struct __intrinsic_type<float, 8, void>
2453 { using type = float32x2_t; };
2454
2455template <>
2456 struct __intrinsic_type<float, 16, void>
2457 { using type = float32x4_t; };
2458
2459template <>
2460 struct __intrinsic_type<double, 8, void>
2461 {
2462#if _GLIBCXX_SIMD_HAVE_NEON_A64
2463 using type = float64x1_t;
2464#endif
2465 };
2466
2467template <>
2468 struct __intrinsic_type<double, 16, void>
2469 {
2470#if _GLIBCXX_SIMD_HAVE_NEON_A64
2471 using type = float64x2_t;
2472#endif
2473 };
2474
2475#define _GLIBCXX_SIMD_ARM_INTRIN(_Bits, _Np) \
2476template <> \
2477 struct __intrinsic_type<__int_with_sizeof_t<_Bits / 8>, \
2478 _Np * _Bits / 8, void> \
2479 { using type = int##_Bits##x##_Np##_t; }; \
2480template <> \
2481 struct __intrinsic_type<make_unsigned_t<__int_with_sizeof_t<_Bits / 8>>, \
2482 _Np * _Bits / 8, void> \
2483 { using type = uint##_Bits##x##_Np##_t; }
2484_GLIBCXX_SIMD_ARM_INTRIN(8, 8);
2485_GLIBCXX_SIMD_ARM_INTRIN(8, 16);
2486_GLIBCXX_SIMD_ARM_INTRIN(16, 4);
2487_GLIBCXX_SIMD_ARM_INTRIN(16, 8);
2488_GLIBCXX_SIMD_ARM_INTRIN(32, 2);
2489_GLIBCXX_SIMD_ARM_INTRIN(32, 4);
2490_GLIBCXX_SIMD_ARM_INTRIN(64, 1);
2491_GLIBCXX_SIMD_ARM_INTRIN(64, 2);
2492#undef _GLIBCXX_SIMD_ARM_INTRIN
2493
2494template <typename _Tp, size_t _Bytes>
2495 struct __intrinsic_type<_Tp, _Bytes, enable_if_t<__is_vectorizable_v<_Tp> && _Bytes <= 16>>
2496 {
2497 static constexpr int _SVecBytes = _Bytes <= 8 ? 8 : 16;
2498
2499 using _Ip = __int_for_sizeof_t<_Tp>;
2500
2501 using _Up = conditional_t<
2502 is_floating_point_v<_Tp>, _Tp,
2503 conditional_t<is_unsigned_v<_Tp>, make_unsigned_t<_Ip>, _Ip>>;
2504
2505 static_assert(!is_same_v<_Tp, _Up> || _SVecBytes != _Bytes,
2506 "should use explicit specialization above");
2507
2508 using type = typename __intrinsic_type<_Up, _SVecBytes>::type;
2509 };
2510#endif // _GLIBCXX_SIMD_HAVE_NEON
2511
2512// }}}
2513// __intrinsic_type (PPC){{{
2514#ifdef __ALTIVEC__
2515template <typename _Tp>
2516 struct __intrinsic_type_impl;
2517
2518#define _GLIBCXX_SIMD_PPC_INTRIN(_Tp) \
2519 template <> \
2520 struct __intrinsic_type_impl<_Tp> { using type = __vector _Tp; }
2521_GLIBCXX_SIMD_PPC_INTRIN(float);
2522#ifdef __VSX__
2523_GLIBCXX_SIMD_PPC_INTRIN(double);
2524#endif
2525_GLIBCXX_SIMD_PPC_INTRIN(signed char);
2526_GLIBCXX_SIMD_PPC_INTRIN(unsigned char);
2527_GLIBCXX_SIMD_PPC_INTRIN(signed short);
2528_GLIBCXX_SIMD_PPC_INTRIN(unsigned short);
2529_GLIBCXX_SIMD_PPC_INTRIN(signed int);
2530_GLIBCXX_SIMD_PPC_INTRIN(unsigned int);
2531#if defined __VSX__ || __SIZEOF_LONG__ == 4
2532_GLIBCXX_SIMD_PPC_INTRIN(signed long);
2533_GLIBCXX_SIMD_PPC_INTRIN(unsigned long);
2534#endif
2535#ifdef __VSX__
2536_GLIBCXX_SIMD_PPC_INTRIN(signed long long);
2537_GLIBCXX_SIMD_PPC_INTRIN(unsigned long long);
2538#endif
2539#undef _GLIBCXX_SIMD_PPC_INTRIN
2540
2541template <typename _Tp, size_t _Bytes>
2542 struct __intrinsic_type<_Tp, _Bytes, enable_if_t<__is_vectorizable_v<_Tp> && _Bytes <= 16>>
2543 {
2544 static constexpr bool _S_is_ldouble = is_same_v<_Tp, long double>;
2545
2546 // allow _Tp == long double with -mlong-double-64
2547 static_assert(!(_S_is_ldouble && sizeof(long double) > sizeof(double)),
2548 "no __intrinsic_type support for 128-bit floating point on PowerPC");
2549
2550#ifndef __VSX__
2551 static_assert(!(is_same_v<_Tp, double>
2552 || (_S_is_ldouble && sizeof(long double) == sizeof(double))),
2553 "no __intrinsic_type support for 64-bit floating point on PowerPC w/o VSX");
2554#endif
2555
2556 static constexpr auto __element_type()
2557 {
2558 if constexpr (is_floating_point_v<_Tp>)
2559 {
2560 if constexpr (_S_is_ldouble)
2561 return double {};
2562 else
2563 return _Tp {};
2564 }
2565 else if constexpr (is_signed_v<_Tp>)
2566 {
2567 if constexpr (sizeof(_Tp) == sizeof(_SChar))
2568 return _SChar {};
2569 else if constexpr (sizeof(_Tp) == sizeof(short))
2570 return short {};
2571 else if constexpr (sizeof(_Tp) == sizeof(int))
2572 return int {};
2573 else if constexpr (sizeof(_Tp) == sizeof(_LLong))
2574 return _LLong {};
2575 }
2576 else
2577 {
2578 if constexpr (sizeof(_Tp) == sizeof(_UChar))
2579 return _UChar {};
2580 else if constexpr (sizeof(_Tp) == sizeof(_UShort))
2581 return _UShort {};
2582 else if constexpr (sizeof(_Tp) == sizeof(_UInt))
2583 return _UInt {};
2584 else if constexpr (sizeof(_Tp) == sizeof(_ULLong))
2585 return _ULLong {};
2586 }
2587 }
2588
2589 using type = typename __intrinsic_type_impl<decltype(__element_type())>::type;
2590 };
2591#endif // __ALTIVEC__
2592
2593// }}}
2594// _SimdWrapper<bool>{{{1
2595template <size_t _Width>
2596 struct _SimdWrapper<bool, _Width,
2597 void_t<typename __bool_storage_member_type<_Width>::type>>
2598 {
2599 using _BuiltinType = typename __bool_storage_member_type<_Width>::type;
2600 using value_type = bool;
2601
2602 static constexpr size_t _S_full_size = sizeof(_BuiltinType) * __CHAR_BIT__;
2603
2604 _GLIBCXX_SIMD_INTRINSIC constexpr _SimdWrapper<bool, _S_full_size>
2605 __as_full_vector() const
2606 { return _M_data; }
2607
2608 _GLIBCXX_SIMD_INTRINSIC constexpr
2609 _SimdWrapper() = default;
2610
2611 _GLIBCXX_SIMD_INTRINSIC constexpr
2612    _SimdWrapper(_BuiltinType __k) : _M_data(__k) {}
2613
2614 _GLIBCXX_SIMD_INTRINSIC
2615 operator const _BuiltinType&() const
2616 { return _M_data; }
2617
2618 _GLIBCXX_SIMD_INTRINSIC
2619 operator _BuiltinType&()
2620 { return _M_data; }
2621
2622 _GLIBCXX_SIMD_INTRINSIC _BuiltinType
2623 __intrin() const
2624 { return _M_data; }
2625
2626 _GLIBCXX_SIMD_INTRINSIC constexpr value_type
2627 operator[](size_t __i) const
2628 { return _M_data & (_BuiltinType(1) << __i); }
2629
2630 template <size_t __i>
2631 _GLIBCXX_SIMD_INTRINSIC constexpr value_type
2632 operator[](_SizeConstant<__i>) const
2633 { return _M_data & (_BuiltinType(1) << __i); }
2634
2635 _GLIBCXX_SIMD_INTRINSIC constexpr void
2636 _M_set(size_t __i, value_type __x)
2637 {
2638 if (__x)
2639 _M_data |= (_BuiltinType(1) << __i);
2640 else
2641 _M_data &= ~(_BuiltinType(1) << __i);
2642 }
2643
2644 _GLIBCXX_SIMD_INTRINSIC constexpr bool
2645 _M_is_constprop() const
2646 { return __builtin_constant_p(_M_data); }
2647
2648 _GLIBCXX_SIMD_INTRINSIC constexpr bool
2649 _M_is_constprop_none_of() const
2650 {
2651 if (__builtin_constant_p(_M_data))
2652 {
2653 constexpr int __nbits = sizeof(_BuiltinType) * __CHAR_BIT__;
2654 constexpr _BuiltinType __active_mask
2655 = ~_BuiltinType() >> (__nbits - _Width);
2656 return (_M_data & __active_mask) == 0;
2657 }
2658 return false;
2659 }
2660
2661 _GLIBCXX_SIMD_INTRINSIC constexpr bool
2662 _M_is_constprop_all_of() const
2663 {
2664 if (__builtin_constant_p(_M_data))
2665 {
2666 constexpr int __nbits = sizeof(_BuiltinType) * __CHAR_BIT__;
2667 constexpr _BuiltinType __active_mask
2668 = ~_BuiltinType() >> (__nbits - _Width);
2669 return (_M_data & __active_mask) == __active_mask;
2670 }
2671 return false;
2672 }
2673
2674 _BuiltinType _M_data;
2675 };
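// Here a mask is stored as an AVX-512-style bitmask: bit __i of _M_data holds
// element __i, e.g. _SimdWrapper<bool, 8>(0b101)[2] == true.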
2676
2677// _SimdWrapperBase{{{1
2678template <bool _MustZeroInitPadding, typename _BuiltinType>
2679 struct _SimdWrapperBase;
2680
2681template <typename _BuiltinType>
2682 struct _SimdWrapperBase<false, _BuiltinType> // no padding or no SNaNs
2683 {
2684 _GLIBCXX_SIMD_INTRINSIC constexpr
2685 _SimdWrapperBase() = default;
2686
2687 _GLIBCXX_SIMD_INTRINSIC constexpr
2688 _SimdWrapperBase(_BuiltinType __init) : _M_data(__init) {}
2689
2690 _BuiltinType _M_data;
2691 };
2692
2693template <typename _BuiltinType>
2694 struct _SimdWrapperBase<true, _BuiltinType> // with padding that needs to
2695 // never become SNaN
2696 {
2697 _GLIBCXX_SIMD_INTRINSIC constexpr
2698 _SimdWrapperBase() : _M_data() {}
2699
2700 _GLIBCXX_SIMD_INTRINSIC constexpr
2701 _SimdWrapperBase(_BuiltinType __init) : _M_data(__init) {}
2702
2703 _BuiltinType _M_data;
2704 };
2705
2706// }}}
2707// _SimdWrapper{{{
2708struct _DisabledSimdWrapper;
2709
2710template <typename _Tp, size_t _Width>
2711 struct _SimdWrapper<
2712 _Tp, _Width,
2713 void_t<__vector_type_t<_Tp, _Width>, __intrinsic_type_t<_Tp, _Width>>>
2714 : _SimdWrapperBase<__has_iec559_behavior<__signaling_NaN, _Tp>::value
2715 && sizeof(_Tp) * _Width
2716 == sizeof(__vector_type_t<_Tp, _Width>),
2717 __vector_type_t<_Tp, _Width>>
2718 {
2719 static constexpr bool _S_need_default_init
2720 = __has_iec559_behavior<__signaling_NaN, _Tp>::value
2721 and sizeof(_Tp) * _Width == sizeof(__vector_type_t<_Tp, _Width>);
2722
2723 using _BuiltinType = __vector_type_t<_Tp, _Width>;
2724
2725 using _Base = _SimdWrapperBase<_S_need_default_init, _BuiltinType>;
2726
2727 static_assert(__is_vectorizable_v<_Tp>);
2728    static_assert(_Width >= 2); // a width of 1 makes no sense; use _Tp directly instead
2729
2730 using value_type = _Tp;
2731
2732 static inline constexpr size_t _S_full_size
2733 = sizeof(_BuiltinType) / sizeof(value_type);
2734 static inline constexpr int _S_size = _Width;
2735 static inline constexpr bool _S_is_partial = _S_full_size != _S_size;
2736
2737 using _Base::_M_data;
2738
2739 _GLIBCXX_SIMD_INTRINSIC constexpr _SimdWrapper<_Tp, _S_full_size>
2740 __as_full_vector() const
2741 { return _M_data; }
2742
2743 _GLIBCXX_SIMD_INTRINSIC constexpr
2744 _SimdWrapper(initializer_list<_Tp> __init)
2745 : _Base(__generate_from_n_evaluations<_Width, _BuiltinType>(
2746 [&](auto __i) _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA {
2747 return __init.begin()[__i.value];
2748 })) {}
2749
2750 _GLIBCXX_SIMD_INTRINSIC constexpr
2751 _SimdWrapper() = default;
2752
2753 _GLIBCXX_SIMD_INTRINSIC constexpr
2754 _SimdWrapper(const _SimdWrapper&) = default;
2755
2756 _GLIBCXX_SIMD_INTRINSIC constexpr
2757 _SimdWrapper(_SimdWrapper&&) = default;
2758
2759 _GLIBCXX_SIMD_INTRINSIC constexpr _SimdWrapper&
2760 operator=(const _SimdWrapper&) = default;
2761
2762 _GLIBCXX_SIMD_INTRINSIC constexpr _SimdWrapper&
2763 operator=(_SimdWrapper&&) = default;
2764
2765 // Convert from exactly matching __vector_type_t
2766 using _SimdWrapperBase<_S_need_default_init, _BuiltinType>::_SimdWrapperBase;
2767
2768 // Convert from __intrinsic_type_t if __intrinsic_type_t and __vector_type_t differ, otherwise
2769 // this ctor should not exist. Making the argument type unusable is our next best solution.
2770 _GLIBCXX_SIMD_INTRINSIC constexpr
2771 _SimdWrapper(conditional_t<is_same_v<_BuiltinType, __intrinsic_type_t<_Tp, _Width>>,
2772 _DisabledSimdWrapper, __intrinsic_type_t<_Tp, _Width>> __x)
2773 : _Base(__vector_bitcast<_Tp, _Width>(__x)) {}
2774
2775 // Convert from different __vector_type_t, but only if bit reinterpretation is a correct
2776 // conversion of the value_type
2777 template <typename _V, typename _TVT = _VectorTraits<_V>,
2778 typename = enable_if_t<sizeof(typename _TVT::value_type) == sizeof(_Tp)
2779 and sizeof(_V) == sizeof(_BuiltinType)
2780 and is_integral_v<_Tp>
2781 and is_integral_v<typename _TVT::value_type>>>
2782 _GLIBCXX_SIMD_INTRINSIC constexpr
2783 _SimdWrapper(_V __x)
2784 : _Base(reinterpret_cast<_BuiltinType>(__x)) {}
2785
2786 template <typename... _As,
2787 typename = enable_if_t<((is_same_v<simd_abi::scalar, _As> && ...)
2788 && sizeof...(_As) <= _Width)>>
2789 _GLIBCXX_SIMD_INTRINSIC constexpr
2790 operator _SimdTuple<_Tp, _As...>() const
2791 {
2792 return __generate_from_n_evaluations<sizeof...(_As), _SimdTuple<_Tp, _As...>>(
2793 [&](auto __i) constexpr _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA
2794 { return _M_data[int(__i)]; });
2795 }
2796
2797 _GLIBCXX_SIMD_INTRINSIC constexpr
2798 operator const _BuiltinType&() const
2799 { return _M_data; }
2800
2801 _GLIBCXX_SIMD_INTRINSIC constexpr
2802 operator _BuiltinType&()
2803 { return _M_data; }
2804
2805 _GLIBCXX_SIMD_INTRINSIC constexpr _Tp
2806 operator[](size_t __i) const
2807 { return _M_data[__i]; }
2808
2809 template <size_t __i>
2810 _GLIBCXX_SIMD_INTRINSIC constexpr _Tp
2811 operator[](_SizeConstant<__i>) const
2812 { return _M_data[__i]; }
2813
2814 _GLIBCXX_SIMD_INTRINSIC constexpr void
2815 _M_set(size_t __i, _Tp __x)
2816 {
2817 if (__builtin_is_constant_evaluated())
2818 _M_data = __generate_from_n_evaluations<_Width, _BuiltinType>([&](auto __j) {
2819 return __j == __i ? __x : _M_data[__j()];
2820 });
2821 else
2822 _M_data[__i] = __x;
2823 }
2824
2825 _GLIBCXX_SIMD_INTRINSIC
2826 constexpr bool
2827 _M_is_constprop() const
2828 { return __builtin_constant_p(_M_data); }
2829
2830 _GLIBCXX_SIMD_INTRINSIC constexpr bool
2831 _M_is_constprop_none_of() const
2832 {
2833 if (__builtin_constant_p(_M_data))
2834 {
2835 bool __r = true;
2836 if constexpr (is_floating_point_v<_Tp>)
2837 {
2838 using _Ip = __int_for_sizeof_t<_Tp>;
2839 const auto __intdata = __vector_bitcast<_Ip>(_M_data);
2840 __execute_n_times<_Width>(
2841 [&](auto __i) { __r &= __intdata[__i.value] == _Ip(); });
2842 }
2843 else
2844 __execute_n_times<_Width>(
2845 [&](auto __i) { __r &= _M_data[__i.value] == _Tp(); });
2846 if (__builtin_constant_p(__r))
2847 return __r;
2848 }
2849 return false;
2850 }
2851
2852 _GLIBCXX_SIMD_INTRINSIC constexpr bool
2853 _M_is_constprop_all_of() const
2854 {
2855 if (__builtin_constant_p(_M_data))
2856 {
2857 bool __r = true;
2858 if constexpr (is_floating_point_v<_Tp>)
2859 {
2860 using _Ip = __int_for_sizeof_t<_Tp>;
2861 const auto __intdata = __vector_bitcast<_Ip>(_M_data);
2862 __execute_n_times<_Width>(
2863 [&](auto __i) { __r &= __intdata[__i.value] == ~_Ip(); });
2864 }
2865 else
2866 __execute_n_times<_Width>(
2867 [&](auto __i) { __r &= _M_data[__i.value] == ~_Tp(); });
2868 if (__builtin_constant_p(__r))
2869 return __r;
2870 }
2871 return false;
2872 }
2873 };
2874
2875// }}}
2876
2877// __vectorized_sizeof {{{
2878template <typename _Tp>
2879 constexpr size_t
2880 __vectorized_sizeof()
2881 {
2882 if constexpr (!__is_vectorizable_v<_Tp>)
2883 return 0;
2884
2885 if constexpr (sizeof(_Tp) <= 8)
2886 {
2887 // X86:
2888 if constexpr (__have_avx512bw)
2889 return 64;
2890 if constexpr (__have_avx512f && sizeof(_Tp) >= 4)
2891 return 64;
2892 if constexpr (__have_avx2)
2893 return 32;
2894 if constexpr (__have_avx && is_floating_point_v<_Tp>)
2895 return 32;
2896 if constexpr (__have_sse2)
2897 return 16;
2898 if constexpr (__have_sse && is_same_v<_Tp, float>)
2899 return 16;
2900 /* The following is too much trouble because of mixed MMX and x87 code.
2901       * While nothing here explicitly uses MMX instructions or registers,
2902 * they are still emitted but no EMMS cleanup is done.
2903 if constexpr (__have_mmx && sizeof(_Tp) <= 4 && is_integral_v<_Tp>)
2904 return 8;
2905 */
2906
2907 // PowerPC:
2908 if constexpr (__have_power8vec
2909 || (__have_power_vmx && (sizeof(_Tp) < 8))
2910 || (__have_power_vsx && is_floating_point_v<_Tp>) )
2911 return 16;
2912
2913 // ARM:
2914 if constexpr (__have_neon_a64
2915 || (__have_neon_a32 && !is_same_v<_Tp, double>) )
2916 return 16;
2917 if constexpr (__have_neon
2918 && sizeof(_Tp) < 8
2919                     // Only allow fp if the user allows non-IEC559 fp (e.g.
2920                     // via -ffast-math). ARMv7 NEON fp does not conform to
2921                     // IEC559.
2922 && (__support_neon_float || !is_floating_point_v<_Tp>))
2923 return 16;
2924 }
2925
2926 return sizeof(_Tp);
2927 }
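// Worked example (x86): with AVX2 but without AVX-512,
// __vectorized_sizeof<float>() returns 32, so the native ABI holds
// 32 / sizeof(float) == 8 elements per vector.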
2928
2929// }}}
2930namespace simd_abi {
2931// most of simd_abi is defined in simd_detail.h
2932template <typename _Tp>
2933 inline constexpr int max_fixed_size
2934 = (__have_avx512bw && sizeof(_Tp) == 1) ? 64 : 32;
2935
2936// compatible {{{
2937#if defined __x86_64__ || defined __aarch64__
2938template <typename _Tp>
2939 using compatible = conditional_t<(sizeof(_Tp) <= 8), _VecBuiltin<16>, scalar>;
2940#elif defined __ARM_NEON
2941// FIXME: not sure, probably needs to be scalar (or dependent on the hard-float
2942// ABI?)
2943template <typename _Tp>
2944 using compatible
2945 = conditional_t<(sizeof(_Tp) < 8
2946 && (__support_neon_float || !is_floating_point_v<_Tp>)),
2947 _VecBuiltin<16>, scalar>;
2948#else
2949template <typename>
2950 using compatible = scalar;
2951#endif
2952
2953// }}}
2954// native {{{
2955template <typename _Tp>
2956 constexpr auto
2957 __determine_native_abi()
2958 {
2959 constexpr size_t __bytes = __vectorized_sizeof<_Tp>();
2960 if constexpr (__bytes == sizeof(_Tp))
2961 return static_cast<scalar*>(nullptr);
2962 else if constexpr (__have_avx512vl || (__have_avx512f && __bytes == 64))
2963 return static_cast<_VecBltnBtmsk<__bytes>*>(nullptr);
2964 else
2965 return static_cast<_VecBuiltin<__bytes>*>(nullptr);
2966 }
2967
2968template <typename _Tp, typename = enable_if_t<__is_vectorizable_v<_Tp>>>
2969 using native = remove_pointer_t<decltype(__determine_native_abi<_Tp>())>;
2970
2971// }}}
2972// __default_abi {{{
2973#if defined _GLIBCXX_SIMD_DEFAULT_ABI
2974template <typename _Tp>
2975 using __default_abi = _GLIBCXX_SIMD_DEFAULT_ABI<_Tp>;
2976#else
2977template <typename _Tp>
2978 using __default_abi = compatible<_Tp>;
2979#endif
2980
2981// }}}
2982} // namespace simd_abi
2983
2984// traits {{{1
2985template <typename _Tp>
2986 struct is_simd_flag_type
2987 : false_type
2988 {};
2989
2990template <>
2991 struct is_simd_flag_type<element_aligned_tag>
2992 : true_type
2993 {};
2994
2995template <>
2996 struct is_simd_flag_type<vector_aligned_tag>
2997 : true_type
2998 {};
2999
3000template <size_t _Np>
3001 struct is_simd_flag_type<overaligned_tag<_Np>>
3002 : __bool_constant<(_Np > 0) and __has_single_bit(_Np)>
3003 {};
3004
3005template <typename _Tp>
3006 inline constexpr bool is_simd_flag_type_v = is_simd_flag_type<_Tp>::value;
3007
3008template <typename _Tp, typename = enable_if_t<is_simd_flag_type_v<_Tp>>>
3009 using _IsSimdFlagType = _Tp;
3010
3011// is_abi_tag {{{2
3012template <typename _Tp, typename = void_t<>>
3013 struct is_abi_tag : false_type {};
3014
3015template <typename _Tp>
3016 struct is_abi_tag<_Tp, void_t<typename _Tp::_IsValidAbiTag>>
3017 : public _Tp::_IsValidAbiTag {};
3018
3019template <typename _Tp>
3020 inline constexpr bool is_abi_tag_v = is_abi_tag<_Tp>::value;
3021
3022// is_simd(_mask) {{{2
3023template <typename _Tp>
3024 struct is_simd : public false_type {};
3025
3026template <typename _Tp>
3027 inline constexpr bool is_simd_v = is_simd<_Tp>::value;
3028
3029template <typename _Tp>
3030 struct is_simd_mask : public false_type {};
3031
3032template <typename _Tp>
3033  inline constexpr bool is_simd_mask_v = is_simd_mask<_Tp>::value;
3034
3035// simd_size {{{2
3036template <typename _Tp, typename _Abi, typename = void>
3037 struct __simd_size_impl {};
3038
3039template <typename _Tp, typename _Abi>
3040 struct __simd_size_impl<
3041 _Tp, _Abi,
3042 enable_if_t<conjunction_v<__is_vectorizable<_Tp>, is_abi_tag<_Abi>>>>
3043 : _SizeConstant<_Abi::template _S_size<_Tp>> {};
3044
3045template <typename _Tp, typename _Abi = simd_abi::__default_abi<_Tp>>
3046 struct simd_size : __simd_size_impl<_Tp, _Abi> {};
3047
3048template <typename _Tp, typename _Abi = simd_abi::__default_abi<_Tp>>
3049 inline constexpr size_t simd_size_v = simd_size<_Tp, _Abi>::value;
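// E.g. simd_size_v<float, simd_abi::fixed_size<7>> == 7; for the native ABI it
// equals the vector register width in bytes divided by sizeof(float).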
3050
3051// simd_abi::deduce {{{2
3052template <typename _Tp, size_t _Np, typename = void>
3053 struct __deduce_impl;
3054
3055namespace simd_abi {
3056/**
3057 * @tparam _Tp The requested `value_type` for the elements.
3058 * @tparam _Np The requested number of elements.
3059 * @tparam _Abis This parameter is ignored, since this implementation cannot
3060 * make any use of it. Either a good native ABI is matched and used as `type`
3061 * alias, or the `fixed_size<_Np>` ABI is used, which internally is built from
3062 * the best matching native ABIs.
3063 */
3064template <typename _Tp, size_t _Np, typename...>
3065 struct deduce : __deduce_impl<_Tp, _Np> {};
3066
3067template <typename _Tp, size_t _Np, typename... _Abis>
3068 using deduce_t = typename deduce<_Tp, _Np, _Abis...>::type;
3069} // namespace simd_abi
3070
3071// }}}2
3072// rebind_simd {{{2
3073template <typename _Tp, typename _V, typename = void>
3074 struct rebind_simd;
3075
3076template <typename _Tp, typename _Up, typename _Abi>
3077 struct rebind_simd<_Tp, simd<_Up, _Abi>,
3078 void_t<simd_abi::deduce_t<_Tp, simd_size_v<_Up, _Abi>, _Abi>>>
3079 { using type = simd<_Tp, simd_abi::deduce_t<_Tp, simd_size_v<_Up, _Abi>, _Abi>>; };
3080
3081template <typename _Tp, typename _Up, typename _Abi>
3082 struct rebind_simd<_Tp, simd_mask<_Up, _Abi>,
3083 void_t<simd_abi::deduce_t<_Tp, simd_size_v<_Up, _Abi>, _Abi>>>
3084 { using type = simd_mask<_Tp, simd_abi::deduce_t<_Tp, simd_size_v<_Up, _Abi>, _Abi>>; };
3085
3086template <typename _Tp, typename _V>
3087 using rebind_simd_t = typename rebind_simd<_Tp, _V>::type;
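// E.g. rebind_simd_t<int, native_simd<float>> is a simd of int with
// native_simd<float>::size() elements (the ABI tag may change to fit).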
3088
3089// resize_simd {{{2
3090template <int _Np, typename _V, typename = void>
3091 struct resize_simd;
3092
3093template <int _Np, typename _Tp, typename _Abi>
3094 struct resize_simd<_Np, simd<_Tp, _Abi>, void_t<simd_abi::deduce_t<_Tp, _Np, _Abi>>>
3095 { using type = simd<_Tp, simd_abi::deduce_t<_Tp, _Np, _Abi>>; };
3096
3097template <int _Np, typename _Tp, typename _Abi>
3098 struct resize_simd<_Np, simd_mask<_Tp, _Abi>, void_t<simd_abi::deduce_t<_Tp, _Np, _Abi>>>
3099 { using type = simd_mask<_Tp, simd_abi::deduce_t<_Tp, _Np, _Abi>>; };
3100
3101template <int _Np, typename _V>
3102 using resize_simd_t = typename resize_simd<_Np, _V>::type;
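// E.g. resize_simd_t<4, native_simd<float>> is a 4-element simd<float>,
// keeping the value_type and deducing a suitable ABI tag.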
3103
3104// }}}2
3105// memory_alignment {{{2
3106template <typename _Tp, typename _Up = typename _Tp::value_type>
3107 struct memory_alignment
3108 : public _SizeConstant<vector_aligned_tag::_S_alignment<_Tp, _Up>> {};
3109
3110template <typename _Tp, typename _Up = typename _Tp::value_type>
3111 inline constexpr size_t memory_alignment_v = memory_alignment<_Tp, _Up>::value;
3112
3113// class template simd [simd] {{{1
3114template <typename _Tp, typename _Abi = simd_abi::__default_abi<_Tp>>
3115 class simd;
3116
3117template <typename _Tp, typename _Abi>
3118 struct is_simd<simd<_Tp, _Abi>> : public true_type {};
3119
3120template <typename _Tp>
3121 using native_simd = simd<_Tp, simd_abi::native<_Tp>>;
3122
3123template <typename _Tp, int _Np>
3124 using fixed_size_simd = simd<_Tp, simd_abi::fixed_size<_Np>>;
3125
3126template <typename _Tp, size_t _Np>
3127 using __deduced_simd = simd<_Tp, simd_abi::deduce_t<_Tp, _Np>>;
3128
3129// class template simd_mask [simd_mask] {{{1
3130template <typename _Tp, typename _Abi = simd_abi::__default_abi<_Tp>>
3131 class simd_mask;
3132
3133template <typename _Tp, typename _Abi>
3134 struct is_simd_mask<simd_mask<_Tp, _Abi>> : public true_type {};
3135
3136template <typename _Tp>
3137 using native_simd_mask = simd_mask<_Tp, simd_abi::native<_Tp>>;
3138
3139template <typename _Tp, int _Np>
3140 using fixed_size_simd_mask = simd_mask<_Tp, simd_abi::fixed_size<_Np>>;
3141
3142template <typename _Tp, size_t _Np>
3143 using __deduced_simd_mask = simd_mask<_Tp, simd_abi::deduce_t<_Tp, _Np>>;
3144
3145// casts [simd.casts] {{{1
3146// static_simd_cast {{{2
3147template <typename _Tp, typename _Up, typename _Ap, bool = is_simd_v<_Tp>, typename = void>
3148 struct __static_simd_cast_return_type;
3149
3150template <typename _Tp, typename _A0, typename _Up, typename _Ap>
3151 struct __static_simd_cast_return_type<simd_mask<_Tp, _A0>, _Up, _Ap, false, void>
3152 : __static_simd_cast_return_type<simd<_Tp, _A0>, _Up, _Ap> {};
3153
3154template <typename _Tp, typename _Up, typename _Ap>
3155 struct __static_simd_cast_return_type<
3156 _Tp, _Up, _Ap, true, enable_if_t<_Tp::size() == simd_size_v<_Up, _Ap>>>
3157 { using type = _Tp; };
3158
3159template <typename _Tp, typename _Ap>
3160 struct __static_simd_cast_return_type<_Tp, _Tp, _Ap, false,
3161#ifdef _GLIBCXX_SIMD_FIX_P2TS_ISSUE66
3162 enable_if_t<__is_vectorizable_v<_Tp>>
3163#else
3164 void
3165#endif
3166 >
3167 { using type = simd<_Tp, _Ap>; };
3168
3169template <typename _Tp, typename = void>
3170  struct __safe_make_signed { using type = _Tp; };
3171
3172template <typename _Tp>
3173 struct __safe_make_signed<_Tp, enable_if_t<is_integral_v<_Tp>>>
3174 {
3175 // the extra make_unsigned_t is because of PR85951
3176 using type = make_signed_t<make_unsigned_t<_Tp>>;
3177 };
3178
3179template <typename _Tp>
3180 using safe_make_signed_t = typename __safe_make_signed<_Tp>::type;
3181
3182template <typename _Tp, typename _Up, typename _Ap>
3183 struct __static_simd_cast_return_type<_Tp, _Up, _Ap, false,
3184#ifdef _GLIBCXX_SIMD_FIX_P2TS_ISSUE66
3185 enable_if_t<__is_vectorizable_v<_Tp>>
3186#else
3187 void
3188#endif
3189 >
3190 {
3191 using type = conditional_t<
3192 (is_integral_v<_Up> && is_integral_v<_Tp> &&
3193#ifndef _GLIBCXX_SIMD_FIX_P2TS_ISSUE65
3194 is_signed_v<_Up> != is_signed_v<_Tp> &&
3195#endif
3196 is_same_v<safe_make_signed_t<_Up>, safe_make_signed_t<_Tp>>),
3197 simd<_Tp, _Ap>, fixed_size_simd<_Tp, simd_size_v<_Up, _Ap>>>;
3198 };
3199
3200template <typename _Tp, typename _Up, typename _Ap,
3201 typename _R
3202 = typename __static_simd_cast_return_type<_Tp, _Up, _Ap>::type>
3203 _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR _R
3204 static_simd_cast(const simd<_Up, _Ap>& __x)
3205 {
3206 if constexpr (is_same<_R, simd<_Up, _Ap>>::value)
3207 return __x;
3208 else
3209 {
3210 _SimdConverter<_Up, _Ap, typename _R::value_type, typename _R::abi_type>
3211 __c;
3212 return _R(__private_init, __c(__data(__x)));
3213 }
3214 }
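// E.g. static_simd_cast<int>(__floatv) (hypothetical __floatv of type
// simd<float, _Ap>) converts every element as if by static_cast<int> and, in
// general, returns fixed_size_simd<int, simd_size_v<float, _Ap>>; casts that
// keep the value_type or only change the signedness of same-sized integers
// preserve the original ABI.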
3215
3216namespace __proposed {
3217template <typename _Tp, typename _Up, typename _Ap,
3218 typename _R
3219 = typename __static_simd_cast_return_type<_Tp, _Up, _Ap>::type>
3220 _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR typename _R::mask_type
3221 static_simd_cast(const simd_mask<_Up, _Ap>& __x)
3222 {
3223 using _RM = typename _R::mask_type;
3224 return {__private_init, _RM::abi_type::_MaskImpl::template _S_convert<
3225 typename _RM::simd_type::value_type>(__x)};
3226 }
3227
3228template <typename _To, typename _Up, typename _Abi>
3229 _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR
3230 _To
3231 simd_bit_cast(const simd<_Up, _Abi>& __x)
3232 {
3233 using _Tp = typename _To::value_type;
3234 using _ToMember = typename _SimdTraits<_Tp, typename _To::abi_type>::_SimdMember;
3235 using _From = simd<_Up, _Abi>;
3236 using _FromMember = typename _SimdTraits<_Up, _Abi>::_SimdMember;
3237 // with concepts, the following should be constraints
3238 static_assert(sizeof(_To) == sizeof(_From));
3239 static_assert(is_trivially_copyable_v<_Tp> && is_trivially_copyable_v<_Up>);
3240 static_assert(is_trivially_copyable_v<_ToMember> && is_trivially_copyable_v<_FromMember>);
3241#if __has_builtin(__builtin_bit_cast)
3242 return {__private_init, __builtin_bit_cast(_ToMember, __data(__x))};
3243#else
3244 return {__private_init, __bit_cast<_ToMember>(__data(__x))};
3245#endif
3246 }
3247
3248template <typename _To, typename _Up, typename _Abi>
3249 _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR
3250 _To
3251 simd_bit_cast(const simd_mask<_Up, _Abi>& __x)
3252 {
3253 using _From = simd_mask<_Up, _Abi>;
3254 static_assert(sizeof(_To) == sizeof(_From));
3255 static_assert(is_trivially_copyable_v<_From>);
3256    // _To can be simd<T, A>, specifically simd<T, fixed_size<N>>, in which case
3257    // _To is not trivially copyable.
3258 if constexpr (is_simd_v<_To>)
3259 {
3260 using _Tp = typename _To::value_type;
3261 using _ToMember = typename _SimdTraits<_Tp, typename _To::abi_type>::_SimdMember;
3262 static_assert(is_trivially_copyable_v<_ToMember>);
3263#if __has_builtin(__builtin_bit_cast)
3264 return {__private_init, __builtin_bit_cast(_ToMember, __x)};
3265#else
3266 return {__private_init, __bit_cast<_ToMember>(__x)};
3267#endif
3268 }
3269 else
3270 {
3271 static_assert(is_trivially_copyable_v<_To>);
3272#if __has_builtin(__builtin_bit_cast)
3273 return __builtin_bit_cast(_To, __x);
3274#else
3275 return __bit_cast<_To>(__x);
3276#endif
3277 }
3278 }
3279} // namespace __proposed
3280
3281// simd_cast {{{2
3282template <typename _Tp, typename _Up, typename _Ap,
3283 typename _To = __value_type_or_identity_t<_Tp>>
3284 _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR auto
3285 simd_cast(const simd<_ValuePreserving<_Up, _To>, _Ap>& __x)
3286 -> decltype(static_simd_cast<_Tp>(__x))
3287 { return static_simd_cast<_Tp>(__x); }
3288
3289namespace __proposed {
3290template <typename _Tp, typename _Up, typename _Ap,
3291 typename _To = __value_type_or_identity_t<_Tp>>
3292 _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR auto
3293 simd_cast(const simd_mask<_ValuePreserving<_Up, _To>, _Ap>& __x)
3294 -> decltype(static_simd_cast<_Tp>(__x))
3295 { return static_simd_cast<_Tp>(__x); }
3296} // namespace __proposed
3297
3298// }}}2
3299// resizing_simd_cast {{{
3300namespace __proposed {
3301/* Proposed spec:
3302
3303template <class T, class U, class Abi>
3304T resizing_simd_cast(const simd<U, Abi>& x)
3305
3306p1 Constraints:
3307 - is_simd_v<T> is true and
3308 - T::value_type is the same type as U
3309
3310p2 Returns:
3311 A simd object with the i^th element initialized to x[i] for all i in the
3312 range of [0, min(T::size(), simd_size_v<U, Abi>)). If T::size() is larger
3313 than simd_size_v<U, Abi>, the remaining elements are value-initialized.
3314
3315template <class T, class U, class Abi>
3316T resizing_simd_cast(const simd_mask<U, Abi>& x)
3317
3318p1 Constraints: is_simd_mask_v<T> is true
3319
3320p2 Returns:
3321 A simd_mask object with the i^th element initialized to x[i] for all i in
3322   the range of [0, min(T::size(), simd_size_v<U, Abi>)). If T::size() is larger
3323 than simd_size_v<U, Abi>, the remaining elements are initialized to false.
3324
3325 */
3326
3327template <typename _Tp, typename _Up, typename _Ap>
3328 _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR enable_if_t<
3329 conjunction_v<is_simd<_Tp>, is_same<typename _Tp::value_type, _Up>>, _Tp>
3330 resizing_simd_cast(const simd<_Up, _Ap>& __x)
3331 {
3332 if constexpr (is_same_v<typename _Tp::abi_type, _Ap>)
3333 return __x;
3334 else if (__builtin_is_constant_evaluated())
3335 return _Tp([&](auto __i) constexpr {
3336 return __i < simd_size_v<_Up, _Ap> ? __x[__i] : _Up();
3337 });
3338 else if constexpr (simd_size_v<_Up, _Ap> == 1)
3339 {
3340 _Tp __r{};
3341 __r[0] = __x[0];
3342 return __r;
3343 }
3344 else if constexpr (_Tp::size() == 1)
3345 return __x[0];
3346 else if constexpr (sizeof(_Tp) == sizeof(__x)
3347 && !__is_fixed_size_abi_v<_Ap>)
3348 return {__private_init,
3349 __vector_bitcast<typename _Tp::value_type, _Tp::size()>(
3350 _Ap::_S_masked(__data(__x))._M_data)};
3351 else
3352 {
3353 _Tp __r{};
3354 __builtin_memcpy(&__data(__r), &__data(__x),
3355 sizeof(_Up)
3356 * std::min(_Tp::size(), simd_size_v<_Up, _Ap>));
3357 return __r;
3358 }
3359 }
3360
3361template <typename _Tp, typename _Up, typename _Ap>
3362 _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR
3363 enable_if_t<is_simd_mask_v<_Tp>, _Tp>
3364 resizing_simd_cast(const simd_mask<_Up, _Ap>& __x)
3365 {
3366 return {__private_init, _Tp::abi_type::_MaskImpl::template _S_convert<
3367 typename _Tp::simd_type::value_type>(__x)};
3368 }
3369} // namespace __proposed
3370
3371// }}}
3372// to_fixed_size {{{2
3373template <typename _Tp, int _Np>
3374 _GLIBCXX_SIMD_INTRINSIC fixed_size_simd<_Tp, _Np>
3375 to_fixed_size(const fixed_size_simd<_Tp, _Np>& __x)
3376 { return __x; }
3377
3378template <typename _Tp, int _Np>
3379 _GLIBCXX_SIMD_INTRINSIC fixed_size_simd_mask<_Tp, _Np>
3380 to_fixed_size(const fixed_size_simd_mask<_Tp, _Np>& __x)
3381 { return __x; }
3382
3383template <typename _Tp, typename _Ap>
3384 _GLIBCXX_SIMD_INTRINSIC fixed_size_simd<_Tp, simd_size_v<_Tp, _Ap>>
3385 to_fixed_size(const simd<_Tp, _Ap>& __x)
3386 {
3387 using _Rp = fixed_size_simd<_Tp, simd_size_v<_Tp, _Ap>>;
3388 return _Rp([&__x](auto __i) constexpr _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA { return __x[__i]; });
3389 }
3390
3391template <typename _Tp, typename _Ap>
3392 _GLIBCXX_SIMD_INTRINSIC fixed_size_simd_mask<_Tp, simd_size_v<_Tp, _Ap>>
3393 to_fixed_size(const simd_mask<_Tp, _Ap>& __x)
3394 {
3395 return {__private_init,
3396 [&](auto __i) constexpr _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA { return __x[__i]; }};
3397 }
3398
3399// to_native {{{2
3400template <typename _Tp, int _Np>
3401 _GLIBCXX_SIMD_INTRINSIC
3402 enable_if_t<(_Np == native_simd<_Tp>::size()), native_simd<_Tp>>
3403 to_native(const fixed_size_simd<_Tp, _Np>& __x)
3404 {
3405 alignas(memory_alignment_v<native_simd<_Tp>>) _Tp __mem[_Np];
3406 __x.copy_to(__mem, vector_aligned);
3407 return {__mem, vector_aligned};
3408 }
3409
3410template <typename _Tp, int _Np>
3411 _GLIBCXX_SIMD_INTRINSIC
3412 enable_if_t<(_Np == native_simd_mask<_Tp>::size()), native_simd_mask<_Tp>>
3413 to_native(const fixed_size_simd_mask<_Tp, _Np>& __x)
3414 {
3415 return native_simd_mask<_Tp>(
3416 __private_init,
3417 [&](auto __i) constexpr _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA { return __x[__i]; });
3418 }
3419
3420// to_compatible {{{2
3421template <typename _Tp, int _Np>
3422 _GLIBCXX_SIMD_INTRINSIC enable_if_t<(_Np == simd<_Tp>::size()), simd<_Tp>>
3423 to_compatible(const simd<_Tp, simd_abi::fixed_size<_Np>>& __x)
3424 {
3425 alignas(memory_alignment_v<simd<_Tp>>) _Tp __mem[_Np];
3426 __x.copy_to(__mem, vector_aligned);
3427 return {__mem, vector_aligned};
3428 }
3429
3430template <typename _Tp, int _Np>
3431 _GLIBCXX_SIMD_INTRINSIC
3432 enable_if_t<(_Np == simd_mask<_Tp>::size()), simd_mask<_Tp>>
3433 to_compatible(const simd_mask<_Tp, simd_abi::fixed_size<_Np>>& __x)
3434 {
3435 return simd_mask<_Tp>(
3436 __private_init,
3437 [&](auto __i) constexpr _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA { return __x[__i]; });
3438 }
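// Sketch: the three conversions above only reshape the ABI tag, never the
// element values, so e.g. to_native(to_fixed_size(__x)) has the same elements
// as __x for any native_simd<_Tp> __x (hypothetical variable).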
3439
3440// masked assignment [simd_mask.where] {{{1
3441
3442// where_expression {{{1
3443// const_where_expression<M, T> {{{2
3444template <typename _M, typename _Tp>
3445 class const_where_expression
3446 {
3447 using _V = _Tp;
3448 static_assert(is_same_v<_V, __remove_cvref_t<_Tp>>);
3449
3450 struct _Wrapper { using value_type = _V; };
3451
3452 protected:
3453 using _Impl = typename _V::_Impl;
3454
3455 using value_type =
3456 typename conditional_t<is_arithmetic_v<_V>, _Wrapper, _V>::value_type;
3457
3458 _GLIBCXX_SIMD_INTRINSIC friend const _M&
3459 __get_mask(const const_where_expression& __x)
3460 { return __x._M_k; }
3461
3462 _GLIBCXX_SIMD_INTRINSIC friend const _Tp&
3463 __get_lvalue(const const_where_expression& __x)
3464 { return __x._M_value; }
3465
3466 const _M& _M_k;
3467 _Tp& _M_value;
3468
3469 public:
3470 const_where_expression(const const_where_expression&) = delete;
3471
3472 const_where_expression& operator=(const const_where_expression&) = delete;
3473
3474 _GLIBCXX_SIMD_INTRINSIC constexpr
3475 const_where_expression(const _M& __kk, const _Tp& dd)
3476 : _M_k(__kk), _M_value(const_cast<_Tp&>(dd)) {}
3477
3478 _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR _V
3479 operator-() const&&
3480 {
3481 return {__private_init,
3482 _Impl::template _S_masked_unary<negate>(__data(_M_k),
3483 __data(_M_value))};
3484 }
3485
3486 template <typename _Up, typename _Flags>
3487 [[nodiscard]] _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR _V
3488 copy_from(const _LoadStorePtr<_Up, value_type>* __mem, _IsSimdFlagType<_Flags>) const&&
3489 {
3490 return {__private_init,
3491 _Impl::_S_masked_load(__data(_M_value), __data(_M_k),
3492 _Flags::template _S_apply<_V>(__mem))};
3493 }
3494
3495 template <typename _Up, typename _Flags>
3496 _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR void
3497 copy_to(_LoadStorePtr<_Up, value_type>* __mem, _IsSimdFlagType<_Flags>) const&&
3498 {
3499 _Impl::_S_masked_store(__data(_M_value),
3500 _Flags::template _S_apply<_V>(__mem),
3501 __data(_M_k));
3502 }
3503 };
3504
3505// const_where_expression<bool, T> {{{2
3506template <typename _Tp>
3507 class const_where_expression<bool, _Tp>
3508 {
3509 using _M = bool;
3510 using _V = _Tp;
3511
3512 static_assert(is_same_v<_V, __remove_cvref_t<_Tp>>);
3513
3514 struct _Wrapper { using value_type = _V; };
3515
3516 protected:
3517 using value_type
3518 = typename conditional_t<is_arithmetic_v<_V>, _Wrapper, _V>::value_type;
3519
3520 _GLIBCXX_SIMD_INTRINSIC friend const _M&
3521 __get_mask(const const_where_expression& __x)
3522 { return __x._M_k; }
3523
3524 _GLIBCXX_SIMD_INTRINSIC friend const _Tp&
3525 __get_lvalue(const const_where_expression& __x)
3526 { return __x._M_value; }
3527
3528 const bool _M_k;
3529 _Tp& _M_value;
3530
3531 public:
3532 const_where_expression(const const_where_expression&) = delete;
3533 const_where_expression& operator=(const const_where_expression&) = delete;
3534
3535 _GLIBCXX_SIMD_INTRINSIC constexpr
3536 const_where_expression(const bool __kk, const _Tp& dd)
3537 : _M_k(__kk), _M_value(const_cast<_Tp&>(dd)) {}
3538
3539 _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR _V
3540 operator-() const&&
3541 { return _M_k ? -_M_value : _M_value; }
3542
3543 template <typename _Up, typename _Flags>
3544 [[nodiscard]] _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR _V
3545 copy_from(const _LoadStorePtr<_Up, value_type>* __mem, _IsSimdFlagType<_Flags>) const&&
3546 { return _M_k ? static_cast<_V>(__mem[0]) : _M_value; }
3547
3548 template <typename _Up, typename _Flags>
3549 _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR void
3550 copy_to(_LoadStorePtr<_Up, value_type>* __mem, _IsSimdFlagType<_Flags>) const&&
3551 {
3552 if (_M_k)
3553 __mem[0] = _M_value;
3554 }
3555 };
3556
3557// where_expression<M, T> {{{2
3558template <typename _M, typename _Tp>
3559 class where_expression : public const_where_expression<_M, _Tp>
3560 {
3561 using _Impl = typename const_where_expression<_M, _Tp>::_Impl;
3562
3563 static_assert(!is_const<_Tp>::value,
3564		  "where_expression may only be instantiated with a non-const "
3565 "_Tp parameter");
3566
3567 using typename const_where_expression<_M, _Tp>::value_type;
3568 using const_where_expression<_M, _Tp>::_M_k;
3569 using const_where_expression<_M, _Tp>::_M_value;
3570
3571 static_assert(
3572 is_same<typename _M::abi_type, typename _Tp::abi_type>::value, "");
3573 static_assert(_M::size() == _Tp::size(), "");
3574
3575 _GLIBCXX_SIMD_INTRINSIC friend constexpr _Tp&
3576 __get_lvalue(where_expression& __x)
3577 { return __x._M_value; }
3578
3579 public:
3580 where_expression(const where_expression&) = delete;
3581 where_expression& operator=(const where_expression&) = delete;
3582
3583 _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR
3584 where_expression(const _M& __kk, _Tp& dd)
3585 : const_where_expression<_M, _Tp>(__kk, dd) {}
3586
3587 template <typename _Up>
3588 _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR void
3589 operator=(_Up&& __x) &&
3590 {
3591 _Impl::_S_masked_assign(__data(_M_k), __data(_M_value),
3592 __to_value_type_or_member_type<_Tp>(
3593 static_cast<_Up&&>(__x)));
3594 }
3595
3596#define _GLIBCXX_SIMD_OP_(__op, __name) \
3597 template <typename _Up> \
3598 _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR void \
3599 operator __op##=(_Up&& __x)&& \
3600 { \
3601 _Impl::template _S_masked_cassign( \
3602 __data(_M_k), __data(_M_value), \
3603 __to_value_type_or_member_type<_Tp>(static_cast<_Up&&>(__x)), \
3604 [](auto __impl, auto __lhs, auto __rhs) \
3605 constexpr _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA \
3606 { return __impl.__name(__lhs, __rhs); }); \
3607 } \
3608    static_assert(true) // not a statement; forces a ';' at each use of the macro
3609 _GLIBCXX_SIMD_OP_(+, _S_plus);
3610 _GLIBCXX_SIMD_OP_(-, _S_minus);
3611 _GLIBCXX_SIMD_OP_(*, _S_multiplies);
3612 _GLIBCXX_SIMD_OP_(/, _S_divides);
3613 _GLIBCXX_SIMD_OP_(%, _S_modulus);
3614 _GLIBCXX_SIMD_OP_(&, _S_bit_and);
3615 _GLIBCXX_SIMD_OP_(|, _S_bit_or);
3616 _GLIBCXX_SIMD_OP_(^, _S_bit_xor);
3617 _GLIBCXX_SIMD_OP_(<<, _S_shift_left);
3618 _GLIBCXX_SIMD_OP_(>>, _S_shift_right);
3619#undef _GLIBCXX_SIMD_OP_
3620
3621 _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR void
3622 operator++() &&
3623 {
3624 __data(_M_value)
3625 = _Impl::template _S_masked_unary<__increment>(__data(_M_k), __data(_M_value));
3626 }
3627
3628 _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR void
3629 operator++(int) &&
3630 {
3631 __data(_M_value)
3632 = _Impl::template _S_masked_unary<__increment>(__data(_M_k), __data(_M_value));
3633 }
3634
3635 _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR void
3636 operator--() &&
3637 {
3638 __data(_M_value)
3639 = _Impl::template _S_masked_unary<__decrement>(__data(_M_k), __data(_M_value));
3640 }
3641
3642 _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR void
3643 operator--(int) &&
3644 {
3645 __data(_M_value)
3646 = _Impl::template _S_masked_unary<__decrement>(__data(_M_k), __data(_M_value));
3647 }
3648
3649 // intentionally hides const_where_expression::copy_from
3650 template <typename _Up, typename _Flags>
3651 _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR void
3652 copy_from(const _LoadStorePtr<_Up, value_type>* __mem, _IsSimdFlagType<_Flags>) &&
3653 {
3654 __data(_M_value) = _Impl::_S_masked_load(__data(_M_value), __data(_M_k),
3655 _Flags::template _S_apply<_Tp>(__mem));
3656 }
3657 };
3658
3659// where_expression<bool, T> {{{2
3660template <typename _Tp>
3661 class where_expression<bool, _Tp>
3662 : public const_where_expression<bool, _Tp>
3663 {
3664 using _M = bool;
3665 using typename const_where_expression<_M, _Tp>::value_type;
3666 using const_where_expression<_M, _Tp>::_M_k;
3667 using const_where_expression<_M, _Tp>::_M_value;
3668
3669 public:
3670 where_expression(const where_expression&) = delete;
3671 where_expression& operator=(const where_expression&) = delete;
3672
3673 _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR
3674 where_expression(const _M& __kk, _Tp& dd)
3675 : const_where_expression<_M, _Tp>(__kk, dd) {}
3676
3677#define _GLIBCXX_SIMD_OP_(__op) \
3678 template <typename _Up> \
3679 _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR void \
3680 operator __op(_Up&& __x)&& \
3681 { if (_M_k) _M_value __op static_cast<_Up&&>(__x); }
3682
3683 _GLIBCXX_SIMD_OP_(=)
3684 _GLIBCXX_SIMD_OP_(+=)
3685 _GLIBCXX_SIMD_OP_(-=)
3686 _GLIBCXX_SIMD_OP_(*=)
3687 _GLIBCXX_SIMD_OP_(/=)
3688 _GLIBCXX_SIMD_OP_(%=)
3689 _GLIBCXX_SIMD_OP_(&=)
3690 _GLIBCXX_SIMD_OP_(|=)
3691 _GLIBCXX_SIMD_OP_(^=)
3692 _GLIBCXX_SIMD_OP_(<<=)
3693 _GLIBCXX_SIMD_OP_(>>=)
3694#undef _GLIBCXX_SIMD_OP_
3695
3696 _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR void
3697 operator++() &&
3698 { if (_M_k) ++_M_value; }
3699
3700 _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR void
3701 operator++(int) &&
3702 { if (_M_k) ++_M_value; }
3703
3704 _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR void
3705 operator--() &&
3706 { if (_M_k) --_M_value; }
3707
3708 _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR void
3709 operator--(int) &&
3710 { if (_M_k) --_M_value; }
3711
3712 // intentionally hides const_where_expression::copy_from
3713 template <typename _Up, typename _Flags>
3714 _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR void
3715 copy_from(const _LoadStorePtr<_Up, value_type>* __mem, _IsSimdFlagType<_Flags>) &&
3716 { if (_M_k) _M_value = __mem[0]; }
3717 };
3718
3719// where {{{1
3720template <typename _Tp, typename _Ap>
3721 _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR
3722 where_expression<simd_mask<_Tp, _Ap>, simd<_Tp, _Ap>>
3723 where(const typename simd<_Tp, _Ap>::mask_type& __k, simd<_Tp, _Ap>& __value)
3724 { return {__k, __value}; }
3725
3726template <typename _Tp, typename _Ap>
3727 _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR
3728 const_where_expression<simd_mask<_Tp, _Ap>, simd<_Tp, _Ap>>
3729 where(const typename simd<_Tp, _Ap>::mask_type& __k, const simd<_Tp, _Ap>& __value)
3730 { return {__k, __value}; }
3731
3732template <typename _Tp, typename _Ap>
3733 _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR
3734 where_expression<simd_mask<_Tp, _Ap>, simd_mask<_Tp, _Ap>>
3735 where(const remove_const_t<simd_mask<_Tp, _Ap>>& __k, simd_mask<_Tp, _Ap>& __value)
3736 { return {__k, __value}; }
3737
3738template <typename _Tp, typename _Ap>
3739 _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR
3740 const_where_expression<simd_mask<_Tp, _Ap>, simd_mask<_Tp, _Ap>>
3741 where(const remove_const_t<simd_mask<_Tp, _Ap>>& __k, const simd_mask<_Tp, _Ap>& __value)
3742 { return {__k, __value}; }
3743
3744template <typename _Tp>
3745 _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR where_expression<bool, _Tp>
3746 where(_ExactBool __k, _Tp& __value)
3747 { return {__k, __value}; }
3748
3749template <typename _Tp>
3750 _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR const_where_expression<bool, _Tp>
3751 where(_ExactBool __k, const _Tp& __value)
3752 { return {__k, __value}; }
3753
3754template <typename _Tp, typename _Ap>
3755 _GLIBCXX_SIMD_CONSTEXPR void
3756 where(bool __k, simd<_Tp, _Ap>& __value) = delete;
3757
3758template <typename _Tp, typename _Ap>
3759 _GLIBCXX_SIMD_CONSTEXPR void
3760 where(bool __k, const simd<_Tp, _Ap>& __value) = delete;
3761
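// A minimal usage sketch for where() (illustrative comment only; `stdx` is an
// assumed alias for the std::experimental namespace and __v is some
// initialized simd object):
//   stdx::native_simd<float> __v = /* ... */;
//   where(__v < 0.f, __v) = -__v;   // assign only where the mask is true
//   where(__v > 1.f, __v) *= 0.5f;  // masked compound assignment
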
3762// proposed mask iterations {{{1
3763namespace __proposed {
3764template <size_t _Np>
3765 class where_range
3766 {
3767 const bitset<_Np> __bits;
3768
3769 public:
3770 where_range(bitset<_Np> __b) : __bits(__b) {}
3771
3772 class iterator
3773 {
3774 size_t __mask;
3775 size_t __bit;
3776
3777 _GLIBCXX_SIMD_INTRINSIC void
3778      __next_bit() // guard: __builtin_ctzl(0) is UB, and end() has __mask == 0
3779      { __bit = __mask == 0 ? 0 : __builtin_ctzl(__mask); }
3780
3781 _GLIBCXX_SIMD_INTRINSIC void
3782 __reset_lsb()
3783 {
3784	// clear the lowest set bit: 01100100 & (01100100 - 1 = 01100011) = 01100000
3785 __mask &= (__mask - 1);
3786 // __asm__("btr %1,%0" : "+r"(__mask) : "r"(__bit));
3787 }
3788
3789 public:
3790 iterator(decltype(__mask) __m) : __mask(__m) { __next_bit(); }
3791 iterator(const iterator&) = default;
3792 iterator(iterator&&) = default;
3793
3794 _GLIBCXX_SIMD_ALWAYS_INLINE size_t
3795 operator->() const
3796 { return __bit; }
3797
3798 _GLIBCXX_SIMD_ALWAYS_INLINE size_t
3799 operator*() const
3800 { return __bit; }
3801
3802 _GLIBCXX_SIMD_ALWAYS_INLINE iterator&
3803 operator++()
3804 {
3805 __reset_lsb();
3806 __next_bit();
3807 return *this;
3808 }
3809
3810 _GLIBCXX_SIMD_ALWAYS_INLINE iterator
3811 operator++(int)
3812 {
3813 iterator __tmp = *this;
3814 __reset_lsb();
3815 __next_bit();
3816 return __tmp;
3817 }
3818
3819 _GLIBCXX_SIMD_ALWAYS_INLINE bool
3820 operator==(const iterator& __rhs) const
3821 { return __mask == __rhs.__mask; }
3822
3823 _GLIBCXX_SIMD_ALWAYS_INLINE bool
3824 operator!=(const iterator& __rhs) const
3825 { return __mask != __rhs.__mask; }
3826 };
3827
3828 iterator
3829 begin() const
3830 { return __bits.to_ullong(); }
3831
3832 iterator
3833 end() const
3834 { return 0; }
3835 };
3836
3837template <typename _Tp, typename _Ap>
3838 where_range<simd_size_v<_Tp, _Ap>>
3839 where(const simd_mask<_Tp, _Ap>& __k)
3840 { return __k.__to_bitset(); }
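
// Illustrative usage sketch for the proposed mask iteration: the range yields
// the indices of the set mask elements:
//   for (size_t __i : __proposed::where(__v > 0.f))
//     /* __i is the index of a positive element of __v */;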
3841
3842} // namespace __proposed
3843
3844// }}}1
3845// reductions [simd.reductions] {{{1
3846template <typename _Tp, typename _Abi, typename _BinaryOperation = plus<>>
3847 _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR _Tp
3848 reduce(const simd<_Tp, _Abi>& __v, _BinaryOperation __binary_op = _BinaryOperation())
3849 { return _Abi::_SimdImpl::_S_reduce(__v, __binary_op); }
3850
3851template <typename _M, typename _V, typename _BinaryOperation = plus<>>
3852 _GLIBCXX_SIMD_INTRINSIC typename _V::value_type
3853 reduce(const const_where_expression<_M, _V>& __x,
3854 typename _V::value_type __identity_element, _BinaryOperation __binary_op)
3855 {
3856 if (__builtin_expect(none_of(__get_mask(__x)), false))
3857 return __identity_element;
3858
3859 _V __tmp = __identity_element;
3860 _V::_Impl::_S_masked_assign(__data(__get_mask(__x)), __data(__tmp),
3861 __data(__get_lvalue(__x)));
3862 return reduce(__tmp, __binary_op);
3863 }
3864
3865template <typename _M, typename _V>
3866 _GLIBCXX_SIMD_INTRINSIC typename _V::value_type
3867 reduce(const const_where_expression<_M, _V>& __x, plus<> __binary_op = {})
3868 { return reduce(__x, 0, __binary_op); }
3869
3870template <typename _M, typename _V>
3871 _GLIBCXX_SIMD_INTRINSIC typename _V::value_type
3872 reduce(const const_where_expression<_M, _V>& __x, multiplies<> __binary_op)
3873 { return reduce(__x, 1, __binary_op); }
3874
3875template <typename _M, typename _V>
3876 _GLIBCXX_SIMD_INTRINSIC typename _V::value_type
3877 reduce(const const_where_expression<_M, _V>& __x, bit_and<> __binary_op)
3878 { return reduce(__x, ~typename _V::value_type(), __binary_op); }
3879
3880template <typename _M, typename _V>
3881 _GLIBCXX_SIMD_INTRINSIC typename _V::value_type
3882 reduce(const const_where_expression<_M, _V>& __x, bit_or<> __binary_op)
3883 { return reduce(__x, 0, __binary_op); }
3884
3885template <typename _M, typename _V>
3886 _GLIBCXX_SIMD_INTRINSIC typename _V::value_type
3887 reduce(const const_where_expression<_M, _V>& __x, bit_xor<> __binary_op)
3888 { return reduce(__x, 0, __binary_op); }
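
// Illustrative usage sketch: a masked reduction substitutes the identity
// element where the mask is false, so deselected elements do not contribute:
//   float __sum = reduce(where(__v > 0.f, __v));                     // plus<>, identity 0
//   float __product = reduce(where(__v > 0.f, __v), multiplies<>()); // identity 1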
3889
3890template <typename _Tp, typename _Abi>
3891 _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR _Tp
3892 hmin(const simd<_Tp, _Abi>& __v) noexcept
3893 { return _Abi::_SimdImpl::_S_reduce(__v, __detail::_Minimum()); }
3894
3895template <typename _Tp, typename _Abi>
3896 _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR _Tp
3897 hmax(const simd<_Tp, _Abi>& __v) noexcept
3898 { return _Abi::_SimdImpl::_S_reduce(__v, __detail::_Maximum()); }
3899
3900template <typename _M, typename _V>
3901 _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR
3902 typename _V::value_type
3903 hmin(const const_where_expression<_M, _V>& __x) noexcept
3904 {
3905 using _Tp = typename _V::value_type;
3906 constexpr _Tp __id_elem =
3907#ifdef __FINITE_MATH_ONLY__
3908 __finite_max_v<_Tp>;
3909#else
3910 __value_or<__infinity, _Tp>(__finite_max_v<_Tp>);
3911#endif
3912 _V __tmp = __id_elem;
3913 _V::_Impl::_S_masked_assign(__data(__get_mask(__x)), __data(__tmp),
3914 __data(__get_lvalue(__x)));
3915 return _V::abi_type::_SimdImpl::_S_reduce(__tmp, __detail::_Minimum());
3916 }
3917
3918template <typename _M, typename _V>
3919 _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR
3920 typename _V::value_type
3921 hmax(const const_where_expression<_M, _V>& __x) noexcept
3922 {
3923 using _Tp = typename _V::value_type;
3924 constexpr _Tp __id_elem =
3925#ifdef __FINITE_MATH_ONLY__
3926 __finite_min_v<_Tp>;
3927#else
3928 [] {
3929 if constexpr (__value_exists_v<__infinity, _Tp>)
3930 return -__infinity_v<_Tp>;
3931 else
3932 return __finite_min_v<_Tp>;
3933 }();
3934#endif
3935 _V __tmp = __id_elem;
3936 _V::_Impl::_S_masked_assign(__data(__get_mask(__x)), __data(__tmp),
3937 __data(__get_lvalue(__x)));
3938 return _V::abi_type::_SimdImpl::_S_reduce(__tmp, __detail::_Maximum());
3939 }
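
// Illustrative usage sketch: horizontal min/max over all elements, or only
// over the selected elements (deselected ones are replaced by the identity
// element chosen above):
//   float __lo = hmin(__v);
//   float __hi = hmax(where(__v > 0.f, __v));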
3940
3941// }}}1
3942// algorithms [simd.alg] {{{
3943template <typename _Tp, typename _Ap>
3944 _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR simd<_Tp, _Ap>
3945 min(const simd<_Tp, _Ap>& __a, const simd<_Tp, _Ap>& __b)
3946 { return {__private_init, _Ap::_SimdImpl::_S_min(__data(__a), __data(__b))}; }
3947
3948template <typename _Tp, typename _Ap>
3949 _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR simd<_Tp, _Ap>
3950 max(const simd<_Tp, _Ap>& __a, const simd<_Tp, _Ap>& __b)
3951 { return {__private_init, _Ap::_SimdImpl::_S_max(__data(__a), __data(__b))}; }
3952
3953template <typename _Tp, typename _Ap>
3954 _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR
3955 pair<simd<_Tp, _Ap>, simd<_Tp, _Ap>>
3956 minmax(const simd<_Tp, _Ap>& __a, const simd<_Tp, _Ap>& __b)
3957 {
3958 const auto pair_of_members
3959 = _Ap::_SimdImpl::_S_minmax(__data(__a), __data(__b));
3960 return {simd<_Tp, _Ap>(__private_init, pair_of_members.first),
3961 simd<_Tp, _Ap>(__private_init, pair_of_members.second)};
3962 }
3963
3964template <typename _Tp, typename _Ap>
3965 _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR simd<_Tp, _Ap>
3966 clamp(const simd<_Tp, _Ap>& __v, const simd<_Tp, _Ap>& __lo, const simd<_Tp, _Ap>& __hi)
3967 {
3968 using _Impl = typename _Ap::_SimdImpl;
3969 return {__private_init,
3970 _Impl::_S_min(__data(__hi),
3971 _Impl::_S_max(__data(__lo), __data(__v)))};
3972 }
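
// Illustrative usage sketch: these mirror the scalar <algorithm> functions,
// applied element-wise:
//   auto __m = min(__a, __b);              // element-wise minimum
//   auto [__mn, __mx] = minmax(__a, __b);  // both extremes in one pass
//   auto __c = clamp(__v, __lo, __hi);     // element-wise clamp to [__lo, __hi]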
3973
3974// }}}
3975
3976template <size_t... _Sizes, typename _Tp, typename _Ap,
3977 typename = enable_if_t<((_Sizes + ...) == simd<_Tp, _Ap>::size())>>
3978 inline tuple<simd<_Tp, simd_abi::deduce_t<_Tp, _Sizes>>...>
3979 split(const simd<_Tp, _Ap>&);
3980
3981// __extract_part {{{
3982template <int _Index, int _Total, int _Combine = 1, typename _Tp, size_t _Np>
3983 _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_CONST constexpr
3984 conditional_t<_Np == _Total and _Combine == 1, _Tp, _SimdWrapper<_Tp, _Np / _Total * _Combine>>
3985 __extract_part(const _SimdWrapper<_Tp, _Np> __x);
3986
3987template <int _Index, int _Parts, int _Combine = 1, typename _Tp, typename _A0, typename... _As>
3988 _GLIBCXX_SIMD_INTRINSIC constexpr auto
3989 __extract_part(const _SimdTuple<_Tp, _A0, _As...>& __x);
3990
3991// }}}
3992// _SizeList {{{
3993template <size_t _V0, size_t... _Values>
3994 struct _SizeList
3995 {
3996 template <size_t _I>
3997 static constexpr size_t
3998 _S_at(_SizeConstant<_I> = {})
3999 {
4000 if constexpr (_I == 0)
4001 return _V0;
4002 else
4003 return _SizeList<_Values...>::template _S_at<_I - 1>();
4004 }
4005
4006 template <size_t _I>
4007 static constexpr auto
4008 _S_before(_SizeConstant<_I> = {})
4009 {
4010 if constexpr (_I == 0)
4011 return _SizeConstant<0>();
4012 else
4013 return _SizeConstant<
4014 _V0 + _SizeList<_Values...>::template _S_before<_I - 1>()>();
4015 }
4016
4017 template <size_t _Np>
4018 static constexpr auto
4019 _S_pop_front(_SizeConstant<_Np> = {})
4020 {
4021 if constexpr (_Np == 0)
4022 return _SizeList();
4023 else
4024 return _SizeList<_Values...>::template _S_pop_front<_Np - 1>();
4025 }
4026 };
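
// Worked example (illustrative): for _SizeList<2, 3, 4>
//   _S_at<1>()        yields 3,
//   _S_before<2>()    yields _SizeConstant<5> (2 + 3 elements precede index 2),
//   _S_pop_front<1>() yields _SizeList<3, 4>.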
4027
4028// }}}
4029// __extract_center {{{
4030template <typename _Tp, size_t _Np>
4031 _GLIBCXX_SIMD_INTRINSIC _SimdWrapper<_Tp, _Np / 2>
4032 __extract_center(_SimdWrapper<_Tp, _Np> __x)
4033 {
4034 static_assert(_Np >= 4);
4035 static_assert(_Np % 4 == 0); // x0 - x1 - x2 - x3 -> return {x1, x2}
4036#if _GLIBCXX_SIMD_X86INTRIN // {{{
4037 if constexpr (__have_avx512f && sizeof(_Tp) * _Np == 64)
4038 {
4039 const auto __intrin = __to_intrin(__x);
4040 if constexpr (is_integral_v<_Tp>)
4041 return __vector_bitcast<_Tp>(_mm512_castsi512_si256(
4042 _mm512_shuffle_i32x4(__intrin, __intrin,
4043					   1 + 2 * 0x4 + 2 * 0x10 + 3 * 0x40))); // 128-bit lanes {1, 2, 2, 3}
4044 else if constexpr (sizeof(_Tp) == 4)
4045 return __vector_bitcast<_Tp>(_mm512_castps512_ps256(
4046 _mm512_shuffle_f32x4(__intrin, __intrin,
4047 1 + 2 * 0x4 + 2 * 0x10 + 3 * 0x40)));
4048 else if constexpr (sizeof(_Tp) == 8)
4049 return __vector_bitcast<_Tp>(_mm512_castpd512_pd256(
4050 _mm512_shuffle_f64x2(__intrin, __intrin,
4051 1 + 2 * 0x4 + 2 * 0x10 + 3 * 0x40)));
4052 else
4053 __assert_unreachable<_Tp>();
4054 }
4055 else if constexpr (sizeof(_Tp) * _Np == 32 && is_floating_point_v<_Tp>)
4056 return __vector_bitcast<_Tp>(
4057 _mm_shuffle_pd(__lo128(__vector_bitcast<double>(__x)),
4058 __hi128(__vector_bitcast<double>(__x)), 1));
4059 else if constexpr (sizeof(__x) == 32 && sizeof(_Tp) * _Np <= 32)
4060 return __vector_bitcast<_Tp>(
4061 _mm_alignr_epi8(__hi128(__vector_bitcast<_LLong>(__x)),
4062 __lo128(__vector_bitcast<_LLong>(__x)),
4063 sizeof(_Tp) * _Np / 4));
4064 else
4065#endif // _GLIBCXX_SIMD_X86INTRIN }}}
4066 {
4067 __vector_type_t<_Tp, _Np / 2> __r;
4068 __builtin_memcpy(&__r,
4069 reinterpret_cast<const char*>(&__x)
4070 + sizeof(_Tp) * _Np / 4,
4071 sizeof(_Tp) * _Np / 2);
4072 return __r;
4073 }
4074 }
4075
4076template <typename _Tp, typename _A0, typename... _As>
4077 _GLIBCXX_SIMD_INTRINSIC
4078 _SimdWrapper<_Tp, _SimdTuple<_Tp, _A0, _As...>::_S_size() / 2>
4079 __extract_center(const _SimdTuple<_Tp, _A0, _As...>& __x)
4080 {
4081 if constexpr (sizeof...(_As) == 0)
4082 return __extract_center(__x.first);
4083 else
4084 return __extract_part<1, 4, 2>(__x);
4085 }
4086
4087// }}}
4088// __split_wrapper {{{
4089template <size_t... _Sizes, typename _Tp, typename... _As>
4090 auto
4091 __split_wrapper(_SizeList<_Sizes...>, const _SimdTuple<_Tp, _As...>& __x)
4092 {
4093 return split<_Sizes...>(
4094 fixed_size_simd<_Tp, _SimdTuple<_Tp, _As...>::_S_size()>(__private_init,
4095 __x));
4096 }
4097
4098// }}}
4099
4100// split<simd>(simd) {{{
4101template <typename _V, typename _Ap,
4102 size_t _Parts = simd_size_v<typename _V::value_type, _Ap> / _V::size()>
4103 enable_if_t<simd_size_v<typename _V::value_type, _Ap> == _Parts * _V::size()
4104 && is_simd_v<_V>, array<_V, _Parts>>
4105 split(const simd<typename _V::value_type, _Ap>& __x)
4106 {
4107 using _Tp = typename _V::value_type;
4108 if constexpr (_Parts == 1)
4109 {
4110 return {simd_cast<_V>(__x)};
4111 }
4112 else if (__x._M_is_constprop())
4113 {
4114 return __generate_from_n_evaluations<_Parts, array<_V, _Parts>>(
4115 [&](auto __i) constexpr _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA {
4116 return _V([&](auto __j) constexpr _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA
4117 { return __x[__i * _V::size() + __j]; });
4118 });
4119 }
4120 else if constexpr (
4121 __is_fixed_size_abi_v<_Ap>
4122 && (is_same_v<typename _V::abi_type, simd_abi::scalar>
4123 || (__is_fixed_size_abi_v<typename _V::abi_type>
4124 && sizeof(_V) == sizeof(_Tp) * _V::size() // _V doesn't have padding
4125 )))
4126 {
4127 // fixed_size -> fixed_size (w/o padding) or scalar
4128#ifdef _GLIBCXX_SIMD_USE_ALIASING_LOADS
4129 const __may_alias<_Tp>* const __element_ptr
4130 = reinterpret_cast<const __may_alias<_Tp>*>(&__data(__x));
4131 return __generate_from_n_evaluations<_Parts, array<_V, _Parts>>(
4132 [&](auto __i) constexpr _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA
4133 { return _V(__element_ptr + __i * _V::size(), vector_aligned); });
4134#else
4135 const auto& __xx = __data(__x);
4136 return __generate_from_n_evaluations<_Parts, array<_V, _Parts>>(
4137 [&](auto __i) constexpr _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA {
4138 [[maybe_unused]] constexpr size_t __offset
4139 = decltype(__i)::value * _V::size();
4140 return _V([&](auto __j) constexpr _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA {
4141 constexpr _SizeConstant<__j + __offset> __k;
4142 return __xx[__k];
4143 });
4144 });
4145#endif
4146 }
4147 else if constexpr (is_same_v<typename _V::abi_type, simd_abi::scalar>)
4148 {
4149 // normally memcpy should work here as well
4150 return __generate_from_n_evaluations<_Parts, array<_V, _Parts>>(
4151 [&](auto __i) constexpr _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA { return __x[__i]; });
4152 }
4153 else
4154 {
4155 return __generate_from_n_evaluations<_Parts, array<_V, _Parts>>(
4156 [&](auto __i) constexpr _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA {
4157 if constexpr (__is_fixed_size_abi_v<typename _V::abi_type>)
4158 return _V([&](auto __j) constexpr _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA {
4159 return __x[__i * _V::size() + __j];
4160 });
4161 else
4162 return _V(__private_init,
4163 __extract_part<decltype(__i)::value, _Parts>(__data(__x)));
4164 });
4165 }
4166 }
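
// Illustrative usage sketch: split a simd into equally sized pieces; the
// target type's width must evenly divide the source width (`stdx` is an
// assumed alias for std::experimental):
//   using _V4 = stdx::fixed_size_simd<float, 4>;
//   stdx::fixed_size_simd<float, 8> __big = /* ... */;
//   std::array<_V4, 2> __halves = split<_V4>(__big);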
4167
4168// }}}
4169// split<simd_mask>(simd_mask) {{{
4170template <typename _V, typename _Ap,
4171 size_t _Parts = simd_size_v<typename _V::simd_type::value_type, _Ap> / _V::size()>
4172 enable_if_t<is_simd_mask_v<_V> && simd_size_v<typename
4173 _V::simd_type::value_type, _Ap> == _Parts * _V::size(), array<_V, _Parts>>
4174 split(const simd_mask<typename _V::simd_type::value_type, _Ap>& __x)
4175 {
4176 if constexpr (is_same_v<_Ap, typename _V::abi_type>)
4177 return {__x};
4178 else if constexpr (_Parts == 1)
4179 return {__proposed::static_simd_cast<_V>(__x)};
4180 else if constexpr (_Parts == 2 && __is_sse_abi<typename _V::abi_type>()
4181 && __is_avx_abi<_Ap>())
4182 return {_V(__private_init, __lo128(__data(__x))),
4183 _V(__private_init, __hi128(__data(__x)))};
4184 else if constexpr (_V::size() <= __CHAR_BIT__ * sizeof(_ULLong))
4185 {
4186 const bitset __bits = __x.__to_bitset();
4187 return __generate_from_n_evaluations<_Parts, array<_V, _Parts>>(
4188 [&](auto __i) constexpr _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA {
4189 constexpr size_t __offset = __i * _V::size();
4190 return _V(__bitset_init, (__bits >> __offset).to_ullong());
4191 });
4192 }
4193 else
4194 {
4195 return __generate_from_n_evaluations<_Parts, array<_V, _Parts>>(
4196 [&](auto __i) constexpr _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA {
4197 constexpr size_t __offset = __i * _V::size();
4198 return _V(__private_init,
4199 [&](auto __j) constexpr _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA {
4200 return __x[__j + __offset];
4201 });
4202 });
4203 }
4204 }
4205
4206// }}}
4207// split<_Sizes...>(simd) {{{
4208template <size_t... _Sizes, typename _Tp, typename _Ap, typename>
4209 _GLIBCXX_SIMD_ALWAYS_INLINE
4210 tuple<simd<_Tp, simd_abi::deduce_t<_Tp, _Sizes>>...>
4211 split(const simd<_Tp, _Ap>& __x)
4212 {
4213 using _SL = _SizeList<_Sizes...>;
4214 using _Tuple = tuple<__deduced_simd<_Tp, _Sizes>...>;
4215 constexpr size_t _Np = simd_size_v<_Tp, _Ap>;
4216 constexpr size_t _N0 = _SL::template _S_at<0>();
4217 using _V = __deduced_simd<_Tp, _N0>;
4218
4219 if (__x._M_is_constprop())
4220 return __generate_from_n_evaluations<sizeof...(_Sizes), _Tuple>(
4221 [&](auto __i) constexpr _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA {
4222 using _Vi = __deduced_simd<_Tp, _SL::_S_at(__i)>;
4223 constexpr size_t __offset = _SL::_S_before(__i);
4224 return _Vi([&](auto __j) constexpr _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA {
4225 return __x[__offset + __j];
4226 });
4227 });
4228 else if constexpr (_Np == _N0)
4229 {
4230 static_assert(sizeof...(_Sizes) == 1);
4231 return {simd_cast<_V>(__x)};
4232 }
4233    else if constexpr // split from fixed_size, such that __data(__x).first has _N0 elements
4234      (__is_fixed_size_abi_v<_Ap>
4235       && __fixed_size_storage_t<_Tp, _Np>::_S_first_size == _N0)
4236 {
4237 static_assert(
4238 !__is_fixed_size_abi_v<typename _V::abi_type>,
4239	  "How can <_Tp, _Np> be a single _SimdTuple entry but a "
4240	  "fixed_size_simd "
4241	  "when deduced?");
4242 // extract first and recurse (__split_wrapper is needed to deduce a new
4243 // _Sizes pack)
4244 return tuple_cat(make_tuple(_V(__private_init, __data(__x).first)),
4245 __split_wrapper(_SL::template _S_pop_front<1>(),
4246 __data(__x).second));
4247 }
4248 else if constexpr ((!__is_fixed_size_abi_v<simd_abi::deduce_t<_Tp, _Sizes>> && ...))
4249 {
4250 constexpr array<size_t, sizeof...(_Sizes)> __size = {_Sizes...};
4251 return __generate_from_n_evaluations<sizeof...(_Sizes), _Tuple>(
4252	  [&](auto __i) constexpr _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA {
4253 constexpr size_t __offset = [&]() {
4254 size_t __r = 0;
4255 for (unsigned __j = 0; __j < __i; ++__j)
4256 __r += __size[__j];
4257 return __r;
4258 }();
4259 return __deduced_simd<_Tp, __size[__i]>(
4260 __private_init,
4261 __extract_part<__offset, _Np, __size[__i]>(__data(__x)));
4262 });
4263 }
4264#ifdef _GLIBCXX_SIMD_USE_ALIASING_LOADS
4265 const __may_alias<_Tp>* const __element_ptr
4266 = reinterpret_cast<const __may_alias<_Tp>*>(&__x);
4267 return __generate_from_n_evaluations<sizeof...(_Sizes), _Tuple>(
4268 [&](auto __i) constexpr _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA {
4269 using _Vi = __deduced_simd<_Tp, _SL::_S_at(__i)>;
4270 constexpr size_t __offset = _SL::_S_before(__i);
4271 constexpr size_t __base_align = alignof(simd<_Tp, _Ap>);
4272	  constexpr size_t __a // bytes from __offset to the next __base_align boundary
4273	    = __base_align - ((__offset * sizeof(_Tp)) % __base_align);
4274	  constexpr size_t __b = ((__a - 1) & __a) ^ __a; // lowest set bit of __a
4275	  constexpr size_t __alignment = __b == 0 ? __a : __b;
4276 return _Vi(__element_ptr + __offset, overaligned<__alignment>);
4277 });
4278#else
4279 return __generate_from_n_evaluations<sizeof...(_Sizes), _Tuple>(
4280 [&](auto __i) constexpr _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA {
4281 using _Vi = __deduced_simd<_Tp, _SL::_S_at(__i)>;
4282 const auto& __xx = __data(__x);
4283 using _Offset = decltype(_SL::_S_before(__i));
4284 return _Vi([&](auto __j) constexpr _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA {
4285 constexpr _SizeConstant<_Offset::value + __j> __k;
4286 return __xx[__k];
4287 });
4288 });
4289#endif
4290 }
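
// Illustrative usage sketch: split into pieces of different widths; the
// _Sizes must add up to the source width (enforced by the enable_if above):
//   stdx::fixed_size_simd<float, 8> __big = /* ... */;
//   auto [__first2, __last6] = split<2, 6>(__big);  // tuple of deduced simds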
4291
4292// }}}
4293
4294// __subscript_in_pack {{{
4295template <size_t _I, typename _Tp, typename _Ap, typename... _As>
4296 _GLIBCXX_SIMD_INTRINSIC constexpr _Tp
4297 __subscript_in_pack(const simd<_Tp, _Ap>& __x, const simd<_Tp, _As>&... __xs)
4298 {
4299 if constexpr (_I < simd_size_v<_Tp, _Ap>)
4300 return __x[_I];
4301 else
4302 return __subscript_in_pack<_I - simd_size_v<_Tp, _Ap>>(__xs...);
4303 }
4304
4305// }}}
4306// __store_pack_of_simd {{{
4307template <typename _Tp, typename _A0, typename... _As>
4308 _GLIBCXX_SIMD_INTRINSIC void
4309 __store_pack_of_simd(char* __mem, const simd<_Tp, _A0>& __x0, const simd<_Tp, _As>&... __xs)
4310 {
4311 constexpr size_t __n_bytes = sizeof(_Tp) * simd_size_v<_Tp, _A0>;
4312 __builtin_memcpy(__mem, &__data(__x0), __n_bytes);
4313 if constexpr (sizeof...(__xs) > 0)
4314 __store_pack_of_simd(__mem + __n_bytes, __xs...);
4315 }
4316
4317// }}}
4318// concat(simd...) {{{
4319template <typename _Tp, typename... _As, typename = __detail::__odr_helper>
4320 inline _GLIBCXX_SIMD_CONSTEXPR
4321 simd<_Tp, simd_abi::deduce_t<_Tp, (simd_size_v<_Tp, _As> + ...)>>
4322 concat(const simd<_Tp, _As>&... __xs)
4323 {
4324 constexpr int _Np = (simd_size_v<_Tp, _As> + ...);
4325 using _Abi = simd_abi::deduce_t<_Tp, _Np>;
4326 using _Rp = simd<_Tp, _Abi>;
4327 using _RW = typename _SimdTraits<_Tp, _Abi>::_SimdMember;
4328 if constexpr (sizeof...(__xs) == 1)
4329 return simd_cast<_Rp>(__xs...);
4330 else if ((... && __xs._M_is_constprop()))
4331 return _Rp([&](auto __i) constexpr _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA
4332 { return __subscript_in_pack<__i>(__xs...); });
4333 else if constexpr (__is_simd_wrapper_v<_RW> and sizeof...(__xs) == 2)
4334 {
4335 return {__private_init,
4336 __vec_shuffle(__as_vector(__xs)..., std::make_index_sequence<_RW::_S_full_size>(),
4337 [](int __i) {
4338 constexpr int __sizes[2] = {int(simd_size_v<_Tp, _As>)...};
4339 constexpr int __vsizes[2]
4340 = {int(sizeof(__as_vector(__xs)) / sizeof(_Tp))...};
4341 constexpr int __padding0 = __vsizes[0] - __sizes[0];
4342 return __i >= _Np ? -1 : __i < __sizes[0] ? __i : __i + __padding0;
4343 })};
4344 }
4345 else if constexpr (__is_simd_wrapper_v<_RW> and sizeof...(__xs) == 3)
4346 return [](const auto& __x0, const auto& __x1, const auto& __x2)
4347 _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA {
4348 return concat(concat(__x0, __x1), __x2);
4349 }(__xs...);
4350 else if constexpr (__is_simd_wrapper_v<_RW> and sizeof...(__xs) > 3)
4351 return [](const auto& __x0, const auto& __x1, const auto&... __rest)
4352 _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA {
4353 return concat(concat(__x0, __x1), concat(__rest...));
4354 }(__xs...);
4355 else
4356 {
4357 _Rp __r{};
4358 __store_pack_of_simd(reinterpret_cast<char*>(&__data(__r)), __xs...);
4359 return __r;
4360 }
4361 }
4362
4363// }}}
4364// concat(array<simd>) {{{
4365template <typename _Tp, typename _Abi, size_t _Np>
4366 _GLIBCXX_SIMD_ALWAYS_INLINE
4367 _GLIBCXX_SIMD_CONSTEXPR __deduced_simd<_Tp, simd_size_v<_Tp, _Abi> * _Np>
4368 concat(const array<simd<_Tp, _Abi>, _Np>& __x)
4369 {
4370 return __call_with_subscripts<_Np>(
4371 __x, [](const auto&... __xs) _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA {
4372 return concat(__xs...);
4373 });
4374 }
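
// Illustrative usage sketch: concat is the inverse of split and also accepts
// an array of equally typed simds via the overload above:
//   stdx::fixed_size_simd<float, 2> __lo = /* ... */;
//   stdx::fixed_size_simd<float, 6> __hi = /* ... */;
//   auto __all = concat(__lo, __hi);  // deduced simd of 2 + 6 = 8 floats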
4375
4376// }}}
4377
4378/// @cond undocumented
4379// _SmartReference {{{
4380template <typename _Up, typename _Accessor = _Up,
4381 typename _ValueType = typename _Up::value_type>
4382 class _SmartReference
4383 {
4384 friend _Accessor;
4385 int _M_index;
4386 _Up& _M_obj;
4387
4388 _GLIBCXX_SIMD_INTRINSIC constexpr _ValueType
4389 _M_read() const noexcept
4390 {
4391 if constexpr (is_arithmetic_v<_Up>)
4392 return _M_obj;
4393 else
4394 return _M_obj[_M_index];
4395 }
4396
4397 template <typename _Tp>
4398 _GLIBCXX_SIMD_INTRINSIC constexpr void
4399 _M_write(_Tp&& __x) const
4400 { _Accessor::_S_set(_M_obj, _M_index, static_cast<_Tp&&>(__x)); }
4401
4402 public:
4403 _GLIBCXX_SIMD_INTRINSIC constexpr
4404 _SmartReference(_Up& __o, int __i) noexcept
4405 : _M_index(__i), _M_obj(__o) {}
4406
4407 using value_type = _ValueType;
4408
4409 _GLIBCXX_SIMD_INTRINSIC
4410 _SmartReference(const _SmartReference&) = delete;
4411
4412 _GLIBCXX_SIMD_INTRINSIC constexpr
4413 operator value_type() const noexcept
4414 { return _M_read(); }
4415
4416 template <typename _Tp, typename = _ValuePreservingOrInt<__remove_cvref_t<_Tp>, value_type>>
4417 _GLIBCXX_SIMD_INTRINSIC constexpr _SmartReference
4418 operator=(_Tp&& __x) &&
4419 {
4420 _M_write(static_cast<_Tp&&>(__x));
4421 return {_M_obj, _M_index};
4422 }
4423
4424#define _GLIBCXX_SIMD_OP_(__op) \
4425 template <typename _Tp, \
4426 typename _TT = decltype(declval<value_type>() __op declval<_Tp>()), \
4427 typename = _ValuePreservingOrInt<__remove_cvref_t<_Tp>, _TT>, \
4428 typename = _ValuePreservingOrInt<_TT, value_type>> \
4429 _GLIBCXX_SIMD_INTRINSIC constexpr _SmartReference \
4430 operator __op##=(_Tp&& __x) && \
4431 { \
4432 const value_type& __lhs = _M_read(); \
4433 _M_write(__lhs __op __x); \
4434 return {_M_obj, _M_index}; \
4435 }
4436 _GLIBCXX_SIMD_ALL_ARITHMETICS(_GLIBCXX_SIMD_OP_);
4437 _GLIBCXX_SIMD_ALL_SHIFTS(_GLIBCXX_SIMD_OP_);
4438 _GLIBCXX_SIMD_ALL_BINARY(_GLIBCXX_SIMD_OP_);
4439#undef _GLIBCXX_SIMD_OP_
4440
4441 template <typename _Tp = void,
4442 typename = decltype(++declval<conditional_t<true, value_type, _Tp>&>())>
4443 _GLIBCXX_SIMD_INTRINSIC constexpr _SmartReference
4444 operator++() &&
4445 {
4446 value_type __x = _M_read();
4447 _M_write(++__x);
4448 return {_M_obj, _M_index};
4449 }
4450
4451 template <typename _Tp = void,
4452 typename = decltype(declval<conditional_t<true, value_type, _Tp>&>()++)>
4453 _GLIBCXX_SIMD_INTRINSIC constexpr value_type
4454 operator++(int) &&
4455 {
4456 const value_type __r = _M_read();
4457 value_type __x = __r;
4458 _M_write(++__x);
4459 return __r;
4460 }
4461
4462 template <typename _Tp = void,
4463 typename = decltype(--declval<conditional_t<true, value_type, _Tp>&>())>
4464 _GLIBCXX_SIMD_INTRINSIC constexpr _SmartReference
4465 operator--() &&
4466 {
4467 value_type __x = _M_read();
4468 _M_write(--__x);
4469 return {_M_obj, _M_index};
4470 }
4471
4472 template <typename _Tp = void,
4473 typename = decltype(declval<conditional_t<true, value_type, _Tp>&>()--)>
4474 _GLIBCXX_SIMD_INTRINSIC constexpr value_type
4475 operator--(int) &&
4476 {
4477 const value_type __r = _M_read();
4478 value_type __x = __r;
4479 _M_write(--__x);
4480 return __r;
4481 }
4482
4483 _GLIBCXX_SIMD_INTRINSIC friend void
4484 swap(_SmartReference&& __a, _SmartReference&& __b) noexcept(
4485 conjunction<
4486 is_nothrow_constructible<value_type, _SmartReference&&>,
4487 is_nothrow_assignable<_SmartReference&&, value_type&&>>::value)
4488 {
4489 value_type __tmp = static_cast<_SmartReference&&>(__a);
4490 static_cast<_SmartReference&&>(__a) = static_cast<value_type>(__b);
4491 static_cast<_SmartReference&&>(__b) = std::move(__tmp);
4492 }
4493
4494 _GLIBCXX_SIMD_INTRINSIC friend void
4495 swap(value_type& __a, _SmartReference&& __b) noexcept(
4496 conjunction<
4497 is_nothrow_constructible<value_type, value_type&&>,
4498 is_nothrow_assignable<value_type&, value_type&&>,
4499 is_nothrow_assignable<_SmartReference&&, value_type&&>>::value)
4500 {
4501 value_type __tmp(std::move(__a));
4502 __a = static_cast<value_type>(__b);
4503 static_cast<_SmartReference&&>(__b) = std::move(__tmp);
4504 }
4505
4506 _GLIBCXX_SIMD_INTRINSIC friend void
4507 swap(_SmartReference&& __a, value_type& __b) noexcept(
4508 conjunction<
4509 is_nothrow_constructible<value_type, _SmartReference&&>,
4510 is_nothrow_assignable<value_type&, value_type&&>,
4511 is_nothrow_assignable<_SmartReference&&, value_type&&>>::value)
4512 {
4513 value_type __tmp(__a);
4514 static_cast<_SmartReference&&>(__a) = std::move(__b);
4515 __b = std::move(__tmp);
4516 }
4517 };
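
// Illustrative note: _SmartReference is the proxy type behind simd::operator[]
// and simd_mask::operator[]; reads go through _M_read(), writes through
// _Accessor::_S_set. E.g., with a simd object __v:
//   __v[0] = 1.f;          // rvalue operator= writes through the proxy
//   float __f = __v[0];    // implicit conversion to value_type reads
//   swap(__v[0], __v[1]);  // hidden friend swap of two proxies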
4518
4519// }}}
4520// __scalar_abi_wrapper {{{
4521template <int _Bytes>
4522 struct __scalar_abi_wrapper
4523 {
4524 template <typename _Tp> static constexpr size_t _S_full_size = 1;
4525 template <typename _Tp> static constexpr size_t _S_size = 1;
4526    template <typename _Tp> static constexpr bool _S_is_partial = false;
4527
4528 template <typename _Tp, typename _Abi = simd_abi::scalar>
4529 static constexpr bool _S_is_valid_v
4530 = _Abi::template _IsValid<_Tp>::value && sizeof(_Tp) == _Bytes;
4531 };
4532
4533// }}}
4534// __decay_abi metafunction {{{
4535template <typename _Tp>
4536 struct __decay_abi { using type = _Tp; };
4537
4538template <int _Bytes>
4539 struct __decay_abi<__scalar_abi_wrapper<_Bytes>>
4540 { using type = simd_abi::scalar; };
4541
4542// }}}
4543// __find_next_valid_abi metafunction {{{1
4544// Given an ABI tag _Abi<_Bytes>, find a _Bytes2 < _Bytes such that
4545// _Abi<_Bytes2>::_S_is_valid_v<_Tp> is true, _Bytes2 is a power of 2, and
4546// _Abi<_Bytes2>::_S_is_partial<_Tp> is false. Recursion stops at 2 elements
4547// in the resulting ABI tag; in that case type::_S_is_valid_v<_Tp> may be false.
4548template <template <int> class _Abi, int _Bytes, typename _Tp>
4549 struct __find_next_valid_abi
4550 {
4551 static constexpr auto
4552 _S_choose()
4553 {
4554 constexpr int _NextBytes = std::__bit_ceil(_Bytes) / 2;
4555 using _NextAbi = _Abi<_NextBytes>;
4556 if constexpr (_NextBytes < sizeof(_Tp) * 2) // break recursion
4557 return _Abi<_Bytes>();
4558 else if constexpr (_NextAbi::template _S_is_partial<_Tp> == false
4559 && _NextAbi::template _S_is_valid_v<_Tp>)
4560 return _NextAbi();
4561 else
4562 return __find_next_valid_abi<_Abi, _NextBytes, _Tp>::_S_choose();
4563 }
4564
4565 using type = decltype(_S_choose());
4566 };
4567
4568template <int _Bytes, typename _Tp>
4569 struct __find_next_valid_abi<__scalar_abi_wrapper, _Bytes, _Tp>
4570 { using type = simd_abi::scalar; };
4571
4572// _AbiList {{{1
4573template <template <int> class...>
4574 struct _AbiList
4575 {
4576 template <typename, int> static constexpr bool _S_has_valid_abi = false;
4577 template <typename, int> using _FirstValidAbi = void;
4578 template <typename, int> using _BestAbi = void;
4579 };
4580
4581template <template <int> class _A0, template <int> class... _Rest>
4582 struct _AbiList<_A0, _Rest...>
4583 {
4584 template <typename _Tp, int _Np>
4585 static constexpr bool _S_has_valid_abi
4586 = _A0<sizeof(_Tp) * _Np>::template _S_is_valid_v<
4587 _Tp> || _AbiList<_Rest...>::template _S_has_valid_abi<_Tp, _Np>;
4588
4589 template <typename _Tp, int _Np>
4590 using _FirstValidAbi = conditional_t<
4591 _A0<sizeof(_Tp) * _Np>::template _S_is_valid_v<_Tp>,
4592 typename __decay_abi<_A0<sizeof(_Tp) * _Np>>::type,
4593 typename _AbiList<_Rest...>::template _FirstValidAbi<_Tp, _Np>>;
4594
4595 template <typename _Tp, int _Np>
4596 static constexpr auto
4597 _S_determine_best_abi()
4598 {
4599 static_assert(_Np >= 1);
4600 constexpr int _Bytes = sizeof(_Tp) * _Np;
4601 if constexpr (_Np == 1)
4602 return __make_dependent_t<_Tp, simd_abi::scalar>{};
4603 else
4604 {
4605 constexpr int __fullsize = _A0<_Bytes>::template _S_full_size<_Tp>;
4606 // _A0<_Bytes> is good if:
4607 // 1. The ABI tag is valid for _Tp
4608 // 2. The storage overhead is no more than padding to fill the next
4609 // power-of-2 number of bytes
4610 if constexpr (_A0<_Bytes>::template _S_is_valid_v<
4611 _Tp> && __fullsize / 2 < _Np)
4612 return typename __decay_abi<_A0<_Bytes>>::type{};
4613 else
4614 {
4615 using _Bp =
4616 typename __find_next_valid_abi<_A0, _Bytes, _Tp>::type;
4617 if constexpr (_Bp::template _S_is_valid_v<
4618 _Tp> && _Bp::template _S_size<_Tp> <= _Np)
4619 return _Bp{};
4620 else
4621 return
4622 typename _AbiList<_Rest...>::template _BestAbi<_Tp, _Np>{};
4623 }
4624 }
4625 }
4626
4627 template <typename _Tp, int _Np>
4628 using _BestAbi = decltype(_S_determine_best_abi<_Tp, _Np>());
4629 };
4630
4631// }}}1
4632
4633// The following lists all native ABIs, which makes them accessible to
4634// simd_abi::deduce and select_best_vector_type_t (for fixed_size). Order
4635// matters: whatever comes first has higher priority.
4636using _AllNativeAbis = _AbiList<simd_abi::_VecBltnBtmsk, simd_abi::_VecBuiltin,
4637 __scalar_abi_wrapper>;
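
// Worked example (illustrative; the chosen tag is target-dependent): for
// _Tp = float and _Np = 4, _AllNativeAbis::_BestAbi computes _Bytes = 16 and
// tries _VecBltnBtmsk<16>, then _VecBuiltin<16>, then the scalar wrapper,
// returning the first tag that is valid for float without excessive padding.
// For _Np == 1 it always yields simd_abi::scalar.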
4638
4639// valid _SimdTraits specialization {{{1
4640template <typename _Tp, typename _Abi>
4641 struct _SimdTraits<_Tp, _Abi, void_t<typename _Abi::template _IsValid<_Tp>>>
4642 : _Abi::template __traits<_Tp> {};
4643
4644// __deduce_impl specializations {{{1
4645// try all native ABIs (including scalar) first
4646template <typename _Tp, size_t _Np>
4647 struct __deduce_impl<
4648 _Tp, _Np, enable_if_t<_AllNativeAbis::template _S_has_valid_abi<_Tp, _Np>>>
4649 { using type = _AllNativeAbis::_FirstValidAbi<_Tp, _Np>; };
4650
4651// fall back to fixed_size only if scalar and native ABIs don't match
4652template <typename _Tp, size_t _Np, typename = void>
4653 struct __deduce_fixed_size_fallback {};
4654
4655template <typename _Tp, size_t _Np>
4656 struct __deduce_fixed_size_fallback<_Tp, _Np,
4657 enable_if_t<simd_abi::fixed_size<_Np>::template _S_is_valid_v<_Tp>>>
4658 { using type = simd_abi::fixed_size<_Np>; };
4659
4660template <typename _Tp, size_t _Np, typename>
4661 struct __deduce_impl : public __deduce_fixed_size_fallback<_Tp, _Np> {};
4662
4663//}}}1
4664/// @endcond
4665
4666// simd_mask {{{
4667template <typename _Tp, typename _Abi>
4668 class simd_mask : public _SimdTraits<_Tp, _Abi>::_MaskBase
4669 {
4670 // types, tags, and friends {{{
4671 using _Traits = _SimdTraits<_Tp, _Abi>;
4672 using _MemberType = typename _Traits::_MaskMember;
4673
4674 // We map all masks with equal element sizeof to a single integer type, the
4675 // one given by __int_for_sizeof_t<_Tp>. This is the approach
4676 // [[gnu::vector_size(N)]] types take as well and it reduces the number of
4677 // template specializations in the implementation classes.
4678 using _Ip = __int_for_sizeof_t<_Tp>;
4679 static constexpr _Ip* _S_type_tag = nullptr;
4680
4681 friend typename _Traits::_MaskBase;
4682 friend class simd<_Tp, _Abi>; // to construct masks on return
4683 friend typename _Traits::_SimdImpl; // to construct masks on return and
4684 // inspect data on masked operations
4685 public:
4686 using _Impl = typename _Traits::_MaskImpl;
4687 friend _Impl;
4688
4689 // }}}
4690 // member types {{{
4691 using value_type = bool;
4692 using reference = _SmartReference<_MemberType, _Impl, value_type>;
4693 using simd_type = simd<_Tp, _Abi>;
4694 using abi_type = _Abi;
4695
4696 // }}}
4697 static constexpr size_t size() // {{{
4698 { return __size_or_zero_v<_Tp, _Abi>; }
4699
4700 // }}}
4701 // constructors & assignment {{{
4702 simd_mask() = default;
4703 simd_mask(const simd_mask&) = default;
4704 simd_mask(simd_mask&&) = default;
4705 simd_mask& operator=(const simd_mask&) = default;
4706 simd_mask& operator=(simd_mask&&) = default;
4707
4708 // }}}
4709 // access to internal representation (optional feature) {{{
4710 _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR explicit
4711 simd_mask(typename _Traits::_MaskCastType __init)
4712 : _M_data{__init} {}
4713 // conversions to internal type is done in _MaskBase
4714
4715 // }}}
4716 // bitset interface (extension to be proposed) {{{
4717 // TS_FEEDBACK:
4718 // Conversion of simd_mask to and from bitset makes it much easier to
4719 // interface with other facilities. I suggest adding `static
4720 // simd_mask::from_bitset` and `simd_mask::to_bitset`.
4721 _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR static simd_mask
4722      __from_bitset(bitset<size()> __bs)
4723      { return {__bitset_init, __bs}; }
4724
4725 _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR bitset<size()>
4726 __to_bitset() const
4727 { return _Impl::_S_to_bits(_M_data)._M_to_bitset(); }
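
    // Illustrative usage sketch:
    //   auto __k = __v > 0.f;                    // some simd_mask
    //   auto __bits = __k.__to_bitset();         // -> std::bitset<size()>
    //   auto __k2 = decltype(__k)::__from_bitset(__bits);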
4728
4729 // }}}
4730 // explicit broadcast constructor {{{
4731 _GLIBCXX_SIMD_ALWAYS_INLINE explicit _GLIBCXX_SIMD_CONSTEXPR
4732 simd_mask(value_type __x)
4733 : _M_data(_Impl::template _S_broadcast<_Ip>(__x)) {}
4734
4735 // }}}
4736 // implicit type conversion constructor {{{
4737 #ifdef _GLIBCXX_SIMD_ENABLE_IMPLICIT_MASK_CAST
4738 // proposed improvement
4739 template <typename _Up, typename _A2,
4740 typename = enable_if_t<simd_size_v<_Up, _A2> == size()>>
4741 _GLIBCXX_SIMD_ALWAYS_INLINE explicit(sizeof(_MemberType)
4742 != sizeof(typename _SimdTraits<_Up, _A2>::_MaskMember))
4743 simd_mask(const simd_mask<_Up, _A2>& __x)
4744 : simd_mask(__proposed::static_simd_cast<simd_mask>(__x)) {}
4745 #else
4746 // conforming to ISO/IEC 19570:2018
4747 template <typename _Up, typename = enable_if_t<conjunction<
4748 is_same<abi_type, simd_abi::fixed_size<size()>>,
4749 is_same<_Up, _Up>>::value>>
4750 _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR
4751 simd_mask(const simd_mask<_Up, simd_abi::fixed_size<size()>>& __x)
4752 : _M_data(_Impl::_S_from_bitmask(__data(__x), _S_type_tag)) {}
4753 #endif
4754
4755 // }}}
4756 // load constructor {{{
4757 template <typename _Flags>
4758 _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR
4759 simd_mask(const value_type* __mem, _IsSimdFlagType<_Flags>)
4760 : _M_data(_Impl::template _S_load<_Ip>(_Flags::template _S_apply<simd_mask>(__mem))) {}
4761
4762 template <typename _Flags>
4763 _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR
4764 simd_mask(const value_type* __mem, simd_mask __k, _IsSimdFlagType<_Flags>)
4765 : _M_data{}
4766 {
4767 _M_data = _Impl::_S_masked_load(_M_data, __k._M_data,
4768 _Flags::template _S_apply<simd_mask>(__mem));
4769 }
4770
4771 // }}}
4772 // loads [simd_mask.load] {{{
4773 template <typename _Flags>
4774 _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR void
4775 copy_from(const value_type* __mem, _IsSimdFlagType<_Flags>)
4776 { _M_data = _Impl::template _S_load<_Ip>(_Flags::template _S_apply<simd_mask>(__mem)); }
4777
4778 // }}}
4779 // stores [simd_mask.store] {{{
4780 template <typename _Flags>
4781 _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR void
4782 copy_to(value_type* __mem, _IsSimdFlagType<_Flags>) const
4783 { _Impl::_S_store(_M_data, _Flags::template _S_apply<simd_mask>(__mem)); }
4784
4785 // }}}
4786 // scalar access {{{
4787 _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR reference
4788 operator[](size_t __i)
4789 {
4790 if (__i >= size())
4791 __invoke_ub("Subscript %d is out of range [0, %d]", __i, size() - 1);
4792 return {_M_data, int(__i)};
4793 }
4794
4795 _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR value_type
4796 operator[](size_t __i) const
4797 {
4798 if (__i >= size())
4799 __invoke_ub("Subscript %d is out of range [0, %d]", __i, size() - 1);
4800 if constexpr (__is_scalar_abi<_Abi>())
4801 return _M_data;
4802 else
4803 return static_cast<bool>(_M_data[__i]);
4804 }
4805
4806 // }}}
4807 // negation {{{
4808 _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR simd_mask
4809 operator!() const
4810 { return {__private_init, _Impl::_S_bit_not(_M_data)}; }
4811
4812 // }}}
4813 // simd_mask binary operators [simd_mask.binary] {{{
4814 #ifdef _GLIBCXX_SIMD_ENABLE_IMPLICIT_MASK_CAST
4815 // simd_mask<int> && simd_mask<uint> needs disambiguation
4816 template <typename _Up, typename _A2,
4817 typename = enable_if_t<is_convertible_v<simd_mask<_Up, _A2>, simd_mask>>>
4818 _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend simd_mask
4819 operator&&(const simd_mask& __x, const simd_mask<_Up, _A2>& __y)
4820 {
4821 return {__private_init,
4822 _Impl::_S_logical_and(__x._M_data, simd_mask(__y)._M_data)};
4823 }
4824
4825 template <typename _Up, typename _A2,
4826 typename = enable_if_t<is_convertible_v<simd_mask<_Up, _A2>, simd_mask>>>
4827 _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend simd_mask
4828 operator||(const simd_mask& __x, const simd_mask<_Up, _A2>& __y)
4829 {
4830 return {__private_init,
4831 _Impl::_S_logical_or(__x._M_data, simd_mask(__y)._M_data)};
4832 }
4833 #endif // _GLIBCXX_SIMD_ENABLE_IMPLICIT_MASK_CAST
4834
4835 _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend simd_mask
4836 operator&&(const simd_mask& __x, const simd_mask& __y)
4837 { return {__private_init, _Impl::_S_logical_and(__x._M_data, __y._M_data)}; }
4838
4839 _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend simd_mask
4840 operator||(const simd_mask& __x, const simd_mask& __y)
4841 { return {__private_init, _Impl::_S_logical_or(__x._M_data, __y._M_data)}; }
4842
4843 _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend simd_mask
4844 operator&(const simd_mask& __x, const simd_mask& __y)
4845 { return {__private_init, _Impl::_S_bit_and(__x._M_data, __y._M_data)}; }
4846
4847 _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend simd_mask
4848 operator|(const simd_mask& __x, const simd_mask& __y)
4849 { return {__private_init, _Impl::_S_bit_or(__x._M_data, __y._M_data)}; }
4850
4851 _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend simd_mask
4852 operator^(const simd_mask& __x, const simd_mask& __y)
4853 { return {__private_init, _Impl::_S_bit_xor(__x._M_data, __y._M_data)}; }
4854
4855 _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend simd_mask&
4856 operator&=(simd_mask& __x, const simd_mask& __y)
4857 {
4858 __x._M_data = _Impl::_S_bit_and(__x._M_data, __y._M_data);
4859 return __x;
4860 }
4861
4862 _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend simd_mask&
4863 operator|=(simd_mask& __x, const simd_mask& __y)
4864 {
4865 __x._M_data = _Impl::_S_bit_or(__x._M_data, __y._M_data);
4866 return __x;
4867 }
4868
4869 _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend simd_mask&
4870 operator^=(simd_mask& __x, const simd_mask& __y)
4871 {
4872 __x._M_data = _Impl::_S_bit_xor(__x._M_data, __y._M_data);
4873 return __x;
4874 }
4875
4876 // }}}
4877 // simd_mask compares [simd_mask.comparison] {{{
4878 _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend simd_mask
4879 operator==(const simd_mask& __x, const simd_mask& __y)
4880 { return !operator!=(__x, __y); }
4881
4882 _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend simd_mask
4883 operator!=(const simd_mask& __x, const simd_mask& __y)
4884 { return {__private_init, _Impl::_S_bit_xor(__x._M_data, __y._M_data)}; }
4885
4886 // }}}
4887 // private_init ctor {{{
4888 _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR
4889 simd_mask(_PrivateInit, typename _Traits::_MaskMember __init)
4890 : _M_data(__init) {}
4891
4892 // }}}
4893 // private_init generator ctor {{{
4894 template <typename _Fp, typename = decltype(bool(declval<_Fp>()(size_t())))>
4895 _GLIBCXX_SIMD_INTRINSIC constexpr
4896 simd_mask(_PrivateInit, _Fp&& __gen)
4897 : _M_data()
4898 {
4899 __execute_n_times<size()>([&](auto __i) constexpr _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA {
4900 _Impl::_S_set(_M_data, __i, __gen(__i));
4901 });
4902 }
4903
4904 // }}}
4905 // bitset_init ctor {{{
4906 _GLIBCXX_SIMD_INTRINSIC constexpr
4907 simd_mask(_BitsetInit, bitset<size()> __init)
4908 : _M_data(_Impl::_S_from_bitmask(_SanitizedBitMask<size()>(__init), _S_type_tag))
4909 {}
4910
4911 // }}}
4912 // __cvt {{{
4913    // TS_FEEDBACK:
4914    // The conversion this proxy implements should be a converting ctor on simd_mask.
4915    // Calling .__cvt() on a simd_mask yields a proxy that converts to any simd_mask
4916    // type of equal size. A useful variation: add `explicit(sizeof(_Tp) != sizeof(_Up))`
4917 struct _CvtProxy
4918 {
4919 template <typename _Up, typename _A2,
4920 typename = enable_if_t<simd_size_v<_Up, _A2> == simd_size_v<_Tp, _Abi>>>
4921 _GLIBCXX_SIMD_ALWAYS_INLINE
4922 operator simd_mask<_Up, _A2>() &&
4923 {
4924 using namespace std::experimental::__proposed;
4925 return static_simd_cast<simd_mask<_Up, _A2>>(_M_data);
4926 }
4927
4928 const simd_mask<_Tp, _Abi>& _M_data;
4929 };
4930
4931 _GLIBCXX_SIMD_INTRINSIC _CvtProxy
4932 __cvt() const
4933 { return {*this}; }
4934
4935 // }}}
4936 // operator?: overloads (suggested extension) {{{
4937 #ifdef __GXX_CONDITIONAL_IS_OVERLOADABLE__
4938 _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend simd_mask
4939 operator?:(const simd_mask& __k, const simd_mask& __where_true,
4940 const simd_mask& __where_false)
4941 {
4942 auto __ret = __where_false;
4943 _Impl::_S_masked_assign(__k._M_data, __ret._M_data, __where_true._M_data);
4944 return __ret;
4945 }
4946
4947 template <typename _U1, typename _U2,
4948 typename _Rp = simd<common_type_t<_U1, _U2>, _Abi>,
4949 typename = enable_if_t<conjunction_v<
4950 is_convertible<_U1, _Rp>, is_convertible<_U2, _Rp>,
4951 is_convertible<simd_mask, typename _Rp::mask_type>>>>
4952 _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend _Rp
4953 operator?:(const simd_mask& __k, const _U1& __where_true,
4954 const _U2& __where_false)
4955 {
4956 _Rp __ret = __where_false;
4957 _Rp::_Impl::_S_masked_assign(
4958 __data(static_cast<typename _Rp::mask_type>(__k)), __data(__ret),
4959 __data(static_cast<_Rp>(__where_true)));
4960 return __ret;
4961 }
4962
4963 #ifdef _GLIBCXX_SIMD_ENABLE_IMPLICIT_MASK_CAST
4964 template <typename _Kp, typename _Ak, typename _Up, typename _Au,
4965 typename = enable_if_t<
4966 conjunction_v<is_convertible<simd_mask<_Kp, _Ak>, simd_mask>,
4967 is_convertible<simd_mask<_Up, _Au>, simd_mask>>>>
4968 _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend simd_mask
4969 operator?:(const simd_mask<_Kp, _Ak>& __k, const simd_mask& __where_true,
4970 const simd_mask<_Up, _Au>& __where_false)
4971 {
4972 simd_mask __ret = __where_false;
4973 _Impl::_S_masked_assign(simd_mask(__k)._M_data, __ret._M_data,
4974 __where_true._M_data);
4975 return __ret;
4976 }
4977 #endif // _GLIBCXX_SIMD_ENABLE_IMPLICIT_MASK_CAST
4978 #endif // __GXX_CONDITIONAL_IS_OVERLOADABLE__
4979
4980 // }}}
4981 // _M_is_constprop {{{
4982 _GLIBCXX_SIMD_INTRINSIC constexpr bool
4983 _M_is_constprop() const
4984 {
4985 if constexpr (__is_scalar_abi<_Abi>())
4986 return __builtin_constant_p(_M_data);
4987 else
4988 return _M_data._M_is_constprop();
4989 }
4990
4991 // }}}
4992
4993 private:
4994 friend const auto& __data<_Tp, abi_type>(const simd_mask&);
4995 friend auto& __data<_Tp, abi_type>(simd_mask&);
4996 alignas(_Traits::_S_mask_align) _MemberType _M_data;
4997 };
4998
4999// }}}
5000
5001/// @cond undocumented
5002// __data(simd_mask) {{{
5003template <typename _Tp, typename _Ap>
5004 _GLIBCXX_SIMD_INTRINSIC constexpr const auto&
5005 __data(const simd_mask<_Tp, _Ap>& __x)
5006 { return __x._M_data; }
5007
5008template <typename _Tp, typename _Ap>
5009 _GLIBCXX_SIMD_INTRINSIC constexpr auto&
5010 __data(simd_mask<_Tp, _Ap>& __x)
5011 { return __x._M_data; }
5012
5013// }}}
5014/// @endcond
5015
5016// simd_mask reductions [simd_mask.reductions] {{{
5017template <typename _Tp, typename _Abi>
5018 _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR bool
5019 all_of(const simd_mask<_Tp, _Abi>& __k) noexcept
5020 {
5021 if (__builtin_is_constant_evaluated() || __k._M_is_constprop())
5022 {
5023 for (size_t __i = 0; __i < simd_size_v<_Tp, _Abi>; ++__i)
5024 if (!__k[__i])
5025 return false;
5026 return true;
5027 }
5028 else
5029 return _Abi::_MaskImpl::_S_all_of(__k);
5030 }
5031
5032template <typename _Tp, typename _Abi>
5033 _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR bool
5034 any_of(const simd_mask<_Tp, _Abi>& __k) noexcept
5035 {
5036 if (__builtin_is_constant_evaluated() || __k._M_is_constprop())
5037 {
5038 for (size_t __i = 0; __i < simd_size_v<_Tp, _Abi>; ++__i)
5039 if (__k[__i])
5040 return true;
5041 return false;
5042 }
5043 else
5044 return _Abi::_MaskImpl::_S_any_of(__k);
5045 }
5046
5047template <typename _Tp, typename _Abi>
5048 _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR bool
5049 none_of(const simd_mask<_Tp, _Abi>& __k) noexcept
5050 {
5051 if (__builtin_is_constant_evaluated() || __k._M_is_constprop())
5052 {
5053 for (size_t __i = 0; __i < simd_size_v<_Tp, _Abi>; ++__i)
5054 if (__k[__i])
5055 return false;
5056 return true;
5057 }
5058 else
5059 return _Abi::_MaskImpl::_S_none_of(__k);
5060 }
5061
5062template <typename _Tp, typename _Abi>
5063 _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR bool
5064 some_of(const simd_mask<_Tp, _Abi>& __k) noexcept
5065 {
5066 if (__builtin_is_constant_evaluated() || __k._M_is_constprop())
5067 {
5068 for (size_t __i = 1; __i < simd_size_v<_Tp, _Abi>; ++__i)
5069 if (__k[__i] != __k[__i - 1])
5070 return true;
5071 return false;
5072 }
5073 else
5074 return _Abi::_MaskImpl::_S_some_of(__k);
5075 }
5076
5077template <typename _Tp, typename _Abi>
5078 _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR int
5079 popcount(const simd_mask<_Tp, _Abi>& __k) noexcept
5080 {
5081 if (__builtin_is_constant_evaluated() || __k._M_is_constprop())
5082 {
5083 const int __r = __call_with_subscripts<simd_size_v<_Tp, _Abi>>(
5084 __k, [](auto... __elements) _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA {
5085 return ((__elements != 0) + ...);
5086 });
5087 if (__builtin_is_constant_evaluated() || __builtin_constant_p(__r))
5088 return __r;
5089 }
5090 return _Abi::_MaskImpl::_S_popcount(__k);
5091 }
5092
5093template <typename _Tp, typename _Abi>
5094 _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR int
5095 find_first_set(const simd_mask<_Tp, _Abi>& __k)
5096 {
5097 if (__builtin_is_constant_evaluated() || __k._M_is_constprop())
5098 {
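	  // First set index == min over (__k[__i] ? __i : _Np).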
5099 constexpr size_t _Np = simd_size_v<_Tp, _Abi>;
5100 const size_t _Idx = __call_with_n_evaluations<_Np>(
5101 [](auto... __indexes) _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA {
5102 return std::min({__indexes...});
5103 }, [&](auto __i) _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA {
5104 return __k[__i] ? +__i : _Np;
5105 });
5106 if (_Idx >= _Np)
5107 __invoke_ub("find_first_set(empty mask) is UB");
5108 if (__builtin_constant_p(_Idx))
5109 return _Idx;
5110 }
5111 return _Abi::_MaskImpl::_S_find_first_set(__k);
5112 }
5113
5114template <typename _Tp, typename _Abi>
5115 _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR int
5116 find_last_set(const simd_mask<_Tp, _Abi>& __k)
5117 {
5118 if (__builtin_is_constant_evaluated() || __k._M_is_constprop())
5119 {
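	  // Last set index == max over (__k[__i] ? __i : -1).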
5120 constexpr size_t _Np = simd_size_v<_Tp, _Abi>;
5121 const int _Idx = __call_with_n_evaluations<_Np>(
5122 [](auto... __indexes) _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA {
5123 return std::max({__indexes...});
5124 }, [&](auto __i) _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA {
5125 return __k[__i] ? int(__i) : -1;
5126 });
5127 if (_Idx < 0)
5128	    __invoke_ub("find_last_set(empty mask) is UB");
5129 if (__builtin_constant_p(_Idx))
5130 return _Idx;
5131 }
5132 return _Abi::_MaskImpl::_S_find_last_set(__k);
5133 }
5134
5135_GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR bool
5136all_of(_ExactBool __x) noexcept
5137{ return __x; }
5138
5139_GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR bool
5140any_of(_ExactBool __x) noexcept
5141{ return __x; }
5142
5143_GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR bool
5144none_of(_ExactBool __x) noexcept
5145{ return !__x; }
5146
5147_GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR bool
5148some_of(_ExactBool) noexcept
5149{ return false; }
5150
5151_GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR int
5152popcount(_ExactBool __x) noexcept
5153{ return __x; }
5154
5155_GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR int
5156find_first_set(_ExactBool)
5157{ return 0; }
5158
5159_GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR int
5160find_last_set(_ExactBool)
5161{ return 0; }
5162
5163// }}}
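
// A minimal usage sketch for the reductions above (illustrative only; any
// vectorizable element type behaves the same way):
//   simd<int> __v([](int __i) { return __i; }); // {0, 1, 2, ...}
//   simd_mask<int> __k = __v > 0;
//   all_of(__k);         // false: element 0 fails the predicate
//   any_of(__k);         // true whenever size() > 1
//   none_of(__k);        // false whenever size() > 1
//   some_of(__k);        // true: both mask values occur
//   popcount(__k);       // size() - 1
//   find_first_set(__k); // 1 (calling it on an all-false mask is UB)
//   find_last_set(__k);  // size() - 1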
5164
5165/// @cond undocumented
5166// _SimdIntOperators{{{1
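// CRTP mixin that provides the integer-only operators (%, &, |, ^, <<, >>)
// for the derived simd type _V. The primary template (condition false) adds
// nothing, so these operators do not participate for non-integral _Tp.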
5167template <typename _V, typename _Tp, typename _Abi, bool>
5168 class _SimdIntOperators {};
5169
5170template <typename _V, typename _Tp, typename _Abi>
5171 class _SimdIntOperators<_V, _Tp, _Abi, true>
5172 {
5173 using _Impl = typename _SimdTraits<_Tp, _Abi>::_SimdImpl;
5174
5175 _GLIBCXX_SIMD_INTRINSIC constexpr const _V&
5176 __derived() const
5177 { return *static_cast<const _V*>(this); }
5178
5179 template <typename _Up>
5180 _GLIBCXX_SIMD_INTRINSIC static _GLIBCXX_SIMD_CONSTEXPR _V
5181 _S_make_derived(_Up&& __d)
5182 { return {__private_init, static_cast<_Up&&>(__d)}; }
5183
5184 public:
5185 _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend
5186 _V&
5187 operator%=(_V& __lhs, const _V& __x)
5188 { return __lhs = __lhs % __x; }
5189
5190 _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend
5191 _V&
5192 operator&=(_V& __lhs, const _V& __x)
5193 { return __lhs = __lhs & __x; }
5194
5195 _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend
5196 _V&
5197 operator|=(_V& __lhs, const _V& __x)
5198 { return __lhs = __lhs | __x; }
5199
5200 _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend
5201 _V&
5202 operator^=(_V& __lhs, const _V& __x)
5203 { return __lhs = __lhs ^ __x; }
5204
5205 _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend
5206 _V&
5207 operator<<=(_V& __lhs, const _V& __x)
5208 { return __lhs = __lhs << __x; }
5209
5210 _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend
5211 _V&
5212 operator>>=(_V& __lhs, const _V& __x)
5213 { return __lhs = __lhs >> __x; }
5214
5215 _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend
5216 _V&
5217 operator<<=(_V& __lhs, int __x)
5218 { return __lhs = __lhs << __x; }
5219
5220 _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend
5221 _V&
5222 operator>>=(_V& __lhs, int __x)
5223 { return __lhs = __lhs >> __x; }
5224
5225 _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend
5226 _V
5227 operator%(const _V& __x, const _V& __y)
5228 {
5229 return _SimdIntOperators::_S_make_derived(
5230 _Impl::_S_modulus(__data(__x), __data(__y)));
5231 }
5232
5233 _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend
5234 _V
5235 operator&(const _V& __x, const _V& __y)
5236 {
5237 return _SimdIntOperators::_S_make_derived(
5238 _Impl::_S_bit_and(__data(__x), __data(__y)));
5239 }
5240
5241 _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend
5242 _V
5243 operator|(const _V& __x, const _V& __y)
5244 {
5245 return _SimdIntOperators::_S_make_derived(
5246 _Impl::_S_bit_or(__data(__x), __data(__y)));
5247 }
5248
5249 _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend
5250 _V
5251 operator^(const _V& __x, const _V& __y)
5252 {
5253 return _SimdIntOperators::_S_make_derived(
5254 _Impl::_S_bit_xor(__data(__x), __data(__y)));
5255 }
5256
5257 _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend
5258 _V
5259 operator<<(const _V& __x, const _V& __y)
5260 {
5261 return _SimdIntOperators::_S_make_derived(
5262 _Impl::_S_bit_shift_left(__data(__x), __data(__y)));
5263 }
5264
5265 _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend
5266 _V
5267 operator>>(const _V& __x, const _V& __y)
5268 {
5269 return _SimdIntOperators::_S_make_derived(
5270 _Impl::_S_bit_shift_right(__data(__x), __data(__y)));
5271 }
5272
5273 _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend
5274 _V
5275 operator<<(const _V& __x, int __y)
5276 {
5277 if (__y < 0)
5278 __invoke_ub("The behavior is undefined if the right operand of a "
5279 "shift operation is negative. [expr.shift]\nA shift by "
5280 "%d was requested",
5281 __y);
5282 if (size_t(__y) >= sizeof(declval<_Tp>() << __y) * __CHAR_BIT__)
5283 __invoke_ub(
5284 "The behavior is undefined if the right operand of a "
5285 "shift operation is greater than or equal to the width of the "
5286 "promoted left operand. [expr.shift]\nA shift by %d was requested",
5287 __y);
5288 return _SimdIntOperators::_S_make_derived(
5289 _Impl::_S_bit_shift_left(__data(__x), __y));
5290 }
5291
5292 _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend
5293 _V
5294 operator>>(const _V& __x, int __y)
5295 {
5296 if (__y < 0)
5297 __invoke_ub(
5298 "The behavior is undefined if the right operand of a shift "
5299 "operation is negative. [expr.shift]\nA shift by %d was requested",
5300 __y);
5301 if (size_t(__y) >= sizeof(declval<_Tp>() << __y) * __CHAR_BIT__)
5302 __invoke_ub(
5303 "The behavior is undefined if the right operand of a shift "
5304 "operation is greater than or equal to the width of the promoted "
5305 "left operand. [expr.shift]\nA shift by %d was requested",
5306 __y);
5307 return _SimdIntOperators::_S_make_derived(
5308 _Impl::_S_bit_shift_right(__data(__x), __y));
5309 }
5310
5311 // unary operators (for integral _Tp)
5312 _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR
5313 _V
5314 operator~() const
5315 { return {__private_init, _Impl::_S_complement(__derived()._M_data)}; }
5316 };
5317
5318//}}}1
5319/// @endcond
5320
5321// simd {{{
5322template <typename _Tp, typename _Abi>
5323 class simd : public _SimdIntOperators<
5324 simd<_Tp, _Abi>, _Tp, _Abi,
5325 conjunction<is_integral<_Tp>,
5326 typename _SimdTraits<_Tp, _Abi>::_IsValid>::value>,
5327 public _SimdTraits<_Tp, _Abi>::_SimdBase
5328 {
5329 using _Traits = _SimdTraits<_Tp, _Abi>;
5330 using _MemberType = typename _Traits::_SimdMember;
5331 using _CastType = typename _Traits::_SimdCastType;
5332 static constexpr _Tp* _S_type_tag = nullptr;
5333 friend typename _Traits::_SimdBase;
5334
5335 public:
5336 using _Impl = typename _Traits::_SimdImpl;
5337 friend _Impl;
5338 friend _SimdIntOperators<simd, _Tp, _Abi, true>;
5339
5340 using value_type = _Tp;
5341 using reference = _SmartReference<_MemberType, _Impl, value_type>;
5342 using mask_type = simd_mask<_Tp, _Abi>;
5343 using abi_type = _Abi;
5344
5345 static constexpr size_t size()
5346 { return __size_or_zero_v<_Tp, _Abi>; }
5347
5348 _GLIBCXX_SIMD_CONSTEXPR simd() = default;
5349 _GLIBCXX_SIMD_CONSTEXPR simd(const simd&) = default;
5350 _GLIBCXX_SIMD_CONSTEXPR simd(simd&&) noexcept = default;
5351 _GLIBCXX_SIMD_CONSTEXPR simd& operator=(const simd&) = default;
5352 _GLIBCXX_SIMD_CONSTEXPR simd& operator=(simd&&) noexcept = default;
5353
5354 // implicit broadcast constructor
5355 template <typename _Up,
5356 typename = enable_if_t<!is_same_v<__remove_cvref_t<_Up>, bool>>>
5357 _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR
5358 simd(_ValuePreservingOrInt<_Up, value_type>&& __x)
5359 : _M_data(
5360 _Impl::_S_broadcast(static_cast<value_type>(static_cast<_Up&&>(__x))))
5361 {}
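	// e.g. simd<float> __x = 1.f; initializes every element to 1.f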
5362
5363 // implicit type conversion constructor (convert from fixed_size to
5364 // fixed_size)
5365 template <typename _Up>
5366 _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR
5367 simd(const simd<_Up, simd_abi::fixed_size<size()>>& __x,
5368 enable_if_t<
5369 conjunction<
5370 is_same<simd_abi::fixed_size<size()>, abi_type>,
5371 negation<__is_narrowing_conversion<_Up, value_type>>,
5372 __converts_to_higher_integer_rank<_Up, value_type>>::value,
5373 void*> = nullptr)
5374 : simd{static_cast<array<_Up, size()>>(__x).data(), vector_aligned} {}
5375
5376 // explicit type conversion constructor
5377#ifdef _GLIBCXX_SIMD_ENABLE_STATIC_CAST
5378 template <typename _Up, typename _A2,
5379 typename = decltype(static_simd_cast<simd>(
5380 declval<const simd<_Up, _A2>&>()))>
5381 _GLIBCXX_SIMD_ALWAYS_INLINE explicit _GLIBCXX_SIMD_CONSTEXPR
5382 simd(const simd<_Up, _A2>& __x)
5383 : simd(static_simd_cast<simd>(__x)) {}
5384#endif // _GLIBCXX_SIMD_ENABLE_STATIC_CAST
5385
5386 // generator constructor
5387 template <typename _Fp>
5388 _GLIBCXX_SIMD_ALWAYS_INLINE explicit _GLIBCXX_SIMD_CONSTEXPR
5389 simd(_Fp&& __gen, _ValuePreservingOrInt<decltype(declval<_Fp>()(
5390 declval<_SizeConstant<0>&>())),
5391 value_type>* = nullptr)
5392 : _M_data(_Impl::_S_generator(static_cast<_Fp&&>(__gen), _S_type_tag)) {}
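	// e.g. simd<int>([](int __i) { return __i; }) yields {0, 1, 2, ...};
	// the callable is invoked with _SizeConstant<0> ... _SizeConstant<size() - 1>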
5393
5394 // load constructor
5395 template <typename _Up, typename _Flags>
5396 _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR
5397 simd(const _Up* __mem, _IsSimdFlagType<_Flags>)
5398 : _M_data(
5399 _Impl::_S_load(_Flags::template _S_apply<simd>(__mem), _S_type_tag))
5400 {}
5401
5402 // loads [simd.load]
5403 template <typename _Up, typename _Flags>
5404 _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR void
5405 copy_from(const _Vectorizable<_Up>* __mem, _IsSimdFlagType<_Flags>)
5406 {
5407 _M_data = static_cast<decltype(_M_data)>(
5408 _Impl::_S_load(_Flags::template _S_apply<simd>(__mem), _S_type_tag));
5409 }
5410
5411 // stores [simd.store]
5412 template <typename _Up, typename _Flags>
5413 _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR void
5414 copy_to(_Vectorizable<_Up>* __mem, _IsSimdFlagType<_Flags>) const
5415 {
5416 _Impl::_S_store(_M_data, _Flags::template _S_apply<simd>(__mem),
5417 _S_type_tag);
5418 }
5419
5420 // scalar access
5421 _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR reference
5422 operator[](size_t __i)
5423 { return {_M_data, int(__i)}; }
5424
5425 _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR value_type
5426 operator[]([[maybe_unused]] size_t __i) const
5427 {
5428 if constexpr (__is_scalar_abi<_Abi>())
5429 {
5430 _GLIBCXX_DEBUG_ASSERT(__i == 0);
5431 return _M_data;
5432 }
5433 else
5434 return _M_data[__i];
5435 }
5436
5437 // increment and decrement:
5438 _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR simd&
5439 operator++()
5440 {
5441 _Impl::_S_increment(_M_data);
5442 return *this;
5443 }
5444
5445 _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR simd
5446 operator++(int)
5447 {
5448 simd __r = *this;
5449 _Impl::_S_increment(_M_data);
5450 return __r;
5451 }
5452
5453 _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR simd&
5454 operator--()
5455 {
5456 _Impl::_S_decrement(_M_data);
5457 return *this;
5458 }
5459
5460 _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR simd
5461 operator--(int)
5462 {
5463 simd __r = *this;
5464 _Impl::_S_decrement(_M_data);
5465 return __r;
5466 }
5467
5468 // unary operators (for any _Tp)
5469 _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR mask_type
5470 operator!() const
5471 { return {__private_init, _Impl::_S_negate(_M_data)}; }
5472
5473 _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR simd
5474 operator+() const
5475 { return *this; }
5476
5477 _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR simd
5478 operator-() const
5479 { return {__private_init, _Impl::_S_unary_minus(_M_data)}; }
5480
5481 // access to internal representation (suggested extension)
5482 _GLIBCXX_SIMD_ALWAYS_INLINE explicit _GLIBCXX_SIMD_CONSTEXPR
5483 simd(_CastType __init) : _M_data(__init) {}
5484
5485 // compound assignment [simd.cassign]
5486 _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend simd&
5487 operator+=(simd& __lhs, const simd& __x)
5488 { return __lhs = __lhs + __x; }
5489
5490 _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend simd&
5491 operator-=(simd& __lhs, const simd& __x)
5492 { return __lhs = __lhs - __x; }
5493
5494 _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend simd&
5495 operator*=(simd& __lhs, const simd& __x)
5496 { return __lhs = __lhs * __x; }
5497
5498 _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend simd&
5499 operator/=(simd& __lhs, const simd& __x)
5500 { return __lhs = __lhs / __x; }
5501
5502 // binary operators [simd.binary]
5503 _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend simd
5504 operator+(const simd& __x, const simd& __y)
5505 { return {__private_init, _Impl::_S_plus(__x._M_data, __y._M_data)}; }
5506
5507 _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend simd
5508 operator-(const simd& __x, const simd& __y)
5509 { return {__private_init, _Impl::_S_minus(__x._M_data, __y._M_data)}; }
5510
5511 _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend simd
5512 operator*(const simd& __x, const simd& __y)
5513 { return {__private_init, _Impl::_S_multiplies(__x._M_data, __y._M_data)}; }
5514
5515 _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend simd
5516 operator/(const simd& __x, const simd& __y)
5517 { return {__private_init, _Impl::_S_divides(__x._M_data, __y._M_data)}; }
5518
5519 // compares [simd.comparison]
5520 _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend mask_type
5521 operator==(const simd& __x, const simd& __y)
5522 { return simd::_S_make_mask(_Impl::_S_equal_to(__x._M_data, __y._M_data)); }
5523
5524 _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend mask_type
5525 operator!=(const simd& __x, const simd& __y)
5526 {
5527 return simd::_S_make_mask(
5528 _Impl::_S_not_equal_to(__x._M_data, __y._M_data));
5529 }
5530
5531 _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend mask_type
5532 operator<(const simd& __x, const simd& __y)
5533 { return simd::_S_make_mask(_Impl::_S_less(__x._M_data, __y._M_data)); }
5534
5535 _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend mask_type
5536 operator<=(const simd& __x, const simd& __y)
5537 {
5538 return simd::_S_make_mask(_Impl::_S_less_equal(__x._M_data, __y._M_data));
5539 }
5540
5541 _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend mask_type
5542 operator>(const simd& __x, const simd& __y)
5543 { return simd::_S_make_mask(_Impl::_S_less(__y._M_data, __x._M_data)); }
5544
5545 _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend mask_type
5546 operator>=(const simd& __x, const simd& __y)
5547 {
5548 return simd::_S_make_mask(_Impl::_S_less_equal(__y._M_data, __x._M_data));
5549 }
5550
5551 // operator?: overloads (suggested extension) {{{
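	  // Element-wise select (only when the compiler makes ?: overloadable):
	  // (__k ? __x : __y)[__i] == (__k[__i] ? __x[__i] : __y[__i])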
5552#ifdef __GXX_CONDITIONAL_IS_OVERLOADABLE__
5553 _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend simd
5554 operator?:(const mask_type& __k, const simd& __where_true,
5555 const simd& __where_false)
5556 {
5557 auto __ret = __where_false;
5558 _Impl::_S_masked_assign(__data(__k), __data(__ret), __data(__where_true));
5559 return __ret;
5560 }
5561
5562#endif // __GXX_CONDITIONAL_IS_OVERLOADABLE__
5563 // }}}
5564
5565	  // "private" because of the first argument's namespace
5566 _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR
5567 simd(_PrivateInit, const _MemberType& __init)
5568 : _M_data(__init) {}
5569
5570	  // "private" because of the first argument's namespace
5571 _GLIBCXX_SIMD_INTRINSIC
5572 simd(_BitsetInit, bitset<size()> __init) : _M_data()
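	// _M_data is zero-initialized above; the masked assignment then writes
	// ~0 into exactly the elements whose bits are set in __init.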
5573 { where(mask_type(__bitset_init, __init), *this) = ~*this; }
5574
5575 _GLIBCXX_SIMD_INTRINSIC constexpr bool
5576 _M_is_constprop() const
5577 {
5578 if constexpr (__is_scalar_abi<_Abi>())
5579 return __builtin_constant_p(_M_data);
5580 else
5581 return _M_data._M_is_constprop();
5582 }
5583
5584 private:
5585 _GLIBCXX_SIMD_INTRINSIC static constexpr mask_type
5586 _S_make_mask(typename mask_type::_MemberType __k)
5587 { return {__private_init, __k}; }
5588
5589 friend const auto& __data<value_type, abi_type>(const simd&);
5590 friend auto& __data<value_type, abi_type>(simd&);
5591 alignas(_Traits::_S_simd_align) _MemberType _M_data;
5592 };
5593
5594// }}}
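
// A minimal usage sketch for the simd class above (alignment and extent are
// chosen to satisfy the vector_aligned load/store preconditions):
//   alignas(memory_alignment_v<simd<float>>)
//     float __mem[simd<float>::size()] = {};
//   simd<float> __a(__mem, vector_aligned); // load constructor
//   __a = __a * __a + 1.f;                  // broadcast + binary operators
//   __a.copy_to(__mem, vector_aligned);     // store [simd.store]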
5595/// @cond undocumented
5596// __data {{{
5597template <typename _Tp, typename _Ap>
5598 _GLIBCXX_SIMD_INTRINSIC constexpr const auto&
5599 __data(const simd<_Tp, _Ap>& __x)
5600 { return __x._M_data; }
5601
5602template <typename _Tp, typename _Ap>
5603 _GLIBCXX_SIMD_INTRINSIC constexpr auto&
5604 __data(simd<_Tp, _Ap>& __x)
5605 { return __x._M_data; }
5606
5607// }}}
5608namespace __float_bitwise_operators { //{{{
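// Bitwise operations on the value representation of floating-point simd
// objects, presumably for internal use (e.g. sign and mantissa manipulation
// in the simd math functions); opt in via a using-directive for this namespace.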
5609template <typename _Tp, typename _Ap>
5610 _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR simd<_Tp, _Ap>
5611 operator^(const simd<_Tp, _Ap>& __a, const simd<_Tp, _Ap>& __b)
5612 { return {__private_init, _Ap::_SimdImpl::_S_bit_xor(__data(__a), __data(__b))}; }
5613
5614template <typename _Tp, typename _Ap>
5615 _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR simd<_Tp, _Ap>
5616 operator|(const simd<_Tp, _Ap>& __a, const simd<_Tp, _Ap>& __b)
5617 { return {__private_init, _Ap::_SimdImpl::_S_bit_or(__data(__a), __data(__b))}; }
5618
5619template <typename _Tp, typename _Ap>
5620 _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR simd<_Tp, _Ap>
5621 operator&(const simd<_Tp, _Ap>& __a, const simd<_Tp, _Ap>& __b)
5622 { return {__private_init, _Ap::_SimdImpl::_S_bit_and(__data(__a), __data(__b))}; }
5623
5624template <typename _Tp, typename _Ap>
5625 _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR
5626 enable_if_t<is_floating_point_v<_Tp>, simd<_Tp, _Ap>>
5627 operator~(const simd<_Tp, _Ap>& __a)
5628 { return {__private_init, _Ap::_SimdImpl::_S_complement(__data(__a))}; }
5629} // namespace __float_bitwise_operators }}}
5630/// @endcond
5631
5632/// @}
5633_GLIBCXX_SIMD_END_NAMESPACE
5634
5635#endif // __cplusplus >= 201703L
5636#endif // _GLIBCXX_EXPERIMENTAL_SIMD_H
5637
5638// vim: foldmethod=marker foldmarker={{{,}}}