// Definition of the public simd interfaces -*- C++ -*-

// Copyright (C) 2020-2024 Free Software Foundation, Inc.
//
// This file is part of the GNU ISO C++ Library. This library is free
// software; you can redistribute it and/or modify it under the
// terms of the GNU General Public License as published by the
// Free Software Foundation; either version 3, or (at your option)
// any later version.

// This library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.

// Under Section 7 of GPL version 3, you are granted additional
// permissions described in the GCC Runtime Library Exception, version
// 3.1, as published by the Free Software Foundation.

// You should have received a copy of the GNU General Public License and
// a copy of the GCC Runtime Library Exception along with this program;
// see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
// <http://www.gnu.org/licenses/>.
#ifndef _GLIBCXX_EXPERIMENTAL_SIMD_H
#define _GLIBCXX_EXPERIMENTAL_SIMD_H

#if __cplusplus >= 201703L

#include "simd_detail.h"
#include "numeric_traits.h"
#include <bit>
#include <bitset>
#ifdef _GLIBCXX_DEBUG_UB
#include <cstdio> // for stderr
#endif
#include <cstring>
#include <cmath>
#include <functional>
#include <iosfwd>
#include <utility>
#include <algorithm>

#if _GLIBCXX_SIMD_X86INTRIN
#include <x86intrin.h>
#elif _GLIBCXX_SIMD_HAVE_NEON
#pragma GCC diagnostic push
// narrowing conversion of '__a' from 'uint64_t' {aka 'long long unsigned int'} to
// 'int64x1_t' {aka 'long long int'} [-Wnarrowing]
#pragma GCC diagnostic ignored "-Wnarrowing"
#include <arm_neon.h>
#pragma GCC diagnostic pop
#endif
#if _GLIBCXX_SIMD_HAVE_SVE
#include <arm_sve.h>
#endif
/** @ingroup ts_simd
 * @{
 */
/* There are several closely related types, with the following naming
 * convention:
 * _Tp: vectorizable (arithmetic) type (or any type)
 * _TV: __vector_type_t<_Tp, _Np>
 * _TW: _SimdWrapper<_Tp, _Np>
 * _TI: __intrinsic_type_t<_Tp, _Np>
 * _TVT: _VectorTraits<_TV> or _VectorTraits<_TW>
 * If one additional type is needed use _U instead of _T.
 * Otherwise use _T\d, _TV\d, _TW\d, _TI\d, _TVT\d.
 *
 * More naming conventions:
 * _Ap or _Abi: An ABI tag from the simd_abi namespace
 * _Ip: often used for integer types with sizeof(_Ip) == sizeof(_Tp),
 *      _IV, _IW as for _TV, _TW
 * _Np: number of elements (not bytes)
 * _Bytes: number of bytes
 *
 * Variable names:
 * __k: mask object (vector- or bitmask)
 */
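/* For example (illustrative only), with _Tp = float and _Np = 4:
 * _TV  = __vector_type_t<float, 4>    (a 16-byte [[gnu::vector_size]] type)
 * _TW  = _SimdWrapper<float, 4>
 * _TI  = __intrinsic_type_t<float, 4> (__m128 on x86)
 * _TVT = _VectorTraits<_TV>
 */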
_GLIBCXX_SIMD_BEGIN_NAMESPACE

#if !_GLIBCXX_SIMD_X86INTRIN
using __m128 [[__gnu__::__vector_size__(16)]] = float;
using __m128d [[__gnu__::__vector_size__(16)]] = double;
using __m128i [[__gnu__::__vector_size__(16)]] = long long;
using __m256 [[__gnu__::__vector_size__(32)]] = float;
using __m256d [[__gnu__::__vector_size__(32)]] = double;
using __m256i [[__gnu__::__vector_size__(32)]] = long long;
using __m512 [[__gnu__::__vector_size__(64)]] = float;
using __m512d [[__gnu__::__vector_size__(64)]] = double;
using __m512i [[__gnu__::__vector_size__(64)]] = long long;
#endif

#if _GLIBCXX_SIMD_HAVE_SVE
constexpr inline int __sve_vectorized_size_bytes = __ARM_FEATURE_SVE_BITS / 8;
#else
constexpr inline int __sve_vectorized_size_bytes = 0;
#endif

namespace simd_abi {
// simd_abi forward declarations {{{
// implementation details:
struct _Scalar;

template <int _Np>
  struct _Fixed;

// There are two major ABIs that appear on different architectures.
// Both have non-boolean values packed into an N Byte register
// -> #elements = N / sizeof(T)
// Masks differ:
// 1. Use value vector registers for masks (all 0 or all 1)
// 2. Use bitmasks (mask registers) with one bit per value in the corresponding
//    value vector
//
// Both can be partially used, masking off the rest when doing horizontal
// operations or operations that can trap (e.g. FP_INVALID or integer division
// by 0). This is encoded as the number of used bytes.
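//
// Illustration (not part of the interface): with 16-byte registers and
// _Tp = float, both ABIs hold 16 / sizeof(float) = 4 elements. A mask of
// kind 1 (e.g. _VecBuiltin<16>) is a vector of four 32-bit elements, each
// all-ones (true) or 0 (false); a mask of kind 2 (_VecBltnBtmsk, i.e.
// AVX-512 style) is a bitmask with one bit per element.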
template <int _UsedBytes>
  struct _VecBuiltin;

template <int _UsedBytes>
  struct _VecBltnBtmsk;

template <int _UsedBytes, int _TotalBytes = __sve_vectorized_size_bytes>
  struct _SveAbi;

template <typename _Tp, int _Np>
  using _VecN = _VecBuiltin<sizeof(_Tp) * _Np>;

template <int _UsedBytes = 16>
  using _Sse = _VecBuiltin<_UsedBytes>;

template <int _UsedBytes = 32>
  using _Avx = _VecBuiltin<_UsedBytes>;

template <int _UsedBytes = 64>
  using _Avx512 = _VecBltnBtmsk<_UsedBytes>;

template <int _UsedBytes = 16>
  using _Neon = _VecBuiltin<_UsedBytes>;

template <int _UsedBytes = __sve_vectorized_size_bytes>
  using _Sve = _SveAbi<_UsedBytes, __sve_vectorized_size_bytes>;

// implementation-defined:
using __sse = _Sse<>;
using __avx = _Avx<>;
using __avx512 = _Avx512<>;
using __neon = _Neon<>;
using __neon128 = _Neon<16>;
using __neon64 = _Neon<8>;
using __sve = _Sve<>;

// standard:
template <typename _Tp, size_t _Np, typename...>
  struct deduce;

template <int _Np>
  using fixed_size = _Fixed<_Np>;

using scalar = _Scalar;

// }}}
} // namespace simd_abi
// forward declarations is_simd(_mask), simd(_mask), simd_size {{{
template <typename _Tp>
  struct is_simd;

template <typename _Tp>
  struct is_simd_mask;

template <typename _Tp, typename _Abi>
  class simd;

template <typename _Tp, typename _Abi>
  class simd_mask;

template <typename _Tp, typename _Abi>
  struct simd_size;

// }}}
// load/store flags {{{
struct element_aligned_tag
{
  template <typename _Tp, typename _Up = typename _Tp::value_type>
    static constexpr size_t _S_alignment = alignof(_Up);

  template <typename _Tp, typename _Up>
    _GLIBCXX_SIMD_INTRINSIC static constexpr _Up*
    _S_apply(_Up* __ptr)
    { return __ptr; }
};

struct vector_aligned_tag
{
  template <typename _Tp, typename _Up = typename _Tp::value_type>
    static constexpr size_t _S_alignment
      = std::__bit_ceil(sizeof(_Up) * _Tp::size());

  template <typename _Tp, typename _Up>
    _GLIBCXX_SIMD_INTRINSIC static constexpr _Up*
    _S_apply(_Up* __ptr)
    { return static_cast<_Up*>(__builtin_assume_aligned(__ptr, _S_alignment<_Tp, _Up>)); }
};

template <size_t _Np> struct overaligned_tag
{
  template <typename _Tp, typename _Up = typename _Tp::value_type>
    static constexpr size_t _S_alignment = _Np;

  template <typename _Tp, typename _Up>
    _GLIBCXX_SIMD_INTRINSIC static constexpr _Up*
    _S_apply(_Up* __ptr)
    { return static_cast<_Up*>(__builtin_assume_aligned(__ptr, _Np)); }
};

inline constexpr element_aligned_tag element_aligned = {};

inline constexpr vector_aligned_tag vector_aligned = {};

template <size_t _Np>
  inline constexpr overaligned_tag<_Np> overaligned = {};
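
// Usage sketch (illustrative, assuming __mem points to enough floats):
//   simd<float> __v;
//   __v.copy_from(__mem, element_aligned); // requires alignof(float) only
//   __v.copy_from(__mem, vector_aligned);  // requires alignment to
//                                          // __bit_ceil(sizeof(float) * __v.size())
//   __v.copy_from(__mem, overaligned<64>); // requires 64-byte alignment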

// }}}
template <size_t _Xp>
  using _SizeConstant = integral_constant<size_t, _Xp>;
// constexpr feature detection{{{
constexpr inline bool __have_mmx = _GLIBCXX_SIMD_HAVE_MMX;
constexpr inline bool __have_sse = _GLIBCXX_SIMD_HAVE_SSE;
constexpr inline bool __have_sse2 = _GLIBCXX_SIMD_HAVE_SSE2;
constexpr inline bool __have_sse3 = _GLIBCXX_SIMD_HAVE_SSE3;
constexpr inline bool __have_ssse3 = _GLIBCXX_SIMD_HAVE_SSSE3;
constexpr inline bool __have_sse4_1 = _GLIBCXX_SIMD_HAVE_SSE4_1;
constexpr inline bool __have_sse4_2 = _GLIBCXX_SIMD_HAVE_SSE4_2;
constexpr inline bool __have_xop = _GLIBCXX_SIMD_HAVE_XOP;
constexpr inline bool __have_avx = _GLIBCXX_SIMD_HAVE_AVX;
constexpr inline bool __have_avx2 = _GLIBCXX_SIMD_HAVE_AVX2;
constexpr inline bool __have_bmi = _GLIBCXX_SIMD_HAVE_BMI1;
constexpr inline bool __have_bmi2 = _GLIBCXX_SIMD_HAVE_BMI2;
constexpr inline bool __have_lzcnt = _GLIBCXX_SIMD_HAVE_LZCNT;
constexpr inline bool __have_sse4a = _GLIBCXX_SIMD_HAVE_SSE4A;
constexpr inline bool __have_fma = _GLIBCXX_SIMD_HAVE_FMA;
constexpr inline bool __have_fma4 = _GLIBCXX_SIMD_HAVE_FMA4;
constexpr inline bool __have_f16c = _GLIBCXX_SIMD_HAVE_F16C;
constexpr inline bool __have_popcnt = _GLIBCXX_SIMD_HAVE_POPCNT;
constexpr inline bool __have_avx512f = _GLIBCXX_SIMD_HAVE_AVX512F;
constexpr inline bool __have_avx512dq = _GLIBCXX_SIMD_HAVE_AVX512DQ;
constexpr inline bool __have_avx512vl = _GLIBCXX_SIMD_HAVE_AVX512VL;
constexpr inline bool __have_avx512bw = _GLIBCXX_SIMD_HAVE_AVX512BW;
constexpr inline bool __have_avx512dq_vl = __have_avx512dq && __have_avx512vl;
constexpr inline bool __have_avx512bw_vl = __have_avx512bw && __have_avx512vl;
constexpr inline bool __have_avx512bitalg = _GLIBCXX_SIMD_HAVE_AVX512BITALG;
constexpr inline bool __have_avx512vbmi2 = _GLIBCXX_SIMD_HAVE_AVX512VBMI2;
constexpr inline bool __have_avx512vbmi = _GLIBCXX_SIMD_HAVE_AVX512VBMI;
constexpr inline bool __have_avx512ifma = _GLIBCXX_SIMD_HAVE_AVX512IFMA;
constexpr inline bool __have_avx512cd = _GLIBCXX_SIMD_HAVE_AVX512CD;
constexpr inline bool __have_avx512vnni = _GLIBCXX_SIMD_HAVE_AVX512VNNI;
constexpr inline bool __have_avx512vpopcntdq = _GLIBCXX_SIMD_HAVE_AVX512VPOPCNTDQ;
constexpr inline bool __have_avx512vp2intersect = _GLIBCXX_SIMD_HAVE_AVX512VP2INTERSECT;

constexpr inline bool __have_neon = _GLIBCXX_SIMD_HAVE_NEON;
constexpr inline bool __have_neon_a32 = _GLIBCXX_SIMD_HAVE_NEON_A32;
constexpr inline bool __have_neon_a64 = _GLIBCXX_SIMD_HAVE_NEON_A64;
constexpr inline bool __support_neon_float =
#if defined __GCC_IEC_559
  __GCC_IEC_559 == 0;
#elif defined __FAST_MATH__
  true;
#else
  false;
#endif

constexpr inline bool __have_sve = _GLIBCXX_SIMD_HAVE_SVE;
constexpr inline bool __have_sve2 = _GLIBCXX_SIMD_HAVE_SVE2;

#ifdef _ARCH_PWR10
constexpr inline bool __have_power10vec = true;
#else
constexpr inline bool __have_power10vec = false;
#endif
#ifdef __POWER9_VECTOR__
constexpr inline bool __have_power9vec = true;
#else
constexpr inline bool __have_power9vec = false;
#endif
#if defined __POWER8_VECTOR__
constexpr inline bool __have_power8vec = true;
#else
constexpr inline bool __have_power8vec = __have_power9vec;
#endif
#if defined __VSX__
constexpr inline bool __have_power_vsx = true;
#else
constexpr inline bool __have_power_vsx = __have_power8vec;
#endif
#if defined __ALTIVEC__
constexpr inline bool __have_power_vmx = true;
#else
constexpr inline bool __have_power_vmx = __have_power_vsx;
#endif

// }}}

namespace __detail
{
#ifdef math_errhandling
  // Determines _S_handle_fpexcept from math_errhandling if it is defined and expands to a constant
  // expression. math_errhandling may expand to an extern symbol, in which case a constexpr value
  // must be guessed.
  template <int = math_errhandling>
    constexpr bool
    __handle_fpexcept_impl(int)
    { return math_errhandling & MATH_ERREXCEPT; }
#endif

  // Fallback if math_errhandling doesn't work: with fast-math assume floating-point exceptions are
  // ignored, otherwise implement correct exception behavior.
  constexpr bool
  __handle_fpexcept_impl(float)
  {
#if defined __FAST_MATH__
    return false;
#else
    return true;
#endif
  }

  /// True if math functions must raise floating-point exceptions as specified by C17.
  static constexpr bool _S_handle_fpexcept = __handle_fpexcept_impl(0);
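
  // Overload ranking is the dispatch mechanism here (illustrative note): the
  // call __handle_fpexcept_impl(0) prefers the int overload, which is only
  // declared if math_errhandling is defined and only viable if it expands to a
  // constant expression usable as a template argument; otherwise the implicit
  // int -> float conversion selects the fallback overload.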

  constexpr std::uint_least64_t
  __floating_point_flags()
  {
    std::uint_least64_t __flags = 0;
    if constexpr (_S_handle_fpexcept)
      __flags |= 1;
#ifdef __FAST_MATH__
    __flags |= 1 << 1;
#elif __FINITE_MATH_ONLY__
    __flags |= 2 << 1;
#elif __GCC_IEC_559 < 2
    __flags |= 3 << 1;
#endif
    __flags |= (__FLT_EVAL_METHOD__ + 1) << 3;
    return __flags;
  }

  constexpr std::uint_least64_t
  __machine_flags()
  {
    if constexpr (__have_mmx || __have_sse)
      return __have_mmx
               | (__have_sse << 1)
               | (__have_sse2 << 2)
               | (__have_sse3 << 3)
               | (__have_ssse3 << 4)
               | (__have_sse4_1 << 5)
               | (__have_sse4_2 << 6)
               | (__have_xop << 7)
               | (__have_avx << 8)
               | (__have_avx2 << 9)
               | (__have_bmi << 10)
               | (__have_bmi2 << 11)
               | (__have_lzcnt << 12)
               | (__have_sse4a << 13)
               | (__have_fma << 14)
               | (__have_fma4 << 15)
               | (__have_f16c << 16)
               | (__have_popcnt << 17)
               | (__have_avx512f << 18)
               | (__have_avx512dq << 19)
               | (__have_avx512vl << 20)
               | (__have_avx512bw << 21)
               | (__have_avx512bitalg << 22)
               | (__have_avx512vbmi2 << 23)
               | (__have_avx512vbmi << 24)
               | (__have_avx512ifma << 25)
               | (__have_avx512cd << 26)
               | (__have_avx512vnni << 27)
               | (__have_avx512vpopcntdq << 28)
               | (__have_avx512vp2intersect << 29);
    else if constexpr (__have_neon || __have_sve)
      return __have_neon
               | (__have_neon_a32 << 1)
               | (__have_neon_a64 << 2)
               | (__support_neon_float << 3)
               | (__have_sve << 4)
               | (__have_sve2 << 5);
    else if constexpr (__have_power_vmx)
      return __have_power_vmx
               | (__have_power_vsx << 1)
               | (__have_power8vec << 2)
               | (__have_power9vec << 3)
               | (__have_power10vec << 4);
    else
      return 0;
  }

  namespace
  {
    struct _OdrEnforcer {};
  }

  template <std::uint_least64_t...>
    struct _MachineFlagsTemplate {};

  /**@internal
   * Use this type as default template argument to all function templates that
   * are not declared always_inline. It ensures that a function
   * specialization, which the compiler decides not to inline, has a unique symbol
   * (_OdrEnforcer) or a symbol matching the machine/architecture flags
   * (_MachineFlagsTemplate). This helps to avoid ODR violations in cases where
   * users link TUs compiled with different flags. This is especially important
   * for using simd in libraries.
   */
  using __odr_helper
    = conditional_t<__machine_flags() == 0, _OdrEnforcer,
                    _MachineFlagsTemplate<__machine_flags(), __floating_point_flags()>>;
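
  // Usage sketch (illustrative; the function name is hypothetical): such
  // function templates take __odr_helper as a defaulted template parameter,
  //   template <typename _Tp, typename _Abi, typename = __detail::__odr_helper>
  //     void __some_non_inline_function(const simd<_Tp, _Abi>&);
  // so TUs compiled with different machine flags instantiate distinct symbols.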

  struct _Minimum
  {
    template <typename _Tp>
      _GLIBCXX_SIMD_INTRINSIC constexpr
      _Tp
      operator()(_Tp __a, _Tp __b) const
      {
        using std::min;
        return min(__a, __b);
      }
  };

  struct _Maximum
  {
    template <typename _Tp>
      _GLIBCXX_SIMD_INTRINSIC constexpr
      _Tp
      operator()(_Tp __a, _Tp __b) const
      {
        using std::max;
        return max(__a, __b);
      }
  };
} // namespace __detail

// unrolled/pack execution helpers
// __execute_n_times{{{
template <typename _Fp, size_t... _I>
  [[__gnu__::__flatten__]] _GLIBCXX_SIMD_INTRINSIC constexpr
  void
  __execute_on_index_sequence(_Fp&& __f, index_sequence<_I...>)
  { ((void)__f(_SizeConstant<_I>()), ...); }

template <typename _Fp>
  _GLIBCXX_SIMD_INTRINSIC constexpr void
  __execute_on_index_sequence(_Fp&&, index_sequence<>)
  { }

template <size_t _Np, typename _Fp>
  _GLIBCXX_SIMD_INTRINSIC constexpr void
  __execute_n_times(_Fp&& __f)
  {
    __execute_on_index_sequence(static_cast<_Fp&&>(__f),
                                make_index_sequence<_Np>{});
  }
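
// E.g. (illustrative) __execute_n_times<3>([&](auto __i) { __arr[__i] = __i; })
// unrolls to __arr[0] = 0; __arr[1] = 1; __arr[2] = 2;. Each __i is a
// _SizeConstant and therefore usable as a constant expression in the lambda.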

// }}}
// __generate_from_n_evaluations{{{
template <typename _R, typename _Fp, size_t... _I>
  [[__gnu__::__flatten__]] _GLIBCXX_SIMD_INTRINSIC constexpr
  _R
  __execute_on_index_sequence_with_return(_Fp&& __f, index_sequence<_I...>)
  { return _R{__f(_SizeConstant<_I>())...}; }

template <size_t _Np, typename _R, typename _Fp>
  _GLIBCXX_SIMD_INTRINSIC constexpr _R
  __generate_from_n_evaluations(_Fp&& __f)
  {
    return __execute_on_index_sequence_with_return<_R>(
             static_cast<_Fp&&>(__f), make_index_sequence<_Np>{});
  }
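
// E.g. (illustrative) __generate_from_n_evaluations<4, _V>(__f) expands to
// _V{__f(_SizeConstant<0>()), ..., __f(_SizeConstant<3>())}, i.e. it
// brace-initializes an _V from _Np compile-time-indexed evaluations.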

// }}}
// __call_with_n_evaluations{{{
template <size_t... _I, typename _F0, typename _FArgs>
  [[__gnu__::__flatten__]] _GLIBCXX_SIMD_INTRINSIC constexpr
  auto
  __call_with_n_evaluations(index_sequence<_I...>, _F0&& __f0, _FArgs&& __fargs)
  { return __f0(__fargs(_SizeConstant<_I>())...); }

template <size_t _Np, typename _F0, typename _FArgs>
  _GLIBCXX_SIMD_INTRINSIC constexpr auto
  __call_with_n_evaluations(_F0&& __f0, _FArgs&& __fargs)
  {
    return __call_with_n_evaluations(make_index_sequence<_Np>{},
                                     static_cast<_F0&&>(__f0),
                                     static_cast<_FArgs&&>(__fargs));
  }
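
// Usage sketch (illustrative): summing _Np chunks in one fold expression:
//   __call_with_n_evaluations<_Np>(
//     [](auto... __chunks) { return (__chunks + ...); },
//     [&](auto __i) { return __x[__i]; });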

// }}}
// __call_with_subscripts{{{
template <size_t _First = 0, size_t... _It, typename _Tp, typename _Fp>
  [[__gnu__::__flatten__]] _GLIBCXX_SIMD_INTRINSIC constexpr
  auto
  __call_with_subscripts(_Tp&& __x, index_sequence<_It...>, _Fp&& __fun)
  { return __fun(__x[_First + _It]...); }

template <size_t _Np, size_t _First = 0, typename _Tp, typename _Fp>
  _GLIBCXX_SIMD_INTRINSIC constexpr auto
  __call_with_subscripts(_Tp&& __x, _Fp&& __fun)
  {
    return __call_with_subscripts<_First>(static_cast<_Tp&&>(__x),
                                          make_index_sequence<_Np>(),
                                          static_cast<_Fp&&>(__fun));
  }
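
// E.g. (illustrative) __call_with_subscripts<4, 2>(__v, __fun) invokes
// __fun(__v[2], __v[3], __v[4], __v[5]), i.e. _Np subscripts starting at _First.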

// }}}

// vvv ---- type traits ---- vvv
// integer type aliases{{{
using _UChar = unsigned char;
using _SChar = signed char;
using _UShort = unsigned short;
using _UInt = unsigned int;
using _ULong = unsigned long;
using _ULLong = unsigned long long;
using _LLong = long long;

//}}}
// __first_of_pack{{{
template <typename _T0, typename...>
  struct __first_of_pack
  { using type = _T0; };

template <typename... _Ts>
  using __first_of_pack_t = typename __first_of_pack<_Ts...>::type;

//}}}
// __value_type_or_identity_t {{{
template <typename _Tp>
  typename _Tp::value_type
  __value_type_or_identity_impl(int);

template <typename _Tp>
  _Tp
  __value_type_or_identity_impl(float);

template <typename _Tp>
  using __value_type_or_identity_t
    = decltype(__value_type_or_identity_impl<_Tp>(int()));

// }}}
// __is_vectorizable {{{
template <typename _Tp>
  struct __is_vectorizable : public is_arithmetic<_Tp> {};

template <>
  struct __is_vectorizable<bool> : public false_type {};

template <typename _Tp>
  inline constexpr bool __is_vectorizable_v = __is_vectorizable<_Tp>::value;

// Deduces to a vectorizable type
template <typename _Tp, typename = enable_if_t<__is_vectorizable_v<_Tp>>>
  using _Vectorizable = _Tp;

// }}}
// _LoadStorePtr / __is_possible_loadstore_conversion {{{
template <typename _Ptr, typename _ValueType>
  struct __is_possible_loadstore_conversion
  : conjunction<__is_vectorizable<_Ptr>, __is_vectorizable<_ValueType>> {};

template <>
  struct __is_possible_loadstore_conversion<bool, bool> : true_type {};

// Deduces to a type allowed for load/store with the given value type.
template <typename _Ptr, typename _ValueType,
          typename = enable_if_t<
                       __is_possible_loadstore_conversion<_Ptr, _ValueType>::value>>
  using _LoadStorePtr = _Ptr;

// }}}
// __is_bitmask{{{
template <typename _Tp, typename = void_t<>>
  struct __is_bitmask : false_type {};

template <typename _Tp>
  inline constexpr bool __is_bitmask_v = __is_bitmask<_Tp>::value;

// the __mmaskXX case:
template <typename _Tp>
  struct __is_bitmask<_Tp,
                      void_t<decltype(declval<unsigned&>() = declval<_Tp>() & 1u)>>
  : true_type {};

// }}}
// __int_for_sizeof{{{
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wpedantic"
template <size_t _Bytes>
  constexpr auto
  __int_for_sizeof()
  {
    static_assert(_Bytes > 0);
    if constexpr (_Bytes == sizeof(int))
      return int();
  #ifdef __clang__
    else if constexpr (_Bytes == sizeof(char))
      return char();
  #else
    else if constexpr (_Bytes == sizeof(_SChar))
      return _SChar();
  #endif
    else if constexpr (_Bytes == sizeof(short))
      return short();
  #ifndef __clang__
    else if constexpr (_Bytes == sizeof(long))
      return long();
  #endif
    else if constexpr (_Bytes == sizeof(_LLong))
      return _LLong();
  #ifdef __SIZEOF_INT128__
    else if constexpr (_Bytes == sizeof(__int128))
      return __int128();
  #endif // __SIZEOF_INT128__
    else if constexpr (_Bytes % sizeof(int) == 0)
      {
        constexpr size_t _Np = _Bytes / sizeof(int);
        struct _Ip
        {
          int _M_data[_Np];

          _GLIBCXX_SIMD_INTRINSIC constexpr _Ip
          operator&(_Ip __rhs) const
          {
            return __generate_from_n_evaluations<_Np, _Ip>(
                     [&](auto __i) _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA {
                       return __rhs._M_data[__i] & _M_data[__i];
                     });
          }

          _GLIBCXX_SIMD_INTRINSIC constexpr _Ip
          operator|(_Ip __rhs) const
          {
            return __generate_from_n_evaluations<_Np, _Ip>(
                     [&](auto __i) _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA {
                       return __rhs._M_data[__i] | _M_data[__i];
                     });
          }

          _GLIBCXX_SIMD_INTRINSIC constexpr _Ip
          operator^(_Ip __rhs) const
          {
            return __generate_from_n_evaluations<_Np, _Ip>(
                     [&](auto __i) _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA {
                       return __rhs._M_data[__i] ^ _M_data[__i];
                     });
          }

          _GLIBCXX_SIMD_INTRINSIC constexpr _Ip
          operator~() const
          {
            return __generate_from_n_evaluations<_Np, _Ip>(
                     [&](auto __i) _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA { return ~_M_data[__i]; });
          }
        };
        return _Ip{};
      }
    else
      static_assert(_Bytes == 0, "this should be unreachable");
  }
#pragma GCC diagnostic pop

template <typename _Tp>
  using __int_for_sizeof_t = decltype(__int_for_sizeof<sizeof(_Tp)>());

template <size_t _Np>
  using __int_with_sizeof_t = decltype(__int_for_sizeof<_Np>());
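
// E.g. (illustrative): __int_for_sizeof_t<float> is int, and an 8-byte type
// maps to the first matching integer (long, else _LLong). For sizes without
// any matching integer type (e.g. 32 bytes), the returned _Ip aggregate of
// ints provides the bitwise operators needed for mask storage.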

// }}}
// __is_fixed_size_abi{{{
template <typename _Tp>
  struct __is_fixed_size_abi : false_type {};

template <int _Np>
  struct __is_fixed_size_abi<simd_abi::fixed_size<_Np>> : true_type {};

template <typename _Tp>
  inline constexpr bool __is_fixed_size_abi_v = __is_fixed_size_abi<_Tp>::value;

// }}}
// __is_scalar_abi {{{
template <typename _Abi>
  constexpr bool
  __is_scalar_abi()
  { return is_same_v<simd_abi::scalar, _Abi>; }

// }}}
// __abi_bytes_v {{{
template <template <int> class _Abi, int _Bytes>
  constexpr int
  __abi_bytes_impl(_Abi<_Bytes>*)
  { return _Bytes; }

template <typename _Tp>
  constexpr int
  __abi_bytes_impl(_Tp*)
  { return -1; }

template <typename _Abi>
  inline constexpr int __abi_bytes_v
    = __abi_bytes_impl(static_cast<_Abi*>(nullptr));

// }}}
// __is_builtin_bitmask_abi {{{
template <typename _Abi>
  constexpr bool
  __is_builtin_bitmask_abi()
  { return is_same_v<simd_abi::_VecBltnBtmsk<__abi_bytes_v<_Abi>>, _Abi>; }

// }}}
// __is_sse_abi {{{
template <typename _Abi>
  constexpr bool
  __is_sse_abi()
  {
    constexpr auto _Bytes = __abi_bytes_v<_Abi>;
    return _Bytes <= 16 && is_same_v<simd_abi::_VecBuiltin<_Bytes>, _Abi>;
  }

// }}}
// __is_avx_abi {{{
template <typename _Abi>
  constexpr bool
  __is_avx_abi()
  {
    constexpr auto _Bytes = __abi_bytes_v<_Abi>;
    return _Bytes > 16 && _Bytes <= 32
             && is_same_v<simd_abi::_VecBuiltin<_Bytes>, _Abi>;
  }

// }}}
// __is_avx512_abi {{{
template <typename _Abi>
  constexpr bool
  __is_avx512_abi()
  {
    constexpr auto _Bytes = __abi_bytes_v<_Abi>;
    return _Bytes <= 64 && is_same_v<simd_abi::_Avx512<_Bytes>, _Abi>;
  }

// }}}
// __is_neon_abi {{{
template <typename _Abi>
  constexpr bool
  __is_neon_abi()
  {
    constexpr auto _Bytes = __abi_bytes_v<_Abi>;
    return _Bytes <= 16 && is_same_v<simd_abi::_VecBuiltin<_Bytes>, _Abi>;
  }

// }}}
// __is_sve_abi {{{
template <typename _Abi>
  constexpr bool
  __is_sve_abi()
  {
    constexpr auto _Bytes = __abi_bytes_v<_Abi>;
    return _Bytes <= __sve_vectorized_size_bytes && is_same_v<simd_abi::_Sve<_Bytes>, _Abi>;
  }

// }}}
// __make_dependent_t {{{
template <typename, typename _Up>
  struct __make_dependent
  { using type = _Up; };

template <typename _Tp, typename _Up>
  using __make_dependent_t = typename __make_dependent<_Tp, _Up>::type;

// }}}
// ^^^ ---- type traits ---- ^^^

// __invoke_ub{{{
template <typename... _Args>
  [[noreturn]] _GLIBCXX_SIMD_ALWAYS_INLINE void
  __invoke_ub([[maybe_unused]] const char* __msg, [[maybe_unused]] const _Args&... __args)
  {
#ifdef _GLIBCXX_DEBUG_UB
    __builtin_fprintf(stderr, __msg, __args...);
    __builtin_trap();
#else
    __builtin_unreachable();
#endif
  }

// }}}
// __assert_unreachable{{{
template <typename _Tp>
  struct __assert_unreachable
  { static_assert(!is_same_v<_Tp, _Tp>, "this should be unreachable"); };

// }}}
// __size_or_zero_v {{{
template <typename _Tp, typename _Ap, size_t _Np = simd_size<_Tp, _Ap>::value>
  constexpr size_t
  __size_or_zero_dispatch(int)
  { return _Np; }

template <typename _Tp, typename _Ap>
  constexpr size_t
  __size_or_zero_dispatch(float)
  { return 0; }

template <typename _Tp, typename _Ap>
  inline constexpr size_t __size_or_zero_v
    = __size_or_zero_dispatch<_Tp, _Ap>(0);

// }}}
// __div_roundup {{{
inline constexpr size_t
__div_roundup(size_t __a, size_t __b)
{ return (__a + __b - 1) / __b; }
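
// E.g. __div_roundup(7, 4) == 2. Used below to compute how many _Tp words
// are needed to store _Np bits (_NBytes = __div_roundup(_Np, __CHAR_BIT__)).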

// }}}
// _ExactBool{{{
class _ExactBool
{
  const bool _M_data;

public:
  _GLIBCXX_SIMD_INTRINSIC constexpr
  _ExactBool(bool __b) : _M_data(__b) {}

  _ExactBool(int) = delete;

  _GLIBCXX_SIMD_INTRINSIC constexpr
  operator bool() const
  { return _M_data; }
};
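
// _ExactBool(true) is fine, but _ExactBool(1) is ill-formed because the int
// overload is deleted: callers must pass a genuine bool instead of relying on
// implicit integer-to-bool conversion.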

// }}}
// __may_alias{{{
/**@internal
 * Helper __may_alias<_Tp> that turns _Tp into the type to be used for an
 * aliasing pointer. This adds the __may_alias attribute to _Tp (with compilers
 * that support it).
 */
template <typename _Tp>
  using __may_alias [[__gnu__::__may_alias__]] = _Tp;
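
// Typical use (illustrative): loading through a type-punned pointer without
// violating strict aliasing, e.g.
//   const auto* __p = reinterpret_cast<const __may_alias<_Up>*>(__mem);
//   return __p[0];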

// }}}
// _UnsupportedBase {{{
// simd and simd_mask base for unsupported <_Tp, _Abi>
struct _UnsupportedBase
{
  _UnsupportedBase() = delete;
  _UnsupportedBase(const _UnsupportedBase&) = delete;
  _UnsupportedBase& operator=(const _UnsupportedBase&) = delete;
  ~_UnsupportedBase() = delete;
};

// }}}
// _InvalidTraits {{{
/**
 * @internal
 * Defines the implementation of a given <_Tp, _Abi>.
 *
 * Implementations must ensure that only valid <_Tp, _Abi> instantiations are
 * possible. Static assertions in the type definition do not suffice. It is
 * important that SFINAE works.
 */
struct _InvalidTraits
{
  using _IsValid = false_type;
  using _SimdBase = _UnsupportedBase;
  using _MaskBase = _UnsupportedBase;

  static constexpr size_t _S_full_size = 0;
  static constexpr bool _S_is_partial = false;

  static constexpr size_t _S_simd_align = 1;
  struct _SimdImpl;
  struct _SimdMember {};
  struct _SimdCastType;

  static constexpr size_t _S_mask_align = 1;
  struct _MaskImpl;
  struct _MaskMember {};
  struct _MaskCastType;
};

// }}}
// _SimdTraits {{{
template <typename _Tp, typename _Abi, typename = void_t<>>
  struct _SimdTraits : _InvalidTraits {};

// }}}
// __private_init, __bitset_init{{{
/**
 * @internal
 * Tag used for private init constructor of simd and simd_mask
 */
inline constexpr struct _PrivateInit {} __private_init = {};

inline constexpr struct _BitsetInit {} __bitset_init = {};

// }}}
// __is_narrowing_conversion<_From, _To>{{{
template <typename _From, typename _To, bool = is_arithmetic_v<_From>,
          bool = is_arithmetic_v<_To>>
  struct __is_narrowing_conversion;

// ignore "signed/unsigned mismatch" in the following trait.
// The implicit conversions will do the right thing here.
template <typename _From, typename _To>
  struct __is_narrowing_conversion<_From, _To, true, true>
  : public __bool_constant<(
      __digits_v<_From> > __digits_v<_To>
      || __finite_max_v<_From> > __finite_max_v<_To>
      || __finite_min_v<_From> < __finite_min_v<_To>
      || (is_signed_v<_From> && is_unsigned_v<_To>))> {};

template <typename _Tp>
  struct __is_narrowing_conversion<_Tp, bool, true, true>
  : public true_type {};

template <>
  struct __is_narrowing_conversion<bool, bool, true, true>
  : public false_type {};

template <typename _Tp>
  struct __is_narrowing_conversion<_Tp, _Tp, true, true>
  : public false_type {};

template <typename _From, typename _To>
  struct __is_narrowing_conversion<_From, _To, false, true>
  : public negation<is_convertible<_From, _To>> {};
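
// Illustrative consequences (assuming 32-bit int):
//   __is_narrowing_conversion<int, short>::value    == true  (digits shrink)
//   __is_narrowing_conversion<short, int>::value    == false
//   __is_narrowing_conversion<int, unsigned>::value == true  (signed -> unsigned)
//   __is_narrowing_conversion<int, bool>::value     == true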

// }}}
// __converts_to_higher_integer_rank{{{
template <typename _From, typename _To, bool = (sizeof(_From) < sizeof(_To))>
  struct __converts_to_higher_integer_rank : public true_type {};

// this may fail for char -> short if sizeof(char) == sizeof(short)
template <typename _From, typename _To>
  struct __converts_to_higher_integer_rank<_From, _To, false>
  : public is_same<decltype(declval<_From>() + declval<_To>()), _To> {};

// }}}
// __data(simd/simd_mask) {{{
template <typename _Tp, typename _Ap>
  _GLIBCXX_SIMD_INTRINSIC constexpr const auto&
  __data(const simd<_Tp, _Ap>& __x);

template <typename _Tp, typename _Ap>
  _GLIBCXX_SIMD_INTRINSIC constexpr auto&
  __data(simd<_Tp, _Ap>& __x);

template <typename _Tp, typename _Ap>
  _GLIBCXX_SIMD_INTRINSIC constexpr const auto&
  __data(const simd_mask<_Tp, _Ap>& __x);

template <typename _Tp, typename _Ap>
  _GLIBCXX_SIMD_INTRINSIC constexpr auto&
  __data(simd_mask<_Tp, _Ap>& __x);

// }}}
// _SimdConverter {{{
template <typename _FromT, typename _FromA, typename _ToT, typename _ToA,
          typename = void>
  struct _SimdConverter;

template <typename _Tp, typename _Ap>
  struct _SimdConverter<_Tp, _Ap, _Tp, _Ap, void>
  {
    template <typename _Up>
      _GLIBCXX_SIMD_INTRINSIC const _Up&
      operator()(const _Up& __x)
      { return __x; }
  };

// }}}
// __to_value_type_or_member_type {{{
template <typename _V>
  _GLIBCXX_SIMD_INTRINSIC constexpr auto
  __to_value_type_or_member_type(const _V& __x) -> decltype(__data(__x))
  { return __data(__x); }

template <typename _V>
  _GLIBCXX_SIMD_INTRINSIC constexpr const typename _V::value_type&
  __to_value_type_or_member_type(const typename _V::value_type& __x)
  { return __x; }

// }}}
// __bool_storage_member_type{{{
template <size_t _Size>
  struct __bool_storage_member_type;

template <size_t _Size>
  using __bool_storage_member_type_t =
    typename __bool_storage_member_type<_Size>::type;

// }}}
// _SimdTuple {{{
// why not tuple?
// 1. tuple gives no guarantee about the storage order, but I require
//    storage equivalent to array<_Tp, _Np>
// 2. direct access to the element type (first template argument)
// 3. enforces equal element type, only different _Abi types are allowed
template <typename _Tp, typename... _Abis>
  struct _SimdTuple;

//}}}
// __fixed_size_storage_t {{{
template <typename _Tp, int _Np>
  struct __fixed_size_storage;

template <typename _Tp, int _Np>
  using __fixed_size_storage_t = typename __fixed_size_storage<_Tp, _Np>::type;

// }}}
// _SimdWrapper fwd decl{{{
template <typename _Tp, size_t _Size, typename = void_t<>>
  struct _SimdWrapper;

template <typename _Tp>
  using _SimdWrapper8 = _SimdWrapper<_Tp, 8 / sizeof(_Tp)>;
template <typename _Tp>
  using _SimdWrapper16 = _SimdWrapper<_Tp, 16 / sizeof(_Tp)>;
template <typename _Tp>
  using _SimdWrapper32 = _SimdWrapper<_Tp, 32 / sizeof(_Tp)>;
template <typename _Tp>
  using _SimdWrapper64 = _SimdWrapper<_Tp, 64 / sizeof(_Tp)>;

template <typename _Tp, size_t _Width>
  struct _SveSimdWrapper;

// }}}
// __is_simd_wrapper {{{
template <typename _Tp>
  struct __is_simd_wrapper : false_type {};

template <typename _Tp, size_t _Np>
  struct __is_simd_wrapper<_SimdWrapper<_Tp, _Np>> : true_type {};

template <typename _Tp>
  inline constexpr bool __is_simd_wrapper_v = __is_simd_wrapper<_Tp>::value;

// }}}
// _BitOps {{{
struct _BitOps
{
  // _S_bit_iteration {{{
  template <typename _Tp, typename _Fp>
    static void
    _S_bit_iteration(_Tp __mask, _Fp&& __f)
    {
      static_assert(sizeof(_ULLong) >= sizeof(_Tp));
      conditional_t<sizeof(_Tp) <= sizeof(_UInt), _UInt, _ULLong> __k;
      if constexpr (is_convertible_v<_Tp, decltype(__k)>)
        __k = __mask;
      else
        __k = __mask.to_ullong();
      while (__k)
        {
          __f(std::__countr_zero(__k));
          __k &= (__k - 1);
        }
    }

  //}}}
};
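
// Usage sketch (illustrative): visit the set bits of a mask word from lowest
// to highest; __k &= (__k - 1) clears the visited bit on each iteration:
//   _BitOps::_S_bit_iteration(0b1010u, [](auto __bit) {
//     // called with __bit == 1, then __bit == 3
//   });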

//}}}
// __increment, __decrement {{{
template <typename _Tp = void>
  struct __increment
  { constexpr _Tp operator()(_Tp __a) const { return ++__a; } };

template <>
  struct __increment<void>
  {
    template <typename _Tp>
      constexpr _Tp
      operator()(_Tp __a) const
      { return ++__a; }
  };

template <typename _Tp = void>
  struct __decrement
  { constexpr _Tp operator()(_Tp __a) const { return --__a; } };

template <>
  struct __decrement<void>
  {
    template <typename _Tp>
      constexpr _Tp
      operator()(_Tp __a) const
      { return --__a; }
  };

// }}}
// _ValuePreserving(OrInt) {{{
template <typename _From, typename _To,
          typename = enable_if_t<negation<
                       __is_narrowing_conversion<__remove_cvref_t<_From>, _To>>::value>>
  using _ValuePreserving = _From;

template <typename _From, typename _To,
          typename _DecayedFrom = __remove_cvref_t<_From>,
          typename = enable_if_t<conjunction<
                       is_convertible<_From, _To>,
                       disjunction<
                         is_same<_DecayedFrom, _To>, is_same<_DecayedFrom, int>,
                         conjunction<is_same<_DecayedFrom, _UInt>, is_unsigned<_To>>,
                         negation<__is_narrowing_conversion<_DecayedFrom, _To>>>>::value>>
  using _ValuePreservingOrInt = _From;

// }}}
// __intrinsic_type {{{
template <typename _Tp, size_t _Bytes, typename = void_t<>>
  struct __intrinsic_type;

template <typename _Tp, size_t _Size>
  using __intrinsic_type_t =
    typename __intrinsic_type<_Tp, _Size * sizeof(_Tp)>::type;

template <typename _Tp>
  using __intrinsic_type2_t = typename __intrinsic_type<_Tp, 2>::type;
template <typename _Tp>
  using __intrinsic_type4_t = typename __intrinsic_type<_Tp, 4>::type;
template <typename _Tp>
  using __intrinsic_type8_t = typename __intrinsic_type<_Tp, 8>::type;
template <typename _Tp>
  using __intrinsic_type16_t = typename __intrinsic_type<_Tp, 16>::type;
template <typename _Tp>
  using __intrinsic_type32_t = typename __intrinsic_type<_Tp, 32>::type;
template <typename _Tp>
  using __intrinsic_type64_t = typename __intrinsic_type<_Tp, 64>::type;

// }}}
// _BitMask {{{
template <size_t _Np, bool _Sanitized = false>
  struct _BitMask;

template <size_t _Np, bool _Sanitized>
  struct __is_bitmask<_BitMask<_Np, _Sanitized>, void> : true_type {};

template <size_t _Np>
  using _SanitizedBitMask = _BitMask<_Np, true>;

template <size_t _Np, bool _Sanitized>
  struct _BitMask
  {
    static_assert(_Np > 0);

    static constexpr size_t _NBytes = __div_roundup(_Np, __CHAR_BIT__);

    using _Tp = conditional_t<_Np == 1, bool,
                              make_unsigned_t<__int_with_sizeof_t<std::min(
                                sizeof(_ULLong), std::__bit_ceil(_NBytes))>>>;

    static constexpr int _S_array_size = __div_roundup(_NBytes, sizeof(_Tp));

    _Tp _M_bits[_S_array_size];

    static constexpr int _S_unused_bits
      = _Np == 1 ? 0 : _S_array_size * sizeof(_Tp) * __CHAR_BIT__ - _Np;

    static constexpr _Tp _S_bitmask = +_Tp(~_Tp()) >> _S_unused_bits;

    constexpr _BitMask() noexcept = default;

    constexpr _BitMask(unsigned long long __x) noexcept
    : _M_bits{static_cast<_Tp>(__x)} {}

    _BitMask(bitset<_Np> __x) noexcept : _BitMask(__x.to_ullong()) {}

    constexpr _BitMask(const _BitMask&) noexcept = default;

    template <bool _RhsSanitized, typename = enable_if_t<_RhsSanitized == false
                                                           && _Sanitized == true>>
      constexpr _BitMask(const _BitMask<_Np, _RhsSanitized>& __rhs) noexcept
      : _BitMask(__rhs._M_sanitized()) {}

    constexpr operator _SimdWrapper<bool, _Np>() const noexcept
    {
      static_assert(_S_array_size == 1);
      return _M_bits[0];
    }

    // precondition: is sanitized
    constexpr _Tp
    _M_to_bits() const noexcept
    {
      static_assert(_S_array_size == 1);
      return _M_bits[0];
    }

    // precondition: is sanitized
    constexpr unsigned long long
    to_ullong() const noexcept
    {
      static_assert(_S_array_size == 1);
      return _M_bits[0];
    }

    // precondition: is sanitized
    constexpr unsigned long
    to_ulong() const noexcept
    {
      static_assert(_S_array_size == 1);
      return _M_bits[0];
    }

    constexpr bitset<_Np>
    _M_to_bitset() const noexcept
    {
      static_assert(_S_array_size == 1);
      return _M_bits[0];
    }

    constexpr decltype(auto)
    _M_sanitized() const noexcept
    {
      if constexpr (_Sanitized)
        return *this;
      else if constexpr (_Np == 1)
        return _SanitizedBitMask<_Np>(_M_bits[0]);
      else
        {
          _SanitizedBitMask<_Np> __r = {};
          for (int __i = 0; __i < _S_array_size; ++__i)
            __r._M_bits[__i] = _M_bits[__i];
          if constexpr (_S_unused_bits > 0)
            __r._M_bits[_S_array_size - 1] &= _S_bitmask;
          return __r;
        }
    }

    template <size_t _Mp, bool _LSanitized>
      constexpr _BitMask<_Np + _Mp, _Sanitized>
      _M_prepend(_BitMask<_Mp, _LSanitized> __lsb) const noexcept
      {
        constexpr size_t _RN = _Np + _Mp;
        using _Rp = _BitMask<_RN, _Sanitized>;
        if constexpr (_Rp::_S_array_size == 1)
          {
            _Rp __r{{_M_bits[0]}};
            __r._M_bits[0] <<= _Mp;
            __r._M_bits[0] |= __lsb._M_sanitized()._M_bits[0];
            return __r;
          }
        else
          __assert_unreachable<_Rp>();
      }

    // Return a new _BitMask with size _NewSize while dropping _DropLsb least
    // significant bits. If the operation implicitly produces a sanitized bitmask,
    // the result type will have _Sanitized set.
    template <size_t _DropLsb, size_t _NewSize = _Np - _DropLsb>
      constexpr auto
      _M_extract() const noexcept
      {
        static_assert(_Np > _DropLsb);
        static_assert(_DropLsb + _NewSize <= sizeof(_ULLong) * __CHAR_BIT__,
                      "not implemented for bitmasks larger than one ullong");
        if constexpr (_NewSize == 1)
          // must sanitize because the return _Tp is bool
          return _SanitizedBitMask<1>(_M_bits[0] & (_Tp(1) << _DropLsb));
        else
          return _BitMask<_NewSize,
                          ((_NewSize + _DropLsb == sizeof(_Tp) * __CHAR_BIT__
                              && _NewSize + _DropLsb <= _Np)
                             || ((_Sanitized || _Np == sizeof(_Tp) * __CHAR_BIT__)
                                   && _NewSize + _DropLsb >= _Np))>(_M_bits[0]
                                                                      >> _DropLsb);
      }

    // True if all bits are set. Implicitly sanitizes if _Sanitized == false.
    constexpr bool
    all() const noexcept
    {
      if constexpr (_Np == 1)
        return _M_bits[0];
      else if constexpr (!_Sanitized)
        return _M_sanitized().all();
      else
        {
          constexpr _Tp __allbits = ~_Tp();
          for (int __i = 0; __i < _S_array_size - 1; ++__i)
            if (_M_bits[__i] != __allbits)
              return false;
          return _M_bits[_S_array_size - 1] == _S_bitmask;
        }
    }

    // True if at least one bit is set. Implicitly sanitizes if _Sanitized ==
    // false.
    constexpr bool
    any() const noexcept
    {
      if constexpr (_Np == 1)
        return _M_bits[0];
      else if constexpr (!_Sanitized)
        return _M_sanitized().any();
      else
        {
          for (int __i = 0; __i < _S_array_size - 1; ++__i)
            if (_M_bits[__i] != 0)
              return true;
          return _M_bits[_S_array_size - 1] != 0;
        }
    }

    // True if no bit is set. Implicitly sanitizes if _Sanitized == false.
    constexpr bool
    none() const noexcept
    {
      if constexpr (_Np == 1)
        return !_M_bits[0];
      else if constexpr (!_Sanitized)
        return _M_sanitized().none();
      else
        {
          for (int __i = 0; __i < _S_array_size - 1; ++__i)
            if (_M_bits[__i] != 0)
              return false;
          return _M_bits[_S_array_size - 1] == 0;
        }
    }

    // Returns the number of set bits. Implicitly sanitizes if _Sanitized ==
    // false.
    constexpr int
    count() const noexcept
    {
      if constexpr (_Np == 1)
        return _M_bits[0];
      else if constexpr (!_Sanitized)
        return _M_sanitized().count();
      else
        {
          int __result = __builtin_popcountll(_M_bits[0]);
          for (int __i = 1; __i < _S_array_size; ++__i)
            __result += __builtin_popcountll(_M_bits[__i]);
          return __result;
        }
    }

    // Returns the bit at offset __i as bool.
    constexpr bool
    operator[](size_t __i) const noexcept
    {
      if constexpr (_Np == 1)
        return _M_bits[0];
      else if constexpr (_S_array_size == 1)
        return (_M_bits[0] >> __i) & 1;
      else
        {
          const size_t __j = __i / (sizeof(_Tp) * __CHAR_BIT__);
          const size_t __shift = __i % (sizeof(_Tp) * __CHAR_BIT__);
          return (_M_bits[__j] >> __shift) & 1;
        }
    }

    template <size_t __i>
      constexpr bool
      operator[](_SizeConstant<__i>) const noexcept
      {
        static_assert(__i < _Np);
        constexpr size_t __j = __i / (sizeof(_Tp) * __CHAR_BIT__);
        constexpr size_t __shift = __i % (sizeof(_Tp) * __CHAR_BIT__);
        return static_cast<bool>(_M_bits[__j] & (_Tp(1) << __shift));
      }

    // Set the bit at offset __i to __x.
    constexpr void
    set(size_t __i, bool __x) noexcept
    {
      if constexpr (_Np == 1)
        _M_bits[0] = __x;
      else if constexpr (_S_array_size == 1)
        {
          _M_bits[0] &= ~_Tp(_Tp(1) << __i);
          _M_bits[0] |= _Tp(_Tp(__x) << __i);
        }
      else
        {
          const size_t __j = __i / (sizeof(_Tp) * __CHAR_BIT__);
          const size_t __shift = __i % (sizeof(_Tp) * __CHAR_BIT__);
          _M_bits[__j] &= ~_Tp(_Tp(1) << __shift);
          _M_bits[__j] |= _Tp(_Tp(__x) << __shift);
        }
    }

    template <size_t __i>
      constexpr void
      set(_SizeConstant<__i>, bool __x) noexcept
      {
        static_assert(__i < _Np);
        if constexpr (_Np == 1)
          _M_bits[0] = __x;
        else
          {
            constexpr size_t __j = __i / (sizeof(_Tp) * __CHAR_BIT__);
            constexpr size_t __shift = __i % (sizeof(_Tp) * __CHAR_BIT__);
            constexpr _Tp __mask = ~_Tp(_Tp(1) << __shift);
            _M_bits[__j] &= __mask;
            _M_bits[__j] |= _Tp(_Tp(__x) << __shift);
          }
      }

    // Inverts all bits. Sanitized input leads to sanitized output.
    constexpr _BitMask
    operator~() const noexcept
    {
      if constexpr (_Np == 1)
        return !_M_bits[0];
      else
        {
          _BitMask __result{};
          for (int __i = 0; __i < _S_array_size - 1; ++__i)
            __result._M_bits[__i] = ~_M_bits[__i];
          if constexpr (_Sanitized)
            __result._M_bits[_S_array_size - 1]
              = _M_bits[_S_array_size - 1] ^ _S_bitmask;
          else
            __result._M_bits[_S_array_size - 1] = ~_M_bits[_S_array_size - 1];
          return __result;
        }
    }

    constexpr _BitMask&
    operator^=(const _BitMask& __b) & noexcept
    {
      __execute_n_times<_S_array_size>(
        [&](auto __i) _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA { _M_bits[__i] ^= __b._M_bits[__i]; });
      return *this;
    }

    constexpr _BitMask&
    operator|=(const _BitMask& __b) & noexcept
    {
      __execute_n_times<_S_array_size>(
        [&](auto __i) _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA { _M_bits[__i] |= __b._M_bits[__i]; });
      return *this;
    }

    constexpr _BitMask&
    operator&=(const _BitMask& __b) & noexcept
    {
      __execute_n_times<_S_array_size>(
        [&](auto __i) _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA { _M_bits[__i] &= __b._M_bits[__i]; });
      return *this;
    }

    friend constexpr _BitMask
    operator^(const _BitMask& __a, const _BitMask& __b) noexcept
    {
      _BitMask __r = __a;
      __r ^= __b;
      return __r;
    }

    friend constexpr _BitMask
    operator|(const _BitMask& __a, const _BitMask& __b) noexcept
    {
      _BitMask __r = __a;
      __r |= __b;
      return __r;
    }

    friend constexpr _BitMask
    operator&(const _BitMask& __a, const _BitMask& __b) noexcept
    {
      _BitMask __r = __a;
      __r &= __b;
      return __r;
    }

    _GLIBCXX_SIMD_INTRINSIC
    constexpr bool
    _M_is_constprop() const
    {
      if constexpr (_S_array_size == 0)
        return __builtin_constant_p(_M_bits[0]);
      else
        {
          for (int __i = 0; __i < _S_array_size; ++__i)
            if (!__builtin_constant_p(_M_bits[__i]))
              return false;
          return true;
        }
    }
  };
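
// Usage sketch (illustrative): a _BitMask<6> constructed from an ullong keeps
// the two padding bits unspecified until sanitized:
//   _BitMask<6> __k(0b1111'0011);
//   __k._M_sanitized().to_ullong(); // == 0b11'0011 (high bits masked off)
//   __k._M_sanitized().count();     // == 4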

// }}}

// vvv ---- builtin vector types [[gnu::vector_size(N)]] and operations ---- vvv
// __min_vector_size {{{
template <typename _Tp = void>
  static inline constexpr int __min_vector_size = 2 * sizeof(_Tp);

#if _GLIBCXX_SIMD_HAVE_NEON
template <>
  inline constexpr int __min_vector_size<void> = 8;
#else
template <>
  inline constexpr int __min_vector_size<void> = 16;
#endif

// }}}
// __vector_type {{{
template <typename _Tp, size_t _Np, typename = void>
  struct __vector_type_n {};

// substitution failure for 0-element case
template <typename _Tp>
  struct __vector_type_n<_Tp, 0, void> {};

// special case 1-element to be _Tp itself
template <typename _Tp>
  struct __vector_type_n<_Tp, 1, enable_if_t<__is_vectorizable_v<_Tp>>>
  { using type = _Tp; };

// else, use GNU-style builtin vector types
template <typename _Tp, size_t _Np>
  struct __vector_type_n<_Tp, _Np, enable_if_t<__is_vectorizable_v<_Tp> && _Np >= 2>>
  {
    static constexpr size_t _S_Np2 = std::__bit_ceil(_Np * sizeof(_Tp));

    static constexpr size_t _S_Bytes =
#ifdef __i386__
      // Using [[gnu::vector_size(8)]] would wreak havoc on the FPU because
      // those objects are passed via MMX registers and nothing ever calls EMMS.
      _S_Np2 == 8 ? 16 :
#endif
      _S_Np2 < __min_vector_size<_Tp> ? __min_vector_size<_Tp>
                                      : _S_Np2;

    using type [[__gnu__::__vector_size__(_S_Bytes)]] = _Tp;
  };

template <typename _Tp, size_t _Bytes, size_t = _Bytes % sizeof(_Tp)>
  struct __vector_type;

template <typename _Tp, size_t _Bytes>
  struct __vector_type<_Tp, _Bytes, 0>
  : __vector_type_n<_Tp, _Bytes / sizeof(_Tp)> {};

template <typename _Tp, size_t _Size>
  using __vector_type_t = typename __vector_type_n<_Tp, _Size>::type;

template <typename _Tp>
  using __vector_type2_t = typename __vector_type<_Tp, 2>::type;
template <typename _Tp>
  using __vector_type4_t = typename __vector_type<_Tp, 4>::type;
template <typename _Tp>
  using __vector_type8_t = typename __vector_type<_Tp, 8>::type;
template <typename _Tp>
  using __vector_type16_t = typename __vector_type<_Tp, 16>::type;
template <typename _Tp>
  using __vector_type32_t = typename __vector_type<_Tp, 32>::type;
template <typename _Tp>
  using __vector_type64_t = typename __vector_type<_Tp, 64>::type;
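
// E.g. (illustrative): __vector_type_t<float, 4> is a 16-byte builtin vector,
// __vector_type_t<float, 3> pads the 12 requested bytes up to __bit_ceil = 16,
// and __vector_type_t<float, 1> is plain float. On i386, 8-byte sizes are
// widened to 16 bytes to avoid MMX (see _S_Bytes above).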

// }}}
// __is_vector_type {{{
template <typename _Tp, typename = void_t<>>
  struct __is_vector_type : false_type {};

template <typename _Tp>
  struct __is_vector_type<
    _Tp, void_t<typename __vector_type<
                  remove_reference_t<decltype(declval<_Tp>()[0])>, sizeof(_Tp)>::type>>
  : is_same<_Tp, typename __vector_type<
                   remove_reference_t<decltype(declval<_Tp>()[0])>,
                   sizeof(_Tp)>::type> {};

template <typename _Tp>
  inline constexpr bool __is_vector_type_v = __is_vector_type<_Tp>::value;

// }}}
// __is_intrinsic_type {{{
#if _GLIBCXX_SIMD_HAVE_SSE_ABI
template <typename _Tp>
  using __is_intrinsic_type = __is_vector_type<_Tp>;
#else // not SSE (x86)
template <typename _Tp, typename = void_t<>>
  struct __is_intrinsic_type : false_type {};

template <typename _Tp>
  struct __is_intrinsic_type<
    _Tp, void_t<typename __intrinsic_type<
                  remove_reference_t<decltype(declval<_Tp>()[0])>, sizeof(_Tp)>::type>>
  : is_same<_Tp, typename __intrinsic_type<
                   remove_reference_t<decltype(declval<_Tp>()[0])>,
                   sizeof(_Tp)>::type> {};
#endif

template <typename _Tp>
  inline constexpr bool __is_intrinsic_type_v = __is_intrinsic_type<_Tp>::value;

// }}}
// _VectorTraits{{{
template <typename _Tp, typename = void_t<>>
  struct _VectorTraitsImpl;

template <typename _Tp>
  struct _VectorTraitsImpl<_Tp, enable_if_t<__is_vector_type_v<_Tp>
                                              || __is_intrinsic_type_v<_Tp>>>
  {
    using type = _Tp;
    using value_type = remove_reference_t<decltype(declval<_Tp>()[0])>;
    static constexpr int _S_full_size = sizeof(_Tp) / sizeof(value_type);
    using _Wrapper = _SimdWrapper<value_type, _S_full_size>;
    template <typename _Up, int _W = _S_full_size>
      static constexpr bool _S_is
        = is_same_v<value_type, _Up> && _W == _S_full_size;
  };

template <typename _Tp, size_t _Np>
  struct _VectorTraitsImpl<_SimdWrapper<_Tp, _Np>,
                           void_t<__vector_type_t<_Tp, _Np>>>
  {
    using type = __vector_type_t<_Tp, _Np>;
    using value_type = _Tp;
    static constexpr int _S_full_size = sizeof(type) / sizeof(value_type);
    using _Wrapper = _SimdWrapper<_Tp, _Np>;
    static constexpr bool _S_is_partial = (_Np < _S_full_size);
1636 static constexpr int _S_partial_width = _Np;
1637 template <typename _Up, int _W = _S_full_size>
1638 static constexpr bool _S_is
1639 = is_same_v<value_type, _Up>&& _W == _S_full_size;
1640 };
1641
1642template <typename _Tp, typename = typename _VectorTraitsImpl<_Tp>::type>
1643 using _VectorTraits = _VectorTraitsImpl<_Tp>;
1644
1645// }}}
1646// __as_vector{{{
1647template <typename _V>
1648 _GLIBCXX_SIMD_INTRINSIC constexpr auto
1649 __as_vector(_V __x)
1650 {
1651 if constexpr (__is_vector_type_v<_V>)
1652 return __x;
1653 else if constexpr (is_simd<_V>::value || is_simd_mask<_V>::value)
1654 return __data(__x)._M_data;
1655 else if constexpr (__is_vectorizable_v<_V>)
1656 return __vector_type_t<_V, 2>{__x};
1657 else
1658 return __x._M_data;
1659 }
1660
1661// }}}
1662// __as_wrapper{{{
1663template <size_t _Np = 0, typename _V>
1664 _GLIBCXX_SIMD_INTRINSIC constexpr auto
1665 __as_wrapper(_V __x)
1666 {
1667 if constexpr (__is_vector_type_v<_V>)
1668 return _SimdWrapper<typename _VectorTraits<_V>::value_type,
1669 (_Np > 0 ? _Np : _VectorTraits<_V>::_S_full_size)>(__x);
1670 else if constexpr (is_simd<_V>::value || is_simd_mask<_V>::value)
1671 {
1672 static_assert(_V::size() == _Np);
1673 return __data(__x);
1674 }
1675 else
1676 {
1677 static_assert(_V::_S_size == _Np);
1678 return __x;
1679 }
1680 }
1681
1682// }}}
1683// __intrin_bitcast{{{
1684template <typename _To, typename _From>
1685 _GLIBCXX_SIMD_INTRINSIC constexpr _To
1686 __intrin_bitcast(_From __v)
1687 {
1688 static_assert((__is_vector_type_v<_From> || __is_intrinsic_type_v<_From>)
1689 && (__is_vector_type_v<_To> || __is_intrinsic_type_v<_To>));
1690 if constexpr (sizeof(_To) == sizeof(_From))
1691 return reinterpret_cast<_To>(__v);
1692 else if constexpr (sizeof(_From) > sizeof(_To))
1693 if constexpr (sizeof(_To) >= 16)
1694 return reinterpret_cast<const __may_alias<_To>&>(__v);
1695 else
1696 {
1697 _To __r;
1698 __builtin_memcpy(&__r, &__v, sizeof(_To));
1699 return __r;
1700 }
1701#if _GLIBCXX_SIMD_X86INTRIN && !defined __clang__
1702 else if constexpr (__have_avx && sizeof(_From) == 16 && sizeof(_To) == 32)
1703 return reinterpret_cast<_To>(__builtin_ia32_ps256_ps(
1704 reinterpret_cast<__vector_type_t<float, 4>>(__v)));
1705 else if constexpr (__have_avx512f && sizeof(_From) == 16
1706 && sizeof(_To) == 64)
1707 return reinterpret_cast<_To>(__builtin_ia32_ps512_ps(
1708 reinterpret_cast<__vector_type_t<float, 4>>(__v)));
1709 else if constexpr (__have_avx512f && sizeof(_From) == 32
1710 && sizeof(_To) == 64)
1711 return reinterpret_cast<_To>(__builtin_ia32_ps512_256ps(
1712 reinterpret_cast<__vector_type_t<float, 8>>(__v)));
1713#endif // _GLIBCXX_SIMD_X86INTRIN
1714 else if constexpr (sizeof(__v) <= 8)
1715 return reinterpret_cast<_To>(
1716 __vector_type_t<__int_for_sizeof_t<_From>, sizeof(_To) / sizeof(_From)>{
1717 reinterpret_cast<__int_for_sizeof_t<_From>>(__v)});
1718 else
1719 {
1720 static_assert(sizeof(_To) > sizeof(_From));
1721 _To __r = {};
1722 __builtin_memcpy(&__r, &__v, sizeof(_From));
1723 return __r;
1724 }
1725 }
1726
1727// }}}
1728// __vector_bitcast{{{
1729template <typename _To, size_t _NN = 0, typename _From,
1730 typename _FromVT = _VectorTraits<_From>,
1731 size_t _Np = _NN == 0 ? sizeof(_From) / sizeof(_To) : _NN>
1732 _GLIBCXX_SIMD_INTRINSIC constexpr __vector_type_t<_To, _Np>
1733 __vector_bitcast(_From __x)
1734 {
1735 using _R = __vector_type_t<_To, _Np>;
1736 return __intrin_bitcast<_R>(__x);
1737 }
1738
1739template <typename _To, size_t _NN = 0, typename _Tp, size_t _Nx,
1740 size_t _Np
1741 = _NN == 0 ? sizeof(_SimdWrapper<_Tp, _Nx>) / sizeof(_To) : _NN>
1742 _GLIBCXX_SIMD_INTRINSIC constexpr __vector_type_t<_To, _Np>
1743 __vector_bitcast(const _SimdWrapper<_Tp, _Nx>& __x)
1744 {
1745 static_assert(_Np > 1);
1746 return __intrin_bitcast<__vector_type_t<_To, _Np>>(__x._M_data);
1747 }
1748
1749// }}}
1750// __convert_x86 declarations {{{
1751#ifdef _GLIBCXX_SIMD_WORKAROUND_PR85048
1752template <typename _To, typename _Tp, typename _TVT = _VectorTraits<_Tp>>
1753 _To __convert_x86(_Tp);
1754
1755template <typename _To, typename _Tp, typename _TVT = _VectorTraits<_Tp>>
1756 _To __convert_x86(_Tp, _Tp);
1757
1758template <typename _To, typename _Tp, typename _TVT = _VectorTraits<_Tp>>
1759 _To __convert_x86(_Tp, _Tp, _Tp, _Tp);
1760
1761template <typename _To, typename _Tp, typename _TVT = _VectorTraits<_Tp>>
1762 _To __convert_x86(_Tp, _Tp, _Tp, _Tp, _Tp, _Tp, _Tp, _Tp);
1763
1764template <typename _To, typename _Tp, typename _TVT = _VectorTraits<_Tp>>
1765 _To __convert_x86(_Tp, _Tp, _Tp, _Tp, _Tp, _Tp, _Tp, _Tp, _Tp, _Tp, _Tp, _Tp,
1766 _Tp, _Tp, _Tp, _Tp);
1767#endif // _GLIBCXX_SIMD_WORKAROUND_PR85048
1768
1769//}}}
1770// __bit_cast {{{
1771template <typename _To, typename _From>
1772 _GLIBCXX_SIMD_INTRINSIC constexpr _To
1773 __bit_cast(const _From __x)
1774 {
1775#if __has_builtin(__builtin_bit_cast)
1776 return __builtin_bit_cast(_To, __x);
1777#else
1778 static_assert(sizeof(_To) == sizeof(_From));
1779 constexpr bool __to_is_vectorizable
1780 = is_arithmetic_v<_To> || is_enum_v<_To>;
1781 constexpr bool __from_is_vectorizable
1782 = is_arithmetic_v<_From> || is_enum_v<_From>;
1783 if constexpr (__is_vector_type_v<_To> && __is_vector_type_v<_From>)
1784 return reinterpret_cast<_To>(__x);
1785 else if constexpr (__is_vector_type_v<_To> && __from_is_vectorizable)
1786 {
1787 using _FV [[__gnu__::__vector_size__(sizeof(_From))]] = _From;
1788 return reinterpret_cast<_To>(_FV{__x});
1789 }
1790 else if constexpr (__to_is_vectorizable && __from_is_vectorizable)
1791 {
1792 using _TV [[__gnu__::__vector_size__(sizeof(_To))]] = _To;
1793 using _FV [[__gnu__::__vector_size__(sizeof(_From))]] = _From;
1794 return reinterpret_cast<_TV>(_FV{__x})[0];
1795 }
1796 else if constexpr (__to_is_vectorizable && __is_vector_type_v<_From>)
1797 {
1798 using _TV [[__gnu__::__vector_size__(sizeof(_To))]] = _To;
1799 return reinterpret_cast<_TV>(__x)[0];
1800 }
1801 else
1802 {
1803 _To __r;
1804 __builtin_memcpy(reinterpret_cast<char*>(&__r),
1805 reinterpret_cast<const char*>(&__x), sizeof(_To));
1806 return __r;
1807 }
1808#endif
1809 }
1810
1811// }}}
1812// __to_intrin {{{
1813template <typename _Tp, typename _TVT = _VectorTraits<_Tp>,
1814 typename _R = __intrinsic_type_t<typename _TVT::value_type, _TVT::_S_full_size>>
1815 _GLIBCXX_SIMD_INTRINSIC constexpr _R
1816 __to_intrin(_Tp __x)
1817 {
1818 static_assert(sizeof(__x) <= sizeof(_R),
1819 "__to_intrin may never drop values off the end");
1820 if constexpr (sizeof(__x) == sizeof(_R))
1821 return reinterpret_cast<_R>(__as_vector(__x));
1822 else
1823 {
1824 using _Up = __int_for_sizeof_t<_Tp>;
1825 return reinterpret_cast<_R>(
1826 __vector_type_t<_Up, sizeof(_R) / sizeof(_Up)>{__bit_cast<_Up>(__x)});
1827 }
1828 }
1829
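// Usage sketch (illustrative only, assuming x86, where the intrinsic type of
// a 2-element float vector is the 16-byte __m128): widening zero-fills the
// upper bytes, e.g.
//   __vector_type_t<float, 2> __x = {1.f, 2.f};
//   __m128 __i = __to_intrin(__x); // lanes 2 and 3 are zero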
1830// }}}
1831// __make_vector{{{
1832template <typename _Tp, typename... _Args>
1833 _GLIBCXX_SIMD_INTRINSIC constexpr __vector_type_t<_Tp, sizeof...(_Args)>
1834 __make_vector(const _Args&... __args)
1835 { return __vector_type_t<_Tp, sizeof...(_Args)>{static_cast<_Tp>(__args)...}; }
1836
1837// }}}
1838// __vector_broadcast{{{
1839template <size_t _Np, typename _Tp, size_t... _I>
1840 _GLIBCXX_SIMD_INTRINSIC constexpr __vector_type_t<_Tp, _Np>
1841 __vector_broadcast_impl(_Tp __x, index_sequence<_I...>)
1842 { return __vector_type_t<_Tp, _Np>{((void)_I, __x)...}; }
1843
1844template <size_t _Np, typename _Tp>
1845 _GLIBCXX_SIMD_INTRINSIC constexpr __vector_type_t<_Tp, _Np>
1846 __vector_broadcast(_Tp __x)
1847 { return __vector_broadcast_impl<_Np, _Tp>(__x, make_index_sequence<_Np>()); }
1848
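// Usage sketch (illustrative only):
//   auto __v = __vector_broadcast<4>(1.5f);
//   // __v is __vector_type_t<float, 4>{1.5f, 1.5f, 1.5f, 1.5f}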
1849// }}}
1850// __generate_vector{{{
1851template <typename _Tp, size_t _Np, typename _Gp, size_t... _I>
1852 _GLIBCXX_SIMD_INTRINSIC constexpr __vector_type_t<_Tp, _Np>
1853 __generate_vector_impl(_Gp&& __gen, index_sequence<_I...>)
1854 { return __vector_type_t<_Tp, _Np>{static_cast<_Tp>(__gen(_SizeConstant<_I>()))...}; }
1855
1856template <typename _V, typename _VVT = _VectorTraits<_V>, typename _Gp>
1857 _GLIBCXX_SIMD_INTRINSIC constexpr _V
1858 __generate_vector(_Gp&& __gen)
1859 {
1860 if constexpr (__is_vector_type_v<_V>)
1861 return __generate_vector_impl<typename _VVT::value_type,
1862 _VVT::_S_full_size>(
1863 static_cast<_Gp&&>(__gen), make_index_sequence<_VVT::_S_full_size>());
1864 else
1865 return __generate_vector_impl<typename _VVT::value_type,
1866 _VVT::_S_partial_width>(
1867 static_cast<_Gp&&>(__gen),
1868 make_index_sequence<_VVT::_S_partial_width>());
1869 }
1870
1871template <typename _Tp, size_t _Np, typename _Gp>
1872 _GLIBCXX_SIMD_INTRINSIC constexpr __vector_type_t<_Tp, _Np>
1873 __generate_vector(_Gp&& __gen)
1874 {
1875 return __generate_vector_impl<_Tp, _Np>(static_cast<_Gp&&>(__gen),
1876 make_index_sequence<_Np>());
1877 }
1878
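// Usage sketch (illustrative only): the generator is invoked with
// _SizeConstant<0> ... _SizeConstant<_Np - 1>, so an iota vector is
//   auto __iota = __generate_vector<int, 4>([](auto __i) { return int(__i); });
//   // __iota is {0, 1, 2, 3}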
1879// }}}
1880// __xor{{{
1881template <typename _TW>
1882 _GLIBCXX_SIMD_INTRINSIC constexpr _TW
1883 __xor(_TW __a, _TW __b) noexcept
1884 {
1885 if constexpr (__is_vector_type_v<_TW> || __is_simd_wrapper_v<_TW>)
1886 {
1887 using _Tp = typename conditional_t<__is_simd_wrapper_v<_TW>, _TW,
1888 _VectorTraitsImpl<_TW>>::value_type;
1889 if constexpr (is_floating_point_v<_Tp>)
1890 {
1891 using _Ip = make_unsigned_t<__int_for_sizeof_t<_Tp>>;
1892 return __vector_bitcast<_Tp>(__vector_bitcast<_Ip>(__a)
1893 ^ __vector_bitcast<_Ip>(__b));
1894 }
1895 else if constexpr (__is_vector_type_v<_TW>)
1896 return __a ^ __b;
1897 else
1898 return __a._M_data ^ __b._M_data;
1899 }
1900 else
1901 return __a ^ __b;
1902 }
1903
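// Usage sketch (illustrative only): floating-point operands are routed
// through integer vectors above because GCC defines ^ only for integer
// vectors; XOR with the sign-bit mask negates every element, e.g.
//   __vector_type_t<float, 4> __x = {1.f, -2.f, 3.f, -4.f};
//   auto __neg = __xor(__x, __vector_broadcast<4>(-0.f)); // {-1.f, 2.f, -3.f, 4.f}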
1904// }}}
1905// __or{{{
1906template <typename _TW>
1907 _GLIBCXX_SIMD_INTRINSIC constexpr _TW
1908 __or(_TW __a, _TW __b) noexcept
1909 {
1910 if constexpr (__is_vector_type_v<_TW> || __is_simd_wrapper_v<_TW>)
1911 {
1912 using _Tp = typename conditional_t<__is_simd_wrapper_v<_TW>, _TW,
1913 _VectorTraitsImpl<_TW>>::value_type;
1914 if constexpr (is_floating_point_v<_Tp>)
1915 {
1916 using _Ip = make_unsigned_t<__int_for_sizeof_t<_Tp>>;
1917 return __vector_bitcast<_Tp>(__vector_bitcast<_Ip>(__a)
1918 | __vector_bitcast<_Ip>(__b));
1919 }
1920 else if constexpr (__is_vector_type_v<_TW>)
1921 return __a | __b;
1922 else
1923 return __a._M_data | __b._M_data;
1924 }
1925 else
1926 return __a | __b;
1927 }
1928
1929// }}}
1930// __and{{{
1931template <typename _TW>
1932 _GLIBCXX_SIMD_INTRINSIC constexpr _TW
1933 __and(_TW __a, _TW __b) noexcept
1934 {
1935 if constexpr (__is_vector_type_v<_TW> || __is_simd_wrapper_v<_TW>)
1936 {
1937 using _Tp = typename conditional_t<__is_simd_wrapper_v<_TW>, _TW,
1938 _VectorTraitsImpl<_TW>>::value_type;
1939 if constexpr (is_floating_point_v<_Tp>)
1940 {
1941 using _Ip = make_unsigned_t<__int_for_sizeof_t<_Tp>>;
1942 return __vector_bitcast<_Tp>(__vector_bitcast<_Ip>(__a)
1943 & __vector_bitcast<_Ip>(__b));
1944 }
1945 else if constexpr (__is_vector_type_v<_TW>)
1946 return __a & __b;
1947 else
1948 return __a._M_data & __b._M_data;
1949 }
1950 else
1951 return __a & __b;
1952 }
1953
1954// }}}
1955// __andnot{{{
1956#if _GLIBCXX_SIMD_X86INTRIN && !defined __clang__
1957static constexpr struct
1958{
1959 _GLIBCXX_SIMD_INTRINSIC __v4sf
1960 operator()(__v4sf __a, __v4sf __b) const noexcept
1961 { return __builtin_ia32_andnps(__a, __b); }
1962
1963 _GLIBCXX_SIMD_INTRINSIC __v2df
1964 operator()(__v2df __a, __v2df __b) const noexcept
1965 { return __builtin_ia32_andnpd(__a, __b); }
1966
1967 _GLIBCXX_SIMD_INTRINSIC __v2di
1968 operator()(__v2di __a, __v2di __b) const noexcept
1969 { return __builtin_ia32_pandn128(__a, __b); }
1970
1971 _GLIBCXX_SIMD_INTRINSIC __v8sf
1972 operator()(__v8sf __a, __v8sf __b) const noexcept
1973 { return __builtin_ia32_andnps256(__a, __b); }
1974
1975 _GLIBCXX_SIMD_INTRINSIC __v4df
1976 operator()(__v4df __a, __v4df __b) const noexcept
1977 { return __builtin_ia32_andnpd256(__a, __b); }
1978
1979 _GLIBCXX_SIMD_INTRINSIC __v4di
1980 operator()(__v4di __a, __v4di __b) const noexcept
1981 {
1982 if constexpr (__have_avx2)
1983 return __builtin_ia32_andnotsi256(__a, __b);
1984 else
1985 return reinterpret_cast<__v4di>(
1986 __builtin_ia32_andnpd256(reinterpret_cast<__v4df>(__a),
1987 reinterpret_cast<__v4df>(__b)));
1988 }
1989
1990 _GLIBCXX_SIMD_INTRINSIC __v16sf
1991 operator()(__v16sf __a, __v16sf __b) const noexcept
1992 {
1993 if constexpr (__have_avx512dq)
1994 return _mm512_andnot_ps(__a, __b);
1995 else
1996 return reinterpret_cast<__v16sf>(
1997 _mm512_andnot_si512(reinterpret_cast<__v8di>(__a),
1998 reinterpret_cast<__v8di>(__b)));
1999 }
2000
2001 _GLIBCXX_SIMD_INTRINSIC __v8df
2002 operator()(__v8df __a, __v8df __b) const noexcept
2003 {
2004 if constexpr (__have_avx512dq)
2005 return _mm512_andnot_pd(__a, __b);
2006 else
2007 return reinterpret_cast<__v8df>(
2008 _mm512_andnot_si512(reinterpret_cast<__v8di>(__a),
2009 reinterpret_cast<__v8di>(__b)));
2010 }
2011
2012 _GLIBCXX_SIMD_INTRINSIC __v8di
2013 operator()(__v8di __a, __v8di __b) const noexcept
2014 { return _mm512_andnot_si512(__a, __b); }
2015} _S_x86_andnot;
2016#endif // _GLIBCXX_SIMD_X86INTRIN && !__clang__
2017
2018template <typename _TW>
2019 _GLIBCXX_SIMD_INTRINSIC constexpr _TW
2020 __andnot(_TW __a, _TW __b) noexcept
2021 {
2022 if constexpr (__is_vector_type_v<_TW> || __is_simd_wrapper_v<_TW>)
2023 {
2024 using _TVT = conditional_t<__is_simd_wrapper_v<_TW>, _TW,
2025 _VectorTraitsImpl<_TW>>;
2026 using _Tp = typename _TVT::value_type;
2027#if _GLIBCXX_SIMD_X86INTRIN && !defined __clang__
2028 if constexpr (sizeof(_TW) >= 16)
2029 {
2030 const auto __ai = __to_intrin(__a);
2031 const auto __bi = __to_intrin(__b);
2032 if (!__builtin_is_constant_evaluated()
2033 && !(__builtin_constant_p(__ai) && __builtin_constant_p(__bi)))
2034 {
2035 const auto __r = _S_x86_andnot(__ai, __bi);
2036 if constexpr (is_convertible_v<decltype(__r), _TW>)
2037 return __r;
2038 else
2039 return reinterpret_cast<typename _TVT::type>(__r);
2040 }
2041 }
2042#endif // _GLIBCXX_SIMD_X86INTRIN && !__clang__
2043 using _Ip = make_unsigned_t<__int_for_sizeof_t<_Tp>>;
2044 return __vector_bitcast<_Tp>(~__vector_bitcast<_Ip>(__a)
2045 & __vector_bitcast<_Ip>(__b));
2046 }
2047 else
2048 return ~__a & __b;
2049 }
2050
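// Usage sketch (illustrative only): the operand order matches the x86 ANDN
// instructions, i.e. the result is ~__a & __b. Clearing the sign bits this
// way yields an elementwise fabs, e.g.
//   __vector_type_t<float, 4> __x = {-1.f, 2.f, -3.f, 4.f};
//   auto __abs = __andnot(__vector_broadcast<4>(-0.f), __x); // {1.f, 2.f, 3.f, 4.f}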
2051// }}}
2052// __not{{{
2053template <typename _Tp, typename _TVT = _VectorTraits<_Tp>>
2054 _GLIBCXX_SIMD_INTRINSIC constexpr _Tp
2055 __not(_Tp __a) noexcept
2056 {
2057 if constexpr (is_floating_point_v<typename _TVT::value_type>)
2058 return reinterpret_cast<typename _TVT::type>(
2059 ~__vector_bitcast<unsigned>(__a));
2060 else
2061 return ~__a;
2062 }
2063
2064// }}}
2065// __concat{{{
2066template <typename _Tp, typename _TVT = _VectorTraits<_Tp>,
2067 typename _R = __vector_type_t<typename _TVT::value_type, _TVT::_S_full_size * 2>>
2068 constexpr _R
2069 __concat(_Tp a_, _Tp b_)
2070 {
2071#ifdef _GLIBCXX_SIMD_WORKAROUND_XXX_1
2072 using _W
2073 = conditional_t<is_floating_point_v<typename _TVT::value_type>, double,
2074 conditional_t<(sizeof(_Tp) >= 2 * sizeof(long long)),
2075 long long, typename _TVT::value_type>>;
2076 constexpr int input_width = sizeof(_Tp) / sizeof(_W);
2077 const auto __a = __vector_bitcast<_W>(a_);
2078 const auto __b = __vector_bitcast<_W>(b_);
2079 using _Up = __vector_type_t<_W, sizeof(_R) / sizeof(_W)>;
2080#else
2081 constexpr int input_width = _TVT::_S_full_size;
2082 const _Tp& __a = a_;
2083 const _Tp& __b = b_;
2084 using _Up = _R;
2085#endif
2086 if constexpr (input_width == 2)
2087 return reinterpret_cast<_R>(_Up{__a[0], __a[1], __b[0], __b[1]});
2088 else if constexpr (input_width == 4)
2089 return reinterpret_cast<_R>(
2090 _Up{__a[0], __a[1], __a[2], __a[3], __b[0], __b[1], __b[2], __b[3]});
2091 else if constexpr (input_width == 8)
2092 return reinterpret_cast<_R>(
2093 _Up{__a[0], __a[1], __a[2], __a[3], __a[4], __a[5], __a[6], __a[7],
2094 __b[0], __b[1], __b[2], __b[3], __b[4], __b[5], __b[6], __b[7]});
2095 else if constexpr (input_width == 16)
2096 return reinterpret_cast<_R>(
2097 _Up{__a[0], __a[1], __a[2], __a[3], __a[4], __a[5], __a[6],
2098 __a[7], __a[8], __a[9], __a[10], __a[11], __a[12], __a[13],
2099 __a[14], __a[15], __b[0], __b[1], __b[2], __b[3], __b[4],
2100 __b[5], __b[6], __b[7], __b[8], __b[9], __b[10], __b[11],
2101 __b[12], __b[13], __b[14], __b[15]});
2102 else if constexpr (input_width == 32)
2103 return reinterpret_cast<_R>(
2104 _Up{__a[0], __a[1], __a[2], __a[3], __a[4], __a[5], __a[6],
2105 __a[7], __a[8], __a[9], __a[10], __a[11], __a[12], __a[13],
2106 __a[14], __a[15], __a[16], __a[17], __a[18], __a[19], __a[20],
2107 __a[21], __a[22], __a[23], __a[24], __a[25], __a[26], __a[27],
2108 __a[28], __a[29], __a[30], __a[31], __b[0], __b[1], __b[2],
2109 __b[3], __b[4], __b[5], __b[6], __b[7], __b[8], __b[9],
2110 __b[10], __b[11], __b[12], __b[13], __b[14], __b[15], __b[16],
2111 __b[17], __b[18], __b[19], __b[20], __b[21], __b[22], __b[23],
2112 __b[24], __b[25], __b[26], __b[27], __b[28], __b[29], __b[30],
2113 __b[31]});
2114 }
2115
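// Usage sketch (illustrative only):
//   __vector_type_t<float, 4> __lo = {0.f, 1.f, 2.f, 3.f};
//   __vector_type_t<float, 4> __hi = {4.f, 5.f, 6.f, 7.f};
//   __vector_type_t<float, 8> __v = __concat(__lo, __hi); // elements 0..7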
2116// }}}
2117// __zero_extend {{{
2118template <typename _Tp, typename _TVT = _VectorTraits<_Tp>>
2119 struct _ZeroExtendProxy
2120 {
2121 using value_type = typename _TVT::value_type;
2122 static constexpr size_t _Np = _TVT::_S_full_size;
2123 const _Tp __x;
2124
2125 template <typename _To, typename _ToVT = _VectorTraits<_To>,
2126 typename
2127 = enable_if_t<is_same_v<typename _ToVT::value_type, value_type>>>
2128 _GLIBCXX_SIMD_INTRINSIC operator _To() const
2129 {
2130 constexpr size_t _ToN = _ToVT::_S_full_size;
2131 if constexpr (_ToN == _Np)
2132 return __x;
2133 else if constexpr (_ToN == 2 * _Np)
2134 {
2135#ifdef _GLIBCXX_SIMD_WORKAROUND_XXX_3
2136 if constexpr (__have_avx && _TVT::template _S_is<float, 4>)
2137 return __vector_bitcast<value_type>(
2138 _mm256_insertf128_ps(__m256(), __x, 0));
2139 else if constexpr (__have_avx && _TVT::template _S_is<double, 2>)
2140 return __vector_bitcast<value_type>(
2141 _mm256_insertf128_pd(__m256d(), __x, 0));
2142 else if constexpr (__have_avx2 && _Np * sizeof(value_type) == 16)
2143 return __vector_bitcast<value_type>(
2144 _mm256_insertf128_si256(__m256i(), __to_intrin(__x), 0));
2145 else if constexpr (__have_avx512f && _TVT::template _S_is<float, 8>)
2146 {
2147 if constexpr (__have_avx512dq)
2148 return __vector_bitcast<value_type>(
2149 _mm512_insertf32x8(__m512(), __x, 0));
2150 else
2151 return reinterpret_cast<__m512>(
2152 _mm512_insertf64x4(__m512d(),
2153 reinterpret_cast<__m256d>(__x), 0));
2154 }
2155 else if constexpr (__have_avx512f
2156 && _TVT::template _S_is<double, 4>)
2157 return __vector_bitcast<value_type>(
2158 _mm512_insertf64x4(__m512d(), __x, 0));
2159 else if constexpr (__have_avx512f && _Np * sizeof(value_type) == 32)
2160 return __vector_bitcast<value_type>(
2161 _mm512_inserti64x4(__m512i(), __to_intrin(__x), 0));
2162#endif
2163 return __concat(__x, _Tp());
2164 }
2165 else if constexpr (_ToN == 4 * _Np)
2166 {
2167#ifdef _GLIBCXX_SIMD_WORKAROUND_XXX_3
2168 if constexpr (__have_avx512dq && _TVT::template _S_is<double, 2>)
2169 {
2170 return __vector_bitcast<value_type>(
2171 _mm512_insertf64x2(__m512d(), __x, 0));
2172 }
2173 else if constexpr (__have_avx512f
2174 && is_floating_point_v<value_type>)
2175 {
2176 return __vector_bitcast<value_type>(
2177 _mm512_insertf32x4(__m512(), reinterpret_cast<__m128>(__x),
2178 0));
2179 }
2180 else if constexpr (__have_avx512f && _Np * sizeof(value_type) == 16)
2181 {
2182 return __vector_bitcast<value_type>(
2183 _mm512_inserti32x4(__m512i(), __to_intrin(__x), 0));
2184 }
2185#endif
2186 return __concat(__concat(__x, _Tp()),
2187 __vector_type_t<value_type, _Np * 2>());
2188 }
2189 else if constexpr (_ToN == 8 * _Np)
2190 return __concat(operator __vector_type_t<value_type, _Np * 4>(),
2191 __vector_type_t<value_type, _Np * 4>());
2192 else if constexpr (_ToN == 16 * _Np)
2193 return __concat(operator __vector_type_t<value_type, _Np * 8>(),
2194 __vector_type_t<value_type, _Np * 8>());
2195 else
2196 __assert_unreachable<_Tp>();
2197 }
2198 };
2199
2200template <typename _Tp, typename _TVT = _VectorTraits<_Tp>>
2201 _GLIBCXX_SIMD_INTRINSIC _ZeroExtendProxy<_Tp, _TVT>
2202 __zero_extend(_Tp __x)
2203 { return {__x}; }
2204
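// Usage sketch (illustrative only): the returned proxy converts to any wider
// vector with the same value_type, zero-filling the new elements, e.g.
//   __vector_type_t<float, 4> __x = {1.f, 2.f, 3.f, 4.f};
//   __vector_type_t<float, 8> __y = __zero_extend(__x); // {1, 2, 3, 4, 0, 0, 0, 0}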
2205// }}}
2206// __extract<_Np, By>{{{
2207template <int _Offset,
2208 int _SplitBy,
2209 typename _Tp,
2210 typename _TVT = _VectorTraits<_Tp>,
2211 typename _R = __vector_type_t<typename _TVT::value_type, _TVT::_S_full_size / _SplitBy>>
2212 _GLIBCXX_SIMD_INTRINSIC constexpr _R
2213 __extract(_Tp __in)
2214 {
2215 using value_type = typename _TVT::value_type;
2216#if _GLIBCXX_SIMD_X86INTRIN // {{{
2217 if constexpr (sizeof(_Tp) == 64 && _SplitBy == 4 && _Offset > 0)
2218 {
2219 if constexpr (__have_avx512dq && is_same_v<double, value_type>)
2220 return _mm512_extractf64x2_pd(__to_intrin(__in), _Offset);
2221 else if constexpr (is_floating_point_v<value_type>)
2222 return __vector_bitcast<value_type>(
2223 _mm512_extractf32x4_ps(__intrin_bitcast<__m512>(__in), _Offset));
2224 else
2225 return reinterpret_cast<_R>(
2226 _mm512_extracti32x4_epi32(__intrin_bitcast<__m512i>(__in),
2227 _Offset));
2228 }
2229 else
2230#endif // _GLIBCXX_SIMD_X86INTRIN }}}
2231 {
2232#ifdef _GLIBCXX_SIMD_WORKAROUND_XXX_1
2233 using _W = conditional_t<
2234 is_floating_point_v<value_type>, double,
2235 conditional_t<(sizeof(_R) >= 16), long long, value_type>>;
2236 static_assert(sizeof(_R) % sizeof(_W) == 0);
2237 constexpr int __return_width = sizeof(_R) / sizeof(_W);
2238 using _Up = __vector_type_t<_W, __return_width>;
2239 const auto __x = __vector_bitcast<_W>(__in);
2240#else
2241 constexpr int __return_width = _TVT::_S_full_size / _SplitBy;
2242 using _Up = _R;
2243 const __vector_type_t<value_type, _TVT::_S_full_size>& __x
2244 = __in; // only needed for _Tp = _SimdWrapper<value_type, _Np>
2245#endif
2246 constexpr int _O = _Offset * __return_width;
2247 return __call_with_subscripts<__return_width, _O>(
2248 __x, [](auto... __entries) _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA {
2249 return reinterpret_cast<_R>(_Up{__entries...});
2250 });
2251 }
2252 }
2253
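// Usage sketch (illustrative only): _Offset counts in units of the result
// width (_S_full_size / _SplitBy elements), e.g. splitting into halves:
//   __vector_type_t<float, 8> __v = {0.f, 1.f, 2.f, 3.f, 4.f, 5.f, 6.f, 7.f};
//   auto __lo = __extract<0, 2>(__v); // {0.f, 1.f, 2.f, 3.f}
//   auto __hi = __extract<1, 2>(__v); // {4.f, 5.f, 6.f, 7.f}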
2254// }}}
2255// __lo/__hi64[z]{{{
2256template <typename _Tp,
2257 typename _R = __vector_type8_t<typename _VectorTraits<_Tp>::value_type>>
2258 _GLIBCXX_SIMD_INTRINSIC constexpr _R
2259 __lo64(_Tp __x)
2260 {
2261 _R __r{};
2262 __builtin_memcpy(&__r, &__x, 8);
2263 return __r;
2264 }
2265
2266template <typename _Tp,
2267 typename _R = __vector_type8_t<typename _VectorTraits<_Tp>::value_type>>
2268 _GLIBCXX_SIMD_INTRINSIC constexpr _R
2269 __hi64(_Tp __x)
2270 {
2271 static_assert(sizeof(_Tp) == 16, "use __hi64z if you meant it");
2272 _R __r{};
2273 __builtin_memcpy(&__r, reinterpret_cast<const char*>(&__x) + 8, 8);
2274 return __r;
2275 }
2276
2277template <typename _Tp,
2278 typename _R = __vector_type8_t<typename _VectorTraits<_Tp>::value_type>>
2279 _GLIBCXX_SIMD_INTRINSIC constexpr _R
2280 __hi64z([[maybe_unused]] _Tp __x)
2281 {
2282 _R __r{};
2283 if constexpr (sizeof(_Tp) == 16)
2284 __builtin_memcpy(&__r, reinterpret_cast<const char*>(&__x) + 8, 8);
2285 return __r;
2286 }
2287
2288// }}}
2289// __lo/__hi128{{{
2290template <typename _Tp>
2291 _GLIBCXX_SIMD_INTRINSIC constexpr auto
2292 __lo128(_Tp __x)
2293 { return __extract<0, sizeof(_Tp) / 16>(__x); }
2294
2295template <typename _Tp>
2296 _GLIBCXX_SIMD_INTRINSIC constexpr auto
2297 __hi128(_Tp __x)
2298 {
2299 static_assert(sizeof(__x) == 32);
2300 return __extract<1, 2>(__x);
2301 }
2302
2303// }}}
2304// __lo/__hi256{{{
2305template <typename _Tp>
2306 _GLIBCXX_SIMD_INTRINSIC constexpr auto
2307 __lo256(_Tp __x)
2308 {
2309 static_assert(sizeof(__x) == 64);
2310 return __extract<0, 2>(__x);
2311 }
2312
2313template <typename _Tp>
2314 _GLIBCXX_SIMD_INTRINSIC constexpr auto
2315 __hi256(_Tp __x)
2316 {
2317 static_assert(sizeof(__x) == 64);
2318 return __extract<1, 2>(__x);
2319 }
2320
2321// }}}
2322// __auto_bitcast{{{
2323template <typename _Tp>
2324 struct _AutoCast
2325 {
2326 static_assert(__is_vector_type_v<_Tp>);
2327
2328 const _Tp __x;
2329
2330 template <typename _Up, typename _UVT = _VectorTraits<_Up>>
2331 _GLIBCXX_SIMD_INTRINSIC constexpr operator _Up() const
2332 { return __intrin_bitcast<typename _UVT::type>(__x); }
2333 };
2334
2335template <typename _Tp>
2336 _GLIBCXX_SIMD_INTRINSIC constexpr _AutoCast<_Tp>
2337 __auto_bitcast(const _Tp& __x)
2338 { return {__x}; }
2339
2340template <typename _Tp, size_t _Np>
2341 _GLIBCXX_SIMD_INTRINSIC constexpr
2342 _AutoCast<typename _SimdWrapper<_Tp, _Np>::_BuiltinType>
2343 __auto_bitcast(const _SimdWrapper<_Tp, _Np>& __x)
2344 { return {__x._M_data}; }
2345
2346// }}}
2347// ^^^ ---- builtin vector types [[gnu::vector_size(N)]] and operations ---- ^^^
2348
2349#if _GLIBCXX_SIMD_HAVE_SSE_ABI
2350// __bool_storage_member_type{{{
2351#if _GLIBCXX_SIMD_HAVE_AVX512F && _GLIBCXX_SIMD_X86INTRIN
2352template <size_t _Size>
2353 struct __bool_storage_member_type
2354 {
2355 static_assert((_Size & (_Size - 1)) != 0,
2356 "This trait may only be used for non-power-of-2 sizes. "
2357 "Power-of-2 sizes must be specialized.");
2358 using type =
2359 typename __bool_storage_member_type<std::__bit_ceil(_Size)>::type;
2360 };
2361
2362template <>
2363 struct __bool_storage_member_type<1> { using type = bool; };
2364
2365template <>
2366 struct __bool_storage_member_type<2> { using type = __mmask8; };
2367
2368template <>
2369 struct __bool_storage_member_type<4> { using type = __mmask8; };
2370
2371template <>
2372 struct __bool_storage_member_type<8> { using type = __mmask8; };
2373
2374template <>
2375 struct __bool_storage_member_type<16> { using type = __mmask16; };
2376
2377template <>
2378 struct __bool_storage_member_type<32> { using type = __mmask32; };
2379
2380template <>
2381 struct __bool_storage_member_type<64> { using type = __mmask64; };
2382#endif // _GLIBCXX_SIMD_HAVE_AVX512F
2383
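// Illustrative consequence (assuming AVX512F): non-power-of-2 sizes round up
// via __bit_ceil to the next specialization, e.g.
//   static_assert(is_same_v<__bool_storage_member_type<3>::type, __mmask8>);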
2384// }}}
2385// __intrinsic_type (x86){{{
2386// the following excludes bool via __is_vectorizable
2387#if _GLIBCXX_SIMD_HAVE_SSE
2388template <typename _Tp, size_t _Bytes>
2389 struct __intrinsic_type<_Tp, _Bytes, enable_if_t<__is_vectorizable_v<_Tp> && _Bytes <= 64>>
2390 {
2391 static_assert(!is_same_v<_Tp, long double>,
2392 "no __intrinsic_type support for long double on x86");
2393
2394 static constexpr size_t _S_VBytes = _Bytes <= 16 ? 16 : _Bytes <= 32 ? 32 : 64;
2395
2396 using type [[__gnu__::__vector_size__(_S_VBytes)]]
2397 = conditional_t<is_integral_v<_Tp>, long long int, _Tp>;
2398 };
2399#endif // _GLIBCXX_SIMD_HAVE_SSE
2400
2401// }}}
2402#endif // _GLIBCXX_SIMD_HAVE_SSE_ABI
2403// __intrinsic_type (ARM){{{
2404#if _GLIBCXX_SIMD_HAVE_NEON
2405template <>
2406 struct __intrinsic_type<float, 8, void>
2407 { using type = float32x2_t; };
2408
2409template <>
2410 struct __intrinsic_type<float, 16, void>
2411 { using type = float32x4_t; };
2412
2413template <>
2414 struct __intrinsic_type<double, 8, void>
2415 {
2416#if _GLIBCXX_SIMD_HAVE_NEON_A64
2417 using type = float64x1_t;
2418#endif
2419 };
2420
2421template <>
2422 struct __intrinsic_type<double, 16, void>
2423 {
2424#if _GLIBCXX_SIMD_HAVE_NEON_A64
2425 using type = float64x2_t;
2426#endif
2427 };
2428
2429#define _GLIBCXX_SIMD_ARM_INTRIN(_Bits, _Np) \
2430template <> \
2431 struct __intrinsic_type<__int_with_sizeof_t<_Bits / 8>, \
2432 _Np * _Bits / 8, void> \
2433 { using type = int##_Bits##x##_Np##_t; }; \
2434template <> \
2435 struct __intrinsic_type<make_unsigned_t<__int_with_sizeof_t<_Bits / 8>>, \
2436 _Np * _Bits / 8, void> \
2437 { using type = uint##_Bits##x##_Np##_t; }
2438_GLIBCXX_SIMD_ARM_INTRIN(8, 8);
2439_GLIBCXX_SIMD_ARM_INTRIN(8, 16);
2440_GLIBCXX_SIMD_ARM_INTRIN(16, 4);
2441_GLIBCXX_SIMD_ARM_INTRIN(16, 8);
2442_GLIBCXX_SIMD_ARM_INTRIN(32, 2);
2443_GLIBCXX_SIMD_ARM_INTRIN(32, 4);
2444_GLIBCXX_SIMD_ARM_INTRIN(64, 1);
2445_GLIBCXX_SIMD_ARM_INTRIN(64, 2);
2446#undef _GLIBCXX_SIMD_ARM_INTRIN
2447
2448template <typename _Tp, size_t _Bytes>
2449 struct __intrinsic_type<_Tp, _Bytes, enable_if_t<__is_vectorizable_v<_Tp> && _Bytes <= 16>>
2450 {
2451 static constexpr int _SVecBytes = _Bytes <= 8 ? 8 : 16;
2452
2453 using _Ip = __int_for_sizeof_t<_Tp>;
2454
2455 using _Up = conditional_t<
2456 is_floating_point_v<_Tp>, _Tp,
2457 conditional_t<is_unsigned_v<_Tp>, make_unsigned_t<_Ip>, _Ip>>;
2458
2459 static_assert(!is_same_v<_Tp, _Up> || _SVecBytes != _Bytes,
2460 "should use explicit specialization above");
2461
2462 using type = typename __intrinsic_type<_Up, _SVecBytes>::type;
2463 };
2464#endif // _GLIBCXX_SIMD_HAVE_NEON
2465
2466// }}}
2467// __intrinsic_type (PPC){{{
2468#ifdef __ALTIVEC__
2469template <typename _Tp>
2470 struct __intrinsic_type_impl;
2471
2472#define _GLIBCXX_SIMD_PPC_INTRIN(_Tp) \
2473 template <> \
2474 struct __intrinsic_type_impl<_Tp> { using type = __vector _Tp; }
2475_GLIBCXX_SIMD_PPC_INTRIN(float);
2476#ifdef __VSX__
2477_GLIBCXX_SIMD_PPC_INTRIN(double);
2478#endif
2479_GLIBCXX_SIMD_PPC_INTRIN(signed char);
2480_GLIBCXX_SIMD_PPC_INTRIN(unsigned char);
2481_GLIBCXX_SIMD_PPC_INTRIN(signed short);
2482_GLIBCXX_SIMD_PPC_INTRIN(unsigned short);
2483_GLIBCXX_SIMD_PPC_INTRIN(signed int);
2484_GLIBCXX_SIMD_PPC_INTRIN(unsigned int);
2485#if defined __VSX__ || __SIZEOF_LONG__ == 4
2486_GLIBCXX_SIMD_PPC_INTRIN(signed long);
2487_GLIBCXX_SIMD_PPC_INTRIN(unsigned long);
2488#endif
2489#ifdef __VSX__
2490_GLIBCXX_SIMD_PPC_INTRIN(signed long long);
2491_GLIBCXX_SIMD_PPC_INTRIN(unsigned long long);
2492#endif
2493#undef _GLIBCXX_SIMD_PPC_INTRIN
2494
2495template <typename _Tp, size_t _Bytes>
2496 struct __intrinsic_type<_Tp, _Bytes, enable_if_t<__is_vectorizable_v<_Tp> && _Bytes <= 16>>
2497 {
2498 static constexpr bool _S_is_ldouble = is_same_v<_Tp, long double>;
2499
2500 // allow _Tp == long double with -mlong-double-64
2501 static_assert(!(_S_is_ldouble && sizeof(long double) > sizeof(double)),
2502 "no __intrinsic_type support for 128-bit floating point on PowerPC");
2503
2504#ifndef __VSX__
2505 static_assert(!(is_same_v<_Tp, double>
2506 || (_S_is_ldouble && sizeof(long double) == sizeof(double))),
2507 "no __intrinsic_type support for 64-bit floating point on PowerPC w/o VSX");
2508#endif
2509
2510 static constexpr auto __element_type()
2511 {
2512 if constexpr (is_floating_point_v<_Tp>)
2513 {
2514 if constexpr (_S_is_ldouble)
2515 return double {};
2516 else
2517 return _Tp {};
2518 }
2519 else if constexpr (is_signed_v<_Tp>)
2520 {
2521 if constexpr (sizeof(_Tp) == sizeof(_SChar))
2522 return _SChar {};
2523 else if constexpr (sizeof(_Tp) == sizeof(short))
2524 return short {};
2525 else if constexpr (sizeof(_Tp) == sizeof(int))
2526 return int {};
2527 else if constexpr (sizeof(_Tp) == sizeof(_LLong))
2528 return _LLong {};
2529 }
2530 else
2531 {
2532 if constexpr (sizeof(_Tp) == sizeof(_UChar))
2533 return _UChar {};
2534 else if constexpr (sizeof(_Tp) == sizeof(_UShort))
2535 return _UShort {};
2536 else if constexpr (sizeof(_Tp) == sizeof(_UInt))
2537 return _UInt {};
2538 else if constexpr (sizeof(_Tp) == sizeof(_ULLong))
2539 return _ULLong {};
2540 }
2541 }
2542
2543 using type = typename __intrinsic_type_impl<decltype(__element_type())>::type;
2544 };
2545#endif // __ALTIVEC__
2546
2547// }}}
2548// _SimdWrapper<bool>{{{1
2549template <size_t _Width>
2550 struct _SimdWrapper<bool, _Width,
2551 void_t<typename __bool_storage_member_type<_Width>::type>>
2552 {
2553 using _BuiltinType = typename __bool_storage_member_type<_Width>::type;
2554 using value_type = bool;
2555
2556 static constexpr size_t _S_full_size = sizeof(_BuiltinType) * __CHAR_BIT__;
2557
2558 _GLIBCXX_SIMD_INTRINSIC constexpr _SimdWrapper<bool, _S_full_size>
2559 __as_full_vector() const
2560 { return _M_data; }
2561
2562 _GLIBCXX_SIMD_INTRINSIC constexpr
2563 _SimdWrapper() = default;
2564
2565 _GLIBCXX_SIMD_INTRINSIC constexpr
2566 _SimdWrapper(_BuiltinType __k) : _M_data(__k) {}
2567
2568 _GLIBCXX_SIMD_INTRINSIC
2569 operator const _BuiltinType&() const
2570 { return _M_data; }
2571
2572 _GLIBCXX_SIMD_INTRINSIC
2573 operator _BuiltinType&()
2574 { return _M_data; }
2575
2576 _GLIBCXX_SIMD_INTRINSIC _BuiltinType
2577 __intrin() const
2578 { return _M_data; }
2579
2580 _GLIBCXX_SIMD_INTRINSIC constexpr value_type
2581 operator[](size_t __i) const
2582 { return _M_data & (_BuiltinType(1) << __i); }
2583
2584 template <size_t __i>
2585 _GLIBCXX_SIMD_INTRINSIC constexpr value_type
2586 operator[](_SizeConstant<__i>) const
2587 { return _M_data & (_BuiltinType(1) << __i); }
2588
2589 _GLIBCXX_SIMD_INTRINSIC constexpr void
2590 _M_set(size_t __i, value_type __x)
2591 {
2592 if (__x)
2593 _M_data |= (_BuiltinType(1) << __i);
2594 else
2595 _M_data &= ~(_BuiltinType(1) << __i);
2596 }
2597
2598 _GLIBCXX_SIMD_INTRINSIC constexpr bool
2599 _M_is_constprop() const
2600 { return __builtin_constant_p(_M_data); }
2601
2602 _GLIBCXX_SIMD_INTRINSIC constexpr bool
2603 _M_is_constprop_none_of() const
2604 {
2605 if (__builtin_constant_p(_M_data))
2606 {
2607 constexpr int __nbits = sizeof(_BuiltinType) * __CHAR_BIT__;
2608 constexpr _BuiltinType __active_mask
2609 = ~_BuiltinType() >> (__nbits - _Width);
2610 return (_M_data & __active_mask) == 0;
2611 }
2612 return false;
2613 }
2614
2615 _GLIBCXX_SIMD_INTRINSIC constexpr bool
2616 _M_is_constprop_all_of() const
2617 {
2618 if (__builtin_constant_p(_M_data))
2619 {
2620 constexpr int __nbits = sizeof(_BuiltinType) * __CHAR_BIT__;
2621 constexpr _BuiltinType __active_mask
2622 = ~_BuiltinType() >> (__nbits - _Width);
2623 return (_M_data & __active_mask) == __active_mask;
2624 }
2625 return false;
2626 }
2627
2628 _BuiltinType _M_data;
2629 };
2630
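// Usage sketch (illustrative only, assuming AVX512F so that 8 bools are
// stored in an __mmask8): one element per bit, so element access is bit
// arithmetic, e.g.
//   _SimdWrapper<bool, 8> __k(0b0101);
//   // __k[0] == true, __k[1] == false, __k[2] == true
//   __k._M_set(1, true); // __k._M_data is now 0b0111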
2631// _SimdWrapperBase{{{1
2632template <bool _MustZeroInitPadding, typename _BuiltinType>
2633 struct _SimdWrapperBase;
2634
2635template <typename _BuiltinType>
2636 struct _SimdWrapperBase<false, _BuiltinType> // no padding or no SNaNs
2637 {
2638 _GLIBCXX_SIMD_INTRINSIC constexpr
2639 _SimdWrapperBase() = default;
2640
2641 _GLIBCXX_SIMD_INTRINSIC constexpr
2642 _SimdWrapperBase(_BuiltinType __init) : _M_data(__init) {}
2643
2644 _BuiltinType _M_data;
2645 };
2646
2647template <typename _BuiltinType>
2648 struct _SimdWrapperBase<true, _BuiltinType> // with padding that needs to
2649 // never become SNaN
2650 {
2651 _GLIBCXX_SIMD_INTRINSIC constexpr
2652 _SimdWrapperBase() : _M_data() {}
2653
2654 _GLIBCXX_SIMD_INTRINSIC constexpr
2655 _SimdWrapperBase(_BuiltinType __init) : _M_data(__init) {}
2656
2657 _BuiltinType _M_data;
2658 };
2659
2660// }}}
2661// _SimdWrapper{{{
2662template <typename _Tp, size_t _Width>
2663 struct _SimdWrapper<
2664 _Tp, _Width,
2665 void_t<__vector_type_t<_Tp, _Width>, __intrinsic_type_t<_Tp, _Width>>>
2666 : _SimdWrapperBase<__has_iec559_behavior<__signaling_NaN, _Tp>::value
2667 && sizeof(_Tp) * _Width
2668 != sizeof(__vector_type_t<_Tp, _Width>),
2669 __vector_type_t<_Tp, _Width>>
2670 {
2671 using _Base
2672 = _SimdWrapperBase<__has_iec559_behavior<__signaling_NaN, _Tp>::value
2673 && sizeof(_Tp) * _Width
2674 != sizeof(__vector_type_t<_Tp, _Width>),
2675 __vector_type_t<_Tp, _Width>>;
2676
2677 static_assert(__is_vectorizable_v<_Tp>);
2678 static_assert(_Width >= 2); // 1 doesn't make sense, use _Tp directly then
2679
2680 using _BuiltinType = __vector_type_t<_Tp, _Width>;
2681 using value_type = _Tp;
2682
2683 static inline constexpr size_t _S_full_size
2684 = sizeof(_BuiltinType) / sizeof(value_type);
2685 static inline constexpr int _S_size = _Width;
2686 static inline constexpr bool _S_is_partial = _S_full_size != _S_size;
2687
2688 using _Base::_M_data;
2689
2690 _GLIBCXX_SIMD_INTRINSIC constexpr _SimdWrapper<_Tp, _S_full_size>
2691 __as_full_vector() const
2692 { return _M_data; }
2693
2694 _GLIBCXX_SIMD_INTRINSIC constexpr
2695 _SimdWrapper(initializer_list<_Tp> __init)
2696 : _Base(__generate_from_n_evaluations<_Width, _BuiltinType>(
2697 [&](auto __i) _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA {
2698 return __init.begin()[__i.value];
2699 })) {}
2700
2701 _GLIBCXX_SIMD_INTRINSIC constexpr
2702 _SimdWrapper() = default;
2703
2704 _GLIBCXX_SIMD_INTRINSIC constexpr
2705 _SimdWrapper(const _SimdWrapper&) = default;
2706
2707 _GLIBCXX_SIMD_INTRINSIC constexpr
2708 _SimdWrapper(_SimdWrapper&&) = default;
2709
2710 _GLIBCXX_SIMD_INTRINSIC constexpr _SimdWrapper&
2711 operator=(const _SimdWrapper&) = default;
2712
2713 _GLIBCXX_SIMD_INTRINSIC constexpr _SimdWrapper&
2714 operator=(_SimdWrapper&&) = default;
2715
2716 template <typename _V, typename = enable_if_t<disjunction_v<
2717 is_same<_V, __vector_type_t<_Tp, _Width>>,
2718 is_same<_V, __intrinsic_type_t<_Tp, _Width>>>>>
2719 _GLIBCXX_SIMD_INTRINSIC constexpr
2720 _SimdWrapper(_V __x)
2721 // __vector_bitcast can convert e.g. __m128 to __vector(2) float
2722 : _Base(__vector_bitcast<_Tp, _Width>(__x)) {}
2723
2724 template <typename... _As,
2725 typename = enable_if_t<((is_same_v<simd_abi::scalar, _As> && ...)
2726 && sizeof...(_As) <= _Width)>>
2727 _GLIBCXX_SIMD_INTRINSIC constexpr
2728 operator _SimdTuple<_Tp, _As...>() const
2729 {
2730 return __generate_from_n_evaluations<sizeof...(_As), _SimdTuple<_Tp, _As...>>(
2731 [&](auto __i) constexpr _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA
2732 { return _M_data[int(__i)]; });
2733 }
2734
2735 _GLIBCXX_SIMD_INTRINSIC constexpr
2736 operator const _BuiltinType&() const
2737 { return _M_data; }
2738
2739 _GLIBCXX_SIMD_INTRINSIC constexpr
2740 operator _BuiltinType&()
2741 { return _M_data; }
2742
2743 _GLIBCXX_SIMD_INTRINSIC constexpr _Tp
2744 operator[](size_t __i) const
2745 { return _M_data[__i]; }
2746
2747 template <size_t __i>
2748 _GLIBCXX_SIMD_INTRINSIC constexpr _Tp
2749 operator[](_SizeConstant<__i>) const
2750 { return _M_data[__i]; }
2751
2752 _GLIBCXX_SIMD_INTRINSIC constexpr void
2753 _M_set(size_t __i, _Tp __x)
2754 {
2755 if (__builtin_is_constant_evaluated())
2756 _M_data = __generate_from_n_evaluations<_Width, _BuiltinType>([&](auto __j) {
2757 return __j == __i ? __x : _M_data[__j()];
2758 });
2759 else
2760 _M_data[__i] = __x;
2761 }
2762
2763 _GLIBCXX_SIMD_INTRINSIC
2764 constexpr bool
2765 _M_is_constprop() const
2766 { return __builtin_constant_p(_M_data); }
2767
2768 _GLIBCXX_SIMD_INTRINSIC constexpr bool
2769 _M_is_constprop_none_of() const
2770 {
2771 if (__builtin_constant_p(_M_data))
2772 {
2773 bool __r = true;
2774 if constexpr (is_floating_point_v<_Tp>)
2775 {
2776 using _Ip = __int_for_sizeof_t<_Tp>;
2777 const auto __intdata = __vector_bitcast<_Ip>(_M_data);
2778 __execute_n_times<_Width>(
2779 [&](auto __i) { __r &= __intdata[__i.value] == _Ip(); });
2780 }
2781 else
2782 __execute_n_times<_Width>(
2783 [&](auto __i) { __r &= _M_data[__i.value] == _Tp(); });
2784 if (__builtin_constant_p(__r))
2785 return __r;
2786 }
2787 return false;
2788 }
2789
2790 _GLIBCXX_SIMD_INTRINSIC constexpr bool
2791 _M_is_constprop_all_of() const
2792 {
2793 if (__builtin_constant_p(_M_data))
2794 {
2795 bool __r = true;
2796 if constexpr (is_floating_point_v<_Tp>)
2797 {
2798 using _Ip = __int_for_sizeof_t<_Tp>;
2799 const auto __intdata = __vector_bitcast<_Ip>(_M_data);
2800 __execute_n_times<_Width>(
2801 [&](auto __i) { __r &= __intdata[__i.value] == ~_Ip(); });
2802 }
2803 else
2804 __execute_n_times<_Width>(
2805 [&](auto __i) { __r &= _M_data[__i.value] == ~_Tp(); });
2806 if (__builtin_constant_p(__r))
2807 return __r;
2808 }
2809 return false;
2810 }
2811 };
2812
2813// }}}
2814
2815// __vectorized_sizeof {{{
2816template <typename _Tp>
2817 constexpr size_t
2818 __vectorized_sizeof()
2819 {
2820 if constexpr (!__is_vectorizable_v<_Tp>)
2821 return 0;
2822
2823 if constexpr (sizeof(_Tp) <= 8)
2824 {
2825 // X86:
2826 if constexpr (__have_avx512bw)
2827 return 64;
2828 if constexpr (__have_avx512f && sizeof(_Tp) >= 4)
2829 return 64;
2830 if constexpr (__have_avx2)
2831 return 32;
2832 if constexpr (__have_avx && is_floating_point_v<_Tp>)
2833 return 32;
2834 if constexpr (__have_sse2)
2835 return 16;
2836 if constexpr (__have_sse && is_same_v<_Tp, float>)
2837 return 16;
2838 /* The following is too much trouble because of mixed MMX and x87 code.
2839 * While nothing here explicitly uses MMX instructions or registers,
2840 * they are still emitted but no EMMS cleanup is done.
2841 if constexpr (__have_mmx && sizeof(_Tp) <= 4 && is_integral_v<_Tp>)
2842 return 8;
2843 */
2844
2845 // PowerPC:
2846 if constexpr (__have_power8vec
2847 || (__have_power_vmx && (sizeof(_Tp) < 8))
2848 || (__have_power_vsx && is_floating_point_v<_Tp>))
2849 return 16;
2850
2851 // ARM:
2852 if constexpr (__have_neon_a64)
2853 return 16;
2854 if constexpr (__have_neon_a32 and (not is_floating_point_v<_Tp>
2855 or is_same_v<_Tp, float>))
2856 return 16;
2857 if constexpr (__have_neon
2858 && sizeof(_Tp) < 8
2859 // Only allow fp if the user allows non-IEC559 fp (e.g.
2860 // via -ffast-math). ARMv7 NEON fp is not conforming to
2861 // IEC559.
2862 && (__support_neon_float || !is_floating_point_v<_Tp>))
2863 return 16;
2864 }
2865
2866 return sizeof(_Tp);
2867 }
2868
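// Illustrative results (target-dependent): with AVX2,
// __vectorized_sizeof<float>() is 32 (one YMM register); with only SSE2 it
// is 16; without any usable vector unit it falls back to sizeof(float),
// i.e. scalar.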
2869// }}}
2870namespace simd_abi {
2871// most of simd_abi is defined in simd_detail.h
2872template <typename _Tp>
2873 inline constexpr int max_fixed_size
2874 = ((__have_avx512bw && sizeof(_Tp) == 1)
2875 || (__have_sve && __sve_vectorized_size_bytes / sizeof(_Tp) >= 64)) ? 64 : 32;
2876
2877// compatible {{{
2878#if defined __x86_64__ || defined __aarch64__
2879template <typename _Tp>
2880 using compatible = conditional_t<(sizeof(_Tp) <= 8), _VecBuiltin<16>, scalar>;
2881#elif defined __ARM_NEON
2882// FIXME: not sure, probably needs to be scalar (or dependent on the hard-float
2883// ABI?)
2884template <typename _Tp>
2885 using compatible
2886 = conditional_t<(sizeof(_Tp) < 8
2887 && (__support_neon_float || !is_floating_point_v<_Tp>)),
2888 _VecBuiltin<16>, scalar>;
2889#else
2890template <typename>
2891 using compatible = scalar;
2892#endif
2893
2894// }}}
2895// native {{{
2896template <typename _Tp>
2897 constexpr auto
2898 __determine_native_abi()
2899 {
2900 constexpr size_t __bytes = __vectorized_sizeof<_Tp>();
2901 if constexpr (__bytes == sizeof(_Tp))
2902 return static_cast<scalar*>(nullptr);
2903 else if constexpr (__have_sve)
2904 return static_cast<_SveAbi<__sve_vectorized_size_bytes>*>(nullptr);
2905 else if constexpr (__have_avx512vl || (__have_avx512f && __bytes == 64))
2906 return static_cast<_VecBltnBtmsk<__bytes>*>(nullptr);
2907 else
2908 return static_cast<_VecBuiltin<__bytes>*>(nullptr);
2909 }
2910
2911template <typename _Tp, typename = enable_if_t<__is_vectorizable_v<_Tp>>>
2912 using native = remove_pointer_t<decltype(__determine_native_abi<_Tp>())>;
2913
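// Illustrative deduction (assuming x86 with AVX2 but no AVX512): a float
// vectorizes to 32 bytes, so simd_abi::native<float> is _VecBuiltin<32> and
// simd<float, simd_abi::native<float>>::size() == 8.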
2914// }}}
2915// __default_abi {{{
2916#if defined _GLIBCXX_SIMD_DEFAULT_ABI
2917template <typename _Tp>
2918 using __default_abi = _GLIBCXX_SIMD_DEFAULT_ABI<_Tp>;
2919#else
2920template <typename _Tp>
2921 using __default_abi = compatible<_Tp>;
2922#endif
2923
2924// }}}
2925} // namespace simd_abi
2926
2927// traits {{{1
2928template <typename _Tp>
2929 struct is_simd_flag_type
2930 : false_type
2931 {};
2932
2933template <>
2934 struct is_simd_flag_type<element_aligned_tag>
2935 : true_type
2936 {};
2937
2938template <>
2939 struct is_simd_flag_type<vector_aligned_tag>
2940 : true_type
2941 {};
2942
2943template <size_t _Np>
2944 struct is_simd_flag_type<overaligned_tag<_Np>>
2945 : __bool_constant<(_Np > 0) and __has_single_bit(_Np)>
2946 {};
2947
2948template <typename _Tp>
2949 inline constexpr bool is_simd_flag_type_v = is_simd_flag_type<_Tp>::value;
2950
2951template <typename _Tp, typename = enable_if_t<is_simd_flag_type_v<_Tp>>>
2952 using _IsSimdFlagType = _Tp;
2953
2954// is_abi_tag {{{2
2955template <typename _Tp, typename = void_t<>>
2956 struct is_abi_tag : false_type {};
2957
2958template <typename _Tp>
2959 struct is_abi_tag<_Tp, void_t<typename _Tp::_IsValidAbiTag>>
2960 : public _Tp::_IsValidAbiTag {};
2961
2962template <typename _Tp>
2963 inline constexpr bool is_abi_tag_v = is_abi_tag<_Tp>::value;
2964
2965// is_simd(_mask) {{{2
2966template <typename _Tp>
2967 struct is_simd : public false_type {};
2968
2969template <typename _Tp>
2970 inline constexpr bool is_simd_v = is_simd<_Tp>::value;
2971
2972template <typename _Tp>
2973 struct is_simd_mask : public false_type {};
2974
2975template <typename _Tp>
2976 inline constexpr bool is_simd_mask_v = is_simd_mask<_Tp>::value;
2977
2978// simd_size {{{2
2979template <typename _Tp, typename _Abi, typename = void>
2980 struct __simd_size_impl {};
2981
2982template <typename _Tp, typename _Abi>
2983 struct __simd_size_impl<
2984 _Tp, _Abi,
2985 enable_if_t<conjunction_v<__is_vectorizable<_Tp>, is_abi_tag<_Abi>>>>
2986 : _SizeConstant<_Abi::template _S_size<_Tp>> {};
2987
2988template <typename _Tp, typename _Abi = simd_abi::__default_abi<_Tp>>
2989 struct simd_size : __simd_size_impl<_Tp, _Abi> {};
2990
2991template <typename _Tp, typename _Abi = simd_abi::__default_abi<_Tp>>
2992 inline constexpr size_t simd_size_v = simd_size<_Tp, _Abi>::value;
2993
2994// simd_abi::deduce {{{2
2995template <typename _Tp, size_t _Np, typename = void>
2996 struct __deduce_impl;
2997
2998template <typename _Tp, size_t _Np, typename = void>
2999 struct __no_sve_deduce_impl;
3000
3001namespace simd_abi {
3002/**
3003 * @tparam _Tp The requested `value_type` for the elements.
3004 * @tparam _Np The requested number of elements.
3005 * @tparam _Abis This parameter is ignored, since this implementation cannot
3006 * make any use of it. Either a good native ABI is matched and used as `type`
3007 * alias, or the `fixed_size<_Np>` ABI is used, which internally is built from
3008 * the best matching native ABIs.
3009 */
3010template <typename _Tp, size_t _Np, typename...>
3011 struct deduce : __deduce_impl<_Tp, _Np> {};
3012
3013template <typename _Tp, size_t _Np, typename... _Abis>
3014 using deduce_t = typename deduce<_Tp, _Np, _Abis...>::type;
3015
3016template <typename _Tp, size_t _Np, typename...>
3017 struct __no_sve_deduce : __no_sve_deduce_impl<_Tp, _Np> {};
3018
3019template <typename _Tp, size_t _Np, typename... _Abis>
3020 using __no_sve_deduce_t = typename __no_sve_deduce<_Tp, _Np, _Abis...>::type;
3021} // namespace simd_abi
3022
3023// }}}2
3024// rebind_simd {{{2
3025template <typename _Tp, typename _V, typename = void>
3026 struct rebind_simd;
3027
3028template <typename _Tp, typename _Up, typename _Abi>
3029 struct rebind_simd<_Tp, simd<_Up, _Abi>,
3030 void_t<std::conditional_t<!__is_sve_abi<_Abi>(),
3031 simd_abi::__no_sve_deduce_t<_Tp, simd_size_v<_Up, _Abi>, _Abi>,
3032 simd_abi::deduce_t<_Tp, simd_size_v<_Up, _Abi>, _Abi>>>>
3033 {
3034 using type = simd<_Tp, std::conditional_t<
3035 !__is_sve_abi<_Abi>(),
3036 simd_abi::__no_sve_deduce_t<_Tp, simd_size_v<_Up, _Abi>, _Abi>,
3037 simd_abi::deduce_t<_Tp, simd_size_v<_Up, _Abi>, _Abi>>>;
3038 };
3039
3040template <typename _Tp, typename _Up, typename _Abi>
3041 struct rebind_simd<_Tp, simd_mask<_Up, _Abi>,
3042 void_t<std::conditional_t<!__is_sve_abi<_Abi>(),
3043 simd_abi::__no_sve_deduce_t<_Tp, simd_size_v<_Up, _Abi>, _Abi>,
3044 simd_abi::deduce_t<_Tp, simd_size_v<_Up, _Abi>, _Abi>>>>
3045 {
3046 using type = simd_mask<_Tp, std::conditional_t<
3047 !__is_sve_abi<_Abi>(),
3048 simd_abi::__no_sve_deduce_t<_Tp, simd_size_v<_Up, _Abi>, _Abi>,
3049 simd_abi::deduce_t<_Tp, simd_size_v<_Up, _Abi>, _Abi>>>;
3050 };
3051
3052template <typename _Tp, typename _V>
3053 using rebind_simd_t = typename rebind_simd<_Tp, _V>::type;
3054
3055// resize_simd {{{2
3056template <int _Np, typename _V, typename = void>
3057 struct resize_simd;
3058
3059template <int _Np, typename _Tp, typename _Abi>
3060 struct resize_simd<_Np, simd<_Tp, _Abi>, void_t<simd_abi::deduce_t<_Tp, _Np, _Abi>>>
3061 { using type = simd<_Tp, simd_abi::deduce_t<_Tp, _Np, _Abi>>; };
3062
3063template <int _Np, typename _Tp, typename _Abi>
3064 struct resize_simd<_Np, simd_mask<_Tp, _Abi>, void_t<simd_abi::deduce_t<_Tp, _Np, _Abi>>>
3065 { using type = simd_mask<_Tp, simd_abi::deduce_t<_Tp, _Np, _Abi>>; };
3066
3067template <int _Np, typename _V>
3068 using resize_simd_t = typename resize_simd<_Np, _V>::type;
3069
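// Usage sketch (illustrative only): rebind keeps the element count while
// changing the value_type; resize keeps the value_type, e.g.
//   using _F = native_simd<float>;
//   static_assert(rebind_simd_t<int, _F>::size() == _F::size());
//   static_assert(resize_simd_t<4, _F>::size() == 4);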
3070// }}}2
3071// memory_alignment {{{2
3072template <typename _Tp, typename _Up = typename _Tp::value_type>
3073 struct memory_alignment
3074 : public _SizeConstant<vector_aligned_tag::_S_alignment<_Tp, _Up>> {};
3075
3076template <typename _Tp, typename _Up = typename _Tp::value_type>
3077 inline constexpr size_t memory_alignment_v = memory_alignment<_Tp, _Up>::value;
3078
3079// class template simd [simd] {{{1
3080template <typename _Tp, typename _Abi = simd_abi::__default_abi<_Tp>>
3081 class simd;
3082
3083template <typename _Tp, typename _Abi>
3084 struct is_simd<simd<_Tp, _Abi>> : public true_type {};
3085
3086template <typename _Tp>
3087 using native_simd = simd<_Tp, simd_abi::native<_Tp>>;
3088
3089template <typename _Tp, int _Np>
3090 using fixed_size_simd = simd<_Tp, simd_abi::fixed_size<_Np>>;
3091
3092template <typename _Tp, size_t _Np>
3093 using __deduced_simd = simd<_Tp, simd_abi::deduce_t<_Tp, _Np>>;
3094
3095// class template simd_mask [simd_mask] {{{1
3096template <typename _Tp, typename _Abi = simd_abi::__default_abi<_Tp>>
3097 class simd_mask;
3098
3099template <typename _Tp, typename _Abi>
3100 struct is_simd_mask<simd_mask<_Tp, _Abi>> : public true_type {};
3101
3102template <typename _Tp>
3103 using native_simd_mask = simd_mask<_Tp, simd_abi::native<_Tp>>;
3104
3105template <typename _Tp, int _Np>
3106 using fixed_size_simd_mask = simd_mask<_Tp, simd_abi::fixed_size<_Np>>;
3107
3108template <typename _Tp, size_t _Np>
3109 using __deduced_simd_mask = simd_mask<_Tp, simd_abi::deduce_t<_Tp, _Np>>;
3110
3111// casts [simd.casts] {{{1
3112// static_simd_cast {{{2
3113template <typename _Tp, typename _Up, typename _Ap, bool = is_simd_v<_Tp>, typename = void>
3114 struct __static_simd_cast_return_type;
3115
3116template <typename _Tp, typename _A0, typename _Up, typename _Ap>
3117 struct __static_simd_cast_return_type<simd_mask<_Tp, _A0>, _Up, _Ap, false, void>
3118 : __static_simd_cast_return_type<simd<_Tp, _A0>, _Up, _Ap> {};
3119
3120template <typename _Tp, typename _Up, typename _Ap>
3121 struct __static_simd_cast_return_type<
3122 _Tp, _Up, _Ap, true, enable_if_t<_Tp::size() == simd_size_v<_Up, _Ap>>>
3123 { using type = _Tp; };
3124
3125template <typename _Tp, typename _Ap>
3126 struct __static_simd_cast_return_type<_Tp, _Tp, _Ap, false,
3127#ifdef _GLIBCXX_SIMD_FIX_P2TS_ISSUE66
3128 enable_if_t<__is_vectorizable_v<_Tp>>
3129#else
3130 void
3131#endif
3132 >
3133 { using type = simd<_Tp, _Ap>; };
3134
3135template <typename _Tp, typename = void>
3136 struct __safe_make_signed { using type = _Tp; };
3137
3138template <typename _Tp>
3139 struct __safe_make_signed<_Tp, enable_if_t<is_integral_v<_Tp>>>
3140 {
3141 // the extra make_unsigned_t is because of PR85951
3142 using type = make_signed_t<make_unsigned_t<_Tp>>;
3143 };
3144
3145template <typename _Tp>
3146 using safe_make_signed_t = typename __safe_make_signed<_Tp>::type;
3147
3148template <typename _Tp, typename _Up, typename _Ap>
3149 struct __static_simd_cast_return_type<_Tp, _Up, _Ap, false,
3150#ifdef _GLIBCXX_SIMD_FIX_P2TS_ISSUE66
3151 enable_if_t<__is_vectorizable_v<_Tp>>
3152#else
3153 void
3154#endif
3155 >
3156 {
3157 using type = conditional_t<
3158 (is_integral_v<_Up> && is_integral_v<_Tp> &&
3159#ifndef _GLIBCXX_SIMD_FIX_P2TS_ISSUE65
3160 is_signed_v<_Up> != is_signed_v<_Tp> &&
3161#endif
3162 is_same_v<safe_make_signed_t<_Up>, safe_make_signed_t<_Tp>>),
3163 simd<_Tp, _Ap>, fixed_size_simd<_Tp, simd_size_v<_Up, _Ap>>>;
3164 };
3165
3166template <typename _Tp, typename _Up, typename _Ap,
3167 typename _R
3168 = typename __static_simd_cast_return_type<_Tp, _Up, _Ap>::type>
3169 _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR _R
3170 static_simd_cast(const simd<_Up, _Ap>& __x)
3171 {
3172 if constexpr (is_same<_R, simd<_Up, _Ap>>::value)
3173 return __x;
3174 else
3175 {
3176 _SimdConverter<_Up, _Ap, typename _R::value_type, typename _R::abi_type>
3177 __c;
3178 return _R(__private_init, __c(__data(__x)));
3179 }
3180 }
3181
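// Usage sketch (illustrative only): element-wise value conversion that keeps
// the source width, deducing a matching ABI for the new value_type, e.g.
//   fixed_size_simd<float, 4> __f([](auto __i) { return __i + 0.5f; });
//   auto __n = static_simd_cast<int>(__f); // 4 ints: {0, 1, 2, 3}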
3182namespace __proposed {
3183template <typename _Tp, typename _Up, typename _Ap,
3184 typename _R
3185 = typename __static_simd_cast_return_type<_Tp, _Up, _Ap>::type>
3186 _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR typename _R::mask_type
3187 static_simd_cast(const simd_mask<_Up, _Ap>& __x)
3188 {
3189 using _RM = typename _R::mask_type;
3190 return {__private_init, _RM::abi_type::_MaskImpl::template _S_convert<
3191 typename _RM::simd_type::value_type>(__x)};
3192 }
3193
3194template <typename _To, typename _Up, typename _Abi>
3195 _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR
3196 _To
3197 simd_bit_cast(const simd<_Up, _Abi>& __x)
3198 {
3199 using _Tp = typename _To::value_type;
3200 using _ToMember = typename _SimdTraits<_Tp, typename _To::abi_type>::_SimdMember;
3201 using _From = simd<_Up, _Abi>;
3202 using _FromMember = typename _SimdTraits<_Up, _Abi>::_SimdMember;
3203 // with concepts, the following should be constraints
3204 static_assert(sizeof(_To) == sizeof(_From));
3205 static_assert(is_trivially_copyable_v<_Tp> && is_trivially_copyable_v<_Up>);
3206 static_assert(is_trivially_copyable_v<_ToMember> && is_trivially_copyable_v<_FromMember>);
3207#if __has_builtin(__builtin_bit_cast)
3208 return {__private_init, __builtin_bit_cast(_ToMember, __data(__x))};
3209#else
3210 return {__private_init, __bit_cast<_ToMember>(__data(__x))};
3211#endif
3212 }
3213
3214template <typename _To, typename _Up, typename _Abi>
3215 _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR
3216 _To
3217 simd_bit_cast(const simd_mask<_Up, _Abi>& __x)
3218 {
3219 using _From = simd_mask<_Up, _Abi>;
3220 static_assert(sizeof(_To) == sizeof(_From));
3221 static_assert(is_trivially_copyable_v<_From>);
3222 // _To can be simd<T, A>, specifically simd<T, fixed_size<N>>, in which case _To is not trivially
3223 // copyable.
3224 if constexpr (is_simd_v<_To>)
3225 {
3226 using _Tp = typename _To::value_type;
3227 using _ToMember = typename _SimdTraits<_Tp, typename _To::abi_type>::_SimdMember;
3228 static_assert(is_trivially_copyable_v<_ToMember>);
3229#if __has_builtin(__builtin_bit_cast)
3230 return {__private_init, __builtin_bit_cast(_ToMember, __x)};
3231#else
3232 return {__private_init, __bit_cast<_ToMember>(__x)};
3233#endif
3234 }
3235 else
3236 {
3237 static_assert(is_trivially_copyable_v<_To>);
3238#if __has_builtin(__builtin_bit_cast)
3239 return __builtin_bit_cast(_To, __x);
3240#else
3241 return __bit_cast<_To>(__x);
3242#endif
3243 }
3244 }
3245} // namespace __proposed
3246
3247// simd_cast {{{2
3248template <typename _Tp, typename _Up, typename _Ap,
3249 typename _To = __value_type_or_identity_t<_Tp>>
3250 _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR auto
3251 simd_cast(const simd<_ValuePreserving<_Up, _To>, _Ap>& __x)
3252 -> decltype(static_simd_cast<_Tp>(__x))
3253 { return static_simd_cast<_Tp>(__x); }
3254
3255namespace __proposed {
3256template <typename _Tp, typename _Up, typename _Ap,
3257 typename _To = __value_type_or_identity_t<_Tp>>
3258 _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR auto
3259 simd_cast(const simd_mask<_ValuePreserving<_Up, _To>, _Ap>& __x)
3260 -> decltype(static_simd_cast<_Tp>(__x))
3261 { return static_simd_cast<_Tp>(__x); }
3262} // namespace __proposed
3263
3264// }}}2
3265// resizing_simd_cast {{{
3266namespace __proposed {
3267/* Proposed spec:
3268
3269template <class T, class U, class Abi>
3270T resizing_simd_cast(const simd<U, Abi>& x)
3271
3272p1 Constraints:
3273 - is_simd_v<T> is true and
3274 - T::value_type is the same type as U
3275
3276p2 Returns:
3277 A simd object with the i^th element initialized to x[i] for all i in the
3278 range of [0, min(T::size(), simd_size_v<U, Abi>)). If T::size() is larger
3279 than simd_size_v<U, Abi>, the remaining elements are value-initialized.
3280
3281template <class T, class U, class Abi>
3282T resizing_simd_cast(const simd_mask<U, Abi>& x)
3283
3284p1 Constraints: is_simd_mask_v<T> is true
3285
3286p2 Returns:
3287 A simd_mask object with the i^th element initialized to x[i] for all i in
3288the range of [0, min(T::size(), simd_size_v<U, Abi>)). If T::size() is larger
3289 than simd_size_v<U, Abi>, the remaining elements are initialized to false.
3290
3291 */
3292
3293template <typename _Tp, typename _Up, typename _Ap>
3294 _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR enable_if_t<
3295 conjunction_v<is_simd<_Tp>, is_same<typename _Tp::value_type, _Up>>, _Tp>
3296 resizing_simd_cast(const simd<_Up, _Ap>& __x)
3297 {
3298 if constexpr (is_same_v<typename _Tp::abi_type, _Ap>)
3299 return __x;
3300 else if (__builtin_is_constant_evaluated())
3301 return _Tp([&](auto __i) constexpr {
3302 return __i < simd_size_v<_Up, _Ap> ? __x[__i] : _Up();
3303 });
3304 else if constexpr (simd_size_v<_Up, _Ap> == 1)
3305 {
3306 _Tp __r{};
3307 __r[0] = __x[0];
3308 return __r;
3309 }
3310 else if constexpr (_Tp::size() == 1)
3311 return __x[0];
3312 else if constexpr (sizeof(_Tp) == sizeof(__x)
3313 && !__is_fixed_size_abi_v<_Ap> && !__is_sve_abi<_Ap>())
3314 return {__private_init,
3315 __vector_bitcast<typename _Tp::value_type, _Tp::size()>(
3316 _Ap::_S_masked(__data(__x))._M_data)};
3317 else
3318 {
3319 _Tp __r{};
3320 __builtin_memcpy(&__data(__r), &__data(__x),
3321 sizeof(_Up)
3322 * std::min(_Tp::size(), simd_size_v<_Up, _Ap>));
3323 return __r;
3324 }
3325 }
3326
3327template <typename _Tp, typename _Up, typename _Ap>
3328 _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR
3329 enable_if_t<is_simd_mask_v<_Tp>, _Tp>
3330 resizing_simd_cast(const simd_mask<_Up, _Ap>& __x)
3331 {
3332 return {__private_init, _Tp::abi_type::_MaskImpl::template _S_convert<
3333 typename _Tp::simd_type::value_type>(__x)};
3334 }
3335} // namespace __proposed
3336
3337// }}}
3338// to_fixed_size {{{2
3339template <typename _Tp, int _Np>
3340 _GLIBCXX_SIMD_INTRINSIC fixed_size_simd<_Tp, _Np>
3341 to_fixed_size(const fixed_size_simd<_Tp, _Np>& __x)
3342 { return __x; }
3343
3344template <typename _Tp, int _Np>
3345 _GLIBCXX_SIMD_INTRINSIC fixed_size_simd_mask<_Tp, _Np>
3346 to_fixed_size(const fixed_size_simd_mask<_Tp, _Np>& __x)
3347 { return __x; }
3348
3349template <typename _Tp, typename _Ap>
3350 _GLIBCXX_SIMD_INTRINSIC fixed_size_simd<_Tp, simd_size_v<_Tp, _Ap>>
3351 to_fixed_size(const simd<_Tp, _Ap>& __x)
3352 {
3353 using _Rp = fixed_size_simd<_Tp, simd_size_v<_Tp, _Ap>>;
3354 return _Rp([&__x](auto __i) constexpr _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA { return __x[__i]; });
3355 }
3356
3357template <typename _Tp, typename _Ap>
3358 _GLIBCXX_SIMD_INTRINSIC fixed_size_simd_mask<_Tp, simd_size_v<_Tp, _Ap>>
3359 to_fixed_size(const simd_mask<_Tp, _Ap>& __x)
3360 {
3361 return {__private_init,
3362 [&](auto __i) constexpr _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA { return __x[__i]; }};
3363 }
3364
3365// to_native {{{2
3366template <typename _Tp, int _Np>
3367 _GLIBCXX_SIMD_INTRINSIC
3368 enable_if_t<(_Np == native_simd<_Tp>::size()), native_simd<_Tp>>
3369 to_native(const fixed_size_simd<_Tp, _Np>& __x)
3370 {
3371 alignas(memory_alignment_v<native_simd<_Tp>>) _Tp __mem[_Np];
3372 __x.copy_to(__mem, vector_aligned);
3373 return {__mem, vector_aligned};
3374 }
3375
3376template <typename _Tp, int _Np>
3377 _GLIBCXX_SIMD_INTRINSIC
3378 enable_if_t<(_Np == native_simd_mask<_Tp>::size()), native_simd_mask<_Tp>>
3379 to_native(const fixed_size_simd_mask<_Tp, _Np>& __x)
3380 {
3381 return native_simd_mask<_Tp>(
3382 __private_init,
3383 [&](auto __i) constexpr _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA { return __x[__i]; });
3384 }
3385
3386// to_compatible {{{2
3387template <typename _Tp, int _Np>
3388 _GLIBCXX_SIMD_INTRINSIC enable_if_t<(_Np == simd<_Tp>::size()), simd<_Tp>>
3389 to_compatible(const simd<_Tp, simd_abi::fixed_size<_Np>>& __x)
3390 {
3391 alignas(memory_alignment_v<simd<_Tp>>) _Tp __mem[_Np];
3392 __x.copy_to(__mem, vector_aligned);
3393 return {__mem, vector_aligned};
3394 }
3395
3396template <typename _Tp, int _Np>
3397 _GLIBCXX_SIMD_INTRINSIC
3398 enable_if_t<(_Np == simd_mask<_Tp>::size()), simd_mask<_Tp>>
3399 to_compatible(const simd_mask<_Tp, simd_abi::fixed_size<_Np>>& __x)
3400 {
3401 return simd_mask<_Tp>(
3402 __private_init,
3403 [&](auto __i) constexpr _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA { return __x[__i]; });
3404 }
3405
3406// masked assignment [simd_mask.where] {{{1
3407
3408// where_expression {{{1
3409// const_where_expression<M, T> {{{2
3410template <typename _M, typename _Tp>
3411 class const_where_expression
3412 {
3413 using _V = _Tp;
3414 static_assert(is_same_v<_V, __remove_cvref_t<_Tp>>);
3415
3416 struct _Wrapper { using value_type = _V; };
3417
3418 protected:
3419 using _Impl = typename _V::_Impl;
3420
3421 using value_type =
3422 typename conditional_t<is_arithmetic_v<_V>, _Wrapper, _V>::value_type;
3423
3424 _GLIBCXX_SIMD_INTRINSIC friend const _M&
3425 __get_mask(const const_where_expression& __x)
3426 { return __x._M_k; }
3427
3428 _GLIBCXX_SIMD_INTRINSIC friend const _Tp&
3429 __get_lvalue(const const_where_expression& __x)
3430 { return __x._M_value; }
3431
3432 const _M& _M_k;
3433 _Tp& _M_value;
3434
3435 public:
3436 const_where_expression(const const_where_expression&) = delete;
3437
3438 const_where_expression& operator=(const const_where_expression&) = delete;
3439
3440 _GLIBCXX_SIMD_INTRINSIC constexpr
3441 const_where_expression(const _M& __kk, const _Tp& dd)
3442 : _M_k(__kk), _M_value(const_cast<_Tp&>(dd)) {}
3443
3444 _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR _V
3445 operator-() const&&
3446 {
3447 return {__private_init,
3448 _Impl::template _S_masked_unary<negate>(__data(_M_k),
3449 __data(_M_value))};
3450 }
3451
3452 template <typename _Up, typename _Flags>
3453 [[nodiscard]] _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR _V
3454 copy_from(const _LoadStorePtr<_Up, value_type>* __mem, _IsSimdFlagType<_Flags>) const&&
3455 {
3456 return {__private_init,
3457 _Impl::_S_masked_load(__data(_M_value), __data(_M_k),
3458 _Flags::template _S_apply<_V>(__mem))};
3459 }
3460
3461 template <typename _Up, typename _Flags>
3462 _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR void
3463 copy_to(_LoadStorePtr<_Up, value_type>* __mem, _IsSimdFlagType<_Flags>) const&&
3464 {
3465 _Impl::_S_masked_store(__data(_M_value),
3466 _Flags::template _S_apply<_V>(__mem),
3467 __data(_M_k));
3468 }
3469 };
3470
3471// const_where_expression<bool, T> {{{2
3472template <typename _Tp>
3473 class const_where_expression<bool, _Tp>
3474 {
3475 using _M = bool;
3476 using _V = _Tp;
3477
3478 static_assert(is_same_v<_V, __remove_cvref_t<_Tp>>);
3479
3480 struct _Wrapper { using value_type = _V; };
3481
3482 protected:
3483 using value_type
3484 = typename conditional_t<is_arithmetic_v<_V>, _Wrapper, _V>::value_type;
3485
3486 _GLIBCXX_SIMD_INTRINSIC friend const _M&
3487 __get_mask(const const_where_expression& __x)
3488 { return __x._M_k; }
3489
3490 _GLIBCXX_SIMD_INTRINSIC friend const _Tp&
3491 __get_lvalue(const const_where_expression& __x)
3492 { return __x._M_value; }
3493
3494 const bool _M_k;
3495 _Tp& _M_value;
3496
3497 public:
3498 const_where_expression(const const_where_expression&) = delete;
3499 const_where_expression& operator=(const const_where_expression&) = delete;
3500
3501 _GLIBCXX_SIMD_INTRINSIC constexpr
3502 const_where_expression(const bool __kk, const _Tp& dd)
3503 : _M_k(__kk), _M_value(const_cast<_Tp&>(dd)) {}
3504
3505 _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR _V
3506 operator-() const&&
3507 { return _M_k ? -_M_value : _M_value; }
3508
3509 template <typename _Up, typename _Flags>
3510 [[nodiscard]] _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR _V
3511 copy_from(const _LoadStorePtr<_Up, value_type>* __mem, _IsSimdFlagType<_Flags>) const&&
3512 { return _M_k ? static_cast<_V>(__mem[0]) : _M_value; }
3513
3514 template <typename _Up, typename _Flags>
3515 _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR void
3516 copy_to(_LoadStorePtr<_Up, value_type>* __mem, _IsSimdFlagType<_Flags>) const&&
3517 {
3518 if (_M_k)
3519 __mem[0] = _M_value;
3520 }
3521 };
3522
3523// where_expression<M, T> {{{2
3524template <typename _M, typename _Tp>
3525 class where_expression : public const_where_expression<_M, _Tp>
3526 {
3527 using _Impl = typename const_where_expression<_M, _Tp>::_Impl;
3528
3529 static_assert(!is_const<_Tp>::value,
3530		  "where_expression may only be instantiated with a non-const "
3531 "_Tp parameter");
3532
3533 using typename const_where_expression<_M, _Tp>::value_type;
3534 using const_where_expression<_M, _Tp>::_M_k;
3535 using const_where_expression<_M, _Tp>::_M_value;
3536
3537 static_assert(
3538 is_same<typename _M::abi_type, typename _Tp::abi_type>::value, "");
3539 static_assert(_M::size() == _Tp::size(), "");
3540
3541 _GLIBCXX_SIMD_INTRINSIC friend constexpr _Tp&
3542 __get_lvalue(where_expression& __x)
3543 { return __x._M_value; }
3544
3545 public:
3546 where_expression(const where_expression&) = delete;
3547 where_expression& operator=(const where_expression&) = delete;
3548
3549 _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR
3550      where_expression(const _M& __kk, _Tp& __dd)
3551      : const_where_expression<_M, _Tp>(__kk, __dd) {}
3552
3553 template <typename _Up>
3554 _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR void
3555 operator=(_Up&& __x) &&
3556 {
3557 _Impl::_S_masked_assign(__data(_M_k), __data(_M_value),
3558 __to_value_type_or_member_type<_Tp>(
3559 static_cast<_Up&&>(__x)));
3560 }
3561
3562#define _GLIBCXX_SIMD_OP_(__op, __name) \
3563 template <typename _Up> \
3564 _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR void \
3565 operator __op##=(_Up&& __x)&& \
3566 { \
3567 _Impl::template _S_masked_cassign( \
3568 __data(_M_k), __data(_M_value), \
3569 __to_value_type_or_member_type<_Tp>(static_cast<_Up&&>(__x)), \
3570 [](auto __impl, auto __lhs, auto __rhs) \
3571 constexpr _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA \
3572 { return __impl.__name(__lhs, __rhs); }); \
3573 } \
3574 static_assert(true)
3575 _GLIBCXX_SIMD_OP_(+, _S_plus);
3576 _GLIBCXX_SIMD_OP_(-, _S_minus);
3577 _GLIBCXX_SIMD_OP_(*, _S_multiplies);
3578 _GLIBCXX_SIMD_OP_(/, _S_divides);
3579 _GLIBCXX_SIMD_OP_(%, _S_modulus);
3580 _GLIBCXX_SIMD_OP_(&, _S_bit_and);
3581 _GLIBCXX_SIMD_OP_(|, _S_bit_or);
3582 _GLIBCXX_SIMD_OP_(^, _S_bit_xor);
3583 _GLIBCXX_SIMD_OP_(<<, _S_shift_left);
3584 _GLIBCXX_SIMD_OP_(>>, _S_shift_right);
3585#undef _GLIBCXX_SIMD_OP_
3586
3587 _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR void
3588 operator++() &&
3589 {
3590 __data(_M_value)
3591 = _Impl::template _S_masked_unary<__increment>(__data(_M_k), __data(_M_value));
3592 }
3593
3594 _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR void
3595 operator++(int) &&
3596 {
3597 __data(_M_value)
3598 = _Impl::template _S_masked_unary<__increment>(__data(_M_k), __data(_M_value));
3599 }
3600
3601 _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR void
3602 operator--() &&
3603 {
3604 __data(_M_value)
3605 = _Impl::template _S_masked_unary<__decrement>(__data(_M_k), __data(_M_value));
3606 }
3607
3608 _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR void
3609 operator--(int) &&
3610 {
3611 __data(_M_value)
3612 = _Impl::template _S_masked_unary<__decrement>(__data(_M_k), __data(_M_value));
3613 }
3614
3615 // intentionally hides const_where_expression::copy_from
3616 template <typename _Up, typename _Flags>
3617 _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR void
3618 copy_from(const _LoadStorePtr<_Up, value_type>* __mem, _IsSimdFlagType<_Flags>) &&
3619 {
3620 __data(_M_value) = _Impl::_S_masked_load(__data(_M_value), __data(_M_k),
3621 _Flags::template _S_apply<_Tp>(__mem));
3622 }
3623 };
3624
3625// where_expression<bool, T> {{{2
3626template <typename _Tp>
3627 class where_expression<bool, _Tp>
3628 : public const_where_expression<bool, _Tp>
3629 {
3630 using _M = bool;
3631 using typename const_where_expression<_M, _Tp>::value_type;
3632 using const_where_expression<_M, _Tp>::_M_k;
3633 using const_where_expression<_M, _Tp>::_M_value;
3634
3635 public:
3636 where_expression(const where_expression&) = delete;
3637 where_expression& operator=(const where_expression&) = delete;
3638
3639 _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR
3640      where_expression(const _M& __kk, _Tp& __dd)
3641      : const_where_expression<_M, _Tp>(__kk, __dd) {}
3642
3643#define _GLIBCXX_SIMD_OP_(__op) \
3644 template <typename _Up> \
3645 _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR void \
3646 operator __op(_Up&& __x)&& \
3647 { if (_M_k) _M_value __op static_cast<_Up&&>(__x); }
3648
3649 _GLIBCXX_SIMD_OP_(=)
3650 _GLIBCXX_SIMD_OP_(+=)
3651 _GLIBCXX_SIMD_OP_(-=)
3652 _GLIBCXX_SIMD_OP_(*=)
3653 _GLIBCXX_SIMD_OP_(/=)
3654 _GLIBCXX_SIMD_OP_(%=)
3655 _GLIBCXX_SIMD_OP_(&=)
3656 _GLIBCXX_SIMD_OP_(|=)
3657 _GLIBCXX_SIMD_OP_(^=)
3658 _GLIBCXX_SIMD_OP_(<<=)
3659 _GLIBCXX_SIMD_OP_(>>=)
3660 #undef _GLIBCXX_SIMD_OP_
3661
3662 _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR void
3663 operator++() &&
3664 { if (_M_k) ++_M_value; }
3665
3666 _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR void
3667 operator++(int) &&
3668 { if (_M_k) ++_M_value; }
3669
3670 _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR void
3671 operator--() &&
3672 { if (_M_k) --_M_value; }
3673
3674 _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR void
3675 operator--(int) &&
3676 { if (_M_k) --_M_value; }
3677
3678 // intentionally hides const_where_expression::copy_from
3679 template <typename _Up, typename _Flags>
3680 _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR void
3681 copy_from(const _LoadStorePtr<_Up, value_type>* __mem, _IsSimdFlagType<_Flags>) &&
3682 { if (_M_k) _M_value = __mem[0]; }
3683 };
3684
3685// where {{{1
3686template <typename _Tp, typename _Ap>
3687 _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR
3688 where_expression<simd_mask<_Tp, _Ap>, simd<_Tp, _Ap>>
3689 where(const typename simd<_Tp, _Ap>::mask_type& __k, simd<_Tp, _Ap>& __value)
3690 { return {__k, __value}; }
3691
3692template <typename _Tp, typename _Ap>
3693 _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR
3694 const_where_expression<simd_mask<_Tp, _Ap>, simd<_Tp, _Ap>>
3695 where(const typename simd<_Tp, _Ap>::mask_type& __k, const simd<_Tp, _Ap>& __value)
3696 { return {__k, __value}; }
3697
3698template <typename _Tp, typename _Ap>
3699 _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR
3700 where_expression<simd_mask<_Tp, _Ap>, simd_mask<_Tp, _Ap>>
3701 where(const remove_const_t<simd_mask<_Tp, _Ap>>& __k, simd_mask<_Tp, _Ap>& __value)
3702 { return {__k, __value}; }
3703
3704template <typename _Tp, typename _Ap>
3705 _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR
3706 const_where_expression<simd_mask<_Tp, _Ap>, simd_mask<_Tp, _Ap>>
3707 where(const remove_const_t<simd_mask<_Tp, _Ap>>& __k, const simd_mask<_Tp, _Ap>& __value)
3708 { return {__k, __value}; }
3709
3710template <typename _Tp>
3711 _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR where_expression<bool, _Tp>
3712 where(_ExactBool __k, _Tp& __value)
3713 { return {__k, __value}; }
3714
3715template <typename _Tp>
3716 _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR const_where_expression<bool, _Tp>
3717 where(_ExactBool __k, const _Tp& __value)
3718 { return {__k, __value}; }
3719
3720template <typename _Tp, typename _Ap>
3721 _GLIBCXX_SIMD_CONSTEXPR void
3722 where(bool __k, simd<_Tp, _Ap>& __value) = delete;
3723
3724template <typename _Tp, typename _Ap>
3725 _GLIBCXX_SIMD_CONSTEXPR void
3726 where(bool __k, const simd<_Tp, _Ap>& __value) = delete;
3727
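// Example (editorial sketch; assuming namespace stdx = std::experimental):
// where() selects which elements a subsequent (compound) assignment may
// modify; the plain-bool overloads for simd operands are deleted above to
// force use of a simd_mask. `mem` is a hypothetical float*:
//   stdx::simd<float> x = -1.5f;       // broadcast
//   stdx::where(x < 0.f, x) = -x;      // only negative elements are written
//   stdx::where(x > 1.f, x) *= 0.5f;   // masked compound assignment
//   stdx::where(x > 0.f, x).copy_to(mem, stdx::element_aligned); // masked store
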
3728// proposed mask iterations {{{1
3729namespace __proposed {
3730template <size_t _Np>
3731 class where_range
3732 {
3733 const bitset<_Np> __bits;
3734
3735 public:
3736 where_range(bitset<_Np> __b) : __bits(__b) {}
3737
3738 class iterator
3739 {
3740 size_t __mask;
3741 size_t __bit;
3742
3743 _GLIBCXX_SIMD_INTRINSIC void
3744 __next_bit()
3745 { __bit = __builtin_ctzl(__mask); }
3746
3747 _GLIBCXX_SIMD_INTRINSIC void
3748 __reset_lsb()
3749 {
3750	  // clear the lowest set bit: 01100100 & (01100100 - 1) == 01100000
3751	  __mask &= (__mask - 1);
3752 // __asm__("btr %1,%0" : "+r"(__mask) : "r"(__bit));
3753 }
3754
3755 public:
3756 iterator(decltype(__mask) __m) : __mask(__m) { __next_bit(); }
3757 iterator(const iterator&) = default;
3758 iterator(iterator&&) = default;
3759
3760 _GLIBCXX_SIMD_ALWAYS_INLINE size_t
3761 operator->() const
3762 { return __bit; }
3763
3764 _GLIBCXX_SIMD_ALWAYS_INLINE size_t
3765 operator*() const
3766 { return __bit; }
3767
3768 _GLIBCXX_SIMD_ALWAYS_INLINE iterator&
3769 operator++()
3770 {
3771 __reset_lsb();
3772 __next_bit();
3773 return *this;
3774 }
3775
3776 _GLIBCXX_SIMD_ALWAYS_INLINE iterator
3777 operator++(int)
3778 {
3779 iterator __tmp = *this;
3780 __reset_lsb();
3781 __next_bit();
3782 return __tmp;
3783 }
3784
3785 _GLIBCXX_SIMD_ALWAYS_INLINE bool
3786 operator==(const iterator& __rhs) const
3787 { return __mask == __rhs.__mask; }
3788
3789 _GLIBCXX_SIMD_ALWAYS_INLINE bool
3790 operator!=(const iterator& __rhs) const
3791 { return __mask != __rhs.__mask; }
3792 };
3793
3794 iterator
3795 begin() const
3796 { return __bits.to_ullong(); }
3797
3798 iterator
3799 end() const
3800 { return 0; }
3801 };
3802
3803template <typename _Tp, typename _Ap>
3804 where_range<simd_size_v<_Tp, _Ap>>
3805 where(const simd_mask<_Tp, _Ap>& __k)
3806 { return __k.__to_bitset(); }
3807
3808} // namespace __proposed
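
// Example (editorial sketch): iterate the indices of all set mask elements,
// lowest index first; `process` is a hypothetical callback:
//   stdx::simd_mask<float> k = ...;
//   for (size_t i : stdx::__proposed::where(k))
//     process(i);   // called once per set element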
3809
3810// }}}1
3811// reductions [simd.reductions] {{{1
3812template <typename _Tp, typename _Abi, typename _BinaryOperation = plus<>>
3813 _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR _Tp
3814 reduce(const simd<_Tp, _Abi>& __v, _BinaryOperation __binary_op = _BinaryOperation())
3815 { return _Abi::_SimdImpl::_S_reduce(__v, __binary_op); }
3816
3817template <typename _M, typename _V, typename _BinaryOperation = plus<>>
3818 _GLIBCXX_SIMD_INTRINSIC typename _V::value_type
3819 reduce(const const_where_expression<_M, _V>& __x,
3820 typename _V::value_type __identity_element, _BinaryOperation __binary_op)
3821 {
3822 if (__builtin_expect(none_of(__get_mask(__x)), false))
3823 return __identity_element;
3824
3825 _V __tmp = __identity_element;
3826 _V::_Impl::_S_masked_assign(__data(__get_mask(__x)), __data(__tmp),
3827 __data(__get_lvalue(__x)));
3828 return reduce(__tmp, __binary_op);
3829 }
3830
3831template <typename _M, typename _V>
3832 _GLIBCXX_SIMD_INTRINSIC typename _V::value_type
3833 reduce(const const_where_expression<_M, _V>& __x, plus<> __binary_op = {})
3834 { return reduce(__x, 0, __binary_op); }
3835
3836template <typename _M, typename _V>
3837 _GLIBCXX_SIMD_INTRINSIC typename _V::value_type
3838 reduce(const const_where_expression<_M, _V>& __x, multiplies<> __binary_op)
3839 { return reduce(__x, 1, __binary_op); }
3840
3841template <typename _M, typename _V>
3842 _GLIBCXX_SIMD_INTRINSIC typename _V::value_type
3843 reduce(const const_where_expression<_M, _V>& __x, bit_and<> __binary_op)
3844 { return reduce(__x, ~typename _V::value_type(), __binary_op); }
3845
3846template <typename _M, typename _V>
3847 _GLIBCXX_SIMD_INTRINSIC typename _V::value_type
3848 reduce(const const_where_expression<_M, _V>& __x, bit_or<> __binary_op)
3849 { return reduce(__x, 0, __binary_op); }
3850
3851template <typename _M, typename _V>
3852 _GLIBCXX_SIMD_INTRINSIC typename _V::value_type
3853 reduce(const const_where_expression<_M, _V>& __x, bit_xor<> __binary_op)
3854 { return reduce(__x, 0, __binary_op); }
3855
3856template <typename _Tp, typename _Abi>
3857 _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR _Tp
3858 hmin(const simd<_Tp, _Abi>& __v) noexcept
3859 { return _Abi::_SimdImpl::_S_reduce(__v, __detail::_Minimum()); }
3860
3861template <typename _Tp, typename _Abi>
3862 _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR _Tp
3863 hmax(const simd<_Tp, _Abi>& __v) noexcept
3864 { return _Abi::_SimdImpl::_S_reduce(__v, __detail::_Maximum()); }
3865
3866template <typename _M, typename _V>
3867 _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR
3868 typename _V::value_type
3869 hmin(const const_where_expression<_M, _V>& __x) noexcept
3870 {
3871 using _Tp = typename _V::value_type;
3872 constexpr _Tp __id_elem =
3873#ifdef __FINITE_MATH_ONLY__
3874 __finite_max_v<_Tp>;
3875#else
3876 __value_or<__infinity, _Tp>(__finite_max_v<_Tp>);
3877#endif
3878 _V __tmp = __id_elem;
3879 _V::_Impl::_S_masked_assign(__data(__get_mask(__x)), __data(__tmp),
3880 __data(__get_lvalue(__x)));
3881 return _V::abi_type::_SimdImpl::_S_reduce(__tmp, __detail::_Minimum());
3882 }
3883
3884template <typename _M, typename _V>
3885 _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR
3886 typename _V::value_type
3887 hmax(const const_where_expression<_M, _V>& __x) noexcept
3888 {
3889 using _Tp = typename _V::value_type;
3890 constexpr _Tp __id_elem =
3891#ifdef __FINITE_MATH_ONLY__
3892 __finite_min_v<_Tp>;
3893#else
3894 [] {
3895 if constexpr (__value_exists_v<__infinity, _Tp>)
3896 return -__infinity_v<_Tp>;
3897 else
3898 return __finite_min_v<_Tp>;
3899 }();
3900#endif
3901 _V __tmp = __id_elem;
3902 _V::_Impl::_S_masked_assign(__data(__get_mask(__x)), __data(__tmp),
3903 __data(__get_lvalue(__x)));
3904 return _V::abi_type::_SimdImpl::_S_reduce(__tmp, __detail::_Maximum());
3905 }
3906
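// Example (editorial sketch): reduce over only the selected elements; the
// identity element ensures masked-out elements cannot affect the result:
//   stdx::simd<float> v = ...;
//   stdx::simd_mask<float> k = v > 0.f;
//   float sum = stdx::reduce(stdx::where(k, v), 0.f, std::plus<>());
//   float mn  = stdx::hmin(stdx::where(k, v));  // identity: +inf unless
//                                               // __FINITE_MATH_ONLY__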
3907// }}}1
3908// algorithms [simd.alg] {{{
3909template <typename _Tp, typename _Ap>
3910 _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR simd<_Tp, _Ap>
3911 min(const simd<_Tp, _Ap>& __a, const simd<_Tp, _Ap>& __b)
3912 { return {__private_init, _Ap::_SimdImpl::_S_min(__data(__a), __data(__b))}; }
3913
3914template <typename _Tp, typename _Ap>
3915 _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR simd<_Tp, _Ap>
3916 max(const simd<_Tp, _Ap>& __a, const simd<_Tp, _Ap>& __b)
3917 { return {__private_init, _Ap::_SimdImpl::_S_max(__data(__a), __data(__b))}; }
3918
3919template <typename _Tp, typename _Ap>
3920 _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR
3921 pair<simd<_Tp, _Ap>, simd<_Tp, _Ap>>
3922 minmax(const simd<_Tp, _Ap>& __a, const simd<_Tp, _Ap>& __b)
3923 {
3924    const auto __pair_of_members
3925      = _Ap::_SimdImpl::_S_minmax(__data(__a), __data(__b));
3926    return {simd<_Tp, _Ap>(__private_init, __pair_of_members.first),
3927            simd<_Tp, _Ap>(__private_init, __pair_of_members.second)};
3928 }
3929
3930template <typename _Tp, typename _Ap>
3931 _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR simd<_Tp, _Ap>
3932 clamp(const simd<_Tp, _Ap>& __v, const simd<_Tp, _Ap>& __lo, const simd<_Tp, _Ap>& __hi)
3933 {
3934 using _Impl = typename _Ap::_SimdImpl;
3935 return {__private_init,
3936 _Impl::_S_min(__data(__hi),
3937 _Impl::_S_max(__data(__lo), __data(__v)))};
3938 }
3939
3940// }}}
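
// Example (editorial sketch): element-wise algorithms; clamp computes
// min(hi, max(lo, v)) per element, so lo[i] <= hi[i] is, as with std::clamp,
// the caller's responsibility:
//   stdx::simd<float> r = stdx::clamp(v, lo, hi);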
3941
3942template <size_t... _Sizes, typename _Tp, typename _Ap,
3943 typename = enable_if_t<((_Sizes + ...) == simd<_Tp, _Ap>::size())>>
3944 inline tuple<simd<_Tp, simd_abi::deduce_t<_Tp, _Sizes>>...>
3945 split(const simd<_Tp, _Ap>&);
3946
3947// __extract_part {{{
3948template <int _Index, int _Total, int _Combine = 1, typename _Tp, size_t _Np>
3949 _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_CONST constexpr
3950 _SimdWrapper<_Tp, _Np / _Total * _Combine>
3951 __extract_part(const _SimdWrapper<_Tp, _Np> __x);
3952
3953template <int _Index, int _Parts, int _Combine = 1, typename _Tp, typename _A0, typename... _As>
3954 _GLIBCXX_SIMD_INTRINSIC constexpr auto
3955 __extract_part(const _SimdTuple<_Tp, _A0, _As...>& __x);
3956
3957// }}}
3958// _SizeList {{{
3959template <size_t _V0, size_t... _Values>
3960 struct _SizeList
3961 {
3962 template <size_t _I>
3963 static constexpr size_t
3964 _S_at(_SizeConstant<_I> = {})
3965 {
3966 if constexpr (_I == 0)
3967 return _V0;
3968 else
3969 return _SizeList<_Values...>::template _S_at<_I - 1>();
3970 }
3971
3972 template <size_t _I>
3973 static constexpr auto
3974 _S_before(_SizeConstant<_I> = {})
3975 {
3976 if constexpr (_I == 0)
3977 return _SizeConstant<0>();
3978 else
3979 return _SizeConstant<
3980 _V0 + _SizeList<_Values...>::template _S_before<_I - 1>()>();
3981 }
3982
3983 template <size_t _Np>
3984 static constexpr auto
3985 _S_pop_front(_SizeConstant<_Np> = {})
3986 {
3987 if constexpr (_Np == 0)
3988 return _SizeList();
3989 else
3990 return _SizeList<_Values...>::template _S_pop_front<_Np - 1>();
3991 }
3992 };
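
// Worked example (editorial): _SizeList<2, 4, 2>::_S_at<1>() == 4,
// _SizeList<2, 4, 2>::_S_before<2>() yields _SizeConstant<6> (2 + 4), and
// _SizeList<2, 4, 2>::_S_pop_front<1>() is _SizeList<4, 2>.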
3993
3994// }}}
3995// __extract_center {{{
3996template <typename _Tp, size_t _Np>
3997 _GLIBCXX_SIMD_INTRINSIC _SimdWrapper<_Tp, _Np / 2>
3998 __extract_center(_SimdWrapper<_Tp, _Np> __x)
3999 {
4000 static_assert(_Np >= 4);
4001 static_assert(_Np % 4 == 0); // x0 - x1 - x2 - x3 -> return {x1, x2}
4002#if _GLIBCXX_SIMD_X86INTRIN // {{{
4003 if constexpr (__have_avx512f && sizeof(_Tp) * _Np == 64)
4004 {
4005 const auto __intrin = __to_intrin(__x);
4006 if constexpr (is_integral_v<_Tp>)
4007 return __vector_bitcast<_Tp>(_mm512_castsi512_si256(
4008 _mm512_shuffle_i32x4(__intrin, __intrin,
4009 1 + 2 * 0x4 + 2 * 0x10 + 3 * 0x40)));
4010 else if constexpr (sizeof(_Tp) == 4)
4011 return __vector_bitcast<_Tp>(_mm512_castps512_ps256(
4012 _mm512_shuffle_f32x4(__intrin, __intrin,
4013 1 + 2 * 0x4 + 2 * 0x10 + 3 * 0x40)));
4014 else if constexpr (sizeof(_Tp) == 8)
4015 return __vector_bitcast<_Tp>(_mm512_castpd512_pd256(
4016 _mm512_shuffle_f64x2(__intrin, __intrin,
4017 1 + 2 * 0x4 + 2 * 0x10 + 3 * 0x40)));
4018 else
4019 __assert_unreachable<_Tp>();
4020 }
4021 else if constexpr (sizeof(_Tp) * _Np == 32 && is_floating_point_v<_Tp>)
4022 return __vector_bitcast<_Tp>(
4023 _mm_shuffle_pd(__lo128(__vector_bitcast<double>(__x)),
4024 __hi128(__vector_bitcast<double>(__x)), 1));
4025 else if constexpr (sizeof(__x) == 32 && sizeof(_Tp) * _Np <= 32)
4026 return __vector_bitcast<_Tp>(
4027 _mm_alignr_epi8(__hi128(__vector_bitcast<_LLong>(__x)),
4028 __lo128(__vector_bitcast<_LLong>(__x)),
4029 sizeof(_Tp) * _Np / 4));
4030 else
4031#endif // _GLIBCXX_SIMD_X86INTRIN }}}
4032 {
4033 __vector_type_t<_Tp, _Np / 2> __r;
4034 __builtin_memcpy(&__r,
4035 reinterpret_cast<const char*>(&__x)
4036 + sizeof(_Tp) * _Np / 4,
4037 sizeof(_Tp) * _Np / 2);
4038 return __r;
4039 }
4040 }
4041
4042template <typename _Tp, typename _A0, typename... _As>
4043 _GLIBCXX_SIMD_INTRINSIC
4044 _SimdWrapper<_Tp, _SimdTuple<_Tp, _A0, _As...>::_S_size() / 2>
4045 __extract_center(const _SimdTuple<_Tp, _A0, _As...>& __x)
4046 {
4047 if constexpr (sizeof...(_As) == 0)
4048 return __extract_center(__x.first);
4049 else
4050 return __extract_part<1, 4, 2>(__x);
4051 }
4052
4053// }}}
4054// __split_wrapper {{{
4055template <size_t... _Sizes, typename _Tp, typename... _As>
4056 auto
4057 __split_wrapper(_SizeList<_Sizes...>, const _SimdTuple<_Tp, _As...>& __x)
4058 {
4059 return split<_Sizes...>(
4060 fixed_size_simd<_Tp, _SimdTuple<_Tp, _As...>::_S_size()>(__private_init,
4061 __x));
4062 }
4063
4064// }}}
4065
4066// split<simd>(simd) {{{
4067template <typename _V, typename _Ap,
4068 size_t _Parts = simd_size_v<typename _V::value_type, _Ap> / _V::size()>
4069 enable_if_t<simd_size_v<typename _V::value_type, _Ap> == _Parts * _V::size()
4070 && is_simd_v<_V>, array<_V, _Parts>>
4071 split(const simd<typename _V::value_type, _Ap>& __x)
4072 {
4073 using _Tp = typename _V::value_type;
4074
4075 auto __gen_fallback = [&]() constexpr _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA {
4076 return __generate_from_n_evaluations<_Parts, array<_V, _Parts>>(
4077 [&](auto __i) constexpr _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA {
4078 return _V([&](auto __j) constexpr _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA
4079 { return __x[__i * _V::size() + __j]; });
4080 });
4081 };
4082
4083 if constexpr (_Parts == 1)
4084 {
4085 return {simd_cast<_V>(__x)};
4086 }
4087 else if (__x._M_is_constprop())
4088 {
4089 return __gen_fallback();
4090 }
4091#if _GLIBCXX_SIMD_HAVE_SVE
4092 else if constexpr(__is_sve_abi<_Ap>)
4093 {
4094 return __gen_fallback();
4095 }
4096#endif
4097 else if constexpr (
4098 __is_fixed_size_abi_v<_Ap>
4099 && (is_same_v<typename _V::abi_type, simd_abi::scalar>
4100 || (__is_fixed_size_abi_v<typename _V::abi_type>
4101 && sizeof(_V) == sizeof(_Tp) * _V::size() // _V doesn't have padding
4102 )))
4103 {
4104 // fixed_size -> fixed_size (w/o padding) or scalar
4105#ifdef _GLIBCXX_SIMD_USE_ALIASING_LOADS
4106 const __may_alias<_Tp>* const __element_ptr
4107 = reinterpret_cast<const __may_alias<_Tp>*>(&__data(__x));
4108 return __generate_from_n_evaluations<_Parts, array<_V, _Parts>>(
4109 [&](auto __i) constexpr _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA
4110 { return _V(__element_ptr + __i * _V::size(), vector_aligned); });
4111#else
4112 const auto& __xx = __data(__x);
4113 return __generate_from_n_evaluations<_Parts, array<_V, _Parts>>(
4114 [&](auto __i) constexpr _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA {
4115 [[maybe_unused]] constexpr size_t __offset
4116 = decltype(__i)::value * _V::size();
4117 return _V([&](auto __j) constexpr _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA {
4118 constexpr _SizeConstant<__j + __offset> __k;
4119 return __xx[__k];
4120 });
4121 });
4122#endif
4123 }
4124 else if constexpr (is_same_v<typename _V::abi_type, simd_abi::scalar>)
4125 {
4126 // normally memcpy should work here as well
4127 return __generate_from_n_evaluations<_Parts, array<_V, _Parts>>(
4128 [&](auto __i) constexpr _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA { return __x[__i]; });
4129 }
4130 else
4131 {
4132 return __generate_from_n_evaluations<_Parts, array<_V, _Parts>>(
4133 [&](auto __i) constexpr _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA {
4134 if constexpr (__is_fixed_size_abi_v<typename _V::abi_type>)
4135 return _V([&](auto __j) constexpr _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA {
4136 return __x[__i * _V::size() + __j];
4137 });
4138 else
4139 return _V(__private_init,
4140 __extract_part<decltype(__i)::value, _Parts>(__data(__x)));
4141 });
4142 }
4143 }
4144
4145// }}}
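
// Example (editorial sketch; assumes the native float width is 4, so that
// 8 == 2 * 4): split a wider simd into an array of smaller ones:
//   stdx::fixed_size_simd<float, 8> x = 1.f;
//   std::array<stdx::native_simd<float>, 2> parts
//     = stdx::split<stdx::native_simd<float>>(x);
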
4146// split<simd_mask>(simd_mask) {{{
4147template <typename _V, typename _Ap,
4148 size_t _Parts = simd_size_v<typename _V::simd_type::value_type, _Ap> / _V::size()>
4149 enable_if_t<is_simd_mask_v<_V> && simd_size_v<typename
4150 _V::simd_type::value_type, _Ap> == _Parts * _V::size(), array<_V, _Parts>>
4151 split(const simd_mask<typename _V::simd_type::value_type, _Ap>& __x)
4152 {
4153 if constexpr (is_same_v<_Ap, typename _V::abi_type>)
4154 return {__x};
4155 else if constexpr (_Parts == 1)
4156 return {__proposed::static_simd_cast<_V>(__x)};
4157 else if constexpr (_Parts == 2 && __is_sse_abi<typename _V::abi_type>()
4158 && __is_avx_abi<_Ap>())
4159 return {_V(__private_init, __lo128(__data(__x))),
4160 _V(__private_init, __hi128(__data(__x)))};
4161 else if constexpr (_V::size() <= __CHAR_BIT__ * sizeof(_ULLong))
4162 {
4163 const bitset __bits = __x.__to_bitset();
4164 return __generate_from_n_evaluations<_Parts, array<_V, _Parts>>(
4165 [&](auto __i) constexpr _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA {
4166 constexpr size_t __offset = __i * _V::size();
4167 return _V(__bitset_init, (__bits >> __offset).to_ullong());
4168 });
4169 }
4170 else
4171 {
4172 return __generate_from_n_evaluations<_Parts, array<_V, _Parts>>(
4173 [&](auto __i) constexpr _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA {
4174 constexpr size_t __offset = __i * _V::size();
4175 return _V(__private_init,
4176 [&](auto __j) constexpr _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA {
4177 return __x[__j + __offset];
4178 });
4179 });
4180 }
4181 }
4182
4183// }}}
4184// split<_Sizes...>(simd) {{{
4185template <size_t... _Sizes, typename _Tp, typename _Ap, typename>
4186 _GLIBCXX_SIMD_ALWAYS_INLINE
4187 tuple<simd<_Tp, simd_abi::deduce_t<_Tp, _Sizes>>...>
4188 split(const simd<_Tp, _Ap>& __x)
4189 {
4190 using _SL = _SizeList<_Sizes...>;
4191 using _Tuple = tuple<__deduced_simd<_Tp, _Sizes>...>;
4192 constexpr size_t _Np = simd_size_v<_Tp, _Ap>;
4193 constexpr size_t _N0 = _SL::template _S_at<0>();
4194 using _V = __deduced_simd<_Tp, _N0>;
4195
4196 auto __gen_fallback = [&]() constexpr _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA
4197 {
4198 return __generate_from_n_evaluations<sizeof...(_Sizes), _Tuple>(
4199 [&](auto __i) constexpr _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA {
4200 using _Vi = __deduced_simd<_Tp, _SL::_S_at(__i)>;
4201 constexpr size_t __offset = _SL::_S_before(__i);
4202 return _Vi([&](auto __j) constexpr _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA {
4203 return __x[__offset + __j];
4204 });
4205 });
4206 };
4207
4208 if (__x._M_is_constprop())
4209      return __gen_fallback();
4210#if _GLIBCXX_SIMD_HAVE_SVE
4211 else if constexpr (__have_sve)
4212      return __gen_fallback();
4213#endif
4214 else if constexpr (_Np == _N0)
4215 {
4216 static_assert(sizeof...(_Sizes) == 1);
4217 return {simd_cast<_V>(__x)};
4218 }
4219 else if constexpr // split from fixed_size, such that __x::first.size == _N0
4220 (__is_fixed_size_abi_v<
4221 _Ap> && __fixed_size_storage_t<_Tp, _Np>::_S_first_size == _N0)
4222 {
4223 static_assert(
4224 !__is_fixed_size_abi_v<typename _V::abi_type>,
4225	  "How can <_Tp, _Np> be a single _SimdTuple entry but a "
4226	  "fixed_size_simd "
4227	  "when deduced?");
4228 // extract first and recurse (__split_wrapper is needed to deduce a new
4229 // _Sizes pack)
4230 return tuple_cat(make_tuple(_V(__private_init, __data(__x).first)),
4231 __split_wrapper(_SL::template _S_pop_front<1>(),
4232 __data(__x).second));
4233 }
4234 else if constexpr ((!is_same_v<simd_abi::scalar,
4235 simd_abi::deduce_t<_Tp, _Sizes>> && ...)
4236 && (!__is_fixed_size_abi_v<
4237 simd_abi::deduce_t<_Tp, _Sizes>> && ...))
4238 {
4239 if constexpr (((_Sizes * 2 == _Np) && ...))
4240 return {{__private_init, __extract_part<0, 2>(__data(__x))},
4241 {__private_init, __extract_part<1, 2>(__data(__x))}};
4242 else if constexpr (is_same_v<_SizeList<_Sizes...>,
4243 _SizeList<_Np / 3, _Np / 3, _Np / 3>>)
4244 return {{__private_init, __extract_part<0, 3>(__data(__x))},
4245 {__private_init, __extract_part<1, 3>(__data(__x))},
4246 {__private_init, __extract_part<2, 3>(__data(__x))}};
4247 else if constexpr (is_same_v<_SizeList<_Sizes...>,
4248 _SizeList<2 * _Np / 3, _Np / 3>>)
4249 return {{__private_init, __extract_part<0, 3, 2>(__data(__x))},
4250 {__private_init, __extract_part<2, 3>(__data(__x))}};
4251 else if constexpr (is_same_v<_SizeList<_Sizes...>,
4252 _SizeList<_Np / 3, 2 * _Np / 3>>)
4253 return {{__private_init, __extract_part<0, 3>(__data(__x))},
4254 {__private_init, __extract_part<1, 3, 2>(__data(__x))}};
4255 else if constexpr (is_same_v<_SizeList<_Sizes...>,
4256 _SizeList<_Np / 2, _Np / 4, _Np / 4>>)
4257 return {{__private_init, __extract_part<0, 2>(__data(__x))},
4258 {__private_init, __extract_part<2, 4>(__data(__x))},
4259 {__private_init, __extract_part<3, 4>(__data(__x))}};
4260 else if constexpr (is_same_v<_SizeList<_Sizes...>,
4261 _SizeList<_Np / 4, _Np / 4, _Np / 2>>)
4262 return {{__private_init, __extract_part<0, 4>(__data(__x))},
4263 {__private_init, __extract_part<1, 4>(__data(__x))},
4264 {__private_init, __extract_part<1, 2>(__data(__x))}};
4265 else if constexpr (is_same_v<_SizeList<_Sizes...>,
4266 _SizeList<_Np / 4, _Np / 2, _Np / 4>>)
4267 return {{__private_init, __extract_part<0, 4>(__data(__x))},
4268 {__private_init, __extract_center(__data(__x))},
4269 {__private_init, __extract_part<3, 4>(__data(__x))}};
4270 else if constexpr (((_Sizes * 4 == _Np) && ...))
4271 return {{__private_init, __extract_part<0, 4>(__data(__x))},
4272 {__private_init, __extract_part<1, 4>(__data(__x))},
4273 {__private_init, __extract_part<2, 4>(__data(__x))},
4274 {__private_init, __extract_part<3, 4>(__data(__x))}};
4275 // else fall through
4276 }
4277#ifdef _GLIBCXX_SIMD_USE_ALIASING_LOADS
4278 const __may_alias<_Tp>* const __element_ptr
4279 = reinterpret_cast<const __may_alias<_Tp>*>(&__x);
4280 return __generate_from_n_evaluations<sizeof...(_Sizes), _Tuple>(
4281 [&](auto __i) constexpr _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA {
4282 using _Vi = __deduced_simd<_Tp, _SL::_S_at(__i)>;
4283 constexpr size_t __offset = _SL::_S_before(__i);
4284 constexpr size_t __base_align = alignof(simd<_Tp, _Ap>);
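	  // __alignment is the largest power of two dividing the byte offset
	  // of this part from a __base_align boundary (and __base_align itself
	  // when that offset is 0): __b extracts the lowest set bit of __a via
	  // Kernighan's trick.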
4285 constexpr size_t __a
4286 = __base_align - ((__offset * sizeof(_Tp)) % __base_align);
4287 constexpr size_t __b = ((__a - 1) & __a) ^ __a;
4288 constexpr size_t __alignment = __b == 0 ? __a : __b;
4289 return _Vi(__element_ptr + __offset, overaligned<__alignment>);
4290 });
4291#else
4292 return __generate_from_n_evaluations<sizeof...(_Sizes), _Tuple>(
4293 [&](auto __i) constexpr _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA {
4294 using _Vi = __deduced_simd<_Tp, _SL::_S_at(__i)>;
4295 const auto& __xx = __data(__x);
4296 using _Offset = decltype(_SL::_S_before(__i));
4297 return _Vi([&](auto __j) constexpr _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA {
4298 constexpr _SizeConstant<_Offset::value + __j> __k;
4299 return __xx[__k];
4300 });
4301 });
4302#endif
4303 }
4304
4305// }}}
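
// Example (editorial sketch): split by an explicit list of sizes, which must
// sum to the source width; the result is a tuple of differently sized simds:
//   stdx::fixed_size_simd<float, 8> x = 1.f;
//   auto [lo, hi] = stdx::split<2, 6>(x);  // 2- and 6-element simds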
4306
4307// __subscript_in_pack {{{
4308template <size_t _I, typename _Tp, typename _Ap, typename... _As>
4309 _GLIBCXX_SIMD_INTRINSIC constexpr _Tp
4310 __subscript_in_pack(const simd<_Tp, _Ap>& __x, const simd<_Tp, _As>&... __xs)
4311 {
4312 if constexpr (_I < simd_size_v<_Tp, _Ap>)
4313 return __x[_I];
4314 else
4315 return __subscript_in_pack<_I - simd_size_v<_Tp, _Ap>>(__xs...);
4316 }
4317
4318// }}}
4319// __store_pack_of_simd {{{
4320template <typename _Tp, typename _A0, typename... _As>
4321 _GLIBCXX_SIMD_INTRINSIC void
4322 __store_pack_of_simd(char* __mem, const simd<_Tp, _A0>& __x0, const simd<_Tp, _As>&... __xs)
4323 {
4324 constexpr size_t __n_bytes = sizeof(_Tp) * simd_size_v<_Tp, _A0>;
4325 __builtin_memcpy(__mem, &__data(__x0), __n_bytes);
4326 if constexpr (sizeof...(__xs) > 0)
4327 __store_pack_of_simd(__mem + __n_bytes, __xs...);
4328 }
4329
4330// }}}
4331// concat(simd...) {{{
4332template <typename _Tp, typename... _As, typename = __detail::__odr_helper>
4333 inline _GLIBCXX_SIMD_CONSTEXPR
4334 simd<_Tp, simd_abi::deduce_t<_Tp, (simd_size_v<_Tp, _As> + ...)>>
4335 concat(const simd<_Tp, _As>&... __xs)
4336 {
4337 using _Rp = __deduced_simd<_Tp, (simd_size_v<_Tp, _As> + ...)>;
4338 if constexpr (sizeof...(__xs) == 1)
4339 return simd_cast<_Rp>(__xs...);
4340 else if ((... && __xs._M_is_constprop()))
4341 return simd<_Tp,
4342 simd_abi::deduce_t<_Tp, (simd_size_v<_Tp, _As> + ...)>>(
4343 [&](auto __i) constexpr _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA
4344 { return __subscript_in_pack<__i>(__xs...); });
4345 else
4346 {
4347 _Rp __r{};
4348 __store_pack_of_simd(reinterpret_cast<char*>(&__data(__r)), __xs...);
4349 return __r;
4350 }
4351 }
4352
4353// }}}
4354// concat(array<simd>) {{{
4355template <typename _Tp, typename _Abi, size_t _Np>
4356 _GLIBCXX_SIMD_ALWAYS_INLINE
4357 _GLIBCXX_SIMD_CONSTEXPR __deduced_simd<_Tp, simd_size_v<_Tp, _Abi> * _Np>
4358 concat(const array<simd<_Tp, _Abi>, _Np>& __x)
4359 {
4360 return __call_with_subscripts<_Np>(
4361 __x, [](const auto&... __xs) _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA {
4362 return concat(__xs...);
4363 });
4364 }
4365
4366// }}}
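
// Example (editorial sketch, continuing the split examples above): concat is
// the inverse of split; both the variadic and the array overloads deduce a
// sufficiently wide result:
//   auto y = stdx::concat(lo, hi);   // 8 elements again
//   auto z = stdx::concat(parts);    // from std::array<simd, N>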
4367
4368/// @cond undocumented
4369// _SmartReference {{{
4370template <typename _Up, typename _Accessor = _Up,
4371 typename _ValueType = typename _Up::value_type>
4372 class _SmartReference
4373 {
4374 friend _Accessor;
4375 int _M_index;
4376 _Up& _M_obj;
4377
4378 _GLIBCXX_SIMD_INTRINSIC constexpr _ValueType
4379 _M_read() const noexcept
4380 {
4381 if constexpr (is_arithmetic_v<_Up>)
4382 return _M_obj;
4383 else
4384 return _M_obj[_M_index];
4385 }
4386
4387 template <typename _Tp>
4388 _GLIBCXX_SIMD_INTRINSIC constexpr void
4389 _M_write(_Tp&& __x) const
4390 { _Accessor::_S_set(_M_obj, _M_index, static_cast<_Tp&&>(__x)); }
4391
4392 public:
4393 _GLIBCXX_SIMD_INTRINSIC constexpr
4394 _SmartReference(_Up& __o, int __i) noexcept
4395 : _M_index(__i), _M_obj(__o) {}
4396
4397 using value_type = _ValueType;
4398
4399 _GLIBCXX_SIMD_INTRINSIC
4400 _SmartReference(const _SmartReference&) = delete;
4401
4402 _GLIBCXX_SIMD_INTRINSIC constexpr
4403 operator value_type() const noexcept
4404 { return _M_read(); }
4405
4406 template <typename _Tp, typename = _ValuePreservingOrInt<__remove_cvref_t<_Tp>, value_type>>
4407 _GLIBCXX_SIMD_INTRINSIC constexpr _SmartReference
4408 operator=(_Tp&& __x) &&
4409 {
4410 _M_write(static_cast<_Tp&&>(__x));
4411 return {_M_obj, _M_index};
4412 }
4413
4414#define _GLIBCXX_SIMD_OP_(__op) \
4415 template <typename _Tp, \
4416 typename _TT = decltype(declval<value_type>() __op declval<_Tp>()), \
4417 typename = _ValuePreservingOrInt<__remove_cvref_t<_Tp>, _TT>, \
4418 typename = _ValuePreservingOrInt<_TT, value_type>> \
4419 _GLIBCXX_SIMD_INTRINSIC constexpr _SmartReference \
4420 operator __op##=(_Tp&& __x) && \
4421 { \
4422 const value_type& __lhs = _M_read(); \
4423 _M_write(__lhs __op __x); \
4424 return {_M_obj, _M_index}; \
4425 }
4426 _GLIBCXX_SIMD_ALL_ARITHMETICS(_GLIBCXX_SIMD_OP_);
4427 _GLIBCXX_SIMD_ALL_SHIFTS(_GLIBCXX_SIMD_OP_);
4428 _GLIBCXX_SIMD_ALL_BINARY(_GLIBCXX_SIMD_OP_);
4429#undef _GLIBCXX_SIMD_OP_
4430
4431 template <typename _Tp = void,
4432 typename = decltype(++declval<conditional_t<true, value_type, _Tp>&>())>
4433 _GLIBCXX_SIMD_INTRINSIC constexpr _SmartReference
4434 operator++() &&
4435 {
4436 value_type __x = _M_read();
4437 _M_write(++__x);
4438 return {_M_obj, _M_index};
4439 }
4440
4441 template <typename _Tp = void,
4442 typename = decltype(declval<conditional_t<true, value_type, _Tp>&>()++)>
4443 _GLIBCXX_SIMD_INTRINSIC constexpr value_type
4444 operator++(int) &&
4445 {
4446 const value_type __r = _M_read();
4447 value_type __x = __r;
4448 _M_write(++__x);
4449 return __r;
4450 }
4451
4452 template <typename _Tp = void,
4453 typename = decltype(--declval<conditional_t<true, value_type, _Tp>&>())>
4454 _GLIBCXX_SIMD_INTRINSIC constexpr _SmartReference
4455 operator--() &&
4456 {
4457 value_type __x = _M_read();
4458 _M_write(--__x);
4459 return {_M_obj, _M_index};
4460 }
4461
4462 template <typename _Tp = void,
4463 typename = decltype(declval<conditional_t<true, value_type, _Tp>&>()--)>
4464 _GLIBCXX_SIMD_INTRINSIC constexpr value_type
4465 operator--(int) &&
4466 {
4467 const value_type __r = _M_read();
4468 value_type __x = __r;
4469 _M_write(--__x);
4470 return __r;
4471 }
4472
4473 _GLIBCXX_SIMD_INTRINSIC friend void
4474 swap(_SmartReference&& __a, _SmartReference&& __b) noexcept(
4475 conjunction<
4476 is_nothrow_constructible<value_type, _SmartReference&&>,
4477 is_nothrow_assignable<_SmartReference&&, value_type&&>>::value)
4478 {
4479 value_type __tmp = static_cast<_SmartReference&&>(__a);
4480 static_cast<_SmartReference&&>(__a) = static_cast<value_type>(__b);
4481 static_cast<_SmartReference&&>(__b) = std::move(__tmp);
4482 }
4483
4484 _GLIBCXX_SIMD_INTRINSIC friend void
4485 swap(value_type& __a, _SmartReference&& __b) noexcept(
4486 conjunction<
4487 is_nothrow_constructible<value_type, value_type&&>,
4488 is_nothrow_assignable<value_type&, value_type&&>,
4489 is_nothrow_assignable<_SmartReference&&, value_type&&>>::value)
4490 {
4491 value_type __tmp(std::move(__a));
4492 __a = static_cast<value_type>(__b);
4493 static_cast<_SmartReference&&>(__b) = std::move(__tmp);
4494 }
4495
4496 _GLIBCXX_SIMD_INTRINSIC friend void
4497 swap(_SmartReference&& __a, value_type& __b) noexcept(
4498 conjunction<
4499 is_nothrow_constructible<value_type, _SmartReference&&>,
4500 is_nothrow_assignable<value_type&, value_type&&>,
4501 is_nothrow_assignable<_SmartReference&&, value_type&&>>::value)
4502 {
4503 value_type __tmp(__a);
4504 static_cast<_SmartReference&&>(__a) = std::move(__b);
4505 __b = std::move(__tmp);
4506 }
4507 };
4508
4509// }}}
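
// Editorial note: subscripting simd/simd_mask returns this proxy rather than
// value_type&; reads convert to value_type and writes go through
// _Accessor::_S_set. The proxy is not copyable and only assignable as an
// rvalue:
//   stdx::simd<int> v = 1;
//   v[0] = 2;            // OK: assignment through the temporary proxy
//   int x = v[0];        // OK: conversion to value_type
//   // auto& r = v[0];   // ill-formed: cannot bind an lvalue reference
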
4510// __scalar_abi_wrapper {{{
4511template <int _Bytes>
4512 struct __scalar_abi_wrapper
4513 {
4514 template <typename _Tp> static constexpr size_t _S_full_size = 1;
4515 template <typename _Tp> static constexpr size_t _S_size = 1;
4516    template <typename _Tp> static constexpr bool _S_is_partial = false;
4517
4518 template <typename _Tp, typename _Abi = simd_abi::scalar>
4519 static constexpr bool _S_is_valid_v
4520 = _Abi::template _IsValid<_Tp>::value && sizeof(_Tp) == _Bytes;
4521 };
4522
4523// }}}
4524// __decay_abi metafunction {{{
4525template <typename _Tp>
4526 struct __decay_abi { using type = _Tp; };
4527
4528template <int _Bytes>
4529 struct __decay_abi<__scalar_abi_wrapper<_Bytes>>
4530 { using type = simd_abi::scalar; };
4531
4532// }}}
4533// __find_next_valid_abi metafunction {{{1
4534// Given an ABI tag A<N>, find an N2 < N such that A<N2>::_S_is_valid_v<_Tp> ==
4535// true, N2 is a power-of-2, and A<N2>::_S_is_partial<_Tp> is false. Break
4536// recursion at 2 elements in the resulting ABI tag. In this case
4537// type::_S_is_valid_v<_Tp> may be false.
4538template <template <int> class _Abi, int _Bytes, typename _Tp>
4539 struct __find_next_valid_abi
4540 {
4541 static constexpr auto
4542 _S_choose()
4543 {
4544 constexpr int _NextBytes = std::__bit_ceil(_Bytes) / 2;
4545 using _NextAbi = _Abi<_NextBytes>;
4546 if constexpr (_NextBytes < sizeof(_Tp) * 2) // break recursion
4547 return _Abi<_Bytes>();
4548 else if constexpr (_NextAbi::template _S_is_partial<_Tp> == false
4549 && _NextAbi::template _S_is_valid_v<_Tp>)
4550 return _NextAbi();
4551 else
4552 return __find_next_valid_abi<_Abi, _NextBytes, _Tp>::_S_choose();
4553 }
4554
4555 using type = decltype(_S_choose());
4556 };
4557
4558template <int _Bytes, typename _Tp>
4559 struct __find_next_valid_abi<__scalar_abi_wrapper, _Bytes, _Tp>
4560 { using type = simd_abi::scalar; };
4561
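// Worked example (editorial, assuming x86 with SSE): for
// _Abi = simd_abi::_VecBuiltin, _Bytes = 24, _Tp = float:
// __bit_ceil(24) / 2 == 16, and _VecBuiltin<16> is valid and not partial for
// float, so type is _VecBuiltin<16> (a full 4-element vector).
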
4562// _AbiList {{{1
4563template <template <int> class...>
4564 struct _AbiList
4565 {
4566 template <typename, int> static constexpr bool _S_has_valid_abi = false;
4567 template <typename, int> using _FirstValidAbi = void;
4568 template <typename, int> using _BestAbi = void;
4569 };
4570
4571template <template <int> class _A0, template <int> class... _Rest>
4572 struct _AbiList<_A0, _Rest...>
4573 {
4574 template <typename _Tp, int _Np>
4575 static constexpr bool _S_has_valid_abi
4576 = _A0<sizeof(_Tp) * _Np>::template _S_is_valid_v<
4577 _Tp> || _AbiList<_Rest...>::template _S_has_valid_abi<_Tp, _Np>;
4578
4579 template <typename _Tp, int _Np>
4580 using _FirstValidAbi = conditional_t<
4581 _A0<sizeof(_Tp) * _Np>::template _S_is_valid_v<_Tp>,
4582 typename __decay_abi<_A0<sizeof(_Tp) * _Np>>::type,
4583 typename _AbiList<_Rest...>::template _FirstValidAbi<_Tp, _Np>>;
4584
4585 template <typename _Tp, int _Np>
4586 static constexpr auto
4587 _S_determine_best_abi()
4588 {
4589 static_assert(_Np >= 1);
4590 constexpr int _Bytes = sizeof(_Tp) * _Np;
4591 if constexpr (_Np == 1)
4592 return __make_dependent_t<_Tp, simd_abi::scalar>{};
4593 else
4594 {
4595 constexpr int __fullsize = _A0<_Bytes>::template _S_full_size<_Tp>;
4596 // _A0<_Bytes> is good if:
4597 // 1. The ABI tag is valid for _Tp
4598 // 2. The storage overhead is no more than padding to fill the next
4599 // power-of-2 number of bytes
4600 if constexpr (_A0<_Bytes>::template _S_is_valid_v<_Tp>
4601 && ((__is_sve_abi<_A0<_Bytes>>() && __have_sve
4602 && (_Np <= __sve_vectorized_size_bytes/sizeof(_Tp)))
4603 || (__fullsize / 2 < _Np))
4604 )
4605 return typename __decay_abi<_A0<_Bytes>>::type{};
4606 else
4607 {
4608 using _Bp =
4609 typename __find_next_valid_abi<_A0, _Bytes, _Tp>::type;
4610 if constexpr (_Bp::template _S_is_valid_v<
4611 _Tp> && _Bp::template _S_size<_Tp> <= _Np)
4612 return _Bp{};
4613 else
4614 return
4615 typename _AbiList<_Rest...>::template _BestAbi<_Tp, _Np>{};
4616 }
4617 }
4618 }
4619
4620 template <typename _Tp, int _Np>
4621 using _BestAbi = decltype(_S_determine_best_abi<_Tp, _Np>());
4622 };
4623
4624// }}}1
4625
4626// the following lists all native ABIs, which makes them accessible to
4627// simd_abi::deduce and select_best_vector_type_t (for fixed_size). Order
4628// matters: Whatever comes first has higher priority.
4629using _AllNativeAbis = _AbiList<
4630#if _GLIBCXX_SIMD_HAVE_SVE
4631 simd_abi::_SveAbi,
4632#endif
4633 simd_abi::_VecBltnBtmsk, simd_abi::_VecBuiltin, __scalar_abi_wrapper>;
4634
4635using _NoSveAllNativeAbis = _AbiList<simd_abi::_VecBltnBtmsk, simd_abi::_VecBuiltin,
4636 __scalar_abi_wrapper>;
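
// Editorial note (assuming x86-64 with AVX2 but without AVX-512):
// _AllNativeAbis::_FirstValidAbi<float, 8> rejects _VecBltnBtmsk<32> (it
// requires AVX-512) and selects _VecBuiltin<32>, i.e. one 32-byte vector.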
4637
4638// valid _SimdTraits specialization {{{1
4639template <typename _Tp, typename _Abi>
4640 struct _SimdTraits<_Tp, _Abi, void_t<typename _Abi::template _IsValid<_Tp>>>
4641 : _Abi::template __traits<_Tp> {};
4642
4643// __deduce_impl specializations {{{1
4644// try all native ABIs (including scalar) first
4645template <typename _Tp, size_t _Np>
4646 struct __deduce_impl<
4647 _Tp, _Np, enable_if_t<_AllNativeAbis::template _S_has_valid_abi<_Tp, _Np>>>
4648 { using type = _AllNativeAbis::_FirstValidAbi<_Tp, _Np>; };
4649
4650template <typename _Tp, size_t _Np>
4651 struct __no_sve_deduce_impl<
4652 _Tp, _Np, enable_if_t<_NoSveAllNativeAbis::template _S_has_valid_abi<_Tp, _Np>>>
4653 { using type = _NoSveAllNativeAbis::_FirstValidAbi<_Tp, _Np>; };
4654
4655// fall back to fixed_size only if scalar and native ABIs don't match
4656template <typename _Tp, size_t _Np, typename = void>
4657 struct __deduce_fixed_size_fallback {};
4658
4659template <typename _Tp, size_t _Np>
4660 struct __deduce_fixed_size_fallback<_Tp, _Np,
4661 enable_if_t<simd_abi::fixed_size<_Np>::template _S_is_valid_v<_Tp>>>
4662 { using type = simd_abi::fixed_size<_Np>; };
4663
4664template <typename _Tp, size_t _Np, typename>
4665 struct __deduce_impl : public __deduce_fixed_size_fallback<_Tp, _Np> {};
4666
4667template <typename _Tp, size_t _Np, typename>
4668 struct __no_sve_deduce_impl
4669 : public __deduce_fixed_size_fallback<_Tp, _Np>
4670 {};
4671
4672
4673//}}}1
4674/// @endcond
4675
4676// simd_mask {{{
4677template <typename _Tp, typename _Abi>
4678 class simd_mask : public _SimdTraits<_Tp, _Abi>::_MaskBase
4679 {
4680 // types, tags, and friends {{{
4681 using _Traits = _SimdTraits<_Tp, _Abi>;
4682 using _MemberType = typename _Traits::_MaskMember;
4683
4684 // We map all masks with equal element sizeof to a single integer type, the
4685 // one given by __int_for_sizeof_t<_Tp>. This is the approach
4686 // [[gnu::vector_size(N)]] types take as well and it reduces the number of
4687 // template specializations in the implementation classes.
4688 using _Ip = __int_for_sizeof_t<_Tp>;
4689 static constexpr _Ip* _S_type_tag = nullptr;
4690
4691 friend typename _Traits::_MaskBase;
4692 friend class simd<_Tp, _Abi>; // to construct masks on return
4693 friend typename _Traits::_SimdImpl; // to construct masks on return and
4694 // inspect data on masked operations
4695 public:
4696 using _Impl = typename _Traits::_MaskImpl;
4697 friend _Impl;
4698
4699 // }}}
4700 // member types {{{
4701 using value_type = bool;
4702 using reference = _SmartReference<_MemberType, _Impl, value_type>;
4703 using simd_type = simd<_Tp, _Abi>;
4704 using abi_type = _Abi;
4705
4706 // }}}
4707 static constexpr size_t size() // {{{
4708 { return __size_or_zero_v<_Tp, _Abi>; }
4709
4710 // }}}
4711 // constructors & assignment {{{
4712 simd_mask() = default;
4713 simd_mask(const simd_mask&) = default;
4714 simd_mask(simd_mask&&) = default;
4715 simd_mask& operator=(const simd_mask&) = default;
4716 simd_mask& operator=(simd_mask&&) = default;
4717
4718 // }}}
4719 // access to internal representation (optional feature) {{{
4720 _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR explicit
4721 simd_mask(typename _Traits::_MaskCastType __init)
4722 : _M_data{__init} {}
4723 // conversions to internal type is done in _MaskBase
4724
4725 // }}}
4726 // bitset interface (extension to be proposed) {{{
4727 // TS_FEEDBACK:
4728 // Conversion of simd_mask to and from bitset makes it much easier to
4729 // interface with other facilities. I suggest adding `static
4730 // simd_mask::from_bitset` and `simd_mask::to_bitset`.
4731 _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR static simd_mask
4732    __from_bitset(bitset<size()> __bs)
4733    { return {__bitset_init, __bs}; }
4734
4735 _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR bitset<size()>
4736 __to_bitset() const
4737 { return _Impl::_S_to_bits(_M_data)._M_to_bitset(); }
4738
4739 // }}}
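
    // Example (editorial sketch; an extension, hence the double underscores):
    //   using M = stdx::native_simd_mask<float>;
    //   auto k  = M::__from_bitset(std::bitset<M::size()>(0b0101));
    //   auto bs = k.__to_bitset();  // round-trips: bs[i] == k[i] for all i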
4740 // explicit broadcast constructor {{{
4741 _GLIBCXX_SIMD_ALWAYS_INLINE explicit _GLIBCXX_SIMD_CONSTEXPR
4742 simd_mask(value_type __x)
4743 : _M_data(_Impl::template _S_broadcast<_Ip>(__x)) {}
4744
4745 // }}}
4746 // implicit type conversion constructor {{{
4747 #ifdef _GLIBCXX_SIMD_ENABLE_IMPLICIT_MASK_CAST
4748 // proposed improvement
4749 template <typename _Up, typename _A2,
4750 typename = enable_if_t<simd_size_v<_Up, _A2> == size()>>
4751 _GLIBCXX_SIMD_ALWAYS_INLINE explicit(sizeof(_MemberType)
4752 != sizeof(typename _SimdTraits<_Up, _A2>::_MaskMember))
4753 simd_mask(const simd_mask<_Up, _A2>& __x)
4754 : simd_mask(__proposed::static_simd_cast<simd_mask>(__x)) {}
4755 #else
4756 // conforming to ISO/IEC 19570:2018
4757 template <typename _Up, typename = enable_if_t<conjunction<
4758 is_same<abi_type, simd_abi::fixed_size<size()>>,
4759 is_same<_Up, _Up>>::value>>
4760 _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR
4761 simd_mask(const simd_mask<_Up, simd_abi::fixed_size<size()>>& __x)
4762 : _M_data(_Impl::_S_from_bitmask(__data(__x), _S_type_tag)) {}
4763 #endif
4764
4765 // }}}
4766 // load constructor {{{
4767 template <typename _Flags>
4768 _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR
4769 simd_mask(const value_type* __mem, _IsSimdFlagType<_Flags>)
4770 : _M_data(_Impl::template _S_load<_Ip>(_Flags::template _S_apply<simd_mask>(__mem))) {}
4771
4772 template <typename _Flags>
4773 _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR
4774 simd_mask(const value_type* __mem, simd_mask __k, _IsSimdFlagType<_Flags>)
4775 : _M_data{}
4776 {
4777 _M_data = _Impl::_S_masked_load(_M_data, __k._M_data,
4778 _Flags::template _S_apply<simd_mask>(__mem));
4779 }
4780
4781 // }}}
4782 // loads [simd_mask.load] {{{
4783 template <typename _Flags>
4784 _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR void
4785 copy_from(const value_type* __mem, _IsSimdFlagType<_Flags>)
4786 { _M_data = _Impl::template _S_load<_Ip>(_Flags::template _S_apply<simd_mask>(__mem)); }
4787
4788 // }}}
4789 // stores [simd_mask.store] {{{
4790 template <typename _Flags>
4791 _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR void
4792 copy_to(value_type* __mem, _IsSimdFlagType<_Flags>) const
4793 { _Impl::_S_store(_M_data, _Flags::template _S_apply<simd_mask>(__mem)); }
4794
4795 // }}}
4796 // scalar access {{{
4797 _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR reference
4798 operator[](size_t __i)
4799 {
4800 if (__i >= size())
4801 __invoke_ub("Subscript %d is out of range [0, %d]", __i, size() - 1);
4802 return {_M_data, int(__i)};
4803 }
4804
4805 _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR value_type
4806 operator[](size_t __i) const
4807 {
4808 if (__i >= size())
4809 __invoke_ub("Subscript %d is out of range [0, %d]", __i, size() - 1);
4810 if constexpr (__is_scalar_abi<_Abi>())
4811 return _M_data;
4812 else
4813 return static_cast<bool>(_M_data[__i]);
4814 }
4815
4816 // }}}
4817 // negation {{{
4818 _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR simd_mask
4819 operator!() const
4820 { return {__private_init, _Impl::_S_bit_not(_M_data)}; }
4821
4822 // }}}
4823 // simd_mask binary operators [simd_mask.binary] {{{
4824 #ifdef _GLIBCXX_SIMD_ENABLE_IMPLICIT_MASK_CAST
4825 // simd_mask<int> && simd_mask<uint> needs disambiguation
4826 template <typename _Up, typename _A2,
4827 typename = enable_if_t<is_convertible_v<simd_mask<_Up, _A2>, simd_mask>>>
4828 _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend simd_mask
4829 operator&&(const simd_mask& __x, const simd_mask<_Up, _A2>& __y)
4830 {
4831 return {__private_init,
4832 _Impl::_S_logical_and(__x._M_data, simd_mask(__y)._M_data)};
4833 }
4834
4835 template <typename _Up, typename _A2,
4836 typename = enable_if_t<is_convertible_v<simd_mask<_Up, _A2>, simd_mask>>>
4837 _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend simd_mask
4838 operator||(const simd_mask& __x, const simd_mask<_Up, _A2>& __y)
4839 {
4840 return {__private_init,
4841 _Impl::_S_logical_or(__x._M_data, simd_mask(__y)._M_data)};
4842 }
4843 #endif // _GLIBCXX_SIMD_ENABLE_IMPLICIT_MASK_CAST
4844
4845 _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend simd_mask
4846 operator&&(const simd_mask& __x, const simd_mask& __y)
4847 { return {__private_init, _Impl::_S_logical_and(__x._M_data, __y._M_data)}; }
4848
4849 _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend simd_mask
4850 operator||(const simd_mask& __x, const simd_mask& __y)
4851 { return {__private_init, _Impl::_S_logical_or(__x._M_data, __y._M_data)}; }
4852
4853 _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend simd_mask
4854 operator&(const simd_mask& __x, const simd_mask& __y)
4855 { return {__private_init, _Impl::_S_bit_and(__x._M_data, __y._M_data)}; }
4856
4857 _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend simd_mask
4858 operator|(const simd_mask& __x, const simd_mask& __y)
4859 { return {__private_init, _Impl::_S_bit_or(__x._M_data, __y._M_data)}; }
4860
4861 _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend simd_mask
4862 operator^(const simd_mask& __x, const simd_mask& __y)
4863 { return {__private_init, _Impl::_S_bit_xor(__x._M_data, __y._M_data)}; }
4864
4865 _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend simd_mask&
4866 operator&=(simd_mask& __x, const simd_mask& __y)
4867 {
4868 __x._M_data = _Impl::_S_bit_and(__x._M_data, __y._M_data);
4869 return __x;
4870 }
4871
4872 _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend simd_mask&
4873 operator|=(simd_mask& __x, const simd_mask& __y)
4874 {
4875 __x._M_data = _Impl::_S_bit_or(__x._M_data, __y._M_data);
4876 return __x;
4877 }
4878
4879 _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend simd_mask&
4880 operator^=(simd_mask& __x, const simd_mask& __y)
4881 {
4882 __x._M_data = _Impl::_S_bit_xor(__x._M_data, __y._M_data);
4883 return __x;
4884 }
4885
4886 // }}}
4887 // simd_mask compares [simd_mask.comparison] {{{
4888 _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend simd_mask
4889 operator==(const simd_mask& __x, const simd_mask& __y)
4890 { return !operator!=(__x, __y); }
4891
4892 _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend simd_mask
4893 operator!=(const simd_mask& __x, const simd_mask& __y)
4894 { return {__private_init, _Impl::_S_bit_xor(__x._M_data, __y._M_data)}; }
4895
4896 // }}}
4897 // private_init ctor {{{
4898 _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR
4899 simd_mask(_PrivateInit, typename _Traits::_MaskMember __init)
4900 : _M_data(__init) {}
4901
4902 // }}}
4903 // private_init generator ctor {{{
4904 template <typename _Fp, typename = decltype(bool(declval<_Fp>()(size_t())))>
4905 _GLIBCXX_SIMD_INTRINSIC constexpr
4906 simd_mask(_PrivateInit, _Fp&& __gen)
4907 : _M_data()
4908 {
4909 __execute_n_times<size()>([&](auto __i) constexpr _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA {
4910 _Impl::_S_set(_M_data, __i, __gen(__i));
4911 });
4912 }
4913
4914 // }}}
4915 // bitset_init ctor {{{
4916 _GLIBCXX_SIMD_INTRINSIC constexpr
4917 simd_mask(_BitsetInit, bitset<size()> __init)
4918 : _M_data(_Impl::_S_from_bitmask(_SanitizedBitMask<size()>(__init), _S_type_tag))
4919 {}
4920
4921 // }}}
4922 // __cvt {{{
4923 // TS_FEEDBACK:
4924 // The conversion operator this implements should be a ctor on simd_mask.
4925    // Calling .__cvt() on a simd_mask yields a proxy that converts to the
4925    // desired equal-sized mask type.
4926 // A useful variation: add `explicit(sizeof(_Tp) != sizeof(_Up))`
4927 struct _CvtProxy
4928 {
4929 template <typename _Up, typename _A2,
4930 typename = enable_if_t<simd_size_v<_Up, _A2> == simd_size_v<_Tp, _Abi>>>
4931 _GLIBCXX_SIMD_ALWAYS_INLINE
4932 operator simd_mask<_Up, _A2>() &&
4933 {
4934 using namespace std::experimental::__proposed;
4935 return static_simd_cast<simd_mask<_Up, _A2>>(_M_data);
4936 }
4937
4938 const simd_mask<_Tp, _Abi>& _M_data;
4939 };
4940
4941 _GLIBCXX_SIMD_INTRINSIC _CvtProxy
4942 __cvt() const
4943 { return {*this}; }
4944
4945 // }}}
4946 // operator?: overloads (suggested extension) {{{
4947 #ifdef __GXX_CONDITIONAL_IS_OVERLOADABLE__
4948 _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend simd_mask
4949 operator?:(const simd_mask& __k, const simd_mask& __where_true,
4950 const simd_mask& __where_false)
4951 {
4952 auto __ret = __where_false;
4953 _Impl::_S_masked_assign(__k._M_data, __ret._M_data, __where_true._M_data);
4954 return __ret;
4955 }
4956
4957 template <typename _U1, typename _U2,
4958 typename _Rp = simd<common_type_t<_U1, _U2>, _Abi>,
4959 typename = enable_if_t<conjunction_v<
4960 is_convertible<_U1, _Rp>, is_convertible<_U2, _Rp>,
4961 is_convertible<simd_mask, typename _Rp::mask_type>>>>
4962 _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend _Rp
4963 operator?:(const simd_mask& __k, const _U1& __where_true,
4964 const _U2& __where_false)
4965 {
4966 _Rp __ret = __where_false;
4967 _Rp::_Impl::_S_masked_assign(
4968 __data(static_cast<typename _Rp::mask_type>(__k)), __data(__ret),
4969 __data(static_cast<_Rp>(__where_true)));
4970 return __ret;
4971 }
4972
4973 #ifdef _GLIBCXX_SIMD_ENABLE_IMPLICIT_MASK_CAST
4974 template <typename _Kp, typename _Ak, typename _Up, typename _Au,
4975 typename = enable_if_t<
4976 conjunction_v<is_convertible<simd_mask<_Kp, _Ak>, simd_mask>,
4977 is_convertible<simd_mask<_Up, _Au>, simd_mask>>>>
4978 _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend simd_mask
4979 operator?:(const simd_mask<_Kp, _Ak>& __k, const simd_mask& __where_true,
4980 const simd_mask<_Up, _Au>& __where_false)
4981 {
4982 simd_mask __ret = __where_false;
4983 _Impl::_S_masked_assign(simd_mask(__k)._M_data, __ret._M_data,
4984 __where_true._M_data);
4985 return __ret;
4986 }
4987 #endif // _GLIBCXX_SIMD_ENABLE_IMPLICIT_MASK_CAST
4988 #endif // __GXX_CONDITIONAL_IS_OVERLOADABLE__
4989
4990 // }}}
4991 // _M_is_constprop {{{
4992 _GLIBCXX_SIMD_INTRINSIC constexpr bool
4993 _M_is_constprop() const
4994 {
4995 if constexpr (__is_scalar_abi<_Abi>())
4996 return __builtin_constant_p(_M_data);
4997 else
4998 return _M_data._M_is_constprop();
4999 }
5000
5001 // }}}
5002
5003 private:
5004 friend const auto& __data<_Tp, abi_type>(const simd_mask&);
5005 friend auto& __data<_Tp, abi_type>(simd_mask&);
5006 alignas(_Traits::_S_mask_align) _MemberType _M_data;
5007 };
5008
5009// }}}
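
// Example (editorial sketch): basic simd_mask usage:
//   stdx::simd_mask<float> k(true);  // explicit broadcast: all elements true
//   k[0] = false;                    // element access via _SmartReference
//   auto m = !k;                     // element-wise negation
//   bool none = stdx::none_of(k && m);  // true: k[i] && !k[i] is never true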
5010
5011/// @cond undocumented
5012// __data(simd_mask) {{{
5013template <typename _Tp, typename _Ap>
5014 _GLIBCXX_SIMD_INTRINSIC constexpr const auto&
5015 __data(const simd_mask<_Tp, _Ap>& __x)
5016 { return __x._M_data; }
5017
5018template <typename _Tp, typename _Ap>
5019 _GLIBCXX_SIMD_INTRINSIC constexpr auto&
5020 __data(simd_mask<_Tp, _Ap>& __x)
5021 { return __x._M_data; }
5022
5023// }}}
5024/// @endcond
5025
5026// simd_mask reductions [simd_mask.reductions] {{{
5027template <typename _Tp, typename _Abi>
5028 _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR bool
5029 all_of(const simd_mask<_Tp, _Abi>& __k) noexcept
5030 {
5031 if (__builtin_is_constant_evaluated() || __k._M_is_constprop())
5032 {
5033 for (size_t __i = 0; __i < simd_size_v<_Tp, _Abi>; ++__i)
5034 if (!__k[__i])
5035 return false;
5036 return true;
5037 }
5038 else
5039 return _Abi::_MaskImpl::_S_all_of(__k);
5040 }
5041
5042template <typename _Tp, typename _Abi>
5043 _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR bool
5044 any_of(const simd_mask<_Tp, _Abi>& __k) noexcept
5045 {
5046 if (__builtin_is_constant_evaluated() || __k._M_is_constprop())
5047 {
5048 for (size_t __i = 0; __i < simd_size_v<_Tp, _Abi>; ++__i)
5049 if (__k[__i])
5050 return true;
5051 return false;
5052 }
5053 else
5054 return _Abi::_MaskImpl::_S_any_of(__k);
5055 }
5056
5057template <typename _Tp, typename _Abi>
5058 _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR bool
5059 none_of(const simd_mask<_Tp, _Abi>& __k) noexcept
5060 {
5061 if (__builtin_is_constant_evaluated() || __k._M_is_constprop())
5062 {
5063 for (size_t __i = 0; __i < simd_size_v<_Tp, _Abi>; ++__i)
5064 if (__k[__i])
5065 return false;
5066 return true;
5067 }
5068 else
5069 return _Abi::_MaskImpl::_S_none_of(__k);
5070 }
5071
5072template <typename _Tp, typename _Abi>
5073 _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR bool
5074 some_of(const simd_mask<_Tp, _Abi>& __k) noexcept
5075 {
5076 if (__builtin_is_constant_evaluated() || __k._M_is_constprop())
5077 {
5078 for (size_t __i = 1; __i < simd_size_v<_Tp, _Abi>; ++__i)
5079 if (__k[__i] != __k[__i - 1])
5080 return true;
5081 return false;
5082 }
5083 else
5084 return _Abi::_MaskImpl::_S_some_of(__k);
5085 }

template <typename _Tp, typename _Abi>
  _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR int
  popcount(const simd_mask<_Tp, _Abi>& __k) noexcept
  {
    if (__builtin_is_constant_evaluated() || __k._M_is_constprop())
      {
        const int __r = __call_with_subscripts<simd_size_v<_Tp, _Abi>>(
                          __k, [](auto... __elements) _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA {
                            return ((__elements != 0) + ...);
                          });
        if (__builtin_is_constant_evaluated() || __builtin_constant_p(__r))
          return __r;
      }
    return _Abi::_MaskImpl::_S_popcount(__k);
  }

template <typename _Tp, typename _Abi>
  _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR int
  find_first_set(const simd_mask<_Tp, _Abi>& __k)
  {
    if (__builtin_is_constant_evaluated() || __k._M_is_constprop())
      {
        constexpr size_t _Np = simd_size_v<_Tp, _Abi>;
        const size_t _Idx = __call_with_n_evaluations<_Np>(
                              [](auto... __indexes) _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA {
                                return std::min({__indexes...});
                              }, [&](auto __i) _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA {
                                return __k[__i] ? +__i : _Np;
                              });
        if (_Idx >= _Np)
          __invoke_ub("find_first_set(empty mask) is UB");
        if (__builtin_constant_p(_Idx))
          return _Idx;
      }
    return _Abi::_MaskImpl::_S_find_first_set(__k);
  }

template <typename _Tp, typename _Abi>
  _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR int
  find_last_set(const simd_mask<_Tp, _Abi>& __k)
  {
    if (__builtin_is_constant_evaluated() || __k._M_is_constprop())
      {
        constexpr size_t _Np = simd_size_v<_Tp, _Abi>;
        const int _Idx = __call_with_n_evaluations<_Np>(
                           [](auto... __indexes) _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA {
                             return std::max({__indexes...});
                           }, [&](auto __i) _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA {
                             return __k[__i] ? int(__i) : -1;
                           });
        if (_Idx < 0)
          __invoke_ub("find_last_set(empty mask) is UB");
        if (__builtin_constant_p(_Idx))
          return _Idx;
      }
    return _Abi::_MaskImpl::_S_find_last_set(__k);
  }
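
// Usage sketch (editorial example). popcount counts the true lanes;
// find_first_set/find_last_set return the lowest/highest index of a
// true lane and, as the __invoke_ub calls above document, are undefined
// for an all-false mask, so guard them with any_of:
//
//   stdx::native_simd<int> v([](int i) { return i; });  // 0, 1, 2, ...
//   auto k = (v % 2) == 1;                              // odd lanes
//   int n = stdx::popcount(k);                          // size() / 2
//   if (stdx::any_of(k))
//     {
//       int lo = stdx::find_first_set(k);               // 1
//       int hi = stdx::find_last_set(k);                // last odd index
//     }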

_GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR bool
all_of(_ExactBool __x) noexcept
{ return __x; }

_GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR bool
any_of(_ExactBool __x) noexcept
{ return __x; }

_GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR bool
none_of(_ExactBool __x) noexcept
{ return !__x; }

_GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR bool
some_of(_ExactBool) noexcept
{ return false; }

_GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR int
popcount(_ExactBool __x) noexcept
{ return __x; }

_GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR int
find_first_set(_ExactBool)
{ return 0; }

_GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR int
find_last_set(_ExactBool)
{ return 0; }

// }}}

/// @cond undocumented
// _SimdIntOperators{{{1
template <typename _V, typename _Tp, typename _Abi, bool>
  class _SimdIntOperators {};

template <typename _V, typename _Tp, typename _Abi>
  class _SimdIntOperators<_V, _Tp, _Abi, true>
  {
    using _Impl = typename _SimdTraits<_Tp, _Abi>::_SimdImpl;

    _GLIBCXX_SIMD_INTRINSIC constexpr const _V&
    __derived() const
    { return *static_cast<const _V*>(this); }

    template <typename _Up>
      _GLIBCXX_SIMD_INTRINSIC static _GLIBCXX_SIMD_CONSTEXPR _V
      _S_make_derived(_Up&& __d)
      { return {__private_init, static_cast<_Up&&>(__d)}; }

  public:
    _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend
    _V&
    operator%=(_V& __lhs, const _V& __x)
    { return __lhs = __lhs % __x; }

    _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend
    _V&
    operator&=(_V& __lhs, const _V& __x)
    { return __lhs = __lhs & __x; }

    _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend
    _V&
    operator|=(_V& __lhs, const _V& __x)
    { return __lhs = __lhs | __x; }

    _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend
    _V&
    operator^=(_V& __lhs, const _V& __x)
    { return __lhs = __lhs ^ __x; }

    _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend
    _V&
    operator<<=(_V& __lhs, const _V& __x)
    { return __lhs = __lhs << __x; }

    _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend
    _V&
    operator>>=(_V& __lhs, const _V& __x)
    { return __lhs = __lhs >> __x; }

    _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend
    _V&
    operator<<=(_V& __lhs, int __x)
    { return __lhs = __lhs << __x; }

    _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend
    _V&
    operator>>=(_V& __lhs, int __x)
    { return __lhs = __lhs >> __x; }

    _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend
    _V
    operator%(const _V& __x, const _V& __y)
    {
      return _SimdIntOperators::_S_make_derived(
               _Impl::_S_modulus(__data(__x), __data(__y)));
    }

    _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend
    _V
    operator&(const _V& __x, const _V& __y)
    {
      return _SimdIntOperators::_S_make_derived(
               _Impl::_S_bit_and(__data(__x), __data(__y)));
    }

    _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend
    _V
    operator|(const _V& __x, const _V& __y)
    {
      return _SimdIntOperators::_S_make_derived(
               _Impl::_S_bit_or(__data(__x), __data(__y)));
    }

    _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend
    _V
    operator^(const _V& __x, const _V& __y)
    {
      return _SimdIntOperators::_S_make_derived(
               _Impl::_S_bit_xor(__data(__x), __data(__y)));
    }

    _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend
    _V
    operator<<(const _V& __x, const _V& __y)
    {
      return _SimdIntOperators::_S_make_derived(
               _Impl::_S_bit_shift_left(__data(__x), __data(__y)));
    }

    _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend
    _V
    operator>>(const _V& __x, const _V& __y)
    {
      return _SimdIntOperators::_S_make_derived(
               _Impl::_S_bit_shift_right(__data(__x), __data(__y)));
    }

    _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend
    _V
    operator<<(const _V& __x, int __y)
    {
      if (__y < 0)
        __invoke_ub("The behavior is undefined if the right operand of a "
                    "shift operation is negative. [expr.shift]\nA shift by "
                    "%d was requested",
                    __y);
      if (size_t(__y) >= sizeof(declval<_Tp>() << __y) * __CHAR_BIT__)
        __invoke_ub(
          "The behavior is undefined if the right operand of a "
          "shift operation is greater than or equal to the width of the "
          "promoted left operand. [expr.shift]\nA shift by %d was requested",
          __y);
      return _SimdIntOperators::_S_make_derived(
               _Impl::_S_bit_shift_left(__data(__x), __y));
    }

    _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend
    _V
    operator>>(const _V& __x, int __y)
    {
      if (__y < 0)
        __invoke_ub(
          "The behavior is undefined if the right operand of a shift "
          "operation is negative. [expr.shift]\nA shift by %d was requested",
          __y);
      if (size_t(__y) >= sizeof(declval<_Tp>() << __y) * __CHAR_BIT__)
        __invoke_ub(
          "The behavior is undefined if the right operand of a shift "
          "operation is greater than or equal to the width of the promoted "
          "left operand. [expr.shift]\nA shift by %d was requested",
          __y);
      return _SimdIntOperators::_S_make_derived(
               _Impl::_S_bit_shift_right(__data(__x), __y));
    }

    // unary operators (for integral _Tp)
    _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR
    _V
    operator~() const
    { return {__private_init, _Impl::_S_complement(__derived()._M_data)}; }
  };
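
// Usage sketch (editorial example). The operators above exist only when
// value_type is integral; scalar shift counts are validated against the
// [expr.shift] preconditions quoted in the __invoke_ub messages:
//
//   stdx::native_simd<unsigned> bits = 0x0f0fu;
//   bits <<= 4;               // lane-wise shift by a scalar
//   auto low = bits & 0xffu;  // lane-wise AND with a broadcast operand
//   auto rem = bits % 3u;     // lane-wise modulus
//   // bits << -1, or a shift of at least the lane width, is undefined;
//   // _GLIBCXX_DEBUG_UB builds diagnose it via __invoke_ub.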

//}}}1
/// @endcond

// simd {{{
template <typename _Tp, typename _Abi>
  class simd : public _SimdIntOperators<
                 simd<_Tp, _Abi>, _Tp, _Abi,
                 conjunction<is_integral<_Tp>,
                             typename _SimdTraits<_Tp, _Abi>::_IsValid>::value>,
               public _SimdTraits<_Tp, _Abi>::_SimdBase
  {
    using _Traits = _SimdTraits<_Tp, _Abi>;
    using _MemberType = typename _Traits::_SimdMember;
    using _CastType = typename _Traits::_SimdCastType;
    static constexpr _Tp* _S_type_tag = nullptr;
    friend typename _Traits::_SimdBase;

  public:
    using _Impl = typename _Traits::_SimdImpl;
    friend _Impl;
    friend _SimdIntOperators<simd, _Tp, _Abi, true>;

    using value_type = _Tp;
    using reference = _SmartReference<_MemberType, _Impl, value_type>;
    using mask_type = simd_mask<_Tp, _Abi>;
    using abi_type = _Abi;

    static constexpr size_t size()
    { return __size_or_zero_v<_Tp, _Abi>; }

    _GLIBCXX_SIMD_CONSTEXPR simd() = default;
    _GLIBCXX_SIMD_CONSTEXPR simd(const simd&) = default;
    _GLIBCXX_SIMD_CONSTEXPR simd(simd&&) noexcept = default;
    _GLIBCXX_SIMD_CONSTEXPR simd& operator=(const simd&) = default;
    _GLIBCXX_SIMD_CONSTEXPR simd& operator=(simd&&) noexcept = default;

    // implicit broadcast constructor
    template <typename _Up,
              typename = enable_if_t<!is_same_v<__remove_cvref_t<_Up>, bool>>>
      _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR
      simd(_ValuePreservingOrInt<_Up, value_type>&& __x)
      : _M_data(
          _Impl::_S_broadcast(static_cast<value_type>(static_cast<_Up&&>(__x))))
      {}
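
    // Usage sketch (editorial example): broadcasting accepts only
    // value-preserving conversions, plus int as a special case, so a
    // lossy literal is rejected at compile time:
    //
    //   stdx::native_simd<float> a = 1.5f;   // every lane is 1.5f
    //   stdx::native_simd<float> b = 2;      // OK: int is always allowed
    //   // stdx::native_simd<float> c = 1.5; // ill-formed: double -> float
    //   //                                   // would narrow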

    // implicit type conversion constructor (convert from fixed_size to
    // fixed_size)
    template <typename _Up>
      _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR
      simd(const simd<_Up, simd_abi::fixed_size<size()>>& __x,
           enable_if_t<
             conjunction<
               is_same<simd_abi::fixed_size<size()>, abi_type>,
               negation<__is_narrowing_conversion<_Up, value_type>>,
               __converts_to_higher_integer_rank<_Up, value_type>>::value,
             void*> = nullptr)
      : simd{static_cast<array<_Up, size()>>(__x).data(), vector_aligned} {}

    // explicit type conversion constructor
#ifdef _GLIBCXX_SIMD_ENABLE_STATIC_CAST
    template <typename _Up, typename _A2,
              typename = decltype(static_simd_cast<simd>(
                declval<const simd<_Up, _A2>&>()))>
      _GLIBCXX_SIMD_ALWAYS_INLINE explicit _GLIBCXX_SIMD_CONSTEXPR
      simd(const simd<_Up, _A2>& __x)
      : simd(static_simd_cast<simd>(__x)) {}
#endif // _GLIBCXX_SIMD_ENABLE_STATIC_CAST

    // generator constructor
    template <typename _Fp>
      _GLIBCXX_SIMD_ALWAYS_INLINE explicit _GLIBCXX_SIMD_CONSTEXPR
      simd(_Fp&& __gen, _ValuePreservingOrInt<decltype(declval<_Fp>()(
                                                declval<_SizeConstant<0>&>())),
                                              value_type>* = nullptr)
      : _M_data(_Impl::_S_generator(static_cast<_Fp&&>(__gen), _S_type_tag)) {}
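
    // Usage sketch (editorial example): the generator is invoked once
    // per lane with integral constants 0 ... size()-1 (_SizeConstant
    // objects), and its result must convert to value_type without
    // narrowing:
    //
    //   stdx::native_simd<int> iota([](int i) { return i; });  // 0,1,2,...
    //   stdx::native_simd<float> half([](int i) { return i / 2.f; });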

    // load constructor
    template <typename _Up, typename _Flags>
      _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR
      simd(const _Up* __mem, _IsSimdFlagType<_Flags>)
      : _M_data(
          _Impl::_S_load(_Flags::template _S_apply<simd>(__mem), _S_type_tag))
      {}

    // loads [simd.load]
    template <typename _Up, typename _Flags>
      _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR void
      copy_from(const _Vectorizable<_Up>* __mem, _IsSimdFlagType<_Flags>)
      {
        _M_data = static_cast<decltype(_M_data)>(
                    _Impl::_S_load(_Flags::template _S_apply<simd>(__mem),
                                   _S_type_tag));
      }

    // stores [simd.store]
    template <typename _Up, typename _Flags>
      _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR void
      copy_to(_Vectorizable<_Up>* __mem, _IsSimdFlagType<_Flags>) const
      {
        _Impl::_S_store(_M_data, _Flags::template _S_apply<simd>(__mem),
                        _S_type_tag);
      }
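
    // Usage sketch (editorial example): the flag names the alignment the
    // pointer is promised to have; element_aligned is always valid,
    // vector_aligned requires memory_alignment_v bytes of alignment:
    //
    //   using V = stdx::native_simd<float>;
    //   alignas(stdx::memory_alignment_v<V>) float buf[V::size()] = {};
    //   V v;
    //   v.copy_from(buf, stdx::vector_aligned);  // aligned load
    //   v += 1.f;
    //   v.copy_to(buf, stdx::element_aligned);   // store, no extra alignment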

    // scalar access
    _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR reference
    operator[](size_t __i)
    { return {_M_data, int(__i)}; }

    _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR value_type
    operator[]([[maybe_unused]] size_t __i) const
    {
      if constexpr (__is_scalar_abi<_Abi>())
        {
          _GLIBCXX_DEBUG_ASSERT(__i == 0);
          return _M_data;
        }
      else
        return _M_data[__i];
    }

    // increment and decrement:
    _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR simd&
    operator++()
    {
      _Impl::_S_increment(_M_data);
      return *this;
    }

    _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR simd
    operator++(int)
    {
      simd __r = *this;
      _Impl::_S_increment(_M_data);
      return __r;
    }

    _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR simd&
    operator--()
    {
      _Impl::_S_decrement(_M_data);
      return *this;
    }

    _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR simd
    operator--(int)
    {
      simd __r = *this;
      _Impl::_S_decrement(_M_data);
      return __r;
    }

    // unary operators (for any _Tp)
    _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR mask_type
    operator!() const
    { return {__private_init, _Impl::_S_negate(_M_data)}; }

    _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR simd
    operator+() const
    { return *this; }

    _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR simd
    operator-() const
    { return {__private_init, _Impl::_S_unary_minus(_M_data)}; }

    // access to internal representation (suggested extension)
    _GLIBCXX_SIMD_ALWAYS_INLINE explicit _GLIBCXX_SIMD_CONSTEXPR
    simd(_CastType __init) : _M_data(__init) {}

    // compound assignment [simd.cassign]
    _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend simd&
    operator+=(simd& __lhs, const simd& __x)
    { return __lhs = __lhs + __x; }

    _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend simd&
    operator-=(simd& __lhs, const simd& __x)
    { return __lhs = __lhs - __x; }

    _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend simd&
    operator*=(simd& __lhs, const simd& __x)
    { return __lhs = __lhs * __x; }

    _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend simd&
    operator/=(simd& __lhs, const simd& __x)
    { return __lhs = __lhs / __x; }

    // binary operators [simd.binary]
    _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend simd
    operator+(const simd& __x, const simd& __y)
    { return {__private_init, _Impl::_S_plus(__x._M_data, __y._M_data)}; }

    _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend simd
    operator-(const simd& __x, const simd& __y)
    { return {__private_init, _Impl::_S_minus(__x._M_data, __y._M_data)}; }

    _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend simd
    operator*(const simd& __x, const simd& __y)
    { return {__private_init, _Impl::_S_multiplies(__x._M_data, __y._M_data)}; }

    _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend simd
    operator/(const simd& __x, const simd& __y)
    { return {__private_init, _Impl::_S_divides(__x._M_data, __y._M_data)}; }
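
    // Usage sketch (editorial example): the binary operators apply the
    // operation lane-wise, and the compound assignments above are
    // defined in terms of them:
    //
    //   stdx::native_simd<float> x = 2.f, y = 3.f;
    //   auto z = x * y + 1.f;  // every lane: 2 * 3 + 1 == 7
    //   z /= 2.f;              // every lane: 3.5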

    // compares [simd.comparison]
    _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend mask_type
    operator==(const simd& __x, const simd& __y)
    { return simd::_S_make_mask(_Impl::_S_equal_to(__x._M_data, __y._M_data)); }

    _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend mask_type
    operator!=(const simd& __x, const simd& __y)
    {
      return simd::_S_make_mask(
               _Impl::_S_not_equal_to(__x._M_data, __y._M_data));
    }

    _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend mask_type
    operator<(const simd& __x, const simd& __y)
    { return simd::_S_make_mask(_Impl::_S_less(__x._M_data, __y._M_data)); }

    _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend mask_type
    operator<=(const simd& __x, const simd& __y)
    {
      return simd::_S_make_mask(_Impl::_S_less_equal(__x._M_data, __y._M_data));
    }

    _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend mask_type
    operator>(const simd& __x, const simd& __y)
    { return simd::_S_make_mask(_Impl::_S_less(__y._M_data, __x._M_data)); }

    _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend mask_type
    operator>=(const simd& __x, const simd& __y)
    {
      return simd::_S_make_mask(_Impl::_S_less_equal(__y._M_data, __x._M_data));
    }
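
    // Usage sketch (editorial example): comparisons are lane-wise and
    // yield a mask_type, not bool; use the [simd_mask.reductions]
    // functions above to collapse the mask to a single answer:
    //
    //   stdx::native_simd<int> v([](int i) { return i; });
    //   auto k = v >= 2;  // false, false, true, true, ...
    //   if (stdx::any_of(k))
    //     int first = stdx::find_first_set(k);  // 2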

    // operator?: overloads (suggested extension) {{{
#ifdef __GXX_CONDITIONAL_IS_OVERLOADABLE__
    _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend simd
    operator?:(const mask_type& __k, const simd& __where_true,
               const simd& __where_false)
    {
      auto __ret = __where_false;
      _Impl::_S_masked_assign(__data(__k), __data(__ret), __data(__where_true));
      return __ret;
    }

#endif // __GXX_CONDITIONAL_IS_OVERLOADABLE__
    // }}}
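
    // The ?: overload above is only available with the non-standard
    // __GXX_CONDITIONAL_IS_OVERLOADABLE__ extension; portable code can
    // express the same blend with where() (a sketch):
    //
    //   stdx::native_simd<float> a = 1.f, b = 2.f;
    //   stdx::native_simd<float> r = b;
    //   stdx::where(a < b, r) = a;  // lane-wise: r = (a < b) ? a : b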

    // "private" because of the first argument's namespace
    _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR
    simd(_PrivateInit, const _MemberType& __init)
    : _M_data(__init) {}

    // "private" because of the first argument's namespace
    _GLIBCXX_SIMD_INTRINSIC
    simd(_BitsetInit, bitset<size()> __init) : _M_data()
    { where(mask_type(__bitset_init, __init), *this) = ~*this; }

    _GLIBCXX_SIMD_INTRINSIC constexpr bool
    _M_is_constprop() const
    {
      if constexpr (__is_scalar_abi<_Abi>())
        return __builtin_constant_p(_M_data);
      else
        return _M_data._M_is_constprop();
    }

  private:
    _GLIBCXX_SIMD_INTRINSIC static constexpr mask_type
    _S_make_mask(typename mask_type::_MemberType __k)
    { return {__private_init, __k}; }

    friend const auto& __data<value_type, abi_type>(const simd&);
    friend auto& __data<value_type, abi_type>(simd&);
    alignas(_Traits::_S_simd_align) _MemberType _M_data;
  };

// }}}
/// @cond undocumented
// __data {{{
template <typename _Tp, typename _Ap>
  _GLIBCXX_SIMD_INTRINSIC constexpr const auto&
  __data(const simd<_Tp, _Ap>& __x)
  { return __x._M_data; }

template <typename _Tp, typename _Ap>
  _GLIBCXX_SIMD_INTRINSIC constexpr auto&
  __data(simd<_Tp, _Ap>& __x)
  { return __x._M_data; }

// }}}
namespace __float_bitwise_operators { //{{{
template <typename _Tp, typename _Ap>
  _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR simd<_Tp, _Ap>
  operator^(const simd<_Tp, _Ap>& __a, const simd<_Tp, _Ap>& __b)
  { return {__private_init, _Ap::_SimdImpl::_S_bit_xor(__data(__a), __data(__b))}; }

template <typename _Tp, typename _Ap>
  _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR simd<_Tp, _Ap>
  operator|(const simd<_Tp, _Ap>& __a, const simd<_Tp, _Ap>& __b)
  { return {__private_init, _Ap::_SimdImpl::_S_bit_or(__data(__a), __data(__b))}; }

template <typename _Tp, typename _Ap>
  _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR simd<_Tp, _Ap>
  operator&(const simd<_Tp, _Ap>& __a, const simd<_Tp, _Ap>& __b)
  { return {__private_init, _Ap::_SimdImpl::_S_bit_and(__data(__a), __data(__b))}; }

template <typename _Tp, typename _Ap>
  _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR
  enable_if_t<is_floating_point_v<_Tp>, simd<_Tp, _Ap>>
  operator~(const simd<_Tp, _Ap>& __a)
  { return {__private_init, _Ap::_SimdImpl::_S_complement(__data(__a))}; }
} // namespace __float_bitwise_operators }}}
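
// Usage sketch (editorial example): this namespace is internal (note
// the reserved name); the library opens it where it wants bitwise
// operations on the object representation of floating-point lanes,
// e.g. to flip sign bits (__x being some simd<float>):
//
//   using namespace __float_bitwise_operators;
//   simd<float> flipped = __x ^ simd<float>(-0.f);  // negate each lane
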
/// @endcond

/// @}
_GLIBCXX_SIMD_END_NAMESPACE

#endif // __cplusplus >= 201703L
#endif // _GLIBCXX_EXPERIMENTAL_SIMD_H

// vim: foldmethod=marker foldmarker={{{,}}}