/* Copyright (C) 2014-2022 Free Software Foundation, Inc.

   This file is part of GCC.

   GCC is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 3, or (at your option)
   any later version.

   GCC is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   Under Section 7 of GPL version 3, you are granted additional
   permissions described in the GCC Runtime Library Exception, version
   3.1, as published by the Free Software Foundation.

   You should have received a copy of the GNU General Public License and
   a copy of the GCC Runtime Library Exception along with this program;
   see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
   <http://www.gnu.org/licenses/>.  */

#ifndef _IMMINTRIN_H_INCLUDED
#error "Never use <avx512vlintrin.h> directly; include <immintrin.h> instead."
#endif

#ifndef _AVX512VLINTRIN_H_INCLUDED
#define _AVX512VLINTRIN_H_INCLUDED

#ifndef __AVX512VL__
#pragma GCC push_options
#pragma GCC target("avx512vl")
#define __DISABLE_AVX512VL__
#endif /* __AVX512VL__ */

/* Helper types used by the intrinsics in this header.  __mmask32 is a
   plain unsigned int.  Each *_u vector type carries __may_alias__ and
   __aligned__ (1) in addition to its __vector_size__.  */
typedef unsigned int __mmask32;

typedef int __v4si_u
  __attribute__ ((__vector_size__ (16), __may_alias__, __aligned__ (1)));
typedef int __v8si_u
  __attribute__ ((__vector_size__ (32), __may_alias__, __aligned__ (1)));
typedef long long __v2di_u
  __attribute__ ((__vector_size__ (16), __may_alias__, __aligned__ (1)));
typedef long long __v4di_u
  __attribute__ ((__vector_size__ (32), __may_alias__, __aligned__ (1)));

/* Forward __A, __W and the mask __U (in that order) to
   __builtin_ia32_movapd256_mask; the builtin's result is returned as
   __m256d.  */
extern __inline __m256d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_mov_pd (__m256d __W, __mmask8 __U, __m256d __A)
{
  const __v4df __va = (__v4df) __A;
  const __v4df __vw = (__v4df) __W;

  return (__m256d) __builtin_ia32_movapd256_mask (__va, __vw, __U);
}

/* Forward __A, the vector returned by _mm256_setzero_pd () and the
   mask __U to __builtin_ia32_movapd256_mask; the builtin's result is
   returned as __m256d.  */
extern __inline __m256d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_maskz_mov_pd (__mmask8 __U, __m256d __A)
{
  const __v4df __va = (__v4df) __A;
  const __v4df __vz = (__v4df) _mm256_setzero_pd ();

  return (__m256d) __builtin_ia32_movapd256_mask (__va, __vz, __U);
}

/* Forward __A, __W and the mask __U (in that order) to
   __builtin_ia32_movapd128_mask; the builtin's result is returned as
   __m128d.  */
extern __inline __m128d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_mov_pd (__m128d __W, __mmask8 __U, __m128d __A)
{
  const __v2df __va = (__v2df) __A;
  const __v2df __vw = (__v2df) __W;

  return (__m128d) __builtin_ia32_movapd128_mask (__va, __vw, __U);
}

/* Forward __A, the vector returned by _mm_setzero_pd () and the mask
   __U to __builtin_ia32_movapd128_mask; the builtin's result is
   returned as __m128d.  */
extern __inline __m128d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_mov_pd (__mmask8 __U, __m128d __A)
{
  const __v2df __va = (__v2df) __A;
  const __v2df __vz = (__v2df) _mm_setzero_pd ();

  return (__m128d) __builtin_ia32_movapd128_mask (__va, __vz, __U);
}

/* Forward __P (viewed as a __v4df pointer), __W and the mask __U to
   __builtin_ia32_loadapd256_mask; the builtin's result is returned as
   __m256d.  */
extern __inline __m256d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_load_pd (__m256d __W, __mmask8 __U, void const *__P)
{
  __v4df *__mem = (__v4df *) __P;
  const __v4df __vw = (__v4df) __W;

  return (__m256d) __builtin_ia32_loadapd256_mask (__mem, __vw, __U);
}

/* Forward __P (viewed as a __v4df pointer), the vector returned by
   _mm256_setzero_pd () and the mask __U to
   __builtin_ia32_loadapd256_mask; the builtin's result is returned as
   __m256d.  */
extern __inline __m256d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_maskz_load_pd (__mmask8 __U, void const *__P)
{
  __v4df *__mem = (__v4df *) __P;
  const __v4df __vz = (__v4df) _mm256_setzero_pd ();

  return (__m256d) __builtin_ia32_loadapd256_mask (__mem, __vz, __U);
}

/* Forward __P (viewed as a __v2df pointer), __W and the mask __U to
   __builtin_ia32_loadapd128_mask; the builtin's result is returned as
   __m128d.  */
extern __inline __m128d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_load_pd (__m128d __W, __mmask8 __U, void const *__P)
{
  __v2df *__mem = (__v2df *) __P;
  const __v2df __vw = (__v2df) __W;

  return (__m128d) __builtin_ia32_loadapd128_mask (__mem, __vw, __U);
}

/* Forward __P (viewed as a __v2df pointer), the vector returned by
   _mm_setzero_pd () and the mask __U to
   __builtin_ia32_loadapd128_mask; the builtin's result is returned as
   __m128d.  */
extern __inline __m128d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_load_pd (__mmask8 __U, void const *__P)
{
  __v2df *__mem = (__v2df *) __P;
  const __v2df __vz = (__v2df) _mm_setzero_pd ();

  return (__m128d) __builtin_ia32_loadapd128_mask (__mem, __vz, __U);
}

/* Hand __P (viewed as a __v4df pointer), __A and the mask __U to
   __builtin_ia32_storeapd256_mask.  Nothing is returned.  */
extern __inline void
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_store_pd (void *__P, __mmask8 __U, __m256d __A)
{
  __v4df *__mem = (__v4df *) __P;
  const __v4df __va = (__v4df) __A;

  __builtin_ia32_storeapd256_mask (__mem, __va, __U);
}

/* Hand __P (viewed as a __v2df pointer), __A and the mask __U to
   __builtin_ia32_storeapd128_mask.  Nothing is returned.  */
extern __inline void
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_store_pd (void *__P, __mmask8 __U, __m128d __A)
{
  __v2df *__mem = (__v2df *) __P;
  const __v2df __va = (__v2df) __A;

  __builtin_ia32_storeapd128_mask (__mem, __va, __U);
}

/* Forward __A, __W and the mask __U (in that order) to
   __builtin_ia32_movaps256_mask; the builtin's result is returned as
   __m256.  */
extern __inline __m256
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_mov_ps (__m256 __W, __mmask8 __U, __m256 __A)
{
  const __v8sf __va = (__v8sf) __A;
  const __v8sf __vw = (__v8sf) __W;

  return (__m256) __builtin_ia32_movaps256_mask (__va, __vw, __U);
}

/* Forward __A, the vector returned by _mm256_setzero_ps () and the
   mask __U to __builtin_ia32_movaps256_mask; the builtin's result is
   returned as __m256.  */
extern __inline __m256
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_maskz_mov_ps (__mmask8 __U, __m256 __A)
{
  const __v8sf __va = (__v8sf) __A;
  const __v8sf __vz = (__v8sf) _mm256_setzero_ps ();

  return (__m256) __builtin_ia32_movaps256_mask (__va, __vz, __U);
}

/* Forward __A, __W and the mask __U (in that order) to
   __builtin_ia32_movaps128_mask; the builtin's result is returned as
   __m128.  */
extern __inline __m128
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_mov_ps (__m128 __W, __mmask8 __U, __m128 __A)
{
  const __v4sf __va = (__v4sf) __A;
  const __v4sf __vw = (__v4sf) __W;

  return (__m128) __builtin_ia32_movaps128_mask (__va, __vw, __U);
}

/* Forward __A, the vector returned by _mm_setzero_ps () and the mask
   __U to __builtin_ia32_movaps128_mask; the builtin's result is
   returned as __m128.  */
extern __inline __m128
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_mov_ps (__mmask8 __U, __m128 __A)
{
  const __v4sf __va = (__v4sf) __A;
  const __v4sf __vz = (__v4sf) _mm_setzero_ps ();

  return (__m128) __builtin_ia32_movaps128_mask (__va, __vz, __U);
}

/* Forward __P (viewed as a __v8sf pointer), __W and the mask __U to
   __builtin_ia32_loadaps256_mask; the builtin's result is returned as
   __m256.  */
extern __inline __m256
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_load_ps (__m256 __W, __mmask8 __U, void const *__P)
{
  __v8sf *__mem = (__v8sf *) __P;
  const __v8sf __vw = (__v8sf) __W;

  return (__m256) __builtin_ia32_loadaps256_mask (__mem, __vw, __U);
}

/* Forward __P (viewed as a __v8sf pointer), the vector returned by
   _mm256_setzero_ps () and the mask __U to
   __builtin_ia32_loadaps256_mask; the builtin's result is returned as
   __m256.  */
extern __inline __m256
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_maskz_load_ps (__mmask8 __U, void const *__P)
{
  __v8sf *__mem = (__v8sf *) __P;
  const __v8sf __vz = (__v8sf) _mm256_setzero_ps ();

  return (__m256) __builtin_ia32_loadaps256_mask (__mem, __vz, __U);
}

/* Forward __P (viewed as a __v4sf pointer), __W and the mask __U to
   __builtin_ia32_loadaps128_mask; the builtin's result is returned as
   __m128.  */
extern __inline __m128
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_load_ps (__m128 __W, __mmask8 __U, void const *__P)
{
  __v4sf *__mem = (__v4sf *) __P;
  const __v4sf __vw = (__v4sf) __W;

  return (__m128) __builtin_ia32_loadaps128_mask (__mem, __vw, __U);
}

/* Forward __P (viewed as a __v4sf pointer), the vector returned by
   _mm_setzero_ps () and the mask __U to
   __builtin_ia32_loadaps128_mask; the builtin's result is returned as
   __m128.  */
extern __inline __m128
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_load_ps (__mmask8 __U, void const *__P)
{
  __v4sf *__mem = (__v4sf *) __P;
  const __v4sf __vz = (__v4sf) _mm_setzero_ps ();

  return (__m128) __builtin_ia32_loadaps128_mask (__mem, __vz, __U);
}

/* Hand __P (viewed as a __v8sf pointer), __A and the mask __U to
   __builtin_ia32_storeaps256_mask.  Nothing is returned.  */
extern __inline void
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_store_ps (void *__P, __mmask8 __U, __m256 __A)
{
  __v8sf *__mem = (__v8sf *) __P;
  const __v8sf __va = (__v8sf) __A;

  __builtin_ia32_storeaps256_mask (__mem, __va, __U);
}

/* Hand __P (viewed as a __v4sf pointer), __A and the mask __U to
   __builtin_ia32_storeaps128_mask.  Nothing is returned.  */
extern __inline void
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_store_ps (void *__P, __mmask8 __U, __m128 __A)
{
  __v4sf *__mem = (__v4sf *) __P;
  const __v4sf __va = (__v4sf) __A;

  __builtin_ia32_storeaps128_mask (__mem, __va, __U);
}

/* Forward __A, __W and the mask __U (in that order) to
   __builtin_ia32_movdqa64_256_mask; the builtin's result is returned
   as __m256i.  */
extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_mov_epi64 (__m256i __W, __mmask8 __U, __m256i __A)
{
  const __v4di __va = (__v4di) __A;
  const __v4di __vw = (__v4di) __W;

  return (__m256i) __builtin_ia32_movdqa64_256_mask (__va, __vw, __U);
}

/* Forward __A, the vector returned by _mm256_setzero_si256 () and the
   mask __U to __builtin_ia32_movdqa64_256_mask; the builtin's result
   is returned as __m256i.  */
extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_maskz_mov_epi64 (__mmask8 __U, __m256i __A)
{
  const __v4di __va = (__v4di) __A;
  const __v4di __vz = (__v4di) _mm256_setzero_si256 ();

  return (__m256i) __builtin_ia32_movdqa64_256_mask (__va, __vz, __U);
}

/* Forward __A, __W and the mask __U (in that order) to
   __builtin_ia32_movdqa64_128_mask; the builtin's result is returned
   as __m128i.  */
extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_mov_epi64 (__m128i __W, __mmask8 __U, __m128i __A)
{
  const __v2di __va = (__v2di) __A;
  const __v2di __vw = (__v2di) __W;

  return (__m128i) __builtin_ia32_movdqa64_128_mask (__va, __vw, __U);
}

/* Forward __A, the vector returned by _mm_setzero_si128 () and the
   mask __U to __builtin_ia32_movdqa64_128_mask; the builtin's result
   is returned as __m128i.  */
extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_mov_epi64 (__mmask8 __U, __m128i __A)
{
  const __v2di __va = (__v2di) __A;
  const __v2di __vz = (__v2di) _mm_setzero_si128 ();

  return (__m128i) __builtin_ia32_movdqa64_128_mask (__va, __vz, __U);
}

/* Read one __v4di object through __P and return it as __m256i.  The
   access is made with the plain __v4di type.  */
extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_load_epi64 (void const *__P)
{
  __v4di *__src = (__v4di *) __P;

  return (__m256i) *__src;
}

/* Forward __P (viewed as a __v4di pointer), __W and the mask __U to
   __builtin_ia32_movdqa64load256_mask; the builtin's result is
   returned as __m256i.  */
extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_load_epi64 (__m256i __W, __mmask8 __U, void const *__P)
{
  __v4di *__mem = (__v4di *) __P;
  const __v4di __vw = (__v4di) __W;

  return (__m256i) __builtin_ia32_movdqa64load256_mask (__mem, __vw, __U);
}

/* Forward __P (viewed as a __v4di pointer), the vector returned by
   _mm256_setzero_si256 () and the mask __U to
   __builtin_ia32_movdqa64load256_mask; the builtin's result is
   returned as __m256i.  */
extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_maskz_load_epi64 (__mmask8 __U, void const *__P)
{
  __v4di *__mem = (__v4di *) __P;
  const __v4di __vz = (__v4di) _mm256_setzero_si256 ();

  return (__m256i) __builtin_ia32_movdqa64load256_mask (__mem, __vz, __U);
}

/* Read one __v2di object through __P and return it as __m128i.  The
   access is made with the plain __v2di type.  */
extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_load_epi64 (void const *__P)
{
  __v2di *__src = (__v2di *) __P;

  return (__m128i) *__src;
}

/* Forward __P (viewed as a __v2di pointer), __W and the mask __U to
   __builtin_ia32_movdqa64load128_mask; the builtin's result is
   returned as __m128i.  */
extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_load_epi64 (__m128i __W, __mmask8 __U, void const *__P)
{
  __v2di *__mem = (__v2di *) __P;
  const __v2di __vw = (__v2di) __W;

  return (__m128i) __builtin_ia32_movdqa64load128_mask (__mem, __vw, __U);
}

/* Forward __P (viewed as a __v2di pointer), the vector returned by
   _mm_setzero_si128 () and the mask __U to
   __builtin_ia32_movdqa64load128_mask; the builtin's result is
   returned as __m128i.  */
extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_load_epi64 (__mmask8 __U, void const *__P)
{
  __v2di *__mem = (__v2di *) __P;
  const __v2di __vz = (__v2di) _mm_setzero_si128 ();

  return (__m128i) __builtin_ia32_movdqa64load128_mask (__mem, __vz, __U);
}

/* Hand __P (viewed as a __v4di pointer), __A and the mask __U to
   __builtin_ia32_movdqa64store256_mask.  Nothing is returned.  */
extern __inline void
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_store_epi64 (void *__P, __mmask8 __U, __m256i __A)
{
  __v4di *__mem = (__v4di *) __P;
  const __v4di __va = (__v4di) __A;

  __builtin_ia32_movdqa64store256_mask (__mem, __va, __U);
}

/* Hand __P (viewed as a __v2di pointer), __A and the mask __U to
   __builtin_ia32_movdqa64store128_mask.  Nothing is returned.  */
extern __inline void
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_store_epi64 (void *__P, __mmask8 __U, __m128i __A)
{
  __v2di *__mem = (__v2di *) __P;
  const __v2di __va = (__v2di) __A;

  __builtin_ia32_movdqa64store128_mask (__mem, __va, __U);
}

/* Forward __A, __W and the mask __U (in that order) to
   __builtin_ia32_movdqa32_256_mask; the builtin's result is returned
   as __m256i.  */
extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_mov_epi32 (__m256i __W, __mmask8 __U, __m256i __A)
{
  const __v8si __va = (__v8si) __A;
  const __v8si __vw = (__v8si) __W;

  return (__m256i) __builtin_ia32_movdqa32_256_mask (__va, __vw, __U);
}

/* Forward __A, the vector returned by _mm256_setzero_si256 () and the
   mask __U to __builtin_ia32_movdqa32_256_mask; the builtin's result
   is returned as __m256i.  */
extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_maskz_mov_epi32 (__mmask8 __U, __m256i __A)
{
  const __v8si __va = (__v8si) __A;
  const __v8si __vz = (__v8si) _mm256_setzero_si256 ();

  return (__m256i) __builtin_ia32_movdqa32_256_mask (__va, __vz, __U);
}

/* Forward __A, __W and the mask __U (in that order) to
   __builtin_ia32_movdqa32_128_mask; the builtin's result is returned
   as __m128i.  */
extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_mov_epi32 (__m128i __W, __mmask8 __U, __m128i __A)
{
  const __v4si __va = (__v4si) __A;
  const __v4si __vw = (__v4si) __W;

  return (__m128i) __builtin_ia32_movdqa32_128_mask (__va, __vw, __U);
}

/* Forward __A, the vector returned by _mm_setzero_si128 () and the
   mask __U to __builtin_ia32_movdqa32_128_mask; the builtin's result
   is returned as __m128i.  */
extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_mov_epi32 (__mmask8 __U, __m128i __A)
{
  const __v4si __va = (__v4si) __A;
  const __v4si __vz = (__v4si) _mm_setzero_si128 ();

  return (__m128i) __builtin_ia32_movdqa32_128_mask (__va, __vz, __U);
}

/* Read one __v8si object through __P and return it as __m256i.  The
   access is made with the plain __v8si type.  */
extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_load_epi32 (void const *__P)
{
  __v8si *__src = (__v8si *) __P;

  return (__m256i) *__src;
}

/* Forward __P (viewed as a __v8si pointer), __W and the mask __U to
   __builtin_ia32_movdqa32load256_mask; the builtin's result is
   returned as __m256i.  */
extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_load_epi32 (__m256i __W, __mmask8 __U, void const *__P)
{
  __v8si *__mem = (__v8si *) __P;
  const __v8si __vw = (__v8si) __W;

  return (__m256i) __builtin_ia32_movdqa32load256_mask (__mem, __vw, __U);
}

/* Forward __P (viewed as a __v8si pointer), the vector returned by
   _mm256_setzero_si256 () and the mask __U to
   __builtin_ia32_movdqa32load256_mask; the builtin's result is
   returned as __m256i.  */
extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_maskz_load_epi32 (__mmask8 __U, void const *__P)
{
  __v8si *__mem = (__v8si *) __P;
  const __v8si __vz = (__v8si) _mm256_setzero_si256 ();

  return (__m256i) __builtin_ia32_movdqa32load256_mask (__mem, __vz, __U);
}

/* Read one __v4si object through __P and return it as __m128i.  The
   access is made with the plain __v4si type.  */
extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_load_epi32 (void const *__P)
{
  __v4si *__src = (__v4si *) __P;

  return (__m128i) *__src;
}

/* Forward __P (viewed as a __v4si pointer), __W and the mask __U to
   __builtin_ia32_movdqa32load128_mask; the builtin's result is
   returned as __m128i.  */
extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_load_epi32 (__m128i __W, __mmask8 __U, void const *__P)
{
  __v4si *__mem = (__v4si *) __P;
  const __v4si __vw = (__v4si) __W;

  return (__m128i) __builtin_ia32_movdqa32load128_mask (__mem, __vw, __U);
}

/* Forward __P (viewed as a __v4si pointer), the vector returned by
   _mm_setzero_si128 () and the mask __U to
   __builtin_ia32_movdqa32load128_mask; the builtin's result is
   returned as __m128i.  */
extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_load_epi32 (__mmask8 __U, void const *__P)
{
  __v4si *__mem = (__v4si *) __P;
  const __v4si __vz = (__v4si) _mm_setzero_si128 ();

  return (__m128i) __builtin_ia32_movdqa32load128_mask (__mem, __vz, __U);
}

/* Write __A through __P as one __v8si object.  The access is made with
   the plain __v8si type.  */
extern __inline void
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_store_epi32 (void *__P, __m256i __A)
{
  __v8si *__dst = (__v8si *) __P;

  *__dst = (__v8si) __A;
}

/* Hand __P (viewed as a __v8si pointer), __A and the mask __U to
   __builtin_ia32_movdqa32store256_mask.  Nothing is returned.  */
extern __inline void
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_store_epi32 (void *__P, __mmask8 __U, __m256i __A)
{
  __v8si *__mem = (__v8si *) __P;
  const __v8si __va = (__v8si) __A;

  __builtin_ia32_movdqa32store256_mask (__mem, __va, __U);
}

/* Write __A through __P as one __v4si object.  The access is made with
   the plain __v4si type.  */
extern __inline void
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_store_epi32 (void *__P, __m128i __A)
{
  __v4si *__dst = (__v4si *) __P;

  *__dst = (__v4si) __A;
}

/* Hand __P (viewed as a __v4si pointer), __A and the mask __U to
   __builtin_ia32_movdqa32store128_mask.  Nothing is returned.  */
extern __inline void
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_store_epi32 (void *__P, __mmask8 __U, __m128i __A)
{
  __v4si *__mem = (__v4si *) __P;
  const __v4si __va = (__v4si) __A;

  __builtin_ia32_movdqa32store128_mask (__mem, __va, __U);
}

/* Forward __A, __B, __W and the mask __U (in that order) to
   __builtin_ia32_addpd128_mask; the builtin's result is returned as
   __m128d.  */
extern __inline __m128d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_add_pd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
{
  const __v2df __va = (__v2df) __A;
  const __v2df __vb = (__v2df) __B;
  const __v2df __vw = (__v2df) __W;

  return (__m128d) __builtin_ia32_addpd128_mask (__va, __vb, __vw, __U);
}

/* Forward __A, __B, the vector returned by _mm_setzero_pd () and the
   mask __U to __builtin_ia32_addpd128_mask; the builtin's result is
   returned as __m128d.  */
extern __inline __m128d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_add_pd (__mmask8 __U, __m128d __A, __m128d __B)
{
  const __v2df __va = (__v2df) __A;
  const __v2df __vb = (__v2df) __B;
  const __v2df __vz = (__v2df) _mm_setzero_pd ();

  return (__m128d) __builtin_ia32_addpd128_mask (__va, __vb, __vz, __U);
}

/* Forward __A, __B, __W and the mask __U (in that order) to
   __builtin_ia32_addpd256_mask; the builtin's result is returned as
   __m256d.  */
extern __inline __m256d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_add_pd (__m256d __W, __mmask8 __U, __m256d __A,
		    __m256d __B)
{
  const __v4df __va = (__v4df) __A;
  const __v4df __vb = (__v4df) __B;
  const __v4df __vw = (__v4df) __W;

  return (__m256d) __builtin_ia32_addpd256_mask (__va, __vb, __vw, __U);
}

/* Forward __A, __B, the vector returned by _mm256_setzero_pd () and
   the mask __U to __builtin_ia32_addpd256_mask; the builtin's result
   is returned as __m256d.  */
extern __inline __m256d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_maskz_add_pd (__mmask8 __U, __m256d __A, __m256d __B)
{
  const __v4df __va = (__v4df) __A;
  const __v4df __vb = (__v4df) __B;
  const __v4df __vz = (__v4df) _mm256_setzero_pd ();

  return (__m256d) __builtin_ia32_addpd256_mask (__va, __vb, __vz, __U);
}

/* Forward __A, __B, __W and the mask __U (in that order) to
   __builtin_ia32_addps128_mask; the builtin's result is returned as
   __m128.  */
extern __inline __m128
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_add_ps (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
{
  const __v4sf __va = (__v4sf) __A;
  const __v4sf __vb = (__v4sf) __B;
  const __v4sf __vw = (__v4sf) __W;

  return (__m128) __builtin_ia32_addps128_mask (__va, __vb, __vw, __U);
}

/* Forward __A, __B, the vector returned by _mm_setzero_ps () and the
   mask __U to __builtin_ia32_addps128_mask; the builtin's result is
   returned as __m128.  */
extern __inline __m128
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_add_ps (__mmask8 __U, __m128 __A, __m128 __B)
{
  const __v4sf __va = (__v4sf) __A;
  const __v4sf __vb = (__v4sf) __B;
  const __v4sf __vz = (__v4sf) _mm_setzero_ps ();

  return (__m128) __builtin_ia32_addps128_mask (__va, __vb, __vz, __U);
}

/* Forward __A, __B, __W and the mask __U (in that order) to
   __builtin_ia32_addps256_mask; the builtin's result is returned as
   __m256.  */
extern __inline __m256
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_add_ps (__m256 __W, __mmask8 __U, __m256 __A, __m256 __B)
{
  const __v8sf __va = (__v8sf) __A;
  const __v8sf __vb = (__v8sf) __B;
  const __v8sf __vw = (__v8sf) __W;

  return (__m256) __builtin_ia32_addps256_mask (__va, __vb, __vw, __U);
}

/* Forward __A, __B, the vector returned by _mm256_setzero_ps () and
   the mask __U to __builtin_ia32_addps256_mask; the builtin's result
   is returned as __m256.  */
extern __inline __m256
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_maskz_add_ps (__mmask8 __U, __m256 __A, __m256 __B)
{
  const __v8sf __va = (__v8sf) __A;
  const __v8sf __vb = (__v8sf) __B;
  const __v8sf __vz = (__v8sf) _mm256_setzero_ps ();

  return (__m256) __builtin_ia32_addps256_mask (__va, __vb, __vz, __U);
}

/* Forward __A, __B, __W and the mask __U (in that order) to
   __builtin_ia32_subpd128_mask; the builtin's result is returned as
   __m128d.  */
extern __inline __m128d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_sub_pd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
{
  const __v2df __va = (__v2df) __A;
  const __v2df __vb = (__v2df) __B;
  const __v2df __vw = (__v2df) __W;

  return (__m128d) __builtin_ia32_subpd128_mask (__va, __vb, __vw, __U);
}

/* Forward __A, __B, the vector returned by _mm_setzero_pd () and the
   mask __U to __builtin_ia32_subpd128_mask; the builtin's result is
   returned as __m128d.  */
extern __inline __m128d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_sub_pd (__mmask8 __U, __m128d __A, __m128d __B)
{
  const __v2df __va = (__v2df) __A;
  const __v2df __vb = (__v2df) __B;
  const __v2df __vz = (__v2df) _mm_setzero_pd ();

  return (__m128d) __builtin_ia32_subpd128_mask (__va, __vb, __vz, __U);
}

/* Forward __A, __B, __W and the mask __U (in that order) to
   __builtin_ia32_subpd256_mask; the builtin's result is returned as
   __m256d.  */
extern __inline __m256d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_sub_pd (__m256d __W, __mmask8 __U, __m256d __A,
		    __m256d __B)
{
  const __v4df __va = (__v4df) __A;
  const __v4df __vb = (__v4df) __B;
  const __v4df __vw = (__v4df) __W;

  return (__m256d) __builtin_ia32_subpd256_mask (__va, __vb, __vw, __U);
}

/* Forward __A, __B, the vector returned by _mm256_setzero_pd () and
   the mask __U to __builtin_ia32_subpd256_mask; the builtin's result
   is returned as __m256d.  */
extern __inline __m256d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_maskz_sub_pd (__mmask8 __U, __m256d __A, __m256d __B)
{
  const __v4df __va = (__v4df) __A;
  const __v4df __vb = (__v4df) __B;
  const __v4df __vz = (__v4df) _mm256_setzero_pd ();

  return (__m256d) __builtin_ia32_subpd256_mask (__va, __vb, __vz, __U);
}

/* Forward __A, __B, __W and the mask __U (in that order) to
   __builtin_ia32_subps128_mask; the builtin's result is returned as
   __m128.  */
extern __inline __m128
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_sub_ps (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
{
  const __v4sf __va = (__v4sf) __A;
  const __v4sf __vb = (__v4sf) __B;
  const __v4sf __vw = (__v4sf) __W;

  return (__m128) __builtin_ia32_subps128_mask (__va, __vb, __vw, __U);
}

/* Forward __A, __B, the vector returned by _mm_setzero_ps () and the
   mask __U to __builtin_ia32_subps128_mask; the builtin's result is
   returned as __m128.  */
extern __inline __m128
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_sub_ps (__mmask8 __U, __m128 __A, __m128 __B)
{
  const __v4sf __va = (__v4sf) __A;
  const __v4sf __vb = (__v4sf) __B;
  const __v4sf __vz = (__v4sf) _mm_setzero_ps ();

  return (__m128) __builtin_ia32_subps128_mask (__va, __vb, __vz, __U);
}

/* Forward __A, __B, __W and the mask __U (in that order) to
   __builtin_ia32_subps256_mask; the builtin's result is returned as
   __m256.  */
extern __inline __m256
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_sub_ps (__m256 __W, __mmask8 __U, __m256 __A, __m256 __B)
{
  const __v8sf __va = (__v8sf) __A;
  const __v8sf __vb = (__v8sf) __B;
  const __v8sf __vw = (__v8sf) __W;

  return (__m256) __builtin_ia32_subps256_mask (__va, __vb, __vw, __U);
}

/* Forward __A, __B, the vector returned by _mm256_setzero_ps () and
   the mask __U to __builtin_ia32_subps256_mask; the builtin's result
   is returned as __m256.  */
extern __inline __m256
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_maskz_sub_ps (__mmask8 __U, __m256 __A, __m256 __B)
{
  const __v8sf __va = (__v8sf) __A;
  const __v8sf __vb = (__v8sf) __B;
  const __v8sf __vz = (__v8sf) _mm256_setzero_ps ();

  return (__m256) __builtin_ia32_subps256_mask (__va, __vb, __vz, __U);
}

/* Write __A through __P as one __m256i object.  */
extern __inline void
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_store_epi64 (void *__P, __m256i __A)
{
  __m256i *__dst = (__m256i *) __P;

  *__dst = __A;
}

/* Write __A through __P as one __m128i object.  */
extern __inline void
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_store_epi64 (void *__P, __m128i __A)
{
  __m128i *__dst = (__m128i *) __P;

  *__dst = __A;
}

/* Forward __P (viewed as a const double pointer), __W and the mask
   __U to __builtin_ia32_loadupd256_mask; the builtin's result is
   returned as __m256d.  */
extern __inline __m256d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_loadu_pd (__m256d __W, __mmask8 __U, void const *__P)
{
  const double *__mem = (const double *) __P;
  const __v4df __vw = (__v4df) __W;

  return (__m256d) __builtin_ia32_loadupd256_mask (__mem, __vw, __U);
}

/* Forward __P (viewed as a const double pointer), the vector returned
   by _mm256_setzero_pd () and the mask __U to
   __builtin_ia32_loadupd256_mask; the builtin's result is returned as
   __m256d.  */
extern __inline __m256d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_maskz_loadu_pd (__mmask8 __U, void const *__P)
{
  const double *__mem = (const double *) __P;
  const __v4df __vz = (__v4df) _mm256_setzero_pd ();

  return (__m256d) __builtin_ia32_loadupd256_mask (__mem, __vz, __U);
}

/* Forward __P (viewed as a const double pointer), __W and the mask
   __U to __builtin_ia32_loadupd128_mask; the builtin's result is
   returned as __m128d.  */
extern __inline __m128d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_loadu_pd (__m128d __W, __mmask8 __U, void const *__P)
{
  const double *__mem = (const double *) __P;
  const __v2df __vw = (__v2df) __W;

  return (__m128d) __builtin_ia32_loadupd128_mask (__mem, __vw, __U);
}

/* Forward __P (viewed as a const double pointer), the vector returned
   by _mm_setzero_pd () and the mask __U to
   __builtin_ia32_loadupd128_mask; the builtin's result is returned as
   __m128d.  */
extern __inline __m128d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_loadu_pd (__mmask8 __U, void const *__P)
{
  const double *__mem = (const double *) __P;
  const __v2df __vz = (__v2df) _mm_setzero_pd ();

  return (__m128d) __builtin_ia32_loadupd128_mask (__mem, __vz, __U);
}

/* Hand __P (viewed as a double pointer), __A and the mask __U to
   __builtin_ia32_storeupd256_mask.  Nothing is returned.  */
extern __inline void
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_storeu_pd (void *__P, __mmask8 __U, __m256d __A)
{
  double *__mem = (double *) __P;
  const __v4df __va = (__v4df) __A;

  __builtin_ia32_storeupd256_mask (__mem, __va, __U);
}

/* Hand __P (viewed as a double pointer), __A and the mask __U to
   __builtin_ia32_storeupd128_mask.  Nothing is returned.  */
extern __inline void
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_storeu_pd (void *__P, __mmask8 __U, __m128d __A)
{
  double *__mem = (double *) __P;
  const __v2df __va = (__v2df) __A;

  __builtin_ia32_storeupd128_mask (__mem, __va, __U);
}

/* Forward __P (viewed as a const float pointer), __W and the mask __U
   to __builtin_ia32_loadups256_mask; the builtin's result is returned
   as __m256.  */
extern __inline __m256
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_loadu_ps (__m256 __W, __mmask8 __U, void const *__P)
{
  const float *__mem = (const float *) __P;
  const __v8sf __vw = (__v8sf) __W;

  return (__m256) __builtin_ia32_loadups256_mask (__mem, __vw, __U);
}

/* Forward __P (viewed as a const float pointer), the vector returned
   by _mm256_setzero_ps () and the mask __U to
   __builtin_ia32_loadups256_mask; the builtin's result is returned as
   __m256.  */
extern __inline __m256
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_maskz_loadu_ps (__mmask8 __U, void const *__P)
{
  const float *__mem = (const float *) __P;
  const __v8sf __vz = (__v8sf) _mm256_setzero_ps ();

  return (__m256) __builtin_ia32_loadups256_mask (__mem, __vz, __U);
}

/* Forward __P (viewed as a const float pointer), __W and the mask __U
   to __builtin_ia32_loadups128_mask; the builtin's result is returned
   as __m128.  */
extern __inline __m128
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_loadu_ps (__m128 __W, __mmask8 __U, void const *__P)
{
  const float *__mem = (const float *) __P;
  const __v4sf __vw = (__v4sf) __W;

  return (__m128) __builtin_ia32_loadups128_mask (__mem, __vw, __U);
}

/* Forward __P (viewed as a const float pointer), the vector returned
   by _mm_setzero_ps () and the mask __U to
   __builtin_ia32_loadups128_mask; the builtin's result is returned as
   __m128.  */
extern __inline __m128
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_loadu_ps (__mmask8 __U, void const *__P)
{
  const float *__mem = (const float *) __P;
  const __v4sf __vz = (__v4sf) _mm_setzero_ps ();

  return (__m128) __builtin_ia32_loadups128_mask (__mem, __vz, __U);
}

/* Hand __P (viewed as a float pointer), __A and the mask __U to
   __builtin_ia32_storeups256_mask.  Nothing is returned.  */
extern __inline void
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_storeu_ps (void *__P, __mmask8 __U, __m256 __A)
{
  float *__mem = (float *) __P;
  const __v8sf __va = (__v8sf) __A;

  __builtin_ia32_storeups256_mask (__mem, __va, __U);
}

/* Hand __P (viewed as a float pointer), __A and the mask __U to
   __builtin_ia32_storeups128_mask.  Nothing is returned.  */
extern __inline void
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_storeu_ps (void *__P, __mmask8 __U, __m128 __A)
{
  float *__mem = (float *) __P;
  const __v4sf __va = (__v4sf) __A;

  __builtin_ia32_storeups128_mask (__mem, __va, __U);
}

/* Read through __P using __v4di_u (the typedef this header declares
   with __may_alias__ and __aligned__ (1)) and return the value as
   __m256i.  */
extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_loadu_epi64 (void const *__P)
{
  __v4di_u *__src = (__v4di_u *) __P;

  return (__m256i) *__src;
}

/* Forward __P (viewed as a const long long pointer), __W and the mask
   __U to __builtin_ia32_loaddqudi256_mask; the builtin's result is
   returned as __m256i.  */
extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_loadu_epi64 (__m256i __W, __mmask8 __U, void const *__P)
{
  const long long *__mem = (const long long *) __P;
  const __v4di __vw = (__v4di) __W;

  return (__m256i) __builtin_ia32_loaddqudi256_mask (__mem, __vw, __U);
}

/* Forward __P (viewed as a const long long pointer), the vector
   returned by _mm256_setzero_si256 () and the mask __U to
   __builtin_ia32_loaddqudi256_mask; the builtin's result is returned
   as __m256i.  */
extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_maskz_loadu_epi64 (__mmask8 __U, void const *__P)
{
  const long long *__mem = (const long long *) __P;
  const __v4di __vz = (__v4di) _mm256_setzero_si256 ();

  return (__m256i) __builtin_ia32_loaddqudi256_mask (__mem, __vz, __U);
}

/* Read through __P using __v2di_u (the typedef this header declares
   with __may_alias__ and __aligned__ (1)) and return the value as
   __m128i.  */
extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_loadu_epi64 (void const *__P)
{
  __v2di_u *__src = (__v2di_u *) __P;

  return (__m128i) *__src;
}

/* Forward __P (viewed as a const long long pointer), __W and the mask
   __U to __builtin_ia32_loaddqudi128_mask; the builtin's result is
   returned as __m128i.  */
extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_loadu_epi64 (__m128i __W, __mmask8 __U, void const *__P)
{
  const long long *__mem = (const long long *) __P;
  const __v2di __vw = (__v2di) __W;

  return (__m128i) __builtin_ia32_loaddqudi128_mask (__mem, __vw, __U);
}

/* Forward __P (viewed as a const long long pointer), the vector
   returned by _mm_setzero_si128 () and the mask __U to
   __builtin_ia32_loaddqudi128_mask; the builtin's result is returned
   as __m128i.  */
extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_loadu_epi64 (__mmask8 __U, void const *__P)
{
  const long long *__mem = (const long long *) __P;
  const __v2di __vz = (__v2di) _mm_setzero_si128 ();

  return (__m128i) __builtin_ia32_loaddqudi128_mask (__mem, __vz, __U);
}

/* Write __A, converted to __m256i_u, through __P viewed as a
   __m256i_u pointer.  */
extern __inline void
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_storeu_epi64 (void *__P, __m256i __A)
{
  __m256i_u *__dst = (__m256i_u *) __P;

  *__dst = (__m256i_u) __A;
}

/* Hand __P (viewed as a long long pointer), __A and the mask __U to
   __builtin_ia32_storedqudi256_mask.  Nothing is returned.  */
extern __inline void
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_storeu_epi64 (void *__P, __mmask8 __U, __m256i __A)
{
  long long *__mem = (long long *) __P;
  const __v4di __va = (__v4di) __A;

  __builtin_ia32_storedqudi256_mask (__mem, __va, __U);
}

/* Write __A, converted to __m128i_u, through __P viewed as a
   __m128i_u pointer.  */
extern __inline void
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_storeu_epi64 (void *__P, __m128i __A)
{
  __m128i_u *__dst = (__m128i_u *) __P;

  *__dst = (__m128i_u) __A;
}

/* Hand __P (viewed as a long long pointer), __A and the mask __U to
   __builtin_ia32_storedqudi128_mask.  Nothing is returned.  */
extern __inline void
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_storeu_epi64 (void *__P, __mmask8 __U, __m128i __A)
{
  long long *__mem = (long long *) __P;
  const __v2di __va = (__v2di) __A;

  __builtin_ia32_storedqudi128_mask (__mem, __va, __U);
}

/* Read through __P using __v8si_u (the typedef this header declares
   with __may_alias__ and __aligned__ (1)) and return the value as
   __m256i.  */
extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_loadu_epi32 (void const *__P)
{
  __v8si_u *__src = (__v8si_u *) __P;

  return (__m256i) *__src;
}

/* Forward __P (viewed as a const int pointer), __W and the mask __U
   to __builtin_ia32_loaddqusi256_mask; the builtin's result is
   returned as __m256i.  */
extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_loadu_epi32 (__m256i __W, __mmask8 __U, void const *__P)
{
  const int *__mem = (const int *) __P;
  const __v8si __vw = (__v8si) __W;

  return (__m256i) __builtin_ia32_loaddqusi256_mask (__mem, __vw, __U);
}

/* Forward __P (viewed as a const int pointer), the vector returned by
   _mm256_setzero_si256 () and the mask __U to
   __builtin_ia32_loaddqusi256_mask; the builtin's result is returned
   as __m256i.  */
extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_maskz_loadu_epi32 (__mmask8 __U, void const *__P)
{
  const int *__mem = (const int *) __P;
  const __v8si __vz = (__v8si) _mm256_setzero_si256 ();

  return (__m256i) __builtin_ia32_loaddqusi256_mask (__mem, __vz, __U);
}

/* Read through __P using __v4si_u (the typedef this header declares
   with __may_alias__ and __aligned__ (1)) and return the value as
   __m128i.  */
extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_loadu_epi32 (void const *__P)
{
  __v4si_u *__src = (__v4si_u *) __P;

  return (__m128i) *__src;
}

/* Forward __P (viewed as a const int pointer), __W and the mask __U
   to __builtin_ia32_loaddqusi128_mask; the builtin's result is
   returned as __m128i.  */
extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_loadu_epi32 (__m128i __W, __mmask8 __U, void const *__P)
{
  const int *__mem = (const int *) __P;
  const __v4si __vw = (__v4si) __W;

  return (__m128i) __builtin_ia32_loaddqusi128_mask (__mem, __vw, __U);
}

/* Forward __P (viewed as a const int pointer), the vector returned by
   _mm_setzero_si128 () and the mask __U to
   __builtin_ia32_loaddqusi128_mask; the builtin's result is returned
   as __m128i.  */
extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_loadu_epi32 (__mmask8 __U, void const *__P)
{
  const int *__mem = (const int *) __P;
  const __v4si __vz = (__v4si) _mm_setzero_si128 ();

  return (__m128i) __builtin_ia32_loaddqusi128_mask (__mem, __vz, __U);
}

/* Write __A, converted to __m256i_u, through __P viewed as a
   __m256i_u pointer.  */
extern __inline void
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_storeu_epi32 (void *__P, __m256i __A)
{
  __m256i_u *__dst = (__m256i_u *) __P;

  *__dst = (__m256i_u) __A;
}

/* Hand __P (viewed as an int pointer), __A and the mask __U to
   __builtin_ia32_storedqusi256_mask.  Nothing is returned.  */
extern __inline void
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_storeu_epi32 (void *__P, __mmask8 __U, __m256i __A)
{
  int *__mem = (int *) __P;
  const __v8si __va = (__v8si) __A;

  __builtin_ia32_storedqusi256_mask (__mem, __va, __U);
}

/* Write __A, converted to __m128i_u, through __P viewed as a
   __m128i_u pointer.  */
extern __inline void
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_storeu_epi32 (void *__P, __m128i __A)
{
  __m128i_u *__dst = (__m128i_u *) __P;

  *__dst = (__m128i_u) __A;
}

/* Hand __P (viewed as an int pointer), __A and the mask __U to
   __builtin_ia32_storedqusi128_mask.  Nothing is returned.  */
extern __inline void
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_storeu_epi32 (void *__P, __mmask8 __U, __m128i __A)
{
  int *__mem = (int *) __P;
  const __v4si __va = (__v4si) __A;

  __builtin_ia32_storedqusi128_mask (__mem, __va, __U);
}

/* Forward __A, __W and the mask __U (in that order) to
   __builtin_ia32_pabsd256_mask; the builtin's result is returned as
   __m256i.  */
extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_abs_epi32 (__m256i __W, __mmask8 __U, __m256i __A)
{
  const __v8si __va = (__v8si) __A;
  const __v8si __vw = (__v8si) __W;

  return (__m256i) __builtin_ia32_pabsd256_mask (__va, __vw, __U);
}

/* Forward __A, the vector returned by _mm256_setzero_si256 () and the
   mask __U to __builtin_ia32_pabsd256_mask; the builtin's result is
   returned as __m256i.  */
extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_maskz_abs_epi32 (__mmask8 __U, __m256i __A)
{
  const __v8si __va = (__v8si) __A;
  const __v8si __vz = (__v8si) _mm256_setzero_si256 ();

  return (__m256i) __builtin_ia32_pabsd256_mask (__va, __vz, __U);
}

/* Forward __A, __W and the mask __U (in that order) to
   __builtin_ia32_pabsd128_mask; the builtin's result is returned as
   __m128i.  */
extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_abs_epi32 (__m128i __W, __mmask8 __U, __m128i __A)
{
  const __v4si __va = (__v4si) __A;
  const __v4si __vw = (__v4si) __W;

  return (__m128i) __builtin_ia32_pabsd128_mask (__va, __vw, __U);
}

/* Forward __A, the vector returned by _mm_setzero_si128 () and the
   mask __U to __builtin_ia32_pabsd128_mask; the builtin's result is
   returned as __m128i.  */
extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_abs_epi32 (__mmask8 __U, __m128i __A)
{
  const __v4si __va = (__v4si) __A;
  const __v4si __vz = (__v4si) _mm_setzero_si128 ();

  return (__m128i) __builtin_ia32_pabsd128_mask (__va, __vz, __U);
}

/* Forward __A, the vector returned by _mm256_setzero_si256 () and the
   constant mask (__mmask8) -1 to __builtin_ia32_pabsq256_mask; the
   builtin's result is returned as __m256i.  */
extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_abs_epi64 (__m256i __A)
{
  const __v4di __va = (__v4di) __A;
  const __v4di __vz = (__v4di) _mm256_setzero_si256 ();
  const __mmask8 __kall = (__mmask8) -1;

  return (__m256i) __builtin_ia32_pabsq256_mask (__va, __vz, __kall);
}

/* Forward __A, __W and the mask __U (in that order) to
   __builtin_ia32_pabsq256_mask; the builtin's result is returned as
   __m256i.  */
extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_abs_epi64 (__m256i __W, __mmask8 __U, __m256i __A)
{
  const __v4di __va = (__v4di) __A;
  const __v4di __vw = (__v4di) __W;

  return (__m256i) __builtin_ia32_pabsq256_mask (__va, __vw, __U);
}

/* Forward __A, the vector returned by _mm256_setzero_si256 () and the
   mask __U to __builtin_ia32_pabsq256_mask; the builtin's result is
   returned as __m256i.  */
extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_maskz_abs_epi64 (__mmask8 __U, __m256i __A)
{
  const __v4di __va = (__v4di) __A;
  const __v4di __vz = (__v4di) _mm256_setzero_si256 ();

  return (__m256i) __builtin_ia32_pabsq256_mask (__va, __vz, __U);
}

/* Forward __A, the vector returned by _mm_setzero_si128 () and the
   constant mask (__mmask8) -1 to __builtin_ia32_pabsq128_mask; the
   builtin's result is returned as __m128i.  */
extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_abs_epi64 (__m128i __A)
{
  const __v2di __va = (__v2di) __A;
  const __v2di __vz = (__v2di) _mm_setzero_si128 ();
  const __mmask8 __kall = (__mmask8) -1;

  return (__m128i) __builtin_ia32_pabsq128_mask (__va, __vz, __kall);
}

/* Forward __A, __W and the mask __U (in that order) to
   __builtin_ia32_pabsq128_mask; the builtin's result is returned as
   __m128i.  */
extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_abs_epi64 (__m128i __W, __mmask8 __U, __m128i __A)
{
  const __v2di __va = (__v2di) __A;
  const __v2di __vw = (__v2di) __W;

  return (__m128i) __builtin_ia32_pabsq128_mask (__va, __vw, __U);
}

/* Forward __A, the vector returned by _mm_setzero_si128 () and the
   mask __U to __builtin_ia32_pabsq128_mask; the builtin's result is
   returned as __m128i.  */
extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_abs_epi64 (__mmask8 __U, __m128i __A)
{
  const __v2di __va = (__v2di) __A;
  const __v2di __vz = (__v2di) _mm_setzero_si128 ();

  return (__m128i) __builtin_ia32_pabsq128_mask (__va, __vz, __U);
}

/* Forward __A (as __v4df), the vector returned by _mm_setzero_si128 ()
   (as __v4si) and the constant mask (__mmask8) -1 to
   __builtin_ia32_cvtpd2udq256_mask; the builtin's result is returned
   as __m128i.  */
extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_cvtpd_epu32 (__m256d __A)
{
  const __v4df __va = (__v4df) __A;
  const __v4si __vz = (__v4si) _mm_setzero_si128 ();
  const __mmask8 __kall = (__mmask8) -1;

  return (__m128i) __builtin_ia32_cvtpd2udq256_mask (__va, __vz, __kall);
}

/* Forward __A (as __v4df), __W (as __v4si) and the mask __U to
   __builtin_ia32_cvtpd2udq256_mask; the builtin's result is returned
   as __m128i.  */
extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_cvtpd_epu32 (__m128i __W, __mmask8 __U, __m256d __A)
{
  const __v4df __va = (__v4df) __A;
  const __v4si __vw = (__v4si) __W;

  return (__m128i) __builtin_ia32_cvtpd2udq256_mask (__va, __vw, __U);
}

/* Forward __A (as __v4df), the vector returned by _mm_setzero_si128 ()
   (as __v4si) and the mask __U to __builtin_ia32_cvtpd2udq256_mask;
   the builtin's result is returned as __m128i.  */
extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_maskz_cvtpd_epu32 (__mmask8 __U, __m256d __A)
{
  const __v4df __va = (__v4df) __A;
  const __v4si __vz = (__v4si) _mm_setzero_si128 ();

  return (__m128i) __builtin_ia32_cvtpd2udq256_mask (__va, __vz, __U);
}

/* Forward __A (as __v2df), the vector returned by _mm_setzero_si128 ()
   (as __v4si) and the constant mask (__mmask8) -1 to
   __builtin_ia32_cvtpd2udq128_mask; the builtin's result is returned
   as __m128i.  */
extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvtpd_epu32 (__m128d __A)
{
  const __v2df __va = (__v2df) __A;
  const __v4si __vz = (__v4si) _mm_setzero_si128 ();
  const __mmask8 __kall = (__mmask8) -1;

  return (__m128i) __builtin_ia32_cvtpd2udq128_mask (__va, __vz, __kall);
}

/* Forward __A (as __v2df), __W (as __v4si) and the mask __U to
   __builtin_ia32_cvtpd2udq128_mask; the builtin's result is returned
   as __m128i.  */
extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_cvtpd_epu32 (__m128i __W, __mmask8 __U, __m128d __A)
{
  const __v2df __va = (__v2df) __A;
  const __v4si __vw = (__v4si) __W;

  return (__m128i) __builtin_ia32_cvtpd2udq128_mask (__va, __vw, __U);
}

/* Forward __A (as __v2df), the vector returned by _mm_setzero_si128 ()
   (as __v4si) and the mask __U to __builtin_ia32_cvtpd2udq128_mask;
   the builtin's result is returned as __m128i.  */
extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_cvtpd_epu32 (__mmask8 __U, __m128d __A)
{
  const __v2df __va = (__v2df) __A;
  const __v4si __vz = (__v4si) _mm_setzero_si128 ();

  return (__m128i) __builtin_ia32_cvtpd2udq128_mask (__va, __vz, __U);
}

/* Forward __A (as __v8sf), __W (as __v8si) and the mask __U to
   __builtin_ia32_cvttps2dq256_mask; the builtin's result is returned
   as __m256i.  */
extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_cvttps_epi32 (__m256i __W, __mmask8 __U, __m256 __A)
{
  const __v8sf __va = (__v8sf) __A;
  const __v8si __vw = (__v8si) __W;

  return (__m256i) __builtin_ia32_cvttps2dq256_mask (__va, __vw, __U);
}

/* Forward __A (as __v8sf), the vector returned by
   _mm256_setzero_si256 () (as __v8si) and the mask __U to
   __builtin_ia32_cvttps2dq256_mask; the builtin's result is returned
   as __m256i.  */
extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_maskz_cvttps_epi32 (__mmask8 __U, __m256 __A)
{
  const __v8sf __va = (__v8sf) __A;
  const __v8si __vz = (__v8si) _mm256_setzero_si256 ();

  return (__m256i) __builtin_ia32_cvttps2dq256_mask (__va, __vz, __U);
}

/* Calls __builtin_ia32_cvttps2dq128_mask with __A as input, __W as the
   second argument and __U as the mask argument.  */
extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_cvttps_epi32 (__m128i __W, __mmask8 __U, __m128 __A)
{
  __v4sf __src = (__v4sf) __A;
  __v4si __merge = (__v4si) __W;

  return (__m128i) __builtin_ia32_cvttps2dq128_mask (__src, __merge, __U);
}

/* Calls __builtin_ia32_cvttps2dq128_mask with __A as input, an all-zero
   vector as the second argument and __U as the mask argument.  */
extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_cvttps_epi32 (__mmask8 __U, __m128 __A)
{
  __v4sf __src = (__v4sf) __A;
  __v4si __zero = (__v4si) _mm_setzero_si128 ();

  return (__m128i) __builtin_ia32_cvttps2dq128_mask (__src, __zero, __U);
}

/* Calls __builtin_ia32_cvttps2udq256_mask on __A with an all-ones mask;
   the second argument is an all-zero vector.  */
extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_cvttps_epu32 (__m256 __A)
{
  __v8sf __src = (__v8sf) __A;
  __v8si __zero = (__v8si) _mm256_setzero_si256 ();
  __mmask8 __all = (__mmask8) -1;

  return (__m256i) __builtin_ia32_cvttps2udq256_mask (__src, __zero, __all);
}

/* Calls __builtin_ia32_cvttps2udq256_mask with __A as input, __W as the
   second argument and __U as the mask argument.  */
extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_cvttps_epu32 (__m256i __W, __mmask8 __U, __m256 __A)
{
  __v8sf __src = (__v8sf) __A;
  __v8si __merge = (__v8si) __W;

  return (__m256i) __builtin_ia32_cvttps2udq256_mask (__src, __merge, __U);
}

/* Calls __builtin_ia32_cvttps2udq256_mask with __A as input, an all-zero
   vector as the second argument and __U as the mask argument.  */
extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_maskz_cvttps_epu32 (__mmask8 __U, __m256 __A)
{
  __v8sf __src = (__v8sf) __A;
  __v8si __zero = (__v8si) _mm256_setzero_si256 ();

  return (__m256i) __builtin_ia32_cvttps2udq256_mask (__src, __zero, __U);
}

/* Calls __builtin_ia32_cvttps2udq128_mask on __A with an all-ones mask;
   the second argument is an all-zero vector.  */
extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvttps_epu32 (__m128 __A)
{
  __v4sf __src = (__v4sf) __A;
  __v4si __zero = (__v4si) _mm_setzero_si128 ();
  __mmask8 __all = (__mmask8) -1;

  return (__m128i) __builtin_ia32_cvttps2udq128_mask (__src, __zero, __all);
}

/* Calls __builtin_ia32_cvttps2udq128_mask with __A as input, __W as the
   second argument and __U as the mask argument.  */
extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_cvttps_epu32 (__m128i __W, __mmask8 __U, __m128 __A)
{
  __v4sf __src = (__v4sf) __A;
  __v4si __merge = (__v4si) __W;

  return (__m128i) __builtin_ia32_cvttps2udq128_mask (__src, __merge, __U);
}

/* Calls __builtin_ia32_cvttps2udq128_mask with __A as input, an all-zero
   vector as the second argument and __U as the mask argument.  */
extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_cvttps_epu32 (__mmask8 __U, __m128 __A)
{
  __v4sf __src = (__v4sf) __A;
  __v4si __zero = (__v4si) _mm_setzero_si128 ();

  return (__m128i) __builtin_ia32_cvttps2udq128_mask (__src, __zero, __U);
}

/* Calls __builtin_ia32_cvttpd2dq256_mask with __A as input, __W as the
   second argument and __U as the mask argument.  */
extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_cvttpd_epi32 (__m128i __W, __mmask8 __U, __m256d __A)
{
  __v4df __src = (__v4df) __A;
  __v4si __merge = (__v4si) __W;

  return (__m128i) __builtin_ia32_cvttpd2dq256_mask (__src, __merge, __U);
}

/* Calls __builtin_ia32_cvttpd2dq256_mask with __A as input, an all-zero
   vector as the second argument and __U as the mask argument.  */
extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_maskz_cvttpd_epi32 (__mmask8 __U, __m256d __A)
{
  __v4df __src = (__v4df) __A;
  __v4si __zero = (__v4si) _mm_setzero_si128 ();

  return (__m128i) __builtin_ia32_cvttpd2dq256_mask (__src, __zero, __U);
}

/* Calls __builtin_ia32_cvttpd2dq128_mask with __A as input, __W as the
   second argument and __U as the mask argument.  */
extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_cvttpd_epi32 (__m128i __W, __mmask8 __U, __m128d __A)
{
  __v2df __src = (__v2df) __A;
  __v4si __merge = (__v4si) __W;

  return (__m128i) __builtin_ia32_cvttpd2dq128_mask (__src, __merge, __U);
}

/* Calls __builtin_ia32_cvttpd2dq128_mask with __A as input, an all-zero
   vector as the second argument and __U as the mask argument.  */
extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_cvttpd_epi32 (__mmask8 __U, __m128d __A)
{
  __v2df __src = (__v2df) __A;
  __v4si __zero = (__v4si) _mm_setzero_si128 ();

  return (__m128i) __builtin_ia32_cvttpd2dq128_mask (__src, __zero, __U);
}

/* Calls __builtin_ia32_cvttpd2udq256_mask on __A with an all-ones mask;
   the second argument is an all-zero vector.  */
extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_cvttpd_epu32 (__m256d __A)
{
  __v4df __src = (__v4df) __A;
  __v4si __zero = (__v4si) _mm_setzero_si128 ();
  __mmask8 __all = (__mmask8) -1;

  return (__m128i) __builtin_ia32_cvttpd2udq256_mask (__src, __zero, __all);
}

/* Calls __builtin_ia32_cvttpd2udq256_mask with __A as input, __W as the
   second argument and __U as the mask argument.  */
extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_cvttpd_epu32 (__m128i __W, __mmask8 __U, __m256d __A)
{
  __v4df __src = (__v4df) __A;
  __v4si __merge = (__v4si) __W;

  return (__m128i) __builtin_ia32_cvttpd2udq256_mask (__src, __merge, __U);
}

/* Calls __builtin_ia32_cvttpd2udq256_mask with __A as input, an all-zero
   vector as the second argument and __U as the mask argument.  */
extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_maskz_cvttpd_epu32 (__mmask8 __U, __m256d __A)
{
  __v4df __src = (__v4df) __A;
  __v4si __zero = (__v4si) _mm_setzero_si128 ();

  return (__m128i) __builtin_ia32_cvttpd2udq256_mask (__src, __zero, __U);
}

/* Calls __builtin_ia32_cvttpd2udq128_mask on __A with an all-ones mask;
   the second argument is an all-zero vector.  */
extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvttpd_epu32 (__m128d __A)
{
  __v2df __src = (__v2df) __A;
  __v4si __zero = (__v4si) _mm_setzero_si128 ();
  __mmask8 __all = (__mmask8) -1;

  return (__m128i) __builtin_ia32_cvttpd2udq128_mask (__src, __zero, __all);
}

/* Calls __builtin_ia32_cvttpd2udq128_mask with __A as input, __W as the
   second argument and __U as the mask argument.  */
extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_cvttpd_epu32 (__m128i __W, __mmask8 __U, __m128d __A)
{
  __v2df __src = (__v2df) __A;
  __v4si __merge = (__v4si) __W;

  return (__m128i) __builtin_ia32_cvttpd2udq128_mask (__src, __merge, __U);
}

/* Calls __builtin_ia32_cvttpd2udq128_mask with __A as input, an all-zero
   vector as the second argument and __U as the mask argument.  */
extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_cvttpd_epu32 (__mmask8 __U, __m128d __A)
{
  __v2df __src = (__v2df) __A;
  __v4si __zero = (__v4si) _mm_setzero_si128 ();

  return (__m128i) __builtin_ia32_cvttpd2udq128_mask (__src, __zero, __U);
}

/* Calls __builtin_ia32_cvtpd2dq256_mask with __A as input, __W as the
   second argument and __U as the mask argument.  */
extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_cvtpd_epi32 (__m128i __W, __mmask8 __U, __m256d __A)
{
  __v4df __src = (__v4df) __A;
  __v4si __merge = (__v4si) __W;

  return (__m128i) __builtin_ia32_cvtpd2dq256_mask (__src, __merge, __U);
}

/* Calls __builtin_ia32_cvtpd2dq256_mask with __A as input, an all-zero
   vector as the second argument and __U as the mask argument.  */
extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_maskz_cvtpd_epi32 (__mmask8 __U, __m256d __A)
{
  __v4df __src = (__v4df) __A;
  __v4si __zero = (__v4si) _mm_setzero_si128 ();

  return (__m128i) __builtin_ia32_cvtpd2dq256_mask (__src, __zero, __U);
}

/* Calls __builtin_ia32_cvtpd2dq128_mask with __A as input, __W as the
   second argument and __U as the mask argument.  */
extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_cvtpd_epi32 (__m128i __W, __mmask8 __U, __m128d __A)
{
  __v2df __src = (__v2df) __A;
  __v4si __merge = (__v4si) __W;

  return (__m128i) __builtin_ia32_cvtpd2dq128_mask (__src, __merge, __U);
}

/* Calls __builtin_ia32_cvtpd2dq128_mask with __A as input, an all-zero
   vector as the second argument and __U as the mask argument.  */
extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_cvtpd_epi32 (__mmask8 __U, __m128d __A)
{
  __v2df __src = (__v2df) __A;
  __v4si __zero = (__v4si) _mm_setzero_si128 ();

  return (__m128i) __builtin_ia32_cvtpd2dq128_mask (__src, __zero, __U);
}

/* Calls __builtin_ia32_cvtdq2pd256_mask with __A as input, __W as the
   second argument and __U as the mask argument.  */
extern __inline __m256d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_cvtepi32_pd (__m256d __W, __mmask8 __U, __m128i __A)
{
  __v4si __src = (__v4si) __A;
  __v4df __merge = (__v4df) __W;

  return (__m256d) __builtin_ia32_cvtdq2pd256_mask (__src, __merge, __U);
}

/* Calls __builtin_ia32_cvtdq2pd256_mask with __A as input, an all-zero
   vector as the second argument and __U as the mask argument.  */
extern __inline __m256d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_maskz_cvtepi32_pd (__mmask8 __U, __m128i __A)
{
  __v4si __src = (__v4si) __A;
  __v4df __zero = (__v4df) _mm256_setzero_pd ();

  return (__m256d) __builtin_ia32_cvtdq2pd256_mask (__src, __zero, __U);
}

/* Calls __builtin_ia32_cvtdq2pd128_mask with __A as input, __W as the
   second argument and __U as the mask argument.  */
extern __inline __m128d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_cvtepi32_pd (__m128d __W, __mmask8 __U, __m128i __A)
{
  __v4si __src = (__v4si) __A;
  __v2df __merge = (__v2df) __W;

  return (__m128d) __builtin_ia32_cvtdq2pd128_mask (__src, __merge, __U);
}

/* Calls __builtin_ia32_cvtdq2pd128_mask with __A as input, an all-zero
   vector as the second argument and __U as the mask argument.  */
extern __inline __m128d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_cvtepi32_pd (__mmask8 __U, __m128i __A)
{
  __v4si __src = (__v4si) __A;
  __v2df __zero = (__v2df) _mm_setzero_pd ();

  return (__m128d) __builtin_ia32_cvtdq2pd128_mask (__src, __zero, __U);
}

/* Calls __builtin_ia32_cvtudq2pd256_mask on __A with an all-ones mask; the
   second argument is an all-zero vector.  */
extern __inline __m256d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_cvtepu32_pd (__m128i __A)
{
  __v4si __src = (__v4si) __A;
  __v4df __zero = (__v4df) _mm256_setzero_pd ();
  __mmask8 __all = (__mmask8) -1;

  return (__m256d) __builtin_ia32_cvtudq2pd256_mask (__src, __zero, __all);
}

/* Calls __builtin_ia32_cvtudq2pd256_mask with __A as input, __W as the
   second argument and __U as the mask argument.  */
extern __inline __m256d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_cvtepu32_pd (__m256d __W, __mmask8 __U, __m128i __A)
{
  __v4si __src = (__v4si) __A;
  __v4df __merge = (__v4df) __W;

  return (__m256d) __builtin_ia32_cvtudq2pd256_mask (__src, __merge, __U);
}

/* Calls __builtin_ia32_cvtudq2pd256_mask with __A as input, an all-zero
   vector as the second argument and __U as the mask argument.  */
extern __inline __m256d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_maskz_cvtepu32_pd (__mmask8 __U, __m128i __A)
{
  __v4si __src = (__v4si) __A;
  __v4df __zero = (__v4df) _mm256_setzero_pd ();

  return (__m256d) __builtin_ia32_cvtudq2pd256_mask (__src, __zero, __U);
}

/* Calls __builtin_ia32_cvtudq2pd128_mask on __A with an all-ones mask; the
   second argument is an all-zero vector.  */
extern __inline __m128d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvtepu32_pd (__m128i __A)
{
  __v4si __src = (__v4si) __A;
  __v2df __zero = (__v2df) _mm_setzero_pd ();
  __mmask8 __all = (__mmask8) -1;

  return (__m128d) __builtin_ia32_cvtudq2pd128_mask (__src, __zero, __all);
}

/* Calls __builtin_ia32_cvtudq2pd128_mask with __A as input, __W as the
   second argument and __U as the mask argument.  */
extern __inline __m128d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_cvtepu32_pd (__m128d __W, __mmask8 __U, __m128i __A)
{
  __v4si __src = (__v4si) __A;
  __v2df __merge = (__v2df) __W;

  return (__m128d) __builtin_ia32_cvtudq2pd128_mask (__src, __merge, __U);
}

/* Calls __builtin_ia32_cvtudq2pd128_mask with __A as input, an all-zero
   vector as the second argument and __U as the mask argument.  */
extern __inline __m128d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_cvtepu32_pd (__mmask8 __U, __m128i __A)
{
  __v4si __src = (__v4si) __A;
  __v2df __zero = (__v2df) _mm_setzero_pd ();

  return (__m128d) __builtin_ia32_cvtudq2pd128_mask (__src, __zero, __U);
}

/* Calls __builtin_ia32_cvtdq2ps256_mask with __A as input, __W as the
   second argument and __U as the mask argument.  */
extern __inline __m256
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_cvtepi32_ps (__m256 __W, __mmask8 __U, __m256i __A)
{
  __v8si __src = (__v8si) __A;
  __v8sf __merge = (__v8sf) __W;

  return (__m256) __builtin_ia32_cvtdq2ps256_mask (__src, __merge, __U);
}

/* Calls __builtin_ia32_cvtdq2ps256_mask with __A as input, an all-zero
   vector as the second argument and __U as the mask argument.  */
extern __inline __m256
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_maskz_cvtepi32_ps (__mmask8 __U, __m256i __A)
{
  __v8si __src = (__v8si) __A;
  __v8sf __zero = (__v8sf) _mm256_setzero_ps ();

  return (__m256) __builtin_ia32_cvtdq2ps256_mask (__src, __zero, __U);
}

/* Calls __builtin_ia32_cvtdq2ps128_mask with __A as input, __W as the
   second argument and __U as the mask argument.  */
extern __inline __m128
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_cvtepi32_ps (__m128 __W, __mmask8 __U, __m128i __A)
{
  __v4si __src = (__v4si) __A;
  __v4sf __merge = (__v4sf) __W;

  return (__m128) __builtin_ia32_cvtdq2ps128_mask (__src, __merge, __U);
}

/* Calls __builtin_ia32_cvtdq2ps128_mask with __A as input, an all-zero
   vector as the second argument and __U as the mask argument.  */
extern __inline __m128
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_cvtepi32_ps (__mmask8 __U, __m128i __A)
{
  __v4si __src = (__v4si) __A;
  __v4sf __zero = (__v4sf) _mm_setzero_ps ();

  return (__m128) __builtin_ia32_cvtdq2ps128_mask (__src, __zero, __U);
}

/* Calls __builtin_ia32_cvtudq2ps256_mask on __A with an all-ones mask; the
   second argument is an all-zero vector.  */
extern __inline __m256
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_cvtepu32_ps (__m256i __A)
{
  __v8si __src = (__v8si) __A;
  __v8sf __zero = (__v8sf) _mm256_setzero_ps ();
  __mmask8 __all = (__mmask8) -1;

  return (__m256) __builtin_ia32_cvtudq2ps256_mask (__src, __zero, __all);
}

/* Calls __builtin_ia32_cvtudq2ps256_mask with __A as input, __W as the
   second argument and __U as the mask argument.  */
extern __inline __m256
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_cvtepu32_ps (__m256 __W, __mmask8 __U, __m256i __A)
{
  __v8si __src = (__v8si) __A;
  __v8sf __merge = (__v8sf) __W;

  return (__m256) __builtin_ia32_cvtudq2ps256_mask (__src, __merge, __U);
}

/* Calls __builtin_ia32_cvtudq2ps256_mask with __A as input, an all-zero
   vector as the second argument and __U as the mask argument.  */
extern __inline __m256
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_maskz_cvtepu32_ps (__mmask8 __U, __m256i __A)
{
  __v8si __src = (__v8si) __A;
  __v8sf __zero = (__v8sf) _mm256_setzero_ps ();

  return (__m256) __builtin_ia32_cvtudq2ps256_mask (__src, __zero, __U);
}

/* Calls __builtin_ia32_cvtudq2ps128_mask on __A with an all-ones mask; the
   second argument is an all-zero vector.  */
extern __inline __m128
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvtepu32_ps (__m128i __A)
{
  __v4si __src = (__v4si) __A;
  __v4sf __zero = (__v4sf) _mm_setzero_ps ();
  __mmask8 __all = (__mmask8) -1;

  return (__m128) __builtin_ia32_cvtudq2ps128_mask (__src, __zero, __all);
}

/* Calls __builtin_ia32_cvtudq2ps128_mask with __A as input, __W as the
   second argument and __U as the mask argument.  */
extern __inline __m128
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_cvtepu32_ps (__m128 __W, __mmask8 __U, __m128i __A)
{
  __v4si __src = (__v4si) __A;
  __v4sf __merge = (__v4sf) __W;

  return (__m128) __builtin_ia32_cvtudq2ps128_mask (__src, __merge, __U);
}

/* Calls __builtin_ia32_cvtudq2ps128_mask with __A as input, an all-zero
   vector as the second argument and __U as the mask argument.  */
extern __inline __m128
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_cvtepu32_ps (__mmask8 __U, __m128i __A)
{
  __v4si __src = (__v4si) __A;
  __v4sf __zero = (__v4sf) _mm_setzero_ps ();

  return (__m128) __builtin_ia32_cvtudq2ps128_mask (__src, __zero, __U);
}

/* Calls __builtin_ia32_cvtps2pd256_mask with __A as input, __W as the
   second argument and __U as the mask argument.  */
extern __inline __m256d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_cvtps_pd (__m256d __W, __mmask8 __U, __m128 __A)
{
  __v4sf __src = (__v4sf) __A;
  __v4df __merge = (__v4df) __W;

  return (__m256d) __builtin_ia32_cvtps2pd256_mask (__src, __merge, __U);
}

/* Calls __builtin_ia32_cvtps2pd256_mask with __A as input, an all-zero
   vector as the second argument and __U as the mask argument.  */
extern __inline __m256d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_maskz_cvtps_pd (__mmask8 __U, __m128 __A)
{
  __v4sf __src = (__v4sf) __A;
  __v4df __zero = (__v4df) _mm256_setzero_pd ();

  return (__m256d) __builtin_ia32_cvtps2pd256_mask (__src, __zero, __U);
}

/* Calls __builtin_ia32_cvtps2pd128_mask with __A as input, __W as the
   second argument and __U as the mask argument.  */
extern __inline __m128d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_cvtps_pd (__m128d __W, __mmask8 __U, __m128 __A)
{
  __v4sf __src = (__v4sf) __A;
  __v2df __merge = (__v2df) __W;

  return (__m128d) __builtin_ia32_cvtps2pd128_mask (__src, __merge, __U);
}

/* Calls __builtin_ia32_cvtps2pd128_mask with __A as input, an all-zero
   vector as the second argument and __U as the mask argument.  */
extern __inline __m128d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_cvtps_pd (__mmask8 __U, __m128 __A)
{
  __v4sf __src = (__v4sf) __A;
  __v2df __zero = (__v2df) _mm_setzero_pd ();

  return (__m128d) __builtin_ia32_cvtps2pd128_mask (__src, __zero, __U);
}

/* Calls __builtin_ia32_pmovdb128_mask on __A with an all-ones mask; the
   second argument comes from _mm_undefined_si128.  */
extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvtepi32_epi8 (__m128i __A)
{
  __v4si __src = (__v4si) __A;
  __v16qi __undef = (__v16qi) _mm_undefined_si128 ();
  __mmask8 __all = (__mmask8) -1;

  return (__m128i) __builtin_ia32_pmovdb128_mask (__src, __undef, __all);
}

/* Calls __builtin_ia32_pmovdb128mem_mask with __P (passed as
   unsigned int *) as the address, __A as data and __M as the mask.  */
extern __inline void
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_cvtepi32_storeu_epi8 (void * __P, __mmask8 __M, __m128i __A)
{
  unsigned int *__dst = (unsigned int *) __P;

  __builtin_ia32_pmovdb128mem_mask (__dst, (__v4si) __A, __M);
}

/* Calls __builtin_ia32_pmovdb128_mask with __A as input, __O as the
   second argument and __M as the mask argument.  */
extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_cvtepi32_epi8 (__m128i __O, __mmask8 __M, __m128i __A)
{
  __v4si __src = (__v4si) __A;
  __v16qi __merge = (__v16qi) __O;

  return (__m128i) __builtin_ia32_pmovdb128_mask (__src, __merge, __M);
}

/* Calls __builtin_ia32_pmovdb128_mask with __A as input, an all-zero
   vector as the second argument and __M as the mask argument.  */
extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_cvtepi32_epi8 (__mmask8 __M, __m128i __A)
{
  __v4si __src = (__v4si) __A;
  __v16qi __zero = (__v16qi) _mm_setzero_si128 ();

  return (__m128i) __builtin_ia32_pmovdb128_mask (__src, __zero, __M);
}

/* Calls __builtin_ia32_pmovdb256_mask on __A with an all-ones mask; the
   second argument comes from _mm_undefined_si128.  */
extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_cvtepi32_epi8 (__m256i __A)
{
  __v8si __src = (__v8si) __A;
  __v16qi __undef = (__v16qi) _mm_undefined_si128 ();
  __mmask8 __all = (__mmask8) -1;

  return (__m128i) __builtin_ia32_pmovdb256_mask (__src, __undef, __all);
}

/* Calls __builtin_ia32_pmovdb256_mask with __A as input, __O as the
   second argument and __M as the mask argument.  */
extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_cvtepi32_epi8 (__m128i __O, __mmask8 __M, __m256i __A)
{
  __v8si __src = (__v8si) __A;
  __v16qi __merge = (__v16qi) __O;

  return (__m128i) __builtin_ia32_pmovdb256_mask (__src, __merge, __M);
}

/* Calls __builtin_ia32_pmovdb256mem_mask with __P (passed as
   unsigned long long *) as the address, __A as data and __M as the mask.  */
extern __inline void
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_cvtepi32_storeu_epi8 (void * __P, __mmask8 __M, __m256i __A)
{
  unsigned long long *__dst = (unsigned long long *) __P;

  __builtin_ia32_pmovdb256mem_mask (__dst, (__v8si) __A, __M);
}

/* Calls __builtin_ia32_pmovdb256_mask with __A as input, an all-zero
   vector as the second argument and __M as the mask argument.  */
extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_maskz_cvtepi32_epi8 (__mmask8 __M, __m256i __A)
{
  __v8si __src = (__v8si) __A;
  __v16qi __zero = (__v16qi) _mm_setzero_si128 ();

  return (__m128i) __builtin_ia32_pmovdb256_mask (__src, __zero, __M);
}

/* Calls __builtin_ia32_pmovsdb128_mask on __A with an all-ones mask; the
   second argument comes from _mm_undefined_si128.  */
extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvtsepi32_epi8 (__m128i __A)
{
  __v4si __src = (__v4si) __A;
  __v16qi __undef = (__v16qi) _mm_undefined_si128 ();
  __mmask8 __all = (__mmask8) -1;

  return (__m128i) __builtin_ia32_pmovsdb128_mask (__src, __undef, __all);
}

/* Calls __builtin_ia32_pmovsdb128mem_mask with __P (passed as
   unsigned int *) as the address, __A as data and __M as the mask.  */
extern __inline void
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_cvtsepi32_storeu_epi8 (void * __P, __mmask8 __M, __m128i __A)
{
  unsigned int *__dst = (unsigned int *) __P;

  __builtin_ia32_pmovsdb128mem_mask (__dst, (__v4si) __A, __M);
}

/* Calls __builtin_ia32_pmovsdb128_mask with __A as input, __O as the
   second argument and __M as the mask argument.  */
extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_cvtsepi32_epi8 (__m128i __O, __mmask8 __M, __m128i __A)
{
  __v4si __src = (__v4si) __A;
  __v16qi __merge = (__v16qi) __O;

  return (__m128i) __builtin_ia32_pmovsdb128_mask (__src, __merge, __M);
}

/* Calls __builtin_ia32_pmovsdb128_mask with __A as input, an all-zero
   vector as the second argument and __M as the mask argument.  */
extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_cvtsepi32_epi8 (__mmask8 __M, __m128i __A)
{
  __v4si __src = (__v4si) __A;
  __v16qi __zero = (__v16qi) _mm_setzero_si128 ();

  return (__m128i) __builtin_ia32_pmovsdb128_mask (__src, __zero, __M);
}

/* Calls __builtin_ia32_pmovsdb256_mask on __A with an all-ones mask; the
   second argument comes from _mm_undefined_si128.  */
extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_cvtsepi32_epi8 (__m256i __A)
{
  __v8si __src = (__v8si) __A;
  __v16qi __undef = (__v16qi) _mm_undefined_si128 ();
  __mmask8 __all = (__mmask8) -1;

  return (__m128i) __builtin_ia32_pmovsdb256_mask (__src, __undef, __all);
}

/* Calls __builtin_ia32_pmovsdb256mem_mask with __P (passed as
   unsigned long long *) as the address, __A as data and __M as the mask.  */
extern __inline void
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_cvtsepi32_storeu_epi8 (void * __P, __mmask8 __M, __m256i __A)
{
  unsigned long long *__dst = (unsigned long long *) __P;

  __builtin_ia32_pmovsdb256mem_mask (__dst, (__v8si) __A, __M);
}

/* Calls __builtin_ia32_pmovsdb256_mask with __A as input, __O as the
   second argument and __M as the mask argument.  */
extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_cvtsepi32_epi8 (__m128i __O, __mmask8 __M, __m256i __A)
{
  __v8si __src = (__v8si) __A;
  __v16qi __merge = (__v16qi) __O;

  return (__m128i) __builtin_ia32_pmovsdb256_mask (__src, __merge, __M);
}

/* Calls __builtin_ia32_pmovsdb256_mask with __A as input, an all-zero
   vector as the second argument and __M as the mask argument.  */
extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_maskz_cvtsepi32_epi8 (__mmask8 __M, __m256i __A)
{
  __v8si __src = (__v8si) __A;
  __v16qi __zero = (__v16qi) _mm_setzero_si128 ();

  return (__m128i) __builtin_ia32_pmovsdb256_mask (__src, __zero, __M);
}

/* Calls __builtin_ia32_pmovusdb128_mask on __A with an all-ones mask; the
   second argument comes from _mm_undefined_si128.  */
extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvtusepi32_epi8 (__m128i __A)
{
  __v4si __src = (__v4si) __A;
  __v16qi __undef = (__v16qi) _mm_undefined_si128 ();
  __mmask8 __all = (__mmask8) -1;

  return (__m128i) __builtin_ia32_pmovusdb128_mask (__src, __undef, __all);
}

/* Calls __builtin_ia32_pmovusdb128mem_mask with __P (passed as
   unsigned int *) as the address, __A as data and __M as the mask.  */
extern __inline void
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_cvtusepi32_storeu_epi8 (void * __P, __mmask8 __M, __m128i __A)
{
  unsigned int *__dst = (unsigned int *) __P;

  __builtin_ia32_pmovusdb128mem_mask (__dst, (__v4si) __A, __M);
}

/* Calls __builtin_ia32_pmovusdb128_mask with __A as input, __O as the
   second argument and __M as the mask argument.  */
extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_cvtusepi32_epi8 (__m128i __O, __mmask8 __M, __m128i __A)
{
  __v4si __src = (__v4si) __A;
  __v16qi __merge = (__v16qi) __O;

  return (__m128i) __builtin_ia32_pmovusdb128_mask (__src, __merge, __M);
}

/* Calls __builtin_ia32_pmovusdb128_mask with __A as input, an all-zero
   vector as the second argument and __M as the mask argument.  */
extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_cvtusepi32_epi8 (__mmask8 __M, __m128i __A)
{
  __v4si __src = (__v4si) __A;
  __v16qi __zero = (__v16qi) _mm_setzero_si128 ();

  return (__m128i) __builtin_ia32_pmovusdb128_mask (__src, __zero, __M);
}

/* Calls __builtin_ia32_pmovusdb256_mask on __A with an all-ones mask; the
   second argument comes from _mm_undefined_si128.  */
extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_cvtusepi32_epi8 (__m256i __A)
{
  __v8si __src = (__v8si) __A;
  __v16qi __undef = (__v16qi) _mm_undefined_si128 ();
  __mmask8 __all = (__mmask8) -1;

  return (__m128i) __builtin_ia32_pmovusdb256_mask (__src, __undef, __all);
}

/* Calls __builtin_ia32_pmovusdb256mem_mask with __P (passed as
   unsigned long long *) as the address, __A as data and __M as the mask.  */
extern __inline void
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_cvtusepi32_storeu_epi8 (void * __P, __mmask8 __M, __m256i __A)
{
  unsigned long long *__dst = (unsigned long long *) __P;

  __builtin_ia32_pmovusdb256mem_mask (__dst, (__v8si) __A, __M);
}

/* Calls __builtin_ia32_pmovusdb256_mask with __A as input, __O as the
   second argument and __M as the mask argument.  */
extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_cvtusepi32_epi8 (__m128i __O, __mmask8 __M, __m256i __A)
{
  __v8si __src = (__v8si) __A;
  __v16qi __merge = (__v16qi) __O;

  return (__m128i) __builtin_ia32_pmovusdb256_mask (__src, __merge, __M);
}

/* Calls __builtin_ia32_pmovusdb256_mask with __A as input, an all-zero
   vector as the second argument and __M as the mask argument.  */
extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_maskz_cvtusepi32_epi8 (__mmask8 __M, __m256i __A)
{
  __v8si __src = (__v8si) __A;
  __v16qi __zero = (__v16qi) _mm_setzero_si128 ();

  return (__m128i) __builtin_ia32_pmovusdb256_mask (__src, __zero, __M);
}

/* Calls __builtin_ia32_pmovdw128_mask on __A with an all-ones mask; the
   second argument is an all-zero vector.  */
extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvtepi32_epi16 (__m128i __A)
{
  __v4si __src = (__v4si) __A;
  __v8hi __zero = (__v8hi) _mm_setzero_si128 ();
  __mmask8 __all = (__mmask8) -1;

  return (__m128i) __builtin_ia32_pmovdw128_mask (__src, __zero, __all);
}

/* Calls __builtin_ia32_pmovdw128mem_mask with __P (passed as
   unsigned long long *) as the address, __A as data and __M as the mask.  */
extern __inline void
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_cvtepi32_storeu_epi16 (void * __P, __mmask8 __M, __m128i __A)
{
  unsigned long long *__dst = (unsigned long long *) __P;

  __builtin_ia32_pmovdw128mem_mask (__dst, (__v4si) __A, __M);
}

/* Calls __builtin_ia32_pmovdw128_mask with __A as input, __O as the
   second argument and __M as the mask argument.  */
extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_cvtepi32_epi16 (__m128i __O, __mmask8 __M, __m128i __A)
{
  __v4si __src = (__v4si) __A;
  __v8hi __merge = (__v8hi) __O;

  return (__m128i) __builtin_ia32_pmovdw128_mask (__src, __merge, __M);
}

/* Calls __builtin_ia32_pmovdw128_mask with __A as input, an all-zero
   vector as the second argument and __M as the mask argument.  */
extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_cvtepi32_epi16 (__mmask8 __M, __m128i __A)
{
  __v4si __src = (__v4si) __A;
  __v8hi __zero = (__v8hi) _mm_setzero_si128 ();

  return (__m128i) __builtin_ia32_pmovdw128_mask (__src, __zero, __M);
}

/* Calls __builtin_ia32_pmovdw256_mask on __A with an all-ones mask; the
   second argument is an all-zero vector.  */
extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_cvtepi32_epi16 (__m256i __A)
{
  __v8si __src = (__v8si) __A;
  __v8hi __zero = (__v8hi) _mm_setzero_si128 ();
  __mmask8 __all = (__mmask8) -1;

  return (__m128i) __builtin_ia32_pmovdw256_mask (__src, __zero, __all);
}

/* Calls __builtin_ia32_pmovdw256mem_mask with __P (passed as __v8hi *)
   as the address, __A as data and __M as the mask.  */
extern __inline void
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_cvtepi32_storeu_epi16 (void *  __P, __mmask8 __M, __m256i __A)
{
  __v8hi *__dst = (__v8hi *) __P;

  __builtin_ia32_pmovdw256mem_mask (__dst, (__v8si) __A, __M);
}

/* Calls __builtin_ia32_pmovdw256_mask with __A as input, __O as the
   second argument and __M as the mask argument.  */
extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_cvtepi32_epi16 (__m128i __O, __mmask8 __M, __m256i __A)
{
  __v8si __src = (__v8si) __A;
  __v8hi __merge = (__v8hi) __O;

  return (__m128i) __builtin_ia32_pmovdw256_mask (__src, __merge, __M);
}

/* Calls __builtin_ia32_pmovdw256_mask with __A as input, an all-zero
   vector as the second argument and __M as the mask argument.  */
extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_maskz_cvtepi32_epi16 (__mmask8 __M, __m256i __A)
{
  __v8si __src = (__v8si) __A;
  __v8hi __zero = (__v8hi) _mm_setzero_si128 ();

  return (__m128i) __builtin_ia32_pmovdw256_mask (__src, __zero, __M);
}

/* Calls __builtin_ia32_pmovsdw128_mask on __A with an all-ones mask; the
   second argument is an all-zero vector.  */
extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvtsepi32_epi16 (__m128i __A)
{
  __v4si __src = (__v4si) __A;
  __v8hi __zero = (__v8hi) _mm_setzero_si128 ();
  __mmask8 __all = (__mmask8) -1;

  return (__m128i) __builtin_ia32_pmovsdw128_mask (__src, __zero, __all);
}

/* Calls __builtin_ia32_pmovsdw128mem_mask with __P (passed as
   unsigned long long *) as the address, __A as data and __M as the mask.  */
extern __inline void
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_cvtsepi32_storeu_epi16 (void * __P, __mmask8 __M, __m128i __A)
{
  unsigned long long *__dst = (unsigned long long *) __P;

  __builtin_ia32_pmovsdw128mem_mask (__dst, (__v4si) __A, __M);
}

/* Calls __builtin_ia32_pmovsdw128_mask with __A as input, __O as the
   second argument and __M as the mask argument.  */
extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_cvtsepi32_epi16 (__m128i __O, __mmask8 __M, __m128i __A)
{
  __v4si __src = (__v4si) __A;
  __v8hi __merge = (__v8hi) __O;

  return (__m128i) __builtin_ia32_pmovsdw128_mask (__src, __merge, __M);
}

/* Calls __builtin_ia32_pmovsdw128_mask with __A as input, an all-zero
   vector as the second argument and __M as the mask argument.  */
extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_cvtsepi32_epi16 (__mmask8 __M, __m128i __A)
{
  __v4si __src = (__v4si) __A;
  __v8hi __zero = (__v8hi) _mm_setzero_si128 ();

  return (__m128i) __builtin_ia32_pmovsdw128_mask (__src, __zero, __M);
}

/* Calls __builtin_ia32_pmovsdw256_mask on __A with an all-ones mask; the
   second argument comes from _mm_undefined_si128.  */
extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_cvtsepi32_epi16 (__m256i __A)
{
  __v8si __src = (__v8si) __A;
  __v8hi __undef = (__v8hi) _mm_undefined_si128 ();
  __mmask8 __all = (__mmask8) -1;

  return (__m128i) __builtin_ia32_pmovsdw256_mask (__src, __undef, __all);
}

/* Calls __builtin_ia32_pmovsdw256mem_mask with __P (passed as __v8hi *)
   as the address, __A as data and __M as the mask.  */
extern __inline void
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_cvtsepi32_storeu_epi16 (void * __P, __mmask8 __M, __m256i __A)
{
  __v8hi *__dst = (__v8hi *) __P;

  __builtin_ia32_pmovsdw256mem_mask (__dst, (__v8si) __A, __M);
}

/* Calls __builtin_ia32_pmovsdw256_mask with __A as input, __O as the
   second argument and __M as the mask argument.  */
extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_cvtsepi32_epi16 (__m128i __O, __mmask8 __M, __m256i __A)
{
  __v8si __src = (__v8si) __A;
  __v8hi __merge = (__v8hi) __O;

  return (__m128i) __builtin_ia32_pmovsdw256_mask (__src, __merge, __M);
}

/* Calls __builtin_ia32_pmovsdw256_mask with __A as input, an all-zero
   vector as the second argument and __M as the mask argument.  */
extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_maskz_cvtsepi32_epi16 (__mmask8 __M, __m256i __A)
{
  __v8si __src = (__v8si) __A;
  __v8hi __zero = (__v8hi) _mm_setzero_si128 ();

  return (__m128i) __builtin_ia32_pmovsdw256_mask (__src, __zero, __M);
}

/* Calls __builtin_ia32_pmovusdw128_mask on __A with an all-ones mask; the
   second argument comes from _mm_undefined_si128.  */
extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvtusepi32_epi16 (__m128i __A)
{
  __v4si __src = (__v4si) __A;
  __v8hi __undef = (__v8hi) _mm_undefined_si128 ();
  __mmask8 __all = (__mmask8) -1;

  return (__m128i) __builtin_ia32_pmovusdw128_mask (__src, __undef, __all);
}

/* Calls __builtin_ia32_pmovusdw128mem_mask with __P (passed as
   unsigned long long *) as the address, __A as data and __M as the mask.  */
extern __inline void
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_cvtusepi32_storeu_epi16 (void * __P, __mmask8 __M, __m128i __A)
{
  unsigned long long *__dst = (unsigned long long *) __P;

  __builtin_ia32_pmovusdw128mem_mask (__dst, (__v4si) __A, __M);
}

/* Calls __builtin_ia32_pmovusdw128_mask with __A as input, __O as the
   second argument and __M as the mask argument.  */
extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_cvtusepi32_epi16 (__m128i __O, __mmask8 __M, __m128i __A)
{
  __v4si __src = (__v4si) __A;
  __v8hi __merge = (__v8hi) __O;

  return (__m128i) __builtin_ia32_pmovusdw128_mask (__src, __merge, __M);
}

/* Calls __builtin_ia32_pmovusdw128_mask with __A as input, an all-zero
   vector as the second argument and __M as the mask argument.  */
extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_cvtusepi32_epi16 (__mmask8 __M, __m128i __A)
{
  __v4si __src = (__v4si) __A;
  __v8hi __zero = (__v8hi) _mm_setzero_si128 ();

  return (__m128i) __builtin_ia32_pmovusdw128_mask (__src, __zero, __M);
}

/* Calls __builtin_ia32_pmovusdw256_mask on __A with an all-ones mask; the
   second argument comes from _mm_undefined_si128.  */
extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_cvtusepi32_epi16 (__m256i __A)
{
  __v8si __src = (__v8si) __A;
  __v8hi __undef = (__v8hi) _mm_undefined_si128 ();
  __mmask8 __all = (__mmask8) -1;

  return (__m128i) __builtin_ia32_pmovusdw256_mask (__src, __undef, __all);
}

/* Calls __builtin_ia32_pmovusdw256mem_mask with __P (passed as __v8hi *)
   as the address, __A as data and __M as the mask.  */
extern __inline void
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_cvtusepi32_storeu_epi16 (void * __P, __mmask8 __M, __m256i __A)
{
  __v8hi *__dst = (__v8hi *) __P;

  __builtin_ia32_pmovusdw256mem_mask (__dst, (__v8si) __A, __M);
}

/* Calls __builtin_ia32_pmovusdw256_mask with __A as input, __O as the
   second argument and __M as the mask argument.  */
extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_cvtusepi32_epi16 (__m128i __O, __mmask8 __M, __m256i __A)
{
  __v8si __src = (__v8si) __A;
  __v8hi __merge = (__v8hi) __O;

  return (__m128i) __builtin_ia32_pmovusdw256_mask (__src, __merge, __M);
}

/* Calls __builtin_ia32_pmovusdw256_mask with __A as input, an all-zero
   vector as the second argument and __M as the mask argument.  */
extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_maskz_cvtusepi32_epi16 (__mmask8 __M, __m256i __A)
{
  __v8si __src = (__v8si) __A;
  __v8hi __zero = (__v8hi) _mm_setzero_si128 ();

  return (__m128i) __builtin_ia32_pmovusdw256_mask (__src, __zero, __M);
}

/* Calls __builtin_ia32_pmovqb128_mask on __A with an all-ones mask; the
   second argument comes from _mm_undefined_si128.  */
extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvtepi64_epi8 (__m128i __A)
{
  __v2di __src = (__v2di) __A;
  __v16qi __undef = (__v16qi) _mm_undefined_si128 ();
  __mmask8 __all = (__mmask8) -1;

  return (__m128i) __builtin_ia32_pmovqb128_mask (__src, __undef, __all);
}

/* Calls __builtin_ia32_pmovqb128mem_mask with __P (passed as
   unsigned short *) as the address, __A as data and __M as the mask.  */
extern __inline void
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_cvtepi64_storeu_epi8 (void * __P, __mmask8 __M, __m128i __A)
{
  unsigned short *__dst = (unsigned short *) __P;

  __builtin_ia32_pmovqb128mem_mask (__dst, (__v2di) __A, __M);
}

/* Calls __builtin_ia32_pmovqb128_mask with __A as input, __O as the
   second argument and __M as the mask argument.  */
extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_cvtepi64_epi8 (__m128i __O, __mmask8 __M, __m128i __A)
{
  __v2di __src = (__v2di) __A;
  __v16qi __merge = (__v16qi) __O;

  return (__m128i) __builtin_ia32_pmovqb128_mask (__src, __merge, __M);
}

/* Calls __builtin_ia32_pmovqb128_mask with __A as input, an all-zero
   vector as the second argument and __M as the mask argument.  */
extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_cvtepi64_epi8 (__mmask8 __M, __m128i __A)
{
  __v2di __src = (__v2di) __A;
  __v16qi __zero = (__v16qi) _mm_setzero_si128 ();

  return (__m128i) __builtin_ia32_pmovqb128_mask (__src, __zero, __M);
}

/* Calls __builtin_ia32_pmovqb256_mask on __A with an all-ones mask; the
   second argument comes from _mm_undefined_si128.  */
extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_cvtepi64_epi8 (__m256i __A)
{
  __v4di __src = (__v4di) __A;
  __v16qi __undef = (__v16qi) _mm_undefined_si128 ();
  __mmask8 __all = (__mmask8) -1;

  return (__m128i) __builtin_ia32_pmovqb256_mask (__src, __undef, __all);
}

/* Calls __builtin_ia32_pmovqb256mem_mask with __P (passed as
   unsigned int *) as the address, __A as data and __M as the mask.  */
extern __inline void
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_cvtepi64_storeu_epi8 (void * __P, __mmask8 __M, __m256i __A)
{
  unsigned int *__dst = (unsigned int *) __P;

  __builtin_ia32_pmovqb256mem_mask (__dst, (__v4di) __A, __M);
}

/* Calls __builtin_ia32_pmovqb256_mask with __A as input, __O as the
   second argument and __M as the mask argument.  */
extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_cvtepi64_epi8 (__m128i __O, __mmask8 __M, __m256i __A)
{
  __v4di __src = (__v4di) __A;
  __v16qi __merge = (__v16qi) __O;

  return (__m128i) __builtin_ia32_pmovqb256_mask (__src, __merge, __M);
}

/* Calls __builtin_ia32_pmovqb256_mask with __A as input, an all-zero
   vector as the second argument and __M as the mask argument.  */
extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_maskz_cvtepi64_epi8 (__mmask8 __M, __m256i __A)
{
  __v4di __src = (__v4di) __A;
  __v16qi __zero = (__v16qi) _mm_setzero_si128 ();

  return (__m128i) __builtin_ia32_pmovqb256_mask (__src, __zero, __M);
}

/* Calls __builtin_ia32_pmovsqb128_mask on __A with an all-ones mask; the
   second argument comes from _mm_undefined_si128.  */
extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvtsepi64_epi8 (__m128i __A)
{
  __v2di __src = (__v2di) __A;
  __v16qi __undef = (__v16qi) _mm_undefined_si128 ();
  __mmask8 __all = (__mmask8) -1;

  return (__m128i) __builtin_ia32_pmovsqb128_mask (__src, __undef, __all);
}

/* Calls __builtin_ia32_pmovsqb128mem_mask with __P (passed as
   unsigned short *) as the address, __A as data and __M as the mask.  */
extern __inline void
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_cvtsepi64_storeu_epi8 (void * __P, __mmask8 __M, __m128i __A)
{
  unsigned short *__dst = (unsigned short *) __P;

  __builtin_ia32_pmovsqb128mem_mask (__dst, (__v2di) __A, __M);
}

/* Calls __builtin_ia32_pmovsqb128_mask with __A as input, __O as the
   second argument and __M as the mask argument.  */
extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_cvtsepi64_epi8 (__m128i __O, __mmask8 __M, __m128i __A)
{
  __v2di __src = (__v2di) __A;
  __v16qi __merge = (__v16qi) __O;

  return (__m128i) __builtin_ia32_pmovsqb128_mask (__src, __merge, __M);
}

/* Calls __builtin_ia32_pmovsqb128_mask with __A as input, an all-zero
   vector as the second argument and __M as the mask argument.  */
extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_cvtsepi64_epi8 (__mmask8 __M, __m128i __A)
{
  __v2di __src = (__v2di) __A;
  __v16qi __zero = (__v16qi) _mm_setzero_si128 ();

  return (__m128i) __builtin_ia32_pmovsqb128_mask (__src, __zero, __M);
}

/* Calls __builtin_ia32_pmovsqb256_mask on __A with an all-ones mask; the
   second argument comes from _mm_undefined_si128.  */
extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_cvtsepi64_epi8 (__m256i __A)
{
  __v4di __src = (__v4di) __A;
  __v16qi __undef = (__v16qi) _mm_undefined_si128 ();
  __mmask8 __all = (__mmask8) -1;

  return (__m128i) __builtin_ia32_pmovsqb256_mask (__src, __undef, __all);
}

/* Calls __builtin_ia32_pmovsqb256mem_mask with __P (passed as
   unsigned int *) as the address, __A as data and __M as the mask.  */
extern __inline void
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_cvtsepi64_storeu_epi8 (void * __P, __mmask8 __M, __m256i __A)
{
  unsigned int *__dst = (unsigned int *) __P;

  __builtin_ia32_pmovsqb256mem_mask (__dst, (__v4di) __A, __M);
}

/* Calls __builtin_ia32_pmovsqb256_mask with __A as input, __O as the
   second argument and __M as the mask argument.  */
extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_cvtsepi64_epi8 (__m128i __O, __mmask8 __M, __m256i __A)
{
  __v4di __src = (__v4di) __A;
  __v16qi __merge = (__v16qi) __O;

  return (__m128i) __builtin_ia32_pmovsqb256_mask (__src, __merge, __M);
}

/* Calls __builtin_ia32_pmovsqb256_mask with __A as input, an all-zero
   vector as the second argument and __M as the mask argument.  */
extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_maskz_cvtsepi64_epi8 (__mmask8 __M, __m256i __A)
{
  __v4di __src = (__v4di) __A;
  __v16qi __zero = (__v16qi) _mm_setzero_si128 ();

  return (__m128i) __builtin_ia32_pmovsqb256_mask (__src, __zero, __M);
}

/* Calls __builtin_ia32_pmovusqb128_mask on __A with an all-ones mask; the
   second argument comes from _mm_undefined_si128.  */
extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvtusepi64_epi8 (__m128i __A)
{
  __v2di __src = (__v2di) __A;
  __v16qi __undef = (__v16qi) _mm_undefined_si128 ();
  __mmask8 __all = (__mmask8) -1;

  return (__m128i) __builtin_ia32_pmovusqb128_mask (__src, __undef, __all);
}

/* Calls __builtin_ia32_pmovusqb128mem_mask with __P (passed as
   unsigned short *) as the address, __A as data and __M as the mask.  */
extern __inline void
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_cvtusepi64_storeu_epi8 (void * __P, __mmask8 __M, __m128i __A)
{
  unsigned short *__dst = (unsigned short *) __P;

  __builtin_ia32_pmovusqb128mem_mask (__dst, (__v2di) __A, __M);
}

/* Calls __builtin_ia32_pmovusqb128_mask with __A as input, __O as the
   second argument and __M as the mask argument.  */
extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_cvtusepi64_epi8 (__m128i __O, __mmask8 __M, __m128i __A)
{
  __v2di __src = (__v2di) __A;
  __v16qi __merge = (__v16qi) __O;

  return (__m128i) __builtin_ia32_pmovusqb128_mask (__src, __merge, __M);
}

/* Calls __builtin_ia32_pmovusqb128_mask with __A as input, an all-zero
   vector as the second argument and __M as the mask argument.  */
extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_cvtusepi64_epi8 (__mmask8 __M, __m128i __A)
{
  __v2di __src = (__v2di) __A;
  __v16qi __zero = (__v16qi) _mm_setzero_si128 ();

  return (__m128i) __builtin_ia32_pmovusqb128_mask (__src, __zero, __M);
}

/* Calls __builtin_ia32_pmovusqb256_mask on __A with an all-ones mask; the
   second argument comes from _mm_undefined_si128.  */
extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_cvtusepi64_epi8 (__m256i __A)
{
  __v4di __src = (__v4di) __A;
  __v16qi __undef = (__v16qi) _mm_undefined_si128 ();
  __mmask8 __all = (__mmask8) -1;

  return (__m128i) __builtin_ia32_pmovusqb256_mask (__src, __undef, __all);
}

/* Calls __builtin_ia32_pmovusqb256mem_mask with __P (passed as
   unsigned int *) as the address, __A as data and __M as the mask.  */
extern __inline void
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_cvtusepi64_storeu_epi8 (void * __P, __mmask8 __M, __m256i __A)
{
  unsigned int *__dst = (unsigned int *) __P;

  __builtin_ia32_pmovusqb256mem_mask (__dst, (__v4di) __A, __M);
}

/* Calls __builtin_ia32_pmovusqb256_mask with __A as input, __O as the
   second argument and __M as the mask argument.  */
extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_cvtusepi64_epi8 (__m128i __O, __mmask8 __M, __m256i __A)
{
  __v4di __src = (__v4di) __A;
  __v16qi __merge = (__v16qi) __O;

  return (__m128i) __builtin_ia32_pmovusqb256_mask (__src, __merge, __M);
}

/* Invokes the pmovusqb256 builtin on __A with an all-zero second operand
   and __M as the mask.  */
extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_maskz_cvtusepi64_epi8 (__mmask8 __M, __m256i __A)
{
  __v16qi __zero = (__v16qi) _mm_setzero_si128 ();

  return (__m128i) __builtin_ia32_pmovusqb256_mask ((__v4di) __A, __zero, __M);
}

/* Invokes the pmovqw128 builtin on __A with every mask bit set and an
   undefined vector as the second operand.  */
extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvtepi64_epi16 (__m128i __A)
{
  __v8hi __undef = (__v8hi) _mm_undefined_si128 ();

  return (__m128i) __builtin_ia32_pmovqw128_mask ((__v2di) __A, __undef,
						  (__mmask8) -1);
}

/* Invokes the pmovqw128mem builtin with __A and mask __M; the destination
   __P is handed over as an unsigned int pointer.  */
extern __inline void
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_cvtepi64_storeu_epi16 (void * __P, __mmask8 __M, __m128i __A)
{
  unsigned int *__dst = (unsigned int *) __P;

  __builtin_ia32_pmovqw128mem_mask (__dst, (__v2di) __A, __M);
}

/* Invokes the pmovqw128 builtin on __A with __O as the second operand and
   __M as the mask.  */
extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_cvtepi64_epi16 (__m128i __O, __mmask8 __M, __m128i __A)
{
  __v2di __src = (__v2di) __A;
  __v8hi __merge = (__v8hi) __O;

  return (__m128i) __builtin_ia32_pmovqw128_mask (__src, __merge, __M);
}

/* Invokes the pmovqw128 builtin on __A with an all-zero second operand and
   __M as the mask.  */
extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_cvtepi64_epi16 (__mmask8 __M, __m128i __A)
{
  __v8hi __zero = (__v8hi) _mm_setzero_si128 ();

  return (__m128i) __builtin_ia32_pmovqw128_mask ((__v2di) __A, __zero, __M);
}

/* Invokes the pmovqw256 builtin on __A with every mask bit set and an
   undefined vector as the second operand.  */
extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_cvtepi64_epi16 (__m256i __A)
{
  __v8hi __undef = (__v8hi) _mm_undefined_si128 ();

  return (__m128i) __builtin_ia32_pmovqw256_mask ((__v4di) __A, __undef,
						  (__mmask8) -1);
}

/* Invokes the pmovqw256mem builtin with __A and mask __M; the destination
   __P is handed over as an unsigned long long pointer.  */
extern __inline void
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_cvtepi64_storeu_epi16 (void * __P, __mmask8 __M, __m256i __A)
{
  unsigned long long *__dst = (unsigned long long *) __P;

  __builtin_ia32_pmovqw256mem_mask (__dst, (__v4di) __A, __M);
}

/* Invokes the pmovqw256 builtin on __A with __O as the second operand and
   __M as the mask.  */
extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_cvtepi64_epi16 (__m128i __O, __mmask8 __M, __m256i __A)
{
  __v4di __src = (__v4di) __A;
  __v8hi __merge = (__v8hi) __O;

  return (__m128i) __builtin_ia32_pmovqw256_mask (__src, __merge, __M);
}

/* Invokes the pmovqw256 builtin on __A with an all-zero second operand and
   __M as the mask.  */
extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_maskz_cvtepi64_epi16 (__mmask8 __M, __m256i __A)
{
  __v8hi __zero = (__v8hi) _mm_setzero_si128 ();

  return (__m128i) __builtin_ia32_pmovqw256_mask ((__v4di) __A, __zero, __M);
}

/* Invokes the pmovsqw128 builtin on __A with every mask bit set and an
   undefined vector as the second operand.  */
extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvtsepi64_epi16 (__m128i __A)
{
  __v8hi __undef = (__v8hi) _mm_undefined_si128 ();

  return (__m128i) __builtin_ia32_pmovsqw128_mask ((__v2di) __A, __undef,
						   (__mmask8) -1);
}

/* Invokes the pmovsqw128mem builtin with __A and mask __M; the destination
   __P is handed over as an unsigned int pointer.  */
extern __inline void
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_cvtsepi64_storeu_epi16 (void * __P, __mmask8 __M, __m128i __A)
{
  unsigned int *__dst = (unsigned int *) __P;

  __builtin_ia32_pmovsqw128mem_mask (__dst, (__v2di) __A, __M);
}

/* Invokes the pmovsqw128 builtin on __A with __O as the second operand
   and __M as the mask.  */
extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_cvtsepi64_epi16 (__m128i __O, __mmask8 __M, __m128i __A)
{
  __v2di __src = (__v2di) __A;
  __v8hi __merge = (__v8hi) __O;

  return (__m128i) __builtin_ia32_pmovsqw128_mask (__src, __merge, __M);
}

/* Invokes the pmovsqw128 builtin on __A with an all-zero second operand
   and __M as the mask.  */
extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_cvtsepi64_epi16 (__mmask8 __M, __m128i __A)
{
  __v8hi __zero = (__v8hi) _mm_setzero_si128 ();

  return (__m128i) __builtin_ia32_pmovsqw128_mask ((__v2di) __A, __zero, __M);
}

/* Invokes the pmovsqw256 builtin on __A with every mask bit set and an
   undefined vector as the second operand.  */
extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_cvtsepi64_epi16 (__m256i __A)
{
  __v8hi __undef = (__v8hi) _mm_undefined_si128 ();

  return (__m128i) __builtin_ia32_pmovsqw256_mask ((__v4di) __A, __undef,
						   (__mmask8) -1);
}

/* Invokes the pmovsqw256mem builtin with __A and mask __M; the destination
   __P is handed over as an unsigned long long pointer.  */
extern __inline void
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_cvtsepi64_storeu_epi16 (void * __P, __mmask8 __M, __m256i __A)
{
  unsigned long long *__dst = (unsigned long long *) __P;

  __builtin_ia32_pmovsqw256mem_mask (__dst, (__v4di) __A, __M);
}

/* Invokes the pmovsqw256 builtin on __A with __O as the second operand
   and __M as the mask.  */
extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_cvtsepi64_epi16 (__m128i __O, __mmask8 __M, __m256i __A)
{
  __v4di __src = (__v4di) __A;
  __v8hi __merge = (__v8hi) __O;

  return (__m128i) __builtin_ia32_pmovsqw256_mask (__src, __merge, __M);
}

/* Invokes the pmovsqw256 builtin on __A with an all-zero second operand
   and __M as the mask.  */
extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_maskz_cvtsepi64_epi16 (__mmask8 __M, __m256i __A)
{
  __v8hi __zero = (__v8hi) _mm_setzero_si128 ();

  return (__m128i) __builtin_ia32_pmovsqw256_mask ((__v4di) __A, __zero, __M);
}

/* Invokes the pmovusqw128 builtin on __A with every mask bit set and an
   undefined vector as the second operand.  */
extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvtusepi64_epi16 (__m128i __A)
{
  __v8hi __undef = (__v8hi) _mm_undefined_si128 ();

  return (__m128i) __builtin_ia32_pmovusqw128_mask ((__v2di) __A, __undef,
						    (__mmask8) -1);
}

/* Invokes the pmovusqw128mem builtin with __A and mask __M; the destination
   __P is handed over as an unsigned int pointer.  */
extern __inline void
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_cvtusepi64_storeu_epi16 (void * __P, __mmask8 __M, __m128i __A)
{
  unsigned int *__dst = (unsigned int *) __P;

  __builtin_ia32_pmovusqw128mem_mask (__dst, (__v2di) __A, __M);
}

/* Invokes the pmovusqw128 builtin on __A with __O as the second operand
   and __M as the mask.  */
extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_cvtusepi64_epi16 (__m128i __O, __mmask8 __M, __m128i __A)
{
  __v2di __src = (__v2di) __A;
  __v8hi __merge = (__v8hi) __O;

  return (__m128i) __builtin_ia32_pmovusqw128_mask (__src, __merge, __M);
}

/* Invokes the pmovusqw128 builtin on __A with an all-zero second operand
   and __M as the mask.  */
extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_cvtusepi64_epi16 (__mmask8 __M, __m128i __A)
{
  __v8hi __zero = (__v8hi) _mm_setzero_si128 ();

  return (__m128i) __builtin_ia32_pmovusqw128_mask ((__v2di) __A, __zero, __M);
}

/* Invokes the pmovusqw256 builtin on __A with every mask bit set and an
   undefined vector as the second operand.  */
extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_cvtusepi64_epi16 (__m256i __A)
{
  __v8hi __undef = (__v8hi) _mm_undefined_si128 ();

  return (__m128i) __builtin_ia32_pmovusqw256_mask ((__v4di) __A, __undef,
						    (__mmask8) -1);
}

/* Invokes the pmovusqw256mem builtin with __A and mask __M; the destination
   __P is handed over as an unsigned long long pointer.  */
extern __inline void
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_cvtusepi64_storeu_epi16 (void * __P, __mmask8 __M, __m256i __A)
{
  unsigned long long *__dst = (unsigned long long *) __P;

  __builtin_ia32_pmovusqw256mem_mask (__dst, (__v4di) __A, __M);
}

/* Invokes the pmovusqw256 builtin on __A with __O as the second operand
   and __M as the mask.  */
extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_cvtusepi64_epi16 (__m128i __O, __mmask8 __M, __m256i __A)
{
  __v4di __src = (__v4di) __A;
  __v8hi __merge = (__v8hi) __O;

  return (__m128i) __builtin_ia32_pmovusqw256_mask (__src, __merge, __M);
}

/* Invokes the pmovusqw256 builtin on __A with an all-zero second operand
   and __M as the mask.  */
extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_maskz_cvtusepi64_epi16 (__mmask8 __M, __m256i __A)
{
  __v8hi __zero = (__v8hi) _mm_setzero_si128 ();

  return (__m128i) __builtin_ia32_pmovusqw256_mask ((__v4di) __A, __zero, __M);
}

/* Invokes the pmovqd128 builtin on __A with every mask bit set and an
   undefined vector as the second operand.  */
extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvtepi64_epi32 (__m128i __A)
{
  __v4si __undef = (__v4si) _mm_undefined_si128 ();

  return (__m128i) __builtin_ia32_pmovqd128_mask ((__v2di) __A, __undef,
						  (__mmask8) -1);
}

/* Invokes the pmovqd128mem builtin with __A and mask __M; the destination
   __P is handed over as an unsigned long long pointer.  */
extern __inline void
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_cvtepi64_storeu_epi32 (void * __P, __mmask8 __M, __m128i __A)
{
  unsigned long long *__dst = (unsigned long long *) __P;

  __builtin_ia32_pmovqd128mem_mask (__dst, (__v2di) __A, __M);
}

/* Invokes the pmovqd128 builtin on __A with __O as the second operand and
   __M as the mask.  */
extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_cvtepi64_epi32 (__m128i __O, __mmask8 __M, __m128i __A)
{
  __v2di __src = (__v2di) __A;
  __v4si __merge = (__v4si) __O;

  return (__m128i) __builtin_ia32_pmovqd128_mask (__src, __merge, __M);
}

/* Invokes the pmovqd128 builtin on __A with an all-zero second operand and
   __M as the mask.  */
extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_cvtepi64_epi32 (__mmask8 __M, __m128i __A)
{
  __v4si __zero = (__v4si) _mm_setzero_si128 ();

  return (__m128i) __builtin_ia32_pmovqd128_mask ((__v2di) __A, __zero, __M);
}

/* Invokes the pmovqd256 builtin on __A with every mask bit set and an
   undefined vector as the second operand.  */
extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_cvtepi64_epi32 (__m256i __A)
{
  __v4si __undef = (__v4si) _mm_undefined_si128 ();

  return (__m128i) __builtin_ia32_pmovqd256_mask ((__v4di) __A, __undef,
						  (__mmask8) -1);
}

/* Invokes the pmovqd256mem builtin with __A and mask __M; the destination
   __P is handed over as a __v4si pointer.  */
extern __inline void
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_cvtepi64_storeu_epi32 (void * __P, __mmask8 __M, __m256i __A)
{
  __v4si *__dst = (__v4si *) __P;

  __builtin_ia32_pmovqd256mem_mask (__dst, (__v4di) __A, __M);
}

/* Invokes the pmovqd256 builtin on __A with __O as the second operand and
   __M as the mask.  */
extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_cvtepi64_epi32 (__m128i __O, __mmask8 __M, __m256i __A)
{
  __v4di __src = (__v4di) __A;
  __v4si __merge = (__v4si) __O;

  return (__m128i) __builtin_ia32_pmovqd256_mask (__src, __merge, __M);
}

/* Invokes the pmovqd256 builtin on __A with an all-zero second operand and
   __M as the mask.  */
extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_maskz_cvtepi64_epi32 (__mmask8 __M, __m256i __A)
{
  __v4si __zero = (__v4si) _mm_setzero_si128 ();

  return (__m128i) __builtin_ia32_pmovqd256_mask ((__v4di) __A, __zero, __M);
}

/* Invokes the pmovsqd128 builtin on __A with every mask bit set and an
   undefined vector as the second operand.  */
extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvtsepi64_epi32 (__m128i __A)
{
  __v4si __undef = (__v4si) _mm_undefined_si128 ();

  return (__m128i) __builtin_ia32_pmovsqd128_mask ((__v2di) __A, __undef,
						   (__mmask8) -1);
}

/* Invokes the pmovsqd128mem builtin with __A and mask __M; the destination
   __P is handed over as an unsigned long long pointer.  */
extern __inline void
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_cvtsepi64_storeu_epi32 (void * __P, __mmask8 __M, __m128i __A)
{
  unsigned long long *__dst = (unsigned long long *) __P;

  __builtin_ia32_pmovsqd128mem_mask (__dst, (__v2di) __A, __M);
}

/* Invokes the pmovsqd128 builtin on __A with __O as the second operand
   and __M as the mask.  */
extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_cvtsepi64_epi32 (__m128i __O, __mmask8 __M, __m128i __A)
{
  __v2di __src = (__v2di) __A;
  __v4si __merge = (__v4si) __O;

  return (__m128i) __builtin_ia32_pmovsqd128_mask (__src, __merge, __M);
}

/* Invokes the pmovsqd128 builtin on __A with an all-zero second operand
   and __M as the mask.  */
extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_cvtsepi64_epi32 (__mmask8 __M, __m128i __A)
{
  __v4si __zero = (__v4si) _mm_setzero_si128 ();

  return (__m128i) __builtin_ia32_pmovsqd128_mask ((__v2di) __A, __zero, __M);
}

/* Invokes the pmovsqd256 builtin on __A with every mask bit set and an
   undefined vector as the second operand.  */
extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_cvtsepi64_epi32 (__m256i __A)
{
  __v4si __undef = (__v4si) _mm_undefined_si128 ();

  return (__m128i) __builtin_ia32_pmovsqd256_mask ((__v4di) __A, __undef,
						   (__mmask8) -1);
}

/* Invokes the pmovsqd256mem builtin with __A and mask __M; the destination
   __P is handed over as a __v4si pointer.  */
extern __inline void
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_cvtsepi64_storeu_epi32 (void * __P, __mmask8 __M, __m256i __A)
{
  __v4si *__dst = (__v4si *) __P;

  __builtin_ia32_pmovsqd256mem_mask (__dst, (__v4di) __A, __M);
}

/* Invokes the pmovsqd256 builtin on __A with __O as the second operand
   and __M as the mask.  */
extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_cvtsepi64_epi32 (__m128i __O, __mmask8 __M, __m256i __A)
{
  __v4di __src = (__v4di) __A;
  __v4si __merge = (__v4si) __O;

  return (__m128i) __builtin_ia32_pmovsqd256_mask (__src, __merge, __M);
}

/* Invokes the pmovsqd256 builtin on __A with an all-zero second operand
   and __M as the mask.  */
extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_maskz_cvtsepi64_epi32 (__mmask8 __M, __m256i __A)
{
  __v4si __zero = (__v4si) _mm_setzero_si128 ();

  return (__m128i) __builtin_ia32_pmovsqd256_mask ((__v4di) __A, __zero, __M);
}

/* Invokes the pmovusqd128 builtin on __A with every mask bit set and an
   undefined vector as the second operand.  */
extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvtusepi64_epi32 (__m128i __A)
{
  __v4si __undef = (__v4si) _mm_undefined_si128 ();

  return (__m128i) __builtin_ia32_pmovusqd128_mask ((__v2di) __A, __undef,
						    (__mmask8) -1);
}

/* Invokes the pmovusqd128mem builtin with __A and mask __M; the destination
   __P is handed over as an unsigned long long pointer.  */
extern __inline void
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_cvtusepi64_storeu_epi32 (void * __P, __mmask8 __M, __m128i __A)
{
  unsigned long long *__dst = (unsigned long long *) __P;

  __builtin_ia32_pmovusqd128mem_mask (__dst, (__v2di) __A, __M);
}

/* Invokes the pmovusqd128 builtin on __A with __O as the second operand
   and __M as the mask.  */
extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_cvtusepi64_epi32 (__m128i __O, __mmask8 __M, __m128i __A)
{
  __v2di __src = (__v2di) __A;
  __v4si __merge = (__v4si) __O;

  return (__m128i) __builtin_ia32_pmovusqd128_mask (__src, __merge, __M);
}

/* Invokes the pmovusqd128 builtin on __A with an all-zero second operand
   and __M as the mask.  */
extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_cvtusepi64_epi32 (__mmask8 __M, __m128i __A)
{
  __v4si __zero = (__v4si) _mm_setzero_si128 ();

  return (__m128i) __builtin_ia32_pmovusqd128_mask ((__v2di) __A, __zero, __M);
}

/* Invokes the pmovusqd256 builtin on __A with every mask bit set and an
   undefined vector as the second operand.  */
extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_cvtusepi64_epi32 (__m256i __A)
{
  __v4si __undef = (__v4si) _mm_undefined_si128 ();

  return (__m128i) __builtin_ia32_pmovusqd256_mask ((__v4di) __A, __undef,
						    (__mmask8) -1);
}

/* Invokes the pmovusqd256mem builtin with __A and mask __M; the destination
   __P is handed over as a __v4si pointer.  */
extern __inline void
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_cvtusepi64_storeu_epi32 (void * __P, __mmask8 __M, __m256i __A)
{
  __v4si *__dst = (__v4si *) __P;

  __builtin_ia32_pmovusqd256mem_mask (__dst, (__v4di) __A, __M);
}

/* Invokes the pmovusqd256 builtin on __A with __O as the second operand
   and __M as the mask.  */
extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_cvtusepi64_epi32 (__m128i __O, __mmask8 __M, __m256i __A)
{
  __v4di __src = (__v4di) __A;
  __v4si __merge = (__v4si) __O;

  return (__m128i) __builtin_ia32_pmovusqd256_mask (__src, __merge, __M);
}

/* Invokes the pmovusqd256 builtin on __A with an all-zero second operand
   and __M as the mask.  */
extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_maskz_cvtusepi64_epi32 (__mmask8 __M, __m256i __A)
{
  __v4si __zero = (__v4si) _mm_setzero_si128 ();

  return (__m128i) __builtin_ia32_pmovusqd256_mask ((__v4di) __A, __zero, __M);
}

/* Invokes the broadcastss256 builtin on __A with __O as the second operand
   and __M as the mask.  */
extern __inline __m256
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_broadcastss_ps (__m256 __O, __mmask8 __M, __m128 __A)
{
  __v4sf __src = (__v4sf) __A;
  __v8sf __merge = (__v8sf) __O;

  return (__m256) __builtin_ia32_broadcastss256_mask (__src, __merge, __M);
}

/* Invokes the broadcastss256 builtin on __A with an all-zero second
   operand and __M as the mask.  */
extern __inline __m256
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_maskz_broadcastss_ps (__mmask8 __M, __m128 __A)
{
  __v8sf __zero = (__v8sf) _mm256_setzero_ps ();

  return (__m256) __builtin_ia32_broadcastss256_mask ((__v4sf) __A, __zero,
						      __M);
}

/* Invokes the broadcastss128 builtin on __A with __O as the second operand
   and __M as the mask.  */
extern __inline __m128
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_broadcastss_ps (__m128 __O, __mmask8 __M, __m128 __A)
{
  __v4sf __src = (__v4sf) __A;
  __v4sf __merge = (__v4sf) __O;

  return (__m128) __builtin_ia32_broadcastss128_mask (__src, __merge, __M);
}

/* Invokes the broadcastss128 builtin on __A with an all-zero second
   operand and __M as the mask.  */
extern __inline __m128
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_broadcastss_ps (__mmask8 __M, __m128 __A)
{
  __v4sf __zero = (__v4sf) _mm_setzero_ps ();

  return (__m128) __builtin_ia32_broadcastss128_mask ((__v4sf) __A, __zero,
						      __M);
}

/* Invokes the broadcastsd256 builtin on __A with __O as the second operand
   and __M as the mask.  */
extern __inline __m256d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_broadcastsd_pd (__m256d __O, __mmask8 __M, __m128d __A)
{
  __v2df __src = (__v2df) __A;
  __v4df __merge = (__v4df) __O;

  return (__m256d) __builtin_ia32_broadcastsd256_mask (__src, __merge, __M);
}

/* Invokes the broadcastsd256 builtin on __A with an all-zero second
   operand and __M as the mask.  */
extern __inline __m256d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_maskz_broadcastsd_pd (__mmask8 __M, __m128d __A)
{
  __v4df __zero = (__v4df) _mm256_setzero_pd ();

  return (__m256d) __builtin_ia32_broadcastsd256_mask ((__v2df) __A, __zero,
						       __M);
}

/* Invokes the pbroadcastd256 builtin on __A with __O as the second operand
   and __M as the mask.  */
extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_broadcastd_epi32 (__m256i __O, __mmask8 __M, __m128i __A)
{
  __v4si __src = (__v4si) __A;
  __v8si __merge = (__v8si) __O;

  return (__m256i) __builtin_ia32_pbroadcastd256_mask (__src, __merge, __M);
}

/* Invokes the pbroadcastd256 builtin on __A with an all-zero second
   operand and __M as the mask.  */
extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_maskz_broadcastd_epi32 (__mmask8 __M, __m128i __A)
{
  __v8si __zero = (__v8si) _mm256_setzero_si256 ();

  return (__m256i) __builtin_ia32_pbroadcastd256_mask ((__v4si) __A, __zero,
						       __M);
}

/* Invokes the pbroadcastd256_gpr builtin on the scalar __A with __O as the
   second operand and __M as the mask.  */
extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_set1_epi32 (__m256i __O, __mmask8 __M, int __A)
{
  __v8si __merge = (__v8si) __O;

  return (__m256i) __builtin_ia32_pbroadcastd256_gpr_mask (__A, __merge, __M);
}

/* Invokes the pbroadcastd256_gpr builtin on the scalar __A with an
   all-zero second operand and __M as the mask.  */
extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_maskz_set1_epi32 (__mmask8 __M, int __A)
{
  __v8si __zero = (__v8si) _mm256_setzero_si256 ();

  return (__m256i) __builtin_ia32_pbroadcastd256_gpr_mask (__A, __zero, __M);
}

/* Invokes the pbroadcastd128 builtin on __A with __O as the second operand
   and __M as the mask.  */
extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_broadcastd_epi32 (__m128i __O, __mmask8 __M, __m128i __A)
{
  __v4si __src = (__v4si) __A;
  __v4si __merge = (__v4si) __O;

  return (__m128i) __builtin_ia32_pbroadcastd128_mask (__src, __merge, __M);
}

/* Invokes the pbroadcastd128 builtin on __A with an all-zero second
   operand and __M as the mask.  */
extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_broadcastd_epi32 (__mmask8 __M, __m128i __A)
{
  __v4si __zero = (__v4si) _mm_setzero_si128 ();

  return (__m128i) __builtin_ia32_pbroadcastd128_mask ((__v4si) __A, __zero,
						       __M);
}

/* Invokes the pbroadcastd128_gpr builtin on the scalar __A with __O as the
   second operand and __M as the mask.  */
extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_set1_epi32 (__m128i __O, __mmask8 __M, int __A)
{
  __v4si __merge = (__v4si) __O;

  return (__m128i) __builtin_ia32_pbroadcastd128_gpr_mask (__A, __merge, __M);
}

/* Invokes the pbroadcastd128_gpr builtin on the scalar __A with an
   all-zero second operand and __M as the mask.  */
extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_set1_epi32 (__mmask8 __M, int __A)
{
  __v4si __zero = (__v4si) _mm_setzero_si128 ();

  return (__m128i) __builtin_ia32_pbroadcastd128_gpr_mask (__A, __zero, __M);
}

/* Invokes the pbroadcastq256 builtin on __A with __O as the second operand
   and __M as the mask.  */
extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_broadcastq_epi64 (__m256i __O, __mmask8 __M, __m128i __A)
{
  __v2di __src = (__v2di) __A;
  __v4di __merge = (__v4di) __O;

  return (__m256i) __builtin_ia32_pbroadcastq256_mask (__src, __merge, __M);
}

/* Invokes the pbroadcastq256 builtin on __A with an all-zero second
   operand and __M as the mask.  */
extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_maskz_broadcastq_epi64 (__mmask8 __M, __m128i __A)
{
  __v4di __zero = (__v4di) _mm256_setzero_si256 ();

  return (__m256i) __builtin_ia32_pbroadcastq256_mask ((__v2di) __A, __zero,
						       __M);
}

/* Invokes the pbroadcastq256_gpr builtin on the scalar __A with __O as the
   second operand and __M as the mask.  */
extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_set1_epi64 (__m256i __O, __mmask8 __M, long long __A)
{
  __v4di __merge = (__v4di) __O;

  return (__m256i) __builtin_ia32_pbroadcastq256_gpr_mask (__A, __merge, __M);
}

/* Invokes the pbroadcastq256_gpr builtin on the scalar __A with an
   all-zero second operand and __M as the mask.  */
extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_maskz_set1_epi64 (__mmask8 __M, long long __A)
{
  __v4di __zero = (__v4di) _mm256_setzero_si256 ();

  return (__m256i) __builtin_ia32_pbroadcastq256_gpr_mask (__A, __zero, __M);
}

/* Invokes the pbroadcastq128 builtin on __A with __O as the second operand
   and __M as the mask.  */
extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_broadcastq_epi64 (__m128i __O, __mmask8 __M, __m128i __A)
{
  __v2di __src = (__v2di) __A;
  __v2di __merge = (__v2di) __O;

  return (__m128i) __builtin_ia32_pbroadcastq128_mask (__src, __merge, __M);
}

/* Invokes the pbroadcastq128 builtin on __A with an all-zero second
   operand and __M as the mask.  */
extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_broadcastq_epi64 (__mmask8 __M, __m128i __A)
{
  __v2di __zero = (__v2di) _mm_setzero_si128 ();

  return (__m128i) __builtin_ia32_pbroadcastq128_mask ((__v2di) __A, __zero,
						       __M);
}

/* Invokes the pbroadcastq128_gpr builtin on the scalar __A with __O as the
   second operand and __M as the mask.  */
extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_set1_epi64 (__m128i __O, __mmask8 __M, long long __A)
{
  __v2di __merge = (__v2di) __O;

  return (__m128i) __builtin_ia32_pbroadcastq128_gpr_mask (__A, __merge, __M);
}

/* Invokes the pbroadcastq128_gpr builtin on the scalar __A with an
   all-zero second operand and __M as the mask.  */
extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_set1_epi64 (__mmask8 __M, long long __A)
{
  __v2di __zero = (__v2di) _mm_setzero_si128 ();

  return (__m128i) __builtin_ia32_pbroadcastq128_gpr_mask (__A, __zero, __M);
}

/* Invokes the broadcastf32x4_256 builtin on __A with every mask bit set
   and an undefined vector as the second operand.  The undefined vector
   comes from _mm256_undefined_ps so that its element type matches the
   __v8sf the builtin is given, instead of reinterpreting the
   double-precision vector returned by _mm256_undefined_pd.  */
extern __inline __m256
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_broadcast_f32x4 (__m128 __A)
{
  return (__m256) __builtin_ia32_broadcastf32x4_256_mask ((__v4sf) __A,
							  (__v8sf)
							  _mm256_undefined_ps (),
							  (__mmask8) -1);
}

/* Invokes the broadcastf32x4_256 builtin on __A with __O as the second
   operand and __M as the mask.  */
extern __inline __m256
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_broadcast_f32x4 (__m256 __O, __mmask8 __M, __m128 __A)
{
  __v4sf __src = (__v4sf) __A;
  __v8sf __merge = (__v8sf) __O;

  return (__m256) __builtin_ia32_broadcastf32x4_256_mask (__src, __merge, __M);
}

/* Invokes the broadcastf32x4_256 builtin on __A with an all-zero second
   operand and __M as the mask.  */
extern __inline __m256
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_maskz_broadcast_f32x4 (__mmask8 __M, __m128 __A)
{
  __v8sf __zero = (__v8sf) _mm256_setzero_ps ();

  return (__m256) __builtin_ia32_broadcastf32x4_256_mask ((__v4sf) __A,
							  __zero, __M);
}

/* Invokes the broadcasti32x4_256 builtin on __A with every mask bit set
   and an undefined vector as the second operand.  */
extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_broadcast_i32x4 (__m128i __A)
{
  __v8si __undef = (__v8si) _mm256_undefined_si256 ();

  return (__m256i) __builtin_ia32_broadcasti32x4_256_mask ((__v4si) __A,
							   __undef,
							   (__mmask8) -1);
}

/* Invokes the broadcasti32x4_256 builtin on __A with __O as the second
   operand and __M as the mask.  */
extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_broadcast_i32x4 (__m256i __O, __mmask8 __M, __m128i __A)
{
  __v4si __src = (__v4si) __A;
  __v8si __merge = (__v8si) __O;

  return (__m256i) __builtin_ia32_broadcasti32x4_256_mask (__src, __merge,
							   __M);
}

/* Invokes the broadcasti32x4_256 builtin on __A with an all-zero second
   operand and __M as the mask.  */
extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_maskz_broadcast_i32x4 (__mmask8 __M, __m128i __A)
{
  __v8si __zero = (__v8si) _mm256_setzero_si256 ();

  return (__m256i) __builtin_ia32_broadcasti32x4_256_mask ((__v4si) __A,
							   __zero, __M);
}

/* Invokes the pmovsxbd256 builtin on __A with __W as the second operand
   and __U as the mask.  */
extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_cvtepi8_epi32 (__m256i __W, __mmask8 __U, __m128i __A)
{
  __v16qi __src = (__v16qi) __A;
  __v8si __merge = (__v8si) __W;

  return (__m256i) __builtin_ia32_pmovsxbd256_mask (__src, __merge, __U);
}

/* Invokes the pmovsxbd256 builtin on __A with an all-zero second operand
   and __U as the mask.  */
extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_maskz_cvtepi8_epi32 (__mmask8 __U, __m128i __A)
{
  __v8si __zero = (__v8si) _mm256_setzero_si256 ();

  return (__m256i) __builtin_ia32_pmovsxbd256_mask ((__v16qi) __A, __zero,
						    __U);
}

/* Invokes the pmovsxbd128 builtin on __A with __W as the second operand
   and __U as the mask.  */
extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_cvtepi8_epi32 (__m128i __W, __mmask8 __U, __m128i __A)
{
  __v16qi __src = (__v16qi) __A;
  __v4si __merge = (__v4si) __W;

  return (__m128i) __builtin_ia32_pmovsxbd128_mask (__src, __merge, __U);
}

/* Invokes the pmovsxbd128 builtin on __A with an all-zero second operand
   and __U as the mask.  */
extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_cvtepi8_epi32 (__mmask8 __U, __m128i __A)
{
  __v4si __zero = (__v4si) _mm_setzero_si128 ();

  return (__m128i) __builtin_ia32_pmovsxbd128_mask ((__v16qi) __A, __zero,
						    __U);
}

/* Invokes the pmovsxbq256 builtin on __A with __W as the second operand
   and __U as the mask.  */
extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_cvtepi8_epi64 (__m256i __W, __mmask8 __U, __m128i __A)
{
  __v16qi __src = (__v16qi) __A;
  __v4di __merge = (__v4di) __W;

  return (__m256i) __builtin_ia32_pmovsxbq256_mask (__src, __merge, __U);
}

/* Invokes the pmovsxbq256 builtin on __A with an all-zero second operand
   and __U as the mask.  */
extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_maskz_cvtepi8_epi64 (__mmask8 __U, __m128i __A)
{
  __v4di __zero = (__v4di) _mm256_setzero_si256 ();

  return (__m256i) __builtin_ia32_pmovsxbq256_mask ((__v16qi) __A, __zero,
						    __U);
}

/* Invokes the pmovsxbq128 builtin on __A with __W as the second operand
   and __U as the mask.  */
extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_cvtepi8_epi64 (__m128i __W, __mmask8 __U, __m128i __A)
{
  __v16qi __src = (__v16qi) __A;
  __v2di __merge = (__v2di) __W;

  return (__m128i) __builtin_ia32_pmovsxbq128_mask (__src, __merge, __U);
}

/* Invokes the pmovsxbq128 builtin on __A with an all-zero second operand
   and __U as the mask.  */
extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_cvtepi8_epi64 (__mmask8 __U, __m128i __A)
{
  __v2di __zero = (__v2di) _mm_setzero_si128 ();

  return (__m128i) __builtin_ia32_pmovsxbq128_mask ((__v16qi) __A, __zero,
						    __U);
}

/* Invokes the pmovsxwd256 builtin on __A with __W as the second operand
   and __U as the mask.  */
extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_cvtepi16_epi32 (__m256i __W, __mmask8 __U, __m128i __A)
{
  __v8hi __src = (__v8hi) __A;
  __v8si __merge = (__v8si) __W;

  return (__m256i) __builtin_ia32_pmovsxwd256_mask (__src, __merge, __U);
}

/* Invokes the pmovsxwd256 builtin on __A with an all-zero second operand
   and __U as the mask.  */
extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_maskz_cvtepi16_epi32 (__mmask8 __U, __m128i __A)
{
  __v8si __zero = (__v8si) _mm256_setzero_si256 ();

  return (__m256i) __builtin_ia32_pmovsxwd256_mask ((__v8hi) __A, __zero,
						    __U);
}

/* Invokes the pmovsxwd128 builtin on __A with __W as the second operand
   and __U as the mask.  */
extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_cvtepi16_epi32 (__m128i __W, __mmask8 __U, __m128i __A)
{
  __v8hi __src = (__v8hi) __A;
  __v4si __merge = (__v4si) __W;

  return (__m128i) __builtin_ia32_pmovsxwd128_mask (__src, __merge, __U);
}

/* Invokes the pmovsxwd128 builtin on __A with an all-zero second operand
   and __U as the mask.  */
extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_cvtepi16_epi32 (__mmask8 __U, __m128i __A)
{
  __v4si __zero = (__v4si) _mm_setzero_si128 ();

  return (__m128i) __builtin_ia32_pmovsxwd128_mask ((__v8hi) __A, __zero,
						    __U);
}

/* Invokes the pmovsxwq256 builtin on __A with __W as the second operand
   and __U as the mask.  */
extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_cvtepi16_epi64 (__m256i __W, __mmask8 __U, __m128i __A)
{
  __v8hi __src = (__v8hi) __A;
  __v4di __merge = (__v4di) __W;

  return (__m256i) __builtin_ia32_pmovsxwq256_mask (__src, __merge, __U);
}

/* Invokes the pmovsxwq256 builtin on __A with an all-zero second operand
   and __U as the mask.  */
extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_maskz_cvtepi16_epi64 (__mmask8 __U, __m128i __A)
{
  __v4di __zero = (__v4di) _mm256_setzero_si256 ();

  return (__m256i) __builtin_ia32_pmovsxwq256_mask ((__v8hi) __A, __zero,
						    __U);
}

/* Invokes the pmovsxwq128 builtin on __A with __W as the second operand
   and __U as the mask.  */
extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_cvtepi16_epi64 (__m128i __W, __mmask8 __U, __m128i __A)
{
  __v8hi __src = (__v8hi) __A;
  __v2di __merge = (__v2di) __W;

  return (__m128i) __builtin_ia32_pmovsxwq128_mask (__src, __merge, __U);
}

/* Invokes the pmovsxwq128 builtin on __A with an all-zero second operand
   and __U as the mask.  */
extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_cvtepi16_epi64 (__mmask8 __U, __m128i __A)
{
  __v2di __zero = (__v2di) _mm_setzero_si128 ();

  return (__m128i) __builtin_ia32_pmovsxwq128_mask ((__v8hi) __A, __zero,
						    __U);
}

/* Invokes the pmovsxdq256 builtin on __X with __W as the second operand
   and __U as the mask.  */
extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_cvtepi32_epi64 (__m256i __W, __mmask8 __U, __m128i __X)
{
  __v4si __src = (__v4si) __X;
  __v4di __merge = (__v4di) __W;

  return (__m256i) __builtin_ia32_pmovsxdq256_mask (__src, __merge, __U);
}

/* Invokes the pmovsxdq256 builtin on __X with an all-zero second operand
   and __U as the mask.  */
extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_maskz_cvtepi32_epi64 (__mmask8 __U, __m128i __X)
{
  __v4di __zero = (__v4di) _mm256_setzero_si256 ();

  return (__m256i) __builtin_ia32_pmovsxdq256_mask ((__v4si) __X, __zero,
						    __U);
}

/* Invokes the pmovsxdq128 builtin on __X with __W as the second operand
   and __U as the mask.  */
extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_cvtepi32_epi64 (__m128i __W, __mmask8 __U, __m128i __X)
{
  __v4si __src = (__v4si) __X;
  __v2di __merge = (__v2di) __W;

  return (__m128i) __builtin_ia32_pmovsxdq128_mask (__src, __merge, __U);
}

/* Invokes the pmovsxdq128 builtin on __X with an all-zero second operand
   and __U as the mask.  */
extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_cvtepi32_epi64 (__mmask8 __U, __m128i __X)
{
  __v2di __zero = (__v2di) _mm_setzero_si128 ();

  return (__m128i) __builtin_ia32_pmovsxdq128_mask ((__v4si) __X, __zero,
						    __U);
}

/* Invokes the pmovzxbd256 builtin on __A with __W as the second operand
   and __U as the mask.  */
extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_cvtepu8_epi32 (__m256i __W, __mmask8 __U, __m128i __A)
{
  __v16qi __src = (__v16qi) __A;
  __v8si __merge = (__v8si) __W;

  return (__m256i) __builtin_ia32_pmovzxbd256_mask (__src, __merge, __U);
}

/* Invokes the pmovzxbd256 builtin on __A with an all-zero second operand
   and __U as the mask.  */
extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_maskz_cvtepu8_epi32 (__mmask8 __U, __m128i __A)
{
  __v8si __zero = (__v8si) _mm256_setzero_si256 ();

  return (__m256i) __builtin_ia32_pmovzxbd256_mask ((__v16qi) __A, __zero,
						    __U);
}

/* Invokes the pmovzxbd128 builtin on __A with __W as the second operand
   and __U as the mask.  */
extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_cvtepu8_epi32 (__m128i __W, __mmask8 __U, __m128i __A)
{
  __v16qi __src = (__v16qi) __A;
  __v4si __merge = (__v4si) __W;

  return (__m128i) __builtin_ia32_pmovzxbd128_mask (__src, __merge, __U);
}

/* Invokes the pmovzxbd128 builtin on __A with an all-zero second operand
   and __U as the mask.  */
extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_cvtepu8_epi32 (__mmask8 __U, __m128i __A)
{
  __v4si __zero = (__v4si) _mm_setzero_si128 ();

  return (__m128i) __builtin_ia32_pmovzxbd128_mask ((__v16qi) __A, __zero,
						    __U);
}

/* Invokes the pmovzxbq256 builtin on __A with __W as the second operand
   and __U as the mask.  */
extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_cvtepu8_epi64 (__m256i __W, __mmask8 __U, __m128i __A)
{
  __v16qi __src = (__v16qi) __A;
  __v4di __merge = (__v4di) __W;

  return (__m256i) __builtin_ia32_pmovzxbq256_mask (__src, __merge, __U);
}

/* Invokes the pmovzxbq256 builtin on __A with an all-zero second operand
   and __U as the mask.  */
extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_maskz_cvtepu8_epi64 (__mmask8 __U, __m128i __A)
{
  __v4di __zero = (__v4di) _mm256_setzero_si256 ();

  return (__m256i) __builtin_ia32_pmovzxbq256_mask ((__v16qi) __A, __zero,
						    __U);
}

/* Invokes the pmovzxbq128 builtin on __A with __W as the second operand
   and __U as the mask.  */
extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_cvtepu8_epi64 (__m128i __W, __mmask8 __U, __m128i __A)
{
  __v16qi __src = (__v16qi) __A;
  __v2di __merge = (__v2di) __W;

  return (__m128i) __builtin_ia32_pmovzxbq128_mask (__src, __merge, __U);
}

/* Invokes the pmovzxbq128 builtin on __A with an all-zero second operand
   and __U as the mask.  */
extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_cvtepu8_epi64 (__mmask8 __U, __m128i __A)
{
  __v2di __zero = (__v2di) _mm_setzero_si128 ();

  return (__m128i) __builtin_ia32_pmovzxbq128_mask ((__v16qi) __A, __zero,
						    __U);
}

/* Invokes the pmovzxwd256 builtin on __A with __W as the second operand
   and __U as the mask.  */
extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_cvtepu16_epi32 (__m256i __W, __mmask8 __U, __m128i __A)
{
  __v8hi __src = (__v8hi) __A;
  __v8si __merge = (__v8si) __W;

  return (__m256i) __builtin_ia32_pmovzxwd256_mask (__src, __merge, __U);
}

/* Invokes the pmovzxwd256 builtin on __A with an all-zero second operand
   and __U as the mask.  */
extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_maskz_cvtepu16_epi32 (__mmask8 __U, __m128i __A)
{
  __v8si __zero = (__v8si) _mm256_setzero_si256 ();

  return (__m256i) __builtin_ia32_pmovzxwd256_mask ((__v8hi) __A, __zero,
						    __U);
}

/* Invokes the pmovzxwd128 builtin on __A with __W as the second operand
   and __U as the mask.  */
extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_cvtepu16_epi32 (__m128i __W, __mmask8 __U, __m128i __A)
{
  __v8hi __src = (__v8hi) __A;
  __v4si __merge = (__v4si) __W;

  return (__m128i) __builtin_ia32_pmovzxwd128_mask (__src, __merge, __U);
}

/* Invokes the pmovzxwd128 builtin on __A with an all-zero second operand
   and __U as the mask.  */
extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_cvtepu16_epi32 (__mmask8 __U, __m128i __A)
{
  __v4si __zero = (__v4si) _mm_setzero_si128 ();

  return (__m128i) __builtin_ia32_pmovzxwd128_mask ((__v8hi) __A, __zero,
						    __U);
}

/* Invokes the pmovzxwq256 builtin on __A with __W as the second operand
   and __U as the mask.  */
extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_cvtepu16_epi64 (__m256i __W, __mmask8 __U, __m128i __A)
{
  __v8hi __src = (__v8hi) __A;
  __v4di __merge = (__v4di) __W;

  return (__m256i) __builtin_ia32_pmovzxwq256_mask (__src, __merge, __U);
}

/* Invokes the pmovzxwq256 builtin on __A with an all-zero second operand
   and __U as the mask.  */
extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_maskz_cvtepu16_epi64 (__mmask8 __U, __m128i __A)
{
  __v4di __zero = (__v4di) _mm256_setzero_si256 ();

  return (__m256i) __builtin_ia32_pmovzxwq256_mask ((__v8hi) __A, __zero,
						    __U);
}

/* Invokes the pmovzxwq128 builtin on __A with __W as the second operand
   and __U as the mask.  */
extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_cvtepu16_epi64 (__m128i __W, __mmask8 __U, __m128i __A)
{
  __v8hi __src = (__v8hi) __A;
  __v2di __merge = (__v2di) __W;

  return (__m128i) __builtin_ia32_pmovzxwq128_mask (__src, __merge, __U);
}

/* Invokes the pmovzxwq128 builtin on __A with an all-zero second operand
   and __U as the mask.  */
extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_cvtepu16_epi64 (__mmask8 __U, __m128i __A)
{
  __v2di __zero = (__v2di) _mm_setzero_si128 ();

  return (__m128i) __builtin_ia32_pmovzxwq128_mask ((__v8hi) __A, __zero,
						    __U);
}

/* Invokes the pmovzxdq256 builtin on __X with __W as the second operand
   and __U as the mask.  */
extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_cvtepu32_epi64 (__m256i __W, __mmask8 __U, __m128i __X)
{
  __v4si __src = (__v4si) __X;
  __v4di __merge = (__v4di) __W;

  return (__m256i) __builtin_ia32_pmovzxdq256_mask (__src, __merge, __U);
}

/* Invokes the pmovzxdq256 builtin on __X with an all-zero second operand
   and __U as the mask.  */
extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_maskz_cvtepu32_epi64 (__mmask8 __U, __m128i __X)
{
  __v4di __zero = (__v4di) _mm256_setzero_si256 ();

  return (__m256i) __builtin_ia32_pmovzxdq256_mask ((__v4si) __X, __zero,
						    __U);
}

/* Invokes the pmovzxdq128 builtin on __X with __W as the second operand
   and __U as the mask.  */
extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_cvtepu32_epi64 (__m128i __W, __mmask8 __U, __m128i __X)
{
  __v4si __src = (__v4si) __X;
  __v2di __merge = (__v2di) __W;

  return (__m128i) __builtin_ia32_pmovzxdq128_mask (__src, __merge, __U);
}

/* Zero-extend the two lowest unsigned 32-bit elements of __X to 64-bit
   lanes; lanes whose bit in __U is clear are zeroed.  */
extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_cvtepu32_epi64 (__mmask8 __U, __m128i __X)
{
  const __v4si __src = (__v4si) __X;
  const __v2di __zero = (__v2di) _mm_setzero_si128 ();

  return (__m128i) __builtin_ia32_pmovzxdq128_mask (__src, __zero, __U);
}

/* Approximate 1/x for every double in __A (relative error below 2^-14).
   The builtin is called with an all-ones write mask.  */
extern __inline __m256d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_rcp14_pd (__m256d __A)
{
  const __v4df __src = (__v4df) __A;
  const __v4df __zero = (__v4df) _mm256_setzero_pd ();
  const __mmask8 __all = (__mmask8) -1;

  return (__m256d) __builtin_ia32_rcp14pd256_mask (__src, __zero, __all);
}

/* Approximate 1/x for each double in __A (relative error below 2^-14);
   lanes whose bit in __U is clear are copied from __W.  */
extern __inline __m256d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_rcp14_pd (__m256d __W, __mmask8 __U, __m256d __A)
{
  const __v4df __src = (__v4df) __A;
  const __v4df __merge = (__v4df) __W;

  return (__m256d) __builtin_ia32_rcp14pd256_mask (__src, __merge, __U);
}

/* Approximate 1/x for each double in __A (relative error below 2^-14);
   lanes whose bit in __U is clear are zeroed.  */
extern __inline __m256d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_maskz_rcp14_pd (__mmask8 __U, __m256d __A)
{
  const __v4df __src = (__v4df) __A;
  const __v4df __zero = (__v4df) _mm256_setzero_pd ();

  return (__m256d) __builtin_ia32_rcp14pd256_mask (__src, __zero, __U);
}

/* Approximate 1/x for both doubles in __A (relative error below 2^-14).
   The builtin is called with an all-ones write mask.  */
extern __inline __m128d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_rcp14_pd (__m128d __A)
{
  const __v2df __src = (__v2df) __A;
  const __v2df __zero = (__v2df) _mm_setzero_pd ();
  const __mmask8 __all = (__mmask8) -1;

  return (__m128d) __builtin_ia32_rcp14pd128_mask (__src, __zero, __all);
}

/* Approximate 1/x for each double in __A (relative error below 2^-14);
   lanes whose bit in __U is clear are copied from __W.  */
extern __inline __m128d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_rcp14_pd (__m128d __W, __mmask8 __U, __m128d __A)
{
  const __v2df __src = (__v2df) __A;
  const __v2df __merge = (__v2df) __W;

  return (__m128d) __builtin_ia32_rcp14pd128_mask (__src, __merge, __U);
}

/* Approximate 1/x for each double in __A (relative error below 2^-14);
   lanes whose bit in __U is clear are zeroed.  */
extern __inline __m128d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_rcp14_pd (__mmask8 __U, __m128d __A)
{
  const __v2df __src = (__v2df) __A;
  const __v2df __zero = (__v2df) _mm_setzero_pd ();

  return (__m128d) __builtin_ia32_rcp14pd128_mask (__src, __zero, __U);
}

/* Approximate 1/x for every float in __A (relative error below 2^-14).
   The builtin is called with an all-ones write mask.  */
extern __inline __m256
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_rcp14_ps (__m256 __A)
{
  const __v8sf __src = (__v8sf) __A;
  const __v8sf __zero = (__v8sf) _mm256_setzero_ps ();
  const __mmask8 __all = (__mmask8) -1;

  return (__m256) __builtin_ia32_rcp14ps256_mask (__src, __zero, __all);
}

/* Approximate 1/x for each float in __A (relative error below 2^-14);
   lanes whose bit in __U is clear are copied from __W.  */
extern __inline __m256
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_rcp14_ps (__m256 __W, __mmask8 __U, __m256 __A)
{
  const __v8sf __src = (__v8sf) __A;
  const __v8sf __merge = (__v8sf) __W;

  return (__m256) __builtin_ia32_rcp14ps256_mask (__src, __merge, __U);
}

/* Approximate 1/x for each float in __A (relative error below 2^-14);
   lanes whose bit in __U is clear are zeroed.  */
extern __inline __m256
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_maskz_rcp14_ps (__mmask8 __U, __m256 __A)
{
  const __v8sf __src = (__v8sf) __A;
  const __v8sf __zero = (__v8sf) _mm256_setzero_ps ();

  return (__m256) __builtin_ia32_rcp14ps256_mask (__src, __zero, __U);
}

/* Approximate 1/x for every float in __A (relative error below 2^-14).
   The builtin is called with an all-ones write mask.  */
extern __inline __m128
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_rcp14_ps (__m128 __A)
{
  const __v4sf __src = (__v4sf) __A;
  const __v4sf __zero = (__v4sf) _mm_setzero_ps ();
  const __mmask8 __all = (__mmask8) -1;

  return (__m128) __builtin_ia32_rcp14ps128_mask (__src, __zero, __all);
}

/* Approximate 1/x for each float in __A (relative error below 2^-14);
   lanes whose bit in __U is clear are copied from __W.  */
extern __inline __m128
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_rcp14_ps (__m128 __W, __mmask8 __U, __m128 __A)
{
  const __v4sf __src = (__v4sf) __A;
  const __v4sf __merge = (__v4sf) __W;

  return (__m128) __builtin_ia32_rcp14ps128_mask (__src, __merge, __U);
}

/* Approximate 1/x for each float in __A (relative error below 2^-14);
   lanes whose bit in __U is clear are zeroed.  */
extern __inline __m128
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_rcp14_ps (__mmask8 __U, __m128 __A)
{
  const __v4sf __src = (__v4sf) __A;
  const __v4sf __zero = (__v4sf) _mm_setzero_ps ();

  return (__m128) __builtin_ia32_rcp14ps128_mask (__src, __zero, __U);
}

/* Approximate 1/sqrt(x) for every double in __A (relative error below
   2^-14).  The builtin is called with an all-ones write mask.  */
extern __inline __m256d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_rsqrt14_pd (__m256d __A)
{
  const __v4df __src = (__v4df) __A;
  const __v4df __zero = (__v4df) _mm256_setzero_pd ();
  const __mmask8 __all = (__mmask8) -1;

  return (__m256d) __builtin_ia32_rsqrt14pd256_mask (__src, __zero, __all);
}

/* Approximate 1/sqrt(x) for each double in __A (relative error below
   2^-14); lanes whose bit in __U is clear are copied from __W.  */
extern __inline __m256d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_rsqrt14_pd (__m256d __W, __mmask8 __U, __m256d __A)
{
  const __v4df __src = (__v4df) __A;
  const __v4df __merge = (__v4df) __W;

  return (__m256d) __builtin_ia32_rsqrt14pd256_mask (__src, __merge, __U);
}

/* Approximate 1/sqrt(x) for each double in __A (relative error below
   2^-14); lanes whose bit in __U is clear are zeroed.  */
extern __inline __m256d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_maskz_rsqrt14_pd (__mmask8 __U, __m256d __A)
{
  const __v4df __src = (__v4df) __A;
  const __v4df __zero = (__v4df) _mm256_setzero_pd ();

  return (__m256d) __builtin_ia32_rsqrt14pd256_mask (__src, __zero, __U);
}

/* Approximate 1/sqrt(x) for both doubles in __A (relative error below
   2^-14).  The builtin is called with an all-ones write mask.  */
extern __inline __m128d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_rsqrt14_pd (__m128d __A)
{
  const __v2df __src = (__v2df) __A;
  const __v2df __zero = (__v2df) _mm_setzero_pd ();
  const __mmask8 __all = (__mmask8) -1;

  return (__m128d) __builtin_ia32_rsqrt14pd128_mask (__src, __zero, __all);
}

/* Approximate 1/sqrt(x) for each double in __A (relative error below
   2^-14); lanes whose bit in __U is clear are copied from __W.  */
extern __inline __m128d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_rsqrt14_pd (__m128d __W, __mmask8 __U, __m128d __A)
{
  const __v2df __src = (__v2df) __A;
  const __v2df __merge = (__v2df) __W;

  return (__m128d) __builtin_ia32_rsqrt14pd128_mask (__src, __merge, __U);
}

/* Approximate 1/sqrt(x) for each double in __A (relative error below
   2^-14); lanes whose bit in __U is clear are zeroed.  */
extern __inline __m128d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_rsqrt14_pd (__mmask8 __U, __m128d __A)
{
  const __v2df __src = (__v2df) __A;
  const __v2df __zero = (__v2df) _mm_setzero_pd ();

  return (__m128d) __builtin_ia32_rsqrt14pd128_mask (__src, __zero, __U);
}

/* Approximate 1/sqrt(x) for every float in __A (relative error below
   2^-14).  The builtin is called with an all-ones write mask.  */
extern __inline __m256
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_rsqrt14_ps (__m256 __A)
{
  const __v8sf __src = (__v8sf) __A;
  const __v8sf __zero = (__v8sf) _mm256_setzero_ps ();
  const __mmask8 __all = (__mmask8) -1;

  return (__m256) __builtin_ia32_rsqrt14ps256_mask (__src, __zero, __all);
}

/* Approximate 1/sqrt(x) for each float in __A (relative error below
   2^-14); lanes whose bit in __U is clear are copied from __W.  */
extern __inline __m256
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_rsqrt14_ps (__m256 __W, __mmask8 __U, __m256 __A)
{
  const __v8sf __src = (__v8sf) __A;
  const __v8sf __merge = (__v8sf) __W;

  return (__m256) __builtin_ia32_rsqrt14ps256_mask (__src, __merge, __U);
}

/* Approximate 1/sqrt(x) for each float in __A (relative error below
   2^-14); lanes whose bit in __U is clear are zeroed.  */
extern __inline __m256
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_maskz_rsqrt14_ps (__mmask8 __U, __m256 __A)
{
  const __v8sf __src = (__v8sf) __A;
  const __v8sf __zero = (__v8sf) _mm256_setzero_ps ();

  return (__m256) __builtin_ia32_rsqrt14ps256_mask (__src, __zero, __U);
}

/* Approximate 1/sqrt(x) for every float in __A (relative error below
   2^-14).  The builtin is called with an all-ones write mask.  */
extern __inline __m128
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_rsqrt14_ps (__m128 __A)
{
  const __v4sf __src = (__v4sf) __A;
  const __v4sf __zero = (__v4sf) _mm_setzero_ps ();
  const __mmask8 __all = (__mmask8) -1;

  return (__m128) __builtin_ia32_rsqrt14ps128_mask (__src, __zero, __all);
}

/* Approximate 1/sqrt(x) for each float in __A (relative error below
   2^-14); lanes whose bit in __U is clear are copied from __W.  */
extern __inline __m128
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_rsqrt14_ps (__m128 __W, __mmask8 __U, __m128 __A)
{
  const __v4sf __src = (__v4sf) __A;
  const __v4sf __merge = (__v4sf) __W;

  return (__m128) __builtin_ia32_rsqrt14ps128_mask (__src, __merge, __U);
}

/* Approximate 1/sqrt(x) for each float in __A (relative error below
   2^-14); lanes whose bit in __U is clear are zeroed.  */
extern __inline __m128
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_rsqrt14_ps (__mmask8 __U, __m128 __A)
{
  const __v4sf __src = (__v4sf) __A;
  const __v4sf __zero = (__v4sf) _mm_setzero_ps ();

  return (__m128) __builtin_ia32_rsqrt14ps128_mask (__src, __zero, __U);
}

/* Square root of each double in __A; lanes whose bit in __U is clear
   are copied from __W.  */
extern __inline __m256d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_sqrt_pd (__m256d __W, __mmask8 __U, __m256d __A)
{
  const __v4df __src = (__v4df) __A;
  const __v4df __merge = (__v4df) __W;

  return (__m256d) __builtin_ia32_sqrtpd256_mask (__src, __merge, __U);
}

/* Square root of each double in __A; lanes whose bit in __U is clear
   are zeroed.  */
extern __inline __m256d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_maskz_sqrt_pd (__mmask8 __U, __m256d __A)
{
  const __v4df __src = (__v4df) __A;
  const __v4df __zero = (__v4df) _mm256_setzero_pd ();

  return (__m256d) __builtin_ia32_sqrtpd256_mask (__src, __zero, __U);
}

/* Square root of each double in __A; lanes whose bit in __U is clear
   are copied from __W.  */
extern __inline __m128d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_sqrt_pd (__m128d __W, __mmask8 __U, __m128d __A)
{
  const __v2df __src = (__v2df) __A;
  const __v2df __merge = (__v2df) __W;

  return (__m128d) __builtin_ia32_sqrtpd128_mask (__src, __merge, __U);
}

/* Square root of each double in __A; lanes whose bit in __U is clear
   are zeroed.  */
extern __inline __m128d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_sqrt_pd (__mmask8 __U, __m128d __A)
{
  const __v2df __src = (__v2df) __A;
  const __v2df __zero = (__v2df) _mm_setzero_pd ();

  return (__m128d) __builtin_ia32_sqrtpd128_mask (__src, __zero, __U);
}

/* Square root of each float in __A; lanes whose bit in __U is clear
   are copied from __W.  */
extern __inline __m256
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_sqrt_ps (__m256 __W, __mmask8 __U, __m256 __A)
{
  const __v8sf __src = (__v8sf) __A;
  const __v8sf __merge = (__v8sf) __W;

  return (__m256) __builtin_ia32_sqrtps256_mask (__src, __merge, __U);
}

/* Square root of each float in __A; lanes whose bit in __U is clear
   are zeroed.  */
extern __inline __m256
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_maskz_sqrt_ps (__mmask8 __U, __m256 __A)
{
  const __v8sf __src = (__v8sf) __A;
  const __v8sf __zero = (__v8sf) _mm256_setzero_ps ();

  return (__m256) __builtin_ia32_sqrtps256_mask (__src, __zero, __U);
}

/* Square root of each float in __A; lanes whose bit in __U is clear
   are copied from __W.  */
extern __inline __m128
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_sqrt_ps (__m128 __W, __mmask8 __U, __m128 __A)
{
  const __v4sf __src = (__v4sf) __A;
  const __v4sf __merge = (__v4sf) __W;

  return (__m128) __builtin_ia32_sqrtps128_mask (__src, __merge, __U);
}

/* Square root of each float in __A; lanes whose bit in __U is clear
   are zeroed.  */
extern __inline __m128
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_sqrt_ps (__mmask8 __U, __m128 __A)
{
  const __v4sf __src = (__v4sf) __A;
  const __v4sf __zero = (__v4sf) _mm_setzero_ps ();

  return (__m128) __builtin_ia32_sqrtps128_mask (__src, __zero, __U);
}

/* Add the packed 32-bit integers of __A and __B; lanes whose bit in __U
   is clear are copied from __W.  */
extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_add_epi32 (__m256i __W, __mmask8 __U, __m256i __A,
		       __m256i __B)
{
  const __v8si __lhs = (__v8si) __A;
  const __v8si __rhs = (__v8si) __B;
  const __v8si __merge = (__v8si) __W;

  return (__m256i) __builtin_ia32_paddd256_mask (__lhs, __rhs, __merge, __U);
}

/* Add the packed 32-bit integers of __A and __B; lanes whose bit in __U
   is clear are zeroed.  */
extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_maskz_add_epi32 (__mmask8 __U, __m256i __A, __m256i __B)
{
  const __v8si __lhs = (__v8si) __A;
  const __v8si __rhs = (__v8si) __B;
  const __v8si __zero = (__v8si) _mm256_setzero_si256 ();

  return (__m256i) __builtin_ia32_paddd256_mask (__lhs, __rhs, __zero, __U);
}

/* Add the packed 64-bit integers of __A and __B; lanes whose bit in __U
   is clear are copied from __W.  */
extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_add_epi64 (__m256i __W, __mmask8 __U, __m256i __A,
		       __m256i __B)
{
  const __v4di __lhs = (__v4di) __A;
  const __v4di __rhs = (__v4di) __B;
  const __v4di __merge = (__v4di) __W;

  return (__m256i) __builtin_ia32_paddq256_mask (__lhs, __rhs, __merge, __U);
}

/* Add the packed 64-bit integers of __A and __B; lanes whose bit in __U
   is clear are zeroed.  */
extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_maskz_add_epi64 (__mmask8 __U, __m256i __A, __m256i __B)
{
  const __v4di __lhs = (__v4di) __A;
  const __v4di __rhs = (__v4di) __B;
  const __v4di __zero = (__v4di) _mm256_setzero_si256 ();

  return (__m256i) __builtin_ia32_paddq256_mask (__lhs, __rhs, __zero, __U);
}

/* Subtract the packed 32-bit integers of __B from those of __A; lanes
   whose bit in __U is clear are copied from __W.  */
extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_sub_epi32 (__m256i __W, __mmask8 __U, __m256i __A,
		       __m256i __B)
{
  const __v8si __lhs = (__v8si) __A;
  const __v8si __rhs = (__v8si) __B;
  const __v8si __merge = (__v8si) __W;

  return (__m256i) __builtin_ia32_psubd256_mask (__lhs, __rhs, __merge, __U);
}

/* Subtract the packed 32-bit integers of __B from those of __A; lanes
   whose bit in __U is clear are zeroed.  */
extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_maskz_sub_epi32 (__mmask8 __U, __m256i __A, __m256i __B)
{
  const __v8si __lhs = (__v8si) __A;
  const __v8si __rhs = (__v8si) __B;
  const __v8si __zero = (__v8si) _mm256_setzero_si256 ();

  return (__m256i) __builtin_ia32_psubd256_mask (__lhs, __rhs, __zero, __U);
}

/* Subtract the packed 64-bit integers of __B from those of __A; lanes
   whose bit in __U is clear are copied from __W.  */
extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_sub_epi64 (__m256i __W, __mmask8 __U, __m256i __A,
		       __m256i __B)
{
  const __v4di __lhs = (__v4di) __A;
  const __v4di __rhs = (__v4di) __B;
  const __v4di __merge = (__v4di) __W;

  return (__m256i) __builtin_ia32_psubq256_mask (__lhs, __rhs, __merge, __U);
}

/* Subtract the packed 64-bit integers of __B from those of __A; lanes
   whose bit in __U is clear are zeroed.  */
extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_maskz_sub_epi64 (__mmask8 __U, __m256i __A, __m256i __B)
{
  const __v4di __lhs = (__v4di) __A;
  const __v4di __rhs = (__v4di) __B;
  const __v4di __zero = (__v4di) _mm256_setzero_si256 ();

  return (__m256i) __builtin_ia32_psubq256_mask (__lhs, __rhs, __zero, __U);
}

/* Add the packed 32-bit integers of __A and __B; lanes whose bit in __U
   is clear are copied from __W.  */
extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_add_epi32 (__m128i __W, __mmask8 __U, __m128i __A,
		    __m128i __B)
{
  const __v4si __lhs = (__v4si) __A;
  const __v4si __rhs = (__v4si) __B;
  const __v4si __merge = (__v4si) __W;

  return (__m128i) __builtin_ia32_paddd128_mask (__lhs, __rhs, __merge, __U);
}

/* Add the packed 32-bit integers of __A and __B; lanes whose bit in __U
   is clear are zeroed.  */
extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_add_epi32 (__mmask8 __U, __m128i __A, __m128i __B)
{
  const __v4si __lhs = (__v4si) __A;
  const __v4si __rhs = (__v4si) __B;
  const __v4si __zero = (__v4si) _mm_setzero_si128 ();

  return (__m128i) __builtin_ia32_paddd128_mask (__lhs, __rhs, __zero, __U);
}

/* Add the packed 64-bit integers of __A and __B; lanes whose bit in __U
   is clear are copied from __W.  */
extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_add_epi64 (__m128i __W, __mmask8 __U, __m128i __A,
		    __m128i __B)
{
  const __v2di __lhs = (__v2di) __A;
  const __v2di __rhs = (__v2di) __B;
  const __v2di __merge = (__v2di) __W;

  return (__m128i) __builtin_ia32_paddq128_mask (__lhs, __rhs, __merge, __U);
}

/* Add the packed 64-bit integers of __A and __B; lanes whose bit in __U
   is clear are zeroed.  */
extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_add_epi64 (__mmask8 __U, __m128i __A, __m128i __B)
{
  const __v2di __lhs = (__v2di) __A;
  const __v2di __rhs = (__v2di) __B;
  const __v2di __zero = (__v2di) _mm_setzero_si128 ();

  return (__m128i) __builtin_ia32_paddq128_mask (__lhs, __rhs, __zero, __U);
}

/* Subtract the packed 32-bit integers of __B from those of __A; lanes
   whose bit in __U is clear are copied from __W.  */
extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_sub_epi32 (__m128i __W, __mmask8 __U, __m128i __A,
		    __m128i __B)
{
  const __v4si __lhs = (__v4si) __A;
  const __v4si __rhs = (__v4si) __B;
  const __v4si __merge = (__v4si) __W;

  return (__m128i) __builtin_ia32_psubd128_mask (__lhs, __rhs, __merge, __U);
}

/* Subtract the packed 32-bit integers of __B from those of __A; lanes
   whose bit in __U is clear are zeroed.  */
extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_sub_epi32 (__mmask8 __U, __m128i __A, __m128i __B)
{
  const __v4si __lhs = (__v4si) __A;
  const __v4si __rhs = (__v4si) __B;
  const __v4si __zero = (__v4si) _mm_setzero_si128 ();

  return (__m128i) __builtin_ia32_psubd128_mask (__lhs, __rhs, __zero, __U);
}

/* Subtract the packed 64-bit integers of __B from those of __A; lanes
   whose bit in __U is clear are copied from __W.  */
extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_sub_epi64 (__m128i __W, __mmask8 __U, __m128i __A,
		    __m128i __B)
{
  const __v2di __lhs = (__v2di) __A;
  const __v2di __rhs = (__v2di) __B;
  const __v2di __merge = (__v2di) __W;

  return (__m128i) __builtin_ia32_psubq128_mask (__lhs, __rhs, __merge, __U);
}

/* Subtract the packed 64-bit integers of __B from those of __A; lanes
   whose bit in __U is clear are zeroed.  */
extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_sub_epi64 (__mmask8 __U, __m128i __A, __m128i __B)
{
  const __v2di __lhs = (__v2di) __A;
  const __v2di __rhs = (__v2di) __B;
  const __v2di __zero = (__v2di) _mm_setzero_si128 ();

  return (__m128i) __builtin_ia32_psubq128_mask (__lhs, __rhs, __zero, __U);
}

/* Return the exponent of every float in __A as a floating-point value
   (in effect floor(log2(|x|))).  The builtin is called with an all-ones
   write mask.  */
extern __inline __m256
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_getexp_ps (__m256 __A)
{
  const __v8sf __src = (__v8sf) __A;
  const __v8sf __zero = (__v8sf) _mm256_setzero_ps ();
  const __mmask8 __all = (__mmask8) -1;

  return (__m256) __builtin_ia32_getexpps256_mask (__src, __zero, __all);
}

/* Return the exponent of each float in __A as a floating-point value
   (in effect floor(log2(|x|))); lanes whose bit in __U is clear are
   copied from __W.  */
extern __inline __m256
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_getexp_ps (__m256 __W, __mmask8 __U, __m256 __A)
{
  const __v8sf __src = (__v8sf) __A;
  const __v8sf __merge = (__v8sf) __W;

  return (__m256) __builtin_ia32_getexpps256_mask (__src, __merge, __U);
}

/* Return the exponent of each float in __A as a floating-point value
   (in effect floor(log2(|x|))); lanes whose bit in __U is clear are
   zeroed.  */
extern __inline __m256
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_maskz_getexp_ps (__mmask8 __U, __m256 __A)
{
  const __v8sf __src = (__v8sf) __A;
  const __v8sf __zero = (__v8sf) _mm256_setzero_ps ();

  return (__m256) __builtin_ia32_getexpps256_mask (__src, __zero, __U);
}

/* Return the exponent of every double in __A as a floating-point value
   (in effect floor(log2(|x|))).  The builtin is called with an all-ones
   write mask.  */
extern __inline __m256d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_getexp_pd (__m256d __A)
{
  const __v4df __src = (__v4df) __A;
  const __v4df __zero = (__v4df) _mm256_setzero_pd ();
  const __mmask8 __all = (__mmask8) -1;

  return (__m256d) __builtin_ia32_getexppd256_mask (__src, __zero, __all);
}

/* Return the exponent of each double in __A as a floating-point value
   (in effect floor(log2(|x|))); lanes whose bit in __U is clear are
   copied from __W.  */
extern __inline __m256d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_getexp_pd (__m256d __W, __mmask8 __U, __m256d __A)
{
  const __v4df __src = (__v4df) __A;
  const __v4df __merge = (__v4df) __W;

  return (__m256d) __builtin_ia32_getexppd256_mask (__src, __merge, __U);
}

/* Return the exponent of each double in __A as a floating-point value
   (in effect floor(log2(|x|))); lanes whose bit in __U is clear are
   zeroed.  */
extern __inline __m256d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_maskz_getexp_pd (__mmask8 __U, __m256d __A)
{
  const __v4df __src = (__v4df) __A;
  const __v4df __zero = (__v4df) _mm256_setzero_pd ();

  return (__m256d) __builtin_ia32_getexppd256_mask (__src, __zero, __U);
}

/* Return the exponent of every float in __A as a floating-point value
   (in effect floor(log2(|x|))).  The builtin is called with an all-ones
   write mask.  */
extern __inline __m128
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_getexp_ps (__m128 __A)
{
  const __v4sf __src = (__v4sf) __A;
  const __v4sf __zero = (__v4sf) _mm_setzero_ps ();
  const __mmask8 __all = (__mmask8) -1;

  return (__m128) __builtin_ia32_getexpps128_mask (__src, __zero, __all);
}

/* Return the exponent of each float in __A as a floating-point value
   (in effect floor(log2(|x|))); lanes whose bit in __U is clear are
   copied from __W.  */
extern __inline __m128
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_getexp_ps (__m128 __W, __mmask8 __U, __m128 __A)
{
  const __v4sf __src = (__v4sf) __A;
  const __v4sf __merge = (__v4sf) __W;

  return (__m128) __builtin_ia32_getexpps128_mask (__src, __merge, __U);
}

/* Return the exponent of each float in __A as a floating-point value
   (in effect floor(log2(|x|))); lanes whose bit in __U is clear are
   zeroed.  */
extern __inline __m128
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_getexp_ps (__mmask8 __U, __m128 __A)
{
  const __v4sf __src = (__v4sf) __A;
  const __v4sf __zero = (__v4sf) _mm_setzero_ps ();

  return (__m128) __builtin_ia32_getexpps128_mask (__src, __zero, __U);
}

/* Return the exponent of both doubles in __A as a floating-point value
   (in effect floor(log2(|x|))).  The builtin is called with an all-ones
   write mask.  */
extern __inline __m128d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_getexp_pd (__m128d __A)
{
  const __v2df __src = (__v2df) __A;
  const __v2df __zero = (__v2df) _mm_setzero_pd ();
  const __mmask8 __all = (__mmask8) -1;

  return (__m128d) __builtin_ia32_getexppd128_mask (__src, __zero, __all);
}

/* Return the exponent of each double in __A as a floating-point value
   (in effect floor(log2(|x|))); lanes whose bit in __U is clear are
   copied from __W.  */
extern __inline __m128d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_getexp_pd (__m128d __W, __mmask8 __U, __m128d __A)
{
  const __v2df __src = (__v2df) __A;
  const __v2df __merge = (__v2df) __W;

  return (__m128d) __builtin_ia32_getexppd128_mask (__src, __merge, __U);
}

/* Return the exponent of each double in __A as a floating-point value
   (in effect floor(log2(|x|))); lanes whose bit in __U is clear are
   zeroed.  */
extern __inline __m128d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_getexp_pd (__mmask8 __U, __m128d __A)
{
  const __v2df __src = (__v2df) __A;
  const __v2df __zero = (__v2df) _mm_setzero_pd ();

  return (__m128d) __builtin_ia32_getexppd128_mask (__src, __zero, __U);
}

/* Logically shift each 32-bit element of __A right by the count supplied
   in __B; lanes whose bit in __U is clear are copied from __W.  */
extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_srl_epi32 (__m256i __W, __mmask8 __U, __m256i __A,
		       __m128i __B)
{
  const __v8si __val = (__v8si) __A;
  const __v4si __cnt = (__v4si) __B;
  const __v8si __merge = (__v8si) __W;

  return (__m256i) __builtin_ia32_psrld256_mask (__val, __cnt, __merge, __U);
}

/* Logically shift each 32-bit element of __A right by the count supplied
   in __B; lanes whose bit in __U is clear are zeroed.  */
extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_maskz_srl_epi32 (__mmask8 __U, __m256i __A, __m128i __B)
{
  const __v8si __val = (__v8si) __A;
  const __v4si __cnt = (__v4si) __B;
  const __v8si __zero = (__v8si) _mm256_setzero_si256 ();

  return (__m256i) __builtin_ia32_psrld256_mask (__val, __cnt, __zero, __U);
}

/* Logically shift each 32-bit element of __A right by the count supplied
   in __B; lanes whose bit in __U is clear are copied from __W.  */
extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_srl_epi32 (__m128i __W, __mmask8 __U, __m128i __A,
		    __m128i __B)
{
  const __v4si __val = (__v4si) __A;
  const __v4si __cnt = (__v4si) __B;
  const __v4si __merge = (__v4si) __W;

  return (__m128i) __builtin_ia32_psrld128_mask (__val, __cnt, __merge, __U);
}

/* Logically shift each 32-bit element of __A right by the count supplied
   in __B; lanes whose bit in __U is clear are zeroed.  */
extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_srl_epi32 (__mmask8 __U, __m128i __A, __m128i __B)
{
  const __v4si __val = (__v4si) __A;
  const __v4si __cnt = (__v4si) __B;
  const __v4si __zero = (__v4si) _mm_setzero_si128 ();

  return (__m128i) __builtin_ia32_psrld128_mask (__val, __cnt, __zero, __U);
}

/* Logically shift each 64-bit element of __A right by the count supplied
   in __B; lanes whose bit in __U is clear are copied from __W.  */
extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_srl_epi64 (__m256i __W, __mmask8 __U, __m256i __A,
		       __m128i __B)
{
  const __v4di __val = (__v4di) __A;
  const __v2di __cnt = (__v2di) __B;
  const __v4di __merge = (__v4di) __W;

  return (__m256i) __builtin_ia32_psrlq256_mask (__val, __cnt, __merge, __U);
}

/* Logically shift each 64-bit element of __A right by the count supplied
   in __B; lanes whose bit in __U is clear are zeroed.  */
extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_maskz_srl_epi64 (__mmask8 __U, __m256i __A, __m128i __B)
{
  const __v4di __val = (__v4di) __A;
  const __v2di __cnt = (__v2di) __B;
  const __v4di __zero = (__v4di) _mm256_setzero_si256 ();

  return (__m256i) __builtin_ia32_psrlq256_mask (__val, __cnt, __zero, __U);
}

/* Logically shift each 64-bit element of __A right by the count supplied
   in __B; lanes whose bit in __U is clear are copied from __W.  */
extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_srl_epi64 (__m128i __W, __mmask8 __U, __m128i __A,
		    __m128i __B)
{
  const __v2di __val = (__v2di) __A;
  const __v2di __cnt = (__v2di) __B;
  const __v2di __merge = (__v2di) __W;

  return (__m128i) __builtin_ia32_psrlq128_mask (__val, __cnt, __merge, __U);
}

/* Logically shift each 64-bit element of __A right by the count supplied
   in __B; lanes whose bit in __U is clear are zeroed.  */
extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_srl_epi64 (__mmask8 __U, __m128i __A, __m128i __B)
{
  const __v2di __val = (__v2di) __A;
  const __v2di __cnt = (__v2di) __B;
  const __v2di __zero = (__v2di) _mm_setzero_si128 ();

  return (__m128i) __builtin_ia32_psrlq128_mask (__val, __cnt, __zero, __U);
}

/* Bitwise AND of __A and __B, masked per 32-bit lane; lanes whose bit in
   __U is clear are copied from __W.  */
extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_and_epi32 (__m256i __W, __mmask8 __U, __m256i __A,
		       __m256i __B)
{
  const __v8si __lhs = (__v8si) __A;
  const __v8si __rhs = (__v8si) __B;
  const __v8si __merge = (__v8si) __W;

  return (__m256i) __builtin_ia32_pandd256_mask (__lhs, __rhs, __merge, __U);
}

/* Bitwise AND of __A and __B, masked per 32-bit lane; lanes whose bit in
   __U is clear are zeroed.  */
extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_maskz_and_epi32 (__mmask8 __U, __m256i __A, __m256i __B)
{
  const __v8si __lhs = (__v8si) __A;
  const __v8si __rhs = (__v8si) __B;
  const __v8si __zero = (__v8si) _mm256_setzero_si256 ();

  return (__m256i) __builtin_ia32_pandd256_mask (__lhs, __rhs, __zero, __U);
}

/* Scale every double in __A by 2 raised to the floor of the matching
   element of __B.  The builtin is called with an all-ones write mask.  */
extern __inline __m256d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_scalef_pd (__m256d __A, __m256d __B)
{
  const __v4df __val = (__v4df) __A;
  const __v4df __scale = (__v4df) __B;
  const __v4df __zero = (__v4df) _mm256_setzero_pd ();
  const __mmask8 __all = (__mmask8) -1;

  return (__m256d)
    __builtin_ia32_scalefpd256_mask (__val, __scale, __zero, __all);
}

/* Scale each double in __A by 2 raised to the floor of the matching
   element of __B; lanes whose bit in __U is clear are copied from __W.  */
extern __inline __m256d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_scalef_pd (__m256d __W, __mmask8 __U, __m256d __A,
		       __m256d __B)
{
  const __v4df __val = (__v4df) __A;
  const __v4df __scale = (__v4df) __B;
  const __v4df __merge = (__v4df) __W;

  return (__m256d)
    __builtin_ia32_scalefpd256_mask (__val, __scale, __merge, __U);
}

/* Scale each double in __A by 2 raised to the floor of the matching
   element of __B; lanes whose bit in __U is clear are zeroed.  */
extern __inline __m256d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_maskz_scalef_pd (__mmask8 __U, __m256d __A, __m256d __B)
{
  const __v4df __val = (__v4df) __A;
  const __v4df __scale = (__v4df) __B;
  const __v4df __zero = (__v4df) _mm256_setzero_pd ();

  return (__m256d)
    __builtin_ia32_scalefpd256_mask (__val, __scale, __zero, __U);
}

/* Scale every float in __A by 2 raised to the floor of the matching
   element of __B.  The builtin is called with an all-ones write mask.  */
extern __inline __m256
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_scalef_ps (__m256 __A, __m256 __B)
{
  const __v8sf __val = (__v8sf) __A;
  const __v8sf __scale = (__v8sf) __B;
  const __v8sf __zero = (__v8sf) _mm256_setzero_ps ();
  const __mmask8 __all = (__mmask8) -1;

  return (__m256)
    __builtin_ia32_scalefps256_mask (__val, __scale, __zero, __all);
}

/* Scale each float in __A by 2 raised to the floor of the matching
   element of __B; lanes whose bit in __U is clear are copied from __W.  */
extern __inline __m256
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_scalef_ps (__m256 __W, __mmask8 __U, __m256 __A,
		       __m256 __B)
{
  const __v8sf __val = (__v8sf) __A;
  const __v8sf __scale = (__v8sf) __B;
  const __v8sf __merge = (__v8sf) __W;

  return (__m256)
    __builtin_ia32_scalefps256_mask (__val, __scale, __merge, __U);
}

/* Scale each float in __A by 2 raised to the floor of the matching
   element of __B; lanes whose bit in __U is clear are zeroed.  */
extern __inline __m256
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_maskz_scalef_ps (__mmask8 __U, __m256 __A, __m256 __B)
{
  const __v8sf __val = (__v8sf) __A;
  const __v8sf __scale = (__v8sf) __B;
  const __v8sf __zero = (__v8sf) _mm256_setzero_ps ();

  return (__m256)
    __builtin_ia32_scalefps256_mask (__val, __scale, __zero, __U);
}

/* Scale both doubles in __A by 2 raised to the floor of the matching
   element of __B.  The builtin is called with an all-ones write mask.  */
extern __inline __m128d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_scalef_pd (__m128d __A, __m128d __B)
{
  const __v2df __val = (__v2df) __A;
  const __v2df __scale = (__v2df) __B;
  const __v2df __zero = (__v2df) _mm_setzero_pd ();
  const __mmask8 __all = (__mmask8) -1;

  return (__m128d)
    __builtin_ia32_scalefpd128_mask (__val, __scale, __zero, __all);
}

/* Scale each double in __A by 2 raised to the floor of the matching
   element of __B; lanes whose bit in __U is clear are copied from __W.  */
extern __inline __m128d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_scalef_pd (__m128d __W, __mmask8 __U, __m128d __A,
		    __m128d __B)
{
  const __v2df __val = (__v2df) __A;
  const __v2df __scale = (__v2df) __B;
  const __v2df __merge = (__v2df) __W;

  return (__m128d)
    __builtin_ia32_scalefpd128_mask (__val, __scale, __merge, __U);
}

/* Scale each double in __A by 2 raised to the floor of the matching
   element of __B; lanes whose bit in __U is clear are zeroed.  */
extern __inline __m128d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_scalef_pd (__mmask8 __U, __m128d __A, __m128d __B)
{
  const __v2df __val = (__v2df) __A;
  const __v2df __scale = (__v2df) __B;
  const __v2df __zero = (__v2df) _mm_setzero_pd ();

  return (__m128d)
    __builtin_ia32_scalefpd128_mask (__val, __scale, __zero, __U);
}

/* Scale every float in __A by 2 raised to the floor of the matching
   element of __B.  The builtin is called with an all-ones write mask.  */
extern __inline __m128
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_scalef_ps (__m128 __A, __m128 __B)
{
  const __v4sf __val = (__v4sf) __A;
  const __v4sf __scale = (__v4sf) __B;
  const __v4sf __zero = (__v4sf) _mm_setzero_ps ();
  const __mmask8 __all = (__mmask8) -1;

  return (__m128)
    __builtin_ia32_scalefps128_mask (__val, __scale, __zero, __all);
}

/* Scale each float in __A by 2 raised to the floor of the matching
   element of __B; lanes whose bit in __U is clear are copied from __W.  */
extern __inline __m128
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_scalef_ps (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
{
  const __v4sf __val = (__v4sf) __A;
  const __v4sf __scale = (__v4sf) __B;
  const __v4sf __merge = (__v4sf) __W;

  return (__m128)
    __builtin_ia32_scalefps128_mask (__val, __scale, __merge, __U);
}

/* Scale each float in __A by 2 raised to the floor of the matching
   element of __B; lanes whose bit in __U is clear are zeroed.  */
extern __inline __m128
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_scalef_ps (__mmask8 __U, __m128 __A, __m128 __B)
{
  const __v4sf __val = (__v4sf) __A;
  const __v4sf __scale = (__v4sf) __B;
  const __v4sf __zero = (__v4sf) _mm_setzero_ps ();

  return (__m128)
    __builtin_ia32_scalefps128_mask (__val, __scale, __zero, __U);
}

/* Fused multiply-add (__A * __B) + __C on packed doubles; lanes whose
   bit in __U is clear keep the value from __A.  */
extern __inline __m256d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_fmadd_pd (__m256d __A, __mmask8 __U, __m256d __B,
		      __m256d __C)
{
  const __v4df __mul1 = (__v4df) __A;
  const __v4df __mul2 = (__v4df) __B;
  const __v4df __term = (__v4df) __C;

  return (__m256d)
    __builtin_ia32_vfmaddpd256_mask (__mul1, __mul2, __term, __U);
}

/* Fused multiply-add (__A * __B) + __C on packed doubles; lanes whose
   bit in __U is clear keep the value from __C.  */
extern __inline __m256d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask3_fmadd_pd (__m256d __A, __m256d __B, __m256d __C,
		       __mmask8 __U)
{
  const __v4df __mul1 = (__v4df) __A;
  const __v4df __mul2 = (__v4df) __B;
  const __v4df __term = (__v4df) __C;

  return (__m256d)
    __builtin_ia32_vfmaddpd256_mask3 (__mul1, __mul2, __term, __U);
}

/* Fused multiply-add (__A * __B) + __C on packed doubles; lanes whose
   bit in __U is clear are zeroed.  */
extern __inline __m256d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_maskz_fmadd_pd (__mmask8 __U, __m256d __A, __m256d __B,
		       __m256d __C)
{
  const __v4df __mul1 = (__v4df) __A;
  const __v4df __mul2 = (__v4df) __B;
  const __v4df __term = (__v4df) __C;

  return (__m256d)
    __builtin_ia32_vfmaddpd256_maskz (__mul1, __mul2, __term, __U);
}

/* Fused multiply-add (__A * __B) + __C on packed doubles; lanes whose
   bit in __U is clear keep the value from __A.  */
extern __inline __m128d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_fmadd_pd (__m128d __A, __mmask8 __U, __m128d __B, __m128d __C)
{
  const __v2df __mul1 = (__v2df) __A;
  const __v2df __mul2 = (__v2df) __B;
  const __v2df __term = (__v2df) __C;

  return (__m128d)
    __builtin_ia32_vfmaddpd128_mask (__mul1, __mul2, __term, __U);
}

/* Fused multiply-add (__A * __B) + __C on packed doubles; lanes whose
   bit in __U is clear keep the value from __C.  */
extern __inline __m128d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask3_fmadd_pd (__m128d __A, __m128d __B, __m128d __C,
		    __mmask8 __U)
{
  const __v2df __mul1 = (__v2df) __A;
  const __v2df __mul2 = (__v2df) __B;
  const __v2df __term = (__v2df) __C;

  return (__m128d)
    __builtin_ia32_vfmaddpd128_mask3 (__mul1, __mul2, __term, __U);
}

/* Fused multiply-add (__A * __B) + __C on packed doubles; lanes whose
   bit in __U is clear are zeroed.  */
extern __inline __m128d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_fmadd_pd (__mmask8 __U, __m128d __A, __m128d __B,
		    __m128d __C)
{
  const __v2df __mul1 = (__v2df) __A;
  const __v2df __mul2 = (__v2df) __B;
  const __v2df __term = (__v2df) __C;

  return (__m128d)
    __builtin_ia32_vfmaddpd128_maskz (__mul1, __mul2, __term, __U);
}

/* Fused multiply-add (__A * __B) + __C on packed floats; lanes whose
   bit in __U is clear keep the value from __A.  */
extern __inline __m256
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_fmadd_ps (__m256 __A, __mmask8 __U, __m256 __B, __m256 __C)
{
  const __v8sf __mul1 = (__v8sf) __A;
  const __v8sf __mul2 = (__v8sf) __B;
  const __v8sf __term = (__v8sf) __C;

  return (__m256)
    __builtin_ia32_vfmaddps256_mask (__mul1, __mul2, __term, __U);
}

/* Fused multiply-add (__A * __B) + __C on packed floats; lanes whose
   bit in __U is clear keep the value from __C.  */
extern __inline __m256
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask3_fmadd_ps (__m256 __A, __m256 __B, __m256 __C,
		       __mmask8 __U)
{
  const __v8sf __mul1 = (__v8sf) __A;
  const __v8sf __mul2 = (__v8sf) __B;
  const __v8sf __term = (__v8sf) __C;

  return (__m256)
    __builtin_ia32_vfmaddps256_mask3 (__mul1, __mul2, __term, __U);
}

/* Fused multiply-add (__A * __B) + __C on packed floats; lanes whose
   bit in __U is clear are zeroed.  */
extern __inline __m256
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_maskz_fmadd_ps (__mmask8 __U, __m256 __A, __m256 __B,
		       __m256 __C)
{
  const __v8sf __mul1 = (__v8sf) __A;
  const __v8sf __mul2 = (__v8sf) __B;
  const __v8sf __term = (__v8sf) __C;

  return (__m256)
    __builtin_ia32_vfmaddps256_maskz (__mul1, __mul2, __term, __U);
}

/* Fused multiply-add (__A * __B) + __C on packed floats; lanes whose
   bit in __U is clear keep the value from __A.  */
extern __inline __m128
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_fmadd_ps (__m128 __A, __mmask8 __U, __m128 __B, __m128 __C)
{
  const __v4sf __mul1 = (__v4sf) __A;
  const __v4sf __mul2 = (__v4sf) __B;
  const __v4sf __term = (__v4sf) __C;

  return (__m128)
    __builtin_ia32_vfmaddps128_mask (__mul1, __mul2, __term, __U);
}

/* Fused multiply-add (__A * __B) + __C on packed floats; lanes whose
   bit in __U is clear keep the value from __C.  */
extern __inline __m128
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask3_fmadd_ps (__m128 __A, __m128 __B, __m128 __C, __mmask8 __U)
{
  const __v4sf __mul1 = (__v4sf) __A;
  const __v4sf __mul2 = (__v4sf) __B;
  const __v4sf __term = (__v4sf) __C;

  return (__m128)
    __builtin_ia32_vfmaddps128_mask3 (__mul1, __mul2, __term, __U);
}

/* Fused multiply-add (__A * __B) + __C on packed floats; lanes whose
   bit in __U is clear are zeroed.  */
extern __inline __m128
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_fmadd_ps (__mmask8 __U, __m128 __A, __m128 __B, __m128 __C)
{
  const __v4sf __mul1 = (__v4sf) __A;
  const __v4sf __mul2 = (__v4sf) __B;
  const __v4sf __term = (__v4sf) __C;

  return (__m128)
    __builtin_ia32_vfmaddps128_maskz (__mul1, __mul2, __term, __U);
}

/* Fused multiply-subtract (__A * __B) - __C on packed doubles; lanes
   whose bit in __U is clear keep the value from __A.  */
extern __inline __m256d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_fmsub_pd (__m256d __A, __mmask8 __U, __m256d __B,
		      __m256d __C)
{
  const __v4df __mul1 = (__v4df) __A;
  const __v4df __mul2 = (__v4df) __B;
  const __v4df __term = (__v4df) __C;

  return (__m256d)
    __builtin_ia32_vfmsubpd256_mask (__mul1, __mul2, __term, __U);
}

/* Fused multiply-subtract (__A * __B) - __C on packed doubles; lanes
   whose bit in __U is clear keep the value from __C.  */
extern __inline __m256d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask3_fmsub_pd (__m256d __A, __m256d __B, __m256d __C,
		       __mmask8 __U)
{
  const __v4df __mul1 = (__v4df) __A;
  const __v4df __mul2 = (__v4df) __B;
  const __v4df __term = (__v4df) __C;

  return (__m256d)
    __builtin_ia32_vfmsubpd256_mask3 (__mul1, __mul2, __term, __U);
}

/* Fused multiply-subtract (__A * __B) - __C on packed doubles; lanes
   whose bit in __U is clear are zeroed.  */
extern __inline __m256d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_maskz_fmsub_pd (__mmask8 __U, __m256d __A, __m256d __B,
		       __m256d __C)
{
  const __v4df __mul1 = (__v4df) __A;
  const __v4df __mul2 = (__v4df) __B;
  const __v4df __term = (__v4df) __C;

  return (__m256d)
    __builtin_ia32_vfmsubpd256_maskz (__mul1, __mul2, __term, __U);
}

/* Fused multiply-subtract (__A * __B) - __C on packed doubles; lanes
   whose bit in __U is clear keep the value from __A.  */
extern __inline __m128d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_fmsub_pd (__m128d __A, __mmask8 __U, __m128d __B, __m128d __C)
{
  const __v2df __mul1 = (__v2df) __A;
  const __v2df __mul2 = (__v2df) __B;
  const __v2df __term = (__v2df) __C;

  return (__m128d)
    __builtin_ia32_vfmsubpd128_mask (__mul1, __mul2, __term, __U);
}

/* Fused multiply-subtract (__A * __B) - __C on packed doubles; lanes
   whose bit in __U is clear keep the value from __C.  */
extern __inline __m128d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask3_fmsub_pd (__m128d __A, __m128d __B, __m128d __C,
		    __mmask8 __U)
{
  const __v2df __mul1 = (__v2df) __A;
  const __v2df __mul2 = (__v2df) __B;
  const __v2df __term = (__v2df) __C;

  return (__m128d)
    __builtin_ia32_vfmsubpd128_mask3 (__mul1, __mul2, __term, __U);
}

/* Fused multiply-subtract (__A * __B) - __C on packed doubles; lanes
   whose bit in __U is clear are zeroed.  */
extern __inline __m128d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_fmsub_pd (__mmask8 __U, __m128d __A, __m128d __B,
		    __m128d __C)
{
  const __v2df __mul1 = (__v2df) __A;
  const __v2df __mul2 = (__v2df) __B;
  const __v2df __term = (__v2df) __C;

  return (__m128d)
    __builtin_ia32_vfmsubpd128_maskz (__mul1, __mul2, __term, __U);
}

/* Fused multiply-subtract (__A * __B) - __C on packed floats; lanes
   whose bit in __U is clear keep the value from __A.  */
extern __inline __m256
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_fmsub_ps (__m256 __A, __mmask8 __U, __m256 __B, __m256 __C)
{
  const __v8sf __mul1 = (__v8sf) __A;
  const __v8sf __mul2 = (__v8sf) __B;
  const __v8sf __term = (__v8sf) __C;

  return (__m256)
    __builtin_ia32_vfmsubps256_mask (__mul1, __mul2, __term, __U);
}

/* Fused multiply-subtract (__A * __B) - __C on packed floats; lanes
   whose bit in __U is clear keep the value from __C.  */
extern __inline __m256
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask3_fmsub_ps (__m256 __A, __m256 __B, __m256 __C,
		       __mmask8 __U)
{
  const __v8sf __mul1 = (__v8sf) __A;
  const __v8sf __mul2 = (__v8sf) __B;
  const __v8sf __term = (__v8sf) __C;

  return (__m256)
    __builtin_ia32_vfmsubps256_mask3 (__mul1, __mul2, __term, __U);
}

/* Fused multiply-subtract (__A * __B) - __C on packed floats; lanes
   whose bit in __U is clear are zeroed.  */
extern __inline __m256
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_maskz_fmsub_ps (__mmask8 __U, __m256 __A, __m256 __B,
		       __m256 __C)
{
  const __v8sf __mul1 = (__v8sf) __A;
  const __v8sf __mul2 = (__v8sf) __B;
  const __v8sf __term = (__v8sf) __C;

  return (__m256)
    __builtin_ia32_vfmsubps256_maskz (__mul1, __mul2, __term, __U);
}

/* Fused multiply-subtract (__A * __B) - __C on packed floats; lanes
   whose bit in __U is clear keep the value from __A.  */
extern __inline __m128
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_fmsub_ps (__m128 __A, __mmask8 __U, __m128 __B, __m128 __C)
{
  const __v4sf __mul1 = (__v4sf) __A;
  const __v4sf __mul2 = (__v4sf) __B;
  const __v4sf __term = (__v4sf) __C;

  return (__m128)
    __builtin_ia32_vfmsubps128_mask (__mul1, __mul2, __term, __U);
}

/* Fused multiply-subtract (__A * __B) - __C on packed floats; lanes
   whose bit in __U is clear keep the value from __C.  */
extern __inline __m128
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask3_fmsub_ps (__m128 __A, __m128 __B, __m128 __C, __mmask8 __U)
{
  const __v4sf __mul1 = (__v4sf) __A;
  const __v4sf __mul2 = (__v4sf) __B;
  const __v4sf __term = (__v4sf) __C;

  return (__m128)
    __builtin_ia32_vfmsubps128_mask3 (__mul1, __mul2, __term, __U);
}

/* Fused multiply-subtract (__A * __B) - __C on packed floats; lanes
   whose bit in __U is clear are zeroed.  */
extern __inline __m128
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_fmsub_ps (__mmask8 __U, __m128 __A, __m128 __B, __m128 __C)
{
  const __v4sf __mul1 = (__v4sf) __A;
  const __v4sf __mul2 = (__v4sf) __B;
  const __v4sf __term = (__v4sf) __C;

  return (__m128)
    __builtin_ia32_vfmsubps128_maskz (__mul1, __mul2, __term, __U);
}

/* Forwards __A, __B and __C (reinterpreted as __v4df) and the mask __U
   to __builtin_ia32_vfmaddsubpd256_mask; the result is cast to
   __m256d.  */
extern __inline __m256d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_fmaddsub_pd (__m256d __A, __mmask8 __U, __m256d __B,
			 __m256d __C)
{
  const __v4df __x = (__v4df) __A;
  const __v4df __y = (__v4df) __B;
  const __v4df __z = (__v4df) __C;
  const __mmask8 __k = (__mmask8) __U;

  return (__m256d) __builtin_ia32_vfmaddsubpd256_mask (__x, __y, __z, __k);
}

/* Forwards __A, __B and __C (reinterpreted as __v4df) and the mask __U
   to __builtin_ia32_vfmaddsubpd256_mask3; the result is cast to
   __m256d.  */
extern __inline __m256d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask3_fmaddsub_pd (__m256d __A, __m256d __B, __m256d __C,
			  __mmask8 __U)
{
  const __v4df __x = (__v4df) __A;
  const __v4df __y = (__v4df) __B;
  const __v4df __z = (__v4df) __C;
  const __mmask8 __k = (__mmask8) __U;

  return (__m256d) __builtin_ia32_vfmaddsubpd256_mask3 (__x, __y, __z, __k);
}

/* Forwards __A, __B and __C (reinterpreted as __v4df) and the mask __U
   to __builtin_ia32_vfmaddsubpd256_maskz; the result is cast to
   __m256d.  */
extern __inline __m256d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_maskz_fmaddsub_pd (__mmask8 __U, __m256d __A, __m256d __B,
			  __m256d __C)
{
  const __v4df __x = (__v4df) __A;
  const __v4df __y = (__v4df) __B;
  const __v4df __z = (__v4df) __C;
  const __mmask8 __k = (__mmask8) __U;

  return (__m256d) __builtin_ia32_vfmaddsubpd256_maskz (__x, __y, __z, __k);
}

/* Forwards __A, __B and __C (reinterpreted as __v2df) and the mask __U
   to __builtin_ia32_vfmaddsubpd128_mask; the result is cast to
   __m128d.  */
extern __inline __m128d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_fmaddsub_pd (__m128d __A, __mmask8 __U, __m128d __B,
		      __m128d __C)
{
  const __v2df __x = (__v2df) __A;
  const __v2df __y = (__v2df) __B;
  const __v2df __z = (__v2df) __C;
  const __mmask8 __k = (__mmask8) __U;

  return (__m128d) __builtin_ia32_vfmaddsubpd128_mask (__x, __y, __z, __k);
}

/* Forwards __A, __B and __C (reinterpreted as __v2df) and the mask __U
   to __builtin_ia32_vfmaddsubpd128_mask3; the result is cast to
   __m128d.  */
extern __inline __m128d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask3_fmaddsub_pd (__m128d __A, __m128d __B, __m128d __C,
		       __mmask8 __U)
{
  const __v2df __x = (__v2df) __A;
  const __v2df __y = (__v2df) __B;
  const __v2df __z = (__v2df) __C;
  const __mmask8 __k = (__mmask8) __U;

  return (__m128d) __builtin_ia32_vfmaddsubpd128_mask3 (__x, __y, __z, __k);
}

/* Forwards __A, __B and __C (reinterpreted as __v2df) and the mask __U
   to __builtin_ia32_vfmaddsubpd128_maskz; the result is cast to
   __m128d.  */
extern __inline __m128d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_fmaddsub_pd (__mmask8 __U, __m128d __A, __m128d __B,
		       __m128d __C)
{
  const __v2df __x = (__v2df) __A;
  const __v2df __y = (__v2df) __B;
  const __v2df __z = (__v2df) __C;
  const __mmask8 __k = (__mmask8) __U;

  return (__m128d) __builtin_ia32_vfmaddsubpd128_maskz (__x, __y, __z, __k);
}

/* Forwards __A, __B and __C (reinterpreted as __v8sf) and the mask __U
   to __builtin_ia32_vfmaddsubps256_mask; the result is cast to
   __m256.  */
extern __inline __m256
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_fmaddsub_ps (__m256 __A, __mmask8 __U, __m256 __B,
			 __m256 __C)
{
  const __v8sf __x = (__v8sf) __A;
  const __v8sf __y = (__v8sf) __B;
  const __v8sf __z = (__v8sf) __C;
  const __mmask8 __k = (__mmask8) __U;

  return (__m256) __builtin_ia32_vfmaddsubps256_mask (__x, __y, __z, __k);
}

/* Forwards __A, __B and __C (reinterpreted as __v8sf) and the mask __U
   to __builtin_ia32_vfmaddsubps256_mask3; the result is cast to
   __m256.  */
extern __inline __m256
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask3_fmaddsub_ps (__m256 __A, __m256 __B, __m256 __C,
			  __mmask8 __U)
{
  const __v8sf __x = (__v8sf) __A;
  const __v8sf __y = (__v8sf) __B;
  const __v8sf __z = (__v8sf) __C;
  const __mmask8 __k = (__mmask8) __U;

  return (__m256) __builtin_ia32_vfmaddsubps256_mask3 (__x, __y, __z, __k);
}

/* Forwards __A, __B and __C (reinterpreted as __v8sf) and the mask __U
   to __builtin_ia32_vfmaddsubps256_maskz; the result is cast to
   __m256.  */
extern __inline __m256
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_maskz_fmaddsub_ps (__mmask8 __U, __m256 __A, __m256 __B,
			  __m256 __C)
{
  const __v8sf __x = (__v8sf) __A;
  const __v8sf __y = (__v8sf) __B;
  const __v8sf __z = (__v8sf) __C;
  const __mmask8 __k = (__mmask8) __U;

  return (__m256) __builtin_ia32_vfmaddsubps256_maskz (__x, __y, __z, __k);
}

/* Forwards __A, __B and __C (reinterpreted as __v4sf) and the mask __U
   to __builtin_ia32_vfmaddsubps128_mask; the result is cast to
   __m128.  */
extern __inline __m128
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_fmaddsub_ps (__m128 __A, __mmask8 __U, __m128 __B, __m128 __C)
{
  const __v4sf __x = (__v4sf) __A;
  const __v4sf __y = (__v4sf) __B;
  const __v4sf __z = (__v4sf) __C;
  const __mmask8 __k = (__mmask8) __U;

  return (__m128) __builtin_ia32_vfmaddsubps128_mask (__x, __y, __z, __k);
}

/* Forwards __A, __B and __C (reinterpreted as __v4sf) and the mask __U
   to __builtin_ia32_vfmaddsubps128_mask3; the result is cast to
   __m128.  */
extern __inline __m128
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask3_fmaddsub_ps (__m128 __A, __m128 __B, __m128 __C,
		       __mmask8 __U)
{
  const __v4sf __x = (__v4sf) __A;
  const __v4sf __y = (__v4sf) __B;
  const __v4sf __z = (__v4sf) __C;
  const __mmask8 __k = (__mmask8) __U;

  return (__m128) __builtin_ia32_vfmaddsubps128_mask3 (__x, __y, __z, __k);
}

/* Forwards __A, __B and __C (reinterpreted as __v4sf) and the mask __U
   to __builtin_ia32_vfmaddsubps128_maskz; the result is cast to
   __m128.  */
extern __inline __m128
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_fmaddsub_ps (__mmask8 __U, __m128 __A, __m128 __B,
		       __m128 __C)
{
  const __v4sf __x = (__v4sf) __A;
  const __v4sf __y = (__v4sf) __B;
  const __v4sf __z = (__v4sf) __C;
  const __mmask8 __k = (__mmask8) __U;

  return (__m128) __builtin_ia32_vfmaddsubps128_maskz (__x, __y, __z, __k);
}

/* Built on the fmaddsub builtin: __C is negated (as __v4df) and then
   __A, __B, the negated __C and the mask __U are passed to
   __builtin_ia32_vfmaddsubpd256_mask.  */
extern __inline __m256d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_fmsubadd_pd (__m256d __A, __mmask8 __U, __m256d __B,
			 __m256d __C)
{
  const __v4df __x = (__v4df) __A;
  const __v4df __y = (__v4df) __B;
  /* Third operand is the sign-flipped __C.  */
  const __v4df __nz = -(__v4df) __C;
  const __mmask8 __k = (__mmask8) __U;

  return (__m256d) __builtin_ia32_vfmaddsubpd256_mask (__x, __y, __nz, __k);
}

/* Forwards __A, __B and __C (reinterpreted as __v4df, __C not negated)
   and the mask __U to __builtin_ia32_vfmsubaddpd256_mask3.  */
extern __inline __m256d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask3_fmsubadd_pd (__m256d __A, __m256d __B, __m256d __C,
			  __mmask8 __U)
{
  const __v4df __x = (__v4df) __A;
  const __v4df __y = (__v4df) __B;
  const __v4df __z = (__v4df) __C;
  const __mmask8 __k = (__mmask8) __U;

  return (__m256d) __builtin_ia32_vfmsubaddpd256_mask3 (__x, __y, __z, __k);
}

/* Built on the fmaddsub builtin: __C is negated (as __v4df) and then
   __A, __B, the negated __C and the mask __U are passed to
   __builtin_ia32_vfmaddsubpd256_maskz.  */
extern __inline __m256d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_maskz_fmsubadd_pd (__mmask8 __U, __m256d __A, __m256d __B,
			  __m256d __C)
{
  const __v4df __x = (__v4df) __A;
  const __v4df __y = (__v4df) __B;
  /* Third operand is the sign-flipped __C.  */
  const __v4df __nz = -(__v4df) __C;
  const __mmask8 __k = (__mmask8) __U;

  return (__m256d) __builtin_ia32_vfmaddsubpd256_maskz (__x, __y, __nz, __k);
}

/* Built on the fmaddsub builtin: __C is negated (as __v2df) and then
   __A, __B, the negated __C and the mask __U are passed to
   __builtin_ia32_vfmaddsubpd128_mask.  */
extern __inline __m128d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_fmsubadd_pd (__m128d __A, __mmask8 __U, __m128d __B,
		      __m128d __C)
{
  const __v2df __x = (__v2df) __A;
  const __v2df __y = (__v2df) __B;
  /* Third operand is the sign-flipped __C.  */
  const __v2df __nz = -(__v2df) __C;
  const __mmask8 __k = (__mmask8) __U;

  return (__m128d) __builtin_ia32_vfmaddsubpd128_mask (__x, __y, __nz, __k);
}

/* Forwards __A, __B and __C (reinterpreted as __v2df, __C not negated)
   and the mask __U to __builtin_ia32_vfmsubaddpd128_mask3.  */
extern __inline __m128d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask3_fmsubadd_pd (__m128d __A, __m128d __B, __m128d __C,
		       __mmask8 __U)
{
  const __v2df __x = (__v2df) __A;
  const __v2df __y = (__v2df) __B;
  const __v2df __z = (__v2df) __C;
  const __mmask8 __k = (__mmask8) __U;

  return (__m128d) __builtin_ia32_vfmsubaddpd128_mask3 (__x, __y, __z, __k);
}

/* Built on the fmaddsub builtin: __C is negated (as __v2df) and then
   __A, __B, the negated __C and the mask __U are passed to
   __builtin_ia32_vfmaddsubpd128_maskz.  */
extern __inline __m128d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_fmsubadd_pd (__mmask8 __U, __m128d __A, __m128d __B,
		       __m128d __C)
{
  const __v2df __x = (__v2df) __A;
  const __v2df __y = (__v2df) __B;
  /* Third operand is the sign-flipped __C.  */
  const __v2df __nz = -(__v2df) __C;
  const __mmask8 __k = (__mmask8) __U;

  return (__m128d) __builtin_ia32_vfmaddsubpd128_maskz (__x, __y, __nz, __k);
}

/* Built on the fmaddsub builtin: __C is negated (as __v8sf) and then
   __A, __B, the negated __C and the mask __U are passed to
   __builtin_ia32_vfmaddsubps256_mask.  */
extern __inline __m256
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_fmsubadd_ps (__m256 __A, __mmask8 __U, __m256 __B,
			 __m256 __C)
{
  const __v8sf __x = (__v8sf) __A;
  const __v8sf __y = (__v8sf) __B;
  /* Third operand is the sign-flipped __C.  */
  const __v8sf __nz = -(__v8sf) __C;
  const __mmask8 __k = (__mmask8) __U;

  return (__m256) __builtin_ia32_vfmaddsubps256_mask (__x, __y, __nz, __k);
}

/* Forwards __A, __B and __C (reinterpreted as __v8sf, __C not negated)
   and the mask __U to __builtin_ia32_vfmsubaddps256_mask3.  */
extern __inline __m256
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask3_fmsubadd_ps (__m256 __A, __m256 __B, __m256 __C,
			  __mmask8 __U)
{
  const __v8sf __x = (__v8sf) __A;
  const __v8sf __y = (__v8sf) __B;
  const __v8sf __z = (__v8sf) __C;
  const __mmask8 __k = (__mmask8) __U;

  return (__m256) __builtin_ia32_vfmsubaddps256_mask3 (__x, __y, __z, __k);
}

/* Built on the fmaddsub builtin: __C is negated (as __v8sf) and then
   __A, __B, the negated __C and the mask __U are passed to
   __builtin_ia32_vfmaddsubps256_maskz.  */
extern __inline __m256
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_maskz_fmsubadd_ps (__mmask8 __U, __m256 __A, __m256 __B,
			  __m256 __C)
{
  const __v8sf __x = (__v8sf) __A;
  const __v8sf __y = (__v8sf) __B;
  /* Third operand is the sign-flipped __C.  */
  const __v8sf __nz = -(__v8sf) __C;
  const __mmask8 __k = (__mmask8) __U;

  return (__m256) __builtin_ia32_vfmaddsubps256_maskz (__x, __y, __nz, __k);
}

/* Built on the fmaddsub builtin: __C is negated (as __v4sf) and then
   __A, __B, the negated __C and the mask __U are passed to
   __builtin_ia32_vfmaddsubps128_mask.  */
extern __inline __m128
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_fmsubadd_ps (__m128 __A, __mmask8 __U, __m128 __B, __m128 __C)
{
  const __v4sf __x = (__v4sf) __A;
  const __v4sf __y = (__v4sf) __B;
  /* Third operand is the sign-flipped __C.  */
  const __v4sf __nz = -(__v4sf) __C;
  const __mmask8 __k = (__mmask8) __U;

  return (__m128) __builtin_ia32_vfmaddsubps128_mask (__x, __y, __nz, __k);
}

/* Forwards __A, __B and __C (reinterpreted as __v4sf, __C not negated)
   and the mask __U to __builtin_ia32_vfmsubaddps128_mask3.  */
extern __inline __m128
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask3_fmsubadd_ps (__m128 __A, __m128 __B, __m128 __C,
		       __mmask8 __U)
{
  const __v4sf __x = (__v4sf) __A;
  const __v4sf __y = (__v4sf) __B;
  const __v4sf __z = (__v4sf) __C;
  const __mmask8 __k = (__mmask8) __U;

  return (__m128) __builtin_ia32_vfmsubaddps128_mask3 (__x, __y, __z, __k);
}

/* Built on the fmaddsub builtin: __C is negated (as __v4sf) and then
   __A, __B, the negated __C and the mask __U are passed to
   __builtin_ia32_vfmaddsubps128_maskz.  */
extern __inline __m128
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_fmsubadd_ps (__mmask8 __U, __m128 __A, __m128 __B,
		       __m128 __C)
{
  const __v4sf __x = (__v4sf) __A;
  const __v4sf __y = (__v4sf) __B;
  /* Third operand is the sign-flipped __C.  */
  const __v4sf __nz = -(__v4sf) __C;
  const __mmask8 __k = (__mmask8) __U;

  return (__m128) __builtin_ia32_vfmaddsubps128_maskz (__x, __y, __nz, __k);
}

/* Calls __builtin_ia32_vfnmaddpd256_mask with __A, __B and __C viewed
   as __v4df and with the mask __U; returns the result as __m256d.  */
extern __inline __m256d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_fnmadd_pd (__m256d __A, __mmask8 __U, __m256d __B,
		       __m256d __C)
{
  const __v4df __x = (__v4df) __A;
  const __v4df __y = (__v4df) __B;
  const __v4df __z = (__v4df) __C;
  const __mmask8 __k = (__mmask8) __U;

  return (__m256d) __builtin_ia32_vfnmaddpd256_mask (__x, __y, __z, __k);
}

/* Calls __builtin_ia32_vfnmaddpd256_mask3 with __A, __B and __C viewed
   as __v4df and with the mask __U; returns the result as __m256d.  */
extern __inline __m256d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask3_fnmadd_pd (__m256d __A, __m256d __B, __m256d __C,
			__mmask8 __U)
{
  const __v4df __x = (__v4df) __A;
  const __v4df __y = (__v4df) __B;
  const __v4df __z = (__v4df) __C;
  const __mmask8 __k = (__mmask8) __U;

  return (__m256d) __builtin_ia32_vfnmaddpd256_mask3 (__x, __y, __z, __k);
}

/* Calls __builtin_ia32_vfnmaddpd256_maskz with __A, __B and __C viewed
   as __v4df and with the mask __U; returns the result as __m256d.  */
extern __inline __m256d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_maskz_fnmadd_pd (__mmask8 __U, __m256d __A, __m256d __B,
			__m256d __C)
{
  const __v4df __x = (__v4df) __A;
  const __v4df __y = (__v4df) __B;
  const __v4df __z = (__v4df) __C;
  const __mmask8 __k = (__mmask8) __U;

  return (__m256d) __builtin_ia32_vfnmaddpd256_maskz (__x, __y, __z, __k);
}

/* Calls __builtin_ia32_vfnmaddpd128_mask with __A, __B and __C viewed
   as __v2df and with the mask __U; returns the result as __m128d.  */
extern __inline __m128d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_fnmadd_pd (__m128d __A, __mmask8 __U, __m128d __B,
		    __m128d __C)
{
  const __v2df __x = (__v2df) __A;
  const __v2df __y = (__v2df) __B;
  const __v2df __z = (__v2df) __C;
  const __mmask8 __k = (__mmask8) __U;

  return (__m128d) __builtin_ia32_vfnmaddpd128_mask (__x, __y, __z, __k);
}

/* Calls __builtin_ia32_vfnmaddpd128_mask3 with __A, __B and __C viewed
   as __v2df and with the mask __U; returns the result as __m128d.  */
extern __inline __m128d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask3_fnmadd_pd (__m128d __A, __m128d __B, __m128d __C,
		     __mmask8 __U)
{
  const __v2df __x = (__v2df) __A;
  const __v2df __y = (__v2df) __B;
  const __v2df __z = (__v2df) __C;
  const __mmask8 __k = (__mmask8) __U;

  return (__m128d) __builtin_ia32_vfnmaddpd128_mask3 (__x, __y, __z, __k);
}

/* Calls __builtin_ia32_vfnmaddpd128_maskz with __A, __B and __C viewed
   as __v2df and with the mask __U; returns the result as __m128d.  */
extern __inline __m128d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_fnmadd_pd (__mmask8 __U, __m128d __A, __m128d __B,
		     __m128d __C)
{
  const __v2df __x = (__v2df) __A;
  const __v2df __y = (__v2df) __B;
  const __v2df __z = (__v2df) __C;
  const __mmask8 __k = (__mmask8) __U;

  return (__m128d) __builtin_ia32_vfnmaddpd128_maskz (__x, __y, __z, __k);
}

/* Calls __builtin_ia32_vfnmaddps256_mask with __A, __B and __C viewed
   as __v8sf and with the mask __U; returns the result as __m256.  */
extern __inline __m256
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_fnmadd_ps (__m256 __A, __mmask8 __U, __m256 __B,
		       __m256 __C)
{
  const __v8sf __x = (__v8sf) __A;
  const __v8sf __y = (__v8sf) __B;
  const __v8sf __z = (__v8sf) __C;
  const __mmask8 __k = (__mmask8) __U;

  return (__m256) __builtin_ia32_vfnmaddps256_mask (__x, __y, __z, __k);
}

/* Calls __builtin_ia32_vfnmaddps256_mask3 with __A, __B and __C viewed
   as __v8sf and with the mask __U; returns the result as __m256.  */
extern __inline __m256
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask3_fnmadd_ps (__m256 __A, __m256 __B, __m256 __C,
			__mmask8 __U)
{
  const __v8sf __x = (__v8sf) __A;
  const __v8sf __y = (__v8sf) __B;
  const __v8sf __z = (__v8sf) __C;
  const __mmask8 __k = (__mmask8) __U;

  return (__m256) __builtin_ia32_vfnmaddps256_mask3 (__x, __y, __z, __k);
}

/* Calls __builtin_ia32_vfnmaddps256_maskz with __A, __B and __C viewed
   as __v8sf and with the mask __U; returns the result as __m256.  */
extern __inline __m256
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_maskz_fnmadd_ps (__mmask8 __U, __m256 __A, __m256 __B,
			__m256 __C)
{
  const __v8sf __x = (__v8sf) __A;
  const __v8sf __y = (__v8sf) __B;
  const __v8sf __z = (__v8sf) __C;
  const __mmask8 __k = (__mmask8) __U;

  return (__m256) __builtin_ia32_vfnmaddps256_maskz (__x, __y, __z, __k);
}

/* Calls __builtin_ia32_vfnmaddps128_mask with __A, __B and __C viewed
   as __v4sf and with the mask __U; returns the result as __m128.  */
extern __inline __m128
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_fnmadd_ps (__m128 __A, __mmask8 __U, __m128 __B, __m128 __C)
{
  const __v4sf __x = (__v4sf) __A;
  const __v4sf __y = (__v4sf) __B;
  const __v4sf __z = (__v4sf) __C;
  const __mmask8 __k = (__mmask8) __U;

  return (__m128) __builtin_ia32_vfnmaddps128_mask (__x, __y, __z, __k);
}

/* Calls __builtin_ia32_vfnmaddps128_mask3 with __A, __B and __C viewed
   as __v4sf and with the mask __U; returns the result as __m128.  */
extern __inline __m128
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask3_fnmadd_ps (__m128 __A, __m128 __B, __m128 __C, __mmask8 __U)
{
  const __v4sf __x = (__v4sf) __A;
  const __v4sf __y = (__v4sf) __B;
  const __v4sf __z = (__v4sf) __C;
  const __mmask8 __k = (__mmask8) __U;

  return (__m128) __builtin_ia32_vfnmaddps128_mask3 (__x, __y, __z, __k);
}

/* Calls __builtin_ia32_vfnmaddps128_maskz with __A, __B and __C viewed
   as __v4sf and with the mask __U; returns the result as __m128.  */
extern __inline __m128
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_fnmadd_ps (__mmask8 __U, __m128 __A, __m128 __B, __m128 __C)
{
  const __v4sf __x = (__v4sf) __A;
  const __v4sf __y = (__v4sf) __B;
  const __v4sf __z = (__v4sf) __C;
  const __mmask8 __k = (__mmask8) __U;

  return (__m128) __builtin_ia32_vfnmaddps128_maskz (__x, __y, __z, __k);
}

/* Thin wrapper: the __v4df views of __A, __B, __C and the mask __U go
   to __builtin_ia32_vfnmsubpd256_mask, whose result is returned as
   __m256d.  */
extern __inline __m256d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_fnmsub_pd (__m256d __A, __mmask8 __U, __m256d __B,
		       __m256d __C)
{
  const __v4df __x = (__v4df) __A;
  const __v4df __y = (__v4df) __B;
  const __v4df __z = (__v4df) __C;
  const __mmask8 __k = (__mmask8) __U;

  return (__m256d) __builtin_ia32_vfnmsubpd256_mask (__x, __y, __z, __k);
}

/* Thin wrapper: the __v4df views of __A, __B, __C and the mask __U go
   to __builtin_ia32_vfnmsubpd256_mask3, whose result is returned as
   __m256d.  */
extern __inline __m256d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask3_fnmsub_pd (__m256d __A, __m256d __B, __m256d __C,
			__mmask8 __U)
{
  const __v4df __x = (__v4df) __A;
  const __v4df __y = (__v4df) __B;
  const __v4df __z = (__v4df) __C;
  const __mmask8 __k = (__mmask8) __U;

  return (__m256d) __builtin_ia32_vfnmsubpd256_mask3 (__x, __y, __z, __k);
}

/* Thin wrapper: the __v4df views of __A, __B, __C and the mask __U go
   to __builtin_ia32_vfnmsubpd256_maskz, whose result is returned as
   __m256d.  */
extern __inline __m256d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_maskz_fnmsub_pd (__mmask8 __U, __m256d __A, __m256d __B,
			__m256d __C)
{
  const __v4df __x = (__v4df) __A;
  const __v4df __y = (__v4df) __B;
  const __v4df __z = (__v4df) __C;
  const __mmask8 __k = (__mmask8) __U;

  return (__m256d) __builtin_ia32_vfnmsubpd256_maskz (__x, __y, __z, __k);
}

/* Thin wrapper: the __v2df views of __A, __B, __C and the mask __U go
   to __builtin_ia32_vfnmsubpd128_mask, whose result is returned as
   __m128d.  */
extern __inline __m128d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_fnmsub_pd (__m128d __A, __mmask8 __U, __m128d __B,
		    __m128d __C)
{
  const __v2df __x = (__v2df) __A;
  const __v2df __y = (__v2df) __B;
  const __v2df __z = (__v2df) __C;
  const __mmask8 __k = (__mmask8) __U;

  return (__m128d) __builtin_ia32_vfnmsubpd128_mask (__x, __y, __z, __k);
}

/* Thin wrapper: the __v2df views of __A, __B, __C and the mask __U go
   to __builtin_ia32_vfnmsubpd128_mask3, whose result is returned as
   __m128d.  */
extern __inline __m128d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask3_fnmsub_pd (__m128d __A, __m128d __B, __m128d __C,
		     __mmask8 __U)
{
  const __v2df __x = (__v2df) __A;
  const __v2df __y = (__v2df) __B;
  const __v2df __z = (__v2df) __C;
  const __mmask8 __k = (__mmask8) __U;

  return (__m128d) __builtin_ia32_vfnmsubpd128_mask3 (__x, __y, __z, __k);
}

/* Thin wrapper: the __v2df views of __A, __B, __C and the mask __U go
   to __builtin_ia32_vfnmsubpd128_maskz, whose result is returned as
   __m128d.  */
extern __inline __m128d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_fnmsub_pd (__mmask8 __U, __m128d __A, __m128d __B,
		     __m128d __C)
{
  const __v2df __x = (__v2df) __A;
  const __v2df __y = (__v2df) __B;
  const __v2df __z = (__v2df) __C;
  const __mmask8 __k = (__mmask8) __U;

  return (__m128d) __builtin_ia32_vfnmsubpd128_maskz (__x, __y, __z, __k);
}

/* Thin wrapper: the __v8sf views of __A, __B, __C and the mask __U go
   to __builtin_ia32_vfnmsubps256_mask, whose result is returned as
   __m256.  */
extern __inline __m256
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_fnmsub_ps (__m256 __A, __mmask8 __U, __m256 __B,
		       __m256 __C)
{
  const __v8sf __x = (__v8sf) __A;
  const __v8sf __y = (__v8sf) __B;
  const __v8sf __z = (__v8sf) __C;
  const __mmask8 __k = (__mmask8) __U;

  return (__m256) __builtin_ia32_vfnmsubps256_mask (__x, __y, __z, __k);
}

/* Thin wrapper: the __v8sf views of __A, __B, __C and the mask __U go
   to __builtin_ia32_vfnmsubps256_mask3, whose result is returned as
   __m256.  */
extern __inline __m256
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask3_fnmsub_ps (__m256 __A, __m256 __B, __m256 __C,
			__mmask8 __U)
{
  const __v8sf __x = (__v8sf) __A;
  const __v8sf __y = (__v8sf) __B;
  const __v8sf __z = (__v8sf) __C;
  const __mmask8 __k = (__mmask8) __U;

  return (__m256) __builtin_ia32_vfnmsubps256_mask3 (__x, __y, __z, __k);
}

/* Thin wrapper: the __v8sf views of __A, __B, __C and the mask __U go
   to __builtin_ia32_vfnmsubps256_maskz, whose result is returned as
   __m256.  */
extern __inline __m256
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_maskz_fnmsub_ps (__mmask8 __U, __m256 __A, __m256 __B,
			__m256 __C)
{
  const __v8sf __x = (__v8sf) __A;
  const __v8sf __y = (__v8sf) __B;
  const __v8sf __z = (__v8sf) __C;
  const __mmask8 __k = (__mmask8) __U;

  return (__m256) __builtin_ia32_vfnmsubps256_maskz (__x, __y, __z, __k);
}

/* Thin wrapper: the __v4sf views of __A, __B, __C and the mask __U go
   to __builtin_ia32_vfnmsubps128_mask, whose result is returned as
   __m128.  */
extern __inline __m128
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_fnmsub_ps (__m128 __A, __mmask8 __U, __m128 __B, __m128 __C)
{
  const __v4sf __x = (__v4sf) __A;
  const __v4sf __y = (__v4sf) __B;
  const __v4sf __z = (__v4sf) __C;
  const __mmask8 __k = (__mmask8) __U;

  return (__m128) __builtin_ia32_vfnmsubps128_mask (__x, __y, __z, __k);
}

/* Thin wrapper: the __v4sf views of __A, __B, __C and the mask __U go
   to __builtin_ia32_vfnmsubps128_mask3, whose result is returned as
   __m128.  */
extern __inline __m128
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask3_fnmsub_ps (__m128 __A, __m128 __B, __m128 __C, __mmask8 __U)
{
  const __v4sf __x = (__v4sf) __A;
  const __v4sf __y = (__v4sf) __B;
  const __v4sf __z = (__v4sf) __C;
  const __mmask8 __k = (__mmask8) __U;

  return (__m128) __builtin_ia32_vfnmsubps128_mask3 (__x, __y, __z, __k);
}

/* Thin wrapper: the __v4sf views of __A, __B, __C and the mask __U go
   to __builtin_ia32_vfnmsubps128_maskz, whose result is returned as
   __m128.  */
extern __inline __m128
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_fnmsub_ps (__mmask8 __U, __m128 __A, __m128 __B, __m128 __C)
{
  const __v4sf __x = (__v4sf) __A;
  const __v4sf __y = (__v4sf) __B;
  const __v4sf __z = (__v4sf) __C;
  const __mmask8 __k = (__mmask8) __U;

  return (__m128) __builtin_ia32_vfnmsubps128_maskz (__x, __y, __z, __k);
}

/* Calls __builtin_ia32_pandd128_mask with __A and __B as the two source
   operands, __W as the third vector operand and __U as the mask, all
   vectors viewed as __v4si.  */
extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_and_epi32 (__m128i __W, __mmask8 __U, __m128i __A,
		    __m128i __B)
{
  const __v4si __x = (__v4si) __A;
  const __v4si __y = (__v4si) __B;
  const __v4si __src = (__v4si) __W;
  const __mmask8 __k = (__mmask8) __U;

  return (__m128i) __builtin_ia32_pandd128_mask (__x, __y, __src, __k);
}

/* Calls __builtin_ia32_pandd128_mask with __A and __B (as __v4si), the
   result of _mm_setzero_si128 () as the third vector operand, and __U
   as the mask.  */
extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_and_epi32 (__mmask8 __U, __m128i __A, __m128i __B)
{
  const __v4si __x = (__v4si) __A;
  const __v4si __y = (__v4si) __B;
  const __v4si __zero = (__v4si) _mm_setzero_si128 ();
  const __mmask8 __k = (__mmask8) __U;

  return (__m128i) __builtin_ia32_pandd128_mask (__x, __y, __zero, __k);
}

/* Calls __builtin_ia32_pandnd256_mask with __A and __B as the two
   source operands, __W as the third vector operand and __U as the
   mask, all vectors viewed as __v8si.  */
extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_andnot_epi32 (__m256i __W, __mmask8 __U, __m256i __A,
			  __m256i __B)
{
  const __v8si __x = (__v8si) __A;
  const __v8si __y = (__v8si) __B;
  const __v8si __src = (__v8si) __W;
  const __mmask8 __k = (__mmask8) __U;

  return (__m256i) __builtin_ia32_pandnd256_mask (__x, __y, __src, __k);
}

/* Calls __builtin_ia32_pandnd256_mask with __A and __B (as __v8si),
   the result of _mm256_setzero_si256 () as the third vector operand,
   and __U as the mask.  */
extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_maskz_andnot_epi32 (__mmask8 __U, __m256i __A, __m256i __B)
{
  const __v8si __x = (__v8si) __A;
  const __v8si __y = (__v8si) __B;
  const __v8si __zero = (__v8si) _mm256_setzero_si256 ();
  const __mmask8 __k = (__mmask8) __U;

  return (__m256i) __builtin_ia32_pandnd256_mask (__x, __y, __zero, __k);
}

/* Calls __builtin_ia32_pandnd128_mask with __A and __B as the two
   source operands, __W as the third vector operand and __U as the
   mask, all vectors viewed as __v4si.  */
extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_andnot_epi32 (__m128i __W, __mmask8 __U, __m128i __A,
		       __m128i __B)
{
  const __v4si __x = (__v4si) __A;
  const __v4si __y = (__v4si) __B;
  const __v4si __src = (__v4si) __W;
  const __mmask8 __k = (__mmask8) __U;

  return (__m128i) __builtin_ia32_pandnd128_mask (__x, __y, __src, __k);
}

/* Calls __builtin_ia32_pandnd128_mask with __A and __B (as __v4si),
   the result of _mm_setzero_si128 () as the third vector operand, and
   __U as the mask.  */
extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_andnot_epi32 (__mmask8 __U, __m128i __A, __m128i __B)
{
  const __v4si __x = (__v4si) __A;
  const __v4si __y = (__v4si) __B;
  const __v4si __zero = (__v4si) _mm_setzero_si128 ();
  const __mmask8 __k = (__mmask8) __U;

  return (__m128i) __builtin_ia32_pandnd128_mask (__x, __y, __zero, __k);
}

/* Calls __builtin_ia32_pord256_mask with __A and __B as the two source
   operands, __W as the third vector operand and __U as the mask, all
   vectors viewed as __v8si.  */
extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_or_epi32 (__m256i __W, __mmask8 __U, __m256i __A,
		      __m256i __B)
{
  const __v8si __x = (__v8si) __A;
  const __v8si __y = (__v8si) __B;
  const __v8si __src = (__v8si) __W;
  const __mmask8 __k = (__mmask8) __U;

  return (__m256i) __builtin_ia32_pord256_mask (__x, __y, __src, __k);
}

/* Calls __builtin_ia32_pord256_mask with __A and __B (as __v8si), the
   result of _mm256_setzero_si256 () as the third vector operand, and
   __U as the mask.  */
extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_maskz_or_epi32 (__mmask8 __U, __m256i __A, __m256i __B)
{
  const __v8si __x = (__v8si) __A;
  const __v8si __y = (__v8si) __B;
  const __v8si __zero = (__v8si) _mm256_setzero_si256 ();
  const __mmask8 __k = (__mmask8) __U;

  return (__m256i) __builtin_ia32_pord256_mask (__x, __y, __zero, __k);
}

/* Bitwise OR of __A and __B, computed with the vector `|' operator on
   their __v8su views and returned as __m256i.  No builtin is used.  */
extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_or_epi32 (__m256i __A, __m256i __B)
{
  const __v8su __lhs = (__v8su) __A;
  const __v8su __rhs = (__v8su) __B;

  return (__m256i) (__lhs | __rhs);
}

/* Calls __builtin_ia32_pord128_mask with __A and __B as the two source
   operands, __W as the third vector operand and __U as the mask, all
   vectors viewed as __v4si.  */
extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_or_epi32 (__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
{
  const __v4si __x = (__v4si) __A;
  const __v4si __y = (__v4si) __B;
  const __v4si __src = (__v4si) __W;
  const __mmask8 __k = (__mmask8) __U;

  return (__m128i) __builtin_ia32_pord128_mask (__x, __y, __src, __k);
}

/* Calls __builtin_ia32_pord128_mask with __A and __B (as __v4si), the
   result of _mm_setzero_si128 () as the third vector operand, and __U
   as the mask.  */
extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_or_epi32 (__mmask8 __U, __m128i __A, __m128i __B)
{
  const __v4si __x = (__v4si) __A;
  const __v4si __y = (__v4si) __B;
  const __v4si __zero = (__v4si) _mm_setzero_si128 ();
  const __mmask8 __k = (__mmask8) __U;

  return (__m128i) __builtin_ia32_pord128_mask (__x, __y, __zero, __k);
}

/* Bitwise OR of __A and __B, computed with the vector `|' operator on
   their __v4su views and returned as __m128i.  No builtin is used.  */
extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_or_epi32 (__m128i __A, __m128i __B)
{
  const __v4su __lhs = (__v4su) __A;
  const __v4su __rhs = (__v4su) __B;

  return (__m128i) (__lhs | __rhs);
}

/* Calls __builtin_ia32_pxord256_mask with __A and __B as the two source
   operands, __W as the third vector operand and __U as the mask, all
   vectors viewed as __v8si.  */
extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_xor_epi32 (__m256i __W, __mmask8 __U, __m256i __A,
		       __m256i __B)
{
  const __v8si __x = (__v8si) __A;
  const __v8si __y = (__v8si) __B;
  const __v8si __src = (__v8si) __W;
  const __mmask8 __k = (__mmask8) __U;

  return (__m256i) __builtin_ia32_pxord256_mask (__x, __y, __src, __k);
}

/* Calls __builtin_ia32_pxord256_mask with __A and __B (as __v8si), the
   result of _mm256_setzero_si256 () as the third vector operand, and
   __U as the mask.  */
extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_maskz_xor_epi32 (__mmask8 __U, __m256i __A, __m256i __B)
{
  const __v8si __x = (__v8si) __A;
  const __v8si __y = (__v8si) __B;
  const __v8si __zero = (__v8si) _mm256_setzero_si256 ();
  const __mmask8 __k = (__mmask8) __U;

  return (__m256i) __builtin_ia32_pxord256_mask (__x, __y, __zero, __k);
}

/* Bitwise XOR of __A and __B, computed with the vector `^' operator on
   their __v8su views and returned as __m256i.  No builtin is used.  */
extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_xor_epi32 (__m256i __A, __m256i __B)
{
  const __v8su __lhs = (__v8su) __A;
  const __v8su __rhs = (__v8su) __B;

  return (__m256i) (__lhs ^ __rhs);
}

/* Calls __builtin_ia32_pxord128_mask with __A and __B as the two source
   operands, __W as the third vector operand and __U as the mask, all
   vectors viewed as __v4si.  */
extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_xor_epi32 (__m128i __W, __mmask8 __U, __m128i __A,
		    __m128i __B)
{
  const __v4si __x = (__v4si) __A;
  const __v4si __y = (__v4si) __B;
  const __v4si __src = (__v4si) __W;
  const __mmask8 __k = (__mmask8) __U;

  return (__m128i) __builtin_ia32_pxord128_mask (__x, __y, __src, __k);
}

/* Calls __builtin_ia32_pxord128_mask with __A and __B (as __v4si), the
   result of _mm_setzero_si128 () as the third vector operand, and __U
   as the mask.  */
extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_xor_epi32 (__mmask8 __U, __m128i __A, __m128i __B)
{
  const __v4si __x = (__v4si) __A;
  const __v4si __y = (__v4si) __B;
  const __v4si __zero = (__v4si) _mm_setzero_si128 ();
  const __mmask8 __k = (__mmask8) __U;

  return (__m128i) __builtin_ia32_pxord128_mask (__x, __y, __zero, __k);
}

/* Bitwise XOR of __A and __B, computed with the vector `^' operator on
   their __v4su views and returned as __m128i.  No builtin is used.  */
extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_xor_epi32 (__m128i __A, __m128i __B)
{
  const __v4su __lhs = (__v4su) __A;
  const __v4su __rhs = (__v4su) __B;

  return (__m128i) (__lhs ^ __rhs);
}

/* Calls __builtin_ia32_cvtpd2ps_mask with __A viewed as __v2df, __W
   viewed as __v4sf as the second operand, and __U as the mask; the
   result is returned as __m128.  */
extern __inline __m128
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_cvtpd_ps (__m128 __W, __mmask8 __U, __m128d __A)
{
  const __v2df __x = (__v2df) __A;
  const __v4sf __src = (__v4sf) __W;
  const __mmask8 __k = (__mmask8) __U;

  return (__m128) __builtin_ia32_cvtpd2ps_mask (__x, __src, __k);
}

/* Calls __builtin_ia32_cvtpd2ps_mask with __A viewed as __v2df, the
   result of _mm_setzero_ps () as the second operand, and __U as the
   mask; the result is returned as __m128.  */
extern __inline __m128
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_cvtpd_ps (__mmask8 __U, __m128d __A)
{
  const __v2df __x = (__v2df) __A;
  const __v4sf __zero = (__v4sf) _mm_setzero_ps ();
  const __mmask8 __k = (__mmask8) __U;

  return (__m128) __builtin_ia32_cvtpd2ps_mask (__x, __zero, __k);
}

/* Calls __builtin_ia32_cvtpd2ps256_mask with __A viewed as __v4df, __W
   viewed as __v4sf as the second operand, and __U as the mask; the
   result is returned as __m128.  */
extern __inline __m128
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_cvtpd_ps (__m128 __W, __mmask8 __U, __m256d __A)
{
  const __v4df __x = (__v4df) __A;
  const __v4sf __src = (__v4sf) __W;
  const __mmask8 __k = (__mmask8) __U;

  return (__m128) __builtin_ia32_cvtpd2ps256_mask (__x, __src, __k);
}

/* Calls __builtin_ia32_cvtpd2ps256_mask with __A viewed as __v4df, the
   result of _mm_setzero_ps () as the second operand, and __U as the
   mask; the result is returned as __m128.  */
extern __inline __m128
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_maskz_cvtpd_ps (__mmask8 __U, __m256d __A)
{
  const __v4df __x = (__v4df) __A;
  const __v4sf __zero = (__v4sf) _mm_setzero_ps ();
  const __mmask8 __k = (__mmask8) __U;

  return (__m128) __builtin_ia32_cvtpd2ps256_mask (__x, __zero, __k);
}

/* Calls __builtin_ia32_cvtps2dq256_mask with __A viewed as __v8sf, __W
   viewed as __v8si as the second operand, and __U as the mask; the
   result is returned as __m256i.  */
extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_cvtps_epi32 (__m256i __W, __mmask8 __U, __m256 __A)
{
  const __v8sf __x = (__v8sf) __A;
  const __v8si __src = (__v8si) __W;
  const __mmask8 __k = (__mmask8) __U;

  return (__m256i) __builtin_ia32_cvtps2dq256_mask (__x, __src, __k);
}

/* Calls __builtin_ia32_cvtps2dq256_mask with __A viewed as __v8sf, the
   result of _mm256_setzero_si256 () as the second operand, and __U as
   the mask; the result is returned as __m256i.  */
extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_maskz_cvtps_epi32 (__mmask8 __U, __m256 __A)
{
  const __v8sf __x = (__v8sf) __A;
  const __v8si __zero = (__v8si) _mm256_setzero_si256 ();
  const __mmask8 __k = (__mmask8) __U;

  return (__m256i) __builtin_ia32_cvtps2dq256_mask (__x, __zero, __k);
}

/* Calls __builtin_ia32_cvtps2dq128_mask with __A viewed as __v4sf, __W
   viewed as __v4si as the second operand, and __U as the mask; the
   result is returned as __m128i.  */
extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_cvtps_epi32 (__m128i __W, __mmask8 __U, __m128 __A)
{
  const __v4sf __x = (__v4sf) __A;
  const __v4si __src = (__v4si) __W;
  const __mmask8 __k = (__mmask8) __U;

  return (__m128i) __builtin_ia32_cvtps2dq128_mask (__x, __src, __k);
}

/* Calls __builtin_ia32_cvtps2dq128_mask with __A viewed as __v4sf, the
   result of _mm_setzero_si128 () as the second operand, and __U as the
   mask; the result is returned as __m128i.  */
extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_cvtps_epi32 (__mmask8 __U, __m128 __A)
{
  const __v4sf __x = (__v4sf) __A;
  const __v4si __zero = (__v4si) _mm_setzero_si128 ();
  const __mmask8 __k = (__mmask8) __U;

  return (__m128i) __builtin_ia32_cvtps2dq128_mask (__x, __zero, __k);
}

/* Unmasked entry point built on the masked builtin: calls
   __builtin_ia32_cvtps2udq256_mask with __A viewed as __v8sf, the
   result of _mm256_setzero_si256 () as the second operand, and
   (__mmask8) -1 as the mask.  */
extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_cvtps_epu32 (__m256 __A)
{
  const __v8sf __x = (__v8sf) __A;
  const __v8si __zero = (__v8si) _mm256_setzero_si256 ();
  const __mmask8 __all = (__mmask8) -1;

  return (__m256i) __builtin_ia32_cvtps2udq256_mask (__x, __zero, __all);
}

/* Calls __builtin_ia32_cvtps2udq256_mask with __A viewed as __v8sf,
   __W viewed as __v8si as the second operand, and __U as the mask; the
   result is returned as __m256i.  */
extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_cvtps_epu32 (__m256i __W, __mmask8 __U, __m256 __A)
{
  const __v8sf __x = (__v8sf) __A;
  const __v8si __src = (__v8si) __W;
  const __mmask8 __k = (__mmask8) __U;

  return (__m256i) __builtin_ia32_cvtps2udq256_mask (__x, __src, __k);
}

/* Calls __builtin_ia32_cvtps2udq256_mask with __A viewed as __v8sf,
   the result of _mm256_setzero_si256 () as the second operand, and __U
   as the mask; the result is returned as __m256i.  */
extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_maskz_cvtps_epu32 (__mmask8 __U, __m256 __A)
{
  const __v8sf __x = (__v8sf) __A;
  const __v8si __zero = (__v8si) _mm256_setzero_si256 ();
  const __mmask8 __k = (__mmask8) __U;

  return (__m256i) __builtin_ia32_cvtps2udq256_mask (__x, __zero, __k);
}

/* Unmasked entry point built on the masked builtin: calls
   __builtin_ia32_cvtps2udq128_mask with __A viewed as __v4sf, the
   result of _mm_setzero_si128 () as the second operand, and
   (__mmask8) -1 as the mask.  */
extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvtps_epu32 (__m128 __A)
{
  const __v4sf __x = (__v4sf) __A;
  const __v4si __zero = (__v4si) _mm_setzero_si128 ();
  const __mmask8 __all = (__mmask8) -1;

  return (__m128i) __builtin_ia32_cvtps2udq128_mask (__x, __zero, __all);
}

/* Calls __builtin_ia32_cvtps2udq128_mask with __A viewed as __v4sf,
   __W viewed as __v4si as the second operand, and __U as the mask; the
   result is returned as __m128i.  */
extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_cvtps_epu32 (__m128i __W, __mmask8 __U, __m128 __A)
{
  const __v4sf __x = (__v4sf) __A;
  const __v4si __src = (__v4si) __W;
  const __mmask8 __k = (__mmask8) __U;

  return (__m128i) __builtin_ia32_cvtps2udq128_mask (__x, __src, __k);
}

/* Calls __builtin_ia32_cvtps2udq128_mask with __A viewed as __v4sf,
   the result of _mm_setzero_si128 () as the second operand, and __U as
   the mask; the result is returned as __m128i.  */
extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_cvtps_epu32 (__mmask8 __U, __m128 __A)
{
  const __v4sf __x = (__v4sf) __A;
  const __v4si __zero = (__v4si) _mm_setzero_si128 ();
  const __mmask8 __k = (__mmask8) __U;

  return (__m128i) __builtin_ia32_cvtps2udq128_mask (__x, __zero, __k);
}

/* Calls __builtin_ia32_movddup256_mask with __A and __W (both viewed as
   __v4df, __W second) and the mask __U; the result is returned as
   __m256d.  */
extern __inline __m256d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_movedup_pd (__m256d __W, __mmask8 __U, __m256d __A)
{
  const __v4df __x = (__v4df) __A;
  const __v4df __src = (__v4df) __W;
  const __mmask8 __k = (__mmask8) __U;

  return (__m256d) __builtin_ia32_movddup256_mask (__x, __src, __k);
}

/* Calls __builtin_ia32_movddup256_mask with __A viewed as __v4df, the
   result of _mm256_setzero_pd () as the second operand, and __U as the
   mask; the result is returned as __m256d.  */
extern __inline __m256d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_maskz_movedup_pd (__mmask8 __U, __m256d __A)
{
  const __v4df __x = (__v4df) __A;
  const __v4df __zero = (__v4df) _mm256_setzero_pd ();
  const __mmask8 __k = (__mmask8) __U;

  return (__m256d) __builtin_ia32_movddup256_mask (__x, __zero, __k);
}

/* Calls __builtin_ia32_movddup128_mask with __A and __W (both viewed as
   __v2df, __W second) and the mask __U; the result is returned as
   __m128d.  */
extern __inline __m128d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_movedup_pd (__m128d __W, __mmask8 __U, __m128d __A)
{
  const __v2df __x = (__v2df) __A;
  const __v2df __src = (__v2df) __W;
  const __mmask8 __k = (__mmask8) __U;

  return (__m128d) __builtin_ia32_movddup128_mask (__x, __src, __k);
}

/* Calls __builtin_ia32_movddup128_mask with __A viewed as __v2df, the
   result of _mm_setzero_pd () as the second operand, and __U as the
   mask; the result is returned as __m128d.  */
extern __inline __m128d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_movedup_pd (__mmask8 __U, __m128d __A)
{
  const __v2df __x = (__v2df) __A;
  const __v2df __zero = (__v2df) _mm_setzero_pd ();
  const __mmask8 __k = (__mmask8) __U;

  return (__m128d) __builtin_ia32_movddup128_mask (__x, __zero, __k);
}

/* Calls __builtin_ia32_movshdup256_mask with __A and __W (both viewed
   as __v8sf, __W second) and the mask __U; the result is returned as
   __m256.  */
extern __inline __m256
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_movehdup_ps (__m256 __W, __mmask8 __U, __m256 __A)
{
  const __v8sf __x = (__v8sf) __A;
  const __v8sf __src = (__v8sf) __W;
  const __mmask8 __k = (__mmask8) __U;

  return (__m256) __builtin_ia32_movshdup256_mask (__x, __src, __k);
}

/* Calls __builtin_ia32_movshdup256_mask with __A viewed as __v8sf, the
   result of _mm256_setzero_ps () as the second operand, and __U as the
   mask; the result is returned as __m256.  */
extern __inline __m256
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_maskz_movehdup_ps (__mmask8 __U, __m256 __A)
{
  const __v8sf __x = (__v8sf) __A;
  const __v8sf __zero = (__v8sf) _mm256_setzero_ps ();
  const __mmask8 __k = (__mmask8) __U;

  return (__m256) __builtin_ia32_movshdup256_mask (__x, __zero, __k);
}

/* Calls __builtin_ia32_movshdup128_mask with __A and __W (both viewed
   as __v4sf, __W second) and the mask __U; the result is returned as
   __m128.  */
extern __inline __m128
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_movehdup_ps (__m128 __W, __mmask8 __U, __m128 __A)
{
  const __v4sf __x = (__v4sf) __A;
  const __v4sf __src = (__v4sf) __W;
  const __mmask8 __k = (__mmask8) __U;

  return (__m128) __builtin_ia32_movshdup128_mask (__x, __src, __k);
}

/* Calls __builtin_ia32_movshdup128_mask with __A viewed as __v4sf, the
   result of _mm_setzero_ps () as the second operand, and __U as the
   mask; the result is returned as __m128.  */
extern __inline __m128
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_movehdup_ps (__mmask8 __U, __m128 __A)
{
  const __v4sf __x = (__v4sf) __A;
  const __v4sf __zero = (__v4sf) _mm_setzero_ps ();
  const __mmask8 __k = (__mmask8) __U;

  return (__m128) __builtin_ia32_movshdup128_mask (__x, __zero, __k);
}

/* Calls __builtin_ia32_movsldup256_mask with __A and __W (both viewed
   as __v8sf, __W second) and the mask __U; the result is returned as
   __m256.  */
extern __inline __m256
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_moveldup_ps (__m256 __W, __mmask8 __U, __m256 __A)
{
  const __v8sf __x = (__v8sf) __A;
  const __v8sf __src = (__v8sf) __W;
  const __mmask8 __k = (__mmask8) __U;

  return (__m256) __builtin_ia32_movsldup256_mask (__x, __src, __k);
}

/* Calls __builtin_ia32_movsldup256_mask with __A viewed as __v8sf, the
   result of _mm256_setzero_ps () as the second operand, and __U as the
   mask; the result is returned as __m256.  */
extern __inline __m256
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_maskz_moveldup_ps (__mmask8 __U, __m256 __A)
{
  const __v8sf __x = (__v8sf) __A;
  const __v8sf __zero = (__v8sf) _mm256_setzero_ps ();
  const __mmask8 __k = (__mmask8) __U;

  return (__m256) __builtin_ia32_movsldup256_mask (__x, __zero, __k);
}

/* Calls __builtin_ia32_movsldup128_mask with __A and __W (both viewed
   as __v4sf, __W second) and the mask __U; the result is returned as
   __m128.  */
extern __inline __m128
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_moveldup_ps (__m128 __W, __mmask8 __U, __m128 __A)
{
  const __v4sf __x = (__v4sf) __A;
  const __v4sf __src = (__v4sf) __W;
  const __mmask8 __k = (__mmask8) __U;

  return (__m128) __builtin_ia32_movsldup128_mask (__x, __src, __k);
}

/* Calls __builtin_ia32_movsldup128_mask with __A viewed as __v4sf, the
   result of _mm_setzero_ps () as the second operand, and __U as the
   mask; the result is returned as __m128.  */
extern __inline __m128
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_moveldup_ps (__mmask8 __U, __m128 __A)
{
  const __v4sf __x = (__v4sf) __A;
  const __v4sf __zero = (__v4sf) _mm_setzero_ps ();
  const __mmask8 __k = (__mmask8) __U;

  return (__m128) __builtin_ia32_movsldup128_mask (__x, __zero, __k);
}

/* Calls __builtin_ia32_punpckhdq128_mask with __A and __B as the two
   source operands, __W as the third vector operand and __U as the
   mask, all vectors viewed as __v4si.  */
extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_unpackhi_epi32 (__m128i __W, __mmask8 __U, __m128i __A,
			 __m128i __B)
{
  const __v4si __x = (__v4si) __A;
  const __v4si __y = (__v4si) __B;
  const __v4si __src = (__v4si) __W;
  const __mmask8 __k = (__mmask8) __U;

  return (__m128i) __builtin_ia32_punpckhdq128_mask (__x, __y, __src, __k);
}

/* Calls __builtin_ia32_punpckhdq128_mask with __A and __B (as __v4si),
   the result of _mm_setzero_si128 () as the third vector operand, and
   __U as the mask.  */
extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_unpackhi_epi32 (__mmask8 __U, __m128i __A, __m128i __B)
{
  const __v4si __x = (__v4si) __A;
  const __v4si __y = (__v4si) __B;
  const __v4si __zero = (__v4si) _mm_setzero_si128 ();
  const __mmask8 __k = (__mmask8) __U;

  return (__m128i) __builtin_ia32_punpckhdq128_mask (__x, __y, __zero, __k);
}

/* Calls __builtin_ia32_punpckhdq256_mask with __A and __B as the two
   source operands, __W as the third vector operand and __U as the
   mask, all vectors viewed as __v8si.  */
extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_unpackhi_epi32 (__m256i __W, __mmask8 __U, __m256i __A,
			    __m256i __B)
{
  const __v8si __x = (__v8si) __A;
  const __v8si __y = (__v8si) __B;
  const __v8si __src = (__v8si) __W;
  const __mmask8 __k = (__mmask8) __U;

  return (__m256i) __builtin_ia32_punpckhdq256_mask (__x, __y, __src, __k);
}

/* Calls __builtin_ia32_punpckhdq256_mask with __A and __B (as __v8si),
   the result of _mm256_setzero_si256 () as the third vector operand,
   and __U as the mask.  */
extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_maskz_unpackhi_epi32 (__mmask8 __U, __m256i __A, __m256i __B)
{
  const __v8si __x = (__v8si) __A;
  const __v8si __y = (__v8si) __B;
  const __v8si __zero = (__v8si) _mm256_setzero_si256 ();
  const __mmask8 __k = (__mmask8) __U;

  return (__m256i) __builtin_ia32_punpckhdq256_mask (__x, __y, __zero, __k);
}

/* Calls __builtin_ia32_punpckhqdq128_mask with __A and __B as the two
   source operands, __W as the third vector operand and __U as the
   mask, all vectors viewed as __v2di.  */
extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_unpackhi_epi64 (__m128i __W, __mmask8 __U, __m128i __A,
			 __m128i __B)
{
  const __v2di __x = (__v2di) __A;
  const __v2di __y = (__v2di) __B;
  const __v2di __src = (__v2di) __W;
  const __mmask8 __k = (__mmask8) __U;

  return (__m128i) __builtin_ia32_punpckhqdq128_mask (__x, __y, __src, __k);
}

/* Calls __builtin_ia32_punpckhqdq128_mask with __A and __B (as __v2di),
   the result of _mm_setzero_si128 () as the third vector operand, and
   __U as the mask.  */
extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_unpackhi_epi64 (__mmask8 __U, __m128i __A, __m128i __B)
{
  const __v2di __x = (__v2di) __A;
  const __v2di __y = (__v2di) __B;
  const __v2di __zero = (__v2di) _mm_setzero_si128 ();
  const __mmask8 __k = (__mmask8) __U;

  return (__m128i) __builtin_ia32_punpckhqdq128_mask (__x, __y, __zero, __k);
}

/* Calls __builtin_ia32_punpckhqdq256_mask with __A and __B as the two
   source operands, __W as the third vector operand and __U as the
   mask, all vectors viewed as __v4di.  */
extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_unpackhi_epi64 (__m256i __W, __mmask8 __U, __m256i __A,
			    __m256i __B)
{
  const __v4di __x = (__v4di) __A;
  const __v4di __y = (__v4di) __B;
  const __v4di __src = (__v4di) __W;
  const __mmask8 __k = (__mmask8) __U;

  return (__m256i) __builtin_ia32_punpckhqdq256_mask (__x, __y, __src, __k);
}

/* Calls __builtin_ia32_punpckhqdq256_mask with __A and __B (as __v4di),
   the result of _mm256_setzero_si256 () as the third vector operand,
   and __U as the mask.  */
extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_maskz_unpackhi_epi64 (__mmask8 __U, __m256i __A, __m256i __B)
{
  const __v4di __x = (__v4di) __A;
  const __v4di __y = (__v4di) __B;
  const __v4di __zero = (__v4di) _mm256_setzero_si256 ();
  const __mmask8 __k = (__mmask8) __U;

  return (__m256i) __builtin_ia32_punpckhqdq256_mask (__x, __y, __zero, __k);
}

/* Calls __builtin_ia32_punpckldq128_mask with __A and __B as the two
   source operands, __W as the third vector operand and __U as the
   mask, all vectors viewed as __v4si.  */
extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_unpacklo_epi32 (__m128i __W, __mmask8 __U, __m128i __A,
			 __m128i __B)
{
  const __v4si __x = (__v4si) __A;
  const __v4si __y = (__v4si) __B;
  const __v4si __src = (__v4si) __W;
  const __mmask8 __k = (__mmask8) __U;

  return (__m128i) __builtin_ia32_punpckldq128_mask (__x, __y, __src, __k);
}

/* Calls __builtin_ia32_punpckldq128_mask with __A and __B (as __v4si),
   the result of _mm_setzero_si128 () as the third vector operand, and
   __U as the mask.  */
extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_unpacklo_epi32 (__mmask8 __U, __m128i __A, __m128i __B)
{
  const __v4si __x = (__v4si) __A;
  const __v4si __y = (__v4si) __B;
  const __v4si __zero = (__v4si) _mm_setzero_si128 ();
  const __mmask8 __k = (__mmask8) __U;

  return (__m128i) __builtin_ia32_punpckldq128_mask (__x, __y, __zero, __k);
}

/* Calls __builtin_ia32_punpckldq256_mask with __A and __B as the two
   source operands, __W as the third vector operand and __U as the
   mask, all vectors viewed as __v8si.  */
extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_unpacklo_epi32 (__m256i __W, __mmask8 __U, __m256i __A,
			    __m256i __B)
{
  const __v8si __x = (__v8si) __A;
  const __v8si __y = (__v8si) __B;
  const __v8si __src = (__v8si) __W;
  const __mmask8 __k = (__mmask8) __U;

  return (__m256i) __builtin_ia32_punpckldq256_mask (__x, __y, __src, __k);
}

/* Calls __builtin_ia32_punpckldq256_mask with __A and __B (as __v8si),
   the result of _mm256_setzero_si256 () as the third vector operand,
   and __U as the mask.  */
extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_maskz_unpacklo_epi32 (__mmask8 __U, __m256i __A, __m256i __B)
{
  const __v8si __x = (__v8si) __A;
  const __v8si __y = (__v8si) __B;
  const __v8si __zero = (__v8si) _mm256_setzero_si256 ();
  const __mmask8 __k = (__mmask8) __U;

  return (__m256i) __builtin_ia32_punpckldq256_mask (__x, __y, __zero, __k);
}

/* Calls __builtin_ia32_punpcklqdq128_mask with __A and __B as the two
   source operands, __W as the third vector operand and __U as the
   mask, all vectors viewed as __v2di.  */
extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_unpacklo_epi64 (__m128i __W, __mmask8 __U, __m128i __A,
			 __m128i __B)
{
  const __v2di __x = (__v2di) __A;
  const __v2di __y = (__v2di) __B;
  const __v2di __src = (__v2di) __W;
  const __mmask8 __k = (__mmask8) __U;

  return (__m128i) __builtin_ia32_punpcklqdq128_mask (__x, __y, __src, __k);
}

/* Reinterpret __A and __B as __v2di and pass them to
   __builtin_ia32_punpcklqdq128_mask, with the vector returned by
   _mm_setzero_si128 () as third operand and __U as the mask.  */
extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_unpacklo_epi64 (__mmask8 __U, __m128i __A, __m128i __B)
{
  __v2di __lhs = (__v2di) __A;
  __v2di __rhs = (__v2di) __B;
  __v2di __zero = (__v2di) _mm_setzero_si128 ();

  return (__m128i) __builtin_ia32_punpcklqdq128_mask (__lhs, __rhs, __zero,
						      (__mmask8) __U);
}

/* Reinterpret __A, __B and __W as __v4di and pass them, with the mask
   __U, to __builtin_ia32_punpcklqdq256_mask.  */
extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_unpacklo_epi64 (__m256i __W, __mmask8 __U, __m256i __A,
			    __m256i __B)
{
  __v4di __lhs = (__v4di) __A;
  __v4di __rhs = (__v4di) __B;
  __v4di __src = (__v4di) __W;

  return (__m256i) __builtin_ia32_punpcklqdq256_mask (__lhs, __rhs, __src,
						      (__mmask8) __U);
}

/* Reinterpret __A and __B as __v4di and pass them to
   __builtin_ia32_punpcklqdq256_mask, with the vector returned by
   _mm256_setzero_si256 () as third operand and __U as the mask.  */
extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_maskz_unpacklo_epi64 (__mmask8 __U, __m256i __A, __m256i __B)
{
  __v4di __lhs = (__v4di) __A;
  __v4di __rhs = (__v4di) __B;
  __v4di __zero = (__v4di) _mm256_setzero_si256 ();

  return (__m256i) __builtin_ia32_punpcklqdq256_mask (__lhs, __rhs, __zero,
						      (__mmask8) __U);
}

/* Invoke __builtin_ia32_ucmpd128_mask on __A and __B (as __v4si) with
   predicate immediate 0 and (__mmask8) -1 as the mask argument.  */
extern __inline __mmask8
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_cmpeq_epu32_mask (__m128i __A, __m128i __B)
{
  __v4si __lhs = (__v4si) __A;
  __v4si __rhs = (__v4si) __B;

  return (__mmask8) __builtin_ia32_ucmpd128_mask (__lhs, __rhs, 0,
						   (__mmask8) -1);
}

/* Invoke __builtin_ia32_pcmpeqd128_mask on __A and __B (as __v4si)
   with (__mmask8) -1 as the mask argument.  */
extern __inline __mmask8
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_cmpeq_epi32_mask (__m128i __A, __m128i __B)
{
  __v4si __lhs = (__v4si) __A;
  __v4si __rhs = (__v4si) __B;

  return (__mmask8) __builtin_ia32_pcmpeqd128_mask (__lhs, __rhs,
						    (__mmask8) -1);
}

/* Invoke __builtin_ia32_ucmpd128_mask on __A and __B (as __v4si) with
   predicate immediate 0 and __U as the mask argument.  */
extern __inline __mmask8
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_cmpeq_epu32_mask (__mmask8 __U, __m128i __A, __m128i __B)
{
  __v4si __lhs = (__v4si) __A;
  __v4si __rhs = (__v4si) __B;

  return (__mmask8) __builtin_ia32_ucmpd128_mask (__lhs, __rhs, 0, __U);
}

/* Invoke __builtin_ia32_pcmpeqd128_mask on __A and __B (as __v4si)
   with __U as the mask argument.  */
extern __inline __mmask8
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_cmpeq_epi32_mask (__mmask8 __U, __m128i __A, __m128i __B)
{
  __v4si __lhs = (__v4si) __A;
  __v4si __rhs = (__v4si) __B;

  return (__mmask8) __builtin_ia32_pcmpeqd128_mask (__lhs, __rhs, __U);
}

/* Invoke __builtin_ia32_ucmpd256_mask on __A and __B (as __v8si) with
   predicate immediate 0 and (__mmask8) -1 as the mask argument.  */
extern __inline __mmask8
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_cmpeq_epu32_mask (__m256i __A, __m256i __B)
{
  __v8si __lhs = (__v8si) __A;
  __v8si __rhs = (__v8si) __B;

  return (__mmask8) __builtin_ia32_ucmpd256_mask (__lhs, __rhs, 0,
						   (__mmask8) -1);
}

/* Invoke __builtin_ia32_pcmpeqd256_mask on __A and __B (as __v8si)
   with (__mmask8) -1 as the mask argument.  */
extern __inline __mmask8
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_cmpeq_epi32_mask (__m256i __A, __m256i __B)
{
  __v8si __lhs = (__v8si) __A;
  __v8si __rhs = (__v8si) __B;

  return (__mmask8) __builtin_ia32_pcmpeqd256_mask (__lhs, __rhs,
						    (__mmask8) -1);
}

/* Invoke __builtin_ia32_ucmpd256_mask on __A and __B (as __v8si) with
   predicate immediate 0 and __U as the mask argument.  */
extern __inline __mmask8
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_cmpeq_epu32_mask (__mmask8 __U, __m256i __A, __m256i __B)
{
  __v8si __lhs = (__v8si) __A;
  __v8si __rhs = (__v8si) __B;

  return (__mmask8) __builtin_ia32_ucmpd256_mask (__lhs, __rhs, 0, __U);
}

/* Invoke __builtin_ia32_pcmpeqd256_mask on __A and __B (as __v8si)
   with __U as the mask argument.  */
extern __inline __mmask8
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_cmpeq_epi32_mask (__mmask8 __U, __m256i __A, __m256i __B)
{
  __v8si __lhs = (__v8si) __A;
  __v8si __rhs = (__v8si) __B;

  return (__mmask8) __builtin_ia32_pcmpeqd256_mask (__lhs, __rhs, __U);
}

/* Invoke __builtin_ia32_ucmpq128_mask on __A and __B (as __v2di) with
   predicate immediate 0 and (__mmask8) -1 as the mask argument.  */
extern __inline __mmask8
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_cmpeq_epu64_mask (__m128i __A, __m128i __B)
{
  __v2di __lhs = (__v2di) __A;
  __v2di __rhs = (__v2di) __B;

  return (__mmask8) __builtin_ia32_ucmpq128_mask (__lhs, __rhs, 0,
						   (__mmask8) -1);
}

/* Invoke __builtin_ia32_pcmpeqq128_mask on __A and __B (as __v2di)
   with (__mmask8) -1 as the mask argument.  */
extern __inline __mmask8
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_cmpeq_epi64_mask (__m128i __A, __m128i __B)
{
  __v2di __lhs = (__v2di) __A;
  __v2di __rhs = (__v2di) __B;

  return (__mmask8) __builtin_ia32_pcmpeqq128_mask (__lhs, __rhs,
						    (__mmask8) -1);
}

/* Invoke __builtin_ia32_ucmpq128_mask on __A and __B (as __v2di) with
   predicate immediate 0 and __U as the mask argument.  */
extern __inline __mmask8
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_cmpeq_epu64_mask (__mmask8 __U, __m128i __A, __m128i __B)
{
  __v2di __lhs = (__v2di) __A;
  __v2di __rhs = (__v2di) __B;

  return (__mmask8) __builtin_ia32_ucmpq128_mask (__lhs, __rhs, 0, __U);
}

/* Invoke __builtin_ia32_pcmpeqq128_mask on __A and __B (as __v2di)
   with __U as the mask argument.  */
extern __inline __mmask8
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_cmpeq_epi64_mask (__mmask8 __U, __m128i __A, __m128i __B)
{
  __v2di __lhs = (__v2di) __A;
  __v2di __rhs = (__v2di) __B;

  return (__mmask8) __builtin_ia32_pcmpeqq128_mask (__lhs, __rhs, __U);
}

/* Invoke __builtin_ia32_ucmpq256_mask on __A and __B (as __v4di) with
   predicate immediate 0 and (__mmask8) -1 as the mask argument.  */
extern __inline __mmask8
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_cmpeq_epu64_mask (__m256i __A, __m256i __B)
{
  __v4di __lhs = (__v4di) __A;
  __v4di __rhs = (__v4di) __B;

  return (__mmask8) __builtin_ia32_ucmpq256_mask (__lhs, __rhs, 0,
						   (__mmask8) -1);
}

/* Invoke __builtin_ia32_pcmpeqq256_mask on __A and __B (as __v4di)
   with (__mmask8) -1 as the mask argument.  */
extern __inline __mmask8
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_cmpeq_epi64_mask (__m256i __A, __m256i __B)
{
  __v4di __lhs = (__v4di) __A;
  __v4di __rhs = (__v4di) __B;

  return (__mmask8) __builtin_ia32_pcmpeqq256_mask (__lhs, __rhs,
						    (__mmask8) -1);
}

/* Invoke __builtin_ia32_ucmpq256_mask on __A and __B (as __v4di) with
   predicate immediate 0 and __U as the mask argument.  */
extern __inline __mmask8
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_cmpeq_epu64_mask (__mmask8 __U, __m256i __A, __m256i __B)
{
  __v4di __lhs = (__v4di) __A;
  __v4di __rhs = (__v4di) __B;

  return (__mmask8) __builtin_ia32_ucmpq256_mask (__lhs, __rhs, 0, __U);
}

/* Invoke __builtin_ia32_pcmpeqq256_mask on __A and __B (as __v4di)
   with __U as the mask argument.  */
extern __inline __mmask8
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_cmpeq_epi64_mask (__mmask8 __U, __m256i __A, __m256i __B)
{
  __v4di __lhs = (__v4di) __A;
  __v4di __rhs = (__v4di) __B;

  return (__mmask8) __builtin_ia32_pcmpeqq256_mask (__lhs, __rhs, __U);
}

/* Invoke __builtin_ia32_ucmpd128_mask on __A and __B (as __v4si) with
   predicate immediate 6 and (__mmask8) -1 as the mask argument.  */
extern __inline __mmask8
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_cmpgt_epu32_mask (__m128i __A, __m128i __B)
{
  __v4si __lhs = (__v4si) __A;
  __v4si __rhs = (__v4si) __B;

  return (__mmask8) __builtin_ia32_ucmpd128_mask (__lhs, __rhs, 6,
						   (__mmask8) -1);
}

/* Invoke __builtin_ia32_pcmpgtd128_mask on __A and __B (as __v4si)
   with (__mmask8) -1 as the mask argument.  */
extern __inline __mmask8
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_cmpgt_epi32_mask (__m128i __A, __m128i __B)
{
  __v4si __lhs = (__v4si) __A;
  __v4si __rhs = (__v4si) __B;

  return (__mmask8) __builtin_ia32_pcmpgtd128_mask (__lhs, __rhs,
						    (__mmask8) -1);
}

/* Invoke __builtin_ia32_ucmpd128_mask on __A and __B (as __v4si) with
   predicate immediate 6 and __U as the mask argument.  */
extern __inline __mmask8
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_cmpgt_epu32_mask (__mmask8 __U, __m128i __A, __m128i __B)
{
  __v4si __lhs = (__v4si) __A;
  __v4si __rhs = (__v4si) __B;

  return (__mmask8) __builtin_ia32_ucmpd128_mask (__lhs, __rhs, 6, __U);
}

/* Invoke __builtin_ia32_pcmpgtd128_mask on __A and __B (as __v4si)
   with __U as the mask argument.  */
extern __inline __mmask8
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_cmpgt_epi32_mask (__mmask8 __U, __m128i __A, __m128i __B)
{
  __v4si __lhs = (__v4si) __A;
  __v4si __rhs = (__v4si) __B;

  return (__mmask8) __builtin_ia32_pcmpgtd128_mask (__lhs, __rhs, __U);
}

/* Invoke __builtin_ia32_ucmpd256_mask on __A and __B (as __v8si) with
   predicate immediate 6 and (__mmask8) -1 as the mask argument.  */
extern __inline __mmask8
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_cmpgt_epu32_mask (__m256i __A, __m256i __B)
{
  __v8si __lhs = (__v8si) __A;
  __v8si __rhs = (__v8si) __B;

  return (__mmask8) __builtin_ia32_ucmpd256_mask (__lhs, __rhs, 6,
						   (__mmask8) -1);
}

/* Invoke __builtin_ia32_pcmpgtd256_mask on __A and __B (as __v8si)
   with (__mmask8) -1 as the mask argument.  */
extern __inline __mmask8
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_cmpgt_epi32_mask (__m256i __A, __m256i __B)
{
  __v8si __lhs = (__v8si) __A;
  __v8si __rhs = (__v8si) __B;

  return (__mmask8) __builtin_ia32_pcmpgtd256_mask (__lhs, __rhs,
						    (__mmask8) -1);
}

/* Invoke __builtin_ia32_ucmpd256_mask on __A and __B (as __v8si) with
   predicate immediate 6 and __U as the mask argument.  */
extern __inline __mmask8
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_cmpgt_epu32_mask (__mmask8 __U, __m256i __A, __m256i __B)
{
  __v8si __lhs = (__v8si) __A;
  __v8si __rhs = (__v8si) __B;

  return (__mmask8) __builtin_ia32_ucmpd256_mask (__lhs, __rhs, 6, __U);
}

/* Invoke __builtin_ia32_pcmpgtd256_mask on __A and __B (as __v8si)
   with __U as the mask argument.  */
extern __inline __mmask8
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_cmpgt_epi32_mask (__mmask8 __U, __m256i __A, __m256i __B)
{
  __v8si __lhs = (__v8si) __A;
  __v8si __rhs = (__v8si) __B;

  return (__mmask8) __builtin_ia32_pcmpgtd256_mask (__lhs, __rhs, __U);
}

/* Invoke __builtin_ia32_ucmpq128_mask on __A and __B (as __v2di) with
   predicate immediate 6 and (__mmask8) -1 as the mask argument.  */
extern __inline __mmask8
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_cmpgt_epu64_mask (__m128i __A, __m128i __B)
{
  __v2di __lhs = (__v2di) __A;
  __v2di __rhs = (__v2di) __B;

  return (__mmask8) __builtin_ia32_ucmpq128_mask (__lhs, __rhs, 6,
						   (__mmask8) -1);
}

/* Invoke __builtin_ia32_pcmpgtq128_mask on __A and __B (as __v2di)
   with (__mmask8) -1 as the mask argument.  */
extern __inline __mmask8
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_cmpgt_epi64_mask (__m128i __A, __m128i __B)
{
  __v2di __lhs = (__v2di) __A;
  __v2di __rhs = (__v2di) __B;

  return (__mmask8) __builtin_ia32_pcmpgtq128_mask (__lhs, __rhs,
						    (__mmask8) -1);
}

/* Invoke __builtin_ia32_ucmpq128_mask on __A and __B (as __v2di) with
   predicate immediate 6 and __U as the mask argument.  */
extern __inline __mmask8
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_cmpgt_epu64_mask (__mmask8 __U, __m128i __A, __m128i __B)
{
  __v2di __lhs = (__v2di) __A;
  __v2di __rhs = (__v2di) __B;

  return (__mmask8) __builtin_ia32_ucmpq128_mask (__lhs, __rhs, 6, __U);
}

/* Invoke __builtin_ia32_pcmpgtq128_mask on __A and __B (as __v2di)
   with __U as the mask argument.  */
extern __inline __mmask8
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_cmpgt_epi64_mask (__mmask8 __U, __m128i __A, __m128i __B)
{
  __v2di __lhs = (__v2di) __A;
  __v2di __rhs = (__v2di) __B;

  return (__mmask8) __builtin_ia32_pcmpgtq128_mask (__lhs, __rhs, __U);
}

/* Invoke __builtin_ia32_ucmpq256_mask on __A and __B (as __v4di) with
   predicate immediate 6 and (__mmask8) -1 as the mask argument.  */
extern __inline __mmask8
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_cmpgt_epu64_mask (__m256i __A, __m256i __B)
{
  __v4di __lhs = (__v4di) __A;
  __v4di __rhs = (__v4di) __B;

  return (__mmask8) __builtin_ia32_ucmpq256_mask (__lhs, __rhs, 6,
						   (__mmask8) -1);
}

/* Invoke __builtin_ia32_pcmpgtq256_mask on __A and __B (as __v4di)
   with (__mmask8) -1 as the mask argument.  */
extern __inline __mmask8
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_cmpgt_epi64_mask (__m256i __A, __m256i __B)
{
  __v4di __lhs = (__v4di) __A;
  __v4di __rhs = (__v4di) __B;

  return (__mmask8) __builtin_ia32_pcmpgtq256_mask (__lhs, __rhs,
						    (__mmask8) -1);
}

/* Invoke __builtin_ia32_ucmpq256_mask on __A and __B (as __v4di) with
   predicate immediate 6 and __U as the mask argument.  */
extern __inline __mmask8
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_cmpgt_epu64_mask (__mmask8 __U, __m256i __A, __m256i __B)
{
  __v4di __lhs = (__v4di) __A;
  __v4di __rhs = (__v4di) __B;

  return (__mmask8) __builtin_ia32_ucmpq256_mask (__lhs, __rhs, 6, __U);
}

/* Invoke __builtin_ia32_pcmpgtq256_mask on __A and __B (as __v4di)
   with __U as the mask argument.  */
extern __inline __mmask8
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_cmpgt_epi64_mask (__mmask8 __U, __m256i __A, __m256i __B)
{
  __v4di __lhs = (__v4di) __A;
  __v4di __rhs = (__v4di) __B;

  return (__mmask8) __builtin_ia32_pcmpgtq256_mask (__lhs, __rhs, __U);
}

/* Invoke __builtin_ia32_ptestmd128 on __A and __B (as __v4si) with
   (__mmask8) -1 as the mask argument.  */
extern __inline __mmask8
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_test_epi32_mask (__m128i __A, __m128i __B)
{
  __v4si __lhs = (__v4si) __A;
  __v4si __rhs = (__v4si) __B;

  return (__mmask8) __builtin_ia32_ptestmd128 (__lhs, __rhs, (__mmask8) -1);
}

/* Invoke __builtin_ia32_ptestmd128 on __A and __B (as __v4si) with
   __U as the mask argument.  */
extern __inline __mmask8
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_test_epi32_mask (__mmask8 __U, __m128i __A, __m128i __B)
{
  __v4si __lhs = (__v4si) __A;
  __v4si __rhs = (__v4si) __B;

  return (__mmask8) __builtin_ia32_ptestmd128 (__lhs, __rhs, __U);
}

/* Invoke __builtin_ia32_ptestmd256 on __A and __B (as __v8si) with
   (__mmask8) -1 as the mask argument.  */
extern __inline __mmask8
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_test_epi32_mask (__m256i __A, __m256i __B)
{
  __v8si __lhs = (__v8si) __A;
  __v8si __rhs = (__v8si) __B;

  return (__mmask8) __builtin_ia32_ptestmd256 (__lhs, __rhs, (__mmask8) -1);
}

/* Invoke __builtin_ia32_ptestmd256 on __A and __B (as __v8si) with
   __U as the mask argument.  */
extern __inline __mmask8
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_test_epi32_mask (__mmask8 __U, __m256i __A, __m256i __B)
{
  __v8si __lhs = (__v8si) __A;
  __v8si __rhs = (__v8si) __B;

  return (__mmask8) __builtin_ia32_ptestmd256 (__lhs, __rhs, __U);
}

/* Invoke __builtin_ia32_ptestmq128 on __A and __B (as __v2di) with
   (__mmask8) -1 as the mask argument.  */
extern __inline __mmask8
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_test_epi64_mask (__m128i __A, __m128i __B)
{
  __v2di __lhs = (__v2di) __A;
  __v2di __rhs = (__v2di) __B;

  return (__mmask8) __builtin_ia32_ptestmq128 (__lhs, __rhs, (__mmask8) -1);
}

/* Invoke __builtin_ia32_ptestmq128 on __A and __B (as __v2di) with
   __U as the mask argument.  */
extern __inline __mmask8
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_test_epi64_mask (__mmask8 __U, __m128i __A, __m128i __B)
{
  __v2di __lhs = (__v2di) __A;
  __v2di __rhs = (__v2di) __B;

  return (__mmask8) __builtin_ia32_ptestmq128 (__lhs, __rhs, __U);
}

/* Invoke __builtin_ia32_ptestmq256 on __A and __B (as __v4di) with
   (__mmask8) -1 as the mask argument.  */
extern __inline __mmask8
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_test_epi64_mask (__m256i __A, __m256i __B)
{
  __v4di __lhs = (__v4di) __A;
  __v4di __rhs = (__v4di) __B;

  return (__mmask8) __builtin_ia32_ptestmq256 (__lhs, __rhs, (__mmask8) -1);
}

/* Invoke __builtin_ia32_ptestmq256 on __A and __B (as __v4di) with
   __U as the mask argument.  */
extern __inline __mmask8
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_test_epi64_mask (__mmask8 __U, __m256i __A, __m256i __B)
{
  __v4di __lhs = (__v4di) __A;
  __v4di __rhs = (__v4di) __B;

  return (__mmask8) __builtin_ia32_ptestmq256 (__lhs, __rhs, __U);
}

/* Invoke __builtin_ia32_ptestnmd128 on __A and __B (as __v4si) with
   (__mmask8) -1 as the mask argument.  */
extern __inline __mmask8
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_testn_epi32_mask (__m128i __A, __m128i __B)
{
  __v4si __lhs = (__v4si) __A;
  __v4si __rhs = (__v4si) __B;

  return (__mmask8) __builtin_ia32_ptestnmd128 (__lhs, __rhs, (__mmask8) -1);
}

/* Invoke __builtin_ia32_ptestnmd128 on __A and __B (as __v4si) with
   __U as the mask argument.  */
extern __inline __mmask8
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_testn_epi32_mask (__mmask8 __U, __m128i __A, __m128i __B)
{
  __v4si __lhs = (__v4si) __A;
  __v4si __rhs = (__v4si) __B;

  return (__mmask8) __builtin_ia32_ptestnmd128 (__lhs, __rhs, __U);
}

/* Invoke __builtin_ia32_ptestnmd256 on __A and __B (as __v8si) with
   (__mmask8) -1 as the mask argument.  */
extern __inline __mmask8
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_testn_epi32_mask (__m256i __A, __m256i __B)
{
  __v8si __lhs = (__v8si) __A;
  __v8si __rhs = (__v8si) __B;

  return (__mmask8) __builtin_ia32_ptestnmd256 (__lhs, __rhs, (__mmask8) -1);
}

/* Invoke __builtin_ia32_ptestnmd256 on __A and __B (as __v8si) with
   __U as the mask argument.  */
extern __inline __mmask8
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_testn_epi32_mask (__mmask8 __U, __m256i __A, __m256i __B)
{
  __v8si __lhs = (__v8si) __A;
  __v8si __rhs = (__v8si) __B;

  return (__mmask8) __builtin_ia32_ptestnmd256 (__lhs, __rhs, __U);
}

/* Invoke __builtin_ia32_ptestnmq128 on __A and __B (as __v2di) with
   (__mmask8) -1 as the mask argument.  */
extern __inline __mmask8
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_testn_epi64_mask (__m128i __A, __m128i __B)
{
  __v2di __lhs = (__v2di) __A;
  __v2di __rhs = (__v2di) __B;

  return (__mmask8) __builtin_ia32_ptestnmq128 (__lhs, __rhs, (__mmask8) -1);
}

/* Invoke __builtin_ia32_ptestnmq128 on __A and __B (as __v2di) with
   __U as the mask argument.  */
extern __inline __mmask8
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_testn_epi64_mask (__mmask8 __U, __m128i __A, __m128i __B)
{
  __v2di __lhs = (__v2di) __A;
  __v2di __rhs = (__v2di) __B;

  return (__mmask8) __builtin_ia32_ptestnmq128 (__lhs, __rhs, __U);
}

/* Invoke __builtin_ia32_ptestnmq256 on __A and __B (as __v4di) with
   (__mmask8) -1 as the mask argument.  */
extern __inline __mmask8
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_testn_epi64_mask (__m256i __A, __m256i __B)
{
  __v4di __lhs = (__v4di) __A;
  __v4di __rhs = (__v4di) __B;

  return (__mmask8) __builtin_ia32_ptestnmq256 (__lhs, __rhs, (__mmask8) -1);
}

/* Invoke __builtin_ia32_ptestnmq256 on __A and __B (as __v4di) with
   __U as the mask argument.  */
extern __inline __mmask8
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_testn_epi64_mask (__mmask8 __U, __m256i __A, __m256i __B)
{
  __v4di __lhs = (__v4di) __A;
  __v4di __rhs = (__v4di) __B;

  return (__mmask8) __builtin_ia32_ptestnmq256 (__lhs, __rhs, __U);
}

/* Reinterpret __A and __W as __v4df and pass them, with the mask __U,
   to __builtin_ia32_compressdf256_mask.  */
extern __inline __m256d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_compress_pd (__m256d __W, __mmask8 __U, __m256d __A)
{
  __v4df __val = (__v4df) __A;
  __v4df __src = (__v4df) __W;

  return (__m256d) __builtin_ia32_compressdf256_mask (__val, __src,
						      (__mmask8) __U);
}

/* Pass __A (as __v4df), the vector returned by _mm256_setzero_pd ()
   and the mask __U to __builtin_ia32_compressdf256_mask.  */
extern __inline __m256d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_maskz_compress_pd (__mmask8 __U, __m256d __A)
{
  __v4df __val = (__v4df) __A;
  __v4df __zero = (__v4df) _mm256_setzero_pd ();

  return (__m256d) __builtin_ia32_compressdf256_mask (__val, __zero,
						      (__mmask8) __U);
}

/* Invoke __builtin_ia32_compressstoredf256_mask with __P viewed as a
   __v4df pointer, __A viewed as __v4df, and the mask __U.  Returns
   nothing.  */
extern __inline void
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_compressstoreu_pd (void *__P, __mmask8 __U, __m256d __A)
{
  __v4df *__addr = (__v4df *) __P;
  __v4df __val = (__v4df) __A;

  __builtin_ia32_compressstoredf256_mask (__addr, __val, (__mmask8) __U);
}

/* Reinterpret __A and __W as __v2df and pass them, with the mask __U,
   to __builtin_ia32_compressdf128_mask.  */
extern __inline __m128d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_compress_pd (__m128d __W, __mmask8 __U, __m128d __A)
{
  __v2df __val = (__v2df) __A;
  __v2df __src = (__v2df) __W;

  return (__m128d) __builtin_ia32_compressdf128_mask (__val, __src,
						      (__mmask8) __U);
}

/* Pass __A (as __v2df), the vector returned by _mm_setzero_pd () and
   the mask __U to __builtin_ia32_compressdf128_mask.  */
extern __inline __m128d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_compress_pd (__mmask8 __U, __m128d __A)
{
  __v2df __val = (__v2df) __A;
  __v2df __zero = (__v2df) _mm_setzero_pd ();

  return (__m128d) __builtin_ia32_compressdf128_mask (__val, __zero,
						      (__mmask8) __U);
}

/* Invoke __builtin_ia32_compressstoredf128_mask with __P viewed as a
   __v2df pointer, __A viewed as __v2df, and the mask __U.  Returns
   nothing.  */
extern __inline void
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_compressstoreu_pd (void *__P, __mmask8 __U, __m128d __A)
{
  __v2df *__addr = (__v2df *) __P;
  __v2df __val = (__v2df) __A;

  __builtin_ia32_compressstoredf128_mask (__addr, __val, (__mmask8) __U);
}

/* Reinterpret __A and __W as __v8sf and pass them, with the mask __U,
   to __builtin_ia32_compresssf256_mask.  */
extern __inline __m256
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_compress_ps (__m256 __W, __mmask8 __U, __m256 __A)
{
  __v8sf __val = (__v8sf) __A;
  __v8sf __src = (__v8sf) __W;

  return (__m256) __builtin_ia32_compresssf256_mask (__val, __src,
						     (__mmask8) __U);
}

/* Pass __A (as __v8sf), the vector returned by _mm256_setzero_ps ()
   and the mask __U to __builtin_ia32_compresssf256_mask.  */
extern __inline __m256
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_maskz_compress_ps (__mmask8 __U, __m256 __A)
{
  __v8sf __val = (__v8sf) __A;
  __v8sf __zero = (__v8sf) _mm256_setzero_ps ();

  return (__m256) __builtin_ia32_compresssf256_mask (__val, __zero,
						     (__mmask8) __U);
}

/* Invoke __builtin_ia32_compressstoresf256_mask with __P viewed as a
   __v8sf pointer, __A viewed as __v8sf, and the mask __U.  Returns
   nothing.  */
extern __inline void
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_compressstoreu_ps (void *__P, __mmask8 __U, __m256 __A)
{
  __v8sf *__addr = (__v8sf *) __P;
  __v8sf __val = (__v8sf) __A;

  __builtin_ia32_compressstoresf256_mask (__addr, __val, (__mmask8) __U);
}

/* Reinterpret __A and __W as __v4sf and pass them, with the mask __U,
   to __builtin_ia32_compresssf128_mask.  */
extern __inline __m128
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_compress_ps (__m128 __W, __mmask8 __U, __m128 __A)
{
  __v4sf __val = (__v4sf) __A;
  __v4sf __src = (__v4sf) __W;

  return (__m128) __builtin_ia32_compresssf128_mask (__val, __src,
						     (__mmask8) __U);
}

/* Pass __A (as __v4sf), the vector returned by _mm_setzero_ps () and
   the mask __U to __builtin_ia32_compresssf128_mask.  */
extern __inline __m128
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_compress_ps (__mmask8 __U, __m128 __A)
{
  __v4sf __val = (__v4sf) __A;
  __v4sf __zero = (__v4sf) _mm_setzero_ps ();

  return (__m128) __builtin_ia32_compresssf128_mask (__val, __zero,
						     (__mmask8) __U);
}

/* Invoke __builtin_ia32_compressstoresf128_mask with __P viewed as a
   __v4sf pointer, __A viewed as __v4sf, and the mask __U.  Returns
   nothing.  */
extern __inline void
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_compressstoreu_ps (void *__P, __mmask8 __U, __m128 __A)
{
  __v4sf *__addr = (__v4sf *) __P;
  __v4sf __val = (__v4sf) __A;

  __builtin_ia32_compressstoresf128_mask (__addr, __val, (__mmask8) __U);
}

/* Reinterpret __A and __W as __v4di and pass them, with the mask __U,
   to __builtin_ia32_compressdi256_mask.  */
extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_compress_epi64 (__m256i __W, __mmask8 __U, __m256i __A)
{
  __v4di __val = (__v4di) __A;
  __v4di __src = (__v4di) __W;

  return (__m256i) __builtin_ia32_compressdi256_mask (__val, __src,
						      (__mmask8) __U);
}

/* Pass __A (as __v4di), the vector returned by _mm256_setzero_si256 ()
   and the mask __U to __builtin_ia32_compressdi256_mask.  */
extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_maskz_compress_epi64 (__mmask8 __U, __m256i __A)
{
  __v4di __val = (__v4di) __A;
  __v4di __zero = (__v4di) _mm256_setzero_si256 ();

  return (__m256i) __builtin_ia32_compressdi256_mask (__val, __zero,
						      (__mmask8) __U);
}

/* Invoke __builtin_ia32_compressstoredi256_mask with __P viewed as a
   __v4di pointer, __A viewed as __v4di, and the mask __U.  Returns
   nothing.  */
extern __inline void
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_compressstoreu_epi64 (void *__P, __mmask8 __U, __m256i __A)
{
  __v4di *__addr = (__v4di *) __P;
  __v4di __val = (__v4di) __A;

  __builtin_ia32_compressstoredi256_mask (__addr, __val, (__mmask8) __U);
}

/* Reinterpret __A and __W as __v2di and pass them, with the mask __U,
   to __builtin_ia32_compressdi128_mask.  */
extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_compress_epi64 (__m128i __W, __mmask8 __U, __m128i __A)
{
  __v2di __val = (__v2di) __A;
  __v2di __src = (__v2di) __W;

  return (__m128i) __builtin_ia32_compressdi128_mask (__val, __src,
						      (__mmask8) __U);
}

/* Pass __A (as __v2di), the vector returned by _mm_setzero_si128 ()
   and the mask __U to __builtin_ia32_compressdi128_mask.  */
extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_compress_epi64 (__mmask8 __U, __m128i __A)
{
  __v2di __val = (__v2di) __A;
  __v2di __zero = (__v2di) _mm_setzero_si128 ();

  return (__m128i) __builtin_ia32_compressdi128_mask (__val, __zero,
						      (__mmask8) __U);
}

/* Invoke __builtin_ia32_compressstoredi128_mask with __P viewed as a
   __v2di pointer, __A viewed as __v2di, and the mask __U.  Returns
   nothing.  */
extern __inline void
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_compressstoreu_epi64 (void *__P, __mmask8 __U, __m128i __A)
{
  __v2di *__addr = (__v2di *) __P;
  __v2di __val = (__v2di) __A;

  __builtin_ia32_compressstoredi128_mask (__addr, __val, (__mmask8) __U);
}

/* Reinterpret __A and __W as __v8si and pass them, with the mask __U,
   to __builtin_ia32_compresssi256_mask.  */
extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_compress_epi32 (__m256i __W, __mmask8 __U, __m256i __A)
{
  __v8si __val = (__v8si) __A;
  __v8si __src = (__v8si) __W;

  return (__m256i) __builtin_ia32_compresssi256_mask (__val, __src,
						      (__mmask8) __U);
}

/* Pass __A (as __v8si), the vector returned by _mm256_setzero_si256 ()
   and the mask __U to __builtin_ia32_compresssi256_mask.  */
extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_maskz_compress_epi32 (__mmask8 __U, __m256i __A)
{
  __v8si __val = (__v8si) __A;
  __v8si __zero = (__v8si) _mm256_setzero_si256 ();

  return (__m256i) __builtin_ia32_compresssi256_mask (__val, __zero,
						      (__mmask8) __U);
}

/* Invoke __builtin_ia32_compressstoresi256_mask with __P viewed as a
   __v8si pointer, __A viewed as __v8si, and the mask __U.  Returns
   nothing.  */
extern __inline void
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_compressstoreu_epi32 (void *__P, __mmask8 __U, __m256i __A)
{
  __v8si *__addr = (__v8si *) __P;
  __v8si __val = (__v8si) __A;

  __builtin_ia32_compressstoresi256_mask (__addr, __val, (__mmask8) __U);
}

/* Reinterpret __A and __W as __v4si and pass them, with the mask __U,
   to __builtin_ia32_compresssi128_mask.  */
extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_compress_epi32 (__m128i __W, __mmask8 __U, __m128i __A)
{
  __v4si __val = (__v4si) __A;
  __v4si __src = (__v4si) __W;

  return (__m128i) __builtin_ia32_compresssi128_mask (__val, __src,
						      (__mmask8) __U);
}

/* Pass __A (as __v4si), the vector returned by _mm_setzero_si128 ()
   and the mask __U to __builtin_ia32_compresssi128_mask.  */
extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_compress_epi32 (__mmask8 __U, __m128i __A)
{
  __v4si __val = (__v4si) __A;
  __v4si __zero = (__v4si) _mm_setzero_si128 ();

  return (__m128i) __builtin_ia32_compresssi128_mask (__val, __zero,
						      (__mmask8) __U);
}

/* Invoke __builtin_ia32_compressstoresi128_mask with __P viewed as a
   __v4si pointer, __A viewed as __v4si, and the mask __U.  Returns
   nothing.  */
extern __inline void
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_compressstoreu_epi32 (void *__P, __mmask8 __U, __m128i __A)
{
  __v4si *__addr = (__v4si *) __P;
  __v4si __val = (__v4si) __A;

  __builtin_ia32_compressstoresi128_mask (__addr, __val, (__mmask8) __U);
}

/* Reinterpret __A and __W as __v4df and pass them, with the mask __U,
   to __builtin_ia32_expanddf256_mask.  */
extern __inline __m256d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_expand_pd (__m256d __W, __mmask8 __U, __m256d __A)
{
  __v4df __val = (__v4df) __A;
  __v4df __src = (__v4df) __W;

  return (__m256d) __builtin_ia32_expanddf256_mask (__val, __src,
						    (__mmask8) __U);
}

/* Pass __A (as __v4df), the vector returned by _mm256_setzero_pd ()
   and the mask __U to __builtin_ia32_expanddf256_maskz.  */
extern __inline __m256d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_maskz_expand_pd (__mmask8 __U, __m256d __A)
{
  __v4df __val = (__v4df) __A;
  __v4df __zero = (__v4df) _mm256_setzero_pd ();

  return (__m256d) __builtin_ia32_expanddf256_maskz (__val, __zero,
						     (__mmask8) __U);
}

/* Invoke __builtin_ia32_expandloaddf256_mask with __P viewed as a
   __v4df pointer, __W viewed as __v4df, and the mask __U.  */
extern __inline __m256d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_expandloadu_pd (__m256d __W, __mmask8 __U, void const *__P)
{
  __v4df *__addr = (__v4df *) __P;
  __v4df __src = (__v4df) __W;

  return (__m256d) __builtin_ia32_expandloaddf256_mask (__addr, __src,
							(__mmask8) __U);
}

/* Invoke __builtin_ia32_expandloaddf256_maskz with __P viewed as a
   __v4df pointer, the vector returned by _mm256_setzero_pd (), and
   the mask __U.  */
extern __inline __m256d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_maskz_expandloadu_pd (__mmask8 __U, void const *__P)
{
  __v4df *__addr = (__v4df *) __P;
  __v4df __zero = (__v4df) _mm256_setzero_pd ();

  return (__m256d) __builtin_ia32_expandloaddf256_maskz (__addr, __zero,
							 (__mmask8) __U);
}

/* Reinterpret __A and __W as __v2df and pass them, with the mask __U,
   to __builtin_ia32_expanddf128_mask.  */
extern __inline __m128d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_expand_pd (__m128d __W, __mmask8 __U, __m128d __A)
{
  __v2df __val = (__v2df) __A;
  __v2df __src = (__v2df) __W;

  return (__m128d) __builtin_ia32_expanddf128_mask (__val, __src,
						    (__mmask8) __U);
}

/* Pass __A (as __v2df), the vector returned by _mm_setzero_pd () and
   the mask __U to __builtin_ia32_expanddf128_maskz.  */
extern __inline __m128d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_expand_pd (__mmask8 __U, __m128d __A)
{
  __v2df __val = (__v2df) __A;
  __v2df __zero = (__v2df) _mm_setzero_pd ();

  return (__m128d) __builtin_ia32_expanddf128_maskz (__val, __zero,
						     (__mmask8) __U);
}

/* Invoke __builtin_ia32_expandloaddf128_mask with __P viewed as a
   __v2df pointer, __W viewed as __v2df, and the mask __U.  */
extern __inline __m128d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_expandloadu_pd (__m128d __W, __mmask8 __U, void const *__P)
{
  __v2df *__addr = (__v2df *) __P;
  __v2df __src = (__v2df) __W;

  return (__m128d) __builtin_ia32_expandloaddf128_mask (__addr, __src,
							(__mmask8) __U);
}

/* Invoke __builtin_ia32_expandloaddf128_maskz with __P viewed as a
   __v2df pointer, the vector returned by _mm_setzero_pd (), and the
   mask __U.  */
extern __inline __m128d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_expandloadu_pd (__mmask8 __U, void const *__P)
{
  __v2df *__addr = (__v2df *) __P;
  __v2df __zero = (__v2df) _mm_setzero_pd ();

  return (__m128d) __builtin_ia32_expandloaddf128_maskz (__addr, __zero,
							 (__mmask8) __U);
}

/* Reinterpret __A and __W as __v8sf and pass them, with the mask __U,
   to __builtin_ia32_expandsf256_mask.  */
extern __inline __m256
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_expand_ps (__m256 __W, __mmask8 __U, __m256 __A)
{
  __v8sf __val = (__v8sf) __A;
  __v8sf __src = (__v8sf) __W;

  return (__m256) __builtin_ia32_expandsf256_mask (__val, __src,
						   (__mmask8) __U);
}

/* Pass __A (as __v8sf), the vector returned by _mm256_setzero_ps ()
   and the mask __U to __builtin_ia32_expandsf256_maskz.  */
extern __inline __m256
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_maskz_expand_ps (__mmask8 __U, __m256 __A)
{
  __v8sf __val = (__v8sf) __A;
  __v8sf __zero = (__v8sf) _mm256_setzero_ps ();

  return (__m256) __builtin_ia32_expandsf256_maskz (__val, __zero,
						    (__mmask8) __U);
}

/* Invoke __builtin_ia32_expandloadsf256_mask with __P viewed as a
   __v8sf pointer, __W viewed as __v8sf, and the mask __U.  */
extern __inline __m256
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_expandloadu_ps (__m256 __W, __mmask8 __U, void const *__P)
{
  __v8sf *__addr = (__v8sf *) __P;
  __v8sf __src = (__v8sf) __W;

  return (__m256) __builtin_ia32_expandloadsf256_mask (__addr, __src,
						       (__mmask8) __U);
}

/* Invoke __builtin_ia32_expandloadsf256_maskz with __P viewed as a
   __v8sf pointer, the vector returned by _mm256_setzero_ps (), and
   the mask __U.  */
extern __inline __m256
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_maskz_expandloadu_ps (__mmask8 __U, void const *__P)
{
  __v8sf *__addr = (__v8sf *) __P;
  __v8sf __zero = (__v8sf) _mm256_setzero_ps ();

  return (__m256) __builtin_ia32_expandloadsf256_maskz (__addr, __zero,
							(__mmask8) __U);
}

/* Reinterpret __A and __W as __v4sf and pass them, with the mask __U,
   to __builtin_ia32_expandsf128_mask.  */
extern __inline __m128
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_expand_ps (__m128 __W, __mmask8 __U, __m128 __A)
{
  __v4sf __val = (__v4sf) __A;
  __v4sf __src = (__v4sf) __W;

  return (__m128) __builtin_ia32_expandsf128_mask (__val, __src,
						   (__mmask8) __U);
}

/* Pass __A (as __v4sf), the vector returned by _mm_setzero_ps () and
   the mask __U to __builtin_ia32_expandsf128_maskz.  */
extern __inline __m128
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_expand_ps (__mmask8 __U, __m128 __A)
{
  __v4sf __val = (__v4sf) __A;
  __v4sf __zero = (__v4sf) _mm_setzero_ps ();

  return (__m128) __builtin_ia32_expandsf128_maskz (__val, __zero,
						    (__mmask8) __U);
}

/* Invoke __builtin_ia32_expandloadsf128_mask with __P viewed as a
   __v4sf pointer, __W viewed as __v4sf, and the mask __U.  */
extern __inline __m128
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_expandloadu_ps (__m128 __W, __mmask8 __U, void const *__P)
{
  __v4sf *__addr = (__v4sf *) __P;
  __v4sf __src = (__v4sf) __W;

  return (__m128) __builtin_ia32_expandloadsf128_mask (__addr, __src,
						       (__mmask8) __U);
}

/* Invoke __builtin_ia32_expandloadsf128_maskz with __P viewed as a
   __v4sf pointer, the vector returned by _mm_setzero_ps (), and the
   mask __U.  */
extern __inline __m128
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_expandloadu_ps (__mmask8 __U, void const *__P)
{
  __v4sf *__addr = (__v4sf *) __P;
  __v4sf __zero = (__v4sf) _mm_setzero_ps ();

  return (__m128) __builtin_ia32_expandloadsf128_maskz (__addr, __zero,
							(__mmask8) __U);
}

/* Reinterpret __A and __W as __v4di and pass them, with the mask __U,
   to __builtin_ia32_expanddi256_mask.  */
extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_expand_epi64 (__m256i __W, __mmask8 __U, __m256i __A)
{
  __v4di __val = (__v4di) __A;
  __v4di __src = (__v4di) __W;

  return (__m256i) __builtin_ia32_expanddi256_mask (__val, __src,
						    (__mmask8) __U);
}

/* Pass __A (as __v4di), the vector returned by _mm256_setzero_si256 ()
   and the mask __U to __builtin_ia32_expanddi256_maskz.  */
extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_maskz_expand_epi64 (__mmask8 __U, __m256i __A)
{
  __v4di __val = (__v4di) __A;
  __v4di __zero = (__v4di) _mm256_setzero_si256 ();

  return (__m256i) __builtin_ia32_expanddi256_maskz (__val, __zero,
						     (__mmask8) __U);
}

/* Invoke __builtin_ia32_expandloaddi256_mask with __P viewed as a
   __v4di pointer, __W viewed as __v4di, and the mask __U.  */
extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_expandloadu_epi64 (__m256i __W, __mmask8 __U,
			       void const *__P)
{
  __v4di *__addr = (__v4di *) __P;
  __v4di __src = (__v4di) __W;

  return (__m256i) __builtin_ia32_expandloaddi256_mask (__addr, __src,
							(__mmask8) __U);
}

/* Invoke __builtin_ia32_expandloaddi256_maskz with __P viewed as a
   __v4di pointer, the vector returned by _mm256_setzero_si256 (), and
   the mask __U.  */
extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_maskz_expandloadu_epi64 (__mmask8 __U, void const *__P)
{
  __v4di *__addr = (__v4di *) __P;
  __v4di __zero = (__v4di) _mm256_setzero_si256 ();

  return (__m256i) __builtin_ia32_expandloaddi256_maskz (__addr, __zero,
							 (__mmask8) __U);
}

/* Reinterpret __A and __W as __v2di and pass them, with the mask __U,
   to __builtin_ia32_expanddi128_mask.  */
extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_expand_epi64 (__m128i __W, __mmask8 __U, __m128i __A)
{
  __v2di __val = (__v2di) __A;
  __v2di __src = (__v2di) __W;

  return (__m128i) __builtin_ia32_expanddi128_mask (__val, __src,
						    (__mmask8) __U);
}

/* Pass __A (as __v2di), the vector returned by _mm_setzero_si128 ()
   and the mask __U to __builtin_ia32_expanddi128_maskz.  */
extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_expand_epi64 (__mmask8 __U, __m128i __A)
{
  __v2di __val = (__v2di) __A;
  __v2di __zero = (__v2di) _mm_setzero_si128 ();

  return (__m128i) __builtin_ia32_expanddi128_maskz (__val, __zero,
						     (__mmask8) __U);
}

/* Invoke __builtin_ia32_expandloaddi128_mask with __P viewed as a
   __v2di pointer, __W viewed as __v2di, and the mask __U.  */
extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_expandloadu_epi64 (__m128i __W, __mmask8 __U, void const *__P)
{
  __v2di *__addr = (__v2di *) __P;
  __v2di __src = (__v2di) __W;

  return (__m128i) __builtin_ia32_expandloaddi128_mask (__addr, __src,
							(__mmask8) __U);
}

/* Invoke __builtin_ia32_expandloaddi128_maskz with __P viewed as a
   __v2di pointer, the vector returned by _mm_setzero_si128 (), and
   the mask __U.  */
extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_expandloadu_epi64 (__mmask8 __U, void const *__P)
{
  __v2di *__addr = (__v2di *) __P;
  __v2di __zero = (__v2di) _mm_setzero_si128 ();

  return (__m128i) __builtin_ia32_expandloaddi128_maskz (__addr, __zero,
							 (__mmask8) __U);
}

/* Reinterpret __A and __W as __v8si and pass them, with the mask __U,
   to __builtin_ia32_expandsi256_mask.  */
extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_expand_epi32 (__m256i __W, __mmask8 __U, __m256i __A)
{
  __v8si __val = (__v8si) __A;
  __v8si __src = (__v8si) __W;

  return (__m256i) __builtin_ia32_expandsi256_mask (__val, __src,
						    (__mmask8) __U);
}

/* Pass __A (as __v8si), the vector returned by _mm256_setzero_si256 ()
   and the mask __U to __builtin_ia32_expandsi256_maskz.  */
extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_maskz_expand_epi32 (__mmask8 __U, __m256i __A)
{
  __v8si __val = (__v8si) __A;
  __v8si __zero = (__v8si) _mm256_setzero_si256 ();

  return (__m256i) __builtin_ia32_expandsi256_maskz (__val, __zero,
						     (__mmask8) __U);
}

/* Invoke __builtin_ia32_expandloadsi256_mask with __P viewed as a
   __v8si pointer, __W viewed as __v8si, and the mask __U.  */
extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_expandloadu_epi32 (__m256i __W, __mmask8 __U,
			       void const *__P)
{
  __v8si *__addr = (__v8si *) __P;
  __v8si __src = (__v8si) __W;

  return (__m256i) __builtin_ia32_expandloadsi256_mask (__addr, __src,
							(__mmask8) __U);
}

/* Invoke __builtin_ia32_expandloadsi256_maskz with __P viewed as a
   __v8si pointer, the vector returned by _mm256_setzero_si256 (), and
   the mask __U.  */
extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_maskz_expandloadu_epi32 (__mmask8 __U, void const *__P)
{
  __v8si *__addr = (__v8si *) __P;
  __v8si __zero = (__v8si) _mm256_setzero_si256 ();

  return (__m256i) __builtin_ia32_expandloadsi256_maskz (__addr, __zero,
							 (__mmask8) __U);
}

/* Reinterpret __A and __W as __v4si and pass them, with the mask __U,
   to __builtin_ia32_expandsi128_mask.  */
extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_expand_epi32 (__m128i __W, __mmask8 __U, __m128i __A)
{
  __v4si __val = (__v4si) __A;
  __v4si __src = (__v4si) __W;

  return (__m128i) __builtin_ia32_expandsi128_mask (__val, __src,
						    (__mmask8) __U);
}

/* Pass __A (as __v4si), the vector returned by _mm_setzero_si128 ()
   and the mask __U to __builtin_ia32_expandsi128_maskz.  */
extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_expand_epi32 (__mmask8 __U, __m128i __A)
{
  __v4si __val = (__v4si) __A;
  __v4si __zero = (__v4si) _mm_setzero_si128 ();

  return (__m128i) __builtin_ia32_expandsi128_maskz (__val, __zero,
						     (__mmask8) __U);
}

/* Invoke __builtin_ia32_expandloadsi128_mask with __P viewed as a
   __v4si pointer, __W viewed as __v4si, and the mask __U.  */
extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_expandloadu_epi32 (__m128i __W, __mmask8 __U, void const *__P)
{
  __v4si *__addr = (__v4si *) __P;
  __v4si __src = (__v4si) __W;

  return (__m128i) __builtin_ia32_expandloadsi128_mask (__addr, __src,
							(__mmask8) __U);
}

/* Invoke __builtin_ia32_expandloadsi128_maskz with __P viewed as a
   __v4si pointer, the vector returned by _mm_setzero_si128 (), and
   the mask __U.  */
extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_expandloadu_epi32 (__mmask8 __U, void const *__P)
{
  __v4si *__addr = (__v4si *) __P;
  __v4si __zero = (__v4si) _mm_setzero_si128 ();

  return (__m128i) __builtin_ia32_expandloadsi128_maskz (__addr, __zero,
							 (__mmask8) __U);
}

/* Invoke __builtin_ia32_vpermt2varpd256_mask with the index vector __I
   (as __v4di) first, then __A and __B (as __v4df), and (__mmask8) -1
   as the mask argument.  */
extern __inline __m256d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_permutex2var_pd (__m256d __A, __m256i __I, __m256d __B)
{
  __v4di __idx = (__v4di) __I;
  __v4df __lhs = (__v4df) __A;
  __v4df __rhs = (__v4df) __B;

  return (__m256d) __builtin_ia32_vpermt2varpd256_mask (__idx, __lhs, __rhs,
							(__mmask8) -1);
}

/* Invoke __builtin_ia32_vpermt2varpd256_mask with the index vector __I
   (as __v4di) first, then __A and __B (as __v4df), and __U as the
   mask argument.  */
extern __inline __m256d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_permutex2var_pd (__m256d __A, __mmask8 __U, __m256i __I,
			     __m256d __B)
{
  __v4di __idx = (__v4di) __I;
  __v4df __lhs = (__v4df) __A;
  __v4df __rhs = (__v4df) __B;

  return (__m256d) __builtin_ia32_vpermt2varpd256_mask (__idx, __lhs, __rhs,
							(__mmask8) __U);
}

/* Invoke __builtin_ia32_vpermi2varpd256_mask with __A (as __v4df)
   first, the index vector __I (as __v4di) second, then __B (as
   __v4df), and __U as the mask argument.  */
extern __inline __m256d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask2_permutex2var_pd (__m256d __A, __m256i __I, __mmask8 __U,
			      __m256d __B)
{
  __v4df __lhs = (__v4df) __A;
  __v4di __idx = (__v4di) __I;
  __v4df __rhs = (__v4df) __B;

  return (__m256d) __builtin_ia32_vpermi2varpd256_mask (__lhs, __idx, __rhs,
							(__mmask8) __U);
}

/* Invoke __builtin_ia32_vpermt2varpd256_maskz with the index vector
   __I (as __v4di) first, then __A and __B (as __v4df), and __U as the
   mask argument.  */
extern __inline __m256d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_maskz_permutex2var_pd (__mmask8 __U, __m256d __A, __m256i __I,
			      __m256d __B)
{
  __v4di __idx = (__v4di) __I;
  __v4df __lhs = (__v4df) __A;
  __v4df __rhs = (__v4df) __B;

  return (__m256d) __builtin_ia32_vpermt2varpd256_maskz (__idx, __lhs, __rhs,
							 (__mmask8) __U);
}

/* Invoke __builtin_ia32_vpermt2varps256_mask with the index vector __I
   (as __v8si) first, then __A and __B (as __v8sf), and (__mmask8) -1
   as the mask argument.  */
extern __inline __m256
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_permutex2var_ps (__m256 __A, __m256i __I, __m256 __B)
{
  __v8si __idx = (__v8si) __I;
  __v8sf __lhs = (__v8sf) __A;
  __v8sf __rhs = (__v8sf) __B;

  return (__m256) __builtin_ia32_vpermt2varps256_mask (__idx, __lhs, __rhs,
						       (__mmask8) -1);
}

/* Invoke __builtin_ia32_vpermt2varps256_mask with the index vector __I
   (as __v8si) first, then __A and __B (as __v8sf), and __U as the
   mask argument.  */
extern __inline __m256
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_permutex2var_ps (__m256 __A, __mmask8 __U, __m256i __I,
			     __m256 __B)
{
  __v8si __idx = (__v8si) __I;
  __v8sf __lhs = (__v8sf) __A;
  __v8sf __rhs = (__v8sf) __B;

  return (__m256) __builtin_ia32_vpermt2varps256_mask (__idx, __lhs, __rhs,
						       (__mmask8) __U);
}

/* "mask2" two-source permute on __v8sf data.  Uses the vpermi2varps256
   builtin, which takes __A first and the index vector __I second;
   __U is forwarded as the mask.  */
extern __inline __m256
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask2_permutex2var_ps (__m256 __A, __m256i __I, __mmask8 __U,
			      __m256 __B)
{
  __v8sf __src1 = (__v8sf) __A;
  __v8si __idx = (__v8si) __I;
  __v8sf __src2 = (__v8sf) __B;
  __mmask8 __k = (__mmask8) __U;

  return (__m256)
    __builtin_ia32_vpermi2varps256_mask (__src1, __idx, __src2, __k);
}

/* Two-source permute on __v8sf data through the _maskz form of the
   vpermt2varps256 builtin; __I is the index vector and __U the mask.  */
extern __inline __m256
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_maskz_permutex2var_ps (__mmask8 __U, __m256 __A, __m256i __I,
			      __m256 __B)
{
  __v8si __idx = (__v8si) __I;
  __v8sf __src1 = (__v8sf) __A;
  __v8sf __src2 = (__v8sf) __B;
  __mmask8 __k = (__mmask8) __U;

  return (__m256)
    __builtin_ia32_vpermt2varps256_maskz (__idx, __src1, __src2, __k);
}

/* Two-source permute on __v2di data: __I is the index vector, __A and
   __B are the data operands.  Invokes the vpermt2varq128 mask builtin
   with every mask bit set.  */
extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_permutex2var_epi64 (__m128i __A, __m128i __I, __m128i __B)
{
  __v2di __idx = (__v2di) __I;
  __v2di __src1 = (__v2di) __A;
  __v2di __src2 = (__v2di) __B;
  __mmask8 __all = (__mmask8) -1;

  return (__m128i)
    __builtin_ia32_vpermt2varq128_mask (__idx, __src1, __src2, __all);
}

/* Masked two-source permute on __v2di data.  __I (indices) leads the
   vpermt2varq128 builtin's operand list, followed by __A and __B;
   __U is forwarded as the mask.  */
extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_permutex2var_epi64 (__m128i __A, __mmask8 __U, __m128i __I,
			     __m128i __B)
{
  __v2di __idx = (__v2di) __I;
  __v2di __src1 = (__v2di) __A;
  __v2di __src2 = (__v2di) __B;
  __mmask8 __k = (__mmask8) __U;

  return (__m128i)
    __builtin_ia32_vpermt2varq128_mask (__idx, __src1, __src2, __k);
}

/* "mask2" two-source permute on __v2di data.  Uses the vpermi2varq128
   builtin, which takes __A first and the index vector __I second;
   __U is forwarded as the mask.  */
extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask2_permutex2var_epi64 (__m128i __A, __m128i __I, __mmask8 __U,
			      __m128i __B)
{
  __v2di __src1 = (__v2di) __A;
  __v2di __idx = (__v2di) __I;
  __v2di __src2 = (__v2di) __B;
  __mmask8 __k = (__mmask8) __U;

  return (__m128i)
    __builtin_ia32_vpermi2varq128_mask (__src1, __idx, __src2, __k);
}

/* Two-source permute on __v2di data through the _maskz form of the
   vpermt2varq128 builtin; __I is the index vector and __U the mask.  */
extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_permutex2var_epi64 (__mmask8 __U, __m128i __A, __m128i __I,
			      __m128i __B)
{
  __v2di __idx = (__v2di) __I;
  __v2di __src1 = (__v2di) __A;
  __v2di __src2 = (__v2di) __B;
  __mmask8 __k = (__mmask8) __U;

  return (__m128i)
    __builtin_ia32_vpermt2varq128_maskz (__idx, __src1, __src2, __k);
}

/* Two-source permute on __v4si data: __I is the index vector, __A and
   __B are the data operands.  Invokes the vpermt2vard128 mask builtin
   with every mask bit set.  */
extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_permutex2var_epi32 (__m128i __A, __m128i __I, __m128i __B)
{
  __v4si __idx = (__v4si) __I;
  __v4si __src1 = (__v4si) __A;
  __v4si __src2 = (__v4si) __B;
  __mmask8 __all = (__mmask8) -1;

  return (__m128i)
    __builtin_ia32_vpermt2vard128_mask (__idx, __src1, __src2, __all);
}

/* Masked two-source permute on __v4si data.  __I (indices) leads the
   vpermt2vard128 builtin's operand list, followed by __A and __B;
   __U is forwarded as the mask.  */
extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_permutex2var_epi32 (__m128i __A, __mmask8 __U, __m128i __I,
			     __m128i __B)
{
  __v4si __idx = (__v4si) __I;
  __v4si __src1 = (__v4si) __A;
  __v4si __src2 = (__v4si) __B;
  __mmask8 __k = (__mmask8) __U;

  return (__m128i)
    __builtin_ia32_vpermt2vard128_mask (__idx, __src1, __src2, __k);
}

/* "mask2" two-source permute on __v4si data.  Uses the vpermi2vard128
   builtin, which takes __A first and the index vector __I second;
   __U is forwarded as the mask.  */
extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask2_permutex2var_epi32 (__m128i __A, __m128i __I, __mmask8 __U,
			      __m128i __B)
{
  __v4si __src1 = (__v4si) __A;
  __v4si __idx = (__v4si) __I;
  __v4si __src2 = (__v4si) __B;
  __mmask8 __k = (__mmask8) __U;

  return (__m128i)
    __builtin_ia32_vpermi2vard128_mask (__src1, __idx, __src2, __k);
}

/* Two-source permute on __v4si data through the _maskz form of the
   vpermt2vard128 builtin; __I is the index vector and __U the mask.  */
extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_permutex2var_epi32 (__mmask8 __U, __m128i __A, __m128i __I,
			      __m128i __B)
{
  __v4si __idx = (__v4si) __I;
  __v4si __src1 = (__v4si) __A;
  __v4si __src2 = (__v4si) __B;
  __mmask8 __k = (__mmask8) __U;

  return (__m128i)
    __builtin_ia32_vpermt2vard128_maskz (__idx, __src1, __src2, __k);
}

/* Two-source permute on __v4di data: __I is the index vector, __A and
   __B are the data operands.  Invokes the vpermt2varq256 mask builtin
   with every mask bit set.  */
extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_permutex2var_epi64 (__m256i __A, __m256i __I, __m256i __B)
{
  __v4di __idx = (__v4di) __I;
  __v4di __src1 = (__v4di) __A;
  __v4di __src2 = (__v4di) __B;
  __mmask8 __all = (__mmask8) -1;

  return (__m256i)
    __builtin_ia32_vpermt2varq256_mask (__idx, __src1, __src2, __all);
}

/* Masked two-source permute on __v4di data.  __I (indices) leads the
   vpermt2varq256 builtin's operand list, followed by __A and __B;
   __U is forwarded as the mask.  */
extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_permutex2var_epi64 (__m256i __A, __mmask8 __U, __m256i __I,
				__m256i __B)
{
  __v4di __idx = (__v4di) __I;
  __v4di __src1 = (__v4di) __A;
  __v4di __src2 = (__v4di) __B;
  __mmask8 __k = (__mmask8) __U;

  return (__m256i)
    __builtin_ia32_vpermt2varq256_mask (__idx, __src1, __src2, __k);
}

/* "mask2" two-source permute on __v4di data.  Uses the vpermi2varq256
   builtin, which takes __A first and the index vector __I second;
   __U is forwarded as the mask.  */
extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask2_permutex2var_epi64 (__m256i __A, __m256i __I,
				 __mmask8 __U, __m256i __B)
{
  __v4di __src1 = (__v4di) __A;
  __v4di __idx = (__v4di) __I;
  __v4di __src2 = (__v4di) __B;
  __mmask8 __k = (__mmask8) __U;

  return (__m256i)
    __builtin_ia32_vpermi2varq256_mask (__src1, __idx, __src2, __k);
}

/* Two-source permute on __v4di data through the _maskz form of the
   vpermt2varq256 builtin; __I is the index vector and __U the mask.  */
extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_maskz_permutex2var_epi64 (__mmask8 __U, __m256i __A,
				 __m256i __I, __m256i __B)
{
  __v4di __idx = (__v4di) __I;
  __v4di __src1 = (__v4di) __A;
  __v4di __src2 = (__v4di) __B;
  __mmask8 __k = (__mmask8) __U;

  return (__m256i)
    __builtin_ia32_vpermt2varq256_maskz (__idx, __src1, __src2, __k);
}

/* Two-source permute on __v8si data: __I is the index vector, __A and
   __B are the data operands.  Invokes the vpermt2vard256 mask builtin
   with every mask bit set.  */
extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_permutex2var_epi32 (__m256i __A, __m256i __I, __m256i __B)
{
  __v8si __idx = (__v8si) __I;
  __v8si __src1 = (__v8si) __A;
  __v8si __src2 = (__v8si) __B;
  __mmask8 __all = (__mmask8) -1;

  return (__m256i)
    __builtin_ia32_vpermt2vard256_mask (__idx, __src1, __src2, __all);
}

/* Masked two-source permute on __v8si data.  __I (indices) leads the
   vpermt2vard256 builtin's operand list, followed by __A and __B;
   __U is forwarded as the mask.  */
extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_permutex2var_epi32 (__m256i __A, __mmask8 __U, __m256i __I,
				__m256i __B)
{
  __v8si __idx = (__v8si) __I;
  __v8si __src1 = (__v8si) __A;
  __v8si __src2 = (__v8si) __B;
  __mmask8 __k = (__mmask8) __U;

  return (__m256i)
    __builtin_ia32_vpermt2vard256_mask (__idx, __src1, __src2, __k);
}

/* "mask2" two-source permute on __v8si data.  Uses the vpermi2vard256
   builtin, which takes __A first and the index vector __I second;
   __U is forwarded as the mask.  */
extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask2_permutex2var_epi32 (__m256i __A, __m256i __I,
				 __mmask8 __U, __m256i __B)
{
  __v8si __src1 = (__v8si) __A;
  __v8si __idx = (__v8si) __I;
  __v8si __src2 = (__v8si) __B;
  __mmask8 __k = (__mmask8) __U;

  return (__m256i)
    __builtin_ia32_vpermi2vard256_mask (__src1, __idx, __src2, __k);
}

/* Two-source permute on __v8si data through the _maskz form of the
   vpermt2vard256 builtin; __I is the index vector and __U the mask.  */
extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_maskz_permutex2var_epi32 (__mmask8 __U, __m256i __A,
				 __m256i __I, __m256i __B)
{
  __v8si __idx = (__v8si) __I;
  __v8si __src1 = (__v8si) __A;
  __v8si __src2 = (__v8si) __B;
  __mmask8 __k = (__mmask8) __U;

  return (__m256i)
    __builtin_ia32_vpermt2vard256_maskz (__idx, __src1, __src2, __k);
}

/* Two-source permute on __v2df data: __I is the index vector, __A and
   __B are the data operands.  Invokes the vpermt2varpd128 mask builtin
   with every mask bit set.  */
extern __inline __m128d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_permutex2var_pd (__m128d __A, __m128i __I, __m128d __B)
{
  __v2di __idx = (__v2di) __I;
  __v2df __src1 = (__v2df) __A;
  __v2df __src2 = (__v2df) __B;
  __mmask8 __all = (__mmask8) -1;

  return (__m128d)
    __builtin_ia32_vpermt2varpd128_mask (__idx, __src1, __src2, __all);
}

/* Masked two-source permute on __v2df data.  __I (indices) leads the
   vpermt2varpd128 builtin's operand list, followed by __A and __B;
   __U is forwarded as the mask.  */
extern __inline __m128d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_permutex2var_pd (__m128d __A, __mmask8 __U, __m128i __I,
			  __m128d __B)
{
  __v2di __idx = (__v2di) __I;
  __v2df __src1 = (__v2df) __A;
  __v2df __src2 = (__v2df) __B;
  __mmask8 __k = (__mmask8) __U;

  return (__m128d)
    __builtin_ia32_vpermt2varpd128_mask (__idx, __src1, __src2, __k);
}

/* "mask2" two-source permute on __v2df data.  Uses the vpermi2varpd128
   builtin, which takes __A first and the index vector __I second;
   __U is forwarded as the mask.  */
extern __inline __m128d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask2_permutex2var_pd (__m128d __A, __m128i __I, __mmask8 __U,
			   __m128d __B)
{
  __v2df __src1 = (__v2df) __A;
  __v2di __idx = (__v2di) __I;
  __v2df __src2 = (__v2df) __B;
  __mmask8 __k = (__mmask8) __U;

  return (__m128d)
    __builtin_ia32_vpermi2varpd128_mask (__src1, __idx, __src2, __k);
}

/* Two-source permute on __v2df data through the _maskz form of the
   vpermt2varpd128 builtin; __I is the index vector and __U the mask.  */
extern __inline __m128d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_permutex2var_pd (__mmask8 __U, __m128d __A, __m128i __I,
			   __m128d __B)
{
  __v2di __idx = (__v2di) __I;
  __v2df __src1 = (__v2df) __A;
  __v2df __src2 = (__v2df) __B;
  __mmask8 __k = (__mmask8) __U;

  return (__m128d)
    __builtin_ia32_vpermt2varpd128_maskz (__idx, __src1, __src2, __k);
}

/* Two-source permute on __v4sf data: __I is the index vector, __A and
   __B are the data operands.  Invokes the vpermt2varps128 mask builtin
   with every mask bit set.  */
extern __inline __m128
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_permutex2var_ps (__m128 __A, __m128i __I, __m128 __B)
{
  __v4si __idx = (__v4si) __I;
  __v4sf __src1 = (__v4sf) __A;
  __v4sf __src2 = (__v4sf) __B;
  __mmask8 __all = (__mmask8) -1;

  return (__m128)
    __builtin_ia32_vpermt2varps128_mask (__idx, __src1, __src2, __all);
}

/* Masked two-source permute on __v4sf data.  __I (indices) leads the
   vpermt2varps128 builtin's operand list, followed by __A and __B;
   __U is forwarded as the mask.  */
extern __inline __m128
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_permutex2var_ps (__m128 __A, __mmask8 __U, __m128i __I,
			  __m128 __B)
{
  __v4si __idx = (__v4si) __I;
  __v4sf __src1 = (__v4sf) __A;
  __v4sf __src2 = (__v4sf) __B;
  __mmask8 __k = (__mmask8) __U;

  return (__m128)
    __builtin_ia32_vpermt2varps128_mask (__idx, __src1, __src2, __k);
}

/* "mask2" two-source permute on __v4sf data.  Uses the vpermi2varps128
   builtin, which takes __A first and the index vector __I second;
   __U is forwarded as the mask.  */
extern __inline __m128
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask2_permutex2var_ps (__m128 __A, __m128i __I, __mmask8 __U,
			   __m128 __B)
{
  __v4sf __src1 = (__v4sf) __A;
  __v4si __idx = (__v4si) __I;
  __v4sf __src2 = (__v4sf) __B;
  __mmask8 __k = (__mmask8) __U;

  return (__m128)
    __builtin_ia32_vpermi2varps128_mask (__src1, __idx, __src2, __k);
}

/* Two-source permute on __v4sf data through the _maskz form of the
   vpermt2varps128 builtin; __I is the index vector and __U the mask.  */
extern __inline __m128
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_permutex2var_ps (__mmask8 __U, __m128 __A, __m128i __I,
			   __m128 __B)
{
  __v4si __idx = (__v4si) __I;
  __v4sf __src1 = (__v4sf) __A;
  __v4sf __src2 = (__v4sf) __B;
  __mmask8 __k = (__mmask8) __U;

  return (__m128)
    __builtin_ia32_vpermt2varps128_maskz (__idx, __src1, __src2, __k);
}

/* Wrapper for the psravq128 mask builtin (variable arithmetic right
   shift of 64-bit lanes, going by the intrinsic name).  __X and __Y are
   its two inputs; a zero vector and an all-ones mask fill the rest.  */
extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_srav_epi64 (__m128i __X, __m128i __Y)
{
  __v2di __val = (__v2di) __X;
  __v2di __cnt = (__v2di) __Y;
  __v2di __zero = (__v2di) _mm_setzero_si128 ();
  __mmask8 __all = (__mmask8) -1;

  return (__m128i)
    __builtin_ia32_psravq128_mask (__val, __cnt, __zero, __all);
}

/* Wrapper for the psravq128 mask builtin (variable arithmetic right
   shift of 64-bit lanes, going by the intrinsic name).  __X and __Y are
   its two inputs; __W and __U are forwarded as the third vector operand
   and the mask.  */
extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_srav_epi64 (__m128i __W, __mmask8 __U, __m128i __X,
		     __m128i __Y)
{
  __v2di __val = (__v2di) __X;
  __v2di __cnt = (__v2di) __Y;
  __v2di __old = (__v2di) __W;
  __mmask8 __k = (__mmask8) __U;

  return (__m128i)
    __builtin_ia32_psravq128_mask (__val, __cnt, __old, __k);
}

/* Wrapper for the psravq128 mask builtin (variable arithmetic right
   shift of 64-bit lanes, going by the intrinsic name).  __X and __Y are
   its two inputs; a zero vector is the third operand and __U the mask.  */
extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_srav_epi64 (__mmask8 __U, __m128i __X, __m128i __Y)
{
  __v2di __val = (__v2di) __X;
  __v2di __cnt = (__v2di) __Y;
  __v2di __zero = (__v2di) _mm_setzero_si128 ();
  __mmask8 __k = (__mmask8) __U;

  return (__m128i)
    __builtin_ia32_psravq128_mask (__val, __cnt, __zero, __k);
}

/* Wrapper for the psllv8si mask builtin (variable left shift of 32-bit
   lanes, going by the intrinsic name).  __X and __Y are its two inputs;
   __W and __U are forwarded as the third vector operand and the mask.  */
extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_sllv_epi32 (__m256i __W, __mmask8 __U, __m256i __X,
			__m256i __Y)
{
  __v8si __val = (__v8si) __X;
  __v8si __cnt = (__v8si) __Y;
  __v8si __old = (__v8si) __W;
  __mmask8 __k = (__mmask8) __U;

  return (__m256i)
    __builtin_ia32_psllv8si_mask (__val, __cnt, __old, __k);
}

/* Wrapper for the psllv8si mask builtin (variable left shift of 32-bit
   lanes, going by the intrinsic name).  __X and __Y are its two inputs;
   a zero vector is the third operand and __U the mask.  */
extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_maskz_sllv_epi32 (__mmask8 __U, __m256i __X, __m256i __Y)
{
  __v8si __val = (__v8si) __X;
  __v8si __cnt = (__v8si) __Y;
  __v8si __zero = (__v8si) _mm256_setzero_si256 ();
  __mmask8 __k = (__mmask8) __U;

  return (__m256i)
    __builtin_ia32_psllv8si_mask (__val, __cnt, __zero, __k);
}

/* Wrapper for the psllv4si mask builtin (variable left shift of 32-bit
   lanes, going by the intrinsic name).  __X and __Y are its two inputs;
   __W and __U are forwarded as the third vector operand and the mask.  */
extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_sllv_epi32 (__m128i __W, __mmask8 __U, __m128i __X,
		     __m128i __Y)
{
  __v4si __val = (__v4si) __X;
  __v4si __cnt = (__v4si) __Y;
  __v4si __old = (__v4si) __W;
  __mmask8 __k = (__mmask8) __U;

  return (__m128i)
    __builtin_ia32_psllv4si_mask (__val, __cnt, __old, __k);
}

/* Wrapper for the psllv4si mask builtin (variable left shift of 32-bit
   lanes, going by the intrinsic name).  __X and __Y are its two inputs;
   a zero vector is the third operand and __U the mask.  */
extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_sllv_epi32 (__mmask8 __U, __m128i __X, __m128i __Y)
{
  __v4si __val = (__v4si) __X;
  __v4si __cnt = (__v4si) __Y;
  __v4si __zero = (__v4si) _mm_setzero_si128 ();
  __mmask8 __k = (__mmask8) __U;

  return (__m128i)
    __builtin_ia32_psllv4si_mask (__val, __cnt, __zero, __k);
}

/* Wrapper for the psllv4di mask builtin (variable left shift of 64-bit
   lanes, going by the intrinsic name).  __X and __Y are its two inputs;
   __W and __U are forwarded as the third vector operand and the mask.  */
extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_sllv_epi64 (__m256i __W, __mmask8 __U, __m256i __X,
			__m256i __Y)
{
  __v4di __val = (__v4di) __X;
  __v4di __cnt = (__v4di) __Y;
  __v4di __old = (__v4di) __W;
  __mmask8 __k = (__mmask8) __U;

  return (__m256i)
    __builtin_ia32_psllv4di_mask (__val, __cnt, __old, __k);
}

/* Wrapper for the psllv4di mask builtin (variable left shift of 64-bit
   lanes, going by the intrinsic name).  __X and __Y are its two inputs;
   a zero vector is the third operand and __U the mask.  */
extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_maskz_sllv_epi64 (__mmask8 __U, __m256i __X, __m256i __Y)
{
  __v4di __val = (__v4di) __X;
  __v4di __cnt = (__v4di) __Y;
  __v4di __zero = (__v4di) _mm256_setzero_si256 ();
  __mmask8 __k = (__mmask8) __U;

  return (__m256i)
    __builtin_ia32_psllv4di_mask (__val, __cnt, __zero, __k);
}

/* Wrapper for the psllv2di mask builtin (variable left shift of 64-bit
   lanes, going by the intrinsic name).  __X and __Y are its two inputs;
   __W and __U are forwarded as the third vector operand and the mask.  */
extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_sllv_epi64 (__m128i __W, __mmask8 __U, __m128i __X,
		     __m128i __Y)
{
  __v2di __val = (__v2di) __X;
  __v2di __cnt = (__v2di) __Y;
  __v2di __old = (__v2di) __W;
  __mmask8 __k = (__mmask8) __U;

  return (__m128i)
    __builtin_ia32_psllv2di_mask (__val, __cnt, __old, __k);
}

/* Wrapper for the psllv2di mask builtin (variable left shift of 64-bit
   lanes, going by the intrinsic name).  __X and __Y are its two inputs;
   a zero vector is the third operand and __U the mask.  */
extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_sllv_epi64 (__mmask8 __U, __m128i __X, __m128i __Y)
{
  __v2di __val = (__v2di) __X;
  __v2di __cnt = (__v2di) __Y;
  __v2di __zero = (__v2di) _mm_setzero_si128 ();
  __mmask8 __k = (__mmask8) __U;

  return (__m128i)
    __builtin_ia32_psllv2di_mask (__val, __cnt, __zero, __k);
}

/* Wrapper for the psrav8si mask builtin (variable arithmetic right
   shift of 32-bit lanes, going by the intrinsic name).  __X and __Y are
   its two inputs; __W and __U are forwarded as the third vector operand
   and the mask.  */
extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_srav_epi32 (__m256i __W, __mmask8 __U, __m256i __X,
			__m256i __Y)
{
  __v8si __val = (__v8si) __X;
  __v8si __cnt = (__v8si) __Y;
  __v8si __old = (__v8si) __W;
  __mmask8 __k = (__mmask8) __U;

  return (__m256i)
    __builtin_ia32_psrav8si_mask (__val, __cnt, __old, __k);
}

/* Wrapper for the psrav8si mask builtin (variable arithmetic right
   shift of 32-bit lanes, going by the intrinsic name).  __X and __Y are
   its two inputs; a zero vector is the third operand and __U the mask.  */
extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_maskz_srav_epi32 (__mmask8 __U, __m256i __X, __m256i __Y)
{
  __v8si __val = (__v8si) __X;
  __v8si __cnt = (__v8si) __Y;
  __v8si __zero = (__v8si) _mm256_setzero_si256 ();
  __mmask8 __k = (__mmask8) __U;

  return (__m256i)
    __builtin_ia32_psrav8si_mask (__val, __cnt, __zero, __k);
}

/* Wrapper for the psrav4si mask builtin (variable arithmetic right
   shift of 32-bit lanes, going by the intrinsic name).  __X and __Y are
   its two inputs; __W and __U are forwarded as the third vector operand
   and the mask.  */
extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_srav_epi32 (__m128i __W, __mmask8 __U, __m128i __X,
		     __m128i __Y)
{
  __v4si __val = (__v4si) __X;
  __v4si __cnt = (__v4si) __Y;
  __v4si __old = (__v4si) __W;
  __mmask8 __k = (__mmask8) __U;

  return (__m128i)
    __builtin_ia32_psrav4si_mask (__val, __cnt, __old, __k);
}

/* Wrapper for the psrav4si mask builtin (variable arithmetic right
   shift of 32-bit lanes, going by the intrinsic name).  __X and __Y are
   its two inputs; a zero vector is the third operand and __U the mask.  */
extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_srav_epi32 (__mmask8 __U, __m128i __X, __m128i __Y)
{
  __v4si __val = (__v4si) __X;
  __v4si __cnt = (__v4si) __Y;
  __v4si __zero = (__v4si) _mm_setzero_si128 ();
  __mmask8 __k = (__mmask8) __U;

  return (__m128i)
    __builtin_ia32_psrav4si_mask (__val, __cnt, __zero, __k);
}

/* Wrapper for the psrlv8si mask builtin (variable logical right shift
   of 32-bit lanes, going by the intrinsic name).  __X and __Y are its
   two inputs; __W and __U are forwarded as the third vector operand and
   the mask.  */
extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_srlv_epi32 (__m256i __W, __mmask8 __U, __m256i __X,
			__m256i __Y)
{
  __v8si __val = (__v8si) __X;
  __v8si __cnt = (__v8si) __Y;
  __v8si __old = (__v8si) __W;
  __mmask8 __k = (__mmask8) __U;

  return (__m256i)
    __builtin_ia32_psrlv8si_mask (__val, __cnt, __old, __k);
}

/* Wrapper for the psrlv8si mask builtin (variable logical right shift
   of 32-bit lanes, going by the intrinsic name).  __X and __Y are its
   two inputs; a zero vector is the third operand and __U the mask.  */
extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_maskz_srlv_epi32 (__mmask8 __U, __m256i __X, __m256i __Y)
{
  __v8si __val = (__v8si) __X;
  __v8si __cnt = (__v8si) __Y;
  __v8si __zero = (__v8si) _mm256_setzero_si256 ();
  __mmask8 __k = (__mmask8) __U;

  return (__m256i)
    __builtin_ia32_psrlv8si_mask (__val, __cnt, __zero, __k);
}

/* Wrapper for the psrlv4si mask builtin (variable logical right shift
   of 32-bit lanes, going by the intrinsic name).  __X and __Y are its
   two inputs; __W and __U are forwarded as the third vector operand and
   the mask.  */
extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_srlv_epi32 (__m128i __W, __mmask8 __U, __m128i __X,
		     __m128i __Y)
{
  __v4si __val = (__v4si) __X;
  __v4si __cnt = (__v4si) __Y;
  __v4si __old = (__v4si) __W;
  __mmask8 __k = (__mmask8) __U;

  return (__m128i)
    __builtin_ia32_psrlv4si_mask (__val, __cnt, __old, __k);
}

/* Wrapper for the psrlv4si mask builtin (variable logical right shift
   of 32-bit lanes, going by the intrinsic name).  __X and __Y are its
   two inputs; a zero vector is the third operand and __U the mask.  */
extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_srlv_epi32 (__mmask8 __U, __m128i __X, __m128i __Y)
{
  __v4si __val = (__v4si) __X;
  __v4si __cnt = (__v4si) __Y;
  __v4si __zero = (__v4si) _mm_setzero_si128 ();
  __mmask8 __k = (__mmask8) __U;

  return (__m128i)
    __builtin_ia32_psrlv4si_mask (__val, __cnt, __zero, __k);
}

/* Wrapper for the psrlv4di mask builtin (variable logical right shift
   of 64-bit lanes, going by the intrinsic name).  __X and __Y are its
   two inputs; __W and __U are forwarded as the third vector operand and
   the mask.  */
extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_srlv_epi64 (__m256i __W, __mmask8 __U, __m256i __X,
			__m256i __Y)
{
  __v4di __val = (__v4di) __X;
  __v4di __cnt = (__v4di) __Y;
  __v4di __old = (__v4di) __W;
  __mmask8 __k = (__mmask8) __U;

  return (__m256i)
    __builtin_ia32_psrlv4di_mask (__val, __cnt, __old, __k);
}

/* Wrapper for the psrlv4di mask builtin (variable logical right shift
   of 64-bit lanes, going by the intrinsic name).  __X and __Y are its
   two inputs; a zero vector is the third operand and __U the mask.  */
extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_maskz_srlv_epi64 (__mmask8 __U, __m256i __X, __m256i __Y)
{
  __v4di __val = (__v4di) __X;
  __v4di __cnt = (__v4di) __Y;
  __v4di __zero = (__v4di) _mm256_setzero_si256 ();
  __mmask8 __k = (__mmask8) __U;

  return (__m256i)
    __builtin_ia32_psrlv4di_mask (__val, __cnt, __zero, __k);
}

/* Wrapper for the psrlv2di mask builtin (variable logical right shift
   of 64-bit lanes, going by the intrinsic name).  __X and __Y are its
   two inputs; __W and __U are forwarded as the third vector operand and
   the mask.  */
extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_srlv_epi64 (__m128i __W, __mmask8 __U, __m128i __X,
		     __m128i __Y)
{
  __v2di __val = (__v2di) __X;
  __v2di __cnt = (__v2di) __Y;
  __v2di __old = (__v2di) __W;
  __mmask8 __k = (__mmask8) __U;

  return (__m128i)
    __builtin_ia32_psrlv2di_mask (__val, __cnt, __old, __k);
}

/* Wrapper for the psrlv2di mask builtin (variable logical right shift
   of 64-bit lanes, going by the intrinsic name).  __X and __Y are its
   two inputs; a zero vector is the third operand and __U the mask.  */
extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_srlv_epi64 (__mmask8 __U, __m128i __X, __m128i __Y)
{
  __v2di __val = (__v2di) __X;
  __v2di __cnt = (__v2di) __Y;
  __v2di __zero = (__v2di) _mm_setzero_si128 ();
  __mmask8 __k = (__mmask8) __U;

  return (__m128i)
    __builtin_ia32_psrlv2di_mask (__val, __cnt, __zero, __k);
}

/* Wrapper for the prolvd256 mask builtin (variable rotate left of
   32-bit lanes, going by the intrinsic name).  __A and __B are its two
   inputs; a zero vector and an all-ones mask fill the rest.  */
extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_rolv_epi32 (__m256i __A, __m256i __B)
{
  __v8si __val = (__v8si) __A;
  __v8si __cnt = (__v8si) __B;
  __v8si __zero = (__v8si) _mm256_setzero_si256 ();
  __mmask8 __all = (__mmask8) -1;

  return (__m256i)
    __builtin_ia32_prolvd256_mask (__val, __cnt, __zero, __all);
}

/* Wrapper for the prolvd256 mask builtin (variable rotate left of
   32-bit lanes, going by the intrinsic name).  __A and __B are its two
   inputs; __W and __U are forwarded as the third vector operand and the
   mask.  */
extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_rolv_epi32 (__m256i __W, __mmask8 __U, __m256i __A,
			__m256i __B)
{
  __v8si __val = (__v8si) __A;
  __v8si __cnt = (__v8si) __B;
  __v8si __old = (__v8si) __W;
  __mmask8 __k = (__mmask8) __U;

  return (__m256i)
    __builtin_ia32_prolvd256_mask (__val, __cnt, __old, __k);
}

/* Wrapper for the prolvd256 mask builtin (variable rotate left of
   32-bit lanes, going by the intrinsic name).  __A and __B are its two
   inputs; a zero vector is the third operand and __U the mask.  */
extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_maskz_rolv_epi32 (__mmask8 __U, __m256i __A, __m256i __B)
{
  __v8si __val = (__v8si) __A;
  __v8si __cnt = (__v8si) __B;
  __v8si __zero = (__v8si) _mm256_setzero_si256 ();
  __mmask8 __k = (__mmask8) __U;

  return (__m256i)
    __builtin_ia32_prolvd256_mask (__val, __cnt, __zero, __k);
}

/* Wrapper for the prolvd128 mask builtin (variable rotate left of
   32-bit lanes, going by the intrinsic name).  __A and __B are its two
   inputs; a zero vector and an all-ones mask fill the rest.  */
extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_rolv_epi32 (__m128i __A, __m128i __B)
{
  __v4si __val = (__v4si) __A;
  __v4si __cnt = (__v4si) __B;
  __v4si __zero = (__v4si) _mm_setzero_si128 ();
  __mmask8 __all = (__mmask8) -1;

  return (__m128i)
    __builtin_ia32_prolvd128_mask (__val, __cnt, __zero, __all);
}

/* Wrapper for the prolvd128 mask builtin (variable rotate left of
   32-bit lanes, going by the intrinsic name).  __A and __B are its two
   inputs; __W and __U are forwarded as the third vector operand and the
   mask.  */
extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_rolv_epi32 (__m128i __W, __mmask8 __U, __m128i __A,
		     __m128i __B)
{
  __v4si __val = (__v4si) __A;
  __v4si __cnt = (__v4si) __B;
  __v4si __old = (__v4si) __W;
  __mmask8 __k = (__mmask8) __U;

  return (__m128i)
    __builtin_ia32_prolvd128_mask (__val, __cnt, __old, __k);
}

/* Wrapper for the prolvd128 mask builtin (variable rotate left of
   32-bit lanes, going by the intrinsic name).  __A and __B are its two
   inputs; a zero vector is the third operand and __U the mask.  */
extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_rolv_epi32 (__mmask8 __U, __m128i __A, __m128i __B)
{
  __v4si __val = (__v4si) __A;
  __v4si __cnt = (__v4si) __B;
  __v4si __zero = (__v4si) _mm_setzero_si128 ();
  __mmask8 __k = (__mmask8) __U;

  return (__m128i)
    __builtin_ia32_prolvd128_mask (__val, __cnt, __zero, __k);
}

/* Wrapper for the prorvd256 mask builtin (variable rotate right of
   32-bit lanes, going by the intrinsic name).  __A and __B are its two
   inputs; a zero vector and an all-ones mask fill the rest.  */
extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_rorv_epi32 (__m256i __A, __m256i __B)
{
  __v8si __val = (__v8si) __A;
  __v8si __cnt = (__v8si) __B;
  __v8si __zero = (__v8si) _mm256_setzero_si256 ();
  __mmask8 __all = (__mmask8) -1;

  return (__m256i)
    __builtin_ia32_prorvd256_mask (__val, __cnt, __zero, __all);
}

/* Wrapper for the prorvd256 mask builtin (variable rotate right of
   32-bit lanes, going by the intrinsic name).  __A and __B are its two
   inputs; __W and __U are forwarded as the third vector operand and the
   mask.  */
extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_rorv_epi32 (__m256i __W, __mmask8 __U, __m256i __A,
			__m256i __B)
{
  __v8si __val = (__v8si) __A;
  __v8si __cnt = (__v8si) __B;
  __v8si __old = (__v8si) __W;
  __mmask8 __k = (__mmask8) __U;

  return (__m256i)
    __builtin_ia32_prorvd256_mask (__val, __cnt, __old, __k);
}

/* Wrapper for the prorvd256 mask builtin (variable rotate right of
   32-bit lanes, going by the intrinsic name).  __A and __B are its two
   inputs; a zero vector is the third operand and __U the mask.  */
extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_maskz_rorv_epi32 (__mmask8 __U, __m256i __A, __m256i __B)
{
  __v8si __val = (__v8si) __A;
  __v8si __cnt = (__v8si) __B;
  __v8si __zero = (__v8si) _mm256_setzero_si256 ();
  __mmask8 __k = (__mmask8) __U;

  return (__m256i)
    __builtin_ia32_prorvd256_mask (__val, __cnt, __zero, __k);
}

/* Wrapper for the prorvd128 mask builtin (variable rotate right of
   32-bit lanes, going by the intrinsic name).  __A and __B are its two
   inputs; a zero vector and an all-ones mask fill the rest.  */
extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_rorv_epi32 (__m128i __A, __m128i __B)
{
  __v4si __val = (__v4si) __A;
  __v4si __cnt = (__v4si) __B;
  __v4si __zero = (__v4si) _mm_setzero_si128 ();
  __mmask8 __all = (__mmask8) -1;

  return (__m128i)
    __builtin_ia32_prorvd128_mask (__val, __cnt, __zero, __all);
}

/* Wrapper for the prorvd128 mask builtin (variable rotate right of
   32-bit lanes, going by the intrinsic name).  __A and __B are its two
   inputs; __W and __U are forwarded as the third vector operand and the
   mask.  */
extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_rorv_epi32 (__m128i __W, __mmask8 __U, __m128i __A,
		     __m128i __B)
{
  __v4si __val = (__v4si) __A;
  __v4si __cnt = (__v4si) __B;
  __v4si __old = (__v4si) __W;
  __mmask8 __k = (__mmask8) __U;

  return (__m128i)
    __builtin_ia32_prorvd128_mask (__val, __cnt, __old, __k);
}

/* Wrapper for the prorvd128 mask builtin (variable rotate right of
   32-bit lanes, going by the intrinsic name).  __A and __B are its two
   inputs; a zero vector is the third operand and __U the mask.  */
extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_rorv_epi32 (__mmask8 __U, __m128i __A, __m128i __B)
{
  __v4si __val = (__v4si) __A;
  __v4si __cnt = (__v4si) __B;
  __v4si __zero = (__v4si) _mm_setzero_si128 ();
  __mmask8 __k = (__mmask8) __U;

  return (__m128i)
    __builtin_ia32_prorvd128_mask (__val, __cnt, __zero, __k);
}

/* Wrapper for the prolvq256 mask builtin (variable rotate left of
   64-bit lanes, going by the intrinsic name).  __A and __B are its two
   inputs; a zero vector and an all-ones mask fill the rest.  */
extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_rolv_epi64 (__m256i __A, __m256i __B)
{
  __v4di __val = (__v4di) __A;
  __v4di __cnt = (__v4di) __B;
  __v4di __zero = (__v4di) _mm256_setzero_si256 ();
  __mmask8 __all = (__mmask8) -1;

  return (__m256i)
    __builtin_ia32_prolvq256_mask (__val, __cnt, __zero, __all);
}

/* Wrapper for the prolvq256 mask builtin (variable rotate left of
   64-bit lanes, going by the intrinsic name).  __A and __B are its two
   inputs; __W and __U are forwarded as the third vector operand and the
   mask.  */
extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_rolv_epi64 (__m256i __W, __mmask8 __U, __m256i __A,
			__m256i __B)
{
  __v4di __val = (__v4di) __A;
  __v4di __cnt = (__v4di) __B;
  __v4di __old = (__v4di) __W;
  __mmask8 __k = (__mmask8) __U;

  return (__m256i)
    __builtin_ia32_prolvq256_mask (__val, __cnt, __old, __k);
}

/* Wrapper for the prolvq256 mask builtin (variable rotate left of
   64-bit lanes, going by the intrinsic name).  __A and __B are its two
   inputs; a zero vector is the third operand and __U the mask.  */
extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_maskz_rolv_epi64 (__mmask8 __U, __m256i __A, __m256i __B)
{
  __v4di __val = (__v4di) __A;
  __v4di __cnt = (__v4di) __B;
  __v4di __zero = (__v4di) _mm256_setzero_si256 ();
  __mmask8 __k = (__mmask8) __U;

  return (__m256i)
    __builtin_ia32_prolvq256_mask (__val, __cnt, __zero, __k);
}

/* Wrapper for the prolvq128 mask builtin (variable rotate left of
   64-bit lanes, going by the intrinsic name).  __A and __B are its two
   inputs; a zero vector and an all-ones mask fill the rest.  */
extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_rolv_epi64 (__m128i __A, __m128i __B)
{
  __v2di __val = (__v2di) __A;
  __v2di __cnt = (__v2di) __B;
  __v2di __zero = (__v2di) _mm_setzero_si128 ();
  __mmask8 __all = (__mmask8) -1;

  return (__m128i)
    __builtin_ia32_prolvq128_mask (__val, __cnt, __zero, __all);
}

/* Wrapper for the prolvq128 mask builtin (variable rotate left of
   64-bit lanes, going by the intrinsic name).  __A and __B are its two
   inputs; __W and __U are forwarded as the third vector operand and the
   mask.  */
extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_rolv_epi64 (__m128i __W, __mmask8 __U, __m128i __A,
		     __m128i __B)
{
  __v2di __val = (__v2di) __A;
  __v2di __cnt = (__v2di) __B;
  __v2di __old = (__v2di) __W;
  __mmask8 __k = (__mmask8) __U;

  return (__m128i)
    __builtin_ia32_prolvq128_mask (__val, __cnt, __old, __k);
}

/* Wrapper for the prolvq128 mask builtin (variable rotate left of
   64-bit lanes, going by the intrinsic name).  __A and __B are its two
   inputs; a zero vector is the third operand and __U the mask.  */
extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_rolv_epi64 (__mmask8 __U, __m128i __A, __m128i __B)
{
  __v2di __val = (__v2di) __A;
  __v2di __cnt = (__v2di) __B;
  __v2di __zero = (__v2di) _mm_setzero_si128 ();
  __mmask8 __k = (__mmask8) __U;

  return (__m128i)
    __builtin_ia32_prolvq128_mask (__val, __cnt, __zero, __k);
}

/* Wrapper for the prorvq256 mask builtin (variable rotate right of
   64-bit lanes, going by the intrinsic name).  __A and __B are its two
   inputs; a zero vector and an all-ones mask fill the rest.  */
extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_rorv_epi64 (__m256i __A, __m256i __B)
{
  __v4di __val = (__v4di) __A;
  __v4di __cnt = (__v4di) __B;
  __v4di __zero = (__v4di) _mm256_setzero_si256 ();
  __mmask8 __all = (__mmask8) -1;

  return (__m256i)
    __builtin_ia32_prorvq256_mask (__val, __cnt, __zero, __all);
}

/* Wrapper for the prorvq256 mask builtin (variable rotate right of
   64-bit lanes, going by the intrinsic name).  __A and __B are its two
   inputs; __W and __U are forwarded as the third vector operand and the
   mask.  */
extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_rorv_epi64 (__m256i __W, __mmask8 __U, __m256i __A,
			__m256i __B)
{
  __v4di __val = (__v4di) __A;
  __v4di __cnt = (__v4di) __B;
  __v4di __old = (__v4di) __W;
  __mmask8 __k = (__mmask8) __U;

  return (__m256i)
    __builtin_ia32_prorvq256_mask (__val, __cnt, __old, __k);
}

/* Wrapper for the prorvq256 mask builtin (variable rotate right of
   64-bit lanes, going by the intrinsic name).  __A and __B are its two
   inputs; a zero vector is the third operand and __U the mask.  */
extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_maskz_rorv_epi64 (__mmask8 __U, __m256i __A, __m256i __B)
{
  __v4di __val = (__v4di) __A;
  __v4di __cnt = (__v4di) __B;
  __v4di __zero = (__v4di) _mm256_setzero_si256 ();
  __mmask8 __k = (__mmask8) __U;

  return (__m256i)
    __builtin_ia32_prorvq256_mask (__val, __cnt, __zero, __k);
}

/* Wrapper for the prorvq128 mask builtin (variable rotate right of
   64-bit lanes, going by the intrinsic name).  __A and __B are its two
   inputs; a zero vector and an all-ones mask fill the rest.  */
extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_rorv_epi64 (__m128i __A, __m128i __B)
{
  __v2di __val = (__v2di) __A;
  __v2di __cnt = (__v2di) __B;
  __v2di __zero = (__v2di) _mm_setzero_si128 ();
  __mmask8 __all = (__mmask8) -1;

  return (__m128i)
    __builtin_ia32_prorvq128_mask (__val, __cnt, __zero, __all);
}

/* Wrapper for the prorvq128 mask builtin (variable rotate right of
   64-bit lanes, going by the intrinsic name).  __A and __B are its two
   inputs; __W and __U are forwarded as the third vector operand and the
   mask.  */
extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_rorv_epi64 (__m128i __W, __mmask8 __U, __m128i __A,
		     __m128i __B)
{
  __v2di __val = (__v2di) __A;
  __v2di __cnt = (__v2di) __B;
  __v2di __old = (__v2di) __W;
  __mmask8 __k = (__mmask8) __U;

  return (__m128i)
    __builtin_ia32_prorvq128_mask (__val, __cnt, __old, __k);
}

/* Wrapper for the prorvq128 mask builtin (variable rotate right of
   64-bit lanes, going by the intrinsic name).  __A and __B are its two
   inputs; a zero vector is the third operand and __U the mask.  */
extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_rorv_epi64 (__mmask8 __U, __m128i __A, __m128i __B)
{
  __v2di __val = (__v2di) __A;
  __v2di __cnt = (__v2di) __B;
  __v2di __zero = (__v2di) _mm_setzero_si128 ();
  __mmask8 __k = (__mmask8) __U;

  return (__m128i)
    __builtin_ia32_prorvq128_mask (__val, __cnt, __zero, __k);
}

/* Wrapper for the psravq256 mask builtin (variable arithmetic right
   shift of 64-bit lanes, going by the intrinsic name).  __X and __Y are
   its two inputs; a zero vector and an all-ones mask fill the rest.  */
extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_srav_epi64 (__m256i __X, __m256i __Y)
{
  __v4di __val = (__v4di) __X;
  __v4di __cnt = (__v4di) __Y;
  __v4di __zero = (__v4di) _mm256_setzero_si256 ();
  __mmask8 __all = (__mmask8) -1;

  return (__m256i)
    __builtin_ia32_psravq256_mask (__val, __cnt, __zero, __all);
}

/* Wrapper for the psravq256 mask builtin (variable arithmetic right
   shift of 64-bit lanes, going by the intrinsic name).  __X and __Y are
   its two inputs; __W and __U are forwarded as the third vector operand
   and the mask.  */
extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_srav_epi64 (__m256i __W, __mmask8 __U, __m256i __X,
			__m256i __Y)
{
  __v4di __val = (__v4di) __X;
  __v4di __cnt = (__v4di) __Y;
  __v4di __old = (__v4di) __W;
  __mmask8 __k = (__mmask8) __U;

  return (__m256i)
    __builtin_ia32_psravq256_mask (__val, __cnt, __old, __k);
}

/* Wrapper for the psravq256 mask builtin (variable arithmetic right
   shift of 64-bit lanes, going by the intrinsic name).  __X and __Y are
   its two inputs; a zero vector is the third operand and __U the mask.  */
extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_maskz_srav_epi64 (__mmask8 __U, __m256i __X, __m256i __Y)
{
  __v4di __val = (__v4di) __X;
  __v4di __cnt = (__v4di) __Y;
  __v4di __zero = (__v4di) _mm256_setzero_si256 ();
  __mmask8 __k = (__mmask8) __U;

  return (__m256i)
    __builtin_ia32_psravq256_mask (__val, __cnt, __zero, __k);
}

/* Wrapper for the pandq256 mask builtin (bitwise AND on 64-bit lanes,
   going by the intrinsic name).  __A and __B are its two inputs; __W
   and __U are forwarded as the third vector operand and the mask.  */
extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_and_epi64 (__m256i __W, __mmask8 __U, __m256i __A,
		       __m256i __B)
{
  __v4di __lhs = (__v4di) __A;
  __v4di __rhs = (__v4di) __B;
  __v4di __old = (__v4di) __W;

  return (__m256i) __builtin_ia32_pandq256_mask (__lhs, __rhs, __old, __U);
}

/* Wrapper for the pandq256 mask builtin (bitwise AND on 64-bit lanes,
   going by the intrinsic name).  __A and __B are its two inputs; a zero
   vector is the third operand and __U the mask.  The zero vector comes
   from _mm256_setzero_si256 so that it is an integer vector like the
   other operands, rather than a reinterpreted floating-point zero.  */
extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_maskz_and_epi64 (__mmask8 __U, __m256i __A, __m256i __B)
{
  return (__m256i) __builtin_ia32_pandq256_mask ((__v4di) __A,
						 (__v4di) __B,
						 (__v4di)
						 _mm256_setzero_si256 (),
						 (__mmask8) __U);
}

/* Wrapper for the pandq128 mask builtin (bitwise AND on 64-bit lanes,
   going by the intrinsic name).  __A and __B are its two inputs; __W
   and __U are forwarded as the third vector operand and the mask.  */
extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_and_epi64 (__m128i __W, __mmask8 __U, __m128i __A,
		    __m128i __B)
{
  __v2di __lhs = (__v2di) __A;
  __v2di __rhs = (__v2di) __B;
  __v2di __old = (__v2di) __W;

  return (__m128i) __builtin_ia32_pandq128_mask (__lhs, __rhs, __old, __U);
}

/* Wrapper for the pandq128 mask builtin (bitwise AND on 64-bit lanes,
   going by the intrinsic name).  __A and __B are its two inputs; a zero
   vector is the third operand and __U the mask.  The zero vector comes
   from _mm_setzero_si128 so that it is an integer vector like the other
   operands, rather than a reinterpreted floating-point zero.  */
extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_and_epi64 (__mmask8 __U, __m128i __A, __m128i __B)
{
  return (__m128i) __builtin_ia32_pandq128_mask ((__v2di) __A,
						 (__v2di) __B,
						 (__v2di)
						 _mm_setzero_si128 (),
						 (__mmask8) __U);
}

/* Wrapper for the pandnq256 mask builtin (bitwise AND-NOT on 64-bit
   lanes, going by the intrinsic name).  __A and __B are its two inputs;
   __W and __U are forwarded as the third vector operand and the mask.  */
extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_andnot_epi64 (__m256i __W, __mmask8 __U, __m256i __A,
			  __m256i __B)
{
  __v4di __lhs = (__v4di) __A;
  __v4di __rhs = (__v4di) __B;
  __v4di __old = (__v4di) __W;

  return (__m256i) __builtin_ia32_pandnq256_mask (__lhs, __rhs, __old, __U);
}

/* Wrapper for the pandnq256 mask builtin (bitwise AND-NOT on 64-bit
   lanes, going by the intrinsic name).  __A and __B are its two inputs;
   a zero vector is the third operand and __U the mask.  The zero vector
   comes from _mm256_setzero_si256 so that it is an integer vector like
   the other operands, rather than a reinterpreted floating-point zero.  */
extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_maskz_andnot_epi64 (__mmask8 __U, __m256i __A, __m256i __B)
{
  return (__m256i) __builtin_ia32_pandnq256_mask ((__v4di) __A,
						  (__v4di) __B,
						  (__v4di)
						  _mm256_setzero_si256 (),
						  (__mmask8) __U);
}

/* Wrapper for the pandnq128 mask builtin (bitwise AND-NOT on 64-bit
   lanes, going by the intrinsic name).  __A and __B are its two inputs;
   __W and __U are forwarded as the third vector operand and the mask.  */
extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_andnot_epi64 (__m128i __W, __mmask8 __U, __m128i __A,
		       __m128i __B)
{
  __v2di __lhs = (__v2di) __A;
  __v2di __rhs = (__v2di) __B;
  __v2di __old = (__v2di) __W;

  return (__m128i) __builtin_ia32_pandnq128_mask (__lhs, __rhs, __old, __U);
}

/* Wrapper for the pandnq128 mask builtin (bitwise AND-NOT on 64-bit
   lanes, going by the intrinsic name).  __A and __B are its two inputs;
   a zero vector is the third operand and __U the mask.  The zero vector
   comes from _mm_setzero_si128 so that it is an integer vector like the
   other operands, rather than a reinterpreted floating-point zero.  */
extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_andnot_epi64 (__mmask8 __U, __m128i __A, __m128i __B)
{
  return (__m128i) __builtin_ia32_pandnq128_mask ((__v2di) __A,
						  (__v2di) __B,
						  (__v2di)
						  _mm_setzero_si128 (),
						  (__mmask8) __U);
}

/* Wrapper for the porq256 mask builtin (bitwise OR on 64-bit lanes,
   going by the intrinsic name).  __A and __B are its two inputs; __W
   and __U are forwarded as the third vector operand and the mask.  */
extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_or_epi64 (__m256i __W, __mmask8 __U, __m256i __A,
		      __m256i __B)
{
  __v4di __lhs = (__v4di) __A;
  __v4di __rhs = (__v4di) __B;
  __v4di __old = (__v4di) __W;
  __mmask8 __k = (__mmask8) __U;

  return (__m256i) __builtin_ia32_porq256_mask (__lhs, __rhs, __old, __k);
}

/* Wrapper for the porq256 mask builtin (bitwise OR on 64-bit lanes,
   going by the intrinsic name).  __A and __B are its two inputs; a zero
   vector is the third operand and __U the mask.  */
extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_maskz_or_epi64 (__mmask8 __U, __m256i __A, __m256i __B)
{
  __v4di __lhs = (__v4di) __A;
  __v4di __rhs = (__v4di) __B;
  __v4di __zero = (__v4di) _mm256_setzero_si256 ();
  __mmask8 __k = (__mmask8) __U;

  return (__m256i) __builtin_ia32_porq256_mask (__lhs, __rhs, __zero, __k);
}

/* Bitwise OR of __A and __B, computed with the vector | operator on
   the operands viewed as __v4du.  */
extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_or_epi64 (__m256i __A, __m256i __B)
{
  __v4du __lhs = (__v4du) __A;
  __v4du __rhs = (__v4du) __B;

  return (__m256i) (__lhs | __rhs);
}

/* Wrapper for the porq128 mask builtin (bitwise OR on 64-bit lanes,
   going by the intrinsic name).  __A and __B are its two inputs; __W
   and __U are forwarded as the third vector operand and the mask.  */
extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_or_epi64 (__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
{
  __v2di __lhs = (__v2di) __A;
  __v2di __rhs = (__v2di) __B;
  __v2di __old = (__v2di) __W;
  __mmask8 __k = (__mmask8) __U;

  return (__m128i) __builtin_ia32_porq128_mask (__lhs, __rhs, __old, __k);
}

/* Wrapper for the porq128 mask builtin (bitwise OR on 64-bit lanes,
   going by the intrinsic name).  __A and __B are its two inputs; a zero
   vector is the third operand and __U the mask.  */
extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_or_epi64 (__mmask8 __U, __m128i __A, __m128i __B)
{
  __v2di __lhs = (__v2di) __A;
  __v2di __rhs = (__v2di) __B;
  __v2di __zero = (__v2di) _mm_setzero_si128 ();
  __mmask8 __k = (__mmask8) __U;

  return (__m128i) __builtin_ia32_porq128_mask (__lhs, __rhs, __zero, __k);
}

/* Bitwise OR of __A and __B, computed with the vector | operator on
   the operands viewed as __v2du.  */
extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_or_epi64 (__m128i __A, __m128i __B)
{
  __v2du __lhs = (__v2du) __A;
  __v2du __rhs = (__v2du) __B;

  return (__m128i) (__lhs | __rhs);
}

/* Wrapper for the pxorq256 mask builtin (bitwise XOR on 64-bit lanes,
   going by the intrinsic name).  __A and __B are its two inputs; __W
   and __U are forwarded as the third vector operand and the mask.  */
extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_xor_epi64 (__m256i __W, __mmask8 __U, __m256i __A,
		       __m256i __B)
{
  __v4di __lhs = (__v4di) __A;
  __v4di __rhs = (__v4di) __B;
  __v4di __old = (__v4di) __W;
  __mmask8 __k = (__mmask8) __U;

  return (__m256i) __builtin_ia32_pxorq256_mask (__lhs, __rhs, __old, __k);
}

/* Wrapper for the pxorq256 mask builtin (bitwise XOR on 64-bit lanes,
   going by the intrinsic name).  __A and __B are its two inputs; a zero
   vector is the third operand and __U the mask.  */
extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_maskz_xor_epi64 (__mmask8 __U, __m256i __A, __m256i __B)
{
  __v4di __lhs = (__v4di) __A;
  __v4di __rhs = (__v4di) __B;
  __v4di __zero = (__v4di) _mm256_setzero_si256 ();
  __mmask8 __k = (__mmask8) __U;

  return (__m256i) __builtin_ia32_pxorq256_mask (__lhs, __rhs, __zero, __k);
}

/* Bitwise XOR of __A and __B, computed with the vector ^ operator on
   the operands viewed as __v4du.  */
extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_xor_epi64 (__m256i __A, __m256i __B)
{
  __v4du __lhs = (__v4du) __A;
  __v4du __rhs = (__v4du) __B;

  return (__m256i) (__lhs ^ __rhs);
}

/* Wrapper for the pxorq128 mask builtin (bitwise XOR on 64-bit lanes,
   going by the intrinsic name).  __A and __B are its two inputs; __W
   and __U are forwarded as the third vector operand and the mask.  */
extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_xor_epi64 (__m128i __W, __mmask8 __U, __m128i __A,
		    __m128i __B)
{
  __v2di __lhs = (__v2di) __A;
  __v2di __rhs = (__v2di) __B;
  __v2di __old = (__v2di) __W;
  __mmask8 __k = (__mmask8) __U;

  return (__m128i) __builtin_ia32_pxorq128_mask (__lhs, __rhs, __old, __k);
}

/* Wrapper for the pxorq128 mask builtin (bitwise XOR on 64-bit lanes,
   going by the intrinsic name).  __A and __B are its two inputs; a zero
   vector is the third operand and __U the mask.  */
extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_xor_epi64 (__mmask8 __U, __m128i __A, __m128i __B)
{
  __v2di __lhs = (__v2di) __A;
  __v2di __rhs = (__v2di) __B;
  __v2di __zero = (__v2di) _mm_setzero_si128 ();
  __mmask8 __k = (__mmask8) __U;

  return (__m128i) __builtin_ia32_pxorq128_mask (__lhs, __rhs, __zero, __k);
}

/* Bitwise XOR of __A and __B, computed with the vector ^ operator on
   the operands viewed as __v2du.  */
extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_xor_epi64 (__m128i __A, __m128i __B)
{
  __v2du __lhs = (__v2du) __A;
  __v2du __rhs = (__v2du) __B;

  return (__m128i) (__lhs ^ __rhs);
}

/* Wrapper for the maxpd256 mask builtin (maximum on __v4df data, going
   by the intrinsic name).  __A and __B are its two inputs; __W and __U
   are forwarded as the third vector operand and the mask.  */
extern __inline __m256d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_max_pd (__m256d __W, __mmask8 __U, __m256d __A,
		    __m256d __B)
{
  __v4df __lhs = (__v4df) __A;
  __v4df __rhs = (__v4df) __B;
  __v4df __old = (__v4df) __W;
  __mmask8 __k = (__mmask8) __U;

  return (__m256d) __builtin_ia32_maxpd256_mask (__lhs, __rhs, __old, __k);
}

/* Wrapper for the maxpd256 mask builtin (maximum on __v4df data, going
   by the intrinsic name).  __A and __B are its two inputs; a zero
   vector is the third operand and __U the mask.  */
extern __inline __m256d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_maskz_max_pd (__mmask8 __U, __m256d __A, __m256d __B)
{
  __v4df __lhs = (__v4df) __A;
  __v4df __rhs = (__v4df) __B;
  __v4df __zero = (__v4df) _mm256_setzero_pd ();
  __mmask8 __k = (__mmask8) __U;

  return (__m256d) __builtin_ia32_maxpd256_mask (__lhs, __rhs, __zero, __k);
}

/* Wrapper for the maxps256 mask builtin (maximum on __v8sf data, going
   by the intrinsic name).  __A and __B are its two inputs; __W and __U
   are forwarded as the third vector operand and the mask.  */
extern __inline __m256
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_max_ps (__m256 __W, __mmask8 __U, __m256 __A, __m256 __B)
{
  __v8sf __lhs = (__v8sf) __A;
  __v8sf __rhs = (__v8sf) __B;
  __v8sf __old = (__v8sf) __W;
  __mmask8 __k = (__mmask8) __U;

  return (__m256) __builtin_ia32_maxps256_mask (__lhs, __rhs, __old, __k);
}

/* Wrapper for the maxps256 mask builtin (maximum on __v8sf data, going
   by the intrinsic name).  __A and __B are its two inputs; a zero
   vector is the third operand and __U the mask.  */
extern __inline __m256
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_maskz_max_ps (__mmask8 __U, __m256 __A, __m256 __B)
{
  __v8sf __lhs = (__v8sf) __A;
  __v8sf __rhs = (__v8sf) __B;
  __v8sf __zero = (__v8sf) _mm256_setzero_ps ();
  __mmask8 __k = (__mmask8) __U;

  return (__m256) __builtin_ia32_maxps256_mask (__lhs, __rhs, __zero, __k);
}

/* Wrapper for the divps mask builtin (division on __v4sf data, going
   by the intrinsic name).  __A and __B are its two inputs; __W and __U
   are forwarded as the third vector operand and the mask.  */
extern __inline __m128
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_div_ps (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
{
  __v4sf __lhs = (__v4sf) __A;
  __v4sf __rhs = (__v4sf) __B;
  __v4sf __old = (__v4sf) __W;
  __mmask8 __k = (__mmask8) __U;

  return (__m128) __builtin_ia32_divps_mask (__lhs, __rhs, __old, __k);
}

/* Wrapper for the divps mask builtin (division on __v4sf data, going
   by the intrinsic name).  __A and __B are its two inputs; a zero
   vector is the third operand and __U the mask.  */
extern __inline __m128
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_div_ps (__mmask8 __U, __m128 __A, __m128 __B)
{
  __v4sf __lhs = (__v4sf) __A;
  __v4sf __rhs = (__v4sf) __B;
  __v4sf __zero = (__v4sf) _mm_setzero_ps ();
  __mmask8 __k = (__mmask8) __U;

  return (__m128) __builtin_ia32_divps_mask (__lhs, __rhs, __zero, __k);
}

/* Merge-masked 128-bit packed-double divide: forwards __A and __B to
   __builtin_ia32_divpd_mask with __W as the pass-through operand and
   __U as the write mask.  */
extern __inline __m128d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_div_pd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
{
  __v2df __lhs = (__v2df) __A;
  __v2df __rhs = (__v2df) __B;
  __v2df __src = (__v2df) __W;
  __mmask8 __k = (__mmask8) __U;

  return (__m128d) __builtin_ia32_divpd_mask (__lhs, __rhs, __src, __k);
}

/* Zero-masked 128-bit packed-double divide: calls
   __builtin_ia32_divpd_mask with an all-zero vector in the pass-through
   slot and __U as the write mask.  */
extern __inline __m128d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_div_pd (__mmask8 __U, __m128d __A, __m128d __B)
{
  __v2df __lhs = (__v2df) __A;
  __v2df __rhs = (__v2df) __B;
  __v2df __zero = (__v2df) _mm_setzero_pd ();
  __mmask8 __k = (__mmask8) __U;

  return (__m128d) __builtin_ia32_divpd_mask (__lhs, __rhs, __zero, __k);
}

/* Merge-masked 256-bit packed-double minimum: forwards __A and __B to
   __builtin_ia32_minpd256_mask with __W as the pass-through operand and
   __U as the write mask.  */
extern __inline __m256d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_min_pd (__m256d __W, __mmask8 __U, __m256d __A, __m256d __B)
{
  __v4df __lhs = (__v4df) __A;
  __v4df __rhs = (__v4df) __B;
  __v4df __src = (__v4df) __W;
  __mmask8 __k = (__mmask8) __U;

  return (__m256d) __builtin_ia32_minpd256_mask (__lhs, __rhs, __src, __k);
}

/* Merge-masked 256-bit packed-double divide: forwards __A and __B to
   __builtin_ia32_divpd256_mask with __W as the pass-through operand and
   __U as the write mask.  */
extern __inline __m256d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_div_pd (__m256d __W, __mmask8 __U, __m256d __A, __m256d __B)
{
  __v4df __lhs = (__v4df) __A;
  __v4df __rhs = (__v4df) __B;
  __v4df __src = (__v4df) __W;
  __mmask8 __k = (__mmask8) __U;

  return (__m256d) __builtin_ia32_divpd256_mask (__lhs, __rhs, __src, __k);
}

/* Zero-masked 256-bit packed-double minimum: calls
   __builtin_ia32_minpd256_mask with an all-zero vector in the
   pass-through slot and __U as the write mask.  */
extern __inline __m256d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_maskz_min_pd (__mmask8 __U, __m256d __A, __m256d __B)
{
  __v4df __lhs = (__v4df) __A;
  __v4df __rhs = (__v4df) __B;
  __v4df __zero = (__v4df) _mm256_setzero_pd ();
  __mmask8 __k = (__mmask8) __U;

  return (__m256d) __builtin_ia32_minpd256_mask (__lhs, __rhs, __zero, __k);
}

/* Merge-masked 256-bit packed-single minimum: forwards __A and __B to
   __builtin_ia32_minps256_mask with __W as the pass-through operand and
   __U as the write mask.  */
extern __inline __m256
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_min_ps (__m256 __W, __mmask8 __U, __m256 __A, __m256 __B)
{
  __v8sf __lhs = (__v8sf) __A;
  __v8sf __rhs = (__v8sf) __B;
  __v8sf __src = (__v8sf) __W;
  __mmask8 __k = (__mmask8) __U;

  return (__m256) __builtin_ia32_minps256_mask (__lhs, __rhs, __src, __k);
}

/* Zero-masked 256-bit packed-double divide: calls
   __builtin_ia32_divpd256_mask with an all-zero vector in the
   pass-through slot and __U as the write mask.  */
extern __inline __m256d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_maskz_div_pd (__mmask8 __U, __m256d __A, __m256d __B)
{
  __v4df __lhs = (__v4df) __A;
  __v4df __rhs = (__v4df) __B;
  __v4df __zero = (__v4df) _mm256_setzero_pd ();
  __mmask8 __k = (__mmask8) __U;

  return (__m256d) __builtin_ia32_divpd256_mask (__lhs, __rhs, __zero, __k);
}

/* Merge-masked 256-bit packed-single divide: forwards __A and __B to
   __builtin_ia32_divps256_mask with __W as the pass-through operand and
   __U as the write mask.  */
extern __inline __m256
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_div_ps (__m256 __W, __mmask8 __U, __m256 __A, __m256 __B)
{
  __v8sf __lhs = (__v8sf) __A;
  __v8sf __rhs = (__v8sf) __B;
  __v8sf __src = (__v8sf) __W;
  __mmask8 __k = (__mmask8) __U;

  return (__m256) __builtin_ia32_divps256_mask (__lhs, __rhs, __src, __k);
}

/* Zero-masked 256-bit packed-single minimum: calls
   __builtin_ia32_minps256_mask with an all-zero vector in the
   pass-through slot and __U as the write mask.  */
extern __inline __m256
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_maskz_min_ps (__mmask8 __U, __m256 __A, __m256 __B)
{
  __v8sf __lhs = (__v8sf) __A;
  __v8sf __rhs = (__v8sf) __B;
  __v8sf __zero = (__v8sf) _mm256_setzero_ps ();
  __mmask8 __k = (__mmask8) __U;

  return (__m256) __builtin_ia32_minps256_mask (__lhs, __rhs, __zero, __k);
}

/* Zero-masked 256-bit packed-single divide: calls
   __builtin_ia32_divps256_mask with an all-zero vector in the
   pass-through slot and __U as the write mask.  */
extern __inline __m256
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_maskz_div_ps (__mmask8 __U, __m256 __A, __m256 __B)
{
  __v8sf __lhs = (__v8sf) __A;
  __v8sf __rhs = (__v8sf) __B;
  __v8sf __zero = (__v8sf) _mm256_setzero_ps ();
  __mmask8 __k = (__mmask8) __U;

  return (__m256) __builtin_ia32_divps256_mask (__lhs, __rhs, __zero, __k);
}

/* Merge-masked 128-bit packed-single minimum: forwards __A and __B to
   __builtin_ia32_minps_mask with __W as the pass-through operand and
   __U as the write mask.  */
extern __inline __m128
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_min_ps (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
{
  __v4sf __lhs = (__v4sf) __A;
  __v4sf __rhs = (__v4sf) __B;
  __v4sf __src = (__v4sf) __W;
  __mmask8 __k = (__mmask8) __U;

  return (__m128) __builtin_ia32_minps_mask (__lhs, __rhs, __src, __k);
}

/* Merge-masked 128-bit packed-single multiply: forwards __A and __B to
   __builtin_ia32_mulps_mask with __W as the pass-through operand and
   __U as the write mask.  */
extern __inline __m128
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_mul_ps (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
{
  __v4sf __lhs = (__v4sf) __A;
  __v4sf __rhs = (__v4sf) __B;
  __v4sf __src = (__v4sf) __W;
  __mmask8 __k = (__mmask8) __U;

  return (__m128) __builtin_ia32_mulps_mask (__lhs, __rhs, __src, __k);
}

/* Zero-masked 128-bit packed-single minimum: calls
   __builtin_ia32_minps_mask with an all-zero vector in the pass-through
   slot and __U as the write mask.  */
extern __inline __m128
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_min_ps (__mmask8 __U, __m128 __A, __m128 __B)
{
  __v4sf __lhs = (__v4sf) __A;
  __v4sf __rhs = (__v4sf) __B;
  __v4sf __zero = (__v4sf) _mm_setzero_ps ();
  __mmask8 __k = (__mmask8) __U;

  return (__m128) __builtin_ia32_minps_mask (__lhs, __rhs, __zero, __k);
}

/* Zero-masked 128-bit packed-single multiply: calls
   __builtin_ia32_mulps_mask with an all-zero vector in the pass-through
   slot and __U as the write mask.  */
extern __inline __m128
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_mul_ps (__mmask8 __U, __m128 __A, __m128 __B)
{
  __v4sf __lhs = (__v4sf) __A;
  __v4sf __rhs = (__v4sf) __B;
  __v4sf __zero = (__v4sf) _mm_setzero_ps ();
  __mmask8 __k = (__mmask8) __U;

  return (__m128) __builtin_ia32_mulps_mask (__lhs, __rhs, __zero, __k);
}

/* Merge-masked 128-bit packed-single maximum: forwards __A and __B to
   __builtin_ia32_maxps_mask with __W as the pass-through operand and
   __U as the write mask.  */
extern __inline __m128
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_max_ps (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
{
  __v4sf __lhs = (__v4sf) __A;
  __v4sf __rhs = (__v4sf) __B;
  __v4sf __src = (__v4sf) __W;
  __mmask8 __k = (__mmask8) __U;

  return (__m128) __builtin_ia32_maxps_mask (__lhs, __rhs, __src, __k);
}

/* Zero-masked 128-bit packed-single maximum: calls
   __builtin_ia32_maxps_mask with an all-zero vector in the pass-through
   slot and __U as the write mask.  */
extern __inline __m128
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_max_ps (__mmask8 __U, __m128 __A, __m128 __B)
{
  __v4sf __lhs = (__v4sf) __A;
  __v4sf __rhs = (__v4sf) __B;
  __v4sf __zero = (__v4sf) _mm_setzero_ps ();
  __mmask8 __k = (__mmask8) __U;

  return (__m128) __builtin_ia32_maxps_mask (__lhs, __rhs, __zero, __k);
}

/* Merge-masked 128-bit packed-double minimum: forwards __A and __B to
   __builtin_ia32_minpd_mask with __W as the pass-through operand and
   __U as the write mask.  */
extern __inline __m128d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_min_pd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
{
  __v2df __lhs = (__v2df) __A;
  __v2df __rhs = (__v2df) __B;
  __v2df __src = (__v2df) __W;
  __mmask8 __k = (__mmask8) __U;

  return (__m128d) __builtin_ia32_minpd_mask (__lhs, __rhs, __src, __k);
}

/* Zero-masked 128-bit packed-double minimum: calls
   __builtin_ia32_minpd_mask with an all-zero vector in the pass-through
   slot and __U as the write mask.  */
extern __inline __m128d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_min_pd (__mmask8 __U, __m128d __A, __m128d __B)
{
  __v2df __lhs = (__v2df) __A;
  __v2df __rhs = (__v2df) __B;
  __v2df __zero = (__v2df) _mm_setzero_pd ();
  __mmask8 __k = (__mmask8) __U;

  return (__m128d) __builtin_ia32_minpd_mask (__lhs, __rhs, __zero, __k);
}

/* Merge-masked 128-bit packed-double maximum: forwards __A and __B to
   __builtin_ia32_maxpd_mask with __W as the pass-through operand and
   __U as the write mask.  */
extern __inline __m128d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_max_pd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
{
  __v2df __lhs = (__v2df) __A;
  __v2df __rhs = (__v2df) __B;
  __v2df __src = (__v2df) __W;
  __mmask8 __k = (__mmask8) __U;

  return (__m128d) __builtin_ia32_maxpd_mask (__lhs, __rhs, __src, __k);
}

/* Zero-masked 128-bit packed-double maximum: calls
   __builtin_ia32_maxpd_mask with an all-zero vector in the pass-through
   slot and __U as the write mask.  */
extern __inline __m128d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_max_pd (__mmask8 __U, __m128d __A, __m128d __B)
{
  __v2df __lhs = (__v2df) __A;
  __v2df __rhs = (__v2df) __B;
  __v2df __zero = (__v2df) _mm_setzero_pd ();
  __mmask8 __k = (__mmask8) __U;

  return (__m128d) __builtin_ia32_maxpd_mask (__lhs, __rhs, __zero, __k);
}

/* Merge-masked 128-bit packed-double multiply: forwards __A and __B to
   __builtin_ia32_mulpd_mask with __W as the pass-through operand and
   __U as the write mask.  */
extern __inline __m128d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_mul_pd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
{
  __v2df __lhs = (__v2df) __A;
  __v2df __rhs = (__v2df) __B;
  __v2df __src = (__v2df) __W;
  __mmask8 __k = (__mmask8) __U;

  return (__m128d) __builtin_ia32_mulpd_mask (__lhs, __rhs, __src, __k);
}

/* Zero-masked 128-bit packed-double multiply: calls
   __builtin_ia32_mulpd_mask with an all-zero vector in the pass-through
   slot and __U as the write mask.  */
extern __inline __m128d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_mul_pd (__mmask8 __U, __m128d __A, __m128d __B)
{
  __v2df __lhs = (__v2df) __A;
  __v2df __rhs = (__v2df) __B;
  __v2df __zero = (__v2df) _mm_setzero_pd ();
  __mmask8 __k = (__mmask8) __U;

  return (__m128d) __builtin_ia32_mulpd_mask (__lhs, __rhs, __zero, __k);
}

/* Merge-masked 256-bit packed-single multiply: forwards __A and __B to
   __builtin_ia32_mulps256_mask with __W as the pass-through operand and
   __U as the write mask.  */
extern __inline __m256
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_mul_ps (__m256 __W, __mmask8 __U, __m256 __A, __m256 __B)
{
  __v8sf __lhs = (__v8sf) __A;
  __v8sf __rhs = (__v8sf) __B;
  __v8sf __src = (__v8sf) __W;
  __mmask8 __k = (__mmask8) __U;

  return (__m256) __builtin_ia32_mulps256_mask (__lhs, __rhs, __src, __k);
}

/* Zero-masked 256-bit packed-single multiply: calls
   __builtin_ia32_mulps256_mask with an all-zero vector in the
   pass-through slot and __U as the write mask.  */
extern __inline __m256
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_maskz_mul_ps (__mmask8 __U, __m256 __A, __m256 __B)
{
  __v8sf __lhs = (__v8sf) __A;
  __v8sf __rhs = (__v8sf) __B;
  __v8sf __zero = (__v8sf) _mm256_setzero_ps ();
  __mmask8 __k = (__mmask8) __U;

  return (__m256) __builtin_ia32_mulps256_mask (__lhs, __rhs, __zero, __k);
}

/* Merge-masked 256-bit packed-double multiply: forwards __A and __B to
   __builtin_ia32_mulpd256_mask with __W as the pass-through operand and
   __U as the write mask.  */
extern __inline __m256d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_mul_pd (__m256d __W, __mmask8 __U, __m256d __A, __m256d __B)
{
  __v4df __lhs = (__v4df) __A;
  __v4df __rhs = (__v4df) __B;
  __v4df __src = (__v4df) __W;
  __mmask8 __k = (__mmask8) __U;

  return (__m256d) __builtin_ia32_mulpd256_mask (__lhs, __rhs, __src, __k);
}

/* Zero-masked 256-bit packed-double multiply: calls
   __builtin_ia32_mulpd256_mask with an all-zero vector in the
   pass-through slot and __U as the write mask.  */
extern __inline __m256d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_maskz_mul_pd (__mmask8 __U, __m256d __A, __m256d __B)
{
  __v4df __lhs = (__v4df) __A;
  __v4df __rhs = (__v4df) __B;
  __v4df __zero = (__v4df) _mm256_setzero_pd ();
  __mmask8 __k = (__mmask8) __U;

  return (__m256d) __builtin_ia32_mulpd256_mask (__lhs, __rhs, __zero, __k);
}

/* Zero-masked 256-bit signed 64-bit maximum: calls
   __builtin_ia32_pmaxsq256_mask with an all-zero vector in the
   pass-through slot and __M as the write mask.  */
extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_maskz_max_epi64 (__mmask8 __M, __m256i __A, __m256i __B)
{
  __v4di __lhs = (__v4di) __A;
  __v4di __rhs = (__v4di) __B;
  __v4di __zero = (__v4di) _mm256_setzero_si256 ();

  return (__m256i) __builtin_ia32_pmaxsq256_mask (__lhs, __rhs, __zero, __M);
}

/* Merge-masked 256-bit signed 64-bit maximum: forwards __A and __B to
   __builtin_ia32_pmaxsq256_mask with __W as the pass-through operand
   and __M as the write mask.  */
extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_max_epi64 (__m256i __W, __mmask8 __M, __m256i __A, __m256i __B)
{
  __v4di __lhs = (__v4di) __A;
  __v4di __rhs = (__v4di) __B;
  __v4di __src = (__v4di) __W;

  return (__m256i) __builtin_ia32_pmaxsq256_mask (__lhs, __rhs, __src, __M);
}

/* Unmasked 256-bit signed 64-bit minimum: uses
   __builtin_ia32_pminsq256_mask with a zero pass-through vector and the
   mask (__mmask8) -1.  */
extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_min_epi64 (__m256i __A, __m256i __B)
{
  __v4di __lhs = (__v4di) __A;
  __v4di __rhs = (__v4di) __B;
  __v4di __zero = (__v4di) _mm256_setzero_si256 ();
  __mmask8 __all = (__mmask8) -1;

  return (__m256i) __builtin_ia32_pminsq256_mask (__lhs, __rhs, __zero, __all);
}

/* Merge-masked 256-bit signed 64-bit minimum: forwards __A and __B to
   __builtin_ia32_pminsq256_mask with __W as the pass-through operand
   and __M as the write mask.  */
extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_min_epi64 (__m256i __W, __mmask8 __M, __m256i __A, __m256i __B)
{
  __v4di __lhs = (__v4di) __A;
  __v4di __rhs = (__v4di) __B;
  __v4di __src = (__v4di) __W;

  return (__m256i) __builtin_ia32_pminsq256_mask (__lhs, __rhs, __src, __M);
}

/* Zero-masked 256-bit signed 64-bit minimum: calls
   __builtin_ia32_pminsq256_mask with an all-zero vector in the
   pass-through slot and __M as the write mask.  */
extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_maskz_min_epi64 (__mmask8 __M, __m256i __A, __m256i __B)
{
  __v4di __lhs = (__v4di) __A;
  __v4di __rhs = (__v4di) __B;
  __v4di __zero = (__v4di) _mm256_setzero_si256 ();

  return (__m256i) __builtin_ia32_pminsq256_mask (__lhs, __rhs, __zero, __M);
}

/* Zero-masked 256-bit unsigned 64-bit maximum: calls
   __builtin_ia32_pmaxuq256_mask with an all-zero vector in the
   pass-through slot and __M as the write mask.  */
extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_maskz_max_epu64 (__mmask8 __M, __m256i __A, __m256i __B)
{
  __v4di __lhs = (__v4di) __A;
  __v4di __rhs = (__v4di) __B;
  __v4di __zero = (__v4di) _mm256_setzero_si256 ();

  return (__m256i) __builtin_ia32_pmaxuq256_mask (__lhs, __rhs, __zero, __M);
}

/* Unmasked 256-bit signed 64-bit maximum: uses
   __builtin_ia32_pmaxsq256_mask with a zero pass-through vector and the
   mask (__mmask8) -1.  */
extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_max_epi64 (__m256i __A, __m256i __B)
{
  __v4di __lhs = (__v4di) __A;
  __v4di __rhs = (__v4di) __B;
  __v4di __zero = (__v4di) _mm256_setzero_si256 ();
  __mmask8 __all = (__mmask8) -1;

  return (__m256i) __builtin_ia32_pmaxsq256_mask (__lhs, __rhs, __zero, __all);
}

/* Unmasked 256-bit unsigned 64-bit maximum: uses
   __builtin_ia32_pmaxuq256_mask with a zero pass-through vector and the
   mask (__mmask8) -1.  */
extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_max_epu64 (__m256i __A, __m256i __B)
{
  __v4di __lhs = (__v4di) __A;
  __v4di __rhs = (__v4di) __B;
  __v4di __zero = (__v4di) _mm256_setzero_si256 ();
  __mmask8 __all = (__mmask8) -1;

  return (__m256i) __builtin_ia32_pmaxuq256_mask (__lhs, __rhs, __zero, __all);
}

/* Merge-masked 256-bit unsigned 64-bit maximum: forwards __A and __B to
   __builtin_ia32_pmaxuq256_mask with __W as the pass-through operand
   and __M as the write mask.  */
extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_max_epu64 (__m256i __W, __mmask8 __M, __m256i __A, __m256i __B)
{
  __v4di __lhs = (__v4di) __A;
  __v4di __rhs = (__v4di) __B;
  __v4di __src = (__v4di) __W;

  return (__m256i) __builtin_ia32_pmaxuq256_mask (__lhs, __rhs, __src, __M);
}

/* Unmasked 256-bit unsigned 64-bit minimum: uses
   __builtin_ia32_pminuq256_mask with a zero pass-through vector and the
   mask (__mmask8) -1.  */
extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_min_epu64 (__m256i __A, __m256i __B)
{
  __v4di __lhs = (__v4di) __A;
  __v4di __rhs = (__v4di) __B;
  __v4di __zero = (__v4di) _mm256_setzero_si256 ();
  __mmask8 __all = (__mmask8) -1;

  return (__m256i) __builtin_ia32_pminuq256_mask (__lhs, __rhs, __zero, __all);
}

/* Merge-masked 256-bit unsigned 64-bit minimum: forwards __A and __B to
   __builtin_ia32_pminuq256_mask with __W as the pass-through operand
   and __M as the write mask.  */
extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_min_epu64 (__m256i __W, __mmask8 __M, __m256i __A, __m256i __B)
{
  __v4di __lhs = (__v4di) __A;
  __v4di __rhs = (__v4di) __B;
  __v4di __src = (__v4di) __W;

  return (__m256i) __builtin_ia32_pminuq256_mask (__lhs, __rhs, __src, __M);
}

/* Zero-masked 256-bit unsigned 64-bit minimum: calls
   __builtin_ia32_pminuq256_mask with an all-zero vector in the
   pass-through slot and __M as the write mask.  */
extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_maskz_min_epu64 (__mmask8 __M, __m256i __A, __m256i __B)
{
  __v4di __lhs = (__v4di) __A;
  __v4di __rhs = (__v4di) __B;
  __v4di __zero = (__v4di) _mm256_setzero_si256 ();

  return (__m256i) __builtin_ia32_pminuq256_mask (__lhs, __rhs, __zero, __M);
}

/* Zero-masked 256-bit signed 32-bit maximum: calls
   __builtin_ia32_pmaxsd256_mask with an all-zero vector in the
   pass-through slot and __M as the write mask.  */
extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_maskz_max_epi32 (__mmask8 __M, __m256i __A, __m256i __B)
{
  __v8si __lhs = (__v8si) __A;
  __v8si __rhs = (__v8si) __B;
  __v8si __zero = (__v8si) _mm256_setzero_si256 ();

  return (__m256i) __builtin_ia32_pmaxsd256_mask (__lhs, __rhs, __zero, __M);
}

/* Merge-masked 256-bit signed 32-bit maximum: forwards __A and __B to
   __builtin_ia32_pmaxsd256_mask with __W as the pass-through operand
   and __M as the write mask.  */
extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_max_epi32 (__m256i __W, __mmask8 __M, __m256i __A, __m256i __B)
{
  __v8si __lhs = (__v8si) __A;
  __v8si __rhs = (__v8si) __B;
  __v8si __src = (__v8si) __W;

  return (__m256i) __builtin_ia32_pmaxsd256_mask (__lhs, __rhs, __src, __M);
}

/* Zero-masked 256-bit signed 32-bit minimum: calls
   __builtin_ia32_pminsd256_mask with an all-zero vector in the
   pass-through slot and __M as the write mask.  */
extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_maskz_min_epi32 (__mmask8 __M, __m256i __A, __m256i __B)
{
  __v8si __lhs = (__v8si) __A;
  __v8si __rhs = (__v8si) __B;
  __v8si __zero = (__v8si) _mm256_setzero_si256 ();

  return (__m256i) __builtin_ia32_pminsd256_mask (__lhs, __rhs, __zero, __M);
}

/* Merge-masked 256-bit signed 32-bit minimum: forwards __A and __B to
   __builtin_ia32_pminsd256_mask with __W as the pass-through operand
   and __M as the write mask.  */
extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_min_epi32 (__m256i __W, __mmask8 __M, __m256i __A, __m256i __B)
{
  __v8si __lhs = (__v8si) __A;
  __v8si __rhs = (__v8si) __B;
  __v8si __src = (__v8si) __W;

  return (__m256i) __builtin_ia32_pminsd256_mask (__lhs, __rhs, __src, __M);
}

/* Zero-masked 256-bit unsigned 32-bit maximum: calls
   __builtin_ia32_pmaxud256_mask with an all-zero vector in the
   pass-through slot and __M as the write mask.  */
extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_maskz_max_epu32 (__mmask8 __M, __m256i __A, __m256i __B)
{
  __v8si __lhs = (__v8si) __A;
  __v8si __rhs = (__v8si) __B;
  __v8si __zero = (__v8si) _mm256_setzero_si256 ();

  return (__m256i) __builtin_ia32_pmaxud256_mask (__lhs, __rhs, __zero, __M);
}

/* Merge-masked 256-bit unsigned 32-bit maximum: forwards __A and __B to
   __builtin_ia32_pmaxud256_mask with __W as the pass-through operand
   and __M as the write mask.  */
extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_max_epu32 (__m256i __W, __mmask8 __M, __m256i __A, __m256i __B)
{
  __v8si __lhs = (__v8si) __A;
  __v8si __rhs = (__v8si) __B;
  __v8si __src = (__v8si) __W;

  return (__m256i) __builtin_ia32_pmaxud256_mask (__lhs, __rhs, __src, __M);
}

/* Zero-masked 256-bit unsigned 32-bit minimum: calls
   __builtin_ia32_pminud256_mask with an all-zero vector in the
   pass-through slot and __M as the write mask.  */
extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_maskz_min_epu32 (__mmask8 __M, __m256i __A, __m256i __B)
{
  __v8si __lhs = (__v8si) __A;
  __v8si __rhs = (__v8si) __B;
  __v8si __zero = (__v8si) _mm256_setzero_si256 ();

  return (__m256i) __builtin_ia32_pminud256_mask (__lhs, __rhs, __zero, __M);
}

/* Merge-masked 256-bit unsigned 32-bit minimum: forwards __A and __B to
   __builtin_ia32_pminud256_mask with __W as the pass-through operand
   and __M as the write mask.  */
extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_min_epu32 (__m256i __W, __mmask8 __M, __m256i __A, __m256i __B)
{
  __v8si __lhs = (__v8si) __A;
  __v8si __rhs = (__v8si) __B;
  __v8si __src = (__v8si) __W;

  return (__m256i) __builtin_ia32_pminud256_mask (__lhs, __rhs, __src, __M);
}

/* Zero-masked 128-bit signed 64-bit maximum: calls
   __builtin_ia32_pmaxsq128_mask with an all-zero vector in the
   pass-through slot and __M as the write mask.  */
extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_max_epi64 (__mmask8 __M, __m128i __A, __m128i __B)
{
  __v2di __lhs = (__v2di) __A;
  __v2di __rhs = (__v2di) __B;
  __v2di __zero = (__v2di) _mm_setzero_si128 ();

  return (__m128i) __builtin_ia32_pmaxsq128_mask (__lhs, __rhs, __zero, __M);
}

/* Merge-masked 128-bit signed 64-bit maximum: forwards __A and __B to
   __builtin_ia32_pmaxsq128_mask with __W as the pass-through operand
   and __M as the write mask.  */
extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_max_epi64 (__m128i __W, __mmask8 __M, __m128i __A, __m128i __B)
{
  __v2di __lhs = (__v2di) __A;
  __v2di __rhs = (__v2di) __B;
  __v2di __src = (__v2di) __W;

  return (__m128i) __builtin_ia32_pmaxsq128_mask (__lhs, __rhs, __src, __M);
}

/* Unmasked 128-bit signed 64-bit minimum: uses
   __builtin_ia32_pminsq128_mask with a zero pass-through vector and the
   mask (__mmask8) -1.  */
extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_min_epi64 (__m128i __A, __m128i __B)
{
  __v2di __lhs = (__v2di) __A;
  __v2di __rhs = (__v2di) __B;
  __v2di __zero = (__v2di) _mm_setzero_si128 ();
  __mmask8 __all = (__mmask8) -1;

  return (__m128i) __builtin_ia32_pminsq128_mask (__lhs, __rhs, __zero, __all);
}

/* Merge-masked 128-bit signed 64-bit minimum: forwards __A and __B to
   __builtin_ia32_pminsq128_mask with __W as the pass-through operand
   and __M as the write mask.  */
extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_min_epi64 (__m128i __W, __mmask8 __M, __m128i __A, __m128i __B)
{
  __v2di __lhs = (__v2di) __A;
  __v2di __rhs = (__v2di) __B;
  __v2di __src = (__v2di) __W;

  return (__m128i) __builtin_ia32_pminsq128_mask (__lhs, __rhs, __src, __M);
}

/* Zero-masked 128-bit signed 64-bit minimum: calls
   __builtin_ia32_pminsq128_mask with an all-zero vector in the
   pass-through slot and __M as the write mask.  */
extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_min_epi64 (__mmask8 __M, __m128i __A, __m128i __B)
{
  __v2di __lhs = (__v2di) __A;
  __v2di __rhs = (__v2di) __B;
  __v2di __zero = (__v2di) _mm_setzero_si128 ();

  return (__m128i) __builtin_ia32_pminsq128_mask (__lhs, __rhs, __zero, __M);
}

/* Zero-masked 128-bit unsigned 64-bit maximum: calls
   __builtin_ia32_pmaxuq128_mask with an all-zero vector in the
   pass-through slot and __M as the write mask.  */
extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_max_epu64 (__mmask8 __M, __m128i __A, __m128i __B)
{
  __v2di __lhs = (__v2di) __A;
  __v2di __rhs = (__v2di) __B;
  __v2di __zero = (__v2di) _mm_setzero_si128 ();

  return (__m128i) __builtin_ia32_pmaxuq128_mask (__lhs, __rhs, __zero, __M);
}

/* Unmasked 128-bit signed 64-bit maximum: uses
   __builtin_ia32_pmaxsq128_mask with a zero pass-through vector and the
   mask (__mmask8) -1.  */
extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_max_epi64 (__m128i __A, __m128i __B)
{
  __v2di __lhs = (__v2di) __A;
  __v2di __rhs = (__v2di) __B;
  __v2di __zero = (__v2di) _mm_setzero_si128 ();
  __mmask8 __all = (__mmask8) -1;

  return (__m128i) __builtin_ia32_pmaxsq128_mask (__lhs, __rhs, __zero, __all);
}

/* Unmasked 128-bit unsigned 64-bit maximum: uses
   __builtin_ia32_pmaxuq128_mask with a zero pass-through vector and the
   mask (__mmask8) -1.  */
extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_max_epu64 (__m128i __A, __m128i __B)
{
  __v2di __lhs = (__v2di) __A;
  __v2di __rhs = (__v2di) __B;
  __v2di __zero = (__v2di) _mm_setzero_si128 ();
  __mmask8 __all = (__mmask8) -1;

  return (__m128i) __builtin_ia32_pmaxuq128_mask (__lhs, __rhs, __zero, __all);
}

/* Merge-masked 128-bit unsigned 64-bit maximum: forwards __A and __B to
   __builtin_ia32_pmaxuq128_mask with __W as the pass-through operand
   and __M as the write mask.  */
extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_max_epu64 (__m128i __W, __mmask8 __M, __m128i __A, __m128i __B)
{
  __v2di __lhs = (__v2di) __A;
  __v2di __rhs = (__v2di) __B;
  __v2di __src = (__v2di) __W;

  return (__m128i) __builtin_ia32_pmaxuq128_mask (__lhs, __rhs, __src, __M);
}

/* Unmasked 128-bit unsigned 64-bit minimum: uses
   __builtin_ia32_pminuq128_mask with a zero pass-through vector and the
   mask (__mmask8) -1.  */
extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_min_epu64 (__m128i __A, __m128i __B)
{
  __v2di __lhs = (__v2di) __A;
  __v2di __rhs = (__v2di) __B;
  __v2di __zero = (__v2di) _mm_setzero_si128 ();
  __mmask8 __all = (__mmask8) -1;

  return (__m128i) __builtin_ia32_pminuq128_mask (__lhs, __rhs, __zero, __all);
}

/* Merge-masked 128-bit unsigned 64-bit minimum: forwards __A and __B to
   __builtin_ia32_pminuq128_mask with __W as the pass-through operand
   and __M as the write mask.  */
extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_min_epu64 (__m128i __W, __mmask8 __M, __m128i __A, __m128i __B)
{
  __v2di __lhs = (__v2di) __A;
  __v2di __rhs = (__v2di) __B;
  __v2di __src = (__v2di) __W;

  return (__m128i) __builtin_ia32_pminuq128_mask (__lhs, __rhs, __src, __M);
}

/* Zero-masked 128-bit unsigned 64-bit minimum: calls
   __builtin_ia32_pminuq128_mask with an all-zero vector in the
   pass-through slot and __M as the write mask.  */
extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_min_epu64 (__mmask8 __M, __m128i __A, __m128i __B)
{
  __v2di __lhs = (__v2di) __A;
  __v2di __rhs = (__v2di) __B;
  __v2di __zero = (__v2di) _mm_setzero_si128 ();

  return (__m128i) __builtin_ia32_pminuq128_mask (__lhs, __rhs, __zero, __M);
}

/* Zero-masked 128-bit signed 32-bit maximum: calls
   __builtin_ia32_pmaxsd128_mask with an all-zero vector in the
   pass-through slot and __M as the write mask.  */
extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_max_epi32 (__mmask8 __M, __m128i __A, __m128i __B)
{
  __v4si __lhs = (__v4si) __A;
  __v4si __rhs = (__v4si) __B;
  __v4si __zero = (__v4si) _mm_setzero_si128 ();

  return (__m128i) __builtin_ia32_pmaxsd128_mask (__lhs, __rhs, __zero, __M);
}

/* Merge-masked 128-bit signed 32-bit maximum: forwards __A and __B to
   __builtin_ia32_pmaxsd128_mask with __W as the pass-through operand
   and __M as the write mask.  */
extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_max_epi32 (__m128i __W, __mmask8 __M, __m128i __A, __m128i __B)
{
  __v4si __lhs = (__v4si) __A;
  __v4si __rhs = (__v4si) __B;
  __v4si __src = (__v4si) __W;

  return (__m128i) __builtin_ia32_pmaxsd128_mask (__lhs, __rhs, __src, __M);
}

/* Zero-masked 128-bit signed 32-bit minimum: calls
   __builtin_ia32_pminsd128_mask with an all-zero vector in the
   pass-through slot and __M as the write mask.  */
extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_min_epi32 (__mmask8 __M, __m128i __A, __m128i __B)
{
  __v4si __lhs = (__v4si) __A;
  __v4si __rhs = (__v4si) __B;
  __v4si __zero = (__v4si) _mm_setzero_si128 ();

  return (__m128i) __builtin_ia32_pminsd128_mask (__lhs, __rhs, __zero, __M);
}

/* Merge-masked 128-bit signed 32-bit minimum: forwards __A and __B to
   __builtin_ia32_pminsd128_mask with __W as the pass-through operand
   and __M as the write mask.  */
extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_min_epi32 (__m128i __W, __mmask8 __M, __m128i __A, __m128i __B)
{
  __v4si __lhs = (__v4si) __A;
  __v4si __rhs = (__v4si) __B;
  __v4si __src = (__v4si) __W;

  return (__m128i) __builtin_ia32_pminsd128_mask (__lhs, __rhs, __src, __M);
}

/* Zero-masked 128-bit unsigned 32-bit maximum: calls
   __builtin_ia32_pmaxud128_mask with an all-zero vector in the
   pass-through slot and __M as the write mask.  */
extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_max_epu32 (__mmask8 __M, __m128i __A, __m128i __B)
{
  __v4si __lhs = (__v4si) __A;
  __v4si __rhs = (__v4si) __B;
  __v4si __zero = (__v4si) _mm_setzero_si128 ();

  return (__m128i) __builtin_ia32_pmaxud128_mask (__lhs, __rhs, __zero, __M);
}

/* Merge-masked 128-bit unsigned 32-bit maximum: forwards __A and __B to
   __builtin_ia32_pmaxud128_mask with __W as the pass-through operand
   and __M as the write mask.  */
extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_max_epu32 (__m128i __W, __mmask8 __M, __m128i __A, __m128i __B)
{
  __v4si __lhs = (__v4si) __A;
  __v4si __rhs = (__v4si) __B;
  __v4si __src = (__v4si) __W;

  return (__m128i) __builtin_ia32_pmaxud128_mask (__lhs, __rhs, __src, __M);
}

/* Zero-masked 128-bit unsigned 32-bit minimum: calls
   __builtin_ia32_pminud128_mask with an all-zero vector in the
   pass-through slot and __M as the write mask.  */
extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_min_epu32 (__mmask8 __M, __m128i __A, __m128i __B)
{
  __v4si __lhs = (__v4si) __A;
  __v4si __rhs = (__v4si) __B;
  __v4si __zero = (__v4si) _mm_setzero_si128 ();

  return (__m128i) __builtin_ia32_pminud128_mask (__lhs, __rhs, __zero, __M);
}

/* Merge-masked 128-bit unsigned 32-bit minimum: forwards __A and __B to
   __builtin_ia32_pminud128_mask with __W as the pass-through operand
   and __M as the write mask.  */
extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_min_epu32 (__m128i __W, __mmask8 __M, __m128i __A, __m128i __B)
{
  __v4si __lhs = (__v4si) __A;
  __v4si __rhs = (__v4si) __B;
  __v4si __src = (__v4si) __W;

  return (__m128i) __builtin_ia32_pminud128_mask (__lhs, __rhs, __src, __M);
}

/* If AVX512CD is not already enabled, save the current target options
   and switch to "avx512vl,avx512cd" for the code that follows.
   __DISABLE_AVX512VLCD__ records that options were pushed, so that a
   later #ifdef on it can pop them again.  */
#ifndef __AVX512CD__
#pragma GCC push_options
#pragma GCC target("avx512vl,avx512cd")
#define __DISABLE_AVX512VLCD__
#endif

/* Wraps __builtin_ia32_broadcastmb128: hands it the 8-bit mask __A and
   returns the builtin's result as an __m128i.  */
extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_broadcastmb_epi64 (__mmask8 __A)
{
  __m128i __res = (__m128i) __builtin_ia32_broadcastmb128 (__A);

  return __res;
}

/* Wraps __builtin_ia32_broadcastmb256: hands it the 8-bit mask __A and
   returns the builtin's result as an __m256i.  */
extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_broadcastmb_epi64 (__mmask8 __A)
{
  __m256i __res = (__m256i) __builtin_ia32_broadcastmb256 (__A);

  return __res;
}

/* Wraps __builtin_ia32_broadcastmw128: hands it the 16-bit mask __A and
   returns the builtin's result as an __m128i.  */
extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_broadcastmw_epi32 (__mmask16 __A)
{
  __m128i __res = (__m128i) __builtin_ia32_broadcastmw128 (__A);

  return __res;
}

/* Wraps __builtin_ia32_broadcastmw256: hands it the 16-bit mask __A and
   returns the builtin's result as an __m256i.  */
extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_broadcastmw_epi32 (__mmask16 __A)
{
  __m256i __res = (__m256i) __builtin_ia32_broadcastmw256 (__A);

  return __res;
}

/* Unmasked 256-bit leading-zero count on 32-bit elements: uses
   __builtin_ia32_vplzcntd_256_mask with a zero pass-through vector and
   the mask (__mmask8) -1.  */
extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_lzcnt_epi32 (__m256i __A)
{
  __v8si __val = (__v8si) __A;
  __v8si __zero = (__v8si) _mm256_setzero_si256 ();
  __mmask8 __all = (__mmask8) -1;

  return (__m256i) __builtin_ia32_vplzcntd_256_mask (__val, __zero, __all);
}

/* Merge-masked 256-bit leading-zero count on 32-bit elements: forwards
   __A to __builtin_ia32_vplzcntd_256_mask with __W as the pass-through
   operand and __U as the write mask.  */
extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_lzcnt_epi32 (__m256i __W, __mmask8 __U, __m256i __A)
{
  __v8si __val = (__v8si) __A;
  __v8si __src = (__v8si) __W;
  __mmask8 __k = (__mmask8) __U;

  return (__m256i) __builtin_ia32_vplzcntd_256_mask (__val, __src, __k);
}

/* Zero-masked 256-bit leading-zero count on 32-bit elements: calls
   __builtin_ia32_vplzcntd_256_mask with an all-zero vector in the
   pass-through slot and __U as the write mask.  */
extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_maskz_lzcnt_epi32 (__mmask8 __U, __m256i __A)
{
  __v8si __val = (__v8si) __A;
  __v8si __zero = (__v8si) _mm256_setzero_si256 ();
  __mmask8 __k = (__mmask8) __U;

  return (__m256i) __builtin_ia32_vplzcntd_256_mask (__val, __zero, __k);
}

/* Unmasked 256-bit leading-zero count on 64-bit elements: uses
   __builtin_ia32_vplzcntq_256_mask with a zero pass-through vector and
   the mask (__mmask8) -1.  */
extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_lzcnt_epi64 (__m256i __A)
{
  __v4di __val = (__v4di) __A;
  __v4di __zero = (__v4di) _mm256_setzero_si256 ();
  __mmask8 __all = (__mmask8) -1;

  return (__m256i) __builtin_ia32_vplzcntq_256_mask (__val, __zero, __all);
}

/* Merge-masked 256-bit leading-zero count on 64-bit elements: forwards
   __A to __builtin_ia32_vplzcntq_256_mask with __W as the pass-through
   operand and __U as the write mask.  */
extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_lzcnt_epi64 (__m256i __W, __mmask8 __U, __m256i __A)
{
  __v4di __val = (__v4di) __A;
  __v4di __src = (__v4di) __W;
  __mmask8 __k = (__mmask8) __U;

  return (__m256i) __builtin_ia32_vplzcntq_256_mask (__val, __src, __k);
}

/* Zero-masked 256-bit leading-zero count on 64-bit elements: calls
   __builtin_ia32_vplzcntq_256_mask with an all-zero vector in the
   pass-through slot and __U as the write mask.  */
extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_maskz_lzcnt_epi64 (__mmask8 __U, __m256i __A)
{
  __v4di __val = (__v4di) __A;
  __v4di __zero = (__v4di) _mm256_setzero_si256 ();
  __mmask8 __k = (__mmask8) __U;

  return (__m256i) __builtin_ia32_vplzcntq_256_mask (__val, __zero, __k);
}

/* Unmasked 256-bit conflict detection on 64-bit elements: uses
   __builtin_ia32_vpconflictdi_256_mask with a zero pass-through vector
   and the mask (__mmask8) -1.  */
extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_conflict_epi64 (__m256i __A)
{
  __v4di __val = (__v4di) __A;
  __v4di __zero = (__v4di) _mm256_setzero_si256 ();
  __mmask8 __all = (__mmask8) -1;

  return (__m256i) __builtin_ia32_vpconflictdi_256_mask (__val, __zero, __all);
}

/* Merge-masked 256-bit conflict detection on 64-bit elements: forwards
   __A to __builtin_ia32_vpconflictdi_256_mask with __W as the
   pass-through operand and __U as the write mask.  */
extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_conflict_epi64 (__m256i __W, __mmask8 __U, __m256i __A)
{
  __v4di __val = (__v4di) __A;
  __v4di __src = (__v4di) __W;
  __mmask8 __k = (__mmask8) __U;

  return (__m256i) __builtin_ia32_vpconflictdi_256_mask (__val, __src, __k);
}

/* Zero-masked 256-bit conflict detection on 64-bit elements: calls
   __builtin_ia32_vpconflictdi_256_mask with an all-zero vector in the
   pass-through slot and __U as the write mask.  */
extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_maskz_conflict_epi64 (__mmask8 __U, __m256i __A)
{
  __v4di __val = (__v4di) __A;
  __v4di __zero = (__v4di) _mm256_setzero_si256 ();
  __mmask8 __k = (__mmask8) __U;

  return (__m256i) __builtin_ia32_vpconflictdi_256_mask (__val, __zero, __k);
}

/* Unmasked 256-bit conflict detection on 32-bit elements: uses
   __builtin_ia32_vpconflictsi_256_mask with a zero pass-through vector
   and the mask (__mmask8) -1.  */
extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_conflict_epi32 (__m256i __A)
{
  __v8si __val = (__v8si) __A;
  __v8si __zero = (__v8si) _mm256_setzero_si256 ();
  __mmask8 __all = (__mmask8) -1;

  return (__m256i) __builtin_ia32_vpconflictsi_256_mask (__val, __zero, __all);
}

/* Merge-masked 256-bit conflict detection on 32-bit elements: forwards
   __A to __builtin_ia32_vpconflictsi_256_mask with __W as the
   pass-through operand and __U as the write mask.  */
extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_conflict_epi32 (__m256i __W, __mmask8 __U, __m256i __A)
{
  __v8si __val = (__v8si) __A;
  __v8si __src = (__v8si) __W;
  __mmask8 __k = (__mmask8) __U;

  return (__m256i) __builtin_ia32_vpconflictsi_256_mask (__val, __src, __k);
}

/* Zero-masked 256-bit conflict detection on 32-bit elements: calls
   __builtin_ia32_vpconflictsi_256_mask with an all-zero vector in the
   pass-through slot and __U as the write mask.  */
extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_maskz_conflict_epi32 (__mmask8 __U, __m256i __A)
{
  __v8si __val = (__v8si) __A;
  __v8si __zero = (__v8si) _mm256_setzero_si256 ();
  __mmask8 __k = (__mmask8) __U;

  return (__m256i) __builtin_ia32_vpconflictsi_256_mask (__val, __zero, __k);
}

/* Unmasked 128-bit leading-zero count on 32-bit elements: uses
   __builtin_ia32_vplzcntd_128_mask with a zero pass-through vector and
   the mask (__mmask8) -1.  */
extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_lzcnt_epi32 (__m128i __A)
{
  __v4si __val = (__v4si) __A;
  __v4si __zero = (__v4si) _mm_setzero_si128 ();
  __mmask8 __all = (__mmask8) -1;

  return (__m128i) __builtin_ia32_vplzcntd_128_mask (__val, __zero, __all);
}

/* Merge-masked 128-bit leading-zero count on 32-bit elements: forwards
   __A to __builtin_ia32_vplzcntd_128_mask with __W as the pass-through
   operand and __U as the write mask.  */
extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_lzcnt_epi32 (__m128i __W, __mmask8 __U, __m128i __A)
{
  __v4si __val = (__v4si) __A;
  __v4si __src = (__v4si) __W;
  __mmask8 __k = (__mmask8) __U;

  return (__m128i) __builtin_ia32_vplzcntd_128_mask (__val, __src, __k);
}

/* Zero-masked 128-bit leading-zero count on 32-bit elements: calls
   __builtin_ia32_vplzcntd_128_mask with an all-zero vector in the
   pass-through slot and __U as the write mask.  */
extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_lzcnt_epi32 (__mmask8 __U, __m128i __A)
{
  __v4si __val = (__v4si) __A;
  __v4si __zero = (__v4si) _mm_setzero_si128 ();
  __mmask8 __k = (__mmask8) __U;

  return (__m128i) __builtin_ia32_vplzcntd_128_mask (__val, __zero, __k);
}

/* Unmasked 128-bit leading-zero count on 64-bit elements: uses
   __builtin_ia32_vplzcntq_128_mask with a zero pass-through vector and
   the mask (__mmask8) -1.  */
extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_lzcnt_epi64 (__m128i __A)
{
  __v2di __val = (__v2di) __A;
  __v2di __zero = (__v2di) _mm_setzero_si128 ();
  __mmask8 __all = (__mmask8) -1;

  return (__m128i) __builtin_ia32_vplzcntq_128_mask (__val, __zero, __all);
}

/* Merge-masked 128-bit leading-zero count on 64-bit elements: forwards
   __A to __builtin_ia32_vplzcntq_128_mask with __W as the pass-through
   operand and __U as the write mask.  */
extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_lzcnt_epi64 (__m128i __W, __mmask8 __U, __m128i __A)
{
  __v2di __val = (__v2di) __A;
  __v2di __src = (__v2di) __W;
  __mmask8 __k = (__mmask8) __U;

  return (__m128i) __builtin_ia32_vplzcntq_128_mask (__val, __src, __k);
}

/* Zero-masked 128-bit leading-zero count on 64-bit elements: calls
   __builtin_ia32_vplzcntq_128_mask with an all-zero vector in the
   pass-through slot and __U as the write mask.  */
extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_lzcnt_epi64 (__mmask8 __U, __m128i __A)
{
  __v2di __val = (__v2di) __A;
  __v2di __zero = (__v2di) _mm_setzero_si128 ();
  __mmask8 __k = (__mmask8) __U;

  return (__m128i) __builtin_ia32_vplzcntq_128_mask (__val, __zero, __k);
}

/* Unmasked 128-bit conflict detection on 64-bit elements: uses
   __builtin_ia32_vpconflictdi_128_mask with a zero pass-through vector
   and the mask (__mmask8) -1.  */
extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_conflict_epi64 (__m128i __A)
{
  __v2di __val = (__v2di) __A;
  __v2di __zero = (__v2di) _mm_setzero_si128 ();
  __mmask8 __all = (__mmask8) -1;

  return (__m128i) __builtin_ia32_vpconflictdi_128_mask (__val, __zero, __all);
}

/* Merge-masked 128-bit conflict detection on 64-bit elements: forwards
   __A to __builtin_ia32_vpconflictdi_128_mask with __W as the
   pass-through operand and __U as the write mask.  */
extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_conflict_epi64 (__m128i __W, __mmask8 __U, __m128i __A)
{
  __v2di __val = (__v2di) __A;
  __v2di __src = (__v2di) __W;
  __mmask8 __k = (__mmask8) __U;

  return (__m128i) __builtin_ia32_vpconflictdi_128_mask (__val, __src, __k);
}

/* Zero-masked 128-bit conflict detection on 64-bit elements: calls
   __builtin_ia32_vpconflictdi_128_mask with an all-zero vector in the
   pass-through slot and __U as the write mask.  */
extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_conflict_epi64 (__mmask8 __U, __m128i __A)
{
  __v2di __val = (__v2di) __A;
  __v2di __zero = (__v2di) _mm_setzero_si128 ();
  __mmask8 __k = (__mmask8) __U;

  return (__m128i) __builtin_ia32_vpconflictdi_128_mask (__val, __zero, __k);
}

/* Unmasked 128-bit conflict detection on 32-bit elements: uses
   __builtin_ia32_vpconflictsi_128_mask with a zero pass-through vector
   and the mask (__mmask8) -1.  */
extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_conflict_epi32 (__m128i __A)
{
  __v4si __val = (__v4si) __A;
  __v4si __zero = (__v4si) _mm_setzero_si128 ();
  __mmask8 __all = (__mmask8) -1;

  return (__m128i) __builtin_ia32_vpconflictsi_128_mask (__val, __zero, __all);
}

/* Merge-masked 128-bit conflict detection on 32-bit elements: forwards
   __A to __builtin_ia32_vpconflictsi_128_mask with __W as the
   pass-through operand and __U as the write mask.  */
extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_conflict_epi32 (__m128i __W, __mmask8 __U, __m128i __A)
{
  __v4si __val = (__v4si) __A;
  __v4si __src = (__v4si) __W;
  __mmask8 __k = (__mmask8) __U;

  return (__m128i) __builtin_ia32_vpconflictsi_128_mask (__val, __src, __k);
}

/* Zero-masked 128-bit conflict detection on 32-bit elements: calls
   __builtin_ia32_vpconflictsi_128_mask with an all-zero vector in the
   pass-through slot and __U as the write mask.  */
extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_conflict_epi32 (__mmask8 __U, __m128i __A)
{
  __v4si __val = (__v4si) __A;
  __v4si __zero = (__v4si) _mm_setzero_si128 ();
  __mmask8 __k = (__mmask8) __U;

  return (__m128i) __builtin_ia32_vpconflictsi_128_mask (__val, __zero, __k);
}

/* Pop the target options again if __DISABLE_AVX512VLCD__ is defined,
   i.e. if they were pushed to enable AVX512CD.  The macro itself is not
   undefined here.  */
#ifdef __DISABLE_AVX512VLCD__
#pragma GCC pop_options
#endif

/* Masked form: passes __A, __B and __W (all as __v4df) plus the mask
   __U to __builtin_ia32_unpcklpd256_mask.  */
extern __inline __m256d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_unpacklo_pd (__m256d __W, __mmask8 __U, __m256d __A,
			 __m256d __B)
{
  const __v4df __va = (__v4df) __A;
  const __v4df __vb = (__v4df) __B;
  const __v4df __vw = (__v4df) __W;

  return (__m256d) __builtin_ia32_unpcklpd256_mask (__va, __vb, __vw, __U);
}

/* Zeroing form: passes __A and __B (as __v4df), a zero vector from
   _mm256_setzero_pd () and the mask __U to
   __builtin_ia32_unpcklpd256_mask.  */
extern __inline __m256d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_maskz_unpacklo_pd (__mmask8 __U, __m256d __A, __m256d __B)
{
  const __v4df __va = (__v4df) __A;
  const __v4df __vb = (__v4df) __B;
  const __v4df __vz = (__v4df) _mm256_setzero_pd ();

  return (__m256d) __builtin_ia32_unpcklpd256_mask (__va, __vb, __vz, __U);
}

/* Masked form: passes __A, __B and __W (all as __v2df) plus the mask
   __U to __builtin_ia32_unpcklpd128_mask.  */
extern __inline __m128d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_unpacklo_pd (__m128d __W, __mmask8 __U, __m128d __A,
		      __m128d __B)
{
  const __v2df __va = (__v2df) __A;
  const __v2df __vb = (__v2df) __B;
  const __v2df __vw = (__v2df) __W;

  return (__m128d) __builtin_ia32_unpcklpd128_mask (__va, __vb, __vw, __U);
}

/* Zeroing form: passes __A and __B (as __v2df), a zero vector from
   _mm_setzero_pd () and the mask __U to
   __builtin_ia32_unpcklpd128_mask.  */
extern __inline __m128d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_unpacklo_pd (__mmask8 __U, __m128d __A, __m128d __B)
{
  const __v2df __va = (__v2df) __A;
  const __v2df __vb = (__v2df) __B;
  const __v2df __vz = (__v2df) _mm_setzero_pd ();

  return (__m128d) __builtin_ia32_unpcklpd128_mask (__va, __vb, __vz, __U);
}

/* Masked form: passes __A, __B and __W (all as __v8sf) plus the mask
   __U to __builtin_ia32_unpcklps256_mask.  */
extern __inline __m256
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_unpacklo_ps (__m256 __W, __mmask8 __U, __m256 __A,
			 __m256 __B)
{
  const __v8sf __va = (__v8sf) __A;
  const __v8sf __vb = (__v8sf) __B;
  const __v8sf __vw = (__v8sf) __W;

  return (__m256) __builtin_ia32_unpcklps256_mask (__va, __vb, __vw, __U);
}

/* Masked form: passes __A, __B and __W (all as __v4df) plus the mask
   __U to __builtin_ia32_unpckhpd256_mask.  */
extern __inline __m256d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_unpackhi_pd (__m256d __W, __mmask8 __U, __m256d __A,
			 __m256d __B)
{
  const __v4df __va = (__v4df) __A;
  const __v4df __vb = (__v4df) __B;
  const __v4df __vw = (__v4df) __W;

  return (__m256d) __builtin_ia32_unpckhpd256_mask (__va, __vb, __vw, __U);
}

/* Zeroing form: passes __A and __B (as __v4df), a zero vector from
   _mm256_setzero_pd () and the mask __U to
   __builtin_ia32_unpckhpd256_mask.  */
extern __inline __m256d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_maskz_unpackhi_pd (__mmask8 __U, __m256d __A, __m256d __B)
{
  const __v4df __va = (__v4df) __A;
  const __v4df __vb = (__v4df) __B;
  const __v4df __vz = (__v4df) _mm256_setzero_pd ();

  return (__m256d) __builtin_ia32_unpckhpd256_mask (__va, __vb, __vz, __U);
}

/* Masked form: passes __A, __B and __W (all as __v2df) plus the mask
   __U to __builtin_ia32_unpckhpd128_mask.  */
extern __inline __m128d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_unpackhi_pd (__m128d __W, __mmask8 __U, __m128d __A,
		      __m128d __B)
{
  const __v2df __va = (__v2df) __A;
  const __v2df __vb = (__v2df) __B;
  const __v2df __vw = (__v2df) __W;

  return (__m128d) __builtin_ia32_unpckhpd128_mask (__va, __vb, __vw, __U);
}

/* Zeroing form: passes __A and __B (as __v2df), a zero vector from
   _mm_setzero_pd () and the mask __U to
   __builtin_ia32_unpckhpd128_mask.  */
extern __inline __m128d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_unpackhi_pd (__mmask8 __U, __m128d __A, __m128d __B)
{
  const __v2df __va = (__v2df) __A;
  const __v2df __vb = (__v2df) __B;
  const __v2df __vz = (__v2df) _mm_setzero_pd ();

  return (__m128d) __builtin_ia32_unpckhpd128_mask (__va, __vb, __vz, __U);
}

/* Masked form: passes __A, __B and __W (all as __v8sf) plus the mask
   __U to __builtin_ia32_unpckhps256_mask.  */
extern __inline __m256
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_unpackhi_ps (__m256 __W, __mmask8 __U, __m256 __A,
			 __m256 __B)
{
  const __v8sf __va = (__v8sf) __A;
  const __v8sf __vb = (__v8sf) __B;
  const __v8sf __vw = (__v8sf) __W;

  return (__m256) __builtin_ia32_unpckhps256_mask (__va, __vb, __vw, __U);
}

/* Zeroing form: passes __A and __B (as __v8sf), a zero vector from
   _mm256_setzero_ps () and the mask __U to
   __builtin_ia32_unpckhps256_mask.  */
extern __inline __m256
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_maskz_unpackhi_ps (__mmask8 __U, __m256 __A, __m256 __B)
{
  const __v8sf __va = (__v8sf) __A;
  const __v8sf __vb = (__v8sf) __B;
  const __v8sf __vz = (__v8sf) _mm256_setzero_ps ();

  return (__m256) __builtin_ia32_unpckhps256_mask (__va, __vb, __vz, __U);
}

/* Masked form: passes __A, __B and __W (all as __v4sf) plus the mask
   __U to __builtin_ia32_unpckhps128_mask.  */
extern __inline __m128
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_unpackhi_ps (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
{
  const __v4sf __va = (__v4sf) __A;
  const __v4sf __vb = (__v4sf) __B;
  const __v4sf __vw = (__v4sf) __W;

  return (__m128) __builtin_ia32_unpckhps128_mask (__va, __vb, __vw, __U);
}

/* Zeroing form: passes __A and __B (as __v4sf), a zero vector from
   _mm_setzero_ps () and the mask __U to
   __builtin_ia32_unpckhps128_mask.  */
extern __inline __m128
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_unpackhi_ps (__mmask8 __U, __m128 __A, __m128 __B)
{
  const __v4sf __va = (__v4sf) __A;
  const __v4sf __vb = (__v4sf) __B;
  const __v4sf __vz = (__v4sf) _mm_setzero_ps ();

  return (__m128) __builtin_ia32_unpckhps128_mask (__va, __vb, __vz, __U);
}

/* Masked form: passes __A (as __v8hi), __W (as __v4sf) and the mask
   __U to __builtin_ia32_vcvtph2ps_mask.  */
extern __inline __m128
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_cvtph_ps (__m128 __W, __mmask8 __U, __m128i __A)
{
  const __v8hi __va = (__v8hi) __A;
  const __v4sf __vw = (__v4sf) __W;

  return (__m128) __builtin_ia32_vcvtph2ps_mask (__va, __vw, __U);
}

/* Zeroing form: passes __A (as __v8hi), a zero vector from
   _mm_setzero_ps () and the mask __U to
   __builtin_ia32_vcvtph2ps_mask.  */
extern __inline __m128
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_cvtph_ps (__mmask8 __U, __m128i __A)
{
  const __v8hi __va = (__v8hi) __A;
  const __v4sf __vz = (__v4sf) _mm_setzero_ps ();

  return (__m128) __builtin_ia32_vcvtph2ps_mask (__va, __vz, __U);
}

/* Zeroing form: passes __A and __B (as __v8sf), a zero vector from
   _mm256_setzero_ps () and the mask __U to
   __builtin_ia32_unpcklps256_mask.  */
extern __inline __m256
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_maskz_unpacklo_ps (__mmask8 __U, __m256 __A, __m256 __B)
{
  const __v8sf __va = (__v8sf) __A;
  const __v8sf __vb = (__v8sf) __B;
  const __v8sf __vz = (__v8sf) _mm256_setzero_ps ();

  return (__m256) __builtin_ia32_unpcklps256_mask (__va, __vb, __vz, __U);
}

/* Masked form: passes __A (as __v8hi), __W (as __v8sf) and the mask
   __U to __builtin_ia32_vcvtph2ps256_mask.  */
extern __inline __m256
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_cvtph_ps (__m256 __W, __mmask8 __U, __m128i __A)
{
  const __v8hi __va = (__v8hi) __A;
  const __v8sf __vw = (__v8sf) __W;

  return (__m256) __builtin_ia32_vcvtph2ps256_mask (__va, __vw, __U);
}

/* Zeroing form: passes __A (as __v8hi), a zero vector from
   _mm256_setzero_ps () and the mask __U to
   __builtin_ia32_vcvtph2ps256_mask.  */
extern __inline __m256
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_maskz_cvtph_ps (__mmask8 __U, __m128i __A)
{
  const __v8hi __va = (__v8hi) __A;
  const __v8sf __vz = (__v8sf) _mm256_setzero_ps ();

  return (__m256) __builtin_ia32_vcvtph2ps256_mask (__va, __vz, __U);
}

/* Masked form: passes __A, __B and __W (all as __v4sf) plus the mask
   __U to __builtin_ia32_unpcklps128_mask.  */
extern __inline __m128
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_unpacklo_ps (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
{
  const __v4sf __va = (__v4sf) __A;
  const __v4sf __vb = (__v4sf) __B;
  const __v4sf __vw = (__v4sf) __W;

  return (__m128) __builtin_ia32_unpcklps128_mask (__va, __vb, __vw, __U);
}

/* Zeroing form: passes __A and __B (as __v4sf), a zero vector from
   _mm_setzero_ps () and the mask __U to
   __builtin_ia32_unpcklps128_mask.  */
extern __inline __m128
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_unpacklo_ps (__mmask8 __U, __m128 __A, __m128 __B)
{
  const __v4sf __va = (__v4sf) __A;
  const __v4sf __vb = (__v4sf) __B;
  const __v4sf __vz = (__v4sf) _mm_setzero_ps ();

  return (__m128) __builtin_ia32_unpcklps128_mask (__va, __vb, __vz, __U);
}

/* Masked form: passes __A (as __v8si), the 128-bit operand __B (as
   __v4si), __W (as __v8si) and the mask __U to
   __builtin_ia32_psrad256_mask.  */
extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_sra_epi32 (__m256i __W, __mmask8 __U, __m256i __A,
		       __m128i __B)
{
  const __v8si __va = (__v8si) __A;
  const __v4si __vb = (__v4si) __B;
  const __v8si __vw = (__v8si) __W;

  return (__m256i) __builtin_ia32_psrad256_mask (__va, __vb, __vw, __U);
}

/* Zeroing form: passes __A (as __v8si), __B (as __v4si), a zero vector
   from _mm256_setzero_si256 () and the mask __U to
   __builtin_ia32_psrad256_mask.  */
extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_maskz_sra_epi32 (__mmask8 __U, __m256i __A, __m128i __B)
{
  const __v8si __va = (__v8si) __A;
  const __v4si __vb = (__v4si) __B;
  const __v8si __vz = (__v8si) _mm256_setzero_si256 ();

  return (__m256i) __builtin_ia32_psrad256_mask (__va, __vb, __vz, __U);
}

/* Masked form: passes __A, __B and __W (all as __v4si) plus the mask
   __U to __builtin_ia32_psrad128_mask.  */
extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_sra_epi32 (__m128i __W, __mmask8 __U, __m128i __A,
		    __m128i __B)
{
  const __v4si __va = (__v4si) __A;
  const __v4si __vb = (__v4si) __B;
  const __v4si __vw = (__v4si) __W;

  return (__m128i) __builtin_ia32_psrad128_mask (__va, __vb, __vw, __U);
}

/* Zeroing form: passes __A and __B (as __v4si), a zero vector from
   _mm_setzero_si128 () and the mask __U to
   __builtin_ia32_psrad128_mask.  */
extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_sra_epi32 (__mmask8 __U, __m128i __A, __m128i __B)
{
  const __v4si __va = (__v4si) __A;
  const __v4si __vb = (__v4si) __B;
  const __v4si __vz = (__v4si) _mm_setzero_si128 ();

  return (__m128i) __builtin_ia32_psrad128_mask (__va, __vb, __vz, __U);
}

/* Unmasked form: passes __A (as __v4di), __B (as __v2di), a zero
   vector from _mm256_setzero_si256 () and a mask of (__mmask8) -1 to
   __builtin_ia32_psraq256_mask.  */
extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_sra_epi64 (__m256i __A, __m128i __B)
{
  const __v4di __va = (__v4di) __A;
  const __v2di __vb = (__v2di) __B;
  const __v4di __vz = (__v4di) _mm256_setzero_si256 ();
  const __mmask8 __k = (__mmask8) -1;

  return (__m256i) __builtin_ia32_psraq256_mask (__va, __vb, __vz, __k);
}

/* Masked form: passes __A (as __v4di), __B (as __v2di), __W (as
   __v4di) and the mask __U to __builtin_ia32_psraq256_mask.  */
extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_sra_epi64 (__m256i __W, __mmask8 __U, __m256i __A,
		       __m128i __B)
{
  const __v4di __va = (__v4di) __A;
  const __v2di __vb = (__v2di) __B;
  const __v4di __vw = (__v4di) __W;

  return (__m256i) __builtin_ia32_psraq256_mask (__va, __vb, __vw, __U);
}

/* Zeroing form: passes __A (as __v4di), __B (as __v2di), a zero vector
   from _mm256_setzero_si256 () and the mask __U to
   __builtin_ia32_psraq256_mask.  */
extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_maskz_sra_epi64 (__mmask8 __U, __m256i __A, __m128i __B)
{
  const __v4di __va = (__v4di) __A;
  const __v2di __vb = (__v2di) __B;
  const __v4di __vz = (__v4di) _mm256_setzero_si256 ();

  return (__m256i) __builtin_ia32_psraq256_mask (__va, __vb, __vz, __U);
}

/* Unmasked form: passes __A and __B (as __v2di), a zero vector from
   _mm_setzero_si128 () and a mask of (__mmask8) -1 to
   __builtin_ia32_psraq128_mask.  */
extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_sra_epi64 (__m128i __A, __m128i __B)
{
  const __v2di __va = (__v2di) __A;
  const __v2di __vb = (__v2di) __B;
  const __v2di __vz = (__v2di) _mm_setzero_si128 ();
  const __mmask8 __k = (__mmask8) -1;

  return (__m128i) __builtin_ia32_psraq128_mask (__va, __vb, __vz, __k);
}

/* Masked form: passes __A, __B and __W (all as __v2di) plus the mask
   __U to __builtin_ia32_psraq128_mask.  */
extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_sra_epi64 (__m128i __W, __mmask8 __U, __m128i __A,
		    __m128i __B)
{
  const __v2di __va = (__v2di) __A;
  const __v2di __vb = (__v2di) __B;
  const __v2di __vw = (__v2di) __W;

  return (__m128i) __builtin_ia32_psraq128_mask (__va, __vb, __vw, __U);
}

/* Zeroing form: passes __A and __B (as __v2di), a zero vector from
   _mm_setzero_si128 () and the mask __U to
   __builtin_ia32_psraq128_mask.  */
extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_sra_epi64 (__mmask8 __U, __m128i __A, __m128i __B)
{
  const __v2di __va = (__v2di) __A;
  const __v2di __vb = (__v2di) __B;
  const __v2di __vz = (__v2di) _mm_setzero_si128 ();

  return (__m128i) __builtin_ia32_psraq128_mask (__va, __vb, __vz, __U);
}

/* Masked form: passes __A, __B and __W (all as __v4si) plus the mask
   __U to __builtin_ia32_pslld128_mask.  */
extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_sll_epi32 (__m128i __W, __mmask8 __U, __m128i __A,
		    __m128i __B)
{
  const __v4si __va = (__v4si) __A;
  const __v4si __vb = (__v4si) __B;
  const __v4si __vw = (__v4si) __W;

  return (__m128i) __builtin_ia32_pslld128_mask (__va, __vb, __vw, __U);
}

/* Zeroing form: passes __A and __B (as __v4si), a zero vector from
   _mm_setzero_si128 () and the mask __U to
   __builtin_ia32_pslld128_mask.  */
extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_sll_epi32 (__mmask8 __U, __m128i __A, __m128i __B)
{
  const __v4si __va = (__v4si) __A;
  const __v4si __vb = (__v4si) __B;
  const __v4si __vz = (__v4si) _mm_setzero_si128 ();

  return (__m128i) __builtin_ia32_pslld128_mask (__va, __vb, __vz, __U);
}

/* Masked form: passes __A, __B and __W (all as __v2di) plus the mask
   __U to __builtin_ia32_psllq128_mask.  */
extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_sll_epi64 (__m128i __W, __mmask8 __U, __m128i __A,
		    __m128i __B)
{
  const __v2di __va = (__v2di) __A;
  const __v2di __vb = (__v2di) __B;
  const __v2di __vw = (__v2di) __W;

  return (__m128i) __builtin_ia32_psllq128_mask (__va, __vb, __vw, __U);
}

/* Zeroing form: passes __A and __B (as __v2di), a zero vector from
   _mm_setzero_si128 () and the mask __U to
   __builtin_ia32_psllq128_mask.  */
extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_sll_epi64 (__mmask8 __U, __m128i __A, __m128i __B)
{
  const __v2di __va = (__v2di) __A;
  const __v2di __vb = (__v2di) __B;
  const __v2di __vz = (__v2di) _mm_setzero_si128 ();

  return (__m128i) __builtin_ia32_psllq128_mask (__va, __vb, __vz, __U);
}

/* Masked form: passes __A (as __v8si), __B (as __v4si), __W (as
   __v8si) and the mask __U to __builtin_ia32_pslld256_mask.  */
extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_sll_epi32 (__m256i __W, __mmask8 __U, __m256i __A,
		       __m128i __B)
{
  const __v8si __va = (__v8si) __A;
  const __v4si __vb = (__v4si) __B;
  const __v8si __vw = (__v8si) __W;

  return (__m256i) __builtin_ia32_pslld256_mask (__va, __vb, __vw, __U);
}

/* Zeroing form: passes __A (as __v8si), __B (as __v4si), a zero vector
   from _mm256_setzero_si256 () and the mask __U to
   __builtin_ia32_pslld256_mask.  */
extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_maskz_sll_epi32 (__mmask8 __U, __m256i __A, __m128i __B)
{
  const __v8si __va = (__v8si) __A;
  const __v4si __vb = (__v4si) __B;
  const __v8si __vz = (__v8si) _mm256_setzero_si256 ();

  return (__m256i) __builtin_ia32_pslld256_mask (__va, __vb, __vz, __U);
}

/* Masked form: passes __A (as __v4di), __B (as __v2di), __W (as
   __v4di) and the mask __U to __builtin_ia32_psllq256_mask.  */
extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_sll_epi64 (__m256i __W, __mmask8 __U, __m256i __A,
		       __m128i __B)
{
  const __v4di __va = (__v4di) __A;
  const __v2di __vb = (__v2di) __B;
  const __v4di __vw = (__v4di) __W;

  return (__m256i) __builtin_ia32_psllq256_mask (__va, __vb, __vw, __U);
}

/* Zeroing form: passes __A (as __v4di), __B (as __v2di), a zero vector
   from _mm256_setzero_si256 () and the mask __U to
   __builtin_ia32_psllq256_mask.  */
extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_maskz_sll_epi64 (__mmask8 __U, __m256i __A, __m128i __B)
{
  const __v4di __va = (__v4di) __A;
  const __v2di __vb = (__v2di) __B;
  const __v4di __vz = (__v4di) _mm256_setzero_si256 ();

  return (__m256i) __builtin_ia32_psllq256_mask (__va, __vb, __vz, __U);
}

/* Masked form: calls __builtin_ia32_permvarsf256_mask with __Y (as
   __v8sf) first and __X (as __v8si) second, followed by __W and the
   mask __U.  */
extern __inline __m256
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_permutexvar_ps (__m256 __W, __mmask8 __U, __m256i __X,
			    __m256 __Y)
{
  const __v8sf __vy = (__v8sf) __Y;
  const __v8si __vx = (__v8si) __X;
  const __v8sf __vw = (__v8sf) __W;

  return (__m256) __builtin_ia32_permvarsf256_mask (__vy, __vx, __vw, __U);
}

/* Zeroing form: calls __builtin_ia32_permvarsf256_mask with __Y (as
   __v8sf) first and __X (as __v8si) second, followed by a zero vector
   from _mm256_setzero_ps () and the mask __U.  */
extern __inline __m256
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_maskz_permutexvar_ps (__mmask8 __U, __m256i __X, __m256 __Y)
{
  const __v8sf __vy = (__v8sf) __Y;
  const __v8si __vx = (__v8si) __X;
  const __v8sf __vz = (__v8sf) _mm256_setzero_ps ();

  return (__m256) __builtin_ia32_permvarsf256_mask (__vy, __vx, __vz, __U);
}

/* Unmasked form: calls __builtin_ia32_permvardf256_mask with __Y (as
   __v4df) first and __X (as __v4di) second, followed by a zero vector
   from _mm256_setzero_pd () and a mask of (__mmask8) -1.  */
extern __inline __m256d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_permutexvar_pd (__m256i __X, __m256d __Y)
{
  const __v4df __vy = (__v4df) __Y;
  const __v4di __vx = (__v4di) __X;
  const __v4df __vz = (__v4df) _mm256_setzero_pd ();
  const __mmask8 __k = (__mmask8) -1;

  return (__m256d) __builtin_ia32_permvardf256_mask (__vy, __vx, __vz, __k);
}

/* Masked form: calls __builtin_ia32_permvardf256_mask with __Y (as
   __v4df) first and __X (as __v4di) second, followed by __W and the
   mask __U.  */
extern __inline __m256d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_permutexvar_pd (__m256d __W, __mmask8 __U, __m256i __X,
			    __m256d __Y)
{
  const __v4df __vy = (__v4df) __Y;
  const __v4di __vx = (__v4di) __X;
  const __v4df __vw = (__v4df) __W;

  return (__m256d) __builtin_ia32_permvardf256_mask (__vy, __vx, __vw, __U);
}

/* Zeroing form: calls __builtin_ia32_permvardf256_mask with __Y (as
   __v4df) first and __X (as __v4di) second, followed by a zero vector
   from _mm256_setzero_pd () and the mask __U.  */
extern __inline __m256d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_maskz_permutexvar_pd (__mmask8 __U, __m256i __X, __m256d __Y)
{
  const __v4df __vy = (__v4df) __Y;
  const __v4di __vx = (__v4di) __X;
  const __v4df __vz = (__v4df) _mm256_setzero_pd ();

  return (__m256d) __builtin_ia32_permvardf256_mask (__vy, __vx, __vz, __U);
}

/* Masked form: passes __A (as __v4df), __C (as __v4di), __W (as
   __v4df) and the mask __U to __builtin_ia32_vpermilvarpd256_mask.  */
extern __inline __m256d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_permutevar_pd (__m256d __W, __mmask8 __U, __m256d __A,
			   __m256i __C)
{
  const __v4df __va = (__v4df) __A;
  const __v4di __vc = (__v4di) __C;
  const __v4df __vw = (__v4df) __W;

  return (__m256d) __builtin_ia32_vpermilvarpd256_mask (__va, __vc, __vw, __U);
}

/* Zeroing form: passes __A (as __v4df), __C (as __v4di), a zero vector
   from _mm256_setzero_pd () and the mask __U to
   __builtin_ia32_vpermilvarpd256_mask.  */
extern __inline __m256d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_maskz_permutevar_pd (__mmask8 __U, __m256d __A, __m256i __C)
{
  const __v4df __va = (__v4df) __A;
  const __v4di __vc = (__v4di) __C;
  const __v4df __vz = (__v4df) _mm256_setzero_pd ();

  return (__m256d) __builtin_ia32_vpermilvarpd256_mask (__va, __vc, __vz, __U);
}

/* Masked form: passes __A (as __v8sf), __C (as __v8si), __W (as
   __v8sf) and the mask __U to __builtin_ia32_vpermilvarps256_mask.  */
extern __inline __m256
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_permutevar_ps (__m256 __W, __mmask8 __U, __m256 __A,
			   __m256i __C)
{
  const __v8sf __va = (__v8sf) __A;
  const __v8si __vc = (__v8si) __C;
  const __v8sf __vw = (__v8sf) __W;

  return (__m256) __builtin_ia32_vpermilvarps256_mask (__va, __vc, __vw, __U);
}

/* Zeroing form: passes __A (as __v8sf), __C (as __v8si), a zero vector
   from _mm256_setzero_ps () and the mask __U to
   __builtin_ia32_vpermilvarps256_mask.  */
extern __inline __m256
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_maskz_permutevar_ps (__mmask8 __U, __m256 __A, __m256i __C)
{
  const __v8sf __va = (__v8sf) __A;
  const __v8si __vc = (__v8si) __C;
  const __v8sf __vz = (__v8sf) _mm256_setzero_ps ();

  return (__m256) __builtin_ia32_vpermilvarps256_mask (__va, __vc, __vz, __U);
}

/* Masked form: passes __A (as __v2df), __C (as __v2di), __W (as
   __v2df) and the mask __U to __builtin_ia32_vpermilvarpd_mask.  */
extern __inline __m128d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_permutevar_pd (__m128d __W, __mmask8 __U, __m128d __A,
			__m128i __C)
{
  const __v2df __va = (__v2df) __A;
  const __v2di __vc = (__v2di) __C;
  const __v2df __vw = (__v2df) __W;

  return (__m128d) __builtin_ia32_vpermilvarpd_mask (__va, __vc, __vw, __U);
}

/* Zeroing form: passes __A (as __v2df), __C (as __v2di), a zero vector
   from _mm_setzero_pd () and the mask __U to
   __builtin_ia32_vpermilvarpd_mask.  */
extern __inline __m128d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_permutevar_pd (__mmask8 __U, __m128d __A, __m128i __C)
{
  const __v2df __va = (__v2df) __A;
  const __v2di __vc = (__v2di) __C;
  const __v2df __vz = (__v2df) _mm_setzero_pd ();

  return (__m128d) __builtin_ia32_vpermilvarpd_mask (__va, __vc, __vz, __U);
}

/* Masked form: passes __A (as __v4sf), __C (as __v4si), __W (as
   __v4sf) and the mask __U to __builtin_ia32_vpermilvarps_mask.  */
extern __inline __m128
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_permutevar_ps (__m128 __W, __mmask8 __U, __m128 __A,
			__m128i __C)
{
  const __v4sf __va = (__v4sf) __A;
  const __v4si __vc = (__v4si) __C;
  const __v4sf __vw = (__v4sf) __W;

  return (__m128) __builtin_ia32_vpermilvarps_mask (__va, __vc, __vw, __U);
}

/* Zeroing form: passes __A (as __v4sf), __C (as __v4si), a zero vector
   from _mm_setzero_ps () and the mask __U to
   __builtin_ia32_vpermilvarps_mask.  */
extern __inline __m128
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_permutevar_ps (__mmask8 __U, __m128 __A, __m128i __C)
{
  const __v4sf __va = (__v4sf) __A;
  const __v4si __vc = (__v4si) __C;
  const __v4sf __vz = (__v4sf) _mm_setzero_ps ();

  return (__m128) __builtin_ia32_vpermilvarps_mask (__va, __vc, __vz, __U);
}

/* Zeroing form: passes __A and __B (as __v8si), a zero vector from
   _mm256_setzero_si256 () and the mask __M to
   __builtin_ia32_pmulld256_mask.  */
extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_maskz_mullo_epi32 (__mmask8 __M, __m256i __A, __m256i __B)
{
  const __v8si __va = (__v8si) __A;
  const __v8si __vb = (__v8si) __B;
  const __v8si __vz = (__v8si) _mm256_setzero_si256 ();

  return (__m256i) __builtin_ia32_pmulld256_mask (__va, __vb, __vz, __M);
}

/* Zeroing form: calls __builtin_ia32_permvardi256_mask with __Y first
   and __X second (both as __v4di), followed by a zero vector from
   _mm256_setzero_si256 () and the mask __M.  */
extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_maskz_permutexvar_epi64 (__mmask8 __M, __m256i __X, __m256i __Y)
{
  const __v4di __vy = (__v4di) __Y;
  const __v4di __vx = (__v4di) __X;
  const __v4di __vz = (__v4di) _mm256_setzero_si256 ();

  return (__m256i) __builtin_ia32_permvardi256_mask (__vy, __vx, __vz, __M);
}

/* Masked form: passes __A, __B and __W (all as __v8si) plus the mask
   __M to __builtin_ia32_pmulld256_mask.  */
extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_mullo_epi32 (__m256i __W, __mmask8 __M, __m256i __A,
			 __m256i __B)
{
  const __v8si __va = (__v8si) __A;
  const __v8si __vb = (__v8si) __B;
  const __v8si __vw = (__v8si) __W;

  return (__m256i) __builtin_ia32_pmulld256_mask (__va, __vb, __vw, __M);
}

/* Zeroing form: passes __A and __B (as __v4si), a zero vector from
   _mm_setzero_si128 () and the mask __M to
   __builtin_ia32_pmulld128_mask.  */
extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_mullo_epi32 (__mmask8 __M, __m128i __A, __m128i __B)
{
  const __v4si __va = (__v4si) __A;
  const __v4si __vb = (__v4si) __B;
  const __v4si __vz = (__v4si) _mm_setzero_si128 ();

  return (__m128i) __builtin_ia32_pmulld128_mask (__va, __vb, __vz, __M);
}

/* Masked form: passes __A, __B and __W (all as __v4si) plus the mask
   __M to __builtin_ia32_pmulld128_mask.  */
extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_mullo_epi32 (__m128i __W, __mmask8 __M, __m128i __A,
		      __m128i __B)
{
  const __v4si __va = (__v4si) __A;
  const __v4si __vb = (__v4si) __B;
  const __v4si __vw = (__v4si) __W;

  return (__m128i) __builtin_ia32_pmulld128_mask (__va, __vb, __vw, __M);
}

/* Masked form: passes __X and __Y (as __v8si), __W (as __v4di) and
   the mask __M to __builtin_ia32_pmuldq256_mask.  */
extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_mul_epi32 (__m256i __W, __mmask8 __M, __m256i __X,
		       __m256i __Y)
{
  const __v8si __vx = (__v8si) __X;
  const __v8si __vy = (__v8si) __Y;
  const __v4di __vw = (__v4di) __W;

  return (__m256i) __builtin_ia32_pmuldq256_mask (__vx, __vy, __vw, __M);
}

/* Zeroing form: passes __X and __Y (as __v8si), a zero vector from
   _mm256_setzero_si256 () (as __v4di) and the mask __M to
   __builtin_ia32_pmuldq256_mask.  */
extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_maskz_mul_epi32 (__mmask8 __M, __m256i __X, __m256i __Y)
{
  const __v8si __vx = (__v8si) __X;
  const __v8si __vy = (__v8si) __Y;
  const __v4di __vz = (__v4di) _mm256_setzero_si256 ();

  return (__m256i) __builtin_ia32_pmuldq256_mask (__vx, __vy, __vz, __M);
}

/* Masked form: passes __X and __Y (as __v4si), __W (as __v2di) and
   the mask __M to __builtin_ia32_pmuldq128_mask.  */
extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_mul_epi32 (__m128i __W, __mmask8 __M, __m128i __X,
		    __m128i __Y)
{
  const __v4si __vx = (__v4si) __X;
  const __v4si __vy = (__v4si) __Y;
  const __v2di __vw = (__v2di) __W;

  return (__m128i) __builtin_ia32_pmuldq128_mask (__vx, __vy, __vw, __M);
}

/* Zeroing form: passes __X and __Y (as __v4si), a zero vector from
   _mm_setzero_si128 () (as __v2di) and the mask __M to
   __builtin_ia32_pmuldq128_mask.  */
extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_mul_epi32 (__mmask8 __M, __m128i __X, __m128i __Y)
{
  const __v4si __vx = (__v4si) __X;
  const __v4si __vy = (__v4si) __Y;
  const __v2di __vz = (__v2di) _mm_setzero_si128 ();

  return (__m128i) __builtin_ia32_pmuldq128_mask (__vx, __vy, __vz, __M);
}

/* Unmasked form: calls __builtin_ia32_permvardi256_mask with __Y first
   and __X second (both as __v4di), followed by a zero vector from
   _mm256_setzero_si256 () and a mask of (__mmask8) -1.  */
extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_permutexvar_epi64 (__m256i __X, __m256i __Y)
{
  const __v4di __vy = (__v4di) __Y;
  const __v4di __vx = (__v4di) __X;
  const __v4di __vz = (__v4di) _mm256_setzero_si256 ();
  const __mmask8 __k = (__mmask8) -1;

  return (__m256i) __builtin_ia32_permvardi256_mask (__vy, __vx, __vz, __k);
}

/* Masked form: calls __builtin_ia32_permvardi256_mask with __Y first
   and __X second (both as __v4di), followed by __W and the mask
   __M.  */
extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_permutexvar_epi64 (__m256i __W, __mmask8 __M, __m256i __X,
			       __m256i __Y)
{
  const __v4di __vy = (__v4di) __Y;
  const __v4di __vx = (__v4di) __X;
  const __v4di __vw = (__v4di) __W;

  return (__m256i) __builtin_ia32_permvardi256_mask (__vy, __vx, __vw, __M);
}

/* Masked form: passes __X and __Y (as __v8si), __W (as __v4di) and
   the mask __M to __builtin_ia32_pmuludq256_mask.  */
extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_mul_epu32 (__m256i __W, __mmask8 __M, __m256i __X,
		       __m256i __Y)
{
  const __v8si __vx = (__v8si) __X;
  const __v8si __vy = (__v8si) __Y;
  const __v4di __vw = (__v4di) __W;

  return (__m256i) __builtin_ia32_pmuludq256_mask (__vx, __vy, __vw, __M);
}

/* Zeroing form: calls __builtin_ia32_permvarsi256_mask with __Y first
   and __X second (both as __v8si), followed by a zero vector from
   _mm256_setzero_si256 () and the mask __M.  */
extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_maskz_permutexvar_epi32 (__mmask8 __M, __m256i __X, __m256i __Y)
{
  const __v8si __vy = (__v8si) __Y;
  const __v8si __vx = (__v8si) __X;
  const __v8si __vz = (__v8si) _mm256_setzero_si256 ();

  return (__m256i) __builtin_ia32_permvarsi256_mask (__vy, __vx, __vz, __M);
}

/* Zeroing form: passes __X and __Y (as __v8si), a zero vector from
   _mm256_setzero_si256 () (as __v4di) and the mask __M to
   __builtin_ia32_pmuludq256_mask.  */
extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_maskz_mul_epu32 (__mmask8 __M, __m256i __X, __m256i __Y)
{
  const __v8si __vx = (__v8si) __X;
  const __v8si __vy = (__v8si) __Y;
  const __v4di __vz = (__v4di) _mm256_setzero_si256 ();

  return (__m256i) __builtin_ia32_pmuludq256_mask (__vx, __vy, __vz, __M);
}

/* Masked form: passes __X and __Y (as __v4si), __W (as __v2di) and
   the mask __M to __builtin_ia32_pmuludq128_mask.  */
extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_mul_epu32 (__m128i __W, __mmask8 __M, __m128i __X,
		    __m128i __Y)
{
  const __v4si __vx = (__v4si) __X;
  const __v4si __vy = (__v4si) __Y;
  const __v2di __vw = (__v2di) __W;

  return (__m128i) __builtin_ia32_pmuludq128_mask (__vx, __vy, __vw, __M);
}

/* Zeroing form: passes __X and __Y (as __v4si), a zero vector from
   _mm_setzero_si128 () (as __v2di) and the mask __M to
   __builtin_ia32_pmuludq128_mask.  */
extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_mul_epu32 (__mmask8 __M, __m128i __X, __m128i __Y)
{
  const __v4si __vx = (__v4si) __X;
  const __v4si __vy = (__v4si) __Y;
  const __v2di __vz = (__v2di) _mm_setzero_si128 ();

  return (__m128i) __builtin_ia32_pmuludq128_mask (__vx, __vy, __vz, __M);
}

/* Unmasked form: calls __builtin_ia32_permvarsi256_mask with __Y first
   and __X second (both as __v8si), followed by a zero vector from
   _mm256_setzero_si256 () and a mask of (__mmask8) -1.  */
extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_permutexvar_epi32 (__m256i __X, __m256i __Y)
{
  const __v8si __vy = (__v8si) __Y;
  const __v8si __vx = (__v8si) __X;
  const __v8si __vz = (__v8si) _mm256_setzero_si256 ();
  const __mmask8 __k = (__mmask8) -1;

  return (__m256i) __builtin_ia32_permvarsi256_mask (__vy, __vx, __vz, __k);
}

/* Masked form: calls __builtin_ia32_permvarsi256_mask with __Y first
   and __X second (both as __v8si), followed by __W and the mask
   __M.  */
extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_permutexvar_epi32 (__m256i __W, __mmask8 __M, __m256i __X,
			       __m256i __Y)
{
  const __v8si __vy = (__v8si) __Y;
  const __v8si __vx = (__v8si) __X;
  const __v8si __vw = (__v8si) __W;

  return (__m256i) __builtin_ia32_permvarsi256_mask (__vy, __vx, __vw, __M);
}

/* Passes __X and __Y (as __v8si) to __builtin_ia32_ucmpd256_mask with
   immediate 4 (presumably the "not equal" predicate, going by this
   intrinsic's name) and the mask __M; returns the result as an
   __mmask8.  */
extern __inline __mmask8
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_cmpneq_epu32_mask (__mmask8 __M, __m256i __X, __m256i __Y)
{
  const __v8si __lhs = (__v8si) __X;
  const __v8si __rhs = (__v8si) __Y;

  return (__mmask8) __builtin_ia32_ucmpd256_mask (__lhs, __rhs, 4, __M);
}

/* Passes __X and __Y (as __v8si) to __builtin_ia32_ucmpd256_mask with
   immediate 4 (presumably the "not equal" predicate, going by this
   intrinsic's name) and a mask of (__mmask8) -1; returns the result
   as an __mmask8.  */
extern __inline __mmask8
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_cmpneq_epu32_mask (__m256i __X, __m256i __Y)
{
  const __v8si __lhs = (__v8si) __X;
  const __v8si __rhs = (__v8si) __Y;
  const __mmask8 __k = (__mmask8) -1;

  return (__mmask8) __builtin_ia32_ucmpd256_mask (__lhs, __rhs, 4, __k);
}

/* Passes __X and __Y (as __v8si) to __builtin_ia32_ucmpd256_mask with
   immediate 1 (presumably the "less than" predicate, going by this
   intrinsic's name) and the mask __M; returns the result as an
   __mmask8.  */
extern __inline __mmask8
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_cmplt_epu32_mask (__mmask8 __M, __m256i __X, __m256i __Y)
{
  const __v8si __lhs = (__v8si) __X;
  const __v8si __rhs = (__v8si) __Y;

  return (__mmask8) __builtin_ia32_ucmpd256_mask (__lhs, __rhs, 1, __M);
}

/* Passes __X and __Y (as __v8si) to __builtin_ia32_ucmpd256_mask with
   immediate 1 (presumably the "less than" predicate, going by this
   intrinsic's name) and a mask of (__mmask8) -1; returns the result
   as an __mmask8.  */
extern __inline __mmask8
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_cmplt_epu32_mask (__m256i __X, __m256i __Y)
{
  const __v8si __lhs = (__v8si) __X;
  const __v8si __rhs = (__v8si) __Y;
  const __mmask8 __k = (__mmask8) -1;

  return (__mmask8) __builtin_ia32_ucmpd256_mask (__lhs, __rhs, 1, __k);
}

/* Passes __X and __Y (as __v8si) to __builtin_ia32_ucmpd256_mask with
   immediate 5 (presumably the "greater or equal" predicate, going by
   this intrinsic's name) and the mask __M; returns the result as an
   __mmask8.  */
extern __inline __mmask8
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_cmpge_epu32_mask (__mmask8 __M, __m256i __X, __m256i __Y)
{
  const __v8si __lhs = (__v8si) __X;
  const __v8si __rhs = (__v8si) __Y;

  return (__mmask8) __builtin_ia32_ucmpd256_mask (__lhs, __rhs, 5, __M);
}

/* Passes __X and __Y (as __v8si) to __builtin_ia32_ucmpd256_mask with
   immediate 5 (presumably the "greater or equal" predicate, going by
   this intrinsic's name) and a mask of (__mmask8) -1; returns the
   result as an __mmask8.  */
extern __inline __mmask8
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_cmpge_epu32_mask (__m256i __X, __m256i __Y)
{
  const __v8si __lhs = (__v8si) __X;
  const __v8si __rhs = (__v8si) __Y;
  const __mmask8 __k = (__mmask8) -1;

  return (__mmask8) __builtin_ia32_ucmpd256_mask (__lhs, __rhs, 5, __k);
}

/* Passes __X and __Y (as __v8si) to __builtin_ia32_ucmpd256_mask with
   immediate 2 (presumably the "less or equal" predicate, going by
   this intrinsic's name) and the mask __M; returns the result as an
   __mmask8.  */
extern __inline __mmask8
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_cmple_epu32_mask (__mmask8 __M, __m256i __X, __m256i __Y)
{
  const __v8si __lhs = (__v8si) __X;
  const __v8si __rhs = (__v8si) __Y;

  return (__mmask8) __builtin_ia32_ucmpd256_mask (__lhs, __rhs, 2, __M);
}

/* Passes __X and __Y (as __v8si) to __builtin_ia32_ucmpd256_mask with
   immediate 2 (presumably the "less or equal" predicate, going by
   this intrinsic's name) and a mask of (__mmask8) -1; returns the
   result as an __mmask8.  */
extern __inline __mmask8
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_cmple_epu32_mask (__m256i __X, __m256i __Y)
{
  const __v8si __lhs = (__v8si) __X;
  const __v8si __rhs = (__v8si) __Y;
  const __mmask8 __k = (__mmask8) -1;

  return (__mmask8) __builtin_ia32_ucmpd256_mask (__lhs, __rhs, 2, __k);
}

/* Passes __X and __Y (as __v4di) to __builtin_ia32_ucmpq256_mask with
   immediate 4 (presumably the "not equal" predicate, going by this
   intrinsic's name) and the mask __M; returns the result as an
   __mmask8.  */
extern __inline __mmask8
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_cmpneq_epu64_mask (__mmask8 __M, __m256i __X, __m256i __Y)
{
  const __v4di __lhs = (__v4di) __X;
  const __v4di __rhs = (__v4di) __Y;

  return (__mmask8) __builtin_ia32_ucmpq256_mask (__lhs, __rhs, 4, __M);
}

/* Passes __X and __Y (as __v4di) to __builtin_ia32_ucmpq256_mask with
   immediate 4 (presumably the "not equal" predicate, going by this
   intrinsic's name) and a mask of (__mmask8) -1; returns the result
   as an __mmask8.  */
extern __inline __mmask8
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_cmpneq_epu64_mask (__m256i __X, __m256i __Y)
{
  const __v4di __lhs = (__v4di) __X;
  const __v4di __rhs = (__v4di) __Y;
  const __mmask8 __k = (__mmask8) -1;

  return (__mmask8) __builtin_ia32_ucmpq256_mask (__lhs, __rhs, 4, __k);
}

/* Passes __X and __Y (as __v4di) to __builtin_ia32_ucmpq256_mask with
   immediate 1 (presumably the "less than" predicate, going by this
   intrinsic's name) and the mask __M; returns the result as an
   __mmask8.  */
extern __inline __mmask8
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_cmplt_epu64_mask (__mmask8 __M, __m256i __X, __m256i __Y)
{
  const __v4di __lhs = (__v4di) __X;
  const __v4di __rhs = (__v4di) __Y;

  return (__mmask8) __builtin_ia32_ucmpq256_mask (__lhs, __rhs, 1, __M);
}

/* Passes __X and __Y (as __v4di) to __builtin_ia32_ucmpq256_mask with
   immediate 1 (presumably the "less than" predicate, going by this
   intrinsic's name) and a mask of (__mmask8) -1; returns the result
   as an __mmask8.  */
extern __inline __mmask8
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_cmplt_epu64_mask (__m256i __X, __m256i __Y)
{
  const __v4di __lhs = (__v4di) __X;
  const __v4di __rhs = (__v4di) __Y;
  const __mmask8 __k = (__mmask8) -1;

  return (__mmask8) __builtin_ia32_ucmpq256_mask (__lhs, __rhs, 1, __k);
}

/* Passes __X and __Y (as __v4di) to __builtin_ia32_ucmpq256_mask with
   immediate 5 (presumably the "greater or equal" predicate, going by
   this intrinsic's name) and the mask __M; returns the result as an
   __mmask8.  */
extern __inline __mmask8
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_cmpge_epu64_mask (__mmask8 __M, __m256i __X, __m256i __Y)
{
  const __v4di __lhs = (__v4di) __X;
  const __v4di __rhs = (__v4di) __Y;

  return (__mmask8) __builtin_ia32_ucmpq256_mask (__lhs, __rhs, 5, __M);
}

/* Passes __X and __Y (as __v4di) to __builtin_ia32_ucmpq256_mask with
   immediate 5 (presumably the "greater or equal" predicate, going by
   this intrinsic's name) and a mask of (__mmask8) -1; returns the
   result as an __mmask8.  */
extern __inline __mmask8
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_cmpge_epu64_mask (__m256i __X, __m256i __Y)
{
  const __v4di __lhs = (__v4di) __X;
  const __v4di __rhs = (__v4di) __Y;
  const __mmask8 __k = (__mmask8) -1;

  return (__mmask8) __builtin_ia32_ucmpq256_mask (__lhs, __rhs, 5, __k);
}

/* Passes __X and __Y (as __v4di) to __builtin_ia32_ucmpq256_mask with
   immediate 2 (presumably the "less or equal" predicate, going by
   this intrinsic's name) and the mask __M; returns the result as an
   __mmask8.  */
extern __inline __mmask8
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_cmple_epu64_mask (__mmask8 __M, __m256i __X, __m256i __Y)
{
  const __v4di __lhs = (__v4di) __X;
  const __v4di __rhs = (__v4di) __Y;

  return (__mmask8) __builtin_ia32_ucmpq256_mask (__lhs, __rhs, 2, __M);
}

/* Passes __X and __Y (as __v4di) to __builtin_ia32_ucmpq256_mask with
   immediate 2 (presumably the "less or equal" predicate, going by
   this intrinsic's name) and a mask of (__mmask8) -1; returns the
   result as an __mmask8.  */
extern __inline __mmask8
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_cmple_epu64_mask (__m256i __X, __m256i __Y)
{
  const __v4di __lhs = (__v4di) __X;
  const __v4di __rhs = (__v4di) __Y;
  const __mmask8 __k = (__mmask8) -1;

  return (__mmask8) __builtin_ia32_ucmpq256_mask (__lhs, __rhs, 2, __k);
}

/* Passes __X and __Y (as __v8si) to __builtin_ia32_cmpd256_mask with
   immediate 4 (presumably the "not equal" predicate, going by this
   intrinsic's name) and the mask __M; returns the result as an
   __mmask8.  */
extern __inline __mmask8
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_cmpneq_epi32_mask (__mmask8 __M, __m256i __X, __m256i __Y)
{
  const __v8si __lhs = (__v8si) __X;
  const __v8si __rhs = (__v8si) __Y;

  return (__mmask8) __builtin_ia32_cmpd256_mask (__lhs, __rhs, 4, __M);
}

/* Passes __X and __Y (as __v8si) to __builtin_ia32_cmpd256_mask with
   immediate 4 (presumably the "not equal" predicate, going by this
   intrinsic's name) and a mask of (__mmask8) -1; returns the result
   as an __mmask8.  */
extern __inline __mmask8
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_cmpneq_epi32_mask (__m256i __X, __m256i __Y)
{
  const __v8si __lhs = (__v8si) __X;
  const __v8si __rhs = (__v8si) __Y;
  const __mmask8 __k = (__mmask8) -1;

  return (__mmask8) __builtin_ia32_cmpd256_mask (__lhs, __rhs, 4, __k);
}

/* Passes __X and __Y (as __v8si) to __builtin_ia32_cmpd256_mask with
   immediate 1 (presumably the "less than" predicate, going by this
   intrinsic's name) and the mask __M; returns the result as an
   __mmask8.  */
extern __inline __mmask8
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_cmplt_epi32_mask (__mmask8 __M, __m256i __X, __m256i __Y)
{
  const __v8si __lhs = (__v8si) __X;
  const __v8si __rhs = (__v8si) __Y;

  return (__mmask8) __builtin_ia32_cmpd256_mask (__lhs, __rhs, 1, __M);
}

/* Passes __X and __Y (as __v8si) to __builtin_ia32_cmpd256_mask with
   immediate 1 (presumably the "less than" predicate, going by this
   intrinsic's name) and a mask of (__mmask8) -1; returns the result
   as an __mmask8.  */
extern __inline __mmask8
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_cmplt_epi32_mask (__m256i __X, __m256i __Y)
{
  const __v8si __lhs = (__v8si) __X;
  const __v8si __rhs = (__v8si) __Y;
  const __mmask8 __k = (__mmask8) -1;

  return (__mmask8) __builtin_ia32_cmpd256_mask (__lhs, __rhs, 1, __k);
}

/* Passes __X and __Y (as __v8si) to __builtin_ia32_cmpd256_mask with
   immediate 5 (presumably the "greater or equal" predicate, going by
   this intrinsic's name) and the mask __M; returns the result as an
   __mmask8.  */
extern __inline __mmask8
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_cmpge_epi32_mask (__mmask8 __M, __m256i __X, __m256i __Y)
{
  const __v8si __lhs = (__v8si) __X;
  const __v8si __rhs = (__v8si) __Y;

  return (__mmask8) __builtin_ia32_cmpd256_mask (__lhs, __rhs, 5, __M);
}

/* Passes __X and __Y (as __v8si) to __builtin_ia32_cmpd256_mask with
   immediate 5 (presumably the "greater or equal" predicate, going by
   this intrinsic's name) and a mask of (__mmask8) -1; returns the
   result as an __mmask8.  */
extern __inline __mmask8
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_cmpge_epi32_mask (__m256i __X, __m256i __Y)
{
  const __v8si __lhs = (__v8si) __X;
  const __v8si __rhs = (__v8si) __Y;
  const __mmask8 __k = (__mmask8) -1;

  return (__mmask8) __builtin_ia32_cmpd256_mask (__lhs, __rhs, 5, __k);
}

/* Passes __X and __Y (as __v8si) to __builtin_ia32_cmpd256_mask with
   immediate 2 (presumably the "less or equal" predicate, going by
   this intrinsic's name) and the mask __M; returns the result as an
   __mmask8.  */
extern __inline __mmask8
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_cmple_epi32_mask (__mmask8 __M, __m256i __X, __m256i __Y)
{
  const __v8si __lhs = (__v8si) __X;
  const __v8si __rhs = (__v8si) __Y;

  return (__mmask8) __builtin_ia32_cmpd256_mask (__lhs, __rhs, 2, __M);
}

/* Passes __X and __Y (as __v8si) to __builtin_ia32_cmpd256_mask with
   immediate 2 (presumably the "less or equal" predicate, going by
   this intrinsic's name) and a mask of (__mmask8) -1; returns the
   result as an __mmask8.  */
extern __inline __mmask8
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_cmple_epi32_mask (__m256i __X, __m256i __Y)
{
  const __v8si __lhs = (__v8si) __X;
  const __v8si __rhs = (__v8si) __Y;
  const __mmask8 __k = (__mmask8) -1;

  return (__mmask8) __builtin_ia32_cmpd256_mask (__lhs, __rhs, 2, __k);
}

/* Passes __X and __Y (as __v4di) to __builtin_ia32_cmpq256_mask with
   immediate 4 (presumably the "not equal" predicate, going by this
   intrinsic's name) and the mask __M; returns the result as an
   __mmask8.  */
extern __inline __mmask8
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_cmpneq_epi64_mask (__mmask8 __M, __m256i __X, __m256i __Y)
{
  const __v4di __lhs = (__v4di) __X;
  const __v4di __rhs = (__v4di) __Y;

  return (__mmask8) __builtin_ia32_cmpq256_mask (__lhs, __rhs, 4, __M);
}

/* Passes __X and __Y (as __v4di) to __builtin_ia32_cmpq256_mask with
   immediate 4 (presumably the "not equal" predicate, going by this
   intrinsic's name) and a mask of (__mmask8) -1; returns the result
   as an __mmask8.  */
extern __inline __mmask8
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_cmpneq_epi64_mask (__m256i __X, __m256i __Y)
{
  const __v4di __lhs = (__v4di) __X;
  const __v4di __rhs = (__v4di) __Y;
  const __mmask8 __k = (__mmask8) -1;

  return (__mmask8) __builtin_ia32_cmpq256_mask (__lhs, __rhs, 4, __k);
}

/* Passes __X and __Y (as __v4di) to __builtin_ia32_cmpq256_mask with
   immediate 1 (presumably the "less than" predicate, going by this
   intrinsic's name) and the mask __M; returns the result as an
   __mmask8.  */
extern __inline __mmask8
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_cmplt_epi64_mask (__mmask8 __M, __m256i __X, __m256i __Y)
{
  const __v4di __lhs = (__v4di) __X;
  const __v4di __rhs = (__v4di) __Y;

  return (__mmask8) __builtin_ia32_cmpq256_mask (__lhs, __rhs, 1, __M);
}

/* Passes __X and __Y (as __v4di) to __builtin_ia32_cmpq256_mask with
   immediate 1 (presumably the "less than" predicate, going by this
   intrinsic's name) and a mask of (__mmask8) -1; returns the result
   as an __mmask8.  */
extern __inline __mmask8
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_cmplt_epi64_mask (__m256i __X, __m256i __Y)
{
  const __v4di __lhs = (__v4di) __X;
  const __v4di __rhs = (__v4di) __Y;
  const __mmask8 __k = (__mmask8) -1;

  return (__mmask8) __builtin_ia32_cmpq256_mask (__lhs, __rhs, 1, __k);
}

/* Passes __X and __Y (as __v4di) to __builtin_ia32_cmpq256_mask with
   immediate 5 (presumably the "greater or equal" predicate, going by
   this intrinsic's name) and the mask __M; returns the result as an
   __mmask8.  */
extern __inline __mmask8
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_cmpge_epi64_mask (__mmask8 __M, __m256i __X, __m256i __Y)
{
  const __v4di __lhs = (__v4di) __X;
  const __v4di __rhs = (__v4di) __Y;

  return (__mmask8) __builtin_ia32_cmpq256_mask (__lhs, __rhs, 5, __M);
}

/* Passes __X and __Y (as __v4di) to __builtin_ia32_cmpq256_mask with
   immediate 5 (presumably the "greater or equal" predicate, going by
   this intrinsic's name) and a mask of (__mmask8) -1; returns the
   result as an __mmask8.  */
extern __inline __mmask8
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_cmpge_epi64_mask (__m256i __X, __m256i __Y)
{
  const __v4di __lhs = (__v4di) __X;
  const __v4di __rhs = (__v4di) __Y;
  const __mmask8 __k = (__mmask8) -1;

  return (__mmask8) __builtin_ia32_cmpq256_mask (__lhs, __rhs, 5, __k);
}

/* Passes __X and __Y (as __v4di) to __builtin_ia32_cmpq256_mask with
   immediate 2 (presumably the "less or equal" predicate, going by
   this intrinsic's name) and the mask __M; returns the result as an
   __mmask8.  */
extern __inline __mmask8
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_cmple_epi64_mask (__mmask8 __M, __m256i __X, __m256i __Y)
{
  const __v4di __lhs = (__v4di) __X;
  const __v4di __rhs = (__v4di) __Y;

  return (__mmask8) __builtin_ia32_cmpq256_mask (__lhs, __rhs, 2, __M);
}

/* Passes __X and __Y (as __v4di) to __builtin_ia32_cmpq256_mask with
   immediate 2 (presumably the "less or equal" predicate, going by
   this intrinsic's name) and a mask of (__mmask8) -1; returns the
   result as an __mmask8.  */
extern __inline __mmask8
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_cmple_epi64_mask (__m256i __X, __m256i __Y)
{
  const __v4di __lhs = (__v4di) __X;
  const __v4di __rhs = (__v4di) __Y;
  const __mmask8 __k = (__mmask8) -1;

  return (__mmask8) __builtin_ia32_cmpq256_mask (__lhs, __rhs, 2, __k);
}

/* Passes __X and __Y (as __v4si) to __builtin_ia32_ucmpd128_mask with
   immediate 4 (presumably the "not equal" predicate, going by this
   intrinsic's name) and the mask __M; returns the result as an
   __mmask8.  */
extern __inline __mmask8
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_cmpneq_epu32_mask (__mmask8 __M, __m128i __X, __m128i __Y)
{
  const __v4si __lhs = (__v4si) __X;
  const __v4si __rhs = (__v4si) __Y;

  return (__mmask8) __builtin_ia32_ucmpd128_mask (__lhs, __rhs, 4, __M);
}

/* Passes __X and __Y (as __v4si) to __builtin_ia32_ucmpd128_mask with
   immediate 4 (presumably the "not equal" predicate, going by this
   intrinsic's name) and a mask of (__mmask8) -1; returns the result
   as an __mmask8.  */
extern __inline __mmask8
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_cmpneq_epu32_mask (__m128i __X, __m128i __Y)
{
  const __v4si __lhs = (__v4si) __X;
  const __v4si __rhs = (__v4si) __Y;
  const __mmask8 __k = (__mmask8) -1;

  return (__mmask8) __builtin_ia32_ucmpd128_mask (__lhs, __rhs, 4, __k);
}

/* Passes __X and __Y (as __v4si) to __builtin_ia32_ucmpd128_mask with
   immediate 1 (presumably the "less than" predicate, going by this
   intrinsic's name) and the mask __M; returns the result as an
   __mmask8.  */
extern __inline __mmask8
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_cmplt_epu32_mask (__mmask8 __M, __m128i __X, __m128i __Y)
{
  const __v4si __lhs = (__v4si) __X;
  const __v4si __rhs = (__v4si) __Y;

  return (__mmask8) __builtin_ia32_ucmpd128_mask (__lhs, __rhs, 1, __M);
}

/* Passes __X and __Y (as __v4si) to __builtin_ia32_ucmpd128_mask with
   immediate 1 (presumably the "less than" predicate, going by this
   intrinsic's name) and a mask of (__mmask8) -1; returns the result
   as an __mmask8.  */
extern __inline __mmask8
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_cmplt_epu32_mask (__m128i __X, __m128i __Y)
{
  const __v4si __lhs = (__v4si) __X;
  const __v4si __rhs = (__v4si) __Y;
  const __mmask8 __k = (__mmask8) -1;

  return (__mmask8) __builtin_ia32_ucmpd128_mask (__lhs, __rhs, 1, __k);
}

/* Passes __X and __Y (as __v4si) to __builtin_ia32_ucmpd128_mask with
   immediate 5 (presumably the "greater or equal" predicate, going by
   this intrinsic's name) and the mask __M; returns the result as an
   __mmask8.  */
extern __inline __mmask8
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_cmpge_epu32_mask (__mmask8 __M, __m128i __X, __m128i __Y)
{
  const __v4si __lhs = (__v4si) __X;
  const __v4si __rhs = (__v4si) __Y;

  return (__mmask8) __builtin_ia32_ucmpd128_mask (__lhs, __rhs, 5, __M);
}

/* Passes __X and __Y (as __v4si) to __builtin_ia32_ucmpd128_mask with
   immediate 5 (presumably the "greater or equal" predicate, going by
   this intrinsic's name) and a mask of (__mmask8) -1; returns the
   result as an __mmask8.  */
extern __inline __mmask8
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_cmpge_epu32_mask (__m128i __X, __m128i __Y)
{
  const __v4si __lhs = (__v4si) __X;
  const __v4si __rhs = (__v4si) __Y;
  const __mmask8 __k = (__mmask8) -1;

  return (__mmask8) __builtin_ia32_ucmpd128_mask (__lhs, __rhs, 5, __k);
}

/* Passes __X and __Y (as __v4si) to __builtin_ia32_ucmpd128_mask with
   immediate 2 (presumably the "less or equal" predicate, going by
   this intrinsic's name) and the mask __M; returns the result as an
   __mmask8.  */
extern __inline __mmask8
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_cmple_epu32_mask (__mmask8 __M, __m128i __X, __m128i __Y)
{
  const __v4si __lhs = (__v4si) __X;
  const __v4si __rhs = (__v4si) __Y;

  return (__mmask8) __builtin_ia32_ucmpd128_mask (__lhs, __rhs, 2, __M);
}

/* Feed __X and __Y (as __v4si) to the ucmpd128 compare builtin with
   predicate immediate 2 -- "le", going by the intrinsic name -- and
   an all-ones mask operand.  */
extern __inline __mmask8
  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_cmple_epu32_mask (__m128i __X, __m128i __Y)
{
  const __mmask8 __k = (__mmask8) -1;
  const __v4si __xv = (__v4si) __X;
  const __v4si __yv = (__v4si) __Y;

  return (__mmask8) __builtin_ia32_ucmpd128_mask (__xv, __yv, 2, __k);
}

/* Feed __X and __Y (as __v2di) to the ucmpq128 compare builtin with
   predicate immediate 4 -- "neq", going by the intrinsic name -- and
   __M as the mask operand.  */
extern __inline __mmask8
  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_cmpneq_epu64_mask (__mmask8 __M, __m128i __X, __m128i __Y)
{
  const __mmask8 __k = (__mmask8) __M;
  const __v2di __xv = (__v2di) __X;
  const __v2di __yv = (__v2di) __Y;

  return (__mmask8) __builtin_ia32_ucmpq128_mask (__xv, __yv, 4, __k);
}

/* Feed __X and __Y (as __v2di) to the ucmpq128 compare builtin with
   predicate immediate 4 -- "neq", going by the intrinsic name -- and
   an all-ones mask operand.  */
extern __inline __mmask8
  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_cmpneq_epu64_mask (__m128i __X, __m128i __Y)
{
  const __mmask8 __k = (__mmask8) -1;
  const __v2di __xv = (__v2di) __X;
  const __v2di __yv = (__v2di) __Y;

  return (__mmask8) __builtin_ia32_ucmpq128_mask (__xv, __yv, 4, __k);
}

/* Feed __X and __Y (as __v2di) to the ucmpq128 compare builtin with
   predicate immediate 1 -- "lt", going by the intrinsic name -- and
   __M as the mask operand.  */
extern __inline __mmask8
  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_cmplt_epu64_mask (__mmask8 __M, __m128i __X, __m128i __Y)
{
  const __mmask8 __k = (__mmask8) __M;
  const __v2di __xv = (__v2di) __X;
  const __v2di __yv = (__v2di) __Y;

  return (__mmask8) __builtin_ia32_ucmpq128_mask (__xv, __yv, 1, __k);
}

/* Feed __X and __Y (as __v2di) to the ucmpq128 compare builtin with
   predicate immediate 1 -- "lt", going by the intrinsic name -- and
   an all-ones mask operand.  */
extern __inline __mmask8
  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_cmplt_epu64_mask (__m128i __X, __m128i __Y)
{
  const __mmask8 __k = (__mmask8) -1;
  const __v2di __xv = (__v2di) __X;
  const __v2di __yv = (__v2di) __Y;

  return (__mmask8) __builtin_ia32_ucmpq128_mask (__xv, __yv, 1, __k);
}

/* Feed __X and __Y (as __v2di) to the ucmpq128 compare builtin with
   predicate immediate 5 -- "ge", going by the intrinsic name -- and
   __M as the mask operand.  */
extern __inline __mmask8
  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_cmpge_epu64_mask (__mmask8 __M, __m128i __X, __m128i __Y)
{
  const __mmask8 __k = (__mmask8) __M;
  const __v2di __xv = (__v2di) __X;
  const __v2di __yv = (__v2di) __Y;

  return (__mmask8) __builtin_ia32_ucmpq128_mask (__xv, __yv, 5, __k);
}

/* Feed __X and __Y (as __v2di) to the ucmpq128 compare builtin with
   predicate immediate 5 -- "ge", going by the intrinsic name -- and
   an all-ones mask operand.  */
extern __inline __mmask8
  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_cmpge_epu64_mask (__m128i __X, __m128i __Y)
{
  const __mmask8 __k = (__mmask8) -1;
  const __v2di __xv = (__v2di) __X;
  const __v2di __yv = (__v2di) __Y;

  return (__mmask8) __builtin_ia32_ucmpq128_mask (__xv, __yv, 5, __k);
}

/* Feed __X and __Y (as __v2di) to the ucmpq128 compare builtin with
   predicate immediate 2 -- "le", going by the intrinsic name -- and
   __M as the mask operand.  */
extern __inline __mmask8
  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_cmple_epu64_mask (__mmask8 __M, __m128i __X, __m128i __Y)
{
  const __mmask8 __k = (__mmask8) __M;
  const __v2di __xv = (__v2di) __X;
  const __v2di __yv = (__v2di) __Y;

  return (__mmask8) __builtin_ia32_ucmpq128_mask (__xv, __yv, 2, __k);
}

/* Feed __X and __Y (as __v2di) to the ucmpq128 compare builtin with
   predicate immediate 2 -- "le", going by the intrinsic name -- and
   an all-ones mask operand.  */
extern __inline __mmask8
  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_cmple_epu64_mask (__m128i __X, __m128i __Y)
{
  const __mmask8 __k = (__mmask8) -1;
  const __v2di __xv = (__v2di) __X;
  const __v2di __yv = (__v2di) __Y;

  return (__mmask8) __builtin_ia32_ucmpq128_mask (__xv, __yv, 2, __k);
}

/* Feed __X and __Y (as __v4si) to the cmpd128 compare builtin with
   predicate immediate 4 -- "neq", going by the intrinsic name -- and
   __M as the mask operand.  */
extern __inline __mmask8
  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_cmpneq_epi32_mask (__mmask8 __M, __m128i __X, __m128i __Y)
{
  const __mmask8 __k = (__mmask8) __M;
  const __v4si __xv = (__v4si) __X;
  const __v4si __yv = (__v4si) __Y;

  return (__mmask8) __builtin_ia32_cmpd128_mask (__xv, __yv, 4, __k);
}

/* Feed __X and __Y (as __v4si) to the cmpd128 compare builtin with
   predicate immediate 4 -- "neq", going by the intrinsic name -- and
   an all-ones mask operand.  */
extern __inline __mmask8
  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_cmpneq_epi32_mask (__m128i __X, __m128i __Y)
{
  const __mmask8 __k = (__mmask8) -1;
  const __v4si __xv = (__v4si) __X;
  const __v4si __yv = (__v4si) __Y;

  return (__mmask8) __builtin_ia32_cmpd128_mask (__xv, __yv, 4, __k);
}

/* Feed __X and __Y (as __v4si) to the cmpd128 compare builtin with
   predicate immediate 1 -- "lt", going by the intrinsic name -- and
   __M as the mask operand.  */
extern __inline __mmask8
  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_cmplt_epi32_mask (__mmask8 __M, __m128i __X, __m128i __Y)
{
  const __mmask8 __k = (__mmask8) __M;
  const __v4si __xv = (__v4si) __X;
  const __v4si __yv = (__v4si) __Y;

  return (__mmask8) __builtin_ia32_cmpd128_mask (__xv, __yv, 1, __k);
}

/* Feed __X and __Y (as __v4si) to the cmpd128 compare builtin with
   predicate immediate 1 -- "lt", going by the intrinsic name -- and
   an all-ones mask operand.  */
extern __inline __mmask8
  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_cmplt_epi32_mask (__m128i __X, __m128i __Y)
{
  const __mmask8 __k = (__mmask8) -1;
  const __v4si __xv = (__v4si) __X;
  const __v4si __yv = (__v4si) __Y;

  return (__mmask8) __builtin_ia32_cmpd128_mask (__xv, __yv, 1, __k);
}

/* Feed __X and __Y (as __v4si) to the cmpd128 compare builtin with
   predicate immediate 5 -- "ge", going by the intrinsic name -- and
   __M as the mask operand.  */
extern __inline __mmask8
  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_cmpge_epi32_mask (__mmask8 __M, __m128i __X, __m128i __Y)
{
  const __mmask8 __k = (__mmask8) __M;
  const __v4si __xv = (__v4si) __X;
  const __v4si __yv = (__v4si) __Y;

  return (__mmask8) __builtin_ia32_cmpd128_mask (__xv, __yv, 5, __k);
}

/* Feed __X and __Y (as __v4si) to the cmpd128 compare builtin with
   predicate immediate 5 -- "ge", going by the intrinsic name -- and
   an all-ones mask operand.  */
extern __inline __mmask8
  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_cmpge_epi32_mask (__m128i __X, __m128i __Y)
{
  const __mmask8 __k = (__mmask8) -1;
  const __v4si __xv = (__v4si) __X;
  const __v4si __yv = (__v4si) __Y;

  return (__mmask8) __builtin_ia32_cmpd128_mask (__xv, __yv, 5, __k);
}

/* Feed __X and __Y (as __v4si) to the cmpd128 compare builtin with
   predicate immediate 2 -- "le", going by the intrinsic name -- and
   __M as the mask operand.  */
extern __inline __mmask8
  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_cmple_epi32_mask (__mmask8 __M, __m128i __X, __m128i __Y)
{
  const __mmask8 __k = (__mmask8) __M;
  const __v4si __xv = (__v4si) __X;
  const __v4si __yv = (__v4si) __Y;

  return (__mmask8) __builtin_ia32_cmpd128_mask (__xv, __yv, 2, __k);
}

/* Feed __X and __Y (as __v4si) to the cmpd128 compare builtin with
   predicate immediate 2 -- "le", going by the intrinsic name -- and
   an all-ones mask operand.  */
extern __inline __mmask8
  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_cmple_epi32_mask (__m128i __X, __m128i __Y)
{
  const __mmask8 __k = (__mmask8) -1;
  const __v4si __xv = (__v4si) __X;
  const __v4si __yv = (__v4si) __Y;

  return (__mmask8) __builtin_ia32_cmpd128_mask (__xv, __yv, 2, __k);
}

/* Feed __X and __Y (as __v2di) to the cmpq128 compare builtin with
   predicate immediate 4 -- "neq", going by the intrinsic name -- and
   __M as the mask operand.  */
extern __inline __mmask8
  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_cmpneq_epi64_mask (__mmask8 __M, __m128i __X, __m128i __Y)
{
  const __mmask8 __k = (__mmask8) __M;
  const __v2di __xv = (__v2di) __X;
  const __v2di __yv = (__v2di) __Y;

  return (__mmask8) __builtin_ia32_cmpq128_mask (__xv, __yv, 4, __k);
}

/* Feed __X and __Y (as __v2di) to the cmpq128 compare builtin with
   predicate immediate 4 -- "neq", going by the intrinsic name -- and
   an all-ones mask operand.  */
extern __inline __mmask8
  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_cmpneq_epi64_mask (__m128i __X, __m128i __Y)
{
  const __mmask8 __k = (__mmask8) -1;
  const __v2di __xv = (__v2di) __X;
  const __v2di __yv = (__v2di) __Y;

  return (__mmask8) __builtin_ia32_cmpq128_mask (__xv, __yv, 4, __k);
}

/* Feed __X and __Y (as __v2di) to the cmpq128 compare builtin with
   predicate immediate 1 -- "lt", going by the intrinsic name -- and
   __M as the mask operand.  */
extern __inline __mmask8
  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_cmplt_epi64_mask (__mmask8 __M, __m128i __X, __m128i __Y)
{
  const __mmask8 __k = (__mmask8) __M;
  const __v2di __xv = (__v2di) __X;
  const __v2di __yv = (__v2di) __Y;

  return (__mmask8) __builtin_ia32_cmpq128_mask (__xv, __yv, 1, __k);
}

/* Feed __X and __Y (as __v2di) to the cmpq128 compare builtin with
   predicate immediate 1 -- "lt", going by the intrinsic name -- and
   an all-ones mask operand.  */
extern __inline __mmask8
  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_cmplt_epi64_mask (__m128i __X, __m128i __Y)
{
  const __mmask8 __k = (__mmask8) -1;
  const __v2di __xv = (__v2di) __X;
  const __v2di __yv = (__v2di) __Y;

  return (__mmask8) __builtin_ia32_cmpq128_mask (__xv, __yv, 1, __k);
}

/* Feed __X and __Y (as __v2di) to the cmpq128 compare builtin with
   predicate immediate 5 -- "ge", going by the intrinsic name -- and
   __M as the mask operand.  */
extern __inline __mmask8
  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_cmpge_epi64_mask (__mmask8 __M, __m128i __X, __m128i __Y)
{
  const __mmask8 __k = (__mmask8) __M;
  const __v2di __xv = (__v2di) __X;
  const __v2di __yv = (__v2di) __Y;

  return (__mmask8) __builtin_ia32_cmpq128_mask (__xv, __yv, 5, __k);
}

/* Feed __X and __Y (as __v2di) to the cmpq128 compare builtin with
   predicate immediate 5 -- "ge", going by the intrinsic name -- and
   an all-ones mask operand.  */
extern __inline __mmask8
  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_cmpge_epi64_mask (__m128i __X, __m128i __Y)
{
  const __mmask8 __k = (__mmask8) -1;
  const __v2di __xv = (__v2di) __X;
  const __v2di __yv = (__v2di) __Y;

  return (__mmask8) __builtin_ia32_cmpq128_mask (__xv, __yv, 5, __k);
}

/* Feed __X and __Y (as __v2di) to the cmpq128 compare builtin with
   predicate immediate 2 -- "le", going by the intrinsic name -- and
   __M as the mask operand.  */
extern __inline __mmask8
  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_cmple_epi64_mask (__mmask8 __M, __m128i __X, __m128i __Y)
{
  const __mmask8 __k = (__mmask8) __M;
  const __v2di __xv = (__v2di) __X;
  const __v2di __yv = (__v2di) __Y;

  return (__mmask8) __builtin_ia32_cmpq128_mask (__xv, __yv, 2, __k);
}

/* Feed __X and __Y (as __v2di) to the cmpq128 compare builtin with
   predicate immediate 2 -- "le", going by the intrinsic name -- and
   an all-ones mask operand.  */
extern __inline __mmask8
  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_cmple_epi64_mask (__m128i __X, __m128i __Y)
{
  const __mmask8 __k = (__mmask8) -1;
  const __v2di __xv = (__v2di) __X;
  const __v2di __yv = (__v2di) __Y;

  return (__mmask8) __builtin_ia32_cmpq128_mask (__xv, __yv, 2, __k);
}

#ifdef __OPTIMIZE__
/* Forward __X (as __v4di) and the immediate __I to the permdi256
   builtin, with a zero vector as the third operand and an all-ones
   mask operand.  */
extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_permutex_epi64 (__m256i __X, const int __I)
{
  const __mmask8 __k = (__mmask8) -1;
  const __v4di __xv = (__v4di) __X;
  const __v4di __zero = (__v4di) _mm256_setzero_si256 ();

  return (__m256i)
    __builtin_ia32_permdi256_mask (__xv, __I, __zero, __k);
}

/* Forward __X (as __v4di) and the immediate __I to the permdi256
   builtin, with __W as the third operand and __M as the mask
   operand.  */
extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_permutex_epi64 (__m256i __W, __mmask8 __M,
                            __m256i __X, const int __I)
{
  const __mmask8 __k = (__mmask8) __M;
  const __v4di __xv = (__v4di) __X;
  const __v4di __wv = (__v4di) __W;

  return (__m256i)
    __builtin_ia32_permdi256_mask (__xv, __I, __wv, __k);
}

/* Forward __X (as __v4di) and the immediate __I to the permdi256
   builtin, with a zero vector as the third operand and __M as the
   mask operand.  */
extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_maskz_permutex_epi64 (__mmask8 __M, __m256i __X, const int __I)
{
  const __mmask8 __k = (__mmask8) __M;
  const __v4di __xv = (__v4di) __X;
  const __v4di __zero = (__v4di) _mm256_setzero_si256 ();

  return (__m256i)
    __builtin_ia32_permdi256_mask (__xv, __I, __zero, __k);
}

/* Forward __A, __B (as __v4df) and the immediate __imm to the
   shufpd256 builtin, with __W as the fourth operand and __U as the
   mask operand.  */
extern __inline __m256d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_shuffle_pd (__m256d __W, __mmask8 __U, __m256d __A,
                        __m256d __B, const int __imm)
{
  const __mmask8 __k = (__mmask8) __U;
  const __v4df __av = (__v4df) __A;
  const __v4df __bv = (__v4df) __B;
  const __v4df __wv = (__v4df) __W;

  return (__m256d)
    __builtin_ia32_shufpd256_mask (__av, __bv, __imm, __wv, __k);
}

/* Forward __A, __B (as __v4df) and the immediate __imm to the
   shufpd256 builtin, with a zero vector as the fourth operand and
   __U as the mask operand.  */
extern __inline __m256d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_maskz_shuffle_pd (__mmask8 __U, __m256d __A, __m256d __B,
                         const int __imm)
{
  const __mmask8 __k = (__mmask8) __U;
  const __v4df __av = (__v4df) __A;
  const __v4df __bv = (__v4df) __B;
  const __v4df __zero = (__v4df) _mm256_setzero_pd ();

  return (__m256d)
    __builtin_ia32_shufpd256_mask (__av, __bv, __imm, __zero, __k);
}

/* Forward __A, __B (as __v2df) and the immediate __imm to the
   shufpd128 builtin, with __W as the fourth operand and __U as the
   mask operand.  */
extern __inline __m128d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_shuffle_pd (__m128d __W, __mmask8 __U, __m128d __A,
                     __m128d __B, const int __imm)
{
  const __mmask8 __k = (__mmask8) __U;
  const __v2df __av = (__v2df) __A;
  const __v2df __bv = (__v2df) __B;
  const __v2df __wv = (__v2df) __W;

  return (__m128d)
    __builtin_ia32_shufpd128_mask (__av, __bv, __imm, __wv, __k);
}

/* Forward __A, __B (as __v2df) and the immediate __imm to the
   shufpd128 builtin, with a zero vector as the fourth operand and
   __U as the mask operand.  */
extern __inline __m128d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_shuffle_pd (__mmask8 __U, __m128d __A, __m128d __B,
                      const int __imm)
{
  const __mmask8 __k = (__mmask8) __U;
  const __v2df __av = (__v2df) __A;
  const __v2df __bv = (__v2df) __B;
  const __v2df __zero = (__v2df) _mm_setzero_pd ();

  return (__m128d)
    __builtin_ia32_shufpd128_mask (__av, __bv, __imm, __zero, __k);
}

/* Forward __A, __B (as __v8sf) and the immediate __imm to the
   shufps256 builtin, with __W as the fourth operand and __U as the
   mask operand.  */
extern __inline __m256
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_shuffle_ps (__m256 __W, __mmask8 __U, __m256 __A,
                        __m256 __B, const int __imm)
{
  const __mmask8 __k = (__mmask8) __U;
  const __v8sf __av = (__v8sf) __A;
  const __v8sf __bv = (__v8sf) __B;
  const __v8sf __wv = (__v8sf) __W;

  return (__m256)
    __builtin_ia32_shufps256_mask (__av, __bv, __imm, __wv, __k);
}

/* Forward __A, __B (as __v8sf) and the immediate __imm to the
   shufps256 builtin, with a zero vector as the fourth operand and
   __U as the mask operand.  */
extern __inline __m256
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_maskz_shuffle_ps (__mmask8 __U, __m256 __A, __m256 __B,
                         const int __imm)
{
  const __mmask8 __k = (__mmask8) __U;
  const __v8sf __av = (__v8sf) __A;
  const __v8sf __bv = (__v8sf) __B;
  const __v8sf __zero = (__v8sf) _mm256_setzero_ps ();

  return (__m256)
    __builtin_ia32_shufps256_mask (__av, __bv, __imm, __zero, __k);
}

/* Forward __A, __B (as __v4sf) and the immediate __imm to the
   shufps128 builtin, with __W as the fourth operand and __U as the
   mask operand.  */
extern __inline __m128
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_shuffle_ps (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B,
                     const int __imm)
{
  const __mmask8 __k = (__mmask8) __U;
  const __v4sf __av = (__v4sf) __A;
  const __v4sf __bv = (__v4sf) __B;
  const __v4sf __wv = (__v4sf) __W;

  return (__m128)
    __builtin_ia32_shufps128_mask (__av, __bv, __imm, __wv, __k);
}

/* Forward __A, __B (as __v4sf) and the immediate __imm to the
   shufps128 builtin, with a zero vector as the fourth operand and
   __U as the mask operand.  */
extern __inline __m128
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_shuffle_ps (__mmask8 __U, __m128 __A, __m128 __B,
                      const int __imm)
{
  const __mmask8 __k = (__mmask8) __U;
  const __v4sf __av = (__v4sf) __A;
  const __v4sf __bv = (__v4sf) __B;
  const __v4sf __zero = (__v4sf) _mm_setzero_ps ();

  return (__m128)
    __builtin_ia32_shufps128_mask (__av, __bv, __imm, __zero, __k);
}

/* Forward __A (as __v8si), __B (as __v4si) and the immediate __imm to
   the inserti32x4_256 builtin, with a zero vector as the fourth
   operand and an all-ones mask operand.  */
extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_inserti32x4 (__m256i __A, __m128i __B, const int __imm)
{
  const __mmask8 __k = (__mmask8) -1;
  const __v8si __av = (__v8si) __A;
  const __v4si __bv = (__v4si) __B;
  const __v8si __zero = (__v8si) _mm256_setzero_si256 ();

  return (__m256i)
    __builtin_ia32_inserti32x4_256_mask (__av, __bv, __imm, __zero, __k);
}

/* Forward __A (as __v8si), __B (as __v4si) and the immediate __imm to
   the inserti32x4_256 builtin, with __W as the fourth operand and
   __U as the mask operand.  */
extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_inserti32x4 (__m256i __W, __mmask8 __U, __m256i __A,
                         __m128i __B, const int __imm)
{
  const __mmask8 __k = (__mmask8) __U;
  const __v8si __av = (__v8si) __A;
  const __v4si __bv = (__v4si) __B;
  const __v8si __wv = (__v8si) __W;

  return (__m256i)
    __builtin_ia32_inserti32x4_256_mask (__av, __bv, __imm, __wv, __k);
}

/* Forward __A (as __v8si), __B (as __v4si) and the immediate __imm to
   the inserti32x4_256 builtin, with a zero vector as the fourth
   operand and __U as the mask operand.  */
extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_maskz_inserti32x4 (__mmask8 __U, __m256i __A, __m128i __B,
                          const int __imm)
{
  const __mmask8 __k = (__mmask8) __U;
  const __v8si __av = (__v8si) __A;
  const __v4si __bv = (__v4si) __B;
  const __v8si __zero = (__v8si) _mm256_setzero_si256 ();

  return (__m256i)
    __builtin_ia32_inserti32x4_256_mask (__av, __bv, __imm, __zero, __k);
}

/* Forward __A (as __v8sf), __B (as __v4sf) and the immediate __imm to
   the insertf32x4_256 builtin, with a zero vector as the fourth
   operand and an all-ones mask operand.  */
extern __inline __m256
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_insertf32x4 (__m256 __A, __m128 __B, const int __imm)
{
  const __mmask8 __k = (__mmask8) -1;
  const __v8sf __av = (__v8sf) __A;
  const __v4sf __bv = (__v4sf) __B;
  const __v8sf __zero = (__v8sf) _mm256_setzero_ps ();

  return (__m256)
    __builtin_ia32_insertf32x4_256_mask (__av, __bv, __imm, __zero, __k);
}

/* Forward __A (as __v8sf), __B (as __v4sf) and the immediate __imm to
   the insertf32x4_256 builtin, with __W as the fourth operand and
   __U as the mask operand.  */
extern __inline __m256
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_insertf32x4 (__m256 __W, __mmask8 __U, __m256 __A,
                         __m128 __B, const int __imm)
{
  const __mmask8 __k = (__mmask8) __U;
  const __v8sf __av = (__v8sf) __A;
  const __v4sf __bv = (__v4sf) __B;
  const __v8sf __wv = (__v8sf) __W;

  return (__m256)
    __builtin_ia32_insertf32x4_256_mask (__av, __bv, __imm, __wv, __k);
}

/* Forward __A (as __v8sf), __B (as __v4sf) and the immediate __imm to
   the insertf32x4_256 builtin, with a zero vector as the fourth
   operand and __U as the mask operand.  */
extern __inline __m256
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_maskz_insertf32x4 (__mmask8 __U, __m256 __A, __m128 __B,
                          const int __imm)
{
  const __mmask8 __k = (__mmask8) __U;
  const __v8sf __av = (__v8sf) __A;
  const __v4sf __bv = (__v4sf) __B;
  const __v8sf __zero = (__v8sf) _mm256_setzero_ps ();

  return (__m256)
    __builtin_ia32_insertf32x4_256_mask (__av, __bv, __imm, __zero, __k);
}

/* Forward __A (as __v8si) and the immediate __imm to the
   extracti32x4_256 builtin, with a zero __v4si vector as the third
   operand and an all-ones mask operand.  */
extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_extracti32x4_epi32 (__m256i __A, const int __imm)
{
  const __mmask8 __k = (__mmask8) -1;
  const __v8si __av = (__v8si) __A;
  const __v4si __zero = (__v4si) _mm_setzero_si128 ();

  return (__m128i)
    __builtin_ia32_extracti32x4_256_mask (__av, __imm, __zero, __k);
}

/* Forward __A (as __v8si) and the immediate __imm to the
   extracti32x4_256 builtin, with __W (as __v4si) as the third operand
   and __U as the mask operand.  */
extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_extracti32x4_epi32 (__m128i __W, __mmask8 __U, __m256i __A,
                                const int __imm)
{
  const __mmask8 __k = (__mmask8) __U;
  const __v8si __av = (__v8si) __A;
  const __v4si __wv = (__v4si) __W;

  return (__m128i)
    __builtin_ia32_extracti32x4_256_mask (__av, __imm, __wv, __k);
}

/* Forward __A (as __v8si) and the immediate __imm to the
   extracti32x4_256 builtin, with a zero __v4si vector as the third
   operand and __U as the mask operand.  */
extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_maskz_extracti32x4_epi32 (__mmask8 __U, __m256i __A,
                                 const int __imm)
{
  const __mmask8 __k = (__mmask8) __U;
  const __v8si __av = (__v8si) __A;
  const __v4si __zero = (__v4si) _mm_setzero_si128 ();

  return (__m128i)
    __builtin_ia32_extracti32x4_256_mask (__av, __imm, __zero, __k);
}

/* Forward __A (as __v8sf) and the immediate __imm to the
   extractf32x4_256 builtin, with a zero __v4sf vector as the third
   operand and an all-ones mask operand.  */
extern __inline __m128
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_extractf32x4_ps (__m256 __A, const int __imm)
{
  const __mmask8 __k = (__mmask8) -1;
  const __v8sf __av = (__v8sf) __A;
  const __v4sf __zero = (__v4sf) _mm_setzero_ps ();

  return (__m128)
    __builtin_ia32_extractf32x4_256_mask (__av, __imm, __zero, __k);
}

/* Forward __A (as __v8sf) and the immediate __imm to the
   extractf32x4_256 builtin, with __W (as __v4sf) as the third operand
   and __U as the mask operand.  */
extern __inline __m128
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_extractf32x4_ps (__m128 __W, __mmask8 __U, __m256 __A,
                             const int __imm)
{
  const __mmask8 __k = (__mmask8) __U;
  const __v8sf __av = (__v8sf) __A;
  const __v4sf __wv = (__v4sf) __W;

  return (__m128)
    __builtin_ia32_extractf32x4_256_mask (__av, __imm, __wv, __k);
}

/* Forward __A (as __v8sf) and the immediate __imm to the
   extractf32x4_256 builtin, with a zero __v4sf vector as the third
   operand and __U as the mask operand.  */
extern __inline __m128
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_maskz_extractf32x4_ps (__mmask8 __U, __m256 __A,
                              const int __imm)
{
  const __mmask8 __k = (__mmask8) __U;
  const __v8sf __av = (__v8sf) __A;
  const __v4sf __zero = (__v4sf) _mm_setzero_ps ();

  return (__m128)
    __builtin_ia32_extractf32x4_256_mask (__av, __imm, __zero, __k);
}

/* Forward __A, __B (as __v4di) and the immediate __imm to the
   shuf_i64x2_256 builtin, with a zero vector as the fourth operand
   and an all-ones mask operand.  */
extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_shuffle_i64x2 (__m256i __A, __m256i __B, const int __imm)
{
  const __mmask8 __k = (__mmask8) -1;
  const __v4di __av = (__v4di) __A;
  const __v4di __bv = (__v4di) __B;
  const __v4di __zero = (__v4di) _mm256_setzero_si256 ();

  return (__m256i)
    __builtin_ia32_shuf_i64x2_256_mask (__av, __bv, __imm, __zero, __k);
}

/* Forward __A, __B (as __v4di) and the immediate __imm to the
   shuf_i64x2_256 builtin, with __W as the fourth operand and __U as
   the mask operand.  */
extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_shuffle_i64x2 (__m256i __W, __mmask8 __U, __m256i __A,
                           __m256i __B, const int __imm)
{
  const __mmask8 __k = (__mmask8) __U;
  const __v4di __av = (__v4di) __A;
  const __v4di __bv = (__v4di) __B;
  const __v4di __wv = (__v4di) __W;

  return (__m256i)
    __builtin_ia32_shuf_i64x2_256_mask (__av, __bv, __imm, __wv, __k);
}

/* Forward __A, __B (as __v4di) and the immediate __imm to the
   shuf_i64x2_256 builtin, with a zero vector as the fourth operand
   and __U as the mask operand.  */
extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_maskz_shuffle_i64x2 (__mmask8 __U, __m256i __A, __m256i __B,
                            const int __imm)
{
  const __mmask8 __k = (__mmask8) __U;
  const __v4di __av = (__v4di) __A;
  const __v4di __bv = (__v4di) __B;
  const __v4di __zero = (__v4di) _mm256_setzero_si256 ();

  return (__m256i)
    __builtin_ia32_shuf_i64x2_256_mask (__av, __bv, __imm, __zero, __k);
}

/* Forward __A, __B (as __v8si) and the immediate __imm to the
   shuf_i32x4_256 builtin, with a zero vector as the fourth operand
   and an all-ones mask operand.  */
extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_shuffle_i32x4 (__m256i __A, __m256i __B, const int __imm)
{
  const __mmask8 __k = (__mmask8) -1;
  const __v8si __av = (__v8si) __A;
  const __v8si __bv = (__v8si) __B;
  const __v8si __zero = (__v8si) _mm256_setzero_si256 ();

  return (__m256i)
    __builtin_ia32_shuf_i32x4_256_mask (__av, __bv, __imm, __zero, __k);
}

/* Forward __A, __B (as __v8si) and the immediate __imm to the
   shuf_i32x4_256 builtin, with __W as the fourth operand and __U as
   the mask operand.  */
extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_shuffle_i32x4 (__m256i __W, __mmask8 __U, __m256i __A,
                           __m256i __B, const int __imm)
{
  const __mmask8 __k = (__mmask8) __U;
  const __v8si __av = (__v8si) __A;
  const __v8si __bv = (__v8si) __B;
  const __v8si __wv = (__v8si) __W;

  return (__m256i)
    __builtin_ia32_shuf_i32x4_256_mask (__av, __bv, __imm, __wv, __k);
}

/* Forward __A, __B (as __v8si) and the immediate __imm to the
   shuf_i32x4_256 builtin, with a zero vector as the fourth operand
   and __U as the mask operand.  */
extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_maskz_shuffle_i32x4 (__mmask8 __U, __m256i __A, __m256i __B,
                            const int __imm)
{
  const __mmask8 __k = (__mmask8) __U;
  const __v8si __av = (__v8si) __A;
  const __v8si __bv = (__v8si) __B;
  const __v8si __zero = (__v8si) _mm256_setzero_si256 ();

  return (__m256i)
    __builtin_ia32_shuf_i32x4_256_mask (__av, __bv, __imm, __zero, __k);
}

/* Forward __A, __B (as __v4df) and the immediate __imm to the
   shuf_f64x2_256 builtin, with a zero vector as the fourth operand
   and an all-ones mask operand.  */
extern __inline __m256d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_shuffle_f64x2 (__m256d __A, __m256d __B, const int __imm)
{
  const __mmask8 __k = (__mmask8) -1;
  const __v4df __av = (__v4df) __A;
  const __v4df __bv = (__v4df) __B;
  const __v4df __zero = (__v4df) _mm256_setzero_pd ();

  return (__m256d)
    __builtin_ia32_shuf_f64x2_256_mask (__av, __bv, __imm, __zero, __k);
}

/* Forward __A, __B (as __v4df) and the immediate __imm to the
   shuf_f64x2_256 builtin, with __W as the fourth operand and __U as
   the mask operand.  */
extern __inline __m256d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_shuffle_f64x2 (__m256d __W, __mmask8 __U, __m256d __A,
                           __m256d __B, const int __imm)
{
  const __mmask8 __k = (__mmask8) __U;
  const __v4df __av = (__v4df) __A;
  const __v4df __bv = (__v4df) __B;
  const __v4df __wv = (__v4df) __W;

  return (__m256d)
    __builtin_ia32_shuf_f64x2_256_mask (__av, __bv, __imm, __wv, __k);
}

/* Forward __A, __B (as __v4df) and the immediate __imm to the
   shuf_f64x2_256 builtin, with a zero vector as the fourth operand
   and __U as the mask operand.  */
extern __inline __m256d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_maskz_shuffle_f64x2 (__mmask8 __U, __m256d __A, __m256d __B,
                            const int __imm)
{
  const __mmask8 __k = (__mmask8) __U;
  const __v4df __av = (__v4df) __A;
  const __v4df __bv = (__v4df) __B;
  const __v4df __zero = (__v4df) _mm256_setzero_pd ();

  return (__m256d)
    __builtin_ia32_shuf_f64x2_256_mask (__av, __bv, __imm, __zero, __k);
}

/* Forward __A, __B (as __v8sf) and the immediate __imm to the
   shuf_f32x4_256 builtin, with a zero vector as the fourth operand
   and an all-ones mask operand.  */
extern __inline __m256
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_shuffle_f32x4 (__m256 __A, __m256 __B, const int __imm)
{
  const __mmask8 __k = (__mmask8) -1;
  const __v8sf __av = (__v8sf) __A;
  const __v8sf __bv = (__v8sf) __B;
  const __v8sf __zero = (__v8sf) _mm256_setzero_ps ();

  return (__m256)
    __builtin_ia32_shuf_f32x4_256_mask (__av, __bv, __imm, __zero, __k);
}

/* Forward __A, __B (as __v8sf) and the immediate __imm to the
   shuf_f32x4_256 builtin, with __W as the fourth operand and __U as
   the mask operand.  */
extern __inline __m256
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_shuffle_f32x4 (__m256 __W, __mmask8 __U, __m256 __A,
                           __m256 __B, const int __imm)
{
  const __mmask8 __k = (__mmask8) __U;
  const __v8sf __av = (__v8sf) __A;
  const __v8sf __bv = (__v8sf) __B;
  const __v8sf __wv = (__v8sf) __W;

  return (__m256)
    __builtin_ia32_shuf_f32x4_256_mask (__av, __bv, __imm, __wv, __k);
}

/* Forward __A, __B (as __v8sf) and the immediate __imm to the
   shuf_f32x4_256 builtin, with a zero vector as the fourth operand
   and __U as the mask operand.  */
extern __inline __m256
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_maskz_shuffle_f32x4 (__mmask8 __U, __m256 __A, __m256 __B,
                            const int __imm)
{
  const __mmask8 __k = (__mmask8) __U;
  const __v8sf __av = (__v8sf) __A;
  const __v8sf __bv = (__v8sf) __B;
  const __v8sf __zero = (__v8sf) _mm256_setzero_ps ();

  return (__m256)
    __builtin_ia32_shuf_f32x4_256_mask (__av, __bv, __imm, __zero, __k);
}

/* Forward __A, __B (as __v4df), __C (as __v4di) and the immediate
   __imm to the fixupimmpd256_mask builtin with an all-ones mask
   operand.  */
extern __inline __m256d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_fixupimm_pd (__m256d __A, __m256d __B, __m256i __C,
                    const int __imm)
{
  const __mmask8 __k = (__mmask8) -1;
  const __v4df __av = (__v4df) __A;
  const __v4df __bv = (__v4df) __B;
  const __v4di __cv = (__v4di) __C;

  return (__m256d)
    __builtin_ia32_fixupimmpd256_mask (__av, __bv, __cv, __imm, __k);
}

/* Forward __A, __B (as __v4df), __C (as __v4di) and the immediate
   __imm to the fixupimmpd256_mask builtin with __U as the mask
   operand.  */
extern __inline __m256d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_fixupimm_pd (__m256d __A, __mmask8 __U, __m256d __B,
                         __m256i __C, const int __imm)
{
  const __mmask8 __k = (__mmask8) __U;
  const __v4df __av = (__v4df) __A;
  const __v4df __bv = (__v4df) __B;
  const __v4di __cv = (__v4di) __C;

  return (__m256d)
    __builtin_ia32_fixupimmpd256_mask (__av, __bv, __cv, __imm, __k);
}

/* Forward __A, __B (as __v4df), __C (as __v4di) and the immediate
   __imm to the fixupimmpd256_maskz builtin with __U as the mask
   operand.  */
extern __inline __m256d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_maskz_fixupimm_pd (__mmask8 __U, __m256d __A, __m256d __B,
                          __m256i __C, const int __imm)
{
  const __mmask8 __k = (__mmask8) __U;
  const __v4df __av = (__v4df) __A;
  const __v4df __bv = (__v4df) __B;
  const __v4di __cv = (__v4di) __C;

  return (__m256d)
    __builtin_ia32_fixupimmpd256_maskz (__av, __bv, __cv, __imm, __k);
}

/* Forward __A, __B (as __v8sf), __C (as __v8si) and the immediate
   __imm to the fixupimmps256_mask builtin with an all-ones mask
   operand.  */
extern __inline __m256
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_fixupimm_ps (__m256 __A, __m256 __B, __m256i __C,
                    const int __imm)
{
  const __mmask8 __k = (__mmask8) -1;
  const __v8sf __av = (__v8sf) __A;
  const __v8sf __bv = (__v8sf) __B;
  const __v8si __cv = (__v8si) __C;

  return (__m256)
    __builtin_ia32_fixupimmps256_mask (__av, __bv, __cv, __imm, __k);
}

/* Forward __A, __B (as __v8sf), __C (as __v8si) and the immediate
   __imm to the fixupimmps256_mask builtin with __U as the mask
   operand.  */
extern __inline __m256
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_fixupimm_ps (__m256 __A, __mmask8 __U, __m256 __B,
                         __m256i __C, const int __imm)
{
  const __mmask8 __k = (__mmask8) __U;
  const __v8sf __av = (__v8sf) __A;
  const __v8sf __bv = (__v8sf) __B;
  const __v8si __cv = (__v8si) __C;

  return (__m256)
    __builtin_ia32_fixupimmps256_mask (__av, __bv, __cv, __imm, __k);
}

/* Forward __A, __B (as __v8sf), __C (as __v8si) and the immediate
   __imm to the fixupimmps256_maskz builtin with __U as the mask
   operand.  */
extern __inline __m256
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_maskz_fixupimm_ps (__mmask8 __U, __m256 __A, __m256 __B,
                          __m256i __C, const int __imm)
{
  const __mmask8 __k = (__mmask8) __U;
  const __v8sf __av = (__v8sf) __A;
  const __v8sf __bv = (__v8sf) __B;
  const __v8si __cv = (__v8si) __C;

  return (__m256)
    __builtin_ia32_fixupimmps256_maskz (__av, __bv, __cv, __imm, __k);
}

/* Forward __A, __B (as __v2df), __C (as __v2di) and the immediate
   __imm to the fixupimmpd128_mask builtin with an all-ones mask
   operand.  */
extern __inline __m128d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_fixupimm_pd (__m128d __A, __m128d __B, __m128i __C,
                 const int __imm)
{
  const __mmask8 __k = (__mmask8) -1;
  const __v2df __av = (__v2df) __A;
  const __v2df __bv = (__v2df) __B;
  const __v2di __cv = (__v2di) __C;

  return (__m128d)
    __builtin_ia32_fixupimmpd128_mask (__av, __bv, __cv, __imm, __k);
}

/* Forward __A, __B (as __v2df), __C (as __v2di) and the immediate
   __imm to the fixupimmpd128_mask builtin with __U as the mask
   operand.  */
extern __inline __m128d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_fixupimm_pd (__m128d __A, __mmask8 __U, __m128d __B,
                      __m128i __C, const int __imm)
{
  const __mmask8 __k = (__mmask8) __U;
  const __v2df __av = (__v2df) __A;
  const __v2df __bv = (__v2df) __B;
  const __v2di __cv = (__v2di) __C;

  return (__m128d)
    __builtin_ia32_fixupimmpd128_mask (__av, __bv, __cv, __imm, __k);
}

/* Forward __A, __B (as __v2df), __C (as __v2di) and the immediate
   __imm to the fixupimmpd128_maskz builtin with __U as the mask
   operand.  */
extern __inline __m128d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_fixupimm_pd (__mmask8 __U, __m128d __A, __m128d __B,
                       __m128i __C, const int __imm)
{
  const __mmask8 __k = (__mmask8) __U;
  const __v2df __av = (__v2df) __A;
  const __v2df __bv = (__v2df) __B;
  const __v2di __cv = (__v2di) __C;

  return (__m128d)
    __builtin_ia32_fixupimmpd128_maskz (__av, __bv, __cv, __imm, __k);
}

/* Forward __A, __B (as __v4sf), __C (as __v4si) and the immediate
   __imm to the fixupimmps128_mask builtin with an all-ones mask
   operand.  */
extern __inline __m128
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_fixupimm_ps (__m128 __A, __m128 __B, __m128i __C, const int __imm)
{
  const __mmask8 __k = (__mmask8) -1;
  const __v4sf __av = (__v4sf) __A;
  const __v4sf __bv = (__v4sf) __B;
  const __v4si __cv = (__v4si) __C;

  return (__m128)
    __builtin_ia32_fixupimmps128_mask (__av, __bv, __cv, __imm, __k);
}

/* Forward __A, __B (as __v4sf), __C (as __v4si) and the immediate
   __imm to the fixupimmps128_mask builtin with __U as the mask
   operand.  */
extern __inline __m128
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_fixupimm_ps (__m128 __A, __mmask8 __U, __m128 __B,
                      __m128i __C, const int __imm)
{
  const __mmask8 __k = (__mmask8) __U;
  const __v4sf __av = (__v4sf) __A;
  const __v4sf __bv = (__v4sf) __B;
  const __v4si __cv = (__v4si) __C;

  return (__m128)
    __builtin_ia32_fixupimmps128_mask (__av, __bv, __cv, __imm, __k);
}

/* Forward __A, __B (as __v4sf), __C (as __v4si) and the immediate
   __imm to the fixupimmps128_maskz builtin with __U as the mask
   operand.  */
extern __inline __m128
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_fixupimm_ps (__mmask8 __U, __m128 __A, __m128 __B,
                       __m128i __C, const int __imm)
{
  const __mmask8 __k = (__mmask8) __U;
  const __v4sf __av = (__v4sf) __A;
  const __v4sf __bv = (__v4sf) __B;
  const __v4si __cv = (__v4si) __C;

  return (__m128)
    __builtin_ia32_fixupimmps128_maskz (__av, __bv, __cv, __imm, __k);
}

/* Forward __A (as __v8si) and the immediate __imm to the psrldi256
   builtin, with __W as the third operand and __U as the mask
   operand.  */
extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_srli_epi32 (__m256i __W, __mmask8 __U, __m256i __A,
                        const int __imm)
{
  const __mmask8 __k = (__mmask8) __U;
  const __v8si __av = (__v8si) __A;
  const __v8si __wv = (__v8si) __W;

  return (__m256i)
    __builtin_ia32_psrldi256_mask (__av, __imm, __wv, __k);
}

/* Forward __A (as __v8si) and the immediate __imm to the psrldi256
   builtin, with a zero vector as the third operand and __U as the
   mask operand.  */
extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_maskz_srli_epi32 (__mmask8 __U, __m256i __A, const int __imm)
{
  const __mmask8 __k = (__mmask8) __U;
  const __v8si __av = (__v8si) __A;
  const __v8si __zero = (__v8si) _mm256_setzero_si256 ();

  return (__m256i)
    __builtin_ia32_psrldi256_mask (__av, __imm, __zero, __k);
}

/* Forward __A (as __v4si) and the immediate __imm to the psrldi128
   builtin, with __W as the third operand and __U as the mask
   operand.  */
extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_srli_epi32 (__m128i __W, __mmask8 __U, __m128i __A,
                     const int __imm)
{
  const __mmask8 __k = (__mmask8) __U;
  const __v4si __av = (__v4si) __A;
  const __v4si __wv = (__v4si) __W;

  return (__m128i)
    __builtin_ia32_psrldi128_mask (__av, __imm, __wv, __k);
}

/* Forward __A (as __v4si) and the immediate __imm to the psrldi128
   builtin, with a zero vector as the third operand and __U as the
   mask operand.  */
extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_srli_epi32 (__mmask8 __U, __m128i __A, const int __imm)
{
  const __mmask8 __k = (__mmask8) __U;
  const __v4si __av = (__v4si) __A;
  const __v4si __zero = (__v4si) _mm_setzero_si128 ();

  return (__m128i)
    __builtin_ia32_psrldi128_mask (__av, __imm, __zero, __k);
}

/* Forward __A (as __v4di) and the immediate __imm to the psrlqi256
   builtin, with __W as the third operand and __U as the mask
   operand.  */
extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_srli_epi64 (__m256i __W, __mmask8 __U, __m256i __A,
                        const int __imm)
{
  const __mmask8 __k = (__mmask8) __U;
  const __v4di __av = (__v4di) __A;
  const __v4di __wv = (__v4di) __W;

  return (__m256i)
    __builtin_ia32_psrlqi256_mask (__av, __imm, __wv, __k);
}

/* Forward __A (as __v4di) and the immediate __imm to the psrlqi256
   builtin, with a zero vector as the third operand and __U as the
   mask operand.  */
extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_maskz_srli_epi64 (__mmask8 __U, __m256i __A, const int __imm)
{
  const __mmask8 __k = (__mmask8) __U;
  const __v4di __av = (__v4di) __A;
  const __v4di __zero = (__v4di) _mm256_setzero_si256 ();

  return (__m256i)
    __builtin_ia32_psrlqi256_mask (__av, __imm, __zero, __k);
}

/* Forward __A (as __v2di) and the immediate __imm to the psrlqi128
   builtin, with __W as the third operand and __U as the mask
   operand.  */
extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_srli_epi64 (__m128i __W, __mmask8 __U, __m128i __A,
                     const int __imm)
{
  const __mmask8 __k = (__mmask8) __U;
  const __v2di __av = (__v2di) __A;
  const __v2di __wv = (__v2di) __W;

  return (__m128i)
    __builtin_ia32_psrlqi128_mask (__av, __imm, __wv, __k);
}

/* Forward __A (as __v2di) and the immediate __imm to the psrlqi128
   builtin, with a zero vector as the third operand and __U as the
   mask operand.  */
extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_srli_epi64 (__mmask8 __U, __m128i __A, const int __imm)
{
  const __mmask8 __k = (__mmask8) __U;
  const __v2di __av = (__v2di) __A;
  const __v2di __zero = (__v2di) _mm_setzero_si128 ();

  return (__m128i)
    __builtin_ia32_psrlqi128_mask (__av, __imm, __zero, __k);
}

/* Forward __A, __B, __C (as __v4di) to the pternlogq256_mask builtin
   with __imm narrowed to unsigned char and an all-ones mask
   operand.  */
extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_ternarylogic_epi64 (__m256i __A, __m256i __B, __m256i __C,
                           const int __imm)
{
  const __mmask8 __k = (__mmask8) -1;
  const __v4di __av = (__v4di) __A;
  const __v4di __bv = (__v4di) __B;
  const __v4di __cv = (__v4di) __C;

  return (__m256i)
    __builtin_ia32_pternlogq256_mask (__av, __bv, __cv,
                                      (unsigned char) __imm, __k);
}

/* Forward __A, __B, __C (as __v4di) to the pternlogq256_mask builtin
   with __imm narrowed to unsigned char and __U as the mask
   operand.  */
extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_ternarylogic_epi64 (__m256i __A, __mmask8 __U,
                                __m256i __B, __m256i __C,
                                const int __imm)
{
  const __mmask8 __k = (__mmask8) __U;
  const __v4di __av = (__v4di) __A;
  const __v4di __bv = (__v4di) __B;
  const __v4di __cv = (__v4di) __C;

  return (__m256i)
    __builtin_ia32_pternlogq256_mask (__av, __bv, __cv,
                                      (unsigned char) __imm, __k);
}

/* Forward __A, __B, __C (as __v4di) to the pternlogq256_maskz builtin
   with __imm narrowed to unsigned char and __U as the mask
   operand.  */
extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_maskz_ternarylogic_epi64 (__mmask8 __U, __m256i __A,
                                 __m256i __B, __m256i __C,
                                 const int __imm)
{
  const __mmask8 __k = (__mmask8) __U;
  const __v4di __av = (__v4di) __A;
  const __v4di __bv = (__v4di) __B;
  const __v4di __cv = (__v4di) __C;

  return (__m256i)
    __builtin_ia32_pternlogq256_maskz (__av, __bv, __cv,
                                       (unsigned char) __imm, __k);
}

/* Forward __A, __B, __C (as __v8si) to the pternlogd256_mask builtin
   with __imm narrowed to unsigned char and an all-ones mask
   operand.  */
extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_ternarylogic_epi32 (__m256i __A, __m256i __B, __m256i __C,
                           const int __imm)
{
  const __mmask8 __k = (__mmask8) -1;
  const __v8si __av = (__v8si) __A;
  const __v8si __bv = (__v8si) __B;
  const __v8si __cv = (__v8si) __C;

  return (__m256i)
    __builtin_ia32_pternlogd256_mask (__av, __bv, __cv,
                                      (unsigned char) __imm, __k);
}

/* Forward __A, __B, __C (as __v8si) to the pternlogd256_mask builtin
   with __imm narrowed to unsigned char and __U as the mask
   operand.  */
extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_ternarylogic_epi32 (__m256i __A, __mmask8 __U,
                                __m256i __B, __m256i __C,
                                const int __imm)
{
  const __mmask8 __k = (__mmask8) __U;
  const __v8si __av = (__v8si) __A;
  const __v8si __bv = (__v8si) __B;
  const __v8si __cv = (__v8si) __C;

  return (__m256i)
    __builtin_ia32_pternlogd256_mask (__av, __bv, __cv,
                                      (unsigned char) __imm, __k);
}

/* Forward __A, __B, __C (as __v8si) to the pternlogd256_maskz builtin
   with __imm narrowed to unsigned char and __U as the mask
   operand.  */
extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_maskz_ternarylogic_epi32 (__mmask8 __U, __m256i __A,
                                 __m256i __B, __m256i __C,
                                 const int __imm)
{
  const __mmask8 __k = (__mmask8) __U;
  const __v8si __av = (__v8si) __A;
  const __v8si __bv = (__v8si) __B;
  const __v8si __cv = (__v8si) __C;

  return (__m256i)
    __builtin_ia32_pternlogd256_maskz (__av, __bv, __cv,
                                       (unsigned char) __imm, __k);
}

/* Forward __A, __B, __C (as __v2di) to the pternlogq128_mask builtin
   with __imm narrowed to unsigned char and an all-ones mask
   operand.  */
extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_ternarylogic_epi64 (__m128i __A, __m128i __B, __m128i __C,
                        const int __imm)
{
  const __mmask8 __k = (__mmask8) -1;
  const __v2di __av = (__v2di) __A;
  const __v2di __bv = (__v2di) __B;
  const __v2di __cv = (__v2di) __C;

  return (__m128i)
    __builtin_ia32_pternlogq128_mask (__av, __bv, __cv,
                                      (unsigned char) __imm, __k);
}

/* Forward __A, __B, __C (as __v2di) to the pternlogq128_mask builtin
   with __imm narrowed to unsigned char and __U as the mask
   operand.  */
extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_ternarylogic_epi64 (__m128i __A, __mmask8 __U,
                             __m128i __B, __m128i __C,
                             const int __imm)
{
  const __mmask8 __k = (__mmask8) __U;
  const __v2di __av = (__v2di) __A;
  const __v2di __bv = (__v2di) __B;
  const __v2di __cv = (__v2di) __C;

  return (__m128i)
    __builtin_ia32_pternlogq128_mask (__av, __bv, __cv,
                                      (unsigned char) __imm, __k);
}

/* Forward __A, __B, __C (as __v2di) to the pternlogq128_maskz builtin
   with __imm narrowed to unsigned char and __U as the mask
   operand.  */
extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_ternarylogic_epi64 (__mmask8 __U, __m128i __A,
                              __m128i __B, __m128i __C,
                              const int __imm)
{
  const __mmask8 __k = (__mmask8) __U;
  const __v2di __av = (__v2di) __A;
  const __v2di __bv = (__v2di) __B;
  const __v2di __cv = (__v2di) __C;

  return (__m128i)
    __builtin_ia32_pternlogq128_maskz (__av, __bv, __cv,
                                       (unsigned char) __imm, __k);
}

/* Forward __A, __B, __C (as __v4si) to the pternlogd128_mask builtin
   with __imm narrowed to unsigned char and an all-ones mask
   operand.  */
extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_ternarylogic_epi32 (__m128i __A, __m128i __B, __m128i __C,
                        const int __imm)
{
  const __mmask8 __k = (__mmask8) -1;
  const __v4si __av = (__v4si) __A;
  const __v4si __bv = (__v4si) __B;
  const __v4si __cv = (__v4si) __C;

  return (__m128i)
    __builtin_ia32_pternlogd128_mask (__av, __bv, __cv,
                                      (unsigned char) __imm, __k);
}

/* Forward __A, __B, __C (as __v4si) to the pternlogd128_mask builtin
   with __imm narrowed to unsigned char and __U as the mask
   operand.  */
extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_ternarylogic_epi32 (__m128i __A, __mmask8 __U,
                             __m128i __B, __m128i __C,
                             const int __imm)
{
  const __mmask8 __k = (__mmask8) __U;
  const __v4si __av = (__v4si) __A;
  const __v4si __bv = (__v4si) __B;
  const __v4si __cv = (__v4si) __C;

  return (__m128i)
    __builtin_ia32_pternlogd128_mask (__av, __bv, __cv,
                                      (unsigned char) __imm, __k);
}

/* Forward __A, __B, __C (as __v4si) to the pternlogd128_maskz builtin
   with __imm narrowed to unsigned char and __U as the mask
   operand.  */
extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_ternarylogic_epi32 (__mmask8 __U, __m128i __A,
                              __m128i __B, __m128i __C,
                              const int __imm)
{
  const __mmask8 __k = (__mmask8) __U;
  const __v4si __av = (__v4si) __A;
  const __v4si __bv = (__v4si) __B;
  const __v4si __cv = (__v4si) __C;

  return (__m128i)
    __builtin_ia32_pternlogd128_maskz (__av, __bv, __cv,
                                       (unsigned char) __imm, __k);
}

/* Forward __A (as __v8sf) and the immediate __imm to the
   rndscaleps_256 builtin, with a zero vector as the third operand
   and an all-ones mask operand.  */
extern __inline __m256
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_roundscale_ps (__m256 __A, const int __imm)
{
  const __mmask8 __k = (__mmask8) -1;
  const __v8sf __av = (__v8sf) __A;
  const __v8sf __zero = (__v8sf) _mm256_setzero_ps ();

  return (__m256)
    __builtin_ia32_rndscaleps_256_mask (__av, __imm, __zero, __k);
}

/* Forward __A (as __v8sf) and the immediate __imm to the
   rndscaleps_256 builtin, with __W as the third operand and __U as
   the mask operand.  */
extern __inline __m256
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_roundscale_ps (__m256 __W, __mmask8 __U, __m256 __A,
                           const int __imm)
{
  const __mmask8 __k = (__mmask8) __U;
  const __v8sf __av = (__v8sf) __A;
  const __v8sf __wv = (__v8sf) __W;

  return (__m256)
    __builtin_ia32_rndscaleps_256_mask (__av, __imm, __wv, __k);
}

/* Forward __A (as __v8sf) and the immediate __imm to the
   rndscaleps_256 builtin, with a zero vector as the third operand
   and __U as the mask operand.  */
extern __inline __m256
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_maskz_roundscale_ps (__mmask8 __U, __m256 __A, const int __imm)
{
  const __mmask8 __k = (__mmask8) __U;
  const __v8sf __av = (__v8sf) __A;
  const __v8sf __zero = (__v8sf) _mm256_setzero_ps ();

  return (__m256)
    __builtin_ia32_rndscaleps_256_mask (__av, __imm, __zero, __k);
}

/* Forward __A (as __v4df) and the immediate __imm to the
   rndscalepd_256 builtin, with a zero vector as the third operand
   and an all-ones mask operand.  */
extern __inline __m256d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_roundscale_pd (__m256d __A, const int __imm)
{
  const __mmask8 __k = (__mmask8) -1;
  const __v4df __av = (__v4df) __A;
  const __v4df __zero = (__v4df) _mm256_setzero_pd ();

  return (__m256d)
    __builtin_ia32_rndscalepd_256_mask (__av, __imm, __zero, __k);
}

/* Forward __A (as __v4df) and the immediate __imm to the
   rndscalepd_256 builtin, with __W as the third operand and __U as
   the mask operand.  */
extern __inline __m256d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_roundscale_pd (__m256d __W, __mmask8 __U, __m256d __A,
                           const int __imm)
{
  const __mmask8 __k = (__mmask8) __U;
  const __v4df __av = (__v4df) __A;
  const __v4df __wv = (__v4df) __W;

  return (__m256d)
    __builtin_ia32_rndscalepd_256_mask (__av, __imm, __wv, __k);
}

/* Maskz form: the builtin receives a zero vector together with the caller's
   mask __U.  */
extern __inline __m256d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_maskz_roundscale_pd (__mmask8 __U, __m256d __A, const int __imm)
{
  const __v4df __src = (__v4df) __A;
  const __v4df __zero = (__v4df) _mm256_setzero_pd ();

  return (__m256d)
    __builtin_ia32_rndscalepd_256_mask (__src, __imm, __zero, (__mmask8) __U);
}

/* Unmasked form: the masked builtin receives a zero vector and a mask with
   every bit set.  */
extern __inline __m128
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_roundscale_ps (__m128 __A, const int __imm)
{
  const __v4sf __zero = (__v4sf) _mm_setzero_ps ();
  const __mmask8 __all = (__mmask8) -1;

  return (__m128)
    __builtin_ia32_rndscaleps_128_mask ((__v4sf) __A, __imm, __zero, __all);
}

/* Mask form: __W and __U are handed to the builtin as the vector and mask
   operands that follow the immediate.  */
extern __inline __m128
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_roundscale_ps (__m128 __W, __mmask8 __U, __m128 __A,
			const int __imm)
{
  const __v4sf __src = (__v4sf) __A;
  const __v4sf __old = (__v4sf) __W;

  return (__m128)
    __builtin_ia32_rndscaleps_128_mask (__src, __imm, __old, (__mmask8) __U);
}

/* Maskz form: the builtin receives a zero vector together with the caller's
   mask __U.  */
extern __inline __m128
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_roundscale_ps (__mmask8 __U, __m128 __A, const int __imm)
{
  const __v4sf __src = (__v4sf) __A;
  const __v4sf __zero = (__v4sf) _mm_setzero_ps ();

  return (__m128)
    __builtin_ia32_rndscaleps_128_mask (__src, __imm, __zero, (__mmask8) __U);
}

/* Unmasked form: the masked builtin receives a zero vector and a mask with
   every bit set.  */
extern __inline __m128d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_roundscale_pd (__m128d __A, const int __imm)
{
  const __v2df __zero = (__v2df) _mm_setzero_pd ();
  const __mmask8 __all = (__mmask8) -1;

  return (__m128d)
    __builtin_ia32_rndscalepd_128_mask ((__v2df) __A, __imm, __zero, __all);
}

/* Mask form: __W and __U are handed to the builtin as the vector and mask
   operands that follow the immediate.  */
extern __inline __m128d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_roundscale_pd (__m128d __W, __mmask8 __U, __m128d __A,
			const int __imm)
{
  const __v2df __src = (__v2df) __A;
  const __v2df __old = (__v2df) __W;

  return (__m128d)
    __builtin_ia32_rndscalepd_128_mask (__src, __imm, __old, (__mmask8) __U);
}

/* Maskz form: the builtin receives a zero vector together with the caller's
   mask __U.  */
extern __inline __m128d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_roundscale_pd (__mmask8 __U, __m128d __A, const int __imm)
{
  const __v2df __src = (__v2df) __A;
  const __v2df __zero = (__v2df) _mm_setzero_pd ();

  return (__m128d)
    __builtin_ia32_rndscalepd_128_mask (__src, __imm, __zero, (__mmask8) __U);
}

/* Unmasked form.  The builtin's immediate is (__C << 2) | __B; it also
   receives a zero vector and a mask with every bit set.  */
extern __inline __m256
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_getmant_ps (__m256 __A, _MM_MANTISSA_NORM_ENUM __B,
		   _MM_MANTISSA_SIGN_ENUM __C)
{
  const __v8sf __src = (__v8sf) __A;
  const __v8sf __zero = (__v8sf) _mm256_setzero_ps ();
  const __mmask8 __all = (__mmask8) -1;

  return (__m256)
    __builtin_ia32_getmantps256_mask (__src, (__C << 2) | __B, __zero, __all);
}

/* Mask form.  The builtin's immediate is (__C << 2) | __B; __W and __U are
   passed as the vector and mask operands that follow it.  */
extern __inline __m256
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_getmant_ps (__m256 __W, __mmask8 __U, __m256 __A,
			_MM_MANTISSA_NORM_ENUM __B,
			_MM_MANTISSA_SIGN_ENUM __C)
{
  const __v8sf __src = (__v8sf) __A;
  const __v8sf __old = (__v8sf) __W;
  const __mmask8 __k = (__mmask8) __U;

  return (__m256)
    __builtin_ia32_getmantps256_mask (__src, (__C << 2) | __B, __old, __k);
}

/* Maskz form.  The builtin's immediate is (__C << 2) | __B; it also receives
   a zero vector and the caller's mask __U.  */
extern __inline __m256
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_maskz_getmant_ps (__mmask8 __U, __m256 __A,
			 _MM_MANTISSA_NORM_ENUM __B,
			 _MM_MANTISSA_SIGN_ENUM __C)
{
  const __v8sf __src = (__v8sf) __A;
  const __v8sf __zero = (__v8sf) _mm256_setzero_ps ();
  const __mmask8 __k = (__mmask8) __U;

  return (__m256)
    __builtin_ia32_getmantps256_mask (__src, (__C << 2) | __B, __zero, __k);
}

/* Unmasked form.  The builtin's immediate is (__C << 2) | __B; it also
   receives a zero vector and a mask with every bit set.  */
extern __inline __m128
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_getmant_ps (__m128 __A, _MM_MANTISSA_NORM_ENUM __B,
		_MM_MANTISSA_SIGN_ENUM __C)
{
  const __v4sf __src = (__v4sf) __A;
  const __v4sf __zero = (__v4sf) _mm_setzero_ps ();
  const __mmask8 __all = (__mmask8) -1;

  return (__m128)
    __builtin_ia32_getmantps128_mask (__src, (__C << 2) | __B, __zero, __all);
}

/* Mask form.  The builtin's immediate is (__C << 2) | __B; __W and __U are
   passed as the vector and mask operands that follow it.  */
extern __inline __m128
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_getmant_ps (__m128 __W, __mmask8 __U, __m128 __A,
		     _MM_MANTISSA_NORM_ENUM __B,
		     _MM_MANTISSA_SIGN_ENUM __C)
{
  const __v4sf __src = (__v4sf) __A;
  const __v4sf __old = (__v4sf) __W;
  const __mmask8 __k = (__mmask8) __U;

  return (__m128)
    __builtin_ia32_getmantps128_mask (__src, (__C << 2) | __B, __old, __k);
}

/* Maskz form.  The builtin's immediate is (__C << 2) | __B; it also receives
   a zero vector and the caller's mask __U.  */
extern __inline __m128
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_getmant_ps (__mmask8 __U, __m128 __A,
		      _MM_MANTISSA_NORM_ENUM __B,
		      _MM_MANTISSA_SIGN_ENUM __C)
{
  const __v4sf __src = (__v4sf) __A;
  const __v4sf __zero = (__v4sf) _mm_setzero_ps ();
  const __mmask8 __k = (__mmask8) __U;

  return (__m128)
    __builtin_ia32_getmantps128_mask (__src, (__C << 2) | __B, __zero, __k);
}

/* Unmasked form.  The builtin's immediate is (__C << 2) | __B; it also
   receives a zero vector and a mask with every bit set.  */
extern __inline __m256d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_getmant_pd (__m256d __A, _MM_MANTISSA_NORM_ENUM __B,
		   _MM_MANTISSA_SIGN_ENUM __C)
{
  const __v4df __src = (__v4df) __A;
  const __v4df __zero = (__v4df) _mm256_setzero_pd ();
  const __mmask8 __all = (__mmask8) -1;

  return (__m256d)
    __builtin_ia32_getmantpd256_mask (__src, (__C << 2) | __B, __zero, __all);
}

/* Mask form.  The builtin's immediate is (__C << 2) | __B; __W and __U are
   passed as the vector and mask operands that follow it.  */
extern __inline __m256d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_getmant_pd (__m256d __W, __mmask8 __U, __m256d __A,
			_MM_MANTISSA_NORM_ENUM __B,
			_MM_MANTISSA_SIGN_ENUM __C)
{
  const __v4df __src = (__v4df) __A;
  const __v4df __old = (__v4df) __W;
  const __mmask8 __k = (__mmask8) __U;

  return (__m256d)
    __builtin_ia32_getmantpd256_mask (__src, (__C << 2) | __B, __old, __k);
}

/* Maskz form.  The builtin's immediate is (__C << 2) | __B; it also receives
   a zero vector and the caller's mask __U.  */
extern __inline __m256d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_maskz_getmant_pd (__mmask8 __U, __m256d __A,
			 _MM_MANTISSA_NORM_ENUM __B,
			 _MM_MANTISSA_SIGN_ENUM __C)
{
  const __v4df __src = (__v4df) __A;
  const __v4df __zero = (__v4df) _mm256_setzero_pd ();
  const __mmask8 __k = (__mmask8) __U;

  return (__m256d)
    __builtin_ia32_getmantpd256_mask (__src, (__C << 2) | __B, __zero, __k);
}

/* Unmasked form.  The builtin's immediate is (__C << 2) | __B; it also
   receives a zero vector and a mask with every bit set.  */
extern __inline __m128d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_getmant_pd (__m128d __A, _MM_MANTISSA_NORM_ENUM __B,
		_MM_MANTISSA_SIGN_ENUM __C)
{
  const __v2df __src = (__v2df) __A;
  const __v2df __zero = (__v2df) _mm_setzero_pd ();
  const __mmask8 __all = (__mmask8) -1;

  return (__m128d)
    __builtin_ia32_getmantpd128_mask (__src, (__C << 2) | __B, __zero, __all);
}

/* Mask form.  The builtin's immediate is (__C << 2) | __B; __W and __U are
   passed as the vector and mask operands that follow it.  */
extern __inline __m128d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_getmant_pd (__m128d __W, __mmask8 __U, __m128d __A,
		     _MM_MANTISSA_NORM_ENUM __B,
		     _MM_MANTISSA_SIGN_ENUM __C)
{
  const __v2df __src = (__v2df) __A;
  const __v2df __old = (__v2df) __W;
  const __mmask8 __k = (__mmask8) __U;

  return (__m128d)
    __builtin_ia32_getmantpd128_mask (__src, (__C << 2) | __B, __old, __k);
}

/* Maskz form.  The builtin's immediate is (__C << 2) | __B; it also receives
   a zero vector and the caller's mask __U.  */
extern __inline __m128d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_getmant_pd (__mmask8 __U, __m128d __A,
		      _MM_MANTISSA_NORM_ENUM __B,
		      _MM_MANTISSA_SIGN_ENUM __C)
{
  const __v2df __src = (__v2df) __A;
  const __v2df __zero = (__v2df) _mm_setzero_pd ();
  const __mmask8 __k = (__mmask8) __U;

  return (__m128d)
    __builtin_ia32_getmantpd128_mask (__src, (__C << 2) | __B, __zero, __k);
}

/* Wrapper over __builtin_ia32_gather3siv8sf: __v1_old is viewed as __v8sf
   and __index as __v8si; __addr, __mask and __scale pass through.  */
extern __inline __m256
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mmask_i32gather_ps (__m256 __v1_old, __mmask8 __mask,
			   __m256i __index, void const *__addr,
			   int __scale)
{
  const __v8sf __old = (__v8sf) __v1_old;
  const __v8si __idx = (__v8si) __index;

  return (__m256)
    __builtin_ia32_gather3siv8sf (__old, __addr, __idx, __mask, __scale);
}

/* Wrapper over __builtin_ia32_gather3siv4sf: __v1_old is viewed as __v4sf
   and __index as __v4si; __addr, __mask and __scale pass through.  */
extern __inline __m128
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mmask_i32gather_ps (__m128 __v1_old, __mmask8 __mask,
			__m128i __index, void const *__addr,
			int __scale)
{
  const __v4sf __old = (__v4sf) __v1_old;
  const __v4si __idx = (__v4si) __index;

  return (__m128)
    __builtin_ia32_gather3siv4sf (__old, __addr, __idx, __mask, __scale);
}

/* Wrapper over __builtin_ia32_gather3siv4df: __v1_old is viewed as __v4df
   and the 128-bit __index as __v4si; the rest passes through.  */
extern __inline __m256d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mmask_i32gather_pd (__m256d __v1_old, __mmask8 __mask,
			   __m128i __index, void const *__addr,
			   int __scale)
{
  const __v4df __old = (__v4df) __v1_old;
  const __v4si __idx = (__v4si) __index;

  return (__m256d)
    __builtin_ia32_gather3siv4df (__old, __addr, __idx, __mask, __scale);
}

/* Wrapper over __builtin_ia32_gather3siv2df: __v1_old is viewed as __v2df
   and __index as __v4si; __addr, __mask and __scale pass through.  */
extern __inline __m128d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mmask_i32gather_pd (__m128d __v1_old, __mmask8 __mask,
			__m128i __index, void const *__addr,
			int __scale)
{
  const __v2df __old = (__v2df) __v1_old;
  const __v4si __idx = (__v4si) __index;

  return (__m128d)
    __builtin_ia32_gather3siv2df (__old, __addr, __idx, __mask, __scale);
}

/* Wrapper over __builtin_ia32_gather3div8sf: the 128-bit __v1_old is viewed
   as __v4sf and the 256-bit __index as __v4di; the rest passes through.  */
extern __inline __m128
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mmask_i64gather_ps (__m128 __v1_old, __mmask8 __mask,
			   __m256i __index, void const *__addr,
			   int __scale)
{
  const __v4sf __old = (__v4sf) __v1_old;
  const __v4di __idx = (__v4di) __index;

  return (__m128)
    __builtin_ia32_gather3div8sf (__old, __addr, __idx, __mask, __scale);
}

/* Wrapper over __builtin_ia32_gather3div4sf: __v1_old is viewed as __v4sf
   and __index as __v2di; __addr, __mask and __scale pass through.  */
extern __inline __m128
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mmask_i64gather_ps (__m128 __v1_old, __mmask8 __mask,
			__m128i __index, void const *__addr,
			int __scale)
{
  const __v4sf __old = (__v4sf) __v1_old;
  const __v2di __idx = (__v2di) __index;

  return (__m128)
    __builtin_ia32_gather3div4sf (__old, __addr, __idx, __mask, __scale);
}

/* Wrapper over __builtin_ia32_gather3div4df: __v1_old is viewed as __v4df
   and __index as __v4di; __addr, __mask and __scale pass through.  */
extern __inline __m256d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mmask_i64gather_pd (__m256d __v1_old, __mmask8 __mask,
			   __m256i __index, void const *__addr,
			   int __scale)
{
  const __v4df __old = (__v4df) __v1_old;
  const __v4di __idx = (__v4di) __index;

  return (__m256d)
    __builtin_ia32_gather3div4df (__old, __addr, __idx, __mask, __scale);
}

/* Wrapper over __builtin_ia32_gather3div2df: __v1_old is viewed as __v2df
   and __index as __v2di; __addr, __mask and __scale pass through.  */
extern __inline __m128d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mmask_i64gather_pd (__m128d __v1_old, __mmask8 __mask,
			__m128i __index, void const *__addr,
			int __scale)
{
  const __v2df __old = (__v2df) __v1_old;
  const __v2di __idx = (__v2di) __index;

  return (__m128d)
    __builtin_ia32_gather3div2df (__old, __addr, __idx, __mask, __scale);
}

/* Wrapper over __builtin_ia32_gather3siv8si: both __v1_old and __index are
   viewed as __v8si; __addr, __mask and __scale pass through.  */
extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mmask_i32gather_epi32 (__m256i __v1_old, __mmask8 __mask,
			      __m256i __index, void const *__addr,
			      int __scale)
{
  const __v8si __old = (__v8si) __v1_old;
  const __v8si __idx = (__v8si) __index;

  return (__m256i)
    __builtin_ia32_gather3siv8si (__old, __addr, __idx, __mask, __scale);
}

/* Wrapper over __builtin_ia32_gather3siv4si: both __v1_old and __index are
   viewed as __v4si; __addr, __mask and __scale pass through.  */
extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mmask_i32gather_epi32 (__m128i __v1_old, __mmask8 __mask,
			   __m128i __index, void const *__addr,
			   int __scale)
{
  const __v4si __old = (__v4si) __v1_old;
  const __v4si __idx = (__v4si) __index;

  return (__m128i)
    __builtin_ia32_gather3siv4si (__old, __addr, __idx, __mask, __scale);
}

/* Wrapper over __builtin_ia32_gather3siv4di: __v1_old is viewed as __v4di
   and the 128-bit __index as __v4si; the rest passes through.  */
extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mmask_i32gather_epi64 (__m256i __v1_old, __mmask8 __mask,
			      __m128i __index, void const *__addr,
			      int __scale)
{
  const __v4di __old = (__v4di) __v1_old;
  const __v4si __idx = (__v4si) __index;

  return (__m256i)
    __builtin_ia32_gather3siv4di (__old, __addr, __idx, __mask, __scale);
}

/* Wrapper over __builtin_ia32_gather3siv2di: __v1_old is viewed as __v2di
   and __index as __v4si; __addr, __mask and __scale pass through.  */
extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mmask_i32gather_epi64 (__m128i __v1_old, __mmask8 __mask,
			   __m128i __index, void const *__addr,
			   int __scale)
{
  const __v2di __old = (__v2di) __v1_old;
  const __v4si __idx = (__v4si) __index;

  return (__m128i)
    __builtin_ia32_gather3siv2di (__old, __addr, __idx, __mask, __scale);
}

/* Wrapper over __builtin_ia32_gather3div8si: the 128-bit __v1_old is viewed
   as __v4si and the 256-bit __index as __v4di; the rest passes through.  */
extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mmask_i64gather_epi32 (__m128i __v1_old, __mmask8 __mask,
			      __m256i __index, void const *__addr,
			      int __scale)
{
  const __v4si __old = (__v4si) __v1_old;
  const __v4di __idx = (__v4di) __index;

  return (__m128i)
    __builtin_ia32_gather3div8si (__old, __addr, __idx, __mask, __scale);
}

/* Wrapper over __builtin_ia32_gather3div4si: __v1_old is viewed as __v4si
   and __index as __v2di; __addr, __mask and __scale pass through.  */
extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mmask_i64gather_epi32 (__m128i __v1_old, __mmask8 __mask,
			   __m128i __index, void const *__addr,
			   int __scale)
{
  const __v4si __old = (__v4si) __v1_old;
  const __v2di __idx = (__v2di) __index;

  return (__m128i)
    __builtin_ia32_gather3div4si (__old, __addr, __idx, __mask, __scale);
}

/* Wrapper over __builtin_ia32_gather3div4di: both __v1_old and __index are
   viewed as __v4di; __addr, __mask and __scale pass through.  */
extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mmask_i64gather_epi64 (__m256i __v1_old, __mmask8 __mask,
			      __m256i __index, void const *__addr,
			      int __scale)
{
  const __v4di __old = (__v4di) __v1_old;
  const __v4di __idx = (__v4di) __index;

  return (__m256i)
    __builtin_ia32_gather3div4di (__old, __addr, __idx, __mask, __scale);
}

/* Wrapper over __builtin_ia32_gather3div2di: both __v1_old and __index are
   viewed as __v2di; __addr, __mask and __scale pass through.  */
extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mmask_i64gather_epi64 (__m128i __v1_old, __mmask8 __mask,
			   __m128i __index, void const *__addr,
			   int __scale)
{
  const __v2di __old = (__v2di) __v1_old;
  const __v2di __idx = (__v2di) __index;

  return (__m128i)
    __builtin_ia32_gather3div2di (__old, __addr, __idx, __mask, __scale);
}

/* Unmasked form: calls __builtin_ia32_scattersiv8sf with a constant 0xFF
   mask.  */
extern __inline void
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_i32scatter_ps (void *__addr, __m256i __index,
		      __m256 __v1, const int __scale)
{
  const __mmask8 __all = (__mmask8) 0xFF;
  const __v8si __idx = (__v8si) __index;
  const __v8sf __val = (__v8sf) __v1;

  __builtin_ia32_scattersiv8sf (__addr, __all, __idx, __val, __scale);
}

/* Mask form: passes the caller's __mask through to
   __builtin_ia32_scattersiv8sf.  */
extern __inline void
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_i32scatter_ps (void *__addr, __mmask8 __mask,
			   __m256i __index, __m256 __v1,
			   const int __scale)
{
  const __v8si __idx = (__v8si) __index;
  const __v8sf __val = (__v8sf) __v1;

  __builtin_ia32_scattersiv8sf (__addr, __mask, __idx, __val, __scale);
}

/* Unmasked form: calls __builtin_ia32_scattersiv4sf with a constant 0xFF
   mask.  */
extern __inline void
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_i32scatter_ps (void *__addr, __m128i __index, __m128 __v1,
		   const int __scale)
{
  const __mmask8 __all = (__mmask8) 0xFF;
  const __v4si __idx = (__v4si) __index;
  const __v4sf __val = (__v4sf) __v1;

  __builtin_ia32_scattersiv4sf (__addr, __all, __idx, __val, __scale);
}

/* Mask form: passes the caller's __mask through to
   __builtin_ia32_scattersiv4sf.  */
extern __inline void
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_i32scatter_ps (void *__addr, __mmask8 __mask,
			__m128i __index, __m128 __v1,
			const int __scale)
{
  const __v4si __idx = (__v4si) __index;
  const __v4sf __val = (__v4sf) __v1;

  __builtin_ia32_scattersiv4sf (__addr, __mask, __idx, __val, __scale);
}

/* Unmasked form: calls __builtin_ia32_scattersiv4df with a constant 0xFF
   mask.  */
extern __inline void
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_i32scatter_pd (void *__addr, __m128i __index,
		      __m256d __v1, const int __scale)
{
  const __mmask8 __all = (__mmask8) 0xFF;
  const __v4si __idx = (__v4si) __index;
  const __v4df __val = (__v4df) __v1;

  __builtin_ia32_scattersiv4df (__addr, __all, __idx, __val, __scale);
}

/* Mask form: passes the caller's __mask through to
   __builtin_ia32_scattersiv4df.  */
extern __inline void
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_i32scatter_pd (void *__addr, __mmask8 __mask,
			   __m128i __index, __m256d __v1,
			   const int __scale)
{
  const __v4si __idx = (__v4si) __index;
  const __v4df __val = (__v4df) __v1;

  __builtin_ia32_scattersiv4df (__addr, __mask, __idx, __val, __scale);
}

/* Unmasked form: calls __builtin_ia32_scattersiv2df with a constant 0xFF
   mask.  */
extern __inline void
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_i32scatter_pd (void *__addr, __m128i __index,
		   __m128d __v1, const int __scale)
{
  const __mmask8 __all = (__mmask8) 0xFF;
  const __v4si __idx = (__v4si) __index;
  const __v2df __val = (__v2df) __v1;

  __builtin_ia32_scattersiv2df (__addr, __all, __idx, __val, __scale);
}

/* Mask form: passes the caller's __mask through to
   __builtin_ia32_scattersiv2df.  */
extern __inline void
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_i32scatter_pd (void *__addr, __mmask8 __mask,
			__m128i __index, __m128d __v1,
			const int __scale)
{
  const __v4si __idx = (__v4si) __index;
  const __v2df __val = (__v2df) __v1;

  __builtin_ia32_scattersiv2df (__addr, __mask, __idx, __val, __scale);
}

/* Unmasked form: calls __builtin_ia32_scatterdiv8sf with a constant 0xFF
   mask.  */
extern __inline void
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_i64scatter_ps (void *__addr, __m256i __index,
		      __m128 __v1, const int __scale)
{
  const __mmask8 __all = (__mmask8) 0xFF;
  const __v4di __idx = (__v4di) __index;
  const __v4sf __val = (__v4sf) __v1;

  __builtin_ia32_scatterdiv8sf (__addr, __all, __idx, __val, __scale);
}

/* Mask form: passes the caller's __mask through to
   __builtin_ia32_scatterdiv8sf.  */
extern __inline void
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_i64scatter_ps (void *__addr, __mmask8 __mask,
			   __m256i __index, __m128 __v1,
			   const int __scale)
{
  const __v4di __idx = (__v4di) __index;
  const __v4sf __val = (__v4sf) __v1;

  __builtin_ia32_scatterdiv8sf (__addr, __mask, __idx, __val, __scale);
}

/* Unmasked form: calls __builtin_ia32_scatterdiv4sf with a constant 0xFF
   mask.  */
extern __inline void
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_i64scatter_ps (void *__addr, __m128i __index, __m128 __v1,
		   const int __scale)
{
  const __mmask8 __all = (__mmask8) 0xFF;
  const __v2di __idx = (__v2di) __index;
  const __v4sf __val = (__v4sf) __v1;

  __builtin_ia32_scatterdiv4sf (__addr, __all, __idx, __val, __scale);
}

/* Mask form: passes the caller's __mask through to
   __builtin_ia32_scatterdiv4sf.  */
extern __inline void
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_i64scatter_ps (void *__addr, __mmask8 __mask,
			__m128i __index, __m128 __v1,
			const int __scale)
{
  const __v2di __idx = (__v2di) __index;
  const __v4sf __val = (__v4sf) __v1;

  __builtin_ia32_scatterdiv4sf (__addr, __mask, __idx, __val, __scale);
}

/* Unmasked form: calls __builtin_ia32_scatterdiv4df with a constant 0xFF
   mask.  */
extern __inline void
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_i64scatter_pd (void *__addr, __m256i __index,
		      __m256d __v1, const int __scale)
{
  const __mmask8 __all = (__mmask8) 0xFF;
  const __v4di __idx = (__v4di) __index;
  const __v4df __val = (__v4df) __v1;

  __builtin_ia32_scatterdiv4df (__addr, __all, __idx, __val, __scale);
}

/* Mask form: passes the caller's __mask through to
   __builtin_ia32_scatterdiv4df.  */
extern __inline void
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_i64scatter_pd (void *__addr, __mmask8 __mask,
			   __m256i __index, __m256d __v1,
			   const int __scale)
{
  const __v4di __idx = (__v4di) __index;
  const __v4df __val = (__v4df) __v1;

  __builtin_ia32_scatterdiv4df (__addr, __mask, __idx, __val, __scale);
}

/* Unmasked form: calls __builtin_ia32_scatterdiv2df with a constant 0xFF
   mask.  */
extern __inline void
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_i64scatter_pd (void *__addr, __m128i __index,
		   __m128d __v1, const int __scale)
{
  const __mmask8 __all = (__mmask8) 0xFF;
  const __v2di __idx = (__v2di) __index;
  const __v2df __val = (__v2df) __v1;

  __builtin_ia32_scatterdiv2df (__addr, __all, __idx, __val, __scale);
}

/* Mask form: passes the caller's __mask through to
   __builtin_ia32_scatterdiv2df.  */
extern __inline void
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_i64scatter_pd (void *__addr, __mmask8 __mask,
			__m128i __index, __m128d __v1,
			const int __scale)
{
  const __v2di __idx = (__v2di) __index;
  const __v2df __val = (__v2df) __v1;

  __builtin_ia32_scatterdiv2df (__addr, __mask, __idx, __val, __scale);
}

/* Unmasked form: calls __builtin_ia32_scattersiv8si with a constant 0xFF
   mask.  */
extern __inline void
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_i32scatter_epi32 (void *__addr, __m256i __index,
			 __m256i __v1, const int __scale)
{
  const __mmask8 __all = (__mmask8) 0xFF;
  const __v8si __idx = (__v8si) __index;
  const __v8si __val = (__v8si) __v1;

  __builtin_ia32_scattersiv8si (__addr, __all, __idx, __val, __scale);
}

/* Mask form: passes the caller's __mask through to
   __builtin_ia32_scattersiv8si.  */
extern __inline void
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_i32scatter_epi32 (void *__addr, __mmask8 __mask,
			      __m256i __index, __m256i __v1,
			      const int __scale)
{
  const __v8si __idx = (__v8si) __index;
  const __v8si __val = (__v8si) __v1;

  __builtin_ia32_scattersiv8si (__addr, __mask, __idx, __val, __scale);
}

/* Unmasked form: calls __builtin_ia32_scattersiv4si with a constant 0xFF
   mask.  */
extern __inline void
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_i32scatter_epi32 (void *__addr, __m128i __index,
		      __m128i __v1, const int __scale)
{
  const __mmask8 __all = (__mmask8) 0xFF;
  const __v4si __idx = (__v4si) __index;
  const __v4si __val = (__v4si) __v1;

  __builtin_ia32_scattersiv4si (__addr, __all, __idx, __val, __scale);
}

/* Mask form: passes the caller's __mask through to
   __builtin_ia32_scattersiv4si.  */
extern __inline void
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_i32scatter_epi32 (void *__addr, __mmask8 __mask,
			   __m128i __index, __m128i __v1,
			   const int __scale)
{
  const __v4si __idx = (__v4si) __index;
  const __v4si __val = (__v4si) __v1;

  __builtin_ia32_scattersiv4si (__addr, __mask, __idx, __val, __scale);
}

/* Unmasked form: calls __builtin_ia32_scattersiv4di with a constant 0xFF
   mask.  */
extern __inline void
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_i32scatter_epi64 (void *__addr, __m128i __index,
			 __m256i __v1, const int __scale)
{
  const __mmask8 __all = (__mmask8) 0xFF;
  const __v4si __idx = (__v4si) __index;
  const __v4di __val = (__v4di) __v1;

  __builtin_ia32_scattersiv4di (__addr, __all, __idx, __val, __scale);
}

/* Mask form: passes the caller's __mask through to
   __builtin_ia32_scattersiv4di.  */
extern __inline void
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_i32scatter_epi64 (void *__addr, __mmask8 __mask,
			      __m128i __index, __m256i __v1,
			      const int __scale)
{
  const __v4si __idx = (__v4si) __index;
  const __v4di __val = (__v4di) __v1;

  __builtin_ia32_scattersiv4di (__addr, __mask, __idx, __val, __scale);
}

/* Unmasked form: calls __builtin_ia32_scattersiv2di with a constant 0xFF
   mask.  */
extern __inline void
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_i32scatter_epi64 (void *__addr, __m128i __index,
		      __m128i __v1, const int __scale)
{
  const __mmask8 __all = (__mmask8) 0xFF;
  const __v4si __idx = (__v4si) __index;
  const __v2di __val = (__v2di) __v1;

  __builtin_ia32_scattersiv2di (__addr, __all, __idx, __val, __scale);
}

/* Mask form: passes the caller's __mask through to
   __builtin_ia32_scattersiv2di.  */
extern __inline void
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_i32scatter_epi64 (void *__addr, __mmask8 __mask,
			   __m128i __index, __m128i __v1,
			   const int __scale)
{
  const __v4si __idx = (__v4si) __index;
  const __v2di __val = (__v2di) __v1;

  __builtin_ia32_scattersiv2di (__addr, __mask, __idx, __val, __scale);
}

/* Unmasked form: calls __builtin_ia32_scatterdiv8si with a constant 0xFF
   mask.  */
extern __inline void
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_i64scatter_epi32 (void *__addr, __m256i __index,
			 __m128i __v1, const int __scale)
{
  const __mmask8 __all = (__mmask8) 0xFF;
  const __v4di __idx = (__v4di) __index;
  const __v4si __val = (__v4si) __v1;

  __builtin_ia32_scatterdiv8si (__addr, __all, __idx, __val, __scale);
}

/* Mask form: passes the caller's __mask through to
   __builtin_ia32_scatterdiv8si.  */
extern __inline void
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_i64scatter_epi32 (void *__addr, __mmask8 __mask,
			      __m256i __index, __m128i __v1,
			      const int __scale)
{
  const __v4di __idx = (__v4di) __index;
  const __v4si __val = (__v4si) __v1;

  __builtin_ia32_scatterdiv8si (__addr, __mask, __idx, __val, __scale);
}

/* Unmasked form: calls __builtin_ia32_scatterdiv4si with a constant 0xFF
   mask.  */
extern __inline void
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_i64scatter_epi32 (void *__addr, __m128i __index,
		      __m128i __v1, const int __scale)
{
  const __mmask8 __all = (__mmask8) 0xFF;
  const __v2di __idx = (__v2di) __index;
  const __v4si __val = (__v4si) __v1;

  __builtin_ia32_scatterdiv4si (__addr, __all, __idx, __val, __scale);
}

/* Mask form: passes the caller's __mask through to
   __builtin_ia32_scatterdiv4si.  */
extern __inline void
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_i64scatter_epi32 (void *__addr, __mmask8 __mask,
			   __m128i __index, __m128i __v1,
			   const int __scale)
{
  const __v2di __idx = (__v2di) __index;
  const __v4si __val = (__v4si) __v1;

  __builtin_ia32_scatterdiv4si (__addr, __mask, __idx, __val, __scale);
}

/* Unmasked form: calls __builtin_ia32_scatterdiv4di with a constant 0xFF
   mask.  */
extern __inline void
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_i64scatter_epi64 (void *__addr, __m256i __index,
			 __m256i __v1, const int __scale)
{
  const __mmask8 __all = (__mmask8) 0xFF;
  const __v4di __idx = (__v4di) __index;
  const __v4di __val = (__v4di) __v1;

  __builtin_ia32_scatterdiv4di (__addr, __all, __idx, __val, __scale);
}

/* Mask form: passes the caller's __mask through to
   __builtin_ia32_scatterdiv4di.  */
extern __inline void
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_i64scatter_epi64 (void *__addr, __mmask8 __mask,
			      __m256i __index, __m256i __v1,
			      const int __scale)
{
  const __v4di __idx = (__v4di) __index;
  const __v4di __val = (__v4di) __v1;

  __builtin_ia32_scatterdiv4di (__addr, __mask, __idx, __val, __scale);
}

/* Unmasked form: calls __builtin_ia32_scatterdiv2di with a constant 0xFF
   mask.  */
extern __inline void
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_i64scatter_epi64 (void *__addr, __m128i __index,
		      __m128i __v1, const int __scale)
{
  const __mmask8 __all = (__mmask8) 0xFF;
  const __v2di __idx = (__v2di) __index;
  const __v2di __val = (__v2di) __v1;

  __builtin_ia32_scatterdiv2di (__addr, __all, __idx, __val, __scale);
}

/* Mask form: passes the caller's __mask through to
   __builtin_ia32_scatterdiv2di.  */
extern __inline void
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_i64scatter_epi64 (void *__addr, __mmask8 __mask,
			   __m128i __index, __m128i __v1,
			   const int __scale)
{
  const __v2di __idx = (__v2di) __index;
  const __v2di __val = (__v2di) __v1;

  __builtin_ia32_scatterdiv2di (__addr, __mask, __idx, __val, __scale);
}

/* Mask form: __W and __U are handed to __builtin_ia32_pshufd256_mask after
   the source vector and the __mask control value.  */
extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_shuffle_epi32 (__m256i __W, __mmask8 __U, __m256i __A,
			   _MM_PERM_ENUM __mask)
{
  const __v8si __src = (__v8si) __A;
  const __v8si __old = (__v8si) __W;

  return (__m256i)
    __builtin_ia32_pshufd256_mask (__src, __mask, __old, (__mmask8) __U);
}

/* Maskz form: __builtin_ia32_pshufd256_mask receives a zero vector together
   with the caller's mask __U.  */
extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_maskz_shuffle_epi32 (__mmask8 __U, __m256i __A,
			    _MM_PERM_ENUM __mask)
{
  const __v8si __src = (__v8si) __A;
  const __v8si __zero = (__v8si) _mm256_setzero_si256 ();

  return (__m256i)
    __builtin_ia32_pshufd256_mask (__src, __mask, __zero, (__mmask8) __U);
}

/* Mask form: __W and __U are handed to __builtin_ia32_pshufd128_mask after
   the source vector and the __mask control value.  */
extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_shuffle_epi32 (__m128i __W, __mmask8 __U, __m128i __A,
			_MM_PERM_ENUM __mask)
{
  const __v4si __src = (__v4si) __A;
  const __v4si __old = (__v4si) __W;

  return (__m128i)
    __builtin_ia32_pshufd128_mask (__src, __mask, __old, (__mmask8) __U);
}

/* Maskz form: __builtin_ia32_pshufd128_mask receives a zero vector together
   with the caller's mask __U.  */
extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_shuffle_epi32 (__mmask8 __U, __m128i __A,
			 _MM_PERM_ENUM __mask)
{
  const __v4si __src = (__v4si) __A;
  const __v4si __zero = (__v4si) _mm_setzero_si128 ();

  return (__m128i)
    __builtin_ia32_pshufd128_mask (__src, __mask, __zero, (__mmask8) __U);
}

/* Unmasked form: __builtin_ia32_prold256_mask receives a zero vector and a
   mask with every bit set.  */
extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_rol_epi32 (__m256i __A, const int __B)
{
  const __v8si __zero = (__v8si) _mm256_setzero_si256 ();
  const __mmask8 __all = (__mmask8) -1;

  return (__m256i)
    __builtin_ia32_prold256_mask ((__v8si) __A, __B, __zero, __all);
}

/* Mask form: __W and __U are handed to __builtin_ia32_prold256_mask as the
   vector and mask operands that follow the count __B.  */
extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_rol_epi32 (__m256i __W, __mmask8 __U, __m256i __A,
		       const int __B)
{
  const __v8si __src = (__v8si) __A;
  const __v8si __old = (__v8si) __W;

  return (__m256i)
    __builtin_ia32_prold256_mask (__src, __B, __old, (__mmask8) __U);
}

/* Maskz form: __builtin_ia32_prold256_mask receives a zero vector together
   with the caller's mask __U.  */
extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_maskz_rol_epi32 (__mmask8 __U, __m256i __A, const int __B)
{
  const __v8si __src = (__v8si) __A;
  const __v8si __zero = (__v8si) _mm256_setzero_si256 ();

  return (__m256i)
    __builtin_ia32_prold256_mask (__src, __B, __zero, (__mmask8) __U);
}

/* Unmasked form: __builtin_ia32_prold128_mask receives a zero vector and a
   mask with every bit set.  */
extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_rol_epi32 (__m128i __A, const int __B)
{
  const __v4si __zero = (__v4si) _mm_setzero_si128 ();
  const __mmask8 __all = (__mmask8) -1;

  return (__m128i)
    __builtin_ia32_prold128_mask ((__v4si) __A, __B, __zero, __all);
}

/* Mask form: __W and __U are handed to __builtin_ia32_prold128_mask as the
   vector and mask operands that follow the count __B.  */
extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_rol_epi32 (__m128i __W, __mmask8 __U, __m128i __A,
		    const int __B)
{
  const __v4si __src = (__v4si) __A;
  const __v4si __old = (__v4si) __W;

  return (__m128i)
    __builtin_ia32_prold128_mask (__src, __B, __old, (__mmask8) __U);
}

/* Maskz form: __builtin_ia32_prold128_mask receives a zero vector together
   with the caller's mask __U.  */
extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_rol_epi32 (__mmask8 __U, __m128i __A, const int __B)
{
  const __v4si __src = (__v4si) __A;
  const __v4si __zero = (__v4si) _mm_setzero_si128 ();

  return (__m128i)
    __builtin_ia32_prold128_mask (__src, __B, __zero, (__mmask8) __U);
}

/* Unmasked form: __builtin_ia32_prord256_mask receives a zero vector and a
   mask with every bit set.  */
extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_ror_epi32 (__m256i __A, const int __B)
{
  const __v8si __zero = (__v8si) _mm256_setzero_si256 ();
  const __mmask8 __all = (__mmask8) -1;

  return (__m256i)
    __builtin_ia32_prord256_mask ((__v8si) __A, __B, __zero, __all);
}

/* Mask form: __W and __U are handed to __builtin_ia32_prord256_mask as the
   vector and mask operands that follow the count __B.  */
extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_ror_epi32 (__m256i __W, __mmask8 __U, __m256i __A,
		       const int __B)
{
  const __v8si __src = (__v8si) __A;
  const __v8si __old = (__v8si) __W;

  return (__m256i)
    __builtin_ia32_prord256_mask (__src, __B, __old, (__mmask8) __U);
}

/* Maskz form: __builtin_ia32_prord256_mask receives a zero vector together
   with the caller's mask __U.  */
extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_maskz_ror_epi32 (__mmask8 __U, __m256i __A, const int __B)
{
  const __v8si __src = (__v8si) __A;
  const __v8si __zero = (__v8si) _mm256_setzero_si256 ();

  return (__m256i)
    __builtin_ia32_prord256_mask (__src, __B, __zero, (__mmask8) __U);
}

/* Unmasked form: __builtin_ia32_prord128_mask receives a zero vector and a
   mask with every bit set.  */
extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_ror_epi32 (__m128i __A, const int __B)
{
  const __v4si __zero = (__v4si) _mm_setzero_si128 ();
  const __mmask8 __all = (__mmask8) -1;

  return (__m128i)
    __builtin_ia32_prord128_mask ((__v4si) __A, __B, __zero, __all);
}

/* Mask form: __W and __U are handed to __builtin_ia32_prord128_mask as the
   vector and mask operands that follow the count __B.  */
extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_ror_epi32 (__m128i __W, __mmask8 __U, __m128i __A,
		    const int __B)
{
  const __v4si __src = (__v4si) __A;
  const __v4si __old = (__v4si) __W;

  return (__m128i)
    __builtin_ia32_prord128_mask (__src, __B, __old, (__mmask8) __U);
}

/* Maskz form: __builtin_ia32_prord128_mask receives a zero vector together
   with the caller's mask __U.  */
extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_ror_epi32 (__mmask8 __U, __m128i __A, const int __B)
{
  const __v4si __src = (__v4si) __A;
  const __v4si __zero = (__v4si) _mm_setzero_si128 ();

  return (__m128i)
    __builtin_ia32_prord128_mask (__src, __B, __zero, (__mmask8) __U);
}

/* Unmasked form: __builtin_ia32_prolq256_mask receives a zero vector and a
   mask with every bit set.  */
extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_rol_epi64 (__m256i __A, const int __B)
{
  const __v4di __zero = (__v4di) _mm256_setzero_si256 ();
  const __mmask8 __all = (__mmask8) -1;

  return (__m256i)
    __builtin_ia32_prolq256_mask ((__v4di) __A, __B, __zero, __all);
}

/* Mask form: __W and __U are handed to __builtin_ia32_prolq256_mask as the
   vector and mask operands that follow the count __B.  */
extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_rol_epi64 (__m256i __W, __mmask8 __U, __m256i __A,
		       const int __B)
{
  const __v4di __src = (__v4di) __A;
  const __v4di __old = (__v4di) __W;

  return (__m256i)
    __builtin_ia32_prolq256_mask (__src, __B, __old, (__mmask8) __U);
}

/* Maskz form: __builtin_ia32_prolq256_mask receives a zero vector together
   with the caller's mask __U.  */
extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_maskz_rol_epi64 (__mmask8 __U, __m256i __A, const int __B)
{
  const __v4di __src = (__v4di) __A;
  const __v4di __zero = (__v4di) _mm256_setzero_si256 ();

  return (__m256i)
    __builtin_ia32_prolq256_mask (__src, __B, __zero, (__mmask8) __U);
}

/* Unmasked form: __builtin_ia32_prolq128_mask receives a zero vector and a
   mask with every bit set.  */
extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_rol_epi64 (__m128i __A, const int __B)
{
  const __v2di __zero = (__v2di) _mm_setzero_si128 ();
  const __mmask8 __all = (__mmask8) -1;

  return (__m128i)
    __builtin_ia32_prolq128_mask ((__v2di) __A, __B, __zero, __all);
}

/* Mask form: __W and __U are handed to __builtin_ia32_prolq128_mask as the
   vector and mask operands that follow the count __B.  */
extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_rol_epi64 (__m128i __W, __mmask8 __U, __m128i __A,
		    const int __B)
{
  const __v2di __src = (__v2di) __A;
  const __v2di __old = (__v2di) __W;

  return (__m128i)
    __builtin_ia32_prolq128_mask (__src, __B, __old, (__mmask8) __U);
}

/* Maskz form: __builtin_ia32_prolq128_mask receives a zero vector together
   with the caller's mask __U.  */
extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_rol_epi64 (__mmask8 __U, __m128i __A, const int __B)
{
  const __v2di __src = (__v2di) __A;
  const __v2di __zero = (__v2di) _mm_setzero_si128 ();

  return (__m128i)
    __builtin_ia32_prolq128_mask (__src, __B, __zero, (__mmask8) __U);
}

/* Unmasked form: __builtin_ia32_prorq256_mask receives a zero vector and a
   mask with every bit set.  */
extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_ror_epi64 (__m256i __A, const int __B)
{
  const __v4di __zero = (__v4di) _mm256_setzero_si256 ();
  const __mmask8 __all = (__mmask8) -1;

  return (__m256i)
    __builtin_ia32_prorq256_mask ((__v4di) __A, __B, __zero, __all);
}

/* Mask form: __W and __U are handed to __builtin_ia32_prorq256_mask as the
   vector and mask operands that follow the count __B.  */
extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_ror_epi64 (__m256i __W, __mmask8 __U, __m256i __A,
		       const int __B)
{
  const __v4di __src = (__v4di) __A;
  const __v4di __old = (__v4di) __W;

  return (__m256i)
    __builtin_ia32_prorq256_mask (__src, __B, __old, (__mmask8) __U);
}

/* Maskz form: __builtin_ia32_prorq256_mask receives a zero vector together
   with the caller's mask __U.  */
extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_maskz_ror_epi64 (__mmask8 __U, __m256i __A, const int __B)
{
  const __v4di __src = (__v4di) __A;
  const __v4di __zero = (__v4di) _mm256_setzero_si256 ();

  return (__m256i)
    __builtin_ia32_prorq256_mask (__src, __B, __zero, (__mmask8) __U);
}

/* Unmasked form: __builtin_ia32_prorq128_mask receives a zero vector and a
   mask with every bit set.  */
extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_ror_epi64 (__m128i __A, const int __B)
{
  const __v2di __zero = (__v2di) _mm_setzero_si128 ();
  const __mmask8 __all = (__mmask8) -1;

  return (__m128i)
    __builtin_ia32_prorq128_mask ((__v2di) __A, __B, __zero, __all);
}

/* Mask form: __W and __U are handed to __builtin_ia32_prorq128_mask as the
   vector and mask operands that follow the count __B.  */
extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_ror_epi64 (__m128i __W, __mmask8 __U, __m128i __A,
		    const int __B)
{
  const __v2di __src = (__v2di) __A;
  const __v2di __old = (__v2di) __W;

  return (__m128i)
    __builtin_ia32_prorq128_mask (__src, __B, __old, (__mmask8) __U);
}

/* Maskz form: __builtin_ia32_prorq128_mask receives a zero vector together
   with the caller's mask __U.  */
extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_ror_epi64 (__mmask8 __U, __m128i __A, const int __B)
{
  const __v2di __src = (__v2di) __A;
  const __v2di __zero = (__v2di) _mm_setzero_si128 ();

  return (__m128i)
    __builtin_ia32_prorq128_mask (__src, __B, __zero, (__mmask8) __U);
}

/* Unmasked form: __builtin_ia32_alignd128_mask receives a zero vector and a
   mask with every bit set.  */
extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_alignr_epi32 (__m128i __A, __m128i __B, const int __imm)
{
  const __v4si __a = (__v4si) __A;
  const __v4si __b = (__v4si) __B;
  const __v4si __zero = (__v4si) _mm_setzero_si128 ();
  const __mmask8 __all = (__mmask8) -1;

  return (__m128i)
    __builtin_ia32_alignd128_mask (__a, __b, __imm, __zero, __all);
}

/* Mask form: __W and __U are handed to __builtin_ia32_alignd128_mask as the
   vector and mask operands that follow the immediate.  */
extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_alignr_epi32 (__m128i __W, __mmask8 __U, __m128i __A,
		       __m128i __B, const int __imm)
{
  const __v4si __a = (__v4si) __A;
  const __v4si __b = (__v4si) __B;
  const __v4si __old = (__v4si) __W;

  return (__m128i)
    __builtin_ia32_alignd128_mask (__a, __b, __imm, __old, (__mmask8) __U);
}

/* Maskz form: __builtin_ia32_alignd128_mask receives a zero vector together
   with the caller's mask __U.  */
extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_alignr_epi32 (__mmask8 __U, __m128i __A, __m128i __B,
			const int __imm)
{
  const __v4si __a = (__v4si) __A;
  const __v4si __b = (__v4si) __B;
  const __v4si __zero = (__v4si) _mm_setzero_si128 ();

  return (__m128i)
    __builtin_ia32_alignd128_mask (__a, __b, __imm, __zero, (__mmask8) __U);
}

/* Unmasked form: __builtin_ia32_alignq128_mask receives a zero vector and a
   mask with every bit set.  */
extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_alignr_epi64 (__m128i __A, __m128i __B, const int __imm)
{
  const __v2di __a = (__v2di) __A;
  const __v2di __b = (__v2di) __B;
  const __v2di __zero = (__v2di) _mm_setzero_si128 ();
  const __mmask8 __all = (__mmask8) -1;

  return (__m128i)
    __builtin_ia32_alignq128_mask (__a, __b, __imm, __zero, __all);
}

/* Mask form: __W and __U are handed to __builtin_ia32_alignq128_mask as the
   vector and mask operands that follow the immediate.  */
extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_alignr_epi64 (__m128i __W, __mmask8 __U, __m128i __A,
		       __m128i __B, const int __imm)
{
  const __v2di __a = (__v2di) __A;
  const __v2di __b = (__v2di) __B;
  const __v2di __old = (__v2di) __W;

  return (__m128i)
    __builtin_ia32_alignq128_mask (__a, __b, __imm, __old, (__mmask8) __U);
}

/* Calls __builtin_ia32_alignq128_mask on __A and __B with immediate
   __imm, a zero vector as the fourth argument and __U as the mask.  */
extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_alignr_epi64 (__mmask8 __U, __m128i __A, __m128i __B,
			const int __imm)
{
  const __v2di __zero = (__v2di) _mm_setzero_si128 ();

  return (__m128i)
    __builtin_ia32_alignq128_mask ((__v2di) __A, (__v2di) __B, __imm,
                                   __zero, (__mmask8) __U);
}

/* Calls __builtin_ia32_alignd256_mask on __A and __B with immediate
   __imm, a zero vector as the fourth argument and an all-ones mask.  */
extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_alignr_epi32 (__m256i __A, __m256i __B, const int __imm)
{
  const __v8si __zero = (__v8si) _mm256_setzero_si256 ();
  const __mmask8 __full = (__mmask8) -1;

  return (__m256i)
    __builtin_ia32_alignd256_mask ((__v8si) __A, (__v8si) __B, __imm,
                                   __zero, __full);
}

/* Calls __builtin_ia32_alignd256_mask on __A and __B with immediate
   __imm, __W as the fourth argument and __U as the mask.  */
extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_alignr_epi32 (__m256i __W, __mmask8 __U, __m256i __A,
			  __m256i __B, const int __imm)
{
  const __v8si __src = (__v8si) __W;

  return (__m256i)
    __builtin_ia32_alignd256_mask ((__v8si) __A, (__v8si) __B, __imm,
                                   __src, (__mmask8) __U);
}

/* Calls __builtin_ia32_alignd256_mask on __A and __B with immediate
   __imm, a zero vector as the fourth argument and __U as the mask.  */
extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_maskz_alignr_epi32 (__mmask8 __U, __m256i __A, __m256i __B,
			   const int __imm)
{
  const __v8si __zero = (__v8si) _mm256_setzero_si256 ();

  return (__m256i)
    __builtin_ia32_alignd256_mask ((__v8si) __A, (__v8si) __B, __imm,
                                   __zero, (__mmask8) __U);
}

/* Calls __builtin_ia32_alignq256_mask on __A and __B with immediate
   __imm, a zero vector as the fourth argument and an all-ones mask.  */
extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_alignr_epi64 (__m256i __A, __m256i __B, const int __imm)
{
  const __v4di __zero = (__v4di) _mm256_setzero_si256 ();
  const __mmask8 __full = (__mmask8) -1;

  return (__m256i)
    __builtin_ia32_alignq256_mask ((__v4di) __A, (__v4di) __B, __imm,
                                   __zero, __full);
}

/* Calls __builtin_ia32_alignq256_mask on __A and __B with immediate
   __imm, __W as the fourth argument and __U as the mask.  */
extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_alignr_epi64 (__m256i __W, __mmask8 __U, __m256i __A,
			  __m256i __B, const int __imm)
{
  const __v4di __src = (__v4di) __W;

  return (__m256i)
    __builtin_ia32_alignq256_mask ((__v4di) __A, (__v4di) __B, __imm,
                                   __src, (__mmask8) __U);
}

/* Calls __builtin_ia32_alignq256_mask on __A and __B with immediate
   __imm, a zero vector as the fourth argument and __U as the mask.  */
extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_maskz_alignr_epi64 (__mmask8 __U, __m256i __A, __m256i __B,
			   const int __imm)
{
  const __v4di __zero = (__v4di) _mm256_setzero_si256 ();

  return (__m256i)
    __builtin_ia32_alignq256_mask ((__v4di) __A, (__v4di) __B, __imm,
                                   __zero, (__mmask8) __U);
}

/* Passes __A and the immediate __I to __builtin_ia32_vcvtps2ph_mask with
   __W (viewed as __v8hi) as the third argument and __U as the mask.  */
extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_cvtps_ph (__m128i __W, __mmask8 __U, __m128 __A,
		   const int __I)
{
  const __v8hi __src = (__v8hi) __W;

  return (__m128i)
    __builtin_ia32_vcvtps2ph_mask ((__v4sf) __A, __I, __src, (__mmask8) __U);
}

/* Passes __A and the immediate __I to __builtin_ia32_vcvtps2ph_mask with
   a zero vector as the third argument and __U as the mask.  */
extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_cvtps_ph (__mmask8 __U, __m128 __A, const int __I)
{
  const __v8hi __zero = (__v8hi) _mm_setzero_si128 ();

  return (__m128i)
    __builtin_ia32_vcvtps2ph_mask ((__v4sf) __A, __I, __zero, (__mmask8) __U);
}

/* Passes the 256-bit __A and the immediate __I to
   __builtin_ia32_vcvtps2ph256_mask with __W (viewed as __v8hi) as the
   third argument and __U as the mask; the result is a 128-bit vector.  */
extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_cvtps_ph (__m128i __W, __mmask8 __U, __m256 __A,
		      const int __I)
{
  const __v8hi __src = (__v8hi) __W;

  return (__m128i)
    __builtin_ia32_vcvtps2ph256_mask ((__v8sf) __A, __I, __src,
                                      (__mmask8) __U);
}

/* Passes the 256-bit __A and the immediate __I to
   __builtin_ia32_vcvtps2ph256_mask with a zero vector as the third
   argument and __U as the mask; the result is a 128-bit vector.  */
extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_maskz_cvtps_ph (__mmask8 __U, __m256 __A, const int __I)
{
  const __v8hi __zero = (__v8hi) _mm_setzero_si128 ();

  return (__m128i)
    __builtin_ia32_vcvtps2ph256_mask ((__v8sf) __A, __I, __zero,
                                      (__mmask8) __U);
}

/* Passes __A and the immediate __imm to __builtin_ia32_psradi256_mask
   with __W as the third argument and __U as the mask.  */
extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_srai_epi32 (__m256i __W, __mmask8 __U, __m256i __A,
			const int __imm)
{
  const __v8si __src = (__v8si) __W;

  return (__m256i)
    __builtin_ia32_psradi256_mask ((__v8si) __A, __imm, __src,
                                   (__mmask8) __U);
}

/* Passes __A and the immediate __imm to __builtin_ia32_psradi256_mask
   with a zero vector as the third argument and __U as the mask.  */
extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_maskz_srai_epi32 (__mmask8 __U, __m256i __A, const int __imm)
{
  const __v8si __zero = (__v8si) _mm256_setzero_si256 ();

  return (__m256i)
    __builtin_ia32_psradi256_mask ((__v8si) __A, __imm, __zero,
                                   (__mmask8) __U);
}

/* Passes __A and the immediate __imm to __builtin_ia32_psradi128_mask
   with __W as the third argument and __U as the mask.  */
extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_srai_epi32 (__m128i __W, __mmask8 __U, __m128i __A,
		     const int __imm)
{
  const __v4si __src = (__v4si) __W;

  return (__m128i)
    __builtin_ia32_psradi128_mask ((__v4si) __A, __imm, __src,
                                   (__mmask8) __U);
}

/* Passes __A and the immediate __imm to __builtin_ia32_psradi128_mask
   with a zero vector as the third argument and __U as the mask.  */
extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_srai_epi32 (__mmask8 __U, __m128i __A, const int __imm)
{
  const __v4si __zero = (__v4si) _mm_setzero_si128 ();

  return (__m128i)
    __builtin_ia32_psradi128_mask ((__v4si) __A, __imm, __zero,
                                   (__mmask8) __U);
}

/* Passes __A and the immediate __imm to __builtin_ia32_psraqi256_mask
   with a zero vector as the third argument and an all-ones mask.  */
extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_srai_epi64 (__m256i __A, const int __imm)
{
  const __v4di __zero = (__v4di) _mm256_setzero_si256 ();
  const __mmask8 __full = (__mmask8) -1;

  return (__m256i)
    __builtin_ia32_psraqi256_mask ((__v4di) __A, __imm, __zero, __full);
}

/* Passes __A and the immediate __imm to __builtin_ia32_psraqi256_mask
   with __W as the third argument and __U as the mask.  */
extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_srai_epi64 (__m256i __W, __mmask8 __U, __m256i __A,
			const int __imm)
{
  const __v4di __src = (__v4di) __W;

  return (__m256i)
    __builtin_ia32_psraqi256_mask ((__v4di) __A, __imm, __src,
                                   (__mmask8) __U);
}

/* Passes __A and the immediate __imm to __builtin_ia32_psraqi256_mask
   with a zero vector as the third argument and __U as the mask.  */
extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_maskz_srai_epi64 (__mmask8 __U, __m256i __A, const int __imm)
{
  const __v4di __zero = (__v4di) _mm256_setzero_si256 ();

  return (__m256i)
    __builtin_ia32_psraqi256_mask ((__v4di) __A, __imm, __zero,
                                   (__mmask8) __U);
}

/* Passes __A and the immediate __imm to __builtin_ia32_psraqi128_mask
   with a zero vector as the third argument and an all-ones mask.  */
extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_srai_epi64 (__m128i __A, const int __imm)
{
  const __v2di __zero = (__v2di) _mm_setzero_si128 ();
  const __mmask8 __full = (__mmask8) -1;

  return (__m128i)
    __builtin_ia32_psraqi128_mask ((__v2di) __A, __imm, __zero, __full);
}

/* Passes __A and the immediate __imm to __builtin_ia32_psraqi128_mask
   with __W as the third argument and __U as the mask.  */
extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_srai_epi64 (__m128i __W, __mmask8 __U, __m128i __A,
		     const int __imm)
{
  const __v2di __src = (__v2di) __W;

  return (__m128i)
    __builtin_ia32_psraqi128_mask ((__v2di) __A, __imm, __src,
                                   (__mmask8) __U);
}

/* Passes __A and the immediate __imm to __builtin_ia32_psraqi128_mask
   with a zero vector as the third argument and __U as the mask.  */
extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_srai_epi64 (__mmask8 __U, __m128i __A, const int __imm)
{
  const __v2di __zero = (__v2di) _mm_setzero_si128 ();

  return (__m128i)
    __builtin_ia32_psraqi128_mask ((__v2di) __A, __imm, __zero,
                                   (__mmask8) __U);
}

/* Passes __A and the count __B to __builtin_ia32_pslldi128_mask with
   __W as the third argument and __U as the mask.  */
extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_slli_epi32 (__m128i __W, __mmask8 __U, __m128i __A, int __B)
{
  const __v4si __src = (__v4si) __W;

  return (__m128i)
    __builtin_ia32_pslldi128_mask ((__v4si) __A, __B, __src, (__mmask8) __U);
}

/* Passes __A and the count __B to __builtin_ia32_pslldi128_mask with a
   zero vector as the third argument and __U as the mask.  */
extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_slli_epi32 (__mmask8 __U, __m128i __A, int __B)
{
  const __v4si __zero = (__v4si) _mm_setzero_si128 ();

  return (__m128i)
    __builtin_ia32_pslldi128_mask ((__v4si) __A, __B, __zero, (__mmask8) __U);
}

/* Passes __A and the count __B to __builtin_ia32_psllqi128_mask with
   __W as the third argument and __U as the mask.  */
extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_slli_epi64 (__m128i __W, __mmask8 __U, __m128i __A, int __B)
{
  const __v2di __src = (__v2di) __W;

  return (__m128i)
    __builtin_ia32_psllqi128_mask ((__v2di) __A, __B, __src, (__mmask8) __U);
}

/* Passes __A and the count __B to __builtin_ia32_psllqi128_mask with a
   zero vector as the third argument and __U as the mask.  */
extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_slli_epi64 (__mmask8 __U, __m128i __A, int __B)
{
  const __v2di __zero = (__v2di) _mm_setzero_si128 ();

  return (__m128i)
    __builtin_ia32_psllqi128_mask ((__v2di) __A, __B, __zero, (__mmask8) __U);
}

/* Passes __A and the count __B to __builtin_ia32_pslldi256_mask with
   __W as the third argument and __U as the mask.  */
extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_slli_epi32 (__m256i __W, __mmask8 __U, __m256i __A,
			int __B)
{
  const __v8si __src = (__v8si) __W;

  return (__m256i)
    __builtin_ia32_pslldi256_mask ((__v8si) __A, __B, __src, (__mmask8) __U);
}

/* Passes __A and the count __B to __builtin_ia32_pslldi256_mask with a
   zero vector as the third argument and __U as the mask.  */
extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_maskz_slli_epi32 (__mmask8 __U, __m256i __A, int __B)
{
  const __v8si __zero = (__v8si) _mm256_setzero_si256 ();

  return (__m256i)
    __builtin_ia32_pslldi256_mask ((__v8si) __A, __B, __zero, (__mmask8) __U);
}

/* Passes __A and the count __B to __builtin_ia32_psllqi256_mask with
   __W as the third argument and __U as the mask.  */
extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_slli_epi64 (__m256i __W, __mmask8 __U, __m256i __A,
			int __B)
{
  const __v4di __src = (__v4di) __W;

  return (__m256i)
    __builtin_ia32_psllqi256_mask ((__v4di) __A, __B, __src, (__mmask8) __U);
}

/* Passes __A and the count __B to __builtin_ia32_psllqi256_mask with a
   zero vector as the third argument and __U as the mask.  */
extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_maskz_slli_epi64 (__mmask8 __U, __m256i __A, int __B)
{
  const __v4di __zero = (__v4di) _mm256_setzero_si256 ();

  return (__m256i)
    __builtin_ia32_psllqi256_mask ((__v4di) __A, __B, __zero, (__mmask8) __U);
}

/* Passes __X and the immediate __imm to __builtin_ia32_permdf256_mask
   with __W as the third argument and __U as the mask.  */
extern __inline __m256d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_permutex_pd (__m256d __W, __mmask8 __U, __m256d __X,
			 const int __imm)
{
  const __v4df __src = (__v4df) __W;

  return (__m256d)
    __builtin_ia32_permdf256_mask ((__v4df) __X, __imm, __src,
                                   (__mmask8) __U);
}

/* Passes __X and the immediate __imm to __builtin_ia32_permdf256_mask
   with a zero vector as the third argument and __U as the mask.  */
extern __inline __m256d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_maskz_permutex_pd (__mmask8 __U, __m256d __X, const int __imm)
{
  const __v4df __zero = (__v4df) _mm256_setzero_pd ();

  return (__m256d)
    __builtin_ia32_permdf256_mask ((__v4df) __X, __imm, __zero,
                                   (__mmask8) __U);
}

/* Passes __X and the immediate __C to __builtin_ia32_vpermilpd256_mask
   with __W as the third argument and __U as the mask.  */
extern __inline __m256d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_permute_pd (__m256d __W, __mmask8 __U, __m256d __X,
			const int __C)
{
  const __v4df __src = (__v4df) __W;

  return (__m256d)
    __builtin_ia32_vpermilpd256_mask ((__v4df) __X, __C, __src,
                                      (__mmask8) __U);
}

/* Passes __X and the immediate __C to __builtin_ia32_vpermilpd256_mask
   with a zero vector as the third argument and __U as the mask.  */
extern __inline __m256d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_maskz_permute_pd (__mmask8 __U, __m256d __X, const int __C)
{
  const __v4df __zero = (__v4df) _mm256_setzero_pd ();

  return (__m256d)
    __builtin_ia32_vpermilpd256_mask ((__v4df) __X, __C, __zero,
                                      (__mmask8) __U);
}

/* Passes __X and the immediate __C to __builtin_ia32_vpermilpd_mask
   with __W as the third argument and __U as the mask.  */
extern __inline __m128d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_permute_pd (__m128d __W, __mmask8 __U, __m128d __X,
		     const int __C)
{
  const __v2df __src = (__v2df) __W;

  return (__m128d)
    __builtin_ia32_vpermilpd_mask ((__v2df) __X, __C, __src, (__mmask8) __U);
}

/* Passes __X and the immediate __C to __builtin_ia32_vpermilpd_mask
   with a zero vector as the third argument and __U as the mask.  */
extern __inline __m128d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_permute_pd (__mmask8 __U, __m128d __X, const int __C)
{
  const __v2df __zero = (__v2df) _mm_setzero_pd ();

  return (__m128d)
    __builtin_ia32_vpermilpd_mask ((__v2df) __X, __C, __zero, (__mmask8) __U);
}

/* Passes __X and the immediate __C to __builtin_ia32_vpermilps256_mask
   with __W as the third argument and __U as the mask.  */
extern __inline __m256
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_permute_ps (__m256 __W, __mmask8 __U, __m256 __X,
			const int __C)
{
  const __v8sf __src = (__v8sf) __W;

  return (__m256)
    __builtin_ia32_vpermilps256_mask ((__v8sf) __X, __C, __src,
                                      (__mmask8) __U);
}

/* Passes __X and the immediate __C to __builtin_ia32_vpermilps256_mask
   with a zero vector as the third argument and __U as the mask.  */
extern __inline __m256
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_maskz_permute_ps (__mmask8 __U, __m256 __X, const int __C)
{
  const __v8sf __zero = (__v8sf) _mm256_setzero_ps ();

  return (__m256)
    __builtin_ia32_vpermilps256_mask ((__v8sf) __X, __C, __zero,
                                      (__mmask8) __U);
}

/* Passes __X and the immediate __C to __builtin_ia32_vpermilps_mask
   with __W as the third argument and __U as the mask.  */
extern __inline __m128
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_permute_ps (__m128 __W, __mmask8 __U, __m128 __X,
		     const int __C)
{
  const __v4sf __src = (__v4sf) __W;

  return (__m128)
    __builtin_ia32_vpermilps_mask ((__v4sf) __X, __C, __src, (__mmask8) __U);
}

/* Passes __X and the immediate __C to __builtin_ia32_vpermilps_mask
   with a zero vector as the third argument and __U as the mask.  */
extern __inline __m128
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_permute_ps (__mmask8 __U, __m128 __X, const int __C)
{
  const __v4sf __zero = (__v4sf) _mm_setzero_ps ();

  return (__m128)
    __builtin_ia32_vpermilps_mask ((__v4sf) __X, __C, __zero, (__mmask8) __U);
}

/* Forwards __A, __W and the mask __U, in that order, to
   __builtin_ia32_blendmpd_256_mask.  */
extern __inline __m256d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_blend_pd (__mmask8 __U, __m256d __A, __m256d __W)
{
  const __v4df __lhs = (__v4df) __A;
  const __v4df __rhs = (__v4df) __W;

  return (__m256d)
    __builtin_ia32_blendmpd_256_mask (__lhs, __rhs, (__mmask8) __U);
}

/* Forwards __A, __W and the mask __U, in that order, to
   __builtin_ia32_blendmps_256_mask.  */
extern __inline __m256
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_blend_ps (__mmask8 __U, __m256 __A, __m256 __W)
{
  const __v8sf __lhs = (__v8sf) __A;
  const __v8sf __rhs = (__v8sf) __W;

  return (__m256)
    __builtin_ia32_blendmps_256_mask (__lhs, __rhs, (__mmask8) __U);
}

/* Forwards __A, __W and the mask __U, in that order, to
   __builtin_ia32_blendmq_256_mask.  */
extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_blend_epi64 (__mmask8 __U, __m256i __A, __m256i __W)
{
  const __v4di __lhs = (__v4di) __A;
  const __v4di __rhs = (__v4di) __W;

  return (__m256i)
    __builtin_ia32_blendmq_256_mask (__lhs, __rhs, (__mmask8) __U);
}

/* Forwards __A, __W and the mask __U, in that order, to
   __builtin_ia32_blendmd_256_mask.  */
extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_blend_epi32 (__mmask8 __U, __m256i __A, __m256i __W)
{
  const __v8si __lhs = (__v8si) __A;
  const __v8si __rhs = (__v8si) __W;

  return (__m256i)
    __builtin_ia32_blendmd_256_mask (__lhs, __rhs, (__mmask8) __U);
}

/* Forwards __A, __W and the mask __U, in that order, to
   __builtin_ia32_blendmpd_128_mask.  */
extern __inline __m128d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_blend_pd (__mmask8 __U, __m128d __A, __m128d __W)
{
  const __v2df __lhs = (__v2df) __A;
  const __v2df __rhs = (__v2df) __W;

  return (__m128d)
    __builtin_ia32_blendmpd_128_mask (__lhs, __rhs, (__mmask8) __U);
}

/* Forwards __A, __W and the mask __U, in that order, to
   __builtin_ia32_blendmps_128_mask.  */
extern __inline __m128
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_blend_ps (__mmask8 __U, __m128 __A, __m128 __W)
{
  const __v4sf __lhs = (__v4sf) __A;
  const __v4sf __rhs = (__v4sf) __W;

  return (__m128)
    __builtin_ia32_blendmps_128_mask (__lhs, __rhs, (__mmask8) __U);
}

/* Forwards __A, __W and the mask __U, in that order, to
   __builtin_ia32_blendmq_128_mask.  */
extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_blend_epi64 (__mmask8 __U, __m128i __A, __m128i __W)
{
  const __v2di __lhs = (__v2di) __A;
  const __v2di __rhs = (__v2di) __W;

  return (__m128i)
    __builtin_ia32_blendmq_128_mask (__lhs, __rhs, (__mmask8) __U);
}

/* Forwards __A, __W and the mask __U, in that order, to
   __builtin_ia32_blendmd_128_mask.  */
extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_blend_epi32 (__mmask8 __U, __m128i __A, __m128i __W)
{
  const __v4si __lhs = (__v4si) __A;
  const __v4si __rhs = (__v4si) __W;

  return (__m128i)
    __builtin_ia32_blendmd_128_mask (__lhs, __rhs, (__mmask8) __U);
}

/* Returns the mask produced by __builtin_ia32_cmpq256_mask for __X and
   __Y with predicate immediate __P and an all-ones input mask.  */
extern __inline __mmask8
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_cmp_epi64_mask (__m256i __X, __m256i __Y, const int __P)
{
  const __mmask8 __full = (__mmask8) -1;

  return (__mmask8)
    __builtin_ia32_cmpq256_mask ((__v4di) __X, (__v4di) __Y, __P, __full);
}

/* Returns the mask produced by __builtin_ia32_cmpd256_mask for __X and
   __Y with predicate immediate __P and an all-ones input mask.  */
extern __inline __mmask8
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_cmp_epi32_mask (__m256i __X, __m256i __Y, const int __P)
{
  const __mmask8 __full = (__mmask8) -1;

  return (__mmask8)
    __builtin_ia32_cmpd256_mask ((__v8si) __X, (__v8si) __Y, __P, __full);
}

/* Returns the mask produced by __builtin_ia32_ucmpq256_mask for __X and
   __Y with predicate immediate __P and an all-ones input mask.  */
extern __inline __mmask8
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_cmp_epu64_mask (__m256i __X, __m256i __Y, const int __P)
{
  const __mmask8 __full = (__mmask8) -1;

  return (__mmask8)
    __builtin_ia32_ucmpq256_mask ((__v4di) __X, (__v4di) __Y, __P, __full);
}

/* Returns the mask produced by __builtin_ia32_ucmpd256_mask for __X and
   __Y with predicate immediate __P and an all-ones input mask.  */
extern __inline __mmask8
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_cmp_epu32_mask (__m256i __X, __m256i __Y, const int __P)
{
  const __mmask8 __full = (__mmask8) -1;

  return (__mmask8)
    __builtin_ia32_ucmpd256_mask ((__v8si) __X, (__v8si) __Y, __P, __full);
}

/* Returns the mask produced by __builtin_ia32_cmppd256_mask for __X and
   __Y with predicate immediate __P and an all-ones input mask.  */
extern __inline __mmask8
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_cmp_pd_mask (__m256d __X, __m256d __Y, const int __P)
{
  const __mmask8 __full = (__mmask8) -1;

  return (__mmask8)
    __builtin_ia32_cmppd256_mask ((__v4df) __X, (__v4df) __Y, __P, __full);
}

/* Returns the mask produced by __builtin_ia32_cmpps256_mask for __X and
   __Y with predicate immediate __P and an all-ones input mask.  */
extern __inline __mmask8
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_cmp_ps_mask (__m256 __X, __m256 __Y, const int __P)
{
  const __mmask8 __full = (__mmask8) -1;

  return (__mmask8)
    __builtin_ia32_cmpps256_mask ((__v8sf) __X, (__v8sf) __Y, __P, __full);
}

/* Returns the mask produced by __builtin_ia32_cmpq256_mask for __X and
   __Y with predicate immediate __P and __U as the input mask.  */
extern __inline __mmask8
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_cmp_epi64_mask (__mmask8 __U, __m256i __X, __m256i __Y,
			    const int __P)
{
  const __v4di __lhs = (__v4di) __X;
  const __v4di __rhs = (__v4di) __Y;

  return (__mmask8)
    __builtin_ia32_cmpq256_mask (__lhs, __rhs, __P, (__mmask8) __U);
}

/* Returns the mask produced by __builtin_ia32_cmpd256_mask for __X and
   __Y with predicate immediate __P and __U as the input mask.  */
extern __inline __mmask8
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_cmp_epi32_mask (__mmask8 __U, __m256i __X, __m256i __Y,
			    const int __P)
{
  const __v8si __lhs = (__v8si) __X;
  const __v8si __rhs = (__v8si) __Y;

  return (__mmask8)
    __builtin_ia32_cmpd256_mask (__lhs, __rhs, __P, (__mmask8) __U);
}

/* Returns the mask produced by __builtin_ia32_ucmpq256_mask for __X and
   __Y with predicate immediate __P and __U as the input mask.  */
extern __inline __mmask8
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_cmp_epu64_mask (__mmask8 __U, __m256i __X, __m256i __Y,
			    const int __P)
{
  const __v4di __lhs = (__v4di) __X;
  const __v4di __rhs = (__v4di) __Y;

  return (__mmask8)
    __builtin_ia32_ucmpq256_mask (__lhs, __rhs, __P, (__mmask8) __U);
}

/* Returns the mask produced by __builtin_ia32_ucmpd256_mask for __X and
   __Y with predicate immediate __P and __U as the input mask.  */
extern __inline __mmask8
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_cmp_epu32_mask (__mmask8 __U, __m256i __X, __m256i __Y,
			    const int __P)
{
  const __v8si __lhs = (__v8si) __X;
  const __v8si __rhs = (__v8si) __Y;

  return (__mmask8)
    __builtin_ia32_ucmpd256_mask (__lhs, __rhs, __P, (__mmask8) __U);
}

/* Returns the mask produced by __builtin_ia32_cmppd256_mask for __X and
   __Y with predicate immediate __P and __U as the input mask.  */
extern __inline __mmask8
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_cmp_pd_mask (__mmask8 __U, __m256d __X, __m256d __Y,
			 const int __P)
{
  const __v4df __lhs = (__v4df) __X;
  const __v4df __rhs = (__v4df) __Y;

  return (__mmask8)
    __builtin_ia32_cmppd256_mask (__lhs, __rhs, __P, (__mmask8) __U);
}

/* Returns the mask produced by __builtin_ia32_cmpps256_mask for __X and
   __Y with predicate immediate __P and __U as the input mask.  */
extern __inline __mmask8
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_cmp_ps_mask (__mmask8 __U, __m256 __X, __m256 __Y,
			 const int __P)
{
  const __v8sf __lhs = (__v8sf) __X;
  const __v8sf __rhs = (__v8sf) __Y;

  return (__mmask8)
    __builtin_ia32_cmpps256_mask (__lhs, __rhs, __P, (__mmask8) __U);
}

/* Returns the mask produced by __builtin_ia32_cmpq128_mask for __X and
   __Y with predicate immediate __P and an all-ones input mask.  */
extern __inline __mmask8
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_cmp_epi64_mask (__m128i __X, __m128i __Y, const int __P)
{
  const __mmask8 __full = (__mmask8) -1;

  return (__mmask8)
    __builtin_ia32_cmpq128_mask ((__v2di) __X, (__v2di) __Y, __P, __full);
}

/* Returns the mask produced by __builtin_ia32_cmpd128_mask for __X and
   __Y with predicate immediate __P and an all-ones input mask.  */
extern __inline __mmask8
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_cmp_epi32_mask (__m128i __X, __m128i __Y, const int __P)
{
  const __mmask8 __full = (__mmask8) -1;

  return (__mmask8)
    __builtin_ia32_cmpd128_mask ((__v4si) __X, (__v4si) __Y, __P, __full);
}

/* Returns the mask produced by __builtin_ia32_ucmpq128_mask for __X and
   __Y with predicate immediate __P and an all-ones input mask.  */
extern __inline __mmask8
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_cmp_epu64_mask (__m128i __X, __m128i __Y, const int __P)
{
  const __mmask8 __full = (__mmask8) -1;

  return (__mmask8)
    __builtin_ia32_ucmpq128_mask ((__v2di) __X, (__v2di) __Y, __P, __full);
}

/* Returns the mask produced by __builtin_ia32_ucmpd128_mask for __X and
   __Y with predicate immediate __P and an all-ones input mask.  */
extern __inline __mmask8
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_cmp_epu32_mask (__m128i __X, __m128i __Y, const int __P)
{
  const __mmask8 __full = (__mmask8) -1;

  return (__mmask8)
    __builtin_ia32_ucmpd128_mask ((__v4si) __X, (__v4si) __Y, __P, __full);
}

/* Returns the mask produced by __builtin_ia32_cmppd128_mask for __X and
   __Y with predicate immediate __P and an all-ones input mask.  */
extern __inline __mmask8
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_cmp_pd_mask (__m128d __X, __m128d __Y, const int __P)
{
  const __mmask8 __full = (__mmask8) -1;

  return (__mmask8)
    __builtin_ia32_cmppd128_mask ((__v2df) __X, (__v2df) __Y, __P, __full);
}

/* Returns the mask produced by __builtin_ia32_cmpps128_mask for __X and
   __Y with predicate immediate __P and an all-ones input mask.  */
extern __inline __mmask8
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_cmp_ps_mask (__m128 __X, __m128 __Y, const int __P)
{
  const __mmask8 __full = (__mmask8) -1;

  return (__mmask8)
    __builtin_ia32_cmpps128_mask ((__v4sf) __X, (__v4sf) __Y, __P, __full);
}

/* Returns the mask produced by __builtin_ia32_cmpq128_mask for __X and
   __Y with predicate immediate __P and __U as the input mask.  */
extern __inline __mmask8
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_cmp_epi64_mask (__mmask8 __U, __m128i __X, __m128i __Y,
			 const int __P)
{
  const __v2di __lhs = (__v2di) __X;
  const __v2di __rhs = (__v2di) __Y;

  return (__mmask8)
    __builtin_ia32_cmpq128_mask (__lhs, __rhs, __P, (__mmask8) __U);
}

/* Returns the mask produced by __builtin_ia32_cmpd128_mask for __X and
   __Y with predicate immediate __P and __U as the input mask.  */
extern __inline __mmask8
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_cmp_epi32_mask (__mmask8 __U, __m128i __X, __m128i __Y,
			 const int __P)
{
  const __v4si __lhs = (__v4si) __X;
  const __v4si __rhs = (__v4si) __Y;

  return (__mmask8)
    __builtin_ia32_cmpd128_mask (__lhs, __rhs, __P, (__mmask8) __U);
}

/* Returns the mask produced by __builtin_ia32_ucmpq128_mask for __X and
   __Y with predicate immediate __P and __U as the input mask.  */
extern __inline __mmask8
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_cmp_epu64_mask (__mmask8 __U, __m128i __X, __m128i __Y,
			 const int __P)
{
  const __v2di __lhs = (__v2di) __X;
  const __v2di __rhs = (__v2di) __Y;

  return (__mmask8)
    __builtin_ia32_ucmpq128_mask (__lhs, __rhs, __P, (__mmask8) __U);
}

/* Returns the mask produced by __builtin_ia32_ucmpd128_mask for __X and
   __Y with predicate immediate __P and __U as the input mask.  */
extern __inline __mmask8
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_cmp_epu32_mask (__mmask8 __U, __m128i __X, __m128i __Y,
			 const int __P)
{
  const __v4si __lhs = (__v4si) __X;
  const __v4si __rhs = (__v4si) __Y;

  return (__mmask8)
    __builtin_ia32_ucmpd128_mask (__lhs, __rhs, __P, (__mmask8) __U);
}

/* Returns the mask produced by __builtin_ia32_cmppd128_mask for __X and
   __Y with predicate immediate __P and __U as the input mask.  */
extern __inline __mmask8
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_cmp_pd_mask (__mmask8 __U, __m128d __X, __m128d __Y,
		      const int __P)
{
  const __v2df __lhs = (__v2df) __X;
  const __v2df __rhs = (__v2df) __Y;

  return (__mmask8)
    __builtin_ia32_cmppd128_mask (__lhs, __rhs, __P, (__mmask8) __U);
}

/* Returns the mask produced by __builtin_ia32_cmpps128_mask for __X and
   __Y with predicate immediate __P and __U as the input mask.  */
extern __inline __mmask8
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_cmp_ps_mask (__mmask8 __U, __m128 __X, __m128 __Y,
		      const int __P)
{
  const __v4sf __lhs = (__v4sf) __X;
  const __v4sf __rhs = (__v4sf) __Y;

  return (__mmask8)
    __builtin_ia32_cmpps128_mask (__lhs, __rhs, __P, (__mmask8) __U);
}

/* Passes __X and the immediate __M to __builtin_ia32_permdf256_mask with
   the result of _mm256_undefined_pd () as the third argument and an
   all-ones mask.  */
extern __inline __m256d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_permutex_pd (__m256d __X, const int __M)
{
  const __v4df __undef = (__v4df) _mm256_undefined_pd ();
  const __mmask8 __full = (__mmask8) -1;

  return (__m256d)
    __builtin_ia32_permdf256_mask ((__v4df) __X, __M, __undef, __full);
}

#else
/* Macro form: expands to __builtin_ia32_permdf256_mask on X with
   immediate M, _mm256_undefined_pd () as the third argument and an
   all-ones mask.  */
#define _mm256_permutex_pd(X, M) \
  ((__m256d) __builtin_ia32_permdf256_mask ( \
     (__v4df)(__m256d)(X), \
     (int)(M), \
     (__v4df)(__m256d) _mm256_undefined_pd (), \
     (__mmask8)-1))

/* Macro form: expands to __builtin_ia32_permdi256_mask on X with
   immediate I, a zero vector as the third argument and an all-ones
   mask.  */
#define _mm256_permutex_epi64(X, I) \
  ((__m256i) __builtin_ia32_permdi256_mask ( \
     (__v4di)(__m256i)(X), \
     (int)(I), \
     (__v4di)(__m256i) (_mm256_setzero_si256 ()), \
     (__mmask8) -1))

/* Macro form: expands to __builtin_ia32_permdi256_mask on X with
   immediate I, a zero vector as the third argument and M as the
   mask.  */
#define _mm256_maskz_permutex_epi64(M, X, I) \
  ((__m256i) __builtin_ia32_permdi256_mask ( \
     (__v4di)(__m256i)(X), \
     (int)(I), \
     (__v4di)(__m256i) (_mm256_setzero_si256 ()), \
     (__mmask8)(M)))

/* Macro form: expands to __builtin_ia32_permdi256_mask on X with
   immediate I, W as the third argument and M as the mask.  */
#define _mm256_mask_permutex_epi64(W, M, X, I) \
  ((__m256i) __builtin_ia32_permdi256_mask ( \
     (__v4di)(__m256i)(X), \
     (int)(I), \
     (__v4di)(__m256i)(W), \
     (__mmask8)(M)))

/* Macro form: expands to __builtin_ia32_insertf32x4_256_mask on X and Y
   with immediate C, a zero vector as the fourth argument and an all-ones
   mask.  */
#define _mm256_insertf32x4(X, Y, C) \
  ((__m256) __builtin_ia32_insertf32x4_256_mask ( \
     (__v8sf)(__m256) (X), \
     (__v4sf)(__m128) (Y), \
     (int) (C), \
     (__v8sf)(__m256)_mm256_setzero_ps (), \
     (__mmask8)-1))

/* Macro form: expands to __builtin_ia32_insertf32x4_256_mask on X and Y
   with immediate C, W as the fourth argument and U as the mask.  */
#define _mm256_mask_insertf32x4(W, U, X, Y, C) \
  ((__m256) __builtin_ia32_insertf32x4_256_mask ( \
     (__v8sf)(__m256) (X), \
     (__v4sf)(__m128) (Y), \
     (int) (C), \
     (__v8sf)(__m256)(W), \
     (__mmask8)(U)))

/* Macro form: expands to __builtin_ia32_insertf32x4_256_mask on X and Y
   with immediate C, a zero vector as the fourth argument and U as the
   mask.  */
#define _mm256_maskz_insertf32x4(U, X, Y, C) \
  ((__m256) __builtin_ia32_insertf32x4_256_mask ( \
     (__v8sf)(__m256) (X), \
     (__v4sf)(__m128) (Y), \
     (int) (C), \
     (__v8sf)(__m256)_mm256_setzero_ps (), \
     (__mmask8)(U)))

/* Macro form: expands to __builtin_ia32_inserti32x4_256_mask on X and Y
   with immediate C, a zero vector as the fourth argument and an all-ones
   mask.  */
#define _mm256_inserti32x4(X, Y, C) \
  ((__m256i) __builtin_ia32_inserti32x4_256_mask ( \
     (__v8si)(__m256i) (X), \
     (__v4si)(__m128i) (Y), \
     (int) (C), \
     (__v8si)(__m256i)_mm256_setzero_si256 (), \
     (__mmask8)-1))

/* Macro form: expands to __builtin_ia32_inserti32x4_256_mask on X and Y
   with immediate C, W as the fourth argument and U as the mask.  */
#define _mm256_mask_inserti32x4(W, U, X, Y, C) \
  ((__m256i) __builtin_ia32_inserti32x4_256_mask ( \
     (__v8si)(__m256i) (X), \
     (__v4si)(__m128i) (Y), \
     (int) (C), \
     (__v8si)(__m256i)(W), \
     (__mmask8)(U)))

/* Macro form: expands to __builtin_ia32_inserti32x4_256_mask on X and Y
   with immediate C, a zero vector as the fourth argument and U as the
   mask.  */
#define _mm256_maskz_inserti32x4(U, X, Y, C) \
  ((__m256i) __builtin_ia32_inserti32x4_256_mask ( \
     (__v8si)(__m256i) (X), \
     (__v4si)(__m128i) (Y), \
     (int) (C), \
     (__v8si)(__m256i)_mm256_setzero_si256 (), \
     (__mmask8)(U)))

/* Macro form: expands to __builtin_ia32_extractf32x4_256_mask on X with
   immediate C, a zero vector as the third argument and an all-ones
   mask.  */
#define _mm256_extractf32x4_ps(X, C) \
  ((__m128) __builtin_ia32_extractf32x4_256_mask ( \
     (__v8sf)(__m256) (X), \
     (int) (C), \
     (__v4sf)(__m128)_mm_setzero_ps (), \
     (__mmask8)-1))

/* Macro form: expands to __builtin_ia32_extractf32x4_256_mask on X with
   immediate C, W as the third argument and U as the mask.  */
#define _mm256_mask_extractf32x4_ps(W, U, X, C) \
  ((__m128) __builtin_ia32_extractf32x4_256_mask ( \
     (__v8sf)(__m256) (X), \
     (int) (C), \
     (__v4sf)(__m128)(W), \
     (__mmask8)(U)))

/* Macro form: expands to __builtin_ia32_extractf32x4_256_mask on X with
   immediate C, a zero vector as the third argument and U as the
   mask.  */
#define _mm256_maskz_extractf32x4_ps(U, X, C) \
  ((__m128) __builtin_ia32_extractf32x4_256_mask ( \
     (__v8sf)(__m256) (X), \
     (int) (C), \
     (__v4sf)(__m128)_mm_setzero_ps (), \
     (__mmask8)(U)))

/* Macro form: expands to __builtin_ia32_extracti32x4_256_mask on X with
   immediate C, a zero vector as the third argument and an all-ones
   mask.  */
#define _mm256_extracti32x4_epi32(X, C) \
  ((__m128i) __builtin_ia32_extracti32x4_256_mask ( \
     (__v8si)(__m256i) (X), \
     (int) (C), \
     (__v4si)(__m128i)_mm_setzero_si128 (), \
     (__mmask8)-1))

/* Macro form: expands to __builtin_ia32_extracti32x4_256_mask on X with
   immediate C, W as the third argument and U as the mask.  */
#define _mm256_mask_extracti32x4_epi32(W, U, X, C) \
  ((__m128i) __builtin_ia32_extracti32x4_256_mask ( \
     (__v8si)(__m256i) (X), \
     (int) (C), \
     (__v4si)(__m128i)(W), \
     (__mmask8)(U)))

/* Macro form: expands to __builtin_ia32_extracti32x4_256_mask on X with
   immediate C, a zero vector as the third argument and U as the
   mask.  */
#define _mm256_maskz_extracti32x4_epi32(U, X, C) \
  ((__m128i) __builtin_ia32_extracti32x4_256_mask ( \
     (__v8si)(__m256i) (X), \
     (int) (C), \
     (__v4si)(__m128i)_mm_setzero_si128 (), \
     (__mmask8)(U)))

/* Macro form: expands to __builtin_ia32_shuf_i64x2_256_mask on X and Y
   with immediate C, a zero vector as the fourth argument and an all-ones
   mask.  */
#define _mm256_shuffle_i64x2(X, Y, C) \
  ((__m256i) __builtin_ia32_shuf_i64x2_256_mask ( \
     (__v4di)(__m256i)(X), \
     (__v4di)(__m256i)(Y), \
     (int)(C), \
     (__v4di)(__m256i)_mm256_setzero_si256 (), \
     (__mmask8)-1))

/* Macro form: expands to __builtin_ia32_shuf_i64x2_256_mask on X and Y
   with immediate C, W as the fourth argument and U as the mask.  */
#define _mm256_mask_shuffle_i64x2(W, U, X, Y, C) \
  ((__m256i) __builtin_ia32_shuf_i64x2_256_mask ( \
     (__v4di)(__m256i)(X), \
     (__v4di)(__m256i)(Y), \
     (int)(C), \
     (__v4di)(__m256i)(W), \
     (__mmask8)(U)))

/* Macro form: expands to __builtin_ia32_shuf_i64x2_256_mask on X and Y
   with immediate C, a zero vector as the fourth argument and U as the
   mask.  */
#define _mm256_maskz_shuffle_i64x2(U, X, Y, C) \
  ((__m256i) __builtin_ia32_shuf_i64x2_256_mask ( \
     (__v4di)(__m256i)(X), \
     (__v4di)(__m256i)(Y), \
     (int)(C), \
     (__v4di)(__m256i)_mm256_setzero_si256 (), \
     (__mmask8)(U)))

/* Macro form: expands to __builtin_ia32_shuf_i32x4_256_mask on X and Y
   with immediate C, a zero vector as the fourth argument and an all-ones
   mask.  */
#define _mm256_shuffle_i32x4(X, Y, C) \
  ((__m256i) __builtin_ia32_shuf_i32x4_256_mask ( \
     (__v8si)(__m256i)(X), \
     (__v8si)(__m256i)(Y), \
     (int)(C), \
     (__v8si)(__m256i) _mm256_setzero_si256 (), \
     (__mmask8)-1))

/* Macro form: expands to __builtin_ia32_shuf_i32x4_256_mask on X and Y
   with immediate C, W as the fourth argument and U as the mask.  */
#define _mm256_mask_shuffle_i32x4(W, U, X, Y, C) \
  ((__m256i) __builtin_ia32_shuf_i32x4_256_mask ( \
     (__v8si)(__m256i)(X), \
     (__v8si)(__m256i)(Y), \
     (int)(C), \
     (__v8si)(__m256i)(W), \
     (__mmask8)(U)))

/* Macro form: expands to __builtin_ia32_shuf_i32x4_256_mask on X and Y
   with immediate C, a zero vector as the fourth argument and U as the
   mask.  */
#define _mm256_maskz_shuffle_i32x4(U, X, Y, C) \
  ((__m256i) __builtin_ia32_shuf_i32x4_256_mask ( \
     (__v8si)(__m256i)(X), \
     (__v8si)(__m256i)(Y), \
     (int)(C), \
     (__v8si)(__m256i) _mm256_setzero_si256 (), \
     (__mmask8)(U)))

/* Macro form: expands to __builtin_ia32_shuf_f64x2_256_mask on X and Y
   with immediate C, a zero vector as the fourth argument and an all-ones
   mask.  */
#define _mm256_shuffle_f64x2(X, Y, C) \
  ((__m256d) __builtin_ia32_shuf_f64x2_256_mask ( \
     (__v4df)(__m256d)(X), \
     (__v4df)(__m256d)(Y), \
     (int)(C), \
     (__v4df)(__m256d)_mm256_setzero_pd (), \
     (__mmask8)-1))

/* Macro form: expands to __builtin_ia32_shuf_f64x2_256_mask on X and Y
   with immediate C, W as the fourth argument and U as the mask.  */
#define _mm256_mask_shuffle_f64x2(W, U, X, Y, C) \
  ((__m256d) __builtin_ia32_shuf_f64x2_256_mask ( \
     (__v4df)(__m256d)(X), \
     (__v4df)(__m256d)(Y), \
     (int)(C), \
     (__v4df)(__m256d)(W), \
     (__mmask8)(U)))

/* Macro form: expands to __builtin_ia32_shuf_f64x2_256_mask on X and Y
   with immediate C, a zero vector as the fourth argument and U as the
   mask.  */
#define _mm256_maskz_shuffle_f64x2(U, X, Y, C) \
  ((__m256d) __builtin_ia32_shuf_f64x2_256_mask ( \
     (__v4df)(__m256d)(X), \
     (__v4df)(__m256d)(Y), \
     (int)(C), \
     (__v4df)(__m256d)_mm256_setzero_pd (), \
     (__mmask8)(U)))

/* Macro form: expands to __builtin_ia32_shuf_f32x4_256_mask on X and Y
   with immediate C, a zero vector as the fourth argument and an all-ones
   mask.  */
#define _mm256_shuffle_f32x4(X, Y, C) \
  ((__m256) __builtin_ia32_shuf_f32x4_256_mask ( \
     (__v8sf)(__m256)(X), \
     (__v8sf)(__m256)(Y), \
     (int)(C), \
     (__v8sf)(__m256)_mm256_setzero_ps (), \
     (__mmask8)-1))

/* Macro form: expands to __builtin_ia32_shuf_f32x4_256_mask on X and Y
   with immediate C, W as the fourth argument and U as the mask.  */
#define _mm256_mask_shuffle_f32x4(W, U, X, Y, C) \
  ((__m256) __builtin_ia32_shuf_f32x4_256_mask ( \
     (__v8sf)(__m256)(X), \
     (__v8sf)(__m256)(Y), \
     (int)(C), \
     (__v8sf)(__m256)(W), \
     (__mmask8)(U)))

/* Macro form: expands to __builtin_ia32_shuf_f32x4_256_mask on X and Y
   with immediate C, a zero vector as the fourth argument and U as the
   mask.  */
#define _mm256_maskz_shuffle_f32x4(U, X, Y, C) \
  ((__m256) __builtin_ia32_shuf_f32x4_256_mask ( \
     (__v8sf)(__m256)(X), \
     (__v8sf)(__m256)(Y), \
     (int)(C), \
     (__v8sf)(__m256)_mm256_setzero_ps (), \
     (__mmask8)(U)))

/* Macro form: expands to __builtin_ia32_shufpd256_mask on A and B with
   immediate C, W as the fourth argument and U as the mask.  */
#define _mm256_mask_shuffle_pd(W, U, A, B, C) \
  ((__m256d)__builtin_ia32_shufpd256_mask ( \
     (__v4df)(__m256d)(A), \
     (__v4df)(__m256d)(B), \
     (int)(C), \
     (__v4df)(__m256d)(W), \
     (__mmask8)(U)))

/* Macro form: expands to __builtin_ia32_shufpd256_mask on A and B with
   immediate C, a zero vector as the fourth argument and U as the
   mask.  */
#define _mm256_maskz_shuffle_pd(U, A, B, C) \
  ((__m256d)__builtin_ia32_shufpd256_mask ( \
     (__v4df)(__m256d)(A), \
     (__v4df)(__m256d)(B), \
     (int)(C), \
     (__v4df)(__m256d) _mm256_setzero_pd (), \
     (__mmask8)(U)))

/* Macro form: expands to __builtin_ia32_shufpd128_mask on A and B with
   immediate C, W as the fourth argument and U as the mask.  */
#define _mm_mask_shuffle_pd(W, U, A, B, C) \
  ((__m128d)__builtin_ia32_shufpd128_mask ( \
     (__v2df)(__m128d)(A), \
     (__v2df)(__m128d)(B), \
     (int)(C), \
     (__v2df)(__m128d)(W), \
     (__mmask8)(U)))

/* Macro form: expands to __builtin_ia32_shufpd128_mask on A and B with
   immediate C, a zero vector as the fourth argument and U as the
   mask.  */
#define _mm_maskz_shuffle_pd(U, A, B, C) \
  ((__m128d)__builtin_ia32_shufpd128_mask ( \
     (__v2df)(__m128d)(A), \
     (__v2df)(__m128d)(B), \
     (int)(C), \
     (__v2df)(__m128d)_mm_setzero_pd (), \
     (__mmask8)(U)))

/* Macro form: expands to __builtin_ia32_shufps256_mask on A and B with
   immediate C, W as the fourth argument and U as the mask.  */
#define _mm256_mask_shuffle_ps(W, U, A, B, C) \
  ((__m256) __builtin_ia32_shufps256_mask ( \
     (__v8sf)(__m256)(A), \
     (__v8sf)(__m256)(B), \
     (int)(C), \
     (__v8sf)(__m256)(W), \
     (__mmask8)(U)))

#define _mm256_maskz_shuffle_ps(U, A, B, C)                                     \
  ((__m256) __builtin_ia32_shufps256_mask ((__v8sf)(__m256)(A),                 \
                                           (__v8sf)(__m256)(B), (int)(C),       \
					   (__v8sf)(__m256)_mm256_setzero_ps (),\
                                           (__mmask8)(U)))

#define _mm_mask_shuffle_ps(W, U, A, B, C)                                      \
  ((__m128) __builtin_ia32_shufps128_mask ((__v4sf)(__m128)(A),                 \
                                           (__v4sf)(__m128)(B), (int)(C),       \
                                           (__v4sf)(__m128)(W),                 \
                                           (__mmask8)(U)))

#define _mm_maskz_shuffle_ps(U, A, B, C)                                        \
  ((__m128) __builtin_ia32_shufps128_mask ((__v4sf)(__m128)(A),                 \
                                           (__v4sf)(__m128)(B), (int)(C),       \
					   (__v4sf)(__m128)_mm_setzero_ps (),   \
                                           (__mmask8)(U)))

/* Macro forms of the 256-bit double-precision fixupimm intrinsics.  X and
   Y are passed as __v4df operands, Z as a __v4di operand and C as an int.
   The unmasked form supplies (__mmask8)(-1) as the mask operand; the
   _mask_ form forwards U to the same ..._mask builtin, while the _maskz_
   form forwards U to the separate ..._maskz builtin.  */
#define _mm256_fixupimm_pd(X, Y, Z, C) \
  ((__m256d) __builtin_ia32_fixupimmpd256_mask ( \
      (__v4df)(__m256d)(X), \
      (__v4df)(__m256d)(Y), \
      (__v4di)(__m256i)(Z), \
      (int)(C), \
      (__mmask8)(-1)))

#define _mm256_mask_fixupimm_pd(X, U, Y, Z, C) \
  ((__m256d) __builtin_ia32_fixupimmpd256_mask ( \
      (__v4df)(__m256d)(X), \
      (__v4df)(__m256d)(Y), \
      (__v4di)(__m256i)(Z), \
      (int)(C), \
      (__mmask8)(U)))

#define _mm256_maskz_fixupimm_pd(U, X, Y, Z, C) \
  ((__m256d) __builtin_ia32_fixupimmpd256_maskz ( \
      (__v4df)(__m256d)(X), \
      (__v4df)(__m256d)(Y), \
      (__v4di)(__m256i)(Z), \
      (int)(C), \
      (__mmask8)(U)))

/* Macro forms of the 256-bit single-precision fixupimm intrinsics.  X and
   Y are passed as __v8sf operands, Z as a __v8si operand and C as an int.
   The unmasked form supplies (__mmask8)(-1) as the mask operand; the
   _mask_ form forwards U to the same ..._mask builtin, while the _maskz_
   form forwards U to the separate ..._maskz builtin.  */
#define _mm256_fixupimm_ps(X, Y, Z, C) \
  ((__m256) __builtin_ia32_fixupimmps256_mask ( \
      (__v8sf)(__m256)(X), \
      (__v8sf)(__m256)(Y), \
      (__v8si)(__m256i)(Z), \
      (int)(C), \
      (__mmask8)(-1)))

#define _mm256_mask_fixupimm_ps(X, U, Y, Z, C) \
  ((__m256) __builtin_ia32_fixupimmps256_mask ( \
      (__v8sf)(__m256)(X), \
      (__v8sf)(__m256)(Y), \
      (__v8si)(__m256i)(Z), \
      (int)(C), \
      (__mmask8)(U)))

#define _mm256_maskz_fixupimm_ps(U, X, Y, Z, C) \
  ((__m256) __builtin_ia32_fixupimmps256_maskz ( \
      (__v8sf)(__m256)(X), \
      (__v8sf)(__m256)(Y), \
      (__v8si)(__m256i)(Z), \
      (int)(C), \
      (__mmask8)(U)))

/* Macro forms of the 128-bit double-precision fixupimm intrinsics.  X and
   Y are passed as __v2df operands, Z as a __v2di operand and C as an int.
   The unmasked form supplies (__mmask8)(-1) as the mask operand; the
   _mask_ form forwards U to the same ..._mask builtin, while the _maskz_
   form forwards U to the separate ..._maskz builtin.  */
#define _mm_fixupimm_pd(X, Y, Z, C) \
  ((__m128d) __builtin_ia32_fixupimmpd128_mask ( \
      (__v2df)(__m128d)(X), \
      (__v2df)(__m128d)(Y), \
      (__v2di)(__m128i)(Z), \
      (int)(C), \
      (__mmask8)(-1)))

#define _mm_mask_fixupimm_pd(X, U, Y, Z, C) \
  ((__m128d) __builtin_ia32_fixupimmpd128_mask ( \
      (__v2df)(__m128d)(X), \
      (__v2df)(__m128d)(Y), \
      (__v2di)(__m128i)(Z), \
      (int)(C), \
      (__mmask8)(U)))

#define _mm_maskz_fixupimm_pd(U, X, Y, Z, C) \
  ((__m128d) __builtin_ia32_fixupimmpd128_maskz ( \
      (__v2df)(__m128d)(X), \
      (__v2df)(__m128d)(Y), \
      (__v2di)(__m128i)(Z), \
      (int)(C), \
      (__mmask8)(U)))

/* Macro forms of the 128-bit single-precision fixupimm intrinsics.  X and
   Y are passed as __v4sf operands, Z as a __v4si operand and C as an int.
   The unmasked form supplies (__mmask8)(-1) as the mask operand; the
   _mask_ form forwards U to the same ..._mask builtin, while the _maskz_
   form forwards U to the separate ..._maskz builtin.  */
#define _mm_fixupimm_ps(X, Y, Z, C) \
  ((__m128) __builtin_ia32_fixupimmps128_mask ( \
      (__v4sf)(__m128)(X), \
      (__v4sf)(__m128)(Y), \
      (__v4si)(__m128i)(Z), \
      (int)(C), \
      (__mmask8)(-1)))

#define _mm_mask_fixupimm_ps(X, U, Y, Z, C) \
  ((__m128) __builtin_ia32_fixupimmps128_mask ( \
      (__v4sf)(__m128)(X), \
      (__v4sf)(__m128)(Y), \
      (__v4si)(__m128i)(Z), \
      (int)(C), \
      (__mmask8)(U)))

#define _mm_maskz_fixupimm_ps(U, X, Y, Z, C) \
  ((__m128) __builtin_ia32_fixupimmps128_maskz ( \
      (__v4sf)(__m128)(X), \
      (__v4sf)(__m128)(Y), \
      (__v4si)(__m128i)(Z), \
      (int)(C), \
      (__mmask8)(U)))

/* Macro forms of the masked srli_epi32 intrinsics.  Each expands to one
   call of __builtin_ia32_psrldi256_mask or __builtin_ia32_psrldi128_mask
   with A as the vector operand, B converted to int and U converted to
   __mmask8.  The third operand is W for the _mask_ forms and the result
   of the matching setzero intrinsic for the _maskz_ forms.  */
#define _mm256_mask_srli_epi32(W, U, A, B) \
  ((__m256i) __builtin_ia32_psrldi256_mask ( \
      (__v8si)(__m256i)(A), \
      (int)(B), \
      (__v8si)(__m256i)(W), \
      (__mmask8)(U)))

#define _mm256_maskz_srli_epi32(U, A, B) \
  ((__m256i) __builtin_ia32_psrldi256_mask ( \
      (__v8si)(__m256i)(A), \
      (int)(B), \
      (__v8si) _mm256_setzero_si256 (), \
      (__mmask8)(U)))

#define _mm_mask_srli_epi32(W, U, A, B) \
  ((__m128i) __builtin_ia32_psrldi128_mask ( \
      (__v4si)(__m128i)(A), \
      (int)(B), \
      (__v4si)(__m128i)(W), \
      (__mmask8)(U)))

#define _mm_maskz_srli_epi32(U, A, B) \
  ((__m128i) __builtin_ia32_psrldi128_mask ( \
      (__v4si)(__m128i)(A), \
      (int)(B), \
      (__v4si) _mm_setzero_si128 (), \
      (__mmask8)(U)))

/* Macro forms of the masked srli_epi64 intrinsics.  Each expands to one
   call of __builtin_ia32_psrlqi256_mask or __builtin_ia32_psrlqi128_mask
   with A as the vector operand, B converted to int and U converted to
   __mmask8.  The third operand is W for the _mask_ forms and the result
   of the matching setzero intrinsic for the _maskz_ forms.  */
#define _mm256_mask_srli_epi64(W, U, A, B) \
  ((__m256i) __builtin_ia32_psrlqi256_mask ( \
      (__v4di)(__m256i)(A), \
      (int)(B), \
      (__v4di)(__m256i)(W), \
      (__mmask8)(U)))

#define _mm256_maskz_srli_epi64(U, A, B) \
  ((__m256i) __builtin_ia32_psrlqi256_mask ( \
      (__v4di)(__m256i)(A), \
      (int)(B), \
      (__v4di) _mm256_setzero_si256 (), \
      (__mmask8)(U)))

#define _mm_mask_srli_epi64(W, U, A, B) \
  ((__m128i) __builtin_ia32_psrlqi128_mask ( \
      (__v2di)(__m128i)(A), \
      (int)(B), \
      (__v2di)(__m128i)(W), \
      (__mmask8)(U)))

#define _mm_maskz_srli_epi64(U, A, B) \
  ((__m128i) __builtin_ia32_psrlqi128_mask ( \
      (__v2di)(__m128i)(A), \
      (int)(B), \
      (__v2di) _mm_setzero_si128 (), \
      (__mmask8)(U)))

/* Macro forms of the 256-bit masked slli intrinsics.  The epi32 forms
   expand to __builtin_ia32_pslldi256_mask on __v8si operands and the
   epi64 forms to __builtin_ia32_psllqi256_mask on __v4di operands.  X is
   the vector operand, C is converted to int and U to __mmask8.  The
   third operand is W for the _mask_ forms and the result of
   _mm256_setzero_si256 () for the _maskz_ forms.  */
#define _mm256_mask_slli_epi32(W, U, X, C) \
  ((__m256i) __builtin_ia32_pslldi256_mask ( \
      (__v8si)(__m256i)(X), \
      (int)(C), \
      (__v8si)(__m256i)(W), \
      (__mmask8)(U)))

#define _mm256_maskz_slli_epi32(U, X, C) \
  ((__m256i) __builtin_ia32_pslldi256_mask ( \
      (__v8si)(__m256i)(X), \
      (int)(C), \
      (__v8si)(__m256i) _mm256_setzero_si256 (), \
      (__mmask8)(U)))

#define _mm256_mask_slli_epi64(W, U, X, C) \
  ((__m256i) __builtin_ia32_psllqi256_mask ( \
      (__v4di)(__m256i)(X), \
      (int)(C), \
      (__v4di)(__m256i)(W), \
      (__mmask8)(U)))

#define _mm256_maskz_slli_epi64(U, X, C) \
  ((__m256i) __builtin_ia32_psllqi256_mask ( \
      (__v4di)(__m256i)(X), \
      (int)(C), \
      (__v4di)(__m256i) _mm256_setzero_si256 (), \
      (__mmask8)(U)))

/* Macro forms of the 128-bit masked slli intrinsics.  The epi32 forms
   expand to __builtin_ia32_pslldi128_mask on __v4si operands and the
   epi64 forms to __builtin_ia32_psllqi128_mask on __v2di operands.  X is
   the vector operand, C is converted to int and U to __mmask8.  The
   third operand is W for the _mask_ forms and the result of
   _mm_setzero_si128 () for the _maskz_ forms.  */
#define _mm_mask_slli_epi32(W, U, X, C) \
  ((__m128i) __builtin_ia32_pslldi128_mask ( \
      (__v4si)(__m128i)(X), \
      (int)(C), \
      (__v4si)(__m128i)(W), \
      (__mmask8)(U)))

#define _mm_maskz_slli_epi32(U, X, C) \
  ((__m128i) __builtin_ia32_pslldi128_mask ( \
      (__v4si)(__m128i)(X), \
      (int)(C), \
      (__v4si)(__m128i) _mm_setzero_si128 (), \
      (__mmask8)(U)))

#define _mm_mask_slli_epi64(W, U, X, C) \
  ((__m128i) __builtin_ia32_psllqi128_mask ( \
      (__v2di)(__m128i)(X), \
      (int)(C), \
      (__v2di)(__m128i)(W), \
      (__mmask8)(U)))

#define _mm_maskz_slli_epi64(U, X, C) \
  ((__m128i) __builtin_ia32_psllqi128_mask ( \
      (__v2di)(__m128i)(X), \
      (int)(C), \
      (__v2di)(__m128i) _mm_setzero_si128 (), \
      (__mmask8)(U)))

/* Macro forms of the 256-bit ternarylogic_epi64 intrinsics.  A, B and C
   are passed as __v4di operands and I is converted to unsigned char.
   The unmasked form supplies (__mmask8) -1 as the mask operand; the
   _mask_ form forwards U to __builtin_ia32_pternlogq256_mask and the
   _maskz_ form forwards U to __builtin_ia32_pternlogq256_maskz.  */
#define _mm256_ternarylogic_epi64(A, B, C, I) \
  ((__m256i) __builtin_ia32_pternlogq256_mask ( \
      (__v4di) (__m256i) (A), \
      (__v4di) (__m256i) (B), \
      (__v4di) (__m256i) (C), \
      (unsigned char) (I), \
      (__mmask8) -1))

#define _mm256_mask_ternarylogic_epi64(A, U, B, C, I) \
  ((__m256i) __builtin_ia32_pternlogq256_mask ( \
      (__v4di) (__m256i) (A), \
      (__v4di) (__m256i) (B), \
      (__v4di) (__m256i) (C), \
      (unsigned char) (I), \
      (__mmask8) (U)))

#define _mm256_maskz_ternarylogic_epi64(U, A, B, C, I) \
  ((__m256i) __builtin_ia32_pternlogq256_maskz ( \
      (__v4di) (__m256i) (A), \
      (__v4di) (__m256i) (B), \
      (__v4di) (__m256i) (C), \
      (unsigned char) (I), \
      (__mmask8) (U)))

/* Macro forms of the 256-bit ternarylogic_epi32 intrinsics.  A, B and C
   are passed as __v8si operands and I is converted to unsigned char.
   The unmasked form supplies (__mmask8) -1 as the mask operand; the
   _mask_ form forwards U to __builtin_ia32_pternlogd256_mask and the
   _maskz_ form forwards U to __builtin_ia32_pternlogd256_maskz.  */
#define _mm256_ternarylogic_epi32(A, B, C, I) \
  ((__m256i) __builtin_ia32_pternlogd256_mask ( \
      (__v8si) (__m256i) (A), \
      (__v8si) (__m256i) (B), \
      (__v8si) (__m256i) (C), \
      (unsigned char) (I), \
      (__mmask8) -1))

#define _mm256_mask_ternarylogic_epi32(A, U, B, C, I) \
  ((__m256i) __builtin_ia32_pternlogd256_mask ( \
      (__v8si) (__m256i) (A), \
      (__v8si) (__m256i) (B), \
      (__v8si) (__m256i) (C), \
      (unsigned char) (I), \
      (__mmask8) (U)))

#define _mm256_maskz_ternarylogic_epi32(U, A, B, C, I) \
  ((__m256i) __builtin_ia32_pternlogd256_maskz ( \
      (__v8si) (__m256i) (A), \
      (__v8si) (__m256i) (B), \
      (__v8si) (__m256i) (C), \
      (unsigned char) (I), \
      (__mmask8) (U)))

/* Macro forms of the 128-bit ternarylogic_epi64 intrinsics.  A, B and C
   are passed as __v2di operands and I is converted to unsigned char.
   The unmasked form supplies (__mmask8) -1 as the mask operand; the
   _mask_ form forwards U to __builtin_ia32_pternlogq128_mask and the
   _maskz_ form forwards U to __builtin_ia32_pternlogq128_maskz.  */
#define _mm_ternarylogic_epi64(A, B, C, I) \
  ((__m128i) __builtin_ia32_pternlogq128_mask ( \
      (__v2di) (__m128i) (A), \
      (__v2di) (__m128i) (B), \
      (__v2di) (__m128i) (C), \
      (unsigned char) (I), \
      (__mmask8) -1))

#define _mm_mask_ternarylogic_epi64(A, U, B, C, I) \
  ((__m128i) __builtin_ia32_pternlogq128_mask ( \
      (__v2di) (__m128i) (A), \
      (__v2di) (__m128i) (B), \
      (__v2di) (__m128i) (C), \
      (unsigned char) (I), \
      (__mmask8) (U)))

#define _mm_maskz_ternarylogic_epi64(U, A, B, C, I) \
  ((__m128i) __builtin_ia32_pternlogq128_maskz ( \
      (__v2di) (__m128i) (A), \
      (__v2di) (__m128i) (B), \
      (__v2di) (__m128i) (C), \
      (unsigned char) (I), \
      (__mmask8) (U)))

/* Macro forms of the 128-bit ternarylogic_epi32 intrinsics.  A, B and C
   are passed as __v4si operands and I is converted to unsigned char.
   The unmasked form supplies (__mmask8) -1 as the mask operand; the
   _mask_ form forwards U to __builtin_ia32_pternlogd128_mask and the
   _maskz_ form forwards U to __builtin_ia32_pternlogd128_maskz.  */
#define _mm_ternarylogic_epi32(A, B, C, I) \
  ((__m128i) __builtin_ia32_pternlogd128_mask ( \
      (__v4si) (__m128i) (A), \
      (__v4si) (__m128i) (B), \
      (__v4si) (__m128i) (C), \
      (unsigned char) (I), \
      (__mmask8) -1))

#define _mm_mask_ternarylogic_epi32(A, U, B, C, I) \
  ((__m128i) __builtin_ia32_pternlogd128_mask ( \
      (__v4si) (__m128i) (A), \
      (__v4si) (__m128i) (B), \
      (__v4si) (__m128i) (C), \
      (unsigned char) (I), \
      (__mmask8) (U)))

#define _mm_maskz_ternarylogic_epi32(U, A, B, C, I) \
  ((__m128i) __builtin_ia32_pternlogd128_maskz ( \
      (__v4si) (__m128i) (A), \
      (__v4si) (__m128i) (B), \
      (__v4si) (__m128i) (C), \
      (unsigned char) (I), \
      (__mmask8) (U)))

/* Macro forms of the 256-bit roundscale intrinsics.  The _ps forms expand
   to __builtin_ia32_rndscaleps_256_mask on __v8sf operands and the _pd
   forms to __builtin_ia32_rndscalepd_256_mask on __v4df operands.  A is
   the vector operand and B is converted to int.  The unmasked and
   _maskz_ forms pass the result of the matching setzero intrinsic as the
   third operand (with (__mmask8)-1 and U respectively as the mask); the
   _mask_ forms pass W and U.  */
#define _mm256_roundscale_ps(A, B) \
  ((__m256) __builtin_ia32_rndscaleps_256_mask ( \
      (__v8sf)(__m256)(A), \
      (int)(B), \
      (__v8sf)(__m256) _mm256_setzero_ps (), \
      (__mmask8)-1))

#define _mm256_mask_roundscale_ps(W, U, A, B) \
  ((__m256) __builtin_ia32_rndscaleps_256_mask ( \
      (__v8sf)(__m256)(A), \
      (int)(B), \
      (__v8sf)(__m256)(W), \
      (__mmask8)(U)))

#define _mm256_maskz_roundscale_ps(U, A, B) \
  ((__m256) __builtin_ia32_rndscaleps_256_mask ( \
      (__v8sf)(__m256)(A), \
      (int)(B), \
      (__v8sf)(__m256) _mm256_setzero_ps (), \
      (__mmask8)(U)))

#define _mm256_roundscale_pd(A, B) \
  ((__m256d) __builtin_ia32_rndscalepd_256_mask ( \
      (__v4df)(__m256d)(A), \
      (int)(B), \
      (__v4df)(__m256d) _mm256_setzero_pd (), \
      (__mmask8)-1))

#define _mm256_mask_roundscale_pd(W, U, A, B) \
  ((__m256d) __builtin_ia32_rndscalepd_256_mask ( \
      (__v4df)(__m256d)(A), \
      (int)(B), \
      (__v4df)(__m256d)(W), \
      (__mmask8)(U)))

#define _mm256_maskz_roundscale_pd(U, A, B) \
  ((__m256d) __builtin_ia32_rndscalepd_256_mask ( \
      (__v4df)(__m256d)(A), \
      (int)(B), \
      (__v4df)(__m256d) _mm256_setzero_pd (), \
      (__mmask8)(U)))

/* Macro forms of the 128-bit roundscale intrinsics.  The _ps forms expand
   to __builtin_ia32_rndscaleps_128_mask on __v4sf operands and the _pd
   forms to __builtin_ia32_rndscalepd_128_mask on __v2df operands.  A is
   the vector operand and B is converted to int.  The unmasked and
   _maskz_ forms pass the result of the matching setzero intrinsic as the
   third operand (with (__mmask8)-1 and U respectively as the mask); the
   _mask_ forms pass W and U.  */
#define _mm_roundscale_ps(A, B) \
  ((__m128) __builtin_ia32_rndscaleps_128_mask ( \
      (__v4sf)(__m128)(A), \
      (int)(B), \
      (__v4sf)(__m128) _mm_setzero_ps (), \
      (__mmask8)-1))

#define _mm_mask_roundscale_ps(W, U, A, B) \
  ((__m128) __builtin_ia32_rndscaleps_128_mask ( \
      (__v4sf)(__m128)(A), \
      (int)(B), \
      (__v4sf)(__m128)(W), \
      (__mmask8)(U)))

#define _mm_maskz_roundscale_ps(U, A, B) \
  ((__m128) __builtin_ia32_rndscaleps_128_mask ( \
      (__v4sf)(__m128)(A), \
      (int)(B), \
      (__v4sf)(__m128) _mm_setzero_ps (), \
      (__mmask8)(U)))

#define _mm_roundscale_pd(A, B) \
  ((__m128d) __builtin_ia32_rndscalepd_128_mask ( \
      (__v2df)(__m128d)(A), \
      (int)(B), \
      (__v2df)(__m128d) _mm_setzero_pd (), \
      (__mmask8)-1))

#define _mm_mask_roundscale_pd(W, U, A, B) \
  ((__m128d) __builtin_ia32_rndscalepd_128_mask ( \
      (__v2df)(__m128d)(A), \
      (int)(B), \
      (__v2df)(__m128d)(W), \
      (__mmask8)(U)))

#define _mm_maskz_roundscale_pd(U, A, B) \
  ((__m128d) __builtin_ia32_rndscalepd_128_mask ( \
      (__v2df)(__m128d)(A), \
      (int)(B), \
      (__v2df)(__m128d) _mm_setzero_pd (), \
      (__mmask8)(U)))

/* Macro forms of the single-precision getmant intrinsics.  Each expands
   to one call of __builtin_ia32_getmantps256_mask (__v8sf operands) or
   __builtin_ia32_getmantps128_mask (__v4sf operands).  X is the vector
   operand; B and C are combined into the single int operand
   ((C) << 2) | (B).  The unmasked and _maskz_ forms pass the result of
   the matching setzero intrinsic as the third operand (with (__mmask8)-1
   and U respectively as the mask); the _mask_ forms pass W and U.  */
#define _mm256_getmant_ps(X, B, C) \
  ((__m256) __builtin_ia32_getmantps256_mask ( \
      (__v8sf)(__m256) (X), \
      (int)(((C)<<2) | (B)), \
      (__v8sf)(__m256) _mm256_setzero_ps (), \
      (__mmask8)-1))

#define _mm256_mask_getmant_ps(W, U, X, B, C) \
  ((__m256) __builtin_ia32_getmantps256_mask ( \
      (__v8sf)(__m256) (X), \
      (int)(((C)<<2) | (B)), \
      (__v8sf)(__m256)(W), \
      (__mmask8)(U)))

#define _mm256_maskz_getmant_ps(U, X, B, C) \
  ((__m256) __builtin_ia32_getmantps256_mask ( \
      (__v8sf)(__m256) (X), \
      (int)(((C)<<2) | (B)), \
      (__v8sf)(__m256) _mm256_setzero_ps (), \
      (__mmask8)(U)))

#define _mm_getmant_ps(X, B, C) \
  ((__m128) __builtin_ia32_getmantps128_mask ( \
      (__v4sf)(__m128) (X), \
      (int)(((C)<<2) | (B)), \
      (__v4sf)(__m128) _mm_setzero_ps (), \
      (__mmask8)-1))

#define _mm_mask_getmant_ps(W, U, X, B, C) \
  ((__m128) __builtin_ia32_getmantps128_mask ( \
      (__v4sf)(__m128) (X), \
      (int)(((C)<<2) | (B)), \
      (__v4sf)(__m128)(W), \
      (__mmask8)(U)))

#define _mm_maskz_getmant_ps(U, X, B, C) \
  ((__m128) __builtin_ia32_getmantps128_mask ( \
      (__v4sf)(__m128) (X), \
      (int)(((C)<<2) | (B)), \
      (__v4sf)(__m128) _mm_setzero_ps (), \
      (__mmask8)(U)))

/* Macro forms of the double-precision getmant intrinsics.  Each expands
   to one call of __builtin_ia32_getmantpd256_mask (__v4df operands) or
   __builtin_ia32_getmantpd128_mask (__v2df operands).  X is the vector
   operand; B and C are combined into the single int operand
   ((C) << 2) | (B).  The unmasked and _maskz_ forms pass the result of
   the matching setzero intrinsic as the third operand (with (__mmask8)-1
   and U respectively as the mask); the _mask_ forms pass W and U.  */
#define _mm256_getmant_pd(X, B, C) \
  ((__m256d) __builtin_ia32_getmantpd256_mask ( \
      (__v4df)(__m256d) (X), \
      (int)(((C)<<2) | (B)), \
      (__v4df)(__m256d) _mm256_setzero_pd (), \
      (__mmask8)-1))

#define _mm256_mask_getmant_pd(W, U, X, B, C) \
  ((__m256d) __builtin_ia32_getmantpd256_mask ( \
      (__v4df)(__m256d) (X), \
      (int)(((C)<<2) | (B)), \
      (__v4df)(__m256d)(W), \
      (__mmask8)(U)))

#define _mm256_maskz_getmant_pd(U, X, B, C) \
  ((__m256d) __builtin_ia32_getmantpd256_mask ( \
      (__v4df)(__m256d) (X), \
      (int)(((C)<<2) | (B)), \
      (__v4df)(__m256d) _mm256_setzero_pd (), \
      (__mmask8)(U)))

#define _mm_getmant_pd(X, B, C) \
  ((__m128d) __builtin_ia32_getmantpd128_mask ( \
      (__v2df)(__m128d) (X), \
      (int)(((C)<<2) | (B)), \
      (__v2df)(__m128d) _mm_setzero_pd (), \
      (__mmask8)-1))

#define _mm_mask_getmant_pd(W, U, X, B, C) \
  ((__m128d) __builtin_ia32_getmantpd128_mask ( \
      (__v2df)(__m128d) (X), \
      (int)(((C)<<2) | (B)), \
      (__v2df)(__m128d)(W), \
      (__mmask8)(U)))

#define _mm_maskz_getmant_pd(U, X, B, C) \
  ((__m128d) __builtin_ia32_getmantpd128_mask ( \
      (__v2df)(__m128d) (X), \
      (int)(((C)<<2) | (B)), \
      (__v2df)(__m128d) _mm_setzero_pd (), \
      (__mmask8)(U)))

/* Macro forms of the masked 32-bit-index floating-point gathers.  Each
   expands to one call of a __builtin_ia32_gather3siv* builtin, passing
   V1OLD as the first vector operand, ADDR converted to void const *,
   INDEX as the index vector, MASK converted to __mmask8 and SCALE
   converted to int, and casts the result to the intrinsic's return type.

   The whole expansion is enclosed in parentheses: a cast binds less
   tightly than postfix operators, so without them an invocation followed
   by e.g. a subscript would apply the subscript to the builtin call
   first and the result cast second.  */
#define _mm256_mmask_i32gather_ps(V1OLD, MASK, INDEX, ADDR, SCALE) \
  ((__m256) __builtin_ia32_gather3siv8sf ( \
      (__v8sf)(__m256) (V1OLD), \
      (void const *) (ADDR), \
      (__v8si)(__m256i) (INDEX), \
      (__mmask8) (MASK), \
      (int) (SCALE)))

#define _mm_mmask_i32gather_ps(V1OLD, MASK, INDEX, ADDR, SCALE) \
  ((__m128) __builtin_ia32_gather3siv4sf ( \
      (__v4sf)(__m128) (V1OLD), \
      (void const *) (ADDR), \
      (__v4si)(__m128i) (INDEX), \
      (__mmask8) (MASK), \
      (int) (SCALE)))

#define _mm256_mmask_i32gather_pd(V1OLD, MASK, INDEX, ADDR, SCALE) \
  ((__m256d) __builtin_ia32_gather3siv4df ( \
      (__v4df)(__m256d) (V1OLD), \
      (void const *) (ADDR), \
      (__v4si)(__m128i) (INDEX), \
      (__mmask8) (MASK), \
      (int) (SCALE)))

#define _mm_mmask_i32gather_pd(V1OLD, MASK, INDEX, ADDR, SCALE) \
  ((__m128d) __builtin_ia32_gather3siv2df ( \
      (__v2df)(__m128d) (V1OLD), \
      (void const *) (ADDR), \
      (__v4si)(__m128i) (INDEX), \
      (__mmask8) (MASK), \
      (int) (SCALE)))

/* Macro forms of the masked 64-bit-index floating-point gathers.  Each
   expands to one call of a __builtin_ia32_gather3div* builtin, passing
   V1OLD as the first vector operand, ADDR converted to void const *,
   INDEX as the index vector, MASK converted to __mmask8 and SCALE
   converted to int, and casts the result to the intrinsic's return type.
   Note that _mm256_mmask_i64gather_ps takes a __m256i index vector but
   both takes and yields a __m128 data vector.

   The whole expansion is enclosed in parentheses: a cast binds less
   tightly than postfix operators, so without them an invocation followed
   by e.g. a subscript would apply the subscript to the builtin call
   first and the result cast second.  */
#define _mm256_mmask_i64gather_ps(V1OLD, MASK, INDEX, ADDR, SCALE) \
  ((__m128) __builtin_ia32_gather3div8sf ( \
      (__v4sf)(__m128) (V1OLD), \
      (void const *) (ADDR), \
      (__v4di)(__m256i) (INDEX), \
      (__mmask8) (MASK), \
      (int) (SCALE)))

#define _mm_mmask_i64gather_ps(V1OLD, MASK, INDEX, ADDR, SCALE) \
  ((__m128) __builtin_ia32_gather3div4sf ( \
      (__v4sf)(__m128) (V1OLD), \
      (void const *) (ADDR), \
      (__v2di)(__m128i) (INDEX), \
      (__mmask8) (MASK), \
      (int) (SCALE)))

#define _mm256_mmask_i64gather_pd(V1OLD, MASK, INDEX, ADDR, SCALE) \
  ((__m256d) __builtin_ia32_gather3div4df ( \
      (__v4df)(__m256d) (V1OLD), \
      (void const *) (ADDR), \
      (__v4di)(__m256i) (INDEX), \
      (__mmask8) (MASK), \
      (int) (SCALE)))

#define _mm_mmask_i64gather_pd(V1OLD, MASK, INDEX, ADDR, SCALE) \
  ((__m128d) __builtin_ia32_gather3div2df ( \
      (__v2df)(__m128d) (V1OLD), \
      (void const *) (ADDR), \
      (__v2di)(__m128i) (INDEX), \
      (__mmask8) (MASK), \
      (int) (SCALE)))

/* Macro forms of the masked 32-bit-index integer gathers.  Each expands
   to one call of a __builtin_ia32_gather3siv* builtin, passing V1OLD as
   the first vector operand, ADDR converted to void const *, INDEX as the
   index vector, MASK converted to __mmask8 and SCALE converted to int,
   and casts the result to the intrinsic's return type.

   The whole expansion is enclosed in parentheses: a cast binds less
   tightly than postfix operators, so without them an invocation followed
   by e.g. a subscript would apply the subscript to the builtin call
   first and the result cast second.  */
#define _mm256_mmask_i32gather_epi32(V1OLD, MASK, INDEX, ADDR, SCALE) \
  ((__m256i) __builtin_ia32_gather3siv8si ( \
      (__v8si)(__m256i) (V1OLD), \
      (void const *) (ADDR), \
      (__v8si)(__m256i) (INDEX), \
      (__mmask8) (MASK), \
      (int) (SCALE)))

#define _mm_mmask_i32gather_epi32(V1OLD, MASK, INDEX, ADDR, SCALE) \
  ((__m128i) __builtin_ia32_gather3siv4si ( \
      (__v4si)(__m128i) (V1OLD), \
      (void const *) (ADDR), \
      (__v4si)(__m128i) (INDEX), \
      (__mmask8) (MASK), \
      (int) (SCALE)))

#define _mm256_mmask_i32gather_epi64(V1OLD, MASK, INDEX, ADDR, SCALE) \
  ((__m256i) __builtin_ia32_gather3siv4di ( \
      (__v4di)(__m256i) (V1OLD), \
      (void const *) (ADDR), \
      (__v4si)(__m128i) (INDEX), \
      (__mmask8) (MASK), \
      (int) (SCALE)))

#define _mm_mmask_i32gather_epi64(V1OLD, MASK, INDEX, ADDR, SCALE) \
  ((__m128i) __builtin_ia32_gather3siv2di ( \
      (__v2di)(__m128i) (V1OLD), \
      (void const *) (ADDR), \
      (__v4si)(__m128i) (INDEX), \
      (__mmask8) (MASK), \
      (int) (SCALE)))

/* Macro forms of the masked 64-bit-index integer gathers.  Each expands
   to one call of a __builtin_ia32_gather3div* builtin, passing V1OLD as
   the first vector operand, ADDR converted to void const *, INDEX as the
   index vector, MASK converted to __mmask8 and SCALE converted to int,
   and casts the result to the intrinsic's return type.  Note that
   _mm256_mmask_i64gather_epi32 takes a __m256i index vector but both
   takes and yields a __m128i data vector.

   The whole expansion is enclosed in parentheses: a cast binds less
   tightly than postfix operators, so without them an invocation followed
   by e.g. a subscript would apply the subscript to the builtin call
   first and the result cast second.  */
#define _mm256_mmask_i64gather_epi32(V1OLD, MASK, INDEX, ADDR, SCALE) \
  ((__m128i) __builtin_ia32_gather3div8si ( \
      (__v4si)(__m128i) (V1OLD), \
      (void const *) (ADDR), \
      (__v4di)(__m256i) (INDEX), \
      (__mmask8) (MASK), \
      (int) (SCALE)))

#define _mm_mmask_i64gather_epi32(V1OLD, MASK, INDEX, ADDR, SCALE) \
  ((__m128i) __builtin_ia32_gather3div4si ( \
      (__v4si)(__m128i) (V1OLD), \
      (void const *) (ADDR), \
      (__v2di)(__m128i) (INDEX), \
      (__mmask8) (MASK), \
      (int) (SCALE)))

#define _mm256_mmask_i64gather_epi64(V1OLD, MASK, INDEX, ADDR, SCALE) \
  ((__m256i) __builtin_ia32_gather3div4di ( \
      (__v4di)(__m256i) (V1OLD), \
      (void const *) (ADDR), \
      (__v4di)(__m256i) (INDEX), \
      (__mmask8) (MASK), \
      (int) (SCALE)))

#define _mm_mmask_i64gather_epi64(V1OLD, MASK, INDEX, ADDR, SCALE) \
  ((__m128i) __builtin_ia32_gather3div2di ( \
      (__v2di)(__m128i) (V1OLD), \
      (void const *) (ADDR), \
      (__v2di)(__m128i) (INDEX), \
      (__mmask8) (MASK), \
      (int) (SCALE)))

/* Macro forms of the 32-bit-index floating-point scatters.  Each expands
   to one call of a __builtin_ia32_scattersiv* builtin, passing ADDR
   converted to void *, a __mmask8 mask, INDEX as the index vector, V1 as
   the data vector and SCALE converted to int.  The unmasked forms pass
   the constant (__mmask8)0xFF as the mask; the _mask_ forms pass MASK.  */
#define _mm256_i32scatter_ps(ADDR, INDEX, V1, SCALE) \
  __builtin_ia32_scattersiv8sf ( \
      (void *) (ADDR), \
      (__mmask8)0xFF, \
      (__v8si)(__m256i) (INDEX), \
      (__v8sf)(__m256) (V1), \
      (int) (SCALE))

#define _mm256_mask_i32scatter_ps(ADDR, MASK, INDEX, V1, SCALE) \
  __builtin_ia32_scattersiv8sf ( \
      (void *) (ADDR), \
      (__mmask8) (MASK), \
      (__v8si)(__m256i) (INDEX), \
      (__v8sf)(__m256) (V1), \
      (int) (SCALE))

#define _mm_i32scatter_ps(ADDR, INDEX, V1, SCALE) \
  __builtin_ia32_scattersiv4sf ( \
      (void *) (ADDR), \
      (__mmask8)0xFF, \
      (__v4si)(__m128i) (INDEX), \
      (__v4sf)(__m128) (V1), \
      (int) (SCALE))

#define _mm_mask_i32scatter_ps(ADDR, MASK, INDEX, V1, SCALE) \
  __builtin_ia32_scattersiv4sf ( \
      (void *) (ADDR), \
      (__mmask8) (MASK), \
      (__v4si)(__m128i) (INDEX), \
      (__v4sf)(__m128) (V1), \
      (int) (SCALE))

#define _mm256_i32scatter_pd(ADDR, INDEX, V1, SCALE) \
  __builtin_ia32_scattersiv4df ( \
      (void *) (ADDR), \
      (__mmask8)0xFF, \
      (__v4si)(__m128i) (INDEX), \
      (__v4df)(__m256d) (V1), \
      (int) (SCALE))

#define _mm256_mask_i32scatter_pd(ADDR, MASK, INDEX, V1, SCALE) \
  __builtin_ia32_scattersiv4df ( \
      (void *) (ADDR), \
      (__mmask8) (MASK), \
      (__v4si)(__m128i) (INDEX), \
      (__v4df)(__m256d) (V1), \
      (int) (SCALE))

#define _mm_i32scatter_pd(ADDR, INDEX, V1, SCALE) \
  __builtin_ia32_scattersiv2df ( \
      (void *) (ADDR), \
      (__mmask8)0xFF, \
      (__v4si)(__m128i) (INDEX), \
      (__v2df)(__m128d) (V1), \
      (int) (SCALE))

#define _mm_mask_i32scatter_pd(ADDR, MASK, INDEX, V1, SCALE) \
  __builtin_ia32_scattersiv2df ( \
      (void *) (ADDR), \
      (__mmask8) (MASK), \
      (__v4si)(__m128i) (INDEX), \
      (__v2df)(__m128d) (V1), \
      (int) (SCALE))

/* Macro forms of the 64-bit-index floating-point scatters.  Each expands
   to one call of a __builtin_ia32_scatterdiv* builtin, passing ADDR
   converted to void *, a __mmask8 mask, INDEX as the index vector, V1 as
   the data vector and SCALE converted to int.  The unmasked forms pass
   the constant (__mmask8)0xFF as the mask; the _mask_ forms pass MASK.
   The 256-bit _ps forms take a __m256i index vector together with a
   __m128 data vector.  */
#define _mm256_i64scatter_ps(ADDR, INDEX, V1, SCALE) \
  __builtin_ia32_scatterdiv8sf ( \
      (void *) (ADDR), \
      (__mmask8)0xFF, \
      (__v4di)(__m256i) (INDEX), \
      (__v4sf)(__m128) (V1), \
      (int) (SCALE))

#define _mm256_mask_i64scatter_ps(ADDR, MASK, INDEX, V1, SCALE) \
  __builtin_ia32_scatterdiv8sf ( \
      (void *) (ADDR), \
      (__mmask8) (MASK), \
      (__v4di)(__m256i) (INDEX), \
      (__v4sf)(__m128) (V1), \
      (int) (SCALE))

#define _mm_i64scatter_ps(ADDR, INDEX, V1, SCALE) \
  __builtin_ia32_scatterdiv4sf ( \
      (void *) (ADDR), \
      (__mmask8)0xFF, \
      (__v2di)(__m128i) (INDEX), \
      (__v4sf)(__m128) (V1), \
      (int) (SCALE))

#define _mm_mask_i64scatter_ps(ADDR, MASK, INDEX, V1, SCALE) \
  __builtin_ia32_scatterdiv4sf ( \
      (void *) (ADDR), \
      (__mmask8) (MASK), \
      (__v2di)(__m128i) (INDEX), \
      (__v4sf)(__m128) (V1), \
      (int) (SCALE))

#define _mm256_i64scatter_pd(ADDR, INDEX, V1, SCALE) \
  __builtin_ia32_scatterdiv4df ( \
      (void *) (ADDR), \
      (__mmask8)0xFF, \
      (__v4di)(__m256i) (INDEX), \
      (__v4df)(__m256d) (V1), \
      (int) (SCALE))

#define _mm256_mask_i64scatter_pd(ADDR, MASK, INDEX, V1, SCALE) \
  __builtin_ia32_scatterdiv4df ( \
      (void *) (ADDR), \
      (__mmask8) (MASK), \
      (__v4di)(__m256i) (INDEX), \
      (__v4df)(__m256d) (V1), \
      (int) (SCALE))

#define _mm_i64scatter_pd(ADDR, INDEX, V1, SCALE) \
  __builtin_ia32_scatterdiv2df ( \
      (void *) (ADDR), \
      (__mmask8)0xFF, \
      (__v2di)(__m128i) (INDEX), \
      (__v2df)(__m128d) (V1), \
      (int) (SCALE))

#define _mm_mask_i64scatter_pd(ADDR, MASK, INDEX, V1, SCALE) \
  __builtin_ia32_scatterdiv2df ( \
      (void *) (ADDR), \
      (__mmask8) (MASK), \
      (__v2di)(__m128i) (INDEX), \
      (__v2df)(__m128d) (V1), \
      (int) (SCALE))

/* Macro forms of the 32-bit-index integer scatters.  Each expands to one
   call of a __builtin_ia32_scattersiv* builtin, passing ADDR converted
   to void *, a __mmask8 mask, INDEX as the index vector, V1 as the data
   vector and SCALE converted to int.  The unmasked forms pass the
   constant (__mmask8)0xFF as the mask; the _mask_ forms pass MASK.  */
#define _mm256_i32scatter_epi32(ADDR, INDEX, V1, SCALE) \
  __builtin_ia32_scattersiv8si ( \
      (void *) (ADDR), \
      (__mmask8)0xFF, \
      (__v8si)(__m256i) (INDEX), \
      (__v8si)(__m256i) (V1), \
      (int) (SCALE))

#define _mm256_mask_i32scatter_epi32(ADDR, MASK, INDEX, V1, SCALE) \
  __builtin_ia32_scattersiv8si ( \
      (void *) (ADDR), \
      (__mmask8) (MASK), \
      (__v8si)(__m256i) (INDEX), \
      (__v8si)(__m256i) (V1), \
      (int) (SCALE))

#define _mm_i32scatter_epi32(ADDR, INDEX, V1, SCALE) \
  __builtin_ia32_scattersiv4si ( \
      (void *) (ADDR), \
      (__mmask8)0xFF, \
      (__v4si)(__m128i) (INDEX), \
      (__v4si)(__m128i) (V1), \
      (int) (SCALE))

#define _mm_mask_i32scatter_epi32(ADDR, MASK, INDEX, V1, SCALE) \
  __builtin_ia32_scattersiv4si ( \
      (void *) (ADDR), \
      (__mmask8) (MASK), \
      (__v4si)(__m128i) (INDEX), \
      (__v4si)(__m128i) (V1), \
      (int) (SCALE))

#define _mm256_i32scatter_epi64(ADDR, INDEX, V1, SCALE) \
  __builtin_ia32_scattersiv4di ( \
      (void *) (ADDR), \
      (__mmask8)0xFF, \
      (__v4si)(__m128i) (INDEX), \
      (__v4di)(__m256i) (V1), \
      (int) (SCALE))

#define _mm256_mask_i32scatter_epi64(ADDR, MASK, INDEX, V1, SCALE) \
  __builtin_ia32_scattersiv4di ( \
      (void *) (ADDR), \
      (__mmask8) (MASK), \
      (__v4si)(__m128i) (INDEX), \
      (__v4di)(__m256i) (V1), \
      (int) (SCALE))

#define _mm_i32scatter_epi64(ADDR, INDEX, V1, SCALE) \
  __builtin_ia32_scattersiv2di ( \
      (void *) (ADDR), \
      (__mmask8)0xFF, \
      (__v4si)(__m128i) (INDEX), \
      (__v2di)(__m128i) (V1), \
      (int) (SCALE))

#define _mm_mask_i32scatter_epi64(ADDR, MASK, INDEX, V1, SCALE) \
  __builtin_ia32_scattersiv2di ( \
      (void *) (ADDR), \
      (__mmask8) (MASK), \
      (__v4si)(__m128i) (INDEX), \
      (__v2di)(__m128i) (V1), \
      (int) (SCALE))

/* Macro forms of the 64-bit-index integer scatters.  Each expands to one
   call of a __builtin_ia32_scatterdiv* builtin, passing ADDR converted
   to void *, a __mmask8 mask, INDEX as the index vector, V1 as the data
   vector and SCALE converted to int.  The unmasked forms pass the
   constant (__mmask8)0xFF as the mask; the _mask_ forms pass MASK.  The
   256-bit _epi32 forms take a __m256i index vector together with a
   __m128i data vector.  */
#define _mm256_i64scatter_epi32(ADDR, INDEX, V1, SCALE) \
  __builtin_ia32_scatterdiv8si ( \
      (void *) (ADDR), \
      (__mmask8)0xFF, \
      (__v4di)(__m256i) (INDEX), \
      (__v4si)(__m128i) (V1), \
      (int) (SCALE))

#define _mm256_mask_i64scatter_epi32(ADDR, MASK, INDEX, V1, SCALE) \
  __builtin_ia32_scatterdiv8si ( \
      (void *) (ADDR), \
      (__mmask8) (MASK), \
      (__v4di)(__m256i) (INDEX), \
      (__v4si)(__m128i) (V1), \
      (int) (SCALE))

#define _mm_i64scatter_epi32(ADDR, INDEX, V1, SCALE) \
  __builtin_ia32_scatterdiv4si ( \
      (void *) (ADDR), \
      (__mmask8)0xFF, \
      (__v2di)(__m128i) (INDEX), \
      (__v4si)(__m128i) (V1), \
      (int) (SCALE))

#define _mm_mask_i64scatter_epi32(ADDR, MASK, INDEX, V1, SCALE) \
  __builtin_ia32_scatterdiv4si ( \
      (void *) (ADDR), \
      (__mmask8) (MASK), \
      (__v2di)(__m128i) (INDEX), \
      (__v4si)(__m128i) (V1), \
      (int) (SCALE))

#define _mm256_i64scatter_epi64(ADDR, INDEX, V1, SCALE) \
  __builtin_ia32_scatterdiv4di ( \
      (void *) (ADDR), \
      (__mmask8)0xFF, \
      (__v4di)(__m256i) (INDEX), \
      (__v4di)(__m256i) (V1), \
      (int) (SCALE))

#define _mm256_mask_i64scatter_epi64(ADDR, MASK, INDEX, V1, SCALE) \
  __builtin_ia32_scatterdiv4di ( \
      (void *) (ADDR), \
      (__mmask8) (MASK), \
      (__v4di)(__m256i) (INDEX), \
      (__v4di)(__m256i) (V1), \
      (int) (SCALE))

#define _mm_i64scatter_epi64(ADDR, INDEX, V1, SCALE) \
  __builtin_ia32_scatterdiv2di ( \
      (void *) (ADDR), \
      (__mmask8)0xFF, \
      (__v2di)(__m128i) (INDEX), \
      (__v2di)(__m128i) (V1), \
      (int) (SCALE))

#define _mm_mask_i64scatter_epi64(ADDR, MASK, INDEX, V1, SCALE) \
  __builtin_ia32_scatterdiv2di ( \
      (void *) (ADDR), \
      (__mmask8) (MASK), \
      (__v2di)(__m128i) (INDEX), \
      (__v2di)(__m128i) (V1), \
      (int) (SCALE))

/* Macro forms of the masked shuffle_epi32 intrinsics.  Each expands to
   one call of __builtin_ia32_pshufd256_mask (__v8si operands) or
   __builtin_ia32_pshufd128_mask (__v4si operands) with X as the vector
   operand, C converted to int and U converted to __mmask8.  The third
   operand is W for the _mask_ forms and the result of the matching
   setzero intrinsic for the _maskz_ forms.  */
#define _mm256_mask_shuffle_epi32(W, U, X, C) \
  ((__m256i) __builtin_ia32_pshufd256_mask ( \
      (__v8si)(__m256i)(X), \
      (int)(C), \
      (__v8si)(__m256i)(W), \
      (__mmask8)(U)))

#define _mm256_maskz_shuffle_epi32(U, X, C) \
  ((__m256i) __builtin_ia32_pshufd256_mask ( \
      (__v8si)(__m256i)(X), \
      (int)(C), \
      (__v8si)(__m256i) _mm256_setzero_si256 (), \
      (__mmask8)(U)))

#define _mm_mask_shuffle_epi32(W, U, X, C) \
  ((__m128i) __builtin_ia32_pshufd128_mask ( \
      (__v4si)(__m128i)(X), \
      (int)(C), \
      (__v4si)(__m128i)(W), \
      (__mmask8)(U)))

#define _mm_maskz_shuffle_epi32(U, X, C) \
  ((__m128i) __builtin_ia32_pshufd128_mask ( \
      (__v4si)(__m128i)(X), \
      (int)(C), \
      (__v4si)(__m128i) _mm_setzero_si128 (), \
      (__mmask8)(U)))

/* Macro forms of the rol_epi64 intrinsics.  Each expands to one call of
   __builtin_ia32_prolq256_mask (__v4di operands) or
   __builtin_ia32_prolq128_mask (__v2di operands) with A as the vector
   operand and B converted to int.  The unmasked and _maskz_ forms pass
   the result of the matching setzero intrinsic as the third operand
   (with (__mmask8)-1 and U respectively as the mask); the _mask_ forms
   pass W and U.  */
#define _mm256_rol_epi64(A, B) \
  ((__m256i) __builtin_ia32_prolq256_mask ( \
      (__v4di)(__m256i)(A), \
      (int)(B), \
      (__v4di)(__m256i) _mm256_setzero_si256 (), \
      (__mmask8)-1))

#define _mm256_mask_rol_epi64(W, U, A, B) \
  ((__m256i) __builtin_ia32_prolq256_mask ( \
      (__v4di)(__m256i)(A), \
      (int)(B), \
      (__v4di)(__m256i)(W), \
      (__mmask8)(U)))

#define _mm256_maskz_rol_epi64(U, A, B) \
  ((__m256i) __builtin_ia32_prolq256_mask ( \
      (__v4di)(__m256i)(A), \
      (int)(B), \
      (__v4di)(__m256i) _mm256_setzero_si256 (), \
      (__mmask8)(U)))

#define _mm_rol_epi64(A, B) \
  ((__m128i) __builtin_ia32_prolq128_mask ( \
      (__v2di)(__m128i)(A), \
      (int)(B), \
      (__v2di)(__m128i) _mm_setzero_si128 (), \
      (__mmask8)-1))

#define _mm_mask_rol_epi64(W, U, A, B) \
  ((__m128i) __builtin_ia32_prolq128_mask ( \
      (__v2di)(__m128i)(A), \
      (int)(B), \
      (__v2di)(__m128i)(W), \
      (__mmask8)(U)))

#define _mm_maskz_rol_epi64(U, A, B) \
  ((__m128i) __builtin_ia32_prolq128_mask ( \
      (__v2di)(__m128i)(A), \
      (int)(B), \
      (__v2di)(__m128i) _mm_setzero_si128 (), \
      (__mmask8)(U)))

/* Macro forms of the ror_epi64 intrinsics.  Each expands to one call of
   __builtin_ia32_prorq256_mask (__v4di operands) or
   __builtin_ia32_prorq128_mask (__v2di operands) with A as the vector
   operand and B converted to int.  The unmasked and _maskz_ forms pass
   the result of the matching setzero intrinsic as the third operand
   (with (__mmask8)-1 and U respectively as the mask); the _mask_ forms
   pass W and U.  */
#define _mm256_ror_epi64(A, B) \
  ((__m256i) __builtin_ia32_prorq256_mask ( \
      (__v4di)(__m256i)(A), \
      (int)(B), \
      (__v4di)(__m256i) _mm256_setzero_si256 (), \
      (__mmask8)-1))

#define _mm256_mask_ror_epi64(W, U, A, B) \
  ((__m256i) __builtin_ia32_prorq256_mask ( \
      (__v4di)(__m256i)(A), \
      (int)(B), \
      (__v4di)(__m256i)(W), \
      (__mmask8)(U)))

#define _mm256_maskz_ror_epi64(U, A, B) \
  ((__m256i) __builtin_ia32_prorq256_mask ( \
      (__v4di)(__m256i)(A), \
      (int)(B), \
      (__v4di)(__m256i) _mm256_setzero_si256 (), \
      (__mmask8)(U)))

#define _mm_ror_epi64(A, B) \
  ((__m128i) __builtin_ia32_prorq128_mask ( \
      (__v2di)(__m128i)(A), \
      (int)(B), \
      (__v2di)(__m128i) _mm_setzero_si128 (), \
      (__mmask8)-1))

#define _mm_mask_ror_epi64(W, U, A, B) \
  ((__m128i) __builtin_ia32_prorq128_mask ( \
      (__v2di)(__m128i)(A), \
      (int)(B), \
      (__v2di)(__m128i)(W), \
      (__mmask8)(U)))

#define _mm_maskz_ror_epi64(U, A, B) \
  ((__m128i) __builtin_ia32_prorq128_mask ( \
      (__v2di)(__m128i)(A), \
      (int)(B), \
      (__v2di)(__m128i) _mm_setzero_si128 (), \
      (__mmask8)(U)))

/* Macro forms of the 32-bit-element "rol" intrinsics.  Each expands to one
   call of __builtin_ia32_prold256_mask (256-bit) or
   __builtin_ia32_prold128_mask (128-bit) with operands: source vector A,
   count B cast to int, pass-through vector, write mask.
   NOTE(review): the names suggest a per-element rotate left; confirm the
   exact semantics against the Intel Intrinsics Guide.  */

/* 256-bit: zero pass-through, mask fixed at (__mmask8)-1.  */
#define _mm256_rol_epi32(A, B) \
  ((__m256i) __builtin_ia32_prold256_mask ( \
    (__v8si) (__m256i) (A), (int) (B), \
    (__v8si) (__m256i) _mm256_setzero_si256 (), \
    (__mmask8) -1))

/* 256-bit: W as pass-through, caller-supplied mask U.  */
#define _mm256_mask_rol_epi32(W, U, A, B) \
  ((__m256i) __builtin_ia32_prold256_mask ( \
    (__v8si) (__m256i) (A), (int) (B), \
    (__v8si) (__m256i) (W), \
    (__mmask8) (U)))

/* 256-bit: zero pass-through, caller-supplied mask U.  */
#define _mm256_maskz_rol_epi32(U, A, B) \
  ((__m256i) __builtin_ia32_prold256_mask ( \
    (__v8si) (__m256i) (A), (int) (B), \
    (__v8si) (__m256i) _mm256_setzero_si256 (), \
    (__mmask8) (U)))

/* 128-bit: zero pass-through, mask fixed at (__mmask8)-1.  */
#define _mm_rol_epi32(A, B) \
  ((__m128i) __builtin_ia32_prold128_mask ( \
    (__v4si) (__m128i) (A), (int) (B), \
    (__v4si) (__m128i) _mm_setzero_si128 (), \
    (__mmask8) -1))

/* 128-bit: W as pass-through, caller-supplied mask U.  */
#define _mm_mask_rol_epi32(W, U, A, B) \
  ((__m128i) __builtin_ia32_prold128_mask ( \
    (__v4si) (__m128i) (A), (int) (B), \
    (__v4si) (__m128i) (W), \
    (__mmask8) (U)))

/* 128-bit: zero pass-through, caller-supplied mask U.  */
#define _mm_maskz_rol_epi32(U, A, B) \
  ((__m128i) __builtin_ia32_prold128_mask ( \
    (__v4si) (__m128i) (A), (int) (B), \
    (__v4si) (__m128i) _mm_setzero_si128 (), \
    (__mmask8) (U)))

/* Macro forms of the 32-bit-element "ror" intrinsics.  Each expands to one
   call of __builtin_ia32_prord256_mask (256-bit) or
   __builtin_ia32_prord128_mask (128-bit) with operands: source vector A,
   count B cast to int, pass-through vector, write mask.
   NOTE(review): the names suggest a per-element rotate right; confirm the
   exact semantics against the Intel Intrinsics Guide.  */

/* 256-bit: zero pass-through, mask fixed at (__mmask8)-1.  */
#define _mm256_ror_epi32(A, B) \
  ((__m256i) __builtin_ia32_prord256_mask ( \
    (__v8si) (__m256i) (A), (int) (B), \
    (__v8si) (__m256i) _mm256_setzero_si256 (), \
    (__mmask8) -1))

/* 256-bit: W as pass-through, caller-supplied mask U.  */
#define _mm256_mask_ror_epi32(W, U, A, B) \
  ((__m256i) __builtin_ia32_prord256_mask ( \
    (__v8si) (__m256i) (A), (int) (B), \
    (__v8si) (__m256i) (W), \
    (__mmask8) (U)))

/* 256-bit: zero pass-through, caller-supplied mask U.  */
#define _mm256_maskz_ror_epi32(U, A, B) \
  ((__m256i) __builtin_ia32_prord256_mask ( \
    (__v8si) (__m256i) (A), (int) (B), \
    (__v8si) (__m256i) _mm256_setzero_si256 (), \
    (__mmask8) (U)))

/* 128-bit: zero pass-through, mask fixed at (__mmask8)-1.  */
#define _mm_ror_epi32(A, B) \
  ((__m128i) __builtin_ia32_prord128_mask ( \
    (__v4si) (__m128i) (A), (int) (B), \
    (__v4si) (__m128i) _mm_setzero_si128 (), \
    (__mmask8) -1))

/* 128-bit: W as pass-through, caller-supplied mask U.  */
#define _mm_mask_ror_epi32(W, U, A, B) \
  ((__m128i) __builtin_ia32_prord128_mask ( \
    (__v4si) (__m128i) (A), (int) (B), \
    (__v4si) (__m128i) (W), \
    (__mmask8) (U)))

/* 128-bit: zero pass-through, caller-supplied mask U.  */
#define _mm_maskz_ror_epi32(U, A, B) \
  ((__m128i) __builtin_ia32_prord128_mask ( \
    (__v4si) (__m128i) (A), (int) (B), \
    (__v4si) (__m128i) _mm_setzero_si128 (), \
    (__mmask8) (U)))

/* Unmasked 256-bit, 32-bit-element "alignr" macro.  Expands to one call of
   __builtin_ia32_alignd256_mask with operands X, Y, the immediate C cast
   to int, a pass-through vector and the mask (__mmask8)-1.

   The pass-through operand is a zero vector, matching the other unmasked
   macros in this header (e.g. _mm256_rol_epi32), so the argument X is
   expanded exactly once and side effects in it are not duplicated.  With
   the (__mmask8)-1 mask the pass-through is not expected to contribute to
   the result -- see the Intel Intrinsics Guide entry for this intrinsic.  */
#define _mm256_alignr_epi32(X, Y, C) \
  ((__m256i) __builtin_ia32_alignd256_mask ( \
    (__v8si) (__m256i) (X), (__v8si) (__m256i) (Y), (int) (C), \
    (__v8si) (__m256i) _mm256_setzero_si256 (), \
    (__mmask8) -1))

/* Masked 256-bit, 32-bit-element "alignr" macros.  Both expand to one call
   of __builtin_ia32_alignd256_mask with operands X, Y, the immediate C
   cast to int, a pass-through vector and the caller-supplied mask U.  */

/* Pass-through is W.  */
#define _mm256_mask_alignr_epi32(W, U, X, Y, C) \
  ((__m256i) __builtin_ia32_alignd256_mask ( \
    (__v8si) (__m256i) (X), (__v8si) (__m256i) (Y), (int) (C), \
    (__v8si) (__m256i) (W), \
    (__mmask8) (U)))

/* Pass-through is a zero vector.  */
#define _mm256_maskz_alignr_epi32(U, X, Y, C) \
  ((__m256i) __builtin_ia32_alignd256_mask ( \
    (__v8si) (__m256i) (X), (__v8si) (__m256i) (Y), (int) (C), \
    (__v8si) (__m256i) _mm256_setzero_si256 (), \
    (__mmask8) (U)))

/* Unmasked 256-bit, 64-bit-element "alignr" macro.  Expands to one call of
   __builtin_ia32_alignq256_mask with operands X, Y, the immediate C cast
   to int, a pass-through vector and the mask (__mmask8)-1.

   The pass-through operand is a zero vector, matching the other unmasked
   macros in this header (e.g. _mm256_ror_epi64), so the argument X is
   expanded exactly once and side effects in it are not duplicated.  With
   the (__mmask8)-1 mask the pass-through is not expected to contribute to
   the result -- see the Intel Intrinsics Guide entry for this intrinsic.  */
#define _mm256_alignr_epi64(X, Y, C) \
  ((__m256i) __builtin_ia32_alignq256_mask ( \
    (__v4di) (__m256i) (X), (__v4di) (__m256i) (Y), (int) (C), \
    (__v4di) (__m256i) _mm256_setzero_si256 (), \
    (__mmask8) -1))

/* Masked 256-bit, 64-bit-element "alignr" macros.  Both expand to one call
   of __builtin_ia32_alignq256_mask with operands X, Y, the immediate C
   cast to int, a pass-through vector and the caller-supplied mask U.  */

/* Pass-through is W.  */
#define _mm256_mask_alignr_epi64(W, U, X, Y, C) \
  ((__m256i) __builtin_ia32_alignq256_mask ( \
    (__v4di) (__m256i) (X), (__v4di) (__m256i) (Y), (int) (C), \
    (__v4di) (__m256i) (W), \
    (__mmask8) (U)))

/* Pass-through is a zero vector.  */
#define _mm256_maskz_alignr_epi64(U, X, Y, C) \
  ((__m256i) __builtin_ia32_alignq256_mask ( \
    (__v4di) (__m256i) (X), (__v4di) (__m256i) (Y), (int) (C), \
    (__v4di) (__m256i) _mm256_setzero_si256 (), \
    (__mmask8) (U)))

/* Unmasked 128-bit, 32-bit-element "alignr" macro.  Expands to one call of
   __builtin_ia32_alignd128_mask with operands X, Y, the immediate C cast
   to int, a pass-through vector and the mask (__mmask8)-1.

   The pass-through operand is a zero vector, matching the other unmasked
   macros in this header (e.g. _mm_rol_epi32), so the argument X is
   expanded exactly once and side effects in it are not duplicated.  With
   the (__mmask8)-1 mask the pass-through is not expected to contribute to
   the result -- see the Intel Intrinsics Guide entry for this intrinsic.  */
#define _mm_alignr_epi32(X, Y, C) \
  ((__m128i) __builtin_ia32_alignd128_mask ( \
    (__v4si) (__m128i) (X), (__v4si) (__m128i) (Y), (int) (C), \
    (__v4si) (__m128i) _mm_setzero_si128 (), \
    (__mmask8) -1))

/* Masked 128-bit, 32-bit-element "alignr" macros.  Both expand to one call
   of __builtin_ia32_alignd128_mask with operands X, Y, the immediate C
   cast to int, a pass-through vector and the caller-supplied mask U.  */

/* Pass-through is W.  */
#define _mm_mask_alignr_epi32(W, U, X, Y, C) \
  ((__m128i) __builtin_ia32_alignd128_mask ( \
    (__v4si) (__m128i) (X), (__v4si) (__m128i) (Y), (int) (C), \
    (__v4si) (__m128i) (W), \
    (__mmask8) (U)))

/* Pass-through is a zero vector.  */
#define _mm_maskz_alignr_epi32(U, X, Y, C) \
  ((__m128i) __builtin_ia32_alignd128_mask ( \
    (__v4si) (__m128i) (X), (__v4si) (__m128i) (Y), (int) (C), \
    (__v4si) (__m128i) _mm_setzero_si128 (), \
    (__mmask8) (U)))

/* Unmasked 128-bit, 64-bit-element "alignr" macro.  Expands to one call of
   __builtin_ia32_alignq128_mask with operands X, Y, the immediate C cast
   to int, a pass-through vector and the mask (__mmask8)-1.

   The pass-through operand is a zero vector, matching the other unmasked
   macros in this header (e.g. _mm_rol_epi64), so the argument X is
   expanded exactly once and side effects in it are not duplicated.  With
   the (__mmask8)-1 mask the pass-through is not expected to contribute to
   the result -- see the Intel Intrinsics Guide entry for this intrinsic.  */
#define _mm_alignr_epi64(X, Y, C) \
  ((__m128i) __builtin_ia32_alignq128_mask ( \
    (__v2di) (__m128i) (X), (__v2di) (__m128i) (Y), (int) (C), \
    (__v2di) (__m128i) _mm_setzero_si128 (), \
    (__mmask8) -1))

/* Masked 128-bit, 64-bit-element "alignr" macro.  Expands to one call of
   __builtin_ia32_alignq128_mask with operands X, Y, the immediate C cast
   to int, the pass-through vector W and the caller-supplied mask U.

   W and U must be forwarded to the builtin: passing X and (__mmask8)-1
   instead would silently discard both parameters and turn this into the
   unmasked operation.  The operand layout mirrors
   _mm_mask_alignr_epi32 and _mm256_mask_alignr_epi64.  */
#define _mm_mask_alignr_epi64(W, U, X, Y, C) \
  ((__m128i) __builtin_ia32_alignq128_mask ( \
    (__v2di) (__m128i) (X), (__v2di) (__m128i) (Y), (int) (C), \
    (__v2di) (__m128i) (W), \
    (__mmask8) (U)))

/* Zero-masked 128-bit, 64-bit-element "alignr" macro.  Expands to one call
   of __builtin_ia32_alignq128_mask with operands X, Y, the immediate C
   cast to int, a zero vector as pass-through and the caller-supplied
   mask U.  */
#define _mm_maskz_alignr_epi64(U, X, Y, C) \
  ((__m128i) __builtin_ia32_alignq128_mask ( \
    (__v2di) (__m128i) (X), (__v2di) (__m128i) (Y), (int) (C), \
    (__v2di) (__m128i) _mm_setzero_si128 (), \
    (__mmask8) (U)))

/* Masked "cvtps_ph" macros.  Each expands to one call of
   __builtin_ia32_vcvtps2ph_mask (A viewed as __v4sf) or
   __builtin_ia32_vcvtps2ph256_mask (A viewed as __v8sf) with operands:
   source A, immediate I cast to int, a __v8hi pass-through vector and the
   caller-supplied mask U.  The result is always cast to __m128i.
   NOTE(review): per the names this converts packed single-precision values
   to half precision, with I selecting rounding; confirm against the Intel
   Intrinsics Guide.  */

/* __m128 source, pass-through W.  */
#define _mm_mask_cvtps_ph(W, U, A, I) \
  ((__m128i) __builtin_ia32_vcvtps2ph_mask ( \
    (__v4sf) (__m128) (A), (int) (I), \
    (__v8hi) (__m128i) (W), \
    (__mmask8) (U)))

/* __m128 source, zero pass-through.  */
#define _mm_maskz_cvtps_ph(U, A, I) \
  ((__m128i) __builtin_ia32_vcvtps2ph_mask ( \
    (__v4sf) (__m128) (A), (int) (I), \
    (__v8hi) (__m128i) _mm_setzero_si128 (), \
    (__mmask8) (U)))

/* __m256 source, pass-through W.  */
#define _mm256_mask_cvtps_ph(W, U, A, I) \
  ((__m128i) __builtin_ia32_vcvtps2ph256_mask ( \
    (__v8sf) (__m256) (A), (int) (I), \
    (__v8hi) (__m128i) (W), \
    (__mmask8) (U)))

/* __m256 source, zero pass-through.  */
#define _mm256_maskz_cvtps_ph(U, A, I) \
  ((__m128i) __builtin_ia32_vcvtps2ph256_mask ( \
    (__v8sf) (__m256) (A), (int) (I), \
    (__v8hi) (__m128i) _mm_setzero_si128 (), \
    (__mmask8) (U)))

/* Masked 32-bit-element "srai" macros.  Each expands to one call of
   __builtin_ia32_psradi256_mask or __builtin_ia32_psradi128_mask with
   operands: source vector A, count B cast to int, a pass-through vector
   and the caller-supplied mask U.
   NOTE(review): per the names this is an arithmetic right shift by an
   immediate; confirm against the Intel Intrinsics Guide.  */

/* 256-bit, pass-through W.  */
#define _mm256_mask_srai_epi32(W, U, A, B) \
  ((__m256i) __builtin_ia32_psradi256_mask ( \
    (__v8si) (__m256i) (A), (int) (B), \
    (__v8si) (__m256i) (W), \
    (__mmask8) (U)))

/* 256-bit, zero pass-through.  */
#define _mm256_maskz_srai_epi32(U, A, B) \
  ((__m256i) __builtin_ia32_psradi256_mask ( \
    (__v8si) (__m256i) (A), (int) (B), \
    (__v8si) _mm256_setzero_si256 (), \
    (__mmask8) (U)))

/* 128-bit, pass-through W.  */
#define _mm_mask_srai_epi32(W, U, A, B) \
  ((__m128i) __builtin_ia32_psradi128_mask ( \
    (__v4si) (__m128i) (A), (int) (B), \
    (__v4si) (__m128i) (W), \
    (__mmask8) (U)))

/* 128-bit, zero pass-through.  */
#define _mm_maskz_srai_epi32(U, A, B) \
  ((__m128i) __builtin_ia32_psradi128_mask ( \
    (__v4si) (__m128i) (A), (int) (B), \
    (__v4si) _mm_setzero_si128 (), \
    (__mmask8) (U)))

/* 64-bit-element "srai" macros.  Each expands to one call of
   __builtin_ia32_psraqi256_mask or __builtin_ia32_psraqi128_mask with
   operands: source vector A, count B cast to int, a pass-through vector
   and a write mask.
   NOTE(review): per the names this is an arithmetic right shift by an
   immediate; confirm against the Intel Intrinsics Guide.  */

/* 256-bit: zero pass-through, mask fixed at (__mmask8)-1.  */
#define _mm256_srai_epi64(A, B) \
  ((__m256i) __builtin_ia32_psraqi256_mask ( \
    (__v4di) (__m256i) (A), (int) (B), \
    (__v4di) _mm256_setzero_si256 (), \
    (__mmask8) -1))

/* 256-bit: W as pass-through, caller-supplied mask U.  */
#define _mm256_mask_srai_epi64(W, U, A, B) \
  ((__m256i) __builtin_ia32_psraqi256_mask ( \
    (__v4di) (__m256i) (A), (int) (B), \
    (__v4di) (__m256i) (W), \
    (__mmask8) (U)))

/* 256-bit: zero pass-through, caller-supplied mask U.  */
#define _mm256_maskz_srai_epi64(U, A, B) \
  ((__m256i) __builtin_ia32_psraqi256_mask ( \
    (__v4di) (__m256i) (A), (int) (B), \
    (__v4di) _mm256_setzero_si256 (), \
    (__mmask8) (U)))

/* 128-bit: zero pass-through, mask fixed at (__mmask8)-1.  */
#define _mm_srai_epi64(A, B) \
  ((__m128i) __builtin_ia32_psraqi128_mask ( \
    (__v2di) (__m128i) (A), (int) (B), \
    (__v2di) _mm_setzero_si128 (), \
    (__mmask8) -1))

/* 128-bit: W as pass-through, caller-supplied mask U.  */
#define _mm_mask_srai_epi64(W, U, A, B) \
  ((__m128i) __builtin_ia32_psraqi128_mask ( \
    (__v2di) (__m128i) (A), (int) (B), \
    (__v2di) (__m128i) (W), \
    (__mmask8) (U)))

/* 128-bit: zero pass-through, caller-supplied mask U.  */
#define _mm_maskz_srai_epi64(U, A, B) \
  ((__m128i) __builtin_ia32_psraqi128_mask ( \
    (__v2di) (__m128i) (A), (int) (B), \
    (__v2di) _mm_setzero_si128 (), \
    (__mmask8) (U)))

/* Masked "permutex_pd" macros.  Both expand to one call of
   __builtin_ia32_permdf256_mask with operands: source vector A (viewed as
   __v4df), the immediate B cast to int, a pass-through vector and the
   caller-supplied mask U.  */

/* Pass-through is W.  */
#define _mm256_mask_permutex_pd(W, U, A, B) \
  ((__m256d) __builtin_ia32_permdf256_mask ( \
    (__v4df) (__m256d) (A), (int) (B), \
    (__v4df) (__m256d) (W), \
    (__mmask8) (U)))

/* Pass-through is a zero vector.  */
#define _mm256_maskz_permutex_pd(U, A, B) \
  ((__m256d) __builtin_ia32_permdf256_mask ( \
    (__v4df) (__m256d) (A), (int) (B), \
    (__v4df) (__m256d) _mm256_setzero_pd (), \
    (__mmask8) (U)))

/* Masked 256-bit "permute" macros.  Each expands to one call of
   __builtin_ia32_vpermilpd256_mask (double) or
   __builtin_ia32_vpermilps256_mask (float) with operands: source vector
   X, the immediate C cast to int, a pass-through vector and the
   caller-supplied mask U.  */

/* Double precision, pass-through W.  */
#define _mm256_mask_permute_pd(W, U, X, C) \
  ((__m256d) __builtin_ia32_vpermilpd256_mask ( \
    (__v4df) (__m256d) (X), (int) (C), \
    (__v4df) (__m256d) (W), \
    (__mmask8) (U)))

/* Double precision, zero pass-through.  */
#define _mm256_maskz_permute_pd(U, X, C) \
  ((__m256d) __builtin_ia32_vpermilpd256_mask ( \
    (__v4df) (__m256d) (X), (int) (C), \
    (__v4df) (__m256d) _mm256_setzero_pd (), \
    (__mmask8) (U)))

/* Single precision, pass-through W.  */
#define _mm256_mask_permute_ps(W, U, X, C) \
  ((__m256) __builtin_ia32_vpermilps256_mask ( \
    (__v8sf) (__m256) (X), (int) (C), \
    (__v8sf) (__m256) (W), \
    (__mmask8) (U)))

/* Single precision, zero pass-through.  */
#define _mm256_maskz_permute_ps(U, X, C) \
  ((__m256) __builtin_ia32_vpermilps256_mask ( \
    (__v8sf) (__m256) (X), (int) (C), \
    (__v8sf) (__m256) _mm256_setzero_ps (), \
    (__mmask8) (U)))

/* Masked 128-bit "permute" macros.  Each expands to one call of
   __builtin_ia32_vpermilpd_mask (double) or __builtin_ia32_vpermilps_mask
   (float) with operands: source vector X, the immediate C cast to int, a
   pass-through vector and the caller-supplied mask U.  */

/* Double precision, pass-through W.  */
#define _mm_mask_permute_pd(W, U, X, C) \
  ((__m128d) __builtin_ia32_vpermilpd_mask ( \
    (__v2df) (__m128d) (X), (int) (C), \
    (__v2df) (__m128d) (W), \
    (__mmask8) (U)))

/* Double precision, zero pass-through.  */
#define _mm_maskz_permute_pd(U, X, C) \
  ((__m128d) __builtin_ia32_vpermilpd_mask ( \
    (__v2df) (__m128d) (X), (int) (C), \
    (__v2df) (__m128d) _mm_setzero_pd (), \
    (__mmask8) (U)))

/* Single precision, pass-through W.  */
#define _mm_mask_permute_ps(W, U, X, C) \
  ((__m128) __builtin_ia32_vpermilps_mask ( \
    (__v4sf) (__m128) (X), (int) (C), \
    (__v4sf) (__m128) (W), \
    (__mmask8) (U)))

/* Single precision, zero pass-through.  */
#define _mm_maskz_permute_ps(U, X, C) \
  ((__m128) __builtin_ia32_vpermilps_mask ( \
    (__v4sf) (__m128) (X), (int) (C), \
    (__v4sf) (__m128) _mm_setzero_ps (), \
    (__mmask8) (U)))

/* 256-bit "mask_blend" macros.  Each expands to one call of the matching
   __builtin_ia32_blendm*_256_mask builtin, passing __A first, __W second
   and the mask __U (cast to __mmask8) last.
   NOTE(review): which of __A / __W is selected for a set mask bit is
   defined by the builtin, not visible here; confirm against the Intel
   Intrinsics Guide.  */

/* Double-precision elements (__v4df).  */
#define _mm256_mask_blend_pd(__U, __A, __W) \
  ((__m256d) __builtin_ia32_blendmpd_256_mask ( \
    (__v4df) (__A), (__v4df) (__W), (__mmask8) (__U)))

/* Single-precision elements (__v8sf).  */
#define _mm256_mask_blend_ps(__U, __A, __W) \
  ((__m256) __builtin_ia32_blendmps_256_mask ( \
    (__v8sf) (__A), (__v8sf) (__W), (__mmask8) (__U)))

/* 64-bit integer elements (__v4di).  */
#define _mm256_mask_blend_epi64(__U, __A, __W) \
  ((__m256i) __builtin_ia32_blendmq_256_mask ( \
    (__v4di) (__A), (__v4di) (__W), (__mmask8) (__U)))

/* 32-bit integer elements (__v8si).  */
#define _mm256_mask_blend_epi32(__U, __A, __W) \
  ((__m256i) __builtin_ia32_blendmd_256_mask ( \
    (__v8si) (__A), (__v8si) (__W), (__mmask8) (__U)))

/* 128-bit "mask_blend" macros.  Each expands to one call of the matching
   __builtin_ia32_blendm*_128_mask builtin, passing __A first, __W second
   and the mask __U (cast to __mmask8) last.
   NOTE(review): which of __A / __W is selected for a set mask bit is
   defined by the builtin, not visible here; confirm against the Intel
   Intrinsics Guide.  */

/* Double-precision elements (__v2df).  */
#define _mm_mask_blend_pd(__U, __A, __W) \
  ((__m128d) __builtin_ia32_blendmpd_128_mask ( \
    (__v2df) (__A), (__v2df) (__W), (__mmask8) (__U)))

/* Single-precision elements (__v4sf).  */
#define _mm_mask_blend_ps(__U, __A, __W) \
  ((__m128) __builtin_ia32_blendmps_128_mask ( \
    (__v4sf) (__A), (__v4sf) (__W), (__mmask8) (__U)))

/* 64-bit integer elements (__v2di).  */
#define _mm_mask_blend_epi64(__U, __A, __W) \
  ((__m128i) __builtin_ia32_blendmq_128_mask ( \
    (__v2di) (__A), (__v2di) (__W), (__mmask8) (__U)))

/* 32-bit integer elements (__v4si).  */
#define _mm_mask_blend_epi32(__U, __A, __W) \
  ((__m128i) __builtin_ia32_blendmd_128_mask ( \
    (__v4si) (__A), (__v4si) (__W), (__mmask8) (__U)))

/* Unmasked 256-bit "cmp ... mask" macros.  Each expands to one compare
   builtin called with X, Y, the immediate P cast to int and the mask
   (__mmask8)-1; the builtin's result is cast to __mmask8.
   NOTE(review): P is presumably the comparison predicate; confirm its
   encoding against the Intel Intrinsics Guide.  */

/* Uses __builtin_ia32_ucmpd256_mask on __v8si operands.  */
#define _mm256_cmp_epu32_mask(X, Y, P) \
  ((__mmask8) __builtin_ia32_ucmpd256_mask ( \
    (__v8si) (__m256i) (X), (__v8si) (__m256i) (Y), (int) (P), \
    (__mmask8) -1))

/* Uses __builtin_ia32_cmpq256_mask on __v4di operands.  */
#define _mm256_cmp_epi64_mask(X, Y, P) \
  ((__mmask8) __builtin_ia32_cmpq256_mask ( \
    (__v4di) (__m256i) (X), (__v4di) (__m256i) (Y), (int) (P), \
    (__mmask8) -1))

/* Uses __builtin_ia32_cmpd256_mask on __v8si operands.  */
#define _mm256_cmp_epi32_mask(X, Y, P) \
  ((__mmask8) __builtin_ia32_cmpd256_mask ( \
    (__v8si) (__m256i) (X), (__v8si) (__m256i) (Y), (int) (P), \
    (__mmask8) -1))

/* Uses __builtin_ia32_ucmpq256_mask on __v4di operands.  */
#define _mm256_cmp_epu64_mask(X, Y, P) \
  ((__mmask8) __builtin_ia32_ucmpq256_mask ( \
    (__v4di) (__m256i) (X), (__v4di) (__m256i) (Y), (int) (P), \
    (__mmask8) -1))

/* Uses __builtin_ia32_cmppd256_mask on __v4df operands.  */
#define _mm256_cmp_pd_mask(X, Y, P) \
  ((__mmask8) __builtin_ia32_cmppd256_mask ( \
    (__v4df) (__m256d) (X), (__v4df) (__m256d) (Y), (int) (P), \
    (__mmask8) -1))

/* Uses __builtin_ia32_cmpps256_mask on __v8sf operands.  */
#define _mm256_cmp_ps_mask(X, Y, P) \
  ((__mmask8) __builtin_ia32_cmpps256_mask ( \
    (__v8sf) (__m256) (X), (__v8sf) (__m256) (Y), (int) (P), \
    (__mmask8) -1))

/* Masked 256-bit "cmp ... mask" macros.  Each expands to one compare
   builtin called with X, Y, the immediate P cast to int and the
   caller-supplied mask M; the builtin's result is cast to __mmask8.
   NOTE(review): P is presumably the comparison predicate; confirm its
   encoding against the Intel Intrinsics Guide.  */

/* Uses __builtin_ia32_cmpq256_mask on __v4di operands.  */
#define _mm256_mask_cmp_epi64_mask(M, X, Y, P) \
  ((__mmask8) __builtin_ia32_cmpq256_mask ( \
    (__v4di) (__m256i) (X), (__v4di) (__m256i) (Y), (int) (P), \
    (__mmask8) (M)))

/* Uses __builtin_ia32_cmpd256_mask on __v8si operands.  */
#define _mm256_mask_cmp_epi32_mask(M, X, Y, P) \
  ((__mmask8) __builtin_ia32_cmpd256_mask ( \
    (__v8si) (__m256i) (X), (__v8si) (__m256i) (Y), (int) (P), \
    (__mmask8) (M)))

/* Uses __builtin_ia32_ucmpq256_mask on __v4di operands.  */
#define _mm256_mask_cmp_epu64_mask(M, X, Y, P) \
  ((__mmask8) __builtin_ia32_ucmpq256_mask ( \
    (__v4di) (__m256i) (X), (__v4di) (__m256i) (Y), (int) (P), \
    (__mmask8) (M)))

/* Uses __builtin_ia32_ucmpd256_mask on __v8si operands.  */
#define _mm256_mask_cmp_epu32_mask(M, X, Y, P) \
  ((__mmask8) __builtin_ia32_ucmpd256_mask ( \
    (__v8si) (__m256i) (X), (__v8si) (__m256i) (Y), (int) (P), \
    (__mmask8) (M)))

/* Uses __builtin_ia32_cmppd256_mask on __v4df operands.  */
#define _mm256_mask_cmp_pd_mask(M, X, Y, P) \
  ((__mmask8) __builtin_ia32_cmppd256_mask ( \
    (__v4df) (__m256d) (X), (__v4df) (__m256d) (Y), (int) (P), \
    (__mmask8) (M)))

/* Uses __builtin_ia32_cmpps256_mask on __v8sf operands.  */
#define _mm256_mask_cmp_ps_mask(M, X, Y, P) \
  ((__mmask8) __builtin_ia32_cmpps256_mask ( \
    (__v8sf) (__m256) (X), (__v8sf) (__m256) (Y), (int) (P), \
    (__mmask8) (M)))

/* Unmasked 128-bit "cmp ... mask" macros.  Each expands to one compare
   builtin called with X, Y, the immediate P cast to int and the mask
   (__mmask8)-1; the builtin's result is cast to __mmask8.
   NOTE(review): P is presumably the comparison predicate; confirm its
   encoding against the Intel Intrinsics Guide.  */

/* Uses __builtin_ia32_cmpq128_mask on __v2di operands.  */
#define _mm_cmp_epi64_mask(X, Y, P) \
  ((__mmask8) __builtin_ia32_cmpq128_mask ( \
    (__v2di) (__m128i) (X), (__v2di) (__m128i) (Y), (int) (P), \
    (__mmask8) -1))

/* Uses __builtin_ia32_cmpd128_mask on __v4si operands.  */
#define _mm_cmp_epi32_mask(X, Y, P) \
  ((__mmask8) __builtin_ia32_cmpd128_mask ( \
    (__v4si) (__m128i) (X), (__v4si) (__m128i) (Y), (int) (P), \
    (__mmask8) -1))

/* Uses __builtin_ia32_ucmpq128_mask on __v2di operands.  */
#define _mm_cmp_epu64_mask(X, Y, P) \
  ((__mmask8) __builtin_ia32_ucmpq128_mask ( \
    (__v2di) (__m128i) (X), (__v2di) (__m128i) (Y), (int) (P), \
    (__mmask8) -1))

/* Uses __builtin_ia32_ucmpd128_mask on __v4si operands.  */
#define _mm_cmp_epu32_mask(X, Y, P) \
  ((__mmask8) __builtin_ia32_ucmpd128_mask ( \
    (__v4si) (__m128i) (X), (__v4si) (__m128i) (Y), (int) (P), \
    (__mmask8) -1))

/* Uses __builtin_ia32_cmppd128_mask on __v2df operands.  */
#define _mm_cmp_pd_mask(X, Y, P) \
  ((__mmask8) __builtin_ia32_cmppd128_mask ( \
    (__v2df) (__m128d) (X), (__v2df) (__m128d) (Y), (int) (P), \
    (__mmask8) -1))

/* Uses __builtin_ia32_cmpps128_mask on __v4sf operands.  */
#define _mm_cmp_ps_mask(X, Y, P) \
  ((__mmask8) __builtin_ia32_cmpps128_mask ( \
    (__v4sf) (__m128) (X), (__v4sf) (__m128) (Y), (int) (P), \
    (__mmask8) -1))

/* Masked 128-bit "cmp ... mask" macros.  Each expands to one compare
   builtin called with X, Y, the immediate P cast to int and the
   caller-supplied mask M; the builtin's result is cast to __mmask8.
   NOTE(review): P is presumably the comparison predicate; confirm its
   encoding against the Intel Intrinsics Guide.  */

/* Uses __builtin_ia32_cmpq128_mask on __v2di operands.  */
#define _mm_mask_cmp_epi64_mask(M, X, Y, P) \
  ((__mmask8) __builtin_ia32_cmpq128_mask ( \
    (__v2di) (__m128i) (X), (__v2di) (__m128i) (Y), (int) (P), \
    (__mmask8) (M)))

/* Uses __builtin_ia32_cmpd128_mask on __v4si operands.  */
#define _mm_mask_cmp_epi32_mask(M, X, Y, P) \
  ((__mmask8) __builtin_ia32_cmpd128_mask ( \
    (__v4si) (__m128i) (X), (__v4si) (__m128i) (Y), (int) (P), \
    (__mmask8) (M)))

/* Uses __builtin_ia32_ucmpq128_mask on __v2di operands.  */
#define _mm_mask_cmp_epu64_mask(M, X, Y, P) \
  ((__mmask8) __builtin_ia32_ucmpq128_mask ( \
    (__v2di) (__m128i) (X), (__v2di) (__m128i) (Y), (int) (P), \
    (__mmask8) (M)))

/* Uses __builtin_ia32_ucmpd128_mask on __v4si operands.  */
#define _mm_mask_cmp_epu32_mask(M, X, Y, P) \
  ((__mmask8) __builtin_ia32_ucmpd128_mask ( \
    (__v4si) (__m128i) (X), (__v4si) (__m128i) (Y), (int) (P), \
    (__mmask8) (M)))

/* Uses __builtin_ia32_cmppd128_mask on __v2df operands.  */
#define _mm_mask_cmp_pd_mask(M, X, Y, P) \
  ((__mmask8) __builtin_ia32_cmppd128_mask ( \
    (__v2df) (__m128d) (X), (__v2df) (__m128d) (Y), (int) (P), \
    (__mmask8) (M)))

/* Uses __builtin_ia32_cmpps128_mask on __v4sf operands.  */
#define _mm_mask_cmp_ps_mask(M, X, Y, P) \
  ((__mmask8) __builtin_ia32_cmpps128_mask ( \
    (__v4sf) (__m128) (X), (__v4sf) (__m128) (Y), (int) (P), \
    (__mmask8) (M)))

#endif

/* Alias macros: each simply forwards to another intrinsic.  */

/* Forwards to _mm256_permutevar8x32_ps with the two arguments swapped.  */
#define _mm256_permutexvar_ps(A, B) \
  _mm256_permutevar8x32_ps ((B), (A))

/* The *_cvt_roundps_ph names forward their arguments unchanged, and in the
   same order, to the corresponding *_cvtps_ph macro.  */
#define _mm256_mask_cvt_roundps_ph(A, B, C, D) \
  _mm256_mask_cvtps_ph ((A), (B), (C), (D))

#define _mm256_maskz_cvt_roundps_ph(A, B, C) \
  _mm256_maskz_cvtps_ph ((A), (B), (C))

#define _mm_mask_cvt_roundps_ph(A, B, C, D) \
  _mm_mask_cvtps_ph ((A), (B), (C), (D))

#define _mm_maskz_cvt_roundps_ph(A, B, C) \
  _mm_maskz_cvtps_ph ((A), (B), (C))

#ifdef __DISABLE_AVX512VL__
#undef __DISABLE_AVX512VL__
#pragma GCC pop_options
#endif /* __DISABLE_AVX512VL__ */

#endif /* _AVX512VLINTRIN_H_INCLUDED */
                                                                                                                                                                                                              ^y                  Ғ                                  ¨                  k          "6                           L 	             l         d
                       )  	                       
 X     (     * f                         J     g      r          f                      	 Y                          g +     p 1 q 0p	 4
      
         y  6p ? l
               >              1 j	 r
 
 R     y"
                  j .
      	         ? 	              	. { 
     ʇ  
                                                T
 k 
 5     $
                         kV                  '             I  ^@      v                                                ?    H   A                                           t     đ         *    W  ~         JM
 [	      -
           x[             @
                       M                 *         ]
 o                 -l             / S                      -
     6         T   
      	         z                                                      a  S: 8U
     R                           3                 z% B                 g|     
     ]	     @         c                 V         G  C -                         O          k  
     Y              k
  W         t 3x     [r     V
         /     "          |y     Z     u _k     
 V?
                             MZ
     c        [
     % 8q i
     .     +         Y ֳ
 K
     
     ˺          H 
                   RB                      tR       Ł
 ?5
    C      B                   Ͼ  J Դ                                
          k $     4 [         b N                                                                   	 ]* q     	     M 	          ;     
 d	 8                 xJ     
               Ò
             *     ?                 y         I k                   E     l:                   4	 \ n e     ;         N
         	 `
                                 
 F     h  	     &  
     ^
                     D     	             [                     'n     E                       $D             
     G
         D                        Z           T
                       v                       
         
                  c                 /     _
 k         ³ C^                     	G E     }?                 az ?         k qx n.
     
     z
 
 ' T                      { Wx /  [          /         *         k
             ~
     l     T
      Y p                                        &V             w^	           	 !k kR [           x             U         չ x
     i     M
                                                   L                 I ^Y _+     H
 k
      
     
     * @          #n              ^
                 2 
z
                              p	         
          	                 ^
      I bd
 [
     ^             
, ;       h  `k 	 ^  n: $ 
 \                  tI p	      y     M      u         @
              E + (                     z
                     i +         {I
     * My     ?     .   v : ~         M
                  7              a                 *             l         S         l         pI
             u ^ q
 	 Ѐ
     2          '                             	                             _ ?         :I         ~                 	              
 oZ               -  !     /                      Z          Ȑ L	         R	                              QG  x     #
                     3	                 	 |                                 x x                         HI @         S         B                          k                             Zr                          {     M          x     [                 X-         3     4E     
             5
 	                                   #  s       (8                         
          
 @*         +	              e	 ^	 z
 !         	         
 M                               I      
     o 
 e
     4         l l -
 w Ò                          , 	 H             (@                 O @ V =                )             
 !   h      [         wl          
 ^ >$     
 	 ; 	 ?                 6 c	   9T
                                  	 I               k @ 	 &
                              x      	     l  E
                         ށ vJ         @                                              X                     m                   m         	              R                          @     :
 2  
     y Z :
 m
                                       C ʝ               .                      v
                     pp	     # r         -       
 4             Z
     
          )                                   X
 %
     	                      s      
         0	  *           K             8 +     uS       6
         S ?                                                                            @
     
                                    Ɩ     8E                 ~* ʧ b                       0                              @ 5 k {           SZ         W             F         8             t ^                               | I              ;         m-          6                         l         
      
             ?         t%
     @5
      + F         +                 I      I     W             9A                  C      B                       z          UZ S !          .     h   Þ             3  . F@                                b     F B +                      q l
 &% :         tl p ;             #                       O         
              n Z               ! @
 -             ~y
              	 
!
                 '	 p     / o	          4n     &          l 
                  
                       +!      
4         _	 D
     `o R	         <@                  & / '                                           t                 .  !             ~x               fU           / eu
 
     wR R         Tr     
             ?      	            *     @     5r         a
                      t	  ?
     Ap             :     I4	          
      i~ w              M 
 F  A                  	 Fk       	              5
         	 E                                            c
           G -
                 ׈     r                                  b ;                               5
          
 A                     '     V             
         v              o   -
      ] |
     j     W @5	 7                       /
      ^ 	 +      
                             ^	 I
                                 	              XM
     
      .         1             qy                          
                          6l
          .             +	 7                          + 9V             A!     ,     
 ?         	  G
 -              
         p Ȧ	 B             @                         ѩ
         e             .           9Z
   s                       a
        o                  # v 
               	                   {/                                 + +     M
 &     &o       .             . O                     
 
         Xe
 U
 fm         
         %k                                 y ?             k hd
                      	         *          ~ B		 	             m                             	         
7  m $*                                  
     C           1 X A ij             - j K                   $             il             E*       Z Z 	                  JY              }             
                      .
             	 @                    	 J ܑ
 8           G                               k y
          
 
     ӆ                                           ( $         ɂ 	 
                     Z g  p     g              o     	             vj              <A  ~   0(      ȁ
     Mq     /@                      $     b                               h4
             
 5	 W  s!
     S         SV                      O     	                                    +      p         
       2[ S               w
        " %             K                 f.  /     *                                   p         #t E
                                                  	                 Z x }             r	                 R     zq             C          6                 ß             9           i     q   g (/                          T         x      
 A )I                 X p              ~     jz  Y         
     `
              
      ֑
  O	                         Z Z              r 	 6                               6         	                     b      4	                   ě /y <@          8 
     P     -
 2T
                    > y
     k 	                                            E"                          ?         2"     ^
 +U
                         @ p	 + A          %?
     r n S        cx       t
 F                 P7     $
     
     .      k             e Y
     K                       s	     V  Sb         :                 	            V              \q      
                               6             : W V
     ?
         T -         uJ "      K     Ɂ H     u      y
     r =[         n     |     k E y5
     .
                 H 	 ~          q     IA         6         G Xn             E                      ! )/                                       z                     C 
     &| )5 	 4         $
             P     ;     r                  4z
 Y  8
 ^
     _
     * v	 Ɇ
   $,             r                                   \                    -             g 	   E (
 .
     N     	          S
     %
         */          /	                  Z
         5@                          z H#                                        
     
     R         T                       j                                [             , 8  t
                     Ep	 E                         	             ;|          
                                         !     c ˶ E                                    ^E              &         Y  d q Z     - /               q             WE                   `
 K / y         #                 -                       
 ;     u[ .                  e                  %+              Р  
             6 -          C Y           b *
              TZ j     k             c                             k     n
         % : D
 ?                 ~	 Z + &     I                                             ; F EZ                      _
 Z                     	         Z                   5      I     rU     l                 
                   	                  N
                         Y q         sR           7F        8                                       2; 
 q             t     ;                  :     "     K9           f/                 4 " y 3_	     
              !;                 +              ީ  
                      ~                     R rb  f     Y(               7     Ş          S             {
     B                             E	      [     !     s  	             ^ (      
              @ ~4	                             "V                      
C  e           	             ~ 6`	         c         v             H                 ^	 +         x     X             &*     i4                                                  
     rk l p	 @~              .                                     b              @                     	 z              2	          V      ;
      U
   X     A
 X
                      ?
     v?     [D 	                       H * :     .                 _ b                                             %                      D         R %  pq	   H     - ]                              {k      k;           1     
 	     
                      J F ̧             q                       xU     ! .U
                                             Y Oj          =                                   H         tb          	         $E     *               , @ ~ 
     ӷ # q 	              a             .              t G                                                         rZ W         H	 Ԁ
 4     U     4	 ށ
 P                      D             oy l     r             A         x
                 !              K
                                         :          |              #n O         '         (
 E      h T     Z X l     k @             ~     8J                     
 |	                         "+                   u! \ s	     D j             ?                       t ̈     '+     ù 	     \D     -7     	 q     
      Ȁ
         Q     R N
 ~
                               
             &
     + 	      XI
     	           A@     CB D_
                  2
     4                      ~t         ʊ       W     a             9                        A          vy
             Z         q
      0         ~  P, J U                     
     ZS !                  
  8 u
             O |      
          z                   
             Y8
       
                 P
      
 	          S$             !             #     ,
              	         (. r '  O                                  
 !- >             
                                   	 ٟ             H	         :q	 @  /                 E                    Z
 ;                 ^	     k	         
     U?
         D         VI     _
 pr             R                  ܞ     / I  b .
                           }  D ɖ +         
                               U_	 } ?                      
         
     U         M       h              L      /	               5y
         k
       l
         |A      Kl
 R     {}         8      M
 Q             @                     y
          ! q.
 \          
         
 L     
         J 3U
                             	y     :             [  S     r                                                          	             8              y 	                 X     4 O  Q
 ~	                                  
 
     ) Ԩ  ^g x	                  0
 I     ܁	                                           9                     AO                          ) 8+     y                                                       	             _
                 
         
         ! 	      /     4        K     c          ak                      p         k         + ^  c     	       >	      G)     J F                                B         !                     ,              Qp                    , 	     ?
                      9
         ; \J Z
 c
 <$         i	 q ʦ	              c
          
             	             }                              q             ]              ;
         A         
                 I Ǯ
 n                                                   7+ l   2 ~ I                     /          Bq '          K! % g     5 t
     H ~ 
     \
          D      z     G	     ׁ  
 )  S%         r K 
@
                                                                  P j     {"
 '[ F Y     , j?                                C8 9      M
 
         9             M
     Z 
     0         8 \_
                  *                            $ JX                             )`
      +      _	     &              ]
     ( @		     K@     6             [o                     [z     q         	           2      
             6 	      ^             * ׁ     
      :         C        4     
 \             _
 +@          F                      	              
                             
  }	  : '   !
 }                                                
                       u	             !     D8 6F  
     )
                                             w         :
             
             l                  
      $ ~n                   j f         %! Y E                              ! H                     © j             [     1p              A
         g6 )p	 c         
 &	             ̮     M     -             Z              
 8D     6             ;     
Z
 v	 bZ
 Z.      G
     |             
                             1% (Z     L                   1              ! E             	                                  j     N     	     %                 7                  0 _
 0k     h         T             ,         J                 n                `                                                      /         A	 x     
                                                    @ [     =R         B             U     X	             4 }     ~ K  'G                  0     ,      
     *            
             ~J       M Q5          	 C                 }          	          	          
                  *         x         ˈ             
             /H
 
             
 \+     Qx     4Y     
                         
 
 	 s*             {t         ;!         G u ,4 `g     #m         F A ]                                                           !      rZ 
                                               
     j k         	                                                l
 p6                        
     
  
                         ޳
             s                         	              m                               D            / M a
 
 C                 87     	 O  e c@     D                        Z ]n         
         xb         )k         S             j          ]              Z x : O 6                 ئ l
                                       P+                                    I         8 	             u}             f                                     	{                 |  +     J      S     	                  z
     WN
 ڷ     
             
 3	 	 ;
     m  2
     k               
         ~     e          Q,  {         
                          	     h                 ߦ l*                                                          
              	                         y 	                 c   ԑ       z	   \q !
     
          3	                Xd
         g                           n"             
? 	                         ;     -
         h            +T
 z             ;	             N                                 l j             6 .[ C     5 m ' + : x          ~         ;
              G             U     w          B              &
         p	 Y  %
          &                             
     B                         ]- ^              z                                      )      
 .         )                                         }	                   q 
        :                   c            x 	 Q         c
      ?5	     	
  nk     
     	 )                          O
 w~           	         
                                          ?          8              
          . e                 _
         K         l          
 A                     K         T E                           
                              XD >
  I             4; 
         uR                      V q 	 G ?
 R         
  &5	 
     ?k
         @8     |@
 *N
        g8
     
              *         <
                                      `- $,     h	     A 	 f     #     
  OZ
 Z ݘ            7         L,         !         I         ^@     =     s A                          
 f
                     @                             C         jt
                                             3	 Y     ^
      
  o          ^         `-                 c
             ó
     ] '                 [     J     O     h                              5	              I                                                        W                                          o	 nk  P     >     Xk n     t         p     [               35
 Ɛ         [         ;          rG          k             j 6
     3   a y
   @ O
          	             4	             _Z           0 5                    g             }                                     z     W	     .
         [k $
                          ȗ	 ߋ
 
                       4
     "         k         A                 AN
 Z      )
                    3     I     
          *                                       FS            	     5                           g                 :B           3-              `@         _r     *     K                     T -                               k         K     P!
     	     .     w ԇ                        ޅ
 é                               
     6             ع 
     Ӂ     
     o~     ) è  ѿ
                          W k 
         l      |z
             	                                 ʈ                            	, S ) 5 w         + @
 :              k     \ D		 i     0     , n                         t       ƴ     s     
  ,          U
           i                           ё
                  9     '     O  ' 	                     0             c                    Y γ
 ) 6          |                                .  ;
                                         l V                  ? ,  p e                 ț                 
             I
         \
                                      
                   v n; BT
      .     
             T                     :     ^         Z                 U         5-         t  @         !
     	     x 	 X      Z
     U         V	 ʥ      ԅ
                                      4 m.                 s	 }      J k !         Z            Y	  
 	                                   ?
         
 \                  ^           2      	 @             s ,     { И 
         
 Lx              < 'p	 j  z 	         w	     J"                              t          
 ]     ̂     w
 |!
             ; C      Y              u8
             t         +                             m	     k
 X
  m  	               
         
     8
 |                         )
                  JR     jq JA                 Z      S4
                             95     .      9k                                P)         l : *                 g.             {H 	       J                                      . *         & j8     +     W6               1z                           
     z
              	             c 	 :     +     k                           Rj      t     		      a
          {
         
 NF     +  &q                                   5t
 ~                                              b  $             QD             $     O                         D         
       o  
     E                                                              :x         $@     s                     o                                             W       r U *, 4 J     O      S *                     ^	                      	             @
       }      x     	 
             x                                  > , :                    r
              Y T	     ? ?         W                     b 
     +8                                          R         T  	 Bm         ؟ -         {              
         s                             r             o	 D* @         U 
  ;"         
          
              8 g                   [ [          	                     * m+         
 k     ?              WA      /     { 
         M                  uA     )                                                  
                         ]                 @                    N         D         
     lA          $
     
 08                       r$
     ɯ   p	 (U
             J                       +	         h             
             Y                                          g 
 7
                              R             ^r     j G                  `R D     l s 
 
                           C  j \ 	      '     bk          " 9	     Z?
 [  =     C              
     "                       +     l
 .         / F     uT
         
 -         J             
     V n_
         Ӓ          
                    I                 a       R/              2 ]                        i (         =,              k     T o         &                      q     -A 
       E
 qY
                                                             . I 
  VA                         }                       ^     	     1 &                         ~     2 c          YY
         <                                         ~
          ; .                     \Y lu
     ,               (P                              	 Z             6           x     m               Ɨ	         
                 >t
              *             (
     J     A         	 )
 x
                                       r     
                           	           ? {              م
         4          SA 3	     .9 ,      ?[ ! & `	         X ? 	       A     "
      *      mq	     
      
 9 	     j     b
 IE             o         @      u  6
 U R             J             O	          E      `	                    	              
         i 	         z	     \             p         `         \     |               =8     	 Z, (A              ۵      d
 , U                              o          
     P                      ~      uG     
                 W
             z
 
             q [U                             L	 *  E     Y         &	         ӑ         ' L
 p k     "                  89                 @  ^	 p                 e                     Z     	      "             KU              	                                      P                  t t
               aq	 *	  (@     s,     ~ 
                    u*                 /             / )             C                     ½ q              J m? H     8 L
         ^^                         +               @
 z
                          *
 k                                 \      x             
      
     ak         Z  
 
     T
 	
 X Z         ;
 d                 @   
     u6         H
   ^ 
 	     C     m         	         }!
 *         Ig U                  N
 ^
                                                  y                         n     n               	 H  Y
     ;       ɭ  4           ,!     lt
                                 Q4
  	                     g     @ 
                          	                  |     ;      U                              x     g 
 F                  k     N         I a     K     c-
                 d 
             
         m
 y             b     ? 	 @ |  ځ , z             |         ./          	     W     aZ
 
     y &             E 8             /         w 	 Q              + E     ~                             I     
             g
                                   o          
      ;                                                       u
             5 մ
                 H     Q          P     ʐ                 J bh         .  1      "z 
 g                  ; S 
     F             {                       'R +         
k
  +         
                  <       J .      ߶                                          1	                   O	 5
                 Q 
                     ^	 Hx              5             ۽     
 B!              J  m     /`
          _?             A                                  V       	                  >A                        t H
  M6               _
 ,      ?$ %                {X     ~                               (     A     D~      E 
                                  
                   -         m
                  
     <     
                  ~ P c
         J     r 	  9 e X n j -  q[               "                     w U_
      
      6                               L w	              |         ~     J	 l h  xl 9 	 S         7     g
         .P 	 XZ     + ,     +                  
  
      ٔ     #     1@   ֞              4     
  7 >F     q                  +         ӹ @                                             	             y          /
                             +     
                                     K                           ѽ         "J             6                  e4	         !           Q *     }     
 9 J Z 
         e	                                   ׵                                              j 	         )     7 	 R                       "O              + * P      T                              W                                                 $t          Z Y >       Z 4         q $ A                          
                 N
 
 "         k         3  l                       0 y u
 p t 	 t                  	 Z  t U4	                             
 *	  x.     Y
       +                  HB         gI
                                  Y z                     T                                           }	         B                  },      5 _[                            m
      ,V           q ^         
       "7 8
      9     
          g     ae
  Ĩ      Ƞ             p	            n             K          wp	         Vl >                    	          
V 8o +              Ҁ
                         	                !
  k       
  p        H
 I         4
     T 	     7 
         ^
              g      c,                         +     x g                                                    k
         @             n p: }k ^
         p
                  ;
                     G	 y ?
 3[                         4"         4	                                 2
     Z,         a }                              5
                 1                       + ;4                   z
 F          E U	                 
         w                              
 
             U 
  + 9              W		 #J          
                                   *5 л
       q I                                      N
         R     k
     
                                 P
 d              
         , "       ~      +     Œ  
         T                 wY
                      @ Z     R
       R                      jo                 A T7 f 	             U R	     .                                      Z                                       m                  "t @     
 L       u         	 M
  4	                  D[ ˵ G
     o 
     	 T                 #	         x     )     a  FJ     L4     Xx     gF          	 [y  ׺           B     19
                x թ + *         Ij q e	                                                   	 %         6              Q	       x Z
                  $        yk \     $-     @                  * *              k 0                                     	     j           L I
 J      =  -                K
 "     k      V. 
   A
         i          Z     	         ~
  4
     C{             $Z < r	          z.
 =
 |
                                  $
   V, 
                 
     -         W
         t            9"                 et       	 F     
 T N         2          {         .
                      P@ P                   #%
      =7                                     ֩          s F5	      I ;# N 
       \
                 Dr     I   Q 	         f             !
               7'             
                 e                                      N :  E             }
              9                      J      j      SU         3                             6     ,
 Q[                      !          :      ! dV 3     q	 
 > `          
     !          l
  L/             и       J+     ָ         " G-                 
     1
         E                  @     ,
 G+                D Ak              @                                 	             k		       W@             y 7     ,M
         ֵ     x
         *     c $
 O     Gm j             
      X                   '
 	 k     	         n
             D             "     Im 
         d     X U p V                                    ߆ T
         T(         6          ?         +         o y
         -             .      `
                        .
         
             n
      ?_
 w     \     8 8 q	          [         	            n
      Y X*                                    A 
 C ;	         @                 c wk <
     I Pj  Q '  Z     + xZ     c
                     &	 	                                        M 1              @ FR 
                 6 
  !          b  Ȩ              Q     +_	              )y                  B                                
 
 f
     `     J 
 [ @         7J A         ˔              l 5H
                                       H                               +  F                      +         
 	             <c         9          
                        =@     V	              
 
                   3,             x               8     . W   +         L[     ^      
       M      _                 q ~^
 x O     6
          '                                                                    :Z             O          ˩                                            	F 
     [Z              x                      M             ^	             D                          
                 [     p         T A                      5
 o. E
      - X          o              il r8
      y 
                  !             6     U \V 3             $                                ֪
                             H
 $ KZ         S	 *7     Ȃ                          3      r	             v Υ T                              `. lX I@ J                         @                    s
 n                               "      Z     
                 Y T6 
  I         	                 [       s     /             =
      % I     4r            6                             nS         G@ a      WZ     
 	     ?R                                     
           J             ٺ  A	  }O ={                      T B         +
 B     "     ~         
      	     S	     	B :J          Ip	                 ^     
  ]	 8
          -             N O
 ~
       4	     _	 a%
                  vk
     A       .
     B: T              
  
         j      ~ y          p                         oZ F	         Z                  
 ! ]
                                        ?         
 :     ~)                             3 e     Ч         ݆ Ѩ     I
            ;
 R                                 7     Ǚ
             	      

 h 	 2h         ,I ^
                 B          
          ϥ                                          cM
                             G          &	                                       O                          +                       I $                               2I         d8                                          
                                          +      ȇ         )9
 z     %     ?         ϑ  ZI
     Fn   ӵ     X         J 	     I 
              Ե v_
      	 c
    L
             =_
 ( 
   >      z
     `k >      ~ B     Ь                       	                     ; m
                 ] dT          Mq	             A      ; 5
          ځ               
 	     w
                  5
     g+     : 
     _             
                     MY
     [ ;                                     *  		              )     h             *                       
 E 
     s 
                               c     	      
         /	          R     /D          G           ٳ      Q
 = C
  I) b5
                         !                 ,     E         Y       U
             X                        m p     (       <              Z  / -
         ) 9#         Z                        
 T     
      
     gr                /"   
 k     +	                            6                             ~      	             ym         nG                         
                            f     y s
  Q  	  ť  )
         
               ܟ         & ?                  kJ                   !               QU      R         A                      H                      >     %
                   !q V ?
 y  + 
 k         {                     &!            s
      (         s                  =
     +          
         l
         ;         I          $ q                         Z 0	  T         &Z     r                    _
                       H.
                  $J     #M
 %B         D         1! 	                     Z  k                                                  +
                              n         *         
         	 .     ŗ     a	 ^
 o
                         p 8
   
         xE <   I
 +                  Y B                     	  { E	                          gq                 m         o                          
          ߛ             -
 K       I                   
 o  %
 y 5A X" f     G9     $H             x
  +                   y	 
 O                       I          h                     g              l                       Z	                       B                                         l ?
                       3             Y                             : 
     T     <! ;	 @         ՘         '                                             h M   o z     >
     	         
 2		 j|           @ W                  +                                         *                      +                
 05
 V ac     #	         5
          	     G! 
     >J                 E         "+ Y                    <
     :             !                     y	         Й
                 ՛ x^	     k     &
     վ
         c                  #  @     :l E                  U     k                  D                 S
               TH         	                      
 j 
      f t~ P                 ?                       :     <q ,     uk      RZ A          " D           p                                   ]     G
 l
     :
 l j	     o             [         Z                                              	     @                          U                                  k      	                          | U             2R     \                                     G '         >
 /     &         ,                   \?                                             U
                            B     U              /                  k      	 H             
                     g                        jn kn m           2+        
                   X
 ݴ Z |_
     I         Y YZ                       x     DH
                         
                     
 %
           Bp	 |        

 ɫ             Z     l
     h             	      n
                 X                                      d
               r      a~                           G t         u       -                       j     k             3      y     9  +     V  !                     y
 + M	 >
  x         	 g
                           * D	     +                       	 l SH
     9
     b                   G 
      , 
| 8                       	         dU             *	                                5
                                     { |  
 $ 
 p     	 ~
      c .T
          
  b                  5+         bY     ؀
     )                     ]A                                                     /             Ɋ 	          i                     d               x                 u	           4                      A         W8         M
       k                  |H     A         F         @                      q,                                                v+         M             ǈ          X     S@                     _	 5                         gG         q	     	         W  r 
             q     
 *      gY 	 
               {               9 <
 ? 	      	                      h
      T{ 
                  @                  т  4;                                 	 J. ^4	 Z Q G     H     	 3 :/                       I  p                 b *
                     H | A      | 
      "             I             k
 V                               d
                   3	 [                     : 
B         E     L  q                  P      #     )                  Y                 
     j [d
         e             (C   ҥ             +          5
                 1,     א     
 ڇ                  @$             5 
. t5
 #         r 7                 ы V+             Y Td
 3-
      y
 @          ݋
         D !                                                              GZ 0	     о A                           
 y
         "
          Ȉ                  Z: 	 N
  ʀ
     Xl
             WZ         Z[                                                  3	 n          /q           !
          
 A             Ǩ  U ?                           g
 l         
                   q         	      UT
         s  ܖ                   1         0m D
                     >R         :@ 
 j          @         7- zT
     /!
     z       *            +                             [ '                                      5
     0	                         i
 
 :              3                                                        !E                          r
         @      
             y
 4                                     Y  ? p	     l                  -H
 ї     .
   O	 k.                 ^B       Gj         	 k     k      |	      ?
         
     g     -                  Q                 h                ,                                &m                  @
             "                                   J YD             <         ִ
          ?q N
 S               	                 D v      <                               !     Y     4
              ^       Ƽ         +                                                                  nF                 	     r      
     , z      ׳              t s @         ;      	                               %
   
 6
 	             ^ f     E           ݺ
     f     3 j         V             S 
             b7                       )                 FA                 F
         ޒ         b 	       p     k     V
                             6         	     	 U
     >                 
                               ڷ	                                     x                              4
         V         D	 5 $            
                 y
     m	     m ҭ     =q      4 	     C                         ®
 \-
                  +     
 7      5
                 7	 ! H	      V         	     [ 9q             @ 3	                  (     
 T
             Ց P  w              ȇ     O	             N                 e
          d      y     ݼ                            1      !Z         p*     '~                     [ eq Ҷ         4 ͠ E     g4	          	                 " ? DU          .     # C 
              b                 #[   W     	  P                 Y
 =  z     
 g
                         l                               
 5           z         !
         @ w [
      8q |m >                     @         ,         G      n     f	 V      A                 r%
          l     
                
     g                 j              p     <	     l !                  k )          R    ^	         ?
                     m	          R                 ? 
                                                          
         \           7{         X c
   
                 Y                  Z                         
                 4             
             "     d
                                   5
 U
 @             z
         $             _	     ɇ                  s
         	 j                             Fy
          U      o 0         Gr =         .Z                 NZ            
 U                 ߊ
 4             \                            FU      W     
         	 fT
 J     	         Y     Gq     {                     ^                              A,         
     
              ԟ
 U     ~     M.     7     7       Y     ݧ         C + a
     X4
               
 Z         [      E '         g                            [               ;6 r"
                      Q	 Xk                    ]o )     wb     	     /         SI
 q  k p	                   D          W'  	                                 .
      M          T ߞ             	     cu
 J                                       6          xo ]                  A     x     U      |$             [         m                             ?                                  R%
 !   r x lk  O4	               r         	                                                       ek 		          (                                 `A          	                                                                E                         G
 @x              Y          [     )
               q  ː              F
 .         T
             
                                               &                                                    Z
 y
               )                      dZ             
     )     .~                     %             Zx      ;             ]Z
     ϩ     j 
                  ? G /         *      )          7B s $                             S; j 
     zb     )p             O	 !          
      
- + } &             P                                                            
                       r@ ^              
                             J             	     ":     Y          uZ         }           ~-
      5              -         qJ ,     T                   j              '.                  x         j_
             c	   q
                                      (
 2: 
         Y
                         @     ,     z
 o         -     '              Q	                                           y
 4	 E
 ڧ         S     o 
 0.         [ ?
                 x          	         
     ,     1z
                              8     ۞
 s                                  ܓ J           6              g m               m                     !
     )	                                     o 
              V 	                         
              8     A_
 ) V7             	 >
 j:                 
                         (
     j         	                             
                                         *     <  -                                    # m  + 
  +         b         E 3] ]H          d !r     ح         F[     O      
                              -! 8                           4                    @     8 Z     [         FM
     y         zr         `@     	         ͛     Ԕ    H
          
 8	 @         ~A       _         '  
 t{                 t
     F      
 2              M                                      jP          B4	 lY     P  b          8
         &     ]      u                 
 
     H	     S                      `	      x ͥ	     6 @I     9z S          E      	 Dz
                                   Z v	                        >? 
 2@                 Zz
      }~ U &      t
 _	 Я
  9
      3         ~               
          '          8  yF g r: n
         ߸ 
                  >                 A c$          = 8                  W          4
 H/             / 1
 ΀      ˞
     K
 hx                                       #                     	          H 
                 ;          u@                  o+ S%
             ^     l t         J             	                                     C!                 j                  5q                                     H                  >     6u
         	          !	      O+ y ^                            Z     +                               ! O +	          +     #                                 Y
 4      ?
                          _ r  5 e-  Q@ 	
 ^p o              #             R                     n                                                     	     .     9  | I
                         3              /	                 	 A 	                         T.     q          9
 j              J 
                                   /  a              3y  A                 |     
     4                                  7             g      u                              3          
 \      8             
 1y             3h '                     b *     X                      :  @         L     1%
     	                 
     B     Oe
     V               X+          ( J "                  k 	 k@      C	               ʯ
         -     r	         X         H     @
       4  ?' S /             M                                                       [                     m ?                  p	 E"
 z
         N
         7              Z             	         +             x S             Ը     
                       9 ˬ      >                      z             '     r } 8                  T         
     3             	 Ċ         	 |     /          + ^                                                 B         	 i         I     T
                          
 J!     I*         J               	     ql      U   C                          3 r 	 L                 3	             r <                        4	   0H       #k                     =              \, 
                          ;         /                                                 4h ys         V9
                     @     :		 q                 7 & 5
     O             
     
         |      m     	     @ q  Ӿ
     
 Z
 f             pp      Z               ?
     p	                      u g uO "9         =
                          
                                ݛ     @
                  f     r                      	     
                     Wk     
          :     $          
      x              I          + M w+     
      T N[         ǵ   {?
                  C           GV      
 /     8k     ]l          Ts             ~                 o5
                           [I     TZ                 -     " D	 ?	         n     rP   8                 ;         c
     	         
     /                j
     7  ҆             Ip             \  _A   j                 <	     	p	 Dj                 T
         Q+  P                           k     
 G   
       G _
     P
 :     CI ! Y P	 U
      P     )[     jl                  @     5
 Z 4N
              9	                         ǯ  $         '
                                   Y		         tU 
 m     
               z
      W                                                                b             
 P
            
         e	 .
           R M4     ~	  H
                                      
     HV                 
          UM
 
              	 x	 
 6         k,          %         &        3      =B         xF             %Z
         t
     -                 ~q             g .                           
 	      x F \ yA         & $^                 #         $5	                     z                                          T
     ]_	      l             TY        '         5     X                  Dl          Z         M,              {             m[              q     F              		                     >j  -Z     G
     #                    A F                 ԑ
     A             Z
         c         ]	 :     J  a                                          >     ݊          R _l         
     e	                     e. 
       X             )* @ 	 `	  h         n     ҇                   ^	                  9             ~       
 c[                         ! x                         -
 cq                                                    	 I
 ]                  H          W
         
                                   L                   + 3
 ?
             @     Bk
         E                     
 
                      	     ^ 1     G                           G	 4
          S                                Z
                  
      h       (H                             z     <	 #     P *                    %             q             !P                                            k       8d
     m
                 	 } & n      j  	     n     	         պ
                  	 ' $                           H
             Z                     [         m                     *  4
         *[ c@                         A (    A + ^ Y
 dJ t             V  <                                           u '     
             I                     	  G                 cZ     M H
     . M
 }.
       5R     
                      + k
              Qj         U 
             	  -                 
 Z                  ˺
                     .
         `
                 ~ u           	          z         K 
         
                 ;[        j     /      ە       .                S                    m 0                  P               0           Yx iO     ?                  .     
         -; ־ h
 :                              _	                   1     	 @ ͑                 + u	  MZ Y               Z     A     f              Q                     R                 Y      
 f
  W +                 Ll                                 + p	      F         - @     /     E     ۨ py 5+          	           N                                            8                   Z k D     9	             "      
           #              +             e     ?                 4 &	         G               	 S 	   j	     2 l 
+ $G _,     
         Ut                           C+                 

 UA                                     
- Z
             ?                                   A                         Y k+     : F )         j       )     ~+  d
     . 	 3         x A H                     [ I                     %
         k                                 P             Y         Z 4$             	     RS     jO u
                     A                         +                 1     I  +                 w n    s         _
  
                         	         b     ; d
                               H	                         +  n J D                       y
 e
          dh : 7
                  T !I
         )             *I
                  ] "
             
                  76      [                 )         r~ H      *             W 7     j         M             u 4         	     -*     	                  A                       6         XZ ,     . æ             +%
  p	 e"
 " q                                      
         !                     /                             8~     T         
         `
              r     ~	     2 [
 -         _	 5     $         
 3	         H
                                                 
               )              m   ;  { Z~  +Z     g 
 
 @                . y W
  ɠ                 s
                      y l8 _	 ?      
     d ݩ    AG                   ' [ 	 K	                               J I 
   y
                                    2          VA         
5 b
 o 	                  h 8 M .
      g 
                           
           [ @ p	                     1
                 
  q	         E     ?                 f         b         c. 	      
               	     v 
[         !  }          $
     z          8l                                        	  _
                 H
   q     wk 6             G
             	                         u"         A                    g
 s ~     E               f                           	 B  e^ Z              
             l       	     0O                             =
 E                 Ҵ
     	 d
     
Z      3                  ^. M
     H	                          զ	     ?      +                     ?         xu
                  Y ?              *`
 _            Gy               8	                             
     '             3  e                         7     E                  A k         c
      @          >
              I                                       ' 	     p	                      
 ; 4         j [               [                                             B     s z l T-               
                 
     o	                  {     M         	 #	             *@     
         NV                         @  t  ^     +         ! `                                      $     
     ,                 g	 '!         	 3 6
                       e |          >     n5
     |                          	     	 \     * +U         M	                                          7
                                         Y
    a
          >
 y.         5                Q6     2!          w
     @	 !     h b                     +         tH         	         r &9
 M         = 
         A   qj             
                 ` %
      A g     >?                            
                      ) _	 X              ʁ k         z             * 7              ?     k     ;          m      t
                               4                   L                                                               7     [ MT
     F$ I B           Hj eJ     X+ I Nq 
 4 _	 J t q                          ;         ? K	 [ + T 

                     	                  Z             wq     ax 3         L  	     ?     g
 W  _	 _	                 1Y
 R                 + S         }	         
                  g  b	 =!                     w :               	                                  qJ 	 B     P	         ^
                          	      Q     nZ      o                 4
                      9
                  p 
, 
 [ ^ ({     6          ;/          |            H!     Ky
 yY                          #Z             @ 
         E                Ju
             8
         S                   6t
              	     b
         
             &     t           2         z
 n*                   *                                   Y r p Ǭ       	     R              	     ހ         F               	 >9                                     ?            E     zj ɸ	         @  g     R jY      .   h     F9     ~c +
 gJ D     m	 (  	 Y                  	 O         O :                      	  < =         z     @                  [	          ? 8q C 
                           ?         N ަ Z                         Z                 [7                          B J  ;  @ 	            . 8O        	          w      |                            E
 J                                 
       / f
     T ;         Yk L
             Jq     4  x             T
                 إ           o n 	 1 l)     Y         f
 M 1U         3	     n ܚ Ҷ     VD      R                         fl                 @
                 q                      7 T         	                          
             	 |c         ?                      6            t
         ;         s     ~e
 v
     g  >g 	 u     
      [         M	            
                         q
         R         	     Z 	             b         it
 ?
  
     Х	          o	     Y             b         o"     
         d                       ^      a                     8 چ p Γ 	         Ҡ     i  +  f$
 $
     	  ;X      H5
 t
         i *    # A 	 
                           :                                          , 
     
 |l                   :                                                s                          S                           J          
 (                     ڟ
 W	 P e
 _
         U	                             H )                        4               
                                               6     _	                                  
 3	     :     d                         + D     |8             9                      !|         3 
     e U     X R                         	 O  ; ~  E     n     X  O 	     L Q           
      G         
                                         B         F             6                 ,             ޾
         ?
     6         .
       * V[ 
 : 	     Z \	     ,                      5         +.
     j                                                   )N
 ~         	 {o     dg         
 R / T
                              m 2                     ړ      X                                         p
 @         z k d Z      z Y
             o	           zz
      7
     [ !
           H~           R  "
      Έ 8Z         \
   m     	                       b  k             SH         
         I                              
         
             Y z     
                         <k s%  _ 	 b                                                           \:         q N
                                                 
       0R            J 
     E fx                     S X                              #     _                     	 3%
     [k     B l              i
 ,	          Q:     g                 ~           ͯ
       U 
   Ȩ                           j             A          E     [     i  %
 h
 @	     C                 @     ՞         ƨ                  B5
                 ک
         Y i         f+     x^
 E             յ " U
             U
         u k
 X[ 	                 S     P
     ^                    m     8
 #e
                                  
5	 E%
     6 q      P             ;              lh     ~G         | )  
                     * 
                     '          	         +                  I ) R                     6  k              g          27 z ɼ 
       n       E +                               <          	               t
 	   [ J          @y     k
     ϗ	         r t Z       m                 ]: 	       D {     7 8                      G     &
     F                  / ] 
.     6   ^	     
 Y
     n P 
                     cy         \               r
                      X         	             f                                 L                                       R  : "
 
  
 8 G OU     ? R                     ?      
     G  Z            2Z             ~                                   r t
 i           B qs                 Ҋ
     
                 
                                        tE     p      W+             W+     }      E k      
         '      Ck          5r +Z
        
     3
 	     *             "[         +
 	 )             
                     W                         s	     .  h U
 
 ^
 
                  M
                    Y? R     2@ 9
             z  -
                   -
                  C     H     8 F?     MZ m  {                 
 d
     =	             .
   0	 !U
     s t                 z                     [                  ?+ C 
           R-                      "     {      	 Z	               v;  -
 ϶ N              	         1H 
 d6     _
 ^ R     @ O         Z :                  ]z
                         Od
                     r      k
     Hl              %                          
 5                     _		                          l         &         Ƿ i                            1
           3     Ps 
         g     C      8S ݐ         l@  
 ,     4l
                          
             %O                               @                          ?,                     '                     Ȟ
 * (       
     Ś     R     +             c               Y              !     -q      ^ l	                                   ?
     K 	 vS          ' D U
         o     E     
             J 4                     o @          	     ."             S      5	          \     M              	 
 GE     	     c                  ,      /h     
 ϸ                        	     
     	 }
 [                 2*         $V '             Y                  o      ߙ
     C
          Ѷ 
 Z                     	 	                                                                      *                                                      E     +         *     a      L
  P                                              2 >              % !
      ɷ                 	 w      n         !     ^                          	                                             VI
                          5%             7 ~ v  
              Y w                Y     h  " 4
         	 t	     @                 7 qy
                      e } 
 ]U     ѧ              t	 l
         ,                                         k
                  _Z                           zO      !                  V                         j     _
                   A                  
                              l vq                 :?            4                            3	         1	 	 	 !     R                         [; 	 (         [A     CS           ҧ }         o     
 O                 
 }     
                           
       q z
          ,         D             <T
             )     
                          7	         
     r          
                               q	       \g     /           `                 {[ T
      	         
         I"
 	  A I
         H _	     ky +     	     } s@
                  k         <S     j k 	         vy                                     0             c g 
             Δ     !
 *                  q             k ̊ x                       q                                        =E         ,	 
             [I
              j m                 U-                                   6 8      l     )         T
 !         [     ٗ                           i                 o|     r ߻
 $;          L
         
           Z         t     ,     ?Y
 PV         ]@ HS     9Y     "d
     x         vk                          S
     ` 
 6 hk         u
     -                         `'                   d ' .     q
      h              ' j 4 A  z          	     ^ 	 (Z                   ]~                                    E ~@         j I
                     	              1 0H
             &U     +         
     bZ                     % ` Z
                       s U v      k{             \             M                       i	              e
                      "@
             vb                          )                           	                             3N
 A Z             {
          _B     s         Ȫ !!  X ^	                  	             
 V      + ^                      n     
     m
         =k
 N5 C
         {{     G
         Db                 V         A ^     i                         w 
                      cl
         / n m     '4	         Wq H *      "	     .!             
             }[ q	              A		         	 Ʌ
 ~     h                        & 
                             `
 
                            qp	         k t cH
 *r t
           .A     	                          q n 	 ƭ                                    !     8F n_	                     #     
     
 
 c             G      G         J     
"
                                  
             Z              Ϩ     r w}         
     ̓ E                         z!                              W                        r  _X         J/  4B                  X5	                 *     .     W
     ~8                  E  0     R     S       R      u[ $
         
         z \ f8
 j                S     S      	                  gH
     S S      rJ 
      c ǫ E 2R         
     S _	 ~ Y              z
 3
                         !S         g         &S                                   ́       9
                 &	                      t       &         
 0S ~                                      Y	 y                     5S     eD BS      7  z	 JS     B                     ]         W o
 OS Z 
                     VS     Ë
             R     =t
               x
 u$     @     q!
                 +r [S         `S                 :     ! fS                                {                 k         }
 I 
             - tS      i  Y          U Z
  1     ~S   S                 k+          T 
                     S 
                                                  S      	       	                         c     S         S   
         !     <           _                                                                        S                           0  S  S  S                   ?                              |          .     / B   	     D 
 4u
             ,  O                       S                  A	                ϗ      6     S
  Ř     :t   S  ' S + a	 - 	     
          9t
         o      B                   S         S Y"
     `, ͷ	 > N
      S ^ D	 +u
 ]      Ϻ
          	  RM
     Z                          %]                 	         -     +]                  z       k             	 > ,         FH      a8                                  ;]     /         
         @]       Z/                     O y     k Y U     g, &p      !
     |R              g                /_
             @B N               #A     	 .
          n             	     ^     I] &<     B.               o 	                          q                              Y]     _]        L   i]   u]     {]         
 M
                  .      J           ] ]                  ]             ]     	 | O                           1 B                @ ]                  +     M A !  0r             <(         ]     ҁ             ?     8             ]         T ]     
             C  O                   <t         2                 { 	     *                     o	     ^ (U                          >           l
 p   !  ^#     O                     M%
         A p ]     9E      ڥ	              0         ,      Y     g          
		          
 T                  b
           oR     	 j     ׯ         @ I                 	  Y6 t
 P          G                    c
  r             Zy         ^
 
                            ?                 	           
j j     3	              2                         F              ב     Ҿ           P  }  -
             0Z      l         B
  6     O
               R                 `	         
               ؑ
     	          ' U      @                  \ -          
             Jk JZ < 	     
     ھ
         z jh `     6  
             R Lp     k	 ~ !              ge
  %              0
     + Q          %
          1 z      0  l	      l     %
              
 	                     
             uV     m         5
                  F-                             Un                           ê R _                   ?
 n	          	        2	 
4	         
             	 j             G 
                  W     U             c          	 +	 	                          ~B              	            3     4
     S     U Z                 3             3	              <k
                 ^+ f      #         a     %
                                                ^
         
                         3      $
                            D
                              ;r              
                 
 	_	     e
                 `d
     
                                        Fd
 ~;      &      R                        k                         4
     ]     B8 
     , BE         F  
      4                     [      	 $
               9         /  {Y 	 	 	     
                     gl
     9	                      Z7                                  $y _
                         J                 ?
 V q         6A                    n                 dq 
          .     o? X	         @ *      3 $
 5( 	 ?                                 
       ݠ          c                                      +	              t '                         8
                                          
         /                     	 X5
             0 ޥ	                     = 	           4 	   y                  I }     Z y             2       V )                      
 <5	                                       	 -     .  I                                  > @         R         [?              X h
 %
       r          !              
      e
     2 %p	 
         V	             
  j                 >          @         j                 _                                           ~                    |p	                          $q d+                   rM
      p         
 n               c     $Z                  t 
         V 
     Q      `
     
     &     @Z
     K B     (     "                      
                        !          j      =5	  Ӗ	 F     k      wA               { e Sq 'U     Q
                                                    *             l  J
 @                      =     ' 
         KJ                     3k iq     l                     c.         [q             3	 ?  j	          `              	 &V l             O H	         
              L         :+ E         Rk                                   + n                                                             U     ֚                                        Z                                    "
              
 	         5
     QY %
  
 ^
         T
         aX ]m             J"
         W     {u
             a     E
                                           _
 
                                  	  }H G 	             [     G                                  r     ,              z{     ë         v-
 ^                      P >_	     t
     (          
         &         
     
	  I         U     X                                      @y  ?
               /	 P                         Ζ                 /G                                          :                        * k <     %                   
 &      PB                 	 (	 /                 \	     r^
 l"
                     Wn  q          X U         s     I 	     "
            \                                       5       <
     5      ~ 
 	 ' ] /Z  G
                                      h nu
                   k                             	 z  6 ;          ;l
 L "     ;	 y $                   8] @                             Z  :+ X                 T     
      _
 @  ,Y } A             P?
         H@
         "	         k :          @	 A                         T
     H
     c                              1O 	 yl ˁ         R          T
 
 )         f      
      v ^ !
 \                G     
                        D         *     ~                 ~                     
          BH
           E J	             Y           @                         x             ^ p     3 ;	             m     Ul                          U?                                 d         %                                `
 +          d     3E                      	                         U     $
  3	     
     Q r} 7                               ~                  g
     x)     ] }	     ė	     G[         v 	     ! Z ͪ
  R+                 6      +	                             7*             ԁ	     T          _               P D I               b E     #                              :             Y
       | ]     ^
              A                  6                    A     
         O% -                          
                 f         {                  
         k
 9	     .                  `     c 	                  e
 
 4* D j 6{      ڱ  C  {l u  9 ;                 
         ? Z	 3          ^. ?          ^	  G `l 7 S
     "     g
                       `      ux     P Y IZ
 5         Eq      
     }           ʏ Z                             v                     :
                 S8
         t      -     U 
     	         5
 B         v .                              :       k# j*         A ]?
              `
                  KZ
 '
 m     N{      
                     ;      l _. E Z [                 -              Q_
                           gO         D     x     ?
  E                 .
      }                     o } -             /   z:      4	 o4	                          T         1G h
 $
             Ź                 	.                 m     9D         r              I,             f     )A         f &         4
                     Y      y  ?"     x     !
                          ̒      ! P	             ~
 A;         Z           /             
         
     N                               >                     5              U         Y     B	 , |
                                             Z	             // unordered_map implementation -*- C++ -*-

// Copyright (C) 2010-2022 Free Software Foundation, Inc.
//
// This file is part of the GNU ISO C++ Library.  This library is free
// software; you can redistribute it and/or modify it under the
// terms of the GNU General Public License as published by the
// Free Software Foundation; either version 3, or (at your option)
// any later version.

// This library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
// GNU General Public License for more details.

// Under Section 7 of GPL version 3, you are granted additional
// permissions described in the GCC Runtime Library Exception, version
// 3.1, as published by the Free Software Foundation.

// You should have received a copy of the GNU General Public License and
// a copy of the GCC Runtime Library Exception along with this program;
// see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
// <http://www.gnu.org/licenses/>.

/** @file bits/unordered_map.h
 *  This is an internal header file, included by other library headers.
 *  Do not attempt to use it directly. @headername{unordered_map}
 */

#ifndef _UNORDERED_MAP_H
#define _UNORDERED_MAP_H

namespace std _GLIBCXX_VISIBILITY(default)
{
_GLIBCXX_BEGIN_NAMESPACE_VERSION
_GLIBCXX_BEGIN_NAMESPACE_CONTAINER

  /// Base types for unordered_map.
  // Forwards _Cache and fixes the remaining two _Hashtable_traits arguments
  // to <false, true>.  The last argument is the only difference from
  // __ummap_traits (true here, false there) -- presumably the unique-keys
  // flag; confirm against the _Hashtable_traits definition.
  template<bool _Cache>
    using __umap_traits = __detail::_Hashtable_traits<_Cache, false, true>;

  // The _Hashtable instantiation that unordered_map is built on.  The stored
  // value type is std::pair<const _Key, _Tp> and __detail::_Select1st is the
  // key extractor argument.  _Tr defaults to __umap_traits with the cache
  // flag taken from __cache_default<_Key, _Hash>::value.
  template<typename _Key,
	   typename _Tp,
	   typename _Hash = hash<_Key>,
	   typename _Pred = std::equal_to<_Key>,
	   typename _Alloc = std::allocator<std::pair<const _Key, _Tp> >,
	   typename _Tr = __umap_traits<__cache_default<_Key, _Hash>::value>>
    using __umap_hashtable = _Hashtable<_Key, std::pair<const _Key, _Tp>,
                                        _Alloc, __detail::_Select1st,
				        _Pred, _Hash,
				        __detail::_Mod_range_hashing,
				        __detail::_Default_ranged_hash,
				        __detail::_Prime_rehash_policy, _Tr>;

  /// Base types for unordered_multimap.
  // Same shape as __umap_traits, but the last _Hashtable_traits argument is
  // false instead of true -- presumably "keys are not unique"; confirm
  // against the _Hashtable_traits definition.
  template<bool _Cache>
    using __ummap_traits = __detail::_Hashtable_traits<_Cache, false, false>;

  // The _Hashtable instantiation for unordered_multimap.  It takes the same
  // arguments as __umap_hashtable except that _Tr defaults to __ummap_traits.
  template<typename _Key,
	   typename _Tp,
	   typename _Hash = hash<_Key>,
	   typename _Pred = std::equal_to<_Key>,
	   typename _Alloc = std::allocator<std::pair<const _Key, _Tp> >,
	   typename _Tr = __ummap_traits<__cache_default<_Key, _Hash>::value>>
    using __ummap_hashtable = _Hashtable<_Key, std::pair<const _Key, _Tp>,
					 _Alloc, __detail::_Select1st,
					 _Pred, _Hash,
					 __detail::_Mod_range_hashing,
					 __detail::_Default_ranged_hash,
					 __detail::_Prime_rehash_policy, _Tr>;

  // Forward declaration: unordered_map::merge() has overloads that take an
  // unordered_multimap, so the name must be visible before unordered_map.
  template<class _Key, class _Tp, class _Hash, class _Pred, class _Alloc>
    class unordered_multimap;

  /**
   *  @brief A standard container composed of unique keys (containing
   *  at most one of each key value) that associates values of another type
   *  with the keys.
   *
   *  @ingroup unordered_associative_containers
   *
   *  @tparam  _Key    Type of key objects.
   *  @tparam  _Tp     Type of mapped objects.
   *  @tparam  _Hash   Hashing function object type, defaults to hash<_Key>.
   *  @tparam  _Pred   Predicate function object type, defaults
   *                   to equal_to<_Key>.
   *  @tparam  _Alloc  Allocator type, defaults to 
   *                   std::allocator<std::pair<const _Key, _Tp>>.
   *
   *  Meets the requirements of a <a href="tables.html#65">container</a>, and
   *  <a href="tables.html#xx">unordered associative container</a>
   *
   * The resulting value type of the container is std::pair<const _Key, _Tp>.
   *
   *  Base is _Hashtable, dispatched at compile time via template
   *  alias __umap_hashtable.
   */
  template<typename _Key, typename _Tp,
	   typename _Hash = hash<_Key>,
	   typename _Pred = equal_to<_Key>,
	   typename _Alloc = allocator<std::pair<const _Key, _Tp>>>
    class unordered_map
    {
      // The concrete hashtable type (an instantiation of __umap_hashtable) and
      // the single data member that the member functions below delegate to.
      typedef __umap_hashtable<_Key, _Tp, _Hash, _Pred, _Alloc>  _Hashtable;
      _Hashtable _M_h;

    public:
      // typedefs:
      ///@{
      /// Public typedefs.
      // Each of these is re-exported unchanged from the underlying _Hashtable.
      typedef typename _Hashtable::key_type	key_type;
      typedef typename _Hashtable::value_type	value_type;
      typedef typename _Hashtable::mapped_type	mapped_type;
      typedef typename _Hashtable::hasher	hasher;
      typedef typename _Hashtable::key_equal	key_equal;
      typedef typename _Hashtable::allocator_type allocator_type;
      ///@}

      ///@{
      ///  Iterator-related typedefs.
      // Pointer, reference, iterator and size types, all taken directly from
      // the underlying _Hashtable.
      typedef typename _Hashtable::pointer		pointer;
      typedef typename _Hashtable::const_pointer	const_pointer;
      typedef typename _Hashtable::reference		reference;
      typedef typename _Hashtable::const_reference	const_reference;
      typedef typename _Hashtable::iterator		iterator;
      typedef typename _Hashtable::const_iterator	const_iterator;
      typedef typename _Hashtable::local_iterator	local_iterator;
      typedef typename _Hashtable::const_local_iterator	const_local_iterator;
      typedef typename _Hashtable::size_type		size_type;
      typedef typename _Hashtable::difference_type	difference_type;
      ///@}

#if __cplusplus > 201402L
      // Node-handle types, only declared for language versions after C++14.
      // node_type is what extract() returns; insert_return_type is what
      // insert(node_type&&) returns.
      using node_type = typename _Hashtable::node_type;
      using insert_return_type = typename _Hashtable::insert_return_type;
#endif

      //construct/destroy/copy

      /// Default constructor.
      // Defaulted, so _M_h is default-initialized.
      unordered_map() = default;

      /**
       *  @brief  Default constructor creates no elements.
       *  @param __n  Minimal initial number of buckets.
       *  @param __hf  A hash functor.
       *  @param __eql  A key equality functor.
       *  @param __a  An allocator object.
       */
      // explicit: a bare bucket count must not convert implicitly to a map.
      // All four arguments are handed to the hashtable unchanged.
      explicit
      unordered_map(size_type __n,
		    const hasher& __hf = hasher(),
		    const key_equal& __eql = key_equal(),
		    const allocator_type& __a = allocator_type())
      : _M_h(__n, __hf, __eql, __a)
      { }

      /**
       *  @brief  Builds an %unordered_map from a range.
       *  @param  __first  An input iterator.
       *  @param  __last  An input iterator.
       *  @param __n  Minimal initial number of buckets.
       *  @param __hf  A hash functor.
       *  @param __eql  A key equality functor.
       *  @param __a  An allocator object.
       *
       *  Create an %unordered_map consisting of copies of the elements from
       *  [__first,__last).  This is linear in N (where N is
       *  distance(__first,__last)).
       */
      // The range and every optional argument are passed straight through to
      // the hashtable's constructor; __n defaults to 0.
      template<typename _InputIterator>
	unordered_map(_InputIterator __first, _InputIterator __last,
		      size_type __n = 0,
		      const hasher& __hf = hasher(),
		      const key_equal& __eql = key_equal(),
		      const allocator_type& __a = allocator_type())
	: _M_h(__first, __last, __n, __hf, __eql, __a)
	{ }

      /// Copy constructor.
      // Defaulted: copy-constructs the only data member, _M_h.
      unordered_map(const unordered_map&) = default;

      /// Move constructor.
      // Defaulted: move-constructs the only data member, _M_h.
      unordered_map(unordered_map&&) = default;

      /**
       *  @brief Creates an %unordered_map with no elements.
       *  @param __a An allocator object.
       */
      // explicit: an allocator must not convert implicitly to a map.
      explicit
      unordered_map(const allocator_type& __a)
	: _M_h(__a)
      { }

      /**
       *  @brief  Copy constructor with allocator argument.
       *  @param  __umap  Input %unordered_map to copy.
       *  @param  __a  An allocator object.
       */
      // Copies __umap's hashtable, passing __a to the hashtable's
      // (table, allocator) constructor.
      unordered_map(const unordered_map& __umap,
		    const allocator_type& __a)
      : _M_h(__umap._M_h, __a)
      { }

      /**
       *  @brief  Move constructor with allocator argument.
       *  @param  __umap  Input %unordered_map to move.
       *  @param  __a    An allocator object.
       */
      // noexcept exactly when constructing a _Hashtable from
      // (std::move(__umap._M_h), __a) is noexcept.
      unordered_map(unordered_map&& __umap,
		    const allocator_type& __a)
	noexcept( noexcept(_Hashtable(std::move(__umap._M_h), __a)) )
      : _M_h(std::move(__umap._M_h), __a)
      { }

      /**
       *  @brief  Builds an %unordered_map from an initializer_list.
       *  @param  __l  An initializer_list.
       *  @param __n  Minimal initial number of buckets.
       *  @param __hf  A hash functor.
       *  @param __eql  A key equality functor.
       *  @param  __a  An allocator object.
       *
       *  Create an %unordered_map consisting of copies of the elements in the
       *  list. This is linear in N (where N is @a __l.size()).
       */
      // Not explicit, so a braced list of value_type can initialize a map
      // implicitly.  All arguments are forwarded to the hashtable.
      unordered_map(initializer_list<value_type> __l,
		    size_type __n = 0,
		    const hasher& __hf = hasher(),
		    const key_equal& __eql = key_equal(),
		    const allocator_type& __a = allocator_type())
      : _M_h(__l, __n, __hf, __eql, __a)
      { }

      /// Bucket count and allocator only: delegates to the
      /// (n, hasher, key_equal, allocator) constructor with temporary
      /// hasher() and key_equal() objects.
      unordered_map(size_type __n, const allocator_type& __a)
      : unordered_map(__n, hasher(), key_equal(), __a)
      { }

      /// Bucket count, hasher and allocator: delegates to the
      /// (n, hasher, key_equal, allocator) constructor with a temporary
      /// key_equal() object.
      unordered_map(size_type __n, const hasher& __hf,
		    const allocator_type& __a)
      : unordered_map(__n, __hf, key_equal(), __a)
      { }

      /// Range, bucket count and allocator: delegates to the full range
      /// constructor with temporary hasher() and key_equal() objects.
      template<typename _InputIterator>
	unordered_map(_InputIterator __first, _InputIterator __last,
		      size_type __n,
		      const allocator_type& __a)
	: unordered_map(__first, __last, __n, hasher(), key_equal(), __a)
	{ }

      /// Range, bucket count, hasher and allocator: delegates to the full
      /// range constructor with a temporary key_equal() object.
      template<typename _InputIterator>
	unordered_map(_InputIterator __first, _InputIterator __last,
		      size_type __n, const hasher& __hf,
		      const allocator_type& __a)
	  : unordered_map(__first, __last, __n, __hf, key_equal(), __a)
	{ }

      /// Initializer list, bucket count and allocator: delegates to the full
      /// initializer_list constructor with temporary hasher() and key_equal()
      /// objects.
      unordered_map(initializer_list<value_type> __l,
		    size_type __n,
		    const allocator_type& __a)
      : unordered_map(__l, __n, hasher(), key_equal(), __a)
      { }

      /// Initializer list, bucket count, hasher and allocator: delegates to
      /// the full initializer_list constructor with a temporary key_equal()
      /// object.
      unordered_map(initializer_list<value_type> __l,
		    size_type __n, const hasher& __hf,
		    const allocator_type& __a)
      : unordered_map(__l, __n, __hf, key_equal(), __a)
      { }

      /// Copy assignment operator.
      // Defaulted: copy-assigns the only data member, _M_h.
      unordered_map&
      operator=(const unordered_map&) = default;

      /// Move assignment operator.
      // Defaulted: move-assigns the only data member, _M_h.
      unordered_map&
      operator=(unordered_map&&) = default;

      /**
       *  @brief  %Unordered_map list assignment operator.
       *  @param  __l  An initializer_list.
       *
       *  This function fills an %unordered_map with copies of the elements in
       *  the initializer list @a __l.
       *
       *  Note that the assignment completely changes the %unordered_map and
       *  that the resulting %unordered_map's size is the same as the number
       *  of elements assigned.
       */
      unordered_map&
      operator=(initializer_list<value_type> __l)
      {
	// The hashtable's own initializer_list assignment does the work.
	_M_h = __l;
	return *this;
      }

      ///  Returns the allocator object used by the %unordered_map.
      // Returned by value, as reported by the hashtable.
      allocator_type
      get_allocator() const noexcept
      { return _M_h.get_allocator(); }

      // size and capacity:

      ///  Returns true if the %unordered_map is empty.
      // Marked _GLIBCXX_NODISCARD; the answer comes from the hashtable.
      _GLIBCXX_NODISCARD bool
      empty() const noexcept
      { return _M_h.empty(); }

      ///  Returns the size of the %unordered_map.
      // Forwards to the hashtable's size().
      size_type
      size() const noexcept
      { return _M_h.size(); }

      ///  Returns the maximum size of the %unordered_map.
      // Forwards to the hashtable's max_size().
      size_type
      max_size() const noexcept
      { return _M_h.max_size(); }

      // iterators.

      /**
       *  Returns a read/write iterator that points to the first element in the
       *  %unordered_map.
       */
      // Non-const overload: yields the hashtable's mutable begin iterator.
      iterator
      begin() noexcept
      { return _M_h.begin(); }

      ///@{
      /**
       *  Returns a read-only (constant) iterator that points to the first
       *  element in the %unordered_map.
       */
      // Const overload: _M_h is const here, so the result is a const_iterator.
      const_iterator
      begin() const noexcept
      { return _M_h.begin(); }

      // Same body as the const begin(): calls _M_h.begin() on the const table.
      const_iterator
      cbegin() const noexcept
      { return _M_h.begin(); }
      ///@}

      /**
       *  Returns a read/write iterator that points one past the last element in
       *  the %unordered_map.
       */
      // Non-const overload: yields the hashtable's mutable end iterator.
      iterator
      end() noexcept
      { return _M_h.end(); }

      ///@{
      /**
       *  Returns a read-only (constant) iterator that points one past the last
       *  element in the %unordered_map.
       */
      // Const overload: _M_h is const here, so the result is a const_iterator.
      const_iterator
      end() const noexcept
      { return _M_h.end(); }

      // Same body as the const end(): calls _M_h.end() on the const table.
      const_iterator
      cend() const noexcept
      { return _M_h.end(); }
      ///@}

      // modifiers.

      /**
       *  @brief Attempts to build and insert a std::pair into the
       *  %unordered_map.
       *
       *  @param __args  Arguments used to generate a new pair instance (see
       *	        std::piecewise_construct for passing arguments to each
       *	        part of the pair constructor).
       *
       *  @return  A pair, of which the first element is an iterator that points
       *           to the possibly inserted pair, and the second is a bool that
       *           is true if the pair was actually inserted.
       *
       *  This function attempts to build and insert a (key, value) %pair into
       *  the %unordered_map.
       *  An %unordered_map relies on unique keys and thus a %pair is only
       *  inserted if its first element (the key) is not already present in the
       *  %unordered_map.
       *
       *  Insertion requires amortized constant time.
       */
      // Perfect-forwards __args to the hashtable's emplace.
      template<typename... _Args>
	std::pair<iterator, bool>
	emplace(_Args&&... __args)
	{ return _M_h.emplace(std::forward<_Args>(__args)...); }

      /**
       *  @brief Attempts to build and insert a std::pair into the
       *  %unordered_map.
       *
       *  @param  __pos  An iterator that serves as a hint as to where the pair
       *                should be inserted.
       *  @param  __args  Arguments used to generate a new pair instance (see
       *	         std::piecewise_construct for passing arguments to each
       *	         part of the pair constructor).
       *  @return An iterator that points to the element with key of the
       *          std::pair built from @a __args (may or may not be that
       *          std::pair).
       *
       *  This function is not concerned about whether the insertion took place,
       *  and thus does not return a boolean like the single-argument emplace()
       *  does.
       *  Note that the first parameter is only a hint and can potentially
       *  improve the performance of the insertion process. A bad hint would
       *  cause no gains in efficiency.
       *
       *  See
       *  https://gcc.gnu.org/onlinedocs/libstdc++/manual/associative.html#containers.associative.insert_hints
       *  for more on @a hinting.
       *
       *  Insertion requires amortized constant time.
       */
      // Passes the hint through and perfect-forwards __args to the
      // hashtable's emplace_hint.
      template<typename... _Args>
	iterator
	emplace_hint(const_iterator __pos, _Args&&... __args)
	{ return _M_h.emplace_hint(__pos, std::forward<_Args>(__args)...); }

#if __cplusplus > 201402L
      /// Extract a node.
      node_type
      extract(const_iterator __pos)
      {
	// end() does not designate an element, so it is rejected by assertion
	// before the hashtable is asked to extract.
	__glibcxx_assert(__pos != end());
	return _M_h.extract(__pos);
      }

      /// Extract a node.
      // Key-based overload: forwards __key to the hashtable's extract.
      node_type
      extract(const key_type& __key)
      { return _M_h.extract(__key); }

      /// Re-insert an extracted node.
      // Moves the node handle into the hashtable's _M_reinsert_node and
      // returns its insert_return_type result unchanged.
      insert_return_type
      insert(node_type&& __nh)
      { return _M_h._M_reinsert_node(std::move(__nh)); }

      /// Re-insert an extracted node.
      // The hint parameter is unnamed and unused; only the `position` member
      // of the _M_reinsert_node result is returned.
      iterator
      insert(const_iterator, node_type&& __nh)
      { return _M_h._M_reinsert_node(std::move(__nh)).position; }

// Feature-test macro defined alongside the C++17 try_emplace members below.
#define __cpp_lib_unordered_map_try_emplace 201411L
      /**
       *  @brief Attempts to build and insert a std::pair into the
       *  %unordered_map.
       *
       *  @param __k    Key to use for finding a possibly existing pair in
       *                the unordered_map.
       *  @param __args  Arguments used to generate the .second for a 
       *                new pair instance.
       *
       *  @return  A pair, of which the first element is an iterator that points
       *           to the possibly inserted pair, and the second is a bool that
       *           is true if the pair was actually inserted.
       *
       *  This function attempts to build and insert a (key, value) %pair into
       *  the %unordered_map.
       *  An %unordered_map relies on unique keys and thus a %pair is only
       *  inserted if its first element (the key) is not already present in the
       *  %unordered_map.
       *  If a %pair is not inserted, this function has no effect.
       *
       *  Insertion requires amortized constant time.
       */
      template <typename... _Args>
	pair<iterator, bool>
	try_emplace(const key_type& __k, _Args&&... __args)
	{
	  // This overload has no caller hint, so cend() is passed in the
	  // position where the hinted overloads pass __hint.
	  return _M_h.try_emplace(cend(), __k, std::forward<_Args>(__args)...);
	}

      // move-capable overload
      template <typename... _Args>
	pair<iterator, bool>
	try_emplace(key_type&& __k, _Args&&... __args)
	{
	  // As the const key_type& overload, except the key is passed on as an
	  // rvalue via std::move.
	  return _M_h.try_emplace(cend(), std::move(__k),
				  std::forward<_Args>(__args)...);
	}

      /**
       *  @brief Attempts to build and insert a std::pair into the
       *  %unordered_map.
       *
       *  @param  __hint  An iterator that serves as a hint as to where the pair
       *                should be inserted.
       *  @param __k    Key to use for finding a possibly existing pair in
       *                the unordered_map.
       *  @param __args  Arguments used to generate the .second for a 
       *                new pair instance.
       *  @return An iterator that points to the element with key of the
       *          std::pair built from @a __args (may or may not be that
       *          std::pair).
       *
       *  This function is not concerned about whether the insertion took place,
       *  and thus does not return a boolean like the single-argument emplace()
       *  does. However, if insertion did not take place,
       *  this function has no effect.
       *  Note that the first parameter is only a hint and can potentially
       *  improve the performance of the insertion process. A bad hint would
       *  cause no gains in efficiency.
       *
       *  See
       *  https://gcc.gnu.org/onlinedocs/libstdc++/manual/associative.html#containers.associative.insert_hints
       *  for more on @a hinting.
       *
       *  Insertion requires amortized constant time.
       */
      template <typename... _Args>
	iterator
	try_emplace(const_iterator __hint, const key_type& __k,
		    _Args&&... __args)
	{
	  // The hashtable returns an (iterator, bool) pair; the bool is
	  // dropped here and only the iterator is returned.
	  return _M_h.try_emplace(__hint, __k,
				  std::forward<_Args>(__args)...).first;
	}

      // move-capable overload
      template <typename... _Args>
	iterator
	try_emplace(const_iterator __hint, key_type&& __k, _Args&&... __args)
	{
	  // Hinted form with the key passed on as an rvalue; only the iterator
	  // half of the hashtable's result is returned.
	  return _M_h.try_emplace(__hint, std::move(__k),
				  std::forward<_Args>(__args)...).first;
	}
#endif // C++17

      ///@{
      /**
       *  @brief Attempts to insert a std::pair into the %unordered_map.
       *
       *  @param __x Pair to be inserted (see std::make_pair for easy
       *	     creation of pairs).
       *
       *  @return  A pair, of which the first element is an iterator that 
       *           points to the possibly inserted pair, and the second is 
       *           a bool that is true if the pair was actually inserted.
       *
       *  This function attempts to insert a (key, value) %pair into the
       *  %unordered_map. An %unordered_map relies on unique keys and thus a
       *  %pair is only inserted if its first element (the key) is not already
       *  present in the %unordered_map.
       *
       *  Insertion requires amortized constant time.
       */
      // Lvalue overload: __x is passed to the hashtable by const reference.
      std::pair<iterator, bool>
      insert(const value_type& __x)
      { return _M_h.insert(__x); }

      // _GLIBCXX_RESOLVE_LIB_DEFECTS
      // 2354. Unnecessary copying when inserting into maps with braced-init
      // Rvalue overload: __x is handed to the hashtable via std::move.
      std::pair<iterator, bool>
      insert(value_type&& __x)
      { return _M_h.insert(std::move(__x)); }

      // Enabled only when value_type is constructible from _Pair&&.  The
      // argument is perfect-forwarded to the hashtable's emplace, not insert.
      template<typename _Pair>
	__enable_if_t<is_constructible<value_type, _Pair&&>::value,
		      pair<iterator, bool>>
	insert(_Pair&& __x)
        { return _M_h.emplace(std::forward<_Pair>(__x)); }
      ///@}

      ///@{
      /**
       *  @brief Attempts to insert a std::pair into the %unordered_map.
       *  @param  __hint  An iterator that serves as a hint as to where the
       *                 pair should be inserted.
       *  @param  __x  Pair to be inserted (see std::make_pair for easy creation
       *               of pairs).
       *  @return An iterator that points to the element with key of
       *           @a __x (may or may not be the %pair passed in).
       *
       *  This function is not concerned about whether the insertion took place,
       *  and thus does not return a boolean like the single-argument insert()
       *  does.  Note that the first parameter is only a hint and can
       *  potentially improve the performance of the insertion process.  A bad
       *  hint would cause no gains in efficiency.
       *
       *  See
       *  https://gcc.gnu.org/onlinedocs/libstdc++/manual/associative.html#containers.associative.insert_hints
       *  for more on @a hinting.
       *
       *  Insertion requires amortized constant time.
       */
      // Hinted lvalue overload: hint and value are both passed through.
      iterator
      insert(const_iterator __hint, const value_type& __x)
      { return _M_h.insert(__hint, __x); }

      // _GLIBCXX_RESOLVE_LIB_DEFECTS
      // 2354. Unnecessary copying when inserting into maps with braced-init
      // Hinted rvalue overload: __x is handed to the hashtable via std::move.
      iterator
      insert(const_iterator __hint, value_type&& __x)
      { return _M_h.insert(__hint, std::move(__x)); }

      // Enabled only when value_type is constructible from _Pair&&.  The
      // argument is perfect-forwarded to the hashtable's emplace_hint.
      template<typename _Pair>
	__enable_if_t<is_constructible<value_type, _Pair&&>::value, iterator>
	insert(const_iterator __hint, _Pair&& __x)
	{ return _M_h.emplace_hint(__hint, std::forward<_Pair>(__x)); }
      ///@}

      /**
       *  @brief A template function that attempts to insert a range of
       *  elements.
       *  @param  __first  Iterator pointing to the start of the range to be
       *                   inserted.
       *  @param  __last  Iterator pointing to the end of the range.
       *
       *  Complexity similar to that of the range constructor.
       */
      // Range overload: both iterators go to the hashtable's range insert.
      template<typename _InputIterator>
	void
	insert(_InputIterator __first, _InputIterator __last)
	{ _M_h.insert(__first, __last); }

      /**
       *  @brief Attempts to insert a list of elements into the %unordered_map.
       *  @param  __l  A std::initializer_list<value_type> of elements
       *               to be inserted.
       *
       *  Complexity similar to that of the range constructor.
       */
      // initializer_list overload: the list is passed to the hashtable as is.
      void
      insert(initializer_list<value_type> __l)
      { _M_h.insert(__l); }


#if __cplusplus > 201402L
      /**
       *  @brief Attempts to insert a std::pair into the %unordered_map.
       *  @param __k    Key to use for finding a possibly existing pair in
       *                the map.
       *  @param __obj  Argument used to generate the .second for a pair 
       *                instance.
       *
       *  @return  A pair, of which the first element is an iterator that 
       *           points to the possibly inserted pair, and the second is 
       *           a bool that is true if the pair was actually inserted.
       *
       *  This function attempts to insert a (key, value) %pair into the
       *  %unordered_map. An %unordered_map relies on unique keys and thus a
       *  %pair is only inserted if its first element (the key) is not already
       *  present in the %unordered_map.
       *  If the %pair was already in the %unordered_map, the .second of 
       *  the %pair is assigned from __obj.
       *
       *  Insertion requires amortized constant time.
       */
      template <typename _Obj>
	pair<iterator, bool>
	insert_or_assign(const key_type& __k, _Obj&& __obj)
	{
	  // No caller hint for this overload, so cend() is used as the hint.
	  auto __res = _M_h.try_emplace(cend(), __k, std::forward<_Obj>(__obj));
	  if (__res.second)
	    return __res;	// Newly inserted; nothing left to assign.
	  // The key was already present: overwrite its mapped value instead.
	  __res.first->second = std::forward<_Obj>(__obj);
	  return __res;
	}

      // move-capable overload
      template <typename _Obj>
	pair<iterator, bool>
	insert_or_assign(key_type&& __k, _Obj&& __obj)
	{
	  // The key is passed on as an rvalue; cend() stands in for a hint.
	  auto __res = _M_h.try_emplace(cend(), std::move(__k),
					std::forward<_Obj>(__obj));
	  if (__res.second)
	    return __res;	// Newly inserted; nothing left to assign.
	  // The key was already present: overwrite its mapped value instead.
	  __res.first->second = std::forward<_Obj>(__obj);
	  return __res;
	}

      /**
       *  @brief Attempts to insert a std::pair into the %unordered_map.
       *  @param  __hint  An iterator that serves as a hint as to where the
       *                  pair should be inserted.
       *  @param __k    Key to use for finding a possibly existing pair in
       *                the unordered_map.
       *  @param __obj  Argument used to generate the .second for a pair 
       *                instance.
       *  @return An iterator that points to the element with key of
       *           @a __k (either a newly inserted %pair or the existing one).
       *
       *  This function is not concerned about whether the insertion took place,
       *  and thus does not return a boolean like the single-argument insert()
       *  does.
       *  If the %pair was already in the %unordered_map, the .second of
       *  the %pair is assigned from __obj.
       *  Note that the first parameter is only a hint and can
       *  potentially improve the performance of the insertion process.  A bad
       *  hint would cause no gains in efficiency.
       *
       *  See
       *  https://gcc.gnu.org/onlinedocs/libstdc++/manual/associative.html#containers.associative.insert_hints
       *  for more on @a hinting.
       *
       *  Insertion requires amortized constant time.
       */
      template <typename _Obj>
	iterator
	insert_or_assign(const_iterator __hint, const key_type& __k,
			 _Obj&& __obj)
	{
	  auto __res = _M_h.try_emplace(__hint, __k,
					std::forward<_Obj>(__obj));
	  if (__res.second)
	    return __res.first;	// Newly inserted; nothing left to assign.
	  // The key was already present: overwrite its mapped value instead.
	  __res.first->second = std::forward<_Obj>(__obj);
	  return __res.first;
	}

      // move-capable overload
      template <typename _Obj>
	iterator
	insert_or_assign(const_iterator __hint, key_type&& __k, _Obj&& __obj)
	{
	  // Hinted form with the key passed on as an rvalue.
	  auto __res = _M_h.try_emplace(__hint, std::move(__k),
					std::forward<_Obj>(__obj));
	  if (__res.second)
	    return __res.first;	// Newly inserted; nothing left to assign.
	  // The key was already present: overwrite its mapped value instead.
	  __res.first->second = std::forward<_Obj>(__obj);
	  return __res.first;
	}
#endif

      ///@{
      /**
       *  @brief Erases an element from an %unordered_map.
       *  @param  __position  An iterator pointing to the element to be erased.
       *  @return An iterator pointing to the element immediately following
       *          @a __position prior to the element being erased. If no such
       *          element exists, end() is returned.
       *
       *  This function erases an element, pointed to by the given iterator,
       *  from an %unordered_map.
       *  Note that this function only erases the element, and that if the
       *  element is itself a pointer, the pointed-to memory is not touched in
       *  any way.  Managing the pointer is the user's responsibility.
       */
      // const_iterator overload: forwards the position to the hashtable.
      iterator
      erase(const_iterator __position)
      { return _M_h.erase(__position); }

      // LWG 2059.
      // Separate overload taking a mutable iterator (see the LWG 2059 note);
      // the body is the same as the const_iterator overload.
      iterator
      erase(iterator __position)
      { return _M_h.erase(__position); }
      ///@}

      /**
       *  @brief Erases elements according to the provided key.
       *  @param  __x  Key of element to be erased.
       *  @return  The number of elements erased.
       *
       *  This function erases all the elements located by the given key from
       *  an %unordered_map. For an %unordered_map the result of this function
       *  can only be 0 (not present) or 1 (present).
       *  Note that this function only erases the element, and that if the
       *  element is itself a pointer, the pointed-to memory is not touched in
       *  any way.  Managing the pointer is the user's responsibility.
       */
      // Key overload: returns the count reported by the hashtable.
      size_type
      erase(const key_type& __x)
      { return _M_h.erase(__x); }

      /**
       *  @brief Erases a [__first,__last) range of elements from an
       *  %unordered_map.
       *  @param  __first  Iterator pointing to the start of the range to be
       *                  erased.
       *  @param __last  Iterator pointing to the end of the range to
       *                be erased.
       *  @return The iterator @a __last.
       *
       *  This function erases a sequence of elements from an %unordered_map.
       *  Note that this function only erases the elements, and that if
       *  the element is itself a pointer, the pointed-to memory is not touched
       *  in any way.  Managing the pointer is the user's responsibility.
       */
      // Range overload: both bounds go to the hashtable's range erase.
      iterator
      erase(const_iterator __first, const_iterator __last)
      { return _M_h.erase(__first, __last); }

      /**
       *  Erases all elements in an %unordered_map.
       *  Note that this function only erases the elements, and that if the
       *  elements themselves are pointers, the pointed-to memory is not touched
       *  in any way.  Managing the pointer is the user's responsibility.
       */
      // Declared noexcept; forwards to the hashtable's clear().
      void
      clear() noexcept
      { _M_h.clear(); }

      /**
       *  @brief  Swaps data with another %unordered_map.
       *  @param  __x  An %unordered_map of the same element and allocator
       *  types.
       *
       *  This exchanges the elements between two %unordered_map in constant
       *  time.
       *  Note that the global std::swap() function is specialized such that
       *  std::swap(m1,m2) will feed to this function.
       */
      // noexcept exactly when swapping the two underlying hashtables is.
      void
      swap(unordered_map& __x)
      noexcept( noexcept(_M_h.swap(__x._M_h)) )
      { _M_h.swap(__x._M_h); }

#if __cplusplus > 201402L
      // Used by the merge() overloads below through _S_get_table; presumably
      // befriended so that it can reach the private _M_h of another map.
      template<typename, typename, typename>
	friend class std::_Hash_merge_helper;

      template<typename _H2, typename _P2>
	void
	merge(unordered_map<_Key, _Tp, _H2, _P2, _Alloc>& __source)
	{
	  // The table for __source is obtained through _Hash_merge_helper and
	  // handed to this map's hashtable via _M_merge_unique.
	  _M_h._M_merge_unique(
	    _Hash_merge_helper<unordered_map, _H2, _P2>::_S_get_table(__source));
	}

      // Rvalue overload: __source is a named parameter and therefore an
      // lvalue here, so this calls the lvalue-reference overload of merge().
      template<typename _H2, typename _P2>
	void
	merge(unordered_map<_Key, _Tp, _H2, _P2, _Alloc>&& __source)
	{ merge(__source); }

      template<typename _H2, typename _P2>
	void
	merge(unordered_multimap<_Key, _Tp, _H2, _P2, _Alloc>& __source)
	{
	  // Same as merging from an unordered_map: the source table is obtained
	  // through _Hash_merge_helper and given to _M_merge_unique.
	  _M_h._M_merge_unique(
	    _Hash_merge_helper<unordered_map, _H2, _P2>::_S_get_table(__source));
	}

      // Rvalue overload: __source is a named parameter and therefore an
      // lvalue here, so this calls the lvalue-reference overload of merge().
      template<typename _H2, typename _P2>
	void
	merge(unordered_multimap<_Key, _Tp, _H2, _P2, _Alloc>&& __source)
	{ merge(__source); }
#endif // C++17

      // observers.

      ///  Returns the hash functor object with which the %unordered_map was
      ///  constructed.
      // Returned by value, as provided by the hashtable.
      hasher
      hash_function() const
      { return _M_h.hash_function(); }

      ///  Returns the key comparison object with which the %unordered_map was
      ///  constructed.
      // Returned by value, as provided by the hashtable.
      key_equal
      key_eq() const
      { return _M_h.key_eq(); }

      // lookup.

      ///@{
      /**
       *  @brief Tries to locate an element in an %unordered_map.
       *  @param  __x  Key to be located.
       *  @return  Iterator pointing to sought-after element, or end() if not
       *           found.
       *
       *  This function takes a key and tries to locate the element with which
       *  the key matches.  If successful the function returns an iterator
       *  pointing to the sought after element.  If unsuccessful it returns the
       *  past-the-end ( @c end() ) iterator.
       */
      // Non-const overload: returns a mutable iterator from the hashtable.
      iterator
      find(const key_type& __x)
      { return _M_h.find(__x); }

#if __cplusplus > 201703L
      // Lookup by a key of arbitrary type _Kt (only after C++17, per the
      // enclosing #if).  The trailing decltype removes this overload when
      // _M_h._M_find_tr(__x) is not well-formed.
      template<typename _Kt>
	auto
	find(const _Kt& __x) -> decltype(_M_h._M_find_tr(__x))
	{ return _M_h._M_find_tr(__x); }
#endif

      // Const overload: _M_h is const here, so the result is a const_iterator.
      const_iterator
      find(const key_type& __x) const
      { return _M_h.find(__x); }

#if __cplusplus > 201703L
      // Const counterpart of the _Kt lookup; the return type is whatever
      // _M_find_tr yields on the const hashtable.
      template<typename _Kt>
	auto
	find(const _Kt& __x) const -> decltype(_M_h._M_find_tr(__x))
	{ return _M_h._M_find_tr(__x); }
#endif
      ///@}

      ///@{
      /**
       *  @brief  Finds the number of elements.
       *  @param  __x  Key to count.
       *  @return  Number of elements with specified key.
       *
       *  This function only makes sense for %unordered_multimap; for
       *  %unordered_map the result will either be 0 (not present) or 1
       *  (present).
       */
      // Forwards to the hashtable's count().
      size_type
      count(const key_type& __x) const
      { return _M_h.count(__x); }

#if __cplusplus > 201703L
      template<typename _Kt>
	auto
	count(const _Kt& __x) const -> decltype(_M_h._M_count_tr(__x))
	{ return _M_h._M_count_tr(__x); }
#endif
      ///@}

#if __cplusplus > 201703L
      ///@{
      /**
       *  @brief  Tests whether an element with the given key is present.
       *  @param  __x  The key to look for.
       *  @return  True if at least one element has the specified key.
       */
      bool
      contains(const key_type& __x) const
      {
	// Present exactly when the lookup does not yield end().
	const auto __pos = _M_h.find(__x);
	return __pos != _M_h.end();
      }

      // Takes part in overload resolution only when _M_h._M_find_tr(__x)
      // is a valid expression; the deduced return type is bool.
      template<typename _Kt>
	auto
	contains(const _Kt& __x) const
	-> decltype(_M_h._M_find_tr(__x), void(), true)
	{
	  const auto __pos = _M_h._M_find_tr(__x);
	  return __pos != _M_h.end();
	}
      ///@}
#endif

      ///@{
      /**
       *  @brief  Locates the subsequence of elements matching a key.
       *  @param  __x  The key to search for.
       *  @return  A pair of iterators that possibly delimits the
       *           subsequence matching the given key.
       *
       *  This is probably only useful for %unordered_multimap.
       */
      std::pair<iterator, iterator>
      equal_range(const key_type& __x)
      {
	return _M_h.equal_range(__x);
      }

#if __cplusplus > 201703L
      // Only declared after C++17.  Takes part in overload resolution only
      // when _M_h._M_equal_range_tr(__x) is a valid expression.
      template<typename _Kt>
	auto
	equal_range(const _Kt& __x)
	-> decltype(_M_h._M_equal_range_tr(__x))
	{
	  return _M_h._M_equal_range_tr(__x);
	}
#endif

      std::pair<const_iterator, const_iterator>
      equal_range(const key_type& __x) const
      {
	return _M_h.equal_range(__x);
      }

#if __cplusplus > 201703L
      // Const counterpart of the _Kt overload above.
      template<typename _Kt>
	auto
	equal_range(const _Kt& __x) const
	-> decltype(_M_h._M_equal_range_tr(__x))
	{
	  return _M_h._M_equal_range_tr(__x);
	}
#endif
      ///@}

      ///@{
      /**
       *  @brief  Subscript ( @c [] ) access to %unordered_map data.
       *  @param  __k  The key whose associated data is wanted.
       *  @return  A reference to the data of the (key,data) %pair.
       *
       *  Provides lookup through the subscript ( @c [] ) operator and
       *  returns the data associated with the given key.  If the key is not
       *  present, a pair with that key is created using default values and
       *  its data is returned.
       *
       *  Lookup requires constant time.
       */
      mapped_type&
      operator[](const key_type& __k)
      {
	return _M_h[__k];
      }

      // Rvalue-key overload: the key is moved into the hashtable's
      // subscript operator.
      mapped_type&
      operator[](key_type&& __k)
      {
	return _M_h[std::move(__k)];
      }
      ///@}

      ///@{
      /**
       *  @brief  Checked access to %unordered_map data.
       *  @param  __k  The key whose associated data is wanted.
       *  @return  A reference to the data whose key equals @a __k, if such
       *           data is present in the %unordered_map.
       *  @throw  std::out_of_range  If no such data is present.
       */
      mapped_type&
      at(const key_type& __k)
      {
	return _M_h.at(__k);
      }

      const mapped_type&
      at(const key_type& __k) const
      {
	return _M_h.at(__k);
      }
      ///@}

      // bucket interface.

      /// The number of buckets of the %unordered_map.
      size_type
      bucket_count() const noexcept
      {
	return _M_h.bucket_count();
      }

      /// The maximum number of buckets of the %unordered_map.
      size_type
      max_bucket_count() const noexcept
      {
	return _M_h.max_bucket_count();
      }

      /**
       *  @brief  Reports how many elements a given bucket holds.
       *  @param  __n  A bucket index.
       *  @return  The number of elements in the bucket.
       */
      size_type
      bucket_size(size_type __n) const
      {
	return _M_h.bucket_size(__n);
      }

      /**
       *  @brief  Reports the index of the bucket for a given key.
       *  @param  __key  A key instance.
       *  @return  The key bucket index.
       */
      size_type
      bucket(const key_type& __key) const
      {
	return _M_h.bucket(__key);
      }
      
      /**
       *  @brief  Read/write iterator to the first element of a bucket.
       *  @param  __n The bucket index.
       *  @return  A read/write local iterator.
       */
      local_iterator
      begin(size_type __n)
      {
	return _M_h.begin(__n);
      }

      ///@{
      /**
       *  @brief  Read-only (constant) iterator to the first element of a
       *          bucket.
       *  @param  __n The bucket index.
       *  @return  A read-only local iterator.
       */
      const_local_iterator
      begin(size_type __n) const
      {
	return _M_h.begin(__n);
      }

      const_local_iterator
      cbegin(size_type __n) const
      {
	return _M_h.cbegin(__n);
      }
      ///@}

      /**
       *  @brief  Read/write iterator to one past the last element of a
       *          bucket.
       *  @param  __n The bucket index.
       *  @return  A read/write local iterator.
       */
      local_iterator
      end(size_type __n)
      {
	return _M_h.end(__n);
      }

      ///@{
      /**
       *  @brief  Read-only (constant) iterator to one past the last element
       *          of a bucket.
       *  @param  __n The bucket index.
       *  @return  A read-only local iterator.
       */
      const_local_iterator
      end(size_type __n) const
      {
	return _M_h.end(__n);
      }

      const_local_iterator
      cend(size_type __n) const
      {
	return _M_h.cend(__n);
      }
      ///@}

      // hash policy.

      /// The average number of elements per bucket.
      float
      load_factor() const noexcept
      {
	return _M_h.load_factor();
      }

      /// A positive number; the %unordered_map tries to keep its load
      /// factor less than or equal to it.
      float
      max_load_factor() const noexcept
      {
	return _M_h.max_load_factor();
      }

      /**
       *  @brief  Sets the maximum load factor of the %unordered_map.
       *  @param  __z The new maximum load factor.
       */
      void
      max_load_factor(float __z)
      {
	_M_h.max_load_factor(__z);
      }

      /**
       *  @brief  May rehash the %unordered_map.
       *  @param  __n The new number of buckets.
       *
       *  A rehash occurs only if the new number of buckets respects the
       *  maximum load factor of the %unordered_map.
       */
      void
      rehash(size_type __n)
      {
	_M_h.rehash(__n);
      }

      /**
       *  @brief  Prepares the %unordered_map to hold a given number of
       *          elements.
       *  @param  __n Number of elements required.
       *
       *  Same as rehash(ceil(n / max_load_factor())).
       */
      void
      reserve(size_type __n)
      {
	_M_h.reserve(__n);
      }

      // Befriends the non-member equality operator for unordered_map so
      // that it may access the private members of this class.
      template<typename _Key1, typename _Tp1, typename _Hash1, typename _Pred1,
	       typename _Alloc1>
        friend bool
	operator==(const unordered_map<_Key1, _Tp1, _Hash1, _Pred1, _Alloc1>&,
		   const unordered_map<_Key1, _Tp1, _Hash1, _Pred1, _Alloc1>&);
    };

#if __cpp_deduction_guides >= 201606

  // Deduction guide for an iterator range followed by an optional bucket
  // count, hash functor, equality predicate and allocator.  The key and
  // mapped types come from the iterator via __iter_key_t / __iter_val_t;
  // the unnamed template parameters constrain when the guide applies.
  template<typename _InputIterator,
	   typename _Hash = hash<__iter_key_t<_InputIterator>>,
	   typename _Pred = equal_to<__iter_key_t<_InputIterator>>,
	   typename _Allocator = allocator<__iter_to_alloc_t<_InputIterator>>,
	   typename = _RequireInputIter<_InputIterator>,
	   typename = _RequireNotAllocatorOrIntegral<_Hash>,
	   typename = _RequireNotAllocator<_Pred>,
	   typename = _RequireAllocator<_Allocator>>
    unordered_map(_InputIterator, _InputIterator,
		  typename unordered_map<int, int>::size_type = {},
		  _Hash = _Hash(), _Pred = _Pred(), _Allocator = _Allocator())
    -> unordered_map<__iter_key_t<_InputIterator>,
		     __iter_val_t<_InputIterator>,
		     _Hash, _Pred, _Allocator>;

  // Deduction guide for an initializer_list of pair<_Key, _Tp> followed by
  // an optional bucket count, hash functor, equality predicate and
  // allocator.
  template<typename _Key, typename _Tp, typename _Hash = hash<_Key>,
	   typename _Pred = equal_to<_Key>,
	   typename _Allocator = allocator<pair<const _Key, _Tp>>,
	   typename = _RequireNotAllocatorOrIntegral<_Hash>,
	   typename = _RequireNotAllocator<_Pred>,
	   typename = _RequireAllocator<_Allocator>>
    unordered_map(initializer_list<pair<_Key, _Tp>>,
		  typename unordered_map<int, int>::size_type = {},
		  _Hash = _Hash(), _Pred = _Pred(), _Allocator = _Allocator())
    -> unordered_map<_Key, _Tp, _Hash, _Pred, _Allocator>;

  // Deduction guide for (iterator range, bucket count, allocator).  The
  // hash and equality types are hash<> and equal_to<> of the key type
  // obtained from the iterator.
  template<typename _InputIterator, typename _Allocator,
	   typename = _RequireInputIter<_InputIterator>,
	   typename = _RequireAllocator<_Allocator>>
    unordered_map(_InputIterator, _InputIterator,
		  typename unordered_map<int, int>::size_type, _Allocator)
    -> unordered_map<__iter_key_t<_InputIterator>,
		     __iter_val_t<_InputIterator>,
		     hash<__iter_key_t<_InputIterator>>,
		     equal_to<__iter_key_t<_InputIterator>>,
		     _Allocator>;

  // Deduction guide for (iterator range, allocator), without a bucket
  // count.  The hash and equality types are hash<> and equal_to<> of the
  // key type obtained from the iterator.
  template<typename _InputIterator, typename _Allocator,
	   typename = _RequireInputIter<_InputIterator>,
	   typename = _RequireAllocator<_Allocator>>
    unordered_map(_InputIterator, _InputIterator, _Allocator)
    -> unordered_map<__iter_key_t<_InputIterator>,
		     __iter_val_t<_InputIterator>,
		     hash<__iter_key_t<_InputIterator>>,
		     equal_to<__iter_key_t<_InputIterator>>,
		     _Allocator>;

  // Deduction guide for (iterator range, bucket count, hash functor,
  // allocator).  The equality type is equal_to<> of the key type obtained
  // from the iterator.
  template<typename _InputIterator, typename _Hash, typename _Allocator,
	   typename = _RequireInputIter<_InputIterator>,
	   typename = _RequireNotAllocatorOrIntegral<_Hash>,
	   typename = _RequireAllocator<_Allocator>>
    unordered_map(_InputIterator, _InputIterator,
		  typename unordered_map<int, int>::size_type,
		  _Hash, _Allocator)
    -> unordered_map<__iter_key_t<_InputIterator>,
		     __iter_val_t<_InputIterator>, _Hash,
		     equal_to<__iter_key_t<_InputIterator>>, _Allocator>;

  // Deduction guide for (initializer_list, bucket count, allocator); the
  // hash and equality types are hash<_Key> and equal_to<_Key>.
  template<typename _Key, typename _Tp, typename _Allocator,
	   typename = _RequireAllocator<_Allocator>>
    unordered_map(initializer_list<pair<_Key, _Tp>>,
		  typename unordered_map<int, int>::size_type,
		  _Allocator)
    -> unordered_map<_Key, _Tp, hash<_Key>, equal_to<_Key>, _Allocator>;

  // Deduction guide for (initializer_list, allocator), without a bucket
  // count; the hash and equality types are hash<_Key> and equal_to<_Key>.
  template<typename _Key, typename _Tp, typename _Allocator,
	   typename = _RequireAllocator<_Allocator>>
    unordered_map(initializer_list<pair<_Key, _Tp>>, _Allocator)
    -> unordered_map<_Key, _Tp, hash<_Key>, equal_to<_Key>, _Allocator>;

  // Deduction guide for (initializer_list, bucket count, hash functor,
  // allocator); the equality type is equal_to<_Key>.
  template<typename _Key, typename _Tp, typename _Hash, typename _Allocator,
	   typename = _RequireNotAllocatorOrIntegral<_Hash>,
	   typename = _RequireAllocator<_Allocator>>
    unordered_map(initializer_list<pair<_Key, _Tp>>,
		  typename unordered_map<int, int>::size_type,
		  _Hash, _Allocator)
    -> unordered_map<_Key, _Tp, _Hash, equal_to<_Key>, _Allocator>;

#endif

  /**
   *  @brief A standard container composed of equivalent keys
   *  (possibly containing multiple of each key value) that associates
   *  values of another type with the keys.
   *
   *  @ingroup unordered_associative_containers
   *
   *  @tparam  _Key    Type of key objects.
   *  @tparam  _Tp     Type of mapped objects.
   *  @tparam  _Hash   Hashing function object type, defaults to hash<_Key>.
   *  @tparam  _Pred   Predicate function object type, defaults
   *                   to equal_to<_Key>.
   *  @tparam  _Alloc  Allocator type, defaults to
   *                   std::allocator<std::pair<const _Key, _Tp>>.
   *
   *  Meets the requirements of a <a href="tables.html#65">container</a>, and
   *  <a href="tables.html#xx">unordered associative container</a>
   *
   * The resulting value type of the container is std::pair<const _Key, _Tp>.
   *
   *  Base is _Hashtable, dispatched at compile time via template
   *  alias __ummap_hashtable.
   */
  template<typename _Key, typename _Tp,
	   typename _Hash = hash<_Key>,
	   typename _Pred = equal_to<_Key>,
	   typename _Alloc = allocator<std::pair<const _Key, _Tp>>>
    class unordered_multimap
    {
      // Hashtable type selected through the __ummap_hashtable alias for
      // these template arguments.
      typedef __ummap_hashtable<_Key, _Tp, _Hash, _Pred, _Alloc>  _Hashtable;
      // The underlying hashtable object to which member functions forward.
      _Hashtable _M_h;

    public:
      // typedefs:
      ///@{
      /// Public member types, taken from the underlying hashtable.
      using key_type = typename _Hashtable::key_type;
      using value_type = typename _Hashtable::value_type;
      using mapped_type = typename _Hashtable::mapped_type;
      using hasher = typename _Hashtable::hasher;
      using key_equal = typename _Hashtable::key_equal;
      using allocator_type = typename _Hashtable::allocator_type;
      ///@}

      ///@{
      ///  Iterator-related member types, taken from the underlying hashtable.
      using pointer = typename _Hashtable::pointer;
      using const_pointer = typename _Hashtable::const_pointer;
      using reference = typename _Hashtable::reference;
      using const_reference = typename _Hashtable::const_reference;
      using iterator = typename _Hashtable::iterator;
      using const_iterator = typename _Hashtable::const_iterator;
      using local_iterator = typename _Hashtable::local_iterator;
      using const_local_iterator = typename _Hashtable::const_local_iterator;
      using size_type = typename _Hashtable::size_type;
      using difference_type = typename _Hashtable::difference_type;
      ///@}

#if __cplusplus > 201402L
      /// Node handle type, as returned by extract().
      using node_type = typename _Hashtable::node_type;
#endif

      //construct/destroy/copy

      /// Default constructor.  Defaulted, so the hashtable member is
      /// default-initialized.
      unordered_multimap() = default;

      /**
       *  @brief  Default constructor creates no elements.
       *  @param __n  Minimal initial number of buckets.
       *  @param __hf  A hash functor.
       *  @param __eql  A key equality functor.
       *  @param __a  An allocator object.
       */
      explicit
      unordered_multimap(size_type __n,
			 const hasher& __hf = hasher(),
			 const key_equal& __eql = key_equal(),
			 const allocator_type& __a = allocator_type())
      : _M_h(__n, __hf, __eql, __a)
      { }

      /**
       *  @brief  Builds an %unordered_multimap from a range.
       *  @param  __first An input iterator.
       *  @param  __last  An input iterator.
       *  @param __n      Minimal initial number of buckets.
       *  @param __hf     A hash functor.
       *  @param __eql    A key equality functor.
       *  @param __a      An allocator object.
       *
       *  Creates an %unordered_multimap consisting of copies of the elements
       *  from [__first,__last).  This is linear in N (where N is
       *  distance(__first,__last)).
       */
      template<typename _InputIterator>
	unordered_multimap(_InputIterator __first, _InputIterator __last,
			   size_type __n = 0,
			   const hasher& __hf = hasher(),
			   const key_equal& __eql = key_equal(),
			   const allocator_type& __a = allocator_type())
	: _M_h(__first, __last, __n, __hf, __eql, __a)
	{ }

      /// Copy constructor (defaulted).
      unordered_multimap(const unordered_multimap&) = default;

      /// Move constructor (defaulted).
      unordered_multimap(unordered_multimap&&) = default;

      /**
       *  @brief Creates an %unordered_multimap with no elements.
       *  @param __a An allocator object.
       *
       *  The allocator is passed on to the underlying hashtable.
       */
      explicit
      unordered_multimap(const allocator_type& __a)
      : _M_h(__a)
      { }

      /**
       *  @brief  Copy constructor with allocator argument.
       *  @param  __ummap  Input %unordered_multimap to copy.
       *  @param  __a  An allocator object.
       */
      unordered_multimap(const unordered_multimap& __ummap,
			 const allocator_type& __a)
      : _M_h(__ummap._M_h, __a)
      { }

      /**
       *  @brief  Move constructor with allocator argument.
       *  @param  __ummap  Input %unordered_multimap to move.
       *  @param  __a  An allocator object.
       *
       *  This constructor is noexcept exactly when the corresponding
       *  hashtable construction is noexcept.
       */
      unordered_multimap(unordered_multimap&& __ummap,
			 const allocator_type& __a)
	noexcept( noexcept(_Hashtable(std::move(__ummap._M_h), __a)) )
      : _M_h(std::move(__ummap._M_h), __a)
      { }

      /**
       *  @brief  Builds an %unordered_multimap from an initializer_list.
       *  @param  __l  An initializer_list.
       *  @param __n  Minimal initial number of buckets.
       *  @param __hf  A hash functor.
       *  @param __eql  A key equality functor.
       *  @param  __a  An allocator object.
       *
       *  Creates an %unordered_multimap consisting of copies of the elements
       *  in the list. This is linear in N (where N is @a __l.size()).
       */
      unordered_multimap(initializer_list<value_type> __l,
			 size_type __n = 0,
			 const hasher& __hf = hasher(),
			 const key_equal& __eql = key_equal(),
			 const allocator_type& __a = allocator_type())
      : _M_h(__l, __n, __hf, __eql, __a)
      { }

      /// Delegates to the (bucket count, hasher, key_equal, allocator)
      /// constructor with a default-constructed hasher and key_equal.
      unordered_multimap(size_type __n, const allocator_type& __a)
      : unordered_multimap(__n, hasher(), key_equal(), __a)
      { }

      /// Delegates to the (bucket count, hasher, key_equal, allocator)
      /// constructor with a default-constructed key_equal.
      unordered_multimap(size_type __n, const hasher& __hf,
			 const allocator_type& __a)
      : unordered_multimap(__n, __hf, key_equal(), __a)
      { }

      /// Range constructor taking a bucket count and an allocator; delegates
      /// to the full range constructor with a default-constructed hasher and
      /// key_equal.
      template<typename _InputIterator>
	unordered_multimap(_InputIterator __first, _InputIterator __last,
			   size_type __n,
			   const allocator_type& __a)
	: unordered_multimap(__first, __last, __n, hasher(), key_equal(), __a)
	{ }

      /// Range constructor taking a bucket count, a hasher and an allocator;
      /// delegates to the full range constructor with a default-constructed
      /// key_equal.
      template<typename _InputIterator>
	unordered_multimap(_InputIterator __first, _InputIterator __last,
			   size_type __n, const hasher& __hf,
			   const allocator_type& __a)
	: unordered_multimap(__first, __last, __n, __hf, key_equal(), __a)
	{ }

      /// Initializer-list constructor taking a bucket count and an allocator;
      /// delegates to the full initializer-list constructor with a
      /// default-constructed hasher and key_equal.
      unordered_multimap(initializer_list<value_type> __l,
			 size_type __n,
			 const allocator_type& __a)
      : unordered_multimap(__l, __n, hasher(), key_equal(), __a)
      { }

      /// Initializer-list constructor taking a bucket count, a hasher and an
      /// allocator; delegates to the full initializer-list constructor with
      /// a default-constructed key_equal.
      unordered_multimap(initializer_list<value_type> __l,
			 size_type __n, const hasher& __hf,
			 const allocator_type& __a)
      : unordered_multimap(__l, __n, __hf, key_equal(), __a)
      { }

      /// Copy assignment operator (defaulted).
      unordered_multimap&
      operator=(const unordered_multimap&) = default;

      /// Move assignment operator (defaulted).
      unordered_multimap&
      operator=(unordered_multimap&&) = default;

      /**
       *  @brief  %Unordered_multimap list assignment operator.
       *  @param  __l  An initializer_list.
       *  @return  A reference to this %unordered_multimap.
       *
       *  Fills the %unordered_multimap with copies of the elements of the
       *  initializer list @a __l.
       *
       *  The assignment completely changes the %unordered_multimap; its
       *  resulting size is the same as the number of elements assigned.
       */
      unordered_multimap&
      operator=(initializer_list<value_type> __l)
      {
	// The list is assigned to the underlying hashtable.
	this->_M_h = __l;
	return *this;
      }

      ///  The allocator object used by the %unordered_multimap, returned by
      ///  value.
      allocator_type
      get_allocator() const noexcept
      {
	return _M_h.get_allocator();
      }

      // size and capacity:

      ///  True if the %unordered_multimap is empty.
      _GLIBCXX_NODISCARD bool
      empty() const noexcept
      {
	return _M_h.empty();
      }

      ///  The size of the %unordered_multimap.
      size_type
      size() const noexcept
      {
	return _M_h.size();
      }

      ///  The maximum size of the %unordered_multimap.
      size_type
      max_size() const noexcept
      {
	return _M_h.max_size();
      }

      // iterators.

      /**
       *  @return  A read/write iterator referring to the first element in
       *           the %unordered_multimap.
       */
      iterator
      begin() noexcept
      {
	return _M_h.begin();
      }

      ///@{
      /**
       *  @return  A read-only (constant) iterator referring to the first
       *           element in the %unordered_multimap.
       */
      const_iterator
      begin() const noexcept
      {
	return _M_h.begin();
      }

      const_iterator
      cbegin() const noexcept
      {
	return _M_h.begin();
      }
      ///@}

      /**
       *  @return  A read/write iterator referring to one past the last
       *           element in the %unordered_multimap.
       */
      iterator
      end() noexcept
      {
	return _M_h.end();
      }

      ///@{
      /**
       *  @return  A read-only (constant) iterator referring to one past the
       *           last element in the %unordered_multimap.
       */
      const_iterator
      end() const noexcept
      {
	return _M_h.end();
      }

      const_iterator
      cend() const noexcept
      {
	return _M_h.end();
      }
      ///@}

      // modifiers.

      /**
       *  @brief  Attempts to construct a std::pair in place and insert it
       *  into the %unordered_multimap.
       *
       *  @param __args  Arguments from which the new pair is built (see
       *                 std::piecewise_construct for passing arguments to
       *                 each part of the pair constructor).
       *
       *  @return  An iterator referring to the inserted pair.
       *
       *  The arguments are forwarded to the underlying hashtable, which
       *  builds and inserts a (key, value) %pair.
       *
       *  Insertion requires amortized constant time.
       */
      template<typename... _Args>
	iterator
	emplace(_Args&&... __args)
	{
	  return _M_h.emplace(std::forward<_Args>(__args)...);
	}

      /**
       *  @brief  Attempts to construct a std::pair in place and insert it
       *  into the %unordered_multimap, using a position hint.
       *
       *  @param  __pos  An iterator serving as a hint as to where the pair
       *                 should be inserted.
       *  @param  __args  Arguments from which the new pair is built (see
       *                  std::piecewise_construct for passing arguments to
       *                  each part of the pair constructor).
       *  @return  An iterator referring to the element whose key is that of
       *           the std::pair built from @a __args.
       *
       *  The first parameter is only a hint; it can potentially improve the
       *  performance of the insertion, and a bad hint would cause no gains
       *  in efficiency.
       *
       *  See
       *  https://gcc.gnu.org/onlinedocs/libstdc++/manual/associative.html#containers.associative.insert_hints
       *  for more on @a hinting.
       *
       *  Insertion requires amortized constant time.
       */
      template<typename... _Args>
	iterator
	emplace_hint(const_iterator __pos, _Args&&... __args)
	{
	  return _M_h.emplace_hint(__pos, std::forward<_Args>(__args)...);
	}

      ///@{
      /**
       *  @brief  Adds a std::pair to the %unordered_multimap.
       *  @param __x  The pair to insert (std::make_pair is a convenient way
       *              to create one).
       *
       *  @return  An iterator referring to the inserted pair.
       *
       *  Insertion requires amortized constant time.
       */
      iterator
      insert(const value_type& __x)
      {
	return _M_h.insert(__x);
      }

      iterator
      insert(value_type&& __x)
      {
	return _M_h.insert(std::move(__x));
      }

      // Enabled only when value_type is constructible from _Pair&&; the
      // argument is forwarded to the hashtable's emplace().
      template<typename _Pair>
	__enable_if_t<is_constructible<value_type, _Pair&&>::value, iterator>
	insert(_Pair&& __x)
	{
	  return _M_h.emplace(std::forward<_Pair>(__x));
	}
      ///@}

      ///@{
      /**
       *  @brief  Adds a std::pair to the %unordered_multimap, using a
       *  position hint.
       *  @param  __hint  An iterator serving as a hint as to where the pair
       *                  should be inserted.
       *  @param  __x  The pair to insert (std::make_pair is a convenient way
       *               to create one).
       *  @return  An iterator referring to the element with the key of
       *           @a __x (may or may not be the %pair passed in).
       *
       *  The first parameter is only a hint; it can potentially improve the
       *  performance of the insertion, and a bad hint would cause no gains
       *  in efficiency.
       *
       *  See
       *  https://gcc.gnu.org/onlinedocs/libstdc++/manual/associative.html#containers.associative.insert_hints
       *  for more on @a hinting.
       *
       *  Insertion requires amortized constant time.
       */
      iterator
      insert(const_iterator __hint, const value_type& __x)
      {
	return _M_h.insert(__hint, __x);
      }

      // _GLIBCXX_RESOLVE_LIB_DEFECTS
      // 2354. Unnecessary copying when inserting into maps with braced-init
      iterator
      insert(const_iterator __hint, value_type&& __x)
      {
	return _M_h.insert(__hint, std::move(__x));
      }

      // Enabled only when value_type is constructible from _Pair&&; the
      // argument is forwarded to the hashtable's emplace_hint().
      template<typename _Pair>
	__enable_if_t<is_constructible<value_type, _Pair&&>::value, iterator>
	insert(const_iterator __hint, _Pair&& __x)
	{
	  return _M_h.emplace_hint(__hint, std::forward<_Pair>(__x));
	}
      ///@}

      /**
       *  @brief  Attempts to insert every element of a range.
       *  @param  __first  Iterator referring to the start of the range to
       *                   be inserted.
       *  @param  __last  Iterator referring to the end of the range.
       *
       *  Complexity similar to that of the range constructor.
       */
      template<typename _InputIterator>
	void
	insert(_InputIterator __first, _InputIterator __last)
	{
	  _M_h.insert(__first, __last);
	}

      /**
       *  @brief  Attempts to insert a list of elements into the
       *  %unordered_multimap.
       *  @param  __l  A std::initializer_list<value_type> holding the
       *               elements to be inserted.
       *
       *  Complexity similar to that of the range constructor.
       */
      void
      insert(initializer_list<value_type> __l)
      {
	_M_h.insert(__l);
      }

#if __cplusplus > 201402L
      /// Extract the node designated by @a __pos.
      node_type
      extract(const_iterator __pos)
      {
	// The past-the-end iterator does not designate a node; this is
	// checked through __glibcxx_assert.
	__glibcxx_assert(__pos != end());
	return _M_h.extract(__pos);
      }

      /// Extract a node by key.
      node_type
      extract(const key_type& __key)
      {
	return _M_h.extract(__key);
      }

      /// Re-insert an extracted node; cend() is passed as the hint.
      iterator
      insert(node_type&& __nh)
      {
	return _M_h._M_reinsert_node_multi(cend(), std::move(__nh));
      }

      /// Re-insert an extracted node, passing @a __hint on as the hint.
      iterator
      insert(const_iterator __hint, node_type&& __nh)
      {
	return _M_h._M_reinsert_node_multi(__hint, std::move(__nh));
      }
#endif // C++17

      ///@{
      /**
       *  @brief  Removes one element from an %unordered_multimap.
       *  @param  __position  An iterator referring to the element to be
       *                      erased.
       *  @return  An iterator referring to the element that immediately
       *           followed @a __position before the erasure.  If no such
       *           element exists, end() is returned.
       *
       *  Erases the element designated by the given iterator from the
       *  %unordered_multimap.
       *  Only the element itself is erased: if the element is a pointer,
       *  the pointed-to memory is not touched in any way.  Managing the
       *  pointer is the user's responsibility.
       */
      iterator
      erase(const_iterator __position)
      {
	return _M_h.erase(__position);
      }

      // LWG 2059.
      iterator
      erase(iterator __position)
      {
	return _M_h.erase(__position);
      }
      ///@}

      /**
       *  @brief  Removes the elements that have a given key.
       *  @param  __x  Key of the elements to be erased.
       *  @return  The number of elements erased.
       *
       *  Erases all the elements located by the given key from the
       *  %unordered_multimap.
       *  Only the elements themselves are erased: if an element is a
       *  pointer, the pointed-to memory is not touched in any way.
       *  Managing the pointer is the user's responsibility.
       */
      size_type
      erase(const key_type& __x)
      {
	return _M_h.erase(__x);
      }

      /**
       *  @brief  Removes a [__first,__last) range of elements from an
       *  %unordered_multimap.
       *  @param  __first  Iterator referring to the start of the range to
       *                   be erased.
       *  @param  __last  Iterator referring to the end of the range to be
       *                  erased.
       *  @return  The iterator @a __last.
       *
       *  Erases a sequence of elements from the %unordered_multimap.
       *  Only the elements themselves are erased: if an element is a
       *  pointer, the pointed-to memory is not touched in any way.
       *  Managing the pointer is the user's responsibility.
       */
      iterator
      erase(const_iterator __first, const_iterator __last)
      {
	return _M_h.erase(__first, __last);
      }

      /**
       *  Removes every element of the %unordered_multimap.
       *  Only the elements themselves are erased: if the elements are
       *  pointers, the pointed-to memory is not touched in any way.
       *  Managing the pointer is the user's responsibility.
       */
      void
      clear() noexcept
      {
	_M_h.clear();
      }

      /**
       *  @brief  Exchanges data with another %unordered_multimap.
       *  @param  __x  An %unordered_multimap of the same element and
       *               allocator types.
       *
       *  The elements of the two %unordered_multimap objects are exchanged
       *  in constant time.
       *  The global std::swap() function is specialized such that
       *  std::swap(m1,m2) will feed to this function.
       *
       *  This function is noexcept exactly when swapping the underlying
       *  hashtables is noexcept.
       */
      void
      swap(unordered_multimap& __x)
      noexcept( noexcept(_M_h.swap(__x._M_h)) )
      {
	_M_h.swap(__x._M_h);
      }

#if __cplusplus > 201402L
      // Befriends every specialization of _Hash_merge_helper, which the
      // merge() overloads use to obtain the hashtable of their argument
      // (presumably so that it can reach the private _M_h member).
      template<typename, typename, typename>
	friend class std::_Hash_merge_helper;

      // Merge from an %unordered_multimap whose hash and predicate types
      // may differ from those of this container.  The source's hashtable
      // is obtained through _Hash_merge_helper and handed to
      // _M_merge_multi.
      template<typename _H2, typename _P2>
	void
	merge(unordered_multimap<_Key, _Tp, _H2, _P2, _Alloc>& __source)
	{
	  _M_h._M_merge_multi(
	    _Hash_merge_helper<unordered_multimap, _H2, _P2>
	      ::_S_get_table(__source));
	}

      // Rvalue overload.  __source is a named parameter and therefore an
      // lvalue here, so the call selects the lvalue-reference overload.
      template<typename _H2, typename _P2>
	void
	merge(unordered_multimap<_Key, _Tp, _H2, _P2, _Alloc>&& __source)
	{
	  this->merge(__source);
	}

      // Merge from an %unordered_map whose hash and predicate types may
      // differ from those of this container.  The source's hashtable is
      // obtained through _Hash_merge_helper and handed to _M_merge_multi.
      template<typename _H2, typename _P2>
	void
	merge(unordered_map<_Key, _Tp, _H2, _P2, _Alloc>& __source)
	{
	  _M_h._M_merge_multi(
	    _Hash_merge_helper<unordered_multimap, _H2, _P2>
	      ::_S_get_table(__source));
	}

      // Rvalue overload.  __source is a named parameter and therefore an
      // lvalue here, so the call selects the lvalue-reference overload.
      template<typename _H2, typename _P2>
	void
	merge(unordered_map<_Key, _Tp, _H2, _P2, _Alloc>&& __source)
	{
	  this->merge(__source);
	}
#endif // C++17

      // observers.

      /**
       *  @brief  Observer for the hash functor.
       *  @return  The hash functor object with which the
       *           %unordered_multimap was constructed, returned by value.
       */
      hasher
      hash_function() const
      {
	return _M_h.hash_function();
      }

      /**
       *  @brief  Observer for the key comparison functor.
       *  @return  The key comparison object with which the
       *           %unordered_multimap was constructed, returned by value.
       */
      key_equal
      key_eq() const
      {
	return _M_h.key_eq();
      }

      // lookup.

      ///@{
      /**
       *  @brief  Looks up an element of an %unordered_multimap by key.
       *  @param  __x  The key to search for.
       *  @return  An iterator referring to the matching element, or end()
       *           when there is no such element.
       *
       *  The key is passed to the underlying hashtable.  On success the
       *  returned iterator designates the element that was found; on
       *  failure the past-the-end ( @c end() ) iterator is returned.
       */
      iterator
      find(const key_type& __x)
      {
	return _M_h.find(__x);
      }

#if __cplusplus > 201703L
      // Only declared after C++17.  Takes part in overload resolution only
      // when _M_h._M_find_tr(__x) is a valid expression.
      template<typename _Kt>
	auto
	find(const _Kt& __x) -> decltype(_M_h._M_find_tr(__x))
	{
	  return _M_h._M_find_tr(__x);
	}
#endif

      const_iterator
      find(const key_type& __x) const
      {
	return _M_h.find(__x);
      }

#if __cplusplus > 201703L
      // Const counterpart of the _Kt overload above.
      template<typename _Kt>
	auto
	find(const _Kt& __x) const -> decltype(_M_h._M_find_tr(__x))
	{
	  return _M_h._M_find_tr(__x);
	}
#endif
      ///@}

      ///@{
      /**
       *  @brief  Counts the elements that have a given key.
       *  @param  __x  The key to count.
       *  @return  How many elements have the specified key.
       */
      size_type
      count(const key_type& __x) const
      {
	return _M_h.count(__x);
      }

#if __cplusplus > 201703L
      // Only declared after C++17.  Takes part in overload resolution only
      // when _M_h._M_count_tr(__x) is a valid expression.
      template<typename _Kt>
	auto
	count(const _Kt& __x) const -> decltype(_M_h._M_count_tr(__x))
	{
	  return _M_h._M_count_tr(__x);
	}
#endif
      ///@}

#if __cplusplus > 201703L
      ///@{
      /**
       *  @brief  Finds whether an element with the given key exists.
       *  @param  __x  Key of elements to be located.
       *  @return  True if there is any element with the specified key.
       *
       *  Implemented by comparing @c _M_h.find(__x) against @c _M_h.end().
       */
      bool
      contains(const key_type& __x) const
      { return _M_h.find(__x) != _M_h.end(); }

      // Overload accepting any type _Kt for which _M_h._M_find_tr(__x) is a
      // valid expression.  The decltype of the comma expression has the type
      // of its last operand (true), i.e. bool; the first operand only makes
      // the overload non-viable when the lookup expression is ill-formed.
      // NOTE(review): the void() operand presumably guards against an
      // overloaded comma operator -- confirm.
      template<typename _Kt>
	auto
	contains(const _Kt& __x) const
	-> decltype(_M_h._M_find_tr(__x), void(), true)
	{ return _M_h._M_find_tr(__x) != _M_h.end(); }
      ///@}
#endif

      ///@{
      /**
       *  @brief Finds a subsequence matching given key.
       *  @param  __x  Key to be located.
       *  @return  Pair of iterators that possibly point to the subsequence
       *           matching the given key.
       *
       *  The pair is obtained from @c _M_h.equal_range().
       */
      std::pair<iterator, iterator>
      equal_range(const key_type& __x)
      { return _M_h.equal_range(__x); }

#if __cplusplus > 201703L
      // Overload accepting any type _Kt for which
      // _M_h._M_equal_range_tr(__x) is a valid expression; the return type
      // is the type of that expression.  Non-const version.
      template<typename _Kt>
	auto
	equal_range(const _Kt& __x)
	-> decltype(_M_h._M_equal_range_tr(__x))
	{ return _M_h._M_equal_range_tr(__x); }
#endif

      // Const overload: same query through _M_h.equal_range(), declared to
      // return a pair of const_iterators.
      std::pair<const_iterator, const_iterator>
      equal_range(const key_type& __x) const
      { return _M_h.equal_range(__x); }

#if __cplusplus > 201703L
      // Const counterpart of the _Kt overload: viable only when
      // _M_h._M_equal_range_tr(__x) is a valid expression on a const object.
      template<typename _Kt>
	auto
	equal_range(const _Kt& __x) const
	-> decltype(_M_h._M_equal_range_tr(__x))
	{ return _M_h._M_equal_range_tr(__x); }
#endif
      ///@}

      // bucket interface.

      /// Returns the number of buckets of the %unordered_multimap.
      /// Declared noexcept; the value comes from @c _M_h.bucket_count().
      size_type
      bucket_count() const noexcept
      { return _M_h.bucket_count(); }

      /// Returns the maximum number of buckets of the %unordered_multimap.
      /// Declared noexcept; the value comes from @c _M_h.max_bucket_count().
      size_type
      max_bucket_count() const noexcept
      { return _M_h.max_bucket_count(); }

      /**
       *  @brief  Returns the number of elements in a given bucket.
       *  @param  __n  A bucket index.
       *  @return  The number of elements in the bucket.
       */
      size_type
      bucket_size(size_type __n) const
      { return _M_h.bucket_size(__n); }

      /**
       *  @brief  Returns the bucket index of a given element.
       *  @param  __key  A key instance.
       *  @return  The key bucket index.
       */
      size_type
      bucket(const key_type& __key) const
      { return _M_h.bucket(__key); }
      
      /**
       *  @brief  Returns a read/write iterator pointing to the first bucket
       *         element.
       *  @param  __n The bucket index.
       *  @return  A read/write local iterator.
       *
       *  Forwards to @c _M_h.begin(__n).
       */
      local_iterator
      begin(size_type __n)
      { return _M_h.begin(__n); }

      ///@{
      /**
       *  @brief  Returns a read-only (constant) iterator pointing to the first
       *         bucket element.
       *  @param  __n The bucket index.
       *  @return  A read-only local iterator.
       *
       *  Forwards to @c _M_h.begin(__n) on the const table.
       */
      const_local_iterator
      begin(size_type __n) const
      { return _M_h.begin(__n); }

      // Explicitly-const spelling of begin(__n); forwards to _M_h.cbegin(__n)
      // and returns a const_local_iterator.
      const_local_iterator
      cbegin(size_type __n) const
      { return _M_h.cbegin(__n); }
      ///@}

      /**
       *  @brief  Returns a read/write iterator pointing to one past the last
       *         element of the bucket.
       *  @param  __n The bucket index.
       *  @return  A read/write local iterator.
       *
       *  Forwards to @c _M_h.end(__n).
       */
      local_iterator
      end(size_type __n)
      { return _M_h.end(__n); }

      ///@{
      /**
       *  @brief  Returns a read-only (constant) iterator pointing to one past
       *         the last element of the bucket.
       *  @param  __n The bucket index.
       *  @return  A read-only local iterator.
       *
       *  Forwards to @c _M_h.end(__n) on the const table.
       */
      const_local_iterator
      end(size_type __n) const
      { return _M_h.end(__n); }

      // Explicitly-const spelling of end(__n); forwards to _M_h.cend(__n)
      // and returns a const_local_iterator.
      const_local_iterator
      cend(size_type __n) const
      { return _M_h.cend(__n); }
      ///@}

      // hash policy.

      /// Returns the average number of elements per bucket.
      /// Declared noexcept; the value comes from @c _M_h.load_factor().
      float
      load_factor() const noexcept
      { return _M_h.load_factor(); }

      /// Returns a positive number that the %unordered_multimap tries to keep
      /// the load factor less than or equal to.
      /// Getter overload; the value comes from @c _M_h.max_load_factor().
      float
      max_load_factor() const noexcept
      { return _M_h.max_load_factor(); }

      /**
       *  @brief  Change the %unordered_multimap maximum load factor.
       *  @param  __z The new maximum load factor.
       *
       *  Setter overload; passes @a __z on to @c _M_h.max_load_factor().
       */
      void
      max_load_factor(float __z)
      { _M_h.max_load_factor(__z); }

      /**
       *  @brief  May rehash the %unordered_multimap.
       *  @param  __n The new number of buckets.
       *
       *  Rehash will occur only if the new number of buckets respects the
       *  %unordered_multimap maximum load factor.
       *
       *  The request is passed on to @c _M_h.rehash().
       */
      void
      rehash(size_type __n)
      { _M_h.rehash(__n); }

      /**
       *  @brief  Prepare the %unordered_multimap for a specified number of
       *          elements.
       *  @param  __n Number of elements required.
       *
       *  Same as rehash(ceil(n / max_load_factor())).
       *
       *  The request is passed on to @c _M_h.reserve().
       */
      void
      reserve(size_type __n)
      { _M_h.reserve(__n); }

      // Befriends every specialization of the non-member equality operator
      // for unordered_multimap, which reads the _M_h member of both operands.
      template<typename _Key1, typename _Tp1, typename _Hash1, typename _Pred1,
	       typename _Alloc1>
        friend bool
	operator==(const unordered_multimap<_Key1, _Tp1,
					    _Hash1, _Pred1, _Alloc1>&,
		   const unordered_multimap<_Key1, _Tp1,
					    _Hash1, _Pred1, _Alloc1>&);
    };

#if __cpp_deduction_guides >= 201606

  // Deduction guide: iterator pair followed by optional size_type, hash,
  // predicate and allocator arguments.  Key and mapped types are derived
  // from the iterator through __iter_key_t / __iter_val_t; the hash,
  // predicate and allocator default to hash<>, equal_to<> and allocator<>
  // of the corresponding iterator-derived types.
  // NOTE(review): the unnamed _Require* template parameters presumably
  // constrain which argument types select this guide -- confirm against
  // their definitions.
  template<typename _InputIterator,
	   typename _Hash = hash<__iter_key_t<_InputIterator>>,
	   typename _Pred = equal_to<__iter_key_t<_InputIterator>>,
	   typename _Allocator = allocator<__iter_to_alloc_t<_InputIterator>>,
	   typename = _RequireInputIter<_InputIterator>,
	   typename = _RequireNotAllocatorOrIntegral<_Hash>,
	   typename = _RequireNotAllocator<_Pred>,
	   typename = _RequireAllocator<_Allocator>>
    unordered_multimap(_InputIterator, _InputIterator,
		       unordered_multimap<int, int>::size_type = {},
		       _Hash = _Hash(), _Pred = _Pred(),
		       _Allocator = _Allocator())
    -> unordered_multimap<__iter_key_t<_InputIterator>,
			  __iter_val_t<_InputIterator>, _Hash, _Pred,
			  _Allocator>;

  // Deduction guide: initializer_list of pair<_Key, _Tp> followed by
  // optional size_type, hash, predicate and allocator arguments.  The hash,
  // predicate and allocator default to hash<_Key>, equal_to<_Key> and
  // allocator<pair<const _Key, _Tp>>.
  template<typename _Key, typename _Tp, typename _Hash = hash<_Key>,
	   typename _Pred = equal_to<_Key>,
	   typename _Allocator = allocator<pair<const _Key, _Tp>>,
	   typename = _RequireNotAllocatorOrIntegral<_Hash>,
	   typename = _RequireNotAllocator<_Pred>,
	   typename = _RequireAllocator<_Allocator>>
    unordered_multimap(initializer_list<pair<_Key, _Tp>>,
		       unordered_multimap<int, int>::size_type = {},
		       _Hash = _Hash(), _Pred = _Pred(),
		       _Allocator = _Allocator())
    -> unordered_multimap<_Key, _Tp, _Hash, _Pred, _Allocator>;

  // Deduction guide: iterator pair, size_type and allocator.  Hash and
  // predicate are fixed to hash<> and equal_to<> of the iterator-derived
  // key type.
  template<typename _InputIterator, typename _Allocator,
	   typename = _RequireInputIter<_InputIterator>,
	   typename = _RequireAllocator<_Allocator>>
    unordered_multimap(_InputIterator, _InputIterator,
		       unordered_multimap<int, int>::size_type, _Allocator)
    -> unordered_multimap<__iter_key_t<_InputIterator>,
			  __iter_val_t<_InputIterator>,
			  hash<__iter_key_t<_InputIterator>>,
			  equal_to<__iter_key_t<_InputIterator>>, _Allocator>;

  // Deduction guide: iterator pair and allocator only.  Hash and predicate
  // are fixed to hash<> and equal_to<> of the iterator-derived key type.
  template<typename _InputIterator, typename _Allocator,
	   typename = _RequireInputIter<_InputIterator>,
	   typename = _RequireAllocator<_Allocator>>
    unordered_multimap(_InputIterator, _InputIterator, _Allocator)
    -> unordered_multimap<__iter_key_t<_InputIterator>,
			  __iter_val_t<_InputIterator>,
			  hash<__iter_key_t<_InputIterator>>,
			  equal_to<__iter_key_t<_InputIterator>>, _Allocator>;

  // Deduction guide: iterator pair, size_type, hash and allocator.  The
  // predicate is fixed to equal_to<> of the iterator-derived key type.
  template<typename _InputIterator, typename _Hash, typename _Allocator,
	   typename = _RequireInputIter<_InputIterator>,
	   typename = _RequireNotAllocatorOrIntegral<_Hash>,
	   typename = _RequireAllocator<_Allocator>>
    unordered_multimap(_InputIterator, _InputIterator,
		       unordered_multimap<int, int>::size_type, _Hash,
		       _Allocator)
    -> unordered_multimap<__iter_key_t<_InputIterator>,
			  __iter_val_t<_InputIterator>, _Hash,
			  equal_to<__iter_key_t<_InputIterator>>, _Allocator>;

  // Deduction guide: initializer_list of pair<_Key, _Tp>, size_type and
  // allocator.  Hash and predicate are fixed to hash<_Key> and
  // equal_to<_Key>.
  template<typename _Key, typename _Tp, typename _Allocator,
	   typename = _RequireAllocator<_Allocator>>
    unordered_multimap(initializer_list<pair<_Key, _Tp>>,
		       unordered_multimap<int, int>::size_type,
		       _Allocator)
    -> unordered_multimap<_Key, _Tp, hash<_Key>, equal_to<_Key>, _Allocator>;

  // Deduction guide: initializer_list of pair<_Key, _Tp> and allocator
  // only.  Hash and predicate are fixed to hash<_Key> and equal_to<_Key>.
  template<typename _Key, typename _Tp, typename _Allocator,
	   typename = _RequireAllocator<_Allocator>>
    unordered_multimap(initializer_list<pair<_Key, _Tp>>, _Allocator)
    -> unordered_multimap<_Key, _Tp, hash<_Key>, equal_to<_Key>, _Allocator>;

  // Deduction guide: initializer_list of pair<_Key, _Tp>, size_type, hash
  // and allocator.  The predicate is fixed to equal_to<_Key>.
  template<typename _Key, typename _Tp, typename _Hash, typename _Allocator,
	   typename = _RequireNotAllocatorOrIntegral<_Hash>,
	   typename = _RequireAllocator<_Allocator>>
    unordered_multimap(initializer_list<pair<_Key, _Tp>>,
		       unordered_multimap<int, int>::size_type,
		       _Hash, _Allocator)
    -> unordered_multimap<_Key, _Tp, _Hash, equal_to<_Key>, _Allocator>;

#endif

  /// Non-member swap for %unordered_map: exchanges the two containers by
  /// calling the member swap, and is noexcept exactly when that call is.
  template<typename _Key, typename _Tp, typename _Hash, typename _Pred,
           typename _Alloc>
    inline void
    swap(unordered_map<_Key, _Tp, _Hash, _Pred, _Alloc>& __x,
         unordered_map<_Key, _Tp, _Hash, _Pred, _Alloc>& __y)
    noexcept(noexcept(__x.swap(__y)))
    {
      __x.swap(__y);
    }

  /// Non-member swap for %unordered_multimap: exchanges the two containers
  /// by calling the member swap, and is noexcept exactly when that call is.
  template<typename _Key, typename _Tp, typename _Hash, typename _Pred,
           typename _Alloc>
    inline void
    swap(unordered_multimap<_Key, _Tp, _Hash, _Pred, _Alloc>& __x,
         unordered_multimap<_Key, _Tp, _Hash, _Pred, _Alloc>& __y)
    noexcept(noexcept(__x.swap(__y)))
    {
      __x.swap(__y);
    }

  /// Equality for %unordered_map: delegates to _M_equal on the _M_h members
  /// of the two operands.
  template<typename _Key, typename _Tp, typename _Hash, typename _Pred,
           typename _Alloc>
    inline bool
    operator==(const unordered_map<_Key, _Tp, _Hash, _Pred, _Alloc>& __x,
               const unordered_map<_Key, _Tp, _Hash, _Pred, _Alloc>& __y)
    {
      const auto& __lhs_table = __x._M_h;
      const auto& __rhs_table = __y._M_h;
      return __lhs_table._M_equal(__rhs_table);
    }

#if __cpp_impl_three_way_comparison < 201907L
  /// Inequality for %unordered_map, defined as the negation of operator==.
  template<typename _Key, typename _Tp, typename _Hash, typename _Pred,
           typename _Alloc>
    inline bool
    operator!=(const unordered_map<_Key, _Tp, _Hash, _Pred, _Alloc>& __x,
               const unordered_map<_Key, _Tp, _Hash, _Pred, _Alloc>& __y)
    {
      const bool __same = (__x == __y);
      return !__same;
    }
#endif

  /// Equality for %unordered_multimap: delegates to _M_equal on the _M_h
  /// members of the two operands.
  template<typename _Key, typename _Tp, typename _Hash, typename _Pred,
           typename _Alloc>
    inline bool
    operator==(const unordered_multimap<_Key, _Tp, _Hash, _Pred, _Alloc>& __x,
               const unordered_multimap<_Key, _Tp, _Hash, _Pred, _Alloc>& __y)
    {
      const auto& __lhs_table = __x._M_h;
      const auto& __rhs_table = __y._M_h;
      return __lhs_table._M_equal(__rhs_table);
    }

#if __cpp_impl_three_way_comparison < 201907L
  /// Inequality for %unordered_multimap, defined as the negation of
  /// operator==.
  template<typename _Key, typename _Tp, typename _Hash, typename _Pred,
           typename _Alloc>
    inline bool
    operator!=(const unordered_multimap<_Key, _Tp, _Hash, _Pred, _Alloc>& __x,
               const unordered_multimap<_Key, _Tp, _Hash, _Pred, _Alloc>& __y)
    {
      const bool __same = (__x == __y);
      return !__same;
    }
#endif

_GLIBCXX_END_NAMESPACE_CONTAINER

#if __cplusplus > 201402L
  // Allow std::unordered_map access to internals of compatible maps.
  //
  // Partial specialization of _Hash_merge_helper for an unordered_map using
  // hasher _Hash1 and predicate _Eq1.  Its private _S_get_table overloads
  // return a reference to the _M_h member of an unordered_map or
  // unordered_multimap that has the same key, mapped and allocator types
  // but uses _Hash2 / _Eq2.  Everything is private, so only the befriended
  // unordered_map<_Key, _Val, _Hash1, _Eq1, _Alloc> can call them.
  template<typename _Key, typename _Val, typename _Hash1, typename _Eq1,
	   typename _Alloc, typename _Hash2, typename _Eq2>
    struct _Hash_merge_helper<
      _GLIBCXX_STD_C::unordered_map<_Key, _Val, _Hash1, _Eq1, _Alloc>,
      _Hash2, _Eq2>
    {
    private:
      // Local aliases so the unqualified container names used below refer
      // to the _GLIBCXX_STD_C templates.
      template<typename... _Tp>
	using unordered_map = _GLIBCXX_STD_C::unordered_map<_Tp...>;
      template<typename... _Tp>
	using unordered_multimap = _GLIBCXX_STD_C::unordered_multimap<_Tp...>;

      // The destination container type is the only client of this helper.
      friend unordered_map<_Key, _Val, _Hash1, _Eq1, _Alloc>;

      // Returns the _M_h member of a source unordered_map.
      static auto&
      _S_get_table(unordered_map<_Key, _Val, _Hash2, _Eq2, _Alloc>& __map)
      { return __map._M_h; }

      // Returns the _M_h member of a source unordered_multimap.
      static auto&
      _S_get_table(unordered_multimap<_Key, _Val, _Hash2, _Eq2, _Alloc>& __map)
      { return __map._M_h; }
    };

  // Allow std::unordered_multimap access to internals of compatible maps.
  //
  // Partial specialization of _Hash_merge_helper for an unordered_multimap
  // using hasher _Hash1 and predicate _Eq1.  Its private _S_get_table
  // overloads return a reference to the _M_h member of an unordered_map or
  // unordered_multimap that has the same key, mapped and allocator types
  // but uses _Hash2 / _Eq2.  Everything is private, so only the befriended
  // unordered_multimap<_Key, _Val, _Hash1, _Eq1, _Alloc> can call them.
  template<typename _Key, typename _Val, typename _Hash1, typename _Eq1,
	   typename _Alloc, typename _Hash2, typename _Eq2>
    struct _Hash_merge_helper<
      _GLIBCXX_STD_C::unordered_multimap<_Key, _Val, _Hash1, _Eq1, _Alloc>,
      _Hash2, _Eq2>
    {
    private:
      // Local aliases so the unqualified container names used below refer
      // to the _GLIBCXX_STD_C templates.
      template<typename... _Tp>
	using unordered_map = _GLIBCXX_STD_C::unordered_map<_Tp...>;
      template<typename... _Tp>
	using unordered_multimap = _GLIBCXX_STD_C::unordered_multimap<_Tp...>;

      // The destination container type is the only client of this helper.
      friend unordered_multimap<_Key, _Val, _Hash1, _Eq1, _Alloc>;

      // Returns the _M_h member of a source unordered_map.
      static auto&
      _S_get_table(unordered_map<_Key, _Val, _Hash2, _Eq2, _Alloc>& __map)
      { return __map._M_h; }

      // Returns the _M_h member of a source unordered_multimap.
      static auto&
      _S_get_table(unordered_multimap<_Key, _Val, _Hash2, _Eq2, _Alloc>& __map)
      { return __map._M_h; }
    };
#endif // C++17

_GLIBCXX_END_NAMESPACE_VERSION
} // namespace std

#endif /* _UNORDERED_MAP_H */
                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                      [           yE           C         \           ;           A         ]           ?]           }         ^           8o                 _                      )         `                  	 
         a           l2                    b           *$           j         c                               d           I        
             e                      PM tD     f           E        j h         g           {E       sN
 9         h            <           V         i           7o                    j                      b         k                               l                  {
          m           u                    n                      [         o           
                    p                   [M         q           ߳           \M         r           o2        ]M         s           +$           ^M         t                     Z
 `M      u                               v           I                        w           tk            bM         x           I            V         y           I                      z           a           K         {                      3         |           f       "r -         }           I            '         ~           5           (                               ׼                    I         Y                    	        m:                    I                             t           vM                    I            /                    I        ^
 SW                    I        ?                        's           }M                                  ~M 6                I                                I            s                    i.            Q                       5 !                    I                                   7                            ^:       
 /                    I            M                    |           =                    K           O                               hd                    I            M                    ]Z           M                    T           V                    eI           M                               M                    
       !k
 M                               M                    I                                $x       	 M                 #                               	                                                                                             I        Y                        I            M                    I                                   I                                   I        3% m                                +                                  M 1                k           q                    q            T                            v                    
       e
                     k2                               N                                CF       "
                     )I                               {v           q                    -F           M                    I                                   >_        
 M                    zk            -                    T     	      M                    |     
  
 |4                    f           M                    I                                   x           F                                
 8                              
 7                           
 6                              	
 9                *           	                    +           C                            
      M                    I                                   I                                   a           *                            H                    h                               I        Ё
 s                              ~	 M .                              M /                I         wp                    C3           c                                   M :+                        !      M ;+                H3     "      1                    a     #  
                     I      $  
 M                                                               \ h  \           mK 0           jx          c           Y     $ h  $          K 2          R     t    oI          Z     $ h  $          K 3                %     `      I  [                  K P          `            AT          \     ` h X `           K R          8             :          ]      h             K S          hB      ;     2;          ^                  K Z                 p     լ5          _            
      K c          8             Ab          `            
          e          ,           (          a     4  N 4          K f          7             i	          b     4  N 4           K g          6#      P     %/
      I  c     4  N 4          K              T           I  d     4  N 4          K k                ;     髹`      I  e     4  N 4          K h          ~
           r 
      I  f     4  N 4          ! K t                p#     p      I  g     n h b n     #     $  L                       Ho          h             &     ' L           HW      \     }2          i             )     * L           $4             E*          j     0 < & D     ,     - L                 xq     ԧ           k             /     0 !L                  |     %҈          l      $      2     3 &L           0v                 I  m            5     6 )L           h      `     1      I  n            8     9 /L           H8      Q     
      I  o      $ 
      ;     < 2L                      ];          p     j  ^ j     >     ? 3L           A             ş          q      h       A     B 4L Ñ          Lv      x     L3          r      h      D     E >L đ          U     b    ӭ}      I  s     F  > F     G     H ?L Ǒ          H      (     6/          t             J     K BL ȑ          B      J     <\$          u      <       M     N DL ʑ          =             |          v      *       P     Q FL ˑ          x/            T7          w     ` < X `     T     U GL ̑  S     f            +.f          x            W     X KL ϑ          H!             3+#           y     @ H 4 @     Z     [ OL ӑ          ,u            $!          z             ]     ^ VL ّ          T	      8     #D"          {       *     `     a hL           z       L     `	#          |       *      f     g iL   e                 \V$      I  }             i     j qL           H       h     DI%          ~      h 
      l     m rL                  X     pyZ&                h 
     o     p yL           T4      H7     y|'      I               r     s zL           T$             K+(               ,  H <     w     x {L   v           /     yy)               T  ` T     {     | L    z      $       D     b]*               T  ` T          L   ~     |       x     ѱ+      I       T  ` T           L                       Kv,      I                     L 
                      @-                 
            L                 &     F*.               z  p z          L           N            </                            L           @`            ´0                            L           	#      $#     Vy|1      I       l H b l          L           \D      4     :2               D P < D          L 5          y           a3                             L 8          r      <     J|4               D H 8 D           L <                     Wa.5                h             L A          ,            vd6                h                B          +      \?     sfz|7      I       @ L T @              C          i             MqY 8                H             L D        }	      8     ׼O9                            L M                     sa:                            L R          J             	;      I       ~  p ~          L T          8            lЦ<                            L W          M            c(wn=               Z  J Z          L ]          {            %a\>                            L `          t            ĕ5?                " ,           L a          @       x     eF-@                             L d               Xc
    bzA                $                n                   }B      I        $                o          #      '     Lby|C      I                     
M p          @O      ,6     ":D                                q                ܮ     bgE      I                    'M                       4Q#F      I                    /M           d       h     Q[G      I                    3M t                 X~     4H      I                    NM                       -JI      I        H            bM                 8     VJ                H             cM           <           MWK      I       D  T  :  D           hM            *      \     NL                                 kM           ]      d     6~M               L * @ X           rM           8             P^N                             tM           |      0     NO                             wM           9           kzP      I       P  D P           {M           h       t      WLcQ                $            }M           \      E     wz|R                            M           X      [     &S                            M           0           (JZdT      I        H             M           T            }FU               . $ N .          M                 D     9{ʊV               .  N .          M Œ          ;      +     +1W      I         z            M ̒          Ȱ0           qwX               (   (     
      M Ғ                      K~Y               (   (     
      M Ԓ          4            `Z      I       (   (          M Ւ                 x      [      I       (   (          M Ӓ          X       8     \      I        *            M ֒          04      I     ̈́]               j L t j          M                       ύj ^               d	 L r	 d	          M           G            U;_               ^
 $ |
 ^
                         D?       @     >`               ^
  |
 ^
    "     # M           T            -!a      I       n * Z n     %     & M           M      
     =b               n  Z n    (     ) M            "       l      [c      I       n  Z n    +     , M b          F            =d      I       n $ Z n    .     /     d          <e       `     e      I       n  Z n    1     2 M                       Akf      I       ] * ] ]     4     5 M e                P     ng               ^  "^ ^     7     8 M g          +      p     Vڢh               ^  "^ ^     :     ; M j                      Wci      I       ^  "^ ^     =     > M h          Ѐ	           j      I  Ô     ^  "^ ^     @     A M l                n     |h:k      I  Ĕ     ^ $ "^ ^    C     D M m          T         }l      I  Ŕ     ^  "^ ^     F     G M o          <             Y#Um      I  Ɣ     ^  "^ ^     I     J M q          Lm             U7n      I  ǔ     ^  "^ ^     L     M N s          <       @     Ao      I  Ȕ     ^  "^ ^     O     P N u          m             Gіp      I  ɔ     ^  "^ ^     R     S 
N w                 p     63*Xq      I  ʔ     ^  "^ ^     U     V N x                      뺏r      I  ˔     ^  "^ ^     X     Y N y                      \gs      I  ̔     ^  "^ ^     [     \ N z                      t      I  ͔     ^  "^ ^     ^     _ N {                     ou      I  Δ     ^  "^ ^     a     b N }          ̅            ȸv      I  ϔ     ^  "^ ^     d     e  N           !            Zw      I  Д     ^  "^ ^     g     h $N                       A)Sx      I  є     ^  "^ ^     j     k &N |          h            y      I  Ҕ     ^  "^ ^     m     n +N ~          <       T     wz      I  Ӕ     ^  "^ ^     p     q .N                        Tu{      I  Ԕ     ^  "^ ^     s     t 3N           Tm             |      I  Ք     ^  "^ ^     v     w 6N           x            Җm}      I  ֔     ^  "^ ^     y     z :N                  @     Z,x~      I  ה     ^  "^ ^     |     } <N           (             ;      I  ؔ     ^  "^ ^           AN           x       l     '=       J  ٔ     ^  "^ ^           EN                  0           J  ڔ     ^  "^ ^           JN            n             
y      J  ۔     ^  "^ ^           MN                 <     ղQ@      J  ܔ     ^  "^ ^           QN                       [      J  ݔ     ^  "^ ^           TN           p            k      J  ޔ     ^  "^ ^           XN                      z,j      J  ߔ     ^  "^ ^           ZN k          X      L     Yux      J       ^  "^ ^           ^N                       Ox      J       ^  "^ ^          `N n                !           	J       f P f f          eN                        _}               g h 
g g           jN           T'      <     l               zg P g zg          nN           T             ~++               g P g g          sN           8<            B3               8h  dh Ph           xN        H             ӎ               Ph  dh Ph          yN                   <      iFmB      J       Ph  dh Ph          zN                  <      y      J       6i " Pi Di           {N                       ma               i  i i           ~N           tf          ODF               i  i i          N                      Z3      J       j  j j          N                       @               j  j j          N           9             
qY                (k  k (k           N                       +W               \k  Tk \k          N        /           ,a               k   k          N m          <       H     eӘ               k   k          N            {       $     ]=      J       k   k          N        h      |     _      J       l  l l           N           `.e     @    ucz               l  l l           N           8b         .C      J       l  l l          N           ɡ
     Xf(    w+      J       l  l l          N           4Qx         ̞      J       l $ l l          N           0X           ܋,f      J       l  l l           N           @      ,            J       l  l l          N                 @     :^T      !J       l  l l          N           06      T&    fk>      "J       l P l l          N                 t     `      #J       o o n o           N ĝ                p     
IZ               o h o 
p           N ˝          Ĺ                           Pp  Hp np           N ͝          5           m               p  p p          N Ν          D7             Q7F                q q p q           N ϝ          >0      d     8               zq H jq zq           N ҝ          8R       4     3ݩ               q q r q          N ԝ             <     z               q q r q          N ۝                 (      
,      +J       u * tu u           N           }       \     شr               u  u u           N           (6                            u v v u          N        H8             ynԮ               xv * v xv          ! N           
       D      nn~               v * v v     #     $ N           `#             z0          	     v * v v    &     ' O           Pm             ʯB      1J  
     v * v v    )     * O                  l      u      2J       x  x x     .     / O   -     Ж      &     xܳ               y * hw 0y     1     2 O           (       h               
     y  hw 0y     4     5 $O           S             q      5J       y  y y     7     8 %O 
          \              7               6z  *z Dz     :     ; 'O           H       T     d6               |z  tz |z    =     >               H       H                    z  z z     A     B +O   @     \           :lӹ               <{  L{ <{    D     E -O           D            _               {  { {     G     H /O           
      r                    } L } }    J     K KO *          $       $
     䪼               } L } }    M     N SO -          #?      TP     Ke      =J        L      P     Q !` <          (X                           $      S     T $` V          c      6           ?J       z# L # z#    V     W '` 1                $     d               z# $ # z#    Y     Z )` X          X      ^     T{|      AJ       @$ q &$ @$    \     ] +` Y                 h      i               $  $ $     _     ` 0` _          T      $     e*^               $  $ $     b     c 3` `          #      %           DJ       $ $ $ $    e     f 7` c                $     @      EJ       $  $ $     h     i 9` a          D           !n      FJ       $  $ $     k     l =` b          (       \     ^Ѻ      GJ        $& $ @& $&    n     o C` d                ,    E          !     $& $ @& $&    q     r D` f          (	         r      IJ  "     $& $ @& $&    t     u E` g          /     <           JJ  #     '   ' '    w     x F` h          h       <     Շ          $     '   ' '     z     { I` j                |     #[r      LJ  %     '  
( '    }     ~ T` o          Q            W          &     n(  \( n(           W` r          T      H     !'}          '     n(  \( n(              s          ,           E      OJ  (     (  ( (          \` t          T                       )     H)  R) H)          `` u          pr            $K          *     H)  R) H)           c` v                T
     ǮD      RJ  +     &* * * &*           n` {          X      |     E?          ,     *   * *           t` ~                      ػ          -     * o * *           y`           l                  UJ  .     *  * *           `           H       D     K`      VJ  /     *  * *           `           T                 WJ  0     *  * *           `           \;             N{      XJ  1     *  * *           `                  P     ܬ!      YJ  2     l, *  l,          ` l                
     {          3     ,  , ,          `           $F       $     a%DM          4     -  &- -          `                 c     ס          5     -  &- -          `           L           2(      ]J  6     -  &- -          `                      rx|      ^J  7     -  &- -          `        $      ̂     K
      _J  8     -  &- -          `              p     )/\      `J  9     -  &- -           `           c
      Hc     slp      aJ  :     / * / /           `                       !P<          ;     /  / /           `           8Y             y?          <     /  / /          `           d       d      >      dJ  =     /  / /          `           `            '|      eJ  >     0 q 0 0          `           "      -     !          ?     1  1 1           `                       f?          @                                                                      dddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddd
dddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddd
dddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddd
dddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddd
dddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddd
dddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddd
dddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddd
dddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddd
dddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddd
dddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddd
dddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddd  0.0.26-3   games    dpkg   1.15.6~   I I  K nK     J I  oK oK    K I      pK    L I  qK qK    M I      rK    N I  " sK    O I      tK    P I      uK    Q I  W vK    R I  e wK 	   S I  ΐ xK 
   T I      yK    U I      zK    V I  
 {K 
   W I   |K    X I   }K    Y I  
 ~K    Z I   K    [ I      K    \ I      K    ] I   K    ^ I  a K    _ I  V K    ` I  W K    a I  { K    b I      K    c I             I I      K    d I  k        I I  K K    e I  WM K    f I  T K    g I  b; K     h I   K !   i I  7 K "   j I   K #   k I  K K $   d I      K %   l I  nK     &   m I  K K '   n I  
     (   O I  sK     )   o I  )L K *   p I   K +   q I      K ,   r I      K -   s I  L K .   t I  K K /   u I   K 0   v I       1   w I   K 2   x I   K 3   y I  M K 4   z I   K 5   { I   K 6   | I  {x K 7   } I   K 8   ~ I  a     9    I  Z     :    I      K ;    I          <    I  6{ K =    I  F K >    I  K K ?    I      K @    I   K A    I  J K B    I      K C    I  =< K D    I  5
 K E    I  n` K F    I  ?a K G    I  K K H    I   K I    I      K J    I      K K    I  5 K L   O I  K K M    I  <m K N    I   K O    I  K K P   V I      K Q    I  s K R    I      K S    I  K K T    I      K U    I  ? K V    I  v K W    I  T K X    I  ڿ K Y    I      K Z   w I      K [    I          \    I      K ]    I  EK K ^    I  12 K _    I  K K `    I  K K a    I  K K b    I          c   O I  K K d    I  & K e    I  [} K f    I  xK K g    I  .
 K h    I      K i    I      K j    I      K k    I  K K l    I  h K m    I      K n    I  , K o    I      K p    I  K K q    I  K K r    I  K K s    I  K K t    I      K u    I          v    I  J K w    I  S K x    I      K y    I      K z    I  1 K {    I  G K |    I      K }    I   K ~    I  I K     I      K     I              I  R K     I      K     I  ަ K     I      K     I  {G K    x I      K    £ I  ރ K    ã I   K    w I  K K     I      K    ģ I      K    ţ I   K    ƣ I  \ K    ǣ I  K K    ȣ I  K K    ɣ I  K K    ʣ I             o I  K L    a I      L    ˣ I   L    ̣ I  ب L    ͣ I  >
        O I   L L    Σ I  k L    ϣ I  tL L    У I  x 	L    ѣ I  y 
L    ң I  v L    ӣ I      L    ԣ I   
L    գ I      L    Y I      L    ֣ I   L    ף I  L L    ^ I      L    _ I      L    ` I      L    أ I          o I  L L    ٣ I      L    ڣ I  c        ۣ I  R L    O I  L L    ܣ I      L    ϣ I  K L    ݣ I      L    ޣ I      L    ף I  K L    ^ I  L  L    ` I  L        ߣ I  /L "L    O I  L #L    ϣ I  L $L     I  M %L     I  1L         I      'L     I      (L     I  #        O I  "L *L    ϣ I  #L +L     I  2 ,L     I   -L     I  / .L    ף I  L        ߣ I      0L     I  h 1L     I              I  K         I  hL         I      5L    O I  2L 6L     I  tK 7L    Q I      8L    ϣ I  *L 9L    X I      :L    Z I      ;L    ף I  .L <L     I  ؤ =L     I              I          O I  5L @L    ϣ I  8L AL    t I  ;L        O I  ?L CL     I             m I  BL EL    a I  L        m I  DL         I  FL HL    a I  EL IL     I  [ JL     I              I  { LL     I  v ML     I   NL     I              I  PK PL    w I  K QL     I  a RL    O I  GL SL     I  5 TL     I  * UL     I  C;         I  } WL     I   XL    O I  RL YL     I  { ZL     I  g[ [L      I   \L     I  +/ ]L     I      ^L     I  - _L     I      `L     I   aL     I  C bL     I   cL     I  V dL    	 I  } eL    
 I  2 fL     I   gL     I           o I  XL        
 I  X jL     I  sL kL     I  nL lL     I  t2 mL     I  pL nL     I      oL    I      pL    I            m I  3L        I      sL   m I  qL tL   ϣ I  @L uL   У I      vL    I      wL 	   I   xL 
   I             I  
        I  L     
   I  ` |L    I  n }L    I  9 ~L    I  h L    I  L L     I      L   ! I      L   " I            # I  L L   o I  jL L   $ I  L L   % I             I  L L   & I  + L   ' I  1( L   ( I  7 L   ) I  p L   a I  HL L   * I             # I      L !  + I  
 L "  , I  : L #  - I   L $  . I  L L %  / I          &  0 I  L L '  1 I   L (  2 I   L )  Y I  L     *  O I  zL L +  3 I  L L ,  4 I  X* L -  5 I  8     .  6 I      L /  7 I      L 0  8 I  A     1  9 I  = L 2  : I  * L 3  ; I          4  < I       5  = I  F L 6  > I   L 7  ? I  LU L 8  @ I   L 9  A I  g L :  B I   L ;  C I  ]; L <  D I  KE L =  E I   L >  F I  y L ?  G I   L @  H I   L A  I I  : L B  J I      L C  K I   L D  L I    L E  M I  W L F  N I      L G  O I   L H  P I      L I  Q I   L J  R I   L K  S I      L L  T I   L M  U I   L N  V I      L O  W I   L P  X I          Q  Y I  W L R  Z I  -
     S  O I  L L T  S I      L U  [ I  +M L V  \ I  CM L W  ] I   L X  ^ I       Y  O I  L L Z  _ I  L L [  ` I   L \  a I  - L ]  b I  D L ^  c I  ~ L // Simd Abi specific implementations -*- C++ -*-

// Copyright (C) 2020-2022 Free Software Foundation, Inc.
//
// This file is part of the GNU ISO C++ Library.  This library is free
// software; you can redistribute it and/or modify it under the
// terms of the GNU General Public License as published by the
// Free Software Foundation; either version 3, or (at your option)
// any later version.

// This library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
// GNU General Public License for more details.

// Under Section 7 of GPL version 3, you are granted additional
// permissions described in the GCC Runtime Library Exception, version
// 3.1, as published by the Free Software Foundation.

// You should have received a copy of the GNU General Public License and
// a copy of the GCC Runtime Library Exception along with this program;
// see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
// <http://www.gnu.org/licenses/>.

#ifndef _GLIBCXX_EXPERIMENTAL_SIMD_ABIS_H_
#define _GLIBCXX_EXPERIMENTAL_SIMD_ABIS_H_

#if __cplusplus >= 201703L

#include <array>
#include <cmath>
#include <cstdlib>

_GLIBCXX_SIMD_BEGIN_NAMESPACE
// _S_allbits{{{
// A constant of type _V with every bit set.  It is obtained by complementing
// a value-initialized __vector_type_t<char, sizeof(_V)> (presumably a vector
// of sizeof(_V) chars, i.e. as wide as _V) and reinterpreting it as _V.
template <typename _V>
  static inline _GLIBCXX_SIMD_USE_CONSTEXPR _V _S_allbits
    = reinterpret_cast<_V>(~__vector_type_t<char, sizeof(_V) / sizeof(char)>());

// }}}
// _S_signmask, _S_absmask{{{
// The result of __xor (presumably a bitwise XOR) applied to a vector of +1s
// and a vector of -1s.  For IEEE floating-point elements +1 and -1 differ
// only in the sign bit, so only the sign bit of each element ends up set.
// NOTE(review): for integer elements 1 ^ -1 is not a sign-bit-only mask, so
// this is presumably meant for floating-point vectors only -- confirm.
template <typename _V, typename = _VectorTraits<_V>>
  static inline _GLIBCXX_SIMD_USE_CONSTEXPR _V _S_signmask
    = __xor(_V() + 1, _V() - 1);

// Variable template computed as __andnot(_S_signmask<_V>, _S_allbits<_V>).
// NOTE(review): presumably __andnot(a, b) yields (~a & b), which would make
// this "all bits except those in _S_signmask" -- confirm against __andnot.
template <typename _V, typename = _VectorTraits<_V>>
  static inline _GLIBCXX_SIMD_USE_CONSTEXPR _V _S_absmask
    = __andnot(_S_signmask<_V>, _S_allbits<_V>);

//}}}
// __vector_permute<Indices...>{{{
// Index == -1 requests zeroing of the output element
template <int... _Indices, typename _Tp, typename _TVT = _VectorTraits<_Tp>,
	  typename = __detail::__odr_helper>
  _Tp
  __vector_permute(_Tp __x)
  {
    static_assert(sizeof...(_Indices) == _TVT::_S_full_size);
    return __make_vector<typename _TVT::value_type>(
      (_Indices == -1 ? 0 : __x[_Indices == -1 ? 0 : _Indices])...);
  }

// }}}
// __vector_shuffle<Indices...>{{{
// Index == -1 requests zeroing of the output element
// Builds a _Tp from elements of __x and __y: an index below the full vector
// width selects from __x, a larger index selects from __y (after subtracting
// the width), and -1 produces a 0 element.
template <int... _Indices, typename _Tp, typename _TVT = _VectorTraits<_Tp>,
	  typename = __detail::__odr_helper>
  _Tp
  __vector_shuffle(_Tp __x, _Tp __y)
  {
    constexpr auto __width = _TVT::_S_full_size;
    return _Tp{(_Indices != -1
		  ? (_Indices >= __width ? __y[_Indices - __width]
					 : __x[_Indices])
		  : 0)...};
  }

// }}}
// __make_wrapper{{{
// Packs the given values into a vector of _Tp via __make_vector and returns
// it as a _SimdWrapper with one element per argument.
template <typename _Tp, typename... _Args>
  _GLIBCXX_SIMD_INTRINSIC constexpr _SimdWrapper<_Tp, sizeof...(_Args)>
  __make_wrapper(const _Args&... __args)
  {
    return __make_vector<_Tp>(__args...);
  }

// }}}
// __wrapper_bitcast{{{
// Reinterprets the storage of __x as a _SimdWrapper<_Tp, _Np>. _Np is _ToN
// when that is non-zero; otherwise it is chosen so that the byte size is
// preserved (sizeof(_Up) * _M / sizeof(_Tp)). Results with a single element
// are rejected at compile time.
template <typename _Tp, size_t _ToN = 0, typename _Up, size_t _M,
	  size_t _Np = _ToN != 0 ? _ToN : sizeof(_Up) * _M / sizeof(_Tp)>
  _GLIBCXX_SIMD_INTRINSIC constexpr _SimdWrapper<_Tp, _Np>
  __wrapper_bitcast(_SimdWrapper<_Up, _M> __x)
  {
    using _ToV = __vector_type_t<_Tp, _Np>;
    static_assert(_Np > 1);
    return __intrin_bitcast<_ToV>(__x._M_data);
  }

// }}}
// __shift_elements_right{{{
// if (__shift % 2ⁿ == 0) => the low n Bytes are correct
// Moves the bytes of __v down by __shift positions, so that input byte
// __shift becomes byte 0 of the result, and returns the result as _Tp.
// __shift is a compile-time byte count that must not exceed sizeof(_Tp).
// Which implementation is used is selected at compile time from the vector
// size, the shift amount and the available instruction sets; without x86
// intrinsics only the trivial cases and the generic fallback remain.
// NOTE(review): per the remark preceding this function only the low bytes are
// promised to be correct; do not assume the vacated high bytes are zero on
// every path without checking the specific branch.
template <unsigned __shift, typename _Tp, typename _TVT = _VectorTraits<_Tp>>
  _GLIBCXX_SIMD_INTRINSIC _Tp
  __shift_elements_right(_Tp __v)
  {
    // intrinsic-typed view of __v; only some of the x86 branches use it
    [[maybe_unused]] const auto __iv = __to_intrin(__v);
    static_assert(__shift <= sizeof(_Tp));
    if constexpr (__shift == 0)
      return __v; // nothing to shift
    else if constexpr (__shift == sizeof(_Tp))
      return _Tp(); // everything shifted out: value-initialized result
#if _GLIBCXX_SIMD_X86INTRIN // {{{
    // 16-byte float/double vectors shifted by half their size: duplicate the
    // high half into the low half
    else if constexpr (__have_sse && __shift == 8
		       && _TVT::template _S_is<float, 4>)
      return _mm_movehl_ps(__iv, __iv);
    else if constexpr (__have_sse2 && __shift == 8
		       && _TVT::template _S_is<double, 2>)
      return _mm_unpackhi_pd(__iv, __iv);
    // any other 16-byte vector: byte-wise right shift of the whole register
    else if constexpr (__have_sse2 && sizeof(_Tp) == 16)
      return reinterpret_cast<typename _TVT::type>(
	_mm_srli_si128(reinterpret_cast<__m128i>(__iv), __shift));
    // 32-byte vector shifted by exactly one 128-bit lane
    else if constexpr (__shift == 16 && sizeof(_Tp) == 32)
      {
	/*if constexpr (__have_avx && _TVT::template _S_is<double, 4>)
	  return _mm256_permute2f128_pd(__iv, __iv, 0x81);
	else if constexpr (__have_avx && _TVT::template _S_is<float, 8>)
	  return _mm256_permute2f128_ps(__iv, __iv, 0x81);
	else if constexpr (__have_avx)
	  return reinterpret_cast<typename _TVT::type>(
	    _mm256_permute2f128_si256(__iv, __iv, 0x81));
	else*/
	return __zero_extend(__hi128(__v));
      }
    // 32-byte vector, shift within one lane, AVX2: per-lane alignr against a
    // copy whose low lane holds the original high lane
    else if constexpr (__have_avx2 && sizeof(_Tp) == 32 && __shift < 16)
      {
	const auto __vll = __vector_bitcast<_LLong>(__v);
	return reinterpret_cast<typename _TVT::type>(
	  _mm256_alignr_epi8(_mm256_permute2x128_si256(__vll, __vll, 0x81),
			     __vll, __shift));
      }
    // same case with AVX only: handle the two 128-bit halves separately and
    // concatenate them
    else if constexpr (__have_avx && sizeof(_Tp) == 32 && __shift < 16)
      {
	const auto __vll = __vector_bitcast<_LLong>(__v);
	return reinterpret_cast<typename _TVT::type>(
	  __concat(_mm_alignr_epi8(__hi128(__vll), __lo128(__vll), __shift),
		   _mm_srli_si128(__hi128(__vll), __shift)));
      }
    // 32-byte vector shifted by more than one lane: only the high lane
    // contributes; recurse on it with the remaining shift
    else if constexpr (sizeof(_Tp) == 32 && __shift > 16)
      return __zero_extend(__shift_elements_right<__shift - 16>(__hi128(__v)));
    // 64-byte vector shifted by exactly half its size
    else if constexpr (sizeof(_Tp) == 64 && __shift == 32)
      return __zero_extend(__hi256(__v));
    else if constexpr (__have_avx512f && sizeof(_Tp) == 64)
      {
	// shifts of 48 or more / 32 or more bytes reduce to a shift of the top
	// 128-bit / 256-bit part
	if constexpr (__shift >= 48)
	  return __zero_extend(
	    __shift_elements_right<__shift - 48>(__extract<3, 4>(__v)));
	else if constexpr (__shift >= 32)
	  return __zero_extend(
	    __shift_elements_right<__shift - 32>(__hi256(__v)));
	// shifts that are a multiple of 8 or 4 bytes: element-granular alignr
	// with a value-initialized __m512i as the upper operand
	else if constexpr (__shift % 8 == 0)
	  return reinterpret_cast<typename _TVT::type>(
	    _mm512_alignr_epi64(__m512i(), __intrin_bitcast<__m512i>(__v),
				__shift / 8));
	else if constexpr (__shift % 4 == 0)
	  return reinterpret_cast<typename _TVT::type>(
	    _mm512_alignr_epi32(__m512i(), __intrin_bitcast<__m512i>(__v),
				__shift / 4));
	// remaining byte-granular shifts require AVX512BW
	else if constexpr (__have_avx512bw && __shift < 16)
	  {
	    const auto __vll = __vector_bitcast<_LLong>(__v);
	    return reinterpret_cast<typename _TVT::type>(
	      _mm512_alignr_epi8(_mm512_shuffle_i32x4(__vll, __vll, 0xf9),
				 __vll, __shift));
	  }
	else if constexpr (__have_avx512bw && __shift < 32)
	  {
	    const auto __vll = __vector_bitcast<_LLong>(__v);
	    return reinterpret_cast<typename _TVT::type>(
	      _mm512_alignr_epi8(_mm512_shuffle_i32x4(__vll, __m512i(), 0xee),
				 _mm512_shuffle_i32x4(__vll, __vll, 0xf9),
				 __shift - 16));
	  }
	else
	  __assert_unreachable<_Tp>();
      }
  /*
      } else if constexpr (__shift % 16 == 0 && sizeof(_Tp) == 64)
	  return __auto_bitcast(__extract<__shift / 16, 4>(__v));
  */
#endif // _GLIBCXX_SIMD_X86INTRIN }}}
    else
      {
	// Generic fallback: view __v as chunks of the largest size (8, 4, 2 or
	// 1 bytes) that divides __shift, then build a new vector from the
	// chunks starting at index __shift / __chunksize. Fewer initializers
	// than elements are passed, so the trailing elements are not taken
	// from __v.
	constexpr int __chunksize = __shift % 8 == 0   ? 8
				    : __shift % 4 == 0 ? 4
				    : __shift % 2 == 0 ? 2
						       : 1;
	auto __w = __vector_bitcast<__int_with_sizeof_t<__chunksize>>(__v);
	using _Up = decltype(__w);
	return __intrin_bitcast<_Tp>(
	  __call_with_n_evaluations<(sizeof(_Tp) - __shift) / __chunksize>(
	    [](auto... __chunks) { return _Up{__chunks...}; },
	    [&](auto __i) { return __w[__shift / __chunksize + __i]; }));
      }
  }

// }}}
// __extract_part(_SimdWrapper<_Tp, _Np>) {{{
// Treats __x as _Total parts of _Np / _Total elements each and returns
// _Combine consecutive parts, starting with part _Index, as a
// _SimdWrapper<_Tp, _Np / _Total * _Combine>.
template <int _Index, int _Total, int _Combine, typename _Tp, size_t _Np>
  _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_CONST
  _SimdWrapper<_Tp, _Np / _Total * _Combine>
  __extract_part(const _SimdWrapper<_Tp, _Np> __x)
  {
    // Reduce a common factor of 2 first; the request stays the same but the
    // template arguments become canonical.
    if constexpr (_Index % 2 == 0 && _Total % 2 == 0 && _Combine % 2 == 0)
      return __extract_part<_Index / 2, _Total / 2, _Combine / 2>(__x);
    else
      {
	constexpr size_t __values_per_part = _Np / _Total;
	constexpr size_t __values_to_skip = _Index * __values_per_part;
	constexpr size_t __return_size = __values_per_part * _Combine;
	using _R = __vector_type_t<_Tp, __return_size>;
	// the requested range must lie within the storage of __x (which may be
	// larger than _Np elements)
	static_assert((_Index + _Combine) * __values_per_part * sizeof(_Tp)
			<= sizeof(__x),
		      "out of bounds __extract_part");
	// the following assertion would ensure no "padding" to be read
	// static_assert(_Total >= _Index + _Combine, "_Total must be greater
	// than _Index");

	// static_assert(__return_size * _Total == _Np, "_Np must be divisible
	// by _Total");
	// Runtime (not constexpr) test: when __x reports _M_is_constprop(),
	// build the result element by element instead of using intrinsics.
	if (__x._M_is_constprop())
	  return __generate_from_n_evaluations<__return_size, _R>(
	    [&](auto __i) { return __x[__values_to_skip + __i]; });
	if constexpr (_Index == 0 && _Total == 1)
	  return __x; // the whole vector was requested
	else if constexpr (_Index == 0)
	  // leading part: reinterpret the full vector as the smaller type
	  return __intrin_bitcast<_R>(__as_vector(__x));
#if _GLIBCXX_SIMD_X86INTRIN // {{{
	// 32-byte input, result of at most 16 bytes
	else if constexpr (sizeof(__x) == 32
			   && __return_size * sizeof(_Tp) <= 16)
	  {
	    constexpr size_t __bytes_to_skip = __values_to_skip * sizeof(_Tp);
	    if constexpr (__bytes_to_skip == 16)
	      // exactly the upper 128-bit half
	      return __vector_bitcast<_Tp, __return_size>(
		__hi128(__as_vector(__x)));
	    else
	      // byte-wise shift across the (high, low) 128-bit halves
	      return __vector_bitcast<_Tp, __return_size>(
		_mm_alignr_epi8(__hi128(__vector_bitcast<_LLong>(__x)),
				__lo128(__vector_bitcast<_LLong>(__x)),
				__bytes_to_skip));
	  }
#endif // _GLIBCXX_SIMD_X86INTRIN }}}
	// shift the requested elements down to position 0 and reinterpret
	else if constexpr (_Index > 0
			   && (__values_to_skip % __return_size != 0
			       || sizeof(_R) >= 8)
			   && (__values_to_skip + __return_size) * sizeof(_Tp)
				<= 64
			   && sizeof(__x) >= 16)
	  return __intrin_bitcast<_R>(
	    __shift_elements_right<__values_to_skip * sizeof(_Tp)>(
	      __as_vector(__x)));
	else
	  {
	    // last resort: copy the requested bytes out of the object
	    // representation of __x into a zero-initialized result
	    _R __r = {};
	    __builtin_memcpy(&__r,
			     reinterpret_cast<const char*>(&__x)
			       + sizeof(_Tp) * __values_to_skip,
			     __return_size * sizeof(_Tp));
	    return __r;
	  }
      }
  }

// }}}
// __extract_part(_SimdWrapper<bool, _Np>) {{{
// Overload for _SimdWrapper<bool, _Np>: returns __x._M_data shifted right by
// _Index * _Np / _Total bits, converted to the narrower wrapper type. Only
// available with AVX-512F and only for _Combine == 1.
template <int _Index, int _Total, int _Combine = 1, size_t _Np>
  _GLIBCXX_SIMD_INTRINSIC constexpr _SimdWrapper<bool, _Np / _Total * _Combine>
  __extract_part(const _SimdWrapper<bool, _Np> __x)
  {
    static_assert(_Combine == 1, "_Combine != 1 not implemented");
    // "_Np == _Np" only serves to make the condition dependent
    static_assert(__have_avx512f && _Np == _Np);
    static_assert(_Total >= 2);
    static_assert(_Index + _Combine <= _Total);
    static_assert(_Index >= 0);
    constexpr auto __bits_to_drop = _Index * _Np / _Total;
    return __x._M_data >> __bits_to_drop;
  }

// }}}

// __vector_convert {{{
// implementation requires an index sequence
// One input vector: each element of __a selected by _I... is cast to the
// element type of _To; the results initialize the returned _To in order.
template <typename _To, typename _From, size_t... _I>
  _GLIBCXX_SIMD_INTRINSIC constexpr _To
  __vector_convert(_From __a, index_sequence<_I...>)
  {
    using _Up = typename _VectorTraits<_To>::value_type;
    return _To{
      static_cast<_Up>(__a[_I])...
    };
  }

// Two input vectors: the elements selected by _I... are cast to the element
// type of _To and concatenated in argument order (__a, then __b).
template <typename _To, typename _From, size_t... _I>
  _GLIBCXX_SIMD_INTRINSIC constexpr _To
  __vector_convert(_From __a, _From __b, index_sequence<_I...>)
  {
    using _Up = typename _VectorTraits<_To>::value_type;
    return _To{
      static_cast<_Up>(__a[_I])...,
      static_cast<_Up>(__b[_I])...
    };
  }

// Three input vectors: the elements selected by _I... are cast to the element
// type of _To and concatenated in argument order.
template <typename _To, typename _From, size_t... _I>
  _GLIBCXX_SIMD_INTRINSIC constexpr _To
  __vector_convert(_From __a, _From __b, _From __c, index_sequence<_I...>)
  {
    using _Up = typename _VectorTraits<_To>::value_type;
    return _To{
      static_cast<_Up>(__a[_I])...,
      static_cast<_Up>(__b[_I])...,
      static_cast<_Up>(__c[_I])...
    };
  }

// Four input vectors: the elements selected by _I... are cast to the element
// type of _To and concatenated in argument order.
template <typename _To, typename _From, size_t... _I>
  _GLIBCXX_SIMD_INTRINSIC constexpr _To
  __vector_convert(_From __a, _From __b, _From __c, _From __d,
		   index_sequence<_I...>)
  {
    using _Up = typename _VectorTraits<_To>::value_type;
    return _To{
      static_cast<_Up>(__a[_I])...,
      static_cast<_Up>(__b[_I])...,
      static_cast<_Up>(__c[_I])...,
      static_cast<_Up>(__d[_I])...
    };
  }

// Five input vectors: the elements selected by _I... are cast to the element
// type of _To and concatenated in argument order.
template <typename _To, typename _From, size_t... _I>
  _GLIBCXX_SIMD_INTRINSIC constexpr _To
  __vector_convert(_From __a, _From __b, _From __c, _From __d, _From __e,
		   index_sequence<_I...>)
  {
    using _Up = typename _VectorTraits<_To>::value_type;
    return _To{
      static_cast<_Up>(__a[_I])...,
      static_cast<_Up>(__b[_I])...,
      static_cast<_Up>(__c[_I])...,
      static_cast<_Up>(__d[_I])...,
      static_cast<_Up>(__e[_I])...
    };
  }

// Six input vectors: the elements selected by _I... are cast to the element
// type of _To and concatenated in argument order.
template <typename _To, typename _From, size_t... _I>
  _GLIBCXX_SIMD_INTRINSIC constexpr _To
  __vector_convert(_From __a, _From __b, _From __c, _From __d, _From __e,
		   _From __f, index_sequence<_I...>)
  {
    using _Up = typename _VectorTraits<_To>::value_type;
    return _To{
      static_cast<_Up>(__a[_I])...,
      static_cast<_Up>(__b[_I])...,
      static_cast<_Up>(__c[_I])...,
      static_cast<_Up>(__d[_I])...,
      static_cast<_Up>(__e[_I])...,
      static_cast<_Up>(__f[_I])...
    };
  }

// Seven input vectors: the elements selected by _I... are cast to the element
// type of _To and concatenated in argument order.
template <typename _To, typename _From, size_t... _I>
  _GLIBCXX_SIMD_INTRINSIC constexpr _To
  __vector_convert(_From __a, _From __b, _From __c, _From __d, _From __e,
		   _From __f, _From __g, index_sequence<_I...>)
  {
    using _Up = typename _VectorTraits<_To>::value_type;
    return _To{
      static_cast<_Up>(__a[_I])...,
      static_cast<_Up>(__b[_I])...,
      static_cast<_Up>(__c[_I])...,
      static_cast<_Up>(__d[_I])...,
      static_cast<_Up>(__e[_I])...,
      static_cast<_Up>(__f[_I])...,
      static_cast<_Up>(__g[_I])...
    };
  }

// Eight input vectors: the elements selected by _I... are cast to the element
// type of _To and concatenated in argument order.
template <typename _To, typename _From, size_t... _I>
  _GLIBCXX_SIMD_INTRINSIC constexpr _To
  __vector_convert(_From __a, _From __b, _From __c, _From __d, _From __e,
		   _From __f, _From __g, _From __h, index_sequence<_I...>)
  {
    using _Up = typename _VectorTraits<_To>::value_type;
    return _To{
      static_cast<_Up>(__a[_I])...,
      static_cast<_Up>(__b[_I])...,
      static_cast<_Up>(__c[_I])...,
      static_cast<_Up>(__d[_I])...,
      static_cast<_Up>(__e[_I])...,
      static_cast<_Up>(__f[_I])...,
      static_cast<_Up>(__g[_I])...,
      static_cast<_Up>(__h[_I])...
    };
  }

// Nine input vectors: the elements selected by _I... are cast to the element
// type of _To and concatenated in argument order.
template <typename _To, typename _From, size_t... _I>
  _GLIBCXX_SIMD_INTRINSIC constexpr _To
  __vector_convert(_From __a, _From __b, _From __c, _From __d, _From __e,
		   _From __f, _From __g, _From __h, _From __i,
		   index_sequence<_I...>)
  {
    using _Up = typename _VectorTraits<_To>::value_type;
    return _To{
      static_cast<_Up>(__a[_I])...,
      static_cast<_Up>(__b[_I])...,
      static_cast<_Up>(__c[_I])...,
      static_cast<_Up>(__d[_I])...,
      static_cast<_Up>(__e[_I])...,
      static_cast<_Up>(__f[_I])...,
      static_cast<_Up>(__g[_I])...,
      static_cast<_Up>(__h[_I])...,
      static_cast<_Up>(__i[_I])...
    };
  }

// Ten input vectors: the elements selected by _I... are cast to the element
// type of _To and concatenated in argument order.
template <typename _To, typename _From, size_t... _I>
  _GLIBCXX_SIMD_INTRINSIC constexpr _To
  __vector_convert(_From __a, _From __b, _From __c, _From __d, _From __e,
		   _From __f, _From __g, _From __h, _From __i, _From __j,
		   index_sequence<_I...>)
  {
    using _Up = typename _VectorTraits<_To>::value_type;
    return _To{
      static_cast<_Up>(__a[_I])...,
      static_cast<_Up>(__b[_I])...,
      static_cast<_Up>(__c[_I])...,
      static_cast<_Up>(__d[_I])...,
      static_cast<_Up>(__e[_I])...,
      static_cast<_Up>(__f[_I])...,
      static_cast<_Up>(__g[_I])...,
      static_cast<_Up>(__h[_I])...,
      static_cast<_Up>(__i[_I])...,
      static_cast<_Up>(__j[_I])...
    };
  }

// Eleven input vectors: the elements selected by _I... are cast to the
// element type of _To and concatenated in argument order.
template <typename _To, typename _From, size_t... _I>
  _GLIBCXX_SIMD_INTRINSIC constexpr _To
  __vector_convert(_From __a, _From __b, _From __c, _From __d, _From __e,
		   _From __f, _From __g, _From __h, _From __i, _From __j,
		   _From __k, index_sequence<_I...>)
  {
    using _Up = typename _VectorTraits<_To>::value_type;
    return _To{
      static_cast<_Up>(__a[_I])...,
      static_cast<_Up>(__b[_I])...,
      static_cast<_Up>(__c[_I])...,
      static_cast<_Up>(__d[_I])...,
      static_cast<_Up>(__e[_I])...,
      static_cast<_Up>(__f[_I])...,
      static_cast<_Up>(__g[_I])...,
      static_cast<_Up>(__h[_I])...,
      static_cast<_Up>(__i[_I])...,
      static_cast<_Up>(__j[_I])...,
      static_cast<_Up>(__k[_I])...
    };
  }

// Twelve input vectors: the elements selected by _I... are cast to the
// element type of _To and concatenated in argument order.
template <typename _To, typename _From, size_t... _I>
  _GLIBCXX_SIMD_INTRINSIC constexpr _To
  __vector_convert(_From __a, _From __b, _From __c, _From __d, _From __e,
		   _From __f, _From __g, _From __h, _From __i, _From __j,
		   _From __k, _From __l, index_sequence<_I...>)
  {
    using _Up = typename _VectorTraits<_To>::value_type;
    return _To{
      static_cast<_Up>(__a[_I])...,
      static_cast<_Up>(__b[_I])...,
      static_cast<_Up>(__c[_I])...,
      static_cast<_Up>(__d[_I])...,
      static_cast<_Up>(__e[_I])...,
      static_cast<_Up>(__f[_I])...,
      static_cast<_Up>(__g[_I])...,
      static_cast<_Up>(__h[_I])...,
      static_cast<_Up>(__i[_I])...,
      static_cast<_Up>(__j[_I])...,
      static_cast<_Up>(__k[_I])...,
      static_cast<_Up>(__l[_I])...
    };
  }

// Thirteen input vectors: the elements selected by _I... are cast to the
// element type of _To and concatenated in argument order.
template <typename _To, typename _From, size_t... _I>
  _GLIBCXX_SIMD_INTRINSIC constexpr _To
  __vector_convert(_From __a, _From __b, _From __c, _From __d, _From __e,
		   _From __f, _From __g, _From __h, _From __i, _From __j,
		   _From __k, _From __l, _From __m, index_sequence<_I...>)
  {
    using _Up = typename _VectorTraits<_To>::value_type;
    return _To{
      static_cast<_Up>(__a[_I])...,
      static_cast<_Up>(__b[_I])...,
      static_cast<_Up>(__c[_I])...,
      static_cast<_Up>(__d[_I])...,
      static_cast<_Up>(__e[_I])...,
      static_cast<_Up>(__f[_I])...,
      static_cast<_Up>(__g[_I])...,
      static_cast<_Up>(__h[_I])...,
      static_cast<_Up>(__i[_I])...,
      static_cast<_Up>(__j[_I])...,
      static_cast<_Up>(__k[_I])...,
      static_cast<_Up>(__l[_I])...,
      static_cast<_Up>(__m[_I])...
    };
  }

// Fourteen input vectors: the elements selected by _I... are cast to the
// element type of _To and concatenated in argument order.
template <typename _To, typename _From, size_t... _I>
  _GLIBCXX_SIMD_INTRINSIC constexpr _To
  __vector_convert(_From __a, _From __b, _From __c, _From __d, _From __e,
		   _From __f, _From __g, _From __h, _From __i, _From __j,
		   _From __k, _From __l, _From __m, _From __n,
		   index_sequence<_I...>)
  {
    using _Up = typename _VectorTraits<_To>::value_type;
    return _To{
      static_cast<_Up>(__a[_I])...,
      static_cast<_Up>(__b[_I])...,
      static_cast<_Up>(__c[_I])...,
      static_cast<_Up>(__d[_I])...,
      static_cast<_Up>(__e[_I])...,
      static_cast<_Up>(__f[_I])...,
      static_cast<_Up>(__g[_I])...,
      static_cast<_Up>(__h[_I])...,
      static_cast<_Up>(__i[_I])...,
      static_cast<_Up>(__j[_I])...,
      static_cast<_Up>(__k[_I])...,
      static_cast<_Up>(__l[_I])...,
      static_cast<_Up>(__m[_I])...,
      static_cast<_Up>(__n[_I])...
    };
  }

// Fifteen input vectors: the elements selected by _I... are cast to the
// element type of _To and concatenated in argument order.
template <typename _To, typename _From, size_t... _I>
  _GLIBCXX_SIMD_INTRINSIC constexpr _To
  __vector_convert(_From __a, _From __b, _From __c, _From __d, _From __e,
		   _From __f, _From __g, _From __h, _From __i, _From __j,
		   _From __k, _From __l, _From __m, _From __n, _From __o,
		   index_sequence<_I...>)
  {
    using _Up = typename _VectorTraits<_To>::value_type;
    return _To{
      static_cast<_Up>(__a[_I])...,
      static_cast<_Up>(__b[_I])...,
      static_cast<_Up>(__c[_I])...,
      static_cast<_Up>(__d[_I])...,
      static_cast<_Up>(__e[_I])...,
      static_cast<_Up>(__f[_I])...,
      static_cast<_Up>(__g[_I])...,
      static_cast<_Up>(__h[_I])...,
      static_cast<_Up>(__i[_I])...,
      static_cast<_Up>(__j[_I])...,
      static_cast<_Up>(__k[_I])...,
      static_cast<_Up>(__l[_I])...,
      static_cast<_Up>(__m[_I])...,
      static_cast<_Up>(__n[_I])...,
      static_cast<_Up>(__o[_I])...
    };
  }

// Sixteen input vectors: the elements selected by _I... are cast to the
// element type of _To and concatenated in argument order.
template <typename _To, typename _From, size_t... _I>
  _GLIBCXX_SIMD_INTRINSIC constexpr _To
  __vector_convert(_From __a, _From __b, _From __c, _From __d, _From __e,
		   _From __f, _From __g, _From __h, _From __i, _From __j,
		   _From __k, _From __l, _From __m, _From __n, _From __o,
		   _From __p, index_sequence<_I...>)
  {
    using _Up = typename _VectorTraits<_To>::value_type;
    return _To{
      static_cast<_Up>(__a[_I])...,
      static_cast<_Up>(__b[_I])...,
      static_cast<_Up>(__c[_I])...,
      static_cast<_Up>(__d[_I])...,
      static_cast<_Up>(__e[_I])...,
      static_cast<_Up>(__f[_I])...,
      static_cast<_Up>(__g[_I])...,
      static_cast<_Up>(__h[_I])...,
      static_cast<_Up>(__i[_I])...,
      static_cast<_Up>(__j[_I])...,
      static_cast<_Up>(__k[_I])...,
      static_cast<_Up>(__l[_I])...,
      static_cast<_Up>(__m[_I])...,
      static_cast<_Up>(__n[_I])...,
      static_cast<_Up>(__o[_I])...,
      static_cast<_Up>(__p[_I])...
    };
  }

// Defer actual conversion to the overload that takes an index sequence. Note
// that this function adds zeros or drops values off the end if you don't ensure
// matching width.
// Entry point for _SimdWrapper arguments (all with the same element count).
// Unwraps the arguments and dispatches to an index_sequence overload. With a
// single argument the number of converted elements is limited to what fits
// into _To; with several arguments every element of each input is used.
template <typename _To, typename... _From, size_t _FromSize>
  _GLIBCXX_SIMD_INTRINSIC constexpr _To
  __vector_convert(_SimdWrapper<_From, _FromSize>... __xs)
  {
#ifdef _GLIBCXX_SIMD_WORKAROUND_PR85048
    using _From0 = __first_of_pack_t<_From...>;
    using _FW = _SimdWrapper<_From0, _FromSize>;
    constexpr size_t __nargs = sizeof...(_From);
    if (!_FW::_S_is_partial && !(... && __xs._M_is_constprop()))
      {
	// power-of-two number of arguments: hand over to __convert_x86
	if constexpr ((__nargs & (__nargs - 1)) == 0)
	  return __convert_x86<_To>(__as_vector(__xs)...);
	// otherwise append a value-initialized wrapper and recurse until the
	// argument count is a power of two
	else
	  return __vector_convert<_To>(__xs..., _FW{});
      }
    else
#endif
      {
	constexpr size_t __count
	  = sizeof...(__xs) == 1
	      ? std::min(_VectorTraits<_To>::_S_full_size, int(_FromSize))
	      : _FromSize;
	return __vector_convert<_To>(__as_vector(__xs)...,
				     make_index_sequence<__count>());
      }
  }

// }}}
// __convert function{{{
// Converts one or more inputs of identical type to _To. Accepted inputs are
// scalars, builtin vectors and _SimdWrapper objects. _To may be a builtin
// vector type, a scalar type (a vector with one element per input element is
// then produced) or another type that has a _BuiltinType member and is
// constructible from it.
template <typename _To, typename _From, typename... _More>
  _GLIBCXX_SIMD_INTRINSIC constexpr auto
  __convert(_From __v0, _More... __vs)
  {
    // every argument must have the same type as the first one
    static_assert((true && ... && is_same_v<_From, _More>) );
    if constexpr (__is_vectorizable_v<_From>)
      {
	// scalar inputs: cast each one and brace-initialize the target vector
	using _ToVT = _VectorTraits<_To>;
	using _ToV = typename _ToVT::type;
	using _ToT = typename _ToVT::value_type;
	return _ToV{static_cast<_ToT>(__v0), static_cast<_ToT>(__vs)...};
      }
    else if constexpr (__is_vector_type_v<_From>)
      // builtin vectors: wrap them and start over
      return __convert<_To>(__as_wrapper(__v0), __as_wrapper(__vs)...);
    else // _SimdWrapper arguments
      {
	constexpr size_t __n_inputs = 1 + sizeof...(_More);
	constexpr size_t __input_size = _From::_S_size * __n_inputs;
	if constexpr (__is_vectorizable_v<_To>)
	  // scalar target type: convert to a vector of __input_size elements
	  return __convert<__vector_type_t<_To, __input_size>>(__v0, __vs...);
	else if constexpr (__is_vector_type_v<_To>)
	  {
	    static_assert(
	      sizeof...(_More) == 0
		|| _VectorTraits<_To>::_S_full_size >= __input_size,
	      "__convert(...) requires the input to fit into the output");
	    return __vector_convert<_To>(__v0, __vs...);
	  }
	else
	  // any other target: convert to its _BuiltinType and construct _To
	  return _To(__convert<typename _To::_BuiltinType>(__v0, __vs...));
      }
  }

// }}}
// __convert_all{{{
// Converts __v into array<_To, N>, where N is _NParts if non-zero or
// otherwise deduced from _To such that N * #elements(_To) <= #elements(__v).
// Note: this function may return less than all converted elements
// Template parameters:
//   _To      element type (arithmetic) or builtin vector type to convert to
//   _NParts  number of array entries to produce; 0 deduces it from the input
//   _Offset  index of the first input element to convert
//   _From    builtin vector or _SimdWrapper holding the input
// Returns an array<_To, N>.
//
// Changes relative to the previous revision:
//  * builtin vector arguments now forward _Offset when they are re-dispatched
//    as _SimdWrapper (it was silently reset to 0 before);
//  * a second, unreachable copy of the "4-byte integer -> 8-byte integer"
//    branch in the pre-SSE4.1 path was removed.
template <typename _To,
	  size_t _NParts = 0, // allows to convert fewer or more (only last
			      // _To, to be partially filled) than all
	  size_t _Offset = 0, // where to start, # of elements (not Bytes or
			      // Parts)
	  typename _From, typename _FromVT = _VectorTraits<_From>>
  _GLIBCXX_SIMD_INTRINSIC auto
  __convert_all(_From __v)
  {
    if constexpr (is_arithmetic_v<_To> && _NParts != 1)
      {
	// scalar target type: one array entry per converted element
	static_assert(_Offset < _FromVT::_S_full_size);
	constexpr auto _Np
	  = _NParts == 0 ? _FromVT::_S_partial_width - _Offset : _NParts;
	return __generate_from_n_evaluations<_Np, array<_To, _Np>>(
	  [&](auto __i) { return static_cast<_To>(__v[__i + _Offset]); });
      }
    else
      {
	static_assert(__is_vector_type_v<_To>);
	using _ToVT = _VectorTraits<_To>;
	if constexpr (__is_vector_type_v<_From>)
	  // builtin vector input: wrap it and start over, keeping _NParts and
	  // _Offset so that the request is not altered by the re-dispatch
	  return __convert_all<_To, _NParts, _Offset>(__as_wrapper(__v));
	else if constexpr (_NParts == 1)
	  {
	    // a single output vector: extract the part that contains _Offset
	    static_assert(_Offset % _ToVT::_S_full_size == 0);
	    return array<_To, 1>{__vector_convert<_To>(
	      __extract_part<_Offset / _ToVT::_S_full_size,
			     __div_roundup(_FromVT::_S_partial_width,
					   _ToVT::_S_full_size)>(__v))};
	  }
#if _GLIBCXX_SIMD_X86INTRIN // {{{
	// Without SSE4.1: widening conversions from integers (except 4-byte
	// integer -> double) are assembled from unpack and shift instructions.
	else if constexpr (!__have_sse4_1 && _Offset == 0
	  && is_integral_v<typename _FromVT::value_type>
	  && sizeof(typename _FromVT::value_type)
	      < sizeof(typename _ToVT::value_type)
	  && !(sizeof(typename _FromVT::value_type) == 4
	      && is_same_v<typename _ToVT::value_type, double>))
	  {
	    using _ToT = typename _ToVT::value_type;
	    using _FromT = typename _FromVT::value_type;
	    constexpr size_t _Np
	      = _NParts != 0
		  ? _NParts
		  : (_FromVT::_S_partial_width / _ToVT::_S_full_size);
	    using _R = array<_To, _Np>;
	    // __adjust modifies its input to have _Np (use _SizeConstant)
	    // entries so that no unnecessary intermediate conversions are
	    // requested and, more importantly, no intermediate conversions are
	    // missing
	    [[maybe_unused]] auto __adjust
	      = [](auto __n,
		   auto __vv) -> _SimdWrapper<_FromT, decltype(__n)::value> {
	      return __vector_bitcast<_FromT, decltype(__n)::value>(__vv);
	    };
	    [[maybe_unused]] const auto __vi = __to_intrin(__v);
	    // builds the result array from the low (__x0) and high (__x1)
	    // halves; __x1 is ignored when only one entry is requested
	    auto&& __make_array = [](auto __x0, [[maybe_unused]] auto __x1) {
	      if constexpr (_Np == 1)
		return _R{__intrin_bitcast<_To>(__x0)};
	      else
		return _R{__intrin_bitcast<_To>(__x0),
			  __intrin_bitcast<_To>(__x1)};
	    };

	    if constexpr (_Np == 0)
	      return _R{};
	    else if constexpr (sizeof(_FromT) == 1 && sizeof(_ToT) == 2)
	      {
		static_assert(is_integral_v<_FromT>);
		static_assert(is_integral_v<_ToT>);
		if constexpr (is_unsigned_v<_FromT>)
		  // zero extension: interleave with zero bytes
		  return __make_array(_mm_unpacklo_epi8(__vi, __m128i()),
				      _mm_unpackhi_epi8(__vi, __m128i()));
		else
		  // sign extension: duplicate each byte, then arithmetic shift
		  return __make_array(
		    _mm_srai_epi16(_mm_unpacklo_epi8(__vi, __vi), 8),
		    _mm_srai_epi16(_mm_unpackhi_epi8(__vi, __vi), 8));
	      }
	    else if constexpr (sizeof(_FromT) == 2 && sizeof(_ToT) == 4)
	      {
		static_assert(is_integral_v<_FromT>);
		if constexpr (is_floating_point_v<_ToT>)
		  {
		    // go through int first, then convert every int vector
		    const auto __ints
		      = __convert_all<__vector_type16_t<int>, _Np>(
			__adjust(_SizeConstant<_Np * 4>(), __v));
		    return __generate_from_n_evaluations<_Np, _R>(
		      [&](auto __i) {
			return __vector_convert<_To>(__as_wrapper(__ints[__i]));
		      });
		  }
		else if constexpr (is_unsigned_v<_FromT>)
		  return __make_array(_mm_unpacklo_epi16(__vi, __m128i()),
				      _mm_unpackhi_epi16(__vi, __m128i()));
		else
		  return __make_array(
		    _mm_srai_epi32(_mm_unpacklo_epi16(__vi, __vi), 16),
		    _mm_srai_epi32(_mm_unpackhi_epi16(__vi, __vi), 16));
	      }
	    else if constexpr (sizeof(_FromT) == 4 && sizeof(_ToT) == 8
			       && is_integral_v<_FromT> && is_integral_v<_ToT>)
	      {
		if constexpr (is_unsigned_v<_FromT>)
		  return __make_array(_mm_unpacklo_epi32(__vi, __m128i()),
				      _mm_unpackhi_epi32(__vi, __m128i()));
		else
		  // the upper 32 bits are a copy of the sign (shift by 31)
		  return __make_array(
		    _mm_unpacklo_epi32(__vi, _mm_srai_epi32(__vi, 31)),
		    _mm_unpackhi_epi32(__vi, _mm_srai_epi32(__vi, 31)));
	      }
	    else if constexpr (sizeof(_FromT) == 1 && sizeof(_ToT) >= 4
			       && is_signed_v<_FromT>)
	      {
		// replicate every byte four times; an arithmetic shift by 24
		// then yields the sign-extended 32-bit value
		const __m128i __vv[2] = {_mm_unpacklo_epi8(__vi, __vi),
					 _mm_unpackhi_epi8(__vi, __vi)};
		const __vector_type_t<int, 4> __vvvv[4] = {
		  __vector_bitcast<int>(_mm_unpacklo_epi16(__vv[0], __vv[0])),
		  __vector_bitcast<int>(_mm_unpackhi_epi16(__vv[0], __vv[0])),
		  __vector_bitcast<int>(_mm_unpacklo_epi16(__vv[1], __vv[1])),
		  __vector_bitcast<int>(_mm_unpackhi_epi16(__vv[1], __vv[1]))};
		if constexpr (sizeof(_ToT) == 4)
		  return __generate_from_n_evaluations<_Np, _R>([&](auto __i) {
		    return __vector_convert<_To>(
		      _SimdWrapper<int, 4>(__vvvv[__i] >> 24));
		  });
		else if constexpr (is_integral_v<_ToT>)
		  return __generate_from_n_evaluations<_Np, _R>([&](auto __i) {
		    const auto __signbits = __to_intrin(__vvvv[__i / 2] >> 31);
		    const auto __sx32 = __to_intrin(__vvvv[__i / 2] >> 24);
		    return __vector_bitcast<_ToT>(
		      __i % 2 == 0 ? _mm_unpacklo_epi32(__sx32, __signbits)
				   : _mm_unpackhi_epi32(__sx32, __signbits));
		  });
		else
		  return __generate_from_n_evaluations<_Np, _R>([&](auto __i) {
		    const _SimdWrapper<int, 4> __int4 = __vvvv[__i / 2] >> 24;
		    return __vector_convert<_To>(
		      __i % 2 == 0 ? __int4
				   : _SimdWrapper<int, 4>(
				     _mm_unpackhi_epi64(__to_intrin(__int4),
							__to_intrin(__int4))));
		  });
	      }
	    else if constexpr (sizeof(_FromT) == 1 && sizeof(_ToT) == 4)
	      {
		// two steps: 1 byte -> 2 bytes -> 4 bytes
		const auto __shorts = __convert_all<__vector_type16_t<
		  conditional_t<is_signed_v<_FromT>, short, unsigned short>>>(
		  __adjust(_SizeConstant<(_Np + 1) / 2 * 8>(), __v));
		return __generate_from_n_evaluations<_Np, _R>([&](auto __i) {
		  return __convert_all<_To>(__shorts[__i / 2])[__i % 2];
		});
	      }
	    else if constexpr (sizeof(_FromT) == 2 && sizeof(_ToT) == 8
			       && is_signed_v<_FromT> && is_integral_v<_ToT>)
	      {
		const __m128i __vv[2] = {_mm_unpacklo_epi16(__vi, __vi),
					 _mm_unpackhi_epi16(__vi, __vi)};
		const __vector_type16_t<int> __vvvv[4]
		  = {__vector_bitcast<int>(
		       _mm_unpacklo_epi32(_mm_srai_epi32(__vv[0], 16),
					  _mm_srai_epi32(__vv[0], 31))),
		     __vector_bitcast<int>(
		       _mm_unpackhi_epi32(_mm_srai_epi32(__vv[0], 16),
					  _mm_srai_epi32(__vv[0], 31))),
		     __vector_bitcast<int>(
		       _mm_unpacklo_epi32(_mm_srai_epi32(__vv[1], 16),
					  _mm_srai_epi32(__vv[1], 31))),
		     __vector_bitcast<int>(
		       _mm_unpackhi_epi32(_mm_srai_epi32(__vv[1], 16),
					  _mm_srai_epi32(__vv[1], 31)))};
		return __generate_from_n_evaluations<_Np, _R>([&](auto __i) {
		  return __vector_bitcast<_ToT>(__vvvv[__i]);
		});
	      }
	    else if constexpr (sizeof(_FromT) <= 2 && sizeof(_ToT) == 8)
	      {
		// two steps: go through (unsigned) int, then widen to 8 bytes
		const auto __ints
		  = __convert_all<__vector_type16_t<conditional_t<
		    is_signed_v<_FromT> || is_floating_point_v<_ToT>, int,
		    unsigned int>>>(
		    __adjust(_SizeConstant<(_Np + 1) / 2 * 4>(), __v));
		return __generate_from_n_evaluations<_Np, _R>([&](auto __i) {
		  return __convert_all<_To>(__ints[__i / 2])[__i % 2];
		});
	      }
	    else
	      __assert_unreachable<_To>();
	  }
#endif // _GLIBCXX_SIMD_X86INTRIN }}}
	else if constexpr ((_FromVT::_S_partial_width - _Offset)
			   > _ToVT::_S_full_size)
	  {
	    // more input elements than fit into one _To: convert part by part
	    /*
	    static_assert(
	      (_FromVT::_S_partial_width & (_FromVT::_S_partial_width - 1)) ==
	    0,
	      "__convert_all only supports power-of-2 number of elements.
	    Otherwise " "the return type cannot be array<_To, N>.");
	      */
	    constexpr size_t _NTotal
	      = (_FromVT::_S_partial_width - _Offset) / _ToVT::_S_full_size;
	    constexpr size_t _Np = _NParts == 0 ? _NTotal : _NParts;
	    // at most one extra, partially filled, part may be requested
	    static_assert(
	      _Np <= _NTotal
	      || (_Np == _NTotal + 1
		  && (_FromVT::_S_partial_width - _Offset) % _ToVT::_S_full_size
		       > 0));
	    using _R = array<_To, _Np>;
	    if constexpr (_Np == 1)
	      return _R{__vector_convert<_To>(
		__extract_part<_Offset, _FromVT::_S_partial_width,
			       _ToVT::_S_full_size>(__v))};
	    else
	      return __generate_from_n_evaluations<_Np, _R>([&](
		auto __i) constexpr {
		auto __part
		  = __extract_part<__i * _ToVT::_S_full_size + _Offset,
				   _FromVT::_S_partial_width,
				   _ToVT::_S_full_size>(__v);
		return __vector_convert<_To>(__part);
	      });
	  }
	else if constexpr (_Offset == 0)
	  // everything fits into a single _To
	  return array<_To, 1>{__vector_convert<_To>(__v)};
	else
	  // the remainder after _Offset fits into a single _To
	  return array<_To, 1>{__vector_convert<_To>(
	    __extract_part<_Offset, _FromVT::_S_partial_width,
			   _FromVT::_S_partial_width - _Offset>(__v))};
      }
  }

// }}}

// _GnuTraits {{{
template <typename _Tp, typename _Mp, typename _Abi, size_t _Np>
  struct _GnuTraits
  {
    using _IsValid = true_type;
    using _SimdImpl = typename _Abi::_SimdImpl;
    using _MaskImpl = typename _Abi::_MaskImpl;

    // simd and simd_mask member types {{{
    using _SimdMember = _SimdWrapper<_Tp, _Np>;
    using _MaskMember = _SimdWrapper<_Mp, _Np>;
    static constexpr size_t _S_simd_align = alignof(_SimdMember);
    static constexpr size_t _S_mask_align = alignof(_MaskMember);

    // }}}
    // size metadata {{{
    static constexpr size_t _S_full_size = _SimdMember::_S_full_size;
    static constexpr bool _S_is_partial = _SimdMember::_S_is_partial;

    // }}}
    // _SimdBase / base class for simd, providing extra conversions {{{
    struct _SimdBase2
    {
      _GLIBCXX_SIMD_ALWAYS_INLINE
      explicit operator __intrinsic_type_t<_Tp, _Np>() const
      {
	return __to_intrin(static_cast<const simd<_Tp, _Abi>*>(this)->_M_data);
      }
      _GLIBCXX_SIMD_ALWAYS_INLINE
      explicit operator __vector_type_t<_Tp, _Np>() const
      {
	return static_cast<const simd<_Tp, _Abi>*>(this)->_M_data.__builtin();
      }
    };

    struct _SimdBase1
    {
      _GLIBCXX_SIMD_ALWAYS_INLINE
      explicit operator __intrinsic_type_t<_Tp, _Np>() const
      { return __data(*static_cast<const simd<_Tp, _Abi>*>(this)); }
    };

    using _SimdBase = conditional_t<
      is_same<__intrinsic_type_t<_Tp, _Np>, __vector_type_t<_Tp, _Np>>::value,
      _SimdBase1, _SimdBase2>;

    // }}}
    // _MaskBase {{{
    struct _MaskBase2
    {
      _GLIBCXX_SIMD_ALWAYS_INLINE
      explicit operator __intrinsic_type_t<_Tp, _Np>() const
      {
	return static_cast<const simd_mask<_Tp, _Abi>*>(this)
	  ->_M_data.__intrin();
      }
      _GLIBCXX_SIMD_ALWAYS_INLINE
      explicit operator __vector_type_t<_Tp, _Np>() const
      {
	return static_cast<const simd_mask<_Tp, _Abi>*>(this)->_M_data._M_data;
      }
    };

    struct _MaskBase1
    {
      _GLIBCXX_SIMD_ALWAYS_INLINE
      explicit operator __intrinsic_type_t<_Tp, _Np>() const
      { return __data(*static_cast<const simd_mask<_Tp, _Abi>*>(this)); }
    };

    using _MaskBase = conditional_t<
      is_same<__intrinsic_type_t<_Tp, _Np>, __vector_type_t<_Tp, _Np>>::value,
      _MaskBase1, _MaskBase2>;

    // }}}
    // _MaskCastType {{{
    // parameter type of one explicit simd_mask constructor
    class _MaskCastType
    {
      using _Up = __intrinsic_type_t<_Tp, _Np>;
      _Up _M_data;

    public:
      _GLIBCXX_SIMD_ALWAYS_INLINE
      _MaskCastType(_Up __x) : _M_data(__x) {}
      _GLIBCXX_SIMD_ALWAYS_INLINE
      operator _MaskMember() const { return _M_data; }
    };

    // }}}
    // _SimdCastType {{{
    // parameter type of one explicit simd constructor
    class _SimdCastType1
    {
      using _Ap = __intrinsic_type_t<_Tp, _Np>;
      _SimdMember _M_data;

    public:
      _GLIBCXX_SIMD_ALWAYS_INLINE
      _SimdCastType1(_Ap __a) : _M_data(__vector_bitcast<_Tp>(__a)) {}
      _GLIBCXX_SIMD_ALWAYS_INLINE
      operator _SimdMember() const { return _M_data; }
    };

    class _SimdCastType2
    {
      using _Ap = __intrinsic_type_t<_Tp, _Np>;
      using _Bp = __vector_type_t<_Tp, _Np>;
      _SimdMember _M_data;

    public:
      _GLIBCXX_SIMD_ALWAYS_INLINE
      _SimdCastType2(_Ap __a) : _M_data(__vector_bitcast<_Tp>(__a)) {}
      _GLIBCXX_SIMD_ALWAYS_INLINE
      _SimdCastType2(_Bp __b) : _M_data(__b) {}
      _GLIBCXX_SIMD_ALWAYS_INLINE
      operator _SimdMember() const { return _M_data; }
    };

    using _SimdCastType = conditional_t<
      is_same<__intrinsic_type_t<_Tp, _Np>, __vector_type_t<_Tp, _Np>>::value,
      _SimdCastType1, _SimdCastType2>;
    //}}}
  };

// }}}
// Forward declarations of the implementation classes. The ABI tags declared
// after this point pick one _CommonImpl/_SimdImpl/_MaskImpl set depending on
// preprocessor conditions. The templates take the ABI tag as first parameter;
// the second parameter defaults to __detail::__odr_helper.
// NOTE(review): presumably the defaulted parameter keeps instantiations
// distinct between differently-compiled translation units - confirm.
struct _CommonImplX86;
struct _CommonImplNeon;
struct _CommonImplBuiltin;
template <typename _Abi, typename = __detail::__odr_helper> struct _SimdImplBuiltin;
template <typename _Abi, typename = __detail::__odr_helper> struct _MaskImplBuiltin;
template <typename _Abi, typename = __detail::__odr_helper> struct _SimdImplX86;
template <typename _Abi, typename = __detail::__odr_helper> struct _MaskImplX86;
template <typename _Abi, typename = __detail::__odr_helper> struct _SimdImplNeon;
template <typename _Abi, typename = __detail::__odr_helper> struct _MaskImplNeon;
template <typename _Abi, typename = __detail::__odr_helper> struct _SimdImplPpc;
template <typename _Abi, typename = __detail::__odr_helper> struct _MaskImplPpc;

// simd_abi::_VecBuiltin {{{
// ABI tag parameterized on the number of bytes (_UsedBytes) occupied by the
// elements. Masks for this tag are stored as _SimdWrapper of an integer type
// with the same size as the element type (see _MaskValueType/_MaskMember).
template <int _UsedBytes>
  struct simd_abi::_VecBuiltin
  {
    // Number of _Tp elements that fit into _UsedBytes (integer division).
    template <typename _Tp>
      static constexpr size_t _S_size = _UsedBytes / sizeof(_Tp);

    // validity traits {{{
    // The tag on its own is only valid for more than one byte.
    struct _IsValidAbiTag : __bool_constant<(_UsedBytes > 1)> {};

    // True if _UsedBytes holds at least two _Tp, is a multiple of sizeof(_Tp),
    // does not exceed __vectorized_sizeof<_Tp>(), and - when __have_avx512f is
    // set - is at most 32 bytes.
    template <typename _Tp>
      struct _IsValidSizeFor
	: __bool_constant<(_UsedBytes / sizeof(_Tp) > 1
			   && _UsedBytes % sizeof(_Tp) == 0
			   && _UsedBytes <= __vectorized_sizeof<_Tp>()
			   && (!__have_avx512f || _UsedBytes <= 32))> {};

    // Valid if the tag is valid, _Tp is vectorizable, and the size fits _Tp.
    template <typename _Tp>
      struct _IsValid : conjunction<_IsValidAbiTag, __is_vectorizable<_Tp>,
				    _IsValidSizeFor<_Tp>> {};

    template <typename _Tp>
      static constexpr bool _S_is_valid_v = _IsValid<_Tp>::value;

    // }}}
    // _SimdImpl/_MaskImpl {{{
    // Target-specific implementation selection: x86 intrinsics first, then
    // NEON, otherwise the generic builtin implementation (with the Ppc
    // Simd/Mask implementations if __ALTIVEC__ is defined).
#if _GLIBCXX_SIMD_X86INTRIN
    using _CommonImpl = _CommonImplX86;
    using _SimdImpl = _SimdImplX86<_VecBuiltin<_UsedBytes>>;
    using _MaskImpl = _MaskImplX86<_VecBuiltin<_UsedBytes>>;
#elif _GLIBCXX_SIMD_HAVE_NEON
    using _CommonImpl = _CommonImplNeon;
    using _SimdImpl = _SimdImplNeon<_VecBuiltin<_UsedBytes>>;
    using _MaskImpl = _MaskImplNeon<_VecBuiltin<_UsedBytes>>;
#else
    using _CommonImpl = _CommonImplBuiltin;
#ifdef __ALTIVEC__
    using _SimdImpl = _SimdImplPpc<_VecBuiltin<_UsedBytes>>;
    using _MaskImpl = _MaskImplPpc<_VecBuiltin<_UsedBytes>>;
#else
    using _SimdImpl = _SimdImplBuiltin<_VecBuiltin<_UsedBytes>>;
    using _MaskImpl = _MaskImplBuiltin<_VecBuiltin<_UsedBytes>>;
#endif
#endif

    // }}}
    // __traits {{{
    // Mask elements are integers of the same size as _Tp.
    template <typename _Tp>
      using _MaskValueType = __int_for_sizeof_t<_Tp>;

    // _GnuTraits for valid element types, _InvalidTraits otherwise.
    template <typename _Tp>
      using __traits
	= conditional_t<_S_is_valid_v<_Tp>,
			_GnuTraits<_Tp, _MaskValueType<_Tp>,
				   _VecBuiltin<_UsedBytes>, _S_size<_Tp>>,
			_InvalidTraits>;

    //}}}
    // size metadata {{{
    template <typename _Tp>
      static constexpr size_t _S_full_size = __traits<_Tp>::_S_full_size;

    template <typename _Tp>
      static constexpr bool _S_is_partial = __traits<_Tp>::_S_is_partial;

    // }}}
    // implicit masks {{{
    template <typename _Tp>
      using _MaskMember = _SimdWrapper<_MaskValueType<_Tp>, _S_size<_Tp>>;

    // Returns the mask selecting the first _S_size<_Tp> elements: all bits set
    // if the mask member is not partial, otherwise -1 for indices below
    // _S_size<_Tp> and 0 for the remaining (padding) elements.
    template <typename _Tp>
      _GLIBCXX_SIMD_INTRINSIC static constexpr _MaskMember<_Tp>
      _S_implicit_mask()
      {
	using _UV = typename _MaskMember<_Tp>::_BuiltinType;
	if constexpr (!_MaskMember<_Tp>::_S_is_partial)
	  return ~_UV();
	else
	  {
	    constexpr auto __size = _S_size<_Tp>;
	    _GLIBCXX_SIMD_USE_CONSTEXPR auto __r = __generate_vector<_UV>(
	      [](auto __i) constexpr { return __i < __size ? -1 : 0; });
	    return __r;
	  }
      }

    // The implicit mask, bitcast to a vector of _Tp and converted to the
    // intrinsic type.
    template <typename _Tp>
      _GLIBCXX_SIMD_INTRINSIC static constexpr __intrinsic_type_t<_Tp,
								  _S_size<_Tp>>
      _S_implicit_mask_intrin()
      {
	return __to_intrin(
	  __vector_bitcast<_Tp>(_S_implicit_mask<_Tp>()._M_data));
      }

    // Returns __x unchanged if the mask member is not partial; otherwise
    // returns __x ANDed with the implicit mask (clearing the padding).
    template <typename _TW, typename _TVT = _VectorTraits<_TW>>
      _GLIBCXX_SIMD_INTRINSIC static constexpr _TW _S_masked(_TW __x)
      {
	using _Tp = typename _TVT::value_type;
	if constexpr (!_MaskMember<_Tp>::_S_is_partial)
	  return __x;
	else
	  return __and(__as_vector(__x),
		       __vector_bitcast<_Tp>(_S_implicit_mask<_Tp>()));
      }

    // Returns __x with its padding elements replaced by nonzero values. For
    // non-partial vectors __x is returned unchanged. Integral padding gets all
    // bits set; for other element types the padding is set to _Tp(1).
    template <typename _TW, typename _TVT = _VectorTraits<_TW>>
      _GLIBCXX_SIMD_INTRINSIC static constexpr auto
      __make_padding_nonzero(_TW __x)
      {
	using _Tp = typename _TVT::value_type;
	if constexpr (!_S_is_partial<_Tp>)
	  return __x;
	else
	  {
	    _GLIBCXX_SIMD_USE_CONSTEXPR auto __implicit_mask
	      = __vector_bitcast<_Tp>(_S_implicit_mask<_Tp>());
	    if constexpr (is_integral_v<_Tp>)
	      return __or(__x, ~__implicit_mask);
	    else
	      {
		// __one holds _Tp(1) in the padding and 0 elsewhere
		_GLIBCXX_SIMD_USE_CONSTEXPR auto __one
		  = __andnot(__implicit_mask,
			     __vector_broadcast<_S_full_size<_Tp>>(_Tp(1)));
		// it's not enough to return `x | 1_in_padding` because the
		// padding in x might be inf or nan (independent of
		// __FINITE_MATH_ONLY__, because it's about padding bits)
		return __or(__and(__x, __implicit_mask), __one);
	      }
	  }
      }
    // }}}
  };

// }}}
// simd_abi::_VecBltnBtmsk {{{
// ABI tag parameterized on the number of bytes (_UsedBytes) occupied by the
// elements. Unlike _VecBuiltin, masks are stored as _SimdWrapper<bool, N>
// (see _MaskMember).
template <int _UsedBytes>
  struct simd_abi::_VecBltnBtmsk
  {
    // Number of _Tp elements that fit into _UsedBytes (integer division).
    template <typename _Tp>
      static constexpr size_t _S_size = _UsedBytes / sizeof(_Tp);

    // validity traits {{{
    struct _IsValidAbiTag : __bool_constant<(_UsedBytes > 1)> {};

    // True if _UsedBytes holds at least two _Tp, is a multiple of sizeof(_Tp),
    // is at most 64, and is either larger than 32 or __have_avx512vl is set.
    template <typename _Tp>
      struct _IsValidSizeFor
	: __bool_constant<(_UsedBytes / sizeof(_Tp) > 1
			   && _UsedBytes % sizeof(_Tp) == 0 && _UsedBytes <= 64
			   && (_UsedBytes > 32 || __have_avx512vl))> {};

    // Bitmasks require at least AVX512F. If sizeof(_Tp) < 4 the AVX512BW is also
    // required.
    template <typename _Tp>
      struct _IsValid
	: conjunction<
	    _IsValidAbiTag, __bool_constant<__have_avx512f>,
	    __bool_constant<__have_avx512bw || (sizeof(_Tp) >= 4)>,
	    __bool_constant<(__vectorized_sizeof<_Tp>() > sizeof(_Tp))>,
	    _IsValidSizeFor<_Tp>> {};

    template <typename _Tp>
      static constexpr bool _S_is_valid_v = _IsValid<_Tp>::value;

    // }}}
    // simd/_MaskImpl {{{
    // Only the x86 implementation exists for this tag; otherwise the aliases
    // name a template that is declared but not defined here.
  #if _GLIBCXX_SIMD_X86INTRIN
    using _CommonImpl = _CommonImplX86;
    using _SimdImpl = _SimdImplX86<_VecBltnBtmsk<_UsedBytes>>;
    using _MaskImpl = _MaskImplX86<_VecBltnBtmsk<_UsedBytes>>;
  #else
    template <int>
      struct _MissingImpl;

    using _CommonImpl = _MissingImpl<_UsedBytes>;
    using _SimdImpl = _MissingImpl<_UsedBytes>;
    using _MaskImpl = _MissingImpl<_UsedBytes>;
  #endif

    // }}}
    // __traits {{{
    template <typename _Tp>
      using _MaskMember = _SimdWrapper<bool, _S_size<_Tp>>;

    // _GnuTraits (with bool as mask element type) for valid element types,
    // _InvalidTraits otherwise.
    template <typename _Tp>
      using __traits = conditional_t<
	_S_is_valid_v<_Tp>,
	_GnuTraits<_Tp, bool, _VecBltnBtmsk<_UsedBytes>, _S_size<_Tp>>,
	_InvalidTraits>;

    //}}}
    // size metadata {{{
    template <typename _Tp>
      static constexpr size_t _S_full_size = __traits<_Tp>::_S_full_size;
    template <typename _Tp>
      static constexpr bool _S_is_partial = __traits<_Tp>::_S_is_partial;

    // }}}
    // implicit mask {{{
  private:
    template <typename _Tp>
      using _ImplicitMask = _SimdWrapper<bool, _S_size<_Tp>>;

  public:
    // Returns a bitmask with the low _Np bits set. If _Np is not smaller than
    // the number of bits in the storage type, all bits are set instead (the
    // shift by _Np is then not evaluated).
    template <size_t _Np>
      _GLIBCXX_SIMD_INTRINSIC static constexpr __bool_storage_member_type_t<_Np>
      __implicit_mask_n()
      {
	using _Tp = __bool_storage_member_type_t<_Np>;
	return _Np < sizeof(_Tp) * __CHAR_BIT__ ? _Tp((1ULL << _Np) - 1) : ~_Tp();
      }

    // Bitmask selecting the first _S_size<_Tp> elements.
    template <typename _Tp>
      _GLIBCXX_SIMD_INTRINSIC static constexpr _ImplicitMask<_Tp>
      _S_implicit_mask()
      { return __implicit_mask_n<_S_size<_Tp>>(); }

    // Same value as _S_implicit_mask, returned as the raw storage type.
    template <typename _Tp>
      _GLIBCXX_SIMD_INTRINSIC static constexpr __bool_storage_member_type_t<
	_S_size<_Tp>>
      _S_implicit_mask_intrin()
      { return __implicit_mask_n<_S_size<_Tp>>(); }

    // For bitmasks (_Tp == bool): if _Np is below 8 or not a power of two, AND
    // __x with a mask of the low _Np bits; otherwise return __x unchanged.
    // For all other _Tp, forward the wrapped vector to the overload below.
    template <typename _Tp, size_t _Np>
      _GLIBCXX_SIMD_INTRINSIC static constexpr _SimdWrapper<_Tp, _Np>
      _S_masked(_SimdWrapper<_Tp, _Np> __x)
      {
	if constexpr (is_same_v<_Tp, bool>)
	  if constexpr (_Np < 8 || (_Np & (_Np - 1)) != 0)
	    return _MaskImpl::_S_bit_and(
	      __x, _SimdWrapper<_Tp, _Np>(
		     __bool_storage_member_type_t<_Np>((1ULL << _Np) - 1)));
	  else
	    return __x;
	else
	  return _S_masked(__x._M_data);
      }

    // Vector overload: for partial vectors, blends a value-initialized wrapper
    // with __x using the implicit mask; otherwise returns __x unchanged.
    // NOTE(review): presumably _S_blend(k, at0, at1) selects at1 where k is
    // set, i.e. the padding becomes zero - confirm against _CommonImpl.
    template <typename _TV>
      _GLIBCXX_SIMD_INTRINSIC static constexpr _TV
      _S_masked(_TV __x)
      {
	using _Tp = typename _VectorTraits<_TV>::value_type;
	static_assert(
	  !__is_bitmask_v<_TV>,
	  "_VecBltnBtmsk::_S_masked cannot work on bitmasks, since it doesn't "
	  "know the number of elements. Use _SimdWrapper<bool, N> instead.");
	if constexpr (_S_is_partial<_Tp>)
	  {
	    constexpr size_t _Np = _S_size<_Tp>;
	    return __make_dependent_t<_TV, _CommonImpl>::_S_blend(
	      _S_implicit_mask<_Tp>(), _SimdWrapper<_Tp, _Np>(),
	      _SimdWrapper<_Tp, _Np>(__x));
	  }
	else
	  return __x;
      }

    // Returns __x with nonzero padding elements; non-partial vectors are
    // returned unchanged. Integral element types: OR with a vector that is 0
    // for indices below _Np and 1 in the padding. Other element types: blend
    // of a broadcast _Tp(1) and __x under the implicit mask.
    template <typename _TV, typename _TVT = _VectorTraits<_TV>>
      _GLIBCXX_SIMD_INTRINSIC static constexpr auto
      __make_padding_nonzero(_TV __x)
      {
	using _Tp = typename _TVT::value_type;
	if constexpr (!_S_is_partial<_Tp>)
	  return __x;
	else
	  {
	    constexpr size_t _Np = _S_size<_Tp>;
	    if constexpr (is_integral_v<typename _TVT::value_type>)
	      return __x
		     | __generate_vector<_Tp, _S_full_size<_Tp>>(
		       [](auto __i) -> _Tp {
			 if (__i < _Np)
			   return 0;
			 else
			   return 1;
		       });
	    else
	      return __make_dependent_t<_TV, _CommonImpl>::_S_blend(
		       _S_implicit_mask<_Tp>(),
		       _SimdWrapper<_Tp, _Np>(
			 __vector_broadcast<_S_full_size<_Tp>>(_Tp(1))),
		       _SimdWrapper<_Tp, _Np>(__x))
		._M_data;
	  }
      }

    // }}}
  };

//}}}
// _CommonImplBuiltin {{{
// Helpers shared by the simd and mask implementations of the generic builtin
// backend: raw loads/stores via memcpy, bitmask to bool-array stores, and a
// vector blend.
struct _CommonImplBuiltin
{
  // _S_converts_via_decomposition{{{
  // This lists all cases where a __vector_convert needs to fall back to
  // conversion of individual scalars (i.e. decompose the input vector into
  // scalars, convert, compose output vector). In those cases, _S_masked_load &
  // _S_masked_store prefer to use the _S_bit_iteration implementation.
  // Here: true whenever source and destination element sizes differ (_ToSize
  // is not used by this definition).
  template <typename _From, typename _To, size_t _ToSize>
    static inline constexpr bool __converts_via_decomposition_v
      = sizeof(_From) != sizeof(_To);

  // }}}
  // _S_load{{{
  // Copies _Bytes bytes from __p into a vector of _Np elements of type _Tp.
  // If the vector object is larger than _Bytes, it is value-initialized
  // before the copy, so the bytes not loaded are zero.
  template <typename _Tp, size_t _Np, size_t _Bytes = _Np * sizeof(_Tp)>
    _GLIBCXX_SIMD_INTRINSIC static __vector_type_t<_Tp, _Np>
    _S_load(const void* __p)
    {
      static_assert(_Np > 1);
      static_assert(_Bytes % sizeof(_Tp) == 0);
      using _Rp = __vector_type_t<_Tp, _Np>;
      if constexpr (sizeof(_Rp) == _Bytes)
	{
	  // the memcpy overwrites every byte, no initialization needed
	  _Rp __r;
	  __builtin_memcpy(&__r, __p, _Bytes);
	  return __r;
	}
      else
	{
#ifdef _GLIBCXX_SIMD_WORKAROUND_PR90424
	  // With the PR90424 workaround the copy goes through a vector of a
	  // different element type _Up: for integral _Tp the widest of
	  // long long/int/short/signed char whose size divides _Bytes; for
	  // other _Tp either _Tp itself or double.
	  using _Up = conditional_t<
	    is_integral_v<_Tp>,
	    conditional_t<_Bytes % 4 == 0,
			  conditional_t<_Bytes % 8 == 0, long long, int>,
			  conditional_t<_Bytes % 2 == 0, short, signed char>>,
	    conditional_t<(_Bytes < 8 || _Np % 2 == 1 || _Np == 2), _Tp,
			  double>>;
	  using _V = __vector_type_t<_Up, _Np * sizeof(_Tp) / sizeof(_Up)>;
	  if constexpr (sizeof(_V) != sizeof(_Rp))
	    { // on i386 with 4 < _Bytes <= 8
	      _Rp __r{};
	      __builtin_memcpy(&__r, __p, _Bytes);
	      return __r;
	    }
	  else
#else // _GLIBCXX_SIMD_WORKAROUND_PR90424
	  using _V = _Rp;
#endif // _GLIBCXX_SIMD_WORKAROUND_PR90424
	    {
	      // this compound statement is the `else` branch when the
	      // workaround is enabled and a plain block otherwise
	      _V __r{};
	      static_assert(_Bytes <= sizeof(_V));
	      __builtin_memcpy(&__r, __p, _Bytes);
	      return reinterpret_cast<_Rp>(__r);
	    }
	}
    }

  // }}}
  // _S_store {{{
  // Copies the first _Bytes bytes of __x to __addr, where _Bytes is
  // sizeof(__x) if _ReqBytes is 0 and _ReqBytes otherwise.
  template <size_t _ReqBytes = 0, typename _TV>
    _GLIBCXX_SIMD_INTRINSIC static void _S_store(_TV __x, void* __addr)
    {
      constexpr size_t _Bytes = _ReqBytes == 0 ? sizeof(__x) : _ReqBytes;
      static_assert(sizeof(__x) >= _Bytes);

      if constexpr (__is_vector_type_v<_TV>)
	{
	  using _Tp = typename _VectorTraits<_TV>::value_type;
	  constexpr size_t _Np = _Bytes / sizeof(_Tp);
	  // _Bytes must be a whole number of elements
	  static_assert(_Np * sizeof(_Tp) == _Bytes);

#ifdef _GLIBCXX_SIMD_WORKAROUND_PR90424
	  using _Up = conditional_t<
	    (is_integral_v<_Tp> || _Bytes < 4),
	    conditional_t<(sizeof(__x) > sizeof(long long)), long long, _Tp>,
	    float>;
	  const auto __v = __vector_bitcast<_Up>(__x);
#else // _GLIBCXX_SIMD_WORKAROUND_PR90424
	  const __vector_type_t<_Tp, _Np> __v = __x;
#endif // _GLIBCXX_SIMD_WORKAROUND_PR90424

	  if constexpr ((_Bytes & (_Bytes - 1)) != 0)
	    {
	      // _Bytes is not a power of two: first copy the next power of two
	      // bytes into an aligned temporary, then _Bytes bytes from there.
	      // NOTE(review): presumably done so the first copy is a plain
	      // full-width vector store - confirm.
	      constexpr size_t _MoreBytes = std::__bit_ceil(_Bytes);
	      alignas(decltype(__v)) char __tmp[_MoreBytes];
	      __builtin_memcpy(__tmp, &__v, _MoreBytes);
	      __builtin_memcpy(__addr, __tmp, _Bytes);
	    }
	  else
	    __builtin_memcpy(__addr, &__v, _Bytes);
	}
      else
	__builtin_memcpy(__addr, &__x, _Bytes);
    }

  // Stores the _Np elements (_Np * sizeof(_Tp) bytes) of __x to __addr.
  template <typename _Tp, size_t _Np>
    _GLIBCXX_SIMD_INTRINSIC static void _S_store(_SimdWrapper<_Tp, _Np> __x,
						 void* __addr)
    { _S_store<_Np * sizeof(_Tp)>(__x._M_data, __addr); }

  // }}}
  // _S_store_bool_array(_BitMask) {{{
  // Writes the _Np bits of __x to __mem as one byte (0 or 1) per bit. The
  // multiply-and-mask expressions move bit k of the input to bit 8*k of an
  // integer, which is then stored with _S_store.
  // NOTE(review): the integer stores assume the low-order byte is written to
  // the lowest address - confirm for big-endian targets.
  template <size_t _Np, bool _Sanitized>
    _GLIBCXX_SIMD_INTRINSIC static constexpr void
    _S_store_bool_array(_BitMask<_Np, _Sanitized> __x, bool* __mem)
    {
      if constexpr (_Np == 1)
	__mem[0] = __x[0];
      else if constexpr (_Np == 2)
	{
	  // 0x81 = 1 + (1 << 7): bit 1 lands on bit 8
	  short __bool2 = (__x._M_to_bits() * 0x81) & 0x0101;
	  _S_store<_Np>(__bool2, __mem);
	}
      else if constexpr (_Np == 3)
	{
	  // 0x4081 additionally moves bit 2 to bit 16
	  int __bool3 = (__x._M_to_bits() * 0x4081) & 0x010101;
	  _S_store<_Np>(__bool3, __mem);
	}
      else
	{
	  // Process the mask in steps of 4 bits. A tail of 5 to 7 bits is
	  // written with a single 64-bit expansion; the following iteration
	  // then has fewer than 4 bits remaining and does nothing.
	  __execute_n_times<__div_roundup(_Np, 4)>([&](auto __i) {
	    constexpr int __offset = __i * 4;
	    constexpr int __remaining = _Np - __offset;
	    if constexpr (__remaining > 4 && __remaining <= 7)
	      {
		const _ULLong __bool7
		  = (__x.template _M_extract<__offset>()._M_to_bits()
		     * 0x40810204081ULL)
		    & 0x0101010101010101ULL;
		_S_store<__remaining>(__bool7, __mem + __offset);
	      }
	    else if constexpr (__remaining >= 4)
	      {
		int __bits = __x.template _M_extract<__offset>()._M_to_bits();
		// more bits follow: keep only the 4 handled in this step
		if constexpr (__remaining > 7)
		  __bits &= 0xf;
		const int __bool4 = (__bits * 0x204081) & 0x01010101;
		_S_store<4>(__bool4, __mem + __offset);
	      }
	  });
	}
    }

  // }}}
  // _S_blend{{{
  // Element-wise select: yields __at1 where __k is nonzero and __at0
  // elsewhere.
  template <typename _Tp, size_t _Np>
    _GLIBCXX_SIMD_INTRINSIC static constexpr auto
    _S_blend(_SimdWrapper<__int_for_sizeof_t<_Tp>, _Np> __k,
	     _SimdWrapper<_Tp, _Np> __at0, _SimdWrapper<_Tp, _Np> __at1)
    { return __k._M_data ? __at1._M_data : __at0._M_data; }

  // }}}
};

// }}}
// _SimdImplBuiltin {{{1
template <typename _Abi, typename>
  struct _SimdImplBuiltin
  {
    // member types {{{2
    // Upper bound (in bytes) for a single store issued by _S_store and
    // _S_masked_store; both read this value through _SuperImpl.
    template <typename _Tp>
      static constexpr size_t _S_max_store_size = 16;

    using abi_type = _Abi;

    // Tag parameter type used to pass the element type to functions such as
    // _S_load and _S_generator.
    template <typename _Tp>
      using _TypeTag = _Tp*;

    // Storage types and sizes are taken from the ABI tag.
    template <typename _Tp>
      using _SimdMember = typename _Abi::template __traits<_Tp>::_SimdMember;

    template <typename _Tp>
      using _MaskMember = typename _Abi::template _MaskMember<_Tp>;

    template <typename _Tp>
      static constexpr size_t _S_size = _Abi::template _S_size<_Tp>;

    template <typename _Tp>
      static constexpr size_t _S_full_size = _Abi::template _S_full_size<_Tp>;

    // _SuperImpl is the _SimdImpl selected by the ABI tag; calls through it
    // reach that implementation's version of a function.
    using _CommonImpl = typename _Abi::_CommonImpl;
    using _SuperImpl = typename _Abi::_SimdImpl;
    using _MaskImpl = typename _Abi::_MaskImpl;

    // _M_make_simd(_SimdWrapper/__intrinsic_type_t) {{{2
    // Wraps the given _SimdWrapper in a simd object via the __private_init
    // constructor.
    template <typename _Tp, size_t _Np>
      _GLIBCXX_SIMD_INTRINSIC static simd<_Tp, _Abi>
      _M_make_simd(_SimdWrapper<_Tp, _Np> __x)
      {
	using _Rp = simd<_Tp, _Abi>;
	return _Rp{__private_init, __x};
      }

    // Same as above for an intrinsic-typed argument, which is first bitcast to
    // a vector of _Tp.
    template <typename _Tp, size_t _Np>
      _GLIBCXX_SIMD_INTRINSIC static simd<_Tp, _Abi>
      _M_make_simd(__intrinsic_type_t<_Tp, _Np> __x)
      {
	using _Rp = simd<_Tp, _Abi>;
	return _Rp{__private_init, __vector_bitcast<_Tp>(__x)};
      }

    // _S_broadcast {{{2
    // Returns a vector of _S_full_size<_Tp> elements, produced by
    // __vector_broadcast from __x.
    template <typename _Tp>
      _GLIBCXX_SIMD_INTRINSIC static constexpr _SimdMember<_Tp>
      _S_broadcast(_Tp __x) noexcept
      {
	constexpr size_t __width = _S_full_size<_Tp>;
	return __vector_broadcast<__width>(__x);
      }

    // _S_generator {{{2
    // Builds a vector by calling __gen with every index below _S_size<_Tp>;
    // elements beyond that (padding) are generated as 0.
    template <typename _Fp, typename _Tp>
      inline static constexpr _SimdMember<_Tp> _S_generator(_Fp&& __gen,
							    _TypeTag<_Tp>)
      {
	constexpr size_t __width = _S_full_size<_Tp>;
	return __generate_vector<_Tp, __width>([&](auto __idx) constexpr {
	  if constexpr (!(__idx < _S_size<_Tp>))
	    return 0; // padding element
	  else
	    return __gen(__idx);
	});
      }

    // _S_load {{{2
    // Loads _S_size<_Tp> elements of type _Up from __mem and returns them
    // converted to _Tp. The strategy is chosen at compile time from the
    // element types and the total number of bytes to load.
    template <typename _Tp, typename _Up>
      _GLIBCXX_SIMD_INTRINSIC static _SimdMember<_Tp>
      _S_load(const _Up* __mem, _TypeTag<_Tp>) noexcept
      {
	constexpr size_t _Np = _S_size<_Tp>;
	// largest chunk (in bytes) used for a single load of _Up elements:
	// 64, 32 or 16 depending on the __have_avx* flags and on _Up
	constexpr size_t __max_load_size
	  = (sizeof(_Up) >= 4 && __have_avx512f) || __have_avx512bw   ? 64
	    : (is_floating_point_v<_Up> && __have_avx) || __have_avx2 ? 32
								      : 16;
	constexpr size_t __bytes_to_load = sizeof(_Up) * _Np;
	if constexpr (sizeof(_Up) > 8)
	  // element-wise conversion; padding elements are set to 0
	  return __generate_vector<_Tp, _SimdMember<_Tp>::_S_full_size>([&](
	    auto __i) constexpr {
	    return static_cast<_Tp>(__i < _Np ? __mem[__i] : 0);
	  });
	else if constexpr (is_same_v<_Up, _Tp>)
	  // no conversion: copy _Np * sizeof(_Tp) bytes into a full vector
	  return _CommonImpl::template _S_load<_Tp, _S_full_size<_Tp>,
					       _Np * sizeof(_Tp)>(__mem);
	else if constexpr (__bytes_to_load <= __max_load_size)
	  // a single load followed by one conversion
	  return __convert<_SimdMember<_Tp>>(
	    _CommonImpl::template _S_load<_Up, _Np>(__mem));
	else if constexpr (__bytes_to_load % __max_load_size == 0)
	  {
	    // several loads of __max_load_size bytes each, converted together
	    constexpr size_t __n_loads = __bytes_to_load / __max_load_size;
	    constexpr size_t __elements_per_load = _Np / __n_loads;
	    return __call_with_n_evaluations<__n_loads>(
	      [](auto... __uncvted) {
		return __convert<_SimdMember<_Tp>>(__uncvted...);
	      },
	      [&](auto __i) {
		return _CommonImpl::template _S_load<_Up, __elements_per_load>(
		  __mem + __i * __elements_per_load);
	      });
	  }
	else if constexpr (__bytes_to_load % (__max_load_size / 2) == 0
			   && __max_load_size > 16)
	  { // e.g. int[] -> <char, 12> with AVX2
	    // same as above with loads of half the maximum size
	    constexpr size_t __n_loads
	      = __bytes_to_load / (__max_load_size / 2);
	    constexpr size_t __elements_per_load = _Np / __n_loads;
	    return __call_with_n_evaluations<__n_loads>(
	      [](auto... __uncvted) {
		return __convert<_SimdMember<_Tp>>(__uncvted...);
	      },
	      [&](auto __i) {
		return _CommonImpl::template _S_load<_Up, __elements_per_load>(
		  __mem + __i * __elements_per_load);
	      });
	  }
	else // e.g. int[] -> <char, 9>
	  // fallback: read and convert each of the _Np elements individually
	  return __call_with_subscripts(
	    __mem, make_index_sequence<_Np>(), [](auto... __args) {
	      return __vector_type_t<_Tp, _S_full_size<_Tp>>{
		static_cast<_Tp>(__args)...};
	    });
      }

    // _S_masked_load {{{2
    // For every element selected by __k, replaces that element of __merge
    // with the value read from __mem at the same index (converted to _Tp).
    // Returns the updated __merge.
    template <typename _Tp, size_t _Np, typename _Up>
      static inline _SimdWrapper<_Tp, _Np>
      _S_masked_load(_SimdWrapper<_Tp, _Np> __merge, _MaskMember<_Tp> __k,
		     const _Up* __mem) noexcept
      {
	const auto __selected = _MaskImpl::_S_to_bits(__k);
	_BitOps::_S_bit_iteration(__selected, [&](auto __idx) {
	  const _Tp __value = static_cast<_Tp>(__mem[__idx]);
	  __merge._M_set(__idx, __value);
	});
	return __merge;
      }

    // _S_store {{{2
    // Stores the _S_size<_Tp> elements of __v to __mem, converting them from
    // _Tp to _Up. Stores are limited to _SuperImpl's _S_max_store_size<_Up>
    // bytes each.
    template <typename _Tp, typename _Up>
      _GLIBCXX_SIMD_INTRINSIC static void
      _S_store(_SimdMember<_Tp> __v, _Up* __mem, _TypeTag<_Tp>) noexcept
      {
	// TODO: converting int -> "smaller int" can be optimized with AVX512
	constexpr size_t _Np = _S_size<_Tp>;
	constexpr size_t __max_store_size
	  = _SuperImpl::template _S_max_store_size<_Up>;
	if constexpr (sizeof(_Up) > 8)
	  {
	    // element-wise assignment for very wide destination types
	    __execute_n_times<_Np>([&](auto __i) constexpr {
	      __mem[__i] = __v[__i];
	    });
	  }
	else if constexpr (is_same_v<_Up, _Tp>)
	  {
	    // no conversion necessary
	    _CommonImpl::_S_store(__v, __mem);
	  }
	else if constexpr (sizeof(_Up) * _Np <= __max_store_size)
	  {
	    // the converted vector fits into a single store
	    _CommonImpl::_S_store(_SimdWrapper<_Up, _Np>(__convert<_Up>(__v)),
				  __mem);
	  }
	else
	  {
	    // Convert into several vectors of __chunk elements each; the
	    // count is rounded up so a trailing partial vector is converted
	    // as well.
	    constexpr size_t __chunk = __max_store_size / sizeof(_Up);
	    constexpr size_t __n_chunks = __div_roundup(_Np, __chunk);
	    constexpr size_t __n_full = _Np / __chunk;
	    using _ChunkV = __vector_type_t<_Up, __chunk>;
	    const array<_ChunkV, __n_chunks> __pieces
	      = __convert_all<_ChunkV, __n_chunks>(__v);
	    __execute_n_times<__n_full>([&](auto __i) constexpr {
	      _CommonImpl::_S_store(__pieces[__i], __mem + __i * __chunk);
	    });
	    if constexpr (__n_full < __n_chunks)
	      {
		// store only the bytes of the remaining elements
		constexpr size_t __tail_bytes
		  = (_Np - __n_full * __chunk) * sizeof(_Up);
		_CommonImpl::template _S_store<__tail_bytes>(
		  __pieces[__n_full], __mem + __n_full * __chunk);
	      }
	  }
      }

    // _S_masked_store_nocvt {{{2
    // Writes each element of __v that is selected by __k to __mem at the same
    // index; no type conversion is involved.
    template <typename _Tp, size_t _Np>
      _GLIBCXX_SIMD_INTRINSIC static void
      _S_masked_store_nocvt(_SimdWrapper<_Tp, _Np> __v, _Tp* __mem,
			    _MaskMember<_Tp> __k)
      {
	const auto __selected = _MaskImpl::_S_to_bits(__k);
	_BitOps::_S_bit_iteration(__selected, [&](auto __idx) constexpr {
	  __mem[__idx] = __v[__idx];
	});
      }

    // _S_masked_store {{{2
    // Stores the elements of __v selected by __k to __mem, converting from _Tp
    // to _Up. Three strategies, chosen at compile time:
    //  1. same type, or integers of equal size: reinterpret and call
    //     _S_masked_store_nocvt;
    //  2. vectorized conversion, split into stores of at most
    //     __max_store_size bytes;
    //  3. otherwise: per-element conversion via _S_bit_iteration.
    template <typename _TW, typename _TVT = _VectorTraits<_TW>,
	      typename _Tp = typename _TVT::value_type, typename _Up>
      static inline void
      _S_masked_store(const _TW __v, _Up* __mem, const _MaskMember<_Tp> __k)
	noexcept
      {
	constexpr size_t _TV_size = _S_size<_Tp>;
	[[maybe_unused]] const auto __vi = __to_intrin(__v);
	constexpr size_t __max_store_size
	  = _SuperImpl::template _S_max_store_size<_Up>;
	if constexpr (
	  is_same_v<
	    _Tp,
	    _Up> || (is_integral_v<_Tp> && is_integral_v<_Up> && sizeof(_Tp) == sizeof(_Up)))
	  {
	    // bitwise or no conversion, reinterpret:
	    // the mask is re-typed for _Up: bitmasks are passed through,
	    // vector masks are bitcast to the integer type matching _Up
	    const _MaskMember<_Up> __kk = [&]() {
	      if constexpr (__is_bitmask_v<decltype(__k)>)
		return _MaskMember<_Up>(__k._M_data);
	      else
		return __wrapper_bitcast<__int_for_sizeof_t<_Up>>(__k);
	    }();
	    _SuperImpl::_S_masked_store_nocvt(__wrapper_bitcast<_Up>(__v),
					      __mem, __kk);
	  }
	else if constexpr (__vectorized_sizeof<_Up>() > sizeof(_Up)
			   && !_CommonImpl::
				template __converts_via_decomposition_v<
				  _Tp, _Up, __max_store_size>)
	  { // conversion via decomposition is better handled via the
	    // bit_iteration
	    // fallback below
	    // number of _Up elements per store
	    constexpr size_t _UW_size
	      = std::min(_TV_size, __max_store_size / sizeof(_Up));
	    static_assert(_UW_size <= _TV_size);
	    using _UW = _SimdWrapper<_Up, _UW_size>;
	    using _UV = __vector_type_t<_Up, _UW_size>;
	    using _UAbi = simd_abi::deduce_t<_Up, _UW_size>;
	    if constexpr (_UW_size == _TV_size) // one convert+store
	      {
		const _UW __converted = __convert<_UW>(__v);
		_SuperImpl::_S_masked_store_nocvt(
		  __converted, __mem,
		  _UAbi::_MaskImpl::template _S_convert<
		    __int_for_sizeof_t<_Up>>(__k));
	      }
	    else
	      {
		static_assert(_UW_size * sizeof(_Up) == __max_store_size);
		constexpr size_t _NFullStores = _TV_size / _UW_size;
		constexpr size_t _NAllStores
		  = __div_roundup(_TV_size, _UW_size);
		// number of _UW_size-wide parts in the full-width mask vector
		constexpr size_t _NParts = _S_full_size<_Tp> / _UW_size;
		const array<_UV, _NAllStores> __converted
		  = __convert_all<_UV, _NAllStores>(__v);
		// each store uses the matching part of the mask, converted to
		// the mask type of the _Up vector
		__execute_n_times<_NFullStores>([&](auto __i) {
		  _SuperImpl::_S_masked_store_nocvt(
		    _UW(__converted[__i]), __mem + __i * _UW_size,
		    _UAbi::_MaskImpl::template _S_convert<
		      __int_for_sizeof_t<_Up>>(
		      __extract_part<__i, _NParts>(__k.__as_full_vector())));
		});
		if constexpr (_NAllStores
			      > _NFullStores) // one partial at the end
		  _SuperImpl::_S_masked_store_nocvt(
		    _UW(__converted[_NFullStores]),
		    __mem + _NFullStores * _UW_size,
		    _UAbi::_MaskImpl::template _S_convert<
		      __int_for_sizeof_t<_Up>>(
		      __extract_part<_NFullStores, _NParts>(
			__k.__as_full_vector())));
	      }
	  }
	else
	  // scalar fallback: convert and store each selected element
	  _BitOps::_S_bit_iteration(
	    _MaskImpl::_S_to_bits(__k), [&](auto __i) constexpr {
	      __mem[__i] = static_cast<_Up>(__v[__i]);
	    });
      }

    // _S_complement {{{2
    // Bitwise complement of every element. Floating-point vectors are
    // complemented through the integer type of the same size.
    template <typename _Tp, size_t _Np>
      _GLIBCXX_SIMD_INTRINSIC static constexpr _SimdWrapper<_Tp, _Np>
      _S_complement(_SimdWrapper<_Tp, _Np> __x) noexcept
      {
	if constexpr (!is_floating_point_v<_Tp>)
	  return ~__x._M_data;
	else
	  {
	    using _Ip = __int_for_sizeof_t<_Tp>;
	    return __vector_bitcast<_Tp>(~__vector_bitcast<_Ip>(__x));
	  }
      }

    // _S_unary_minus {{{2
    // Element-wise negation.
    template <typename _Tp, size_t _Np>
      _GLIBCXX_SIMD_INTRINSIC static constexpr _SimdWrapper<_Tp, _Np>
      _S_unary_minus(_SimdWrapper<_Tp, _Np> __x) noexcept
      {
	// GCC doesn't use the psign instructions, but pxor & psub seem to be
	// just as good a choice as pcmpeqd & psign. So meh.
	auto __negated = -__x._M_data;
	return __negated;
      }

    // arithmetic operators {{{2
    // Element-wise sum of __x and __y.
    template <typename _Tp, size_t _Np>
      _GLIBCXX_SIMD_INTRINSIC static constexpr _SimdWrapper<_Tp, _Np>
      _S_plus(_SimdWrapper<_Tp, _Np> __x, _SimdWrapper<_Tp, _Np> __y)
      {
	auto __sum = __x._M_data + __y._M_data;
	return __sum;
      }

    // Element-wise difference __x - __y.
    template <typename _Tp, size_t _Np>
      _GLIBCXX_SIMD_INTRINSIC static constexpr _SimdWrapper<_Tp, _Np>
      _S_minus(_SimdWrapper<_Tp, _Np> __x, _SimdWrapper<_Tp, _Np> __y)
      {
	auto __difference = __x._M_data - __y._M_data;
	return __difference;
      }

    // Element-wise product of __x and __y.
    template <typename _Tp, size_t _Np>
      _GLIBCXX_SIMD_INTRINSIC static constexpr _SimdWrapper<_Tp, _Np>
      _S_multiplies(_SimdWrapper<_Tp, _Np> __x, _SimdWrapper<_Tp, _Np> __y)
      {
	auto __product = __x._M_data * __y._M_data;
	return __product;
      }

    // Element-wise quotient __x / __y.
    template <typename _Tp, size_t _Np>
      _GLIBCXX_SIMD_INTRINSIC static constexpr _SimdWrapper<_Tp, _Np>
      _S_divides(_SimdWrapper<_Tp, _Np> __x, _SimdWrapper<_Tp, _Np> __y)
      {
	// Division by 0 is always UB. With a partial register the padding of
	// the divisor therefore has to be made nonzero before dividing.
	if constexpr (_Abi::template _S_is_partial<_Tp>)
	  return __x._M_data / _Abi::__make_padding_nonzero(__y._M_data);
	else
	  return __x._M_data / __y._M_data;
      }

    // Element-wise remainder __x % __y. As for _S_divides, the padding of the
    // divisor is made nonzero for partial registers.
    template <typename _Tp, size_t _Np>
      _GLIBCXX_SIMD_INTRINSIC static constexpr _SimdWrapper<_Tp, _Np>
      _S_modulus(_SimdWrapper<_Tp, _Np> __x, _SimdWrapper<_Tp, _Np> __y)
      {
	if constexpr (_Abi::template _S_is_partial<_Tp>)
	  return __as_vector(__x)
		 % _Abi::__make_padding_nonzero(__as_vector(__y));
	else
	  return __x._M_data % __y._M_data;
      }

    // Bitwise AND of __x and __y (delegates to __and).
    template <typename _Tp, size_t _Np>
      _GLIBCXX_SIMD_INTRINSIC static constexpr _SimdWrapper<_Tp, _Np>
      _S_bit_and(_SimdWrapper<_Tp, _Np> __x, _SimdWrapper<_Tp, _Np> __y)
      {
	auto __conjunction = __and(__x, __y);
	return __conjunction;
      }

    // Bitwise OR of __x and __y (delegates to __or).
    template <typename _Tp, size_t _Np>
      _GLIBCXX_SIMD_INTRINSIC static constexpr _SimdWrapper<_Tp, _Np>
      _S_bit_or(_SimdWrapper<_Tp, _Np> __x, _SimdWrapper<_Tp, _Np> __y)
      {
	auto __disjunction = __or(__x, __y);
	return __disjunction;
      }

    // Bitwise XOR of __x and __y (delegates to __xor).
    template <typename _Tp, size_t _Np>
      _GLIBCXX_SIMD_INTRINSIC static constexpr _SimdWrapper<_Tp, _Np>
      _S_bit_xor(_SimdWrapper<_Tp, _Np> __x, _SimdWrapper<_Tp, _Np> __y)
      {
	auto __exclusive = __xor(__x, __y);
	return __exclusive;
      }

    // Shifts every element of __x left by the corresponding element of __y.
    template <typename _Tp, size_t _Np>
      _GLIBCXX_SIMD_INTRINSIC static _SimdWrapper<_Tp, _Np>
      _S_bit_shift_left(_SimdWrapper<_Tp, _Np> __x, _SimdWrapper<_Tp, _Np> __y)
      {
	auto __shifted = __x._M_data << __y._M_data;
	return __shifted;
      }

    // Shifts every element of __x right by the corresponding element of __y.
    template <typename _Tp, size_t _Np>
      _GLIBCXX_SIMD_INTRINSIC static _SimdWrapper<_Tp, _Np>
      _S_bit_shift_right(_SimdWrapper<_Tp, _Np> __x, _SimdWrapper<_Tp, _Np> __y)
      {
	auto __shifted = __x._M_data >> __y._M_data;
	return __shifted;
      }

    // Shifts every element of __x left by the same count __y.
    template <typename _Tp, size_t _Np>
      _GLIBCXX_SIMD_INTRINSIC static constexpr _SimdWrapper<_Tp, _Np>
      _S_bit_shift_left(_SimdWrapper<_Tp, _Np> __x, int __y)
      {
	auto __shifted = __x._M_data << __y;
	return __shifted;
      }

    // Shifts every element of __x right by the same count __y.
    template <typename _Tp, size_t _Np>
      _GLIBCXX_SIMD_INTRINSIC static constexpr _SimdWrapper<_Tp, _Np>
      _S_bit_shift_right(_SimdWrapper<_Tp, _Np> __x, int __y)
      {
	auto __shifted = __x._M_data >> __y;
	return __shifted;
      }

    // compares {{{2
    // _S_equal_to {{{3
    // Element-wise __x == __y, returned as a mask.
    template <typename _Tp, size_t _Np>
      _GLIBCXX_SIMD_INTRINSIC static constexpr _MaskMember<_Tp>
      _S_equal_to(_SimdWrapper<_Tp, _Np> __x, _SimdWrapper<_Tp, _Np> __y)
      {
	auto __is_equal = __x._M_data == __y._M_data;
	return __is_equal;
      }

    // _S_not_equal_to {{{3
    // Element-wise __x != __y, returned as a mask.
    template <typename _Tp, size_t _Np>
      _GLIBCXX_SIMD_INTRINSIC static constexpr _MaskMember<_Tp>
      _S_not_equal_to(_SimdWrapper<_Tp, _Np> __x, _SimdWrapper<_Tp, _Np> __y)
      {
	auto __is_different = __x._M_data != __y._M_data;
	return __is_different;
      }

    // _S_less {{{3
    // Element-wise __x < __y, returned as a mask.
    template <typename _Tp, size_t _Np>
      _GLIBCXX_SIMD_INTRINSIC static constexpr _MaskMember<_Tp>
      _S_less(_SimdWrapper<_Tp, _Np> __x, _SimdWrapper<_Tp, _Np> __y)
      {
	auto __is_less = __x._M_data < __y._M_data;
	return __is_less;
      }

    // _S_less_equal {{{3
    // Element-wise __x <= __y, returned as a mask.
    template <typename _Tp, size_t _Np>
      _GLIBCXX_SIMD_INTRINSIC static constexpr _MaskMember<_Tp>
      _S_less_equal(_SimdWrapper<_Tp, _Np> __x, _SimdWrapper<_Tp, _Np> __y)
      {
	auto __is_less_or_equal = __x._M_data <= __y._M_data;
	return __is_less_or_equal;
      }

    // _S_negate {{{2
    // Logical negation (operator!) applied to the wrapped data; the result
    // is returned as a mask.
    template <typename _Tp, size_t _Np>
      _GLIBCXX_SIMD_INTRINSIC static constexpr _MaskMember<_Tp>
      _S_negate(_SimdWrapper<_Tp, _Np> __x) noexcept
      {
	const auto __values = __x._M_data;
	return !__values;
      }

    // _S_min, _S_max, _S_minmax {{{2
    // Element-wise minimum: picks __a where __a < __b holds, __b otherwise.
    template <typename _Tp, size_t _Np>
      _GLIBCXX_SIMD_NORMAL_MATH _GLIBCXX_SIMD_INTRINSIC static constexpr
      _SimdWrapper<_Tp, _Np>
      _S_min(_SimdWrapper<_Tp, _Np> __a, _SimdWrapper<_Tp, _Np> __b)
      {
	const auto __lhs = __a._M_data;
	const auto __rhs = __b._M_data;
	return __lhs < __rhs ? __lhs : __rhs;
      }

    // Element-wise maximum: picks __a where __a > __b holds, __b otherwise.
    template <typename _Tp, size_t _Np>
      _GLIBCXX_SIMD_NORMAL_MATH _GLIBCXX_SIMD_INTRINSIC static constexpr
      _SimdWrapper<_Tp, _Np>
      _S_max(_SimdWrapper<_Tp, _Np> __a, _SimdWrapper<_Tp, _Np> __b)
      {
	const auto __lhs = __a._M_data;
	const auto __rhs = __b._M_data;
	return __lhs > __rhs ? __lhs : __rhs;
      }

    // Returns {min, max} as a pair. Both halves are selected with the same
    // __a < __b comparison: first is __a where it holds (else __b), second
    // is __b where it holds (else __a).
    template <typename _Tp, size_t _Np>
      _GLIBCXX_SIMD_NORMAL_MATH _GLIBCXX_SIMD_INTRINSIC static constexpr
      pair<_SimdWrapper<_Tp, _Np>, _SimdWrapper<_Tp, _Np>>
      _S_minmax(_SimdWrapper<_Tp, _Np> __a, _SimdWrapper<_Tp, _Np> __b)
      {
	const auto __lhs = __a._M_data;
	const auto __rhs = __b._M_data;
	const auto __lhs_is_less = __lhs < __rhs;
	return {__lhs_is_less ? __lhs : __rhs,
		__lhs_is_less ? __rhs : __lhs};
      }

    // reductions {{{2
    // One reduction step for a partially filled vector with an even _Np:
    // combines the lower _Np/2 elements with the upper _Np/2 elements via
    // __binary_op and passes the half-sized result on to the _S_reduce of the
    // ABI deduced for _Np/2 elements.
    //
    // _Is...    enumerates the elements of the upper half; they are moved to
    //           the front by the permutation.
    // _Zeros... one entry per remaining permute slot; every such slot gets
    //           index -1 (presumably "zero-fill" in __vector_permute, as the
    //           pack name suggests - confirm against __vector_permute).
    template <size_t _Np, size_t... _Is, size_t... _Zeros, typename _Tp,
	      typename _BinaryOperation>
      _GLIBCXX_SIMD_INTRINSIC static _Tp
      _S_reduce_partial(index_sequence<_Is...>, index_sequence<_Zeros...>,
			simd<_Tp, _Abi> __x, _BinaryOperation&& __binary_op)
      {
	using _V = __vector_type_t<_Tp, _Np / 2>;
	static_assert(sizeof(_V) <= sizeof(__x));
	// _S_full_size is the size of the smallest native SIMD register that
	// can store _Np/2 elements:
	using _FullSimd = __deduced_simd<_Tp, _VectorTraits<_V>::_S_full_size>;
	using _HalfSimd = __deduced_simd<_Tp, _Np / 2>;
	const auto __xx = __as_vector(__x);
	// Argument 1 of __binary_op: __xx reinterpreted as _V (lower half).
	// Argument 2 of __binary_op: the permuted __xx (upper half in front)
	// reinterpreted as _V.
	return _HalfSimd::abi_type::_SimdImpl::_S_reduce(
	  static_cast<_HalfSimd>(__as_vector(__binary_op(
	    static_cast<_FullSimd>(__intrin_bitcast<_V>(__xx)),
	    static_cast<_FullSimd>(__intrin_bitcast<_V>(
	      __vector_permute<(_Np / 2 + _Is)..., (int(_Zeros * 0) - 1)...>(
		__xx)))))),
	  __binary_op);
      }

    // Reduces all elements of __x to a single _Tp by repeatedly applying
    // __binary_op. The strategy is chosen at compile time from the element
    // count _Np, from whether the ABI is partial for _Tp, and from
    // sizeof(__x):
    //  - 1 or 2 elements: handled directly with scalar simd objects;
    //  - partial ABI: special cases for 3 elements, plus<> and multiplies<>,
    //    odd _Np, and finally _S_reduce_partial;
    //  - 16-byte vectors: in-register permute-and-combine steps;
    //  - anything larger: split into two halves, combine, recurse.
    template <typename _Tp, typename _BinaryOperation>
      _GLIBCXX_SIMD_INTRINSIC static constexpr _Tp
      _S_reduce(simd<_Tp, _Abi> __x, _BinaryOperation&& __binary_op)
      {
	constexpr size_t _Np = simd_size_v<_Tp, _Abi>;
	if constexpr (_Np == 1)
	  return __x[0];
	else if constexpr (_Np == 2)
	  return __binary_op(simd<_Tp, simd_abi::scalar>(__x[0]),
			     simd<_Tp, simd_abi::scalar>(__x[1]))[0];
	else if constexpr (_Abi::template _S_is_partial<_Tp>) //{{{
	  {
	    [[maybe_unused]] constexpr auto __full_size
	      = _Abi::template _S_full_size<_Tp>;
	    if constexpr (_Np == 3)
	      return __binary_op(
		__binary_op(simd<_Tp, simd_abi::scalar>(__x[0]),
			    simd<_Tp, simd_abi::scalar>(__x[1])),
		simd<_Tp, simd_abi::scalar>(__x[2]))[0];
	    else if constexpr (is_same_v<__remove_cvref_t<_BinaryOperation>,
					 plus<>>)
	      {
		// For plus<> the vector is passed through _Abi::_S_masked and
		// then reduced at full width (presumably _S_masked zeroes the
		// unused elements so they do not affect the sum - confirm).
		using _Ap = simd_abi::deduce_t<_Tp, __full_size>;
		return _Ap::_SimdImpl::_S_reduce(
		  simd<_Tp, _Ap>(__private_init,
				 _Abi::_S_masked(__as_vector(__x))),
		  __binary_op);
	      }
	    else if constexpr (is_same_v<__remove_cvref_t<_BinaryOperation>,
					 multiplies<>>)
	      {
		// For multiplies<> a full-width vector is built by blending
		// __x with a vector of ones under the implicit mask (see the
		// name __x_padded_with_ones) and reduced at full width.
		using _Ap = simd_abi::deduce_t<_Tp, __full_size>;
		using _TW = _SimdWrapper<_Tp, __full_size>;
		_GLIBCXX_SIMD_USE_CONSTEXPR auto __implicit_mask_full
		  = _Abi::template _S_implicit_mask<_Tp>().__as_full_vector();
		_GLIBCXX_SIMD_USE_CONSTEXPR _TW __one
		  = __vector_broadcast<__full_size>(_Tp(1));
		const _TW __x_full = __data(__x).__as_full_vector();
		const _TW __x_padded_with_ones
		  = _Ap::_CommonImpl::_S_blend(__implicit_mask_full, __one,
					       __x_full);
		return _Ap::_SimdImpl::_S_reduce(
		  simd<_Tp, _Ap>(__private_init, __x_padded_with_ones),
		  __binary_op);
	      }
	    else if constexpr (_Np & 1)
	      {
		// Odd element count: reduce the first _Np - 1 elements as a
		// vector, then combine that result with the last element.
		using _Ap = simd_abi::deduce_t<_Tp, _Np - 1>;
		return __binary_op(
		  simd<_Tp, simd_abi::scalar>(_Ap::_SimdImpl::_S_reduce(
		    simd<_Tp, _Ap>(
		      __intrin_bitcast<__vector_type_t<_Tp, _Np - 1>>(
			__as_vector(__x))),
		    __binary_op)),
		  simd<_Tp, simd_abi::scalar>(__x[_Np - 1]))[0];
	      }
	    else
	      return _S_reduce_partial<_Np>(
		make_index_sequence<_Np / 2>(),
		make_index_sequence<__full_size - _Np / 2>(), __x, __binary_op);
	  }                                   //}}}
	else if constexpr (sizeof(__x) == 16) //{{{
	  {
	    // Each step below combines two rearranged copies of the vector.
	    // The steps are cumulative: a vector with 16 elements runs all of
	    // them, one with 8 skips the first, and so on.
	    if constexpr (_Np == 16)
	      {
		// Elements 0..7 (each duplicated) combined with elements
		// 8..15 (each duplicated).
		const auto __y = __data(__x);
		__x = __binary_op(
		  _M_make_simd<_Tp, _Np>(
		    __vector_permute<0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6,
				     7, 7>(__y)),
		  _M_make_simd<_Tp, _Np>(
		    __vector_permute<8, 8, 9, 9, 10, 10, 11, 11, 12, 12, 13, 13,
				     14, 14, 15, 15>(__y)));
	      }
	    if constexpr (_Np >= 8)
	      {
		// Same pattern after reinterpreting the data as 8 shorts.
		const auto __y = __vector_bitcast<short>(__data(__x));
		__x = __binary_op(
		  _M_make_simd<_Tp, _Np>(__vector_bitcast<_Tp>(
		    __vector_permute<0, 0, 1, 1, 2, 2, 3, 3>(__y))),
		  _M_make_simd<_Tp, _Np>(__vector_bitcast<_Tp>(
		    __vector_permute<4, 4, 5, 5, 6, 6, 7, 7>(__y))));
	      }
	    if constexpr (_Np >= 4)
	      {
		// Reinterpret as four 4-byte lanes and combine __x with its
		// lane-reversed copy.
		using _Up = conditional_t<is_floating_point_v<_Tp>, float, int>;
		const auto __y = __vector_bitcast<_Up>(__data(__x));
		__x = __binary_op(__x,
				  _M_make_simd<_Tp, _Np>(__vector_bitcast<_Tp>(
				    __vector_permute<3, 2, 1, 0>(__y))));
	      }
	    // Final step: reinterpret as two 8-byte lanes and combine __x with
	    // a copy in which lane 1 fills both lanes; the result is read from
	    // element 0.
	    using _Up = conditional_t<is_floating_point_v<_Tp>, double, _LLong>;
	    const auto __y = __vector_bitcast<_Up>(__data(__x));
	    __x = __binary_op(__x, _M_make_simd<_Tp, _Np>(__vector_bitcast<_Tp>(
				     __vector_permute<1, 1>(__y))));
	    return __x[0];
	  } //}}}
	else
	  {
	    // Larger vectors: combine the two halves (__extract<0, 2> and
	    // __extract<1, 2>) and recurse on the half-sized result.
	    static_assert(sizeof(__x) > __min_vector_size<_Tp>);
	    static_assert((_Np & (_Np - 1)) == 0); // _Np must be a power of 2
	    using _Ap = simd_abi::deduce_t<_Tp, _Np / 2>;
	    using _V = simd<_Tp, _Ap>;
	    return _Ap::_SimdImpl::_S_reduce(
	      __binary_op(_V(__private_init, __extract<0, 2>(__as_vector(__x))),
			  _V(__private_init,
			     __extract<1, 2>(__as_vector(__x)))),
	      static_cast<_BinaryOperation&&>(__binary_op));
	  }
      }

    // math {{{2
    // frexp, modf and copysign implemented in simd_math.h
// Defines a static member function template _S_<__name> that implements the
// math function __name element by element: the result vector of type _Tp is
// built with __generate_vector, and element __i is __name applied to
// element __i of __x and of every additional argument in __more.
#define _GLIBCXX_SIMD_MATH_FALLBACK(__name)                                    \
    template <typename _Tp, typename... _More>                                 \
      static _Tp _S_##__name(const _Tp& __x, const _More&... __more)           \
      {                                                                        \
	return __generate_vector<_Tp>(                                         \
	  [&](auto __i) { return __name(__x[__i], __more[__i]...); });         \
      }

// Same element-by-element scheme as _GLIBCXX_SIMD_MATH_FALLBACK, but the
// generated _S_<__name> is declared to return _Tp::mask_type instead of _Tp.
// NOTE(review): no instantiation of this macro is visible between this
// definition and its #undef - confirm whether it is still needed.
#define _GLIBCXX_SIMD_MATH_FALLBACK_MASKRET(__name)                            \
    template <typename _Tp, typename... _More>                                 \
      static typename _Tp::mask_type _S_##__name(const _Tp& __x,               \
						 const _More&... __more)       \
      {                                                                        \
	return __generate_vector<_Tp>(                                         \
	  [&](auto __i) { return __name(__x[__i], __more[__i]...); });         \
      }

// Element-by-element fallback for math functions whose result element type
// (_RetTp) differs from the argument type. The generated _S_<__name> returns
// a __fixed_size_storage_t<_RetTp, _VectorTraits<_Tp>::_S_partial_width>
// built via _S_generate: for every chunk described by __meta, element __i of
// that chunk is __name applied to element (__meta._S_offset + __i) of __x and
// of every additional argument in __more.
#define _GLIBCXX_SIMD_MATH_FALLBACK_FIXEDRET(_RetTp, __name)                   \
    template <typename _Tp, typename... _More>                                 \
      static auto _S_##__name(const _Tp& __x, const _More&... __more)          \
      {                                                                        \
	return __fixed_size_storage_t<_RetTp,                                  \
				      _VectorTraits<_Tp>::_S_partial_width>::  \
	  _S_generate([&](auto __meta) constexpr {                             \
	    return __meta._S_generator(                                        \
	      [&](auto __i) {                                                  \
		return __name(__x[__meta._S_offset + __i],                     \
			      __more[__meta._S_offset + __i]...);              \
	      },                                                               \
	      static_cast<_RetTp*>(nullptr));                                  \
	  });                                                                  \
      }

    // Element-by-element fallbacks, one _S_<name> member per line.
    // _GLIBCXX_SIMD_MATH_FALLBACK yields a result of the argument type;
    // _GLIBCXX_SIMD_MATH_FALLBACK_FIXEDRET yields a fixed-size storage of the
    // given element type (int, long or long long below).
    _GLIBCXX_SIMD_MATH_FALLBACK(acos)
    _GLIBCXX_SIMD_MATH_FALLBACK(asin)
    _GLIBCXX_SIMD_MATH_FALLBACK(atan)
    _GLIBCXX_SIMD_MATH_FALLBACK(atan2)
    _GLIBCXX_SIMD_MATH_FALLBACK(cos)
    _GLIBCXX_SIMD_MATH_FALLBACK(sin)
    _GLIBCXX_SIMD_MATH_FALLBACK(tan)
    _GLIBCXX_SIMD_MATH_FALLBACK(acosh)
    _GLIBCXX_SIMD_MATH_FALLBACK(asinh)
    _GLIBCXX_SIMD_MATH_FALLBACK(atanh)
    _GLIBCXX_SIMD_MATH_FALLBACK(cosh)
    _GLIBCXX_SIMD_MATH_FALLBACK(sinh)
    _GLIBCXX_SIMD_MATH_FALLBACK(tanh)
    _GLIBCXX_SIMD_MATH_FALLBACK(exp)
    _GLIBCXX_SIMD_MATH_FALLBACK(exp2)
    _GLIBCXX_SIMD_MATH_FALLBACK(expm1)
    _GLIBCXX_SIMD_MATH_FALLBACK(ldexp)
    _GLIBCXX_SIMD_MATH_FALLBACK_FIXEDRET(int, ilogb)
    _GLIBCXX_SIMD_MATH_FALLBACK(log)
    _GLIBCXX_SIMD_MATH_FALLBACK(log10)
    _GLIBCXX_SIMD_MATH_FALLBACK(log1p)
    _GLIBCXX_SIMD_MATH_FALLBACK(log2)
    _GLIBCXX_SIMD_MATH_FALLBACK(logb)

    // modf implemented in simd_math.h
    _GLIBCXX_SIMD_MATH_FALLBACK(scalbn)
    _GLIBCXX_SIMD_MATH_FALLBACK(scalbln)
    _GLIBCXX_SIMD_MATH_FALLBACK(cbrt)
    _GLIBCXX_SIMD_MATH_FALLBACK(fabs)
    _GLIBCXX_SIMD_MATH_FALLBACK(pow)
    _GLIBCXX_SIMD_MATH_FALLBACK(sqrt)
    _GLIBCXX_SIMD_MATH_FALLBACK(erf)
    _GLIBCXX_SIMD_MATH_FALLBACK(erfc)
    _GLIBCXX_SIMD_MATH_FALLBACK(lgamma)
    _GLIBCXX_SIMD_MATH_FALLBACK(tgamma)

    // Rounding functions with an integer result type.
    _GLIBCXX_SIMD_MATH_FALLBACK_FIXEDRET(long, lrint)
    _GLIBCXX_SIMD_MATH_FALLBACK_FIXEDRET(long long, llrint)

    _GLIBCXX_SIMD_MATH_FALLBACK_FIXEDRET(long, lround)
    _GLIBCXX_SIMD_MATH_FALLBACK_FIXEDRET(long long, llround)

    _GLIBCXX_SIMD_MATH_FALLBACK(fmod)
    _GLIBCXX_SIMD_MATH_FALLBACK(remainder)

    // Element-by-element remquo: the returned vector holds the remainders,
    // and the quotient value produced for element i is stored into *__z at
    // index i via _M_set.
    template <typename _Tp, typename _TVT = _VectorTraits<_Tp>>
      static _Tp
      _S_remquo(const _Tp __x, const _Tp __y,
		__fixed_size_storage_t<int, _TVT::_S_partial_width>* __z)
      {
	return __generate_vector<_Tp>([&](auto __idx) {
	  int __quo;
	  const auto __rem = remquo(__x[__idx], __y[__idx], &__quo);
	  __z->_M_set(__idx, __quo);
	  return __rem;
	});
      }

    // copysign in simd_math.h
    // More element-by-element fallbacks; each line defines _S_<name>.
    _GLIBCXX_SIMD_MATH_FALLBACK(nextafter)
    _GLIBCXX_SIMD_MATH_FALLBACK(fdim)
    _GLIBCXX_SIMD_MATH_FALLBACK(fmax)
    _GLIBCXX_SIMD_MATH_FALLBACK(fmin)
    _GLIBCXX_SIMD_MATH_FALLBACK(fma)

    // isgreater: mask of elements where __x > __y, cleared wherever
    // _S_isunordered reports the pair as unordered.
    // The comparison is done on integer keys derived from the bit patterns:
    // a negative pattern is replaced by the negated value of its remaining
    // bits (pattern & __finite_max_v), all others are used as they are.
    template <typename _Tp, size_t _Np>
      static constexpr _MaskMember<_Tp>
      _S_isgreater(_SimdWrapper<_Tp, _Np> __x,
		   _SimdWrapper<_Tp, _Np> __y) noexcept
      {
	using _Int = __int_for_sizeof_t<_Tp>;
	const auto __xbits = __vector_bitcast<_Int>(__x);
	const auto __ybits = __vector_bitcast<_Int>(__y);
	const auto __xkey
	  = __xbits < 0 ? -(__xbits & __finite_max_v<_Int>) : __xbits;
	const auto __ykey
	  = __ybits < 0 ? -(__ybits & __finite_max_v<_Int>) : __ybits;
	const auto __greater = __xkey > __ykey;
	const auto __unordered = _SuperImpl::_S_isunordered(__x, __y)._M_data;
	return __andnot(__unordered, __greater);
      }

    // isgreaterequal: mask of elements where __x >= __y, cleared wherever
    // _S_isunordered reports the pair as unordered.
    // The comparison is done on integer keys derived from the bit patterns:
    // a negative pattern is replaced by the negated value of its remaining
    // bits (pattern & __finite_max_v), all others are used as they are.
    template <typename _Tp, size_t _Np>
      static constexpr _MaskMember<_Tp>
      _S_isgreaterequal(_SimdWrapper<_Tp, _Np> __x,
			_SimdWrapper<_Tp, _Np> __y) noexcept
      {
	using _Int = __int_for_sizeof_t<_Tp>;
	const auto __xbits = __vector_bitcast<_Int>(__x);
	const auto __ybits = __vector_bitcast<_Int>(__y);
	const auto __xkey
	  = __xbits < 0 ? -(__xbits & __finite_max_v<_Int>) : __xbits;
	const auto __ykey
	  = __ybits < 0 ? -(__ybits & __finite_max_v<_Int>) : __ybits;
	const auto __greater_equal = __xkey >= __ykey;
	const auto __unordered = _SuperImpl::_S_isunordered(__x, __y)._M_data;
	return __andnot(__unordered, __greater_equal);
      }

    // isless: mask of elements where __x < __y, cleared wherever
    // _S_isunordered reports the pair as unordered.
    // The comparison is done on integer keys derived from the bit patterns:
    // a negative pattern is replaced by the negated value of its remaining
    // bits (pattern & __finite_max_v), all others are used as they are.
    template <typename _Tp, size_t _Np>
      static constexpr _MaskMember<_Tp>
      _S_isless(_SimdWrapper<_Tp, _Np> __x, _SimdWrapper<_Tp, _Np> __y) noexcept
      {
	using _Int = __int_for_sizeof_t<_Tp>;
	const auto __xbits = __vector_bitcast<_Int>(__x);
	const auto __ybits = __vector_bitcast<_Int>(__y);
	const auto __xkey
	  = __xbits < 0 ? -(__xbits & __finite_max_v<_Int>) : __xbits;
	const auto __ykey
	  = __ybits < 0 ? -(__ybits & __finite_max_v<_Int>) : __ybits;
	const auto __less = __xkey < __ykey;
	const auto __unordered = _SuperImpl::_S_isunordered(__x, __y)._M_data;
	return __andnot(__unordered, __less);
      }

    // islessequal: mask of elements where __x <= __y, cleared wherever
    // _S_isunordered reports the pair as unordered.
    // The comparison is done on integer keys derived from the bit patterns:
    // a negative pattern is replaced by the negated value of its remaining
    // bits (pattern & __finite_max_v), all others are used as they are.
    template <typename _Tp, size_t _Np>
      static constexpr _MaskMember<_Tp>
      _S_islessequal(_SimdWrapper<_Tp, _Np> __x,
		     _SimdWrapper<_Tp, _Np> __y) noexcept
      {
	using _Int = __int_for_sizeof_t<_Tp>;
	const auto __xbits = __vector_bitcast<_Int>(__x);
	const auto __ybits = __vector_bitcast<_Int>(__y);
	const auto __xkey
	  = __xbits < 0 ? -(__xbits & __finite_max_v<_Int>) : __xbits;
	const auto __ykey
	  = __ybits < 0 ? -(__ybits & __finite_max_v<_Int>) : __ybits;
	const auto __less_equal = __xkey <= __ykey;
	const auto __unordered = _SuperImpl::_S_isunordered(__x, __y)._M_data;
	return __andnot(__unordered, __less_equal);
      }

    // islessgreater: elements that compare not-equal, with the elements
    // reported by _S_isunordered removed.
    template <typename _Tp, size_t _Np>
      static constexpr _MaskMember<_Tp>
      _S_islessgreater(_SimdWrapper<_Tp, _Np> __x,
		       _SimdWrapper<_Tp, _Np> __y) noexcept
      {
	const auto __unordered = _SuperImpl::_S_isunordered(__x, __y);
	const auto __differs = _SuperImpl::_S_not_equal_to(__x, __y);
	return __andnot(__unordered, __differs);
      }

#undef _GLIBCXX_SIMD_MATH_FALLBACK
#undef _GLIBCXX_SIMD_MATH_FALLBACK_MASKRET
#undef _GLIBCXX_SIMD_MATH_FALLBACK_FIXEDRET
    // _S_abs {{{3
    // Element-wise absolute value. Integers use compare-and-negate;
    // floating-point values are ANDed with _S_absmask instead.
    template <typename _Tp, size_t _Np>
    _GLIBCXX_SIMD_INTRINSIC static _SimdWrapper<_Tp, _Np>
    _S_abs(_SimdWrapper<_Tp, _Np> __x) noexcept
    {
      // if (__builtin_is_constant_evaluated())
      //  {
      //    return __x._M_data < 0 ? -__x._M_data : __x._M_data;
      //  }
      if constexpr (!is_floating_point_v<_Tp>)
	{
	  const auto __values = __x._M_data;
	  return __values < 0 ? -__values : __values;
	}
      else
	{
	  // `v < 0 ? -v : v` cannot compile to the efficient implementation
	  // of masking the signbit off because it must consider v == -0
	  //
	  // ~(-0.) & v would be easy, but breaks with fno-signed-zeros
	  using _Vec = __vector_type_t<_Tp, _Np>;
	  return __and(_S_absmask<_Vec>, __x._M_data);
	}
    }

    // }}}3
    // _S_plus_minus {{{
    // Returns __x + __y - __y without -fassociative-math optimizing to __x.
    // - _TV must be __vector_type_t<floating-point type, N>.
    // - _UV must be _TV or floating-point type.
    // Computes (__x + __y) - __y. Constant-evaluated and constant-operand
    // calls, and builds that do not take the barrier path, use the plain
    // expression. Otherwise the intermediate sum is passed through an empty
    // asm statement before the subtraction.
    template <typename _TV, typename _UV>
    _GLIBCXX_SIMD_INTRINSIC static constexpr _TV _S_plus_minus(_TV __x,
							       _UV __y) noexcept
    {
  #if defined __i386__ && !defined __SSE_MATH__
      // 8-byte vectors (only 2 x float is accepted here) are widened to a
      // 4 x float vector, processed recursively and narrowed again.
      if constexpr (sizeof(__x) == 8)
	{ // operations on __x would use the FPU
	  static_assert(is_same_v<_TV, __vector_type_t<float, 2>>);
	  const auto __x4 = __vector_bitcast<float, 4>(__x);
	  if constexpr (is_same_v<_TV, _UV>)
	    return __vector_bitcast<float, 2>(
	      _S_plus_minus(__x4, __vector_bitcast<float, 4>(__y)));
	  else
	    return __vector_bitcast<float, 2>(_S_plus_minus(__x4, __y));
	}
  #endif
  #if !defined __clang__ && __GCC_IEC_559 == 0
      // Barrier path: taken for non-clang compilers when __GCC_IEC_559 is 0.
      if (__builtin_is_constant_evaluated()
	  || (__builtin_constant_p(__x) && __builtin_constant_p(__y)))
	return (__x + __y) - __y;
      else
	return [&] {
	  __x += __y;
	  // The empty asm takes the sum as a read-write operand; presumably
	  // this hides its value from the optimizer so that the addition and
	  // the following subtraction cannot be folded together. The
	  // constraint letter is chosen per target below.
	  if constexpr(__have_sse)
	    {
	      if constexpr (sizeof(__x) >= 16)
		asm("" : "+x"(__x));
	      else if constexpr (is_same_v<__vector_type_t<float, 2>, _TV>)
		asm("" : "+x"(__x[0]), "+x"(__x[1]));
	      else
		__assert_unreachable<_TV>();
	    }
	  else if constexpr(__have_neon)
	    asm("" : "+w"(__x));
	  else if constexpr (__have_power_vmx)
	    {
	      if constexpr (is_same_v<__vector_type_t<float, 2>, _TV>)
		asm("" : "+fgr"(__x[0]), "+fgr"(__x[1]));
	      else
		asm("" : "+v"(__x));
	    }
	  else
	    asm("" : "+g"(__x));
	  return __x - __y;
	}();
  #else
      return (__x + __y) - __y;
  #endif
    }

    // }}}
    // _S_nearbyint {{{3
    // Rounds by adding and then subtracting 2^(digits - 1) carrying the sign
    // of the input (via _S_plus_minus). Elements whose magnitude is not
    // below 2^(digits - 1) are returned unchanged.
    template <typename _Tp, typename _TVT = _VectorTraits<_Tp>>
    _GLIBCXX_SIMD_INTRINSIC static _Tp _S_nearbyint(_Tp __x_) noexcept
    {
      using _Elem = typename _TVT::value_type;
      using _Vec = typename _TVT::type;
      static_assert(__CHAR_BIT__ * sizeof(1ull) >= __digits_v<_Elem>);
      const _Vec __in = __x_;
      _GLIBCXX_SIMD_USE_CONSTEXPR _Vec __limit
	= _Vec() + (1ull << (__digits_v<_Elem> - 1));
      const _Vec __magnitude = __and(__in, _S_absmask<_Vec>);
      // __limit with the sign bit of the input copied onto it
      const _Vec __signed_limit
	= __or(__and(_S_signmask<_Vec>, __in), __limit);
      const _Vec __rounded = _S_plus_minus(__in, __signed_limit);
      return __magnitude < __limit ? __rounded : __in;
    }

    // _S_rint {{{3
    // rint is implemented by forwarding to _SuperImpl::_S_nearbyint.
    template <typename _Tp, typename _TVT = _VectorTraits<_Tp>>
    _GLIBCXX_SIMD_INTRINSIC static _Tp _S_rint(_Tp __x) noexcept
    {
      const _Tp __rounded = _SuperImpl::_S_nearbyint(__x);
      return __rounded;
    }

    // _S_trunc {{{3
    // Truncation toward zero. The magnitude is rounded via _S_plus_minus with
    // 2^(digits - 1); where that rounded up, 1 is subtracted again. The bits
    // in which __x differs from its magnitude are then ORed back in.
    // Elements whose magnitude is not below 2^(digits - 1) pass through
    // unchanged.
    template <typename _Tp, size_t _Np>
    _GLIBCXX_SIMD_INTRINSIC static _SimdWrapper<_Tp, _Np>
    _S_trunc(_SimdWrapper<_Tp, _Np> __x)
    {
      using _Vec = __vector_type_t<_Tp, _Np>;
      static_assert(__CHAR_BIT__ * sizeof(1ull) >= __digits_v<_Tp>);
      constexpr _Tp __limit = 1ull << (__digits_v<_Tp> - 1);
      const _Vec __magnitude = __and(__x._M_data, _S_absmask<_Vec>);
      _Vec __whole = _S_plus_minus(__magnitude, __limit);
      // undo a round-up
      const _Vec __overshoot = __whole > __magnitude ? _Vec() + 1 : _Vec();
      __whole -= __overshoot;
      return __magnitude < __limit
	       ? __or(__xor(__magnitude, __x._M_data), __whole)
	       : __x._M_data;
    }

    // _S_round {{{3
    // Rounds the magnitude: trunc(|x|), plus 1 where the discarded
    // fraction is >= 0.5. The bits in which __x differs from |x| are then
    // ORed back in.
    template <typename _Tp, size_t _Np>
    _GLIBCXX_SIMD_INTRINSIC static _SimdWrapper<_Tp, _Np>
    _S_round(_SimdWrapper<_Tp, _Np> __x)
    {
      const auto __magnitude = _SuperImpl::_S_abs(__x);
      const auto __whole = _SuperImpl::_S_trunc(__magnitude)._M_data;
      const auto __fraction = __magnitude._M_data - __whole;
      // round(abs(x))
      const auto __rounded = __whole + (__fraction >= _Tp(.5) ? _Tp(1) : 0);
      const auto __sign = __xor(__magnitude._M_data, __x._M_data);
      return __or(__sign, __rounded);
    }

    // _S_floor {{{3
    // floor(x): trunc(x), minus 1 for elements that are negative and were
    // changed by the truncation.
    template <typename _Tp, size_t _Np>
    _GLIBCXX_SIMD_INTRINSIC static _SimdWrapper<_Tp, _Np>
    _S_floor(_SimdWrapper<_Tp, _Np> __x)
    {
      const auto __whole = _SuperImpl::_S_trunc(__x)._M_data;
      const auto __is_negative
	= __vector_bitcast<_Tp>(__x._M_data < __vector_broadcast<_Np, _Tp>(0));
      const auto __is_integral = __vector_bitcast<_Tp>(__whole == __x._M_data);
      // set where the input is negative and not already integral
      const auto __adjust = __andnot(__is_integral, __is_negative);
      const auto __lowered = __whole - __vector_broadcast<_Np, _Tp>(1);
      return __or(__andnot(__adjust, __whole), __and(__adjust, __lowered));
    }

    // _S_ceil {{{3
    // ceil(x): trunc(x), plus 1 for elements that are neither negative nor
    // left unchanged by the truncation.
    template <typename _Tp, size_t _Np>
    _GLIBCXX_SIMD_INTRINSIC static _SimdWrapper<_Tp, _Np>
    _S_ceil(_SimdWrapper<_Tp, _Np> __x)
    {
      const auto __whole = _SuperImpl::_S_trunc(__x)._M_data;
      const auto __is_negative
	= __vector_bitcast<_Tp>(__x._M_data < __vector_broadcast<_Np, _Tp>(0));
      const auto __is_integral = __vector_bitcast<_Tp>(__whole == __x._M_data);
      // set where the truncated value is already the answer
      const auto __keep = __or(__is_integral, __is_negative);
      const auto __raised = __whole + __vector_broadcast<_Np, _Tp>(1);
      return __or(__and(__keep, __whole), __andnot(__keep, __raised));
    }

    // _S_isnan {{{3
    // NaN test. With __FINITE_MATH_ONLY__ the answer is always false.
    // Without __SUPPORT_SNAN__ an element is NaN iff it does not compare
    // equal to itself. Otherwise (IEC 559) the bit pattern of |x| is compared
    // as an integer against the bit pattern of infinity.
    template <typename _Tp, size_t _Np>
    _GLIBCXX_SIMD_INTRINSIC static _MaskMember<_Tp>
    _S_isnan([[maybe_unused]] _SimdWrapper<_Tp, _Np> __x)
    {
  #if __FINITE_MATH_ONLY__
      return {}; // false
  #elif !defined __SUPPORT_SNAN__
      const auto __equals_itself = __x._M_data == __x._M_data;
      return ~__equals_itself;
  #elif defined __STDC_IEC_559__
      using _Int = __int_for_sizeof_t<_Tp>;
      const auto __inf_bits
	= __vector_bitcast<_Int>(__vector_broadcast<_Np>(__infinity_v<_Tp>));
      const auto __abs_bits = __vector_bitcast<_Int>(_SuperImpl::_S_abs(__x));
      return __abs_bits > __inf_bits;
  #else
  #error "Not implemented: how to support SNaN but non-IEC559 floating-point?"
  #endif
    }

    // _S_isfinite {{{3
    // Finite test. With __FINITE_MATH_ONLY__ every element is reported
    // finite; otherwise the bit pattern of |x| is compared as an integer
    // against the bit pattern of the largest finite value.
    template <typename _Tp, size_t _Np>
    _GLIBCXX_SIMD_INTRINSIC static _MaskMember<_Tp>
    _S_isfinite([[maybe_unused]] _SimdWrapper<_Tp, _Np> __x)
    {
  #if __FINITE_MATH_ONLY__
      using _MaskVec = typename _MaskMember<_Tp>::_BuiltinType;
      _GLIBCXX_SIMD_USE_CONSTEXPR _MaskVec __all_set = ~_MaskVec();
      return __all_set;
  #else
      // if all exponent bits are set, __x is either inf or NaN
      using _Int = __int_for_sizeof_t<_Tp>;
      const auto __max_bits
	= __vector_bitcast<_Int>(__vector_broadcast<_Np>(__finite_max_v<_Tp>));
      const auto __abs_bits = __vector_bitcast<_Int>(_SuperImpl::_S_abs(__x));
      return __abs_bits <= __max_bits;
  #endif
    }

    // _S_isunordered {{{3
    // A pair is unordered when at least one of its two elements is NaN.
    template <typename _Tp, size_t _Np>
    _GLIBCXX_SIMD_INTRINSIC static _MaskMember<_Tp>
    _S_isunordered(_SimdWrapper<_Tp, _Np> __x, _SimdWrapper<_Tp, _Np> __y)
    {
      const auto __x_is_nan = _S_isnan(__x);
      const auto __y_is_nan = _S_isnan(__y);
      return __or(__x_is_nan, __y_is_nan);
    }

    // _S_signbit {{{3
    // Sign-bit test: reinterpret the elements as same-sized signed integers
    // and compare against zero.
    template <typename _Tp, size_t _Np>
    _GLIBCXX_SIMD_INTRINSIC static _MaskMember<_Tp>
    _S_signbit(_SimdWrapper<_Tp, _Np> __x)
    {
      using _Int = __int_for_sizeof_t<_Tp>;
      // Arithmetic right shift (SRA) would also work (instead of compare), but
      // 64-bit SRA isn't available on x86 before AVX512. And in general,
      // compares are more likely to be efficient than SRA.
      const auto __as_int = __vector_bitcast<_Int>(__x);
      return __as_int < 0;
    }

    // _S_isinf {{{3
    // Infinity test. With __FINITE_MATH_ONLY__ the answer is always false;
    // otherwise |x| is compared for equality with a broadcast infinity.
    template <typename _Tp, size_t _Np>
    _GLIBCXX_SIMD_INTRINSIC static _MaskMember<_Tp>
    _S_isinf([[maybe_unused]] _SimdWrapper<_Tp, _Np> __x)
    {
  #if __FINITE_MATH_ONLY__
      return {}; // false
  #else
      // alternative (not used): compare to inf using the corresponding
      // integer type, i.e. bitcast both _S_abs(__x)._M_data and the broadcast
      // infinity to __int_for_sizeof_t<_Tp> and compare those for equality.
      const auto __magnitude = _SuperImpl::_S_abs(__x);
      const auto __inf = __vector_broadcast<_Np>(__infinity_v<_Tp>);
      return _SuperImpl::template _S_equal_to<_Tp, _Np>(__magnitude, __inf);
  #endif
    }

    // _S_isnormal {{{3
    // Normal-number test on the integer bit pattern of |x|: it must be at
    // least the pattern of the smallest normal value and, unless
    // __FINITE_MATH_ONLY__ is set, at most the pattern of the largest finite
    // value.
    template <typename _Tp, size_t _Np>
    _GLIBCXX_SIMD_INTRINSIC static _MaskMember<_Tp>
    _S_isnormal(_SimdWrapper<_Tp, _Np> __x)
    {
      using _Int = __int_for_sizeof_t<_Tp>;
      const auto __abs_bits = __vector_bitcast<_Int>(_SuperImpl::_S_abs(__x));
      const auto __min_bits
	= __vector_bitcast<_Int>(__vector_broadcast<_Np>(__norm_min_v<_Tp>));
  #if __FINITE_MATH_ONLY__
      return __abs_bits >= __min_bits;
  #else
      const auto __max_bits
	= __vector_bitcast<_Int>(__vector_broadcast<_Np>(__finite_max_v<_Tp>));
      return __min_bits <= __abs_bits && __abs_bits <= __max_bits;
  #endif
    }

    // _S_fpclassify {{{3
    // Classifies every element of __x as FP_ZERO, FP_SUBNORMAL, FP_NORMAL,
    // FP_INFINITE or FP_NAN and returns the codes as a fixed-size storage of
    // int. The classification works on the integer bit pattern of |x|; the
    // second half of the function only converts the resulting integer vector
    // into the layout of __fixed_size_storage_t<int, _Np>.
    template <typename _Tp, size_t _Np>
    _GLIBCXX_SIMD_INTRINSIC static __fixed_size_storage_t<int, _Np>
    _S_fpclassify(_SimdWrapper<_Tp, _Np> __x)
    {
      using _I = __int_for_sizeof_t<_Tp>;
      // bit pattern of |x|, as integers of the same size as _Tp
      const auto __xn
	= __vector_bitcast<_I>(__to_intrin(_SuperImpl::_S_abs(__x)));
      // number of integer elements in __xn (derived from its byte size)
      constexpr size_t _NI = sizeof(__xn) / sizeof(_I);
      // thresholds: bit patterns of the smallest normal value and of infinity
      _GLIBCXX_SIMD_USE_CONSTEXPR auto __minn
	= __vector_bitcast<_I>(__vector_broadcast<_NI>(__norm_min_v<_Tp>));
      _GLIBCXX_SIMD_USE_CONSTEXPR auto __infn
	= __vector_bitcast<_I>(__vector_broadcast<_NI>(__infinity_v<_Tp>));

      // broadcast result codes; those that cannot be produced under the
      // active math flags are not declared
      _GLIBCXX_SIMD_USE_CONSTEXPR auto __fp_normal
	= __vector_broadcast<_NI, _I>(FP_NORMAL);
  #if !__FINITE_MATH_ONLY__
      _GLIBCXX_SIMD_USE_CONSTEXPR auto __fp_nan
	= __vector_broadcast<_NI, _I>(FP_NAN);
      _GLIBCXX_SIMD_USE_CONSTEXPR auto __fp_infinite
	= __vector_broadcast<_NI, _I>(FP_INFINITE);
  #endif
  #ifndef __FAST_MATH__
      _GLIBCXX_SIMD_USE_CONSTEXPR auto __fp_subnormal
	= __vector_broadcast<_NI, _I>(FP_SUBNORMAL);
  #endif
      _GLIBCXX_SIMD_USE_CONSTEXPR auto __fp_zero
	= __vector_broadcast<_NI, _I>(FP_ZERO);

      // Below __minn: zero or subnormal (with __FAST_MATH__ always zero).
      // Otherwise: normal below __infn, infinite when equal to __infn, NaN
      // above (with __FINITE_MATH_ONLY__ always normal).
      __vector_type_t<_I, _NI>
	__tmp = __xn < __minn
  #ifdef __FAST_MATH__
		  ? __fp_zero
  #else
		  ? (__xn == 0 ? __fp_zero : __fp_subnormal)
  #endif
  #if __FINITE_MATH_ONLY__
		  : __fp_normal;
  #else
		  : (__xn < __infn ? __fp_normal
				   : (__xn == __infn ? __fp_infinite : __fp_nan));
  #endif

      // Convert __tmp to the return type. The branch depends on the element
      // size and on how many parts the fixed-size storage consists of.
      if constexpr (sizeof(_I) == sizeof(int))
	{
	  // elements already have the size of int: reinterpret and split
	  using _FixedInt = __fixed_size_storage_t<int, _Np>;
	  const auto __as_int = __vector_bitcast<int, _Np>(__tmp);
	  if constexpr (_FixedInt::_S_tuple_size == 1)
	    return {__as_int};
	  else if constexpr (_FixedInt::_S_tuple_size == 2
			     && is_same_v<
			       typename _FixedInt::_SecondType::_FirstAbi,
			       simd_abi::scalar>)
	    // second part is a single scalar: it takes the last element
	    return {__extract<0, 2>(__as_int), __as_int[_Np - 1]};
	  else if constexpr (_FixedInt::_S_tuple_size == 2)
	    return {__extract<0, 2>(__as_int),
		    __auto_bitcast(__extract<1, 2>(__as_int))};
	  else
	    __assert_unreachable<_Tp>();
	}
      else if constexpr (_Np == 2 && sizeof(_I) == 8
			 && __fixed_size_storage_t<int, _Np>::_S_tuple_size == 2)
	{
	  // two 8-byte elements stored as two separate ints
	  const auto __aslong = __vector_bitcast<_LLong>(__tmp);
	  return {int(__aslong[0]), {int(__aslong[1])}};
	}
  #if _GLIBCXX_SIMD_X86INTRIN
      // x86: narrow 8-byte elements to int with a pack / convert intrinsic
      else if constexpr (sizeof(_Tp) == 8 && sizeof(__tmp) == 32
			 && __fixed_size_storage_t<int, _Np>::_S_tuple_size == 1)
	return {_mm_packs_epi32(__to_intrin(__lo128(__tmp)),
				__to_intrin(__hi128(__tmp)))};
      else if constexpr (sizeof(_Tp) == 8 && sizeof(__tmp) == 64
			 && __fixed_size_storage_t<int, _Np>::_S_tuple_size == 1)
	return {_mm512_cvtepi64_epi32(__to_intrin(__tmp))};
  #endif // _GLIBCXX_SIMD_X86INTRIN
      else if constexpr (__fixed_size_storage_t<int, _Np>::_S_tuple_size == 1)
	// generic narrowing: rebuild an int wrapper from the single elements
	return {__call_with_subscripts<_Np>(__vector_bitcast<_LLong>(__tmp),
					    [](auto... __l) {
					      return __make_wrapper<int>(__l...);
					    })};
      else
	__assert_unreachable<_Tp>();
    }

    // _S_increment & _S_decrement{{{2
    // Adds 1 to every element of __x in place.
    template <typename _Tp, size_t _Np>
      _GLIBCXX_SIMD_INTRINSIC static void
      _S_increment(_SimdWrapper<_Tp, _Np>& __x)
      {
	const auto __next = __x._M_data + 1;
	__x = __next;
      }

    // Subtracts 1 from every element of __x in place.
    template <typename _Tp, size_t _Np>
      _GLIBCXX_SIMD_INTRINSIC static void
      _S_decrement(_SimdWrapper<_Tp, _Np>& __x)
      {
	const auto __previous = __x._M_data - 1;
	__x = __previous;
      }

    // smart_reference access {{{2
    // Writes __x into element __i of __v by forwarding to
    // _SimdWrapper::_M_set; the value category of __x is preserved.
    template <typename _Tp, size_t _Np, typename _Up>
      _GLIBCXX_SIMD_INTRINSIC constexpr static void
      _S_set(_SimdWrapper<_Tp, _Np>& __v, int __i, _Up&& __x) noexcept
      {
	__v._M_set(__i, static_cast<_Up&&>(__x));
      }

    // _S_masked_assign{{{2
    // Masked assignment of a vector: the new value of __lhs is
    // _S_blend(__k, __lhs, __rhs). Masks known at compile time to be
    // all-false or all-true skip the blend.
    template <typename _Tp, typename _K, size_t _Np>
      _GLIBCXX_SIMD_INTRINSIC static void
      _S_masked_assign(_SimdWrapper<_K, _Np> __k, _SimdWrapper<_Tp, _Np>& __lhs,
		       __type_identity_t<_SimdWrapper<_Tp, _Np>> __rhs)
      {
	// nothing selected: __lhs stays as it is
	if (__k._M_is_constprop_none_of())
	  return;

	// everything selected: plain assignment
	if (__k._M_is_constprop_all_of())
	  {
	    __lhs = __rhs;
	    return;
	  }

	__lhs = _CommonImpl::_S_blend(__k, __lhs, __rhs);
      }

    // Masked assignment of a scalar: like the vector overload, with __rhs
    // broadcast to all elements. A compile-time-known zero __rhs is handled
    // separately.
    template <typename _Tp, typename _K, size_t _Np>
      _GLIBCXX_SIMD_INTRINSIC static void
      _S_masked_assign(_SimdWrapper<_K, _Np> __k, _SimdWrapper<_Tp, _Np>& __lhs,
		       __type_identity_t<_Tp> __rhs)
      {
	// nothing selected: __lhs stays as it is
	if (__k._M_is_constprop_none_of())
	  return;

	// everything selected: plain broadcast
	if (__k._M_is_constprop_all_of())
	  {
	    __lhs = __vector_broadcast<_Np>(__rhs);
	    return;
	  }

	// assigning a known zero
	if (__builtin_constant_p(__rhs) && __rhs == 0)
	  {
	    if constexpr (is_same_v<bool, _K>)
	      // for AVX512/__mmask, a _mm512_maskz_mov is best
	      __lhs
		= _CommonImpl::_S_blend(__k, __lhs, _SimdWrapper<_Tp, _Np>());
	    else
	      // the __andnot optimization only makes sense if __k._M_data is a
	      // vector register
	      __lhs._M_data
		= __andnot(__vector_bitcast<_Tp>(__k), __lhs._M_data);
	    return;
	  }

	__lhs = _CommonImpl::_S_blend(__k, __lhs,
				      _SimdWrapper<_Tp, _Np>(
					__vector_broadcast<_Np>(__rhs)));
      }

    // _S_masked_cassign {{{2
    // Masked compound assignment: computes __op(_SuperImpl{}, __lhs, __rhs)
    // and stores it blended into __lhs via _S_blend(__k, __lhs, result).
    // Masks known at compile time to be all-false or all-true skip the blend.
    template <typename _Op, typename _Tp, typename _K, size_t _Np>
      _GLIBCXX_SIMD_INTRINSIC static void
      _S_masked_cassign(const _SimdWrapper<_K, _Np> __k,
			_SimdWrapper<_Tp, _Np>& __lhs,
			const __type_identity_t<_SimdWrapper<_Tp, _Np>> __rhs,
			_Op __op)
      {
	// nothing selected: __lhs stays as it is
	if (__k._M_is_constprop_none_of())
	  return;

	// everything selected: store the full result
	if (__k._M_is_constprop_all_of())
	  {
	    __lhs = __op(_SuperImpl{}, __lhs, __rhs);
	    return;
	  }

	__lhs = _CommonImpl::_S_blend(__k, __lhs,
				      __op(_SuperImpl{}, __lhs, __rhs));
      }

    // Scalar right-hand side: broadcast __rhs to a vector and defer to the
    // vector overload of _S_masked_cassign.
    template <typename _Op, typename _Tp, typename _K, size_t _Np>
      _GLIBCXX_SIMD_INTRINSIC static void
      _S_masked_cassign(const _SimdWrapper<_K, _Np> __k,
			_SimdWrapper<_Tp, _Np>& __lhs,
			const __type_identity_t<_Tp> __rhs, _Op __op)
      {
	const auto __rhs_vector = __vector_broadcast<_Np>(__rhs);
	_S_masked_cassign(__k, __lhs, __rhs_vector, __op);
      }

    // _S_masked_unary {{{2
    // Applies the unary operation _Op under mask __k: the operation is run
    // on a simd built from __v and the result is _S_blend(__k, __v, result).
    // A mask known at compile time to be all-false returns __v untouched;
    // one known to be all-true returns the result without blending.
    template <template <typename> class _Op, typename _Tp, typename _K,
	      size_t _Np>
      _GLIBCXX_SIMD_INTRINSIC static _SimdWrapper<_Tp, _Np>
      _S_masked_unary(const _SimdWrapper<_K, _Np> __k,
		      const _SimdWrapper<_Tp, _Np> __v)
      {
	if (__k._M_is_constprop_none_of())
	  return __v;

	auto __as_simd = _M_make_simd(__v);
	_Op<decltype(__as_simd)> __operation;
	if (__k._M_is_constprop_all_of())
	  return __data(__operation(__as_simd));

	return _CommonImpl::_S_blend(__k, __v,
				     __data(__operation(__as_simd)));
      }

    //}}}2
  };

// _MaskImplBuiltinMixin {{{1
// Helpers shared by the builtin mask implementations: conversions from
// bool / bitmask / vector mask to a vector mask (_S_to_maskvector) and from
// a vector mask to a bitmask (_S_to_bits).
struct _MaskImplBuiltinMixin
{
  template <typename _Tp>
    using _TypeTag = _Tp*;

  // _S_to_maskvector {{{
  // From a single bool: a vector initialized with {~_Up()} for true, an
  // empty-initialized vector for false.
  template <typename _Up, size_t _ToN = 1>
    _GLIBCXX_SIMD_INTRINSIC static constexpr _SimdWrapper<_Up, _ToN>
    _S_to_maskvector(bool __x)
    {
      static_assert(is_same_v<_Up, __int_for_sizeof_t<_Up>>);
      using _Vec = __vector_type_t<_Up, _ToN>;
      if (__x)
	return _Vec{~_Up()};
      return _Vec{};
    }

  // From a bitmask: element i is ~_Up() if bit i is set and _Up() otherwise;
  // elements at index _Np and above are _Up().
  template <typename _Up, size_t _UpN = 0, size_t _Np, bool _Sanitized,
	    size_t _ToN = _UpN == 0 ? _Np : _UpN>
    _GLIBCXX_SIMD_INTRINSIC static constexpr _SimdWrapper<_Up, _ToN>
    _S_to_maskvector(_BitMask<_Np, _Sanitized> __x)
    {
      static_assert(is_same_v<_Up, __int_for_sizeof_t<_Up>>);
      using _Vec = __vector_type_t<_Up, _ToN>;
      return __generate_vector<_Vec>([&](auto __idx) constexpr {
	if constexpr (__idx < _Np)
	  return __x[__idx] ? ~_Up() : _Up();
	else
	  return _Up();
      });
    }

  // From another mask wrapper. Same element size and same total size: plain
  // bitcast. bool storage: go through _BitMask. Anything else: element-wise
  // conversion to _Up, with _Up() for elements at index _Np and above.
  template <typename _Up, size_t _UpN = 0, typename _Tp, size_t _Np,
	    size_t _ToN = _UpN == 0 ? _Np : _UpN>
    _GLIBCXX_SIMD_INTRINSIC static constexpr _SimdWrapper<_Up, _ToN>
    _S_to_maskvector(_SimdWrapper<_Tp, _Np> __x)
    {
      static_assert(is_same_v<_Up, __int_for_sizeof_t<_Up>>);
      using _From = _SimdWrapper<_Tp, _Np>;
      using _To = _SimdWrapper<_Up, _ToN>;
      if constexpr (sizeof(_Up) == sizeof(_Tp) && sizeof(_From) == sizeof(_To))
	return __wrapper_bitcast<_Up, _ToN>(__x);
      else if constexpr (is_same_v<_Tp, bool>) // bits -> vector
	return _S_to_maskvector<_Up, _ToN>(_BitMask<_Np>(__x._M_data));
      else
	{ // vector -> vector
	  // A __vector_permute based narrowing for 16-byte inputs (picking
	  // the last _Up-sized part of every _Tp element) existed here as
	  // disabled code; only the generic element-wise path is active.
	  using _Vec = __vector_type_t<_Up, _ToN>;
	  return __generate_vector<_Vec>([&](auto __idx) constexpr {
	    if constexpr (__idx < _Np)
	      return _Up(__x[__idx.value]);
	    else
	      return _Up();
	  });
	}
    }

  // }}}
  // _S_to_bits {{{
  // Collects the most significant bit of every element of __x into a
  // bitmask: bit i of the result is the top bit of element i.
  template <typename _Tp, size_t _Np>
    _GLIBCXX_SIMD_INTRINSIC static constexpr _SanitizedBitMask<_Np>
    _S_to_bits(_SimdWrapper<_Tp, _Np> __x)
    {
      static_assert(!is_same_v<_Tp, bool>);
      static_assert(_Np <= __CHAR_BIT__ * sizeof(_ULLong));
      using _UInt = make_unsigned_t<__int_for_sizeof_t<_Tp>>;
      // unsigned shift leaves just the top bit, as 0 or 1
      const auto __top_bits
	= __vector_bitcast<_UInt>(__x) >> (sizeof(_UInt) * __CHAR_BIT__ - 1);
      _ULLong __bits = 0;
      __execute_n_times<_Np>([&](auto __idx) {
	__bits |= _ULLong(__top_bits[__idx.value]) << __idx;
      });
      return __bits;
    }

  // }}}
};

// _MaskImplBuiltin {{{1
// Mask operations implemented on top of _MaskImplBuiltinMixin. Most members
// forward to helpers found through _Abi (_Abi::_MaskImpl as _SuperImpl and
// _Abi::_CommonImpl). Where a member branches on
// __is_builtin_bitmask_abi<_Abi>(), the bitmask branch produces a
// _SimdWrapper<bool, N> holding bits, the other branch a vector mask.
template <typename _Abi, typename>
  struct _MaskImplBuiltin : _MaskImplBuiltinMixin
  {
    using _MaskImplBuiltinMixin::_S_to_bits;
    using _MaskImplBuiltinMixin::_S_to_maskvector;

    // member types {{{
    // Storage type of simd<_Tp, _Abi>, as declared by the ABI's traits.
    template <typename _Tp>
      using _SimdMember = typename _Abi::template __traits<_Tp>::_SimdMember;

    // Storage type of simd_mask<_Tp, _Abi>, as declared by the ABI.
    template <typename _Tp>
      using _MaskMember = typename _Abi::template _MaskMember<_Tp>;

    using _SuperImpl = typename _Abi::_MaskImpl;
    using _CommonImpl = typename _Abi::_CommonImpl;

    // Number of elements of simd<_Tp, _Abi>.
    template <typename _Tp>
      static constexpr size_t _S_size = simd_size_v<_Tp, _Abi>;

    // }}}
    // _S_broadcast {{{
    // Returns the ABI's implicit mask for _Tp when __x is true and a
    // value-initialized _MaskMember<_Tp> when __x is false.
    template <typename _Tp>
      _GLIBCXX_SIMD_INTRINSIC static constexpr _MaskMember<_Tp>
      _S_broadcast(bool __x)
      {
	return __x ? _Abi::template _S_implicit_mask<_Tp>()
		   : _MaskMember<_Tp>();
      }

    // }}}
    // _S_load {{{
    // Builds a mask from the bools at __mem[0] ... __mem[_S_size<_Tp> - 1].
    template <typename _Tp>
      _GLIBCXX_SIMD_INTRINSIC static constexpr _MaskMember<_Tp>
      _S_load(const bool* __mem)
      {
	using _I = __int_for_sizeof_t<_Tp>;
	// When elements are as wide as bool, the bools are loaded as _I
	// elements through _CommonImpl::_S_load and compared against zero.
	if constexpr (sizeof(_Tp) == sizeof(bool))
	  {
	    const auto __bools
	      = _CommonImpl::template _S_load<_I, _S_size<_Tp>>(__mem);
	    // bool is {0, 1}, everything else is UB
	    return __bools > 0;
	  }
	else
	  // Otherwise each element is generated individually: all bits set
	  // (~_I()) for true, zero (_I()) for false.
	  return __generate_vector<_I, _S_size<_Tp>>([&](auto __i) constexpr {
	    return __mem[__i] ? ~_I() : _I();
	  });
      }

    // }}}
    // _S_convert {{{
    // The _S_convert overloads turn another mask representation into the
    // representation used by simd_mask<_Tp, _Abi>.

    // From a _BitMask: the bitmask ABI stores __x._M_to_bits() directly;
    // otherwise the sanitized bits are expanded by _S_to_maskvector.
    template <typename _Tp, size_t _Np, bool _Sanitized>
      _GLIBCXX_SIMD_INTRINSIC static constexpr auto
      _S_convert(_BitMask<_Np, _Sanitized> __x)
      {
	if constexpr (__is_builtin_bitmask_abi<_Abi>())
	  return _SimdWrapper<bool, simd_size_v<_Tp, _Abi>>(__x._M_to_bits());
	else
	  return _SuperImpl::template _S_to_maskvector<__int_for_sizeof_t<_Tp>,
						       _S_size<_Tp>>(
	    __x._M_sanitized());
      }

    // From a bool wrapper: the bitmask ABI re-wraps __x._M_data at the
    // target size; otherwise the data is wrapped in a _BitMask<_Np>,
    // sanitized and expanded by _S_to_maskvector.
    template <typename _Tp, size_t _Np>
      _GLIBCXX_SIMD_INTRINSIC static constexpr auto
      _S_convert(_SimdWrapper<bool, _Np> __x)
      {
	if constexpr (__is_builtin_bitmask_abi<_Abi>())
	  return _SimdWrapper<bool, simd_size_v<_Tp, _Abi>>(__x._M_data);
	else
	  return _SuperImpl::template _S_to_maskvector<__int_for_sizeof_t<_Tp>,
						       _S_size<_Tp>>(
	    _BitMask<_Np>(__x._M_data)._M_sanitized());
      }

    // From a wrapper with element type _Up: the bitmask ABI packs it via
    // _SuperImpl::_S_to_bits; otherwise _S_to_maskvector converts it to the
    // target element type and size.
    template <typename _Tp, typename _Up, size_t _Np>
      _GLIBCXX_SIMD_INTRINSIC static constexpr auto
      _S_convert(_SimdWrapper<_Up, _Np> __x)
      {
	if constexpr (__is_builtin_bitmask_abi<_Abi>())
	  return _SimdWrapper<bool, simd_size_v<_Tp, _Abi>>(
	    _SuperImpl::_S_to_bits(__x));
	else
	  return _SuperImpl::template _S_to_maskvector<__int_for_sizeof_t<_Tp>,
						       _S_size<_Tp>>(__x);
      }

    // From a simd_mask of another ABI: the bitmask ABI picks the way to
    // obtain bits according to the kind of _UAbi; otherwise __data(__x) is
    // handed to _S_to_maskvector.
    template <typename _Tp, typename _Up, typename _UAbi>
      _GLIBCXX_SIMD_INTRINSIC static constexpr auto
      _S_convert(simd_mask<_Up, _UAbi> __x)
      {
	if constexpr (__is_builtin_bitmask_abi<_Abi>())
	  {
	    using _R = _SimdWrapper<bool, simd_size_v<_Tp, _Abi>>;
	    if constexpr (__is_builtin_bitmask_abi<_UAbi>()) // bits -> bits
	      return _R(__data(__x));
	    else if constexpr (__is_scalar_abi<_UAbi>()) // bool -> bits
	      return _R(__data(__x));
	    else if constexpr (__is_fixed_size_abi_v<_UAbi>) // bitset -> bits
	      return _R(__data(__x)._M_to_bits());
	    else // vector -> bits
	      return _R(_UAbi::_MaskImpl::_S_to_bits(__data(__x))._M_to_bits());
	  }
	else
	  return _SuperImpl::template _S_to_maskvector<__int_for_sizeof_t<_Tp>,
						       _S_size<_Tp>>(
	    __data(__x));
      }

    // }}}
    // _S_masked_load {{{2
    // Returns __merge with every element whose bit is set in __mask replaced
    // by the value loaded from __mem at the same index. The remaining
    // elements keep their value from __merge.
    template <typename _Tp, size_t _Np>
      static inline _SimdWrapper<_Tp, _Np>
      _S_masked_load(_SimdWrapper<_Tp, _Np> __merge,
		     _SimdWrapper<_Tp, _Np> __mask, const bool* __mem) noexcept
      {
	// AVX(2) has 32/64 bit maskload, but nothing at 8 bit granularity
	auto __tmp = __wrapper_bitcast<__int_for_sizeof_t<_Tp>>(__merge);
	// -__mem[__i] promotes the bool to int and negates it: 0 for false,
	// -1 (all bits set) for true.
	_BitOps::_S_bit_iteration(_SuperImpl::_S_to_bits(__mask),
				  [&](auto __i) {
				    __tmp._M_set(__i, -__mem[__i]);
				  });
	__merge = __wrapper_bitcast<_Tp>(__tmp);
	return __merge;
      }

    // _S_store {{{2
    // Writes element __i of __v to __mem[__i] for every __i in [0, _Np).
    template <typename _Tp, size_t _Np>
      _GLIBCXX_SIMD_INTRINSIC static void _S_store(_SimdWrapper<_Tp, _Np> __v,
						   bool* __mem) noexcept
      {
	__execute_n_times<_Np>([&](auto __i) constexpr {
	  __mem[__i] = __v[__i];
	});
      }

    // _S_masked_store {{{2
    // Like _S_store, but only the indices whose bit is set in
    // _SuperImpl::_S_to_bits(__k) are written.
    template <typename _Tp, size_t _Np>
      static inline void
      _S_masked_store(const _SimdWrapper<_Tp, _Np> __v, bool* __mem,
		      const _SimdWrapper<_Tp, _Np> __k) noexcept
      {
	_BitOps::_S_bit_iteration(
	  _SuperImpl::_S_to_bits(__k), [&](auto __i) constexpr {
	    __mem[__i] = __v[__i];
	  });
      }

    // _S_from_bitmask{{{2
    // Expands a sanitized bitmask into the mask member for _Tp by forwarding
    // to _SuperImpl::_S_to_maskvector. The _TypeTag argument is unnamed and
    // serves only to select _Tp.
    template <size_t _Np, typename _Tp>
      _GLIBCXX_SIMD_INTRINSIC static _MaskMember<_Tp>
      _S_from_bitmask(_SanitizedBitMask<_Np> __bits, _TypeTag<_Tp>)
      {
	return _SuperImpl::template _S_to_maskvector<_Tp, _S_size<_Tp>>(__bits);
      }

    // logical and bitwise operators {{{2
    // The logical and bitwise variants of and/or are implemented identically
    // here: both apply __and / __or to the underlying _M_data.
    template <typename _Tp, size_t _Np>
      _GLIBCXX_SIMD_INTRINSIC static constexpr _SimdWrapper<_Tp, _Np>
      _S_logical_and(const _SimdWrapper<_Tp, _Np>& __x,
		     const _SimdWrapper<_Tp, _Np>& __y)
      { return __and(__x._M_data, __y._M_data); }

    template <typename _Tp, size_t _Np>
      _GLIBCXX_SIMD_INTRINSIC static constexpr _SimdWrapper<_Tp, _Np>
      _S_logical_or(const _SimdWrapper<_Tp, _Np>& __x,
		    const _SimdWrapper<_Tp, _Np>& __y)
      { return __or(__x._M_data, __y._M_data); }

    // Complement of __x. For ABIs where _S_is_partial<_Tp> holds, the result
    // is combined with the implicit mask via __andnot instead of a plain
    // __not.
    // NOTE(review): presumably this keeps the elements outside the implicit
    // mask at zero -- confirm against the definition of __andnot.
    template <typename _Tp, size_t _Np>
      _GLIBCXX_SIMD_INTRINSIC static constexpr _SimdWrapper<_Tp, _Np>
      _S_bit_not(const _SimdWrapper<_Tp, _Np>& __x)
      {
	if constexpr (_Abi::template _S_is_partial<_Tp>)
	  return __andnot(__x, __wrapper_bitcast<_Tp>(
				 _Abi::template _S_implicit_mask<_Tp>()));
	else
	  return __not(__x._M_data);
      }

    template <typename _Tp, size_t _Np>
      _GLIBCXX_SIMD_INTRINSIC static constexpr _SimdWrapper<_Tp, _Np>
      _S_bit_and(const _SimdWrapper<_Tp, _Np>& __x,
		 const _SimdWrapper<_Tp, _Np>& __y)
      { return __and(__x._M_data, __y._M_data); }

    template <typename _Tp, size_t _Np>
      _GLIBCXX_SIMD_INTRINSIC static constexpr _SimdWrapper<_Tp, _Np>
      _S_bit_or(const _SimdWrapper<_Tp, _Np>& __x,
		const _SimdWrapper<_Tp, _Np>& __y)
      { return __or(__x._M_data, __y._M_data); }

    template <typename _Tp, size_t _Np>
      _GLIBCXX_SIMD_INTRINSIC static constexpr _SimdWrapper<_Tp, _Np>
      _S_bit_xor(const _SimdWrapper<_Tp, _Np>& __x,
		 const _SimdWrapper<_Tp, _Np>& __y)
      { return __xor(__x._M_data, __y._M_data); }

    // smart_reference access {{{2
    // Sets element __i of the mask __k to __x.
    template <typename _Tp, size_t _Np>
      static constexpr void _S_set(_SimdWrapper<_Tp, _Np>& __k, int __i,
				   bool __x) noexcept
      {
	if constexpr (is_same_v<_Tp, bool>)
	  __k._M_set(__i, __x);
	else
	  {
	    // Vector masks must already use the integer type of their
	    // element size.
	    static_assert(is_same_v<_Tp, __int_for_sizeof_t<_Tp>>);
	    if (__builtin_is_constant_evaluated())
	      {
		// During constant evaluation the whole vector is rebuilt
		// element by element, with -__x (0 or -1) substituted at
		// index __i.
		// NOTE(review): presumably because the direct element write
		// below is not usable in a constant expression -- confirm.
		__k = __generate_from_n_evaluations<_Np,
						    __vector_type_t<_Tp, _Np>>(
		  [&](auto __j) {
		    if (__i == __j)
		      return _Tp(-__x);
		    else
		      return __k[+__j];
		  });
	      }
	    else
	      __k._M_data[__i] = -__x;
	  }
      }

    // _S_masked_assign{{{2
    // Replaces __lhs by the blend of __lhs and __rhs under the mask __k, as
    // computed by _CommonImpl::_S_blend.
    template <typename _Tp, size_t _Np>
      _GLIBCXX_SIMD_INTRINSIC static void
      _S_masked_assign(_SimdWrapper<_Tp, _Np> __k,
		       _SimdWrapper<_Tp, _Np>& __lhs,
		       __type_identity_t<_SimdWrapper<_Tp, _Np>> __rhs)
      { __lhs = _CommonImpl::_S_blend(__k, __lhs, __rhs); }

    // Overload for a single bool on the right-hand side. If the compiler
    // knows __rhs is a constant, the blend is replaced by one bitwise
    // operation: __andnot(__k, __lhs) for false, __or(__k, __lhs) for true.
    // Otherwise __rhs is broadcast through simd_mask<_Tp, _Abi> and blended.
    template <typename _Tp, size_t _Np>
      _GLIBCXX_SIMD_INTRINSIC static void
      _S_masked_assign(_SimdWrapper<_Tp, _Np> __k,
		       _SimdWrapper<_Tp, _Np>& __lhs, bool __rhs)
      {
	if (__builtin_constant_p(__rhs))
	  {
	    if (__rhs == false)
	      __lhs = __andnot(__k, __lhs);
	    else
	      __lhs = __or(__k, __lhs);
	    return;
	  }
	__lhs = _CommonImpl::_S_blend(__k, __lhs,
				      __data(simd_mask<_Tp, _Abi>(__rhs)));
      }

    //}}}2
    // _S_all_of {{{
    // True iff each of the first _S_size<_Tp> elements of __k is nonzero.
    // The elements are passed to the lambda as a pack and combined with a
    // fold expression.
    template <typename _Tp>
      _GLIBCXX_SIMD_INTRINSIC static bool
      _S_all_of(simd_mask<_Tp, _Abi> __k)
      {
	return __call_with_subscripts(
	  __data(__k), make_index_sequence<_S_size<_Tp>>(),
	  [](const auto... __ent) constexpr { return (... && !(__ent == 0)); });
      }

    // }}}
    // _S_any_of {{{
    // True iff at least one of the first _S_size<_Tp> elements of __k is
    // nonzero.
    template <typename _Tp>
      _GLIBCXX_SIMD_INTRINSIC static bool
      _S_any_of(simd_mask<_Tp, _Abi> __k)
      {
	return __call_with_subscripts(
	  __data(__k), make_index_sequence<_S_size<_Tp>>(),
	  [](const auto... __ent) constexpr { return (... || !(__ent == 0)); });
      }

    // }}}
    // _S_none_of {{{
    // True iff each of the first _S_size<_Tp> elements of __k is zero.
    template <typename _Tp>
      _GLIBCXX_SIMD_INTRINSIC static bool
      _S_none_of(simd_mask<_Tp, _Abi> __k)
      {
	return __call_with_subscripts(
	  __data(__k), make_index_sequence<_S_size<_Tp>>(),
	  [](const auto... __ent) constexpr { return (... && (__ent == 0)); });
      }

    // }}}
    // _S_some_of {{{
    // True iff the popcount reported by _SuperImpl is greater than zero and
    // less than _S_size<_Tp>, i.e. some but not all elements are set.
    template <typename _Tp>
      _GLIBCXX_SIMD_INTRINSIC static bool
      _S_some_of(simd_mask<_Tp, _Abi> __k)
      {
	const int __n_true = _SuperImpl::_S_popcount(__k);
	return __n_true > 0 && __n_true < int(_S_size<_Tp>);
      }

    // }}}
    // _S_popcount {{{
    // Number of true elements in __k. The mask is reinterpreted as a simd of
    // the same-sized integer type _I and summed with reduce(). True elements
    // are stored as all-ones, i.e. -1 (see _S_load and _S_set), so the
    // negated sum is the count.
    template <typename _Tp>
      _GLIBCXX_SIMD_INTRINSIC static int
      _S_popcount(simd_mask<_Tp, _Abi> __k)
      {
	using _I = __int_for_sizeof_t<_Tp>;
	// If simd<_I, _Abi> is not default-constructible, go through
	// simd<_Tp, _Abi> and bit-cast it to the rebound integer simd.
	if constexpr (is_default_constructible_v<simd<_I, _Abi>>)
	  return -reduce(
	    simd<_I, _Abi>(__private_init, __wrapper_bitcast<_I>(__data(__k))));
	else
	  return -reduce(__bit_cast<rebind_simd_t<_I, simd<_Tp, _Abi>>>(
	    simd<_Tp, _Abi>(__private_init, __data(__k))));
      }

    // }}}
    // _S_find_first_set {{{
    // Index of the lowest set element: the number of trailing zero bits in
    // the bitmask form of __k.
    template <typename _Tp>
      _GLIBCXX_SIMD_INTRINSIC static int
      _S_find_first_set(simd_mask<_Tp, _Abi> __k)
      {
	return std::__countr_zero(
	  _SuperImpl::_S_to_bits(__data(__k))._M_to_bits());
      }

    // }}}
    // _S_find_last_set {{{
    // Index of the highest set element: the bit width of the bitmask form of
    // __k, minus one.
    template <typename _Tp>
      _GLIBCXX_SIMD_INTRINSIC static int
      _S_find_last_set(simd_mask<_Tp, _Abi> __k)
      {
	return std::__bit_width(
	  _SuperImpl::_S_to_bits(__data(__k))._M_to_bits()) - 1;
      }

    // }}}
  };

//}}}1
_GLIBCXX_SIMD_END_NAMESPACE
#endif // __cplusplus >= 201703L
#endif // _GLIBCXX_EXPERIMENTAL_SIMD_ABIS_H_

// vim: foldmethod=marker foldmarker={{{,}}} sw=2 noet ts=8 sts=2 tw=100
                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                
                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                
                                                               gb O  ( LR          P  ) MR      ib Q  I MR  Tb jb R  * MR  Ub kb S  + MR  H     T  , NR   mb U  - NR      nb V  h NR  ` ob W  i NR      pb X  . NR  ° qb Y  / NR  	 rb Z  0 NR  9 sb [  1 NR  Sh tb \  ̤ NR      ub ]  2 NR  	 vb ^  3 NR  D wb _  4 NR   xb `  5 NR  B yb a  6 NR  E     b  O OR  Yb {b c  S OR  8a |b d   OR      }b e  [ OR  M ~b f  2 OR  BM b g  Y OR  N b h   OR      b i  7 OR  1 b j  a OR  ^b b k   OR  _b     l  I PR  ib b m   PR  mb b n  h PR  zb b o   PR  |b b p  [ PR  }b b q  j PR  \b b r  a PR  b b s   PR  b b t  8 PR       u  h QR  b b v   QR          w  9 RR      b x  : RR  b b y  ; RR      b z  < RR  b b {  = RR          |  O SR  b b }  > SR   b ~   SR  N b   ) SR  Ab b    SR  $b b   ? SR  b b   @ SR  b b   A SR            B TR  b b   C TR  b b   D TR  b b   E TR  b b    TR  \` b    TR  ]` b    TR  _` b    TR  b b   F TR  ؋       G UR   b   H UR   b   I UR  ڭ b   J UR  5 b   K UR   b   L UR   b   M UR      b   N UR  5 b   O UR   b   P UR  5 b   Q UR      b   R UR  5 b   S UR  5 b   T UR  I	 b   U UR      b   V UR  ߭ b   W UR  5 b   X UR  |	 b   Y UR  
 b   Z UR   b   [ UR  ـ
 b   \ UR  
 b   ] UR  @
 b   ^ UR  5 b   _ UR      b   ` UR  s
 b   a UR   b   b UR  5 b   c UR  
 b   d UR  i b   e UR  k b   f UR  
 b   g UR  l b   h UR  m b   i UR  5 b   j UR  k5 b   k UR  	
 b   l UR      b   m UR   b    UR  b b    UR  b b   ) UR  b b    UR  b       n VR  6 b   o VR  b b   p VR  } b   q VR  b b   r VR  J
 b   s VR  ^
 b   t VR  
 b   u VR  
 b   v VR  
 b   w VR      b   x VR            y WR  b b   z WR  b b   { WR  b b   | WR  b       } XR            O YR  b       h [R  b       } \R  
b b   ~ \R  b b    \R  b b   - \R  ` b   O \R  b b   ~ \R      b   ϣ \R  ?b b    \R  b b    \R  b b    \R      b    \R  ZU b    \R             ]R      b    ]R      b    ]R  b        ^R  b b   ϣ ^R  b b   t ^R  @b b    ^R  b b    ^R             _R  b b    _R      b    _R  b       ۤ `R  a b   ܤ `R  b b   m `R  b b   S `R  {b b   ϣ `R  b b   t `R  b        aR  B{        bR  b c    bR  b c    bR  b c    bR  c c    bR  c c   F bR            O cR  ` c   ܣ cR  L c    cR      	c    cR   
c   ϣ cR  b c    cR  c c   ӣ cR  a 
c   ԣ cR  #a c    cR  @M c    cR      c    cR  c c    cR  L c    cR  ] c   ף cR  b c    cR   c    cR  _E       o fR  c       ' gR  KO c    ( gR  ` c    gR  c c    gR             hR      c    hR  c c    hR   c c    hR  d` c    hR       c    hR          	   iR  c "c 
   iR  !c #c    iR      $c    iR  )c %c 
   iR  *c &c    iR  +c 'c    iR  ,c (c    iR      )c    iR      *c    iR      +c    iR      ,c    iR            m jR  c .c   ϣ jR  
c /c    jR  b`        kR  8 1c    kR      2c   O kR  -c 3c    kR      4c    kR      5c    kR   6c    kR      7c    kR   8c     kR  b 9c !   kR  Jc :c "   kR  # ;c #   kR      <c $   kR          %   lR      >c &   lR      ?c '   lR  jb @c (  դ lR  ` Ac )  ֤ lR  ` Bc *   lR      Cc +   lR  78 Dc ,   lR  P8 Ec -   lR      Fc .   lR      Gc /   lR  ! Hc 0   lR  R     1   mR   Jc 2   mR          3   nR  ec Lc 4  O nR  2c Mc 5  ϣ nR  .c Nc 6   nR  c Oc 7   nR  Pc Pc 8   nR          9   oR  qc Rc :  O oR  Lc Sc ;  ϣ oR  Mc Tc <  j oR  b Uc =   oR  Nc Vc >   oR  Wc Wc ?   oR          @   pR      Yc A  m pR  Rc Zc B  ϣ pR  Sc [c C  ף pR  Uc \c D   pR  _c ]c E   pR  `c ^c F   pR  ac _c G   pR      `c H   pR      ac I   pR          J  § qR          K  ç rR      dc L  ħ rR          M   sR  cc fc N  m sR  Yc gc O  ϣ sR  Zc hc P  [ sR  [b ic Q  j sR  Tc jc R  ף sR  [c kc S  ŧ sR  nc lc T  Ƨ sR  oc mc U  ǧ sR  pc nc V  ȧ sR      oc W  ɧ sR      pc X  ʧ sR          Y  ˧ tR      rc Z  ̧ tR          [  ͧ uR      tc \  w uR  b     ]  Χ vR  ˳ vc ^  ϧ vR  U wc _   vR          `  w wR  tc yc a  Ч wR          b  w xR  xc     c   yR  c     d  w zR  zc }c e  ѧ zR          f  ҧ {R  c c g  w {R  |c     h  ҧ |R      c i  ӧ |R  \ c j  ԧ |R   c k  է |R  zM c l  w |R  c c m  ֧ |R  c     n  ҧ }R  c c o  ק }R  W
 c p  w }R  c c q  ֧ }R  c     r  ҧ ~R  c c s  ا ~R      c t  ٧ ~R  [5 c u  w ~R  c     v  w R  c c w  ڧ R          x  ҧ R  c c y  w R  c c z   R  {c     {   R  c c |  ۧ R  M     }  ܧ R  T c ~  w R  c c   ݧ R  c c   ާ R            w R  c       w R  c c   ߧ R            w R  c        R   c    R   c   w R  c        R  c       w R  c c    R             R  w c   w R  c       x R  N c   w R  c c    R             R  c       w R  c       w R  c c   ҧ R  c        R  4j c   w R  c        R  ` c   w R  c c    R  I       o R  fc c    R  c c    R  K        R  S1 c    R   c    R  
 c    R      c    R  
 c    R  O c   w R  c c   O R  c c   ϣ R  gc c    R      c    R   
 c    R  l c    R  < c   ڣ R      c    R  - c    R      c   t R  jc c    R  8c c    R  r c    R  ?7 c    R         ڤ R      c   w R  c c    R          R      c    R      c    R  -        R  c        R  c c   M R  c        R            m R  c c    R  ,O        R  c c   O R  c        R            O R  c c    R  c c    R   c    R      c    R             R      c   O R  c c     R   c   f R  a        R  a c    R  a c    R  T c    R      c   m R  c c    R  c c    R   c    R   c    R  b c    R  hc c    R  c c    R  GM c    R  ݈ c    R  H        R  a c   O R  c c    R  a c    R  a c    R  ߥ
       	 R      c    R  c c    R  c c   
 R      c   m R  c c   ܣ R  c c    R  j c    R  H c   
 R  c c    R   c    R   c    R  c  d    R  w        R  } d    R  S d    R  } d    R  T d    R      d    R  d d    R      d    R  d        R      
d    R  d d    R  
d d    R      
d    R  d        R      d   m R  c d   ϣ R  c d    R  l d    R  ` d    R  Ǽ d   t R  c d    R    d    R  
d          libxpp3-java   derby-tools    libcommons-dbcp-java   libcommons-pool-java  
 libderby-java    libjettison-java   1.5.3   
 libjmdns-java    3.5.5    libosgi-core-java    libxstream-java    1.4.20    b7875bda385f5f6b4e36597054392132   3.1.4-3    activemq-activeio    libcommons-logging-java    1.2       # V  V                #               BJ      # W  W                # X  X                &$ Y  Y  ]   CJ      L$ Z  Z                X$ [  [        m     h$ \  \        J}     p$ ]  ]        g     |$ ^  ^        .     $ _  _             $               HJ      % `  `                ,% a  a  (           >% b  b                T% c  c  o_           & d  d                @&               KJ      X& e  e             & f  f                & g  g  r           ' h  h                 '            MJ      *' i  i  l           f' j  j                ~' k  k  ^           ' l  l  F           ' m  m                ' n  n  W           ' o  o             
(         }   NJ      ( p  p             $( q  q             \( r  r        PJ      ( s  s                ( t  t  4H           (               QJ      4) u  u                 R)               SJ      ) v  v                ) w  w                ) x  x             ) y  y                ) z  z                * {  {      TJ      2* |  |                F* }  }  J 	          * ~  ~      
          *               ZJ      *                   &+         
          <+                   P+                   +                   +                   +                   ,                   v,                   ,           %      ,                ,           \J      ,                   &-               bJ      0-                H-                   j-     I           -     x
           -     z           -                   -     lx            -     
 !          .     > "          .         #          P.         $          `.     ] %          .     C  &          .         '          .         (           /     W )          \/         *          /         +  cJ      /      ,          /         -  fJ      /     MO .          0     Jo /        0       0  ~     (0         1  
     40         2  Q     F0         3          |0         4          0     6r 5  gJ      0         6          1      7          1         8          1         9          $1     f :          21         ;          B1     C <          P1         =          \1     v >  =>     t1      ?          ~1     O @          1         A          1         B          1      C  (     1         D  R       2         E               nA	 F                       G  R               H          Ȃ         I          Ԃ         J          4         K          r     i9
 L                   M                    N  
R      ̃     ' O          ڃ     
 P          4     8 Q  R      X         R          t Þ  Þ      S           Ğ  Ğ      T           Ş  Ş      U  R      & ƞ  ƞ      V  R      < Ǟ  Ǟ      W          ` Ȟ  Ȟ  " X           ɞ  ɞ   Y                       Z  R       ʞ  ʞ      [           ˞  ˞      \           ̞  ̞      ]          4 ͞  ͞   ^          n Ξ  Ξ      _           Ϟ  Ϟ  c `           О  О      a          ̆             b  R      Ԇ ў  ў      c           Ҟ  Ҟ      d          J Ӟ  Ӟ  A e  R       Ԟ  Ԟ  }s f           ՞  ՞    g  R       ֞  ֞      h           מ  מ      i           ؞  ؞      j  [      ٞ  ٞ   k       J ڞ  ڞ      l          l         [A m  R       ۞  ۞      n  R       ܞ  ܞ      o  R       ݞ  ݞ  *' p          " ޞ  ޞ  # q  m     2 ߞ  ߞ      r          L         s          d         t          ~         u                   v          ԉ     
 w                x                y                   z  1     <         {          \             |  R      h         }               l ~                                                        R                 "j                        2                   ^                   r     G                              Ћ           &R                                            0     9	           `                   p                                                                            ̌                                                                   'R      *                   >     #           N                              )R      Ȏ     *
                                                                 2 	  	                L 
  
                b                                    
  
                                   ԏ                                   v        *R           m                                           ΐ                                                         <         A   +R      N     ϝ                	   ,R           B
                      =          [=   -R           iH           (                8                   B       	        J !  !  @	        z "  "  kS   .R       #  #        /R       $  $  4   0R      X %  %                p &  &  q            '  '                 (  (  `@          )  )        1R       *  *  p   0     8 +  +  )
           `               2R      t ,  ,  H            -  -                 .  .                 /  /  p           0 0  0  (
   3R      z 1  1     4R       2  2  	   5R       #  #     6R      h6 $  $  Fd   :R      6 %  %  s           6 &  &                6 '  '  C           7 (  (  $           <7 )  )                X7 *  *                v7 +  +                7 ,  ,                7 -  -                7 .  .                7 /  /              8 0  0             88 1  1                p8 2  2        ;R      8 3  3             8 4  4                8 5  5                8 6  6             9         Q
   <R      <9 7  7  QG           X9               ?R      b9 8  8                |9 9  9                9 :  :                9 ;  ;        m,     9 <  <                9 =  =             : >  >                8: ?  ?  8   @R      ~: @  @             :            FR      : A  A  n           : B  B  ^a
   g<     : C  C                : D  D             ; E  E                 ; F  F  \           R; G  G  ?           ; H  H                ; I  I     HR      "< J  J                ,< K  K             < L  L        IR      < M  M                <               KR      < N  N                = O  O                0= P  P                r= Q  Q                = R  R                = S  S        PR      = T  T                > U  U                > V  V                 .> W  W             6> X  X             B> Y  Y        l     J> Z  Z             f> [  [  4           > \  \             > ]  ]        jk      > ^  ^  a   ?     > _  _      	  {k      > `  `  { 
  k      > a  a             > b  b  ~
   /     (? c  c      
          >? d  d                P? e  e  @           d? f  f             r? g  g                ? h  h             ? i  i                ? j  j                ? k  k                ? l  l                ? m  m                @ n  n                n@ o  o  }   QR      @ p  p     VR      @ q  q                @ r  r        XR      @ s  s                @ t  t  o
           A u  u                JA v  v                 A w  w      !          A x  x      "          A y  y      #          B z  z      $          :B {  {      %          \B |  |      &          B }  }      '          B ~  ~      (          B         )          B         *           C         +          HC         ,          nC         -          C         .          C         /          C         0          D         1          .D         2          TD         3          |D         4          D         5          D         6          D         7          E         8          :E         9          XE         :          E         ;          E         <          E         =          E         >          F         ?          :F         @          ZF         A          ~F         B          F         C          F         D          F         E          G         F          JG         G          rG         H          G         I          G     [
 J          G         K          H         L          &H         M          2H         N          FH     _ O          fH         P          ~H         Q          H         R          RI     ^ S  ZR      I         T          I         U  [R      $J         V  \R      <J         W          xJ         X          J          Y  _R      J         Z          J         [          J         \          ZK         ]          K         aX ^  `R      K         _  aR      
L         `          0L             a  bR      nL     K b  cR      L     
 c          L      d          L         e          M     Ձ f          "M         g          4M         h  ~1     fM      i  dR      M         j          M             k  eR      N     8r l  fR      VN     x m  gR      jN         n  Qw      N         o          N             p  jR      N ¨  ¨      q          N è  è      r          O Ĩ  Ĩ  O s          O Ũ  Ũ   t          $O ƨ  ƨ  t u          // Definition of the public simd interfaces -*- C++ -*-

// Copyright (C) 2020-2022 Free Software Foundation, Inc.
//
// This file is part of the GNU ISO C++ Library.  This library is free
// software; you can redistribute it and/or modify it under the
// terms of the GNU General Public License as published by the
// Free Software Foundation; either version 3, or (at your option)
// any later version.

// This library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
// GNU General Public License for more details.

// Under Section 7 of GPL version 3, you are granted additional
// permissions described in the GCC Runtime Library Exception, version
// 3.1, as published by the Free Software Foundation.

// You should have received a copy of the GNU General Public License and
// a copy of the GCC Runtime Library Exception along with this program;
// see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
// <http://www.gnu.org/licenses/>.

#ifndef _GLIBCXX_EXPERIMENTAL_SIMD_H
#define _GLIBCXX_EXPERIMENTAL_SIMD_H

#if __cplusplus >= 201703L

#include "simd_detail.h"
#include "numeric_traits.h"
#include <bit>
#include <bitset>
#ifdef _GLIBCXX_DEBUG_UB
#include <cstdio> // for stderr
#endif
#include <cstring>
#include <cmath>
#include <functional>
#include <iosfwd>
#include <utility>

#if _GLIBCXX_SIMD_X86INTRIN
#include <x86intrin.h>
#elif _GLIBCXX_SIMD_HAVE_NEON
#include <arm_neon.h>
#endif

/** @ingroup ts_simd
 * @{
 */
/* There are several closely related types, with the following naming
 * convention:
 * _Tp: vectorizable (arithmetic) type (or any type)
 * _TV: __vector_type_t<_Tp, _Np>
 * _TW: _SimdWrapper<_Tp, _Np>
 * _TI: __intrinsic_type_t<_Tp, _Np>
 * _TVT: _VectorTraits<_TV> or _VectorTraits<_TW>
 * If one additional type is needed use _U instead of _T.
 * Otherwise use _T\d, _TV\d, _TW\d, _TI\d, _TVT\d.
 *
 * More naming conventions:
 * _Ap or _Abi: An ABI tag from the simd_abi namespace
 * _Ip: often used for integer types with sizeof(_Ip) == sizeof(_Tp),
 *      _IV, _IW as for _TV, _TW
 * _Np: number of elements (not bytes)
 * _Bytes: number of bytes
 *
 * Variable names:
 * __k: mask object (vector- or bitmask)
 */
_GLIBCXX_SIMD_BEGIN_NAMESPACE

// Fallback declarations of the x86 intrinsic vector type names. They are only
// provided when <x86intrin.h> was not included above, and are declared as GNU
// vector types of 16, 32 and 64 bytes with float, double and long long
// elements respectively.
#if !_GLIBCXX_SIMD_X86INTRIN
using __m128  [[__gnu__::__vector_size__(16)]] = float;
using __m128d [[__gnu__::__vector_size__(16)]] = double;
using __m128i [[__gnu__::__vector_size__(16)]] = long long;
using __m256  [[__gnu__::__vector_size__(32)]] = float;
using __m256d [[__gnu__::__vector_size__(32)]] = double;
using __m256i [[__gnu__::__vector_size__(32)]] = long long;
using __m512  [[__gnu__::__vector_size__(64)]] = float;
using __m512d [[__gnu__::__vector_size__(64)]] = double;
using __m512i [[__gnu__::__vector_size__(64)]] = long long;
#endif

namespace simd_abi {
// simd_abi forward declarations {{{
// implementation details:
// Tag behind the standard alias `scalar` (see below).
struct _Scalar;

// Tag behind the standard alias `fixed_size<_Np>` (see below).
template <int _Np>
  struct _Fixed;

// There are two major ABIs that appear on different architectures.
// Both have non-boolean values packed into an N Byte register
// -> #elements = N / sizeof(T)
// Masks differ:
// 1. Use value vector registers for masks (all 0 or all 1)
// 2. Use bitmasks (mask registers) with one bit per value in the corresponding
//    value vector
//
// Both can be partially used, masking off the rest when doing horizontal
// operations or operations that can trap (e.g. FP_INVALID or integer division
// by 0). This is encoded as the number of used bytes.
template <int _UsedBytes>
  struct _VecBuiltin;

template <int _UsedBytes>
  struct _VecBltnBtmsk;

// _VecBuiltin tag sized for exactly _Np elements of type _Tp.
template <typename _Tp, int _Np>
  using _VecN = _VecBuiltin<sizeof(_Tp) * _Np>;

// Architecture-named aliases. _Sse, _Avx and _Neon all name _VecBuiltin and
// differ only in their default byte count (16, 32, 16); _Avx512 names the
// bitmask variant _VecBltnBtmsk with a default of 64 bytes.
template <int _UsedBytes = 16>
  using _Sse = _VecBuiltin<_UsedBytes>;

template <int _UsedBytes = 32>
  using _Avx = _VecBuiltin<_UsedBytes>;

template <int _UsedBytes = 64>
  using _Avx512 = _VecBltnBtmsk<_UsedBytes>;

template <int _UsedBytes = 16>
  using _Neon = _VecBuiltin<_UsedBytes>;

// implementation-defined:
// Full-width tags, i.e. the aliases above with their default byte counts;
// __neon128 and __neon64 spell the byte count out explicitly (16 and 8).
using __sse = _Sse<>;
using __avx = _Avx<>;
using __avx512 = _Avx512<>;
using __neon = _Neon<>;
using __neon128 = _Neon<16>;
using __neon64 = _Neon<8>;

// standard:
// Declared here only; the definition is not part of this section.
template <typename _Tp, size_t _Np, typename...>
  struct deduce;

template <int _Np>
  using fixed_size = _Fixed<_Np>;

using scalar = _Scalar;

// }}}
} // namespace simd_abi
// forward declarations is_simd(_mask), simd(_mask), simd_size {{{
// Forward declarations only: the traits and class templates below are named
// here so that earlier helpers in this header can refer to them before their
// definitions appear.
template <typename _Tp>
  struct is_simd;

template <typename _Tp>
  struct is_simd_mask;

// Data-parallel value type, parameterized on element type and ABI tag.
template <typename _Tp, typename _Abi>
  class simd;

// Mask type, parameterized on element type and ABI tag.
template <typename _Tp, typename _Abi>
  class simd_mask;

// Used below through its ::value member (see __size_or_zero_dispatch).
template <typename _Tp, typename _Abi>
  struct simd_size;

// }}}
// load/store flags {{{
// Load/store flag: memory is only assumed to be aligned for a single element.
struct element_aligned_tag
{
  // Alignment associated with this flag: alignof the element type _Up, which
  // defaults to _Tp::value_type.
  template <typename _Tp, typename _Up = typename _Tp::value_type>
    static constexpr size_t _S_alignment = alignof(_Up);

  // Identity mapping; no alignment assumption is attached to the pointer.
  template <typename _Tp, typename _Up>
    _GLIBCXX_SIMD_INTRINSIC static constexpr _Up*
    _S_apply(_Up* __mem)
    {
      return __mem;
    }
};

// Load/store flag: memory is assumed to be aligned for a whole vector object.
struct vector_aligned_tag
{
  // Alignment associated with this flag: the byte size of _Tp::size()
  // elements of type _Up, rounded up to a power of two.
  template <typename _Tp, typename _Up = typename _Tp::value_type>
    static constexpr size_t _S_alignment
      = std::__bit_ceil(sizeof(_Up) * _Tp::size());

  // Returns __mem after telling the compiler it may assume the alignment
  // computed by _S_alignment<_Tp, _Up>.
  template <typename _Tp, typename _Up>
    _GLIBCXX_SIMD_INTRINSIC static constexpr _Up*
    _S_apply(_Up* __mem)
    {
      void* __assumed
	= __builtin_assume_aligned(__mem, _S_alignment<_Tp, _Up>);
      return static_cast<_Up*>(__assumed);
    }
};

// Load/store flag: memory is assumed to be aligned to _Np bytes, as requested
// by the user.
template <size_t _Np>
  struct overaligned_tag
  {
    // Alignment associated with this flag: always _Np, independent of the
    // element type.
    template <typename _Tp, typename _Up = typename _Tp::value_type>
      static constexpr size_t _S_alignment = _Np;

    // Returns __mem after telling the compiler it may assume _Np-byte
    // alignment.
    template <typename _Tp, typename _Up>
      _GLIBCXX_SIMD_INTRINSIC static constexpr _Up*
      _S_apply(_Up* __mem)
      {
	void* __assumed = __builtin_assume_aligned(__mem, _Np);
	return static_cast<_Up*>(__assumed);
      }
  };

// Ready-made flag objects, one per load/store tag type defined above.
inline constexpr element_aligned_tag element_aligned = {};

inline constexpr vector_aligned_tag vector_aligned = {};

// Variable template: overaligned<_Np> is an object of type
// overaligned_tag<_Np>.
template <size_t _Np>
  inline constexpr overaligned_tag<_Np> overaligned = {};

// }}}
// Shorthand for a size_t-valued integral_constant; used below to pass indices
// to callables as distinct types (see __execute_on_index_sequence).
template <size_t _Xp>
  using _SizeConstant = integral_constant<size_t, _Xp>;
// constexpr feature detection{{{
// x86: each constant takes the value of the _GLIBCXX_SIMD_HAVE_* macro of the
// same name, which makes the feature usable in `if constexpr` conditions.
// __have_avx512dq_vl and __have_avx512bw_vl are derived: they require the
// named extension together with AVX512VL.
constexpr inline bool __have_mmx = _GLIBCXX_SIMD_HAVE_MMX;
constexpr inline bool __have_sse = _GLIBCXX_SIMD_HAVE_SSE;
constexpr inline bool __have_sse2 = _GLIBCXX_SIMD_HAVE_SSE2;
constexpr inline bool __have_sse3 = _GLIBCXX_SIMD_HAVE_SSE3;
constexpr inline bool __have_ssse3 = _GLIBCXX_SIMD_HAVE_SSSE3;
constexpr inline bool __have_sse4_1 = _GLIBCXX_SIMD_HAVE_SSE4_1;
constexpr inline bool __have_sse4_2 = _GLIBCXX_SIMD_HAVE_SSE4_2;
constexpr inline bool __have_xop = _GLIBCXX_SIMD_HAVE_XOP;
constexpr inline bool __have_avx = _GLIBCXX_SIMD_HAVE_AVX;
constexpr inline bool __have_avx2 = _GLIBCXX_SIMD_HAVE_AVX2;
constexpr inline bool __have_bmi = _GLIBCXX_SIMD_HAVE_BMI1;
constexpr inline bool __have_bmi2 = _GLIBCXX_SIMD_HAVE_BMI2;
constexpr inline bool __have_lzcnt = _GLIBCXX_SIMD_HAVE_LZCNT;
constexpr inline bool __have_sse4a = _GLIBCXX_SIMD_HAVE_SSE4A;
constexpr inline bool __have_fma = _GLIBCXX_SIMD_HAVE_FMA;
constexpr inline bool __have_fma4 = _GLIBCXX_SIMD_HAVE_FMA4;
constexpr inline bool __have_f16c = _GLIBCXX_SIMD_HAVE_F16C;
constexpr inline bool __have_popcnt = _GLIBCXX_SIMD_HAVE_POPCNT;
constexpr inline bool __have_avx512f = _GLIBCXX_SIMD_HAVE_AVX512F;
constexpr inline bool __have_avx512dq = _GLIBCXX_SIMD_HAVE_AVX512DQ;
constexpr inline bool __have_avx512vl = _GLIBCXX_SIMD_HAVE_AVX512VL;
constexpr inline bool __have_avx512bw = _GLIBCXX_SIMD_HAVE_AVX512BW;
constexpr inline bool __have_avx512dq_vl = __have_avx512dq && __have_avx512vl;
constexpr inline bool __have_avx512bw_vl = __have_avx512bw && __have_avx512vl;
constexpr inline bool __have_avx512bitalg = _GLIBCXX_SIMD_HAVE_AVX512BITALG;
constexpr inline bool __have_avx512vbmi2 = _GLIBCXX_SIMD_HAVE_AVX512VBMI2;
constexpr inline bool __have_avx512vbmi = _GLIBCXX_SIMD_HAVE_AVX512VBMI;
constexpr inline bool __have_avx512ifma = _GLIBCXX_SIMD_HAVE_AVX512IFMA;
constexpr inline bool __have_avx512cd = _GLIBCXX_SIMD_HAVE_AVX512CD;
constexpr inline bool __have_avx512vnni = _GLIBCXX_SIMD_HAVE_AVX512VNNI;
constexpr inline bool __have_avx512vpopcntdq = _GLIBCXX_SIMD_HAVE_AVX512VPOPCNTDQ;
constexpr inline bool __have_avx512vp2intersect = _GLIBCXX_SIMD_HAVE_AVX512VP2INTERSECT;

// ARM: NEON availability, again taken from the _GLIBCXX_SIMD_HAVE_* macros.
constexpr inline bool __have_neon = _GLIBCXX_SIMD_HAVE_NEON;
constexpr inline bool __have_neon_a32 = _GLIBCXX_SIMD_HAVE_NEON_A32;
constexpr inline bool __have_neon_a64 = _GLIBCXX_SIMD_HAVE_NEON_A64;
// Selected by the preprocessor: if __GCC_IEC_559 is defined the value is
// (__GCC_IEC_559 == 0); otherwise it is true when __FAST_MATH__ is defined and
// false when it is not.
constexpr inline bool __support_neon_float =
#if defined __GCC_IEC_559
  __GCC_IEC_559 == 0;
#elif defined __FAST_MATH__
  true;
#else
  false;
#endif

// POWER: feature constants derived from compiler-predefined macros. POWER10
// and POWER9 are plain macro tests. From POWER8 downwards each constant is
// true if its own macro is defined and otherwise inherits the value of the
// constant defined just before it (power8vec <- power9vec, vsx <- power8vec,
// vmx <- vsx).
#ifdef _ARCH_PWR10
constexpr inline bool __have_power10vec = true;
#else
constexpr inline bool __have_power10vec = false;
#endif
#ifdef __POWER9_VECTOR__
constexpr inline bool __have_power9vec = true;
#else
constexpr inline bool __have_power9vec = false;
#endif
#if defined __POWER8_VECTOR__
constexpr inline bool __have_power8vec = true;
#else
constexpr inline bool __have_power8vec = __have_power9vec;
#endif
#if defined __VSX__
constexpr inline bool __have_power_vsx = true;
#else
constexpr inline bool __have_power_vsx = __have_power8vec;
#endif
#if defined __ALTIVEC__
constexpr inline bool __have_power_vmx = true;
#else
constexpr inline bool __have_power_vmx = __have_power_vsx;
#endif

// }}}

namespace __detail
{
#ifdef math_errhandling
  // Preferred overload (selected for an int argument). It only participates
  // when math_errhandling is usable as a constant expression, because it is
  // also the default template argument; math_errhandling may instead expand
  // to an extern symbol, in which case this overload drops out and the
  // fallback below provides a guess.
  template <int = math_errhandling>
    constexpr bool
    __handle_fpexcept_impl(int)
    {
      return (math_errhandling & MATH_ERREXCEPT) != 0;
    }
#endif

  // Fallback when math_errhandling cannot be used: fast-math is taken to mean
  // that floating-point exceptions are ignored; otherwise correct exception
  // behavior is implemented.
  constexpr bool
  __handle_fpexcept_impl(float)
  {
#ifdef __FAST_MATH__
    constexpr bool __exceptions_matter = false;
#else
    constexpr bool __exceptions_matter = true;
#endif
    return __exceptions_matter;
  }

  /// True if math functions must raise floating-point exceptions as specified by C17.
  static constexpr bool _S_handle_fpexcept = __handle_fpexcept_impl(0);

  // Encodes the floating-point related compilation mode as an integer:
  //   bit 0     : _S_handle_fpexcept
  //   bits 1-2  : 1 for __FAST_MATH__, else 2 for __FINITE_MATH_ONLY__,
  //               else 3 if __GCC_IEC_559 < 2, else 0
  //   bits 3... : __FLT_EVAL_METHOD__ + 1
  constexpr std::uint_least64_t
  __floating_point_flags()
  {
#ifdef __FAST_MATH__
    constexpr int __math_mode = 1;
#elif __FINITE_MATH_ONLY__
    constexpr int __math_mode = 2;
#elif __GCC_IEC_559 < 2
    constexpr int __math_mode = 3;
#else
    constexpr int __math_mode = 0;
#endif
    std::uint_least64_t __bits = _S_handle_fpexcept ? 1 : 0;
    __bits |= __math_mode << 1;
    __bits |= (__FLT_EVAL_METHOD__ + 1) << 3;
    return __bits;
  }

  /**@internal
   * Packs the compile-time ISA feature constants into one integer, one bit
   * per feature. Exactly one architecture family contributes:
   *  - x86 (if MMX or SSE is available): bit 0 = MMX, bits 1..29 as listed
   *    below;
   *  - otherwise NEON: bit 0 = NEON, bit 1 = A32, bit 2 = A64,
   *    bit 3 = __support_neon_float;
   *  - otherwise POWER: bit 0 = VMX, bit 1 = VSX, bits 2..4 = POWER8/9/10
   *    vector support;
   *  - otherwise 0.
   * Bit positions are therefore only comparable within the same family.
   */
  constexpr std::uint_least64_t
  __machine_flags()
  {
    if constexpr (__have_mmx || __have_sse)
      return __have_mmx
		 | (__have_sse                << 1)
		 | (__have_sse2               << 2)
		 | (__have_sse3               << 3)
		 | (__have_ssse3              << 4)
		 | (__have_sse4_1             << 5)
		 | (__have_sse4_2             << 6)
		 | (__have_xop                << 7)
		 | (__have_avx                << 8)
		 | (__have_avx2               << 9)
		 | (__have_bmi                << 10)
		 | (__have_bmi2               << 11)
		 | (__have_lzcnt              << 12)
		 | (__have_sse4a              << 13)
		 | (__have_fma                << 14)
		 | (__have_fma4               << 15)
		 | (__have_f16c               << 16)
		 | (__have_popcnt             << 17)
		 | (__have_avx512f            << 18)
		 | (__have_avx512dq           << 19)
		 | (__have_avx512vl           << 20)
		 | (__have_avx512bw           << 21)
		 | (__have_avx512bitalg       << 22)
		 | (__have_avx512vbmi2        << 23)
		 | (__have_avx512vbmi         << 24)
		 | (__have_avx512ifma         << 25)
		 | (__have_avx512cd           << 26)
		 | (__have_avx512vnni         << 27)
		 | (__have_avx512vpopcntdq    << 28)
		 | (__have_avx512vp2intersect << 29);
    else if constexpr (__have_neon)
      // The A64 term used to be listed twice; OR-ing it once yields the same
      // value.
      return __have_neon
	       | (__have_neon_a32 << 1)
	       | (__have_neon_a64 << 2)
	       | (__support_neon_float << 3);
    else if constexpr (__have_power_vmx)
      return __have_power_vmx
	       | (__have_power_vsx  << 1)
	       | (__have_power8vec  << 2)
	       | (__have_power9vec  << 3)
	       | (__have_power10vec << 4);
    else
      return 0;
  }

  // Declared in an unnamed namespace, so every translation unit gets its own
  // distinct _OdrEnforcer type.
  namespace
  {
    struct _OdrEnforcer {};
  }

  // Empty type whose identity is determined solely by its integer template
  // arguments.
  template <std::uint_least64_t...>
    struct _MachineFlagsTemplate {};

  /**@internal
   * Use this type as default template argument to all function templates that
   * are not declared always_inline. It ensures that a function
   * specialization, which the compiler decides not to inline, has a unique symbol
   * (_OdrEnforcer) or a symbol matching the machine/architecture flags
   * (_MachineFlagsTemplate). This helps to avoid ODR violations in cases where
   * users link TUs compiled with different flags. This is especially important
   * for using simd in libraries.
   *
   * _OdrEnforcer is chosen when __machine_flags() is 0; otherwise the type is
   * _MachineFlagsTemplate instantiated with the machine flags and the
   * floating-point flags.
   */
  using __odr_helper
    = conditional_t<__machine_flags() == 0, _OdrEnforcer,
		    _MachineFlagsTemplate<__machine_flags(), __floating_point_flags()>>;

  // Binary function object returning the smaller of its two arguments.
  struct _Minimum
  {
    // min is called unqualified after a using-declaration, so overloads found
    // by argument-dependent lookup compete with std::min.
    template <typename _Tp>
      _GLIBCXX_SIMD_INTRINSIC constexpr _Tp
      operator()(_Tp __lhs, _Tp __rhs) const
      {
	using std::min;
	return min(__lhs, __rhs);
      }
  };

  // Binary function object returning the larger of its two arguments.
  struct _Maximum
  {
    // max is called unqualified after a using-declaration, so overloads found
    // by argument-dependent lookup compete with std::max.
    template <typename _Tp>
      _GLIBCXX_SIMD_INTRINSIC constexpr _Tp
      operator()(_Tp __lhs, _Tp __rhs) const
      {
	using std::max;
	return max(__lhs, __rhs);
      }
  };
} // namespace __detail

// unrolled/pack execution helpers
// __execute_n_times{{{
// Calls __fun once per index in the sequence, in order, passing the index as
// a _SizeConstant object; any return value is discarded.
template <typename _Fp, size_t... _Is>
  [[__gnu__::__flatten__]] _GLIBCXX_SIMD_INTRINSIC constexpr void
  __execute_on_index_sequence(_Fp&& __fun, index_sequence<_Is...>)
  {
    (static_cast<void>(__fun(_SizeConstant<_Is>())), ...);
  }

// Empty sequence: nothing to call.
template <typename _Fp>
  _GLIBCXX_SIMD_INTRINSIC constexpr void
  __execute_on_index_sequence(_Fp&&, index_sequence<>)
  { }

// Calls __f(_SizeConstant<0>()) ... __f(_SizeConstant<_Np - 1>()).
template <size_t _Np, typename _Fp>
  _GLIBCXX_SIMD_INTRINSIC constexpr void
  __execute_n_times(_Fp&& __f)
  {
    using _Indexes = make_index_sequence<_Np>;
    __execute_on_index_sequence(static_cast<_Fp&&>(__f), _Indexes{});
  }

// }}}
// __generate_from_n_evaluations{{{
// Brace-initializes an _R from the results of calling __gen with one
// _SizeConstant per index of the sequence.
template <typename _R, typename _Fp, size_t... _Is>
  [[__gnu__::__flatten__]] _GLIBCXX_SIMD_INTRINSIC constexpr _R
  __execute_on_index_sequence_with_return(_Fp&& __gen, index_sequence<_Is...>)
  {
    return _R{__gen(_SizeConstant<_Is>())...};
  }

// Returns _R{__f(_SizeConstant<0>()), ..., __f(_SizeConstant<_Np - 1>())}.
template <size_t _Np, typename _R, typename _Fp>
  _GLIBCXX_SIMD_INTRINSIC constexpr _R
  __generate_from_n_evaluations(_Fp&& __f)
  {
    using _Indexes = make_index_sequence<_Np>;
    return __execute_on_index_sequence_with_return<_R>(static_cast<_Fp&&>(__f),
						       _Indexes{});
  }

// }}}
// __call_with_n_evaluations{{{
// Calls __consumer with one argument per index of the sequence, each argument
// being the result of __producer(_SizeConstant<index>()).
template <size_t... _Is, typename _F0, typename _FArgs>
  [[__gnu__::__flatten__]] _GLIBCXX_SIMD_INTRINSIC constexpr auto
  __call_with_n_evaluations(index_sequence<_Is...>, _F0&& __consumer,
			    _FArgs&& __producer)
  {
    return __consumer(__producer(_SizeConstant<_Is>())...);
  }

// Returns __f0(__fargs(_SizeConstant<0>()), ...,
//              __fargs(_SizeConstant<_Np - 1>())).
template <size_t _Np, typename _F0, typename _FArgs>
  _GLIBCXX_SIMD_INTRINSIC constexpr auto
  __call_with_n_evaluations(_F0&& __f0, _FArgs&& __fargs)
  {
    using _Indexes = make_index_sequence<_Np>;
    return __call_with_n_evaluations(_Indexes{}, static_cast<_F0&&>(__f0),
				     static_cast<_FArgs&&>(__fargs));
  }

// }}}
// __call_with_subscripts{{{
// Calls __fun with the elements __seq[_First + i] for every index i of the
// sequence, passed as separate arguments.
template <size_t _First = 0, size_t... _Is, typename _Tp, typename _Fp>
  [[__gnu__::__flatten__]] _GLIBCXX_SIMD_INTRINSIC constexpr auto
  __call_with_subscripts(_Tp&& __seq, index_sequence<_Is...>, _Fp&& __fun)
  {
    return __fun(__seq[_First + _Is]...);
  }

// Returns __fun(__x[_First], ..., __x[_First + _Np - 1]).
template <size_t _Np, size_t _First = 0, typename _Tp, typename _Fp>
  _GLIBCXX_SIMD_INTRINSIC constexpr auto
  __call_with_subscripts(_Tp&& __x, _Fp&& __fun)
  {
    using _Indexes = make_index_sequence<_Np>;
    return __call_with_subscripts<_First>(static_cast<_Tp&&>(__x), _Indexes(),
					  static_cast<_Fp&&>(__fun));
  }

// }}}

// vvv ---- type traits ---- vvv
// integer type aliases{{{
// Short spellings of the multi-word builtin integer type names.
using _UChar = unsigned char;
using _SChar = signed char;
using _UShort = unsigned short;
using _UInt = unsigned int;
using _ULong = unsigned long;
using _ULLong = unsigned long long;
using _LLong = long long;

//}}}
// __first_of_pack{{{
// Exposes the first type of a non-empty type list as ::type.
template <typename _Head, typename... _Tail>
  struct __first_of_pack
  {
    typedef _Head type;
  };

// Convenience alias for __first_of_pack<...>::type.
template <typename... _Ts>
  using __first_of_pack_t = typename __first_of_pack<_Ts...>::type;

//}}}
// __value_type_or_identity_t {{{
// Declaration-only overload pair used for type computation. The int overload
// is the better match for an int argument, but it is only viable when
// _Tp::value_type names a type.
template <typename _Tp>
  typename _Tp::value_type
  __value_type_or_identity_impl(int);

// Catch-all reached through an int -> float conversion.
template <typename _Tp>
  _Tp
  __value_type_or_identity_impl(float);

// _Tp::value_type if that member type exists, otherwise _Tp itself.
template <typename _Tp>
  using __value_type_or_identity_t
    = decltype(__value_type_or_identity_impl<_Tp>(0));

// }}}
// __is_vectorizable {{{
// A type is vectorizable if it is an arithmetic type ...
template <typename _Tp>
  struct __is_vectorizable
  : is_arithmetic<_Tp>
  {};

// ... with the exception of bool.
template <>
  struct __is_vectorizable<bool>
  : false_type
  {};

template <typename _Tp>
  inline constexpr bool __is_vectorizable_v = __is_vectorizable<_Tp>::value;

// Deduces to a vectorizable type
// (the alias is ill-formed, and thus SFINAE-friendly, for any other _Tp).
template <typename _Tp, typename = enable_if_t<__is_vectorizable_v<_Tp>>>
  using _Vectorizable = _Tp;

// }}}
// _LoadStorePtr / __is_possible_loadstore_conversion {{{
// True if both the pointed-to type _Ptr and the element type _ValueType are
// vectorizable ...
template <typename _Ptr, typename _ValueType>
  struct __is_possible_loadstore_conversion
  : conjunction<__is_vectorizable<_Ptr>, __is_vectorizable<_ValueType>> {};

// ... and additionally for the pair <bool, bool>, although bool itself is not
// vectorizable.
template <>
  struct __is_possible_loadstore_conversion<bool, bool> : true_type {};

// Deduces to a type allowed for load/store with the given value type.
template <typename _Ptr, typename _ValueType,
	  typename = enable_if_t<
	    __is_possible_loadstore_conversion<_Ptr, _ValueType>::value>>
  using _LoadStorePtr = _Ptr;

// }}}
// __is_bitmask{{{
// Primary template: types are not bitmasks unless a specialization says so.
template <typename _Tp, typename = void>
  struct __is_bitmask
  : false_type
  {};

template <typename _Tp>
  inline constexpr bool __is_bitmask_v = __is_bitmask<_Tp>::value;

// the __mmaskXX case:
// selected whenever `_Tp & 1u` is valid and assignable to an unsigned lvalue.
template <typename _Tp>
  struct __is_bitmask<_Tp,
		      void_t<decltype(declval<unsigned&>()
					= declval<_Tp>() & 1u)>>
  : true_type
  {};

// }}}
// __int_for_sizeof{{{
// -Wpedantic is silenced for this function (note the use of __int128 below).
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wpedantic"
// Returns a value-initialized object whose type has sizeof equal to _Bytes.
// Only the return type is of interest to the aliases below, which apply
// decltype to a call. The candidates are tried in the order written, so int
// wins over any other candidate of the same size.
template <size_t _Bytes>
  constexpr auto
  __int_for_sizeof()
  {
    if constexpr (_Bytes == sizeof(int))
      return int();
  // With clang the 1-byte type is char, otherwise signed char.
  #ifdef __clang__
    else if constexpr (_Bytes == sizeof(char))
      return char();
  #else
    else if constexpr (_Bytes == sizeof(_SChar))
      return _SChar();
  #endif
    else if constexpr (_Bytes == sizeof(short))
      return short();
  // long is never chosen with clang.
  #ifndef __clang__
    else if constexpr (_Bytes == sizeof(long))
      return long();
  #endif
    else if constexpr (_Bytes == sizeof(_LLong))
      return _LLong();
  #ifdef __SIZEOF_INT128__
    else if constexpr (_Bytes == sizeof(__int128))
      return __int128();
  #endif // __SIZEOF_INT128__
    else if constexpr (_Bytes % sizeof(int) == 0)
      {
	// No builtin integer of this size: fall back to a local aggregate of
	// ints that provides element-wise &, |, ^ and ~.
	constexpr size_t _Np = _Bytes / sizeof(int);
	struct _Ip
	{
	  int _M_data[_Np];

	  _GLIBCXX_SIMD_INTRINSIC constexpr _Ip
	  operator&(_Ip __rhs) const
	  {
	    return __generate_from_n_evaluations<_Np, _Ip>(
	      [&](auto __i) { return __rhs._M_data[__i] & _M_data[__i]; });
	  }

	  _GLIBCXX_SIMD_INTRINSIC constexpr _Ip
	  operator|(_Ip __rhs) const
	  {
	    return __generate_from_n_evaluations<_Np, _Ip>(
	      [&](auto __i) { return __rhs._M_data[__i] | _M_data[__i]; });
	  }

	  _GLIBCXX_SIMD_INTRINSIC constexpr _Ip
	  operator^(_Ip __rhs) const
	  {
	    return __generate_from_n_evaluations<_Np, _Ip>(
	      [&](auto __i) { return __rhs._M_data[__i] ^ _M_data[__i]; });
	  }

	  _GLIBCXX_SIMD_INTRINSIC constexpr _Ip
	  operator~() const
	  {
	    return __generate_from_n_evaluations<_Np, _Ip>(
	      [&](auto __i) { return ~_M_data[__i]; });
	  }
	};
	return _Ip{};
      }
    else
      // The condition depends on _Bytes so that the assertion only fires when
      // this branch is actually instantiated.
      static_assert(_Bytes != _Bytes, "this should be unreachable");
  }
#pragma GCC diagnostic pop

// Integer type with the same size as _Tp.
template <typename _Tp>
  using __int_for_sizeof_t = decltype(__int_for_sizeof<sizeof(_Tp)>());

// Integer type with a size of _Np bytes.
template <size_t _Np>
  using __int_with_sizeof_t = decltype(__int_for_sizeof<_Np>());

// }}}
// __is_fixed_size_abi{{{
// False for every type ...
template <typename _Tp>
  struct __is_fixed_size_abi : false_type {};

// ... except the simd_abi::fixed_size<_Np> tags.
template <int _Np>
  struct __is_fixed_size_abi<simd_abi::fixed_size<_Np>> : true_type {};

template <typename _Tp>
  inline constexpr bool __is_fixed_size_abi_v = __is_fixed_size_abi<_Tp>::value;

// }}}
// __is_scalar_abi {{{
// True iff _Abi is exactly simd_abi::scalar.
template <typename _Abi>
  constexpr bool
  __is_scalar_abi()
  {
    constexpr bool __same = is_same_v<simd_abi::scalar, _Abi>;
    return __same;
  }

// }}}
// __abi_bytes_v {{{
// Matches pointers to specializations of a class template with a single int
// parameter and returns that parameter.
template <template <int> class _AbiTemplate, int _Count>
  constexpr int
  __abi_bytes_impl(_AbiTemplate<_Count>*)
  {
    return _Count;
  }

// Any other type: there is no byte count to report.
template <typename _Other>
  constexpr int
  __abi_bytes_impl(_Other*)
  {
    return -1;
  }

// The int template argument of _Abi, or -1 if _Abi is not of that form.
template <typename _Abi>
  inline constexpr int __abi_bytes_v
    = __abi_bytes_impl(static_cast<_Abi*>(nullptr));

// }}}
// __is_builtin_bitmask_abi {{{
template <typename _Abi>
  constexpr bool
  __is_builtin_bitmask_abi()
  { return is_same_v<simd_abi::_VecBltnBtmsk<__abi_bytes_v<_Abi>>, _Abi>; }

// }}}
// __is_sse_abi {{{
template <typename _Abi>
  constexpr bool
  __is_sse_abi()
  {
    constexpr auto _Bytes = __abi_bytes_v<_Abi>;
    return _Bytes <= 16 && is_same_v<simd_abi::_VecBuiltin<_Bytes>, _Abi>;
  }

// }}}
// __is_avx_abi {{{
template <typename _Abi>
  constexpr bool
  __is_avx_abi()
  {
    constexpr auto _Bytes = __abi_bytes_v<_Abi>;
    return _Bytes > 16 && _Bytes <= 32
	   && is_same_v<simd_abi::_VecBuiltin<_Bytes>, _Abi>;
  }

// }}}
// __is_avx512_abi {{{
template <typename _Abi>
  constexpr bool
  __is_avx512_abi()
  {
    constexpr auto _Bytes = __abi_bytes_v<_Abi>;
    return _Bytes <= 64 && is_same_v<simd_abi::_Avx512<_Bytes>, _Abi>;
  }

// }}}
// __is_neon_abi {{{
template <typename _Abi>
  constexpr bool
  __is_neon_abi()
  {
    constexpr auto _Bytes = __abi_bytes_v<_Abi>;
    return _Bytes <= 16 && is_same_v<simd_abi::_VecBuiltin<_Bytes>, _Abi>;
  }

// }}}
// __make_dependent_t {{{
// Yields _Up as ::type while formally depending on an ignored first template
// argument.
template <typename _Ignored, typename _Up>
  struct __make_dependent
  {
    typedef _Up type;
  };

// _Up, made dependent on _Tp.
template <typename _Tp, typename _Up>
  using __make_dependent_t = typename __make_dependent<_Tp, _Up>::type;

// }}}
// ^^^ ---- type traits ---- ^^^

// __invoke_ub{{{
// Marks a code path as undefined behavior. With _GLIBCXX_DEBUG_UB defined the
// printf-style message is written to stderr and the program traps; otherwise
// the path is declared unreachable to the optimizer and the arguments are
// unused.
template <typename... _Args>
  [[noreturn]] _GLIBCXX_SIMD_ALWAYS_INLINE void
  __invoke_ub([[maybe_unused]] const char* __msg,
	      [[maybe_unused]] const _Args&... __args)
  {
#ifndef _GLIBCXX_DEBUG_UB
    __builtin_unreachable();
#else
    __builtin_fprintf(stderr, __msg, __args...);
    __builtin_trap();
#endif
  }

// }}}
// __assert_unreachable{{{
// Instantiating this class template for any _Tp triggers the static
// assertion; the condition is written in terms of _Tp so that it is only
// evaluated on instantiation.
template <typename _Tp>
  struct __assert_unreachable
  { static_assert(!is_same_v<_Tp, _Tp>, "this should be unreachable"); };

// }}}
// __size_or_zero_v {{{
// Preferred overload for the int argument 0. It is only viable when the
// default template argument simd_size<_Tp, _Ap>::value can be formed.
template <typename _Tp, typename _Ap, size_t _Np = simd_size<_Tp, _Ap>::value>
  constexpr size_t
  __size_or_zero_dispatch(int)
  { return _Np; }

// Fallback reached through an int -> float conversion.
template <typename _Tp, typename _Ap>
  constexpr size_t
  __size_or_zero_dispatch(float)
  { return 0; }

// simd_size<_Tp, _Ap>::value if that is well-formed, otherwise 0.
template <typename _Tp, typename _Ap>
  inline constexpr size_t __size_or_zero_v
     = __size_or_zero_dispatch<_Tp, _Ap>(0);

// }}}
// __div_roundup {{{
// Returns __a / __b rounded up to the next integer (__b must be non-zero).
// Computed as quotient plus one if there is a remainder, which cannot wrap
// around the way the intermediate sum __a + __b - 1 does for large __a.
inline constexpr size_t
__div_roundup(size_t __a, size_t __b)
{ return __a / __b + (__a % __b != 0 ? 1 : 0); }

// }}}
// _ExactBool{{{
// Wrapper around bool whose constructor from int is deleted, so an int
// argument is rejected instead of being converted to bool.
class _ExactBool
{
  const bool _M_data;

public:
  // Deleted on purpose, see above.
  _ExactBool(int) = delete;

  _GLIBCXX_SIMD_INTRINSIC constexpr
  _ExactBool(bool __value)
  : _M_data(__value)
  {}

  // Implicit read access to the stored value.
  _GLIBCXX_SIMD_INTRINSIC constexpr
  operator bool() const
  { return _M_data; }
};

// }}}
// __may_alias{{{
/**@internal
 * Helper __may_alias<_Tp> that turns _Tp into the type to be used for an
 * aliasing pointer. This adds the __may_alias attribute to _Tp (with compilers
 * that support it).
 *
 * The attribute is attached to the alias declaration itself, using the
 * [[__gnu__::...]] spelling.
 */
template <typename _Tp>
  using __may_alias [[__gnu__::__may_alias__]] = _Tp;

// }}}
// _UnsupportedBase {{{
// simd and simd_mask base for unsupported <_Tp, _Abi>
// All special member functions declared here, including the destructor, are
// deleted, so neither this type nor a class derived from it can be created,
// copied, assigned or destroyed.
struct _UnsupportedBase
{
  _UnsupportedBase() = delete;
  _UnsupportedBase(const _UnsupportedBase&) = delete;
  _UnsupportedBase& operator=(const _UnsupportedBase&) = delete;
  ~_UnsupportedBase() = delete;
};

// }}}
// _InvalidTraits {{{
/**
 * @internal
 * Defines the implementation of a given <_Tp, _Abi>.
 *
 * Implementations must ensure that only valid <_Tp, _Abi> instantiations are
 * possible. Static assertions in the type definition do not suffice. It is
 * important that SFINAE works.
 *
 * _InvalidTraits is the traits class for an unsupported combination (it is the
 * base of the primary _SimdTraits template): _IsValid is false_type, both base
 * classes are _UnsupportedBase, the size is 0, and the implementation and cast
 * types are declared but left incomplete.
 */
struct _InvalidTraits
{
  using _IsValid = false_type;
  using _SimdBase = _UnsupportedBase;
  using _MaskBase = _UnsupportedBase;

  static constexpr size_t _S_full_size = 0;
  static constexpr bool _S_is_partial = false;

  // simd side
  static constexpr size_t _S_simd_align = 1;
  struct _SimdImpl;
  struct _SimdMember {};
  struct _SimdCastType;

  // simd_mask side
  static constexpr size_t _S_mask_align = 1;
  struct _MaskImpl;
  struct _MaskMember {};
  struct _MaskCastType;
};

// }}}
// _SimdTraits {{{
// Primary template: every <_Tp, _Abi> is invalid unless a specialization
// (not part of this section) says otherwise. The defaulted third parameter is
// void and is available to specializations for SFINAE conditions.
template <typename _Tp, typename _Abi, typename = void_t<>>
  struct _SimdTraits : _InvalidTraits {};

// }}}
// __private_init, __bitset_init{{{
/**
 * @internal
 * Tag used for private init constructor of simd and simd_mask
 */
inline constexpr struct _PrivateInit {} __private_init = {};

// Second tag type and object, analogous to the one above.
// NOTE(review): presumably selects a constructor taking a bitset/bitmask;
// confirm against the constructors that accept _BitsetInit.
inline constexpr struct _BitsetInit {} __bitset_init = {};

// }}}
// __is_narrowing_conversion<_From, _To>{{{
// Primary template, declared only. The two bool parameters record whether
// _From and _To are arithmetic and select one of the specializations below;
// no specialization for a non-arithmetic _To is visible in this section.
template <typename _From, typename _To, bool = is_arithmetic_v<_From>,
	  bool = is_arithmetic_v<_To>>
  struct __is_narrowing_conversion;

// ignore "signed/unsigned mismatch" in the following trait.
// The implicit conversions will do the right thing here.
// Arithmetic -> arithmetic: narrowing if _From has more digits, a larger
// finite maximum or a smaller finite minimum than _To, or if _From is signed
// and _To is unsigned.
template <typename _From, typename _To>
  struct __is_narrowing_conversion<_From, _To, true, true>
  : public __bool_constant<(
      __digits_v<_From> > __digits_v<_To>
      || __finite_max_v<_From> > __finite_max_v<_To>
      || __finite_min_v<_From> < __finite_min_v<_To>
      || (is_signed_v<_From> && is_unsigned_v<_To>))> {};

// Any arithmetic type -> bool counts as narrowing ...
template <typename _Tp>
  struct __is_narrowing_conversion<_Tp, bool, true, true>
  : public true_type {};

// ... except bool -> bool. This explicit specialization also settles the
// case that both the <_Tp, bool> and the <_Tp, _Tp> partial specializations
// would otherwise match.
template <>
  struct __is_narrowing_conversion<bool, bool, true, true>
  : public false_type {};

// Identical types never narrow.
template <typename _Tp>
  struct __is_narrowing_conversion<_Tp, _Tp, true, true>
  : public false_type {};

// Non-arithmetic _From to arithmetic _To: treated as narrowing exactly when
// _From is not convertible to _To.
template <typename _From, typename _To>
  struct __is_narrowing_conversion<_From, _To, false, true>
  : public negation<is_convertible<_From, _To>> {};

// }}}
// __converts_to_higher_integer_rank{{{
// If _From is strictly smaller than _To the answer is simply true ...
template <typename _From, typename _To, bool = (sizeof(_From) < sizeof(_To))>
  struct __converts_to_higher_integer_rank
  : true_type
  {};

// this may fail for char -> short if sizeof(char) == sizeof(short)
// ... otherwise it is true iff the type of `_From + _To` is _To, i.e. the
// usual arithmetic conversions pick _To.
template <typename _From, typename _To>
  struct __converts_to_higher_integer_rank<_From, _To, false>
  : is_same<decltype(declval<_From>() + declval<_To>()), _To>
  {};

// }}}
// __data(simd/simd_mask) {{{
// Forward declarations of the __data accessors: one const and one non-const
// overload each for simd and simd_mask. All four return a reference whose
// type is deduced (auto), so they cannot be used in contexts that need the
// return type before the definitions have been seen.
template <typename _Tp, typename _Ap>
  _GLIBCXX_SIMD_INTRINSIC constexpr const auto&
  __data(const simd<_Tp, _Ap>& __x);

template <typename _Tp, typename _Ap>
  _GLIBCXX_SIMD_INTRINSIC constexpr auto&
  __data(simd<_Tp, _Ap>& __x);

template <typename _Tp, typename _Ap>
  _GLIBCXX_SIMD_INTRINSIC constexpr const auto&
  __data(const simd_mask<_Tp, _Ap>& __x);

template <typename _Tp, typename _Ap>
  _GLIBCXX_SIMD_INTRINSIC constexpr auto&
  __data(simd_mask<_Tp, _Ap>& __x);

// }}}
// _SimdConverter {{{
// Primary template, declared only; the trailing defaulted parameter is
// available to specializations.
template <typename _FromT, typename _FromA, typename _ToT, typename _ToA,
	  typename = void>
  struct _SimdConverter;

// Identity conversion (same value type and same ABI on both sides): the
// argument is returned unchanged, by const reference.
// NOTE(review): operator() is neither const nor constexpr here, so it needs a
// non-const converter object and cannot be used in constant expressions.
template <typename _Tp, typename _Ap>
  struct _SimdConverter<_Tp, _Ap, _Tp, _Ap, void>
  {
    template <typename _Up>
      _GLIBCXX_SIMD_INTRINSIC const _Up&
      operator()(const _Up& __x)
      { return __x; }
  };

// }}}
// __to_value_type_or_member_type {{{
// Overload for arguments on which __data() can be called: returns
// __data(__x). The trailing return type removes this overload from
// consideration when that call is ill-formed.
template <typename _V>
  _GLIBCXX_SIMD_INTRINSIC constexpr auto
  __to_value_type_or_member_type(const _V& __x) -> decltype(__data(__x))
  { return __data(__x); }

// Overload for a plain element value: returns the argument itself. _V
// appears only in a non-deduced context here, so callers must name it
// explicitly.
template <typename _V>
  _GLIBCXX_SIMD_INTRINSIC constexpr const typename _V::value_type&
  __to_value_type_or_member_type(const typename _V::value_type& __x)
  { return __x; }

// }}}
// __bool_storage_member_type{{{
// Declared only; the member ::type is supplied by definitions that are not
// part of this section.
template <size_t _Size>
  struct __bool_storage_member_type;

template <size_t _Size>
  using __bool_storage_member_type_t =
    typename __bool_storage_member_type<_Size>::type;

// }}}
// _SimdTuple {{{
// why not tuple?
// 1. tuple gives no guarantee about the storage order, but I require
//    storage equivalent to array<_Tp, _Np>
// 2. direct access to the element type (first template argument)
// 3. enforces equal element type, only different _Abi types are allowed
template <typename _Tp, typename... _Abis>
  struct _SimdTuple;

//}}}
// __fixed_size_storage_t {{{
// Declared only; ::type is the storage type used for _Np elements of _Tp.
template <typename _Tp, int _Np>
  struct __fixed_size_storage;

template <typename _Tp, int _Np>
  using __fixed_size_storage_t = typename __fixed_size_storage<_Tp, _Np>::type;

// }}}
// _SimdWrapper fwd decl{{{
// The third parameter defaults to void and is available to specializations.
template <typename _Tp, size_t _Size, typename = void_t<>>
  struct _SimdWrapper;

// _SimdWrapper holding as many _Tp elements as fit into 8, 16, 32 and 64
// bytes respectively.
template <typename _Tp>
  using _SimdWrapper8 = _SimdWrapper<_Tp, 8 / sizeof(_Tp)>;
template <typename _Tp>
  using _SimdWrapper16 = _SimdWrapper<_Tp, 16 / sizeof(_Tp)>;
template <typename _Tp>
  using _SimdWrapper32 = _SimdWrapper<_Tp, 32 / sizeof(_Tp)>;
template <typename _Tp>
  using _SimdWrapper64 = _SimdWrapper<_Tp, 64 / sizeof(_Tp)>;

// }}}
// __is_simd_wrapper {{{
// False for every type ...
template <typename _Tp>
  struct __is_simd_wrapper : false_type {};

// ... except _SimdWrapper<_Tp, _Np> (with its defaulted third argument).
template <typename _Tp, size_t _Np>
  struct __is_simd_wrapper<_SimdWrapper<_Tp, _Np>> : true_type {};

template <typename _Tp>
  inline constexpr bool __is_simd_wrapper_v = __is_simd_wrapper<_Tp>::value;

// }}}
// _BitOps {{{
struct _BitOps
{
  // _S_bit_iteration {{{
  template <typename _Tp, typename _Fp>
    static void
    _S_bit_iteration(_Tp __mask, _Fp&& __f)
    {
      static_assert(sizeof(_ULLong) >= sizeof(_Tp));
      conditional_t<sizeof(_Tp) <= sizeof(_UInt), _UInt, _ULLong> __k;
      if constexpr (is_convertible_v<_Tp, decltype(__k)>)
	__k = __mask;
      else
	__k = __mask.to_ullong();
      while(__k)
	{
	  __f(std::__countr_zero(__k));
	  __k &= (__k - 1);
	}
    }

  //}}}
};

//}}}
// __increment, __decrement {{{
// Function object applying pre-increment to a copy of its argument and
// returning the result.
template <typename _Tp = void>
  struct __increment
  {
    constexpr _Tp
    operator()(_Tp __value) const
    { return ++__value; }
  };

// Default (void) form: the argument type is deduced per call.
template <>
  struct __increment<void>
  {
    template <typename _Up>
      constexpr _Up
      operator()(_Up __value) const
      { return ++__value; }
  };

// Function object applying pre-decrement to a copy of its argument and
// returning the result.
template <typename _Tp = void>
  struct __decrement
  {
    constexpr _Tp
    operator()(_Tp __value) const
    { return --__value; }
  };

// Default (void) form: the argument type is deduced per call.
template <>
  struct __decrement<void>
  {
    template <typename _Up>
      constexpr _Up
      operator()(_Up __value) const
      { return --__value; }
  };

// }}}
// _ValuePreserving(OrInt) {{{
// Names _From, but only if converting (cv/ref-stripped) _From to _To is not a
// narrowing conversion; otherwise the alias is ill-formed, which removes the
// using declaration from overload resolution via SFINAE.
template <typename _From, typename _To,
	  typename = enable_if_t<negation<
	    __is_narrowing_conversion<__remove_cvref_t<_From>, _To>>::value>>
  using _ValuePreserving = _From;

// Like _ValuePreserving, but more permissive. _From must be convertible to
// _To and, after stripping cv/ref, satisfy at least one of:
//  - it is _To itself,
//  - it is int,
//  - it is unsigned int and _To is unsigned,
//  - the conversion to _To is not narrowing.
template <typename _From, typename _To,
	  typename _DecayedFrom = __remove_cvref_t<_From>,
	  typename = enable_if_t<conjunction<
	    is_convertible<_From, _To>,
	    disjunction<
	      is_same<_DecayedFrom, _To>, is_same<_DecayedFrom, int>,
	      conjunction<is_same<_DecayedFrom, _UInt>, is_unsigned<_To>>,
	      negation<__is_narrowing_conversion<_DecayedFrom, _To>>>>::value>>
  using _ValuePreservingOrInt = _From;

// }}}
// __intrinsic_type {{{
// Declared only; indexed by element type and size in bytes. The third
// parameter defaults to void and is available to specializations.
template <typename _Tp, size_t _Bytes, typename = void_t<>>
  struct __intrinsic_type;

// Variant indexed by element count: _Size elements are converted to bytes
// before the lookup.
template <typename _Tp, size_t _Size>
  using __intrinsic_type_t =
    typename __intrinsic_type<_Tp, _Size * sizeof(_Tp)>::type;

// Shorthands for fixed byte sizes; the number in the name is passed directly
// as the byte count (not as an element count).
template <typename _Tp>
  using __intrinsic_type2_t = typename __intrinsic_type<_Tp, 2>::type;
template <typename _Tp>
  using __intrinsic_type4_t = typename __intrinsic_type<_Tp, 4>::type;
template <typename _Tp>
  using __intrinsic_type8_t = typename __intrinsic_type<_Tp, 8>::type;
template <typename _Tp>
  using __intrinsic_type16_t = typename __intrinsic_type<_Tp, 16>::type;
template <typename _Tp>
  using __intrinsic_type32_t = typename __intrinsic_type<_Tp, 32>::type;
template <typename _Tp>
  using __intrinsic_type64_t = typename __intrinsic_type<_Tp, 64>::type;

// }}}
// _BitMask {{{
// Forward declaration of the _Np-bit mask type; supplies the default
// _Sanitized = false.
template <size_t _Np, bool _Sanitized = false>
  struct _BitMask;

// Every _BitMask specialization satisfies the __is_bitmask trait.
template <size_t _Np, bool _Sanitized>
  struct __is_bitmask<_BitMask<_Np, _Sanitized>, void> : true_type {};

// Shorthand for a _BitMask whose _Sanitized flag is set.
template <size_t _Np>
  using _SanitizedBitMask = _BitMask<_Np, true>;

// Mask of _Np bits, stored in an array of unsigned integers (or in a single
// bool if _Np == 1). If the storage has more bits than _Np, the excess
// (padding) bits live at the most significant end of the last array element.
// _Sanitized == true states that those padding bits are zero; otherwise they
// may hold arbitrary values and are masked off by the member functions that
// document implicit sanitizing.
template <size_t _Np, bool _Sanitized>
  struct _BitMask
  {
    static_assert(_Np > 0);

    // Number of bytes required to hold _Np bits.
    static constexpr size_t _NBytes = __div_roundup(_Np, __CHAR_BIT__);

    // Storage element type: bool for a single bit, otherwise the unsigned
    // integer with bit_ceil(_NBytes) bytes, capped at sizeof(_ULLong).
    using _Tp = conditional_t<_Np == 1, bool,
			      make_unsigned_t<__int_with_sizeof_t<std::min(
				sizeof(_ULLong), std::__bit_ceil(_NBytes))>>>;

    // Number of _Tp elements required to hold _NBytes bytes.
    static constexpr int _S_array_size = __div_roundup(_NBytes, sizeof(_Tp));

    _Tp _M_bits[_S_array_size];

    // Number of padding bits in the last storage element.
    static constexpr int _S_unused_bits
      = _Np == 1 ? 0 : _S_array_size * sizeof(_Tp) * __CHAR_BIT__ - _Np;

    // Selects the non-padding bits of the last storage element.
    static constexpr _Tp _S_bitmask = +_Tp(~_Tp()) >> _S_unused_bits;

    constexpr _BitMask() noexcept = default;

    // Initializes the first storage element from __x (converted to _Tp); any
    // further elements are zero-initialized.
    constexpr _BitMask(unsigned long long __x) noexcept
      : _M_bits{static_cast<_Tp>(__x)} {}

    _BitMask(bitset<_Np> __x) noexcept : _BitMask(__x.to_ullong()) {}

    constexpr _BitMask(const _BitMask&) noexcept = default;

    // Converts an unsanitized mask into a sanitized one by clearing the
    // padding bits. Only participates if *this is the sanitized type.
    template <bool _RhsSanitized, typename = enable_if_t<_RhsSanitized == false
							 && _Sanitized == true>>
      constexpr _BitMask(const _BitMask<_Np, _RhsSanitized>& __rhs) noexcept
	: _BitMask(__rhs._M_sanitized()) {}

    // Only available if the mask fits into a single storage element.
    constexpr operator _SimdWrapper<bool, _Np>() const noexcept
    {
      static_assert(_S_array_size == 1);
      return _M_bits[0];
    }

    // precondition: is sanitized
    // Returns the single storage element.
    constexpr _Tp
    _M_to_bits() const noexcept
    {
      static_assert(_S_array_size == 1);
      return _M_bits[0];
    }

    // precondition: is sanitized
    constexpr unsigned long long
    to_ullong() const noexcept
    {
      static_assert(_S_array_size == 1);
      return _M_bits[0];
    }

    // precondition: is sanitized
    constexpr unsigned long
    to_ulong() const noexcept
    {
      static_assert(_S_array_size == 1);
      return _M_bits[0];
    }

    // Only available if the mask fits into a single storage element.
    constexpr bitset<_Np>
    _M_to_bitset() const noexcept
    {
      static_assert(_S_array_size == 1);
      return _M_bits[0];
    }

    // Returns *this if it is already sanitized (by reference, because of
    // decltype(auto)); otherwise returns a _SanitizedBitMask copy with the
    // padding bits of the last storage element cleared.
    constexpr decltype(auto)
    _M_sanitized() const noexcept
    {
      if constexpr (_Sanitized)
	return *this;
      else if constexpr (_Np == 1)
	return _SanitizedBitMask<_Np>(_M_bits[0]);
      else
	{
	  _SanitizedBitMask<_Np> __r = {};
	  for (int __i = 0; __i < _S_array_size; ++__i)
	    __r._M_bits[__i] = _M_bits[__i];
	  if constexpr (_S_unused_bits > 0)
	    __r._M_bits[_S_array_size - 1] &= _S_bitmask;
	  return __r;
	}
    }

    // Returns a mask of _Np + _Mp bits: the (sanitized) bits of __lsb occupy
    // the _Mp least significant positions, the bits of *this follow above
    // them. Only implemented if the result fits into one storage element.
    template <size_t _Mp, bool _LSanitized>
      constexpr _BitMask<_Np + _Mp, _Sanitized>
      _M_prepend(_BitMask<_Mp, _LSanitized> __lsb) const noexcept
      {
	constexpr size_t _RN = _Np + _Mp;
	using _Rp = _BitMask<_RN, _Sanitized>;
	if constexpr (_Rp::_S_array_size == 1)
	  {
	    _Rp __r{{_M_bits[0]}};
	    __r._M_bits[0] <<= _Mp;
	    __r._M_bits[0] |= __lsb._M_sanitized()._M_bits[0];
	    return __r;
	  }
	else
	  __assert_unreachable<_Rp>();
      }

    // Return a new _BitMask with size _NewSize while dropping _DropLsb least
    // significant bits. If the operation implicitly produces a sanitized bitmask,
    // the result type will have _Sanitized set.
    // The result is marked sanitized if either the extracted range ends
    // exactly at the top of the storage element (and within _Np), or the
    // source has no garbage padding (it is sanitized or _Np fills the storage
    // element) and the extracted range reaches at least up to bit _Np.
    template <size_t _DropLsb, size_t _NewSize = _Np - _DropLsb>
      constexpr auto
      _M_extract() const noexcept
      {
	static_assert(_Np > _DropLsb);
	static_assert(_DropLsb + _NewSize <= sizeof(_ULLong) * __CHAR_BIT__,
		      "not implemented for bitmasks larger than one ullong");
	if constexpr (_NewSize == 1)
	  // must sanitize because the return _Tp is bool
	  return _SanitizedBitMask<1>(_M_bits[0] & (_Tp(1) << _DropLsb));
	else
	  return _BitMask<_NewSize,
			  ((_NewSize + _DropLsb == sizeof(_Tp) * __CHAR_BIT__
			    && _NewSize + _DropLsb <= _Np)
			   || ((_Sanitized || _Np == sizeof(_Tp) * __CHAR_BIT__)
			       && _NewSize + _DropLsb >= _Np))>(_M_bits[0]
								>> _DropLsb);
      }

    // True if all bits are set. Implicitly sanitizes if _Sanitized == false.
    constexpr bool
    all() const noexcept
    {
      if constexpr (_Np == 1)
	return _M_bits[0];
      else if constexpr (!_Sanitized)
	return _M_sanitized().all();
      else
	{
	  constexpr _Tp __allbits = ~_Tp();
	  for (int __i = 0; __i < _S_array_size - 1; ++__i)
	    if (_M_bits[__i] != __allbits)
	      return false;
	  // the last element only has to match in its non-padding bits
	  return _M_bits[_S_array_size - 1] == _S_bitmask;
	}
    }

    // True if at least one bit is set. Implicitly sanitizes if _Sanitized ==
    // false.
    constexpr bool
    any() const noexcept
    {
      if constexpr (_Np == 1)
	return _M_bits[0];
      else if constexpr (!_Sanitized)
	return _M_sanitized().any();
      else
	{
	  for (int __i = 0; __i < _S_array_size - 1; ++__i)
	    if (_M_bits[__i] != 0)
	      return true;
	  return _M_bits[_S_array_size - 1] != 0;
	}
    }

    // True if no bit is set. Implicitly sanitizes if _Sanitized == false.
    constexpr bool
    none() const noexcept
    {
      if constexpr (_Np == 1)
	return !_M_bits[0];
      else if constexpr (!_Sanitized)
	return _M_sanitized().none();
      else
	{
	  for (int __i = 0; __i < _S_array_size - 1; ++__i)
	    if (_M_bits[__i] != 0)
	      return false;
	  return _M_bits[_S_array_size - 1] == 0;
	}
    }

    // Returns the number of set bits. Implicitly sanitizes if _Sanitized ==
    // false.
    constexpr int
    count() const noexcept
    {
      if constexpr (_Np == 1)
	return _M_bits[0];
      else if constexpr (!_Sanitized)
	// Count on the sanitized copy so that padding bits are not counted.
	// (This used to call none(), which returned 0 or 1 instead of the
	// number of set bits.)
	return _M_sanitized().count();
      else
	{
	  int __result = __builtin_popcountll(_M_bits[0]);
	  for (int __i = 1; __i < _S_array_size; ++__i)
	    __result += __builtin_popcountll(_M_bits[__i]);
	  return __result;
	}
    }

    // Returns the bit at offset __i as bool.
    constexpr bool
    operator[](size_t __i) const noexcept
    {
      if constexpr (_Np == 1)
	return _M_bits[0];
      else if constexpr (_S_array_size == 1)
	return (_M_bits[0] >> __i) & 1;
      else
	{
	  // __j selects the storage element, __shift the bit inside it
	  const size_t __j = __i / (sizeof(_Tp) * __CHAR_BIT__);
	  const size_t __shift = __i % (sizeof(_Tp) * __CHAR_BIT__);
	  return (_M_bits[__j] >> __shift) & 1;
	}
    }

    // Returns the bit at the compile-time offset __i as bool; the offset is
    // checked against _Np at compile time.
    template <size_t __i>
      constexpr bool
      operator[](_SizeConstant<__i>) const noexcept
      {
	static_assert(__i < _Np);
	constexpr size_t __j = __i / (sizeof(_Tp) * __CHAR_BIT__);
	constexpr size_t __shift = __i % (sizeof(_Tp) * __CHAR_BIT__);
	return static_cast<bool>(_M_bits[__j] & (_Tp(1) << __shift));
      }

    // Set the bit at offset __i to __x.
    constexpr void
    set(size_t __i, bool __x) noexcept
    {
      if constexpr (_Np == 1)
	_M_bits[0] = __x;
      else if constexpr (_S_array_size == 1)
	{
	  // clear the bit, then OR in the new value
	  _M_bits[0] &= ~_Tp(_Tp(1) << __i);
	  _M_bits[0] |= _Tp(_Tp(__x) << __i);
	}
      else
	{
	  const size_t __j = __i / (sizeof(_Tp) * __CHAR_BIT__);
	  const size_t __shift = __i % (sizeof(_Tp) * __CHAR_BIT__);
	  _M_bits[__j] &= ~_Tp(_Tp(1) << __shift);
	  _M_bits[__j] |= _Tp(_Tp(__x) << __shift);
	}
    }

    // Set the bit at the compile-time offset __i to __x; the offset is
    // checked against _Np at compile time.
    template <size_t __i>
      constexpr void
      set(_SizeConstant<__i>, bool __x) noexcept
      {
	static_assert(__i < _Np);
	if constexpr (_Np == 1)
	  _M_bits[0] = __x;
	else
	  {
	    constexpr size_t __j = __i / (sizeof(_Tp) * __CHAR_BIT__);
	    constexpr size_t __shift = __i % (sizeof(_Tp) * __CHAR_BIT__);
	    constexpr _Tp __mask = ~_Tp(_Tp(1) << __shift);
	    _M_bits[__j] &= __mask;
	    _M_bits[__j] |= _Tp(_Tp(__x) << __shift);
	  }
      }

    // Inverts all bits. Sanitized input leads to sanitized output.
    constexpr _BitMask
    operator~() const noexcept
    {
      if constexpr (_Np == 1)
	return !_M_bits[0];
      else
	{
	  _BitMask __result{};
	  for (int __i = 0; __i < _S_array_size - 1; ++__i)
	    __result._M_bits[__i] = ~_M_bits[__i];
	  if constexpr (_Sanitized)
	    // XOR with _S_bitmask flips only the used bits, so the padding
	    // bits stay zero.
	    __result._M_bits[_S_array_size - 1]
	      = _M_bits[_S_array_size - 1] ^ _S_bitmask;
	  else
	    __result._M_bits[_S_array_size - 1] = ~_M_bits[_S_array_size - 1];
	  return __result;
	}
    }

    // Element-wise compound XOR / OR / AND over all storage elements.
    constexpr _BitMask&
    operator^=(const _BitMask& __b) & noexcept
    {
      __execute_n_times<_S_array_size>(
	[&](auto __i) { _M_bits[__i] ^= __b._M_bits[__i]; });
      return *this;
    }

    constexpr _BitMask&
    operator|=(const _BitMask& __b) & noexcept
    {
      __execute_n_times<_S_array_size>(
	[&](auto __i) { _M_bits[__i] |= __b._M_bits[__i]; });
      return *this;
    }

    constexpr _BitMask&
    operator&=(const _BitMask& __b) & noexcept
    {
      __execute_n_times<_S_array_size>(
	[&](auto __i) { _M_bits[__i] &= __b._M_bits[__i]; });
      return *this;
    }

    // Binary operators; each one copies __a and applies the corresponding
    // compound assignment.
    friend constexpr _BitMask
    operator^(const _BitMask& __a, const _BitMask& __b) noexcept
    {
      _BitMask __r = __a;
      __r ^= __b;
      return __r;
    }

    friend constexpr _BitMask
    operator|(const _BitMask& __a, const _BitMask& __b) noexcept
    {
      _BitMask __r = __a;
      __r |= __b;
      return __r;
    }

    friend constexpr _BitMask
    operator&(const _BitMask& __a, const _BitMask& __b) noexcept
    {
      _BitMask __r = __a;
      __r &= __b;
      return __r;
    }

    // True if __builtin_constant_p holds for every storage element.
    _GLIBCXX_SIMD_INTRINSIC
    constexpr bool
    _M_is_constprop() const
    {
      // Single-element shortcut without a loop. (The condition used to be
      // _S_array_size == 0, which can never hold because _Np > 0.)
      if constexpr (_S_array_size == 1)
	return __builtin_constant_p(_M_bits[0]);
      else
	{
	  for (int __i = 0; __i < _S_array_size; ++__i)
	    if (!__builtin_constant_p(_M_bits[__i]))
	      return false;
	  return true;
	}
    }
  };

// }}}

// vvv ---- builtin vector types [[gnu::vector_size(N)]] and operations ---- vvv
// __min_vector_size {{{
// Smallest size in bytes used for a builtin vector with element type _Tp
// (see its use as a lower bound in __vector_type_n). The generic value is two
// elements of _Tp.
template <typename _Tp = void>
  static inline constexpr int __min_vector_size = 2 * sizeof(_Tp);

// The void specialization does not depend on an element type: 8 bytes if
// _GLIBCXX_SIMD_HAVE_NEON is non-zero, 16 bytes otherwise.
#if _GLIBCXX_SIMD_HAVE_NEON
template <>
  inline constexpr int __min_vector_size<void> = 8;
#else
template <>
  inline constexpr int __min_vector_size<void> = 16;
#endif

// }}}
// __vector_type {{{
// Maps element type _Tp and element count _Np to a nested ::type. The primary
// template has no ::type member, so combinations that match none of the
// specializations lead to substitution failure where ::type is requested.
template <typename _Tp, size_t _Np, typename = void>
  struct __vector_type_n {};

// substitution failure for the 0-element case (no ::type member)
template <typename _Tp>
  struct __vector_type_n<_Tp, 0, void> {};

// special case 1-element to be _Tp itself (only for vectorizable _Tp)
template <typename _Tp>
  struct __vector_type_n<_Tp, 1, enable_if_t<__is_vectorizable_v<_Tp>>>
  { using type = _Tp; };

// else, use GNU-style builtin vector types
// The vector size in bytes is _Np * sizeof(_Tp) rounded up to a power of two,
// but never smaller than __min_vector_size<_Tp>.
template <typename _Tp, size_t _Np>
  struct __vector_type_n<_Tp, _Np,
			 enable_if_t<__is_vectorizable_v<_Tp> && _Np >= 2>>
  {
    // requested size in bytes, rounded up to the next power of two
    static constexpr size_t _S_Np2 = std::__bit_ceil(_Np * sizeof(_Tp));

    // size in bytes that is actually used for the vector type
    static constexpr size_t _S_Bytes =
#ifdef __i386__
      // Using [[gnu::vector_size(8)]] would wreak havoc on the FPU because
      // those objects are passed via MMX registers and nothing ever calls EMMS.
      _S_Np2 == 8 ? 16 :
#endif
      _S_Np2 < __min_vector_size<_Tp> ? __min_vector_size<_Tp>
				      : _S_Np2;

    using type [[__gnu__::__vector_size__(_S_Bytes)]] = _Tp;
  };

// Maps element type _Tp and a size in bytes to a vector type. The third
// parameter is the remainder of _Bytes / sizeof(_Tp); the primary template is
// only declared, so a byte count that is not a multiple of sizeof(_Tp) names
// an incomplete type.
template <typename _Tp, size_t _Bytes, size_t = _Bytes % sizeof(_Tp)>
  struct __vector_type;

// Remainder 0: forward to __vector_type_n with the element count.
template <typename _Tp, size_t _Bytes>
  struct __vector_type<_Tp, _Bytes, 0>
  : __vector_type_n<_Tp, _Bytes / sizeof(_Tp)> {};

// Vector type for _Size elements (an element count, not a byte count) of
// type _Tp.
template <typename _Tp, size_t _Size>
  using __vector_type_t = typename __vector_type_n<_Tp, _Size>::type;

// Shorthands for __vector_type<_Tp, N>::type where N (2, 4, 8, 16, 32, 64) is
// the size in bytes, not the number of elements.
template <typename _Tp>
  using __vector_type2_t = typename __vector_type<_Tp, 2>::type;
template <typename _Tp>
  using __vector_type4_t = typename __vector_type<_Tp, 4>::type;
template <typename _Tp>
  using __vector_type8_t = typename __vector_type<_Tp, 8>::type;
template <typename _Tp>
  using __vector_type16_t = typename __vector_type<_Tp, 16>::type;
template <typename _Tp>
  using __vector_type32_t = typename __vector_type<_Tp, 32>::type;
template <typename _Tp>
  using __vector_type64_t = typename __vector_type<_Tp, 64>::type;

// }}}
// __is_vector_type {{{
// Trait: is _Tp one of the vector types produced by __vector_type?
// Default: false.
template <typename _Tp, typename = void_t<>>
  struct __is_vector_type : false_type {};

// Selected if _Tp can be subscripted and __vector_type has a ::type for the
// subscript's value type and sizeof(_Tp) bytes. The trait is true only if that
// ::type is exactly _Tp.
template <typename _Tp>
  struct __is_vector_type<
    _Tp, void_t<typename __vector_type<
	   remove_reference_t<decltype(declval<_Tp>()[0])>, sizeof(_Tp)>::type>>
    : is_same<_Tp, typename __vector_type<
		     remove_reference_t<decltype(declval<_Tp>()[0])>,
		     sizeof(_Tp)>::type> {};

// Variable-template shorthand for __is_vector_type<_Tp>::value.
template <typename _Tp>
  inline constexpr bool __is_vector_type_v = __is_vector_type<_Tp>::value;

// }}}
// __is_intrinsic_type {{{
// Trait: is _Tp an intrinsic type? With _GLIBCXX_SIMD_HAVE_SSE_ABI this is
// simply an alias of __is_vector_type.
#if _GLIBCXX_SIMD_HAVE_SSE_ABI
template <typename _Tp>
  using __is_intrinsic_type = __is_vector_type<_Tp>;
#else // not SSE (x86)
// Default: false.
template <typename _Tp, typename = void_t<>>
  struct __is_intrinsic_type : false_type {};

// Selected if _Tp can be subscripted and __intrinsic_type has a ::type for the
// subscript's value type and sizeof(_Tp) bytes. The trait is true only if that
// ::type is exactly _Tp.
template <typename _Tp>
  struct __is_intrinsic_type<
    _Tp, void_t<typename __intrinsic_type<
	   remove_reference_t<decltype(declval<_Tp>()[0])>, sizeof(_Tp)>::type>>
    : is_same<_Tp, typename __intrinsic_type<
		     remove_reference_t<decltype(declval<_Tp>()[0])>,
		     sizeof(_Tp)>::type> {};
#endif

// Variable-template shorthand for __is_intrinsic_type<_Tp>::value.
template <typename _Tp>
  inline constexpr bool __is_intrinsic_type_v = __is_intrinsic_type<_Tp>::value;

// }}}
// _VectorTraits{{{
// Declaration only: traits of vector-like types. It is defined solely through
// partial specializations, so it stays incomplete for unsupported types.
template <typename _Tp, typename = void_t<>>
  struct _VectorTraitsImpl;

// Traits for builtin vector types and intrinsic types.
template <typename _Tp>
  struct _VectorTraitsImpl<_Tp, enable_if_t<__is_vector_type_v<_Tp>
					      || __is_intrinsic_type_v<_Tp>>>
  {
    // the vector type itself
    using type = _Tp;
    // element type, deduced from the subscript expression
    using value_type = remove_reference_t<decltype(declval<_Tp>()[0])>;
    // number of elements that fit into _Tp
    static constexpr int _S_full_size = sizeof(_Tp) / sizeof(value_type);
    using _Wrapper = _SimdWrapper<value_type, _S_full_size>;
    // true if the element type is _Up and _W equals the number of elements
    template <typename _Up, int _W = _S_full_size>
      static constexpr bool _S_is
	= is_same_v<value_type, _Up> && _W == _S_full_size;
  };

// Traits for _SimdWrapper<_Tp, _Np>. Only available if
// __vector_type_t<_Tp, _Np> is well-formed.
template <typename _Tp, size_t _Np>
  struct _VectorTraitsImpl<_SimdWrapper<_Tp, _Np>,
			   void_t<__vector_type_t<_Tp, _Np>>>
  {
    // the builtin vector type corresponding to the wrapper
    using type = __vector_type_t<_Tp, _Np>;
    using value_type = _Tp;
    // number of elements that fit into `type`; can be larger than _Np
    static constexpr int _S_full_size = sizeof(type) / sizeof(value_type);
    using _Wrapper = _SimdWrapper<_Tp, _Np>;
    // True if the wrapper uses fewer elements than `type` provides. (This
    // used to compare with ==, which yielded true for full vectors.)
    static constexpr bool _S_is_partial = (_Np != _S_full_size);
    // number of elements that are actually used
    static constexpr int _S_partial_width = _Np;
    // true if the element type is _Up and _W equals _S_full_size
    template <typename _Up, int _W = _S_full_size>
      static constexpr bool _S_is
	= is_same_v<value_type, _Up>&& _W == _S_full_size;
  };

// SFINAE-friendly access to _VectorTraitsImpl: the defaulted second parameter
// makes the alias ill-formed if _VectorTraitsImpl<_Tp> has no ::type.
template <typename _Tp, typename = typename _VectorTraitsImpl<_Tp>::type>
  using _VectorTraits = _VectorTraitsImpl<_Tp>;

// }}}
// __as_vector{{{
// Returns the builtin vector behind __x:
//  - builtin vector types are passed through,
//  - simd and simd_mask objects yield the _M_data member of __data(__x),
//  - a vectorizable scalar becomes the first element of a 2-element vector,
//  - anything else is expected to have an _M_data member, which is returned.
template <typename _V>
  _GLIBCXX_SIMD_INTRINSIC constexpr auto
  __as_vector(_V __x)
  {
    if constexpr (__is_vector_type_v<_V>)
      {
	return __x;
      }
    else if constexpr (is_simd<_V>::value || is_simd_mask<_V>::value)
      {
	return __data(__x)._M_data;
      }
    else if constexpr (__is_vectorizable_v<_V>)
      {
	using _Pair = __vector_type_t<_V, 2>;
	return _Pair{__x};
      }
    else
      {
	return __x._M_data;
      }
  }

// }}}
// __as_wrapper{{{
// Returns __x as a _SimdWrapper:
//  - a builtin vector is wrapped; the wrapper width is _Np, or the full
//    vector width if _Np is 0,
//  - for simd and simd_mask objects __data(__x) is returned; _Np must equal
//    _V::size(),
//  - anything else is returned unchanged; _Np must equal _V::_S_size.
template <size_t _Np = 0, typename _V>
  _GLIBCXX_SIMD_INTRINSIC constexpr auto
  __as_wrapper(_V __x)
  {
    if constexpr (__is_vector_type_v<_V>)
      {
	using _Traits = _VectorTraits<_V>;
	using _Wrapped
	  = _SimdWrapper<typename _Traits::value_type,
			 (_Np > 0 ? _Np : _Traits::_S_full_size)>;
	return _Wrapped(__x);
      }
    else if constexpr (is_simd<_V>::value || is_simd_mask<_V>::value)
      {
	static_assert(_V::size() == _Np);
	return __data(__x);
      }
    else
      {
	static_assert(_V::_S_size == _Np);
	return __x;
      }
  }

// }}}
// __intrin_bitcast{{{
// Reinterprets the bytes of __v as an object of type _To. Both _From and _To
// must be builtin vector or intrinsic types; their sizes may differ:
//  - same size: plain reinterpret_cast,
//  - _To smaller: the first sizeof(_To) bytes of __v are used,
//  - _To larger: __v provides the first sizeof(_From) bytes of the result.
template <typename _To, typename _From>
  _GLIBCXX_SIMD_INTRINSIC constexpr _To
  __intrin_bitcast(_From __v)
  {
    static_assert((__is_vector_type_v<_From> || __is_intrinsic_type_v<_From>)
		    && (__is_vector_type_v<_To> || __is_intrinsic_type_v<_To>));
    if constexpr (sizeof(_To) == sizeof(_From))
      return reinterpret_cast<_To>(__v);
    else if constexpr (sizeof(_From) > sizeof(_To))
      // shrinking: read through an aliasing reference for results of at least
      // 16 bytes, otherwise copy the bytes
      if constexpr (sizeof(_To) >= 16)
	return reinterpret_cast<const __may_alias<_To>&>(__v);
      else
	{
	  _To __r;
	  __builtin_memcpy(&__r, &__v, sizeof(_To));
	  return __r;
	}
#if _GLIBCXX_SIMD_X86INTRIN && !defined __clang__
    // growing 16 -> 32, 16 -> 64 and 32 -> 64 bytes via x86 builtins
    // NOTE(review): the contents of the added upper bytes on these paths are
    // presumably unspecified -- confirm against the GCC builtin documentation.
    else if constexpr (__have_avx && sizeof(_From) == 16 && sizeof(_To) == 32)
      return reinterpret_cast<_To>(__builtin_ia32_ps256_ps(
	reinterpret_cast<__vector_type_t<float, 4>>(__v)));
    else if constexpr (__have_avx512f && sizeof(_From) == 16
		       && sizeof(_To) == 64)
      return reinterpret_cast<_To>(__builtin_ia32_ps512_ps(
	reinterpret_cast<__vector_type_t<float, 4>>(__v)));
    else if constexpr (__have_avx512f && sizeof(_From) == 32
		       && sizeof(_To) == 64)
      return reinterpret_cast<_To>(__builtin_ia32_ps512_256ps(
	reinterpret_cast<__vector_type_t<float, 8>>(__v)));
#endif // _GLIBCXX_SIMD_X86INTRIN
    // growing from at most 8 bytes: __v, viewed as a single integer, becomes
    // the first element of an integer vector with sizeof(_To) bytes
    else if constexpr (sizeof(__v) <= 8)
      return reinterpret_cast<_To>(
	__vector_type_t<__int_for_sizeof_t<_From>, sizeof(_To) / sizeof(_From)>{
	  reinterpret_cast<__int_for_sizeof_t<_From>>(__v)});
    else
      {
	// generic growing case: start from a zero-initialized result and copy
	// the bytes of __v into its beginning
	static_assert(sizeof(_To) > sizeof(_From));
	_To __r = {};
	__builtin_memcpy(&__r, &__v, sizeof(_From));
	return __r;
      }
  }

// }}}
// __vector_bitcast{{{
// Reinterprets the vector __x as a vector with element type _To. The result
// has _NN elements; if _NN is 0 it has as many elements of _To as fit into
// sizeof(_From). Only participates for types that have _VectorTraits.
template <typename _To, size_t _NN = 0, typename _From,
	  typename _FromVT = _VectorTraits<_From>,
	  size_t _Np = _NN == 0 ? sizeof(_From) / sizeof(_To) : _NN>
  _GLIBCXX_SIMD_INTRINSIC constexpr __vector_type_t<_To, _Np>
  __vector_bitcast(_From __x)
  { return __intrin_bitcast<__vector_type_t<_To, _Np>>(__x); }

// Overload for _SimdWrapper arguments: reinterprets the wrapped vector
// (__x._M_data) as a vector with element type _To. The result has _NN
// elements; if _NN is 0 it has as many elements of _To as fit into the size
// of the wrapper. The result must have more than one element.
template <typename _To, size_t _NN = 0, typename _Tp, size_t _Nx,
	  size_t _Np
	  = _NN == 0 ? sizeof(_SimdWrapper<_Tp, _Nx>) / sizeof(_To) : _NN>
  _GLIBCXX_SIMD_INTRINSIC constexpr __vector_type_t<_To, _Np>
  __vector_bitcast(const _SimdWrapper<_Tp, _Nx>& __x)
  {
    static_assert(_Np > 1);
    using _Result = __vector_type_t<_To, _Np>;
    return __intrin_bitcast<_Result>(__x._M_data);
  }

// }}}
// __convert_x86 declarations {{{
#ifdef _GLIBCXX_SIMD_WORKAROUND_PR85048
// Forward declarations of the __convert_x86 overloads taking 1, 2, 4, 8 and 16
// input vectors of type _Tp and returning _To. They are only declared if
// _GLIBCXX_SIMD_WORKAROUND_PR85048 is defined; the definitions are not part of
// this section.
template <typename _To, typename _Tp, typename _TVT = _VectorTraits<_Tp>>
  _To __convert_x86(_Tp);

template <typename _To, typename _Tp, typename _TVT = _VectorTraits<_Tp>>
  _To __convert_x86(_Tp, _Tp);

template <typename _To, typename _Tp, typename _TVT = _VectorTraits<_Tp>>
  _To __convert_x86(_Tp, _Tp, _Tp, _Tp);

template <typename _To, typename _Tp, typename _TVT = _VectorTraits<_Tp>>
  _To __convert_x86(_Tp, _Tp, _Tp, _Tp, _Tp, _Tp, _Tp, _Tp);

template <typename _To, typename _Tp, typename _TVT = _VectorTraits<_Tp>>
  _To __convert_x86(_Tp, _Tp, _Tp, _Tp, _Tp, _Tp, _Tp, _Tp, _Tp, _Tp, _Tp, _Tp,
		    _Tp, _Tp, _Tp, _Tp);
#endif // _GLIBCXX_SIMD_WORKAROUND_PR85048

//}}}
// __bit_cast {{{
// Returns an object of type _To with the object representation of __x.
// If the compiler provides __builtin_bit_cast it is used directly. Otherwise
// _To and _From must have the same size and the conversion is built from
// vector reinterpret_casts where possible, with memcpy as the last resort.
template <typename _To, typename _From>
  _GLIBCXX_SIMD_INTRINSIC constexpr _To
  __bit_cast(const _From __x)
  {
#if __has_builtin(__builtin_bit_cast)
    return __builtin_bit_cast(_To, __x);
#else
    static_assert(sizeof(_To) == sizeof(_From));
    // "vectorizable" here means arithmetic or enumeration type, i.e. a type
    // that can be the element type of a single-element vector
    constexpr bool __to_is_vectorizable
      = is_arithmetic_v<_To> || is_enum_v<_To>;
    constexpr bool __from_is_vectorizable
      = is_arithmetic_v<_From> || is_enum_v<_From>;
    // vector -> vector
    if constexpr (__is_vector_type_v<_To> && __is_vector_type_v<_From>)
      return reinterpret_cast<_To>(__x);
    // scalar -> vector: wrap the scalar in a single-element vector first
    else if constexpr (__is_vector_type_v<_To> && __from_is_vectorizable)
      {
	using _FV [[gnu::vector_size(sizeof(_From))]] = _From;
	return reinterpret_cast<_To>(_FV{__x});
      }
    // scalar -> scalar: cast between single-element vectors, then extract
    else if constexpr (__to_is_vectorizable && __from_is_vectorizable)
      {
	using _TV [[gnu::vector_size(sizeof(_To))]] = _To;
	using _FV [[gnu::vector_size(sizeof(_From))]] = _From;
	return reinterpret_cast<_TV>(_FV{__x})[0];
      }
    // vector -> scalar: cast to a single-element vector, then extract
    else if constexpr (__to_is_vectorizable && __is_vector_type_v<_From>)
      {
	using _TV [[gnu::vector_size(sizeof(_To))]] = _To;
	return reinterpret_cast<_TV>(__x)[0];
      }
    // anything else: byte-wise copy
    else
      {
	_To __r;
	__builtin_memcpy(reinterpret_cast<char*>(&__r),
			 reinterpret_cast<const char*>(&__x), sizeof(_To));
	return __r;
      }
#endif
  }

// }}}
// __to_intrin {{{
// Converts __x to the intrinsic type _R that corresponds to its element type
// and full element count. _R must not be smaller than __x. If the sizes are
// equal, the vector behind __x is reinterpreted; if __x is smaller, its bits
// (viewed as one integer) become the first element of an integer vector of
// sizeof(_R) bytes, which is then reinterpreted as _R.
template <typename _Tp, typename _TVT = _VectorTraits<_Tp>,
	  typename _R
	  = __intrinsic_type_t<typename _TVT::value_type, _TVT::_S_full_size>>
  _GLIBCXX_SIMD_INTRINSIC constexpr _R
  __to_intrin(_Tp __x)
  {
    static_assert(sizeof(__x) <= sizeof(_R),
		  "__to_intrin may never drop values off the end");
    if constexpr (sizeof(__x) != sizeof(_R))
      {
	using _Bits = __int_for_sizeof_t<_Tp>;
	using _Wide = __vector_type_t<_Bits, sizeof(_R) / sizeof(_Bits)>;
	return reinterpret_cast<_R>(_Wide{__bit_cast<_Bits>(__x)});
      }
    else
      return reinterpret_cast<_R>(__as_vector(__x));
  }

// }}}
// __make_vector{{{
// Builds a vector with element type _Tp from the given arguments: one element
// per argument, each converted to _Tp with static_cast.
template <typename _Tp, typename... _Args>
  _GLIBCXX_SIMD_INTRINSIC constexpr __vector_type_t<_Tp, sizeof...(_Args)>
  __make_vector(const _Args&... __args)
  {
    using _Result = __vector_type_t<_Tp, sizeof...(_Args)>;
    return _Result{static_cast<_Tp>(__args)...};
  }

// }}}
// __vector_broadcast{{{
// Returns a vector of _Np elements of type _Tp, brace-initialized from the
// values that __call_with_n_evaluations<_Np> collects from a callable that
// always returns __x.
template <size_t _Np, typename _Tp>
  _GLIBCXX_SIMD_INTRINSIC constexpr __vector_type_t<_Tp, _Np>
  __vector_broadcast(_Tp __x)
  {
    using _Result = __vector_type_t<_Tp, _Np>;
    return __call_with_n_evaluations<_Np>(
      [](auto... __values) { return _Result{__values...}; },
      [&__x](int) { return __x; });
  }

// }}}
// __generate_vector{{{
  // Helper for __generate_vector: calls __gen once per index in the index
  // sequence (passing _SizeConstant<index>), converts every result to _Tp and
  // brace-initializes a vector of _Np elements with the values.
  template <typename _Tp, size_t _Np, typename _Gp, size_t... _I>
  _GLIBCXX_SIMD_INTRINSIC constexpr __vector_type_t<_Tp, _Np>
  __generate_vector_impl(_Gp&& __gen, index_sequence<_I...>)
  {
    using _Result = __vector_type_t<_Tp, _Np>;
    return _Result{static_cast<_Tp>(__gen(_SizeConstant<_I>()))...};
  }

// Generates an object of type _V from the generator __gen, which is called
// with _SizeConstant<i> for every element index i. For builtin vector types
// all _S_full_size elements are generated; for all other types (those with an
// _S_partial_width trait) only that many elements are generated.
template <typename _V, typename _VVT = _VectorTraits<_V>, typename _Gp>
  _GLIBCXX_SIMD_INTRINSIC constexpr _V
  __generate_vector(_Gp&& __gen)
  {
    using _Elem = typename _VVT::value_type;
    if constexpr (!__is_vector_type_v<_V>)
      {
	using _Indexes = make_index_sequence<_VVT::_S_partial_width>;
	return __generate_vector_impl<_Elem, _VVT::_S_partial_width>(
	  static_cast<_Gp&&>(__gen), _Indexes());
      }
    else
      {
	using _Indexes = make_index_sequence<_VVT::_S_full_size>;
	return __generate_vector_impl<_Elem, _VVT::_S_full_size>(
	  static_cast<_Gp&&>(__gen), _Indexes());
      }
  }

// Generates a vector of _Np elements of type _Tp from the generator __gen,
// which is called with _SizeConstant<i> for every element index i.
template <typename _Tp, size_t _Np, typename _Gp>
  _GLIBCXX_SIMD_INTRINSIC constexpr __vector_type_t<_Tp, _Np>
  __generate_vector(_Gp&& __gen)
  {
    using _Indexes = make_index_sequence<_Np>;
    return __generate_vector_impl<_Tp, _Np>(static_cast<_Gp&&>(__gen),
					    _Indexes());
  }

// }}}
// __xor{{{
// Bitwise XOR of __a and __b. Builtin vectors and _SimdWrapper objects with a
// floating-point element type are combined via unsigned integer vectors of the
// same element size; _SimdWrapper objects with other element types are
// combined via their _M_data members; everything else uses operator^ directly.
template <typename _TW>
  _GLIBCXX_SIMD_INTRINSIC constexpr _TW
  __xor(_TW __a, _TW __b) noexcept
  {
    if constexpr (!__is_vector_type_v<_TW> && !__is_simd_wrapper_v<_TW>)
      return __a ^ __b;
    else
      {
	using _Traits = conditional_t<__is_simd_wrapper_v<_TW>, _TW,
				      _VectorTraitsImpl<_TW>>;
	using _Elem = typename _Traits::value_type;
	if constexpr (is_floating_point_v<_Elem>)
	  {
	    using _Bits = make_unsigned_t<__int_for_sizeof_t<_Elem>>;
	    const auto __abits = __vector_bitcast<_Bits>(__a);
	    const auto __bbits = __vector_bitcast<_Bits>(__b);
	    return __vector_bitcast<_Elem>(__abits ^ __bbits);
	  }
	else if constexpr (__is_vector_type_v<_TW>)
	  return __a ^ __b;
	else
	  return __a._M_data ^ __b._M_data;
      }
  }

// }}}
// __or{{{
// Bitwise OR of __a and __b. Builtin vectors and _SimdWrapper objects with a
// floating-point element type are combined via unsigned integer vectors of the
// same element size; _SimdWrapper objects with other element types are
// combined via their _M_data members; everything else uses operator| directly.
template <typename _TW>
  _GLIBCXX_SIMD_INTRINSIC constexpr _TW
  __or(_TW __a, _TW __b) noexcept
  {
    if constexpr (!__is_vector_type_v<_TW> && !__is_simd_wrapper_v<_TW>)
      return __a | __b;
    else
      {
	using _Traits = conditional_t<__is_simd_wrapper_v<_TW>, _TW,
				      _VectorTraitsImpl<_TW>>;
	using _Elem = typename _Traits::value_type;
	if constexpr (is_floating_point_v<_Elem>)
	  {
	    using _Bits = make_unsigned_t<__int_for_sizeof_t<_Elem>>;
	    const auto __abits = __vector_bitcast<_Bits>(__a);
	    const auto __bbits = __vector_bitcast<_Bits>(__b);
	    return __vector_bitcast<_Elem>(__abits | __bbits);
	  }
	else if constexpr (__is_vector_type_v<_TW>)
	  return __a | __b;
	else
	  return __a._M_data | __b._M_data;
      }
  }

// }}}
// __and{{{
// Bitwise AND of __a and __b. Builtin vectors and _SimdWrapper objects with a
// floating-point element type are combined via unsigned integer vectors of the
// same element size; _SimdWrapper objects with other element types are
// combined via their _M_data members; everything else uses operator& directly.
template <typename _TW>
  _GLIBCXX_SIMD_INTRINSIC constexpr _TW
  __and(_TW __a, _TW __b) noexcept
  {
    if constexpr (!__is_vector_type_v<_TW> && !__is_simd_wrapper_v<_TW>)
      return __a & __b;
    else
      {
	using _Traits = conditional_t<__is_simd_wrapper_v<_TW>, _TW,
				      _VectorTraitsImpl<_TW>>;
	using _Elem = typename _Traits::value_type;
	if constexpr (is_floating_point_v<_Elem>)
	  {
	    using _Bits = make_unsigned_t<__int_for_sizeof_t<_Elem>>;
	    const auto __abits = __vector_bitcast<_Bits>(__a);
	    const auto __bbits = __vector_bitcast<_Bits>(__b);
	    return __vector_bitcast<_Elem>(__abits & __bbits);
	  }
	else if constexpr (__is_vector_type_v<_TW>)
	  return __a & __b;
	else
	  return __a._M_data & __b._M_data;
      }
  }

// }}}
// __andnot{{{
#if _GLIBCXX_SIMD_X86INTRIN && !defined __clang__
// Overload set (as a function object) that dispatches the and-not operation
// on x86 intrinsic vector types to the matching builtin or intrinsic. It is
// called from __andnot, whose generic fallback computes ~__a & __b.
// NOTE(review): the builtins are presumably equivalent to that fallback
// (first operand inverted) -- confirm against the GCC/Intel documentation.
static constexpr struct
{
  // 16-byte vectors
  _GLIBCXX_SIMD_INTRINSIC __v4sf
  operator()(__v4sf __a, __v4sf __b) const noexcept
  { return __builtin_ia32_andnps(__a, __b); }

  _GLIBCXX_SIMD_INTRINSIC __v2df
  operator()(__v2df __a, __v2df __b) const noexcept
  { return __builtin_ia32_andnpd(__a, __b); }

  _GLIBCXX_SIMD_INTRINSIC __v2di
  operator()(__v2di __a, __v2di __b) const noexcept
  { return __builtin_ia32_pandn128(__a, __b); }

  // 32-byte vectors
  _GLIBCXX_SIMD_INTRINSIC __v8sf
  operator()(__v8sf __a, __v8sf __b) const noexcept
  { return __builtin_ia32_andnps256(__a, __b); }

  _GLIBCXX_SIMD_INTRINSIC __v4df
  operator()(__v4df __a, __v4df __b) const noexcept
  { return __builtin_ia32_andnpd256(__a, __b); }

  // Without AVX2 the integer case is routed through the double-precision
  // builtin.
  _GLIBCXX_SIMD_INTRINSIC __v4di
  operator()(__v4di __a, __v4di __b) const noexcept
  {
    if constexpr (__have_avx2)
      return __builtin_ia32_andnotsi256(__a, __b);
    else
      return reinterpret_cast<__v4di>(
	__builtin_ia32_andnpd256(reinterpret_cast<__v4df>(__a),
				 reinterpret_cast<__v4df>(__b)));
  }

  // 64-byte vectors: without AVX512DQ the floating-point cases are routed
  // through the integer intrinsic.
  _GLIBCXX_SIMD_INTRINSIC __v16sf
  operator()(__v16sf __a, __v16sf __b) const noexcept
  {
    if constexpr (__have_avx512dq)
      return _mm512_andnot_ps(__a, __b);
    else
      return reinterpret_cast<__v16sf>(
	_mm512_andnot_si512(reinterpret_cast<__v8di>(__a),
			    reinterpret_cast<__v8di>(__b)));
  }

  _GLIBCXX_SIMD_INTRINSIC __v8df
  operator()(__v8df __a, __v8df __b) const noexcept
  {
    if constexpr (__have_avx512dq)
      return _mm512_andnot_pd(__a, __b);
    else
      return reinterpret_cast<__v8df>(
	_mm512_andnot_si512(reinterpret_cast<__v8di>(__a),
			    reinterpret_cast<__v8di>(__b)));
  }

  _GLIBCXX_SIMD_INTRINSIC __v8di
  operator()(__v8di __a, __v8di __b) const noexcept
  { return _mm512_andnot_si512(__a, __b); }
} _S_x86_andnot;
#endif // _GLIBCXX_SIMD_X86INTRIN && !__clang__

// Returns ~__a & __b. For builtin vectors and _SimdWrapper objects the
// operation is done on unsigned integer vectors of the same element size, or,
// on x86 with GCC, via _S_x86_andnot. All other types use the operators
// directly.
template <typename _TW>
  _GLIBCXX_SIMD_INTRINSIC constexpr _TW
  __andnot(_TW __a, _TW __b) noexcept
  {
    if constexpr (__is_vector_type_v<_TW> || __is_simd_wrapper_v<_TW>)
      {
	using _TVT = conditional_t<__is_simd_wrapper_v<_TW>, _TW,
				   _VectorTraitsImpl<_TW>>;
	using _Tp = typename _TVT::value_type;
#if _GLIBCXX_SIMD_X86INTRIN && !defined __clang__
	if constexpr (sizeof(_TW) >= 16)
	  {
	    const auto __ai = __to_intrin(__a);
	    const auto __bi = __to_intrin(__b);
	    // Use the x86 implementation only at run time and only if not
	    // both operands are known constants; otherwise fall through to the
	    // generic expression below.
	    if (!__builtin_is_constant_evaluated()
		&& !(__builtin_constant_p(__ai) && __builtin_constant_p(__bi)))
	      {
		const auto __r = _S_x86_andnot(__ai, __bi);
		// the intrinsic result type may differ from _TW
		if constexpr (is_convertible_v<decltype(__r), _TW>)
		  return __r;
		else
		  return reinterpret_cast<typename _TVT::type>(__r);
	      }
	  }
#endif // _GLIBCXX_SIMD_X86INTRIN
	// generic implementation on unsigned integer vectors
	using _Ip = make_unsigned_t<__int_for_sizeof_t<_Tp>>;
	return __vector_bitcast<_Tp>(~__vector_bitcast<_Ip>(__a)
				     & __vector_bitcast<_Ip>(__b));
      }
    else
      return ~__a & __b;
  }

// }}}
// __not{{{
// Returns the bitwise complement of __a. For floating-point element types the
// complement is computed on a vector of unsigned elements and the result is
// reinterpreted as the vector type of __a; otherwise operator~ is applied
// directly. Only participates for types that have _VectorTraits.
template <typename _Tp, typename _TVT = _VectorTraits<_Tp>>
  _GLIBCXX_SIMD_INTRINSIC constexpr _Tp
  __not(_Tp __a) noexcept
  {
    using _Elem = typename _TVT::value_type;
    if constexpr (!is_floating_point_v<_Elem>)
      return ~__a;
    else
      {
	using _Vec = typename _TVT::type;
	return reinterpret_cast<_Vec>(~__vector_bitcast<unsigned>(__a));
      }
  }

// }}}
// __concat{{{
// Concatenates two vectors of the same type: the result _R has twice as many
// elements, starting with the elements of a_, followed by the elements of b_.
// With _GLIBCXX_SIMD_WORKAROUND_XXX_1 the inputs are first reinterpreted as
// vectors of wider elements (_W), so that fewer elements have to be listed.
// NOTE(review): only input widths of 2, 4, 8, 16 and 32 elements are handled;
// for any other width no return statement is reached.
template <typename _Tp, typename _TVT = _VectorTraits<_Tp>,
	  typename _R = __vector_type_t<typename _TVT::value_type,
					_TVT::_S_full_size * 2>>
  constexpr _R
  __concat(_Tp a_, _Tp b_)
  {
#ifdef _GLIBCXX_SIMD_WORKAROUND_XXX_1
    // _W: double for floating-point elements; otherwise long long if the
    // input has at least two long longs worth of bytes; otherwise the
    // element type itself
    using _W
      = conditional_t<is_floating_point_v<typename _TVT::value_type>, double,
		      conditional_t<(sizeof(_Tp) >= 2 * sizeof(long long)),
				    long long, typename _TVT::value_type>>;
    constexpr int input_width = sizeof(_Tp) / sizeof(_W);
    const auto __a = __vector_bitcast<_W>(a_);
    const auto __b = __vector_bitcast<_W>(b_);
    using _Up = __vector_type_t<_W, sizeof(_R) / sizeof(_W)>;
#else
    constexpr int input_width = _TVT::_S_full_size;
    const _Tp& __a = a_;
    const _Tp& __b = b_;
    using _Up = _R;
#endif
    // list all elements of __a followed by all elements of __b
    if constexpr (input_width == 2)
      return reinterpret_cast<_R>(_Up{__a[0], __a[1], __b[0], __b[1]});
    else if constexpr (input_width == 4)
      return reinterpret_cast<_R>(
	_Up{__a[0], __a[1], __a[2], __a[3], __b[0], __b[1], __b[2], __b[3]});
    else if constexpr (input_width == 8)
      return reinterpret_cast<_R>(
	_Up{__a[0], __a[1], __a[2], __a[3], __a[4], __a[5], __a[6], __a[7],
	    __b[0], __b[1], __b[2], __b[3], __b[4], __b[5], __b[6], __b[7]});
    else if constexpr (input_width == 16)
      return reinterpret_cast<_R>(
	_Up{__a[0],  __a[1],  __a[2],  __a[3],  __a[4],  __a[5],  __a[6],
	    __a[7],  __a[8],  __a[9],  __a[10], __a[11], __a[12], __a[13],
	    __a[14], __a[15], __b[0],  __b[1],  __b[2],  __b[3],  __b[4],
	    __b[5],  __b[6],  __b[7],  __b[8],  __b[9],  __b[10], __b[11],
	    __b[12], __b[13], __b[14], __b[15]});
    else if constexpr (input_width == 32)
      return reinterpret_cast<_R>(
	_Up{__a[0],  __a[1],  __a[2],  __a[3],  __a[4],  __a[5],  __a[6],
	    __a[7],  __a[8],  __a[9],  __a[10], __a[11], __a[12], __a[13],
	    __a[14], __a[15], __a[16], __a[17], __a[18], __a[19], __a[20],
	    __a[21], __a[22], __a[23], __a[24], __a[25], __a[26], __a[27],
	    __a[28], __a[29], __a[30], __a[31], __b[0],  __b[1],  __b[2],
	    __b[3],  __b[4],  __b[5],  __b[6],  __b[7],  __b[8],  __b[9],
	    __b[10], __b[11], __b[12], __b[13], __b[14], __b[15], __b[16],
	    __b[17], __b[18], __b[19], __b[20], __b[21], __b[22], __b[23],
	    __b[24], __b[25], __b[26], __b[27], __b[28], __b[29], __b[30],
	    __b[31]});
  }

// }}}
// __zero_extend {{{
// Proxy object returned by __zero_extend. It stores a vector and converts
// implicitly to any vector type with the same element type and 1, 2, 4, 8 or
// 16 times as many elements. The stored vector provides the first elements of
// the result; the remaining elements come from value-initialized vectors.
template <typename _Tp, typename _TVT = _VectorTraits<_Tp>>
  struct _ZeroExtendProxy
  {
    using value_type = typename _TVT::value_type;
    // number of elements of the stored vector
    static constexpr size_t _Np = _TVT::_S_full_size;
    const _Tp __x;

    // Conversion to the wider vector type _To. Only participates if _To has
    // _VectorTraits and the same element type as the stored vector.
    template <typename _To, typename _ToVT = _VectorTraits<_To>,
	      typename
	      = enable_if_t<is_same_v<typename _ToVT::value_type, value_type>>>
      _GLIBCXX_SIMD_INTRINSIC operator _To() const
      {
	constexpr size_t _ToN = _ToVT::_S_full_size;
	if constexpr (_ToN == _Np)
	  return __x;
	else if constexpr (_ToN == 2 * _Np)
	  {
	    // With the workaround macro defined, x86 insert intrinsics are
	    // used to place __x at position 0 of a value-initialized wider
	    // vector, where a matching instruction set is available.
#ifdef _GLIBCXX_SIMD_WORKAROUND_XXX_3
	    if constexpr (__have_avx && _TVT::template _S_is<float, 4>)
	      return __vector_bitcast<value_type>(
		_mm256_insertf128_ps(__m256(), __x, 0));
	    else if constexpr (__have_avx && _TVT::template _S_is<double, 2>)
	      return __vector_bitcast<value_type>(
		_mm256_insertf128_pd(__m256d(), __x, 0));
	    else if constexpr (__have_avx2 && _Np * sizeof(value_type) == 16)
	      return __vector_bitcast<value_type>(
		_mm256_insertf128_si256(__m256i(), __to_intrin(__x), 0));
	    else if constexpr (__have_avx512f && _TVT::template _S_is<float, 8>)
	      {
		if constexpr (__have_avx512dq)
		  return __vector_bitcast<value_type>(
		    _mm512_insertf32x8(__m512(), __x, 0));
		else
		  return reinterpret_cast<__m512>(
		    _mm512_insertf64x4(__m512d(),
				       reinterpret_cast<__m256d>(__x), 0));
	      }
	    else if constexpr (__have_avx512f
			       && _TVT::template _S_is<double, 4>)
	      return __vector_bitcast<value_type>(
		_mm512_insertf64x4(__m512d(), __x, 0));
	    else if constexpr (__have_avx512f && _Np * sizeof(value_type) == 32)
	      return __vector_bitcast<value_type>(
		_mm512_inserti64x4(__m512i(), __to_intrin(__x), 0));
#endif
	    // generic: append a value-initialized vector of the same type
	    return __concat(__x, _Tp());
	  }
	else if constexpr (_ToN == 4 * _Np)
	  {
#ifdef _GLIBCXX_SIMD_WORKAROUND_XXX_3
	    if constexpr (__have_avx512dq && _TVT::template _S_is<double, 2>)
	      {
		return __vector_bitcast<value_type>(
		  _mm512_insertf64x2(__m512d(), __x, 0));
	      }
	    else if constexpr (__have_avx512f
			       && is_floating_point_v<value_type>)
	      {
		return __vector_bitcast<value_type>(
		  _mm512_insertf32x4(__m512(), reinterpret_cast<__m128>(__x),
				     0));
	      }
	    else if constexpr (__have_avx512f && _Np * sizeof(value_type) == 16)
	      {
		return __vector_bitcast<value_type>(
		  _mm512_inserti32x4(__m512i(), __to_intrin(__x), 0));
	      }
#endif
	    // generic: double the width twice
	    return __concat(__concat(__x, _Tp()),
			    __vector_type_t<value_type, _Np * 2>());
	  }
	// 8x and 16x: convert to half of the target width first, then append a
	// value-initialized vector of that width
	else if constexpr (_ToN == 8 * _Np)
	  return __concat(operator __vector_type_t<value_type, _Np * 4>(),
			  __vector_type_t<value_type, _Np * 4>());
	else if constexpr (_ToN == 16 * _Np)
	  return __concat(operator __vector_type_t<value_type, _Np * 8>(),
			  __vector_type_t<value_type, _Np * 8>());
	else
	  __assert_unreachable<_Tp>();
      }
  };

// Wraps __x in a _ZeroExtendProxy. The widening itself happens when the proxy
// is converted to the requested (larger) vector type.
template <typename _Tp, typename _TVT = _VectorTraits<_Tp>>
  _GLIBCXX_SIMD_INTRINSIC _ZeroExtendProxy<_Tp, _TVT>
  __zero_extend(_Tp __x)
  {
    using _Proxy = _ZeroExtendProxy<_Tp, _TVT>;
    return _Proxy{__x};
  }

// }}}
// __extract<_Np, By>{{{
// Returns piece number _Offset (counting from 0) of __in, where __in is
// viewed as _SplitBy consecutive pieces. The default result type _R is a
// vector of _TVT::_S_full_size / _SplitBy elements of the same value_type.
template <int _Offset,
	  int _SplitBy,
	  typename _Tp,
	  typename _TVT = _VectorTraits<_Tp>,
	  typename _R = __vector_type_t<typename _TVT::value_type,
			  _TVT::_S_full_size / _SplitBy>>
  _GLIBCXX_SIMD_INTRINSIC constexpr _R
  __extract(_Tp __in)
  {
    using value_type = typename _TVT::value_type;
#if _GLIBCXX_SIMD_X86INTRIN // {{{
    // A non-zero quarter of a 64-byte vector is extracted with the AVX-512
    // extract intrinsics; the intrinsic is picked by element type.
    if constexpr (sizeof(_Tp) == 64 && _SplitBy == 4 && _Offset > 0)
      {
	if constexpr (__have_avx512dq && is_same_v<double, value_type>)
	  return _mm512_extractf64x2_pd(__to_intrin(__in), _Offset);
	else if constexpr (is_floating_point_v<value_type>)
	  return __vector_bitcast<value_type>(
	    _mm512_extractf32x4_ps(__intrin_bitcast<__m512>(__in), _Offset));
	else
	  return reinterpret_cast<_R>(
	    _mm512_extracti32x4_epi32(__intrin_bitcast<__m512i>(__in),
				      _Offset));
      }
    // NOTE: this `else` binds to the compound statement after the #endif.
    else
#endif // _GLIBCXX_SIMD_X86INTRIN }}}
      {
	// Generic path: rebuild the result element by element.
#ifdef _GLIBCXX_SIMD_WORKAROUND_XXX_1
	// Workaround variant: operate on wider elements (double for
	// floating-point, long long for results of at least 16 bytes) and
	// cast the assembled vector back to _R at the end.
	using _W = conditional_t<
	  is_floating_point_v<value_type>, double,
	  conditional_t<(sizeof(_R) >= 16), long long, value_type>>;
	static_assert(sizeof(_R) % sizeof(_W) == 0);
	constexpr int __return_width = sizeof(_R) / sizeof(_W);
	using _Up = __vector_type_t<_W, __return_width>;
	const auto __x = __vector_bitcast<_W>(__in);
#else
      constexpr int __return_width = _TVT::_S_full_size / _SplitBy;
      using _Up = _R;
      const __vector_type_t<value_type, _TVT::_S_full_size>& __x
	= __in; // only needed for _Tp = _SimdWrapper<value_type, _Np>
#endif
	// Index of the first element of the requested piece.
	constexpr int _O = _Offset * __return_width;
	// presumably invokes the lambda with __x[_O] ... __x[_O +
	// __return_width - 1] -- confirm against __call_with_subscripts.
	return __call_with_subscripts<__return_width, _O>(
	  __x, [](auto... __entries) {
	    return reinterpret_cast<_R>(_Up{__entries...});
	  });
      }
  }

// }}}
// __lo/__hi64[z]{{{
// Copies the first 8 bytes of __x into a value-initialized _R (by default the
// 8-byte vector type with the same value_type as _Tp).
template <typename _Tp,
	  typename _R
	  = __vector_type8_t<typename _VectorTraits<_Tp>::value_type>>
  _GLIBCXX_SIMD_INTRINSIC constexpr _R
  __lo64(_Tp __x)
  {
    constexpr int __nbytes = 8;
    _R __low{};
    __builtin_memcpy(&__low, &__x, __nbytes);
    return __low;
  }

// Copies bytes [8, 16) of a 16-byte __x into a value-initialized _R. Any other
// size of _Tp is rejected at compile time; __hi64z is the variant that
// accepts other sizes.
template <typename _Tp,
	  typename _R
	  = __vector_type8_t<typename _VectorTraits<_Tp>::value_type>>
  _GLIBCXX_SIMD_INTRINSIC constexpr _R
  __hi64(_Tp __x)
  {
    static_assert(sizeof(_Tp) == 16, "use __hi64z if you meant it");
    constexpr int __nbytes = 8;
    const char* const __bytes = reinterpret_cast<const char*>(&__x);
    _R __high{};
    __builtin_memcpy(&__high, __bytes + __nbytes, __nbytes);
    return __high;
  }

// Like __hi64, but usable with any _Tp: for a 16-byte __x the result holds
// bytes [8, 16) of __x, otherwise the result stays value-initialized (zero).
template <typename _Tp,
	  typename _R
	  = __vector_type8_t<typename _VectorTraits<_Tp>::value_type>>
  _GLIBCXX_SIMD_INTRINSIC constexpr _R
  __hi64z([[maybe_unused]] _Tp __x)
  {
    _R __high{};
    if constexpr (sizeof(_Tp) == 16)
      {
	constexpr int __nbytes = 8;
	const char* const __bytes = reinterpret_cast<const char*>(&__x);
	__builtin_memcpy(&__high, __bytes + __nbytes, __nbytes);
      }
    return __high;
  }

// }}}
// __lo/__hi128{{{
// Returns piece 0 of __x when it is split into sizeof(_Tp) / 16 pieces, i.e.
// the first 16 bytes.
template <typename _Tp>
  _GLIBCXX_SIMD_INTRINSIC constexpr auto
  __lo128(_Tp __x)
  {
    constexpr int __pieces = sizeof(_Tp) / 16;
    return __extract<0, __pieces>(__x);
  }

// Returns the second half of a 32-byte __x; other sizes are rejected at
// compile time.
template <typename _Tp>
  _GLIBCXX_SIMD_INTRINSIC constexpr auto
  __hi128(_Tp __x)
  {
    static_assert(sizeof(_Tp) == 32);
    constexpr int __piece = 1;
    constexpr int __pieces = 2;
    return __extract<__piece, __pieces>(__x);
  }

// }}}
// __lo/__hi256{{{
// Returns the first half of a 64-byte __x; other sizes are rejected at
// compile time.
template <typename _Tp>
  _GLIBCXX_SIMD_INTRINSIC constexpr auto
  __lo256(_Tp __x)
  {
    static_assert(sizeof(_Tp) == 64);
    constexpr int __piece = 0;
    constexpr int __pieces = 2;
    return __extract<__piece, __pieces>(__x);
  }

// Returns the second half of a 64-byte __x; other sizes are rejected at
// compile time.
template <typename _Tp>
  _GLIBCXX_SIMD_INTRINSIC constexpr auto
  __hi256(_Tp __x)
  {
    static_assert(sizeof(_Tp) == 64);
    constexpr int __piece = 1;
    constexpr int __pieces = 2;
    return __extract<__piece, __pieces>(__x);
  }

// }}}
// __auto_bitcast{{{
// Helper returned by __auto_bitcast: holds a vector and converts implicitly to
// any type _Up for which _VectorTraits<_Up> can be instantiated.
template <typename _Tp>
  struct _AutoCast
  {
    static_assert(__is_vector_type_v<_Tp>);

    // The wrapped vector.
    const _Tp __x;

    // Casts the stored vector with __intrin_bitcast to _VectorTraits<_Up>::type
    // and returns the result as _Up.
    template <typename _Up, typename _UVT = _VectorTraits<_Up>>
      _GLIBCXX_SIMD_INTRINSIC constexpr operator _Up() const
      { return __intrin_bitcast<typename _UVT::type>(__x); }
  };

// Wraps the vector __x in an _AutoCast so that the destination type of the
// bitcast is chosen by the context the result is used in.
template <typename _Tp>
  _GLIBCXX_SIMD_INTRINSIC constexpr _AutoCast<_Tp>
  __auto_bitcast(const _Tp& __x)
  {
    using _Cast = _AutoCast<_Tp>;
    return _Cast{__x};
  }

// _SimdWrapper overload: wraps the wrapper's underlying builtin vector
// (_M_data) in an _AutoCast.
template <typename _Tp, size_t _Np>
  _GLIBCXX_SIMD_INTRINSIC constexpr
  _AutoCast<typename _SimdWrapper<_Tp, _Np>::_BuiltinType>
  __auto_bitcast(const _SimdWrapper<_Tp, _Np>& __x)
  {
    using _Cast = _AutoCast<typename _SimdWrapper<_Tp, _Np>::_BuiltinType>;
    return _Cast{__x._M_data};
  }

// }}}
// ^^^ ---- builtin vector types [[gnu::vector_size(N)]] and operations ---- ^^^

#if _GLIBCXX_SIMD_HAVE_SSE_ABI
// __bool_storage_member_type{{{
#if _GLIBCXX_SIMD_HAVE_AVX512F && _GLIBCXX_SIMD_X86INTRIN
// Maps a number of mask elements to the integer type that stores them as a
// bitmask. The primary template handles non-power-of-2 sizes by forwarding to
// the next power of 2 (std::__bit_ceil); the powers of 2 from 1 to 64 are
// explicitly specialized below.
template <size_t _Size>
  struct __bool_storage_member_type
  {
    static_assert((_Size & (_Size - 1)) != 0,
		  "This trait may only be used for non-power-of-2 sizes. "
		  "Power-of-2 sizes must be specialized.");
    using type =
      typename __bool_storage_member_type<std::__bit_ceil(_Size)>::type;
  };

// A single element is stored as a plain bool.
template <>
  struct __bool_storage_member_type<1> { using type = bool; };

// 2, 4 and 8 elements all use __mmask8.
template <>
  struct __bool_storage_member_type<2> { using type = __mmask8; };

template <>
  struct __bool_storage_member_type<4> { using type = __mmask8; };

template <>
  struct __bool_storage_member_type<8> { using type = __mmask8; };

// 16, 32 and 64 elements use the mask type of matching width.
template <>
  struct __bool_storage_member_type<16> { using type = __mmask16; };

template <>
  struct __bool_storage_member_type<32> { using type = __mmask32; };

template <>
  struct __bool_storage_member_type<64> { using type = __mmask64; };
#endif // _GLIBCXX_SIMD_HAVE_AVX512F

// }}}
// __intrinsic_type (x86){{{
// the following excludes bool via __is_vectorizable
#if _GLIBCXX_SIMD_HAVE_SSE
// x86: intrinsic vector type for up to 64 bytes of vectorizable _Tp.
// _Bytes is rounded up to 16, 32 or 64; integral _Tp maps to a vector of
// long long int, every other _Tp keeps its own element type. long double is
// rejected.
template <typename _Tp, size_t _Bytes>
  struct __intrinsic_type<_Tp, _Bytes,
			  enable_if_t<__is_vectorizable_v<_Tp> && _Bytes <= 64>>
  {
    static_assert(!is_same_v<_Tp, long double>,
		  "no __intrinsic_type support for long double on x86");

    // Size in bytes of the resulting vector type.
    static constexpr size_t _S_VBytes = _Bytes <= 16   ? 16
					: _Bytes <= 32 ? 32
						       : 64;

    using type [[__gnu__::__vector_size__(_S_VBytes)]]
    = conditional_t<is_integral_v<_Tp>, long long int, _Tp>;
  };
#endif // _GLIBCXX_SIMD_HAVE_SSE

// }}}
#endif // _GLIBCXX_SIMD_HAVE_SSE_ABI
// __intrinsic_type (ARM){{{
#if _GLIBCXX_SIMD_HAVE_NEON
// NEON: explicit mappings from (float, byte count) to the NEON vector types.
template <>
  struct __intrinsic_type<float, 8, void>
  { using type = float32x2_t; };

template <>
  struct __intrinsic_type<float, 16, void>
  { using type = float32x4_t; };

// The double mappings are only provided when _GLIBCXX_SIMD_HAVE_NEON_A64 is
// set.
#if _GLIBCXX_SIMD_HAVE_NEON_A64
template <>
  struct __intrinsic_type<double, 8, void>
  { using type = float64x1_t; };

template <>
  struct __intrinsic_type<double, 16, void>
  { using type = float64x2_t; };
#endif

// Defines the __intrinsic_type specializations for the signed and unsigned
// integer of _Bits bits with _Np elements, mapping them to int<_Bits>x<_Np>_t
// and uint<_Bits>x<_Np>_t respectively. The macro is local to this section
// and is undefined again after the instantiations.
#define _GLIBCXX_SIMD_ARM_INTRIN(_Bits, _Np)                                   \
template <>                                                                    \
  struct __intrinsic_type<__int_with_sizeof_t<_Bits / 8>,                      \
			  _Np * _Bits / 8, void>                               \
  { using type = int##_Bits##x##_Np##_t; };                                    \
template <>                                                                    \
  struct __intrinsic_type<make_unsigned_t<__int_with_sizeof_t<_Bits / 8>>,     \
			  _Np * _Bits / 8, void>                               \
  { using type = uint##_Bits##x##_Np##_t; }
// 8- and 16-byte vectors for every element width from 8 to 64 bits.
_GLIBCXX_SIMD_ARM_INTRIN(8, 8);
_GLIBCXX_SIMD_ARM_INTRIN(8, 16);
_GLIBCXX_SIMD_ARM_INTRIN(16, 4);
_GLIBCXX_SIMD_ARM_INTRIN(16, 8);
_GLIBCXX_SIMD_ARM_INTRIN(32, 2);
_GLIBCXX_SIMD_ARM_INTRIN(32, 4);
_GLIBCXX_SIMD_ARM_INTRIN(64, 1);
_GLIBCXX_SIMD_ARM_INTRIN(64, 2);
#undef _GLIBCXX_SIMD_ARM_INTRIN

// NEON: generic fallback for up to 16 bytes of vectorizable _Tp. It rounds
// _Bytes up to 8 or 16, replaces integral _Tp by the integer type of the same
// size and signedness, and then refers to the explicit specializations.
template <typename _Tp, size_t _Bytes>
  struct __intrinsic_type<_Tp, _Bytes,
			  enable_if_t<__is_vectorizable_v<_Tp> && _Bytes <= 16>>
  {
    // Byte size of the vector type that is looked up.
    static constexpr int _SVecBytes = _Bytes <= 8 ? 8 : 16;
    using _Ip = __int_for_sizeof_t<_Tp>;
    // Canonical element type: floating-point types are kept, integers are
    // replaced by _Ip or its unsigned counterpart.
    using _Up = conditional_t<
      is_floating_point_v<_Tp>, _Tp,
      conditional_t<is_unsigned_v<_Tp>, make_unsigned_t<_Ip>, _Ip>>;
    // If nothing was adjusted, the lookup below would name this very
    // specialization again.
    static_assert(!is_same_v<_Tp, _Up> || _SVecBytes != _Bytes,
		  "should use explicit specialization above");
    using type = typename __intrinsic_type<_Up, _SVecBytes>::type;
  };
#endif // _GLIBCXX_SIMD_HAVE_NEON

// }}}
// __intrinsic_type (PPC){{{
#ifdef __ALTIVEC__
// PowerPC: maps an element type to the corresponding `__vector _Tp` type.
// Only the element types listed below are specialized.
template <typename _Tp>
  struct __intrinsic_type_impl;

// Defines __intrinsic_type_impl<_Tp>::type as `__vector _Tp`; the macro is
// undefined again after the list of instantiations.
#define _GLIBCXX_SIMD_PPC_INTRIN(_Tp)                                          \
  template <>                                                                  \
    struct __intrinsic_type_impl<_Tp> { using type = __vector _Tp; }
_GLIBCXX_SIMD_PPC_INTRIN(float);
// double requires __VSX__.
#ifdef __VSX__
_GLIBCXX_SIMD_PPC_INTRIN(double);
#endif
_GLIBCXX_SIMD_PPC_INTRIN(signed char);
_GLIBCXX_SIMD_PPC_INTRIN(unsigned char);
_GLIBCXX_SIMD_PPC_INTRIN(signed short);
_GLIBCXX_SIMD_PPC_INTRIN(unsigned short);
_GLIBCXX_SIMD_PPC_INTRIN(signed int);
_GLIBCXX_SIMD_PPC_INTRIN(unsigned int);
// long is provided with __VSX__ or when long is 4 bytes wide.
#if defined __VSX__ || __SIZEOF_LONG__ == 4
_GLIBCXX_SIMD_PPC_INTRIN(signed long);
_GLIBCXX_SIMD_PPC_INTRIN(unsigned long);
#endif
// long long requires __VSX__.
#ifdef __VSX__
_GLIBCXX_SIMD_PPC_INTRIN(signed long long);
_GLIBCXX_SIMD_PPC_INTRIN(unsigned long long);
#endif
#undef _GLIBCXX_SIMD_PPC_INTRIN

// PowerPC: intrinsic type for up to 16 bytes of vectorizable _Tp.
// Floating-point _Tp is looked up as is (long double as double), every other
// _Tp is looked up via __int_for_sizeof_t<_Tp>.
template <typename _Tp, size_t _Bytes>
  struct __intrinsic_type<_Tp, _Bytes,
			  enable_if_t<__is_vectorizable_v<_Tp> && _Bytes <= 16>>
  {
    static constexpr bool _S_is_ldouble = is_same_v<_Tp, long double>;
    // allow _Tp == long double with -mlong-double-64
    static_assert(!(_S_is_ldouble && sizeof(long double) > sizeof(double)),
		  "no __intrinsic_type support for 128-bit floating point on PowerPC");
#ifndef __VSX__
    // Without VSX neither double nor a 64-bit long double is supported.
    static_assert(!(is_same_v<_Tp, double>
		    || (_S_is_ldouble && sizeof(long double) == sizeof(double))),
		  "no __intrinsic_type support for 64-bit floating point on PowerPC w/o VSX");
#endif
    using type =
      typename __intrinsic_type_impl<
		 conditional_t<is_floating_point_v<_Tp>,
			       conditional_t<_S_is_ldouble, double, _Tp>,
			       __int_for_sizeof_t<_Tp>>>::type;
  };
#endif // __ALTIVEC__

// }}}
// _SimdWrapper<bool>{{{1
// Storage for _Width boolean elements packed into the bitmask type chosen by
// __bool_storage_member_type<_Width>. Bit __i of _M_data holds element __i.
template <size_t _Width>
  struct _SimdWrapper<bool, _Width,
		      void_t<typename __bool_storage_member_type<_Width>::type>>
  {
    using _BuiltinType = typename __bool_storage_member_type<_Width>::type;
    using value_type = bool;

    // Number of bits in the storage type; may be larger than _Width.
    static constexpr size_t _S_full_size = sizeof(_BuiltinType) * __CHAR_BIT__;

    // Returns the same bits as a wrapper that covers the whole storage type.
    _GLIBCXX_SIMD_INTRINSIC constexpr _SimdWrapper<bool, _S_full_size>
    __as_full_vector() const { return _M_data; }

    _GLIBCXX_SIMD_INTRINSIC constexpr _SimdWrapper() = default;
    _GLIBCXX_SIMD_INTRINSIC constexpr _SimdWrapper(_BuiltinType __k)
      : _M_data(__k) {}

    _GLIBCXX_SIMD_INTRINSIC operator const _BuiltinType&() const
    { return _M_data; }

    _GLIBCXX_SIMD_INTRINSIC operator _BuiltinType&()
    { return _M_data; }

    _GLIBCXX_SIMD_INTRINSIC _BuiltinType __intrin() const
    { return _M_data; }

    // Returns whether bit __i is set.
    _GLIBCXX_SIMD_INTRINSIC constexpr value_type operator[](size_t __i) const
    { return _M_data & (_BuiltinType(1) << __i); }

    template <size_t __i>
      _GLIBCXX_SIMD_INTRINSIC constexpr value_type
      operator[](_SizeConstant<__i>) const
      { return _M_data & (_BuiltinType(1) << __i); }

    // Sets (__x == true) or clears (__x == false) bit __i.
    _GLIBCXX_SIMD_INTRINSIC constexpr void _M_set(size_t __i, value_type __x)
    {
      if (__x)
	_M_data |= (_BuiltinType(1) << __i);
      else
	_M_data &= ~(_BuiltinType(1) << __i);
    }

    _GLIBCXX_SIMD_INTRINSIC
    constexpr bool _M_is_constprop() const
    { return __builtin_constant_p(_M_data); }

    // Returns true only if _M_data is known at compile time and none of the
    // first _Width bits is set. Returns false if the value is not a
    // compile-time constant.
    _GLIBCXX_SIMD_INTRINSIC constexpr bool _M_is_constprop_none_of() const
    {
      if (__builtin_constant_p(_M_data))
	{
	  // The mask is computed in unsigned long long. ~_BuiltinType() would
	  // be promoted to int for the 8- and 16-bit mask types, and the
	  // right shift of -1 would then leave the padding bits set.
	  constexpr int __ullbits = sizeof(unsigned long long) * __CHAR_BIT__;
	  constexpr _BuiltinType __active_mask
	    = static_cast<_BuiltinType>(~0ULL >> (__ullbits - _Width));
	  return (_M_data & __active_mask) == 0;
	}
      return false;
    }

    // Returns true only if _M_data is known at compile time and all of the
    // first _Width bits are set. Returns false if the value is not a
    // compile-time constant.
    _GLIBCXX_SIMD_INTRINSIC constexpr bool _M_is_constprop_all_of() const
    {
      if (__builtin_constant_p(_M_data))
	{
	  // See _M_is_constprop_none_of for why the mask is built from
	  // unsigned long long.
	  constexpr int __ullbits = sizeof(unsigned long long) * __CHAR_BIT__;
	  constexpr _BuiltinType __active_mask
	    = static_cast<_BuiltinType>(~0ULL >> (__ullbits - _Width));
	  return (_M_data & __active_mask) == __active_mask;
	}
      return false;
    }

    _BuiltinType _M_data;
  };

// _SimdWrapperBase{{{1
// Base class holding the _M_data member of _SimdWrapper. The bool parameter
// selects whether default construction value-initializes _M_data.
template <bool _MustZeroInitPadding, typename _BuiltinType>
  struct _SimdWrapperBase;

// Default constructor is defaulted, i.e. _M_data is left uninitialized by
// default-initialization.
template <typename _BuiltinType>
  struct _SimdWrapperBase<false, _BuiltinType> // no padding or no SNaNs
  {
    _GLIBCXX_SIMD_INTRINSIC constexpr _SimdWrapperBase() = default;
    _GLIBCXX_SIMD_INTRINSIC constexpr _SimdWrapperBase(_BuiltinType __init)
      : _M_data(__init)
    {}

    _BuiltinType _M_data;
  };

// Default constructor value-initializes (zeroes) _M_data.
template <typename _BuiltinType>
  struct _SimdWrapperBase<true, _BuiltinType> // with padding that needs to
					      // never become SNaN
  {
    _GLIBCXX_SIMD_INTRINSIC constexpr _SimdWrapperBase() : _M_data() {}
    _GLIBCXX_SIMD_INTRINSIC constexpr _SimdWrapperBase(_BuiltinType __init)
      : _M_data(__init)
    {}

    _BuiltinType _M_data;
  };

// }}}
// _SimdWrapper{{{
// Wrapper around the builtin vector type holding _Width elements of _Tp.
// The vector (_M_data, inherited from _SimdWrapperBase) may have more
// elements than _Width; see _S_full_size and _S_is_partial.
//
// NOTE(review): the base is selected with `sizeof(_Tp) * _Width == sizeof(
// vector)`, i.e. the zero-initializing base is used when there is *no*
// padding. The comments on _SimdWrapperBase suggest the opposite intent;
// confirm before changing, since it affects triviality of the default ctor.
template <typename _Tp, size_t _Width>
  struct _SimdWrapper<
    _Tp, _Width,
    void_t<__vector_type_t<_Tp, _Width>, __intrinsic_type_t<_Tp, _Width>>>
    : _SimdWrapperBase<__has_iec559_behavior<__signaling_NaN, _Tp>::value
			 && sizeof(_Tp) * _Width
			      == sizeof(__vector_type_t<_Tp, _Width>),
		       __vector_type_t<_Tp, _Width>>
  {
    using _Base
      = _SimdWrapperBase<__has_iec559_behavior<__signaling_NaN, _Tp>::value
			   && sizeof(_Tp) * _Width
				== sizeof(__vector_type_t<_Tp, _Width>),
			 __vector_type_t<_Tp, _Width>>;

    static_assert(__is_vectorizable_v<_Tp>);
    static_assert(_Width >= 2); // 1 doesn't make sense, use _Tp directly then

    using _BuiltinType = __vector_type_t<_Tp, _Width>;
    using value_type = _Tp;

    // Number of elements of the underlying vector type.
    static inline constexpr size_t _S_full_size
      = sizeof(_BuiltinType) / sizeof(value_type);
    // Number of elements that are actually in use.
    static inline constexpr int _S_size = _Width;
    static inline constexpr bool _S_is_partial = _S_full_size != _S_size;

    using _Base::_M_data;

    // Returns the same vector as a wrapper over all _S_full_size elements.
    _GLIBCXX_SIMD_INTRINSIC constexpr _SimdWrapper<_Tp, _S_full_size>
    __as_full_vector() const
    { return _M_data; }

    // Initializes element __i from __init.begin()[__i] for __i < _Width.
    _GLIBCXX_SIMD_INTRINSIC constexpr _SimdWrapper(initializer_list<_Tp> __init)
      : _Base(__generate_from_n_evaluations<_Width, _BuiltinType>(
	[&](auto __i) { return __init.begin()[__i.value]; })) {}

    _GLIBCXX_SIMD_INTRINSIC constexpr _SimdWrapper() = default;
    _GLIBCXX_SIMD_INTRINSIC constexpr _SimdWrapper(const _SimdWrapper&)
      = default;
    _GLIBCXX_SIMD_INTRINSIC constexpr _SimdWrapper(_SimdWrapper&&) = default;

    _GLIBCXX_SIMD_INTRINSIC constexpr _SimdWrapper&
    operator=(const _SimdWrapper&) = default;
    _GLIBCXX_SIMD_INTRINSIC constexpr _SimdWrapper&
    operator=(_SimdWrapper&&) = default;

    // Implicit construction from the builtin vector type or from the
    // corresponding intrinsic type.
    template <typename _V, typename = enable_if_t<disjunction_v<
			     is_same<_V, __vector_type_t<_Tp, _Width>>,
			     is_same<_V, __intrinsic_type_t<_Tp, _Width>>>>>
      _GLIBCXX_SIMD_INTRINSIC constexpr _SimdWrapper(_V __x)
      // __vector_bitcast can convert e.g. __m128 to __vector(2) float
      : _Base(__vector_bitcast<_Tp, _Width>(__x)) {}

    // Conversion to a _SimdTuple of at most _Width scalar-ABI members; member
    // __i is initialized from element __i.
    template <typename... _As,
	      typename = enable_if_t<((is_same_v<simd_abi::scalar, _As> && ...)
				      && sizeof...(_As) <= _Width)>>
      _GLIBCXX_SIMD_INTRINSIC constexpr
      operator _SimdTuple<_Tp, _As...>() const
      {
	// Reserved name: a user macro must not be able to break this header.
	const auto& __dd = _M_data; // workaround for GCC7 ICE
	return __generate_from_n_evaluations<sizeof...(_As),
					     _SimdTuple<_Tp, _As...>>([&](
	  auto __i) constexpr { return __dd[int(__i)]; });
      }

    _GLIBCXX_SIMD_INTRINSIC constexpr operator const _BuiltinType&() const
    { return _M_data; }

    _GLIBCXX_SIMD_INTRINSIC constexpr operator _BuiltinType&()
    { return _M_data; }

    _GLIBCXX_SIMD_INTRINSIC constexpr _Tp operator[](size_t __i) const
    { return _M_data[__i]; }

    template <size_t __i>
      _GLIBCXX_SIMD_INTRINSIC constexpr _Tp operator[](_SizeConstant<__i>) const
      { return _M_data[__i]; }

    _GLIBCXX_SIMD_INTRINSIC constexpr void _M_set(size_t __i, _Tp __x)
    { _M_data[__i] = __x; }

    _GLIBCXX_SIMD_INTRINSIC
    constexpr bool _M_is_constprop() const
    { return __builtin_constant_p(_M_data); }

    // Returns true only if _M_data is known at compile time and the first
    // _Width elements have all bits clear. Floating-point elements are
    // compared via their integer bit pattern.
    _GLIBCXX_SIMD_INTRINSIC constexpr bool _M_is_constprop_none_of() const
    {
      if (__builtin_constant_p(_M_data))
	{
	  bool __r = true;
	  if constexpr (is_floating_point_v<_Tp>)
	    {
	      using _Ip = __int_for_sizeof_t<_Tp>;
	      const auto __intdata = __vector_bitcast<_Ip>(_M_data);
	      __execute_n_times<_Width>(
		[&](auto __i) { __r &= __intdata[__i.value] == _Ip(); });
	    }
	  else
	    __execute_n_times<_Width>(
	      [&](auto __i) { __r &= _M_data[__i.value] == _Tp(); });
	  return __r;
	}
      return false;
    }

    // Returns true only if _M_data is known at compile time and the first
    // _Width elements have all bits set. Floating-point elements are compared
    // via their integer bit pattern.
    _GLIBCXX_SIMD_INTRINSIC constexpr bool _M_is_constprop_all_of() const
    {
      if (__builtin_constant_p(_M_data))
	{
	  bool __r = true;
	  if constexpr (is_floating_point_v<_Tp>)
	    {
	      using _Ip = __int_for_sizeof_t<_Tp>;
	      const auto __intdata = __vector_bitcast<_Ip>(_M_data);
	      __execute_n_times<_Width>(
		[&](auto __i) { __r &= __intdata[__i.value] == ~_Ip(); });
	    }
	  else
	    {
	      // ~_Tp() is promoted to int (-1) for types narrower than int.
	      // Converting back to _Tp makes the comparison work for narrow
	      // unsigned element types as well.
	      constexpr _Tp __all_ones = static_cast<_Tp>(~_Tp());
	      __execute_n_times<_Width>(
		[&](auto __i) { __r &= _M_data[__i.value] == __all_ones; });
	    }
	  return __r;
	}
      return false;
    }
  };

// }}}

// __vectorized_sizeof {{{
// Returns the vector width in bytes selected for _Tp from the enabled
// instruction-set flags: 0 if _Tp is not vectorizable, sizeof(_Tp) if none of
// the checks below applies, otherwise 16, 32 or 64. The checks are ordered;
// the first matching one determines the result.
template <typename _Tp>
  constexpr size_t
  __vectorized_sizeof()
  {
    if constexpr (!__is_vectorizable_v<_Tp>)
      return 0;

    if constexpr (sizeof(_Tp) <= 8)
      {
	// X86:
	if constexpr (__have_avx512bw)
	  return 64;
	if constexpr (__have_avx512f && sizeof(_Tp) >= 4)
	  return 64;
	if constexpr (__have_avx2)
	  return 32;
	if constexpr (__have_avx && is_floating_point_v<_Tp>)
	  return 32;
	if constexpr (__have_sse2)
	  return 16;
	if constexpr (__have_sse && is_same_v<_Tp, float>)
	  return 16;
	/* The following is too much trouble because of mixed MMX and x87 code.
	 * While nothing here explicitly calls MMX instructions or registers,
	 * they are still emitted but no EMMS cleanup is done.
	if constexpr (__have_mmx && sizeof(_Tp) <= 4 && is_integral_v<_Tp>)
	  return 8;
	 */

	// PowerPC:
	if constexpr (__have_power8vec
		      || (__have_power_vmx && (sizeof(_Tp) < 8))
		      || (__have_power_vsx && is_floating_point_v<_Tp>) )
	  return 16;

	// ARM:
	if constexpr (__have_neon_a64
		      || (__have_neon_a32 && !is_same_v<_Tp, double>) )
	  return 16;
	if constexpr (__have_neon
		      && sizeof(_Tp) < 8
		      // Only allow fp if the user allows non-IEC559 fp (e.g.
		      // via -ffast-math). ARMv7 NEON fp is not conforming to
		      // IEC559.
		      && (__support_neon_float || !is_floating_point_v<_Tp>))
	  return 16;
      }

    // No vector ISA matched (or sizeof(_Tp) > 8): a single element.
    return sizeof(_Tp);
  }

// }}}
namespace simd_abi {
// most of simd_abi is defined in simd_detail.h
// 64 if AVX512BW is available and _Tp is one byte wide, otherwise 32.
template <typename _Tp>
  inline constexpr int max_fixed_size
    = (__have_avx512bw && sizeof(_Tp) == 1) ? 64 : 32;

// compatible {{{
// x86-64 and AArch64: 16-byte vectors for element types of up to 8 bytes,
// scalar otherwise.
#if defined __x86_64__ || defined __aarch64__
template <typename _Tp>
  using compatible = conditional_t<(sizeof(_Tp) <= 8), _VecBuiltin<16>, scalar>;
#elif defined __ARM_NEON
// FIXME: not sure, probably needs to be scalar (or dependent on the hard-float
// ABI?)
// 16-byte vectors for element types smaller than 8 bytes; floating-point only
// if __support_neon_float is set.
template <typename _Tp>
  using compatible
    = conditional_t<(sizeof(_Tp) < 8
		     && (__support_neon_float || !is_floating_point_v<_Tp>)),
		    _VecBuiltin<16>, scalar>;
#else
// Every other target: scalar.
template <typename>
  using compatible = scalar;
#endif

// }}}
// native {{{
template <typename _Tp>
  constexpr auto
  __determine_native_abi()
  {
    constexpr size_t __bytes = __vectorized_sizeof<_Tp>();
    if constexpr (__bytes == sizeof(_Tp))
      return static_cast<scalar*>(nullptr);
    else if constexpr (__have_avx512vl || (__have_avx512f && __bytes == 64))
      return static_cast<_VecBltnBtmsk<__bytes>*>(nullptr);
    else
      return static_cast<_VecBuiltin<__bytes>*>(nullptr);
  }

// The ABI tag selected by __determine_native_abi<_Tp>(); only available for
// vectorizable _Tp.
template <typename _Tp, typename = enable_if_t<__is_vectorizable_v<_Tp>>>
  using native = remove_pointer_t<decltype(__determine_native_abi<_Tp>())>;

// }}}
// __default_abi {{{
// The ABI used when none is given: the template named by the macro
// _GLIBCXX_SIMD_DEFAULT_ABI if that is defined, compatible<_Tp> otherwise.
#if defined _GLIBCXX_SIMD_DEFAULT_ABI
template <typename _Tp>
  using __default_abi = _GLIBCXX_SIMD_DEFAULT_ABI<_Tp>;
#else
template <typename _Tp>
  using __default_abi = compatible<_Tp>;
#endif

// }}}
} // namespace simd_abi

// traits {{{1
// is_abi_tag {{{2
// false_type unless _Tp has a nested type _IsValidAbiTag.
template <typename _Tp, typename = void_t<>>
  struct is_abi_tag : false_type {};

// _Tp declares _IsValidAbiTag: the result is whatever that type derives to.
template <typename _Tp>
  struct is_abi_tag<_Tp, void_t<typename _Tp::_IsValidAbiTag>>
  : public _Tp::_IsValidAbiTag {};

template <typename _Tp>
  inline constexpr bool is_abi_tag_v = is_abi_tag<_Tp>::value;

// is_simd(_mask) {{{2
// Primary template: false. A true_type specialization for simd<_Tp, _Abi>
// is declared together with the simd class template.
template <typename _Tp>
  struct is_simd : public false_type {};

template <typename _Tp>
  inline constexpr bool is_simd_v = is_simd<_Tp>::value;

// Primary template: false. A true_type specialization for
// simd_mask<_Tp, _Abi> is declared together with the simd_mask class template.
template <typename _Tp>
  struct is_simd_mask : public false_type {};

template <typename _Tp>
inline constexpr bool is_simd_mask_v = is_simd_mask<_Tp>::value;

// simd_size {{{2
// Empty (no `value` member) unless _Tp is vectorizable and _Abi is an ABI tag.
template <typename _Tp, typename _Abi, typename = void>
  struct __simd_size_impl {};

// Valid combination: the size is _Abi::_S_size<_Tp>.
template <typename _Tp, typename _Abi>
  struct __simd_size_impl<
    _Tp, _Abi,
    enable_if_t<conjunction_v<__is_vectorizable<_Tp>, is_abi_tag<_Abi>>>>
    : _SizeConstant<_Abi::template _S_size<_Tp>> {};

// Number of elements of simd<_Tp, _Abi>; _Abi defaults to the default ABI for
// _Tp.
template <typename _Tp, typename _Abi = simd_abi::__default_abi<_Tp>>
  struct simd_size : __simd_size_impl<_Tp, _Abi> {};

template <typename _Tp, typename _Abi = simd_abi::__default_abi<_Tp>>
  inline constexpr size_t simd_size_v = simd_size<_Tp, _Abi>::value;

// simd_abi::deduce {{{2
// Implementation of simd_abi::deduce; only declared here.
template <typename _Tp, size_t _Np, typename = void>
  struct __deduce_impl;

namespace simd_abi {
/**
 * @tparam _Tp   The requested `value_type` for the elements.
 * @tparam _Np   The requested number of elements.
 * @tparam _Abis This parameter is ignored, since this implementation cannot
 * make any use of it. Either a good native ABI is matched and used as `type`
 * alias, or the `fixed_size<_Np>` ABI is used, which internally is built from
 * the best matching native ABIs.
 */
template <typename _Tp, size_t _Np, typename...>
  struct deduce : __deduce_impl<_Tp, _Np> {};

// Shorthand for deduce<_Tp, _Np, _Abis...>::type.
template <typename _Tp, size_t _Np, typename... _Abis>
  using deduce_t = typename deduce<_Tp, _Np, _Abis...>::type;
} // namespace simd_abi

// }}}2
// rebind_simd {{{2
// rebind_simd<_Tp, _V>::type is _V (a simd or simd_mask) with its element
// type replaced by _Tp and the same number of elements. The ABI tag is
// obtained from simd_abi::deduce_t; if that is ill-formed, no specialization
// matches and the trait stays incomplete.
template <typename _Tp, typename _V, typename = void>
  struct rebind_simd;

// _V is a simd.
template <typename _Tp, typename _Up, typename _Abi>
  struct rebind_simd<
    _Tp, simd<_Up, _Abi>,
    void_t<simd_abi::deduce_t<_Tp, simd_size_v<_Up, _Abi>, _Abi>>>
  {
    using type
      = simd<_Tp, simd_abi::deduce_t<_Tp, simd_size_v<_Up, _Abi>, _Abi>>;
  };

// _V is a simd_mask.
template <typename _Tp, typename _Up, typename _Abi>
  struct rebind_simd<
    _Tp, simd_mask<_Up, _Abi>,
    void_t<simd_abi::deduce_t<_Tp, simd_size_v<_Up, _Abi>, _Abi>>>
  {
    using type
      = simd_mask<_Tp, simd_abi::deduce_t<_Tp, simd_size_v<_Up, _Abi>, _Abi>>;
  };

template <typename _Tp, typename _V>
  using rebind_simd_t = typename rebind_simd<_Tp, _V>::type;

// resize_simd {{{2
// resize_simd<_Np, _V>::type is _V (a simd or simd_mask) with the same element
// type but _Np elements. The ABI tag is obtained from simd_abi::deduce_t; if
// that is ill-formed, no specialization matches and the trait stays
// incomplete.
template <int _Np, typename _V, typename = void>
  struct resize_simd;

// _V is a simd.
template <int _Np, typename _Tp, typename _Abi>
  struct resize_simd<_Np, simd<_Tp, _Abi>,
		     void_t<simd_abi::deduce_t<_Tp, _Np, _Abi>>>
  { using type = simd<_Tp, simd_abi::deduce_t<_Tp, _Np, _Abi>>; };

// _V is a simd_mask.
template <int _Np, typename _Tp, typename _Abi>
  struct resize_simd<_Np, simd_mask<_Tp, _Abi>,
		     void_t<simd_abi::deduce_t<_Tp, _Np, _Abi>>>
  { using type = simd_mask<_Tp, simd_abi::deduce_t<_Tp, _Np, _Abi>>; };

template <int _Np, typename _V>
  using resize_simd_t = typename resize_simd<_Np, _V>::type;

// }}}2
// memory_alignment {{{2
// The value of vector_aligned_tag::_S_alignment<_Tp, _Up>; _Up defaults to
// _Tp::value_type.
template <typename _Tp, typename _Up = typename _Tp::value_type>
  struct memory_alignment
  : public _SizeConstant<vector_aligned_tag::_S_alignment<_Tp, _Up>> {};

template <typename _Tp, typename _Up = typename _Tp::value_type>
  inline constexpr size_t memory_alignment_v = memory_alignment<_Tp, _Up>::value;

// class template simd [simd] {{{1
// Declaration of the simd class template; the ABI tag defaults to
// simd_abi::__default_abi<_Tp>.
template <typename _Tp, typename _Abi = simd_abi::__default_abi<_Tp>>
  class simd;

// Every specialization of simd satisfies is_simd.
template <typename _Tp, typename _Abi>
  struct is_simd<simd<_Tp, _Abi>> : public true_type {};

// simd using the native ABI for _Tp.
template <typename _Tp>
  using native_simd = simd<_Tp, simd_abi::native<_Tp>>;

// simd with exactly _Np elements using the fixed_size ABI.
template <typename _Tp, int _Np>
  using fixed_size_simd = simd<_Tp, simd_abi::fixed_size<_Np>>;

// simd with _Np elements using the ABI chosen by simd_abi::deduce_t.
template <typename _Tp, size_t _Np>
  using __deduced_simd = simd<_Tp, simd_abi::deduce_t<_Tp, _Np>>;

// class template simd_mask [simd_mask] {{{1
// Declaration of the simd_mask class template; the ABI tag defaults to
// simd_abi::__default_abi<_Tp>.
template <typename _Tp, typename _Abi = simd_abi::__default_abi<_Tp>>
  class simd_mask;

// Every specialization of simd_mask satisfies is_simd_mask.
template <typename _Tp, typename _Abi>
  struct is_simd_mask<simd_mask<_Tp, _Abi>> : public true_type {};

// simd_mask using the native ABI for _Tp.
template <typename _Tp>
  using native_simd_mask = simd_mask<_Tp, simd_abi::native<_Tp>>;

// simd_mask with exactly _Np elements using the fixed_size ABI.
template <typename _Tp, int _Np>
  using fixed_size_simd_mask = simd_mask<_Tp, simd_abi::fixed_size<_Np>>;

// simd_mask with _Np elements using the ABI chosen by simd_abi::deduce_t.
template <typename _Tp, size_t _Np>
  using __deduced_simd_mask = simd_mask<_Tp, simd_abi::deduce_t<_Tp, _Np>>;

// casts [simd.casts] {{{1
// static_simd_cast {{{2
// Computes the return type of static_simd_cast<_Tp>(simd<_Up, _Ap>).
// _Tp is either a simd/simd_mask type or an element type; the bool parameter
// records whether _Tp is a simd.
template <typename _Tp, typename _Up, typename _Ap, bool = is_simd_v<_Tp>,
	  typename = void>
  struct __static_simd_cast_return_type;

// _Tp is a simd_mask: use the result for the corresponding simd type.
template <typename _Tp, typename _A0, typename _Up, typename _Ap>
  struct __static_simd_cast_return_type<simd_mask<_Tp, _A0>, _Up, _Ap, false,
					void>
  : __static_simd_cast_return_type<simd<_Tp, _A0>, _Up, _Ap> {};

// _Tp is a simd with the same number of elements as the source: the result is
// _Tp itself. No specialization matches for a different number of elements.
template <typename _Tp, typename _Up, typename _Ap>
  struct __static_simd_cast_return_type<
    _Tp, _Up, _Ap, true, enable_if_t<_Tp::size() == simd_size_v<_Up, _Ap>>>
  { using type = _Tp; };

// _Tp is the element type of the source: the source type is kept.
template <typename _Tp, typename _Ap>
  struct __static_simd_cast_return_type<_Tp, _Tp, _Ap, false,
#ifdef _GLIBCXX_SIMD_FIX_P2TS_ISSUE66
					enable_if_t<__is_vectorizable_v<_Tp>>
#else
					void
#endif
					>
  { using type = simd<_Tp, _Ap>; };

// Like make_signed, but yields _Tp unchanged for non-integral types.
template <typename _Tp, typename = void>
  struct __safe_make_signed { using type = _Tp;};

template <typename _Tp>
  struct __safe_make_signed<_Tp, enable_if_t<is_integral_v<_Tp>>>
  {
    // the extra make_unsigned_t is because of PR85951
    using type = make_signed_t<make_unsigned_t<_Tp>>;
  };

// Shorthand for __safe_make_signed<_Tp>::type.
template <typename _Tp>
  using safe_make_signed_t = typename __safe_make_signed<_Tp>::type;

// _Tp is an element type different from _Up. The source ABI is kept
// (simd<_Tp, _Ap>) if both are integral types that map to the same signed
// type and -- unless _GLIBCXX_SIMD_FIX_P2TS_ISSUE65 is defined -- differ in
// signedness. Otherwise the result is a fixed_size_simd with the same number
// of elements as the source.
template <typename _Tp, typename _Up, typename _Ap>
  struct __static_simd_cast_return_type<_Tp, _Up, _Ap, false,
#ifdef _GLIBCXX_SIMD_FIX_P2TS_ISSUE66
					enable_if_t<__is_vectorizable_v<_Tp>>
#else
					void
#endif
					>
  {
    using type = conditional_t<
      (is_integral_v<_Up> && is_integral_v<_Tp> &&
#ifndef _GLIBCXX_SIMD_FIX_P2TS_ISSUE65
       is_signed_v<_Up> != is_signed_v<_Tp> &&
#endif
       is_same_v<safe_make_signed_t<_Up>, safe_make_signed_t<_Tp>>),
      simd<_Tp, _Ap>, fixed_size_simd<_Tp, simd_size_v<_Up, _Ap>>>;
  };

// Converts __x to the simd type _R computed by __static_simd_cast_return_type.
// If _R is the type of __x, __x is returned unchanged; otherwise the data is
// converted by the matching _SimdConverter.
template <typename _Tp, typename _Up, typename _Ap,
	  typename _R
	  = typename __static_simd_cast_return_type<_Tp, _Up, _Ap>::type>
  _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR _R
  static_simd_cast(const simd<_Up, _Ap>& __x)
  {
    if constexpr (!is_same_v<_R, simd<_Up, _Ap>>)
      {
	using _Converter = _SimdConverter<_Up, _Ap, typename _R::value_type,
					  typename _R::abi_type>;
	_Converter __convert;
	return _R(__private_init, __convert(__data(__x)));
      }
    else
      return __x;
  }

namespace __proposed {
// Mask overload: converts __x to the mask type of the simd type _R computed
// by __static_simd_cast_return_type, using the _MaskImpl of the result ABI.
template <typename _Tp, typename _Up, typename _Ap,
	  typename _R
	  = typename __static_simd_cast_return_type<_Tp, _Up, _Ap>::type>
  _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR typename _R::mask_type
  static_simd_cast(const simd_mask<_Up, _Ap>& __x)
  {
    using _Mask = typename _R::mask_type;
    using _Impl = typename _Mask::abi_type::_MaskImpl;
    using _Vp = typename _Mask::simd_type::value_type;
    return {__private_init, _Impl::template _S_convert<_Vp>(__x)};
  }

// Reinterprets the object representation of the simd __x as the simd type
// _To. _To and the source type must have equal size, and the element and
// member types involved must be trivially copyable.
template <typename _To, typename _Up, typename _Abi>
  _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR
  _To
  simd_bit_cast(const simd<_Up, _Abi>& __x)
  {
    using _SrcSimd = simd<_Up, _Abi>;
    using _SrcMember = typename _SimdTraits<_Up, _Abi>::_SimdMember;
    using _DstValue = typename _To::value_type;
    using _DstMember
      = typename _SimdTraits<_DstValue, typename _To::abi_type>::_SimdMember;
    // with concepts, the following should be constraints
    static_assert(sizeof(_To) == sizeof(_SrcSimd));
    static_assert(is_trivially_copyable_v<_DstValue>
		    && is_trivially_copyable_v<_Up>);
    static_assert(is_trivially_copyable_v<_DstMember>
		    && is_trivially_copyable_v<_SrcMember>);
    // Prefer the compiler builtin and fall back to the library __bit_cast.
#if __has_builtin(__builtin_bit_cast)
    return {__private_init, __builtin_bit_cast(_DstMember, __data(__x))};
#else
    return {__private_init, __bit_cast<_DstMember>(__data(__x))};
#endif
  }

// Reinterprets the object representation of the simd_mask __x as _To. _To
// and the source type must have equal size and the source must be trivially
// copyable.
template <typename _To, typename _Up, typename _Abi>
  _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR
  _To
  simd_bit_cast(const simd_mask<_Up, _Abi>& __x)
  {
    using _SrcMask = simd_mask<_Up, _Abi>;
    static_assert(sizeof(_To) == sizeof(_SrcMask));
    static_assert(is_trivially_copyable_v<_SrcMask>);
    if constexpr (!is_simd_v<_To>)
      {
	// _To is not a simd: cast to _To directly.
	static_assert(is_trivially_copyable_v<_To>);
#if __has_builtin(__builtin_bit_cast)
	return __builtin_bit_cast(_To, __x);
#else
	return __bit_cast<_To>(__x);
#endif
      }
    else
      {
	// _To can be simd<T, A>, specifically simd<T, fixed_size<N>>, in which
	// case _To is not trivially copyable. Cast to its member type and
	// construct _To from that.
	using _DstValue = typename _To::value_type;
	using _DstMember =
	  typename _SimdTraits<_DstValue, typename _To::abi_type>::_SimdMember;
	static_assert(is_trivially_copyable_v<_DstMember>);
#if __has_builtin(__builtin_bit_cast)
	return {__private_init, __builtin_bit_cast(_DstMember, __x)};
#else
	return {__private_init, __bit_cast<_DstMember>(__x)};
#endif
      }
  }
} // namespace __proposed

// simd_cast {{{2
// Forwards to static_simd_cast<_Tp>(__x). The parameter type goes through
// _ValuePreserving<_Up, _To>, and the trailing return type removes this
// overload from overload resolution when static_simd_cast<_Tp>(__x) is
// ill-formed.
template <typename _Tp, typename _Up, typename _Ap,
	  typename _To = __value_type_or_identity_t<_Tp>>
  _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR auto
  simd_cast(const simd<_ValuePreserving<_Up, _To>, _Ap>& __x)
    -> decltype(static_simd_cast<_Tp>(__x))
  { return static_simd_cast<_Tp>(__x); }

namespace __proposed {
// simd_mask counterpart of simd_cast: forwards to static_simd_cast<_Tp>(__x)
// and is removed from overload resolution when that call is ill-formed.
template <typename _Tp, typename _Up, typename _Ap,
	  typename _To = __value_type_or_identity_t<_Tp>>
  _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR auto
  simd_cast(const simd_mask<_ValuePreserving<_Up, _To>, _Ap>& __x)
    -> decltype(static_simd_cast<_Tp>(__x))
  { return static_simd_cast<_Tp>(__x); }
} // namespace __proposed

// }}}2
// resizing_simd_cast {{{
namespace __proposed {
/* Proposed spec:

template <class T, class U, class Abi>
T resizing_simd_cast(const simd<U, Abi>& x)

p1  Constraints:
    - is_simd_v<T> is true and
    - T::value_type is the same type as U

p2  Returns:
    A simd object with the i^th element initialized to x[i] for all i in the
    range of [0, min(T::size(), simd_size_v<U, Abi>)). If T::size() is larger
    than simd_size_v<U, Abi>, the remaining elements are value-initialized.

template <class T, class U, class Abi>
T resizing_simd_cast(const simd_mask<U, Abi>& x)

p1  Constraints: is_simd_mask_v<T> is true

p2  Returns:
    A simd_mask object with the i^th element initialized to x[i] for all i in
    the range of [0, min(T::size(), simd_size_v<U, Abi>)). If T::size() is
    larger than simd_size_v<U, Abi>, the remaining elements are initialized to
    false.

 */

// Converts __x to the simd type _Tp, which must have the same value_type.
// Elements [0, min(_Tp::size(), size of __x)) are copied; when _Tp is wider,
// the remaining elements come from a value-initialized _Tp.
template <typename _Tp, typename _Up, typename _Ap>
  _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR enable_if_t<
  conjunction_v<is_simd<_Tp>, is_same<typename _Tp::value_type, _Up>>, _Tp>
  resizing_simd_cast(const simd<_Up, _Ap>& __x)
  {
    [[maybe_unused]] constexpr size_t __from_size = simd_size_v<_Up, _Ap>;
    [[maybe_unused]] constexpr size_t __to_size = _Tp::size();

    if constexpr (is_same_v<typename _Tp::abi_type, _Ap>)
      // identical ABI tag (value_type is already constrained to match)
      return __x;
    else if constexpr (__from_size == 1)
      {
	// one source element: start from a value-initialized result
	_Tp __result{};
	__result[0] = __x[0];
	return __result;
      }
    else if constexpr (__to_size == 1)
      // one destination element: only x[0] survives
      return __x[0];
    else if constexpr (sizeof(_Tp) == sizeof(__x)
		       && !__is_fixed_size_abi_v<_Ap>)
      {
	// same object size and not fixed_size: reinterpret the storage that
	// _Ap::_S_masked returns for the source data
	return {__private_init,
		__vector_bitcast<typename _Tp::value_type, __to_size>(
		  _Ap::_S_masked(__data(__x))._M_data)};
      }
    else
      {
	// generic path: byte-copy the common prefix into a zeroed result
	constexpr size_t __n_common
	  = __to_size < __from_size ? __to_size : __from_size;
	_Tp __result{};
	__builtin_memcpy(&__data(__result), &__data(__x),
			 sizeof(_Up) * __n_common);
	return __result;
      }
  }

// Converts the mask __x to the simd_mask type _Tp by delegating to the
// _S_convert member of _Tp's mask implementation.
template <typename _Tp, typename _Up, typename _Ap>
  _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR
  enable_if_t<is_simd_mask_v<_Tp>, _Tp>
  resizing_simd_cast(const simd_mask<_Up, _Ap>& __x)
  {
    using _ToMaskImpl = typename _Tp::abi_type::_MaskImpl;
    using _ToValue = typename _Tp::simd_type::value_type;
    return {__private_init, _ToMaskImpl::template _S_convert<_ToValue>(__x)};
  }
} // namespace __proposed

// }}}
// to_fixed_size {{{2
// Identity overload: the argument already is a fixed_size_simd, so a copy of
// it is returned.
template <typename _Tp, int _Np>
  _GLIBCXX_SIMD_INTRINSIC fixed_size_simd<_Tp, _Np>
  to_fixed_size(const fixed_size_simd<_Tp, _Np>& __x)
  { return __x; }

// Identity overload: the argument already is a fixed_size_simd_mask, so a copy
// of it is returned.
template <typename _Tp, int _Np>
  _GLIBCXX_SIMD_INTRINSIC fixed_size_simd_mask<_Tp, _Np>
  to_fixed_size(const fixed_size_simd_mask<_Tp, _Np>& __x)
  { return __x; }

// Converts a simd of any ABI to the fixed_size simd with the same number of
// elements, reading every element through operator[] in a generator.
template <typename _Tp, typename _Ap>
  _GLIBCXX_SIMD_INTRINSIC auto
  to_fixed_size(const simd<_Tp, _Ap>& __x)
  {
    using _FixedSimd = simd<_Tp, simd_abi::fixed_size<simd_size_v<_Tp, _Ap>>>;
    return _FixedSimd([&__x](auto __idx) constexpr { return __x[__idx]; });
  }

// Converts a simd_mask of any ABI to the fixed_size mask with the same number
// of elements by assigning the elements one at a time.
template <typename _Tp, typename _Ap>
  _GLIBCXX_SIMD_INTRINSIC auto
  to_fixed_size(const simd_mask<_Tp, _Ap>& __x)
  {
    constexpr int _Size = simd_mask<_Tp, _Ap>::size();
    fixed_size_simd_mask<_Tp, _Size> __fixed;
    // every element is written below, so no initializer is given above
    __execute_n_times<_Size>([&](auto __idx) constexpr {
      __fixed[__idx] = __x[__idx];
    });
    return __fixed;
  }

// to_native {{{2
// Converts a fixed_size simd to native_simd<_Tp>. Only participates in overload
// resolution when the element counts agree. The data round-trips through a
// stack buffer aligned for a vector_aligned native load.
template <typename _Tp, int _Np>
  _GLIBCXX_SIMD_INTRINSIC
  enable_if_t<(_Np == native_simd<_Tp>::size()), native_simd<_Tp>>
  to_native(const fixed_size_simd<_Tp, _Np>& __x)
  {
    using _Native = native_simd<_Tp>;
    alignas(memory_alignment_v<_Native>) _Tp __buffer[_Np];
    __x.copy_to(__buffer, vector_aligned);
    return _Native(__buffer, vector_aligned);
  }

// Converts a fixed_size mask to native_simd_mask<_Tp>. Only participates in
// overload resolution when the element counts agree.
//
// _Np is declared as int, like in the fixed_size_simd overloads of to_native
// and to_fixed_size, so that it can be deduced from
// fixed_size_simd_mask<_Tp, _Np>; a size_t parameter does not have the type of
// the corresponding template argument. The generator lambda is passed with the
// __private_init tag, matching how split() constructs masks from generators.
template <typename _Tp, int _Np>
  _GLIBCXX_SIMD_INTRINSIC
  enable_if_t<(_Np == native_simd_mask<_Tp>::size()), native_simd_mask<_Tp>>
  to_native(const fixed_size_simd_mask<_Tp, _Np>& __x)
  {
    return native_simd_mask<_Tp>(__private_init, [&](auto __i) constexpr {
      return __x[__i];
    });
  }

// to_compatible {{{2
// Converts a fixed_size simd to simd<_Tp> (default ABI). Only participates in
// overload resolution when the element counts agree. The data round-trips
// through a stack buffer aligned for a vector_aligned load.
//
// _Np is declared as int, like in to_native(const fixed_size_simd&), so that
// it can be deduced from simd_abi::fixed_size<_Np>; a size_t parameter does
// not have the type of the corresponding template argument.
template <typename _Tp, int _Np>
  _GLIBCXX_SIMD_INTRINSIC enable_if_t<(_Np == simd<_Tp>::size()), simd<_Tp>>
  to_compatible(const simd<_Tp, simd_abi::fixed_size<_Np>>& __x)
  {
    alignas(memory_alignment_v<simd<_Tp>>) _Tp __mem[_Np];
    __x.copy_to(__mem, vector_aligned);
    return {__mem, vector_aligned};
  }

// Converts a fixed_size mask to simd_mask<_Tp> (default ABI). Only
// participates in overload resolution when the element counts agree.
//
// _Np is declared as int so that it can be deduced from
// simd_abi::fixed_size<_Np>. The generator lambda is passed with the
// __private_init tag, matching how split() constructs masks from generators.
template <typename _Tp, int _Np>
  _GLIBCXX_SIMD_INTRINSIC
  enable_if_t<(_Np == simd_mask<_Tp>::size()), simd_mask<_Tp>>
  to_compatible(const simd_mask<_Tp, simd_abi::fixed_size<_Np>>& __x)
  {
    return simd_mask<_Tp>(__private_init, [&](auto __i) constexpr {
      return __x[__i];
    });
  }

// masked assignment [simd_mask.where] {{{1

// where_expression {{{1
// const_where_expression<M, T> {{{2
// Read-only view of the elements of a value that are selected by a mask.
// The object only stores references to the mask and the value; it is neither
// copyable nor assignable, and its operations are callable on rvalues only.
template <typename _M, typename _Tp>
  class const_where_expression
  {
    using _V = _Tp;
    static_assert(is_same_v<_V, __remove_cvref_t<_Tp>>);

    // stand-in that gives arithmetic _V a nested value_type (see below)
    struct _Wrapper { using value_type = _V; };

  protected:
    using _Impl = typename _V::_Impl;

    // _V itself if _V is arithmetic, otherwise _V::value_type
    using value_type =
      typename conditional_t<is_arithmetic_v<_V>, _Wrapper, _V>::value_type;

    // accessor for the stored mask reference
    _GLIBCXX_SIMD_INTRINSIC friend const _M&
    __get_mask(const const_where_expression& __x)
    { return __x._M_k; }

    // read-only accessor for the referenced value
    _GLIBCXX_SIMD_INTRINSIC friend const _Tp&
    __get_lvalue(const const_where_expression& __x)
    { return __x._M_value; }

    const _M& _M_k;
    // non-const reference so that the derived where_expression can write
    // through it
    _Tp& _M_value;

  public:
    const_where_expression(const const_where_expression&) = delete;
    const_where_expression& operator=(const const_where_expression&) = delete;

    // __dd is taken by const reference; constness is cast away only to
    // initialize _M_value. No member of this class writes through it.
    _GLIBCXX_SIMD_INTRINSIC const_where_expression(const _M& __kk,
						   const _Tp& __dd)
      : _M_k(__kk), _M_value(const_cast<_Tp&>(__dd)) {}

    // Returns a new _V produced by applying the masked unary `negate`
    // operation of _Impl to the mask and value data.
    _GLIBCXX_SIMD_INTRINSIC _V
    operator-() const&&
    {
      return {__private_init,
	      _Impl::template _S_masked_unary<negate>(__data(_M_k),
						      __data(_M_value))};
    }

    // Returns a new _V from _Impl::_S_masked_load, which receives the current
    // value data, the mask data and __mem adjusted by _Flags. The referenced
    // value is not modified.
    template <typename _Up, typename _Flags>
      [[nodiscard]] _GLIBCXX_SIMD_INTRINSIC _V
      copy_from(const _LoadStorePtr<_Up, value_type>* __mem, _Flags) const&&
      {
	return {__private_init,
		_Impl::_S_masked_load(__data(_M_value), __data(_M_k),
				      _Flags::template _S_apply<_V>(__mem))};
      }

    // Stores to __mem (adjusted by _Flags) via _Impl::_S_masked_store, which
    // receives the value data and the mask data.
    template <typename _Up, typename _Flags>
      _GLIBCXX_SIMD_INTRINSIC void
      copy_to(_LoadStorePtr<_Up, value_type>* __mem, _Flags) const&&
      {
	_Impl::_S_masked_store(__data(_M_value),
			       _Flags::template _S_apply<_V>(__mem),
			       __data(_M_k));
      }
  };

// const_where_expression<bool, T> {{{2
// Specialization for a plain bool "mask": the single value referenced by
// _M_value is selected if and only if _M_k is true. The bool is stored by
// value, the value by reference.
template <typename _Tp>
  class const_where_expression<bool, _Tp>
  {
    using _M = bool;
    using _V = _Tp;

    static_assert(is_same_v<_V, __remove_cvref_t<_Tp>>);

    // stand-in that gives arithmetic _V a nested value_type (see below)
    struct _Wrapper { using value_type = _V; };

  protected:
    // _V itself if _V is arithmetic, otherwise _V::value_type
    using value_type =
      typename conditional_t<is_arithmetic_v<_V>, _Wrapper, _V>::value_type;

    // accessor for the stored bool
    _GLIBCXX_SIMD_INTRINSIC friend const _M&
    __get_mask(const const_where_expression& __x)
    { return __x._M_k; }

    // read-only accessor for the referenced value
    _GLIBCXX_SIMD_INTRINSIC friend const _Tp&
    __get_lvalue(const const_where_expression& __x)
    { return __x._M_value; }

    const bool _M_k;
    // non-const reference so that the derived where_expression can write
    // through it
    _Tp& _M_value;

  public:
    const_where_expression(const const_where_expression&) = delete;
    const_where_expression& operator=(const const_where_expression&) = delete;

    // __dd is taken by const reference; constness is cast away only to
    // initialize _M_value. No member of this class writes through it.
    _GLIBCXX_SIMD_INTRINSIC const_where_expression(const bool __kk,
						   const _Tp& __dd)
      : _M_k(__kk), _M_value(const_cast<_Tp&>(__dd)) {}

    // negated value if selected, otherwise the value unchanged
    _GLIBCXX_SIMD_INTRINSIC _V operator-() const&&
    { return _M_k ? -_M_value : _M_value; }

    // __mem[0] converted to _V if selected, otherwise the current value; the
    // _Flags argument is not used
    template <typename _Up, typename _Flags>
      [[nodiscard]] _GLIBCXX_SIMD_INTRINSIC _V
      copy_from(const _LoadStorePtr<_Up, value_type>* __mem, _Flags) const&&
      { return _M_k ? static_cast<_V>(__mem[0]) : _M_value; }

    // writes the value to __mem[0] only if selected; the _Flags argument is
    // not used
    template <typename _Up, typename _Flags>
      _GLIBCXX_SIMD_INTRINSIC void
      copy_to(_LoadStorePtr<_Up, value_type>* __mem, _Flags) const&&
      {
	if (_M_k)
	  __mem[0] = _M_value;
      }
  };

// where_expression<M, T> {{{2
// Mutable view of the elements of a value that are selected by a mask.
// Adds (compound) assignment, increment/decrement and an in-place masked load
// on top of const_where_expression. All modifying members are rvalue-qualified
// and return void.
template <typename _M, typename _Tp>
  class where_expression : public const_where_expression<_M, _Tp>
  {
    using _Impl = typename const_where_expression<_M, _Tp>::_Impl;

    static_assert(!is_const<_Tp>::value,
		  "where_expression may only be instantiated with a non-const "
		  "_Tp parameter");

    using typename const_where_expression<_M, _Tp>::value_type;
    using const_where_expression<_M, _Tp>::_M_k;
    using const_where_expression<_M, _Tp>::_M_value;

    // mask and value must use the same ABI tag and have equally many elements
    static_assert(
      is_same<typename _M::abi_type, typename _Tp::abi_type>::value, "");
    static_assert(_M::size() == _Tp::size(), "");

    // mutable accessor for the referenced value
    _GLIBCXX_SIMD_INTRINSIC friend _Tp& __get_lvalue(where_expression& __x)
    { return __x._M_value; }

  public:
    where_expression(const where_expression&) = delete;
    where_expression& operator=(const where_expression&) = delete;

    _GLIBCXX_SIMD_INTRINSIC where_expression(const _M& __kk, _Tp& __dd)
      : const_where_expression<_M, _Tp>(__kk, __dd) {}

    // Masked assignment: __x is first converted by
    // __to_value_type_or_member_type<_Tp>, then handed to
    // _Impl::_S_masked_assign together with the mask and value data.
    template <typename _Up>
      _GLIBCXX_SIMD_INTRINSIC void operator=(_Up&& __x) &&
      {
	_Impl::_S_masked_assign(__data(_M_k), __data(_M_value),
				__to_value_type_or_member_type<_Tp>(
				  static_cast<_Up&&>(__x)));
      }

// Defines `operator __op=` as a masked compound assignment. The binary
// operation is passed to _Impl::_S_masked_cassign as a lambda that calls
// __impl.__name(__lhs, __rhs). The trailing static_assert(true) makes the
// macro invocation require a terminating semicolon.
#define _GLIBCXX_SIMD_OP_(__op, __name)                                        \
  template <typename _Up>                                                      \
    _GLIBCXX_SIMD_INTRINSIC void operator __op##=(_Up&& __x)&&                 \
    {                                                                          \
      _Impl::template _S_masked_cassign(                                       \
	__data(_M_k), __data(_M_value),                                        \
	__to_value_type_or_member_type<_Tp>(static_cast<_Up&&>(__x)),          \
	[](auto __impl, auto __lhs, auto __rhs) constexpr {                    \
	return __impl.__name(__lhs, __rhs);                                    \
	});                                                                    \
    }                                                                          \
  static_assert(true)
    _GLIBCXX_SIMD_OP_(+, _S_plus);
    _GLIBCXX_SIMD_OP_(-, _S_minus);
    _GLIBCXX_SIMD_OP_(*, _S_multiplies);
    _GLIBCXX_SIMD_OP_(/, _S_divides);
    _GLIBCXX_SIMD_OP_(%, _S_modulus);
    _GLIBCXX_SIMD_OP_(&, _S_bit_and);
    _GLIBCXX_SIMD_OP_(|, _S_bit_or);
    _GLIBCXX_SIMD_OP_(^, _S_bit_xor);
    _GLIBCXX_SIMD_OP_(<<, _S_shift_left);
    _GLIBCXX_SIMD_OP_(>>, _S_shift_right);
#undef _GLIBCXX_SIMD_OP_

    // Pre- and post-increment/decrement all return void, so the prefix and
    // postfix forms share the same implementation.
    _GLIBCXX_SIMD_INTRINSIC void operator++() &&
    {
      __data(_M_value)
	= _Impl::template _S_masked_unary<__increment>(__data(_M_k),
						       __data(_M_value));
    }

    _GLIBCXX_SIMD_INTRINSIC void operator++(int) &&
    {
      __data(_M_value)
	= _Impl::template _S_masked_unary<__increment>(__data(_M_k),
						       __data(_M_value));
    }

    _GLIBCXX_SIMD_INTRINSIC void operator--() &&
    {
      __data(_M_value)
	= _Impl::template _S_masked_unary<__decrement>(__data(_M_k),
						       __data(_M_value));
    }

    _GLIBCXX_SIMD_INTRINSIC void operator--(int) &&
    {
      __data(_M_value)
	= _Impl::template _S_masked_unary<__decrement>(__data(_M_k),
						       __data(_M_value));
    }

    // intentionally hides const_where_expression::copy_from
    // In-place variant: the result of _Impl::_S_masked_load is written back
    // into the referenced value instead of being returned.
    template <typename _Up, typename _Flags>
      _GLIBCXX_SIMD_INTRINSIC void
      copy_from(const _LoadStorePtr<_Up, value_type>* __mem, _Flags) &&
      {
	__data(_M_value)
	  = _Impl::_S_masked_load(__data(_M_value), __data(_M_k),
				  _Flags::template _S_apply<_Tp>(__mem));
      }
  };

// where_expression<bool, T> {{{2
// Mutable where-expression for a plain bool "mask": every modifying operation
// is applied to the referenced value if and only if _M_k is true. All
// modifying members are rvalue-qualified and return void.
template <typename _Tp>
  class where_expression<bool, _Tp> : public const_where_expression<bool, _Tp>
  {
    using _M = bool;
    using typename const_where_expression<_M, _Tp>::value_type;
    using const_where_expression<_M, _Tp>::_M_k;
    using const_where_expression<_M, _Tp>::_M_value;

  public:
    where_expression(const where_expression&) = delete;
    where_expression& operator=(const where_expression&) = delete;

    _GLIBCXX_SIMD_INTRINSIC where_expression(const _M& __kk, _Tp& __dd)
      : const_where_expression<_M, _Tp>(__kk, __dd) {}

// Defines `operator __op` as "if selected, apply `__op` to the value with the
// perfectly forwarded argument".
#define _GLIBCXX_SIMD_OP_(__op)                                                \
    template <typename _Up>                                                    \
      _GLIBCXX_SIMD_INTRINSIC void operator __op(_Up&& __x)&&                  \
      { if (_M_k) _M_value __op static_cast<_Up&&>(__x); }

    _GLIBCXX_SIMD_OP_(=)
    _GLIBCXX_SIMD_OP_(+=)
    _GLIBCXX_SIMD_OP_(-=)
    _GLIBCXX_SIMD_OP_(*=)
    _GLIBCXX_SIMD_OP_(/=)
    _GLIBCXX_SIMD_OP_(%=)
    _GLIBCXX_SIMD_OP_(&=)
    _GLIBCXX_SIMD_OP_(|=)
    _GLIBCXX_SIMD_OP_(^=)
    _GLIBCXX_SIMD_OP_(<<=)
    _GLIBCXX_SIMD_OP_(>>=)
  #undef _GLIBCXX_SIMD_OP_

    // Prefix and postfix forms return void and therefore share the same
    // implementation.
    _GLIBCXX_SIMD_INTRINSIC void operator++() &&
    { if (_M_k) ++_M_value; }

    _GLIBCXX_SIMD_INTRINSIC void operator++(int) &&
    { if (_M_k) ++_M_value; }

    _GLIBCXX_SIMD_INTRINSIC void operator--() &&
    { if (_M_k) --_M_value; }

    _GLIBCXX_SIMD_INTRINSIC void operator--(int) &&
    { if (_M_k) --_M_value; }

    // intentionally hides const_where_expression::copy_from
    // In-place variant: assigns __mem[0] to the referenced value if selected;
    // the _Flags argument is not used.
    template <typename _Up, typename _Flags>
      _GLIBCXX_SIMD_INTRINSIC void
      copy_from(const _LoadStorePtr<_Up, value_type>* __mem, _Flags) &&
      { if (_M_k) _M_value = __mem[0]; }
  };

// where {{{1
// Builds a mutable where_expression from a mask and a non-const simd.
// The mask parameter is spelled as a nested type of simd<_Tp, _Ap>, which is a
// non-deduced context, so _Tp and _Ap are deduced from __value alone.
template <typename _Tp, typename _Ap>
  _GLIBCXX_SIMD_INTRINSIC where_expression<simd_mask<_Tp, _Ap>, simd<_Tp, _Ap>>
  where(const typename simd<_Tp, _Ap>::mask_type& __k, simd<_Tp, _Ap>& __value)
  { return {__k, __value}; }

// Builds a read-only const_where_expression from a mask and a const simd.
// The mask parameter is spelled as a nested type of simd<_Tp, _Ap>, which is a
// non-deduced context, so _Tp and _Ap are deduced from __value alone.
template <typename _Tp, typename _Ap>
  _GLIBCXX_SIMD_INTRINSIC
    const_where_expression<simd_mask<_Tp, _Ap>, simd<_Tp, _Ap>>
    where(const typename simd<_Tp, _Ap>::mask_type& __k,
	  const simd<_Tp, _Ap>& __value)
  { return {__k, __value}; }

// Builds a mutable where_expression whose value is itself a simd_mask.
// Wrapping the type of __k in remove_const_t makes it a non-deduced context,
// so _Tp and _Ap are deduced from __value alone.
template <typename _Tp, typename _Ap>
  _GLIBCXX_SIMD_INTRINSIC
    where_expression<simd_mask<_Tp, _Ap>, simd_mask<_Tp, _Ap>>
    where(const remove_const_t<simd_mask<_Tp, _Ap>>& __k,
	  simd_mask<_Tp, _Ap>& __value)
  { return {__k, __value}; }

// Builds a read-only const_where_expression whose value is itself a simd_mask.
// Wrapping the type of __k in remove_const_t makes it a non-deduced context,
// so _Tp and _Ap are deduced from __value alone.
template <typename _Tp, typename _Ap>
  _GLIBCXX_SIMD_INTRINSIC
    const_where_expression<simd_mask<_Tp, _Ap>, simd_mask<_Tp, _Ap>>
    where(const remove_const_t<simd_mask<_Tp, _Ap>>& __k,
	  const simd_mask<_Tp, _Ap>& __value)
  { return {__k, __value}; }

// Builds the mutable bool-"masked" where_expression for an arbitrary _Tp.
// NOTE(review): _ExactBool is declared elsewhere; presumably it only accepts
// arguments of exactly type bool -- confirm at its definition.
template <typename _Tp>
  _GLIBCXX_SIMD_INTRINSIC where_expression<bool, _Tp>
  where(_ExactBool __k, _Tp& __value)
  { return {__k, __value}; }

// Builds the read-only bool-"masked" const_where_expression for an arbitrary
// _Tp.
// NOTE(review): _ExactBool is declared elsewhere; presumably it only accepts
// arguments of exactly type bool -- confirm at its definition.
template <typename _Tp>
  _GLIBCXX_SIMD_INTRINSIC const_where_expression<bool, _Tp>
  where(_ExactBool __k, const _Tp& __value)
  { return {__k, __value}; }

  // Deleted: selecting this overload (bool mask with a non-const simd value)
  // is a compile-time error.
  template <typename _Tp, typename _Ap>
    void where(bool __k, simd<_Tp, _Ap>& __value) = delete;

  // Deleted: selecting this overload (bool mask with a const simd value) is a
  // compile-time error.
  template <typename _Tp, typename _Ap>
    void where(bool __k, const simd<_Tp, _Ap>& __value) = delete;

// proposed mask iterations {{{1
namespace __proposed {
// Range over the indices of the set bits of a bitset<_Np>, in ascending order.
// begin() starts at the lowest set bit; iterators compare equal when their
// remaining bit masks are equal, and end() is the iterator with no bits left.
template <size_t _Np>
  class where_range
  {
    const bitset<_Np> __bits;

  public:
    where_range(bitset<_Np> __b) : __bits(__b) {}

    class iterator
    {
      // Bits that have not been visited yet. Stored as unsigned long long
      // because that is what bitset::to_ullong() returns in begin(); a
      // narrower type would drop high bits on targets with 32-bit size_t.
      unsigned long long __mask;
      // Index of the lowest set bit of __mask (0 if __mask is empty).
      size_t __bit;

      _GLIBCXX_SIMD_INTRINSIC void __next_bit()
      {
	// The count-trailing-zeros builtins have an undefined result for 0,
	// and an empty mask is the normal state of every end iterator. Guard
	// it, and use the long long variant to match the width of __mask.
	__bit = __mask == 0 ? 0 : __builtin_ctzll(__mask);
      }

      _GLIBCXX_SIMD_INTRINSIC void __reset_lsb()
      {
	// clears the lowest set bit: 01100100 & 01100011 = 01100000
	__mask &= (__mask - 1);
	// __asm__("btr %1,%0" : "+r"(__mask) : "r"(__bit));
      }

    public:
      iterator(decltype(__mask) __m) : __mask(__m) { __next_bit(); }
      iterator(const iterator&) = default;
      iterator(iterator&&) = default;

      // Both accessors return the current bit index by value.
      _GLIBCXX_SIMD_ALWAYS_INLINE size_t operator->() const
      { return __bit; }

      _GLIBCXX_SIMD_ALWAYS_INLINE size_t operator*() const
      { return __bit; }

      // Advances to the next set bit (or to the end state).
      _GLIBCXX_SIMD_ALWAYS_INLINE iterator& operator++()
      {
	__reset_lsb();
	__next_bit();
	return *this;
      }

      _GLIBCXX_SIMD_ALWAYS_INLINE iterator operator++(int)
      {
	iterator __tmp = *this;
	__reset_lsb();
	__next_bit();
	return __tmp;
      }

      // Equality only looks at the remaining bits.
      _GLIBCXX_SIMD_ALWAYS_INLINE bool operator==(const iterator& __rhs) const
      { return __mask == __rhs.__mask; }

      _GLIBCXX_SIMD_ALWAYS_INLINE bool operator!=(const iterator& __rhs) const
      { return __mask != __rhs.__mask; }
    };

    iterator begin() const
    { return __bits.to_ullong(); }

    iterator end() const
    { return 0; }
  };

// Returns a where_range built from __k.__to_bitset(), i.e. a range that
// iterates over the indices of the bits set in that bitset.
template <typename _Tp, typename _Ap>
  where_range<simd_size_v<_Tp, _Ap>>
  where(const simd_mask<_Tp, _Ap>& __k)
  { return __k.__to_bitset(); }

} // namespace __proposed

// }}}1
// reductions [simd.reductions] {{{1
// Reduces all elements of __v with __binary_op (plus<> by default) by
// delegating to the ABI's _SimdImpl::_S_reduce.
template <typename _Tp, typename _Abi, typename _BinaryOperation = plus<>>
  _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR _Tp
  reduce(const simd<_Tp, _Abi>& __v,
	 _BinaryOperation __binary_op = _BinaryOperation())
  { return _Abi::_SimdImpl::_S_reduce(__v, __binary_op); }

// Masked reduction. Returns __identity_element when no mask element is set.
// Otherwise reduces a copy of the value in which the unselected elements were
// replaced by __identity_element.
template <typename _M, typename _V, typename _BinaryOperation = plus<>>
  _GLIBCXX_SIMD_INTRINSIC typename _V::value_type
  reduce(const const_where_expression<_M, _V>& __x,
	 typename _V::value_type __identity_element,
	 _BinaryOperation __binary_op)
  {
    const auto& __mask = __get_mask(__x);

    // an empty selection is expected to be the unlikely case
    if (__builtin_expect(none_of(__mask), false))
      return __identity_element;

    // broadcast the identity, then overwrite the selected elements
    _V __filled = __identity_element;
    _V::_Impl::_S_masked_assign(__data(__mask), __data(__filled),
				__data(__get_lvalue(__x)));
    return reduce(__filled, __binary_op);
  }

// Masked sum: forwards to the general masked reduce with identity element 0.
template <typename _M, typename _V>
  _GLIBCXX_SIMD_INTRINSIC typename _V::value_type
  reduce(const const_where_expression<_M, _V>& __x, plus<> __binary_op = {})
  { return reduce(__x, 0, __binary_op); }

// Masked product: forwards to the general masked reduce with identity
// element 1.
template <typename _M, typename _V>
  _GLIBCXX_SIMD_INTRINSIC typename _V::value_type
  reduce(const const_where_expression<_M, _V>& __x, multiplies<> __binary_op)
  { return reduce(__x, 1, __binary_op); }

// Masked bitwise AND: the identity element is the bitwise complement of a
// value-initialized value_type (all bits set).
template <typename _M, typename _V>
  _GLIBCXX_SIMD_INTRINSIC typename _V::value_type
  reduce(const const_where_expression<_M, _V>& __x, bit_and<> __binary_op)
  { return reduce(__x, ~typename _V::value_type(), __binary_op); }

// Masked bitwise OR: forwards to the general masked reduce with identity
// element 0.
template <typename _M, typename _V>
  _GLIBCXX_SIMD_INTRINSIC typename _V::value_type
  reduce(const const_where_expression<_M, _V>& __x, bit_or<> __binary_op)
  { return reduce(__x, 0, __binary_op); }

// Masked bitwise XOR: forwards to the general masked reduce with identity
// element 0.
template <typename _M, typename _V>
  _GLIBCXX_SIMD_INTRINSIC typename _V::value_type
  reduce(const const_where_expression<_M, _V>& __x, bit_xor<> __binary_op)
  { return reduce(__x, 0, __binary_op); }

// Horizontal minimum: reduces __v with __detail::_Minimum via the ABI's
// _SimdImpl::_S_reduce.
template <typename _Tp, typename _Abi>
  _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR _Tp
  hmin(const simd<_Tp, _Abi>& __v) noexcept
  {
    return _Abi::_SimdImpl::_S_reduce(__v, __detail::_Minimum());
  }

// Horizontal maximum: reduces __v with __detail::_Maximum via the ABI's
// _SimdImpl::_S_reduce.
template <typename _Tp, typename _Abi>
  _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR _Tp
  hmax(const simd<_Tp, _Abi>& __v) noexcept
  {
    return _Abi::_SimdImpl::_S_reduce(__v, __detail::_Maximum());
  }

// Masked horizontal minimum. Unselected elements are replaced by the identity
// element of min before reducing: +infinity where the value type has one and
// infinities are permitted, otherwise the largest finite value.
template <typename _M, typename _V>
  _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR
  typename _V::value_type
  hmin(const const_where_expression<_M, _V>& __x) noexcept
  {
    using _Tp = typename _V::value_type;
    constexpr _Tp __id_elem =
// __FINITE_MATH_ONLY__ is predefined even without -ffinite-math-only (with the
// value 0), so its value has to be tested, not merely whether it is defined.
#if __FINITE_MATH_ONLY__
      __finite_max_v<_Tp>;
#else
      __value_or<__infinity, _Tp>(__finite_max_v<_Tp>);
#endif
    // broadcast the identity, then overwrite the selected elements
    _V __tmp = __id_elem;
    _V::_Impl::_S_masked_assign(__data(__get_mask(__x)), __data(__tmp),
				__data(__get_lvalue(__x)));
    return _V::abi_type::_SimdImpl::_S_reduce(__tmp, __detail::_Minimum());
  }

// Masked horizontal maximum. Unselected elements are replaced by the identity
// element of max before reducing: -infinity where the value type has one and
// infinities are permitted, otherwise __finite_min_v<_Tp>.
template <typename _M, typename _V>
  _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR
  typename _V::value_type
  hmax(const const_where_expression<_M, _V>& __x) noexcept
  {
    using _Tp = typename _V::value_type;
    constexpr _Tp __id_elem =
// __FINITE_MATH_ONLY__ is predefined even without -ffinite-math-only (with the
// value 0), so its value has to be tested, not merely whether it is defined.
#if __FINITE_MATH_ONLY__
      __finite_min_v<_Tp>;
#else
      [] {
	if constexpr (__value_exists_v<__infinity, _Tp>)
	  return -__infinity_v<_Tp>;
	else
	  return __finite_min_v<_Tp>;
      }();
#endif
    // broadcast the identity, then overwrite the selected elements
    _V __tmp = __id_elem;
    _V::_Impl::_S_masked_assign(__data(__get_mask(__x)), __data(__tmp),
				__data(__get_lvalue(__x)));
    return _V::abi_type::_SimdImpl::_S_reduce(__tmp, __detail::_Maximum());
  }

// }}}1
// algorithms [simd.alg] {{{
// Minimum of __a and __b, computed by the ABI's _SimdImpl::_S_min on the
// underlying data.
template <typename _Tp, typename _Ap>
  _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR simd<_Tp, _Ap>
  min(const simd<_Tp, _Ap>& __a, const simd<_Tp, _Ap>& __b)
  { return {__private_init, _Ap::_SimdImpl::_S_min(__data(__a), __data(__b))}; }

// Maximum of __a and __b, computed by the ABI's _SimdImpl::_S_max on the
// underlying data.
template <typename _Tp, typename _Ap>
  _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR simd<_Tp, _Ap>
  max(const simd<_Tp, _Ap>& __a, const simd<_Tp, _Ap>& __b)
  { return {__private_init, _Ap::_SimdImpl::_S_max(__data(__a), __data(__b))}; }

// Returns a pair of simd objects built from the .first and .second members of
// the result of the ABI's _SimdImpl::_S_minmax.
// NOTE(review): by the function's name, .first holds the minima and .second
// the maxima -- confirm at _S_minmax.
template <typename _Tp, typename _Ap>
  _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR
  pair<simd<_Tp, _Ap>, simd<_Tp, _Ap>>
  minmax(const simd<_Tp, _Ap>& __a, const simd<_Tp, _Ap>& __b)
  {
    // reserved (double-underscore) name: identifiers in a library header must
    // not be replaceable by user-defined macros
    const auto __pair_of_members
      = _Ap::_SimdImpl::_S_minmax(__data(__a), __data(__b));
    return {simd<_Tp, _Ap>(__private_init, __pair_of_members.first),
	    simd<_Tp, _Ap>(__private_init, __pair_of_members.second)};
  }

// Clamps __v to [__lo, __hi]: computed as min(__hi, max(__lo, __v)) with the
// ABI's _SimdImpl primitives.
template <typename _Tp, typename _Ap>
  _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR simd<_Tp, _Ap>
  clamp(const simd<_Tp, _Ap>& __v, const simd<_Tp, _Ap>& __lo,
	const simd<_Tp, _Ap>& __hi)
  {
    using _SimdImpl = typename _Ap::_SimdImpl;
    // first raise to at least __lo, then cap at __hi
    return {__private_init,
	    _SimdImpl::_S_min(__data(__hi),
			      _SimdImpl::_S_max(__data(__lo), __data(__v)))};
  }

// }}}

// Forward declaration of split<_Sizes...>(simd); the definition follows
// further down. The unnamed enable_if_t parameter requires the sum of _Sizes
// to equal the number of elements of the argument. The result is a tuple with
// one simd of the deduced ABI per requested size.
template <size_t... _Sizes, typename _Tp, typename _Ap,
	  typename = enable_if_t<((_Sizes + ...) == simd<_Tp, _Ap>::size())>>
  inline tuple<simd<_Tp, simd_abi::deduce_t<_Tp, _Sizes>>...>
  split(const simd<_Tp, _Ap>&);

// __extract_part {{{
// Declaration only. The result holds _Np / _Total * _Combine elements.
// NOTE(review): presumably __x is viewed as _Total equal parts and _Combine
// consecutive parts starting at part _Index are returned -- confirm at the
// definition.
template <int _Index, int _Total, int _Combine = 1, typename _Tp, size_t _Np>
  _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_CONST
  _SimdWrapper<_Tp, _Np / _Total * _Combine>
  __extract_part(const _SimdWrapper<_Tp, _Np> __x);

// Declaration only: _SimdTuple overload of __extract_part. The template
// parameters use reserved names like those of the _SimdWrapper overload;
// identifiers in a library header must not be replaceable by user-defined
// macros.
// NOTE(review): presumably the same part-selection semantics as the
// _SimdWrapper overload -- confirm at the definition.
template <int _Index, int _Parts, int _Combine = 1, typename _Tp, typename _A0,
	  typename... _As>
  _GLIBCXX_SIMD_INTRINSIC auto
  __extract_part(const _SimdTuple<_Tp, _A0, _As...>& __x);

// }}}
// _SizeList {{{
// Compile-time list of sizes (_V0, _Values...). All members are evaluated by
// recursing into _SizeList<_Values...>.
template <size_t _V0, size_t... _Values>
  struct _SizeList
  {
    // The _I-th size of the list.
    template <size_t _I>
      static constexpr size_t _S_at(_SizeConstant<_I> = {})
      {
	if constexpr (_I != 0)
	  return _SizeList<_Values...>::template _S_at<_I - 1>();
	else
	  return _V0;
      }

    // Sum of the first _I sizes, returned as a _SizeConstant.
    template <size_t _I>
      static constexpr auto _S_before(_SizeConstant<_I> = {})
      {
	if constexpr (_I != 0)
	  {
	    // sum of the _I - 1 sizes that follow _V0
	    constexpr size_t __rest
	      = _SizeList<_Values...>::template _S_before<_I - 1>();
	    return _SizeConstant<_V0 + __rest>();
	  }
	else
	  return _SizeConstant<0>();
      }

    // The list without its first _Np sizes.
    template <size_t _Np>
      static constexpr auto _S_pop_front(_SizeConstant<_Np> = {})
      {
	if constexpr (_Np != 0)
	  return _SizeList<_Values...>::template _S_pop_front<_Np - 1>();
	else
	  return _SizeList();
      }
  };

// }}}
// __extract_center {{{
// Returns the middle half of __x: with __x viewed as four quarters
// x0 - x1 - x2 - x3, the result is {x1, x2}. Requires _Np to be a multiple of
// 4. On x86 dedicated shuffles are used for 64- and 32-byte vectors; all other
// cases copy the bytes with memcpy.
template <typename _Tp, size_t _Np>
  _GLIBCXX_SIMD_INTRINSIC _SimdWrapper<_Tp, _Np / 2>
  __extract_center(_SimdWrapper<_Tp, _Np> __x)
  {
    static_assert(_Np >= 4);
    static_assert(_Np % 4 == 0); // x0 - x1 - x2 - x3 -> return {x1, x2}
#if _GLIBCXX_SIMD_X86INTRIN    // {{{
    if constexpr (__have_avx512f && sizeof(_Tp) * _Np == 64)
      {
	// The shuffle immediate holds one 2-bit lane selector per 128-bit
	// result lane, lowest first: 1, 2, 2, 3. Only the low 256 bits
	// (source lanes 1 and 2) are kept by the cast.
	const auto __intrin = __to_intrin(__x);
	if constexpr (is_integral_v<_Tp>)
	  return __vector_bitcast<_Tp>(_mm512_castsi512_si256(
	    _mm512_shuffle_i32x4(__intrin, __intrin,
				 1 + 2 * 0x4 + 2 * 0x10 + 3 * 0x40)));
	else if constexpr (sizeof(_Tp) == 4)
	  return __vector_bitcast<_Tp>(_mm512_castps512_ps256(
	    _mm512_shuffle_f32x4(__intrin, __intrin,
				 1 + 2 * 0x4 + 2 * 0x10 + 3 * 0x40)));
	else if constexpr (sizeof(_Tp) == 8)
	  return __vector_bitcast<_Tp>(_mm512_castpd512_pd256(
	    _mm512_shuffle_f64x2(__intrin, __intrin,
				 1 + 2 * 0x4 + 2 * 0x10 + 3 * 0x40)));
	else
	  __assert_unreachable<_Tp>();
      }
    else if constexpr (sizeof(_Tp) * _Np == 32 && is_floating_point_v<_Tp>)
      // 32 bytes of floating-point data: combine the upper 64 bits of the low
      // 128-bit half with the lower 64 bits of the high half
      return __vector_bitcast<_Tp>(
	_mm_shuffle_pd(__lo128(__vector_bitcast<double>(__x)),
		       __hi128(__vector_bitcast<double>(__x)), 1));
    else if constexpr (sizeof(__x) == 32 && sizeof(_Tp) * _Np <= 32)
      // 32-byte storage: shift the hi:lo concatenation right by a quarter of
      // the used bytes
      return __vector_bitcast<_Tp>(
	_mm_alignr_epi8(__hi128(__vector_bitcast<_LLong>(__x)),
			__lo128(__vector_bitcast<_LLong>(__x)),
			sizeof(_Tp) * _Np / 4));
    else
#endif // _GLIBCXX_SIMD_X86INTRIN }}}
      {
	// generic fallback: copy _Np / 2 elements starting one quarter in
	__vector_type_t<_Tp, _Np / 2> __r;
	__builtin_memcpy(&__r,
			 reinterpret_cast<const char*>(&__x)
			   + sizeof(_Tp) * _Np / 4,
			 sizeof(_Tp) * _Np / 2);
	return __r;
      }
  }

// _SimdTuple overload. A tuple with a single member forwards to the
// _SimdWrapper overload on that member; otherwise the center is requested
// from __extract_part<1, 4, 2>.
template <typename _Tp, typename _A0, typename... _As>
  _GLIBCXX_SIMD_INTRINSIC
  _SimdWrapper<_Tp, _SimdTuple<_Tp, _A0, _As...>::_S_size() / 2>
  __extract_center(const _SimdTuple<_Tp, _A0, _As...>& __x)
  {
    if constexpr (sizeof...(_As) == 0)
      return __extract_center(__x.first);
    else
      return __extract_part<1, 4, 2>(__x);
  }

// }}}
// __split_wrapper {{{
// Helper for split<_Sizes...>: deduces a new _Sizes pack from a _SizeList
// argument, wraps the _SimdTuple in the fixed_size_simd of matching size and
// splits that.
template <size_t... _Sizes, typename _Tp, typename... _As>
  auto
  __split_wrapper(_SizeList<_Sizes...>, const _SimdTuple<_Tp, _As...>& __x)
  {
    using _Tuple = _SimdTuple<_Tp, _As...>;
    using _Wrapped = fixed_size_simd<_Tp, _Tuple::_S_size()>;
    return split<_Sizes...>(_Wrapped(__private_init, __x));
  }

// }}}

// split<simd>(simd) {{{
// Splits __x into _Parts objects of the simd type _V, returned as an array.
// Participates in overload resolution only if _V is a simd type and __x has
// exactly _Parts * _V::size() elements. (_Parts is a reserved name, like in
// the simd_mask overload of split; identifiers in a library header must not
// be replaceable by user-defined macros.)
template <typename _V, typename _Ap,
	  size_t _Parts
	  = simd_size_v<typename _V::value_type, _Ap> / _V::size()>
  enable_if_t<simd_size_v<typename _V::value_type, _Ap> == _Parts * _V::size()
	      && is_simd_v<_V>, array<_V, _Parts>>
  split(const simd<typename _V::value_type, _Ap>& __x)
  {
    using _Tp = typename _V::value_type;
    if constexpr (_Parts == 1)
      {
	// a single part: this is just a cast
	return {simd_cast<_V>(__x)};
      }
    else if (__x._M_is_constprop())
      {
	// taken when _M_is_constprop() reports true: build every part from
	// element-wise generators
	return __generate_from_n_evaluations<_Parts, array<_V, _Parts>>([&](
	  auto __i) constexpr {
	  return _V([&](auto __j) constexpr {
	    return __x[__i * _V::size() + __j];
	  });
	});
      }
    else if constexpr (
      __is_fixed_size_abi_v<_Ap>
      && (is_same_v<typename _V::abi_type, simd_abi::scalar>
	|| (__is_fixed_size_abi_v<typename _V::abi_type>
	  && sizeof(_V) == sizeof(_Tp) * _V::size() // _V doesn't have padding
	  )))
      {
	// fixed_size -> fixed_size (w/o padding) or scalar
#ifdef _GLIBCXX_SIMD_USE_ALIASING_LOADS
	// load each part directly from the storage of __x
	const __may_alias<_Tp>* const __element_ptr
	  = reinterpret_cast<const __may_alias<_Tp>*>(&__data(__x));
	return __generate_from_n_evaluations<_Parts, array<_V, _Parts>>([&](
	  auto __i) constexpr {
	  return _V(__element_ptr + __i * _V::size(), vector_aligned);
	});
#else
	// element-wise copy with compile-time indices into the data of __x
	const auto& __xx = __data(__x);
	return __generate_from_n_evaluations<_Parts, array<_V, _Parts>>([&](
	  auto __i) constexpr {
	  [[maybe_unused]] constexpr size_t __offset
	    = decltype(__i)::value * _V::size();
	  return _V([&](auto __j) constexpr {
	    constexpr _SizeConstant<__j + __offset> __k;
	    return __xx[__k];
	  });
	});
#endif
      }
    else if constexpr (is_same_v<typename _V::abi_type, simd_abi::scalar>)
      {
	// normally memcpy should work here as well
	return __generate_from_n_evaluations<_Parts, array<_V, _Parts>>([&](
	  auto __i) constexpr { return __x[__i]; });
      }
    else
      {
	return __generate_from_n_evaluations<_Parts, array<_V, _Parts>>([&](
	  auto __i) constexpr {
	  if constexpr (__is_fixed_size_abi_v<typename _V::abi_type>)
	    return _V([&](auto __j) constexpr {
	      return __x[__i * _V::size() + __j];
	    });
	  else
	    // non-fixed_size target: take part __i of _Parts from the data
	    return _V(__private_init,
		      __extract_part<decltype(__i)::value, _Parts>(
			__data(__x)));
	});
      }
  }

// }}}
// split<simd_mask>(simd_mask) {{{
// Splits the mask __x into _Parts objects of the simd_mask type _V, returned
// as an array. Participates in overload resolution only if _V is a simd_mask
// type and __x has exactly _Parts * _V::size() elements.
template <typename _V, typename _Ap,
	  size_t _Parts
	  = simd_size_v<typename _V::simd_type::value_type, _Ap> / _V::size()>
  enable_if_t<is_simd_mask_v<_V> && simd_size_v<typename
    _V::simd_type::value_type, _Ap> == _Parts * _V::size(), array<_V, _Parts>>
  split(const simd_mask<typename _V::simd_type::value_type, _Ap>& __x)
  {
    using _Result = array<_V, _Parts>;

    if constexpr (is_same_v<_Ap, typename _V::abi_type>)
      // same ABI: the input already is the single part
      return {__x};
    else if constexpr (_Parts == 1)
      // a single part of a different ABI: this is just a cast
      return {__proposed::static_simd_cast<_V>(__x)};
    else if constexpr (_Parts == 2 && __is_sse_abi<typename _V::abi_type>()
		       && __is_avx_abi<_Ap>())
      // AVX mask into two SSE masks: low and high 128-bit halves
      return {_V(__private_init, __lo128(__data(__x))),
	      _V(__private_init, __hi128(__data(__x)))};
    else if constexpr (_V::size() <= __CHAR_BIT__ * sizeof(_ULLong))
      {
	// each part fits into an unsigned long long: shift the bitset of the
	// whole mask so that the part starts at bit 0
	const auto __all_bits = __x.__to_bitset();
	return __generate_from_n_evaluations<_Parts, _Result>([&](
	  auto __part) constexpr {
	  constexpr size_t __first = __part * _V::size();
	  return _V(__bitset_init, (__all_bits >> __first).to_ullong());
	});
      }
    else
      {
	// generic path: element-wise generator per part
	return __generate_from_n_evaluations<_Parts, _Result>([&](
	  auto __part) constexpr {
	  constexpr size_t __first = __part * _V::size();
	  return _V(
	    __private_init, [&](auto __elem) constexpr {
	      return __x[__elem + __first];
	    });
	});
      }
  }

// }}}
// split<_Sizes...>(simd) {{{
// Splits __x into a tuple of simd objects with _Sizes... elements each, using
// the deduced ABI for every size. This defines the function template that was
// forward-declared above; the unnamed last template parameter is the
// enable_if_t constraint of that declaration.
template <size_t... _Sizes, typename _Tp, typename _Ap, typename>
  _GLIBCXX_SIMD_ALWAYS_INLINE
  tuple<simd<_Tp, simd_abi::deduce_t<_Tp, _Sizes>>...>
  split(const simd<_Tp, _Ap>& __x)
  {
    using _SL = _SizeList<_Sizes...>;
    using _Tuple = tuple<__deduced_simd<_Tp, _Sizes>...>;
    constexpr size_t _Np = simd_size_v<_Tp, _Ap>;
    constexpr size_t _N0 = _SL::template _S_at<0>();
    using _V = __deduced_simd<_Tp, _N0>;

    if (__x._M_is_constprop())
      // taken when _M_is_constprop() reports true: build every part from an
      // element-wise generator, offset by the sum of the preceding sizes
      return __generate_from_n_evaluations<sizeof...(_Sizes), _Tuple>([&](
	auto __i) constexpr {
	using _Vi = __deduced_simd<_Tp, _SL::_S_at(__i)>;
	constexpr size_t __offset = _SL::_S_before(__i);
	return _Vi([&](auto __j) constexpr { return __x[__offset + __j]; });
      });
    else if constexpr (_Np == _N0)
      {
	// the first size already covers everything: this is just a cast
	static_assert(sizeof...(_Sizes) == 1);
	return {simd_cast<_V>(__x)};
      }
    else if constexpr // split from fixed_size, such that __x::first.size == _N0
      (__is_fixed_size_abi_v<
	 _Ap> && __fixed_size_storage_t<_Tp, _Np>::_S_first_size == _N0)
      {
	static_assert(
	  !__is_fixed_size_abi_v<typename _V::abi_type>,
	  "How can <_Tp, _Np> be a single _SimdTuple entry but a "
	  "fixed_size_simd "
	  "when deduced?");
	// extract first and recurse (__split_wrapper is needed to deduce a new
	// _Sizes pack)
	return tuple_cat(make_tuple(_V(__private_init, __data(__x).first)),
			 __split_wrapper(_SL::template _S_pop_front<1>(),
					 __data(__x).second));
      }
    else if constexpr ((!is_same_v<simd_abi::scalar,
				   simd_abi::deduce_t<_Tp, _Sizes>> && ...)
		       && (!__is_fixed_size_abi_v<
			     simd_abi::deduce_t<_Tp, _Sizes>> && ...))
      {
	// No part is scalar or fixed_size: recognize the common halves /
	// thirds / quarters patterns and extract those parts directly.
	if constexpr (((_Sizes * 2 == _Np) && ...))
	  return {{__private_init, __extract_part<0, 2>(__data(__x))},
		  {__private_init, __extract_part<1, 2>(__data(__x))}};
	else if constexpr (is_same_v<_SizeList<_Sizes...>,
				     _SizeList<_Np / 3, _Np / 3, _Np / 3>>)
	  return {{__private_init, __extract_part<0, 3>(__data(__x))},
		  {__private_init, __extract_part<1, 3>(__data(__x))},
		  {__private_init, __extract_part<2, 3>(__data(__x))}};
	else if constexpr (is_same_v<_SizeList<_Sizes...>,
				     _SizeList<2 * _Np / 3, _Np / 3>>)
	  return {{__private_init, __extract_part<0, 3, 2>(__data(__x))},
		  {__private_init, __extract_part<2, 3>(__data(__x))}};
	else if constexpr (is_same_v<_SizeList<_Sizes...>,
				     _SizeList<_Np / 3, 2 * _Np / 3>>)
	  return {{__private_init, __extract_part<0, 3>(__data(__x))},
		  {__private_init, __extract_part<1, 3, 2>(__data(__x))}};
	else if constexpr (is_same_v<_SizeList<_Sizes...>,
				     _SizeList<_Np / 2, _Np / 4, _Np / 4>>)
	  return {{__private_init, __extract_part<0, 2>(__data(__x))},
		  {__private_init, __extract_part<2, 4>(__data(__x))},
		  {__private_init, __extract_part<3, 4>(__data(__x))}};
	else if constexpr (is_same_v<_SizeList<_Sizes...>,
				     _SizeList<_Np / 4, _Np / 4, _Np / 2>>)
	  return {{__private_init, __extract_part<0, 4>(__data(__x))},
		  {__private_init, __extract_part<1, 4>(__data(__x))},
		  {__private_init, __extract_part<1, 2>(__data(__x))}};
	else if constexpr (is_same_v<_SizeList<_Sizes...>,
				     _SizeList<_Np / 4, _Np / 2, _Np / 4>>)
	  return {{__private_init, __extract_part<0, 4>(__data(__x))},
		  {__private_init, __extract_center(__data(__x))},
		  {__private_init, __extract_part<3, 4>(__data(__x))}};
	else if constexpr (((_Sizes * 4 == _Np) && ...))
	  return {{__private_init, __extract_part<0, 4>(__data(__x))},
		  {__private_init, __extract_part<1, 4>(__data(__x))},
		  {__private_init, __extract_part<2, 4>(__data(__x))},
		  {__private_init, __extract_part<3, 4>(__data(__x))}};
	// else fall through
      }
#ifdef _GLIBCXX_SIMD_USE_ALIASING_LOADS
    // Generic fallback: load every part from the object representation of
    // __x. The alignment passed to each load is derived from the byte offset
    // of the part relative to alignof(simd<_Tp, _Ap>).
    const __may_alias<_Tp>* const __element_ptr
      = reinterpret_cast<const __may_alias<_Tp>*>(&__x);
    return __generate_from_n_evaluations<sizeof...(_Sizes), _Tuple>([&](
      auto __i) constexpr {
      using _Vi = __deduced_simd<_Tp, _SL::_S_at(__i)>;
      constexpr size_t __offset = _SL::_S_before(__i);
      constexpr size_t __base_align = alignof(simd<_Tp, _Ap>);
      constexpr size_t __a
	= __base_align - ((__offset * sizeof(_Tp)) % __base_align);
      // ((__a - 1) & __a) clears the lowest set bit of __a, so the XOR leaves
      // exactly that lowest set bit
      constexpr size_t __b = ((__a - 1) & __a) ^ __a;
      constexpr size_t __alignment = __b == 0 ? __a : __b;
      return _Vi(__element_ptr + __offset, overaligned<__alignment>);
    });
#else
    // Generic fallback: element-wise copy with compile-time indices into the
    // data of __x.
    return __generate_from_n_evaluations<sizeof...(_Sizes), _Tuple>([&](
      auto __i) constexpr {
      using _Vi = __deduced_simd<_Tp, _SL::_S_at(__i)>;
      const auto& __xx = __data(__x);
      using _Offset = decltype(_SL::_S_before(__i));
      return _Vi([&](auto __j) constexpr {
	constexpr _SizeConstant<_Offset::value + __j> __k;
	return __xx[__k];
      });
    });
#endif
  }

// }}}

// __subscript_in_pack {{{
// Returns element _I of the sequence formed by the elements of __x followed
// by the elements of every simd in __xs, in argument order.
template <size_t _I, typename _Tp, typename _Ap, typename... _As>
  _GLIBCXX_SIMD_INTRINSIC constexpr _Tp
  __subscript_in_pack(const simd<_Tp, _Ap>& __x, const simd<_Tp, _As>&... __xs)
  {
    // Element count of the first simd object in the pack.
    constexpr size_t __first_size = simd_size_v<_Tp, _Ap>;
    if constexpr (_I >= __first_size)
      // The index lies beyond __x: drop __x and rebase the index.
      return __subscript_in_pack<_I - __first_size>(__xs...);
    else
      return __x[_I];
  }

// }}}
// __store_pack_of_simd {{{
// Copies the elements of __x0 and then of every simd in __xs to consecutive
// bytes starting at __mem.
template <typename _Tp, typename _A0, typename... _As>
  _GLIBCXX_SIMD_INTRINSIC void
  __store_pack_of_simd(char* __mem, const simd<_Tp, _A0>& __x0,
                       const simd<_Tp, _As>&... __xs)
  {
    // Element count times element size of the leading simd object.
    constexpr size_t __head_bytes = simd_size_v<_Tp, _A0> * sizeof(_Tp);
    __builtin_memcpy(__mem, &__data(__x0), __head_bytes);
    // Recurse on the remaining objects, writing right behind what was copied.
    if constexpr (sizeof...(_As) != 0)
      __store_pack_of_simd(__mem + __head_bytes, __xs...);
  }

// }}}
// concat(simd...) {{{
// Concatenates all arguments into one simd whose size is the sum of the
// argument sizes and whose ABI tag is deduced from that size.
template <typename _Tp, typename... _As, typename = __detail::__odr_helper>
  inline _GLIBCXX_SIMD_CONSTEXPR
  simd<_Tp, simd_abi::deduce_t<_Tp, (simd_size_v<_Tp, _As> + ...)>>
  concat(const simd<_Tp, _As>&... __xs)
  {
    constexpr size_t __total = (simd_size_v<_Tp, _As> + ...);
    using _Rp = __deduced_simd<_Tp, __total>;
    if constexpr (sizeof...(_As) == 1)
      // A single argument only needs a cast to the result type.
      return simd_cast<_Rp>(__xs...);
    else
      {
        // If every input reports itself as constant-propagated, build the
        // result element by element via the generator constructor.
        if ((... && __xs._M_is_constprop()))
          return simd<_Tp, simd_abi::deduce_t<_Tp, __total>>(
                   [&](auto __i) constexpr {
                     return __subscript_in_pack<__i>(__xs...);
                   });

        // Otherwise copy the inputs back to back into the result's storage.
        _Rp __r{};
        __store_pack_of_simd(reinterpret_cast<char*>(&__data(__r)), __xs...);
        return __r;
      }
  }

// }}}
// concat(array<simd>) {{{
// Concatenates the simd objects stored in an array: the array is handed to
// __call_with_subscripts<_Np>, and the callback passes whatever it receives
// on to the variadic concat overload.
template <typename _Tp, typename _Abi, size_t _Np>
  _GLIBCXX_SIMD_ALWAYS_INLINE
  _GLIBCXX_SIMD_CONSTEXPR __deduced_simd<_Tp, simd_size_v<_Tp, _Abi> * _Np>
  concat(const array<simd<_Tp, _Abi>, _Np>& __x)
  {
    return __call_with_subscripts<_Np>(
             __x, [](const auto&... __parts) { return concat(__parts...); });
  }

// }}}

/// @cond undocumented
// _SmartReference {{{
// Proxy object standing in for a reference to the element at a given index of
// an object of type _Up. Reads subscript the object (or return the object
// itself if _Up is arithmetic); writes go through _Accessor::_S_set. All
// mutating operators are rvalue-ref qualified and the proxy is not copyable.
template <typename _Up, typename _Accessor = _Up,
          typename _ValueType = typename _Up::value_type>
  class _SmartReference
  {
    friend _Accessor;
    int _M_index;
    _Up& _M_obj;

    // Fetches the current value of the referenced element.
    _GLIBCXX_SIMD_INTRINSIC constexpr _ValueType _M_read() const noexcept
    {
      if constexpr (!is_arithmetic_v<_Up>)
        return _M_obj[_M_index];
      else
        return _M_obj;
    }

    // Stores __x into the referenced element via the accessor.
    template <typename _Tp>
      _GLIBCXX_SIMD_INTRINSIC constexpr void _M_write(_Tp&& __x) const
      { _Accessor::_S_set(_M_obj, _M_index, static_cast<_Tp&&>(__x)); }

  public:
    _GLIBCXX_SIMD_INTRINSIC constexpr
    _SmartReference(_Up& __o, int __i) noexcept
    : _M_index(__i), _M_obj(__o)
    {}

    using value_type = _ValueType;

    _GLIBCXX_SIMD_INTRINSIC _SmartReference(const _SmartReference&) = delete;

    // Reading the proxy yields a copy of the element.
    _GLIBCXX_SIMD_INTRINSIC constexpr operator value_type() const noexcept
    { return _M_read(); }

    // Assignment writes the element and returns a fresh proxy to it.
    template <typename _Tp,
              typename
              = _ValuePreservingOrInt<__remove_cvref_t<_Tp>, value_type>>
      _GLIBCXX_SIMD_INTRINSIC constexpr _SmartReference operator=(_Tp&& __x) &&
      {
        _M_write(static_cast<_Tp&&>(__x));
        return _SmartReference(_M_obj, _M_index);
      }

    // Compound assignment: read the element, apply the operator, write back.
#define _GLIBCXX_SIMD_OP_(__op)                                                \
    template <typename _Tp,                                                    \
              typename _TT                                                     \
              = decltype(declval<value_type>() __op declval<_Tp>()),           \
              typename = _ValuePreservingOrInt<__remove_cvref_t<_Tp>, _TT>,    \
              typename = _ValuePreservingOrInt<_TT, value_type>>               \
      _GLIBCXX_SIMD_INTRINSIC constexpr _SmartReference                        \
      operator __op##=(_Tp&& __x) &&                                           \
      {                                                                        \
        const value_type __cur = _M_read();                                    \
        _M_write(__cur __op __x);                                              \
        return _SmartReference(_M_obj, _M_index);                              \
      }
    _GLIBCXX_SIMD_ALL_ARITHMETICS(_GLIBCXX_SIMD_OP_);
    _GLIBCXX_SIMD_ALL_SHIFTS(_GLIBCXX_SIMD_OP_);
    _GLIBCXX_SIMD_ALL_BINARY(_GLIBCXX_SIMD_OP_);
#undef _GLIBCXX_SIMD_OP_

    // Pre-increment; only available if value_type supports ++.
    template <typename _Tp = void,
              typename
              = decltype(++declval<conditional_t<true, value_type, _Tp>&>())>
      _GLIBCXX_SIMD_INTRINSIC constexpr _SmartReference operator++() &&
      {
        value_type __v = _M_read();
        _M_write(++__v);
        return _SmartReference(_M_obj, _M_index);
      }

    // Post-increment; returns the value the element had before.
    template <typename _Tp = void,
              typename
              = decltype(declval<conditional_t<true, value_type, _Tp>&>()++)>
      _GLIBCXX_SIMD_INTRINSIC constexpr value_type operator++(int) &&
      {
        const value_type __old = _M_read();
        value_type __v = __old;
        _M_write(++__v);
        return __old;
      }

    // Pre-decrement; only available if value_type supports --.
    template <typename _Tp = void,
              typename
              = decltype(--declval<conditional_t<true, value_type, _Tp>&>())>
      _GLIBCXX_SIMD_INTRINSIC constexpr _SmartReference operator--() &&
      {
        value_type __v = _M_read();
        _M_write(--__v);
        return _SmartReference(_M_obj, _M_index);
      }

    // Post-decrement; returns the value the element had before.
    template <typename _Tp = void,
              typename
              = decltype(declval<conditional_t<true, value_type, _Tp>&>()--)>
      _GLIBCXX_SIMD_INTRINSIC constexpr value_type operator--(int) &&
      {
        const value_type __old = _M_read();
        value_type __v = __old;
        _M_write(--__v);
        return __old;
      }

    // Exchanges the elements designated by two proxies.
    _GLIBCXX_SIMD_INTRINSIC friend void
    swap(_SmartReference&& __a, _SmartReference&& __b) noexcept(
      conjunction<
        is_nothrow_constructible<value_type, _SmartReference&&>,
        is_nothrow_assignable<_SmartReference&&, value_type&&>>::value)
    {
      value_type __saved = std::move(__a);
      std::move(__a) = static_cast<value_type>(__b);
      std::move(__b) = std::move(__saved);
    }

    // Exchanges a plain value with the element designated by a proxy.
    _GLIBCXX_SIMD_INTRINSIC friend void
    swap(value_type& __a, _SmartReference&& __b) noexcept(
      conjunction<
        is_nothrow_constructible<value_type, value_type&&>,
        is_nothrow_assignable<value_type&, value_type&&>,
        is_nothrow_assignable<_SmartReference&&, value_type&&>>::value)
    {
      value_type __saved(std::move(__a));
      __a = static_cast<value_type>(__b);
      std::move(__b) = std::move(__saved);
    }

    // Exchanges the element designated by a proxy with a plain value.
    _GLIBCXX_SIMD_INTRINSIC friend void
    swap(_SmartReference&& __a, value_type& __b) noexcept(
      conjunction<
        is_nothrow_constructible<value_type, _SmartReference&&>,
        is_nothrow_assignable<value_type&, value_type&&>,
        is_nothrow_assignable<_SmartReference&&, value_type&&>>::value)
    {
      value_type __saved(__a);
      std::move(__a) = std::move(__b);
      __b = std::move(__saved);
    }
  };

// }}}
// __scalar_abi_wrapper {{{
// Adapter that gives the scalar ABI the `template <int _Bytes>` shape the ABI
// lists expect. It always describes exactly one element and is valid only if
// _Abi (simd_abi::scalar by default) is valid for _Tp and sizeof(_Tp) equals
// _Bytes.
template <int _Bytes>
  struct __scalar_abi_wrapper
  {
    template <typename _Tp> static constexpr size_t _S_full_size = 1;
    template <typename _Tp> static constexpr size_t _S_size = 1;
    // A boolean trait (callers compare it against `false`), hence bool rather
    // than size_t.
    template <typename _Tp> static constexpr bool _S_is_partial = false;

    template <typename _Tp, typename _Abi = simd_abi::scalar>
      static constexpr bool _S_is_valid_v
        = _Abi::template _IsValid<_Tp>::value && sizeof(_Tp) == _Bytes;
  };

// }}}
// __decay_abi metafunction {{{
// Primary template: any type maps to itself.
template <typename _Tp>
  struct __decay_abi
  {
    using type = _Tp;
  };

// The scalar wrapper, for any byte count, maps back to the real scalar tag.
template <int _Bytes>
  struct __decay_abi<__scalar_abi_wrapper<_Bytes>>
  {
    using type = simd_abi::scalar;
  };

// }}}
// __find_next_valid_abi metafunction {{{1
// Given an ABI tag A<N>, find an N2 < N such that A<N2>::_S_is_valid_v<_Tp> ==
// true, N2 is a power-of-2, and A<N2>::_S_is_partial<_Tp> is false. Break
// recursion at 2 elements in the resulting ABI tag. In this case
// type::_S_is_valid_v<_Tp> may be false.
template <template <int> class _Abi, int _Bytes, typename _Tp>
  struct __find_next_valid_abi
  {
    // Returns a value-initialized object of the chosen ABI tag; only the type
    // of the result is consumed (see `type` below).
    static constexpr auto
    _S_choose()
    {
      // Next candidate width: __bit_ceil(_Bytes) halved.
      constexpr int _HalfBytes = std::__bit_ceil(_Bytes) / 2;
      using _Candidate = _Abi<_HalfBytes>;
      if constexpr (_HalfBytes < sizeof(_Tp) * 2)
        // The candidate could not hold two elements: stop recursing and hand
        // back the tag we started from, which may be invalid for _Tp.
        return _Abi<_Bytes>();
      else
        {
          constexpr bool __usable
            = !_Candidate::template _S_is_partial<_Tp>
                && _Candidate::template _S_is_valid_v<_Tp>;
          if constexpr (__usable)
            return _Candidate();
          else
            // Keep halving.
            return __find_next_valid_abi<_Abi, _HalfBytes, _Tp>::_S_choose();
        }
    }

    using type = decltype(_S_choose());
  };

// For the scalar wrapper there is nothing to search: the result is always the
// scalar ABI tag.
template <int _Bytes, typename _Tp>
  struct __find_next_valid_abi<__scalar_abi_wrapper, _Bytes, _Tp>
  {
    using type = simd_abi::scalar;
  };

// _AbiList {{{1
// Primary template: also the end of the recursion over the list of ABI tag
// templates. It reports that no ABI is valid and yields void for both
// selections.
template <template <int> class...>
  struct _AbiList
  {
    template <typename, int>
      static constexpr bool _S_has_valid_abi = false;

    template <typename, int>
      using _FirstValidAbi = void;

    template <typename, int>
      using _BestAbi = void;
  };

template <template <int> class _A0, template <int> class... _Rest>
  struct _AbiList<_A0, _Rest...>
  {
    template <typename _Tp, int _Np>
      static constexpr bool _S_has_valid_abi
	= _A0<sizeof(_Tp) * _Np>::template _S_is_valid_v<
	    _Tp> || _AbiList<_Rest...>::template _S_has_valid_abi<_Tp, _Np>;

    template <typename _Tp, int _Np>
      using _FirstValidAbi = conditional_t<
	_A0<sizeof(_Tp) * _Np>::template _S_is_valid_v<_Tp>,
	typename __decay_abi<_A0<sizeof(_Tp) * _Np>>::type,
	typename _AbiList<_Rest...>::template _FirstValidAbi<_Tp, _Np>>;

    template <typename _Tp, int _Np>
      static constexpr auto _S_determine_best_abi()
      {
	static_assert(_Np >= 1);
	constexpr int _Bytes = sizeof(_Tp) * _Np;
	if constexpr (_Np == 1)
	  return __make_dependent_t<_Tp, simd_abi::scalar>{};
	else
	  {
	    constexpr int __fullsize = _A0<_Bytes>::template _S_full_size<_Tp>;
	    // _A0<_Bytes> is good if:
	    // 1. The ABI tag is valid for _Tp
	    // 2. The storage overhead is no more than padding to fill the next
	    //    power-of-2 number of bytes
	    if constexpr (_A0<_Bytes>::template _S_is_valid_v<
			    _Tp> && __fullsize / 2 < _Np)
	      return typename __decay_abi<_A0<_Bytes>>::type{};
	    else
	      {
		using _Bp =
		  typename __find_next_valid_abi<_A0, _Bytes, _Tp>::type;
		if constexpr (_Bp::template _S_is_valid_v<
				_Tp> && _Bp::template _S_size<_Tp> <= _Np)
		  return _Bp{};
		else
		  return
		    typename _AbiList<_Rest...>::template _BestAbi<_Tp, _Np>{};
	      }
	  }
      }

    template <typename _Tp, int _Np>
      using _BestAbi = decltype(_S_determine_best_abi<_Tp, _Np>());
  };

// }}}1

// The list of all native ABIs. Being listed here makes an ABI accessible to
// simd_abi::deduce and select_best_vector_type_t (for fixed_size). Order
// matters: _AbiList examines its first entry before the remaining ones, so
// whatever comes first has higher priority.
using _AllNativeAbis = _AbiList<simd_abi::_VecBltnBtmsk, simd_abi::_VecBuiltin,
				__scalar_abi_wrapper>;

// valid _SimdTraits specialization {{{1
// Selected whenever `_Abi::template _IsValid<_Tp>` names a type; the traits
// are then inherited from the ABI tag's own __traits<_Tp>.
template <typename _Tp, typename _Abi>
  struct _SimdTraits<_Tp, _Abi, void_t<typename _Abi::template _IsValid<_Tp>>>
  : _Abi::template __traits<_Tp>
  {};

// __deduce_impl specializations {{{1
// try all native ABIs (including scalar) first
// Enabled when at least one ABI in _AllNativeAbis is valid for _Np elements
// of _Tp; the result is the first such ABI in list order.
template <typename _Tp, size_t _Np>
  struct __deduce_impl<
    _Tp, _Np, enable_if_t<_AllNativeAbis::template _S_has_valid_abi<_Tp, _Np>>>
  {
    using type = _AllNativeAbis::_FirstValidAbi<_Tp, _Np>;
  };

// fall back to fixed_size only if scalar and native ABIs don't match
// Primary template: deliberately has no `type` member; it is used when the
// specialization for a valid fixed_size<_Np> is not enabled.
template <typename _Tp, size_t _Np, typename = void>
  struct __deduce_fixed_size_fallback {};

// Enabled when simd_abi::fixed_size<_Np> is valid for _Tp; it then provides
// that tag as `type`.
template <typename _Tp, size_t _Np>
  struct __deduce_fixed_size_fallback<
    _Tp, _Np,
    enable_if_t<simd_abi::fixed_size<_Np>::template _S_is_valid_v<_Tp>>>
  {
    using type = simd_abi::fixed_size<_Np>;
  };

// Primary template definition: when no specialization applies, deduction is
// delegated to the fixed_size fallback (which may provide no `type`).
template <typename _Tp, size_t _Np, typename>
  struct __deduce_impl
  : public __deduce_fixed_size_fallback<_Tp, _Np>
  {};

//}}}1
/// @endcond

// simd_mask {{{
// Mask type belonging to simd<_Tp, _Abi>: it exposes one bool per element
// (value_type is bool, size() equals the simd size) and stores its state in
// the ABI-specific _Traits::_MaskMember. All operations are forwarded to the
// ABI's _MaskImpl.
template <typename _Tp, typename _Abi>
  class simd_mask : public _SimdTraits<_Tp, _Abi>::_MaskBase
  {
    // types, tags, and friends {{{
    using _Traits = _SimdTraits<_Tp, _Abi>;
    using _MemberType = typename _Traits::_MaskMember;

    // We map all masks with equal element sizeof to a single integer type, the
    // one given by __int_for_sizeof_t<_Tp>. This is the approach
    // [[gnu::vector_size(N)]] types take as well and it reduces the number of
    // template specializations in the implementation classes.
    using _Ip = __int_for_sizeof_t<_Tp>;
    static constexpr _Ip* _S_type_tag = nullptr;

    friend typename _Traits::_MaskBase;
    friend class simd<_Tp, _Abi>;       // to construct masks on return
    friend typename _Traits::_SimdImpl; // to construct masks on return and
                                        // inspect data on masked operations
  public:
    using _Impl = typename _Traits::_MaskImpl;
    friend _Impl;

    // }}}
    // member types {{{
    using value_type = bool;
    using reference = _SmartReference<_MemberType, _Impl, value_type>;
    using simd_type = simd<_Tp, _Abi>;
    using abi_type = _Abi;

    // }}}
    static constexpr size_t size() // {{{
    { return __size_or_zero_v<_Tp, _Abi>; }

    // }}}
    // constructors & assignment {{{
    simd_mask() = default;
    simd_mask(const simd_mask&) = default;
    simd_mask(simd_mask&&) = default;
    simd_mask& operator=(const simd_mask&) = default;
    simd_mask& operator=(simd_mask&&) = default;

    // }}}
    // access to internal representation (optional feature) {{{
    _GLIBCXX_SIMD_ALWAYS_INLINE explicit
    simd_mask(typename _Traits::_MaskCastType __init)
    : _M_data{__init} {}
    // conversions to internal type is done in _MaskBase

    // }}}
    // bitset interface (extension to be proposed) {{{
    // TS_FEEDBACK:
    // Conversion of simd_mask to and from bitset makes it much easier to
    // interface with other facilities. I suggest adding `static
    // simd_mask::from_bitset` and `simd_mask::to_bitset`.
    // The parameter uses a reserved (double-underscore) name so that it cannot
    // collide with a macro defined by user code.
    _GLIBCXX_SIMD_ALWAYS_INLINE static simd_mask
    __from_bitset(bitset<size()> __bs)
    { return {__bitset_init, __bs}; }

    _GLIBCXX_SIMD_ALWAYS_INLINE bitset<size()>
    __to_bitset() const
    { return _Impl::_S_to_bits(_M_data)._M_to_bitset(); }

    // }}}
    // explicit broadcast constructor {{{
    _GLIBCXX_SIMD_ALWAYS_INLINE explicit _GLIBCXX_SIMD_CONSTEXPR
    simd_mask(value_type __x)
    : _M_data(_Impl::template _S_broadcast<_Ip>(__x)) {}

    // }}}
    // implicit type conversion constructor {{{
  #ifdef _GLIBCXX_SIMD_ENABLE_IMPLICIT_MASK_CAST
    // proposed improvement
    template <typename _Up, typename _A2,
              typename = enable_if_t<simd_size_v<_Up, _A2> == size()>>
      _GLIBCXX_SIMD_ALWAYS_INLINE explicit(sizeof(_MemberType)
          != sizeof(typename _SimdTraits<_Up, _A2>::_MaskMember))
      simd_mask(const simd_mask<_Up, _A2>& __x)
      : simd_mask(__proposed::static_simd_cast<simd_mask>(__x)) {}
  #else
    // conforming to ISO/IEC 19570:2018
    template <typename _Up, typename = enable_if_t<conjunction<
                              is_same<abi_type, simd_abi::fixed_size<size()>>,
                              is_same<_Up, _Up>>::value>>
      _GLIBCXX_SIMD_ALWAYS_INLINE
      simd_mask(const simd_mask<_Up, simd_abi::fixed_size<size()>>& __x)
      : _M_data(_Impl::_S_from_bitmask(__data(__x), _S_type_tag)) {}
  #endif

    // }}}
    // load constructor {{{
    template <typename _Flags>
      _GLIBCXX_SIMD_ALWAYS_INLINE
      simd_mask(const value_type* __mem, _Flags)
      : _M_data(_Impl::template _S_load<_Ip>(
        _Flags::template _S_apply<simd_mask>(__mem))) {}

    // Masked load: storage is value-initialized first and then passed to
    // _S_masked_load together with the selector __k.
    template <typename _Flags>
      _GLIBCXX_SIMD_ALWAYS_INLINE
      simd_mask(const value_type* __mem, simd_mask __k, _Flags)
      : _M_data{}
      {
        _M_data
          = _Impl::_S_masked_load(_M_data, __k._M_data,
                                  _Flags::template _S_apply<simd_mask>(__mem));
      }

    // }}}
    // loads [simd_mask.load] {{{
    template <typename _Flags>
      _GLIBCXX_SIMD_ALWAYS_INLINE void
      copy_from(const value_type* __mem, _Flags)
      {
        _M_data = _Impl::template _S_load<_Ip>(
          _Flags::template _S_apply<simd_mask>(__mem));
      }

    // }}}
    // stores [simd_mask.store] {{{
    template <typename _Flags>
      _GLIBCXX_SIMD_ALWAYS_INLINE void
      copy_to(value_type* __mem, _Flags) const
      { _Impl::_S_store(_M_data, _Flags::template _S_apply<simd_mask>(__mem)); }

    // }}}
    // scalar access {{{
    // Returns a proxy for element __i. An index outside [0, size()) is
    // reported through __invoke_ub.
    _GLIBCXX_SIMD_ALWAYS_INLINE reference
    operator[](size_t __i)
    {
      if (__i >= size())
        // The message uses %d conversions, so hand over int arguments.
        __invoke_ub("Subscript %d is out of range [0, %d]", int(__i),
                    int(size() - 1));
      return {_M_data, int(__i)};
    }

    // Returns the value of element __i. An index outside [0, size()) is
    // reported through __invoke_ub.
    _GLIBCXX_SIMD_ALWAYS_INLINE value_type
    operator[](size_t __i) const
    {
      if (__i >= size())
        // The message uses %d conversions, so hand over int arguments.
        __invoke_ub("Subscript %d is out of range [0, %d]", int(__i),
                    int(size() - 1));
      if constexpr (__is_scalar_abi<_Abi>())
        return _M_data;
      else
        return static_cast<bool>(_M_data[__i]);
    }

    // }}}
    // negation {{{
    _GLIBCXX_SIMD_ALWAYS_INLINE simd_mask
    operator!() const
    { return {__private_init, _Impl::_S_bit_not(_M_data)}; }

    // }}}
    // simd_mask binary operators [simd_mask.binary] {{{
  #ifdef _GLIBCXX_SIMD_ENABLE_IMPLICIT_MASK_CAST
    // simd_mask<int> && simd_mask<uint> needs disambiguation
    template <typename _Up, typename _A2,
              typename
              = enable_if_t<is_convertible_v<simd_mask<_Up, _A2>, simd_mask>>>
      _GLIBCXX_SIMD_ALWAYS_INLINE friend simd_mask
      operator&&(const simd_mask& __x, const simd_mask<_Up, _A2>& __y)
      {
        return {__private_init,
                _Impl::_S_logical_and(__x._M_data, simd_mask(__y)._M_data)};
      }

    template <typename _Up, typename _A2,
              typename
              = enable_if_t<is_convertible_v<simd_mask<_Up, _A2>, simd_mask>>>
      _GLIBCXX_SIMD_ALWAYS_INLINE friend simd_mask
      operator||(const simd_mask& __x, const simd_mask<_Up, _A2>& __y)
      {
        return {__private_init,
                _Impl::_S_logical_or(__x._M_data, simd_mask(__y)._M_data)};
      }
  #endif // _GLIBCXX_SIMD_ENABLE_IMPLICIT_MASK_CAST

    _GLIBCXX_SIMD_ALWAYS_INLINE friend simd_mask
    operator&&(const simd_mask& __x, const simd_mask& __y)
    {
      return {__private_init, _Impl::_S_logical_and(__x._M_data, __y._M_data)};
    }

    _GLIBCXX_SIMD_ALWAYS_INLINE friend simd_mask
    operator||(const simd_mask& __x, const simd_mask& __y)
    {
      return {__private_init, _Impl::_S_logical_or(__x._M_data, __y._M_data)};
    }

    _GLIBCXX_SIMD_ALWAYS_INLINE friend simd_mask
    operator&(const simd_mask& __x, const simd_mask& __y)
    { return {__private_init, _Impl::_S_bit_and(__x._M_data, __y._M_data)}; }

    _GLIBCXX_SIMD_ALWAYS_INLINE friend simd_mask
    operator|(const simd_mask& __x, const simd_mask& __y)
    { return {__private_init, _Impl::_S_bit_or(__x._M_data, __y._M_data)}; }

    _GLIBCXX_SIMD_ALWAYS_INLINE friend simd_mask
    operator^(const simd_mask& __x, const simd_mask& __y)
    { return {__private_init, _Impl::_S_bit_xor(__x._M_data, __y._M_data)}; }

    _GLIBCXX_SIMD_ALWAYS_INLINE friend simd_mask&
    operator&=(simd_mask& __x, const simd_mask& __y)
    {
      __x._M_data = _Impl::_S_bit_and(__x._M_data, __y._M_data);
      return __x;
    }

    _GLIBCXX_SIMD_ALWAYS_INLINE friend simd_mask&
    operator|=(simd_mask& __x, const simd_mask& __y)
    {
      __x._M_data = _Impl::_S_bit_or(__x._M_data, __y._M_data);
      return __x;
    }

    _GLIBCXX_SIMD_ALWAYS_INLINE friend simd_mask&
    operator^=(simd_mask& __x, const simd_mask& __y)
    {
      __x._M_data = _Impl::_S_bit_xor(__x._M_data, __y._M_data);
      return __x;
    }

    // }}}
    // simd_mask compares [simd_mask.comparison] {{{
    // Equality is the negation of inequality; inequality is the element-wise
    // xor of the two masks.
    _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend simd_mask
    operator==(const simd_mask& __x, const simd_mask& __y)
    { return !operator!=(__x, __y); }

    _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend simd_mask
    operator!=(const simd_mask& __x, const simd_mask& __y)
    { return {__private_init, _Impl::_S_bit_xor(__x._M_data, __y._M_data)}; }

    // }}}
    // private_init ctor {{{
    _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR
    simd_mask(_PrivateInit, typename _Traits::_MaskMember __init)
    : _M_data(__init) {}

    // }}}
    // private_init generator ctor {{{
    // Calls __gen once per index in [0, size()) and stores each result.
    template <typename _Fp, typename = decltype(bool(declval<_Fp>()(size_t())))>
      _GLIBCXX_SIMD_INTRINSIC constexpr
      simd_mask(_PrivateInit, _Fp&& __gen)
      : _M_data()
      {
        __execute_n_times<size()>([&](auto __i) constexpr {
          _Impl::_S_set(_M_data, __i, __gen(__i));
        });
      }

    // }}}
    // bitset_init ctor {{{
    _GLIBCXX_SIMD_INTRINSIC simd_mask(_BitsetInit, bitset<size()> __init)
    : _M_data(
        _Impl::_S_from_bitmask(_SanitizedBitMask<size()>(__init), _S_type_tag))
    {}

    // }}}
    // __cvt {{{
    // TS_FEEDBACK:
    // The conversion operator this implements should be a ctor on simd_mask.
    // Once you call .__cvt() on a simd_mask it converts conveniently.
    // A useful variation: add `explicit(sizeof(_Tp) != sizeof(_Up))`
    struct _CvtProxy
    {
      template <typename _Up, typename _A2,
                typename
                = enable_if_t<simd_size_v<_Up, _A2> == simd_size_v<_Tp, _Abi>>>
        _GLIBCXX_SIMD_ALWAYS_INLINE
        operator simd_mask<_Up, _A2>() &&
        {
          using namespace std::experimental::__proposed;
          return static_simd_cast<simd_mask<_Up, _A2>>(_M_data);
        }

      const simd_mask<_Tp, _Abi>& _M_data;
    };

    _GLIBCXX_SIMD_INTRINSIC _CvtProxy
    __cvt() const
    { return {*this}; }

    // }}}
    // operator?: overloads (suggested extension) {{{
  #ifdef __GXX_CONDITIONAL_IS_OVERLOADABLE__
    _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend simd_mask
    operator?:(const simd_mask& __k, const simd_mask& __where_true,
               const simd_mask& __where_false)
    {
      auto __ret = __where_false;
      _Impl::_S_masked_assign(__k._M_data, __ret._M_data, __where_true._M_data);
      return __ret;
    }

    template <typename _U1, typename _U2,
              typename _Rp = simd<common_type_t<_U1, _U2>, _Abi>,
              typename = enable_if_t<conjunction_v<
                is_convertible<_U1, _Rp>, is_convertible<_U2, _Rp>,
                is_convertible<simd_mask, typename _Rp::mask_type>>>>
      _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend _Rp
      operator?:(const simd_mask& __k, const _U1& __where_true,
                 const _U2& __where_false)
      {
        _Rp __ret = __where_false;
        _Rp::_Impl::_S_masked_assign(
          __data(static_cast<typename _Rp::mask_type>(__k)), __data(__ret),
          __data(static_cast<_Rp>(__where_true)));
        return __ret;
      }

  #ifdef _GLIBCXX_SIMD_ENABLE_IMPLICIT_MASK_CAST
    template <typename _Kp, typename _Ak, typename _Up, typename _Au,
              typename = enable_if_t<
                conjunction_v<is_convertible<simd_mask<_Kp, _Ak>, simd_mask>,
                              is_convertible<simd_mask<_Up, _Au>, simd_mask>>>>
      _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend simd_mask
      operator?:(const simd_mask<_Kp, _Ak>& __k, const simd_mask& __where_true,
                 const simd_mask<_Up, _Au>& __where_false)
      {
        simd_mask __ret = __where_false;
        _Impl::_S_masked_assign(simd_mask(__k)._M_data, __ret._M_data,
                                __where_true._M_data);
        return __ret;
      }
  #endif // _GLIBCXX_SIMD_ENABLE_IMPLICIT_MASK_CAST
  #endif // __GXX_CONDITIONAL_IS_OVERLOADABLE__

    // }}}
    // _M_is_constprop {{{
    // For the scalar ABI this asks __builtin_constant_p about the stored
    // value; otherwise the question is delegated to the storage member.
    _GLIBCXX_SIMD_INTRINSIC constexpr bool
    _M_is_constprop() const
    {
      if constexpr (__is_scalar_abi<_Abi>())
        return __builtin_constant_p(_M_data);
      else
        return _M_data._M_is_constprop();
    }

    // }}}

  private:
    friend const auto& __data<_Tp, abi_type>(const simd_mask&);
    friend auto& __data<_Tp, abi_type>(simd_mask&);
    alignas(_Traits::_S_mask_align) _MemberType _M_data;
  };

// }}}

/// @cond undocumented
// __data(simd_mask) {{{
// Read-only access to the private storage member of a simd_mask.
template <typename _Tp, typename _Ap>
  _GLIBCXX_SIMD_INTRINSIC constexpr const auto&
  __data(const simd_mask<_Tp, _Ap>& __x)
  {
    return __x._M_data;
  }

// Mutable access to the private storage member of a simd_mask.
template <typename _Tp, typename _Ap>
  _GLIBCXX_SIMD_INTRINSIC constexpr auto&
  __data(simd_mask<_Tp, _Ap>& __x)
  {
    return __x._M_data;
  }

// }}}
/// @endcond

// simd_mask reductions [simd_mask.reductions] {{{
// Returns true iff every element of __k is true.
template <typename _Tp, typename _Abi>
  _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR bool
  all_of(const simd_mask<_Tp, _Abi>& __k) noexcept
  {
    // Outside constant evaluation, and when the mask is not known to be
    // constant-propagated, use the ABI-specific implementation.
    if (!__builtin_is_constant_evaluated() && !__k._M_is_constprop())
      return _Abi::_MaskImpl::_S_all_of(__k);

    // Otherwise inspect the elements one by one.
    for (size_t __lane = 0; __lane < simd_size_v<_Tp, _Abi>; ++__lane)
      {
        if (!__k[__lane])
          return false;
      }
    return true;
  }

// Returns true iff at least one element of __k is true.
template <typename _Tp, typename _Abi>
  _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR bool
  any_of(const simd_mask<_Tp, _Abi>& __k) noexcept
  {
    // Outside constant evaluation, and when the mask is not known to be
    // constant-propagated, use the ABI-specific implementation.
    if (!__builtin_is_constant_evaluated() && !__k._M_is_constprop())
      return _Abi::_MaskImpl::_S_any_of(__k);

    // Otherwise inspect the elements one by one.
    for (size_t __lane = 0; __lane < simd_size_v<_Tp, _Abi>; ++__lane)
      {
        if (__k[__lane])
          return true;
      }
    return false;
  }

// Returns true iff no element of __k is true.
template <typename _Tp, typename _Abi>
  _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR bool
  none_of(const simd_mask<_Tp, _Abi>& __k) noexcept
  {
    // Outside constant evaluation, and when the mask is not known to be
    // constant-propagated, use the ABI-specific implementation.
    if (!__builtin_is_constant_evaluated() && !__k._M_is_constprop())
      return _Abi::_MaskImpl::_S_none_of(__k);

    // Otherwise inspect the elements one by one.
    for (size_t __lane = 0; __lane < simd_size_v<_Tp, _Abi>; ++__lane)
      {
        if (__k[__lane])
          return false;
      }
    return true;
  }

// Returns true iff __k contains both true and false elements.
template <typename _Tp, typename _Abi>
  _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR bool
  some_of(const simd_mask<_Tp, _Abi>& __k) noexcept
  {
    // Outside constant evaluation, and when the mask is not known to be
    // constant-propagated, use the ABI-specific implementation.
    if (!__builtin_is_constant_evaluated() && !__k._M_is_constprop())
      return _Abi::_MaskImpl::_S_some_of(__k);

    // The mask is mixed exactly when some element differs from its
    // predecessor.
    for (size_t __lane = 1; __lane < simd_size_v<_Tp, _Abi>; ++__lane)
      {
        if (__k[__lane] != __k[__lane - 1])
          return true;
      }
    return false;
  }

// Returns the number of true elements in __k.
template <typename _Tp, typename _Abi>
  _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR int
  popcount(const simd_mask<_Tp, _Abi>& __k) noexcept
  {
    if (!__builtin_is_constant_evaluated() && !__k._M_is_constprop())
      return _Abi::_MaskImpl::_S_popcount(__k);

    // Sum up one 0/1 term per element.
    const int __count = __call_with_subscripts<simd_size_v<_Tp, _Abi>>(
      __k, [](auto... __bits) { return ((__bits != 0) + ...); });
    // Only use the sum if it is available as a constant; otherwise fall
    // through to the ABI-specific implementation.
    if (__builtin_is_constant_evaluated() || __builtin_constant_p(__count))
      return __count;
    return _Abi::_MaskImpl::_S_popcount(__k);
  }

// Returns the lowest index whose element in __k is true. An all-false mask
// is reported through __invoke_ub on the constant-evaluation path.
template <typename _Tp, typename _Abi>
  _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR int
  find_first_set(const simd_mask<_Tp, _Abi>& __k)
  {
    if (!__builtin_is_constant_evaluated() && !__k._M_is_constprop())
      return _Abi::_MaskImpl::_S_find_first_set(__k);

    constexpr size_t _Np = simd_size_v<_Tp, _Abi>;
    // Each element contributes its own index if set and _Np otherwise; the
    // minimum over all contributions is the first set index.
    const size_t __first = __call_with_n_evaluations<_Np>(
      [](auto... __candidates) { return std::min({__candidates...}); },
      [&](auto __i) { return __k[__i] ? +__i : _Np; });
    if (__first >= _Np)
      __invoke_ub("find_first_set(empty mask) is UB");
    // Only use the value if it is available as a constant; otherwise fall
    // through to the ABI-specific implementation.
    if (__builtin_constant_p(__first))
      return __first;
    return _Abi::_MaskImpl::_S_find_first_set(__k);
  }

// Returns the highest index whose element in __k is true. An all-false mask
// is reported through __invoke_ub on the constant-evaluation path.
template <typename _Tp, typename _Abi>
  _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR int
  find_last_set(const simd_mask<_Tp, _Abi>& __k)
  {
    if (__builtin_is_constant_evaluated() || __k._M_is_constprop())
      {
        constexpr size_t _Np = simd_size_v<_Tp, _Abi>;
        // Each element contributes its own index if set and -1 otherwise; the
        // maximum over all contributions is the last set index.
        const int _Idx = __call_with_n_evaluations<_Np>(
          [](auto... __indexes) { return std::max({__indexes...}); },
          [&](auto __i) { return __k[__i] ? int(__i) : -1; });
        if (_Idx < 0)
          // Name this function (not find_first_set) in the diagnostic.
          __invoke_ub("find_last_set(empty mask) is UB");
        // Only use the value if it is available as a constant; otherwise fall
        // through to the ABI-specific implementation.
        if (__builtin_constant_p(_Idx))
          return _Idx;
      }
    return _Abi::_MaskImpl::_S_find_last_set(__k);
  }

// Overload for a single boolean: returns __x itself.
_GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR bool
all_of(_ExactBool __x) noexcept
{ return __x; }

// Overload for a single boolean: returns __x itself.
_GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR bool
any_of(_ExactBool __x) noexcept
{ return __x; }

// Overload for a single boolean: returns the negation of __x.
_GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR bool
none_of(_ExactBool __x) noexcept
{ return !__x; }

// Overload for a single boolean: always false, since one value cannot be
// both true and false.
_GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR bool
some_of(_ExactBool) noexcept
{ return false; }

// Overload for a single boolean: returns __x converted to int.
_GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR int
popcount(_ExactBool __x) noexcept
{ return __x; }

// Overload for a single boolean: returns index 0 without inspecting the
// argument.
_GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR int
find_first_set(_ExactBool)
{ return 0; }

// Overload for a single boolean: returns index 0 without inspecting the
// argument.
_GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR int
find_last_set(_ExactBool)
{ return 0; }

// }}}

/// @cond undocumented
// _SimdIntOperators{{{1
// Primary template: an empty base that contributes no operators. The
// specialization for `true` as the last argument provides them.
template <typename _V, typename _Tp, typename _Abi, bool>
  class _SimdIntOperators {};

// Base class supplying %, &, |, ^, <<, >>, their compound-assignment forms
// and unary ~ to the derived type _V. Every operation is forwarded to the
// _SimdImpl of _SimdTraits<_Tp, _Abi>.
template <typename _V, typename _Tp, typename _Abi>
  class _SimdIntOperators<_V, _Tp, _Abi, true>
  {
    using _Impl = typename _SimdTraits<_Tp, _Abi>::_SimdImpl;

    // Views *this as the derived type.
    _GLIBCXX_SIMD_INTRINSIC const _V& __derived() const
    { return *static_cast<const _V*>(this); }

    // Wraps an implementation-level value in a _V.
    template <typename _Up>
      _GLIBCXX_SIMD_INTRINSIC static _GLIBCXX_SIMD_CONSTEXPR _V
      _S_make_derived(_Up&& __d)
      { return {__private_init, static_cast<_Up&&>(__d)}; }

  public:
    // compound assignment: compute with the binary operator, then assign
    _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend
    _V&
    operator%=(_V& __lhs, const _V& __x)
    {
      __lhs = __lhs % __x;
      return __lhs;
    }

    _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend
    _V&
    operator&=(_V& __lhs, const _V& __x)
    {
      __lhs = __lhs & __x;
      return __lhs;
    }

    _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend
    _V&
    operator|=(_V& __lhs, const _V& __x)
    {
      __lhs = __lhs | __x;
      return __lhs;
    }

    _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend
    _V&
    operator^=(_V& __lhs, const _V& __x)
    {
      __lhs = __lhs ^ __x;
      return __lhs;
    }

    _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend
    _V&
    operator<<=(_V& __lhs, const _V& __x)
    {
      __lhs = __lhs << __x;
      return __lhs;
    }

    _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend
    _V&
    operator>>=(_V& __lhs, const _V& __x)
    {
      __lhs = __lhs >> __x;
      return __lhs;
    }

    _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend
    _V&
    operator<<=(_V& __lhs, int __x)
    {
      __lhs = __lhs << __x;
      return __lhs;
    }

    _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend
    _V&
    operator>>=(_V& __lhs, int __x)
    {
      __lhs = __lhs >> __x;
      return __lhs;
    }

    // binary operators with two _V operands
    _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend
    _V
    operator%(const _V& __x, const _V& __y)
    {
      const auto& __a = __data(__x);
      const auto& __b = __data(__y);
      return _SimdIntOperators::_S_make_derived(_Impl::_S_modulus(__a, __b));
    }

    _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend
    _V
    operator&(const _V& __x, const _V& __y)
    {
      const auto& __a = __data(__x);
      const auto& __b = __data(__y);
      return _SimdIntOperators::_S_make_derived(_Impl::_S_bit_and(__a, __b));
    }

    _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend
    _V
    operator|(const _V& __x, const _V& __y)
    {
      const auto& __a = __data(__x);
      const auto& __b = __data(__y);
      return _SimdIntOperators::_S_make_derived(_Impl::_S_bit_or(__a, __b));
    }

    _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend
    _V
    operator^(const _V& __x, const _V& __y)
    {
      const auto& __a = __data(__x);
      const auto& __b = __data(__y);
      return _SimdIntOperators::_S_make_derived(_Impl::_S_bit_xor(__a, __b));
    }

    _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend
    _V
    operator<<(const _V& __x, const _V& __y)
    {
      const auto& __a = __data(__x);
      const auto& __b = __data(__y);
      return _SimdIntOperators::_S_make_derived(
               _Impl::_S_bit_shift_left(__a, __b));
    }

    _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend
    _V
    operator>>(const _V& __x, const _V& __y)
    {
      const auto& __a = __data(__x);
      const auto& __b = __data(__y);
      return _SimdIntOperators::_S_make_derived(
               _Impl::_S_bit_shift_right(__a, __b));
    }

    // Shift of all elements by the same count. A negative count, or one that
    // is not smaller than the bit width of the promoted element type, is
    // reported through __invoke_ub.
    _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend
    _V
    operator<<(const _V& __x, int __y)
    {
      // Width in bits of the promoted left operand (sizeof operand is
      // unevaluated).
      constexpr size_t __promoted_bits
        = sizeof(declval<_Tp>() << __y) * __CHAR_BIT__;
      if (__y < 0)
        __invoke_ub("The behavior is undefined if the right operand of a "
                    "shift operation is negative. [expr.shift]\nA shift by "
                    "%d was requested",
                    __y);
      if (size_t(__y) >= __promoted_bits)
        __invoke_ub(
          "The behavior is undefined if the right operand of a "
          "shift operation is greater than or equal to the width of the "
          "promoted left operand. [expr.shift]\nA shift by %d was requested",
          __y);
      return _SimdIntOperators::_S_make_derived(
               _Impl::_S_bit_shift_left(__data(__x), __y));
    }

    _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend
    _V
    operator>>(const _V& __x, int __y)
    {
      // Width in bits of the promoted left operand (sizeof operand is
      // unevaluated).
      constexpr size_t __promoted_bits
        = sizeof(declval<_Tp>() << __y) * __CHAR_BIT__;
      if (__y < 0)
        __invoke_ub(
          "The behavior is undefined if the right operand of a shift "
          "operation is negative. [expr.shift]\nA shift by %d was requested",
          __y);
      if (size_t(__y) >= __promoted_bits)
        __invoke_ub(
          "The behavior is undefined if the right operand of a shift "
          "operation is greater than or equal to the width of the promoted "
          "left operand. [expr.shift]\nA shift by %d was requested",
          __y);
      return _SimdIntOperators::_S_make_derived(
               _Impl::_S_bit_shift_right(__data(__x), __y));
    }

    // unary operators (for integral _Tp)
    _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR
    _V
    operator~() const
    {
      const _V& __self = __derived();
      return {__private_init, _Impl::_S_complement(__self._M_data)};
    }
  };

//}}}1
/// @endcond

// simd {{{
/// Data-parallel vector type holding size() elements of type _Tp in the
/// ABI-dependent representation _SimdTraits<_Tp, _Abi>::_SimdMember.
///
/// The class derives from _SimdIntOperators, whose boolean template argument
/// is true only when _Tp is integral and the traits report the
/// specialization as valid, and from the traits' _SimdBase. The operations
/// defined here are implemented by forwarding to the static functions of
/// _Impl (_SimdTraits<_Tp, _Abi>::_SimdImpl).
template <typename _Tp, typename _Abi>
  class simd : public _SimdIntOperators<
		 simd<_Tp, _Abi>, _Tp, _Abi,
		 conjunction<is_integral<_Tp>,
			     typename _SimdTraits<_Tp, _Abi>::_IsValid>::value>,
	       public _SimdTraits<_Tp, _Abi>::_SimdBase
  {
    using _Traits = _SimdTraits<_Tp, _Abi>;
    // Type of the data member that stores the elements.
    using _MemberType = typename _Traits::_SimdMember;
    // Type accepted by the explicit "internal representation" constructor.
    using _CastType = typename _Traits::_SimdCastType;
    // Null _Tp pointer that is passed to _Impl functions (_S_generator,
    // _S_load, _S_store) as a type tag; it is never dereferenced in this
    // class.
    static constexpr _Tp* _S_type_tag = nullptr;
    friend typename _Traits::_SimdBase;

  public:
    using _Impl = typename _Traits::_SimdImpl;
    friend _Impl;
    friend _SimdIntOperators<simd, _Tp, _Abi, true>;

    using value_type = _Tp;
    // Proxy type returned by the non-const operator[].
    using reference = _SmartReference<_MemberType, _Impl, value_type>;
    using mask_type = simd_mask<_Tp, _Abi>;
    using abi_type = _Abi;

    // Number of elements, as given by __size_or_zero_v<_Tp, _Abi>.
    static constexpr size_t size()
    { return __size_or_zero_v<_Tp, _Abi>; }

    // Defaulted special member functions.
    _GLIBCXX_SIMD_CONSTEXPR simd() = default;
    _GLIBCXX_SIMD_CONSTEXPR simd(const simd&) = default;
    _GLIBCXX_SIMD_CONSTEXPR simd(simd&&) noexcept = default;
    _GLIBCXX_SIMD_CONSTEXPR simd& operator=(const simd&) = default;
    _GLIBCXX_SIMD_CONSTEXPR simd& operator=(simd&&) noexcept = default;

    // implicit broadcast constructor
    // Disabled when _Up (after removing cv/ref) is bool. The argument is
    // converted to value_type and passed to _Impl::_S_broadcast to initialize
    // the data member.
    template <typename _Up,
	      typename = enable_if_t<!is_same_v<__remove_cvref_t<_Up>, bool>>>
      _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR
      simd(_ValuePreservingOrInt<_Up, value_type>&& __x)
      : _M_data(
	_Impl::_S_broadcast(static_cast<value_type>(static_cast<_Up&&>(__x))))
      {}

    // implicit type conversion constructor (convert from fixed_size to
    // fixed_size)
    // Enabled only if abi_type is simd_abi::fixed_size<size()>, the
    // conversion from _Up to value_type is not narrowing, and
    // __converts_to_higher_integer_rank<_Up, value_type> holds. The source
    // is converted to array<_Up, size()> and then read through the load
    // constructor with the vector_aligned flag.
    template <typename _Up>
      _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR
      simd(const simd<_Up, simd_abi::fixed_size<size()>>& __x,
	   enable_if_t<
	     conjunction<
	       is_same<simd_abi::fixed_size<size()>, abi_type>,
	       negation<__is_narrowing_conversion<_Up, value_type>>,
	       __converts_to_higher_integer_rank<_Up, value_type>>::value,
	     void*> = nullptr)
      : simd{static_cast<array<_Up, size()>>(__x).data(), vector_aligned} {}

      // explicit type conversion constructor
      // Only available when _GLIBCXX_SIMD_ENABLE_STATIC_CAST is defined and
      // static_simd_cast<simd>(const simd<_Up, _A2>&) is a valid expression;
      // the conversion itself is done by static_simd_cast.
#ifdef _GLIBCXX_SIMD_ENABLE_STATIC_CAST
    template <typename _Up, typename _A2,
	      typename = decltype(static_simd_cast<simd>(
		declval<const simd<_Up, _A2>&>()))>
      _GLIBCXX_SIMD_ALWAYS_INLINE explicit _GLIBCXX_SIMD_CONSTEXPR
      simd(const simd<_Up, _A2>& __x)
      : simd(static_simd_cast<simd>(__x)) {}
#endif // _GLIBCXX_SIMD_ENABLE_STATIC_CAST

    // generator constructor
    // The defaulted second parameter constrains the overload: its type is
    // formed from the result type of calling _Fp with _SizeConstant<0>& via
    // _ValuePreservingOrInt. The callable is forwarded to
    // _Impl::_S_generator together with the type tag.
    template <typename _Fp>
      _GLIBCXX_SIMD_ALWAYS_INLINE explicit _GLIBCXX_SIMD_CONSTEXPR
      simd(_Fp&& __gen, _ValuePreservingOrInt<decltype(declval<_Fp>()(
						declval<_SizeConstant<0>&>())),
					      value_type>* = nullptr)
      : _M_data(_Impl::_S_generator(static_cast<_Fp&&>(__gen), _S_type_tag)) {}

    // load constructor
    // The pointer is first passed through _Flags::_S_apply<simd> and the
    // result is handed to _Impl::_S_load. The flag object itself is unused;
    // only its type matters.
    template <typename _Up, typename _Flags>
      _GLIBCXX_SIMD_ALWAYS_INLINE
      simd(const _Up* __mem, _Flags)
      : _M_data(
	  _Impl::_S_load(_Flags::template _S_apply<simd>(__mem), _S_type_tag))
      {}

    // loads [simd.load]
    // Replaces the stored data with the result of _Impl::_S_load on the
    // flag-adjusted pointer; the result is cast to the type of _M_data.
    template <typename _Up, typename _Flags>
      _GLIBCXX_SIMD_ALWAYS_INLINE void
      copy_from(const _Vectorizable<_Up>* __mem, _Flags)
      {
	_M_data = static_cast<decltype(_M_data)>(
	  _Impl::_S_load(_Flags::template _S_apply<simd>(__mem), _S_type_tag));
      }

    // stores [simd.store]
    // Passes the stored data and the flag-adjusted pointer to
    // _Impl::_S_store.
    template <typename _Up, typename _Flags>
      _GLIBCXX_SIMD_ALWAYS_INLINE void
      copy_to(_Vectorizable<_Up>* __mem, _Flags) const
      {
	_Impl::_S_store(_M_data, _Flags::template _S_apply<simd>(__mem),
			_S_type_tag);
      }

    // scalar access
    // Non-const access returns a `reference` proxy built from the data member
    // and the index (converted to int).
    _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR reference
    operator[](size_t __i)
    { return {_M_data, int(__i)}; }

    // Const access returns the element by value. For the scalar ABI the data
    // member is returned directly (the index is only checked by
    // _GLIBCXX_DEBUG_ASSERT to be 0); otherwise the data member is indexed.
    _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR value_type
    operator[]([[maybe_unused]] size_t __i) const
    {
      if constexpr (__is_scalar_abi<_Abi>())
	{
	  _GLIBCXX_DEBUG_ASSERT(__i == 0);
	  return _M_data;
	}
      else
	return _M_data[__i];
    }

    // increment and decrement:
    // The prefix forms modify the data in place through _Impl and return
    // *this; the postfix forms return a copy taken before the modification.
    _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR simd&
    operator++()
    {
      _Impl::_S_increment(_M_data);
      return *this;
    }

    _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR simd
    operator++(int)
    {
      simd __r = *this;
      _Impl::_S_increment(_M_data);
      return __r;
    }

    _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR simd&
    operator--()
    {
      _Impl::_S_decrement(_M_data);
      return *this;
    }

    _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR simd
    operator--(int)
    {
      simd __r = *this;
      _Impl::_S_decrement(_M_data);
      return __r;
    }

    // unary operators (for any _Tp)
    // operator! yields a mask_type built from _Impl::_S_negate.
    _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR mask_type
    operator!() const
    { return {__private_init, _Impl::_S_negate(_M_data)}; }

    // Unary plus returns a copy of *this.
    _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR simd
    operator+() const
    { return *this; }

    // Unary minus yields a simd built from _Impl::_S_unary_minus.
    _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR simd
    operator-() const
    { return {__private_init, _Impl::_S_unary_minus(_M_data)}; }

    // access to internal representation (suggested extension)
    // Explicit construction from _CastType; the value directly initializes
    // the data member.
    _GLIBCXX_SIMD_ALWAYS_INLINE explicit _GLIBCXX_SIMD_CONSTEXPR
    simd(_CastType __init) : _M_data(__init) {}

    // compound assignment [simd.cassign]
    // Each compound assignment is expressed through the corresponding binary
    // operator followed by assignment to __lhs.
    _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend simd&
    operator+=(simd& __lhs, const simd& __x)
    { return __lhs = __lhs + __x; }

    _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend simd&
    operator-=(simd& __lhs, const simd& __x)
    { return __lhs = __lhs - __x; }

    _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend simd&
    operator*=(simd& __lhs, const simd& __x)
    { return __lhs = __lhs * __x; }

    _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend simd&
    operator/=(simd& __lhs, const simd& __x)
    { return __lhs = __lhs / __x; }

    // binary operators [simd.binary]
    // Each operator forwards both data members to the matching _Impl function
    // and wraps the result using the _PrivateInit constructor.
    _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend simd
    operator+(const simd& __x, const simd& __y)
    { return {__private_init, _Impl::_S_plus(__x._M_data, __y._M_data)}; }

    _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend simd
    operator-(const simd& __x, const simd& __y)
    { return {__private_init, _Impl::_S_minus(__x._M_data, __y._M_data)}; }

    _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend simd
    operator*(const simd& __x, const simd& __y)
    { return {__private_init, _Impl::_S_multiplies(__x._M_data, __y._M_data)}; }

    _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend simd
    operator/(const simd& __x, const simd& __y)
    { return {__private_init, _Impl::_S_divides(__x._M_data, __y._M_data)}; }

    // compares [simd.comparison]
    // All comparisons return a mask_type created by _S_make_mask from the
    // result of the matching _Impl function.
    _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend mask_type
    operator==(const simd& __x, const simd& __y)
    { return simd::_S_make_mask(_Impl::_S_equal_to(__x._M_data, __y._M_data)); }

    _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend mask_type
    operator!=(const simd& __x, const simd& __y)
    {
      return simd::_S_make_mask(
	_Impl::_S_not_equal_to(__x._M_data, __y._M_data));
    }

    _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend mask_type
    operator<(const simd& __x, const simd& __y)
    { return simd::_S_make_mask(_Impl::_S_less(__x._M_data, __y._M_data)); }

    _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend mask_type
    operator<=(const simd& __x, const simd& __y)
    {
      return simd::_S_make_mask(_Impl::_S_less_equal(__x._M_data, __y._M_data));
    }

    // x > y is computed as y < x: the operands of _S_less are swapped.
    _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend mask_type
    operator>(const simd& __x, const simd& __y)
    { return simd::_S_make_mask(_Impl::_S_less(__y._M_data, __x._M_data)); }

    // x >= y is computed as y <= x: the operands of _S_less_equal are
    // swapped.
    _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend mask_type
    operator>=(const simd& __x, const simd& __y)
    {
      return simd::_S_make_mask(_Impl::_S_less_equal(__y._M_data, __x._M_data));
    }

    // operator?: overloads (suggested extension) {{{
    // Only compiled when __GXX_CONDITIONAL_IS_OVERLOADABLE__ is defined.
    // Starts from a copy of __where_false and uses _Impl::_S_masked_assign
    // with the mask __k and the data of __where_true.
#ifdef __GXX_CONDITIONAL_IS_OVERLOADABLE__
    _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend simd
    operator?:(const mask_type& __k, const simd& __where_true,
	const simd& __where_false)
    {
      auto __ret = __where_false;
      _Impl::_S_masked_assign(__data(__k), __data(__ret), __data(__where_true));
      return __ret;
    }

#endif // __GXX_CONDITIONAL_IS_OVERLOADABLE__
    // }}}

    // "private" because of the first argument's namespace
    // Initializes the data member directly from a _MemberType value.
    _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR
    simd(_PrivateInit, const _MemberType& __init)
    : _M_data(__init) {}

    // "private" because of the first argument's namespace
    // Value-initializes the data member, then assigns ~*this to the elements
    // selected by a mask constructed from the bitset.
    _GLIBCXX_SIMD_INTRINSIC
    simd(_BitsetInit, bitset<size()> __init) : _M_data()
    { where(mask_type(__bitset_init, __init), *this) = ~*this; }

    // Reports whether the stored data is known at compile time: for the
    // scalar ABI via __builtin_constant_p, otherwise by asking the data
    // member's own _M_is_constprop().
    _GLIBCXX_SIMD_INTRINSIC constexpr bool
    _M_is_constprop() const
    {
      if constexpr (__is_scalar_abi<_Abi>())
	return __builtin_constant_p(_M_data);
      else
	return _M_data._M_is_constprop();
    }

  private:
    // Wraps a mask member value into mask_type via its _PrivateInit
    // constructor; used by the comparison operators above.
    _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR static mask_type
    _S_make_mask(typename mask_type::_MemberType __k)
    { return {__private_init, __k}; }

    // The free __data functions need access to _M_data.
    friend const auto& __data<value_type, abi_type>(const simd&);
    friend auto& __data<value_type, abi_type>(simd&);
    // Element storage, aligned to _Traits::_S_simd_align.
    alignas(_Traits::_S_simd_align) _MemberType _M_data;
  };

// }}}
/// @cond undocumented
// __data {{{
// Read-only access to the internal data member of a simd object.
template <typename _Tp, typename _Ap>
  _GLIBCXX_SIMD_INTRINSIC constexpr const auto&
  __data(const simd<_Tp, _Ap>& __x)
  {
    const auto& __member = __x._M_data;
    return __member;
  }

// Mutable access to the internal data member of a simd object.
template <typename _Tp, typename _Ap>
  _GLIBCXX_SIMD_INTRINSIC constexpr auto&
  __data(simd<_Tp, _Ap>& __x)
  {
    auto& __member = __x._M_data;
    return __member;
  }

// }}}
namespace __float_bitwise_operators { //{{{
// Bitwise operators on simd objects, implemented directly on the internal
// data via the ABI's _SimdImpl. Only operator~ carries an explicit
// is_floating_point constraint; the binary operators are unconstrained
// templates that become visible where this namespace is brought into scope.

// XOR of the internal data of __a and __b.
template <typename _Tp, typename _Ap>
  _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR simd<_Tp, _Ap>
  operator^(const simd<_Tp, _Ap>& __a, const simd<_Tp, _Ap>& __b)
  {
    using _Impl = typename _Ap::_SimdImpl;
    const auto& __lhs = __data(__a);
    const auto& __rhs = __data(__b);
    return {__private_init, _Impl::_S_bit_xor(__lhs, __rhs)};
  }

// OR of the internal data of __a and __b.
template <typename _Tp, typename _Ap>
  _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR simd<_Tp, _Ap>
  operator|(const simd<_Tp, _Ap>& __a, const simd<_Tp, _Ap>& __b)
  {
    using _Impl = typename _Ap::_SimdImpl;
    const auto& __lhs = __data(__a);
    const auto& __rhs = __data(__b);
    return {__private_init, _Impl::_S_bit_or(__lhs, __rhs)};
  }

// AND of the internal data of __a and __b.
template <typename _Tp, typename _Ap>
  _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR simd<_Tp, _Ap>
  operator&(const simd<_Tp, _Ap>& __a, const simd<_Tp, _Ap>& __b)
  {
    using _Impl = typename _Ap::_SimdImpl;
    const auto& __lhs = __data(__a);
    const auto& __rhs = __data(__b);
    return {__private_init, _Impl::_S_bit_and(__lhs, __rhs)};
  }

// Complement of the internal data of __a; participates in overload
// resolution only when _Tp is a floating-point type.
template <typename _Tp, typename _Ap>
  _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR
  enable_if_t<is_floating_point_v<_Tp>, simd<_Tp, _Ap>>
  operator~(const simd<_Tp, _Ap>& __a)
  {
    using _Impl = typename _Ap::_SimdImpl;
    const auto& __operand = __data(__a);
    return {__private_init, _Impl::_S_complement(__operand)};
  }
} // namespace __float_bitwise_operators }}}
/// @endcond

/// @}
_GLIBCXX_SIMD_END_NAMESPACE

#endif // __cplusplus >= 201703L
#endif // _GLIBCXX_EXPERIMENTAL_SIMD_H

// vim: foldmethod=marker foldmarker={{{,}}}
                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                
                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                     Y                                  Y                                                                                                    Y                                     $
                           	U
                     Y       	 e)                     Z       V	 j)                    Z                               Z                               Z           v                    R                              Z          5                    R       +                             e	  1                $          '                    Z           
                    Z       *                     Z       p                              ^ lB                     Z                               Z       O                     us                               	Z                               
Z           
                    Z           E                    һ       ]                     ̓                              :                   	           6           3         
                     p
                         	     T                    ׹     
     Ɛ         
           ^      z ǐ                    Z        X                         
     	                           ܐ                                                                      +                
Z           `                    Z                               Z           ߐ                    Z           p                    Z                                  &                              j:          t                    Z                               Z       O                                                         	 y                    A                              A                               Z                    !                            "           Z      !              #           Z      "              $           Z      #              %                $           &                   %              '                   &              (           Z      ' }             )           Z      (     	         *                )     !         +                   * A "      ,           Z      +     L Yg     -                   ,     & Zg     .                -              /                   .     * qf     0           R8     / c	 +         1           Z      0  >         2           Z      2                 3           !Z      3     ,         4           Z      4              5           Z      5 -          6           Z      6     ?n         7            Z      7     @n         8                   8     2         9                9              :           S     :              ;           s     ;               <                   <     _ ~     =           !     =     `         >           "Z      >                 ?                ? k b         @                @     c         A           P     A M; d         B                B     e         C                                
 libjsamp-java   
 libjsofa-java   
 libkxml2-java     0689e65ca0f8335c5d83e3aaec3cc9fb   alarm-clock-applet   0.4.1-3    gstreamer1.0-plugins-base    gstreamer1.0-plugins-good    libayatana-appindicator3-1   0.4.90   2.39.90    libgstreamer1.0-0    3.11.5  " alarm-clock-applet-gconf-migration  	 playerctl     3a3d8618d35c191f006c46f991ff45dc   0.4.1   
 gconf-service    libgconf-2-4   3.2.5    2.49.2   0.4.1~    032494fc2420e9eafd43ca180bb66fef   libalberta-dev  
 3.0.3-1+b1   3.0.3-1    alberta    libalberta4    libalberta2-dev    3     b25cba8f2ea2f2773152673b759fae8d    afb017f8092816903f5edbc4a5573a11   aldo   0.7.8-2    hamradio   libao4    de36683197f16f74b87f7c868c662216   ale   	 0.9.0.3-5     fc3be6a181259ad7b12bfe19020358da   alembic    1.8.1-2    python3-alembic     cd0efbf0f89bffe2d4dc35fa935c7c7e   python3-importlib-metadata   3.9    python3-importlib-resources    1.3.0     98ec47f7a2dda1713520d880822f9cad   alertmanager-irc-relay  
 0.4.2-1+b9   0.4.2-1     3e3b4f210b982523cf61a95b138644b4   alevt   	 1:1.8.0-2     b69b5f3f83c41724d16a921f749fbcd1   alex  	 3.2.7.1-3   	 sgml-base    1.28    bc94d454c7a2f43370b4ac0cf89a5863   alex4    1.1-9   
 alex4-data  	 libaldmb1   
 liballegro4.4   	 2:4.4.3.1    libdumb1    3566f87583f865118de0e26a364d6511    ad8fd6932daabde0a312d1bb925c8e41   alfa   2.2-1+b1   2.2-1    libcfitsio10   4.2.0~    eb7520e82a18ed6e7eea5d4bbf9fde07   alfred   2023.0-1   libgps28   3.20    dbd5ea646c4ad23f94d4eb5b2ca8d100  
 libalglib-dev    3.19.0-1   alglib  
 libalglib3.19     b4738e0f01a0b7c1868e2aa5e279816b    db996366870b5e4994f3be825a136ae6   algobox    1.1.1+dfsg-1  	 education    libqt5webchannel5    5.6.1     2f71f256ba219bdd2f090147d9147a28   algol68g  
 3.1.2-1+b1   3.1.2-1    libquadmath0    62e357ec894f7a62aa94948299a0a1f4  	 algotutor    0.8.6-6    perl-tk     c41d775ec41f2b56ed5bc23de583267b   alice    0.19-2   perl   libany-moose-perl    libanyevent-dbi-perl   libanyevent-http-perl    libanyevent-irc-perl   libanyevent-perl   libdbd-sqlite3-perl    libfile-sharedir-perl    libirc-formatting-html-perl    libjson-perl   liblist-moreutils-perl    libplack-middleware-session-perl  
 libplack-perl    libsql-abstract-perl   libtext-microtemplate-perl   libtry-tiny-perl   twiggy   libdesktop-notify-perl    3deea78573d420609be015425a85dd98   alien    8.95.6   rpm    dpkg-dev   cpio   rpm2cpio   patch    lintian    lzma    250884c1c7113f08b8c335ac3cf22206   alien-hunter   1.7-10   libbytecode-java   libbiojava-java    1:1.9.5+dfsg   libcommons-cli-java    bioperl     5f7c91ddef3ac915fd4f61d66745892f   alienblaster   1.1.0-11   alienblaster-data    2.27    da1f8f1a6453d62874036331e075d65f    ba7f91b87a7940dd88d237ce23043f59   aliki    0.3.0-5    libfftw3-single3    aa700150ac855fc2eef498cd2e5dcd35   alire   
 1.2.1-1+b1   1.2.1-1   
 libgnatcoll21    23.0.0   libxmlezout7   1.06.2   gprbuild    433c71a6f09598f6a1c63d10ff52d818   alkimia-bin    8.1.1-2    alkimia   
 libalkimia5-8    8.1.0    5.7.0     4c7f272b2ad233a2c850dccb700b919e   alkimia-data    06b0e5716e8eb21661a76054a2526ec9   libkf5newstuff5    5.78.0   libkf5newstuffcore5    5.54.0    703b960ad6218236645badb5dd555698   libalkimia5-dev   
 libgmp-dev    d4f39085a3e8190874c74c3de622b910   plasma-widget-foreigncurrencies    qml-module-org-kde-alkimia   qml-module-qtquick-layouts    dd297f3b7f896c6900efb1bfd52de891    ,  8 ,    #3     $3 { n                       e     R  )     ,   ,    &3     '3 { p          M             I?         *      $      )3     *3 { u          #      @     4W         +            ,3     -3 { w          $%      8	     k     Y  ,            /3     03 { x                     F     Y  -             43     53 { y  33     l{      h     +z     Y  .            73     83 { 1          ԥ      x     VZ     Y  /            :3     ;3 {           L           gv     Y  0            =3     >3 { }          X           +y     Y  1            @3     A3 {           L           `k     Y  2            C3     D3 {           t^           2%     Y  3            F3     G3 {           dJ           \-     Y  4            I3     J3 {           (W           :6*     Y  5     F! $ Z! F!    L3     M3 {                 `V     œl         6     F!  Z! F!    O3     P3 {           @B             n     Y  7     F!  Z! F!    R3     S3 {                  \     '|'     Y  8     F!  Z! F!    U3     V3 |           M             UԌ     Y  9     F!  Z! F!    X3     Y3 |                       F]     Y  :     F!  Z! F!    [3     \3 |           lF             9     Y  ;     F!  Z! F!    ^3     _3 |                  p     GU9     Y  <     F!  Z! F!    a3     b3 |           <B             c     Y  =     F!  Z! F!    d3     e3 |           ؠ                 Y  >     F!  Z! F!    g3     h3 |           B             xt     Y  ?     F!  Z! F!
    j3     k3  |           R             !B     Y  @     F!  Z! F!    m3     n3 #|           B             Sd     Y  A     F!  Z! F!    p3     q3 &|           Xf             3q     Y  B     F!  Z! F!    s3     t3 )|           C             K9*     Y  C     F!  Z! F!    v3     w3 -|           u       (     =     Y  D     F!  Z! F!    y3     z3 3|                 ,
     @	(     Y  E     F!  Z! F!    |3     }3 G|           Tr           M.     Y  F     '  ' '     3     3            Tq       h     ל2         G     '  ' '    3     3 Ď           a       D     א     Y  H     `x   tx `x    3     3 Ύ                                I     x h x x     3     3 Ў            5]      xZ    FY         J     y * y ,y     3     3 ׎           m            Ky         K     <z q Nz <z    3     3  ȱ          5             J         L     z   z z    3     3  ʱ          l           
         M     z $ z z    3     3  Ա          4           6     Y  N     {   { {     3     3  ֱ                @     \8         O     { $ { {    3     3 	 ٱ          [      (     ī     Y  P     {  { {     3     3 
 ݱ          T           ;]     Y  Q     {  { {     3     3  ޱ          d           k     Y  R       $ |      3     3  ߱          l@             Yʱ         S     j} T  ^} j}    3     3            x            KZ         T     }  } }    3     3  Ց          <      $     v         U     }  } }    3     3            P             9;
     Y  V     }  } }    3     3    3           T
     2     Y  W     } $ } }    3     3 !           +"          Z     Y  X     }  } }    3     3 #           t4             ,B8      Y  Y     }  } }    3     3 &           6             
s     Y  Z     }  } }    3     3 *                       Ƌ     Y  [      H       3     3 .           n            Z         \            3     3 0           %           ]Im         ]      H       3     3 :   3                 =         ^      H       3     3 A                 $     f     Y  _      H       3     3 I            k                Y  `             3     3 O           _       L     5}5     Y  a             3     3 P           T/            -*O	     Y  b       N~     3     3 R           D;            I
         c     }  <~ }    3     3 U                  
     K         d     } H } }    3     3 V   3           &     Fd         e       Ȃ     3     3 `           P:            ,>:
         f      H  zq     3     3 c   3     s       p     SgI         g     | H       3     3 i    3     Xb             䈺%         h     | H       3     3 n   3     ?      P     JS     Y  i     | H       3     3 | 	  3     P`             q     Y  j     | H       3     3 ~   3           
          Y  k     | H       3     3  
  3     h             n
     Y  l     | H       3     3    3     ,c             H.     Y  m     | H       3      4  
  3     ]             #5     Y  n     | H       4     4    4     c             q     Y  o     | H       4     4    4     b             *     Y  p     |        
4     4                  H     `T     Y  q     |       
4     4            dr             ?      Y  r      L      4     4                       鞇         s        ܆      4     4                        vB         t     & 2  &     4     4                 8    y         u     
         4     4            (H           -0M         v             4     ^P  $                 	     ƛ     Y  w      <       `P     aP  %                               x     
 $  
    cP     dP     7          h            &          y     R   
     fP     gP Ώ 8          6            [].!     Y  z     R   
    iP     jP ӏ :                 <     6v#6"     Y  {     R   
    lP     mP ֏ 9          \            Ey#     Y  |     <  R <     oP     pP  =                     h$         }     <  R <     rP     sP  >          P$      <     AL}%     Y  ~     <  R <     uP     vP  F          T           0*&     Y               xP     yP  H                     +*'                       |P     }P  I  {P     u       T     (                      P     P  K  P     d      X+     *Z)                     P     P Q w          "             *              &Z *  &Z    P     P U y          <=             .{F+              L  , Z     P     P W z                 g     6%,               H       P     P ] ~  P     q       8     
8-              $ H  $     P     P `   P                 `G(.              d  T x'     P     P i           
       P      ne/              &   4     P     P q           ,.      h     "ue0              n  d n     P     P r           \
      8-     ).1                      P     P            .             nş2              > * J >     P     P            `B             3                     P     P                   \      w#A
4              q   q    P     P                   x      5              q   q    P     P            (             :6     Y       | h p |     P     P                       %h7              | h p |    P     P             1           #}|8     Z               P     P            L           Io9               h       P     P            f           B\:               h      P     P                          x|;              b p x b    P     P            v       `     |X<               H       P     P                        
lM =                 "     P     P            ,           U>              j  \ x     P     P            Q            Jy?                     P     P               DV             @               H       P     P            x           ܺ%A               H       P     P ɐ   P     "
      dR     nB              8  D 8    P     P ݐ           P       `      >C              8  D 8    P     P ߐ            _       x     4dD     
Z       8  D 8    P     P               PQ            }E     Z       8  D 8    P     P                   h     xY`F     Z       .  " .     P     P            N            VG                      P     P            @      Xo     ;UAH                     P     P                     Q     |z|I     Z               P     P  Ĳ          4      >     ұJ     Z              P     P  Ų          P            1>tK     Z               P     P                       9AL     Z               P     P  ò                ]     5M     Z               P     P  ɲ          <             HN               h       P     P  ʲ          0(             YO              @ * 4 @     P     P # Ͳ          i       l     7P              d   d    P     P     Ҳ          0      $     ëQ              d   d     Q     Q , Ӳ          |            E!R     Z       d   d    Q     Q 3 ղ          <           ,YTS     Z       d   d    Q     Q 8 ֲ          \`       h     :JfT     Z       d   d    	Q     
Q > ײ                X     XaU     Z       d   d    Q     
Q F ز          x            0|V     Z       d   d    Q     Q P Բ          r             JގmW      Z              Q     Q a ߲                
      VX              B  q B  B      Q     Q x                 l:     { 1Y              B  $ B  B     Q     Q            i           Z              NC   BC  NC      Q     Q            pD             
[              NC   BC  NC      Q     Q                    \      /\     %Z       NC   BC  NC      !Q     "Q                       D8]     &Z       C   C  C      $Q     %Q            Dk       `     X^              *D   "D  *D      'Q     (Q            2           _              xD   jD  xD     *Q     +Q            xR       #    \I`              E  $ E  E      -Q     .Q            #             T*Ha              E   E  E      0Q     1Q             P      T     EI1b              :F   JF  :F     3Q     4Q            I!           :c              F   JF  :F     6Q     7Q            86             "0d     -Z       F   JF  :F     9Q     :Q            0/             ]e     .Z       :F   JF  :F     <Q     =Q            (            uf     /Z       :F   JF  :F     ?Q     @Q ȑ 
          (A             g     0Z       F   JF  :F     BQ     CQ ԑ           j       (     t,h     1Z       `H   rH  `H     EQ     FQ ؑ           hD             D7oAi              `H   rH  `H      HQ     IQ ّ 
          V             R|<j     3Z  Ö     `H   rH  `H     KQ     LQ ܑ           Љ             Zk     4Z  Ė     `H   rH  `H     NQ     OQ            HG       x      2'l     5Z  Ŗ     `H   rH  `H      QQ     RQ            Pn       0     su`m     6Z  Ɩ     `H   rH  `H      TQ     UQ            PB             (9n     7Z  ǖ     ZJ   J  J      WQ     XQ            i      $     xo         Ȗ     ZJ   J  J      ZQ     [Q            @      ?     \Ep     9Z  ɖ     6K   FK  6K     ]Q     ^Q            L      8     h
q         ʖ     6K   FK  6K     `Q     aQ            O
      $!     0";r     ;Z  ˖     6K   FK  6K     cQ     dQ            `           z%s     <Z  ̖     4L  L DL  4L     fQ     gQ             @K           _t         ͖     4L   L  4L      iQ     jQ  "          t       @     
u         Ζ     4L   L  4L      lQ     mQ 	 $          ^       x     grv     ?Z  ϖ     4L   L  4L      oQ     pQ 
 #          h           }w     @Z  Ж     4L   L  4L      rQ     sQ  %          |1       ,     +'x     AZ  і     M   N  M      uQ     vQ  &          м       4     E}y         Җ     M   N  M      xQ     yQ  (          v       h     6@z     CZ  Ӗ     N  L N  N     {Q     |Q     *          Lz
           棇{         Ԗ     N  L 6O  N     ~Q     Q     +          8       h      pY |         Ֆ     O   O  O     Q     Q  ,                     +E}         ֖     O   O  O     Q     Q     0          I          *f~     GZ  ז     O   O  O     Q     Q  -          hP      |     o     HZ  ؖ     O   O  O     Q     Q % .          F           Aʀ     IZ  ٖ     O   O  O     Q     Q - 1          @            "ف     JZ  ږ     O   O  O     Q     Q 2 4          x            P_J     KZ  ۖ     Q  L Q  Q     Q     Q 9 5                     ވx         ܖ     `H   HR  `H      Q     Q > 9          TG             f@         ݖ     `H   HR  `H      Q     Q ? :          6             >#H     NZ  ޖ     4L   R  4L     Q     Q     ;          a            ^         ߖ     4L  L R  4L     Q     Q     <                           PZ       S   S  S      Q     Q @ =          .             dE              S   S  S     Q     Q B >          *             H     RZ       S   S  S      Q     Q     ?          `             qY      SZ       S   S  S     Q     Q D @          p       T      u     TZ       T   T  T      Q     Q F A          T           J&              JU   ZU  JU      Q     Q L C                     Ҕ              U   U  U     Q     Q R D  Q                4              U   U  U     Q     Q [                      +     XZ       U   U  U     Q     Q a G          tT       L     Umn     YZ       U   U  U     Q     Q b                  p     b'a     ZZ       U   U  U     Q     Q f H          X       8     $ƒ     [Z       U   U  U     Q     Q h B                      Oe     \Z       U   U  U     Q     Q n K          |       L     2     ]Z       U   U  U     Q     Q r                            ^Z       U   U  U 
    Q     Q u I                 0     AR5     _Z       U   U  U     Q     Q v M          f            )a     `Z       U   U  U     Q     Q | N                      [o     aZ       U   U  U     Q     Q  O          z            1?     bZ       U   U  U     Q     Q  P          hO       @     	     cZ       U   U  U     Q     Q                   `     H     dZ       U   U  U     Q     Q  2                L     3(     eZ       U   U  U     Q     Q                        ^+(     fZ       U   U  U     Q     Q  Q          V       T     z     gZ       U   U  U     Q     Q     3          ^           `     hZ       U   U  U     Q     Q     L          P                 iZ       U   U  U     Q     Q  S          Q            -#     jZ       U   U  U     Q     Q  X          j            CI     kZ       U   U  U     Q     Q  Y  Q     lV      8     \@     lZ       U   U  U     Q     Q  \          P       0     8zZ     mZ       U   U  U     Q     Q  ]          4A       L     ],֥     nZ       D\  L P\  D\     Q     Q  _          g           c              ]  L ]  ]     R     R  c          8           ι               ]  L ]  ]     R     R               +           "     qZ         L        R     R F e          r           hk                L        
R     R J g                     r     sZ         L        
R     R M h          t           2     tZ         L        R     R     `          dc       ,     qY      uZ         L        R     R O a                       	jx­     vZ         L        R     R Q j                 h     Ie     wZ         L        R     R R k          $
      4     ()     xZ         L        R     R U m          	      
     ;     yZ  	       L        R      R ^ r          De      hv     ~2     zZ  
       $        "R     #R               <       5     Vy|     {Z       w L *  w    %R     &R            p       p      k}                          (R     )R                   t      	~         
     \            +R     ,R            |
       <      *     ~Z       \          .R     /R               d       P      pY      Z                 1R     2R                      d      pY      Z                                                                           3e02c75e5dcfb34bc7018453d99743b1   all-knowing-dns    1.7-4   
 libmouse-perl    libmousex-nativetraits-perl    libnet-dns-perl    libprivileges-drop-perl     1df6f6c08cc7056f9106168642d482b9   allegro4-doc   2:4.4.3.1-3   
 allegro4.4   liballegro-doc   2:4.3     9b7366b5ced0412260a68241fb797240   liballeggl4-dev   
 liballeggl4.4    liballegro4-dev    liballegro4.2-dev   	 2:4.4.2-3     40ff51e654f3dece2795af8f06c7f198   libxcursor1    libxxf86vm1     126238cc34252455b577540d46ab0a19   libjpgalleg4.4  
 libxpm-dev   libxxf86vm-dev   libxcursor-dev   liballegro-dev   liballegro-dev-common    liballegro4a    5b17ce6a77ff7eb24483c5ae33ba5961   libxxf86dga1   liballegro4.4-plugin-alsa   	 2:4.4.2-4     0b55a0d3ce990915ca710be119686ac7   libjpgalleg4-dev    cf1110d9493c80ae0219cc34674e1ef4    0236f0e177c928689a23d19f083cbac5   libloadpng4-dev   
 libloadpng4.4     8382fb1c7cd5b4080ea300508a818113    8e66d6adcf14dbf4adc2546849facda8   liblogg4-dev  
 liblogg4.4  
 libvorbis-dev     eb3eb8af55cb95f13f93a90341a19233    3bf6f379fb24caced7ba17311e3b4297   allegro5-doc   2:5.2.8.0+dfsg-1   allegro5    cc7e01168096c47a3cdf0971974753fc   liballegro-acodec5-dev   liballegro-acodec5.2   liballegro5-dev   
 libopenal-dev    libopus-dev    libpulse-dev   libdumb1-dev   libflac-dev     f4fc829bcc7525786ca1d44b7b855a9a   liballegro-audio5.2   	 2:5.2.8.0   
 liballegro5.2   	 libflac12    libopusfile0    a17c2c3f1938cb32d4f663648cde91a8   liballegro-audio5-dev     25069f1e766a3d4274621015675dbca5    ff39df8de96c5bc0babbe823f6cfa9bf   liballegro-dialog5-dev   liballegro-dialog5.2    37f581df71d7b02add213849471af87d   2.31.8    9f8845cb202f6dd0dde9ec0e8c15fc03   liballegro-image5-dev    liballegro-image5.2    libwebp-dev     99707de7f88fe7f11111872af07eb08f   libwebp7    2b4f80547e187442f8fd8f121c25dec8   liballegro-physfs5-dev   liballegro-physfs5.2  
 libphysfs-dev     269066c71ffe115cbe1348bd7ac714b7  
 libphysfs1   3.0.2     c0d605b7975a4d3169f40c35dc7f3a51   liballegro-ttf5-dev    liballegro-ttf5.2     2dbd2355e94f821be650945f48d31fc8    ce2c8cf00883a0db8d29a65e65035a1e   liballegro-video5-dev    liballegro-video5.2   
 libtheora-dev     f78cce8daff4f7527b1160da4966a141   2:5.0.6    libogg0    1.0rc3  
 libtheora0   1.0    libvorbis0a     2ba8e426dda31ca77192049385106d68   libgl1-mesa-dev    libglu1-mesa-dev  
 libxrandr-dev     10ae8ae8e5d829419d4a1f0eda9f3bdf   libglx0   
 libopengl0   2:1.2.99.901   libxi6  
 2:1.2.99.4  
 libxrandr2   2:1.2.0     a038892a4832b94a171605cb02239c7b   allelecount    4.3.0-2         O Y  G| Î     Y          Y  { Ŏ    Y   Ǝ    Y   ǎ    Y  
 Ȏ     Y  0 Ɏ    Y  R ʎ    Y   ˎ    Y   ̎ 	   Y      ͎ 
  } Y             Y      ώ    Y  7     
  d Y      ю   O Y   Ҏ   > Y  !{ ӎ    Y  { Ԏ   Z Y  ` Վ   q Y      ֎    Y  c        Y  c ؎   O Y  ю َ    Y  c ڎ    Y  G ێ    Y   ܎    Y   ݎ   	 Y  ;x ގ    Y  { ߎ   
 Y  f2     Y  |     Y  M    
 Y  3u      Y  b  !   Y  l  "  ] Y  DM  #   Y  M  $   Y  z  %   Y  z     &  6 Y  L  '   Y          (   Y  P  )   Y  5  *   Y    +   Y       ,   Y       -   Y  M  .   Y    /  w Y  {  0   Y       1   Y       2   Y  4  3   Y  +  4   Y          5   Y    6   Y       7   Y  S  8  h Y  ؎  9   Y  M  :    Y  R  ;   Y  K  <   Y  s`   =   Y  "b  >  ) Y  b  ?  ! Y    @  " Y       A  ߦ Y  b  B  # Y  *  C  $ Y  :  D  % Y  :  E  & Y          F  ' Y  !     G  h Y    H  ) Y       I  h Y  
 
 J  ) Y       K  ( Y       L  > Y  L  M  ) Y    N  * Y  /     O  x Y  Ea  P  + Y       Q   Y       R   Y  Qb     S  , Y       T  O Y    U  - Y  C  V  . Y       W  / Y  #  X  0 Y       Y  1 Y       Z  2 Y       [  3 Y  a     \  4 Y     ]  5 Y          ^   Y  wc " _  6 Y  '     `  4 Y   $ a  m Y   % b  o Y  /b     c  7 Y      ' d  8 Y      ( e  9 Y   ) f  : Y  "     g  ; Y  . + h  m Y  $ , i  < Y   - j  = Y  ,     k   Y  - / l  O Y  +     m  > Y  * 1 n  ? Y   2 o  @ Y  Hm 3 p  h Y  / 4 q   Y  v{ 5 r   Y  w{ 6 s  A Y  k 7 t   Y      8 u  B Y  I 9 v  C Y  5     w  D Y  U ; x  O Y  3 < y  ϣ Y  { = z  [ Y  ގ > {  t Y  { ? |  E Y  @ @ }  F Y          ~  D Y  0 B   O Y  ; C   G Y  7 D   H Y   E   ޣ Y   F   j Y  ic G    Y  > H   I Y         ' Y  c J   J Y   O K   D Y  A L   O Y  B M   K Y      N   L Y         h Y  L       8 Y  ) Q   M Y  M       N Y  T S   O Y   T   P Y  S       Q Y  K       R Y  Y" W   S Y  n X   T Y  /[ Y   U Y  & Z   O Y  O [    Y      \   Ԥ Y      ]    Y  7 ^   ) Y  
 _   V Y            W Y   a    Y  : b   m Y  Z        Y  a d   O Y  b e    Y      f   ϣ Y  < g   t Y  G h   X Y          Y  c j   o Y  d k   Y Y  m l   Z Y   m   [ Y            O Y  j o    Y      p   ϣ Y  f q   [ Y  | r   \ Y  # s   ] Y  	 t    Y  x{ u    Y  g v    Y      w   ] Y  | x   ^ Y      y   _ Y      z   ` Y      {   a Y            O Y  n }   Z Y            O Y  |    ϣ Y  p     Y  <M    У Y  {    [ Y  q    b Y  Ґ     Y  c    ף Y  u    Z Y  }       O Y  ~     Y  4     Y  5    Z Y         c Y  L    m Y      Z Y         o Y      Z Y         O Y      Z Y         O Y      d Y  6    Z Y         e Y            h Y         Y Y  z       m Y      f Y         g Y            h Y      i Y  .    j Y         k Y  
    l Y  9    O Y      m Y         n Y  (
     Y  M     Y  ]b    o Y  c    p Y      q Y      r Y         s Y             Y  ~N     Y  N    O Y      ԣ Y  
c    t Y  Fb    u Y      v Y         w Y  :    x Y         y Y  x    z Y      { Y            | Y  0       O Y      Y Y  |    a Y  K    } Y  k#    ˣ Y  N|    ~ Y  O|     Y       Y  {     Y       Y          Y          Y   Ï  
   Y      ď 
   Y      ŏ 
   Y      Ə 
   Y  ® Ǐ 
   Y  x ȏ 
   Y   ɏ 
   Y      ʏ 
   Y   ˏ 
   Y      ̏ 	
   Y   ͏ 

   Y          
   Y  Q Ϗ 
  O Y   Џ 

  X Y  
| я 
   Y  ;	 ҏ 
   Y          
   Y  | ԏ 
   Y      Տ 
   Y          
  O Y  Ϗ ׏ 
  h Y   | ؏ 
   Y  o ُ 
  ϣ Y   ڏ 
   Y   ۏ 
   Y  z
 ܏ 
  X Y  Џ ݏ 
   Y  t ޏ 
  ף Y   ߏ 
  ] Y  w     
   Y    
   Y  1  
   Y  Ď   
  O Y  ֏  !
   Y    "
   Y    #
   Y  
  $
   Y    %
   Y  4b  &
   Y  '     '
   Y  c  (
   Y  Fc  )
   Y       *
  դ Y  @c  +
  ֤ Y  Ac  ,
   Y    -
   Y    .
   Y    /
  } Y  ͎  0
   Y     1
  u Y       2
  O Y    3
   Y       4
   Y  x  5
   Y     6
  n Y  {  7
   Y       8
   Y    9
   Y    :
   Y       ;
   Y       <
   Y     =
   Y    >
   Y       ?
   Y       @
   Y    A
   Y    B
  } Y    C
   Y    D
  O Y    E
   Y   	 F
   Y  D 
 G
   Y    H
   Y     I
   Y      
 J
   Y  (  K
   Y       L
   Y    M
  ϣ Y  ُ  N
   Y  ޏ     O
  ' Y  I  P
  O Y    Q
   Y    R
   Y       S
  ( Y  J  T
   Y    U
   Y  U  V
   Y    W
   Y          X
   Y    Y
  ֤ Y    Z
   Y       [
   Y       \
   Y  wE   ]
   Y  { ! ^
   Y      " _
   Y      # `
   Y  * $ a
   Y  -5 % b
   Y  C & c
   Y  B ' d
   Y  +w ( e
   Y  i
 ) f
   Y  l * g
   Y  
 + h
   Y   , i
   Y  e - j
   Y  	 . k
   Y      / l
   Y   0 m
   Y  D
 1 n
   Y  ) 2 o
   Y  Bc 3 p
   Y      4 q
   Y      5 r
   Y   6 s
   Y  P 7 t
   Y      8 u
   Y  C 9 v
   Y      : w
   Y   ; x
   Y   < y
   Y  i = z
   Y   > {
   Y   ? |
   Y   @ }
   Y      A ~
   Y   B 
   Y   C 
   Y  a D 
   Y   E 
   Y  05 F 
   Y  0 G 
   Y      H 
   Y  4 I 
   Y  $ J 
   Y   K 
   Y   L 
   Y   M 
   Y      N 
   Y   O 
   Y  s| P 
   Y          
   Y  2 R 
   Y   S 
  y Y   T 
   Y          
   Y  { V 
  y Y  S     
  O Y   X 
   Y  - Y 
   Y   Z 
   Y   [ 
   Y      \ 
   Y          
  m Y  W ^ 
  p Y  u{ _ 
  t Y       
   Y  ] a 
   Y  r{ b 
   Y  s{ c 
  p Y  ^ d 
   Y   e 
   Y   f 
  Y Y   g 
  t Y  _ h 
  a Y       
   Y   j 
   Y      k 
   Y  n l 
   Y  o m 
   Y  p n 
   Y      o 
   Y      p 
   Y          
  O Y  `     
  ? Y      s 
  @ Y      t 
  h Y  q u 
  Ӧ Y      v 
  ϣ Y   w 
   Y   x 
  H Y  	 y 
   Y      z 
  ԣ Y   { 
   Y  L | 
   Y      } 
   Y   ~ 
   Y  g  
   Y       
  أ Y  y  
   Y          
  m Y  t     
  O Y    
   Y    
   Y       
   Y          
   Y  5  
   Y       
   Y  _  
   Y       
   Y  {     
   Y          
    Z       
   Z       
  O Z    
  ϣ Z  v  
    Z       
   Z  m{  
  f Z  n{  
  ף Z  ~     
   Z  [     
   Z       
   Z    
  ۤ Z  U  
  ܤ Z    
   Z  R  
    Z    
   Z  j  
   Z    
   Z       
   Z    
   Z    
   Z    
   Z  *{     
   Z       
   Z    
  p Z  c  
   Z    
  t Z    
  ˣ Z       
   Z  0W     
  ' Z  M  
  O Z    
  ( Z    
   Z          
  O Z    
  ԣ Z  z  
   Z  `  
   Z    
  	 Z          
  O 	Z       
  
 Z  5  
   Z  8  
   Z  9  
   Z  i  
   Z    
  
 Z       
   Z  a  
  ϣ Z    
   Z  d  
   Z  e  
   Z  #
  
  l Z  {  
   Z  {  
   Z  {  
   Z  :+ Ð     Z  ˞ Đ    Z   Ő    Z   Ɛ    Z  E ǐ    Z  E Ȑ   Ǧ Z             Z   ʐ    Z  ׎ ː   O Z   ̐ 	  ܣ Z  َ ͐ 
   Z  a ΐ   S Z  (| ϐ   ϣ Z   А 
   Z  w ѐ    Z   Ґ   b Z  { Ӑ    Z  ? Ԑ    Z   Ր    Z   ֐    Z      א    Z  } ؐ   ] Z   ِ   ^ Z      ڐ   ף Z  Đ ې   a Z  h ܐ    Z          
Z  1M ސ    
Z             Z         o Z  ː     Z  {     Z  z         Z       !   Z  d  "    Z  .     #  m Z    $  ! Z  f  %  " Z       &  # Z    '  O Z    (  $ Z  v  )  ϣ Z  ϐ  *  % Z  v  +  & Z    ,  ף Z  ڐ  -  ' Z  &  .  ( Z  p  /  ` Z  a  0   Z  ֎  1  ) Z    2  * Z  p|  3  + Z          4  # Z       5  ( Z    6  O Z    7  $ Z       8  ϣ Z    9  & Z       :  Y Z  f  ;   Z    <  ף Z     =  ' Z       >  ( Z  K  ?   Z    @  ) Z       A  * Z          B  , Z    C  - Z       D  . Z          E  O Z   	 F  ϣ Z   
 G  / Z  
  H   Z    I  ף Z   
 J  ( Z    K   Z    L  0 Z       M  1 Z    N  2 Z    O  3 Z          P  O Z    Q  ϣ Z  	  R  % Z       S  ף Z    T  ( Z  
  U  ` Z    V   Z    W  1 Z    X  4 Z          Y  m Z    Z  p Z    [   Z       \  O Z     ]  V Z  a ! ^  5 Z  L " _  6 Z  D     `  ( Z   $ a  O Z   % b  7 Z  Ec & c  8 Z  1 ' d  $ Z   ( e  % Z   ) f  9 Z   * g  : Z  1 + h  ; Z  |     i  < Z  Rn - j  = Z  { . k  > Z  T / l  ? Z  U 0 m  @ Z  V 1 n  A Z      2 o  B Z          p   Z  $ 4 q   Z   5 r  l Z   6 s  C Z  ?{ 7 t   Z       u  > Z      9 v  o Z  3 : w  l Z  5 ; x  D Z   < y  E Z   = z   Z  7     {  F Z  P ? |  > Z  8 @ }   Z  9 A ~  l Z  : B   m Z  ; C   n Z  < D   G Z  ;{ E    Z  =       >  Z  ? G   ?  Z      H   @  Z      I   o  Z  @ J   l  Z  A K   D  Z  B L   E  Z  C M   H  Z  H N   I  Z  dx O     Z  E       F !Z      Q   u !Z  { R    !Z      S    !Z      T   > !Z  F U   ? !Z  G V   @ !Z  H W   m !Z  I X   l !Z  J Y   D !Z  K Z   E !Z  L [   J !Z  Ş \   K !Z  ƞ ]   L !Z  Ǟ ^    !Z  O _   M !Z  c `   N !Z  Tn        "Z   b   O "Z      c   P "Z  ] d   Q "Z  D e   R "Z  0 f   S "Z  l g   z "Z      h   T "Z      i   U "Z      j   V "Z  ? k   W "Z  E l   X "Z  %5 m   Y "Z   n   Z "Z  ' o   [ "Z  L p   } "Z  D q   \ "Z   r   ] "Z   s   ^ "Z  C5 t   _ "Z  { u   ` "Z  g
 v   k "Z  1{ w   a "Z          #Z  / y   դ #Z   z   ֤ #Z   {   ۣ #Z  ` |   O #Z  W }   / #Z      ~    #Z  6c     #Z      b #Z         c #Z         d #Z            ! $Z         e $Z         f $Z         g $Z  e    h $Z      i $Z         j %Z      O %Z  |    ϣ %Z  4     %Z  ^       j &Z             'Z      ϣ 'Z       'Z         h (Z          )Z      k )Z  V
    1 )Z  +{    ԣ )Z         l *Z  l    m *Z      n *Z  V
    o *Z  
     *Z         p *Z          *Z         q *Z      r *Z         s *Z  $:    t *Z         u *Z  ^    w *Z  K    v *Z      w *Z  hm       m +Z      x +Z  y       m ,Z       ,Z  b    ϣ ,Z      l ,Z  X     ,Z  Y     ,Z  Z    m ,Z  :{    s ,Z  N     ,Z       ,Z          -Z      ϣ -Z      ף -Z         y .Z  ˑ    O .Z      ϣ .Z       .Z         O /Z      ϣ /Z       /Z      z /Z            { 0Z  ȑ    | 0Z  ɑ    } 0Z  @    y 0Z         ~ 0Z  ̑     0Z  8    O 0Z   Ñ    ϣ 0Z   đ    0Z  Б ő    0Z   Ƒ    0Z   Ǒ    0Z  ӑ       { 1Z      ɑ   | 1Z      ʑ   } 1Z      ˑ   y 1Z   ̑ 	  ~ 1Z      ͑ 
   1Z      Α   O 1Z   ϑ   ϣ 1Z  Ñ Б 
   1Z  ` ё    1Z  ő ґ    1Z  Ƒ ӑ    1Z            O 2Z  Α Ց   ϣ 2Z  ϑ ֑    2Z  ё ב   ڤ 2Z  v        3Z         m 4Z  ԑ ڑ   ϣ 4Z  Ց ۑ    4Z  ֑       h 5Z   ݑ    5Z  # ޑ    5Z  # ߑ    5Z  #     5Z  #     5Z       5Z  j      5Z  k     !  h 6Z  ܑ  "   6Z       #   6Z       $   6Z       %   6Z       &   6Z       '   6Z       (   6Z          )   7Z  \  *   7Z  5  +   7Z  (  ,  m 7Z  ّ  -  ϣ 7Z  ڑ  .  t 7Z  ۑ  /   7Z    0   7Z  ґ     1  M 8Z    2  ϣ 8Z    3  t 8Z       4  O 9Z    5  T 9Z    6  ף 9Z       7  O :Z       8   ;Z    9  O ;Z    :  ϣ ;Z    ;  ף ;Z       <  O <Z     =  ϣ <Z    >   <Z       ?   =Z          @   >Z       A   >Z    B   >Z          C   ?Z    D   ?Z    E   ?Z          F   @Z          G  m AZ  k{     H   BZ          I   CZ  n| 
 J  m CZ  
     K   DZ       L   DZ  Ĕ     M   GZ  2  N   GZ    O  } GZ  ʑ  P  y GZ    Q   GZ       R   GZ    S  O GZ  
  T   GZ   b  U  ϣ GZ     V  Y GZ    W   GZ       X  ף GZ       Y   IZ       Z  y IZ    [   IZ    \  O IZ     ]   IZ   ! ^  ϣ IZ   " _  Y IZ   # `  ף IZ   $ a   IZ       b  } JZ   & c  y JZ   ' d   JZ   ( e  ~ JZ   ) f  h JZ   * g  ϣ JZ  ! + h  ף JZ  # , i   JZ  $     j   KZ   . k   KZ      / l   KZ      0 m   KZ  3 1 n   KZ  -     o   LZ      3 p   LZ  1 4 q  y LZ  & 5 r   LZ  ' 6 s  O LZ  ) 7 t  ϣ LZ  * 8 u   LZ  +     v   MZ  ` : w   MZ  K ; x   MZ   < y   MZ      = z   MZ       {  o NZ  6     |   OZ          }  } RZ  % A ~  h RZ  >        SZ      C    SZ  B        UZ      E    UZ  D        VZ  R G    VZ      H    VZ  ͑ I    VZ  A J   ϣ VZ  7 K   ף VZ  8       } WZ   M    WZ   N   y WZ  4 O   O WZ  I P   ϣ WZ  J Q   ף WZ  K       } XZ  L S    XZ   T    XZ   U   y XZ  N V   O XZ  O W   ϣ XZ  P X   \ XZ  Q Y    XZ   Z    XZ          YZ   \   y YZ  U ]   O YZ  V ^   ϣ YZ  W _   ף YZ  X `    YZ             ZZ          [Z   c   O [Z  ] d   ϣ [Z  ^ e    [Z  _        \Z  F g    \Z             ]Z  f i   y ]Z   j   O ]Z  c k   ϣ ]Z  d l    ]Z  e m    ]Z             ^Z  G o    ^Z      p    ^Z   q    ^Z            O _Z  j s   ϣ _Z  k t   ף _Z  l        `Z  i        aZ  u w   O aZ  r x   ϣ aZ  s y    aZ  Q z    aZ  { {    aZ             bZ      }    bZ  ~ ~    bZ  |        cZ  h    m cZ  w    ϣ cZ  x     cZ  a     cZ          dZ          eZ       eZ  n    y eZ  v    O eZ      ϣ eZ       eZ          fZ          fZ          fZ       fZ             gZ      y gZ      h gZ      ϣ gZ       gZ       gZ  K   
     hZ       hZ  =        kZ      y kZ       kZ         O kZ      ϣ kZ       kZ       kZ  ؑ     kZ       kZ       kZ       kZ       kZ          kZ          kZ          kZ            O lZ      ϣ lZ      Y lZ  "     lZ         } mZ       mZ  S     mZ      y mZ       mZ       mZ      O mZ      ϣ mZ      ף mZ      o mZ       mZ          mZ         O nZ         w oZ       oZ       oZ             pZ          pZ  Q     pZ  ;     pZ         h qZ       qZ  #     qZ       qZ   D      libhts3    1.10    f7d2421be2958c8e29f82e4a87bb26db   liballelecount-perl    libconst-fast-perl   libdevel-cover-perl    libfile-slurp-perl   libfile-which-perl   libipc-system-simple-perl    libpod-coverage-perl   libtest-fatal-perl    3643df2ba0a59c066ab75e82c8e21f1a   webext-allow-html-temp   10.0.4-1~deb12u1   allow-html-temp    thunderbird    1:128.1    1:131.x     c25a1f2bf66cd5f5de9fe1909ee898a8   allure  
 0.11.0.0-1    ba2e06db233176c140ea65af8fbfdea7   almanah    0.12.3-2+b1    0.12.3-2   libcryptui0a   3.12.2  
 libecal-2.0-2    3.33.4   libedataserver-1.2-27    3.17   libgcr-base-3-1    3.8.0   
 libgpgme11   1.2.0    libgtksourceview-3.0-1   2.91.4   libgtkspell3-3-0   1.18.0    034aeb320a1186c1cafc6b4acaefdfb1   node-almond   
 0.3.3+dfsg-10    almond   node-requirejs-text     c7fe0588b845d1bd06f4209b2471ff48   alot   0.10-1   python3-configobj    python3-gpg    1.13.1-6  
 python3-magic    2:0.4.15   python3-notmuch2   0.30   python3-twisted    18.4.0  
 python3-urwid    python3-urwidtrees   1.0.3    links    links2   alot-doc    8e537bb8a99f6ea999f43bf42d37d385   2.4.3-5~  
 python-doc    abf6372044e3e44e600e15265fbcf439   alpine   2.26+dfsg-1    mlock    pine  
 alpine-doc   aspell   default-mta    mail-transport-agent    3ddd074b130edc15ad4419408e6d3266    d36b0af30efaab3c303df1eb7e7202ef   alpine-pico     26a939a9b42d10bb338f5e7cb4ba132b   pilot     8668e05d1b69959b8cdbf9c13b48d73f   alpine-chroot-install    binfmt-support   qemu-user-static    fbf5ceef448a4676ae5bdb6720f5b95d   alqalam    0.2-10   texlive-lang-arabic     6a39533e68c6a515eb89f058f569b09d  
 1.2.8-1+b1   1.2.8-1    alsa-lib   libasound2-data   
 alsa-utils   1.2.1    libasound2-plugins    f80c24ce80b76f9116fc6c7d1a8aa34b  
 alsa-ucm-conf    alsa-topology-conf    41f916f1c1cbcf480b3b7fe38fbcef23   libasound2-dev   libasound2-doc  
 libasound-dev     6049c8294e480247b4c02744f09d51fe    73dfd0644e1f116cf7c39ae3f4f19a1f   libasound2-plugin-smixer    690859b414c92870a8828f1886b0d9dd   libatopology-dev  
 libatopology2    1.2.1.1     8b563948f0e5c358d56ca81fe9b46396   1.2.7.1     fa3e8ef096b843719268f3d51920e44f   alsa-oss   1.1.8-2     7e1413025cd44f9937186b01e353aed0  	 1.2.7.1-1    alsa-plugins   1.2.5    libavcodec59   7:5.0    libavutil57   	 0.99.1-1~    libspeexdsp1   libswresample4    d5c7c8a00460bf9dc4d137b2036d2d1e  
 alsa-tools   1.2.5-3    1.0.24.1   as10k1    4d64eace7e51fb1fc1a18ed780ee5e62   alsa-tools-gui   2.35.9   pkexec    7e9c05fa4ed7725d61d80aa2becaf0fc   ld10k1   3.0-3    liblo10k1-0     710801fd69992f981169e47e6047ffe9    126648d0d55b7c1e5e15af7bc991a45c  
 liblo10k1-dev     32a225c612fa5bea87d6a7597fa8a461  	 1.2.5.1-2     5b8bf095ff0102805ed804bd0ad47988    c57964a46d5e0c90f286d77124260cb7   17-1~    3.0-9    1.2.6.1    1.2.2    audio-mixer     a4e555adf5e969eded25828df3763172   libasound2-plugin-equal    0.6-8   	 alsaequal    caps   0.9.11    a9edf531ebd9bda6252a90abe08294f0   alsamixergui   0.9.0rc2-1-10.1+b1  
 x-audio-mixer     eed6350c7b7ae40f4c2d5c4e4c967d6a   alsaplayer-alsa    0.99.81-2+b3  	 0.99.81-2   
 alsaplayer   alsaplayer-esd   alsaplayer-common    alsaplayer-output     7420886e4fa2c05ace07f9a4b877146b   libmad0   	 0.15.1b-3   
 libmikmod3   3.3.3    alsaplayer-gtk   alsaplayer-interface    80e606dafda00973df55a72ed069426f   alsaplayer-daemon     ce25656f2c87939994230ada829b7217    92f6c92d78063bc4d18cfafe59ef93d5   alsaplayer-jack     1d16b9d68fd8b0f5fac2c90fdc216ac8   alsaplayer-nas  	 libaudio2     89193f5335712b72838d93ef967ae5e8   alsaplayer-oss    17a0fb4fbe9967582d9c0233682fa561   alsaplayer-text     4013c8b3af3212527076341561d9538e   alsaplayer-xosd    libxosd2   2.2.14    19eb976ca3160e494c6163cabfe6cf82   libalsaplayer-dev    libalsaplayer0    3be992221c139adea132a22957cc8770    6eedba7564d220661bff3a3e4102a72a   alter-sequence-alignment   1.3.4-6     07869aa0648b93b3ddd48846fa177214  	 altermime   	 0.3.10-12     3b276af8be268e563cfc16ea2999afc0   altos    1.9.16-2   electronics   
 java2-runtime    freetts    libjfreechart-java   nickle   libelf1    0.144    libjansson4    2:1.0.8    altusmetrum-themes   slim-altusmetrum    cf094d7ad02e2efa2e580e53cf661eb0   altree  
 1.3.2-1+b3   1.3.2-1    5.36.0-4   perlapi-5.36.0   libmath-tamuanova-perl   libopenblas0   libatlas3-base  	 libmkl-rt    altree-examples     b6b21ff883122e1527c5ccc7bb5583bd                  _P  $    5o      <l           5o      n             bP  %    5o      n           5o      q     *        eP  7     5o      q     *      5o      
t             hP  8  !  5o      
t           5o      v             kP  :  "  5o      v           5o      Yy             nP  9  #  5o      Yy           5o      v|             qP  =  $  5o      v|           5o      ,            < tP  >  %  5o      ,           5o      =            j wP  F  &  5o      =           5o           a        zP  H  '  5o           a      L  Y                  5o      r     <       ^ ~P  I  (  5o      r     <      a  Y                  5o                   P  K  )  5o                 5o           [        P  w  *  5o           [      5o           .        P  y  +  5o           .      5o      G     Z        P  z  ,  5o      G     Z        Y                  5o                   P  ~  -  5o                   Y          P     5o           (       0 P    .  5o           (      5o                   P    /  5o                 5o           +
       @ P    0  5o           +
      5o      ݱ             P    1  5o      ݱ           5o           2        P    2  5o           2      5o                  l P    3  5o                 5o                   P    4  5o                 5o           C       ( P    5  5o           C      5o      ڿ     $       L P    6  5o      ڿ     $      5o           V        P    7  5o           V      5o      V     H        P    8  5o      V     H      5o           O        P    9  5o           O      5o           3        P    :  5o           3      5o      #            , P    ;  5o      #           5o      6             P    <  5o      6           5o      S             P    =  5o      S           5o      N            8 P    >  5o      N           5o           H        P    ?  5o           H      5o      g             P    @  5o      g           5o      Q     I        P    A  5o      Q     I        Z                  L  Z           P   Z              P 5o                   P    B  5o                 5o      n            d P    C  5o      n           5o                   P    D  5o                 5o           o        P    E  5o           o      5o                   P    F  5o                 5o           f       \ P    G  5o           f      5o      @            T P    H  5o      @           5o      U     6       x P    I  5o      U     6      5o                   P  Ĳ  J  5o                 5o           J        P  Ų  K  5o           J      5o           l       > P    L  5o           l      5o      G     1       l P  ò  M  5o      G     1      5o      y     f        P 	 ɲ  N  5o      y     f      5o                   P  ʲ  O  5o                 5o      ~
             P 
 Ͳ  P  5o      ~
           5o      ]
     S        P  Ҳ  Q  5o      ]
     S      5o           T        Q  Ӳ  R  5o           T      5o                   Q  ղ  S  5o                 5o                   Q  ֲ  T  5o                 5o           G        Q  ײ  U  5o           G      5o      1            ( Q  ز  V  5o      1           5o           ]        Q  Բ  W  5o           ]      5o      $            nB  Q  ߲  X  5o      $           5o      '     _       B  Q    Y  5o      '     _      5o      C+            C  Q !   Z  5o      C+           5o      -            jC  Q #   [  5o      -           5o      0     ]       C   Q %   \  5o      0     ]      5o      2     ?       C  #Q '   ]  5o      2     ?      5o      $5     ?       C  &Q )   ^  5o      $5     ?      5o      d7     f       FD  )Q +   _  5o      d7     f      5o      9            \E  ,Q -   `  5o      9           5o      <     `       E  /Q /   a  5o      <     `      5o      J?             F  2Q 1    b  5o      J?            5o      kB            bF  5Q 3   c  5o      kB           5o       E            F  8Q 5   d  5o       E           5o      G            G  ;Q 7   e  5o      G           5o      J            G  >Q 9   f  5o      J           5o      N            G  AQ ; 
  g  5o      N           5o      gR            "H  DQ =   h  5o      gR           5o      CU            H  GQ ?   i  5o      CU           5o      W            H  JQ A 
  j  5o      W           5o      |Z            jI  MQ C   k  5o      |Z           5o      _]            I  PQ E   l  5o      _]           5o      ?`            I  SQ G   m  5o      ?`           5o      Uc            ,J  VQ I   n  5o      Uc           5o      e            J  YQ K   o  5o      e           5o      h            J  \Q M   p  5o      h           5o      k            K  _Q O   q  5o      k           5o      o     
       K  bQ Q   r  5o      o     
      5o      r            K  eQ S   s  5o      r           5o      (u            L  hQ U    t  5o      (u           5o      x            M  kQ W "  u  5o      x           5o      z            XM  nQ Y $  v  5o      z           5o      }            |M  qQ [ #  w  5o      }           5o                  M  tQ ] %  x  5o                 5o                  8N  wQ _ &  y  5o                 5o      O            N  zQ a (  z  5o      O           5o      D     z       N  }Q c *  {  5o      D     z      5o           t       ZO  Q e +  |  5o           t      5o      4            P  Q g ,  }  5o      4           5o      ޑ            JP  Q i 0  ~  5o      ޑ           5o      ~     o       nP  Q k -    5o      ~     o      5o           n       P  Q m .    5o           n      5o      ]            "Q  Q o 1    5o      ]           5o           )       VQ  Q q 4    5o           )      5o      5            R  Q s 5    5o      5           5o      *            nR  Q u 9    5o      *           5o      ͧ            R  Q w :    5o      ͧ           5o                  S  Q y ;    5o                 5o      h     p       JS  Q { <    5o      h     p      5o      ٯ            S  Q } =    5o      ٯ           5o           (       T  Q  >    5o           (      5o      ϵ            PT  Q  ?    5o      ϵ           5o      Q            T  Q  @    5o      Q           5o      W     "       U  Q  A    5o      W     "      5o      z            U  Q  C    5o      z           F  XZ                  5o                  FV  Q  D    5o                 5o           !       jV  Q      5o           !      5o      7            V  Q  G    5o      7           5o      *            V  Q      5o      *           5o                   W 
 Q  H    5o                 5o                  PW  Q  B    5o                 5o      3     S       W  Q  K    5o      3     S      5o                  W  Q      5o                 5o      W            W  Q  I    5o      W           5o      =     _       lX  Q  M    5o      =     _      5o           W       X  Q  N    5o           W      5o                  X  Q  O    5o                 5o                  (Y  Q  P    5o                 5o           >       LY  Q      5o           >      5o      )     u       pY  Q  2    5o      )     u      5o           +       Y  Q      5o           +      5o                  Y  Q  Q    5o                 5o                  
Z  Q  3    5o                 5o                  .Z  Q  L    5o                 5o           F       Z  Q  S    5o           F      5o                   Z  Q  X    5o                  [  mZ                  5o                  `[  Q  Y    5o                 5o           p       [  Q  \    5o           p      5o      
            [  Q  ]    5o      
           5o      
     4       \   R  _    5o      
     4      5o      A            T  R  c    5o      A           5o           a       x  R      5o           a      5o      z              	R  e    5o      z           5o      `            (  R  g    5o      `           5o                    R  h    5o                 5o                    R  `    5o                 5o                   ֮  R  a    5o                  5o      O#              R  j    5o      O#           5o      %            ~  R  k    5o      %           5o      (              R  m    5o      (           5o      w,              !R  r    5o      w,           5o      )2              $R      5o      )2           5o      4     d       d  'R      5o      4     d      5o      7            8  *R      5o      7           5o      9              -R      5o      9           5o      <     W         0R      5o      <     W      5o      5?     &         3R      5o      5?     &      5o      \A            8! 6R  Z    5o      \A           5o      YD     Y       \! 9R  E    5o      YD     Y      5o      F     J       ! <R      5o      F     J      5o      H            ! ?R      5o      H           5o      K            ! BR      5o      K           5o      O     9       ! ER      5o      O     9      5o      Q            ! HR      5o      Q           5o      V            ! KR      5o      V           5o      Y     @       ! NR      5o      Y     @      5o      [     U       $! QR      5o      [     U      5o       _     (       x! TR      5o       _     (      5o      Ib            ! WR      5o      Ib           5o      d     -       	
! ZR      5o      d     -      5o      #g     S       NZ! ]R      5o      #g     S      5o      wj            rZ! `R      5o      wj           5o      Wm            Z! cR      5o      Wm           5o      op     2       \! fR      5o      op     2      5o      v             ]! iR      5o      v           5o      0z            X]! lR  Ϻ    5o      0z           5o      |     Z       |]! oR      5o      |     Z      5o      P            ]! rR  Ѻ    5o      P           5o                  8^! uR  Ӻ    5o                 5o                  ^! xR 	 պ    5o                 5o                  ^! {R  ֺ    5o                 5o      P            ^! ~R 
 ׺    5o      P           5o                  ^! R  Ժ    5o                 5o           M       T_! R  غ    5o           M      5o      ֒            _! R      5o      ֒           5o      ^     [       _! R  ٺ    5o      ^     [      5o           "       2`! R  ں    5o           "      5o      ݙ     $       `! R  ܺ    5o      ݙ     $      5o           5       `! R      5o           5      5o      8     )       Fa! R      5o      8     )      5o      b            a! R      5o      b           5o      {            c! R !     5o      {           5o      ?     A       c! R #     5o      ?     A      5o           ,       d! R %     5o           ,      5o           g       "e! R '     5o           g      5o                  e! R )     5o                 5o            y       e! R +     5o            y      5o      z            f! R -     5o      z           5o           [       Tf! R / 	    5o           [      5o           X       f! R 1     5o           X      5o      O     k       f! R 3     5o      O     k      5o           [       g! R 5     5o           [      5o                  g! R 7     5o                 5o                  h! R 9     5o           
      5o                  Pi! R ; %    5o                 5o      j     *       i! R = (    5o      j     *      5o                  i! R ? )    5o                 5o           o       \j! R A *    5o           o      5o                  j! R C /    5o                 5o                   k! R E 0    5o                 5o      1            Dk! R G 4    5o      1           5o      '            xk! R I 6    5o      '           5o           n       k! R K 1    5o           n      5o      ?            k! R M 7    5o      ?           5o                  l! R O 2    5o                 5o                  l! R Q 9    5o                 >  ^_                  5o      ~            l! R S ;    5o      ~           5o      7            Vm! R U ?    5o      7           D  `_                  5o                  m! R W B    5o                 5o                  rn! R Y G    5o                 5o                  n! R [ J    5o                 5o           @       *o! R ]     5o           @      5o       	            \o! R _ N    5o       	           5o      	            o! R a O    5o      	           5o      0	            o! S c M    5o      0	           5o      P		     _       o! S e P    5o      P		     _      5o      	            q! S g Q    5o      	           5o      	     N       q! 
S i s    5o      	     N      5o      	            
r! 
S k Z    5o      	           5o      	     X       Fr! S m ]     5o      	     X      5o      	     h       jr! S o [    5o      	     h      5o      |	     T       r! S q W    5o      |	     T      5o      	            s! S s 6    5o      	           5o      v	     G       :s! S u a    5o      v	     G      5o       	     X       vs! S w b    5o       	     X      5o      #	     ,       s! "S y ^    5o      #	     ,      5o      D%	            t! %S { c    5o      D%	           5o      '	            8t! (S } d    5o      '	           5o      *	     L       t! +S  e  	  5o      *	     L      5o      -	     8       t! .S  g  
  5o      -	     8      5o      
0	     ^       u! 1S  h    5o      
0	     ^      5o      i2	            u! 4S  i    5o      i2	           5o      5	     a       u! 7S  j  
  5o      5	     a      5o      7	            :v! :S  m    5o      7	           5o      `:	     P       vv! =S  o    5o      `:	     P      5o      =	            w! @S  p    5o      =	           5o      @	     k       ,w! CS  r    5o      @	     k      5o      B	     t       \w! FS  t    5o      B	     t      5o      `E	            w! IS  u    5o      `E	           5o      xH	            x! LS  y    5o      xH	           5o       K	     L       x! OS  z    5o       K	     L      5o      MN	            ,y! RS  {    5o      MN	           5o      _Q	            hy! US      5o      _Q	           5o      nT	            y! XS      5o      nT	           5o      ,W	            y! [S      5o      ,W	           5o      Y	            z! ^S      5o      Y	           5o      \	            >z! aS      5o      \	           5o      N_	     }       J}! dS      5o      N_	     }      5o      d	     -       ! gS      5o      d	     -      5o      f	            ! jS      5o      f	           5o      i	            ! mS      5o      i	           5o      m	            ! pS       5o      m	           5o      p	     ~       @! sS    !  5o      p	     ~      5o      u	            ! vS    "  5o      u	           5o      w	            ! yS    #  5o      w	           5o      ;z	     P       (! |S    $  5o      ;z	     P      5o      |	     R       L! S    %  5o      |	     R      5o      ~	            ! S    &  5o      ~	           5o      v	     E       ! S    '  5o      v	     E      5o      	            j! S    (  5o      	           5o      V	     w       ! S    )  5o      V	     w      5o      Έ	     
       8! S    *  5o      Έ	     
      5o      ܋	            \! S    +  5o      ܋	           5o      	     8       ! S    ,  5o      	     8      5o      א	            ! S    -  5o      א	           5o      	            ! S    .  5o      	           5o      &	     `       ! S    /  5o      &	     `      5o      	     y       Z! S    0  5o      	     y      5o      	            ! S    1  5o      	           5o      	     G       ! S     2  5o      	     G      5o      	            P! S    3  5o      	           5o      ۣ	            ! S    4  5o      ۣ	           5o      	     C       ! S    5  5o      	     C      5o      ֪	            ! S  
  6  5o      ֪	                       36faf63076f27a0aada20ad02635b42a   alttab   1.6.1-1    libxft2    2.1.1   
 2:1.2.99.2   libxrender1    aewm   awesome    blackbox   ctwm   cwm    evilwm   fluxbox    i3-wm    jwm    lwm    matchbox  	 ratpoison    sawfish    twm    wm2     86d91c92e887c0d776c9962c7a8d2471  	 alure-doc    1.2-9    alure     08e0393d10953060c73f616721c8a166   alure-utils    1.2-9+b2  	 libalure1    libalure-dev   1.2-2     db8336d249aafa0cf072b5139614d7d4    795fbf5973232dd6d7157692b859844c   libmodplug1   	 1:0.8.8.5    libmpg123-0    1.6.2     d3dc1e9b7c8ba326cb960d65cc326a58  
 amanda-client    1:3.5.1-11+deb12u2   amanda  
 amanda-common    libxml-simple-perl   dump  	 smbclient     eb340217bc77fc442e67d6d525d8c0fb  
 openbsd-inetd    inet-superserver   update-inetd   5.36.0-7+deb12u1   2.41.1   1:3.5   
 amanda-server   
 1:3.5.1-3~    9be314450886c6bdecdb2064ccfdb4bd   mt-st     7a44af7cb005595215636c1ed7a211fa  
 amap-align   2.2+git20080214.600fc29+dfsg-2    8bbc20fa02f0317c9b1cc6f3abf3506c   amavisd-milter   1.7.2-1    libmilter1.0.1   amavisd-new    amavisd-new-milter  	 1:2.6.4-2    sendmail   postfix     5aa8c00571da46088afa733d6b6dc007   1:2.13.0-3+deb12u1   libarchive-tar-perl    libarchive-zip-perl    libberkeleydb-perl   libconvert-tnef-perl   libconvert-uulib-perl    libdigest-md5-perl   libio-stringy-perl   libmail-dkim-perl    libmailtools-perl    libmime-base64-perl    libmime-tools-perl   libnet-libidn-perl   libnet-server-perl   libtime-hires-perl   libunix-syslog-perl    libnet-snmp-perl   pax    libcompress-raw-zlib-perl    1.51   amavis   libnet-patricia-perl   ripole   apt-listchanges    arj   
 cabextract   clamav  
 clamav-daemon    dspam    lhasa    libauthen-sasl-perl    libdbi-perl    liblwp-protocol-https-perl   libnet-ldap-perl   libnet-ssleay-perl   libsnmp-perl   libwww-perl    lzop   nomarch    p7zip    spamassassin   unrar     d95fd9c270e69763b2674a7c7629b731   amazon-ec2-net-utils   2.3.0-3    udev    40d20a79716bde00741f76da27aa85b7   amazon-ec2-utils    67be1dd0d4bbc1398ceec15316512b4f   amazon-ecr-credential-helper  
 0.6.0-1+b2   0.6.0-1    ca-certificates   	 docker.io   	 docker-ce     e74ed7360f292b67c479b365095e0af9   amb-plugins   
 0.8.1-7+b1   0.8.1-7   
 ladspa-plugin     5896570624067323fd593061bfd00f0d   ambdec   0.7.1-1     55a5b8c43a1bddf956c4e69ea0fddf88   amdgcn-tools   13+b1    llvm-13    lld-13   gcc-10-offload-amdgcn    10.2.1-6   gcc-11-offload-amdgcn   
 11-20210110-0    gcc-snapshot  
 1:20210110-0~     6772d97a613191b1bf7c8b99061efe8f   amfora  
 1.9.2-2+b7   1.9.2-2     f61916feb793317bb5f92c63843bf813   amide    1.0.6-1    libgnomecanvas2-0    libmdc3    0.21.2+dfsg    libvolpack1    1.0b3     68635d5506489a7fdecefeeeeaf0a618   amideco   	 0.31e-3.2     9472bf7f77b7b944a3ca7c59cac2f1c6   amiga-fdisk-cross    0.04-16    amiga-fdisk    amiga-fdisk-bf    134dfbdfd3cf5ff0fc80b91fc41e1a81  
 amispammer   3.3-2.1    libemail-sender-perl   libnet-address-ip-local-perl    c4dfc2174f28ad0ffaaf052da1c80d23  
 libaml-dev   aml    libaml0     da49cb655079a017155424197f1693f6    1a3ef00f4a913a2237dd74869f09c44f   amoebax    0.2.1+dfsg-6   amoebax-data    6687047bd64e61ea991201acfd18fa67    b9d9f80cf70880d45f0c33015a222b1c   python3-amp   
 0.6.1-1+b8   0.6.1-1    amp    python3-ase    3.14.0~   
 python3-scipy    python3-matplotlib   1:1.22.0   python3-numpy-abi9   python3-pexpect    3.5~    c4cc9c2a8cdc16940dc3ed6cf18cff9e   amphetamine   	 0.8.10-21    amphetamine-data   0.8.7-12    84628a9130481aaae590448a13c3a80e  
 0.8.7-15.1    0e43c7567c8534fbf1630b85ea8f4701  
 php-amphp-amp   	 2.6.2-1.1    php   	 amphp-amp   
 php-common    76c5e9b5da55589133528afc4ddafa61   ample    0.5.7-13+b1    0.5.7-13    07df7d683a482f82af8164fe6a9459ce  
 ampliconnoise   
 1.29-10+b2   1.29-10    mafft     cd8d15fcdec00d4fc4f08c519dbff884  	 ampr-ripd   
 2.4.1-1+b1   2.4.1-1     d3a53f9e01e1c22300141858b7ca2895  
 amqp-specs  	 1-0r0-3.1     ee2a2efbd8f66bc78ea8cc5320890b57   ams    2.2.0-1    cmt    mcp-plugins    swh-plugins   
 libclalsadrv2    liblo7   0.26~repack   
 libqt5opengl5    rev-plugins    vco-plugins     ec9619dfb2ce2790837d2121ab75fbd3   amsynth    1.13.2-1   2.30.0  
 libharfbuzz0b    vkeybd   dssi-plugin   
 vst-plugin    42adbf3ede6156bdcbafcf752ad79f66  
 gir1.2-amtk-5    5.6.1-2    amtk   3.22   libamtk-5-0     f1c1e069c74b517892ceee3696f8d759   libamtk-5-common   2.56    e3e93d87809fb44fa39788f91e73dfb1    8623548d7336a46621e328f682b9d02f  
 libamtk-5-dev     a3d5429f20372ff633f37dcd1ce4f156   amtterm    1.4-3   	 xdg-utils    libsoap-lite-perl     d08844c4e2da635676b54112db66d60e   amule   	 1:2.3.3-3    amule-common   libcrypto++8   8.7.0+git220824   	 libgeoip1    1.6.12  	 libixml10    1:1.8.0   	 libupnp13    1:1.8.3   
 3.2.2+dfsg   amule-utils    unzip    amule-utils-gui     5c464e6adf41c7373294617a696146d0    aa97dc31848cd80e35cd00e92da4b1fa   amule-daemon    0eb0fa1f3a72f84aa9c272e9b3f0a18c   amule-gnome-support     7b5da1cdcd6c8dd1ee3a59dc56bde46d   libgd3   2.1.0~alpha~   xmule   	 1.10.0b-1     3a57c98d8bce187bf65ae89385b66814   astats    a9233d00727fe62a812dc8b77ad2e9ca  	 amule-emc   
 0.5.2-4+b1   0.5.2-4     5ccfe8090f7c3b5997fba9d94c6bf8b2   an   1.2-7+b1   1.2-7    wbritish   wordlist    c3a81b8c3754d57849b4791d04d63fde   anacron    2.3-36   cron-daemon    rsyslog    system-log-daemon    powermgmt-base    cd9f07726e1ee3bc93fcfdb799520070   analitza-common    analitza    15f6d44494810efbd452d25193052305   libanalitza-dev    qml-module-org-kde-analitza    libanalitza8   libanalitzagui8    libanalitzaplot8   libanalitzawidgets8    kalgebra-dev    f86a1a12d9304114642eaf951ce52eaa    92ad86650a4e6fad5ca6fbc6be591399    02f734e9dfb655fed0eb2d6024454a29    8850b8c7b9a0aaa910de370924d8aa53  
 libqt5svg5    b0a7e75633fd35963404b7c26a88930b   libqt5quick5   libqt5quick5-gles    qtdeclarative-abi-5-15-8   kalgebra-common     01edccf0ccef447e85d8695698668660   analizo    1.25.4-2   doxygen-doxyparse    libapp-cmd-perl    libchi-perl    libclass-accessor-perl   libclass-inspector-perl         C              D                 D     j         E           J     E              F           c     F     l         G           &     G     m         H                H     n         I                I     Q         J                J     q         K           ;     K     r         L           A     L     s         M           TT     M     t         N           T     N   u         O           Xx     O     AV         P           #Z      P !y i         Q           I     Q              R           $Z      R     I         S                 S              T           +     T              U           M     U           V           %Z      V     0         W           'Z      W n          X           &Z      X  
             Y           (Z      Y                 Z           )Z      Z ^~          [                [     _         \           *Z      \     >         ]           k      ]              ^           _V     ^ u |         _           {V     _ g 
         `           m     `     BR         a           u     a     -         b           M     b              c           H     c              d           #I     d              e           {     e     RG         f           Xo     f              g           O     g  ݥ         h           +Z      h                 i                i D*  i+     j           ,Z      j                 k           -Z      k f	          l           .Z      m 
          m           _Z      n     b         n           /Z      o   Ǒ         o           0Z      p 7 h         p           YZ      q Zt a         q           [Z      r     [         r           gZ      s t          s           R     t           t           1Z      u                 u           2Z      v           v           3Z      w                 w           4Z      y                 x           5Z      z                 y                   {     ݑ (      z                   | o; ޑ       {                   }     ߑ       |           7     ~ %           }           rZ           E         ~                  k P7                           	 ѵ
                    6Z                                  7Z                               eZ                               =          Qz                    8Z                                  9Z                                  :Z       ;
                        ;Z                                  <Z           M                    =Z                                  >Z                                                                   ?Z       k
 	                    AZ                               @Z                                  BZ                                  CZ                                                          DZ       	                        <      @                     EZ                                  FZ       @                        GZ       ~
                     IZ                               JZ           0 