Intel® C++ Compiler 16.0 User and Reference Guide
The prototypes for Intel® Advanced Vector Extensions 512 (Intel® AVX-512) intrinsics are located in the zmmintrin.h header file. To use these intrinsics, include the immintrin.h header file in your code.
| Intrinsic Name | Operation | Corresponding Instruction |
|---|---|---|
| _mm512_unpackhi_pd, _mm512_mask_unpackhi_pd, _mm512_maskz_unpackhi_pd | Unpacks and interleaves high packed float64 values. | VUNPCKHPD |
| _mm512_unpackhi_ps, _mm512_mask_unpackhi_ps, _mm512_maskz_unpackhi_ps | Unpacks and interleaves high packed float32 values. | VUNPCKHPS |
| _mm512_unpacklo_pd, _mm512_mask_unpacklo_pd, _mm512_maskz_unpacklo_pd | Unpacks and interleaves low packed float64 values. | VUNPCKLPD |
| _mm512_unpacklo_ps, _mm512_mask_unpacklo_ps, _mm512_maskz_unpacklo_ps | Unpacks and interleaves low packed float32 values. | VUNPCKLPS |
 
	 
| variable | definition |
|---|---|
| k | writemask used as a selector |
| a | first source vector element |
| b | second source vector element |
| src | source element to use based on writemask result |
 
	   
 
	 
_mm512_unpackhi_pd
extern __m512d __cdecl _mm512_unpackhi_pd(__m512d a, __m512d b);
Unpacks and interleaves float64 elements from the high half of each 128-bit lane in a and b, and stores the result.
 
	 
 
	 
_mm512_mask_unpackhi_pd
extern __m512d __cdecl _mm512_mask_unpackhi_pd(__m512d src, __mmask8 k, __m512d a, __m512d b);
Unpacks and interleaves float64 elements from the high half of each 128-bit lane in a and b, and stores the result using writemask k (elements are copied from src when the corresponding mask bit is not set).
 
	 
 
	 
_mm512_maskz_unpackhi_pd
extern __m512d __cdecl _mm512_maskz_unpackhi_pd(__mmask8 k, __m512d a, __m512d b);
Unpacks and interleaves float64 elements from the high half of each 128-bit lane in a and b, and stores the result using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
 
	 
 
	 
_mm512_unpackhi_ps
extern __m512 __cdecl _mm512_unpackhi_ps(__m512 a, __m512 b);
Unpacks and interleaves float32 elements from the high half of each 128-bit lane in a and b, and stores the result.
 
	 
 
	 
_mm512_mask_unpackhi_ps
extern __m512 __cdecl _mm512_mask_unpackhi_ps(__m512 src, __mmask16 k, __m512 a, __m512 b);
Unpacks and interleaves float32 elements from the high half of each 128-bit lane in a and b, and stores the result using writemask k (elements are copied from src when the corresponding mask bit is not set).
 
	 
 
	 
_mm512_maskz_unpackhi_ps
extern __m512 __cdecl _mm512_maskz_unpackhi_ps(__mmask16 k, __m512 a, __m512 b);
Unpacks and interleaves float32 elements from the high half of each 128-bit lane in a and b, and stores the result using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
 
	 
 
	 
_mm512_unpacklo_pd
extern __m512d __cdecl _mm512_unpacklo_pd(__m512d a, __m512d b);
Unpacks and interleaves float64 elements from the low half of each 128-bit lane in a and b, and stores the result.
 
	 
 
	 
_mm512_mask_unpacklo_pd
extern __m512d __cdecl _mm512_mask_unpacklo_pd(__m512d src, __mmask8 k, __m512d a, __m512d b);
Unpacks and interleaves float64 elements from the low half of each 128-bit lane in a and b, and stores the result using writemask k (elements are copied from src when the corresponding mask bit is not set).
 
	 
 
	 
_mm512_maskz_unpacklo_pd
extern __m512d __cdecl _mm512_maskz_unpacklo_pd(__mmask8 k, __m512d a, __m512d b);
Unpacks and interleaves float64 elements from the low half of each 128-bit lane in a and b, and stores the result using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
 
	 
 
	 
_mm512_unpacklo_ps
extern __m512 __cdecl _mm512_unpacklo_ps(__m512 a, __m512 b);
Unpacks and interleaves float32 elements from the low half of each 128-bit lane in a and b, and stores the result.
 
	 
 
	 
_mm512_mask_unpacklo_ps
extern __m512 __cdecl _mm512_mask_unpacklo_ps(__m512 src, __mmask16 k, __m512 a, __m512 b);
Unpacks and interleaves float32 elements from the low half of each 128-bit lane in a and b, and stores the result using writemask k (elements are copied from src when the corresponding mask bit is not set).
 
	 
 
	 
_mm512_maskz_unpacklo_ps
extern __m512 __cdecl _mm512_maskz_unpacklo_ps(__mmask16 k, __m512 a, __m512 b);
Unpacks and interleaves float32 elements from the low half of each 128-bit lane in a and b, and stores the result using zeromask k (elements are zeroed out when the corresponding mask bit is not set).