PIPS
avx.h File Reference
#include <immintrin.h>
+ Include dependency graph for avx.h:

Go to the source code of this file.

Macros

#define SIMD_LOAD_V8SF(vec, arr)   vec=_mm256_loadu_ps(arr)
 Single-precision (float) macros; unaligned load of eight floats from arr into vec. More...
 
#define SIMD_LOAD_BROADCAST_V8SF(vec, arr)   vec=_mm256_set1_ps(arr)
 
#define SIMD_LOAD_BROADCAST_V4DF(vec, arr)   vec=_mm256_set1_pd(arr)
 
#define SIMD_LOADA_V8SF(vec, arr)   vec=_mm256_load_ps(arr)
 
#define SIMD_MULPS(vec1, vec2, vec3)   vec1=_mm256_mul_ps(vec2,vec3)
 
#define SIMD_DIVPS(vec1, vec2, vec3)   vec1=_mm256_div_ps(vec2,vec3)
 
#define SIMD_ADDPS(vec1, vec2, vec3)   vec1=_mm256_add_ps(vec2,vec3)
 
#define SIMD_SUBPS(vec1, vec2, vec3)   vec1 = _mm256_sub_ps(vec2, vec3)
 
#define SIMD_MULADDPS(vec1, vec2, vec3, vec4)
 
#define SIMD_SHUFFLE_V8SF(dist, src, i0, i1, i2, i3)   _mm256_shuffle_pd(src,src,_MM_SHUFFLE(i3,i2,i1,i0))
 
#define SIMD_SHUFFLE_V4SF(dist, src, i0, i1, i2, i3)   _mm256_shuffle_ps(src,src,_MM_SHUFFLE(i3,i2,i1,i0))
 
#define SIMD_UMINPS(vec1, vec2)
 Unary minus ("UMIN"): sets vec1 to 0 - vec2, element-wise. More...
 
#define SIMD_STORE_V8SF(vec, arr)   _mm256_storeu_ps(arr,vec)
 
#define SIMD_STOREA_V8SF(vec, arr)   _mm256_store_ps(arr,vec)
 
#define SIMD_STORE_GENERIC_V8SF(vec, v0, v1, v2, v3, v4, v5, v6, v7)
 
#define SIMD_ZERO_V8SF(vec)   vec = _mm256_setzero_ps()
 
#define SIMD_LOAD_GENERIC_V8SF(vec, v0, v1, v2, v3, v4, v5, v6, v7)
 
#define SIMD_LOAD_V8SI_TO_V8SF(v, f)
 
#define SIMD_LOAD_V4DF(vec, arr)   vec=_mm256_loadu_pd(arr)
 Double-precision (double) macros; unaligned load of four doubles from arr into vec. More...
 
#define SIMD_MULPD(vec1, vec2, vec3)   vec1=_mm256_mul_pd(vec2,vec3)
 
#define SIMD_ADDPD(vec1, vec2, vec3)   vec1=_mm256_add_pd(vec2,vec3)
 
#define SIMD_MULADDPD(vec1, vec2, vec3, vec4)
 
#define SIMD_UMINPD(vec1, vec2)
 
#define SIMD_COSPD(vec1, vec2)
 
#define SIMD_SINPD(vec1, vec2)
 
#define SIMD_STORE_V4DF(vec, arr)   _mm256_storeu_pd(arr,vec)
 
#define SIMD_STORE_GENERIC_V4DF(vec, v0, v1, v2, v3)
 
#define SIMD_LOAD_GENERIC_V4DF(vec, v0, v1, v2, v3)
 
#define SIMD_STORE_V4DF_TO_V4SF(vec, f)
 Conversion macros; stores the four doubles of vec into f[0] through f[3]. More...
 
#define SIMD_LOAD_V4SF_TO_V4DF(vec, f)
 
#define SIMD_LOADA_V4DI(vec, arr)    vec=_mm256_load_si256(arr)
 long long (v4di) macros; aligned load of a 256-bit integer vector from arr into vec. More...
 
#define SIMD_STOREA_V4DI(vec, arr)    vec=_mm256_store_si256(arr)
 
#define SIMD_LOAD_V4DI(vec, arr)    vec=_mm256_loadu_si256(arr)
 
#define SIMD_STORE_V4DI(vec, arr)    vec=_mm256_storeu_si256(arr)
 
#define SIMD_LOADA_V8SI(vec, arr)    vec=_mm256_load_si256(arr)
 int (v8si) macros; aligned load of a 256-bit integer vector from arr into vec. More...
 
#define SIMD_STOREA_V8SI(vec, arr)    vec=_mm256_store_si256(arr)
 
#define SIMD_LOAD_V8SI(vec, arr)    vec=_mm256_loadu_si256(arr)
 
#define SIMD_STORE_V8SI(vec, arr)    vec=_mm256_storeu_si256(arr)
 
#define SIMD_LOADA_V16HI(vec, arr)    vec=_mm256_load_si256(arr)
 short (v16hi) macros; aligned load of a 256-bit integer vector from arr into vec. More...
 
#define SIMD_STOREA_V16HI(vec, arr)    vec=_mm256_store_si256(arr)
 
#define SIMD_LOAD_V16HI(vec, arr)    vec=_mm256_loadu_si256(arr)
 
#define SIMD_STORE_V16HI(vec, arr)    vec=_mm256_storeu_si256(arr)
 
#define SIMD_LOADA_V32QI(vec, arr)    vec=_mm256_load_si256(arr)
 char (v32qi) macros; aligned load of a 256-bit integer vector from arr into vec. More...
 
#define SIMD_STOREA_V32QI(vec, arr)    vec=_mm256_store_si256(arr)
 
#define SIMD_LOAD_V32QI(vec, arr)    vec=_mm256_loadu_si256(arr)
 
#define SIMD_STORE_V32QI(vec, arr)    vec=_mm256_storeu_si256(arr)
 

Typedefs

typedef double a4df[4] __attribute__((aligned(32)))
 
typedef __m256d v4df
 
typedef __m256 v8sf
 
typedef __m128 v4sf
 
typedef __m256i v4di
 
typedef __m256i v8si
 
typedef __m256i v16hi
 
typedef __m256i v32qi
 

Macro Definition Documentation

◆ SIMD_ADDPD

#define SIMD_ADDPD (   vec1,
  vec2,
  vec3 
)    vec1=_mm256_add_pd(vec2,vec3)

Definition at line 88 of file avx.h.

◆ SIMD_ADDPS

#define SIMD_ADDPS (   vec1,
  vec2,
  vec3 
)    vec1=_mm256_add_ps(vec2,vec3)

Definition at line 27 of file avx.h.

◆ SIMD_COSPD

#define SIMD_COSPD (   vec1,
  vec2 
)
Value:
do { \
double __pips_tmp[4] __attribute__ ((aligned (16))); \
SIMD_STORE_V4DF(vec2, __pips_tmp); \
__pips_tmp[0] = cos(__pips_tmp[0]); \
__pips_tmp[1] = cos(__pips_tmp[1]); \
__pips_tmp[2] = cos(__pips_tmp[2]); \
__pips_tmp[3] = cos(__pips_tmp[3]); \
SIMD_LOAD_V4DF(vec1, __pips_tmp); \
} while(0)
Cross-reference for the __attribute__ token used above: double a4df[4] __attribute__((aligned(32)))
Definition: avx.h:4

Definition at line 102 of file avx.h.

◆ SIMD_DIVPS

#define SIMD_DIVPS (   vec1,
  vec2,
  vec3 
)    vec1=_mm256_div_ps(vec2,vec3)

Definition at line 26 of file avx.h.

◆ SIMD_LOAD_BROADCAST_V4DF

#define SIMD_LOAD_BROADCAST_V4DF (   vec,
  arr 
)    vec=_mm256_set1_pd(arr)

Definition at line 23 of file avx.h.

◆ SIMD_LOAD_BROADCAST_V8SF

#define SIMD_LOAD_BROADCAST_V8SF (   vec,
  arr 
)    vec=_mm256_set1_ps(arr)

Definition at line 22 of file avx.h.

◆ SIMD_LOAD_GENERIC_V4DF

#define SIMD_LOAD_GENERIC_V4DF (   vec,
  v0,
  v1,
  v2,
  v3 
)
Value:
do { \
vec=_mm256_set_pd(v0,v1,v2,v3);\
} while(0)

Definition at line 135 of file avx.h.

◆ SIMD_LOAD_GENERIC_V8SF

#define SIMD_LOAD_GENERIC_V8SF (   vec,
  v0,
  v1,
  v2,
  v3,
  v4,
  v5,
  v6,
  v7 
)
Value:
do { \
float __pips_v[8] __attribute ((aligned (32)));\
vec=_mm256_set_ps(v0,v1,v2,v3,v4,v5,v6,v7);\
} while(0)

Definition at line 65 of file avx.h.

◆ SIMD_LOAD_V16HI

#define SIMD_LOAD_V16HI (   vec,
  arr 
)     vec=_mm256_loadu_si256(arr)

Definition at line 191 of file avx.h.

◆ SIMD_LOAD_V32QI

#define SIMD_LOAD_V32QI (   vec,
  arr 
)     vec=_mm256_loadu_si256(arr)

Definition at line 204 of file avx.h.

◆ SIMD_LOAD_V4DF

#define SIMD_LOAD_V4DF (   vec,
  arr 
)    vec=_mm256_loadu_pd(arr)

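Double-precision (double) macros. Loads four doubles from arr into vec; arr does not need to be 32-byte aligned.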

Definition at line 86 of file avx.h.

◆ SIMD_LOAD_V4DI

#define SIMD_LOAD_V4DI (   vec,
  arr 
)     vec=_mm256_loadu_si256(arr)

Definition at line 164 of file avx.h.

◆ SIMD_LOAD_V4SF_TO_V4DF

#define SIMD_LOAD_V4SF_TO_V4DF (   vec,
  f 
)
Value:
do {\
__m128 vecsf = _mm_load_ps(f);\
vec=_mm256_cvtps_pd(vecsf) ; \
} while(0)
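Cross-reference generated for the identifier f (in this macro, f is the macro parameter, not this function): int f(int off1, int off2, int n, float r[n], float a[n], float b[n])
Definition: offsets.c:15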

Definition at line 151 of file avx.h.

◆ SIMD_LOAD_V8SF

#define SIMD_LOAD_V8SF (   vec,
  arr 
)    vec=_mm256_loadu_ps(arr)

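Single-precision (float) macros. Loads eight floats from arr into vec; arr does not need to be 32-byte aligned.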

Definition at line 21 of file avx.h.

◆ SIMD_LOAD_V8SI

#define SIMD_LOAD_V8SI (   vec,
  arr 
)     vec=_mm256_loadu_si256(arr)

Definition at line 178 of file avx.h.

◆ SIMD_LOAD_V8SI_TO_V8SF

#define SIMD_LOAD_V8SI_TO_V8SF (   v,
  f 
)
Value:
do { \
float __pips_tmp[8]; \
__pips_tmp[0] = (f)[0]; \
__pips_tmp[1] = (f)[1]; \
__pips_tmp[2] = (f)[2]; \
__pips_tmp[3] = (f)[3]; \
__pips_tmp[4] = (f)[4]; \
__pips_tmp[5] = (f)[5]; \
__pips_tmp[6] = (f)[6]; \
__pips_tmp[7] = (f)[7]; \
SIMD_LOAD_V8SF(v, __pips_tmp); \
} while(0)

Definition at line 71 of file avx.h.

◆ SIMD_LOADA_V16HI

#define SIMD_LOADA_V16HI (   vec,
  arr 
)     vec=_mm256_load_si256(arr)

short (v16hi) macros. Loads a 256-bit integer vector from arr into vec; arr must be 32-byte aligned.

Definition at line 185 of file avx.h.

◆ SIMD_LOADA_V32QI

#define SIMD_LOADA_V32QI (   vec,
  arr 
)     vec=_mm256_load_si256(arr)

char (v32qi) macros. Loads a 256-bit integer vector from arr into vec; arr must be 32-byte aligned.

Definition at line 198 of file avx.h.

◆ SIMD_LOADA_V4DI

#define SIMD_LOADA_V4DI (   vec,
  arr 
)     vec=_mm256_load_si256(arr)

long long (v4di) macros. Loads a 256-bit integer vector from arr into vec; arr must be 32-byte aligned.

Definition at line 158 of file avx.h.

◆ SIMD_LOADA_V8SF

#define SIMD_LOADA_V8SF (   vec,
  arr 
)    vec=_mm256_load_ps(arr)

Definition at line 24 of file avx.h.

◆ SIMD_LOADA_V8SI

#define SIMD_LOADA_V8SI (   vec,
  arr 
)     vec=_mm256_load_si256(arr)

int (v8si) macros. Loads a 256-bit integer vector from arr into vec; arr must be 32-byte aligned.

Definition at line 172 of file avx.h.

◆ SIMD_MULADDPD

#define SIMD_MULADDPD (   vec1,
  vec2,
  vec3,
  vec4 
)
Value:
do { \
__m256d __pips_tmp;\
SIMD_MULPD(__pips_tmp,vec3,vec4); \
SIMD_ADDPD(vec1,__pips_tmp,vec2); \
} while(0)

Definition at line 89 of file avx.h.

◆ SIMD_MULADDPS

#define SIMD_MULADDPS (   vec1,
  vec2,
  vec3,
  vec4 
)
Value:
do { \
__m256 __pips_tmp;\
SIMD_MULPS(__pips_tmp,vec3,vec4); \
SIMD_ADDPS(vec1,__pips_tmp,vec2); \
} while(0)

Definition at line 29 of file avx.h.

◆ SIMD_MULPD

#define SIMD_MULPD (   vec1,
  vec2,
  vec3 
)    vec1=_mm256_mul_pd(vec2,vec3)

Definition at line 87 of file avx.h.

◆ SIMD_MULPS

#define SIMD_MULPS (   vec1,
  vec2,
  vec3 
)    vec1=_mm256_mul_ps(vec2,vec3)

Definition at line 25 of file avx.h.

◆ SIMD_SHUFFLE_V4SF

#define SIMD_SHUFFLE_V4SF (   dist,
  src,
  i0,
  i1,
  i2,
  i3 
)    _mm256_shuffle_ps(src,src,_MM_SHUFFLE(i3,i2,i1,i0))

Definition at line 37 of file avx.h.

◆ SIMD_SHUFFLE_V8SF

#define SIMD_SHUFFLE_V8SF (   dist,
  src,
  i0,
  i1,
  i2,
  i3 
)    _mm256_shuffle_pd(src,src,_MM_SHUFFLE(i3,i2,i1,i0))

Definition at line 36 of file avx.h.

◆ SIMD_SINPD

#define SIMD_SINPD (   vec1,
  vec2 
)
Value:
do { \
double __pips_tmp[4] __attribute__ ((aligned (16))); \
SIMD_STORE_V4DF(vec2, __pips_tmp); \
__pips_tmp[0] = sin(__pips_tmp[0]); \
__pips_tmp[1] = sin(__pips_tmp[1]); \
__pips_tmp[2] = sin(__pips_tmp[2]); \
__pips_tmp[3] = sin(__pips_tmp[3]); \
SIMD_LOAD_V4DF(vec1, __pips_tmp); \
} while(0)

Definition at line 113 of file avx.h.

◆ SIMD_STORE_GENERIC_V4DF

#define SIMD_STORE_GENERIC_V4DF (   vec,
  v0,
  v1,
  v2,
  v3 
)
Value:
do { \
double __pips_tmp[4]; \
SIMD_STORE_V4DF(vec,&__pips_tmp[0]); \
*(v0)=__pips_tmp[0]; \
*(v1)=__pips_tmp[1]; \
*(v2)=__pips_tmp[2]; \
*(v3)=__pips_tmp[3]; \
} while (0)

Definition at line 125 of file avx.h.

◆ SIMD_STORE_GENERIC_V8SF

#define SIMD_STORE_GENERIC_V8SF (   vec,
  v0,
  v1,
  v2,
  v3,
  v4,
  v5,
  v6,
  v7 
)
Value:
do { \
float __pips_tmp[4] __attribute__ ((aligned (32))); \
SIMD_STOREA_V8SF(vec,&__pips_tmp[0]); \
*(v0)=__pips_tmp[0]; \
*(v1)=__pips_tmp[1]; \
*(v2)=__pips_tmp[2]; \
*(v3)=__pips_tmp[3]; \
*(v4)=__pips_tmp[4]; \
*(v5)=__pips_tmp[5]; \
*(v6)=__pips_tmp[6]; \
*(v7)=__pips_tmp[7]; \
} while (0)

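Note: the temporary __pips_tmp is declared with 4 elements, but the aligned store writes 8 floats into it and elements 4 through 7 are then read; the array needs 8 elements.

Definition at line 50 of file avx.h.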

◆ SIMD_STORE_V16HI

#define SIMD_STORE_V16HI (   vec,
  arr 
)     vec=_mm256_storeu_si256(arr)

Definition at line 194 of file avx.h.

◆ SIMD_STORE_V32QI

#define SIMD_STORE_V32QI (   vec,
  arr 
)     vec=_mm256_storeu_si256(arr)

Definition at line 207 of file avx.h.

◆ SIMD_STORE_V4DF

#define SIMD_STORE_V4DF (   vec,
  arr 
)    _mm256_storeu_pd(arr,vec)

Definition at line 124 of file avx.h.

◆ SIMD_STORE_V4DF_TO_V4SF

#define SIMD_STORE_V4DF_TO_V4SF (   vec,
  f 
)
Value:
do { \
double __pips_tmp[4]; \
SIMD_STORE_V4DF(vec, __pips_tmp); \
(f)[0] = __pips_tmp[0]; \
(f)[1] = __pips_tmp[1]; \
(f)[2] = __pips_tmp[2]; \
(f)[3] = __pips_tmp[3]; \
} while(0)

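Conversion macros. Stores the four doubles of vec into f[0] through f[3] by element-wise assignment, so each value is converted to the element type of f.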

Definition at line 141 of file avx.h.

◆ SIMD_STORE_V4DI

#define SIMD_STORE_V4DI (   vec,
  arr 
)     vec=_mm256_storeu_si256(arr)

Definition at line 167 of file avx.h.

◆ SIMD_STORE_V8SF

#define SIMD_STORE_V8SF (   vec,
  arr 
)    _mm256_storeu_ps(arr,vec)

Definition at line 48 of file avx.h.

◆ SIMD_STORE_V8SI

#define SIMD_STORE_V8SI (   vec,
  arr 
)     vec=_mm256_storeu_si256(arr)

Definition at line 181 of file avx.h.

◆ SIMD_STOREA_V16HI

#define SIMD_STOREA_V16HI (   vec,
  arr 
)     vec=_mm256_store_si256(arr)

Definition at line 188 of file avx.h.

◆ SIMD_STOREA_V32QI

#define SIMD_STOREA_V32QI (   vec,
  arr 
)     vec=_mm256_store_si256(arr)

Definition at line 201 of file avx.h.

◆ SIMD_STOREA_V4DI

#define SIMD_STOREA_V4DI (   vec,
  arr 
)     vec=_mm256_store_si256(arr)

Definition at line 161 of file avx.h.

◆ SIMD_STOREA_V8SF

#define SIMD_STOREA_V8SF (   vec,
  arr 
)    _mm256_store_ps(arr,vec)

Definition at line 49 of file avx.h.

◆ SIMD_STOREA_V8SI

#define SIMD_STOREA_V8SI (   vec,
  arr 
)     vec=_mm256_store_si256(arr)

Definition at line 175 of file avx.h.

◆ SIMD_SUBPS

#define SIMD_SUBPS (   vec1,
  vec2,
  vec3 
)    vec1 = _mm256_sub_ps(vec2, vec3)

Definition at line 28 of file avx.h.

◆ SIMD_UMINPD

#define SIMD_UMINPD (   vec1,
  vec2 
)
Value:
do { \
__m256d __pips_tmp; \
__pips_tmp = _mm256_setzero_pd(); \
vec1 = _mm256_sub_pd(__pips_tmp, vec2); \
} while(0)

Definition at line 95 of file avx.h.

◆ SIMD_UMINPS

#define SIMD_UMINPS (   vec1,
  vec2 
)
Value:
do { \
__m256 __pips_tmp; \
__pips_tmp = _mm256_setzero_ps(); \
vec1 = _mm256_sub_ps(__pips_tmp, vec2); \
} while(0)

UMIN stands for unary minus: sets vec1 to 0 - vec2, element-wise.

Definition at line 41 of file avx.h.

◆ SIMD_ZERO_V8SF

#define SIMD_ZERO_V8SF (   vec)    vec = _mm256_setzero_ps()

Definition at line 64 of file avx.h.

Typedef Documentation

◆ __attribute__

typedef char a32qi [32] __attribute__((aligned(32)))

Definition at line 4 of file avx.h.

◆ v16hi

typedef __m256i v16hi

Definition at line 17 of file avx.h.

◆ v32qi

typedef __m256i v32qi

Definition at line 18 of file avx.h.

◆ v4df

typedef __m256d v4df

Definition at line 12 of file avx.h.

◆ v4di

typedef __m256i v4di

Definition at line 15 of file avx.h.

◆ v4sf

typedef __m128 v4sf

Definition at line 14 of file avx.h.

◆ v8sf

typedef __m256 v8sf

Definition at line 13 of file avx.h.

◆ v8si

typedef __m256i v8si

Definition at line 16 of file avx.h.