PIPS
sse.h File Reference
#include <xmmintrin.h>
#include <emmintrin.h>
Include dependency graph for sse.h:

Go to the source code of this file.

Macros

#define SIMD_LOAD_V4SI(vec, arr)   vec=_mm_loadu_si128((__m128i*)arr)
 int: section marker from sse.h; the integer (V4SI) macros start here. More...
 
#define SIMD_LOADA_V4SI(vec, arr)   vec=_mm_load_si128((__m128i*)arr)
 
#define SIMD_LOAD_BROADCAST_V4SI(vec, val)   vec=_mm_set1_si128(val)
 
#define SIMD_MULD(vec1, vec2, vec3)   vec1=_mm_mul_epi32(vec2,vec3)
 
#define SIMD_ADDD(vec1, vec2, vec3)   vec1=_mm_add_epi32(vec2,vec3)
 
#define SIMD_SUBD(vec1, vec2, vec3)   vec1 = _mm_sub_epi32(vec2, vec3)
 
#define SIMD_STORE_V4SI(vec, arr)   _mm_storeu_si128((__m128i*)arr,vec)
 
#define SIMD_STOREA_V4SI(vec, arr)   _mm_store_si128((__m128i*)arr,vec)
 
#define SIMD_LOAD_V4SF(vec, arr)   vec=_mm_loadu_ps(arr)
 float: section marker from sse.h; the single-precision (V4SF) macros start here. More...
 
#define SIMD_LOADA_V4SF(vec, arr)   vec=_mm_load_ps(arr)
 
#define SIMD_LOAD_BROADCAST_V4SF(vec, val)   vec=_mm_set1_ps(val)
 
#define SIMD_MULPS(vec1, vec2, vec3)   vec1=_mm_mul_ps(vec2,vec3)
 
#define SIMD_DIVPS(vec1, vec2, vec3)   vec1=_mm_div_ps(vec2,vec3)
 
#define SIMD_ADDPS(vec1, vec2, vec3)   vec1=_mm_add_ps(vec2,vec3)
 
#define SIMD_SUBPS(vec1, vec2, vec3)   vec1 = _mm_sub_ps(vec2, vec3)
 
#define SIMD_MULADDPS(vec1, vec2, vec3, vec4)
 
#define SIMD_SHUFFLE_V4SF(dist, src, i0, i1, i2, i3)   dist=_mm_shuffle_ps(src,src,_MM_SHUFFLE(i3,i2,i1,i0)
 
#define SIMD_UMINPS(vec1, vec2)
 UMIN stands for unary minus: vec1 is computed as 0 - vec2. More...
 
#define SIMD_STORE_V4SF(vec, arr)   _mm_storeu_ps(arr,vec)
 
#define SIMD_STOREA_V4SF(vec, arr)   _mm_store_ps(arr,vec)
 
#define SIMD_STORE_GENERIC_V4SF(vec, v0, v1, v2, v3)
 
#define SIMD_ZERO_V4SF(vec)   vec = _mm_setzero_ps()
 
#define SIMD_INVERT_V4SF(vec)   vec = _mm_shuffle_ps(vec,vec,_MM_SHUFFLE(4,3,2,1))
 
#define SIMD_LOAD_GENERIC_V4SF(vec, v0, v1, v2, v3)
 
#define SIMD_STORE_MASKED_V4SF(vec, arr)
 Handles a padded vector by storing only its first three elements to arr; this is a very bad implementation ... More...
 
#define SIMD_LOAD_V4SI_TO_V4SF(v, f)
 
#define SIMD_LOAD_V2DF(vec, arr)   vec=_mm_loadu_pd(arr)
 double: section marker from sse.h; the double-precision (V2DF) macros start here. More...
 
#define SIMD_MULPD(vec1, vec2, vec3)   vec1=_mm_mul_pd(vec2,vec3)
 
#define SIMD_ADDPD(vec1, vec2, vec3)   vec1=_mm_add_pd(vec2,vec3)
 
#define SIMD_MULADDPD(vec1, vec2, vec3, vec4)
 
#define SIMD_UMINPD(vec1, vec2)
 
#define SIMD_COSPD(vec1, vec2)
 
#define SIMD_SINPD(vec1, vec2)
 
#define SIMD_STORE_V2DF(vec, arr)   _mm_storeu_pd(arr,vec)
 
#define SIMD_STORE_GENERIC_V2DF(vec, v0, v1)
 
#define SIMD_LOAD_GENERIC_V2DF(vec, v0, v1)
 
#define SIMD_STORE_V2DF_TO_V2SF(vec, f)
 Conversions: stores the two doubles of vec into f[0] and f[1] through a temporary array. More...
 
#define SIMD_LOAD_V2SF_TO_V2DF(vec, f)    SIMD_LOAD_GENERIC_V2DF(vec,(f)[0],(f)[1])
 
#define SIMD_LOAD_V8HI(vec, arr)    vec = (__m128i*)(arr)
 char More...
 
#define SIMD_STORE_V8HI(vec, arr)    *(__m128i *)(&(arr)[0]) = vec
 
#define SIMD_STORE_V8HI_TO_V8SI(vec, arr)    SIMD_STORE_V8HI(vec,arr)
 
#define SIMD_LOAD_V8SI_TO_V8HI(vec, arr)    SIMD_LOAD_V8HI(vec,arr)
 

Typedefs

typedef float a2sf[2] __attribute__((aligned(16)))
 
typedef __m128 v4sf
 
typedef __m128d v2df
 
typedef __m128i v4si
 
typedef __m128i v8si
 
typedef __m128i v8hi
 

Macro Definition Documentation

◆ SIMD_ADDD

#define SIMD_ADDD (   vec1,
  vec2,
  vec3 
)    vec1=_mm_add_epi32(vec2,vec3)

Definition at line 20 of file sse.h.

◆ SIMD_ADDPD

#define SIMD_ADDPD (   vec1,
  vec2,
  vec3 
)    vec1=_mm_add_pd(vec2,vec3)

Definition at line 99 of file sse.h.

◆ SIMD_ADDPS

#define SIMD_ADDPS (   vec1,
  vec2,
  vec3 
)    vec1=_mm_add_ps(vec2,vec3)

Definition at line 32 of file sse.h.

◆ SIMD_COSPD

#define SIMD_COSPD (   vec1,
  vec2 
)
Value:
do { \
double __pips_tmp[2] __attribute__ ((aligned (16))); \
SIMD_STORE_V2DF(vec2, __pips_tmp); \
__pips_tmp[0] = cos(__pips_tmp[0]); \
__pips_tmp[1] = cos(__pips_tmp[1]); \
SIMD_LOAD_V2DF(vec1, __pips_tmp); \
} while(0)
Cross-reference for __attribute__ (Doxygen indexes the aligned-array typedefs under this name):
float a2sf[2] __attribute__((aligned(16)))
Definition: sse.h:4

Definition at line 113 of file sse.h.

◆ SIMD_DIVPS

#define SIMD_DIVPS (   vec1,
  vec2,
  vec3 
)    vec1=_mm_div_ps(vec2,vec3)

Definition at line 31 of file sse.h.

◆ SIMD_INVERT_V4SF

#define SIMD_INVERT_V4SF (   vec)    vec = _mm_shuffle_ps(vec,vec,_MM_SHUFFLE(4,3,2,1))

Definition at line 64 of file sse.h.

◆ SIMD_LOAD_BROADCAST_V4SF

#define SIMD_LOAD_BROADCAST_V4SF (   vec,
  val 
)    vec=_mm_set1_ps(val)

Definition at line 29 of file sse.h.

◆ SIMD_LOAD_BROADCAST_V4SI

#define SIMD_LOAD_BROADCAST_V4SI (   vec,
  val 
)    vec=_mm_set1_si128(val)

Definition at line 18 of file sse.h.

◆ SIMD_LOAD_GENERIC_V2DF

#define SIMD_LOAD_GENERIC_V2DF (   vec,
  v0,
  v1 
)
Value:
do { \
double v[2] = { v0,v1}; \
SIMD_LOAD_V2DF(vec,&v[0]); \
} while(0)

Definition at line 139 of file sse.h.

◆ SIMD_LOAD_GENERIC_V4SF

#define SIMD_LOAD_GENERIC_V4SF (   vec,
  v0,
  v1,
  v2,
  v3 
)
Value:
do { \
float __pips_v[4] __attribute ((aligned (16)));\
__pips_v[0]=v0;\
__pips_v[1]=v1;\
__pips_v[2]=v2;\
__pips_v[3]=v3;\
SIMD_LOADA_V4SF(vec,&__pips_v[0]); \
} while(0)

Definition at line 66 of file sse.h.

◆ SIMD_LOAD_V2DF

#define SIMD_LOAD_V2DF (   vec,
  arr 
)    vec=_mm_loadu_pd(arr)

double: section marker from sse.h; the double-precision (V2DF) macros start with this one.

Definition at line 97 of file sse.h.

◆ SIMD_LOAD_V2SF_TO_V2DF

#define SIMD_LOAD_V2SF_TO_V2DF (   vec,
  f 
)     SIMD_LOAD_GENERIC_V2DF(vec,(f)[0],(f)[1])

Definition at line 154 of file sse.h.

◆ SIMD_LOAD_V4SF

#define SIMD_LOAD_V4SF (   vec,
  arr 
)    vec=_mm_loadu_ps(arr)

float: section marker from sse.h; the single-precision (V4SF) macros start with this one.

Definition at line 27 of file sse.h.

◆ SIMD_LOAD_V4SI

#define SIMD_LOAD_V4SI (   vec,
  arr 
)    vec=_mm_loadu_si128((__m128i*)arr)

int: section marker from sse.h; the integer (V4SI) macros start with this one.

Definition at line 16 of file sse.h.

◆ SIMD_LOAD_V4SI_TO_V4SF

#define SIMD_LOAD_V4SI_TO_V4SF (   v,
  f 
)
Value:
do { \
float __pips_tmp[4]; \
__pips_tmp[0] = (f)[0]; \
__pips_tmp[1] = (f)[1]; \
__pips_tmp[2] = (f)[2]; \
__pips_tmp[3] = (f)[3]; \
SIMD_LOAD_V4SF(v, __pips_tmp); \
} while(0)
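Cross-reference auto-generated by Doxygen for the identifier f (it appears unrelated to the macro parameter f, which is subscripted as an array above):
int f(int off1, int off2, int n, float r[n], float a[n], float b[n])
Definition: offsets.c:15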

Definition at line 86 of file sse.h.

◆ SIMD_LOAD_V8HI

#define SIMD_LOAD_V8HI (   vec,
  arr 
)     vec = (__m128i*)(arr)

char

Definition at line 158 of file sse.h.

◆ SIMD_LOAD_V8SI_TO_V8HI

#define SIMD_LOAD_V8SI_TO_V8HI (   vec,
  arr 
)     SIMD_LOAD_V8HI(vec,arr)

Definition at line 166 of file sse.h.

◆ SIMD_LOADA_V4SF

#define SIMD_LOADA_V4SF (   vec,
  arr 
)    vec=_mm_load_ps(arr)

Definition at line 28 of file sse.h.

◆ SIMD_LOADA_V4SI

#define SIMD_LOADA_V4SI (   vec,
  arr 
)    vec=_mm_load_si128((__m128i*)arr)

Definition at line 17 of file sse.h.

◆ SIMD_MULADDPD

#define SIMD_MULADDPD (   vec1,
  vec2,
  vec3,
  vec4 
)
Value:
do { \
__m128 __pips_tmp;\
SIMD_MULPD(__pips_tmp, vec3, vec4);\
SIMD_ADDPD(vec1, __pips_tmp, vec2); \
} while(0)

Definition at line 100 of file sse.h.

◆ SIMD_MULADDPS

#define SIMD_MULADDPS (   vec1,
  vec2,
  vec3,
  vec4 
)
Value:
do { \
__m128 __pips_tmp;\
SIMD_MULPS(__pips_tmp, vec3, vec4);\
SIMD_ADDPS(vec1, __pips_tmp, vec2); \
} while(0)

Definition at line 34 of file sse.h.

◆ SIMD_MULD

#define SIMD_MULD (   vec1,
  vec2,
  vec3 
)    vec1=_mm_mul_epi32(vec2,vec3)

Definition at line 19 of file sse.h.

◆ SIMD_MULPD

#define SIMD_MULPD (   vec1,
  vec2,
  vec3 
)    vec1=_mm_mul_pd(vec2,vec3)

Definition at line 98 of file sse.h.

◆ SIMD_MULPS

#define SIMD_MULPS (   vec1,
  vec2,
  vec3 
)    vec1=_mm_mul_ps(vec2,vec3)

Definition at line 30 of file sse.h.

◆ SIMD_SHUFFLE_V4SF

#define SIMD_SHUFFLE_V4SF (   dist,
  src,
  i0,
  i1,
  i2,
  i3 
)    dist=_mm_shuffle_ps(src,src,_MM_SHUFFLE(i3,i2,i1,i0)

Definition at line 41 of file sse.h.

◆ SIMD_SINPD

#define SIMD_SINPD (   vec1,
  vec2 
)
Value:
do { \
double __pips_tmp[2] __attribute__ ((aligned (16))); \
SIMD_STORE_V2DF(vec2, __pips_tmp); \
__pips_tmp[0] = sin(__pips_tmp[0]); \
__pips_tmp[1] = sin(__pips_tmp[1]); \
SIMD_LOAD_V2DF(vec1, __pips_tmp); \
} while(0)

Definition at line 122 of file sse.h.

◆ SIMD_STORE_GENERIC_V2DF

#define SIMD_STORE_GENERIC_V2DF (   vec,
  v0,
  v1 
)
Value:
do { \
double __pips_tmp[2]; \
SIMD_STORE_V2DF(vec,&__pips_tmp[0]); \
*(v0)=__pips_tmp[0]; \
*(v1)=__pips_tmp[1]; \
} while (0)

Definition at line 132 of file sse.h.

◆ SIMD_STORE_GENERIC_V4SF

#define SIMD_STORE_GENERIC_V4SF (   vec,
  v0,
  v1,
  v2,
  v3 
)
Value:
do { \
float __pips_tmp[4] __attribute__ ((aligned (16))); \
SIMD_STOREA_V4SF(vec,&__pips_tmp[0]); \
*(v0)=__pips_tmp[0]; \
*(v1)=__pips_tmp[1]; \
*(v2)=__pips_tmp[2]; \
*(v3)=__pips_tmp[3]; \
} while (0)

Definition at line 53 of file sse.h.

◆ SIMD_STORE_MASKED_V4SF

#define SIMD_STORE_MASKED_V4SF (   vec,
  arr 
)
Value:
do { \
float __pips_tmp[4] __attribute__ ((aligned (16))); \
SIMD_STOREA_V4SF(vec,&__pips_tmp[0]); \
(arr)[0] = __pips_tmp[0]; \
(arr)[1] = __pips_tmp[1]; \
(arr)[2] = __pips_tmp[2]; \
} while(0)

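Handles a padded vector: vec is stored to an aligned temporary and only its first three elements are copied to arr[0..2]. This is a very bad implementation ...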

Definition at line 77 of file sse.h.

◆ SIMD_STORE_V2DF

#define SIMD_STORE_V2DF (   vec,
  arr 
)    _mm_storeu_pd(arr,vec)

Definition at line 131 of file sse.h.

◆ SIMD_STORE_V2DF_TO_V2SF

#define SIMD_STORE_V2DF_TO_V2SF (   vec,
  f 
)
Value:
do { \
double __pips_tmp[2]; \
SIMD_STORE_V2DF(vec, __pips_tmp); \
(f)[0] = __pips_tmp[0]; \
(f)[1] = __pips_tmp[1]; \
} while(0)

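Conversions: vec is stored to a temporary double[2], whose two elements are then assigned to f[0] and f[1] (converted to the element type of f on assignment).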

Definition at line 146 of file sse.h.

◆ SIMD_STORE_V4SF

#define SIMD_STORE_V4SF (   vec,
  arr 
)    _mm_storeu_ps(arr,vec)

Definition at line 51 of file sse.h.

◆ SIMD_STORE_V4SI

#define SIMD_STORE_V4SI (   vec,
  arr 
)    _mm_storeu_si128((__m128i*)arr,vec)

Definition at line 23 of file sse.h.

◆ SIMD_STORE_V8HI

#define SIMD_STORE_V8HI (   vec,
  arr 
)     *(__m128i *)(&(arr)[0]) = vec

Definition at line 161 of file sse.h.

◆ SIMD_STORE_V8HI_TO_V8SI

#define SIMD_STORE_V8HI_TO_V8SI (   vec,
  arr 
)     SIMD_STORE_V8HI(vec,arr)

Definition at line 164 of file sse.h.

◆ SIMD_STOREA_V4SF

#define SIMD_STOREA_V4SF (   vec,
  arr 
)    _mm_store_ps(arr,vec)

Definition at line 52 of file sse.h.

◆ SIMD_STOREA_V4SI

#define SIMD_STOREA_V4SI (   vec,
  arr 
)    _mm_store_si128((__m128i*)arr,vec)

Definition at line 24 of file sse.h.

◆ SIMD_SUBD

#define SIMD_SUBD (   vec1,
  vec2,
  vec3 
)    vec1 = _mm_sub_epi32(vec2, vec3)

Definition at line 21 of file sse.h.

◆ SIMD_SUBPS

#define SIMD_SUBPS (   vec1,
  vec2,
  vec3 
)    vec1 = _mm_sub_ps(vec2, vec3)

Definition at line 33 of file sse.h.

◆ SIMD_UMINPD

#define SIMD_UMINPD (   vec1,
  vec2 
)
Value:
do { \
__m128d __pips_tmp; \
__pips_tmp = _mm_setzero_pd(); \
vec1 = _mm_sub_pd(__pips_tmp, vec2); \
} while(0)

Definition at line 106 of file sse.h.

◆ SIMD_UMINPS

#define SIMD_UMINPS (   vec1,
  vec2 
)
Value:
do { \
__m128 __pips_tmp; \
__pips_tmp = _mm_setzero_ps(); \
vec1 = _mm_sub_ps(__pips_tmp, vec2); \
} while(0)

UMIN stands for unary minus: vec1 is computed as 0 - vec2, using a zeroed temporary vector.

Definition at line 44 of file sse.h.

◆ SIMD_ZERO_V4SF

#define SIMD_ZERO_V4SF (   vec)    vec = _mm_setzero_ps()

Definition at line 63 of file sse.h.

Typedef Documentation

◆ __attribute__ (aligned array typedefs such as a2sf and a8si, which Doxygen indexes under this name)

typedef int a8si [4] __attribute__((aligned(16)))

Definition at line 4 of file sse.h.

◆ v2df

typedef __m128d v2df

Definition at line 11 of file sse.h.

◆ v4sf

typedef __m128 v4sf

Definition at line 10 of file sse.h.

◆ v4si

typedef __m128i v4si

Definition at line 12 of file sse.h.

◆ v8hi

typedef __m128i v8hi

Definition at line 14 of file sse.h.

◆ v8si

typedef __m128i v8si

Definition at line 13 of file sse.h.