PIPS
SIMD.c
Go to the documentation of this file.
1 #if !defined(RWBITS) || (RWBITS != 64 && RWBITS != 128 && RWBITS != 256 && RWBITS != 512)
2  #error The register width variable RWBITS must be declared as 64,128,256 or 512 bits.
3 #endif
4 
5 #define RW (RWBITS/8)
6 #define SIZEOF_VEC(T) (RW/sizeof(CTYPE_##T))
7 #define VW(T) SIZEOF_VEC(T)
8 
9 #include <stdint.h>
10 #include <stdarg.h>
11 
// Element C type for each vector type suffix
#define CTYPE_PD double
#define CTYPE_PS float
#define CTYPE_DI int64_t
#define CTYPE_D int32_t
#define CTYPE_W int16_t
#define CTYPE_B int8_t

// Type of each element after the default argument promotions, i.e. the
// type that has to be named in va_arg() to read the element back from a
// "..." argument list: float is promoted to double, and int8_t/int16_t
// are promoted to int (which is not guaranteed to be the same type as
// int32_t, so int is spelled out here).
#define CTYPEP_PD double
#define CTYPEP_PS double
#define CTYPEP_DI int64_t
#define CTYPEP_D int32_t
#define CTYPEP_W int
#define CTYPEP_B int

// The suffixes used in load/store/conversion function names differ from
// the ones used for the arithmetic operations. (Why?)
// This table maps a vector type suffix to its load/store suffix.
#define LSTYPE_PD DF
#define LSTYPE_PS SF
#define LSTYPE_DI DI
#define LSTYPE_D SI
#define LSTYPE_W HI
#define LSTYPE_B QI
37 
// Precomputed values of VW(T) for every register width, needed because the
// LOAD/STORE function names embed the element count as a literal token.
// Naming scheme: VW_<RWBITS>_<T>; each value is (RWBITS/8)/sizeof(CTYPE_<T>).
#define VW_64_PS 2
#define VW_64_PD 1
#define VW_64_DI 1   // one int64_t fills a 64-bit register (same as VW_64_PD)
#define VW_64_D 2
#define VW_64_W 4
#define VW_64_B 8
#define VW_128_PS 4
#define VW_128_PD 2
#define VW_128_DI 2
#define VW_128_D 4
#define VW_128_W 8
#define VW_128_B 16
#define VW_256_PS 8
#define VW_256_PD 4
#define VW_256_DI 4
#define VW_256_D 8
#define VW_256_W 16
#define VW_256_B 32
#define VW_512_PS 16
#define VW_512_PD 8
#define VW_512_DI 8
#define VW_512_D 16
#define VW_512_W 32
#define VW_512_B 64
64 
// Operator token associated with each arithmetic operation name
#define OP_ADD +
#define OP_SUB -
#define OP_DIV /
#define OP_MUL *

// Two-operand element kernel generator.
// OP_F_TYPE(P, T) defines F_<P><T>(i, ap): it reads two CTYPE_<T> pointers
// from ap and returns element i of the first combined with element i of
// the second through the operator OP_<P>.
#define OP_F_TYPE(P, T)\
    CTYPE_##T F_##P##T(int i, va_list ap)\
    {\
        CTYPE_##T *lhs = va_arg(ap, CTYPE_##T *);\
        CTYPE_##T *rhs = va_arg(ap, CTYPE_##T *);\
        CTYPE_##T result = lhs[i] OP_##P rhs[i];\
        return result;\
    }
81 
// Multiply-add element kernel generator (three pointer operands).
// OP_MULADD_TYPE(P, T) defines F_MULADD<T>(i, ap), which reads three
// CTYPE_<T> pointers from ap and returns first[i] + second[i]*third[i].
// P is ignored; it exists only so the macro fits _DEF_FOR_TYPES.
#define OP_MULADD_TYPE(P,T)\
    CTYPE_##T F_MULADD##T(int i, va_list ap)\
    {\
        CTYPE_##T *addend = va_arg(ap, CTYPE_##T *);\
        CTYPE_##T *factor1 = va_arg(ap, CTYPE_##T *);\
        CTYPE_##T *factor2 = va_arg(ap, CTYPE_##T *);\
        return addend[i] + factor1[i]*factor2[i];\
    }
93 
// Unary-minus element kernel generator.
// OP_UMIN_TYPE(P, T) defines F_UMIN<T>(i, ap), which reads one CTYPE_<T>
// pointer from ap and returns the negation of its element i.
// P is ignored; it exists only so the macro fits _DEF_FOR_TYPES.
#define OP_UMIN_TYPE(P,T)\
    CTYPE_##T F_UMIN##T(int i, va_list ap)\
    {\
        CTYPE_##T *operand = va_arg(ap, CTYPE_##T *);\
        return - (operand[i]);\
    }
103 
// SIMD operation wrapper generator.
// SIMD_OP_TYPE(P, T) defines SIMD_<P><T>(dst, ...). The variadic arguments
// are the operand pointers expected by the element kernel F_<P><T>. For
// each of the VW(T) elements the argument list is re-read from the start
// (through a fresh copy) and the kernel result is stored in dst[i].
#define SIMD_OP_TYPE(P,T)\
    void SIMD_##P##T(CTYPE_##T *dst, ...)\
    {\
        int i;\
        va_list ap,ap_f;\
        va_start(ap,dst);\
        for (i = 0; i < (VW(T)); i++)\
        {\
            va_copy(ap_f,ap);\
            dst[i] = F_##P##T(i,ap_f);\
            va_end(ap_f); /* every va_copy must be paired with its own va_end */\
        }\
        va_end(ap);\
    }
118 
// Expand generator macro F once per supported vector type suffix, passing
// P through unchanged as its first argument.
#define _DEF_FOR_TYPES(F,P) \
    F(P, PS) \
    F(P, PD) \
    F(P, DI) \
    F(P, D)  \
    F(P, W)  \
    F(P, B)

// Element kernels F_<P><T> for every type
#define OP_F(P)    _DEF_FOR_TYPES(OP_F_TYPE,P)
// Vector-wide wrappers SIMD_<P><T> for every type
#define SIMD_OP(P) _DEF_FOR_TYPES(SIMD_OP_TYPE,P)
129 
130 
// SIMD load/store macro definition
// Letter spliced into the generated function names for each alignment kind
#define _ALIGNED A
#define _UNALIGNED V

// SIMD_LOAD_TYPE(A,T) defines, for alignment letter A and type suffix T,
// three functions whose names end in <A><element count><LSTYPE_T>:
//   SIMD_LOAD_...            copies the elements of base into vec
//   SIMD_LOAD_GENERIC_...    fills vec from one variadic scalar per element
//   SIMD_LOAD_BROADCAST_...  writes base into every element of vec
// The intermediate macro levels exist only to force expansion of RWBITS, A
// and the LSTYPE_/VW_ lookups before the names are pasted together.
// In the variadic function, vec is declared as a pointer (not an array):
// naming an array-declared parameter in va_start is undefined behaviour.
#define SIMD_LOAD_TYPE(A,T) _SIMD_LOAD_TYPE(T,RWBITS,A)
#define _SIMD_LOAD_TYPE(T,RWB,A) __SIMD_LOAD_TYPE(T,RWB,A) // Process the "A" macro
#define __SIMD_LOAD_TYPE(T,RWB,A) ___SIMD_LOAD_TYPE(T,LSTYPE_##T,VW_##RWB##_##T,A) // Build the VW_<RWB>_<T> macro name
#define ___SIMD_LOAD_TYPE(T,LST,VW,A) ____SIMD_LOAD_TYPE(T,LST,VW,A) // Process the "VW" and "LST" macros
#define ____SIMD_LOAD_TYPE(T,LST,VW,A)\
    void SIMD_LOAD_##A##VW##LST(CTYPE_##T vec[VW], CTYPE_##T base[VW])\
    {\
        int i;\
        for (i = 0; i < (VW); i++)\
            vec[i] = base[i];\
    }\
    \
    void SIMD_LOAD_GENERIC_##A##VW##LST(CTYPE_##T *vec, ...)\
    {\
        int i;\
        va_list ap;\
        va_start(ap, vec);\
        for (i = 0; i < (VW); i++)\
        {\
            /* scalars arrive promoted; read as CTYPEP_, narrow to CTYPE_ */\
            vec[i] = (CTYPE_##T) va_arg(ap, CTYPEP_##T);\
        }\
        va_end(ap);\
    }\
    \
    void SIMD_LOAD_BROADCAST_##A##VW##LST(CTYPE_##T vec[VW], CTYPE_##T base)\
    {\
        int i;\
        for (i = 0; i < (VW); i++)\
            vec[i] = base;\
    }
167 
168 
// SIMD_STORE_TYPE(A,T) defines, for alignment letter A and type suffix T,
// two functions whose names end in <A><element count><LSTYPE_T>:
//   SIMD_STORE_...          copies the elements of vec into base
//   SIMD_STORE_GENERIC_...  writes vec[i] through the i-th variadic pointer
// The intermediate macro levels exist only to force expansion of RWBITS, A
// and the LSTYPE_/VW_ lookups before the names are pasted together.
// In the variadic function, vec is declared as a pointer (not an array):
// naming an array-declared parameter in va_start is undefined behaviour.
#define SIMD_STORE_TYPE(A,T) _SIMD_STORE_TYPE(T,RWBITS,A)
#define _SIMD_STORE_TYPE(T,RWB,A) __SIMD_STORE_TYPE(T,RWB,A) // Process the "A" macro
#define __SIMD_STORE_TYPE(T,RWB,A) ___SIMD_STORE_TYPE(T,LSTYPE_##T,VW_##RWB##_##T,A) // Build the VW_<RWB>_<T> macro name
#define ___SIMD_STORE_TYPE(T,LST,VW,A) ____SIMD_STORE_TYPE(T,LST,VW,A) // Process the "VW" and "LST" macros
#define ____SIMD_STORE_TYPE(T,LST,VW,A)\
    void SIMD_STORE_##A##VW##LST(CTYPE_##T vec[VW], CTYPE_##T base[VW])\
    {\
        int i;\
        for (i = 0; i < (VW); i++)\
            base[i] = vec[i];\
    }\
    \
    void SIMD_STORE_GENERIC_##A##VW##LST(CTYPE_##T *vec, ...)\
    {\
        int i;\
        va_list ap;\
        CTYPE_##T *pn;\
        va_start(ap, vec);\
        for (i = 0; i < (VW); i++)\
        {\
            pn = va_arg(ap, CTYPE_##T *);\
            *pn = vec[i];\
        }\
        va_end(ap);\
    }
194 
// SIMD zero macros definitions
// SIMD_ZERO_TYPE(A,T) defines SIMD_ZERO_<A><element count><LSTYPE_T>(vec),
// which sets every element of vec to 0. The intermediate levels only force
// expansion of RWBITS, A and the LSTYPE_/VW_ lookups before pasting.
#define SIMD_ZERO_TYPE(A,T) _SIMD_ZERO_TYPE(T,RWBITS,A)
#define _SIMD_ZERO_TYPE(T,RWB,A) __SIMD_ZERO_TYPE(T,RWB,A) // Expand "A"
#define __SIMD_ZERO_TYPE(T,RWB,A) ___SIMD_ZERO_TYPE(T,LSTYPE_##T,VW_##RWB##_##T,A) // Build the VW_<RWB>_<T> macro name
#define ___SIMD_ZERO_TYPE(T,LST,VW,A) ____SIMD_ZERO_TYPE(T,LST,VW,A) // Expand "VW" and "LST"
#define ____SIMD_ZERO_TYPE(T,SFX,COUNT,ALIGN)\
    void SIMD_ZERO_##ALIGN##COUNT##SFX(CTYPE_##T vec[COUNT])\
    {\
        int k = 0;\
        while (k < (COUNT))\
        {\
            vec[k] = 0;\
            k++;\
        }\
    }
207 
// Shuffle function
// SIMD_SHUFFLE_TYPE(A,T) defines SIMD_SHUFFLE_V<element count><LSTYPE_T>
// (res, vec, ...). One int is read from the variadic list per element; the
// i-th int is the index in res that receives vec[i]. Indices are not
// range-checked. A is accepted but not used: the generated name always
// carries the letter V.
// vec is declared as a pointer (not an array): naming an array-declared
// parameter in va_start is undefined behaviour.
#define SIMD_SHUFFLE_TYPE(A,T) _SIMD_SHUFFLE_TYPE(T,RWBITS,A)
#define _SIMD_SHUFFLE_TYPE(T,RWB,A) __SIMD_SHUFFLE_TYPE(T,RWB,A)
#define __SIMD_SHUFFLE_TYPE(T,RWB,A) ___SIMD_SHUFFLE_TYPE(T,LSTYPE_##T,VW_##RWB##_##T,A)
#define ___SIMD_SHUFFLE_TYPE(T,LST,VW,A) ____SIMD_SHUFFLE_TYPE(T,LST,VW,A)
#define ____SIMD_SHUFFLE_TYPE(T,LST,VW,A) \
    void SIMD_SHUFFLE_V##VW##LST(CTYPE_##T res[VW], CTYPE_##T *vec, ...)\
    {\
        int i;\
        int p;\
        va_list ap;\
        va_start(ap, vec);\
        for (i = 0; i < (VW); i++)\
        {\
            p = va_arg(ap, int);\
            res[p] = vec[i];\
        }\
        va_end(ap); /* va_start must be paired with va_end before returning */\
    }
226 
// Invert function
// SIMD_INVERT_TYPE(A,T) defines SIMD_INVERT_V<element count><LSTYPE_T>
// (res, vec), which writes the elements of vec into res in reverse order.
// A is accepted but not used: the generated name always carries the
// letter V.
#define SIMD_INVERT_TYPE(A,T) _SIMD_INVERT_TYPE(T,RWBITS,A)
#define _SIMD_INVERT_TYPE(T,RWB,A) __SIMD_INVERT_TYPE(T,RWB,A)
#define __SIMD_INVERT_TYPE(T,RWB,A) ___SIMD_INVERT_TYPE(T,LSTYPE_##T,VW_##RWB##_##T,A)
#define ___SIMD_INVERT_TYPE(T,LST,VW,A) ____SIMD_INVERT_TYPE(T,LST,VW,A)
#define ____SIMD_INVERT_TYPE(T,SFX,COUNT,ALIGN) \
    void SIMD_INVERT_V##COUNT##SFX(CTYPE_##T res[COUNT], CTYPE_##T vec[COUNT])\
    {\
        int from = 0;\
        int to = COUNT-1;\
        while (from < (COUNT))\
        {\
            res[to] = vec[from];\
            from++;\
            to--;\
        }\
    }
239 
// Conversion functions

/* SIMD_LOAD_CONV(A,TO,TD) defines two converting loads whose names have
   the form ..._<A><VWD><LSTYPE_TO>_TO_<A><VWD><LSTYPE_TD>:
     SIMD_LOAD_...          copies src (CTYPE_<TO>) into dst (CTYPE_<TD>)
     SIMD_LOAD_GENERIC_...  fills vec (CTYPE_<TD>) from one variadic
                            CTYPE_<TO> scalar per element
   Elements are converted by ordinary C assignment.

   TO: original type
   TD: destination type
   RWB: register width in bits
   VWD: destination type vector length

   In the variadic function, vec is declared as a pointer (not an array):
   naming an array-declared parameter in va_start is undefined behaviour. */
#define SIMD_LOAD_CONV(A,TO,TD) _SIMD_LOAD_CONV(A,TO,TD,RWBITS)
#define _SIMD_LOAD_CONV(A,TO,TD,RWB) __SIMD_LOAD_CONV(A, TO, TD, RWB)
#define __SIMD_LOAD_CONV(A,TO,TD,RWB) ___SIMD_LOAD_CONV(A,TO,TD,VW_##RWB##_##TD,LSTYPE_##TO,LSTYPE_##TD)
#define ___SIMD_LOAD_CONV(A,TO,TD,VWD,TOLST,TDLST) ____SIMD_LOAD_CONV(A,TO,TD,VWD,TOLST,TDLST)
#define ____SIMD_LOAD_CONV(A,TO,TD,VWD,TOLST,TDLST)\
    void SIMD_LOAD_##A##VWD##TOLST##_TO_##A##VWD##TDLST(CTYPE_##TD dst[VWD], CTYPE_##TO src[VWD])\
    {\
        int i;\
        for (i = 0; i < (VWD); i++)\
            dst[i] = src[i];\
    }\
    \
    void SIMD_LOAD_GENERIC_##A##VWD##TOLST##_TO_##A##VWD##TDLST(CTYPE_##TD *vec, ...)\
    {\
        int i;\
        va_list ap;\
        CTYPE_##TO n;\
        va_start(ap, vec);\
        for (i = 0; i < (VWD); i++)\
        {\
            /* scalars arrive promoted; read as CTYPEP_, narrow to CTYPE_ */\
            n = (CTYPE_##TO) va_arg(ap, CTYPEP_##TO);\
            vec[i] = n;\
        }\
        va_end(ap);\
    }
271 
// SIMD_STORE_CONV(A,TO,TD) defines two converting stores, one per
// direction, both working on VW_<RWBITS>_<TD> elements:
//   SIMD_STORE_<A><n><LSTYPE_TD>_TO_<A><n><LSTYPE_TO>(src: CTYPE_<TD>, dst: CTYPE_<TO>)
//   SIMD_STORE_<A><n><LSTYPE_TO>_TO_<A><n><LSTYPE_TD>(src: CTYPE_<TO>, dst: CTYPE_<TD>)
// Elements are converted by ordinary C assignment.
#define SIMD_STORE_CONV(A,TO,TD) _SIMD_STORE_CONV(A,TO,TD,RWBITS)
#define _SIMD_STORE_CONV(A,TO,TD,RWB) __SIMD_STORE_CONV(A, TO, TD, RWB)
#define __SIMD_STORE_CONV(A,TO,TD,RWB) ___SIMD_STORE_CONV(A,TO,TD,VW_##RWB##_##TD,LSTYPE_##TO,LSTYPE_##TD)
#define ___SIMD_STORE_CONV(A,TO,TD,VWD,TOLST,TDLST) ____SIMD_STORE_CONV(A,TO,TD,VWD,TOLST,TDLST)
#define ____SIMD_STORE_CONV(A,TO,TD,VWD,TOLST,TDLST)\
    void SIMD_STORE_##A##VWD##TDLST##_TO_##A##VWD##TOLST(CTYPE_##TD src[VWD], CTYPE_##TO dst[VWD])\
    {\
        int k = 0;\
        while (k < VWD)\
        {\
            dst[k] = src[k];\
            k++;\
        }\
    }\
    void SIMD_STORE_##A##VWD##TOLST##_TO_##A##VWD##TDLST(CTYPE_##TO src[VWD], CTYPE_##TD dst[VWD])\
    {\
        int k = 0;\
        while (k < VWD)\
        {\
            dst[k] = src[k];\
            k++;\
        }\
    }
289 
// Instantiate one generator for every supported vector type; A is the
// alignment letter handed to the per-type generator.
#define SIMD_LOADS(A)    _DEF_FOR_TYPES(SIMD_LOAD_TYPE,A)
#define SIMD_STORES(A)   _DEF_FOR_TYPES(SIMD_STORE_TYPE,A)
#define SIMD_ZEROS(A)    _DEF_FOR_TYPES(SIMD_ZERO_TYPE,A)
#define SIMD_SHUFFLES(A) _DEF_FOR_TYPES(SIMD_SHUFFLE_TYPE,A)
#define SIMD_INVERTS(A)  _DEF_FOR_TYPES(SIMD_INVERT_TYPE,A)
295 
// Element C type for each vector type suffix.
// NOTE(review): these repeat, token for token, the CTYPE_* definitions
// made near the top of this file; the two copies must stay identical.
#define CTYPE_PD double
#define CTYPE_PS float
#define CTYPE_DI int64_t
#define CTYPE_D  int32_t
#define CTYPE_W  int16_t
#define CTYPE_B  int8_t
302 
// Expand conversion generator F(A, TO, TD) for every supported
// (original type, destination type) pair.
#define _DEF_ALL_CONV(F,A) \
    F(A, PS, PD) F(A, DI, PD) F(A, D, PD) F(A, W, PD) F(A, B, PD) \
    F(A, D, PS)  F(A, W, PS)  F(A, B, PS) \
    F(A, D, DI)  F(A, W, DI)  F(A, B, DI) \
    F(A, W, D) \
    F(A, D, W) \
    F(A, B, D) \
    F(A, B, W)

// All converting loads / stores for alignment letter A
#define SIMD_LOAD_CONVS(A)  _DEF_ALL_CONV(SIMD_LOAD_CONV,A)
#define SIMD_STORE_CONVS(A) _DEF_ALL_CONV(SIMD_STORE_CONV,A)
322 
323 // Declare operation functions
324 OP_F(ADD)
325 OP_F(MUL)
326 OP_F(DIV)
327 OP_F(SUB)
328 _DEF_FOR_TYPES(OP_MULADD_TYPE,__UNUSED__)
329 _DEF_FOR_TYPES(OP_UMIN_TYPE,__UNUSED__)
330 
331 // SIMD functions
332 SIMD_OP(ADD)
333 SIMD_OP(MUL)
334 SIMD_OP(DIV)
335 SIMD_OP(SUB)
336 SIMD_OP(MULADD)
337 SIMD_OP(UMIN)
338 
339 // LOAD operations
342 
343 // STORE operations
346 
347 // ZERO operations
350 
351 // Shuffle operations (_aligned unused)
353 
354 // Invert operations (_aligned unused)
356 
357 // Define all possible conversions
360 
363 
#define SIMD_STORES(A)
Definition: SIMD.c:291
#define _UNALIGNED
Definition: SIMD.c:133
#define SIMD_LOADS(A)
Definition: SIMD.c:290
#define SIMD_SHUFFLES(A)
Definition: SIMD.c:293
#define SIMD_ZEROS(A)
Definition: SIMD.c:292
#define SIMD_OP(P)
Definition: SIMD.c:127
#define OP_F(P)
Definition: SIMD.c:128
#define SIMD_INVERTS(A)
Definition: SIMD.c:294
#define SIMD_LOAD_CONVS(A)
Definition: SIMD.c:320
#define SIMD_STORE_CONVS(A)
Definition: SIMD.c:321
#define _DEF_FOR_TYPES(F, P)
Definition: SIMD.c:119
#define _ALIGNED
Definition: SIMD.c:132
#define OP_MULADD_TYPE(P, T)
Definition: SIMD.c:84
#define OP_UMIN_TYPE(P, T)
Definition: SIMD.c:96
@ SUB
Definition: atomic.c:47
@ ADD
Definition: atomic.c:47
#define DIV(x, y, z)
#define MUL(x, y, z)