PIPS
freia_mppa.c
Go to the documentation of this file.
1 /*
2  $Id$
3 
4  Copyright 1989-2017 MINES ParisTech
5 
6  This file is part of PIPS.
7 
8  PIPS is free software: you can redistribute it and/or modify it
9  under the terms of the GNU General Public License as published by
10  the Free Software Foundation, either version 3 of the License, or
11  any later version.
12 
13  PIPS is distributed in the hope that it will be useful, but WITHOUT ANY
14  WARRANTY; without even the implied warranty of MERCHANTABILITY or
15  FITNESS FOR A PARTICULAR PURPOSE.
16 
17  See the GNU General Public License for more details.
18 
19  You should have received a copy of the GNU General Public License
20  along with PIPS. If not, see <http://www.gnu.org/licenses/>.
21 */
22 
23 #ifdef HAVE_CONFIG_H
24 #include "pips_config.h"
25 #endif
26 
27 #include "genC.h"
28 #include "linear.h"
29 #include "misc.h"
30 
31 #include "ri-util.h"
32 #include "ri.h"
33 
34 #include "prettyprint.h"
35 #include "properties.h"
36 
37 #include "freia.h"
38 #include "hwac.h"
39 
40 #include "freia_mppa.h"
41 
42 /**
43  * @brief Default number of memory slots
44  */
45 #define MPPA_DEFAULT_SMEM_SLOTS 4
46 
47 /**
48  * @brief Placeholder for an undefined slot
49  */
50 #define SMEM_SLOT_UNDEFINED get_int_property("HWAC_MPPA_MAX_SMEM_SLOTS")
51 
52 /**
53  * @brief Array of vertices using SMEM slots (indices) as output
54  *
55  * Links a SMEM slot to a dagvtx user, or NULL if unused
56  */
58 
59 /**
60  * @brief Get first unused SMEM slot
61  */
62 static _int get_free_slot() {
63  unsigned int max_smem_slots = get_int_property("HWAC_MPPA_MAX_SMEM_SLOTS");
65  for (unsigned int i = 0; i < max_smem_slots; i++) {
66  if (smem_slot_users[i] == NULL) {
67  slot = i;
68  break;
69  }
70  }
71  pips_assert("enough SMEM slots", slot != SMEM_SLOT_UNDEFINED);
72  return slot;
73 }
74 
75 /**
76  * @brief Get output slot used by input vertex
77  */
79  unsigned int max_smem_slots = get_int_property("HWAC_MPPA_MAX_SMEM_SLOTS");
81  for (unsigned int i = 0; i < max_smem_slots; i++) {
82  if (smem_slot_users[i] == vtx) {
83  slot = i;
84  break;
85  }
86  }
87  return slot;
88 }
89 
90 /**
91  * @brief Provide a valid unused SMEM slot and link it to vertex
92  *
93  * @param[in] vtx vertex in need of a memory slot
94  * @param[out] slot_users_ht number of uses for this slot
95  *
96  * @return valid SMEM slot
97  */
98 static _int get_a_smem_slot(const dagvtx vtx, hash_table slot_uses_ht) {
99 
100  /* get first unused slot */
101  _int slot = get_free_slot();
102  /* link this slot to current vtx for later use */
103  smem_slot_users[slot] = vtx;
104 
105  _int nb_succs = gen_length(dagvtx_succs(vtx));
106  if (nb_succs > 0) {
107  /* several successors, slot should be stored */
108  hash_put(slot_uses_ht, vtx, (void *)nb_succs);
109  /* and the consumer should deal itself with freeing the slot */
110  }
111 
112  return slot;
113 }
114 
115 /**
116  * @brief Try to find a reusable memory slot for in-place operators
117  *
118  * @param[in] vtx current vertex
119  * @param[in] preds list of predecessors of current vertex
120  * @param[in,out] slot_uses_ht number of uses for this slot
121  *
122  * @return one of preds slots, or SMEM_SLOT_UNDEFINED
123  */
124 static _int reuse_pred_slot(const dagvtx vtx, const list preds,
125  hash_table slot_uses_ht) {
126 
127  _int slot = SMEM_SLOT_UNDEFINED;
128  if (dagvtx_optype(vtx) == spoc_type_poc) {
129  return slot;
130  }
131  FOREACH(dagvtx, pred, preds) {
132  _int pred_rem_use = (_int)hash_get(slot_uses_ht, pred);
133  if (pred_rem_use == 0) {
134  /* pred slot can be reused */
135  slot = get_output_slot(pred);
136  smem_slot_users[slot] = vtx;
137  hash_update(slot_uses_ht, pred, (void *)(pred_rem_use - 1));
138  break;
139  }
140  }
141  _int nb_succs = gen_length(dagvtx_succs(vtx));
142  if (nb_succs > 0) {
143  /* several successors, slot should be stored */
144  hash_put(slot_uses_ht, vtx, (void *)nb_succs);
145  }
146  return slot;
147 }
148 
149 /**
150  * @brief Provide vertex used SMEM slot, update uses table
151  *
152  * @param[in] vtx vertex using an output slot
153  * @param[out] slot_uses_ht number of uses for this slot
154  *
155  * @return valid SMEM slot
156  */
157 static _int use_output_slot(const dagvtx vtx, hash_table slot_uses_ht) {
158  _int slot = get_output_slot(vtx);
159  _int rem_uses = (_int)hash_get(slot_uses_ht, vtx);
160  /* update uses */
161  hash_update(slot_uses_ht, vtx, (void *)(rem_uses - 1));
162 
163  return slot;
164 }
165 
166 /**
167  * @brief Update preds usage table, unused slot list
168  *
169  * @param[in] vtx_preds list of predecessors
170  * @param[out] slot_users_ht number of uses
171  *
172  * @return void
173  */
174 static void process_used_slots(list vtx_preds, hash_table slot_uses_ht) {
175  FOREACH(dagvtx, pred, vtx_preds) {
176  _int slot = get_output_slot(pred);
177  _int rem_uses = (_int)hash_get(slot_uses_ht, pred);
178  if (rem_uses == 0) {
179  /* can reuse slot */
180  smem_slot_users[slot] = NULL;
181  /* delete entry in uses ht */
182  hash_del(slot_uses_ht, pred);
183  }
184  }
185 }
186 
187 /**
188  * @brief Replace dots in string with underscores
189  *
190  * @param[in,out] str allocated string with (or without) dots
191  *
192  * @return input string
193  */
194 static string dots2us(string str) {
195  unsigned long len = strlen(str);
196  for (unsigned int i = 0; i < len; i++) {
197  if (str[i] == '.') {
198  str[i] = '_';
199  }
200  }
201  return str;
202 }
203 
204 /**
205  * @brief Build a dag list of arguments and a string of corresponding parameters
206  *
207  * @param[in] cdg current dag
208  * @param[out] params set to a newly allocated string holding the
209  *                    comma-separated helper parameter declarations
210  *
211  * @return allocated list of helper call arguments
212  */
213 static list mppa_helper_args_params(const dag cdg, string *params) {
214 
215  /* list of argument expressions */
216  list largs = NIL;
217 
218  /* string buffer of helper parameters */
219  string_buffer sb_args = string_buffer_make(true);
220 
221  /* loop over input/output nodes and store image variables into list */
222  set im_done = set_make(set_string);
223  FOREACH(dagvtx, vtx, dag_inputs(cdg)) {
224  entity in = dagvtx_image(vtx);
225  if (in) {
226  expression inexpr = entity_to_expression(in);
227  if (!set_belong_p(im_done, expression_to_string(inexpr))) {
228  largs = CONS(expression, inexpr, largs);
229  set_add_element(im_done, im_done, expression_to_string(inexpr));
230  }
231  }
232  }
233  largs = gen_nreverse(largs);
234  list largsout = NIL;
235  FOREACH(dagvtx, vtx, dag_outputs(cdg)) {
236  entity out = dagvtx_image(vtx);
237  if (out) {
239  if (!set_belong_p(im_done, expression_to_string(outexpr))) {
240  largsout = CONS(expression, outexpr, largsout);
241  set_add_element(im_done, im_done, expression_to_string(outexpr));
242  }
243  }
244  }
245  set_free(im_done);
246  largs = gen_nconc(gen_nreverse(largsout), largs);
247 
248  /* loop over image expressions to fill string buffer */
249  FOREACH(expression, imexpr, largs) {
250  sb_prf(sb_args, "freia_data2d *%s, ", expression_to_string(imexpr));
251  }
252 
253  /* loop over measure vertices */
254  FOREACH(dagvtx, vtx, dag_vertices(cdg)) {
255  if (dagvtx_is_measurement_p(vtx)) {
257  sb_prf(sb_args, "int32_t %s, ",
258  expression_to_string(EXPRESSION(CAR(measargs))));
259  if (gen_length(measargs) >= 3) {
260  string xcoord = expression_to_string(EXPRESSION(gen_nth(1, measargs)));
261  string ycoord = expression_to_string(EXPRESSION(gen_nth(2, measargs)));
262  sb_prf(sb_args, "uint32_t %s, ", dots2us(xcoord));
263  sb_prf(sb_args, "uint32_t %s, ", dots2us(ycoord));
264  }
265  largs = gen_nconc(largs, measargs);
266  }
267  }
268 
269  set se_done = set_make(set_string);
270  FOREACH(dagvtx, vtx, dag_vertices(cdg)) {
271  /* non partially evaluated structuring elements */
272  if (dagvtx_optype(vtx) == spoc_type_poc) {
273  intptr_t se[9];
274  if (!freia_extract_kernel_vtx(vtx, true, &se[0], &se[1], &se[2], &se[3],
275  &se[4], &se[5], &se[6], &se[7], &se[8])) {
276  /* pass SE name as helper parameter */
278  string se = expression_to_string(EXPRESSION(CAR(params)));
279  if (!set_belong_p(se_done, se)) {
280  sb_prf(sb_args, "int32_t *%s, ", se);
281  largs = gen_nconc(largs, params);
282  set_add_element(se_done, se_done, se);
283  }
284  }
285  }
286  }
287  set_free(se_done);
288 
289  /* post-process parameters */
290  string fargs = string_buffer_to_string(sb_args);
291  /* arguments string ends with ", " */
292  if (fargs[strlen(fargs) - 2] == ',') {
293  fargs[strlen(fargs) - 2] = '\0';
294  }
295  /* replace '&' by '*' */
296  for (unsigned int i = 0; i < strlen(fargs); i++) {
297  if (fargs[i] == '&') {
298  fargs[i] = '*';
299  }
300  }
301  *params = fargs;
302 
303  /* cleanup */
304  string_buffer_free(&sb_args);
305  return largs;
306 }
307 
308 /**
309  * @brief Replace FREIA calls by PIPS generated ones
310  *
311  * @param[in] dg dag of several FREIA operations
312  * @param[in] fname function to call
313  * @param[in] split splitted dag index
314  *
315  * @return void
316  */
317 static void mppa_call_helper(const dag dg, const string fname,
318  unsigned int dagi, list largs) {
319  bool call_inserted = false;
320  FOREACH(dagvtx, v, dag_vertices(dg)) {
321  _int op = dagvtx_opid(v);
322  if (op == 0)
323  continue;
324  statement st = dagvtx_statement(v);
325  if (call_inserted)
326  hwac_replace_statement(st, freia_ok(), true);
327  else {
328  entity mppa_helper = local_name_to_top_level_entity(fname);
329  string fname_real = strdup(cat(fname, "_", i2a(dagi)));
330  if (entity_undefined_p(mppa_helper))
331  mppa_helper = freia_create_helper_function(fname_real, NIL);
332  hwac_replace_statement(st, make_call(mppa_helper, largs), false);
333  call_inserted = true;
334  }
335  }
336 }
337 
338 /**
339  * @brief Generate an optimized, FREIA-MPPA low level version of this dag
340  *
341  * @param[in] module current module name
342  * @param[in] cdg current dag
343  * @param[in] fname function name
344  * @param[in] dagi dag index
345  * @param[in,out] helper file to host the output
346  *
347  * @return void
348  */
349 static void mppa_compile_dag(const string module, const dag cdg,
350  const string fname, const int dagi,
351  FILE *const helper) {
352 
353  fprintf(helper, "\n// module=%s fname=%s split=%d\n", module, fname, dagi);
354 
355  /* debug */
356 #ifdef DEBUG_INFO
357  dag_dump(stdout, fname, cdg);
358 #endif /* DEBUG_INFO */
359 
360  string_buffer sb_cmd = string_buffer_make(true);
361  string curr_cmd = strdup(cat("cmd", i2a(dagi)));
362  unsigned int meas_ctr = 0;
363  hash_table meas_off_ht = hash_table_make(hash_pointer, 16);
364  unsigned int instr_ctr = 0;
365  hash_table slots_used_ht = hash_table_make(hash_pointer, 16);
366  list ordered_vtx = gen_nreverse(gen_copy_seq(dag_vertices(cdg)));
367 
368  unsigned int max_smem_slots = get_int_property("HWAC_MPPA_MAX_SMEM_SLOTS");
369 
370  /* slot -> dagvtx user or NULL if free */
371  smem_slot_users = malloc(max_smem_slots * sizeof(dagvtx));
372  for (unsigned int i = 0; i < max_smem_slots; i++) {
373  smem_slot_users[i] = NULL;
374  }
375 
376  string fparams;
377  list largs = mppa_helper_args_params(cdg, &fparams);
378 
379  /* prologue */
380  sb_prf(sb_cmd, "int %s_%d(%s) {\n", fname, dagi, fparams);
381  sb_cat(sb_cmd, "\n");
382  sb_cat(sb_cmd, " mppa_cc_instr_t *instrs;\n"); /* global? */
383  sb_cat(sb_cmd, " unsigned int i = 0;\n"); /* global? */
384  sb_prf(sb_cmd, " mppa_cc_cmd_t %s;\n", curr_cmd);
385  sb_prf(sb_cmd, " instrs = %s.instrs;\n", curr_cmd);
386  /* set number of SMEM slots using property set to non-default value */
387  if (max_smem_slots != MPPA_DEFAULT_SMEM_SLOTS) {
388  pips_assert("non-null number of SMEM slots", max_smem_slots > 0);
389  pips_assert("enough number of SMEM slots", max_smem_slots < 255);
390  sb_cat(sb_cmd, " /* override default SMEM slots number */\n");
391  sb_prf(sb_cmd, " mppa_smem_slots = %d;\n", max_smem_slots);
392  }
393  sb_cat(sb_cmd, "\n");
394 
395  FOREACH(dagvtx, vtx, ordered_vtx) {
396 
397  const freia_api_t *fapi = get_freia_api_vtx(vtx);
398  list preds = dag_vertex_preds(cdg, vtx);
399 
400  /* input nodes */
401  if (same_string_p(dagvtx_operation(vtx), "undefined")) {
402  entity imin = dagvtx_image(vtx);
403  sb_cat(sb_cmd, " instrs[i].kind = MPPA_CMD_GET_IO_TILE;\n");
404  sb_prf(sb_cmd,
405  " instrs[i].com.io_pos = ((io_image_h *)%s->mppa_ptr)->pos;\n",
407  /* put image in an unused SMEM slot */
408  sb_prf(sb_cmd, " instrs[i].com.cc_pos = %d;\n",
409  get_a_smem_slot(vtx, slots_used_ht));
410  }
411 
412  else { /* non-input nodes */
413  sb_prf(sb_cmd, " instrs[i].kind = MPPA_CMD_EXECUTE_KERNEL;\n");
414  sb_prf(sb_cmd, " instrs[i].opr.kernel = %s;\n", fapi->mppa.kernel_enum);
415 
416  /* loop over preds to get slots */
417  unsigned int predi = 0;
418  FOREACH(dagvtx, pred, preds) {
419  sb_prf(sb_cmd, " instrs[i].opr.pos[%d] = %d; /* input */\n", predi + 1,
420  use_output_slot(pred, slots_used_ht));
421  predi++;
422  }
423 
424  if (dagvtx_is_measurement_p(vtx)) { /* reductions results */
425  string measvar =
427  /* remove '&' in first position */
428  if (measvar[0] == '&') {
429  measvar = &measvar[1]; /* good memory management \o/ */
430  }
431  sb_prf(sb_cmd, " instrs[i].opr.red_dst[0] = %s; /* result */\n",
432  measvar);
433  if (fapi->arg_misc_out > 1) {
434  /* store vtx -> offset variable id */
435  hash_put(meas_off_ht, vtx, (void *)(_int)meas_ctr);
436  /* declare offset variable */
437  sb_prf(sb_cmd, " uint32_t meas_off%d;\n", meas_ctr);
438  sb_prf(sb_cmd,
439  " instrs[i].opr.red_dst[1] = &meas_off%d; /* offset */\n",
440  meas_ctr);
441  meas_ctr++;
442  }
443  }
444 
445  else { /* there is an output image */
446 
447  _int slot = reuse_pred_slot(vtx, preds, slots_used_ht);
448  if (slot == SMEM_SLOT_UNDEFINED) {
449  /* preds outputs are reused after */
450  slot = get_a_smem_slot(vtx, slots_used_ht);
451  }
452 
453  sb_prf(sb_cmd, " instrs[i].opr.pos[0] = %d; /* output */\n", slot);
454 
455  /* and maybe some extra input parameters... */
456 
457  if (dagvtx_optype(vtx) == spoc_type_poc) {
458  /* deal with structuring elements */
459  intptr_t se[9];
460  if (freia_extract_kernel_vtx(vtx, true, &se[0], &se[1], &se[2],
461  &se[3], &se[4], &se[5], &se[6], &se[7],
462  &se[8])) {
463  /* se is statically known, do some partial eval */
464  sb_prf(
465  sb_cmd, " instrs[i].opr.scalars[0] = 0b%d%d%d%d%d%d%d%d%d;\n",
466  se[0], se[1], se[2], se[3], se[4], se[5], se[6], se[7], se[8]);
467  } else {
468  /* use a function to convert se to binary constant */
469  sb_prf(sb_cmd, " instrs[i].opr.scalars[0] = se2scal(%s);\n",
472  }
473  }
474 
475  else {
476  /* deal with every parameter of other-than-morphological ops */
477  unsigned int argi = 0;
479  /* partial eval threshold third parameter to redirect to
480  relevant MPPA kernel */
481  if (argi == 2 && dagvtx_optype(vtx) == spoc_type_thr) {
482  if (strcmp(expression_to_string(expr), "1") == 0 ||
483  strcmp(expression_to_string(expr), "true") == 0) {
484  /* hack: overwrite current kernel... */
485  sb_cat(sb_cmd,
486  " instrs[i].opr.kernel = MPPA_KERNEL_THRESHOLD_BIN;\n");
487  }
488  /* skip writing third parameter (avoid obvious overflow) */
489  break;
490  }
491  sb_prf(sb_cmd, " instrs[i].opr.scalars[%d] = %s;\n", argi,
492  expression_to_string(expr));
493  argi++;
494  }
495  }
496  }
497  }
498 
499  /* add put instructions for output nodes (releases slot) */
500  if (gen_in_list_p(vtx, dag_outputs(cdg))) {
501  sb_prf(sb_cmd, " i++; /* instr #%u, vtx #%d */\n", instr_ctr,
502  dagvtx_number(vtx));
503  sb_cat(sb_cmd, "\n");
504  instr_ctr++;
505 
506  entity imout = dagvtx_image(vtx);
507  sb_cat(sb_cmd, " instrs[i].kind = MPPA_CMD_PUT_IO_TILE;\n");
508  sb_prf(sb_cmd, " instrs[i].com.cc_pos = %d;\n", get_output_slot(vtx));
509  sb_prf(sb_cmd,
510  " instrs[i].com.io_pos = ((io_image_h *)%s->mppa_ptr)->pos;\n",
512  }
513 
514  process_used_slots(preds, slots_used_ht);
515 
516  sb_prf(sb_cmd, " i++; /* instr #%u, vtx #%d */\n", instr_ctr,
517  dagvtx_number(vtx));
518  sb_cat(sb_cmd, "\n");
519 
520  instr_ctr++;
521  gen_free_list(preds);
522  }
523 
524  unsigned int max_instrs_per_cmd =
525  get_int_property("HWAC_MPPA_MAX_INSTRS_CMD");
526  pips_assert("cmd can host more instructions",
527  instr_ctr <= max_instrs_per_cmd);
528 
529  /* effectively launch computation */
530  sb_cat(sb_cmd, " /* launch computation... */\n");
531  sb_prf(sb_cmd, " mppa_compute(i, &%s);\n", curr_cmd);
532  sb_cat(sb_cmd, "\n");
533 
534  /* post-process reductions offset */
535  HASH_FOREACH(dagvtx, vtx, _int, value, meas_off_ht) {
536  list vtx_params = freia_get_vertex_params(vtx);
537  string xcoord = expression_to_string(EXPRESSION(gen_nth(1, vtx_params)));
538  string ycoord = expression_to_string(EXPRESSION(gen_nth(2, vtx_params)));
539  pips_assert("coord var name contains more than '&' char",
540  strlen(xcoord) > 1);
541  pips_assert("coord var name contains more than '&' char",
542  strlen(ycoord) > 1);
543  xcoord = dots2us(&xcoord[1]);
544  ycoord = dots2us(&ycoord[1]);
547  sb_prf(sb_cmd, " *%s = meas_off%d %% %s->widthWa;\n", xcoord, value, imin);
548  sb_prf(sb_cmd, " *%s = meas_off%d / %s->widthWa;\n", ycoord, value, imin);
549  sb_cat(sb_cmd, "\n");
550  }
551 
552  /* epilogue */
553  sb_cat(sb_cmd, " return 0;\n");
554  sb_cat(sb_cmd, "}\n");
555 
556  /* dump to helper file */
557  string_buffer_to_file(sb_cmd, helper);
558 
559  /* replace statements */
560  mppa_call_helper(cdg, fname, dagi, largs);
561 
562  /* cleanup */
563  string_buffer_free(&sb_cmd);
564  hash_table_free(meas_off_ht);
565  hash_table_free(slots_used_ht);
567  gen_free_list(ordered_vtx);
568 }
569 
570 /**
571  * @brief Split a dag into several subdags
572  *
573  * @param[in] dg input dag
574  * @param[in] n_dags number of returned sub-dags
575  * @param[in] n_vtx_dag number of vertices per sub-dag
576  *
577  * @return list of sub-dags
578  */
579 static list mppa_dag_split(const dag dg, unsigned int n_dags,
580  unsigned int n_vtx_dag) {
581  list res = NIL;
582  unsigned int upper_bound = get_int_property("HWAC_MPPA_MAX_INSTRS_CMD");
583 
584  for (unsigned int i = 0; i < n_dags; i++) {
585 
586  list vertices = NIL, inputs = NIL, outputs = NIL;
587 
588  unsigned int vtxi = 0;
589  FOREACH(dagvtx, vtx, dag_vertices(dg)) {
590  if (vtxi >= i * n_vtx_dag && vtxi < (i + 1) * n_vtx_dag) {
591  vertices = CONS(dagvtx, vtx, vertices);
592  if (gen_in_list_p(vtx, dag_inputs(dg))) {
593  inputs = CONS(dagvtx, vtx, inputs);
594  }
595  if (gen_in_list_p(vtx, dag_outputs(dg))) {
596  outputs = CONS(dagvtx, vtx, outputs);
597  }
598  }
599  vtxi++;
600  }
601 
602  /* add input nodes */
603  list to_add = NIL;
604  FOREACH(dagvtx, vtx, vertices) {
605  FOREACH(dagvtx, pred, dag_vertex_preds(dg, vtx)) {
606  if (!gen_in_list_p(pred, vertices)) {
607  /* replace by an input node */
609  /* get the right entity */
610  if (e == vtxcontent_out(dagvtx_content(pred))) {
611  pips_assert("e is defined", e != entity_undefined);
612  /* create an input node of opid 0 (undefined) */
613  dagvtx newpred = make_dagvtx(
615 
616  inputs = CONS(dagvtx, newpred, inputs);
617  to_add = CONS(dagvtx, newpred, to_add);
618 
619  dagvtx_succs(newpred) = CONS(dagvtx, vtx, dagvtx_succs(newpred));
620  }
621  }
622  }
623  }
624  }
625  /* add input vertex right before first consumer to optimize memory
626  slots usage */
627  FOREACH(dagvtx, vtx, to_add) {
628  dagvtx first_consumer = DAGVTX(CAR(dagvtx_succs(vtx)));
629  vertices = gen_insert_before(vtx, first_consumer, vertices);
630  }
631 
632  /* add output nodes */
633  FOREACH(dagvtx, vtx, vertices) {
634  FOREACH(dagvtx, succ, dagvtx_succs(vtx)) {
635  if (!gen_in_list_p(succ, vertices)) {
636  /* mark vtx as an output node */
637  outputs = CONS(dagvtx, vtx, outputs);
638  /* remove succ from successors list */
639  gen_remove(&dagvtx_succs(vtx), succ);
640  }
641  }
642  }
643 
644  /* clean some weird cases when an input vtx is also an output*/
645  FOREACH(dagvtx, vtx, inputs) {
646  if (gen_in_list_p(vtx, outputs)) {
647  gen_remove(&outputs, vtx);
648  }
649  }
650 
651  dag subdg = make_dag(gen_nreverse(inputs), gen_nreverse(outputs),
652  gen_nreverse(vertices));
653 
655 
656  pips_assert("small subdags",
657  (gen_length(dag_vertices(subdg)) +
658  gen_length(dag_outputs(subdg))) <= upper_bound);
659 
660  res = CONS(dag, subdg, res);
661  }
662  return res;
663 }
664 
665 /**
666  * @brief Split a dag in several sub-dags if too large
667  *
668  * @param[in] dg dag to split
669  *
670  * @return list of sub-dags of input dag
671  */
673 
674  list res = NIL;
675  unsigned int upper_bound = get_int_property("HWAC_MPPA_MAX_INSTRS_CMD");
676  unsigned int n_instrs =
678 
679  if (n_instrs <= upper_bound) {
680  /* ok: dag should not be splitted */
681  res = CONS(dag, dg, res);
682  } else {
683  /* not ok: split! */
684  unsigned int n_dags = n_instrs / upper_bound + 1;
685  unsigned int n_vtx_dag = n_instrs / n_dags + 1;
686 
687  /* one more if maybe too large */
688  if (n_vtx_dag > upper_bound - 4) { /* educated guess... */
689  n_dags++;
690  n_vtx_dag = n_instrs / n_dags + 1;
691  }
692 
693  pips_assert("splitted dags are small enough", n_vtx_dag <= upper_bound);
694 
695  /* constraints: balanced dags, try not to separate outputs from
696  producers... */
697  res = mppa_dag_split(dg, n_dags, n_vtx_dag);
698  }
699  return res;
700 }
701 
702 /**
703  * @brief Compile one dag with AIPO optimizations
704  *
705  * @param ls statements underlying the full dag
706  * @param occs image occurrences
707  * @param exchanges statements to exchange because of dependences
708  *
709  * @return the list of allocated intermediate images
710  */
712  const hash_table occs, hash_table exchanges,
713  const set output_images, FILE *helper_file,
714  __attribute__((__unused__)) set helpers,
715  int number) {
716 
717  pips_debug(3, "considering %d statements\n", (int)gen_length(ls));
718  pips_assert("some statements", ls);
719 
720  // intermediate images
722  list new_images = dag_fix_image_reuse(fulld, init, occs);
723 
724  // about aipo statistics: no helper file to put them...
725  list added_before = NIL, added_after = NIL;
726  freia_dag_optimize(fulld, exchanges, &added_before, &added_after);
727 
728  // dump final optimised dag
729  dag_dot_dump_prefix(module, "dag_cleaned_", number, fulld, added_before,
730  added_after);
731 
732  string fname_fulldag = strdup(cat(module, "_mppa", HELPER, i2a(number)));
733 
734  /* split on scalars */
735  list ldss = dag_split_on_scalars(
736  fulld, NULL, NULL, (gen_cmp_func_t)dagvtx_ordering, NULL, output_images);
737 
738  pips_debug(3, "dag initial split in %d dags\n", (int)gen_length(ldss));
739 
740  int dagi = 0;
741 
742  set stats = set_make(set_pointer), dones = set_make(set_pointer);
743 
744  FOREACH(dag, dg, ldss) {
745 
746  if (dag_no_image_operation(dg)) {
747  continue;
748  }
749 
750  // fix statements connexity
751  dag_statements(stats, dg);
752  freia_migrate_statements(sq, stats, dones);
753  set_union(dones, dones, stats);
754 
755  /* split large dags */
757 
758  FOREACH(dag, subdg, sub_dags) {
759  mppa_compile_dag(module, subdg, fname_fulldag, dagi, helper_file);
760  dagi++;
761  }
762  }
763 
764  set_free(stats);
765  set_free(dones);
766 
767  // now may put actual allocations, which messes up statement numbers
768  list reals =
769  freia_allocate_new_images_if_needed(ls, new_images, occs, init, NULL);
770 
771  // ??? should it be NIL because it is not useful in AIPO->AIPO?
772  freia_insert_added_stats(ls, added_before, true);
773  added_before = NIL;
774  freia_insert_added_stats(ls, added_after, false);
775  added_after = NIL;
776 
777  // cleanup
778  gen_free_list(new_images);
780 
781  return reals;
782 }
float a2sf[2] __attribute__((aligned(16)))
USER generates a user error (i.e., non fatal) by printing the given MSG according to the FMT.
Definition: 3dnow.h:3
int get_int_property(const string)
pstatement make_pstatement_empty(void)
dagvtx make_dagvtx(vtxcontent a1, list a2)
vtxcontent make_vtxcontent(intptr_t a1, intptr_t a2, pstatement a3, list a4, entity a5)
dag make_dag(list a1, list a2, list a3)
call make_call(entity a1, list a2)
Definition: ri.c:269
struct paramStruct params
static FILE * out
Definition: alias_check.c:128
static graph dg
dg is the dependency graph ; FIXME : should not be static global ?
Definition: chains.c:124
void dag_cleanup_other_statements(dag d)
remove unneeded statements? you must know they are really un-needed!
Definition: dag-utils.c:2191
_int dagvtx_optype(const dagvtx v)
Definition: dag-utils.c:116
list dag_vertex_preds(const dag d, const dagvtx target)
return target predecessor vertices as a list.
Definition: dag-utils.c:680
_int dagvtx_number(const dagvtx v)
returns the vertex number, i.e.
Definition: dag-utils.c:98
bool dag_no_image_operation(dag d)
tell whether we have something to do with images ??? hmmm...
Definition: dag-utils.c:2500
list dag_split_on_scalars(const dag initial, bool(*alone_only)(const dagvtx), dagvtx(*choose_vertex)(const list, bool), gen_cmp_func_t priority, void(*priority_update)(const dag), const set output_images)
split a dag on scalar dependencies only, with a greedy heuristics.
Definition: dag-utils.c:2823
int dagvtx_ordering(const dagvtx *v1, const dagvtx *v2)
Definition: dag-utils.c:148
list dag_fix_image_reuse(dag d, hash_table init, const hash_table occs)
fix intermediate image reuse in dag
Definition: dag-utils.c:2779
bool dagvtx_is_measurement_p(const dagvtx v)
returns whether the vertex is an image measurement operation.
Definition: dag-utils.c:623
entity dagvtx_image(const dagvtx v)
return the produced image or NULL
Definition: dag-utils.c:82
void dag_dump(FILE *out, const string what, const dag d)
for dag debug
Definition: dag-utils.c:212
void freia_dag_optimize(dag d, hash_table exchanges, list *lbefore, list *lafter)
remove dead image operations.
Definition: dag-utils.c:1416
string dagvtx_operation(const dagvtx v)
Definition: dag-utils.c:134
statement dagvtx_statement(const dagvtx v)
return statement if any, or NULL (for input nodes).
Definition: dag-utils.c:56
_int dagvtx_opid(const dagvtx v)
Definition: dag-utils.c:121
void dag_statements(set stats, const dag d)
build the set of actual statements in d
Definition: dag-utils.c:64
void dag_dot_dump_prefix(const string module, const string prefix, int number, const dag d, const list lb, const list la)
Definition: dag-utils.c:504
entity freia_create_helper_function(const string function_name, list lparams)
Definition: freia-utils.c:1030
list freia_get_vertex_params(const dagvtx v)
Definition: freia-utils.c:578
list freia_allocate_new_images_if_needed(list ls, list images, const hash_table occs, const hash_table init, const hash_table signatures)
insert image allocation if needed, for intermediate image inserted before if an image is used only tw...
Definition: freia-utils.c:1650
void freia_migrate_statements(sequence sq, const set stats, const set before)
Definition: freia-utils.c:1905
void hwac_replace_statement(statement s, call newc, bool kill)
replace statement contents with call to c, or continue if kill
Definition: freia-utils.c:720
bool freia_extract_kernel_vtx(dagvtx v, bool strict, intptr_t *k00, intptr_t *k10, intptr_t *k20, intptr_t *k01, intptr_t *k11, intptr_t *k21, intptr_t *k02, intptr_t *k12, intptr_t *k22)
vertex-based version
Definition: freia-utils.c:2012
call freia_ok(void)
build all is well freia constant
Definition: freia-utils.c:695
void freia_insert_added_stats(list ls, list stats, bool before)
insert statements to actual code sequence in "ls" BEWARE that ls is assumed to be in reverse order....
Definition: freia-utils.c:1185
const freia_api_t * get_freia_api_vtx(dagvtx v)
Definition: freia-utils.c:483
#define cat(args...)
Definition: freia.h:41
#define sb_prf(args...)
Definition: freia.h:43
#define HELPER
Definition: freia.h:38
#define sb_cat(args...)
Definition: freia.h:42
static string dots2us(string str)
Replace dots in string with underscores.
Definition: freia_mppa.c:194
static _int reuse_pred_slot(const dagvtx vtx, const list preds, hash_table slot_uses_ht)
Try to find a reusable memory slot for in-place operators.
Definition: freia_mppa.c:124
static void mppa_compile_dag(const string module, const dag cdg, const string fname, const int dagi, FILE *const helper)
Generate an optimized, FREIA-MPPA low level version of this dag.
Definition: freia_mppa.c:349
#define MPPA_DEFAULT_SMEM_SLOTS
Default number of memory slots.
Definition: freia_mppa.c:45
#define SMEM_SLOT_UNDEFINED
Placeholder for an undefined slot.
Definition: freia_mppa.c:50
static void mppa_call_helper(const dag dg, const string fname, unsigned int dagi, list largs)
Replace FREIA calls by PIPS generated ones.
Definition: freia_mppa.c:317
list freia_mppa_compile_calls(string module, dag fulld, sequence sq, list ls, const hash_table occs, hash_table exchanges, const set output_images, FILE *helper_file, __attribute__((__unused__)) set helpers, int number)
Compile one dag with AIPO optimizations.
Definition: freia_mppa.c:711
static list mppa_helper_args_params(const dag cdg, string *params)
Build a dag list of arguments and a string of corresponding parameters.
Definition: freia_mppa.c:213
static _int get_a_smem_slot(const dagvtx vtx, hash_table slot_uses_ht)
Provide a valid unused SMEM slot and link it to vertex.
Definition: freia_mppa.c:98
static _int get_output_slot(dagvtx vtx)
Get output slot used by input vertex.
Definition: freia_mppa.c:78
static list mppa_dag_split(const dag dg, unsigned int n_dags, unsigned int n_vtx_dag)
Split a dag into several subdags.
Definition: freia_mppa.c:579
static void process_used_slots(list vtx_preds, hash_table slot_uses_ht)
Update preds usage table, unused slot list.
Definition: freia_mppa.c:174
static dagvtx * smem_slot_users
Array of vertices using SMEM slots (indices) as output.
Definition: freia_mppa.c:57
static list mppa_dag_maybe_split_instrs_cmd(const dag dg)
Split a dag in several sub-dags if too large.
Definition: freia_mppa.c:672
static _int use_output_slot(const dagvtx vtx, hash_table slot_uses_ht)
Provide vertex used SMEM slot, update uses table.
Definition: freia_mppa.c:157
static _int get_free_slot()
Get first unused SMEM slot.
Definition: freia_mppa.c:62
static int n_dags
Definition: freia_sigmac.c:56
@ spoc_type_thr
Definition: freia_spoc.h:178
@ spoc_type_poc
Definition: freia_spoc.h:176
#define dagvtx_content(x)
#define dag_outputs(x)
#define vtxcontent_out(x)
#define dag_inputs(x)
#define dagvtx_succs(x)
#define vtxcontent_inputs(x)
#define dag_vertices(x)
#define DAGVTX(x)
DAGVTX.
void * malloc(YYSIZE_T)
void free(void *)
list gen_nreverse(list cp)
reverse a list in place
Definition: list.c:304
void gen_remove(list *cpp, const void *o)
remove all occurences of item o from list *cpp, which is thus modified.
Definition: list.c:685
#define NIL
The empty list (nil in Lisp)
Definition: newgen_list.h:47
list gen_copy_seq(list l)
Copy a list structure.
Definition: list.c:501
size_t gen_length(const list l)
Definition: list.c:150
#define CONS(_t_, _i_, _l_)
List element cell constructor (insert an element at the beginning of a list)
Definition: newgen_list.h:150
list gen_nconc(list cp1, list cp2)
physically concatenates CP1 and CP2 but do not duplicates the elements
Definition: list.c:344
#define CAR(pcons)
Get the value of the first element of a list.
Definition: newgen_list.h:92
void gen_free_list(list l)
free the spine of the list
Definition: list.c:327
bool gen_in_list_p(const void *vo, const list lx)
tell whether vo belongs to lx
Definition: list.c:734
#define FOREACH(_fe_CASTER, _fe_item, _fe_list)
Apply/map an instruction block on all the elements of a list.
Definition: newgen_list.h:179
gen_chunk gen_nth(int n, const list l)
to be used as ENTITY(gen_nth(3, l))...
Definition: list.c:710
list gen_full_copy_list(list l)
Copy a list structure with element copy.
Definition: list.c:535
list gen_insert_before(const void *no, const void *o, list l)
Definition: list.c:238
hash_table hash_table_make(hash_key_type key_type, size_t size)
Definition: hash.c:294
void * hash_get(const hash_table htp, const void *key)
this function retrieves in the hash table pointed to by htp the couple whose key is equal to key.
Definition: hash.c:449
void hash_put(hash_table htp, const void *key, const void *val)
This functions stores a couple (key,val) in the hash table pointed to by htp.
Definition: hash.c:364
void hash_update(hash_table htp, const void *key, const void *val)
update key->val in htp, that MUST be pre-existent.
Definition: hash.c:491
void hash_table_free(hash_table htp)
this function deletes a hash table that is no longer useful.
Definition: hash.c:327
void * hash_del(hash_table htp, const void *key)
this function removes from the hash table pointed to by htp the couple whose key is equal to key.
Definition: hash.c:439
#define pips_debug
these macros use the GNU extensions that allow variadic macros, including with an empty list.
Definition: misc-local.h:145
#define pips_assert(what, predicate)
common macros, two flavors depending on NDEBUG
Definition: misc-local.h:172
char * i2a(int)
I2A (Integer TO Ascii) yields a string for a given Integer.
Definition: string.c:121
@ hash_pointer
Definition: newgen_hash.h:32
#define HASH_FOREACH(key_type, k, value_type, v, ht)
Definition: newgen_hash.h:71
#define same_string_p(s1, s2)
void set_free(set)
Definition: set.c:332
bool set_belong_p(const set, const void *)
Definition: set.c:194
set set_union(set, const set, const set)
Definition: set.c:211
@ set_pointer
Definition: newgen_set.h:44
@ set_string
Definition: newgen_set.h:42
set set_make(set_type)
Create an empty set of any type but hash_private.
Definition: set.c:102
set set_add_element(set, const set, const void *)
Definition: set.c:152
string string_buffer_to_string(const string_buffer)
return malloc'ed string from string buffer sb
void string_buffer_to_file(const string_buffer, FILE *)
put string buffer into file.
void string_buffer_free(string_buffer *)
free string buffer structure, also free string contents according to the dup field
Definition: string_buffer.c:82
string_buffer string_buffer_make(bool dup)
allocate a new string buffer
Definition: string_buffer.c:58
intptr_t _int
_INT
Definition: newgen_types.h:53
int(* gen_cmp_func_t)(const void *, const void *)
Definition: newgen_types.h:114
static char * module
Definition: pips.c:74
string expression_to_string(expression e)
Definition: expression.c:77
entity local_name_to_top_level_entity(const char *n)
This function try to find a top-level entity from a local name.
Definition: entity.c:1450
static int init
Maximal value set for Fortran 77.
Definition: entity.c:320
expression entity_to_expression(entity e)
if v is a constant, returns a constant call.
Definition: expression.c:165
#define ENTITY(x)
ENTITY.
Definition: ri.h:2755
#define EXPRESSION(x)
EXPRESSION.
Definition: ri.h:1217
#define entity_undefined_p(x)
Definition: ri.h:2762
#define entity_undefined
Definition: ri.h:2761
int fprintf()
test sc_min : ce test s'appelle par : programme fichier1.data fichier2.data ...
char * strdup()
#define intptr_t
Definition: stdint.in.h:294
internally defined structure.
Definition: string_buffer.c:47
FI: I do not understand why the type is duplicated at the set level.
Definition: set.c:59
The structure used to build lists in NewGen.
Definition: newgen_list.h:41
FREIA API function name -> SPoC hardware description (and others?)
Definition: freia.h:71
mppa_hw_t mppa
Definition: freia.h:93
unsigned int arg_misc_out
Definition: freia.h:82
string kernel_enum
Definition: freia_mppa.h:34