PIPS
loop_fusion.c
Go to the documentation of this file.
1 /*
2 
3  $Id: loop_fusion.c 23495 2018-10-24 09:19:47Z coelho $
4 
5  Copyright 1989-2009 MINES ParisTech
6 
7  This file is part of PIPS.
8 
9  PIPS is free software: you can redistribute it and/or modify it
10  under the terms of the GNU General Public License as published by
11  the Free Software Foundation, either version 3 of the License, or
12  any later version.
13 
14  PIPS is distributed in the hope that it will be useful, but WITHOUT ANY
15  WARRANTY; without even the implied warranty of MERCHANTABILITY or
16  FITNESS FOR A PARTICULAR PURPOSE.
17 
18  See the GNU General Public License for more details.
19 
20  You should have received a copy of the GNU General Public License
21  along with PIPS. If not, see <http://www.gnu.org/licenses/>.
22 
23  */
24 
25 // do not compile unless required
26 #include "phases.h"
27 #if defined(BUILDER_LOOP_FUSION) || \
28  defined(BUILDER_LOOP_FUSION_WITH_REGIONS)
29 
30 /* Functions to perform the greedy loop fusion of a loop sequence */
31 
32 #include <stdio.h>
33 #include <stdlib.h>
34 #include <string.h>
35 
36 #include "genC.h"
37 #include "linear.h"
38 
39 #include "misc.h"
40 #include "pipsdbm.h"
41 #include "properties.h"
42 
43 #include "ri.h"
44 #include "ri-util.h"
45 #include "prettyprint.h" // for debugging
46 
47 #include "text-util.h" // print_words
48 #include "effects-util.h" // effect_scalar_p & others
49 
50 #include "control.h" // module_reorder
51 #include "effects-generic.h" // used
52 #include "effects-simple.h" // print_effect
53 #include "effects-convex.h" // used
54 
55 #include "dg.h"
56 /* Instantiation of the dependence graph: */
57 typedef dg_arc_label arc_label;
59 #include "graph.h"
60 /* Just to be able to use ricedg.h: */
61 #include "ricedg.h"
62 
63 #include "semantics.h" // used
64 #include "transformer.h" // free_value_mappings
65 
66 #include "chains.h" // statement_dependence_graph
67 
68 /**
69  * Fusion configuration
70  */
71 typedef struct fusion_params {
72  bool maximize_parallelism; // Prevent sequentializing loop that were parallel
73  bool greedy; // Fuse as much a we can, and not only loops that have reuse
74  bool coarse; // Use a coarse grain algorithm
75  unsigned int max_fused_per_loop; // Threshold to limit the number of fusion per loop
76 } *fusion_params;
77 struct fusion_block;
78 typedef struct fusion_block **fbset;
79 
80 
81 // Forward declaration
82 static bool fusion_loops(statement sloop1,
83  set contained_stmts_loop1,
84  statement sloop2,
85  set contained_stmts_loop2,
86  bool maximize_parallelism,
87  bool coarse_grain);
88 
89 /**
90  * Structure to hold block used for the fusion selection algorithm.
91  * It's used in a sequence of statements to keep track of the precedence
92  * constraints between statements while fusing some of them
93  */
94 typedef struct fusion_block {
95  int num;
96  int id; // real original num
97  statement s; // The main statement (header for loops)
98  // statements inside the block (in case of loop, header won't belong to)
99  set statements;
100  fbset successors; // set of blocks that depend from this one. Precedence constraint
101  fbset rr_successors; // set of blocks that reuse data used in this one, false dep
102  fbset predecessors; // set of blocks this one depends from. Precedence constraint
103  fbset rr_predecessors; // set of blocks that use data reused in this one, false dep
104  bool is_a_loop;
105  unsigned int count_fusion; // Count the number of fusion that have occur
106  bool processed; // Flag that indicates if the block have already been processed
107  // (multiple paths can lead to multiple costly tried of the same fusion)
108 }*fusion_block;
109 
110 /* Newgen list foreach compatibility */
111 #define fusion_block_TYPE fusion_block
112 #define fusion_block_CAST(x) ((fusion_block)((x).p))
113 
114 static size_t max_num;
115 #ifdef __SSE2__
116 #include <xmmintrin.h>
117 static inline void fbset_clear(fbset self) {
118  __m128i z = _mm_setzero_si128();
119  for(fbset iter = self, end=self+max_num;iter!=end;iter+=sizeof(__m128i)/sizeof(fusion_block))
120  _mm_store_si128((__m128i*)iter,z);
121 }
122 static inline fbset fbset_make() {
123  fbset self =_mm_malloc(max_num*sizeof(fusion_block),32);
124  fbset_clear(self);
125  return self;
126 }
127 static inline void fbset_free(fbset fb) {
128  _mm_free(fb);
129 }
130 static void fbset_union(fbset self, fbset other) {
131  for(size_t i=0;i<max_num;i+=sizeof(__m128i)/sizeof(fusion_block)){
132  __m128i s = _mm_load_si128((__m128i*)&self[i]),
133  o = _mm_load_si128((__m128i*)&other[i]);
134  s=_mm_or_si128(s,o);
135  _mm_store_si128((__m128i*)&self[i],s);
136  }
137 }
138 #else
139 static inline void fbset_clear(fbset self) {
140  memset(self,0,sizeof(*self)*max_num);
141 }
142 static inline fbset fbset_make() {
143  return calloc(max_num,sizeof(fusion_block));
144 }
145 static inline void fbset_free(fbset fb) {
146  free(fb);
147 }
148 static void fbset_union(fbset self, fbset other) {
149  for(size_t i=0;i<max_num;i++)
150  if(other[i])
151  self[i]=other[i];
152 }
153 #endif
154 
155 static void fbset_difference(fbset self, fbset other) {
156  for(size_t i=0;i<max_num;i++)
157  if(other[i])
158  self[i]=NULL;
159 }
160 static inline void fbset_del_element(fbset self, fusion_block e) {
161  assert(e->id>=0);
162  self[e->id]=NULL;
163 }
164 static inline void fbset_add_element(fbset self, fusion_block e) {
165  assert(e->id>=0);
166  self[e->id]=e;
167 }
168 static bool fbset_belong_p(fbset self, fusion_block e) {
169  assert(e->id>=0);
170  return self[e->id]!=NULL;
171 }
172 static bool fbset_empty_p(fbset self) {
173  for(size_t i=0;i<max_num;i++)
174  if(self[i])
175  return false;
176  return true;
177 }
178 
179 #define FBSET_FOREACH(e,s) \
180  fusion_block e;\
181  for(size_t __i=0;__i<max_num;__i++)\
182  if((e=s[__i]))
183 
184 /**
185  * Get node in the DG corresponding to given statement ordering
186  */
187 static hash_table ordering_to_dg_mapping;
188 static vertex ordering_to_vertex(int ordering) {
189  long int lordering = ordering;
190  return (vertex)hash_get(ordering_to_dg_mapping, (void *)lordering);
191 }
192 
193 /**
194  * Just a debug function, might not be here...
195  */
196 static void print_graph(graph dependence_graph) {
197 
198  MAP(VERTEX,
199  a_vertex,
200  {
201  statement s1 = vertex_to_statement(a_vertex);
202 
203  fprintf( stderr, "Statement %d\n", (int)statement_ordering( s1 ) );
204  MAP(SUCCESSOR, a_successor,
205  {
206  vertex v2 = successor_vertex(a_successor);
208  dg_arc_label an_arc_label = successor_arc_label(a_successor);
209  fprintf(stderr, "\t%d --> %d with conflicts\n", (int)statement_ordering( s1 ), (int)statement_ordering( s2 ) );
210 
211  MAP(CONFLICT, a_conflict,
212  {
213 
214  fprintf(stderr, "\t\tfrom ");
215  print_words(stderr, words_effect(conflict_source( a_conflict ) ) );
216  fprintf(stderr, " to ");
217  print_words(stderr, words_effect(conflict_sink( a_conflict ) ) );
218  if( cone_undefined != conflict_cone( a_conflict ) ) {
219  MAP(INT,
220  level,
221  {
222  fprintf(stderr, " cone level %d", level);
223  },
224  cone_levels(conflict_cone(a_conflict)));
225  }
226  fprintf(stderr, "\n");
227 
228  },
229  dg_arc_label_conflicts(an_arc_label));
230  },
231  vertex_successors(a_vertex));
232  },
234 
235 }
236 
237 /**
238  * Debug function that print block informations
239  */
240 static void print_block(fusion_block block) {
241  fprintf(stderr, "Block %d (fused %d times), predecessors : ",
242  block->num, block->count_fusion);
243  FBSET_FOREACH(pred,block->predecessors) {
244  fprintf(stderr, "%d, ", pred->num);
245  }
246  fprintf(stderr, " | successors : ");
247  FBSET_FOREACH(succ,block->successors) {
248  fprintf(stderr, "%d, ", succ->num);
249  }
250  fprintf(stderr, " | rr_predecessors : ");
251  FBSET_FOREACH(rr_pred,block->rr_predecessors) {
252  fprintf(stderr, "%d, ", rr_pred->num);
253  }
254  fprintf(stderr, " | rr_successors : ");
255  FBSET_FOREACH(rr_succ,block->rr_successors) {
256  fprintf(stderr, "%d, ", rr_succ->num);
257  }
258  fprintf(stderr, "\n");
259 }
260 
261 /**
262  * Add statement 's' to the set 'stmts'. To be called with gen_context_recurse
263  * to record all statement in a branch of the IR tree.
264  */
265 static bool record_statements(statement s, set stmts) {
266  set_add_element(stmts, stmts, s);
267  return true;
268 }
269 
270 
271 /**
272  * Debug function that print a list of blocks
273  */
274 static void print_blocks(list /* of fusion_block */ blocks) {
275  FOREACH(fusion_block, block, blocks) {
276  print_block(block);
277  }
278 }
279 
280 /**
281  * @brief Check that two loop statements have the same bounds
282  */
283 static bool loops_have_same_bounds_p(loop loop1, loop loop2) {
284  bool same_p = false;
285 
286  range r1 = loop_range(loop1);
287  range r2 = loop_range(loop2);
288 
289  same_p = range_equal_p(r1, r2);
290 
291  return same_p;
292 }
293 
294 
295 #if 0
296 /**
297  * @brief Check that two loop have the same header (same index variable and
298  * same bounds)
299  */
300 static bool loop_has_same_header_p(loop loop1, loop loop2) {
301 
302  entity index1 = loop_index(loop1);
303  entity index2 = loop_index(loop2);
304 
305  // This assumes no side effects of loop iterations on the bound expressions
306  if(loops_have_same_bounds_p(loop1,loop2) && index1 == index2) {
307  return true;
308  }
309  return false;
310 }
311 #endif
312 
313 
314 /**
315  * DIRTY HACK
316  * Replace entity in effects associated to a statement
317  */
318 struct entity_pair
319 {
320  entity old;
321  entity new;
322 };
323 
324 
325 static void replace_entity_effects_walker(statement s, void *_thecouple ) {
326  struct entity_pair *thecouple = _thecouple;
327  list effs = load_proper_rw_effects_list( s );
328  ifdebug(7) {
329  pips_debug(7,"Handling statement :");
330  print_statement(s);
331  pips_debug(7,"Effects :");
332  print_effects(effs);
333  fprintf(stderr,"\n");
334  }
335 
336  FOREACH(effect, eff, effs) {
337  replace_entity(eff, thecouple->old, thecouple->new);
338  }
339  ifdebug(7) {
340  pips_debug(7,"Effects after :");
341  print_effects(effs);
342  fprintf(stderr,"\n");
343  }
344 
345 }
346 
347 /* temporary block statement for candidate fused body */
348 static statement fused_statement = statement_undefined;
349 
350 /* current ordering for generated statement */
351 static int next_ordering = 999999;
352 
353 
354 /*
355  * Allocate a temporary block statement for sequence.
356  */
357 static statement make_temporary_fused_statement(list sequence) {
358  if(statement_undefined_p(fused_statement)) {
359  // Construct the fused sequence
360  fused_statement = make_block_statement(sequence);
361  statement_ordering( fused_statement) = next_ordering++; // FIXME : dirty
362  pips_assert("ordering defined", ordering_to_statement_initialized_p());
364 
365  // Fix a little bit proper effects so that chains will be happy with it
366  store_proper_rw_effects_list(fused_statement, NIL);
367  } else {
369  }
370  return fused_statement;
371 }
372 
373 /*
374  *
375  */
376 static void free_temporary_fused_statement() {
377  if(!statement_undefined_p(fused_statement)) {
378 
379  //sefault ! Don't know why...
380  //delete_rw_effects(fused_statement);
381  //
383  free_statement(fused_statement);
384  fused_statement = statement_undefined;
385  }
386 
387 }
388 
389 
390 
391 static bool coarse_fusable_loops_p(statement sloop1,
392  statement sloop2,
393  bool maximize_parallelism) {
394  loop loop1 = statement_loop(sloop1);
395  statement inner_stat1 = loop_body(loop1);
396  loop loop2 = statement_loop(sloop2);
397  statement inner_stat2 = loop_body(loop2);
398 
401  return false;
402 
403  /* get the loop body preconditions */
404  transformer body_prec1 = load_statement_precondition(inner_stat1);
405  transformer body_prec2 = load_statement_precondition(inner_stat2);
406 
407  /* do not declare as parallel a loop which is never executed */
408  if (transformer_empty_p(body_prec1) || transformer_empty_p(body_prec2)) {
409  pips_debug(1, "non feasible inner statement (empty precondition), abort fusion\n");
410  return false;
411  }
412 
413  /* ...needed by TestCoupleOfReferences(): */
414  list l_enclosing_loops1 = CONS(STATEMENT, sloop1, NIL);
415  //list l_enclosing_loops2 = CONS(STATEMENT, sloop2, NIL);
416 
417  /* Get the loop invariant regions for the loop body: */
418  list l_reg1 = load_invariant_rw_effects_list(inner_stat1);
419  list l_reg2 = load_invariant_rw_effects_list(inner_stat2);
420 
421  /* To store potential conflicts to study: */
422  list l_conflicts = NIL;
423 
424  /* To keep track of a current conflict disabling the parallelization: */
425  bool may_conflicts_p = false;
426 
427  pips_debug(1,"begin\n");
428 
429  pips_debug(1,"building conflicts\n");
430  ifdebug(2) {
431  fprintf(stderr, "original invariant regions:\n");
432  print_regions(l_reg1);
433  print_regions(l_reg2);
434  }
435 
436  /* First, builds list of conflicts: */
437  FOREACH(EFFECT, reg1, l_reg1) {
438  entity e1 = effect_entity(reg1);
439  if (e1!=loop_index(loop1)
440  && gen_chunk_undefined_p(gen_find_eq(e1,loop_locals(loop1))) // Ignore private variable
441  && store_effect_p(reg1) // Ignore non memory effect
442  ) {
444  int d1 = gen_length(reference_indices(r));
446 
447  /* Search for a write-read/read-write conflict */
448  FOREACH(EFFECT, reg2, l_reg2) {
449  reference r2 = effect_any_reference(reg2);
450  int d2 = gen_length(reference_indices(r2));
451  entity e1 = region_entity(reg1);
452  entity e2 = region_entity(reg2);
453 
454  if (store_effect_p(reg2)
455  && ( (d1<=d2 && region_write_p(reg1) && region_read_p(reg2))
456  || (d1>=d2 && region_read_p(reg1) && region_write_p(reg2))
457  || (region_write_p(reg1) && region_write_p(reg2))
458  )
459  && same_entity_p(e1,e2) // String manipulation at the end
460  ) {
461  /* Add a write-read conflict */
462  conf = make_conflict(reg1, reg2, cone_undefined);
463  l_conflicts = gen_nconc(l_conflicts, CONS(CONFLICT, conf, NIL));
464  }
465  }
466  }
467  }
468 
469  /* THEN, TESTS CONFLICTS */
470  pips_debug(1,"testing conflicts\n");
471  /* We want to test for write/read and read/write dependences at the same
472  * time. */
473  Finds2s1 = true;
474  FOREACH(CONFLICT, conf, l_conflicts) {
475  effect reg1 = conflict_source(conf);
476  effect reg2 = conflict_sink(conf);
477  list levels = NIL;
478  list levelsop = NIL;
479  Ptsg gs = SG_UNDEFINED;
480  Ptsg gsop = SG_UNDEFINED;
481 
482  ifdebug(2) {
483  fprintf(stderr, "testing conflict from:\n");
484  print_region(reg1);
485  fprintf(stderr, "\tto:\n");
486  print_region(reg2);
487  }
488 
489  // Patch the region to mimic the same loop index
490  entity i1 = loop_index(loop1);
491  entity i2 = loop_index(loop2);
492  if(i1!=i2) {
493  reg2=copy_effect(reg2);
494  replace_entity(reg2, loop_index(loop2), loop_index(loop1));
495  list l_reg2 = CONS(REGION, reg2, NIL);
496 
497  // First project to eliminate the index that can have been added because
498  // of the preconditions, this should be safe because it is not used in the
499  // body
500  list tmp_i = CONS(entity,i1, NIL );
501  project_regions_along_variables(l_reg2, tmp_i);
502  free(tmp_i);
503 
504  // Substitue i2 with i1 in the regions associated to loop2's body
505  all_regions_variable_rename(l_reg2,i2,i1);
506  }
507 
508 
509  /** CHEAT on the ordering !
510  * We make that in order that the dependence test believe that statements
511  * from the first loop comes before statements from the second loop.
512  * It may not be the case after a loop of reordering due to previous fusion
513  */
514  intptr_t ordering1 = statement_ordering(inner_stat1);
515  statement_ordering(inner_stat1) = 1;
516  intptr_t ordering2 = statement_ordering(inner_stat2);
517  statement_ordering(inner_stat2) = 2;
518 
519  /* Use the function TestCoupleOfReferences from ricedg. */
520  /* We only consider one loop at a time, disconnected from
521  * the other enclosing and inner loops. Thus l_enclosing_loops
522  * only contains the current loop statement.
523  * The list of loop variants is empty, because we use loop invariant
524  * regions (they have been composed by the loop transformer).
525  */
526  levels = TestCoupleOfReferences(l_enclosing_loops1, region_system(reg1),
527  inner_stat1, reg1, effect_any_reference(reg1),
528  l_enclosing_loops1, region_system(reg2),
529  inner_stat2, reg2, effect_any_reference(reg2),
530  NIL, &gs, &levelsop, &gsop);
531 
532  // Restore the ordering
533  statement_ordering(inner_stat1) = ordering1;
534  statement_ordering(inner_stat2) = ordering2;
535 
536 
537  ifdebug(2) {
538  fprintf(stderr, "result:\n");
539  if (ENDP(levels) && ENDP(levelsop))
540  fprintf(stderr, "\tno dependence\n");
541 
542  if (!ENDP(levels)) {
543  fprintf(stderr, "\tdependence at levels: ");
544  FOREACH(INT, l, levels) {
545  fprintf(stderr, " %d", l);
546  }
547  fprintf(stderr, "\n");
548 
549  if (!SG_UNDEFINED_P(gs)) {
550  Psysteme sc = SC_UNDEFINED;
551  fprintf(stderr, "\tdependence cone:\n");
552  sg_fprint_as_dense(stderr, gs, gs->base);
553  sc = sg_to_sc_chernikova(gs);
554  fprintf(stderr,"\tcorresponding linear system:\n");
556  sc_rm(sc);
557  }
558  }
559  if (!ENDP(levelsop)) {
560  fprintf(stderr, "\topposite dependence at levels: ");
561  FOREACH(INT, l, levelsop)
562  fprintf(stderr, " %d", l);
563  fprintf(stderr, "\n");
564 
565  if (!SG_UNDEFINED_P(gsop)) {
566  Psysteme sc = SC_UNDEFINED;
567  fprintf(stderr, "\tdependence cone:\n");
568  sg_fprint_as_dense(stderr, gsop, gsop->base);
569  sc = sg_to_sc_chernikova(gsop);
570  fprintf(stderr,"\tcorresponding linear system:\n");
572  sc_rm(sc);
573  }
574  }
575  }
576 
577  /* If we have a level we may be into trouble */
578  if (!ENDP(levels)) {
579  // Here are the forward dependences, carried or not...
580  FOREACH(INT, l, levels) {
581  if(l==1) {
582  // This is a loop carried dependence, break the parallelism but safe !
583  pips_debug(1,"Loop carried forward dependence, break parallelism ");
584  if(loop_parallel_p(loop1) || loop_parallel_p(loop2)) {
585  if(maximize_parallelism) {
586  ifdebug(1) {
587  fprintf(stderr,"then it is fusion preventing!\n");
588  }
589  may_conflicts_p = true;
590  } else {
591  ifdebug(1) {
592  fprintf(stderr," but both loops are sequential, then fuse!\n");
593  }
594  }
595  } else ifdebug(1) {
596  fprintf(stderr," but fuse anyway!\n");
597  }
598  } else if(l==2) {
599  // This is a loop independent dependence, seems safe to me !
600  pips_debug(1,"Loop independent forward dependence, safe...\n");
601  may_conflicts_p = false;
602  } else {
603  pips_user_error("I don't know what to do with a dependence level of "
604  "%d here!\n",l);
605  }
606  }
607  }
608  if (!ENDP(levelsop)) {
609  // Here are the backward dependences, carried or not...
610  FOREACH(INT, l, levelsop) {
611  if(l==1) {
612  // This is a loop carried dependence, not preventing fusion but
613  // breaking the parallelism
614  pips_debug(1,"Loop carried backward dependence, fusion preventing !\n");
615  may_conflicts_p = true;
616  } else if(l==2) {
617  // This is a non-carried dependence backware dependence
618  // Hey wait a minute, how is it possible ???
619  pips_user_error("Loop independent backward dependence... weird !\n");
620  } else {
621  pips_user_error("I don't know what to do with a dependence level of "
622  "%d here !\n",l);
623  }
624  }
625  }
626 
627  gen_free_list(levels);
628  gen_free_list(levelsop);
629  if (!SG_UNDEFINED_P(gs))
630  sg_rm(gs);
631  if (!SG_UNDEFINED_P(gsop))
632  sg_rm(gsop);
633 
634 
635  if(may_conflicts_p)
636  break;
637  }
638 
639  /* Finally, free conflicts */
640  pips_debug(1,"freeing conflicts\n");
641  FOREACH(CONFLICT, c, l_conflicts) {
644  free_conflict(c);
645  }
646  gen_free_list(l_conflicts);
647 
648  gen_free_list(l_enclosing_loops1);
649 
650  pips_debug(1,"end\n");
651 
652  return !may_conflicts_p;
653 
654 }
655 
656 
657 /**
658  *
659  */
660 static bool coarse_fusion_loops(statement sloop1,
661  statement sloop2,
662  bool maximize_parallelism) {
663  loop loop1 = statement_loop(sloop1);
664  loop loop2 = statement_loop(sloop2);
665  statement body_loop1 = loop_body(loop1);
666  statement body_loop2 = loop_body(loop2);
667 
668  // Check if loops have fusion compatible headers, else abort
669  if(!loops_have_same_bounds_p(loop1, loop2)) {
670  pips_debug(4,"Fusion aborted because of incompatible loop headers\n");
671  return false;
672  }
673 
674  bool coarse_fusable_p = coarse_fusable_loops_p(sloop1,sloop2,maximize_parallelism);
675 
676  if(coarse_fusable_p) {
677  pips_debug(2,"Fuse the loops now\n");
678  entity index1 = loop_index(loop1);
679  entity index2 = loop_index(loop2);
680  if(index1!=index2) {
681  set ref_entities = get_referenced_entities(loop2);
682  // Assert that index1 is not referenced in loop2
683  if(set_belong_p(ref_entities,index1)) {
684  pips_debug(3,"First loop index (%s) is used in the second loop, we don't"
685  " know how to handle this case !\n",
686  entity_name(index1));
687  return false;
688  }
689  }
690 
691  // Merge loop locals
692  FOREACH(ENTITY,e,loop_locals(loop2)) {
693 
694  if(e != loop_index(loop2) && !gen_in_list_p(e,loop_locals(loop1))) {
696  }
697  }
698 
699  ifdebug(3) {
700  pips_debug(3,"Before fusion : ");
701  print_statement(sloop1);
702  print_statement(sloop2);
703  }
704 
705  /* Here a lot of things are broken :
706  * - The regions associated to both body must be merged
707  * - The ordering is broken if a new sequence is created :-(
708  * - ?
709  */
710  intptr_t ordering =statement_ordering(body_loop1); // Save ordering
711  // Append body 2 to body 1
712  insert_statement(body_loop1,body_loop2,false);
713  statement_ordering(body_loop1) = ordering;
714 
715  // FIXME insert_statement() does a bad job here :-(
716  // I should fix it but I'm lazy now so use the bazooka:
717  clean_up_sequences(body_loop1);
718 
719  // Merge regions list for body
720  list l_reg1 = load_invariant_rw_effects_list(body_loop1);
721  list l_reg2 = load_invariant_rw_effects_list(body_loop2);
722  if(loop_index(loop1)!=loop_index(loop2)) {
723  // Patch the regions to be expressed with the correct loop index
724  all_regions_variable_rename(l_reg2,index2,index1);
725  // FI: FIXME we should check that index2 is dead on exit of loop2
726  replace_entity((void *)body_loop1, index2, index1);
727  }
728  gen_nconc(l_reg1,l_reg2);
729  store_invariant_rw_effects_list(body_loop1,l_reg1);
730  store_invariant_rw_effects_list(body_loop2,NIL); // No sharing
731 
732  // Free the body_loop2
733  // Hum, only the sequence, not the inner statements
734  // Keep the leak for now... FIXME
735 
736 
737  ifdebug(3) {
738  pips_debug(3,"After fusion : ");
739  print_statement(sloop1);
740  }
741  }
742 
743  return coarse_fusable_p;
744 }
745 
746 
747 /**
748  * @brief Try to fuse the two liront naoop recomputing a DG !
749  * Dependences are check against the new body
750  * but other constraints such as some statement between the two loops are not
751  * handled and must be enforced outside.
752  *
753  * FIXME High leakage
754  */
755 static bool fine_fusion_loops(statement sloop1,
756  set contained_stmts_loop1,
757  statement sloop2,
758  set contained_stmts_loop2,
759  bool maximize_parallelism) {
760  pips_assert("Previous is a loop", statement_loop_p( sloop1 ) );
761  pips_assert("Current is a loop", statement_loop_p( sloop2 ) );
762  bool success = false;
763 
764 
765  loop loop1 = statement_loop(sloop1);
766  loop loop2 = statement_loop(sloop2);
767  statement body_loop1 = loop_body(loop1);
768  statement body_loop2 = loop_body(loop2);
769 
770  // Check if loops have fusion compatible headers, else abort
771  if(!loops_have_same_bounds_p(loop1, loop2)) {
772  pips_debug(4,"Fusion aborted because of incompatible loop headers\n");
773  return false;
774  }
775 
776 
777 
778  entity index1 = loop_index(loop1);
779  entity index2 = loop_index(loop2);
780  if(index1!=index2) {
781  pips_debug(4,"Replace second loop index (%s) with first one (%s)\n",
782  entity_name(index2), entity_name(index1));
783  // Get all variable referenced in loop2 body to find if index1 is referenced
784  // This could be optimized by a search for index1 and abort if found.
785  // this would avoid building a set
786  set ref_entities = get_referenced_entities(loop2);
787 
788  // Assert that index1 is not referenced in loop2
789  if(set_belong_p(ref_entities,index1)) {
790  pips_debug(3,"First loop index (%s) is used in the second loop, we don't"
791  " know how to handle this case !\n",
792  entity_name(index1));
793  return false;
794  } else {
795  // FI: FIXME we should check that index2 is dead on exit of loop2
796  replace_entity((void *)body_loop2, index2, index1);
797 
798  // Replace entities in effects
799  struct entity_pair thecouple = { index2, index1 };
800  gen_context_recurse(body_loop2, &thecouple,
802  replace_entity_effects_walker);
803  }
804  set_free(ref_entities);
805  }
806 
807 
808  // Be sure that loop bodies are encapsulated in sequence
809  if(!statement_sequence_p(body_loop1)) {
811  body_loop1 = loop_body(loop1);
812  statement_ordering( body_loop1 ) = next_ordering++; // FIXME : dirty
814  // Fix a little bit proper effects so that chains will be happy with it
815  store_proper_rw_effects_list(body_loop1, NIL); // FIXME should lead to a call to delete_rw_effects();
816 
817  }
818 
819  if(!statement_sequence_p(body_loop2)) {
820  loop_body(loop2) = make_block_statement(CONS(statement, body_loop2, NIL ));
821  body_loop2 = loop_body(loop2);
822  statement_ordering( body_loop2 ) = next_ordering++; // FIXME : dirty
824  // Fix a little bit proper effects so that chains will be happy with it
825  store_proper_rw_effects_list(body_loop2, NIL); // FIXME should lead to a call to delete_rw_effects();
826  }
827 
828  // Build a list with the statements from loop 1 followed by stmts for loop 2
831  list fused = gen_nconc(seq1, seq2);
832 
833 
834  // Let's check if the fusion is valid
835 
836  // Construct the fused sequence
837  loop_body( loop1 ) = make_temporary_fused_statement(fused);
838 
839  // Stuff for Chains and DG
841 
842 
843  // Build chains
844  // do not debug on/off all the time : costly (read environment + atoi )?
845  //debug_on("CHAINS_DEBUG_LEVEL");
846  graph candidate_dg = statement_dependence_graph(sloop1);
847  //debug_off();
848 
849  ifdebug(5) {
850  pips_debug(5, "Candidate CHAINS :\n");
851  print_graph(candidate_dg);
852  }
853 
854  // Build DG
855  // do not debug on/off all the time : costly (read environment + atoi )?
856  //debug_on("RICEDG_DEBUG_LEVEL");
857  candidate_dg = compute_dg_on_statement_from_chains_in_place(sloop1, candidate_dg);
858  //debug_off();
859 
860  // Cleaning
863 
864  ifdebug(5) {
865  pips_debug(5, "Candidate DG :\n");
866  print_graph(candidate_dg);
867  pips_debug(5, "Candidate fused loop :\n");
868  print_statement(sloop1);
869  }
870 
871 
872  // Let's validate the fusion now
873  // No write dep between a statement from loop2 to statement from loop1
874  success = true;
875  FOREACH( vertex, v, graph_vertices(candidate_dg) ) {
879 
880  // Look if there's a loop carried dependence, that would be bad, but only
881  // for parallel loop nest !
882  if(maximize_parallelism && loop_parallel_p(loop1)) {
883  FOREACH( successor, a_successor, vertex_successors(v))
884  {
885  vertex v2 = successor_vertex(a_successor);
887  arc_label dal = successor_arc_label(a_successor);
888  int statement_ordering2 = dg_vertex_label_statement(dvl2);
889  statement stmt2 = ordering_to_statement(statement_ordering2);
891  effect e_sink = conflict_sink(c);
892  effect e_source = conflict_source(c);
893  if((effect_write_p(e_source) && store_effect_p(e_source))
894  || (effect_write_p(e_sink) && store_effect_p(e_sink))) {
895 
896  // Inner loop indices conflicts aren't preventing fusion
897  if(statement_loop_p(stmt1) &&
898  effect_variable(e_source) == loop_index(statement_loop(stmt1))) {
899  continue;
900  }
901  if(statement_loop_p(stmt2) &&
902  effect_variable(e_sink) == loop_index(statement_loop(stmt2))) {
903  continue;
904  }
905 
906  // Scalar can make any conflict ! The loop are parallel thus it has
907  // to be private :-)
908  if(loop_parallel_p(loop2) &&
909  (effect_scalar_p(e_sink) || effect_scalar_p(e_source))) {
910  continue;
911  }
912 
913  // Get the levels and try to find out if the fused loop carries the
914  // conflict
915  if(cone_undefined != conflict_cone(c)) {
916  list levels = cone_levels(conflict_cone(c));
917  FOREACH(INT, l, levels) {
918  if(l == 1) {
919  // Hum seems bad... This a loop carried dependence !
920  success = false;
921  ifdebug(2) {
922  pips_debug(2,"This loop carried dependence is breaking parallism !\n");
923  fprintf(stderr, "From : ");
924  print_effect(e_source);
925  fprintf(stderr, "to : ");
926  print_effect(e_sink);
927  }
928  break;
929  }
930  }
931  }
932  }
933  if(!success) break;
934  }
935  }
936  }
937  if(!success) {
938  break;
939  }
940  // Check that the source of the conflict is in the "second" loop body
941  if(set_belong_p(contained_stmts_loop2,stmt1)) {
942  FOREACH( successor, a_successor, vertex_successors(v))
943  {
944  vertex v2 = successor_vertex(a_successor);
946  arc_label an_arc_label = successor_arc_label(a_successor);
947  int statement_ordering2 = dg_vertex_label_statement(dvl2);
948  statement stmt2 = ordering_to_statement(statement_ordering2);
949 
950  // Check that the sink of the conflict is in the "first" loop body
951  if(set_belong_p(contained_stmts_loop1, stmt2)) {
952  FOREACH( conflict, c, dg_arc_label_conflicts(an_arc_label))
953  {
954  effect e_sink = conflict_sink(c);
955  effect e_source = conflict_source(c);
956  ifdebug(6) {
957  pips_debug(6,
958  "Considering arc : from statement %d :",
961  pips_debug(6, " to statement %d :", statement_ordering2);
963  }
964  if((effect_write_p(e_source) && store_effect_p(e_source))
965  || (effect_write_p(e_sink) && store_effect_p(e_sink))) {
966 
967  // Inner loop indices conflicts aren't preventing fusion
968 
969  if(statement_loop_p(stmt1) && effect_variable(e_source)
970  == loop_index(statement_loop(stmt1))) {
971  continue;
972  }
973 
974  if(statement_loop_p(stmt2) && effect_variable(e_sink)
975  == loop_index(statement_loop(stmt2))) {
976  continue;
977  }
978 
979  ifdebug(6) {
980  pips_debug(6,
981  "Arc preventing fusion : from statement %d :",
984  pips_debug(6, " to statement %d :", statement_ordering2);
986  }
987  success = false;
988  }
989  }
990  } else {
991  ifdebug(6) {
992  pips_debug(6,
993  "Arc ignored (%d,%d) : from statement %d :",
994  (int)statement_ordering(sloop2), (int)statement_ordering(sloop1), statement_ordering);
996  pips_debug(6, " to statement %d :", statement_ordering2);
997  print_statement(ordering_to_statement(statement_ordering2));
998  }
999  }
1000  }
1001  }
1002  }
1003 
1004  // No longer need the DG
1005  free_graph(candidate_dg);
1006 
1007 
1008  bool inner_success = false;
1009  if(success && get_bool_property("LOOP_FUSION_KEEP_PERFECT_PARALLEL_LOOP_NESTS")
1010  && loop_parallel_p(loop1) && loop_parallel_p(loop2)
1011  ) {
1012  // Check if we have perfect loop nests, and thus prevents losing parallelism
1013  statement inner_loop1 = get_first_inner_perfectly_nested_loop(body_loop1);
1014  statement inner_loop2 = get_first_inner_perfectly_nested_loop(body_loop2);
1015  if(!statement_undefined_p(inner_loop1) && !statement_undefined_p(inner_loop2)) {
1016  pips_debug(4,"Ensure that we don't break parallel loop nests !\n");
1017  if(loop_parallel_p(statement_loop(inner_loop1)) &&
1018  loop_parallel_p(statement_loop(inner_loop2))) {
1019  // Record statements
1020  set stmts1 = set_make(set_pointer);
1021  gen_context_recurse(inner_loop1,stmts1,statement_domain,record_statements,gen_null2);
1022 
1023  set stmts2 = set_make(set_pointer);
1024  gen_context_recurse(inner_loop2,stmts2,statement_domain,record_statements,gen_null2);
1025 
1026  pips_debug(4,"Try to fuse inner loops !\n");
1027  success = fusion_loops(inner_loop1,stmts1,inner_loop2,stmts2,maximize_parallelism,false);
1028  if(success) {
1029  inner_success = true;
1030  pips_debug(4,"Inner loops fused :-)\n");
1031  } else {
1032  pips_debug(4,"Inner loops not fusable :-(\n");
1033  }
1034 
1035  set_free(stmts1);
1036  set_free(stmts2);
1037  } else if(loop_parallel_p(statement_loop(inner_loop1)) ||
1038  loop_parallel_p(statement_loop(inner_loop2))) {
1039  success = false;
1040  }
1041 
1042  } else if((!statement_undefined_p(inner_loop1) && loop_parallel_p(statement_loop(inner_loop1))) ||
1043  (!statement_undefined_p(inner_loop2) && loop_parallel_p(statement_loop(inner_loop2)))) {
1044  // We have one perfect loop nest deeper than the other, prevent fusion !
1045  success = false;
1046  }
1047  }
1048 
1049  if(success) {
1050  // Cleaning FIXME
1051  // Fix real DG
1052  // Fix statement ordering
1053  // Fix loop_execution (still parallel ?)
1054  // If index2 is different from index 1 and if index2 is live on
1055  // exit, its exit value should be restored by an extra
1056  // assignment here
1057  // ...
1058  loop_body(loop1) = body_loop1;
1059 
1060  // Merge loop locals
1061  FOREACH(ENTITY,e,loop_locals(loop2)) {
1062 
1063  if(e != loop_index(loop2) && !gen_in_list_p(e,loop_locals(loop1))) {
1065  }
1066  }
1067 
1068  if(!inner_success) { // Usual case
1070  sequence_statements(statement_sequence(body_loop1)) = fused;
1073  //free_statement(sloop2); SG causes lost comments and lost extensions, MA should check this
1074  } else {
1075  // Inner loops have been fused
1076  gen_free_list(fused);
1077  }
1078  } else {
1079  // FI: this also should be controlled by information about the
1080  // liveness of both indices; also index1 must not be used in
1081  // loop2 as a temporary; so the memory effects of loops 2 should
1082  // be checked before attempting the first substitution
1083  if(index1!=index2) {
1084  replace_entity((void *)body_loop2, index1, index2);
1085  struct entity_pair thecouple = { index1, index2 };
1086  gen_context_recurse(body_loop2, &thecouple,
1087  statement_domain, gen_true2, replace_entity_effects_walker);
1088  }
1089  // Cleaning FIXME
1090  loop_body(loop1) = body_loop1;
1091  gen_free_list(fused);
1092  }
1093 
1094  ifdebug(3) {
1095  pips_debug(3, "End of fusion_loops\n\n");
1096  print_statement(sloop1);
1097  pips_debug(3, "\n********************\n");
1098  }
1099  return success;
1100 }
1101 
1102 
1103 
1104 /**
1105  * @brief Try to fuse the two loop. Dependences are check against the new body
1106  * but other constraints such as some statement between the two loops are not
1107  * handled and must be enforced outside.
1108  *
1109  */
1110 static bool fusion_loops(statement sloop1,
1111  set contained_stmts_loop1,
1112  statement sloop2,
1113  set contained_stmts_loop2,
1114  bool maximize_parallelism,
1115  bool coarse_grain) {
1116  pips_assert("Previous is a loop", statement_loop_p( sloop1 ) );
1117  pips_assert("Current is a loop", statement_loop_p( sloop2 ) );
1118  loop loop1 = statement_loop(sloop1);
1119  loop loop2 = statement_loop(sloop2);
1120 
1121  // If requested, fuse only look of the same kind (parallel/sequential).
1122  if( maximize_parallelism && ((loop_parallel_p(loop1)
1123  && !loop_parallel_p(loop2)) || (!loop_parallel_p(loop1)
1124  && loop_parallel_p(loop2)))) {
1125  pips_debug(4,"Fusion aborted because of fuse_maximize_parallelism property"
1126  ", loop_parallel_p(loop1)=>%d | loop_parallel_p(loop2)=>%d\n"
1128 
1129  // Abort to preserve parallelism
1130  return false;
1131  }
1132 
1133  if(coarse_grain) {
1134  return coarse_fusion_loops(sloop1,sloop2,maximize_parallelism);
1135  } else {
1136  return fine_fusion_loops(sloop1,contained_stmts_loop1,sloop2,contained_stmts_loop2,
1137  maximize_parallelism);
1138  }
1139 }
1140 
1141 
1142 /**
1143  * Create an empty block
1144  */
1145 static fusion_block make_empty_block(int num) {
1146  fusion_block block = (fusion_block)malloc(sizeof(struct fusion_block));
1147  block->num = num;
1148  block->id = num;
1149  block->s = NULL;
1150  block->statements = set_make(set_pointer);
1151  block->successors = fbset_make();
1152  block->rr_successors = fbset_make();
1153  block->predecessors = fbset_make();
1154  block->rr_predecessors = fbset_make();
1155  block->is_a_loop = false;
1156  block->count_fusion = 0;
1157  block->processed = false;
1158 
1159  return block;
1160 }
1161 
1162 
1163 static void free_block_list(list blocks) {
1164  FOREACH(fusion_block,b,blocks) {
1165  set_free(b->statements);
1166  fbset_free(b->successors);
1167  fbset_free(b->rr_successors);
1168  fbset_free(b->predecessors);
1169  fbset_free(b->rr_predecessors);
1170  free(b);
1171  }
1173 }
1174 
1175 /**
1176  * Create a block with statement 's' as a root and given the number 'num'.
1177  */
1178 static fusion_block make_block_from_statement(statement s, int num) {
1179  // Create the new block
1180  fusion_block b = make_empty_block(num);
1181 
1182  // Record the original statement
1183  b->s = s;
1184 
1185  // Populate the block statements
1186  gen_context_recurse(s, b->statements, statement_domain, record_statements, gen_null2);
1187 
1188  // Mark the block a loop if applicable
1189  if(statement_loop_p(s)) {
1190  b->is_a_loop = true;
1191  // Remove loop header from the list of statements
1192  set_del_element(b->statements, b->statements, b->s);
1193  }
1194 
1195  pips_debug(3,"Block created : num %d ; is_a_loop : %d\n",
1196  b->num,b->is_a_loop);
1197  return b;
1198 }
1199 
1200 /**
1201  * Find the block owning the statement corresponding to the given ordering
1202  */
1203 static fusion_block get_block_from_ordering(int ordering, list block_list) {
1204  statement s = ordering_to_statement(ordering);
1205  FOREACH(fusion_block, b, block_list) {
1206  if(set_belong_p(b->statements, s)) {
1207  return b;
1208  }
1209  }
1210  return NULL;
1211 }
1212 
1213 /**
1214  * Update b by computing the set of successors using the dependence graph and
1215  * the set of statements inside b
1216  */
1217 static void compute_successors(fusion_block b, list block_list) {
1218  pips_assert("Expect successors list to be initially empty",
1219  fbset_empty_p(b->successors) && fbset_empty_p(b->rr_successors));
1220  // Loop over statements that belong to this block
1221  SET_FOREACH(statement,s,b->statements)
1222  {
1223  int ordering = statement_ordering(s);
1224  vertex v = ordering_to_vertex(ordering); // Using DG
1225  pips_debug(5, " > Statement %d ", ordering);
1226 
1227  if(v != HASH_UNDEFINED_VALUE) {
1228  // Statement has a node in the graph
1229  pips_debug(5, " has a vertex in DG !\n");
1230  // Loop over successors in DG
1231  FOREACH( SUCCESSOR, a_successor, vertex_successors( v ) )
1232  {
1233  // Loop over conflicts between current statement and the successor
1234  dg_arc_label an_arc_label = successor_arc_label(a_successor);
1235  FOREACH( CONFLICT, a_conflict,dg_arc_label_conflicts(an_arc_label))
1236  {
1237  // We have a dependence
1238  // ... or not, read after read are not real one when
1239  // dealing with precedence !
1240  int sink_ordering = vertex_ordering(successor_vertex(a_successor));
1241  pips_debug(5, "Considering dependence to statement %d\n",
1242  sink_ordering);
1243 
1244  // Try to recover the sink block for this dependence.
1245  // We might not find any, because dependence can be related to
1246  // a statement outside from current sequence scope or can be related
1247  // to a loop header, which we ignore.
1248  fusion_block sink_block = get_block_from_ordering(sink_ordering,
1249  block_list);
1250  if(sink_block == NULL) {
1251  pips_debug(2,"No block found for ordering %d, dependence ignored\n",
1252  sink_ordering);
1253  } else {
1254  // It's a forward pass, we only add precedence on blocks
1255  // with ordering higher to current one
1256  if(sink_block->num > b->num) {
1257  // We have a successor !
1258  if(action_write_p(effect_action(conflict_sink(a_conflict)))
1259  || action_write_p(effect_action(conflict_source(a_conflict)))) {
1260  // There's a real dependence here
1261  fbset_add_element(b->successors,
1262  (void *)sink_block);
1263  // Mark current block as a predecessor ;-)
1264  fbset_add_element(sink_block->predecessors,
1265  (void *)b);
1266  } else {
1267  // Read-read dependence is interesting to fuse, but is not a
1268  // precedence constraint
1269  fbset_add_element(b->rr_successors,
1270  (void *)sink_block);
1271  // Mark current block as a rr_predecessor ;-)
1272  fbset_add_element(sink_block->rr_predecessors,
1273  (void *)b);
1274  }
1275 
1276  break; // One conflict with each successor is enough
1277  }
1278  }
1279  }
1280  }
1281  }
1282  }
1283  // Optimization, do not try two time the same fusion !
1284  fbset_difference(b->rr_successors, b->successors);
1285  fbset_difference(b->rr_predecessors, b->predecessors);
1286 }
1287 
1288 
1289 /**
1290  * Prune the graph so that we have a DAG. There won't be anymore more than
1291  * one path between two block in the predecessors/successors tree. We keep only
1292  * longest path, no shortcut :-)
1293  */
1294 static void prune_successors_tree_aux(fusion_block b, fbset full_succ) {
1295  pips_debug(8,"visiting %d\n",b->num);
1296 
1297  fbset full_succ_of_succ = fbset_make();
1298  FBSET_FOREACH(succ, b->successors) {
1299  prune_successors_tree_aux(succ, full_succ_of_succ);
1300  fbset_union(full_succ,full_succ_of_succ);
1301  fbset_clear(full_succ_of_succ);
1302  }
1303  fbset_free(full_succ_of_succ);
1304 
1305  FBSET_FOREACH(succ_of_succ,full_succ){
1306  fbset_del_element(succ_of_succ->predecessors, b);
1307  fbset_del_element(succ_of_succ->rr_predecessors, b);
1308  fbset_del_element(b->successors,succ_of_succ);
1309  fbset_del_element(b->rr_successors,succ_of_succ);
1310  }
1311  fbset_union(full_succ,b->successors);
1312 }
1313 
1314 static fbset prune_successors_tree(fusion_block b) {
1315  pips_debug(8,"visiting %d\n",b->num);
1316  fbset full_succ = fbset_make();
1317  prune_successors_tree_aux(b, full_succ);
1318  return full_succ;
1319 }
1320 
1321 
1322 static void get_all_path_heads(fusion_block b, set heads) {
1323  if(fbset_empty_p(b->predecessors)) {
1324  set_add_element(heads,heads,b);
1325  } else {
1326  FBSET_FOREACH(pred,b->predecessors) {
1327  get_all_path_heads(pred,heads);
1328  }
1329  }
1330 }
1331 
1332 /**
1333  * Merge two blocks (successors, predecessors, statements).
1334  */
1335 static void merge_blocks(fusion_block block1, fusion_block block2) {
1336 //FIXME not always the case
1337 //pips_assert("block1->num < block2->num expected",block1->num < block2->num);
1338 
1339  ifdebug(3) {
1340  pips_debug(3,"Merging blocks :\n");
1341  print_block(block1);
1342  print_block(block2);
1343  }
1344 
1345  // merge predecessors
1346  fbset_union(block1->predecessors, block2->predecessors);
1347  // merge rr_predecessors
1348  fbset_union(block1->rr_predecessors, block2->rr_predecessors);
1349  // merge successors
1350  fbset_union( block1->successors, block2->successors);
1351  // merge rr_successors
1352  fbset_union(block1->rr_successors, block2->rr_successors);
1353  // merge statement
1354  set_union(block1->statements, block1->statements, block2->statements);
1355 
1356  // Replace block2 with block1 as a predecessor of his successors
1357  FBSET_FOREACH(succ,block2->successors) {
1358  fbset_add_element(succ->predecessors, block1);
1359  fbset_del_element(succ->predecessors, block2);
1360  }
1361 
1362  // Replace block2 with block1 as a predecessor of his rr_successors
1363  FBSET_FOREACH(rr_succ,block2->rr_successors) {
1364  fbset_add_element(rr_succ->rr_predecessors, block1);
1365  fbset_del_element(rr_succ->rr_predecessors, block2);
1366  }
1367 
1368  // Replace block2 with block1 as a successor of his predecessors
1369  FBSET_FOREACH(pred,block2->predecessors) {
1370  if(pred != block1) {
1371  fbset_add_element(pred->successors, block1);
1372  }
1373  fbset_del_element(pred->successors, block2);
1374  }
1375 
1376  // Replace block2 with block1 as a successor of his rr_predecessors
1377  FBSET_FOREACH(rr_pred,block2->rr_predecessors) {
1378  if(rr_pred != block1) {
1379  fbset_add_element(rr_pred->rr_successors, block1);
1380  }
1381  fbset_del_element(rr_pred->rr_successors, block2);
1382  }
1383 
1384  // Remove block1 from predecessors and successors of ... block1
1385  fbset_del_element(block1->predecessors, block1);
1386  fbset_del_element(block1->successors, block1);
1387  // Remove block1 from rr_predecessors and rr_successors of ... block1
1388  fbset_del_element(block1->rr_predecessors, block1);
1389  fbset_del_element(block1->rr_successors, block1);
1390 
1391 
1392  block2->num = -1; // Disable block, will be garbage collected
1393 
1394  // Fix the graph to be a tree
1395  // Fixme : Heavy :-(
1396  set heads = set_make(set_pointer);
1397  get_all_path_heads(block1,heads);
1398  SET_FOREACH(fusion_block,b,heads) {
1399  fbset_free(prune_successors_tree(b));
1400  }
1401  set_free(heads);
1402 
1403  // Do not loose comments and extensions
1404  if(!empty_comments_p(statement_comments(block2->s)) &&
1405  !blank_string_p(statement_comments(block2->s)))
1406  append_comments_to_statement(block1->s, statement_comments(block2->s));
1409 
1410  block1->count_fusion++;
1411 
1412  ifdebug(4) {
1413  pips_debug(4,"After merge :\n");
1414  print_block(block1);
1415  print_block(block2);
1416  }
1417 }
1418 
1419 
1420 /**
1421  * @brief Checks if precedence constraints allow fusing two blocks
1422  */
1423 static bool fusable_blocks_p( fusion_block b1, fusion_block b2, unsigned int fuse_limit) {
1424  bool fusable_p = false;
1425  if(b1!=b2 && b1->num>=0 && b2->num>=0 && b1->is_a_loop && b2->is_a_loop
1426  && b1->count_fusion<fuse_limit && b2->count_fusion<fuse_limit) {
1427  // Blocks are active and are loops
1428 
1429  if(fbset_belong_p(b2->successors,b1)) {
1430  ifdebug(6) {
1431  pips_debug(6,"b1 is a successor of b2, fusion prevented !\n");
1432  print_block(b1);
1433  print_block(b2);
1434  }
1435  fusable_p = false;
1436  } else if(fbset_belong_p(b1->successors,b2)) {
1437  // Adjacent blocks are fusable
1438  ifdebug(6) {
1439  pips_debug(6,"blocks are fusable because directly connected\n");
1440  print_block(b1);
1441  print_block(b2);
1442  }
1443  fusable_p = true;
1444  } else {
1445  // If there's some constraint, we won't be able to fuse them
1446  // here is a heavy way to check that, better not to think about
1447  // algorithm complexity :-(
1448  pips_debug(6,"Getting full successors for b1 (%d)\n",b1->num);
1449  fusion_block* full_succ_b1 = prune_successors_tree(b1);
1450  if(!fbset_belong_p(full_succ_b1,b2)) {
1451  // b2 is not a successors of a successor of a .... of b1
1452  // look at the opposite !
1453  pips_debug(6,"Getting full successors for b2 (%d)\n",b2->num);
1454  fusion_block* full_succ_b2 = prune_successors_tree(b2);
1455  if(!fbset_belong_p(full_succ_b2,b1)) {
1456  fusable_p = true;
1457  }
1458  fbset_free(full_succ_b2);
1459  }
1460  fbset_free(full_succ_b1);
1461  }
1462  }
1463  return fusable_p;
1464 }
1465 
1466 
1467 /**
1468  * Try to fuse two blocks (if they are loop...)
1469  * @return true if a fusion occured !
1470  */
1471 static bool fuse_block( fusion_block b1,
1472  fusion_block b2,
1473  bool maximize_parallelism,
1474  bool coarse) {
1475  bool return_val = false; // Tell is a fusion has occured
1476  if(!b1->is_a_loop) {
1477  pips_debug(5,"B1 (%d) is a not a loop, skip !\n",b1->num);
1478  } else if(!b2->is_a_loop) {
1479  pips_debug(5,"B2 (%d) is a not a loop, skip !\n",b2->num);
1480  } else if(b1->num==-1) {
1481  pips_debug(5,"B2 (%d) is disabled, skip !\n",b1->num);
1482  } else if(b2->num==-1) {
1483  pips_debug(5,"B2 (%d) is disabled, skip !\n",b2->num);
1484  } else {
1485  // Try to fuse
1486  pips_debug(4,"Try to fuse %d with %d\n",b1->num, b2->num);
1487  if(fusion_loops(b1->s,b1->statements, b2->s, b2->statements, maximize_parallelism, coarse)) {
1488  pips_debug(2, "Loop have been fused\n");
1489  // Now fuse the corresponding blocks
1490  merge_blocks(b1, b2);
1491  return_val=true;
1492  }
1493  }
1494  return return_val;
1495 }
1496 
1497 
1498 /**
1499  * This function first try to fuse b with its successors (if b is a loop and if
1500  * there's any loop in the successor list) ; then it recurse on each successor.
1501  *
1502  * @param b is the current block
1503  * @param fuse_count is the number of successful fusion done
1504  */
1505 static bool try_to_fuse_with_successors(fusion_block b,
1506  int *fuse_count,
1507  bool maximize_parallelism,
1508  unsigned int fuse_limit,
1509  bool coarse) {
1510  // First step is to try to fuse with each successor
1511  if(!b->processed && b->is_a_loop && b->num>=0 && b->count_fusion < fuse_limit) {
1512  pips_debug(5,"Block %d is a loop, try to fuse with successors !\n",b->num);
1513  FBSET_FOREACH( succ, b->successors)
1514  {
1515  pips_debug(6,"Handling successor : %d\n",succ->num);
1516  if(fuse_block(b, succ, maximize_parallelism,coarse)) {
1517  /* predecessors and successors set have been modified for the current
1518  * block... we can no longer continue in this loop, so we stop and let
1519  * the caller restart the computation
1520  */
1521  (*fuse_count)++;
1522  return true;
1523  }
1524  }
1525  }
1526  // Second step is recursion on successors (if any)
1527  FBSET_FOREACH(succ, b->successors) {
1528  if(try_to_fuse_with_successors(succ,
1529  fuse_count,
1530  maximize_parallelism,
1531  fuse_limit,
1532  coarse))
1533  return true;
1534  }
1535 
1536  b->processed = false;
1537  return false;
1538 }
1539 
1540 
1541 /**
1542  * This function first try to fuse b with its rr_successors (if b is a loop and
1543  * if there's any loop in the rr_successor list)
1544  *
1545  * @param b is the current block
1546  * @param fuse_count is the number of successful fusion done
1547  */
1548 static void try_to_fuse_with_rr_successors(fusion_block b,
1549  int *fuse_count,
1550  bool maximize_parallelism,
1551  unsigned int fuse_limit,
1552  bool coarse) {
1553  if(b->is_a_loop && b->count_fusion < fuse_limit) {
1554  pips_debug(5,"Block %d is a loop, try to fuse with rr_successors !\n",b->num);
1555  FBSET_FOREACH(succ, b->rr_successors)
1556  {
1557  if(fusable_blocks_p(b,succ,fuse_limit) &&
1558  fuse_block(b, succ, maximize_parallelism,coarse)) {
1559  /* predecessors and successors set have been modified for the current
1560  * block... we can no longer continue in this loop, let's restart
1561  * current function at the beginning and end this one.
1562  *
1563  * FIXME : performance impact may be high ! Should not try to fuse
1564  * again with the same block
1565  *
1566  */
1567  (*fuse_count)++;
1568  try_to_fuse_with_rr_successors(b,
1569  fuse_count,
1570  maximize_parallelism,
1571  fuse_limit,
1572  coarse);
1573  return;
1574  }
1575  }
1576  }
1577 
1578  return;
1579 }
1580 
1581 
1582 /**
1583  * This function loop over all possible couple of blocks and then try to fuse
1584  * all of them that validated precedence constaints
1585  *
1586  * @param blocks is the list of available blocks
1587  * @param fuse_count is the number of successful fusion done
1588  */
1589 static void fuse_all_possible_blocks(list blocks,
1590  int *fusion_count,
1591  bool maximize_parallelism,
1592  unsigned int fuse_limit,
1593  bool coarse) {
1594  FOREACH(fusion_block, b1, blocks) {
1595  if(b1->is_a_loop && b1->count_fusion<fuse_limit) {
1596  FOREACH(fusion_block, b2, blocks) {
1597  if(fusable_blocks_p(b1,b2,fuse_limit)) {
1598  if(fuse_block(b1, b2, maximize_parallelism,coarse)) {
1599  (*fusion_count)++;
1600  }
1601  }
1602  }
1603  }
1604  }
1605 }
1606 
1607 
1608 /**
1609  * Try to fuse every loops in the given sequence
1610  */
1611 static bool fusion_in_sequence(sequence s, fusion_params params) {
1612 
1613  /*
1614  * Construct "block" decomposition
1615  */
1616 
1617  /* Keep track of all blocks created */
1618  list block_list = NIL;
1619 
1620 
1621  // Keep track of the number of loop founded, to enable or disable next stage
1622  int number_of_loop = 0, i=0;
1623 
1624  // Loop over the list of statements in the sequence and compute blocks
1625  list stmts = sequence_statements(s);
1626  // We have to give a number to each block. It'll be used for regenerating
1627  // the list of statement in the sequence wrt initial order.
1628  max_num = gen_length(stmts);
1629 #ifdef __SSE2__
1630  max_num=4*((max_num+3)/4);
1631 #endif
1632  FOREACH(statement, st, stmts) {
1633  fusion_block b = make_block_from_statement(st, i);
1634  block_list = gen_cons(b, block_list);
1635  i++;
1636  if(statement_loop_p(st)) {
1637  number_of_loop++;
1638  }
1639  }
1640 
1641  block_list = gen_nreverse(block_list);
1642 
1643  // We only continue now if we have at least 2 loops. What can we fuse else ?
1644  if(number_of_loop > 1) {
1645  // Construct now predecessors/successors relationships for all blocks
1646  FOREACH(fusion_block, block, block_list) {
1647  compute_successors(block, block_list);
1648  }
1649  /*
1650  * Prune the graph so that we have a DAG
1651  */
1652  FOREACH(fusion_block, block, block_list) {
1653  if(fbset_empty_p(block->predecessors)) { // Block has no predecessors
1654  fbset_free(prune_successors_tree(block));
1655  }
1656  }
1657 
1658  ifdebug(3) {
1659  print_blocks(block_list);
1660  }
1661  // Loop over blocks and find fusion candidate (loop with compatible header)
1662  /*
1663  * Now we call a recursive method on blocks that don't have any
1664  * predecessor. The others will be visited recursively.
1665  */
1666  int fuse_count = 0;
1667 restart_loop: ;
1668  FOREACH(fusion_block, block, block_list) {
1669  if(fbset_empty_p(block->predecessors)) { // Block has no predecessors
1670  pips_debug(2,
1671  "Operate on block %d (is_a_loop %d)\n",
1672  block->num,
1673  block->is_a_loop);
1674 
1675  if(try_to_fuse_with_successors(block,
1676  &fuse_count,
1677  params->maximize_parallelism,
1678  params->max_fused_per_loop,
1679  params->coarse)) {
1680  // We fused some blocks, let's restart the process !
1681  // FIXME : we shouldn't have to restart the process, but it'll require
1682  // hard work in try_to_fuse_with_successors, so we'll do that later...
1683  goto restart_loop;
1684  }
1685 
1686  }
1687  }
1688 
1689  /* Here we try to fuse each block with its rr_successors, this is to benefit
1690  * from reuse (read-read) !
1691  */
1692  FOREACH(fusion_block, block, block_list) {
1693  if(block->num>=0) { // Block is active
1694  try_to_fuse_with_rr_successors(block,
1695  &fuse_count,
1696  params->maximize_parallelism,
1697  params->max_fused_per_loop,
1698  params->coarse);
1699  }
1700  }
1701 
1702  if(params->greedy) {
1703  /*
1704  * We allow the user to request a greedy fuse, which mean that we fuse as
1705  * much as we can, even if there's no reuse !
1706  */
1707  ifdebug(3) {
1708  print_blocks(block_list);
1709  }
1710  fuse_all_possible_blocks(block_list,
1711  &fuse_count,
1712  params->maximize_parallelism,
1713  params->max_fused_per_loop,
1714  params->coarse);
1715  }
1716 
1717 
1718 
1719  if(fuse_count > 0) {
1720  /*
1721  * Cleaning : removing old blocks that have been fused
1722  */
1723  list block_iter = block_list;
1724  list prev_block_iter = NIL;
1725  while(block_iter != NULL) {
1726  fusion_block block = (fusion_block)CAR(block_iter).p;
1727 
1728  if(block->num < 0) { // to be removed
1729  if(prev_block_iter==NIL) {
1730  block_list = CDR(block_iter);
1731  } else {
1732  CDR(prev_block_iter) = CDR(block_iter);
1733  }
1734  list garbage = block_iter;
1735  block_iter = CDR(block_iter);
1736  free(garbage);
1737  } else {
1738  prev_block_iter = block_iter;
1739  block_iter = CDR(block_iter);
1740  }
1741  }
1742 
1743  /* Regenerate sequence now
1744  * We will process as follow :
1745  * - find every eligible block WRT to precedence constraint
1746  * - schedule eligible blocks according to their original position
1747  * - loop until there's no longer block to handle
1748  */
1749  list new_stmts = NIL;
1750 
1751  ifdebug(3) {
1752  pips_debug(3,"Before regeneration\n");
1753  print_blocks(block_list);
1754  }
1755 
1756  // Loop until every block have been regenerated
1757  int block_count = gen_length(block_list);
1758  while(block_count > 0) {
1759 restart_generation:
1760  block_count = 0;
1761  int active_blocks = 0;
1762  bool at_least_one_block_scheduled = false;
1763  // First loop, construct eligible blocks
1764  FOREACH(fusion_block, block, block_list)
1765  {
1766  if(block->num < 0) {
1767  continue; // block is disabled
1768  }
1769  active_blocks++;
1770 
1771  if(fbset_empty_p(block->predecessors)) { // Block has no predecessors
1772  // Block is eligible
1773  ifdebug(3) {
1774  pips_debug(3,"Eligible : ");
1775  print_block(block);
1776  }
1777  // Schedule block
1778  new_stmts = CONS(statement,block->s,new_stmts);
1779  block->num = -1; // Disable block
1780  at_least_one_block_scheduled = true;
1781 
1782  // Release precedence constraint on successors
1783  FBSET_FOREACH(succ,block->successors)
1784  {
1785  fbset_del_element(succ->predecessors, block);
1786  }
1787  // We have free some constraints, and thus we restart the process
1788  // to ensure that we generate in an order as close as possible to
1789  // the original code
1790  goto restart_generation;
1791  } else {
1792  ifdebug(3) {
1793  pips_debug(3,"Not eligible : ");
1794  print_block(block);
1795  }
1796 
1797  // Number of block alive
1798  block_count++;
1799  }
1800  }
1801  if(!at_least_one_block_scheduled && active_blocks>0) {
1802  pips_internal_error("No block scheduled, we have interdependence "
1803  "in the block tree, which means it's not a tree ! Abort...\n");
1804  }
1805  }
1806 
1807  // Replace original list with the new one
1808  sequence_statements( s) = gen_nreverse(new_stmts);
1809  }
1810  }
1811 
1812  // No leak
1813  free_block_list(block_list);
1814 
1815  return true;
1816 }
1817 
1818 /**
1819  * Will try to fuse as many loops as possible in the IR subtree rooted by 's'
1820  */
1821 static void compute_fusion_on_statement(statement s, bool coarse) {
1822  // Get user preferences with some properties
1823  struct fusion_params params;
1824  params.maximize_parallelism = get_bool_property("LOOP_FUSION_MAXIMIZE_PARALLELISM");
1825  params.coarse = coarse;
1826  params.greedy = get_bool_property("LOOP_FUSION_GREEDY");
1827  params.max_fused_per_loop = get_int_property("LOOP_FUSION_MAX_FUSED_PER_LOOP");
1828 
1829  // Go on fusion on every sequence of statement founded
1830  gen_context_recurse( s, &params, sequence_domain, fusion_in_sequence, gen_null2);
1831 }
1832 
1833 /**
1834  * Loop fusion main entry point
1835  */
1836 static bool module_loop_fusion(char * module_name, bool region_based) {
1839 
1840  /* Get the true ressource, not a copy. */
1842  module_name,
1843  true);
1845 
1848 
1849  /* The proper effect to detect the I/O operations: */
1851  module_name,
1852  true));
1853 
1854  /* Mandatory for DG construction and module_to_value_mapping() */
1856  module_name,
1857  true));
1858 
1859  /* Get the data dependence graph */
1861  module_name,
1862  true);
1863 
1864  ordering_to_dg_mapping = compute_ordering_to_dg_mapping(dependence_graph);
1865 
1866 
1867  if(region_based) {
1868  /* use preconditions to check that loop bodies are not dead code
1869  */
1871  module_name,
1872  true));
1873 
1874  /* Build mapping between variables and semantics informations: */
1876 
1877  /* Get and use invariant read/write regions */
1879  module_name,
1880  true));
1881  }
1882 
1883 
1884 
1885 
1886  debug_on("LOOP_FUSION_DEBUG_LEVEL");
1887 
1888  // Here we go ! Let's fuse :-)
1889  compute_fusion_on_statement(module_statement,region_based);
1890 
1891  /* Reorder the module, because some statements have been deleted, and others
1892  * have been reordered
1893  */
1895 
1896  /* Store the new code */
1898 
1899  // Free resources
1900  free_temporary_fused_statement(); // Free...
1901  hash_table_free(ordering_to_dg_mapping);
1902  ordering_to_dg_mapping = NULL;
1908 
1909  if(region_based) {
1913  }
1914 
1915  pips_debug(2, "done for %s\n", module_name);
1916  debug_off();
1917 
1918  /* Should have worked:
1919  *
1920  * Do we want to provide statistics about the number of fused loops?
1921  *
1922  * How do we let PyPS know the number of loops fused?
1923  */
1924  return true;
1925 }
1926 
1927 
1928 /**
1929  * Loop fusion with DG ; PIPSMake entry point
1930  */
1931 bool loop_fusion(char * module_name) {
1932  return module_loop_fusion(module_name, false);
1933 }
1934 
1935 /**
1936  * Loop fusion with Regions ; PIPSMake entry point
1937  */
1939  return module_loop_fusion(module_name, true);
1940 }
1941 
1942 #endif // BUILDER_LOOP_FUSION*
int get_int_property(const string)
void free_conflict(conflict p)
Definition: dg.c:63
conflict make_conflict(effect a1, effect a2, cone a3)
Definition: dg.c:96
effect copy_effect(effect p)
EFFECT.
Definition: effects.c:448
void free_graph(graph p)
Definition: graph.c:23
void free_statement(statement p)
Definition: ri.c:2189
struct paramStruct params
dg_vertex_label vertex_label
Definition: delay.c:64
dg_arc_label arc_label
Definition: delay.c:63
static graph dependence_graph
Definition: delay.c:93
static statement module_statement
Definition: alias_check.c:125
@ INT
Definition: atomic.c:48
static int num
Definition: bourdoncle.c:137
graph statement_dependence_graph(statement s)
Compute from a given statement, the dependency graph.
Definition: chains.c:1218
Psysteme sg_to_sc_chernikova(Ptsg sg)
Definition: chernikova.c:58
bool clean_up_sequences(statement s)
Recursively clean up the statement sequences by fusing them if possible and by removing useless one.
struct _newgen_struct_statement_ * statement
Definition: cloning.h:21
static list predecessors(statement st, graph tg)
static list successors(list l)
static list blocks
lisp of loops
#define dg_vertex_label_statement(x)
Definition: dg.h:235
#define conflict_sink(x)
Definition: dg.h:167
#define cone_levels(x)
Definition: dg.h:128
#define CONFLICT(x)
CONFLICT.
Definition: dg.h:134
#define dg_arc_label_conflicts(x)
Definition: dg.h:201
#define conflict_source(x)
Definition: dg.h:165
#define cone_undefined
Definition: dg.h:104
#define conflict_undefined
Definition: dg.h:140
struct _newgen_struct_dg_vertex_label_ * dg_vertex_label
Definition: dg.h:68
#define conflict_cone(x)
Definition: dg.h:169
#define region_write_p(reg)
#define region_entity(reg)
#define region_system(reg)
#define region_read_p(reg)
useful region macros
#define REGION
void all_regions_variable_rename(list, entity, entity)
void print_regions(list)
void project_regions_along_variables(list, list)
void project_regions_along_variables(list l_reg, list l_param) input : a list of regions to project,...
list load_invariant_rw_effects_list(statement)
void store_proper_rw_effects_list(statement, list)
list load_proper_rw_effects_list(statement)
void reset_proper_rw_effects(void)
void set_proper_rw_effects(statement_effects)
void set_cumulated_rw_effects(statement_effects)
void reset_invariant_rw_effects(void)
void store_invariant_rw_effects_list(statement, list)
void set_invariant_rw_effects(statement_effects)
void reset_cumulated_rw_effects(void)
list words_effect(effect)
#define effect_any_reference(e)
FI: cannot be used as a left hand side.
#define effect_write_p(eff)
#define effect_variable(e)
For COMPATIBILITY purpose only - DO NOT USE anymore.
entity effect_entity(effect)
cproto-generated files
Definition: effects.c:52
bool store_effect_p(effect)
Definition: effects.c:1062
bool effect_scalar_p(effect)
Definition: effects.c:567
#define effect_action(x)
Definition: effects.h:642
#define effect_undefined
Definition: effects.h:614
#define action_write_p(x)
Definition: effects.h:314
#define EFFECT(x)
EFFECT.
Definition: effects.h:608
bool blank_string_p(const char *s)
Definition: entity_names.c:245
const char * module_name(const char *s)
Return the module part of an entity name.
Definition: entity_names.c:296
bool get_bool_property(const string)
FC 2015-07-20: yuk, moved out to prevent an include cycle dependency include "properties....
#define gen_chunk_undefined_p(c)
Definition: genC.h:75
#define gen_context_recurse(start, ctxt, domain_number, flt, rwt)
Definition: genC.h:285
void * malloc(YYSIZE_T)
void free(void *)
bool success
Definition: gpips-local.h:59
#define successor_vertex(x)
Definition: graph.h:118
#define successor_arc_label(x)
Definition: graph.h:116
struct _newgen_struct_graph_ * graph
Definition: graph.h:31
#define vertex_vertex_label(x)
Definition: graph.h:152
#define vertex_successors(x)
Definition: graph.h:154
#define SUCCESSOR(x)
SUCCESSOR.
Definition: graph.h:86
#define graph_vertices(x)
Definition: graph.h:82
#define VERTEX(x)
VERTEX.
Definition: graph.h:122
statement make_block_statement(list)
Make a block statement from a list of statement.
Definition: statement.c:616
void reset_current_module_entity(void)
Reset the current module entity.
Definition: static.c:97
void reset_current_module_statement(void)
Reset the current module statement.
Definition: static.c:221
statement set_current_module_statement(statement)
Set the current module statement.
Definition: static.c:165
entity set_current_module_entity(entity)
static.c
Definition: static.c:66
void replace_entity(void *s, entity old, entity new)
per variable version of replace_entities.
Definition: replace.c:113
void gen_null2(__attribute__((unused)) void *u1, __attribute__((unused)) void *u2)
idem with 2 args, to please overpeaky compiler checks
Definition: genClib.c:2758
bool gen_true2(__attribute__((unused)) gen_chunk *u1, __attribute__((unused)) void *u2)
Definition: genClib.c:2785
bool loop_parallel_p(loop l)
Test if a loop is parallel.
Definition: loop.c:393
void clean_enclosing_loops(void)
Definition: loop.c:58
statement get_first_inner_perfectly_nested_loop(statement stat)
Return the inner loop in a perfect loop-nest.
Definition: loop.c:508
statement_mapping loops_mapping_of_statement(statement stat)
Definition: loop.c:155
#define ENDP(l)
Test if a list is empty.
Definition: newgen_list.h:66
list gen_nreverse(list cp)
reverse a list in place
Definition: list.c:304
#define NIL
The empty list (nil in Lisp)
Definition: newgen_list.h:47
list gen_copy_seq(list l)
Copy a list structure.
Definition: list.c:501
size_t gen_length(const list l)
Definition: list.c:150
list gen_cons(const void *item, const list next)
Definition: list.c:888
#define CONS(_t_, _i_, _l_)
List element cell constructor (insert an element at the beginning of a list)
Definition: newgen_list.h:150
list gen_nconc(list cp1, list cp2)
physically concatenates CP1 and CP2 but do not duplicates the elements
Definition: list.c:344
#define CAR(pcons)
Get the value of the first element of a list.
Definition: newgen_list.h:92
void gen_free_list(list l)
free the spine of the list
Definition: list.c:327
bool gen_in_list_p(const void *vo, const list lx)
tell whether vo belongs to lx
Definition: list.c:734
#define FOREACH(_fe_CASTER, _fe_item, _fe_list)
Apply/map an instruction block on all the elements of a list.
Definition: newgen_list.h:179
#define CDR(pcons)
Get the list less its first element.
Definition: newgen_list.h:111
void * gen_find_eq(const void *item, const list seq)
Definition: list.c:422
#define MAP(_map_CASTER, _map_item, _map_code, _map_list)
Apply/map an instruction block on all the elements of a list (old fashioned)
Definition: newgen_list.h:226
list gen_full_copy_list(list l)
Copy a list structure with element copy.
Definition: list.c:535
string db_get_memory_resource(const char *rname, const char *oname, bool pure)
Return the pointer to the resource, whatever it is.
Definition: database.c:755
#define DB_PUT_MEMORY_RESOURCE(res_name, own_name, res_val)
conform to old interface.
Definition: pipsdbm-local.h:66
sequence statement_sequence(statement)
Get the sequence of a statement sequence.
Definition: statement.c:1328
loop statement_loop(statement)
Get the loop of a statement.
Definition: statement.c:1374
bool statement_loop_p(statement)
Definition: statement.c:349
bool statement_sequence_p(statement)
Statement classes induced from instruction type.
Definition: statement.c:335
void append_comments_to_statement(statement, string)
Append a comment string (if non empty) to the comments of a statement, if the c.
Definition: statement.c:1889
void insert_statement(statement, statement, bool)
This is the normal entry point.
Definition: statement.c:2570
bool empty_comments_p(const char *)
Definition: statement.c:107
bool statement_may_contain_exiting_intrinsic_call_p(statement)
Definition: statement.c:4022
char end
Definition: gtk_status.c:82
void * hash_get(const hash_table htp, const void *key)
this function retrieves in the hash table pointed to by htp the couple whose key is equal to key.
Definition: hash.c:449
void hash_table_free(hash_table htp)
this function deletes a hash table that is no longer useful.
Definition: hash.c:327
statement vertex_to_statement(vertex v)
Vertex_to_statement looks for the statement that is pointed to by vertex v.
Definition: util.c:45
loop loop1
tiling_sequence.c
void * memset(void *str, int c, size_t len)
memset.c – set an area of memory to a given value Copyright (C) 1991, 2003, 2009-2011 Free Software F...
Definition: memset.c:23
#define debug_on(env)
Definition: misc-local.h:157
#define pips_debug
these macros use the GNU extensions that allow variadic macros, including with an empty list.
Definition: misc-local.h:145
#define pips_assert(what, predicate)
common macros, two flavors depending on NDEBUG
Definition: misc-local.h:172
#define pips_internal_error
Definition: misc-local.h:149
#define debug_off()
Definition: misc-local.h:160
#define pips_user_error
Definition: misc-local.h:147
#define assert(ex)
Definition: newgen_assert.h:41
#define HASH_UNDEFINED_VALUE
value returned by hash_get() when the key is not found; could also be called HASH_KEY_NOT_FOUND,...
Definition: newgen_hash.h:56
set set_del_element(set, const set, const void *)
Definition: set.c:265
#define SET_FOREACH(type_name, the_item, the_set)
enumerate set elements in their internal order.
Definition: newgen_set.h:78
void set_free(set)
Definition: set.c:332
bool set_belong_p(const set, const void *)
Definition: set.c:194
set set_union(set, const set, const set)
Definition: set.c:211
@ set_pointer
Definition: newgen_set.h:44
set set_make(set_type)
Create an empty set of any type but hash_private.
Definition: set.c:102
set set_add_element(set, const set, const void *)
Definition: set.c:152
hash_table set_ordering_to_statement(statement s)
To be used instead of initialize_ordering_to_statement() to make sure that the hash table ots is in s...
Definition: ordering.c:172
statement ordering_to_statement(int o)
Get the statement associated to a given ordering.
Definition: ordering.c:111
void reset_ordering_to_statement(void)
Reset the mapping from ordering to statement.
Definition: ordering.c:185
bool overwrite_ordering_of_the_statement_to_current_mapping(statement stat)
Overwrite the statement for its ordering, if any, in the hash-map.
Definition: ordering.c:142
bool ordering_to_statement_initialized_p()
Test if the ordering to statement is initialized.
Definition: ordering.c:63
#define print_effect(e)
Definition: print.c:336
#define print_region(x)
Definition: print.c:343
#define print_effects(e)
Definition: print.c:334
void print_statement(statement)
Print a statement on stderr.
Definition: statement.c:98
bool module_reorder(statement body)
Reorder a module and recompute order to statement if any.
Definition: reorder.c:244
const char * entity_local_name(entity e)
entity_local_name modified so that it does not core when used in vect_fprint, since someone thought t...
Definition: entity.c:453
bool same_entity_p(entity e1, entity e2)
predicates on entities
Definition: entity.c:1321
entity module_name_to_entity(const char *mn)
This is an alias for local_name_to_top_level_entity.
Definition: entity.c:1479
set get_referenced_entities(void *elem)
retrieves the set of entities used in elem beware that this entities may be formal parameters,...
Definition: entity.c:3063
bool range_equal_p(range r1, range r2)
Definition: expression.c:1522
void reset_enclosing_loops_map(void)
void set_enclosing_loops_map(statement_mapping)
#define loop_body(x)
Definition: ri.h:1644
struct _newgen_struct_sequence_ * sequence
Definition: ri.h:351
#define ENTITY(x)
ENTITY.
Definition: ri.h:2755
#define statement_ordering(x)
Definition: ri.h:2454
#define statement_domain
newgen_sizeofexpression_domain_defined
Definition: ri.h:362
#define entity_name(x)
Definition: ri.h:2790
#define sequence_statements(x)
Definition: ri.h:2360
#define reference_indices(x)
Definition: ri.h:2328
#define statement_extensions(x)
Definition: ri.h:2464
#define instruction_sequence(x)
Definition: ri.h:1514
#define loop_locals(x)
Definition: ri.h:1650
#define statement_instruction(x)
Definition: ri.h:2458
#define statement_comments(x)
Definition: ri.h:2456
#define loop_range(x)
Definition: ri.h:1642
#define extensions_extension(x)
Definition: ri.h:1330
#define statement_undefined_p(x)
Definition: ri.h:2420
#define sequence_domain
newgen_reference_domain_defined
Definition: ri.h:346
#define loop_index(x)
Definition: ri.h:1640
#define statement_undefined
Definition: ri.h:2419
#define STATEMENT(x)
STATEMENT.
Definition: ri.h:2413
bool Finds2s1
Definition: ricedg.h:160
list TestCoupleOfReferences(list n1, Psysteme sc1 __attribute__((unused)), statement s1, effect ef1, reference r1, list n2, Psysteme sc2 __attribute__((unused)), statement s2, effect ef2, reference r2, list llv, Ptsg *gs __attribute__((unused)), list *levelsop __attribute__((unused)), Ptsg *gsop __attribute__((unused)))
Definition: ricedg.c:905
graph compute_dg_on_statement_from_chains_in_place(statement s, graph chains)
Definition: ricedg.c:2342
int vertex_ordering(vertex)
Definition: util.c:51
hash_table compute_ordering_to_dg_mapping(graph)
Define a mapping from the statement ordering to the dependence graph vertices:
Definition: util.c:70
void sc_rm(Psysteme ps)
void sc_rm(Psysteme ps): liberation de l'espace memoire occupe par le systeme de contraintes ps;
Definition: sc_alloc.c:277
#define level
Value b2
Definition: sc_gram.c:105
Value b1
booleen indiquant quel membre est en cours d'analyse
Definition: sc_gram.c:105
void sc_fprint(FILE *fp, Psysteme ps, get_variable_name_t nom_var)
void sc_fprint(FILE * f, Psysteme ps, char * (*nom_var)()): cette fonction imprime dans le fichier po...
Definition: sc_io.c:220
int fprintf()
test sc_min : ce test s'appelle par : programme fichier1.data fichier2.data ...
void module_to_value_mappings(entity m)
void module_to_value_mappings(entity m): build hash tables between variables and values (old,...
Definition: mappings.c:624
transformer load_statement_precondition(statement)
void reset_precondition_map(void)
void set_precondition_map(statement_mapping)
s1
Definition: set.c:247
#define SG_UNDEFINED
Definition: sg-local.h:73
#define SG_UNDEFINED_P(sg)
Definition: sg-local.h:74
#define ifdebug(n)
Definition: sg.c:47
void sg_rm(Ptsg sg)
void sg_rm(Ptsg sg): liberation de l'espace memoire occupe par un systeme generateur
Definition: sg.c:249
void sg_fprint_as_dense(FILE *f, Ptsg sg, Pbase b)
void sg_fprint_as_dense(FILE * f, Ptsg sg): impression d'un systeme generateur
Definition: sg.c:298
#define intptr_t
Definition: stdint.in.h:294
FI: I do not understand why the type is duplicated at the set level.
Definition: set.c:59
The structure used to build lists in NewGen.
Definition: newgen_list.h:41
Representation d'un systeme generateur par trois ensembles de sommets de rayons et de droites.
Definition: sg-local.h:66
Pbase base
Definition: sg-local.h:70
struct block block
void print_words(FILE *fd, cons *lw)
Definition: print.c:263
bool loop_fusion_with_regions(char *)
bool loop_fusion(char *)
loop_fusion.c
bool transformer_empty_p(transformer t)
If true is returned, the transformer certainly is empty.
Definition: transformer.c:2455
void free_value_mappings(void)
Normal call to free the mappings.
Definition: value.c:1212
char *(* get_variable_name_t)(Variable)
Definition: vecteur-local.h:62