PIPS
nest_parallelization.c
Go to the documentation of this file.
1 /*
2 
3  $Id: nest_parallelization.c 23065 2016-03-02 09:05:50Z coelho $
4 
5  Copyright 1989-2016 MINES ParisTech
6 
7  This file is part of PIPS.
8 
9  PIPS is free software: you can redistribute it and/or modify it
10  under the terms of the GNU General Public License as published by
11  the Free Software Foundation, either version 3 of the License, or
12  any later version.
13 
14  PIPS is distributed in the hope that it will be useful, but WITHOUT ANY
15  WARRANTY; without even the implied warranty of MERCHANTABILITY or
16  FITNESS FOR A PARTICULAR PURPOSE.
17 
18  See the GNU General Public License for more details.
19 
20  You should have received a copy of the GNU General Public License
21  along with PIPS. If not, see <http://www.gnu.org/licenses/>.
22 
23 */
24 
25 // do not compile unless required
26 #include "phases.h"
27 #ifdef BUILDER_NEST_PARALLELIZATION
28 
29 #ifdef HAVE_CONFIG_H
30  #include "pips_config.h"
31 #endif
32  /* loop nest parallelization */
33 
34 #include <stdlib.h>
35 #include <stdio.h>
36 #include <stdlib.h>
37 #include <string.h>
38 #include <limits.h>
39 
40 #include "genC.h"
41 #include "linear.h"
42 
43 #include "misc.h"
44 #include "pipsdbm.h"
45 
46 #include "ri.h"
47 #include "ri-util.h"
48 #include "prettyprint.h" // for debugging
49 
50 #include "text-util.h" // print_text
51 #include "control.h" // module_body_reorder
52 
53 #include "dg.h"
54 typedef dg_arc_label arc_label;
56 #include "graph.h"
57 
58 #include "conversion.h" // look_for_nested_loops_statements
59 #include "transformations.h" // loop_unroll, loop_strip_mine, ...
60 
61 /* dependence graph */
62 static graph dg;
63 
64 /* only one level of parallelism is allowed; this is checked
65  * intra-procedurally only for the time being; pipsmake mechanism would
66  * make an interprocedural propagation easy but it would be hard to defer
67  * the parallel loop choice up to a whole program analysis; * This static
68  * variable is a screwed up mechanism; the information should be
69  * recursively maintainted by look_for_nested_loops() and propagated
70  * downwards only (not forwards as with the static variable...).
71  *
72  * The same result could be achived by restarting a different occurence of
73  * look_for_nested_loops() with a simple vectorization trasnformation and
74  * another predicate checking for internal loops or for anything I want.
75  * No state memorization is needed...
76  */
77 static bool parallel_loop_has_been_selected;
78 
79 /* No lambda closure in C */
80 static entity current_loop_index = entity_undefined;
81 ␌
82 /* Transformation strategy for an isolated loop */
83 
84 /* the parallelization and vectorization strategy is based on iteration numbers */
85 #define UNKNOWN_LOOP_COUNT 0
86 #define SMALL_LOOP_COUNT 1
87 #define MEDIUM_LOOP_COUNT 2
88 #define LARGE_LOOP_COUNT 3
89 
90 #define SEQUENTIAL_DIRECTION 0
91 #define VECTOR_DIRECTION 1
92 #define PARALLEL_DIRECTION 2
93 
94 /* the transformation strategy is chosen according to the loop iteration count
95  */
96 typedef struct transformation_strategy {
97  int maximum_iteration_count;
98  statement (*loop_transformation)();
99 } transformation_strategy;
100 
101 
102 ␌
103 static statement loop_preserve(statement s, __attribute__((unused)) int c)
104 {
105  pips_debug(9, "begin\n");
106 
107  pips_debug(9, "end\n");
108 
109  return s;
110 }
111 
112 static statement loop_vectorize(statement s, __attribute__((unused)) int c)
113 {
114  loop l = statement_loop(s);
115 
116  pips_debug(9, "begin\n");
117 
119 
120  pips_debug(9, "end\n");
121 
122  return s;
123 }
124 
125 static statement tuned_loop_parallelize(statement s, __attribute__((unused)) int c)
126 {
127  loop l = statement_loop(s);
128 
129  pips_debug(9, "begin\n");
130 
131  /* FI: the recursive computation of parallel_loop_has_been_selected is not implemented */
132  if(parallel_loop_has_been_selected && !parallel_loop_has_been_selected)
133  ;
134  else {
135  /* the body complexity should be checked and, if it is a constant,
136  a strip-mining factor should be derived to get the best
137  possible load balancing */
138 
139  /* the easy way to go is to use the processor number to make chunks
140  */
141 
143 
144  /* the outer loop is tagged as parallel, the inner loop is sequential */
146  parallel_loop_has_been_selected = true;
147  }
148 
149  pips_debug(9, "end\n");
150 
151  return s;
152 }
153 
154 static statement tuned_loop_unroll(statement s, int c)
155 {
157  range lr = loop_range(il);
158  expression lb = range_lower(lr),
159  ub = range_upper(lr),
160  inc = range_increment(lr);
161  intptr_t lbval, ubval, incval;
162 
163  pips_debug(9, "begin\n");
164 
165  if (expression_integer_value(lb, &lbval)
166  && expression_integer_value(ub, &ubval)
167  && expression_integer_value(inc, &incval)) {
168  full_loop_unroll(s);
169  }
170  else if(c > 1) {
171  loop_unroll(s, c);
172  }
173 
174  pips_debug(9, "end\n");
175 
176  return s;
177 }
178 
179 static bool current_loop_index_p(reference r)
180 {
181  return reference_variable(r) == current_loop_index;
182 }
183 
184 static statement tuned_loop_strip_mine(statement s)
185 {
186 
187  pips_debug(9, "begin\n");
188 
189  /* strip it for the vector registers and the parallel processors */
191 
192  /* outer parallel loop */
193  /* FI: should be kept sequential if another outer parallel loop has been defined... */
194  if(get_processor_number() != 1) {
196  }
197  else {
199  }
200 
201  /* inner vector loop */
202  pips_assert("tuned_loop_strip_mine", instruction_loop_p(statement_instruction(s)));
203  if(get_vector_register_number() > 0) {
205  }
206  else {
208  }
209 
210  pips_debug(9, "end\n");
211 
212  return s;
213 }
214 
215 ␌
216 static bool always_select_p(__attribute__((unused)) statement s)
217 {
218  return true;
219 }
220 static bool carried_dependence_p(__attribute__((unused))statement s)
221 {
222  return false;
223 }
224 ␌
225 static Pvecteur estimate_range_count(range r)
226 {
231 
232  pips_debug(9, "begin\n");
233 
235  Pvecteur vinc = (Pvecteur) normalized_linear(ninc);
236 
237  if(vect_constant_p(vinc)) {
238  Pvecteur vlb = (Pvecteur) normalized_linear(nlb);
239  Pvecteur vub = (Pvecteur) normalized_linear(nub);
240 
241  count = vect_substract(vub, vlb);
242  vect_add_elem(&count, TCST, 1);
243  count = vect_div(count, vect_coeff(TCST, vinc));
244  }
245  else {
247  }
248  }
249  else {
251  }
252 
253  ifdebug(9) {
254  debug(9, "estimate_range_count", "output\n");
255  vect_dump(count);
256  }
257  pips_debug(9, "end\n");
258 
259  return count;
260 }
261 
262 static Pvecteur estimate_loop_iteration_count(loop l)
263 {
264  return estimate_range_count(loop_range(l));
265 }
266 static int numerical_loop_iteration_count(loop l)
267 {
268  Pvecteur count = estimate_loop_iteration_count(l);
269  int c;
270 
272  c = -1;
273  else {
274  if( vect_constant_p(count)) {
275  Value v = vect_coeff(TCST, count);
276  c = VALUE_TO_INT(v);
277  }
278  else
279  c = -1;
280  vect_rm(count);
281  }
282 
283  return c;
284 }
285 
286 static transformation_strategy
287  one_loop_transformation_strategies
288  [PARALLEL_DIRECTION+1][LARGE_LOOP_COUNT+1] =
289  {
290  {{-1, loop_preserve},
291  {4, tuned_loop_unroll },
292  {80, loop_preserve },
293  {INT_MAX, loop_preserve}},
294  {{-1, loop_vectorize},
295  {4, tuned_loop_unroll },
296  {80, loop_vectorize },
297  {INT_MAX, tuned_loop_strip_mine}},
298  {{-1, tuned_loop_parallelize},
299  {4, tuned_loop_unroll },
300  {80, tuned_loop_parallelize },
301  {INT_MAX, tuned_loop_parallelize}}
302  };
303 static
304 statement one_loop_parallelization(statement s)
305 {
306  statement new_s = s;
307  int c;
308  int kind;
309  int size;
310 
311  pips_debug(9, "begin - input loop\n");
312  if(get_debug_level()>=9) {
313  print_statement(s);
314  pips_assert("one_loop_parallelization", statement_consistent_p(s));
315  }
316 
317  /* find out the loop iteration count c */
318  c = numerical_loop_iteration_count(statement_loop(s));
319 
320  /* find out the loop kind */
321  if(carried_dependence_p(s))
322  kind = SEQUENTIAL_DIRECTION;
324  kind = VECTOR_DIRECTION;
325  else
326  kind = PARALLEL_DIRECTION;
327 
328  /* select the proper transformation */
329  for( size = UNKNOWN_LOOP_COUNT; size <= LARGE_LOOP_COUNT; size++)
330  if( c <= one_loop_transformation_strategies[kind][size].maximum_iteration_count)
331  break;
332 
333  if(size>LARGE_LOOP_COUNT) {
334  pips_internal_error("cannot find a transformation strategy"
335  " for kind %d and count %d\n",
336  kind, c);
337  }
338  else {
339  pips_debug(9, "kind = %d, size = %d, c = %d\n", kind, size, c);
340  (* one_loop_transformation_strategies[kind][size].loop_transformation)
341  (s, c);
342  }
343 
344 
345  ifdebug(9) {
346  pips_debug(9, "output loop\n");
347  print_statement(s);
348  pips_assert("one_loop_parallelization", statement_consistent_p(s));
349  pips_debug(9, "end\n");
350  }
351 
352  return new_s;
353 }
354 static int look_for_references_in_expression(expression , reference (*) (reference), bool (*) (reference));
355 
356 static int look_for_references_in_range(range r, reference (*reference_transformation) (reference), bool (*reference_predicate) (reference))
357 {
358  int count = 0;
359  expression rl = range_lower(r);
360  expression ru = range_upper(r);
361  expression ri = range_increment(r);
362 
363  pips_debug(5, "begin\n");
364 
365  count += look_for_references_in_expression(rl, reference_transformation, reference_predicate);
366  count += look_for_references_in_expression(ru, reference_transformation, reference_predicate);
367  count += look_for_references_in_expression(ri, reference_transformation, reference_predicate);
368 
369  pips_debug(5, "end %d\n", count);
370 
371  return count;
372 }
373 
374 static int look_for_references_in_call(call c, reference (*reference_transformation) (reference), bool (*reference_predicate) (reference))
375 {
376  value vin;
377  entity f;
378  int count = 0;
379 
380  pips_debug(5, "begin\n");
381 
382  f = call_function(c);
383  vin = entity_initial(f);
384 
385  switch (value_tag(vin)) {
386  case is_value_constant:
387  /* nothing to replace */
388  break;
389  case is_value_symbolic:
390  pips_internal_error("case is_value_symbolic: not implemented");
391  break;
392  case is_value_intrinsic:
393  case is_value_unknown:
394  case is_value_code:
395  {
396  /* We assume that it is legal to replace arguments (because it should
397  have been verified with the effects that the index is not WRITTEN).
398  */
400  {
401  count += look_for_references_in_expression(e,
402  reference_transformation,
403  reference_predicate);
404  }
405  } break;
406  default:
407  pips_internal_error("unknown tag: %d",
408  (int) value_tag(vin));
409 
410  }
411 
412  pips_debug(5, "end %d\n", count);
413 
414  return count;
415 }
416 static int look_for_references_in_expression(expression e, reference (*reference_transformation) (reference), bool (*reference_predicate) (reference))
417 {
418  syntax s = expression_syntax(e);
419  int count = 0;
420 
421  pips_debug(5, "begin\n");
422 
423  switch(syntax_tag(s)) {
424  case is_syntax_reference: {
426  if ( (*reference_predicate)(r)) {
427  reference new_r = (*reference_transformation)(syntax_reference(s));
428  /* FI: if a free must be performed, onlye reference_transformation()
429  * knows about it */
430  /* reference_free(syntax_reference(s)); */
431  syntax_reference(s) = new_r;
432  count = 1;
433  }
434 
435  MAPL(lexpr, {
436  expression indice = EXPRESSION(CAR(lexpr));
437  count += look_for_references_in_expression(indice, reference_transformation,
438  reference_predicate);
439  }, reference_indices(r));
440  }
441  break;
442  case is_syntax_range:
443  count = look_for_references_in_range(syntax_range(s), reference_transformation,
444  reference_predicate);
445  break;
446  case is_syntax_call:
447  count = look_for_references_in_call(syntax_call(s), reference_transformation,
448  reference_predicate);
449  break;
450  default:
451  pips_internal_error("unknown tag: %d",
452  (int) syntax_tag(expression_syntax(e)));
453  }
454 
455  pips_debug(5, "end %d\n", count);
456 
457  return count;
458 }
459 static int look_for_references_in_statement(statement s, reference (*reference_transformation) (reference), bool (*reference_predicate) (reference))
460 {
462  int count = 0;
463 
464  pips_debug(5, "begin\n");
465 
466  switch(instruction_tag(inst)) {
467  case is_instruction_block :
468  MAPL( sts, {
469  count += look_for_references_in_statement(STATEMENT(CAR(sts)),
470  reference_transformation,
471  reference_predicate);
472  }, instruction_block(inst));
473  break;
474  case is_instruction_test : {
475  /* legal if no statement redefines ref */
476  test t = instruction_test(inst);
477  count = look_for_references_in_expression(test_condition(t), reference_transformation,
478  reference_predicate);
479  count += look_for_references_in_statement(test_true(t), reference_transformation,
480  reference_predicate);
481  count += look_for_references_in_statement(test_false(t), reference_transformation,
482  reference_predicate);
483  break;
484  }
485  case is_instruction_loop : {
486  loop l = instruction_loop(inst);
487  count = look_for_references_in_range(loop_range(l), reference_transformation,
488  reference_predicate);
489  count += look_for_references_in_statement(loop_body(l), reference_transformation,
490  reference_predicate);
491  break;
492  }
493  case is_instruction_whileloop : {
495  count = look_for_references_in_expression(whileloop_condition(l), reference_transformation,
496  reference_predicate);
497  count += look_for_references_in_statement(whileloop_body(l), reference_transformation,
498  reference_predicate);
499  break;
500  }
501  case is_instruction_call :
502  count = look_for_references_in_call(instruction_call(inst), reference_transformation,
503  reference_predicate);
504  break;
505  case is_instruction_goto :
506  pips_internal_error("case is_instruction_goto");
507  break;
509  /* FI: there is no reason not to do something here! */
510  pips_internal_error("case is_instruction_unstructured");
511  break;
512  default:
513  pips_internal_error("Bad instruction tag");
514  }
515 
516  pips_debug(5, "end %d\n", count);
517 
518  return count;
519 }
520 static reference reference_identity(reference r)
521 {
522  return r;
523 }
524 
525 static bool constant_array_reference_p(reference r)
526 {
527  /* Uses a static global variable, current_loop_index */
528  list li = reference_indices(r);
529 
530  ifdebug(9) {
531  pips_debug(9, "begin: index=%s reference=",
532  entity_local_name(current_loop_index));
533  print_reference(r);
534  putc('\n', stderr);
535  }
536 
537  // The scalar references are constant with respect to any loop
538  // FI: this should be upgraded to cope with C structures...
539  if(!array_reference_p(r)) {
540  pips_debug(9, "end: FALSE\n");
541  return false;
542  }
543 
544  /* FI: this is a very approximatw evaluation that assumes no
545  induction variables, affine or not */
546  FOREACH(EXPRESSION, i, li) {
547  int count = look_for_references_in_expression(i, reference_identity,
548  current_loop_index_p);
549 
550  if(count!=0) {
551  pips_debug(9, "end: count=%d FALSE\n", count);
552  return false;
553  }
554  }
555 
556  pips_debug(9, "end: TRUE\n");
557  return true;
558 }
559 ␌
560 static bool contiguous_array_reference_p(reference r)
561 {
562  /* Uses a static global variable, current_loop_index */
563  list li = reference_indices(r);
564  expression se = expression_undefined; // subscript expression
566  bool contiguous_p = false;
567 
568  /* The test could be improved by checking that the offset with
569  respect to the loop index is constant within the loop nest:
570  e.g. i+n**2 is a contiguous access */
571  if(!ENDP(li)) {
573  se = EXPRESSION(CAR(gen_last(li)));
574  }
576  se = EXPRESSION(CAR(li));
577  }
578  nse = NORMALIZE_EXPRESSION(se);
579  if(normalized_linear_p(nse)) {
580  Pvecteur vse = normalized_linear(nse);
581  if(vect_dimension(vse)==1
582  && vect_coeff((Variable) current_loop_index, vse)==VALUE_ONE)
583  contiguous_p = true;
584  }
585  // This consider 2*i as a contiguous reference... The above check
586  //on VALUE_ONE might have to be relaxed
587  //return expression_reference_p(first_index) &&
588  //(reference_variable(expression_reference(first_subscript))
589  // == current_loop_index);
590  }
591 
592  return contiguous_p;
593 }
594 
595 
596 #if 0
597 static statement mark_loop_as_parallel(list lls, __attribute__((unused)) bool (*unused)(statement))
598 {
599  statement ls = STATEMENT(CAR(lls));
601 
602  return ls;
603 }
604 
605 static int current_loop_depth = -1;
606 static bool nth_loop_p(__attribute__((unused))statement s)
607 {
608  /* FI: this is *wrong* but should work for a demo :-( */
609  static int count = 0;
610 
611  count++;
612  return count == current_loop_depth;
613 }
614 #endif
615 
616 
617 
618 
619 ␌
620 
621 ␌
622 /* FI: there are at least two problems:
623  *
624  * - transformations like loop coalescing and full loop unrolling are
625  * not considered when the iteration counts are small (although they
626  * are considered in one_loop_parallelization!)
627  *
628  * - the cost function should be non-linear (C has conditional
629  * expressions:-)
630  *
631  * Besides, it's bugged
632  */
633 static statement loop_nest_parallelization(list lls)
634 {
635  /* FI: see Corinne and Yi-Qing; in which order is this loop list?!? */
636  statement s = STATEMENT(CAR(lls=gen_nreverse(lls)));
637  int loop_count = gen_length(lls);
638 #define DIRECTION_CONTIGUOUS_COUNT 0
639 #define DIRECTION_PARALLEL_P 1
640 #define DIRECTION_REUSE_COUNT 2
641 #define DIRECTION_ITERATION_COUNT 3
642 #define CHARACTERISTICS_NUMBER 4
643  int *characteristics[CHARACTERISTICS_NUMBER];
644  int ln;
645  int i;
646  int vector_loop_number;
647  int optimal_performance;
648 
649  pips_debug(8, "begin\n");
650 
651  /* gather information about each loop direction */
652  // Allocate arrays to store the parallelism and locality information
653  for(i=0; i < CHARACTERISTICS_NUMBER; i++)
654  characteristics[i] = (int *) malloc(loop_count*(sizeof(ln)));
655 
656  // Look for contiguous references
657  ln = 0;
658  FOREACH(STATEMENT, ls, lls) {
659  current_loop_index = loop_index(statement_loop(ls));
660  *(characteristics[DIRECTION_CONTIGUOUS_COUNT]+ln) =
661  look_for_references_in_statement(ls, reference_identity, contiguous_array_reference_p);
662  ln++;
663  }
664 
665  // Assume the the parallel code has been internalized, i.e. the
666  // parallelization information has been stored into the resource "code".
667  ln = 0;
668  FOREACH(STATEMENT, ls, lls) {
669  /* FI: the dependence graph should be used !!! */
670  /* Or use internalize parallel code... */
671  loop l = statement_loop(ls);
672  *(characteristics[DIRECTION_PARALLEL_P]+ln) =
674  ln++;
675  }
676 
677  ln = 0;
678  FOREACH(STATEMENT, ls, lls) {
679  current_loop_index = loop_index(statement_loop(ls));
680  *(characteristics[DIRECTION_REUSE_COUNT]+ln) =
681  look_for_references_in_statement(ls,
682  reference_identity,
683  constant_array_reference_p);
684  ln++;
685  }
686 
687  ln = 0;
688  FOREACH(STATEMENT, ls, lls) {
689  *(characteristics[DIRECTION_ITERATION_COUNT]+ln) =
690  numerical_loop_iteration_count(statement_loop(ls));
691  ln++;
692  }
693 
694  /* Display the information obtained about each loop */
695  ifdebug(8) {
696  ln = 0;
697  FOREACH(STATEMENT, ls, lls) {
698  (void) fprintf(stderr,"loop %d index %s\t#contiguous %d\t// %s\t#reuse %d\t#range %d\n",
699  ln,
701  *(characteristics[DIRECTION_CONTIGUOUS_COUNT]+ln),
702  bool_to_string(*(characteristics[DIRECTION_PARALLEL_P]+ln)),
703  *(characteristics[DIRECTION_REUSE_COUNT]+ln),
704  *(characteristics[DIRECTION_ITERATION_COUNT]+ln));
705  ln++;
706  }
707  }
708 
709  /* choose and apply a transformation if at least one parallel loop
710  has been found */
711 
712  /* choose as vector loop a parallel loop optimizing a tradeoff
713  * between contiguity and iteration count
714  */
715  optimal_performance = 0; // FI: could now be -1
716  vector_loop_number = -1;
717  for(ln = 0; ln < loop_count; ln++) {
718  /* FI: these two constants should be provided by the target description (see target.c) */
719 #define REUSE_WEIGHT 8
720 #define CONTIGUITY_WEIGHT 4
721 #define ITERATION_COUNT_WEIGHT 1
722  int performance =
723  REUSE_WEIGHT*(*(characteristics[DIRECTION_REUSE_COUNT]+ln))
724  + CONTIGUITY_WEIGHT*(*(characteristics[DIRECTION_CONTIGUOUS_COUNT]+ln));
725  int iteration_count = *(characteristics[DIRECTION_ITERATION_COUNT]+ln);
726 
727  /* If the iteration count is unknown, the iteration count is
728  -1, which may lead to a negative performance when all
729  other coefficients are 0, which may happen with complex
730  subscript expressions */
731  performance += ITERATION_COUNT_WEIGHT*(iteration_count);
732 
733  // You can have no contiguity and no reuse and no known loop bound
734  // and end up with performance==-1
735  // pips_assert("performance is strictly greater than 0", performance > 0);
736 
737  // FI: see case nested06, the matrix multiplication
738  // If the best loop is not parallel, then some kind of tiling
739  // and unrolling should be performed to keep the best loop
740  // inside (unrolled) while having a parallel loop around
741  // Rgeister pressure should also be taken into account to
742  // decide the tiling factor, which becomes the unrolling
743  // factor
744 
745  // This kind of stuff can be performed at PyPS level or by PoCC
746 
747  // Look for the best vector loop
748  if(*(characteristics[DIRECTION_PARALLEL_P]+ln)
749  && performance > optimal_performance) {
750  optimal_performance = performance;
751  vector_loop_number = ln;
752  }
753  }
754  if(vector_loop_number != -1) {
755 
756  ifdebug(8) {
757  pips_debug(8, "Vector loop is loop %d with performance %d\n",
758  vector_loop_number, optimal_performance);
759  }
760 
761  if(vector_loop_number != loop_count-1) {
762  /* the vector direction is not the innermost loop: exchange! */
763  pips_debug(8, "Interchange innermost loop with vector loop\n");
764  /* lls is expected in the other order :-( */
765  /* interchange_two_loops does now preserve parallel
766  loops; if parallel loops there are: do not forget to
767  intrernalize the parallelism.
768  */
770  vector_loop_number+1,
771  loop_count);
772  }
773  else {
774  // No vector loop has been found
775  ;
776  }
777  }
778  else {
779  pips_debug(8, "No loop interchange\n");
780  }
781 
782  /* mark vector loop as parallel */
783  /* FI: this is very very bad code; interchange_two_loops() should preserve
784  * loop execution
785  */
786  //current_loop_depth = loop_count;
787  //look_for_nested_loop_statements(s, mark_loop_as_parallel, nth_loop_p);
788 
789  pips_debug(8, "end\n");
790 
791  return s;
792 }
793 
794 static statement parallelization(list lls, __attribute__((unused)) bool (*loop_predicate) (statement))
795 {
797 
798  // The debug level is reset by the function looking for nested loops
799  debug_on("NEST_PARALLELIZATION_DEBUG_LEVEL");
800  pips_debug(8, "begin\n");
801 
802  pips_assert("the loop list is not empty", gen_length(lls)!= 0);
803 
804  if(gen_length(lls) == 1) {
805  s = one_loop_parallelization(STATEMENT(CAR(lls)));
806  }
807  else {
808  s = loop_nest_parallelization(lls);
809  }
810 
811  pips_debug(8, "end\n");
812 
813  debug_off();
814 
815  return s;
816 }
817 
818 bool nest_parallelization(const char* module_name)
819 {
820  entity module;
822  statement mod_parallel_stat = statement_undefined;
823 
826 
827  pips_assert("\"module\" is a module", entity_module_p(module));
828 
829  /* DBR_CODE will be changed into DBR_PARALLELIZED_CODE */
831  (statement) db_get_memory_resource(DBR_CODE, module_name, false) );
833 
834  mod_parallel_stat = copy_statement(mod_stat);
835 
836  dg = (graph) db_get_memory_resource(DBR_DG, module_name, true);
837 
838  /* Make sure the dependence graph points towards the code copy */
839  set_ordering_to_statement(mod_parallel_stat);
840 
841  debug_on("NEST_PARALLELIZATION_DEBUG_LEVEL");
842 
843  parallel_loop_has_been_selected = false;
844 
845  look_for_nested_loop_statements(mod_parallel_stat,
846  parallelization,
847  always_select_p);
848 
849  /* Regenerate statement_ordering for the parallel
850  code. module_body_reorder() checks the unique mapping
851  ordering_to_statement to make sure that no inconsistency is
852  introduced. */
854  module_body_reorder(mod_parallel_stat);
855 
856  ifdebug(7)
857  {
858  fprintf(stderr, "\nparallelized code %s:", module_name);
859  if (statement_consistent_p((statement)mod_parallel_stat))
860  fprintf(stderr," gen consistent ");
861  }
862 
863  debug_off();
864 
865  DB_PUT_MEMORY_RESOURCE(DBR_PARALLELIZED_CODE,
867  (char*) mod_parallel_stat);
868 
871  // Already performed befpre the reordering
872  //reset_ordering_to_statement();
873 
874  return true;
875 }
876 
877 #endif // BUILDER_NEST_PARALLELIZATION
878 
879 
880 
881 
882 
883 
float a2sf[2] __attribute__((aligned(16)))
USER generates a user error (i.e., non fatal) by printing the given MSG according to the FMT.
Definition: 3dnow.h:3
statement copy_statement(statement p)
STATEMENT.
Definition: ri.c:2186
bool statement_consistent_p(statement p)
Definition: ri.c:2195
static int count
Definition: SDG.c:519
dg_vertex_label vertex_label
Definition: delay.c:64
dg_arc_label arc_label
Definition: delay.c:63
#define VALUE_TO_INT(val)
int Value
#define VALUE_ONE
static graph dg
dg is the dependency graph ; FIXME : should not be static global ?
Definition: chains.c:124
struct _newgen_struct_statement_ * statement
Definition: cloning.h:21
bool vect_constant_p(Pvecteur)
bool vect_constant_p(Pvecteur v): v contains only a constant term, may be zero
Definition: predicats.c:211
void look_for_nested_loop_statements(statement, statement(*)(list, bool(*)(statement)), bool(*)(statement))
look_for_nested_loops.c
const char * module_name(const char *s)
Return the module part of an entity name.
Definition: entity_names.c:296
void * malloc(YYSIZE_T)
struct _newgen_struct_graph_ * graph
Definition: graph.h:31
void reset_current_module_entity(void)
Reset the current module entity.
Definition: static.c:97
void reset_current_module_statement(void)
Reset the current module statement.
Definition: static.c:221
statement set_current_module_statement(statement)
Set the current module statement.
Definition: static.c:165
statement get_current_module_statement(void)
Get the current module statement.
Definition: static.c:208
entity set_current_module_entity(entity)
static.c
Definition: static.c:66
entity get_current_module_entity(void)
Get the entity of the current module.
Definition: static.c:85
#define ENDP(l)
Test if a list is empty.
Definition: newgen_list.h:66
list gen_nreverse(list cp)
reverse a list in place
Definition: list.c:304
size_t gen_length(const list l)
Definition: list.c:150
#define CAR(pcons)
Get the value of the first element of a list.
Definition: newgen_list.h:92
list gen_last(list l)
Return the last element of a list.
Definition: list.c:578
#define FOREACH(_fe_CASTER, _fe_item, _fe_list)
Apply/map an instruction block on all the elements of a list.
Definition: newgen_list.h:179
#define MAPL(_map_list_cp, _code, _l)
Apply some code on the addresses of all the elements of a list.
Definition: newgen_list.h:203
string db_get_memory_resource(const char *rname, const char *oname, bool pure)
Return the pointer to the resource, whatever it is.
Definition: database.c:755
#define DB_PUT_MEMORY_RESOURCE(res_name, own_name, res_val)
conform to old interface.
Definition: pipsdbm-local.h:66
loop statement_loop(statement)
Get the loop of a statement.
Definition: statement.c:1374
bool assignment_block_or_statement_p(statement)
Definition: statement.c:142
static statement mod_stat
We want to keep track of the current statement inside the recurse.
Definition: impact_check.c:41
void vect_dump(Pvecteur v)
void vect_dump(Pvecteur v): print sparse vector v on stderr.
Definition: io.c:304
int vect_dimension(Pvecteur v)
int vect_dimension(Pvecteur v): calcul du nombre de composantes non nulles et non constantes d'un vec...
Definition: reductions.c:64
void loop_unroll(statement loop_statement, int rate)
fallbacks on do_loop_unroll without statement post processing
Definition: loop_unroll.c:748
void full_loop_unroll(statement loop_statement)
get rid of the loop by body replication;
Definition: loop_unroll.c:788
#define debug_on(env)
Definition: misc-local.h:157
#define pips_debug
these macros use the GNU extensions that allow variadic macros, including with an empty list.
Definition: misc-local.h:145
#define pips_assert(what, predicate)
common macros, two flavors depending on NDEBUG
Definition: misc-local.h:172
#define pips_internal_error
Definition: misc-local.h:149
#define debug_off()
Definition: misc-local.h:160
int get_debug_level(void)
GET_DEBUG_LEVEL returns the current debugging level.
Definition: debug.c:67
void debug(const int the_expected_debug_level, const char *calling_function_name, const char *a_message_format,...)
ARARGS0.
Definition: debug.c:189
string bool_to_string(bool)
Definition: string.c:243
int f(int off1, int off2, int n, float r[n], float a[n], float b[n])
Definition: offsets.c:15
hash_table set_ordering_to_statement(statement s)
To be used instead of initialize_ordering_to_statement() to make sure that the hash table ots is in s...
Definition: ordering.c:172
void reset_ordering_to_statement(void)
Reset the mapping from ordering to statement.
Definition: ordering.c:185
static char * module
Definition: pips.c:74
void print_reference(reference r)
Definition: expression.c:142
void print_statement(statement)
Print a statement on stderr.
Definition: statement.c:98
bool module_body_reorder(statement body)
Reorder a module.
Definition: reorder.c:211
#define NORMALIZE_EXPRESSION(e)
#define is_instruction_block
soft block->sequence transition
#define instruction_block(i)
const char * entity_local_name(entity e)
entity_local_name modified so that it does not core when used in vect_fprint, since someone thought t...
Definition: entity.c:453
entity module_name_to_entity(const char *mn)
This is an alias for local_name_to_top_level_entity.
Definition: entity.c:1479
bool entity_module_p(entity e)
Definition: entity.c:683
bool expression_integer_value(expression e, intptr_t *pval)
Definition: eval.c:792
bool array_reference_p(reference r)
predicates on references
Definition: expression.c:1861
bool c_language_module_p(entity m)
Definition: module.c:447
bool fortran_language_module_p(entity m)
Definition: module.c:452
#define value_tag(x)
Definition: ri.h:3064
#define execution_tag(x)
Definition: ri.h:1207
#define loop_body(x)
Definition: ri.h:1644
#define normalized_undefined
Definition: ri.h:1745
#define loop_execution(x)
Definition: ri.h:1648
#define syntax_reference(x)
Definition: ri.h:2730
#define syntax_tag(x)
Definition: ri.h:2727
#define normalized_linear_p(x)
Definition: ri.h:1779
#define instruction_loop_p(x)
Definition: ri.h:1518
#define call_function(x)
Definition: ri.h:709
#define reference_variable(x)
Definition: ri.h:2326
#define range_upper(x)
Definition: ri.h:2290
#define instruction_loop(x)
Definition: ri.h:1520
#define test_false(x)
Definition: ri.h:2837
@ is_value_intrinsic
Definition: ri.h:3034
@ is_value_unknown
Definition: ri.h:3035
@ is_value_constant
Definition: ri.h:3033
@ is_value_code
Definition: ri.h:3031
@ is_value_symbolic
Definition: ri.h:3032
#define syntax_range(x)
Definition: ri.h:2733
@ is_syntax_range
Definition: ri.h:2692
@ is_syntax_call
Definition: ri.h:2693
@ is_syntax_reference
Definition: ri.h:2691
#define range_increment(x)
Definition: ri.h:2292
#define EXPRESSION(x)
EXPRESSION.
Definition: ri.h:1217
#define entity_undefined
Definition: ri.h:2761
#define expression_undefined
Definition: ri.h:1223
@ is_instruction_goto
Definition: ri.h:1473
@ is_instruction_unstructured
Definition: ri.h:1475
@ is_instruction_whileloop
Definition: ri.h:1472
@ is_instruction_test
Definition: ri.h:1470
@ is_instruction_call
Definition: ri.h:1474
@ is_instruction_loop
Definition: ri.h:1471
#define instruction_tag(x)
Definition: ri.h:1511
#define test_true(x)
Definition: ri.h:2835
#define reference_indices(x)
Definition: ri.h:2328
#define syntax_call(x)
Definition: ri.h:2736
#define test_condition(x)
Definition: ri.h:2833
#define instruction_whileloop(x)
Definition: ri.h:1523
#define range_lower(x)
Definition: ri.h:2288
#define whileloop_body(x)
Definition: ri.h:3162
#define statement_instruction(x)
Definition: ri.h:2458
#define instruction_call(x)
Definition: ri.h:1529
#define loop_range(x)
Definition: ri.h:1642
@ is_execution_parallel
Definition: ri.h:1190
@ is_execution_sequential
Definition: ri.h:1189
#define call_arguments(x)
Definition: ri.h:711
#define instruction_test(x)
Definition: ri.h:1517
#define whileloop_condition(x)
Definition: ri.h:3160
#define normalized_linear(x)
Definition: ri.h:1781
#define expression_syntax(x)
Definition: ri.h:1247
#define execution_parallel_p(x)
Definition: ri.h:1211
#define loop_index(x)
Definition: ri.h:1640
#define statement_undefined
Definition: ri.h:2419
#define STATEMENT(x)
STATEMENT.
Definition: ri.h:2413
#define entity_initial(x)
Definition: ri.h:2796
int fprintf()
test sc_min : ce test s'appelle par : programme fichier1.data fichier2.data ...
char * strdup()
Pvecteur vect_div(Pvecteur v, Value x)
Pvecteur vect_div(Pvecteur v, Value x): division du vecteur v par le scalaire x, si x est different d...
Definition: scalaires.c:52
#define ifdebug(n)
Definition: sg.c:47
#define intptr_t
Definition: stdint.in.h:294
statement loop_strip_mine(statement loop_statement, int chunk_size, int chunk_number)
loop_strip_mine():
Definition: strip_mine.c:64
le type des coefficients dans les vecteurs: Value est defini dans le package arithmetique
Definition: vecteur-local.h:89
The structure used to build lists in NewGen.
Definition: newgen_list.h:41
statement interchange_two_loops(list, int, int)
int get_vector_register_length(void)
Definition: util.c:266
bool nest_parallelization(const char *)
nest_parallelization.c
int get_vector_register_number(void)
Definition: util.c:271
int get_processor_number(void)
Definition: util.c:261
#define TCST
VARIABLE REPRESENTANT LE TERME CONSTANT.
#define VECTEUR_UNDEFINED
struct Svecteur * Pvecteur
void * Variable
arithmetique is a requirement for vecteur, but I do not want to inforce it in all pips files....
Definition: vecteur-local.h:60
#define VECTEUR_UNDEFINED_P(v)
void vect_rm(Pvecteur v)
void vect_rm(Pvecteur v): desallocation des couples de v;
Definition: alloc.c:78
Pvecteur vect_substract(Pvecteur v1, Pvecteur v2)
Pvecteur vect_substract(Pvecteur v1, Pvecteur v2): allocation d'un vecteur v dont la valeur est la di...
Definition: binaires.c:75
void vect_add_elem(Pvecteur *pvect, Variable var, Value val)
void vect_add_elem(Pvecteur * pvect, Variable var, Value val): addition d'un vecteur colineaire au ve...
Definition: unaires.c:72
Value vect_coeff(Variable var, Pvecteur vect)
Variable vect_coeff(Variable var, Pvecteur vect): coefficient de coordonnee var du vecteur vect —> So...
Definition: unaires.c:228