PIPS
loop_nest_annotate.c
Go to the documentation of this file.
1 /*
2 
3  $Id:loop_nest_annotate.c 15433 2009-09-22 06:49:48Z creusillet $
4 
5  Copyright 2009-2010 HPC Project
6 
7  This file is part of PIPS.
8 
9  PIPS is free software: you can redistribute it and/or modify it
10  under the terms of the GNU General Public License as published by
11  the Free Software Foundation, either version 3 of the License, or
12  any later version.
13 
14  PIPS is distributed in the hope that it will be useful, but WITHOUT ANY
15  WARRANTY; without even the implied warranty of MERCHANTABILITY or
16  FITNESS FOR A PARTICULAR PURPOSE.
17 
18  See the GNU General Public License for more details.
19 
20  You should have received a copy of the GNU General Public License
21  along with PIPS. If not, see <http://www.gnu.org/licenses/>.
22 
23  */
24 #ifdef HAVE_CONFIG_H
25 #include "pips_config.h"
26 #endif
27 
28 /* Loop nests transformation phase for par4all :
29 
30  gpu_loop_nest_annotate takes a module that is of the form :
31 
32 
33  typedef float float_t;
34  void p4a_kernel_wrapper_1(float_t save[501][501], float_t space[501][501], int i, int j);
35  void p4a_kernel_launcher_1(float_t save[501][501], float_t space[501][501])
36  {
37  int i;
38  int j;
39  kernel2:
40  for(i = 0; i <= 100; i += 1)
41  for(j = 0; j <= 200; j += 1)
42  p4a_kernel_wrapper_1(save, space, i+1, j+1);
43  }
44 
45  and transforms it into :
46 
47  typedef float float_t;
48  void p4a_kernel_wrapper_1(float_t save[501][501], float_t space[501][501], int i, int j);
49  void p4a_kernel_launcher_1(float_t save[501][501], float_t space[501][501])
50  {
51  int i;
52  int j;
53  kernel2:
54  // Loop nest P4A begin, 2D(200, 100)
55  for(i = 0; i <= 100; i += 1)
56  for(j = 0; j <= 200; j += 1)
57  // Loop nest P4A end
58  if (i <= 100 && j <= 200)
59  p4a_kernel_wrapper_1(save, space, i+1, j+1);
60  }
61 
62  for further generation of CUDA code.
63  }
64  */
65 
66 /* Ansi includes */
67 #include <stdio.h>
68 #include <string.h>
69 
70 /* Newgen includes */
71 #include "genC.h"
72 
73 #include "boolean.h"
74 #include "arithmetique.h"
75 #include "vecteur.h"
76 #include "contrainte.h"
77 #include "ray_dte.h"
78 #include "sommet.h"
79 #include "sg.h"
80 #include "sc.h"
81 #include "polyedre.h"
82 #include "matrix.h"
83 
84 /* Pips includes */
85 #include "linear.h"
86 #include "ri.h"
87 #include "effects.h"
88 
89 #include "database.h"
90 #include "ri-util.h"
91 #include "prettyprint.h"
92 #include "effects-util.h"
93 #include "constants.h"
94 #include "misc.h"
95 #include "control.h"
96 #include "text-util.h"
97 #include "pipsdbm.h"
98 #include "resources.h"
99 #include "properties.h"
100 #include "effects-simple.h"
101 
102 #define COMMA ","
103 #define OPENPAREN "("
104 #define CLOSEPAREN ")"
105 
106 /* In modern PIPS programming, all is passed through a context instead of
107  having a global variable. This should allow some PIPS parallelization
108  some days... :-) */
109 
110 typedef struct {
115  /* True only when we reach the inner annotated loop: */
117  /* True if we only deal with parallel loop nests */
119  /* The generation may fail because of an unhandled case for isntance */
120  bool fail_p;
124 
125 /* Push a loop that matches the criterion for annotation */
126 static bool loop_push(loop l, gpu_lna_context * p) {
127  if(p->max_loop_nest_depth == -1) {
128  /* This is the first loop we met in this loop nest */
130  return true;
131  }
132 
133  /* Let's compute the loop_nest_depth */
137  : depth_of_perfect_loop_nest(current_stat);
138  }
139 
141  /* this loop does not belong to the perfectly nested loops */
142  return false;
143  else {
145  p->loop_nest_depth++;
146  // Go on recursing:
147  return true;
148  }
149  return false;
150 }
151 
152 /* Do the real annotation work on previously marked loops bottom-up */
153 static void loop_annotate(loop l, gpu_lna_context * p) {
154  /* We have to select the operators that are different in C and FORTRAN */
155  string and_op =
158  string
159  less_op =
162  /* The first time we enter this function is when we reach the innermost
163  loop nest level.
164  */
165  if(p->inner_loop == loop_undefined) {
166  expression guard_exp = expression_undefined;
167 
168  /* We are at the innermost loop nest level */
169  p->inner_loop = l;
170  /* First we build the guard to be added to the loop body statement using the
171  enclosing loops upper bounds.
172  And we push on l_number_iter_exp an expression representing the
173  number of iteration of each loop;
174  currently, we do not check that variables modified inside loops
175  are not used in loop bounds expressions.
176  but we take care of loop indices used in deeper loop bounds.
178  entity c_index = loop_index(c_loop);
179  range c_range = loop_range(c_loop);
180  expression c_lower = copy_expression(range_lower(c_range));
181  expression c_upper = copy_expression(range_upper(c_range));
182  expression c_inc = range_increment(c_range);
183  expression c_number_iter_exp = expression_undefined;
184  expression c_guard;
185 
186  if(expression_constant_p(c_inc) && expression_to_int(c_inc) == 1) {
187  /* first check if the lower bound depend on enclosing loop indices */
188  list l_eff_lower_bound = proper_effects_of_expression(c_lower);
189  list l_eff_upper_bound = proper_effects_of_expression(c_upper);
190 
191  /* We have to clean the list of effect from any "preference" since the
192  * next loop modify the references and can make our "preference" invalid
193  */
194  void remove_preferences(void * obj);
195  {
196  FOREACH(effect,e,l_eff_lower_bound) {
198  }
199  }
200  {
201  FOREACH(effect,e,l_eff_upper_bound) {
203  }
204  }
205 
206  FOREACH(LOOP, other_loop, p->l_enclosing_loops) {
207  if(other_loop != l) {
208  range range_other_loop = loop_range(other_loop);
209  expression lower_other_loop = range_lower(range_other_loop);
210  expression upper_other_loop = range_upper(range_other_loop);
211 
212  if(effects_read_variable_p(l_eff_lower_bound,
213  loop_index(other_loop))) {
214  expression new_lower_1 = c_lower;
215  expression new_lower_2 = copy_expression(c_lower);
216  replace_entity_by_expression(new_lower_1,
217  loop_index(other_loop),
218  lower_other_loop);
219  (void)simplify_expression(&new_lower_1);
220  replace_entity_by_expression(new_lower_2,
221  loop_index(other_loop),
222  upper_other_loop);
223 
224  (void)simplify_expression(&new_lower_2);
225  c_lower = make_min_expression(new_lower_1,
226  new_lower_2,
228  }
229  if(effects_read_variable_p(l_eff_upper_bound,
230  loop_index(other_loop))) {
231  expression new_upper_1 = c_upper;
232  expression new_upper_2 = copy_expression(c_upper);
233  replace_entity_by_expression(new_upper_1,
234  loop_index(other_loop),
235  lower_other_loop);
236  (void)simplify_expression(&new_upper_1);
237  replace_entity_by_expression(new_upper_2,
238  loop_index(other_loop),
239  upper_other_loop);
240  (void)simplify_expression(&new_upper_2);
241 
242  c_upper = make_max_expression(new_upper_1,
243  new_upper_2,
245  }
246  }
247  }
248 
249  c_guard
250  = MakeBinaryCall(entity_intrinsic(less_op),
252  NIL)),
253  copy_expression(range_upper(c_range)));
254 
255  if(expression_undefined_p(guard_exp))
256  guard_exp = c_guard;
257  else
258  guard_exp = MakeBinaryCall(entity_intrinsic(and_op),
259  guard_exp,
260  c_guard);
261 
262  pips_debug(2, "guard expression : %s\n",
263  expression_to_string(guard_exp));
264 
265  /* Keep the number of iterations for the generation of the
266  outermost comment */
267  c_number_iter_exp = make_op_exp(MINUS_OPERATOR_NAME, c_upper, c_lower);
268  c_number_iter_exp = make_op_exp(PLUS_OPERATOR_NAME,
269  c_number_iter_exp,
270  int_to_expression(1));
271  /* We will have deepest loop size first: */
272  p->l_number_iter_exp = CONS(EXPRESSION, c_number_iter_exp,
273  p->l_number_iter_exp);
274 
275  } else {
276  p->fail_p = true;
277  pips_user_warning("case not handled: loop increment is not 1.\n");
278  }
279  }
280  if(!p->fail_p) {
281  p->guard_expression = guard_exp;
282  }
283 
284  }
285 
286  /* We are now on our way back in the recursion; we do nothing, unless
287  we are at the uppermost level.
288  */
289  if(gen_length(p->l_enclosing_loops) == 1) {
290  if(!p->fail_p)
291  // if the process has succeeded, we add the outermost comment :
292  // Loop nest P4A begin, xD(upper_bound,..) and the inner guard.
293  {
295  // Then we add the comment such as: '// Loop nest P4A begin,3D(200, 100)'
296  string outer_s;
297  (void)asprintf(&outer_s,
298  "%s Loop nest P4A begin,%dD" OPENPAREN,
300  p->loop_nest_depth);
301 
302  bool first_iteration = true;
303  /* Output inner dimension first: */FOREACH(EXPRESSION, upper_exp, p->l_number_iter_exp) {
304  string buf;
305  string buf1 = expression_to_string(upper_exp);
306  if(first_iteration)
307  /* Concatenate the dimension of the innermost loop: */
308  (void)asprintf(&buf, "%s%s", outer_s, buf1);
309  else
310  /* Idem for other dimensions, but do not forget to insert the ', ' */
311  (void)asprintf(&buf, "%s%s%s", outer_s, COMMA" ", buf1);
312  free(outer_s);
313  free(buf1);
314  outer_s = buf;
315  first_iteration = false;
316  }
317  (void)asprintf(&statement_comments(current_stat),
318  "%s"CLOSEPAREN"\n",
319  outer_s);
320  free(outer_s);
322  loop_body(p->inner_loop),
324  /* Then we add the comment : // Loop nest P4A end */statement_comments(guard_s)
326  " Loop nest P4A end\n",
327  NULL));
328  loop_body(p->inner_loop) = guard_s;
329 
330  /* reset context */
331  p->loop_nest_depth = 0;
332  p->max_loop_nest_depth = -1;
334  p->l_number_iter_exp = NIL;
335  p->fail_p = false;
338  }
339 
340  else
341  // the process has failed: we clean everything and reset context
342  {
343  p->loop_nest_depth = 0;
344  p->max_loop_nest_depth = -1;
346  p->l_number_iter_exp = NIL;
347  p->fail_p = false;
352  }
353  }
354  }
355  if(gen_length(p->l_enclosing_loops)) {
357  }
358  return;
359 }
360 
361 /**
362  * annotates loop nests in the following way :
363  *
364  * for(i=0; i<=100; i++)
365  * for(j=0; j<=200; j++)
366  * foo();
367  *
368  * ==>
369  *
370  * // Loop nest P4A begin,2D(200, 100)
371  * for(i=0; i<=100; i++)
372  * for(j=0; j<=200; j++)
373  * // Loop nest P4A end
374  * if (i<=100&&j<=200)
375  * foo();
376  *
377  * for loops must have been transformed into loops.
378  *
379  * @param mod_name name of the module
380  *
381  * @return true
382  */
384  /* Initialize context */
385  gpu_lna_context c;
388  c.max_loop_nest_depth = -1;
389  c.loop_nest_depth = 0;
390  c.inner_reached = false;
392  = get_bool_property("GPU_LOOP_NEST_ANNOTATE_PARALLEL");
393  c.fail_p = false;
396 
397  /* Annotate the loop nests of the module. */
399 
400  /* Clean up things: (hasn't it been done previously in loop_annotate?) */
402 
403  return true;
404 }
405 
407  // Use this module name and this environment variable to set
409  PIPS_PHASE_PRELUDE(module_name, "P4A_LOOP_NEST_ANOTATE_DEBUG_LEVEL");
410 
412 
413  // Put back the new statement module
415  ;
416  // The macro above does a "return TRUE" indeed.
417 }
418 
419 /**
420  * Callback for gen_recurse
421  * Parallelize perfectly nested loop nest, till we reach the magic comment
422  *
423  * FIXME : should detect the beginning sentinel, but since we use it in launcher,
424  * it has no importance at that time
425  */
427  char **comment=NULL;
428  if(statement_loop_p(s)) {
430  // Check the inner comment to find out the sentinel and stop recursion
432  } else {
434  }
435 
436  // Check sentinel
437  if(comment && !empty_comments_p(*comment) && NULL != strstr(*comment, "Loop nest P4A end")) {
438  // stop recursion
439  return false;
440  }
441  return true;
442 }
443 
444 /** Parallelize the launcher based on loop nest annotate sentinels */
445 bool gpu_parallelize_annotated_loop_nest(const string mod_name) {
446  // Use this module name and this environment variable to set
448  "GPU_IFY_DEBUG_LEVEL");
449 
450  // Parallelize loops
453 
454  // Put back the new statement module
456  ;
457  // The macro above does a "return TRUE" indeed.
458 }
459 
460 
461 /**
462  * Callback for gen_recurse
463  * Remove annotation on a loop nest
464  */
466  string comment = statement_comments(s);
468  && (NULL != strstr(comment, "Loop nest P4A end")|| NULL != strstr(comment, "Loop nest P4A begin"))) {
469  // clear the comment
470  // We may instead filter out only the annotation inside the comment
472  }
473  return true;
474 }
475 
476 /** Remove all annotations on a loop nest */
477 bool gpu_clear_annotations_on_loop_nest(const string mod_name) {
478  // Use this module name and this environment variable to set
480  "GPU_IFY_DEBUG_LEVEL");
481 
482  // Parallelize loops
485 
486  // Put back the new statement module
488  ;
489  // The macro above does a "return TRUE" indeed.
490 }
expression copy_expression(expression p)
EXPRESSION.
Definition: ri.c:850
reference make_reference(entity a1, list a2)
Definition: ri.c:2083
test make_test(expression a1, statement a2, statement a3)
Definition: ri.c:2607
void free_expression(expression p)
Definition: ri.c:853
void remove_preferences(void *)
delay.c
Definition: delay.c:89
static statement module_statement
Definition: alias_check.c:125
struct _newgen_struct_statement_ * statement
Definition: cloning.h:21
static string c_loop(loop l)
list proper_effects_of_expression(expression)
bool effects_read_variable_p(list, entity)
Definition: effects.c:1123
const char * module_name(const char *s)
Return the module part of an entity name.
Definition: entity_names.c:296
bool get_bool_property(const string)
FC 2015-07-20: yuk, moved out to prevent an include cycle dependency include "properties....
static void comment(string_buffer code, spoc_hardware_type hw, dagvtx v, int stage, int side, bool flip)
Definition: freia_spoc.c:52
#define gen_context_recurse(start, ctxt, domain_number, flt, rwt)
Definition: genC.h:285
#define gen_recurse(start, domain_number, flt, rwt)
Definition: genC.h:283
void gen_full_free_list(list l)
Definition: genClib.c:1023
void free(void *)
statement make_empty_block_statement(void)
Build an empty statement (block/sequence)
Definition: statement.c:625
void replace_entity_by_expression(void *s, entity ent, expression exp)
replace all reference to entity ent by expression exp in s.
Definition: replace.c:220
gen_chunk * gen_get_ancestor(int, const void *)
return the first ancestor object found of the given type.
Definition: genClib.c:3560
void gen_null(__attribute__((unused)) void *unused)
Ignore the argument.
Definition: genClib.c:2752
bool loop_parallel_p(loop l)
Test if a loop is parallel.
Definition: loop.c:393
int depth_of_perfect_loop_nest(statement s)
Compute the depth of a perfect loop-nest.
Definition: loop.c:476
int depth_of_parallel_perfect_loop_nest(statement s)
Compute the depth of a parallel perfect loop-nest.
Definition: loop.c:436
#define POP(l)
Modify a list pointer to point on the next element of the list.
Definition: newgen_list.h:59
#define NIL
The empty list (nil in Lisp)
Definition: newgen_list.h:47
size_t gen_length(const list l)
Definition: list.c:150
#define CONS(_t_, _i_, _l_)
List element cell constructor (insert an element at the beginning of a list)
Definition: newgen_list.h:150
list gen_nconc(list cp1, list cp2)
physically concatenates CP1 and CP2 but do not duplicates the elements
Definition: list.c:344
void gen_free_list(list l)
free the spine of the list
Definition: list.c:327
#define FOREACH(_fe_CASTER, _fe_item, _fe_list)
Apply/map an instruction block on all the elements of a list.
Definition: newgen_list.h:179
#define PIPS_PHASE_POSTLUDE(new_module_statement)
End a transformation phase by putting back into PIPS the (possibly) modified statement.
#define PIPS_PHASE_PRELUDE(module_name, debug_env_var)
Start a phase that use a module CODE.
loop statement_loop(statement)
Get the loop of a statement.
Definition: statement.c:1374
bool statement_loop_p(statement)
Definition: statement.c:349
char ** find_first_statement_comment(statement)
Find the first non-empty comment of a statement, if any returns a pointer to the comment if found,...
Definition: statement.c:1772
bool empty_comments_p(const char *)
Definition: statement.c:107
bool expression_constant_p(expression)
HPFC module by Fabien COELHO.
Definition: expression.c:2453
enum language_utype get_prettyprint_language_tag()
Definition: language.c:67
static void loop_annotate(loop l, gpu_lna_context *p)
Do the real annotation work on previously marked loops bottom-up.
bool clear_annotated_loop_nest(statement s)
Callback for gen_recurse Remove annotation on a loop nest.
static bool parallelize_annotated_loop_nest(statement s)
Callback for gen_recurse Parallelize perfectly nested loop nest, till we reach the magic comment.
#define CLOSEPAREN
#define COMMA
Loop nests transformation phase for par4all :
bool gpu_parallelize_annotated_loop_nest(const string mod_name)
Parallelize the launcher based on loop nest annotate sentinels.
#define OPENPAREN
bool gpu_loop_nest_annotate_on_statement(statement s)
annotates loop nests in the following way :
bool gpu_loop_nest_annotate(const char *module_name)
bool gpu_clear_annotations_on_loop_nest(const string mod_name)
Remove all annotations on a loop nest.
static bool loop_push(loop l, gpu_lna_context *p)
Push a loop that matches the criterion for annotation.
#define pips_debug
these macros use the GNU extensions that allow variadic macros, including with an empty list.
Definition: misc-local.h:145
#define pips_user_warning
Definition: misc-local.h:146
#define asprintf
Definition: misc-local.h:225
string concatenate(const char *,...)
Return the concatenation of the given strings.
Definition: string.c:183
#define string_undefined
Definition: newgen_types.h:40
string expression_to_string(expression e)
Definition: expression.c:77
string get_comment_sentinel()
Start a single line comment.
Definition: misc.c:154
#define C_LESS_OR_EQUAL_OPERATOR_NAME
#define C_AND_OPERATOR_NAME
#define MINUS_OPERATOR_NAME
#define PLUS_OPERATOR_NAME
#define test_to_statement(t)
#define AND_OPERATOR_NAME
FI: intrinsics are defined at a third place after bootstrap and effects! I guess the name should be d...
#define LESS_OR_EQUAL_OPERATOR_NAME
entity entity_intrinsic(const char *name)
FI: I do not understand this function name (see next one!).
Definition: entity.c:1292
expression reference_to_expression(reference r)
Definition: expression.c:196
bool simplify_expression(expression *pexp)
use polynomials to simplify an expression in some cases this operation can change the basic of the ex...
Definition: expression.c:3770
int expression_to_int(expression exp)
================================================================
Definition: expression.c:2205
expression make_min_expression(expression e1, expression e2, enum language_utype lang)
Definition: expression.c:1600
expression MakeBinaryCall(entity f, expression eg, expression ed)
Creates a call expression to a function with 2 arguments.
Definition: expression.c:354
expression int_to_expression(_int i)
transform an int into an expression and generate the corresponding entity if necessary; it is not cle...
Definition: expression.c:1188
expression make_op_exp(char *op_name, expression exp1, expression exp2)
================================================================
Definition: expression.c:2012
expression make_max_expression(expression e1, expression e2, enum language_utype lang)
Definition: expression.c:1579
#define execution_tag(x)
Definition: ri.h:1207
#define loop_body(x)
Definition: ri.h:1644
#define LOOP(x)
LOOP.
Definition: ri.h:1606
#define loop_execution(x)
Definition: ri.h:1648
#define loop_undefined
Definition: ri.h:1612
#define loop_domain
newgen_language_domain_defined
Definition: ri.h:218
#define range_upper(x)
Definition: ri.h:2290
#define statement_domain
newgen_sizeofexpression_domain_defined
Definition: ri.h:362
#define range_increment(x)
Definition: ri.h:2292
#define EXPRESSION(x)
EXPRESSION.
Definition: ri.h:1217
#define expression_undefined
Definition: ri.h:1223
#define expression_undefined_p(x)
Definition: ri.h:1224
#define range_lower(x)
Definition: ri.h:2288
#define statement_comments(x)
Definition: ri.h:2456
#define loop_range(x)
Definition: ri.h:1642
@ is_execution_parallel
Definition: ri.h:1190
@ is_language_c
Definition: ri.h:1567
#define loop_index(x)
Definition: ri.h:1640
char * strdup()
static char buf[BSZ]
Definition: split_file.c:157
The structure used to build lists in NewGen.
Definition: newgen_list.h:41
In modern PIPS programming, all is passed through a context instead of having a global variable.
bool gpu_loop_nest_annotate_parallel_p
True if we only deal with parallel loop nests.
bool fail_p
The generation may fail because of an unhandled case for isntance.
expression guard_expression
bool inner_reached
True only when we reach the inner annotated loop: