PIPS
manage_parallel_loops.c
Go to the documentation of this file.
1 /*
2  Copyright 1989-2016 MINES ParisTech - HPC Project
3 
4  This file is part of PIPS.
5 
6  PIPS is free software: you can redistribute it and/or modify it
7  under the terms of the GNU General Public License as published by
8  the Free Software Foundation, either version 3 of the License, or
9  any later version.
10 
11  PIPS is distributed in the hope that it will be useful, but WITHOUT ANY
12  WARRANTY; without even the implied warranty of MERCHANTABILITY or
13  FITNESS FOR A PARTICULAR PURPOSE.
14 
15  See the GNU General Public License for more details.
16 
17  You should have received a copy of the GNU General Public License
18  along with PIPS. If not, see <http://www.gnu.org/licenses/>.
19 
20 */
21 
22 // do not compile unless required
23 #include "phases.h"
24 #if defined(BUILDER_LIMIT_NESTED_PARALLELISM) || \
25  defined(BUILDER_LIMIT_PARALLELISM_USING_COMPLEXITY)
26 
27 #ifdef HAVE_CONFIG_H
28  #include "pips_config.h"
29 #endif
30 
31 #include "genC.h"
32 #include "linear.h"
33 
34 #include "misc.h"
35 #include "pipsdbm.h"
36 #include "properties.h"
37 
38 #include "ri.h"
39 #include "ri-util.h"
40 #include "prettyprint.h" // for debugging
41 
42 #include "control.h" // PIPS_PHASE_POSTLUDE
43 
44 #include "complexity_ri.h"
45 #include "complexity.h" // used
46 
47 static list current = NIL;
48 static list next = NIL;
49 static list privates = NIL;
50 
51 /// @brief the fonction aims at identifing the parallel loops and queues them
52 /// in the next list.
53 /// @return false when a parallel loop is found
54 /// @param l, the loop to process
55 static bool identify_outer_loops (loop l) {
57  next = gen_loop_cons (l, next);
58  return false;
59  }
60  return true;
61 }
62 
63 /// @brief collect the privates variables of inner loops
64 /// @return TRUE
65 /// @param l, the loop to process
66 static bool collect_privates (loop l) {
68  list var = loop_private_variables_as_entites (l, true, true);
69  privates = gen_nconc (privates, var);
70  }
71  return true;
72 }
73 
74 /// @brief make the inner loops sequential
75 /// @param l, the loop to process
76 static void process_loop (loop l) {
78  // this loop is an innner loop -> make it sequential
80  }
81 }
82 
83 /**
84 **/
85 bool limit_nested_parallelism (const string module_name)
86 {
87  // Use this module name and this environment variable to set
89  "MANAGE_PARALLEL_LOOPS_DEBUG_LEVEL");
90 
91  int threshold = get_int_property ("NESTED_PARALLELISM_THRESHOLD");
92  if (threshold > 0) {
93  // initialize the next list with all outer parallel loops
94  gen_recurse(mod_stmt, loop_domain, identify_outer_loops, gen_null);
95  current = next;
96  next = NIL;
97  for (int i = 2; i <= threshold; i++) {
98  // mark the nested loop at level i
99  FOREACH (LOOP, l, current) {
100  gen_recurse(loop_body (l), loop_domain, identify_outer_loops, gen_null);
101  }
103  current = next;
104  next = NIL;
105  }
106  }
107 
108  // Targeted outer loops have been identified. They need to be processed
109  // inne loops are marked sequential and local variables are moved
110  // at the outer loop level
111  FOREACH (LOOP, l, current) {
112  gen_recurse(loop_body (l), loop_domain, collect_privates, process_loop);
113  // need to merge entity one by one otherwise a newgen assertion
114  // (about "no sharing of cons") raises
115  list locals = loop_locals (l);
116  FOREACH (ENTITY, e, privates) {
117  if (gen_in_list_p (e, locals) == false) {
118  locals = gen_entity_cons (e, locals);
119  }
120  }
121  loop_locals (l) = locals;
122  gen_free_list (privates);
123  privates = NIL;
124  }
125 
127  current = NIL;
128 
129  // Put back the new statement module
130  PIPS_PHASE_POSTLUDE(mod_stmt);
131 
132  return true;
133 }
134 
135 /*****************************************************************/
136 
137 typedef struct limit_uninteresting_parallelism_context{
138  bool (*loop_cost_testing_function)(statement, struct limit_uninteresting_parallelism_context *);
139  int startup_overhead;
140  int bandwidth;
141  int frequency;
142  list parallel_loops;
143 } limit_uninteresting_parallelism_context;
144 
145 static void init_limit_uninteresting_parallelism_context(limit_uninteresting_parallelism_context *p_ctxt,
146  bool (*loop_cost_testing_function)(statement, limit_uninteresting_parallelism_context *))
147 {
148  p_ctxt->loop_cost_testing_function = loop_cost_testing_function;
149  p_ctxt->startup_overhead = get_int_property("COMPUTATION_INTENSITY_STARTUP_OVERHEAD");
150  p_ctxt->bandwidth = get_int_property("COMPUTATION_INTENSITY_BANDWIDTH");
151  p_ctxt->frequency = get_int_property("COMPUTATION_INTENSITY_FREQUENCY");
152  p_ctxt->parallel_loops = NIL;
153 }
154 
155 
156 /** Cost function to test whether a loop is worth parallelizing
157 
158  Currently tests whether the highest coefficient in the whole loop
159  complexity polynome divided by COMPUTATION_INTENSITY_FREQUENCY
160  is higher than COMPUTATION_INTENSITY_STARTUP_OVERHEAD + 10.
161  */
162 static bool complexity_cost_effective_loop_p(statement s,
163  limit_uninteresting_parallelism_context * p_ctxt)
164 {
165  pips_assert("input statement must be a loop", statement_loop_p(s));
166  bool result = true;
168 
169  Ppolynome instruction_time = polynome_dup(complexity_polynome(comp));
170  polynome_scalar_mult(&instruction_time, 1.f/p_ctxt->frequency);
171  polynome_scalar_add(&instruction_time, p_ctxt->startup_overhead);
172 
173  int max_degree = polynome_max_degree(instruction_time);
174  pips_debug(1, "max_degree is: %d\n", max_degree);
175  float coeff=-1.f;
176  for(Ppolynome p = instruction_time; !POLYNOME_NUL_P(p); p = polynome_succ(p))
177  {
178  int curr_degree = (int)vect_sum(monome_term(polynome_monome(p)));
179  if(curr_degree == max_degree) {
180  coeff = monome_coeff(polynome_monome(p));
181  break;
182  }
183  }
184  polynome_rm(&instruction_time);
185 
186  pips_debug(1, "coeff is: %f\n", coeff);
187  result = (coeff > (float) (p_ctxt->startup_overhead + 10));
188  return result;
189 }
190 
191 static bool limit_uninteresting_parallelism_statement_in(statement s,
192  limit_uninteresting_parallelism_context * p_ctxt)
193 {
194  if (statement_loop_p(s))
195  {
196  pips_debug(1, "Entering loop statement with ordering: %03zd and number: %03zd\n",
198  ifdebug(1) {
199  print_statement(s);
200  }
201  loop l = statement_loop(s);
202  if (loop_parallel_p(l))
203  p_ctxt->parallel_loops = CONS(LOOP, l, p_ctxt->parallel_loops);
204 
205  }
206  return true;
207 }
208 
209 static void limit_uninteresting_parallelism_statement_out(statement s,
210  limit_uninteresting_parallelism_context * p_ctxt)
211 {
212 
213  if (statement_loop_p(s))
214  {
215  pips_debug(1, "Dealing with loop statement with ordering: %03zd and number: %03zd\n",
217  ifdebug(1) {
218  print_statement(s);
219  }
220 
221  loop l = statement_loop(s);
222  if (loop_parallel_p(l) && ! p_ctxt->loop_cost_testing_function(s, p_ctxt))
223  {
224  POP(p_ctxt->parallel_loops);
226  /* now deal with loop locals: they must be propagated back to outer parallel loops */
227  list l_locals = loop_locals(l);
228  entity index = loop_index(l);
229  if (!ENDP(p_ctxt->parallel_loops) && !ENDP(l_locals))
230  {
231  loop previous_parallel_loop = LOOP(CAR(p_ctxt->parallel_loops));
232  list previous_parallel_loop_locals = loop_locals(previous_parallel_loop);
233  list to_add = NIL;
234  FOREACH(ENTITY, local, l_locals)
235  {
236  if (local != index
237  && gen_find_eq( local, previous_parallel_loop_locals ) == entity_undefined
238  // && we should check that local is not declared inside the embedding loop
239  )
240  to_add = CONS(ENTITY, local, to_add);
241  }
242  loop_locals(previous_parallel_loop) = gen_append(previous_parallel_loop_locals, to_add);
243  }
244  }
245  pips_debug(1, "leaving loop\n");
246 
247  }
248 }
249 
250 /**
251 **/
253 {
254 
256  "MANAGE_PARALLEL_LOOPS_DEBUG_LEVEL");
258 
259  limit_uninteresting_parallelism_context ctxt;
260  init_limit_uninteresting_parallelism_context(&ctxt, complexity_cost_effective_loop_p);
262  statement_domain, limit_uninteresting_parallelism_statement_in, limit_uninteresting_parallelism_statement_out);
263 
265  // Put back the new statement module
266  PIPS_PHASE_POSTLUDE(mod_stmt);
267 
268  return true;
269 }
270 
271 #endif // BUILDER_LIMIT_*
int get_int_property(const string)
list gen_entity_cons(entity p, list l)
Definition: ri.c:2537
list gen_loop_cons(loop p, list l)
Definition: ri.c:1281
void const char const char const int
struct _newgen_struct_statement_ * statement
Definition: cloning.h:21
Ppolynome complexity_polynome(complexity comp)
Because complexity is composed of two elements, we use this function to get the first element : polyn...
Definition: comp_math.c:506
complexity load_statement_complexity(statement)
void reset_complexity_map(void)
void set_complexity_map(statement_mapping)
const char * module_name(const char *s)
Return the module part of an entity name.
Definition: entity_names.c:296
#define gen_context_recurse(start, ctxt, domain_number, flt, rwt)
Definition: genC.h:285
#define gen_recurse(start, domain_number, flt, rwt)
Definition: genC.h:283
statement get_current_module_statement(void)
Get the current module statement.
Definition: static.c:208
void gen_null(__attribute__((unused)) void *unused)
Ignore the argument.
Definition: genClib.c:2752
bool loop_parallel_p(loop l)
Test if a loop is parallel.
Definition: loop.c:393
list loop_private_variables_as_entites(loop obj, bool local, bool index)
Get the variables local or private to a loop.
Definition: loop.c:338
#define ENDP(l)
Test if a list is empty.
Definition: newgen_list.h:66
#define POP(l)
Modify a list pointer to point on the next element of the list.
Definition: newgen_list.h:59
#define NIL
The empty list (nil in Lisp)
Definition: newgen_list.h:47
#define CONS(_t_, _i_, _l_)
List element cell constructor (insert an element at the beginning of a list)
Definition: newgen_list.h:150
list gen_nconc(list cp1, list cp2)
physically concatenates CP1 and CP2 but do not duplicates the elements
Definition: list.c:344
#define CAR(pcons)
Get the value of the first element of a list.
Definition: newgen_list.h:92
void gen_free_list(list l)
free the spine of the list
Definition: list.c:327
bool gen_in_list_p(const void *vo, const list lx)
tell whether vo belongs to lx
Definition: list.c:734
#define FOREACH(_fe_CASTER, _fe_item, _fe_list)
Apply/map an instruction block on all the elements of a list.
Definition: newgen_list.h:179
void * gen_find_eq(const void *item, const list seq)
Definition: list.c:422
list gen_append(list l1, const list l2)
Definition: list.c:471
#define PIPS_PHASE_POSTLUDE(new_module_statement)
End a transformation phase by putting back into PIPS the (possibly) modified statement.
#define PIPS_PHASE_PRELUDE(module_name, debug_env_var)
Start a phase that use a module CODE.
string db_get_memory_resource(const char *rname, const char *oname, bool pure)
Return the pointer to the resource, whatever it is.
Definition: database.c:755
loop statement_loop(statement)
Get the loop of a statement.
Definition: statement.c:1374
bool statement_loop_p(statement)
Definition: statement.c:349
Value vect_sum(Pvecteur v)
Value vect_sum(Pvecteur v): somme des coefficients d'un vecteur (i.e.
Definition: reductions.c:261
#define pips_debug
these macros use the GNU extensions that allow variadic macros, including with an empty list.
Definition: misc-local.h:145
#define pips_assert(what, predicate)
common macros, two flavors depending on NDEBUG
Definition: misc-local.h:172
int bool
we cannot use an enum or stdbool because we need to be compatible with newgen, thus boolean need to h...
Definition: newgen_types.h:78
Ppolynome polynome_dup(Ppolynome pp)
Ppolynome polynome_dup(Ppolynome pp) creates and returns a copy of pp.
Definition: pnome-alloc.c:211
void polynome_rm(Ppolynome *ppp)
void polynome_rm(Ppolynome* ppp) frees space occupied by polynomial *ppp returns *ppp pointing to POL...
Definition: pnome-alloc.c:170
int polynome_max_degree(Ppolynome pp)
int polynome_max_degree(Ppolynome pp) returns the degree of polynomial pp Let's hope there aren't too...
Definition: pnome-reduc.c:113
void polynome_scalar_add(Ppolynome *ppp, float term)
void polynome_scalar_add(Ppolynome* ppp, float term) (*ppp) = (*ppp) + term !usage: polynome_scalar_a...
Definition: pnome-scal.c:86
void polynome_scalar_mult(Ppolynome *ppp, float factor)
void polynome_scalar_mult(Ppolynome* ppp, float factor) (*ppp) = factor * (*ppp) !...
Definition: pnome-scal.c:46
#define monome_term(pm)
#define polynome_monome(pp)
#define monome_coeff(pm)
Macros definitions.
#define POLYNOME_NUL_P(pp)
#define polynome_succ(pp)
void print_statement(statement)
Print a statement on stderr.
Definition: statement.c:98
#define execution_tag(x)
Definition: ri.h:1207
#define loop_body(x)
Definition: ri.h:1644
#define LOOP(x)
LOOP.
Definition: ri.h:1606
#define loop_execution(x)
Definition: ri.h:1648
#define loop_domain
newgen_language_domain_defined
Definition: ri.h:218
#define ENTITY(x)
ENTITY.
Definition: ri.h:2755
#define statement_ordering(x)
Definition: ri.h:2454
#define statement_domain
newgen_sizeofexpression_domain_defined
Definition: ri.h:362
#define entity_undefined
Definition: ri.h:2761
#define loop_locals(x)
Definition: ri.h:1650
@ is_execution_sequential
Definition: ri.h:1189
#define statement_number(x)
Definition: ri.h:2452
#define execution_parallel_p(x)
Definition: ri.h:1211
#define loop_index(x)
Definition: ri.h:1640
#define ifdebug(n)
Definition: sg.c:47
static size_t current
Definition: string.c:115
The structure used to build lists in NewGen.
Definition: newgen_list.h:41
bool limit_parallelism_using_complexity(const string)
bool limit_nested_parallelism(const string)
manage_parallel_loops.c