PIPS
gpu_memory.c
Go to the documentation of this file.
1 /* A simple phase that allocates memory on an accelerator and adds memory
2  transfers around a kernel
3 
4  Ronan.Keryell@hpc-project.com
5 */
6 #ifdef HAVE_CONFIG_H
7  #include "pips_config.h"
8 #endif
9 
10 #include "genC.h"
11 #include "linear.h"
12 #include "ri.h"
13 #include "effects.h"
14 #include "ri-util.h"
15 #include "effects-util.h"
16 #include "misc.h"
17 #include "effects-generic.h"
18 #include "effects-simple.h"
19 #include "control.h"
20 #include "callgraph.h"
21 #include "pipsdbm.h"
22 #include "resources.h"
23 
24 /** Store the loop nests found that meet the spec to be executed on a
25  GPU. Use a list and not a set or hash_map to have always the same
26  order */
28 
29 
30 static bool
32  /* An interesting loop must be parallel first...
33 
34  We recurse on statements instead of loops in order to pick up
35  information on the statement itself, such as pragmas
36  */
37  int parallel_loop_nest_depth = depth_of_parallel_perfect_loop_nest(s);
38  ifdebug(3) {
39  pips_debug(1, "Statement %td with // depth %d\n", statement_number(s),
40  parallel_loop_nest_depth);
41  print_statement(s);
42  }
43  if (parallel_loop_nest_depth > 0) {
44  // Register the loop-nest (note the list is in the reverse order):
46  /* Since we only outline outermost loop-nest, stop digging further in
47  this statement: */
48  pips_debug(1, "Statement %td marked to be outlined\n", statement_number(s));
49  return false;
50  }
51  // This statement is not a parallel loop, go on digging:
52  return true;
53 }
54 
55 
56 static void
58  ifdebug(1) {
59  pips_debug(1, "Parallel loop-nest of depth %d\n", depth);
60  print_statement(s);
61  }
62  /* First outline the innermost code (the kernel itself) to avoid
63  spoiling its memory effects if we start with the outermost code
64  first: */
65  list sk = CONS(STATEMENT,
67  NIL);
68  outliner(build_new_top_level_module_name("kernel_wrapper",false), sk);
69 
70  // Outline the kernel launcher:
71  list sl = CONS(STATEMENT, s, NIL);
72  outliner(build_new_top_level_module_name("kernel_launcher",false), sl);
73 }
74 
75 
76 bool gpu_memory(const char * module_name) {
77  // Use this module name and this environment variable to set
79  "GPU_IFY_DEBUG_LEVEL");
80 
81  // Get the effects and use them:
83 
84  // Apply the transformation on the kernel calls:
86 
87  // No longer use effects:
89 
90  // We may have outlined some code, so recompute the callees:
93 
94  // Put back the new statement module
96 }
statement outliner(const char *, list)
outline the statements in statements_to_outline into a module named outline_module_name the outlined ...
Definition: outlining.c:1327
static statement module_statement
Definition: alias_check.c:125
callees compute_callees(const statement stat)
Recompute the callees of a module statement.
Definition: callgraph.c:355
void set_cumulated_rw_effects(statement_effects)
void reset_cumulated_rw_effects(void)
const char * module_name(const char *s)
Return the module part of an entity name.
Definition: entity_names.c:296
#define gen_recurse(start, domain_number, flt, rwt)
Definition: genC.h:283
static void gpu_memory_apply(statement s, int depth)
Definition: gpu_memory.c:57
list loop_nests_to_outline
A simple phase that allocates memory on an accelerator and adds memory transfers around a kernel.
Definition: gpu_memory.c:27
static bool mark_loop_to_outline(const statement s)
Definition: gpu_memory.c:31
bool gpu_memory(const char *module_name)
Definition: gpu_memory.c:76
statement get_current_module_statement(void)
Get the current module statement.
Definition: static.c:208
void * gen_identity(const void *x)
Just return the argument.
Definition: genClib.c:2807
statement perfectly_nested_loop_to_body_at_depth(statement s, int depth)
Extract the loop-body of a perfect loop-nest at a given depth.
Definition: loop.c:646
int depth_of_parallel_perfect_loop_nest(statement s)
Compute the depth of a parallel perfect loop-nest.
Definition: loop.c:436
#define NIL
The empty list (nil in Lisp)
Definition: newgen_list.h:47
#define CONS(_t_, _i_, _l_)
List element cell constructor (insert an element at the beginning of a list)
Definition: newgen_list.h:150
#define PIPS_PHASE_POSTLUDE(new_module_statement)
End a transformation phase by putting back into PIPS the (possibly) modified statement.
#define PIPS_PHASE_PRELUDE(module_name, debug_env_var)
Start a phase that uses a module CODE.
string db_get_memory_resource(const char *rname, const char *oname, bool pure)
Return the pointer to the resource, whatever it is.
Definition: database.c:755
#define DB_PUT_MEMORY_RESOURCE(res_name, own_name, res_val)
conform to old interface.
Definition: pipsdbm-local.h:66
#define pips_debug
these macros use the GNU extensions that allow variadic macros, including with an empty list.
Definition: misc-local.h:145
void print_statement(statement)
Print a statement on stderr.
Definition: statement.c:98
string build_new_top_level_module_name(const char *prefix, bool prevent_suffix)
Get a new name for a module built from a prefix.
Definition: module.c:55
#define call_domain
newgen_callees_domain_defined
Definition: ri.h:58
#define statement_number(x)
Definition: ri.h:2452
#define STATEMENT(x)
STATEMENT.
Definition: ri.h:2413
#define ifdebug(n)
Definition: sg.c:47
The structure used to build lists in NewGen.
Definition: newgen_list.h:41
static int depth
la sequence de nids