PIPS
freia_terapix.c File Reference
#include <stdint.h>
#include <stdlib.h>
#include "genC.h"
#include "misc.h"
#include "linear.h"
#include "ri.h"
#include "effects.h"
#include "ri-util.h"
#include "effects-util.h"
#include "properties.h"
#include "freia.h"
#include "freia_spoc_private.h"
#include "hwac.h"
+ Include dependency graph for freia_terapix.c:

Go to the source code of this file.

Macros

#define NORTH(v)   ((void*) (((_int)v)+0))
 
#define SOUTH(v)   ((void*) (((_int)v)+1))
 
#define WEST(v)   ((void*) (((_int)v)+2))
 
#define EAST(v)   ((void*) (((_int)v)+3))
 
#define IMG_PTR   "imagelet_"
 
#define RED_PTR   "reduction_"
 
#define ip2s(n)   i2a(get_int_property(n))
 

Functions

static void compute_dead_vertices (set deads, const set computed, const dag d, const dagvtx v)
 
static void erosion_optimization (dagvtx v, bool *north, bool *south, bool *west, bool *east)
 tell whether the kernel is used on each of the 4 directions. More...
 
static void update_erosions (const dag d, const dagvtx v, hash_table erosion)
 update_erosions(). More...
 
static int dag_terapix_measures (const dag d, hash_table erosion, int *width, int *cost, int *nops, int *north, int *south, int *west, int *east)
 compute some measures about DAG d. More...
 
static list dag_vertex_pred_imagelets (const dag d, const dagvtx v, const hash_table allocation)
 of ints More...
 
static bool terapix_gram_init (void)
 allocate a bitfield to describe the used cells in global memory. More...
 
static void terapix_gram_allocate (bool *used, int width, int height, int *x, int *y)
 terapix: allocate width x height in global memory. More...
 
static _int select_imagelet (set availables, int *nimgs, bool first)
 Return the first/last available imagelet, or create one if necessary. This ensures that the choice is deterministic. More...
 
static void terapix_image (string_buffer sb, int ff, int n)
 generate an image symbolic pointer (a name:-). More...
 
static void terapix_mcu_img (string_buffer code, int op, string ref, int n)
 set a double buffered image argument. More...
 
static void terapix_mcu_int (string_buffer code, int op, string ref, int val)
 set an integer argument. More...
 
static void terapix_mcu_val (string_buffer code, int op, string r, string s)
 set some value string argument. More...
 
static void terapix_mcu_pval (string_buffer code, int op, string ref, string p, string s)
 set some prefixed value string argument. More...
 
static void gram_param (string_buffer code, string_buffer decl, string name, dagvtx v, hash_table hparams, int width, int height, bool is_kernel, bool *used)
 copy some operator parameters in the global ram (aka gram). More...
 
static void terapix_gram_management (string_buffer code, string_buffer decl, int op, const freia_api_t *api, const dagvtx v, hash_table hparams, bool *used)
 manage GRAM global memory to pass parameters. More...
 
static void terapix_macro_code (string_buffer code, string_buffer decl, int op, const freia_api_t *api, bool *used, hash_table hparams, const dagvtx v, const list ins, int out)
 generate terapix code for More...
 
static void terapix_init_row (string_buffer decl, string_buffer code, string base, string suff, string mem, int nrow, string val, bool *used)
 initialize a few rows at mem address with value val More...
 
static void terapix_initialize_memory (string_buffer decl, string_buffer body, int nop, string mem, const freia_api_t *api, bool *used)
 initialize the memory at addr depending on the operation to perform More...
 
static void terapix_get_reduction (string_buffer decl, string_buffer tail, int n_op, string mem, const freia_api_t *api)
 generate reduction extraction code More...
 
static _int freia_terapix_call (const string module, const string fname_dag, string_buffer code, dag thedag, list *params)
 generate a terapix call for dag thedag. More...
 
static int freia_trpx_compile_one_dag (string module, list ls, dag d, string fname_fulldag, int n_split, int n_cut, set global_remainings, FILE *helper_file, set helpers, int stnb, hash_table signatures)
 generate terapix code for this one dag, which should be already split. More...
 
static void dag_terapix_erosion (const dag d, hash_table erosion)
 fill in erosion hash table from dag d. More...
 
static void dag_terapix_reset_erosion (const dag d)
 
static int dagvtx_terapix_priority (const dagvtx *v1, const dagvtx *v2)
 comparison function for sorting dagvtx in qsort, this is deep voodoo, because the priority has an impact on correctness? that should not be the case as only computations allowed by dependencies are scheduled. More...
 
static bool not_implemented (dagvtx v)
 whether vertex is not implemented in terapix More...
 
static bool terapix_not_implemented (dag d)
 whether dag is not implemented in terapix More...
 
static dagvtx choose_terapix_vertex (const list lv, bool started)
 choose a vertex, avoiding non combinable stuff if the list is started More...
 
static int cut_decision (dag d, hash_table erosion)
 would it seem interesting to split d? More...
 
static dag cut_perform (dag d, int cut, hash_table erodes, dag fulld, const set output_images)
 cut dag "d", possibly a subdag of "fulld", at "erosion" "cut" More...
 
static void migrate_statements (sequence sq, dag d, set dones)
 
list freia_trpx_compile_calls (string module, dag fulld, sequence sq, list ls, const hash_table occs, hash_table exchanges, const set output_images, FILE *helper_file, set helpers, int number)
 do compile a list of statements for terapix More...
 

Variables

static hash_table erosion = NULL
 global variable used by the dagvtx_terapix_priority function, because qsort does not allow to pass some descriptor. More...
 

Macro Definition Documentation

◆ EAST

#define EAST (   v)    ((void*) (((_int)v)+3))

Definition at line 89 of file freia_terapix.c.

◆ IMG_PTR

#define IMG_PTR   "imagelet_"

Definition at line 327 of file freia_terapix.c.

◆ ip2s

#define ip2s (   n)    i2a(get_int_property(n))

◆ NORTH

#define NORTH (   v)    ((void*) (((_int)v)+0))

Definition at line 86 of file freia_terapix.c.

◆ RED_PTR

#define RED_PTR   "reduction_"

Definition at line 328 of file freia_terapix.c.

◆ SOUTH

#define SOUTH (   v)    ((void*) (((_int)v)+1))

Definition at line 87 of file freia_terapix.c.

◆ WEST

#define WEST (   v)    ((void*) (((_int)v)+2))

Definition at line 88 of file freia_terapix.c.

Function Documentation

◆ choose_terapix_vertex()

static dagvtx choose_terapix_vertex ( const list  lv,
bool  started 
)
static

choose a vertex, avoiding non combinable stuff if the list is started

Definition at line 1606 of file freia_terapix.c.

1607 {
1608  pips_assert("list contains vertices", lv);
1609  if (started)
1610  {
1611  FOREACH(dagvtx, v, lv)
1612  if (!not_implemented(v))
1613  return v;
1614  }
1615  // just return the first vertex
1616  return DAGVTX(CAR(lv));
1617 }
#define DAGVTX(x)
DAGVTX.
static bool not_implemented(dagvtx v)
whether vertex is not implemented in terapix
#define CAR(pcons)
Get the value of the first element of a list.
Definition: newgen_list.h:92
#define FOREACH(_fe_CASTER, _fe_item, _fe_list)
Apply/map an instruction block on all the elements of a list.
Definition: newgen_list.h:179
#define pips_assert(what, predicate)
common macros, two flavors depending on NDEBUG
Definition: misc-local.h:172

◆ compute_dead_vertices()

static void compute_dead_vertices ( set  deads,
const set  computed,
const dag  d,
const dagvtx  v 
)
static
Returns
the dead vertices (their output is dead) after computing v in d. ??? should it take care that an output node is never dead?

Definition at line 52 of file freia_terapix.c.

54 {
55  list preds = dag_vertex_preds(d, v);
56  set futured_computed = set_dup(computed);
57  set_add_element(futured_computed, futured_computed, v);
58  FOREACH(dagvtx, p, preds)
59  if (// !gen_in_list_p(p, dag_outputs(d)) &&
60  list_in_set_p(dagvtx_succs(p), futured_computed))
61  set_add_element(deads, deads, p);
62  gen_free_list(preds);
63  set_free(futured_computed);
64 }
list dag_vertex_preds(const dag d, const dagvtx target)
return target predecessor vertices as a list.
Definition: dag-utils.c:680
#define dagvtx_succs(x)
void gen_free_list(list l)
free the spine of the list
Definition: list.c:327
bool list_in_set_p(const list, const set)
Definition: set.c:201
void set_free(set)
Definition: set.c:332
set set_dup(const set)
Definition: set.c:143
set set_add_element(set, const set, const void *)
Definition: set.c:152
FI: I do not understand why the type is duplicated at the set level.
Definition: set.c:59
The structure used to build lists in NewGen.
Definition: newgen_list.h:41

References dag_vertex_preds(), dagvtx_succs, FOREACH, gen_free_list(), list_in_set_p(), set_add_element(), set_dup(), and set_free().

Referenced by freia_terapix_call().

+ Here is the call graph for this function:
+ Here is the caller graph for this function:

◆ cut_decision()

static int cut_decision ( dag  d,
hash_table  erosion 
)
static

would it seem interesting to split d?

Returns
the erosion up to which to split, or 0 if no split. Should we also/instead consider the expected cost?

Definition at line 1625 of file freia_terapix.c.

1626 {
1627  int com_cost_per_row = get_int_property(trpx_dmabw_prop);
1628  int width, cost, nops, n, s, w, e;
1629  (void)dag_terapix_measures(d, erosion, &width, &cost, &nops, &n, &s, &w, &e);
1630 
1631  // bye bye...
1632  if (width==0) return 0;
1633 
1634  int nins = gen_length(dag_inputs(d)), nouts = gen_length(dag_outputs(d));
1635 
1636  // if we assume that the imagelet size is quite large, say around 128
1637  // even with double buffers. The only reason to cut is because
1638  // of the erosion on the side which reduces the amount of valid data,
1639  // but there is really a point to do that only communications are still
1640  // masked by computations after splitting the dag...
1641 
1642  // first we compute a possible number of splits
1643  // computation cost = communication cost (in cycle per imagelet row)
1644  // communication cost = (nins + 2*width*n_splits + nouts) * cost_per_row
1645  // the width is taken as the expected number of images to extract and
1646  // reinject (hence 2*) if the dag is split.
1647  // this is really an approximation... indeed, nothing ensures that
1648  // the initial input is not still alive at the chosen cut?
1649 
1650  // for anr999 the gradient of depth 10 is just enough to cover the coms.
1651  // for lp, about 1(.2) split is suggested.
1652 
1653  // compute number of cuts, that is the number of amortizable load/store
1654  // ??? maybe I should incorporate a margin?
1655  double n_cuts;
1656 
1657  // please note that these formula are somehow approximated and the results
1658  // may be proved wrong.
1659  if (trpx_overlap_io_p())
1660  {
1661  // number of image to communicate is MAX(#in,#out)
1662  int nimgs = nins>nouts? nins: nouts;
1663  // the overhead of a cut is one transfer
1664  n_cuts = ((1.0*cost/com_cost_per_row)-nimgs)/(1.0*width);
1665  }
1666  else
1667  n_cuts = ((1.0*cost/com_cost_per_row)-nins-nouts)/(2.0*width);
1668 
1669  pips_debug(2, "cost=%d com_cost=%d nins=%d width=%d nouts=%d n_cuts=%f\n",
1670  cost, com_cost_per_row, nins, width, nouts, n_cuts);
1671 
1672  if (n_cuts < 1.0) return 0;
1673 
1674  // we also have to check that there is a significant erosion!
1675  // I first summarize the erosion to the max(n,s,e,w)
1676  // grrr... C really lacks a stupid max/min function varyadic!
1677  // I could compute per direction, if necessary...
1678  int erode = n;
1679  if (s>erode) erode=s;
1680  if (e>erode) erode=e;
1681  if (w>erode) erode=w;
1682 
1683  // then we should decide...
1684  // there should be enough computations to amortize a split,
1685  // given that an erode/dilate costs about 15 cycles per row
1686  // there should be about 2 of them to amortize/hide one imagelet transfer,
1687  // whether as input or output.
1688 
1689  int cut = erode/((int)(n_cuts+1));
1690 
1691  // try to fix the balance chosen by the integer division
1692  // hmmm... should really look at the weights to choose a side here...
1693  if (erode%2==1 && n_cuts<2.0 && nouts<=nins)
1694  cut++;
1695 
1696  return cut;
1697 }
int get_int_property(const string)
void const char const char const int
void erode(int width, int height, type_t img_in[width][height], type_t img_scratch[width][height], type_t img_out[width][height])
Definition: erode_dilate.c:80
#define dag_outputs(x)
#define dag_inputs(x)
static hash_table erosion
global variable used by the dagvtx_terapix_priority function, because qsort does not allow to pass so...
static int dag_terapix_measures(const dag d, hash_table erosion, int *width, int *cost, int *nops, int *north, int *south, int *west, int *east)
compute some measures about DAG d.
#define trpx_overlap_io_p()
Definition: freia_terapix.h:51
#define trpx_dmabw_prop
Definition: freia_terapix.h:35
size_t gen_length(const list l)
Definition: list.c:150
#define pips_debug
these macros use the GNU extensions that allow variadic macros, including with an empty list.
Definition: misc-local.h:145

◆ cut_perform()

static dag cut_perform ( dag  d,
int  cut,
hash_table  erodes,
dag  fulld,
const set  output_images 
)
static

cut dag "d", possibly a subdag of "fulld", at "erosion" "cut"

Definition at line 1701 of file freia_terapix.c.

1703 {
1704  pips_debug(2, "cutting with cut=%d\n", cut);
1705  pips_assert("something cut width", cut>0);
1706 
1707  set
1708  // current set of vertices to group
1710  // all vertices which are considered computed
1711  done = set_make(set_pointer);
1712 
1713  list lcurrent = NIL, computables;
1714  set_assign_list(done, dag_inputs(d));
1715 
1716  // GLOBAL
1717  pips_assert("erosion is clean", erosion==NULL);
1720 
1721  // transitive closure
1722  bool changed = true;
1723  while (changed &&
1724  (computables = dag_computable_vertices(d, done, done, current)))
1725  {
1726  // ensure determinism
1728  changed = false;
1729  FOREACH(dagvtx, v, computables)
1730  {
1731  // keep erosion up to cut
1732  // hmmm. what about \sigma_{d \in NSEW} erosion_d ?
1733  // would not work because the erosion only make sense if it is
1734  // the same for all imagelet, or said otherwise the erosion is
1735  // aligned to the worst case so that tiling can reasonnably take place.
1736  if ((((_int) hash_get(erodes, NORTH(v))) <= cut) &&
1737  (((_int) hash_get(erodes, SOUTH(v))) <= cut) &&
1738  (((_int) hash_get(erodes, EAST(v))) <= cut) &&
1739  (((_int) hash_get(erodes, WEST(v))) <= cut))
1740  {
1742  set_add_element(done, done, v);
1743  lcurrent = CONS(dagvtx, v, lcurrent);
1744  changed = true;
1745  }
1746  }
1747 
1748  // cleanup
1749  gen_free_list(computables), computables = NIL;
1750  }
1751 
1752  // cleanup GLOBAL
1753  hash_table_free(erosion), erosion = NULL;
1754 
1755  lcurrent = gen_nreverse(lcurrent);
1756  pips_assert("some vertices where extracted", lcurrent!=NIL);
1757 
1758  // build extracted dag
1759  dag nd = make_dag(NIL, NIL, NIL);
1760  FOREACH(dagvtx, v, lcurrent)
1761  {
1762  // pips_debug(7, "extracting node %" _intFMT "\n", dagvtx_number(v));
1764  }
1765  dag_compute_outputs(nd, NULL, output_images, NIL, false);
1767 
1768  // cleanup full dag
1769  FOREACH(dagvtx, v, lcurrent)
1770  dag_remove_vertex(d, v);
1771 
1772  // ??? should not be needed?
1773  freia_hack_fix_global_ins_outs(fulld, nd);
1775 
1776  ifdebug(1)
1777  {
1780  }
1781 
1782  // cleanup
1783  gen_free_list(lcurrent), lcurrent = NIL;
1784  set_free(done);
1785  set_free(current);
1786  return nd;
1787 }
dag make_dag(list a1, list a2, list a3)
void dag_cleanup_other_statements(dag d)
remove unneeded statements? you must know they are really un-needed!
Definition: dag-utils.c:2191
void dag_consistency_asserts(dag d)
do some consistency checking...
Definition: dag-utils.c:531
void freia_hack_fix_global_ins_outs(dag dfull, dag d)
catch some cases of missing outs between splits...
Definition: dag-utils.c:2166
dagvtx copy_dagvtx_norec(dagvtx v)
copy a vertex, but without its successors.
Definition: dag-utils.c:611
list dag_computable_vertices(dag d, const set computed, const set maybe, const set currents)
return the vertices which may be computed from the list of available images, excluding vertices in ex...
Definition: dag-utils.c:2307
void dag_remove_vertex(dag d, const dagvtx v)
remove vertex v from dag d.
Definition: dag-utils.c:570
void dag_compute_outputs(dag d, const hash_table occs, const set output_images, const list ld, bool inloop)
(re)compute the list of GLOBAL input & output images for this dag ??? BUG the output is rather an app...
Definition: dag-utils.c:2073
void dag_append_vertex(dag d, dagvtx nv)
append new vertex nv to dag d.
Definition: dag-utils.c:632
#define WEST(v)
Definition: freia_terapix.c:88
static void dag_terapix_erosion(const dag d, hash_table erosion)
fill in erosion hash table from dag d.
#define EAST(v)
Definition: freia_terapix.c:89
static int dagvtx_terapix_priority(const dagvtx *v1, const dagvtx *v2)
comparison function for sorting dagvtx in qsort, this is deep voodoo, because the priority has an imp...
#define SOUTH(v)
Definition: freia_terapix.c:87
#define NORTH(v)
Definition: freia_terapix.c:86
list gen_nreverse(list cp)
reverse a list in place
Definition: list.c:304
#define NIL
The empty list (nil in Lisp)
Definition: newgen_list.h:47
#define CONS(_t_, _i_, _l_)
List element cell constructor (insert an element at the beginning of a list)
Definition: newgen_list.h:150
void gen_sort_list(list l, gen_cmp_func_t compare)
Sorts a list of gen_chunks in place, to avoid allocations...
Definition: list.c:796
hash_table hash_table_make(hash_key_type key_type, size_t size)
Definition: hash.c:294
void * hash_get(const hash_table htp, const void *key)
this function retrieves in the hash table pointed to by htp the couple whose key is equal to key.
Definition: hash.c:449
void hash_table_free(hash_table htp)
this function deletes a hash table that is no longer useful.
Definition: hash.c:327
@ hash_pointer
Definition: newgen_hash.h:32
set set_assign_list(set, const list)
assigns a list contents to a set all duplicated elements are lost
Definition: set.c:474
@ set_pointer
Definition: newgen_set.h:44
set set_make(set_type)
Create an empty set of any type but hash_private.
Definition: set.c:102
intptr_t _int
_INT
Definition: newgen_types.h:53
int(* gen_cmp_func_t)(const void *, const void *)
Definition: newgen_types.h:114
#define ifdebug(n)
Definition: sg.c:47
static size_t current
Definition: string.c:115

◆ dag_terapix_erosion()

static void dag_terapix_erosion ( const dag  d,
hash_table  erosion 
)
static

fill in erosion hash table from dag d.

Definition at line 1415 of file freia_terapix.c.

1416 {
1417  int i = 0;
1418  dag_terapix_measures(d, erosion, &i, &i, &i, &i, &i, &i, &i);
1419 }

References dag_terapix_measures(), and erosion.

Referenced by dag_terapix_reset_erosion().

+ Here is the call graph for this function:
+ Here is the caller graph for this function:

◆ dag_terapix_measures()

static int dag_terapix_measures ( const dag  d,
hash_table  erosion,
int *  width,
int *  cost,
int *  nops,
int *  north,
int *  south,
int *  west,
int *  east 
)
static

compute some measures about DAG d.

Returns
its length (depth); width, aka maximum live produced image in a level by level terapix computation; cost (per row) scheduling (could do a better job with a better scheduling?); maximal erosion in all four directions

Definition at line 157 of file freia_terapix.c.

161 {
162  set processed = set_make(set_pointer);
163  int dcost = 0, dlength = 0, dwidth = gen_length(dag_inputs(d)), dnops = 0;
164  bool keep_erosion = erosion!=NULL;
165  // vertex output -> NSWE erosion (v+0 to v+3 is N S W E)
166  if (!keep_erosion) erosion = hash_table_make(hash_pointer, 0);
167 
168  FOREACH(dagvtx, in, dag_inputs(d))
169  update_erosions(d, in, erosion);
170 
171  list lv;
172  while ((lv = dag_computable_vertices(d, processed, processed, processed)))
173  {
174  dlength++;
175  int level_width = 0;
176  FOREACH(dagvtx, v, lv)
177  {
178  const freia_api_t * api = dagvtx_freia_api(v);
179  if (freia_convolution_p(v)) // special handling...
180  {
181  _int w, h;
182  if (freia_convolution_width_height(v, &w, &h, false))
183  dcost += 8+(api->terapix.cost*w*h); // hmmm? 3x3 is 35?
184  else
185  dcost += 35; // au pif 3x3
186  }
187  else
188  dcost += api->terapix.cost;
189  // only count non null operations
190  if (api->terapix.cost && api->terapix.cost!=-1) dnops ++;
191  if (api->arg_img_out) level_width++;
192  update_erosions(d, v, erosion);
193  }
194  if (level_width>dwidth) dwidth = level_width;
195 
196  set_append_list(processed, lv);
197  gen_free_list(lv);
198  }
199 
200  // update width
201  int nouts = gen_length(dag_outputs(d));
202  if (nouts>dwidth) dwidth = nouts;
203 
204  // compute overall worth erosion
205  int n=0, s=0, w=0, e=0;
207  {
208  if ((_int)hash_get(erosion, NORTH(out))>n)
209  n = (int) (_int) hash_get(erosion, NORTH(out));
210  if ((_int)hash_get(erosion, SOUTH(out))>s)
211  s = (int) (_int) hash_get(erosion, SOUTH(out));
212  if ((_int)hash_get(erosion, WEST(out))>w)
213  w = (int) (_int) hash_get(erosion, WEST(out));
214  if ((_int)hash_get(erosion, EAST(out))>e)
215  e = (int) (_int) hash_get(erosion, EAST(out));
216  }
217 
218  // cleanup
219  set_free(processed);
220  if (!keep_erosion) hash_table_free(erosion);
221 
222  // return results
223  *north = n, *south = s, *west = w, *east = e,
224  *width = dwidth, *cost = dcost, *nops = dnops;
225  return dlength;
226 }
static FILE * out
Definition: alias_check.c:128
bool freia_convolution_p(dagvtx v)
is it the convolution special case?
Definition: freia-utils.c:1441
bool freia_convolution_width_height(dagvtx v, _int *pw, _int *ph, bool check)
get width & height of convolution
Definition: freia-utils.c:1449
#define dagvtx_freia_api(v)
Definition: freia.h:97
static void update_erosions(const dag d, const dagvtx v, hash_table erosion)
update_erosions().
Definition: freia_terapix.c:95
set set_append_list(set, const list)
add list l items to set s, which is returned.
Definition: set.c:460
FREIA API function name -> SPoC hardware description (and others?)
Definition: freia.h:71
terapix_hw_t terapix
Definition: freia.h:90
unsigned int arg_img_out
Definition: freia.h:79

References freia_api_t::arg_img_out, terapix_hw_t::cost, dag_computable_vertices(), dag_inputs, dag_outputs, dagvtx_freia_api, EAST, erosion, FOREACH, freia_convolution_p(), freia_convolution_width_height(), gen_free_list(), gen_length(), hash_get(), hash_pointer, hash_table_free(), hash_table_make(), int, NORTH, out, set_append_list(), set_free(), set_make(), set_pointer, SOUTH, freia_api_t::terapix, update_erosions(), and WEST.

Referenced by dag_terapix_erosion(), and freia_terapix_call().

+ Here is the call graph for this function:
+ Here is the caller graph for this function:

◆ dag_terapix_reset_erosion()

static void dag_terapix_reset_erosion ( const dag  d)
static

Definition at line 1426 of file freia_terapix.c.

1427 {
1428  pips_assert("erosion is allocated", erosion!=NULL);
1431 }
void hash_table_clear(hash_table htp)
Clears all entries of a hash table HTP.
Definition: hash.c:305

References dag_terapix_erosion(), erosion, hash_table_clear(), and pips_assert.

+ Here is the call graph for this function:

◆ dag_vertex_pred_imagelets()

static list dag_vertex_pred_imagelets ( const dag  d,
const dagvtx  v,
const hash_table  allocation 
)
static

of ints

Returns
the list of inputs to vertex v as imagelet numbers.

Definition at line 230 of file freia_terapix.c.

232 {
233  list limagelets = NIL;
235  {
236  dagvtx prod = dagvtx_get_producer(d, v, img, 0);
237  pips_assert("some producer found!", prod!=NULL);
238  limagelets =
239  gen_nconc(limagelets,
240  CONS(int, (int)(_int) hash_get(allocation, prod), NIL));
241  }
242  return limagelets;
243 }
dagvtx dagvtx_get_producer(const dag d, const dagvtx sink, const entity e, _int before_number)
return (last) producer of image e for vertex sink, or NULL if none found.
Definition: dag-utils.c:156
#define dagvtx_content(x)
#define vtxcontent_inputs(x)
list gen_nconc(list cp1, list cp2)
physically concatenates CP1 and CP2 but do not duplicates the elements
Definition: list.c:344
int allocation
External variables for direct call to PIP.
Definition: pip.c:92

References allocation, CONS, dagvtx_content, dagvtx_get_producer(), FOREACH, gen_nconc(), hash_get(), NIL, pips_assert, and vtxcontent_inputs.

Referenced by freia_terapix_call().

+ Here is the call graph for this function:
+ Here is the caller graph for this function:

◆ dagvtx_terapix_priority()

static int dagvtx_terapix_priority ( const dagvtx *  v1,
const dagvtx *  v2 
)
static

comparison function for sorting dagvtx in qsort, this is deep voodoo, because the priority has an impact on correctness? that should not be the case as only computations allowed by dependencies are scheduled.

tells v1 < (before) v2 => -1

Definition at line 1439 of file freia_terapix.c.

1440 {
1441  pips_assert("global erosion is set", erosion!=NULL);
1442 
1443  // ??? should prioritize if more outputs?
1444  // ??? should prioritize inplace?
1445  // ??? should prioritize no erosion first? levels do that currrently?
1446  string why = "none";
1447  int result = 0;
1448  vtxcontent
1449  c1 = dagvtx_content(*v1),
1450  c2 = dagvtx_content(*v2);
1451  const freia_api_t
1452  * a1 = dagvtx_freia_api(*v1),
1453  * a2 = dagvtx_freia_api(*v2);
1454 
1455  // prioritize first scalar ops, measures and last copies
1456  // if there is only one of them
1457  if (vtxcontent_optype(c1)!=vtxcontent_optype(c2))
1458  {
1459  // non implemented stuff
1461  result = 1, why = "impl";
1462  else if (!freia_aipo_terapix_implemented(a2))
1463  result = -1, why = "impl";
1464  // scalars operations first to remove (scalar) dependences
1465  else if (vtxcontent_optype(c1)==spoc_type_oth)
1466  result = -1, why = "scal";
1467  else if (vtxcontent_optype(c2)==spoc_type_oth)
1468  result = 1, why = "scal";
1469  // then measurements are put first
1470  else if (vtxcontent_optype(c1)==spoc_type_mes)
1471  result = -1, why = "mes";
1472  else if (vtxcontent_optype(c2)==spoc_type_mes)
1473  result = 1, why = "mes";
1474  // the copies are performed last...
1475  else if (vtxcontent_optype(c1)==spoc_type_nop)
1476  result = 1, why = "copy";
1477  else if (vtxcontent_optype(c2)==spoc_type_nop)
1478  result = -1, why = "copy";
1479  // idem with image generation...
1480  else if (vtxcontent_optype(c1)==spoc_type_alu &&
1481  vtxcontent_inputs(c1)==NIL)
1482  result = 1, why = "gen";
1483  else if (vtxcontent_optype(c2)==spoc_type_alu &&
1484  vtxcontent_inputs(c2)==NIL)
1485  result = -1, why = "gen";
1486  // ??? do inplace last
1487  // ??? or ONLY if there is a shared input?
1488  else if (a1->terapix.inplace && !a2->terapix.inplace)
1489  result = 1, why = "inplace";
1490  else if (!a1->terapix.inplace && a2->terapix.inplace)
1491  result = -1, why = "inplace";
1492  }
1493 
1494  // ??? priorise when an image is freed
1495 
1496  if (result==0 &&
1497  // is there an image output?
1502  {
1503  ifdebug(6) {
1504  dagvtx_dump(stderr, "v1", *v1);
1505  dagvtx_dump(stderr, "v2", *v2);
1506  }
1507  pips_assert("erosion is defined",
1508  hash_defined_p(erosion, NORTH(*v1)) &&
1509  hash_defined_p(erosion, NORTH(*v2)));
1510 
1511  // try to conclude with erosions:
1512  // not sure about the right partial order to use...
1513  int e1 = (int)
1514  ((_int) hash_get(erosion, NORTH(*v1)) +
1515  (_int) hash_get(erosion, SOUTH(*v1)) +
1516  (_int) hash_get(erosion, WEST(*v1)) +
1517  (_int) hash_get(erosion, EAST(*v1))),
1518  e2 = (int)
1519  ((_int) hash_get(erosion, NORTH(*v2)) +
1520  (_int) hash_get(erosion, SOUTH(*v2)) +
1521  (_int) hash_get(erosion, WEST(*v2)) +
1522  (_int) hash_get(erosion, EAST(*v2)));
1523 
1524  pips_debug(6, "e1=%d, e2=%d\n", e1, e2);
1525 
1526  if (e1!=e2)
1527  result = e1-e2, why = "erosion";
1528  }
1529 
1530  // ??? I should look at in place?
1531  // ??? I should look at the number live uses?
1532 
1533  if (result==0)
1534  {
1535  // if not set by previous case, use other criterions
1536  int
1537  l1 = (int) gen_length(vtxcontent_inputs(c1)),
1538  l2 = (int) gen_length(vtxcontent_inputs(c2));
1539 
1540  // count non mesure successors:
1541  int nms1 = 0, nms2 = 0;
1542 
1543  FOREACH(dagvtx, vs1, dagvtx_succs(*v1))
1544  if (dagvtx_optype(vs1)!=spoc_type_mes) nms1++;
1545 
1546  FOREACH(dagvtx, vs2, dagvtx_succs(*v2))
1547  if (dagvtx_optype(vs2)!=spoc_type_mes) nms2++;
1548 
1549  if (l1!=l2 && (l1==0 || l2==0))
1550  // put image generators at the end, after any other computation
1551  result = l2-l1, why = "args";
1552  else if (nms1!=nms2 && l1==1 && l2==1)
1553  // the less successors the better? the rational is:
1554  // - mesures are handled before and do not have successors anyway,
1555  // - so this is about whether a result of an unary op is reused by
1556  // two nodes, in which case it will just jam the pipeline, so
1557  // try to put other computations before it. Note that mes
1558  // successors do not really count, as the image is not lost.
1559  result = nms1 - nms2, why = "succs";
1560  else if (l1!=l2)
1561  // else ??? no effect on my validation.
1562  result = l2-l1, why = "args2";
1563  else if (vtxcontent_optype(c1)!=vtxcontent_optype(c2))
1564  // otherwise use the op types, which are somehow ordered
1565  // so that if all is well the pipe is filled in order.
1566  result = vtxcontent_optype(c1) - vtxcontent_optype(c2), why = "ops";
1567  else
1568  // if all else fails, rely on statement numbers.
1569  result = dagvtx_number(*v1) - dagvtx_number(*v2), why = "stats";
1570  }
1571 
1572  pips_debug(6, "%" _intFMT " %s %s %" _intFMT " %s (%s)\n",
1573  dagvtx_number(*v1), dagvtx_operation(*v1),
1574  result<0? ">": (result==0? "=": "<"),
1575  dagvtx_number(*v2), dagvtx_operation(*v2), why);
1576 
1577  pips_assert("total order", v1==v2 || result!=0);
1578  return result;
1579 }
_int dagvtx_optype(const dagvtx v)
Definition: dag-utils.c:116
_int dagvtx_number(const dagvtx v)
returns the vertex number, i.e.
Definition: dag-utils.c:98
void dagvtx_dump(FILE *out, const string name, const dagvtx v)
for dag debug.
Definition: dag-utils.c:186
string dagvtx_operation(const dagvtx v)
Definition: dag-utils.c:134
bool freia_aipo_terapix_implemented(const freia_api_t *api)
whether api available with Ter@pix
Definition: freia-utils.c:1426
@ spoc_type_mes
Definition: freia_spoc.h:179
@ spoc_type_nop
Definition: freia_spoc.h:174
@ spoc_type_oth
Definition: freia_spoc.h:173
@ spoc_type_alu
Definition: freia_spoc.h:177
#define vtxcontent_optype(x)
bool hash_defined_p(const hash_table htp, const void *key)
true if key has e value in htp.
Definition: hash.c:484
#define _intFMT
Definition: newgen_types.h:57

References _intFMT, dagvtx_content, dagvtx_dump(), dagvtx_freia_api, dagvtx_number(), dagvtx_operation(), dagvtx_optype(), dagvtx_succs, EAST, erosion, FOREACH, freia_aipo_terapix_implemented(), gen_length(), hash_defined_p(), hash_get(), ifdebug, terapix_hw_t::inplace, int, NIL, NORTH, pips_assert, pips_debug, SOUTH, spoc_type_alu, spoc_type_mes, spoc_type_nop, spoc_type_oth, freia_api_t::terapix, vtxcontent_inputs, vtxcontent_optype, and WEST.

+ Here is the call graph for this function:

◆ erosion_optimization()

static void erosion_optimization ( dagvtx  v,
bool *  north,
bool *  south,
bool *  west,
bool *  east 
)
static

tell whether the kernel is used on each of the 4 directions.

Definition at line 68 of file freia_terapix.c.

70 {
71  // default result
72  *north = true, *south = true, *west = true, *east = true;
73  intptr_t k00, k10, k20, k01, k11, k21, k02, k12, k22;
74  freia_extract_kernel_vtx(v, false,
75  &k00, &k10, &k20, &k01, &k11, &k21, &k02, &k12, &k22);
76  // summarize for each four directions
77  *north = k00 || k10 || k20;
78  *south = k02 || k12 || k22;
79  *west = k00 || k01 || k02;
80  *east = k20 || k21 || k22;
81 }
bool freia_extract_kernel_vtx(dagvtx v, bool strict, intptr_t *k00, intptr_t *k10, intptr_t *k20, intptr_t *k01, intptr_t *k11, intptr_t *k21, intptr_t *k02, intptr_t *k12, intptr_t *k22)
vertex-based version
Definition: freia-utils.c:2012
#define intptr_t
Definition: stdint.in.h:294

References freia_extract_kernel_vtx(), and intptr_t.

Referenced by update_erosions().

+ Here is the call graph for this function:
+ Here is the caller graph for this function:

◆ freia_terapix_call()

static _int freia_terapix_call ( const string  module,
const string  fname_dag,
string_buffer  code,
dag  thedag,
list * params
)
static

generate a terapix call for dag thedag.

The memory allocation is managed here. However, this function is dumb: the scheduling is just inherited as is...

Returns
number of output images...
Parameters
params: list of expression

Definition at line 710 of file freia_terapix.c.

716 {
717  // total number of imagelets used for computing the dag
718  // will be updated later, implicitly derived from the scheduling
719  int n_imagelets = 0;
720  // number of input images
721  int n_ins = gen_length(dag_inputs(thedag));
722  // number of output images
723  int n_outs = gen_length(dag_outputs(thedag));
724  // number of needed double buffers for I/Os.
725  // this is also the number of I/O images
726  int n_double_buffers;
727 
728  if (trpx_overlap_io_p())
729  n_double_buffers = n_ins+n_outs;
730  else
731  n_double_buffers = (n_ins>n_outs)? n_ins: n_outs; // max(#ins, #outs)
732 
733  pips_assert("some I/O images", n_double_buffers>0);
734 
735  // the memory will be decremented for "measures" data (reductions),
736  // and then divided among imagelets
737  int available_memory = get_int_property(trpx_mem_prop);
738 
740  head = string_buffer_make(true),
741  decl = string_buffer_make(true),
742  init = string_buffer_make(true),
743  body = string_buffer_make(true),
744  dbio = string_buffer_make(true),
745  tail = string_buffer_make(true);
746 
747  // array variable name in caller -> local kernel parameter
748  // used to detect if a kernel is already available, so as to skip
749  // its copy and share the generated parameter.
751 
752  // number of arguments to generated function
753  int nargs = 0;
754 
755  // get stats
756  int length, width, cost, nops, n, s, w, e;
757  length = dag_terapix_measures(thedag, NULL,
758  &width, &cost, &nops, &n, &s, &w, &e);
759 
760  int comm = get_int_property(trpx_dmabw_prop);
761 
762 // integer property to string
763 #define ip2s(n) i2a(get_int_property(n))
764 
765  // show stats in function's comments
766  sb_cat(head, "\n"
767  "/* FREIA terapix helper function for module ", module, "\n");
768  sb_cat(head, " *\n");
769  // show terapix code generation parameters
770  sb_cat(head, " * RAMPE = ", ip2s(trpx_mem_prop), "\n");
771  sb_cat(head, " * NPE = ", ip2s(trpx_npe_prop), "\n");
772  sb_cat(head, " * DMA BW = ", ip2s(trpx_dmabw_prop), "\n");
773  sb_cat(head, " * GRAM W = ", ip2s(trpx_gram_width), "\n");
774  sb_cat(head, " * GRAM H = ", ip2s(trpx_gram_height), "\n");
775  sb_cat(head, " * DAG CUT = ", get_string_property(trpx_dag_cut), "\n");
776  sb_cat(head, " * OVERLAP = ", bool_to_string(trpx_overlap_io_p()), "\n");
777  sb_cat(head, " * IMAGE H = ", ip2s("FREIA_IMAGE_HEIGHT"), "\n");
778  sb_cat(head, " * MAX SIZE = ", ip2s(trpx_max_size), "\n");
779  sb_cat(head, " *\n");
780  // show dag statistics
781  sb_cat(head, " * ", i2a(n_ins), " input image", n_ins>1? "s": "");
782  sb_cat(head, ", ", i2a(n_outs), " output image", n_outs>1? "s": "", "\n");
783  sb_cat(head, " * ", i2a(nops), " image operations in dag\n");
784  sb_cat(head, " * dag length is ", i2a(length));
785  sb_cat(head, ", dag width is ", i2a(width), "\n");
786  sb_cat(head, " * costs in cycles per imagelet row:\n");
787  sb_cat(head, " * - computation: ", i2a(cost), "\n");
788  // number of transfers depends on overlapping
789  int n_trs = trpx_overlap_io_p()? (n_ins>n_outs? n_ins: n_outs): n_ins+n_outs;
790  sb_cat(head, " * - communication: ", i2a(comm*n_trs), "\n");
791  sb_cat(head, " */\n");
792 
793  // generate function declaration
794  sb_cat(head, "freia_status ", fname_dag, "(");
795  for (int i = 0; i<n_outs; i++)
796  sb_cat(head, nargs++? ",": "", "\n " FREIA_IMAGE "o", i2a(i));
797  for (int i = 0; i<n_ins; i++)
798  sb_cat(head, nargs++? ",": "", "\n const " FREIA_IMAGE "i", i2a(i));
799  // other arguments to come...
800 
801  // corresponding helper call arguments
802  list limg = NIL;
803  FOREACH(dagvtx, voa, dag_outputs(thedag))
804  limg = CONS(entity, vtxcontent_out(dagvtx_content(voa)), limg);
805  FOREACH(dagvtx, via, dag_inputs(thedag))
806  limg = CONS(entity, vtxcontent_out(dagvtx_content(via)), limg);
807  limg = gen_nreverse(limg);
808 
809  sb_cat(decl,
810  "{\n"
811  " // declarations:\n"
812  " freia_microcode mcode;\n"
813  " freia_op_param param;\n"
814  " freia_dynamic_param dyn_param;\n"
815  " terapix_gram gram;\n"
816  " int i;\n" // not always used...
817  " freia_status ret = FREIA_OK;\n"
818  " // data structures for reductions\n"
819  " terapix_mcu_macrocode mem_init;\n"
820  " freia_reduction_results redres;\n"
821  " terapix_reduction redter;\n"
822  " // overall structure which describes the computation\n"
823  " terapix_mcu_instr mcu_instr;\n");
824 
825  sb_cat(body,
826  "\n"
827  " // body:\n"
828  " // mcode param\n"
829  " mcode.raw = (void*) terapix_ucode_array;\n"
830  " mcode.size = TERAPIX_UCODE_SIZE_T;\n"
831  " freia_mg_write_microcode(&mcode);\n"
832  "\n"
833  " // dyn_param contents\n"
834  " dyn_param.raw = &gram;\n"
835  " dyn_param.size = sizeof(terapix_gram);\n"
836  "\n"
837  " // redres contents\n"
838  " redres.raw = (void*) &redter;\n"
839  " redres.size = sizeof(terapix_reduction);\n"
840  "\n");
841 
842  // string_buffer head, decls, end, settings;
843 
844  // schedule to imagelet numbers as needed...
845  // use a named pointer the value of which will be known later,
846  // depending on the number of needed imagelets
847  // operation -> imagelet number
848  // the imagelet number is inverted if it is an I/O
850  set computed = set_make(set_pointer);
851 
852  // the GRAM initialization may be shared between helper calls?
853  bool * used = terapix_gram_init();
854 
855  // currently available imagelets
856  set avail_img = set_make(set_pointer);
857 
858  // output images are the first ones when I/O comms overlap
859  if (trpx_overlap_io_p())
860  while (n_imagelets<n_outs)
861  set_add_element(avail_img, avail_img, (void*) (_int) ++n_imagelets);
862 
863  if (n_ins)
864  {
865  // ??? they should be given in the order of the arguments
866  // when calling the runtime function.
867  int n = 0;
868  sb_cat(dbio, "\n // inputs:\n");
869  FOREACH(dagvtx, in, dag_inputs(thedag))
870  {
871  // update primary imagelet number
872  n_imagelets++;
873  set_add_element(computed, computed, in);
874  // ??? stupid bug which filters undefined values, i.e. -16
875  // I should really use a container...
876  hash_put(allocation, in, (void*) (_int) -n_imagelets);
877 
878  string sn = strdup(i2a(n)), si = strdup(i2a(n_imagelets));
879 
880  // ??? tell that n_imagelets is an input
881  sb_cat(dbio, " // - imagelet ", si, " is i", sn, " for ",
883  "\n");
884 
885  sb_cat(dbio, " tile_in[0][", sn, "].x = " IMG_PTR "io_", si, "_0;\n");
886  sb_cat(dbio, " tile_in[0][", sn, "].y = 0;\n");
887  sb_cat(dbio, " tile_in[1][", sn, "].x = " IMG_PTR "io_", si, "_1;\n");
888  sb_cat(dbio, " tile_in[1][", sn, "].y = 0;\n");
889  free(sn);
890  free(si);
891  n++;
892  }
893  sb_cat(dbio, "\n");
894  }
895  else
896  {
897  sb_cat(dbio, "\n // no input\n\n");
898  }
899 
900  // complete if need be, there will be AT LEAST this number of images
901  while (n_imagelets<n_double_buffers)
902  set_add_element(avail_img, avail_img, (void*) (_int) ++n_imagelets);
903 
904  set deads = set_make(set_pointer);
905  // newly created parameters at this round
906 
907  // generate code for every computation vertex
908  int n_ops = 0;
909  list vertices = gen_nreverse(gen_copy_seq(dag_vertices(thedag)));
910  FOREACH(dagvtx, current, vertices)
911  {
912  // skip this vertex
913  if (set_belong_p(computed, current))
914  continue;
916  continue;
917 
918  // compute freed images...
919  set_clear(deads);
920  compute_dead_vertices(deads, computed, thedag, current);
921 
923  pips_assert("there is a statement",
927  // int optype = dagvtx_optype(current);
928  int opid = dagvtx_opid(current);
929  const freia_api_t * api = get_freia_api(opid);
930  pips_assert("freia api found", api!=NULL);
931 
932  // if inplace, append freed images to availables
933  if (api->terapix.inplace)
934  {
935  SET_FOREACH(dagvtx, v, deads)
936  {
937  // but keep intermediate output images!
938  if (!gen_in_list_p(v, dag_outputs(thedag)))
939  {
940  _int img = (_int) hash_get(allocation, v);
941  if (img<0) img=-img;
942  set_add_element(avail_img, avail_img, (void*) img);
943  }
944  }
945  }
946 
947  // generate inS -> out computation
948  // - code
949  // imagelet inputs
951  sb_cat(body, " // ", i2a(n_ops), ": ", api->compact_name, "(");
952  if (ins)
953  {
954  // show input imagelet numbers
955  int in_count=0;
956  FOREACH(int, i, ins)
957  sb_cat(body, in_count++? ",": "", i2a(i>0? i: -i));
958  }
959  sb_cat(body, ")");
960 
961  // imagelet output
962  _int choice = 0;
963  if (api->arg_img_out==1)
964  {
965  bool is_output = gen_in_list_p(current, dag_outputs(thedag));
966  // SELECT one available imagelet
967  // if none is available, a new one is implicitly created
968  choice = select_imagelet(avail_img, &n_imagelets, is_output);
969  sb_cat(body, " -> ", i2a((int) choice));
970  // there is a subtlety here, if no I/O image was available
971  // then a copy will have to be inserted later on, see "PANIC".
972  if (choice<=n_double_buffers) choice = -choice;
973  hash_put(allocation, current, (void*) choice);
974  }
975  sb_cat(body, "\n");
976 
977  // update helper call arguments...
978  *params = gen_nconc(*params,
980  head, NULL, hparams, &nargs));
981 
982  // special case for replace_const, which needs a 4th argument
983  if (same_string_p(api->compact_name, ":"))
984  {
985  sb_cat(body, " // *special* set parameter for replace_const\n");
986  terapix_mcu_int(body, n_ops, "xmin1", 0);
987  terapix_mcu_int(body, n_ops, "ymin1", 0);
988  terapix_mcu_int(body, n_ops, "xmin2", 0);
989  terapix_mcu_int(body, n_ops, "ymin2", 0);
990  terapix_gram_management(body, decl, n_ops, api, current, hparams, used);
991  terapix_mcu_val(body, n_ops, "iter1", "TERAPIX_PE_NUMBER");
992  terapix_mcu_int(body, n_ops, "iter2", 0);
993  terapix_mcu_int(body, n_ops, "iter3", 0);
994  terapix_mcu_int(body, n_ops, "iter4", 0);
995  terapix_mcu_val(body, n_ops, "addrStart",
996  "TERAPIX_UCODE_SET_CONST_RAMREG");
997 
998  sb_cat(body, " // now take care of actual operation\n");
999  n_ops++;
1000  }
1001 
1002  if (api->terapix.memory)
1003  {
1004  string sop = strdup(i2a(n_ops));
1005  // reserve the necessary memory at the end of the segment
1006  available_memory -= api->terapix.memory;
1007  string mem = strdup(cat(RED_PTR, sop));
1008  sb_cat(init, " int ", mem, " = ", i2a(available_memory), ";\n");
1009 
1010  // initialize the memory based on the measure operation
1011  terapix_initialize_memory(decl, body, n_ops, mem, api, used);
1012 
1013  // imagelet computation
1014  sb_cat(body, " // set measure ", api->compact_name, " at ", mem, "\n");
1015  terapix_mcu_val(body, n_ops, "xmin2", mem);
1016  terapix_mcu_val(body, n_ops, "ymin2", "0");
1017 
1018  // should not be used, but just in case...
1019  terapix_mcu_val(body, n_ops, "xmin3", "0");
1020  terapix_mcu_val(body, n_ops, "ymin3", "0");
1021 
1022  // extraction
1023  sb_cat(tail, " // get measure ", api->compact_name,
1024  " result from ", mem, "\n");
1025  terapix_get_reduction(decl, tail, n_ops, mem, api);
1026 
1027  sb_cat(tail, " // assign reduction parameter",
1028  api->arg_misc_out>1? "s":"", "\n");
1029  int i = 0;
1031  {
1032  string var = (string) hash_get(hparams, arg);
1033  // hmmm, kind of a hack to get the possibly needed cast
1034  string cast = strdup(api->arg_out_types[i]);
1035  string space = strchr(cast, ' ');
1036  if (space) *space = '\0';
1037  sb_cat(tail, " *", var, " = (", cast, ") "
1038  "red_", sop, "[", i2a(i), "];\n");
1039  i++;
1040  free(cast);
1041  }
1042  free(mem);
1043  free(sop);
1044  }
1045 
1046  if (api==hwac_freia_api(AIPO "copy") && choice==INT(CAR(ins)))
1047  {
1048  // skip in place copy, which may happen if the selected target
1049  // image buffer happens to be the same as the input.
1050  sb_cat(body, " // in place copy skipped\n");
1051  n_ops--;
1052  }
1053  else
1054  {
1055  terapix_macro_code(body, decl, n_ops, api, used,
1056  hparams, current, ins, choice);
1057  }
1058 
1059  gen_free_list(ins), ins=NIL;
1060 
1061  // if NOT inplace, append freed images to availables now
1062  if (!api->terapix.inplace)
1063  {
1064  SET_FOREACH(dagvtx, v, deads)
1065  {
1066  // but keep intermediate output images!
1067  if (!gen_in_list_p(v, dag_outputs(thedag)))
1068  {
1069  _int img = (_int) hash_get(allocation, v);
1070  if (img<0) img=-img;
1071  set_add_element(avail_img, avail_img, (void*) img);
1072  }
1073  }
1074  }
1075 
1076  set_add_element(computed, computed, current);
1077  n_ops++;
1078  }
1079 
1080  // handle function image arguments
1082 
1083  if (n_outs)
1084  {
1085  int n = 0;
1086  sb_cat(dbio, " // outputs:\n");
1087  FOREACH(dagvtx, out, dag_outputs(thedag))
1088  {
1089  int oimg = (int) (_int) hash_get(allocation, out);
1090  if (oimg<0) oimg=-oimg;
1091  // when not overlapping, any I/O image is fine
1092  // when overlapping, must be one of the first
1093  // because the later ones are used in parallel as inputs
1094  if ((!trpx_overlap_io_p() && oimg>n_double_buffers) ||
1095  (trpx_overlap_io_p() && oimg>n_outs))
1096  {
1097  // PANIC:
1098  // if there is no available "IO" imagelet when an output is
1099  // produced, it will have to be put there with a copy later on.
1100  int old = oimg;
1101  oimg = select_imagelet(avail_img, NULL, true);
1102  pips_assert("IO imagelet found for output", oimg<=n_double_buffers);
1103 
1104  // generate copy code old -> oimg
1105  // hmmm... could not generate a test case where this is triggered...
1106  // the additional cost which should be reported?
1107  sb_cat(body, " // output copy ", i2a(old));
1108  sb_cat(body, " -> ", i2a(oimg), "\n");
1109  list lic = CONS(int, old, NIL);
1110  // -oimg to tell the code generator that we are dealing with
1111  // a double buffered image...
1112  terapix_macro_code(body, decl, n_ops, hwac_freia_api(AIPO "copy"),
1113  NULL, NULL, NULL, lic, -oimg);
1114  gen_free_list(lic);
1115  n_ops++;
1116  }
1117  // tell that oimg is an output
1118  // ??? tell that n_imagelets is an input
1119  string sn = strdup(i2a(n)), so = strdup(i2a(oimg));
1120  sb_cat(dbio, " // - imagelet ", so);
1121  sb_cat(dbio, " is o", sn, " for ");
1122  sb_cat(dbio,
1124  "\n");
1125  sb_cat(dbio, " tile_out[0][", sn, "].x = " IMG_PTR"io_", so, "_0;\n");
1126  sb_cat(dbio, " tile_out[0][", sn, "].y = 0;\n");
1127  sb_cat(dbio, " tile_out[1][", sn, "].x = " IMG_PTR"io_", so, "_1;\n");
1128  sb_cat(dbio, " tile_out[1][", sn, "].y = 0;\n");
1129  free(sn);
1130  free(so);
1131  n++;
1132  }
1133  sb_cat(dbio, "\n");
1134  sb_cat(body, "\n");
1135  }
1136  else
1137  {
1138  sb_cat(dbio, " // no output\n\n");
1139  }
1140 
1141  // now I know how many imagelets are needed
1142  int total_imagelets = n_imagelets + n_double_buffers;
1143  int imagelet_rows = available_memory/total_imagelets; // round down
1144  int imagelet_max_rows = imagelet_rows;
1145 
1146  // declarations when we know the number of operations
1147  // [2] for flip/flop double buffer handling
1148  sb_cat(decl, " // flip flop macro code and I/Os\n");
1149  sb_cat(decl, " terapix_mcu_macrocode mcu_macro[2][", i2a(n_ops), "];\n");
1150  if (n_ins)
1151  sb_cat(decl, " terapix_tile_info tile_in[2][", i2a(n_ins), "];\n");
1152  if (n_outs)
1153  sb_cat(decl, " terapix_tile_info tile_out[2][", i2a(n_outs), "];\n");
1154 
1155  // computed values
1156  sb_cat(decl, " // imagelets definitions:\n");
1157  sb_cat(decl, " // - ", i2a(n_imagelets), " computation imagelets\n");
1158  sb_cat(decl, " // - ", i2a(n_double_buffers), " double buffer imagelets\n");
1159 
1160  // we may optimize the row size for a target image height, if available
1161  int image_height = FREIA_DEFAULT_HEIGHT;
1162  int vertical_border = n>s? n: s;
1163  int max_computed_size = imagelet_rows-2*vertical_border;
1164  // this is really a MAXIMUM available size that can be set from outside
1165  int max_size = get_int_property(trpx_max_size);
1166 
1167  if (image_height==0)
1168  {
1169  // what about vol(cst())?
1170  pips_assert("at least one image is needed!", n_ins||n_outs);
1171  // dynamic adjustment of the imagelet size
1172  sb_cat(decl,
1173  " // dynamic optimal imagelet size computation\n"
1174  " // this formula must match what the scheduler does!\n"
1175  " int vertical_border = ", i2a(vertical_border), ";\n"
1176  // use first input image for the reference size, or default to output
1177  " int image_height = ", n_ins? "i": "o", "0->heightWa;\n");
1178  sb_cat(decl,
1179  " int max_computed_size = ", i2a(max_computed_size), ";\n"
1180  " int n_tiles = (image_height+max_computed_size-1)/max_computed_size;\n"
1181  " int imagelet_size = (n_tiles==1)? image_height:\n"
1182  " ((image_height+n_tiles-1)/n_tiles)+2*vertical_border;\n");
1183  if (max_size)
1184  {
1185  sb_cat(decl,
1186  " // max imagelet size requested..."
1187  " int max_size = ", i2a(max_size), ";\n"
1188  " if (imagelet_size>max_size)\n"
1189  " imagelet_size = max_size;\n");
1190  }
1191  }
1192  else // assume the provided image_height
1193  {
1194  // we adjust statically the imagelet size so that we avoid recomputing
1195  // pixels... the formula must match whatever the scheduler does!
1196  // ??? hmmm... only for inner tiles
1197  // #tiles is ceil(height/computed)
1198  int n_tiles = (image_height+max_computed_size-1)/max_computed_size;
1199  // now we compute back the row size
1200  int optim_rows = ((image_height+n_tiles-1)/n_tiles)+2*vertical_border;
1201  // fix if the tile is too large
1202  if (optim_rows>image_height) optim_rows = image_height;
1203  imagelet_rows = optim_rows;
1204 
1205  pips_assert("optimized row size lower than max row size",
1206  optim_rows<=imagelet_rows && optim_rows>0);
1207 
1208  // now we set the value directly
1209  sb_cat(decl, " // imagelet max size: ", i2a(imagelet_max_rows), "\n");
1210 
1211  // the runtime can use imagelet_rows or less
1212  sb_cat(decl, " int imagelet_size = ",
1213  i2a(max_size?
1214  // max_size is defined, may use it if smaller than computed size
1215  (max_size<imagelet_rows? max_size: imagelet_rows):
1216  // max_size is not defined
1217  imagelet_rows), ";\n");
1218  }
1219 
1220  // generate imagelet pointers
1221  for (int i=1; i<=total_imagelets; i++)
1222  {
1223  sb_cat(decl, " int " IMG_PTR, i2a(i), " = ");
1224  sb_cat(decl, i2a(imagelet_max_rows * (i-1)), ";\n");
1225  }
1226  // append reduction memory pointers
1227  sb_cat(decl, "\n");
1228 
1229  if (string_buffer_size(init)>0)
1230  {
1231  sb_cat(decl, " // memory for reductions\n");
1233  sb_cat(decl, "\n");
1234  }
1236 
1237  // generate imagelet double buffer pointers
1238  // sb_cat(dbio, " // double buffer management:\n");
1239  sb_cat(decl, " // double buffer assignment\n");
1240  for (int i=1; i<=n_double_buffers; i++)
1241  {
1242  // sb_cat(dbio, " // - buffer ", i2a(i), "/");
1243  // sb_cat(dbio, i2a(i+n_imagelets), "\n");
1244 
1245  sb_cat(decl, " int " IMG_PTR "io_", i2a(i), "_0 = ");
1246  sb_cat(decl, IMG_PTR, i2a(i), ";\n");
1247  sb_cat(decl, " int " IMG_PTR "io_", i2a(i), "_1 = ");
1248  sb_cat(decl, IMG_PTR, i2a(i+n_imagelets), ";\n");
1249  }
1250 
1251  // incorporate IO stuff
1252  string_buffer_append_sb(body, dbio);
1253  string_buffer_free(&dbio);
1254 
1255  // tell about imagelet erosion...
1256  // current output should be max(w,e) & max(n,s)
1257  sb_cat(body, " // imagelet erosion for the computation\n");
1258  // terapix runtime issue if n_tiles==1...
1259  sb_cat(body, " mcu_instr.borderTop = ", i2a(n), ";\n");
1260  sb_cat(body, " mcu_instr.borderBottom = ", i2a(s), ";\n");
1261  sb_cat(body, " mcu_instr.borderLeft = ", i2a(w), ";\n");
1262  sb_cat(body, " mcu_instr.borderRight = ", i2a(e), ";\n");
1263  sb_cat(body, " mcu_instr.imagelet_height = imagelet_size;\n"
1264  " mcu_instr.imagelet_width = TERAPIX_PE_NUMBER;\n"
1265  "\n");
1266 
1267  sb_cat(body, " // outputs\n"
1268  " mcu_instr.nbout = ", i2a(n_outs), ";\n");
1269  if (n_outs)
1270  sb_cat(body,
1271  " mcu_instr.out0 = tile_out[0];\n"
1272  " mcu_instr.out1 = tile_out[1];\n");
1273  else
1274  sb_cat(body,
1275  " mcu_instr.out0 = NULL;\n"
1276  " mcu_instr.out1 = NULL;\n");
1277 
1278  sb_cat(body, "\n"
1279  " // inputs\n"
1280  " mcu_instr.nbin = ", i2a(n_ins), ";\n");
1281  if (n_ins)
1282  sb_cat(body,
1283  " mcu_instr.in0 = tile_in[0];\n"
1284  " mcu_instr.in1 = tile_in[1];\n");
1285  else
1286  sb_cat(body,
1287  " mcu_instr.in0 = NULL;\n"
1288  " mcu_instr.in1 = NULL;\n");
1289 
1290  sb_cat(body,
1291  "\n"
1292  " // actual instructions\n"
1293  " mcu_instr.nbinstr = ", i2a(n_ops), ";\n"
1294  " mcu_instr.instr0 = mcu_macro[0];\n"
1295  " mcu_instr.instr1 = mcu_macro[1];\n");
1296 
1297  // tell about imagelet size
1298  // NOTE: the runtime *MUST* take care of possible in/out aliasing
1299  sb_cat(body,
1300  "\n"
1301  " // call terapix runtime\n"
1302  " param.size = -1; // not used\n"
1303  " param.raw = (void*) &mcu_instr;\n"
1304  " ret |= freia_cg_template_process(&param");
1305  for (int i=0; i<n_outs; i++)
1306  sb_cat(body, ", o", i2a(i));
1307  for (int i=0; i<n_ins; i++)
1308  sb_cat(body, ", i", i2a(i));
1309  sb_cat(body, ");\n");
1310 
1311  // ??? I must compute the total erosion
1312  // ??? I should check that something IS computed...
1313 
1315  sb_cat(code, ")\n");
1318  sb_cat(code, "\n");
1319  sb_cat(code, " // extract measures\n");
1321  sb_cat(code, "\n return ret;\n}\n\n");
1322 
1323  // cleanup computed vertices: they are REMOVED from the dag and "killed"
1324  // ??? should rather return them and the caller should do the cleaning?
1325  FOREACH(dagvtx, vr, vertices)
1326  {
1327  dag_remove_vertex(thedag, vr);
1328  if (set_belong_p(computed, vr))
1329  {
1333  free_dagvtx(vr);
1334  }
1335  }
1336  // cleanup
1337  gen_free_list(vertices), vertices = NIL;
1338  string_buffer_free(&head);
1339  string_buffer_free(&decl);
1340  string_buffer_free(&body);
1341  string_buffer_free(&tail);
1343  // ??? free strings!
1344  hash_table_free(hparams);
1345  set_free(avail_img);
1346  set_free(computed);
1347  set_free(deads);
1348  free(used);
1349 
1350  return n_outs;
1351 }
void free_dagvtx(dagvtx p)
struct paramStruct params
@ INT
Definition: atomic.c:48
bool dagvtx_other_stuff_p(const dagvtx v)
a vertex with a non AIPO or image related statement.
Definition: dag-utils.c:76
_int dagvtx_opid(const dagvtx v)
Definition: dag-utils.c:121
char * get_string_property(const char *)
const freia_api_t * hwac_freia_api(const char *function)
freia-utils.c
Definition: freia-utils.c:455
void freia_add_image_arguments(list limg, list *lparams)
prepend limg images in front of the argument list; limg is consumed by the operation.
Definition: freia-utils.c:1234
list freia_get_vertex_params(const dagvtx v)
Definition: freia-utils.c:578
list freia_extract_params(const int napi, list args, string_buffer head, string_buffer head2, hash_table params, int *nparams)
returns an allocated expression list of the parameters only (i.e.
Definition: freia-utils.c:613
void hwac_kill_statement(statement s)
remove contents of statement s.
Definition: freia-utils.c:761
const freia_api_t * get_freia_api(int index)
Definition: freia-utils.c:477
call freia_statement_to_call(const statement s)
return the actual function call from a statement, dealing with assign and returns....
Definition: freia-utils.c:973
#define cat(args...)
Definition: freia.h:41
#define AIPO
Definition: freia.h:51
#define sb_cat(args...)
Definition: freia.h:42
#define FREIA_IMAGE
Definition: freia.h:52
#define FREIA_DEFAULT_HEIGHT
Definition: freia.h:54
#define pstatement_statement_p(x)
#define vtxcontent_out(x)
#define pstatement_statement(x)
#define dag_vertices(x)
#define vtxcontent_source(x)
#define RED_PTR
#define ip2s(n)
static void terapix_get_reduction(string_buffer decl, string_buffer tail, int n_op, string mem, const freia_api_t *api)
generate reduction extraction code
static void terapix_macro_code(string_buffer code, string_buffer decl, int op, const freia_api_t *api, bool *used, hash_table hparams, const dagvtx v, const list ins, int out)
generate terapix code for
static void terapix_initialize_memory(string_buffer decl, string_buffer body, int nop, string mem, const freia_api_t *api, bool *used)
initialize the memory at addr depending on the operation to perform
#define IMG_PTR
static bool * terapix_gram_init(void)
allocate bitfield to describe used cells in global memory.
static list dag_vertex_pred_imagelets(const dag d, const dagvtx v, const hash_table allocation)
of ints
static void terapix_gram_management(string_buffer code, string_buffer decl, int op, const freia_api_t *api, const dagvtx v, hash_table hparams, bool *used)
manage GRAM global memory to pass parameters.
static void compute_dead_vertices(set deads, const set computed, const dag d, const dagvtx v)
Definition: freia_terapix.c:53
static _int select_imagelet(set availables, int *nimgs, bool first)
Return the first/last available imagelet, or create one if necessary This ensures that the choice is ...
static void terapix_mcu_val(string_buffer code, int op, string r, string s)
set some value string argument.
static void terapix_mcu_int(string_buffer code, int op, string ref, int val)
set an integer argument.
#define trpx_gram_width
Definition: freia_terapix.h:36
#define trpx_npe_prop
Definition: freia_terapix.h:34
#define trpx_gram_height
Definition: freia_terapix.h:37
#define trpx_max_size
Definition: freia_terapix.h:40
#define trpx_mem_prop
Definition: freia_terapix.h:33
#define trpx_dag_cut
Definition: freia_terapix.h:38
void free(void *)
list gen_copy_seq(list l)
Copy a list structure.
Definition: list.c:501
bool gen_in_list_p(const void *vo, const list lx)
tell whether vo belongs to lx
Definition: list.c:734
void hash_put(hash_table htp, const void *key, const void *val)
This function stores a couple (key,val) in the hash table pointed to by htp.
Definition: hash.c:364
float_t space[SIZE][SIZE]
Definition: jacobi.c:7
char * i2a(int)
I2A (Integer TO Ascii) yields a string for a given Integer.
Definition: string.c:121
string bool_to_string(bool)
Definition: string.c:243
#define same_string_p(s1, s2)
#define SET_FOREACH(type_name, the_item, the_set)
enumerate set elements in their internal order.
Definition: newgen_set.h:78
set set_clear(set)
Assign the empty set to s s := {}.
Definition: set.c:326
bool set_belong_p(const set, const void *)
Definition: set.c:194
void string_buffer_append_sb(string_buffer, const string_buffer)
append the string buffer sb2 to string buffer sb.
size_t string_buffer_size(const string_buffer)
return the size of the string in string_buffer sb
void string_buffer_free(string_buffer *)
free string buffer structure, also free string contents according to the dup field
Definition: string_buffer.c:82
string_buffer string_buffer_make(bool dup)
allocate a new string buffer
Definition: string_buffer.c:58
char * string
STRING.
Definition: newgen_types.h:39
static char * module
Definition: pips.c:74
const char * entity_user_name(entity e)
Since entity_local_name may contain PIPS special characters such as prefixes (label,...
Definition: entity.c:487
static int init
Maximal value set for Fortran 77.
Definition: entity.c:320
#define call_arguments(x)
Definition: ri.h:711
char * strdup()
internally defined structure.
Definition: string_buffer.c:47
string compact_name
Definition: freia.h:75
string arg_out_types[3]
Definition: freia.h:85
unsigned int arg_misc_out
Definition: freia.h:82
static Panel_item choice
Definition: xv_schoose2.c:54

References AIPO, allocation, freia_api_t::arg_img_out, freia_api_t::arg_misc_out, freia_api_t::arg_out_types, bool_to_string(), call_arguments, CAR, cat, choice, freia_api_t::compact_name, compute_dead_vertices(), CONS, current, dag_inputs, dag_outputs, dag_remove_vertex(), dag_terapix_measures(), dag_vertex_pred_imagelets(), dag_vertices, dagvtx_content, dagvtx_opid(), dagvtx_other_stuff_p(), entity_user_name(), FOREACH, free(), free_dagvtx(), freia_add_image_arguments(), FREIA_DEFAULT_HEIGHT, freia_extract_params(), freia_get_vertex_params(), FREIA_IMAGE, freia_statement_to_call(), gen_copy_seq(), gen_free_list(), gen_in_list_p(), gen_length(), gen_nconc(), gen_nreverse(), get_freia_api(), get_int_property(), get_string_property(), hash_get(), hash_pointer, hash_put(), hash_table_free(), hash_table_make(), hwac_freia_api(), hwac_kill_statement(), i2a(), IMG_PTR, init, terapix_hw_t::inplace, int, INT, ip2s, terapix_hw_t::memory, module, NIL, out, params, pips_assert, pstatement_statement, pstatement_statement_p, RED_PTR, same_string_p, sb_cat, select_imagelet(), set_add_element(), set_belong_p(), set_clear(), SET_FOREACH, set_free(), set_make(), set_pointer, space, strdup(), string_buffer_append_sb(), string_buffer_free(), string_buffer_make(), string_buffer_size(), freia_api_t::terapix, terapix_get_reduction(), terapix_gram_init(), terapix_gram_management(), terapix_initialize_memory(), terapix_macro_code(), terapix_mcu_int(), terapix_mcu_val(), trpx_dag_cut, trpx_dmabw_prop, trpx_gram_height, trpx_gram_width, trpx_max_size, trpx_mem_prop, trpx_npe_prop, trpx_overlap_io_p, vtxcontent_out, and vtxcontent_source.

Referenced by freia_trpx_compile_one_dag().

+ Here is the call graph for this function:
+ Here is the caller graph for this function:

◆ freia_trpx_compile_calls()

list freia_trpx_compile_calls ( string  module,
dag  fulld,
sequence  sq,
list  ls,
const hash_table  occs,
hash_table  exchanges,
const set  output_images,
FILE *  helper_file,
set  helpers,
int  number 
)

do compile a list of statements for terapix

freia_terapix.c

Parameters
module: current module (function) name
ls: list of statements taken from the sequence
occs: occurrences of images (image -> set of statements)
helper_file: file to which code is to be generated
number: number of this statement sequence in module
Returns
list of intermediate image to allocate
Parameters
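module: current module (function) name
fulld: full DAG for the statements; it is optimized, then split into sub-DAGs
sq: sequence in which the statements of each compiled DAG are migrated
ls: list of statements
occs: occurrences of images (image -> set of statements)
exchanges: table passed on to freia_dag_optimize()
output_images: set of output images, passed on to the DAG splitting functions
helper_file: file to which code is to be generated
helpers: set passed on to freia_trpx_compile_one_dag()
number: number of this statement sequence in module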

Definition at line 1808 of file freia_terapix.c.

1819 {
1820  bool reduce_cc =
1821  get_bool_property("HWAC_TERAPIX_REDUCE_TO_CONNECTED_COMPONENTS");
1822 
1823  // build DAG for ls
1824  pips_debug(3, "considering %d statements\n", (int) gen_length(ls));
1825  pips_assert("some statements", ls);
1826 
1827  int n_op_init, n_op_init_copies;
1828  freia_aipo_count(fulld, &n_op_init, &n_op_init_copies);
1829 
1830  // must have distinct images in the graph for optimizations
1832  list new_images = dag_fix_image_reuse(fulld, init, occs);
1833 
1834  list added_before = NIL, added_after = NIL;
1835  freia_dag_optimize(fulld, exchanges, &added_before, &added_after);
1836 
1837  int n_op_opt, n_op_opt_copies;
1838  freia_aipo_count(fulld, &n_op_opt, &n_op_opt_copies);
1839 
1840  fprintf(helper_file,
1841  "\n"
1842  "// dag %d: %d ops and %d copies, "
1843  "optimized to %d ops and %d+%d+%d copies\n",
1844  number, n_op_init, n_op_init_copies,
1845  n_op_opt, n_op_opt_copies,
1846  (int) gen_length(added_before), (int) gen_length(added_after));
1847 
1848  // dump final dag
1849  dag_dot_dump_prefix(module, "dag_cleaned_", number, fulld,
1850  added_before, added_after);
1851 
1852  string fname_fulldag = strdup(cat(module, "_terapix", HELPER, i2a(number)));
1853 
1854  // First, split only on scalar deps...
1855  // is it that simple? NO!
1856  // consider A -> B -> s -> C -> D
1857  // \-> E -> F />
1858  // then ABEF / CD is chosen
1859  // although ABE / FCD and AB / EFCD would be also possible..
1860 
1861  pips_assert("erosion is clean", erosion==NULL);
1863  list ld = dag_split_on_scalars(fulld,
1868  output_images);
1869  hash_table_free(erosion), erosion = NULL;
1870 
1871  // split ld dags by connected components
1872  if (reduce_cc)
1873  {
1874  list nld = NIL;
1875  FOREACH(dag, d, ld)
1876  nld = gen_nconc(nld, dag_split_connected_components(d, output_images));
1877  gen_free_list(ld), ld = nld, nld = NIL;
1878  }
1879 
1880  pips_debug(4, "dag initial split in %d dags\n", (int) gen_length(ld));
1881 
1882  const char* dag_cut = get_string_property(trpx_dag_cut);
1883  pips_assert("valid cutting strategy", trpx_dag_cut_is_valid(dag_cut));
1884 
1885  // globally remaining statements
1886  set global_remainings = set_make(set_pointer);
1887  set_assign_list(global_remainings, ls);
1888 
1889  int n_split = 0;
1890  int stnb = -1;
1891  set dones = set_make(set_pointer);
1892 
1893  FOREACH(dag, d, ld)
1894  {
1895  // ??? should migrate beforehand?
1896 
1897  // skip if something is not implemented
1898  if (terapix_not_implemented(d))
1899  continue;
1900 
1901  if (dag_no_image_operation(d))
1902  continue;
1903 
1904  if (trpx_dag_cut_none_p(dag_cut))
1905  {
1906  migrate_statements(sq, d, dones);
1907  // direct handling of the dag
1908  stnb = freia_trpx_compile_one_dag(module, ls, d, fname_fulldag, n_split,
1909  -1, global_remainings, helper_file, helpers, stnb, init);
1910  }
1911  else if (trpx_dag_cut_compute_p(dag_cut))
1912  {
1913  // try split dag into subdags with a rough computed strategy
1915  int cut, n_cut = 0;
1916 
1917  // what about another strategy?
1918  // I can try every possible cuts and chose the best one,
1919  // that is to stop as soon as computation cost > communication cost?
1920  // or when costs are quite balanced in all cuts?
1921  // dag cutting strategy prop = none/computed/optimized?
1922 
1923  while ((cut = cut_decision(d, erosion)))
1924  {
1925  dag dc = cut_perform(d, cut, erosion, fulld, output_images);
1926 
1927  // may separate by connected components...
1928  list ld;
1929  if (reduce_cc)
1930  ld = dag_split_connected_components(dc, output_images);
1931  else
1932  ld = CONS(dag, dc, NIL);
1933 
1934  FOREACH(dag, dci, ld)
1935  {
1936  migrate_statements(sq, dci, dones);
1937  // generate code for cut
1938  stnb =
1939  freia_trpx_compile_one_dag(module, ls, dci, fname_fulldag, n_split,
1940  n_cut++, global_remainings, helper_file, helpers, stnb, init);
1941  // cleanup
1942  free_dag(dci);
1943  }
1944 
1946  gen_free_list(ld);
1947  }
1948 
1949  if (dag_vertices(d)) {
1950  // should it *ALWAYS* HAPPEN?
1951  migrate_statements(sq, d, dones);
1952  stnb = freia_trpx_compile_one_dag(module, ls, d, fname_fulldag, n_split,
1953  n_cut++, global_remainings, helper_file, helpers, stnb, init);
1954  }
1955 
1957  }
1958  else if (trpx_dag_cut_enumerate_p(dag_cut))
1959  pips_internal_error("not implemented yet");
1960  else
1961  pips_internal_error("cannot get there");
1962 
1963  n_split++;
1964  }
1965 
1966  freia_insert_added_stats(ls, added_before, true);
1967  added_before = NIL;
1968  freia_insert_added_stats(ls, added_after, false);
1969  added_after = NIL;
1970 
1971  // full cleanup
1972  set_free(global_remainings), global_remainings = NULL;
1973  free(fname_fulldag), fname_fulldag = NULL;
1974  FOREACH(dag, dc, ld)
1975  free_dag(dc);
1976  gen_free_list(ld);
1977 
1978  // deal with new images
1979  list real_new_images =
1980  freia_allocate_new_images_if_needed(ls, new_images, occs, init, init);
1981  gen_free_list(new_images);
1983  return real_new_images;
1984 }
void free_dag(dag p)
list dag_split_connected_components(dag d, set output_images)
build connected components
Definition: dag-utils.c:3035
bool dag_no_image_operation(dag d)
tell whether we have something to do with images ??? hmmm...
Definition: dag-utils.c:2500
list dag_split_on_scalars(const dag initial, bool(*alone_only)(const dagvtx), dagvtx(*choose_vertex)(const list, bool), gen_cmp_func_t priority, void(*priority_update)(const dag), const set output_images)
split a dag on scalar dependencies only, with a greedy heuristics.
Definition: dag-utils.c:2823
list dag_fix_image_reuse(dag d, hash_table init, const hash_table occs)
fix intermediate image reuse in dag
Definition: dag-utils.c:2779
void freia_dag_optimize(dag d, hash_table exchanges, list *lbefore, list *lafter)
remove dead image operations.
Definition: dag-utils.c:1416
void dag_dot_dump_prefix(const string module, const string prefix, int number, const dag d, const list lb, const list la)
Definition: dag-utils.c:504
bool get_bool_property(const string)
FC 2015-07-20: yuk, moved out to prevent an include cycle dependency include "properties....
list freia_allocate_new_images_if_needed(list ls, list images, const hash_table occs, const hash_table init, const hash_table signatures)
insert image allocation if needed, for intermediate image inserted before if an image is used only tw...
Definition: freia-utils.c:1650
void freia_insert_added_stats(list ls, list stats, bool before)
insert statements to actual code sequence in "ls" BEWARE that ls is assumed to be in reverse order....
Definition: freia-utils.c:1185
int freia_aipo_count(dag d, int *pa, int *pc)
Definition: freia-utils.c:1823
#define HELPER
Definition: freia.h:38
static void dag_terapix_reset_erosion(const dag d)
static int cut_decision(dag d, hash_table erosion)
would it seem interesting to split d?
static int freia_trpx_compile_one_dag(string module, list ls, dag d, string fname_fulldag, int n_split, int n_cut, set global_remainings, FILE *helper_file, set helpers, int stnb, hash_table signatures)
generate terapix code for this one dag, which should be already split.
static dag cut_perform(dag d, int cut, hash_table erodes, dag fulld, const set output_images)
cut dag "d", possibly a subdag of "fulld", at "erosion" "cut"
static bool terapix_not_implemented(dag d)
whether dag is not implemented in terapix
static dagvtx choose_terapix_vertex(const list lv, bool started)
choose a vertex, avoiding non combinable stuff if the list is started
static void migrate_statements(sequence sq, dag d, set dones)
#define trpx_dag_cut_none_p(s)
Definition: freia_terapix.h:43
#define trpx_dag_cut_is_valid(s)
Definition: freia_terapix.h:46
#define trpx_dag_cut_compute_p(s)
Definition: freia_terapix.h:44
#define trpx_dag_cut_enumerate_p(s)
Definition: freia_terapix.h:45
#define pips_internal_error
Definition: misc-local.h:149
int fprintf()
test sc_min: this test is invoked as: program file1.data file2.data ...

Referenced by freia_compile().

+ Here is the caller graph for this function:

◆ freia_trpx_compile_one_dag()

static int freia_trpx_compile_one_dag ( string  module,
list  ls,
dag  d,
string  fname_fulldag,
int  n_split,
int  n_cut,
set  global_remainings,
FILE *  helper_file,
set  helpers,
int  stnb,
hash_table  signatures 
)
static

generate terapix code for this one dag, which should be already split.

return the statement number of the helper insertion

Parameters
ls: list of statements

Definition at line 1358 of file freia_terapix.c.

1370 {
1371  ifdebug(4) {
1373  dag_dump(stderr, "one_dag", d);
1374  }
1375 
1376  set remainings = set_make(set_pointer);
1378 
1379  // name_<number>_<split>[_<cut>]
1380  string fname_dag = strdup(cat(fname_fulldag, "_", i2a(n_split)));
1381  if (n_cut!=-1)
1382  {
1383  string s = strdup(cat(fname_dag, "_", i2a(n_cut)));
1384  free(fname_dag);
1385  fname_dag = s;
1386  }
1387 
1388  dag_dot_dump(module, fname_dag, d, NIL, NIL);
1389 
1390  // - output function in helper file
1391  list lparams = NIL;
1392 
1394  _int nout = freia_terapix_call(module, fname_dag, code, d, &lparams);
1395  string_buffer_to_file(code, helper_file);
1397 
1398  // - and substitute its call...
1399  stnb = freia_substitute_by_helper_call(d, global_remainings, remainings,
1400  ls, fname_dag, lparams, helpers, stnb);
1401 
1402  // record (simple) signature
1403  hash_put(signatures, local_name_to_top_level_entity(fname_dag), (void*) nout);
1404 
1405  // cleanup
1406  free(fname_dag), fname_dag = NULL;
1407 
1408  return stnb;
1409 }
void dag_dump(FILE *out, const string what, const dag d)
for dag debug
Definition: dag-utils.c:212
void dag_dot_dump(const string module, const string name, const dag d, const list lb, const list la)
generate a "dot" format from a dag to a file.
Definition: dag-utils.c:488
void set_append_vertex_statements(set s, list lv)
Definition: dag-utils.c:2385
int freia_substitute_by_helper_call(dag d, set global_remainings, set remainings, list ls, const string function_name, list lparams, set helpers, int preceeding)
substitute those statement in ls that are in dag d and accelerated by a call to function_name(lparams...
Definition: freia-utils.c:1073
static _int freia_terapix_call(const string module, const string fname_dag, string_buffer code, dag thedag, list *params)
generate a terapix call for dag thedag.
void string_buffer_to_file(const string_buffer, FILE *)
put string buffer into file.
list lparams
Array bounds.
Definition: reindexing.c:111
entity local_name_to_top_level_entity(const char *n)
This function tries to find a top-level entity from a local name.
Definition: entity.c:1450

References cat, dag_consistency_asserts(), dag_dot_dump(), dag_dump(), dag_vertices, free(), freia_substitute_by_helper_call(), freia_terapix_call(), hash_put(), i2a(), ifdebug, local_name_to_top_level_entity(), lparams, module, NIL, set_append_vertex_statements(), set_make(), set_pointer, strdup(), string_buffer_free(), string_buffer_make(), and string_buffer_to_file().

+ Here is the call graph for this function:

◆ gram_param()

static void gram_param ( string_buffer  code,
string_buffer  decl,
string  name,
dagvtx  v,
hash_table  hparams,
int  width,
int  height,
bool  is_kernel,
bool *  used 
)
static

copy some operator parameters in the global ram (aka gram).

the coordinates used are (x_<name>, y_<name>).

Definition at line 386 of file freia_terapix.c.

390 {
391  int size = width*height;
392  pips_assert("something to copy...", size>0);
393 
394  int x = 0, y = 0;
395  terapix_gram_allocate(used, width, height, &x, &y);
396 
397  sb_cat(decl, " // operation ", name, " parameters\n");
398  sb_cat(decl, " int16_t p_", name, "[", i2a(size), "];\n");
399  sb_cat(decl, " const int32_t x_", name, " = ", i2a(x), ";\n");
400  sb_cat(decl, " const int32_t y_", name, " = ", i2a(y), ";\n");
401 
402  sb_cat(code, " // copy of operation ", name, " parameters\n");
403  list largs = freia_get_vertex_params(v);
404  pips_assert("some args...", gen_length(largs)>0);
405  string p1 = hash_get(hparams, EXPRESSION(CAR(largs)));
406  // copy code...
407  if (is_kernel)
408  {
409  sb_cat(code,
410  " for(i=0; i<", i2a(size), "; i++)\n"
411  " p_", name, "[i] = ", p1, "[i];\n");
412  }
413  else
414  {
415  switch (size)
416  {
417  case 1: // constant
418  sb_cat(code, " p_", name, "[0] = ", p1, ";\n");
419  break;
420  case 3: // threshold min/max/bin
421  sb_cat(code, " p_", name, "[0] = ", p1, ";\n");
422  sb_cat(code, " p_", name, "[1] = ",
423  hash_get(hparams, EXPRESSION(CAR(CDR(largs)))), ";\n");
424  sb_cat(code, " p_", name, "[2] = ",
425  hash_get(hparams, EXPRESSION(CAR(CDR(CDR(largs))))), ";\n");
426  break;
427  default:
428  pips_internal_error("unexpected gram size");
429  }
430  }
431 
432  sb_cat(code, " gram.xoffset = x_", name, ";\n");
433  sb_cat(code, " gram.yoffset = y_", name, ";\n");
434  sb_cat(code, " gram.width = ", i2a(width), ";\n");
435  sb_cat(code, " gram.height = ", i2a(height), ";\n");
436  sb_cat(code, " gram.params = p_", name, ";\n");
437  sb_cat(code, " freia_mg_write_dynamic_param(&dyn_param);\n");
438 }
static void terapix_gram_allocate(bool *used, int width, int height, int *x, int *y)
terapix allocate widthxheight in global memory
#define CDR(pcons)
Get the list less its first element.
Definition: newgen_list.h:111
#define EXPRESSION(x)
EXPRESSION.
Definition: ri.h:1217
static char * x
Definition: split_file.c:159

References CAR, CDR, EXPRESSION, freia_get_vertex_params(), gen_length(), hash_get(), i2a(), pips_assert, pips_internal_error, sb_cat, terapix_gram_allocate(), and x.

Referenced by terapix_gram_management().

+ Here is the call graph for this function:
+ Here is the caller graph for this function:

◆ migrate_statements()

static void migrate_statements ( sequence  sq,
dag  d,
set  dones 
)
static

Definition at line 1791 of file freia_terapix.c.

1792 {
1793  set stats = set_make(set_pointer);
1794  dag_statements(stats, d);
1795  freia_migrate_statements(sq, stats, dones);
1796  set_union(dones, dones, stats);
1797  set_free(stats);
1798 }
void dag_statements(set stats, const dag d)
build the set of actual statements in d
Definition: dag-utils.c:64
void freia_migrate_statements(sequence sq, const set stats, const set before)
Definition: freia-utils.c:1905
set set_union(set, const set, const set)
Definition: set.c:211

◆ not_implemented()

static bool not_implemented ( dagvtx  v)
static

whether vertex is not implemented in terapix

Definition at line 1583 of file freia_terapix.c.

1584 {
1585  if (freia_convolution_p(v)) // special case
1586  {
1587  // skip if parametric
1588  _int w, h;
1589  return !freia_convolution_width_height(v, &w, &h, false);
1590  }
1592 }

◆ select_imagelet()

static _int select_imagelet ( set  availables,
int *  nimgs,
bool  first 
)
static

Return the first/last available imagelet, or create one if necessary. This ensures that the choice is deterministic.

Moreover, as the first numbers are IO imagelets, this helps put outputs in the right imagelet so as to avoid additional copies, if possible.

Definition at line 297 of file freia_terapix.c.

298 {
299  ifdebug(8) {
300  pips_debug(8, "selecting first=%s\n", bool_to_string(first));
301  set_fprint(stderr, "availables", availables, (string (*)()) i2a);
302  }
303 
304  _int choice = 0; // zero means no choice yet
305  // allocate if no images are available
306  if (set_empty_p(availables))
307  {
308  pips_assert("can create new images", nimgs!=NULL);
309  (*nimgs)++;
310  choice = *nimgs;
311  }
312  else // search
313  {
314  SET_FOREACH(_int, i, availables)
315  {
316  if (choice==0) choice = i;
317  if (first && (i<choice)) choice = i;
318  if (!first && (i>choice)) choice = i;
319  }
320  set_del_element(availables, availables, (void*) choice);
321  }
322  pips_assert("some choice was made", choice>0);
323  pips_debug(8, "choice is %"_intFMT"\n", choice);
324  return choice;
325 }
bool set_empty_p(const set)
tell whether set s is empty.
Definition: set.c:367
set set_del_element(set, const set, const void *)
Definition: set.c:265
void set_fprint(FILE *, string, const set, gen_string_func_t)
print set s to file stream out.
Definition: set.c:524

References _intFMT, bool_to_string(), choice, i2a(), ifdebug, pips_assert, pips_debug, set_del_element(), set_empty_p(), SET_FOREACH, and set_fprint().

Referenced by freia_terapix_call().

+ Here is the call graph for this function:
+ Here is the caller graph for this function:

◆ terapix_get_reduction()

static void terapix_get_reduction ( string_buffer  decl,
string_buffer  tail,
int  n_op,
string  mem,
const freia_api_t api 
)
static

generate reduction extraction code

Definition at line 670 of file freia_terapix.c.

676 {
677  pips_assert("some results are expected", api->arg_misc_out>0);
678  string sop = strdup(i2a(n_op));
679  // I do not understand the underlying logic of these values
680  string width = api->arg_misc_out==3? "5": "1";
681  sb_cat(decl,
682  " // array for reduction ", sop, " extraction\n"
683  " int32_t red_", sop, "[", i2a(api->arg_misc_out), "];\n");
684  sb_cat(tail,
685  " redter.xres = ", mem, ";\n"
686  " redter.yres = 0;\n"
687  " redter.width = ", width, ";\n"
688  " redter.height = TERAPIX_PE_NUMBER;\n"
689  " redter.result = (void*) red_", sop, ";\n"
690  " redter.macroid = ", api->terapix.ucode, ";\n"
691  // just gessing that there must be a first input image
692  // ??? we assume that all image are of the same size?!
693  " redter.imgwidth = i0->width;\n"
694  " redter.imgheight = i0->height;\n"
695  " redter.subimgwidth = TERAPIX_PE_NUMBER;\n"
696  " redter.subimgheight = imagelet_size;\n"
697  "\n"
698  " ret |= freia_cg_read_reduction_results(&redres);\n"
699  "\n");
700  free(sop);
701 }

References freia_api_t::arg_misc_out, free(), i2a(), pips_assert, sb_cat, strdup(), freia_api_t::terapix, and terapix_hw_t::ucode.

Referenced by freia_terapix_call().

+ Here is the call graph for this function:
+ Here is the caller graph for this function:

◆ terapix_gram_allocate()

static void terapix_gram_allocate ( bool *  used,
int  width,
int  height,
int *  x,
int *  y 
)
static

Allocate a width x height block of cells in terapix global memory.

Returns
*x, *y: coordinates of the first available block, which is then marked as used

Definition at line 263 of file freia_terapix.c.

265 {
266  int row_size = get_int_property(trpx_gram_width);
267  int col_size = get_int_property(trpx_gram_height);
268  for (int j = 0; j<col_size-height+1; j++)
269  {
270  for (int i = 0; i<row_size-width+1; i++)
271  {
272  bool ok = true;
273  for (int w = 0; ok && w<width; w++)
274  for (int h = 0; ok && h<height; h++)
275  ok &= !used[(i+w)+(j+h)*row_size];
276  if (ok)
277  {
278  for (int w = 0; w<width; w++)
279  for (int h = 0; h<height; h++)
280  used[(i+w)+(j+h)*row_size] = true;
281  *x = i;
282  *y = j;
283  return;
284  }
285  }
286  }
287  pips_internal_error("cannot find available memory for %dx%d", width, height);
288 }
static bool ok

References get_int_property(), ok, pips_internal_error, trpx_gram_height, trpx_gram_width, and x.

Referenced by gram_param(), and terapix_init_row().

+ Here is the call graph for this function:
+ Here is the caller graph for this function:

◆ terapix_gram_init()

static bool* terapix_gram_init ( void  )
static

Allocate a boolean array describing which cells of global memory are used; all cells start as unused.

Definition at line 249 of file freia_terapix.c.

250 {
251  int row_size = get_int_property(trpx_gram_width);
252  int col_size = get_int_property(trpx_gram_height);
253  bool * gram = (bool *) malloc(sizeof(bool)*row_size*col_size);
254  pips_assert("malloc ok", gram);
255  for (int i=0; i<row_size*col_size; i++)
256  gram[i] = false;
257  return gram;
258 }
void * malloc(YYSIZE_T)

References get_int_property(), malloc(), pips_assert, trpx_gram_height, and trpx_gram_width.

Referenced by freia_terapix_call().

+ Here is the call graph for this function:
+ Here is the caller graph for this function:

◆ terapix_gram_management()

static void terapix_gram_management ( string_buffer  code,
string_buffer  decl,
int  op,
const freia_api_t api,
const dagvtx  v,
hash_table  hparams,
bool *  used 
)
static

manage GRAM global memory to pass parameters.

Definition at line 442 of file freia_terapix.c.

450 {
451  if (!api->arg_misc_in) return;
452 
453  list largs = freia_get_vertex_params(v);
454  string p1 = hash_get(hparams, EXPRESSION(CAR(largs)));
455 
456  // is it a new, never handled, parameter?
457  bool initialize = !hash_defined_p(hparams, p1);
458  // name suffix for variables...
459  if (initialize) hash_put(hparams, p1, strdup(i2a(op)));
460  string name = hash_get(hparams, p1);
461 
462  if (initialize)
463  {
464  switch (api->arg_misc_in)
465  {
466  case 3: // convolution or threshold
467  if (freia_convolution_p(v)) // convolution special case
468  {
469  _int w, h;
470  freia_convolution_width_height(v, &w, &h, true);
471  gram_param(code, decl, name, v, hparams, w, h, true, used);
472  }
473  else // threshold
474  gram_param(code, decl, name, v, hparams, 3, 1, false, used);
475  break;
476  case 1: // kernel or operation with a constant
477  if (api->terapix.north) // let us say it is a kernel...
478  gram_param(code, decl, name, v, hparams, 3, 3, true, used);
479  else
480  gram_param(code, decl, name, v, hparams, 1, 1, false, used);
481  break;
482  default:
483  pips_internal_error("unexpected number of input image arguments");
484  }
485  }
486 
487  // is it always [xy]min3?
488  terapix_mcu_pval(code, op, "xmin3", "x_", name);
489  terapix_mcu_pval(code, op, "ymin3", "y_", name);
490 }
static void gram_param(string_buffer code, string_buffer decl, string name, dagvtx v, hash_table hparams, int width, int height, bool is_kernel, bool *used)
copy some operator parameters in the global ram (aka gram).
static void terapix_mcu_pval(string_buffer code, int op, string ref, string p, string s)
set some prefixed value string argument.
static void initialize()
Definition: stats.c:407
unsigned int arg_misc_in
Definition: freia.h:83

References freia_api_t::arg_misc_in, CAR, EXPRESSION, freia_convolution_p(), freia_convolution_width_height(), freia_get_vertex_params(), gram_param(), hash_defined_p(), hash_get(), hash_put(), i2a(), initialize(), terapix_hw_t::north, pips_internal_error, strdup(), freia_api_t::terapix, and terapix_mcu_pval().

Referenced by freia_terapix_call(), and terapix_macro_code().

+ Here is the call graph for this function:
+ Here is the caller graph for this function:

◆ terapix_image()

static void terapix_image ( string_buffer  sb,
int  ff,
int  n 
)
static

generate an image symbolic pointer (a name:-).

Definition at line 332 of file freia_terapix.c.

333 {
334  pips_assert("valid flip-flop", ff==0 || ff==1);
335  pips_assert("valid image number", n!=0);
336  if (n>0)
337  sb_cat(sb, IMG_PTR, i2a(n));
338  else
339  sb_cat(sb, IMG_PTR "io_", i2a(-n), ff? "_1": "_0");
340 }
Definition: statement.c:4047

References i2a(), IMG_PTR, pips_assert, and sb_cat.

Referenced by terapix_mcu_img().

+ Here is the call graph for this function:
+ Here is the caller graph for this function:

◆ terapix_init_row()

static void terapix_init_row ( string_buffer  decl,
string_buffer  code,
string  base,
string  suff,
string  mem,
int  nrow,
string  val,
bool *  used 
)
static

initialize a few rows at mem address with value val

Definition at line 572 of file freia_terapix.c.

581 {
582  // get one memory cell for the value
583  int x = 0, y = 0;
584  terapix_gram_allocate(used, 1, 1, &x, &y);
585 
586  // operation name
587  string name = strdup(cat(base, "_", suff));
588 
589  // set the constant
590  sb_cat(decl, " // operation ", name, " initialization\n"
591  " int16_t p_", name, "[1];\n");
592  sb_cat(decl, " const int32_t x_", name, " = ", i2a(x), ";\n");
593  sb_cat(decl, " const int32_t y_", name, " = ", i2a(y), ";\n");
594 
595  sb_cat(code, " // initializing ", name, "\n"
596  " p_", name, "[0] = ", val, ";\n"
597  " gram.xoffset = x_", name, ";\n"
598  " gram.yoffset = y_", name, ";\n"
599  " gram.width = 1;\n"
600  " gram.height = 1;\n"
601  " gram.params = p_", name, ";\n"
602  " freia_mg_write_dynamic_param(&dyn_param);\n");
603 
604  // call the initialization
605  sb_cat(code,
606  " // initialize memory for operation ", name, "\n"
607  " mem_init.xmin1 = ", mem, ";\n"
608  " mem_init.ymin1 = 0;\n"
609  " mem_init.xmin2 = 0;\n"
610  " mem_init.ymin2 = 0;\n"
611  " mem_init.xmin3 = 0;\n"
612  " mem_init.ymin3 = 0;\n"
613  " mem_init.iter1 = TERAPIX_PE_NUMBER;\n"
614  " mem_init.iter2 = ", i2a(nrow),";\n"
615  " mem_init.iter3 = 0;\n"
616  " mem_init.iter4 = 0;\n"
617  " mem_init.addrStart = TERAPIX_UCODE_SET_CONST;\n"
618  " param.size = sizeof(terapix_mcu_macrocode); // not used?\n"
619  " param.raw = (void*) (&mem_init);\n"
620  " ret |= freia_mg_work(&param);\n"
621  " ret |= freia_mg_end_work();\n");
622 
623  // cleanup
624  free(name);
625 }
bdt base
Current expression.
Definition: bdt_read_paf.c:100

References base, cat, free(), i2a(), sb_cat, strdup(), terapix_gram_allocate(), and x.

Referenced by terapix_initialize_memory().

+ Here is the call graph for this function:
+ Here is the caller graph for this function:

◆ terapix_initialize_memory()

static void terapix_initialize_memory ( string_buffer  decl,
string_buffer  body,
int  nop,
string  mem,
const freia_api_t api,
bool *  used 
)
static

initialize the memory at address mem depending on the operation to perform

Parameters
decl: added declarations are put there
body: generated code is put there
nop: current operation number
mem: memory symbolic x address
api: freia operation
used: current use of Global RAM (gram)

Definition at line 635 of file freia_terapix.c.

642 {
643  string op = api->compact_name;
644  pips_assert("operation is a measure",
645  same_string_p(op, "min") || same_string_p(op, "min!") ||
646  same_string_p(op, "max") || same_string_p(op, "max!") ||
647  same_string_p(op, "vol"));
648  string sop = strdup(i2a(nop));
649 
650  // INT16 should be a property?
651 
652  if (same_string_p(op, "min") || same_string_p(op, "min!"))
653  terapix_init_row(decl, body, sop, "val", mem, 1, "INT16_MAX", used);
654  if (same_string_p(op, "max") || same_string_p(op, "max!"))
655  terapix_init_row(decl, body, sop, "val", mem, 1, "INT16_MIN", used);
656  if (same_string_p(op, "min!") || same_string_p(op, "max!"))
657  {
658  string memp1 = strdup(cat(mem,"+1"));
659  terapix_init_row(decl, body, sop, "loc", memp1, 4, "0", used);
660  free(memp1);
661  }
662  if (same_string_p(op, "vol"))
663  terapix_init_row(decl, body, sop, "val", mem, 2, "0", used);
664 
665  free(sop);
666 }
static void terapix_init_row(string_buffer decl, string_buffer code, string base, string suff, string mem, int nrow, string val, bool *used)
initialize a few rows at mem address with value val

References cat, freia_api_t::compact_name, free(), i2a(), pips_assert, same_string_p, strdup(), and terapix_init_row().

Referenced by freia_terapix_call().

+ Here is the call graph for this function:
+ Here is the caller graph for this function:

◆ terapix_macro_code()

static void terapix_macro_code ( string_buffer  code,
string_buffer  decl,
int  op,
const freia_api_t api,
bool *  used,
hash_table  hparams,
const dagvtx  v,
const list  ins,
int  out 
)
static

generate terapix macro code for one operation

Parameters
code: code stream being generated
decl: declaration stream being generated
op: operation number
api: actual freia operator called
used: array to keep track of what gram cells are used
hparams: expression to parameter mapping
v: dag vertex of the current operation
ins: list of image number inputs (i.e. operation arguments)
out: image number output for the operation

Definition at line 503 of file freia_terapix.c.

507 {
508  // check image in/out consistency
509  pips_assert("#ins ok", gen_length(ins)==api->arg_img_in);
510  pips_assert("out ok", out? api->arg_img_out: !api->arg_img_out);
511 
512  switch (api->arg_img_in)
513  {
514  case 2:
515  pips_assert("2 ins, alu operation...", out);
516  int img1 = INT(CAR(ins)), img2 = INT(CAR(CDR(ins)));
517  terapix_mcu_img(code, op, "xmin1", api->terapix.reverse? img2: img1);
518  terapix_mcu_int(code, op, "ymin1", 0);
519  terapix_mcu_img(code, op, "xmin2", api->terapix.reverse? img1: img2);
520  terapix_mcu_int(code, op, "ymin2", 0);
521  terapix_mcu_img(code, op, "xmin3", out);
522  terapix_mcu_int(code, op, "ymin3", 0);
523  // ??? needed for replace const... although arg 3 is used already
524  // replace_const special argument management is handled directly elsewhere
525  // terapix_gram_management(code, decl, op, api, v, hparams, used);
526  break;
527  case 1:
528  // alu: image op cst 1
529  // threshold 3x1
530  // erode/dilate 3x3
531  // copy
532  terapix_mcu_img(code, op, "xmin1", INT(CAR(ins)));
533  terapix_mcu_int(code, op, "ymin1", 0);
534  if (out) {
535  terapix_mcu_img(code, op, "xmin2", out);
536  terapix_mcu_int(code, op, "ymin2", 0);
537  }
538  terapix_gram_management(code, decl, op, api, v, hparams, used);
539  break;
540  case 0:
541  pips_assert("no input, one output image", out);
542  // const image generation... NSP
543  terapix_mcu_img(code, op, "xmin1", out);
544  terapix_mcu_int(code, op, "ymin1", 0);
545  terapix_gram_management(code, decl, op, api, v, hparams, used);
546  break;
547  default:
548  pips_internal_error("unexpected number of input images");
549  }
550  terapix_mcu_val(code, op, "iter1", "TERAPIX_PE_NUMBER");
551  terapix_mcu_val(code, op, "iter2", "imagelet_size");
552  if (freia_convolution_p(v)) // convolution special case hack
553  {
554  _int w, h;
555  freia_convolution_width_height(v, &w, &h, true);
556  // ??? should I use the parameters?
557  // ??? or check their values?
558  // ??? or remove them from the list as they are inlined?
559  terapix_mcu_int(code, op, "iter3", (int) w);
560  terapix_mcu_int(code, op, "iter4", (int) h);
561  }
562  else
563  {
564  terapix_mcu_val(code, op, "iter3", "0");
565  terapix_mcu_val(code, op, "iter4", "0");
566  }
567  terapix_mcu_val(code, op, "addrStart", api->terapix.ucode);
568 }
static void terapix_mcu_img(string_buffer code, int op, string ref, int n)
set a double buffered image argument.
unsigned int arg_img_in
Definition: freia.h:80

References freia_api_t::arg_img_in, freia_api_t::arg_img_out, CAR, CDR, freia_convolution_p(), freia_convolution_width_height(), gen_length(), INT, out, pips_assert, pips_internal_error, terapix_hw_t::reverse, freia_api_t::terapix, terapix_gram_management(), terapix_mcu_img(), terapix_mcu_int(), terapix_mcu_val(), and terapix_hw_t::ucode.

Referenced by freia_terapix_call().

+ Here is the call graph for this function:
+ Here is the caller graph for this function:

◆ terapix_mcu_img()

static void terapix_mcu_img ( string_buffer  code,
int  op,
string  ref,
int  n 
)
static

set a double buffered image argument.

Definition at line 344 of file freia_terapix.c.

345 {
346  sb_cat(code, " mcu_macro[0][", i2a(op), "].", ref, " = ");
347  terapix_image(code, 0, n);
348  sb_cat(code, ";\n");
349  sb_cat(code, " mcu_macro[1][", i2a(op), "].", ref, " = ");
350  terapix_image(code, 1, n);
351  sb_cat(code, ";\n");
352 }
static reference ref
Current stmt (an integer)
Definition: adg_read_paf.c:163
static void terapix_image(string_buffer sb, int ff, int n)
generate an image symbolic pointer (a name:-).

References i2a(), ref, sb_cat, and terapix_image().

Referenced by terapix_macro_code().

+ Here is the call graph for this function:
+ Here is the caller graph for this function:

◆ terapix_mcu_int()

static void terapix_mcu_int ( string_buffer  code,
int  op,
string  ref,
int  val 
)
static

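Set an integer argument: appends to code the assignment of val to field ref of operation op, in both mcu_macro[0] and mcu_macro[1].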

Definition at line 356 of file freia_terapix.c.

357 {
358  sb_cat(code, " mcu_macro[0][", i2a(op), "].", ref);
359  sb_cat(code, " = ", i2a(val), ";\n");
360  sb_cat(code, " mcu_macro[1][", i2a(op), "].", ref);
361  sb_cat(code, " = ", i2a(val), ";\n");
362 }

References i2a(), ref, and sb_cat.

Referenced by freia_terapix_call(), and terapix_macro_code().

+ Here is the call graph for this function:
+ Here is the caller graph for this function:

◆ terapix_mcu_pval()

static void terapix_mcu_pval ( string_buffer  code,
int  op,
string  ref,
string  p,
string  s 
)
static

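Set a prefixed string-valued argument: appends to code the assignment of p immediately followed by s to field ref of operation op, in both mcu_macro[0] and mcu_macro[1].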

Definition at line 374 of file freia_terapix.c.

376 {
377  sb_cat(code, " mcu_macro[0][", i2a(op), "].", ref,
378  " = ", p, s, ";\n");
379  sb_cat(code, " mcu_macro[1][", i2a(op), "].", ref,
380  " = ", p, s, ";\n");
381 }

References i2a(), ref, and sb_cat.

Referenced by terapix_gram_management().

+ Here is the call graph for this function:
+ Here is the caller graph for this function:

◆ terapix_mcu_val()

static void terapix_mcu_val ( string_buffer  code,
int  op,
string  r,
string  s 
)
static

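Set a string-valued argument: appends to code the assignment of s to field r of operation op, in both mcu_macro[0] and mcu_macro[1].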

Definition at line 366 of file freia_terapix.c.

367 {
368  sb_cat(code, " mcu_macro[0][", i2a(op), "].", r, " = ", s, ";\n");
369  sb_cat(code, " mcu_macro[1][", i2a(op), "].", r, " = ", s, ";\n");
370 }

References i2a(), and sb_cat.

Referenced by freia_terapix_call(), and terapix_macro_code().

+ Here is the call graph for this function:
+ Here is the caller graph for this function:

◆ terapix_not_implemented()

static bool terapix_not_implemented ( dag  d)
static

Tell whether dag d is not implemented in Terapix: returns true as soon as one vertex of d satisfies not_implemented(), and false otherwise.

Definition at line 1596 of file freia_terapix.c.

1597 {
1598  FOREACH(dagvtx, v, dag_vertices(d))
1599  if (not_implemented(v))
1600  return true;
1601  return false;
1602 }

◆ update_erosions()

static void update_erosions ( const dag  d,
const dagvtx  v,
hash_table  erosion 
)
static

update_erosions(): record the erosion of the output of vertex v in the erosion table.

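Compute and store the imagelet erosion on the output of vertex v. For each of the four directions (north, south, west, east), the function starts from the largest erosion recorded for any predecessor of v in d, then adds the contribution of v itself: half the kernel width (west, east) and half the kernel height (north, south) for a convolution whose size is available, or the api->terapix.north/south/west/east values for the directions that erosion_optimization() keeps enabled. The four results are stored in the erosion table under the keys NORTH(v), SOUTH(v), WEST(v) and EAST(v).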

Definition at line 94 of file freia_terapix.c.

96 {
97  _int n = 0, s = 0, w = 0, e = 0;
98 
99  // compute most eroded imagelet
100  const list preds = dag_vertex_preds(d, v);
101  FOREACH(dagvtx, p, preds)
102  {
103  if ((_int)hash_get(erosion, NORTH(p))>n)
104  n = (_int) hash_get(erosion, NORTH(p));
105  if ((_int)hash_get(erosion, SOUTH(p))>s)
106  s = (_int) hash_get(erosion, SOUTH(p));
107  if ((_int)hash_get(erosion, WEST(p))>w)
108  w = (_int) hash_get(erosion, WEST(p));
109  if ((_int)hash_get(erosion, EAST(p))>e)
110  e = (_int) hash_get(erosion, EAST(p));
111  }
112  gen_free_list(preds);
113 
114  // update with vertex erosion
115  const freia_api_t * api = dagvtx_freia_api(v);
116 
117  // for erode/dilate, I look at the kernel, and if it is a
118  // "const" with initial values { 000 XXX XXX } => north=0 and so on.
119  // this is interesting on licensePlate, even if the zero
120  // computations are still performed...
121  if (freia_convolution_p(v)) // convolution special handling...
122  {
123  _int width, height;
124  if (freia_convolution_width_height(v, &width, &height, false))
125  {
126  w+=width/2;
127  e+=width/2;
128  n+=height/2;
129  s+=height/2;
130  }
131  // else simply ignore, should not be used anyway...
132  }
133  else if (api->terapix.north) // erode & dilate
134  {
135  bool north = true, south = true, west = true, east = true;
136  erosion_optimization(v, &north, &south, &west, &east);
137  if (north) n += api->terapix.north;
138  if (south) s += api->terapix.south;
139  if (west) w += api->terapix.west;
140  if (east) e += api->terapix.east;
141  }
142 
143  // store results
144  hash_put(erosion, NORTH(v), (void*) n);
145  hash_put(erosion, SOUTH(v), (void*) s);
146  hash_put(erosion, WEST(v), (void*) w);
147  hash_put(erosion, EAST(v), (void*) e);
148 }
static void erosion_optimization(dagvtx v, bool *north, bool *south, bool *west, bool *east)
tell whether the kernel is used on each of the 4 directions.
Definition: freia_terapix.c:69

References dag_vertex_preds(), dagvtx_freia_api, EAST, terapix_hw_t::east, erosion, erosion_optimization(), FOREACH, freia_convolution_p(), freia_convolution_width_height(), gen_free_list(), hash_get(), hash_put(), NORTH, terapix_hw_t::north, SOUTH, terapix_hw_t::south, freia_api_t::terapix, WEST, and terapix_hw_t::west.

Referenced by dag_terapix_measures().

+ Here is the call graph for this function:
+ Here is the caller graph for this function:

Variable Documentation

◆ erosion

hash_table erosion = NULL
static

Global variable used by the dagvtx_terapix_priority function, because qsort does not allow passing an additional descriptor argument.

Definition at line 1424 of file freia_terapix.c.

Referenced by constant_image_p(), dag_terapix_erosion(), dag_terapix_measures(), dag_terapix_reset_erosion(), dagvtx_terapix_priority(), and update_erosions().