StarPU Handbook
Data Partition

Data Structures

struct  starpu_data_filter
 

Basic API

void starpu_data_partition (starpu_data_handle_t initial_handle, struct starpu_data_filter *f)
 
void starpu_data_unpartition (starpu_data_handle_t root_data, unsigned gathering_node)
 
starpu_data_handle_t starpu_data_get_child (starpu_data_handle_t handle, unsigned i)
 
int starpu_data_get_nb_children (starpu_data_handle_t handle)
 
starpu_data_handle_t starpu_data_get_sub_data (starpu_data_handle_t root_data, unsigned depth,...)
 
starpu_data_handle_t starpu_data_vget_sub_data (starpu_data_handle_t root_data, unsigned depth, va_list pa)
 
void starpu_data_map_filters (starpu_data_handle_t root_data, unsigned nfilters,...)
 
void starpu_data_vmap_filters (starpu_data_handle_t root_data, unsigned nfilters, va_list pa)
 

Asynchronous API

void starpu_data_partition_plan (starpu_data_handle_t initial_handle, struct starpu_data_filter *f, starpu_data_handle_t *children)
 
void starpu_data_partition_submit (starpu_data_handle_t initial_handle, unsigned nparts, starpu_data_handle_t *children)
 
void starpu_data_partition_readonly_submit (starpu_data_handle_t initial_handle, unsigned nparts, starpu_data_handle_t *children)
 
void starpu_data_partition_readwrite_upgrade_submit (starpu_data_handle_t initial_handle, unsigned nparts, starpu_data_handle_t *children)
 
void starpu_data_unpartition_submit (starpu_data_handle_t initial_handle, unsigned nparts, starpu_data_handle_t *children, int gathering_node)
 
void starpu_data_unpartition_submit_r (starpu_data_handle_t initial_handle, int gathering_node)
 
void starpu_data_unpartition_readonly_submit (starpu_data_handle_t initial_handle, unsigned nparts, starpu_data_handle_t *children, int gathering_node)
 
void starpu_data_partition_clean (starpu_data_handle_t root_data, unsigned nparts, starpu_data_handle_t *children)
 
void starpu_data_unpartition_submit_sequential_consistency_cb (starpu_data_handle_t initial_handle, unsigned nparts, starpu_data_handle_t *children, int gather_node, int sequential_consistency, void(*callback_func)(void *), void *callback_arg)
 
void starpu_data_partition_submit_sequential_consistency (starpu_data_handle_t initial_handle, unsigned nparts, starpu_data_handle_t *children, int sequential_consistency)
 
void starpu_data_unpartition_submit_sequential_consistency (starpu_data_handle_t initial_handle, unsigned nparts, starpu_data_handle_t *children, int gathering_node, int sequential_consistency)
 
void starpu_data_partition_not_automatic (starpu_data_handle_t handle)
 

Predefined BCSR Filter Functions

Predefined partitioning functions for BCSR data. Examples on how to use them are shown in Partitioning Data.

void starpu_bcsr_filter_canonical_block (void *father_interface, void *child_interface, struct starpu_data_filter *f, unsigned id, unsigned nparts)
 
struct starpu_data_interface_ops * starpu_bcsr_filter_canonical_block_child_ops (struct starpu_data_filter *f, unsigned child)
 

Predefined CSR Filter Functions

Predefined partitioning functions for CSR data. Examples on how to use them are shown in Partitioning Data.

void starpu_csr_filter_vertical_block (void *father_interface, void *child_interface, struct starpu_data_filter *f, unsigned id, unsigned nparts)
 

Predefined Matrix Filter Functions

Predefined partitioning functions for matrix data. Examples on how to use them are shown in Partitioning Data.

void starpu_matrix_filter_block (void *father_interface, void *child_interface, struct starpu_data_filter *f, unsigned id, unsigned nparts)
 
void starpu_matrix_filter_block_shadow (void *father_interface, void *child_interface, struct starpu_data_filter *f, unsigned id, unsigned nparts)
 
void starpu_matrix_filter_vertical_block (void *father_interface, void *child_interface, struct starpu_data_filter *f, unsigned id, unsigned nparts)
 
void starpu_matrix_filter_vertical_block_shadow (void *father_interface, void *child_interface, struct starpu_data_filter *f, unsigned id, unsigned nparts)
 

Predefined Vector Filter Functions

Predefined partitioning functions for vector data. Examples on how to use them are shown in Partitioning Data.

void starpu_vector_filter_block (void *father_interface, void *child_interface, struct starpu_data_filter *f, unsigned id, unsigned nparts)
 
void starpu_vector_filter_block_shadow (void *father_interface, void *child_interface, struct starpu_data_filter *f, unsigned id, unsigned nparts)
 
void starpu_vector_filter_list_long (void *father_interface, void *child_interface, struct starpu_data_filter *f, unsigned id, unsigned nparts)
 
void starpu_vector_filter_list (void *father_interface, void *child_interface, struct starpu_data_filter *f, unsigned id, unsigned nparts)
 
void starpu_vector_filter_divide_in_2 (void *father_interface, void *child_interface, struct starpu_data_filter *f, unsigned id, unsigned nparts)
 

Predefined Block Filter Functions

Predefined partitioning functions for block data. Examples on how to use them are shown in Partitioning Data. An example is available in examples/filters/shadow3d.c

void starpu_block_filter_block (void *father_interface, void *child_interface, struct starpu_data_filter *f, unsigned id, unsigned nparts)
 
void starpu_block_filter_block_shadow (void *father_interface, void *child_interface, struct starpu_data_filter *f, unsigned id, unsigned nparts)
 
void starpu_block_filter_vertical_block (void *father_interface, void *child_interface, struct starpu_data_filter *f, unsigned id, unsigned nparts)
 
void starpu_block_filter_vertical_block_shadow (void *father_interface, void *child_interface, struct starpu_data_filter *f, unsigned id, unsigned nparts)
 
void starpu_block_filter_depth_block (void *father_interface, void *child_interface, struct starpu_data_filter *f, unsigned id, unsigned nparts)
 
void starpu_block_filter_depth_block_shadow (void *father_interface, void *child_interface, struct starpu_data_filter *f, unsigned id, unsigned nparts)
 
void starpu_filter_nparts_compute_chunk_size_and_offset (unsigned n, unsigned nparts, size_t elemsize, unsigned id, unsigned ld, unsigned *chunk_size, size_t *offset)
 

Detailed Description


Data Structure Documentation

◆ starpu_data_filter

struct starpu_data_filter

Describe a data partitioning operation, to be given to starpu_data_partition()

Data Fields

void(* filter_func )(void *father_interface, void *child_interface, struct starpu_data_filter *, unsigned id, unsigned nparts)
 
unsigned nchildren
 
unsigned(* get_nchildren )(struct starpu_data_filter *, starpu_data_handle_t initial_handle)
 
struct starpu_data_interface_ops *(* get_child_ops )(struct starpu_data_filter *, unsigned id)
 
unsigned filter_arg
 
void * filter_arg_ptr
 

Field Documentation

◆ filter_func

void(* starpu_data_filter::filter_func) (void *father_interface, void *child_interface, struct starpu_data_filter *, unsigned id, unsigned nparts)

Fill the child_interface structure with interface information for the id-th child of the parent father_interface (among nparts). The filter structure is provided, allowing to inspect the starpu_data_filter::filter_arg and starpu_data_filter::filter_arg_ptr parameters. The details of what needs to be filled in child_interface vary according to the data interface, but generally speaking:

  • the id field of the interface (the interface identifier, not to be confused with the id parameter of the filter function) is usually just copied over from the father, when the sub data has the same structure as the father, e.g. a subvector is a vector, a submatrix is a matrix, etc. This is however not the case for instance when dividing a BCSR matrix into its dense blocks, which then are matrices.
  • nx, ny and alike are usually divided by the number of subdata, depending how the subdivision is done (e.g. nx division vs ny division for vertical matrix division vs horizontal matrix division).
  • ld for matrix interfaces are usually just copied over: the leading dimension (ld) usually does not change.
  • elemsize is usually just copied over.
  • ptr, the pointer to the data, has to be computed according to the id parameter and the father's ptr, so as to point to the start of the sub data. This should however be done only if the father has ptr different from NULL: in the OpenCL case notably, the dev_handle and offset fields are used instead.
  • dev_handle should be just copied over from the parent.
  • offset has to be computed according to the id parameter and the father's offset, so as to provide the offset of the start of the sub data. This is notably used for the OpenCL case.

◆ nchildren

unsigned starpu_data_filter::nchildren

Number of parts to partition the data into.

◆ get_nchildren

unsigned(* starpu_data_filter::get_nchildren) (struct starpu_data_filter *, starpu_data_handle_t initial_handle)

Return the number of children. This can be used instead of starpu_data_filter::nchildren when the number of children depends on the actual data (e.g. the number of blocks in a sparse matrix).

◆ get_child_ops

struct starpu_data_interface_ops*(* starpu_data_filter::get_child_ops) (struct starpu_data_filter *, unsigned id)

When children use different data interface, return which interface is used by child number id.

◆ filter_arg

unsigned starpu_data_filter::filter_arg

Additional parameter for the filter function

◆ filter_arg_ptr

void* starpu_data_filter::filter_arg_ptr

Additional pointer parameter for the filter function, such as the sizes of the different parts.

Function Documentation

◆ starpu_data_partition()

void starpu_data_partition ( starpu_data_handle_t  initial_handle,
struct starpu_data_filter * f
)

Request the partitioning of initial_handle into several subdata according to the filter f.

Here is an example of how to use the function.

struct starpu_data_filter f =
{
    .filter_func = starpu_matrix_filter_block,
    .nchildren = nslicesx
};
starpu_data_partition(A_handle, &f);

◆ starpu_data_unpartition()

void starpu_data_unpartition ( starpu_data_handle_t  root_data,
unsigned  gathering_node 
)

Unapply the filter which has been applied to root_data, thus unpartitioning the data. The pieces of data are collected back into one big piece in the gathering_node (usually STARPU_MAIN_RAM). Tasks working on the partitioned data will be waited for by starpu_data_unpartition().

Here is an example of how to use the function.

starpu_data_unpartition(A_handle, STARPU_MAIN_RAM);

◆ starpu_data_get_child()

starpu_data_handle_t starpu_data_get_child ( starpu_data_handle_t  handle,
unsigned  i 
)

Return the i-th child of the given handle, which must have been partitioned beforehand.

◆ starpu_data_get_nb_children()

int starpu_data_get_nb_children ( starpu_data_handle_t  handle)

Return the number of children handle has been partitioned into.

◆ starpu_data_get_sub_data()

starpu_data_handle_t starpu_data_get_sub_data ( starpu_data_handle_t  root_data,
unsigned  depth,
  ... 
)

After partitioning a StarPU data by applying a filter, starpu_data_get_sub_data() can be used to get handles for each of the data portions. root_data is the parent data that was partitioned. depth is the number of filters to traverse (in case several filters have been applied, to e.g. partition in row blocks, and then in column blocks), and the subsequent parameters are the indexes. The function returns a handle to the subdata.

Here is an example of how to use the function.

starpu_data_handle_t h = starpu_data_get_sub_data(A_handle, 1, taskx);

◆ starpu_data_vget_sub_data()

starpu_data_handle_t starpu_data_vget_sub_data ( starpu_data_handle_t  root_data,
unsigned  depth,
va_list  pa 
)

Similar to starpu_data_get_sub_data() but use a va_list for the parameter list.

◆ starpu_data_map_filters()

void starpu_data_map_filters ( starpu_data_handle_t  root_data,
unsigned  nfilters,
  ... 
)

Apply nfilters filters to the handle designated by root_data recursively. nfilters pointers to variables of the type starpu_data_filter should be given.

◆ starpu_data_vmap_filters()

void starpu_data_vmap_filters ( starpu_data_handle_t  root_data,
unsigned  nfilters,
va_list  pa 
)

Apply nfilters filters to the handle designated by root_data recursively. Use a va_list of pointers to variables of the type starpu_data_filter.

◆ starpu_data_partition_plan()

void starpu_data_partition_plan ( starpu_data_handle_t  initial_handle,
struct starpu_data_filter * f,
starpu_data_handle_t * children
)

Plan to partition initial_handle into several subdata according to the filter f. The handles are returned into the children array, which has to be the same size as the number of parts described in f. These handles are not immediately usable, starpu_data_partition_submit() has to be called to submit the actual partitioning.

Here is an example of how to use the function:

starpu_data_handle_t children[nslicesx];
struct starpu_data_filter f =
{
    .filter_func = starpu_matrix_filter_block,
    .nchildren = nslicesx
};
starpu_data_partition_plan(A_handle, &f, children);

◆ starpu_data_partition_submit()

void starpu_data_partition_submit ( starpu_data_handle_t  initial_handle,
unsigned  nparts,
starpu_data_handle_t * children
)

Submit the actual partitioning of initial_handle into the nparts children handles. This call is asynchronous, it only submits that the partitioning should be done, so that the children handles can now be used to submit tasks, and initial_handle can not be used to submit tasks any more (to guarantee coherency). For instance,

starpu_data_partition_submit(A_handle, nslicesx, children);

◆ starpu_data_partition_readonly_submit()

void starpu_data_partition_readonly_submit ( starpu_data_handle_t  initial_handle,
unsigned  nparts,
starpu_data_handle_t * children
)

Similar to starpu_data_partition_submit(), but do not invalidate initial_handle. This allows to continue using it, but the application has to be careful not to write to initial_handle or children handles, only read from them, since the coherency is otherwise not guaranteed. This thus allows to submit various tasks which concurrently read from various partitions of the data.

When the application wants to write to initial_handle again, it should call starpu_data_unpartition_submit(), which will properly add dependencies between the reads on the children and the writes to be submitted.

If instead the application wants to write to children handles, it should call starpu_data_partition_readwrite_upgrade_submit(), which will correctly add dependencies between the reads on the initial_handle and the writes to be submitted.

◆ starpu_data_partition_readwrite_upgrade_submit()

void starpu_data_partition_readwrite_upgrade_submit ( starpu_data_handle_t  initial_handle,
unsigned  nparts,
starpu_data_handle_t * children
)

Assuming that a partitioning of initial_handle has already been submitted in read-only mode through starpu_data_partition_readonly_submit(), upgrade that partitioning into read-write mode for the children, by invalidating initial_handle and adding the necessary dependencies.

◆ starpu_data_unpartition_submit()

void starpu_data_unpartition_submit ( starpu_data_handle_t  initial_handle,
unsigned  nparts,
starpu_data_handle_t * children,
int  gathering_node 
)

Assuming that initial_handle is partitioned into children, submit an unpartitioning of initial_handle, i.e. submit a gathering of the pieces on the requested gathering_node memory node, and submit an invalidation of the children.

◆ starpu_data_unpartition_readonly_submit()

void starpu_data_unpartition_readonly_submit ( starpu_data_handle_t  initial_handle,
unsigned  nparts,
starpu_data_handle_t * children,
int  gathering_node 
)

Similar to starpu_data_unpartition_submit(), but do not invalidate the children handles. This allows to continue using them alongside initial_handle, but the application has to be careful not to write to initial_handle or children handles, only read from them, since the coherency is otherwise not guaranteed. This thus allows to submit various tasks which concurrently read from various partitions of the data.

◆ starpu_data_partition_clean()

void starpu_data_partition_clean ( starpu_data_handle_t  root_data,
unsigned  nparts,
starpu_data_handle_t * children
)

Clear the partition planning established between root_data and children with starpu_data_partition_plan(). This will notably submit an unregistration of all the children, which can thus not be used any more afterwards.

◆ starpu_data_unpartition_submit_sequential_consistency_cb()

void starpu_data_unpartition_submit_sequential_consistency_cb ( starpu_data_handle_t  initial_handle,
unsigned  nparts,
starpu_data_handle_t * children,
int  gather_node,
int  sequential_consistency,
void(*)(void *)  callback_func,
void *  callback_arg 
)

Similar to starpu_data_unpartition_submit_sequential_consistency() but allow to specify a callback function for the unpartitioning task.

◆ starpu_data_partition_submit_sequential_consistency()

void starpu_data_partition_submit_sequential_consistency ( starpu_data_handle_t  initial_handle,
unsigned  nparts,
starpu_data_handle_t * children,
int  sequential_consistency 
)

Similar to starpu_data_partition_submit() but also allow to specify the coherency to be used for the main data initial_handle through the parameter sequential_consistency.

◆ starpu_data_unpartition_submit_sequential_consistency()

void starpu_data_unpartition_submit_sequential_consistency ( starpu_data_handle_t  initial_handle,
unsigned  nparts,
starpu_data_handle_t * children,
int  gathering_node,
int  sequential_consistency 
)

Similar to starpu_data_unpartition_submit() but also allow to specify the coherency to be used for the main data initial_handle through the parameter sequential_consistency.

◆ starpu_data_partition_not_automatic()

void starpu_data_partition_not_automatic ( starpu_data_handle_t  handle)

Disable the automatic partitioning of the data handle for which an asynchronous plan has previously been submitted.

◆ starpu_bcsr_filter_canonical_block()

void starpu_bcsr_filter_canonical_block ( void *  father_interface,
void *  child_interface,
struct starpu_data_filter * f,
unsigned  id,
unsigned  nparts 
)

Partition a block-sparse matrix into dense matrices. starpu_data_filter::get_child_ops needs to be set to starpu_bcsr_filter_canonical_block_child_ops()

◆ starpu_bcsr_filter_canonical_block_child_ops()

struct starpu_data_interface_ops* starpu_bcsr_filter_canonical_block_child_ops ( struct starpu_data_filter * f,
unsigned  child 
)

Return the child_ops of the partition obtained with starpu_bcsr_filter_canonical_block().

◆ starpu_csr_filter_vertical_block()

void starpu_csr_filter_vertical_block ( void *  father_interface,
void *  child_interface,
struct starpu_data_filter * f,
unsigned  id,
unsigned  nparts 
)

Partition a block-sparse matrix into vertical block-sparse matrices.

◆ starpu_matrix_filter_block()

void starpu_matrix_filter_block ( void *  father_interface,
void *  child_interface,
struct starpu_data_filter * f,
unsigned  id,
unsigned  nparts 
)

Partition a dense Matrix along the x dimension, thus getting (x/nparts ,y) matrices. If nparts does not divide x, the last submatrix contains the remainder.

◆ starpu_matrix_filter_block_shadow()

void starpu_matrix_filter_block_shadow ( void *  father_interface,
void *  child_interface,
struct starpu_data_filter * f,
unsigned  id,
unsigned  nparts 
)

Partition a dense Matrix along the x dimension, with a shadow border filter_arg_ptr, thus getting ((x-2*shadow)/nparts +2*shadow,y) matrices. If nparts does not divide x-2*shadow, the last submatrix contains the remainder.

IMPORTANT: This can only be used for read-only access, as no coherency is enforced for the shadowed parts. A usage example is available in examples/filters/shadow2d.c

◆ starpu_matrix_filter_vertical_block()

void starpu_matrix_filter_vertical_block ( void *  father_interface,
void *  child_interface,
struct starpu_data_filter * f,
unsigned  id,
unsigned  nparts 
)

Partition a dense Matrix along the y dimension, thus getting (x,y/nparts) matrices. If nparts does not divide y, the last submatrix contains the remainder.

◆ starpu_matrix_filter_vertical_block_shadow()

void starpu_matrix_filter_vertical_block_shadow ( void *  father_interface,
void *  child_interface,
struct starpu_data_filter * f,
unsigned  id,
unsigned  nparts 
)

Partition a dense Matrix along the y dimension, with a shadow border filter_arg_ptr, thus getting (x,(y-2*shadow)/nparts +2*shadow) matrices. If nparts does not divide y-2*shadow, the last submatrix contains the remainder.

IMPORTANT: This can only be used for read-only access, as no coherency is enforced for the shadowed parts. A usage example is available in examples/filters/shadow2d.c

◆ starpu_vector_filter_block()

void starpu_vector_filter_block ( void *  father_interface,
void *  child_interface,
struct starpu_data_filter * f,
unsigned  id,
unsigned  nparts 
)

Return in child_interface the id th element of the vector represented by father_interface once partitioned in nparts chunks of equal size.

◆ starpu_vector_filter_block_shadow()

void starpu_vector_filter_block_shadow ( void *  father_interface,
void *  child_interface,
struct starpu_data_filter * f,
unsigned  id,
unsigned  nparts 
)

Return in child_interface the id-th element of the vector represented by father_interface once partitioned in nparts chunks of equal size with a shadow border filter_arg_ptr, thus getting a vector of size (n-2*shadow)/nparts+2*shadow. The filter_arg_ptr field of f must be the shadow size cast into void*.

IMPORTANT: This can only be used for read-only access, as no coherency is enforced for the shadowed parts. A usage example is available in examples/filters/shadow.c

◆ starpu_vector_filter_list_long()

void starpu_vector_filter_list_long ( void *  father_interface,
void *  child_interface,
struct starpu_data_filter * f,
unsigned  id,
unsigned  nparts 
)

Return in child_interface the id th element of the vector represented by father_interface once partitioned into nparts chunks according to the filter_arg_ptr field of f. The filter_arg_ptr field must point to an array of nparts long elements, each of which specifies the number of elements in each chunk of the partition.

◆ starpu_vector_filter_list()

void starpu_vector_filter_list ( void *  father_interface,
void *  child_interface,
struct starpu_data_filter * f,
unsigned  id,
unsigned  nparts 
)

Return in child_interface the id th element of the vector represented by father_interface once partitioned into nparts chunks according to the filter_arg_ptr field of f. The filter_arg_ptr field must point to an array of nparts uint32_t elements, each of which specifies the number of elements in each chunk of the partition.

◆ starpu_vector_filter_divide_in_2()

void starpu_vector_filter_divide_in_2 ( void *  father_interface,
void *  child_interface,
struct starpu_data_filter * f,
unsigned  id,
unsigned  nparts 
)

Return in child_interface the id th element of the vector represented by father_interface once partitioned in 2 chunks of equal size, ignoring nparts. Thus, id must be 0 or 1.

◆ starpu_block_filter_block()

void starpu_block_filter_block ( void *  father_interface,
void *  child_interface,
struct starpu_data_filter * f,
unsigned  id,
unsigned  nparts 
)

Partition a block along the X dimension, thus getting (x/nparts ,y,z) 3D matrices. If nparts does not divide x, the last submatrix contains the remainder.

◆ starpu_block_filter_block_shadow()

void starpu_block_filter_block_shadow ( void *  father_interface,
void *  child_interface,
struct starpu_data_filter * f,
unsigned  id,
unsigned  nparts 
)

Partition a block along the X dimension, with a shadow border filter_arg_ptr, thus getting ((x-2*shadow)/nparts +2*shadow,y,z) blocks. If nparts does not divide x-2*shadow, the last submatrix contains the remainder.

IMPORTANT: This can only be used for read-only access, as no coherency is enforced for the shadowed parts.

◆ starpu_block_filter_vertical_block()

void starpu_block_filter_vertical_block ( void *  father_interface,
void *  child_interface,
struct starpu_data_filter * f,
unsigned  id,
unsigned  nparts 
)

Partition a block along the Y dimension, thus getting (x,y/nparts ,z) blocks. If nparts does not divide y, the last submatrix contains the remainder.

◆ starpu_block_filter_vertical_block_shadow()

void starpu_block_filter_vertical_block_shadow ( void *  father_interface,
void *  child_interface,
struct starpu_data_filter * f,
unsigned  id,
unsigned  nparts 
)

Partition a block along the Y dimension, with a shadow border filter_arg_ptr, thus getting (x,(y-2*shadow)/nparts +2*shadow,z) 3D matrices. If nparts does not divide y-2*shadow, the last submatrix contains the remainder.

IMPORTANT: This can only be used for read-only access, as no coherency is enforced for the shadowed parts.

◆ starpu_block_filter_depth_block()

void starpu_block_filter_depth_block ( void *  father_interface,
void *  child_interface,
struct starpu_data_filter * f,
unsigned  id,
unsigned  nparts 
)

Partition a block along the Z dimension, thus getting (x,y,z/nparts) blocks. If nparts does not divide z, the last submatrix contains the remainder.

◆ starpu_block_filter_depth_block_shadow()

void starpu_block_filter_depth_block_shadow ( void *  father_interface,
void *  child_interface,
struct starpu_data_filter * f,
unsigned  id,
unsigned  nparts 
)

Partition a block along the Z dimension, with a shadow border filter_arg_ptr, thus getting (x,y,(z-2*shadow)/nparts +2*shadow) blocks. If nparts does not divide z-2*shadow, the last submatrix contains the remainder.

IMPORTANT: This can only be used for read-only access, as no coherency is enforced for the shadowed parts.

◆ starpu_filter_nparts_compute_chunk_size_and_offset()

void starpu_filter_nparts_compute_chunk_size_and_offset ( unsigned  n,
unsigned  nparts,
size_t  elemsize,
unsigned  id,
unsigned  ld,
unsigned *  chunk_size,
size_t *  offset 
)

Given an integer n, nparts the number of parts it must be divided in, and id the part currently considered, determine the chunk_size and the offset, taking into account the size of the elements stored in the data structure elemsize and ld, the leading dimension, which is most often 1.