StarPU Handbook
 All Data Structures Files Functions Variables Typedefs Enumerations Enumerator Macros Groups Pages
MPI Support

Initialisation

#define STARPU_USE_MPI
int starpu_mpi_init_comm (int *argc, char ***argv, int initialize_mpi, MPI_Comm comm)
int starpu_mpi_init (int *argc, char ***argv, int initialize_mpi)
int starpu_mpi_initialize (void)
int starpu_mpi_initialize_extended (int *rank, int *world_size)
int starpu_mpi_shutdown (void)
void starpu_mpi_comm_amounts_retrieve (size_t *comm_amounts)
int starpu_mpi_comm_size (MPI_Comm comm, int *size)
int starpu_mpi_comm_rank (MPI_Comm comm, int *rank)
int starpu_mpi_world_rank (void)
int starpu_mpi_world_size (void)

Communication

int starpu_mpi_send (starpu_data_handle_t data_handle, int dest, int mpi_tag, MPI_Comm comm)
int starpu_mpi_recv (starpu_data_handle_t data_handle, int source, int mpi_tag, MPI_Comm comm, MPI_Status *status)
int starpu_mpi_isend (starpu_data_handle_t data_handle, starpu_mpi_req *req, int dest, int mpi_tag, MPI_Comm comm)
int starpu_mpi_irecv (starpu_data_handle_t data_handle, starpu_mpi_req *req, int source, int mpi_tag, MPI_Comm comm)
int starpu_mpi_isend_detached (starpu_data_handle_t data_handle, int dest, int mpi_tag, MPI_Comm comm, void(*callback)(void *), void *arg)
int starpu_mpi_irecv_detached (starpu_data_handle_t data_handle, int source, int mpi_tag, MPI_Comm comm, void(*callback)(void *), void *arg)
int starpu_mpi_irecv_detached_sequential_consistency (starpu_data_handle_t data_handle, int source, int mpi_tag, MPI_Comm comm, void(*callback)(void *), void *arg, int sequential_consistency)
int starpu_mpi_issend (starpu_data_handle_t data_handle, starpu_mpi_req *req, int dest, int mpi_tag, MPI_Comm comm)
int starpu_mpi_issend_detached (starpu_data_handle_t data_handle, int dest, int mpi_tag, MPI_Comm comm, void(*callback)(void *), void *arg)
int starpu_mpi_wait (starpu_mpi_req *req, MPI_Status *status)
int starpu_mpi_test (starpu_mpi_req *req, int *flag, MPI_Status *status)
int starpu_mpi_barrier (MPI_Comm comm)
int starpu_mpi_wait_for_all (MPI_Comm comm)
int starpu_mpi_isend_detached_unlock_tag (starpu_data_handle_t data_handle, int dest, int mpi_tag, MPI_Comm comm, starpu_tag_t tag)
int starpu_mpi_irecv_detached_unlock_tag (starpu_data_handle_t data_handle, int source, int mpi_tag, MPI_Comm comm, starpu_tag_t tag)
int starpu_mpi_isend_array_detached_unlock_tag (unsigned array_size, starpu_data_handle_t *data_handle, int *dest, int *mpi_tag, MPI_Comm *comm, starpu_tag_t tag)
int starpu_mpi_irecv_array_detached_unlock_tag (unsigned array_size, starpu_data_handle_t *data_handle, int *source, int *mpi_tag, MPI_Comm *comm, starpu_tag_t tag)
int starpu_mpi_get_communication_tag (void)
void starpu_mpi_set_communication_tag (int tag)
int starpu_mpi_datatype_register (starpu_data_handle_t handle, starpu_mpi_datatype_allocate_func_t allocate_datatype_func, starpu_mpi_datatype_free_func_t free_datatype_func)
int starpu_mpi_datatype_unregister (starpu_data_handle_t handle)

Communication Cache

int starpu_mpi_cache_is_enabled ()
int starpu_mpi_cache_set (int enabled)
void starpu_mpi_cache_flush (MPI_Comm comm, starpu_data_handle_t data_handle)
void starpu_mpi_cache_flush_all_data (MPI_Comm comm)
int starpu_mpi_cached_receive (starpu_data_handle_t data_handle)
int starpu_mpi_cached_send (starpu_data_handle_t data_handle, int dest)

MPI Insert Task

#define starpu_mpi_data_register(data_handle, tag, rank)
#define starpu_data_set_tag
#define starpu_mpi_data_set_rank(handle, rank)
#define starpu_data_set_rank
#define starpu_data_get_rank
#define starpu_data_get_tag
#define STARPU_EXECUTE_ON_NODE
#define STARPU_EXECUTE_ON_DATA
#define STARPU_NODE_SELECTION_POLICY
void starpu_mpi_data_register_comm (starpu_data_handle_t data_handle, int tag, int rank, MPI_Comm comm)
void starpu_mpi_data_set_tag (starpu_data_handle_t handle, int tag)
void starpu_mpi_data_set_rank_comm (starpu_data_handle_t handle, int rank, MPI_Comm comm)
int starpu_mpi_data_get_rank (starpu_data_handle_t handle)
int starpu_mpi_data_get_tag (starpu_data_handle_t handle)
void starpu_mpi_data_migrate (MPI_Comm comm, starpu_data_handle_t handle, int new_rank)
int starpu_mpi_insert_task (MPI_Comm comm, struct starpu_codelet *codelet,...)
int starpu_mpi_task_insert (MPI_Comm comm, struct starpu_codelet *codelet,...)
struct starpu_task * starpu_mpi_task_build (MPI_Comm comm, struct starpu_codelet *codelet,...)
int starpu_mpi_task_post_build (MPI_Comm comm, struct starpu_codelet *codelet,...)
void starpu_mpi_get_data_on_node (MPI_Comm comm, starpu_data_handle_t data_handle, int node)
void starpu_mpi_get_data_on_node_detached (MPI_Comm comm, starpu_data_handle_t data_handle, int node, void(*callback)(void *), void *arg)

Node Selection Policy

int starpu_mpi_node_selection_get_current_policy ()
int starpu_mpi_node_selection_set_current_policy (int policy)
int starpu_mpi_node_selection_register_policy (starpu_mpi_select_node_policy_func_t policy_func)
int starpu_mpi_node_selection_unregister_policy (int policy)

Collective Operations

void starpu_mpi_redux_data (MPI_Comm comm, starpu_data_handle_t data_handle)
int starpu_mpi_scatter_detached (starpu_data_handle_t *data_handles, int count, int root, MPI_Comm comm, void(*scallback)(void *), void *sarg, void(*rcallback)(void *), void *rarg)
int starpu_mpi_gather_detached (starpu_data_handle_t *data_handles, int count, int root, MPI_Comm comm, void(*scallback)(void *), void *sarg, void(*rcallback)(void *), void *rarg)

Detailed Description

Macro Definition Documentation

#define STARPU_USE_MPI

This macro is defined when StarPU has been installed with MPI support. It should be used in your code to detect the availability of MPI.

#define starpu_mpi_data_register (   data_handle,
  tag,
  rank 
)

Register to MPI a StarPU data handle with the given tag, rank and the MPI communicator MPI_COMM_WORLD. It also automatically clears the MPI communication cache when unregistering the data.

#define starpu_data_set_tag

Symbol kept for backward compatibility. It calls the function starpu_mpi_data_set_tag().

#define starpu_mpi_data_set_rank (   handle,
  rank 
)

Register to MPI a StarPU data handle with the given rank and the MPI communicator MPI_COMM_WORLD. No tag will be defined. It also automatically clears the MPI communication cache when unregistering the data. Symbol kept for backward compatibility. It calls the function starpu_mpi_data_set_rank_comm() with the communicator MPI_COMM_WORLD.

#define starpu_data_set_rank

Register to MPI a StarPU data handle with the given rank and the MPI communicator MPI_COMM_WORLD. No tag will be defined. It also automatically clears the MPI communication cache when unregistering the data. Symbol kept for backward compatibility. It calls the function starpu_mpi_data_set_rank().

#define starpu_data_get_rank

Return the rank of the given data. Symbol kept for backward compatibility. It calls the function starpu_mpi_data_get_rank().

#define starpu_data_get_tag

Return the tag of the given data. Symbol kept for backward compatibility. It calls the function starpu_mpi_data_get_tag().

#define STARPU_EXECUTE_ON_NODE

This macro is used when calling starpu_mpi_task_insert(), and must be followed by an integer value which specifies the node on which to execute the codelet.

#define STARPU_EXECUTE_ON_DATA

This macro is used when calling starpu_mpi_task_insert(), and must be followed by a data handle to specify that the node owning the given data will execute the codelet.

#define STARPU_NODE_SELECTION_POLICY

This macro is used when calling starpu_mpi_task_insert(), and must be followed by an identifier of a node selection policy. This is needed when several nodes own data in STARPU_W mode.

Function Documentation

int starpu_mpi_init_comm ( int *  argc,
char ***  argv,
int  initialize_mpi,
MPI_Comm  comm 
)

Initializes the starpumpi library with the given communicator. initialize_mpi indicates if MPI should be initialized or not by StarPU. If the value is not 0, MPI will be initialized by calling MPI_Init_thread(argc, argv, MPI_THREAD_SERIALIZED, ...). starpu_init() must be called before starpu_mpi_init_comm().

int starpu_mpi_init ( int *  argc,
char ***  argv,
int  initialize_mpi 
)

Call starpu_mpi_init_comm() with the MPI communicator MPI_COMM_WORLD.

int starpu_mpi_initialize ( void  )
Deprecated:
This function has been made deprecated. One should use instead the function starpu_mpi_init(). This function does not call MPI_Init(), it should be called beforehand.
int starpu_mpi_initialize_extended ( int *  rank,
int *  world_size 
)
Deprecated:
This function has been deprecated. One should use the function starpu_mpi_init() instead. MPI will be initialized by starpumpi by calling MPI_Init_thread(argc, argv, MPI_THREAD_SERIALIZED, ...).
int starpu_mpi_shutdown ( void  )

Cleans the starpumpi library. This must be called between calling starpu_mpi functions and starpu_shutdown(). MPI_Finalize() will be called if StarPU-MPI has been initialized by starpu_mpi_init().

void starpu_mpi_comm_amounts_retrieve ( size_t *  comm_amounts)

Retrieve the current amount of communications from the current node in the array comm_amounts, which must have a size greater than or equal to the world size. Communication statistics must be enabled (see STARPU_COMM_STATS).

int starpu_mpi_comm_size ( MPI_Comm  comm,
int *  size 
)

Return in size the size of the communicator comm

int starpu_mpi_comm_rank ( MPI_Comm  comm,
int *  rank 
)

Return in rank the rank of the calling process in the communicator comm

int starpu_mpi_world_rank ( void  )

Return the rank of the calling process in the communicator MPI_COMM_WORLD

int starpu_mpi_world_size ( void  )

Return the size of the communicator MPI_COMM_WORLD

int starpu_mpi_send ( starpu_data_handle_t  data_handle,
int  dest,
int  mpi_tag,
MPI_Comm  comm 
)

Performs a standard-mode, blocking send of data_handle to the node dest using the message tag mpi_tag within the communicator comm.

int starpu_mpi_recv ( starpu_data_handle_t  data_handle,
int  source,
int  mpi_tag,
MPI_Comm  comm,
MPI_Status *  status 
)

Performs a standard-mode, blocking receive in data_handle from the node source using the message tag mpi_tag within the communicator comm.

int starpu_mpi_isend ( starpu_data_handle_t  data_handle,
starpu_mpi_req *  req,
int  dest,
int  mpi_tag,
MPI_Comm  comm 
)

Posts a standard-mode, non blocking send of data_handle to the node dest using the message tag mpi_tag within the communicator comm. After the call, the pointer to the request req can be used to test or to wait for the completion of the communication.

int starpu_mpi_irecv ( starpu_data_handle_t  data_handle,
starpu_mpi_req *  req,
int  source,
int  mpi_tag,
MPI_Comm  comm 
)

Posts a nonblocking receive in data_handle from the node source using the message tag mpi_tag within the communicator comm. After the call, the pointer to the request req can be used to test or to wait for the completion of the communication.

int starpu_mpi_isend_detached ( starpu_data_handle_t  data_handle,
int  dest,
int  mpi_tag,
MPI_Comm  comm,
void(*)(void *)  callback,
void *  arg 
)

Posts a standard-mode, non blocking send of data_handle to the node dest using the message tag mpi_tag within the communicator comm. On completion, the callback function is called with the argument arg. Similarly to the pthread detached functionality, when a detached communication completes, its resources are automatically released back to the system, there is no need to test or to wait for the completion of the request.

int starpu_mpi_irecv_detached ( starpu_data_handle_t  data_handle,
int  source,
int  mpi_tag,
MPI_Comm  comm,
void(*)(void *)  callback,
void *  arg 
)

Posts a nonblocking receive in data_handle from the node source using the message tag mpi_tag within the communicator comm. On completion, the callback function is called with the argument arg. Similarly to the pthread detached functionality, when a detached communication completes, its resources are automatically released back to the system, there is no need to test or to wait for the completion of the request.

int starpu_mpi_irecv_detached_sequential_consistency ( starpu_data_handle_t  data_handle,
int  source,
int  mpi_tag,
MPI_Comm  comm,
void(*)(void *)  callback,
void *  arg,
int  sequential_consistency 
)

Posts a nonblocking receive in data_handle from the node source using the message tag mpi_tag within the communicator comm. On completion, the callback function is called with the argument arg. The parameter sequential_consistency makes it possible to enable or disable the sequential consistency for data_handle (sequential consistency will be enabled or disabled based on the value of the parameter sequential_consistency and the value of the sequential consistency defined for data_handle). Similarly to the pthread detached functionality, when a detached communication completes, its resources are automatically released back to the system; there is no need to test or to wait for the completion of the request.

int starpu_mpi_issend ( starpu_data_handle_t  data_handle,
starpu_mpi_req *  req,
int  dest,
int  mpi_tag,
MPI_Comm  comm 
)

Performs a synchronous-mode, non-blocking send of data_handle to the node dest using the message tag mpi_tag within the communicator comm.

int starpu_mpi_issend_detached ( starpu_data_handle_t  data_handle,
int  dest,
int  mpi_tag,
MPI_Comm  comm,
void(*)(void *)  callback,
void *  arg 
)

Performs a synchronous-mode, non-blocking send of data_handle to the node dest using the message tag mpi_tag within the communicator comm. On completion, the callback function is called with the argument arg. Similarly to the pthread detached functionality, when a detached communication completes, its resources are automatically released back to the system, there is no need to test or to wait for the completion of the request.

int starpu_mpi_wait ( starpu_mpi_req *  req,
MPI_Status *  status 
)

Returns when the operation identified by request req is complete.

int starpu_mpi_test ( starpu_mpi_req *  req,
int *  flag,
MPI_Status *  status 
)

If the operation identified by req is complete, set flag to 1. The status object is set to contain information on the completed operation.

int starpu_mpi_barrier ( MPI_Comm  comm)

Blocks the caller until all group members of the communicator comm have called it.

int starpu_mpi_wait_for_all ( MPI_Comm  comm)

Wait until all StarPU tasks and communications for the given communicator are completed.

int starpu_mpi_isend_detached_unlock_tag ( starpu_data_handle_t  data_handle,
int  dest,
int  mpi_tag,
MPI_Comm  comm,
starpu_tag_t  tag 
)

Posts a standard-mode, non blocking send of data_handle to the node dest using the message tag mpi_tag within the communicator comm. On completion, tag is unlocked.

int starpu_mpi_irecv_detached_unlock_tag ( starpu_data_handle_t  data_handle,
int  source,
int  mpi_tag,
MPI_Comm  comm,
starpu_tag_t  tag 
)

Posts a nonblocking receive in data_handle from the node source using the message tag mpi_tag within the communicator comm. On completion, tag is unlocked.

int starpu_mpi_isend_array_detached_unlock_tag ( unsigned  array_size,
starpu_data_handle_t *  data_handle,
int *  dest,
int *  mpi_tag,
MPI_Comm *  comm,
starpu_tag_t  tag 
)

Posts array_size standard-mode, non-blocking sends. Each post sends the n-th data of the array data_handle to the n-th node of the array dest using the n-th message tag of the array mpi_tag within the n-th communicator of the array comm. On completion of all the requests, tag is unlocked.

int starpu_mpi_irecv_array_detached_unlock_tag ( unsigned  array_size,
starpu_data_handle_t *  data_handle,
int *  source,
int *  mpi_tag,
MPI_Comm *  comm,
starpu_tag_t  tag 
)

Posts array_size nonblocking receives. Each post receives in the n-th data of the array data_handle from the n-th node of the array source using the n-th message tag of the array mpi_tag within the n-th communicator of the array comm. On completion of all the requests, tag is unlocked.

int starpu_mpi_get_communication_tag ( void  )

todo

void starpu_mpi_set_communication_tag ( int  tag)

todo

int starpu_mpi_datatype_register ( starpu_data_handle_t  handle,
starpu_mpi_datatype_allocate_func_t  allocate_datatype_func,
starpu_mpi_datatype_free_func_t  free_datatype_func 
)

Register functions to create and free a MPI datatype for the given handle. It is important that the function is called before any communication can take place for a data with the given handle. See Exchanging User Defined Data Interface for an example.

int starpu_mpi_datatype_unregister ( starpu_data_handle_t  handle)

Unregister the MPI datatype functions stored for the interface of the given handle.

int starpu_mpi_cache_is_enabled ( )

Return 1 if the communication cache is enabled, 0 otherwise

int starpu_mpi_cache_set ( int  enabled)

If enabled is 1, enable the communication cache. Otherwise, clean the cache if it was enabled and disable it.

void starpu_mpi_cache_flush ( MPI_Comm  comm,
starpu_data_handle_t  data_handle 
)

Clear the send and receive communication cache for the data data_handle and invalidate the value. The function has to be called at the same point of task graph submission by all the MPI nodes on which the handle was registered. The function does nothing if the cache mechanism is disabled (see STARPU_MPI_CACHE).

void starpu_mpi_cache_flush_all_data ( MPI_Comm  comm)

Clear the send and receive communication cache for all data and invalidate their values. The function has to be called at the same point of task graph submission by all the MPI nodes. The function does nothing if the cache mechanism is disabled (see STARPU_MPI_CACHE).

int starpu_mpi_cached_receive ( starpu_data_handle_t  data_handle)

Test whether data_handle is cached for reception, i.e. the value was previously received from the owner node, and not flushed since then.

int starpu_mpi_cached_send ( starpu_data_handle_t  data_handle,
int  dest 
)

Test whether data_handle is cached for emission to node dest, i.e. the value was previously sent to dest, and not flushed since then.

void starpu_mpi_data_register_comm ( starpu_data_handle_t  data_handle,
int  tag,
int  rank,
MPI_Comm  comm 
)

Register to MPI a StarPU data handle with the given tag, rank and MPI communicator. It also automatically clears the MPI communication cache when unregistering the data.

void starpu_mpi_data_set_tag ( starpu_data_handle_t  handle,
int  tag 
)

Register to MPI a StarPU data handle with the given tag. No rank will be defined. It also automatically clears the MPI communication cache when unregistering the data.

void starpu_mpi_data_set_rank_comm ( starpu_data_handle_t  handle,
int  rank,
MPI_Comm  comm 
)

Register to MPI a StarPU data handle with the given rank and given communicator. No tag will be defined. It also automatically clears the MPI communication cache when unregistering the data.

int starpu_mpi_data_get_rank ( starpu_data_handle_t  handle)

Return the rank of the given data.

int starpu_mpi_data_get_tag ( starpu_data_handle_t  handle)

Return the tag of the given data.

void starpu_mpi_data_migrate ( MPI_Comm  comm,
starpu_data_handle_t  handle,
int  new_rank 
)

Migrate the data onto the new_rank MPI node. This means both transferring the data to node new_rank if it hasn't been transferred already, and setting the home node of the data to the new node. Further data transfers triggered by starpu_mpi_task_insert() will be done from that new node. This function thus needs to be called on all nodes which have registered the data. This also flushes the cache for this data to avoid incoherencies.

int starpu_mpi_insert_task ( MPI_Comm  comm,
struct starpu_codelet *  codelet,
  ... 
)

This function does the same as the function starpu_mpi_task_insert(). It has been kept to avoid breaking old codes.

int starpu_mpi_task_insert ( MPI_Comm  comm,
struct starpu_codelet *  codelet,
  ... 
)

Create and submit a task corresponding to codelet with the following arguments. The argument list must be zero-terminated.

The arguments following the codelet are of the same types as for the function starpu_task_insert(). Access modes for data can also be set with STARPU_SSEND to specify the data has to be sent using a synchronous and non-blocking mode (see starpu_mpi_issend()). The extra argument STARPU_EXECUTE_ON_NODE followed by an integer makes it possible to specify the MPI node to execute the codelet. It is also possible to specify that the node owning a specific data will execute the codelet, by using STARPU_EXECUTE_ON_DATA followed by a data handle.

The internal algorithm is as follows:

  1. Find out which MPI node is going to execute the codelet.
  2. Send and receive data as requested. Nodes owning data which need to be read by the task are sending them to the MPI node which will execute it. The latter receives them.
  3. Execute the codelet. This is done by the MPI node selected in the 1st step of the algorithm.
  4. If several MPI nodes own data to be written to, send written data back to their owners.

The algorithm also includes a communication cache mechanism that allows not to send data twice to the same MPI node, unless the data has been modified. The cache can be disabled (see STARPU_MPI_CACHE).

struct starpu_task * starpu_mpi_task_build ( MPI_Comm  comm,
struct starpu_codelet *  codelet,
  ... 
)

Create a task corresponding to codelet with the following arguments. The argument list must be zero-terminated. The function performs the first two steps of the function starpu_mpi_task_insert(). Only the MPI node selected in the first step of the algorithm will return a valid task structure which can then be submitted, others will return NULL. The function starpu_mpi_task_post_build() MUST be called after that on all nodes, and after the submission of the task on the node which creates it, with the SAME list of arguments.

int starpu_mpi_task_post_build ( MPI_Comm  comm,
struct starpu_codelet *  codelet,
  ... 
)

This function MUST be called after a call to starpu_mpi_task_build(), with the SAME list of arguments. It performs the fourth – last – step of the algorithm described in starpu_mpi_task_insert().

void starpu_mpi_get_data_on_node ( MPI_Comm  comm,
starpu_data_handle_t  data_handle,
int  node 
)

Transfer data data_handle to MPI node node, sending it from its owner if needed. At least the target node and the owner have to call the function.

void starpu_mpi_get_data_on_node_detached ( MPI_Comm  comm,
starpu_data_handle_t  data_handle,
int  node,
void(*)(void *)  callback,
void *  arg 
)

Transfer data data_handle to MPI node node, sending it from its owner if needed. At least the target node and the owner have to call the function. On reception, the callback function is called with the argument arg.

int starpu_mpi_node_selection_get_current_policy ( )

Return the current policy used to select the node which will execute the codelet

int starpu_mpi_node_selection_set_current_policy ( int  policy)

Set the current policy used to select the node which will execute the codelet. The policy STARPU_MPI_NODE_SELECTION_MOST_R_DATA selects the node having the most data in R mode so as to minimize the amount of data to be transferred.

int starpu_mpi_node_selection_register_policy ( starpu_mpi_select_node_policy_func_t  policy_func)

Register a new policy which can then be used when there are several nodes owning data in W mode. Here is an example of a function defining a node selection policy. The codelet will be executed on the node owning the first data accessed in R mode with a size bigger than 1M, or on the node 0 if no data fits the given size.

int my_node_selection_policy(int me, int nb_nodes, struct starpu_data_descr *descr, int nb_data)
{
// me is the current MPI rank
// nb_nodes is the number of MPI nodes
// descr is the description of the data specified when calling starpu_mpi_task_insert
// nb_data is the number of data in descr
int i;
for(i= 0 ; i<nb_data ; i++)
{
starpu_data_handle_t data = descr[i].handle;
enum starpu_data_access_mode mode = descr[i].mode;
if (mode & STARPU_R)
{
int rank = starpu_data_get_rank(data);
size_t size = starpu_data_get_size(data);
if (size > 1024*1024) return rank;
}
}
return 0;
}
int starpu_mpi_node_selection_unregister_policy ( int  policy)

Unregister a previously registered policy.

void starpu_mpi_redux_data ( MPI_Comm  comm,
starpu_data_handle_t  data_handle 
)

Perform a reduction on the given data. All nodes send the data to its owner node which will perform a reduction.

int starpu_mpi_scatter_detached ( starpu_data_handle_t *  data_handles,
int  count,
int  root,
MPI_Comm  comm,
void(*)(void *)  scallback,
void *  sarg,
void(*)(void *)  rcallback,
void *  rarg 
)

Scatter data among processes of the communicator based on the ownership of the data. For each data of the array data_handles, the process root sends the data to the process owning this data. Processes receiving data must have valid data handles to receive them. On completion of the collective communication, the scallback function is called with the argument sarg on the process root, the rcallback function is called with the argument rarg on any other process.

int starpu_mpi_gather_detached ( starpu_data_handle_t *  data_handles,
int  count,
int  root,
MPI_Comm  comm,
void(*)(void *)  scallback,
void *  sarg,
void(*)(void *)  rcallback,
void *  rarg 
)

Gather data from the different processes of the communicator onto the process root. Each process owning data handle in the array data_handles will send them to the process root. The process root must have valid data handles to receive the data. On completion of the collective communication, the rcallback function is called with the argument rarg on the process root, the scallback function is called with the argument sarg on any other process.