StarPU Handbook
Performance Model

Data Structures

struct  starpu_perfmodel_device
 
struct  starpu_perfmodel_arch
 
struct  starpu_perfmodel_history_entry
 
struct  starpu_perfmodel_history_list
 
struct  starpu_perfmodel_regression_model
 
struct  starpu_perfmodel_per_arch
 
struct  starpu_perfmodel
 

Macros

#define STARPU_NARCH
 
#define starpu_per_arch_perfmodel
 

Typedefs

typedef double(* starpu_perfmodel_per_arch_cost_function) (struct starpu_task *task, struct starpu_perfmodel_arch *arch, unsigned nimpl)
 
typedef size_t(* starpu_perfmodel_per_arch_size_base) (struct starpu_task *task, struct starpu_perfmodel_arch *arch, unsigned nimpl)
 
typedef struct _starpu_perfmodel_state * starpu_perfmodel_state_t
 

Enumerations

enum  starpu_perfmodel_type {
  STARPU_PERFMODEL_INVALID, STARPU_PER_ARCH, STARPU_COMMON, STARPU_HISTORY_BASED,
  STARPU_REGRESSION_BASED, STARPU_NL_REGRESSION_BASED, STARPU_MULTIPLE_REGRESSION_BASED
}
 

Functions

void starpu_perfmodel_init (struct starpu_perfmodel *model)
 
int starpu_perfmodel_load_file (const char *filename, struct starpu_perfmodel *model)
 
int starpu_perfmodel_load_symbol (const char *symbol, struct starpu_perfmodel *model)
 
int starpu_perfmodel_unload_model (struct starpu_perfmodel *model)
 
void starpu_perfmodel_get_model_path (const char *symbol, char *path, size_t maxlen)
 
void starpu_perfmodel_dump_xml (FILE *output, struct starpu_perfmodel *model)
 
void starpu_perfmodel_free_sampling_directories (void)
 
struct starpu_perfmodel_arch * starpu_worker_get_perf_archtype (int workerid, unsigned sched_ctx_id)
 
int starpu_perfmodel_get_narch_combs (void)
 
int starpu_perfmodel_arch_comb_add (int ndevices, struct starpu_perfmodel_device *devices)
 
int starpu_perfmodel_arch_comb_get (int ndevices, struct starpu_perfmodel_device *devices)
 
struct starpu_perfmodel_arch * starpu_perfmodel_arch_comb_fetch (int comb)
 
struct starpu_perfmodel_per_arch * starpu_perfmodel_get_model_per_arch (struct starpu_perfmodel *model, struct starpu_perfmodel_arch *arch, unsigned impl)
 
struct starpu_perfmodel_per_arch * starpu_perfmodel_get_model_per_devices (struct starpu_perfmodel *model, int impl,...)
 
int starpu_perfmodel_set_per_devices_cost_function (struct starpu_perfmodel *model, int impl, starpu_perfmodel_per_arch_cost_function func,...)
 
int starpu_perfmodel_set_per_devices_size_base (struct starpu_perfmodel *model, int impl, starpu_perfmodel_per_arch_size_base func,...)
 
void starpu_perfmodel_debugfilepath (struct starpu_perfmodel *model, struct starpu_perfmodel_arch *arch, char *path, size_t maxlen, unsigned nimpl)
 
char * starpu_perfmodel_get_archtype_name (enum starpu_worker_archtype archtype)
 
void starpu_perfmodel_get_arch_name (struct starpu_perfmodel_arch *arch, char *archname, size_t maxlen, unsigned nimpl)
 
double starpu_perfmodel_history_based_expected_perf (struct starpu_perfmodel *model, struct starpu_perfmodel_arch *arch, uint32_t footprint)
 
void starpu_perfmodel_initialize (void)
 
int starpu_perfmodel_list (FILE *output)
 
void starpu_perfmodel_print (struct starpu_perfmodel *model, struct starpu_perfmodel_arch *arch, unsigned nimpl, char *parameter, uint32_t *footprint, FILE *output)
 
int starpu_perfmodel_print_all (struct starpu_perfmodel *model, char *arch, char *parameter, uint32_t *footprint, FILE *output)
 
int starpu_perfmodel_print_estimations (struct starpu_perfmodel *model, uint32_t footprint, FILE *output)
 
int starpu_perfmodel_list_combs (FILE *output, struct starpu_perfmodel *model)
 
void starpu_perfmodel_update_history (struct starpu_perfmodel *model, struct starpu_task *task, struct starpu_perfmodel_arch *arch, unsigned cpuid, unsigned nimpl, double measured)
 
void starpu_perfmodel_directory (FILE *output)
 
void starpu_bus_print_bandwidth (FILE *f)
 
void starpu_bus_print_affinity (FILE *f)
 
void starpu_bus_print_filenames (FILE *f)
 
double starpu_transfer_bandwidth (unsigned src_node, unsigned dst_node)
 
double starpu_transfer_latency (unsigned src_node, unsigned dst_node)
 
double starpu_transfer_predict (unsigned src_node, unsigned dst_node, size_t size)
 

Variables

struct starpu_perfmodel starpu_perfmodel_nop
 

Detailed Description


Data Structure Documentation

◆ starpu_perfmodel_device

struct starpu_perfmodel_device

todo

Data Fields
enum starpu_worker_archtype type

type of the device

int devid

identifier of the precise device

int ncores

number of executions in parallel, minus 1

◆ starpu_perfmodel_arch

struct starpu_perfmodel_arch

todo

Data Fields
int ndevices

number of the devices for the given arch

struct starpu_perfmodel_device * devices

list of the devices for the given arch

◆ starpu_perfmodel_history_entry

struct starpu_perfmodel_history_entry
Data Fields
double mean

mean_n = 1/n sum

double deviation

n dev_n = sum2 - 1/n (sum)^2

double sum

sum of samples (in µs)

double sum2

sum of samples^2

unsigned nsample

number of samples

unsigned nerror
uint32_t footprint

data footprint

size_t size

in bytes

double flops

Provided by the application

double duration
starpu_tag_t tag
double * parameters

◆ starpu_perfmodel_history_list

struct starpu_perfmodel_history_list
Data Fields
struct
starpu_perfmodel_history_list *
next
struct
starpu_perfmodel_history_entry *
entry

◆ starpu_perfmodel_regression_model

struct starpu_perfmodel_regression_model

todo

Data Fields
double sumlny

sum of ln(measured)

double sumlnx

sum of ln(size)

double sumlnx2

sum of ln(size)^2

unsigned long minx

minimum size

unsigned long maxx

maximum size

double sumlnxlny

sum of ln(size)*ln(measured)

double alpha

estimated = alpha * size ^ beta

double beta

estimated = alpha * size ^ beta

unsigned valid

whether the linear regression model is valid (i.e. enough measures)

double a

estimated = a size ^b + c

double b

estimated = a size ^b + c

double c

estimated = a size ^b + c

unsigned nl_valid

whether the non-linear regression model is valid (i.e. enough measures)

unsigned nsample

number of sample values for non-linear regression

double * coeff

list of computed coefficients for multiple linear regression model

unsigned ncoeff

number of coefficients for multiple linear regression model

unsigned multi_valid

whether the multiple linear regression model is valid

◆ starpu_perfmodel_per_arch

struct starpu_perfmodel_per_arch

information about the performance model of a given arch.

Data Fields

starpu_perfmodel_per_arch_cost_function cost_function
 
starpu_perfmodel_per_arch_size_base size_base
 
char debug_path [256]
 

Private Attributes

struct starpu_perfmodel_history_table * history
 
struct starpu_perfmodel_history_list * list
 
struct starpu_perfmodel_regression_model regression
 

Field Documentation

◆ cost_function

starpu_perfmodel_per_arch_cost_function starpu_perfmodel_per_arch::cost_function

Used by STARPU_PER_ARCH, must point to functions which take a task, the target arch and implementation number (as a mere convenience, since the array is already indexed by these), and must return a task duration estimation in micro-seconds.

◆ size_base

starpu_perfmodel_per_arch_size_base starpu_perfmodel_per_arch::size_base

Same as in structure starpu_perfmodel, but per-arch, in case it depends on the architecture-specific implementation.

◆ history

struct starpu_perfmodel_history_table* starpu_perfmodel_per_arch::history
private

The history of performance measurements.

◆ list

struct starpu_perfmodel_history_list* starpu_perfmodel_per_arch::list
private

Used by STARPU_HISTORY_BASED, STARPU_NL_REGRESSION_BASED and STARPU_MULTIPLE_REGRESSION_BASED, records all execution history measures.

◆ regression

struct starpu_perfmodel_regression_model starpu_perfmodel_per_arch::regression
private

Used by STARPU_REGRESSION_BASED, STARPU_NL_REGRESSION_BASED and STARPU_MULTIPLE_REGRESSION_BASED, contains the estimated factors of the regression.

◆ starpu_perfmodel

struct starpu_perfmodel

Contain all information about a performance model. At least the type and symbol fields have to be filled when defining a performance model for a codelet. For compatibility, make sure to initialize the whole structure to zero, either by using explicit memset, or by letting the compiler implicitly do it in e.g. static storage case. If not provided, other fields have to be zero.

Data Fields

enum starpu_perfmodel_type type
 
double(* cost_function )(struct starpu_task *, unsigned nimpl)
 
double(* arch_cost_function )(struct starpu_task *, struct starpu_perfmodel_arch *arch, unsigned nimpl)
 
size_t(* size_base )(struct starpu_task *, unsigned nimpl)
 
uint32_t(* footprint )(struct starpu_task *)
 
const char * symbol
 
void(* parameters )(struct starpu_task *task, double *parameters)
 

Private Attributes

unsigned is_loaded
 
unsigned benchmarking
 
unsigned is_init
 
const char ** parameters_names
 
unsigned nparameters
 
unsigned ** combinations
 
unsigned ncombinations
 
starpu_perfmodel_state_t state
 

Field Documentation

◆ type

enum starpu_perfmodel_type starpu_perfmodel::type

type of performance model

◆ cost_function

double(* starpu_perfmodel::cost_function) (struct starpu_task *, unsigned nimpl)

Used by STARPU_COMMON. Take a task and implementation number, and must return a task duration estimation in micro-seconds.

◆ arch_cost_function

double(* starpu_perfmodel::arch_cost_function) (struct starpu_task *, struct starpu_perfmodel_arch *arch, unsigned nimpl)

Used by STARPU_COMMON. Take a task, an arch and implementation number, and must return a task duration estimation in micro-seconds on that arch.

◆ size_base

size_t(* starpu_perfmodel::size_base) (struct starpu_task *, unsigned nimpl)

Used by STARPU_HISTORY_BASED, STARPU_REGRESSION_BASED and STARPU_NL_REGRESSION_BASED. If not NULL, take a task and implementation number, and return the size to be used as index to distinguish histories and as a base for regressions.

◆ footprint

uint32_t(* starpu_perfmodel::footprint) (struct starpu_task *)

Used by STARPU_HISTORY_BASED. If not NULL, take a task and return the footprint to be used as index to distinguish histories. The default is to use the starpu_task_data_footprint() function.

◆ symbol

const char* starpu_perfmodel::symbol

symbol name for the performance model, which will be used as file name to store the model. It must be set otherwise the model will be ignored.

◆ is_loaded

unsigned starpu_perfmodel::is_loaded
private

Whether the performance model is already loaded from the disk.

◆ parameters_names

const char** starpu_perfmodel::parameters_names
private

Names of parameters used for multiple linear regression models (M, N, K)

◆ nparameters

unsigned starpu_perfmodel::nparameters
private

Number of parameters used for multiple linear regression models

◆ combinations

unsigned** starpu_perfmodel::combinations
private

Table of combinations of parameters (and the exponents) used for multiple linear regression models

◆ ncombinations

unsigned starpu_perfmodel::ncombinations
private

Number of combinations of parameters used for multiple linear regression models

Enumeration Type Documentation

◆ starpu_perfmodel_type

todo

Enumerator
STARPU_PER_ARCH 

Application-provided per-arch cost model function

STARPU_COMMON 

Application-provided common cost model function, with per-arch factor

STARPU_HISTORY_BASED 

Automatic history-based cost model

STARPU_REGRESSION_BASED 

Automatic linear regression-based cost model (alpha * size ^ beta)

STARPU_NL_REGRESSION_BASED 

Automatic non-linear regression-based cost model (a * size ^ b + c)

STARPU_MULTIPLE_REGRESSION_BASED 

Automatic multiple linear regression-based cost model. Application provides parameters, their combinations and exponents.

Function Documentation

◆ starpu_perfmodel_init()

void starpu_perfmodel_init ( struct starpu_perfmodel *  model)

Initialize the model performance model structure. This is automatically called when e.g. submitting a task using a codelet using this performance model.

◆ starpu_perfmodel_load_file()

int starpu_perfmodel_load_file ( const char *  filename,
struct starpu_perfmodel *  model 
)

Load the performance model found in the file named filename. model has to be completely zero, and will be filled with the information stored in the given file.

◆ starpu_perfmodel_load_symbol()

int starpu_perfmodel_load_symbol ( const char *  symbol,
struct starpu_perfmodel *  model 
)

Load a given performance model. model has to be completely zero, and will be filled with the information stored in $STARPU_HOME/.starpu. The function is intended to be used by external tools that want to read the performance model files.

◆ starpu_perfmodel_unload_model()

int starpu_perfmodel_unload_model ( struct starpu_perfmodel *  model)

Unload model which has been previously loaded through the function starpu_perfmodel_load_symbol()

◆ starpu_perfmodel_get_model_path()

void starpu_perfmodel_get_model_path ( const char *  symbol,
char *  path,
size_t  maxlen 
)

Fills path (supposed to be maxlen long) with the full path to the performance model file for symbol symbol. This path can later on be used for instance with starpu_perfmodel_load_file().

◆ starpu_perfmodel_dump_xml()

void starpu_perfmodel_dump_xml ( FILE *  output,
struct starpu_perfmodel *  model 
)

Dump performance model model to output stream output, in XML format.

◆ starpu_perfmodel_free_sampling_directories()

void starpu_perfmodel_free_sampling_directories ( void  )

Free internal memory used for sampling directory management. It should only be called by an application which is not calling starpu_shutdown(), as starpu_shutdown() already calls this function. See for example tools/starpu_perfmodel_display.c.

◆ starpu_worker_get_perf_archtype()

struct starpu_perfmodel_arch* starpu_worker_get_perf_archtype ( int  workerid,
unsigned  sched_ctx_id 
)

Return the architecture type of the worker workerid.

◆ starpu_perfmodel_debugfilepath()

void starpu_perfmodel_debugfilepath ( struct starpu_perfmodel *  model,
struct starpu_perfmodel_arch *  arch,
char *  path,
size_t  maxlen,
unsigned  nimpl 
)

Return the path to the debugging information for the performance model.

◆ starpu_perfmodel_get_arch_name()

void starpu_perfmodel_get_arch_name ( struct starpu_perfmodel_arch *  arch,
char *  archname,
size_t  maxlen,
unsigned  nimpl 
)

Return the architecture name for arch

◆ starpu_perfmodel_history_based_expected_perf()

double starpu_perfmodel_history_based_expected_perf ( struct starpu_perfmodel *  model,
struct starpu_perfmodel_arch *  arch,
uint32_t  footprint 
)

Return the estimated time of a task with the given model and the given footprint.

◆ starpu_perfmodel_initialize()

void starpu_perfmodel_initialize ( void  )

If starpu_init() is not used, starpu_perfmodel_initialize() should be called before calling starpu_perfmodel_* functions.

◆ starpu_perfmodel_list()

int starpu_perfmodel_list ( FILE *  output)

Print a list of all performance models on output

◆ starpu_perfmodel_update_history()

void starpu_perfmodel_update_history ( struct starpu_perfmodel *  model,
struct starpu_task *  task,
struct starpu_perfmodel_arch *  arch,
unsigned  cpuid,
unsigned  nimpl,
double  measured 
)

Feed the performance model model with an explicit measurement measured (in µs), in addition to measurements done by StarPU itself. This can be useful when the application already has an existing set of measurements done in good conditions, that StarPU could benefit from instead of doing on-line measurements. An example of use can be seen in Performance Model Example.

◆ starpu_perfmodel_directory()

void starpu_perfmodel_directory ( FILE *  output)

Print the directory name storing performance models on output

◆ starpu_bus_print_bandwidth()

void starpu_bus_print_bandwidth ( FILE *  f)

Print a matrix of bus bandwidths on f.

◆ starpu_bus_print_affinity()

void starpu_bus_print_affinity ( FILE *  f)

Print the affinity devices on f.

◆ starpu_bus_print_filenames()

void starpu_bus_print_filenames ( FILE *  f)

Print on f the name of the files containing the matrix of bus bandwidths, the affinity devices and the latency.

◆ starpu_transfer_bandwidth()

double starpu_transfer_bandwidth ( unsigned  src_node,
unsigned  dst_node 
)

Return the bandwidth of data transfer between two memory nodes

◆ starpu_transfer_latency()

double starpu_transfer_latency ( unsigned  src_node,
unsigned  dst_node 
)

Return the latency of data transfer between two memory nodes

◆ starpu_transfer_predict()

double starpu_transfer_predict ( unsigned  src_node,
unsigned  dst_node,
size_t  size 
)

Return the estimated time to transfer a given size between two memory nodes.

Variable Documentation

◆ starpu_perfmodel_nop

struct starpu_perfmodel starpu_perfmodel_nop

Performance model which just always returns 1µs.