StarPU

StarPU Morse Nightly Tests

Buildbot

Buildbot

Note

Since December 15th, 2017, all benchmarks have been run on sirocco nodes (sequential benchmarks were previously run on mirage nodes).

Contents

Tests for morse/morse_seq.txt

Tests for trunk/chameleon #HEAD (Profile seq)

Execution on queue court_sirocco with 1 node (--gres=gpu:3 -C Haswell -n 12) of the following script
	export STARPU_WORKER_STATS=1
	export STARPU_CALIBRATE=2
        ./timing/time_spotrf_tile --gpus=3 --threads=9 --niter=30 --ib=96 --nb=960 --n_range=48000:48000:9600 --check
date morse_branch morse_scm starpu_svn output_file error_file M N K/NRHS seconds Gflop/s Deviation
2018-01-12 trunk/chameleon c16e494 22765 output error 48000 48000 1 6.852 5380.51 13.07
2018-01-12 trunk/chameleon - - output error - - - - - -
2018-01-11 trunk/chameleon c16e494 22764 output error 48000 48000 1 6.857 5376.34 15.28
2018-01-10 trunk/chameleon - - output error - - - - - -
2018-01-09 trunk/chameleon - - output error - - - - - -
2017-12-23 trunk/chameleon ef41ba4 22725 output error 48000 48000 1 6.766 5448.49 27.46
2017-12-22 trunk/chameleon ef41ba4 22699 output error 48000 48000 1 6.786 5432.40 28.18
2017-12-21 trunk/chameleon - - output error - - - - - -
2017-12-20 trunk/chameleon ef41ba4 22669 output error 48000 48000 1 6.739 5470.86 32.72
2017-12-19 trunk/chameleon - - output error - - - - - -
See also the tests archive.

Tests for trunk/chameleon #HEAD (Profile seq_sim)

Execution on queue court_sirocco with 1 node () of the following script
	export STARPU_HOME="$prefix_dir/../morse/trunk/chameleon/simucore/perfmodels/"
	export STARPU_HOSTNAME=sirocco
	export STARPU_CALIBRATE=0
	export STARPU_WORKER_STATS=1
        time ./timing/time_spotrf_tile --nowarmup --gpus=3 --threads=9 --niter=30 --nb=960 --ib=96 --n_range=48000:48000:9600
date morse_branch morse_scm starpu_svn output_file error_file M N K/NRHS seconds Gflop/s Deviation
2018-01-12 trunk/chameleon c16e494 22765 output error 48000 48000 1 7.115 5181.25 13.99
2018-01-12 trunk/chameleon - - output error - - - - - -
2018-01-11 trunk/chameleon c16e494 22764 output error 48000 48000 1 7.113 5182.52 17.30
2018-01-10 trunk/chameleon - - output error - - - - - -
2018-01-09 trunk/chameleon - - output error - - - - - -
2017-12-23 trunk/chameleon ef41ba4 22725 output error 48000 48000 1 7.112 5183.77 19.73
2017-12-22 trunk/chameleon ef41ba4 22699 output error 48000 48000 1 7.120 5177.50 14.22
2017-12-21 trunk/chameleon - - output error - - - - - -
2017-12-20 trunk/chameleon ef41ba4 22669 output error 48000 48000 1 7.120 5177.39 13.99
2017-12-19 trunk/chameleon - - output error - - - - - -
See also the tests archive.

Tests for morse/morse_mpi.txt

Tests for trunk/chameleon #HEAD (Profile mpi)

Execution on queue court_sirocco with 4 nodes (-C Haswell --gres=gpu:4) of the following script
        printenv | grep SLURM
        printenv | grep STARPU_
        module list

	export STARPU_WORKER_STATS=1
	export STARPU_CALIBRATE=2
        export STARPU_LIMIT_CPU_MEM=120000
        export STARPU_LIMIT_MIN_SUBMITTED_TASKS=15000
        export STARPU_LIMIT_MAX_SUBMITTED_TASKS=16000
	# Compute P to set PxQ. Use square grid (Chameleon is unsafe with a non square grid), so P = Q = sqrt(number of nodes).
	export p=$(perl -e 'print int(sqrt('$SLURM_JOB_NUM_NODES')), "\n"')
	# Number of threads = number of processors per node - 1 - number of GPUs. We need to make sure the CPUs managing GPU devices are not counted.
        export nbgpus=4
	export nbthreads=$(($SLURM_CPUS_ON_NODE - 1 - $nbgpus))
	# Start execution by giving the number of threads and the grid size. The problem size is set according to the given number of nodes.
	mpiexec -np $SLURM_JOB_NUM_NODES -pernode hostname
	mpiexec -np $SLURM_JOB_NUM_NODES -pernode ./timing/time_spotrf_tile --niter=30 --ib=96 --nb=1440 --n_range=144000:144000:14400 --P=$p --threads=$nbthreads --gpus=$nbgpus
date morse_branch morse_scm starpu_svn output_file error_file M N K/NRHS seconds Gflop/s Deviation
2018-01-12 trunk/chameleon - - output error - - - - - -
2018-01-12 trunk/chameleon - - output error - - - - - -
2018-01-11 trunk/chameleon - - output error - - - - - -
2018-01-10 trunk/chameleon - - output error - - - - - -
2018-01-09 trunk/chameleon - - output error - - - - - -
2017-12-23 trunk/chameleon - - output error - - - - - -
2017-12-21 trunk/chameleon - - output error - - - - - -
2017-12-20 trunk/chameleon ef41ba4 22669 output error - - - - - -
2017-12-19 trunk/chameleon - - output error - - - - - -
2017-12-16 trunk/chameleon ef41ba4 22656 output error - - - - - -
See also the tests archive.

Tests for trunk/chameleon #HEAD (Profile mpi_nmad)

Execution on queue court_sirocco with 4 nodes (-C Haswell --gres=gpu:4) of the following script
        printenv | grep SLURM
        printenv | grep STARPU_
        module list

	export STARPU_WORKER_STATS=1
	export STARPU_CALIBRATE=2
        export STARPU_LIMIT_CPU_MEM=120000
        export STARPU_LIMIT_MIN_SUBMITTED_TASKS=15000
        export STARPU_LIMIT_MAX_SUBMITTED_TASKS=16000
	# Compute P to set PxQ. Use square grid (Chameleon is unsafe with a non square grid), so P = Q = sqrt(number of nodes).
	export p=$(perl -e 'print int(sqrt('$SLURM_JOB_NUM_NODES')), "\n"')
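	# Number of threads = number of processors per node - 2 - number of GPUs. We need to make sure the CPUs managing GPU devices are not counted. NOTE(review): the second reserved core is presumably for the communication progression thread - confirm.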
        export nbgpus=4
	export nbthreads=$(($SLURM_CPUS_ON_NODE - 2 - $nbgpus))
	# Start execution by giving the number of threads and the grid size. The problem size is set according to the given number of nodes.
	type mpiexec
	mpiexec -DLD_LIBRARY_PATH=${LD_LIBRARY_PATH} -np $SLURM_JOB_NUM_NODES -pernode hostname
	mpiexec -DLD_LIBRARY_PATH=${LD_LIBRARY_PATH} -np $SLURM_JOB_NUM_NODES -pernode ./timing/time_spotrf_tile --niter=30 --ib=96 --nb=1440 --n_range=144000:144000:14400 --P=$p --threads=$nbthreads --gpus=$nbgpus
date morse_branch morse_scm starpu_svn output_file error_file M N K/NRHS seconds Gflop/s Deviation
2018-01-12 trunk/chameleon - - output error - - - - - -
2018-01-12 trunk/chameleon - - output error - - - - - -
2018-01-11 trunk/chameleon - - output error - - - - - -
2018-01-10 trunk/chameleon - - output error - - - - - -
2018-01-09 trunk/chameleon - - output error - - - - - -
2017-12-24 trunk/chameleon - - output error - - - - - -
2017-12-21 trunk/chameleon - - output error - - - - - -
2017-12-20 trunk/chameleon ef41ba4 22669 output error 144000 144000 1 49.921 19943.25 313.60
2017-12-19 trunk/chameleon - - output error - - - - - -
2017-12-16 trunk/chameleon ef41ba4 22656 output error 144000 144000 1 50.012 19906.94 318.16
See also the tests archive.

Tests for trunk/chameleon #HEAD (Profile mpi_mad_mpi)

Execution on queue court_sirocco with 4 nodes (-C Haswell --gres=gpu:4) of the following script
        printenv | grep SLURM
        printenv | grep STARPU_
        module list

	export STARPU_WORKER_STATS=1
	export STARPU_CALIBRATE=2
        export STARPU_LIMIT_CPU_MEM=120000
        export STARPU_LIMIT_MIN_SUBMITTED_TASKS=15000
        export STARPU_LIMIT_MAX_SUBMITTED_TASKS=16000
	# Compute P to set PxQ. Use square grid (Chameleon is unsafe with a non square grid), so P = Q = sqrt(number of nodes).
	export p=$(perl -e 'print int(sqrt('$SLURM_JOB_NUM_NODES')), "\n"')
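	# Number of threads = number of processors per node - 2 - number of GPUs. We need to make sure the CPUs managing GPU devices are not counted. NOTE(review): the second reserved core is presumably for the communication progression thread - confirm.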
        export nbgpus=4
	export nbthreads=$(($SLURM_CPUS_ON_NODE - 2 - $nbgpus))
	# Start execution by giving the number of threads and the grid size. The problem size is set according to the given number of nodes.
	type mpiexec
	mpiexec -DLD_LIBRARY_PATH=${LD_LIBRARY_PATH} -np $SLURM_JOB_NUM_NODES -pernode hostname
	mpiexec -DLD_LIBRARY_PATH=${LD_LIBRARY_PATH} -np $SLURM_JOB_NUM_NODES -pernode ./timing/time_spotrf_tile --niter=30 --ib=96 --nb=1440 --n_range=144000:144000:14400 --P=$p --threads=$nbthreads --gpus=$nbgpus
date morse_branch morse_scm starpu_svn output_file error_file M N K/NRHS seconds Gflop/s Deviation
2018-01-12 trunk/chameleon - - output error - - - - - -
2018-01-12 trunk/chameleon - - output error - - - - - -
2018-01-11 trunk/chameleon - - output error - - - - - -
2018-01-10 trunk/chameleon - - output error - - - - - -
2018-01-09 trunk/chameleon - - output error - - - - - -
2017-12-25 trunk/chameleon - - output error - - - - - -
2017-12-21 trunk/chameleon - - output error - - - - - -
2017-12-20 trunk/chameleon ef41ba4 22669 output error 144000 144000 1 64.319 15477.44 199.29
2017-12-19 trunk/chameleon - - output error - - - - - -
2017-12-16 trunk/chameleon ef41ba4 22656 output error 144000 144000 1 64.395 15458.43 158.59
See also the tests archive.

Tests for trunk/chameleon #HEAD (Profile mpi_sim)

Execution on queue court_sirocco with 1 node () of the following script
        printenv | grep SLURM
        printenv | grep STARPU_
        module list

	export STARPU_HOME="$prefix_dir/../morse/trunk/chameleon/simucore/perfmodels/"
	export STARPU_HOSTNAME=sirocco
	export STARPU_CALIBRATE=0
	export STARPU_WORKER_STATS=1
        export STARPU_LIMIT_CPU_MEM=120000
        export STARPU_LIMIT_MIN_SUBMITTED_TASKS=15000
        export STARPU_LIMIT_MAX_SUBMITTED_TASKS=16000
        export STARPU_MALLOC_SIMULATION_FOLD=2
	# Hard-code the MPI submission parameters
	SLURM_JOB_NUM_NODES=4
	SLURM_CPUS_ON_NODE=24
	# Compute P to determine PxQ. Use a square grid (Chameleon fails more often with a non-square grid), so P = Q = sqrt(number of nodes).
	p=$(perl -e 'print int(sqrt('$SLURM_JOB_NUM_NODES')), "\n"')
	# The number of threads is the number of processors per node minus 1, minus the number of GPUs. Since this value is set by hand, the threads for the GPU workers must be subtracted manually.
        nbgpus=4
	nbthreads=$(($SLURM_CPUS_ON_NODE - 1 - $nbgpus))
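	# Execution command line: the threads and P parameters are added when launching the test. The problem size must be configured according to the chosen number of nodes. NOTE(review): the command below passes --P and --gpus but no --threads option, so nbthreads appears unused - confirm whether this is intended.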
	time $prefix_dir/bin/starpu_smpirun -np $SLURM_JOB_NUM_NODES -platform $HOME/.starpu/cluster.xml -hostfile $HOME/.starpu/hostfile --cfg=surf/precision:0.000010 --cfg=maxmin/precision:0.000010 --cfg=network/model:IB --cfg=smpi/test:0.000010 ./timing/time_spotrf_tile --nowarmup --ib=96 --nb=1440 --n_range=144000:144000:14400 --P=$p --gpus=$nbgpus
date morse_branch morse_scm starpu_svn output_file error_file M N K/NRHS seconds Gflop/s Deviation
2018-01-12 trunk/chameleon - - output error - - - - - -
2018-01-12 trunk/chameleon - - output error - - - - - -
2018-01-11 trunk/chameleon - - output error - - - - - -
2018-01-10 trunk/chameleon - - output error - - - - - -
2018-01-09 trunk/chameleon - - output error - - - - - -
2017-12-25 trunk/chameleon ef41ba4 22725 output error 144000 144000 1 30.251 32902.80 0.00
2017-12-21 trunk/chameleon - - output error - - - - - -
2017-12-20 trunk/chameleon ef41ba4 22669 output error 144000 144000 1 30.342 32804.40 0.00
2017-12-19 trunk/chameleon - - output error - - - - - -
2017-12-16 trunk/chameleon ef41ba4 22656 output error 144000 144000 1 30.316 32832.04 0.00
See also the tests archive.

Last updated on 2018/01/12 at 09:27.