Summary
The memory hierarchy, compiler optimizations. Choosing a baseline. Speedup. Amdahl’s Law. Weak scaling vs. strong scaling. Efficiency. Automating performance experiments. Using gnuplot to create performance graphs.
Slides: 06_performance.pdf
Videos
Code Examples
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 |
/*
 * Dense matrix-vector product y = A * x on an N-by-N matrix, timed with
 * gettimeofday().
 *
 * This variant walks the matrix in column order: the outer loop runs over
 * the column index j and the inner loop over the row index i, so successive
 * inner-loop iterations touch elements that are N doubles apart in memory.
 * NOTE(review): the loop order looks deliberate (memory-hierarchy demo);
 * do not "fix" it by swapping the loops.
 */
#include <stdlib.h>
#include <stdio.h>
#include <assert.h>
#include <sys/time.h>

#define N 20000

double **a, *x, *y;

/* Returns the current time of day in seconds, with microsecond resolution. */
static double mytime(void)
{
    struct timeval t;
    gettimeofday(&t, NULL);
    return t.tv_sec + t.tv_usec / 1000000.0;
}

/*
 * Allocates an n-by-m matrix of doubles as one contiguous block plus an
 * array of n row pointers into that block, so rows[i][j] is element (i, j).
 * Allocation failure is caught by assert(), which is compiled out when
 * NDEBUG is defined.
 */
static double **allocate2d(int n, int m)
{
    /* Size arithmetic is done in size_t: n * m as int overflows for large n, m. */
    double *storage = malloc((size_t)n * (size_t)m * sizeof *storage);
    double **rows = malloc((size_t)n * sizeof *rows);
    assert(storage);
    assert(rows);
    for (int i = 0; i < n; i++)
        rows[i] = &storage[(size_t)i * (size_t)m];
    return rows;
}

/* Releases a matrix obtained from allocate2d(). */
static void free2d(double **a)
{
    free(a[0]); /* a[0] points at the start of the contiguous storage block */
    free(a);    /* the row-pointer array itself */
}

int main(void)
{
    a = allocate2d(N, N);
    x = malloc(N * sizeof *x);
    assert(x);
    y = malloc(N * sizeof *y);
    assert(y);

    /*
     * Fill the inputs. The element value uses integer division (truncating),
     * as in the original; the index is widened to long long so that i*N + j
     * cannot overflow int if N is increased.
     */
    for (int i = 0; i < N; i++)
        for (int j = 0; j < N; j++)
            a[i][j] = ((long long)i * N + j) / 1000;
    for (int i = 0; i < N; i++)
        x[i] = i;
    for (int i = 0; i < N; i++)
        y[i] = 0.0;

    printf("Starting computation.\n");
    fflush(stdout); /* make the message visible before the long-running loop */

    double t0 = mytime();

    /* Column-order traversal: j (column) outer, i (row) inner. */
    for (int j = 0; j < N; j++)
        for (int i = 0; i < N; i++)
            y[i] += a[i][j] * x[j];

    printf("Computation complete. Time = %lf seconds\n", mytime() - t0);

    free(x);
    free(y);
    free2d(a);
    return 0;
}
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 |
/*
 * Dense matrix-vector product y = A * x on an N-by-N matrix, timed with
 * gettimeofday().
 *
 * This variant walks the matrix in row order: the outer loop runs over the
 * row index i and the inner loop over the column index j, so successive
 * inner-loop iterations touch adjacent doubles within one row of the
 * contiguous storage block.
 */
#include <stdlib.h>
#include <stdio.h>
#include <assert.h>
#include <sys/time.h>

#define N 20000

double **a, *x, *y;

/* Returns the current time of day in seconds, with microsecond resolution. */
static double mytime(void)
{
    struct timeval t;
    gettimeofday(&t, NULL);
    return t.tv_sec + t.tv_usec / 1000000.0;
}

/*
 * Allocates an n-by-m matrix of doubles as one contiguous block plus an
 * array of n row pointers into that block, so rows[i][j] is element (i, j).
 * Allocation failure is caught by assert(), which is compiled out when
 * NDEBUG is defined.
 */
static double **allocate2d(int n, int m)
{
    /* Size arithmetic is done in size_t: n * m as int overflows for large n, m. */
    double *storage = malloc((size_t)n * (size_t)m * sizeof *storage);
    double **rows = malloc((size_t)n * sizeof *rows);
    assert(storage);
    assert(rows);
    for (int i = 0; i < n; i++)
        rows[i] = &storage[(size_t)i * (size_t)m];
    return rows;
}

/* Releases a matrix obtained from allocate2d(). */
static void free2d(double **a)
{
    free(a[0]); /* a[0] points at the start of the contiguous storage block */
    free(a);    /* the row-pointer array itself */
}

int main(void)
{
    a = allocate2d(N, N);
    x = malloc(N * sizeof *x);
    assert(x);
    y = malloc(N * sizeof *y);
    assert(y);

    /*
     * Fill the inputs. The element value uses integer division (truncating),
     * as in the original; the index is widened to long long so that i*N + j
     * cannot overflow int if N is increased.
     */
    for (int i = 0; i < N; i++)
        for (int j = 0; j < N; j++)
            a[i][j] = ((long long)i * N + j) / 1000;
    for (int i = 0; i < N; i++)
        x[i] = i;
    for (int i = 0; i < N; i++)
        y[i] = 0.0;

    printf("Starting computation.\n");
    fflush(stdout); /* make the message visible before the long-running loop */

    double t0 = mytime();

    /* Row-order traversal: i (row) outer, j (column) inner. */
    for (int i = 0; i < N; i++)
        for (int j = 0; j < N; j++)
            y[i] += a[i][j] * x[j];

    printf("Computation complete. Time = %lf seconds\n", mytime() - t0);

    free(x);
    free(y);
    free2d(a);
    return 0;
}
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 |
# Builds the sat_mpi program, collects its output for 1..32 MPI processes
# into a .dat file, and renders graphs from that file with gnuplot.

ROOT = ../../
include $(ROOT)/common.mk

NAME = sat_mpi

all: $(NAME).exec

# Rebuild when either the source or this Makefile changes.
# NOTE(review): MPICCC and MPIRUN are not defined here; they are presumably
# provided by common.mk. Confirm that MPICCC (not MPICC) is the intended name.
$(NAME).exec: $(NAME).c Makefile
	$(MPICCC) -o $@ $<

# Run the program with 1, 2, 4, 8, 16 and 32 processes. The first run
# truncates the data file; the remaining runs append to it.
$(NAME).dat: $(NAME).exec
	$(MPIRUN) -n 1 ./$< > $@
	$(MPIRUN) -n 2 ./$< >> $@
	$(MPIRUN) -n 4 ./$< >> $@
	$(MPIRUN) -n 8 ./$< >> $@
	$(MPIRUN) -n 16 ./$< >> $@
	$(MPIRUN) -n 32 ./$< >> $@

# This target has no prerequisites, so it does not regenerate the data file.
# The gnuplot script is expected to read $(NAME).dat; build that target first.
graphs:
	gnuplot $(NAME).gnu

# Double-colon rule, so it can coexist with other clean:: rules.
clean::
	rm -f *.dat *.pdf

.PHONY: all graphs
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 |
# Renders three PDF graphs from sat_mpi.dat: run time, speedup, efficiency.
# Column 1 of the data file is plotted on the x axis ("Number of processes")
# and column 2 is treated as the run time in seconds.

set terminal pdf # size 4, 4

# Optional layout tweaks (disabled):
# set tmargin at screen 0.9
# set lmargin at screen 0.1
# set rmargin at screen -0.1
# set bmargin at screen 0.1
# set size ratio 1.2
# unset log
# unset label
# set key vertical top left
# set logscale y
# set format y "10^{%T}"
# set xtics 1, 1, 10
# set ytics

# Helper used by the speedup and efficiency plots: on the first data record
# ($0 == 0) it stores its argument in `base`; on later records it leaves
# `base` unchanged. `base` therefore holds column 2 of the first record.
first(x) = ($0 > 0 ? base : base = x)

# ---- Graph 1: run time ----
set output "sat_mpi.pdf"
set xlabel center "Number of processes"
set ylabel center "time (seconds)"
set xrange [0:32]
set yrange [0:45]
plot "sat_mpi.dat" using 1:2 title 'MPI' with linespoints

# ---- Graph 2: speedup = base / time ----
set output "sat_speedup.pdf"
set xlabel center "Number of processes"
set ylabel center "speedup"
set xrange [0:32]
set yrange [0:32]
plot "sat_mpi.dat" using 1:(first($2), base/$2) title 'Speedup' with linespoints

# ---- Graph 3: efficiency = base / (time * processes) ----
set output "sat_efficiency.pdf"
set xlabel center "Number of processes"
set ylabel center "efficiency"
set xrange [0:32]
set yrange [0:1]
plot "sat_mpi.dat" using 1:(first($2), base/($2*$1)) title 'Efficiency' with linespoints