ВходНаше всё Теги codebook 无线电组件 Поиск Опросы Закон Вторник
2 июля
427798
VVB (26.07.2013 13:39, просмотров: 15293)
А вот и результат недавней работы. Представляю вниманию сравнение различных архитектур ARM "лоб в лоб" с целью определения быстродействия для классического теста linpack для операций над числами с плавающей точкой двойной точности. Выводы можно делать разные, я пока повременю. Но скажу сразу: Cortex-R4 рвёт всех как тузик грелку за счёт кэша. Текст теста. /* ** ** LINPACK.C Linpack benchmark, calculates FLOPS. ** (FLoating Point Operations Per Second) ** ** Translated to C by Bonnie Toy 5/88 ** ** Modified by Will Menninger, 10/93, with these features: ** (modified on 2/25/94 to fix a problem with daxpy for ** unequal increments or equal increments not equal to 1. ** Jack Dongarra) **#include "runtime/lib.h" #include "kernel/uos.h" #include <kernel/internal.h> #include <runtime/math.h> #include <whetstone/whetstone.h> #include <time_service/time_service.h> ** - Defaults to double precision. ** - Averages ROLLed and UNROLLed performance. ** - User selectable array sizes. ** - Automatically does enough repetitions to take at least 10 CPU seconds. ** - Prints machine precision. ** - ANSI prototyping. ** ** To compile: cc -O -o linpack linpack.c -lm ** ** */ #include <stdlib.h> #include <math.h> #define ZERO 0.0e0 #define ONE 1.0e0 #define PREC "Double" #define BASE10DIG DBL_DIG static double linpack (long nreps,int arsize); static void matgen (double *a,int lda,int n,double *b,double *norma); static void dgefa (double *a,int lda,int n,int *ipvt,int *info,int roll); static void dgesl (double *a,int lda,int n,int *ipvt,double *b,int job,int roll); static void daxpy_r (int n,double da,double *dx,int incx,double *dy,int incy); static double ddot_r (int n,double *dx,int incx,double *dy,int incy); static void dscal_r (int n,double da,double *dx,int incx); static void daxpy_ur (int n,double da,double *dx,int incx,double *dy,int incy); static double ddot_ur (int n,double *dx,int incx,double *dy,int incy); static void dscal_ur (int n,double da,double *dx,int incx); static int idamax (int n,double *dx,int incx); static double second (void); static void *mempool; static void debug_printf(const char *, ...); void main_linpack(void) { int size = 40; int arsize; long arsize2d,memreq,nreps; size_t malloc_arg; arsize=size; arsize/=2; arsize*=2; if (arsize<10) { return; } arsize2d = (long)arsize*(long)arsize; memreq=arsize2d*sizeof(double)+(long)arsize*sizeof(double)+(long)arsize*sizeof(int) + 4; malloc_arg=(size_t)memreq; if (malloc_arg!=memreq || (mempool=malloc(malloc_arg))==NULL) { debug_printf("Not enough memory available for given array size.\n\n"); return; } nreps=5; linpack(nreps,arsize); free(mempool); } static double linpack(long nreps,int arsize) { double *a,*b; double norma,t1,kflops,tdgesl,tdgefa,totalt,toverhead,ops; int *ipvt,n,info,lda; long i,arsize2d; lda = arsize; n = arsize/2; arsize2d = (long)arsize*(long)arsize; ops=((2.0*n*n*n)/3.0+2.0*n*n); a=(double *)(((unsigned) mempool + 0x7) & ~0x7); b=a+arsize2d; ipvt=(int *)&b[arsize]; tdgesl=0; tdgefa=0; totalt=second(); for (i=0;i<nreps;i++) { matgen(a,lda,n,b,&norma); t1 = second(); dgefa(a,lda,n,ipvt,&info,1); tdgefa += second()-t1; t1 = second(); dgesl(a,lda,n,ipvt,b,0,1); tdgesl += second()-t1; } for (i=0;i<nreps;i++) { matgen(a,lda,n,b,&norma); t1 = second(); dgefa(a,lda,n,ipvt,&info,0); tdgefa += second()-t1; t1 = second(); dgesl(a,lda,n,ipvt,b,0,0); tdgesl += second()-t1; } totalt=second()-totalt; kflops=2.*nreps*ops/(1000.*(tdgefa+tdgesl)); toverhead=totalt-tdgefa-tdgesl; if (tdgefa<0.) tdgefa=0.; if (tdgesl<0.) tdgesl=0.; if (toverhead<0.) toverhead=0.; debug_printf("%8ld %6.2f %6.2f%% %6.2f%% %6.2f%% %9.3f\n", nreps,totalt,100.*tdgefa/totalt, 100.*tdgesl/totalt,100.*toverhead/totalt, kflops); return(totalt); } /* ** For matgen, ** We would like to declare a[][lda], but c does not allow it. In this ** function, references to a[i][j] are written a[lda*i+j]. */ static void matgen(double *a,int lda,int n,double *b,double *norma) { int init,i,j; init = 1325; *norma = 0.0; for (j = 0; j < n; j++) for (i = 0; i < n; i++) { init = (int)((long)3125*(long)init % 65536L); a[lda*j+i] = (init - 32768.0)/16384.0; *norma = (a[lda*j+i] > *norma) ? a[lda*j+i] : *norma; } for (i = 0; i < n; i++) b[i] = 0.0; for (j = 0; j < n; j++) for (i = 0; i < n; i++) b[i] = b[i] + a[lda*j+i]; } /* ** ** DGEFA benchmark ** ** We would like to declare a[][lda], but c does not allow it. In this ** function, references to a[i][j] are written a[lda*i+j]. ** ** dgefa factors a double precision matrix by gaussian elimination. ** ** dgefa is usually called by dgeco, but it can be called ** directly with a saving in time if rcond is not needed. ** (time for dgeco) = (1 + 9/n)*(time for dgefa) . ** ** on entry ** ** a double precision[n][lda] ** the matrix to be factored. ** ** lda integer ** the leading dimension of the array a . ** ** n integer ** the order of the matrix a . ** ** on return ** ** a an upper triangular matrix and the multipliers ** which were used to obtain it. ** the factorization can be written a = l*u where ** l is a product of permutation and unit lower ** triangular matrices and u is upper triangular. ** ** ipvt integer[n] ** an integer vector of pivot indices. ** ** info integer ** = 0 normal value. ** = k if u[k][k] .eq. 0.0 . this is not an error ** condition for this subroutine, but it does ** indicate that dgesl or dgedi will divide by zero ** if called. use rcond in dgeco for a reliable ** indication of singularity. ** ** linpack. this version dated 08/14/78 . ** cleve moler, university of New Mexico, argonne national lab. ** ** functions ** ** blas daxpy,dscal,idamax ** */ static void dgefa(double *a,int lda,int n,int *ipvt,int *info,int roll) { double t; int idamax(),j,k,kp1,l,nm1; /* gaussian elimination with partial pivoting */ if (roll) { *info = 0; nm1 = n - 1; if (nm1 >= 0) for (k = 0; k < nm1; k++) { kp1 = k + 1; /* find l = pivot index */ l = idamax(n-k,&a[lda*k+k],1) + k; ipvt[k] = l; /* zero pivot implies this column already triangularized */ if (a[lda*k+l] != ZERO) { /* interchange if necessary */ if (l != k) { t = a[lda*k+l]; a[lda*k+l] = a[lda*k+k]; a[lda*k+k] = t; } /* compute multipliers */ t = -ONE/a[lda*k+k]; dscal_r(n-(k+1),t,&a[lda*k+k+1],1); /* row elimination with column indexing */ for (j = kp1; j < n; j++) { t = a[lda*j+l]; if (l != k) { a[lda*j+l] = a[lda*j+k]; a[lda*j+k] = t; } daxpy_r(n-(k+1),t,&a[lda*k+k+1],1,&a[lda*j+k+1],1); } } else (*info) = k; } ipvt[n-1] = n-1; if (a[lda*(n-1)+(n-1)] == ZERO) (*info) = n-1; } else { *info = 0; nm1 = n - 1; if (nm1 >= 0) for (k = 0; k < nm1; k++) { kp1 = k + 1; /* find l = pivot index */ l = idamax(n-k,&a[lda*k+k],1) + k; ipvt[k] = l; /* zero pivot implies this column already triangularized */ if (a[lda*k+l] != ZERO) { /* interchange if necessary */ if (l != k) { t = a[lda*k+l]; a[lda*k+l] = a[lda*k+k]; a[lda*k+k] = t; } /* compute multipliers */ t = -ONE/a[lda*k+k]; dscal_ur(n-(k+1),t,&a[lda*k+k+1],1); /* row elimination with column indexing */ for (j = kp1; j < n; j++) { t = a[lda*j+l]; if (l != k) { a[lda*j+l] = a[lda*j+k]; a[lda*j+k] = t; } daxpy_ur(n-(k+1),t,&a[lda*k+k+1],1,&a[lda*j+k+1],1); } } else (*info) = k; } ipvt[n-1] = n-1; if (a[lda*(n-1)+(n-1)] == ZERO) (*info) = n-1; } } /* ** ** DGESL benchmark ** ** We would like to declare a[][lda], but c does not allow it. In this ** function, references to a[i][j] are written a[lda*i+j]. ** ** dgesl solves the double precision system ** a * x = b or trans(a) * x = b ** using the factors computed by dgeco or dgefa. ** ** on entry ** ** a double precision[n][lda] ** the output from dgeco or dgefa. ** ** lda integer ** the leading dimension of the array a . ** ** n integer ** the order of the matrix a . ** ** ipvt integer[n] ** the pivot vector from dgeco or dgefa. ** ** b double precision[n] ** the right hand side vector. ** ** job integer ** = 0 to solve a*x = b , ** = nonzero to solve trans(a)*x = b where ** trans(a) is the transpose. ** ** on return ** ** b the solution vector x . ** ** error condition ** ** a division by zero will occur if the input factor contains a ** zero on the diagonal. technically this indicates singularity ** but it is often caused by improper arguments or improper ** setting of lda . it will not occur if the subroutines are ** called correctly and if dgeco has set rcond .gt. 0.0 ** or dgefa has set info .eq. 0 . ** ** to compute inverse(a) * c where c is a matrix ** with p columns ** dgeco(a,lda,n,ipvt,rcond,z) ** if (!rcond is too small){ ** for (j=0,j<p,j++) ** dgesl(a,lda,n,ipvt,c[j][0],0); ** } ** ** linpack. this version dated 08/14/78 . ** cleve moler, university of new mexico, argonne national lab. ** ** functions ** ** blas daxpy,ddot */ static void dgesl(double *a,int lda,int n,int *ipvt,double *b,int job,int roll) { double t; int k,kb,l,nm1; if (roll) { nm1 = n - 1; if (job == 0) { /* job = 0 , solve a * x = b */ /* first solve l*y = b */ if (nm1 >= 1) for (k = 0; k < nm1; k++) { l = ipvt[k]; t = b[l]; if (l != k) { b[l] = b[k]; b[k] = t; } daxpy_r(n-(k+1),t,&a[lda*k+k+1],1,&b[k+1],1); } /* now solve u*x = y */ for (kb = 0; kb < n; kb++) { k = n - (kb + 1); b[k] = b[k]/a[lda*k+k]; t = -b[k]; daxpy_r(k,t,&a[lda*k+0],1,&b[0],1); } } else { /* job = nonzero, solve trans(a) * x = b */ /* first solve trans(u)*y = b */ for (k = 0; k < n; k++) { t = ddot_r(k,&a[lda*k+0],1,&b[0],1); b[k] = (b[k] - t)/a[lda*k+k]; } /* now solve trans(l)*x = y */ if (nm1 >= 1) for (kb = 1; kb < nm1; kb++) { k = n - (kb+1); b[k] = b[k] + ddot_r(n-(k+1),&a[lda*k+k+1],1,&b[k+1],1); l = ipvt[k]; if (l != k) { t = b[l]; b[l] = b[k]; b[k] = t; } } } } else { nm1 = n - 1; if (job == 0) { /* job = 0 , solve a * x = b */ /* first solve l*y = b */ if (nm1 >= 1) for (k = 0; k < nm1; k++) { l = ipvt[k]; t = b[l]; if (l != k) { b[l] = b[k]; b[k] = t; } daxpy_ur(n-(k+1),t,&a[lda*k+k+1],1,&b[k+1],1); } /* now solve u*x = y */ for (kb = 0; kb < n; kb++) { k = n - (kb + 1); b[k] = b[k]/a[lda*k+k]; t = -b[k]; daxpy_ur(k,t,&a[lda*k+0],1,&b[0],1); } } else { /* job = nonzero, solve trans(a) * x = b */ /* first solve trans(u)*y = b */ for (k = 0; k < n; k++) { t = ddot_ur(k,&a[lda*k+0],1,&b[0],1); b[k] = (b[k] - t)/a[lda*k+k]; } /* now solve trans(l)*x = y */ if (nm1 >= 1) for (kb = 1; kb < nm1; kb++) { k = n - (kb+1); b[k] = b[k] + ddot_ur(n-(k+1),&a[lda*k+k+1],1,&b[k+1],1); l = ipvt[k]; if (l != k) { t = b[l]; b[l] = b[k]; b[k] = t; } } } } } /* ** Constant times a vector plus a vector. ** Jack Dongarra, linpack, 3/11/78. ** ROLLED version */ static void daxpy_r(int n,double da,double *dx,int incx,double *dy,int incy) { int i,ix,iy; if (n <= 0) return; if (da == ZERO) return; if (incx != 1 || incy != 1) { /* code for unequal increments or equal increments != 1 */ ix = 1; iy = 1; if(incx < 0) ix = (-n+1)*incx + 1; if(incy < 0)iy = (-n+1)*incy + 1; for (i = 0;i < n; i++) { dy[iy] = dy[iy] + da*dx[ix]; ix = ix + incx; iy = iy + incy; } return; } /* code for both increments equal to 1 */ for (i = 0;i < n; i++) dy[i] = dy[i] + da*dx[i]; } /* ** Forms the dot product of two vectors. ** Jack Dongarra, linpack, 3/11/78. ** ROLLED version */ static double ddot_r(int n,double *dx,int incx,double *dy,int incy) { double dtemp; int i,ix,iy; dtemp = ZERO; if (n <= 0) return(ZERO); if (incx != 1 || incy != 1) { /* code for unequal increments or equal increments != 1 */ ix = 0; iy = 0; if (incx < 0) ix = (-n+1)*incx; if (incy < 0) iy = (-n+1)*incy; for (i = 0;i < n; i++) { dtemp = dtemp + dx[ix]*dy[iy]; ix = ix + incx; iy = iy + incy; } return(dtemp); } /* code for both increments equal to 1 */ for (i=0;i < n; i++) dtemp = dtemp + dx[i]*dy[i]; return(dtemp); } /* ** Scales a vector by a constant. ** Jack Dongarra, linpack, 3/11/78. ** ROLLED version */ static void dscal_r(int n,double da,double *dx,int incx) { int i,nincx; if (n <= 0) return; if (incx != 1) { /* code for increment not equal to 1 */ nincx = n*incx; for (i = 0; i < nincx; i = i + incx) dx[i] = da*dx[i]; return; } /* code for increment equal to 1 */ for (i = 0; i < n; i++) dx[i] = da*dx[i]; } /* ** constant times a vector plus a vector. ** Jack Dongarra, linpack, 3/11/78. ** UNROLLED version */ static void daxpy_ur(int n,double da,double *dx,int incx,double *dy,int incy) { int i,ix,iy,m; if (n <= 0) return; if (da == ZERO) return; if (incx != 1 || incy != 1) { /* code for unequal increments or equal increments != 1 */ ix = 1; iy = 1; if(incx < 0) ix = (-n+1)*incx + 1; if(incy < 0)iy = (-n+1)*incy + 1; for (i = 0;i < n; i++) { dy[iy] = dy[iy] + da*dx[ix]; ix = ix + incx; iy = iy + incy; } return; } /* code for both increments equal to 1 */ m = n % 4; if ( m != 0) { for (i = 0; i < m; i++) dy[i] = dy[i] + da*dx[i]; if (n < 4) return; } for (i = m; i < n; i = i + 4) { dy[i] = dy[i] + da*dx[i]; dy[i+1] = dy[i+1] + da*dx[i+1]; dy[i+2] = dy[i+2] + da*dx[i+2]; dy[i+3] = dy[i+3] + da*dx[i+3]; } } /* ** Forms the dot product of two vectors. ** Jack Dongarra, linpack, 3/11/78. ** UNROLLED version */ static double ddot_ur(int n,double *dx,int incx,double *dy,int incy) { double dtemp; int i,ix,iy,m; dtemp = ZERO; if (n <= 0) return(ZERO); if (incx != 1 || incy != 1) { /* code for unequal increments or equal increments != 1 */ ix = 0; iy = 0; if (incx < 0) ix = (-n+1)*incx; if (incy < 0) iy = (-n+1)*incy; for (i = 0;i < n; i++) { dtemp = dtemp + dx[ix]*dy[iy]; ix = ix + incx; iy = iy + incy; } return(dtemp); } /* code for both increments equal to 1 */ m = n % 5; if (m != 0) { for (i = 0; i < m; i++) dtemp = dtemp + dx[i]*dy[i]; if (n < 5) return(dtemp); } for (i = m; i < n; i = i + 5) { dtemp = dtemp + dx[i]*dy[i] + dx[i+1]*dy[i+1] + dx[i+2]*dy[i+2] + dx[i+3]*dy[i+3] + dx[i+4]*dy[i+4]; } return(dtemp); } /* ** Scales a vector by a constant. ** Jack Dongarra, linpack, 3/11/78. ** UNROLLED version */ static void dscal_ur(int n,double da,double *dx,int incx) { int i,m,nincx; if (n <= 0) return; if (incx != 1) { /* code for increment not equal to 1 */ nincx = n*incx; for (i = 0; i < nincx; i = i + incx) dx[i] = da*dx[i]; return; } /* code for increment equal to 1 */ m = n % 5; if (m != 0) { for (i = 0; i < m; i++) dx[i] = da*dx[i]; if (n < 5) return; } for (i = m; i < n; i = i + 5) { dx[i] = da*dx[i]; dx[i+1] = da*dx[i+1]; dx[i+2] = da*dx[i+2]; dx[i+3] = da*dx[i+3]; dx[i+4] = da*dx[i+4]; } } /* ** Finds the index of element having max. absolute value. ** Jack Dongarra, linpack, 3/11/78. */ static int idamax(int n,double *dx,int incx) { double dmax; int i, ix, itemp; if (n < 1) return(-1); if (n ==1 ) return(0); if(incx != 1) { /* code for increment not equal to 1 */ ix = 1; dmax = fabs((double)dx[0]); ix = ix + incx; for (i = 1; i < n; i++) { if(fabs((double)dx[ix]) > dmax) { itemp = i; dmax = fabs((double)dx[ix]); } ix = ix + incx; } } else { /* code for increment equal to 1 */ itemp = 0; dmax = fabs((double)dx[0]); for (i = 1; i < n; i++) if(fabs((double)dx[i]) > dmax) { itemp = i; dmax = fabs((double)dx[i]); } } return (itemp); } static double second(void) { #if 0 /* оставлено из uOS */ return (double)((double)timer_milliseconds (&timer)) / ((double)1000); #else return 0; #endif } static void debug_printf(const char *fmt, ...) { ; }