///////////////////////////////////////////////////////////////////////////////// // // Levenberg - Marquardt non-linear minimization algorithm // Copyright (C) 2004-05 Manolis Lourakis (lourakis at ics forth gr) // Institute of Computer Science, Foundation for Research & Technology - Hellas // Heraklion, Crete, Greece. // // This program is free software; you can redistribute it and/or modify // it under the terms of the GNU General Public License as published by // the Free Software Foundation; either version 2 of the License, or // (at your option) any later version. // // This program is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the // GNU General Public License for more details. // ///////////////////////////////////////////////////////////////////////////////// #ifndef LM_REAL // not included by misc.c #error This file should not be compiled directly! #endif /* precision-specific definitions */ #define LEVMAR_CHKJAC LM_ADD_PREFIX(levmar_chkjac) #define LEVMAR_FDIF_FORW_JAC_APPROX LM_ADD_PREFIX(levmar_fdif_forw_jac_approx) #define LEVMAR_FDIF_CENT_JAC_APPROX LM_ADD_PREFIX(levmar_fdif_cent_jac_approx) #define LEVMAR_TRANS_MAT_MAT_MULT LM_ADD_PREFIX(levmar_trans_mat_mat_mult) #define LEVMAR_COVAR LM_ADD_PREFIX(levmar_covar) #define LEVMAR_STDDEV LM_ADD_PREFIX(levmar_stddev) #define LEVMAR_CORCOEF LM_ADD_PREFIX(levmar_corcoef) #define LEVMAR_R2 LM_ADD_PREFIX(levmar_R2) #define LEVMAR_BOX_CHECK LM_ADD_PREFIX(levmar_box_check) #define LEVMAR_L2NRMXMY LM_ADD_PREFIX(levmar_L2nrmxmy) #ifdef HAVE_LAPACK #define LEVMAR_PSEUDOINVERSE LM_ADD_PREFIX(levmar_pseudoinverse) static int LEVMAR_PSEUDOINVERSE(LM_REAL *A, LM_REAL *B, int m); /* BLAS matrix multiplication & LAPACK SVD routines */ #ifdef LM_BLAS_PREFIX #define GEMM LM_CAT_(LM_BLAS_PREFIX, LM_ADD_PREFIX(LM_CAT_(gemm, LM_BLAS_SUFFIX))) #else #define GEMM LM_ADD_PREFIX(LM_CAT_(gemm, LM_BLAS_SUFFIX)) #endif /* C := alpha*op( A )*op( B ) + beta*C */ extern void GEMM(char *transa, char *transb, int *m, int *n, int *k, LM_REAL *alpha, LM_REAL *a, int *lda, LM_REAL *b, int *ldb, LM_REAL *beta, LM_REAL *c, int *ldc); #define GESVD LM_MK_LAPACK_NAME(gesvd) #define GESDD LM_MK_LAPACK_NAME(gesdd) extern int GESVD(char *jobu, char *jobvt, int *m, int *n, LM_REAL *a, int *lda, LM_REAL *s, LM_REAL *u, int *ldu, LM_REAL *vt, int *ldvt, LM_REAL *work, int *lwork, int *info); /* lapack 3.0 new SVD routine, faster than xgesvd() */ extern int GESDD(char *jobz, int *m, int *n, LM_REAL *a, int *lda, LM_REAL *s, LM_REAL *u, int *ldu, LM_REAL *vt, int *ldvt, LM_REAL *work, int *lwork, int *iwork, int *info); /* Cholesky decomposition */ #define POTF2 LM_MK_LAPACK_NAME(potf2) extern int POTF2(char *uplo, int *n, LM_REAL *a, int *lda, int *info); #define LEVMAR_CHOLESKY LM_ADD_PREFIX(levmar_chol) #else #define LEVMAR_LUINVERSE LM_ADD_PREFIX(levmar_LUinverse_noLapack) static int LEVMAR_LUINVERSE(LM_REAL *A, LM_REAL *B, int m); #endif /* HAVE_LAPACK */ /* blocked multiplication of the transpose of the nxm matrix a with itself (i.e. a^T a) * using a block size of bsize. The product is returned in b. * Since a^T a is symmetric, its computation can be sped up by computing only its * upper triangular part and copying it to the lower part. * * More details on blocking can be found at * http://www-2.cs.cmu.edu/afs/cs/academic/class/15213-f02/www/R07/section_a/Recitation07-SectionA.pdf */ void LEVMAR_TRANS_MAT_MAT_MULT(LM_REAL *a, LM_REAL *b, int n, int m) { #ifdef HAVE_LAPACK /* use BLAS matrix multiply */ LM_REAL alpha=LM_CNST(1.0), beta=LM_CNST(0.0); /* Fool BLAS to compute a^T*a avoiding transposing a: a is equivalent to a^T in column major, * therefore BLAS computes a*a^T with a and a*a^T in column major, which is equivalent to * computing a^T*a in row major! */ GEMM("N", "T", &m, &m, &n, &alpha, a, &m, a, &m, &beta, b, &m); #else /* no LAPACK, use blocking-based multiply */ register int i, j, k, jj, kk; register LM_REAL sum, *bim, *akm; const int bsize=__BLOCKSZ__; #define __MIN__(x, y) (((x)<=(y))? (x) : (y)) #define __MAX__(x, y) (((x)>=(y))? (x) : (y)) /* compute upper triangular part using blocking */ for(jj=0; jj R^n: Given a p in R^m it yields hx in R^n * jacf points to a function implementing the Jacobian of func, whose correctness * is to be tested. Given a p in R^m, jacf computes into the nxm matrix j the * Jacobian of func at p. Note that row i of j corresponds to the gradient of * the i-th component of func, evaluated at p. * p is an input array of length m containing the point of evaluation. * m is the number of variables * n is the number of functions * adata points to possible additional data and is passed uninterpreted * to func, jacf. * err is an array of length n. On output, err contains measures * of correctness of the respective gradients. if there is * no severe loss of significance, then if err[i] is 1.0 the * i-th gradient is correct, while if err[i] is 0.0 the i-th * gradient is incorrect. For values of err between 0.0 and 1.0, * the categorization is less certain. In general, a value of * err[i] greater than 0.5 indicates that the i-th gradient is * probably correct, while a value of err[i] less than 0.5 * indicates that the i-th gradient is probably incorrect. * * * The function does not perform reliably if cancellation or * rounding errors cause a severe loss of significance in the * evaluation of a function. therefore, none of the components * of p should be unusually small (in particular, zero) or any * other value which may cause loss of significance. */ int LEVMAR_CHKJAC( void (*func)(LM_REAL *p, LM_REAL *hx, int m, int n, void *adata), void (*jacf)(LM_REAL *p, LM_REAL *j, int m, int n, void *adata), LM_REAL *p, int m, int n, void *adata, LM_REAL *err) { LM_REAL factor=LM_CNST(100.0); LM_REAL one=LM_CNST(1.0); LM_REAL zero=LM_CNST(0.0); LM_REAL *fvec, *fjac, *pp, *fvecp, *buf; register int i, j; LM_REAL eps, epsf, temp, epsmch; LM_REAL epslog; int fvec_sz=n, fjac_sz=n*m, pp_sz=m, fvecp_sz=n; epsmch=LM_REAL_EPSILON; eps=(LM_REAL)sqrt(epsmch); buf=(LM_REAL *)malloc((fvec_sz + fjac_sz + pp_sz + fvecp_sz)*sizeof(LM_REAL)); if(!buf){ PRINT_ERROR(LCAT(LEVMAR_CHKJAC, "(): memory allocation request failed\n")); return 0; } fvec=buf; fjac=fvec+fvec_sz; pp=fjac+fjac_sz; fvecp=pp+pp_sz; /* compute fvec=func(p) */ (*func)(p, fvec, m, n, adata); /* compute the Jacobian at p */ (*jacf)(p, fjac, m, n, adata); /* compute pp */ for(j=0; j=epsf*FABS(fvec[i])) temp=eps*FABS((fvecp[i]-fvec[i])/eps - err[i])/(FABS(fvec[i])+FABS(fvecp[i])); err[i]=one; if(temp>epsmch && temp=eps) err[i]=zero; } free(buf); return 1; } #ifdef HAVE_LAPACK /* * This function computes the pseudoinverse of a square matrix A * into B using SVD. A and B can coincide * * The function returns 0 in case of error (e.g. A is singular), * the rank of A if successful * * A, B are mxm * */ static int LEVMAR_PSEUDOINVERSE(LM_REAL *A, LM_REAL *B, int m) { LM_REAL *buf=NULL; int buf_sz=0; static LM_REAL eps=LM_CNST(-1.0); register int i, j; LM_REAL *a, *u, *s, *vt, *work; int a_sz, u_sz, s_sz, vt_sz, tot_sz; LM_REAL thresh, one_over_denom; int info, rank, worksz, *iwork, iworksz; /* calculate required memory size */ worksz=5*m; // min worksize for GESVD //worksz=m*(7*m+4); // min worksize for GESDD iworksz=8*m; a_sz=m*m; u_sz=m*m; s_sz=m; vt_sz=m*m; tot_sz=(a_sz + u_sz + s_sz + vt_sz + worksz)*sizeof(LM_REAL) + iworksz*sizeof(int); /* should be arranged in that order for proper doubles alignment */ buf_sz=tot_sz; buf=(LM_REAL *)malloc(buf_sz); if(!buf){ PRINT_ERROR(RCAT("memory allocation in ", LEVMAR_PSEUDOINVERSE) "() failed!\n"); return 0; /* error */ } a=buf; u=a+a_sz; s=u+u_sz; vt=s+s_sz; work=vt+vt_sz; iwork=(int *)(work+worksz); /* store A (column major!) into a */ for(i=0; i0.0; eps*=LM_CNST(0.5)) ; eps*=LM_CNST(2.0); } /* compute the pseudoinverse in B */ for(i=0; ithresh; rank++){ one_over_denom=LM_CNST(1.0)/s[rank]; for(j=0; jmax) max=tmp; if(max==0.0){ PRINT_ERROR(RCAT("Singular matrix A in ", LEVMAR_LUINVERSE) "()!\n"); free(buf); return 0; } work[i]=LM_CNST(1.0)/max; } for(j=0; j=max){ max=tmp; maxi=i; } } if(j!=maxi){ for(k=0; k=0; --i){ sum=x[i]; for(j=i+1; jub[i]) return 0; return 1; } #ifdef HAVE_LAPACK /* compute the Cholesky decomposition of C in W, s.t. C=W^t W and W is upper triangular */ int LEVMAR_CHOLESKY(LM_REAL *C, LM_REAL *W, int m) { register int i, j; int info; /* copy weights array C to W so that LAPACK won't destroy it; * C is assumed symmetric, hence no transposition is needed */ for(i=0, j=m*m; i>bpwr)<0; i-=blocksize){ e[i ]=x[i ]-y[i ]; sum0+=e[i ]*e[i ]; j1=i-1; e[j1]=x[j1]-y[j1]; sum1+=e[j1]*e[j1]; j2=i-2; e[j2]=x[j2]-y[j2]; sum2+=e[j2]*e[j2]; j3=i-3; e[j3]=x[j3]-y[j3]; sum3+=e[j3]*e[j3]; j4=i-4; e[j4]=x[j4]-y[j4]; sum0+=e[j4]*e[j4]; j5=i-5; e[j5]=x[j5]-y[j5]; sum1+=e[j5]*e[j5]; j6=i-6; e[j6]=x[j6]-y[j6]; sum2+=e[j6]*e[j6]; j7=i-7; e[j7]=x[j7]-y[j7]; sum3+=e[j7]*e[j7]; } /* * There may be some left to do. * This could be done as a simple for() loop, * but a switch is faster (and more interesting) */ i=blockn; if(i0; i-=blocksize){ e[i ]=-y[i ]; sum0+=e[i ]*e[i ]; j1=i-1; e[j1]=-y[j1]; sum1+=e[j1]*e[j1]; j2=i-2; e[j2]=-y[j2]; sum2+=e[j2]*e[j2]; j3=i-3; e[j3]=-y[j3]; sum3+=e[j3]*e[j3]; j4=i-4; e[j4]=-y[j4]; sum0+=e[j4]*e[j4]; j5=i-5; e[j5]=-y[j5]; sum1+=e[j5]*e[j5]; j6=i-6; e[j6]=-y[j6]; sum2+=e[j6]*e[j6]; j7=i-7; e[j7]=-y[j7]; sum3+=e[j7]*e[j7]; } /* * There may be some left to do. * This could be done as a simple for() loop, * but a switch is faster (and more interesting) */ i=blockn; if(i