///////////////////////////////////////////////////////////////////////////////// // // Solution of linear systems involved in the Levenberg - Marquardt // minimization algorithm // Copyright (C) 2004 Manolis Lourakis (lourakis at ics forth gr) // Institute of Computer Science, Foundation for Research & Technology - Hellas // Heraklion, Crete, Greece. // // This program is free software; you can redistribute it and/or modify // it under the terms of the GNU General Public License as published by // the Free Software Foundation; either version 2 of the License, or // (at your option) any later version. // // This program is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the // GNU General Public License for more details. // ///////////////////////////////////////////////////////////////////////////////// /* Solvers for the linear systems Ax=b. Solvers should NOT modify their A & B arguments! */ #ifndef LM_REAL // not included by Axb.c #error This file should not be compiled directly! #endif #ifdef LINSOLVERS_RETAIN_MEMORY #define __STATIC__ static #define FREE_LINSOLVER_MEM(B) // empty #else #define __STATIC__ // empty #define FREE_LINSOLVER_MEM(B) free(B) #endif /* LINSOLVERS_RETAIN_MEMORY */ #ifdef HAVE_LAPACK /* prototypes of LAPACK routines */ #define GEQRF LM_MK_LAPACK_NAME(geqrf) #define ORGQR LM_MK_LAPACK_NAME(orgqr) #define TRTRS LM_MK_LAPACK_NAME(trtrs) #define POTF2 LM_MK_LAPACK_NAME(potf2) #define POTRF LM_MK_LAPACK_NAME(potrf) #define POTRS LM_MK_LAPACK_NAME(potrs) #define GETRF LM_MK_LAPACK_NAME(getrf) #define GETRS LM_MK_LAPACK_NAME(getrs) #define GESVD LM_MK_LAPACK_NAME(gesvd) #define GESDD LM_MK_LAPACK_NAME(gesdd) /* QR decomposition */ extern int GEQRF(int *m, int *n, LM_REAL *a, int *lda, LM_REAL *tau, LM_REAL *work, int *lwork, int *info); extern int ORGQR(int *m, int *n, int *k, LM_REAL *a, int *lda, LM_REAL *tau, LM_REAL *work, int *lwork, int *info); /* solution of triangular systems */ extern int TRTRS(char *uplo, char *trans, char *diag, int *n, int *nrhs, LM_REAL *a, int *lda, LM_REAL *b, int *ldb, int *info); /* Cholesky decomposition and systems solution */ extern int POTF2(char *uplo, int *n, LM_REAL *a, int *lda, int *info); extern int POTRF(char *uplo, int *n, LM_REAL *a, int *lda, int *info); /* block version of dpotf2 */ extern int POTRS(char *uplo, int *n, int *nrhs, LM_REAL *a, int *lda, LM_REAL *b, int *ldb, int *info); /* LU decomposition and systems solution */ extern int GETRF(int *m, int *n, LM_REAL *a, int *lda, int *ipiv, int *info); extern int GETRS(char *trans, int *n, int *nrhs, LM_REAL *a, int *lda, int *ipiv, LM_REAL *b, int *ldb, int *info); /* Singular Value Decomposition (SVD) */ extern int GESVD(char *jobu, char *jobvt, int *m, int *n, LM_REAL *a, int *lda, LM_REAL *s, LM_REAL *u, int *ldu, LM_REAL *vt, int *ldvt, LM_REAL *work, int *lwork, int *info); /* lapack 3.0 new SVD routine, faster than xgesvd(). * In case that your version of LAPACK does not include them, use the above two older routines */ extern int GESDD(char *jobz, int *m, int *n, LM_REAL *a, int *lda, LM_REAL *s, LM_REAL *u, int *ldu, LM_REAL *vt, int *ldvt, LM_REAL *work, int *lwork, int *iwork, int *info); /* precision-specific definitions */ #define AX_EQ_B_QR LM_ADD_PREFIX(Ax_eq_b_QR) #define AX_EQ_B_QRLS LM_ADD_PREFIX(Ax_eq_b_QRLS) #define AX_EQ_B_CHOL LM_ADD_PREFIX(Ax_eq_b_Chol) #define AX_EQ_B_LU LM_ADD_PREFIX(Ax_eq_b_LU) #define AX_EQ_B_SVD LM_ADD_PREFIX(Ax_eq_b_SVD) /* * This function returns the solution of Ax = b * * The function is based on QR decomposition with explicit computation of Q: * If A=Q R with Q orthogonal and R upper triangular, the linear system becomes * Q R x = b or R x = Q^T b. * The last equation can be solved directly. * * A is mxm, b is mx1 * * The function returns 0 in case of error, 1 if successful * * This function is often called repetitively to solve problems of identical * dimensions. To avoid repetitive malloc's and free's, allocated memory is * retained between calls and free'd-malloc'ed when not of the appropriate size. * A call with NULL as the first argument forces this memory to be released. */ int AX_EQ_B_QR(LM_REAL *A, LM_REAL *B, LM_REAL *x, int m) { __STATIC__ LM_REAL *buf=NULL; __STATIC__ int buf_sz=0; static int nb=0; /* no __STATIC__ decl. here! */ LM_REAL *a, *qtb, *tau, *r, *work; int a_sz, qtb_sz, tau_sz, r_sz, tot_sz; register int i, j; int info, worksz, nrhs=1; register LM_REAL sum; if(!A) #ifdef LINSOLVERS_RETAIN_MEMORY { if(buf) free(buf); buf=NULL; buf_sz=0; return 1; } #else return 1; /* NOP */ #endif /* LINSOLVERS_RETAIN_MEMORY */ /* calculate required memory size */ a_sz=m*m; qtb_sz=m; tau_sz=m; r_sz=m*m; /* only the upper triangular part really needed */ if(!nb){ LM_REAL tmp; worksz=-1; // workspace query; optimal size is returned in tmp GEQRF((int *)&m, (int *)&m, NULL, (int *)&m, NULL, (LM_REAL *)&tmp, (int *)&worksz, (int *)&info); nb=((int)tmp)/m; // optimal worksize is m*nb } worksz=nb*m; tot_sz=a_sz + qtb_sz + tau_sz + r_sz + worksz; #ifdef LINSOLVERS_RETAIN_MEMORY if(tot_sz>buf_sz){ /* insufficient memory, allocate a "big" memory chunk at once */ if(buf) free(buf); /* free previously allocated memory */ buf_sz=tot_sz; buf=(LM_REAL *)malloc(buf_sz*sizeof(LM_REAL)); if(!buf){ PRINT_ERROR(RCAT("memory allocation in ", AX_EQ_B_QR) "() failed!\n"); return LM_ERROR_MEMORY_ALLOCATION_FAILURE; } } #else buf_sz=tot_sz; buf=(LM_REAL *)malloc(buf_sz*sizeof(LM_REAL)); if(!buf){ PRINT_ERROR(RCAT("memory allocation in ", AX_EQ_B_QR) "() failed!\n"); return LM_ERROR_MEMORY_ALLOCATION_FAILURE; } #endif /* LINSOLVERS_RETAIN_MEMORY */ a=buf; qtb=a+a_sz; tau=qtb+qtb_sz; r=tau+tau_sz; work=r+r_sz; /* store A (column major!) into a */ for(i=0; ibuf_sz){ /* insufficient memory, allocate a "big" memory chunk at once */ if(buf) free(buf); /* free previously allocated memory */ buf_sz=tot_sz; buf=(LM_REAL *)malloc(buf_sz*sizeof(LM_REAL)); if(!buf){ PRINT_ERROR(RCAT("memory allocation in ", AX_EQ_B_QRLS) "() failed!\n"); return LM_ERROR_MEMORY_ALLOCATION_FAILURE; } } #else buf_sz=tot_sz; buf=(LM_REAL *)malloc(buf_sz*sizeof(LM_REAL)); if(!buf){ PRINT_ERROR(RCAT("memory allocation in ", AX_EQ_B_QRLS) "() failed!\n"); return LM_ERROR_MEMORY_ALLOCATION_FAILURE; } #endif /* LINSOLVERS_RETAIN_MEMORY */ a=buf; atb=a+a_sz; tau=atb+atb_sz; r=tau+tau_sz; work=r+r_sz; /* store A (column major!) into a */ for(i=0; ibuf_sz){ /* insufficient memory, allocate a "big" memory chunk at once */ if(buf) free(buf); /* free previously allocated memory */ buf_sz=tot_sz; buf=(LM_REAL *)malloc(buf_sz*sizeof(LM_REAL)); if(!buf){ PRINT_ERROR(RCAT("memory allocation in ", AX_EQ_B_CHOL) "() failed!\n"); return LM_ERROR_MEMORY_ALLOCATION_FAILURE; } } #else buf_sz=tot_sz; buf=(LM_REAL *)malloc(buf_sz*sizeof(LM_REAL)); if(!buf){ PRINT_ERROR(RCAT("memory allocation in ", AX_EQ_B_CHOL) "() failed!\n"); return LM_ERROR_MEMORY_ALLOCATION_FAILURE; } #endif /* LINSOLVERS_RETAIN_MEMORY */ a=buf; b=a+a_sz; /* store A into a anb B into b. A is assumed symmetric, * hence no transposition is needed */ for(i=0; ibuf_sz){ /* insufficient memory, allocate a "big" memory chunk at once */ if(buf) free(buf); /* free previously allocated memory */ buf_sz=tot_sz; buf=(LM_REAL *)malloc(buf_sz); if(!buf){ PRINT_ERROR(RCAT("memory allocation in ", AX_EQ_B_LU) "() failed!\n"); return LM_ERROR_MEMORY_ALLOCATION_FAILURE; } } #else buf_sz=tot_sz; buf=(LM_REAL *)malloc(buf_sz); if(!buf){ PRINT_ERROR(RCAT("memory allocation in ", AX_EQ_B_LU) "() failed!\n"); return LM_ERROR_MEMORY_ALLOCATION_FAILURE; } #endif /* LINSOLVERS_RETAIN_MEMORY */ a=buf; b=a+a_sz; ipiv=(int *)(b+b_sz); /* store A (column major!) into a and B into b */ for(i=0; ibuf_sz){ /* insufficient memory, allocate a "big" memory chunk at once */ if(buf) free(buf); /* free previously allocated memory */ buf_sz=tot_sz; buf=(LM_REAL *)malloc(buf_sz); if(!buf){ PRINT_ERROR(RCAT("memory allocation in ", AX_EQ_B_SVD) "() failed!\n"); return LM_ERROR_MEMORY_ALLOCATION_FAILURE; } } #else buf_sz=tot_sz; buf=(LM_REAL *)malloc(buf_sz); if(!buf){ PRINT_ERROR(RCAT("memory allocation in ", AX_EQ_B_SVD) "() failed!\n"); return LM_ERROR_MEMORY_ALLOCATION_FAILURE; } #endif /* LINSOLVERS_RETAIN_MEMORY */ a=buf; u=a+a_sz; s=u+u_sz; vt=s+s_sz; work=vt+vt_sz; iwork=(int *)(work+worksz); /* store A (column major!) into a */ for(i=0; i0.0; eps*=LM_CNST(0.5)) ; eps*=LM_CNST(2.0); } /* compute the pseudoinverse in a */ for(i=0; ithresh; rank++){ one_over_denom=LM_CNST(1.0)/s[rank]; for(j=0; jbuf_sz){ /* insufficient memory, allocate a "big" memory chunk at once */ if(buf) free(buf); /* free previously allocated memory */ buf_sz=tot_sz; buf=(void *)malloc(tot_sz); if(!buf){ PRINT_ERROR(RCAT("memory allocation in ", AX_EQ_B_LU) "() failed!\n"); return LM_ERROR_MEMORY_ALLOCATION_FAILURE; } } #else buf_sz=tot_sz; buf=(void *)malloc(tot_sz); if(!buf){ PRINT_ERROR(RCAT("memory allocation in ", AX_EQ_B_LU) "() failed!\n"); return LM_ERROR_MEMORY_ALLOCATION_FAILURE; } #endif /* LINSOLVERS_RETAIN_MEMORY */ a=buf; work=a+a_sz; idx=(int *)(work+work_sz); /* avoid destroying A, B by copying them to a, x resp. */ for(i=0; imax) max=tmp; if(max==0.0){ PRINT_ERROR(RCAT("Singular matrix A in ", AX_EQ_B_LU) "()!\n"); #ifndef LINSOLVERS_RETAIN_MEMORY free(buf); #endif return 0; } work[i]=LM_CNST(1.0)/max; } for(j=0; j=max){ max=tmp; maxi=i; } } if(j!=maxi){ for(k=0; k=0; --i){ sum=x[i]; for(j=i+1; j