1/*
2 * Library:   lmfit (Levenberg-Marquardt least squares fitting)
3 *
4 * File:      lmmin.c
5 *
6 * Contents:  Levenberg-Marquardt minimization.
7 *
8 * Copyright: MINPACK authors, The University of Chikago (1980-1999)
9 *            Joachim Wuttke, Forschungszentrum Juelich GmbH (2004-2013)
10 *
11 * License:   see ../COPYING (FreeBSD)
12 *
13 * Homepage:  apps.jcns.fz-juelich.de/lmfit
14 */
15
16#include <assert.h>
17#include <stdlib.h>
18#include <stdio.h>
19#include <math.h>
20#include <float.h>
21#include "lmmin.h"
22
23#define MIN(a, b) (((a) <= (b)) ? (a) : (b))
24#define MAX(a, b) (((a) >= (b)) ? (a) : (b))
25#define SQR(x) (x) * (x)
26
27/* Declare functions that do the heavy numerics.
28   Implementions are in this source file, below lmmin.
29   Dependences: lmmin calls lmpar, which calls qrfac and qrsolv. */
30void lm_lmpar(const int n, double* r, const int ldr, const int* Pivot,
31              const double* diag, const double* qtb, const double delta,
32              double* par, double* x, double* Sdiag, double* aux, double* xdi);
33void lm_qrfac(const int m, const int n, double* A, int* Pivot, double* Rdiag,
34              double* Acnorm, double* W);
35void lm_qrsolv(const int n, double* r, const int ldr, const int* Pivot,
36               const double* diag, const double* qtb, double* x,
37               double* Sdiag, double* W);
38
39/******************************************************************************/
40/*  Numeric constants                                                         */
41/******************************************************************************/
42
43/* Set machine-dependent constants to values from float.h. */
44#define LM_MACHEP DBL_EPSILON       /* resolution of arithmetic */
45#define LM_DWARF DBL_MIN            /* smallest nonzero number */
46#define LM_SQRT_DWARF sqrt(DBL_MIN) /* square should not underflow */
47#define LM_SQRT_GIANT sqrt(DBL_MAX) /* square should not overflow */
48#define LM_USERTOL 30 * LM_MACHEP   /* users are recommended to require this */
49
50/* If the above values do not work, the following seem good for an x86:
51 LM_MACHEP     .555e-16
52 LM_DWARF      9.9e-324
53 LM_SQRT_DWARF 1.e-160
54 LM_SQRT_GIANT 1.e150
55 LM_USER_TOL   1.e-14
56   The following values should work on any machine:
57 LM_MACHEP     1.2e-16
58 LM_DWARF      1.0e-38
59 LM_SQRT_DWARF 3.834e-20
60 LM_SQRT_GIANT 1.304e19
61 LM_USER_TOL   1.e-14
62*/
63
64/* Predefined control parameter sets (msgfile=NULL means stdout). */
65const lm_control_struct lm_control_double = {
66    LM_USERTOL, LM_USERTOL, LM_USERTOL, LM_USERTOL,
67    100., 100, 1, NULL, 0, -1, -1};
68const lm_control_struct lm_control_float = {
69    1.e-7, 1.e-7, 1.e-7, 1.e-7,
70    100., 100, 1, NULL, 0, -1, -1};
71
72/******************************************************************************/
73/*  Message texts (indexed by status.info)                                    */
74/******************************************************************************/
75
76const char* lm_infmsg[] = {
77    "found zero (sum of squares below underflow limit)",
78    "converged  (the relative error in the sum of squares is at most tol)",
79    "converged  (the relative error of the parameter vector is at most tol)",
80    "converged  (both errors are at most tol)",
81    "trapped    (by degeneracy; increasing epsilon might help)",
82    "exhausted  (number of function calls exceeding preset patience)",
83    "failed     (ftol<tol: cannot reduce sum of squares any further)",
84    "failed     (xtol<tol: cannot improve approximate solution any further)",
85    "failed     (gtol<tol: cannot improve approximate solution any further)",
86    "crashed    (not enough memory)",
87    "exploded   (fatal coding error: improper input parameters)",
88    "stopped    (break requested within function evaluation)",
89    "found nan  (function value is not-a-number or infinite)"};
90
91const char* lm_shortmsg[] = {
92    "found zero",
93    "converged (f)",
94    "converged (p)",
95    "converged (2)",
96    "degenerate",
97    "call limit",
98    "failed (f)",
99    "failed (p)",
100    "failed (o)",
101    "no memory",
102    "invalid input",
103    "user break",
104    "found nan"};
105
106/******************************************************************************/
107/*  Monitoring auxiliaries.                                                   */
108/******************************************************************************/
109
110void lm_print_pars(int nout, const double* par, FILE* fout)
111{
112    int i;
113    for (i = 0; i < nout; ++i)
114        fprintf(fout, " %16.9g", par[i]);
115    fprintf(fout, "\n");
116}
117
118/******************************************************************************/
119/*  lmmin (main minimization routine)                                         */
120/******************************************************************************/
121
122void lmmin(const int n, double* x, const int m, const void* data,
123           void (*evaluate)(const double* par, const int m_dat,
124                            const void* data, double* fvec, int* userbreak),
125           const lm_control_struct* C, lm_status_struct* S)
126{
127    int j, i;
128    double actred, dirder, fnorm, fnorm1, gnorm, pnorm, prered, ratio, step,
129        sum, temp, temp1, temp2, temp3;
130
131    /***  Initialize internal variables.  ***/
132
133    int maxfev = C->patience * (n+1);
134
135    int inner_success; /* flag for loop control */
136    double lmpar = 0;  /* Levenberg-Marquardt parameter */
137    double delta = 0;
138    double xnorm = 0;
139    double eps = sqrt(MAX(C->epsilon, LM_MACHEP)); /* for forward differences */
140
141    int nout = C->n_maxpri == -1 ? n : MIN(C->n_maxpri, n);
142
143    /* Reinterpret C->msgfile=NULL as stdout (which is unavailable for
144       compile-time initialization of lm_control_double and similar). */
145    FILE* msgfile = C->msgfile ? C->msgfile : stdout;
146
147    /***  Default status info; must be set before first return statement.  ***/
148
149    S->outcome = 0; /* status code */
150    S->userbreak = 0;
151    S->nfev = 0; /* function evaluation counter */
152
153    /***  Check input parameters for errors.  ***/
154
155    if (n <= 0) {
156        fprintf(stderr, "lmmin: invalid number of parameters %i\n", n);
157        S->outcome = 10;
158        return;
159    }
160    if (m < n) {
161        fprintf(stderr, "lmmin: number of data points (%i) "
162                        "smaller than number of parameters (%i)\n",
163                m, n);
164        S->outcome = 10;
165        return;
166    }
167    if (C->ftol < 0 || C->xtol < 0 || C->gtol < 0) {
168        fprintf(stderr,
169                "lmmin: negative tolerance (at least one of %g %g %g)\n",
170                C->ftol, C->xtol, C->gtol);
171        S->outcome = 10;
172        return;
173    }
174    if (maxfev <= 0) {
175        fprintf(stderr, "lmmin: nonpositive function evaluations limit %i\n",
176                maxfev);
177        S->outcome = 10;
178        return;
179    }
180    if (C->stepbound <= 0) {
181        fprintf(stderr, "lmmin: nonpositive stepbound %g\n", C->stepbound);
182        S->outcome = 10;
183        return;
184    }
185    if (C->scale_diag != 0 && C->scale_diag != 1) {
186        fprintf(stderr, "lmmin: logical variable scale_diag=%i, "
187                        "should be 0 or 1\n",
188                C->scale_diag);
189        S->outcome = 10;
190        return;
191    }
192
193    /***  Allocate work space.  ***/
194
195    /* Allocate total workspace with just one system call */
196    char* ws;
197    if ((ws = malloc((2*m + 5*n + m*n) * sizeof(double) +
198                     n * sizeof(int))) == NULL) {
199        S->outcome = 9;
200        return;
201    }
202
203    /* Assign workspace segments. */
204    char* pws = ws;
205    double* fvec = (double*)pws;
206    pws += m * sizeof(double) / sizeof(char);
207    double* diag = (double*)pws;
208    pws += n * sizeof(double) / sizeof(char);
209    double* qtf = (double*)pws;
210    pws += n * sizeof(double) / sizeof(char);
211    double* fjac = (double*)pws;
212    pws += n * m * sizeof(double) / sizeof(char);
213    double* wa1 = (double*)pws;
214    pws += n * sizeof(double) / sizeof(char);
215    double* wa2 = (double*)pws;
216    pws += n * sizeof(double) / sizeof(char);
217    double* wa3 = (double*)pws;
218    pws += n * sizeof(double) / sizeof(char);
219    double* wf = (double*)pws;
220    pws += m * sizeof(double) / sizeof(char);
221    int* Pivot = (int*)pws;
222    pws += n * sizeof(int) / sizeof(char);
223
224    /* Initialize diag. */
225    if (!C->scale_diag)
226        for (j = 0; j < n; j++)
227            diag[j] = 1;
228
229    /***  Evaluate function at starting point and calculate norm.  ***/
230
231    if (C->verbosity) {
232        fprintf(msgfile, "lmmin start ");
233        lm_print_pars(nout, x, msgfile);
234    }
235    (*evaluate)(x, m, data, fvec, &(S->userbreak));
236    if (C->verbosity > 4)
237        for (i = 0; i < m; ++i)
238            fprintf(msgfile, "    fvec[%4i] = %18.8g\n", i, fvec[i]);
239    S->nfev = 1;
240    if (S->userbreak)
241        goto terminate;
242    fnorm = lm_enorm(m, fvec);
243    if (C->verbosity)
244        fprintf(msgfile, "  fnorm = %18.8g\n", fnorm);
245
246    if (!isfinite(fnorm)) {
247        S->outcome = 12; /* nan */
248        goto terminate;
249    } else if (fnorm <= LM_DWARF) {
250        S->outcome = 0; /* sum of squares almost zero, nothing to do */
251        goto terminate;
252    }
253
254    /***  The outer loop: compute gradient, then descend.  ***/
255
256    for (int outer = 0;; ++outer) {
257
258        /** Calculate the Jacobian. **/
259        for (j = 0; j < n; j++) {
260            temp = x[j];
261            step = MAX(eps * eps, eps * fabs(temp));
262            x[j] += step; /* replace temporarily */
263            (*evaluate)(x, m, data, wf, &(S->userbreak));
264            ++(S->nfev);
265            if (S->userbreak)
266                goto terminate;
267            for (i = 0; i < m; i++)
268                fjac[j*m+i] = (wf[i] - fvec[i]) / step;
269            x[j] = temp; /* restore */
270        }
271        if (C->verbosity >= 10) {
272            /* print the entire matrix */
273            printf("\nlmmin Jacobian\n");
274            for (i = 0; i < m; i++) {
275                printf("  ");
276                for (j = 0; j < n; j++)
277                    printf("%.5e ", fjac[j*m+i]);
278                printf("\n");
279            }
280        }
281
282        /** Compute the QR factorization of the Jacobian. **/
283
284        /* fjac is an m by n array. The upper n by n submatrix of fjac is made
285         *   to contain an upper triangular matrix R with diagonal elements of
286         *   nonincreasing magnitude such that
287         *
288         *         P^T*(J^T*J)*P = R^T*R
289         *
290         *         (NOTE: ^T stands for matrix transposition),
291         *
292         *   where P is a permutation matrix and J is the final calculated
293         *   Jacobian. Column j of P is column Pivot(j) of the identity matrix.
294         *   The lower trapezoidal part of fjac contains information generated
295         *   during the computation of R.
296         *
297         * Pivot is an integer array of length n. It defines a permutation
298         *   matrix P such that jac*P = Q*R, where jac is the final calculated
299         *   Jacobian, Q is orthogonal (not stored), and R is upper triangular
300         *   with diagonal elements of nonincreasing magnitude. Column j of P
301         *   is column Pivot(j) of the identity matrix.
302         */
303
304        lm_qrfac(m, n, fjac, Pivot, wa1, wa2, wa3);
305        /* return values are Pivot, wa1=rdiag, wa2=acnorm */
306
307        /** Form Q^T * fvec, and store first n components in qtf. **/
308        for (i = 0; i < m; i++)
309            wf[i] = fvec[i];
310
311        for (j = 0; j < n; j++) {
312            temp3 = fjac[j*m+j];
313            if (temp3 != 0) {
314                sum = 0;
315                for (i = j; i < m; i++)
316                    sum += fjac[j*m+i] * wf[i];
317                temp = -sum / temp3;
318                for (i = j; i < m; i++)
319                    wf[i] += fjac[j*m+i] * temp;
320            }
321            fjac[j*m+j] = wa1[j];
322            qtf[j] = wf[j];
323        }
324
325        /**  Compute norm of scaled gradient and detect degeneracy. **/
326        gnorm = 0;
327        for (j = 0; j < n; j++) {
328            if (wa2[Pivot[j]] == 0)
329                continue;
330            sum = 0;
331            for (i = 0; i <= j; i++)
332                sum += fjac[j*m+i] * qtf[i];
333            gnorm = MAX(gnorm, fabs(sum / wa2[Pivot[j]] / fnorm));
334        }
335
336        if (gnorm <= C->gtol) {
337            S->outcome = 4;
338            goto terminate;
339        }
340
341        /** Initialize or update diag and delta. **/
342        if (!outer) { /* first iteration only */
343            if (C->scale_diag) {
344                /* diag := norms of the columns of the initial Jacobian */
345                for (j = 0; j < n; j++)
346                    diag[j] = wa2[j] ? wa2[j] : 1;
347                /* xnorm := || D x || */
348                for (j = 0; j < n; j++)
349                    wa3[j] = diag[j] * x[j];
350                xnorm = lm_enorm(n, wa3);
351                if (C->verbosity >= 2) {
352                    fprintf(msgfile, "lmmin diag  ");
353                    lm_print_pars(nout, x, msgfile); // xnorm
354                    fprintf(msgfile, "  xnorm = %18.8g\n", xnorm);
355                }
356                /* Only now print the header for the loop table. */
357                if (C->verbosity >= 3) {
358                    fprintf(msgfile, "  o  i     lmpar    prered"
359                                     "          ratio    dirder      delta"
360                                     "      pnorm                 fnorm");
361                    for (i = 0; i < nout; ++i)
362                        fprintf(msgfile, "               p%i", i);
363                    fprintf(msgfile, "\n");
364                }
365            } else {
366                xnorm = lm_enorm(n, x);
367            }
368            if (!isfinite(xnorm)) {
369                S->outcome = 12; /* nan */
370                goto terminate;
371            }
372            /* Initialize the step bound delta. */
373            if (xnorm)
374                delta = C->stepbound * xnorm;
375            else
376                delta = C->stepbound;
377        } else {
378            if (C->scale_diag) {
379                for (j = 0; j < n; j++)
380                    diag[j] = MAX(diag[j], wa2[j]);
381            }
382        }
383
384        /** The inner loop. **/
385        int inner = 0;
386        do {
387
388            /** Determine the Levenberg-Marquardt parameter. **/
389            lm_lmpar(n, fjac, m, Pivot, diag, qtf, delta, &lmpar,
390                     wa1, wa2, wf, wa3);
391            /* used return values are fjac (partly), lmpar, wa1=x, wa3=diag*x */
392
393            /* Predict scaled reduction. */
394            pnorm = lm_enorm(n, wa3);
395            if (!isfinite(pnorm)) {
396                S->outcome = 12; /* nan */
397                goto terminate;
398            }
399            temp2 = lmpar * SQR(pnorm / fnorm);
400            for (j = 0; j < n; j++) {
401                wa3[j] = 0;
402                for (i = 0; i <= j; i++)
403                    wa3[i] -= fjac[j*m+i] * wa1[Pivot[j]];
404            }
405            temp1 = SQR(lm_enorm(n, wa3) / fnorm);
406            if (!isfinite(temp1)) {
407                S->outcome = 12; /* nan */
408                goto terminate;
409            }
410            prered = temp1 + 2*temp2;
411            dirder = -temp1 + temp2; /* scaled directional derivative */
412
413            /* At first call, adjust the initial step bound. */
414            if (!outer && pnorm < delta)
415                delta = pnorm;
416
417            /** Evaluate the function at x + p. **/
418            for (j = 0; j < n; j++)
419                wa2[j] = x[j] - wa1[j];
420            (*evaluate)(wa2, m, data, wf, &(S->userbreak));
421            ++(S->nfev);
422            if (S->userbreak)
423                goto terminate;
424            fnorm1 = lm_enorm(m, wf);
425            if (!isfinite(fnorm1)) {
426                S->outcome = 12; /* nan */
427                goto terminate;
428            }
429
430            /** Evaluate the scaled reduction. **/
431
432            /* Actual scaled reduction. */
433            actred = 1 - SQR(fnorm1 / fnorm);
434
435            /* Ratio of actual to predicted reduction. */
436            ratio = prered ? actred / prered : 0;
437
438            if (C->verbosity == 2) {
439                fprintf(msgfile, "lmmin (%i:%i) ", outer, inner);
440                lm_print_pars(nout, wa2, msgfile); // fnorm1,
441            } else if (C->verbosity >= 3) {
442                printf("%3i %2i %9.2g %9.2g %14.6g"
443                       " %9.2g %10.3e %10.3e %21.15e",
444                       outer, inner, lmpar, prered, ratio,
445                       dirder, delta, pnorm, fnorm1);
446                for (i = 0; i < nout; ++i)
447                    fprintf(msgfile, " %16.9g", wa2[i]);
448                fprintf(msgfile, "\n");
449            }
450
451            /* Update the step bound. */
452            if (ratio <= 0.25) {
453                if (actred >= 0)
454                    temp = 0.5;
455                else if (actred > -99) /* -99 = 1-1/0.1^2 */
456                    temp = MAX(dirder / (2*dirder + actred), 0.1);
457                else
458                    temp = 0.1;
459                delta = temp * MIN(delta, pnorm / 0.1);
460                lmpar /= temp;
461            } else if (ratio >= 0.75) {
462                delta = 2 * pnorm;
463                lmpar *= 0.5;
464            } else if (!lmpar) {
465                delta = 2 * pnorm;
466            }
467
468            /**  On success, update solution, and test for convergence. **/
469
470            inner_success = ratio >= 1e-4;
471            if (inner_success) {
472
473                /* Update x, fvec, and their norms. */
474                if (C->scale_diag) {
475                    for (j = 0; j < n; j++) {
476                        x[j] = wa2[j];
477                        wa2[j] = diag[j] * x[j];
478                    }
479                } else {
480                    for (j = 0; j < n; j++)
481                        x[j] = wa2[j];
482                }
483                for (i = 0; i < m; i++)
484                    fvec[i] = wf[i];
485                xnorm = lm_enorm(n, wa2);
486                if (!isfinite(xnorm)) {
487                    S->outcome = 12; /* nan */
488                    goto terminate;
489                }
490                fnorm = fnorm1;
491            }
492
493            /* Convergence tests. */
494            S->outcome = 0;
495            if (fnorm <= LM_DWARF)
496                goto terminate; /* success: sum of squares almost zero */
497            /* Test two criteria (both may be fulfilled). */
498            if (fabs(actred) <= C->ftol && prered <= C->ftol && ratio <= 2)
499                S->outcome = 1; /* success: x almost stable */
500            if (delta <= C->xtol * xnorm)
501                S->outcome += 2; /* success: sum of squares almost stable */
502            if (S->outcome != 0) {
503                goto terminate;
504            }
505
506            /** Tests for termination and stringent tolerances. **/
507            if (S->nfev >= maxfev) {
508                S->outcome = 5;
509                goto terminate;
510            }
511            if (fabs(actred) <= LM_MACHEP && prered <= LM_MACHEP &&
512                ratio <= 2) {
513                S->outcome = 6;
514                goto terminate;
515            }
516            if (delta <= LM_MACHEP * xnorm) {
517                S->outcome = 7;
518                goto terminate;
519            }
520            if (gnorm <= LM_MACHEP) {
521                S->outcome = 8;
522                goto terminate;
523            }
524
525            /** End of the inner loop. Repeat if iteration unsuccessful. **/
526            ++inner;
527        } while (!inner_success);
528
529    }; /***  End of the outer loop.  ***/
530
531terminate:
532    S->fnorm = lm_enorm(m, fvec);
533    if (C->verbosity >= 2)
534        printf("lmmin outcome (%i) xnorm %g ftol %g xtol %g\n", S->outcome,
535               xnorm, C->ftol, C->xtol);
536    if (C->verbosity & 1) {
537        fprintf(msgfile, "lmmin final ");
538        lm_print_pars(nout, x, msgfile); // S->fnorm,
539        fprintf(msgfile, "  fnorm = %18.8g\n", S->fnorm);
540    }
541    if (S->userbreak) /* user-requested break */
542        S->outcome = 11;
543
544    /***  Deallocate the workspace.  ***/
545    free(ws);
546
547} /*** lmmin. ***/
548
549/******************************************************************************/
550/*  lm_lmpar (determine Levenberg-Marquardt parameter)                        */
551/******************************************************************************/
552
553void lm_lmpar(const int n, double* r, const int ldr, const int* Pivot,
554              const double* diag, const double* qtb, const double delta,
555              double* par, double* x, double* Sdiag, double* aux, double* xdi)
556/*     Given an m by n matrix A, an n by n nonsingular diagonal matrix D,
557 *     an m-vector b, and a positive number delta, the problem is to
558 *     determine a parameter value par such that if x solves the system
559 *
560 *          A*x = b  and  sqrt(par)*D*x = 0
561 *
562 *     in the least squares sense, and dxnorm is the Euclidean norm of D*x,
563 *     then either par=0 and (dxnorm-delta) < 0.1*delta, or par>0 and
564 *     abs(dxnorm-delta) < 0.1*delta.
565 *
566 *     Using lm_qrsolv, this subroutine completes the solution of the
567 *     problem if it is provided with the necessary information from the
568 *     QR factorization, with column pivoting, of A. That is, if A*P = Q*R,
569 *     where P is a permutation matrix, Q has orthogonal columns, and R is
570 *     an upper triangular matrix with diagonal elements of nonincreasing
571 *     magnitude, then lmpar expects the full upper triangle of R, the
572 *     permutation matrix P, and the first n components of Q^T*b. On output
573 *     lmpar also provides an upper triangular matrix S such that
574 *
575 *          P^T*(A^T*A + par*D*D)*P = S^T*S.
576 *
577 *     S is employed within lmpar and may be of separate interest.
578 *
579 *     Only a few iterations are generally needed for convergence of the
580 *     algorithm. If, however, the limit of 10 iterations is reached, then
581 *     the output par will contain the best value obtained so far.
582 *
583 *     Parameters:
584 *
585 *      n is a positive integer INPUT variable set to the order of r.
586 *
587 *      r is an n by n array. On INPUT the full upper triangle must contain
588 *        the full upper triangle of the matrix R. On OUTPUT the full upper
589 *        triangle is unaltered, and the strict lower triangle contains the
590 *        strict upper triangle (transposed) of the upper triangular matrix S.
591 *
592 *      ldr is a positive integer INPUT variable not less than n which
593 *        specifies the leading dimension of the array R.
594 *
595 *      Pivot is an integer INPUT array of length n which defines the
596 *        permutation matrix P such that A*P = Q*R. Column j of P is column
597 *        Pivot(j) of the identity matrix.
598 *
599 *      diag is an INPUT array of length n which must contain the diagonal
600 *        elements of the matrix D.
601 *
602 *      qtb is an INPUT array of length n which must contain the first
603 *        n elements of the vector Q^T*b.
604 *
605 *      delta is a positive INPUT variable which specifies an upper bound
606 *        on the Euclidean norm of D*x.
607 *
608 *      par is a nonnegative variable. On INPUT par contains an initial
609 *        estimate of the Levenberg-Marquardt parameter. On OUTPUT par
610 *        contains the final estimate.
611 *
612 *      x is an OUTPUT array of length n which contains the least-squares
613 *        solution of the system A*x = b, sqrt(par)*D*x = 0, for the output par.
614 *
615 *      Sdiag is an array of length n needed as workspace; on OUTPUT it
616 *        contains the diagonal elements of the upper triangular matrix S.
617 *
618 *      aux is a multi-purpose work array of length n.
619 *
620 *      xdi is a work array of length n. On OUTPUT: diag[j] * x[j].
621 *
622 */
623{
624    int i, iter, j, nsing;
625    double dxnorm, fp, fp_old, gnorm, parc, parl, paru;
626    double sum, temp;
627    static double p1 = 0.1;
628
629    /*** Compute and store in x the Gauss-Newton direction. If the Jacobian
630         is rank-deficient, obtain a least-squares solution. ***/
631
632    nsing = n;
633    for (j = 0; j < n; j++) {
634        aux[j] = qtb[j];
635        if (r[j*ldr+j] == 0 && nsing == n)
636            nsing = j;
637        if (nsing < n)
638            aux[j] = 0;
639    }
640    for (j = nsing-1; j >= 0; j--) {
641        aux[j] = aux[j] / r[j+ldr*j];
642        temp = aux[j];
643        for (i = 0; i < j; i++)
644            aux[i] -= r[j*ldr+i] * temp;
645    }
646
647    for (j = 0; j < n; j++)
648        x[Pivot[j]] = aux[j];
649
650    /*** Initialize the iteration counter, evaluate the function at the origin,
651         and test for acceptance of the Gauss-Newton direction. ***/
652
653    for (j = 0; j < n; j++)
654        xdi[j] = diag[j] * x[j];
655    dxnorm = lm_enorm(n, xdi);
656    fp = dxnorm - delta;
657    if (fp <= p1 * delta) {
658#ifdef LMFIT_DEBUG_MESSAGES
659        printf("debug lmpar nsing=%d, n=%d, terminate[fp<=p1*del]\n", nsing, n);
660#endif
661        *par = 0;
662        return;
663    }
664
665    /*** If the Jacobian is not rank deficient, the Newton step provides a
666         lower bound, parl, for the zero of the function. Otherwise set this
667         bound to zero. ***/
668
669    parl = 0;
670    if (nsing >= n) {
671        for (j = 0; j < n; j++)
672            aux[j] = diag[Pivot[j]] * xdi[Pivot[j]] / dxnorm;
673
674        for (j = 0; j < n; j++) {
675            sum = 0;
676            for (i = 0; i < j; i++)
677                sum += r[j*ldr+i] * aux[i];
678            aux[j] = (aux[j] - sum) / r[j+ldr*j];
679        }
680        temp = lm_enorm(n, aux);
681        parl = fp / delta / temp / temp;
682    }
683
684    /*** Calculate an upper bound, paru, for the zero of the function. ***/
685
686    for (j = 0; j < n; j++) {
687        sum = 0;
688        for (i = 0; i <= j; i++)
689            sum += r[j*ldr+i] * qtb[i];
690        aux[j] = sum / diag[Pivot[j]];
691    }
692    gnorm = lm_enorm(n, aux);
693    paru = gnorm / delta;
694    if (paru == 0)
695        paru = LM_DWARF / MIN(delta, p1);
696
697    /*** If the input par lies outside of the interval (parl,paru),
698         set par to the closer endpoint. ***/
699
700    *par = MAX(*par, parl);
701    *par = MIN(*par, paru);
702    if (*par == 0)
703        *par = gnorm / dxnorm;
704
705    /*** Iterate. ***/
706
707    for (iter = 0;; iter++) {
708
709        /** Evaluate the function at the current value of par. **/
710        if (*par == 0)
711            *par = MAX(LM_DWARF, 0.001 * paru);
712        temp = sqrt(*par);
713        for (j = 0; j < n; j++)
714            aux[j] = temp * diag[j];
715
716        lm_qrsolv(n, r, ldr, Pivot, aux, qtb, x, Sdiag, xdi);
717        /* return values are r, x, Sdiag */
718
719        for (j = 0; j < n; j++)
720            xdi[j] = diag[j] * x[j]; /* used as output */
721        dxnorm = lm_enorm(n, xdi);
722        fp_old = fp;
723        fp = dxnorm - delta;
724
725        /** If the function is small enough, accept the current value
726            of par. Also test for the exceptional cases where parl
727            is zero or the number of iterations has reached 10. **/
728        if (fabs(fp) <= p1 * delta ||
729            (parl == 0 && fp <= fp_old && fp_old < 0) || iter == 10) {
730#ifdef LMFIT_DEBUG_MESSAGES
731            printf("debug lmpar nsing=%d, iter=%d, "
732                   "par=%.4e [%.4e %.4e], delta=%.4e, fp=%.4e\n",
733                   nsing, iter, *par, parl, paru, delta, fp);
734#endif
735            break; /* the only exit from the iteration. */
736        }
737
738        /** Compute the Newton correction. **/
739        for (j = 0; j < n; j++)
740            aux[j] = diag[Pivot[j]] * xdi[Pivot[j]] / dxnorm;
741
742        for (j = 0; j < n; j++) {
743            aux[j] = aux[j] / Sdiag[j];
744            for (i = j+1; i < n; i++)
745                aux[i] -= r[j*ldr+i] * aux[j];
746        }
747        temp = lm_enorm(n, aux);
748        parc = fp / delta / temp / temp;
749
750        /** Depending on the sign of the function, update parl or paru. **/
751        if (fp > 0)
752            parl = MAX(parl, *par);
753        else /* fp < 0 [the case fp==0 is precluded by the break condition] */
754            paru = MIN(paru, *par);
755
756        /** Compute an improved estimate for par. **/
757        *par = MAX(parl, *par + parc);
758    }
759
760} /*** lm_lmpar. ***/
761
762/******************************************************************************/
763/*  lm_qrfac (QR factorization, from lapack)                                  */
764/******************************************************************************/
765
766void lm_qrfac(const int m, const int n, double* A, int* Pivot, double* Rdiag,
767              double* Acnorm, double* W)
768/*
769 *     This subroutine uses Householder transformations with column pivoting
770 *     to compute a QR factorization of the m by n matrix A. That is, qrfac
771 *     determines an orthogonal matrix Q, a permutation matrix P, and an
772 *     upper trapezoidal matrix R with diagonal elements of nonincreasing
773 *     magnitude, such that A*P = Q*R. The Householder transformation for
774 *     column k, k = 1,2,...,n, is of the form
775 *
776 *          I - 2*w*wT/|w|^2
777 *
778 *     where w has zeroes in the first k-1 positions.
779 *
780 *     Parameters:
781 *
782 *      m is an INPUT parameter set to the number of rows of A.
783 *
784 *      n is an INPUT parameter set to the number of columns of A.
785 *
786 *      A is an m by n array. On INPUT, A contains the matrix for which the
787 *        QR factorization is to be computed. On OUTPUT the strict upper
788 *        trapezoidal part of A contains the strict upper trapezoidal part
789 *        of R, and the lower trapezoidal part of A contains a factored form
790 *        of Q (the non-trivial elements of the vectors w described above).
791 *
792 *      Pivot is an integer OUTPUT array of length n that describes the
793 *        permutation matrix P. Column j of P is column Pivot(j) of the
794 *        identity matrix.
795 *
796 *      Rdiag is an OUTPUT array of length n which contains the diagonal
797 *        elements of R.
798 *
799 *      Acnorm is an OUTPUT array of length n which contains the norms of
800 *        the corresponding columns of the input matrix A. If this information
801 *        is not needed, then Acnorm can share storage with Rdiag.
802 *
803 *      W is a work array of length n.
804 *
805 */
806{
807    int i, j, k, kmax;
808    double ajnorm, sum, temp;
809
810#ifdef LMFIT_DEBUG_MESSAGES
811    printf("debug qrfac\n");
812#endif
813
814    /** Compute initial column norms;
815        initialize Pivot with identity permutation. ***/
816    for (j = 0; j < n; j++) {
817        W[j] = Rdiag[j] = Acnorm[j] = lm_enorm(m, &A[j*m]);
818        Pivot[j] = j;
819    }
820
821    /** Loop over columns of A. **/
822    assert(n <= m);
823    for (j = 0; j < n; j++) {
824
825        /** Bring the column of largest norm into the pivot position. **/
826        kmax = j;
827        for (k = j+1; k < n; k++)
828            if (Rdiag[k] > Rdiag[kmax])
829                kmax = k;
830
831        if (kmax != j) {
832            /* Swap columns j and kmax. */
833            k = Pivot[j];
834            Pivot[j] = Pivot[kmax];
835            Pivot[kmax] = k;
836            for (i = 0; i < m; i++) {
837                temp = A[j*m+i];
838                A[j*m+i] = A[kmax*m+i];
839                A[kmax*m+i] = temp;
840            }
841            /* Half-swap: Rdiag[j], W[j] won't be needed any further. */
842            Rdiag[kmax] = Rdiag[j];
843            W[kmax] = W[j];
844        }
845
846        /** Compute the Householder reflection vector w_j to reduce the
847            j-th column of A to a multiple of the j-th unit vector. **/
848        ajnorm = lm_enorm(m-j, &A[j*m+j]);
849        if (ajnorm == 0) {
850            Rdiag[j] = 0;
851            continue;
852        }
853
854        /* Let the partial column vector A[j][j:] contain w_j := e_j+-a_j/|a_j|,
855           where the sign +- is chosen to avoid cancellation in w_jj. */
856        if (A[j*m+j] < 0)
857            ajnorm = -ajnorm;
858        for (i = j; i < m; i++)
859            A[j*m+i] /= ajnorm;
860        A[j*m+j] += 1;
861
862        /** Apply the Householder transformation U_w := 1 - 2*w_j.w_j/|w_j|^2
863            to the remaining columns, and update the norms. **/
864        for (k = j+1; k < n; k++) {
865            /* Compute scalar product w_j * a_j. */
866            sum = 0;
867            for (i = j; i < m; i++)
868                sum += A[j*m+i] * A[k*m+i];
869
870            /* Normalization is simplified by the coincidence |w_j|^2=2w_jj. */
871            temp = sum / A[j*m+j];
872
873            /* Carry out transform U_w_j * a_k. */
874            for (i = j; i < m; i++)
875                A[k*m+i] -= temp * A[j*m+i];
876
877            /* No idea what happens here. */
878            if (Rdiag[k] != 0) {
879                temp = A[m*k+j] / Rdiag[k];
880                if (fabs(temp) < 1) {
881                    Rdiag[k] *= sqrt(1 - SQR(temp));
882                    temp = Rdiag[k] / W[k];
883                } else
884                    temp = 0;
885                if (temp == 0 || 0.05 * SQR(temp) <= LM_MACHEP) {
886                    Rdiag[k] = lm_enorm(m-j-1, &A[m*k+j+1]);
887                    W[k] = Rdiag[k];
888                }
889            }
890        }
891
892        Rdiag[j] = -ajnorm;
893    }
894} /*** lm_qrfac. ***/
895
896/******************************************************************************/
897/*  lm_qrsolv (linear least-squares)                                          */
898/******************************************************************************/
899
900void lm_qrsolv(const int n, double* r, const int ldr, const int* Pivot,
901               const double* diag, const double* qtb, double* x,
902               double* Sdiag, double* W)
903/*
904 *     Given an m by n matrix A, an n by n diagonal matrix D, and an
905 *     m-vector b, the problem is to determine an x which solves the
906 *     system
907 *
908 *          A*x = b  and  D*x = 0
909 *
910 *     in the least squares sense.
911 *
912 *     This subroutine completes the solution of the problem if it is
913 *     provided with the necessary information from the QR factorization,
914 *     with column pivoting, of A. That is, if A*P = Q*R, where P is a
915 *     permutation matrix, Q has orthogonal columns, and R is an upper
916 *     triangular matrix with diagonal elements of nonincreasing magnitude,
917 *     then qrsolv expects the full upper triangle of R, the permutation
918 *     matrix P, and the first n components of Q^T*b. The system
919 *     A*x = b, D*x = 0, is then equivalent to
920 *
921 *          R*z = Q^T*b,  P^T*D*P*z = 0,
922 *
923 *     where x = P*z. If this system does not have full rank, then a least
924 *     squares solution is obtained. On output qrsolv also provides an upper
925 *     triangular matrix S such that
926 *
927 *          P^T*(A^T*A + D*D)*P = S^T*S.
928 *
929 *     S is computed within qrsolv and may be of separate interest.
930 *
931 *     Parameters:
932 *
933 *      n is a positive integer INPUT variable set to the order of R.
934 *
935 *      r is an n by n array. On INPUT the full upper triangle must contain
936 *        the full upper triangle of the matrix R. On OUTPUT the full upper
937 *        triangle is unaltered, and the strict lower triangle contains the
938 *        strict upper triangle (transposed) of the upper triangular matrix S.
939 *
940 *      ldr is a positive integer INPUT variable not less than n which
941 *        specifies the leading dimension of the array R.
942 *
943 *      Pivot is an integer INPUT array of length n which defines the
944 *        permutation matrix P such that A*P = Q*R. Column j of P is column
945 *        Pivot(j) of the identity matrix.
946 *
947 *      diag is an INPUT array of length n which must contain the diagonal
948 *        elements of the matrix D.
949 *
950 *      qtb is an INPUT array of length n which must contain the first
951 *        n elements of the vector Q^T*b.
952 *
953 *      x is an OUTPUT array of length n which contains the least-squares
954 *        solution of the system A*x = b, D*x = 0.
955 *
956 *      Sdiag is an OUTPUT array of length n which contains the diagonal
957 *        elements of the upper triangular matrix S.
958 *
959 *      W is a work array of length n.
960 *
961 */
962{
963    int i, kk, j, k, nsing;
964    double qtbpj, sum, temp;
965    double _sin, _cos, _tan, _cot; /* local variables, not functions */
966
967    /*** Copy R and Q^T*b to preserve input and initialize S.
968         In particular, save the diagonal elements of R in x. ***/
969
970    for (j = 0; j < n; j++) {
971        for (i = j; i < n; i++)
972            r[j*ldr+i] = r[i*ldr+j];
973        x[j] = r[j*ldr+j];
974        W[j] = qtb[j];
975    }
976
977    /*** Eliminate the diagonal matrix D using a Givens rotation. ***/
978
979    for (j = 0; j < n; j++) {
980
981        /*** Prepare the row of D to be eliminated, locating the diagonal
982             element using P from the QR factorization. ***/
983
984        if (diag[Pivot[j]] != 0) {
985            for (k = j; k < n; k++)
986                Sdiag[k] = 0;
987            Sdiag[j] = diag[Pivot[j]];
988
989            /*** The transformations to eliminate the row of D modify only
990                 a single element of Q^T*b beyond the first n, which is
991                 initially 0. ***/
992
993            qtbpj = 0;
994            for (k = j; k < n; k++) {
995
996                /** Determine a Givens rotation which eliminates the
997                    appropriate element in the current row of D. **/
998                if (Sdiag[k] == 0)
999                    continue;
1000                kk = k + ldr * k;
1001                if (fabs(r[kk]) < fabs(Sdiag[k])) {
1002                    _cot = r[kk] / Sdiag[k];
1003                    _sin = 1 / hypot(1, _cot);
1004                    _cos = _sin * _cot;
1005                } else {
1006                    _tan = Sdiag[k] / r[kk];
1007                    _cos = 1 / hypot(1, _tan);
1008                    _sin = _cos * _tan;
1009                }
1010
1011                /** Compute the modified diagonal element of R and
1012                    the modified element of (Q^T*b,0). **/
1013                r[kk] = _cos * r[kk] + _sin * Sdiag[k];
1014                temp = _cos * W[k] + _sin * qtbpj;
1015                qtbpj = -_sin * W[k] + _cos * qtbpj;
1016                W[k] = temp;
1017
1018                /** Accumulate the tranformation in the row of S. **/
1019                for (i = k+1; i < n; i++) {
1020                    temp = _cos * r[k*ldr+i] + _sin * Sdiag[i];
1021                    Sdiag[i] = -_sin * r[k*ldr+i] + _cos * Sdiag[i];
1022                    r[k*ldr+i] = temp;
1023                }
1024            }
1025        }
1026
1027        /** Store the diagonal element of S and restore
1028            the corresponding diagonal element of R. **/
1029        Sdiag[j] = r[j*ldr+j];
1030        r[j*ldr+j] = x[j];
1031    }
1032
1033    /*** Solve the triangular system for z. If the system is singular, then
1034        obtain a least-squares solution. ***/
1035
1036    nsing = n;
1037    for (j = 0; j < n; j++) {
1038        if (Sdiag[j] == 0 && nsing == n)
1039            nsing = j;
1040        if (nsing < n)
1041            W[j] = 0;
1042    }
1043
1044    for (j = nsing-1; j >= 0; j--) {
1045        sum = 0;
1046        for (i = j+1; i < nsing; i++)
1047            sum += r[j*ldr+i] * W[i];
1048        W[j] = (W[j] - sum) / Sdiag[j];
1049    }
1050
1051    /*** Permute the components of z back to components of x. ***/
1052
1053    for (j = 0; j < n; j++)
1054        x[Pivot[j]] = W[j];
1055
1056} /*** lm_qrsolv. ***/
1057
1058/******************************************************************************/
1059/*  lm_enorm (Euclidean norm)                                                 */
1060/******************************************************************************/
1061
1062double lm_enorm(int n, const double* x)
1063/*     This function calculates the Euclidean norm of an n-vector x.
1064 *
1065 *     The Euclidean norm is computed by accumulating the sum of squares
1066 *     in three different sums. The sums of squares for the small and large
1067 *     components are scaled so that no overflows occur. Non-destructive
1068 *     underflows are permitted. Underflows and overflows do not occur in
1069 *     the computation of the unscaled sum of squares for the intermediate
1070 *     components. The definitions of small, intermediate and large components
1071 *     depend on two constants, LM_SQRT_DWARF and LM_SQRT_GIANT. The main
1072 *     restrictions on these constants are that LM_SQRT_DWARF**2 not underflow
1073 *     and LM_SQRT_GIANT**2 not overflow.
1074 *
1075 *     Parameters:
1076 *
1077 *      n is a positive integer INPUT variable.
1078 *
1079 *      x is an INPUT array of length n.
1080 */
1081{
1082    int i;
1083    double agiant, s1, s2, s3, xabs, x1max, x3max;
1084
1085    s1 = 0;
1086    s2 = 0;
1087    s3 = 0;
1088    x1max = 0;
1089    x3max = 0;
1090    agiant = LM_SQRT_GIANT / n;
1091
1092    /** Sum squares. **/
1093    for (i = 0; i < n; i++) {
1094        xabs = fabs(x[i]);
1095        if (xabs > LM_SQRT_DWARF) {
1096            if (xabs < agiant) {
1097                s2 += SQR(xabs);
1098            } else if (xabs > x1max) {
1099                s1 = 1 + s1 * SQR(x1max / xabs);
1100                x1max = xabs;
1101            } else {
1102                s1 += SQR(xabs / x1max);
1103            }
1104        } else if (xabs > x3max) {
1105            s3 = 1 + s3 * SQR(x3max / xabs);
1106            x3max = xabs;
1107        } else if (xabs != 0) {
1108            s3 += SQR(xabs / x3max);
1109        }
1110    }
1111
1112    /** Calculate the norm. **/
1113    if (s1 != 0)
1114        return x1max * sqrt(s1 + (s2 / x1max) / x1max);
1115    else if (s2 != 0)
1116        if (s2 >= x3max)
1117            return sqrt(s2 * (1 + (x3max / s2) * (x3max * s3)));
1118        else
1119            return sqrt(x3max * ((s2 / x3max) + (x3max * s3)));
1120    else
1121        return x3max * sqrt(s3);
1122
1123} /*** lm_enorm. ***/
1124