1/*
2 $License:
3    Copyright (C) 2011-2012 InvenSense Corporation, All Rights Reserved.
4    See included License.txt for License information.
5 $
6 */
7
8/*******************************************************************************
9 *
10 * $Id:$
11 *
12 ******************************************************************************/
13
14/**
15 *   @defgroup  ML_MATH_FUNC ml_math_func
16 *   @brief     Motion Library - Math Functions
17 *              Common math functions the Motion Library
18 *
19 *   @{
20 *       @file ml_math_func.c
21 *       @brief Math Functions.
22 */
23
24#include "mlmath.h"
25#include "ml_math_func.h"
26#include "mlinclude.h"
27#include <string.h>
28
29/** @internal
30 * Does the cross product of compass by gravity, then converts that
31 * to the world frame using the quaternion, then computes the angle that
32 * is made.
33 *
34 * @param[in] compass Compass Vector (Body Frame), length 3
35 * @param[in] grav Gravity Vector (Body Frame), length 3
36 * @param[in] quat Quaternion, Length 4
37 * @return Angle Cross Product makes after quaternion rotation.
38 */
39float inv_compass_angle(const long *compass, const long *grav, const float *quat)
40{
41    float cgcross[4], q1[4], q2[4], qi[4];
42    float angW;
43
44    // Compass cross Gravity
45    cgcross[0] = 0.f;
46    cgcross[1] = (float)compass[1] * grav[2] - (float)compass[2] * grav[1];
47    cgcross[2] = (float)compass[2] * grav[0] - (float)compass[0] * grav[2];
48    cgcross[3] = (float)compass[0] * grav[1] - (float)compass[1] * grav[0];
49
50    // Now convert cross product into world frame
51    inv_q_multf(quat, cgcross, q1);
52    inv_q_invertf(quat, qi);
53    inv_q_multf(q1, qi, q2);
54
55    // Protect against atan2 of 0,0
56    if ((q2[2] == 0.f) && (q2[1] == 0.f))
57        return 0.f;
58
59    // This is the unfiltered heading correction
60    angW = -atan2f(q2[2], q2[1]);
61    return angW;
62}
63
64/**
65 *  @brief  The gyro data magnitude squared :
66 *          (1 degree per second)^2 = 2^6 = 2^GYRO_MAG_SQR_SHIFT.
67 * @param[in] gyro Gyro data scaled with 1 dps = 2^16
68 *  @return the computed magnitude squared output of the gyroscope.
69 */
70unsigned long inv_get_gyro_sum_of_sqr(const long *gyro)
71{
72    unsigned long gmag = 0;
73    long temp;
74    int kk;
75
76    for (kk = 0; kk < 3; ++kk) {
77        temp = gyro[kk] >> (16 - (GYRO_MAG_SQR_SHIFT / 2));
78        gmag += temp * temp;
79    }
80
81    return gmag;
82}
83
84/** Performs a multiply and shift by 29. These are good functions to write in assembly on
85 * with devices with small memory where you want to get rid of the long long which some
86 * assemblers don't handle well
87 * @param[in] a
88 * @param[in] b
89 * @return ((long long)a*b)>>29
90*/
91long inv_q29_mult(long a, long b)
92{
93#ifdef UMPL_ELIMINATE_64BIT
94    long result;
95    result = (long)((float)a * b / (1L << 29));
96    return result;
97#else
98    long long temp;
99    long result;
100    temp = (long long)a * b;
101    result = (long)(temp >> 29);
102    return result;
103#endif
104}
105
106/** Performs a multiply and shift by 30. These are good functions to write in assembly on
107 * with devices with small memory where you want to get rid of the long long which some
108 * assemblers don't handle well
109 * @param[in] a
110 * @param[in] b
111 * @return ((long long)a*b)>>30
112*/
113long inv_q30_mult(long a, long b)
114{
115#ifdef UMPL_ELIMINATE_64BIT
116    long result;
117    result = (long)((float)a * b / (1L << 30));
118    return result;
119#else
120    long long temp;
121    long result;
122    temp = (long long)a * b;
123    result = (long)(temp >> 30);
124    return result;
125#endif
126}
127
128#ifndef UMPL_ELIMINATE_64BIT
129long inv_q30_div(long a, long b)
130{
131    long long temp;
132    long result;
133    temp = (((long long)a) << 30) / b;
134    result = (long)temp;
135    return result;
136}
137#endif
138
139/** Performs a multiply and shift by shift. These are good functions to write
140 * in assembly on with devices with small memory where you want to get rid of
141 * the long long which some assemblers don't handle well
142 * @param[in] a First multicand
143 * @param[in] b Second multicand
144 * @param[in] shift Shift amount after multiplying
145 * @return ((long long)a*b)<<shift
146*/
147#ifndef UMPL_ELIMINATE_64BIT
148long inv_q_shift_mult(long a, long b, int shift)
149{
150    long result;
151    result = (long)(((long long)a * b) >> shift);
152    return result;
153}
154#endif
155
156/** Performs a fixed point quaternion multiply.
157* @param[in] q1 First Quaternion Multicand, length 4. 1.0 scaled
158*            to 2^30
159* @param[in] q2 Second Quaternion Multicand, length 4. 1.0 scaled
160*            to 2^30
161* @param[out] qProd Product after quaternion multiply. Length 4.
162*             1.0 scaled to 2^30.
163*/
164void inv_q_mult(const long *q1, const long *q2, long *qProd)
165{
166    INVENSENSE_FUNC_START;
167    qProd[0] = inv_q30_mult(q1[0], q2[0]) - inv_q30_mult(q1[1], q2[1]) -
168               inv_q30_mult(q1[2], q2[2]) - inv_q30_mult(q1[3], q2[3]);
169
170    qProd[1] = inv_q30_mult(q1[0], q2[1]) + inv_q30_mult(q1[1], q2[0]) +
171               inv_q30_mult(q1[2], q2[3]) - inv_q30_mult(q1[3], q2[2]);
172
173    qProd[2] = inv_q30_mult(q1[0], q2[2]) - inv_q30_mult(q1[1], q2[3]) +
174               inv_q30_mult(q1[2], q2[0]) + inv_q30_mult(q1[3], q2[1]);
175
176    qProd[3] = inv_q30_mult(q1[0], q2[3]) + inv_q30_mult(q1[1], q2[2]) -
177               inv_q30_mult(q1[2], q2[1]) + inv_q30_mult(q1[3], q2[0]);
178}
179
180/** Performs a fixed point quaternion addition.
181* @param[in] q1 First Quaternion term, length 4. 1.0 scaled
182*            to 2^30
183* @param[in] q2 Second Quaternion term, length 4. 1.0 scaled
184*            to 2^30
185* @param[out] qSum Sum after quaternion summation. Length 4.
186*             1.0 scaled to 2^30.
187*/
188void inv_q_add(long *q1, long *q2, long *qSum)
189{
190    INVENSENSE_FUNC_START;
191    qSum[0] = q1[0] + q2[0];
192    qSum[1] = q1[1] + q2[1];
193    qSum[2] = q1[2] + q2[2];
194    qSum[3] = q1[3] + q2[3];
195}
196
197void inv_vector_normalize(long *vec, int length)
198{
199    INVENSENSE_FUNC_START;
200    double normSF = 0;
201    int ii;
202    for (ii = 0; ii < length; ii++) {
203        normSF +=
204            inv_q30_to_double(vec[ii]) * inv_q30_to_double(vec[ii]);
205    }
206    if (normSF > 0) {
207        normSF = 1 / sqrt(normSF);
208        for (ii = 0; ii < length; ii++) {
209            vec[ii] = (int)((double)vec[ii] * normSF);
210        }
211    } else {
212        vec[0] = 1073741824L;
213        for (ii = 1; ii < length; ii++) {
214            vec[ii] = 0;
215        }
216    }
217}
218
219void inv_q_normalize(long *q)
220{
221    INVENSENSE_FUNC_START;
222    inv_vector_normalize(q, 4);
223}
224
225void inv_q_invert(const long *q, long *qInverted)
226{
227    INVENSENSE_FUNC_START;
228    qInverted[0] = q[0];
229    qInverted[1] = -q[1];
230    qInverted[2] = -q[2];
231    qInverted[3] = -q[3];
232}
233
234double quaternion_to_rotation_angle(const long *quat) {
235    double quat0 = (double )quat[0] / 1073741824;
236    if (quat0 > 1.0f) {
237        quat0 = 1.0;
238    } else if (quat0 < -1.0f) {
239        quat0 = -1.0;
240    }
241
242    return acos(quat0)*2*180/M_PI;
243}
244
245/** Rotates a 3-element vector by Rotation defined by Q
246*/
247void inv_q_rotate(const long *q, const long *in, long *out)
248{
249    long q_temp1[4], q_temp2[4];
250    long in4[4], out4[4];
251
252    // Fixme optimize
253    in4[0] = 0;
254    memcpy(&in4[1], in, 3 * sizeof(long));
255    inv_q_mult(q, in4, q_temp1);
256    inv_q_invert(q, q_temp2);
257    inv_q_mult(q_temp1, q_temp2, out4);
258    memcpy(out, &out4[1], 3 * sizeof(long));
259}
260
261void inv_q_multf(const float *q1, const float *q2, float *qProd)
262{
263    INVENSENSE_FUNC_START;
264    qProd[0] =
265        (q1[0] * q2[0] - q1[1] * q2[1] - q1[2] * q2[2] - q1[3] * q2[3]);
266    qProd[1] =
267        (q1[0] * q2[1] + q1[1] * q2[0] + q1[2] * q2[3] - q1[3] * q2[2]);
268    qProd[2] =
269        (q1[0] * q2[2] - q1[1] * q2[3] + q1[2] * q2[0] + q1[3] * q2[1]);
270    qProd[3] =
271        (q1[0] * q2[3] + q1[1] * q2[2] - q1[2] * q2[1] + q1[3] * q2[0]);
272}
273
274void inv_q_addf(const float *q1, const float *q2, float *qSum)
275{
276    INVENSENSE_FUNC_START;
277    qSum[0] = q1[0] + q2[0];
278    qSum[1] = q1[1] + q2[1];
279    qSum[2] = q1[2] + q2[2];
280    qSum[3] = q1[3] + q2[3];
281}
282
283void inv_q_normalizef(float *q)
284{
285    INVENSENSE_FUNC_START;
286    float normSF = 0;
287    float xHalf = 0;
288    normSF = (q[0] * q[0] + q[1] * q[1] + q[2] * q[2] + q[3] * q[3]);
289    if (normSF < 2) {
290        xHalf = 0.5f * normSF;
291        normSF = normSF * (1.5f - xHalf * normSF * normSF);
292        normSF = normSF * (1.5f - xHalf * normSF * normSF);
293        normSF = normSF * (1.5f - xHalf * normSF * normSF);
294        normSF = normSF * (1.5f - xHalf * normSF * normSF);
295        q[0] *= normSF;
296        q[1] *= normSF;
297        q[2] *= normSF;
298        q[3] *= normSF;
299    } else {
300        q[0] = 1.0;
301        q[1] = 0.0;
302        q[2] = 0.0;
303        q[3] = 0.0;
304    }
305    normSF = (q[0] * q[0] + q[1] * q[1] + q[2] * q[2] + q[3] * q[3]);
306}
307
308/** Performs a length 4 vector normalization with a square root.
309* @param[in,out] q vector to normalize. Returns [1,0,0,0] is magnitude is zero.
310*/
311void inv_q_norm4(float *q)
312{
313    float mag;
314    mag = sqrtf(q[0] * q[0] + q[1] * q[1] + q[2] * q[2] + q[3] * q[3]);
315    if (mag) {
316        q[0] /= mag;
317        q[1] /= mag;
318        q[2] /= mag;
319        q[3] /= mag;
320    } else {
321        q[0] = 1.f;
322        q[1] = 0.f;
323        q[2] = 0.f;
324        q[3] = 0.f;
325    }
326}
327
328void inv_q_invertf(const float *q, float *qInverted)
329{
330    INVENSENSE_FUNC_START;
331    qInverted[0] = q[0];
332    qInverted[1] = -q[1];
333    qInverted[2] = -q[2];
334    qInverted[3] = -q[3];
335}
336
337/**
338 * Converts a quaternion to a rotation matrix.
339 * @param[in] quat 4-element quaternion in fixed point. One is 2^30.
340 * @param[out] rot Rotation matrix in fixed point. One is 2^30. The
341 *             First 3 elements of the rotation matrix, represent
342 *             the first row of the matrix. Rotation matrix multiplied
343 *             by a 3 element column vector transform a vector from Body
344 *             to World.
345 */
346void inv_quaternion_to_rotation(const long *quat, long *rot)
347{
348    rot[0] =
349        inv_q29_mult(quat[1], quat[1]) + inv_q29_mult(quat[0],
350                quat[0]) -
351        1073741824L;
352    rot[1] =
353        inv_q29_mult(quat[1], quat[2]) - inv_q29_mult(quat[3], quat[0]);
354    rot[2] =
355        inv_q29_mult(quat[1], quat[3]) + inv_q29_mult(quat[2], quat[0]);
356    rot[3] =
357        inv_q29_mult(quat[1], quat[2]) + inv_q29_mult(quat[3], quat[0]);
358    rot[4] =
359        inv_q29_mult(quat[2], quat[2]) + inv_q29_mult(quat[0],
360                quat[0]) -
361        1073741824L;
362    rot[5] =
363        inv_q29_mult(quat[2], quat[3]) - inv_q29_mult(quat[1], quat[0]);
364    rot[6] =
365        inv_q29_mult(quat[1], quat[3]) - inv_q29_mult(quat[2], quat[0]);
366    rot[7] =
367        inv_q29_mult(quat[2], quat[3]) + inv_q29_mult(quat[1], quat[0]);
368    rot[8] =
369        inv_q29_mult(quat[3], quat[3]) + inv_q29_mult(quat[0],
370                quat[0]) -
371        1073741824L;
372}
373
374/**
375 * Converts a quaternion to a rotation vector. A rotation vector is
376 * a method to represent a 4-element quaternion vector in 3-elements.
377 * To get the quaternion from the 3-elements, The last 3-elements of
378 * the quaternion will be the given rotation vector. The first element
379 * of the quaternion will be the positive value that will be required
380 * to make the magnitude of the quaternion 1.0 or 2^30 in fixed point units.
381 * @param[in] quat 4-element quaternion in fixed point. One is 2^30.
382 * @param[out] rot Rotation vector in fixed point. One is 2^30.
383 */
384void inv_quaternion_to_rotation_vector(const long *quat, long *rot)
385{
386    rot[0] = quat[1];
387    rot[1] = quat[2];
388    rot[2] = quat[3];
389
390    if (quat[0] < 0.0) {
391        rot[0] = -rot[0];
392        rot[1] = -rot[1];
393        rot[2] = -rot[2];
394    }
395}
396
397/** Converts a 32-bit long to a big endian byte stream */
398unsigned char *inv_int32_to_big8(long x, unsigned char *big8)
399{
400    big8[0] = (unsigned char)((x >> 24) & 0xff);
401    big8[1] = (unsigned char)((x >> 16) & 0xff);
402    big8[2] = (unsigned char)((x >> 8) & 0xff);
403    big8[3] = (unsigned char)(x & 0xff);
404    return big8;
405}
406
407/** Converts a big endian byte stream into a 32-bit long */
408long inv_big8_to_int32(const unsigned char *big8)
409{
410    long x;
411    x = ((long)big8[0] << 24) | ((long)big8[1] << 16) | ((long)big8[2] << 8)
412        | ((long)big8[3]);
413    return x;
414}
415
416/** Converts a big endian byte stream into a 16-bit integer (short) */
417short inv_big8_to_int16(const unsigned char *big8)
418{
419    short x;
420    x = ((short)big8[0] << 8) | ((short)big8[1]);
421    return x;
422}
423
424/** Converts a little endian byte stream into a 16-bit integer (short) */
425short inv_little8_to_int16(const unsigned char *little8)
426{
427    short x;
428    x = ((short)little8[1] << 8) | ((short)little8[0]);
429    return x;
430}
431
432/** Converts a 16-bit short to a big endian byte stream */
433unsigned char *inv_int16_to_big8(short x, unsigned char *big8)
434{
435    big8[0] = (unsigned char)((x >> 8) & 0xff);
436    big8[1] = (unsigned char)(x & 0xff);
437    return big8;
438}
439
440void inv_matrix_det_inc(float *a, float *b, int *n, int x, int y)
441{
442    int k, l, i, j;
443    for (i = 0, k = 0; i < *n; i++, k++) {
444        for (j = 0, l = 0; j < *n; j++, l++) {
445            if (i == x)
446                i++;
447            if (j == y)
448                j++;
449            *(b + 6 * k + l) = *(a + 6 * i + j);
450        }
451    }
452    *n = *n - 1;
453}
454
455void inv_matrix_det_incd(double *a, double *b, int *n, int x, int y)
456{
457    int k, l, i, j;
458    for (i = 0, k = 0; i < *n; i++, k++) {
459        for (j = 0, l = 0; j < *n; j++, l++) {
460            if (i == x)
461                i++;
462            if (j == y)
463                j++;
464            *(b + 6 * k + l) = *(a + 6 * i + j);
465        }
466    }
467    *n = *n - 1;
468}
469
470float inv_matrix_det(float *p, int *n)
471{
472    float d[6][6], sum = 0;
473    int i, j, m;
474    m = *n;
475    if (*n == 2)
476        return (*p ** (p + 7) - *(p + 1) ** (p + 6));
477    for (i = 0, j = 0; j < m; j++) {
478        *n = m;
479        inv_matrix_det_inc(p, &d[0][0], n, i, j);
480        sum =
481            sum + *(p + 6 * i + j) * SIGNM(i +
482                                            j) *
483            inv_matrix_det(&d[0][0], n);
484    }
485
486    return (sum);
487}
488
489double inv_matrix_detd(double *p, int *n)
490{
491    double d[6][6], sum = 0;
492    int i, j, m;
493    m = *n;
494    if (*n == 2)
495        return (*p ** (p + 7) - *(p + 1) ** (p + 6));
496    for (i = 0, j = 0; j < m; j++) {
497        *n = m;
498        inv_matrix_det_incd(p, &d[0][0], n, i, j);
499        sum =
500            sum + *(p + 6 * i + j) * SIGNM(i +
501                                            j) *
502            inv_matrix_detd(&d[0][0], n);
503    }
504
505    return (sum);
506}
507
508/** Wraps angle from (-M_PI,M_PI]
509 * @param[in] ang Angle in radians to wrap
510 * @return Wrapped angle from (-M_PI,M_PI]
511 */
512float inv_wrap_angle(float ang)
513{
514    if (ang > M_PI)
515        return ang - 2 * (float)M_PI;
516    else if (ang <= -(float)M_PI)
517        return ang + 2 * (float)M_PI;
518    else
519        return ang;
520}
521
522/** Finds the minimum angle difference ang1-ang2 such that difference
523 * is between [-M_PI,M_PI]
524 * @param[in] ang1
525 * @param[in] ang2
526 * @return angle difference ang1-ang2
527 */
528float inv_angle_diff(float ang1, float ang2)
529{
530    float d;
531    ang1 = inv_wrap_angle(ang1);
532    ang2 = inv_wrap_angle(ang2);
533    d = ang1 - ang2;
534    if (d > M_PI)
535        d -= 2 * (float)M_PI;
536    else if (d < -(float)M_PI)
537        d += 2 * (float)M_PI;
538    return d;
539}
540
541/** bernstein hash, derived from public domain source */
542uint32_t inv_checksum(const unsigned char *str, int len)
543{
544    uint32_t hash = 5381;
545    int i, c;
546
547    for (i = 0; i < len; i++) {
548        c = *(str + i);
549        hash = ((hash << 5) + hash) + c;    /* hash * 33 + c */
550    }
551
552    return hash;
553}
554
555static unsigned short inv_row_2_scale(const signed char *row)
556{
557    unsigned short b;
558
559    if (row[0] > 0)
560        b = 0;
561    else if (row[0] < 0)
562        b = 4;
563    else if (row[1] > 0)
564        b = 1;
565    else if (row[1] < 0)
566        b = 5;
567    else if (row[2] > 0)
568        b = 2;
569    else if (row[2] < 0)
570        b = 6;
571    else
572        b = 7;  // error
573    return b;
574}
575
576
577/** Converts an orientation matrix made up of 0,+1,and -1 to a scalar representation.
578* @param[in] mtx Orientation matrix to convert to a scalar.
579* @return Description of orientation matrix. The lowest 2 bits (0 and 1) represent the column the one is on for the
580* first row, with the bit number 2 being the sign. The next 2 bits (3 and 4) represent
581* the column the one is on for the second row with bit number 5 being the sign.
582* The next 2 bits (6 and 7) represent the column the one is on for the third row with
583* bit number 8 being the sign. In binary the identity matrix would therefor be:
584* 010_001_000 or 0x88 in hex.
585*/
586unsigned short inv_orientation_matrix_to_scalar(const signed char *mtx)
587{
588
589    unsigned short scalar;
590
591    /*
592       XYZ  010_001_000 Identity Matrix
593       XZY  001_010_000
594       YXZ  010_000_001
595       YZX  000_010_001
596       ZXY  001_000_010
597       ZYX  000_001_010
598     */
599
600    scalar = inv_row_2_scale(mtx);
601    scalar |= inv_row_2_scale(mtx + 3) << 3;
602    scalar |= inv_row_2_scale(mtx + 6) << 6;
603
604    return scalar;
605}
606
607/** Uses the scalar orientation value to convert from chip frame to body frame
608* @param[in] orientation A scalar that represent how to go from chip to body frame
609* @param[in] input Input vector, length 3
610* @param[out] output Output vector, length 3
611*/
612void inv_convert_to_body(unsigned short orientation, const long *input, long *output)
613{
614    output[0] = input[orientation      & 0x03] * SIGNSET(orientation & 0x004);
615    output[1] = input[(orientation>>3) & 0x03] * SIGNSET(orientation & 0x020);
616    output[2] = input[(orientation>>6) & 0x03] * SIGNSET(orientation & 0x100);
617}
618
619/** Uses the scalar orientation value to convert from body frame to chip frame
620* @param[in] orientation A scalar that represent how to go from chip to body frame
621* @param[in] input Input vector, length 3
622* @param[out] output Output vector, length 3
623*/
624void inv_convert_to_chip(unsigned short orientation, const long *input, long *output)
625{
626    output[orientation & 0x03]      = input[0] * SIGNSET(orientation & 0x004);
627    output[(orientation>>3) & 0x03] = input[1] * SIGNSET(orientation & 0x020);
628    output[(orientation>>6) & 0x03] = input[2] * SIGNSET(orientation & 0x100);
629}
630
631
632/** Uses the scalar orientation value to convert from chip frame to body frame and
633* apply appropriate scaling.
634* @param[in] orientation A scalar that represent how to go from chip to body frame
635* @param[in] sensitivity Sensitivity scale
636* @param[in] input Input vector, length 3
637* @param[out] output Output vector, length 3
638*/
639void inv_convert_to_body_with_scale(unsigned short orientation,
640                                    long sensitivity,
641                                    const long *input, long *output)
642{
643    output[0] = inv_q30_mult(input[orientation & 0x03] *
644                             SIGNSET(orientation & 0x004), sensitivity);
645    output[1] = inv_q30_mult(input[(orientation>>3) & 0x03] *
646                             SIGNSET(orientation & 0x020), sensitivity);
647    output[2] = inv_q30_mult(input[(orientation>>6) & 0x03] *
648                             SIGNSET(orientation & 0x100), sensitivity);
649}
650
651/** find a norm for a vector
652* @param[in] a vector [3x1]
653* @param[out] output the norm of the input vector
654*/
655double inv_vector_norm(const float *x)
656{
657    return sqrt(x[0]*x[0]+x[1]*x[1]+x[2]*x[2]);
658}
659
660void inv_init_biquad_filter(inv_biquad_filter_t *pFilter, float *pBiquadCoeff) {
661    int i;
662    // initial state to zero
663    pFilter->state[0] = 0;
664    pFilter->state[1] = 0;
665
666    // set up coefficients
667    for (i=0; i<5; i++) {
668        pFilter->c[i] = pBiquadCoeff[i];
669    }
670}
671
672void inv_calc_state_to_match_output(inv_biquad_filter_t *pFilter, float input)
673{
674    pFilter->input = input;
675    pFilter->output = input;
676    pFilter->state[0] = input / (1 + pFilter->c[2] + pFilter->c[3]);
677    pFilter->state[1] = pFilter->state[0];
678}
679
680float inv_biquad_filter_process(inv_biquad_filter_t *pFilter, float input)  {
681    float stateZero;
682
683    pFilter->input = input;
684    // calculate the new state;
685    stateZero = pFilter->input - pFilter->c[2]*pFilter->state[0]
686                               - pFilter->c[3]*pFilter->state[1];
687
688    pFilter->output = stateZero + pFilter->c[0]*pFilter->state[0]
689                                + pFilter->c[1]*pFilter->state[1];
690
691    // update the output and state
692    pFilter->output = pFilter->output * pFilter->c[4];
693    pFilter->state[1] = pFilter->state[0];
694    pFilter->state[0] = stateZero;
695    return pFilter->output;
696}
697
698void inv_get_cross_product_vec(float *cgcross, float compass[3], float grav[3])  {
699
700    cgcross[0] = (float)compass[1] * grav[2] - (float)compass[2] * grav[1];
701    cgcross[1] = (float)compass[2] * grav[0] - (float)compass[0] * grav[2];
702    cgcross[2] = (float)compass[0] * grav[1] - (float)compass[1] * grav[0];
703}
704
705void mlMatrixVectorMult(long matrix[9], const long vecIn[3], long *vecOut)  {
706        // matrix format
707        //  [ 0  3  6;
708        //    1  4  7;
709        //    2  5  8]
710
711        // vector format:  [0  1  2]^T;
712        int i, j;
713        long temp;
714
715        for (i=0; i<3; i++)	{
716                temp = 0;
717                for (j=0; j<3; j++)  {
718                        temp += inv_q30_mult(matrix[i+j*3], vecIn[j]);
719                }
720                vecOut[i] = temp;
721        }
722}
723
724//============== 1/sqrt(x), 1/x, sqrt(x) Functions ================================
725
726/** Calculates 1/square-root of a fixed-point number (30 bit mantissa, positive): Q1.30
727* Input must be a positive scaled (2^30) integer
728* The number is scaled to lie between a range in which a Newton-Raphson
729* iteration works best. Corresponding square root of the power of two is returned.
730*  Caller must scale final result by 2^rempow (while avoiding overflow).
731* @param[in] x0, length 1
732* @param[out] rempow, length 1
733* @return scaledSquareRoot on success or zero.
734*/
735long inv_inverse_sqrt(long x0, int*rempow)
736{
737	//% Inverse sqrt NR in the neighborhood of 1.3>x>=0.65
738	//% x(k+1) = x(k)*(3 - x0*x(k)^2)
739
740	//% Seed equals 1. Works best in this region.
741	//xx0 = int32(1*2^30);
742
743	long oneoversqrt2, oneandhalf, x0_2;
744	unsigned long xx;
745	int pow2, sq2scale, nr_iters;
746	//long upscale, sqrt_upscale, upsclimit;
747	//long downscale, sqrt_downscale, downsclimit;
748
749	// Precompute some constants for efficiency
750	//% int32(2^30*1/sqrt(2))
751	oneoversqrt2=759250125L;
752	//% int32(1.5*2^30);
753	oneandhalf=1610612736L;
754
755	//// Further scaling into optimal region saves one or more NR iterations. Maps into region (.9, 1.1)
756	//// int32(0.9/log(2)*2^30)
757	//upscale = 1394173804L;
758	//// int32(sqrt(0.9/log(2))*2^30)
759	//sqrt_upscale = 1223512453L;
760	// // int32(1.1*log(2)/.9*2^30)
761	//upsclimit = 909652478L;
762	//// int32(1.1/log(4)*2^30)
763	//downscale = 851995103L;
764	//// int32(sqrt(1.1/log(4))*2^30)
765	//sqrt_downscale = 956463682L;
766	// // int32(0.9*log(4)/1.1*2^30)
767	//downsclimit = 1217881829L;
768
769	nr_iters = test_limits_and_scale(&x0, &pow2);
770
771	sq2scale=pow2%2;  // Find remainder. Is it even or odd?
772
773
774	// Further scaling to decrease NR iterations
775	// With the mapping below, 89% of calculations will require 2 NR iterations or less.
776	// TBD
777
778
779	x0_2 = x0 >>1; // This scaling incorporates factor of 2 in NR iteration below.
780	// Initial condition starts at 1: xx=(1L<<30);
781	// First iteration is simple: Instead of initializing xx=1, assign to result of first iteration:
782	// xx= (3/2-x0/2);
783	//% NR formula: xx=xx*(3/2-x0*xx*xx/2); = xx*(1.5 - (x0/2)*xx*xx)
784	// Initialize NR (first iteration). Note we are starting with xx=1, so the first iteration becomes an initialization.
785	xx = oneandhalf - x0_2;
786 	if ( nr_iters>=2 ) {
787		// Second NR iteration
788		xx = inv_q30_mult( xx, ( oneandhalf - inv_q30_mult(x0_2, inv_q30_mult(xx,xx) ) ) );
789		if ( nr_iters==3 ) {
790			// Third NR iteration.
791			xx = inv_q30_mult( xx, ( oneandhalf - inv_q30_mult(x0_2, inv_q30_mult(xx,xx) ) ) );
792			// Fourth NR iteration: Not needed due to single precision.
793		}
794	}
795	if (sq2scale) {
796		*rempow = (pow2>>1) + 1; // Account for sqrt(2) in denominator, note we multiply if s2scale is true
797		return (inv_q30_mult(xx,oneoversqrt2));
798	}
799	else {
800		*rempow = pow2>>1;
801		return xx;
802	}
803}
804
805
806/** Calculates square-root of a fixed-point number (30 bit mantissa, positive)
807* Input must be a positive scaled (2^30) integer
808* The number is scaled to lie between a range in which a Newton-Raphson
809* iteration works best.
810* @param[in] x0, length 1
811* @return scaledSquareRoot on success or zero. **/
812long inv_fast_sqrt(long x0)
813{
814
815	//% Square-Root with NR in the neighborhood of 1.3>x>=0.65 (log(2) <= x <= log(4) )
816    // Two-variable NR iteration:
817    // Initialize: a=x; c=x-1;
818    // 1st Newton Step:  a=a-a*c/2; ( or: a = x - x*(x-1)/2  )
819    // Iterate: c = c*c*(c-3)/4
820    //          a = a - a*c/2    --> reevaluating c at this step gives error of approximation
821
822	//% Seed equals 1. Works best in this region.
823	//xx0 = int32(1*2^30);
824
825	long sqrt2, oneoversqrt2, one_pt5;
826	long xx, cc;
827	int pow2, sq2scale, nr_iters;
828
829	// Return if input is zero. Negative should really error out.
830	if (x0 <= 0L) {
831		return 0L;
832	}
833
834	sqrt2 =1518500250L;
835	oneoversqrt2=759250125L;
836	one_pt5=1610612736L;
837
838	nr_iters = test_limits_and_scale(&x0, &pow2);
839
840	sq2scale = 0;
841	if (pow2 > 0)
842		sq2scale=pow2%2;  // Find remainder. Is it even or odd?
843	pow2 = pow2-sq2scale; // Now pow2 is even. Note we are adding because result is scaled with sqrt(2)
844
845	// Sqrt 1st NR iteration
846	cc = x0 - (1L<<30);
847	xx = x0 - (inv_q30_mult(x0, cc)>>1);
848 	if ( nr_iters>=2 ) {
849		// Sqrt second NR iteration
850		// cc = cc*cc*(cc-3)/4; = cc*cc*(cc/2 - 3/2)/2;
851		// cc = ( cc*cc*((cc>>1) - onePt5) ) >> 1
852		cc = inv_q30_mult( cc, inv_q30_mult(cc, (cc>>1) - one_pt5) ) >> 1;
853		xx = xx - (inv_q30_mult(xx, cc)>>1);
854		if ( nr_iters==3 ) {
855			// Sqrt third NR iteration
856			cc = inv_q30_mult( cc, inv_q30_mult(cc, (cc>>1) - one_pt5) ) >> 1;
857			xx = xx - (inv_q30_mult(xx, cc)>>1);
858		}
859	}
860	if (sq2scale)
861		xx = inv_q30_mult(xx,oneoversqrt2);
862	// Scale the number with the half of the power of 2 scaling
863	if (pow2>0)
864		xx = (xx >> (pow2>>1));
865	else if (pow2 == -1)
866		xx = inv_q30_mult(xx,sqrt2);
867	return xx;
868}
869
870/** Calculates 1/x of a fixed-point number (30 bit mantissa)
871* Input must be a scaled (2^30) integer (+/-)
872* The number is scaled to lie between a range in which a Newton-Raphson
873* iteration works best. Corresponding multiplier power of two is returned.
874*  Caller must scale final result by 2^pow (while avoiding overflow).
875* @param[in] x, length 1
876* @param[out] pow, length 1
877* @return scaledOneOverX on success or zero.
878*/
879long inv_one_over_x(long x0, int*pow)
880{
881	//% NR for 1/x in the neighborhood of log(2) => x => log(4)
882	//%    y(k+1)=y(k)*(2 \ 96 x0*y(k))
883    //% with y(0) = 1 as the starting value for NR
884
885	long two, xx;
886	int numberwasnegative, nr_iters, did_upscale, did_downscale;
887
888	long upscale, downscale, upsclimit, downsclimit;
889
890	*pow = 0;
891	// Return if input is zero.
892	if (x0 == 0L) {
893		return 0L;
894	}
895
896	// This is really (2^31-1), i.e. 1.99999... .
897	// Approximation error is 1e-9. Note 2^31 will overflow to sign bit, so it can't be used here.
898	two = 2147483647L;
899
900	// int32(0.92/log(2)*2^30)
901	upscale = 1425155444L;
902	// int32(1.08/upscale*2^30)
903	upsclimit = 873697834L;
904
905	// int32(1.08/log(4)*2^30)
906	downscale = 836504283L;
907	// int32(0.92/downscale*2^30)
908	downsclimit = 1268000423L;
909
910	// Algorithm is intended to work with positive numbers only. Change sign:
911	numberwasnegative = 0;
912	if (x0 < 0L) {
913		numberwasnegative = 1;
914		x0 = -x0;
915	}
916
917	nr_iters = test_limits_and_scale(&x0, pow);
918
919	did_upscale=0;
920	did_downscale=0;
921	// Pre-scaling to reduce NR iterations and improve accuracy:
922	if (x0<=upsclimit) {
923		x0 = inv_q30_mult(x0, upscale);
924		did_upscale = 1;
925		// The scaling ALWAYS leaves the number in the 2-NR iterations region:
926		nr_iters = 2;
927		// Is x0 in the single NR iteration region (0.994, 1.006) ?
928		if (x0 > 1067299373 && x0 < 1080184275)
929			nr_iters = 1;
930	} else if (x0>=downsclimit) {
931		x0 = inv_q30_mult(x0, downscale);
932		did_downscale = 1;
933		// The scaling ALWAYS leaves the number in the 2-NR iterations region:
934		nr_iters = 2;
935		// Is x0 in the single NR iteration region (0.994, 1.006) ?
936		if (x0 > 1067299373 && x0 < 1080184275)
937			nr_iters = 1;
938	}
939
940	xx = (two - x0) + 1; // Note 2 will overflow so the computation (2-x) is done with "two" == (2^30-1)
941	// First NR iteration
942	xx = inv_q30_mult( xx, (two - inv_q30_mult(x0, xx)) + 1 );
943 	if ( nr_iters>=2 ) {
944		// Second NR iteration
945		xx = inv_q30_mult( xx, (two - inv_q30_mult(x0, xx)) + 1 );
946		if ( nr_iters==3 ) {
947			// THird NR iteration.
948			xx = inv_q30_mult( xx, (two - inv_q30_mult(x0, xx)) + 1 );
949			// Fourth NR iteration: Not needed due to single precision.
950		}
951	}
952
953	// Post-scaling
954	if (did_upscale)
955		xx = inv_q30_mult( xx, upscale);
956	else if (did_downscale)
957		xx = inv_q30_mult( xx, downscale);
958
959	if (numberwasnegative)
960		xx = -xx;
961	return xx;
962}
963
964/** Auxiliary function used by inv_OneOverX(), inv_fastSquareRoot(), inv_inverseSqrt().
965* Finds the range of the argument, determines the optimal number of Newton-Raphson
966* iterations and . Corresponding square root of the power of two is returned.
967* Restrictions: Number is represented as Q1.30.
968*               Number is betweeen the range 2<x<=0
969* @param[in] x, length 1
970* @param[out] pow, length 1
971* @return # of NR iterations, x0 scaled between log(2) and log(4) and 2^N scaling (N=pow)
972*/
973int test_limits_and_scale(long *x0, int *pow)
974{
975	long lowerlimit, upperlimit, oneiterlothr, oneiterhithr, zeroiterlothr, zeroiterhithr;
976
977	// Lower Limit: ll = int32(log(2)*2^30);
978	lowerlimit = 744261118L;
979	//Upper Limit ul = int32(log(4)*2^30);
980	upperlimit = 1488522236L;
981	//  int32(0.9*2^30)
982	oneiterlothr = 966367642L;
983	// int32(1.1*2^30)
984	oneiterhithr = 1181116006L;
985	// int32(0.99*2^30)
986	zeroiterlothr=1063004406L;
987	//int32(1.01*2^30)
988	zeroiterhithr=1084479242L;
989
990	// Scale number such that Newton Raphson iteration works best:
991	// Find the power of two scaling that leaves the number in the optimal range,
992	// ll <= number <= ul. Note odd powers have special scaling further below
993	if (*x0 > upperlimit) {
994		// Halving the number will push it in the optimal range since largest value is 2
995		*x0 = *x0>>1;
996		*pow=-1;
997	} else if (*x0 < lowerlimit) {
998		// Find position of highest bit, counting from left, and scale number
999		*pow=get_highest_bit_position((unsigned long*)x0);
1000		if (*x0 >= upperlimit) {
1001			// Halving the number will push it in the optimal range
1002			*x0 = *x0>>1;
1003			*pow=*pow-1;
1004		}
1005		else if (*x0 < lowerlimit) {
1006			// Doubling the number will push it in the optimal range
1007			*x0 = *x0<<1;
1008			*pow=*pow+1;
1009		}
1010	} else {
1011		*pow = 0;
1012	}
1013
1014	if ( *x0<oneiterlothr || *x0>oneiterhithr )
1015		return 3; // 3 NR iterations
1016	if ( *x0<zeroiterlothr || *x0>zeroiterhithr )
1017		return 2; // 2 NR iteration
1018
1019	return 1; // 1 NR iteration
1020}
1021
1022/** Auxiliary function used by testLimitsAndScale()
1023* Find the highest nonzero bit in an unsigned 32 bit integer:
1024* @param[in] value, length 1.
1025* @return highes bit position.
1026**/int get_highest_bit_position(unsigned long *value)
1027{
1028    int position;
1029    position = 0;
1030    if (*value == 0) return 0;
1031
1032    if ((*value & 0xFFFF0000) == 0) {
1033		position += 16;
1034		*value=*value<<16;
1035	}
1036    if ((*value & 0xFF000000) == 0) {
1037		position += 8;
1038		*value=*value<<8;
1039	}
1040    if ((*value & 0xF0000000) == 0) {
1041		position += 4;
1042		*value=*value<<4;
1043	}
1044    if ((*value & 0xC0000000) == 0) {
1045		position += 2;
1046		*value=*value<<2;
1047	}
1048
1049	// If we got too far into sign bit, shift back. Note we are using an
1050	// unsigned long here, so right shift is going to shift all the bits.
1051    if ((*value & 0x80000000)) {
1052		position -= 1;
1053		*value=*value>>1;
1054	}
1055    return position;
1056}
1057
1058/* compute real part of quaternion, element[0]
1059@param[in]  inQuat, 3 elements gyro quaternion
1060@param[out] outquat, 4 elements gyro quaternion
1061*/
1062int inv_compute_scalar_part(const long * inQuat, long* outQuat)
1063{
1064    long scalarPart = 0;
1065
1066    scalarPart = inv_fast_sqrt((1L<<30) - inv_q30_mult(inQuat[0], inQuat[0])
1067                                        - inv_q30_mult(inQuat[1], inQuat[1])
1068                                        - inv_q30_mult(inQuat[2], inQuat[2]) );
1069                outQuat[0] = scalarPart;
1070                outQuat[1] = inQuat[0];
1071                outQuat[2] = inQuat[1];
1072                outQuat[3] = inQuat[2];
1073
1074                return 0;
1075}
1076/**
1077 * @}
1078 */
1079